# Manuscript
#
# Requirements:
#   pandoc-scholar (https://github.com/pandoc-scholar/pandoc-scholar)
#   checked out in PANDOC_SCHOLAR_PATH

# Nearly every recipe in this file writes its target through a shell
# redirect ("> $@"), which creates the file before the command has
# succeeded. Delete such half-written targets on failure so that they are
# not mistaken for up-to-date results on the next run.
.DELETE_ON_ERROR:

# manuscript
# NOTE(review): these settings are presumably read by the pandoc-scholar
# Makefile included below -- confirm against its documentation.
ARTICLE_FILE = mutagenicity.mustache.md
PANDOC_SCHOLAR_PATH = pandoc-scholar
OUTFILE_PREFIX = mutagenicity
DEFAULT_EXTENSIONS = pdf docx
PANDOC_WRITER_OPTIONS = --filter=pandoc-crossref --filter=pandoc-citeproc
TEMPLATE_FILE_LATEX = pandoc-scholar.latex

# experiments
# Convention: every *_DIR value ends with a slash, so paths are built by
# plain concatenation, e.g. $(CV_DIR)predictions/ (no extra "/").

# crossvalidations
CV_DIR = crossvalidations/
CV_PREDICTIONS_DIR = $(CV_DIR)predictions/
LAZAR_MODELS_DIR = ../lazar/models/
LAZAR_PREDICTIONS_DIR = ../lazar/predictions/
# Was "$(CV_DIR)/tensorflow/", which expanded to "crossvalidations//tensorflow/".
TENSORFLOW_CV_DIR = $(CV_DIR)tensorflow/
CONFUSION_MATRICES_DIR = $(CV_DIR)confusion-matrices/

# One result file per model; the same file names are used for the
# crossvalidation predictions, the confusion matrices and the PA predictions.
CV_FILES = lazar-all.csv lazar-high-confidence.csv rf.csv lr.csv lr2.csv nn.csv svm.csv

CONFUSION_MATRICES = \
  $(addprefix $(CONFUSION_MATRICES_DIR)mp2d/,$(CV_FILES)) \
  $(addprefix $(CONFUSION_MATRICES_DIR)cdk/,$(CV_FILES))
CV_PREDICTIONS = \
  $(addprefix $(CV_PREDICTIONS_DIR)mp2d/,$(CV_FILES)) \
  $(addprefix $(CV_PREDICTIONS_DIR)cdk/,$(CV_FILES))
CV_SUMMARY = $(CV_DIR)summary.yaml

# PA predictions
PA_DIR = pyrrolizidine-alkaloids/
TENSORFLOW_PA_DIR = $(PA_DIR)tensorflow/
# NOTE(review): PA_MP2D_DIR and PA_CDK_DIR are not defined anywhere in the
# visible part of this file, and the two variables below are not used in it
# either. Kept unchanged in case an included makefile relies on them.
PA_MP2D_LAZAR_DIR = $(PA_MP2D_DIR)lazar/
PA_CDK_LAZAR_DIR = $(PA_CDK_DIR)lazar/
PA_PREDICTIONS = \
  $(addprefix $(PA_DIR)mp2d/,$(CV_FILES)) \
  $(addprefix $(PA_DIR)cdk/,$(CV_FILES))
PA_SUMMARY = $(PA_DIR)summary.yaml

# manuscript
TSNE_FIGURES = $(addprefix figures/tsne-, \
  mp2d-mutagenicity.png \
  cdk-mutagenicity.png \
  mp2d-lazar-high-confidence-classifications.png \
  mp2d-lazar-all-classifications.png \
  mp2d-rf-classifications.png \
  mp2d-lr-classifications.png \
  mp2d-lr2-classifications.png \
  mp2d-nn-classifications.png \
  mp2d-svm-classifications.png \
  cdk-lazar-high-confidence-classifications.png \
  cdk-lazar-all-classifications.png \
  cdk-rf-classifications.png \
  cdk-lr-classifications.png \
  cdk-lr2-classifications.png \
  cdk-nn-classifications.png \
  cdk-svm-classifications.png \
  )
FIGURES = figures/roc.png figures/pa-groups.png $(TSNE_FIGURES)
DATA = data.yaml

# "all" is a command name, not a file. It is defined before the include so
# that it stays the default goal.
.PHONY: all
all: mutagenicity.pdf $(PA_DIR)pa-predictions.pdf $(CV_PREDICTIONS) $(CONFUSION_MATRICES) $(PA_PREDICTIONS)

include $(PANDOC_SCHOLAR_PATH)/Makefile

# Fill the manuscript template (mutagenicity.md) with the values from $(DATA).
# The figures stay prerequisites so that they exist and are current before
# the manuscript is rendered, but they are no longer handed to mustache:
# the original recipe used $^ and therefore passed every PNG as an input file.
mutagenicity.mustache.md: $(DATA) mutagenicity.md $(FIGURES)
	mustache $(DATA) mutagenicity.md > $@

# manuscript data
$(DATA): $(PA_SUMMARY) $(CV_SUMMARY) mutagenicity/mutagenicity.csv mutagenicity/mutagenicity-cdk.csv $(PA_DIR)pa-predictions.csv
	scripts/data.rb $^ > $@

# figures
## tsne
# Classification plots: t-SNE coordinates plus the per-model classification
# file produced by the "figures/<descriptor>-%-classifications" rules.
figures/tsne-mp2d-%.png: figures/tsne-mp2d-coordinates.csv figures/mp2d-%
	Rscript scripts/tsne-classifications.R $^ $@

figures/mp2d-%-classifications: figures/tsne-mp2d-coordinates.csv $(CV_PREDICTIONS_DIR)mp2d/%.csv $(PA_DIR)mp2d/%.csv
	scripts/cv-pa-classifications.rb $^ > $@

figures/tsne-cdk-%.png: figures/tsne-cdk-coordinates.csv figures/cdk-%
	Rscript scripts/tsne-classifications.R $^ $@

figures/cdk-%-classifications: figures/tsne-cdk-coordinates.csv $(CV_PREDICTIONS_DIR)cdk/%.csv $(PA_DIR)cdk/%.csv
	scripts/cv-pa-classifications.rb $^ > $@

# Mutagenicity plots. This pattern also matches names that the
# "figures/tsne-<descriptor>-%.png" rules above match; those rules need a
# "figures/<descriptor>-mutagenicity" file that no rule in this file builds.
figures/tsne-%-mutagenicity.png: figures/tsne-%-coordinates.csv figures/tsne-%-mutagenicity.csv
	Rscript scripts/tsne-mutagenicity.R $^ $@

### factors
figures/tsne-%-mutagenicity.csv: figures/tsne-%-coordinates.csv mutagenicity/mutagenicity.csv
	scripts/tsne-mutagenicity.rb $^ > $@

### coordinates
figures/tsne-cdk-coordinates.csv: figures/tsne-cdk-descriptors.csv
	Rscript scripts/tsne-cdk-coordinates.R $< $@

figures/tsne-mp2d-coordinates.csv: figures/tsne-mp2d-distances.csv
	Rscript scripts/tsne-mp2d-coordinates.R $< $@

### input
figures/tsne-cdk-descriptors.csv: mutagenicity/mutagenicity-cdk.csv $(PA_DIR)pa-cdk.csv
	scripts/tsne-cdk-descriptors.rb $^ > $@

figures/tsne-mp2d-distances.csv: mutagenicity/mutagenicity-mp2d $(PA_DIR)pa-mp2d
	scripts/tsne-mp2d-distances.rb $^ > $@

## roc
figures/roc.png: figures/roc.csv
	Rscript scripts/roc.R $< $@

figures/roc.csv: $(CV_SUMMARY)
	scripts/roc.rb $< > $@

## pa predictions per group
# Plot of the PA predictions per group. Run through Rscript like every other
# R script in this file instead of relying on the script's executable bit.
figures/pa-groups.png: figures/pa-groups.csv
	Rscript scripts/pa-groups.R $< $@

figures/pa-groups.csv: $(PA_DIR)pa-groups.csv $(PA_DIR)pa-predictions.csv
	scripts/pa-groups.rb $^ > $@

# PA predictions
## summary
$(PA_SUMMARY): $(PA_DIR)pa-predictions.csv
	scripts/pa-summary.rb $< > $@

## pdf table
$(PA_DIR)pa-predictions.pdf: $(PA_DIR)pa-predictions.tex
	pdflatex -output-directory $(PA_DIR) $<

# PA_DIR already ends with "/". The original prerequisite
# "$(PA_DIR)/pa-groups.csv" expanded to a double-slash path that does not
# match the name under which the sanitize step below produces the file.
$(PA_DIR)pa-predictions.tex: $(PA_DIR)pa-groups.csv $(PA_DIR)pa-predictions.csv
	scripts/pa-predictions-latex.rb $^ > $@

## table
$(PA_DIR)pa-predictions.csv: $(PA_PREDICTIONS)
	scripts/pa-predictions.rb $^ > $@

## predictions
# lazar predictions, all results. Written out per descriptor set because the
# former pattern rule used "%" twice in its prerequisite
# (pa-%/pa-%-prediction.csv).
# NOTE(review): GNU Make appears to substitute only the first "%" of a
# prerequisite, in which case that rule could never match -- confirm.
$(PA_DIR)mp2d/lazar-all.csv: $(LAZAR_PREDICTIONS_DIR)pa-mp2d/pa-mp2d-prediction.csv
	scripts/lazar-pa-predictions.rb $< > $@

$(PA_DIR)cdk/lazar-all.csv: $(LAZAR_PREDICTIONS_DIR)pa-cdk/pa-cdk-prediction.csv
	scripts/lazar-pa-predictions.rb $< > $@

# lazar predictions, high-confidence subset.
# NOTE(review): the second script argument (0.5 / 0.9) is presumably a
# confidence cutoff -- confirm in scripts/lazar-pa-predictions.rb.
$(PA_DIR)mp2d/lazar-high-confidence.csv: $(LAZAR_PREDICTIONS_DIR)pa-mp2d/pa-mp2d-prediction.csv
	scripts/lazar-pa-predictions.rb $< 0.5 > $@

$(PA_DIR)cdk/lazar-high-confidence.csv: $(LAZAR_PREDICTIONS_DIR)pa-cdk/pa-cdk-prediction.csv
	scripts/lazar-pa-predictions.rb $< 0.9 > $@

# Predictions of the remaining models, converted from the files in
# $(TENSORFLOW_PA_DIR); the stem (%) is the model name (rf, lr, ...).
$(PA_DIR)mp2d/%.csv: $(TENSORFLOW_PA_DIR)pred.%.v5-ext-ext-Padel-2D.csv $(PA_DIR)pa-cids.csv
	scripts/tensorflow-pa-predictions.rb $^ > $@

$(PA_DIR)cdk/%.csv: $(TENSORFLOW_PA_DIR)pred.%.v5-ext-Padel-2D.csv $(PA_DIR)pa-cids.csv
	scripts/tensorflow-pa-predictions.rb $^ > $@

## sanitize PA input data
# scripts/sanitize-pa-data.rb takes no arguments and is the single source of
# all files in PA_SANITIZED. A plain "a b c: prereqs" rule is really one
# independent rule per target, so under "make -j" the script could be started
# several times at once. The stamp file makes it run exactly once per change
# of the source data.
# Caveat: if one of the outputs is deleted by hand, delete the stamp as well,
# otherwise the script is not re-run.
PA_SANITIZED = \
  $(PA_DIR)pa-ids.csv \
  $(PA_DIR)pa-cids.csv \
  $(PA_DIR)pa-names.tsv \
  $(PA_DIR)pa-groups.csv \
  $(PA_DIR)pa-cdk.csv
PA_SANITIZE_STAMP = $(PA_DIR).sanitize-pa-data.stamp

$(PA_SANITIZE_STAMP): $(PA_DIR)src/180920_PA_complete_SMILES.csv $(PA_DIR)src/pa-groups.original.csv $(PA_DIR)src/PA-Padel-2D_m2.csv
	scripts/sanitize-pa-data.rb
	@touch $@

# The empty recipe (";") is required: it makes Make re-read the real
# timestamps of the outputs after the stamp rule has run.
$(PA_SANITIZED): $(PA_SANITIZE_STAMP) ;

# crossvalidation
## summary
$(CV_SUMMARY): $(CONFUSION_MATRICES)
	scripts/cv-summary.rb $^ > $@

## confusion matrices
# One confusion matrix per prediction file, same relative path below
# $(CONFUSION_MATRICES_DIR) as below $(CV_PREDICTIONS_DIR).
$(CONFUSION_MATRICES_DIR)%: $(CV_PREDICTIONS_DIR)%
	scripts/confusion-matrix.rb $< > $@

## predictions
# lazar crossvalidation predictions, all results; the stem (%) is the
# descriptor set (mp2d or cdk).
$(CV_PREDICTIONS_DIR)%/lazar-all.csv: $(LAZAR_MODELS_DIR)mutagenicity-%/crossvalidation/predictions.csv
	scripts/lazar-cv-predictions.rb $< > $@

# lazar crossvalidation predictions, high-confidence subset.
# NOTE(review): the second script argument (0.5 / 0.9) is presumably a
# confidence cutoff -- confirm in scripts/lazar-cv-predictions.rb.
$(CV_PREDICTIONS_DIR)mp2d/lazar-high-confidence.csv: $(LAZAR_MODELS_DIR)mutagenicity-mp2d/crossvalidation/predictions.csv
	scripts/lazar-cv-predictions.rb $< 0.5 > $@

$(CV_PREDICTIONS_DIR)cdk/lazar-high-confidence.csv: $(LAZAR_MODELS_DIR)mutagenicity-cdk/crossvalidation/predictions.csv
	scripts/lazar-cv-predictions.rb $< 0.9 > $@

# Crossvalidation predictions of the remaining models, converted from the
# files in $(TENSORFLOW_CV_DIR); the stem (%) is the model name.
$(CV_PREDICTIONS_DIR)mp2d/%.csv: $(TENSORFLOW_CV_DIR)pred.%.v4_ext.csv
	scripts/tensorflow-cv-predictions.rb $< > $@

$(CV_PREDICTIONS_DIR)cdk/%.csv: $(TENSORFLOW_CV_DIR)pred.%.norm.v4.csv
	scripts/tensorflow-cv-predictions.rb $< > $@

# cdk descriptors
# Keep column 1 and columns 3 onwards of the comma-separated source file
# (i.e. drop column 2).
mutagenicity/mutagenicity-cdk.csv: mutagenicity/src/mutagenicity-mod-2.new.csv
	cut -f1,3- -d ',' $< > $@

# mp2d fingerprints
mutagenicity/mutagenicity-mp2d: mutagenicity/mutagenicity.csv
	scripts/mp2d-fingerprints.rb $< > $@

$(PA_DIR)pa-mp2d: $(PA_DIR)pa-cids.csv
	scripts/mp2d-fingerprints.rb $< > $@