# Manuscript
#
# Requirements:
#   pandoc-scholar (https://github.com/pandoc-scholar/pandoc-scholar)
#   checked out at PANDOC_SCHOLAR_PATH
#
# Directory variables in this file carry their trailing slash, so paths are
# composed as $(DIR)file (never $(DIR)/file, which would yield "dir//file"
# and no longer match the rule that builds "dir/file").

# Remove a target whose recipe failed. Most recipes below write with "> $@",
# which would otherwise leave an empty/truncated file that looks up to date.
.DELETE_ON_ERROR:

.PHONY: all

# manuscript
# (not referenced elsewhere in this file; presumably consumed by the included
# pandoc-scholar Makefile -- verify against its documentation)

ARTICLE_FILE = mutagenicity.mustache.md
PANDOC_SCHOLAR_PATH = pandoc-scholar
OUTFILE_PREFIX = mutagenicity
DEFAULT_EXTENSIONS = pdf
PANDOC_WRITER_OPTIONS = --filter=pandoc-crossref --filter=pandoc-citeproc
TEMPLATE_FILE_LATEX = pandoc-scholar.latex

# experiments

# crossvalidations

CV_DIR = crossvalidations/
CV_PREDICTIONS_DIR = $(CV_DIR)predictions/
LAZAR_MODELS_DIR = ../lazar/models/
LAZAR_PREDICTIONS_DIR = ../lazar/predictions/
TENSORFLOW_CV_DIR = $(CV_DIR)tensorflow/
CONFUSION_MATRICES_DIR = $(CV_DIR)confusion-matrices/

# One result file per model; the same names are used for the mp2d and cdk
# descriptor sets, for crossvalidations as well as for PA predictions.
CV_FILES = lazar-all.csv lazar-high-confidence.csv rf.csv lr.csv lr2.csv nn.csv svm.csv

CONFUSION_MATRICES = $(addprefix $(CONFUSION_MATRICES_DIR)mp2d/, $(CV_FILES)) $(addprefix $(CONFUSION_MATRICES_DIR)cdk/, $(CV_FILES))
CV_PREDICTIONS = $(addprefix $(CV_PREDICTIONS_DIR)mp2d/, $(CV_FILES)) $(addprefix $(CV_PREDICTIONS_DIR)cdk/, $(CV_FILES))
CV_SUMMARY = $(CV_DIR)summary.yaml

# PA predictions

PA_DIR = pyrrolizidine-alkaloids/
TENSORFLOW_PA_DIR = $(PA_DIR)tensorflow/
# NOTE(review): PA_MP2D_DIR and PA_CDK_DIR are not defined in this file and
# the two variables below are not used by any rule here -- confirm whether
# they are still needed.
PA_MP2D_LAZAR_DIR = $(PA_MP2D_DIR)lazar/
PA_CDK_LAZAR_DIR = $(PA_CDK_DIR)lazar/
PA_PREDICTIONS = $(addprefix $(PA_DIR)mp2d/, $(CV_FILES)) $(addprefix $(PA_DIR)cdk/, $(CV_FILES))
PA_SUMMARY = $(PA_DIR)summary.yaml

# manuscript

PA_FIGURES = $(addprefix figures/, \
  Dehydropyrrolizidine.png \
  Diester.png \
  Macrocyclic.diester.png \
  Monoester.png \
  N.oxide.png \
  Otonecine.png \
  Platynecine.png \
  Retronecine.png \
  Tertiary.PA.png \
  )

FIGURES = figures/roc.png figures/tsne-mp2d-mutagenicity.png figures/tsne-cdk-mutagenicity.png $(PA_FIGURES)
DATA = data.yaml

# Default goal; it must stay above the include so that it is the first
# target make sees.
all: mutagenicity.pdf $(PA_DIR)pa-predictions.pdf $(CV_PREDICTIONS) $(CONFUSION_MATRICES) $(PA_PREDICTIONS)

include $(PANDOC_SCHOLAR_PATH)/Makefile

# The figures are prerequisites so that they are (re)built before the
# manuscript is rendered, but only the data file and the template are handed
# to mustache -- "$^" would also pass every PNG on the command line.
mutagenicity.mustache.md: $(DATA) mutagenicity.md $(FIGURES)
	mustache $(DATA) mutagenicity.md > $@

# manuscript data

$(DATA): $(PA_SUMMARY) $(CV_SUMMARY) mutagenicity/mutagenicity.csv mutagenicity/mutagenicity-cdk.csv $(PA_DIR)pa-predictions.csv
	scripts/data.rb $^ > $@

# figures

## tsne

figures/tsne-%-mutagenicity.png: figures/tsne-%-coordinates.csv figures/tsne-%-mutagenicity.csv
	Rscript scripts/tsne-mutagenicity.R $^ $@

### factors

figures/tsne-%-mutagenicity.csv: figures/tsne-%-coordinates.csv mutagenicity/mutagenicity.csv
	scripts/tsne-mutagenicity.rb $^ > $@

### coordinates

figures/tsne-cdk-coordinates.csv: figures/tsne-cdk-descriptors.csv
	Rscript scripts/tsne-cdk-coordinates.R $< $@

figures/tsne-mp2d-coordinates.csv: figures/tsne-mp2d-distances.csv
	Rscript scripts/tsne-mp2d-coordinates.R $< $@

### input

figures/tsne-cdk-descriptors.csv: mutagenicity/mutagenicity-cdk.csv $(PA_DIR)pa-cdk.csv
	scripts/tsne-cdk-descriptors.rb $^ > $@

figures/tsne-mp2d-distances.csv: mutagenicity/mutagenicity-mp2d $(PA_DIR)pa-mp2d
	scripts/tsne-mp2d-distances.rb $^ > $@

## roc

# NOTE(review): roc.R is invoked without arguments; presumably it locates
# figures/roc.csv and writes figures/roc.png on its own -- confirm.
figures/roc.png: figures/roc.csv
	Rscript scripts/roc.R

figures/roc.csv: $(CV_SUMMARY)
	scripts/roc.rb $< > $@

## pa predictions per group

# NOTE(review): the recipe does not mention $@, so one run presumably writes
# all group figures. As an ordinary multi-target rule it may be executed once
# per missing figure (and concurrently under -j) -- confirm that is harmless.
$(PA_FIGURES): $(PA_DIR)pa-groups.csv $(PA_DIR)pa-predictions.csv
	scripts/pa-groups.R $^

# PA predictions

## summary

$(PA_SUMMARY): $(PA_DIR)pa-predictions.csv
	scripts/pa-summary.rb $< > $@

## pdf table

$(PA_DIR)pa-predictions.pdf: $(PA_DIR)pa-predictions.tex
	pdflatex -output-directory $(PA_DIR) $<

$(PA_DIR)pa-predictions.tex: $(PA_DIR)pa-groups.csv $(PA_DIR)pa-predictions.csv
	scripts/pa-predictions-latex.rb $^ > $@

## table

$(PA_DIR)pa-predictions.csv: $(PA_PREDICTIONS)
	scripts/pa-predictions.rb $^ > $@

## predictions

# lazar results exist for both descriptor sets; the stem is "mp2d" or "cdk".
# The second rule passes an additional 0.5 argument to the same script.
$(PA_DIR)%/lazar-all.csv: $(LAZAR_PREDICTIONS_DIR)pa-%/predictions
	scripts/lazar-pa-predictions.rb $< > $@

$(PA_DIR)%/lazar-high-confidence.csv: $(LAZAR_PREDICTIONS_DIR)pa-%/predictions
	scripts/lazar-pa-predictions.rb $< 0.5 > $@

# Remaining models are converted from the tensorflow prediction files; the
# stem is the model name (the basename of an entry in CV_FILES).
$(PA_DIR)mp2d/%.csv: $(TENSORFLOW_PA_DIR)pred.%.v5-ext-ext-Padel-2D.csv $(PA_DIR)pa-cids.csv
	scripts/tensorflow-pa-predictions.rb $^ > $@

$(PA_DIR)cdk/%.csv: $(TENSORFLOW_PA_DIR)pred.%.v5-ext-Padel-2D.csv $(PA_DIR)pa-cids.csv
	scripts/tensorflow-pa-predictions.rb $^ > $@

## sanitize PA input data

# NOTE(review): the script is called without arguments and is presumably
# responsible for writing all five listed targets in one run. As an ordinary
# multi-target rule it may be executed more than once under -j -- confirm.
$(PA_DIR)pa-ids.csv $(PA_DIR)pa-cids.csv $(PA_DIR)pa-names.tsv $(PA_DIR)pa-groups.csv $(PA_DIR)pa-cdk.csv: $(PA_DIR)src/180920_PA_complete_SMILES.csv $(PA_DIR)src/pa-groups.original.csv $(PA_DIR)src/PA-Padel-2D_m2.csv
	scripts/sanitize-pa-data.rb

# crossvalidation

## summary

$(CV_SUMMARY): $(CONFUSION_MATRICES)
	scripts/cv-summary.rb $^ > $@

## confusion matrices

# One confusion matrix per prediction file, mirroring its relative path.
$(CONFUSION_MATRICES_DIR)%: $(CV_PREDICTIONS_DIR)%
	scripts/confusion-matrix.rb $< > $@

## predictions

$(CV_PREDICTIONS_DIR)%/lazar-all.csv: $(LAZAR_MODELS_DIR)mutagenicity-%
	scripts/lazar-cv-predictions.rb $< > $@

$(CV_PREDICTIONS_DIR)%/lazar-high-confidence.csv: $(LAZAR_MODELS_DIR)mutagenicity-%
	scripts/lazar-cv-predictions.rb $< 0.5 > $@

$(CV_PREDICTIONS_DIR)mp2d/%.csv: $(TENSORFLOW_CV_DIR)pred.%.v4_ext.csv
	scripts/tensorflow-cv-predictions.rb $< > $@

$(CV_PREDICTIONS_DIR)cdk/%.csv: $(TENSORFLOW_CV_DIR)pred.%.norm.v4.csv
	scripts/tensorflow-cv-predictions.rb $< > $@

# cdk descriptors

# Keep comma-separated column 1 and columns 3 onwards (drops column 2).
mutagenicity/mutagenicity-cdk.csv: mutagenicity/src/mutagenicity-mod-2.new.csv
	cut -f1,3- -d ',' $< > $@

# mp2d fingerprints

mutagenicity/mutagenicity-mp2d: mutagenicity/mutagenicity.csv
	scripts/mp2d-fingerprints.rb $< > $@

$(PA_DIR)pa-mp2d: $(PA_DIR)pa-cids.csv
	scripts/mp2d-fingerprints.rb $< > $@