diff options
author | Christoph Helma <helma@in-silico.ch> | 2021-02-22 23:26:29 +0100 |
---|---|---|
committer | Christoph Helma <helma@in-silico.ch> | 2021-02-22 23:26:29 +0100 |
commit | ed83d4c5347ebf43b2de55782b290b66bada4561 (patch) | |
tree | ddf3ee1eb6d4f5d250835345798086b5204a23ee /Makefile | |
parent | 3af0c3d5c5b7f7d506a4582bbe3dca7d22bbefcc (diff) |
more script consolidations
Diffstat (limited to 'Makefile')
-rw-r--r-- | Makefile | 227 |
1 files changed, 90 insertions, 137 deletions
@@ -16,88 +16,37 @@ TEMPLATE_FILE_LATEX = pandoc-scholar.latex # crossvalidations CV_DIR = crossvalidations/ +CV_PREDICTIONS_DIR = $(CV_DIR)predictions/ + LAZAR_MODELS_DIR = ../lazar/models/ LAZAR_PREDICTIONS_DIR = ../lazar/predictions/ -LAZAR_MP2D_CONFUSION_MATRIX_DIR = $(LAZAR_MODELS_DIR)mutagenicity-mp2d/crossvalidation/confusion-matrices/ -LAZAR_CDK_CONFUSION_MATRIX_DIR = $(LAZAR_MODELS_DIR)mutagenicity-cdk/crossvalidation/confusion-matrices/ -TENSORFLOW_MP2D_CV_DIR = $(CV_DIR)mp2d/tensorflow/ -TENSORFLOW_CDK_CV_DIR = $(CV_DIR)cdk/tensorflow/ +TENSORFLOW_CV_DIR = $(CV_DIR)/tensorflow/ CONFUSION_MATRICES_DIR = $(CV_DIR)confusion-matrices/ -LAZAR_CONFUSION_MATRICES = $(addprefix $(CONFUSION_MATRICES_DIR), \ - lazar-mp2d-all.csv \ - lazar-mp2d-high-confidence.csv \ - lazar-cdk-all.csv \ - lazar-cdk-high-confidence.csv \ -) - -TENSORFLOW_MP2D_CONFUSION_MATRICES = \ - tensorflow-rf-mp2d.csv \ - tensorflow-lr-mp2d.csv \ - tensorflow-lr2-mp2d.csv \ - tensorflow-nn-mp2d.csv \ - tensorflow-svm-mp2d.csv +CV_FILES = lazar-all.csv lazar-high-confidence.csv rf.csv lr.csv lr2.csv nn.csv svm.csv -TENSORFLOW_CDK_CONFUSION_MATRICES = \ - tensorflow-rf-cdk.csv \ - tensorflow-lr-cdk.csv \ - tensorflow-lr2-cdk.csv \ - tensorflow-nn-cdk.csv \ - tensorflow-svm-cdk.csv +CONFUSION_MATRICES = $(addprefix $(CONFUSION_MATRICES_DIR)mp2d/, $(CV_FILES)) $(addprefix $(CONFUSION_MATRICES_DIR)cdk/, $(CV_FILES)) -TENSORFLOW_CONFUSION_MATRICES = $(addprefix $(CONFUSION_MATRICES_DIR), $(TENSORFLOW_MP2D_CONFUSION_MATRICES) $(TENSORFLOW_CDK_CONFUSION_MATRICES)) - -CONFUSION_MATRICES = $(LAZAR_CONFUSION_MATRICES) $(TENSORFLOW_CONFUSION_MATRICES) +CV_PREDICTIONS = $(addprefix $(CV_PREDICTIONS_DIR)mp2d/, $(CV_FILES)) $(addprefix $(CV_PREDICTIONS_DIR)cdk/, $(CV_FILES)) CV_SUMMARY = $(CV_DIR)summary.yaml # PA predictions PA_DIR = pyrrolizidine-alkaloids/ - -PA_MP2D_DIR = $(PA_DIR)mp2d/ -PA_CDK_DIR = $(PA_DIR)cdk/ +TENSORFLOW_PA_DIR = $(PA_DIR)tensorflow/ PA_MP2D_LAZAR_DIR = $(PA_MP2D_DIR)lazar/ PA_CDK_LAZAR_DIR = $(PA_CDK_DIR)lazar/ -PA_MP2D_TENSORFLOW_DIR = $(PA_MP2D_DIR)tensorflow/ -PA_CDK_TENSORFLOW_DIR = $(PA_CDK_DIR)tensorflow/ - -PA_MP2D_TENSORFLOW_PREDICTIONS = $(addprefix $(PA_MP2D_TENSORFLOW_DIR), \ - pred.lr2.v5-ext-ext-Padel-2D.csv \ - pred.lr.v5-ext-ext-Padel-2D.csv \ - pred.nn.v5-ext-ext-Padel-2D.csv \ - pred.rf.v5-ext-ext-Padel-2D.csv \ - pred.svm.v5-ext-ext-Padel-2D.csv \ -) - -PA_CDK_TENSORFLOW_PREDICTIONS = $(addprefix $(PA_CDK_TENSORFLOW_DIR), \ - pred.lr2.v5-ext-Padel-2D.csv \ - pred.lr.v5-ext-Padel-2D.csv \ - pred.nn.v5-ext-Padel-2D.csv \ - pred.rf.v5-ext-Padel-2D.csv \ - pred.svm.v5-ext-Padel-2D.csv \ -) - -PA_PREDICTIONS = \ - $(PA_MP2D_LAZAR_DIR)predictions\ - $(PA_CDK_LAZAR_DIR)predictions \ - $(PA_MP2D_TENSORFLOW_PREDICTIONS) \ - $(PA_CDK_TENSORFLOW_PREDICTIONS) +PA_PREDICTIONS = $(addprefix $(PA_DIR)mp2d/, $(CV_FILES)) $(addprefix $(PA_DIR)cdk/, $(CV_FILES)) PA_SUMMARY = $(PA_DIR)summary.yaml # manuscript -TABLES = $(addprefix tables/, \ - lazar-summary.csv \ - tensorflow-summary.csv \ - pa-tab.tex \ -) - PA_FIGURES = $(addprefix figures/, \ Dehydropyrrolizidine.png \ Diester.png \ @@ -110,131 +59,135 @@ PA_FIGURES = $(addprefix figures/, \ Tertiary.PA.png \ ) -FIGURES = figures/roc.png figures/tsne-mp2d.png figures/tsne-cdk.png $(PA_FIGURES) +FIGURES = figures/roc.png figures/tsne-mp2d-mutagenicity.png figures/tsne-cdk-mutagenicity.png $(PA_FIGURES) DATA = data.yaml -all: mutagenicity.pdf $(PA_DIR)pa-predictions.pdf +all: mutagenicity.pdf $(PA_DIR)pa-predictions.pdf $(CV_PREDICTIONS) $(CONFUSION_MATRICES) $(PA_PREDICTIONS) include $(PANDOC_SCHOLAR_PATH)/Makefile -mutagenicity.mustache.md: $(DATA) mutagenicity.md $(TABLES) $(FIGURES) +mutagenicity.mustache.md: $(DATA) mutagenicity.md $(FIGURES) mustache $^ > $@ -$(PA_DIR)pa-predictions.pdf: $(PA_DIR)pa-predictions.tex - pdflatex -output-directory $(PA_DIR) $(PA_DIR)pa-predictions.tex +# manuscript data +$(DATA): $(PA_SUMMARY) $(CV_SUMMARY) mutagenicity/mutagenicity.csv mutagenicity/mutagenicity-cdk.csv $(PA_DIR)pa-predictions.csv + scripts/data.rb $^ > $@ # figures -figures/tsne-cdk.png: figures/tsne-cdk.csv - Rscript scripts/tsne-cdk.R +## tsne + +figures/tsne-%-mutagenicity.png: figures/tsne-%-coordinates.csv figures/tsne-%-mutagenicity.csv + Rscript scripts/tsne-mutagenicity.R $^ $@ + +### factors + +figures/tsne-%-mutagenicity.csv: figures/tsne-%-coordinates.csv mutagenicity/mutagenicity.csv + scripts/tsne-mutagenicity.rb $^ > $@ + +### coordinates + +figures/tsne-cdk-coordinates.csv: figures/tsne-cdk-descriptors.csv + Rscript scripts/tsne-cdk-coordinates.R $< $@ -figures/tsne-cdk.csv: mutagenicity/cdk/mutagenicity-mod-2.new.csv pyrrolizidine-alkaloids/cdk/PA-Padel-2D_m2.csv - scripts/cdk-descriptors.rb $^ > $@ +figures/tsne-mp2d-coordinates.csv: figures/tsne-mp2d-distances.csv + Rscript scripts/tsne-mp2d-coordinates.R $< $@ + +### input -figures/tsne-mp2d.png: figures/tsne-mp2d.csv - Rscript scripts/tsne-mp2d.R +figures/tsne-cdk-descriptors.csv: mutagenicity/mutagenicity-cdk.csv pyrrolizidine-alkaloids/pa-cdk.csv + scripts/tsne-cdk-descriptors.rb $^ > $@ -figures/tsne-mp2d.csv: $(LAZAR_MODELS_DIR)mutagenicity-mp2d/independent-variables - scripts/mp2d-distances.rb > figures/tsne-mp2d.csv +figures/tsne-mp2d-distances.csv: mutagenicity/mutagenicity-mp2d pyrrolizidine-alkaloids/pa-mp2d + scripts/tsne-mp2d-distances.rb $^ > $@ + +## roc figures/roc.png: figures/roc.csv Rscript scripts/roc.R figures/roc.csv: $(CV_SUMMARY) - scripts/summary2roc.rb $< > $@ - -$(PA_FIGURES): $(PA_DIR)pa-predictions.csv - scripts/pa-groups.R - -# tables - -tables/pa-summary.csv: $(PA_SUMMARY) - scripts/pa-summary-table.rb $< > $@ - -$(PA_DIR)pa-predictions.tex: $(PA_DIR)pa-predictions.csv - scripts/pa-predictions-latex.rb $< > $@ - -$(PA_DIR)a-predictions.csv: $(PA_PREDICTIONS) - scripts/pa-table.rb > $@ + scripts/roc.rb $< > $@ -tables/lazar-summary.csv: $(CV_SUMMARY) - scripts/summary2table.rb lazar > $@ +## pa predictions per group -tables/tensorflow-summary.csv: $(CV_SUMMARY) - scripts/summary2table.rb tensorflow > $@ +$(PA_FIGURES): $(PA_DIR)pa-groups.csv $(PA_DIR)pa-predictions.csv + scripts/pa-groups.R $^ -$(DATA): $(PA_SUMMARY) $(CV_SUMMARY) mutagenicity/mutagenicity.csv mutagenicity/cdk/mutagenicity-mod-2.new.csv $(PA_DIR)pa-predictions.csv - scripts/data.rb $^ > $@ +# PA predictions -# PA summary +## summary $(PA_SUMMARY): $(PA_DIR)pa-predictions.csv scripts/pa-summary.rb $< > $@ -# PA lazar predictions +## pdf table + +$(PA_DIR)pa-predictions.pdf: $(PA_DIR)pa-predictions.tex + pdflatex -output-directory $(PA_DIR) $(PA_DIR)pa-predictions.tex + +$(PA_DIR)pa-predictions.tex: $(PA_DIR)/pa-groups.csv $(PA_DIR)pa-predictions.csv + scripts/pa-predictions-latex.rb $^ > $@ -$(PA_DIR)pa-canonical-smiles: $(PA_DIR)180920_PA_complete_SMILES.csv - scripts/pa-smiles.rb $< | obabel -ismi - -ocan | tr -d "\t" > $@ +## table -$(PA_CDK_LAZAR_DIR)predictions: $(LAZAR_PREDICTIONS_DIR)pa-cdk/predictions - cp $< $@ +$(PA_DIR)pa-predictions.csv: $(PA_PREDICTIONS) + scripts/pa-predictions.rb $^ > $@ -$(PA_MP2D_LAZAR_DIR)predictions: $(LAZAR_PREDICTIONS_DIR)pa-mp2d/predictions - cp $< $@ +## predictions -# crossvalidation summary +$(PA_DIR)%/lazar-all.csv: $(LAZAR_PREDICTIONS_DIR)pa-%/predictions + scripts/lazar-pa-predictions.rb $< > $@ -$(CV_SUMMARY): $(CONFUSION_MATRICES) - scripts/confusion-matrix-summary.rb $^ > $@ +$(PA_DIR)%/lazar-high-confidence.csv: $(LAZAR_PREDICTIONS_DIR)pa-%/predictions + scripts/lazar-pa-predictions.rb $< 0.5 > $@ -# confusion matrices +$(PA_DIR)mp2d/%.csv: $(TENSORFLOW_PA_DIR)pred.%.v5-ext-ext-Padel-2D.csv $(PA_DIR)pa-cids.csv + scripts/tensorflow-pa-predictions.rb $^ > $@ -## lazar +$(PA_DIR)cdk/%.csv: $(TENSORFLOW_PA_DIR)pred.%.v5-ext-Padel-2D.csv $(PA_DIR)pa-cids.csv + scripts/tensorflow-pa-predictions.rb $^ > $@ -$(CONFUSION_MATRICES_DIR)lazar-mp2d-all.csv: $(LAZAR_MP2D_CONFUSION_MATRIX_DIR)all - cp $< $@ +## sanitize PA input data -$(CONFUSION_MATRICES_DIR)lazar-mp2d-high-confidence.csv: $(LAZAR_MP2D_CONFUSION_MATRIX_DIR)high_confidence - cp $< $@ +$(PA_DIR)pa-ids.csv $(PA_DIR)pa-cids.csv $(PA_DIR)pa-names.tsv $(PA_DIR)pa-groups.csv $(PA_DIR)pa-cdk.csv: $(PA_DIR)src/180920_PA_complete_SMILES.csv $(PA_DIR)src/pa-groups.original.csv $(PA_DIR)src/PA-Padel-2D_m2.csv + scripts/sanitize-pa-data.rb -$(CONFUSION_MATRICES_DIR)lazar-cdk-all.csv: $(LAZAR_CDK_CONFUSION_MATRIX_DIR)all - cp $< $@ +# crossvalidation -$(CONFUSION_MATRICES_DIR)lazar-cdk-high-confidence.csv: $(LAZAR_CDK_CONFUSION_MATRIX_DIR)high_confidence - cp $< $@ +## summary -## tensorflow +$(CV_SUMMARY): $(CONFUSION_MATRICES) + scripts/cv-summary.rb $^ > $@ -### mp2d +## confusion matrices -$(CONFUSION_MATRICES_DIR)tensorflow-lr-mp2d.csv: $(TENSORFLOW_MP2D_CV_DIR)pred.lr.v4_ext.csv - scripts/cv-tensorflow-confusion-matrix.rb $< > $@ +$(CONFUSION_MATRICES_DIR)%: $(CV_PREDICTIONS_DIR)% + scripts/confusion-matrix.rb $< > $@ -$(CONFUSION_MATRICES_DIR)tensorflow-lr2-mp2d.csv: $(TENSORFLOW_MP2D_CV_DIR)pred.lr2.v4_ext.csv - scripts/cv-tensorflow-confusion-matrix.rb $< > $@ +## predictions -$(CONFUSION_MATRICES_DIR)tensorflow-nn-mp2d.csv: $(TENSORFLOW_MP2D_CV_DIR)pred.nn.v4_ext.csv - scripts/cv-tensorflow-confusion-matrix.rb $< > $@ +$(CV_PREDICTIONS_DIR)%/lazar-all.csv: $(LAZAR_MODELS_DIR)mutagenicity-% + scripts/lazar-cv-predictions.rb $< > $@ -$(CONFUSION_MATRICES_DIR)tensorflow-rf-mp2d.csv: $(TENSORFLOW_MP2D_CV_DIR)pred.rf.v4_ext.csv - scripts/cv-tensorflow-confusion-matrix.rb $< > $@ +$(CV_PREDICTIONS_DIR)%/lazar-high-confidence.csv: $(LAZAR_MODELS_DIR)mutagenicity-% + scripts/lazar-cv-predictions.rb $< 0.5 > $@ -$(CONFUSION_MATRICES_DIR)tensorflow-svm-mp2d.csv: $(TENSORFLOW_MP2D_CV_DIR)pred.svm.v4_ext.csv - scripts/cv-tensorflow-confusion-matrix.rb $< > $@ +$(CV_PREDICTIONS_DIR)mp2d/%.csv: $(TENSORFLOW_CV_DIR)pred.%.v4_ext.csv + scripts/tensorflow-cv-predictions.rb $< > $@ -### cdk +$(CV_PREDICTIONS_DIR)cdk/%.csv: $(TENSORFLOW_CV_DIR)pred.%.norm.v4.csv + scripts/tensorflow-cv-predictions.rb $< > $@ -$(CONFUSION_MATRICES_DIR)tensorflow-lr-cdk.csv: $(TENSORFLOW_CDK_CV_DIR)pred.lr.v4.norm.csv - scripts/cv-tensorflow-confusion-matrix.rb $< > $@ +# cdk descriptors -$(CONFUSION_MATRICES_DIR)tensorflow-lr2-cdk.csv: $(TENSORFLOW_CDK_CV_DIR)pred.lr2.norm.v4.csv - scripts/cv-tensorflow-confusion-matrix.rb $< > $@ +mutagenicity/mutagenicity-cdk.csv: mutagenicity/src/mutagenicity-mod-2.new.csv + cut -f1,3- -d ',' $< > $@ -$(CONFUSION_MATRICES_DIR)tensorflow-nn-cdk.csv: $(TENSORFLOW_CDK_CV_DIR)pred.nn.v4.norm.csv - scripts/cv-tensorflow-confusion-matrix.rb $< > $@ +# mp2d fingerprints -$(CONFUSION_MATRICES_DIR)tensorflow-rf-cdk.csv: $(TENSORFLOW_CDK_CV_DIR)pred.rf.norm.v4.csv - scripts/cv-tensorflow-confusion-matrix.rb $< > $@ +mutagenicity/mutagenicity-mp2d: mutagenicity/mutagenicity.csv + scripts/mp2d-fingerprints.rb $< > $@ -$(CONFUSION_MATRICES_DIR)tensorflow-svm-cdk.csv: $(TENSORFLOW_CDK_CV_DIR)pred.svm.norm.v4.csv - scripts/cv-tensorflow-confusion-matrix.rb $< > $@ +$(PA_DIR)pa-mp2d: $(PA_DIR)pa-cids.csv + scripts/mp2d-fingerprints.rb $< > $@ |