summaryrefslogtreecommitdiff
path: root/Makefile
diff options
context:
space:
mode:
author Christoph Helma <helma@in-silico.ch> 2021-02-22 23:26:29 +0100
committer Christoph Helma <helma@in-silico.ch> 2021-02-22 23:26:29 +0100
commit ed83d4c5347ebf43b2de55782b290b66bada4561 (patch)
tree ddf3ee1eb6d4f5d250835345798086b5204a23ee /Makefile
parent 3af0c3d5c5b7f7d506a4582bbe3dca7d22bbefcc (diff)
more script consolidations
Diffstat (limited to 'Makefile')
-rw-r--r-- Makefile 227
1 files changed, 90 insertions, 137 deletions
diff --git a/Makefile b/Makefile
index e1227d3..a3ec0ee 100644
--- a/Makefile
+++ b/Makefile
@@ -16,88 +16,37 @@ TEMPLATE_FILE_LATEX = pandoc-scholar.latex
# crossvalidations
CV_DIR = crossvalidations/
+CV_PREDICTIONS_DIR = $(CV_DIR)predictions/
+
LAZAR_MODELS_DIR = ../lazar/models/
LAZAR_PREDICTIONS_DIR = ../lazar/predictions/
-LAZAR_MP2D_CONFUSION_MATRIX_DIR = $(LAZAR_MODELS_DIR)mutagenicity-mp2d/crossvalidation/confusion-matrices/
-LAZAR_CDK_CONFUSION_MATRIX_DIR = $(LAZAR_MODELS_DIR)mutagenicity-cdk/crossvalidation/confusion-matrices/
-TENSORFLOW_MP2D_CV_DIR = $(CV_DIR)mp2d/tensorflow/
-TENSORFLOW_CDK_CV_DIR = $(CV_DIR)cdk/tensorflow/
+TENSORFLOW_CV_DIR = $(CV_DIR)tensorflow/
CONFUSION_MATRICES_DIR = $(CV_DIR)confusion-matrices/
-LAZAR_CONFUSION_MATRICES = $(addprefix $(CONFUSION_MATRICES_DIR), \
- lazar-mp2d-all.csv \
- lazar-mp2d-high-confidence.csv \
- lazar-cdk-all.csv \
- lazar-cdk-high-confidence.csv \
-)
-
-TENSORFLOW_MP2D_CONFUSION_MATRICES = \
- tensorflow-rf-mp2d.csv \
- tensorflow-lr-mp2d.csv \
- tensorflow-lr2-mp2d.csv \
- tensorflow-nn-mp2d.csv \
- tensorflow-svm-mp2d.csv
+CV_FILES = lazar-all.csv lazar-high-confidence.csv rf.csv lr.csv lr2.csv nn.csv svm.csv
-TENSORFLOW_CDK_CONFUSION_MATRICES = \
- tensorflow-rf-cdk.csv \
- tensorflow-lr-cdk.csv \
- tensorflow-lr2-cdk.csv \
- tensorflow-nn-cdk.csv \
- tensorflow-svm-cdk.csv
+CONFUSION_MATRICES = $(addprefix $(CONFUSION_MATRICES_DIR)mp2d/, $(CV_FILES)) $(addprefix $(CONFUSION_MATRICES_DIR)cdk/, $(CV_FILES))
-TENSORFLOW_CONFUSION_MATRICES = $(addprefix $(CONFUSION_MATRICES_DIR), $(TENSORFLOW_MP2D_CONFUSION_MATRICES) $(TENSORFLOW_CDK_CONFUSION_MATRICES))
-
-CONFUSION_MATRICES = $(LAZAR_CONFUSION_MATRICES) $(TENSORFLOW_CONFUSION_MATRICES)
+CV_PREDICTIONS = $(addprefix $(CV_PREDICTIONS_DIR)mp2d/, $(CV_FILES)) $(addprefix $(CV_PREDICTIONS_DIR)cdk/, $(CV_FILES))
CV_SUMMARY = $(CV_DIR)summary.yaml
# PA predictions
PA_DIR = pyrrolizidine-alkaloids/
-
-PA_MP2D_DIR = $(PA_DIR)mp2d/
-PA_CDK_DIR = $(PA_DIR)cdk/
+TENSORFLOW_PA_DIR = $(PA_DIR)tensorflow/
PA_MP2D_LAZAR_DIR = $(PA_MP2D_DIR)lazar/
PA_CDK_LAZAR_DIR = $(PA_CDK_DIR)lazar/
-PA_MP2D_TENSORFLOW_DIR = $(PA_MP2D_DIR)tensorflow/
-PA_CDK_TENSORFLOW_DIR = $(PA_CDK_DIR)tensorflow/
-
-PA_MP2D_TENSORFLOW_PREDICTIONS = $(addprefix $(PA_MP2D_TENSORFLOW_DIR), \
- pred.lr2.v5-ext-ext-Padel-2D.csv \
- pred.lr.v5-ext-ext-Padel-2D.csv \
- pred.nn.v5-ext-ext-Padel-2D.csv \
- pred.rf.v5-ext-ext-Padel-2D.csv \
- pred.svm.v5-ext-ext-Padel-2D.csv \
-)
-
-PA_CDK_TENSORFLOW_PREDICTIONS = $(addprefix $(PA_CDK_TENSORFLOW_DIR), \
- pred.lr2.v5-ext-Padel-2D.csv \
- pred.lr.v5-ext-Padel-2D.csv \
- pred.nn.v5-ext-Padel-2D.csv \
- pred.rf.v5-ext-Padel-2D.csv \
- pred.svm.v5-ext-Padel-2D.csv \
-)
-
-PA_PREDICTIONS = \
- $(PA_MP2D_LAZAR_DIR)predictions\
- $(PA_CDK_LAZAR_DIR)predictions \
- $(PA_MP2D_TENSORFLOW_PREDICTIONS) \
- $(PA_CDK_TENSORFLOW_PREDICTIONS)
+PA_PREDICTIONS = $(addprefix $(PA_DIR)mp2d/, $(CV_FILES)) $(addprefix $(PA_DIR)cdk/, $(CV_FILES))
PA_SUMMARY = $(PA_DIR)summary.yaml
# manuscript
-TABLES = $(addprefix tables/, \
- lazar-summary.csv \
- tensorflow-summary.csv \
- pa-tab.tex \
-)
-
PA_FIGURES = $(addprefix figures/, \
Dehydropyrrolizidine.png \
Diester.png \
@@ -110,131 +59,135 @@ PA_FIGURES = $(addprefix figures/, \
Tertiary.PA.png \
)
-FIGURES = figures/roc.png figures/tsne-mp2d.png figures/tsne-cdk.png $(PA_FIGURES)
+FIGURES = figures/roc.png figures/tsne-mp2d-mutagenicity.png figures/tsne-cdk-mutagenicity.png $(PA_FIGURES)
DATA = data.yaml
-all: mutagenicity.pdf $(PA_DIR)pa-predictions.pdf
+all: mutagenicity.pdf $(PA_DIR)pa-predictions.pdf $(CV_PREDICTIONS) $(CONFUSION_MATRICES) $(PA_PREDICTIONS)
include $(PANDOC_SCHOLAR_PATH)/Makefile
-mutagenicity.mustache.md: $(DATA) mutagenicity.md $(TABLES) $(FIGURES)
+mutagenicity.mustache.md: $(DATA) mutagenicity.md $(FIGURES)
mustache $^ > $@
-$(PA_DIR)pa-predictions.pdf: $(PA_DIR)pa-predictions.tex
- pdflatex -output-directory $(PA_DIR) $(PA_DIR)pa-predictions.tex
+# manuscript data
+$(DATA): $(PA_SUMMARY) $(CV_SUMMARY) mutagenicity/mutagenicity.csv mutagenicity/mutagenicity-cdk.csv $(PA_DIR)pa-predictions.csv
+ scripts/data.rb $^ > $@
# figures
-figures/tsne-cdk.png: figures/tsne-cdk.csv
- Rscript scripts/tsne-cdk.R
+## tsne
+
+figures/tsne-%-mutagenicity.png: figures/tsne-%-coordinates.csv figures/tsne-%-mutagenicity.csv
+ Rscript scripts/tsne-mutagenicity.R $^ $@
+
+### factors
+
+figures/tsne-%-mutagenicity.csv: figures/tsne-%-coordinates.csv mutagenicity/mutagenicity.csv
+ scripts/tsne-mutagenicity.rb $^ > $@
+
+### coordinates
+
+figures/tsne-cdk-coordinates.csv: figures/tsne-cdk-descriptors.csv
+ Rscript scripts/tsne-cdk-coordinates.R $< $@
-figures/tsne-cdk.csv: mutagenicity/cdk/mutagenicity-mod-2.new.csv pyrrolizidine-alkaloids/cdk/PA-Padel-2D_m2.csv
- scripts/cdk-descriptors.rb $^ > $@
+figures/tsne-mp2d-coordinates.csv: figures/tsne-mp2d-distances.csv
+ Rscript scripts/tsne-mp2d-coordinates.R $< $@
+
+### input
-figures/tsne-mp2d.png: figures/tsne-mp2d.csv
- Rscript scripts/tsne-mp2d.R
+figures/tsne-cdk-descriptors.csv: mutagenicity/mutagenicity-cdk.csv pyrrolizidine-alkaloids/pa-cdk.csv
+ scripts/tsne-cdk-descriptors.rb $^ > $@
-figures/tsne-mp2d.csv: $(LAZAR_MODELS_DIR)mutagenicity-mp2d/independent-variables
- scripts/mp2d-distances.rb > figures/tsne-mp2d.csv
+figures/tsne-mp2d-distances.csv: mutagenicity/mutagenicity-mp2d pyrrolizidine-alkaloids/pa-mp2d
+ scripts/tsne-mp2d-distances.rb $^ > $@
+
+## roc
figures/roc.png: figures/roc.csv
Rscript scripts/roc.R
figures/roc.csv: $(CV_SUMMARY)
- scripts/summary2roc.rb $< > $@
-
-$(PA_FIGURES): $(PA_DIR)pa-predictions.csv
- scripts/pa-groups.R
-
-# tables
-
-tables/pa-summary.csv: $(PA_SUMMARY)
- scripts/pa-summary-table.rb $< > $@
-
-$(PA_DIR)pa-predictions.tex: $(PA_DIR)pa-predictions.csv
- scripts/pa-predictions-latex.rb $< > $@
-
-$(PA_DIR)a-predictions.csv: $(PA_PREDICTIONS)
- scripts/pa-table.rb > $@
+ scripts/roc.rb $< > $@
-tables/lazar-summary.csv: $(CV_SUMMARY)
- scripts/summary2table.rb lazar > $@
+## pa predictions per group
-tables/tensorflow-summary.csv: $(CV_SUMMARY)
- scripts/summary2table.rb tensorflow > $@
+$(PA_FIGURES): $(PA_DIR)pa-groups.csv $(PA_DIR)pa-predictions.csv
+ scripts/pa-groups.R $^
-$(DATA): $(PA_SUMMARY) $(CV_SUMMARY) mutagenicity/mutagenicity.csv mutagenicity/cdk/mutagenicity-mod-2.new.csv $(PA_DIR)pa-predictions.csv
- scripts/data.rb $^ > $@
+# PA predictions
-# PA summary
+## summary
$(PA_SUMMARY): $(PA_DIR)pa-predictions.csv
scripts/pa-summary.rb $< > $@
-# PA lazar predictions
+## pdf table
+
+$(PA_DIR)pa-predictions.pdf: $(PA_DIR)pa-predictions.tex
+ pdflatex -output-directory $(PA_DIR) $(PA_DIR)pa-predictions.tex
+
+$(PA_DIR)pa-predictions.tex: $(PA_DIR)pa-groups.csv $(PA_DIR)pa-predictions.csv
+ scripts/pa-predictions-latex.rb $^ > $@
-$(PA_DIR)pa-canonical-smiles: $(PA_DIR)180920_PA_complete_SMILES.csv
- scripts/pa-smiles.rb $< | obabel -ismi - -ocan | tr -d "\t" > $@
+## table
-$(PA_CDK_LAZAR_DIR)predictions: $(LAZAR_PREDICTIONS_DIR)pa-cdk/predictions
- cp $< $@
+$(PA_DIR)pa-predictions.csv: $(PA_PREDICTIONS)
+ scripts/pa-predictions.rb $^ > $@
-$(PA_MP2D_LAZAR_DIR)predictions: $(LAZAR_PREDICTIONS_DIR)pa-mp2d/predictions
- cp $< $@
+## predictions
-# crossvalidation summary
+$(PA_DIR)%/lazar-all.csv: $(LAZAR_PREDICTIONS_DIR)pa-%/predictions
+ scripts/lazar-pa-predictions.rb $< > $@
-$(CV_SUMMARY): $(CONFUSION_MATRICES)
- scripts/confusion-matrix-summary.rb $^ > $@
+$(PA_DIR)%/lazar-high-confidence.csv: $(LAZAR_PREDICTIONS_DIR)pa-%/predictions
+ scripts/lazar-pa-predictions.rb $< 0.5 > $@
-# confusion matrices
+$(PA_DIR)mp2d/%.csv: $(TENSORFLOW_PA_DIR)pred.%.v5-ext-ext-Padel-2D.csv $(PA_DIR)pa-cids.csv
+ scripts/tensorflow-pa-predictions.rb $^ > $@
-## lazar
+$(PA_DIR)cdk/%.csv: $(TENSORFLOW_PA_DIR)pred.%.v5-ext-Padel-2D.csv $(PA_DIR)pa-cids.csv
+ scripts/tensorflow-pa-predictions.rb $^ > $@
-$(CONFUSION_MATRICES_DIR)lazar-mp2d-all.csv: $(LAZAR_MP2D_CONFUSION_MATRIX_DIR)all
- cp $< $@
+## sanitize PA input data
-$(CONFUSION_MATRICES_DIR)lazar-mp2d-high-confidence.csv: $(LAZAR_MP2D_CONFUSION_MATRIX_DIR)high_confidence
- cp $< $@
+$(PA_DIR)pa-ids.csv $(PA_DIR)pa-cids.csv $(PA_DIR)pa-names.tsv $(PA_DIR)pa-groups.csv $(PA_DIR)pa-cdk.csv: $(PA_DIR)src/180920_PA_complete_SMILES.csv $(PA_DIR)src/pa-groups.original.csv $(PA_DIR)src/PA-Padel-2D_m2.csv
+ scripts/sanitize-pa-data.rb
-$(CONFUSION_MATRICES_DIR)lazar-cdk-all.csv: $(LAZAR_CDK_CONFUSION_MATRIX_DIR)all
- cp $< $@
+# crossvalidation
-$(CONFUSION_MATRICES_DIR)lazar-cdk-high-confidence.csv: $(LAZAR_CDK_CONFUSION_MATRIX_DIR)high_confidence
- cp $< $@
+## summary
-## tensorflow
+$(CV_SUMMARY): $(CONFUSION_MATRICES)
+ scripts/cv-summary.rb $^ > $@
-### mp2d
+## confusion matrices
-$(CONFUSION_MATRICES_DIR)tensorflow-lr-mp2d.csv: $(TENSORFLOW_MP2D_CV_DIR)pred.lr.v4_ext.csv
- scripts/cv-tensorflow-confusion-matrix.rb $< > $@
+$(CONFUSION_MATRICES_DIR)%: $(CV_PREDICTIONS_DIR)%
+ scripts/confusion-matrix.rb $< > $@
-$(CONFUSION_MATRICES_DIR)tensorflow-lr2-mp2d.csv: $(TENSORFLOW_MP2D_CV_DIR)pred.lr2.v4_ext.csv
- scripts/cv-tensorflow-confusion-matrix.rb $< > $@
+## predictions
-$(CONFUSION_MATRICES_DIR)tensorflow-nn-mp2d.csv: $(TENSORFLOW_MP2D_CV_DIR)pred.nn.v4_ext.csv
- scripts/cv-tensorflow-confusion-matrix.rb $< > $@
+$(CV_PREDICTIONS_DIR)%/lazar-all.csv: $(LAZAR_MODELS_DIR)mutagenicity-%
+ scripts/lazar-cv-predictions.rb $< > $@
-$(CONFUSION_MATRICES_DIR)tensorflow-rf-mp2d.csv: $(TENSORFLOW_MP2D_CV_DIR)pred.rf.v4_ext.csv
- scripts/cv-tensorflow-confusion-matrix.rb $< > $@
+$(CV_PREDICTIONS_DIR)%/lazar-high-confidence.csv: $(LAZAR_MODELS_DIR)mutagenicity-%
+ scripts/lazar-cv-predictions.rb $< 0.5 > $@
-$(CONFUSION_MATRICES_DIR)tensorflow-svm-mp2d.csv: $(TENSORFLOW_MP2D_CV_DIR)pred.svm.v4_ext.csv
- scripts/cv-tensorflow-confusion-matrix.rb $< > $@
+$(CV_PREDICTIONS_DIR)mp2d/%.csv: $(TENSORFLOW_CV_DIR)pred.%.v4_ext.csv
+ scripts/tensorflow-cv-predictions.rb $< > $@
-### cdk
+$(CV_PREDICTIONS_DIR)cdk/%.csv: $(TENSORFLOW_CV_DIR)pred.%.norm.v4.csv
+ scripts/tensorflow-cv-predictions.rb $< > $@
-$(CONFUSION_MATRICES_DIR)tensorflow-lr-cdk.csv: $(TENSORFLOW_CDK_CV_DIR)pred.lr.v4.norm.csv
- scripts/cv-tensorflow-confusion-matrix.rb $< > $@
+# cdk descriptors
-$(CONFUSION_MATRICES_DIR)tensorflow-lr2-cdk.csv: $(TENSORFLOW_CDK_CV_DIR)pred.lr2.norm.v4.csv
- scripts/cv-tensorflow-confusion-matrix.rb $< > $@
+mutagenicity/mutagenicity-cdk.csv: mutagenicity/src/mutagenicity-mod-2.new.csv
+ cut -f1,3- -d ',' $< > $@
-$(CONFUSION_MATRICES_DIR)tensorflow-nn-cdk.csv: $(TENSORFLOW_CDK_CV_DIR)pred.nn.v4.norm.csv
- scripts/cv-tensorflow-confusion-matrix.rb $< > $@
+# mp2d fingerprints
-$(CONFUSION_MATRICES_DIR)tensorflow-rf-cdk.csv: $(TENSORFLOW_CDK_CV_DIR)pred.rf.norm.v4.csv
- scripts/cv-tensorflow-confusion-matrix.rb $< > $@
+mutagenicity/mutagenicity-mp2d: mutagenicity/mutagenicity.csv
+ scripts/mp2d-fingerprints.rb $< > $@
-$(CONFUSION_MATRICES_DIR)tensorflow-svm-cdk.csv: $(TENSORFLOW_CDK_CV_DIR)pred.svm.norm.v4.csv
- scripts/cv-tensorflow-confusion-matrix.rb $< > $@
+$(PA_DIR)pa-mp2d: $(PA_DIR)pa-cids.csv
+ scripts/mp2d-fingerprints.rb $< > $@