summary | refs | log | tree | commit | diff
path: root/Makefile
diff options
context:
space:
mode:
author: Christoph Helma <helma@in-silico.ch> 2021-02-17 23:11:49 +0100
committer: Christoph Helma <helma@in-silico.ch> 2021-02-17 23:11:49 +0100
commit: 9d7b4aaff715e731ba81bf131dfaa9de5a9d0fdd (patch)
tree: 7a84e5a702a1e0cb8d10c8512da822b2b8c9a5c1 /Makefile
parent: 3bfc5bfb57e8f130b1b3d9a90fc34744278ef6b5 (diff)
cleanup, scripts adjusted, improved figures
Diffstat (limited to 'Makefile')
-rw-r--r-- Makefile 53
1 files changed, 26 insertions, 27 deletions
diff --git a/Makefile b/Makefile
index 41128b3..5b97564 100644
--- a/Makefile
+++ b/Makefile
@@ -15,10 +15,12 @@ TEMPLATE_FILE_LATEX = pandoc-scholar.latex
# crossvalidations
-CV_DIR = 10-fold-crossvalidations/
+CV_DIR = crossvalidations/
+LAZAR_MODELS_DIR = ../lazar/models/
+LAZAR_PREDICTIONS_DIR = ../lazar/predictions/
-LAZAR_MP2D_CONFUSION_MATRIX_DIR = $(CV_DIR)mp2d/lazar/crossvalidation/confusion_matrices/
-LAZAR_CDK_CONFUSION_MATRIX_DIR = $(CV_DIR)cdk/lazar/crossvalidation/confusion_matrices/
+LAZAR_MP2D_CONFUSION_MATRIX_DIR = $(LAZAR_MODELS_DIR)mutagenicity-mp2d/crossvalidation/confusion-matrices/
+LAZAR_CDK_CONFUSION_MATRIX_DIR = $(LAZAR_MODELS_DIR)mutagenicity-cdk/crossvalidation/confusion-matrices/
TENSORFLOW_MP2D_CV_DIR = $(CV_DIR)mp2d/tensorflow/
TENSORFLOW_CDK_CV_DIR = $(CV_DIR)cdk/tensorflow/
@@ -54,7 +56,6 @@ CV_SUMMARY = $(CV_DIR)summary.yaml
# PA predictions
PA_DIR = pyrrolizidine-alkaloids/
-PA_DATA_DIR = data/pyrrolizidine-alkaloids/
PA_MP2D_DIR = $(PA_DIR)mp2d/
PA_CDK_DIR = $(PA_DIR)cdk/
@@ -82,8 +83,8 @@ PA_CDK_TENSORFLOW_PREDICTIONS = $(addprefix $(PA_CDK_TENSORFLOW_DIR), \
)
PA_PREDICTIONS = \
- $(PA_MP2D_LAZAR_DIR)pa-mp2d-predictions.csv \
- $(PA_CDK_LAZAR_DIR)pa-cdk-predictions.csv \
+ $(PA_MP2D_LAZAR_DIR)predictions\
+ $(PA_CDK_LAZAR_DIR)predictions \
$(PA_MP2D_TENSORFLOW_PREDICTIONS) \
$(PA_CDK_TENSORFLOW_PREDICTIONS)
@@ -95,13 +96,9 @@ TABLES = $(addprefix tables/, \
lazar-summary.csv \
tensorflow-summary.csv \
pa-tab.tex \
- pa-summary.csv \
)
-FIGURES = $(addprefix figures/, \
- roc.png \
- tsne-mp2d.png \
- tsne-cdk.png \
+PA_FIGURES = $(addprefix figures/, \
Dehydropyrrolizidine.png \
Diester.png \
Macrocyclic.diester.png \
@@ -113,6 +110,8 @@ FIGURES = $(addprefix figures/, \
Tertiary.PA.png \
)
+FIGURES = figures/roc.png figures/tsne-mp2d.png figures/tsne-cdk.png $(PA_FIGURES)
+
SUMMARY = summary.yaml
all: mutagenicity.pdf
@@ -126,15 +125,13 @@ mutagenicity.mustache.md: $(SUMMARY) mutagenicity.md $(TABLES) $(FIGURES)
figures/tsne-cdk.png: figures/tsne-cdk.csv
Rscript scripts/tsne-cdk.R
-# TODO: filtered CDK descriptors
-figures/tsne-cdk.csv: data/training/GenoTox-database.csv data/pyrrolizidine-alkaloids/PA-Padel-2D_m2.csv
+figures/tsne-cdk.csv: mutagenicity/cdk/mutagenicity-mod-2.new.csv pyrrolizidine-alkaloids/PA-Padel-2D_m2.csv
scripts/cdk-descriptors.rb $^ > $@
figures/tsne-mp2d.png: figures/tsne-mp2d.csv
Rscript scripts/tsne-mp2d.R
-# TODO: exported fingerprints
-figures/tsne-mp2d.csv: 10-fold-crossvalidations/mp2d/lazar/independent_variables
+figures/tsne-mp2d.csv: $(LAZAR_MODELS_DIR)mutagenicity-mp2d/independent-variables
scripts/mp2d-distances.rb > figures/tsne-mp2d.csv
figures/roc.png: figures/roc.csv
@@ -143,6 +140,9 @@ figures/roc.png: figures/roc.csv
figures/roc.csv: $(CV_SUMMARY)
scripts/summary2roc.rb $< > $@
+$(PA_FIGURES): tables/pa-table.csv
+ scripts/pa-groups.R
+
# tables
tables/pa-summary.csv: $(PA_SUMMARY)
@@ -170,15 +170,14 @@ $(PA_SUMMARY): tables/pa-table.csv
# PA lazar predictions
-# TODO move to script dir, generate independent_variables from data
-$(PA_CDK_LAZAR_DIR)pa-cdk-predictions.csv: $(PA_CDK_LAZAR_DIR)pa_independent_variables
- ../lazar/bin/batch_padel_classification.rb 10-fold-crossvalidations/lazar-pa-cdk $< > $@
+$(PA_DIR)pa-canonical-smiles: $(PA_DIR)180920_PA_complete_SMILES.csv
+ scripts/pa-smiles.rb $< | obabel -ismi - -ocan | tr -d "\t" > $@
-$(PA_MP2D_LAZAR_DIR)pa-mp2d-predictions.csv: $(PA_DATA_DIR)pa-smiles.csv
- ../lazar/bin/batch_fingerprint_classification.rb ../lazar/models/mutagenicity $< > $@
+$(PA_CDK_LAZAR_DIR)predictions: $(LAZAR_PREDICTIONS_DIR)pa-cdk/predictions
+ cp $< $@
-$(PA_DATA_DIR)pa-smiles.csv: $(PA_DATA_DIR)180920_PA_complete_SMILES.csv
- cut -f1,4 -d ';' $< | sed 's/;/,/' > $@
+$(PA_MP2D_LAZAR_DIR)predictions: $(LAZAR_PREDICTIONS_DIR)pa-mp2d/predictions
+ cp $< $@
# crossvalidation summary
@@ -222,17 +221,17 @@ $(CONFUSION_MATRICES_DIR)tensorflow-svm-mp2d.csv: $(TENSORFLOW_MP2D_CV_DIR)pred.
### cdk
-$(CONFUSION_MATRICES_DIR)tensorflow-lr-cdk.csv: $(TENSORFLOW_CDK_CV_DIR)pred.lr.v4.csv
+$(CONFUSION_MATRICES_DIR)tensorflow-lr-cdk.csv: $(TENSORFLOW_CDK_CV_DIR)pred.lr.v4.norm.csv
scripts/cv-tensorflow-confusion-matrix.rb $< > $@
-$(CONFUSION_MATRICES_DIR)tensorflow-lr2-cdk.csv: $(TENSORFLOW_CDK_CV_DIR)pred.lr2.v4.csv
+$(CONFUSION_MATRICES_DIR)tensorflow-lr2-cdk.csv: $(TENSORFLOW_CDK_CV_DIR)pred.lr2.norm.v4.csv
scripts/cv-tensorflow-confusion-matrix.rb $< > $@
-$(CONFUSION_MATRICES_DIR)tensorflow-nn-cdk.csv: $(TENSORFLOW_CDK_CV_DIR)pred.nn.v4.csv
+$(CONFUSION_MATRICES_DIR)tensorflow-nn-cdk.csv: $(TENSORFLOW_CDK_CV_DIR)pred.nn.v4.norm.csv
scripts/cv-tensorflow-confusion-matrix.rb $< > $@
-$(CONFUSION_MATRICES_DIR)tensorflow-rf-cdk.csv: $(TENSORFLOW_CDK_CV_DIR)pred.rf.v4.csv
+$(CONFUSION_MATRICES_DIR)tensorflow-rf-cdk.csv: $(TENSORFLOW_CDK_CV_DIR)pred.rf.norm.v4.csv
scripts/cv-tensorflow-confusion-matrix.rb $< > $@
-$(CONFUSION_MATRICES_DIR)tensorflow-svm-cdk.csv: $(TENSORFLOW_CDK_CV_DIR)pred.svm.v4.csv
+$(CONFUSION_MATRICES_DIR)tensorflow-svm-cdk.csv: $(TENSORFLOW_CDK_CV_DIR)pred.svm.norm.v4.csv
scripts/cv-tensorflow-confusion-matrix.rb $< > $@