summary refs log tree commit diff
path: root/Makefile
diff options
context:
space:
mode:
author Christoph Helma <helma@in-silico.ch> 2020-10-10 17:05:41 +0200
committer Christoph Helma <helma@in-silico.ch> 2020-10-10 17:05:41 +0200
commit e451d812f3b63d1987c8f1e7f5557156fdab984f (patch)
tree f5b4e1730f0b75593925b3287d3a37fa70fa507e /Makefile
parent 23ce84a7da69104fa763d5a3911b7b0ad98fbdbc (diff)
Makefile and scripts cleanup; lazar, R and tensorflow tables
Diffstat (limited to 'Makefile')
-rw-r--r-- Makefile 132
1 files changed, 42 insertions, 90 deletions
diff --git a/Makefile b/Makefile
index 8b39538..b2a4708 100644
--- a/Makefile
+++ b/Makefile
@@ -7,122 +7,69 @@ ARTICLE_FILE = mutagenicity.mustache.md
PANDOC_SCHOLAR_PATH = pandoc-scholar
OUTFILE_PREFIX = mutagenicity
DEFAULT_EXTENSIONS = pdf #latex docx html #odt epub
-#PANDOC_WRITER_OPTIONS = --filter=panpipe --filter=pandoc-placetable --filter=pandoc-citeproc -M tmpvar=test
PANDOC_WRITER_OPTIONS = --filter=pandoc-placetable --filter=pandoc-crossref --filter=pandoc-citeproc
TEMPLATE_FILE_LATEX = pandoc-scholar.latex
-# Lazar
-
-LAZAR_DIR = ../lazar
-LAZAR_MODEL_DIR = $(LAZAR_DIR)/models/mutagenicity
-LAZAR_PADEL_MODEL_DIR = $(LAZAR_DIR)/models/mutagenicity-padel
-LAZAR_SUMMARY_DIR = $(LAZAR_MODEL_DIR)/crossvalidation/summaries
-LAZAR_PADEL_SUMMARY_DIR = $(LAZAR_PADEL_MODEL_DIR)/crossvalidation/summaries
-LAZAR_CONFUSION_MATRIX_DIR = $(LAZAR_MODEL_DIR)/crossvalidation/confusion_matrices
-LAZAR_PADEL_CONFUSION_MATRIX_DIR = $(LAZAR_PADEL_MODEL_DIR)/crossvalidation/confusion_matrices
-
# Experiments
-SUMMARIES_DIR = 10-fold-crossvalidations/summaries
-CONFUSION_MATRICES_DIR = 10-fold-crossvalidations/confusion-matrices
+LAZAR_CONFUSION_MATRIX_DIR = 10-fold-crossvalidations/lazar/crossvalidation/confusion_matrices
+LAZAR_PADEL_CONFUSION_MATRIX_DIR = 10-fold-crossvalidations/lazar-padel/crossvalidation/confusion_matrices
R_CV_DIR = 10-fold-crossvalidations/R
TENSORFLOW_CV_DIR = 10-fold-crossvalidations/tensorflow
-#TABLES = tables/r-summary.csv tables/tf-summary.csv tables/lazar-summary.csv tables/R-SVM.csv tables/R-RF.csv tables/R-DL.csv tables/tensorflow-all.csv tables/tensorflow-selected.csv tables/lazar-all.csv tables/lazar-high-confidence.csv tables/lazar-padel-all.csv tables/lazar-padel-high-confidence.csv
-TABLES = tables/R-SVM.csv tables/R-RF.csv tables/R-DL.csv tables/tensorflow-all.csv tables/tensorflow-selected.csv tables/lazar-all.csv tables/lazar-high-confidence.csv tables/lazar-padel-all.csv tables/lazar-padel-high-confidence.csv #tables/pred.rf.v3.csv tables/pred.lr.v3.csv tables/pred.lr2.v3.csv tables/pred.nn.v3.csv
-
-R_SUMMARIES = $(SUMMARIES_DIR)/R-SVM.json $(SUMMARIES_DIR)/R-RF.json $(SUMMARIES_DIR)/R-DL.json
-TF_SUMMARIES = $(SUMMARIES_DIR)/tensorflow-all.json $(SUMMARIES_DIR)/tensorflow-selected.json $(SUMMARIES_DIR)/pred.lr.v3.json $(SUMMARIES_DIR)/pred.lr2.v3.json $(SUMMARIES_DIR)/pred.nn.v3.json $(SUMMARIES_DIR)/pred.rf.v3.json
-LAZAR_SUMMARIES = $(SUMMARIES_DIR)/lazar-all.json $(SUMMARIES_DIR)/lazar-high-confidence.json $(SUMMARIES_DIR)/lazar-padel-all.json $(SUMMARIES_DIR)/lazar-padel-high-confidence.json
+CONFUSION_MATRICES_DIR = 10-fold-crossvalidations/confusion-matrices
+CONFUSION_MATRICES = $(CONFUSION_MATRICES_DIR)/R-SVM.csv $(CONFUSION_MATRICES_DIR)/R-RF.csv $(CONFUSION_MATRICES_DIR)/R-DL.csv $(CONFUSION_MATRICES_DIR)/lazar-all.csv $(CONFUSION_MATRICES_DIR)/lazar-high-confidence.csv $(CONFUSION_MATRICES_DIR)/lazar-padel-all.csv $(CONFUSION_MATRICES_DIR)/lazar-padel-high-confidence.csv $(CONFUSION_MATRICES_DIR)/tensorflow-lr.v3.csv $(CONFUSION_MATRICES_DIR)/tensorflow-lr2.v3.csv $(CONFUSION_MATRICES_DIR)/tensorflow-nn.v3.csv $(CONFUSION_MATRICES_DIR)/tensorflow-rf.v3.csv
-#SUMMARIES = $(R_SUMMARIES) $(TF_SUMMARIES) $(LAZAR_SUMMARIES)
+CV_SUMMARY = 10-fold-crossvalidations/summary.yaml
+TABLES = tables/lazar-summary.csv tables/r-summary.csv tables/tensorflow-summary.csv
+FIGURES = figures/roc.png figures/tsne-mp2d.png figures/tsne-padel.png #figures/pa-predictions.png
-CONFUSION_MATRICES = $(CONFUSION_MATRICES_DIR)/R-SVM.csv $(CONFUSION_MATRICES_DIR)/R-RF.csv $(CONFUSION_MATRICES_DIR)/R-DL.csv $(CONFUSION_MATRICES_DIR)/tensorflow-all.csv $(CONFUSION_MATRICES_DIR)/tensorflow-selected.csv $(CONFUSION_MATRICES_DIR)/lazar-all.csv $(CONFUSION_MATRICES_DIR)/lazar-high-confidence.csv $(CONFUSION_MATRICES_DIR)/lazar-padel-all.csv $(CONFUSION_MATRICES_DIR)/lazar-padel-high-confidence.csv $(CONFUSION_MATRICES_DIR)/tensorflow-lr.v3.csv $(CONFUSION_MATRICES_DIR)/tensorflow-lr2.v3.csv $(CONFUSION_MATRICES_DIR)/tensorflow-nn.v3.csv $(CONFUSION_MATRICES_DIR)/tensorflow-rf.v3.csv
-DATA = data/mutagenicity.sdf data/mutagenicity.csv data/mutagenicity-fingerprints.csv
-FIGURES = figures/roc.png figures/tsne-mp2d.png figures/tsne-padel.png
+# manuscript
-all: $(DATA) $(TABLES) $(FIGURES) mutagenicity.pdf
+all: $(TABLES) $(FIGURES) mutagenicity.pdf
include $(PANDOC_SCHOLAR_PATH)/Makefile
-export: $(DATA)
-mutagenicity.mustache.md: results.yaml mutagenicity.md $(FIGURES)
+mutagenicity.mustache.md: $(CV_SUMMARY) mutagenicity.md $(TABLES) $(FIGURES)
mustache $^ > $@
# figures
+
figures/tsne-padel.png: figures/tsne-padel.csv
- scripts/padel-tsne.R
+ scripts/tsne-padel.R
figures/tsne-padel.csv: data/GenoTox-database.csv pyrrolizidine-alkaloids/PA-Padel-2D_m2.csv
scripts/padel-descriptors.rb $^ > $@
figures/tsne-mp2d.png: figures/tsne-mp2d.csv
- scripts/mp2d-tsne.R
+ scripts/tsne-mp2d.R
-figures/tsne-mp2d.csv: ../lazar/models/mutagenicity/independent_variables
+figures/tsne-mp2d.csv: 10-fold-crossvalidations/lazar/independent_variables
scripts/mp2d-distances.rb > figures/tsne-mp2d.csv
-figures/roc.png: figures/results.csv
+figures/roc.png: figures/roc.csv
scripts/roc.R
-figures/results.csv: results.yaml
+figures/roc.csv: $(CV_SUMMARY)
scripts/results2csv.rb $< > $@
# tables
-tables/r-summary.csv: $(R_SUMMARIES)
- scripts/summaries2table.rb $^ > $@
-
-tables/tf-summary.csv: $(TF_SUMMARIES)
- scripts/summaries2table.rb $^ > $@
+tables/lazar-summary.csv: $(CV_SUMMARY)
+ scripts/summaries2table.rb lazar > $@
-tables/lazar-summary.csv: $(LAZAR_SUMMARIES)
- scripts/summaries2table.rb $^ > $@
+tables/r-summary.csv: $(CV_SUMMARY)
+ scripts/summaries2table.rb R > $@
-tables/%.csv: $(CONFUSION_MATRICES_DIR)/%.csv
- scripts/confusion-matrix2table.rb $< > $@
+tables/tensorflow-summary.csv: $(CV_SUMMARY)
+ scripts/summaries2table.rb tensorflow > $@
-# summaries
+# crossvalidation summary
-#$(SUMMARIES_DIR)/results.json: $(SUMMARIES)
- #scripts/results.rb $^ > $@
-
-#$(SUMMARIES_DIR)/%.json: $(CONFUSION_MATRICES_DIR)/%.csv
- #scripts/confusion-matrix-summary.rb $< > $@
-
-results.yaml: $(CONFUSION_MATRICES)
+$(CV_SUMMARY): $(CONFUSION_MATRICES)
scripts/confusion-matrix-summary.rb $^ > $@
# confusion matrices
-## tensorflow
-$(CONFUSION_MATRICES_DIR)/tensorflow-selected.csv: $(TENSORFLOW_CV_DIR)/pred.sorted.csv
- scripts/cv-tensorflow-confusion-matrix.rb $< > $@
-
-$(CONFUSION_MATRICES_DIR)/tensorflow-all.csv: $(TENSORFLOW_CV_DIR)/pred_ext.sorted.csv
- scripts/cv-tensorflow-confusion-matrix.rb $< > $@
-
-$(CONFUSION_MATRICES_DIR)/tensorflow-lr.v3.csv: $(TENSORFLOW_CV_DIR)/pred.lr.v3.sorted.csv
- scripts/cv-tensorflow-confusion-matrix.rb $< > $@
-
-$(CONFUSION_MATRICES_DIR)/tensorflow-lr2.v3.csv: $(TENSORFLOW_CV_DIR)/pred.lr2.v3.sorted.csv
- scripts/cv-tensorflow-confusion-matrix.rb $< > $@
-
-$(CONFUSION_MATRICES_DIR)/tensorflow-nn.v3.csv: $(TENSORFLOW_CV_DIR)/pred.nn.v3.sorted.csv
- scripts/cv-tensorflow-confusion-matrix.rb $< > $@
-
-$(CONFUSION_MATRICES_DIR)/tensorflow-rf.v3.csv: $(TENSORFLOW_CV_DIR)/pred.rf.v3.sorted.csv
- scripts/cv-tensorflow-confusion-matrix.rb $< > $@
-
-## R
-$(CONFUSION_MATRICES_DIR)/R-SVM.csv: $(R_CV_DIR)/Sgl-Observations-SVM.csv
- scripts/cv-r-confusion-matrix.rb $< > $@
-
-$(CONFUSION_MATRICES_DIR)/R-RF.csv: $(R_CV_DIR)/Sgl-Observations-RF.csv
- scripts/cv-r-confusion-matrix.rb $< > $@
-
-$(CONFUSION_MATRICES_DIR)/R-DL.csv: $(R_CV_DIR)/Sgl-Observations-DL.csv
- scripts/cv-r-confusion-matrix.rb $< > $@
-
## lazar
$(CONFUSION_MATRICES_DIR)/lazar-all.csv: $(LAZAR_CONFUSION_MATRIX_DIR)
cp $</all $@
@@ -136,21 +83,26 @@ $(CONFUSION_MATRICES_DIR)/lazar-padel-all.csv: $(LAZAR_PADEL_CONFUSION_MATRIX_DI
$(CONFUSION_MATRICES_DIR)/lazar-padel-high-confidence.csv: $(LAZAR_PADEL_CONFUSION_MATRIX_DIR)
cp $</high_confidence $@
-# exports
+## R
+$(CONFUSION_MATRICES_DIR)/R-SVM.csv: $(R_CV_DIR)/Sgl-Observations-SVM.csv
+ scripts/cv-r-confusion-matrix.rb $< > $@
-data/mutagenicity-fingerprints.csv: $(LAZAR_DIR)/models/mutagenicity
- $(LAZAR_DIR)/bin/export-fingerprints.rb $< > $@
+$(CONFUSION_MATRICES_DIR)/R-RF.csv: $(R_CV_DIR)/Sgl-Observations-RF.csv
+ scripts/cv-r-confusion-matrix.rb $< > $@
-data/mutagenicity.csv: $(LAZAR_DIR)/models/mutagenicity/Mutagenicity-Salmonella_typhimurium.csv
- cp $< > $@
+$(CONFUSION_MATRICES_DIR)/R-DL.csv: $(R_CV_DIR)/Sgl-Observations-DL.csv
+ scripts/cv-r-confusion-matrix.rb $< > $@
-data/mutagenicity.sdf: $(LAZAR_DIR)/models/mutagenicity/Mutagenicity-Salmonella_typhimurium.csv
- $(LAZAR_DIR)/bin/export-sdf.rb $< > $@
-
-# lazar models and crossvalidations
+## tensorflow
+$(CONFUSION_MATRICES_DIR)/tensorflow-lr.v3.csv: $(TENSORFLOW_CV_DIR)/pred.lr.v3.sorted.csv
+ scripts/cv-tensorflow-confusion-matrix.rb $< > $@
-$(LAZAR_SUMMARY_DIR):
- make -C $(LAZAR_MODEL_DIR)
+$(CONFUSION_MATRICES_DIR)/tensorflow-lr2.v3.csv: $(TENSORFLOW_CV_DIR)/pred.lr2.v3.sorted.csv
+ scripts/cv-tensorflow-confusion-matrix.rb $< > $@
+
+$(CONFUSION_MATRICES_DIR)/tensorflow-nn.v3.csv: $(TENSORFLOW_CV_DIR)/pred.nn.v3.sorted.csv
+ scripts/cv-tensorflow-confusion-matrix.rb $< > $@
+
+$(CONFUSION_MATRICES_DIR)/tensorflow-rf.v3.csv: $(TENSORFLOW_CV_DIR)/pred.rf.v3.sorted.csv
+ scripts/cv-tensorflow-confusion-matrix.rb $< > $@
-$(LAZAR_PADEL_SUMMARY_DIR):
- make -C $(LAZAR_PADEL_MODEL_DIR)