diff options
author | Christoph Helma <helma@in-silico.ch> | 2020-10-10 17:05:41 +0200 |
---|---|---|
committer | Christoph Helma <helma@in-silico.ch> | 2020-10-10 17:05:41 +0200 |
commit | e451d812f3b63d1987c8f1e7f5557156fdab984f (patch) | |
tree | f5b4e1730f0b75593925b3287d3a37fa70fa507e /Makefile | |
parent | 23ce84a7da69104fa763d5a3911b7b0ad98fbdbc (diff) |
Makefile and scripts cleanup; lazar, R and tensorflow tables
Diffstat (limited to 'Makefile')
-rw-r--r-- | Makefile | 132 |
1 files changed, 42 insertions, 90 deletions
@@ -7,122 +7,69 @@ ARTICLE_FILE = mutagenicity.mustache.md PANDOC_SCHOLAR_PATH = pandoc-scholar OUTFILE_PREFIX = mutagenicity DEFAULT_EXTENSIONS = pdf #latex docx html #odt epub -#PANDOC_WRITER_OPTIONS = --filter=panpipe --filter=pandoc-placetable --filter=pandoc-citeproc -M tmpvar=test PANDOC_WRITER_OPTIONS = --filter=pandoc-placetable --filter=pandoc-crossref --filter=pandoc-citeproc TEMPLATE_FILE_LATEX = pandoc-scholar.latex -# Lazar - -LAZAR_DIR = ../lazar -LAZAR_MODEL_DIR = $(LAZAR_DIR)/models/mutagenicity -LAZAR_PADEL_MODEL_DIR = $(LAZAR_DIR)/models/mutagenicity-padel -LAZAR_SUMMARY_DIR = $(LAZAR_MODEL_DIR)/crossvalidation/summaries -LAZAR_PADEL_SUMMARY_DIR = $(LAZAR_PADEL_MODEL_DIR)/crossvalidation/summaries -LAZAR_CONFUSION_MATRIX_DIR = $(LAZAR_MODEL_DIR)/crossvalidation/confusion_matrices -LAZAR_PADEL_CONFUSION_MATRIX_DIR = $(LAZAR_PADEL_MODEL_DIR)/crossvalidation/confusion_matrices - # Experiments -SUMMARIES_DIR = 10-fold-crossvalidations/summaries -CONFUSION_MATRICES_DIR = 10-fold-crossvalidations/confusion-matrices +LAZAR_CONFUSION_MATRIX_DIR = 10-fold-crossvalidations/lazar/crossvalidation/confusion_matrices +LAZAR_PADEL_CONFUSION_MATRIX_DIR = 10-fold-crossvalidations/lazar-padel/crossvalidation/confusion_matrices R_CV_DIR = 10-fold-crossvalidations/R TENSORFLOW_CV_DIR = 10-fold-crossvalidations/tensorflow -#TABLES = tables/r-summary.csv tables/tf-summary.csv tables/lazar-summary.csv tables/R-SVM.csv tables/R-RF.csv tables/R-DL.csv tables/tensorflow-all.csv tables/tensorflow-selected.csv tables/lazar-all.csv tables/lazar-high-confidence.csv tables/lazar-padel-all.csv tables/lazar-padel-high-confidence.csv -TABLES = tables/R-SVM.csv tables/R-RF.csv tables/R-DL.csv tables/tensorflow-all.csv tables/tensorflow-selected.csv tables/lazar-all.csv tables/lazar-high-confidence.csv tables/lazar-padel-all.csv tables/lazar-padel-high-confidence.csv #tables/pred.rf.v3.csv tables/pred.lr.v3.csv tables/pred.lr2.v3.csv tables/pred.nn.v3.csv - -R_SUMMARIES = $(SUMMARIES_DIR)/R-SVM.json $(SUMMARIES_DIR)/R-RF.json $(SUMMARIES_DIR)/R-DL.json -TF_SUMMARIES = $(SUMMARIES_DIR)/tensorflow-all.json $(SUMMARIES_DIR)/tensorflow-selected.json $(SUMMARIES_DIR)/pred.lr.v3.json $(SUMMARIES_DIR)/pred.lr2.v3.json $(SUMMARIES_DIR)/pred.nn.v3.json $(SUMMARIES_DIR)/pred.rf.v3.json -LAZAR_SUMMARIES = $(SUMMARIES_DIR)/lazar-all.json $(SUMMARIES_DIR)/lazar-high-confidence.json $(SUMMARIES_DIR)/lazar-padel-all.json $(SUMMARIES_DIR)/lazar-padel-high-confidence.json +CONFUSION_MATRICES_DIR = 10-fold-crossvalidations/confusion-matrices +CONFUSION_MATRICES = $(CONFUSION_MATRICES_DIR)/R-SVM.csv $(CONFUSION_MATRICES_DIR)/R-RF.csv $(CONFUSION_MATRICES_DIR)/R-DL.csv $(CONFUSION_MATRICES_DIR)/lazar-all.csv $(CONFUSION_MATRICES_DIR)/lazar-high-confidence.csv $(CONFUSION_MATRICES_DIR)/lazar-padel-all.csv $(CONFUSION_MATRICES_DIR)/lazar-padel-high-confidence.csv $(CONFUSION_MATRICES_DIR)/tensorflow-lr.v3.csv $(CONFUSION_MATRICES_DIR)/tensorflow-lr2.v3.csv $(CONFUSION_MATRICES_DIR)/tensorflow-nn.v3.csv $(CONFUSION_MATRICES_DIR)/tensorflow-rf.v3.csv -#SUMMARIES = $(R_SUMMARIES) $(TF_SUMMARIES) $(LAZAR_SUMMARIES) +CV_SUMMARY = 10-fold-crossvalidations/summary.yaml +TABLES = tables/lazar-summary.csv tables/r-summary.csv tables/tensorflow-summary.csv +FIGURES = figures/roc.png figures/tsne-mp2d.png figures/tsne-padel.png #figures/pa-predictions.png -CONFUSION_MATRICES = $(CONFUSION_MATRICES_DIR)/R-SVM.csv $(CONFUSION_MATRICES_DIR)/R-RF.csv $(CONFUSION_MATRICES_DIR)/R-DL.csv $(CONFUSION_MATRICES_DIR)/tensorflow-all.csv $(CONFUSION_MATRICES_DIR)/tensorflow-selected.csv $(CONFUSION_MATRICES_DIR)/lazar-all.csv $(CONFUSION_MATRICES_DIR)/lazar-high-confidence.csv $(CONFUSION_MATRICES_DIR)/lazar-padel-all.csv $(CONFUSION_MATRICES_DIR)/lazar-padel-high-confidence.csv $(CONFUSION_MATRICES_DIR)/tensorflow-lr.v3.csv $(CONFUSION_MATRICES_DIR)/tensorflow-lr2.v3.csv $(CONFUSION_MATRICES_DIR)/tensorflow-nn.v3.csv $(CONFUSION_MATRICES_DIR)/tensorflow-rf.v3.csv -DATA = data/mutagenicity.sdf data/mutagenicity.csv data/mutagenicity-fingerprints.csv -FIGURES = figures/roc.png figures/tsne-mp2d.png figures/tsne-padel.png +# manuscript -all: $(DATA) $(TABLES) $(FIGURES) mutagenicity.pdf +all: $(TABLES) $(FIGURES) mutagenicity.pdf include $(PANDOC_SCHOLAR_PATH)/Makefile -export: $(DATA) -mutagenicity.mustache.md: results.yaml mutagenicity.md $(FIGURES) +mutagenicity.mustache.md: $(CV_SUMMARY) mutagenicity.md $(TABLES) $(FIGURES) mustache $^ > $@ # figures + figures/tsne-padel.png: figures/tsne-padel.csv - scripts/padel-tsne.R + scripts/tsne-padel.R figures/tsne-padel.csv: data/GenoTox-database.csv pyrrolizidine-alkaloids/PA-Padel-2D_m2.csv scripts/padel-descriptors.rb $^ > $@ figures/tsne-mp2d.png: figures/tsne-mp2d.csv - scripts/mp2d-tsne.R + scripts/tsne-mp2d.R -figures/tsne-mp2d.csv: ../lazar/models/mutagenicity/independent_variables +figures/tsne-mp2d.csv: 10-fold-crossvalidations/lazar/independent_variables scripts/mp2d-distances.rb > figures/tsne-mp2d.csv -figures/roc.png: figures/results.csv +figures/roc.png: figures/roc.csv scripts/roc.R -figures/results.csv: results.yaml +figures/roc.csv: $(CV_SUMMARY) scripts/results2csv.rb $< > $@ # tables -tables/r-summary.csv: $(R_SUMMARIES) - scripts/summaries2table.rb $^ > $@ - -tables/tf-summary.csv: $(TF_SUMMARIES) - scripts/summaries2table.rb $^ > $@ +tables/lazar-summary.csv: $(CV_SUMMARY) + scripts/summaries2table.rb lazar > $@ -tables/lazar-summary.csv: $(LAZAR_SUMMARIES) - scripts/summaries2table.rb $^ > $@ +tables/r-summary.csv: $(CV_SUMMARY) + scripts/summaries2table.rb R > $@ -tables/%.csv: $(CONFUSION_MATRICES_DIR)/%.csv - scripts/confusion-matrix2table.rb $< > $@ +tables/tensorflow-summary.csv: $(CV_SUMMARY) + scripts/summaries2table.rb tensorflow > $@ -# summaries +# crossvalidation summary -#$(SUMMARIES_DIR)/results.json: $(SUMMARIES) - #scripts/results.rb $^ > $@ - -#$(SUMMARIES_DIR)/%.json: $(CONFUSION_MATRICES_DIR)/%.csv - #scripts/confusion-matrix-summary.rb $< > $@ - -results.yaml: $(CONFUSION_MATRICES) +$(CV_SUMMARY): $(CONFUSION_MATRICES) scripts/confusion-matrix-summary.rb $^ > $@ # confusion matrices -## tensorflow -$(CONFUSION_MATRICES_DIR)/tensorflow-selected.csv: $(TENSORFLOW_CV_DIR)/pred.sorted.csv - scripts/cv-tensorflow-confusion-matrix.rb $< > $@ - -$(CONFUSION_MATRICES_DIR)/tensorflow-all.csv: $(TENSORFLOW_CV_DIR)/pred_ext.sorted.csv - scripts/cv-tensorflow-confusion-matrix.rb $< > $@ - -$(CONFUSION_MATRICES_DIR)/tensorflow-lr.v3.csv: $(TENSORFLOW_CV_DIR)/pred.lr.v3.sorted.csv - scripts/cv-tensorflow-confusion-matrix.rb $< > $@ - -$(CONFUSION_MATRICES_DIR)/tensorflow-lr2.v3.csv: $(TENSORFLOW_CV_DIR)/pred.lr2.v3.sorted.csv - scripts/cv-tensorflow-confusion-matrix.rb $< > $@ - -$(CONFUSION_MATRICES_DIR)/tensorflow-nn.v3.csv: $(TENSORFLOW_CV_DIR)/pred.nn.v3.sorted.csv - scripts/cv-tensorflow-confusion-matrix.rb $< > $@ - -$(CONFUSION_MATRICES_DIR)/tensorflow-rf.v3.csv: $(TENSORFLOW_CV_DIR)/pred.rf.v3.sorted.csv - scripts/cv-tensorflow-confusion-matrix.rb $< > $@ - -## R -$(CONFUSION_MATRICES_DIR)/R-SVM.csv: $(R_CV_DIR)/Sgl-Observations-SVM.csv - scripts/cv-r-confusion-matrix.rb $< > $@ - -$(CONFUSION_MATRICES_DIR)/R-RF.csv: $(R_CV_DIR)/Sgl-Observations-RF.csv - scripts/cv-r-confusion-matrix.rb $< > $@ - -$(CONFUSION_MATRICES_DIR)/R-DL.csv: $(R_CV_DIR)/Sgl-Observations-DL.csv - scripts/cv-r-confusion-matrix.rb $< > $@ - ## lazar $(CONFUSION_MATRICES_DIR)/lazar-all.csv: $(LAZAR_CONFUSION_MATRIX_DIR) cp $</all $@ @@ -136,21 +83,26 @@ $(CONFUSION_MATRICES_DIR)/lazar-padel-all.csv: $(LAZAR_PADEL_CONFUSION_MATRIX_DI $(CONFUSION_MATRICES_DIR)/lazar-padel-high-confidence.csv: $(LAZAR_PADEL_CONFUSION_MATRIX_DIR) cp $</high_confidence $@ -# exports +## R +$(CONFUSION_MATRICES_DIR)/R-SVM.csv: $(R_CV_DIR)/Sgl-Observations-SVM.csv + scripts/cv-r-confusion-matrix.rb $< > $@ -data/mutagenicity-fingerprints.csv: $(LAZAR_DIR)/models/mutagenicity - $(LAZAR_DIR)/bin/export-fingerprints.rb $< > $@ +$(CONFUSION_MATRICES_DIR)/R-RF.csv: $(R_CV_DIR)/Sgl-Observations-RF.csv + scripts/cv-r-confusion-matrix.rb $< > $@ -data/mutagenicity.csv: $(LAZAR_DIR)/models/mutagenicity/Mutagenicity-Salmonella_typhimurium.csv - cp $< > $@ +$(CONFUSION_MATRICES_DIR)/R-DL.csv: $(R_CV_DIR)/Sgl-Observations-DL.csv + scripts/cv-r-confusion-matrix.rb $< > $@ -data/mutagenicity.sdf: $(LAZAR_DIR)/models/mutagenicity/Mutagenicity-Salmonella_typhimurium.csv - $(LAZAR_DIR)/bin/export-sdf.rb $< > $@ - -# lazar models and crossvalidations +## tensorflow +$(CONFUSION_MATRICES_DIR)/tensorflow-lr.v3.csv: $(TENSORFLOW_CV_DIR)/pred.lr.v3.sorted.csv + scripts/cv-tensorflow-confusion-matrix.rb $< > $@ -$(LAZAR_SUMMARY_DIR): - make -C $(LAZAR_MODEL_DIR) +$(CONFUSION_MATRICES_DIR)/tensorflow-lr2.v3.csv: $(TENSORFLOW_CV_DIR)/pred.lr2.v3.sorted.csv + scripts/cv-tensorflow-confusion-matrix.rb $< > $@ + +$(CONFUSION_MATRICES_DIR)/tensorflow-nn.v3.csv: $(TENSORFLOW_CV_DIR)/pred.nn.v3.sorted.csv + scripts/cv-tensorflow-confusion-matrix.rb $< > $@ + +$(CONFUSION_MATRICES_DIR)/tensorflow-rf.v3.csv: $(TENSORFLOW_CV_DIR)/pred.rf.v3.sorted.csv + scripts/cv-tensorflow-confusion-matrix.rb $< > $@ -$(LAZAR_PADEL_SUMMARY_DIR): - make -C $(LAZAR_PADEL_MODEL_DIR) |