summaryrefslogtreecommitdiff
path: root/Makefile
diff options
context:
space:
mode:
authorChristoph Helma <helma@in-silico.ch>2019-08-19 15:20:28 +0200
committerChristoph Helma <helma@in-silico.ch>2019-08-19 15:20:28 +0200
commitb82b4e640a11f3821b2bcf78fbbeead5d80b9066 (patch)
treec0020cb804f3439db3af5248eed4b99cbbb91a79 /Makefile
parentb6ad21e340bce9ba2a2ad09fe48c656f0c2e3905 (diff)
obsolete files removed, adjusted export scripts
Diffstat (limited to 'Makefile')
-rw-r--r--Makefile151
1 files changed, 52 insertions, 99 deletions
diff --git a/Makefile b/Makefile
index 1ab10b2..47c6aad 100644
--- a/Makefile
+++ b/Makefile
@@ -1,138 +1,91 @@
+# Manuscript
+# please install pandoc-scholar (https://github.com/pandoc-scholar/pandoc-scholar) in the pandoc-scholar directory or point PANDOC_SCHOLAR_PATH to your installation
+
ARTICLE_FILE = mutagenicity.md
PANDOC_SCHOLAR_PATH = pandoc-scholar
OUTFILE_PREFIX = mutagenicity
DEFAULT_EXTENSIONS = latex pdf docx #odt epub html
+PANDOC_WRITER_OPTIONS = --filter=pandoc-citeproc
#PANDOC_WRITER_OPTIONS = --filter=pandoc-placetable --filter=pandoc-citeproc
TEMPLATE_FILE_LATEX = pandoc-scholar.latex
include $(PANDOC_SCHOLAR_PATH)/Makefile
-#tables = tables/R-SVM.csv tables/R-RF.csv tables/R-DL.csv
+# Lazar
-summaries = summaries/mutagenicity-merged-crossvalidations.json summaries/carcinogenicity-crossvalidations.json summaries/pa_mutagenicity_neighbor_sets.json summaries/pa_carcinogenicity_neighbor_sets.json
+LAZAR_DIR = "../lazar"
-exports = export/mutagenicity-merged.sdf export/mutagenicity-merged.csv export/pa_mutagenicity.csv export/pa_carcinogenicity.csv export/pa-fingerprints.csv export/mutagenicity-merged-fingerprints.csv
+# Experiments
-all: $(summaries) $(exports) #$(tables)
+SUMMARIES_DIR = 10-fold-crossvalidations/summaries
+CONFUSION_MATRICES_DIR = 10-fold-crossvalidations/confusion-matrices
+R_CV_DIR = 10-fold-crossvalidations/R
+TENSORFLOW_CV_DIR = 10-fold-crossvalidations/tensorflow
+LAZAR_CV_DIR = 10-fold-crossvalidations/lazar
-# confusion matrices
+#tables = tables/R-SVM.csv tables/R-RF.csv tables/R-DL.csv
-summaries/confusion-matrices/R-SVM.csv: 10-fold-crossvalidations/R/Sgl-Observations-SVM.csv
- scripts/cv-r-confusion-matrix.rb $< > $@
+SUMMARIES = $(SUMMARIES_DIR)/R-SVM.csv $(SUMMARIES_DIR)/R-RF.csv $(SUMMARIES_DIR)/R-DL.csv $(SUMMARIES_DIR)/tensorflow-all.csv $(SUMMARIES_DIR)/tensorflow-selected.csv $(SUMMARIES_DIR)/lazar-all.csv $(SUMMARIES_DIR)/lazar-high-confidence.csv
+# TODO lazar model/cv with PaDEL descriptors
-summaries/confusion-matrices/R-RF.csv: 10-fold-crossvalidations/R/Sgl-Observations-RF.csv
- scripts/cv-r-confusion-matrix.rb $< > $@
+DATA = data/mutagenicity.sdf data/mutagenicity.csv data/mutagenicity-fingerprints.csv
-summaries/confusion-matrices/R-DL.csv: 10-fold-crossvalidations/R/Sgl-Observations-DL.csv
- scripts/cv-r-confusion-matrix.rb $< > $@
+all: $(SUMMARIES) $(DATA) #$(tables)
# summaries
-summaries/pa_carcinogenicity_neighbor_sets.json: data/pa_carcinogenicity.json
- scripts/pa_neighbor_sets.rb $< > $@
-
-summaries/pa_mutagenicity_neighbor_sets.json: data/pa_mutagenicity.json
- scripts/pa_neighbor_sets.rb $< > $@
-
-summaries/mutagenicity-merged-crossvalidations.json: models/mutagenicity-merged.id
- scripts/crossvalidation-summary.rb $< > $@
-
-summaries/carcinogenicity-crossvalidations.json: models/carcinogenicity.id
- scripts/crossvalidation-summary.rb $< > $@
-
-# exports
-
-export/pa-fingerprints.csv: data/PA.id
- scripts/export-fingerprints.rb $< > $@
-
-export/mutagenicity-merged-fingerprints.csv: data/mutagenicity-merged.id
- scripts/export-fingerprints.rb $< > $@
+$(SUMMARIES_DIR)/%.csv: $(CONFUSION_MATRICES_DIR)/%.csv
+ scripts/confusion-matrix-summary.rb $< > $@
+# confusion matrices
-export/mutagenicity-merged.csv: data/mutagenicity-merged.id
- scripts/export.rb $< csv > $@
+## tensorflow
+$(CONFUSION_MATRICES_DIR)/tensorflow-selected.csv: $(TENSORFLOW_CV_DIR)/pred.csv
+ scripts/cv-tensorflow-confusion-matrix.rb $< > $@
-export/mutagenicity-merged.sdf: data/mutagenicity-merged.id
- scripts/export.rb $< sdf > $@
+$(CONFUSION_MATRICES_DIR)/tensorflow-all.csv: $(TENSORFLOW_CV_DIR)/pred_ext.csv
+ scripts/cv-tensorflow-confusion-matrix.rb $< > $@
-export/pa_mutagenicity.csv: data/pa_mutagenicity.json
- scripts/json2csv.rb $< > $@
+## R
+$(CONFUSION_MATRICES_DIR)/R-SVM.csv: $(R_CV_DIR)/Sgl-Observations-SVM.csv
+ scripts/cv-r-confusion-matrix.rb $< > $@
-export/pa_carcinogenicity.csv: data/pa_carcinogenicity.json
- scripts/json2csv.rb $< > $@
+$(CONFUSION_MATRICES_DIR)/R-RF.csv: $(R_CV_DIR)/Sgl-Observations-RF.csv
+ scripts/cv-r-confusion-matrix.rb $< > $@
-# predictions
+$(CONFUSION_MATRICES_DIR)/R-DL.csv: $(R_CV_DIR)/Sgl-Observations-DL.csv
+ scripts/cv-r-confusion-matrix.rb $< > $@
-data/pa_carcinogenicity.json: data/PA.id models/carcinogenicity.id
- scripts/predict.rb $^ > $@
+## lazar
+$(CONFUSION_MATRICES_DIR)/lazar-all.csv: $(LAZAR_CV_DIR)/lazar-crossvalidation.id
+ scripts/cv-lazar-confusion-matrix.rb all $< > $@
-data/pa_mutagenicity.json: data/PA.id models/mutagenicity-merged.id
- scripts/predict.rb $^ > $@
+$(CONFUSION_MATRICES_DIR)/lazar-high-confidence.csv: $(LAZAR_CV_DIR)/lazar-crossvalidation.id
+ scripts/cv-lazar-confusion-matrix.rb high-confidence $< > $@
-# models and validations
+# TODO lazar model/cv with PaDEL descriptors
-models/mutagenicity-merged.id: data/mutagenicity-merged.id
- scripts/model.rb $< > $@
+# exports
-models/carcinogenicity.id: data/carcinogenicity.id
- scripts/model.rb $< > $@
+data/mutagenicity-fingerprints.csv: $(LAZAR_CV_DIR)/lazar-crossvalidation.id
+ scripts/export-fingerprints.rb $< > $@
-# test data
+data/mutagenicity.csv: $(LAZAR_CV_DIR)/lazar-crossvalidation.id
+ scripts/export.rb $< training_csv > $@
-data/PA.id: data/PA.sdf
- scripts/import.rb $< > $@
+data/mutagenicity.sdf: $(LAZAR_CV_DIR)/lazar-crossvalidation.id
+ scripts/export.rb $< sdf > $@
-# training data
-
-data/mutagenicity-merged.id: data/hansen.id data/efsa.id data/kazius.id
- scripts/merge.rb $^ > $@
-
-data/carcinogenicity.id:
- scripts/import-pubchem.rb 1205 > $@
-
-# kazius
-
-data/kazius.id: data/cas_4337.sdf
- scripts/import.rb $< > $@
-
-data/cas_4337.sdf: data/cas_4337.zip
- cd data && unzip cas_4337.zip
+# lazar models and crossvalidations
-data/cas_4337.zip:
- cd data && wget "http://cheminformatics.org/datasets/bursi/cas_4337.zip"
-# efsa
+$(LAZAR_CV_DIR)/lazar-crossvalidation.id: ../lazar/data/Mutagenicity-Salmonella_typhimurium.csv # adjust to match the location of your lazar libraries
+ scripts/lazar-crossvalidation.rb $< > $@
-data/efsa.id: data/efsa.csv
- scripts/import.rb $< > $@
-
-data/efsa.csv: data/GENOTOX_data_and_dictionary.tsv
- scripts/efsa2csv.rb $< > $@
-
-data/GENOTOX_data_and_dictionary.tsv: data/GENOTOX_data_and_dictionary.xls
- xls2csv -s cp1252 -d utf-8 -x -c " " $< > $@
-
-data/GENOTOX_data_and_dictionary.xls:
- cd data && wget "https://data.europa.eu/euodp/data/storage/f/2017-07-19T142131/GENOTOX%20data%20and%20dictionary.xls" -o $@
-
-# hansen
-
-data/hansen.id: data/hansen.csv
- scripts/import.rb $< > $@
-
-data/hansen.csv: data/Mutagenicity_N6512.csv
- scripts/hansen2csv.rb $< > $@
-
-data/Mutagenicity_N6512.csv:
- cd data && wget "http://doc.ml.tu-berlin.de/toxbenchmark/Mutagenicity_N6512.csv"
+# import
+# TODO lazar model/cv with PaDEL descriptors
# cleanup
-clean:
- rm data/*.id
-
-clean_db:
- rm data/*.id && scripts/drop-database.rb
-
-cleanall:
- rm data/*
+drop-database:
+ scripts/drop-database.rb