diff options
author | Christoph Helma <helma@in-silico.ch> | 2019-08-19 15:20:28 +0200 |
---|---|---|
committer | Christoph Helma <helma@in-silico.ch> | 2019-08-19 15:20:28 +0200 |
commit | b82b4e640a11f3821b2bcf78fbbeead5d80b9066 (patch) | |
tree | c0020cb804f3439db3af5248eed4b99cbbb91a79 /Makefile | |
parent | b6ad21e340bce9ba2a2ad09fe48c656f0c2e3905 (diff) |
obsolete files removed, adjusted export scripts
Diffstat (limited to 'Makefile')
-rw-r--r-- | Makefile | 151 |
1 files changed, 52 insertions, 99 deletions
@@ -1,138 +1,91 @@ +# Manuscript +# please install pandoc-scholar (https://github.com/pandoc-scholar/pandoc-scholar) in the pandoc-scholar directory or point PANDOC_SCHOLAR_PATH to your installation + ARTICLE_FILE = mutagenicity.md PANDOC_SCHOLAR_PATH = pandoc-scholar OUTFILE_PREFIX = mutagenicity DEFAULT_EXTENSIONS = latex pdf docx #odt epub html +PANDOC_WRITER_OPTIONS = --filter=pandoc-citeproc #PANDOC_WRITER_OPTIONS = --filter=pandoc-placetable --filter=pandoc-citeproc TEMPLATE_FILE_LATEX = pandoc-scholar.latex include $(PANDOC_SCHOLAR_PATH)/Makefile -#tables = tables/R-SVM.csv tables/R-RF.csv tables/R-DL.csv +# Lazar -summaries = summaries/mutagenicity-merged-crossvalidations.json summaries/carcinogenicity-crossvalidations.json summaries/pa_mutagenicity_neighbor_sets.json summaries/pa_carcinogenicity_neighbor_sets.json +LAZAR_DIR = "../lazar" -exports = export/mutagenicity-merged.sdf export/mutagenicity-merged.csv export/pa_mutagenicity.csv export/pa_carcinogenicity.csv export/pa-fingerprints.csv export/mutagenicity-merged-fingerprints.csv +# Experiments -all: $(summaries) $(exports) #$(tables) +SUMMARIES_DIR = 10-fold-crossvalidations/summaries +CONFUSION_MATRICES_DIR = 10-fold-crossvalidations/confusion-matrices +R_CV_DIR = 10-fold-crossvalidations/R +TENSORFLOW_CV_DIR = 10-fold-crossvalidations/tensorflow +LAZAR_CV_DIR = 10-fold-crossvalidations/lazar -# confusion matrices +#tables = tables/R-SVM.csv tables/R-RF.csv tables/R-DL.csv -summaries/confusion-matrices/R-SVM.csv: 10-fold-crossvalidations/R/Sgl-Observations-SVM.csv - scripts/cv-r-confusion-matrix.rb $< > $@ +SUMMARIES = $(SUMMARIES_DIR)/R-SVM.csv $(SUMMARIES_DIR)/R-RF.csv $(SUMMARIES_DIR)/R-DL.csv $(SUMMARIES_DIR)/tensorflow-all.csv $(SUMMARIES_DIR)/tensorflow-selected.csv $(SUMMARIES_DIR)/lazar-all.csv $(SUMMARIES_DIR)/lazar-high-confidence.csv +# TODO lazar model/cv with PaDEL descriptors -summaries/confusion-matrices/R-RF.csv: 10-fold-crossvalidations/R/Sgl-Observations-RF.csv - 
scripts/cv-r-confusion-matrix.rb $< > $@ +DATA = data/mutagenicity.sdf data/mutagenicity.csv data/mutagenicity-fingerprints.csv -summaries/confusion-matrices/R-DL.csv: 10-fold-crossvalidations/R/Sgl-Observations-DL.csv - scripts/cv-r-confusion-matrix.rb $< > $@ +all: $(SUMMARIES) $(DATA) #$(tables) # summaries -summaries/pa_carcinogenicity_neighbor_sets.json: data/pa_carcinogenicity.json - scripts/pa_neighbor_sets.rb $< > $@ - -summaries/pa_mutagenicity_neighbor_sets.json: data/pa_mutagenicity.json - scripts/pa_neighbor_sets.rb $< > $@ - -summaries/mutagenicity-merged-crossvalidations.json: models/mutagenicity-merged.id - scripts/crossvalidation-summary.rb $< > $@ - -summaries/carcinogenicity-crossvalidations.json: models/carcinogenicity.id - scripts/crossvalidation-summary.rb $< > $@ - -# exports - -export/pa-fingerprints.csv: data/PA.id - scripts/export-fingerprints.rb $< > $@ - -export/mutagenicity-merged-fingerprints.csv: data/mutagenicity-merged.id - scripts/export-fingerprints.rb $< > $@ +$(SUMMARIES_DIR)/%.csv: $(CONFUSION_MATRICES_DIR)/%.csv + scripts/confusion-matrix-summary.rb $< > $@ +# confusion matrices -export/mutagenicity-merged.csv: data/mutagenicity-merged.id - scripts/export.rb $< csv > $@ +## tensorflow +$(CONFUSION_MATRICES_DIR)/tensorflow-selected.csv: $(TENSORFLOW_CV_DIR)/pred.csv + scripts/cv-tensorflow-confusion-matrix.rb $< > $@ -export/mutagenicity-merged.sdf: data/mutagenicity-merged.id - scripts/export.rb $< sdf > $@ +$(CONFUSION_MATRICES_DIR)/tensorflow-all.csv: $(TENSORFLOW_CV_DIR)/pred_ext.csv + scripts/cv-tensorflow-confusion-matrix.rb $< > $@ -export/pa_mutagenicity.csv: data/pa_mutagenicity.json - scripts/json2csv.rb $< > $@ +## R +$(CONFUSION_MATRICES_DIR)/R-SVM.csv: $(R_CV_DIR)/Sgl-Observations-SVM.csv + scripts/cv-r-confusion-matrix.rb $< > $@ -export/pa_carcinogenicity.csv: data/pa_carcinogenicity.json - scripts/json2csv.rb $< > $@ +$(CONFUSION_MATRICES_DIR)/R-RF.csv: $(R_CV_DIR)/Sgl-Observations-RF.csv + 
scripts/cv-r-confusion-matrix.rb $< > $@ -# predictions +$(CONFUSION_MATRICES_DIR)/R-DL.csv: $(R_CV_DIR)/Sgl-Observations-DL.csv + scripts/cv-r-confusion-matrix.rb $< > $@ -data/pa_carcinogenicity.json: data/PA.id models/carcinogenicity.id - scripts/predict.rb $^ > $@ +## lazar +$(CONFUSION_MATRICES_DIR)/lazar-all.csv: $(LAZAR_CV_DIR)/lazar-crossvalidation.id + scripts/cv-lazar-confusion-matrix.rb all $< > $@ -data/pa_mutagenicity.json: data/PA.id models/mutagenicity-merged.id - scripts/predict.rb $^ > $@ +$(CONFUSION_MATRICES_DIR)/lazar-high-confidence.csv: $(LAZAR_CV_DIR)/lazar-crossvalidation.id + scripts/cv-lazar-confusion-matrix.rb high-confidence $< > $@ -# models and validations +# TODO lazar model/cv with PaDEL descriptors -models/mutagenicity-merged.id: data/mutagenicity-merged.id - scripts/model.rb $< > $@ +# exports -models/carcinogenicity.id: data/carcinogenicity.id - scripts/model.rb $< > $@ +data/mutagenicity-fingerprints.csv: $(LAZAR_CV_DIR)/lazar-crossvalidation.id + scripts/export-fingerprints.rb $< > $@ -# test data +data/mutagenicity.csv: $(LAZAR_CV_DIR)/lazar-crossvalidation.id + scripts/export.rb $< training_csv > $@ -data/PA.id: data/PA.sdf - scripts/import.rb $< > $@ +data/mutagenicity.sdf: $(LAZAR_CV_DIR)/lazar-crossvalidation.id + scripts/export.rb $< sdf > $@ -# training data - -data/mutagenicity-merged.id: data/hansen.id data/efsa.id data/kazius.id - scripts/merge.rb $^ > $@ - -data/carcinogenicity.id: - scripts/import-pubchem.rb 1205 > $@ - -# kazius - -data/kazius.id: data/cas_4337.sdf - scripts/import.rb $< > $@ - -data/cas_4337.sdf: data/cas_4337.zip - cd data && unzip cas_4337.zip +# lazar models and crossvalidations -data/cas_4337.zip: - cd data && wget "http://cheminformatics.org/datasets/bursi/cas_4337.zip" -# efsa +$(LAZAR_CV_DIR)/lazar-crossvalidation.id: ../lazar/data/Mutagenicity-Salmonella_typhimurium.csv # adjust to match the location of your lazar libraries + scripts/lazar-crossvalidation.rb $< > $@ -data/efsa.id: 
data/efsa.csv - scripts/import.rb $< > $@ - -data/efsa.csv: data/GENOTOX_data_and_dictionary.tsv - scripts/efsa2csv.rb $< > $@ - -data/GENOTOX_data_and_dictionary.tsv: data/GENOTOX_data_and_dictionary.xls - xls2csv -s cp1252 -d utf-8 -x -c " " $< > $@ - -data/GENOTOX_data_and_dictionary.xls: - cd data && wget "https://data.europa.eu/euodp/data/storage/f/2017-07-19T142131/GENOTOX%20data%20and%20dictionary.xls" -o $@ - -# hansen - -data/hansen.id: data/hansen.csv - scripts/import.rb $< > $@ - -data/hansen.csv: data/Mutagenicity_N6512.csv - scripts/hansen2csv.rb $< > $@ - -data/Mutagenicity_N6512.csv: - cd data && wget "http://doc.ml.tu-berlin.de/toxbenchmark/Mutagenicity_N6512.csv" +# import +# TODO lazar model/cv with PaDEL descriptors # cleanup -clean: - rm data/*.id - -clean_db: - rm data/*.id && scripts/drop-database.rb - -cleanall: - rm data/* +drop-database: + scripts/drop-database.rb |