summary | refs | log | tree | commit | diff
path: root/Makefile
diff options
context:
space:
mode:
author Christoph Helma <helma@in-silico.ch> 2018-10-07 19:39:34 +0200
committer Christoph Helma <helma@in-silico.ch> 2018-10-07 19:39:34 +0200
commit e97f7369339b215fda7d0169b567a211db6c5c38 (patch)
tree bcd1592c2684527c18f0e5b18e4b4b948f031aa1 /Makefile
parent ecc3b569aacdeea0c80af372a2fe70d8aea15f64 (diff)
Rakefile instead of Makefile and scripts
Diffstat (limited to 'Makefile')
-rw-r--r-- Makefile 68
1 files changed, 42 insertions, 26 deletions
diff --git a/Makefile b/Makefile
index 6ce86d3..015f075 100644
--- a/Makefile
+++ b/Makefile
@@ -4,6 +4,17 @@ summaries = summaries/weighted_majority_merged_sim0.1.json summaries/weighted_ma
all: $(summaries)
+# exports
+
+export/mutagenicity-merged.sdf: data/mutagenicity-merged.id
+ scripts/export.rb $< sdf > $@
+
+export/mutagenicity-merged.csv: data/mutagenicity-merged.id
+ scripts/export.rb $< > $@
+
+export/carcinogenicity.csv: data/carcinogenicity.id
+ scripts/export.rb $< > $@
+
summaries/pa_carcinogenicity_neighbor_sets.json: experiments/pa_carcinogenicity_prediction.json
scripts/pa_neighbor_sets.rb $< > $@
@@ -34,43 +45,37 @@ summaries/weighted_majority_merged_sim0.1.json: experiments/weighted_majority_me
experiments/weighted_majority_merged_sim0.1.id: data/mutagenicity-merged.csv
scripts/repeated_crossvalidation.rb $< > $@
-experiments/pa_carcinogenicity_prediction.json: data/carcinogenicity.csv data/PA_complete_SMILES_fixed.csv
+experiments/pa_carcinogenicity.id: data/carcinogenicity.id data/PA.id
scripts/predict.rb $^ > $@
-experiments/pa_mutagenicity_prediction.json: data/mutagenicity-merged.csv data/PA_complete_SMILES_fixed.csv
+experiments/pa_mutagenicity.id: data/mutagenicity-merged.id data/PA.id
scripts/predict.rb $^ > $@
-# test data
-
-data/PA_complete_SMILES_fixed.csv: data/PA.sdf
- cat $< | scripts/convert_pa.rb > $@
-
-data/PA_complete_SMILES_fixed.csv: data/PA_complete_SMILES.csv
- cat $< | scripts/convert_pa.rb > $@
-
-# training data
+# models
-export/mutagenicity-merged.sdf: data/mutagenicity-merged.id
- scripts/export-sdf.rb $< > $@
+models/mutagenicity-merged.id: data/mutagenicity-merged.id
+ scripts/merge.rb $^ > $@
-data/mutagenicity-merged.id: data/mutagenicity-merged.csv
- scripts/import-csv.rb $< > $@
+models/carcinogenicity.id: data/carcinogenicity.id
+ scripts/import-pubchem.rb 1205
-data/mutagenicity-merged.csv: $(data)
- scripts/merge-mutagenicity.rb $(data) > $@
+# test data
-data/carcinogenicity.csv: data/aid1205.csv
- scripts/carcinogenicity2csv.rb $< > $@
+data/PA.id: data/PA.sdf
+ scripts/import.rb $< sdf > $@
+
+# training data
-# carcinogenicity
+data/mutagenicity-merged.id: data/hansen.id data/kazius.id data/efsa.id
+ scripts/merge.rb $^ > $@
-data/aid1205.csv:
- cd data && curl https://pubchem.ncbi.nlm.nih.gov/rest/pug/assay/aid/1205/CSV -o $@
+data/carcinogenicity.id:
+ scripts/import-pubchem.rb 1205
# kazius
-data/kazius.csv: data/cas_4337.sdf
- scripts/kazius2csv.rb $< > $@
+data/kazius.id: data/cas_4337.sdf
+ scripts/import.rb $< sdf > $@
data/cas_4337.sdf: data/cas_4337.zip
cd data && unzip cas_4337.zip
@@ -80,8 +85,11 @@ data/cas_4337.zip:
# efsa
+data/efsa.id: data/efsa.csv
+ scripts/import.rb $< > $@
+
data/efsa.csv: data/GENOTOX_data_and_dictionary.tsv
- scripts/efsa2csv.rb $< > $@
+ scripts/efsa2csv.rb $< > $@
data/GENOTOX_data_and_dictionary.tsv: data/GENOTOX_data_and_dictionary.xls
xls2csv -s cp1252 -d utf-8 -x -c " " $< > $@
@@ -91,11 +99,19 @@ data/GENOTOX_data_and_dictionary.xls:
# hansen
+data/hansen.id: data/hansen.csv
+ scripts/import.rb $< > $@
+
data/hansen.csv: data/Mutagenicity_N6512.csv
scripts/hansen2csv.rb $< > $@
data/Mutagenicity_N6512.csv:
cd data && wget "http://doc.ml.tu-berlin.de/toxbenchmark/Mutagenicity_N6512.csv"
+# cleanup
+
clean:
- rm data/hansen.csv
+ rm data/*.id
+
+cleanall:
+ rm data/*