summary refs log tree commit diff
path: root/Makefile
diff options
context:
space:
mode:
Diffstat (limited to 'Makefile')
-rw-r--r-- Makefile 40
1 files changed, 40 insertions, 0 deletions
diff --git a/Makefile b/Makefile
index 987ca73..1bbcb57 100644
--- a/Makefile
+++ b/Makefile
@@ -1,11 +1,51 @@
+# TODO
+# efsa smiles errors (encodings?)
+# efsa script via csv
+# remove headers from csv files
+# add header at merged csv
+
+# Delete a target whose recipe failed, so a truncated redirect output
+# (e.g. `script > $@`) is not mistaken for an up-to-date file on the next run.
+# Special targets starting with "." never become the default goal.
+.DELETE_ON_ERROR:
+
+# Per-source training sets; they are the prerequisites of data/merged.csv.
+data = data/kazius.csv data/efsa.csv data/hansen.csv
+
+#all: $(data)
+# experiments
+
+# Run scripts/repeated_crossvalidation.rb on the merged training set and
+# store the script's stdout as the target file.
+experiments/weighted_majority_merged.id: data/merged.csv
+	scripts/repeated_crossvalidation.rb $< > $@
+
+# training data
+
+# Build the combined training set: write the header row first, then append
+# the de-duplicated lines of all per-source CSVs, dropping every line that
+# contains "SMILES" (the per-source header rows).
+data/merged.csv: $(data)
+	echo "SMILES,Activity" > $@ && sort -u $(data) | sed '/SMILES/d' >> $@
+
+# kazius
+
+# Convert the extracted SDF file with scripts/kazius2csv.rb; the script
+# receives the SDF path as its argument and its stdout becomes the target.
+data/kazius.csv: data/cas_4337.sdf
+ scripts/kazius2csv.rb $< > $@
+
+# Extract the downloaded archive into the target's directory.
+# -o overwrites existing files without prompting, so a re-run never blocks
+# on unzip's interactive "replace?" question.
+# unzip restores the modification times stored in the archive, which can
+# leave the target older than the zip and make this rule fire on every run;
+# touch -c refreshes the timestamp without creating an empty file should the
+# archive not contain the expected member.
+data/cas_4337.sdf: data/cas_4337.zip
+	unzip -o $< -d $(@D)
+	touch -c $@
+
+# Fetch the archive into the target's directory; wget names the saved file
+# after the last component of the URL, which matches the target name.
+data/cas_4337.zip:
+	cd $(@D) && wget "http://cheminformatics.org/datasets/bursi/cas_4337.zip"
+
+# efsa
+
+# Convert the GENOTOX CSV export with scripts/efsa2csv.rb; the script
+# receives the CSV path as its argument and its stdout becomes the target.
data/efsa.csv: data/GENOTOX_data_and_dictionary.csv
scripts/efsa2csv.rb $< > $@
+# Convert the .xls spreadsheet to CSV. xls2csv is invoked without a path,
+# so it must be available on PATH; its stdout becomes the target.
data/GENOTOX_data_and_dictionary.csv: data/GENOTOX_data_and_dictionary.xls
xls2csv $< > $@
+# Download the GENOTOX data-and-dictionary spreadsheet from data.europa.eu.
+# The URL's file name contains encoded spaces and differs from the target
+# name, so the output file must be named explicitly with -O (capital);
+# lowercase -o only selects wget's log file. The target path is relative to
+# the Makefile's directory, so the recipe must not `cd data` first.
+# Downloading to a .part file and renaming on success keeps a failed or
+# interrupted transfer from leaving a partial file under the target name.
+data/GENOTOX_data_and_dictionary.xls:
+	wget -O $@.part "https://data.europa.eu/euodp/data/storage/f/2017-07-19T142131/GENOTOX%20data%20and%20dictionary.xls" && mv -f $@.part $@
+
+# hansen
+
+# Convert the downloaded Mutagenicity_N6512 CSV with scripts/hansen2csv.rb;
+# the script receives the CSV path as its argument and its stdout becomes
+# the target.
data/hansen.csv: data/Mutagenicity_N6512.csv
scripts/hansen2csv.rb $< > $@
+# Fetch the benchmark CSV into the target's directory; wget names the saved
+# file after the last component of the URL, which matches the target name.
+data/Mutagenicity_N6512.csv:
+	cd $(@D) && wget "http://doc.ml.tu-berlin.de/toxbenchmark/Mutagenicity_N6512.csv"
+
+# `clean` is a command, not a file: declare it phony so that a stray file
+# named "clean" cannot make the target look up to date.
+# Removes only data/hansen.csv.
+.PHONY: clean
clean:
rm data/hansen.csv