diff options
Diffstat (limited to 'Makefile')
-rw-r--r-- | Makefile | 40 |
1 files changed, 40 insertions, 0 deletions
@@ -1,11 +1,51 @@
+# TODO
+# efsa smiles errors (encodings?)
+# efsa script via csv
+# remove headers from csv files
+# add header at merged csv
+
+data = data/kazius.csv data/efsa.csv data/hansen.csv
+
+#all: $(data)
+# experiments
+
+experiments/weighted_majority_merged.id: data/merged.csv
+	scripts/repeated_crossvalidation.rb data/merged.csv > $@
+
+# training data: one header line followed by the de-duplicated rows of all source csv files
+
+data/merged.csv: $(data)
+	echo "SMILES,Activity" > $@ && cat $(data) | sort -u | sed '/SMILES/d' >> $@
+
+# kazius
+
+data/kazius.csv: data/cas_4337.sdf
+	scripts/kazius2csv.rb $< > $@
+
+data/cas_4337.sdf: data/cas_4337.zip
+	cd data && unzip cas_4337.zip
+
+data/cas_4337.zip:
+	cd data && wget "http://cheminformatics.org/datasets/bursi/cas_4337.zip"
+
+# efsa (the xls download uses wget -O so the file is saved under the target name; -o would only name wget's log file)
+
 data/efsa.csv: data/GENOTOX_data_and_dictionary.csv
 	scripts/efsa2csv.rb $< > $@
 
 data/GENOTOX_data_and_dictionary.csv: data/GENOTOX_data_and_dictionary.xls
 	xls2csv $< > $@
 
+data/GENOTOX_data_and_dictionary.xls:
+	wget -O $@ "https://data.europa.eu/euodp/data/storage/f/2017-07-19T142131/GENOTOX%20data%20and%20dictionary.xls"
+
+# hansen
+
 data/hansen.csv: data/Mutagenicity_N6512.csv
 	scripts/hansen2csv.rb $< > $@
+
+data/Mutagenicity_N6512.csv:
+	cd data && wget "http://doc.ml.tu-berlin.de/toxbenchmark/Mutagenicity_N6512.csv"
 
 clean:
 	rm data/hansen.csv