# TODO
# - efsa smiles errors (encodings?)
# - efsa script via csv
# - remove headers from csv files
# - add header at merged csv

# Most recipes write their target through a shell redirection (`> $@`), which
# creates the file even when the command fails. Have make delete such
# half-written targets so they are not mistaken for up-to-date results.
.DELETE_ON_ERROR:

# `clean` is a command, not a file.
.PHONY: clean

# Per-source training sets (SMILES,Activity CSV files) that are merged below.
data := data/kazius.csv data/efsa.csv data/hansen.csv

#all: $(data)

# experiments
# An experiment rule runs the repeated cross-validation script on a training
# set and stores the script's output in an .id file; the matching summary rule
# feeds that .id file to the summary script and stores its output as JSON.

summaries/weighted_majority_kazius.json: experiments/weighted_majority_kazius.id
	scripts/repeated_crossvalidation_summary.rb $< > $@

experiments/weighted_majority_kazius.id: data/kazius.csv
	scripts/repeated_crossvalidation.rb $< > $@

summaries/weighted_majority_merged.json: experiments/weighted_majority_merged.id
	scripts/repeated_crossvalidation_summary.rb $< > $@

experiments/weighted_majority_merged.id: data/merged.csv
	scripts/repeated_crossvalidation.rb $< > $@

# training data
# Concatenate all per-source CSV files, drop duplicate lines and every line
# containing "SMILES" (the per-file headers), then put a single header on top.

data/merged.csv: $(data)
	echo "SMILES,Activity" > $@ && sort -u $^ | sed '/SMILES/d' >> $@

# kazius

data/kazius.csv: data/cas_4337.sdf
	scripts/kazius2csv.rb $< > $@

# unzip restores the timestamps stored in the archive, so the extracted file
# may look older than the zip; touch it to keep make from re-extracting on
# every run. -o overwrites an existing file instead of prompting.
# NOTE(review): assumes the archive holds cas_4337.sdf at its top level, as
# the original `cd data && unzip` recipe did.
data/cas_4337.sdf: data/cas_4337.zip
	unzip -o $< -d $(@D)
	touch $@

data/cas_4337.zip:
	wget -O $@ "http://cheminformatics.org/datasets/bursi/cas_4337.zip"

# efsa

data/efsa.csv: data/GENOTOX_data_and_dictionary.csv
	scripts/efsa2csv.rb $< > $@

data/GENOTOX_data_and_dictionary.csv: data/GENOTOX_data_and_dictionary.xls
	xls2csv $< > $@

# -O (capital) names the downloaded document; lowercase -o would only name
# wget's log file and leave the target missing.
data/GENOTOX_data_and_dictionary.xls:
	wget -O $@ "https://data.europa.eu/euodp/data/storage/f/2017-07-19T142131/GENOTOX%20data%20and%20dictionary.xls"

# hansen

data/hansen.csv: data/Mutagenicity_N6512.csv
	scripts/hansen2csv.rb $< > $@

data/Mutagenicity_N6512.csv:
	wget -O $@ "http://doc.ml.tu-berlin.de/toxbenchmark/Mutagenicity_N6512.csv"

# Removes only the generated hansen CSV; $(RM) is `rm -f`, so a missing file
# is not an error.
clean:
	$(RM) data/hansen.csv