blob: 1bbcb5754a91a3f6585b647470356d9e7217be34 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
|
# TODO
# EFSA SMILES errors (encoding problem?)
# EFSA script via CSV
# remove headers from the CSV files
# add a header to the merged CSV
# Per-source CSV files that are combined into data/merged.csv.
data := data/kazius.csv \
        data/efsa.csv \
        data/hansen.csv
#all: $(data)
# experiments
# Run the repeated cross-validation script on the merged training data and
# store what it prints on stdout in the target file.
# The output is written to a temporary file and renamed only on success, so a
# failed run cannot leave an empty or truncated target that Make would then
# consider up to date.
experiments/weighted_majority_merged.id: data/merged.csv
	scripts/repeated_crossvalidation.rb $< > $@.tmp && mv -f $@.tmp $@
# training data
# Build the merged training set: one "SMILES,Activity" header line followed by
# the sorted, de-duplicated rows of all per-source CSV files. Every input line
# containing "SMILES" is dropped by sed before the rows are written.
data/merged.csv: $(data)
	{ echo "SMILES,Activity" && cat $^ | sort -u | sed '/SMILES/d'; } > $@
# kazius
# Convert the SDF file to CSV with scripts/kazius2csv.rb (stdout is captured).
# Output goes to a temporary file first and is renamed only when the script
# succeeds; otherwise a failed conversion would leave an empty CSV that Make
# treats as up to date and that would silently be merged downstream.
data/kazius.csv: data/cas_4337.sdf
	scripts/kazius2csv.rb $< > $@.tmp && mv -f $@.tmp $@
# Extract the downloaded archive into the data directory.
# -o overwrites an existing copy instead of prompting interactively.
# unzip restores the timestamp stored in the archive, which can leave the
# target looking older than the zip and re-trigger this rule on every build;
# touch -c refreshes the timestamp without creating the file if the archive
# did not contain it.
data/cas_4337.sdf: data/cas_4337.zip
	unzip -o $< -d $(@D) && touch -c $@
# Download the zip archive.
# The download is saved under a temporary name and renamed only after wget
# succeeds, so an aborted transfer cannot leave a partial archive that Make
# would accept as the finished target.
data/cas_4337.zip:
	wget -O $@.tmp "http://cheminformatics.org/datasets/bursi/cas_4337.zip" && mv -f $@.tmp $@
# efsa
# Convert the CSV export of the EFSA spreadsheet with scripts/efsa2csv.rb
# (stdout is captured).
# Output goes to a temporary file first and is renamed only when the script
# succeeds, so a failed conversion cannot leave an empty CSV that Make treats
# as up to date.
data/efsa.csv: data/GENOTOX_data_and_dictionary.csv
	scripts/efsa2csv.rb $< > $@.tmp && mv -f $@.tmp $@
# Convert the downloaded spreadsheet to CSV with xls2csv (stdout is captured).
# Written via a temporary file so that a failing or missing xls2csv does not
# leave an empty CSV behind under the target name.
data/GENOTOX_data_and_dictionary.csv: data/GENOTOX_data_and_dictionary.xls
	xls2csv $< > $@.tmp && mv -f $@.tmp $@
# Download the spreadsheet.
# wget's -O (capital) names the output document; lowercase -o only redirects
# wget's log and would leave the download under its URL-derived name.
# The recipe runs from the top-level directory because $@ already contains the
# data/ prefix. The file is saved under a temporary name and renamed only on
# success, so a failed transfer leaves no partial target.
data/GENOTOX_data_and_dictionary.xls:
	wget -O $@.tmp "https://data.europa.eu/euodp/data/storage/f/2017-07-19T142131/GENOTOX%20data%20and%20dictionary.xls" && mv -f $@.tmp $@
# hansen
# Convert the downloaded benchmark CSV with scripts/hansen2csv.rb (stdout is
# captured).
# Output goes to a temporary file first and is renamed only when the script
# succeeds, so a failed conversion cannot leave an empty CSV that Make treats
# as up to date.
data/hansen.csv: data/Mutagenicity_N6512.csv
	scripts/hansen2csv.rb $< > $@.tmp && mv -f $@.tmp $@
# Download the benchmark CSV.
# Saved under a temporary name and renamed only after wget succeeds, so an
# aborted transfer cannot leave a partial file under the target name.
data/Mutagenicity_N6512.csv:
	wget -O $@.tmp "http://doc.ml.tu-berlin.de/toxbenchmark/Mutagenicity_N6512.csv" && mv -f $@.tmp $@
# Remove the generated data/hansen.csv.
# Declared phony so that a file named "clean" cannot disable the target;
# rm -f keeps the recipe from failing when the file is already absent.
# NOTE(review): the other generated files are not removed here - confirm
# whether that is intentional.
.PHONY: clean
clean:
	rm -f data/hansen.csv
|