blob: 015f07599f5ee185188e62096ec8b266bed3e9b9 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
|
# If a recipe fails, delete its target. Most recipes in this file write their
# output with "> $@", so without this a failed script would leave a truncated
# file behind that make treats as up to date.
.DELETE_ON_ERROR:

# "all" names a goal, not a file on disk.
.PHONY: all

# CSV data sets. Not referenced by any rule in the visible part of this file.
data := data/kazius.csv data/efsa.csv data/hansen.csv

# Result files built by the default goal.
summaries := \
  summaries/weighted_majority_merged_sim0.1.json \
  summaries/weighted_majority_kazius_sim0.1.json \
  summaries/weighted_majority_merged_sim0.5.json \
  summaries/weighted_majority_kazius_sim0.5.json \
  summaries/pa_mutagenicity_neighbor_sets.json \
  summaries/pa_carcinogenicity_neighbor_sets.json

# Default goal: build every summary.
all: $(summaries)
# exports
# Every export is the stdout of scripts/export.rb run on the data/*.id file
# that shares the target's base name.

# SDF export: "sdf" is passed to the script as a second argument.
export/mutagenicity-merged.sdf: data/mutagenicity-merged.id
	scripts/export.rb $< sdf > $@

# CSV exports: the script is called with the id file only.
export/mutagenicity-merged.csv export/carcinogenicity.csv: export/%.csv: data/%.id
	scripts/export.rb $< > $@
# PA neighbor-set summaries: scripts/pa_neighbor_sets.rb is run on one
# prediction file and its stdout is stored as the summary for the same endpoint.
# NOTE(review): no rule in the visible part of this file creates
# experiments/pa_*_prediction.json (the predict.rb rules write
# experiments/pa_*.id) -- confirm where these inputs come from.
summaries/pa_carcinogenicity_neighbor_sets.json \
summaries/pa_mutagenicity_neighbor_sets.json: \
summaries/pa_%_neighbor_sets.json: experiments/pa_%_prediction.json
	scripts/pa_neighbor_sets.rb $< > $@
# Repeated cross-validation: experiments and their summaries.

# Each summary is the stdout of the summary script run on the experiment id
# file with the same base name.
summaries/weighted_majority_kazius_sim0.5.json \
summaries/weighted_majority_merged_sim0.5.json \
summaries/weighted_majority_kazius_sim0.1.json \
summaries/weighted_majority_merged_sim0.1.json: \
summaries/%.json: experiments/%.id
	scripts/repeated_crossvalidation_summary.rb $< > $@

# Experiments: the cross-validation script is run on one CSV data set and its
# stdout is stored as the experiment id file.
# NOTE(review): the sim0.1 and sim0.5 targets run the same command on the same
# input; the threshold in the file name is not passed to the script -- confirm
# how the script is meant to pick it up.
# NOTE(review): no rule in the visible part of this file creates
# data/kazius.csv or data/mutagenicity-merged.csv -- confirm they are provided.
experiments/weighted_majority_kazius_sim0.5.id \
experiments/weighted_majority_kazius_sim0.1.id: data/kazius.csv
	scripts/repeated_crossvalidation.rb $< > $@

experiments/weighted_majority_merged_sim0.5.id \
experiments/weighted_majority_merged_sim0.1.id: data/mutagenicity-merged.csv
	scripts/repeated_crossvalidation.rb $< > $@
# PA predictions: scripts/predict.rb receives the endpoint's data id file
# followed by data/PA.id (prerequisite order); its stdout becomes the target.
experiments/pa_carcinogenicity.id: data/carcinogenicity.id data/PA.id
	scripts/predict.rb $+ > $@

experiments/pa_mutagenicity.id: data/mutagenicity-merged.id data/PA.id
	scripts/predict.rb $+ > $@
# models
# The merged mutagenicity model id is the stdout of scripts/merge.rb run on the
# single merged data id file.
models/mutagenicity-merged.id: data/mutagenicity-merged.id
	scripts/merge.rb $< > $@

# NOTE(review): this recipe uses neither its prerequisite nor $@, and it is the
# same command as the data/carcinogenicity.id rule. Unless the script writes
# models/carcinogenicity.id itself, the target is never created and the rule
# re-runs on every build -- confirm the intended command.
models/carcinogenicity.id: data/carcinogenicity.id
	scripts/import-pubchem.rb 1205
# test data
# Import the PA structures: scripts/import.rb is called with the SDF file and
# the literal argument "sdf"; its stdout is stored as the id file.
# data/PA.sdf has no rule in the visible part of this file, so it must already exist.
data/PA.id: data/PA.sdf
	scripts/import.rb $(<) sdf > $(@)
# training data
# Merged mutagenicity set: scripts/merge.rb receives the hansen, kazius and
# efsa id files in that order; its stdout becomes the merged id file.
data/mutagenicity-merged.id: data/hansen.id data/kazius.id data/efsa.id
	scripts/merge.rb $+ > $@

# Carcinogenicity set: imported by scripts/import-pubchem.rb with argument 1205.
# NOTE(review): the recipe does not reference $@. Unless the script writes
# data/carcinogenicity.id itself, the file is never created and everything
# depending on it rebuilds each time -- confirm whether "> $@" is missing.
data/carcinogenicity.id:
	scripts/import-pubchem.rb 1205
# kazius
# Import the Kazius structures: scripts/import.rb is called with the SDF file
# and the literal argument "sdf"; its stdout is stored as the id file.
data/kazius.id: data/cas_4337.sdf
	scripts/import.rb $< sdf > $@

# Unpack the downloaded archive into data/.
# unzip restores the timestamps stored in the archive, so the extracted file
# can look older than the zip; make would then re-run this rule (and everything
# depending on it) on every build, and unzip would stop to ask before
# overwriting. -o overwrites without prompting, and touch marks the extracted
# file as fresh. -c keeps touch from creating an empty file if the archive
# does not contain the expected member.
data/cas_4337.sdf: data/cas_4337.zip
	unzip -o -d $(@D) $<
	touch -c $@

# Download the archive into data/ (wget keeps the remote file name).
data/cas_4337.zip:
	cd $(@D) && wget "http://cheminformatics.org/datasets/bursi/cas_4337.zip"
# efsa
# Import the EFSA data: scripts/import.rb is called with the CSV file only;
# its stdout is stored as the id file.
data/efsa.id: data/efsa.csv
	scripts/import.rb $< > $@

# Convert the EFSA table into the CSV layout read by the import step.
data/efsa.csv: data/GENOTOX_data_and_dictionary.tsv
	scripts/efsa2csv.rb $< > $@

# Convert the spreadsheet to text, recoding cp1252 to utf-8.
# NOTE(review): the target is a .tsv but the -c separator below is a space;
# it may originally have been a literal tab -- confirm.
data/GENOTOX_data_and_dictionary.tsv: data/GENOTOX_data_and_dictionary.xls
	xls2csv -s cp1252 -d utf-8 -x -c " " $< > $@

# Download the spreadsheet.
# wget's lowercase -o selects the log file; -O selects the output document.
# The remote name contains spaces, so the output name must be given explicitly.
# The path is relative to the Makefile directory, hence no "cd data".
# Downloading to a temporary name first keeps an interrupted transfer from
# leaving a partial file under the target name.
data/GENOTOX_data_and_dictionary.xls:
	wget -O $@.tmp "https://data.europa.eu/euodp/data/storage/f/2017-07-19T142131/GENOTOX%20data%20and%20dictionary.xls" && mv -f $@.tmp $@
# hansen
# Import the Hansen data: scripts/import.rb is called with the CSV file only;
# its stdout is stored as the id file.
data/hansen.id: data/hansen.csv
	scripts/import.rb $(<) > $(@)

# Convert the downloaded benchmark file with scripts/hansen2csv.rb.
data/hansen.csv: data/Mutagenicity_N6512.csv
	scripts/hansen2csv.rb $(<) > $(@)

# Download the benchmark file into data/ (wget keeps the remote file name).
data/Mutagenicity_N6512.csv:
	cd $(@D) && wget "http://doc.ml.tu-berlin.de/toxbenchmark/Mutagenicity_N6512.csv"
# cleanup
# Neither target names a real file. Without .PHONY, a stray file called
# "clean" or "cleanall" would make these targets silently do nothing.
.PHONY: clean cleanall

# Remove the id files in data/. -f keeps the target from failing when there is
# nothing to delete.
clean:
	rm -f data/*.id

# Remove every file directly inside data/ (downloads and derived files alike).
# NOTE(review): this also deletes inputs such as data/PA.sdf, for which no
# rule is visible in this file -- confirm they can be restored.
cleanall:
	rm -f data/*
|