summary refs log tree commit diff
path: root/models/mutagenicity-mp2d/Makefile
diff options
context:
space:
mode:
Diffstat (limited to 'models/mutagenicity-mp2d/Makefile')
-rw-r--r-- models/mutagenicity-mp2d/Makefile 75
1 files changed, 75 insertions, 0 deletions
diff --git a/models/mutagenicity-mp2d/Makefile b/models/mutagenicity-mp2d/Makefile
new file mode 100644
index 0000000..b80be6d
--- /dev/null
+++ b/models/mutagenicity-mp2d/Makefile
@@ -0,0 +1,75 @@
+# Merged input CSV, directory of the helper scripts, and the list of files that together form the model.
+SRC := Mutagenicity-Salmonella_typhimurium.csv
+LAZAR_BIN := ../../bin
+MODEL := independent-variable-type dependent-variable-type dependent-variable-values similarity-thresholds independent-variables dependent-variables smiles metadata.json
+.PHONY: all model  # goals, not files: they must keep working even if a file named "all" or "model" appears
+all: crossvalidation/summaries
+model: $(MODEL)
+
+# Crossvalidation: crossvalidation.rb runs on the current directory once the model files exist; classification-summary.rb then runs on crossvalidation (presumably writing crossvalidation/summaries - confirm)
+
+crossvalidation/summaries: crossvalidation
+	$(LAZAR_BIN)/classification-summary.rb $<
+
+crossvalidation: $(MODEL)
+	$(LAZAR_BIN)/crossvalidation.rb .
+
+# Model data: smiles is column 1 and dependent-variables is column 2 of the merged CSV (header row dropped); independent-variables is the output of fingerprints.rb on smiles
+
+independent-variables: smiles
+	$(LAZAR_BIN)/fingerprints.rb $< > $@
+
+dependent-variables: $(SRC)
+	cut -d ',' -f 2 $< | sed '1d' > $@
+
+smiles: $(SRC)
+	cut -d ',' -f 1 $< | sed '1d' > $@
+
+# Metadata: a fixed one-line JSON record (species, endpoint, data sources, QMRF entry) written verbatim
+
+metadata.json:
+	printf '%s\n' '{"species":"Salmonella typhimurium","endpoint":"Mutagenicity","source":"http://cheminformatics.org/datasets/bursi/cas_4337.zip, http://doc.ml.tu-berlin.de/toxbenchmark/Mutagenicity_N6512.csv, https://data.europa.eu/euodp/data/storage/f/2017-07-19T142131/GENOTOX data and dictionary.xls","qmrf":{"group":"QMRF 4.10. Mutagenicity","name":"OECD 471 Bacterial Reverse Mutation Test"}}' > $@
+
+# Model parameters, one value per line. Multi-line values use printf: make runs recipes with /bin/sh, where `echo -e` is not portable (dash writes a literal "-e" into the file).
+
+independent-variable-type:
+	echo "binary" > $@
+
+dependent-variable-type:
+	echo "binary" > $@
+
+dependent-variable-values:
+	printf '"0"\n"1"\n' > $@
+
+similarity-thresholds:
+	printf '0.2\n0.5\n' > $@
+
+# Merge data: combine the three converted per-source CSV files. NOTE(review): no output path is passed or redirected here - presumably merge_csv.rb writes the merged file itself; confirm.
+
+$(SRC): download/efsa.csv download/hansen.csv download/kazius.csv
+	$(LAZAR_BIN)/merge_csv.rb $^
+
+# Convert data: turn each downloaded file into a per-source CSV. NOTE(review): efsa.tsv is named like a tab-separated file, but the xls2csv -c separator shows as a space here - confirm a tab was not intended.
+
+download/efsa.csv: download/efsa.tsv
+	$(@D)/efsa_classification.rb $< > $@
+
+download/efsa.tsv: download/efsa.xls
+	xls2csv -s cp1252 -d utf-8 -x -c " " $< > $@
+
+download/hansen.csv: download/Mutagenicity_N6512.csv
+	$(@D)/hansen_convert.rb $< > $@
+
+download/kazius.csv: download/cas_4337.sdf
+	$(LAZAR_BIN)/sdf2csv.rb $< > $@
+
+# Download original data. wget -O names the saved file; the lowercase -o only redirects wget's log, which left efsa.xls holding log text instead of the spreadsheet.
+
+download/cas_4337.sdf:
+	cd download && wget http://cheminformatics.org/datasets/bursi/cas_4337.zip && unzip cas_4337.zip
+
+download/Mutagenicity_N6512.csv:
+	cd download && wget http://doc.ml.tu-berlin.de/toxbenchmark/Mutagenicity_N6512.csv
+
+download/efsa.xls:
+	wget "https://data.europa.eu/euodp/data/storage/f/2017-07-19T142131/GENOTOX data and dictionary.xls" -O $@