summaryrefslogtreecommitdiff
path: root/models/mutagenicity/Makefile
diff options
context:
space:
mode:
author Christoph Helma <helma@in-silico.ch> 2019-09-07 18:20:10 +0200
committer Christoph Helma <helma@in-silico.ch> 2019-09-07 18:20:10 +0200
commit 791398c12af4f8290095425dac87e3c852905ab6 (patch)
tree 608ff890a49641ff196b00f1326555da975d3e1b /models/mutagenicity/Makefile
parent a35be3d59a513701f8822af5b56510647d8d531c (diff)
obsolete data and java directories deleted
Diffstat (limited to 'models/mutagenicity/Makefile')
-rw-r--r-- models/mutagenicity/Makefile 50
1 files changed, 48 insertions, 2 deletions
diff --git a/models/mutagenicity/Makefile b/models/mutagenicity/Makefile
index 637004c..1e0c565 100644
--- a/models/mutagenicity/Makefile
+++ b/models/mutagenicity/Makefile
@@ -1,12 +1,28 @@
LAZAR_BIN = ../../bin
-METADATA = independent_variable_type dependent_variable_type dependent_variable_values similarity_thresholds
+PARAMETERS = independent_variable_type dependent_variable_type dependent_variable_values similarity_thresholds
+
+all: crossvalidation/summaries metadata.json
+
+# Crossvalidation
+
+crossvalidation/summaries: crossvalidation
+ $(LAZAR_BIN)/classification_summary.rb crossvalidation
crossvalidation: independent_variables
$(LAZAR_BIN)/classification_crossvalidation.rb .
-independent_variables: Mutagenicity-Salmonella_typhimurium.csv $(METADATA)
+# Model
+
+independent_variables: Mutagenicity-Salmonella_typhimurium.csv $(PARAMETERS)
$(LAZAR_BIN)/fingerprint_independent_variables.rb $<
+# Metadata
+
+metadata.json:
+ echo '{"species":"Salmonella typhimurium","endpoint":"Mutagenicity","source":"http://cheminformatics.org/datasets/bursi/cas_4337.zip, http://doc.ml.tu-berlin.de/toxbenchmark/Mutagenicity_N6512.csv, https://data.europa.eu/euodp/data/storage/f/2017-07-19T142131/GENOTOX data and dictionary.xls","qmrf":{"group":"QMRF 4.10. Mutagenicity","name":"OECD 471 Bacterial Reverse Mutation Test"}}' > $@
+
+# Model parameters
+
independent_variable_type:
echo "binary" > $@
@@ -18,3 +34,33 @@ dependent_variable_values:
similarity_thresholds: # one threshold per line; printf because /bin/sh echo may not support -e
printf '0.2\n0.5\n' > $@
+
+# Merge data
+
+Mutagenicity-Salmonella_typhimurium.csv: download/efsa.csv download/hansen.csv download/kazius.csv
+ $(LAZAR_BIN)/merge_csv.rb $^
+
+# Convert data
+
+download/efsa.csv: download/efsa.tsv
+ download/efsa_classification.rb $< > $@
+
+download/efsa.tsv: download/efsa.xls
+ xls2csv -s cp1252 -d utf-8 -x -c " " $< > $@
+
+download/hansen.csv: download/Mutagenicity_N6512.csv
+ download/hansen_convert.rb $< > $@
+
+download/kazius.csv: download/cas_4337.sdf
+ $(LAZAR_BIN)/sdf2csv.rb $< > $@
+
+# Download original data
+
+download/cas_4337.sdf: # fixed archive name + unzip -o so a stale or partial zip cannot be reused
+	cd download && wget -O cas_4337.zip http://cheminformatics.org/datasets/bursi/cas_4337.zip && unzip -o cas_4337.zip
+
+download/Mutagenicity_N6512.csv:
+ cd download && wget http://doc.ml.tu-berlin.de/toxbenchmark/Mutagenicity_N6512.csv
+
+download/efsa.xls: # wget -O saves the document as the target; -o would only write wget's log there
+	wget "https://data.europa.eu/euodp/data/storage/f/2017-07-19T142131/GENOTOX data and dictionary.xls" -O $@