From 791398c12af4f8290095425dac87e3c852905ab6 Mon Sep 17 00:00:00 2001
From: Christoph Helma
Date: Sat, 7 Sep 2019 18:20:10 +0200
Subject: obsolete data and java directories deleted

---
Notes (ignored by git am / git apply, which skip text before the first
"diff --git" header):

  * This patch adds an "all" target, a crossvalidation summary rule, a
    metadata.json rule, and the rules that download, convert and merge the
    three source datasets into Mutagenicity-Salmonella_typhimurium.csv.
  * download/efsa.xls is fetched with "wget -O $@". -O names the output
    document; lowercase -o names wget's log file and would leave the log,
    not the spreadsheet, in the target.
  * NOTE(review): the xls2csv "-c" separator appears as a single space in
    this copy of the patch; the .tsv target name suggests a tab may have
    been intended -- confirm against the repository.

 models/mutagenicity/Makefile | 50 ++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 48 insertions(+), 2 deletions(-)

(limited to 'models/mutagenicity/Makefile')

diff --git a/models/mutagenicity/Makefile b/models/mutagenicity/Makefile
index 637004c..1e0c565 100644
--- a/models/mutagenicity/Makefile
+++ b/models/mutagenicity/Makefile
@@ -1,12 +1,28 @@
 LAZAR_BIN = ../../bin
-METADATA = independent_variable_type dependent_variable_type dependent_variable_values similarity_thresholds
+PARAMETERS = independent_variable_type dependent_variable_type dependent_variable_values similarity_thresholds
+
+all: crossvalidation/summaries metadata.json
+
+# Crossvalidation
+
+crossvalidation/summaries: crossvalidation
+	$(LAZAR_BIN)/classification_summary.rb crossvalidation
 
 crossvalidation: independent_variables
 	$(LAZAR_BIN)/classification_crossvalidation.rb .
 
-independent_variables: Mutagenicity-Salmonella_typhimurium.csv $(METADATA)
+# Model
+
+independent_variables: Mutagenicity-Salmonella_typhimurium.csv $(PARAMETERS)
 	$(LAZAR_BIN)/fingerprint_independent_variables.rb $<
 
+# Metadata
+
+metadata.json:
+	echo '{"species":"Salmonella typhimurium","endpoint":"Mutagenicity","source":"http://cheminformatics.org/datasets/bursi/cas_4337.zip, http://doc.ml.tu-berlin.de/toxbenchmark/Mutagenicity_N6512.csv, https://data.europa.eu/euodp/data/storage/f/2017-07-19T142131/GENOTOX data and dictionary.xls","qmrf":{"group":"QMRF 4.10. Mutagenicity","name":"OECD 471 Bacterial Reverse Mutation Test"}}' > $@
+
+# Model parameters
+
 independent_variable_type:
 	echo "binary" > $@
 
@@ -18,3 +34,33 @@ dependent_variable_values:
 
 similarity_thresholds:
 	echo -e "0.2\n0.5" > $@
+
+# Merge data
+
+Mutagenicity-Salmonella_typhimurium.csv: download/efsa.csv download/hansen.csv download/kazius.csv
+	$(LAZAR_BIN)/merge_csv.rb $^
+
+# Convert data
+
+download/efsa.csv: download/efsa.tsv
+	download/efsa_classification.rb $< > $@
+
+download/efsa.tsv: download/efsa.xls
+	xls2csv -s cp1252 -d utf-8 -x -c " " $< > $@
+
+download/hansen.csv: download/Mutagenicity_N6512.csv
+	download/hansen_convert.rb $< > $@
+
+download/kazius.csv: download/cas_4337.sdf
+	$(LAZAR_BIN)/sdf2csv.rb $< > $@
+
+# Download original data
+
+download/cas_4337.sdf:
+	cd download && wget http://cheminformatics.org/datasets/bursi/cas_4337.zip && unzip cas_4337.zip
+
+download/Mutagenicity_N6512.csv:
+	cd download && wget http://doc.ml.tu-berlin.de/toxbenchmark/Mutagenicity_N6512.csv
+
+download/efsa.xls:
+	wget "https://data.europa.eu/euodp/data/storage/f/2017-07-19T142131/GENOTOX data and dictionary.xls" -O $@
-- 
cgit v1.2.3