# Makefile for the mutagenicity-mp2d model (models/mutagenicity-mp2d/Makefile).
#
# Provenance: added in commit a29eb3e38414cd252850c9c4fb356f8b2bef6fb4
# (Christoph Helma, Fri, 12 Feb 2021 19:54:07 +0100,
# "model.rb refactored, mp2d models updated"); recovered from a cgit v1.2.3
# patch view.
#
# Pipeline, bottom-up: download three source datasets -> convert each to CSV
# -> merge into $(SRC) -> derive the model files listed in $(MODEL) -> run
# crossvalidation -> summarise the crossvalidation results.

# Most recipes write their output with `> $@`. Without this, a failing script
# leaves an empty/truncated target behind that Make would treat as up to date.
.DELETE_ON_ERROR:

# `all` and `model` are command names, not files.
.PHONY: all model

# Merged training data (first CSV column is cut into `smiles`, second into
# `dependent-variables`; the header row is dropped in both cases).
SRC := Mutagenicity-Salmonella_typhimurium.csv

# Directory holding the lazar helper scripts used by the recipes below.
LAZAR_BIN := ../../bin

# Every file that makes up the model.
MODEL := independent-variable-type \
         dependent-variable-type \
         dependent-variable-values \
         similarity-thresholds \
         independent-variables \
         dependent-variables \
         smiles \
         metadata.json

all: crossvalidation/summaries

model: $(MODEL)

# Crossvalidation

# NOTE(review): `crossvalidation` is used as a normal prerequisite here and
# appears to be a directory (the summary target lives inside it). Directory
# timestamps change whenever their contents change, which can cause spurious
# rebuilds — confirm what crossvalidation.rb creates before restructuring.
crossvalidation/summaries: crossvalidation
	$(LAZAR_BIN)/classification-summary.rb crossvalidation

crossvalidation: $(MODEL)
	$(LAZAR_BIN)/crossvalidation.rb .

# Model

independent-variables: smiles
	$(LAZAR_BIN)/fingerprints.rb smiles > $@

# Second CSV column, header line removed.
dependent-variables: $(SRC)
	cut -f2 -d ',' $< | sed '1d' > $@

# First CSV column, header line removed.
smiles: $(SRC)
	cut -f1 -d ',' $< | sed '1d' > $@

# Metadata

metadata.json:
	echo '{"species":"Salmonella typhimurium","endpoint":"Mutagenicity","source":"http://cheminformatics.org/datasets/bursi/cas_4337.zip, http://doc.ml.tu-berlin.de/toxbenchmark/Mutagenicity_N6512.csv, https://data.europa.eu/euodp/data/storage/f/2017-07-19T142131/GENOTOX data and dictionary.xls","qmrf":{"group":"QMRF 4.10. Mutagenicity","name":"OECD 471 Bacterial Reverse Mutation Test"}}' > $@

# Model parameters

independent-variable-type:
	echo "binary" > $@

dependent-variable-type:
	echo "binary" > $@

# printf instead of `echo -e`: recipes run under /bin/sh, and an sh whose echo
# has no -e option (e.g. dash) would write the literal "-e" into the file.
dependent-variable-values:
	printf '"0"\n"1"\n' > $@

similarity-thresholds:
	printf '0.2\n0.5\n' > $@

# Merge data

# NOTE(review): unlike the other recipes there is no `> $@` here; presumably
# merge_csv.rb writes the merged file itself — confirm against the script.
Mutagenicity-Salmonella_typhimurium.csv: download/efsa.csv download/hansen.csv download/kazius.csv
	$(LAZAR_BIN)/merge_csv.rb $^

# Convert data

download/efsa.csv: download/efsa.tsv
	download/efsa_classification.rb $< > $@

# NOTE(review): the cell separator passed to -c is reproduced exactly as found;
# since the output is a .tsv it may originally have been a TAB character that
# was lost when the patch text was whitespace-collapsed — confirm.
download/efsa.tsv: download/efsa.xls
	xls2csv -s cp1252 -d utf-8 -x -c " " $< > $@

download/hansen.csv: download/Mutagenicity_N6512.csv
	download/hansen_convert.rb $< > $@

download/kazius.csv: download/cas_4337.sdf
	$(LAZAR_BIN)/sdf2csv.rb $< > $@

# Download original data

# The archive is fetched and unpacked inside download/; the .sdf target is
# expected to come out of the zip.
download/cas_4337.sdf:
	cd download && wget http://cheminformatics.org/datasets/bursi/cas_4337.zip && unzip cas_4337.zip

download/Mutagenicity_N6512.csv:
	cd download && wget http://doc.ml.tu-berlin.de/toxbenchmark/Mutagenicity_N6512.csv

# -O (output document), not -o (log file): with -o the target would contain
# wget's log and the spreadsheet would be saved under its remote file name.
download/efsa.xls:
	wget -O $@ "https://data.europa.eu/euodp/data/storage/f/2017-07-19T142131/GENOTOX data and dictionary.xls"