From 03ee143bbcdc6f97533ae9f4d3b47711fdae625b Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Tue, 14 Feb 2017 12:16:29 +0100 Subject: rf models, similarity 0.1 --- Makefile | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) (limited to 'Makefile') diff --git a/Makefile b/Makefile index 85b8518..574325e 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ # Variables -datasets = data/median-correlation.csv data/test_log10.csv data/training_log10.csv data/mazzatorta_log10.csv data/swiss_log10.csv data/test.json data/training.json data/mazzatorta.json data/swiss.json +datasets = data/median-correlation.csv data/test_log10.csv data/training_log10.csv data/mazzatorta_log10.csv data/swiss_log10.csv crossvalidations = data/training_log10-cv-0.csv data/training_log10-cv-1.csv data/training_log10-cv-2.csv validations = data/training-test-predictions.csv $(crossvalidations) data/misclassifications.csv figures = figures/functional-groups.pdf figures/test-prediction.pdf figures/test-correlation.pdf figures/crossvalidation.pdf figures/dataset-variability.pdf @@ -21,7 +21,7 @@ loael.docx: loael.md figures/functional-groups.pdf: data/functional-groups-reduced4R.csv scripts/functional-groups.R -figures/dataset-variability.pdf: data/mazzatorta.csv data/swiss.csv +figures/dataset-variability.pdf: data/mazzatorta_log10.csv data/swiss_log10.csv scripts/dataset-variability.R figures/crossvalidation.pdf: $(crossvalidations) @@ -39,10 +39,10 @@ data/misclassifications.csv: data/training-test-predictions.csv scripts/misclassifications.rb data/training-test-predictions.csv: data/training-test-predictions.id - scripts/test-validation-results.rb > data/training-test-predictions.csv + scripts/test-validation-results.rb data/training-test-predictions.id: data/test_log10.csv data/training_log10.csv - scripts/test-validation.rb > data/training-test-predictions.id + scripts/test-validation.rb data/training_log10-cv-0.csv: data/training_log10.csv scripts/crossvalidation.rb training_log10.csv 0 @@ -60,36 +60,39 @@ data/functional-groups-reduced4R.csv: data/functional-groups-reduced.csv # Medians for dataset correlation data/median-correlation.csv: data/mazzatorta_log10.csv data/swiss_log10.csv - scripts/create-median-correlation.rb > data/median-correlation.csv + scripts/create-median-correlation.rb # Test set data/test_log10.csv: data/mazzatorta_log10.csv data/swiss_log10.csv - scripts/create-test.rb > data/test_log10.csv + scripts/create-test.rb -data/test.json: data/mazzatorta.json - cp data/mazzatorta.json data/test.json +#data/test.json: data/mazzatorta.json + #cp data/mazzatorta.json data/test.json # Combined training set data/training_log10.csv: data/mazzatorta_log10.csv data/swiss_log10.csv - scripts/create-training.rb > data/training_log10.csv + scripts/create-training.rb -data/training.json: data/mazzatorta.json - cp data/mazzatorta.json data/training.json +#data/training.json: data/mazzatorta.json + #cp data/mazzatorta.json data/training.json # -log10 transformations data/mazzatorta_log10.csv: data/mazzatorta.csv ../lazar/scripts/mmol2-log10.rb data/mazzatorta.csv + sed -i 's/-log10(LOAEL)/LOAEL/' data/mazzatorta_log10.csv # R cannot parse -log10(LOAEL) header data/swiss_log10.csv: data/swiss.csv ../lazar/scripts/mmol2-log10.rb data/swiss.csv + sed -i 's/-log10(LOAEL)/LOAEL/' data/swiss_log10.csv # R cannot parse -log10(LOAEL) header # Datasets with unique smiles data/mazzatorta.csv: data/LOAEL_mg_corrected_smiles_mmol.csv - scripts/mazzatorta-unique-smiles.rb data/LOAEL_mg_corrected_smiles_mmol.csv > data/mazzatorta.csv + scripts/mazzatorta-unique-smiles.rb data/LOAEL_mg_corrected_smiles_mmol.csv data/swiss.csv: data/NOAEL-LOAEL_SMILES_rat_chron.csv - scripts/noael_loael2mmol.rb data/NOAEL-LOAEL_SMILES_rat_chron.csv > data/swiss.csv + scripts/noael_loael2mmol.rb data/NOAEL-LOAEL_SMILES_rat_chron.csv clean: rm figures/*pdf cd data && rm `ls -I "*LOAEL*" -I "*functional*" -I "*SMARTS*"` + mongo development --eval "db.dropDatabase()" -- cgit v1.2.3