summaryrefslogtreecommitdiff
path: root/Makefile
diff options
context:
space:
mode:
author Christoph Helma <helma@in-silico.ch> 2018-08-23 17:08:34 +0200
committer Christoph Helma <helma@in-silico.ch> 2018-08-23 17:08:34 +0200
commit 3146140b2530bc89d13c494f2e4317b952fc31cc (patch)
tree 10fd82e10e23c69ec7c36e8bc88345a9b4c3941d /Makefile
parent c94ac24d68c137e93d11f0a7d7621ab0b2e808d7 (diff)
unique neighbor sets
Diffstat (limited to 'Makefile')
-rw-r--r-- Makefile 52
1 files changed, 46 insertions, 6 deletions
diff --git a/Makefile b/Makefile
index be4fa18..b554210 100644
--- a/Makefile
+++ b/Makefile
@@ -3,29 +3,69 @@
# efsa script via csv
# remove headers from csv files
# add header at merged csv
+# remove \u003e/00
+# remove log messages in result
data = data/kazius.csv data/efsa.csv data/hansen.csv
-#all: $(data)
-# experiments
+summaries = summaries/weighted_majority_merged_sim0.1.json summaries/weighted_majority_kazius_sim0.1.json summaries/weighted_majority_merged_sim0.5.json summaries/weighted_majority_kazius_sim0.5.json summaries/pa_mutagenicity_neighbor_sets.json summaries/pa_carcinogenicity_neighbor_sets.json
-summaries/weighted_majority_kazius.json: experiments/weighted_majority_kazius.id
+all: $(summaries)
+
+summaries/pa_carcinogenicity_neighbor_sets.json: experiments/pa_carcinogenicity_prediction.json
+ scripts/pa_neighbor_sets.rb $< > $@
+
+summaries/pa_mutagenicity_neighbor_sets.json: experiments/pa_mutagenicity_prediction.json
+ scripts/pa_neighbor_sets.rb $< > $@
+
+summaries/weighted_majority_kazius_sim0.5.json: experiments/weighted_majority_kazius_sim0.5.id
+ scripts/repeated_crossvalidation_summary.rb $< > $@
+
+experiments/weighted_majority_kazius_sim0.5.id: data/kazius.csv
+ scripts/repeated_crossvalidation.rb $< > $@
+
+summaries/weighted_majority_merged_sim0.5.json: experiments/weighted_majority_merged_sim0.5.id
+ scripts/repeated_crossvalidation_summary.rb $< > $@
+
+experiments/weighted_majority_merged_sim0.5.id: data/merged.csv
+ scripts/repeated_crossvalidation.rb $< > $@
+
+summaries/weighted_majority_kazius_sim0.1.json: experiments/weighted_majority_kazius_sim0.1.id
scripts/repeated_crossvalidation_summary.rb $< > $@
-experiments/weighted_majority_kazius.id: data/kazius.csv
+experiments/weighted_majority_kazius_sim0.1.id: data/kazius.csv
scripts/repeated_crossvalidation.rb $< > $@
-summaries/weighted_majority_merged.json: experiments/weighted_majority_merged.id
+summaries/weighted_majority_merged_sim0.1.json: experiments/weighted_majority_merged_sim0.1.id
scripts/repeated_crossvalidation_summary.rb $< > $@
-experiments/weighted_majority_merged.id: data/merged.csv
+experiments/weighted_majority_merged_sim0.1.id: data/merged.csv
scripts/repeated_crossvalidation.rb $< > $@
+
+experiments/pa_carcinogenicity_prediction.json: data/carcinogenicity.csv data/PA_complete_SMILES_fixed.csv
+ scripts/predict.rb $^ > $@
+
+experiments/pa_mutagenicity_prediction.json: data/merged.csv data/PA_complete_SMILES_fixed.csv
+ scripts/predict.rb $^ > $@
+
+# test data
+
+data/PA_complete_SMILES_fixed.csv: data/PA_complete_SMILES.csv
+ cat $< | scripts/convert_pa.rb > $@
# training data
data/merged.csv: $(data)
echo "SMILES,Activity" > $@ && cat $(data) | sort -u | sed '/SMILES/d' >> $@
+data/carcinogenicity.csv: data/aid1205.csv
+ scripts/carcinogenicity2csv.rb $< > $@
+
+# carcinogenicity: PubChem assay AID 1205 as CSV. No "cd data": $@ already contains the data/ prefix; --fail keeps an HTTP error page from being saved as the CSV.
+
+data/aid1205.csv:
+	curl --fail https://pubchem.ncbi.nlm.nih.gov/rest/pug/assay/aid/1205/CSV -o $@
+
# kazius
data/kazius.csv: data/cas_4337.sdf