diff options
Diffstat (limited to 'Makefile')
-rw-r--r-- | Makefile | 68 |
1 file changed, 42 insertions, 26 deletions
@@ -4,6 +4,17 @@ summaries = summaries/weighted_majority_merged_sim0.1.json summaries/weighted_ma all: $(summaries) +# exports + +export/mutagenicity-merged.sdf: data/mutagenicity-merged.id + scripts/export.rb $< sdf > $@ + +export/mutagenicity-merged.csv: data/mutagenicity-merged.id + scripts/export.rb $< > $@ + +export/carcinogenicity.csv: data/carcinogenicity.id + scripts/export.rb $< > $@ + summaries/pa_carcinogenicity_neighbor_sets.json: experiments/pa_carcinogenicity_prediction.json scripts/pa_neighbor_sets.rb $< > $@ @@ -34,43 +45,37 @@ summaries/weighted_majority_merged_sim0.1.json: experiments/weighted_majority_me experiments/weighted_majority_merged_sim0.1.id: data/mutagenicity-merged.csv scripts/repeated_crossvalidation.rb $< > $@ -experiments/pa_carcinogenicity_prediction.json: data/carcinogenicity.csv data/PA_complete_SMILES_fixed.csv +experiments/pa_carcinogenicity.id: data/carcinogenicity.id data/PA.id scripts/predict.rb $^ > $@ -experiments/pa_mutagenicity_prediction.json: data/mutagenicity-merged.csv data/PA_complete_SMILES_fixed.csv +experiments/pa_mutagenicity.id: data/mutagenicity-merged.id data/PA.id scripts/predict.rb $^ > $@ -# test data - -data/PA_complete_SMILES_fixed.csv: data/PA.sdf - cat $< | scripts/convert_pa.rb > $@ - -data/PA_complete_SMILES_fixed.csv: data/PA_complete_SMILES.csv - cat $< | scripts/convert_pa.rb > $@ - -# training data +# models -export/mutagenicity-merged.sdf: data/mutagenicity-merged.id - scripts/export-sdf.rb $< > $@ +models/mutagenicity-merged.id: data/mutagenicity-merged.id + scripts/merge.rb $^ > $@ -data/mutagenicity-merged.id: data/mutagenicity-merged.csv - scripts/import-csv.rb $< > $@ +models/carcinogenicity.id: data/carcinogenicity.id + scripts/import-pubchem.rb 1205 -data/mutagenicity-merged.csv: $(data) - scripts/merge-mutagenicity.rb $(data) > $@ +# test data -data/carcinogenicity.csv: data/aid1205.csv - scripts/carcinogenicity2csv.rb $< > $@ +data/PA.id: data/PA.sdf + scripts/import.rb $< sdf > $@ + +# 
training data -# carcinogenicity +data/mutagenicity-merged.id: data/hansen.id data/kazius.id data/efsa.id + scripts/merge.rb $^ > $@ -data/aid1205.csv: - cd data && curl https://pubchem.ncbi.nlm.nih.gov/rest/pug/assay/aid/1205/CSV -o $@ +data/carcinogenicity.id: + scripts/import-pubchem.rb 1205 # kazius -data/kazius.csv: data/cas_4337.sdf - scripts/kazius2csv.rb $< > $@ +data/kazius.id: data/cas_4337.sdf + scripts/import.rb $< sdf > $@ data/cas_4337.sdf: data/cas_4337.zip cd data && unzip cas_4337.zip @@ -80,8 +85,11 @@ data/cas_4337.zip: # efsa +data/efsa.id: data/efsa.csv + scripts/import.rb $< > $@ + data/efsa.csv: data/GENOTOX_data_and_dictionary.tsv - scripts/efsa2csv.rb $< > $@ + scripts/efsa2csv.rb $< > $@ data/GENOTOX_data_and_dictionary.tsv: data/GENOTOX_data_and_dictionary.xls xls2csv -s cp1252 -d utf-8 -x -c " " $< > $@ @@ -91,11 +99,19 @@ data/GENOTOX_data_and_dictionary.xls: # hansen +data/hansen.id: data/hansen.csv + scripts/import.rb $< > $@ + data/hansen.csv: data/Mutagenicity_N6512.csv scripts/hansen2csv.rb $< > $@ data/Mutagenicity_N6512.csv: cd data && wget "http://doc.ml.tu-berlin.de/toxbenchmark/Mutagenicity_N6512.csv" +# cleanup + clean: - rm data/hansen.csv + rm data/*.id + +cleanall: + rm data/* |