diff options
author | Christoph Helma <helma@in-silico.ch> | 2018-09-17 16:53:19 +0200 |
---|---|---|
committer | Christoph Helma <helma@in-silico.ch> | 2018-09-17 16:53:19 +0200 |
commit | b2f12e257037faa21c14a54eec0205c45c5686c6 (patch) | |
tree | 3e91207be85679f6e64969bbbdacd4d86e0d567e /Makefile | |
parent | 21c114dc55eef123a91d74fb81f36877fb66c44e (diff) |
efsa parsing fixed, contradictory results
Diffstat (limited to 'Makefile')
-rw-r--r-- | Makefile | 12 |
1 files changed, 4 insertions, 8 deletions
@@ -1,8 +1,4 @@
 # TODO
-# efsa smiles errors (encodings?)
-# efsa script via csv
-# remove headers from csv files
-# add header at merged csv
 # remove \u003e/00
 # remove log messages in result
 
@@ -62,7 +58,7 @@ data/mutagenicity-merged.id: data/mutagenicity-merged.csv
 	scripts/import-csv.rb $< > $@
 
 data/mutagenicity-merged.csv: $(data)
-	echo "SMILES,Activity" > $@ && cat $(data) | sort -u | sed '/SMILES/d' >> $@
+	scripts/merge-mutagenicity.rb $(data) > $@
 
 data/carcinogenicity.csv: data/aid1205.csv
 	scripts/carcinogenicity2csv.rb $< > $@
@@ -85,11 +81,11 @@ data/cas_4337.zip:
 
 # efsa
 
-data/efsa.csv: data/GENOTOX_data_and_dictionary.csv
+data/efsa.csv: data/GENOTOX_data_and_dictionary.tsv
 	scripts/efsa2csv.rb $< > $@
 
-data/GENOTOX_data_and_dictionary.csv: data/GENOTOX_data_and_dictionary.xls
-	xls2csv $< > $@
+data/GENOTOX_data_and_dictionary.tsv: data/GENOTOX_data_and_dictionary.xls
+	xls2csv -s cp1252 -d utf-8 -x -c " " $< > $@
 
 data/GENOTOX_data_and_dictionary.xls:
 	cd data && wget "https://data.europa.eu/euodp/data/storage/f/2017-07-19T142131/GENOTOX%20data%20and%20dictionary.xls" -o $@