summary | refs | log | tree | commit | diff
path: root/Makefile
diff options
context:
space:
mode:
author: Christoph Helma <helma@in-silico.ch> 2018-09-17 16:53:19 +0200
committer: Christoph Helma <helma@in-silico.ch> 2018-09-17 16:53:19 +0200
commit: b2f12e257037faa21c14a54eec0205c45c5686c6 (patch)
tree: 3e91207be85679f6e64969bbbdacd4d86e0d567e /Makefile
parent: 21c114dc55eef123a91d74fb81f36877fb66c44e (diff)
efsa parsing fixed, contradictory results
Diffstat (limited to 'Makefile')
-rw-r--r-- Makefile 12
1 files changed, 4 insertions, 8 deletions
diff --git a/Makefile b/Makefile
index a73e178..7f1ec93 100644
--- a/Makefile
+++ b/Makefile
@@ -1,8 +1,4 @@
# TODO
-# efsa smiles errors (encodings?)
-# efsa script via csv
-# remove headers from csv files
-# add header at merged csv
# remove \u003e/00
# remove log messages in result
@@ -62,7 +58,7 @@ data/mutagenicity-merged.id: data/mutagenicity-merged.csv
scripts/import-csv.rb $< > $@
data/mutagenicity-merged.csv: $(data)
- echo "SMILES,Activity" > $@ && cat $(data) | sort -u | sed '/SMILES/d' >> $@
+ scripts/merge-mutagenicity.rb $(data) > $@
data/carcinogenicity.csv: data/aid1205.csv
scripts/carcinogenicity2csv.rb $< > $@
@@ -85,11 +81,11 @@ data/cas_4337.zip:
# efsa
-data/efsa.csv: data/GENOTOX_data_and_dictionary.csv
+data/efsa.csv: data/GENOTOX_data_and_dictionary.tsv
scripts/efsa2csv.rb $< > $@
-data/GENOTOX_data_and_dictionary.csv: data/GENOTOX_data_and_dictionary.xls
- xls2csv $< > $@
+data/GENOTOX_data_and_dictionary.tsv: data/GENOTOX_data_and_dictionary.xls
+ xls2csv -s cp1252 -d utf-8 -x -c " " $< > $@
data/GENOTOX_data_and_dictionary.xls:
cd data && wget "https://data.europa.eu/euodp/data/storage/f/2017-07-19T142131/GENOTOX%20data%20and%20dictionary.xls" -o $@