diff options
Diffstat (limited to 'scripts/efsa2csv.rb')
-rwxr-xr-x | scripts/efsa2csv.rb | 29 |
1 files changed, 29 insertions, 0 deletions
#!/usr/bin/env ruby
# Converts a tab-separated EFSA export (path given as the first command-line
# argument) into a CSV with the columns "ID,SMILES,Mutagenicity" on STDOUT.
#
# Only rows that have a SMILES string, mention Salmonella, name strain TA 98
# or TA 100 and carry a result field are considered. Rows are grouped by the
# SMILES string returned by OpenTox::Compound. A compound is reported as 1 as
# soon as any of its rows is positive, as 0 when only negative rows were seen,
# and with an empty value when no row matched either pattern.
require 'csv'
require_relative '../../lazar/lib/lazar.rb'

# Zero-based column positions in the input file.
# NOTE(review): meanings are inferred from how each column is used below;
# confirm against the header row of the export.
ID_COL       = 2
SMILES_COL   = 11
INCHI_COL    = 12
ORGANISM_COL = 24 # matched against /Salmonella/i
STRAIN_COL   = 25 # matched against "TA 98" / "TA 100"
RESULT_COL   = 33 # matched against /Positiv/i and /Negativ/i

abort "usage: #{File.basename($PROGRAM_NAME)} EFSA_EXPORT.tsv" if ARGV.empty?

# Returns the SMILES string OpenTox::Compound produces for the row's structure,
# trying the SMILES column first and the InChI column second.
# Returns nil when neither field can be parsed, so the caller can skip the row
# instead of aborting the whole conversion.
def canonical_smiles(row)
  OpenTox::Compound.from_smiles(row[SMILES_COL].delete('"')).smiles
rescue StandardError
  # some SMILES contain non-parseable characters; fall back to the InChI
  begin
    OpenTox::Compound.from_inchi(row[INCHI_COL]).smiles
  rescue StandardError
    nil
  end
end

db = {}
rows = CSV.foreach(ARGV[0], encoding: 'UTF-8', col_sep: "\t", liberal_parsing: true)
rows.with_index(1) do |row, row_no|
  next if row_no == 1 # header row

  # Empty fields are parsed as nil, hence the to_s before matching.
  next if row[SMILES_COL].to_s.empty? || row[RESULT_COL].nil?
  next unless row[ORGANISM_COL].to_s.match?(/Salmonella/i)
  next unless row[STRAIN_COL].to_s.match?(/TA 98|TA 100/)

  smiles = canonical_smiles(row)
  if smiles.to_s.empty?
    warn "row #{row_no}: skipped, no parseable structure (ID #{row[ID_COL].inspect})"
    next
  end

  entry = (db[smiles] ||= {})
  entry[:id] ||= row[ID_COL] # keep the ID of the first row seen for this compound
  if row[RESULT_COL].match?(/Positiv/i)
    entry[:value] = 1 # at least one positive result in TA 98 or TA 100
  elsif row[RESULT_COL].match?(/Negativ/i)
    entry[:value] ||= 0 # never overrides an earlier positive
  end
  # NOTE(review): a result matching neither pattern still creates the entry,
  # so such compounds are emitted with an empty Mutagenicity value.
end

puts "ID,SMILES,Mutagenicity"
db.each do |smiles, entry|
  # generate_line quotes fields containing commas or quotes; join(",") did not.
  print CSV.generate_line([entry[:id], smiles, entry[:value]])
end