summaryrefslogtreecommitdiff
path: root/scripts/efsa2csv.rb
blob: 5251cae18e3a45279bd3ee456aafaecd0fcdad73 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
#!/usr/bin/env ruby
require 'csv'
require_relative '../../lazar/lib/lazar.rb'

# Read the tab-separated file named by the first command line argument and
# collect one entry per compound for rows whose column 24 mentions Salmonella
# and whose column 25 mentions strain TA 98 or TA 100.
#
# db maps a SMILES string (as returned by OpenTox::Compound#smiles) to
#   :id    => column 2 of the first matching row for that compound
#   :value => 1 if any matching row has a "Positiv..." outcome in column 33,
#             0 if only "Negativ..." outcomes were seen,
#             unset if column 33 matched neither pattern.
i = 0
db = {}
CSV.foreach(ARGV[0], :encoding => "UTF-8", :col_sep => "\t", :liberal_parsing => true) do |row|
  #STDERR.puts i if i%100 == 0
  smiles = row[11]
  # Columns 24 and 25 are nil when the field is empty or the row is short;
  # stringify them so such rows are skipped instead of raising NoMethodError.
  species = row[24].to_s
  strain = row[25].to_s
  outcome = row[33]
  # i > 0 skips the first row of the file
  if i > 0 && smiles && !smiles.empty? && species.match?(/Salmonella/i) && strain.match?(/TA 98|TA 100/) && outcome
    begin
      c = OpenTox::Compound.from_smiles(smiles.delete('"')).smiles
    rescue
      c = OpenTox::Compound.from_inchi(row[12]).smiles # some smiles (row[11]) contain non-parseable characters
    end
    db[c] ||= {}
    db[c][:id] ||= row[2]
    if outcome.match?(/Positiv/i)
      db[c][:value] = 1 # at least one positive result in TA 98 or TA 100
    elsif outcome.match?(/Negativ/i)
      db[c][:value] ||= 0 # never overwrites an earlier positive result
    end
  end
  i += 1
end

# Write the collected compounds to stdout as CSV: a header line followed by
# one line per db entry (id, SMILES key, mutagenicity value).
# Array#to_csv (from the csv library required at the top of the file) quotes
# fields that contain commas, quotes or line breaks, which a plain join(",")
# would emit as a malformed row. A nil value is written as an empty field.
puts "ID,SMILES,Mutagenicity"
db.each do |smiles, entry|
  puts [entry[:id], smiles, entry[:value]].to_csv
end