summaryrefslogtreecommitdiff
path: root/models/mutagenicity-mp2d/download/efsa_classification.rb
blob: 1e4dace8875f1b4385a247bb593a11aeb6f8d0f3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
#!/usr/bin/env ruby
require 'csv'
require_relative "../../../lib/compound.rb"

# Convert EFSA data to mutagenicity classifications.
#
# Usage: efsa_classification.rb EFSA_TSV_FILE
#
# Reads the tab-separated file named by the first command-line argument, skips
# its first row, and keeps rows that have a SMILES in column 11, mention
# "Salmonella" in column 24, "TA 98" or "TA 100" in column 25, and carry a
# result in column 33. Compounds are keyed by the SMILES string returned by
# Compound, so repeated rows for the same key collapse into one entry. The
# result is printed to stdout as "ID,SMILES,Mutagenicity" lines.
abort "Usage: #{File.basename($PROGRAM_NAME)} EFSA_TSV_FILE" if ARGV.empty?

classifications = {}
rows = CSV.foreach(ARGV[0], encoding: "UTF-8", col_sep: "\t", liberal_parsing: true)
rows.each_with_index do |row, index|
  next if index.zero? # the first row is skipped (presumably the column header)

  # Short or ragged rows yield nil cells; to_s keeps the filters below from
  # raising NoMethodError on them.
  next if row[11].to_s.empty?
  next unless row[24].to_s.match?(/Salmonella/i)
  next unless row[25].to_s.match?(/TA (?:98|100)/)

  result = row[33]
  next unless result

  smiles =
    begin
      Compound.from_smiles(row[11].delete('"')).smiles
    rescue StandardError
      # some smiles (row[11]) contain non-parseable characters; fall back to
      # the InChI in row[12]
      Compound.from_inchi(row[12]).smiles
    end

  entry = (classifications[smiles] ||= {})
  entry[:id] ||= row[2] # the first ID seen for a compound wins
  if result.match?(/Positiv/i)
    entry[:value] = "mutagenic" # at least one positive result in TA 98 or TA 100
  elsif result.match?(/Negativ/i)
    entry[:value] ||= "non-mutagenic" # never overrides an earlier positive
  end
  # NOTE(review): a compound whose results are neither positive nor negative is
  # still printed, with an empty Mutagenicity field -- confirm this is intended.
end

puts "ID,SMILES,Mutagenicity"
classifications.each do |smiles, entry|
  puts [entry[:id], smiles, entry[:value]].join(",")
end