blob: 48d4fb8d17007f6df803adfec7c18cb38e47f014 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
|
#!/usr/bin/env ruby
require 'csv'
require_relative '../../lazar/lib/lazar.rb'
# Reads the tab-separated file named by the first command-line argument and
# prints one "id,smiles,value" line per distinct compound to STDOUT.
#
# Columns used (0-based), as far as this script shows:
#   2  -> record id (emitted with an "efsa_" prefix)
#   11 -> SMILES string
#   12 -> InChI string, used when the SMILES cannot be parsed
#   24 -> must match /Salmonella/i
#   25 -> must contain "TA 98" or "TA 100"
#   33 -> result text, matched against /Positiv/i and /Negativ/i
#
# Rows are merged per SMILES string returned by OpenTox::Compound:
# value becomes 1 as soon as one row is positive, and 0 if only negative
# rows were seen.
abort "Usage: #{$PROGRAM_NAME} <tab-separated input file>" unless ARGV[0]

db = {}
csv_options = { :encoding => "UTF-8", :col_sep => "\t", :liberal_parsing => true }

CSV.foreach(ARGV[0], **csv_options).each_with_index do |row, i|
  #STDERR.puts i if i%100 == 0
  next if i.zero? # first row is skipped (treated as the header line)

  smiles = row[11]
  next if smiles.nil? || smiles.empty?

  # Nil-safe checks: rows with an empty column 24 or 25 are skipped
  # instead of raising NoMethodError and aborting the whole run.
  next unless row[24]&.match?(/Salmonella/i)

  strain = row[25]
  next unless strain && (strain.include?("TA 98") || strain.include?("TA 100"))

  result = row[33]
  next unless result

  c = begin
    OpenTox::Compound.from_smiles(smiles.delete('"')).smiles
  rescue StandardError
    # some smiles (row[11]) contain non-parseable characters
    OpenTox::Compound.from_inchi(row[12]).smiles
  end

  # NOTE(review): the entry is created before the result text is inspected, so
  # a compound whose results match neither pattern is still printed, with an
  # empty value field - confirm this is intended.
  entry = (db[c] ||= {})
  entry[:id] ||= "efsa_#{row[2]}" # the first row seen for a compound provides its id

  if result.match?(/Positiv/i)
    entry[:value] = 1 # at least one positive result in TA 98 or TA 100
  elsif result.match?(/Negativ/i)
    entry[:value] ||= 0 # never overrides an earlier positive result
  end
end

db.each do |s, v|
  puts [v[:id], s, v[:value]].join(",")
end
|