diff options
Diffstat (limited to 'models/mutagenicity/download/efsa_classification.rb')
-rwxr-xr-x | models/mutagenicity/download/efsa_classification.rb | 28 |
1 files changed, 28 insertions, 0 deletions
#!/usr/bin/env ruby
# frozen_string_literal: true

# Convert EFSA data to mutagenicity classifications.
#
# Usage: efsa_classification.rb EFSA_FILE > classifications.csv
#
# Reads a tab-separated file (path given as the first command line argument),
# keeps the rows that mention Salmonella strains TA 98 or TA 100 and have a
# result, and prints one "ID,SMILES,Mutagenicity" line per distinct compound
# to standard output. A compound is "mutagenic" as soon as one of its rows is
# positive; it is "non-mutagenic" only if it has negative rows and no positive
# one.
require "csv"
require_relative "../../../lib/compound.rb"

# Zero-based column positions in the input file, named after the way this
# script uses them.
EFSA_ID_COLUMN       = 2
EFSA_SMILES_COLUMN   = 11
EFSA_INCHI_COLUMN    = 12
EFSA_ORGANISM_COLUMN = 24 # matched against "Salmonella"
EFSA_STRAIN_COLUMN   = 25 # matched against "TA 98" / "TA 100"
EFSA_RESULT_COLUMN   = 33 # matched against "Positiv" / "Negativ"

# True when the row has a non-empty SMILES, refers to Salmonella strain
# TA 98 or TA 100, and carries a result cell.
# Cells may be nil (short or ragged rows), hence the to_s conversions.
def efsa_row_usable?(row)
  smiles = row[EFSA_SMILES_COLUMN]
  return false if smiles.nil? || smiles.empty?
  return false unless row[EFSA_ORGANISM_COLUMN].to_s.match?(/Salmonella/i)
  return false unless row[EFSA_STRAIN_COLUMN].to_s.match?(/TA 98|TA 100/)

  !row[EFSA_RESULT_COLUMN].nil?
end

# Returns the SMILES string produced by Compound for this row, or nil when
# neither the SMILES nor the InChI cell can be converted.
def efsa_compound_smiles(row)
  Compound.from_smiles(row[EFSA_SMILES_COLUMN].delete('"')).smiles
rescue StandardError
  # some SMILES contain non-parseable characters: fall back to the InChI cell
  begin
    Compound.from_inchi(row[EFSA_INCHI_COLUMN]).smiles
  rescue StandardError => e
    # Skip the row instead of aborting the whole conversion without output.
    warn "Skipping #{row[EFSA_ID_COLUMN].inspect}: cannot parse SMILES or InChI (#{e.message})"
    nil
  end
end

input_path = ARGV[0]
abort "Usage: #{File.basename($PROGRAM_NAME)} EFSA_FILE" unless input_path

# SMILES => { id: ..., value: ... }
db = {}
csv_options = { encoding: "UTF-8", col_sep: "\t", liberal_parsing: true }
CSV.foreach(input_path, **csv_options).with_index do |row, index|
  next if index.zero? # the first row is never evaluated (presumably the header line)
  next unless efsa_row_usable?(row)

  smiles = efsa_compound_smiles(row)
  next unless smiles

  entry = (db[smiles] ||= {})
  entry[:id] ||= row[EFSA_ID_COLUMN] # keep the ID of the first row seen for this compound
  case row[EFSA_RESULT_COLUMN]
  when /Positiv/i
    entry[:value] = "mutagenic" # at least one positive result in TA 98 or TA 100
  when /Negativ/i
    entry[:value] ||= "non-mutagenic" # never overrides an earlier positive result
  end
end

# NOTE(review): compounds whose results are neither positive nor negative are
# still printed, with an empty Mutagenicity field - confirm this is intended.
puts "ID,SMILES,Mutagenicity"
db.each do |smiles, entry|
  puts [entry[:id], smiles, entry[:value]].join(",")
end