summary | refs | log | tree | commit | diff
path: root/models/mutagenicity-mp2d/download/efsa_classification.rb
diff options
context:
space:
mode:
Diffstat (limited to 'models/mutagenicity-mp2d/download/efsa_classification.rb')
-rwxr-xr-x models/mutagenicity-mp2d/download/efsa_classification.rb 29
1 files changed, 29 insertions, 0 deletions
diff --git a/models/mutagenicity-mp2d/download/efsa_classification.rb b/models/mutagenicity-mp2d/download/efsa_classification.rb
new file mode 100755
index 0000000..1e4dace
--- /dev/null
+++ b/models/mutagenicity-mp2d/download/efsa_classification.rb
@@ -0,0 +1,29 @@
+#!/usr/bin/env ruby
+require 'csv'
+require_relative "../../../lib/compound.rb"
+
+# Convert EFSA data (tab-separated file named by ARGV[0]) to mutagenicity classifications (CSV on stdout).
+# Columns as used below: 2 = ID, 11 = SMILES, 12 = InChI, 24 = test organism, 25 = strain, 33 = result.
+db = {} # SMILES string returned by Compound#smiles => { :id, :value }
+CSV.foreach(ARGV[0], :encoding => "UTF-8", :col_sep => "\t", :liberal_parsing => true).with_index do |row, i|
+  next if i.zero? # the first line is skipped (header)
+  next unless row[11] && !row[11].empty? && row[33]
+  # to_s guards empty cells: calling match on nil raised NoMethodError and aborted the whole conversion
+  next unless row[24].to_s.match?(/Salmonella/i) && (row[25].to_s.include?("TA 98") || row[25].to_s.include?("TA 100"))
+  begin
+    c = Compound.from_smiles(row[11].gsub('"','')).smiles # liberal parsing leaves stray quotes in the field
+  rescue
+    c = Compound.from_inchi(row[12]).smiles # some smiles (row[11]) contain non-parseable characters
+  end
+  db[c] ||= {}
+  db[c][:id] ||= row[2] # keep the ID of the first matching row for this structure
+  if row[33].match?(/Positiv/i)
+    db[c][:value] = "mutagenic" # at least one positive result in TA 98 or TA 100
+  elsif row[33].match?(/Negativ/i)
+    db[c][:value] ||= "non-mutagenic" # never overrides an earlier positive result
+  end
+end
+
+puts "ID,SMILES,Mutagenicity"
+# One line per unique structure; fields are joined without CSV quoting, and :value stays empty for other results.
+db.each { |s, v| puts [v[:id], s, v[:value]].join(",") }