diff options
author | mguetlein <martin.guetlein@gmail.com> | 2014-10-27 15:48:01 +0100 |
---|---|---|
committer | mguetlein <martin.guetlein@gmail.com> | 2014-10-27 15:48:01 +0100 |
commit | 36ea2f865014a761be9fe74719f4c88dbaffeb81 (patch) | |
tree | baf3f9799cf0d3b3896699845bdd2548eadd27a5 /nch/03_validate_compounds.rb | |
parent | 9892566a309ac99c21adad9fee5a2e625d8d4042 (diff) |
initial commit; more documentation will be added
Diffstat (limited to 'nch/03_validate_compounds.rb')
-rwxr-xr-x | nch/03_validate_compounds.rb | 49 |
1 files changed, 49 insertions, 0 deletions
#!/usr/bin/env ruby

# Cross-checks the compound lists of each dataset.
#
# For every dataset name in DATA (except "LOAEL-mg", which is skipped) two CSV
# files are read from data/02/:
#
#   1. the "complete" set ("endpoint" for the "LOAEL-mol" dataset), and
#   2. the "orig-features" set.
#
# The first column of every non-header row is percent-decoded and treated as an
# InChI. The de-duplicated InChIs of the first file are written to
# data/03/<dataset>_uniq.csv. The second file must then list exactly those
# unique InChIs, in the same order and without duplicates; otherwise the script
# aborts with a RuntimeError.

require "csv"
require "uri"

# NOTE(review): DATA is presumably the list of dataset names defined in
# config.rb -- confirm there.
require "./config.rb"

# URI.unescape was removed in Ruby 3.0. Fall back to the parser-level
# implementation, which (unlike CGI.unescape or URI.decode_www_form_component)
# does not turn a literal "+" into a space.
unescape = if URI.respond_to?(:unescape)
             URI.method(:unescape)
           else
             URI::DEFAULT_PARSER.method(:unescape)
           end

DATA.each do |d|
  puts "\ndataset name #{d}"

  next if d == "LOAEL-mg"

  # InChIs of the complete set. They are consumed from the front while the
  # features set is validated against them.
  @inchis = []

  all_compounds = (d == "LOAEL-mol" ? "endpoint" : "complete")
  [all_compounds, "orig-features"].each do |mode|
    header_skipped = false
    CSV.foreach("data/02/#{d}_#{mode}.csv") do |row|
      # The first row is the column header, not a compound.
      unless header_skipped
        header_skipped = true
        next
      end

      inchi = unescape.call(row[0])
      if mode == all_compounds
        @inchis << inchi
      else
        # The features set has to follow the order of the unique complete set,
        # so only the head of the remaining list is an acceptable match.
        raise "features-set inchi not found in complete-set #{inchi}" unless @inchis.first == inchi
        # The list is already unique at this point, so dropping the head
        # removes exactly the matched entry.
        @inchis.shift
      end
    end

    if mode == all_compounds
      puts "#{@inchis.size} compounds in complete-set"
      @inchis.uniq!
      # Block form guarantees that the file is flushed and closed right away.
      File.open("data/03/#{d}_uniq.csv", "w") do |file|
        file.puts "\"" + (["InChI"] + @inchis).join("\"\n\"") + "\""
      end
      puts "#{@inchis.size} uniq compounds in complete-set (written to data/03/#{d}_uniq.csv)"
    else
      # Anything left over was in the complete set but not in the features set.
      raise "complete-set inchis not found in features-set #{@inchis.inspect}" unless @inchis.size == 0
      puts "inchis in features-set uniq and all included in complete-set"
    end
  end

  puts ""
end