#!/usr/bin/env ruby
# frozen_string_literal: true

# nch/03_validate_compounds.rb
#
# Recovered from a cgit patch: commit 36ea2f865014a761be9fe74719f4c88dbaffeb81
# by mguetlein, Mon, 27 Oct 2014 15:48:01 +0100
# ("initial commit; more documentation will be added").
#
# For every dataset name in DATA (except "LOAEL-mg") this script:
#   1. reads the first column of data/02/<dataset>_complete.csv
#      (data/02/LOAEL-mol_endpoint.csv for "LOAEL-mol"), URI-unescapes each
#      value, reports the count, de-duplicates the list and writes it, one
#      quoted value per line under an "InChI" header, to
#      data/03/<dataset>_uniq.csv;
#   2. reads data/02/<dataset>_orig-features.csv and raises unless its first
#      column is exactly that de-duplicated list, in the same order.
#
# NOTE(review): DATA is not defined in this file; presumably ./config.rb
# provides it as a list of dataset names -- confirm.

require "csv"
require "uri"

require "./config.rb"

# Yields the URI-unescaped first column of every row of the CSV file at
# +path+, skipping the first (header) row.
def each_inchi(path)
  CSV.open(path) do |csv|
    csv.shift # drop the header row
    csv.each do |row|
      # URI.unescape was removed in Ruby 3.0; the default parser offers the
      # same percent-decoding and, unlike URI.decode_www_form_component,
      # leaves "+" untouched.
      yield URI::DEFAULT_PARSER.unescape(row[0])
    end
  end
end

DATA.each do |d|
  puts "\ndataset name #{d}"

  next if d == "LOAEL-mg"

  all_compounds = d == "LOAEL-mol" ? "endpoint" : "complete"

  # Phase 1: collect, count and de-duplicate the complete set.
  inchis = []
  each_inchi("data/02/#{d}_#{all_compounds}.csv") { |inchi| inchis << inchi }
  puts "#{inchis.size} compounds in complete-set"
  inchis.uniq!

  uniq_path = "data/03/#{d}_uniq.csv"
  quoted_lines = (["InChI"] + inchis).map { |value| "\"#{value}\"" }.join("\n")
  # Block form so the file is flushed and closed before the script moves on.
  File.open(uniq_path, "w") { |file| file.puts quoted_lines }
  puts "#{inchis.size} uniq compounds in complete-set (written to #{uniq_path})"

  # Phase 2: the features set must list exactly the unique compounds, in the
  # same order; consume the expected list from the front while reading.
  each_inchi("data/02/#{d}_orig-features.csv") do |inchi|
    raise "features-set inchi not found in complete-set #{inchi}" unless inchis.first == inchi

    # The list is de-duplicated and its head equals inchi, so dropping the
    # head is the same as deleting the value, without rescanning the array.
    inchis.shift
  end
  raise "complete-set inchis not found in features-set #{inchis.inspect}" unless inchis.empty?

  puts "inchis in features-set uniq and all included in complete-set"

  puts ""
end