From ab52d09a2a69218624dd8be8a54f6559fe7d933c Mon Sep 17 00:00:00 2001
From: davor
Date: Mon, 16 Jan 2012 17:57:49 +0100
Subject: Added feature ds for outl and improved inchi check

---
 5x_cv/check_datasets_inchi_equality.rb | 69 ++++++++++++++++++++++++++++++++++
 5x_cv/datasets_nestle_outl.yaml        | 67 +++++++++++++++++++++++++++++++++
 2 files changed, 136 insertions(+)
 create mode 100644 5x_cv/check_datasets_inchi_equality.rb
 create mode 100644 5x_cv/datasets_nestle_outl.yaml

diff --git a/5x_cv/check_datasets_inchi_equality.rb b/5x_cv/check_datasets_inchi_equality.rb
new file mode 100644
index 0000000..7d38e96
--- /dev/null
+++ b/5x_cv/check_datasets_inchi_equality.rb
@@ -0,0 +1,69 @@
+require 'rubygems'
+require 'opentox-ruby'
+require 'yaml'
+
+@subjectid = nil
+
+# Prints how many entries of +train+ also occur in +feature+ and whether
+# both lists hold the same entries once sorted.
+#
+# title        - text placed in the "----- Check ... -----" banner
+# train_name   - label printed for +train+
+# train        - entries taken from the training dataset
+# feature_name - label printed for +feature+
+# feature      - entries taken from the feature dataset
+def report_overlap(title, train_name, train, feature_name, feature)
+  puts "----- Check #{title} -----"
+  # Array#- keeps repeated left-hand entries, so every training entry that
+  # is missing from +feature+ is counted, repeats included.
+  mismatch = (train - feature).size
+  match = train.size - mismatch
+
+  if mismatch > 0
+    puts "NOT all training compounds represented in feature dataset!!!"
+    puts "match: #{match}; and mismatch: !!!!!#{mismatch}!!!!!"
+  else
+    puts "All training compounds represented in feature dataset."
+    puts "match: #{match}; mismatch: #{mismatch}"
+    puts "OK!!!"
+  end
+
+  # Compare sorted copies so the arrays handed in are not reordered.
+  if train.sort == feature.sort
+    puts "#{train_name} == #{feature_name}"
+  else
+    puts "#{train_name}: #{train.size}; #{feature_name}: #{feature.size}"
+    puts "#{train_name} =NOT #{feature_name}"
+  end
+end
+
+# Fetches the training dataset at +t_ds_uri+ and the feature dataset at
+# +f_ds_uri+ and prints whether both list the same entries, judged once by
+# the keys of data_entries and once by compounds.
+def check_ds(t_ds_uri, f_ds_uri)
+  puts t_ds_uri
+  puts f_ds_uri
+
+  training_dataset = OpenTox::Dataset.find(t_ds_uri, @subjectid)
+  feature_dataset = OpenTox::Dataset.find(f_ds_uri, @subjectid)
+
+  report_overlap("activity inchi",
+                 "train_ds", training_dataset.data_entries.keys,
+                 "feature_ds", feature_dataset.data_entries.keys)
+  report_overlap("compound inchi",
+                 "train_cmds", training_dataset.compounds,
+                 "feature_cmds", feature_dataset.compounds)
+  puts
+end
+
+# The dataset map is read from datasets_nestle.yaml unless another file is
+# named as the first command line argument.
+ds = YAML::load_file(ARGV[0] || "datasets_nestle.yaml")
+ds.keys.each { |dataset|
+  ds[dataset].keys.each { |pc|
+    # "dataset" is the training dataset URI itself, not a feature dataset.
+    next if pc == "dataset"
+    puts pc
+    check_ds(ds[dataset]["dataset"], ds[dataset][pc])
+  }
+}
diff --git a/5x_cv/datasets_nestle_outl.yaml b/5x_cv/datasets_nestle_outl.yaml
new file mode 100644
index 0000000..d6308f5
--- /dev/null
+++ b/5x_cv/datasets_nestle_outl.yaml
@@ -0,0 +1,67 @@
+{
+
+  "MDD": {
+    "dataset": "http://toxcreate3.in-silico.ch:8086/dataset/1571",
+    "electronic,cpsa": "http://toxcreate3.in-silico.ch:8086/dataset/4007",
+    "constitutional": "http://toxcreate3.in-silico.ch:8086/dataset/4006",
+#    "geometrical": "http://toxcreate3.in-silico.ch:8086/dataset/1905",
+#    "topological": "http://toxcreate3.in-silico.ch:8086/dataset/1906",
+#    "hybrid": "http://toxcreate3.in-silico.ch:8086/dataset/1907",
+    "constitutional,electronic,cpsa": "http://toxcreate3.in-silico.ch:8086/dataset/5265"
+  },
+
+  "FHM": {
+    "dataset": "http://toxcreate3.in-silico.ch:8086/dataset/1572",
+    "electronic,cpsa": "http://toxcreate3.in-silico.ch:8086/dataset/4019",
+    "constitutional": "http://toxcreate3.in-silico.ch:8086/dataset/4008" #,
+#    "geometrical": "http://toxcreate3.in-silico.ch:8086/dataset/1908",
+#    "topological": "http://toxcreate3.in-silico.ch:8086/dataset/1909",
+#    "hybrid": "http://toxcreate3.in-silico.ch:8086/dataset/1910"
+  },
+
+  "RAT": {
+    "dataset": "http://toxcreate3.in-silico.ch:8086/dataset/1573",
+    "electronic,cpsa": "http://toxcreate3.in-silico.ch:8086/dataset/4077",
+    "constitutional": "http://toxcreate3.in-silico.ch:8086/dataset/4020" #,
+#    "geometrical": "http://toxcreate3.in-silico.ch:8086/dataset/1911",
+#    "topological": "http://toxcreate3.in-silico.ch:8086/dataset/1912",
+#    "hybrid": "http://toxcreate3.in-silico.ch:8086/dataset/1913"
+  },
+
+  "MOU": {
+    "dataset": "http://toxcreate3.in-silico.ch:8086/dataset/1574",
+    "electronic,cpsa": "http://toxcreate3.in-silico.ch:8086/dataset/4115",
+    "constitutional": "http://toxcreate3.in-silico.ch:8086/dataset/4101" #,
+#    "geometrical": "http://toxcreate3.in-silico.ch:8086/dataset/1914",
+#    "topological": "http://toxcreate3.in-silico.ch:8086/dataset/1915",
+#    "hybrid": "http://toxcreate3.in-silico.ch:8086/dataset/1916"
+  },
+
+#  "MDD2": {
+#    "dataset": "http://toxcreate3.in-silico.ch:8086/dataset/",
+#    "electronic,cpsa": "http://toxcreate3.in-silico.ch:8086/dataset/",
+#    "constitutional": "http://toxcreate3.in-silico.ch:8086/dataset/",
+#    "constitutional,electronic,cpsa": "http://toxcreate3.in-silico.ch:8086/dataset/"
+#  },
+#
+  "FHM2": {
+    "dataset": "http://toxcreate3.in-silico.ch:8086/dataset/4975",
+    "electronic,cpsa": "http://toxcreate3.in-silico.ch:8086/dataset/5263",
+    "constitutional": "http://toxcreate3.in-silico.ch:8086/dataset/5262",
+    "constitutional,electronic,cpsa": "http://toxcreate3.in-silico.ch:8086/dataset/5266"
+  },
+
+  "RAT2": {
+    "dataset": "http://toxcreate3.in-silico.ch:8086/dataset/4977",
+    "electronic,cpsa": "http://toxcreate3.in-silico.ch:8086/dataset/5253",
+    "constitutional": "http://toxcreate3.in-silico.ch:8086/dataset/5246",
+    "constitutional,electronic,cpsa": "http://toxcreate3.in-silico.ch:8086/dataset/5254"
+  },
+
+  "MOU2": {
+    "dataset": "http://toxcreate3.in-silico.ch:8086/dataset/4976",
+    "electronic,cpsa": "http://toxcreate3.in-silico.ch:8086/dataset/4115",
+    "constitutional": "http://toxcreate3.in-silico.ch:8086/dataset/5264",
+    "constitutional,electronic,cpsa": "http://toxcreate3.in-silico.ch:8086/dataset/5198"
+  }
+}
-- 
cgit v1.2.3