diff options
author | Andreas Maunz <andreas@maunz.de> | 2012-02-09 15:44:28 +0100 |
---|---|---|
committer | Andreas Maunz <andreas@maunz.de> | 2012-02-09 15:44:28 +0100 |
commit | 7a37114695d783a770cdca854df36cc4ee347758 (patch) | |
tree | 7c00197151b077646f9cb3c4e3406d0e6fd58781 /cv/data/utils/check_datasets_inchi_equality.rb | |
parent | 1a0957e830030b9c376eb298f9e8a837249d3296 (diff) | |
parent | 82f8512dc5e2de822433e2dcec3b579962e6ae2a (diff) |
Merge branch 'pc_new_1' into development
Conflicts:
algorithm.rb
dataset.rb
lazar.rb
Diffstat (limited to 'cv/data/utils/check_datasets_inchi_equality.rb')
-rw-r--r-- | cv/data/utils/check_datasets_inchi_equality.rb | 98 |
1 files changed, 98 insertions, 0 deletions
# diff --git a/cv/data/utils/check_datasets_inchi_equality.rb b/cv/data/utils/check_datasets_inchi_equality.rb
# new file mode 100644
# index 0000000..75d6558
# --- /dev/null
# +++ b/cv/data/utils/check_datasets_inchi_equality.rb
# @@ -0,0 +1,98 @@

require 'rubygems'
require 'opentox-ruby'
require 'yaml'

# Passed as the second argument to every OpenTox::Dataset.find call below.
@subjectid = nil

# Prints one comparison section for two lists of identifiers.
#
# First reports how many entries of +train+ also occur in +feature+, then
# whether the two lists are equal once sorted.
#
# title         - text shown in the "----- Check ... -----" banner
# train_label   - name printed for the training list (e.g. "train_ds")
# feature_label - name printed for the feature list (e.g. "feature_ds")
# train         - Array of identifiers taken from the training dataset
# feature       - Array of identifiers taken from the feature dataset
#
# Returns nothing of interest; all results go to stdout.
def report_overlap(title, train_label, feature_label, train, feature)
  puts "----- Check #{title} -----"

  mismatch = train.reject { |entry| feature.include?(entry) }.size
  match = train.size - mismatch

  if mismatch > 0
    # The original guarded this line with `unless mismatch > 0`, which can
    # never hold inside this branch, so the warning was silently dropped.
    puts "NOT all training compounds represented in feature dataset!!!"
    puts "match: #{match}; and mismatch: !!!!!#{mismatch}!!!!!"
  else
    puts "All training compounds represented in feature dataset."
    puts "match: #{match}; mismatch: #{mismatch}"
    puts "OK!!!"
  end

  # Non-destructive sort: the caller's arrays (which may belong to the
  # dataset objects) are left in their original order.
  if train.sort == feature.sort
    puts "#{train_label} == #{feature_label}"
  else
    puts "#{train_label}: #{train.size}; #{feature_label}: #{feature.size}"
    puts "#{train_label} =NOT #{feature_label}"
  end
end

# Loads a training dataset and a feature dataset and prints how well their
# identifiers agree.
#
# Two comparisons are printed: one over the keys of each dataset's
# +data_entries+, and one over each dataset's +compounds+ list.
# NOTE(review): the section banners call these "inchi" checks; presumably the
# identifiers are InChI-based compound URIs -- confirm against opentox-ruby.
#
# t_ds_uri - URI of the training dataset
# f_ds_uri - URI of the feature dataset
def check_ds(t_ds_uri, f_ds_uri)
  puts t_ds_uri
  puts f_ds_uri

  training_dataset = OpenTox::Dataset.find(t_ds_uri, @subjectid)
  feature_dataset = OpenTox::Dataset.find(f_ds_uri, @subjectid)

  report_overlap("activity inchi", "train_ds", "feature_ds",
                 training_dataset.data_entries.keys,
                 feature_dataset.data_entries.keys)

  report_overlap("compound inchi", "train_cmds", "feature_cmds",
                 training_dataset.compounds,
                 feature_dataset.compounds)
  puts
end

# Driver: for every top-level entry in ../datasets.yaml (path is relative to
# the current working directory), compare the URI stored under "dataset"
# with the URI stored under each key of that entry.
# NOTE(review): the "dataset" key itself is also iterated, so each training
# dataset is compared against itself once -- confirm whether that is intended.
ds = YAML.load_file("../datasets.yaml")
ds.each do |_name, uris|
  uris.keys.each do |pc|
    puts pc
    check_ds(uris["dataset"], uris[pc])
  end
end