summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authordavor <vorgrimmlerdavid@gmx.de>2012-01-16 17:57:49 +0100
committerdavor <vorgrimmlerdavid@gmx.de>2012-01-16 17:57:49 +0100
commitab52d09a2a69218624dd8be8a54f6559fe7d933c (patch)
treea619b151fd38ac5bdafae04296fc689b5ec2041a
parenta7a96be15208b75d1b74497e8c1afbe0e8cced30 (diff)
Added feature ds for outl and improved inchi check
-rw-r--r--5x_cv/check_datasets_inchi_equality.rb98
-rw-r--r--5x_cv/datasets_nestle_outl.yaml67
2 files changed, 165 insertions, 0 deletions
diff --git a/5x_cv/check_datasets_inchi_equality.rb b/5x_cv/check_datasets_inchi_equality.rb
new file mode 100644
index 0000000..7d38e96
--- /dev/null
+++ b/5x_cv/check_datasets_inchi_equality.rb
@@ -0,0 +1,98 @@
+require 'rubygems'
+require 'opentox-ruby'
+require 'yaml'
+
+@subjectid = nil
+
+
+
+def check_ds(t_ds_uri, f_ds_uri)
+ puts t_ds_uri
+ puts f_ds_uri
+
+ regression_training_dataset = OpenTox::Dataset.find(t_ds_uri, @subjectid)#3963;1572;
+ regression_feature_dataset = OpenTox::Dataset.find(f_ds_uri, @subjectid)#3971;3946;
+
+ train_ds = regression_training_dataset.data_entries.keys
+ train_cmds = regression_training_dataset.compounds
+ feature_ds = regression_feature_dataset.data_entries.keys
+ feature_cmds = regression_feature_dataset.compounds
+
+ puts "----- Check activity inchi -----"
+ match=0
+ mismatch=0
+ train_ds.each { |i|
+ if feature_ds.include?(i)
+ match = match + 1
+ else
+ mismatch = mismatch + 1
+ end
+ }
+ if mismatch > 0
+ puts "NOT all training compounds represented in feature dataset!!!" unless mismatch > 0
+ puts "match: #{match}; and mismatch: !!!!!#{mismatch}!!!!!"
+ else
+ puts "All training compounds represented in feature dataset." unless mismatch > 0
+ puts "match: #{match}; mismatch: #{mismatch}"
+ puts "OK!!!"
+ end
+
+ train_ds.sort!
+ feature_ds.sort!
+
+ if train_ds == feature_ds
+ puts "train_ds == feature_ds"
+ else
+ a = train_ds - feature_ds
+ #puts "d: '#{a}'"
+ puts "train_ds: " + train_ds.size.to_s + "; feature_ds: "+ feature_ds.size.to_s
+ puts "train_ds =NOT feature_ds"
+ end
+
+
+
+ puts "----- Check compound inchi -----"
+ match=0
+ mismatch=0
+ train_cmds.each { |i|
+ if feature_cmds.include?(i)
+ match = match + 1
+ else
+ mismatch = mismatch + 1
+ end
+ }
+ if mismatch > 0
+ puts "NOT all training compounds represented in feature dataset!!!" unless mismatch > 0
+ puts "match: #{match}; and mismatch: !!!!!#{mismatch}!!!!!"
+ else
+ puts "All training compounds represented in feature dataset." unless mismatch > 0
+ puts "match: #{match}; mismatch: #{mismatch}"
+ puts "OK!!!"
+ end
+
+
+
+ feature_cmds.sort!
+ train_cmds.sort!
+
+ if train_cmds == feature_cmds
+ puts "train_cmds == feature_cmds"
+ else
+ b = train_cmds - feature_cmds
+ #puts "d: '#{b}'"
+ puts "train_cmds: " + train_cmds.size.to_s + "; feature_cmds: " + feature_cmds.size.to_s
+ puts "train_cmds =NOT feature_cmds"
+ end
+ puts
+end
+
+
+
+
+ds = YAML::load_file("datasets_nestle.yaml")
+ds.keys.each { |dataset|
+ ds[dataset].keys.each { |pc|
+ puts pc
+ check_ds(ds[dataset]["dataset"], ds[dataset][pc])
+ }
+}
diff --git a/5x_cv/datasets_nestle_outl.yaml b/5x_cv/datasets_nestle_outl.yaml
new file mode 100644
index 0000000..d6308f5
--- /dev/null
+++ b/5x_cv/datasets_nestle_outl.yaml
@@ -0,0 +1,67 @@
+{
+# NOTE(review): presumably "dataset" is the training dataset URI and every other key a feature dataset URI per descriptor class — confirm.
+ "MDD": {
+ "dataset": "http://toxcreate3.in-silico.ch:8086/dataset/1571",
+ "electronic,cpsa": "http://toxcreate3.in-silico.ch:8086/dataset/4007",
+ "constitutional": "http://toxcreate3.in-silico.ch:8086/dataset/4006",
+# "geometrical": "http://toxcreate3.in-silico.ch:8086/dataset/1905",
+# "topological": "http://toxcreate3.in-silico.ch:8086/dataset/1906",
+# "hybrid": "http://toxcreate3.in-silico.ch:8086/dataset/1907",
+ "constitutional,electronic,cpsa": "http://toxcreate3.in-silico.ch:8086/dataset/5265"
+ },
+
+ "FHM": {
+ "dataset": "http://toxcreate3.in-silico.ch:8086/dataset/1572",
+ "electronic,cpsa": "http://toxcreate3.in-silico.ch:8086/dataset/4019",
+ "constitutional": "http://toxcreate3.in-silico.ch:8086/dataset/4008" #,
+# "geometrical": "http://toxcreate3.in-silico.ch:8086/dataset/1908",
+# "topological": "http://toxcreate3.in-silico.ch:8086/dataset/1909",
+# "hybrid": "http://toxcreate3.in-silico.ch:8086/dataset/1910"
+ },
+
+ "RAT": {
+ "dataset": "http://toxcreate3.in-silico.ch:8086/dataset/1573",
+ "electronic,cpsa": "http://toxcreate3.in-silico.ch:8086/dataset/4077",
+ "constitutional": "http://toxcreate3.in-silico.ch:8086/dataset/4020" #,
+# "geometrical": "http://toxcreate3.in-silico.ch:8086/dataset/1911",
+# "topological": "http://toxcreate3.in-silico.ch:8086/dataset/1912",
+# "hybrid": "http://toxcreate3.in-silico.ch:8086/dataset/1913"
+ },
+
+ "MOU": {
+ "dataset": "http://toxcreate3.in-silico.ch:8086/dataset/1574",
+ "electronic,cpsa": "http://toxcreate3.in-silico.ch:8086/dataset/4115",
+ "constitutional": "http://toxcreate3.in-silico.ch:8086/dataset/4101" #,
+# "geometrical": "http://toxcreate3.in-silico.ch:8086/dataset/1914",
+# "topological": "http://toxcreate3.in-silico.ch:8086/dataset/1915",
+# "hybrid": "http://toxcreate3.in-silico.ch:8086/dataset/1916"
+ },
+
+# "MDD2": {
+# "dataset": "http://toxcreate3.in-silico.ch:8086/dataset/",
+# "electronic,cpsa": "http://toxcreate3.in-silico.ch:8086/dataset/",
+# "constitutional": "http://toxcreate3.in-silico.ch:8086/dataset/",
+# "constitutional,electronic,cpsa": "http://toxcreate3.in-silico.ch:8086/dataset/"
+# },
+#
+ "FHM2": {
+ "dataset": "http://toxcreate3.in-silico.ch:8086/dataset/4975",
+ "electronic,cpsa": "http://toxcreate3.in-silico.ch:8086/dataset/5263",
+ "constitutional": "http://toxcreate3.in-silico.ch:8086/dataset/5262",
+ "constitutional,electronic,cpsa": "http://toxcreate3.in-silico.ch:8086/dataset/5266"
+ },
+
+ "RAT2": {
+ "dataset": "http://toxcreate3.in-silico.ch:8086/dataset/4977",
+ "electronic,cpsa": "http://toxcreate3.in-silico.ch:8086/dataset/5253",
+ "constitutional": "http://toxcreate3.in-silico.ch:8086/dataset/5246",
+ "constitutional,electronic,cpsa": "http://toxcreate3.in-silico.ch:8086/dataset/5254"
+ },
+
+ "MOU2": {
+ "dataset": "http://toxcreate3.in-silico.ch:8086/dataset/4976",
+ "electronic,cpsa": "http://toxcreate3.in-silico.ch:8086/dataset/4115", # NOTE(review): same URI as "MOU" "electronic,cpsa" — confirm this is not a copy-paste leftover
+ "constitutional": "http://toxcreate3.in-silico.ch:8086/dataset/5264",
+ "constitutional,electronic,cpsa": "http://toxcreate3.in-silico.ch:8086/dataset/5198"
+ }
+}