summary | refs | log | tree | commit | diff
path: root/5x_cv
diff options
context:
space:
mode:
Diffstat (limited to '5x_cv')
-rw-r--r-- 5x_cv/check_datasets_inchi_equality.rb 98
-rw-r--r-- 5x_cv/get_csv_versions.rb 3
-rw-r--r-- 5x_cv/get_csv_versions.sh 20
3 files changed, 121 insertions, 0 deletions
diff --git a/5x_cv/check_datasets_inchi_equality.rb b/5x_cv/check_datasets_inchi_equality.rb
new file mode 100644
index 0000000..7d38e96
--- /dev/null
+++ b/5x_cv/check_datasets_inchi_equality.rb
@@ -0,0 +1,98 @@
+require 'rubygems'
+require 'opentox-ruby'
+require 'yaml'
+
+@subjectid = nil
+
+
+
+def check_ds(t_ds_uri, f_ds_uri)
+ puts t_ds_uri
+ puts f_ds_uri
+
+ regression_training_dataset = OpenTox::Dataset.find(t_ds_uri, @subjectid)#3963;1572;
+ regression_feature_dataset = OpenTox::Dataset.find(f_ds_uri, @subjectid)#3971;3946;
+
+ train_ds = regression_training_dataset.data_entries.keys
+ train_cmds = regression_training_dataset.compounds
+ feature_ds = regression_feature_dataset.data_entries.keys
+ feature_cmds = regression_feature_dataset.compounds
+
+ puts "----- Check activity inchi -----"
+ match=0
+ mismatch=0
+ train_ds.each { |i|
+ if feature_ds.include?(i)
+ match = match + 1
+ else
+ mismatch = mismatch + 1
+ end
+ }
+ if mismatch > 0
+ puts "NOT all training compounds represented in feature dataset!!!" unless mismatch > 0
+ puts "match: #{match}; and mismatch: !!!!!#{mismatch}!!!!!"
+ else
+ puts "All training compounds represented in feature dataset." unless mismatch > 0
+ puts "match: #{match}; mismatch: #{mismatch}"
+ puts "OK!!!"
+ end
+
+ train_ds.sort!
+ feature_ds.sort!
+
+ if train_ds == feature_ds
+ puts "train_ds == feature_ds"
+ else
+ a = train_ds - feature_ds
+ #puts "d: '#{a}'"
+ puts "train_ds: " + train_ds.size.to_s + "; feature_ds: "+ feature_ds.size.to_s
+ puts "train_ds =NOT feature_ds"
+ end
+
+
+
+ puts "----- Check compound inchi -----"
+ match=0
+ mismatch=0
+ train_cmds.each { |i|
+ if feature_cmds.include?(i)
+ match = match + 1
+ else
+ mismatch = mismatch + 1
+ end
+ }
+ if mismatch > 0
+ puts "NOT all training compounds represented in feature dataset!!!" unless mismatch > 0
+ puts "match: #{match}; and mismatch: !!!!!#{mismatch}!!!!!"
+ else
+ puts "All training compounds represented in feature dataset." unless mismatch > 0
+ puts "match: #{match}; mismatch: #{mismatch}"
+ puts "OK!!!"
+ end
+
+
+
+ feature_cmds.sort!
+ train_cmds.sort!
+
+ if train_cmds == feature_cmds
+ puts "train_cmds == feature_cmds"
+ else
+ b = train_cmds - feature_cmds
+ #puts "d: '#{b}'"
+ puts "train_cmds: " + train_cmds.size.to_s + "; feature_cmds: " + feature_cmds.size.to_s
+ puts "train_cmds =NOT feature_cmds"
+ end
+ puts
+end
+
+
+
+
+ds = YAML::load_file("datasets_nestle.yaml")
+ds.keys.each { |dataset|
+ ds[dataset].keys.each { |pc|
+ puts pc
+ check_ds(ds[dataset]["dataset"], ds[dataset][pc])
+ }
+}
diff --git a/5x_cv/get_csv_versions.rb b/5x_cv/get_csv_versions.rb
new file mode 100644
index 0000000..c09a46e
--- /dev/null
+++ b/5x_cv/get_csv_versions.rb
@@ -0,0 +1,3 @@
+require 'yaml'
+# Prints each dataset name and key, plus a curl/mv download command for every key except "dataset".
+YAML::load_file("datasets_nestle.yaml").each { |d, uris| puts d; uris.each { |t, uri| puts " #{t}"; puts " curl -H 'accept:text/csv' #{uri} > csv_file; mv -v --backup=numbered csv_file #{d}_#{t.gsub(/,/, '_')}.csv" unless t == "dataset" } }
diff --git a/5x_cv/get_csv_versions.sh b/5x_cv/get_csv_versions.sh
new file mode 100644
index 0000000..f3a29eb
--- /dev/null
+++ b/5x_cv/get_csv_versions.sh
@@ -0,0 +1,20 @@
+ curl -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/2902 > csv_file; mv -v --backup=numbered csv_file FHM_electronic_cpsa.csv # FHM_* files; each line requests one dataset URI as text/csv into the scratch file csv_file, then renames it
+ curl -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/1908 > csv_file; mv -v --backup=numbered csv_file FHM_geometrical.csv # mv --backup=numbered: an existing target is kept as a numbered backup instead of being overwritten
+ curl -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/1909 > csv_file; mv -v --backup=numbered csv_file FHM_topological.csv # NOTE(review): ';' means mv runs even if curl failed -- confirm that is intended
+ curl -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/1910 > csv_file; mv -v --backup=numbered csv_file FHM_hybrid.csv
+ curl -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/2903 > csv_file; mv -v --backup=numbered csv_file FHM_constitutional.csv
+ curl -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/2900 > csv_file; mv -v --backup=numbered csv_file MDD_electronic_cpsa.csv # MDD_* files
+ curl -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/1905 > csv_file; mv -v --backup=numbered csv_file MDD_geometrical.csv
+ curl -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/1906 > csv_file; mv -v --backup=numbered csv_file MDD_topological.csv
+ curl -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/1907 > csv_file; mv -v --backup=numbered csv_file MDD_hybrid.csv
+ curl -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/2901 > csv_file; mv -v --backup=numbered csv_file MDD_constitutional.csv
+ curl -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/2904 > csv_file; mv -v --backup=numbered csv_file RAT_electronic_cpsa.csv # RAT_* files
+ curl -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/1911 > csv_file; mv -v --backup=numbered csv_file RAT_geometrical.csv
+ curl -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/1912 > csv_file; mv -v --backup=numbered csv_file RAT_topological.csv
+ curl -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/1913 > csv_file; mv -v --backup=numbered csv_file RAT_hybrid.csv
+ curl -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/2905 > csv_file; mv -v --backup=numbered csv_file RAT_constitutional.csv
+ curl -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/2906 > csv_file; mv -v --backup=numbered csv_file MOU_electronic_cpsa.csv # MOU_* files
+ curl -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/1914 > csv_file; mv -v --backup=numbered csv_file MOU_geometrical.csv
+ curl -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/1915 > csv_file; mv -v --backup=numbered csv_file MOU_topological.csv
+ curl -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/1916 > csv_file; mv -v --backup=numbered csv_file MOU_hybrid.csv
+ curl -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/2907 > csv_file; mv -v --backup=numbered csv_file MOU_constitutional.csv