diff options
author | Andreas Maunz <andreas@maunz.de> | 2012-02-09 15:44:28 +0100 |
---|---|---|
committer | Andreas Maunz <andreas@maunz.de> | 2012-02-09 15:44:28 +0100 |
commit | 7a37114695d783a770cdca854df36cc4ee347758 (patch) | |
tree | 7c00197151b077646f9cb3c4e3406d0e6fd58781 /5x_cv | |
parent | 1a0957e830030b9c376eb298f9e8a837249d3296 (diff) | |
parent | 82f8512dc5e2de822433e2dcec3b579962e6ae2a (diff) |
Merge branch 'pc_new_1' into development
Conflicts:
algorithm.rb
dataset.rb
lazar.rb
Diffstat (limited to '5x_cv')
-rw-r--r-- | 5x_cv/check_datasets_inchi_equality.rb | 98 | ||||
-rw-r--r-- | 5x_cv/get_csv_versions.rb | 3 | ||||
-rw-r--r-- | 5x_cv/get_csv_versions.sh | 20 |
3 files changed, 121 insertions, 0 deletions
diff --git a/5x_cv/check_datasets_inchi_equality.rb b/5x_cv/check_datasets_inchi_equality.rb new file mode 100644 index 0000000..7d38e96 --- /dev/null +++ b/5x_cv/check_datasets_inchi_equality.rb @@ -0,0 +1,98 @@ +require 'rubygems' +require 'opentox-ruby' +require 'yaml' + +@subjectid = nil + + + +def check_ds(t_ds_uri, f_ds_uri) + puts t_ds_uri + puts f_ds_uri + + regression_training_dataset = OpenTox::Dataset.find(t_ds_uri, @subjectid)#3963;1572; + regression_feature_dataset = OpenTox::Dataset.find(f_ds_uri, @subjectid)#3971;3946; + + train_ds = regression_training_dataset.data_entries.keys + train_cmds = regression_training_dataset.compounds + feature_ds = regression_feature_dataset.data_entries.keys + feature_cmds = regression_feature_dataset.compounds + + puts "----- Check activity inchi -----" + match=0 + mismatch=0 + train_ds.each { |i| + if feature_ds.include?(i) + match = match + 1 + else + mismatch = mismatch + 1 + end + } + if mismatch > 0 + puts "NOT all training compounds represented in feature dataset!!!" unless mismatch > 0 + puts "match: #{match}; and mismatch: !!!!!#{mismatch}!!!!!" + else + puts "All training compounds represented in feature dataset." unless mismatch > 0 + puts "match: #{match}; mismatch: #{mismatch}" + puts "OK!!!" + end + + train_ds.sort! + feature_ds.sort! + + if train_ds == feature_ds + puts "train_ds == feature_ds" + else + a = train_ds - feature_ds + #puts "d: '#{a}'" + puts "train_ds: " + train_ds.size.to_s + "; feature_ds: "+ feature_ds.size.to_s + puts "train_ds =NOT feature_ds" + end + + + + puts "----- Check compound inchi -----" + match=0 + mismatch=0 + train_cmds.each { |i| + if feature_cmds.include?(i) + match = match + 1 + else + mismatch = mismatch + 1 + end + } + if mismatch > 0 + puts "NOT all training compounds represented in feature dataset!!!" unless mismatch > 0 + puts "match: #{match}; and mismatch: !!!!!#{mismatch}!!!!!" + else + puts "All training compounds represented in feature dataset." unless mismatch > 0 + puts "match: #{match}; mismatch: #{mismatch}" + puts "OK!!!" + end + + + + feature_cmds.sort! + train_cmds.sort! + + if train_cmds == feature_cmds + puts "train_cmds == feature_cmds" + else + b = train_cmds - feature_cmds + #puts "d: '#{b}'" + puts "train_cmds: " + train_cmds.size.to_s + "; feature_cmds: " + feature_cmds.size.to_s + puts "train_cmds =NOT feature_cmds" + end + puts +end + + + + +ds = YAML::load_file("datasets_nestle.yaml") +ds.keys.each { |dataset| + ds[dataset].keys.each { |pc| + puts pc + check_ds(ds[dataset]["dataset"], ds[dataset][pc]) + } +} diff --git a/5x_cv/get_csv_versions.rb b/5x_cv/get_csv_versions.rb new file mode 100644 index 0000000..c09a46e --- /dev/null +++ b/5x_cv/get_csv_versions.rb @@ -0,0 +1,3 @@ +require 'yaml' +ds = YAML::load_file("datasets_nestle.yaml") +ds.keys.each { |d| puts d ; ds[d].keys.each {|t| puts " #{t}"; cmd = " curl -H 'accept:text/csv' #{ds[d][t]} > csv_file; mv -v --backup=numbered csv_file #{d}_#{t.gsub(/,/, '_')}.csv" unless t=="dataset"; puts cmd } } diff --git a/5x_cv/get_csv_versions.sh b/5x_cv/get_csv_versions.sh new file mode 100644 index 0000000..f3a29eb --- /dev/null +++ b/5x_cv/get_csv_versions.sh @@ -0,0 +1,20 @@ + curl -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/2902 > csv_file; mv -v --backup=numbered csv_file FHM_electronic_cpsa.csv + curl -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/1908 > csv_file; mv -v --backup=numbered csv_file FHM_geometrical.csv + curl -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/1909 > csv_file; mv -v --backup=numbered csv_file FHM_topological.csv + curl -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/1910 > csv_file; mv -v --backup=numbered csv_file FHM_hybrid.csv + curl -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/2903 > csv_file; mv -v --backup=numbered csv_file FHM_constitutional.csv + curl -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/2900 > csv_file; mv -v --backup=numbered csv_file MDD_electronic_cpsa.csv + curl -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/1905 > csv_file; mv -v --backup=numbered csv_file MDD_geometrical.csv + curl -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/1906 > csv_file; mv -v --backup=numbered csv_file MDD_topological.csv + curl -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/1907 > csv_file; mv -v --backup=numbered csv_file MDD_hybrid.csv + curl -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/2901 > csv_file; mv -v --backup=numbered csv_file MDD_constitutional.csv + curl -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/2904 > csv_file; mv -v --backup=numbered csv_file RAT_electronic_cpsa.csv + curl -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/1911 > csv_file; mv -v --backup=numbered csv_file RAT_geometrical.csv + curl -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/1912 > csv_file; mv -v --backup=numbered csv_file RAT_topological.csv + curl -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/1913 > csv_file; mv -v --backup=numbered csv_file RAT_hybrid.csv + curl -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/2905 > csv_file; mv -v --backup=numbered csv_file RAT_constitutional.csv + curl -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/2906 > csv_file; mv -v --backup=numbered csv_file MOU_electronic_cpsa.csv + curl -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/1914 > csv_file; mv -v --backup=numbered csv_file MOU_geometrical.csv + curl -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/1915 > csv_file; mv -v --backup=numbered csv_file MOU_topological.csv + curl -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/1916 > csv_file; mv -v --backup=numbered csv_file MOU_hybrid.csv + curl -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/2907 > csv_file; mv -v --backup=numbered csv_file MOU_constitutional.csv |