summary refs log tree commit diff
path: root/cv/data/utils
diff options
context:
space:
mode:
Diffstat (limited to 'cv/data/utils')
-rw-r--r-- cv/data/utils/check_datasets_inchi_equality.rb 98
-rw-r--r-- cv/data/utils/create_selected_feature_ds.rb 47
-rwxr-xr-x cv/data/utils/get_csv.sh 20
-rw-r--r-- cv/data/utils/get_csv_versions.rb 3
4 files changed, 168 insertions, 0 deletions
diff --git a/cv/data/utils/check_datasets_inchi_equality.rb b/cv/data/utils/check_datasets_inchi_equality.rb
new file mode 100644
index 0000000..75d6558
--- /dev/null
+++ b/cv/data/utils/check_datasets_inchi_equality.rb
@@ -0,0 +1,98 @@
+require 'rubygems'
+require 'opentox-ruby'
+require 'yaml'
+
+@subjectid = nil
+
+
+
+def check_ds(t_ds_uri, f_ds_uri)
+ puts t_ds_uri
+ puts f_ds_uri
+
+ regression_training_dataset = OpenTox::Dataset.find(t_ds_uri, @subjectid)
+ regression_feature_dataset = OpenTox::Dataset.find(f_ds_uri, @subjectid)
+
+ train_ds = regression_training_dataset.data_entries.keys
+ train_cmds = regression_training_dataset.compounds
+ feature_ds = regression_feature_dataset.data_entries.keys
+ feature_cmds = regression_feature_dataset.compounds
+
+ puts "----- Check activity inchi -----"
+ match=0
+ mismatch=0
+ train_ds.each { |i|
+ if feature_ds.include?(i)
+ match = match + 1
+ else
+ mismatch = mismatch + 1
+ end
+ }
+ if mismatch > 0
+ puts "NOT all training compounds represented in feature dataset!!!" unless mismatch > 0
+ puts "match: #{match}; and mismatch: !!!!!#{mismatch}!!!!!"
+ else
+ puts "All training compounds represented in feature dataset." unless mismatch > 0
+ puts "match: #{match}; mismatch: #{mismatch}"
+ puts "OK!!!"
+ end
+
+ train_ds.sort!
+ feature_ds.sort!
+
+ if train_ds == feature_ds
+ puts "train_ds == feature_ds"
+ else
+ a = train_ds - feature_ds
+ #puts "d: '#{a}'"
+ puts "train_ds: " + train_ds.size.to_s + "; feature_ds: "+ feature_ds.size.to_s
+ puts "train_ds =NOT feature_ds"
+ end
+
+
+
+ puts "----- Check compound inchi -----"
+ match=0
+ mismatch=0
+ train_cmds.each { |i|
+ if feature_cmds.include?(i)
+ match = match + 1
+ else
+ mismatch = mismatch + 1
+ end
+ }
+ if mismatch > 0
+ puts "NOT all training compounds represented in feature dataset!!!" unless mismatch > 0
+ puts "match: #{match}; and mismatch: !!!!!#{mismatch}!!!!!"
+ else
+ puts "All training compounds represented in feature dataset." unless mismatch > 0
+ puts "match: #{match}; mismatch: #{mismatch}"
+ puts "OK!!!"
+ end
+
+
+
+ feature_cmds.sort!
+ train_cmds.sort!
+
+ if train_cmds == feature_cmds
+ puts "train_cmds == feature_cmds"
+ else
+ b = train_cmds - feature_cmds
+ #puts "d: '#{b}'"
+ puts "train_cmds: " + train_cmds.size.to_s + "; feature_cmds: " + feature_cmds.size.to_s
+ puts "train_cmds =NOT feature_cmds"
+ end
+ puts
+end
+
+
+
+
+ds = YAML::load_file("../datasets.yaml")
+ds.keys.each { |dataset|
+ ds[dataset].keys.each { |pc|
+ puts pc
+ check_ds(ds[dataset]["dataset"], ds[dataset][pc])
+ }
+}
diff --git a/cv/data/utils/create_selected_feature_ds.rb b/cv/data/utils/create_selected_feature_ds.rb
new file mode 100644
index 0000000..0e5f063
--- /dev/null
+++ b/cv/data/utils/create_selected_feature_ds.rb
@@ -0,0 +1,47 @@
+require 'rubygems'
+require 'opentox-ruby'
+require 'yaml'
+
+@subjectid = nil
+
+
+
+def create_f_ds(t_ds_uri, f_ds_uri, del)
+
+ regression_training_dataset = OpenTox::Dataset.find(t_ds_uri, @subjectid)
+ prediction_feature = regression_training_dataset.features.keys.first
+ regression_feature_dataset = OpenTox::Dataset.find(f_ds_uri, @subjectid)
+
+ params = {}
+ params[:dataset_uri] = regression_training_dataset.uri
+ params[:prediction_feature_uri] = prediction_feature
+ params[:feature_dataset_uri] = regression_feature_dataset.uri
+ params[:del_missing] = del
+ puts params.to_yaml
+ feature_selection_algo_uri = File.join(CONFIG[:services]["opentox-algorithm"],"feature_selection/rfe")
+ puts feature_selection_algo_uri
+
+ result = OpenTox::RestClientWrapper.post( feature_selection_algo_uri, params)
+ puts "--- Feature dataset is: ---"
+ puts result
+
+ puts
+end
+
+
+
+
+ds = YAML::load_file("../datasets.yaml")
+ds.keys.each { |dataset|
+ puts "----------------- next dataset -----------------"
+ ds[dataset].keys.each { |pc|
+ puts pc unless pc == "dataset"
+ [false, true].each { |del_missing|
+ begin
+ create_f_ds(ds[dataset]["dataset"], ds[dataset][pc], del_missing) unless pc == "dataset"
+ rescue
+ end
+ }
+ puts "-----------------" unless pc == "dataset"
+ }
+}
diff --git a/cv/data/utils/get_csv.sh b/cv/data/utils/get_csv.sh
new file mode 100755
index 0000000..0ca8129
--- /dev/null
+++ b/cv/data/utils/get_csv.sh
@@ -0,0 +1,20 @@
+ curl -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/5263 > csv_file; mv -v --backup=numbered csv_file FHM_electronic_cpsa.csv
+ curl -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/5765 > csv_file; mv -v --backup=numbered csv_file FHM_topological.csv
+ curl -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/5262 > csv_file; mv -v --backup=numbered csv_file FHM_constitutional.csv
+ curl -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/5266 > csv_file; mv -v --backup=numbered csv_file FHM_constitutional_electronic_cpsa.csv
+ curl -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/5478 > csv_file; mv -v --backup=numbered csv_file MDD_electronic_cpsa.csv
+ curl -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/5766 > csv_file; mv -v --backup=numbered csv_file MDD_topological.csv
+ curl -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/5475 > csv_file; mv -v --backup=numbered csv_file MDD_constitutional.csv
+ curl -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/5479 > csv_file; mv -v --backup=numbered csv_file MDD_constitutional_electronic_cpsa.csv
+ curl -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/6230 > csv_file; mv -v --backup=numbered csv_file RAT_electronic_cpsa.csv
+ curl -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/6243 > csv_file; mv -v --backup=numbered csv_file RAT_topological.csv
+ curl -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/5465 > csv_file; mv -v --backup=numbered csv_file RAT_training.csv
+ curl -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/6226 > csv_file; mv -v --backup=numbered csv_file RAT_constitutional.csv
+ curl -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/5464 > csv_file; mv -v --backup=numbered csv_file RAT_constitutional_electronic_cpsa.csv
+ curl -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/5466 > csv_file; mv -v --backup=numbered csv_file RAT_test.csv
+ curl -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/6231 > csv_file; mv -v --backup=numbered csv_file MOU_electronic_cpsa.csv
+ curl -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/6229 > csv_file; mv -v --backup=numbered csv_file MOU_topological.csv
+ curl -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/5467 > csv_file; mv -v --backup=numbered csv_file MOU_training.csv
+ curl -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/5459 > csv_file; mv -v --backup=numbered csv_file MOU_constitutional.csv
+ curl -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/5461 > csv_file; mv -v --backup=numbered csv_file MOU_constitutional_electronic_cpsa.csv
+ curl -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/5468 > csv_file; mv -v --backup=numbered csv_file MOU_test.csv
diff --git a/cv/data/utils/get_csv_versions.rb b/cv/data/utils/get_csv_versions.rb
new file mode 100644
index 0000000..4fa360f
--- /dev/null
+++ b/cv/data/utils/get_csv_versions.rb
@@ -0,0 +1,3 @@
+require 'yaml'
+ds = YAML::load_file("../datasets.yaml")
+ds.keys.each { |d| puts d ; ds[d].keys.each {|t| puts " #{t}"; cmd = " curl -H 'accept:text/csv' #{ds[d][t]} > csv_file; mv -v --backup=numbered csv_file #{d}_#{t.gsub(/,/, '_')}.csv" unless t=="dataset"; puts cmd } }