diff options
Diffstat (limited to 'cv/data/utils')
-rw-r--r-- | cv/data/utils/check_datasets_inchi_equality.rb | 98 | ||||
-rw-r--r-- | cv/data/utils/create_selected_feature_ds.rb | 47 | ||||
-rwxr-xr-x | cv/data/utils/get_csv.sh | 20 | ||||
-rw-r--r-- | cv/data/utils/get_csv_versions.rb | 3 |
4 files changed, 168 insertions, 0 deletions
diff --git a/cv/data/utils/check_datasets_inchi_equality.rb b/cv/data/utils/check_datasets_inchi_equality.rb
new file mode 100644
index 0000000..75d6558
--- /dev/null
+++ b/cv/data/utils/check_datasets_inchi_equality.rb
@@ -0,0 +1,98 @@
+require 'rubygems'
+require 'opentox-ruby'
+require 'yaml'
+
+@subjectid = nil
+
+# Fetches the training dataset (t_ds_uri) and the feature dataset (f_ds_uri) and
+# prints whether their data-entry keys and their compound lists coincide.
+def check_ds(t_ds_uri, f_ds_uri)
+  puts t_ds_uri
+  puts f_ds_uri
+
+  regression_training_dataset = OpenTox::Dataset.find(t_ds_uri, @subjectid)
+  regression_feature_dataset = OpenTox::Dataset.find(f_ds_uri, @subjectid)
+
+  train_ds = regression_training_dataset.data_entries.keys
+  train_cmds = regression_training_dataset.compounds
+  feature_ds = regression_feature_dataset.data_entries.keys
+  feature_cmds = regression_feature_dataset.compounds
+
+  puts "----- Check activity inchi -----"
+  match = 0
+  mismatch = 0
+  train_ds.each { |i|
+    if feature_ds.include?(i)
+      match += 1
+    else
+      mismatch += 1
+    end
+  }
+  if mismatch > 0
+    puts "NOT all training compounds represented in feature dataset!!!"
+    puts "match: #{match}; and mismatch: !!!!!#{mismatch}!!!!!"
+  else
+    puts "All training compounds represented in feature dataset."
+    puts "match: #{match}; mismatch: #{mismatch}"
+    puts "OK!!!"
+  end
+
+  train_ds.sort!
+  feature_ds.sort!
+
+  if train_ds == feature_ds
+    puts "train_ds == feature_ds"
+  else
+    # Debugging aid: (train_ds - feature_ds) lists the entries present only in
+    # the training dataset; print it here when the sizes alone do not help.
+    puts "train_ds: " + train_ds.size.to_s + "; feature_ds: "+ feature_ds.size.to_s
+    puts "train_ds =NOT feature_ds"
+  end
+
+
+  # Same two checks again, this time on the datasets' compound lists.
+  puts "----- Check compound inchi -----"
+  match = 0
+  mismatch = 0
+  train_cmds.each { |i|
+    if feature_cmds.include?(i)
+      match += 1
+    else
+      mismatch += 1
+    end
+  }
+  if mismatch > 0
+    puts "NOT all training compounds represented in feature dataset!!!"
+    puts "match: #{match}; and mismatch: !!!!!#{mismatch}!!!!!"
+  else
+    puts "All training compounds represented in feature dataset."
+    puts "match: #{match}; mismatch: #{mismatch}"
+    puts "OK!!!"
+  end
+
+
+
+  feature_cmds.sort!
+  train_cmds.sort!
+
+  if train_cmds == feature_cmds
+    puts "train_cmds == feature_cmds"
+  else
+    # Debugging aid: (train_cmds - feature_cmds) lists the compounds present
+    # only in the training dataset.
+    puts "train_cmds: " + train_cmds.size.to_s + "; feature_cmds: " + feature_cmds.size.to_s
+    puts "train_cmds =NOT feature_cmds"
+  end
+  puts
+end
+
+
+# For every entry in ../datasets.yaml, compare the training dataset (the
+# "dataset" key) with each other URI stored beside it; "dataset" itself is skipped.
+ds = YAML::load_file("../datasets.yaml")
+ds.keys.each { |dataset|
+  ds[dataset].keys.each { |pc|
+    puts pc unless pc == "dataset"
+    check_ds(ds[dataset]["dataset"], ds[dataset][pc]) unless pc == "dataset"
+  }
+}
diff --git a/cv/data/utils/create_selected_feature_ds.rb b/cv/data/utils/create_selected_feature_ds.rb
new file mode 100644
index 0000000..0e5f063
--- /dev/null
+++ b/cv/data/utils/create_selected_feature_ds.rb
@@ -0,0 +1,47 @@
+require 'rubygems'
+require 'opentox-ruby'
+require 'yaml'
+
+@subjectid = nil
+
+# Posts the training dataset, its first feature (used as prediction feature) and
+# the feature dataset to the feature_selection/rfe algorithm and prints the reply.
+def create_f_ds(t_ds_uri, f_ds_uri, del)
+  regression_training_dataset = OpenTox::Dataset.find(t_ds_uri, @subjectid)
+  prediction_feature = regression_training_dataset.features.keys.first
+  regression_feature_dataset = OpenTox::Dataset.find(f_ds_uri, @subjectid)
+
+  params = {}
+  params[:dataset_uri] = regression_training_dataset.uri
+  params[:prediction_feature_uri] = prediction_feature
+  params[:feature_dataset_uri] = regression_feature_dataset.uri
+  params[:del_missing] = del
+  puts params.to_yaml
+  feature_selection_algo_uri = File.join(CONFIG[:services]["opentox-algorithm"],"feature_selection/rfe")
+  puts feature_selection_algo_uri
+
+  result = OpenTox::RestClientWrapper.post( feature_selection_algo_uri, params)
+  puts "--- Feature dataset is: ---"
+  puts result
+
+  puts
+end
+
+
+# Run the selection twice per feature dataset: with del_missing false, then true.
+# The "dataset" key holds the training dataset URI itself and is skipped.
+ds = YAML::load_file("../datasets.yaml")
+ds.keys.each { |dataset|
+  puts "----------------- next dataset -----------------"
+  ds[dataset].keys.each { |pc|
+    puts pc unless pc == "dataset"
+    [false, true].each { |del_missing|
+      begin
+        create_f_ds(ds[dataset]["dataset"], ds[dataset][pc], del_missing) unless pc == "dataset"
+      rescue => e
+        # Best effort: report the failure on stderr and carry on with the next run.
+        warn "create_f_ds failed for #{pc} (del_missing=#{del_missing}): #{e.class}: #{e.message}"
+      end
+    }
+    puts "-----------------" unless pc == "dataset"
+  }
+}
diff --git a/cv/data/utils/get_csv.sh b/cv/data/utils/get_csv.sh
new file mode 100755
index 0000000..0ca8129
--- /dev/null
+++ b/cv/data/utils/get_csv.sh
@@ -0,0 +1,20 @@
+ curl -f -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/5263 > csv_file && mv -v --backup=numbered csv_file FHM_electronic_cpsa.csv
+ curl -f -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/5765 > csv_file && mv -v --backup=numbered csv_file FHM_topological.csv
+ curl -f -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/5262 > csv_file && mv -v --backup=numbered csv_file FHM_constitutional.csv
+ curl -f -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/5266 > csv_file && mv -v --backup=numbered csv_file FHM_constitutional_electronic_cpsa.csv
+ curl -f -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/5478 > csv_file && mv -v --backup=numbered csv_file MDD_electronic_cpsa.csv
+ curl -f -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/5766 > csv_file && mv -v --backup=numbered csv_file MDD_topological.csv
+ curl -f -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/5475 > csv_file && mv -v --backup=numbered csv_file MDD_constitutional.csv
+ curl -f -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/5479 > csv_file && mv -v --backup=numbered csv_file MDD_constitutional_electronic_cpsa.csv
+ curl -f -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/6230 > csv_file && mv -v --backup=numbered csv_file RAT_electronic_cpsa.csv
+ curl -f -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/6243 > csv_file && mv -v --backup=numbered csv_file RAT_topological.csv
+ curl -f -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/5465 > csv_file && mv -v --backup=numbered csv_file RAT_training.csv
+ curl -f -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/6226 > csv_file && mv -v --backup=numbered csv_file RAT_constitutional.csv
+ curl -f -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/5464 > csv_file && mv -v --backup=numbered csv_file RAT_constitutional_electronic_cpsa.csv
+ curl -f -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/5466 > csv_file && mv -v --backup=numbered csv_file RAT_test.csv
+ curl -f -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/6231 > csv_file && mv -v --backup=numbered csv_file MOU_electronic_cpsa.csv
+ curl -f -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/6229 > csv_file && mv -v --backup=numbered csv_file MOU_topological.csv
+ curl -f -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/5467 > csv_file && mv -v --backup=numbered csv_file MOU_training.csv
+ curl -f -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/5459 > csv_file && mv -v --backup=numbered csv_file MOU_constitutional.csv
+ curl -f -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/5461 > csv_file && mv -v --backup=numbered csv_file MOU_constitutional_electronic_cpsa.csv
+ curl -f -H 'accept:text/csv' http://toxcreate3.in-silico.ch:8086/dataset/5468 > csv_file && mv -v --backup=numbered csv_file MOU_test.csv
diff --git a/cv/data/utils/get_csv_versions.rb b/cv/data/utils/get_csv_versions.rb
new file mode 100644
index 0000000..4fa360f
--- /dev/null
+++ b/cv/data/utils/get_csv_versions.rb
@@ -0,0 +1,3 @@
+require 'yaml'
+ds = YAML::load_file("../datasets.yaml")
+ds.keys.each { |d| puts d ; ds[d].keys.each {|t| puts " #{t}"; next if t=="dataset"; puts " curl -f -H 'accept:text/csv' #{ds[d][t]} > csv_file && mv -v --backup=numbered csv_file #{d}_#{t.gsub(/,/, '_')}.csv" } }