summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: mguetlein <martin.guetlein@gmail.com> 2014-10-30 17:30:36 +0100
committer: mguetlein <martin.guetlein@gmail.com> 2014-10-30 17:30:36 +0100
commit: 7d6c3ab68c0ea916e785ad26ee5c22c311901465 (patch)
tree: 7997dba6ea88ce3bf8628f739c496b8fbf50695a
parent: 81c6bf3582f136f382d17123a20f196453eb93f7 (diff)
add grid search scripts
-rwxr-xr-x nch/07_validate.rb 4
-rwxr-xr-x nch/08_grid_search.rb 53
-rwxr-xr-x nch/09_eval_grid.rb 88
-rw-r--r-- nch/config.rb 55
-rw-r--r-- nch/results/grid.yaml 151
5 files changed, 340 insertions, 11 deletions
diff --git a/nch/07_validate.rb b/nch/07_validate.rb
index 4641983..d20d8ac 100755
--- a/nch/07_validate.rb
+++ b/nch/07_validate.rb
@@ -13,7 +13,9 @@ DATA.each do |d|
prediction_feature = prediction_feature(d)
test_dataset_uri = test_dataset_uri(d) if d=="MOU"
- [ new_feature_dataset_uri(d), orig_feature_dataset_uri(d) ].each do |feature_dataset_uri|
+ [ #new_feature_dataset_uri(d),
+ orig_feature_dataset_uri(d)
+ ].each do |feature_dataset_uri|
puts "\nvalidating with features #{feature_dataset_uri}"
params = { :dataset_uri => dataset_uri,
diff --git a/nch/08_grid_search.rb b/nch/08_grid_search.rb
new file mode 100755
index 0000000..c5ae2a3
--- /dev/null
+++ b/nch/08_grid_search.rb
@@ -0,0 +1,53 @@
+#!/usr/bin/env ruby
+
+require "./config.rb"
+
+require "bundler"
+Bundler.require
+
+# Grid search: for every configured data set, run one leave-one-out
+# cross-validation of the lazar algorithm per combination of feature dataset,
+# min_sim and min_train value, and print the accumulated results as YAML
+# after every single run.
+DATA.each do |d|
+  puts ""
+  puts d
+
+  training_uri = dataset_uri(d)
+  target_feature = prediction_feature(d)
+  results = []
+
+  # Uncomment BOTH assignments to forward them as request parameters; the
+  # `defined?(skip_ratio)` check below only fires once the local exists.
+  # skip_ratio = 0.8
+  # random_seed = 3
+
+  # feature dataset URI => short label stored with each result
+  feature_sets = {
+    all_feature_dataset_uri(d) => "all-pc-features",
+    # new_feature_dataset_uri(d) => "new-pc-features",
+    # ob_feature_dataset_uri(d) => "ob-pc-features",
+  }
+  min_sim_grid = [0.65] # [0.2,0.4,0.6,0.8]
+  min_train_grid = [0.0] # [0.0,0.1,0.2]
+
+  feature_sets.each do |feature_uri, f_desc|
+    # product keeps the nesting order: every min_train value per min_sim value
+    min_sim_grid.product(min_train_grid).each do |min_sim, min_train|
+      puts "\nfeatures #{f_desc}"
+      puts "min_sim #{min_sim}"
+      puts "min_train #{min_train}"
+
+      params = {
+        :dataset_uri => training_uri,
+        :algorithm_uri => File.join($algorithm[:uri],"lazar"),
+        :algorithm_params => "feature_dataset_uri=#{feature_uri};min_sim=#{min_sim};min_train_performance=#{min_train}",
+        :prediction_feature => target_feature,
+        :loo => "uniq",
+      }
+      if defined?(skip_ratio)
+        params[:skip_ratio] = skip_ratio
+        params[:random_seed] = random_seed
+      end
+
+      # Start the validation task and block until it yields the result URI.
+      task = OpenTox::RestClientWrapper.post("http://localhost:8087/validation/crossvalidation/loo",params)
+      cv_uri = wait_for_task(task)
+      cv = OpenTox::Crossvalidation.find(cv_uri)
+      puts "Cross-Validation: "+cv.uri
+
+      meta = cv.statistics.metadata
+      r_square = meta[RDF::OT.regressionStatistics.to_s][RDF::OT.rSquare.to_s]
+      unpredicted = meta[RDF::OT.numUnpredicted.to_s]
+      puts "r^2 #{r_square}"
+      # r = OpenTox::CrossvalidationReport.create(cv.uri)
+      # puts "Report: "+r.uri
+
+      results << {
+        :min_sim => min_sim,
+        :min_train => min_train,
+        :cv => cv_uri,
+        :r_square => r_square,
+        :unpredicted => unpredicted,
+        :features => f_desc,
+      } #, :report => r.uri}
+      # Dump everything collected so far, so partial results survive an abort.
+      puts results.to_yaml
+    end
+  end
+end
diff --git a/nch/09_eval_grid.rb b/nch/09_eval_grid.rb
new file mode 100755
index 0000000..69bc31d
--- /dev/null
+++ b/nch/09_eval_grid.rb
@@ -0,0 +1,88 @@
+#!/usr/bin/env ruby
+
+require "./config.rb"
+
+require "bundler"
+Bundler.require
+
+# Grid-search results pasted inline as a YAML document: one entry per min_sim
+# value (0.0 .. 0.8), each carrying the cross-validation URI (:cv), the
+# r_square recorded at search time and the :unpredicted count (stored as a
+# string). The evaluation loop further down reads only :min_sim and :cv and
+# re-fetches the statistics from the validation service.
+# NOTE(review): the entries look identical to the "0.66 / random-seed:2"
+# section of nch/results/grid.yaml -- confirm that is their origin.
+res = YAML.load("---
+- :min_sim: 0.0
+ :min_train: 0.0
+ :cv: http://localhost:8087/validation/crossvalidation/54
+ :r_square: 0.45370176424121433
+ :unpredicted: '0'
+- :min_sim: 0.1
+ :min_train: 0.0
+ :cv: http://localhost:8087/validation/crossvalidation/55
+ :r_square: 0.4595956327160562
+ :unpredicted: '0'
+- :min_sim: 0.2
+ :min_train: 0.0
+ :cv: http://localhost:8087/validation/crossvalidation/56
+ :r_square: 0.4277448070809924
+ :unpredicted: '0'
+- :min_sim: 0.3
+ :min_train: 0.0
+ :cv: http://localhost:8087/validation/crossvalidation/57
+ :r_square: 0.44307217405529253
+ :unpredicted: '0'
+- :min_sim: 0.4
+ :min_train: 0.0
+ :cv: http://localhost:8087/validation/crossvalidation/58
+ :r_square: 0.4536563787494641
+ :unpredicted: '0'
+- :min_sim: 0.5
+ :min_train: 0.0
+ :cv: http://localhost:8087/validation/crossvalidation/59
+ :r_square: 0.4461298986550688
+ :unpredicted: '0'
+- :min_sim: 0.6
+ :min_train: 0.0
+ :cv: http://localhost:8087/validation/crossvalidation/60
+ :r_square: 0.4720263367371236
+ :unpredicted: '1'
+- :min_sim: 0.7
+ :min_train: 0.0
+ :cv: http://localhost:8087/validation/crossvalidation/61
+ :r_square: 0.5057497593073133
+ :unpredicted: '15'
+- :min_sim: 0.8
+ :min_train: 0.0
+ :cv: http://localhost:8087/validation/crossvalidation/62
+ :r_square: 0.5505636328364947
+ :unpredicted: '65'
+")
+
+# Hard-coded total from which the bracketed number in every cell is derived.
+# NOTE(review): presumably the size of the data set -- confirm.
+all = 182
+
+# Column thresholds; nil stands for the unfiltered statistics and is shown
+# as 0.0 in the header row.
+min_train = [nil, 0.1, 0.2, 0.3, 0.4, 0.5]
+
+header = ["sim\\train"] + min_train.map { |threshold| (threshold.nil? ? 0.0 : threshold).to_s }
+out = [header]
+
+# One table row per grid-search entry: the min_sim value followed by one
+# "r_square(count)" cell per threshold, where
+# count = all - (numInstances - numUnpredicted).
+res.each do |entry|
+  stats = OpenTox::Crossvalidation.find(entry[:cv]).statistics
+  cells = min_train.map do |threshold|
+    # NOTE(review): stats.filter is defined elsewhere; presumably it restricts
+    # the statistics by the given threshold -- confirm.
+    data = threshold.nil? ? stats.metadata : stats.filter(threshold)
+    r_square = data[RDF::OT.regressionStatistics.to_s][RDF::OT.rSquare.to_s]
+    counted = data[RDF::OT.numInstances.to_s].to_i - data[RDF::OT.numUnpredicted.to_s].to_i
+    sprintf("%.3f(%2d)", r_square, all - counted)
+  end
+  out << ([entry[:min_sim].to_s] + cells)
+end
+
+
+# Prints a two-dimensional array as a plain-text table on stdout.
+#
+# a  - array of rows; every cell is formatted with "%<cs>s", i.e. converted
+#      to a string and right-aligned to the column width.
+# cs - column width in characters (default 10).
+#
+# Cells of a row are separated by " | ", and each row ends up on its own line.
+def print_2d_array(a, cs=10)
+  lines = a.map do |row|
+    row.map { |cell| "%#{cs}s" % cell }.join(" | ")
+  end
+  puts lines.join("\n")
+end
+
+print_2d_array out
diff --git a/nch/config.rb b/nch/config.rb
index 293cdc8..7a186bc 100644
--- a/nch/config.rb
+++ b/nch/config.rb
@@ -1,19 +1,22 @@
-#DATA = ["LOAEL-mol", "LOAEL-mg","MOU"]
-DATA = ["MOU"]
+#DATA = ["LOAEL-mol", "LOAEL-mg", "MOU"]
+DATA = ["LOAEL-mol"]
+#DATA = ["MOU"]
URIS = {
"LOAEL-mol"=>{
- # :dataset_uri=>"http://localhost:8083/dataset/3da90c55-0388-42a0-8ada-978abe4a515c",
- # :prediction_feature=>"http://localhost:8084/feature/2a74d78d-5b3d-438c-a1e5-6cfb16bd9354",
- # :new_feature_dataset_uri=>"http://localhost:8083/dataset/7de04de4-41ce-4528-97c2-fd92fbb4d0b8",
- # :orig_feature_dataset_uri=>"http://localhost:8083/dataset/e9016641-dddb-434f-bb05-63d80a37679a",
+ :dataset_uri=>"http://localhost:8083/dataset/3da90c55-0388-42a0-8ada-978abe4a515c",
+ :prediction_feature=>"http://localhost:8084/feature/2a74d78d-5b3d-438c-a1e5-6cfb16bd9354",
+ :new_feature_dataset_uri=>"http://localhost:8083/dataset/7de04de4-41ce-4528-97c2-fd92fbb4d0b8",
+ :orig_feature_dataset_uri=>"http://localhost:8083/dataset/e9016641-dddb-434f-bb05-63d80a37679a",
+ :all_feature_dataset_uri=>"http://localhost:8083/dataset/bc551e60-b2e7-4fb9-9aa3-52433ac1358c",
+ :ob_feature_dataset_uri=>"http://localhost:8083/dataset/ac44674f-efa9-4cf9-b1ff-37e63e3cf8aa",
},
"LOAEL-mg"=>{
- # :dataset_uri=>"http://localhost:8083/dataset/4f3b9de4-0494-4339-8ebd-e6c6c1984a23",
- # :prediction_feature=>"http://localhost:8084/feature/ba5b0f78-36bc-4ac3-8020-9d8b2ca3bd13",
- # :new_feature_dataset_uri=>"http://localhost:8083/dataset/7de04de4-41ce-4528-97c2-fd92fbb4d0b8",
- # :orig_feature_dataset_uri=>"http://localhost:8083/dataset/e9016641-dddb-434f-bb05-63d80a37679a",
+ :dataset_uri=>"http://localhost:8083/dataset/4f3b9de4-0494-4339-8ebd-e6c6c1984a23",
+ :prediction_feature=>"http://localhost:8084/feature/ba5b0f78-36bc-4ac3-8020-9d8b2ca3bd13",
+ :new_feature_dataset_uri=>"http://localhost:8083/dataset/7de04de4-41ce-4528-97c2-fd92fbb4d0b8",
+ :orig_feature_dataset_uri=>"http://localhost:8083/dataset/e9016641-dddb-434f-bb05-63d80a37679a",
},
"MOU"=>{
# :dataset_uri=>"http://localhost:8083/dataset/f0af478a-51e6-41a5-adb2-d1a9bedf8981",
@@ -70,6 +73,38 @@ def test_dataset_uri(d)
end
end
+# Shared slow path of the feature-dataset lookups below, used when URIS has
+# no entry for the requested key.
+#
+# Uploads data/03/<d>_uniq.csv as a new dataset, yields the names of all
+# available physchem descriptors to the block (which returns the names to
+# compute), posts the request to the physchem descriptor service on
+# localhost:8081 and waits for the task to finish. The selected names are
+# kept in @descriptors, exactly as the two original methods did.
+#
+# d          - data set name, e.g. "LOAEL-mol"
+# config_key - URIS key the resulting URI belongs under
+#
+# Returns whatever plz_add returns.
+# NOTE(review): plz_add is defined elsewhere; presumably it asks the user to
+# paste the "<key> <uri>" line into URIS -- confirm.
+def compute_physchem_feature_dataset(d, config_key)
+  u_dataset = OpenTox::Dataset.new
+  u_dataset.upload "data/03/#{d}_uniq.csv"
+  puts "Unique Dataset: "+u_dataset.uri
+  info(u_dataset)
+  @descriptors = yield(OpenTox::Algorithm::Descriptor.physchem_descriptors.keys)
+  feat_uri = wait_for_task(OpenTox::RestClientWrapper.post("http://localhost:8081/algorithm/descriptor/physchem",{:dataset_uri => u_dataset.uri, :descriptors => @descriptors}))
+  f_dataset = OpenTox::Dataset.new feat_uri
+  info(f_dataset)
+  plz_add "#{config_key} #{feat_uri}"
+end
+
+# URI of the feature dataset built from all physchem descriptors except
+# "Cdk.IPMolecularLearning". Returns the URI configured in URIS when there
+# is one; otherwise computes the dataset via the descriptor service.
+def all_feature_dataset_uri(d)
+  configured = URIS[d] && URIS[d][:all_feature_dataset_uri]
+  return configured if configured
+
+  compute_physchem_feature_dataset(d, :all_feature_dataset_uri) do |names|
+    names - ["Cdk.IPMolecularLearning"]
+  end
+end
+
+# URI of the feature dataset built from the physchem descriptors whose name
+# starts with "Openbabel.". Returns the URI configured in URIS when there is
+# one; otherwise computes the dataset via the descriptor service.
+def ob_feature_dataset_uri(d)
+  configured = URIS[d] && URIS[d][:ob_feature_dataset_uri]
+  return configured if configured
+
+  compute_physchem_feature_dataset(d, :ob_feature_dataset_uri) do |names|
+    names.select { |x| x=~/^Openbabel\./ }
+  end
+end
+
def new_feature_dataset_uri(d)
if URIS[d] and URIS[d][:new_feature_dataset_uri]
URIS[d][:new_feature_dataset_uri]
diff --git a/nch/results/grid.yaml b/nch/results/grid.yaml
new file mode 100644
index 0000000..fb136a7
--- /dev/null
+++ b/nch/results/grid.yaml
@@ -0,0 +1,151 @@
+0.9
+random-seed:0
+---
+- :min_sim: 0.2
+ :min_train: 0.0
+ :cv: http://localhost:8087/validation/crossvalidation/44
+ :r_square: 0.6022551813587134
+ :unpredicted: '0'
+- :min_sim: 0.4
+ :min_train: 0.0
+ :cv: http://localhost:8087/validation/crossvalidation/45
+ :r_square: 0.6796007578295739
+ :unpredicted: '0'
+- :min_sim: 0.6
+ :min_train: 0.0
+ :cv: http://localhost:8087/validation/crossvalidation/46
+ :r_square: 0.657330788592535
+ :unpredicted: '0'
+- :min_sim: 0.8
+ :min_train: 0.0
+ :cv: http://localhost:8087/validation/crossvalidation/48
+ :r_square: 0.661561905445333
+ :unpredicted: '17'
+
+0.8
+random-seed:1
+---
+- :min_sim: 0.2
+ :min_train: 0.0
+ :cv: http://localhost:8087/validation/crossvalidation/50
+ :r_square: 0.6030899122765869
+ :unpredicted: '0'
+- :min_sim: 0.4
+ :min_train: 0.0
+ :cv: http://localhost:8087/validation/crossvalidation/51
+ :r_square: 0.6043871746312677
+ :unpredicted: '0'
+- :min_sim: 0.6
+ :min_train: 0.0
+ :cv: http://localhost:8087/validation/crossvalidation/52
+ :r_square: 0.6191244139466336
+ :unpredicted: '0'
+
+0.66
+random-seed:2
+
+---
+- :min_sim: 0.0
+ :min_train: 0.0
+ :cv: http://localhost:8087/validation/crossvalidation/54
+ :r_square: 0.45370176424121433
+ :unpredicted: '0'
+- :min_sim: 0.1
+ :min_train: 0.0
+ :cv: http://localhost:8087/validation/crossvalidation/55
+ :r_square: 0.4595956327160562
+ :unpredicted: '0'
+- :min_sim: 0.2
+ :min_train: 0.0
+ :cv: http://localhost:8087/validation/crossvalidation/56
+ :r_square: 0.4277448070809924
+ :unpredicted: '0'
+- :min_sim: 0.3
+ :min_train: 0.0
+ :cv: http://localhost:8087/validation/crossvalidation/57
+ :r_square: 0.44307217405529253
+ :unpredicted: '0'
+- :min_sim: 0.4
+ :min_train: 0.0
+ :cv: http://localhost:8087/validation/crossvalidation/58
+ :r_square: 0.4536563787494641
+ :unpredicted: '0'
+- :min_sim: 0.5
+ :min_train: 0.0
+ :cv: http://localhost:8087/validation/crossvalidation/59
+ :r_square: 0.4461298986550688
+ :unpredicted: '0'
+- :min_sim: 0.6
+ :min_train: 0.0
+ :cv: http://localhost:8087/validation/crossvalidation/60
+ :r_square: 0.4720263367371236
+ :unpredicted: '1'
+- :min_sim: 0.7
+ :min_train: 0.0
+ :cv: http://localhost:8087/validation/crossvalidation/61
+ :r_square: 0.5057497593073133
+ :unpredicted: '15'
+- :min_sim: 0.8
+ :min_train: 0.0
+ :cv: http://localhost:8087/validation/crossvalidation/62
+ :r_square: 0.5505636328364947
+ :unpredicted: '65'
+
+complete cv
+min_sim 0
+---
+- :min_sim: 0.4
+ :min_train: 0.0
+ :cv: http://localhost:8087/validation/crossvalidation/65
+ :r_square: 0.49992001578007306
+ :unpredicted: '0'
+- :min_sim: 0.5
+ :min_train: 0.0
+ :cv: http://localhost:8087/validation/crossvalidation/66
+ :r_square: 0.5048172615733829
+ :unpredicted: '0'
+- :min_sim: 0.55
+ :min_train: 0.0
+ :cv: http://localhost:8087/validation/crossvalidation/67
+ :r_square: 0.5108609305937302
+ :unpredicted: '1'
+- :min_sim: 0.6
+ :min_train: 0.0
+ :cv: http://localhost:8087/validation/crossvalidation/68
+ :r_square: 0.5022488522195739
+ :unpredicted: '1'
+- :min_sim: 0.65
+ :min_train: 0.0
+ :cv: http://localhost:8087/validation/crossvalidation/69
+ :r_square: 0.5113758122376881
+ :unpredicted: '12'
+- :min_sim: 0.7
+ :min_train: 0.0
+ :cv: http://localhost:8087/validation/crossvalidation/70
+ :r_square: 0.5133188488988734
+ :unpredicted: '44'
+
+
+skip_ratio = 0.8
+random_seed = 3
+---
+- :min_sim: 0.65
+ :min_train: 0.0
+ :cv: http://localhost:8087/validation/crossvalidation/73
+ :r_square: 0.28572954365573644
+ :unpredicted: '3'
+ :features: all-pc-features
+- :min_sim: 0.65
+ :min_train: 0.0
+ :cv: http://localhost:8087/validation/crossvalidation/74
+ :r_square: 0.20115321576362188
+ :unpredicted: '3'
+ :features: new-pc-features
+- :min_sim: 0.65
+ :min_train: 0.0
+ :cv: http://localhost:8087/validation/crossvalidation/76
+ :r_square: 0.013357820165398548
+ :unpredicted: '5'
+ :features: ob-pc-features
+
+