From 7d6c3ab68c0ea916e785ad26ee5c22c311901465 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Thu, 30 Oct 2014 17:30:36 +0100 Subject: add grid search scripts --- nch/07_validate.rb | 4 +- nch/08_grid_search.rb | 53 ++++++++++++++++++ nch/09_eval_grid.rb | 88 +++++++++++++++++++++++++++++ nch/config.rb | 55 ++++++++++++++---- nch/results/grid.yaml | 151 ++++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 340 insertions(+), 11 deletions(-) create mode 100755 nch/08_grid_search.rb create mode 100755 nch/09_eval_grid.rb create mode 100644 nch/results/grid.yaml diff --git a/nch/07_validate.rb b/nch/07_validate.rb index 4641983..d20d8ac 100755 --- a/nch/07_validate.rb +++ b/nch/07_validate.rb @@ -13,7 +13,9 @@ DATA.each do |d| prediction_feature = prediction_feature(d) test_dataset_uri = test_dataset_uri(d) if d=="MOU" - [ new_feature_dataset_uri(d), orig_feature_dataset_uri(d) ].each do |feature_dataset_uri| + [ #new_feature_dataset_uri(d), + orig_feature_dataset_uri(d) + ].each do |feature_dataset_uri| puts "\nvalidating with features #{feature_dataset_uri}" params = { :dataset_uri => dataset_uri, diff --git a/nch/08_grid_search.rb b/nch/08_grid_search.rb new file mode 100755 index 0000000..c5ae2a3 --- /dev/null +++ b/nch/08_grid_search.rb @@ -0,0 +1,53 @@ +#!/usr/bin/env ruby + +require "./config.rb" + +require "bundler" +Bundler.require + +DATA.each do |d| + puts "" + puts d + + dataset_uri = dataset_uri(d) + prediction_feature = prediction_feature(d) + + results = [] +# skip_ratio = 0.8 +# random_seed = 3 + { + all_feature_dataset_uri(d)=>"all-pc-features", +# new_feature_dataset_uri(d)=>"new-pc-features", +# ob_feature_dataset_uri(d)=>"ob-pc-features", + }.each do |feature_dataset_uri,f_desc| + [ 0.65 ].each do |min_sim| # [0.2,0.4,0.6,0.8] + [ 0.0 ].each do |min_train| #[0.0,0.1,0.2] + + puts "\nfeatures #{f_desc}" + puts "min_sim #{min_sim}" + puts "min_train #{min_train}" + + params = { :dataset_uri => dataset_uri, + :algorithm_uri => File.join($algorithm[:uri],"lazar"), + :algorithm_params => "feature_dataset_uri=#{feature_dataset_uri};min_sim=#{min_sim};min_train_performance=#{min_train}", + :prediction_feature => prediction_feature, + :loo => "uniq", } + if defined?(skip_ratio) + params[:skip_ratio] = skip_ratio + params[:random_seed] = random_seed + end + cv_uri = wait_for_task(OpenTox::RestClientWrapper.post("http://localhost:8087/validation/crossvalidation/loo",params)) + cv = OpenTox::Crossvalidation.find(cv_uri) + puts "Cross-Validation: "+cv.uri + r_square = cv.statistics.metadata[RDF::OT.regressionStatistics.to_s][RDF::OT.rSquare.to_s] + unpredicted = cv.statistics.metadata[RDF::OT.numUnpredicted.to_s] + puts "r^2 #{r_square}" + # r = OpenTox::CrossvalidationReport.create(cv.uri) + # puts "Report: "+r.uri + + results << {:min_sim => min_sim, :min_train=>min_train, :cv => cv_uri, :r_square => r_square, :unpredicted => unpredicted, :features => f_desc}#, :report => r.uri} + puts results.to_yaml + end + end + end +end diff --git a/nch/09_eval_grid.rb b/nch/09_eval_grid.rb new file mode 100755 index 0000000..69bc31d --- /dev/null +++ b/nch/09_eval_grid.rb @@ -0,0 +1,88 @@ +#!/usr/bin/env ruby + +require "./config.rb" + +require "bundler" +Bundler.require + +res = YAML.load("--- +- :min_sim: 0.0 + :min_train: 0.0 + :cv: http://localhost:8087/validation/crossvalidation/54 + :r_square: 0.45370176424121433 + :unpredicted: '0' +- :min_sim: 0.1 + :min_train: 0.0 + :cv: http://localhost:8087/validation/crossvalidation/55 + :r_square: 0.4595956327160562 + :unpredicted: '0' +- :min_sim: 0.2 + :min_train: 0.0 + :cv: http://localhost:8087/validation/crossvalidation/56 + :r_square: 0.4277448070809924 + :unpredicted: '0' +- :min_sim: 0.3 + :min_train: 0.0 + :cv: http://localhost:8087/validation/crossvalidation/57 + :r_square: 0.44307217405529253 + :unpredicted: '0' +- :min_sim: 0.4 + :min_train: 0.0 + :cv: http://localhost:8087/validation/crossvalidation/58 + :r_square: 0.4536563787494641 + :unpredicted: '0' +- :min_sim: 0.5 + :min_train: 0.0 + :cv: http://localhost:8087/validation/crossvalidation/59 + :r_square: 0.4461298986550688 + :unpredicted: '0' +- :min_sim: 0.6 + :min_train: 0.0 + :cv: http://localhost:8087/validation/crossvalidation/60 + :r_square: 0.4720263367371236 + :unpredicted: '1' +- :min_sim: 0.7 + :min_train: 0.0 + :cv: http://localhost:8087/validation/crossvalidation/61 + :r_square: 0.5057497593073133 + :unpredicted: '15' +- :min_sim: 0.8 + :min_train: 0.0 + :cv: http://localhost:8087/validation/crossvalidation/62 + :r_square: 0.5505636328364947 + :unpredicted: '65' +") + +all=182 + +min_train = [nil,0.1,0.2,0.3,0.4,0.5] + +s = [ "sim\\train" ] +min_train.each do |m| + s << (m==nil ? 0.0 : m).to_s +end +out = [s] + +res.each do |r| + s = [ r[:min_sim].to_s ] + stats = OpenTox::Crossvalidation.find(r[:cv]).statistics + min_train.each do |m| + data = (m==nil ? stats.metadata : stats.filter(m)) + v = sprintf("%.3f",data[RDF::OT.regressionStatistics.to_s][RDF::OT.rSquare.to_s]) + v << "(" + v << sprintf("%2d",(all - (data[RDF::OT.numInstances.to_s].to_i-data[RDF::OT.numUnpredicted.to_s].to_i))) + v << ")" + s << v + end + out << s +end + + +def print_2d_array(a, cs=10) + report = [] + report << a.enum_for(:each_with_index).map { |ia, i| + ia.map{|e| "%#{cs}s" % e}.join(" | ") } + puts report.join("\n") +end + +print_2d_array out diff --git a/nch/config.rb b/nch/config.rb index 293cdc8..7a186bc 100644 --- a/nch/config.rb +++ b/nch/config.rb @@ -1,19 +1,22 @@ -#DATA = ["LOAEL-mol", "LOAEL-mg","MOU"] -DATA = ["MOU"] +#DATA = ["LOAEL-mol", "LOAEL-mg", "MOU"] +DATA = ["LOAEL-mol"] +#DATA = ["MOU"] URIS = { "LOAEL-mol"=>{ - # :dataset_uri=>"http://localhost:8083/dataset/3da90c55-0388-42a0-8ada-978abe4a515c", - # :prediction_feature=>"http://localhost:8084/feature/2a74d78d-5b3d-438c-a1e5-6cfb16bd9354", - # :new_feature_dataset_uri=>"http://localhost:8083/dataset/7de04de4-41ce-4528-97c2-fd92fbb4d0b8", - # :orig_feature_dataset_uri=>"http://localhost:8083/dataset/e9016641-dddb-434f-bb05-63d80a37679a", + :dataset_uri=>"http://localhost:8083/dataset/3da90c55-0388-42a0-8ada-978abe4a515c", + :prediction_feature=>"http://localhost:8084/feature/2a74d78d-5b3d-438c-a1e5-6cfb16bd9354", + :new_feature_dataset_uri=>"http://localhost:8083/dataset/7de04de4-41ce-4528-97c2-fd92fbb4d0b8", + :orig_feature_dataset_uri=>"http://localhost:8083/dataset/e9016641-dddb-434f-bb05-63d80a37679a", + :all_feature_dataset_uri=>"http://localhost:8083/dataset/bc551e60-b2e7-4fb9-9aa3-52433ac1358c", + :ob_feature_dataset_uri=>"http://localhost:8083/dataset/ac44674f-efa9-4cf9-b1ff-37e63e3cf8aa", }, "LOAEL-mg"=>{ - # :dataset_uri=>"http://localhost:8083/dataset/4f3b9de4-0494-4339-8ebd-e6c6c1984a23", - # :prediction_feature=>"http://localhost:8084/feature/ba5b0f78-36bc-4ac3-8020-9d8b2ca3bd13", - # :new_feature_dataset_uri=>"http://localhost:8083/dataset/7de04de4-41ce-4528-97c2-fd92fbb4d0b8", - # :orig_feature_dataset_uri=>"http://localhost:8083/dataset/e9016641-dddb-434f-bb05-63d80a37679a", + :dataset_uri=>"http://localhost:8083/dataset/4f3b9de4-0494-4339-8ebd-e6c6c1984a23", + :prediction_feature=>"http://localhost:8084/feature/ba5b0f78-36bc-4ac3-8020-9d8b2ca3bd13", + :new_feature_dataset_uri=>"http://localhost:8083/dataset/7de04de4-41ce-4528-97c2-fd92fbb4d0b8", + :orig_feature_dataset_uri=>"http://localhost:8083/dataset/e9016641-dddb-434f-bb05-63d80a37679a", }, "MOU"=>{ # :dataset_uri=>"http://localhost:8083/dataset/f0af478a-51e6-41a5-adb2-d1a9bedf8981", @@ -70,6 +73,38 @@ def test_dataset_uri(d) end end +def all_feature_dataset_uri(d) + if URIS[d] and URIS[d][:all_feature_dataset_uri] + URIS[d][:all_feature_dataset_uri] + else + u_dataset = OpenTox::Dataset.new + u_dataset.upload File.join("data/03/#{d}_uniq.csv") + puts "Unique Dataset: "+u_dataset.uri + info(u_dataset) + @descriptors = OpenTox::Algorithm::Descriptor.physchem_descriptors.keys - ["Cdk.IPMolecularLearning"] + all_feat_uri = wait_for_task(OpenTox::RestClientWrapper.post("http://localhost:8081/algorithm/descriptor/physchem",{:dataset_uri => u_dataset.uri, :descriptors => @descriptors})) + f_dataset = OpenTox::Dataset.new all_feat_uri + info(f_dataset) + plz_add "all_feature_dataset_uri #{all_feat_uri}" + end +end + +def ob_feature_dataset_uri(d) + if URIS[d] and URIS[d][:ob_feature_dataset_uri] + URIS[d][:ob_feature_dataset_uri] + else + u_dataset = OpenTox::Dataset.new + u_dataset.upload File.join("data/03/#{d}_uniq.csv") + puts "Unique Dataset: "+u_dataset.uri + info(u_dataset) + @descriptors = OpenTox::Algorithm::Descriptor.physchem_descriptors.keys.keep_if{|x| x=~/^Openbabel\./} + ob_feat_uri = wait_for_task(OpenTox::RestClientWrapper.post("http://localhost:8081/algorithm/descriptor/physchem",{:dataset_uri => u_dataset.uri, :descriptors => @descriptors})) + f_dataset = OpenTox::Dataset.new ob_feat_uri + info(f_dataset) + plz_add "ob_feature_dataset_uri #{ob_feat_uri}" + end +end + def new_feature_dataset_uri(d) if URIS[d] and URIS[d][:new_feature_dataset_uri] URIS[d][:new_feature_dataset_uri] diff --git a/nch/results/grid.yaml b/nch/results/grid.yaml new file mode 100644 index 0000000..fb136a7 --- /dev/null +++ b/nch/results/grid.yaml @@ -0,0 +1,151 @@ +0.9 +random-seed:0 +--- +- :min_sim: 0.2 + :min_train: 0.0 + :cv: http://localhost:8087/validation/crossvalidation/44 + :r_square: 0.6022551813587134 + :unpredicted: '0' +- :min_sim: 0.4 + :min_train: 0.0 + :cv: http://localhost:8087/validation/crossvalidation/45 + :r_square: 0.6796007578295739 + :unpredicted: '0' +- :min_sim: 0.6 + :min_train: 0.0 + :cv: http://localhost:8087/validation/crossvalidation/46 + :r_square: 0.657330788592535 + :unpredicted: '0' +- :min_sim: 0.8 + :min_train: 0.0 + :cv: http://localhost:8087/validation/crossvalidation/48 + :r_square: 0.661561905445333 + :unpredicted: '17' + +0.8 +random-seed:1 +--- +- :min_sim: 0.2 + :min_train: 0.0 + :cv: http://localhost:8087/validation/crossvalidation/50 + :r_square: 0.6030899122765869 + :unpredicted: '0' +- :min_sim: 0.4 + :min_train: 0.0 + :cv: http://localhost:8087/validation/crossvalidation/51 + :r_square: 0.6043871746312677 + :unpredicted: '0' +- :min_sim: 0.6 + :min_train: 0.0 + :cv: http://localhost:8087/validation/crossvalidation/52 + :r_square: 0.6191244139466336 + :unpredicted: '0' + +0.66 +random-seed:2 + +--- +- :min_sim: 0.0 + :min_train: 0.0 + :cv: http://localhost:8087/validation/crossvalidation/54 + :r_square: 0.45370176424121433 + :unpredicted: '0' +- :min_sim: 0.1 + :min_train: 0.0 + :cv: http://localhost:8087/validation/crossvalidation/55 + :r_square: 0.4595956327160562 + :unpredicted: '0' +- :min_sim: 0.2 + :min_train: 0.0 + :cv: http://localhost:8087/validation/crossvalidation/56 + :r_square: 0.4277448070809924 + :unpredicted: '0' +- :min_sim: 0.3 + :min_train: 0.0 + :cv: http://localhost:8087/validation/crossvalidation/57 + :r_square: 0.44307217405529253 + :unpredicted: '0' +- :min_sim: 0.4 + :min_train: 0.0 + :cv: http://localhost:8087/validation/crossvalidation/58 + :r_square: 0.4536563787494641 + :unpredicted: '0' +- :min_sim: 0.5 + :min_train: 0.0 + :cv: http://localhost:8087/validation/crossvalidation/59 + :r_square: 0.4461298986550688 + :unpredicted: '0' +- :min_sim: 0.6 + :min_train: 0.0 + :cv: http://localhost:8087/validation/crossvalidation/60 + :r_square: 0.4720263367371236 + :unpredicted: '1' +- :min_sim: 0.7 + :min_train: 0.0 + :cv: http://localhost:8087/validation/crossvalidation/61 + :r_square: 0.5057497593073133 + :unpredicted: '15' +- :min_sim: 0.8 + :min_train: 0.0 + :cv: http://localhost:8087/validation/crossvalidation/62 + :r_square: 0.5505636328364947 + :unpredicted: '65' + +complete cv +min_sim 0 +--- +- :min_sim: 0.4 + :min_train: 0.0 + :cv: http://localhost:8087/validation/crossvalidation/65 + :r_square: 0.49992001578007306 + :unpredicted: '0' +- :min_sim: 0.5 + :min_train: 0.0 + :cv: http://localhost:8087/validation/crossvalidation/66 + :r_square: 0.5048172615733829 + :unpredicted: '0' +- :min_sim: 0.55 + :min_train: 0.0 + :cv: http://localhost:8087/validation/crossvalidation/67 + :r_square: 0.5108609305937302 + :unpredicted: '1' +- :min_sim: 0.6 + :min_train: 0.0 + :cv: http://localhost:8087/validation/crossvalidation/68 + :r_square: 0.5022488522195739 + :unpredicted: '1' +- :min_sim: 0.65 + :min_train: 0.0 + :cv: http://localhost:8087/validation/crossvalidation/69 + :r_square: 0.5113758122376881 + :unpredicted: '12' +- :min_sim: 0.7 + :min_train: 0.0 + :cv: http://localhost:8087/validation/crossvalidation/70 + :r_square: 0.5133188488988734 + :unpredicted: '44' + + +skip_ratio = 0.8 +random_seed = 3 +--- +- :min_sim: 0.65 + :min_train: 0.0 + :cv: http://localhost:8087/validation/crossvalidation/73 + :r_square: 0.28572954365573644 + :unpredicted: '3' + :features: all-pc-features +- :min_sim: 0.65 + :min_train: 0.0 + :cv: http://localhost:8087/validation/crossvalidation/74 + :r_square: 0.20115321576362188 + :unpredicted: '3' + :features: new-pc-features +- :min_sim: 0.65 + :min_train: 0.0 + :cv: http://localhost:8087/validation/crossvalidation/76 + :r_square: 0.013357820165398548 + :unpredicted: '5' + :features: ob-pc-features + + -- cgit v1.2.3