diff options
author | mguetlein <martin.guetlein@gmail.com> | 2012-10-12 10:27:15 +0200 |
---|---|---|
committer | mguetlein <martin.guetlein@gmail.com> | 2012-10-12 10:27:15 +0200 |
commit | ba9d3e628e424a84d8c892c4fdf49c2258a95352 (patch) | |
tree | 4dbbf4269e6e3dc1dd94024921255a506a5fa921 /validation | |
parent | cd8800cf377b5b8c07dc25f3629e503f01bc9112 (diff) |
add contra splitting, provide access to dataset-splits, minor stuff
Diffstat (limited to 'validation')
-rwxr-xr-x | validation/validation_application.rb | 45 | ||||
-rwxr-xr-x | validation/validation_service.rb | 10 | ||||
-rwxr-xr-x | validation/validation_test.rb | 32 |
3 files changed, 70 insertions, 17 deletions
diff --git a/validation/validation_application.rb b/validation/validation_application.rb index 9233502..c873a72 100755 --- a/validation/validation_application.rb +++ b/validation/validation_application.rb @@ -9,8 +9,8 @@ require 'validation/validation_service.rb' helpers do def check_stratified(params) params[:stratified] = "false" unless params[:stratified] - raise OpenTox::BadRequestError.new "stratified != true|false|super|super4|super5|anti, is #{params[:stratified]}" unless - params[:stratified]=~/^(true|false|super|super4|super5|anti)$/ + raise OpenTox::BadRequestError.new "stratified != true|false|super|super4|super5|contra, is #{params[:stratified]}" unless + params[:stratified]=~/^(true|false|super|super4|super5|contra)$/ end end @@ -745,6 +745,47 @@ end # return validation.send(params[:attribute]) #end + +def get_splits(id) + require "#{ENV['HOME']}/workspace/ValidationExperiments/dataset_split.rb" + validation = Validation::Validation[id] + raise OpenTox::NotFoundError.new "Validation '#{id}' not found." unless validation + Exp::DatasetSplit.find({:train_dataset_uri => validation.training_dataset_uri,:test_dataset_uri => validation.test_dataset_uri,}) +end + +def get_split(id,id2) + get_splits(id).each do |s| + return s if id2.to_s==s.id.to_s + end + raise "not found: dataset split with id #{id2}" unless split +end + +get '/:id/split' do + splits = get_splits(params[:id]) + base_uri = "http://local-ot/validation/#{params[:id]}/split/" + uris = splits.collect{|s| base_uri+s.id} + if request.env['HTTP_ACCEPT'] =~ /text\/html/ + content_type "text/html" + OpenTox.text_to_html uris.join("\n") + else + content_type "text/uri-list" + uris.join("\n") + end +end + +get '/:id/split/:id2' do + split = get_split(params[:id],params[:id2]) + content_type "text/html" + split.inspect + OpenTox.text_to_html ["http://local-ot/validation/#{params[:id]}/split/#{params[:id2]}/viz",split].to_yaml +end + +get '/:id/split/:id2/viz' do + split = get_split(params[:id],params[:id2]) + content_type("image/svg+xml") + result = body(File.new(split.svg_path)) +end + get '/:id/migrate_median_confidence' do LOGGER.debug "migrate median confidence" validation = Validation::Validation[params[:id]] diff --git a/validation/validation_service.rb b/validation/validation_service.rb index e099e25..f57b3a5 100755 --- a/validation/validation_service.rb +++ b/validation/validation_service.rb @@ -653,15 +653,21 @@ module Validation meta = { DC.creator => $url_provider.url_for('/training_test_split',:full) } case stratified - when /^(true|super|super4|super5|anti)$/ + when /^(true|super|super4|super5|contra)$/ raise "store split clusters not available for true stratified splits" if store_split_clusters and stratified=="true" if stratified=="true" raise OpenTox::BadRequestError.new "prediction feature required for stratified splits" unless prediction_feature LOGGER.warn "split features are ignored for stratified splits (use super instead)" if features features = [prediction_feature] + elsif stratified=="contra" + raise OpenTox::BadRequestError.new "prediction feature required for anti-stratified splits" unless prediction_feature + LOGGER.debug "prediction feature is removed for anti-stratified splits" + features = orig_dataset.features.keys-[prediction_feature] + raise unless features.size==orig_dataset.features.size-1 else LOGGER.warn "prediction feature is ignored for super- or anti-stratified splits" if prediction_feature end + LOGGER.debug "Using "+features.size.to_s+"/"+orig_dataset.features.size.to_s+" features for splitting" if features r_util = OpenTox::RUtil.new train, test = r_util.stratified_split( orig_dataset, meta, missing_values, split_ratio, @subjectid, random_seed, features, stratified, store_split_clusters ) @@ -705,7 +711,7 @@ module Validation end LOGGER.debug "split done, training dataset: '"+result[:training_dataset_uri].to_s+"', test dataset: '"+result[:test_dataset_uri].to_s+"'" else - raise OpenTox::BadRequestError.new "stratified != false|true|super, is #{stratified}" + raise OpenTox::BadRequestError.new "stratified != false|true|super|contra, is #{stratified}" end result end diff --git a/validation/validation_test.rb b/validation/validation_test.rb index 1a41881..a7fae88 100755 --- a/validation/validation_test.rb +++ b/validation/validation_test.rb @@ -60,6 +60,10 @@ class ValidationTest < Test::Unit::TestCase begin $test_case = self + get '2568/split' + puts last_response.body + exit + # dataset_uri = "http://apps.ideaconsult.net:8080/ambit2/dataset/603206?pagesize=250&page=0" # test_dataset_uri = "http://apps.ideaconsult.net:8080/ambit2/dataset/603206?pagesize=250&page=1" # #prediction_feature = "http://apps.ideaconsult.net:8080/ambit2/feature/528321" @@ -85,16 +89,16 @@ class ValidationTest < Test::Unit::TestCase # {:validation_uris=>"http://local-ot/validation/389,http://local-ot/validation/390,http://local-ot/validation/391,http://local-ot/validation/392", # :identifier=>"split1,split1,split2,split2"} - post "/training_test_validation",{:prediction_feature=>"http://local-ot/dataset/9264/feature/endpoint", - :training_dataset_uri=>"http://local-ot/dataset/119127", - :algorithm_uri=>"http://local-ot/weka/RandomForest", - :test_dataset_uri=>"http://local-ot/dataset/119128", - :test_target_dataset_uri=>"http://local-ot/dataset/9264", - :algorithm_params=>""} - uri = last_response.body - rep = wait_for_task(uri) - puts rep - exit +# post "/training_test_validation",{:prediction_feature=>"http://local-ot/dataset/9264/feature/endpoint", +# :training_dataset_uri=>"http://local-ot/dataset/119127", +# :algorithm_uri=>"http://local-ot/weka/RandomForest", +# :test_dataset_uri=>"http://local-ot/dataset/119128", +# :test_target_dataset_uri=>"http://local-ot/dataset/9264", +# :algorithm_params=>""} +# uri = last_response.body +# rep = wait_for_task(uri) +# puts rep +# exit # # #post "/report/validation",{:validation_uris=>"http://local-ot/validation/22849",:min_confidence=>0.5} # get "/22849",{:min_confidence=>0.5} @@ -108,10 +112,12 @@ class ValidationTest < Test::Unit::TestCase # #dataset_uri = "http://apps.ideaconsult.net:8080/ambit2/dataset/603306?feature_uris[]=http://apps.ideaconsult.net:8080/ambit2/feature/764036" # #dataset_uri = "http://apps.ideaconsult.net:8080/ambit2/dataset/603204" - #dataset_uri = "http://local-ot/dataset/9264" - #post "/plain_training_test_split",{:dataset_uri=>dataset_uri, :stratified=>"super", :split_ratio=>0.25} + dataset_uri = "http://local-ot/dataset/12084" + prediction_feature = "http://local-ot/dataset/12084/feature/bbrc/1" + post "/plain_training_test_split",{:dataset_uri=>dataset_uri, :stratified=>"contra", :split_ratio=>0.25, :missing_values=>"0", + :prediction_feature => prediction_feature} - get '31355/median_confidence' + #get '31355/median_confidence' puts last_response.body exit |