From 14f49658a0b867b4cc68242804b530548d31097f Mon Sep 17 00:00:00 2001 From: mguetlein Date: Mon, 23 Jul 2012 22:30:28 +0200 Subject: new params for new r-util functions --- validation/validation_application.rb | 7 ++++--- validation/validation_service.rb | 11 +++++++---- validation/validation_test.rb | 16 ++++++++++------ 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/validation/validation_application.rb b/validation/validation_application.rb index 689241c..eaa3ce2 100755 --- a/validation/validation_application.rb +++ b/validation/validation_application.rb @@ -9,8 +9,8 @@ require 'validation/validation_service.rb' helpers do def check_stratified(params) params[:stratified] = "false" unless params[:stratified] - raise OpenTox::BadRequestError.new "stratified != true|false|super|super4|anti, is #{params[:stratified]}" unless - params[:stratified]=~/^(true|false|super|super4|anti)$/ + raise OpenTox::BadRequestError.new "stratified != true|false|super|super4|super5|anti, is #{params[:stratified]}" unless + params[:stratified]=~/^(true|false|super|super4|super5|anti)$/ end end @@ -586,7 +586,8 @@ post '/plain_training_test_split' do split_features = nil split_features = params[:split_features].split(";") if params[:split_features] result = Validation::Util.train_test_dataset_split(params[:dataset_uri], params[:prediction_feature], @subjectid, - params[:stratified], params[:split_ratio], params[:random_seed], params[:missing_values], task, split_features) + params[:stratified], params[:split_ratio], params[:random_seed], params[:missing_values], task, split_features, + params[:store_split_clusters]) content_type "text/uri-list" res = result[:training_dataset_uri]+"\n"+result[:test_dataset_uri]+"\n" LOGGER.info "plain training test split done #{res.to_s.gsub("\n"," \\n ")}" diff --git a/validation/validation_service.rb b/validation/validation_service.rb index 1b29630..a2e97d0 100755 --- a/validation/validation_service.rb +++ 
b/validation/validation_service.rb @@ -500,7 +500,7 @@ module Validation meta, self.subjectid ).uri test_dataset_uris << test_dataset_uri end - when /^(true|super|super4)$/ + when /^(true|super|super4|super5)$/ raise "DO NOT USED SUPER-STRATIFICATION FOR VAL-EXPERIMENTS AND CV, IF SO SOLVE _MISSING_VAULE_NA_ PROBLEM" if stratified=~/super/ if stratified=="true" features = [ self.prediction_feature ] @@ -627,7 +627,7 @@ module Validation # splits a dataset into test and training dataset # returns map with training_dataset_uri and test_dataset_uri def self.train_test_dataset_split( orig_dataset_uri, prediction_feature, subjectid, stratified="false", - split_ratio=nil, random_seed=nil, missing_values=nil, task=nil, features=nil ) + split_ratio=nil, random_seed=nil, missing_values=nil, task=nil, features=nil, store_split_clusters=false ) split_ratio=0.67 unless split_ratio split_ratio = split_ratio.to_f @@ -651,7 +651,8 @@ module Validation meta = { DC.creator => $url_provider.url_for('/training_test_split',:full) } case stratified - when /^(true|super|super4|anti)$/ + when /^(true|super|super4|super5|anti)$/ + raise "store split clusters not available for true stratified splits" if store_split_clusters and stratified=="true" if stratified=="true" raise OpenTox::BadRequestError.new "prediction feature required for stratified splits" unless prediction_feature LOGGER.warn "split features are ignored for stratified splits (use super instead)" if features @@ -660,10 +661,12 @@ module Validation LOGGER.warn "prediction feature is ignored for super- or anti-stratified splits" if prediction_feature end r_util = OpenTox::RUtil.new - train, test = r_util.stratified_split( orig_dataset, meta, missing_values, split_ratio, @subjectid, random_seed, features, stratified ) + train, test = r_util.stratified_split( orig_dataset, meta, missing_values, split_ratio, + @subjectid, random_seed, features, stratified, store_split_clusters ) r_util.quit_r result = {:training_dataset_uri => 
train.uri, :test_dataset_uri => test.uri} when "false" + raise "store split clusters not available for random splits" if store_split_clusters LOGGER.warn "split features are ignored for non-stratified splits (use super instead)" if features compounds = orig_dataset.compounds raise OpenTox::BadRequestError.new "Cannot split datset, num compounds in dataset < 2 ("+compounds.size.to_s+")" if compounds.size<2 diff --git a/validation/validation_test.rb b/validation/validation_test.rb index 5a7c2aa..1a41881 100755 --- a/validation/validation_test.rb +++ b/validation/validation_test.rb @@ -85,12 +85,16 @@ class ValidationTest < Test::Unit::TestCase # {:validation_uris=>"http://local-ot/validation/389,http://local-ot/validation/390,http://local-ot/validation/391,http://local-ot/validation/392", # :identifier=>"split1,split1,split2,split2"} -# post "/training_test_validation",{:prediction_feature=>"http://opentox.informatik.uni-freiburg.de/dataset/2/feature/MTP", -# :training_dataset_uri=>"http://opentox.informatik.uni-freiburg.de/dataset/4755", -# :algorithm_uri=>"http://opentox.informatik.uni-freiburg.de/superservice", -# :test_dataset_uri=>"http://opentox.informatik.uni-freiburg.de/dataset/3556", -# :algorithm_params=>"prediction_algorithm=http://opentox.informatik.uni-freiburg.de/weka/M5P;create_bbrc_features=false;ad_algorithm=http://opentox.informatik.uni-freiburg.de/appdomain/EuclideanDistance"} -# exit + post "/training_test_validation",{:prediction_feature=>"http://local-ot/dataset/9264/feature/endpoint", + :training_dataset_uri=>"http://local-ot/dataset/119127", + :algorithm_uri=>"http://local-ot/weka/RandomForest", + :test_dataset_uri=>"http://local-ot/dataset/119128", + :test_target_dataset_uri=>"http://local-ot/dataset/9264", + :algorithm_params=>""} + uri = last_response.body + rep = wait_for_task(uri) + puts rep + exit # # #post "/report/validation",{:validation_uris=>"http://local-ot/validation/22849",:min_confidence=>0.5} # get 
"/22849",{:min_confidence=>0.5} -- cgit v1.2.3