summaryrefslogtreecommitdiff
path: root/validation
diff options
context:
space:
mode:
authormguetlein <martin.guetlein@gmail.com>2012-10-12 10:27:15 +0200
committermguetlein <martin.guetlein@gmail.com>2012-10-12 10:27:15 +0200
commitba9d3e628e424a84d8c892c4fdf49c2258a95352 (patch)
tree4dbbf4269e6e3dc1dd94024921255a506a5fa921 /validation
parentcd8800cf377b5b8c07dc25f3629e503f01bc9112 (diff)
add contra splitting, provide access to dataset-splits, minor stuff
Diffstat (limited to 'validation')
-rwxr-xr-xvalidation/validation_application.rb45
-rwxr-xr-xvalidation/validation_service.rb10
-rwxr-xr-xvalidation/validation_test.rb32
3 files changed, 70 insertions, 17 deletions
diff --git a/validation/validation_application.rb b/validation/validation_application.rb
index 9233502..c873a72 100755
--- a/validation/validation_application.rb
+++ b/validation/validation_application.rb
@@ -9,8 +9,8 @@ require 'validation/validation_service.rb'
helpers do
def check_stratified(params)
params[:stratified] = "false" unless params[:stratified]
- raise OpenTox::BadRequestError.new "stratified != true|false|super|super4|super5|anti, is #{params[:stratified]}" unless
- params[:stratified]=~/^(true|false|super|super4|super5|anti)$/
+ # Defaults :stratified to "false" (line above) and rejects any value outside the whitelist.
+ # \A and \z anchor the whole string; ^ and $ would match per line and let
+ # multi-line values such as "x\ntrue" slip through.
+ raise OpenTox::BadRequestError.new "stratified != true|false|super|super4|super5|contra, is #{params[:stratified]}" unless
+ params[:stratified]=~/\A(true|false|super|super4|super5|contra)\z/
end
end
@@ -745,6 +745,47 @@ end
# return validation.send(params[:attribute])
#end
+
+# Fetches the dataset splits recorded for a validation.
+#
+# Requires the DatasetSplit definition from the ValidationExperiments
+# workspace below the current user's HOME, looks the validation up by id and
+# queries Exp::DatasetSplit with that validation's training and test dataset
+# URIs.
+#
+# @param id identifier handed to Validation::Validation[]
+# @return the result of Exp::DatasetSplit.find for the two dataset URIs
+# @raise [OpenTox::NotFoundError] if no validation exists for +id+
+def get_splits(id)
+  require "#{ENV['HOME']}/workspace/ValidationExperiments/dataset_split.rb"
+  found = Validation::Validation[id]
+  unless found
+    raise OpenTox::NotFoundError.new "Validation '#{id}' not found."
+  end
+  Exp::DatasetSplit.find({
+    :train_dataset_uri => found.training_dataset_uri,
+    :test_dataset_uri => found.test_dataset_uri
+  })
+end
+
+# Returns the dataset split of validation +id+ whose id equals +id2+.
+#
+# Ids are compared via to_s, so +id2+ may be a String (e.g. a route
+# parameter) regardless of the type of the split's own id.
+#
+# @param id  validation id, forwarded to get_splits
+# @param id2 id of the wanted dataset split
+# @return the first split whose id matches +id2+
+# @raise [OpenTox::NotFoundError] if the validation has no split with id +id2+
+def get_split(id,id2)
+  wanted = id2.to_s
+  # Plain each with an early return: the collection's own #find may be a
+  # query method rather than Enumerable#find.
+  get_splits(id).each do |s|
+    return s if s.id.to_s == wanted
+  end
+  # Reaching this point means nothing matched. The previous guard tested an
+  # undefined name (`split`), so a miss never produced this message.
+  raise OpenTox::NotFoundError.new "not found: dataset split with id #{id2}"
+end
+
+# Lists the URIs of all dataset splits that belong to validation :id.
+# Answers with text/html when the Accept header mentions text/html,
+# otherwise with text/uri-list (one URI per line).
+# NOTE(review): the host "local-ot" is hard-coded in the URIs; confirm
+# whether it should be derived from the request instead.
+get '/:id/split' do
+  splits = get_splits(params[:id])
+  base_uri = "http://local-ot/validation/#{params[:id]}/split/"
+  # Interpolate rather than String#+ so that a non-String split id does not
+  # raise a TypeError (get_split already compares ids via to_s).
+  uris = splits.collect{|s| "#{base_uri}#{s.id}"}
+  if request.env['HTTP_ACCEPT'] =~ /text\/html/
+    content_type "text/html"
+    OpenTox.text_to_html uris.join("\n")
+  else
+    content_type "text/uri-list"
+    uris.join("\n")
+  end
+end
+
+# Shows one dataset split as HTML: the YAML dump of a two-element array
+# holding the URI of the split's visualisation and the split object itself.
+# Errors raised by get_split (unknown validation or split) propagate.
+get '/:id/split/:id2' do
+  split = get_split(params[:id],params[:id2])
+  content_type "text/html"
+  # A bare `split.inspect` used to sit here; its result was discarded, so it
+  # has been dropped.
+  OpenTox.text_to_html ["http://local-ot/validation/#{params[:id]}/split/#{params[:id2]}/viz",split].to_yaml
+end
+
+# Serves the SVG file of one dataset split (located via the split's
+# svg_path) with content type image/svg+xml.
+# NOTE(review): the File handle is not closed here; presumably the server
+# closes the response body after sending it - confirm.
+get '/:id/split/:id2/viz' do
+  dataset_split = get_split(params[:id],params[:id2])
+  content_type "image/svg+xml"
+  svg_file = File.new(dataset_split.svg_path)
+  body svg_file
+end
+
get '/:id/migrate_median_confidence' do
LOGGER.debug "migrate median confidence"
validation = Validation::Validation[params[:id]]
diff --git a/validation/validation_service.rb b/validation/validation_service.rb
index e099e25..f57b3a5 100755
--- a/validation/validation_service.rb
+++ b/validation/validation_service.rb
@@ -653,15 +653,21 @@ module Validation
meta = { DC.creator => $url_provider.url_for('/training_test_split',:full) }
case stratified
- when /^(true|super|super4|super5|anti)$/
+ when /^(true|super|super4|super5|contra)$/
raise "store split clusters not available for true stratified splits" if store_split_clusters and stratified=="true"
if stratified=="true"
raise OpenTox::BadRequestError.new "prediction feature required for stratified splits" unless prediction_feature
LOGGER.warn "split features are ignored for stratified splits (use super instead)" if features
features = [prediction_feature]
+ elsif stratified=="contra"
+ raise OpenTox::BadRequestError.new "prediction feature required for anti-stratified splits" unless prediction_feature
+ LOGGER.debug "prediction feature is removed for anti-stratified splits"
+ features = orig_dataset.features.keys-[prediction_feature]
+ raise unless features.size==orig_dataset.features.size-1
else
LOGGER.warn "prediction feature is ignored for super- or anti-stratified splits" if prediction_feature
end
+ LOGGER.debug "Using "+features.size.to_s+"/"+orig_dataset.features.size.to_s+" features for splitting" if features
r_util = OpenTox::RUtil.new
train, test = r_util.stratified_split( orig_dataset, meta, missing_values, split_ratio,
@subjectid, random_seed, features, stratified, store_split_clusters )
@@ -705,7 +711,7 @@ module Validation
end
LOGGER.debug "split done, training dataset: '"+result[:training_dataset_uri].to_s+"', test dataset: '"+result[:test_dataset_uri].to_s+"'"
else
- raise OpenTox::BadRequestError.new "stratified != false|true|super, is #{stratified}"
+ raise OpenTox::BadRequestError.new "stratified != false|true|super|contra, is #{stratified}"
end
result
end
diff --git a/validation/validation_test.rb b/validation/validation_test.rb
index 1a41881..a7fae88 100755
--- a/validation/validation_test.rb
+++ b/validation/validation_test.rb
@@ -60,6 +60,10 @@ class ValidationTest < Test::Unit::TestCase
begin
$test_case = self
+ get '2568/split'
+ puts last_response.body
+ exit
+
# dataset_uri = "http://apps.ideaconsult.net:8080/ambit2/dataset/603206?pagesize=250&page=0"
# test_dataset_uri = "http://apps.ideaconsult.net:8080/ambit2/dataset/603206?pagesize=250&page=1"
# #prediction_feature = "http://apps.ideaconsult.net:8080/ambit2/feature/528321"
@@ -85,16 +89,16 @@ class ValidationTest < Test::Unit::TestCase
# {:validation_uris=>"http://local-ot/validation/389,http://local-ot/validation/390,http://local-ot/validation/391,http://local-ot/validation/392",
# :identifier=>"split1,split1,split2,split2"}
- post "/training_test_validation",{:prediction_feature=>"http://local-ot/dataset/9264/feature/endpoint",
- :training_dataset_uri=>"http://local-ot/dataset/119127",
- :algorithm_uri=>"http://local-ot/weka/RandomForest",
- :test_dataset_uri=>"http://local-ot/dataset/119128",
- :test_target_dataset_uri=>"http://local-ot/dataset/9264",
- :algorithm_params=>""}
- uri = last_response.body
- rep = wait_for_task(uri)
- puts rep
- exit
+# post "/training_test_validation",{:prediction_feature=>"http://local-ot/dataset/9264/feature/endpoint",
+# :training_dataset_uri=>"http://local-ot/dataset/119127",
+# :algorithm_uri=>"http://local-ot/weka/RandomForest",
+# :test_dataset_uri=>"http://local-ot/dataset/119128",
+# :test_target_dataset_uri=>"http://local-ot/dataset/9264",
+# :algorithm_params=>""}
+# uri = last_response.body
+# rep = wait_for_task(uri)
+# puts rep
+# exit
#
# #post "/report/validation",{:validation_uris=>"http://local-ot/validation/22849",:min_confidence=>0.5}
# get "/22849",{:min_confidence=>0.5}
@@ -108,10 +112,12 @@ class ValidationTest < Test::Unit::TestCase
# #dataset_uri = "http://apps.ideaconsult.net:8080/ambit2/dataset/603306?feature_uris[]=http://apps.ideaconsult.net:8080/ambit2/feature/764036"
# #dataset_uri = "http://apps.ideaconsult.net:8080/ambit2/dataset/603204"
- #dataset_uri = "http://local-ot/dataset/9264"
- #post "/plain_training_test_split",{:dataset_uri=>dataset_uri, :stratified=>"super", :split_ratio=>0.25}
+ dataset_uri = "http://local-ot/dataset/12084"
+ prediction_feature = "http://local-ot/dataset/12084/feature/bbrc/1"
+ post "/plain_training_test_split",{:dataset_uri=>dataset_uri, :stratified=>"contra", :split_ratio=>0.25, :missing_values=>"0",
+ :prediction_feature => prediction_feature}
- get '31355/median_confidence'
+ #get '31355/median_confidence'
puts last_response.body
exit