summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author mguetlein <martin.guetlein@gmail.com> 2012-10-12 10:27:15 +0200
committer mguetlein <martin.guetlein@gmail.com> 2012-10-12 10:27:15 +0200
commit ba9d3e628e424a84d8c892c4fdf49c2258a95352 (patch)
tree 4dbbf4269e6e3dc1dd94024921255a506a5fa921
parent cd8800cf377b5b8c07dc25f3629e503f01bc9112 (diff)
add contra splitting, provide access to dataset-splits, minor stuff
-rwxr-xr-x lib/predictions.rb 135
-rw-r--r-- report/plot_factory.rb 2
-rw-r--r-- report/report_service.rb 2
-rwxr-xr-x report/validation_access.rb 3
-rwxr-xr-x validation/validation_application.rb 45
-rwxr-xr-x validation/validation_service.rb 10
-rwxr-xr-x validation/validation_test.rb 32
7 files changed, 154 insertions, 75 deletions
diff --git a/lib/predictions.rb b/lib/predictions.rb
index 63578fd..4708630 100755
--- a/lib/predictions.rb
+++ b/lib/predictions.rb
@@ -1,4 +1,6 @@
+require "rubygems"
+
require "lib/prediction_data.rb"
require "statsample"
@@ -813,66 +815,87 @@ module Lib
require "rubygems"
require "opentox-ruby"
- max_deviation = rand * 0.9
- avg_deviation = max_deviation * 0.5
-
- p = []
- a = []
- c = []
- (100 + rand(1000)).times do |i|
- r = rand
- deviation = rand * max_deviation
- a << r
- p << r + ((rand<0.5 ? -1 : 1) * deviation)
- #c << 0.5
- if (deviation > avg_deviation)
- c << 0.4
- else
- c << 0.6
- end
- #puts a[-1].to_s+" "+p[-1].to_s
- end
- puts "num values "+p.size.to_s
+ max_deviation = rand * 0.5
- #a = [1.0,2.0, 3.0,4.0, 5.0]
- #p = [1.5,2.25,3.0,3.75,4.5]
+ weired_index = 0
- #a = [1.0,2.0,3.0,4.0,5.0]
- #p = [1.5,2.5,3.5,4.5,5.5]
-
- #p = a.collect{|v| v-0.5}
- #p = a.collect{|v| v+0.5}
+ 10.times do
- #p = [2.0,2.5,3.0,3.5,4.0]
-
- c = Array.new(p.size,nil)
+ p = []
+ a = []
+ 500.times do |i|
+ r = rand
+ deviation = rand * max_deviation
+ a << r
+ p << r + ((rand<0.5 ? -1 : 1) * deviation)
+ end
+ puts "num values "+p.size.to_s
+ c = Array.new(p.size,nil)
+ data = { :predicted_values => p, :actual_values => a, :confidence_values => c,
+ :feature_type => "regression", :accept_values => nil }
+ pred = Predictions.new(data)
+ puts "ccc "+"%.3f" % pred.concordance_correlation_coefficient.to_s
+ puts "rmse "+"%.3f" % pred.root_mean_squared_error.to_s
+ ccc_1 = pred.concordance_correlation_coefficient
+ rmse_1 = pred.root_mean_squared_error
+
+ p = []
+ a = []
+ 2500.times do |i|
+ r = rand
+ deviation = rand * max_deviation
+ a << r
+ p << r + ((rand<0.5 ? -1 : 1) * deviation)
+ end
+ puts "num values "+p.size.to_s
+ c = Array.new(p.size,nil)
+ data = { :predicted_values => p, :actual_values => a, :confidence_values => c,
+ :feature_type => "regression", :accept_values => nil }
+ pred = Predictions.new(data)
+ puts "ccc "+"%.3f" % pred.concordance_correlation_coefficient.to_s
+ puts "rmse "+"%.3f" % pred.root_mean_squared_error.to_s
+ ccc_2 = pred.concordance_correlation_coefficient
+ rmse_2 = pred.root_mean_squared_error
+
+ ccc_d = ccc_1 - ccc_2
+ rmse_d = rmse_1 - rmse_2
+
+ dev = 0.0005
+
+ puts "ccc-d "+"%.3f" % ccc_d
+ puts "rmse-d "+"%.3f" % rmse_d
+
+ if ccc_d.abs>dev or rmse_d.abs>dev
+
+ if ((ccc_d>0 and rmse_d<0) or (ccc_d<0 and rmse_d>0)) #ccc_d.abs>dev and rmse_d.abs>dev and
+ puts "weired"
+ weired_index += 1
+ else
+ puts "not weired"
+ weired_index -= 1
+ end
+ puts weired_index
+ else
+ #puts "only small"
+ end
+
+ end
- data = { :predicted_values => p, :actual_values => a, :confidence_values => c,
- :feature_type => "regression", :accept_values => nil }
-
- pred = Predictions.new(data)
- puts "internal"
- #puts "r-square old "+pred.r_square_old.to_s
- puts "cor "+pred.sample_correlation_coefficient.to_s
- #puts "weighted cor "+pred.weighted_sample_correlation_coefficient.to_s
- puts "r-square "+pred.r_square.to_s
- puts "ccc "+pred.concordance_correlation_coefficient.to_s
-
- puts "R"
- rutil = OpenTox::RUtil.new
-
- rutil.r.assign "v1",a
- rutil.r.assign "v2",p
- puts "r cor "+rutil.r.pull("cor(v1,v2)").to_s
- rutil.r.eval "fit <- lm(v1 ~ v2)"
- rutil.r.eval "sum <- summary(fit)"
- puts "r r-square "+rutil.r.pull("sum$r.squared").to_s
- puts "r adjusted-r-square "+rutil.r.pull("sum$adj.r.squared").to_s
- rutil.r.eval "save.image(\"/tmp/image.R\")"
- #rutil.r.eval "require(epiR)"
- #rutil.r.eval "tmp.ccc <- epi.ccc(v1,v2)"
- #puts "r ccc "+rutil.r.pull("tmp.ccc$rho.c$est").to_s
- rutil.quit_r
+# puts "R"
+# rutil = OpenTox::RUtil.new
+#
+# rutil.r.assign "v1",a
+# rutil.r.assign "v2",p
+# puts "r cor "+rutil.r.pull("cor(v1,v2)").to_s
+# rutil.r.eval "fit <- lm(v1 ~ v2)"
+# rutil.r.eval "sum <- summary(fit)"
+# puts "r r-square "+rutil.r.pull("sum$r.squared").to_s
+# puts "r adjusted-r-square "+rutil.r.pull("sum$adj.r.squared").to_s
+# rutil.r.eval "save.image(\"/tmp/image.R\")"
+# #rutil.r.eval "require(epiR)"
+# #rutil.r.eval "tmp.ccc <- epi.ccc(v1,v2)"
+# #puts "r ccc "+rutil.r.pull("tmp.ccc$rho.c$est").to_s
+# rutil.quit_r
end
def prediction_feature_value_map(proc)
diff --git a/report/plot_factory.rb b/report/plot_factory.rb
index 61c3eea..a2be092 100644
--- a/report/plot_factory.rb
+++ b/report/plot_factory.rb
@@ -125,7 +125,7 @@ module Reports
else
Reports::r_util.feature_value_plot(out_files, validation_set.validations[0].training_feature_dataset_uri,
validation_set.validations[0].test_feature_dataset_uri, "Training Data", "Test Data",
- nil, validation_set.validations[0].subjectid, waiting_task )
+ validation_set.validations[0].prediction_feature, validation_set.validations[0].subjectid, waiting_task )
end
end
diff --git a/report/report_service.rb b/report/report_service.rb
index f315b04..f11a7a8 100644
--- a/report/report_service.rb
+++ b/report/report_service.rb
@@ -25,7 +25,7 @@ module Reports
def initialize(home_uri)
raise "supposed to be a singleton" if defined?@@instance
raise "plz specify home_uri" unless home_uri
- LOGGER.info "init report service"
+ #LOGGER.info "init report service"
@home_uri = home_uri
@@instance = self
end
diff --git a/report/validation_access.rb b/report/validation_access.rb
index 4d9ed9f..463337b 100755
--- a/report/validation_access.rb
+++ b/report/validation_access.rb
@@ -158,6 +158,9 @@ class Reports::ValidationDB
if features_found
LOGGER.debug "all training-features found in test-datset"
uri = test_dataset.uri
+ elsif validation.model_uri=~/superservice/
+ uri = OpenTox::RestClientWrapper.post(validation.model_uri+"/test_dataset_features",
+ {:dataset_uri=>validation.test_dataset_uri}).to_s
else
m = OpenTox::Model::Generic.find(validation.model_uri, subjectid)
feat_gen = nil
diff --git a/validation/validation_application.rb b/validation/validation_application.rb
index 9233502..c873a72 100755
--- a/validation/validation_application.rb
+++ b/validation/validation_application.rb
@@ -9,8 +9,8 @@ require 'validation/validation_service.rb'
helpers do
def check_stratified(params)
params[:stratified] = "false" unless params[:stratified]
- raise OpenTox::BadRequestError.new "stratified != true|false|super|super4|super5|anti, is #{params[:stratified]}" unless
- params[:stratified]=~/^(true|false|super|super4|super5|anti)$/
+ raise OpenTox::BadRequestError.new "stratified != true|false|super|super4|super5|contra, is #{params[:stratified]}" unless
+ params[:stratified]=~/^(true|false|super|super4|super5|contra)$/
end
end
@@ -745,6 +745,47 @@ end
# return validation.send(params[:attribute])
#end
+
+# Returns the dataset splits that belong to the validation with the given id.
+# Requires the DatasetSplit definition from the ValidationExperiments workspace
+# below $HOME, then queries Exp::DatasetSplit by the validation's training and
+# test dataset URIs.
+# Raises OpenTox::NotFoundError if no validation is stored under this id.
+def get_splits(id)
+  require "#{ENV['HOME']}/workspace/ValidationExperiments/dataset_split.rb"
+  v = Validation::Validation[id]
+  unless v
+    raise OpenTox::NotFoundError.new "Validation '#{id}' not found."
+  end
+  query = {
+    :train_dataset_uri => v.training_dataset_uri,
+    :test_dataset_uri => v.test_dataset_uri
+  }
+  Exp::DatasetSplit.find(query)
+end
+
+# Returns the dataset split with id +id2+ among the splits of validation +id+.
+# Both ids are compared via to_s, so a numeric split id matches a string
+# request parameter.
+# Raises OpenTox::NotFoundError if the validation has no split with that id
+# (the lookup of the validation itself is delegated to get_splits).
+def get_split(id,id2)
+  get_splits(id).each do |s|
+    return s if id2.to_s==s.id.to_s
+  end
+  # Only reached when no split matched, so raise unconditionally.
+  raise OpenTox::NotFoundError.new "not found: dataset split with id #{id2}"
+end
+
+# Lists the URIs of all dataset splits of validation :id, one per line.
+# Responds with HTML when the Accept header mentions text/html, otherwise
+# with a text/uri-list.
+get '/:id/split' do
+  splits = get_splits(params[:id])
+  base_uri = "http://local-ot/validation/#{params[:id]}/split/"
+  # to_s keeps the concatenation valid when the split id is not a String
+  # (String#+ raises TypeError for e.g. an Integer id).
+  uris = splits.collect{|s| base_uri+s.id.to_s}
+  if request.env['HTTP_ACCEPT'] =~ /text\/html/
+    content_type "text/html"
+    OpenTox.text_to_html uris.join("\n")
+  else
+    content_type "text/uri-list"
+    uris.join("\n")
+  end
+end
+
+# Shows a single dataset split as HTML: a YAML dump of a two-element array
+# holding the URI of the split's visualization and the split object itself.
+get '/:id/split/:id2' do
+  split = get_split(params[:id],params[:id2])
+  content_type "text/html"
+  OpenTox.text_to_html ["http://local-ot/validation/#{params[:id]}/split/#{params[:id2]}/viz",split].to_yaml
+end
+
+# Serves the visualization of one dataset split: the file found at the
+# split's svg_path is set as the response body with content type image/svg+xml.
+get '/:id/split/:id2/viz' do
+  svg_split = get_split(params[:id],params[:id2])
+  content_type "image/svg+xml"
+  body File.new(svg_split.svg_path)
+end
+
get '/:id/migrate_median_confidence' do
LOGGER.debug "migrate median confidence"
validation = Validation::Validation[params[:id]]
diff --git a/validation/validation_service.rb b/validation/validation_service.rb
index e099e25..f57b3a5 100755
--- a/validation/validation_service.rb
+++ b/validation/validation_service.rb
@@ -653,15 +653,21 @@ module Validation
meta = { DC.creator => $url_provider.url_for('/training_test_split',:full) }
case stratified
- when /^(true|super|super4|super5|anti)$/
+ when /^(true|super|super4|super5|contra)$/
raise "store split clusters not available for true stratified splits" if store_split_clusters and stratified=="true"
if stratified=="true"
raise OpenTox::BadRequestError.new "prediction feature required for stratified splits" unless prediction_feature
LOGGER.warn "split features are ignored for stratified splits (use super instead)" if features
features = [prediction_feature]
+ elsif stratified=="contra"
+ raise OpenTox::BadRequestError.new "prediction feature required for anti-stratified splits" unless prediction_feature
+ LOGGER.debug "prediction feature is removed for anti-stratified splits"
+ features = orig_dataset.features.keys-[prediction_feature]
+ raise unless features.size==orig_dataset.features.size-1
else
LOGGER.warn "prediction feature is ignored for super- or anti-stratified splits" if prediction_feature
end
+ LOGGER.debug "Using "+features.size.to_s+"/"+orig_dataset.features.size.to_s+" features for splitting" if features
r_util = OpenTox::RUtil.new
train, test = r_util.stratified_split( orig_dataset, meta, missing_values, split_ratio,
@subjectid, random_seed, features, stratified, store_split_clusters )
@@ -705,7 +711,7 @@ module Validation
end
LOGGER.debug "split done, training dataset: '"+result[:training_dataset_uri].to_s+"', test dataset: '"+result[:test_dataset_uri].to_s+"'"
else
- raise OpenTox::BadRequestError.new "stratified != false|true|super, is #{stratified}"
+ raise OpenTox::BadRequestError.new "stratified != false|true|super|contra, is #{stratified}"
end
result
end
diff --git a/validation/validation_test.rb b/validation/validation_test.rb
index 1a41881..a7fae88 100755
--- a/validation/validation_test.rb
+++ b/validation/validation_test.rb
@@ -60,6 +60,10 @@ class ValidationTest < Test::Unit::TestCase
begin
$test_case = self
+ get '2568/split'
+ puts last_response.body
+ exit
+
# dataset_uri = "http://apps.ideaconsult.net:8080/ambit2/dataset/603206?pagesize=250&page=0"
# test_dataset_uri = "http://apps.ideaconsult.net:8080/ambit2/dataset/603206?pagesize=250&page=1"
# #prediction_feature = "http://apps.ideaconsult.net:8080/ambit2/feature/528321"
@@ -85,16 +89,16 @@ class ValidationTest < Test::Unit::TestCase
# {:validation_uris=>"http://local-ot/validation/389,http://local-ot/validation/390,http://local-ot/validation/391,http://local-ot/validation/392",
# :identifier=>"split1,split1,split2,split2"}
- post "/training_test_validation",{:prediction_feature=>"http://local-ot/dataset/9264/feature/endpoint",
- :training_dataset_uri=>"http://local-ot/dataset/119127",
- :algorithm_uri=>"http://local-ot/weka/RandomForest",
- :test_dataset_uri=>"http://local-ot/dataset/119128",
- :test_target_dataset_uri=>"http://local-ot/dataset/9264",
- :algorithm_params=>""}
- uri = last_response.body
- rep = wait_for_task(uri)
- puts rep
- exit
+# post "/training_test_validation",{:prediction_feature=>"http://local-ot/dataset/9264/feature/endpoint",
+# :training_dataset_uri=>"http://local-ot/dataset/119127",
+# :algorithm_uri=>"http://local-ot/weka/RandomForest",
+# :test_dataset_uri=>"http://local-ot/dataset/119128",
+# :test_target_dataset_uri=>"http://local-ot/dataset/9264",
+# :algorithm_params=>""}
+# uri = last_response.body
+# rep = wait_for_task(uri)
+# puts rep
+# exit
#
# #post "/report/validation",{:validation_uris=>"http://local-ot/validation/22849",:min_confidence=>0.5}
# get "/22849",{:min_confidence=>0.5}
@@ -108,10 +112,12 @@ class ValidationTest < Test::Unit::TestCase
# #dataset_uri = "http://apps.ideaconsult.net:8080/ambit2/dataset/603306?feature_uris[]=http://apps.ideaconsult.net:8080/ambit2/feature/764036"
# #dataset_uri = "http://apps.ideaconsult.net:8080/ambit2/dataset/603204"
- #dataset_uri = "http://local-ot/dataset/9264"
- #post "/plain_training_test_split",{:dataset_uri=>dataset_uri, :stratified=>"super", :split_ratio=>0.25}
+ dataset_uri = "http://local-ot/dataset/12084"
+ prediction_feature = "http://local-ot/dataset/12084/feature/bbrc/1"
+ post "/plain_training_test_split",{:dataset_uri=>dataset_uri, :stratified=>"contra", :split_ratio=>0.25, :missing_values=>"0",
+ :prediction_feature => prediction_feature}
- get '31355/median_confidence'
+ #get '31355/median_confidence'
puts last_response.body
exit