summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author mguetlein <martin.guetlein@gmail.com> 2012-06-15 14:03:59 +0200
committer mguetlein <martin.guetlein@gmail.com> 2012-06-15 14:03:59 +0200
commit 6061f3abcd6d0ecd28eb1ba2ec5bff488fbd0961 (patch)
tree 70ccc50e48189b65fe9803ac536e533c784da155
parent b3d06283d94de0ba7b3e386726876dffcc401095 (diff)
dataset add() speedup, handle missing values in dataframe to dataset, modified debug messages
-rwxr-xr-x example.rb 2
-rwxr-xr-x validation/validation_application.rb 13
-rwxr-xr-x validation/validation_service.rb 14
-rwxr-xr-x validation/validation_test.rb 9
4 files changed, 25 insertions, 13 deletions
diff --git a/example.rb b/example.rb
index 636579e..d6fba95 100755
--- a/example.rb
+++ b/example.rb
@@ -1,5 +1,5 @@
-require 'lib/test_util.rb'
+#require 'lib/test_util.rb'
class Example
diff --git a/validation/validation_application.rb b/validation/validation_application.rb
index 6b2ef3a..258a681 100755
--- a/validation/validation_application.rb
+++ b/validation/validation_application.rb
@@ -488,7 +488,8 @@ post '/training_test_split' do
check_stratified(params)
task = OpenTox::Task.create( "Perform training test split validation", url_for("/training_test_split", :full) ) do |task| #, params
params.merge!( Validation::Util.train_test_dataset_split(params[:dataset_uri], params[:prediction_feature],
- @subjectid, params[:stratified], params[:split_ratio], params[:random_seed], OpenTox::SubTask.create(task,0,33)))
+ @subjectid, params[:stratified], params[:split_ratio], params[:random_seed], params[:missing_values],
+ OpenTox::SubTask.create(task,0,33)))
v = Validation::Validation.create :validation_type => "training_test_split",
:training_dataset_uri => params[:training_dataset_uri],
:test_dataset_uri => params[:test_dataset_uri],
@@ -583,10 +584,10 @@ post '/plain_training_test_split' do
check_stratified(params)
task = OpenTox::Task.create( "Create data-split", url_for("/plain_training_test_split", :full) ) do |task|
result = Validation::Util.train_test_dataset_split(params[:dataset_uri], params[:prediction_feature], @subjectid,
- params[:stratified], params[:split_ratio], params[:random_seed], task)
+ params[:stratified], params[:split_ratio], params[:random_seed], params[:missing_values], task)
content_type "text/uri-list"
res = result[:training_dataset_uri]+"\n"+result[:test_dataset_uri]+"\n"
- LOGGER.info "plain training test split done #{res}"
+ LOGGER.info "plain training test split done #{res.to_s.gsub("\n"," \\n ")}"
res
end
return_task(task)
@@ -672,7 +673,7 @@ get '/:id/viz' do
data.features.each do |f,m|
d.add_feature(f,m)
data.data_entries[c][f].each do |v|
- d.add(c,f,v)
+ d.add(c,f,v,true)
end if data.data_entries[c][f]
end
end
@@ -685,8 +686,8 @@ get '/:id/viz' do
[p,a].each do |v|
raise p.class.to_s+" "+p.inspect unless p.is_a?(Array) and p.size==1
end
- d.add(c,predicted_nice_feature,p[0])
- d.add(c,correct_classified_feature,p[0]==a[0] ? "correct" : "miss")
+ d.add(c,predicted_nice_feature,p[0],true)
+ d.add(c,correct_classified_feature,p[0]==a[0] ? "correct" : "miss",true)
end
end
d.to_csv
diff --git a/validation/validation_service.rb b/validation/validation_service.rb
index 3966d7e..5398ace 100755
--- a/validation/validation_service.rb
+++ b/validation/validation_service.rb
@@ -491,6 +491,7 @@ module Validation
test_dataset_uris << test_dataset_uri
end
when /true|super/
+ raise "DO NOT USED SUPER-STRATIFICATION FOR VAL-EXPERIMENTS AND CV, IF SO SOLVE _MISSING_VAULE_NA_ PROBLEM" if stratified=="super"
if stratified=="true"
features = [ self.prediction_feature ]
else
@@ -540,7 +541,7 @@ module Validation
"' not found in dataset, features are: \n"+
orig_dataset.features.inspect unless orig_dataset.features.include?(prediction_feature)
else
- LOGGER.warn "no prediciton feature given, all features included in test dataset"
+ LOGGER.debug "no prediciton feature given, all features included in test dataset"
end
compounds = orig_dataset.compounds
@@ -615,12 +616,15 @@ module Validation
# splits a dataset into test and training dataset
# returns map with training_dataset_uri and test_dataset_uri
- def self.train_test_dataset_split( orig_dataset_uri, prediction_feature, subjectid, stratified="false", split_ratio=nil, random_seed=nil, task=nil )
+ def self.train_test_dataset_split( orig_dataset_uri, prediction_feature, subjectid, stratified="false",
+ split_ratio=nil, random_seed=nil, missing_values=nil, task=nil )
+
split_ratio=0.67 unless split_ratio
split_ratio = split_ratio.to_f
random_seed=1 unless random_seed
random_seed = random_seed.to_i
-
+ missing_values = "NA" unless missing_values
+
raise OpenTox::NotFoundError.new "Split ratio invalid: "+split_ratio.to_s unless split_ratio and split_ratio=split_ratio.to_f
raise OpenTox::NotFoundError.new "Split ratio not >0 and <1 :"+split_ratio.to_s unless split_ratio>0 && split_ratio<1
orig_dataset = Lib::DatasetCache.find orig_dataset_uri, subjectid
@@ -631,7 +635,7 @@ module Validation
"' not found in dataset, features are: \n"+
orig_dataset.features.keys.inspect unless orig_dataset.features.include?(prediction_feature)
else
- LOGGER.warn "no prediciton feature given, all features will be included in test dataset"
+ LOGGER.debug "no prediciton feature given, all features will be included in test dataset"
end
meta = { DC.creator => $url_provider.url_for('/training_test_split',:full) }
@@ -646,7 +650,7 @@ module Validation
features = nil
end
r_util = OpenTox::RUtil.new
- train, test = r_util.stratified_split( orig_dataset, meta, "NA", split_ratio, @subjectid, random_seed, features, stratified=="anti" )
+ train, test = r_util.stratified_split( orig_dataset, meta, missing_values, split_ratio, @subjectid, random_seed, features, stratified=="anti" )
r_util.quit_r
result = {:training_dataset_uri => train.uri, :test_dataset_uri => test.uri}
when "false"
diff --git a/validation/validation_test.rb b/validation/validation_test.rb
index ce6d5c1..78f35d2 100755
--- a/validation/validation_test.rb
+++ b/validation/validation_test.rb
@@ -85,7 +85,14 @@ class ValidationTest < Test::Unit::TestCase
# {:validation_uris=>"http://local-ot/validation/389,http://local-ot/validation/390,http://local-ot/validation/391,http://local-ot/validation/392",
# :identifier=>"split1,split1,split2,split2"}
- #post "/report/validation",{:validation_uris=>"http://local-ot/validation/22849",:min_confidence=>0.5}
+ post "/training_test_validation",{:prediction_feature=>"http://opentox.informatik.uni-freiburg.de/dataset/2/feature/MTP",
+ :training_dataset_uri=>"http://opentox.informatik.uni-freiburg.de/dataset/4755",
+ :algorithm_uri=>"http://opentox.informatik.uni-freiburg.de/superservice",
+ :test_dataset_uri=>"http://opentox.informatik.uni-freiburg.de/dataset/3556",
+ :algorithm_params=>"prediction_algorithm=http://opentox.informatik.uni-freiburg.de/weka/M5P;create_bbrc_features=false;ad_algorithm=http://opentox.informatik.uni-freiburg.de/appdomain/EuclideanDistance"}
+ exit
+
+ #post "/report/validation",{:validation_uris=>"http://local-ot/validation/22849",:min_confidence=>0.5}
get "/22849",{:min_confidence=>0.5}
exit