Diffstat (limited to 'validation')
-rwxr-xr-x [-rw-r--r--]  validation/validation_application.rb  610
-rwxr-xr-x [-rw-r--r--]  validation/validation_format.rb        116
-rwxr-xr-x [-rw-r--r--]  validation/validation_service.rb       549
-rwxr-xr-x [-rw-r--r--]  validation/validation_test.rb          352
4 files changed, 1217 insertions, 410 deletions
diff --git a/validation/validation_application.rb b/validation/validation_application.rb
index a43a2a6..e07acf0 100644..100755
--- a/validation/validation_application.rb
+++ b/validation/validation_application.rb
@@ -1,199 +1,454 @@
-[ 'rubygems', 'sinatra', 'sinatra/url_for', 'active_record', 'ar-extensions', 'opentox-ruby-api-wrapper' ].each do |lib|
+[ 'rubygems', 'sinatra', 'sinatra/url_for', 'opentox-ruby' ].each do |lib|
require lib
end
-require 'validation/validation_service.rb'
require 'lib/merge.rb'
+#require 'lib/active_record_setup.rb'
+require 'validation/validation_service.rb'
get '/crossvalidation/?' do
LOGGER.info "list all crossvalidations"
+ #uri_list = Validation::Crossvalidation.all.collect{ |cv| cv.crossvalidation_uri }.join("\n")+"\n"
+ uri_list = Lib::DataMapperUtil.all(Validation::Crossvalidation,params).collect{ |cv| cv.crossvalidation_uri }.join("\n")+"\n"
+
+ #uri_list = Validation::Crossvalidation.find_like(params).collect{ |cv| cv.crossvalidation_uri }.join("\n")+"\n"
+ if request.env['HTTP_ACCEPT'] =~ /text\/html/
+ related_links =
+ "Single validations: "+url_for("/",:full)+"\n"+
+ "Crossvalidation reports: "+url_for("/report/crossvalidation",:full)
+ description =
+ "A list of all crossvalidations.\n"+
+ "Use the POST method to perform a crossvalidation."
+ post_params = [[:dataset_uri,:algorithm_uri,:prediction_feature,[:num_folds,10],[:random_seed,1],[:stratified,false],[:algorithm_params,""]]]
+ content_type "text/html"
+ OpenTox.text_to_html uri_list,@subjectid,related_links,description,post_params
+ else
+ content_type "text/uri-list"
+ uri_list
+ end
+end
+
+post '/crossvalidation/?' do
+ task = OpenTox::Task.create( "Perform crossvalidation", url_for("/crossvalidation", :full) ) do |task| #, params
+ LOGGER.info "creating crossvalidation "+params.inspect
+ raise OpenTox::BadRequestError.new "dataset_uri missing" unless params[:dataset_uri]
+ raise OpenTox::BadRequestError.new "algorithm_uri missing" unless params[:algorithm_uri]
+ raise OpenTox::BadRequestError.new "prediction_feature missing" unless params[:prediction_feature]
+ raise OpenTox::BadRequestError.new "illegal param-value num_folds: '"+params[:num_folds].to_s+"', must be integer >1" unless params[:num_folds]==nil or
+ params[:num_folds].to_i>1
+
+ cv_params = { :dataset_uri => params[:dataset_uri],
+ :algorithm_uri => params[:algorithm_uri] }
+ [ :num_folds, :random_seed, :stratified ].each{ |sym| cv_params[sym] = params[sym] if params[sym] }
+ cv = Validation::Crossvalidation.new cv_params
+ cv.subjectid = @subjectid
+ cv.perform_cv( params[:prediction_feature], params[:algorithm_params], task )
+ cv.crossvalidation_uri
+ end
+ return_task(task)
+end
+
+post '/crossvalidation/cleanup/?' do
+ LOGGER.info "crossvalidation cleanup, starting..."
content_type "text/uri-list"
- params.each{ |k,v| halt 400,"no crossvalidation-attribute: "+k.to_s unless Validation::Crossvalidation.column_names.include?(k.gsub(/_like$/,"")) }
- Validation::Crossvalidation.find(:all, :conditions => params).collect{ |d| url_for("/crossvalidation/", :full) + d.id.to_s }.join("\n")
+ deleted = []
+ #Validation::Crossvalidation.find_like(params).each do |cv|
+ Validation::Crossvalidation.all( { :finished => false } ).each do |cv|
+ #num_vals = Validation::Validation.find( :all, :conditions => { :crossvalidation_id => cv.id } ).size
+ #num_vals = Validation::Validation.all( :crossvalidation_id => cv.id ).size
+ #if cv.num_folds != num_vals || !cv.finished
+ LOGGER.debug "delete cv with id:"+cv.id.to_s+", finished is false"
+ deleted << cv.crossvalidation_uri
+ #Validation::Crossvalidation.delete(cv.id)
+ cv.subjectid = @subjectid
+ cv.delete
+ #end
+ end
+ LOGGER.info "crossvalidation cleanup, deleted "+deleted.size.to_s+" cvs"
+ deleted.join("\n")+"\n"
end
post '/crossvalidation/loo/?' do
- halt 500, "not yet implemented"
+ raise "not yet implemented"
end
get '/crossvalidation/loo/?' do
- halt 400, "GET operation not supported, use POST for performing a loo-crossvalidation, see "+url_for("/crossvalidation", :full)+" for crossvalidation results"
+ raise OpenTox::BadRequestError.new "GET operation not supported, use POST for performing a loo-crossvalidation, see "+url_for("/crossvalidation", :full)+" for crossvalidation results"
end
get '/crossvalidation/:id' do
LOGGER.info "get crossvalidation with id "+params[:id].to_s
- begin
- crossvalidation = Validation::Crossvalidation.find(params[:id])
- rescue ActiveRecord::RecordNotFound => ex
- halt 404, "Crossvalidation '#{params[:id]}' not found."
- end
+# begin
+# #crossvalidation = Validation::Crossvalidation.find(params[:id])
+# rescue ActiveRecord::RecordNotFound => ex
+# raise OpenTox::NotFoundError.new "Crossvalidation '#{params[:id]}' not found."
+# end
+ crossvalidation = Validation::Crossvalidation.get(params[:id])
+ raise OpenTox::NotFoundError.new "Crossvalidation '#{params[:id]}' not found." unless crossvalidation
case request.env['HTTP_ACCEPT'].to_s
when "application/rdf+xml"
content_type "application/rdf+xml"
- result = crossvalidation.to_rdf
- when /application\/x-yaml|\*\/\*|/ # matches 'text/x-yaml', '*/*', ''
+ crossvalidation.to_rdf
+ when /text\/html/
+ related_links =
+ "Search for corresponding cv report: "+url_for("/report/crossvalidation?crossvalidation="+crossvalidation.crossvalidation_uri,:full)+"\n"+
+ "Statistics for this crossvalidation: "+url_for("/crossvalidation/"+params[:id]+"/statistics",:full)+"\n"+
+ "Predictions of this crossvalidation: "+url_for("/crossvalidation/"+params[:id]+"/predictions",:full)+"\n"+
+ "All crossvalidations: "+url_for("/crossvalidation",:full)+"\n"+
+ "All crossvalidation reports: "+url_for("/report/crossvalidation",:full)
+ description =
+ "A crossvalidation resource."
+ content_type "text/html"
+ OpenTox.text_to_html crossvalidation.to_yaml,@subjectid,related_links,description
+ when /application\/x-yaml|\*\/\*/
content_type "application/x-yaml"
- result = crossvalidation.to_yaml
+ crossvalidation.to_yaml
else
- halt 400, "MIME type '"+request.env['HTTP_ACCEPT'].to_s+"' not supported."
- end
- result
-end
-
-delete '/crossvalidation/:id/?' do
- LOGGER.info "delete crossvalidation with id "+params[:id].to_s
- content_type "text/plain"
- begin
- crossvalidation = Validation::Crossvalidation.find(params[:id])
- rescue ActiveRecord::RecordNotFound => ex
- halt 404, "Crossvalidation '#{params[:id]}' not found."
- end
- Validation::Crossvalidation.delete(params[:id])
-end
-
-get '/crossvalidation/:id/validations' do
- LOGGER.info "get all validations for crossvalidation with id "+params[:id].to_s
- begin
- crossvalidation = Validation::Crossvalidation.find(params[:id])
- rescue ActiveRecord::RecordNotFound => ex
- halt 404, "Crossvalidation '#{params[:id]}' not found."
+ raise OpenTox::BadRequestError.new "MIME type '"+request.env['HTTP_ACCEPT'].to_s+"' not supported, valid Accept-Headers: \"application/rdf+xml\", \"application/x-yaml\", \"text/html\"."
end
- content_type "text/uri-list"
- Validation::Validation.find( :all, :conditions => { :crossvalidation_id => params[:id] } ).collect{ |v| v.validation_uri.to_s }.join("\n")+"\n"
end
-
get '/crossvalidation/:id/statistics' do
LOGGER.info "get merged validation-result for crossvalidation with id "+params[:id].to_s
- begin
- crossvalidation = Validation::Crossvalidation.find(params[:id])
- rescue ActiveRecord::RecordNotFound => ex
- halt 404, "Crossvalidation '#{params[:id]}' not found."
- end
+# begin
+ #crossvalidation = Validation::Crossvalidation.find(params[:id])
+# rescue ActiveRecord::RecordNotFound => ex
+# raise OpenTox::NotFoundError.new "Crossvalidation '#{params[:id]}' not found."
+# end
+ #crossvalidation = Validation::Crossvalidation.find(params[:id])
+ crossvalidation = Validation::Crossvalidation.get(params[:id])
+
+ raise OpenTox::NotFoundError.new "Crossvalidation '#{params[:id]}' not found." unless crossvalidation
+ raise OpenTox::BadRequestError.new "Crossvalidation '"+params[:id].to_s+"' not finished" unless crossvalidation.finished
Lib::MergeObjects.register_merge_attributes( Validation::Validation,
- Lib::VAL_MERGE_AVG,Lib::VAL_MERGE_SUM,Lib::VAL_MERGE_GENERAL-[:validation_uri]) unless
+ Lib::VAL_MERGE_AVG,Lib::VAL_MERGE_SUM,Lib::VAL_MERGE_GENERAL-[:date,:validation_uri,:crossvalidation_uri]) unless
Lib::MergeObjects.merge_attributes_registered?(Validation::Validation)
- v = Lib::MergeObjects.merge_array_objects( Validation::Validation.find( :all, :conditions => { :crossvalidation_id => params[:id] } ) )
- v.validation_uri = nil
+ #v = Lib::MergeObjects.merge_array_objects( Validation::Validation.find( :all, :conditions => { :crossvalidation_id => params[:id] } ) )
+ v = Lib::MergeObjects.merge_array_objects( Validation::Validation.all( :crossvalidation_id => params[:id] ) )
v.created_at = nil
v.id = nil
- content_type "application/x-yaml"
- v.to_yaml
-end
-
-
-post '/crossvalidation/?' do
- content_type "text/uri-list"
- task_uri = OpenTox::Task.as_task do
- LOGGER.info "creating crossvalidation "+params.inspect
- halt 400, "dataset_uri missing" unless params[:dataset_uri]
- halt 400, "algorithm_uri missing" unless params[:algorithm_uri]
- halt 400, "prediction_feature missing" unless params[:prediction_feature]
- halt 400, "illegal param-value num_folds: '"+params[:num_folds].to_s+"', must be integer >1" unless params[:num_folds]==nil or
- params[:num_folds].to_i>1
-
- cv_params = { :dataset_uri => params[:dataset_uri],
- :algorithm_uri => params[:algorithm_uri] }
- [ :num_folds, :random_seed, :stratified ].each{ |sym| cv_params[sym] = params[sym] if params[sym] }
- cv = Validation::Crossvalidation.new cv_params
- cv.create_cv_datasets( params[:prediction_feature] )
- cv.perform_cv( params[:algorithm_params])
- content_type "text/uri-list"
- cv.crossvalidation_uri
+
+ case request.env['HTTP_ACCEPT'].to_s
+ when /text\/html/
+ related_links =
+ "The corresponding crossvalidation resource: "+url_for("/crossvalidation/"+params[:id],:full)
+ description =
+ "The averaged statistics for the crossvalidation."
+ content_type "text/html"
+ OpenTox.text_to_html v.to_yaml,@subjectid,related_links,description
+ else
+ content_type "application/x-yaml"
+ v.to_yaml
end
- halt 202,task_uri
end
-get '/training_test_split' do
- halt 400, "GET operation not supported, use POST to perform a training_test_split, see "+url_for("/", :full)+" for validation results"
+delete '/crossvalidation/:id/?' do
+ LOGGER.info "delete crossvalidation with id "+params[:id].to_s
+ content_type "text/plain"
+# begin
+ #crossvalidation = Validation::Crossvalidation.find(params[:id])
+# rescue ActiveRecord::RecordNotFound => ex
+# raise OpenTox::NotFoundError.new "Crossvalidation '#{params[:id]}' not found."
+# end
+# Validation::Crossvalidation.delete(params[:id])
+
+ cv = Validation::Crossvalidation.get(params[:id])
+ raise OpenTox::NotFoundError.new "Crossvalidation '#{params[:id]}' not found." unless cv
+ cv.subjectid = @subjectid
+ cv.delete
end
-get '/?' do
- LOGGER.info "list all validations"
- content_type "text/uri-list"
- params.each{ |k,v| halt 400,"no validation-attribute: "+k.to_s unless Validation::Validation.column_names.include?(k.gsub(/_like$/,"")) }
- Validation::Validation.find(:all, :conditions => params).collect{ |d| url_for("/", :full) + d.id.to_s }.join("\n")
-end
+#get '/crossvalidation/:id/validations' do
+# LOGGER.info "get all validations for crossvalidation with id "+params[:id].to_s
+# begin
+# crossvalidation = Validation::Crossvalidation.find(params[:id])
+# rescue ActiveRecord::RecordNotFound => ex
+# raise OpenTox::NotFoundError.new "Crossvalidation '#{params[:id]}' not found."
+# end
+# content_type "text/uri-list"
+# Validation::Validation.find( :all, :conditions => { :crossvalidation_id => params[:id] } ).collect{ |v| v.validation_uri.to_s }.join("\n")+"\n"
+#end
-get '/:id' do
- LOGGER.info "get validation with id "+params[:id].to_s+" '"+request.env['HTTP_ACCEPT'].to_s+"'"
+get '/crossvalidation/:id/predictions' do
+ LOGGER.info "get predictions for crossvalidation with id "+params[:id].to_s
begin
- validation = Validation::Validation.find(params[:id])
+ #crossvalidation = Validation::Crossvalidation.find(params[:id])
+ crossvalidation = Validation::Crossvalidation.get(params[:id])
rescue ActiveRecord::RecordNotFound => ex
- halt 404, "Validation '#{params[:id]}' not found."
+ raise OpenTox::NotFoundError.new "Crossvalidation '#{params[:id]}' not found."
end
-
+ raise OpenTox::BadRequestError.new "Crossvalidation '"+params[:id].to_s+"' not finished" unless crossvalidation.finished
+
+ content_type "application/x-yaml"
+ #validations = Validation::Validation.find( :all, :conditions => { :crossvalidation_id => params[:id] } )
+ validations = Validation::Validation.all( :crossvalidation_id => params[:id] )
+ p = Lib::OTPredictions.to_array( validations.collect{ |v| v.compute_validation_stats_with_model(nil, true) } ).to_yaml
+
case request.env['HTTP_ACCEPT'].to_s
- when "application/rdf+xml"
- content_type "application/rdf+xml"
- result = validation.to_rdf
- when /application\/x-yaml|\*\/\*|^$/ # matches 'application/x-yaml', '*/*', ''
- content_type "application/x-yaml"
- result = validation.to_yaml
+ when /text\/html/
+ content_type "text/html"
+ description =
+ "The crossvalidation predictions as (yaml-)array."
+ related_links =
+ "All crossvalidations: "+url_for("/crossvalidation",:full)+"\n"+
+ "Correspoding crossvalidation: "+url_for("/crossvalidation/"+params[:id],:full)
+ OpenTox.text_to_html p,@subjectid, related_links, description
else
- halt 400, "MIME type '"+request.env['HTTP_ACCEPT'].to_s+"' not supported, valid Accept-Headers are \"application/rdf+xml\" and \"application/x-yaml\"."
+ content_type "text/x-yaml"
+ p
+ end
+end
+
+get '/?' do
+
+ LOGGER.info "list all validations, params: "+params.inspect
+ #uri_list = Validation::Validation.find_like(params).collect{ |v| v.validation_uri }.join("\n")+"\n"
+ #uri_list = Validation::Validation.all(params).collect{ |v| v.validation_uri }.join("\n")+"\n"
+ uri_list = Lib::DataMapperUtil.all(Validation::Validation,params).collect{ |v| v.validation_uri }.join("\n")+"\n"
+
+
+ if request.env['HTTP_ACCEPT'] =~ /text\/html/
+ related_links =
+ "To perform a validation:\n"+
+ "* "+url_for("/test_set_validation",:full)+"\n"+
+ "* "+url_for("/training_test_validation",:full)+"\n"+
+ "* "+url_for("/bootstrapping",:full)+"\n"+
+ "* "+url_for("/training_test_split",:full)+"\n"+
+ "* "+url_for("/crossvalidation",:full)+"\n"+
+ "Validation reporting: "+url_for("/report",:full)+"\n"+
+ "REACH relevant reporting: "+url_for("/reach_report",:full)+"\n"+
+ "Examples for using this service: "+url_for("/examples",:full)+"\n"
+ description =
+ "A validation web service for the OpenTox project ( http://opentox.org ).\n"+
+ "In the root directory (this is where you are now), a list of all validation resources is returned."
+ content_type "text/html"
+ OpenTox.text_to_html uri_list,@subjectid,related_links,description
+ else
+ content_type "text/uri-list"
+ uri_list
end
- result
end
post '/?' do
- content_type "text/uri-list"
- task_uri = OpenTox::Task.as_task do |task|
- LOGGER.info "creating validation "+params.inspect
- if params[:model_uri] and params[:test_dataset_uri] and !params[:training_dataset_uri] and !params[:algorithm_uri]
- v = Validation::Validation.new :model_uri => params[:model_uri],
+ raise OpenTox::BadRequestError.new "Post not supported, to perfom a validation use '/test_set_validation', '/training_test_validation', 'bootstrapping', 'training_test_split'"
+end
+
+post '/test_set_validation' do
+ LOGGER.info "creating test-set-validation "+params.inspect
+ if params[:model_uri] and params[:test_dataset_uri] and !params[:training_dataset_uri] and !params[:algorithm_uri]
+ task = OpenTox::Task.create( "Perform test-set-validation", url_for("/", :full) ) do |task| #, params
+ v = Validation::Validation.new :validation_type => "test_set_validation",
+ :model_uri => params[:model_uri],
:test_dataset_uri => params[:test_dataset_uri],
:test_target_dataset_uri => params[:test_target_dataset_uri],
:prediction_feature => params[:prediction_feature]
- v.validate_model
- elsif params[:algorithm_uri] and params[:training_dataset_uri] and params[:test_dataset_uri] and params[:prediction_feature] and !params[:model_uri]
- v = Validation::Validation.new :algorithm_uri => params[:algorithm_uri],
+ v.subjectid = @subjectid
+ v.validate_model( task )
+ v.validation_uri
+ end
+ return_task(task)
+ else
+ raise OpenTox::BadRequestError.new "illegal parameters, pls specify model_uri and test_dataset_uri\n"+
+ "params given: "+params.inspect
+ end
+end
+
+get '/test_set_validation' do
+ LOGGER.info "list all test-set-validations, params: "+params.inspect
+
+ #uri_list = Validation::Validation.find( :all, :conditions => { :validation_type => "test_set_validation" } ).collect{ |v| v.validation_uri }.join("\n")+"\n"
+ #uri_list = Validation::Validation.all( :validation_type => "test_set_validation" ).collect{ |v| v.validation_uri }.join("\n")+"\n"
+ params[:validation_type] = "test_set_validation"
+ uri_list = Lib::DataMapperUtil.all(Validation::Validation,params).collect{ |v| v.validation_uri }.join("\n")+"\n"
+
+ if request.env['HTTP_ACCEPT'] =~ /text\/html/
+ related_links =
+ "All validations: "+url_for("/",:full)+"\n"+
+ "Validation reports: "+url_for("/report/validation",:full)
+ description =
+ "A list of all test-set-validations.\n"+
+ "To perform a test-set-validation use the POST method."
+ post_params = [[:model_uri, :test_dataset_uri, [:test_target_dataset_uri,"same-as-test_dataset_uri"], [:prediction_feature, "dependent-variable-of-model"]]]
+ content_type "text/html"
+ OpenTox.text_to_html uri_list,@subjectid,related_links,description,post_params
+ else
+ content_type "text/uri-list"
+ uri_list
+ end
+end
+
+post '/training_test_validation/?' do
+ LOGGER.info "creating training-test-validation "+params.inspect
+ if params[:algorithm_uri] and params[:training_dataset_uri] and params[:test_dataset_uri] and params[:prediction_feature] and !params[:model_uri]
+ task = OpenTox::Task.create( "Perform training-test-validation", url_for("/", :full) ) do |task| #, params
+ v = Validation::Validation.new :validation_type => "training_test_validation",
+ :algorithm_uri => params[:algorithm_uri],
:training_dataset_uri => params[:training_dataset_uri],
:test_dataset_uri => params[:test_dataset_uri],
:test_target_dataset_uri => params[:test_target_dataset_uri],
:prediction_feature => params[:prediction_feature]
- v.validate_algorithm( params[:algorithm_params])
- else
- halt 400, "illegal parameter combination for validation, use either\n"+
- "* model_uri, test_dataset_uri\n"+
- "* algorithm_uri, training_dataset_uri, test_dataset_uri, prediction_feature\n"+
- "params given: "+params.inspect
+ v.subjectid = @subjectid
+ v.validate_algorithm( params[:algorithm_params], task )
+ v.validation_uri
end
+ return_task(task)
+ else
+ raise OpenTox::BadRequestError.new "illegal parameters, pls specify algorithm_uri, training_dataset_uri, test_dataset_uri, prediction_feature\n"+
+ "params given: "+params.inspect
+ end
+end
+
+get '/training_test_validation' do
+ LOGGER.info "list all training-test-validations, params: "+params.inspect
+ #uri_list = Validation::Validation.find( :all, :conditions => { :validation_type => "training_test_validation" } ).collect{ |v| v.validation_uri }.join("\n")+"\n"
+ #uri_list = Validation::Validation.all( :validation_type => "training_test_validation" ).collect{ |v| v.validation_uri }.join("\n")+"\n"
+ params[:validation_type] = "training_test_validation"
+ uri_list = Lib::DataMapperUtil.all(Validation::Validation,params).collect{ |v| v.validation_uri }.join("\n")+"\n"
+
+ if request.env['HTTP_ACCEPT'] =~ /text\/html/
+ related_links =
+ "All validations: "+url_for("/",:full)+"\n"+
+ "Validation reports: "+url_for("/report/validation",:full)
+ description =
+ "A list of all training-test-validations.\n"+
+ "To perform a training-test-validation use the POST method."
+ post_params = [[:algorithm_uri,
+ :training_dataset_uri,
+ :test_dataset_uri,
+ [:test_target_dataset_uri,"same-as-test_dataset_uri"],
+ :prediction_feature,
+ [:algorithm_params, ""]]]
+ content_type "text/html"
+ OpenTox.text_to_html uri_list,@subjectid,related_links,description,post_params
+ else
content_type "text/uri-list"
+ uri_list
+ end
+end
+
+post '/bootstrapping' do
+ task = OpenTox::Task.create( "Perform bootstrapping validation", url_for("/bootstrapping", :full) ) do |task| #, params
+ LOGGER.info "performing bootstrapping validation "+params.inspect
+ raise OpenTox::BadRequestError.new "dataset_uri missing" unless params[:dataset_uri]
+ raise OpenTox::BadRequestError.new "algorithm_uri missing" unless params[:algorithm_uri]
+ raise OpenTox::BadRequestError.new "prediction_feature missing" unless params[:prediction_feature]
+
+ params.merge!( Validation::Util.bootstrapping( params[:dataset_uri],
+ params[:prediction_feature], @subjectid,
+ params[:random_seed], OpenTox::SubTask.create(task,0,33)) )
+ v = Validation::Validation.new :validation_type => "bootstrapping",
+ :test_target_dataset_uri => params[:dataset_uri],
+ :prediction_feature => params[:prediction_feature],
+ :algorithm_uri => params[:algorithm_uri]
+ v.subjectid = @subjectid
+ v.validate_algorithm( params[:algorithm_params], OpenTox::SubTask.create(task,33,100))
v.validation_uri
end
- halt 202,task_uri
+ return_task(task)
+end
+
+get '/bootstrapping' do
+ LOGGER.info "list all bootstrapping-validations, params: "+params.inspect
+ #uri_list = Validation::Validation.find( :all, :conditions => { :validation_type => "bootstrapping" } ).collect{ |v| v.validation_uri }.join("\n")+"\n"
+ #uri_list = Validation::Validation.all( :validation_type => "bootstrapping" ).collect{ |v| v.validation_uri }.join("\n")+"\n"
+ params[:validation_type] = "bootstrapping"
+ uri_list = Lib::DataMapperUtil.all(Validation::Validation,params).collect{ |v| v.validation_uri }.join("\n")+"\n"
+
+ if request.env['HTTP_ACCEPT'] =~ /text\/html/
+ related_links =
+ "All validations: "+url_for("/",:full)+"\n"+
+ "Validation reports: "+url_for("/report/validation",:full)
+ description =
+ "A list of all bootstrapping-validations.\n"+
+ "To perform a bootstrapping-validation use the POST method."
+ post_params = [[:algorithm_uri,
+ :dataset_uri,
+ :prediction_feature,
+ [:algorithm_params, ""],
+ [:random_seed, 1]]]
+ content_type "text/html"
+ OpenTox.text_to_html uri_list,@subjectid,related_links,description,post_params
+ else
+ content_type "text/uri-list"
+ uri_list
+ end
end
post '/training_test_split' do
- content_type "text/uri-list"
- task_uri = OpenTox::Task.as_task do
+
+ task = OpenTox::Task.create( "Perform training test split validation", url_for("/training_test_split", :full) ) do |task| #, params
LOGGER.info "creating training test split "+params.inspect
- halt 400, "dataset_uri missing" unless params[:dataset_uri]
- halt 400, "algorithm_uri missing" unless params[:algorithm_uri]
- halt 400, "prediction_feature missing" unless params[:prediction_feature]
+ raise OpenTox::BadRequestError.new "dataset_uri missing" unless params[:dataset_uri]
+ raise OpenTox::BadRequestError.new "algorithm_uri missing" unless params[:algorithm_uri]
+ raise OpenTox::BadRequestError.new "prediction_feature missing" unless params[:prediction_feature]
- params.merge!(Validation::Util.train_test_dataset_split(params[:dataset_uri], params[:prediction_feature], params[:split_ratio], params[:random_seed]))
- v = Validation::Validation.new :training_dataset_uri => params[:training_dataset_uri],
+ params.merge!( Validation::Util.train_test_dataset_split(params[:dataset_uri], params[:prediction_feature],
+ @subjectid, params[:split_ratio], params[:random_seed], OpenTox::SubTask.create(task,0,33)))
+ v = Validation::Validation.new :validation_type => "training_test_split",
+ :training_dataset_uri => params[:training_dataset_uri],
:test_dataset_uri => params[:test_dataset_uri],
:test_target_dataset_uri => params[:dataset_uri],
:prediction_feature => params[:prediction_feature],
:algorithm_uri => params[:algorithm_uri]
- v.validate_algorithm( params[:algorithm_params])
- content_type "text/uri-list"
+ v.subjectid = @subjectid
+ v.validate_algorithm( params[:algorithm_params], OpenTox::SubTask.create(task,33,100))
v.validation_uri
end
- halt 202,task_uri
+ return_task(task)
+
+end
+
+get '/training_test_split' do
+ LOGGER.info "list all training-test-split-validations, params: "+params.inspect
+ #uri_list = Validation::Validation.find( :all, :conditions => { :validation_type => "training_test_split" } ).collect{ |v| v.validation_uri }.join("\n")+"\n"
+ #uri_list = Validation::Validation.all( :validation_type => "training_test_split" ).collect{ |v| v.validation_uri }.join("\n")+"\n"
+ params[:validation_type] = "training_test_split"
+ uri_list = Lib::DataMapperUtil.all(Validation::Validation,params).collect{ |v| v.validation_uri }.join("\n")+"\n"
+
+ if request.env['HTTP_ACCEPT'] =~ /text\/html/
+ related_links =
+ "All validations: "+url_for("/",:full)+"\n"+
+ "Validation reports: "+url_for("/report/validation",:full)
+ description =
+ "A list of all training-test-split-validations.\n"+
+ "To perform a training-test-split-validation use the POST method."
+ post_params = [[:algorithm_uri,
+ :dataset_uri,
+ :prediction_feature,
+ [:algorithm_params, ""],
+ [:random_seed, 1],
+ [:split_ratio, 0.66]]]
+ content_type "text/html"
+ OpenTox.text_to_html uri_list,@subjectid,related_links,description,post_params
+ else
+ content_type "text/uri-list"
+ uri_list
+ end
end
+post '/cleanup/?' do
+ LOGGER.info "validation cleanup, starting..."
+ content_type "text/uri-list"
+ deleted = []
+ #Validation::Validation.find( :all, :conditions => { :prediction_dataset_uri => nil } ).each do |val|
+ Validation::Validation.all( :finished => false ).each do |val|
+ LOGGER.debug "delete val with id:"+val.id.to_s+", finished is false"
+ deleted << val.validation_uri
+ #Validation::Validation.delete(val.id)
+ val.subjectid = @subjectid
+ val.delete
+ end
+ LOGGER.info "validation cleanup, deleted "+deleted.size.to_s+" validations"
+ deleted.join("\n")+"\n"
+end
post '/plain_training_test_split' do
LOGGER.info "creating pure training test split "+params.inspect
- halt 400, "dataset_uri missing" unless params[:dataset_uri]
+ raise OpenTox::BadRequestError.new "dataset_uri missing" unless params[:dataset_uri]
result = Validation::Util.train_test_dataset_split(params[:dataset_uri], params[:prediction_feature], params[:split_ratio], params[:random_seed])
content_type "text/uri-list"
@@ -201,56 +456,115 @@ post '/plain_training_test_split' do
end
post '/validate_datasets' do
- content_type "text/uri-list"
- task_uri = OpenTox::Task.as_task do
+ task = OpenTox::Task.create( "Perform dataset validation", url_for("/validate_datasets", :full) ) do |task| #, params
LOGGER.info "validating values "+params.inspect
- halt 400, "test_dataset_uri missing" unless params[:test_dataset_uri]
- halt 400, "prediction_datset_uri missing" unless params[:prediction_dataset_uri]
+ raise OpenTox::BadRequestError.new "test_dataset_uri missing" unless params[:test_dataset_uri]
+ raise OpenTox::BadRequestError.new "prediction_datset_uri missing" unless params[:prediction_dataset_uri]
+ params[:validation_type] = "validate_datasets"
if params[:model_uri]
v = Validation::Validation.new params
- v.compute_validation_stats_with_model()
+ v.subjectid = @subjectid
+ v.compute_validation_stats_with_model(nil,false,task)
else
- halt 400, "please specify 'model_uri' or 'prediction_feature'" unless params[:prediction_feature]
- halt 400, "please specify 'model_uri' or 'predicted_feature'" unless params[:predicted_feature]
- halt 400, "please specify 'model_uri' or set either 'classification' or 'regression' flag" unless
+ raise OpenTox::BadRequestError.new "please specify 'model_uri' or 'prediction_feature'" unless params[:prediction_feature]
+ raise OpenTox::BadRequestError.new "please specify 'model_uri' or 'predicted_feature'" unless params[:predicted_feature]
+ raise OpenTox::BadRequestError.new "please specify 'model_uri' or set either 'classification' or 'regression' flag" unless
params[:classification] or params[:regression]
predicted_feature = params.delete("predicted_feature")
- clazz = params.delete("classification")!=nil
- regr = params.delete("regression")!=nil
- v = Validation::Validation.new params
- v.compute_validation_stats((clazz and !regr),predicted_feature)
+ feature_type = "classification" if params.delete("classification")!=nil
+ feature_type = "regression" if params.delete("regression")!=nil
+ v = Validation::Validation.new params
+ v.subjectid = @subjectid
+ v.compute_validation_stats(feature_type,predicted_feature,nil,nil,false,task)
end
- content_type "text/uri-list"
v.validation_uri
end
- halt 202,task_uri
+ return_task(task)
end
-get '/:id/:attribute' do
- LOGGER.info "access validation attribute "+params.inspect
+get '/:id/predictions' do
+ LOGGER.info "get validation predictions "+params.inspect
begin
- validation = Validation::Validation.find(params[:id])
+ #validation = Validation::Validation.find(params[:id])
+ validation = Validation::Validation.get(params[:id])
rescue ActiveRecord::RecordNotFound => ex
- halt 404, "Validation '#{params[:id]}' not found."
+ raise OpenTox::NotFoundError.new "Validation '#{params[:id]}' not found."
end
- begin
- raise unless validation.attribute_loaded?(params[:attribute])
- rescue
- halt 400, "Not a validation attribute: "+params[:attribute].to_s
+ raise OpenTox::BadRequestError.new "Validation '"+params[:id].to_s+"' not finished" unless validation.finished
+ p = validation.compute_validation_stats_with_model(nil, true)
+ case request.env['HTTP_ACCEPT'].to_s
+ when /text\/html/
+ content_type "text/html"
+ description =
+ "The validation predictions as (yaml-)array."
+ related_links =
+ "All validations: "+url_for("/",:full)+"\n"+
+ "Correspoding validation: "+url_for("/"+params[:id],:full)
+ OpenTox.text_to_html p.to_array.to_yaml,@subjectid, related_links, description
+ else
+ content_type "text/x-yaml"
+ p.to_array.to_yaml
+ end
+end
+
+#get '/:id/:attribute' do
+# LOGGER.info "access validation attribute "+params.inspect
+# begin
+# validation = Validation::Validation.find(params[:id])
+# rescue ActiveRecord::RecordNotFound => ex
+# raise OpenTox::NotFoundError.new "Validation '#{params[:id]}' not found."
+# end
+# begin
+# raise unless validation.attribute_loaded?(params[:attribute])
+# rescue
+# raise OpenTox::BadRequestError.new "Not a validation attribute: "+params[:attribute].to_s
+# end
+# content_type "text/plain"
+# return validation.send(params[:attribute])
+#end
+
+get '/:id' do
+ LOGGER.info "get validation with id "+params[:id].to_s+" '"+request.env['HTTP_ACCEPT'].to_s+"'"
+# begin
+ #validation = Validation::Validation.find(params[:id])
+# rescue ActiveRecord::RecordNotFound => ex
+# raise OpenTox::NotFoundError.new "Validation '#{params[:id]}' not found."
+# end
+ validation = Validation::Validation.get(params[:id])
+ raise OpenTox::NotFoundError.new "Validation '#{params[:id]}' not found." unless validation
+
+ case request.env['HTTP_ACCEPT'].to_s
+ when "application/rdf+xml"
+ content_type "application/rdf+xml"
+ validation.to_rdf
+ when /text\/html/
+ content_type "text/html"
+ description =
+ "A validation resource."
+ related_links =
+ "Search for corresponding report: "+url_for("/report/validation?validation="+validation.validation_uri,:full)+"\n"+
+ "Get validation predictions: "+url_for("/"+params[:id]+"/predictions",:full)+"\n"+
+ "All validations: "+url_for("/",:full)+"\n"+
+ "All validation reports: "+url_for("/report/validation",:full)
+ OpenTox.text_to_html validation.to_yaml,@subjectid,related_links,description
+ else #default is yaml
+ content_type "application/x-yaml"
+ validation.to_yaml
end
- content_type "text/plain"
- return validation.send(params[:attribute])
end
delete '/:id' do
LOGGER.info "delete validation with id "+params[:id].to_s
- begin
- validation = Validation::Validation.find(params[:id])
- rescue ActiveRecord::RecordNotFound => ex
- halt 404, "Validation '#{params[:id]}' not found."
- end
+# begin
+ #validation = Validation::Validation.find(params[:id])
+# rescue ActiveRecord::RecordNotFound => ex
+# raise OpenTox::NotFoundError.new "Validation '#{params[:id]}' not found."
+# end
+ validation = Validation::Validation.get(params[:id])
+ raise OpenTox::NotFoundError.new "Validation '#{params[:id]}' not found." unless validation
+ validation.subjectid = @subjectid
content_type "text/plain"
- Validation::Validation.delete(params[:id])
+ validation.delete
end
\ No newline at end of file
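
A minimal client sketch (not part of the commit) of how the reworked POST /crossvalidation route above might be exercised from Ruby; the service host, the dataset/algorithm/feature URIs and the use of the rest-client gem are assumptions. The returned task URI can then be polled until the crossvalidation URI becomes available.

require 'rubygems'
require 'rest-client'

host = "http://localhost:4567"   # assumed validation service host

# start a crossvalidation; the route answers with a task URI (HTTP 202)
task_uri = RestClient.post("#{host}/crossvalidation",
  :dataset_uri        => "http://example.org/dataset/1",        # assumed dataset
  :algorithm_uri      => "http://example.org/algorithm/lazar",  # assumed algorithm
  :prediction_feature => "http://example.org/dataset/1/feature/endpoint",
  :num_folds          => 10,
  :random_seed        => 1,
  :stratified         => false).to_s.strip

puts "crossvalidation task started: #{task_uri}"
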
diff --git a/validation/validation_format.rb b/validation/validation_format.rb
index a172f8a..f69ceac 100644..100755
--- a/validation/validation_format.rb
+++ b/validation/validation_format.rb
@@ -1,41 +1,37 @@
-require "lib/rdf_provider.rb"
+require "lib/format_util.rb"
module Validation
-
# adding to_yaml and to_rdf functionality to validation
class Validation < Lib::Validation
- include Lib::RDFProvider
- # get_content_as_hash is the basis for to_yaml and to_rdf
- # the idea is that everything is stored in a hash structure
- # the hash is directly printed in to_yaml, whereas the has_keys can be used to resolve
- # the right properties, classes for to_rdf
- def get_content_as_hash
-
- LOGGER.debug self.validation_uri
+ # builds a hash for the validation, as the internal representation differs from the owl-object
+ # the hash is directly printed in to_yaml, or added to the owl-structure
+ def get_content_as_hash()
h = {}
- Lib::VAL_PROPS.each{|p| h[p] = self.send(p)}
+ (Lib::VAL_PROPS - [:validation_uri]).each do |p|
+ h[p] = self.send(p)
+ end
if crossvalidation_id!=nil
- cv = {}
- Lib::VAL_CV_PROPS.each do |p|
- cv[p] = self.send(p)
- end
- # replace crossvalidation id with uri
+ cv = {:type => OT.CrossvalidationInfo}
+ #skip crossvalidation_id
+ cv[:crossvalidation_fold] = self.crossvalidation_fold
+ cv[:crossvalidation_uri] = self.crossvalidation_uri
h[:crossvalidation_info] = cv
end
if classification_statistics
- clazz = {}
+ raise "classification_statistics is no has: "+classification_statistics.class.to_s unless classification_statistics.is_a?(Hash)
+ clazz = { :type => OT.ClassificationStatistics }
Lib::VAL_CLASS_PROPS_SINGLE.each{ |p| clazz[p] = classification_statistics[p] }
# transpose results per class
class_values = {}
Lib::VAL_CLASS_PROPS_PER_CLASS.each do |p|
- $sinatra.halt 500, "missing classification statitstics: "+p.to_s+" "+classification_statistics.inspect unless classification_statistics[p]
+ raise "missing classification statitstics: "+p.to_s+" "+classification_statistics.inspect if classification_statistics[p]==nil
classification_statistics[p].each do |class_value, property_value|
- class_values[class_value] = {:class_value => class_value} unless class_values.has_key?(class_value)
+ class_values[class_value] = {:class_value => class_value, :type => OT.ClassValueStatistics} unless class_values.has_key?(class_value)
map = class_values[class_value]
map[p] = property_value
end
@@ -44,99 +40,63 @@ module Validation
#converting confusion matrix
cells = []
- $sinatra.halt 500,"confusion matrix missing" unless classification_statistics[:confusion_matrix]!=nil
+ raise "confusion matrix missing" unless classification_statistics[:confusion_matrix]!=nil
classification_statistics[:confusion_matrix].each do |k,v|
- cell = {}
+ cell = { :type => OT.ConfusionMatrixCell }
# key in confusion matrix is map with predicted and actual attribute
k.each{ |kk,vv| cell[kk] = vv }
cell[:confusion_matrix_value] = v
cells.push cell
end
- cm = { :confusion_matrix_cell => cells }
+ cm = { :confusion_matrix_cell => cells, :type => OT.ConfusionMatrix }
clazz[:confusion_matrix] = cm
h[:classification_statistics] = clazz
elsif regression_statistics
- regr = {}
+ regr = {:type => OT.RegressionStatistics }
Lib::VAL_REGR_PROPS.each{ |p| regr[p] = regression_statistics[p]}
h[:regression_statistics] = regr
end
return h
end
- def rdf_title
- "Validation"
+ def to_rdf
+ s = OpenTox::Serializer::Owl.new
+ s.add_resource(validation_uri,OT.Validation,get_content_as_hash.keys_to_rdf_format.keys_to_owl_uris)
+ s.to_rdfxml
end
- def uri
- validation_uri
+ def to_yaml
+ get_content_as_hash.keys_to_rdf_format.keys_to_owl_uris.to_yaml
end
- LITERALS = [ :created_at, :real_runtime, :num_instances, :num_without_class,
- :percent_without_class, :num_unpredicted, :percent_unpredicted,
- :crossvalidation_fold ] +
- (Lib::VAL_CLASS_PROPS - [ :confusion_matrix ]) + Lib::VAL_REGR_PROPS +
- [ :class_value, :confusion_matrix_value,
- :confusion_matrix_actual, :confusion_matrix_predicted ]
-
- LITERAL_NAMES = {:created_at => OT["date"] }
-
- OBJECT_PROPERTIES = { :model_uri => OT['validationModel'], :training_dataset_uri => OT['validationTrainingDataset'], :algorithm_uri => OT['validationAlgorithm'],
- :prediction_feature => OT['predictedFeature'], :test_dataset_uri => OT['validationTestDataset'], :test_target_dataset_uri => OT['validationTestTargetDataset'],
- :prediction_dataset_uri => OT['validationPredictionDataset'], :crossvalidation_info => OT['hasValidationInfo'],
- :crossvalidation_uri => OT['validationCrossvalidation'],
- :classification_statistics => OT['hasValidationInfo'], :regression_statistics => OT['hasValidationInfo'],
- :class_value_statistics => OT['classValueStatistics'], :confusion_matrix => OT['confusionMatrix'],
- :confusion_matrix_cell => OT['confusionMatrixCell'], #:class_value => OT['classValue'],
- #:confusion_matrix_actual => OT['confusionMatrixActual'], :confusion_matrix_predicted => OT['confusionMatrixPredicted']
- }
-
- OBJECTS = { :model_uri => OT['Model'], :training_dataset_uri => OT['Dataset'], :test_dataset_uri => OT['Dataset'],
- :test_target_dataset_uri => OT['Dataset'], :prediction_dataset_uri => OT['Dataset'], :prediction_feature => OT['Feature'],
- :algorithm_uri => OT['Algorithm'],}
-
- CLASSES = { :crossvalidation_info => OT['CrossvalidationInfo'], :classification_statistics => OT['ClassificationStatistics'],
- :regression_statistics => OT['RegresssionStatistics'], :class_value_statistics => OT['ClassValueStatistics'],
- :confusion_matrix => OT['ConfusionMatrix'], :confusion_matrix_cell => OT['ConfusionMatrixCell']}
-
- IGNORE = [ :id, :validation_uri, :crossvalidation_id ]
-
end
class Crossvalidation < Lib::Crossvalidation
- include Lib::RDFProvider
-
+
def get_content_as_hash
h = {}
- Lib::CROSS_VAL_PROPS_REDUNDANT.each{|p| h[p] = self.send(p)}
+ (Lib::CROSS_VAL_PROPS_REDUNDANT - [:crossvalidation_uri]).each do |p|
+ h[p] = self.send(p)
+ end
v = []
- Validation.find( :all, :conditions => { :crossvalidation_id => self.id } ).each do |val|
+ #Validation.find( :all, :conditions => { :crossvalidation_id => self.id } ).each do |val|
+ Validation.all( :crossvalidation_id => self.id ).each do |val|
v.push( val.validation_uri.to_s )
end
- h[:validations] = v
+ h[:validation_uris] = v
h
end
- def uri
- crossvalidation_uri
+ def to_rdf
+ s = OpenTox::Serializer::Owl.new
+ s.add_resource(crossvalidation_uri,OT.Crossvalidation,get_content_as_hash.keys_to_rdf_format.keys_to_owl_uris)
+ s.to_rdfxml
end
- def rdf_title
- "Crossvalidation"
+ def to_yaml
+ get_content_as_hash.keys_to_rdf_format.keys_to_owl_uris.to_yaml
end
-
- LITERALS = [ :created_at, :stratified, :num_folds, :random_seed ]
-
- LITERAL_NAMES = {:created_at => OT["date"] }
-
- OBJECT_PROPERTIES = { :dataset_uri => OT['crossvalidationDataset'], :algorithm_uri => OT['crossvalidationAlgorithm'],
- :validations => OT['crossvalidationValidation'] }
-
- OBJECTS = { :dataset_uri => OT['Dataset'], :validations => OT['Validation'], :algorithm_uri => OT['Algorithm']}
-
- CLASSES = {}
-
- IGNORE = [ :id, :crossvalidation_uri ]
end
end
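
A hedged usage sketch for the serialization helpers defined above; it assumes the service environment (DataMapper connection, OpenTox libraries) is already loaded and that a finished validation with id 1 exists.

# fetch a stored validation and render it with the new helpers
validation = Validation::Validation.get(1)

puts validation.to_yaml   # content hash with keys mapped to RDF/OWL style
puts validation.to_rdf    # RDF/XML built via OpenTox::Serializer::Owl
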
diff --git a/validation/validation_service.rb b/validation/validation_service.rb
index cfbb681..a1efba5 100644..100755
--- a/validation/validation_service.rb
+++ b/validation/validation_service.rb
@@ -1,7 +1,5 @@
-require "rdf/redland"
-
require "lib/validation_db.rb"
require "lib/ot_predictions.rb"
@@ -31,171 +29,287 @@ class Array
end
module Validation
-
+
class Validation < Lib::Validation
# constructs a validation object, sets id and uri
- def initialize( params={} )
- $sinatra.halt 500,"do not set id manually" if params[:id]
- $sinatra.halt 500,"do not set uri manually" if params[:validation_uri]
- super params
- self.save!
- raise "internal error, validation-id not set "+to_yaml if self.id==nil
- self.attributes = { :validation_uri => $sinatra.url_for("/"+self.id.to_s, :full).to_s }
- self.save!
- end
+ #def initialize( params={} )
+ #raise "do not set id manually" if params[:id]
+ #params[:finished] = false
+ #super params
+ #self.save!
+ #raise "internal error, validation-id not set "+to_yaml if self.id==nil
+ #end
# deletes a validation
# PENDING: model and referenced datasets are deleted as well, keep it that way?
- def delete
-
- model = OpenTox::Model::PredictionModel.find(self.model_uri) if self.model_uri
- model.destroy if model
-
- #[@test_dataset_uri, @training_dataset_uri, @prediction_dataset_uri].each do |d|
- #dataset = OpenTox::Dataset.find(d) if d
- #dataset.delete if dataset
- #end
- destroy
+ def delete( delete_all=true )
+ if (delete_all)
+ to_delete = [:model_uri, :training_dataset_uri, :test_dataset_uri, :test_target_dataset_uri, :prediction_dataset_uri ]
+ case self.validation_type
+ when /test_set_validation/
+ to_delete -= [ :model_uri, :training_dataset_uri, :test_dataset_uri, :test_target_dataset_uri ]
+ when /bootstrapping/
+ to_delete -= [ :test_target_dataset_uri ]
+ when /training_test_validation/
+ to_delete -= [ :training_dataset_uri, :test_dataset_uri, :test_target_dataset_uri ]
+ when /training_test_split/
+ to_delete -= [ :test_target_dataset_uri ]
+ when /validate_dataset/
+ to_delete = []
+ when /crossvalidation/
+ to_delete -= [ :test_target_dataset_uri ]
+ else
+ raise "unknown dataset type"
+ end
+ to_delete.each do |attr|
+ uri = self.send(attr)
+ LOGGER.debug "also deleting "+attr.to_s+" : "+uri.to_s if uri
+ begin
+ OpenTox::RestClientWrapper.delete(uri, :subjectid => subjectid) if uri
+ rescue => ex
+ LOGGER.warn "could not delete "+uri.to_s+" : "+ex.message.to_s
+ end
+ end
+ end
+ self.destroy
+ if (subjectid)
+ begin
+ res = OpenTox::Authorization.delete_policies_from_uri(validation_uri, subjectid)
+ LOGGER.debug "Deleted validation policy: #{res}"
+ rescue
+ LOGGER.warn "Policy delete error for validation: #{validation_uri}"
+ end
+ end
"Successfully deleted validation "+self.id.to_s+"."
end
# validates an algorithm by building a model and validating this model
- def validate_algorithm( algorithm_params=nil )
-
- $sinatra.halt 404, "no algorithm uri: '"+self.algorithm_uri.to_s+"'" if self.algorithm_uri==nil or self.algorithm_uri.to_s.size<1
+ def validate_algorithm( algorithm_params=nil, task=nil )
+ raise "validation_type missing" unless self.validation_type
+ raise OpenTox::BadRequestError.new "no algorithm uri: '"+self.algorithm_uri.to_s+"'" if self.algorithm_uri==nil or self.algorithm_uri.to_s.size<1
params = { :dataset_uri => self.training_dataset_uri, :prediction_feature => self.prediction_feature }
if (algorithm_params!=nil)
algorithm_params.split(";").each do |alg_params|
alg_param = alg_params.split("=")
- $sinatra.halt 404, "invalid algorithm param: '"+alg_params.to_s+"'" unless alg_param.size==2 or alg_param[0].to_s.size<1 or alg_param[1].to_s.size<1
+ raise OpenTox::BadRequestError.new "invalid algorithm param: '"+alg_params.to_s+"'" unless alg_param.size==2 or alg_param[0].to_s.size<1 or alg_param[1].to_s.size<1
LOGGER.warn "algorihtm param contains empty space, encode? "+alg_param[1].to_s if alg_param[1] =~ /\s/
params[alg_param[0].to_sym] = alg_param[1]
end
end
LOGGER.debug "building model '"+algorithm_uri.to_s+"' "+params.inspect
- model = OpenTox::Model::PredictionModel.build(algorithm_uri, params)
- $sinatra.halt 500,"model building failed" unless model
- self.attributes = { :model_uri => model.uri }
- self.save!
+ algorithm = OpenTox::Algorithm::Generic.new(algorithm_uri)
+ params[:subjectid] = subjectid
+ self.model_uri = algorithm.run(params, OpenTox::SubTask.create(task, 0, 33))
+
+ #model = OpenTox::Model::PredictionModel.build(algorithm_uri, params,
+ # OpenTox::SubTask.create(task, 0, 33) )
- $sinatra.halt 500,"error after building model: model.dependent_variable != validation.prediciton_feature ("+
- model.dependentVariables.to_s+" != "+self.prediction_feature+")" if self.prediction_feature!=model.dependentVariables
+ raise "model building failed" unless model_uri
+ #self.attributes = { :model_uri => model_uri }
+ #self.save!
+
+# self.save if self.new?
+# self.update :model_uri => model_uri
+
+ #raise "error after building model: model.dependent_variable != validation.prediciton_feature ("+
+ # model.dependentVariables.to_s+" != "+self.prediction_feature+")" if self.prediction_feature!=model.dependentVariables
- validate_model
+ validate_model OpenTox::SubTask.create(task, 33, 100)
end
# validates a model
# PENDING: a new dataset is created to store the predictions, this should be optional: delete predictions afterwards yes/no
- def validate_model
+ def validate_model( task=nil )
+ raise "validation_type missing" unless self.validation_type
LOGGER.debug "validating model '"+self.model_uri+"'"
- model = OpenTox::Model::PredictionModel.find(self.model_uri)
- $sinatra.halt 400, "model not found: "+self.model_uri.to_s unless model
+ #model = OpenTox::Model::PredictionModel.find(self.model_uri)
+ #raise OpenTox::NotFoundError.new "model not found: "+self.model_uri.to_s unless model
+ model = OpenTox::Model::Generic.find(self.model_uri, self.subjectid)
unless self.algorithm_uri
- self.attributes = { :algorithm_uri => model.algorithm }
- self.save!
+# self.attributes = { :algorithm_uri => model.algorithm }
+# self.save!
+ #self.update :algorithm_uri => model.algorithm
+ self.algorithm_uri = model.metadata[OT.algorithm]
end
- if self.prediction_feature
- $sinatra.halt 400, "error validating model: model.dependent_variable != validation.prediciton_feature ("+
- model.dependentVariables+" != "+self.prediction_feature+")" if self.prediction_feature!=model.dependentVariables
+ if self.prediction_feature and model.uri=~/ambit2\/model/
+ LOGGER.warn "REMOVE AMBIT HACK TO __NOT__ RELY ON DEPENDENT VARIABLE"
else
- $sinatra.halt 400, "model has no dependentVariables specified, please give prediction feature for model validation" unless model.dependentVariables
- self.attributes = { :prediction_feature => model.dependentVariables }
- self.save!
+ dependentVariables = model.metadata[OT.dependentVariables]
+ if self.prediction_feature
+ raise OpenTox::NotFoundError.new "error validating model: model.dependent_variable != validation.prediction_feature ("+
+ dependentVariables.to_s+" != "+self.prediction_feature+"), model-metadata is "+model.metadata.inspect if self.prediction_feature!=dependentVariables
+ else
+ raise OpenTox::NotFoundError.new "model has no dependentVariables specified, please give prediction feature for model validation" unless dependentVariables
+ #self.attributes = { :prediction_feature => model.dependentVariables }
+ #self.save!
+ #self.update :prediction_feature => model.dependentVariables
+ self.prediction_feature = model.metadata[OT.dependentVariables]
+ end
end
prediction_dataset_uri = ""
benchmark = Benchmark.measure do
- prediction_dataset_uri = model.predict_dataset(self.test_dataset_uri)
+ #prediction_dataset_uri = model.predict_dataset(self.test_dataset_uri, OpenTox::SubTask.create(task, 0, 50))
+ prediction_dataset_uri = model.run(
+ {:dataset_uri => self.test_dataset_uri, :subjectid => self.subjectid},
+ "text/uri-list",
+ OpenTox::SubTask.create(task, 0, 50))
end
- self.attributes = { :prediction_dataset_uri => prediction_dataset_uri,
- :real_runtime => benchmark.real }
- self.save!
-
- compute_validation_stats_with_model( model )
+# self.attributes = { :prediction_dataset_uri => prediction_dataset_uri,
+# :real_runtime => benchmark.real }
+# self.save!
+# self.update :prediction_dataset_uri => prediction_dataset_uri,
+# :real_runtime => benchmark.real
+ self.prediction_dataset_uri = prediction_dataset_uri
+ self.real_runtime = benchmark.real
+
+ compute_validation_stats_with_model( model, false, OpenTox::SubTask.create(task, 50, 100) )
end
- def compute_validation_stats_with_model( model=nil )
+ def compute_validation_stats_with_model( model=nil, dry_run=false, task=nil )
- model = OpenTox::Model::PredictionModel.find(self.model_uri) if model==nil and self.model_uri
- $sinatra.halt 400, "model not found: "+self.model_uri.to_s unless model
- prediction_feature = self.prediction_feature ? nil : model.dependentVariables
- algorithm_uri = self.algorithm_uri ? nil : model.algorithm
- compute_validation_stats( model.classification?, model.predictedVariables, prediction_feature, algorithm_uri )
+ #model = OpenTox::Model::PredictionModel.find(self.model_uri) if model==nil and self.model_uri
+ #raise OpenTox::NotFoundError.new "model not found: "+self.model_uri.to_s unless model
+ model = OpenTox::Model::Generic.find(self.model_uri, self.subjectid) if model==nil and self.model_uri
+ raise OpenTox::NotFoundError.new "model not found: "+self.model_uri.to_s unless model
+
+ dependentVariables = model.metadata[OT.dependentVariables]
+ prediction_feature = self.prediction_feature ? nil : dependentVariables
+ algorithm_uri = self.algorithm_uri ? nil : model.metadata[OT.algorithm]
+ predictedVariables = model.metadata[OT.predictedVariables]
+ compute_validation_stats( model.feature_type(self.subjectid), predictedVariables,
+ prediction_feature, algorithm_uri, dry_run, task )
end
- def compute_validation_stats( classification, predicted_feature, prediction_feature=nil, algorithm_uri=nil)
+ def compute_validation_stats( feature_type, predicted_feature, prediction_feature=nil,
+ algorithm_uri=nil, dry_run=false, task=nil )
- self.attributes = { :prediction_feature => prediction_feature } if self.prediction_feature==nil && prediction_feature
- self.attributes = { :algorithm_uri => algorithm_uri } if self.algorithm_uri==nil && algorithm_uri
- self.save!
+# self.attributes = { :prediction_feature => prediction_feature } if self.prediction_feature==nil && prediction_feature
+# self.attributes = { :algorithm_uri => algorithm_uri } if self.algorithm_uri==nil && algorithm_uri
+# self.save!
+# self.update :prediction_feature => prediction_feature if self.prediction_feature==nil && prediction_feature
+# self.update :algorithm_uri => algorithm_uri if self.algorithm_uri==nil && algorithm_uri
+ self.prediction_feature = prediction_feature if self.prediction_feature==nil && prediction_feature
+ self.algorithm_uri = algorithm_uri if self.algorithm_uri==nil && algorithm_uri
LOGGER.debug "computing prediction stats"
- prediction = Lib::OTPredictions.new( classification,
+ prediction = Lib::OTPredictions.new( feature_type,
self.test_dataset_uri, self.test_target_dataset_uri, self.prediction_feature,
- self.prediction_dataset_uri, predicted_feature )
- if prediction.classification?
- self.attributes = { :classification_statistics => prediction.compute_stats }
- else
- self.attributes = { :regression_statistics => prediction.compute_stats }
+ self.prediction_dataset_uri, predicted_feature, self.subjectid, OpenTox::SubTask.create(task, 0, 80) )
+ #reading datasets and computing the main stats is 80% the work
+
+ unless dry_run
+ case feature_type
+ when "classification"
+ #self.attributes = { :classification_statistics => prediction.compute_stats }
+ #self.update :classification_statistics => prediction.compute_stats
+ self.classification_statistics = prediction.compute_stats
+ when "regression"
+ #self.attributes = { :regression_statistics => prediction.compute_stats }
+ self.regression_statistics = prediction.compute_stats
+ end
+# self.attributes = { :num_instances => prediction.num_instances,
+# :num_without_class => prediction.num_without_class,
+# :percent_without_class => prediction.percent_without_class,
+# :num_unpredicted => prediction.num_unpredicted,
+# :percent_unpredicted => prediction.percent_unpredicted,
+# :finished => true}
+# self.save!
+ self.attributes= {:num_instances => prediction.num_instances,
+ :num_without_class => prediction.num_without_class,
+ :percent_without_class => prediction.percent_without_class,
+ :num_unpredicted => prediction.num_unpredicted,
+ :percent_unpredicted => prediction.percent_unpredicted,
+ :finished => true}
+ begin
+ self.save
+ rescue DataMapper::SaveFailureError => e
+ raise "could not save validation: "+e.resource.errors.inspect
+ end
end
- self.attributes = { :num_instances => prediction.num_instances,
- :num_without_class => prediction.num_without_class,
- :percent_without_class => prediction.percent_without_class,
- :num_unpredicted => prediction.num_unpredicted,
- :percent_unpredicted => prediction.percent_unpredicted }
- self.save!
+ task.progress(100) if task
+ prediction
end
end
class Crossvalidation < Lib::Crossvalidation
# constructs a crossvalidation, id and uri are set
- def initialize( params={} )
-
- $sinatra.halt 500,"do not set id manually" if params[:id]
- $sinatra.halt 500,"do not set uri manually" if params[:crossvalidation_uri]
-
- params[:num_folds] = 10 if params[:num_folds]==nil
- params[:random_seed] = 1 if params[:random_seed]==nil
- params[:stratified] = false if params[:stratified]==nil
- super params
- self.save!
- raise "internal error, crossvalidation-id not set" if self.id==nil
- self.attributes = { :crossvalidation_uri => $sinatra.url_for("/crossvalidation/"+self.id.to_s, :full) }
- self.save!
+ #def initialize( params={} )
+ #
+ # raise "do not set id manually" if params[:id]
+ # params[:num_folds] = 10 if params[:num_folds]==nil
+ # params[:random_seed] = 1 if params[:random_seed]==nil
+ # params[:stratified] = false if params[:stratified]==nil
+ # params[:finished] = false
+ # super params
+ # self.save!
+ # raise "internal error, crossvalidation-id not set" if self.id==nil
+ #end
+
+ def perform_cv ( prediction_feature, algorithm_params=nil, task=nil )
+
+ create_cv_datasets( prediction_feature, OpenTox::SubTask.create(task, 0, 33) )
+ perform_cv_validations( algorithm_params, OpenTox::SubTask.create(task, 33, 100) )
end
# deletes a crossvalidation, all validations are deleted as well
def delete
- Validation.all(:crossvalidation_id => self.id).each{ |v| v.delete }
- destroy
+ Validation.all(:crossvalidation_id => self.id).each do |v|
+ v.subjectid = self.subjectid
+ v.delete
+ end
+ self.destroy
+ if (subjectid)
+ begin
+ res = OpenTox::Authorization.delete_policies_from_uri(crossvalidation_uri, subjectid)
+ LOGGER.debug "Deleted crossvalidation policy: #{res}"
+ rescue
+ LOGGER.warn "Policy delete error for crossvalidation: #{crossvalidation_uri}"
+ end
+ end
"Successfully deleted crossvalidation "+self.id.to_s+"."
end
# creates the cv folds
- # PENDING copying datasets of an equal (same dataset, same params) crossvalidation is disabled for now
- def create_cv_datasets( prediction_feature )
-
- create_new_cv_datasets( prediction_feature ) #unless copy_cv_datasets( prediction_feature )
+ def create_cv_datasets( prediction_feature, task=nil )
+ if copy_cv_datasets( prediction_feature )
+ # dataset folds of a previous crossvalidation could be used
+ task.progress(100) if task
+ else
+ create_new_cv_datasets( prediction_feature, task )
+ end
end
# executes the cross-validation (build models and validates them)
- def perform_cv ( algorithm_params=nil )
+ def perform_cv_validations( algorithm_params, task=nil )
- LOGGER.debug "perform cv validations"
+ LOGGER.debug "perform cv validations "+algorithm_params.inspect
+ i = 0
+ task_step = 100 / self.num_folds.to_f;
@tmp_validations.each do | val |
validation = Validation.new val
- validation.validate_algorithm( algorithm_params )
- #break
+ validation.subjectid = self.subjectid
+ validation.validate_algorithm( algorithm_params,
+ OpenTox::SubTask.create(task, i * task_step, ( i + 1 ) * task_step) )
+ raise "validation '"+validation.validation_uri+"' for crossvaldation could not be finished" unless
+ validation.finished
+ i += 1
end
+
+# self.attributes = { :finished => true }
+# self.save!
+ #self.save if self.new?
+ self.finished = true
+ self.save
end
private
@@ -203,39 +317,48 @@ module Validation
# returns true if successful, false otherwise
def copy_cv_datasets( prediction_feature )
- equal_cvs = Crossvalidation.all( { :dataset_uri => self.dataset_uri, :num_folds => self.num_folds,
- :stratified => self.stratified, :random_seed => self.random_seed } ).reject{ |cv| cv.id == self.id }
- return false if equal_cvs.size == 0
- cv = equal_cvs[0]
- Validation.all( :crossvalidation_id => cv.id ).each do |v|
-
- if self.stratified and v.prediction_feature != prediction_feature
- return false;
+ cvs = Crossvalidation.all( {
+ :dataset_uri => self.dataset_uri,
+ :num_folds => self.num_folds,
+ :stratified => self.stratified,
+ :random_seed => self.random_seed,
+ :finished => true} ).reject{ |cv| cv.id == self.id }
+ cvs.each do |cv|
+ next if AA_SERVER and !OpenTox::Authorization.authorized?(cv.crossvalidation_uri,"GET",self.subjectid)
+ tmp_val = []
+ Validation.all( :crossvalidation_id => cv.id ).each do |v|
+ break unless
+ v.prediction_feature == prediction_feature and
+ OpenTox::Dataset.exist?(v.training_dataset_uri,self.subjectid) and
+ OpenTox::Dataset.exist?(v.test_dataset_uri,self.subjectid)
+ #make sure self.id is set
+ self.save if self.new?
+ tmp_val << { :validation_type => "crossvalidation",
+ :training_dataset_uri => v.training_dataset_uri,
+ :test_dataset_uri => v.test_dataset_uri,
+ :test_target_dataset_uri => self.dataset_uri,
+ :crossvalidation_id => self.id,
+ :crossvalidation_fold => v.crossvalidation_fold,
+ :prediction_feature => prediction_feature,
+ :algorithm_uri => self.algorithm_uri }
end
- unless (OpenTox::Dataset.find(v.training_dataset_uri) and
- OpenTox::Dataset.find(v.test_dataset_uri))
- LOGGER.debug "dataset uris obsolete, aborting copy of datasets"
- Validation.all( :crossvalidation_id => self.id ).each{ |v| v.delete }
- return false
+ if tmp_val.size == self.num_folds
+ @tmp_validations = tmp_val
+ LOGGER.debug "copied dataset uris from cv "+cv.crossvalidation_uri.to_s #+":\n"+tmp_val.inspect
+ return true
end
- validation = Validation.new :crossvalidation_id => self.id,
- :crossvalidation_fold => v.crossvalidation_fold,
- :training_dataset_uri => v.training_dataset_uri,
- :test_dataset_uri => v.test_dataset_uri,
- :algorithm_uri => self.algorithm_uri
end
- LOGGER.debug "copied dataset uris from cv "+cv.crossvalidation_uri.to_s
- return true
+ false
end
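Summarised, fold datasets are only copied when an earlier, finished crossvalidation with identical parameters is readable and all of its fold datasets still exist; a compact sketch of that predicate (illustrative method name, same API calls as above):

def reusable_cv?(cv, validations, prediction_feature, num_folds, subjectid)
  # mirror the authorization and existence checks above without doing the copy
  return false if AA_SERVER and !OpenTox::Authorization.authorized?(cv.crossvalidation_uri, "GET", subjectid)
  usable = validations.take_while do |v|
    v.prediction_feature == prediction_feature and
      OpenTox::Dataset.exist?(v.training_dataset_uri, subjectid) and
      OpenTox::Dataset.exist?(v.test_dataset_uri, subjectid)
  end
  usable.size == num_folds
end
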
# creates cv folds (training and test datasets)
# stores uris in validation objects
- def create_new_cv_datasets( prediction_feature )
+ def create_new_cv_datasets( prediction_feature, task = nil )
- $sinatra.halt(500,"random seed not set") unless self.random_seed
+ raise "random seed not set "+self.inspect unless self.random_seed
LOGGER.debug "creating datasets for crossvalidation"
- orig_dataset = OpenTox::Dataset.find(self.dataset_uri)
- $sinatra.halt 400, "Dataset not found: "+self.dataset_uri.to_s unless orig_dataset
+ orig_dataset = OpenTox::Dataset.find(self.dataset_uri,self.subjectid)
+ raise OpenTox::NotFoundError.new "Dataset not found: "+self.dataset_uri.to_s unless orig_dataset
shuffled_compounds = orig_dataset.compounds.shuffle( self.random_seed )
@@ -279,7 +402,7 @@ module Validation
end
LOGGER.debug "cv: num instances for each fold: "+split_compounds.collect{|c| c.size}.join(", ")
- test_features = orig_dataset.features.dclone - [prediction_feature]
+ test_features = orig_dataset.features.keys.dclone - [prediction_feature]
@tmp_validations = []
@@ -290,7 +413,7 @@ module Validation
'_f'+n.to_s+'of'+self.num_folds.to_s+
'_r'+self.random_seed.to_s+
'_s'+self.stratified.to_s
- source = $sinatra.url_for('/crossvalidation',:full)
+ source = $url_provider.url_for('/crossvalidation',:full)
test_compounds = []
train_compounds = []
@@ -305,22 +428,31 @@ module Validation
end
end
- $sinatra.halt 500,"internal error, num test compounds not correct" unless (shuffled_compounds.size/self.num_folds - test_compounds.size).abs <= 1
- $sinatra.halt 500,"internal error, num train compounds not correct" unless shuffled_compounds.size - test_compounds.size == train_compounds.size
+ raise "internal error, num test compounds not correct" unless (shuffled_compounds.size/self.num_folds - test_compounds.size).abs <= 1
+ raise "internal error, num train compounds not correct" unless shuffled_compounds.size - test_compounds.size == train_compounds.size
LOGGER.debug "training set: "+datasetname+"_train, compounds: "+train_compounds.size.to_s
- train_dataset_uri = orig_dataset.create_new_dataset( train_compounds, orig_dataset.features, datasetname + '_train', source )
+ #train_dataset_uri = orig_dataset.create_new_dataset( train_compounds, orig_dataset.features, datasetname + '_train', source )
+ train_dataset_uri = orig_dataset.split( train_compounds, orig_dataset.features.keys,
+ { DC.title => datasetname + '_train', DC.creator => source }, self.subjectid ).uri
LOGGER.debug "test set: "+datasetname+"_test, compounds: "+test_compounds.size.to_s
- test_dataset_uri = orig_dataset.create_new_dataset( test_compounds, test_features, datasetname + '_test', source )
-
- tmp_validation = { :training_dataset_uri => train_dataset_uri,
+ #test_dataset_uri = orig_dataset.create_new_dataset( test_compounds, test_features, datasetname + '_test', source )
+ test_dataset_uri = orig_dataset.split( test_compounds, test_features,
+ { DC.title => datasetname + '_test', DC.creator => source }, self.subjectid ).uri
+
+ #make sure self.id is set
+ self.save if self.new?
+ tmp_validation = { :validation_type => "crossvalidation",
+ :training_dataset_uri => train_dataset_uri,
:test_dataset_uri => test_dataset_uri,
:test_target_dataset_uri => self.dataset_uri,
:crossvalidation_id => self.id, :crossvalidation_fold => n,
:prediction_feature => prediction_feature,
:algorithm_uri => self.algorithm_uri }
@tmp_validations << tmp_validation
+
+ task.progress( n / self.num_folds.to_f * 100 ) if task
end
end
end
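The fold layout itself reduces to: shuffle the compounds deterministically and assign them to num_folds groups, using group n as the test fold and the rest as training data. The chunking code is outside this hunk, so the sketch below (plain Ruby, illustrative names) uses a simple round-robin assignment:

def fold_indices(num_compounds, num_folds, random_seed)
  # deterministic shuffle, then round-robin assignment into num_folds groups
  indices = (0...num_compounds).to_a.shuffle(random: Random.new(random_seed))
  groups  = Array.new(num_folds) { [] }
  indices.each_with_index { |idx, i| groups[i % num_folds] << idx }
  (0...num_folds).collect do |n|
    { :fold => n + 1, :test => groups[n], :train => indices - groups[n] }
  end
end
# fold_indices(10, 3, 1).each { |f| puts f.inspect }
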
@@ -328,27 +460,116 @@ module Validation
module Util
+ # splits a dataset into test and training datasets via bootstrapping
+ # (the training dataset has size n, sampled from the orig dataset with replacement)
+ # returns map with training_dataset_uri and test_dataset_uri
+ def self.bootstrapping( orig_dataset_uri, prediction_feature, subjectid, random_seed=nil, task=nil )
+
+ random_seed=1 unless random_seed
+
+ orig_dataset = OpenTox::Dataset.find orig_dataset_uri,subjectid
+ raise OpenTox::NotFoundError.new "Dataset not found: "+orig_dataset_uri.to_s unless orig_dataset
+ orig_dataset.load_all
+ if prediction_feature
+ raise OpenTox::NotFoundError.new "Prediction feature '"+prediction_feature.to_s+
+ "' not found in dataset, features are: \n"+
+ orig_dataset.features.inspect unless orig_dataset.features.include?(prediction_feature)
+ else
+ LOGGER.warn "no prediction feature given, all features included in test dataset"
+ end
+
+ compounds = orig_dataset.compounds
+ raise OpenTox::BadRequestError.new "Cannot split dataset, num compounds in dataset < 2 ("+compounds.size.to_s+")" if compounds.size<2
+
+ compounds.each do |c|
+ raise OpenTox::NotFoundError.new "Bootstrapping not yet implemented for duplicate compounds" if
+ orig_dataset.data_entries[c][prediction_feature].size > 1
+ end
+
+ srand random_seed.to_i
+ while true
+ training_compounds = []
+ compounds.size.times do
+ training_compounds << compounds[rand(compounds.size)]
+ end
+ test_compounds = []
+ compounds.each do |c|
+ test_compounds << c unless training_compounds.include?(c)
+ end
+ if test_compounds.size > 0
+ break
+ else
+ srand rand(10000)
+ end
+ end
+
+ LOGGER.debug "bootstrapping on dataset "+orig_dataset_uri+
+ " into training ("+training_compounds.size.to_s+") and test ("+test_compounds.size.to_s+")"+
+ ", duplicates in training dataset: "+test_compounds.size.to_s
+ task.progress(33) if task
+
+ result = {}
+# result[:training_dataset_uri] = orig_dataset.create_new_dataset( training_compounds,
+# orig_dataset.features,
+# "Bootstrapping training dataset of "+orig_dataset.title.to_s,
+# $sinatra.url_for('/bootstrapping',:full) )
+ result[:training_dataset_uri] = orig_dataset.split( training_compounds,
+ orig_dataset.features.keys,
+ { DC.title => "Bootstrapping training dataset of "+orig_dataset.title.to_s,
+ DC.creator => $url_provider.url_for('/bootstrapping',:full) },
+ subjectid ).uri
+ task.progress(66) if task
+
+# result[:test_dataset_uri] = orig_dataset.create_new_dataset( test_compounds,
+# orig_dataset.features.dclone - [prediction_feature],
+# "Bootstrapping test dataset of "+orig_dataset.title.to_s,
+# $sinatra.url_for('/bootstrapping',:full) )
+ result[:test_dataset_uri] = orig_dataset.split( test_compounds,
+ orig_dataset.features.keys.dclone - [prediction_feature],
+ { DC.title => "Bootstrapping test dataset of "+orig_dataset.title.to_s,
+ DC.creator => $url_provider.url_for('/bootstrapping',:full)} ,
+ subjectid ).uri
+ task.progress(100) if task
+
+ if ENV['RACK_ENV'] =~ /test|debug/
+ training_dataset = OpenTox::Dataset.find result[:training_dataset_uri],subjectid
+ raise OpenTox::NotFoundError.new "Training dataset not found: '"+result[:training_dataset_uri].to_s+"'" unless training_dataset
+ training_dataset.load_all
+ value_count = 0
+ training_dataset.compounds.each do |c|
+ value_count += training_dataset.data_entries[c][prediction_feature].size
+ end
+ raise "training compounds error" unless value_count==training_compounds.size
+ raise OpenTox::NotFoundError.new "Test dataset not found: '"+result[:test_dataset_uri].to_s+"'" unless
+ OpenTox::Dataset.find result[:test_dataset_uri], subjectid
+ end
+ LOGGER.debug "bootstrapping done, training dataset: '"+result[:training_dataset_uri].to_s+"', test dataset: '"+result[:test_dataset_uri].to_s+"'"
+
+ return result
+ end
+
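Stripped of the OpenTox calls, the sampling step above is ordinary bootstrapping: draw n compounds with replacement for training and keep the out-of-bag compounds as the test set, re-seeding in the (unlikely) case that every compound was drawn. A self-contained sketch with illustrative names:

def bootstrap_sample(compounds, random_seed = 1)
  srand random_seed.to_i
  loop do
    # n draws with replacement form the training sample
    training = Array.new(compounds.size) { compounds[rand(compounds.size)] }
    # out-of-bag compounds become the test set
    test = compounds.reject { |c| training.include?(c) }
    return [training, test] unless test.empty?
    srand rand(10000)
  end
end
# training, test = bootstrap_sample((1..100).to_a, 42)
# roughly 63% of the compounds end up in training (with duplicates), the rest in test
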
# splits a dataset into test and training datasets
# returns map with training_dataset_uri and test_dataset_uri
- def self.train_test_dataset_split( orig_dataset_uri, prediction_feature, split_ratio=nil, random_seed=nil )
+ def self.train_test_dataset_split( orig_dataset_uri, prediction_feature, subjectid, split_ratio=nil, random_seed=nil, task=nil )
split_ratio=0.67 unless split_ratio
random_seed=1 unless random_seed
- orig_dataset = OpenTox::Dataset.find orig_dataset_uri
- $sinatra.halt 400, "Dataset not found: "+orig_dataset_uri.to_s unless orig_dataset
- $sinatra.halt 400, "Split ratio invalid: "+split_ratio.to_s unless split_ratio and split_ratio=split_ratio.to_f
- $sinatra.halt 400, "Split ratio not >0 and <1 :"+split_ratio.to_s unless split_ratio>0 && split_ratio<1
+ orig_dataset = OpenTox::Dataset.find orig_dataset_uri, subjectid
+ raise OpenTox::NotFoundError.new "Dataset not found: "+orig_dataset_uri.to_s unless orig_dataset
+ orig_dataset.load_all subjectid
+ raise OpenTox::BadRequestError.new "Split ratio invalid: "+split_ratio.to_s unless split_ratio and split_ratio=split_ratio.to_f
+ raise OpenTox::BadRequestError.new "Split ratio not >0 and <1: "+split_ratio.to_s unless split_ratio>0 && split_ratio<1
if prediction_feature
- $sinatra.halt 400, "Prediction feature '"+prediction_feature.to_s+
+ raise OpenTox::NotFoundError.new "Prediction feature '"+prediction_feature.to_s+
"' not found in dataset, features are: \n"+
- orig_dataset.features.inspect unless orig_dataset.features.include?(prediction_feature)
+ orig_dataset.features.keys.inspect unless orig_dataset.features.include?(prediction_feature)
else
LOGGER.warn "no prediction feature given, all features included in test dataset"
end
compounds = orig_dataset.compounds
- $sinatra.halt 400, "Cannot split datset, num compounds in dataset < 2 ("+compounds.size.to_s+")" if compounds.size<2
+ raise OpenTox::BadRequestError.new "Cannot split dataset, num compounds in dataset < 2 ("+compounds.size.to_s+")" if compounds.size<2
split = (compounds.size*split_ratio).to_i
split = [split,1].max
split = [split,compounds.size-2].min
@@ -356,24 +577,56 @@ module Validation
LOGGER.debug "splitting dataset "+orig_dataset_uri+
" into train:0-"+split.to_s+" and test:"+(split+1).to_s+"-"+(compounds.size-1).to_s+
" (shuffled with seed "+random_seed.to_s+")"
-
compounds.shuffle!( random_seed )
+ task.progress(33) if task
result = {}
- result[:training_dataset_uri] = orig_dataset.create_new_dataset( compounds[0..split],
- orig_dataset.features,
- "Training dataset split of "+orig_dataset.title.to_s,
- $sinatra.url_for('/training_test_split',:full) )
- result[:test_dataset_uri] = orig_dataset.create_new_dataset( compounds[(split+1)..-1],
- orig_dataset.features.dclone - [prediction_feature],
- "Test dataset split of "+orig_dataset.title.to_s,
- $sinatra.url_for('/training_test_split',:full) )
-
- $sinatra.halt 400, "Training dataset not found: '"+result[:training_dataset_uri].to_s+"'" unless OpenTox::Dataset.find result[:training_dataset_uri]
- $sinatra.halt 400, "Test dataset not found: '"+result[:test_dataset_uri].to_s+"'" unless OpenTox::Dataset.find result[:test_dataset_uri]
+# result[:training_dataset_uri] = orig_dataset.create_new_dataset( compounds[0..split],
+# orig_dataset.features,
+# "Training dataset split of "+orig_dataset.title.to_s,
+# $sinatra.url_for('/training_test_split',:full) )
+
+# orig_dataset.data_entries.each do |k,v|
+# puts k.inspect+" =>"+v.inspect
+# puts v.values[0].to_s+" "+v.values[0].class.to_s
+# end
+
+ result[:training_dataset_uri] = orig_dataset.split( compounds[0..split],
+ orig_dataset.features.keys,
+ { DC.title => "Training dataset split of "+orig_dataset.title.to_s,
+ DC.creator => $url_provider.url_for('/training_test_split',:full) },
+ subjectid ).uri
+ task.progress(66) if task
+
+# d = OpenTox::Dataset.find(result[:training_dataset_uri])
+# d.data_entries.values.each do |v|
+# puts v.inspect
+# puts v.values[0].to_s+" "+v.values[0].class.to_s
+# end
+# raise "stop here"
+
+# result[:test_dataset_uri] = orig_dataset.create_new_dataset( compounds[(split+1)..-1],
+# orig_dataset.features.dclone - [prediction_feature],
+# "Test dataset split of "+orig_dataset.title.to_s,
+# $sinatra.url_for('/training_test_split',:full) )
+ result[:test_dataset_uri] = orig_dataset.split( compounds[(split+1)..-1],
+ orig_dataset.features.keys.dclone - [prediction_feature],
+ { DC.title => "Test dataset split of "+orig_dataset.title.to_s,
+ DC.creator => $url_provider.url_for('/training_test_split',:full) },
+ subjectid ).uri
+ task.progress(100) if task
+
+ if ENV['RACK_ENV'] =~ /test|debug/
+ raise OpenTox::NotFoundError.new "Training dataset not found: '"+result[:training_dataset_uri].to_s+"'" unless
+ OpenTox::Dataset.find(result[:training_dataset_uri],subjectid)
+ test_data = OpenTox::Dataset.find result[:test_dataset_uri],subjectid
+ raise OpenTox::NotFoundError.new "Test dataset not found: '"+result[:test_dataset_uri].to_s+"'" unless test_data
+ test_data.load_compounds subjectid
+ raise "Test dataset num compounds != "+(compounds.size-split-1).to_s+", instead: "+
+ test_data.compounds.size.to_s+"\n"+test_data.to_yaml unless test_data.compounds.size==(compounds.size-1-split)
+ end
LOGGER.debug "split done, training dataset: '"+result[:training_dataset_uri].to_s+"', test dataset: '"+result[:test_dataset_uri].to_s+"'"
-
return result
end
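The clamped split index above can be checked by hand; e.g. with 10 compounds and the default ratio of 0.67 (illustrative numbers):

compounds_size = 10
split_ratio    = 0.67
split = (compounds_size * split_ratio).to_i   # => 6
split = [split, 1].max                        # at least two training compounds (indices 0..split)
split = [split, compounds_size - 2].min       # at least one test compound (indices split+1..-1)
# training: compounds[0..6]  -> 7 compounds
# test:     compounds[7..9]  -> 3 compounds
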
diff --git a/validation/validation_test.rb b/validation/validation_test.rb
index cbaa84b..ffb25c4 100644..100755
--- a/validation/validation_test.rb
+++ b/validation/validation_test.rb
@@ -1,3 +1,11 @@
+
+require "rubygems"
+require "sinatra"
+before {
+ request.env['HTTP_HOST']="local-ot/validation"
+ request.env["REQUEST_URI"]=request.env["PATH_INFO"]
+}
+
require "uri"
require "yaml"
ENV['RACK_ENV'] = 'test'
@@ -7,70 +15,338 @@ require 'rack/test'
require 'lib/test_util.rb'
require 'test/test_examples.rb'
-LOGGER = MyLogger.new(STDOUT)
+LOGGER = OTLogger.new(STDOUT)
LOGGER.datetime_format = "%Y-%m-%d %H:%M:%S "
LOGGER.formatter = Logger::Formatter.new
+if AA_SERVER
+ TEST_USER = "mgtest"
+ TEST_PW = "mgpasswd"
+ #TEST_USER = "guest"
+ #TEST_PW = "guest"
+ SUBJECTID = OpenTox::Authorization.authenticate(TEST_USER,TEST_PW)
+ raise "could not log in" unless SUBJECTID
+ puts "logged in: "+SUBJECTID.to_s
+else
+ puts "AA disabled"
+ SUBJECTID = nil
+end
+
+#Rack::Test::DEFAULT_HOST = "local-ot" #"/validation"
+module Sinatra
+
+ set :raise_errors, false
+ set :show_exceptions, false
+
+ module UrlForHelper
+ BASE = "http://local-ot/validation"
+ def url_for url_fragment, mode=:path_only
+ case mode
+ when :path_only
+ raise "not impl"
+ when :full
+ end
+ "#{BASE}#{url_fragment}"
+ end
+ end
+end
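For reference, the patched helper simply prefixes the fragment with the fixed test base URI; expected behaviour (illustrative):

# url_for("/crossvalidation", :full)       # => "http://local-ot/validation/crossvalidation"
# url_for("/crossvalidation", :path_only)  # raises "not impl"
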
+
class ValidationTest < Test::Unit::TestCase
include Rack::Test::Methods
include Lib::TestUtil
def test_it
- $test_case = self
+ begin
+ $test_case = self
+
+# prediction_feature = "https://ambit.uni-plovdiv.bg:8443/ambit2/feature/26221"
+# puts OpenTox::Feature.find(prediction_feature).domain.inspect
+# exit
+
+# begin
+# #OpenTox::RestClientWrapper.get "http://local-ot/validation/runtime-error",{:accept => "application/rdf+xml"}
+# puts OpenTox::RestClientWrapper.post "http://opentox.ntua.gr:4000/model/0d8a9a27-3481-4450-bca1-d420a791de9d",
+# { :asdfasdf => "asdfasdf" } #{:dataset=>"http://apps.ideaconsult.net:8080/ambit2/dataset/54?max=2"},
+# { :accept => "text/uri-list", :subjectid => SUBJECTID }
+# #puts OpenTox::RestClientWrapper.post "http://opentox.ntua.gr:4000/model/0d8a9a27-3481-4450-bca1-d420a791de9d",{},{:accept => "text/uri-list", :subjectid => "AQIC5wM2LY4SfcwUNX97nTvaSTdYJ+nTUqZsR0UitJ4+jlc=@AAJTSQACMDE=#"}
+# rescue => err
+# rep = OpenTox::ErrorReport.create(err, "")
+# puts rep.to_yaml
+# end
+
+ # "http://opentox.ntua.gr:4000/model/0d8a9a27-3481-4450-bca1-d420a791de9d"
+
+# get "/19999",nil,'HTTP_ACCEPT' => "text/html"
+# exit
+#
+# get "/234234232341",nil,'HTTP_ACCEPT' => "application/x-yaml"
+# puts last_response.body
+##
+# get "/crossvalidation/1",nil,'HTTP_ACCEPT' => "application/rdf+xml"
+# puts last_response.body
+# exit
+
+ # d = OpenTox::Dataset.find("http://ot-dev.in-silico.ch/dataset/307")
+ # puts d.compounds.inspect
+ # exit
+
+ #get "?model=http://local-ot/model/1"
+ # get "/crossvalidation/3/predictions"
+ # puts last_response.body
+
+ # post "/validate_datasets",{
+ # :test_dataset_uri=>"http://apps.deaconsult.net:8080/ambit2/dataset/R3924",
+ # :prediction_dataset_uri=>"http://apps.ideaconsult.net:8080/ambit2/dataset/R3924?feature_uris[]=http%3A%2F%2Fapps.ideaconsult.net%3A8080%2Fambit2%2Fmodel%2F52%2Fpredicted",
+ # #:test_target_dataset_uri=>"http://local-ot/dataset/202",
+ # :prediction_feature=>"http://apps.ideaconsult.net:8080/ambit2/feature/21715",
+ # :predicted_feature=>"http://apps.ideaconsult.net:8080/ambit2/feature/28944",
+ # :regression=>"true"}
+ # #:classification=>"true"}
+ # puts last_response.body
+
+ #post "/crossvalidation/cleanup"
+ #puts last_response.body
+
+ #get "/crossvalidation/19/predictions",nil,'HTTP_ACCEPT' => "application/x-yaml" #/statistics"
+ # post "",:model_uri=>"http://local-ot/model/1",:test_dataset_uri=>"http://local-ot/dataset/3",
+ # :test_target_dataset_uri=>"http://local-ot/dataset/1"
+
+ # get "/crossvalidation/2",nil,'HTTP_ACCEPT' => "application/rdf+xml"
+ #puts last_response.body
+ #exit
- #get "/crossvalidation/4/statistics"
-# post "",:model_uri=>"http://localhost/model/1",:test_dataset_uri=>"http://localhost/dataset/3",
-# :test_target_dataset_uri=>"http://localhost/dataset/1"
+# OpenTox::Crossvalidation.create(
+# :dataset_uri=>"http://local-ot/dataset/1874",
+# :algorithm_uri=>"http://local-ot/algorithm/lazar",
+# :prediction_feature=>"http://local-ot/dataset/1874/feature/Hamster%20Carcinogenicity",
+# :algorithm_params=>"feature_generation_uri=http://local-ot/algorithm/fminer/bbrc")
- #get "/crossvalidation/1",nil,'HTTP_ACCEPT' => "application/rdf+xml"
- #puts last_response.body
-
-# post "/test_validation",:select=>"6d" #,:report=>"yes,please"
-# puts last_response.body
-
-# post "/validate_datasets",{
-# :test_dataset_uri=>"http://localhost/dataset/204",
-# :prediction_dataset_uri=>"http://localhost/dataset/206",
-# :test_target_dataset_uri=>"http://localhost/dataset/202",
-# :prediction_feature=>"http://ot-dev.in-silico.ch/toxcreate/feature#IRIS%20unit%20risk",
-# :predicted_feature=>"http://ot-dev.in-silico.ch/toxcreate/feature#IRIS%20unit%20risk_lazar_regression",
-# :regression=>"true"}
-# #:classification=>"true"}
-# puts last_response.body
-
- run_test("3b" ) #, "http://localhost/validation/826") #,"http://localhost/validation/report/validation/36") #, "http://localhost/validation/321")
-
- #run_test("7a") #,"http://localhost/validation/crossvalidation/10") #, "http://localhost/validation/321")
-
- #run_test("8b", "http://localhost/validation/crossvalidation/4")
-
- #puts Nightly.build_nightly("1")
+#http://local-ot/dataset/1878
+
+ #get "/crossvalidation?model_uri=lazar"
+ # post "/test_validation",:select=>"6d" #,:report=>"yes,please"
+ #puts last_response.body
+
+ # post "/validate_datasets",{
+ # :test_dataset_uri=>"http://local-ot/dataset/204",
+ # :prediction_dataset_uri=>"http://local-ot/dataset/206",
+ # :test_target_dataset_uri=>"http://local-ot/dataset/202",
+ # :prediction_feature=>"http://ot-dev.in-silico.ch/toxcreate/feature#IRIS%20unit%20risk",
+ # :predicted_feature=>"http://ot-dev.in-silico.ch/toxcreate/feature#IRIS%20unit%20risk_lazar_regression",
+ # :regression=>"true"}
+ # #:classification=>"true"}
+ # puts last_response.body
+
+ # post "/validate_datasets",{
+ # :test_dataset_uri=>"http://local-ot/dataset/89",
+ # :prediction_dataset_uri=>"http://local-ot/dataset/91",
+ # :test_target_dataset_uri=>"http://local-ot/dataset/87",
+ # :prediction_feature=>"http://local-ot/dataset/1/feature/hamster_carcinogenicity",
+ # :predicted_feature=>"",
+ ## :regression=>"true"}
+ # :classification=>"true"}
+ # puts last_response.body
+
+ # m = OpenTox::Model::Generic.find("http://local-ot/model/1323333")
+ # puts m.to_yaml
+
+# post "/validate_datasets",{
+# :test_dataset_uri=>"http://local-ot/dataset/390",
+# :prediction_dataset_uri=>"http://local-ot/dataset/392",
+# :test_target_dataset_uri=>"http://local-ot/dataset/388",
+# :prediction_feature=>"http://local-ot/dataset/388/feature/repdose_classification",
+# :model_uri=>"http://local-ot/model/31"}
+# #:regression=>"true"}
+# # :classification=>"true"}
+# uri = last_response.body
+# val = wait_for_task(uri)
+# puts val
+# get "/"+val.split("/")[-1]
+
+# post "/validate_datasets",{
+# :test_dataset_uri=>"http://opentox.informatik.uni-freiburg.de/dataset/409",
+# :prediction_dataset_uri=>"http://opentox.informatik.uni-freiburg.de/dataset/410",
+# :test_target_dataset_uri=>"https://ambit.uni-plovdiv.bg:8443/ambit2/dataset/R401560",
+# :prediction_feature=>"https://ambit.uni-plovdiv.bg:8443/ambit2/feature/22190",
+# :predicted_feature=>"https://ambit.uni-plovdiv.bg:8443/ambit2/feature/218304",
+# :regression=>"true",
+# :subjectid=>SUBJECTID}
+# #:model_uri=>"http://local-ot/model/31"}
+# #:regression=>"true"}
+# # :classification=>"true"}
+# uri = last_response.body
+# val = wait_for_task(uri)
+# puts val
+# #get "/"+val.split("/")[-1]
+
+
+
+ #ambit_service = "https://ambit.uni-plovdiv.bg:8443/ambit2"
+ #https%3A%2F%2Fambit.uni-plovdiv.bg%3A8443%2Fambit2
+
+# post "/validate_datasets",{
+# :test_dataset_uri=>ambit_service+"/dataset/R401577?max=50",
+# :prediction_dataset_uri=>ambit_service+"/dataset/R401577?max=50&feature_uris[]="+CGI.escape(ambit_service)+"%2Fmodel%2F35194%2Fpredicted",
+# #:test_target_dataset_uri=>ambit_service+"/dataset/R401560",
+# :prediction_feature=>ambit_service+"/feature/26221",
+# :predicted_feature=>ambit_service+"/feature/218699",
+# :classification=>"true",
+# :subjectid=>SUBJECTID}
+# #:model_uri=>"http://local-ot/model/31"}
+# #:regression=>"true"}
+# # :classification=>"true"}
+# uri = last_response.body
+# val = wait_for_task(uri)
+# puts val
+# #get "/"+val.split("/")[-1]
+
+
+# d = OpenTox::Dataset.find("https://ambit.uni-plovdiv.bg:8443/ambit2/dataset/R545",SUBJECTID)
+# puts d.compounds.inspect
+# exit
+
+# f = File.new("data/ambit-dataset.rdf")
+# d = ValidationExamples::Util.upload_dataset(f, SUBJECTID)
+# puts d
+
+# d = OpenTox::Dataset.find("https://ambit.uni-plovdiv.bg:8443/ambit2/dataset/R401560",SUBJECTID)
+# #puts d.compounds.to_yaml
+# #puts d.features.keys.to_yaml
+# puts d.to_yaml
+# d2 = d.split(d.compounds[0..5], d.features.keys[0..1], {}, SUBJECTID)
+# puts d2.to_yaml
+
+ # run_test("1b")#,:validation_uri=>"http://local-ot/validation/253") #,"http://local-ot/validation/28")#,"http://local-ot/validation/394");
+
+ #run_test("3b",:validation_uri=>"http://local-ot/validation/crossvalidation/45") #,{:dataset_uri => "http://local-ot/dataset/773", :prediction_feature => "http://local-ot/dataset/773/feature/Hamster%20Carcinogenicity"})
+
+# p = {
+# :dataset_uri=>"http://local-ot/dataset/527",
+# :algorithm_uri => "http://local-ot/majority/class/algorithm",
+# :prediction_feature=>"http://local-ot/dataset/527/feature/Hamster%20Carcinogenicity",
+# :num_folds => 2 }
+ #cv = OpenTox::Crossvalidation.create(p, SUBJECTID)
+# cv = OpenTox::Crossvalidation.find("http://local-ot/validation/crossvalidation/17", SUBJECTID)
+# puts cv.uri
+## puts cv.find_or_create_report.uri
+# puts cv.summary(SUBJECTID).inspect
+
+ #puts OpenTox::Authorization.list_policy_uris(SUBJECTID).inspect
+
+ #puts OpenTox::Authorization.list_policy_uris(SUBJECTID).inspect
+
+ #run_test("19d") #,{:dataset_uri => "http://local-ot/dataset/313", :prediction_feature => "http://local-ot/dataset/313/feature/repdose_classification"})
+
+# model = OpenTox::Model::Generic.find("http://local-ot/majority/class/model/58")
+# OpenTox::QMRFReport.create(model)
+
+
+ #get "/12123123123123123"
+ #get "/chain"
+
+ #OpenTox::RestClientWrapper.get("http://local-ot/validation/task-error")
+ #get "/error",nil,'HTTP_ACCEPT' => "application/rdf+xml"
+ #puts ""
+ #puts ""
+ #puts last_response.body
+ #exit
+
+# get "/error"
+# puts last_response.body
+
+ #delete "/1",:subjectid=>SUBJECTID
+
+ run_test("19i")
+
+ #run_test("3a","http://local-ot/validation/crossvalidation/4")
+ #run_test("3b","http://local-ot/validation/crossvalidation/3")
+
+ #run_test("8a", "http://local-ot/validation/crossvalidation/6")
+ #run_test("8b", "http://local-ot/validation/crossvalidation/5")
+
+ #run_test("11b", "http://local-ot/validation/crossvalidation/2" )# //local-ot/validation/42")#, "http://local-ot/validation/report/validation/8") #,"http://local-ot/validation/report/validation/36") #, "http://local-ot/validation/321")
+ # run_test("7a","http://local-ot/validation/40") #,"http://local-ot/validation/crossvalidation/10") #, "http://local-ot/validation/321")
+ #run_test("8b", "http://local-ot/validation/crossvalidation/4")
+
+ #puts Nightly.build_nightly("1")
+
+ #prepare_examples
+ #do_test_examples # USES CURL, DO NOT FORGET TO RESTART VALIDATION SERVICE
+ #do_test_examples_ortona
- #prepare_examples
- #do_test_examples # USES CURL, DO NOT FORGET TO RESTART VALIDATION SERVICE
+ rescue => ex
+ rep = OpenTox::ErrorReport.create(ex, "")
+ puts rep.to_yaml
+ ensure
+ #OpenTox::Authorization.logout(SUBJECTID) if AA_SERVER
+ end
end
def app
Sinatra::Application
end
- def run_test(select, validation_uri=nil)
+ def run_test(select=nil, overwrite={}, delete=false )
+
+ if AA_SERVER && SUBJECTID && delete
+ policies_before = OpenTox::Authorization.list_policy_uris(SUBJECTID)
+ end
+
+ puts ValidationExamples.list unless select
validationExamples = ValidationExamples.select(select)
validationExamples.each do |vv|
vv.each do |v|
ex = v.new
- ex.validation_uri = validation_uri
+ ex.subjectid = SUBJECTID
+
+ overwrite.each do |k,v|
+ ex.send(k.to_s+"=",v)
+ end
+
unless ex.validation_uri
ex.upload_files
ex.check_requirements
ex.validate
- LOGGER.debug "validation done "+ex.validation_uri.to_s
+
+ LOGGER.debug "validation done '"+ex.validation_uri.to_s+"'"
+ end
+ if !delete and ex.validation_uri
+ if SUBJECTID
+ puts ex.validation_uri+"?subjectid="+CGI.escape(SUBJECTID)
+ else
+ puts ex.validation_uri
+ end
+ end
+
+ unless ex.report_uri
+ ex.report
+ end
+ if !delete and ex.report_uri
+ if SUBJECTID
+ puts ex.report_uri+"?subjectid="+CGI.escape(SUBJECTID)
+ else
+ puts ex.report_uri
+ end
end
- ex.verify_yaml
- ex.report
+ ##ex.verify_yaml
+ ##ex.compare_yaml_vs_rdf
+ ex.delete if delete
end
end
+
+ if AA_SERVER && SUBJECTID && delete
+ policies_after= OpenTox::Authorization.list_policy_uris(SUBJECTID)
+ diff = policies_after.size - policies_before.size
+ if (diff != 0)
+ policies_before.each do |k,v|
+ policies_after.delete(k)
+ end
+ LOGGER.warn diff.to_s+" policies NOT deleted:\n"+policies_after.collect{|k,v| k.to_s+" => "+v.to_s}.join("\n")
+ else
+ LOGGER.debug "all policies deleted"
+ end
+ end
end
def prepare_examples
@@ -78,7 +354,11 @@ class ValidationTest < Test::Unit::TestCase
end
def do_test_examples # USES CURL, DO NOT FORGET TO RESTART
- get '/test_examples'
+ post '/test_examples'
+ end
+
+ def do_test_examples_ortona
+ post '/test_examples',:examples=>"http://ortona.informatik.uni-freiburg.de/validation/examples"
end
end