diff options
Diffstat (limited to 'validation')
-rwxr-xr-x[-rw-r--r--] | validation/validation_application.rb | 610 | ||||
-rwxr-xr-x[-rw-r--r--] | validation/validation_format.rb | 116 | ||||
-rwxr-xr-x[-rw-r--r--] | validation/validation_service.rb | 549 | ||||
-rwxr-xr-x[-rw-r--r--] | validation/validation_test.rb | 352 |
4 files changed, 1217 insertions, 410 deletions
diff --git a/validation/validation_application.rb b/validation/validation_application.rb index a43a2a6..e07acf0 100644..100755 --- a/validation/validation_application.rb +++ b/validation/validation_application.rb @@ -1,199 +1,454 @@ -[ 'rubygems', 'sinatra', 'sinatra/url_for', 'active_record', 'ar-extensions', 'opentox-ruby-api-wrapper' ].each do |lib| +[ 'rubygems', 'sinatra', 'sinatra/url_for', 'opentox-ruby' ].each do |lib| require lib end -require 'validation/validation_service.rb' require 'lib/merge.rb' +#require 'lib/active_record_setup.rb' +require 'validation/validation_service.rb' get '/crossvalidation/?' do LOGGER.info "list all crossvalidations" + #uri_list = Validation::Crossvalidation.all.collect{ |cv| cv.crossvalidation_uri }.join("\n")+"\n" + uri_list = Lib::DataMapperUtil.all(Validation::Crossvalidation,params).collect{ |cv| cv.crossvalidation_uri }.join("\n")+"\n" + + #uri_list = Validation::Crossvalidation.find_like(params).collect{ |cv| cv.crossvalidation_uri }.join("\n")+"\n" + if request.env['HTTP_ACCEPT'] =~ /text\/html/ + related_links = + "Single validations: "+url_for("/",:full)+"\n"+ + "Crossvalidation reports: "+url_for("/report/crossvalidation",:full) + description = + "A list of all crossvalidations.\n"+ + "Use the POST method to perform a crossvalidation." + post_params = [[:dataset_uri,:algorithm_uri,:prediction_feature,[:num_folds,10],[:random_seed,1],[:stratified,false],[:algorithm_params,""]]] + content_type "text/html" + OpenTox.text_to_html uri_list,@subjectid,related_links,description,post_params + else + content_type "text/uri-list" + uri_list + end +end + +post '/crossvalidation/?' do + task = OpenTox::Task.create( "Perform crossvalidation", url_for("/crossvalidation", :full) ) do |task| #, params + LOGGER.info "creating crossvalidation "+params.inspect + raise OpenTox::BadRequestError.new "dataset_uri missing" unless params[:dataset_uri] + raise OpenTox::BadRequestError.new "algorithm_uri missing" unless params[:algorithm_uri] + raise OpenTox::BadRequestError.new "prediction_feature missing" unless params[:prediction_feature] + raise OpenTox::BadRequestError.new "illegal param-value num_folds: '"+params[:num_folds].to_s+"', must be integer >1" unless params[:num_folds]==nil or + params[:num_folds].to_i>1 + + cv_params = { :dataset_uri => params[:dataset_uri], + :algorithm_uri => params[:algorithm_uri] } + [ :num_folds, :random_seed, :stratified ].each{ |sym| cv_params[sym] = params[sym] if params[sym] } + cv = Validation::Crossvalidation.new cv_params + cv.subjectid = @subjectid + cv.perform_cv( params[:prediction_feature], params[:algorithm_params], task ) + cv.crossvalidation_uri + end + return_task(task) +end + +post '/crossvalidation/cleanup/?' do + LOGGER.info "crossvalidation cleanup, starting..." content_type "text/uri-list" - params.each{ |k,v| halt 400,"no crossvalidation-attribute: "+k.to_s unless Validation::Crossvalidation.column_names.include?(k.gsub(/_like$/,"")) } - Validation::Crossvalidation.find(:all, :conditions => params).collect{ |d| url_for("/crossvalidation/", :full) + d.id.to_s }.join("\n") + deleted = [] + #Validation::Crossvalidation.find_like(params).each do |cv| + Validation::Crossvalidation.all( { :finished => false } ).each do |cv| + #num_vals = Validation::Validation.find( :all, :conditions => { :crossvalidation_id => cv.id } ).size + #num_vals = Validation::Validation.all( :crossvalidation_id => cv.id ).size + #if cv.num_folds != num_vals || !cv.finished + LOGGER.debug "delete cv with id:"+cv.id.to_s+", finished is false" + deleted << cv.crossvalidation_uri + #Validation::Crossvalidation.delete(cv.id) + cv.subjectid = @subjectid + cv.delete + #end + end + LOGGER.info "crossvalidation cleanup, deleted "+deleted.size.to_s+" cvs" + deleted.join("\n")+"\n" end post '/crossvalidation/loo/?' do - halt 500, "not yet implemented" + raise "not yet implemented" end get '/crossvalidation/loo/?' do - halt 400, "GET operation not supported, use POST for performing a loo-crossvalidation, see "+url_for("/crossvalidation", :full)+" for crossvalidation results" + raise OpenTox::BadRequestError.new "GET operation not supported, use POST for performing a loo-crossvalidation, see "+url_for("/crossvalidation", :full)+" for crossvalidation results" end get '/crossvalidation/:id' do LOGGER.info "get crossvalidation with id "+params[:id].to_s - begin - crossvalidation = Validation::Crossvalidation.find(params[:id]) - rescue ActiveRecord::RecordNotFound => ex - halt 404, "Crossvalidation '#{params[:id]}' not found." - end +# begin +# #crossvalidation = Validation::Crossvalidation.find(params[:id]) +# rescue ActiveRecord::RecordNotFound => ex +# raise OpenTox::NotFoundError.new "Crossvalidation '#{params[:id]}' not found." +# end + crossvalidation = Validation::Crossvalidation.get(params[:id]) + raise OpenTox::NotFoundError.new "Crossvalidation '#{params[:id]}' not found." unless crossvalidation case request.env['HTTP_ACCEPT'].to_s when "application/rdf+xml" content_type "application/rdf+xml" - result = crossvalidation.to_rdf - when /application\/x-yaml|\*\/\*|/ # matches 'text/x-yaml', '*/*', '' + crossvalidation.to_rdf + when /text\/html/ + related_links = + "Search for corresponding cv report: "+url_for("/report/crossvalidation?crossvalidation="+crossvalidation.crossvalidation_uri,:full)+"\n"+ + "Statistics for this crossvalidation: "+url_for("/crossvalidation/"+params[:id]+"/statistics",:full)+"\n"+ + "Predictions of this crossvalidation: "+url_for("/crossvalidation/"+params[:id]+"/predictions",:full)+"\n"+ + "All crossvalidations: "+url_for("/crossvalidation",:full)+"\n"+ + "All crossvalidation reports: "+url_for("/report/crossvalidation",:full) + description = + "A crossvalidation resource." + content_type "text/html" + OpenTox.text_to_html crossvalidation.to_yaml,@subjectid,related_links,description + when /application\/x-yaml|\*\/\*/ content_type "application/x-yaml" - result = crossvalidation.to_yaml + crossvalidation.to_yaml else - halt 400, "MIME type '"+request.env['HTTP_ACCEPT'].to_s+"' not supported." - end - result -end - -delete '/crossvalidation/:id/?' do - LOGGER.info "delete crossvalidation with id "+params[:id].to_s - content_type "text/plain" - begin - crossvalidation = Validation::Crossvalidation.find(params[:id]) - rescue ActiveRecord::RecordNotFound => ex - halt 404, "Crossvalidation '#{params[:id]}' not found." - end - Validation::Crossvalidation.delete(params[:id]) -end - -get '/crossvalidation/:id/validations' do - LOGGER.info "get all validations for crossvalidation with id "+params[:id].to_s - begin - crossvalidation = Validation::Crossvalidation.find(params[:id]) - rescue ActiveRecord::RecordNotFound => ex - halt 404, "Crossvalidation '#{params[:id]}' not found." + raise OpenTox::BadRequestError.new "MIME type '"+request.env['HTTP_ACCEPT'].to_s+"' not supported, valid Accept-Headers: \"application/rdf+xml\", \"application/x-yaml\", \"text/html\"." end - content_type "text/uri-list" - Validation::Validation.find( :all, :conditions => { :crossvalidation_id => params[:id] } ).collect{ |v| v.validation_uri.to_s }.join("\n")+"\n" end - get '/crossvalidation/:id/statistics' do LOGGER.info "get merged validation-result for crossvalidation with id "+params[:id].to_s - begin - crossvalidation = Validation::Crossvalidation.find(params[:id]) - rescue ActiveRecord::RecordNotFound => ex - halt 404, "Crossvalidation '#{params[:id]}' not found." - end +# begin + #crossvalidation = Validation::Crossvalidation.find(params[:id]) +# rescue ActiveRecord::RecordNotFound => ex +# raise OpenTox::NotFoundError.new "Crossvalidation '#{params[:id]}' not found." +# end + #crossvalidation = Validation::Crossvalidation.find(params[:id]) + crossvalidation = Validation::Crossvalidation.get(params[:id]) + + raise OpenTox::NotFoundError.new "Crossvalidation '#{params[:id]}' not found." unless crossvalidation + raise OpenTox::BadRequestError.new "Crossvalidation '"+params[:id].to_s+"' not finished" unless crossvalidation.finished Lib::MergeObjects.register_merge_attributes( Validation::Validation, - Lib::VAL_MERGE_AVG,Lib::VAL_MERGE_SUM,Lib::VAL_MERGE_GENERAL-[:validation_uri]) unless + Lib::VAL_MERGE_AVG,Lib::VAL_MERGE_SUM,Lib::VAL_MERGE_GENERAL-[:date,:validation_uri,:crossvalidation_uri]) unless Lib::MergeObjects.merge_attributes_registered?(Validation::Validation) - v = Lib::MergeObjects.merge_array_objects( Validation::Validation.find( :all, :conditions => { :crossvalidation_id => params[:id] } ) ) - v.validation_uri = nil + #v = Lib::MergeObjects.merge_array_objects( Validation::Validation.find( :all, :conditions => { :crossvalidation_id => params[:id] } ) ) + v = Lib::MergeObjects.merge_array_objects( Validation::Validation.all( :crossvalidation_id => params[:id] ) ) v.created_at = nil v.id = nil - content_type "application/x-yaml" - v.to_yaml -end - - -post '/crossvalidation/?' do - content_type "text/uri-list" - task_uri = OpenTox::Task.as_task do - LOGGER.info "creating crossvalidation "+params.inspect - halt 400, "dataset_uri missing" unless params[:dataset_uri] - halt 400, "algorithm_uri missing" unless params[:algorithm_uri] - halt 400, "prediction_feature missing" unless params[:prediction_feature] - halt 400, "illegal param-value num_folds: '"+params[:num_folds].to_s+"', must be integer >1" unless params[:num_folds]==nil or - params[:num_folds].to_i>1 - - cv_params = { :dataset_uri => params[:dataset_uri], - :algorithm_uri => params[:algorithm_uri] } - [ :num_folds, :random_seed, :stratified ].each{ |sym| cv_params[sym] = params[sym] if params[sym] } - cv = Validation::Crossvalidation.new cv_params - cv.create_cv_datasets( params[:prediction_feature] ) - cv.perform_cv( params[:algorithm_params]) - content_type "text/uri-list" - cv.crossvalidation_uri + + case request.env['HTTP_ACCEPT'].to_s + when /text\/html/ + related_links = + "The corresponding crossvalidation resource: "+url_for("/crossvalidation/"+params[:id],:full) + description = + "The averaged statistics for the crossvalidation." + content_type "text/html" + OpenTox.text_to_html v.to_yaml,@subjectid,related_links,description + else + content_type "application/x-yaml" + v.to_yaml end - halt 202,task_uri end -get '/training_test_split' do - halt 400, "GET operation not supported, use POST to perform a training_test_split, see "+url_for("/", :full)+" for validation results" +delete '/crossvalidation/:id/?' do + LOGGER.info "delete crossvalidation with id "+params[:id].to_s + content_type "text/plain" +# begin + #crossvalidation = Validation::Crossvalidation.find(params[:id]) +# rescue ActiveRecord::RecordNotFound => ex +# raise OpenTox::NotFoundError.new "Crossvalidation '#{params[:id]}' not found." +# end +# Validation::Crossvalidation.delete(params[:id]) + + cv = Validation::Crossvalidation.get(params[:id]) + cv.subjectid = @subjectid + raise OpenTox::NotFoundError.new "Crossvalidation '#{params[:id]}' not found." unless cv + cv.delete end -get '/?' do - LOGGER.info "list all validations" - content_type "text/uri-list" - params.each{ |k,v| halt 400,"no validation-attribute: "+k.to_s unless Validation::Validation.column_names.include?(k.gsub(/_like$/,"")) } - Validation::Validation.find(:all, :conditions => params).collect{ |d| url_for("/", :full) + d.id.to_s }.join("\n") -end +#get '/crossvalidation/:id/validations' do +# LOGGER.info "get all validations for crossvalidation with id "+params[:id].to_s +# begin +# crossvalidation = Validation::Crossvalidation.find(params[:id]) +# rescue ActiveRecord::RecordNotFound => ex +# raise OpenTox::NotFoundError.new "Crossvalidation '#{params[:id]}' not found." +# end +# content_type "text/uri-list" +# Validation::Validation.find( :all, :conditions => { :crossvalidation_id => params[:id] } ).collect{ |v| v.validation_uri.to_s }.join("\n")+"\n" +#end -get '/:id' do - LOGGER.info "get validation with id "+params[:id].to_s+" '"+request.env['HTTP_ACCEPT'].to_s+"'" +get '/crossvalidation/:id/predictions' do + LOGGER.info "get predictions for crossvalidation with id "+params[:id].to_s begin - validation = Validation::Validation.find(params[:id]) + #crossvalidation = Validation::Crossvalidation.find(params[:id]) + crossvalidation = Validation::Crossvalidation.get(params[:id]) rescue ActiveRecord::RecordNotFound => ex - halt 404, "Validation '#{params[:id]}' not found." + raise OpenTox::NotFoundError.new "Crossvalidation '#{params[:id]}' not found." end - + raise OpenTox::BadRequestError.new "Crossvalidation '"+params[:id].to_s+"' not finished" unless crossvalidation.finished + + content_type "application/x-yaml" + #validations = Validation::Validation.find( :all, :conditions => { :crossvalidation_id => params[:id] } ) + validations = Validation::Validation.all( :crossvalidation_id => params[:id] ) + p = Lib::OTPredictions.to_array( validations.collect{ |v| v.compute_validation_stats_with_model(nil, true) } ).to_yaml + case request.env['HTTP_ACCEPT'].to_s - when "application/rdf+xml" - content_type "application/rdf+xml" - result = validation.to_rdf - when /application\/x-yaml|\*\/\*|^$/ # matches 'application/x-yaml', '*/*', '' - content_type "application/x-yaml" - result = validation.to_yaml + when /text\/html/ + content_type "text/html" + description = + "The crossvalidation predictions as (yaml-)array." + related_links = + "All crossvalidations: "+url_for("/crossvalidation",:full)+"\n"+ + "Correspoding crossvalidation: "+url_for("/crossvalidation/"+params[:id],:full) + OpenTox.text_to_html p,@subjectid, related_links, description else - halt 400, "MIME type '"+request.env['HTTP_ACCEPT'].to_s+"' not supported, valid Accept-Headers are \"application/rdf+xml\" and \"application/x-yaml\"." + content_type "text/x-yaml" + p + end +end + +get '/?' do + + LOGGER.info "list all validations, params: "+params.inspect + #uri_list = Validation::Validation.find_like(params).collect{ |v| v.validation_uri }.join("\n")+"\n" + #uri_list = Validation::Validation.all(params).collect{ |v| v.validation_uri }.join("\n")+"\n" + uri_list = Lib::DataMapperUtil.all(Validation::Validation,params).collect{ |v| v.validation_uri }.join("\n")+"\n" + + + if request.env['HTTP_ACCEPT'] =~ /text\/html/ + related_links = + "To perform a validation:\n"+ + "* "+url_for("/test_set_validation",:full)+"\n"+ + "* "+url_for("/training_test_validation",:full)+"\n"+ + "* "+url_for("/bootstrapping",:full)+"\n"+ + "* "+url_for("/training_test_split",:full)+"\n"+ + "* "+url_for("/crossvalidation",:full)+"\n"+ + "Validation reporting: "+url_for("/report",:full)+"\n"+ + "REACH relevant reporting: "+url_for("/reach_report",:full)+"\n"+ + "Examples for using this service: "+url_for("/examples",:full)+"\n" + description = + "A validation web service for the OpenTox project ( http://opentox.org ).\n"+ + "In the root directory (this is where you are now), a list of all validation resources is returned." + content_type "text/html" + OpenTox.text_to_html uri_list,@subjectid,related_links,description + else + content_type "text/uri-list" + uri_list end - result end post '/?' do - content_type "text/uri-list" - task_uri = OpenTox::Task.as_task do |task| - LOGGER.info "creating validation "+params.inspect - if params[:model_uri] and params[:test_dataset_uri] and !params[:training_dataset_uri] and !params[:algorithm_uri] - v = Validation::Validation.new :model_uri => params[:model_uri], + raise OpenTox::BadRequestError.new "Post not supported, to perfom a validation use '/test_set_validation', '/training_test_validation', 'bootstrapping', 'training_test_split'" +end + +post '/test_set_validation' do + LOGGER.info "creating test-set-validation "+params.inspect + if params[:model_uri] and params[:test_dataset_uri] and !params[:training_dataset_uri] and !params[:algorithm_uri] + task = OpenTox::Task.create( "Perform test-set-validation", url_for("/", :full) ) do |task| #, params + v = Validation::Validation.new :validation_type => "test_set_validation", + :model_uri => params[:model_uri], :test_dataset_uri => params[:test_dataset_uri], :test_target_dataset_uri => params[:test_target_dataset_uri], :prediction_feature => params[:prediction_feature] - v.validate_model - elsif params[:algorithm_uri] and params[:training_dataset_uri] and params[:test_dataset_uri] and params[:prediction_feature] and !params[:model_uri] - v = Validation::Validation.new :algorithm_uri => params[:algorithm_uri], + v.subjectid = @subjectid + v.validate_model( task ) + v.validation_uri + end + return_task(task) + else + raise OpenTox::BadRequestError.new "illegal parameters, pls specify model_uri and test_dataset_uri\n"+ + "params given: "+params.inspect + end +end + +get '/test_set_validation' do + LOGGER.info "list all test-set-validations, params: "+params.inspect + + #uri_list = Validation::Validation.find( :all, :conditions => { :validation_type => "test_set_validation" } ).collect{ |v| v.validation_uri }.join("\n")+"\n" + #uri_list = Validation::Validation.all( :validation_type => "test_set_validation" ).collect{ |v| v.validation_uri }.join("\n")+"\n" + params[:validation_type] = "test_set_validation" + uri_list = Lib::DataMapperUtil.all(Validation::Validation,params).collect{ |v| v.validation_uri }.join("\n")+"\n" + + if request.env['HTTP_ACCEPT'] =~ /text\/html/ + related_links = + "All validations: "+url_for("/",:full)+"\n"+ + "Validation reports: "+url_for("/report/validation",:full) + description = + "A list of all test-set-validations.\n"+ + "To perform a test-set-validation use the POST method." + post_params = [[:model_uri, :test_dataset_uri, [:test_target_dataset_uri,"same-as-test_dataset_uri"], [:prediction_feature, "dependent-variable-of-model"]]] + content_type "text/html" + OpenTox.text_to_html uri_list,@subjectid,related_links,description,post_params + else + content_type "text/uri-list" + uri_list + end +end + +post '/training_test_validation/?' do + LOGGER.info "creating training-test-validation "+params.inspect + if params[:algorithm_uri] and params[:training_dataset_uri] and params[:test_dataset_uri] and params[:prediction_feature] and !params[:model_uri] + task = OpenTox::Task.create( "Perform training-test-validation", url_for("/", :full) ) do |task| #, params + v = Validation::Validation.new :validation_type => "training_test_validation", + :algorithm_uri => params[:algorithm_uri], :training_dataset_uri => params[:training_dataset_uri], :test_dataset_uri => params[:test_dataset_uri], :test_target_dataset_uri => params[:test_target_dataset_uri], :prediction_feature => params[:prediction_feature] - v.validate_algorithm( params[:algorithm_params]) - else - halt 400, "illegal parameter combination for validation, use either\n"+ - "* model_uri, test_dataset_uri\n"+ - "* algorithm_uri, training_dataset_uri, test_dataset_uri, prediction_feature\n"+ - "params given: "+params.inspect + v.subjectid = @subjectid + v.validate_algorithm( params[:algorithm_params], task ) + v.validation_uri end + return_task(task) + else + raise OpenTox::BadRequestError.new "illegal parameters, pls specify algorithm_uri, training_dataset_uri, test_dataset_uri, prediction_feature\n"+ + "params given: "+params.inspect + end +end + +get '/training_test_validation' do + LOGGER.info "list all training-test-validations, params: "+params.inspect + #uri_list = Validation::Validation.find( :all, :conditions => { :validation_type => "training_test_validation" } ).collect{ |v| v.validation_uri }.join("\n")+"\n" + #uri_list = Validation::Validation.all( :validation_type => "training_test_validation" ).collect{ |v| v.validation_uri }.join("\n")+"\n" + params[:validation_type] = "training_test_validation" + uri_list = Lib::DataMapperUtil.all(Validation::Validation,params).collect{ |v| v.validation_uri }.join("\n")+"\n" + + if request.env['HTTP_ACCEPT'] =~ /text\/html/ + related_links = + "All validations: "+url_for("/",:full)+"\n"+ + "Validation reports: "+url_for("/report/validation",:full) + description = + "A list of all training-test-validations.\n"+ + "To perform a training-test-validation use the POST method." + post_params = [[:algorithm_uri, + :training_dataset_uri, + :test_dataset_uri, + [:test_target_dataset_uri,"same-as-test_dataset_uri"], + :prediction_feature, + [:algorithm_params, ""]]] + content_type "text/html" + OpenTox.text_to_html uri_list,@subjectid,related_links,description,post_params + else content_type "text/uri-list" + uri_list + end +end + +post '/bootstrapping' do + task = OpenTox::Task.create( "Perform bootstrapping validation", url_for("/bootstrapping", :full) ) do |task| #, params + LOGGER.info "performing bootstrapping validation "+params.inspect + raise OpenTox::BadRequestError.new "dataset_uri missing" unless params[:dataset_uri] + raise OpenTox::BadRequestError.new "algorithm_uri missing" unless params[:algorithm_uri] + raise OpenTox::BadRequestError.new "prediction_feature missing" unless params[:prediction_feature] + + params.merge!( Validation::Util.bootstrapping( params[:dataset_uri], + params[:prediction_feature], @subjectid, + params[:random_seed], OpenTox::SubTask.create(task,0,33)) ) + v = Validation::Validation.new :validation_type => "bootstrapping", + :test_target_dataset_uri => params[:dataset_uri], + :prediction_feature => params[:prediction_feature], + :algorithm_uri => params[:algorithm_uri] + v.subjectid = @subjectid + v.validate_algorithm( params[:algorithm_params], OpenTox::SubTask.create(task,33,100)) v.validation_uri end - halt 202,task_uri + return_task(task) +end + +get '/bootstrapping' do + LOGGER.info "list all bootstrapping-validations, params: "+params.inspect + #uri_list = Validation::Validation.find( :all, :conditions => { :validation_type => "bootstrapping" } ).collect{ |v| v.validation_uri }.join("\n")+"\n" + #uri_list = Validation::Validation.all( :validation_type => "bootstrapping" ).collect{ |v| v.validation_uri }.join("\n")+"\n" + params[:validation_type] = "bootstrapping" + uri_list = Lib::DataMapperUtil.all(Validation::Validation,params).collect{ |v| v.validation_uri }.join("\n")+"\n" + + if request.env['HTTP_ACCEPT'] =~ /text\/html/ + related_links = + "All validations: "+url_for("/",:full)+"\n"+ + "Validation reports: "+url_for("/report/validation",:full) + description = + "A list of all bootstrapping-validations.\n"+ + "To perform a bootstrapping-validation use the POST method." + post_params = [[:algorithm_uri, + :dataset_uri, + :prediction_feature, + [:algorithm_params, ""], + [:random_seed, 1]]] + content_type "text/html" + OpenTox.text_to_html uri_list,@subjectid,related_links,description,post_params + else + content_type "text/uri-list" + uri_list + end end post '/training_test_split' do - content_type "text/uri-list" - task_uri = OpenTox::Task.as_task do + + task = OpenTox::Task.create( "Perform training test split validation", url_for("/training_test_split", :full) ) do |task| #, params LOGGER.info "creating training test split "+params.inspect - halt 400, "dataset_uri missing" unless params[:dataset_uri] - halt 400, "algorithm_uri missing" unless params[:algorithm_uri] - halt 400, "prediction_feature missing" unless params[:prediction_feature] + raise OpenTox::BadRequestError.new "dataset_uri missing" unless params[:dataset_uri] + raise OpenTox::BadRequestError.new "algorithm_uri missing" unless params[:algorithm_uri] + raise OpenTox::BadRequestError.new "prediction_feature missing" unless params[:prediction_feature] - params.merge!(Validation::Util.train_test_dataset_split(params[:dataset_uri], params[:prediction_feature], params[:split_ratio], params[:random_seed])) - v = Validation::Validation.new :training_dataset_uri => params[:training_dataset_uri], + params.merge!( Validation::Util.train_test_dataset_split(params[:dataset_uri], params[:prediction_feature], + @subjectid, params[:split_ratio], params[:random_seed], OpenTox::SubTask.create(task,0,33))) + v = Validation::Validation.new :validation_type => "training_test_split", + :training_dataset_uri => params[:training_dataset_uri], :test_dataset_uri => params[:test_dataset_uri], :test_target_dataset_uri => params[:dataset_uri], :prediction_feature => params[:prediction_feature], :algorithm_uri => params[:algorithm_uri] - v.validate_algorithm( params[:algorithm_params]) - content_type "text/uri-list" + v.subjectid = @subjectid + v.validate_algorithm( params[:algorithm_params], OpenTox::SubTask.create(task,33,100)) v.validation_uri end - halt 202,task_uri + return_task(task) + +end + +get '/training_test_split' do + LOGGER.info "list all training-test-split-validations, params: "+params.inspect + #uri_list = Validation::Validation.find( :all, :conditions => { :validation_type => "training_test_split" } ).collect{ |v| v.validation_uri }.join("\n")+"\n" + #uri_list = Validation::Validation.all( :validation_type => "training_test_split" ).collect{ |v| v.validation_uri }.join("\n")+"\n" + params[:validation_type] = "training_test_split" + uri_list = Lib::DataMapperUtil.all(Validation::Validation,params).collect{ |v| v.validation_uri }.join("\n")+"\n" + + if request.env['HTTP_ACCEPT'] =~ /text\/html/ + related_links = + "All validations: "+url_for("/",:full)+"\n"+ + "Validation reports: "+url_for("/report/validation",:full) + description = + "A list of all training-test-split-validations.\n"+ + "To perform a training-test-split-validation use the POST method." + post_params = [[:algorithm_uri, + :dataset_uri, + :prediction_feature, + [:algorithm_params, ""], + [:random_seed, 1], + [:split_ratio, 0.66]]] + content_type "text/html" + OpenTox.text_to_html uri_list,@subjectid,related_links,description,post_params + else + content_type "text/uri-list" + uri_list + end end +post '/cleanup/?' do + LOGGER.info "validation cleanup, starting..." + content_type "text/uri-list" + deleted = [] + #Validation::Validation.find( :all, :conditions => { :prediction_dataset_uri => nil } ).each do |val| + Validation::Validation.all( :finished => false ).each do |val| + LOGGER.debug "delete val with id:"+val.id.to_s+", finished is false" + deleted << val.validation_uri + #Validation::Validation.delete(val.id) + val.subjectid = @subjectid + val.delete + end + LOGGER.info "validation cleanup, deleted "+deleted.size.to_s+" validations" + deleted.join("\n")+"\n" +end post '/plain_training_test_split' do LOGGER.info "creating pure training test split "+params.inspect - halt 400, "dataset_uri missing" unless params[:dataset_uri] + raise OpenTox::BadRequestError.new "dataset_uri missing" unless params[:dataset_uri] result = Validation::Util.train_test_dataset_split(params[:dataset_uri], params[:prediction_feature], params[:split_ratio], params[:random_seed]) content_type "text/uri-list" @@ -201,56 +456,115 @@ post '/plain_training_test_split' do end post '/validate_datasets' do - content_type "text/uri-list" - task_uri = OpenTox::Task.as_task do + task = OpenTox::Task.create( "Perform dataset validation", url_for("/validate_datasets", :full) ) do |task| #, params LOGGER.info "validating values "+params.inspect - halt 400, "test_dataset_uri missing" unless params[:test_dataset_uri] - halt 400, "prediction_datset_uri missing" unless params[:prediction_dataset_uri] + raise OpenTox::BadRequestError.new "test_dataset_uri missing" unless params[:test_dataset_uri] + raise OpenTox::BadRequestError.new "prediction_datset_uri missing" unless params[:prediction_dataset_uri] + params[:validation_type] = "validate_datasets" if params[:model_uri] v = Validation::Validation.new params - v.compute_validation_stats_with_model() + v.subjectid = @subjectid + v.compute_validation_stats_with_model(nil,false,task) else - halt 400, "please specify 'model_uri' or 'prediction_feature'" unless params[:prediction_feature] - halt 400, "please specify 'model_uri' or 'predicted_feature'" unless params[:predicted_feature] - halt 400, "please specify 'model_uri' or set either 'classification' or 'regression' flag" unless + raise OpenTox::BadRequestError.new "please specify 'model_uri' or 'prediction_feature'" unless params[:prediction_feature] + raise OpenTox::BadRequestError.new "please specify 'model_uri' or 'predicted_feature'" unless params[:predicted_feature] + raise OpenTox::BadRequestError.new "please specify 'model_uri' or set either 'classification' or 'regression' flag" unless params[:classification] or params[:regression] predicted_feature = params.delete("predicted_feature") - clazz = params.delete("classification")!=nil - regr = params.delete("regression")!=nil - v = Validation::Validation.new params - v.compute_validation_stats((clazz and !regr),predicted_feature) + feature_type = "classification" if params.delete("classification")!=nil + feature_type = "regression" if params.delete("regression")!=nil + v = Validation::Validation.new params + v.subjectid = @subjectid + v.compute_validation_stats(feature_type,predicted_feature,nil,nil,false,task) end - content_type "text/uri-list" v.validation_uri end - halt 202,task_uri + return_task(task) end -get '/:id/:attribute' do - LOGGER.info "access validation attribute "+params.inspect +get '/:id/predictions' do + LOGGER.info "get validation predictions "+params.inspect begin - validation = Validation::Validation.find(params[:id]) + #validation = Validation::Validation.find(params[:id]) + validation = Validation::Validation.get(params[:id]) rescue ActiveRecord::RecordNotFound => ex - halt 404, "Validation '#{params[:id]}' not found." + raise OpenTox::NotFoundError.new "Validation '#{params[:id]}' not found." end - begin - raise unless validation.attribute_loaded?(params[:attribute]) - rescue - halt 400, "Not a validation attribute: "+params[:attribute].to_s + raise OpenTox::BadRequestError.new "Validation '"+params[:id].to_s+"' not finished" unless validation.finished + p = validation.compute_validation_stats_with_model(nil, true) + case request.env['HTTP_ACCEPT'].to_s + when /text\/html/ + content_type "text/html" + description = + "The validation predictions as (yaml-)array." + related_links = + "All validations: "+url_for("/",:full)+"\n"+ + "Correspoding validation: "+url_for("/"+params[:id],:full) + OpenTox.text_to_html p.to_array.to_yaml,@subjectid, related_links, description + else + content_type "text/x-yaml" + p.to_array.to_yaml + end +end + +#get '/:id/:attribute' do +# LOGGER.info "access validation attribute "+params.inspect +# begin +# validation = Validation::Validation.find(params[:id]) +# rescue ActiveRecord::RecordNotFound => ex +# raise OpenTox::NotFoundError.new "Validation '#{params[:id]}' not found." +# end +# begin +# raise unless validation.attribute_loaded?(params[:attribute]) +# rescue +# raise OpenTox::BadRequestError.new "Not a validation attribute: "+params[:attribute].to_s +# end +# content_type "text/plain" +# return validation.send(params[:attribute]) +#end + +get '/:id' do + LOGGER.info "get validation with id "+params[:id].to_s+" '"+request.env['HTTP_ACCEPT'].to_s+"'" +# begin + #validation = Validation::Validation.find(params[:id]) +# rescue ActiveRecord::RecordNotFound => ex +# raise OpenTox::NotFoundError.new "Validation '#{params[:id]}' not found." +# end + validation = Validation::Validation.get(params[:id]) + raise OpenTox::NotFoundError.new "Validation '#{params[:id]}' not found." unless validation + + case request.env['HTTP_ACCEPT'].to_s + when "application/rdf+xml" + content_type "application/rdf+xml" + validation.to_rdf + when /text\/html/ + content_type "text/html" + description = + "A validation resource." + related_links = + "Search for corresponding report: "+url_for("/report/validation?validation="+validation.validation_uri,:full)+"\n"+ + "Get validation predictions: "+url_for("/"+params[:id]+"/predictions",:full)+"\n"+ + "All validations: "+url_for("/",:full)+"\n"+ + "All validation reports: "+url_for("/report/validation",:full) + OpenTox.text_to_html validation.to_yaml,@subjectid,related_links,description + else #default is yaml + content_type "application/x-yaml" + validation.to_yaml end - content_type "text/plain" - return validation.send(params[:attribute]) end delete '/:id' do LOGGER.info "delete validation with id "+params[:id].to_s - begin - validation = Validation::Validation.find(params[:id]) - rescue ActiveRecord::RecordNotFound => ex - halt 404, "Validation '#{params[:id]}' not found." - end +# begin + #validation = Validation::Validation.find(params[:id]) +# rescue ActiveRecord::RecordNotFound => ex +# raise OpenTox::NotFoundError.new "Validation '#{params[:id]}' not found." +# end + validation = Validation::Validation.get(params[:id]) + validation.subjectid = @subjectid + raise OpenTox::NotFoundError.new "Validation '#{params[:id]}' not found." unless validation content_type "text/plain" - Validation::Validation.delete(params[:id]) + validation.delete end
\ No newline at end of file diff --git a/validation/validation_format.rb b/validation/validation_format.rb index a172f8a..f69ceac 100644..100755 --- a/validation/validation_format.rb +++ b/validation/validation_format.rb @@ -1,41 +1,37 @@ -require "lib/rdf_provider.rb" +require "lib/format_util.rb" module Validation - # adding to_yaml and to_rdf functionality to validation class Validation < Lib::Validation - include Lib::RDFProvider - # get_content_as_hash is the basis for to_yaml and to_rdf - # the idea is that everything is stored in a hash structure - # the hash is directly printed in to_yaml, whereas the has_keys can be used to resolve - # the right properties, classes for to_rdf - def get_content_as_hash - - LOGGER.debug self.validation_uri + # builds hash for valiation, as the internal presentation differs from the owl-object + # the hash is directly printed in to_yaml, or added to the owl-structure + def get_content_as_hash() h = {} - Lib::VAL_PROPS.each{|p| h[p] = self.send(p)} + (Lib::VAL_PROPS - [:validation_uri]).each do |p| + h[p] = self.send(p) + end if crossvalidation_id!=nil - cv = {} - Lib::VAL_CV_PROPS.each do |p| - cv[p] = self.send(p) - end - # replace crossvalidation id with uri + cv = {:type => OT.CrossvalidationInfo} + #skip crossvalidation_id + cv[:crossvalidation_fold] = self.crossvalidation_fold + cv[:crossvalidation_uri] = self.crossvalidation_uri h[:crossvalidation_info] = cv end if classification_statistics - clazz = {} + raise "classification_statistics is no has: "+classification_statistics.class.to_s unless classification_statistics.is_a?(Hash) + clazz = { :type => OT.ClassificationStatistics } Lib::VAL_CLASS_PROPS_SINGLE.each{ |p| clazz[p] = classification_statistics[p] } # transpose results per class class_values = {} Lib::VAL_CLASS_PROPS_PER_CLASS.each do |p| - $sinatra.halt 500, "missing classification statitstics: "+p.to_s+" "+classification_statistics.inspect unless classification_statistics[p] + raise "missing classification statitstics: "+p.to_s+" "+classification_statistics.inspect if classification_statistics[p]==nil classification_statistics[p].each do |class_value, property_value| - class_values[class_value] = {:class_value => class_value} unless class_values.has_key?(class_value) + class_values[class_value] = {:class_value => class_value, :type => OT.ClassValueStatistics} unless class_values.has_key?(class_value) map = class_values[class_value] map[p] = property_value end @@ -44,99 +40,63 @@ module Validation #converting confusion matrix cells = [] - $sinatra.halt 500,"confusion matrix missing" unless classification_statistics[:confusion_matrix]!=nil + raise "confusion matrix missing" unless classification_statistics[:confusion_matrix]!=nil classification_statistics[:confusion_matrix].each do |k,v| - cell = {} + cell = { :type => OT.ConfusionMatrixCell } # key in confusion matrix is map with predicted and actual attribute k.each{ |kk,vv| cell[kk] = vv } cell[:confusion_matrix_value] = v cells.push cell end - cm = { :confusion_matrix_cell => cells } + cm = { :confusion_matrix_cell => cells, :type => OT.ConfusionMatrix } clazz[:confusion_matrix] = cm h[:classification_statistics] = clazz elsif regression_statistics - regr = {} + regr = {:type => OT.RegressionStatistics } Lib::VAL_REGR_PROPS.each{ |p| regr[p] = regression_statistics[p]} h[:regression_statistics] = regr end return h end - def rdf_title - "Validation" + def to_rdf + s = OpenTox::Serializer::Owl.new + s.add_resource(validation_uri,OT.Validation,get_content_as_hash.keys_to_rdf_format.keys_to_owl_uris) + s.to_rdfxml end - def uri - validation_uri + def to_yaml + get_content_as_hash.keys_to_rdf_format.keys_to_owl_uris.to_yaml end - LITERALS = [ :created_at, :real_runtime, :num_instances, :num_without_class, - :percent_without_class, :num_unpredicted, :percent_unpredicted, - :crossvalidation_fold ] + - (Lib::VAL_CLASS_PROPS - [ :confusion_matrix ]) + Lib::VAL_REGR_PROPS + - [ :class_value, :confusion_matrix_value, - :confusion_matrix_actual, :confusion_matrix_predicted ] - - LITERAL_NAMES = {:created_at => OT["date"] } - - OBJECT_PROPERTIES = { :model_uri => OT['validationModel'], :training_dataset_uri => OT['validationTrainingDataset'], :algorithm_uri => OT['validationAlgorithm'], - :prediction_feature => OT['predictedFeature'], :test_dataset_uri => OT['validationTestDataset'], :test_target_dataset_uri => OT['validationTestTargetDataset'], - :prediction_dataset_uri => OT['validationPredictionDataset'], :crossvalidation_info => OT['hasValidationInfo'], - :crossvalidation_uri => OT['validationCrossvalidation'], - :classification_statistics => OT['hasValidationInfo'], :regression_statistics => OT['hasValidationInfo'], - :class_value_statistics => OT['classValueStatistics'], :confusion_matrix => OT['confusionMatrix'], - :confusion_matrix_cell => OT['confusionMatrixCell'], #:class_value => OT['classValue'], - #:confusion_matrix_actual => OT['confusionMatrixActual'], :confusion_matrix_predicted => OT['confusionMatrixPredicted'] - } - - OBJECTS = { :model_uri => OT['Model'], :training_dataset_uri => OT['Dataset'], :test_dataset_uri => OT['Dataset'], - :test_target_dataset_uri => OT['Dataset'], :prediction_dataset_uri => OT['Dataset'], :prediction_feature => OT['Feature'], - :algorithm_uri => OT['Algorithm'],} - - CLASSES = { :crossvalidation_info => OT['CrossvalidationInfo'], :classification_statistics => OT['ClassificationStatistics'], - :regression_statistics => OT['RegresssionStatistics'], :class_value_statistics => OT['ClassValueStatistics'], - :confusion_matrix => OT['ConfusionMatrix'], :confusion_matrix_cell => OT['ConfusionMatrixCell']} - - IGNORE = [ :id, :validation_uri, :crossvalidation_id ] - end class Crossvalidation < Lib::Crossvalidation - include Lib::RDFProvider - + def get_content_as_hash h = {} - Lib::CROSS_VAL_PROPS_REDUNDANT.each{|p| h[p] = self.send(p)} + (Lib::CROSS_VAL_PROPS_REDUNDANT - [:crossvalidation_uri]).each do |p| + h[p] = self.send(p) + end v = [] - Validation.find( :all, :conditions => { :crossvalidation_id => self.id } ).each do |val| + #Validation.find( :all, :conditions => { :crossvalidation_id => self.id } ).each do |val| + Validation.all( :crossvalidation_id => self.id ).each do |val| v.push( val.validation_uri.to_s ) end - h[:validations] = v + h[:validation_uris] = v h end - def uri - crossvalidation_uri + def to_rdf + s = OpenTox::Serializer::Owl.new + s.add_resource(crossvalidation_uri,OT.Crossvalidation,get_content_as_hash.keys_to_rdf_format.keys_to_owl_uris) + s.to_rdfxml end - def rdf_title - "Crossvalidation" + def to_yaml + get_content_as_hash.keys_to_rdf_format.keys_to_owl_uris.to_yaml end - - LITERALS = [ :created_at, :stratified, :num_folds, :random_seed ] - - LITERAL_NAMES = {:created_at => OT["date"] } - - OBJECT_PROPERTIES = { :dataset_uri => OT['crossvalidationDataset'], :algorithm_uri => OT['crossvalidationAlgorithm'], - :validations => OT['crossvalidationValidation'] } - - OBJECTS = { :dataset_uri => OT['Dataset'], :validations => OT['Validation'], :algorithm_uri => OT['Algorithm']} - - CLASSES = {} - - IGNORE = [ :id, :crossvalidation_uri ] end end diff --git a/validation/validation_service.rb b/validation/validation_service.rb index cfbb681..a1efba5 100644..100755 --- a/validation/validation_service.rb +++ b/validation/validation_service.rb @@ -1,7 +1,5 @@ -require "rdf/redland" - require "lib/validation_db.rb" require "lib/ot_predictions.rb" @@ -31,171 +29,287 @@ class Array end module Validation - + class Validation < Lib::Validation # constructs a validation object, Rsets id und uri - def initialize( params={} ) - $sinatra.halt 500,"do not set id manually" if params[:id] - $sinatra.halt 500,"do not set uri manually" if params[:validation_uri] - super params - self.save! - raise "internal error, validation-id not set "+to_yaml if self.id==nil - self.attributes = { :validation_uri => $sinatra.url_for("/"+self.id.to_s, :full).to_s } - self.save! - end + #def initialize( params={} ) + #raise "do not set id manually" if params[:id] + #params[:finished] = false + #super params + #self.save! + #raise "internal error, validation-id not set "+to_yaml if self.id==nil + #end # deletes a validation # PENDING: model and referenced datasets are deleted as well, keep it that way? - def delete - - model = OpenTox::Model::PredictionModel.find(self.model_uri) if self.model_uri - model.destroy if model - - #[@test_dataset_uri, @training_dataset_uri, @prediction_dataset_uri].each do |d| - #dataset = OpenTox::Dataset.find(d) if d - #dataset.delete if dataset - #end - destroy + def delete( delete_all=true ) + if (delete_all) + to_delete = [:model_uri, :training_dataset_uri, :test_dataset_uri, :test_target_dataset_uri, :prediction_dataset_uri ] + case self.validation_type + when /test_set_validation/ + to_delete -= [ :model_uri, :training_dataset_uri, :test_dataset_uri, :test_target_dataset_uri ] + when /bootstrapping/ + to_delete -= [ :test_target_dataset_uri ] + when /training_test_validation/ + to_delete -= [ :training_dataset_uri, :test_dataset_uri, :test_target_dataset_uri ] + when /training_test_split/ + to_delete -= [ :test_target_dataset_uri ] + when /validate_dataset/ + to_delete = [] + when /crossvalidation/ + to_delete -= [ :test_target_dataset_uri ] + else + raise "unknown dataset type" + end + to_delete.each do |attr| + uri = self.send(attr) + LOGGER.debug "also deleting "+attr.to_s+" : "+uri.to_s if uri + begin + OpenTox::RestClientWrapper.delete(uri, :subjectid => subjectid) if uri + rescue => ex + LOGGER.warn "could not delete "+uri.to_s+" : "+ex.message.to_s + end + end + end + self.destroy + if (subjectid) + begin + res = OpenTox::Authorization.delete_policies_from_uri(validation_uri, subjectid) + LOGGER.debug "Deleted validation policy: #{res}" + rescue + LOGGER.warn "Policy delete error for validation: #{validation_uri}" + end + end "Successfully deleted validation "+self.id.to_s+"." end # validates an algorithm by building a model and validating this model - def validate_algorithm( algorithm_params=nil ) - - $sinatra.halt 404, "no algorithm uri: '"+self.algorithm_uri.to_s+"'" if self.algorithm_uri==nil or self.algorithm_uri.to_s.size<1 + def validate_algorithm( algorithm_params=nil, task=nil ) + raise "validation_type missing" unless self.validation_type + raise OpenTox::BadRequestError.new "no algorithm uri: '"+self.algorithm_uri.to_s+"'" if self.algorithm_uri==nil or self.algorithm_uri.to_s.size<1 params = { :dataset_uri => self.training_dataset_uri, :prediction_feature => self.prediction_feature } if (algorithm_params!=nil) algorithm_params.split(";").each do |alg_params| alg_param = alg_params.split("=") - $sinatra.halt 404, "invalid algorithm param: '"+alg_params.to_s+"'" unless alg_param.size==2 or alg_param[0].to_s.size<1 or alg_param[1].to_s.size<1 + raise OpenTox::BadRequestError.new "invalid algorithm param: '"+alg_params.to_s+"'" unless alg_param.size==2 or alg_param[0].to_s.size<1 or alg_param[1].to_s.size<1 LOGGER.warn "algorihtm param contains empty space, encode? "+alg_param[1].to_s if alg_param[1] =~ /\s/ params[alg_param[0].to_sym] = alg_param[1] end end LOGGER.debug "building model '"+algorithm_uri.to_s+"' "+params.inspect - model = OpenTox::Model::PredictionModel.build(algorithm_uri, params) - $sinatra.halt 500,"model building failed" unless model - self.attributes = { :model_uri => model.uri } - self.save! + algorithm = OpenTox::Algorithm::Generic.new(algorithm_uri) + params[:subjectid] = subjectid + self.model_uri = algorithm.run(params, OpenTox::SubTask.create(task, 0, 33)) + + #model = OpenTox::Model::PredictionModel.build(algorithm_uri, params, + # OpenTox::SubTask.create(task, 0, 33) ) - $sinatra.halt 500,"error after building model: model.dependent_variable != validation.prediciton_feature ("+ - model.dependentVariables.to_s+" != "+self.prediction_feature+")" if self.prediction_feature!=model.dependentVariables + raise "model building failed" unless model_uri + #self.attributes = { :model_uri => model_uri } + #self.save! + +# self.save if self.new? +# self.update :model_uri => model_uri + + #raise "error after building model: model.dependent_variable != validation.prediciton_feature ("+ + # model.dependentVariables.to_s+" != "+self.prediction_feature+")" if self.prediction_feature!=model.dependentVariables - validate_model + validate_model OpenTox::SubTask.create(task, 33, 100) end # validates a model # PENDING: a new dataset is created to store the predictions, this should be optional: delete predictions afterwards yes/no - def validate_model + def validate_model( task=nil ) + raise "validation_type missing" unless self.validation_type LOGGER.debug "validating model '"+self.model_uri+"'" - model = OpenTox::Model::PredictionModel.find(self.model_uri) - $sinatra.halt 400, "model not found: "+self.model_uri.to_s unless model + #model = OpenTox::Model::PredictionModel.find(self.model_uri) + #raise OpenTox::NotFoundError.new "model not found: "+self.model_uri.to_s unless model + model = OpenTox::Model::Generic.find(self.model_uri, self.subjectid) unless self.algorithm_uri - self.attributes = { :algorithm_uri => model.algorithm } - self.save! +# self.attributes = { :algorithm_uri => model.algorithm } +# self.save! + #self.update :algorithm_uri => model.algorithm + self.algorithm_uri = model.metadata[OT.algorithm] end - if self.prediction_feature - $sinatra.halt 400, "error validating model: model.dependent_variable != validation.prediciton_feature ("+ - model.dependentVariables+" != "+self.prediction_feature+")" if self.prediction_feature!=model.dependentVariables + if self.prediction_feature and model.uri=~/ambit2\/model/ + LOGGER.warn "REMOVE AMBIT HACK TO __NOT__ RELY ON DEPENDENT VARIABLE" else - $sinatra.halt 400, "model has no dependentVariables specified, please give prediction feature for model validation" unless model.dependentVariables - self.attributes = { :prediction_feature => model.dependentVariables } - self.save! + dependentVariables = model.metadata[OT.dependentVariables] + if self.prediction_feature + raise OpenTox::NotFoundError.new "error validating model: model.dependent_variable != validation.prediction_feature ("+ + dependentVariables.to_s+" != "+self.prediction_feature+"), model-metadata is "+model.metadata.inspect if self.prediction_feature!=dependentVariables + else + raise OpenTox::NotFoundError.new "model has no dependentVariables specified, please give prediction feature for model validation" unless dependentVariables + #self.attributes = { :prediction_feature => model.dependentVariables } + #self.save! + #self.update :prediction_feature => model.dependentVariables + self.prediction_feature = model.metadata[OT.dependentVariables] + end end prediction_dataset_uri = "" benchmark = Benchmark.measure do - prediction_dataset_uri = model.predict_dataset(self.test_dataset_uri) + #prediction_dataset_uri = model.predict_dataset(self.test_dataset_uri, OpenTox::SubTask.create(task, 0, 50)) + prediction_dataset_uri = model.run( + {:dataset_uri => self.test_dataset_uri, :subjectid => self.subjectid}, + "text/uri-list", + OpenTox::SubTask.create(task, 0, 50)) end - self.attributes = { :prediction_dataset_uri => prediction_dataset_uri, - :real_runtime => benchmark.real } - self.save! - - compute_validation_stats_with_model( model ) +# self.attributes = { :prediction_dataset_uri => prediction_dataset_uri, +# :real_runtime => benchmark.real } +# self.save! +# self.update :prediction_dataset_uri => prediction_dataset_uri, +# :real_runtime => benchmark.real + self.prediction_dataset_uri = prediction_dataset_uri + self.real_runtime = benchmark.real + + compute_validation_stats_with_model( model, false, OpenTox::SubTask.create(task, 50, 100) ) end - def compute_validation_stats_with_model( model=nil ) + def compute_validation_stats_with_model( model=nil, dry_run=false, task=nil ) - model = OpenTox::Model::PredictionModel.find(self.model_uri) if model==nil and self.model_uri - $sinatra.halt 400, "model not found: "+self.model_uri.to_s unless model - prediction_feature = self.prediction_feature ? nil : model.dependentVariables - algorithm_uri = self.algorithm_uri ? nil : model.algorithm - compute_validation_stats( model.classification?, model.predictedVariables, prediction_feature, algorithm_uri ) + #model = OpenTox::Model::PredictionModel.find(self.model_uri) if model==nil and self.model_uri + #raise OpenTox::NotFoundError.new "model not found: "+self.model_uri.to_s unless model + model = OpenTox::Model::Generic.find(self.model_uri, self.subjectid) if model==nil and self.model_uri + raise OpenTox::NotFoundError.new "model not found: "+self.model_uri.to_s unless model + + dependentVariables = model.metadata[OT.dependentVariables] + prediction_feature = self.prediction_feature ? nil : dependentVariables + algorithm_uri = self.algorithm_uri ? nil : model.metadata[OT.algorithm] + predictedVariables = model.metadata[OT.predictedVariables] + compute_validation_stats( model.feature_type(self.subjectid), predictedVariables, + prediction_feature, algorithm_uri, dry_run, task ) end - def compute_validation_stats( classification, predicted_feature, prediction_feature=nil, algorithm_uri=nil) + def compute_validation_stats( feature_type, predicted_feature, prediction_feature=nil, + algorithm_uri=nil, dry_run=false, task=nil ) - self.attributes = { :prediction_feature => prediction_feature } if self.prediction_feature==nil && prediction_feature - self.attributes = { :algorithm_uri => algorithm_uri } if self.algorithm_uri==nil && algorithm_uri - self.save! +# self.attributes = { :prediction_feature => prediction_feature } if self.prediction_feature==nil && prediction_feature +# self.attributes = { :algorithm_uri => algorithm_uri } if self.algorithm_uri==nil && algorithm_uri +# self.save! +# self.update :prediction_feature => prediction_feature if self.prediction_feature==nil && prediction_feature +# self.update :algorithm_uri => algorithm_uri if self.algorithm_uri==nil && algorithm_uri + self.prediction_feature = prediction_feature if self.prediction_feature==nil && prediction_feature + self.algorithm_uri = algorithm_uri if self.algorithm_uri==nil && algorithm_uri LOGGER.debug "computing prediction stats" - prediction = Lib::OTPredictions.new( classification, + prediction = Lib::OTPredictions.new( feature_type, self.test_dataset_uri, self.test_target_dataset_uri, self.prediction_feature, - self.prediction_dataset_uri, predicted_feature ) - if prediction.classification? - self.attributes = { :classification_statistics => prediction.compute_stats } - else - self.attributes = { :regression_statistics => prediction.compute_stats } + self.prediction_dataset_uri, predicted_feature, self.subjectid, OpenTox::SubTask.create(task, 0, 80) ) + #reading datasets and computing the main stats is 80% the work + + unless dry_run + case feature_type + when "classification" + #self.attributes = { :classification_statistics => prediction.compute_stats } + #self.update :classification_statistics => prediction.compute_stats + self.classification_statistics = prediction.compute_stats + when "regression" + #self.attributes = { :regression_statistics => prediction.compute_stats } + self.regression_statistics = prediction.compute_stats + end +# self.attributes = { :num_instances => prediction.num_instances, +# :num_without_class => prediction.num_without_class, +# :percent_without_class => prediction.percent_without_class, +# :num_unpredicted => prediction.num_unpredicted, +# :percent_unpredicted => prediction.percent_unpredicted, +# :finished => true} +# self.save! + self.attributes= {:num_instances => prediction.num_instances, + :num_without_class => prediction.num_without_class, + :percent_without_class => prediction.percent_without_class, + :num_unpredicted => prediction.num_unpredicted, + :percent_unpredicted => prediction.percent_unpredicted, + :finished => true} + begin + self.save + rescue DataMapper::SaveFailureError => e + raise "could not save validation: "+e.resource.errors.inspect + end end - self.attributes = { :num_instances => prediction.num_instances, - :num_without_class => prediction.num_without_class, - :percent_without_class => prediction.percent_without_class, - :num_unpredicted => prediction.num_unpredicted, - :percent_unpredicted => prediction.percent_unpredicted } - self.save! + task.progress(100) if task + prediction end end class Crossvalidation < Lib::Crossvalidation # constructs a crossvalidation, id and uri are set - def initialize( params={} ) - - $sinatra.halt 500,"do not set id manually" if params[:id] - $sinatra.halt 500,"do not set uri manually" if params[:crossvalidation_uri] - - params[:num_folds] = 10 if params[:num_folds]==nil - params[:random_seed] = 1 if params[:random_seed]==nil - params[:stratified] = false if params[:stratified]==nil - super params - self.save! - raise "internal error, crossvalidation-id not set" if self.id==nil - self.attributes = { :crossvalidation_uri => $sinatra.url_for("/crossvalidation/"+self.id.to_s, :full) } - self.save! + #def initialize( params={} ) + # + # raise "do not set id manually" if params[:id] + # params[:num_folds] = 10 if params[:num_folds]==nil + # params[:random_seed] = 1 if params[:random_seed]==nil + # params[:stratified] = false if params[:stratified]==nil + # params[:finished] = false + # super params + # self.save! + # raise "internal error, crossvalidation-id not set" if self.id==nil + #end + + def perform_cv ( prediction_feature, algorithm_params=nil, task=nil ) + + create_cv_datasets( prediction_feature, OpenTox::SubTask.create(task, 0, 33) ) + perform_cv_validations( algorithm_params, OpenTox::SubTask.create(task, 33, 100) ) end # deletes a crossvalidation, all validations are deleted as well def delete - Validation.all(:crossvalidation_id => self.id).each{ |v| v.delete } - destroy + Validation.all(:crossvalidation_id => self.id).each do |v| + v.subjectid = self.subjectid + v.delete + end + self.destroy + if (subjectid) + begin + res = OpenTox::Authorization.delete_policies_from_uri(crossvalidation_uri, subjectid) + LOGGER.debug "Deleted crossvalidation policy: #{res}" + rescue + LOGGER.warn "Policy delete error for crossvalidation: #{crossvalidation_uri}" + end + end "Successfully deleted crossvalidation "+self.id.to_s+"." end # creates the cv folds - # PENDING copying datasets of an equal (same dataset, same params) crossvalidation is disabled for now - def create_cv_datasets( prediction_feature ) - - create_new_cv_datasets( prediction_feature ) #unless copy_cv_datasets( prediction_feature ) + def create_cv_datasets( prediction_feature, task=nil ) + if copy_cv_datasets( prediction_feature ) + # dataset folds of a previous crossvalidaiton could be used + task.progress(100) if task + else + create_new_cv_datasets( prediction_feature, task ) + end end # executes the cross-validation (build models and validates them) - def perform_cv ( algorithm_params=nil ) + def perform_cv_validations( algorithm_params, task=nil ) - LOGGER.debug "perform cv validations" + LOGGER.debug "perform cv validations "+algorithm_params.inspect + i = 0 + task_step = 100 / self.num_folds.to_f; @tmp_validations.each do | val | validation = Validation.new val - validation.validate_algorithm( algorithm_params ) - #break + validation.subjectid = self.subjectid + validation.validate_algorithm( algorithm_params, + OpenTox::SubTask.create(task, i * task_step, ( i + 1 ) * task_step) ) + raise "validation '"+validation.validation_uri+"' for crossvaldation could not be finished" unless + validation.finished + i += 1 end + +# self.attributes = { :finished => true } +# self.save! + #self.save if self.new? + self.finished = true + self.save end private @@ -203,39 +317,48 @@ module Validation # returns true if successfull, false otherwise def copy_cv_datasets( prediction_feature ) - equal_cvs = Crossvalidation.all( { :dataset_uri => self.dataset_uri, :num_folds => self.num_folds, - :stratified => self.stratified, :random_seed => self.random_seed } ).reject{ |cv| cv.id == self.id } - return false if equal_cvs.size == 0 - cv = equal_cvs[0] - Validation.all( :crossvalidation_id => cv.id ).each do |v| - - if self.stratified and v.prediction_feature != prediction_feature - return false; + cvs = Crossvalidation.all( { + :dataset_uri => self.dataset_uri, + :num_folds => self.num_folds, + :stratified => self.stratified, + :random_seed => self.random_seed, + :finished => true} ).reject{ |cv| cv.id == self.id } + cvs.each do |cv| + next if AA_SERVER and !OpenTox::Authorization.authorized?(cv.crossvalidation_uri,"GET",self.subjectid) + tmp_val = [] + Validation.all( :crossvalidation_id => cv.id ).each do |v| + break unless + v.prediction_feature == prediction_feature and + OpenTox::Dataset.exist?(v.training_dataset_uri,self.subjectid) and + OpenTox::Dataset.exist?(v.test_dataset_uri,self.subjectid) + #make sure self.id is set + self.save if self.new? + tmp_val << { :validation_type => "crossvalidation", + :training_dataset_uri => v.training_dataset_uri, + :test_dataset_uri => v.test_dataset_uri, + :test_target_dataset_uri => self.dataset_uri, + :crossvalidation_id => self.id, + :crossvalidation_fold => v.crossvalidation_fold, + :prediction_feature => prediction_feature, + :algorithm_uri => self.algorithm_uri } end - unless (OpenTox::Dataset.find(v.training_dataset_uri) and - OpenTox::Dataset.find(v.test_dataset_uri)) - LOGGER.debug "dataset uris obsolete, aborting copy of datasets" - Validation.all( :crossvalidation_id => self.id ).each{ |v| v.delete } - return false + if tmp_val.size == self.num_folds + @tmp_validations = tmp_val + LOGGER.debug "copied dataset uris from cv "+cv.crossvalidation_uri.to_s #+":\n"+tmp_val.inspect + return true end - validation = Validation.new :crossvalidation_id => self.id, - :crossvalidation_fold => v.crossvalidation_fold, - :training_dataset_uri => v.training_dataset_uri, - :test_dataset_uri => v.test_dataset_uri, - :algorithm_uri => self.algorithm_uri end - LOGGER.debug "copied dataset uris from cv "+cv.crossvalidation_uri.to_s - return true + false end # creates cv folds (training and testdatasets) # stores uris in validation objects - def create_new_cv_datasets( prediction_feature ) + def create_new_cv_datasets( prediction_feature, task = nil ) - $sinatra.halt(500,"random seed not set") unless self.random_seed + raise "random seed not set "+self.inspect unless self.random_seed LOGGER.debug "creating datasets for crossvalidation" - orig_dataset = OpenTox::Dataset.find(self.dataset_uri) - $sinatra.halt 400, "Dataset not found: "+self.dataset_uri.to_s unless orig_dataset + orig_dataset = OpenTox::Dataset.find(self.dataset_uri,self.subjectid) + raise OpenTox::NotFoundError.new "Dataset not found: "+self.dataset_uri.to_s unless orig_dataset shuffled_compounds = orig_dataset.compounds.shuffle( self.random_seed ) @@ -279,7 +402,7 @@ module Validation end LOGGER.debug "cv: num instances for each fold: "+split_compounds.collect{|c| c.size}.join(", ") - test_features = orig_dataset.features.dclone - [prediction_feature] + test_features = orig_dataset.features.keys.dclone - [prediction_feature] @tmp_validations = [] @@ -290,7 +413,7 @@ module Validation '_f'+n.to_s+'of'+self.num_folds.to_s+ '_r'+self.random_seed.to_s+ '_s'+self.stratified.to_s - source = $sinatra.url_for('/crossvalidation',:full) + source = $url_provider.url_for('/crossvalidation',:full) test_compounds = [] train_compounds = [] @@ -305,22 +428,31 @@ module Validation end end - $sinatra.halt 500,"internal error, num test compounds not correct" unless (shuffled_compounds.size/self.num_folds - test_compounds.size).abs <= 1 - $sinatra.halt 500,"internal error, num train compounds not correct" unless shuffled_compounds.size - test_compounds.size == train_compounds.size + raise "internal error, num test compounds not correct" unless (shuffled_compounds.size/self.num_folds - test_compounds.size).abs <= 1 + raise "internal error, num train compounds not correct" unless shuffled_compounds.size - test_compounds.size == train_compounds.size LOGGER.debug "training set: "+datasetname+"_train, compounds: "+train_compounds.size.to_s - train_dataset_uri = orig_dataset.create_new_dataset( train_compounds, orig_dataset.features, datasetname + '_train', source ) + #train_dataset_uri = orig_dataset.create_new_dataset( train_compounds, orig_dataset.features, datasetname + '_train', source ) + train_dataset_uri = orig_dataset.split( train_compounds, orig_dataset.features.keys, + { DC.title => datasetname + '_train', DC.creator => source }, self.subjectid ).uri LOGGER.debug "test set: "+datasetname+"_test, compounds: "+test_compounds.size.to_s - test_dataset_uri = orig_dataset.create_new_dataset( test_compounds, test_features, datasetname + '_test', source ) - - tmp_validation = { :training_dataset_uri => train_dataset_uri, + #test_dataset_uri = orig_dataset.create_new_dataset( test_compounds, test_features, datasetname + '_test', source ) + test_dataset_uri = orig_dataset.split( test_compounds, test_features, + { DC.title => datasetname + '_test', DC.creator => source }, self.subjectid ).uri + + #make sure self.id is set + self.save if self.new? + tmp_validation = { :validation_type => "crossvalidation", + :training_dataset_uri => train_dataset_uri, :test_dataset_uri => test_dataset_uri, :test_target_dataset_uri => self.dataset_uri, :crossvalidation_id => self.id, :crossvalidation_fold => n, :prediction_feature => prediction_feature, :algorithm_uri => self.algorithm_uri } @tmp_validations << tmp_validation + + task.progress( n / self.num_folds.to_f * 100 ) if task end end end @@ -328,27 +460,116 @@ module Validation module Util + # splits a dataset into test and training dataset via bootstrapping + # (training dataset-size is n, sampling from orig dataset with replacement) + # returns map with training_dataset_uri and test_dataset_uri + def self.bootstrapping( orig_dataset_uri, prediction_feature, subjectid, random_seed=nil, task=nil ) + + random_seed=1 unless random_seed + + orig_dataset = OpenTox::Dataset.find orig_dataset_uri,subjectid + orig_dataset.load_all + raise OpenTox::NotFoundError.new "Dataset not found: "+orig_dataset_uri.to_s unless orig_dataset + if prediction_feature + raise OpenTox::NotFoundError.new "Prediction feature '"+prediction_feature.to_s+ + "' not found in dataset, features are: \n"+ + orig_dataset.features.inspect unless orig_dataset.features.include?(prediction_feature) + else + LOGGER.warn "no prediciton feature given, all features included in test dataset" + end + + compounds = orig_dataset.compounds + raise OpenTox::NotFoundError.new "Cannot split datset, num compounds in dataset < 2 ("+compounds.size.to_s+")" if compounds.size<2 + + compounds.each do |c| + raise OpenTox::NotFoundError.new "Bootstrapping not yet implemented for duplicate compounds" if + orig_dataset.data_entries[c][prediction_feature].size > 1 + end + + srand random_seed.to_i + while true + training_compounds = [] + compounds.size.times do + training_compounds << compounds[rand(compounds.size)] + end + test_compounds = [] + compounds.each do |c| + test_compounds << c unless training_compounds.include?(c) + end + if test_compounds.size > 0 + break + else + srand rand(10000) + end + end + + LOGGER.debug "bootstrapping on dataset "+orig_dataset_uri+ + " into training ("+training_compounds.size.to_s+") and test ("+test_compounds.size.to_s+")"+ + ", duplicates in training dataset: "+test_compounds.size.to_s + task.progress(33) if task + + result = {} +# result[:training_dataset_uri] = orig_dataset.create_new_dataset( training_compounds, +# orig_dataset.features, +# "Bootstrapping training dataset of "+orig_dataset.title.to_s, +# $sinatra.url_for('/bootstrapping',:full) ) + result[:training_dataset_uri] = orig_dataset.split( training_compounds, + orig_dataset.features.keys, + { DC.title => "Bootstrapping training dataset of "+orig_dataset.title.to_s, + DC.creator => $url_provider.url_for('/bootstrapping',:full) }, + subjectid ).uri + task.progress(66) if task + +# result[:test_dataset_uri] = orig_dataset.create_new_dataset( test_compounds, +# orig_dataset.features.dclone - [prediction_feature], +# "Bootstrapping test dataset of "+orig_dataset.title.to_s, +# $sinatra.url_for('/bootstrapping',:full) ) + result[:test_dataset_uri] = orig_dataset.split( test_compounds, + orig_dataset.features.keys.dclone - [prediction_feature], + { DC.title => "Bootstrapping test dataset of "+orig_dataset.title.to_s, + DC.creator => $url_provider.url_for('/bootstrapping',:full)} , + subjectid ).uri + task.progress(100) if task + + if ENV['RACK_ENV'] =~ /test|debug/ + training_dataset = OpenTox::Dataset.find result[:training_dataset_uri],subjectid + raise OpenTox::NotFoundError.new "Training dataset not found: '"+result[:training_dataset_uri].to_s+"'" unless training_dataset + training_dataset.load_all + value_count = 0 + training_dataset.compounds.each do |c| + value_count += training_dataset.data_entries[c][prediction_feature].size + end + raise "training compounds error" unless value_count==training_compounds.size + raise OpenTox::NotFoundError.new "Test dataset not found: '"+result[:test_dataset_uri].to_s+"'" unless + OpenTox::Dataset.find result[:test_dataset_uri], subjectid + end + LOGGER.debug "bootstrapping done, training dataset: '"+result[:training_dataset_uri].to_s+"', test dataset: '"+result[:test_dataset_uri].to_s+"'" + + return result + end + # splits a dataset into test and training dataset # returns map with training_dataset_uri and test_dataset_uri - def self.train_test_dataset_split( orig_dataset_uri, prediction_feature, split_ratio=nil, random_seed=nil ) + def self.train_test_dataset_split( orig_dataset_uri, prediction_feature, subjectid, split_ratio=nil, random_seed=nil, task=nil ) split_ratio=0.67 unless split_ratio random_seed=1 unless random_seed - orig_dataset = OpenTox::Dataset.find orig_dataset_uri - $sinatra.halt 400, "Dataset not found: "+orig_dataset_uri.to_s unless orig_dataset - $sinatra.halt 400, "Split ratio invalid: "+split_ratio.to_s unless split_ratio and split_ratio=split_ratio.to_f - $sinatra.halt 400, "Split ratio not >0 and <1 :"+split_ratio.to_s unless split_ratio>0 && split_ratio<1 + orig_dataset = OpenTox::Dataset.find orig_dataset_uri, subjectid + orig_dataset.load_all subjectid + raise OpenTox::NotFoundError.new "Dataset not found: "+orig_dataset_uri.to_s unless orig_dataset + raise OpenTox::NotFoundError.new "Split ratio invalid: "+split_ratio.to_s unless split_ratio and split_ratio=split_ratio.to_f + raise OpenTox::NotFoundError.new "Split ratio not >0 and <1 :"+split_ratio.to_s unless split_ratio>0 && split_ratio<1 if prediction_feature - $sinatra.halt 400, "Prediction feature '"+prediction_feature.to_s+ + raise OpenTox::NotFoundError.new "Prediction feature '"+prediction_feature.to_s+ "' not found in dataset, features are: \n"+ - orig_dataset.features.inspect unless orig_dataset.features.include?(prediction_feature) + orig_dataset.features.keys.inspect unless orig_dataset.features.include?(prediction_feature) else LOGGER.warn "no prediciton feature given, all features included in test dataset" end compounds = orig_dataset.compounds - $sinatra.halt 400, "Cannot split datset, num compounds in dataset < 2 ("+compounds.size.to_s+")" if compounds.size<2 + raise OpenTox::BadRequestError.new "Cannot split datset, num compounds in dataset < 2 ("+compounds.size.to_s+")" if compounds.size<2 split = (compounds.size*split_ratio).to_i split = [split,1].max split = [split,compounds.size-2].min @@ -356,24 +577,56 @@ module Validation LOGGER.debug "splitting dataset "+orig_dataset_uri+ " into train:0-"+split.to_s+" and test:"+(split+1).to_s+"-"+(compounds.size-1).to_s+ " (shuffled with seed "+random_seed.to_s+")" - compounds.shuffle!( random_seed ) + task.progress(33) if task result = {} - result[:training_dataset_uri] = orig_dataset.create_new_dataset( compounds[0..split], - orig_dataset.features, - "Training dataset split of "+orig_dataset.title.to_s, - $sinatra.url_for('/training_test_split',:full) ) - result[:test_dataset_uri] = orig_dataset.create_new_dataset( compounds[(split+1)..-1], - orig_dataset.features.dclone - [prediction_feature], - "Test dataset split of "+orig_dataset.title.to_s, - $sinatra.url_for('/training_test_split',:full) ) - - $sinatra.halt 400, "Training dataset not found: '"+result[:training_dataset_uri].to_s+"'" unless OpenTox::Dataset.find result[:training_dataset_uri] - $sinatra.halt 400, "Test dataset not found: '"+result[:test_dataset_uri].to_s+"'" unless OpenTox::Dataset.find result[:test_dataset_uri] +# result[:training_dataset_uri] = orig_dataset.create_new_dataset( compounds[0..split], +# orig_dataset.features, +# "Training dataset split of "+orig_dataset.title.to_s, +# $sinatra.url_for('/training_test_split',:full) ) + +# orig_dataset.data_entries.each do |k,v| +# puts k.inspect+" =>"+v.inspect +# puts v.values[0].to_s+" "+v.values[0].class.to_s +# end + + result[:training_dataset_uri] = orig_dataset.split( compounds[0..split], + orig_dataset.features.keys, + { DC.title => "Training dataset split of "+orig_dataset.title.to_s, + DC.creator => $url_provider.url_for('/training_test_split',:full) }, + subjectid ).uri + task.progress(66) if task + +# d = OpenTox::Dataset.find(result[:training_dataset_uri]) +# d.data_entries.values.each do |v| +# puts v.inspect +# puts v.values[0].to_s+" "+v.values[0].class.to_s +# end +# raise "stop here" + +# result[:test_dataset_uri] = orig_dataset.create_new_dataset( compounds[(split+1)..-1], +# orig_dataset.features.dclone - [prediction_feature], +# "Test dataset split of "+orig_dataset.title.to_s, +# $sinatra.url_for('/training_test_split',:full) ) + result[:test_dataset_uri] = orig_dataset.split( compounds[(split+1)..-1], + orig_dataset.features.keys.dclone - [prediction_feature], + { DC.title => "Test dataset split of "+orig_dataset.title.to_s, + DC.creator => $url_provider.url_for('/training_test_split',:full) }, + subjectid ).uri + task.progress(100) if task + + if ENV['RACK_ENV'] =~ /test|debug/ + raise OpenTox::NotFoundError.new "Training dataset not found: '"+result[:training_dataset_uri].to_s+"'" unless + OpenTox::Dataset.find(result[:training_dataset_uri],subjectid) + test_data = OpenTox::Dataset.find result[:test_dataset_uri],subjectid + raise OpenTox::NotFoundError.new "Test dataset not found: '"+result[:test_dataset_uri].to_s+"'" unless test_data + test_data.load_compounds subjectid + raise "Test dataset num coumpounds != "+(compounds.size-split-1).to_s+", instead: "+ + test_data.compounds.size.to_s+"\n"+test_data.to_yaml unless test_data.compounds.size==(compounds.size-1-split) + end LOGGER.debug "split done, training dataset: '"+result[:training_dataset_uri].to_s+"', test dataset: '"+result[:test_dataset_uri].to_s+"'" - return result end diff --git a/validation/validation_test.rb b/validation/validation_test.rb index cbaa84b..ffb25c4 100644..100755 --- a/validation/validation_test.rb +++ b/validation/validation_test.rb @@ -1,3 +1,11 @@ + +require "rubygems" +require "sinatra" +before { + request.env['HTTP_HOST']="local-ot/validation" + request.env["REQUEST_URI"]=request.env["PATH_INFO"] +} + require "uri" require "yaml" ENV['RACK_ENV'] = 'test' @@ -7,70 +15,338 @@ require 'rack/test' require 'lib/test_util.rb' require 'test/test_examples.rb' -LOGGER = MyLogger.new(STDOUT) +LOGGER = OTLogger.new(STDOUT) LOGGER.datetime_format = "%Y-%m-%d %H:%M:%S " LOGGER.formatter = Logger::Formatter.new +if AA_SERVER + TEST_USER = "mgtest" + TEST_PW = "mgpasswd" + #TEST_USER = "guest" + #TEST_PW = "guest" + SUBJECTID = OpenTox::Authorization.authenticate(TEST_USER,TEST_PW) + raise "could not log in" unless SUBJECTID + puts "logged in: "+SUBJECTID.to_s +else + puts "AA disabled" + SUBJECTID = nil +end + +#Rack::Test::DEFAULT_HOST = "local-ot" #"/validation" +module Sinatra + + set :raise_errors, false + set :show_exceptions, false + + module UrlForHelper + BASE = "http://local-ot/validation" + def url_for url_fragment, mode=:path_only + case mode + when :path_only + raise "not impl" + when :full + end + "#{BASE}#{url_fragment}" + end + end +end + class ValidationTest < Test::Unit::TestCase include Rack::Test::Methods include Lib::TestUtil def test_it - $test_case = self + begin + $test_case = self + +# prediction_feature = "https://ambit.uni-plovdiv.bg:8443/ambit2/feature/26221" +# puts OpenTox::Feature.find(prediction_feature).domain.inspect +# exit + +# begin +# #OpenTox::RestClientWrapper.get "http://local-ot/validation/runtime-error",{:accept => "application/rdf+xml"} +# puts OpenTox::RestClientWrapper.post "http://opentox.ntua.gr:4000/model/0d8a9a27-3481-4450-bca1-d420a791de9d", +# { :asdfasdf => "asdfasdf" } #{:dataset=>"http://apps.ideaconsult.net:8080/ambit2/dataset/54?max=2"}, +# { :accept => "text/uri-list", :subjectid => SUBJECTID } +# #puts OpenTox::RestClientWrapper.post "http://opentox.ntua.gr:4000/model/0d8a9a27-3481-4450-bca1-d420a791de9d",{},{:accept => "text/uri-list", :subjectid => "AQIC5wM2LY4SfcwUNX97nTvaSTdYJ+nTUqZsR0UitJ4+jlc=@AAJTSQACMDE=#"} +# rescue => err +# rep = OpenTox::ErrorReport.create(err, "") +# puts rep.to_yaml +# end + + # "http://opentox.ntua.gr:4000/model/0d8a9a27-3481-4450-bca1-d420a791de9d" + +# get "/19999",nil,'HTTP_ACCEPT' => "text/html" +# exit +# +# get "/234234232341",nil,'HTTP_ACCEPT' => "application/x-yaml" +# puts last_response.body +## +# get "/crossvalidation/1",nil,'HTTP_ACCEPT' => "application/rdf+xml" +# puts last_response.body +# exit + + # d = OpenTox::Dataset.find("http://ot-dev.in-silico.ch/dataset/307") + # puts d.compounds.inspect + # exit + + #get "?model=http://local-ot/model/1" + # get "/crossvalidation/3/predictions" + # puts last_response.body + + # post "/validate_datasets",{ + # :test_dataset_uri=>"http://apps.deaconsult.net:8080/ambit2/dataset/R3924", + # :prediction_dataset_uri=>"http://apps.ideaconsult.net:8080/ambit2/dataset/R3924?feature_uris[]=http%3A%2F%2Fapps.ideaconsult.net%3A8080%2Fambit2%2Fmodel%2F52%2Fpredicted", + # #:test_target_dataset_uri=>"http://local-ot/dataset/202", + # :prediction_feature=>"http://apps.ideaconsult.net:8080/ambit2/feature/21715", + # :predicted_feature=>"http://apps.ideaconsult.net:8080/ambit2/feature/28944", + # :regression=>"true"} + # #:classification=>"true"} + # puts last_response.body + + #post "/crossvalidation/cleanup" + #puts last_response.body + + #get "/crossvalidation/19/predictions",nil,'HTTP_ACCEPT' => "application/x-yaml" #/statistics" + # post "",:model_uri=>"http://local-ot/model/1",:test_dataset_uri=>"http://local-ot/dataset/3", + # :test_target_dataset_uri=>"http://local-ot/dataset/1" + + # get "/crossvalidation/2",nil,'HTTP_ACCEPT' => "application/rdf+xml" + #puts last_response.body + #exit - #get "/crossvalidation/4/statistics" -# post "",:model_uri=>"http://localhost/model/1",:test_dataset_uri=>"http://localhost/dataset/3", -# :test_target_dataset_uri=>"http://localhost/dataset/1" +# OpenTox::Crossvalidation.create( +# :dataset_uri=>"http://local-ot/dataset/1874", +# :algorithm_uri=>"http://local-ot/algorithm/lazar", +# :prediction_feature=>"http://local-ot/dataset/1874/feature/Hamster%20Carcinogenicity", +# :algorithm_params=>"feature_generation_uri=http://local-ot/algorithm/fminer/bbrc") - #get "/crossvalidation/1",nil,'HTTP_ACCEPT' => "application/rdf+xml" - #puts last_response.body - -# post "/test_validation",:select=>"6d" #,:report=>"yes,please" -# puts last_response.body - -# post "/validate_datasets",{ -# :test_dataset_uri=>"http://localhost/dataset/204", -# :prediction_dataset_uri=>"http://localhost/dataset/206", -# :test_target_dataset_uri=>"http://localhost/dataset/202", -# :prediction_feature=>"http://ot-dev.in-silico.ch/toxcreate/feature#IRIS%20unit%20risk", -# :predicted_feature=>"http://ot-dev.in-silico.ch/toxcreate/feature#IRIS%20unit%20risk_lazar_regression", -# :regression=>"true"} -# #:classification=>"true"} -# puts last_response.body - - run_test("3b" ) #, "http://localhost/validation/826") #,"http://localhost/validation/report/validation/36") #, "http://localhost/validation/321") - - #run_test("7a") #,"http://localhost/validation/crossvalidation/10") #, "http://localhost/validation/321") - - #run_test("8b", "http://localhost/validation/crossvalidation/4") - - #puts Nightly.build_nightly("1") +#http://local-ot/dataset/1878 + + #get "/crossvalidation?model_uri=lazar" + # post "/test_validation",:select=>"6d" #,:report=>"yes,please" + #puts last_response.body + + # post "/validate_datasets",{ + # :test_dataset_uri=>"http://local-ot/dataset/204", + # :prediction_dataset_uri=>"http://local-ot/dataset/206", + # :test_target_dataset_uri=>"http://local-ot/dataset/202", + # :prediction_feature=>"http://ot-dev.in-silico.ch/toxcreate/feature#IRIS%20unit%20risk", + # :predicted_feature=>"http://ot-dev.in-silico.ch/toxcreate/feature#IRIS%20unit%20risk_lazar_regression", + # :regression=>"true"} + # #:classification=>"true"} + # puts last_response.body + + # post "/validate_datasets",{ + # :test_dataset_uri=>"http://local-ot/dataset/89", + # :prediction_dataset_uri=>"http://local-ot/dataset/91", + # :test_target_dataset_uri=>"http://local-ot/dataset/87", + # :prediction_feature=>"http://local-ot/dataset/1/feature/hamster_carcinogenicity", + # :predicted_feature=>"", + ## :regression=>"true"} + # :classification=>"true"} + # puts last_response.body + + # m = OpenTox::Model::Generic.find("http://local-ot/model/1323333") + # puts m.to_yaml + +# post "/validate_datasets",{ +# :test_dataset_uri=>"http://local-ot/dataset/390", +# :prediction_dataset_uri=>"http://local-ot/dataset/392", +# :test_target_dataset_uri=>"http://local-ot/dataset/388", +# :prediction_feature=>"http://local-ot/dataset/388/feature/repdose_classification", +# :model_uri=>"http://local-ot/model/31"} +# #:regression=>"true"} +# # :classification=>"true"} +# uri = last_response.body +# val = wait_for_task(uri) +# puts val +# get "/"+val.split("/")[-1] + +# post "/validate_datasets",{ +# :test_dataset_uri=>"http://opentox.informatik.uni-freiburg.de/dataset/409", +# :prediction_dataset_uri=>"http://opentox.informatik.uni-freiburg.de/dataset/410", +# :test_target_dataset_uri=>"https://ambit.uni-plovdiv.bg:8443/ambit2/dataset/R401560", +# :prediction_feature=>"https://ambit.uni-plovdiv.bg:8443/ambit2/feature/22190", +# :predicted_feature=>"https://ambit.uni-plovdiv.bg:8443/ambit2/feature/218304", +# :regression=>"true", +# :subjectid=>SUBJECTID} +# #:model_uri=>"http://local-ot/model/31"} +# #:regression=>"true"} +# # :classification=>"true"} +# uri = last_response.body +# val = wait_for_task(uri) +# puts val +# #get "/"+val.split("/")[-1] + + + + #ambit_service = "https://ambit.uni-plovdiv.bg:8443/ambit2" + #https%3A%2F%2Fambit.uni-plovdiv.bg%3A8443%2Fambit2 + +# post "/validate_datasets",{ +# :test_dataset_uri=>ambit_service+"/dataset/R401577?max=50", +# :prediction_dataset_uri=>ambit_service+"/dataset/R401577?max=50&feature_uris[]="+CGI.escape(ambit_service)+"%2Fmodel%2F35194%2Fpredicted", +# #:test_target_dataset_uri=>ambit_service+"/dataset/R401560", +# :prediction_feature=>ambit_service+"/feature/26221", +# :predicted_feature=>ambit_service+"/feature/218699", +# :classification=>"true", +# :subjectid=>SUBJECTID} +# #:model_uri=>"http://local-ot/model/31"} +# #:regression=>"true"} +# # :classification=>"true"} +# uri = last_response.body +# val = wait_for_task(uri) +# puts val +# #get "/"+val.split("/")[-1] + + +# d = OpenTox::Dataset.find("https://ambit.uni-plovdiv.bg:8443/ambit2/dataset/R545",SUBJECTID) +# puts d.compounds.inspect +# exit + +# f = File.new("data/ambit-dataset.rdf") +# d = ValidationExamples::Util.upload_dataset(f, SUBJECTID) +# puts d + +# d = OpenTox::Dataset.find("https://ambit.uni-plovdiv.bg:8443/ambit2/dataset/R401560",SUBJECTID) +# #puts d.compounds.to_yaml +# #puts d.features.keys.to_yaml +# puts d.to_yaml +# d2 = d.split(d.compounds[0..5], d.features.keys[0..1], {}, SUBJECTID) +# puts d2.to_yaml + + # run_test("1b")#,:validation_uri=>"http://local-ot/validation/253") #,"http://local-ot/validation/28")#,"http://local-ot/validation/394"); + + #run_test("3b",:validation_uri=>"http://local-ot/validation/crossvalidation/45") #,{:dataset_uri => "http://local-ot/dataset/773", :prediction_feature => "http://local-ot/dataset/773/feature/Hamster%20Carcinogenicity"}) + +# p = { +# :dataset_uri=>"http://local-ot/dataset/527", +# :algorithm_uri => "http://local-ot/majority/class/algorithm", +# :prediction_feature=>"http://local-ot/dataset/527/feature/Hamster%20Carcinogenicity", +# :num_folds => 2 } + #cv = OpenTox::Crossvalidation.create(p, SUBJECTID) +# cv = OpenTox::Crossvalidation.find("http://local-ot/validation/crossvalidation/17", SUBJECTID) +# puts cv.uri +## puts cv.find_or_create_report.uri +# puts cv.summary(SUBJECTID).inspect + + #puts OpenTox::Authorization.list_policy_uris(SUBJECTID).inspect + + #puts OpenTox::Authorization.list_policy_uris(SUBJECTID).inspect + + #run_test("19d") #,{:dataset_uri => "http://local-ot/dataset/313", :prediction_feature => "http://local-ot/dataset/313/feature/repdose_classification"}) + +# model = OpenTox::Model::Generic.find("http://local-ot/majority/class/model/58") +# OpenTox::QMRFReport.create(model) + + + #get "/12123123123123123" + #get "/chain" + + #OpenTox::RestClientWrapper.get("http://local-ot/validation/task-error") + #get "/error",nil,'HTTP_ACCEPT' => "application/rdf+xml" + #puts "" + #puts "" + #puts last_response.body + #exit + +# get "/error" +# puts last_response.body + + #delete "/1",:subjectid=>SUBJECTID + + run_test("19i") + + #run_test("3a","http://local-ot/validation/crossvalidation/4") + #run_test("3b","http://local-ot/validation/crossvalidation/3") + + #run_test("8a", "http://local-ot/validation/crossvalidation/6") + #run_test("8b", "http://local-ot/validation/crossvalidation/5") + + #run_test("11b", "http://local-ot/validation/crossvalidation/2" )# //local-ot/validation/42")#, "http://local-ot/validation/report/validation/8") #,"http://local-ot/validation/report/validation/36") #, "http://local-ot/validation/321") + # run_test("7a","http://local-ot/validation/40") #,"http://local-ot/validation/crossvalidation/10") #, "http://local-ot/validation/321") + #run_test("8b", "http://local-ot/validation/crossvalidation/4") + + #puts Nightly.build_nightly("1") + + #prepare_examples + #do_test_examples # USES CURL, DO NOT FORGET TO RESTART VALIDATION SERVICE + #do_test_examples_ortona - #prepare_examples - #do_test_examples # USES CURL, DO NOT FORGET TO RESTART VALIDATION SERVICE + rescue => ex + rep = OpenTox::ErrorReport.create(ex, "") + puts rep.to_yaml + ensure + #OpenTox::Authorization.logout(SUBJECTID) if AA_SERVER + end end def app Sinatra::Application end - def run_test(select, validation_uri=nil) + def run_test(select=nil, overwrite={}, delete=false ) + + if AA_SERVER && SUBJECTID && delete + policies_before = OpenTox::Authorization.list_policy_uris(SUBJECTID) + end + + puts ValidationExamples.list unless select validationExamples = ValidationExamples.select(select) validationExamples.each do |vv| vv.each do |v| ex = v.new - ex.validation_uri = validation_uri + ex.subjectid = SUBJECTID + + overwrite.each do |k,v| + ex.send(k.to_s+"=",v) + end + unless ex.validation_uri ex.upload_files ex.check_requirements ex.validate - LOGGER.debug "validation done "+ex.validation_uri.to_s + + LOGGER.debug "validation done '"+ex.validation_uri.to_s+"'" + end + if !delete and ex.validation_uri + if SUBJECTID + puts ex.validation_uri+"?subjectid="+CGI.escape(SUBJECTID) + else + puts ex.validation_uri + end + end + + unless ex.report_uri + ex.report + end + if !delete and ex.report_uri + if SUBJECTID + puts ex.report_uri+"?subjectid="+CGI.escape(SUBJECTID) + else + puts ex.report_uri + end end - ex.verify_yaml - ex.report + ##ex.verify_yaml + ##ex.compare_yaml_vs_rdf + ex.delete if delete end end + + if AA_SERVER && SUBJECTID && delete + policies_after= OpenTox::Authorization.list_policy_uris(SUBJECTID) + diff = policies_after.size - policies_before.size + if (diff != 0) + policies_before.each do |k,v| + policies_after.delete(k) + end + LOGGER.warn diff.to_s+" policies NOT deleted:\n"+policies_after.collect{|k,v| k.to_s+" => "+v.to_s}.join("\n") + else + LOGGER.debug "all policies deleted" + end + end end def prepare_examples @@ -78,7 +354,11 @@ class ValidationTest < Test::Unit::TestCase end def do_test_examples # USES CURL, DO NOT FORGET TO RESTART - get '/test_examples' + post '/test_examples' + end + + def do_test_examples_ortona + post '/test_examples',:examples=>"http://ortona.informatik.uni-freiburg.de/validation/examples" end end |