diff options
-rw-r--r-- | EXAMPLES | 96 | ||||
-rw-r--r-- | application.rb | 13 | ||||
-rw-r--r-- | example.rb | 85 | ||||
-rw-r--r-- | lib/wrapper.rb | 33 | ||||
-rw-r--r-- | report/r_plot_factory.rb | 4 | ||||
-rw-r--r-- | report/report_application.rb | 2 | ||||
-rw-r--r-- | report/report_factory.rb | 26 | ||||
-rw-r--r-- | report/report_service.rb | 14 | ||||
-rw-r--r-- | report/report_test.rb | 16 | ||||
-rw-r--r-- | report/validation_access.rb | 14 | ||||
-rw-r--r-- | report/validation_data.rb | 10 | ||||
-rw-r--r-- | test.rb | 4 | ||||
-rw-r--r-- | validation/validation_application.rb | 28 | ||||
-rw-r--r-- | validation/validation_format.rb | 413 | ||||
-rw-r--r-- | validation/validation_service.rb | 553 | ||||
-rw-r--r-- | validation/validation_test.rb | 57 |
16 files changed, 775 insertions, 593 deletions
@@ -4,7 +4,7 @@ API-Definition see http://opentox.org/dev/apis/api-1.1/Validation -API-Examples +API-Examples Validation =================================================================== how to @@ -141,3 +141,97 @@ result example (accept-header: text/uri-list) +API-Examples Reports +=================================================================== + + +get list of available report types +------------------------------------------------------------------- + +>>> curl <validation_service>/report + +result example (accept-header: application/rdf-xml) +<<< not yet supported + +result example (accept-header: text/uri-list) +<<< <validation_service>/report/validation +<<< <validation_service>/report/crossvalidation +<<< <validation_service>/report/algorithm_comparison + + +get list of available reports of type validation +------------------------------------------------------------------- + +>>> curl ot.validation.de/report/validation + +result example (accept-header: application/rdf-xml) +<<< not yet supported + +result example (accept-header: text/uri-list) +<<< <validation_service>/report/validation/<validation_report_id> +<<< <validation_service>/report/validation/id_i +<<< <validation_service>/report/validation/id_j + + +get validation report +------------------------------------------------------------------- + +>>> curl <validation_service>/report/validation/<validation_report_id> + +Supported formats (accept-headers): +* "text/xml" content of report in docbook-article format +* "text/html" report formated with default docbook-article-xsl + +Hint: Visit <validation_service>/report/validation/<validation_report_id> with a browser to see the report in html format + + +create validation report from validation +------------------------------------------------------------------- + +>>> curl -X POST -d validation_uris="<validation_service>/<validation_id>" \ + ot.validation.de/report/validation + +result example (accept-header: application/rdf-xml) +<<< not yet supported + +result example (accept-header: text/uri-list) +<<< <validation_service>/report/validation/id_i + + +get list of available reports of type crossvalidation +------------------------------------------------------------------- + +>>> curl <validation_service>/report/crossvalidation + +result example (accept-header: application/rdf-xml) +<<< not yet supported + +result example (accept-header: text/uri-list) +<<< <validation_service>/report/crossvalidation/<crossvalidation_report_id> +<<< <validation_service>/report/crossvalidation/id_i +<<< <validation_service>/report/crossvalidation/id_j + + +get crossvalidation report +------------------------------------------------------------------- + +>>> curl <validation_service>/report/crossvalidation/<crossvalidation_report_id> + +Supported formats (accept-headers): +* "text/xml" content of report in docbook-article format +* "text/html" report formated with default docbook-article-xsl + +Hint: Visit <validation_service>/report/crossvalidation/<crossvalidation_report_id> with a browser to see the report in html format + + +create crossvalidation report from crossvalidation +------------------------------------------------------------------- + +>>> curl -X POST -d validation_uris="<validation_service>/crossvalidation/<crossvalidation_id>" \ + ot.validation.de/report/crossvalidation + +result example (accept-header: application/rdf-xml) +<<< not yet supported + +result example (accept-header: text/uri-list) +<<< <validation_service>/report/crossvalidation/id_i
\ No newline at end of file diff --git a/application.rb b/application.rb index c9506ed..bbd599f 100644 --- a/application.rb +++ b/application.rb @@ -17,14 +17,19 @@ end get '/prepare_examples/?' do Example.prepare_example_resources - "done" end -# order is important, first add example methods, than validation -# (otherwise sinatra will try to locate a validation with name examples) +get '/test_examples/?' do + Example.test_examples +end + +# order is important, first add example methods and reports, than validation +# (otherwise sinatra will try to locate a validation with name examples or report) + -require "validation/validation_application.rb" require "report/report_application.rb" +require "validation/validation_application.rb" + @@ -10,6 +10,8 @@ class Example @@train_data=File.join @@config[:services]["opentox-dataset"],"2" @@test_data=File.join @@config[:services]["opentox-dataset"],"3" + @@summary="" + def self.transform_example file = File.new("EXAMPLES", "r") @@ -28,7 +30,10 @@ class Example "prediction_feature" => @@feature, "algorithm_uri" => @@alg, "algorithm_params" => @@alg_params, - "crossvalidation_id" => "1",} + "crossvalidation_id" => "1", + "validation_report_id" => "1", + "crossvalidation_report_id" => "1", + } sub.each do |k,v| res.gsub!(/<#{k}>/,v) @@ -37,37 +42,97 @@ class Example end def self.delete_all(uri_list_service) - uri_list = RestClient.get(uri_list_service) + uri_list = OpenTox::RestClientWrapper.get(uri_list_service) uri_list.split("\n").each do |uri| - RestClient.delete(uri) + OpenTox::RestClientWrapper.delete(uri) end end + def self.log(log_string) + LOGGER.debug log_string + @@summary += log_string+"\n" + end + def self.prepare_example_resources + @@summary = "" delete_all(@@config[:services]["opentox-dataset"]) + data = File.read(@@file.path) - data_uri = RestClient.post @@config[:services]["opentox-dataset"], data, :content_type => "application/rdf+xml" - puts "uploaded dataset "+data_uri + data_uri = OpenTox::RestClientWrapper.post @@config[:services]["opentox-dataset"], data, :content_type => "application/rdf+xml" + log "uploaded dataset "+data_uri raise "failed to prepare demo" unless data_uri==@@data Lib::Validation.auto_migrate! delete_all(@@config[:services]["opentox-model"]) - vali_uri = RestClient.post File.join(@@config[:services]["opentox-validation"],'/training_test_split'), { :dataset_uri => data_uri, + vali_uri = OpenTox::RestClientWrapper.post File.join(@@config[:services]["opentox-validation"],'/training_test_split'), { :dataset_uri => data_uri, :algorithm_uri => @@alg, :prediction_feature => @@feature, :algorithm_params => @@alg_params } - puts "created validation via training test split "+vali_uri + log "created validation via training test split "+vali_uri raise "failed to prepare demo" unless vali_uri==File.join(@@config[:services]["opentox-validation"],'/1') Lib::Crossvalidation.auto_migrate! - cv_uri = RestClient.post File.join(@@config[:services]["opentox-validation"],'/crossvalidation'), { :dataset_uri => data_uri, + cv_uri = OpenTox::RestClientWrapper.post File.join(@@config[:services]["opentox-validation"],'/crossvalidation'), { :dataset_uri => data_uri, :algorithm_uri => @@alg, :prediction_feature => @@feature, :algorithm_params => @@alg_params, :num_folds => 5, :stratified => false } - puts "created crossvalidation "+cv_uri + log "created crossvalidation "+cv_uri raise "failed to prepare demo" unless cv_uri==File.join(@@config[:services]["opentox-validation"],'/crossvalidation/1') + delete_all(File.join(@@config[:services]["opentox-validation"],"/report/validation")) + val_report_uri = OpenTox::RestClientWrapper.post File.join(@@config[:services]["opentox-validation"],'/report/validation'), { :validation_uris => vali_uri } + log "created validation report: "+val_report_uri + raise "failed to prepare demo" unless val_report_uri==File.join(@@config[:services]["opentox-validation"],'/report/validation/1') + + delete_all(File.join(@@config[:services]["opentox-validation"],"/report/crossvalidation")) + cv_report_uri = OpenTox::RestClientWrapper.post File.join(@@config[:services]["opentox-validation"],'/report/crossvalidation'), { :validation_uris => cv_uri } + log "created crossvalidation report: "+cv_report_uri + raise "failed to prepare demo" unless cv_report_uri==File.join(@@config[:services]["opentox-validation"],'/report/crossvalidation/1') + log "done" + + @@summary + end + + + def self.test_examples + lines = transform_example.split("\n") + curl_call = false + curl_calls = [] + + lines.each do |line| + if line =~ /^\s*>>>\s*.*/ + line.gsub!(/^\s*>>>\s*/,"") + if line =~ /.*\s*\\s*$/ + curl_call = true + line.gsub!(/\s*\\s*$/," ") + else + curl_call = false + end + curl_calls.push( line ) + elsif curl_call + if line =~ /.*\s*\\s*$/ + curl_call = true + line.gsub!(/\s*\\s*$/," ") + else + curl_call = false + end + curl_calls[-1] = curl_calls[-1]+line + end + end + + @@summary = "" + curl_calls.each do |cmd| + log "testing: "+cmd + IO.popen(cmd.to_s+" 2> /dev/null") do |f| + while line = f.gets + #response += indent.to_s+line + end + end + log ($?==0)?"ok":"failed" + end + @@summary end -end
\ No newline at end of file + +end diff --git a/lib/wrapper.rb b/lib/wrapper.rb index c34a31b..40df0e5 100644 --- a/lib/wrapper.rb +++ b/lib/wrapper.rb @@ -43,38 +43,43 @@ module OpenTox module RestClientWrapper - def self.get(uri, params=nil) - execute( "get", uri, params ) + def self.get(uri, headers=nil) + execute( "get", uri, nil, headers ) end - def self.post(uri, params=nil) - execute( "post", uri, params ) + def self.post(uri, payload=nil, headers=nil) + execute( "post", uri, payload, headers ) end - def self.delete(uri, params=nil) - execute( "delete", uri, params ) + def self.delete(uri, headers=nil) + execute( "delete", uri, nil, headers ) end private - def self.execute( rest_call, uri, params=nil ) + def self.execute( rest_call, uri, payload, headers ) - do_halt 400,"uri is null",uri,params unless uri + do_halt 400,"uri is null",uri,payload,headers unless uri begin - RestClient.send(rest_call, uri, params) + if payload + RestClient.send(rest_call, uri, payload, headers) + else + RestClient.send(rest_call, uri, headers) + end rescue RestClient::RequestFailed, RestClient::RequestTimeout => ex - do_halt 502,ex.message,uri,params + do_halt 502,ex.message,uri,payload,headers rescue SocketError, RestClient::ResourceNotFound => ex - do_halt 400,ex.message,uri,params + do_halt 400,ex.message,uri,payload,headers rescue Exception => ex - do_halt 500,"add error '"+ex.class.to_s+"'' to rescue in OpenTox::RestClientWrapper::execute(), msg: '"+ex.message.to_s+"'",uri,params + do_halt 500,"add error '"+ex.class.to_s+"'' to rescue in OpenTox::RestClientWrapper::execute(), msg: '"+ex.message.to_s+"'",uri,payload,headers end end - def self.do_halt(status, msg, uri=nil, params=nil) + def self.do_halt(status, msg, uri, payload, headers) message = msg+"" message += ", uri: '"+uri.to_s+"'" if uri - message += ", params: '"+params.inspect+"'" if params + message += ", payload: '"+payload.inspect+"'" if payload + message += ", headers: '"+headers.inspect+"'" if headers if defined?(halt) halt(status,message) diff --git a/report/r_plot_factory.rb b/report/r_plot_factory.rb index 759691e..cc58e22 100644 --- a/report/r_plot_factory.rb +++ b/report/r_plot_factory.rb @@ -48,6 +48,8 @@ module Reports::RPlotFactory # def self.create_roc_plot( out_file, validation_set, class_value, split_set_attribute=nil, show_single_curves=false ) + raise "roc plot not available" + LOGGER.debug "creating roc plot, out-file:"+out_file.to_s r = Reports::RocPlot.new( out_file ) if split_set_attribute @@ -192,7 +194,7 @@ class Reports::RocPlot < Reports::RPlot R.eval("pred <- prediction(prediction_values,actual_values)") R.eval 'perf <- performance(pred,"tpr","fpr")' begin - # WORKAROUND to check weather the r calls worked out so far + # WORKAROUND to check weather the r calls worked out so far R.pull "perf@x.name" rescue => ex raise "error while creating roc plot ("+ex.message.to_s+")" diff --git a/report/report_application.rb b/report/report_application.rb index 3ff3c24..1c92172 100644 --- a/report/report_application.rb +++ b/report/report_application.rb @@ -46,5 +46,5 @@ delete '/report/:type/:id' do end post '/report/:type' do - perform{ |rs| rs.create_report(params[:type],params[:uri_list]?params[:uri_list].split("\n"):nil) } + perform{ |rs| rs.create_report(params[:type],params[:validation_uris]?params[:validation_uris].split("\n"):nil) } end diff --git a/report/report_factory.rb b/report/report_factory.rb index 5120a51..2d7de03 100644 --- a/report/report_factory.rb +++ b/report/report_factory.rb @@ -19,7 +19,7 @@ module Reports::ReportFactory RT_CV = "crossvalidation" RT_ALG_COMP = "algorithm_comparison" - REPORT_TYPES = [RT_FASTTOX, RT_VALIDATION, RT_CV, RT_ALG_COMP ] + REPORT_TYPES = [RT_VALIDATION, RT_CV, RT_ALG_COMP ] #,RT_FASTTOX # creates a report of a certain type according to the validation data in validation_set # @@ -220,17 +220,17 @@ class Reports::ReportContent split_set_attribute = nil, plot_file_name="roc-plot.svg", section_title="Roc Plot", - section_text="This section contains the roc plot.", + section_text=nil, image_title=nil, image_caption=nil) - unless image_title - if class_value - image_title = "Roc Plot for class-value '"+class_value+"'" - else - image_title = "Roc Plot for all classes" - end - end + if class_value + section_text = "This section contains the roc plot for class '"+class_value+"'." unless section_text + image_title = "Roc Plot for class-value '"+class_value+"'" unless image_title + else + section_text = "This section contains the roc plot." unless section_text + image_title = "Roc Plot for all classes" unless image_title + end section_roc = @xml_report.add_section(@xml_report.get_root_element, section_title) if validation_set.first.get_predictions @@ -241,7 +241,9 @@ class Reports::ReportContent Reports::RPlotFactory.create_roc_plot( plot_file_path, validation_set, class_value, split_set_attribute, validation_set.size>1 ) @xml_report.add_imagefigure(section_roc, image_title, plot_file_name, "SVG", image_caption) rescue RuntimeError => ex - LOGGER.error("could not create roc plot: "+ex.message) + LOGGER.error("could not create roc plot: "+ex.message) + LOGGER.debug("if R cannot find your libs, try adding R_LIBS='<lib>' to your ~/.Renviron file") + rm_tmp_file(plot_file_name) @xml_report.add_paragraph(section_roc, "could not create roc plot: "+ex.message) end else @@ -307,4 +309,8 @@ class Reports::ReportContent return tmp_file_path end + def rm_tmp_file(tmp_file_name) + @tmp_files.delete(tmp_file_name) if @tmp_files.has_key?(tmp_file_name) + end + end
\ No newline at end of file diff --git a/report/report_service.rb b/report/report_service.rb index 4d046b6..5c75ae4 100644 --- a/report/report_service.rb +++ b/report/report_service.rb @@ -35,22 +35,22 @@ module Reports @persistance.list_reports(type).collect{ |id| get_uri(type,id) }.join("\n") end - # creates a report of a certain type, __uri_list__ must contain be a list of validation or cross-validation-uris + # creates a report of a certain type, __validation_uris__ must contain be a list of validation or cross-validation-uris # returns the uir of the report # # call-seq: - # create_report(type, uri_list) => string + # create_report(type, validation_uris) => string # - def create_report(type, uri_list) + def create_report(type, validation_uris) LOGGER.info "create report of type '"+type.to_s+"'" check_report_type(type) # step1: load validations - raise Reports::BadRequest.new("validation uri_list missing") unless uri_list - LOGGER.debug "validation uri_list: '"+uri_list.inspect+"'" - validation_set = Reports::ValidationSet.new(uri_list) - raise Reports::BadRequest.new("cannot get validations from uri_list '"+uri_list.inspect+"'") unless validation_set and validation_set.size > 0 + raise Reports::BadRequest.new("validation_uris missing") unless validation_uris + LOGGER.debug "validation_uris: '"+validation_uris.inspect+"'" + validation_set = Reports::ValidationSet.new(validation_uris) + raise Reports::BadRequest.new("cannot get validations from validation_uris '"+validation_uris.inspect+"'") unless validation_set and validation_set.size > 0 LOGGER.debug "loaded "+validation_set.size.to_s+" validation/s" #step 2: create report of type diff --git a/report/report_test.rb b/report/report_test.rb index 1c04d1e..c3ee26c 100644 --- a/report/report_test.rb +++ b/report/report_test.rb @@ -31,10 +31,10 @@ require "lib/test_util.rb" # get '/osterhase' # assert last_response.not_found? # -# post '', :uri_list => "validation_uri_1\nvalidation_uri_2" +# post '', :validation_uris => "validation_uri_1\nvalidation_uri_2" # assert last_response.status == 400 # -# post '', :uri_list => "validation_uri_1" +# post '', :validation_uris => "validation_uri_1" # assert last_response.ok? # report_uri = last_response.body # type = $rep.parse_type(report_uri) @@ -57,7 +57,7 @@ require "lib/test_util.rb" # "algorithm_comparison"=> ("validation_uri\n"*(Reports::OTMockLayer::NUM_DATASETS * Reports::OTMockLayer::NUM_ALGS * Reports::OTMockLayer::NUM_FOLDS)) } # map.each do |t,u| # Reports.reset_ot_access -# post '/'+t.to_s, :uri_list=>u.to_s +# post '/'+t.to_s, :validation_uris=>u.to_s # assert last_response.ok? # report_uri = last_response.body # type = $rep.parse_type(report_uri) @@ -121,16 +121,16 @@ class Reports::ReportServiceTest < Test::Unit::TestCase #val_uri = create_single_validation(data_uri) #val_uri = create_single_validation(data_uri, WS_CLASS_ALG_2, WS_FEATURE_ALG_2) - val_uri = File.join(WS_VAL,"1") + #val_uri = File.join(WS_VAL,"1") # #add_resource val_uri - create_report(rep, val_uri, "validation") + #create_report(rep, val_uri, "validation") #val_uri = create_cross_validation(data_uri, WS_CLASS_ALG_2, WS_FEATURE_ALG_2) # #val_uri = create_cross_validation(data_uri) - #val_uri = File.join(WS_VAL,"crossvalidation/1") + val_uri = File.join(WS_VAL,"crossvalidation/1") # #val_uri2 = "http://localhost:4007/crossvalidation/14" # # add_resource val_uri -# create_report(rep, val_uri, "crossvalidation") + create_report(rep, val_uri, "crossvalidation") # #val_uri2 = create_cross_validation(data_uri, WS_CLASS_ALG_2, WS_FEATURE_ALG_2) # #val_uri = ["http://localhost:4007/crossvalidation/6", "http://localhost:4007/crossvalidation/8"] @@ -175,7 +175,7 @@ class Reports::ReportServiceTest < Test::Unit::TestCase #assert_raise(Reports::BadRequest){report_service.get_report(type, id, "weihnachtsmann")} report_service.get_report(type, id, "text/html") - report_service.get_report(type, id, "application/pdf") + #report_service.get_report(type, id, "application/pdf") #assert_raise(Reports::NotFound){report_service.delete_report(type, 877658)} # rep.delete_report(type, id) diff --git a/report/validation_access.rb b/report/validation_access.rb index 355a2f8..ce09131 100644 --- a/report/validation_access.rb +++ b/report/validation_access.rb @@ -21,7 +21,7 @@ class Reports::ValidationAccess raise "not implemented" end - def resolve_cv_uris(uri_list) + def resolve_cv_uris(validation_uris) raise "not implemented" end @@ -33,9 +33,9 @@ end class Reports::ValidationDB < Reports::ValidationAccess - def resolve_cv_uris(uri_list) + def resolve_cv_uris(validation_uris) res = [] - uri_list.each do |u| + validation_uris.each do |u| if u.to_s =~ /.*\/crossvalidation\/[0-9]+/ cv_id = u.split("/")[-1].to_i res += Lib::Validation.all(:crossvalidation_id => cv_id).collect{|v| v.uri.to_s} @@ -98,9 +98,9 @@ end class Reports::ValidationWebservice < Reports::ValidationAccess - def resolve_cv_uris(uri_list) + def resolve_cv_uris(validation_uris) res = [] - uri_list.each do |u| + validation_uris.each do |u| if u.to_s =~ /.*\/crossvalidation\/.*/ uri = u.to_s+"/validations" begin @@ -190,9 +190,9 @@ class Reports::ValidationMockLayer < Reports::ValidationAccess @count = 0 end - def resolve_cv_uris(uri_list) + def resolve_cv_uris(validation_uris) res = [] - uri_list.each do |u| + validation_uris.each do |u| if u.to_s =~ /.*crossvalidation.*/ res += ["validation_x"]*NUM_FOLDS else diff --git a/report/validation_data.rb b/report/validation_data.rb index 416b0b7..a101748 100644 --- a/report/validation_data.rb +++ b/report/validation_data.rb @@ -50,8 +50,8 @@ module Reports end end - def self.resolve_cv_uris(uri_list) - @@validation_access.resolve_cv_uris(uri_list) + def self.resolve_cv_uris(validation_uris) + @@validation_access.resolve_cv_uris(validation_uris) end @@validation_attributes = Lib::ALL_PROPS + @@ -189,10 +189,10 @@ module Reports # class ValidationSet - def initialize(uri_list = nil) - uri_list = Reports::Validation.resolve_cv_uris(uri_list) if uri_list + def initialize(validation_uris = nil) + validation_uris = Reports::Validation.resolve_cv_uris(validation_uris) if validation_uris @validations = Array.new - uri_list.each{|u| @validations.push(Reports::Validation.new(u))} if uri_list + validation_uris.each{|u| @validations.push(Reports::Validation.new(u))} if validation_uris end def get(index) @@ -1,4 +1,4 @@ -#require "validation/validation_test.rb" +require "validation/validation_test.rb" -require "report/report_test.rb"
\ No newline at end of file +#require "report/report_test.rb"
\ No newline at end of file diff --git a/validation/validation_application.rb b/validation/validation_application.rb index 5921907..a7cdc18 100644 --- a/validation/validation_application.rb +++ b/validation/validation_application.rb @@ -25,12 +25,12 @@ end ## REST API get '/crossvalidation/?' do LOGGER.info "list all crossvalidations" - Crossvalidation.all.collect{ |d| url_for("/crossvalidation/", :full) + d.id.to_s }.join("\n") + Validation::Crossvalidation.all.collect{ |d| url_for("/crossvalidation/", :full) + d.id.to_s }.join("\n") end get '/crossvalidation/:id' do LOGGER.info "get crossvalidation with id "+params[:id].to_s - halt 404, "Crossvalidation #{params[:id]} not found." unless crossvalidation = Crossvalidation.get(params[:id]) + halt 404, "Crossvalidation #{params[:id]} not found." unless crossvalidation = Validation::Crossvalidation.get(params[:id]) case request.env['HTTP_ACCEPT'].to_s when "application/rdf+xml" @@ -47,14 +47,14 @@ end delete '/crossvalidation/:id/?' do LOGGER.info "delete crossvalidation with id "+params[:id].to_s - halt 404, "Crossvalidation #{params[:id]} not found." unless crossvalidation = Crossvalidation.get(params[:id]) + halt 404, "Crossvalidation #{params[:id]} not found." unless crossvalidation = Validation::Crossvalidation.get(params[:id]) crossvalidation.delete end get '/crossvalidation/:id/validations' do LOGGER.info "get all validations for crossvalidation with id "+params[:id].to_s - halt 404, "Crossvalidation #{params[:id]} not found." unless crossvalidation = Crossvalidation.get(params[:id]) - Validation.all(:crossvalidation_id => params[:id]).collect{ |v| v.uri.to_s }.join("\n")+"\n" + halt 404, "Crossvalidation #{params[:id]} not found." unless crossvalidation = Validation::Crossvalidation.get(params[:id]) + Validation::Validation.all(:crossvalidation_id => params[:id]).collect{ |v| v.uri.to_s }.join("\n")+"\n" end post '/crossvalidation/?' do @@ -65,7 +65,7 @@ post '/crossvalidation/?' do cv_params = { :dataset_uri => params[:dataset_uri], :algorithm_uri => params[:algorithm_uri] } [ :num_folds, :random_seed, :stratified ].each{ |sym| cv_params[sym] = params[sym] if params[sym] } - cv = Crossvalidation.new cv_params + cv = Validation::Crossvalidation.new cv_params cv.create_cv_datasets( params[:prediction_feature] ) cv.perform_cv( params[:algorithm_params]) cv.uri @@ -73,12 +73,12 @@ end get '/?' do LOGGER.info "list all validations" - Validation.all.collect{ |d| url_for("/", :full) + d.id.to_s }.join("\n") + Validation::Validation.all.collect{ |d| url_for("/", :full) + d.id.to_s }.join("\n") end get '/:id' do LOGGER.info "get validation with id "+params[:id].to_s+" '"+request.env['HTTP_ACCEPT'].to_s+"'" - halt 404, "Validation '#{params[:id]}' not found." unless validation = Validation.get(params[:id]) + halt 404, "Validation '#{params[:id]}' not found." unless validation = Validation::Validation.get(params[:id]) case request.env['HTTP_ACCEPT'].to_s when "application/rdf+xml" @@ -96,12 +96,12 @@ end post '/?' do LOGGER.info "creating validation "+params.inspect if params[:model_uri] and params[:test_dataset_uri] and !params[:training_dataset_uri] and !params[:algorithm_uri] and params[:prediction_feature] - v = Validation.new :model_uri => params[:model_uri], + v = Validation::Validation.new :model_uri => params[:model_uri], :test_dataset_uri => params[:test_dataset_uri], :prediction_feature => params[:prediction_feature] v.validate_model elsif params[:algorithm_uri] and params[:training_dataset_uri] and params[:test_dataset_uri] and params[:prediction_feature] and !params[:model_uri] - v = Validation.new :training_dataset_uri => params[:training_dataset_uri], + v = Validation::Validation.new :training_dataset_uri => params[:training_dataset_uri], :test_dataset_uri => params[:test_dataset_uri], :prediction_feature => params[:prediction_feature] v.validate_algorithm( params[:algorithm_uri], params[:algorithm_params]) @@ -121,8 +121,8 @@ post '/training_test_split' do halt 400, "algorithm_uri missing" unless params[:algorithm_uri] halt 400, "prediction_feature missing" unless params[:prediction_feature] - params.merge!(ValidationUtil.train_test_dataset_split(params[:dataset_uri], params[:split_ratio], params[:random_seed])) - v = Validation.new :training_dataset_uri => params[:training_dataset_uri], + params.merge!(Validation::Util.train_test_dataset_split(params[:dataset_uri], params[:split_ratio], params[:random_seed])) + v = Validation::Validation.new :training_dataset_uri => params[:training_dataset_uri], :test_dataset_uri => params[:test_dataset_uri], :prediction_feature => params[:prediction_feature] v.validate_algorithm( params[:algorithm_uri], params[:algorithm_params]) @@ -131,7 +131,7 @@ end get '/:id/:attribute' do LOGGER.info "access validation attribute "+params.inspect - halt 404, "Validation #{params[:id]} not found." unless validation = Validation.get(params[:id]) + halt 404, "Validation #{params[:id]} not found." unless validation = Validation::Validation.get(params[:id]) begin raise unless validation.attribute_loaded?(params[:attribute]) rescue @@ -142,6 +142,6 @@ end delete '/:id' do LOGGER.info "delete validation with id "+params[:id].to_s - halt 404, "Validation #{params[:id]} not found." unless validation = Validation.get(params[:id]) + halt 404, "Validation #{params[:id]} not found." unless validation = Validation::Validation.get(params[:id]) validation.delete end
\ No newline at end of file diff --git a/validation/validation_format.rb b/validation/validation_format.rb index b953a9b..748271f 100644 --- a/validation/validation_format.rb +++ b/validation/validation_format.rb @@ -1,234 +1,237 @@ -# adding to_yaml and to_rdf functionality to validation -class Validation < Lib::Validation +module Validation - # get_content is the basis for to_yaml and to_rdf - # the idea is that everything is stored in a hash structure - # the hash is directly printed in to_yaml, while the has_keys can be used to resolve - # the right properties, classes - def get_content - - h = {} - Lib::VAL_PROPS.each{|p| h[p] = self.send(p)} - if crossvalidation_id!=nil - cv = {} - Lib::VAL_CV_PROPS.each do |p| - cv[p] = self.send(p) - end - h[:crossvalidation_info] = cv - end - if classification_statistics - clazz = {} - Lib::VAL_CLASS_PROPS_SINGLE.each{ |p| clazz[p] = classification_statistics[p] } + # adding to_yaml and to_rdf functionality to validation + class Validation < Lib::Validation + + # get_content is the basis for to_yaml and to_rdf + # the idea is that everything is stored in a hash structure + # the hash is directly printed in to_yaml, while the has_keys can be used to resolve + # the right properties, classes + def get_content - # transpose results per class - class_values = {} - Lib::VAL_CLASS_PROPS_PER_CLASS.each do |p| - classification_statistics[p].each do |class_value, property_value| - class_values[class_value] = {:class_value => class_value} unless class_values.has_key?(class_value) - map = class_values[class_value] - map[p] = property_value + h = {} + Lib::VAL_PROPS.each{|p| h[p] = self.send(p)} + if crossvalidation_id!=nil + cv = {} + Lib::VAL_CV_PROPS.each do |p| + cv[p] = self.send(p) end + h[:crossvalidation_info] = cv end - clazz[:class_value_statistics] = class_values.values - - #converting confusion matrix - cells = [] - classification_statistics[:confusion_matrix].each do |k,v| - cell = {} - # key in confusion matrix is map with predicted and actual attribute - k.each{ |kk,vv| cell[kk] = vv } - cell[:confusion_matrix_value] = v - cells.push cell + if classification_statistics + clazz = {} + Lib::VAL_CLASS_PROPS_SINGLE.each{ |p| clazz[p] = classification_statistics[p] } + + # transpose results per class + class_values = {} + Lib::VAL_CLASS_PROPS_PER_CLASS.each do |p| + classification_statistics[p].each do |class_value, property_value| + class_values[class_value] = {:class_value => class_value} unless class_values.has_key?(class_value) + map = class_values[class_value] + map[p] = property_value + end + end + clazz[:class_value_statistics] = class_values.values + + #converting confusion matrix + cells = [] + classification_statistics[:confusion_matrix].each do |k,v| + cell = {} + # key in confusion matrix is map with predicted and actual attribute + k.each{ |kk,vv| cell[kk] = vv } + cell[:confusion_matrix_value] = v + cells.push cell + end + cm = { :confusion_matrix_cell => cells } + clazz[:confusion_matrix] = cm + + h[:classification_statistics] = clazz + elsif regression_statistics + regr = {} + Lib::VAL_REGR_PROPS.each{ |p| regr[p] = regression_statistics[p]} + h[:regression_statistics] = regr end - cm = { :confusion_matrix_cell => cells } - clazz[:confusion_matrix] = cm + return h + end + + def to_yaml + get_content.to_yaml + end + + def to_rdf + owl = ValidationOwl.new() + owl.title = "Validation" + owl.uri = uri + owl.add_content( ValidationToRDF.new, get_content, "Validation" ) + owl.rdf + end + end + + class Crossvalidation < Lib::Crossvalidation + + def get_content + h = {} + Lib::CROSS_VAL_PROPS.each{|p| h[p] = self.send(p)} - h[:classification_statistics] = clazz - elsif regression_statistics - regr = {} - Lib::VAL_REGR_PROPS.each{ |p| regr[p] = regression_statistics[p]} - h[:regression_statistics] = regr + v = [] + Validation.all(:crossvalidation_id => self.id).each do |val| + v.push({ :validation_uri => val.uri.to_s }) + end + h[:validations] = v + h end - return h - end - - def to_yaml - get_content.to_yaml - end - - def to_rdf - owl = ValidationOwl.new() - owl.title = "Validation" - owl.uri = uri - owl.add_content( ValidationToRDF.new, get_content, "Validation" ) - owl.rdf - end - end - -class Crossvalidation < Lib::Crossvalidation - - def get_content - h = {} - Lib::CROSS_VAL_PROPS.each{|p| h[p] = self.send(p)} - v = [] - Validation.all(:crossvalidation_id => self.id).each do |val| - v.push({ :validation_uri => val.uri.to_s }) + def to_yaml + get_content.to_yaml end - h[:validations] = v - h - end - - def to_yaml - get_content.to_yaml - end - - def to_rdf - owl = ValidationOwl.new() - owl.title = "Crossvalidation" - owl.uri = uri - owl.add_content( CrossvalidationToRDF.new, get_content, "Crossvalidation" ) - owl.rdf - end - -end - - -class ValidationOwl - include OpenTox::Owl - - def initialize - super - end - - def add_content( content_to_rdf, output, clazz ) - @content_to_rdf = content_to_rdf - recursiv_add_content( output, @model.subject(RDF['type'],OT[clazz]) ) + + def to_rdf + owl = ValidationOwl.new() + owl.title = "Crossvalidation" + owl.uri = uri + owl.add_content( CrossvalidationToRDF.new, get_content, "Crossvalidation" ) + owl.rdf + end + end + - private - def recursiv_add_content( output, node ) - output.each do |k,v| - raise "null value: "+k.to_s if v==nil - if v.is_a?(Hash) - new_node = add_class( k, node ) - recursiv_add_content( v, new_node ) - elsif v.is_a?(Array) - v.each do |value| + class ValidationOwl + include OpenTox::Owl + + def initialize + super + end + + def add_content( content_to_rdf, output, clazz ) + @content_to_rdf = content_to_rdf + recursiv_add_content( output, @model.subject(RDF['type'],OT[clazz]) ) + end + + private + def recursiv_add_content( output, node ) + output.each do |k,v| + raise "null value: "+k.to_s if v==nil + if v.is_a?(Hash) new_node = add_class( k, node ) - recursiv_add_content( value, new_node ) + recursiv_add_content( v, new_node ) + elsif v.is_a?(Array) + v.each do |value| + new_node = add_class( k, node ) + recursiv_add_content( value, new_node ) + end + elsif @content_to_rdf.literal?(k) + set_literal( k, v, node) + elsif @content_to_rdf.object_property?(k) + add_object_property( k, v, node) + elsif [ :uri, :id, :finished ].index(k)!=nil + #skip + else + raise "illegal value k:"+k.to_s+" v:"+v.to_s end - elsif @content_to_rdf.literal?(k) - set_literal( k, v, node) - elsif @content_to_rdf.object_property?(k) - add_object_property( k, v, node) - elsif [ :uri, :id, :finished ].index(k)!=nil - #skip - else - raise "illegal value k:"+k.to_s+" v:"+v.to_s end end - end - - def add_class( property, node ) - raise "no object prop: "+property.to_s unless @content_to_rdf.object_property?(property) - raise "no class name: "+property.to_s unless @content_to_rdf.class_name(property) - res = @model.create_resource - @model.add res, RDF['type'], @content_to_rdf.class_name(property) - @model.add res, DC['title'], @content_to_rdf.class_name(property) - @model.add node, @content_to_rdf.object_property_name(property), res - return res - end - def set_literal(property, value, node ) - raise "empty literal value "+property.to_s if value==nil || value.to_s.size==0 - raise "no literal name "+propety.to_s unless @content_to_rdf.literal_name(property) - begin - l = @model.object(subject, @content_to_rdf.literal_name(property)) - @model.delete node, @content_to_rdf.literal_name(property), l - rescue - end - @model.add node, @content_to_rdf.literal_name(property), value.to_s + def add_class( property, node ) + raise "no object prop: "+property.to_s unless @content_to_rdf.object_property?(property) + raise "no class name: "+property.to_s unless @content_to_rdf.class_name(property) + res = @model.create_resource + @model.add res, RDF['type'], @content_to_rdf.class_name(property) + @model.add res, DC['title'], @content_to_rdf.class_name(property) + @model.add node, @content_to_rdf.object_property_name(property), res + return res + end + + def set_literal(property, value, node ) + raise "empty literal value "+property.to_s if value==nil || value.to_s.size==0 + raise "no literal name "+propety.to_s unless @content_to_rdf.literal_name(property) + begin + l = @model.object(subject, @content_to_rdf.literal_name(property)) + @model.delete node, @content_to_rdf.literal_name(property), l + rescue + end + @model.add node, @content_to_rdf.literal_name(property), value.to_s + end + + def add_object_property(property, value, node ) + raise "empty object property value "+property.to_s if value==nil || value.to_s.size==0 + raise "no object property name "+propety.to_s unless @content_to_rdf.object_property_name(property) + @model.add node, @content_to_rdf.object_property_name(property), Redland::Uri.new(value) # untyped individual comes from this line, why?? + #@model.add Redland::Uri.new(value), RDF['type'], type + end + end - def add_object_property(property, value, node ) - raise "empty object property value "+property.to_s if value==nil || value.to_s.size==0 - raise "no object property name "+propety.to_s unless @content_to_rdf.object_property_name(property) - @model.add node, @content_to_rdf.object_property_name(property), Redland::Uri.new(value) # untyped individual comes from this line, why?? - #@model.add Redland::Uri.new(value), RDF['type'], type - end -end - - -class ContentToRDF - - def literal?( prop ) - @literals.index( prop ) != nil - end + class ContentToRDF + + def literal?( prop ) + @literals.index( prop ) != nil + end + + def literal_name( prop ) + #PENDING + return OT[prop.to_s] + end + + def object_property?( prop ) + @object_properties.has_key?( prop ) + end + + def object_property_name( prop ) + return @object_properties[ prop ] + end - def literal_name( prop ) - #PENDING - return OT[prop.to_s] + def class_name( prop ) + return @classes[ prop ] + end + end - def object_property?( prop ) - @object_properties.has_key?( prop ) - end - def object_property_name( prop ) - return @object_properties[ prop ] - end - - def class_name( prop ) - return @classes[ prop ] + class CrossvalidationToRDF < ContentToRDF + + def initialize() + @literals = [ :stratified, :num_folds, :random_seed ] + @object_properties = { :dataset_uri => OT['crossvalidationDataset'], :algorithm_uri => OT['crossvalidationAlgorithm'], + :validation_uri => OT['crossvalidationValidation'], :validations => OT['crossvalidationValidations'] } + @classes = { :validations => OT['CrossvalidationValidations'] } + end end -end - - -class CrossvalidationToRDF < ContentToRDF + class ValidationToRDF < ContentToRDF + + def initialize() + @literals = [ :created_at, :real_runtime, :num_instances, :num_without_class, + :percent_without_class, :num_unpredicted, :percent_unpredicted, + :crossvalidation_fold, :crossvalidation_id, + :num_correct, :num_incorrect, :percent_correct, :percent_incorrect, + :area_under_roc, :false_negative_rate, :false_positive_rate, + :f_measure, :num_false_positives, :num_false_negatives, + :num_true_positives, :num_true_negatives, :precision, + :recall, :true_negative_rate, :true_positive_rate, + :confusion_matrix_value ] + # created at -> date + # owl.set_literal(OT['numInstances'],validation.num_instances) + # owl.set_literal(OT['numWithoutClass'],validation.num_without_class) + # owl.set_literal(OT['percentWithoutClass'],validation.percent_without_class) + # owl.set_literal(OT['numUnpredicted'],validation.num_unpredicted) + # owl.set_literal(OT['percentUnpredicted'],validation.percent_unpredicted) + + + @object_properties = { :model_uri => OT['validationModel'], :training_dataset_uri => OT['validationTrainingDataset'], + :prediction_feature => OT['predictedFeature'], :test_dataset_uri => OT['validationTestDataset'], + :prediction_dataset_uri => OT['validationPredictionDataset'], :crossvalidation_info => OT['hasValidationInfo'], + :classification_statistics => OT['hasValidationInfo'], + :class_value_statistics => OT['classValueStatistics'], :confusion_matrix => OT['confusionMatrix'], + :confusion_matrix_cell => OT['confusionMatrixCell'], :class_value => OT['class_value'], + :confusion_matrix_actual => OT['confusionMatrixActual'], :confusion_matrix_predicted => OT['confusionMatrixPredicted'] } + + @classes = { :crossvalidation_info => OT['CrossvalidationInfo'], :classification_statistics => OT['ClassificationStatistics'], + :class_value_statistics => OT['ClassValueStatistics'], + :confusion_matrix => OT['ConfusionMatrix'], :confusion_matrix_cell => OT['ConfusionMatrixCell']} + end - def initialize() - @literals = [ :stratified, :num_folds, :random_seed ] - @object_properties = { :dataset_uri => OT['crossvalidationDataset'], :algorithm_uri => OT['crossvalidationAlgorithm'], - :validation_uri => OT['crossvalidationValidation'], :validations => OT['crossvalidationValidations'] } - @classes = { :validations => OT['CrossvalidationValidations'] } end end - -class ValidationToRDF < ContentToRDF - - def initialize() - @literals = [ :created_at, :real_runtime, :num_instances, :num_without_class, - :percent_without_class, :num_unpredicted, :percent_unpredicted, - :crossvalidation_fold, :crossvalidation_id, - :num_correct, :num_incorrect, :percent_correct, :percent_incorrect, - :area_under_roc, :false_negative_rate, :false_positive_rate, - :f_measure, :num_false_positives, :num_false_negatives, - :num_true_positives, :num_true_negatives, :precision, - :recall, :true_negative_rate, :true_positive_rate, - :confusion_matrix_value ] - # created at -> date - # owl.set_literal(OT['numInstances'],validation.num_instances) - # owl.set_literal(OT['numWithoutClass'],validation.num_without_class) - # owl.set_literal(OT['percentWithoutClass'],validation.percent_without_class) - # owl.set_literal(OT['numUnpredicted'],validation.num_unpredicted) - # owl.set_literal(OT['percentUnpredicted'],validation.percent_unpredicted) - - - @object_properties = { :model_uri => OT['validationModel'], :training_dataset_uri => OT['validationTrainingDataset'], - :prediction_feature => OT['predictedFeature'], :test_dataset_uri => OT['validationTestDataset'], - :prediction_dataset_uri => OT['validationPredictionDataset'], :crossvalidation_info => OT['hasValidationInfo'], - :classification_statistics => OT['hasValidationInfo'], - :class_value_statistics => OT['classValueStatistics'], :confusion_matrix => OT['confusionMatrix'], - :confusion_matrix_cell => OT['confusionMatrixCell'], :class_value => OT['class_value'], - :confusion_matrix_actual => OT['confusionMatrixActual'], :confusion_matrix_predicted => OT['confusionMatrixPredicted'] } - - @classes = { :crossvalidation_info => OT['CrossvalidationInfo'], :classification_statistics => OT['ClassificationStatistics'], - :class_value_statistics => OT['ClassValueStatistics'], - :confusion_matrix => OT['ConfusionMatrix'], :confusion_matrix_cell => OT['ConfusionMatrixCell']} - end - -end diff --git a/validation/validation_service.rb b/validation/validation_service.rb index ff545b4..3231676 100644 --- a/validation/validation_service.rb +++ b/validation/validation_service.rb @@ -31,334 +31,335 @@ class Array end +module Validation -class Validation < Lib::Validation - - # constructs a validation object, sets id und uri - def initialize( params={} ) - - raise "do not set id manually" if params[:id] - raise "do not set uri manually" if params[:uri] - super params - save unless attribute_dirty?("id") - raise "internal error, id not set "+to_yaml unless @id - update :uri => $sinatra.url_for("/"+@id.to_s, :full) - end - - # deletes a validation - # PENDING: model and referenced datasets are deleted as well, keep it that way? - def delete - - model = OpenTox::Model::PredictionModel.find(@model_uri) if @model_uri - model.destroy if model + class Validation < Lib::Validation + + # constructs a validation object, Rsets id und uri + def initialize( params={} ) + + raise "do not set id manually" if params[:id] + raise "do not set uri manually" if params[:uri] + super params + save unless attribute_dirty?("id") + raise "internal error, id not set "+to_yaml unless @id + update :uri => $sinatra.url_for("/"+@id.to_s, :full) + end - #[@test_dataset_uri, @training_dataset_uri, @prediction_dataset_uri].each do |d| - #dataset = OpenTox::Dataset.find(d) if d - #dataset.delete if dataset - #end - destroy - "Successfully deleted validation "+@id.to_s+"." - end - - # validates an algorithm by building a model and validating this model - def validate_algorithm( algorithm_uri, algorithm_params=nil ) + # deletes a validation + # PENDING: model and referenced datasets are deleted as well, keep it that way? + def delete - $sinatra.halt 404, "no algorithm uri: '"+algorithm_uri+"'" if algorithm_uri==nil or algorithm_uri.to_s.size<1 + model = OpenTox::Model::PredictionModel.find(@model_uri) if @model_uri + model.destroy if model + + #[@test_dataset_uri, @training_dataset_uri, @prediction_dataset_uri].each do |d| + #dataset = OpenTox::Dataset.find(d) if d + #dataset.delete if dataset + #end + destroy + "Successfully deleted validation "+@id.to_s+"." + end - params = { :dataset_uri => @training_dataset_uri, :feature_uri => @prediction_feature } - if (algorithm_params!=nil) - algorithm_params.split(";").each do |alg_params| - alg_param = alg_params.split("=") - #puts "param "+alg_param.to_s - $sinatra.halt 404, "invalid algorithm param: '"+alg_params.to_s+"'" unless alg_param.size==2 or alg_param[0].to_s.size<1 or alg_param[1].to_s.size<1 - params[alg_param[0].to_sym] = alg_param[1] + # validates an algorithm by building a model and validating this model + def validate_algorithm( algorithm_uri, algorithm_params=nil ) + + $sinatra.halt 404, "no algorithm uri: '"+algorithm_uri+"'" if algorithm_uri==nil or algorithm_uri.to_s.size<1 + + params = { :dataset_uri => @training_dataset_uri, :feature_uri => @prediction_feature } + if (algorithm_params!=nil) + algorithm_params.split(";").each do |alg_params| + alg_param = alg_params.split("=") + #puts "param "+alg_param.to_s + $sinatra.halt 404, "invalid algorithm param: '"+alg_params.to_s+"'" unless alg_param.size==2 or alg_param[0].to_s.size<1 or alg_param[1].to_s.size<1 + params[alg_param[0].to_sym] = alg_param[1] + end end + LOGGER.debug "building model '"+algorithm_uri.to_s+"' "+params.inspect + + model_uri = OpenTox::RestClientWrapper.post algorithm_uri,params + update :model_uri => model_uri + + validate_model end - LOGGER.debug "building model '"+algorithm_uri.to_s+"' "+params.inspect - model_uri = OpenTox::RestClientWrapper.post algorithm_uri,params - update :model_uri => model_uri - - validate_model + # validates a model + # PENDING: a new dataset is created to store the predictions, this should be optional: STORE predictions yes/no + def validate_model + + LOGGER.debug "validating model '"+@model_uri+"'" + test_dataset = OpenTox::Dataset.find @test_dataset_uri + $sinatra.halt 400, "test dataset no found: "+@test_dataset_uri.to_s unless test_dataset + + model = OpenTox::Model::PredictionModel.find(@model_uri) + $sinatra.halt 400, "model not found: "+@model_uri.to_s unless model + + prediction_dataset_uri = "" + benchmark = Benchmark.measure do + prediction_dataset_uri = model.predict_dataset(@test_dataset_uri) + end + + LOGGER.debug "computing prediction stats" + prediction = Lib::OTPredictions.new( model.classification?, @prediction_feature, @test_dataset_uri, prediction_dataset_uri ) + if prediction.classification? + update :classification_statistics => prediction.compute_stats + else + update :regression_statistics => prediction.compute_stats + end + update :prediction_dataset_uri => prediction_dataset_uri, + :finished => true, + :real_runtime => benchmark.real, + :num_instances => prediction.num_instances, + :num_without_class => prediction.num_without_class, + :percent_without_class => prediction.percent_without_class, + :num_unpredicted => prediction.num_unpredicted, + :percent_unpredicted => prediction.percent_unpredicted + end end - # validates a model - # PENDING: a new dataset is created to store the predictions, this should be optional: STORE predictions yes/no - def validate_model - - LOGGER.debug "validating model '"+@model_uri+"'" - test_dataset = OpenTox::Dataset.find @test_dataset_uri - $sinatra.halt 400, "test dataset no found: "+@test_dataset_uri.to_s unless test_dataset - - model = OpenTox::Model::PredictionModel.find(@model_uri) - $sinatra.halt 400, "model not found: "+@model_uri.to_s unless model + class Crossvalidation < Lib::Crossvalidation - prediction_dataset_uri = "" - benchmark = Benchmark.measure do - prediction_dataset_uri = model.predict_dataset(@test_dataset_uri) + # constructs a crossvalidation, id and uri are set + def initialize( params={} ) + + raise "do not set id manually" if params[:id] + raise "do not set uri manually" if params[:uri] + super params + save unless attribute_dirty?("id") + raise "internal error, id not set" unless @id + update :uri => $sinatra.url_for("/crossvalidation/"+@id.to_s, :full) end - LOGGER.debug "computing prediction stats" - prediction = Lib::OTPredictions.new( model.classification?, @prediction_feature, @test_dataset_uri, prediction_dataset_uri ) - if prediction.classification? - update :classification_statistics => prediction.compute_stats - else - update :regression_statistics => prediction.compute_stats + # deletes a crossvalidation, all validations are deleted as well + def delete + Validation.all(:crossvalidation_id => @id).each{ |v| v.delete } + destroy + "Successfully deleted crossvalidation "+@id.to_s+"." end - update :prediction_dataset_uri => prediction_dataset_uri, - :finished => true, - :real_runtime => benchmark.real, - :num_instances => prediction.num_instances, - :num_without_class => prediction.num_without_class, - :percent_without_class => prediction.percent_without_class, - :num_unpredicted => prediction.num_unpredicted, - :percent_unpredicted => prediction.percent_unpredicted - end -end - -class Crossvalidation < Lib::Crossvalidation - - # constructs a crossvalidation, id and uri are set - def initialize( params={} ) - raise "do not set id manually" if params[:id] - raise "do not set uri manually" if params[:uri] - super params - save unless attribute_dirty?("id") - raise "internal error, id not set" unless @id - update :uri => $sinatra.url_for("/crossvalidation/"+@id.to_s, :full) - end - - # deletes a crossvalidation, all validations are deleted as well - def delete - Validation.all(:crossvalidation_id => @id).each{ |v| v.delete } - destroy - "Successfully deleted crossvalidation "+@id.to_s+"." - end - - # creates the cv folds - # PENDING copying datasets of an equal (same dataset, same params) crossvalidation is disabled for now - def create_cv_datasets( prediction_feature ) - - create_new_cv_datasets( prediction_feature ) #unless copy_cv_datasets( prediction_feature ) - end + # creates the cv folds + # PENDING copying datasets of an equal (same dataset, same params) crossvalidation is disabled for now + def create_cv_datasets( prediction_feature ) - # executes the cross-validation (build models and validates them) - def perform_cv ( algorithm_params=nil ) - - LOGGER.debug "perform cv validations" - Validation.all( :crossvalidation_id => id ).each do |v| - v.validate_algorithm( @algorithm_uri, algorithm_params ) - #break + create_new_cv_datasets( prediction_feature ) #unless copy_cv_datasets( prediction_feature ) end - end - - private - # copies datasets from an older crossvalidation on the same dataset and the same folds - # returns true if successfull, false otherwise - def copy_cv_datasets( prediction_feature ) - equal_cvs = Crossvalidation.all( { :dataset_uri => @dataset_uri, :num_folds => @num_folds, - :stratified => @stratified, :random_seed => @random_seed } ).reject{ |cv| cv.id == @id } - return false if equal_cvs.size == 0 - cv = equal_cvs[0] - Validation.all( :crossvalidation_id => cv.id ).each do |v| - - if @stratified and v.prediction_feature != prediction_feature - return false; - end - unless (OpenTox::Dataset.find(v.training_dataset_uri) and - OpenTox::Dataset.find(v.test_dataset_uri)) - LOGGER.debug "dataset uris obsolete, aborting copy of datasets" - Validation.all( :crossvalidation_id => @id ).each{ |v| v.delete } - return false + # executes the cross-validation (build models and validates them) + def perform_cv ( algorithm_params=nil ) + + LOGGER.debug "perform cv validations" + Validation.all( :crossvalidation_id => id ).each do |v| + v.validate_algorithm( @algorithm_uri, algorithm_params ) + #break end - validation = Validation.new :crossvalidation_id => @id, - :crossvalidation_fold => v.crossvalidation_fold, - :training_dataset_uri => v.training_dataset_uri, - :test_dataset_uri => v.test_dataset_uri end - LOGGER.debug "copyied dataset uris from cv "+cv.uri.to_s - return true - end - - # creates cv folds (training and testdatasets) - # stores uris in validation objects - def create_new_cv_datasets( prediction_feature ) - LOGGER.debug "creating datasets for crossvalidation" - orig_dataset = OpenTox::Dataset.find(@dataset_uri) - $sinatra.halt 400, "Dataset not found: "+@dataset_uri.to_s unless orig_dataset - - shuffled_compounds = orig_dataset.compounds.shuffle( @random_seed ) - - unless @stratified - split_compounds = shuffled_compounds.chunk( @num_folds ) - else - class_compounds = {} # "inactive" => compounds[], "active" => compounds[] .. - shuffled_compounds.each do |c| - orig_dataset.features(c).each do |a| - value = OpenTox::Feature.new(:uri => a.uri).value(prediction_feature).to_s - class_compounds[value] = [] unless class_compounds.has_key?(value) - class_compounds[value].push(c) + private + # copies datasets from an older crossvalidation on the same dataset and the same folds + # returns true if successfull, false otherwise + def copy_cv_datasets( prediction_feature ) + + equal_cvs = Crossvalidation.all( { :dataset_uri => @dataset_uri, :num_folds => @num_folds, + :stratified => @stratified, :random_seed => @random_seed } ).reject{ |cv| cv.id == @id } + return false if equal_cvs.size == 0 + cv = equal_cvs[0] + Validation.all( :crossvalidation_id => cv.id ).each do |v| + + if @stratified and v.prediction_feature != prediction_feature + return false; end - end - LOGGER.debug "stratified cv: different class values: "+class_compounds.keys.join(", ") - LOGGER.debug "stratified cv: num instances for each class value: "+class_compounds.values.collect{|c| c.size}.join(", ") - - split_class_compounds = [] # inactive_compounds[fold_i][], active_compounds[fold_i][], .. - class_compounds.values.each do |compounds| - split_class_compounds.push( compounds.chunk( @num_folds ) ) - end - LOGGER.debug "stratified cv: splits for class values: "+split_class_compounds.collect{ |c| c.collect{ |cc| cc.size }.join("/") }.join(", ") - - # we cannot just merge the splits of the different class_values of each fold - # this could lead to folds, which sizes differ for more than 1 compound - split_compounds = [] - split_class_compounds.each do |split_comp| - # step 1: sort current split in ascending order - split_comp.sort!{|x,y| x.size <=> y.size } - # step 2: add splits - (0..@num_folds-1).each do |i| - unless split_compounds[i] - split_compounds[i] = split_comp[i] - else - split_compounds[i] += split_comp[i] - end + unless (OpenTox::Dataset.find(v.training_dataset_uri) and + OpenTox::Dataset.find(v.test_dataset_uri)) + LOGGER.debug "dataset uris obsolete, aborting copy of datasets" + Validation.all( :crossvalidation_id => @id ).each{ |v| v.delete } + return false end - # step 3: sort (total) split in descending order - split_compounds.sort!{|x,y| y.size <=> x.size } + validation = Validation.new :crossvalidation_id => @id, + :crossvalidation_fold => v.crossvalidation_fold, + :training_dataset_uri => v.training_dataset_uri, + :test_dataset_uri => v.test_dataset_uri end + LOGGER.debug "copyied dataset uris from cv "+cv.uri.to_s + return true end - LOGGER.debug "cv: num instances for each fold: "+split_compounds.collect{|c| c.size}.join(", ") - - data = orig_dataset.data - (1..@num_folds).each do |n| + # creates cv folds (training and testdatasets) + # stores uris in validation objects + def create_new_cv_datasets( prediction_feature ) - datasetname = 'cv'+@id.to_s + - #'_d'+orig_dataset.name.to_s + - '_f'+n.to_s+'of'+@num_folds.to_s+ - '_r'+@random_seed.to_s+ - '_s'+@stratified.to_s - source = $sinatra.url_for('/crossvalidation',:full) + LOGGER.debug "creating datasets for crossvalidation" + orig_dataset = OpenTox::Dataset.find(@dataset_uri) + $sinatra.halt 400, "Dataset not found: "+@dataset_uri.to_s unless orig_dataset - test_compounds = [] - train_compounds = [] + shuffled_compounds = orig_dataset.compounds.shuffle( @random_seed ) - (1..@num_folds).each do |nn| - compounds = split_compounds.at(nn-1) + unless @stratified + split_compounds = shuffled_compounds.chunk( @num_folds ) + else + class_compounds = {} # "inactive" => compounds[], "active" => compounds[] .. + shuffled_compounds.each do |c| + orig_dataset.features(c).each do |a| + value = OpenTox::Feature.new(:uri => a.uri).value(prediction_feature).to_s + class_compounds[value] = [] unless class_compounds.has_key?(value) + class_compounds[value].push(c) + end + end + LOGGER.debug "stratified cv: different class values: "+class_compounds.keys.join(", ") + LOGGER.debug "stratified cv: num instances for each class value: "+class_compounds.values.collect{|c| c.size}.join(", ") + + split_class_compounds = [] # inactive_compounds[fold_i][], active_compounds[fold_i][], .. + class_compounds.values.each do |compounds| + split_class_compounds.push( compounds.chunk( @num_folds ) ) + end + LOGGER.debug "stratified cv: splits for class values: "+split_class_compounds.collect{ |c| c.collect{ |cc| cc.size }.join("/") }.join(", ") - if n == nn - compounds.each{ |compound| test_compounds.push(compound)} - else - compounds.each{ |compound| train_compounds.push(compound)} - end + # we cannot just merge the splits of the different class_values of each fold + # this could lead to folds, which sizes differ for more than 1 compound + split_compounds = [] + split_class_compounds.each do |split_comp| + # step 1: sort current split in ascending order + split_comp.sort!{|x,y| x.size <=> y.size } + # step 2: add splits + (0..@num_folds-1).each do |i| + unless split_compounds[i] + split_compounds[i] = split_comp[i] + else + split_compounds[i] += split_comp[i] + end + end + # step 3: sort (total) split in descending order + split_compounds.sort!{|x,y| y.size <=> x.size } + end end + LOGGER.debug "cv: num instances for each fold: "+split_compounds.collect{|c| c.size}.join(", ") - raise "internal error, num test compounds not correct" unless (shuffled_compounds.size/@num_folds - test_compounds.size).abs <= 1 - raise "internal error, num train compounds not correct" unless shuffled_compounds.size - test_compounds.size == train_compounds.size + data = orig_dataset.data - LOGGER.debug "training set: "+datasetname+"_train" - train_dataset_uri = ValidationUtil::create_new_dataset( data, train_compounds, datasetname + '_train', source ) + (1..@num_folds).each do |n| + + datasetname = 'cv'+@id.to_s + + #'_d'+orig_dataset.name.to_s + + '_f'+n.to_s+'of'+@num_folds.to_s+ + '_r'+@random_seed.to_s+ + '_s'+@stratified.to_s + source = $sinatra.url_for('/crossvalidation',:full) + + test_compounds = [] + train_compounds = [] + + (1..@num_folds).each do |nn| + compounds = split_compounds.at(nn-1) + + if n == nn + compounds.each{ |compound| test_compounds.push(compound)} + else + compounds.each{ |compound| train_compounds.push(compound)} + end + end + + raise "internal error, num test compounds not correct" unless (shuffled_compounds.size/@num_folds - test_compounds.size).abs <= 1 + raise "internal error, num train compounds not correct" unless shuffled_compounds.size - test_compounds.size == train_compounds.size + + LOGGER.debug "training set: "+datasetname+"_train" + train_dataset_uri = Util::create_new_dataset( data, train_compounds, datasetname + '_train', source ) + + LOGGER.debug "test set: "+datasetname+"_test" + test_dataset_uri = Util::create_new_dataset( data, test_compounds, datasetname + '_test', source ) - LOGGER.debug "test set: "+datasetname+"_test" - test_dataset_uri = ValidationUtil::create_new_dataset( data, test_compounds, datasetname + '_test', source ) - - validation = Validation.new :training_dataset_uri => train_dataset_uri, - :test_dataset_uri => test_dataset_uri, - :crossvalidation_id => @id, :crossvalidation_fold => n, - :prediction_feature => prediction_feature + validation = Validation.new :training_dataset_uri => train_dataset_uri, + :test_dataset_uri => test_dataset_uri, + :crossvalidation_id => @id, :crossvalidation_fold => n, + :prediction_feature => prediction_feature + end end end -end - - -module ValidationUtil - def self.create_new_dataset( orig_dataset_data, compounds, title, source ) + module Util - dataset = OpenTox::Dataset.new - dataset.title = title - dataset.source = source - compounds.each do |c| + def self.create_new_dataset( orig_dataset_data, compounds, title, source ) - compound = dataset.find_or_create_compound(c.to_s) - featureValuesArray = orig_dataset_data[c] + dataset = OpenTox::Dataset.new + dataset.title = title + dataset.source = source - featureValuesArray.each do |featureValues| - featureValues.each do |f, v| + compounds.each do |c| - raise "null value not handled yet" if v==nil - if v.is_a?(Hash) - tuple = dataset.create_tuple(f,v) - dataset.add_tuple(compound,tuple) - else - dataset.add(compound,f,v) + compound = dataset.find_or_create_compound(c.to_s) + featureValuesArray = orig_dataset_data[c] + + featureValuesArray.each do |featureValues| + featureValues.each do |f, v| + + raise "null value not handled yet" if v==nil + if v.is_a?(Hash) + tuple = dataset.create_tuple(f,v) + dataset.add_tuple(compound,tuple) + else + dataset.add(compound,f,v) + end end end end + + uri = dataset.save + raise "no dataset uri" if uri==nil || uri.to_s.length<1 + return uri + end - - uri = dataset.save - raise "no dataset uri" if uri==nil || uri.to_s.length<1 - return uri - end - - # splits a dataset into test and training dataset - # returns map with training_dataset_uri and test_dataset_uri - def self.train_test_dataset_split( orig_dataset_uri, split_ratio=nil, random_seed=nil ) - - split_ratio=0.67 unless split_ratio - random_seed=1 unless random_seed - - orig_dataset = OpenTox::Dataset.find orig_dataset_uri - $sinatra.halt 400, "Dataset not found: "+orig_dataset_uri.to_s unless orig_dataset - $sinatra.halt 400, "Split ratio invalid: "+split_ratio unless split_ratio and split_ratio=split_ratio.to_f - $sinatra.halt 400, "Split ratio not >0 and <1" unless split_ratio>0 && split_ratio<1 - - compounds = orig_dataset.compounds - - $sinatra.halt 400, "Dataset size < 2" if compounds.size<2 - split = (compounds.size*split_ratio).to_i - split = [split,1].max - split = [split,compounds.size-2].min - - LOGGER.debug "splitting dataset "+orig_dataset_uri+ - " into train:0-"+split.to_s+" and test:"+(split+1).to_s+"-"+(compounds.size-1).to_s+ - " (shuffled with seed "+random_seed.to_s+")" - - compounds.shuffle!( random_seed ) - train_compounds = compounds[0..split] - test_compounds = compounds[(split+1)..-1] - - data = orig_dataset.data - - result = {} - {:training_dataset_uri => train_compounds, :test_dataset_uri => test_compounds}.each do |sym, compound_array| + # splits a dataset into test and training dataset + # returns map with training_dataset_uri and test_dataset_uri + def self.train_test_dataset_split( orig_dataset_uri, split_ratio=nil, random_seed=nil ) - if sym == :training_dataset_uri - title = "Training dataset split of "+orig_dataset.title.to_s - else - title = "Test dataset split of "+orig_dataset.title.to_s + split_ratio=0.67 unless split_ratio + random_seed=1 unless random_seed + + orig_dataset = OpenTox::Dataset.find orig_dataset_uri + $sinatra.halt 400, "Dataset not found: "+orig_dataset_uri.to_s unless orig_dataset + $sinatra.halt 400, "Split ratio invalid: "+split_ratio unless split_ratio and split_ratio=split_ratio.to_f + $sinatra.halt 400, "Split ratio not >0 and <1" unless split_ratio>0 && split_ratio<1 + + compounds = orig_dataset.compounds + + $sinatra.halt 400, "Dataset size < 2" if compounds.size<2 + split = (compounds.size*split_ratio).to_i + split = [split,1].max + split = [split,compounds.size-2].min + + LOGGER.debug "splitting dataset "+orig_dataset_uri+ + " into train:0-"+split.to_s+" and test:"+(split+1).to_s+"-"+(compounds.size-1).to_s+ + " (shuffled with seed "+random_seed.to_s+")" + + compounds.shuffle!( random_seed ) + train_compounds = compounds[0..split] + test_compounds = compounds[(split+1)..-1] + + data = orig_dataset.data + + result = {} + {:training_dataset_uri => train_compounds, :test_dataset_uri => test_compounds}.each do |sym, compound_array| + + if sym == :training_dataset_uri + title = "Training dataset split of "+orig_dataset.title.to_s + else + title = "Test dataset split of "+orig_dataset.title.to_s + end + source = $sinatra.url_for('/training_test_split',:full) + result[sym] = create_new_dataset( data, compound_array, title, source ) end - source = $sinatra.url_for('/training_test_split',:full) - result[sym] = create_new_dataset( data, compound_array, title, source ) + + $sinatra.halt 400, "Training dataset not found: '"+result[:training_dataset_uri].to_s+"'" unless OpenTox::Dataset.find result[:training_dataset_uri] + $sinatra.halt 400, "Test dataset not found: '"+result[:test_dataset_uri].to_s+"'" unless OpenTox::Dataset.find result[:test_dataset_uri] + + LOGGER.debug "split done, training dataset: '"+result[:training_dataset_uri].to_s+"', test dataset: '"+result[:test_dataset_uri].to_s+"'" + + return result end - - $sinatra.halt 400, "Training dataset not found: '"+result[:training_dataset_uri].to_s+"'" unless OpenTox::Dataset.find result[:training_dataset_uri] - $sinatra.halt 400, "Test dataset not found: '"+result[:test_dataset_uri].to_s+"'" unless OpenTox::Dataset.find result[:test_dataset_uri] - - LOGGER.debug "split done, training dataset: '"+result[:training_dataset_uri].to_s+"', test dataset: '"+result[:test_dataset_uri].to_s+"'" - - return result + end end - - diff --git a/validation/validation_test.rb b/validation/validation_test.rb index 613d338..e43780f 100644 --- a/validation/validation_test.rb +++ b/validation/validation_test.rb @@ -161,36 +161,37 @@ class ValidationTest < Test::Unit::TestCase # end # end - def test_split - begin - data_uri = upload_data(WS_DATA, FILE) - #data_uri = "http://ot.dataset.de/199" #bbrc - #data_uri = "http://ot.dataset.de/67" #hamster - - #data_uri=WS_DATA+"/"+DATA - post '/training_test_split', { :dataset_uri => data_uri, :algorithm_uri => WS_CLASS_ALG, :prediction_feature => FEATURE_URI, - :algorithm_params => "feature_generation_uri="+WS_FEATURE_ALG, :split_ratio=>0.8, :random_seed=>5} - verify_validation - ensure - delete_resources - end - end - -# def test_nothing -# -# #puts "testing nothing" -# -# get '/examples' -# -# #get '/350',nil,'HTTP_ACCEPT' => "application/rdf+xml" -# #get '/350',nil,'HTTP_ACCEPT' => "text/x-yaml" -# -# #get '/crossvalidation/1',nil,'HTTP_ACCEPT' => "application/rdf+xml" -# #get '/crossvalidation/1',nil,'HTTP_ACCEPT' => "text/x-yaml" -# -# puts last_response.body +# def test_split +# begin +# data_uri = upload_data(WS_DATA, FILE) +# #data_uri = "http://ot.dataset.de/199" #bbrc +# #data_uri = "http://ot.dataset.de/67" #hamster +# +# #data_uri=WS_DATA+"/"+DATA +# post '/training_test_split', { :dataset_uri => data_uri, :algorithm_uri => WS_CLASS_ALG, :prediction_feature => FEATURE_URI, +# :algorithm_params => "feature_generation_uri="+WS_FEATURE_ALG, :split_ratio=>0.8, :random_seed=>5} +# verify_validation +# ensure +# delete_resources +# end # end + def test_nothing + + #puts "testing nothing" + + #get '/prepare_examples' + get '/test_examples' + + #get '/350',nil,'HTTP_ACCEPT' => "application/rdf+xml" + #get '/350',nil,'HTTP_ACCEPT' => "text/x-yaml" + + #get '/crossvalidation/1',nil,'HTTP_ACCEPT' => "application/rdf+xml" + #get '/crossvalidation/1',nil,'HTTP_ACCEPT' => "text/x-yaml" + + puts last_response.body + end + private def verify_validation (delete=true) |