diff options
author | mguetlein <martin.guetlein@gmail.com> | 2010-09-07 09:25:39 +0200 |
---|---|---|
committer | mguetlein <martin.guetlein@gmail.com> | 2010-09-07 09:25:39 +0200 |
commit | 7d69d09e79882cb826dacac2b6474fade13e8690 (patch) | |
tree | c1acc2b07627bbb787a1e4d24a425c90283277aa | |
parent | 9ec38f1ebaffa51cac69e5e1a09c988556a690a1 (diff) |
add validation to qmrf-report, bootstrapping
-rw-r--r-- | EXAMPLES | 21 | ||||
-rw-r--r-- | lib/active_record_setup.rb | 2 | ||||
-rw-r--r-- | reach_reports/reach_service.rb | 120 | ||||
-rw-r--r-- | reach_reports/reach_test.rb | 41 | ||||
-rw-r--r-- | report/report_service.rb | 2 | ||||
-rw-r--r-- | test/test_examples.rb | 28 | ||||
-rw-r--r-- | test/test_examples_util.rb | 16 | ||||
-rw-r--r-- | validation/validation_application.rb | 29 | ||||
-rw-r--r-- | validation/validation_service.rb | 64 | ||||
-rw-r--r-- | validation/validation_test.rb | 31 |
10 files changed, 320 insertions, 34 deletions
@@ -116,6 +116,27 @@ result example (accept-header: text/uri-list) <<< <validation_service>/id_i +validate an algorithm on a dataset via bootstrapping +------------------------------------------------------------------- + +>>> curl -X POST -d algorithm_uri="<algorithm_uri>" \ + -d dataset_uri="<dataset_uri>" \ + -d prediction_feature="<prediction_feature>" \ + -d algorithm_params="<algorithm_params>" \ + -d random_seed=2 \ + <validation_service>/bootstrapping + +optional params: +algorithm_params, default is empty +random_seed, default is 1 + +result example (accept-header: application/rdf-xml) +<<< not yet supported + +result example (accept-header: text/uri-list) +<<< <validation_service>/id_i + + get list of available crossvalidations ------------------------------------------------------------------- diff --git a/lib/active_record_setup.rb b/lib/active_record_setup.rb index 85690b6..cea4fb2 100644 --- a/lib/active_record_setup.rb +++ b/lib/active_record_setup.rb @@ -28,7 +28,7 @@ class ActiveRecord::Base key = key+"_uri" unless self.column_names.include?(key) key = key+"s" - raise "no attribute found: '"+k.to_s+"'" unless self.column_names.include?(key) + $sinatra.halt 400,"no attribute found: '"+k.to_s+"'" unless self.column_names.include?(key) end end end diff --git a/reach_reports/reach_service.rb b/reach_reports/reach_service.rb index f7b899c..a742c49 100644 --- a/reach_reports/reach_service.rb +++ b/reach_reports/reach_service.rb @@ -1,3 +1,15 @@ + +class Array + + def to_html + return "" unless size>0 + s = "<html>\n<head>\n</head>\n<body>\n" + s += join(" <br>\n") + s += "</body>\n</html>\n" + return s + end +end + module ReachReports def self.list_reports(type) @@ -45,17 +57,17 @@ module ReachReports #halt 202,task_uri end - def self.build_qmrf_report(r) - model = OpenTox::Model::Generic.find(r.model_uri) + + def self.build_qmrf_report(r) + + model = OpenTox::Model::PredictionModel.find(r.model_uri) + classification = model.classification? # chapter 1 - #r.QSAR_title = model.title r.qsar_identifier = QsarIdentifier.new r.qsar_identifier.qsar_title = model.title - - # TODO - # QSAR_models -> sparql same endpoint + # TODO QSAR_models -> sparql same endpoint r.qsar_identifier.qsar_software << QsarSoftware.new( :url => model.uri, :name => model.title, :contact => model.creator ) algorithm = OpenTox::Algorithm::Generic.find(model.algorithm) if model.algorithm r.qsar_identifier.qsar_software << QsarSoftware.new( :url => algorithm.uri, :name => algorithm.title ) @@ -89,19 +101,89 @@ module ReachReports # chapter 5 # TODO app_domain_description, app_domain_method, app_domain_software, applicability_limits - dataset = model.trainingDataset ? OpenTox::Dataset.find(model.trainingDataset) : nil + training_dataset = model.trainingDataset ? OpenTox::Dataset.find(model.trainingDataset+"/metadata") : nil # chapter 6 r.qsar_robustness = QsarRobustness.new - r.qsar_robustness.training_set_availability = dataset ? "Yes" : "No" + if training_dataset + r.qsar_robustness.training_set_availability = "Yes" + r.qsar_robustness.training_set_data = TrainingSetData.new(:chemname => "Yes", :cas => "Yes", + :smiles => "Yes", :inchi => "Yes", :mol => "Yes", :formula => "Yes") + end + #TODO "training_set_data" => "6.2", # "training_set_descriptors" => "6.3", # "dependent_var_availability" => "6.4", "other_info" => "6.5", "preprocessing" => "6.6", "goodness_of_fit" => "6.7", # "loo" => "6.8", - puts Lib::Crossvalidation.find(:all, :conditions => {:algorithm_uri => model.algorithm}).inspect if model.algorithm - #exit + + val_datasets = [] + + if model.algorithm + cvs = Lib::Crossvalidation.find(:all, :conditions => {:algorithm_uri => model.algorithm}) + cvs = [] unless cvs + lmo = [ "found "+cvs.size.to_s+" crossvalidation/s for algorithm '"+model.algorithm ] + lmo << "" + cvs.each do |cv| + lmo << "crossvalidation: "+cv.crossvalidation_uri + lmo << "dataset (see 9.3 Validation data): "+cv.dataset_uri + val_datasets << cv.dataset_uri + lmo << "num-folds: "+cv.num_folds.to_s + val = YAML.load( OpenTox::RestClientWrapper.get File.join(cv.crossvalidation_uri,"statistics") ) + if classification + lmo << "percent_correct: "+val[:classification_statistics][:percent_correct].to_s + lmo << "weighted AUC: "+val[:classification_statistics][:weighted_area_under_roc].to_s + else + lmo << "root_mean_squared_error: "+val[:regression_statistics][:root_mean_squared_error].to_s + lmo << "r_square "+val[:regression_statistics][:r_square].to_s + end + reports = OpenTox::RestClientWrapper.get File.join(CONFIG[:services]["opentox-validation"],"report/crossvalidation?crossvalidation_uris="+cv.crossvalidation_uri) + if reports and reports.size>0 + lmo << "for more info see report: "+reports + else + lmo << "for more info see report: not yet created for '"+cv.crossvalidation_uri+"'" + end + lmo << "" + end + else + lmo = [ "no prediction algortihm for model found, crossvalidation not possible" ] + end + r.qsar_robustness.lmo = lmo.to_html # "lmo" => "6.9", "yscrambling" => "6.10", "bootstrap" => "6.11", "other_statistics" => "6.12", + vals = Lib::Validation.find(:all, :conditions => {:model_uri => model.uri}) + if vals and vals.size > 0 + + r.qsar_predictivity = QsarPredictivity.new + r.qsar_predictivity.validation_set_availability = "Yes" + r.qsar_predictivity.validation_set_data = ValidationSetData.new(:chemname => "Yes", :cas => "Yes", + :smiles => "Yes", :inchi => "Yes", :mol => "Yes", :formula => "Yes") + + v = [ "found '"+vals.size.to_s+"' test-set validations of model '"+model.uri+"'" ] + v << "" + vals.each do |validation| + v << "validation: "+validation.validation_uri + v << "dataset (see 9.3 Validation data): "+validation.test_dataset_uri + val_datasets << validation.test_dataset_uri + if classification + v << "percent_correct: "+validation.classification_statistics[:percent_correct].to_s + v << "weighted AUC: "+validation.classification_statistics[:weighted_area_under_roc].to_s + else + v << "root_mean_squared_error: "+validation.regression_statistics[:root_mean_squared_error].to_s + v << "r_square "+validation.regression_statistics[:r_square].to_s + end + reports = OpenTox::RestClientWrapper.get File.join(CONFIG[:services]["opentox-validation"],"report/validation?validation_uris="+validation.validation_uri) + if reports and reports.size>0 + v << "for more info see report: "+reports + else + v << "for more info see report: not yet created for '"+validation.validation_uri+"'" + end + v << "" + end + else + v = [ "no validation of model '"+model.uri+" found" ] + end + r.qsar_predictivity.validation_predictivity = v.to_html + # chapter 7 # "validation_set_availability" => "7.1", "validation_set_data" => "7.2", "validation_set_descriptors" => "7.3", # "validation_dependent_var_availability" => "7.4", "validation_other_info" => "7.5", "experimental_design" => "7.6", @@ -112,15 +194,25 @@ module ReachReports # chapter 9 # "comments" => "9.1", "bibliography" => "9.2", "attachments" => "9.3", + r.qsar_miscellaneous = QsarMiscellaneous.new + r.qsar_miscellaneous.attachment_training_data << AttachmentTrainingData.new( - { :description => dataset.title, + { :description => training_dataset.title, :filetype => "owl-dl", - :url => dataset.uri} ) if dataset + :url => model.trainingDataset} ) if training_dataset + + val_datasets.each do |data_uri| + d = OpenTox::Dataset.find(data_uri+"/metadata") + r.qsar_miscellaneous.attachment_validation_data << AttachmentValidationData.new( + { :description => d.title, + :filetype => "owl-dl", + :url => data_uri} ) + end - - r.save + + end # def self.get_report_content(type, id, *keys) diff --git a/reach_reports/reach_test.rb b/reach_reports/reach_test.rb index 152b355..2f07302 100644 --- a/reach_reports/reach_test.rb +++ b/reach_reports/reach_test.rb @@ -11,6 +11,21 @@ LOGGER = MyLogger.new(STDOUT) LOGGER.datetime_format = "%Y-%m-%d %H:%M:%S " LOGGER.formatter = Logger::Formatter.new +#Rack::Test::DEFAULT_HOST = "localhost/validation" +module Sinatra + module UrlForHelper + BASE = "http://localhost/validation" + def url_for url_fragment, mode=:path_only + case mode + when :path_only + raise "not impl" + when :full + end + "#{BASE}#{url_fragment}" + end + end +end + #DataMapper::Model.raise_on_save_failure = true # #class TestResourceX @@ -96,19 +111,20 @@ class ReachTest < Test::Unit::TestCase # #puts "data found "+data.to_s[0..1000] # puts OpenTox::RestClientWrapper.post("http://localhost/validation/reach_report/qmrf/20",{:content_type => "application/qmrf-xml"},data).to_s.chomp - post "/reach_report/qmrf/8" - puts last_response.body - +# post "/reach_report/qmrf/8" +# puts last_response.body -# #model_uri = "http://localhost/model/1" -# model_uri = "http://apps.ideaconsult.net:8080/ambit2/model/2" -# post '/reach_report/qmrf',:model_uri=>model_uri #http://localhost/model/1" -# ##post '/reach_report/qprf',:compound_uri=>"http://localhost/compound/XYZ" -# uri = last_response.body -# id = uri.split("/")[-1] -# puts uri + model_uri = "http://localhost/model/1" + #model_uri = "http://localhost/majority/regr/model/12" + #model_uri = "http://localhost/majority/class/model/1" + #model_uri = "http://apps.ideaconsult.net:8080/ambit2/model/2" + post '/reach_report/qmrf',:model_uri=>model_uri #http://localhost/model/1" + ##post '/reach_report/qprf',:compound_uri=>"http://localhost/compound/XYZ" + uri = last_response.body + id = uri.split("/")[-1] + puts uri - id = "8" +# id = "8" #get '/reach_report/qmrf' #puts last_response.body @@ -128,6 +144,9 @@ class ReachTest < Test::Unit::TestCase #r = ReachReports::QmrfReport.find_like( :QSAR_title => "Hamster") #puts r.collect{|rr| "report with id:"+rr.id.to_s}.inspect + + File.new("/home/martin/win/home/qmr_rep_del_me.xml","w").puts last_response.body + File.new("/home/martin/info_home/.public_html/qmr_rep_del_me.xml","w").puts last_response.body end end diff --git a/report/report_service.rb b/report/report_service.rb index 04a2d2e..9aaa2e0 100644 --- a/report/report_service.rb +++ b/report/report_service.rb @@ -30,7 +30,7 @@ module Reports # def get_all_reports(type, filter_params) - LOGGER.info "get all reports of type '"+type.to_s+"'" + LOGGER.info "get all reports of type '"+type.to_s+"', filter_params: '"+filter_params.inspect+"'" check_report_type(type) @persistance.list_reports(type, filter_params).collect{ |id| get_uri(type,id) }.join("\n") end diff --git a/test/test_examples.rb b/test/test_examples.rb index d7edba0..8927fb1 100644 --- a/test/test_examples.rb +++ b/test/test_examples.rb @@ -81,6 +81,30 @@ module ValidationExamples end ######################################################################################################## + + class HamsterBootstrapping < BootstrappingValidation + def initialize + @dataset_file = File.new("data/hamster_carcinogenicity.yaml","r") + @prediction_feature = "http://localhost/toxmodel/feature#Hamster%20Carcinogenicity%20(DSSTOX/CPDB)" + end + end + + class LazarHamsterBootstrapping < HamsterBootstrapping + def initialize + @algorithm_uri = File.join(@@config[:services]["opentox-algorithm"],"lazar") + @algorithm_params = "feature_generation_uri="+File.join(@@config[:services]["opentox-algorithm"],"fminer") + super + end + end + + class MajorityHamsterBootstrapping < HamsterBootstrapping + def initialize + @algorithm_uri = File.join(@@config[:services]["opentox-majority"],"/class/algorithm") + super + end + end + + ######################################################################################################## class HamsterTrainingTest < TrainingTestValidation def initialize @@ -303,6 +327,10 @@ module ValidationExamples "10a" => [ ISTLazarISTEpaCrossvalidation ], "11b" => [ MajorityISTRatLiverCrossvalidation ], + + "12" => [ LazarHamsterBootstrapping, MajorityHamsterBootstrapping ], + "12a" => [ LazarHamsterBootstrapping ], + "12b" => [ MajorityHamsterBootstrapping ], } def self.list diff --git a/test/test_examples_util.rb b/test/test_examples_util.rb index b18607d..c5a9900 100644 --- a/test/test_examples_util.rb +++ b/test/test_examples_util.rb @@ -40,7 +40,7 @@ module ValidationExamples $test_case.post uri,params return wait($test_case.last_response.body) else - return OpenTox::RestClientWrapper.post(File.join(@@config[:services]["opentox-validation"],uri),params) + return OpenTox::RestClientWrapper.post(File.join(@@config[:services]["opentox-validation"],uri),params).to_s end end @@ -323,6 +323,20 @@ module ValidationExamples end end + class BootstrappingValidation < ValidationExample + def params + [:algorithm_uri, :dataset_uri, :prediction_feature] + end + + def opt_params + [ :algorithm_params, :random_seed ] + end + + def validation_type + "bootstrapping" + end + end + class CrossValidation < ValidationExample def params [:algorithm_uri, :dataset_uri, :prediction_feature] diff --git a/validation/validation_application.rb b/validation/validation_application.rb index 7ba968f..a79de22 100644 --- a/validation/validation_application.rb +++ b/validation/validation_application.rb @@ -121,10 +121,17 @@ post '/crossvalidation/?' do halt 202,task_uri end + get '/training_test_split' do halt 400, "GET operation not supported, use POST to perform a training_test_split, see "+url_for("/", :full)+" for validation results" end + +get '/bootstrapping' do + halt 400, "GET operation not supported, use POST to perform a bootstrapping, see "+url_for("/", :full)+" for validation results" +end + + get '/?' do LOGGER.info "list all validations, params: "+params.inspect content_type "text/uri-list" @@ -203,6 +210,28 @@ post '/training_test_split' do end +post '/bootstrapping' do + content_type "text/uri-list" + task_uri = OpenTox::Task.as_task( "Perform bootstrapping validation", url_for("/bootstrapping", :full) ) do + LOGGER.info "performing bootstrapping validation "+params.inspect + halt 400, "dataset_uri missing" unless params[:dataset_uri] + halt 400, "algorithm_uri missing" unless params[:algorithm_uri] + halt 400, "prediction_feature missing" unless params[:prediction_feature] + + params.merge!(Validation::Util.bootstrapping(params[:dataset_uri], params[:prediction_feature], params[:random_seed])) + v = Validation::Validation.new :training_dataset_uri => params[:training_dataset_uri], + :test_dataset_uri => params[:test_dataset_uri], + :test_target_dataset_uri => params[:dataset_uri], + :prediction_feature => params[:prediction_feature], + :algorithm_uri => params[:algorithm_uri] + v.validate_algorithm( params[:algorithm_params]) + content_type "text/uri-list" + v.validation_uri + end + halt 202,task_uri +end + + post '/plain_training_test_split' do LOGGER.info "creating pure training test split "+params.inspect halt 400, "dataset_uri missing" unless params[:dataset_uri] diff --git a/validation/validation_service.rb b/validation/validation_service.rb index ce25ee9..67fdbee 100644 --- a/validation/validation_service.rb +++ b/validation/validation_service.rb @@ -332,6 +332,70 @@ module Validation module Util + # splits a dataset into test and training dataset via bootstrapping + # (training dataset-size is n, sampling from orig dataset with replacement) + # returns map with training_dataset_uri and test_dataset_uri + def self.bootstrapping( orig_dataset_uri, prediction_feature, random_seed=nil ) + + random_seed=1 unless random_seed + + orig_dataset = OpenTox::Dataset.find orig_dataset_uri + $sinatra.halt 400, "Dataset not found: "+orig_dataset_uri.to_s unless orig_dataset + if prediction_feature + $sinatra.halt 400, "Prediction feature '"+prediction_feature.to_s+ + "' not found in dataset, features are: \n"+ + orig_dataset.features.inspect unless orig_dataset.features.include?(prediction_feature) + else + LOGGER.warn "no prediciton feature given, all features included in test dataset" + end + + compounds = orig_dataset.compounds + $sinatra.halt 400, "Cannot split datset, num compounds in dataset < 2 ("+compounds.size.to_s+")" if compounds.size<2 + + srand random_seed.to_i + while true + training_compounds = [] + compounds.size.times do + training_compounds << compounds[rand(compounds.size)] + end + test_compounds = [] + compounds.each do |c| + test_compounds << c unless training_compounds.include?(c) + end + if test_compounds.size > 0 + break + else + srand rand(10000) + end + end + + LOGGER.debug "bootstrapping on dataset "+orig_dataset_uri+ + " into training ("+training_compounds.size.to_s+") and test ("+test_compounds.size.to_s+")"+ + ", duplicates in training dataset: "+test_compounds.size.to_s + + result = {} + result[:training_dataset_uri] = orig_dataset.create_new_dataset( training_compounds, + orig_dataset.features, + "Bootstrapping training dataset of "+orig_dataset.title.to_s, + $sinatra.url_for('/bootstrapping',:full) ) + result[:test_dataset_uri] = orig_dataset.create_new_dataset( test_compounds, + orig_dataset.features.dclone - [prediction_feature], + "Bootstrapping test dataset of "+orig_dataset.title.to_s, + $sinatra.url_for('/bootstrapping',:full) ) + + if ENV['RACK_ENV'] =~ /test|debug/ + training_dataset = OpenTox::Dataset.find result[:training_dataset_uri] + $sinatra.halt 400, "Training dataset not found: '"+result[:training_dataset_uri].to_s+"'" unless training_dataset + training_compounds_verify = training_dataset.compounds + $sinatra.halt 500, "training compounds error" unless training_compounds_verify==training_compounds + $sinatra.halt 400, "Test dataset not found: '"+result[:test_dataset_uri].to_s+"'" unless OpenTox::Dataset.find result[:test_dataset_uri] + end + + LOGGER.debug "bootstrapping done, training dataset: '"+result[:training_dataset_uri].to_s+"', test dataset: '"+result[:test_dataset_uri].to_s+"'" + + return result + end + # splits a dataset into test and training dataset # returns map with training_dataset_uri and test_dataset_uri def self.train_test_dataset_split( orig_dataset_uri, prediction_feature, split_ratio=nil, random_seed=nil ) diff --git a/validation/validation_test.rb b/validation/validation_test.rb index d96c012..9b5a11f 100644 --- a/validation/validation_test.rb +++ b/validation/validation_test.rb @@ -11,6 +11,21 @@ LOGGER = MyLogger.new(STDOUT) LOGGER.datetime_format = "%Y-%m-%d %H:%M:%S " LOGGER.formatter = Logger::Formatter.new +#Rack::Test::DEFAULT_HOST = "localhost" #"/validation" +module Sinatra + module UrlForHelper + BASE = "http://localhost/validation" + def url_for url_fragment, mode=:path_only + case mode + when :path_only + raise "not impl" + when :full + end + "#{BASE}#{url_fragment}" + end + end +end + class ValidationTest < Test::Unit::TestCase include Rack::Test::Methods @@ -37,17 +52,19 @@ class ValidationTest < Test::Unit::TestCase # #:classification=>"true"} # puts last_response.body - #delete "/7" + #get "/" + #puts last_response.body #get "/crossvalidation/4/statistics" # post "",:model_uri=>"http://localhost/model/1",:test_dataset_uri=>"http://localhost/dataset/3", # :test_target_dataset_uri=>"http://localhost/dataset/1" - # get "/1",nil,'HTTP_ACCEPT' => "application/rdf+xml" - # puts last_response.body +# get "/crossvalidation/2",nil,'HTTP_ACCEPT' => "application/rdf+xml" +# puts last_response.body + #get "/crossvalidation?model_uri=lazar" # post "/test_validation",:select=>"6d" #,:report=>"yes,please" -# puts last_response.body + #puts last_response.body # post "/validate_datasets",{ # :test_dataset_uri=>"http://localhost/dataset/204", @@ -59,7 +76,9 @@ class ValidationTest < Test::Unit::TestCase # #:classification=>"true"} # puts last_response.body - #run_test("1a") #, "http://localhost/validation/crossvalidation/5" )# //localhost/validation/42")#, "http://localhost/validation/report/validation/8") #,"http://localhost/validation/report/validation/36") #, "http://localhost/validation/321") + #run_test("12b"); #,"http://localhost/validation/crossvalidation/2"); + + #run_test("11b", "http://localhost/validation/crossvalidation/2" )# //localhost/validation/42")#, "http://localhost/validation/report/validation/8") #,"http://localhost/validation/report/validation/36") #, "http://localhost/validation/321") # run_test("7a","http://localhost/validation/40") #,"http://localhost/validation/crossvalidation/10") #, "http://localhost/validation/321") @@ -68,7 +87,7 @@ class ValidationTest < Test::Unit::TestCase #puts Nightly.build_nightly("1") #prepare_examples - #do_test_examples # USES CURL, DO NOT FORGET TO RESTART VALIDATION SERVICE + do_test_examples # USES CURL, DO NOT FORGET TO RESTART VALIDATION SERVICE end def app |