summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authormguetlein <martin.guetlein@gmail.com>2010-09-07 09:25:39 +0200
committermguetlein <martin.guetlein@gmail.com>2010-09-07 09:25:39 +0200
commit7d69d09e79882cb826dacac2b6474fade13e8690 (patch)
treec1acc2b07627bbb787a1e4d24a425c90283277aa
parent9ec38f1ebaffa51cac69e5e1a09c988556a690a1 (diff)
add validation to qmrf-report, bootstrapping
-rw-r--r--EXAMPLES21
-rw-r--r--lib/active_record_setup.rb2
-rw-r--r--reach_reports/reach_service.rb120
-rw-r--r--reach_reports/reach_test.rb41
-rw-r--r--report/report_service.rb2
-rw-r--r--test/test_examples.rb28
-rw-r--r--test/test_examples_util.rb16
-rw-r--r--validation/validation_application.rb29
-rw-r--r--validation/validation_service.rb64
-rw-r--r--validation/validation_test.rb31
10 files changed, 320 insertions, 34 deletions
diff --git a/EXAMPLES b/EXAMPLES
index 944688f..3653ed5 100644
--- a/EXAMPLES
+++ b/EXAMPLES
@@ -116,6 +116,27 @@ result example (accept-header: text/uri-list)
<<< <validation_service>/id_i
+validate an algorithm on a dataset via bootstrapping
+-------------------------------------------------------------------
+
+>>> curl -X POST -d algorithm_uri="<algorithm_uri>" \
+ -d dataset_uri="<dataset_uri>" \
+ -d prediction_feature="<prediction_feature>" \
+ -d algorithm_params="<algorithm_params>" \
+ -d random_seed=2 \
+ <validation_service>/bootstrapping
+
+optional params:
+algorithm_params, default is empty
+random_seed, default is 1
+
+result example (accept-header: application/rdf+xml)
+<<< not yet supported
+
+result example (accept-header: text/uri-list)
+<<< <validation_service>/id_i
+
+
get list of available crossvalidations
-------------------------------------------------------------------
diff --git a/lib/active_record_setup.rb b/lib/active_record_setup.rb
index 85690b6..cea4fb2 100644
--- a/lib/active_record_setup.rb
+++ b/lib/active_record_setup.rb
@@ -28,7 +28,7 @@ class ActiveRecord::Base
key = key+"_uri"
unless self.column_names.include?(key)
key = key+"s"
- raise "no attribute found: '"+k.to_s+"'" unless self.column_names.include?(key)
+ $sinatra.halt 400,"no attribute found: '"+k.to_s+"'" unless self.column_names.include?(key)
end
end
end
diff --git a/reach_reports/reach_service.rb b/reach_reports/reach_service.rb
index f7b899c..a742c49 100644
--- a/reach_reports/reach_service.rb
+++ b/reach_reports/reach_service.rb
@@ -1,3 +1,15 @@
+
+class Array
+
+ def to_html
+ return "" unless size>0
+ s = "<html>\n<head>\n</head>\n<body>\n"
+ s += join(" <br>\n")
+ s += "</body>\n</html>\n"
+ return s
+ end
+end
+
module ReachReports
def self.list_reports(type)
@@ -45,17 +57,17 @@ module ReachReports
#halt 202,task_uri
end
- def self.build_qmrf_report(r)
- model = OpenTox::Model::Generic.find(r.model_uri)
+
+ def self.build_qmrf_report(r)
+
+ model = OpenTox::Model::PredictionModel.find(r.model_uri)
+ classification = model.classification?
# chapter 1
- #r.QSAR_title = model.title
r.qsar_identifier = QsarIdentifier.new
r.qsar_identifier.qsar_title = model.title
-
- # TODO
- # QSAR_models -> sparql same endpoint
+ # TODO QSAR_models -> sparql same endpoint
r.qsar_identifier.qsar_software << QsarSoftware.new( :url => model.uri, :name => model.title, :contact => model.creator )
algorithm = OpenTox::Algorithm::Generic.find(model.algorithm) if model.algorithm
r.qsar_identifier.qsar_software << QsarSoftware.new( :url => algorithm.uri, :name => algorithm.title )
@@ -89,19 +101,89 @@ module ReachReports
# chapter 5
# TODO app_domain_description, app_domain_method, app_domain_software, applicability_limits
- dataset = model.trainingDataset ? OpenTox::Dataset.find(model.trainingDataset) : nil
+ training_dataset = model.trainingDataset ? OpenTox::Dataset.find(model.trainingDataset+"/metadata") : nil
# chapter 6
r.qsar_robustness = QsarRobustness.new
- r.qsar_robustness.training_set_availability = dataset ? "Yes" : "No"
+ if training_dataset
+ r.qsar_robustness.training_set_availability = "Yes"
+ r.qsar_robustness.training_set_data = TrainingSetData.new(:chemname => "Yes", :cas => "Yes",
+ :smiles => "Yes", :inchi => "Yes", :mol => "Yes", :formula => "Yes")
+ end
+
#TODO "training_set_data" => "6.2",
# "training_set_descriptors" => "6.3",
# "dependent_var_availability" => "6.4", "other_info" => "6.5", "preprocessing" => "6.6", "goodness_of_fit" => "6.7",
# "loo" => "6.8",
- puts Lib::Crossvalidation.find(:all, :conditions => {:algorithm_uri => model.algorithm}).inspect if model.algorithm
- #exit
+
+ val_datasets = []
+
+ if model.algorithm
+ cvs = Lib::Crossvalidation.find(:all, :conditions => {:algorithm_uri => model.algorithm})
+ cvs = [] unless cvs
+ lmo = [ "found "+cvs.size.to_s+" crossvalidation/s for algorithm '"+model.algorithm ]
+ lmo << ""
+ cvs.each do |cv|
+ lmo << "crossvalidation: "+cv.crossvalidation_uri
+ lmo << "dataset (see 9.3 Validation data): "+cv.dataset_uri
+ val_datasets << cv.dataset_uri
+ lmo << "num-folds: "+cv.num_folds.to_s
+ val = YAML.load( OpenTox::RestClientWrapper.get File.join(cv.crossvalidation_uri,"statistics") )
+ if classification
+ lmo << "percent_correct: "+val[:classification_statistics][:percent_correct].to_s
+ lmo << "weighted AUC: "+val[:classification_statistics][:weighted_area_under_roc].to_s
+ else
+ lmo << "root_mean_squared_error: "+val[:regression_statistics][:root_mean_squared_error].to_s
+ lmo << "r_square "+val[:regression_statistics][:r_square].to_s
+ end
+ reports = OpenTox::RestClientWrapper.get File.join(CONFIG[:services]["opentox-validation"],"report/crossvalidation?crossvalidation_uris="+cv.crossvalidation_uri)
+ if reports and reports.size>0
+ lmo << "for more info see report: "+reports
+ else
+ lmo << "for more info see report: not yet created for '"+cv.crossvalidation_uri+"'"
+ end
+ lmo << ""
+ end
+ else
+ lmo = [ "no prediction algortihm for model found, crossvalidation not possible" ]
+ end
+ r.qsar_robustness.lmo = lmo.to_html
# "lmo" => "6.9", "yscrambling" => "6.10", "bootstrap" => "6.11", "other_statistics" => "6.12",
+ vals = Lib::Validation.find(:all, :conditions => {:model_uri => model.uri})
+ if vals and vals.size > 0
+
+ r.qsar_predictivity = QsarPredictivity.new
+ r.qsar_predictivity.validation_set_availability = "Yes"
+ r.qsar_predictivity.validation_set_data = ValidationSetData.new(:chemname => "Yes", :cas => "Yes",
+ :smiles => "Yes", :inchi => "Yes", :mol => "Yes", :formula => "Yes")
+
+ v = [ "found '"+vals.size.to_s+"' test-set validations of model '"+model.uri+"'" ]
+ v << ""
+ vals.each do |validation|
+ v << "validation: "+validation.validation_uri
+ v << "dataset (see 9.3 Validation data): "+validation.test_dataset_uri
+ val_datasets << validation.test_dataset_uri
+ if classification
+ v << "percent_correct: "+validation.classification_statistics[:percent_correct].to_s
+ v << "weighted AUC: "+validation.classification_statistics[:weighted_area_under_roc].to_s
+ else
+ v << "root_mean_squared_error: "+validation.regression_statistics[:root_mean_squared_error].to_s
+ v << "r_square "+validation.regression_statistics[:r_square].to_s
+ end
+ reports = OpenTox::RestClientWrapper.get File.join(CONFIG[:services]["opentox-validation"],"report/validation?validation_uris="+validation.validation_uri)
+ if reports and reports.size>0
+ v << "for more info see report: "+reports
+ else
+ v << "for more info see report: not yet created for '"+validation.validation_uri+"'"
+ end
+ v << ""
+ end
+ else
+ v = [ "no validation of model '"+model.uri+" found" ]
+ end
+ r.qsar_predictivity.validation_predictivity = v.to_html
+
# chapter 7
# "validation_set_availability" => "7.1", "validation_set_data" => "7.2", "validation_set_descriptors" => "7.3",
# "validation_dependent_var_availability" => "7.4", "validation_other_info" => "7.5", "experimental_design" => "7.6",
@@ -112,15 +194,25 @@ module ReachReports
# chapter 9
# "comments" => "9.1", "bibliography" => "9.2", "attachments" => "9.3",
+
r.qsar_miscellaneous = QsarMiscellaneous.new
+
r.qsar_miscellaneous.attachment_training_data << AttachmentTrainingData.new(
- { :description => dataset.title,
+ { :description => training_dataset.title,
:filetype => "owl-dl",
- :url => dataset.uri} ) if dataset
+ :url => model.trainingDataset} ) if training_dataset
+
+ val_datasets.each do |data_uri|
+ d = OpenTox::Dataset.find(data_uri+"/metadata")
+ r.qsar_miscellaneous.attachment_validation_data << AttachmentValidationData.new(
+ { :description => d.title,
+ :filetype => "owl-dl",
+ :url => data_uri} )
+ end
-
-
r.save
+
+
end
# def self.get_report_content(type, id, *keys)
diff --git a/reach_reports/reach_test.rb b/reach_reports/reach_test.rb
index 152b355..2f07302 100644
--- a/reach_reports/reach_test.rb
+++ b/reach_reports/reach_test.rb
@@ -11,6 +11,21 @@ LOGGER = MyLogger.new(STDOUT)
LOGGER.datetime_format = "%Y-%m-%d %H:%M:%S "
LOGGER.formatter = Logger::Formatter.new
+#Rack::Test::DEFAULT_HOST = "localhost/validation"
+module Sinatra
+ module UrlForHelper
+ BASE = "http://localhost/validation"
+ def url_for url_fragment, mode=:path_only
+ case mode
+ when :path_only
+ raise "not impl"
+ when :full
+ end
+ "#{BASE}#{url_fragment}"
+ end
+ end
+end
+
#DataMapper::Model.raise_on_save_failure = true
#
#class TestResourceX
@@ -96,19 +111,20 @@ class ReachTest < Test::Unit::TestCase
# #puts "data found "+data.to_s[0..1000]
# puts OpenTox::RestClientWrapper.post("http://localhost/validation/reach_report/qmrf/20",{:content_type => "application/qmrf-xml"},data).to_s.chomp
- post "/reach_report/qmrf/8"
- puts last_response.body
-
+# post "/reach_report/qmrf/8"
+# puts last_response.body
-# #model_uri = "http://localhost/model/1"
-# model_uri = "http://apps.ideaconsult.net:8080/ambit2/model/2"
-# post '/reach_report/qmrf',:model_uri=>model_uri #http://localhost/model/1"
-# ##post '/reach_report/qprf',:compound_uri=>"http://localhost/compound/XYZ"
-# uri = last_response.body
-# id = uri.split("/")[-1]
-# puts uri
+ model_uri = "http://localhost/model/1"
+ #model_uri = "http://localhost/majority/regr/model/12"
+ #model_uri = "http://localhost/majority/class/model/1"
+ #model_uri = "http://apps.ideaconsult.net:8080/ambit2/model/2"
+ post '/reach_report/qmrf',:model_uri=>model_uri #http://localhost/model/1"
+ ##post '/reach_report/qprf',:compound_uri=>"http://localhost/compound/XYZ"
+ uri = last_response.body
+ id = uri.split("/")[-1]
+ puts uri
- id = "8"
+# id = "8"
#get '/reach_report/qmrf'
#puts last_response.body
@@ -128,6 +144,9 @@ class ReachTest < Test::Unit::TestCase
#r = ReachReports::QmrfReport.find_like( :QSAR_title => "Hamster")
#puts r.collect{|rr| "report with id:"+rr.id.to_s}.inspect
+
+ File.new("/home/martin/win/home/qmr_rep_del_me.xml","w").puts last_response.body
+ File.new("/home/martin/info_home/.public_html/qmr_rep_del_me.xml","w").puts last_response.body
end
end
diff --git a/report/report_service.rb b/report/report_service.rb
index 04a2d2e..9aaa2e0 100644
--- a/report/report_service.rb
+++ b/report/report_service.rb
@@ -30,7 +30,7 @@ module Reports
#
def get_all_reports(type, filter_params)
- LOGGER.info "get all reports of type '"+type.to_s+"'"
+ LOGGER.info "get all reports of type '"+type.to_s+"', filter_params: '"+filter_params.inspect+"'"
check_report_type(type)
@persistance.list_reports(type, filter_params).collect{ |id| get_uri(type,id) }.join("\n")
end
diff --git a/test/test_examples.rb b/test/test_examples.rb
index d7edba0..8927fb1 100644
--- a/test/test_examples.rb
+++ b/test/test_examples.rb
@@ -81,6 +81,30 @@ module ValidationExamples
end
########################################################################################################
+
+ class HamsterBootstrapping < BootstrappingValidation
+ def initialize
+ @dataset_file = File.new("data/hamster_carcinogenicity.yaml","r")
+ @prediction_feature = "http://localhost/toxmodel/feature#Hamster%20Carcinogenicity%20(DSSTOX/CPDB)"
+ end
+ end
+
+ class LazarHamsterBootstrapping < HamsterBootstrapping
+ def initialize
+ @algorithm_uri = File.join(@@config[:services]["opentox-algorithm"],"lazar")
+ @algorithm_params = "feature_generation_uri="+File.join(@@config[:services]["opentox-algorithm"],"fminer")
+ super
+ end
+ end
+
+ class MajorityHamsterBootstrapping < HamsterBootstrapping
+ def initialize
+ @algorithm_uri = File.join(@@config[:services]["opentox-majority"],"/class/algorithm")
+ super
+ end
+ end
+
+ ########################################################################################################
class HamsterTrainingTest < TrainingTestValidation
def initialize
@@ -303,6 +327,10 @@ module ValidationExamples
"10a" => [ ISTLazarISTEpaCrossvalidation ],
"11b" => [ MajorityISTRatLiverCrossvalidation ],
+
+ "12" => [ LazarHamsterBootstrapping, MajorityHamsterBootstrapping ],
+ "12a" => [ LazarHamsterBootstrapping ],
+ "12b" => [ MajorityHamsterBootstrapping ],
}
def self.list
diff --git a/test/test_examples_util.rb b/test/test_examples_util.rb
index b18607d..c5a9900 100644
--- a/test/test_examples_util.rb
+++ b/test/test_examples_util.rb
@@ -40,7 +40,7 @@ module ValidationExamples
$test_case.post uri,params
return wait($test_case.last_response.body)
else
- return OpenTox::RestClientWrapper.post(File.join(@@config[:services]["opentox-validation"],uri),params)
+ return OpenTox::RestClientWrapper.post(File.join(@@config[:services]["opentox-validation"],uri),params).to_s
end
end
@@ -323,6 +323,20 @@ module ValidationExamples
end
end
+ class BootstrappingValidation < ValidationExample
+ def params
+ [:algorithm_uri, :dataset_uri, :prediction_feature]
+ end
+
+ def opt_params
+ [ :algorithm_params, :random_seed ]
+ end
+
+ def validation_type
+ "bootstrapping"
+ end
+ end
+
class CrossValidation < ValidationExample
def params
[:algorithm_uri, :dataset_uri, :prediction_feature]
diff --git a/validation/validation_application.rb b/validation/validation_application.rb
index 7ba968f..a79de22 100644
--- a/validation/validation_application.rb
+++ b/validation/validation_application.rb
@@ -121,10 +121,17 @@ post '/crossvalidation/?' do
halt 202,task_uri
end
+
get '/training_test_split' do
halt 400, "GET operation not supported, use POST to perform a training_test_split, see "+url_for("/", :full)+" for validation results"
end
+
+get '/bootstrapping' do
+ halt 400, "GET operation not supported, use POST to perform a bootstrapping, see "+url_for("/", :full)+" for validation results"
+end
+
+
get '/?' do
LOGGER.info "list all validations, params: "+params.inspect
content_type "text/uri-list"
@@ -203,6 +210,28 @@ post '/training_test_split' do
end
+post '/bootstrapping' do
+ content_type "text/uri-list"
+ task_uri = OpenTox::Task.as_task( "Perform bootstrapping validation", url_for("/bootstrapping", :full) ) do
+ LOGGER.info "performing bootstrapping validation "+params.inspect
+ halt 400, "dataset_uri missing" unless params[:dataset_uri]
+ halt 400, "algorithm_uri missing" unless params[:algorithm_uri]
+ halt 400, "prediction_feature missing" unless params[:prediction_feature]
+
+ params.merge!(Validation::Util.bootstrapping(params[:dataset_uri], params[:prediction_feature], params[:random_seed]))
+ v = Validation::Validation.new :training_dataset_uri => params[:training_dataset_uri],
+ :test_dataset_uri => params[:test_dataset_uri],
+ :test_target_dataset_uri => params[:dataset_uri],
+ :prediction_feature => params[:prediction_feature],
+ :algorithm_uri => params[:algorithm_uri]
+ v.validate_algorithm( params[:algorithm_params])
+ content_type "text/uri-list"
+ v.validation_uri
+ end
+ halt 202,task_uri
+end
+
+
post '/plain_training_test_split' do
LOGGER.info "creating pure training test split "+params.inspect
halt 400, "dataset_uri missing" unless params[:dataset_uri]
diff --git a/validation/validation_service.rb b/validation/validation_service.rb
index ce25ee9..67fdbee 100644
--- a/validation/validation_service.rb
+++ b/validation/validation_service.rb
@@ -332,6 +332,70 @@ module Validation
module Util
+ # splits a dataset into training and test dataset via bootstrapping
+ # (training dataset: n compounds sampled with replacement from the n compounds of the orig dataset; test dataset: all compounds that were not sampled)
+ # returns map with training_dataset_uri and test_dataset_uri
+ def self.bootstrapping( orig_dataset_uri, prediction_feature, random_seed=nil )
+
+ random_seed=1 unless random_seed
+
+ orig_dataset = OpenTox::Dataset.find orig_dataset_uri
+ $sinatra.halt 400, "Dataset not found: "+orig_dataset_uri.to_s unless orig_dataset
+ if prediction_feature
+ $sinatra.halt 400, "Prediction feature '"+prediction_feature.to_s+
+ "' not found in dataset, features are: \n"+
+ orig_dataset.features.inspect unless orig_dataset.features.include?(prediction_feature)
+ else
+ LOGGER.warn "no prediciton feature given, all features included in test dataset"
+ end
+
+ compounds = orig_dataset.compounds
+ $sinatra.halt 400, "Cannot split datset, num compounds in dataset < 2 ("+compounds.size.to_s+")" if compounds.size<2
+
+ srand random_seed.to_i
+ while true
+ training_compounds = []
+ compounds.size.times do
+ training_compounds << compounds[rand(compounds.size)]
+ end
+ test_compounds = []
+ compounds.each do |c|
+ test_compounds << c unless training_compounds.include?(c)
+ end
+ if test_compounds.size > 0
+ break
+ else
+ srand rand(10000)
+ end
+ end
+
+ LOGGER.debug "bootstrapping on dataset "+orig_dataset_uri+
+ " into training ("+training_compounds.size.to_s+") and test ("+test_compounds.size.to_s+")"+
+ ", duplicates in training dataset: "+test_compounds.size.to_s
+
+ result = {}
+ result[:training_dataset_uri] = orig_dataset.create_new_dataset( training_compounds,
+ orig_dataset.features,
+ "Bootstrapping training dataset of "+orig_dataset.title.to_s,
+ $sinatra.url_for('/bootstrapping',:full) )
+ result[:test_dataset_uri] = orig_dataset.create_new_dataset( test_compounds,
+ orig_dataset.features.dclone - [prediction_feature],
+ "Bootstrapping test dataset of "+orig_dataset.title.to_s,
+ $sinatra.url_for('/bootstrapping',:full) )
+
+ if ENV['RACK_ENV'] =~ /test|debug/
+ training_dataset = OpenTox::Dataset.find result[:training_dataset_uri]
+ $sinatra.halt 400, "Training dataset not found: '"+result[:training_dataset_uri].to_s+"'" unless training_dataset
+ training_compounds_verify = training_dataset.compounds
+ $sinatra.halt 500, "training compounds error" unless training_compounds_verify==training_compounds
+ $sinatra.halt 400, "Test dataset not found: '"+result[:test_dataset_uri].to_s+"'" unless OpenTox::Dataset.find result[:test_dataset_uri]
+ end
+
+ LOGGER.debug "bootstrapping done, training dataset: '"+result[:training_dataset_uri].to_s+"', test dataset: '"+result[:test_dataset_uri].to_s+"'"
+
+ return result
+ end
+
# splits a dataset into test and training dataset
# returns map with training_dataset_uri and test_dataset_uri
def self.train_test_dataset_split( orig_dataset_uri, prediction_feature, split_ratio=nil, random_seed=nil )
diff --git a/validation/validation_test.rb b/validation/validation_test.rb
index d96c012..9b5a11f 100644
--- a/validation/validation_test.rb
+++ b/validation/validation_test.rb
@@ -11,6 +11,21 @@ LOGGER = MyLogger.new(STDOUT)
LOGGER.datetime_format = "%Y-%m-%d %H:%M:%S "
LOGGER.formatter = Logger::Formatter.new
+#Rack::Test::DEFAULT_HOST = "localhost" #"/validation"
+module Sinatra
+ module UrlForHelper
+ BASE = "http://localhost/validation"
+ def url_for url_fragment, mode=:path_only
+ case mode
+ when :path_only
+ raise "not impl"
+ when :full
+ end
+ "#{BASE}#{url_fragment}"
+ end
+ end
+end
+
class ValidationTest < Test::Unit::TestCase
include Rack::Test::Methods
@@ -37,17 +52,19 @@ class ValidationTest < Test::Unit::TestCase
# #:classification=>"true"}
# puts last_response.body
- #delete "/7"
+ #get "/"
+ #puts last_response.body
#get "/crossvalidation/4/statistics"
# post "",:model_uri=>"http://localhost/model/1",:test_dataset_uri=>"http://localhost/dataset/3",
# :test_target_dataset_uri=>"http://localhost/dataset/1"
- # get "/1",nil,'HTTP_ACCEPT' => "application/rdf+xml"
- # puts last_response.body
+# get "/crossvalidation/2",nil,'HTTP_ACCEPT' => "application/rdf+xml"
+# puts last_response.body
+ #get "/crossvalidation?model_uri=lazar"
# post "/test_validation",:select=>"6d" #,:report=>"yes,please"
-# puts last_response.body
+ #puts last_response.body
# post "/validate_datasets",{
# :test_dataset_uri=>"http://localhost/dataset/204",
@@ -59,7 +76,9 @@ class ValidationTest < Test::Unit::TestCase
# #:classification=>"true"}
# puts last_response.body
- #run_test("1a") #, "http://localhost/validation/crossvalidation/5" )# //localhost/validation/42")#, "http://localhost/validation/report/validation/8") #,"http://localhost/validation/report/validation/36") #, "http://localhost/validation/321")
+ #run_test("12b"); #,"http://localhost/validation/crossvalidation/2");
+
+ #run_test("11b", "http://localhost/validation/crossvalidation/2" )# //localhost/validation/42")#, "http://localhost/validation/report/validation/8") #,"http://localhost/validation/report/validation/36") #, "http://localhost/validation/321")
# run_test("7a","http://localhost/validation/40") #,"http://localhost/validation/crossvalidation/10") #, "http://localhost/validation/321")
@@ -68,7 +87,7 @@ class ValidationTest < Test::Unit::TestCase
#puts Nightly.build_nightly("1")
#prepare_examples
- #do_test_examples # USES CURL, DO NOT FORGET TO RESTART VALIDATION SERVICE
+ do_test_examples # USES CURL, DO NOT FORGET TO RESTART VALIDATION SERVICE
end
def app