diff options
author | Martin Gütlein <martin.guetlein@gmail.com> | 2010-01-14 14:08:14 +0100 |
---|---|---|
committer | Martin Gütlein <martin.guetlein@gmail.com> | 2010-01-14 14:08:14 +0100 |
commit | e65b7f04ce114affd6f1a3318c938f6a19fa1451 (patch) | |
tree | f39c29a0653b27c39fc53ba3bbe4f5caf43a96bb | |
parent | 7a27e607b3b997b85e7ea62c3ab3464ae84030cb (diff) |
some commenting, reordering, creating examples without restclient
-rw-r--r-- | application.rb | 4 | ||||
-rw-r--r-- | example.rb | 76 | ||||
-rw-r--r-- | lib/ot_predictions.rb | 2 | ||||
-rw-r--r-- | lib/predictions.rb | 13 | ||||
-rw-r--r-- | lib/validation_db.rb | 2 | ||||
-rw-r--r-- | lib/wrapper.rb | 7 | ||||
-rw-r--r-- | report/report_service.rb | 7 | ||||
-rw-r--r-- | test.rb | 4 | ||||
-rw-r--r-- | validation/validation_application.rb | 6 | ||||
-rw-r--r-- | validation/validation_format.rb | 225 | ||||
-rw-r--r-- | validation/validation_service.rb | 13 | ||||
-rw-r--r-- | validation/validation_test.rb | 8 |
12 files changed, 161 insertions, 206 deletions
diff --git a/application.rb b/application.rb index 55a7598..3625833 100644 --- a/application.rb +++ b/application.rb @@ -11,16 +11,19 @@ end require "example.rb" get '/examples/?' do + LOGGER.info "list examples" content_type "text/plain" Example.transform_example end get '/prepare_examples/?' do + LOGGER.info "prepare examples" content_type "text/plain" Example.prepare_example_resources end get '/test_examples/?' do + LOGGER.info "test examples" content_type "text/plain" Example.test_examples end @@ -28,7 +31,6 @@ end # order is important, first add example methods and reports, than validation # (otherwise sinatra will try to locate a validation with name examples or report) - require "report/report_application.rb" require "validation/validation_application.rb" @@ -12,6 +12,7 @@ class Example @@summary="" + # replaces placeholdes ( in <> brackets ) in EXAMPLE file with uris and ids def self.transform_example file = File.new("EXAMPLES", "r") @@ -41,61 +42,45 @@ class Example res end - def self.delete_all(uri_list_service) - uri_list = OpenTox::RestClientWrapper.get(uri_list_service) - uri_list.split("\n").each do |uri| - OpenTox::RestClientWrapper.delete(uri) - end - end - - def self.log(log_string) - LOGGER.debug log_string - @@summary += log_string+"\n" - end - + # creates the resources that are requested by the examples def self.prepare_example_resources @@summary = "" delete_all(@@config[:services]["opentox-dataset"]) + log "upload dataset" data = File.read(@@file.path) data_uri = OpenTox::RestClientWrapper.post @@config[:services]["opentox-dataset"], data, :content_type => "application/rdf+xml" - log "uploaded dataset "+data_uri - raise "failed to prepare demo" unless data_uri==@@data - + + log "train-test-validation" Lib::Validation.auto_migrate! delete_all(@@config[:services]["opentox-model"]) - vali_uri = OpenTox::RestClientWrapper.post File.join(@@config[:services]["opentox-validation"],'/training_test_split'), { :dataset_uri => data_uri, - :algorithm_uri => @@alg, - :prediction_feature => @@feature, - :algorithm_params => @@alg_params } - log "created validation via training test split "+vali_uri - raise "failed to prepare demo" unless vali_uri==File.join(@@config[:services]["opentox-validation"],'/1') + split_params = Validation::Util.train_test_dataset_split(data_uri, 0.9, 1) + v = Validation::Validation.new :training_dataset_uri => split_params[:training_dataset_uri], + :test_dataset_uri => split_params[:test_dataset_uri], + :prediction_feature => @@feature + v.validate_algorithm( @@alg, @@alg_params) + log "crossvalidation" Lib::Crossvalidation.auto_migrate! - cv_uri = OpenTox::RestClientWrapper.post File.join(@@config[:services]["opentox-validation"],'/crossvalidation'), { :dataset_uri => data_uri, - :algorithm_uri => @@alg, - :prediction_feature => @@feature, - :algorithm_params => @@alg_params, - :num_folds => 5, :stratified => false } - log "created crossvalidation "+cv_uri - raise "failed to prepare demo" unless cv_uri==File.join(@@config[:services]["opentox-validation"],'/crossvalidation/1') + cv = Validation::Crossvalidation.new({ :dataset_uri => data_uri, :algorithm_uri => @@alg, :num_folds => 5, :stratified => false }) + cv.create_cv_datasets( @@feature ) + cv.perform_cv( @@alg_params ) + + log "create validation report" + rep = Reports::ReportService.new(File.join(@@config[:services]["opentox-validation"],"report")) + rep.delete_all_reports("validation") + rep.create_report("validation",v.uri) - delete_all(File.join(@@config[:services]["opentox-validation"],"/report/validation")) - val_report_uri = OpenTox::RestClientWrapper.post File.join(@@config[:services]["opentox-validation"],'/report/validation'), { :validation_uris => vali_uri } - log "created validation report: "+val_report_uri - raise "failed to prepare demo" unless val_report_uri==File.join(@@config[:services]["opentox-validation"],'/report/validation/1') + log "create crossvalidation report" + rep.delete_all_reports("crossvalidation") + rep.create_report("crossvalidation",cv.uri) - delete_all(File.join(@@config[:services]["opentox-validation"],"/report/crossvalidation")) - cv_report_uri = OpenTox::RestClientWrapper.post File.join(@@config[:services]["opentox-validation"],'/report/crossvalidation'), { :validation_uris => cv_uri } - log "created crossvalidation report: "+cv_report_uri - raise "failed to prepare demo" unless cv_report_uri==File.join(@@config[:services]["opentox-validation"],'/report/crossvalidation/1') log "done" - @@summary end - + # performs all curl calls listed in examples after ">>>", next line is added if line ends with "\" def self.test_examples lines = transform_example.split("\n") curl_call = false @@ -146,4 +131,19 @@ class Example @@summary end + private + # deletes resources listed by service + def self.delete_all(uri_list_service) + uri_list = OpenTox::RestClientWrapper.get(uri_list_service) + uri_list.split("\n").each do |uri| + OpenTox::RestClientWrapper.delete(uri) + end + end + + # logs string and and adds to summary + def self.log(log_string) + LOGGER.info log_string + @@summary += log_string+"\n" + end + end diff --git a/lib/ot_predictions.rb b/lib/ot_predictions.rb index f1930cc..aaa5d6f 100644 --- a/lib/ot_predictions.rb +++ b/lib/ot_predictions.rb @@ -78,7 +78,7 @@ module Lib index += 1 end - super(predicted_values, actual_values, confidence_values, prediction_feature, is_classification, class_values) + super(predicted_values, actual_values, confidence_values, is_classification, class_values) raise "illegal num compounds "+num_info if @compounds.size != @predicted_values.size end diff --git a/lib/predictions.rb b/lib/predictions.rb index 9e6356e..1d53ab9 100644 --- a/lib/predictions.rb +++ b/lib/predictions.rb @@ -25,14 +25,12 @@ module Lib def initialize( predicted_values, actual_values, confidence_values, - prediction_feature, is_classification, prediction_feature_values=nil ) @predicted_values = predicted_values @actual_values = actual_values @confidence_values = confidence_values - @prediction_feature = prediction_feature @is_classification = is_classification @prediction_feature_values = prediction_feature_values @num_classes = 1 @@ -61,7 +59,7 @@ module Lib end end - init_stats + init_stats() (0..@predicted_values.size-1).each do |i| update_stats( @predicted_values[i], @actual_values[i], @confidence_values[i] ) end @@ -175,6 +173,9 @@ module Lib return @num_unpredicted end + # internal structure of confusion matrix: + # hash with keys: hash{ :confusion_matrix_actual => <class_value>, :confusion_matrix_predicted => <class_value> } + # and values: <int-value> def confusion_matrix raise "no classification" unless @is_classification res = {} @@ -187,8 +188,6 @@ module Lib return res end - - def area_under_roc(class_index=nil) return prediction_feature_value_map( lambda{ |i| area_under_roc(i) } ) if class_index==nil @@ -361,7 +360,7 @@ module Lib end - ######################################################################################## + # regression ####################################################################################### def root_mean_squared_error Math.sqrt(@sum_squared_error / (@num_with_actual_value - @num_unpredicted).to_f) @@ -375,7 +374,7 @@ module Lib return @variance_predicted / @variance_actual end - ######################################################################################## + # data for roc-plots ################################################################################### def roc_confidence_values(class_value) class_index = @prediction_feature_values.index(class_value) diff --git a/lib/validation_db.rb b/lib/validation_db.rb index 30a0bda..5811f6c 100644 --- a/lib/validation_db.rb +++ b/lib/validation_db.rb @@ -3,7 +3,6 @@ require lib end - module Lib VAL_PROPS = [ :id, :uri, :model_uri, :training_dataset_uri, :prediction_feature, @@ -43,7 +42,6 @@ module Lib property :finished, Boolean, :default => false property :created_at, DateTime property :real_runtime, Float - #property :cpu_runtime, Float property :num_instances, Integer property :num_without_class, Integer diff --git a/lib/wrapper.rb b/lib/wrapper.rb index 40df0e5..cff9b2f 100644 --- a/lib/wrapper.rb +++ b/lib/wrapper.rb @@ -10,6 +10,13 @@ module OpenTox module Model class PredictionModel + attr_reader :uri + + def self.build( algorithm_uri, algorithm_parms ) + uri = OpenTox::RestClientWrapper.post algorithm_uri,algorithm_parms + PredictionModel.new(uri) + end + def self.find( uri ) begin RestClient.get uri,:accept => "application/rdf+xml" diff --git a/report/report_service.rb b/report/report_service.rb index 5c75ae4..36bd176 100644 --- a/report/report_service.rb +++ b/report/report_service.rb @@ -102,6 +102,13 @@ module Reports @persistance.delete_report(type, id) end + def delete_all_reports( type ) + + LOGGER.info "deleteing all reports of ype '"+type.to_s+"'" + check_report_type(type) + @persistance.list_reports(type).each{ |id| @persistance.delete_report(type, id) } + end + def parse_type( report_uri ) raise "invalid uri" unless report_uri.to_s =~/^#{@home_uri}.*/ diff --git a/test.rb b/test.rb deleted file mode 100644 index 2265fb2..0000000 --- a/test.rb +++ /dev/null @@ -1,4 +0,0 @@ - -require "validation/validation_test.rb" - -#require "report/report_test.rb"
\ No newline at end of file diff --git a/validation/validation_application.rb b/validation/validation_application.rb index 2c5806e..beb16ab 100644 --- a/validation/validation_application.rb +++ b/validation/validation_application.rb @@ -9,20 +9,22 @@ require 'validation/validation_service.rb' # hack: store self in $sinatra to make url_for method accessible in validation_service # (before is executed in every rest call, problem is that the request object is not set, until the first rest-call ) before {$sinatra = self unless $sinatra} + unless(defined? LOGGER) LOGGER = Logger.new(STDOUT) LOGGER.datetime_format = "%Y-%m-%d %H:%M:%S " end + class Sinatra::Base - # logging halts (!= 202) + # overwriting halt to log halts (!= 202) def halt(status,msg) LOGGER.error "halt "+status.to_s+" "+msg.to_s if (status != 202) throw :halt, [status, msg] end end -## REST API + get '/crossvalidation/?' do LOGGER.info "list all crossvalidations" diff --git a/validation/validation_format.rb b/validation/validation_format.rb index 748271f..ed5c7b1 100644 --- a/validation/validation_format.rb +++ b/validation/validation_format.rb @@ -1,14 +1,18 @@ -module Validation +require "lib/rdf_provider.rb" +module Validation + + # adding to_yaml and to_rdf functionality to validation class Validation < Lib::Validation + include Lib::RDFProvider - # get_content is the basis for to_yaml and to_rdf + # get_content_as_hash is the basis for to_yaml and to_rdf # the idea is that everything is stored in a hash structure - # the hash is directly printed in to_yaml, while the has_keys can be used to resolve - # the right properties, classes - def get_content + # the hash is directly printed in to_yaml, whereas the has_keys can be used to resolve + # the right properties, classes for to_rdf + def get_content_as_hash h = {} Lib::VAL_PROPS.each{|p| h[p] = self.send(p)} @@ -55,22 +59,75 @@ module Validation return h end + # build hash structure and return with to_yaml def to_yaml - get_content.to_yaml + get_content_as_hash.to_yaml + end + + def rdf_title + "Validation" + end + + def uri + @uri + end + + @@literals = [ :created_at, :real_runtime, :num_instances, :num_without_class, + :percent_without_class, :num_unpredicted, :percent_unpredicted, + :crossvalidation_fold, :crossvalidation_id, + :num_correct, :num_incorrect, :percent_correct, :percent_incorrect, + :area_under_roc, :false_negative_rate, :false_positive_rate, + :f_measure, :num_false_positives, :num_false_negatives, + :num_true_positives, :num_true_negatives, :precision, + :recall, :true_negative_rate, :true_positive_rate, + :confusion_matrix_value ] + # created at -> date + # owl.set_literal(OT['numInstances'],validation.num_instances) + # owl.set_literal(OT['numWithoutClass'],validation.num_without_class) + # owl.set_literal(OT['percentWithoutClass'],validation.percent_without_class) + # owl.set_literal(OT['numUnpredicted'],validation.num_unpredicted) + # owl.set_literal(OT['percentUnpredicted'],validation.percent_unpredicted) + + + @@object_properties = { :model_uri => OT['validationModel'], :training_dataset_uri => OT['validationTrainingDataset'], + :prediction_feature => OT['predictedFeature'], :test_dataset_uri => OT['validationTestDataset'], + :prediction_dataset_uri => OT['validationPredictionDataset'], :crossvalidation_info => OT['hasValidationInfo'], + :classification_statistics => OT['hasValidationInfo'], + :class_value_statistics => OT['classValueStatistics'], :confusion_matrix => OT['confusionMatrix'], + :confusion_matrix_cell => OT['confusionMatrixCell'], :class_value => OT['class_value'], + :confusion_matrix_actual => OT['confusionMatrixActual'], :confusion_matrix_predicted => OT['confusionMatrixPredicted'] } + + @@classes = { :crossvalidation_info => OT['CrossvalidationInfo'], :classification_statistics => OT['ClassificationStatistics'], + :class_value_statistics => OT['ClassValueStatistics'], + :confusion_matrix => OT['ConfusionMatrix'], :confusion_matrix_cell => OT['ConfusionMatrixCell']} + + def literal?( prop ) + @@literals.index( prop ) != nil end - def to_rdf - owl = ValidationOwl.new() - owl.title = "Validation" - owl.uri = uri - owl.add_content( ValidationToRDF.new, get_content, "Validation" ) - owl.rdf + def literal_name( prop ) + #PENDING + return OT[prop.to_s] + end + + def object_property?( prop ) + @@object_properties.has_key?( prop ) end - end + + def object_property_name( prop ) + return @@object_properties[ prop ] + end + + def class_name( prop ) + return @@classes[ prop ] + end + + end class Crossvalidation < Lib::Crossvalidation + include Lib::RDFProvider - def get_content + def get_content_as_hash h = {} Lib::CROSS_VAL_PROPS.each{|p| h[p] = self.send(p)} @@ -83,91 +140,24 @@ module Validation end def to_yaml - get_content.to_yaml + get_content_as_hash.to_yaml end - def to_rdf - owl = ValidationOwl.new() - owl.title = "Crossvalidation" - owl.uri = uri - owl.add_content( CrossvalidationToRDF.new, get_content, "Crossvalidation" ) - owl.rdf + def rdf_title + "Crossvalidation" end - end - - - class ValidationOwl - include OpenTox::Owl - - def initialize - super - end - - def add_content( content_to_rdf, output, clazz ) - @content_to_rdf = content_to_rdf - recursiv_add_content( output, @model.subject(RDF['type'],OT[clazz]) ) - end - - private - def recursiv_add_content( output, node ) - output.each do |k,v| - raise "null value: "+k.to_s if v==nil - if v.is_a?(Hash) - new_node = add_class( k, node ) - recursiv_add_content( v, new_node ) - elsif v.is_a?(Array) - v.each do |value| - new_node = add_class( k, node ) - recursiv_add_content( value, new_node ) - end - elsif @content_to_rdf.literal?(k) - set_literal( k, v, node) - elsif @content_to_rdf.object_property?(k) - add_object_property( k, v, node) - elsif [ :uri, :id, :finished ].index(k)!=nil - #skip - else - raise "illegal value k:"+k.to_s+" v:"+v.to_s - end - end - end - - def add_class( property, node ) - raise "no object prop: "+property.to_s unless @content_to_rdf.object_property?(property) - raise "no class name: "+property.to_s unless @content_to_rdf.class_name(property) - res = @model.create_resource - @model.add res, RDF['type'], @content_to_rdf.class_name(property) - @model.add res, DC['title'], @content_to_rdf.class_name(property) - @model.add node, @content_to_rdf.object_property_name(property), res - return res - end - - def set_literal(property, value, node ) - raise "empty literal value "+property.to_s if value==nil || value.to_s.size==0 - raise "no literal name "+propety.to_s unless @content_to_rdf.literal_name(property) - begin - l = @model.object(subject, @content_to_rdf.literal_name(property)) - @model.delete node, @content_to_rdf.literal_name(property), l - rescue - end - @model.add node, @content_to_rdf.literal_name(property), value.to_s + def uri + @uri end - def add_object_property(property, value, node ) - raise "empty object property value "+property.to_s if value==nil || value.to_s.size==0 - raise "no object property name "+propety.to_s unless @content_to_rdf.object_property_name(property) - @model.add node, @content_to_rdf.object_property_name(property), Redland::Uri.new(value) # untyped individual comes from this line, why?? - #@model.add Redland::Uri.new(value), RDF['type'], type - end - - end - - - class ContentToRDF + @@literals = [ :stratified, :num_folds, :random_seed ] + @@object_properties = { :dataset_uri => OT['crossvalidationDataset'], :algorithm_uri => OT['crossvalidationAlgorithm'], + :validation_uri => OT['crossvalidationValidation'], :validations => OT['crossvalidationValidations'] } + @@classes = { :validations => OT['CrossvalidationValidations'] } def literal?( prop ) - @literals.index( prop ) != nil + @@literals.index( prop ) != nil end def literal_name( prop ) @@ -176,62 +166,15 @@ module Validation end def object_property?( prop ) - @object_properties.has_key?( prop ) + @@object_properties.has_key?( prop ) end def object_property_name( prop ) - return @object_properties[ prop ] + return @@object_properties[ prop ] end def class_name( prop ) - return @classes[ prop ] + return @@classes[ prop ] end - - end - - - class CrossvalidationToRDF < ContentToRDF - - def initialize() - @literals = [ :stratified, :num_folds, :random_seed ] - @object_properties = { :dataset_uri => OT['crossvalidationDataset'], :algorithm_uri => OT['crossvalidationAlgorithm'], - :validation_uri => OT['crossvalidationValidation'], :validations => OT['crossvalidationValidations'] } - @classes = { :validations => OT['CrossvalidationValidations'] } - end - end - - class ValidationToRDF < ContentToRDF - - def initialize() - @literals = [ :created_at, :real_runtime, :num_instances, :num_without_class, - :percent_without_class, :num_unpredicted, :percent_unpredicted, - :crossvalidation_fold, :crossvalidation_id, - :num_correct, :num_incorrect, :percent_correct, :percent_incorrect, - :area_under_roc, :false_negative_rate, :false_positive_rate, - :f_measure, :num_false_positives, :num_false_negatives, - :num_true_positives, :num_true_negatives, :precision, - :recall, :true_negative_rate, :true_positive_rate, - :confusion_matrix_value ] - # created at -> date - # owl.set_literal(OT['numInstances'],validation.num_instances) - # owl.set_literal(OT['numWithoutClass'],validation.num_without_class) - # owl.set_literal(OT['percentWithoutClass'],validation.percent_without_class) - # owl.set_literal(OT['numUnpredicted'],validation.num_unpredicted) - # owl.set_literal(OT['percentUnpredicted'],validation.percent_unpredicted) - - - @object_properties = { :model_uri => OT['validationModel'], :training_dataset_uri => OT['validationTrainingDataset'], - :prediction_feature => OT['predictedFeature'], :test_dataset_uri => OT['validationTestDataset'], - :prediction_dataset_uri => OT['validationPredictionDataset'], :crossvalidation_info => OT['hasValidationInfo'], - :classification_statistics => OT['hasValidationInfo'], - :class_value_statistics => OT['classValueStatistics'], :confusion_matrix => OT['confusionMatrix'], - :confusion_matrix_cell => OT['confusionMatrixCell'], :class_value => OT['class_value'], - :confusion_matrix_actual => OT['confusionMatrixActual'], :confusion_matrix_predicted => OT['confusionMatrixPredicted'] } - - @classes = { :crossvalidation_info => OT['CrossvalidationInfo'], :classification_statistics => OT['ClassificationStatistics'], - :class_value_statistics => OT['ClassValueStatistics'], - :confusion_matrix => OT['ConfusionMatrix'], :confusion_matrix_cell => OT['ConfusionMatrixCell']} - end - end end diff --git a/validation/validation_service.rb b/validation/validation_service.rb index 3231676..05f4b93 100644 --- a/validation/validation_service.rb +++ b/validation/validation_service.rb @@ -41,6 +41,7 @@ module Validation raise "do not set id manually" if params[:id] raise "do not set uri manually" if params[:uri] super params + # hack to overcome datamapper bug: save to set id save unless attribute_dirty?("id") raise "internal error, id not set "+to_yaml unless @id update :uri => $sinatra.url_for("/"+@id.to_s, :full) @@ -77,14 +78,13 @@ module Validation end LOGGER.debug "building model '"+algorithm_uri.to_s+"' "+params.inspect - model_uri = OpenTox::RestClientWrapper.post algorithm_uri,params - update :model_uri => model_uri - + model = OpenTox::Model::PredictionModel.build(algorithm_uri, params) + update :model_uri => model.uri validate_model end # validates a model - # PENDING: a new dataset is created to store the predictions, this should be optional: STORE predictions yes/no + # PENDING: a new dataset is created to store the predictions, this should be optional: delete predictions afterwards yes/no def validate_model LOGGER.debug "validating model '"+@model_uri+"'" @@ -125,6 +125,7 @@ module Validation raise "do not set id manually" if params[:id] raise "do not set uri manually" if params[:uri] super params + # hack to overcome datamapper bug: save to set id save unless attribute_dirty?("id") raise "internal error, id not set" unless @id update :uri => $sinatra.url_for("/crossvalidation/"+@id.to_s, :full) @@ -277,7 +278,8 @@ module Validation module Util - + # creates a new dataset from orig_dataset.data + # copies only features and compounds included in compounds array def self.create_new_dataset( orig_dataset_data, compounds, title, source ) dataset = OpenTox::Dataset.new @@ -306,7 +308,6 @@ module Validation uri = dataset.save raise "no dataset uri" if uri==nil || uri.to_s.length<1 return uri - end diff --git a/validation/validation_test.rb b/validation/validation_test.rb index f6a3e6a..72fabe6 100644 --- a/validation/validation_test.rb +++ b/validation/validation_test.rb @@ -134,8 +134,8 @@ class ValidationTest < Test::Unit::TestCase # #model_uri = "http://ot.model.de/12" # #data_uri_test = "http://ot.dataset.de/67" # -# model_uri = "http://ot.model.de/45" #trained with training split from hamster -# data_uri_test = "http://ot.dataset.de/367" #hamster test split +# model_uri = "http://ot.model.de/1" +# data_uri_test = "http://ot.dataset.de/3" # # post '', {:test_dataset_uri => data_uri_test, :model_uri => model_uri, :prediction_feature => FEATURE_URI} # verify_validation @@ -185,13 +185,13 @@ class ValidationTest < Test::Unit::TestCase #get '/prepare_examples' get '/test_examples' - #get '/350',nil,'HTTP_ACCEPT' => "application/rdf+xml" + #get '/1',nil,'HTTP_ACCEPT' => "application/rdf+xml" #get '/350',nil,'HTTP_ACCEPT' => "text/x-yaml" #get '/crossvalidation/1',nil,'HTTP_ACCEPT' => "application/rdf+xml" #get '/crossvalidation/1',nil,'HTTP_ACCEPT' => "text/x-yaml" - #puts last_response.body + puts last_response.body end private |