diff options
author | mguetlein <martin.guetlein@gmail.com> | 2010-07-13 13:35:25 +0200 |
---|---|---|
committer | mguetlein <martin.guetlein@gmail.com> | 2010-07-13 13:35:25 +0200 |
commit | 2291a400b7fe5f7ffe4e173b8a910db75e64fbb6 (patch) | |
tree | 3cb2947c146b794700ff8c831b7944e4fb7103ba | |
parent | b0d858950a942d4f283663e1950b019823c2bbc2 (diff) |
fixed r-square, added validated_datasets (to example page as well)
-rw-r--r-- | EXAMPLES | 19 | ||||
-rw-r--r-- | example.rb | 5 | ||||
-rw-r--r-- | lib/predictions.rb | 30 | ||||
-rw-r--r-- | lib/validation_db.rb | 3 | ||||
-rw-r--r-- | validation/validation_application.rb | 22 | ||||
-rw-r--r-- | validation/validation_format.rb | 14 | ||||
-rw-r--r-- | validation/validation_service.rb | 22 | ||||
-rw-r--r-- | validation/validation_test.rb | 16 |
8 files changed, 101 insertions, 30 deletions
@@ -90,6 +90,25 @@ result example (accept-header: text/uri-list) <<< <validation_service>/id_i +create a validation by comparing values datasets +----------------------------------------------------- + +>>> curl -X POST -d test_dataset_uri="<test_dataset_uri>" \ + -d test_target_dataset_uri="<dataset_uri>" \ + -d prediction_dataset_uri="<prediction_dataset_uri>" \ + -d prediction_feature="<prediction_feature>" \ + -d predicted_feature="<predicted_feature>" \ + -d classification="true" \ + <validation_service>/validate_datasets + +optional params: +test_target_dataset_uri, default is test_dataset_uri +regression or classification has to be specified + +result example (accept-header: text/uri-list) +<<< <validation_service>/id_i + + validate an algorithm on a dataset via training-test-split ------------------------------------------------------------------- @@ -7,11 +7,13 @@ class Example @@file_type="text/x-yaml" @@model=File.join @@config[:services]["opentox-model"],"1" @@feature= URI.encode("http://localhost/toxmodel/feature#Hamster%20Carcinogenicity%20(DSSTOX/CPDB)") + @@predicted_feature= URI.encode("http://localhost/toxmodel/feature#Hamster%20Carcinogenicity%20(DSSTOX/CPDB)_lazar_classification") @@alg = File.join @@config[:services]["opentox-algorithm"],"lazar" @@alg_params = "feature_generation_uri="+File.join(@@config[:services]["opentox-algorithm"],"fminer") @@data=File.join @@config[:services]["opentox-dataset"],"1" @@train_data=File.join @@config[:services]["opentox-dataset"],"2" @@test_data=File.join @@config[:services]["opentox-dataset"],"3" + @@prediction_data=File.join @@config[:services]["opentox-dataset"],"5" @@css_file="http://apps.ideaconsult.net:8080/ToxPredict/style/global.css" @@summary="" @@ -39,7 +41,8 @@ class Example "validation_report_id" => "1", "crossvalidation_report_id" => "1", "css_file" => @@css_file, - } + "prediction_dataset_uri" => @@prediction_data, + "predicted_feature" => @@predicted_feature } sub.each do |k,v| res.gsub!(/<#{k}>/,v) diff --git a/lib/predictions.rb b/lib/predictions.rb index a183534..f6351f8 100644 --- a/lib/predictions.rb +++ b/lib/predictions.rb @@ -97,6 +97,12 @@ module Lib @variance_predicted = 0 @variance_actual = 0 + + @sum_actual = 0 + @sum_predicted = 0 + @sum_multiply = 0 + @sum_squares_actual = 0 + @sum_squares_predicted = 0 end end @@ -134,6 +140,12 @@ module Lib @prediction_mean, old_prediction_mean, predicted_value ) @variance_actual = Util.compute_variance( @variance_actual, @num_predicted, @actual_mean, old_actual_mean, actual_value ) + + @sum_actual += actual_value + @sum_predicted += predicted_value + @sum_multiply += (actual_value*predicted_value) + @sum_squares_actual += actual_value**2 + @sum_squares_predicted += predicted_value**2 end end end @@ -418,9 +430,23 @@ module Lib Math.sqrt(@sum_abs_error / (@num_with_actual_value - @num_unpredicted).to_f) end + def sum_squared_error + return @sum_squared_error + end + def r_square - return 0 if @variance_actual==0 - return @variance_predicted / @variance_actual + return sample_correlation_coefficient ** 2 + end + + def sample_correlation_coefficient + # formula see http://en.wikipedia.org/wiki/Correlation_and_dependence#Pearson.27s_product-moment_coefficient + return ( @num_predicted * @sum_multiply - @sum_actual * @sum_predicted ) / + ( Math.sqrt( @num_predicted * @sum_squares_actual - @sum_actual**2 ) * + Math.sqrt( @num_predicted * @sum_squares_predicted - @sum_predicted**2 ) ) + end + + def total_sum_of_squares + return @variance_actual * ( @num_predicted - 1 ) end def target_variance_predicted diff --git a/lib/validation_db.rb b/lib/validation_db.rb index aea036f..1152abf 100644 --- a/lib/validation_db.rb +++ b/lib/validation_db.rb @@ -46,7 +46,8 @@ module Lib VAL_CLASS_PROPS_EXTENDED = VAL_CLASS_PROPS + [:accuracy] # :regression_statistics - VAL_REGR_PROPS = [ :root_mean_squared_error, :mean_absolute_error, :r_square, :target_variance_actual, :target_variance_predicted ] + VAL_REGR_PROPS = [ :root_mean_squared_error, :mean_absolute_error, :r_square, + :target_variance_actual, :target_variance_predicted, :sum_squared_error, :sample_correlation_coefficient ] CROSS_VAL_PROPS = [:dataset_uri, :num_folds, :stratified, :random_seed] CROSS_VAL_PROPS_REDUNDANT = [:crossvalidation_uri, :algorithm_uri, :created_at] + CROSS_VAL_PROPS diff --git a/validation/validation_application.rb b/validation/validation_application.rb index a10b75a..a43a2a6 100644 --- a/validation/validation_application.rb +++ b/validation/validation_application.rb @@ -200,16 +200,28 @@ post '/plain_training_test_split' do result[:training_dataset_uri]+"\n"+result[:test_dataset_uri]+"\n" end -post '/create_validation' do +post '/validate_datasets' do content_type "text/uri-list" task_uri = OpenTox::Task.as_task do - LOGGER.info "creating validation "+params.inspect + LOGGER.info "validating values "+params.inspect halt 400, "test_dataset_uri missing" unless params[:test_dataset_uri] halt 400, "prediction_datset_uri missing" unless params[:prediction_dataset_uri] - halt 400, "model_uri missing" unless params[:model_uri] - v = Validation::Validation.new params - v.compute_validation_stats() + if params[:model_uri] + v = Validation::Validation.new params + v.compute_validation_stats_with_model() + else + halt 400, "please specify 'model_uri' or 'prediction_feature'" unless params[:prediction_feature] + halt 400, "please specify 'model_uri' or 'predicted_feature'" unless params[:predicted_feature] + halt 400, "please specify 'model_uri' or set either 'classification' or 'regression' flag" unless + params[:classification] or params[:regression] + + predicted_feature = params.delete("predicted_feature") + clazz = params.delete("classification")!=nil + regr = params.delete("regression")!=nil + v = Validation::Validation.new params + v.compute_validation_stats((clazz and !regr),predicted_feature) + end content_type "text/uri-list" v.validation_uri end diff --git a/validation/validation_format.rb b/validation/validation_format.rb index 79526a9..a172f8a 100644 --- a/validation/validation_format.rb +++ b/validation/validation_format.rb @@ -74,16 +74,10 @@ module Validation LITERALS = [ :created_at, :real_runtime, :num_instances, :num_without_class, :percent_without_class, :num_unpredicted, :percent_unpredicted, - :crossvalidation_fold, #:crossvalidation_id, - :num_correct, :num_incorrect, :percent_correct, :percent_incorrect, - :area_under_roc, :false_negative_rate, :false_positive_rate, - :f_measure, :num_false_positives, :num_false_negatives, - :num_true_positives, :num_true_negatives, :precision, - :recall, :true_negative_rate, :true_positive_rate, - :confusion_matrix_value, :weighted_area_under_roc, - :target_variance_actual, :root_mean_squared_error, - :target_variance_predicted, :mean_absolute_error, :r_square, :class_value, - :confusion_matrix_actual, :confusion_matrix_predicted ] + :crossvalidation_fold ] + + (Lib::VAL_CLASS_PROPS - [ :confusion_matrix ]) + Lib::VAL_REGR_PROPS + + [ :class_value, :confusion_matrix_value, + :confusion_matrix_actual, :confusion_matrix_predicted ] LITERAL_NAMES = {:created_at => OT["date"] } diff --git a/validation/validation_service.rb b/validation/validation_service.rb index 96fd84a..e2fc0eb 100644 --- a/validation/validation_service.rb +++ b/validation/validation_service.rb @@ -118,22 +118,28 @@ module Validation :real_runtime => benchmark.real } self.save - compute_validation_stats(model) + compute_validation_stats_with_model( model ) end - - def compute_validation_stats(model = nil) - model = OpenTox::Model::PredictionModel.find(self.model_uri) unless model + def compute_validation_stats_with_model( model=nil ) + + model = OpenTox::Model::PredictionModel.find(self.model_uri) if model==nil and self.model_uri $sinatra.halt 400, "model not found: "+self.model_uri.to_s unless model + prediction_feature = self.prediction_feature ? nil : model.dependentVariables + algorithm_uri = self.algorithm_uri ? nil : model.algorithm + compute_validation_stats( model.classification?, model.predictedVariables, prediction_feature, algorithm_uri ) + end + + def compute_validation_stats( classification, predicted_feature, prediction_feature=nil, algorithm_uri=nil) - self.attributes = { :prediction_feature => model.dependentVariables } unless self.prediction_feature - self.attributes = { :algorithm_uri => model.algorithm } unless self.algorithm_uri + self.attributes = { :prediction_feature => prediction_feature } if self.prediction_feature==nil && prediction_feature + self.attributes = { :algorithm_uri => algorithm_uri } if self.algorithm_uri==nil && algorithm_uri self.save LOGGER.debug "computing prediction stats" - prediction = Lib::OTPredictions.new( model.classification?, + prediction = Lib::OTPredictions.new( classification, self.test_dataset_uri, self.test_target_dataset_uri, self.prediction_feature, - self.prediction_dataset_uri, model.predictedVariables ) + self.prediction_dataset_uri, predicted_feature ) if prediction.classification? self.attributes = { :classification_statistics => prediction.compute_stats } else diff --git a/validation/validation_test.rb b/validation/validation_test.rb index 3ae1460..97fff4b 100644 --- a/validation/validation_test.rb +++ b/validation/validation_test.rb @@ -29,16 +29,26 @@ class ValidationTest < Test::Unit::TestCase # post "/test_validation",:select=>"6d" #,:report=>"yes,please" # puts last_response.body - #run_test("9a") #,"http://localhost/validation/report/validation/36") #, "http://localhost/validation/321") +# post "/validate_datasets",{ +# :test_dataset_uri=>"http://localhost/dataset/204", +# :prediction_dataset_uri=>"http://localhost/dataset/206", +# :test_target_dataset_uri=>"http://localhost/dataset/202", +# :prediction_feature=>"http://ot-dev.in-silico.ch/toxcreate/feature#IRIS%20unit%20risk", +# :predicted_feature=>"http://ot-dev.in-silico.ch/toxcreate/feature#IRIS%20unit%20risk_lazar_regression", +# :regression=>"true"} +# #:classification=>"true"} +# puts last_response.body + + #run_test("1b" ) #, "http://localhost/validation/826") #,"http://localhost/validation/report/validation/36") #, "http://localhost/validation/321") - run_test("9a","http://localhost/validation/crossvalidation/10") #, "http://localhost/validation/321") + #run_test("7a") #,"http://localhost/validation/crossvalidation/10") #, "http://localhost/validation/321") #run_test("8b", "http://localhost/validation/crossvalidation/4") #puts Nightly.build_nightly("1") #prepare_examples - #do_test_examples # USES CURL, DO NOT FORGET TO RESTART VALIDATION SERVICE + do_test_examples # USES CURL, DO NOT FORGET TO RESTART VALIDATION SERVICE end def app |