summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author mguetlein <martin.guetlein@gmail.com> 2010-07-13 13:35:25 +0200
committer mguetlein <martin.guetlein@gmail.com> 2010-07-13 13:35:25 +0200
commit 2291a400b7fe5f7ffe4e173b8a910db75e64fbb6 (patch)
tree 3cb2947c146b794700ff8c831b7944e4fb7103ba
parent b0d858950a942d4f283663e1950b019823c2bbc2 (diff)
fixed r-square, added validated_datasets (to example page as well)
-rw-r--r-- EXAMPLES 19
-rw-r--r-- example.rb 5
-rw-r--r-- lib/predictions.rb 30
-rw-r--r-- lib/validation_db.rb 3
-rw-r--r-- validation/validation_application.rb 22
-rw-r--r-- validation/validation_format.rb 14
-rw-r--r-- validation/validation_service.rb 22
-rw-r--r-- validation/validation_test.rb 16
8 files changed, 101 insertions, 30 deletions
diff --git a/EXAMPLES b/EXAMPLES
index 22e6031..c0fb8fe 100644
--- a/EXAMPLES
+++ b/EXAMPLES
@@ -90,6 +90,25 @@ result example (accept-header: text/uri-list)
<<< <validation_service>/id_i
+create a validation by comparing values datasets
+-----------------------------------------------------
+
+>>> curl -X POST -d test_dataset_uri="<test_dataset_uri>" \
+ -d test_target_dataset_uri="<dataset_uri>" \
+ -d prediction_dataset_uri="<prediction_dataset_uri>" \
+ -d prediction_feature="<prediction_feature>" \
+ -d predicted_feature="<predicted_feature>" \
+ -d classification="true" \
+ <validation_service>/validate_datasets
+
+optional params:
+test_target_dataset_uri, default is test_dataset_uri
+regression or classification has to be specified
+
+result example (accept-header: text/uri-list)
+<<< <validation_service>/id_i
+
+
validate an algorithm on a dataset via training-test-split
-------------------------------------------------------------------
diff --git a/example.rb b/example.rb
index b0de21d..ceb1329 100644
--- a/example.rb
+++ b/example.rb
@@ -7,11 +7,13 @@ class Example
@@file_type="text/x-yaml"
@@model=File.join @@config[:services]["opentox-model"],"1"
@@feature= URI.encode("http://localhost/toxmodel/feature#Hamster%20Carcinogenicity%20(DSSTOX/CPDB)")
+ @@predicted_feature= URI.encode("http://localhost/toxmodel/feature#Hamster%20Carcinogenicity%20(DSSTOX/CPDB)_lazar_classification")
@@alg = File.join @@config[:services]["opentox-algorithm"],"lazar"
@@alg_params = "feature_generation_uri="+File.join(@@config[:services]["opentox-algorithm"],"fminer")
@@data=File.join @@config[:services]["opentox-dataset"],"1"
@@train_data=File.join @@config[:services]["opentox-dataset"],"2"
@@test_data=File.join @@config[:services]["opentox-dataset"],"3"
+ @@prediction_data=File.join @@config[:services]["opentox-dataset"],"5"
@@css_file="http://apps.ideaconsult.net:8080/ToxPredict/style/global.css"
@@summary=""
@@ -39,7 +41,8 @@ class Example
"validation_report_id" => "1",
"crossvalidation_report_id" => "1",
"css_file" => @@css_file,
- }
+ "prediction_dataset_uri" => @@prediction_data,
+ "predicted_feature" => @@predicted_feature }
sub.each do |k,v|
res.gsub!(/<#{k}>/,v)
diff --git a/lib/predictions.rb b/lib/predictions.rb
index a183534..f6351f8 100644
--- a/lib/predictions.rb
+++ b/lib/predictions.rb
@@ -97,6 +97,12 @@ module Lib
@variance_predicted = 0
@variance_actual = 0
+
+ @sum_actual = 0
+ @sum_predicted = 0
+ @sum_multiply = 0
+ @sum_squares_actual = 0
+ @sum_squares_predicted = 0
end
end
@@ -134,6 +140,12 @@ module Lib
@prediction_mean, old_prediction_mean, predicted_value )
@variance_actual = Util.compute_variance( @variance_actual, @num_predicted,
@actual_mean, old_actual_mean, actual_value )
+
+ @sum_actual += actual_value
+ @sum_predicted += predicted_value
+ @sum_multiply += (actual_value*predicted_value)
+ @sum_squares_actual += actual_value**2
+ @sum_squares_predicted += predicted_value**2
end
end
end
@@ -418,9 +430,23 @@ module Lib
Math.sqrt(@sum_abs_error / (@num_with_actual_value - @num_unpredicted).to_f)
end
+ def sum_squared_error
+ return @sum_squared_error
+ end
+
def r_square
- return 0 if @variance_actual==0
- return @variance_predicted / @variance_actual
+ return sample_correlation_coefficient ** 2
+ end
+
+ def sample_correlation_coefficient
+ # formula see http://en.wikipedia.org/wiki/Correlation_and_dependence#Pearson.27s_product-moment_coefficient
+ return ( @num_predicted * @sum_multiply - @sum_actual * @sum_predicted ) /
+ ( Math.sqrt( @num_predicted * @sum_squares_actual - @sum_actual**2 ) *
+ Math.sqrt( @num_predicted * @sum_squares_predicted - @sum_predicted**2 ) )
+ end
+
+ def total_sum_of_squares
+ return @variance_actual * ( @num_predicted - 1 )
end
def target_variance_predicted
diff --git a/lib/validation_db.rb b/lib/validation_db.rb
index aea036f..1152abf 100644
--- a/lib/validation_db.rb
+++ b/lib/validation_db.rb
@@ -46,7 +46,8 @@ module Lib
VAL_CLASS_PROPS_EXTENDED = VAL_CLASS_PROPS + [:accuracy]
# :regression_statistics
- VAL_REGR_PROPS = [ :root_mean_squared_error, :mean_absolute_error, :r_square, :target_variance_actual, :target_variance_predicted ]
+ VAL_REGR_PROPS = [ :root_mean_squared_error, :mean_absolute_error, :r_square,
+ :target_variance_actual, :target_variance_predicted, :sum_squared_error, :sample_correlation_coefficient ]
CROSS_VAL_PROPS = [:dataset_uri, :num_folds, :stratified, :random_seed]
CROSS_VAL_PROPS_REDUNDANT = [:crossvalidation_uri, :algorithm_uri, :created_at] + CROSS_VAL_PROPS
diff --git a/validation/validation_application.rb b/validation/validation_application.rb
index a10b75a..a43a2a6 100644
--- a/validation/validation_application.rb
+++ b/validation/validation_application.rb
@@ -200,16 +200,28 @@ post '/plain_training_test_split' do
result[:training_dataset_uri]+"\n"+result[:test_dataset_uri]+"\n"
end
-post '/create_validation' do
+post '/validate_datasets' do
content_type "text/uri-list"
task_uri = OpenTox::Task.as_task do
- LOGGER.info "creating validation "+params.inspect
+ LOGGER.info "validating values "+params.inspect
halt 400, "test_dataset_uri missing" unless params[:test_dataset_uri]
halt 400, "prediction_datset_uri missing" unless params[:prediction_dataset_uri]
- halt 400, "model_uri missing" unless params[:model_uri]
- v = Validation::Validation.new params
- v.compute_validation_stats()
+ if params[:model_uri]
+ v = Validation::Validation.new params
+ v.compute_validation_stats_with_model()
+ else
+ halt 400, "please specify 'model_uri' or 'prediction_feature'" unless params[:prediction_feature]
+ halt 400, "please specify 'model_uri' or 'predicted_feature'" unless params[:predicted_feature]
+ halt 400, "please specify 'model_uri' or set either 'classification' or 'regression' flag" unless
+ params[:classification] or params[:regression]
+
+ predicted_feature = params.delete("predicted_feature")
+ clazz = params.delete("classification")!=nil
+ regr = params.delete("regression")!=nil
+ v = Validation::Validation.new params
+ v.compute_validation_stats((clazz and !regr),predicted_feature)
+ end
content_type "text/uri-list"
v.validation_uri
end
diff --git a/validation/validation_format.rb b/validation/validation_format.rb
index 79526a9..a172f8a 100644
--- a/validation/validation_format.rb
+++ b/validation/validation_format.rb
@@ -74,16 +74,10 @@ module Validation
LITERALS = [ :created_at, :real_runtime, :num_instances, :num_without_class,
:percent_without_class, :num_unpredicted, :percent_unpredicted,
- :crossvalidation_fold, #:crossvalidation_id,
- :num_correct, :num_incorrect, :percent_correct, :percent_incorrect,
- :area_under_roc, :false_negative_rate, :false_positive_rate,
- :f_measure, :num_false_positives, :num_false_negatives,
- :num_true_positives, :num_true_negatives, :precision,
- :recall, :true_negative_rate, :true_positive_rate,
- :confusion_matrix_value, :weighted_area_under_roc,
- :target_variance_actual, :root_mean_squared_error,
- :target_variance_predicted, :mean_absolute_error, :r_square, :class_value,
- :confusion_matrix_actual, :confusion_matrix_predicted ]
+ :crossvalidation_fold ] +
+ (Lib::VAL_CLASS_PROPS - [ :confusion_matrix ]) + Lib::VAL_REGR_PROPS +
+ [ :class_value, :confusion_matrix_value,
+ :confusion_matrix_actual, :confusion_matrix_predicted ]
LITERAL_NAMES = {:created_at => OT["date"] }
diff --git a/validation/validation_service.rb b/validation/validation_service.rb
index 96fd84a..e2fc0eb 100644
--- a/validation/validation_service.rb
+++ b/validation/validation_service.rb
@@ -118,22 +118,28 @@ module Validation
:real_runtime => benchmark.real }
self.save
- compute_validation_stats(model)
+ compute_validation_stats_with_model( model )
end
-
- def compute_validation_stats(model = nil)
- model = OpenTox::Model::PredictionModel.find(self.model_uri) unless model
+ def compute_validation_stats_with_model( model=nil )
+
+ model = OpenTox::Model::PredictionModel.find(self.model_uri) if model==nil and self.model_uri
$sinatra.halt 400, "model not found: "+self.model_uri.to_s unless model
+ prediction_feature = self.prediction_feature ? nil : model.dependentVariables
+ algorithm_uri = self.algorithm_uri ? nil : model.algorithm
+ compute_validation_stats( model.classification?, model.predictedVariables, prediction_feature, algorithm_uri )
+ end
+
+ def compute_validation_stats( classification, predicted_feature, prediction_feature=nil, algorithm_uri=nil)
- self.attributes = { :prediction_feature => model.dependentVariables } unless self.prediction_feature
- self.attributes = { :algorithm_uri => model.algorithm } unless self.algorithm_uri
+ self.attributes = { :prediction_feature => prediction_feature } if self.prediction_feature==nil && prediction_feature
+ self.attributes = { :algorithm_uri => algorithm_uri } if self.algorithm_uri==nil && algorithm_uri
self.save
LOGGER.debug "computing prediction stats"
- prediction = Lib::OTPredictions.new( model.classification?,
+ prediction = Lib::OTPredictions.new( classification,
self.test_dataset_uri, self.test_target_dataset_uri, self.prediction_feature,
- self.prediction_dataset_uri, model.predictedVariables )
+ self.prediction_dataset_uri, predicted_feature )
if prediction.classification?
self.attributes = { :classification_statistics => prediction.compute_stats }
else
diff --git a/validation/validation_test.rb b/validation/validation_test.rb
index 3ae1460..97fff4b 100644
--- a/validation/validation_test.rb
+++ b/validation/validation_test.rb
@@ -29,16 +29,26 @@ class ValidationTest < Test::Unit::TestCase
# post "/test_validation",:select=>"6d" #,:report=>"yes,please"
# puts last_response.body
- #run_test("9a") #,"http://localhost/validation/report/validation/36") #, "http://localhost/validation/321")
+# post "/validate_datasets",{
+# :test_dataset_uri=>"http://localhost/dataset/204",
+# :prediction_dataset_uri=>"http://localhost/dataset/206",
+# :test_target_dataset_uri=>"http://localhost/dataset/202",
+# :prediction_feature=>"http://ot-dev.in-silico.ch/toxcreate/feature#IRIS%20unit%20risk",
+# :predicted_feature=>"http://ot-dev.in-silico.ch/toxcreate/feature#IRIS%20unit%20risk_lazar_regression",
+# :regression=>"true"}
+# #:classification=>"true"}
+# puts last_response.body
+
+ #run_test("1b" ) #, "http://localhost/validation/826") #,"http://localhost/validation/report/validation/36") #, "http://localhost/validation/321")
- run_test("9a","http://localhost/validation/crossvalidation/10") #, "http://localhost/validation/321")
+ #run_test("7a") #,"http://localhost/validation/crossvalidation/10") #, "http://localhost/validation/321")
#run_test("8b", "http://localhost/validation/crossvalidation/4")
#puts Nightly.build_nightly("1")
#prepare_examples
- #do_test_examples # USES CURL, DO NOT FORGET TO RESTART VALIDATION SERVICE
+ do_test_examples # USES CURL, DO NOT FORGET TO RESTART VALIDATION SERVICE
end
def app