From 8a199a09a6d9ac8b0349af0d7c5b5320bdcec9b5 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Mon, 26 Mar 2012 11:29:14 +0200 Subject: add concordance correlation coefficient, adjust feature value plotting --- lib/predictions.rb | 68 ++++++++++++++++++++++++++++++++++------ lib/validation_db.rb | 2 +- report/plot_factory.rb | 5 +-- report/report_factory.rb | 5 ++- report/validation_access.rb | 39 ++++++++++++++++------- validation/validation_service.rb | 5 +-- 6 files changed, 96 insertions(+), 28 deletions(-) diff --git a/lib/predictions.rb b/lib/predictions.rb index 233267d..348ac44 100755 --- a/lib/predictions.rb +++ b/lib/predictions.rb @@ -577,6 +577,31 @@ module Lib # return weighted_sample_correlation_coefficient ** 2 #end + def concordance_correlation_coefficient + begin + numerator = 0 + @predicted_values.size.times do |i| + numerator += (@actual_values[i]-@actual_mean) * (@predicted_values[i]-@prediction_mean) if + @actual_values[i]!=nil and @predicted_values[i]!=nil + end + numerator *= 2 + denominator = total_sum_of_squares + denominator += prediction_total_sum_of_squares + denominator += @num_predicted * (@actual_mean - @prediction_mean)**2 + ccc = numerator / denominator + ( ccc.infinite? || ccc.nan? ) ? 0 : ccc + rescue; 0; end + end + + def prediction_total_sum_of_squares + #return @variance_actual * ( @num_predicted - 1 ) + sum = 0 + @predicted_values.size.times do |i| + sum += (@predicted_values[i]-@prediction_mean)**2 if @actual_values[i]!=nil and @predicted_values[i]!=nil + end + sum + end + def sample_correlation_coefficient begin # formula see http://en.wikipedia.org/wiki/Correlation_and_dependence#Pearson.27s_product-moment_coefficient @@ -804,22 +829,45 @@ module Lib end puts "num values "+p.size.to_s - pred = Predictions.new(p,a,c,"regression") + #a = [1.0,2.0, 3.0,4.0, 5.0] + #p = [1.5,2.25,3.0,3.75,4.5] + + #a = [1.0,2.0,3.0,4.0,5.0] + #p = [1.5,2.5,3.5,4.5,5.5] + + #p = a.collect{|v| v-0.5} + #p = a.collect{|v| v+0.5} + + #p = [2.0,2.5,3.0,3.5,4.0] + + c = Array.new(p.size,nil) + + data = { :predicted_values => p, :actual_values => a, :confidence_values => c, + :feature_type => "regression", :accept_values => nil } + + pred = Predictions.new(data) puts "internal" #puts "r-square old "+pred.r_square_old.to_s puts "cor "+pred.sample_correlation_coefficient.to_s - puts "weighted cor "+pred.weighted_sample_correlation_coefficient.to_s + #puts "weighted cor "+pred.weighted_sample_correlation_coefficient.to_s puts "r-square "+pred.r_square.to_s + puts "ccc "+pred.concordance_correlation_coefficient.to_s puts "R" - @@r = RinRuby.new(true,false) unless defined?(@@r) and @@r - @@r.assign "v1",a - @@r.assign "v2",p - puts "r cor "+@@r.pull("cor(v1,v2)").to_s - @@r.eval "fit <- lm(v1 ~ v2)" - @@r.eval "sum <- summary(fit)" - puts "r r-square "+@@r.pull("sum$r.squared").to_s - puts "r adjusted-r-square "+@@r.pull("sum$adj.r.squared").to_s + rutil = OpenTox::RUtil.new + + rutil.r.assign "v1",a + rutil.r.assign "v2",p + puts "r cor "+rutil.r.pull("cor(v1,v2)").to_s + rutil.r.eval "fit <- lm(v1 ~ v2)" + rutil.r.eval "sum <- summary(fit)" + puts "r r-square "+rutil.r.pull("sum$r.squared").to_s + puts "r adjusted-r-square "+rutil.r.pull("sum$adj.r.squared").to_s + rutil.r.eval "save.image(\"/tmp/image.R\")" + #rutil.r.eval "require(epiR)" + #rutil.r.eval "tmp.ccc <- epi.ccc(v1,v2)" + #puts "r ccc "+rutil.r.pull("tmp.ccc$rho.c$est").to_s + rutil.quit_r end def prediction_feature_value_map(proc) diff --git a/lib/validation_db.rb b/lib/validation_db.rb index c3a3f71..7d83966 100755 --- a/lib/validation_db.rb +++ b/lib/validation_db.rb @@ -38,7 +38,7 @@ module Validation # :regression_statistics VAL_REGR_PROPS = [ :root_mean_squared_error, :mean_absolute_error, :r_square, :weighted_r_square, :target_variance_actual, :target_variance_predicted, :sum_squared_error, :sample_correlation_coefficient, - :weighted_mean_absolute_error, :weighted_root_mean_squared_error ] + :weighted_mean_absolute_error, :weighted_root_mean_squared_error, :concordance_correlation_coefficient ] CROSS_VAL_PROPS = [:dataset_uri, :num_folds, :stratified, :random_seed] CROSS_VAL_PROPS_REDUNDANT = [:crossvalidation_uri, :algorithm_uri, :date] + CROSS_VAL_PROPS diff --git a/report/plot_factory.rb b/report/plot_factory.rb index f114dd3..6e90dbc 100644 --- a/report/plot_factory.rb +++ b/report/plot_factory.rb @@ -4,7 +4,8 @@ ENV['RANK_PLOTTER_JAR'] = "RankPlotter/RankPlotter.jar" unless ENV['RANK_PLOTTER CONF_PLOT_RANGE = { :accuracy => [0.45,1.05], :true_positive_rate => [0.45,1.05],:true_negative_rate => [0.45,1.05], :false_positive_rate => [0.45,1.05], :false_negative_rate => [0.45,1.05], :positive_predictive_value => [0.45,1.05], - :negative_predictive_value => [0.45,1.05], :r_square => [0, 1.05], :sample_correlation_coefficient => [0, 1.05] } + :negative_predictive_value => [0.45,1.05], :r_square => [0, 1.05], :sample_correlation_coefficient => [0, 1.05], + :concordance_correlation_coefficient => [0, 1.05] } class Array def swap!(i,j) @@ -124,7 +125,7 @@ module Reports else Reports::r_util.feature_value_plot(out_files, validation_set.validations[0].training_feature_dataset_uri, validation_set.validations[0].test_feature_dataset_uri, "Training Data", "Test Data", - nil, true, validation_set.validations[0].subjectid, waiting_task ) + nil, validation_set.validations[0].subjectid, waiting_task ) end end diff --git a/report/report_factory.rb b/report/report_factory.rb index 07a5ac5..f51b999 100755 --- a/report/report_factory.rb +++ b/report/report_factory.rb @@ -9,7 +9,7 @@ VAL_ATTR_CLASS = [ :num_instances, :num_unpredicted, :accuracy, :weighted_accura :area_under_roc, :f_measure, :true_positive_rate, :true_negative_rate, :positive_predictive_value, :negative_predictive_value ] VAL_ATTR_REGR = [ :num_instances, :num_unpredicted, :root_mean_squared_error, :weighted_root_mean_squared_error, :mean_absolute_error, :weighted_mean_absolute_error, :r_square, :weighted_r_square, - :sample_correlation_coefficient ] + :sample_correlation_coefficient, :concordance_correlation_coefficient ] #VAL_ATTR_BOX_PLOT_CLASS = [ :accuracy, :average_area_under_roc, # :area_under_roc, :f_measure, :true_positive_rate, :true_negative_rate ] @@ -113,6 +113,9 @@ module Reports::ReportFactory report.add_confidence_plot(validation_set, :root_mean_squared_error, nil) report.add_confidence_plot(validation_set, :r_square, nil) report.align_last_two_images "Confidence Plots" + report.add_confidence_plot(validation_set, :sample_correlation_coefficient, nil) + report.add_confidence_plot(validation_set, :concordance_correlation_coefficient, nil) + report.align_last_two_images "More Confidence Plots" end task.progress(70) if task report.add_train_test_plot( validation_set, false, OpenTox::SubTask.create(task,70,80) ) diff --git a/report/validation_access.rb b/report/validation_access.rb index aaa7bdc..e2a3978 100755 --- a/report/validation_access.rb +++ b/report/validation_access.rb @@ -145,20 +145,35 @@ class Reports::ValidationDB end def test_feature_dataset_uri(validation, subjectid) - m = OpenTox::Model::Generic.find(validation.model_uri, subjectid) - feat_gen = nil - m.metadata[OT.parameters].each do |h| - if h[DC.title] and h[DC.title]=~/feature_generation/ and h[OT.paramValue] - feat_gen = h[OT.paramValue] + training_features = Lib::DatasetCache.find( training_feature_dataset_uri(validation,subjectid), subjectid ) + test_dataset = Lib::DatasetCache.find( validation.test_dataset_uri, subjectid ) + features_found = true + training_features.features.keys.each do |f| + unless test_dataset.features.keys.include?(f) + features_found = false + LOGGER.debug "training-feature are not in test-datset #{f}" break end - end if m and m.metadata[OT.parameters] - raise "no feature creation alg found" unless feat_gen - feat_gen = File.join(feat_gen,"match") if feat_gen=~/fminer/ - uri = OpenTox::RestClientWrapper.post(feat_gen,{:subjectid => subjectid, - :feature_dataset_uri=>training_feature_dataset_uri(validation,subjectid), - :dataset_uri=>validation.test_dataset_uri}) - @@tmp_resources << uri + end + if features_found + LOGGER.debug "all training-features found in test-datset" + uri = test_dataset.uri + else + m = OpenTox::Model::Generic.find(validation.model_uri, subjectid) + feat_gen = nil + m.metadata[OT.parameters].each do |h| + if h[DC.title] and h[DC.title]=~/feature_generation/ and h[OT.paramValue] + feat_gen = h[OT.paramValue] + break + end + end if m and m.metadata[OT.parameters] + raise "no feature creation alg found" unless feat_gen + feat_gen = File.join(feat_gen,"match") if feat_gen=~/fminer/ + uri = OpenTox::RestClientWrapper.post(feat_gen,{:subjectid => subjectid, + :feature_dataset_uri=>training_feature_dataset_uri(validation,subjectid), + :dataset_uri=>validation.test_dataset_uri}) + @@tmp_resources << uri + end uri end diff --git a/validation/validation_service.rb b/validation/validation_service.rb index 25081f4..686a287 100755 --- a/validation/validation_service.rb +++ b/validation/validation_service.rb @@ -210,9 +210,10 @@ module Validation algorithm_uri = self.algorithm_uri ? nil : model.metadata[OT.algorithm] predicted_variable = model.predicted_variable(self.subjectid) predicted_confidence = model.predicted_confidence(self.subjectid) - raise "cannot determine whether model '"+model.uri.to_s+"' performs classification or regression, "+ + raise "cannot determine whether model '"+model.uri.to_s+"' performs classification or regression: '#{feature_type}', "+ "please set rdf-type of predictedVariables feature '"+predicted_variable.to_s+ - "' to NominalFeature or NumericFeature" if (feature_type.to_s!="classification" and feature_type.to_s!="regression") + "' to NominalFeature or NumericFeature" if + (feature_type.to_s!="classification" and feature_type.to_s!="regression") compute_prediction_data( feature_type, predicted_variable, predicted_confidence, prediction_feature, algorithm_uri, task ) end -- cgit v1.2.3