diff options
Diffstat (limited to 'lib')
-rwxr-xr-x | lib/ot_predictions.rb | 21 |
-rw-r--r-- | lib/prediction_data.rb | 4 |
-rwxr-xr-x | lib/predictions.rb | 68 |
-rwxr-xr-x | lib/validation_db.rb | 20 |
4 files changed, 90 insertions, 23 deletions
diff --git a/lib/ot_predictions.rb b/lib/ot_predictions.rb index 3be845b..2752fcc 100755 --- a/lib/ot_predictions.rb +++ b/lib/ot_predictions.rb @@ -35,7 +35,7 @@ module Lib OTPredictions.to_array( [self] ) end - def self.to_array( predictions, add_pic=false, format=false ) + def self.to_array( predictions, add_pic=false, format=false, validation_uris=nil ) confidence_available = false predictions.each do |p| @@ -43,7 +43,10 @@ module Lib end res = [] conf_column = nil + count = 0 predictions.each do |p| + v_uris = validation_uris[count] if validation_uris + count += 1 (0..p.num_instances-1).each do |i| a = [] @@ -75,6 +78,9 @@ module Lib conf_column = a.size if conf_column==nil a << p.confidence_value(i) end + if validation_uris + a << v_uris[i] + end a << p.identifier(i) res << a end @@ -90,12 +96,13 @@ module Lib end end header = [] - header << "compound" if add_pic - header << "actual value" - header << "predicted value" - header << "classification" if predictions[0].feature_type=="classification" - header << "confidence value" if predictions[0].confidence_values_available? - header << "compound-uri" + header << "Compound" if add_pic + header << "Actual value" + header << "Predicted value" + header << "Classification" if predictions[0].feature_type=="classification" + header << "Confidence value" if predictions[0].confidence_values_available? 
+ header << "Validation URI" if validation_uris + header << "Compound URI" res.insert(0, header) return res diff --git a/lib/prediction_data.rb b/lib/prediction_data.rb index 42da5fc..d387d24 100644 --- a/lib/prediction_data.rb +++ b/lib/prediction_data.rb @@ -270,7 +270,7 @@ module Lib def self.classification_vals(dataset, compound, feature, accept_values) v_indices = [] values(dataset, compound, feature).each do |v| - i = accept_values.index(v.to_s) + i = accept_values.index(v) raise "illegal class_value of prediction (value is '"+v.to_s+"'), accept values are "+ accept_values.inspect unless v==nil or i!=nil v_indices << i @@ -294,4 +294,4 @@ module Lib v_mod end end -end
\ No newline at end of file +end diff --git a/lib/predictions.rb b/lib/predictions.rb index 233267d..d929f1a 100755 --- a/lib/predictions.rb +++ b/lib/predictions.rb @@ -577,6 +577,31 @@ module Lib # return weighted_sample_correlation_coefficient ** 2 #end + def concordance_correlation_coefficient + begin + numerator = 0 + @predicted_values.size.times do |i| + numerator += (@actual_values[i]-@actual_mean) * (@predicted_values[i]-@prediction_mean) if + @actual_values[i]!=nil and @predicted_values[i]!=nil + end + numerator *= 2 + denominator = total_sum_of_squares + denominator += prediction_total_sum_of_squares + denominator += @num_predicted * (@actual_mean - @prediction_mean)**2 + ccc = numerator / denominator + ( ccc.infinite? || ccc.nan? ) ? 0 : ccc + rescue; 0; end + end + + def prediction_total_sum_of_squares + #return @variance_actual * ( @num_predicted - 1 ) + sum = 0 + @predicted_values.size.times do |i| + sum += (@predicted_values[i]-@prediction_mean)**2 if @actual_values[i]!=nil and @predicted_values[i]!=nil + end + sum + end + def sample_correlation_coefficient begin # formula see http://en.wikipedia.org/wiki/Correlation_and_dependence#Pearson.27s_product-moment_coefficient @@ -804,22 +829,45 @@ module Lib end puts "num values "+p.size.to_s - pred = Predictions.new(p,a,c,"regression") + #a = [1.0,2.0, 3.0,4.0, 5.0] + #p = [1.5,2.25,3.0,3.75,4.5] + + #a = [1.0,2.0,3.0,4.0,5.0] + #p = [1.5,2.5,3.5,4.5,5.5] + + #p = a.collect{|v| v-0.5} + #p = a.collect{|v| v+0.5} + + #p = [2.0,2.5,3.0,3.5,4.0] + + c = Array.new(p.size,nil) + + data = { :predicted_values => p, :actual_values => a, :confidence_values => c, + :feature_type => "regression", :accept_values => nil } + + pred = Predictions.new(data) puts "internal" #puts "r-square old "+pred.r_square_old.to_s puts "cor "+pred.sample_correlation_coefficient.to_s - puts "weighted cor "+pred.weighted_sample_correlation_coefficient.to_s + #puts "weighted cor "+pred.weighted_sample_correlation_coefficient.to_s puts 
"r-square "+pred.r_square.to_s + puts "ccc "+pred.concordance_correlation_coefficient.to_s puts "R" - @@r = RinRuby.new(true,false) unless defined?(@@r) and @@r - @@r.assign "v1",a - @@r.assign "v2",p - puts "r cor "+@@r.pull("cor(v1,v2)").to_s - @@r.eval "fit <- lm(v1 ~ v2)" - @@r.eval "sum <- summary(fit)" - puts "r r-square "+@@r.pull("sum$r.squared").to_s - puts "r adjusted-r-square "+@@r.pull("sum$adj.r.squared").to_s + rutil = OpenTox::RUtil.new + + rutil.r.assign "v1",a + rutil.r.assign "v2",p + puts "r cor "+rutil.r.pull("cor(v1,v2)").to_s + rutil.r.eval "fit <- lm(v1 ~ v2)" + rutil.r.eval "sum <- summary(fit)" + puts "r r-square "+rutil.r.pull("sum$r.squared").to_s + puts "r adjusted-r-square "+rutil.r.pull("sum$adj.r.squared").to_s + #rutil.r.eval "save.image(\"/tmp/image.R\")" + #rutil.r.eval "require(epiR)" + #rutil.r.eval "tmp.ccc <- epi.ccc(v1,v2)" + #puts "r ccc "+rutil.r.pull("tmp.ccc$rho.c$est").to_s + rutil.quit_r end def prediction_feature_value_map(proc) diff --git a/lib/validation_db.rb b/lib/validation_db.rb index c3a3f71..086853e 100755 --- a/lib/validation_db.rb +++ b/lib/validation_db.rb @@ -6,8 +6,9 @@ require "lib/merge.rb" module Validation - VAL_PROPS_GENERAL = [ :validation_uri, :validation_type, :model_uri, :algorithm_uri, :training_dataset_uri, :prediction_feature, - :test_dataset_uri, :test_target_dataset_uri, :prediction_dataset_uri, :date ] + VAL_PROPS_GENERAL = [ :validation_uri, :validation_type, :model_uri, :algorithm_uri, :algorithm_params, + :training_dataset_uri, :prediction_feature, :test_dataset_uri, :test_target_dataset_uri, + :prediction_dataset_uri, :date ] VAL_PROPS_SUM = [ :num_instances, :num_without_class, :num_unpredicted ] VAL_PROPS_AVG = [:real_runtime, :percent_without_class, :percent_unpredicted ] VAL_PROPS = VAL_PROPS_GENERAL + VAL_PROPS_SUM + VAL_PROPS_AVG @@ -38,10 +39,11 @@ module Validation # :regression_statistics VAL_REGR_PROPS = [ :root_mean_squared_error, :mean_absolute_error, :r_square, 
:weighted_r_square, :target_variance_actual, :target_variance_predicted, :sum_squared_error, :sample_correlation_coefficient, - :weighted_mean_absolute_error, :weighted_root_mean_squared_error ] + :weighted_mean_absolute_error, :weighted_root_mean_squared_error, :concordance_correlation_coefficient ] CROSS_VAL_PROPS = [:dataset_uri, :num_folds, :stratified, :random_seed] - CROSS_VAL_PROPS_REDUNDANT = [:crossvalidation_uri, :algorithm_uri, :date] + CROSS_VAL_PROPS + CROSS_VAL_PROPS_REDUNDANT = [:crossvalidation_uri, :algorithm_uri, :algorithm_params, + :prediction_feature, :date] + CROSS_VAL_PROPS ALL_PROPS = VAL_PROPS + VAL_CV_PROPS + VAL_CLASS_PROPS + VAL_REGR_PROPS + CROSS_VAL_PROPS @@ -55,6 +57,7 @@ module Validation attribute :validation_type attribute :model_uri attribute :algorithm_uri + attribute :algorithm_params attribute :training_dataset_uri attribute :test_target_dataset_uri attribute :test_dataset_uri @@ -77,6 +80,11 @@ module Validation index :model_uri index :validation_type index :crossvalidation_id + index :algorithm_uri + index :algorithm_params + index :prediction_feature + index :training_dataset_uri + index :test_dataset_uri attr_accessor :subjectid @@ -141,6 +149,8 @@ module Validation class Crossvalidation < Ohm::Model attribute :algorithm_uri + attribute :algorithm_params + attribute :prediction_feature attribute :dataset_uri attribute :date attribute :num_folds @@ -152,6 +162,8 @@ module Validation attr_accessor :subjectid index :algorithm_uri + index :algorithm_params + index :prediction_feature index :dataset_uri index :num_folds index :random_seed |