diff options
author | mguetlein <martin.guetlein@gmail.com> | 2010-11-26 16:05:25 +0100 |
---|---|---|
committer | mguetlein <martin.guetlein@gmail.com> | 2010-11-26 16:05:25 +0100 |
commit | 16a25a63ba7882901a778745d0a32baaafc22cad (patch) | |
tree | 9bf89ee176575aa49da36a4bc57e8c09a75fad4a /lib | |
parent | a6d79dffc5f65300f06f3a33451ef26d0fb96f08 (diff) |
huge commit, main changes: validation_type and .finished introduced, supporting subtasks, reporting slightly refactored
Diffstat (limited to 'lib')
-rw-r--r-- | lib/active_record_setup.rb | 9 | ||||
-rw-r--r-- | lib/ot_predictions.rb | 11 | ||||
-rw-r--r-- | lib/predictions.rb | 67 | ||||
-rw-r--r-- | lib/validation_db.rb | 57 |
4 files changed, 110 insertions, 34 deletions
diff --git a/lib/active_record_setup.rb b/lib/active_record_setup.rb index cea4fb2..3682c7a 100644 --- a/lib/active_record_setup.rb +++ b/lib/active_record_setup.rb @@ -28,7 +28,14 @@ class ActiveRecord::Base key = key+"_uri" unless self.column_names.include?(key) key = key+"s" - $sinatra.halt 400,"no attribute found: '"+k.to_s+"'" unless self.column_names.include?(key) + unless self.column_names.include?(key) + err = "no attribute found: '"+k.to_s+"'" + if $sinatra + $sinatra.halt 400,err + else + raise err + end + end end end end diff --git a/lib/ot_predictions.rb b/lib/ot_predictions.rb index dcd7d09..0175a0c 100644 --- a/lib/ot_predictions.rb +++ b/lib/ot_predictions.rb @@ -15,7 +15,8 @@ module Lib return @compounds[instance_index] end - def initialize(is_classification, test_dataset_uri, test_target_dataset_uri, prediction_feature, prediction_dataset_uri, predicted_variable) + def initialize(is_classification, test_dataset_uri, test_target_dataset_uri, + prediction_feature, prediction_dataset_uri, predicted_variable, task=nil) LOGGER.debug("loading prediciton via test-dataset:'"+test_dataset_uri.to_s+ "', test-target-datset:'"+test_target_dataset_uri.to_s+ @@ -79,6 +80,7 @@ module Lib actual_values.push value end end + task.progress(40) if task # loaded actual values prediction_dataset = OpenTox::Dataset.find prediction_dataset_uri raise "prediction dataset not found: '"+prediction_dataset_uri.to_s+"'" unless prediction_dataset @@ -118,9 +120,11 @@ module Lib confidence_values << prediction_dataset.get_prediction_confidence(c, predicted_variable) end end + task.progress(80) if task # loaded predicted values and confidence super(predicted_values, actual_values, confidence_values, is_classification, class_values) raise "illegal num compounds "+num_info if @compounds.size != @predicted_values.size + task.progress(100) if task # done with the mathmatics end @@ -128,7 +132,7 @@ module Lib res = {} if @is_classification - (Lib::VAL_CLASS_PROPS_EXTENDED).each{ |s| res[s] = send(s)} + (Lib::VAL_CLASS_PROPS).each{ |s| res[s] = send(s)} else (Lib::VAL_REGR_PROPS).each{ |s| res[s] = send(s) } end @@ -151,7 +155,8 @@ module Lib a.push( "http://ambit.uni-plovdiv.bg:8080/ambit2/depict/cdk?search="+ URI.encode(OpenTox::Compound.new(:uri=>p.identifier(i)).smiles) ) if add_pic rescue => ex - a.push("Could not add pic: "+ex.message) + #a.push("Could not add pic: "+ex.message) + a.push(p.identifier(i)) end a << (format ? p.actual_value(i).to_nice_s : p.actual_value(i)) diff --git a/lib/predictions.rb b/lib/predictions.rb index 2873689..6e50e94 100644 --- a/lib/predictions.rb +++ b/lib/predictions.rb @@ -23,13 +23,13 @@ module Lib actual_values, confidence_values, is_classification, - prediction_feature_values=nil ) + class_domain=nil ) @predicted_values = predicted_values @actual_values = actual_values @confidence_values = confidence_values @is_classification = is_classification - @prediction_feature_values = prediction_feature_values + @class_domain = class_domain @num_classes = 1 #puts "predicted: "+predicted_values.inspect @@ -43,23 +43,27 @@ module Lib raise "illegal num confidence values "+num_info if @confidence_values.size != @predicted_values.size @confidence_values.each{ |c| raise "illegal confidence value: '"+c.to_s+"'" unless c==nil or (c.is_a?(Numeric) and c>=0 and c<=1) } - conf_val_tmp = {} - @confidence_values.each{ |c| conf_val_tmp[c] = nil } - if conf_val_tmp.keys.size<2 - LOGGER.warn("prediction w/o confidence values"); - @confidence_values=nil - end + ## check if there is more than one different conf value + ## DEPRECATED? not sure anymore what this was about, + ## I am pretty sure this was for r-plot of roc curves + ## roc curvers are now plotted manually + #conf_val_tmp = {} + #@confidence_values.each{ |c| conf_val_tmp[c] = nil } + #if conf_val_tmp.keys.size<2 + # LOGGER.warn("prediction w/o confidence values"); + # @confidence_values=nil + #end if @is_classification - raise "prediction_feature_values missing while performing classification" unless @prediction_feature_values - @num_classes = @prediction_feature_values.size + raise "class_domain missing while performing classification" unless @class_domain + @num_classes = @class_domain.size raise "num classes < 2" if @num_classes<2 { "predicted"=>@predicted_values, "actual"=>@actual_values }.each do |s,values| values.each{ |v| raise "illegal "+s+" classification-value ("+v.to_s+"),"+ "has to be either nil or index of predicted-values" if v!=nil and (v<0 or v>@num_classes)} end else - raise "prediction_feature_values != nil while performing regression" if @prediction_feature_values + raise "class_domain != nil while performing regression" if @class_domain { "predicted"=>@predicted_values, "actual"=>@actual_values }.each do |s,values| values.each{ |v| raise "illegal "+s+" regression-value ("+v.to_s+"),"+ "has to be either nil or number" unless v==nil or v.is_a?(Numeric)} @@ -81,7 +85,7 @@ module Lib if @is_classification @confusion_matrix = [] - @prediction_feature_values.each do |v| + @class_domain.each do |v| @confusion_matrix.push( Array.new( @num_classes, 0 ) ) end @@ -209,31 +213,35 @@ module Lib res = {} (0..@num_classes-1).each do |actual| (0..@num_classes-1).each do |predicted| - res[{:confusion_matrix_actual => @prediction_feature_values[actual], - :confusion_matrix_predicted => @prediction_feature_values[predicted]}] = @confusion_matrix[actual][predicted] + res[{:confusion_matrix_actual => @class_domain[actual], + :confusion_matrix_predicted => @class_domain[predicted]}] = @confusion_matrix[actual][predicted] end end return res end def area_under_roc(class_index=nil) - return prediction_feature_value_map( lambda{ |i| area_under_roc(i) } ) if class_index==nil + return prediction_feature_value_map( lambda{ |i| area_under_roc(i) } ) if + class_index==nil return 0.0 if @confidence_values==nil LOGGER.warn("TODO: implement approx computiation of AUC,"+ - "so far Wilcoxon-Man-Whitney is used (exponential)") if @predicted_values.size>1000 + "so far Wilcoxon-Man-Whitney is used (exponential)") if + @predicted_values.size>1000 + #puts "COMPUTING AUC "+class_index.to_s tp_conf = [] fp_conf = [] (0..@predicted_values.size-1).each do |i| if @predicted_values[i]==class_index - if @actual_values[i]==class_index + if @actual_values[i]==@predicted_values[i] tp_conf.push(@confidence_values[i]) else fp_conf.push(@confidence_values[i]) end end end + #puts tp_conf.inspect+"\n"+fp_conf.inspect+"\n\n" return 0.0 if tp_conf.size == 0 return 1.0 if fp_conf.size == 0 @@ -241,9 +249,9 @@ module Lib tp_conf.each do |tp| fp_conf.each do |fp| sum += 1 if tp>fp + sum += 0.5 if tp==fp end end - return sum / (tp_conf.size * fp_conf.size).to_f end @@ -460,21 +468,30 @@ module Lib # data for roc-plots ################################################################################### def get_roc_values(class_value) + + #puts "get_roc_values for class_value: "+class_value.to_s raise "no confidence values" if @confidence_values==nil - class_index = @prediction_feature_values.index(class_value) - raise "class not found "+class_value.to_s if class_index==nil and class_value!=nil + raise "no class-value specified" if class_value==nil + + class_index = @class_domain.index(class_value) + raise "class not found "+class_value.to_s if class_index==nil c = []; p = []; a = [] (0..@predicted_values.size-1).each do |i| # NOTE: not predicted instances are ignored here - if (@predicted_values[i]!=nil and (class_value==nil or @predicted_values[i]==class_index)) + if @predicted_values[i]!=nil and @predicted_values[i]==class_index c << @confidence_values[i] p << @predicted_values[i] a << @actual_values[i] end end - return {:predicted_values => p, :actual_values => a, :confidence_values => c} + # DO NOT raise exception here, maybe different validations are concated + #raise "no instance predicted as '"+class_value+"'" if p.size == 0 + + h = {:predicted_values => p, :actual_values => a, :confidence_values => c} + #puts h.inspect + return h end ######################################################################################## @@ -489,7 +506,7 @@ module Lib def predicted_value(instance_index) if @is_classification - @predicted_values[instance_index]==nil ? nil : @prediction_feature_values[@predicted_values[instance_index]] + @predicted_values[instance_index]==nil ? nil : @class_domain[@predicted_values[instance_index]] else @predicted_values[instance_index] end @@ -501,7 +518,7 @@ module Lib def actual_value(instance_index) if @is_classification - @actual_values[instance_index]==nil ? nil : @prediction_feature_values[@actual_values[instance_index]] + @actual_values[instance_index]==nil ? nil : @class_domain[@actual_values[instance_index]] else @actual_values[instance_index] end @@ -535,7 +552,7 @@ module Lib def prediction_feature_value_map(proc) res = {} (0..@num_classes-1).each do |i| - res[@prediction_feature_values[i]] = proc.call(i) + res[@class_domain[i]] = proc.call(i) end return res end diff --git a/lib/validation_db.rb b/lib/validation_db.rb index 6de0bd4..cb3ece7 100644 --- a/lib/validation_db.rb +++ b/lib/validation_db.rb @@ -6,7 +6,7 @@ require "lib/merge.rb" module Lib - VAL_PROPS_GENERAL = [ :validation_uri, :model_uri, :algorithm_uri, :training_dataset_uri, :prediction_feature, + VAL_PROPS_GENERAL = [ :validation_uri, :validation_type, :model_uri, :algorithm_uri, :training_dataset_uri, :prediction_feature, :test_dataset_uri, :test_target_dataset_uri, :prediction_dataset_uri, :date ] VAL_PROPS_SUM = [ :num_instances, :num_without_class, :num_unpredicted ] VAL_PROPS_AVG = [:real_runtime, :percent_without_class, :percent_unpredicted ] @@ -17,9 +17,11 @@ module Lib # :classification_statistics VAL_CLASS_PROPS_SINGLE_SUM = [ :num_correct, :num_incorrect, :confusion_matrix ] - VAL_CLASS_PROPS_SINGLE_AVG = [ :percent_correct, :percent_incorrect, :weighted_area_under_roc ] + VAL_CLASS_PROPS_SINGLE_AVG = [ :percent_correct, :percent_incorrect, + :weighted_area_under_roc, :accuracy ] VAL_CLASS_PROPS_SINGLE = VAL_CLASS_PROPS_SINGLE_SUM + VAL_CLASS_PROPS_SINGLE_AVG + # :class_value_statistics VAL_CLASS_PROPS_PER_CLASS_SUM = [ :num_false_positives, :num_false_negatives, :num_true_positives, :num_true_negatives ] @@ -32,7 +34,6 @@ module Lib :true_negative_rate, :true_positive_rate ] #:precision, :recall, VAL_CLASS_PROPS = VAL_CLASS_PROPS_SINGLE + VAL_CLASS_PROPS_PER_CLASS - VAL_CLASS_PROPS_EXTENDED = VAL_CLASS_PROPS + [:accuracy] # :regression_statistics VAL_REGR_PROPS = [ :root_mean_squared_error, :mean_absolute_error, :r_square, @@ -41,20 +42,66 @@ module Lib CROSS_VAL_PROPS = [:dataset_uri, :num_folds, :stratified, :random_seed] CROSS_VAL_PROPS_REDUNDANT = [:crossvalidation_uri, :algorithm_uri, :date] + CROSS_VAL_PROPS - ALL_PROPS = VAL_PROPS + VAL_CV_PROPS + VAL_CLASS_PROPS_EXTENDED + VAL_REGR_PROPS + CROSS_VAL_PROPS + ALL_PROPS = VAL_PROPS + VAL_CV_PROPS + VAL_CLASS_PROPS + VAL_REGR_PROPS + CROSS_VAL_PROPS VAL_MERGE_GENERAL = VAL_PROPS_GENERAL + VAL_CV_PROPS + [:classification_statistics, :regression_statistics] + CROSS_VAL_PROPS VAL_MERGE_SUM = VAL_PROPS_SUM + VAL_CLASS_PROPS_SINGLE_SUM + VAL_CLASS_PROPS_PER_CLASS_SUM VAL_MERGE_AVG = VAL_PROPS_AVG + VAL_CLASS_PROPS_SINGLE_AVG + VAL_CLASS_PROPS_PER_CLASS_AVG + VAL_REGR_PROPS - + + class Validation < ActiveRecord::Base serialize :classification_statistics serialize :regression_statistics alias_attribute :date, :created_at + + def validation_uri + $sinatra.url_for("/"+self.id.to_s, :full) + end + + def crossvalidation_uri + $sinatra.url_for("/crossvalidation/"+self.crossvalidation_id.to_s, :full) if self.crossvalidation_id + end + + def self.classification_property?( property ) + VAL_CLASS_PROPS.include?( property ) + end + + def self.depends_on_class_value?( property ) + VAL_CLASS_PROPS_PER_CLASS.include?( property ) + end + + def self.complement_exists?( property ) + VAL_CLASS_PROPS_PER_CLASS_COMPLEMENT_EXISTS.include?( property ) + end + end class Crossvalidation < ActiveRecord::Base alias_attribute :date, :created_at + + def crossvalidation_uri + $sinatra.url_for("/crossvalidation/"+self.id.to_s, :full) if self.id + end + + # convenience method to list all crossvalidations that are unique + # in terms of dataset_uri,num_folds,stratified,random_seed + # further conditions can be specified in __conditions__ + def self.find_all_uniq(conditions={}) + cvs = Lib::Crossvalidation.find(:all, :conditions => conditions) + uniq = [] + cvs.each do |cv| + match = false + uniq.each do |cv2| + if cv.dataset_uri == cv2.dataset_uri and cv.num_folds == cv2.num_folds and + cv.stratified == cv2.stratified and cv.random_seed == cv2.random_seed + match = true + break + end + end + uniq << cv unless match + end + uniq + end end end |