summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authormguetlein <martin.guetlein@gmail.com>2010-11-26 16:05:25 +0100
committermguetlein <martin.guetlein@gmail.com>2010-11-26 16:05:25 +0100
commit16a25a63ba7882901a778745d0a32baaafc22cad (patch)
tree9bf89ee176575aa49da36a4bc57e8c09a75fad4a /lib
parenta6d79dffc5f65300f06f3a33451ef26d0fb96f08 (diff)
huge commit, main changes: validation_type and .finished introduced, supporting subtasks, reporting slightly refactored
Diffstat (limited to 'lib')
-rw-r--r--lib/active_record_setup.rb9
-rw-r--r--lib/ot_predictions.rb11
-rw-r--r--lib/predictions.rb67
-rw-r--r--lib/validation_db.rb57
4 files changed, 110 insertions, 34 deletions
diff --git a/lib/active_record_setup.rb b/lib/active_record_setup.rb
index cea4fb2..3682c7a 100644
--- a/lib/active_record_setup.rb
+++ b/lib/active_record_setup.rb
@@ -28,7 +28,14 @@ class ActiveRecord::Base
key = key+"_uri"
unless self.column_names.include?(key)
key = key+"s"
- $sinatra.halt 400,"no attribute found: '"+k.to_s+"'" unless self.column_names.include?(key)
+ unless self.column_names.include?(key)
+ err = "no attribute found: '"+k.to_s+"'"
+ if $sinatra
+ $sinatra.halt 400,err
+ else
+ raise err
+ end
+ end
end
end
end
diff --git a/lib/ot_predictions.rb b/lib/ot_predictions.rb
index dcd7d09..0175a0c 100644
--- a/lib/ot_predictions.rb
+++ b/lib/ot_predictions.rb
@@ -15,7 +15,8 @@ module Lib
return @compounds[instance_index]
end
- def initialize(is_classification, test_dataset_uri, test_target_dataset_uri, prediction_feature, prediction_dataset_uri, predicted_variable)
+ def initialize(is_classification, test_dataset_uri, test_target_dataset_uri,
+ prediction_feature, prediction_dataset_uri, predicted_variable, task=nil)
LOGGER.debug("loading prediciton via test-dataset:'"+test_dataset_uri.to_s+
"', test-target-datset:'"+test_target_dataset_uri.to_s+
@@ -79,6 +80,7 @@ module Lib
actual_values.push value
end
end
+ task.progress(40) if task # loaded actual values
prediction_dataset = OpenTox::Dataset.find prediction_dataset_uri
raise "prediction dataset not found: '"+prediction_dataset_uri.to_s+"'" unless prediction_dataset
@@ -118,9 +120,11 @@ module Lib
confidence_values << prediction_dataset.get_prediction_confidence(c, predicted_variable)
end
end
+ task.progress(80) if task # loaded predicted values and confidence
super(predicted_values, actual_values, confidence_values, is_classification, class_values)
raise "illegal num compounds "+num_info if @compounds.size != @predicted_values.size
+ task.progress(100) if task # done with the mathematics
end
@@ -128,7 +132,7 @@ module Lib
res = {}
if @is_classification
- (Lib::VAL_CLASS_PROPS_EXTENDED).each{ |s| res[s] = send(s)}
+ (Lib::VAL_CLASS_PROPS).each{ |s| res[s] = send(s)}
else
(Lib::VAL_REGR_PROPS).each{ |s| res[s] = send(s) }
end
@@ -151,7 +155,8 @@ module Lib
a.push( "http://ambit.uni-plovdiv.bg:8080/ambit2/depict/cdk?search="+
URI.encode(OpenTox::Compound.new(:uri=>p.identifier(i)).smiles) ) if add_pic
rescue => ex
- a.push("Could not add pic: "+ex.message)
+ #a.push("Could not add pic: "+ex.message)
+ a.push(p.identifier(i))
end
a << (format ? p.actual_value(i).to_nice_s : p.actual_value(i))
diff --git a/lib/predictions.rb b/lib/predictions.rb
index 2873689..6e50e94 100644
--- a/lib/predictions.rb
+++ b/lib/predictions.rb
@@ -23,13 +23,13 @@ module Lib
actual_values,
confidence_values,
is_classification,
- prediction_feature_values=nil )
+ class_domain=nil )
@predicted_values = predicted_values
@actual_values = actual_values
@confidence_values = confidence_values
@is_classification = is_classification
- @prediction_feature_values = prediction_feature_values
+ @class_domain = class_domain
@num_classes = 1
#puts "predicted: "+predicted_values.inspect
@@ -43,23 +43,27 @@ module Lib
raise "illegal num confidence values "+num_info if @confidence_values.size != @predicted_values.size
@confidence_values.each{ |c| raise "illegal confidence value: '"+c.to_s+"'" unless c==nil or (c.is_a?(Numeric) and c>=0 and c<=1) }
- conf_val_tmp = {}
- @confidence_values.each{ |c| conf_val_tmp[c] = nil }
- if conf_val_tmp.keys.size<2
- LOGGER.warn("prediction w/o confidence values");
- @confidence_values=nil
- end
+ ## check if there is more than one different conf value
+ ## DEPRECATED? not sure anymore what this was about,
+ ## I am pretty sure this was for r-plot of roc curves
+ ## roc curves are now plotted manually
+ #conf_val_tmp = {}
+ #@confidence_values.each{ |c| conf_val_tmp[c] = nil }
+ #if conf_val_tmp.keys.size<2
+ # LOGGER.warn("prediction w/o confidence values");
+ # @confidence_values=nil
+ #end
if @is_classification
- raise "prediction_feature_values missing while performing classification" unless @prediction_feature_values
- @num_classes = @prediction_feature_values.size
+ raise "class_domain missing while performing classification" unless @class_domain
+ @num_classes = @class_domain.size
raise "num classes < 2" if @num_classes<2
{ "predicted"=>@predicted_values, "actual"=>@actual_values }.each do |s,values|
values.each{ |v| raise "illegal "+s+" classification-value ("+v.to_s+"),"+
"has to be either nil or index of predicted-values" if v!=nil and (v<0 or v>@num_classes)}
end
else
- raise "prediction_feature_values != nil while performing regression" if @prediction_feature_values
+ raise "class_domain != nil while performing regression" if @class_domain
{ "predicted"=>@predicted_values, "actual"=>@actual_values }.each do |s,values|
values.each{ |v| raise "illegal "+s+" regression-value ("+v.to_s+"),"+
"has to be either nil or number" unless v==nil or v.is_a?(Numeric)}
@@ -81,7 +85,7 @@ module Lib
if @is_classification
@confusion_matrix = []
- @prediction_feature_values.each do |v|
+ @class_domain.each do |v|
@confusion_matrix.push( Array.new( @num_classes, 0 ) )
end
@@ -209,31 +213,35 @@ module Lib
res = {}
(0..@num_classes-1).each do |actual|
(0..@num_classes-1).each do |predicted|
- res[{:confusion_matrix_actual => @prediction_feature_values[actual],
- :confusion_matrix_predicted => @prediction_feature_values[predicted]}] = @confusion_matrix[actual][predicted]
+ res[{:confusion_matrix_actual => @class_domain[actual],
+ :confusion_matrix_predicted => @class_domain[predicted]}] = @confusion_matrix[actual][predicted]
end
end
return res
end
def area_under_roc(class_index=nil)
- return prediction_feature_value_map( lambda{ |i| area_under_roc(i) } ) if class_index==nil
+ return prediction_feature_value_map( lambda{ |i| area_under_roc(i) } ) if
+ class_index==nil
return 0.0 if @confidence_values==nil
LOGGER.warn("TODO: implement approx computiation of AUC,"+
- "so far Wilcoxon-Man-Whitney is used (exponential)") if @predicted_values.size>1000
+ "so far Wilcoxon-Man-Whitney is used (exponential)") if
+ @predicted_values.size>1000
+ #puts "COMPUTING AUC "+class_index.to_s
tp_conf = []
fp_conf = []
(0..@predicted_values.size-1).each do |i|
if @predicted_values[i]==class_index
- if @actual_values[i]==class_index
+ if @actual_values[i]==@predicted_values[i]
tp_conf.push(@confidence_values[i])
else
fp_conf.push(@confidence_values[i])
end
end
end
+ #puts tp_conf.inspect+"\n"+fp_conf.inspect+"\n\n"
return 0.0 if tp_conf.size == 0
return 1.0 if fp_conf.size == 0
@@ -241,9 +249,9 @@ module Lib
tp_conf.each do |tp|
fp_conf.each do |fp|
sum += 1 if tp>fp
+ sum += 0.5 if tp==fp
end
end
-
return sum / (tp_conf.size * fp_conf.size).to_f
end
@@ -460,21 +468,30 @@ module Lib
# data for roc-plots ###################################################################################
def get_roc_values(class_value)
+
+ #puts "get_roc_values for class_value: "+class_value.to_s
raise "no confidence values" if @confidence_values==nil
- class_index = @prediction_feature_values.index(class_value)
- raise "class not found "+class_value.to_s if class_index==nil and class_value!=nil
+ raise "no class-value specified" if class_value==nil
+
+ class_index = @class_domain.index(class_value)
+ raise "class not found "+class_value.to_s if class_index==nil
c = []; p = []; a = []
(0..@predicted_values.size-1).each do |i|
# NOTE: not predicted instances are ignored here
- if (@predicted_values[i]!=nil and (class_value==nil or @predicted_values[i]==class_index))
+ if @predicted_values[i]!=nil and @predicted_values[i]==class_index
c << @confidence_values[i]
p << @predicted_values[i]
a << @actual_values[i]
end
end
- return {:predicted_values => p, :actual_values => a, :confidence_values => c}
+ # DO NOT raise exception here, maybe different validations are concatenated
+ #raise "no instance predicted as '"+class_value+"'" if p.size == 0
+
+ h = {:predicted_values => p, :actual_values => a, :confidence_values => c}
+ #puts h.inspect
+ return h
end
########################################################################################
@@ -489,7 +506,7 @@ module Lib
def predicted_value(instance_index)
if @is_classification
- @predicted_values[instance_index]==nil ? nil : @prediction_feature_values[@predicted_values[instance_index]]
+ @predicted_values[instance_index]==nil ? nil : @class_domain[@predicted_values[instance_index]]
else
@predicted_values[instance_index]
end
@@ -501,7 +518,7 @@ module Lib
def actual_value(instance_index)
if @is_classification
- @actual_values[instance_index]==nil ? nil : @prediction_feature_values[@actual_values[instance_index]]
+ @actual_values[instance_index]==nil ? nil : @class_domain[@actual_values[instance_index]]
else
@actual_values[instance_index]
end
@@ -535,7 +552,7 @@ module Lib
def prediction_feature_value_map(proc)
res = {}
(0..@num_classes-1).each do |i|
- res[@prediction_feature_values[i]] = proc.call(i)
+ res[@class_domain[i]] = proc.call(i)
end
return res
end
diff --git a/lib/validation_db.rb b/lib/validation_db.rb
index 6de0bd4..cb3ece7 100644
--- a/lib/validation_db.rb
+++ b/lib/validation_db.rb
@@ -6,7 +6,7 @@ require "lib/merge.rb"
module Lib
- VAL_PROPS_GENERAL = [ :validation_uri, :model_uri, :algorithm_uri, :training_dataset_uri, :prediction_feature,
+ VAL_PROPS_GENERAL = [ :validation_uri, :validation_type, :model_uri, :algorithm_uri, :training_dataset_uri, :prediction_feature,
:test_dataset_uri, :test_target_dataset_uri, :prediction_dataset_uri, :date ]
VAL_PROPS_SUM = [ :num_instances, :num_without_class, :num_unpredicted ]
VAL_PROPS_AVG = [:real_runtime, :percent_without_class, :percent_unpredicted ]
@@ -17,9 +17,11 @@ module Lib
# :classification_statistics
VAL_CLASS_PROPS_SINGLE_SUM = [ :num_correct, :num_incorrect, :confusion_matrix ]
- VAL_CLASS_PROPS_SINGLE_AVG = [ :percent_correct, :percent_incorrect, :weighted_area_under_roc ]
+ VAL_CLASS_PROPS_SINGLE_AVG = [ :percent_correct, :percent_incorrect,
+ :weighted_area_under_roc, :accuracy ]
VAL_CLASS_PROPS_SINGLE = VAL_CLASS_PROPS_SINGLE_SUM + VAL_CLASS_PROPS_SINGLE_AVG
+
# :class_value_statistics
VAL_CLASS_PROPS_PER_CLASS_SUM = [ :num_false_positives, :num_false_negatives,
:num_true_positives, :num_true_negatives ]
@@ -32,7 +34,6 @@ module Lib
:true_negative_rate, :true_positive_rate ] #:precision, :recall,
VAL_CLASS_PROPS = VAL_CLASS_PROPS_SINGLE + VAL_CLASS_PROPS_PER_CLASS
- VAL_CLASS_PROPS_EXTENDED = VAL_CLASS_PROPS + [:accuracy]
# :regression_statistics
VAL_REGR_PROPS = [ :root_mean_squared_error, :mean_absolute_error, :r_square,
@@ -41,20 +42,66 @@ module Lib
CROSS_VAL_PROPS = [:dataset_uri, :num_folds, :stratified, :random_seed]
CROSS_VAL_PROPS_REDUNDANT = [:crossvalidation_uri, :algorithm_uri, :date] + CROSS_VAL_PROPS
- ALL_PROPS = VAL_PROPS + VAL_CV_PROPS + VAL_CLASS_PROPS_EXTENDED + VAL_REGR_PROPS + CROSS_VAL_PROPS
+ ALL_PROPS = VAL_PROPS + VAL_CV_PROPS + VAL_CLASS_PROPS + VAL_REGR_PROPS + CROSS_VAL_PROPS
VAL_MERGE_GENERAL = VAL_PROPS_GENERAL + VAL_CV_PROPS + [:classification_statistics, :regression_statistics] + CROSS_VAL_PROPS
VAL_MERGE_SUM = VAL_PROPS_SUM + VAL_CLASS_PROPS_SINGLE_SUM + VAL_CLASS_PROPS_PER_CLASS_SUM
VAL_MERGE_AVG = VAL_PROPS_AVG + VAL_CLASS_PROPS_SINGLE_AVG + VAL_CLASS_PROPS_PER_CLASS_AVG + VAL_REGR_PROPS
-
+
+
class Validation < ActiveRecord::Base
serialize :classification_statistics
serialize :regression_statistics
alias_attribute :date, :created_at
+
+ def validation_uri
+ $sinatra.url_for("/"+self.id.to_s, :full)
+ end
+
+ def crossvalidation_uri
+ $sinatra.url_for("/crossvalidation/"+self.crossvalidation_id.to_s, :full) if self.crossvalidation_id
+ end
+
+ def self.classification_property?( property )
+ VAL_CLASS_PROPS.include?( property )
+ end
+
+ def self.depends_on_class_value?( property )
+ VAL_CLASS_PROPS_PER_CLASS.include?( property )
+ end
+
+ def self.complement_exists?( property )
+ VAL_CLASS_PROPS_PER_CLASS_COMPLEMENT_EXISTS.include?( property )
+ end
+
end
class Crossvalidation < ActiveRecord::Base
alias_attribute :date, :created_at
+
+ def crossvalidation_uri
+ $sinatra.url_for("/crossvalidation/"+self.id.to_s, :full) if self.id
+ end
+
+ # convenience method to list all crossvalidations that are unique
+ # in terms of dataset_uri,num_folds,stratified,random_seed
+ # further conditions can be specified in __conditions__
+ def self.find_all_uniq(conditions={})
+ cvs = Lib::Crossvalidation.find(:all, :conditions => conditions)
+ uniq = []
+ cvs.each do |cv|
+ match = false
+ uniq.each do |cv2|
+ if cv.dataset_uri == cv2.dataset_uri and cv.num_folds == cv2.num_folds and
+ cv.stratified == cv2.stratified and cv.random_seed == cv2.random_seed
+ match = true
+ break
+ end
+ end
+ uniq << cv unless match
+ end
+ uniq
+ end
end
end