summary refs log tree commit diff
diff options
context:
space:
mode:
author mguetlein <martin.guetlein@gmail.com> 2011-05-06 20:05:04 +0200
committer mguetlein <martin.guetlein@gmail.com> 2011-05-06 20:05:04 +0200
commit 02220bab22c0ea60394b71dfba536284ada17eb8 (patch)
tree a949adc0b870c1c98a87f12f283f56803e8c18bc
parent 710976325cd0e23297e07c0a2f2460573287a49b (diff)
validation utilizes/requires acceptValue for classification
-rwxr-xr-x lib/ot_predictions.rb 23
-rwxr-xr-x lib/predictions.rb 24
-rwxr-xr-x report/report_content.rb 14
-rwxr-xr-x report/validation_access.rb 9
-rwxr-xr-x report/validation_data.rb 46
5 files changed, 65 insertions, 51 deletions
diff --git a/lib/ot_predictions.rb b/lib/ot_predictions.rb
index 5033425..1fd601c 100755
--- a/lib/ot_predictions.rb
+++ b/lib/ot_predictions.rb
@@ -54,13 +54,20 @@ module Lib
@compounds = test_dataset.compounds
LOGGER.debug "test dataset size: "+@compounds.size.to_s
raise "test dataset is empty "+test_dataset_uri.to_s unless @compounds.size>0
- class_values = feature_type=="classification" ? OpenTox::Feature.find(prediction_feature, subjectid).domain : nil
+
+ if feature_type=="classification"
+ accept_values = test_target_dataset.features[prediction_feature][OT.acceptValue]
+ raise "'"+OT.acceptValue.to_s+"' missing/invalid for feature '"+prediction_feature.to_s+"' in dataset '"+
+ test_target_dataset_uri.to_s+"', acceptValues are: '"+accept_values.inspect+"'" if accept_values==nil or accept_values.length<2
+ else
+ accept_values=nil
+ end
actual_values = []
@compounds.each do |c|
case feature_type
when "classification"
- actual_values << classification_value(test_target_dataset, c, prediction_feature, class_values)
+ actual_values << classification_value(test_target_dataset, c, prediction_feature, accept_values)
when "regression"
actual_values << regression_value(test_target_dataset, c, prediction_feature)
end
@@ -108,7 +115,7 @@ module Lib
case feature_type
when "classification"
# TODO: remove LAZAR_PREDICTION_DATASET_HACK
- predicted_values << classification_value(prediction_dataset, c, no_prediction_feature ? nil : predicted_variable, class_values)
+ predicted_values << classification_value(prediction_dataset, c, no_prediction_feature ? nil : predicted_variable, accept_values)
when "regression"
predicted_values << regression_value(prediction_dataset, c, no_prediction_feature ? nil : predicted_variable)
end
@@ -126,7 +133,7 @@ module Lib
end
task.progress(80) if task # loaded predicted values and confidence
- super(predicted_values, actual_values, confidence_values, feature_type, class_values)
+ super(predicted_values, actual_values, confidence_values, feature_type, accept_values)
raise "illegal num compounds "+num_info if @compounds.size != @predicted_values.size
task.progress(100) if task # done with the mathmatics
end
@@ -143,11 +150,11 @@ module Lib
end
end
- def classification_value(dataset, compound, feature, class_values)
+ def classification_value(dataset, compound, feature, accept_values)
v = value(dataset, compound, feature)
- i = class_values.index(v)
- raise "illegal class_value of prediction (value is '"+v.to_s+"', class is '"+v.class.to_s+"'), possible values are "+
- class_values.inspect unless v==nil or i!=nil
+ i = accept_values.index(v.to_s)
+ raise "illegal class_value of prediction (value is '"+v.to_s+"'), accept values are "+
+ accept_values.inspect unless v==nil or i!=nil
i
end
diff --git a/lib/predictions.rb b/lib/predictions.rb
index 5850024..db3c60c 100755
--- a/lib/predictions.rb
+++ b/lib/predictions.rb
@@ -23,13 +23,13 @@ module Lib
actual_values,
confidence_values,
feature_type,
- class_domain=nil )
+ accept_values=nil )
@predicted_values = predicted_values
@actual_values = actual_values
@confidence_values = confidence_values
@feature_type = feature_type
- @class_domain = class_domain
+ @accept_values = accept_values
@num_classes = 1
#puts "predicted: "+predicted_values.inspect
@@ -58,15 +58,15 @@ module Lib
case @feature_type
when "classification"
- raise "class_domain missing while performing classification" unless @class_domain
- @num_classes = @class_domain.size
+ raise "accept_values missing while performing classification" unless @accept_values
+ @num_classes = @accept_values.size
raise "num classes < 2" if @num_classes<2
{ "predicted"=>@predicted_values, "actual"=>@actual_values }.each do |s,values|
values.each{ |v| raise "illegal "+s+" classification-value ("+v.to_s+"),"+
"has to be either nil or index of predicted-values" if v!=nil and (!v.is_a?(Numeric) or v<0 or v>@num_classes)}
end
when "regresssion"
- raise "class_domain != nil while performing regression" if @class_domain
+ raise "accept_values != nil while performing regression" if @accept_values
{ "predicted"=>@predicted_values, "actual"=>@actual_values }.each do |s,values|
values.each{ |v| raise "illegal "+s+" regression-value ("+v.to_s+"),"+
"has to be either nil or number" unless v==nil or v.is_a?(Numeric)}
@@ -89,7 +89,7 @@ module Lib
case @feature_type
when "classification"
@confusion_matrix = []
- @class_domain.each do |v|
+ @accept_values.each do |v|
@confusion_matrix.push( Array.new( @num_classes, 0 ) )
end
@@ -235,8 +235,8 @@ module Lib
res = {}
(0..@num_classes-1).each do |actual|
(0..@num_classes-1).each do |predicted|
- res[{:confusion_matrix_actual => @class_domain[actual],
- :confusion_matrix_predicted => @class_domain[predicted]}] = @confusion_matrix[actual][predicted]
+ res[{:confusion_matrix_actual => @accept_values[actual],
+ :confusion_matrix_predicted => @accept_values[predicted]}] = @confusion_matrix[actual][predicted]
end
end
return res
@@ -495,7 +495,7 @@ module Lib
raise "no confidence values" if @confidence_values==nil
raise "no class-value specified" if class_value==nil
- class_index = @class_domain.index(class_value)
+ class_index = @accept_values.index(class_value)
raise "class not found "+class_value.to_s if class_index==nil
c = []; p = []; a = []
@@ -529,7 +529,7 @@ module Lib
def predicted_value(instance_index)
case @feature_type
when "classification"
- @predicted_values[instance_index]==nil ? nil : @class_domain[@predicted_values[instance_index]]
+ @predicted_values[instance_index]==nil ? nil : @accept_values[@predicted_values[instance_index]]
when "regression"
@predicted_values[instance_index]
end
@@ -542,7 +542,7 @@ module Lib
def actual_value(instance_index)
case @feature_type
when "classification"
- @actual_values[instance_index]==nil ? nil : @class_domain[@actual_values[instance_index]]
+ @actual_values[instance_index]==nil ? nil : @accept_values[@actual_values[instance_index]]
when "regression"
@actual_values[instance_index]
end
@@ -576,7 +576,7 @@ module Lib
def prediction_feature_value_map(proc)
res = {}
(0..@num_classes-1).each do |i|
- res[@class_domain[i]] = proc.call(i)
+ res[@accept_values[i]] = proc.call(i)
end
return res
end
diff --git a/report/report_content.rb b/report/report_content.rb
index 1345e6f..36f9955 100755
--- a/report/report_content.rb
+++ b/report/report_content.rb
@@ -189,9 +189,9 @@ class Reports::ReportContent
end
@xml_report.add_paragraph(section_roc, section_text) if section_text
- class_domain = validation_set.get_class_domain
- class_domain.size.times do |i|
- class_value = class_domain[i]
+ accept_values = validation_set.get_accept_values
+ accept_values.size.times do |i|
+ class_value = accept_values[i]
image_title = image_titles ? image_titles[i] : "ROC Plot for class-value '"+class_value.to_s+"'"
image_caption = image_captions ? image_captions[i] : nil
plot_file_name = "roc_plot"+@tmp_file_count.to_s+".svg"
@@ -236,11 +236,11 @@ class Reports::ReportContent
image_titles=nil,
image_captions=nil)
- class_domain = validation_set.get_domain_for_attr(rank_attribute)
- puts "ranking plot for "+rank_attribute.to_s+", class values: "+class_domain.to_s
+ accept_values = validation_set.get_class_values_for(rank_attribute)
+ puts "ranking plot for "+rank_attribute.to_s+", class values: "+accept_values.to_s
- class_domain.size.times do |i|
- class_value = class_domain[i]
+ accept_values.size.times do |i|
+ class_value = accept_values[i]
if image_titles
image_title = image_titles[i]
else
diff --git a/report/validation_access.rb b/report/validation_access.rb
index 96dfbf3..22c7146 100755
--- a/report/validation_access.rb
+++ b/report/validation_access.rb
@@ -74,8 +74,13 @@ class Reports::ValidationDB
validation.predicted_variable, subjectid, task)
end
- def get_class_domain( validation )
- OpenTox::Feature.new( validation.prediction_feature ).domain
+ def get_accept_values( validation )
+ # PENDING So far, one has to load the whole dataset to get the accept_value from ambit
+ d = OpenTox::Dataset.find( validation.test_target_dataset_uri )
+ accept_values = d.features[validation.prediction_feature][OT.acceptValue]
+ raise "cannot get accept values from dataset "+validation.test_target_dataset_uri.to_s+" for feature "+
+ validation.prediction_feature+":\n"+d.features[validation.prediction_feature].to_yaml unless accept_values!=nil
+ accept_values
end
def feature_type( validation, subjectid=nil )
diff --git a/report/validation_data.rb b/report/validation_data.rb
index 9212c98..fcb8fc0 100755
--- a/report/validation_data.rb
+++ b/report/validation_data.rb
@@ -104,9 +104,9 @@ module Reports
# returns the predictions feature values (i.e. the domain of the class attribute)
#
- def get_class_domain()
- @class_domain = @@validation_access.get_class_domain(self) unless @class_domain
- @class_domain
+ def get_accept_values()
+ @accept_values = @@validation_access.get_accept_values(self) unless @accept_values
+ @accept_values
end
# is classification/regression validation? cache to save rest-calls
@@ -233,7 +233,7 @@ module Reports
# def get_true_prediction_feature_value
# if all_classification?
-# class_values = get_class_domain
+# class_values = get_accept_values
# if class_values.size == 2
# (0..1).each do |i|
# return class_values[i] if (class_values[i].to_s.downcase == "true" || class_values[i].to_s.downcase == "active")
@@ -243,21 +243,23 @@ module Reports
# return nil
# end
- def get_class_domain( )
- return unique_value("get_class_domain")
+ def get_accept_values( )
+ return unique_value("get_accept_values")
end
- def get_domain_for_attr( attribute )
- class_domain = get_class_domain()
- if Validation::Validation.classification_property?(attribute) and
- !Validation::Validation.depends_on_class_value?(attribute)
- [ nil ]
- elsif Validation::Validation.classification_property?(attribute) and
- class_domain.size==2 and
- Validation::Validation.complement_exists?(attribute)
- [ class_domain[0] ]
+ def get_accept_values_for_attr( attribute )
+ if !Validation::Validation.classification_property?(attribute)
+ []
else
- class_domain
+ accept_values = get_accept_values()
+ if !Validation::Validation.depends_on_class_value?(attribute)
+ [ nil ]
+ elsif accept_values.size==2 and
+ Validation::Validation.complement_exists?(attribute)
+ [ accept_values[0] ]
+ else
+ accept_values
+ end
end
end
@@ -298,10 +300,10 @@ module Reports
#puts col_values.inspect
# get domain for classification attribute, i.e. ["true","false"]
- class_domain = get_domain_for_attr(attribute_val)
+ accept_values = get_accept_values_for_attr(attribute_val)
# or the attribute has a complementary value, i.e. true_positive_rate
# -> domain is reduced to one class value
- first_value_elem = (class_domain.size==1 && class_domain[0]!=nil)
+ first_value_elem = (accept_values.size==1 && accept_values[0]!=nil)
cell_values = {}
row_values.each do |row|
@@ -311,7 +313,7 @@ module Reports
if v.send(attribute_row)==row and v.send(attribute_col)==col
raise "two validation have equal row and column values"if val!=nil
val = v.send(attribute_val)
- val = val[class_domain[0]] if first_value_elem
+ val = val[accept_values[0]] if first_value_elem
val = val.to_nice_s
end
end
@@ -357,13 +359,13 @@ module Reports
else
attribute_not_nil[index] = true if remove_nil_attributes
- class_domain = get_domain_for_attr(a)
+ accept_values = get_accept_values_for_attr(a)
# get domain for classification attribute, i.e. ["true","false"]
- if class_domain.size==1 && class_domain[0]!=nil
+ if accept_values.size==1 && accept_values[0]!=nil
# or the attribute has a complementary value, i.e. true_positive_rate
# -> domain is reduced to one class value
raise "illegal state, value for "+a.to_s+" is no hash: '"+val.to_s+"'" unless (val.is_a?(Hash))
- val = val[class_domain[0]]
+ val = val[accept_values[0]]
end
if variance