diff options
author | mguetlein <martin.guetlein@gmail.com> | 2011-05-06 20:05:04 +0200 |
---|---|---|
committer | mguetlein <martin.guetlein@gmail.com> | 2011-05-06 20:05:04 +0200 |
commit | 02220bab22c0ea60394b71dfba536284ada17eb8 (patch) | |
tree | a949adc0b870c1c98a87f12f283f56803e8c18bc | |
parent | 710976325cd0e23297e07c0a2f2460573287a49b (diff) |
validation utilizes/requires acceptValue for classification
-rwxr-xr-x | lib/ot_predictions.rb | 23 | ||||
-rwxr-xr-x | lib/predictions.rb | 24 | ||||
-rwxr-xr-x | report/report_content.rb | 14 | ||||
-rwxr-xr-x | report/validation_access.rb | 9 | ||||
-rwxr-xr-x | report/validation_data.rb | 46 |
5 files changed, 65 insertions, 51 deletions
diff --git a/lib/ot_predictions.rb b/lib/ot_predictions.rb index 5033425..1fd601c 100755 --- a/lib/ot_predictions.rb +++ b/lib/ot_predictions.rb @@ -54,13 +54,20 @@ module Lib @compounds = test_dataset.compounds LOGGER.debug "test dataset size: "+@compounds.size.to_s raise "test dataset is empty "+test_dataset_uri.to_s unless @compounds.size>0 - class_values = feature_type=="classification" ? OpenTox::Feature.find(prediction_feature, subjectid).domain : nil + + if feature_type=="classification" + accept_values = test_target_dataset.features[prediction_feature][OT.acceptValue] + raise "'"+OT.acceptValue.to_s+"' missing/invalid for feature '"+prediction_feature.to_s+"' in dataset '"+ + test_target_dataset_uri.to_s+"', acceptValues are: '"+accept_values.inspect+"'" if accept_values==nil or accept_values.length<2 + else + accept_values=nil + end actual_values = [] @compounds.each do |c| case feature_type when "classification" - actual_values << classification_value(test_target_dataset, c, prediction_feature, class_values) + actual_values << classification_value(test_target_dataset, c, prediction_feature, accept_values) when "regression" actual_values << regression_value(test_target_dataset, c, prediction_feature) end @@ -108,7 +115,7 @@ module Lib case feature_type when "classification" # TODO: remove LAZAR_PREDICTION_DATASET_HACK - predicted_values << classification_value(prediction_dataset, c, no_prediction_feature ? nil : predicted_variable, class_values) + predicted_values << classification_value(prediction_dataset, c, no_prediction_feature ? nil : predicted_variable, accept_values) when "regression" predicted_values << regression_value(prediction_dataset, c, no_prediction_feature ? nil : predicted_variable) end @@ -126,7 +133,7 @@ module Lib end task.progress(80) if task # loaded predicted values and confidence - super(predicted_values, actual_values, confidence_values, feature_type, class_values) + super(predicted_values, actual_values, confidence_values, feature_type, accept_values) raise "illegal num compounds "+num_info if @compounds.size != @predicted_values.size task.progress(100) if task # done with the mathmatics end @@ -143,11 +150,11 @@ module Lib end end - def classification_value(dataset, compound, feature, class_values) + def classification_value(dataset, compound, feature, accept_values) v = value(dataset, compound, feature) - i = class_values.index(v) - raise "illegal class_value of prediction (value is '"+v.to_s+"', class is '"+v.class.to_s+"'), possible values are "+ - class_values.inspect unless v==nil or i!=nil + i = accept_values.index(v.to_s) + raise "illegal class_value of prediction (value is '"+v.to_s+"'), accept values are "+ + accept_values.inspect unless v==nil or i!=nil i end diff --git a/lib/predictions.rb b/lib/predictions.rb index 5850024..db3c60c 100755 --- a/lib/predictions.rb +++ b/lib/predictions.rb @@ -23,13 +23,13 @@ module Lib actual_values, confidence_values, feature_type, - class_domain=nil ) + accept_values=nil ) @predicted_values = predicted_values @actual_values = actual_values @confidence_values = confidence_values @feature_type = feature_type - @class_domain = class_domain + @accept_values = accept_values @num_classes = 1 #puts "predicted: "+predicted_values.inspect @@ -58,15 +58,15 @@ module Lib case @feature_type when "classification" - raise "class_domain missing while performing classification" unless @class_domain - @num_classes = @class_domain.size + raise "accept_values missing while performing classification" unless @accept_values + @num_classes = @accept_values.size raise "num classes < 2" if @num_classes<2 { "predicted"=>@predicted_values, "actual"=>@actual_values }.each do |s,values| values.each{ |v| raise "illegal "+s+" classification-value ("+v.to_s+"),"+ "has to be either nil or index of predicted-values" if v!=nil and (!v.is_a?(Numeric) or v<0 or v>@num_classes)} end when "regresssion" - raise "class_domain != nil while performing regression" if @class_domain + raise "accept_values != nil while performing regression" if @accept_values { "predicted"=>@predicted_values, "actual"=>@actual_values }.each do |s,values| values.each{ |v| raise "illegal "+s+" regression-value ("+v.to_s+"),"+ "has to be either nil or number" unless v==nil or v.is_a?(Numeric)} @@ -89,7 +89,7 @@ module Lib case @feature_type when "classification" @confusion_matrix = [] - @class_domain.each do |v| + @accept_values.each do |v| @confusion_matrix.push( Array.new( @num_classes, 0 ) ) end @@ -235,8 +235,8 @@ module Lib res = {} (0..@num_classes-1).each do |actual| (0..@num_classes-1).each do |predicted| - res[{:confusion_matrix_actual => @class_domain[actual], - :confusion_matrix_predicted => @class_domain[predicted]}] = @confusion_matrix[actual][predicted] + res[{:confusion_matrix_actual => @accept_values[actual], + :confusion_matrix_predicted => @accept_values[predicted]}] = @confusion_matrix[actual][predicted] end end return res @@ -495,7 +495,7 @@ module Lib raise "no confidence values" if @confidence_values==nil raise "no class-value specified" if class_value==nil - class_index = @class_domain.index(class_value) + class_index = @accept_values.index(class_value) raise "class not found "+class_value.to_s if class_index==nil c = []; p = []; a = [] @@ -529,7 +529,7 @@ module Lib def predicted_value(instance_index) case @feature_type when "classification" - @predicted_values[instance_index]==nil ? nil : @class_domain[@predicted_values[instance_index]] + @predicted_values[instance_index]==nil ? nil : @accept_values[@predicted_values[instance_index]] when "regression" @predicted_values[instance_index] end @@ -542,7 +542,7 @@ module Lib def actual_value(instance_index) case @feature_type when "classification" - @actual_values[instance_index]==nil ? nil : @class_domain[@actual_values[instance_index]] + @actual_values[instance_index]==nil ? nil : @accept_values[@actual_values[instance_index]] when "regression" @actual_values[instance_index] end @@ -576,7 +576,7 @@ module Lib def prediction_feature_value_map(proc) res = {} (0..@num_classes-1).each do |i| - res[@class_domain[i]] = proc.call(i) + res[@accept_values[i]] = proc.call(i) end return res end diff --git a/report/report_content.rb b/report/report_content.rb index 1345e6f..36f9955 100755 --- a/report/report_content.rb +++ b/report/report_content.rb @@ -189,9 +189,9 @@ class Reports::ReportContent end @xml_report.add_paragraph(section_roc, section_text) if section_text - class_domain = validation_set.get_class_domain - class_domain.size.times do |i| - class_value = class_domain[i] + accept_values = validation_set.get_accept_values + accept_values.size.times do |i| + class_value = accept_values[i] image_title = image_titles ? image_titles[i] : "ROC Plot for class-value '"+class_value.to_s+"'" image_caption = image_captions ? image_captions[i] : nil plot_file_name = "roc_plot"+@tmp_file_count.to_s+".svg" @@ -236,11 +236,11 @@ class Reports::ReportContent image_titles=nil, image_captions=nil) - class_domain = validation_set.get_domain_for_attr(rank_attribute) - puts "ranking plot for "+rank_attribute.to_s+", class values: "+class_domain.to_s + accept_values = validation_set.get_class_values_for(rank_attribute) + puts "ranking plot for "+rank_attribute.to_s+", class values: "+accept_values.to_s - class_domain.size.times do |i| - class_value = class_domain[i] + accept_values.size.times do |i| + class_value = accept_values[i] if image_titles image_title = image_titles[i] else diff --git a/report/validation_access.rb b/report/validation_access.rb index 96dfbf3..22c7146 100755 --- a/report/validation_access.rb +++ b/report/validation_access.rb @@ -74,8 +74,13 @@ class Reports::ValidationDB validation.predicted_variable, subjectid, task) end - def get_class_domain( validation ) - OpenTox::Feature.new( validation.prediction_feature ).domain + def get_accept_values( validation ) + # PENDING So far, one has to load the whole dataset to get the accept_value from ambit + d = OpenTox::Dataset.find( validation.test_target_dataset_uri ) + accept_values = d.features[validation.prediction_feature][OT.acceptValue] + raise "cannot get accept values from dataset "+validation.test_target_dataset_uri.to_s+" for feature "+ + validation.prediction_feature+":\n"+d.features[validation.prediction_feature].to_yaml unless accept_values!=nil + accept_values end def feature_type( validation, subjectid=nil ) diff --git a/report/validation_data.rb b/report/validation_data.rb index 9212c98..fcb8fc0 100755 --- a/report/validation_data.rb +++ b/report/validation_data.rb @@ -104,9 +104,9 @@ module Reports # returns the predictions feature values (i.e. the domain of the class attribute) # - def get_class_domain() - @class_domain = @@validation_access.get_class_domain(self) unless @class_domain - @class_domain + def get_accept_values() + @accept_values = @@validation_access.get_accept_values(self) unless @accept_values + @accept_values end # is classification/regression validation? cache to save rest-calls @@ -233,7 +233,7 @@ module Reports # def get_true_prediction_feature_value # if all_classification? -# class_values = get_class_domain +# class_values = get_accept_values # if class_values.size == 2 # (0..1).each do |i| # return class_values[i] if (class_values[i].to_s.downcase == "true" || class_values[i].to_s.downcase == "active") @@ -243,21 +243,23 @@ module Reports # return nil # end - def get_class_domain( ) - return unique_value("get_class_domain") + def get_accept_values( ) + return unique_value("get_accept_values") end - def get_domain_for_attr( attribute ) - class_domain = get_class_domain() - if Validation::Validation.classification_property?(attribute) and - !Validation::Validation.depends_on_class_value?(attribute) - [ nil ] - elsif Validation::Validation.classification_property?(attribute) and - class_domain.size==2 and - Validation::Validation.complement_exists?(attribute) - [ class_domain[0] ] + def get_accept_values_for_attr( attribute ) + if !Validation::Validation.classification_property?(attribute) + [] else - class_domain + accept_values = get_accept_values() + if !Validation::Validation.depends_on_class_value?(attribute) + [ nil ] + elsif accept_values.size==2 and + Validation::Validation.complement_exists?(attribute) + [ accept_values[0] ] + else + accept_values + end end end @@ -298,10 +300,10 @@ module Reports #puts col_values.inspect # get domain for classification attribute, i.e. ["true","false"] - class_domain = get_domain_for_attr(attribute_val) + accept_values = get_accept_values_for_attr(attribute_val) # or the attribute has a complementary value, i.e. true_positive_rate # -> domain is reduced to one class value - first_value_elem = (class_domain.size==1 && class_domain[0]!=nil) + first_value_elem = (accept_values.size==1 && accept_values[0]!=nil) cell_values = {} row_values.each do |row| @@ -311,7 +313,7 @@ module Reports if v.send(attribute_row)==row and v.send(attribute_col)==col raise "two validation have equal row and column values"if val!=nil val = v.send(attribute_val) - val = val[class_domain[0]] if first_value_elem + val = val[accept_values[0]] if first_value_elem val = val.to_nice_s end end @@ -357,13 +359,13 @@ module Reports else attribute_not_nil[index] = true if remove_nil_attributes - class_domain = get_domain_for_attr(a) + accept_values = get_accept_values_for_attr(a) # get domain for classification attribute, i.e. ["true","false"] - if class_domain.size==1 && class_domain[0]!=nil + if accept_values.size==1 && accept_values[0]!=nil # or the attribute has a complementary value, i.e. true_positive_rate # -> domain is reduced to one class value raise "illegal state, value for "+a.to_s+" is no hash: '"+val.to_s+"'" unless (val.is_a?(Hash)) - val = val[class_domain[0]] + val = val[accept_values[0]] end if variance |