diff options
author | mguetlein <martin.guetlein@gmail.com> | 2011-05-17 10:46:45 +0200 |
---|---|---|
committer | mguetlein <martin.guetlein@gmail.com> | 2011-05-17 10:46:45 +0200 |
commit | 9ce03c0f50bb9129b584327d56fa4c9277849227 (patch) | |
tree | 8c0213ec8e3e5ac2ca918ab03a78c6fa99f2fcdc /report | |
parent | eb5f8b5da9b247d62abc8a7b9eb2e44fe46a1c79 (diff) |
crossvalidation statistics fix: compute the crossvalidation statistics from the pooled crossvalidation predictions instead of averaging the statistics of the individual fold validations
Diffstat (limited to 'report')
-rw-r--r-- | report/plot_factory.rb | 2 |
-rwxr-xr-x | report/report_content.rb | 14 |
-rwxr-xr-x | report/validation_access.rb | 32 |
-rwxr-xr-x | report/validation_data.rb | 39 |
4 files changed, 66 insertions, 21 deletions
diff --git a/report/plot_factory.rb b/report/plot_factory.rb index a4e415a..b7c920a 100644 --- a/report/plot_factory.rb +++ b/report/plot_factory.rb @@ -354,7 +354,7 @@ module Reports c = roc_values[:confidence_values] p = roc_values[:predicted_values] a = roc_values[:actual_values] - raise "no prediction values for roc-plot" if p.size==0 + raise "no prediction values for confidence plot" if p.size==0 (0..p.size-2).each do |i| ((i+1)..p.size-1).each do |j| diff --git a/report/report_content.rb b/report/report_content.rb index ca04f25..cc4c13c 100755 --- a/report/report_content.rb +++ b/report/report_content.rb @@ -31,11 +31,11 @@ class Reports::ReportContent level = 0.90 test_matrix = Reports::ReportStatisticalTest.test_matrix( validation_set.validations, group_attribute, test_attribute, "paired_ttest", level ) - puts test_matrix.inspect + #puts test_matrix.inspect titles = test_matrix[:titles] matrix = test_matrix[:matrix] table = [] - puts titles.inspect + #puts titles.inspect table << [""] + titles titles.size.times do |i| table << [titles[i]] + matrix[i].collect{|v| (v==nil || v==0) ? "" : (v<0 ? 
"-" : "+") } @@ -47,10 +47,10 @@ class Reports::ReportContent end def add_predictions( validation_set, - validation_attributes=[], - section_title="Predictions", - section_text=nil, - table_title="Predictions") + validation_attributes=[], + section_title="Predictions", + section_text=nil, + table_title="Predictions") #PENING raise "validation attributes not implemented in get prediction array" if validation_attributes.size>0 @@ -109,7 +109,7 @@ class Reports::ReportContent if (search_for_existing_report_type) vals.size.times do |i| - puts i + #puts i if (i==0) vals[i] = [ "Reports" ] + vals[i] puts vals[i].inspect diff --git a/report/validation_access.rb b/report/validation_access.rb index e9b6e19..d0c3a1d 100755 --- a/report/validation_access.rb +++ b/report/validation_access.rb @@ -25,7 +25,7 @@ class Reports::ValidationDB raise OpenTox::NotFoundError.new "crossvalidation with id "+cv_id.to_s+" not found" unless cv raise OpenTox::BadRequestError.new("crossvalidation with id '"+cv_id.to_s+"' not finished") unless cv.finished #res += Validation::Validation.find( :all, :conditions => { :crossvalidation_id => cv_id } ).collect{|v| v.validation_uri.to_s} - res += Validation::Validation.find( :crossvalidation_id => cv_id ).collect{|v| v.validation_uri.to_s } + res += Validation::Validation.find( :crossvalidation_id => cv_id, :validation_type => "crossvalidation" ).collect{|v| v.validation_uri.to_s } else res += [u.to_s] end @@ -35,7 +35,7 @@ class Reports::ValidationDB def init_validation(validation, uri, subjectid=nil) - raise OpenTox::BadRequestError.new "not a validation uri: "+uri.to_s unless uri =~ /.*\/[0-9]+/ + raise OpenTox::BadRequestError.new "not a validation uri: "+uri.to_s unless uri =~ /\/[0-9]+$/ validation_id = uri.split("/")[-1] raise OpenTox::BadRequestError.new "invalid validation id "+validation_id.to_s unless validation_id!=nil and (validation_id.to_i > 0 || validation_id.to_s=="0" ) @@ -56,6 +56,31 @@ class Reports::ValidationDB subset_props.each{ 
|prop| validation.send("#{prop.to_s}=".to_sym, subset[prop]) } if subset end end + + def init_validation_from_cv_statistics( validation, cv_uri, subjectid=nil ) + + raise OpenTox::BadRequestError.new "not a crossvalidation uri: "+cv_uri.to_s unless cv_uri.uri? and cv_uri =~ /crossvalidation.*\/[0-9]+$/ + cv_id = cv_uri.split("/")[-1] + raise OpenTox::NotAuthorizedError.new "Not authorized: GET "+cv_uri.to_s if + AA_SERVER and !OpenTox::Authorization.authorized?(cv_uri,"GET",subjectid) + cv = Validation::Crossvalidation.get(cv_id) + raise OpenTox::NotFoundError.new "crossvalidation with id "+crossvalidation_id.to_s+" not found" unless cv + raise OpenTox::BadRequestError.new "crossvalidation with id "+crossvalidation_id.to_s+" is not finished yet" unless cv.finished + v = Validation::Validation.from_cv_statistics(cv_id, subjectid) + (Validation::VAL_PROPS + Validation::VAL_CV_PROPS).each do |p| + validation.send("#{p.to_s}=".to_sym, v.send(p)) + end + {:classification_statistics => Validation::VAL_CLASS_PROPS, + :regression_statistics => Validation::VAL_REGR_PROPS}.each do |subset_name,subset_props| + subset = v.send(subset_name) + subset_props.each{ |prop| validation.send("#{prop.to_s}=".to_sym, subset[prop]) } if subset + end + #cv props + Validation::CROSS_VAL_PROPS.each do |p| + validation.send("#{p.to_s}=".to_sym, cv.send(p.to_s)) + end + validation.crossvalidation_uri = cv_uri + end def init_cv(validation) @@ -76,7 +101,8 @@ class Reports::ValidationDB def get_accept_values( validation, subjectid=nil ) # PENDING So far, one has to load the whole dataset to get the accept_value from ambit - d = OpenTox::Dataset.find( validation.test_target_dataset_uri, subjectid ) + d = Lib::DatasetCache.find( validation.test_target_dataset_uri, subjectid ) + raise "cannot get test target dataset for accept values, dataset: "+validation.test_target_dataset_uri.to_s unless d accept_values = d.features[validation.prediction_feature][OT.acceptValue] raise "cannot get accept values 
from dataset "+validation.test_target_dataset_uri.to_s+" for feature "+ validation.prediction_feature+":\n"+d.features[validation.prediction_feature].to_yaml unless accept_values!=nil diff --git a/report/validation_data.rb b/report/validation_data.rb index 42b179b..11fa737 100755 --- a/report/validation_data.rb +++ b/report/validation_data.rb @@ -81,6 +81,12 @@ module Reports @subjectid = subjectid #raise "subjectid is nil" unless subjectid end + + def self.from_cv_statistics( cv_uri, subjectid = nil ) + v = ReportValidation.new(nil, subjectid) + @@validation_access.init_validation_from_cv_statistics(v, cv_uri, subjectid) + v + end # returns/creates predictions, cache to save rest-calls/computation time # @@ -409,17 +415,30 @@ module Reports #compute grouping grouping = Util.group(@validations, equal_attributes) #puts "groups "+grouping.size.to_s - - Lib::MergeObjects.register_merge_attributes( ReportValidation, - Validation::VAL_MERGE_AVG,Validation::VAL_MERGE_SUM,Validation::VAL_MERGE_GENERAL) unless - Lib::MergeObjects.merge_attributes_registered?(ReportValidation) - - #merge - grouping.each do |g| - new_set.validations.push(g[0].clone_validation) - g[1..-1].each do |v| - new_set.validations[-1] = Lib::MergeObjects.merge_objects(new_set.validations[-1],v) + + if ( equal_attributes.include?(:crossvalidation_id) ) + # do not merge, use crossvalidation statistics + raise "statistics vs merging problem" if equal_attributes.size!=1 + grouping.each do |g| + new_set.validations << ReportValidation.from_cv_statistics(g[0].crossvalidation_uri) end + else + #merge + Lib::MergeObjects.register_merge_attributes( ReportValidation, + Validation::VAL_MERGE_AVG,Validation::VAL_MERGE_SUM,Validation::VAL_MERGE_GENERAL) unless + Lib::MergeObjects.merge_attributes_registered?(ReportValidation) + grouping.each do |g| + new_set.validations << g[0].clone_validation + w = 1 + g[1..-1].each do |v| + new_set.validations[-1] = Lib::MergeObjects.merge_objects(new_set.validations[-1],v,w,1) 
+ w+=1 + end + end + end + + new_set.validations.each do |v| + raise "not a validation "+v.class.to_s+" "+v.to_s unless v.is_a?(Reports::ReportValidation) end return new_set |