diff options
author | mguetlein <martin.guetlein@gmail.com> | 2012-06-09 10:36:27 +0200 |
---|---|---|
committer | mguetlein <martin.guetlein@gmail.com> | 2012-06-09 10:36:27 +0200 |
commit | ee0eb28550ada392b7903a49d650f40c695e8612 (patch) | |
tree | 4bfff63a3bcd48a12312cbbc960f11788a904d51 /report | |
parent | 14ade1644b69da6229ef6f06f83fc32b2d1957ce (diff) |
new branch val_exp
Diffstat (limited to 'report')
-rw-r--r-- | report/plot_factory.rb | 6 | ||||
-rwxr-xr-x | report/report_content.rb | 22 | ||||
-rwxr-xr-x | report/report_factory.rb | 23 | ||||
-rw-r--r-- | report/statistical_test.rb | 14 | ||||
-rwxr-xr-x | report/validation_access.rb | 49 | ||||
-rwxr-xr-x | report/validation_data.rb | 1 |
6 files changed, 79 insertions, 36 deletions
diff --git a/report/plot_factory.rb b/report/plot_factory.rb index 6e90dbc..61c3eea 100644 --- a/report/plot_factory.rb +++ b/report/plot_factory.rb @@ -246,7 +246,11 @@ module Reports data[v.send(title_attribute).to_s] << value end - Reports::r_util.boxplot( out_files, data) + data_array = [] + data.each do |k,v| + data_array << [k, v] + end + Reports::r_util.boxplot( out_files, data_array.sort) end def self.create_bar_plot( out_files, validation_set, title_attribute, value_attributes ) diff --git a/report/report_content.rb b/report/report_content.rb index 033b367..03adc30 100755 --- a/report/report_content.rb +++ b/report/report_content.rb @@ -28,11 +28,11 @@ class Reports::ReportContent end_section() end - def add_paired_ttest_tables( validation_set, + def add_ttest_tables( validation_set, group_attribute, test_attributes, ttest_level = 0.9, - section_title = "Paired t-test", + section_title = "T-Test", section_text = nil) raise "no test_attributes given: "+test_attributes.inspect unless test_attributes.is_a?(Array) and test_attributes.size>0 @@ -46,7 +46,7 @@ class Reports::ReportContent accept_values.each do |accept_value| test_matrix = Reports::ReportStatisticalTest.test_matrix( validation_set.validations, - group_attribute, test_attribute, accept_value, "paired_ttest", ttest_level ) + group_attribute, test_attribute, accept_value, "ttest", ttest_level ) #puts test_matrix.inspect titles = test_matrix[:titles] matrix = test_matrix[:matrix] @@ -356,7 +356,7 @@ class Reports::ReportContent @xml_report.add_paragraph(section_box, section_text) if section_text plot_png = nil; plot_svg = nil - begin + #begin plot_input = [] value_attributes.each do |a| accept = validation_set.get_accept_values_for_attr(a) @@ -388,13 +388,13 @@ class Reports::ReportContent @xml_report.add_imagefigures_in_row(section_box,f,"Boxplots #{i}") i+=1 end - rescue Exception => ex - msg = "WARNING could not create box plot: "+ex.message - LOGGER.error(msg) - rm_tmp_file(plot_png[:name]) if plot_png - rm_tmp_file(plot_svg[:name]) if plot_svg - @xml_report.add_paragraph(section_box, msg) - end + #rescue Exception => ex + # msg = "WARNING could not create box plot: "+ex.message + # LOGGER.error(msg) + # rm_tmp_file(plot_png[:name]) if plot_png + # rm_tmp_file(plot_svg[:name]) if plot_svg + # @xml_report.add_paragraph(section_box, msg) + #end end private diff --git a/report/report_factory.rb b/report/report_factory.rb index f73ffd9..b67fbf1 100755 --- a/report/report_factory.rb +++ b/report/report_factory.rb @@ -14,7 +14,7 @@ VAL_ATTR_REGR = [ :num_instances, :num_unpredicted, :root_mean_squared_error, #VAL_ATTR_BOX_PLOT_CLASS = [ :accuracy, :average_area_under_roc, # :area_under_roc, :f_measure, :true_positive_rate, :true_negative_rate ] VAL_ATTR_BOX_PLOT_CLASS = [ :accuracy, :area_under_roc, :f_measure, :true_positive_rate, :true_negative_rate, :positive_predictive_value, :negative_predictive_value ] -VAL_ATTR_BOX_PLOT_REGR = [ :root_mean_squared_error, :mean_absolute_error, :r_square ] +VAL_ATTR_BOX_PLOT_REGR = [ :root_mean_squared_error, :mean_absolute_error, :r_square, :concordance_correlation_coefficient ] VAL_ATTR_TTEST_REGR = [ :r_square, :root_mean_squared_error ] VAL_ATTR_TTEST_CLASS = [ :accuracy, :average_area_under_roc ] @@ -61,7 +61,7 @@ module Reports::ReportFactory i = 0 task_step = 100 / validation_set.size.to_f validation_set.validations.each do |v| - v.get_predictions( OpenTox::SubTask.create(task, i*task_step, (i+1)*task_step ) ) + v.get_predictions( OpenTox::SubTask.create(task, i*task_step, [(i+1)*task_step,100].min ) ) i += 1 end end @@ -299,7 +299,7 @@ module Reports::ReportFactory report.add_result(merged,result_attributes,res_titel,res_titel,res_text) # pending: regression stats have different scales!!! report.add_box_plot(set, :identifier, box_plot_attributes) - report.add_paired_ttest_tables(set, :identifier, ttest_attributes, ttest_significance) if ttest_significance>0 + report.add_ttest_tables(set, :identifier, ttest_attributes, ttest_significance) if ttest_significance>0 report.end_section end task.progress(100) if task @@ -314,7 +314,7 @@ module Reports::ReportFactory validation_set.get_values(:identifier).inspect) if validation_set.num_different_values(:identifier)<2 #validation_set.load_cv_attributes - pre_load_predictions( validation_set, OpenTox::SubTask.create(task,0,80) ) + #pre_load_predictions( validation_set, OpenTox::SubTask.create(task,0,80) ) report = Reports::ReportContent.new("Method comparison report") add_filter_warning(report, validation_set.filter_params) if validation_set.filter_params!=nil @@ -322,24 +322,33 @@ module Reports::ReportFactory case validation_set.unique_feature_type when "classification" result_attributes += VAL_ATTR_CLASS + ttest_attributes = VAL_ATTR_TTEST_CLASS box_plot_attributes = VAL_ATTR_BOX_PLOT_CLASS else result_attributes += VAL_ATTR_REGR + ttest_attributes = VAL_ATTR_TTEST_REGR box_plot_attributes = VAL_ATTR_BOX_PLOT_REGR end merged = validation_set.merge([:identifier]) merged.sort(:identifier) - merged.validations.each do |v| v.validation_uri = v.validation_uri.split(";").uniq.join(" ") v.validation_report_uri = v.validation_report_uri.split(";").uniq.join(" ") if v.validation_report_uri end - msg = merged.validations.collect{|v| v.identifier+" ("+Lib::MergeObjects.merge_count(v).to_s+"x)"}.join(", ") report.add_result(merged,result_attributes,"Average Results","Results",msg) - report.add_box_plot(validation_set, :identifier, box_plot_attributes) + if params[:ttest_attributes] and params[:ttest_attributes].chomp.size>0 + ttest_attributes = params[:ttest_attributes].split(",").collect{|a| a.to_sym} + end + ttest_significance = 0.9 + if params[:ttest_significance] + ttest_significance = params[:ttest_significance].to_f + end + #report.add_ttest_tables(validation_set, :identifier, ttest_attributes, ttest_significance) if ttest_significance>0 + #report.add_ttest_tables(validation_set, :identifier, ttest_attributes, 0.75) if ttest_significance>0 + #report.add_ttest_tables(validation_set, :identifier, ttest_attributes, 0.5) if ttest_significance>0 report end diff --git a/report/statistical_test.rb b/report/statistical_test.rb index da46f6b..fd4b810 100644 --- a/report/statistical_test.rb +++ b/report/statistical_test.rb @@ -6,7 +6,7 @@ module Reports class ReportStatisticalTest # __grouped_validations__ : array of validation arrays - def self.test_matrix( validations, group_attribute, test_attribute, class_value, test_method="paired_ttest", significance_level=0.95 ) + def self.test_matrix( validations, group_attribute, test_attribute, class_value, test_method="ttest", significance_level=0.95 ) raise "statistical-test: '"+test_method+"' does not exist" unless ReportStatisticalTest.respond_to?(test_method) grouped_validations = Reports::Util.group(validations, [group_attribute]) @@ -35,12 +35,20 @@ module Reports {:titles => titles, :matrix => matrix, :num_results => grouped_validations[0].size} end - def self.paired_ttest( validations1, validations2, attribute, class_value, significance_level=0.95 ) + def self.ttest( validations1, validations2, attribute, class_value, significance_level=0.95 ) array1 = validations1.collect{ |v| (v.send(attribute).is_a?(Hash) ? v.send(attribute)[class_value].to_f : v.send(attribute).to_f) } array2 = validations2.collect{ |v| (v.send(attribute).is_a?(Hash) ? v.send(attribute)[class_value].to_f : v.send(attribute).to_f) } LOGGER.debug "paired-t-testing "+attribute.to_s+" "+array1.inspect+" vs "+array2.inspect - Reports::r_util.paired_ttest(array1, array2, significance_level) + if array1.size>1 && array2.size>1 + Reports::r_util.paired_ttest(array1, array2, significance_level) + elsif array1.size==1 && array2.size>1 + -1 * Reports::r_util.ttest(array2, array1[0], significance_level) + elsif array1.size>1 && array2.size==1 + Reports::r_util.ttest(array1, array2[0], significance_level) + else + raise "illegal input for ttest" + end end end diff --git a/report/validation_access.rb b/report/validation_access.rb index e2a3978..4d9ed9f 100755 --- a/report/validation_access.rb +++ b/report/validation_access.rb @@ -195,30 +195,50 @@ class Reports::ValidationDB Lib::OTPredictions.new( data.data, data.compounds ) end + @@accept_values = {} + def get_accept_values( validation, subjectid=nil ) - # PENDING So far, one has to load the whole dataset to get the accept_value from ambit - test_target_datasets = validation.test_target_dataset_uri - test_target_datasets = validation.test_dataset_uri unless test_target_datasets - res = nil - test_target_datasets.split(";").each do |test_target_dataset| - d = Lib::DatasetCache.find( test_target_dataset, subjectid ) - raise "cannot get test target dataset for accept values, dataset: "+test_target_dataset.to_s unless d - accept_values = d.accept_values(validation.prediction_feature) - raise "cannot get accept values from dataset "+test_target_dataset.to_s+" for feature "+ - validation.prediction_feature+":\n"+d.features[validation.prediction_feature].to_yaml unless accept_values!=nil - raise "different accept values" if res && res!=accept_values - res = accept_values + begin + return @@accept_values[validation.prediction_feature] if @@accept_values[validation.prediction_feature] + LOGGER.debug "get accept values ..." + pred = OpenTox::Feature.find(validation.prediction_feature) + accept = pred.metadata[OT.acceptValue] + accept = accept[0] if accept.is_a?(Array) and accept.size==1 and accept[0].is_a?(Array) + raise unless accept.is_a?(Array) and accept.size>1 + @@accept_values[validation.prediction_feature] = accept + LOGGER.debug "get accept values ... #{accept} #{accept.size}" + accept + rescue + # PENDING So far, one has to load the whole dataset to get the accept_value from ambit + test_target_datasets = validation.test_target_dataset_uri + test_target_datasets = validation.test_dataset_uri unless test_target_datasets + res = nil + test_target_datasets.split(";").each do |test_target_dataset| + d = Lib::DatasetCache.find( test_target_dataset, subjectid ) + raise "cannot get test target dataset for accept values, dataset: "+test_target_dataset.to_s unless d + accept_values = d.accept_values(validation.prediction_feature) + raise "cannot get accept values from dataset "+test_target_dataset.to_s+" for feature "+ + validation.prediction_feature+":\n"+d.features[validation.prediction_feature].to_yaml unless accept_values!=nil + raise "different accept values" if res && res!=accept_values + res = accept_values + end + res end - res end def feature_type( validation, subjectid=nil ) - OpenTox::Model::Generic.new(validation.model_uri).feature_type(subjectid) + if validation.model_uri.include?(";") + model_uri = validation.model_uri.split(";")[0] + else + model_uri = validation.model_uri + end + OpenTox::Model::Generic.new(model_uri).feature_type(subjectid) #get_model(validation).classification? end def predicted_variable(validation, subjectid=nil) raise "cannot derive model depended props for merged validations" if Lib::MergeObjects.merged?(validation) + raise "multiple models in this validation, cannot get one predicted variable (#{validation.model_uri})" if validation.model_uri.include?(";") model = OpenTox::Model::Generic.find(validation.model_uri, subjectid) raise OpenTox::NotFoundError.new "model not found '"+validation.model_uri+"'" unless model model.predicted_variable(subjectid) @@ -226,6 +246,7 @@ class Reports::ValidationDB def predicted_confidence(validation, subjectid=nil) raise "cannot derive model depended props for merged validations" if Lib::MergeObjects.merged?(validation) + raise "multiple models in this validation, cannot get one predicted confidence (#{validation.model_uri})" if validation.model_uri.include?(";") model = OpenTox::Model::Generic.find(validation.model_uri, subjectid) raise OpenTox::NotFoundError.new "model not found '"+validation.model_uri+"'" unless model model.predicted_confidence(subjectid) diff --git a/report/validation_data.rb b/report/validation_data.rb index 3806fd7..95636fc 100755 --- a/report/validation_data.rb +++ b/report/validation_data.rb @@ -396,6 +396,7 @@ module Reports @validations.each do |v| index = -1 array.push(attributes.collect do |a| + index += 1 if VAL_ATTR_VARIANCE.index(a) variance = v.send( (a.to_s+"_variance").to_sym ) |