summary | refs | log | tree | commit | diff
path: root/report
diff options
context:
space:
mode:
author: mguetlein <martin.guetlein@gmail.com> 2012-06-09 10:36:27 +0200
committer: mguetlein <martin.guetlein@gmail.com> 2012-06-09 10:36:27 +0200
commit: ee0eb28550ada392b7903a49d650f40c695e8612 (patch)
tree: 4bfff63a3bcd48a12312cbbc960f11788a904d51 /report
parent: 14ade1644b69da6229ef6f06f83fc32b2d1957ce (diff)
new branch val_exp
Diffstat (limited to 'report')
-rw-r--r-- report/plot_factory.rb | 6
-rwxr-xr-x report/report_content.rb | 22
-rwxr-xr-x report/report_factory.rb | 23
-rw-r--r-- report/statistical_test.rb | 14
-rwxr-xr-x report/validation_access.rb | 49
-rwxr-xr-x report/validation_data.rb | 1
6 files changed, 79 insertions, 36 deletions
diff --git a/report/plot_factory.rb b/report/plot_factory.rb
index 6e90dbc..61c3eea 100644
--- a/report/plot_factory.rb
+++ b/report/plot_factory.rb
@@ -246,7 +246,11 @@ module Reports
data[v.send(title_attribute).to_s] << value
end
- Reports::r_util.boxplot( out_files, data)
+ data_array = []
+ data.each do |k,v|
+ data_array << [k, v]
+ end
+ Reports::r_util.boxplot( out_files, data_array.sort)
end
def self.create_bar_plot( out_files, validation_set, title_attribute, value_attributes )
diff --git a/report/report_content.rb b/report/report_content.rb
index 033b367..03adc30 100755
--- a/report/report_content.rb
+++ b/report/report_content.rb
@@ -28,11 +28,11 @@ class Reports::ReportContent
end_section()
end
- def add_paired_ttest_tables( validation_set,
+ def add_ttest_tables( validation_set,
group_attribute,
test_attributes,
ttest_level = 0.9,
- section_title = "Paired t-test",
+ section_title = "T-Test",
section_text = nil)
raise "no test_attributes given: "+test_attributes.inspect unless test_attributes.is_a?(Array) and test_attributes.size>0
@@ -46,7 +46,7 @@ class Reports::ReportContent
accept_values.each do |accept_value|
test_matrix = Reports::ReportStatisticalTest.test_matrix( validation_set.validations,
- group_attribute, test_attribute, accept_value, "paired_ttest", ttest_level )
+ group_attribute, test_attribute, accept_value, "ttest", ttest_level )
#puts test_matrix.inspect
titles = test_matrix[:titles]
matrix = test_matrix[:matrix]
@@ -356,7 +356,7 @@ class Reports::ReportContent
@xml_report.add_paragraph(section_box, section_text) if section_text
plot_png = nil; plot_svg = nil
- begin
+ #begin
plot_input = []
value_attributes.each do |a|
accept = validation_set.get_accept_values_for_attr(a)
@@ -388,13 +388,13 @@ class Reports::ReportContent
@xml_report.add_imagefigures_in_row(section_box,f,"Boxplots #{i}")
i+=1
end
- rescue Exception => ex
- msg = "WARNING could not create box plot: "+ex.message
- LOGGER.error(msg)
- rm_tmp_file(plot_png[:name]) if plot_png
- rm_tmp_file(plot_svg[:name]) if plot_svg
- @xml_report.add_paragraph(section_box, msg)
- end
+ #rescue Exception => ex
+ # msg = "WARNING could not create box plot: "+ex.message
+ # LOGGER.error(msg)
+ # rm_tmp_file(plot_png[:name]) if plot_png
+ # rm_tmp_file(plot_svg[:name]) if plot_svg
+ # @xml_report.add_paragraph(section_box, msg)
+ #end
end
private
diff --git a/report/report_factory.rb b/report/report_factory.rb
index f73ffd9..b67fbf1 100755
--- a/report/report_factory.rb
+++ b/report/report_factory.rb
@@ -14,7 +14,7 @@ VAL_ATTR_REGR = [ :num_instances, :num_unpredicted, :root_mean_squared_error,
#VAL_ATTR_BOX_PLOT_CLASS = [ :accuracy, :average_area_under_roc,
# :area_under_roc, :f_measure, :true_positive_rate, :true_negative_rate ]
VAL_ATTR_BOX_PLOT_CLASS = [ :accuracy, :area_under_roc, :f_measure, :true_positive_rate, :true_negative_rate, :positive_predictive_value, :negative_predictive_value ]
-VAL_ATTR_BOX_PLOT_REGR = [ :root_mean_squared_error, :mean_absolute_error, :r_square ]
+VAL_ATTR_BOX_PLOT_REGR = [ :root_mean_squared_error, :mean_absolute_error, :r_square, :concordance_correlation_coefficient ]
VAL_ATTR_TTEST_REGR = [ :r_square, :root_mean_squared_error ]
VAL_ATTR_TTEST_CLASS = [ :accuracy, :average_area_under_roc ]
@@ -61,7 +61,7 @@ module Reports::ReportFactory
i = 0
task_step = 100 / validation_set.size.to_f
validation_set.validations.each do |v|
- v.get_predictions( OpenTox::SubTask.create(task, i*task_step, (i+1)*task_step ) )
+ v.get_predictions( OpenTox::SubTask.create(task, i*task_step, [(i+1)*task_step,100].min ) )
i += 1
end
end
@@ -299,7 +299,7 @@ module Reports::ReportFactory
report.add_result(merged,result_attributes,res_titel,res_titel,res_text)
# pending: regression stats have different scales!!!
report.add_box_plot(set, :identifier, box_plot_attributes)
- report.add_paired_ttest_tables(set, :identifier, ttest_attributes, ttest_significance) if ttest_significance>0
+ report.add_ttest_tables(set, :identifier, ttest_attributes, ttest_significance) if ttest_significance>0
report.end_section
end
task.progress(100) if task
@@ -314,7 +314,7 @@ module Reports::ReportFactory
validation_set.get_values(:identifier).inspect) if validation_set.num_different_values(:identifier)<2
#validation_set.load_cv_attributes
- pre_load_predictions( validation_set, OpenTox::SubTask.create(task,0,80) )
+ #pre_load_predictions( validation_set, OpenTox::SubTask.create(task,0,80) )
report = Reports::ReportContent.new("Method comparison report")
add_filter_warning(report, validation_set.filter_params) if validation_set.filter_params!=nil
@@ -322,24 +322,33 @@ module Reports::ReportFactory
case validation_set.unique_feature_type
when "classification"
result_attributes += VAL_ATTR_CLASS
+ ttest_attributes = VAL_ATTR_TTEST_CLASS
box_plot_attributes = VAL_ATTR_BOX_PLOT_CLASS
else
result_attributes += VAL_ATTR_REGR
+ ttest_attributes = VAL_ATTR_TTEST_REGR
box_plot_attributes = VAL_ATTR_BOX_PLOT_REGR
end
merged = validation_set.merge([:identifier])
merged.sort(:identifier)
-
merged.validations.each do |v|
v.validation_uri = v.validation_uri.split(";").uniq.join(" ")
v.validation_report_uri = v.validation_report_uri.split(";").uniq.join(" ") if v.validation_report_uri
end
-
msg = merged.validations.collect{|v| v.identifier+" ("+Lib::MergeObjects.merge_count(v).to_s+"x)"}.join(", ")
report.add_result(merged,result_attributes,"Average Results","Results",msg)
-
report.add_box_plot(validation_set, :identifier, box_plot_attributes)
+ if params[:ttest_attributes] and params[:ttest_attributes].chomp.size>0
+ ttest_attributes = params[:ttest_attributes].split(",").collect{|a| a.to_sym}
+ end
+ ttest_significance = 0.9
+ if params[:ttest_significance]
+ ttest_significance = params[:ttest_significance].to_f
+ end
+ #report.add_ttest_tables(validation_set, :identifier, ttest_attributes, ttest_significance) if ttest_significance>0
+ #report.add_ttest_tables(validation_set, :identifier, ttest_attributes, 0.75) if ttest_significance>0
+ #report.add_ttest_tables(validation_set, :identifier, ttest_attributes, 0.5) if ttest_significance>0
report
end
diff --git a/report/statistical_test.rb b/report/statistical_test.rb
index da46f6b..fd4b810 100644
--- a/report/statistical_test.rb
+++ b/report/statistical_test.rb
@@ -6,7 +6,7 @@ module Reports
class ReportStatisticalTest
# __grouped_validations__ : array of validation arrays
- def self.test_matrix( validations, group_attribute, test_attribute, class_value, test_method="paired_ttest", significance_level=0.95 )
+ def self.test_matrix( validations, group_attribute, test_attribute, class_value, test_method="ttest", significance_level=0.95 )
raise "statistical-test: '"+test_method+"' does not exist" unless ReportStatisticalTest.respond_to?(test_method)
grouped_validations = Reports::Util.group(validations, [group_attribute])
@@ -35,12 +35,20 @@ module Reports
{:titles => titles, :matrix => matrix, :num_results => grouped_validations[0].size}
end
- def self.paired_ttest( validations1, validations2, attribute, class_value, significance_level=0.95 )
+ def self.ttest( validations1, validations2, attribute, class_value, significance_level=0.95 )
array1 = validations1.collect{ |v| (v.send(attribute).is_a?(Hash) ? v.send(attribute)[class_value].to_f : v.send(attribute).to_f) }
array2 = validations2.collect{ |v| (v.send(attribute).is_a?(Hash) ? v.send(attribute)[class_value].to_f : v.send(attribute).to_f) }
LOGGER.debug "paired-t-testing "+attribute.to_s+" "+array1.inspect+" vs "+array2.inspect
- Reports::r_util.paired_ttest(array1, array2, significance_level)
+ if array1.size>1 && array2.size>1
+ Reports::r_util.paired_ttest(array1, array2, significance_level)
+ elsif array1.size==1 && array2.size>1
+ -1 * Reports::r_util.ttest(array2, array1[0], significance_level)
+ elsif array1.size>1 && array2.size==1
+ Reports::r_util.ttest(array1, array2[0], significance_level)
+ else
+ raise "illegal input for ttest"
+ end
end
end
diff --git a/report/validation_access.rb b/report/validation_access.rb
index e2a3978..4d9ed9f 100755
--- a/report/validation_access.rb
+++ b/report/validation_access.rb
@@ -195,30 +195,50 @@ class Reports::ValidationDB
Lib::OTPredictions.new( data.data, data.compounds )
end
+ @@accept_values = {}
+
def get_accept_values( validation, subjectid=nil )
- # PENDING So far, one has to load the whole dataset to get the accept_value from ambit
- test_target_datasets = validation.test_target_dataset_uri
- test_target_datasets = validation.test_dataset_uri unless test_target_datasets
- res = nil
- test_target_datasets.split(";").each do |test_target_dataset|
- d = Lib::DatasetCache.find( test_target_dataset, subjectid )
- raise "cannot get test target dataset for accept values, dataset: "+test_target_dataset.to_s unless d
- accept_values = d.accept_values(validation.prediction_feature)
- raise "cannot get accept values from dataset "+test_target_dataset.to_s+" for feature "+
- validation.prediction_feature+":\n"+d.features[validation.prediction_feature].to_yaml unless accept_values!=nil
- raise "different accept values" if res && res!=accept_values
- res = accept_values
+ begin
+ return @@accept_values[validation.prediction_feature] if @@accept_values[validation.prediction_feature]
+ LOGGER.debug "get accept values ..."
+ pred = OpenTox::Feature.find(validation.prediction_feature)
+ accept = pred.metadata[OT.acceptValue]
+ accept = accept[0] if accept.is_a?(Array) and accept.size==1 and accept[0].is_a?(Array)
+ raise unless accept.is_a?(Array) and accept.size>1
+ @@accept_values[validation.prediction_feature] = accept
+ LOGGER.debug "get accept values ... #{accept} #{accept.size}"
+ accept
+ rescue
+ # PENDING So far, one has to load the whole dataset to get the accept_value from ambit
+ test_target_datasets = validation.test_target_dataset_uri
+ test_target_datasets = validation.test_dataset_uri unless test_target_datasets
+ res = nil
+ test_target_datasets.split(";").each do |test_target_dataset|
+ d = Lib::DatasetCache.find( test_target_dataset, subjectid )
+ raise "cannot get test target dataset for accept values, dataset: "+test_target_dataset.to_s unless d
+ accept_values = d.accept_values(validation.prediction_feature)
+ raise "cannot get accept values from dataset "+test_target_dataset.to_s+" for feature "+
+ validation.prediction_feature+":\n"+d.features[validation.prediction_feature].to_yaml unless accept_values!=nil
+ raise "different accept values" if res && res!=accept_values
+ res = accept_values
+ end
+ res
end
- res
end
def feature_type( validation, subjectid=nil )
- OpenTox::Model::Generic.new(validation.model_uri).feature_type(subjectid)
+ if validation.model_uri.include?(";")
+ model_uri = validation.model_uri.split(";")[0]
+ else
+ model_uri = validation.model_uri
+ end
+ OpenTox::Model::Generic.new(model_uri).feature_type(subjectid)
#get_model(validation).classification?
end
def predicted_variable(validation, subjectid=nil)
raise "cannot derive model depended props for merged validations" if Lib::MergeObjects.merged?(validation)
+ raise "multiple models in this validation, cannot get one predicted variable (#{validation.model_uri})" if validation.model_uri.include?(";")
model = OpenTox::Model::Generic.find(validation.model_uri, subjectid)
raise OpenTox::NotFoundError.new "model not found '"+validation.model_uri+"'" unless model
model.predicted_variable(subjectid)
@@ -226,6 +246,7 @@ class Reports::ValidationDB
def predicted_confidence(validation, subjectid=nil)
raise "cannot derive model depended props for merged validations" if Lib::MergeObjects.merged?(validation)
+ raise "multiple models in this validation, cannot get one predicted confidence (#{validation.model_uri})" if validation.model_uri.include?(";")
model = OpenTox::Model::Generic.find(validation.model_uri, subjectid)
raise OpenTox::NotFoundError.new "model not found '"+validation.model_uri+"'" unless model
model.predicted_confidence(subjectid)
diff --git a/report/validation_data.rb b/report/validation_data.rb
index 3806fd7..95636fc 100755
--- a/report/validation_data.rb
+++ b/report/validation_data.rb
@@ -396,6 +396,7 @@ module Reports
@validations.each do |v|
index = -1
array.push(attributes.collect do |a|
+
index += 1
if VAL_ATTR_VARIANCE.index(a)
variance = v.send( (a.to_s+"_variance").to_sym )