From 1f4ebe46790443225beff01b9e1918f5baec31f8 Mon Sep 17 00:00:00 2001
From: mguetlein
Date: Sat, 13 Aug 2011 14:20:18 +0200
Subject: add bootstrapping description

---
 validation/validation_application.rb | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/validation/validation_application.rb b/validation/validation_application.rb
index d2dfef0..0658309 100755
--- a/validation/validation_application.rb
+++ b/validation/validation_application.rb
@@ -400,7 +400,14 @@ get '/bootstrapping' do
       "All validations: "+url_for("/",:full)+"\n"+
       "Validation reports: "+url_for("/report/validation",:full)
     description =
-      "A list of all bootstrapping-validations.\n"+
+      "A list of all bootstrapping-validations.\n\n"+
+      "Bootstrapping performs sampling with replacement to create a training dataset and test dataset from the original dataset.\n"+
+      "Subsequently, a model is built with the training dataset and validated on the test dataset.\n\n"+
+      "Quote from R. Kohavi - A study of cross-validation and bootstrap for accuracy estimation and model selection,\n"+
+      "International Joint Conference on Artificial Intelligence, 1995:\n"+
+      "'Given a dataset of size n, a bootstrap sample is created by sampling n instances uniformly from the data (with replacement).\n"+
+      " Since the dataset is sampled with replacement, the probability of any given instance not being chosen after n samples is (1 - 1/n)^n = e^-1 = 0.368;\n"+
+      " the expected number of distinct instances from the original dataset appearing in the test set is thus 0.632n.'\n\n"+
       "To perform a bootstrapping-validation use the POST method."
     post_command = OpenTox::PostCommand.new request.url,"Perform bootstrapping-validation"
     post_command.attributes << OpenTox::PostAttribute.new("algorithm_uri")
-- 
cgit v1.2.3
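The figures quoted from Kohavi are easy to verify: the probability that a
particular instance is never drawn in n draws with replacement is
(1 - 1/n)^n, which approaches e^-1 ~ 0.368 from below as n grows. A minimal
plain-Ruby sketch (standalone, no OpenTox dependencies) that prints the
numbers for a few dataset sizes:

    [10, 100, 1000, 10000].each do |n|
      p_never = (1.0 - 1.0/n)**n   # P(instance never drawn in n samples)
      puts "n=#{n}: P(never drawn)=#{p_never.round(4)}, " +
           "expected distinct fraction=#{(1 - p_never).round(4)}"
    end

Already for n=10 this prints ~0.349 and ~0.651, close to the limiting
0.368/0.632 split.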
}.join("\n")+"\n" when /(?i)QPRF/ ReachReports::QprfReport.all.collect{ |r| r.report_uri }.join("\n")+"\n" end diff --git a/validation/validation_application.rb b/validation/validation_application.rb index 0658309..d652de4 100755 --- a/validation/validation_application.rb +++ b/validation/validation_application.rb @@ -8,6 +8,12 @@ require 'validation/validation_service.rb' get '/crossvalidation/?' do LOGGER.info "list all crossvalidations" + model_uri = params.delete("model") || params.delete("model_uri") + if model_uri + model = OpenTox::Model::Generic.find(model_uri) + params[:algorithm] = model.metadata[OT.algorithm] + params[:dataset] = model.metadata[OT.trainingDataset] + end uri_list = Lib::OhmUtil.find( Validation::Crossvalidation, params ).sort.collect{|v| v.crossvalidation_uri}.join("\n") + "\n" if request.env['HTTP_ACCEPT'] =~ /text\/html/ related_links = -- cgit v1.2.3 From 01cc1d014f1f9ccdeb5925e3fa7d64b2d06c2085 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Mon, 15 Aug 2011 09:53:59 +0200 Subject: adding qmrf jars --- reach_reports/reach_application.rb | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/reach_reports/reach_application.rb b/reach_reports/reach_application.rb index d77906d..b380c92 100755 --- a/reach_reports/reach_application.rb +++ b/reach_reports/reach_application.rb @@ -182,6 +182,12 @@ get '/reach_report/:type/:id/editor' do + + + + + + -- cgit v1.2.3 From d27d53d98238ede80fc3b1a0c277ca890a84c736 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Thu, 18 Aug 2011 10:38:51 +0200 Subject: fix ROC stuff, rename weighted_auc to average_auc --- lib/predictions.rb | 57 ++++++++++++++++++++++++++++++------------ lib/validation_db.rb | 4 +-- reach_reports/reach_service.rb | 2 +- report/plot_factory.rb | 37 ++++++++++++--------------- report/report_content.rb | 5 ++-- report/report_factory.rb | 36 ++++++++++++++------------ report/validation_data.rb | 4 +-- 7 files changed, 85 insertions(+), 60 deletions(-) diff --git a/lib/predictions.rb b/lib/predictions.rb index b71359d..bfb25da 100755 --- a/lib/predictions.rb +++ b/lib/predictions.rb @@ -254,7 +254,6 @@ module Lib return res end - # does only take the instances that are classified as into account def area_under_roc(class_index=nil) return prediction_feature_value_map( lambda{ |i| area_under_roc(i) } ) if class_index==nil @@ -268,15 +267,16 @@ module Lib tp_conf = [] fp_conf = [] (0..@predicted_values.size-1).each do |i| - if @predicted_values[i]==class_index - if @actual_values[i]==@predicted_values[i] - tp_conf.push(@confidence_values[i]) + if @predicted_values[i]!=nil + c = @confidence_values[i] * (@predicted_values[i]==class_index ? 1 : -1) + if @actual_values[i]==class_index + tp_conf << c else - fp_conf.push(@confidence_values[i]) + fp_conf << c end end end - #puts tp_conf.inspect+"\n"+fp_conf.inspect+"\n\n" + puts tp_conf.inspect+"\n"+fp_conf.inspect+"\n\n" return 0.0 if tp_conf.size == 0 return 1.0 if fp_conf.size == 0 @@ -432,22 +432,18 @@ module Lib return incorrect end - # Note: - # * (un-weighted) area under roc is computed with all __predicted__ isntances for a certain class - # * weighted weights each auc with the number of __acutal__ instances - # its like that, because its like that in weka - def weighted_area_under_roc - w_auc = weighted_measure( :area_under_roc ) + def average_area_under_roc + w_auc = average_measure( :area_under_roc ) w_auc.nan? ? 
From d27d53d98238ede80fc3b1a0c277ca890a84c736 Mon Sep 17 00:00:00 2001
From: mguetlein
Date: Thu, 18 Aug 2011 10:38:51 +0200
Subject: fix ROC stuff, rename weighted_auc to average_auc

---
 lib/predictions.rb             | 57 ++++++++++++++++++++++++++++++------------
 lib/validation_db.rb           |  4 +--
 reach_reports/reach_service.rb |  2 +-
 report/plot_factory.rb         | 37 ++++++++++++---------------
 report/report_content.rb      |  5 ++--
 report/report_factory.rb      | 36 ++++++++++++++------------
 report/validation_data.rb     |  4 +--
 7 files changed, 85 insertions(+), 60 deletions(-)

diff --git a/lib/predictions.rb b/lib/predictions.rb
index b71359d..bfb25da 100755
--- a/lib/predictions.rb
+++ b/lib/predictions.rb
@@ -254,7 +254,6 @@ module Lib
     return res
   end

-  # does only take the instances that are classified as <class_value> into account
   def area_under_roc(class_index=nil)
     return prediction_feature_value_map( lambda{ |i| area_under_roc(i) } ) if class_index==nil
@@ -268,15 +267,16 @@ module Lib
     tp_conf = []
     fp_conf = []
     (0..@predicted_values.size-1).each do |i|
-      if @predicted_values[i]==class_index
-        if @actual_values[i]==@predicted_values[i]
-          tp_conf.push(@confidence_values[i])
+      if @predicted_values[i]!=nil
+        c = @confidence_values[i] * (@predicted_values[i]==class_index ? 1 : -1)
+        if @actual_values[i]==class_index
+          tp_conf << c
         else
-          fp_conf.push(@confidence_values[i])
+          fp_conf << c
         end
       end
     end
-    #puts tp_conf.inspect+"\n"+fp_conf.inspect+"\n\n"
+    puts tp_conf.inspect+"\n"+fp_conf.inspect+"\n\n"
     return 0.0 if tp_conf.size == 0
     return 1.0 if fp_conf.size == 0
@@ -432,22 +432,18 @@ module Lib
     return incorrect
   end

-  # Note:
-  # * (un-weighted) area under roc is computed with all __predicted__ isntances for a certain class
-  # * weighted weights each auc with the number of __acutal__ instances
-  # its like that, because its like that in weka
-  def weighted_area_under_roc
-    w_auc = weighted_measure( :area_under_roc )
+  def average_area_under_roc
+    w_auc = average_measure( :area_under_roc )
     w_auc.nan? ? 0 : w_auc
   end

-  def weighted_f_measure
-    return weighted_measure( :f_measure )
+  def average_f_measure
+    return average_measure( :f_measure )
   end

   private

-  # the <measure> is weighted with the number of instances for each actual class value
-  def weighted_measure( measure )
+  # the <measure> is averaged over the number of instances for each actual class value
+  def average_measure( measure )

     sum_instances = 0
     num_instances_per_class = Array.new(@num_classes, 0)
@@ -562,6 +558,35 @@ module Lib
   # data for (roc-)plots
   ###################################################################################

+  def get_roc_prediction_values(class_value)
+
+    #puts "get_roc_values for class_value: "+class_value.to_s
+    raise "no confidence values" unless confidence_values_available?
+    raise "no class-value specified" if class_value==nil
+
+    class_index = @accept_values.index(class_value) if class_value!=nil
+    raise "class not found "+class_value.to_s if (class_value!=nil && class_index==nil)
+
+    c = []; tp = []
+    (0..@predicted_values.size-1).each do |i|
+      if @predicted_values[i]!=nil
+        c << @confidence_values[i] * (@predicted_values[i]==class_index ? 1 : -1)
+        if (@actual_values[i]==class_index)
+          tp << 1
+        else
+          tp << 0
+        end
+      end
+    end
+
+    # DO NOT raise exception here, maybe different validations are concatenated
+    #raise "no instance predicted as '"+class_value+"'" if p.size == 0
+
+    h = {:true_positives => tp, :confidence_values => c}
+    #puts h.inspect
+    return h
+  end
+
   def get_prediction_values(class_value)

     #puts "get_roc_values for class_value: "+class_value.to_s
diff --git a/lib/validation_db.rb b/lib/validation_db.rb
index fb7a8b5..9af43de 100755
--- a/lib/validation_db.rb
+++ b/lib/validation_db.rb
@@ -18,7 +18,7 @@ module Validation
   # :classification_statistics
   VAL_CLASS_PROPS_SINGLE_SUM = [ :num_correct, :num_incorrect, :confusion_matrix ]
   VAL_CLASS_PROPS_SINGLE_AVG = [ :percent_correct, :percent_incorrect,
-                              :weighted_area_under_roc, :accuracy, :weighted_accuracy ]
+                              :average_area_under_roc, :accuracy, :weighted_accuracy ]
   VAL_CLASS_PROPS_SINGLE = VAL_CLASS_PROPS_SINGLE_SUM + VAL_CLASS_PROPS_SINGLE_AVG

   # :class_value_statistics
@@ -30,7 +30,7 @@ module Validation
   VAL_CLASS_PROPS_PER_CLASS = VAL_CLASS_PROPS_PER_CLASS_SUM + VAL_CLASS_PROPS_PER_CLASS_AVG
   VAL_CLASS_PROPS_PER_CLASS_COMPLEMENT_EXISTS = [ :num_false_positives,
     :num_false_negatives, :num_true_positives, :num_true_negatives, :false_negative_rate, :false_positive_rate,
-    :true_negative_rate, :true_positive_rate ] #:precision, :recall,
+    :true_negative_rate, :true_positive_rate, :area_under_roc ] #:precision, :recall,

   VAL_CLASS_PROPS = VAL_CLASS_PROPS_SINGLE + VAL_CLASS_PROPS_PER_CLASS
diff --git a/reach_reports/reach_service.rb b/reach_reports/reach_service.rb
index 2030dbd..bfa760e 100755
--- a/reach_reports/reach_service.rb
+++ b/reach_reports/reach_service.rb
@@ -229,7 +229,7 @@ module ReachReports
       case feature_type
       when "classification"
         v << "percent_correct: "+validation.classification_statistics[:percent_correct].to_s
-        v << "weighted AUC: "+validation.classification_statistics[:weighted_area_under_roc].to_s
+        v << "average AUC: "+validation.classification_statistics[:average_area_under_roc].to_s
       when "regression"
         v << "root_mean_squared_error: "+validation.regression_statistics[:root_mean_squared_error].to_s
         v << "r_square "+validation.regression_statistics[:r_square].to_s
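The reworked area_under_roc above folds the predicted class into the sign of
the confidence — an instance predicted as the class in question keeps its
confidence, any other prediction gets it negated — so all predictions can be
ranked on a single scale. The tail of the method is not shown here, but the
standard pairwise formulation such tp_conf/fp_conf lists feed into can be
sketched as follows (hypothetical standalone helper, not code from this
commit): the AUC equals the fraction of (actual-positive, actual-negative)
pairs ranked correctly, with ties counting half.

    def pairwise_auc(tp_conf, fp_conf)
      return 0.0 if tp_conf.empty?   # mirrors the guard clauses above
      return 1.0 if fp_conf.empty?
      correct = 0.0
      tp_conf.each do |t|
        fp_conf.each do |f|
          correct += 1.0 if t > f    # positive ranked above a negative
          correct += 0.5 if t == f   # tie
        end
      end
      correct / (tp_conf.size * fp_conf.size)
    end

    puts pairwise_auc([0.9, 0.7, -0.2], [0.8, -0.5, -0.6])  # => 0.777...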
diff --git a/report/plot_factory.rb b/report/plot_factory.rb
index bf59960..27e934d 100644
--- a/report/plot_factory.rb
+++ b/report/plot_factory.rb
@@ -294,15 +294,14 @@ module Reports
   private
   def self.transform_roc_predictions(validation_set, class_value, add_label=true )
     if (validation_set.size > 1)
-      values = { :predicted_values => [], :actual_values => [], :confidence_values => []}
+      values = { :true_positives => [], :confidence_values => []}
       (0..validation_set.size-1).each do |i|
-        roc_values = validation_set.get(i).get_predictions.get_prediction_values(class_value)
-        values[:predicted_values] += roc_values[:predicted_values]
+        roc_values = validation_set.get(i).get_predictions.get_roc_prediction_values(class_value)
+        values[:true_positives ] += roc_values[:true_positives ]
         values[:confidence_values] += roc_values[:confidence_values]
-        values[:actual_values] += roc_values[:actual_values]
       end
     else
-      values = validation_set.validations[0].get_predictions.get_prediction_values(class_value)
+      values = validation_set.validations[0].get_predictions.get_roc_prediction_values(class_value)
     end
     tp_fp_rates = get_tp_fp_rates(values)
     labels = []
@@ -357,8 +356,7 @@ module Reports
     #  :predicted_values => [1, 0, 0, 1, 0, 1],
     #  :actual_values => [0, 1, 0, 0, 1, 1]}
     roc_values = {:confidence_values => [0.9, 0.8, 0.7, 0.6, 0.5, 0.4],
-                  :predicted_values => [1, 1, 1, 1, 1, 1],
-                  :actual_values => [1, 0, 1, 0, 1, 0]}
+                  :true_positives => [1, 1, 1, 0, 1, 0]}
     tp_fp_rates = get_tp_fp_rates(roc_values)
     labels = []
     tp_fp_rates[:youden].each do |point,confidence|
@@ -431,16 +429,15 @@ module Reports
   def self.get_tp_fp_rates(roc_values)

     c = roc_values[:confidence_values]
-    p = roc_values[:predicted_values]
-    a = roc_values[:actual_values]
-    raise "no prediction values for roc-plot" if p.size==0
+    tp = roc_values[:true_positives]
+    raise "no prediction values for roc-plot" if tp.size==0

     # hack for painting perfect/worst roc curve, otherwise fp/tp-rate will always be 100%
     # determine if perfect/worst roc curve
     fp_found = false
     tp_found = false
-    (0..p.size-1).each do |i|
-      if a[i]!=p[i]
+    (0..tp.size-1).each do |i|
+      if tp[i]==0
         fp_found |= true
       else
         tp_found |=true
       end
       break if tp_found and fp_found
     end
     unless fp_found and tp_found #if perfect/worst add wrong/right instance with lowest confidence
-      a << (tp_found ? 0 : 1)
-      p << 1
+      tp << (tp_found ? 0 : 1)
       c << -Float::MAX
     end
     (0..tp.size-2).each do |i|
       ((i+1)..tp.size-1).each do |j|
         if c[i]<c[j]
[...]
diff --git a/report/report_content.rb b/report/report_content.rb
[...]
       if prediction_set.size>0
         if prediction_set.size!=validation_set.size
           section_text += "\nWARNING: roc plot information not available for all validation results"
diff --git a/report/report_factory.rb b/report/report_factory.rb
index 340f276..1cf7b94 100755
--- a/report/report_factory.rb
+++ b/report/report_factory.rb
@@ -5,19 +5,19 @@ VAL_ATTR_TRAIN_TEST = [ :model_uri, :training_dataset_uri, :test_dataset_uri, :p
 VAL_ATTR_CV = [ :algorithm_uri, :dataset_uri, :num_folds, :crossvalidation_fold ]

 # selected attributes of interest when performing classification
-VAL_ATTR_CLASS = [ :num_instances, :num_unpredicted, :accuracy, :weighted_accuracy, :weighted_area_under_roc,
+VAL_ATTR_CLASS = [ :num_instances, :num_unpredicted, :accuracy, :weighted_accuracy, :average_area_under_roc,
   :area_under_roc, :f_measure, :true_positive_rate, :true_negative_rate ]
 VAL_ATTR_REGR = [ :num_instances, :num_unpredicted, :root_mean_squared_error,
   :weighted_root_mean_squared_error, :mean_absolute_error, :weighted_mean_absolute_error, :r_square, :weighted_r_square,
   :sample_correlation_coefficient ]

-#VAL_ATTR_BAR_PLOT_CLASS = [ :accuracy, :weighted_area_under_roc,
+#VAL_ATTR_BAR_PLOT_CLASS = [ :accuracy, :average_area_under_roc,
 #  :area_under_roc, :f_measure, :true_positive_rate, :true_negative_rate ]
 VAL_ATTR_BAR_PLOT_CLASS = [ :accuracy, :f_measure, :true_positive_rate, :true_negative_rate ]
 VAL_ATTR_BAR_PLOT_REGR = [ :root_mean_squared_error, :mean_absolute_error, :r_square ]

 VAL_ATTR_TTEST_REGR = [:r_square, :root_mean_squared_error]
-VAL_ATTR_TTEST_CLASS = [:percent_correct, :weighted_area_under_roc]
+VAL_ATTR_TTEST_CLASS = [:percent_correct, :average_area_under_roc]

 # = Reports::ReportFactory
@@ -76,11 +76,13 @@ module Reports::ReportFactory
       report.add_result(validation_set, [:validation_uri] + VAL_ATTR_TRAIN_TEST + VAL_ATTR_CLASS, "Results", "Results")
       report.add_confusion_matrix(val)
       report.add_section("Plots")
-      ([nil] + validation_set.get_accept_values).each do |accept_value|
-        report.add_roc_plot(validation_set, accept_value)
-        report.add_confidence_plot(validation_set, accept_value)
-        title = accept_value ? "Plots for predicted class-value '"+accept_value.to_s+"'" : "Plots for all predictions"
-        report.align_last_two_images title
+      report.add_confidence_plot(validation_set)
+      if (validation_set.get_accept_values.size == 2)
+        report.add_roc_plot(validation_set, validation_set.get_accept_values[0])
+      else
+        validation_set.get_accept_values.each do |accept_value|
+          report.add_roc_plot(validation_set, accept_value)
+        end
       end
       report.end_section
     when "regression"
@@ -127,12 +129,14 @@ module Reports::ReportFactory
     report.add_confusion_matrix(cv_set.validations[0])
     report.add_section("Plots")
     [nil, :crossvalidation_fold].each do |split_attribute|
-      ([nil] + validation_set.get_accept_values).each do |accept_value|
-        report.add_roc_plot(validation_set, accept_value, split_attribute)
-        report.add_confidence_plot(validation_set, accept_value, split_attribute)
-        title = accept_value ? "Plots for predicted class-value '"+accept_value.to_s+"'" : "Plots for all predictions"
-        title += split_attribute ? ", separated by crossvalidation fold" : " (accumulated over all folds)"
-        report.align_last_two_images title
+
+      report.add_confidence_plot(validation_set,nil,split_attribute)
+      if (validation_set.get_accept_values.size == 2)
+        report.add_roc_plot(validation_set, validation_set.get_accept_values[0], split_attribute)
+      else
+        validation_set.get_accept_values.each do |accept_value|
+          report.add_roc_plot(validation_set, accept_value, split_attribute)
+        end
       end
     end
     report.end_section
@@ -199,8 +203,8 @@ module Reports::ReportFactory
     if (validation_set.num_different_values(:dataset_uri)>1)
       all_merged = validation_set.merge([:algorithm_uri, :dataset_uri, :crossvalidation_id, :crossvalidation_uri])
       report.add_ranking_plots(all_merged, :algorithm_uri, :dataset_uri,
-        [:percent_correct, :weighted_area_under_roc, :true_positive_rate, :true_negative_rate] )
-      report.add_result_overview(all_merged, :algorithm_uri, :dataset_uri, [:percent_correct, :weighted_area_under_roc, :true_positive_rate, :true_negative_rate])
+        [:percent_correct, :average_area_under_roc, :true_positive_rate, :true_negative_rate] )
+      report.add_result_overview(all_merged, :algorithm_uri, :dataset_uri, [:percent_correct, :average_area_under_roc, :true_positive_rate, :true_negative_rate])
     end

     result_attributes = [:identifier,:crossvalidation_uri,:crossvalidation_report_uri]+VAL_ATTR_CV-[:crossvalidation_fold,:num_folds,:dataset_uri]
diff --git a/report/validation_data.rb b/report/validation_data.rb
index aa146a6..b6522b6 100755
--- a/report/validation_data.rb
+++ b/report/validation_data.rb
@@ -1,9 +1,9 @@

 # the variance is computed when merging results for these attributes
 VAL_ATTR_VARIANCE = [ :area_under_roc, :percent_correct, :root_mean_squared_error, :mean_absolute_error,
-  :r_square, :accuracy, :weighted_area_under_roc, :weighted_accuracy, :weighted_root_mean_squared_error, :weighted_mean_absolute_error,
+  :r_square, :accuracy, :average_area_under_roc, :weighted_accuracy, :weighted_root_mean_squared_error, :weighted_mean_absolute_error,
   :weighted_r_square ]
-VAL_ATTR_RANKING = [ :area_under_roc, :percent_correct, :true_positive_rate, :true_negative_rate, :weighted_area_under_roc, :accuracy, :f_measure ]
+VAL_ATTR_RANKING = [ :area_under_roc, :percent_correct, :true_positive_rate, :true_negative_rate, :average_area_under_roc, :accuracy, :f_measure ]

 ATTR_NICE_NAME = {}
-- 
cgit v1.2.3
From 32f9e3f97c1a8278cf5022f619d9e969a37fed38 Mon Sep 17 00:00:00 2001
From: mguetlein
Date: Thu, 18 Aug 2011 11:25:39 +0200
Subject: fix ROC stuff [2]

---
 report/report_factory.rb  | 18 ++++++++++--------
 report/validation_data.rb | 17 ++++++++++++++---
 2 files changed, 24 insertions(+), 11 deletions(-)

diff --git a/report/report_factory.rb b/report/report_factory.rb
index 1cf7b94..e3e0f3c 100755
--- a/report/report_factory.rb
+++ b/report/report_factory.rb
@@ -78,10 +78,11 @@ module Reports::ReportFactory
       report.add_section("Plots")
       report.add_confidence_plot(validation_set)
       if (validation_set.get_accept_values.size == 2)
-        report.add_roc_plot(validation_set, validation_set.get_accept_values[0])
-      else
-        validation_set.get_accept_values.each do |accept_value|
-          report.add_roc_plot(validation_set, accept_value)
+        if validation_set.get_true_accept_value!=nil
+          report.add_roc_plot(validation_set, validation_set.get_true_accept_value)
+        else
+          report.add_roc_plot(validation_set, validation_set.get_accept_values[0])
+          report.add_roc_plot(validation_set, validation_set.get_accept_values[1])
         end
       end
       report.end_section
@@ -132,10 +133,11 @@ module Reports::ReportFactory
       report.add_confidence_plot(validation_set,nil,split_attribute)
       if (validation_set.get_accept_values.size == 2)
-        report.add_roc_plot(validation_set, validation_set.get_accept_values[0], split_attribute)
-      else
-        validation_set.get_accept_values.each do |accept_value|
-          report.add_roc_plot(validation_set, accept_value, split_attribute)
+        if validation_set.get_true_accept_value!=nil
+          report.add_roc_plot(validation_set, validation_set.get_true_accept_value,split_attribute)
+        else
+          report.add_roc_plot(validation_set, validation_set.get_accept_values[0],split_attribute)
+          report.add_roc_plot(validation_set, validation_set.get_accept_values[1],split_attribute)
         end
       end
     end
diff --git a/report/validation_data.rb b/report/validation_data.rb
index b6522b6..f5ecae7 100755
--- a/report/validation_data.rb
+++ b/report/validation_data.rb
@@ -263,6 +263,18 @@ module Reports
       return unique_value("get_accept_values")
     end

+    def get_true_accept_value()
+      accept_values = get_accept_values()
+      if accept_values.size==2
+        if (accept_values[0] =~ TRUE_REGEXP and !(accept_values[1] =~ TRUE_REGEXP))
+          return accept_values[0]
+        elsif (accept_values[1] =~ TRUE_REGEXP and !(accept_values[0] =~ TRUE_REGEXP))
+          return accept_values[1]
+        end
+      end
+      nil
+    end
+
     def get_accept_values_for_attr( attribute )
       if !Validation::Validation.classification_property?(attribute)
         []
@@ -270,9 +282,8 @@ module Reports
         accept_values = get_accept_values()
         if !Validation::Validation.depends_on_class_value?(attribute)
           [ nil ]
-        elsif accept_values.size==2 and
-            Validation::Validation.complement_exists?(attribute)
-          [ accept_values[0] ]
+        elsif accept_values.size==2 and get_true_accept_value()!=nil and Validation::Validation.complement_exists?(attribute)
+          [ get_true_accept_value() ]
         else
           accept_values
         end
-- 
cgit v1.2.3
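The TRUE_REGEXP heuristic above picks the "positive" class of a binary
endpoint so that ROC and per-class statistics refer to a consistent class. A
rough standalone sketch of the same logic (the regexp here is an assumed
stand-in; the real TRUE_REGEXP constant is defined elsewhere in the report
code):

    TRUE_REGEXP = /^(true|active|yes|1|1\.0)$/i  # assumed definition

    def true_accept_value(accept_values)
      return nil unless accept_values.size == 2
      # exactly one of the two class values has to look like a positive label
      matches = accept_values.select{ |v| v.to_s =~ TRUE_REGEXP }
      matches.size == 1 ? matches.first : nil
    end

    puts true_accept_value(["active", "inactive"]).inspect  # => "active"
    puts true_accept_value(["red", "green"]).inspect        # => nil

When no value matches (or both do), get_true_accept_value returns nil and the
report falls back to plotting both classes, as the report_factory change
above shows.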
From 46369e24a84ba72ec42be19af5afd4c81e5eb235 Mon Sep 17 00:00:00 2001
From: mr
Date: Thu, 18 Aug 2011 12:28:47 +0200
Subject: add missing subjectids

---
 reach_reports/reach_service.rb | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/reach_reports/reach_service.rb b/reach_reports/reach_service.rb
index 2030dbd..e8aa0a2 100755
--- a/reach_reports/reach_service.rb
+++ b/reach_reports/reach_service.rb
@@ -110,8 +110,8 @@ module ReachReports

     # TODO app_domain_description, app_domain_method, app_domain_software, applicability_limits

     #training_dataset = model.trainingDataset ? OpenTox::Dataset.find(model.trainingDataset+"/metadata") : nil
-    if ( OpenTox::Dataset.exist?(model.metadata[OT.trainingDataset]) )
-      training_dataset = OpenTox::Dataset.new( model.metadata[OT.trainingDataset] )
+    if ( OpenTox::Dataset.exist?(model.metadata[OT.trainingDataset], r.subjectid) )
+      training_dataset = OpenTox::Dataset.new( model.metadata[OT.trainingDataset], r.subjectid )
       training_dataset.load_metadata( r.subjectid )
     else
       training_dataset = nil
@@ -272,7 +272,7 @@ module ReachReports

     val_datasets.each do |data_uri|
       if OpenTox::Dataset.exist?(data_uri, r.subjectid)
-        d = OpenTox::Dataset.new(data_uri)
+        d = OpenTox::Dataset.new(data_uri, r.subjectid)
         d.load_metadata( r.subjectid)
         r.qsar_miscellaneous.attachment_validation_data << AttachmentValidationData.new(
           { :description => d.title,
-- 
cgit v1.2.3
From bad2d7444ab40a59770678c0b0e4057d5edeceef Mon Sep 17 00:00:00 2001
From: mguetlein
Date: Thu, 18 Aug 2011 12:57:33 +0200
Subject: add confidence plots for various classification stats

---
 lib/predictions.rb       | 13 ++++++++----
 report/plot_factory.rb   | 53 ++++++++++++++++++++++++++++++++++++++++--------
 report/report_content.rb |  7 ++++---
 report/report_factory.rb | 16 +++++++++++++--
 4 files changed, 72 insertions(+), 17 deletions(-)

diff --git a/lib/predictions.rb b/lib/predictions.rb
index bfb25da..7de1751 100755
--- a/lib/predictions.rb
+++ b/lib/predictions.rb
@@ -587,19 +587,24 @@ module Lib
     return h
   end

-  def get_prediction_values(class_value)
+  def get_prediction_values(actual_accept_value, predicted_accept_value)

     #puts "get_roc_values for class_value: "+class_value.to_s
     raise "no confidence values" unless confidence_values_available?
     #raise "no class-value specified" if class_value==nil

-    class_index = @accept_values.index(class_value) if class_value!=nil
-    raise "class not found "+class_value.to_s if (class_value!=nil && class_index==nil)
+    actual_class_index = @accept_values.index(actual_accept_value) if actual_accept_value!=nil
+    raise "class not found '"+actual_accept_value.to_s+"' in "+@accept_values.inspect if (actual_accept_value!=nil && actual_class_index==nil)
+
+    predicted_class_index = @accept_values.index(predicted_accept_value) if predicted_accept_value!=nil
+    raise "class not found "+predicted_accept_value.to_s+" in "+@accept_values.inspect if (predicted_accept_value!=nil && predicted_class_index==nil)

     c = []; p = []; a = []
     (0..@predicted_values.size-1).each do |i|
       # NOTE: not predicted instances are ignored here
-      if @predicted_values[i]!=nil and (class_index==nil || @predicted_values[i]==class_index)
+      if @predicted_values[i]!=nil and
+          (predicted_class_index==nil || @predicted_values[i]==predicted_class_index) and
+          (actual_class_index==nil || @actual_values[i]==actual_class_index)
         c << @confidence_values[i]
         p << @predicted_values[i]
         a << @actual_values[i]
diff --git a/report/plot_factory.rb b/report/plot_factory.rb
index 27e934d..2074ce5 100644
--- a/report/plot_factory.rb
+++ b/report/plot_factory.rb
@@ -130,8 +130,43 @@ module Reports
     end
   end

+  def self.confidence_plot_class_performance( validation_set, actual_accept_value, predicted_accept_value )
+    true_class = nil
+    if actual_accept_value==nil and predicted_accept_value==nil
+      perf = "Accuracy"
+    elsif actual_accept_value!=nil
+      if validation_set.get_true_accept_value==actual_accept_value
+        perf = "True Positive Rate"
+        true_class = actual_accept_value
+      elsif validation_set.get_accept_values.size==2 and validation_set.get_true_accept_value==(validation_set.get_accept_values-[actual_accept_value])[0]
+        perf = "True Negative Rate"
+        true_class = validation_set.get_true_accept_value
+      else
+        perf = "True Positive Rate"
+        true_class = actual_accept_value
+      end
+    elsif predicted_accept_value!=nil
+      if validation_set.get_true_accept_value==predicted_accept_value
+        perf = "Positive Predictive Value"
+        true_class = predicted_accept_value
+      elsif validation_set.get_accept_values.size==2 and validation_set.get_true_accept_value==(validation_set.get_accept_values-[predicted_accept_value])[0]
+        perf = "Negative Predictive Value"
+        true_class = validation_set.get_true_accept_value
+      else
+        perf = "Positive Predictive Value"
+        true_class = predicted_accept_value
+      end
+    end
+    title = perf+" vs Confidence Plot"
+    title += " (with True-Class: '"+true_class.to_s+"')" if true_class!=nil
+    {:title =>title, :performance => perf}
+  end
+

-  def self.create_confidence_plot( out_files, validation_set, class_value, split_set_attribute=nil, show_single_curves=false )
+  def self.create_confidence_plot( out_files, validation_set, actual_accept_value = nil,
+      predicted_accept_value = nil, split_set_attribute=nil, show_single_curves=false )
+
+    raise "param combination not supported" if actual_accept_value!=nil and predicted_accept_value!=nil

     out_files = [out_files] unless out_files.is_a?(Array)
     LOGGER.debug "creating confidence plot for '"+validation_set.size.to_s+"' validations, out-file:"+out_files.inspect
@@ -143,7 +178,7 @@ module Reports
       performance = []
       attribute_values.each do |value|
         begin
-          data = transform_confidence_predictions(validation_set.filter({split_set_attribute => value}), class_value, false)
+          data = transform_confidence_predictions(validation_set.filter({split_set_attribute => value}), actual_accept_value, predicted_accept_value, false)
           names << split_set_attribute.to_s.nice_attr+" "+value.to_s
           confidence << data[:confidence][0]
           performance << data[:performance][0]
@@ -155,17 +190,19 @@ module Reports
       out_files.each do |out_file|
         case validation_set.unique_feature_type
         when "classification"
-          RubyPlot::accuracy_confidence_plot(out_file, "Percent Correct vs Confidence Plot", "Confidence", "Percent Correct", names, confidence, performance)
+          info = confidence_plot_class_performance( validation_set, actual_accept_value, predicted_accept_value )
+          RubyPlot::accuracy_confidence_plot(out_file, info[:title], "Confidence", info[:performance], names, confidence, performance)
         when "regression"
           RubyPlot::accuracy_confidence_plot(out_file, "RMSE vs Confidence Plot", "Confidence", "RMSE", names, confidence, performance, true)
         end
       end
     else
-      data = transform_confidence_predictions(validation_set, class_value, show_single_curves)
+      data = transform_confidence_predictions(validation_set, actual_accept_value, predicted_accept_value, show_single_curves)
       out_files.each do |out_file|
         case validation_set.unique_feature_type
         when "classification"
-          RubyPlot::accuracy_confidence_plot(out_file, "Percent Correct vs Confidence Plot", "Confidence", "Percent Correct", data[:names], data[:confidence], data[:performance])
+          info = confidence_plot_class_performance( validation_set, actual_accept_value, predicted_accept_value )
+          RubyPlot::accuracy_confidence_plot(out_file, info[:title], "Confidence", info[:performance], data[:names], data[:confidence], data[:performance])
         when "regression"
           RubyPlot::accuracy_confidence_plot(out_file, "RMSE vs Confidence Plot", "Confidence", "RMSE", data[:names], data[:confidence], data[:performance], true)
         end
@@ -312,7 +349,7 @@ module Reports
   end

-  def self.transform_confidence_predictions(validation_set, class_value, add_single_folds=false)
+  def self.transform_confidence_predictions(validation_set, actual_accept_value, predicted_accept_value, add_single_folds=false)

     if (validation_set.size > 1)

@@ -320,7 +357,7 @@ module Reports
       sum_confidence_values = { :predicted_values => [], :actual_values => [], :confidence_values => []}
       (0..validation_set.size-1).each do |i|
-        confidence_values = validation_set.get(i).get_predictions.get_prediction_values(class_value)
+        confidence_values = validation_set.get(i).get_predictions.get_prediction_values(actual_accept_value, predicted_accept_value)
         sum_confidence_values[:predicted_values] += confidence_values[:predicted_values]
         sum_confidence_values[:confidence_values] += confidence_values[:confidence_values]
         sum_confidence_values[:actual_values] += confidence_values[:actual_values]
@@ -345,7 +382,7 @@ module Reports
       return { :names => names, :performance => performance, :confidence => confidence, :faint => faint }

     else
-      confidence_values = validation_set.validations[0].get_predictions.get_prediction_values(class_value)
+      confidence_values = validation_set.validations[0].get_predictions.get_prediction_values(actual_accept_value, predicted_accept_value)
       pref_conf_rates = get_performance_confidence_rates(confidence_values, validation_set.unique_feature_type)
       return { :names => [""], :performance => [pref_conf_rates[:performance]], :confidence => [pref_conf_rates[:confidence]] }
     end
diff --git a/report/report_content.rb b/report/report_content.rb
index 9c33038..8d6d44b 100755
--- a/report/report_content.rb
+++ b/report/report_content.rb
@@ -213,9 +213,10 @@ class Reports::ReportContent
   end

   def add_confidence_plot( validation_set,
-            accept_value = nil,
+            actual_accept_value = nil,
+            predicted_accept_value = nil,
             split_set_attribute = nil,
-            image_title = "Percent Correct vs Confidence Plot",
+            image_title = "Confidence Plot",
             section_text="")

     #section_conf = @xml_report.add_section(@current_section, section_title)
@@ -233,7 +234,7 @@ class Reports::ReportContent
       begin
         plot_png = add_tmp_file("conf_plot", "png")
         plot_svg = add_tmp_file("conf_plot", "svg")
-        Reports::PlotFactory.create_confidence_plot( [plot_png[:path], plot_svg[:path]], prediction_set, accept_value, split_set_attribute, false )
+        Reports::PlotFactory.create_confidence_plot( [plot_png[:path], plot_svg[:path]], prediction_set, actual_accept_value, predicted_accept_value, split_set_attribute, false )
         @xml_report.add_imagefigure(section_conf, image_title, plot_png[:name], "PNG", 100, plot_svg[:name])
       rescue Exception => ex
         msg = "WARNING could not create confidence plot: "+ex.message
diff --git a/report/report_factory.rb b/report/report_factory.rb
index e3e0f3c..2a50869 100755
--- a/report/report_factory.rb
+++ b/report/report_factory.rb
@@ -76,15 +76,21 @@ module Reports::ReportFactory
       report.add_result(validation_set, [:validation_uri] + VAL_ATTR_TRAIN_TEST + VAL_ATTR_CLASS, "Results", "Results")
       report.add_confusion_matrix(val)
       report.add_section("Plots")
-      report.add_confidence_plot(validation_set)
       if (validation_set.get_accept_values.size == 2)
         if validation_set.get_true_accept_value!=nil
           report.add_roc_plot(validation_set, validation_set.get_true_accept_value)
         else
           report.add_roc_plot(validation_set, validation_set.get_accept_values[0])
           report.add_roc_plot(validation_set, validation_set.get_accept_values[1])
+          report.align_last_two_images "ROC Plots"
         end
       end
+      report.add_confidence_plot(validation_set)
+      validation_set.get_accept_values.each do |accept_value|
+        report.add_confidence_plot(validation_set, accept_value, nil)
+        report.add_confidence_plot(validation_set, nil, accept_value)
+        report.align_last_two_images "Confidence Plots"
+      end
       report.end_section
     when "regression"
       report.add_result(validation_set, [:validation_uri] + VAL_ATTR_TRAIN_TEST + VAL_ATTR_REGR, "Results", "Results")
@@ -131,15 +137,21 @@ module Reports::ReportFactory
     report.add_section("Plots")
     [nil, :crossvalidation_fold].each do |split_attribute|
-      report.add_confidence_plot(validation_set,nil,split_attribute)
       if (validation_set.get_accept_values.size == 2)
         if validation_set.get_true_accept_value!=nil
           report.add_roc_plot(validation_set, validation_set.get_true_accept_value,split_attribute)
         else
           report.add_roc_plot(validation_set, validation_set.get_accept_values[0],split_attribute)
           report.add_roc_plot(validation_set, validation_set.get_accept_values[1],split_attribute)
+          report.align_last_two_images "ROC Plots"
         end
       end
+      report.add_confidence_plot(validation_set,nil,nil,split_attribute)
+      validation_set.get_accept_values.each do |accept_value|
+        report.add_confidence_plot(validation_set, accept_value, nil,split_attribute)
+        report.add_confidence_plot(validation_set, nil, accept_value,split_attribute)
+        report.align_last_two_images "Confidence Plots"
+      end
     end
     report.end_section
     report.add_result(validation_set, [:validation_uri, :validation_report_uri]+VAL_ATTR_CV+VAL_ATTR_CLASS-[:num_folds, :dataset_uri, :algorithm_uri],
-- 
cgit v1.2.3
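With the actual/predicted filter in place, one confidence plot per
perspective can be drawn: restricting to an actual class plots the true
positive rate against confidence, restricting to a predicted class plots the
positive predictive value. A minimal standalone sketch (hypothetical helper,
not the service's plotting code) of the underlying curve — cumulative
performance over the k most confident predictions:

    def performance_vs_confidence(predicted, actual, confidence)
      order = confidence.each_index.sort_by{ |i| -confidence[i] }
      correct = 0
      order.each_with_index.map do |i, k|
        correct += 1 if predicted[i] == actual[i]
        [confidence[i], correct.to_f / (k + 1)]  # [cutoff, performance so far]
      end
    end

    curve = performance_vs_confidence([1, 1, 0, 0], [1, 0, 0, 1],
                                      [0.9, 0.8, 0.7, 0.6])
    curve.each { |cutoff, acc| puts "confidence >= #{cutoff}: #{acc.round(2)}" }

Feeding in only the instances of one actual class turns the same curve into a
TPR-vs-confidence plot; only the instances of one predicted class, into a
PPV-vs-confidence plot — which is what confidence_plot_class_performance
labels the axes for.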
From 3f1714c78e46f391b951b1a01adcd9badc713891 Mon Sep 17 00:00:00 2001
From: mguetlein
Date: Thu, 18 Aug 2011 13:46:10 +0200
Subject: add npv calculation, add ppv (renamed from precision) and npv to
 important classification stats

---
 lib/predictions.rb       | 23 ++++++++++++++++++++++-
 lib/validation_db.rb     |  5 +++--
 report/report_factory.rb |  2 +-
 3 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/lib/predictions.rb b/lib/predictions.rb
index 7de1751..2e90885 100755
--- a/lib/predictions.rb
+++ b/lib/predictions.rb
@@ -300,7 +300,11 @@ module Lib
   end

   def precision(class_index=nil)
-    return prediction_feature_value_map( lambda{ |i| precision(i) } ) if class_index==nil
+    return positive_predictive_value(class_index)
+  end
+
+  def positive_predictive_value(class_index=nil)
+    return prediction_feature_value_map( lambda{ |i| positive_predictive_value(i) } ) if class_index==nil

     correct = 0 # all instances with prediction class_index that are correctly classified
     total = 0 # all instances with prediction class_index
@@ -312,6 +316,23 @@ module Lib
     return correct/total.to_f
   end

+  def negative_predictive_value(class_index=nil)
+    return prediction_feature_value_map( lambda{ |i| negative_predictive_value(i) } ) if class_index==nil
+
+    correct = 0 # all instances not predicted as class_index that are correctly classified
+    total = 0 # all instances not predicted as class_index
+    (0..@num_classes-1).each do |i|
+      if i != class_index
+        (0..@num_classes-1).each do |j|
+          correct += @confusion_matrix[j][i] if j != class_index
+          total += @confusion_matrix[j][i]
+        end
+      end
+    end
+    return 0 if total==0
+    return correct/total.to_f
+  end
+
   def recall(class_index=nil)
     return true_positive_rate(class_index)
   end
diff --git a/lib/validation_db.rb b/lib/validation_db.rb
index 9af43de..be004fb 100755
--- a/lib/validation_db.rb
+++ b/lib/validation_db.rb
@@ -25,12 +25,13 @@ module Validation
   VAL_CLASS_PROPS_PER_CLASS_SUM = [ :num_false_positives,
     :num_false_negatives, :num_true_positives, :num_true_negatives ]
   VAL_CLASS_PROPS_PER_CLASS_AVG = [ :area_under_roc, :false_negative_rate, :false_positive_rate,
-    :f_measure, :precision,
+    :f_measure, :positive_predictive_value, :negative_predictive_value,
     :true_negative_rate, :true_positive_rate ] #:recall,
   VAL_CLASS_PROPS_PER_CLASS = VAL_CLASS_PROPS_PER_CLASS_SUM + VAL_CLASS_PROPS_PER_CLASS_AVG
   VAL_CLASS_PROPS_PER_CLASS_COMPLEMENT_EXISTS = [ :num_false_positives, :num_false_negatives,
     :num_true_positives, :num_true_negatives, :false_negative_rate, :false_positive_rate,
-    :true_negative_rate, :true_positive_rate, :area_under_roc ] #:precision, :recall,
+    :true_negative_rate, :true_positive_rate, :area_under_roc,
+    :positive_predictive_value, :negative_predictive_value ] #:precision, :recall,

   VAL_CLASS_PROPS = VAL_CLASS_PROPS_SINGLE + VAL_CLASS_PROPS_PER_CLASS
diff --git a/report/report_factory.rb b/report/report_factory.rb
index 2a50869..f6f76bd 100755
--- a/report/report_factory.rb
+++ b/report/report_factory.rb
@@ -6,7 +6,7 @@ VAL_ATTR_CV = [ :algorithm_uri, :dataset_uri, :num_folds, :crossvalidation_fold

 # selected attributes of interest when performing classification
 VAL_ATTR_CLASS = [ :num_instances, :num_unpredicted, :accuracy, :weighted_accuracy, :average_area_under_roc,
-  :area_under_roc, :f_measure, :true_positive_rate, :true_negative_rate ]
+  :area_under_roc, :f_measure, :true_positive_rate, :true_negative_rate, :positive_predictive_value, :negative_predictive_value ]
 VAL_ATTR_REGR = [ :num_instances, :num_unpredicted, :root_mean_squared_error,
   :weighted_root_mean_squared_error, :mean_absolute_error, :weighted_mean_absolute_error, :r_square, :weighted_r_square,
   :sample_correlation_coefficient ]
-- 
cgit v1.2.3
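A small worked example (not from the patch) to check the ppv/npv logic
against a 2x2 confusion matrix, using the same [actual][predicted] indexing
as @confusion_matrix:

    confusion = [ [40, 10],    # actual class 0: 40 predicted 0, 10 predicted 1
                  [ 5, 45] ]   # actual class 1:  5 predicted 0, 45 predicted 1

    # PPV for class 0: fraction of predicted-0 instances that are actually 0
    ppv = confusion[0][0].to_f / (confusion[0][0] + confusion[1][0])
    # NPV for class 0: fraction of predicted-not-0 instances actually not 0
    npv = confusion[1][1].to_f / (confusion[0][1] + confusion[1][1])

    puts "ppv=#{ppv.round(3)} npv=#{npv.round(3)}"  # => ppv=0.889 npv=0.818

negative_predictive_value above sums exactly these off-class columns: for
class_index 0 it accumulates column 1, counting the entries whose actual
class is also != 0 as correct.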
From f17213fa992e3a644b33cd3a4f6778a6a79dd152 Mon Sep 17 00:00:00 2001
From: mguetlein
Date: Thu, 18 Aug 2011 16:34:50 +0200
Subject: add ppv and npv to bar plot, remove debug output

---
 lib/predictions.rb       |  2 +-
 report/report_factory.rb | 12 +++++++++---
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/lib/predictions.rb b/lib/predictions.rb
index 2e90885..6c0e996 100755
--- a/lib/predictions.rb
+++ b/lib/predictions.rb
@@ -276,7 +276,7 @@ module Lib
         end
       end
     end
-    puts tp_conf.inspect+"\n"+fp_conf.inspect+"\n\n"
+    #puts tp_conf.inspect+"\n"+fp_conf.inspect+"\n\n"
     return 0.0 if tp_conf.size == 0
     return 1.0 if fp_conf.size == 0
diff --git a/report/report_factory.rb b/report/report_factory.rb
index f6f76bd..9995b42 100755
--- a/report/report_factory.rb
+++ b/report/report_factory.rb
@@ -13,11 +13,11 @@ VAL_ATTR_REGR = [ :num_instances, :num_unpredicted, :root_mean_squared_error,

 #VAL_ATTR_BAR_PLOT_CLASS = [ :accuracy, :average_area_under_roc,
 #  :area_under_roc, :f_measure, :true_positive_rate, :true_negative_rate ]
-VAL_ATTR_BAR_PLOT_CLASS = [ :accuracy, :f_measure, :true_positive_rate, :true_negative_rate ]
+VAL_ATTR_BAR_PLOT_CLASS = [ :accuracy, :f_measure, :true_positive_rate, :true_negative_rate, :positive_predictive_value, :negative_predictive_value ]
 VAL_ATTR_BAR_PLOT_REGR = [ :root_mean_squared_error, :mean_absolute_error, :r_square ]

-VAL_ATTR_TTEST_REGR = [:r_square, :root_mean_squared_error]
-VAL_ATTR_TTEST_CLASS = [:percent_correct, :average_area_under_roc]
+VAL_ATTR_TTEST_REGR = [ :r_square, :root_mean_squared_error ]
+VAL_ATTR_TTEST_CLASS = [ :accuracy, :average_area_under_roc ]
@@ -240,6 +240,12 @@ module Reports::ReportFactory
     if params[:ttest_significance]
       ttest_significance = params[:ttest_significance].to_f
     end
+
+    bar_plot_attributes += ttest_attributes
+    bar_plot_attributes.uniq!
+
+    result_attributes += ttest_attributes
+    result_attributes.uniq!

     dataset_grouping.each do |validations|
-- 
cgit v1.2.3

From 6d81c4cd94335864514964756d908641b5aef28f Mon Sep 17 00:00:00 2001
From: mguetlein
Date: Thu, 18 Aug 2011 17:16:21 +0200
Subject: adding t-testable attributes list

---
 report/report_application.rb | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/report/report_application.rb b/report/report_application.rb
index b96fb27..5fec6d1 100755
--- a/report/report_application.rb
+++ b/report/report_application.rb
@@ -50,6 +50,10 @@ get '/report/?' do
   end
 end

+def wrap(s, width=78)
+  s.gsub(/(.{1,#{width}})(\s+|\Z)/, "\\1\n")
+end
+
 get '/report/:report_type' do
   perform do |rs|
     case request.env['HTTP_ACCEPT'].to_s
@@ -60,8 +64,15 @@ get '/report/:report_type' do
         "Crossvalidations: "+url_for("/crossvalidation",:full)
       description =
         "A list of all "+params[:report_type]+" reports. To create a report, use the POST method."
+      if params[:report_type]=="algorithm_comparison"
+        description += "\n\nThis report can be used to compare the validation results of different algorithms that have been validated on the same dataset."
+        description += "\nThe following attributes can be compared with the t-test:"
+        description += "\n\n* All validation types:\n"+wrap((Validation::VAL_PROPS_SUM+Validation::VAL_PROPS_AVG).join(", "),120)
+        description += "\n* Classification validations:\n"+wrap(Validation::VAL_CLASS_PROPS.join(", "),120)
+        description += "\n* Regression validations:\n"+wrap(Validation::VAL_REGR_PROPS.join(", "),120)
+      end
+
       post_params = [[:validation_uris]]
-
       post_command = OpenTox::PostCommand.new request.url,"Create validation report"
       val_uri_description = params[:report_type]=="algorithm_comparison" ? "Separate multiple uris with ','" : nil
       # trick for easy report creation
-- 
cgit v1.2.3

From 9db1f68871ad3e9be92744fd908f9fee9eeb18a0 Mon Sep 17 00:00:00 2001
From: mr
Date: Fri, 19 Aug 2011 14:31:29 +0200
Subject: add missing subjectid

---
 validation/validation_application.rb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/validation/validation_application.rb b/validation/validation_application.rb
index d652de4..7e0e10f 100755
--- a/validation/validation_application.rb
+++ b/validation/validation_application.rb
@@ -10,7 +10,7 @@ get '/crossvalidation/?' do
   LOGGER.info "list all crossvalidations"
   model_uri = params.delete("model") || params.delete("model_uri")
   if model_uri
-    model = OpenTox::Model::Generic.find(model_uri)
+    model = OpenTox::Model::Generic.find(model_uri, @subjectid)
     params[:algorithm] = model.metadata[OT.algorithm]
     params[:dataset] = model.metadata[OT.trainingDataset]
   end
-- 
cgit v1.2.3
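Taken together, the model filter from the second and last commits can be
exercised against a running validation service roughly like this (a hedged
sketch; the service and model URIs are placeholders, and a subjectid is only
needed on a protected installation):

    require 'rest-client'

    service = "http://example.org/validation"   # placeholder
    model   = "http://example.org/model/1"      # placeholder

    # lists only the crossvalidations whose algorithm and dataset match the
    # model's metadata, as resolved server-side from the model URI
    uris = RestClient.get("#{service}/crossvalidation",
                          { :params => { :model => model },
                            :accept => "text/uri-list" })
    puts uris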