diff options
Diffstat (limited to 'report')
-rwxr-xr-x | report/environment.rb | 16 | ||||
-rw-r--r-- | report/plot_factory.rb | 344 | ||||
-rwxr-xr-x | report/report_content.rb | 99 | ||||
-rwxr-xr-x | report/report_factory.rb | 132 | ||||
-rw-r--r-- | report/report_service.rb | 14 | ||||
-rw-r--r-- | report/statistical_test.rb | 49 | ||||
-rwxr-xr-x | report/validation_access.rb | 176 | ||||
-rwxr-xr-x | report/validation_data.rb | 56 |
8 files changed, 631 insertions, 255 deletions
diff --git a/report/environment.rb b/report/environment.rb index 72320a0..7addc45 100755 --- a/report/environment.rb +++ b/report/environment.rb @@ -1,12 +1,24 @@ - ['rubygems', 'logger', 'fileutils', 'sinatra', 'sinatra/url_for', 'rest_client', 'yaml', 'fileutils', 'mime/types', 'abbrev', 'rexml/document', 'ruby-plot', 'opentox-ruby' ].each do |g| require g end -gem 'ruby-plot', "~>0.5.0" +gem 'ruby-plot', "~>0.6.0" module Reports + + def self.r_util + @@r_util = OpenTox::RUtil.new unless defined?@@r_util and @@r_util + @@r_util + end + + def self.quit_r + if defined?@@r_util and @@r_util + @@r_util.quit_r + @@r_util = nil + end + end + end require "lib/ot_predictions.rb" diff --git a/report/plot_factory.rb b/report/plot_factory.rb index 2074ce5..f114dd3 100644 --- a/report/plot_factory.rb +++ b/report/plot_factory.rb @@ -2,6 +2,10 @@ ENV['JAVA_HOME'] = "/usr/bin" unless ENV['JAVA_HOME'] ENV['PATH'] = ENV['JAVA_HOME']+":"+ENV['PATH'] unless ENV['PATH'].split(":").index(ENV['JAVA_HOME']) ENV['RANK_PLOTTER_JAR'] = "RankPlotter/RankPlotter.jar" unless ENV['RANK_PLOTTER_JAR'] +CONF_PLOT_RANGE = { :accuracy => [0.45,1.05], :true_positive_rate => [0.45,1.05],:true_negative_rate => [0.45,1.05], + :false_positive_rate => [0.45,1.05], :false_negative_rate => [0.45,1.05], :positive_predictive_value => [0.45,1.05], + :negative_predictive_value => [0.45,1.05], :r_square => [0, 1.05], :sample_correlation_coefficient => [0, 1.05] } + class Array def swap!(i,j) tmp = self[i] @@ -47,7 +51,6 @@ class Array end end - module Reports module PlotFactory @@ -81,9 +84,11 @@ module Reports y_i = valid_indices.collect{ |i| y_i[i] } end - names << ( name_attribute==:crossvalidation_fold ? "fold " : "" ) + v.send(name_attribute).to_s - x << x_i - y << y_i + if x_i.size>0 + names << ( name_attribute==:crossvalidation_fold ? "fold " : "" ) + v.send(name_attribute).to_s + x << x_i + y << y_i + end end names = [""] if names.size==1 @@ -95,6 +100,34 @@ module Reports omit_count end + def self.create_train_test_plot( out_files, validation_set, only_prediction_feature, waiting_task ) + if only_prediction_feature + train = [] + test = [] + validation_set.validations.each do |v| + [[v.test_dataset_uri, test, v.test_target_dataset_uri], + [v.training_dataset_uri, train, v.training_dataset_uri]].each do |uri,array,uri2| + d = Lib::DatasetCache.find(uri, validation_set.validations[0].subjectid) + d2 = Lib::DatasetCache.find((uri2 ? uri2 : uri), validation_set.validations[0].subjectid) + d.compounds.each do |c| + d2.data_entries[c][v.prediction_feature].each do |val| + array << val + end if d2.data_entries[c] and d2.data_entries[c][v.prediction_feature] + end + end + end + waiting_task.progress(50) if waiting_task + + numerical = validation_set.unique_feature_type=="regression" + Reports::r_util.double_hist_plot(out_files, train, test, numerical, numerical, "Training Data", "Test Data", + "Prediction Feature Distribution", validation_set.validations.first.prediction_feature ) + else + Reports::r_util.feature_value_plot(out_files, validation_set.validations[0].training_feature_dataset_uri, + validation_set.validations[0].test_feature_dataset_uri, "Training Data", "Test Data", + nil, true, validation_set.validations[0].subjectid, waiting_task ) + end + end + # creates a roc plot (result is plotted into out_file) # * if (split_set_attributes == nil?) @@ -130,31 +163,22 @@ module Reports end end - def self.confidence_plot_class_performance( validation_set, actual_accept_value, predicted_accept_value ) + def self.confidence_plot_class_performance( validation_set, performance_attribute, performance_accept_value ) true_class = nil - if actual_accept_value==nil and predicted_accept_value==nil - perf = "Accuracy" - elsif actual_accept_value!=nil - if validation_set.get_true_accept_value==actual_accept_value - perf = "True Positive Rate" - true_class = actual_accept_value - elsif validation_set.get_accept_values.size==2 and validation_set.get_true_accept_value==(validation_set.get_accept_values-[actual_accept_value])[0] - perf = "True Negative Rate" + if performance_accept_value==nil + perf = performance_attribute.to_s.nice_attr + else + invert_true_class = (validation_set.get_accept_values.size==2 and + validation_set.get_true_accept_value==(validation_set.get_accept_values-[performance_accept_value])[0]) + if invert_true_class && performance_attribute==:true_positive_rate + perf = :true_negative_rate.to_s.nice_attr true_class = validation_set.get_true_accept_value - else - perf = "True Positive Rate" - true_class = actual_accept_value - end - elsif predicted_accept_value!=nil - if validation_set.get_true_accept_value==predicted_accept_value - perf = "Positive Predictive Value" - true_class = predicted_accept_value - elsif validation_set.get_accept_values.size==2 and validation_set.get_true_accept_value==(validation_set.get_accept_values-[predicted_accept_value])[0] - perf = "Negative Predictive Value" + elsif invert_true_class && performance_attribute==:positive_predictive_value + perf = :negative_predictive_value.to_s.nice_attr true_class = validation_set.get_true_accept_value else - perf = "Positive Predictive Value" - true_class = predicted_accept_value + perf = performance_attribute.to_s.nice_attr + true_class = performance_accept_value end end title = perf+" vs Confidence Plot" @@ -162,12 +186,8 @@ module Reports {:title =>title, :performance => perf} end - - def self.create_confidence_plot( out_files, validation_set, actual_accept_value = nil, - predicted_accept_value = nil, split_set_attribute=nil, show_single_curves=false ) + def self.create_confidence_plot( out_files, validation_set, performance_attribute, performance_accept_value, split_set_attribute=nil, show_single_curves=false ) - raise "param combination not supported" if actual_accept_value!=nil and predicted_accept_value!=nil - out_files = [out_files] unless out_files.is_a?(Array) LOGGER.debug "creating confidence plot for '"+validation_set.size.to_s+"' validations, out-file:"+out_files.inspect @@ -178,7 +198,7 @@ module Reports performance = [] attribute_values.each do |value| begin - data = transform_confidence_predictions(validation_set.filter({split_set_attribute => value}), actual_accept_value, predicted_accept_value, false) + data = transform_confidence_predictions(validation_set.filter({split_set_attribute => value}), performance_attribute, performance_accept_value, false) names << split_set_attribute.to_s.nice_attr+" "+value.to_s confidence << data[:confidence][0] performance << data[:performance][0] @@ -186,30 +206,47 @@ module Reports LOGGER.warn "could not create confidence plot for "+value.to_s end end - #RubyPlot::plot_lines(out_file, "Percent Correct vs Confidence Plot", "Confidence", "Percent Correct", names, fp_rates, tp_rates ) out_files.each do |out_file| - case validation_set.unique_feature_type - when "classification" - info = confidence_plot_class_performance( validation_set, actual_accept_value, predicted_accept_value ) - RubyPlot::accuracy_confidence_plot(out_file, info[:title], "Confidence", info[:performance], names, confidence, performance) - when "regression" - RubyPlot::accuracy_confidence_plot(out_file, "RMSE vs Confidence Plot", "Confidence", "RMSE", names, confidence, performance, true) - end + info = confidence_plot_class_performance( validation_set, performance_attribute, performance_accept_value ) + RubyPlot::confidence_plot(out_file, info[:title], "Confidence", info[:performance], + names, confidence, performance, CONF_PLOT_RANGE[performance_attribute]) end else - data = transform_confidence_predictions(validation_set, actual_accept_value, predicted_accept_value, show_single_curves) - out_files.each do |out_file| - case validation_set.unique_feature_type - when "classification" - info = confidence_plot_class_performance( validation_set, actual_accept_value, predicted_accept_value ) - RubyPlot::accuracy_confidence_plot(out_file, info[:title], "Confidence", info[:performance], data[:names], data[:confidence], data[:performance]) - when "regression" - RubyPlot::accuracy_confidence_plot(out_file, "RMSE vs Confidence Plot", "Confidence", "RMSE", data[:names], data[:confidence], data[:performance], true) - end + data = transform_confidence_predictions(validation_set, performance_attribute, performance_accept_value, show_single_curves) + out_files.each do |out_file| + info = confidence_plot_class_performance( validation_set, performance_attribute, performance_accept_value ) + RubyPlot::confidence_plot(out_file, info[:title], "Confidence", info[:performance], + data[:names], data[:confidence], data[:performance], CONF_PLOT_RANGE[performance_attribute]) end end end + def self.create_box_plot( out_files, validation_set, title_attribute, value_attribute, class_value ) + + out_files = [out_files] unless out_files.is_a?(Array) + LOGGER.debug "creating box plot, out-files:"+out_files.inspect + + data = {} + validation_set.validations.each do |v| + value = v.send(value_attribute) + if value.is_a?(Hash) + if class_value==nil + avg_value = 0 + value.values.each{ |val| avg_value+=val } + value = avg_value/value.values.size.to_f + else + raise "box plot value is hash, but no entry for class-value ("+class_value.to_s+ + "); value for "+value_attribute.to_s+" -> "+value.inspect unless value.key?(class_value) + value = value[class_value] + end + end + + data[v.send(title_attribute).to_s] = [] unless data[v.send(title_attribute).to_s] + data[v.send(title_attribute).to_s] << value + end + + Reports::r_util.boxplot( out_files, data) + end def self.create_bar_plot( out_files, validation_set, title_attribute, value_attributes ) @@ -349,70 +386,164 @@ module Reports end - def self.transform_confidence_predictions(validation_set, actual_accept_value, predicted_accept_value, add_single_folds=false) + + def self.transform_confidence_predictions(validation_set, performance_attribute, performance_accept_value, add_single_folds) + + feature_type = validation_set.unique_feature_type + accept_values = validation_set.unique_feature_type=="classification" ? validation_set.get_accept_values : nil if (validation_set.size > 1) - names = []; performance = []; confidence = []; faint = [] sum_confidence_values = { :predicted_values => [], :actual_values => [], :confidence_values => []} (0..validation_set.size-1).each do |i| - confidence_values = validation_set.get(i).get_predictions.get_prediction_values(actual_accept_value, predicted_accept_value) + confidence_values = validation_set.get(i).get_predictions.get_prediction_values(performance_attribute, performance_accept_value) sum_confidence_values[:predicted_values] += confidence_values[:predicted_values] sum_confidence_values[:confidence_values] += confidence_values[:confidence_values] sum_confidence_values[:actual_values] += confidence_values[:actual_values] if add_single_folds begin - pref_conf_rates = get_performance_confidence_rates(confidence_values) + perf_conf_rates = get_performance_confidence_rates(confidence_values, performance_attribute, performance_accept_value, + feature_type, accept_values) names << "fold "+i.to_s - performance << pref_conf_rates[:performance] - confidence << pref_conf_rates[:confidence] + performance << perf_conf_rates[:performance] + confidence << perf_conf_rates[:confidence] faint << true rescue LOGGER.warn "could not get confidence vals for fold "+i.to_s end end end - pref_conf_rates = get_performance_confidence_rates(sum_confidence_values, validation_set.unique_feature_type) + perf_conf_rates = get_performance_confidence_rates(sum_confidence_values, performance_attribute, performance_accept_value, + feature_type, accept_values) names << nil # "all" - performance << pref_conf_rates[:performance] - confidence << pref_conf_rates[:confidence] + performance << perf_conf_rates[:performance] + confidence << perf_conf_rates[:confidence] faint << false return { :names => names, :performance => performance, :confidence => confidence, :faint => faint } else - confidence_values = validation_set.validations[0].get_predictions.get_prediction_values(actual_accept_value, predicted_accept_value) - pref_conf_rates = get_performance_confidence_rates(confidence_values, validation_set.unique_feature_type) - return { :names => [""], :performance => [pref_conf_rates[:performance]], :confidence => [pref_conf_rates[:confidence]] } + confidence_values = validation_set.validations[0].get_predictions.get_prediction_values(performance_attribute, performance_accept_value) + perf_conf_rates = get_performance_confidence_rates(confidence_values, performance_attribute, performance_accept_value, + feature_type, accept_values) + return { :names => [""], :performance => [perf_conf_rates[:performance]], :confidence => [perf_conf_rates[:confidence]] } end end def self.demo_roc_plot -# roc_values = {:confidence_values => [0.1, 0.9, 0.5, 0.6, 0.6, 0.6], -# :predicted_values => [1, 0, 0, 1, 0, 1], -# :actual_values => [0, 1, 0, 0, 1, 1]} - roc_values = {:confidence_values => [0.9, 0.8, 0.7, 0.6, 0.5, 0.4], - :true_positives => [1, 1, 1, 0, 1, 0]} - tp_fp_rates = get_tp_fp_rates(roc_values) - labels = [] - tp_fp_rates[:youden].each do |point,confidence| - labels << ["confidence: "+confidence.to_s, point[0], point[1]] - end - + + seed = 831 #rand(1000) + puts seed + srand seed + plot_data = [] - plot_data << RubyPlot::LinePlotData.new(:name => "testname", :x_values => tp_fp_rates[:fp_rate], :y_values => tp_fp_rates[:tp_rate], :labels => labels) + n = 250 + a_cutoff = 0.5 + + a_real = [] + a_class = [] + n.times do |i| + a_real << rand + a_class << ( a_real[-1]>a_cutoff ? "a" : "b") + end + + puts a_real.to_csv + puts a_class.to_csv + + p_props = [[],[]] + p_classes = [] + + 2.times do |index| + + if (index==0) + p_noise = 0.15 + p_cutoff = 0.8 + else + p_noise = 0.5 + p_cutoff = 0.5 + end + + p_real = [] + p_class = [] + p_prop = [] + correct = [] + n.times do |i| + if rand<0.04 + p_real << rand + else + p_real << (a_real[i] + ((rand * p_noise) * (rand<0.5 ? 1 : -1))) + end + p_prop << ((p_cutoff-p_real[i]).abs) + p_class << ( p_real[-1]>p_cutoff ? "a" : "b") + correct << ((p_class[i]==a_class[i]) ? 1 : 0) + end + + puts "" + puts p_real.to_csv + puts p_class.to_csv + puts p_prop.to_csv + + p_prop_max = p_prop.max + p_prop_min = p_prop.min + p_prop_delta = p_prop_max - p_prop_min + n.times do |i| + p_prop[i] = (p_prop[i] - p_prop_min)/p_prop_delta.to_f + p_props[index][i] = p_prop[i] + end + + puts p_prop.to_csv + + p_classes << p_class + + (0..n-2).each do |i| + (i+1..n-1).each do |j| + if p_prop[i]<p_prop[j] + tmp = p_prop[i] + p_prop[i] = p_prop[j] + p_prop[j] = tmp + tmp = correct[i] + correct[i] = correct[j] + correct[j] = tmp + end + end + end + + puts p_prop.to_csv + puts correct.to_csv + puts "acc: "+(correct.sum/n.to_f).to_s + + roc_values = {:confidence_values => p_prop, + :true_positives => correct} + tp_fp_rates = get_tp_fp_rates(roc_values) + labels = [] + tp_fp_rates[:youden].each do |point,confidence| + labels << ["confidence: "+confidence.to_s, point[0], point[1]] + end + + plot_data << RubyPlot::LinePlotData.new(:name => "alg"+index.to_s, + :x_values => tp_fp_rates[:fp_rate], + :y_values => tp_fp_rates[:tp_rate]) + #,:labels => labels) + end + + puts "instance,class,prediction_1,propability_1,prediction_2,propability_2" + n.times do |i| + puts (i+1).to_s+","+a_class[i].to_s+","+p_classes[0][i].to_s+ + ","+p_props[0][i].to_s+ + ","+p_classes[1][i].to_s+","+p_props[1][i].to_s + end RubyPlot::plot_lines("/tmp/plot.png", "ROC-Plot", "False positive rate", "True Positive Rate", plot_data ) end - def self.get_performance_confidence_rates(roc_values, feature_type) + def self.get_performance_confidence_rates(pred_values, performance_attribute, performance_accept_value, feature_type, accept_values) - c = roc_values[:confidence_values] - p = roc_values[:predicted_values] - a = roc_values[:actual_values] + c = pred_values[:confidence_values] + p = pred_values[:predicted_values] + a = pred_values[:actual_values] raise "no prediction values for confidence plot" if p.size==0 (0..p.size-2).each do |i| @@ -425,40 +556,28 @@ module Reports end end #puts c.inspect+"\n"+a.inspect+"\n"+p.inspect+"\n\n" - perf = [] conf = [] - - case feature_type - when "classification" - count = 0 - correct = 0 - (0..p.size-1).each do |i| - count += 1 - correct += 1 if p[i]==a[i] - if i>0 && (c[i]>=conf[-1]-0.00001) - perf.pop - conf.pop - end - perf << correct/count.to_f * 100 - conf << c[i] + predictions = nil + (0..p.size-1).each do |i| + # melt nearly identical confidence values to get a smoother graph + if i>0 && (c[i]>=conf[-1]-0.00001) + perf.pop + conf.pop end - when "regression" - count = 0 - sum_squared_error = 0 - (0..p.size-1).each do |i| - count += 1 - sum_squared_error += (p[i]-a[i])**2 - if i>0 && (c[i]>=conf[-1]-0.00001) - perf.pop - conf.pop - end - perf << Math.sqrt(sum_squared_error/count.to_f) - conf << c[i] + if (predictions == nil) + data = {:predicted_values => [p[i]],:actual_values => [a[i]], :confidence_values => [c[i]], + :feature_type => feature_type, :accept_values => accept_values} + predictions = Lib::Predictions.new(data) + else + predictions.update_stats(p[i], a[i], c[i]) end + + val = predictions.send(performance_attribute) + val = val[performance_accept_value] if val.is_a?(Hash) + perf << val + conf << c[i] end - #puts perf.inspect - return {:performance => perf,:confidence => conf} end @@ -553,7 +672,20 @@ end #require "rubygems" #require "ruby-plot" -##Reports::PlotFactory::demo_ranking_plot +###Reports::PlotFactory::demo_ranking_plot +#class Array +# def sum +# inject( nil ) { |sum,x| sum ? sum+x : x } +# end +# +# def to_csv +# s = "" +# each do |x| +# s += (x.is_a?(Float) ? ("%.3f"%x) : (" "+x.to_s) )+", " +# end +# s +# end +#end #Reports::PlotFactory::demo_roc_plot #a = [1, 0, 1, 2, 3, 0, 2] diff --git a/report/report_content.rb b/report/report_content.rb index 8d6d44b..80473c5 100755 --- a/report/report_content.rb +++ b/report/report_content.rb @@ -22,6 +22,12 @@ class Reports::ReportContent @current_section = @xml_report.get_root_element end + def add_warning(warning) + sec = @xml_report.add_section(@current_section, "Warning") + @xml_report.add_paragraph(sec, warning) + end_section() + end + def add_paired_ttest_tables( validation_set, group_attribute, test_attributes, @@ -55,7 +61,6 @@ class Reports::ReportContent test_matrix[:num_results].to_s, table, true, true) end end - Reports::ReportStatisticalTest.quit_r end def add_predictions( validation_set, @@ -156,6 +161,7 @@ class Reports::ReportContent section_text += "\nWARNING: regression plot information not available for all validation results" if prediction_set.size!=validation_set.size @xml_report.add_paragraph(section_regr, section_text) if section_text + begin log_str = (log ? "_log" : "") plot_png = add_tmp_file("regr_plot"+log_str, "png") @@ -176,6 +182,39 @@ class Reports::ReportContent align_last_two_images section_title+" in logarithmic and linear scale (values <= 0 are omitted in logarithmic scale)" end + def add_train_test_plot( validation_set, + only_prediction_feature, + waiting_task, + section_title="Training Test Distribution Plot", + section_text=nil, + image_title=nil) + + section_plot = @current_section + prediction_set = validation_set.collect{ |v| v.get_predictions } + @xml_report.add_paragraph(section_plot, section_text) if section_text + + begin + plot_png = add_tmp_file("train_test_plot_#{only_prediction_feature}", "png") + plot_svg = add_tmp_file("train_test_plot_#{only_prediction_feature}", "svg") + omit_count = Reports::PlotFactory.create_train_test_plot( [plot_png[:path], plot_svg[:path]], + prediction_set, only_prediction_feature, waiting_task ) + unless image_title + if only_prediction_feature + image_title = "Prediction Feature: #{validation_set.validations.first.prediction_feature}" + else + image_title = "Features Excluding Prediction Feature" + end + end + @xml_report.add_imagefigure(section_plot, image_title, plot_png[:name], "PNG", 100, plot_svg[:name]) + rescue Exception => ex + LOGGER.error("Could not create train test plot: "+ex.message) + rm_tmp_file(plot_png[:name]) if plot_png + rm_tmp_file(plot_svg[:name]) if plot_svg + @xml_report.add_paragraph(section_plot, "could not create train test plot: "+ex.message) + end + + end + def add_roc_plot( validation_set, accept_value, split_set_attribute=nil, @@ -213,8 +252,8 @@ class Reports::ReportContent end def add_confidence_plot( validation_set, - actual_accept_value = nil, - predicted_accept_value = nil, + performance_attribute, + performance_accept_value, split_set_attribute = nil, image_title = "Confidence Plot", section_text="") @@ -234,7 +273,8 @@ class Reports::ReportContent begin plot_png = add_tmp_file("conf_plot", "png") plot_svg = add_tmp_file("conf_plot", "svg") - Reports::PlotFactory.create_confidence_plot( [plot_png[:path], plot_svg[:path]], prediction_set, actual_accept_value, predicted_accept_value, split_set_attribute, false ) + Reports::PlotFactory.create_confidence_plot( [plot_png[:path], plot_svg[:path]], prediction_set, performance_attribute, + performance_accept_value, split_set_attribute, false ) @xml_report.add_imagefigure(section_conf, image_title, plot_png[:name], "PNG", 100, plot_svg[:name]) rescue Exception => ex msg = "WARNING could not create confidence plot: "+ex.message @@ -309,6 +349,57 @@ class Reports::ReportContent @xml_report.add_imagefigure(section_bar, image_title, plot_png[:name], "PNG", 100, plot_svg[:name]) end + def add_box_plot(validation_set, + title_attribute, + value_attributes, + section_title="Boxplots", + section_text=nil) + + section_box = @xml_report.add_section(@current_section, section_title) + @xml_report.add_paragraph(section_box, section_text) if section_text + + plot_png = nil; plot_svg = nil + begin + plot_input = [] + value_attributes.each do |a| + accept = validation_set.get_accept_values_for_attr(a) + if accept and accept.size>0 + accept.each do |c| + title = a.to_s.gsub("_","-") + ( (accept.size==1 || c==nil) ? "" : "("+c.to_s+")" ) + plot_input << [a,c,title] + end + else + plot_input << [a,nil,a.to_s.gsub("_","-")] + end + end + + i = 0 + figs = [] + plot_input.each do |attrib,class_value,image_title| + plot_png = add_tmp_file("box_plot#{i}", "png") + plot_svg = add_tmp_file("box_plot#{i}", "svg") + Reports::PlotFactory.create_box_plot([plot_png[:path], plot_svg[:path]], + validation_set, title_attribute, attrib, class_value ) + figs << @xml_report.imagefigure(image_title, plot_png[:name], + "PNG", 50, plot_svg[:name]) + plot_png = nil; plot_svg = nil + i += 1 + end + + i = 1 + figs.each_slice(4) do |f| + @xml_report.add_imagefigures_in_row(section_box,f,"Boxplots #{i}") + i+=1 + end + rescue Exception => ex + msg = "WARNING could not create box plot: "+ex.message + LOGGER.error(msg) + rm_tmp_file(plot_png[:name]) if plot_png + rm_tmp_file(plot_svg[:name]) if plot_svg + @xml_report.add_paragraph(section_box, msg) + end + end + private def add_tmp_file(name, extension) tmp_file_name = name.to_s+@tmp_file_count.to_s+"."+extension.to_s diff --git a/report/report_factory.rb b/report/report_factory.rb index 9995b42..2bb74ee 100755 --- a/report/report_factory.rb +++ b/report/report_factory.rb @@ -11,10 +11,10 @@ VAL_ATTR_REGR = [ :num_instances, :num_unpredicted, :root_mean_squared_error, :weighted_root_mean_squared_error, :mean_absolute_error, :weighted_mean_absolute_error, :r_square, :weighted_r_square, :sample_correlation_coefficient ] -#VAL_ATTR_BAR_PLOT_CLASS = [ :accuracy, :average_area_under_roc, +#VAL_ATTR_BOX_PLOT_CLASS = [ :accuracy, :average_area_under_roc, # :area_under_roc, :f_measure, :true_positive_rate, :true_negative_rate ] -VAL_ATTR_BAR_PLOT_CLASS = [ :accuracy, :f_measure, :true_positive_rate, :true_negative_rate, :positive_predictive_value, :negative_predictive_value ] -VAL_ATTR_BAR_PLOT_REGR = [ :root_mean_squared_error, :mean_absolute_error, :r_square ] +VAL_ATTR_BOX_PLOT_CLASS = [ :accuracy, :area_under_roc, :f_measure, :true_positive_rate, :true_negative_rate, :positive_predictive_value, :negative_predictive_value ] +VAL_ATTR_BOX_PLOT_REGR = [ :root_mean_squared_error, :mean_absolute_error, :r_square ] VAL_ATTR_TTEST_REGR = [ :r_square, :root_mean_squared_error ] VAL_ATTR_TTEST_CLASS = [ :accuracy, :average_area_under_roc ] @@ -29,8 +29,9 @@ module Reports::ReportFactory RT_VALIDATION = "validation" RT_CV = "crossvalidation" RT_ALG_COMP = "algorithm_comparison" + RT_METHOD_COMP = "method_comparison" - REPORT_TYPES = [RT_VALIDATION, RT_CV, RT_ALG_COMP ] + REPORT_TYPES = [RT_VALIDATION, RT_CV, RT_ALG_COMP, RT_METHOD_COMP ] # creates a report of a certain type according to the validation data in validation_set # @@ -40,11 +41,13 @@ module Reports::ReportFactory def self.create_report(type, validation_set, params={}, task=nil) case type when RT_VALIDATION - create_report_validation(validation_set, task) + create_report_validation(validation_set, {}, task) when RT_CV - create_report_crossvalidation(validation_set, task) + create_report_crossvalidation(validation_set, {}, task) when RT_ALG_COMP create_report_compare_algorithms(validation_set, params, task) + when RT_METHOD_COMP + create_report_compare_methods(validation_set, params, task) else raise "unknown report type "+type.to_s end @@ -63,14 +66,26 @@ module Reports::ReportFactory end end - def self.create_report_validation(validation_set, task=nil) + def self.add_filter_warning(report, filter_params) + msg = "The validation results for this report have been filtered." + msg += " Minimum confidence: "+ filter_params[:min_confidence].to_s if + filter_params[:min_confidence]!=nil + msg += " Minimum number of predictions (sorted with confidence): "+ filter_params[:min_num_predictions].to_s if + filter_params[:min_num_predictions]!=nil + msg += " Maximum number of predictions: "+ filter_params[:max_num_predictions].to_s if + filter_params[:max_num_predictions]!=nil + report.add_warning(msg) + end + + def self.create_report_validation(validation_set, params, task=nil) raise OpenTox::BadRequestError.new("num validations is not equal to 1") unless validation_set.size==1 val = validation_set.validations[0] - pre_load_predictions( validation_set, OpenTox::SubTask.create(task,0,80) ) + pre_load_predictions( validation_set, OpenTox::SubTask.create(task,0,50) ) report = Reports::ReportContent.new("Validation report") - + add_filter_warning(report, validation_set.filter_params) if validation_set.filter_params!=nil + case val.feature_type when "classification" report.add_result(validation_set, [:validation_uri] + VAL_ATTR_TRAIN_TEST + VAL_ATTR_CLASS, "Results", "Results") @@ -85,30 +100,35 @@ module Reports::ReportFactory report.align_last_two_images "ROC Plots" end end - report.add_confidence_plot(validation_set) + report.add_confidence_plot(validation_set, :accuracy, nil) validation_set.get_accept_values.each do |accept_value| - report.add_confidence_plot(validation_set, accept_value, nil) - report.add_confidence_plot(validation_set, nil, accept_value) + report.add_confidence_plot(validation_set, :true_positive_rate, accept_value) + report.add_confidence_plot(validation_set, :positive_predictive_value, accept_value) report.align_last_two_images "Confidence Plots" end - report.end_section when "regression" report.add_result(validation_set, [:validation_uri] + VAL_ATTR_TRAIN_TEST + VAL_ATTR_REGR, "Results", "Results") report.add_section("Plots") report.add_regression_plot(validation_set, :model_uri) - report.add_confidence_plot(validation_set) - report.end_section + report.add_confidence_plot(validation_set, :root_mean_squared_error, nil) + report.add_confidence_plot(validation_set, :r_square, nil) + report.align_last_two_images "Confidence Plots" end - task.progress(90) if task - + task.progress(70) if task + report.add_train_test_plot( validation_set, false, OpenTox::SubTask.create(task,70,80) ) + report.add_train_test_plot( validation_set, true, OpenTox::SubTask.create(task,80,90) ) + report.align_last_two_images "Training Test Data Distribution Plots" + report.end_section + report.add_result(validation_set, Validation::ALL_PROPS, "All Results", "All Results") report.add_predictions( validation_set ) task.progress(100) if task report end - def self.create_report_crossvalidation(validation_set, task=nil) + def self.create_report_crossvalidation(validation_set, params, task=nil) + raise OpenTox::BadRequestError.new "cv report not implemented for filter params" if validation_set.filter_params!=nil raise OpenTox::BadRequestError.new("num validations is not >1") unless validation_set.size>1 raise OpenTox::BadRequestError.new("crossvalidation-id not unique and != nil: "+ validation_set.get_values(:crossvalidation_id,false).inspect) if validation_set.unique_value(:crossvalidation_id)==nil @@ -117,7 +137,7 @@ module Reports::ReportFactory validation_set.unique_value(:num_folds).to_s+")") unless validation_set.unique_value(:num_folds).to_i==validation_set.size raise OpenTox::BadRequestError.new("num different folds is not equal to num validations") unless validation_set.num_different_values(:crossvalidation_fold)==validation_set.size raise OpenTox::BadRequestError.new("validations must have unique feature type, i.e. must be either all regression, "+ - "or all classification validations") unless validation_set.unique_feature_type + "or all classification validations") unless validation_set.unique_feature_type pre_load_predictions( validation_set, OpenTox::SubTask.create(task,0,80) ) validation_set.validations.sort! do |x,y| x.crossvalidation_fold.to_f <=> y.crossvalidation_fold.to_f @@ -136,34 +156,40 @@ module Reports::ReportFactory report.add_confusion_matrix(cv_set.validations[0]) report.add_section("Plots") [nil, :crossvalidation_fold].each do |split_attribute| - if (validation_set.get_accept_values.size == 2) if validation_set.get_true_accept_value!=nil report.add_roc_plot(validation_set, validation_set.get_true_accept_value,split_attribute) else - report.add_roc_plot(validation_set, validation_set.get_accept_values[0],split_attribute) - report.add_roc_plot(validation_set, validation_set.get_accept_values[1],split_attribute) + report.add_roc_plot(validation_set, validation_set.get_accept_values[0], split_attribute) + report.add_roc_plot(validation_set, validation_set.get_accept_values[1], split_attribute) report.align_last_two_images "ROC Plots" end end - report.add_confidence_plot(validation_set,nil,nil,split_attribute) + report.add_confidence_plot(validation_set,:accuracy,nil,split_attribute) validation_set.get_accept_values.each do |accept_value| - report.add_confidence_plot(validation_set, accept_value, nil,split_attribute) - report.add_confidence_plot(validation_set, nil, accept_value,split_attribute) + report.add_confidence_plot(validation_set, :true_positive_rate, accept_value, split_attribute) + report.add_confidence_plot(validation_set, :positive_predictive_value, accept_value, split_attribute) report.align_last_two_images "Confidence Plots" end end report.end_section - report.add_result(validation_set, [:validation_uri, :validation_report_uri]+VAL_ATTR_CV+VAL_ATTR_CLASS-[:num_folds, :dataset_uri, :algorithm_uri], + report.add_result(validation_set, + [:validation_uri, :validation_report_uri]+VAL_ATTR_CV+VAL_ATTR_CLASS-[:num_folds, :dataset_uri, :algorithm_uri], "Results","Results") when "regression" report.add_result(cv_set, [:crossvalidation_uri]+VAL_ATTR_CV+VAL_ATTR_REGR-[:crossvalidation_fold],res_titel, res_titel, res_text) report.add_section("Plots") report.add_regression_plot(validation_set, :crossvalidation_fold) - report.add_confidence_plot(validation_set) - report.add_confidence_plot(validation_set, nil, :crossvalidation_fold) + report.add_confidence_plot(validation_set, :root_mean_squared_error, nil) + report.add_confidence_plot(validation_set, :r_square, nil) + report.align_last_two_images "Confidence Plots" + report.add_confidence_plot(validation_set, :root_mean_squared_error, nil, :crossvalidation_fold) + report.add_confidence_plot(validation_set, :r_square, nil, :crossvalidation_fold) + report.align_last_two_images "Confidence Plots Across Folds" report.end_section - report.add_result(validation_set, [:validation_uri, :validation_report_uri]+VAL_ATTR_CV+VAL_ATTR_REGR-[:num_folds, :dataset_uri, :algorithm_uri], "Results","Results") + report.add_result(validation_set, + [:validation_uri, :validation_report_uri]+VAL_ATTR_CV+VAL_ATTR_REGR-[:num_folds, :dataset_uri, :algorithm_uri], + "Results","Results") end task.progress(90) if task @@ -213,6 +239,7 @@ module Reports::ReportFactory pre_load_predictions( validation_set, OpenTox::SubTask.create(task,0,80) ) report = Reports::ReportContent.new("Algorithm comparison report") + add_filter_warning(report, validation_set.filter_params) if validation_set.filter_params!=nil if (validation_set.num_different_values(:dataset_uri)>1) all_merged = validation_set.merge([:algorithm_uri, :dataset_uri, :crossvalidation_id, :crossvalidation_uri]) @@ -226,11 +253,11 @@ module Reports::ReportFactory when "classification" result_attributes += VAL_ATTR_CLASS ttest_attributes = VAL_ATTR_TTEST_CLASS - bar_plot_attributes = VAL_ATTR_BAR_PLOT_CLASS + box_plot_attributes = VAL_ATTR_BOX_PLOT_CLASS else result_attributes += VAL_ATTR_REGR ttest_attributes = VAL_ATTR_TTEST_REGR - bar_plot_attributes = VAL_ATTR_BAR_PLOT_REGR + box_plot_attributes = VAL_ATTR_BOX_PLOT_REGR end if params[:ttest_attributes] and params[:ttest_attributes].chomp.size>0 @@ -241,8 +268,8 @@ module Reports::ReportFactory ttest_significance = params[:ttest_significance].to_f end - bar_plot_attributes += ttest_attributes - bar_plot_attributes.uniq! + box_plot_attributes += ttest_attributes + box_plot_attributes.uniq! result_attributes += ttest_attributes result_attributes.uniq! @@ -265,13 +292,50 @@ module Reports::ReportFactory res_text = "These performance statistics have been derieved by computing the mean of the statistics on each crossvalidation fold." report.add_result(merged,result_attributes,res_titel,res_titel,res_text) # pending: regression stats have different scales!!! - report.add_bar_plot(merged, :identifier, bar_plot_attributes) if validation_set.unique_feature_type=="classification" + report.add_box_plot(set, :identifier, box_plot_attributes) report.add_paired_ttest_tables(set, :identifier, ttest_attributes, ttest_significance) if ttest_significance>0 report.end_section end task.progress(100) if task report end + + def self.create_report_compare_methods(validation_set, params={}, task=nil) + raise OpenTox::BadRequestError.new("num validations is not >1") unless validation_set.size>1 + raise OpenTox::BadRequestError.new("validations must have unique feature type, i.e. must be either all regression, "+ + "or all classification validations") unless validation_set.unique_feature_type + raise OpenTox::BadRequestError.new("number of different identifiers <2: "+ + validation_set.get_values(:identifier).inspect) if validation_set.num_different_values(:identifier)<2 + #validation_set.load_cv_attributes + + pre_load_predictions( validation_set, OpenTox::SubTask.create(task,0,80) ) + report = Reports::ReportContent.new("Method comparison report") + add_filter_warning(report, validation_set.filter_params) if validation_set.filter_params!=nil + + result_attributes = [:identifier,:validation_uri,:validation_report_uri]+VAL_ATTR_CV-[:crossvalidation_fold,:num_folds,:dataset_uri] + case validation_set.unique_feature_type + when "classification" + result_attributes += VAL_ATTR_CLASS + box_plot_attributes = VAL_ATTR_BOX_PLOT_CLASS + else + result_attributes += VAL_ATTR_REGR + box_plot_attributes = VAL_ATTR_BOX_PLOT_REGR + end + + merged = validation_set.merge([:identifier]) + merged.sort(:identifier) + + merged.validations.each do |v| + v.validation_uri = v.validation_uri.split(";").uniq.join(" ") + v.validation_report_uri = v.validation_report_uri.split(";").uniq.join(" ") if v.validation_report_uri + end + + msg = merged.validations.collect{|v| v.identifier+" ("+Lib::MergeObjects.merge_count(v).to_s+"x)"}.join(", ") + report.add_result(merged,result_attributes,"Average Results","Results",msg) + + report.add_box_plot(validation_set, :identifier, box_plot_attributes) + report + end end diff --git a/report/report_service.rb b/report/report_service.rb index f299122..f315b04 100644 --- a/report/report_service.rb +++ b/report/report_service.rb @@ -72,7 +72,15 @@ module Reports LOGGER.debug "identifier: '"+identifier.inspect+"'" raise "illegal num identifiers: "+identifier.size.to_s+" should be equal to num validation-uris ("+validation_uris.size.to_s+")" if identifier and identifier.size!=validation_uris.size - validation_set = Reports::ValidationSet.new(validation_uris, identifier, subjectid) + + filter_params = nil + [:min_confidence, :min_num_predictions, :max_num_predictions].each do |key| + if params[key] != nil + filter_params = {} unless filter_params + filter_params[key] = params[key].to_f + end + end + validation_set = Reports::ValidationSet.new(validation_uris, identifier, filter_params, subjectid) raise OpenTox::BadRequestError.new("cannot get validations from validation_uris '"+validation_uris.inspect+"'") unless validation_set and validation_set.size > 0 LOGGER.debug "loaded "+validation_set.size.to_s+" validation/s" task.progress(10) if task @@ -81,7 +89,9 @@ module Reports report_content = Reports::ReportFactory.create_report(type, validation_set, params, OpenTox::SubTask.create(task,10,90)) LOGGER.debug "report created" - + Reports::quit_r + Reports.validation_access.delete_tmp_resources(subjectid) + #step 3: persist report if creation not failed id = @@persistance.new_report(report_content, type, create_meta_data(type, validation_set, validation_uris), self, subjectid) LOGGER.debug "report persisted with id: '"+id.to_s+"'" diff --git a/report/statistical_test.rb b/report/statistical_test.rb index 8d6bd62..da46f6b 100644 --- a/report/statistical_test.rb +++ b/report/statistical_test.rb @@ -1,38 +1,6 @@ #require "rubygems" #require "rinruby" -module LIB - class StatisticalTest - - # -1 -> array1 < array2 - # 0 -> not difference - # 1 -> array2 > array1 - # - def self.pairedTTest(array1, array2, significance_level=0.95) - - @@r = RinRuby.new(true,false) unless defined?(@@r) and @@r - @@r.assign "v1",array1 - @@r.assign "v2",array2 - @@r.eval "ttest = t.test(v1,v2,paired=T)" - t = @@r.pull "ttest$statistic" - p = @@r.pull "ttest$p.value" - if (1-significance_level > p) - t - else - 0 - end - end - - def self.quit_r - begin - @@r.quit - @@r = nil - rescue - end - end - end -end - module Reports class ReportStatisticalTest @@ -69,26 +37,15 @@ module Reports def self.paired_ttest( validations1, validations2, attribute, class_value, significance_level=0.95 ) - array1 = validations1.collect{ |v| (v.send(attribute).is_a?(Hash) ? v.send(attribute)[class_value] : v.send(attribute)) } - array2 = validations2.collect{ |v| (v.send(attribute).is_a?(Hash) ? v.send(attribute)[class_value] : v.send(attribute)) } + array1 = validations1.collect{ |v| (v.send(attribute).is_a?(Hash) ? v.send(attribute)[class_value].to_f : v.send(attribute).to_f) } + array2 = validations2.collect{ |v| (v.send(attribute).is_a?(Hash) ? v.send(attribute)[class_value].to_f : v.send(attribute).to_f) } LOGGER.debug "paired-t-testing "+attribute.to_s+" "+array1.inspect+" vs "+array2.inspect - LIB::StatisticalTest.pairedTTest(array1, array2, significance_level) + Reports::r_util.paired_ttest(array1, array2, significance_level) end - def self.quit_r - LIB::StatisticalTest.quit_r - end - end end -#t1 = Time.new -#10.times do -# puts LIB::StatisticalTest.pairedTTest([1,2,3,4,5,12,4,2],[2,3,3,3,56,3,4,5]) -#end -#LIB::StatisticalTest.quitR -#t2 = Time.new -#puts t2-t1 diff --git a/report/validation_access.rb b/report/validation_access.rb index 299b124..aaa7bdc 100755 --- a/report/validation_access.rb +++ b/report/validation_access.rb @@ -7,27 +7,39 @@ require "lib/validation_db.rb" # class Reports::ValidationDB - def resolve_cv_uris(validation_uris, identifier=nil, subjectid=nil) + @@tmp_resources = [] + + def same_service?(uri) + self_uri = URI.parse($url_provider.url) + val_uri = URI.parse(uri) + self_uri.host == val_uri.host && self_uri.port == val_uri.port + end + + def resolve_cv_uris(validation_uris, identifier, subjectid) res = {} count = 0 validation_uris.each do |u| + if u.to_s =~ /.*\/crossvalidation\/[0-9]+/ - cv_id = u.split("/")[-1].to_i cv = nil + cv_id = u.split("/")[-1].to_i + val_uris = nil + + if same_service?u + raise OpenTox::NotAuthorizedError.new "Not authorized: GET "+u.to_s if + AA_SERVER and !OpenTox::Authorization.authorized?(u,"GET",subjectid) + cv = Validation::Crossvalidation.get( cv_id ) + raise OpenTox::NotFoundError.new "crossvalidation with id "+cv_id.to_s+" not found" unless cv + raise OpenTox::BadRequestError.new("crossvalidation with id '"+cv_id.to_s+"' not finished") unless cv.finished + #res += Validation::Validation.find( :all, :conditions => { :crossvalidation_id => cv_id } ).collect{|v| v.validation_uri.to_s} + val_uris = Validation::Validation.find( :crossvalidation_id => cv_id, :validation_type => "crossvalidation" ).collect{|v| v.validation_uri.to_s} + else + val_base_uri = u.gsub(/\/crossvalidation\/[0-9]+/,"") + val_uris = OpenTox::RestClientWrapper.get( val_base_uri+"?crossvalidation_id="+cv_id.to_s+"&validation_type=crossvalidation", {:subjectid => subjectid, :accept => "text/uri-list" }).split("\n") + end - raise OpenTox::NotAuthorizedError.new "Not authorized: GET "+u.to_s if - AA_SERVER and !OpenTox::Authorization.authorized?(u,"GET",subjectid) -# begin -# #cv = Lib::Crossvalidation.find( cv_id ) -# rescue => ex -# raise "could not access crossvalidation with id "+validation_id.to_s+", error-msg: "+ex.message -# end - cv = Validation::Crossvalidation.get( cv_id ) - raise OpenTox::NotFoundError.new "crossvalidation with id "+cv_id.to_s+" not found" unless cv - raise OpenTox::BadRequestError.new("crossvalidation with id '"+cv_id.to_s+"' not finished") unless cv.finished - #res += Validation::Validation.find( :all, :conditions => { :crossvalidation_id => cv_id } ).collect{|v| v.validation_uri.to_s} - Validation::Validation.find( :crossvalidation_id => cv_id, :validation_type => "crossvalidation" ).each do |v| - res[v.validation_uri.to_s] = identifier ? identifier[count] : nil + val_uris.each do |v_uri| + res[v_uri] = identifier ? identifier[count] : nil end else res[u.to_s] = identifier ? identifier[count] : nil @@ -37,40 +49,62 @@ class Reports::ValidationDB res end - def init_validation(validation, uri, subjectid=nil) - + def init_validation(validation, uri, filter_params, subjectid) + raise OpenTox::BadRequestError.new "not a validation uri: "+uri.to_s unless uri =~ /\/[0-9]+$/ validation_id = uri.split("/")[-1] raise OpenTox::BadRequestError.new "invalid validation id "+validation_id.to_s unless validation_id!=nil and (validation_id.to_i > 0 || validation_id.to_s=="0" ) + v = nil - raise OpenTox::NotAuthorizedError.new "Not authorized: GET "+uri.to_s if - AA_SERVER and !OpenTox::Authorization.authorized?(uri,"GET",subjectid) - v = Validation::Validation.get(validation_id) + + if same_service? uri + raise OpenTox::NotAuthorizedError.new "Not authorized: GET "+uri.to_s if + AA_SERVER and !OpenTox::Authorization.authorized?(uri,"GET",subjectid) + v = Validation::Validation.get(validation_id) + else + v = YAML::load(OpenTox::RestClientWrapper.get uri, {:subjectid=>subjectid, :accept=>"application/serialize"}) + end + v.subjectid = subjectid + v.filter_predictions(filter_params[:min_confidence], filter_params[:min_num_predictions], filter_params[:max_num_predictions]) if + filter_params + raise OpenTox::NotFoundError.new "validation with id "+validation_id.to_s+" not found" unless v raise OpenTox::BadRequestError.new "validation with id "+validation_id.to_s+" is not finished yet" unless v.finished - (Validation::VAL_PROPS + Validation::VAL_CV_PROPS).each do |p| validation.send("#{p.to_s}=".to_sym, v.send(p)) end + # set uris manually, in case external validation is used + validation.validation_uri = uri + validation.crossvalidation_uri = uri.gsub(/\/[0-9]+/,"")+"/crossvalidation/"+validation.crossvalidation_id if validation.crossvalidation_id!=nil + {:classification_statistics => Validation::VAL_CLASS_PROPS, :regression_statistics => Validation::VAL_REGR_PROPS}.each do |subset_name,subset_props| subset = v.send(subset_name) - subset_props.each{ |prop| validation.send("#{prop.to_s}=".to_sym, subset[prop]) } if subset + subset_props.each{ |prop| validation.send("#{prop.to_s}=".to_sym, subset[prop]) } if subset end end - def init_validation_from_cv_statistics( validation, cv_uri, subjectid=nil ) + def init_validation_from_cv_statistics( validation, cv_uri, filter_params, subjectid ) raise OpenTox::BadRequestError.new "not a crossvalidation uri: "+cv_uri.to_s unless cv_uri.uri? and cv_uri =~ /crossvalidation.*\/[0-9]+$/ - cv_id = cv_uri.split("/")[-1] - raise OpenTox::NotAuthorizedError.new "Not authorized: GET "+cv_uri.to_s if - AA_SERVER and !OpenTox::Authorization.authorized?(cv_uri,"GET",subjectid) - cv = Validation::Crossvalidation.get(cv_id) - raise OpenTox::NotFoundError.new "crossvalidation with id "+crossvalidation_id.to_s+" not found" unless cv - raise OpenTox::BadRequestError.new "crossvalidation with id "+crossvalidation_id.to_s+" is not finished yet" unless cv.finished - v = Validation::Validation.from_cv_statistics(cv_id, subjectid) + + if same_service?cv_uri + cv_id = cv_uri.split("/")[-1] + raise OpenTox::NotAuthorizedError.new "Not authorized: GET "+cv_uri.to_s if + AA_SERVER and !OpenTox::Authorization.authorized?(cv_uri,"GET",subjectid) + cv = Validation::Crossvalidation.get(cv_id) + raise OpenTox::NotFoundError.new "crossvalidation with id "+crossvalidation_id.to_s+" not found" unless cv + raise OpenTox::BadRequestError.new "crossvalidation with id "+crossvalidation_id.to_s+" is not finished yet" unless cv.finished + v = Validation::Validation.from_cv_statistics(cv_id, subjectid) + else + cv = YAML::load(OpenTox::RestClientWrapper.get cv_uri, {:subjectid=>subjectid, :accept=>"application/serialize"}) + v = YAML::load(OpenTox::RestClientWrapper.get cv_uri+"/statistics", {:subjectid=>subjectid, :accept=>"application/serialize"}) + end + v.filter_predictions(filter_params[:min_confidence], filter_params[:min_num_predictions], filter_params[:max_num_predictions]) if + filter_params + (Validation::VAL_PROPS + Validation::VAL_CV_PROPS).each do |p| validation.send("#{p.to_s}=".to_sym, v.send(p)) end @@ -84,35 +118,83 @@ class Reports::ValidationDB validation.send("#{p.to_s}=".to_sym, cv.send(p.to_s)) end validation.crossvalidation_uri = cv_uri + validation.validation_uri = cv_uri+"/statistics" end - def init_cv(validation) - - #cv = Lib::Crossvalidation.find(validation.crossvalidation_id) - cv = Validation::Crossvalidation.get(validation.crossvalidation_id) - raise OpenTox::BadRequestError.new "no crossvalidation found with id "+validation.crossvalidation_id.to_s unless cv + def init_cv(validation, subjectid) + cv = nil + if same_service?validation.crossvalidation_uri + cv = Validation::Crossvalidation.get(validation.crossvalidation_id) + raise OpenTox::BadRequestError.new "no crossvalidation found with id "+validation.crossvalidation_id.to_s unless cv + else + cv = YAML::load(OpenTox::RestClientWrapper.get validation.crossvalidation_uri, {:subjectid=>subjectid, :accept=>"application/serialize"}) + end Validation::CROSS_VAL_PROPS.each do |p| - validation.send("#{p.to_s}=".to_sym, cv.send(p.to_s)) + validation.send("#{p.to_s}=".to_sym, cv.send(p.to_s)) + end + end + + def training_feature_dataset_uri(validation, subjectid) + m = OpenTox::Model::Generic.find(validation.model_uri, subjectid) + if m + f = m.metadata[OT.featureDataset] + return f.chomp if f end + raise "no feature dataset found" end - def get_predictions(validation, subjectid=nil, task=nil) - Lib::OTPredictions.new( validation.feature_type, validation.test_dataset_uri, + def test_feature_dataset_uri(validation, subjectid) + m = OpenTox::Model::Generic.find(validation.model_uri, subjectid) + feat_gen = nil + m.metadata[OT.parameters].each do |h| + if h[DC.title] and h[DC.title]=~/feature_generation/ and h[OT.paramValue] + feat_gen = h[OT.paramValue] + break + end + end if m and m.metadata[OT.parameters] + raise "no feature creation alg found" unless feat_gen + feat_gen = File.join(feat_gen,"match") if feat_gen=~/fminer/ + uri = OpenTox::RestClientWrapper.post(feat_gen,{:subjectid => subjectid, + :feature_dataset_uri=>training_feature_dataset_uri(validation,subjectid), + :dataset_uri=>validation.test_dataset_uri}) + @@tmp_resources << uri + uri + end + + def delete_tmp_resources(subjectid) + @@tmp_resources.each do |uri| + OpenTox::RestClientWrapper.delete uri,{:subjectid=>subjectid} + end + @@tmp_resources = [] + end + + def get_predictions(validation, filter_params, subjectid, task) + # we need compound info, cannot reuse stored prediction data + data = Lib::PredictionData.create( validation.feature_type, validation.test_dataset_uri, validation.test_target_dataset_uri, validation.prediction_feature, validation.prediction_dataset_uri, - validation.predicted_variable, validation.predicted_confidence, subjectid, task) + validation.predicted_variable, validation.predicted_confidence, subjectid, OpenTox::SubTask.create(task, 0, 80 ) ) + data = Lib::PredictionData.filter_data( data.data, data.compounds, + filter_params[:min_confidence], filter_params[:min_num_predictions], filter_params[:max_num_predictions] ) if filter_params!=nil + task.progress(100) if task + Lib::OTPredictions.new( data.data, data.compounds ) end def get_accept_values( validation, subjectid=nil ) # PENDING So far, one has to load the whole dataset to get the accept_value from ambit - test_target_dataset = validation.test_target_dataset_uri - test_target_dataset = validation.test_dataset_uri unless test_target_dataset - d = Lib::DatasetCache.find( test_target_dataset, subjectid ) - raise "cannot get test target dataset for accept values, dataset: "+test_target_dataset.to_s unless d - accept_values = d.accept_values(validation.prediction_feature) - raise "cannot get accept values from dataset "+test_target_dataset.to_s+" for feature "+ - validation.prediction_feature+":\n"+d.features[validation.prediction_feature].to_yaml unless accept_values!=nil - accept_values + test_target_datasets = validation.test_target_dataset_uri + test_target_datasets = validation.test_dataset_uri unless test_target_datasets + res = nil + test_target_datasets.split(";").each do |test_target_dataset| + d = Lib::DatasetCache.find( test_target_dataset, subjectid ) + raise "cannot get test target dataset for accept values, dataset: "+test_target_dataset.to_s unless d + accept_values = d.accept_values(validation.prediction_feature) + raise "cannot get accept values from dataset "+test_target_dataset.to_s+" for feature "+ + validation.prediction_feature+":\n"+d.features[validation.prediction_feature].to_yaml unless accept_values!=nil + raise "different accept values" if res && res!=accept_values + res = accept_values + end + res end def feature_type( validation, subjectid=nil ) diff --git a/report/validation_data.rb b/report/validation_data.rb index f5ecae7..3806fd7 100755 --- a/report/validation_data.rb +++ b/report/validation_data.rb @@ -86,21 +86,39 @@ module Reports VAL_ATTR_RANKING.collect{ |a| (a.to_s+"_ranking").to_sym } @@validation_attributes.each{ |a| attr_accessor a } - attr_reader :predictions, :subjectid - attr_accessor :identifier, :validation_report_uri, :crossvalidation_report_uri + attr_reader :predictions, :filter_params + attr_accessor :identifier, :validation_report_uri, :crossvalidation_report_uri, :subjectid - def initialize(uri = nil, subjectid = nil) - Reports.validation_access.init_validation(self, uri, subjectid) if uri + def initialize(uri = nil, filter_params=nil, subjectid = nil) + Reports.validation_access.init_validation(self, uri, filter_params, subjectid) if uri @subjectid = subjectid + raise unless filter_params==nil || filter_params.is_a?(Hash) + @filter_params = filter_params + @created_resources = [] #raise "subjectid is nil" unless subjectid end - def self.from_cv_statistics( cv_uri, subjectid = nil ) - v = ReportValidation.new(nil, subjectid) - Reports.validation_access.init_validation_from_cv_statistics(v, cv_uri, subjectid) + def self.from_cv_statistics( cv_uri, filter_params, subjectid ) + v = ReportValidation.new(nil, filter_params, subjectid) + Reports.validation_access.init_validation_from_cv_statistics(v, cv_uri, filter_params, subjectid) v end - + + def training_feature_dataset_uri + unless @training_feature_dataset + @training_feature_dataset = Reports.validation_access.training_feature_dataset_uri( self, @subjectid ) + end + @training_feature_dataset + end + + #hack this does create the features for the test dataset + def test_feature_dataset_uri + unless @test_feature_dataset + @test_feature_dataset = Reports.validation_access.test_feature_dataset_uri( self, @subjectid ) + end + @test_feature_dataset + end + # returns/creates predictions, cache to save rest-calls/computation time # # call-seq: @@ -116,7 +134,7 @@ module Reports task.progress(100) if task nil else - @predictions = Reports.validation_access.get_predictions( self, @subjectid, task ) + @predictions = Reports.validation_access.get_predictions( self, @filter_params, @subjectid, task ) end end end @@ -148,7 +166,7 @@ module Reports # loads all crossvalidation attributes, of the corresponding cv into this object def load_cv_attributes raise "crossvalidation-id not set" unless @crossvalidation_id - Reports.validation_access.init_cv(self) + Reports.validation_access.init_cv(self, @subjectid) # load cv report ids = Reports.persistance.list_reports("crossvalidation",{:crossvalidation=>self.crossvalidation_uri.to_s }) @crossvalidation_report_uri = ReportService.instance.get_uri("crossvalidation",ids[-1]) if ids and ids.size>0 @@ -167,13 +185,13 @@ module Reports # class ValidationSet - def initialize(validation_uris=nil, identifier=nil, subjectid=nil) + def initialize(validation_uris=nil, identifier=nil, filter_params=nil, subjectid=nil) @unique_values = {} @validations = [] if validation_uris validation_uri_and_ids = ReportValidation.resolve_cv_uris(validation_uris, identifier, subjectid) validation_uri_and_ids.each do |u,id| - v = ReportValidation.new(u, subjectid) + v = ReportValidation.new(u, filter_params, subjectid) v.identifier = id if id ids = Reports.persistance.list_reports("validation",{:validation_uris=>v.validation_uri }) v.validation_report_uri = ReportService.instance.get_uri("validation",ids[-1]) if ids and ids.size>0 @@ -228,6 +246,10 @@ module Reports return false end + def filter_params + @validations.first.filter_params + end + # loads the attributes of the related crossvalidation into all validation objects # def load_cv_attributes @@ -396,12 +418,17 @@ module Reports end if variance + #puts "variance given #{a}, #{val.inspect}, #{val.class}, #{variance.inspect}, #{variance.class}" if (val.is_a?(Array)) raise "not implemented" elsif (val.is_a?(Hash)) val.collect{ |i,j| i.to_nice_s+": "+j.to_nice_s + " +- " + variance[i].to_nice_s }.join(", ") else + if (variance.is_a?(Hash)) + raise "invalid variance" unless accept_values.size==1 && accept_values[0]!=nil + variance = variance[accept_values[0]] + end val.to_nice_s + " +- " + variance.to_nice_s end else @@ -424,7 +451,7 @@ module Reports new_set = ValidationSet.new grouping = Util.group(@validations, [:crossvalidation_id]) grouping.each do |g| - v = ReportValidation.from_cv_statistics(g[0].crossvalidation_uri, g[0].subjectid) + v = ReportValidation.from_cv_statistics(g[0].crossvalidation_uri, @validations.first.filter_params, g[0].subjectid) v.identifier = g.collect{|vv| vv.identifier}.uniq.join(";") new_set.validations << v end @@ -450,7 +477,8 @@ module Reports #merge Lib::MergeObjects.register_merge_attributes( ReportValidation, - Validation::VAL_MERGE_AVG+Validation::VAL_MERGE_SUM,[],Validation::VAL_MERGE_GENERAL+[:identifier, :validation_report_uri, :crossvalidation_report_uri]) unless + Validation::VAL_MERGE_AVG+Validation::VAL_MERGE_SUM,[], + Validation::VAL_MERGE_GENERAL+[:identifier, :validation_report_uri, :crossvalidation_report_uri, :subjectid]) unless Lib::MergeObjects.merge_attributes_registered?(ReportValidation) grouping.each do |g| new_set.validations << g[0].clone_validation |