From 78564782ca749dd13f063f0a04070bef89377354 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Tue, 24 May 2011 19:44:06 +0200 Subject: adjust to ruby-plot version 0.5, simplification + support for various max youden-index points --- report/environment.rb | 2 +- report/plot_factory.rb | 101 +++++++++++++++++------------------------- test/test_examples.rb | 26 ++++++++++- validation/validation_test.rb | 32 ++++++++++--- 4 files changed, 92 insertions(+), 69 deletions(-) diff --git a/report/environment.rb b/report/environment.rb index bdb4a04..a7d454f 100755 --- a/report/environment.rb +++ b/report/environment.rb @@ -4,7 +4,7 @@ 'rexml/document', 'ruby-plot', 'opentox-ruby' ].each do |g| require g end -gem 'ruby-plot', "~>0.4.0" +gem 'ruby-plot', "~>0.5.0" #R.quit diff --git a/report/plot_factory.rb b/report/plot_factory.rb index dfb1369..d09e506 100644 --- a/report/plot_factory.rb +++ b/report/plot_factory.rb @@ -95,41 +95,28 @@ module Reports # * each of theses validation sets is plotted as a roc-curve # def self.create_roc_plot( out_files, validation_set, class_value, split_set_attribute=nil, - x_label="False positive rate", y_label="True Positive Rate", show_single_curves=false ) + x_label="False positive rate", y_label="True Positive Rate" ) out_files = [out_files] unless out_files.is_a?(Array) LOGGER.debug "creating roc plot for '"+validation_set.size.to_s+"' validations, out-files:"+out_files.inspect + data = [] if split_set_attribute attribute_values = validation_set.get_values(split_set_attribute) - names = [] - fp_rates = [] - tp_rates = [] attribute_values.each do |value| begin - data = transform_roc_predictions(validation_set.filter({split_set_attribute => value}), class_value, false) - names << value.to_s - fp_rates << data[:fp_rate][0] - tp_rates << data[:tp_rate][0] + data << transform_roc_predictions(validation_set.filter({split_set_attribute => value}), class_value, false ) rescue LOGGER.warn "could not create ROC plot for "+value.to_s end end - out_files.each do |out_file| - RubyPlot::plot_lines(out_file, "ROC-Plot", x_label, y_label, names, fp_rates, tp_rates ) - end else - data = transform_roc_predictions(validation_set, class_value, show_single_curves) - labels = [] - data[:youden].each do |points| - points.each do |point,confidence| - labels << ["confidence: "+confidence.to_nice_s, point[0], point[1]] - end - end - out_files.each do |out_file| - RubyPlot::plot_lines(out_file, "ROC-Plot", x_label, y_label, data[:names], data[:fp_rate], data[:tp_rate], data[:faint], labels ) - end + data << transform_roc_predictions(validation_set, class_value ) end + + out_files.each do |out_file| + RubyPlot::plot_lines(out_file, "ROC-Plot", x_label, y_label, data ) + end end @@ -291,44 +278,27 @@ module Reports end private - def self.transform_roc_predictions(validation_set, class_value, add_single_folds=false) - + def self.transform_roc_predictions(validation_set, class_value, add_label=true ) if (validation_set.size > 1) - - names = []; fp_rate = []; tp_rate = []; faint = []; youden = [] - sum_roc_values = { :predicted_values => [], :actual_values => [], :confidence_values => []} - + values = { :predicted_values => [], :actual_values => [], :confidence_values => []} (0..validation_set.size-1).each do |i| roc_values = validation_set.get(i).get_predictions.get_prediction_values(class_value) - sum_roc_values[:predicted_values] += roc_values[:predicted_values] - sum_roc_values[:confidence_values] += roc_values[:confidence_values] - sum_roc_values[:actual_values] += roc_values[:actual_values] - if add_single_folds - begin - tp_fp_rates = get_tp_fp_rates(roc_values) - names << "fold "+i.to_s - fp_rate << tp_fp_rates[:fp_rate] - tp_rate << tp_fp_rates[:tp_rate] - faint << true - rescue - LOGGER.warn "could not get ROC vals for fold "+i.to_s - end - end + values[:predicted_values] += roc_values[:predicted_values] + values[:confidence_values] += roc_values[:confidence_values] + values[:actual_values] += roc_values[:actual_values] end - tp_fp_rates = get_tp_fp_rates(sum_roc_values) - names << nil # "all" - fp_rate << tp_fp_rates[:fp_rate] - tp_rate << tp_fp_rates[:tp_rate] - youden << tp_fp_rates[:youden] - faint << false - return { :names => names, :fp_rate => fp_rate, :tp_rate => tp_rate, :faint => faint, :youden => youden } else - roc_values = validation_set.validations[0].get_predictions.get_prediction_values(class_value) - tp_fp_rates = get_tp_fp_rates(roc_values) - return { :names => ["default"], :fp_rate => [tp_fp_rates[:fp_rate]], :tp_rate => [tp_fp_rates[:tp_rate]], :youden => [tp_fp_rates[:youden]] } + values = validation_set.validations[0].get_predictions.get_prediction_values(class_value) end + tp_fp_rates = get_tp_fp_rates(values) + labels = [] + tp_fp_rates[:youden].each do |point,confidence| + labels << ["confidence: "+confidence.to_nice_s, point[0], point[1]] + end if add_label + RubyPlot::LinePlotData.new(:name => "default", :x_values => tp_fp_rates[:fp_rate], :y_values => tp_fp_rates[:tp_rate], :labels => labels) end + def self.transform_confidence_predictions(validation_set, class_value, add_single_folds=false) if (validation_set.size > 1) @@ -368,16 +338,25 @@ module Reports end end - def self.demo_rock_plot - roc_values = {:confidence_values => [0.1, 0.9, 0.5, 0.6, 0.6, 0.6], - :predicted_values => [1, 0, 0, 1, 0, 1], - :actual_values => [0, 1, 0, 0, 1, 1]} + def self.demo_roc_plot +# roc_values = {:confidence_values => [0.1, 0.9, 0.5, 0.6, 0.6, 0.6], +# :predicted_values => [1, 0, 0, 1, 0, 1], +# :actual_values => [0, 1, 0, 0, 1, 1]} + roc_values = {:confidence_values => [0.9, 0.8, 0.7, 0.6, 0.5, 0.4], + :predicted_values => [1, 1, 1, 1, 1, 1], + :actual_values => [1, 0, 1, 0, 1, 0]} tp_fp_rates = get_tp_fp_rates(roc_values) - data = { :names => ["default"], :fp_rate => [tp_fp_rates[:fp_rate]], :tp_rate => [tp_fp_rates[:tp_rate]] } + labels = [] + tp_fp_rates[:youden].each do |point,confidence| + labels << ["confidence: "+confidence.to_s, point[0], point[1]] + end + + plot_data = [] + plot_data << RubyPlot::LinePlotData.new(:name => "testname", :x_values => tp_fp_rates[:fp_rate], :y_values => tp_fp_rates[:tp_rate], :labels => labels) RubyPlot::plot_lines("/tmp/plot.png", "ROC-Plot", "False positive rate", - "True Positive Rate", data[:names], data[:fp_rate], data[:tp_rate], data[:faint] ) + "True Positive Rate", plot_data ) end def self.get_performance_confidence_rates(roc_values, feature_type) @@ -503,7 +482,9 @@ module Reports max = youden.max youden_hash = {} (0..tp_rate.size-1).each do |i| - youden_hash[i] = c2[i] if youden[i]==max + if youden[i]==max and i>0 + youden_hash[i] = c2[i] + end end #puts youden.inspect+"\n"+youden_hash.inspect+"\n\n" @@ -526,8 +507,8 @@ end #require "rubygems" #require "ruby-plot" -#Reports::PlotFactory::demo_ranking_plot -#Reports::PlotFactory::demo_rock_plot +##Reports::PlotFactory::demo_ranking_plot +#Reports::PlotFactory::demo_roc_plot #a = [1, 0, 1, 2, 3, 0, 2] #puts a.compress_sum([100, 90, 70, 70, 30, 10, 0]).inspect diff --git a/test/test_examples.rb b/test/test_examples.rb index eb0543f..f3c0b7e 100755 --- a/test/test_examples.rb +++ b/test/test_examples.rb @@ -100,7 +100,7 @@ module ValidationExamples class EPAFHMCrossvalidation < CrossValidation def initialize - @dataset_file = File.new("data/EPAFHM.med.csv","r") + @dataset_file = File.new("data/EPAFHM.csv","r") #@prediction_feature = "http://ot-dev.in-silico.ch/toxcreate/feature#IRIS%20unit%20risk" @num_folds = 10 end @@ -505,6 +505,26 @@ module ValidationExamples end end + class AnotherAmbitJ48TrainingTest < TrainingTestValidation + def initialize + @algorithm_uri = "http://apps.ideaconsult.net:8080/ambit2/algorithm/J48" + @training_dataset_uri = "http://apps.ideaconsult.net:8080/ambit2/dataset/585758" + @test_dataset_uri = "http://apps.ideaconsult.net:8080/ambit2/dataset/585758" + @prediction_feature= "http://apps.ideaconsult.net:8080/ambit2/feature/111148" + end + end + + class TumTrainingTest < TrainingTestValidation + def initialize + @algorithm_uri = "http://lxkramer34.informatik.tu-muenchen.de:8080/OpenTox-dev/algorithm/kNNclassification" + @training_dataset_uri = "http://apps.ideaconsult.net:8080/ambit2/dataset/585758" + @test_dataset_uri = "http://apps.ideaconsult.net:8080/ambit2/dataset/585758" + @prediction_feature= "http://apps.ideaconsult.net:8080/ambit2/feature/111148" + end + end + + + class LazarVsNtuaCrossvalidation < CrossValidation def initialize @@ -803,7 +823,11 @@ module ValidationExamples "22b" => [ NtuaTrainingTestSplit ], "22c" => [ NtuaCrossvalidation ], "22d" => [ LazarVsNtuaCrossvalidation ], + + #impt "22e" => [ AmbitVsNtuaTrainingTest ], + "22f" => [ AnotherAmbitJ48TrainingTest ], + "22g" => [ TumTrainingTest ], } diff --git a/validation/validation_test.rb b/validation/validation_test.rb index 2c86548..ae71749 100755 --- a/validation/validation_test.rb +++ b/validation/validation_test.rb @@ -60,7 +60,23 @@ class ValidationTest < Test::Unit::TestCase begin $test_case = self - #get 'crossvalidation/138/statistics' +# post "/validate_datasets",{ +# :test_dataset_uri=>"http://local-ot/dataset/6907", +# :prediction_dataset_uri=>"http://local-ot/dataset/6909", +# :test_target_dataset_uri=>"http://local-ot/dataset/6905", +# :prediction_feature=>"http://local-ot/dataset/6905/feature/Hamster%20Carcinogenicity", +# #:model_uri=>"http://local-ot/model/1078", +# :predicted_variable=>"http://local-ot/dataset/6909/feature/prediction/Hamster%20Carcinogenicity/value", +# :predicted_confidence=>"http://local-ot/dataset/6909/feature/prediction/Hamster%20Carcinogenicity/confidence", +# #:regression=>"true"} +# :classification=>"true"} +# +# puts last_response.body +# uri = last_response.body +# rep = wait_for_task(uri) +# puts rep + + #get 'crossvalidation/19/statistics' #get 'crossvalidation/189/statistics' #puts last_response.body # run_test("1b") @@ -68,7 +84,9 @@ class ValidationTest < Test::Unit::TestCase #get '/crossvalidation/79/predictions',nil,'HTTP_ACCEPT' => "application/x-yaml" #puts last_response.body - #run_test("22e") #,:validation_uri=>"http://local-ot/validation/84" ) + run_test("22f") #,:validation_uri=>"http://local-ot/validation/84" ) + + #run_test("21b") #run_test("21c") @@ -97,16 +115,16 @@ class ValidationTest < Test::Unit::TestCase # rep = wait_for_task(uri) # puts rep - #run_test("1a", {:validation_uri=>"http://local-ot/validation/466"}) + #run_test("1a", {:validation_uri=>"http://local-ot/validation/305"}) # puts "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX" - #run_test("3a",{:validation_uri=>"http://local-ot/validation/crossvalidation/149"}) + #run_test("3a",{:validation_uri=>"http://local-ot/validation/crossvalidation/6"}) #puts "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX" - #run_test("13a", {:validation_uri=>"http://local-ot/validation/406"}) + #run_test("13a") #, {:validation_uri=>"http://local-ot/validation/406"}) # puts "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX" - #run_test("14a",{:validation_uri=>"http://local-ot/validation/crossvalidation/148"}) + #run_test("14a") #,{:validation_uri=>"http://local-ot/validation/crossvalidation/148"}) # puts "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX" - run_test("1a") + #run_test("1a") # run_test("3d",{ # :dataset_uri => "http://local-ot/dataset/2897", -- cgit v1.2.3