summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author mguetlein <martin.guetlein@gmail.com> 2011-05-24 19:44:06 +0200
committer mguetlein <martin.guetlein@gmail.com> 2011-05-24 19:44:06 +0200
commit 78564782ca749dd13f063f0a04070bef89377354 (patch)
tree bfca12dbd6e2a276234808387f095fde8c9e2ada
parent 9bafc6c362b0e595fcf78fde0d588937db9ae122 (diff)
adjust to ruby-plot version 0.5, simplification + support for various max youden-index points
-rwxr-xr-x report/environment.rb 2
-rw-r--r-- report/plot_factory.rb 101
-rwxr-xr-x test/test_examples.rb 26
-rwxr-xr-x validation/validation_test.rb 32
4 files changed, 92 insertions, 69 deletions
diff --git a/report/environment.rb b/report/environment.rb
index bdb4a04..a7d454f 100755
--- a/report/environment.rb
+++ b/report/environment.rb
@@ -4,7 +4,7 @@
'rexml/document', 'ruby-plot', 'opentox-ruby' ].each do |g|
require g
end
-gem 'ruby-plot', "~>0.4.0"
+gem 'ruby-plot', "~>0.5.0"
#R.quit
diff --git a/report/plot_factory.rb b/report/plot_factory.rb
index dfb1369..d09e506 100644
--- a/report/plot_factory.rb
+++ b/report/plot_factory.rb
@@ -95,41 +95,28 @@ module Reports
# * each of theses validation sets is plotted as a roc-curve
#
def self.create_roc_plot( out_files, validation_set, class_value, split_set_attribute=nil,
- x_label="False positive rate", y_label="True Positive Rate", show_single_curves=false )
+ x_label="False positive rate", y_label="True Positive Rate" )
out_files = [out_files] unless out_files.is_a?(Array)
LOGGER.debug "creating roc plot for '"+validation_set.size.to_s+"' validations, out-files:"+out_files.inspect
+ data = []
if split_set_attribute
attribute_values = validation_set.get_values(split_set_attribute)
- names = []
- fp_rates = []
- tp_rates = []
attribute_values.each do |value|
begin
- data = transform_roc_predictions(validation_set.filter({split_set_attribute => value}), class_value, false)
- names << value.to_s
- fp_rates << data[:fp_rate][0]
- tp_rates << data[:tp_rate][0]
+ data << transform_roc_predictions(validation_set.filter({split_set_attribute => value}), class_value, false )
rescue
LOGGER.warn "could not create ROC plot for "+value.to_s
end
end
- out_files.each do |out_file|
- RubyPlot::plot_lines(out_file, "ROC-Plot", x_label, y_label, names, fp_rates, tp_rates )
- end
else
- data = transform_roc_predictions(validation_set, class_value, show_single_curves)
- labels = []
- data[:youden].each do |points|
- points.each do |point,confidence|
- labels << ["confidence: "+confidence.to_nice_s, point[0], point[1]]
- end
- end
- out_files.each do |out_file|
- RubyPlot::plot_lines(out_file, "ROC-Plot", x_label, y_label, data[:names], data[:fp_rate], data[:tp_rate], data[:faint], labels )
- end
+ data << transform_roc_predictions(validation_set, class_value )
end
+
+ out_files.each do |out_file|
+ RubyPlot::plot_lines(out_file, "ROC-Plot", x_label, y_label, data )
+ end
end
@@ -291,44 +278,27 @@ module Reports
end
private
- def self.transform_roc_predictions(validation_set, class_value, add_single_folds=false)
-
+ def self.transform_roc_predictions(validation_set, class_value, add_label=true )
if (validation_set.size > 1)
-
- names = []; fp_rate = []; tp_rate = []; faint = []; youden = []
- sum_roc_values = { :predicted_values => [], :actual_values => [], :confidence_values => []}
-
+ values = { :predicted_values => [], :actual_values => [], :confidence_values => []}
(0..validation_set.size-1).each do |i|
roc_values = validation_set.get(i).get_predictions.get_prediction_values(class_value)
- sum_roc_values[:predicted_values] += roc_values[:predicted_values]
- sum_roc_values[:confidence_values] += roc_values[:confidence_values]
- sum_roc_values[:actual_values] += roc_values[:actual_values]
- if add_single_folds
- begin
- tp_fp_rates = get_tp_fp_rates(roc_values)
- names << "fold "+i.to_s
- fp_rate << tp_fp_rates[:fp_rate]
- tp_rate << tp_fp_rates[:tp_rate]
- faint << true
- rescue
- LOGGER.warn "could not get ROC vals for fold "+i.to_s
- end
- end
+ values[:predicted_values] += roc_values[:predicted_values]
+ values[:confidence_values] += roc_values[:confidence_values]
+ values[:actual_values] += roc_values[:actual_values]
end
- tp_fp_rates = get_tp_fp_rates(sum_roc_values)
- names << nil # "all"
- fp_rate << tp_fp_rates[:fp_rate]
- tp_rate << tp_fp_rates[:tp_rate]
- youden << tp_fp_rates[:youden]
- faint << false
- return { :names => names, :fp_rate => fp_rate, :tp_rate => tp_rate, :faint => faint, :youden => youden }
else
- roc_values = validation_set.validations[0].get_predictions.get_prediction_values(class_value)
- tp_fp_rates = get_tp_fp_rates(roc_values)
- return { :names => ["default"], :fp_rate => [tp_fp_rates[:fp_rate]], :tp_rate => [tp_fp_rates[:tp_rate]], :youden => [tp_fp_rates[:youden]] }
+ values = validation_set.validations[0].get_predictions.get_prediction_values(class_value)
end
+ tp_fp_rates = get_tp_fp_rates(values)
+ labels = []
+ tp_fp_rates[:youden].each do |point,confidence|
+ labels << ["confidence: "+confidence.to_nice_s, point[0], point[1]]
+ end if add_label
+ RubyPlot::LinePlotData.new(:name => "default", :x_values => tp_fp_rates[:fp_rate], :y_values => tp_fp_rates[:tp_rate], :labels => labels)
end
+
def self.transform_confidence_predictions(validation_set, class_value, add_single_folds=false)
if (validation_set.size > 1)
@@ -368,16 +338,25 @@ module Reports
end
end
- def self.demo_rock_plot
- roc_values = {:confidence_values => [0.1, 0.9, 0.5, 0.6, 0.6, 0.6],
- :predicted_values => [1, 0, 0, 1, 0, 1],
- :actual_values => [0, 1, 0, 0, 1, 1]}
+ def self.demo_roc_plot
+# roc_values = {:confidence_values => [0.1, 0.9, 0.5, 0.6, 0.6, 0.6],
+# :predicted_values => [1, 0, 0, 1, 0, 1],
+# :actual_values => [0, 1, 0, 0, 1, 1]}
+ roc_values = {:confidence_values => [0.9, 0.8, 0.7, 0.6, 0.5, 0.4],
+ :predicted_values => [1, 1, 1, 1, 1, 1],
+ :actual_values => [1, 0, 1, 0, 1, 0]}
tp_fp_rates = get_tp_fp_rates(roc_values)
- data = { :names => ["default"], :fp_rate => [tp_fp_rates[:fp_rate]], :tp_rate => [tp_fp_rates[:tp_rate]] }
+ labels = []
+ tp_fp_rates[:youden].each do |point,confidence|
+ labels << ["confidence: "+confidence.to_s, point[0], point[1]]
+ end
+
+ plot_data = []
+ plot_data << RubyPlot::LinePlotData.new(:name => "testname", :x_values => tp_fp_rates[:fp_rate], :y_values => tp_fp_rates[:tp_rate], :labels => labels)
RubyPlot::plot_lines("/tmp/plot.png",
"ROC-Plot",
"False positive rate",
- "True Positive Rate", data[:names], data[:fp_rate], data[:tp_rate], data[:faint] )
+ "True Positive Rate", plot_data )
end
def self.get_performance_confidence_rates(roc_values, feature_type)
@@ -503,7 +482,9 @@ module Reports
max = youden.max
youden_hash = {}
(0..tp_rate.size-1).each do |i|
- youden_hash[i] = c2[i] if youden[i]==max
+ if youden[i]==max and i>0
+ youden_hash[i] = c2[i]
+ end
end
#puts youden.inspect+"\n"+youden_hash.inspect+"\n\n"
@@ -526,8 +507,8 @@ end
#require "rubygems"
#require "ruby-plot"
-#Reports::PlotFactory::demo_ranking_plot
-#Reports::PlotFactory::demo_rock_plot
+##Reports::PlotFactory::demo_ranking_plot
+#Reports::PlotFactory::demo_roc_plot
#a = [1, 0, 1, 2, 3, 0, 2]
#puts a.compress_sum([100, 90, 70, 70, 30, 10, 0]).inspect
diff --git a/test/test_examples.rb b/test/test_examples.rb
index eb0543f..f3c0b7e 100755
--- a/test/test_examples.rb
+++ b/test/test_examples.rb
@@ -100,7 +100,7 @@ module ValidationExamples
class EPAFHMCrossvalidation < CrossValidation
def initialize
- @dataset_file = File.new("data/EPAFHM.med.csv","r")
+ @dataset_file = File.new("data/EPAFHM.csv","r")
#@prediction_feature = "http://ot-dev.in-silico.ch/toxcreate/feature#IRIS%20unit%20risk"
@num_folds = 10
end
@@ -505,6 +505,26 @@ module ValidationExamples
end
end
+ class AnotherAmbitJ48TrainingTest < TrainingTestValidation
+ def initialize
+ @algorithm_uri = "http://apps.ideaconsult.net:8080/ambit2/algorithm/J48"
+ @training_dataset_uri = "http://apps.ideaconsult.net:8080/ambit2/dataset/585758"
+ @test_dataset_uri = "http://apps.ideaconsult.net:8080/ambit2/dataset/585758"
+ @prediction_feature= "http://apps.ideaconsult.net:8080/ambit2/feature/111148"
+ end
+ end
+
+ class TumTrainingTest < TrainingTestValidation
+ def initialize
+ @algorithm_uri = "http://lxkramer34.informatik.tu-muenchen.de:8080/OpenTox-dev/algorithm/kNNclassification"
+ @training_dataset_uri = "http://apps.ideaconsult.net:8080/ambit2/dataset/585758"
+ @test_dataset_uri = "http://apps.ideaconsult.net:8080/ambit2/dataset/585758"
+ @prediction_feature= "http://apps.ideaconsult.net:8080/ambit2/feature/111148"
+ end
+ end
+
+
+
class LazarVsNtuaCrossvalidation < CrossValidation
def initialize
@@ -803,7 +823,11 @@ module ValidationExamples
"22b" => [ NtuaTrainingTestSplit ],
"22c" => [ NtuaCrossvalidation ],
"22d" => [ LazarVsNtuaCrossvalidation ],
+
+ #impt
"22e" => [ AmbitVsNtuaTrainingTest ],
+ "22f" => [ AnotherAmbitJ48TrainingTest ],
+ "22g" => [ TumTrainingTest ],
}
diff --git a/validation/validation_test.rb b/validation/validation_test.rb
index 2c86548..ae71749 100755
--- a/validation/validation_test.rb
+++ b/validation/validation_test.rb
@@ -60,7 +60,23 @@ class ValidationTest < Test::Unit::TestCase
begin
$test_case = self
- #get 'crossvalidation/138/statistics'
+# post "/validate_datasets",{
+# :test_dataset_uri=>"http://local-ot/dataset/6907",
+# :prediction_dataset_uri=>"http://local-ot/dataset/6909",
+# :test_target_dataset_uri=>"http://local-ot/dataset/6905",
+# :prediction_feature=>"http://local-ot/dataset/6905/feature/Hamster%20Carcinogenicity",
+# #:model_uri=>"http://local-ot/model/1078",
+# :predicted_variable=>"http://local-ot/dataset/6909/feature/prediction/Hamster%20Carcinogenicity/value",
+# :predicted_confidence=>"http://local-ot/dataset/6909/feature/prediction/Hamster%20Carcinogenicity/confidence",
+# #:regression=>"true"}
+# :classification=>"true"}
+#
+# puts last_response.body
+# uri = last_response.body
+# rep = wait_for_task(uri)
+# puts rep
+
+ #get 'crossvalidation/19/statistics'
#get 'crossvalidation/189/statistics'
#puts last_response.body
# run_test("1b")
@@ -68,7 +84,9 @@ class ValidationTest < Test::Unit::TestCase
#get '/crossvalidation/79/predictions',nil,'HTTP_ACCEPT' => "application/x-yaml"
#puts last_response.body
- #run_test("22e") #,:validation_uri=>"http://local-ot/validation/84" )
+ run_test("22f") #,:validation_uri=>"http://local-ot/validation/84" )
+
+
#run_test("21b")
#run_test("21c")
@@ -97,16 +115,16 @@ class ValidationTest < Test::Unit::TestCase
# rep = wait_for_task(uri)
# puts rep
- #run_test("1a", {:validation_uri=>"http://local-ot/validation/466"})
+ #run_test("1a", {:validation_uri=>"http://local-ot/validation/305"})
# puts "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
- #run_test("3a",{:validation_uri=>"http://local-ot/validation/crossvalidation/149"})
+ #run_test("3a",{:validation_uri=>"http://local-ot/validation/crossvalidation/6"})
#puts "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
- #run_test("13a", {:validation_uri=>"http://local-ot/validation/406"})
+ #run_test("13a") #, {:validation_uri=>"http://local-ot/validation/406"})
# puts "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
- #run_test("14a",{:validation_uri=>"http://local-ot/validation/crossvalidation/148"})
+ #run_test("14a") #,{:validation_uri=>"http://local-ot/validation/crossvalidation/148"})
# puts "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
- run_test("1a")
+ #run_test("1a")
# run_test("3d",{
# :dataset_uri => "http://local-ot/dataset/2897",