From 78564782ca749dd13f063f0a04070bef89377354 Mon Sep 17 00:00:00 2001
From: mguetlein <martin.guetlein@gmail.com>
Date: Tue, 24 May 2011 19:44:06 +0200
Subject: adjust to ruby-plot version 0.5, simplify ROC plot creation + support
 for multiple max youden-index points

---
 report/environment.rb         |   2 +-
 report/plot_factory.rb        | 101 +++++++++++++++++-------------------------
 test/test_examples.rb         |  26 ++++++++++-
 validation/validation_test.rb |  32 ++++++++++---
 4 files changed, 92 insertions(+), 69 deletions(-)

diff --git a/report/environment.rb b/report/environment.rb
index bdb4a04..a7d454f 100755
--- a/report/environment.rb
+++ b/report/environment.rb
@@ -4,7 +4,7 @@
   'rexml/document',  'ruby-plot', 'opentox-ruby' ].each do |g|
     require g
 end
-gem 'ruby-plot', "~>0.4.0"
+gem 'ruby-plot', "~>0.5.0"
 
 #R.quit
 
diff --git a/report/plot_factory.rb b/report/plot_factory.rb
index dfb1369..d09e506 100644
--- a/report/plot_factory.rb
+++ b/report/plot_factory.rb
@@ -95,41 +95,28 @@ module Reports
     #   * each of theses validation sets is plotted as a roc-curve  
     #
     def self.create_roc_plot( out_files, validation_set, class_value, split_set_attribute=nil,
-        x_label="False positive rate", y_label="True Positive Rate", show_single_curves=false )
+        x_label="False positive rate", y_label="True Positive Rate" )
       
       out_files = [out_files] unless out_files.is_a?(Array)
       LOGGER.debug "creating roc plot for '"+validation_set.size.to_s+"' validations, out-files:"+out_files.inspect
       
+      data = [] # collects one RubyPlot::LinePlotData per curve
       if split_set_attribute
         attribute_values = validation_set.get_values(split_set_attribute)
-        names = []
-        fp_rates = []
-        tp_rates = []
         attribute_values.each do |value|
           begin
-            data = transform_roc_predictions(validation_set.filter({split_set_attribute => value}), class_value, false)
-            names << value.to_s
-            fp_rates << data[:fp_rate][0]
-            tp_rates << data[:tp_rate][0]
+            data << transform_roc_predictions(validation_set.filter({split_set_attribute => value}), class_value, false, value.to_s ) # curve is named after its attribute value, as the removed code did
           rescue
             LOGGER.warn "could not create ROC plot for "+value.to_s
           end
         end
-        out_files.each do |out_file|
-          RubyPlot::plot_lines(out_file, "ROC-Plot", x_label, y_label, names, fp_rates, tp_rates )
-        end  
       else
-        data = transform_roc_predictions(validation_set, class_value, show_single_curves)
-        labels = []
-        data[:youden].each do |points|
-          points.each do |point,confidence|
-            labels << ["confidence: "+confidence.to_nice_s, point[0], point[1]]
-          end
-        end
-        out_files.each do |out_file|
-          RubyPlot::plot_lines(out_file, "ROC-Plot", x_label, y_label, data[:names], data[:fp_rate], data[:tp_rate], data[:faint], labels )
-        end
+        data << transform_roc_predictions(validation_set, class_value )
       end  
+      
+      out_files.each do |out_file|
+        RubyPlot::plot_lines(out_file, "ROC-Plot", x_label, y_label, data )
+      end
     end
     
     
@@ -291,44 +278,27 @@ module Reports
     end
     
     private
-    def self.transform_roc_predictions(validation_set, class_value, add_single_folds=false)
-      
+    def self.transform_roc_predictions(validation_set, class_value, add_label=true, name="default" ) # name: passed as :name to the returned RubyPlot::LinePlotData
       if (validation_set.size > 1)
-        
-        names = []; fp_rate = []; tp_rate = []; faint = []; youden = []
-        sum_roc_values = { :predicted_values => [], :actual_values => [], :confidence_values => []}
-        
+        values = { :predicted_values => [], :actual_values => [], :confidence_values => []}
         (0..validation_set.size-1).each do |i|
           roc_values = validation_set.get(i).get_predictions.get_prediction_values(class_value)
-          sum_roc_values[:predicted_values] += roc_values[:predicted_values]
-          sum_roc_values[:confidence_values] += roc_values[:confidence_values]
-          sum_roc_values[:actual_values] += roc_values[:actual_values]
-          if add_single_folds
-            begin
-              tp_fp_rates = get_tp_fp_rates(roc_values)
-              names << "fold "+i.to_s
-              fp_rate << tp_fp_rates[:fp_rate]
-              tp_rate << tp_fp_rates[:tp_rate]
-              faint << true
-            rescue
-              LOGGER.warn "could not get ROC vals for fold "+i.to_s
-            end
-          end
+          values[:predicted_values] += roc_values[:predicted_values]
+          values[:confidence_values] += roc_values[:confidence_values]
+          values[:actual_values] += roc_values[:actual_values]
         end
-        tp_fp_rates = get_tp_fp_rates(sum_roc_values)
-        names << nil # "all"
-        fp_rate << tp_fp_rates[:fp_rate]
-        tp_rate << tp_fp_rates[:tp_rate]
-        youden << tp_fp_rates[:youden]
-        faint << false
-        return { :names => names, :fp_rate => fp_rate, :tp_rate => tp_rate, :faint => faint, :youden => youden }
       else
-        roc_values = validation_set.validations[0].get_predictions.get_prediction_values(class_value)
-        tp_fp_rates = get_tp_fp_rates(roc_values)
-        return { :names => ["default"], :fp_rate => [tp_fp_rates[:fp_rate]], :tp_rate => [tp_fp_rates[:tp_rate]], :youden => [tp_fp_rates[:youden]] }
+        values = validation_set.validations[0].get_predictions.get_prediction_values(class_value)
       end
+      tp_fp_rates = get_tp_fp_rates(values)
+      labels = []
+      tp_fp_rates[:youden].each do |point,confidence|
+        labels << ["confidence: "+confidence.to_nice_s, point[0], point[1]]
+      end if add_label # labels stay empty unless add_label is set
+      RubyPlot::LinePlotData.new(:name => name, :x_values => tp_fp_rates[:fp_rate], :y_values => tp_fp_rates[:tp_rate], :labels => labels)
     end
     
+    
     def self.transform_confidence_predictions(validation_set, class_value, add_single_folds=false)
       
       if (validation_set.size > 1)
@@ -368,16 +338,25 @@ module Reports
       end
     end    
     
-    def self.demo_rock_plot
-      roc_values = {:confidence_values => [0.1, 0.9, 0.5, 0.6, 0.6, 0.6], 
-                    :predicted_values =>  [1, 0, 0, 1, 0, 1],
-                    :actual_values =>     [0, 1, 0, 0, 1, 1]}
+    def self.demo_roc_plot # demo helper: plots fixed sample values as a ROC curve to /tmp/plot.png
+#      roc_values = {:confidence_values => [0.1, 0.9, 0.5, 0.6, 0.6, 0.6], 
+#                    :predicted_values =>  [1, 0, 0, 1, 0, 1],
+#                    :actual_values =>     [0, 1, 0, 0, 1, 1]}
+      roc_values = {:confidence_values => [0.9, 0.8, 0.7, 0.6, 0.5, 0.4], 
+                    :predicted_values =>  [1, 1, 1, 1, 1, 1],
+                    :actual_values =>     [1, 0, 1, 0, 1, 0]}
       tp_fp_rates = get_tp_fp_rates(roc_values)
-      data = { :names => ["default"], :fp_rate => [tp_fp_rates[:fp_rate]], :tp_rate => [tp_fp_rates[:tp_rate]] }                    
+      labels = [] # one [text, point[0], point[1]] entry per pair in tp_fp_rates[:youden]
+      tp_fp_rates[:youden].each do |point,confidence|
+        labels << ["confidence: "+confidence.to_s, point[0], point[1]]
+      end
+
+      plot_data = [] # plot_lines is handed an array of LinePlotData; here it holds a single curve
+      plot_data << RubyPlot::LinePlotData.new(:name => "testname", :x_values => tp_fp_rates[:fp_rate], :y_values => tp_fp_rates[:tp_rate], :labels => labels)
       RubyPlot::plot_lines("/tmp/plot.png",
         "ROC-Plot", 
         "False positive rate", 
-        "True Positive Rate", data[:names], data[:fp_rate], data[:tp_rate], data[:faint] )
+        "True Positive Rate", plot_data )
     end
     
     def self.get_performance_confidence_rates(roc_values, feature_type)
@@ -503,7 +482,9 @@ module Reports
       max = youden.max
       youden_hash = {}
       (0..tp_rate.size-1).each do |i|
-        youden_hash[i] = c2[i] if youden[i]==max
+        if youden[i]==max and i>0
+          youden_hash[i] = c2[i]
+        end
       end
       #puts youden.inspect+"\n"+youden_hash.inspect+"\n\n"
       
@@ -526,8 +507,8 @@ end
 
 #require "rubygems"
 #require "ruby-plot"
-#Reports::PlotFactory::demo_ranking_plot
-#Reports::PlotFactory::demo_rock_plot
+##Reports::PlotFactory::demo_ranking_plot
+#Reports::PlotFactory::demo_roc_plot
 
 #a = [1,    0,  1,  2,  3,  0, 2]
 #puts a.compress_sum([100, 90, 70, 70, 30, 10, 0]).inspect
diff --git a/test/test_examples.rb b/test/test_examples.rb
index eb0543f..f3c0b7e 100755
--- a/test/test_examples.rb
+++ b/test/test_examples.rb
@@ -100,7 +100,7 @@ module ValidationExamples
   
     class EPAFHMCrossvalidation < CrossValidation
     def initialize
-      @dataset_file = File.new("data/EPAFHM.med.csv","r")
+      @dataset_file = File.new("data/EPAFHM.csv","r")
       #@prediction_feature = "http://ot-dev.in-silico.ch/toxcreate/feature#IRIS%20unit%20risk"
       @num_folds = 10
     end
@@ -505,6 +505,26 @@ module ValidationExamples
     end
   end  
   
+  class AnotherAmbitJ48TrainingTest < TrainingTestValidation # training-test example using the ambit2 J48 algorithm URI
+    def initialize
+      @algorithm_uri = "http://apps.ideaconsult.net:8080/ambit2/algorithm/J48"
+      @training_dataset_uri = "http://apps.ideaconsult.net:8080/ambit2/dataset/585758"
+      @test_dataset_uri = "http://apps.ideaconsult.net:8080/ambit2/dataset/585758" # NOTE(review): identical to the training dataset - confirm this is intended
+      @prediction_feature= "http://apps.ideaconsult.net:8080/ambit2/feature/111148"
+    end
+  end    
+
+  class TumTrainingTest < TrainingTestValidation # training-test example using the kNNclassification algorithm URI
+    def initialize
+      @algorithm_uri = "http://lxkramer34.informatik.tu-muenchen.de:8080/OpenTox-dev/algorithm/kNNclassification"
+      @training_dataset_uri = "http://apps.ideaconsult.net:8080/ambit2/dataset/585758"
+      @test_dataset_uri = "http://apps.ideaconsult.net:8080/ambit2/dataset/585758" # NOTE(review): identical to the training dataset - confirm this is intended
+      @prediction_feature= "http://apps.ideaconsult.net:8080/ambit2/feature/111148"
+    end
+  end
+
+  
+ 
   
   class LazarVsNtuaCrossvalidation < CrossValidation
     def initialize
@@ -803,7 +823,11 @@ module ValidationExamples
       "22b" =>  [ NtuaTrainingTestSplit ],
       "22c" =>  [ NtuaCrossvalidation ],
       "22d" =>  [ LazarVsNtuaCrossvalidation ],
+
+      #impt      
       "22e" =>  [ AmbitVsNtuaTrainingTest ],
+      "22f" =>  [ AnotherAmbitJ48TrainingTest ],
+      "22g" =>  [ TumTrainingTest ],
       
     }
   
diff --git a/validation/validation_test.rb b/validation/validation_test.rb
index 2c86548..ae71749 100755
--- a/validation/validation_test.rb
+++ b/validation/validation_test.rb
@@ -60,7 +60,23 @@ class ValidationTest < Test::Unit::TestCase
     begin
       $test_case = self
       
-      #get 'crossvalidation/138/statistics'
+#      post "/validate_datasets",{
+#        :test_dataset_uri=>"http://local-ot/dataset/6907",
+#        :prediction_dataset_uri=>"http://local-ot/dataset/6909",
+#        :test_target_dataset_uri=>"http://local-ot/dataset/6905",
+#        :prediction_feature=>"http://local-ot/dataset/6905/feature/Hamster%20Carcinogenicity",
+#        #:model_uri=>"http://local-ot/model/1078",
+#        :predicted_variable=>"http://local-ot/dataset/6909/feature/prediction/Hamster%20Carcinogenicity/value",
+#        :predicted_confidence=>"http://local-ot/dataset/6909/feature/prediction/Hamster%20Carcinogenicity/confidence",
+#        #:regression=>"true"}
+#        :classification=>"true"}
+#        
+#      puts last_response.body
+#      uri = last_response.body
+#      rep = wait_for_task(uri)
+#      puts rep
+      
+      #get 'crossvalidation/19/statistics'
       #get 'crossvalidation/189/statistics'
       #puts last_response.body
 #      run_test("1b")       
@@ -68,7 +84,9 @@ class ValidationTest < Test::Unit::TestCase
       #get '/crossvalidation/79/predictions',nil,'HTTP_ACCEPT' => "application/x-yaml"
       #puts last_response.body
       
-      #run_test("22e") #,:validation_uri=>"http://local-ot/validation/84" )
+      run_test("22f") #,:validation_uri=>"http://local-ot/validation/84" )
+    
+
       #run_test("21b")
       #run_test("21c")
 
@@ -97,16 +115,16 @@ class ValidationTest < Test::Unit::TestCase
 #      rep = wait_for_task(uri)
 #      puts rep
       
-      #run_test("1a", {:validation_uri=>"http://local-ot/validation/466"})
+      #run_test("1a", {:validation_uri=>"http://local-ot/validation/305"})
 #      puts "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
-      #run_test("3a",{:validation_uri=>"http://local-ot/validation/crossvalidation/149"})
+      #run_test("3a",{:validation_uri=>"http://local-ot/validation/crossvalidation/6"})
       #puts "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
-      #run_test("13a", {:validation_uri=>"http://local-ot/validation/406"})
+     #run_test("13a") #, {:validation_uri=>"http://local-ot/validation/406"})
 #      puts "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
-      #run_test("14a",{:validation_uri=>"http://local-ot/validation/crossvalidation/148"})
+      #run_test("14a") #,{:validation_uri=>"http://local-ot/validation/crossvalidation/148"})
 #      puts "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
 
-      run_test("1a")
+      #run_test("1a")
       
 #      run_test("3d",{
 #        :dataset_uri => "http://local-ot/dataset/2897", 
-- 
cgit v1.2.3