Merge remote branch 'mguetlein/test' into development

Conflicts: application.rb example.rb lib/validation_db.rb nightly/nightly.rb report/environment.rb test/test_examples.rb test/test_examples_util.rb
author: Christoph Helma <helma@in-silico.ch> 2011-03-03 13:00:47 +0100
committer: Christoph Helma <helma@in-silico.ch> 2011-03-03 13:00:47 +0100
commit: dcd0a5a659c303c50a59d1271947851245db10e7 (patch)
tree: eaae695edf72c3a99cde58b9caaa7825d6d4f31a /lib
parent: 8b46f5a4f389d7cd54f6e8b38025d275f9d3ed1b (diff)
parent: e57856a3c2cd10df207e722301c24a022e9fd802 (diff)
8 files changed, 535 insertions, 314 deletions
diff --git a/lib/active_record_setup.rb b/lib/active_record_setup.rb
new file mode 100755
index 0000000..b43e692
--- /dev/null
+++ b/lib/active_record_setup.rb
@@ -0,0 +1,50 @@
+
+#gem "activerecord", "= 2.3.8"
+#gem "ar-extensions", "= 0.9.2"
+['rubygems', 'logger', 'active_record', 'opentox-ruby' ].each do |g| #'ar-extensions',
+    require g
+end
+
+unless ActiveRecord::Base.connected?
+  ActiveRecord::Base.establish_connection(  
+     :adapter => CONFIG[:database][:adapter],
+     :host => CONFIG[:database][:host],
+     :database => CONFIG[:database][:database],
+     :username => CONFIG[:database][:username],
+     :password => CONFIG[:database][:password]
+  )
+  ActiveRecord::Base.logger = Logger.new("/dev/null")
+end
+
+class ActiveRecord::Base
+  
+  def self.find_like(filter_params)
+    
+    raise "find like removed"
+    
+    #puts "params before "+filter_params.inspect
+    filter_params.keys.each do |k|
+      key = k.to_s
+      unless self.column_names.include?(key)
+        key = key.from_rdf_format
+        unless self.column_names.include?(key)
+          key = key+"_uri"
+          unless self.column_names.include?(key)
+            key = key+"s"
+            unless self.column_names.include?(key)
+              err = "no attribute found: '"+k.to_s+"'"
+#              if $sinatra
+#                $sinatra.halt 400,err
+#              else
+                raise err
+#              end
+            end
+          end
+        end
+      end
+      filter_params[key+"_like"] = filter_params.delete(k)
+    end
+    #puts "params after "+filter_params.inspect
+    self.find(:all, :conditions => filter_params)
+  end
+end
+\ No newline at end of file
diff --git a/lib/data_mapper_util.rb b/lib/data_mapper_util.rb
new file mode 100644
index 0000000..23f52f5
--- /dev/null
+++ b/lib/data_mapper_util.rb
@@ -0,0 +1,37 @@
+
+#DataObjects::Mysql.logger = DataObjects::Logger.new(STDOUT, 0) 
+
+module Lib
+  module DataMapperUtil 
+    
+    def self.check_params(model, params)
+      prop_names = model.properties.collect{|p| p.name.to_s if p.is_a?DataMapper::Property::Object}
+      params.keys.each do |k|
+        key = k.to_s
+        if (key == "subjectid")
+          params.delete(k)
+        else
+          unless prop_names.include?(key)
+            key = key.from_rdf_format
+            unless prop_names.include?(key)
+              key = key+"_uri"
+              unless prop_names.include?(key)
+                key = key+"s"
+                unless prop_names.include?(key)
+                  raise OpenTox::BadRequestError.new "no attribute found: '"+k.to_s+"'"
+                end
+              end
+            end
+          end
+          params[key.to_sym] = params.delete(k)
+        end
+      end
+      params
+    end
+    
+    def self.all(model, filter_params)
+      model.all(check_params(model,filter_params))
+    end
+    
+  end 
+end
+\ No newline at end of file
diff --git a/lib/format_util.rb b/lib/format_util.rb
new file mode 100644
index 0000000..3d3a3e6
--- /dev/null
+++ b/lib/format_util.rb
@@ -0,0 +1,68 @@
+
+
+class String
+  
+  # :prediction_feature -> predictionFeature
+  # :test_dataset_uri -> testDataset
+  # :validation_uris -> validation
+  def to_rdf_format
+    s = gsub(/_uri(s|)$/,"")
+    s.gsub(/_./) do |m|
+      m.gsub!(/^_/,"")
+      m.upcase
+    end
+  end
+  
+  def from_rdf_format
+    gsub(/[A-Z]/) do |m|
+      "_"+m.downcase
+    end
+  end
+  
+  DC_KEYS = [ "title", "creator", "date", "format" ]
+  RDF_KEYS = [ "type" ]
+  
+  def to_owl_uri
+    if DC_KEYS.include?(self)
+      return DC.send(self)
+    elsif RDF_KEYS.include?(self)
+      return RDF.send(self)
+    else
+      return OT.send(self)
+    end
+  end
+end
+
+class Hash
+  
+  # applies to_rdf_format to all keys
+  def keys_to_rdf_format
+    res = {}
+    keys.each do |k|
+      v = self[k]
+      if v.is_a?(Hash)
+        v = v.keys_to_rdf_format
+      elsif v.is_a?(Array)
+        v = v.collect{ |vv| vv.is_a?(Hash) ? vv.keys_to_rdf_format : vv }
+      end
+      res[k.to_s.to_rdf_format] = v
+    end
+    return res
+  end
+  
+  def keys_to_owl_uris
+    res = {}
+    keys.each do |k|
+      v = self[k]
+      if v.is_a?(Hash)
+        v = v.keys_to_owl_uris
+      elsif v.is_a?(Array)
+        v = v.collect{ |vv| vv.is_a?(Hash) ? vv.keys_to_owl_uris : vv }
+      end
+      res[k.to_s.to_owl_uri] = v
+    end
+    return res
+  end
+  
+end
+
diff --git a/lib/ot_predictions.rb b/lib/ot_predictions.rb
index 63debc0..eb80205 100644..100755
--- a/lib/ot_predictions.rb
+++ b/lib/ot_predictions.rb
@@ -15,22 +15,18 @@ module Lib
       return @compounds[instance_index]
     end
   
-    def initialize(is_classification, test_dataset_uri, test_target_dataset_uri, prediction_feature, prediction_dataset_uri, predicted_variable)
+    def initialize(feature_type, test_dataset_uri, test_target_dataset_uri, 
+      prediction_feature, prediction_dataset_uri, predicted_variable, subjectid=nil, task=nil)
       
-        LOGGER.debug("loading prediciton via test-dateset:'"+test_dataset_uri.to_s+
+        LOGGER.debug("loading prediciton via test-dataset:'"+test_dataset_uri.to_s+
           "', test-target-datset:'"+test_target_dataset_uri.to_s+
           "', prediction-dataset:'"+prediction_dataset_uri.to_s+
           "', prediction_feature: '"+prediction_feature.to_s+"' "+
           "', predicted_variable: '"+predicted_variable.to_s+"'")
           
-        if prediction_feature =~ /ambit.uni-plovdiv.bg.*feature.*264185/
-          LOGGER.warn "HACK for report example"  
-          prediction_feature = "http://ambit.uni-plovdiv.bg:8080/ambit2/feature/264187"
-        end
-         
         predicted_variable=prediction_feature if predicted_variable==nil
         
-        test_dataset = OpenTox::Dataset.find test_dataset_uri
+        test_dataset = OpenTox::Dataset.find test_dataset_uri,subjectid
         raise "test dataset not found: '"+test_dataset_uri.to_s+"'" unless test_dataset
         raise "prediction_feature missing" unless prediction_feature
         
@@ -40,9 +36,9 @@ module Lib
           raise "prediction_feature not found in test_dataset, specify a test_target_dataset\n"+
                 "prediction_feature: '"+prediction_feature.to_s+"'\n"+
                 "test_dataset: '"+test_target_dataset_uri.to_s+"'\n"+
-                "available features are: "+test_target_dataset.features.inspect if test_target_dataset.features.index(prediction_feature)==nil
+                "available features are: "+test_target_dataset.features.inspect if test_target_dataset.features.keys.index(prediction_feature)==nil
         else
-          test_target_dataset = OpenTox::Dataset.find test_target_dataset_uri
+          test_target_dataset = OpenTox::Dataset.find test_target_dataset_uri,subjectid
           raise "test target datset not found: '"+test_target_dataset_uri.to_s+"'" unless test_target_dataset
           if CHECK_VALUES
             test_dataset.compounds.each do |c|
@@ -52,38 +48,47 @@ module Lib
           raise "prediction_feature not found in test_target_dataset\n"+
                 "prediction_feature: '"+prediction_feature.to_s+"'\n"+
                 "test_target_dataset: '"+test_target_dataset_uri.to_s+"'\n"+
-                "available features are: "+test_target_dataset.features.inspect if test_target_dataset.features.index(prediction_feature)==nil
+                "available features are: "+test_target_dataset.features.inspect if test_target_dataset.features.keys.index(prediction_feature)==nil
         end
         
         @compounds = test_dataset.compounds
         LOGGER.debug "test dataset size: "+@compounds.size.to_s
-        raise "test dataset is empty" unless @compounds.size>0
-        class_values = is_classification ? OpenTox::Feature.domain(prediction_feature) : nil
+        raise "test dataset is empty "+test_dataset_uri.to_s unless @compounds.size>0
+        class_values = feature_type=="classification" ? OpenTox::Feature.find(prediction_feature, subjectid).domain : nil
         
         actual_values = []
         @compounds.each do |c|
-          value = test_target_dataset.get_value(c, prediction_feature)
-          
-          if is_classification
-            value = value.to_s unless value==nil
-            raise "illegal class_value of actual value "+value.to_s+" class: "+
-              value.class.to_s unless value==nil or class_values.index(value)!=nil
-            actual_values.push class_values.index(value) 
-          else
-            begin
-              value = value.to_f unless value==nil or value.is_a?(Numeric)
-            rescue
-              LOGGER.warn "no numeric value for regression: '"+value.to_s+"'"
-              value = nil
-            end
-            actual_values.push value
+          case feature_type
+          when "classification"
+            actual_values << classification_value(test_target_dataset, c, prediction_feature, class_values)
+          when "regression"
+            actual_values << regression_value(test_target_dataset, c, prediction_feature)
           end
         end
+        task.progress(40) if task # loaded actual values
         
-        prediction_dataset = OpenTox::Dataset.find prediction_dataset_uri
+        prediction_dataset = OpenTox::Dataset.find prediction_dataset_uri,subjectid
         raise "prediction dataset not found: '"+prediction_dataset_uri.to_s+"'" unless prediction_dataset
-        raise "prediction-feature not found: '"+predicted_variable+"' in prediction-dataset: "+prediction_dataset_uri.to_s+", available features: "+prediction_dataset.features.inspect if prediction_dataset.features.index(predicted_variable)==nil
         
+        # TODO: remove LAZAR_PREDICTION_DATASET_HACK
+        no_prediction_feature = prediction_dataset.features.keys.index(predicted_variable)==nil
+        if no_prediction_feature
+          one_entry_per_compound = true
+          @compounds.each do |c|
+            if prediction_dataset.data_entries[c] and prediction_dataset.data_entries[c].size != 1
+              one_entry_per_compound = false
+              break
+            end
+          end
+          msg = "prediction-feature not found: '"+predicted_variable+"' in prediction-dataset: "+prediction_dataset_uri.to_s+", available features: "+
+            prediction_dataset.features.keys.inspect
+          if one_entry_per_compound
+            LOGGER.warn msg
+          else
+            raise msg
+          end
+        end
+
         raise "more predicted than test compounds test:"+@compounds.size.to_s+" < prediction:"+
           prediction_dataset.compounds.size.to_s if @compounds.size < prediction_dataset.compounds.size
         if CHECK_VALUES
@@ -100,41 +105,138 @@ module Lib
             predicted_values << nil
             confidence_values << nil
           else
-            if is_classification
-              value = prediction_dataset.get_predicted_class(c, predicted_variable)
-              value = value.to_s unless value==nil
-              raise "illegal class_value of predicted value "+value.to_s+" class: "+value.class.to_s unless value==nil or class_values.index(value)!=nil
-              predicted_values << class_values.index(value)
-              confidence_values << prediction_dataset.get_prediction_confidence(c, predicted_variable)
-            else
-              value = prediction_dataset.get_predicted_regression(c, predicted_variable)
-              begin
-                value = value.to_f unless value==nil or value.is_a?(Numeric)
-              rescue
-                LOGGER.warn "no numeric value for regression: '"+value.to_s+"'"
-                value = nil
-              end
-              predicted_values << value
-              confidence_values << nil
+            case feature_type
+            when "classification"
+              # TODO: remove LAZAR_PREDICTION_DATASET_HACK
+              predicted_values << classification_value(prediction_dataset, c, no_prediction_feature ? nil : predicted_variable, class_values)
+            when "regression"
+              predicted_values << regression_value(prediction_dataset, c, no_prediction_feature ? nil : predicted_variable)
+            end
+            # TODO confidence_values << prediction_dataset.get_prediction_confidence(c, predicted_variable)
+            conf = 1
+            begin
+              feature = prediction_dataset.data_entries[c].keys[0]
+              feature_data = prediction_dataset.features[feature]
+              conf = feature_data[OT.confidence] if feature_data[OT.confidence]!=nil 
+            rescue
+              LOGGER.warn "could not get confidence"
             end
+            confidence_values << conf
           end
         end
+        task.progress(80) if task # loaded predicted values and confidence
         
-        super(predicted_values, actual_values, confidence_values, is_classification, class_values)
+        super(predicted_values, actual_values, confidence_values, feature_type, class_values)
         raise "illegal num compounds "+num_info if  @compounds.size != @predicted_values.size
+        task.progress(100) if task # done with the mathematics
     end
     
+    private
+    def regression_value(dataset, compound, feature)
+      v = value(dataset, compound, feature)
+      begin
+        v = v.to_f unless v==nil or v.is_a?(Numeric)
+        v
+      rescue
+        LOGGER.warn "no numeric value for regression: '"+v.to_s+"'"
+        nil
+      end
+    end
+    
+    def classification_value(dataset, compound, feature, class_values)
+      v = value(dataset, compound, feature)
+      i = class_values.index(v)
+      raise "illegal class_value of prediction (value is '"+v.to_s+"', class is '"+v.class.to_s+"'), possible values are "+
+        class_values.inspect unless v==nil or i!=nil
+      i
+    end
+    
+    def value(dataset, compound, feature)
+      return nil if dataset.data_entries[compound]==nil
+      if feature==nil
+        v = dataset.data_entries[compound].values[0]
+      else
+        v = dataset.data_entries[compound][feature]
+      end
+      return nil if v==nil 
+      raise "no array "+v.class.to_s+" : '"+v.to_s+"'" unless v.is_a?(Array)
+      if v.size>1
+        v.uniq!
+        raise "not yet implemented: multiple non-equal values "+compound.to_s+" "+v.inspect if v.size>1
+        v = v[0]
+      elsif v.size==1
+        v = v[0]
+      else
+        v = nil
+      end
+      raise "array" if v.is_a?(Array)
+      v = nil if v.to_s.size==0
+      v
+    end
 
+    public
     def compute_stats
     
       res = {}
-      if @is_classification
-        (Lib::VAL_CLASS_PROPS_EXTENDED).each{ |s| res[s] = send(s)}  
-      else
+      case @feature_type
+      when "classification"
+        (Lib::VAL_CLASS_PROPS).each{ |s| res[s] = send(s)}  
+      when "regression"
         (Lib::VAL_REGR_PROPS).each{ |s| res[s] = send(s) }  
       end
       return res
     end
     
+    def to_array()
+      OTPredictions.to_array( [self] )
+    end
+    
+    def self.to_array( predictions, add_pic=false, format=false )
+  
+      res = []
+      predictions.each do |p|
+        (0..p.num_instances-1).each do |i|
+          a = []
+          
+          #PENDING!
+          begin
+            #a.push( "http://ambit.uni-plovdiv.bg:8080/ambit2/depict/cdk?search="+
+            #  URI.encode(OpenTox::Compound.new(:uri=>p.identifier(i)).smiles) ) if add_pic
+            a << p.identifier(i)+"/image"
+          rescue => ex
+            raise ex
+            #a.push("Could not add pic: "+ex.message)
+            #a.push(p.identifier(i))
+          end
+          
+          a << (format ? p.actual_value(i).to_nice_s : p.actual_value(i))
+          a << (format ? p.predicted_value(i).to_nice_s : p.predicted_value(i))
+          if p.feature_type=="classification"
+            if (p.predicted_value(i)!=nil and p.actual_value(i)!=nil)
+              a << (p.classification_miss?(i) ? 1 : 0)
+            else
+              a << nil
+            end
+          end
+          if p.confidence_values_available?
+            a << (format ? p.confidence_value(i).to_nice_s : p.confidence_value(i))
+          end
+          a << p.identifier(i)
+          res << a
+        end
+      end
+        
+      header = []
+      header << "compound" if add_pic
+      header << "actual value"
+      header << "predicted value"
+      header << "missclassified" if predictions[0].feature_type=="classification"
+      header << "confidence value" if predictions[0].confidence_values_available?
+      header << "compound-uri"
+      res.insert(0, header)
+      
+      return res
+  end
+    
   end
 end
diff --git a/lib/predictions.rb b/lib/predictions.rb
index f6351f8..5850024 100644..100755
--- a/lib/predictions.rb
+++ b/lib/predictions.rb
@@ -22,20 +22,22 @@ module Lib
     def initialize( predicted_values, 
                     actual_values, 
                     confidence_values, 
-                    is_classification, 
-                    prediction_feature_values=nil )
+                    feature_type, 
+                    class_domain=nil )
                     
       @predicted_values = predicted_values
       @actual_values = actual_values
       @confidence_values = confidence_values
-      @is_classification = is_classification
-      @prediction_feature_values = prediction_feature_values
+      @feature_type = feature_type
+      @class_domain = class_domain
       @num_classes = 1
       
       #puts "predicted:  "+predicted_values.inspect
       #puts "actual:     "+actual_values.inspect
       #puts "confidence: "+confidence_values.inspect
       
+      raise "unknown feature_type: "+@feature_type.to_s unless 
+        @feature_type=="classification" || @feature_type=="regression"
       raise "no predictions" if @predicted_values.size == 0
       num_info = "predicted:"+@predicted_values.size.to_s+
         " confidence:"+@confidence_values.size.to_s+" actual:"+@actual_values.size.to_s
@@ -43,23 +45,28 @@ module Lib
       raise "illegal num confidence values "+num_info if  @confidence_values.size != @predicted_values.size
       
       @confidence_values.each{ |c| raise "illegal confidence value: '"+c.to_s+"'" unless c==nil or (c.is_a?(Numeric) and c>=0 and c<=1) }
-      conf_val_tmp = {}
-      @confidence_values.each{ |c| conf_val_tmp[c] = nil }
-      if conf_val_tmp.keys.size<2
-        LOGGER.warn("prediction w/o confidence values");
-        @confidence_values=nil
-      end
+      ## check whether there is more than one distinct confidence value
+      ## DEPRECATED? not sure anymore what this was about; 
+      ##             I am pretty sure this was for the R plot of ROC curves,
+      ##             but ROC curves are now plotted manually
+      #conf_val_tmp = {}
+      #@confidence_values.each{ |c| conf_val_tmp[c] = nil }
+      #if conf_val_tmp.keys.size<2
+      #  LOGGER.warn("prediction w/o confidence values");
+      #  @confidence_values=nil
+      #end
       
-      if @is_classification
-        raise "prediction_feature_values missing while performing classification" unless @prediction_feature_values
-        @num_classes = @prediction_feature_values.size
+      case @feature_type
+      when "classification"
+        raise "class_domain missing while performing classification" unless @class_domain
+        @num_classes = @class_domain.size
         raise "num classes < 2" if @num_classes<2
         { "predicted"=>@predicted_values, "actual"=>@actual_values }.each do |s,values|
           values.each{ |v| raise "illegal "+s+" classification-value ("+v.to_s+"),"+
-            "has to be either nil or index of predicted-values" if v!=nil and (v<0 or v>@num_classes)}
+            "has to be either nil or index of predicted-values" if v!=nil and (!v.is_a?(Numeric) or v<0 or v>@num_classes)}
         end
-      else
-        raise "prediction_feature_values != nil while performing regression" if @prediction_feature_values
+      when "regresssion"
+        raise "class_domain != nil while performing regression" if @class_domain
         { "predicted"=>@predicted_values, "actual"=>@actual_values }.each do |s,values|
           values.each{ |v| raise "illegal "+s+" regression-value ("+v.to_s+"),"+
             "has to be either nil or number" unless v==nil or v.is_a?(Numeric)}
@@ -79,15 +86,16 @@ module Lib
       @num_predicted = 0
       @num_unpredicted = 0
       
-      if @is_classification
+      case @feature_type
+      when "classification"
         @confusion_matrix = []
-        @prediction_feature_values.each do |v|
+        @class_domain.each do |v|
           @confusion_matrix.push( Array.new( @num_classes, 0 ) )
         end
         
         @num_correct = 0
         @num_incorrect = 0
-      else
+      when "regression"
         @sum_error = 0
         @sum_abs_error = 0
         @sum_squared_error = 0
@@ -118,14 +126,15 @@ module Lib
         else
           @num_predicted += 1
           
-          if @is_classification
+          case @feature_type
+          when "classification"
             @confusion_matrix[actual_value][predicted_value] += 1
             if (predicted_value == actual_value)
               @num_correct += 1
             else
               @num_incorrect += 1
             end
-          else
+          when "regression"
             delta = predicted_value - actual_value
             @sum_error += delta
             @sum_abs_error += delta.abs
@@ -152,21 +161,38 @@ module Lib
     end
     
     def percent_correct
-      raise "no classification" unless @is_classification
+      raise "no classification" unless @feature_type=="classification"
       return 0 if @num_with_actual_value==0
-      return 100 * @num_correct / @num_with_actual_value.to_f
+      return 100 * @num_correct / (@num_with_actual_value - @num_unpredicted).to_f
     end
     
     def percent_incorrect
-      raise "no classification" unless @is_classification
+      raise "no classification" unless @feature_type=="classification"
       return 0 if @num_with_actual_value==0
-      return 100 * @num_incorrect / @num_with_actual_value.to_f
+      return 100 * @num_incorrect / (@num_with_actual_value - @num_unpredicted).to_f
     end
     
     def accuracy
       return percent_correct / 100.0
     end
     
+    def weighted_accuracy
+      raise "no classification" unless @feature_type=="classification"
+      total = 0
+      correct = 0
+      (0..@predicted_values.size-1).each do |i|
+        if @predicted_values[i]!=nil
+          total += @confidence_values[i]
+          correct += @confidence_values[i] if @actual_values[i]==@predicted_values[i]
+        end
+      end
+      if total==0 || correct == 0
+        return 0  
+      else
+        return correct / total 
+      end
+    end
+
     def percent_unpredicted
       return 0 if @num_with_actual_value==0
       return 100 * @num_unpredicted / @num_with_actual_value.to_f
@@ -186,17 +212,17 @@ module Lib
     end
 
     def num_correct
-      raise "no classification" unless @is_classification
+      raise "no classification" unless @feature_type=="classification"
       return @num_correct
     end
 
     def num_incorrect
-      raise "no classification" unless @is_classification
+      raise "no classification" unless @feature_type=="classification"
       return @num_incorrect
     end
     
     def num_unclassified
-      raise "no classification" unless @is_classification
+      raise "no classification" unless @feature_type=="classification"
       return @num_unpredicted
     end
     
@@ -205,35 +231,39 @@ module Lib
     #     and values: <int-value>
     def confusion_matrix
       
-      raise "no classification" unless @is_classification
+      raise "no classification" unless @feature_type=="classification"
       res = {}
       (0..@num_classes-1).each do |actual|
           (0..@num_classes-1).each do |predicted|
-            res[{:confusion_matrix_actual => @prediction_feature_values[actual],
-                 :confusion_matrix_predicted => @prediction_feature_values[predicted]}] = @confusion_matrix[actual][predicted]
+            res[{:confusion_matrix_actual => @class_domain[actual],
+                 :confusion_matrix_predicted => @class_domain[predicted]}] = @confusion_matrix[actual][predicted]
         end
       end
       return res
     end
     
     def area_under_roc(class_index=nil)
-      return prediction_feature_value_map( lambda{ |i| area_under_roc(i) } ) if class_index==nil
+      return prediction_feature_value_map( lambda{ |i| area_under_roc(i) } ) if 
+        class_index==nil
       return 0.0 if @confidence_values==nil
       
       LOGGER.warn("TODO: implement approx computiation of AUC,"+
-        "so far Wilcoxon-Man-Whitney is used (exponential)") if @predicted_values.size>1000
+        "so far Wilcoxon-Man-Whitney is used (exponential)") if 
+        @predicted_values.size>1000
+      #puts "COMPUTING AUC "+class_index.to_s
       
       tp_conf = []
       fp_conf = []
       (0..@predicted_values.size-1).each do |i|
         if @predicted_values[i]==class_index
-          if @actual_values[i]==class_index
+          if @actual_values[i]==@predicted_values[i]
             tp_conf.push(@confidence_values[i])
           else
             fp_conf.push(@confidence_values[i])
           end
         end
       end
+      #puts tp_conf.inspect+"\n"+fp_conf.inspect+"\n\n"
       
       return 0.0 if tp_conf.size == 0
       return 1.0 if fp_conf.size == 0
@@ -241,9 +271,9 @@ module Lib
       tp_conf.each do |tp|
         fp_conf.each do |fp|
           sum += 1 if tp>fp
+          sum += 0.5 if tp==fp
         end
       end
-      
       return sum / (tp_conf.size * fp_conf.size).to_f
     end
     
@@ -441,8 +471,8 @@ module Lib
     def sample_correlation_coefficient
       # formula see http://en.wikipedia.org/wiki/Correlation_and_dependence#Pearson.27s_product-moment_coefficient
       return ( @num_predicted * @sum_multiply - @sum_actual * @sum_predicted ) /
-             ( Math.sqrt( @num_predicted * @sum_squares_actual - @sum_actual**2 ) *
-               Math.sqrt( @num_predicted * @sum_squares_predicted - @sum_predicted**2 ) )
+             ( Math.sqrt( [0, @num_predicted * @sum_squares_actual - @sum_actual**2].max ) *
+               Math.sqrt( [0, @num_predicted * @sum_squares_predicted - @sum_predicted**2].max ) )
     end
     
     def total_sum_of_squares
@@ -460,21 +490,30 @@ module Lib
     # data for roc-plots ###################################################################################
     
     def get_roc_values(class_value)
+      
+      #puts "get_roc_values for class_value: "+class_value.to_s
       raise "no confidence values" if @confidence_values==nil
-      class_index = @prediction_feature_values.index(class_value)
-      raise "class not found "+class_value.to_s if class_index==nil and class_value!=nil
+      raise "no class-value specified" if class_value==nil
+      
+      class_index = @class_domain.index(class_value)
+      raise "class not found "+class_value.to_s if class_index==nil
       
       c = []; p = []; a = []
       (0..@predicted_values.size-1).each do |i|
         # NOTE: not predicted instances are ignored here
-        if (@predicted_values[i]!=nil and (class_value==nil or @predicted_values[i]==class_index))
+        if @predicted_values[i]!=nil and @predicted_values[i]==class_index
           c << @confidence_values[i]
           p << @predicted_values[i]
           a << @actual_values[i]
         end
       end
       
-      return {:predicted_values => p, :actual_values => a, :confidence_values => c}
+      # DO NOT raise an exception here, maybe different validations are concatenated
+      #raise "no instance predicted as '"+class_value+"'" if p.size == 0
+      
+      h = {:predicted_values => p, :actual_values => a, :confidence_values => c}
+      #puts h.inspect
+      return h
     end
     
     ########################################################################################
@@ -488,9 +527,10 @@ module Lib
     end
   
     def predicted_value(instance_index)
-      if @is_classification
-        @predicted_values[instance_index]==nil ? nil : @prediction_feature_values[@predicted_values[instance_index]]
-      else
+      case @feature_type 
+      when "classification"
+        @predicted_values[instance_index]==nil ? nil : @class_domain[@predicted_values[instance_index]]
+      when "regression"
         @predicted_values[instance_index]
       end
     end
@@ -500,9 +540,10 @@ module Lib
     end
     
     def actual_value(instance_index)
-      if @is_classification
-        @actual_values[instance_index]==nil ? nil : @prediction_feature_values[@actual_values[instance_index]]
-      else
+      case @feature_type 
+      when "classification"
+        @actual_values[instance_index]==nil ? nil : @class_domain[@actual_values[instance_index]]
+      when "regression"
         @actual_values[instance_index]
       end
     end
@@ -512,13 +553,13 @@ module Lib
     end      
     
     def classification_miss?(instance_index)
-      raise "no classification" unless @is_classification
+      raise "no classification" unless @feature_type=="classification"
       return false if predicted_value(instance_index)==nil or actual_value(instance_index)==nil
       return predicted_value(instance_index) != actual_value(instance_index)
     end
     
-    def classification?
-      @is_classification
+    def feature_type
+      @feature_type
     end
     
     def confidence_values_available?
@@ -535,7 +576,7 @@ module Lib
     def prediction_feature_value_map(proc)
       res = {}
       (0..@num_classes-1).each do |i|
-        res[@prediction_feature_values[i]] = proc.call(i)
+        res[@class_domain[i]] = proc.call(i)
       end
       return res
     end
diff --git a/lib/rdf_provider.rb b/lib/rdf_provider.rb
deleted file mode 100644
index 7fa3ecc..0000000
--- a/lib/rdf_provider.rb
+++ /dev/null
@@ -1,188 +0,0 @@
-
-class String
-  def convert_underscore
-    gsub(/_./) do |m|
-      m.gsub!(/^_/,"")
-      m.upcase
-    end
-  end
-end
-
-module Lib
-  module RDFProvider
-    
-    def to_rdf
-      HashToOwl.to_rdf(self)
-    end
-    
-    def uri
-      raise "not implemented"
-    end
-    
-    def rdf_title
-      raise "not implemented"
-    end
-    
-    # the rdf output is generated from the hash that is provided by this method
-    # the keys in the hash structure are used to defined type of the resource (literal, objectProperty, dataProperty)
-    # example: if the structure should contain a literal named "size" with value 5
-    # * add :property_xy => 5 to your hash
-    # * make sure literal?(:property_xy) returns true
-    # * literal_name(:property_xy) must return "size"
-    #
-    def get_content_as_hash
-      raise "not implemented"
-    end
-    
-    def to_yaml
-      get_content_as_hash.to_yaml
-    end
-    
-    def rdf_ignore?( prop )
-      self.class::IGNORE.index( prop ) != nil
-    end
-    
-    def literal?( prop )
-      self.class::LITERALS.index( prop ) != nil
-    end
-    
-    def literal_name( prop )
-      if self.class::LITERAL_NAMES.has_key?(prop)
-        self.class::LITERAL_NAMES[prop]
-      else
-        OT[prop.to_s.convert_underscore]
-      end
-    end
-    
-    def object_property?( prop )
-      self.class::OBJECT_PROPERTIES.has_key?( prop )
-    end
-    
-    def object_property_name( prop )
-      return self.class::OBJECT_PROPERTIES[ prop ]
-    end
-    
-    def object_type( prop )
-      return self.class::OBJECTS[ prop ]
-    end
-  
-    def class?(prop)
-      self.class::CLASSES.has_key?( prop )
-    end
-    
-    def class_name( prop )
-      return self.class::CLASSES[ prop ]
-    end
-    
-  end
-  
-  class HashToOwl
-    #include OpenTox::Owl
-    
-    def self.to_rdf( rdf_provider )
-      
-      owl = OpenTox::Owl.create(rdf_provider.rdf_title, rdf_provider.uri )
-      toOwl = HashToOwl.new(owl)
-      toOwl.add_content(rdf_provider)
-      toOwl.rdf
-    end
-  
-    def add_content( rdf_provider ) 
-      @rdf_provider = rdf_provider
-      recursiv_add_content( @rdf_provider.get_content_as_hash, @owl.root_node )
-    end
-    
-    def rdf
-      @owl.rdf
-    end
-    
-    private
-    def initialize(owl)
-      @owl = owl
-      @model = owl.model
-    end
-    
-    def recursiv_add_content( output, node )
-      output.each do |k,v|
-        if v==nil
-          LOGGER.warn "skipping nil value: "+k.to_s
-          next
-        end
-        if @rdf_provider.rdf_ignore?(k)
-          #do nothing
-        elsif v.is_a?(Hash)
-          new_node = add_class( k, node )
-          recursiv_add_content( v, new_node )
-        elsif v.is_a?(Array)
-          v.each do |value|
-            if @rdf_provider.class?(k)
-              new_node = add_class( k, node )
-              recursiv_add_content( value, new_node )
-            else
-              add_object_property( k, value, node)
-            end
-          end
-        elsif @rdf_provider.literal?(k)
-          set_literal( k, v, node)
-        elsif @rdf_provider.object_property?(k)
-          add_object_property( k, v, node)
-        else
-          raise "illegal value k:"+k.to_s+" v:"+v.to_s
-        end
-      end
-    end
-  
-    def add_class( property, node )
-      raise "no object prop: "+property.to_s unless @rdf_provider.object_property?(property)
-      raise "no class name: "+property.to_s unless @rdf_provider.class_name(property)
-      # to avoid anonymous nodes, make up uris for sub-objects
-      # use counter to make sure each uri is unique
-      # for example we will get ../confusion_matrix_cell/1, ../confusion_matrix_cell/2, ...
-      count = 1
-      while (true)
-        res = Redland::Resource.new( File.join(node.uri.to_s,property.to_s+"/"+count.to_s) )  
-        break if @model.subject(@rdf_provider.object_property_name(property), res).nil?
-        count += 1
-      end
-      clazz = Redland::Resource.new(@rdf_provider.class_name(property))
-      @model.add res, RDF['type'], clazz
-      @model.add res, DC['title'], clazz
-      @model.add clazz, RDF['type'], OWL['Class']
-      @model.add DC['title'], RDF['type'],OWL['AnnotationProperty']
-      
-      objectProp = Redland::Resource.new(@rdf_provider.object_property_name(property))
-      @model.add objectProp, RDF['type'], OWL['ObjectProperty']
-      @model.add node, objectProp, res
-      return res
-    end
-    
-    def set_literal(property, value, node )
-      raise "empty literal value "+property.to_s if value==nil || value.to_s.size==0
-      raise "no literal name "+propety.to_s unless @rdf_provider.literal_name(property)
-      begin
-        l = @model.object(subject, @rdf_provider.literal_name(property))
-        @model.delete node, @rdf_provider.literal_name(property), l
-      rescue
-      end
-      literalProp =  Redland::Resource.new(@rdf_provider.literal_name(property))
-      @model.add literalProp, RDF['type'],OWL['AnnotationProperty']
-      @model.add node, literalProp, Redland::Literal.create(value)
-    end
-    
-    def add_object_property(property, value, node )
-      raise "empty object property value "+property.to_s if value==nil || value.to_s.size==0
-      raise "no object property name "+propety.to_s unless @rdf_provider.object_property_name(property)
-      raise "no object type "+property.to_s unless @rdf_provider.object_type(property)
-      
-      objectProp = Redland::Resource.new(@rdf_provider.object_property_name(property))
-      @model.add objectProp, RDF['type'], OWL['ObjectProperty']
-      
-      val = Redland::Resource.new(value)
-      type = Redland::Resource.new(@rdf_provider.object_type(property))
-      @model.add node, objectProp, val
-      @model.add val, RDF['type'], type
-      @model.add type, RDF['type'], OWL['Class']
-    end
-    
-  end
-end
diff --git a/lib/test_util.rb b/lib/test_util.rb
index ecab76c..590d295 100644..100755
--- a/lib/test_util.rb
+++ b/lib/test_util.rb
@@ -10,11 +10,12 @@ module Lib
     end
     
     def self.wait_for_task(uri)
-      if OpenTox::Utils.task_uri?(uri)
+      if uri.task_uri?
         task = OpenTox::Task.find(uri)
         task.wait_for_completion
-        raise "task failed: "+uri.to_s+", error is:\n"+task.description if task.error?
-        uri = task.resultURI
+        #raise "task failed: "+uri.to_s+", error is:\n"+task.description if task.error?
+        LOGGER.error "task failed :\n"+task.to_yaml if task.error?
+        uri = task.result_uri
       end
       return uri
     end
diff --git a/lib/validation_db.rb b/lib/validation_db.rb
index 7afab90..0d5db21 100644..100755
--- a/lib/validation_db.rb
+++ b/lib/validation_db.rb
@@ -4,21 +4,10 @@
 #end
 require "lib/merge.rb"
 
-unless ActiveRecord::Base.connected?
-  ActiveRecord::Base.establish_connection(  
-     :adapter => CONFIG[:database][:adapter],
-     :host => CONFIG[:database][:host],
-     :database => CONFIG[:database][:database],
-     :username => CONFIG[:database][:username],
-     :password => CONFIG[:database][:password]
-  )
-  ActiveRecord::Base.logger = Logger.new("/dev/null")
-end
-
 module Lib
 
-  VAL_PROPS_GENERAL = [ :validation_uri, :model_uri, :algorithm_uri, :training_dataset_uri, :prediction_feature,
-                :test_dataset_uri, :test_target_dataset_uri, :prediction_dataset_uri, :created_at ] 
+  VAL_PROPS_GENERAL = [ :validation_uri, :validation_type, :model_uri, :algorithm_uri, :training_dataset_uri, :prediction_feature,
+                :test_dataset_uri, :test_target_dataset_uri, :prediction_dataset_uri, :date ] 
   VAL_PROPS_SUM = [ :num_instances, :num_without_class, :num_unpredicted ]
   VAL_PROPS_AVG = [:real_runtime, :percent_without_class, :percent_unpredicted ]
   VAL_PROPS = VAL_PROPS_GENERAL + VAL_PROPS_SUM + VAL_PROPS_AVG
@@ -28,7 +17,8 @@ module Lib
   
   # :classification_statistics
   VAL_CLASS_PROPS_SINGLE_SUM = [ :num_correct, :num_incorrect, :confusion_matrix  ]
-  VAL_CLASS_PROPS_SINGLE_AVG = [ :percent_correct, :percent_incorrect, :weighted_area_under_roc ]
+  VAL_CLASS_PROPS_SINGLE_AVG = [ :percent_correct, :percent_incorrect, 
+    :weighted_area_under_roc, :accuracy, :weighted_accuracy ] 
   VAL_CLASS_PROPS_SINGLE = VAL_CLASS_PROPS_SINGLE_SUM + VAL_CLASS_PROPS_SINGLE_AVG
   
   # :class_value_statistics
@@ -43,26 +33,146 @@ module Lib
                                 :true_negative_rate, :true_positive_rate ] #:precision, :recall, 
                                 
   VAL_CLASS_PROPS = VAL_CLASS_PROPS_SINGLE + VAL_CLASS_PROPS_PER_CLASS
-  VAL_CLASS_PROPS_EXTENDED = VAL_CLASS_PROPS + [:accuracy]
 
   # :regression_statistics
   VAL_REGR_PROPS = [ :root_mean_squared_error, :mean_absolute_error, :r_square, 
     :target_variance_actual, :target_variance_predicted, :sum_squared_error, :sample_correlation_coefficient ]
   
   CROSS_VAL_PROPS = [:dataset_uri, :num_folds, :stratified, :random_seed]
-  CROSS_VAL_PROPS_REDUNDANT = [:crossvalidation_uri, :algorithm_uri, :created_at] + CROSS_VAL_PROPS 
+  CROSS_VAL_PROPS_REDUNDANT = [:crossvalidation_uri, :algorithm_uri, :date] + CROSS_VAL_PROPS 
   
-  ALL_PROPS = VAL_PROPS + VAL_CV_PROPS + VAL_CLASS_PROPS_EXTENDED + VAL_REGR_PROPS + CROSS_VAL_PROPS
+  ALL_PROPS = VAL_PROPS + VAL_CV_PROPS + VAL_CLASS_PROPS + VAL_REGR_PROPS + CROSS_VAL_PROPS
 
   VAL_MERGE_GENERAL = VAL_PROPS_GENERAL + VAL_CV_PROPS + [:classification_statistics, :regression_statistics] + CROSS_VAL_PROPS
   VAL_MERGE_SUM = VAL_PROPS_SUM + VAL_CLASS_PROPS_SINGLE_SUM + VAL_CLASS_PROPS_PER_CLASS_SUM
   VAL_MERGE_AVG = VAL_PROPS_AVG + VAL_CLASS_PROPS_SINGLE_AVG + VAL_CLASS_PROPS_PER_CLASS_AVG + VAL_REGR_PROPS
- 
-  class Validation < ActiveRecord::Base
-    serialize :classification_statistics
-    serialize :regression_statistics
+  
+
+#  class Validation < ActiveRecord::Base
+#    serialize :classification_statistics
+#    serialize :regression_statistics
+#    
+#    alias_attribute :date, :created_at
+    
+  class Validation 
+    include DataMapper::Resource
+  
+    property :id, Serial
+    property :validation_type, String, :length => 512
+    property :model_uri, String, :length => 512
+    property :algorithm_uri, String, :length => 512
+    property :training_dataset_uri, String, :length => 512
+    property :test_target_dataset_uri, String, :length => 512
+    property :test_dataset_uri, String, :length => 512
+    property :prediction_dataset_uri, String, :length => 512
+    property :prediction_feature, String, :length => 512
+    property :created_at, DateTime
+    property :num_instances, Integer
+    property :num_without_class, Integer
+    property :num_unpredicted, Integer
+    property :crossvalidation_id, Integer
+    property :crossvalidation_fold, Integer
+    property :real_runtime, Float
+    property :percent_without_class, Float
+    property :percent_unpredicted, Float
+    property :classification_statistics, Object
+    property :regression_statistics, Object
+    property :finished, Boolean, :default => false
+    
+    attr_accessor :subjectid
+    
+    after :save, :check_policy
+    private
+    def check_policy  # after-:save hook: hands this validation's URI and the subjectid to OpenTox::Authorization.check_policy
+      OpenTox::Authorization.check_policy(validation_uri, subjectid)
+    end
+    
+    public
+    def date  # returns created_at under the name 'date'
+      created_at
+    end
+    
+    def validation_uri  # full URI of this validation, built by $url_provider from the id
+      raise "no id" if id.nil?  # without an id there is nothing to build the URI from
+      $url_provider.url_for("/#{id}", :full)
+    end
+    
+    def crossvalidation_uri  # URI built from crossvalidation_id; nil when crossvalidation_id is not set
+      $url_provider.url_for("/crossvalidation/#{crossvalidation_id}", :full) if crossvalidation_id
+    end
+    
+    def self.classification_property?( property )  # true if property is listed in VAL_CLASS_PROPS
+      VAL_CLASS_PROPS.include?( property )
+    end
+    
+    def self.depends_on_class_value?( property )  # true if property is listed in VAL_CLASS_PROPS_PER_CLASS
+      VAL_CLASS_PROPS_PER_CLASS.include?( property )
+    end
+    
+    def self.complement_exists?( property )  # true if property is listed in VAL_CLASS_PROPS_PER_CLASS_COMPLEMENT_EXISTS
+      VAL_CLASS_PROPS_PER_CLASS_COMPLEMENT_EXISTS.include?( property )
+    end
+    
   end
   
-  class Crossvalidation < ActiveRecord::Base
+#  class Crossvalidation < ActiveRecord::Base
+#    alias_attribute :date, :created_at
+  class Crossvalidation 
+    include DataMapper::Resource
+  
+    property :id, Serial
+    property :algorithm_uri, String, :length => 512
+    property :dataset_uri, String, :length => 512
+    property :created_at, DateTime
+    property :num_folds, Integer, :default => 10
+    property :random_seed, Integer, :default => 1
+    property :finished, Boolean, :default => false
+    property :stratified, Boolean, :default => false
+    
+    attr_accessor :subjectid
+        
+    after :save, :check_policy
+    private
+    def check_policy  # after-:save hook: hands this crossvalidation's URI and the subjectid to OpenTox::Authorization.check_policy
+      OpenTox::Authorization.check_policy(crossvalidation_uri, subjectid)
+    end
+    
+    public
+    def date  # returns created_at under the name 'date'
+      created_at
+    end
+    
+    def crossvalidation_uri  # full URI of this crossvalidation, built by $url_provider from the id
+      raise "no id" if self.id==nil  # without an id there is nothing to build the URI from
+      $url_provider.url_for("/crossvalidation/"+self.id.to_s, :full)  # id is non-nil here, so no extra guard is needed
+    end
+    
+    # Lists the crossvalidations that are unique in terms of
+    # dataset_uri, num_folds, stratified and random_seed; of several records
+    # sharing these four values only the first one returned by the query is kept.
+    #
+    # conditions:: further query conditions, passed to Lib::Crossvalidation.all
+    # subjectid::  passed to the GET authorization check, which is only done if AA_SERVER is set
+    # Returns an Array with the crossvalidations that were kept.
+    def self.find_all_uniq(conditions={}, subjectid=nil )
+      uniq = []
+      Lib::Crossvalidation.all(:conditions => conditions).each do |cv|
+        # leave out crossvalidations for which the authorization check fails
+        next if AA_SERVER && !OpenTox::Authorization.authorized?(cv.crossvalidation_uri,"GET",subjectid)
+        # duplicate = an already collected crossvalidation has the same four values
+        duplicate = uniq.any? do |seen|
+          cv.dataset_uri == seen.dataset_uri && cv.num_folds == seen.num_folds &&
+            cv.stratified == seen.stratified && cv.random_seed == seen.random_seed
+        end
+        uniq << cv unless duplicate
+      end
+      uniq
+    end
   end
 end
+
+
+Lib::Validation.auto_upgrade!  # DataMapper migration call; presumably brings the table in line with the declared properties - TODO confirm
+Lib::Validation.raise_on_save_failure = true  # presumably makes a failing save raise instead of returning false - TODO confirm
+Lib::Crossvalidation.auto_upgrade!  # same migration call for the crossvalidation model
+Lib::Crossvalidation.raise_on_save_failure = true  # same save-failure setting for the crossvalidation model
author	Christoph Helma <helma@in-silico.ch>	2011-03-03 13:00:47 +0100
committer	Christoph Helma <helma@in-silico.ch>	2011-03-03 13:00:47 +0100
commit	dcd0a5a659c303c50a59d1271947851245db10e7 (patch)
tree	eaae695edf72c3a99cde58b9caaa7825d6d4f31a /lib
parent	8b46f5a4f389d7cd54f6e8b38025d275f9d3ed1b (diff)
parent	e57856a3c2cd10df207e722301c24a022e9fd802 (diff)