diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/merge.rb | 14 | ||||
-rw-r--r-- | lib/ohm_util.rb | 40 | ||||
-rwxr-xr-x | lib/ot_predictions.rb | 29 | ||||
-rwxr-xr-x | lib/predictions.rb | 48 | ||||
-rwxr-xr-x | lib/validation_db.rb | 136 |
5 files changed, 168 insertions, 99 deletions
diff --git a/lib/merge.rb b/lib/merge.rb index 527415e..ecbe133 100644 --- a/lib/merge.rb +++ b/lib/merge.rb @@ -20,8 +20,7 @@ module Lib def self.merge_array_objects( array ) return nil if array.size == nil return array[0] if array.size==1 - - m = self.merge_objects(array[0], array[1] ) + m = self.merge_objects(array[0], array[1]) (2..array.size-1).each do |i| m = self.merge_objects(m, array[i] ) end @@ -33,8 +32,7 @@ module Lib end def self.merge_objects( object1, object2 ) - - raise "classes not equal" if object1.class != object2.class + raise "classes not equal : "+object1.class.to_s+" != "+object2.class.to_s if object1.class != object2.class object_class = object1.class raise "register which attributes to merge first, nothing found for class "+object_class.to_s unless merge_attributes_registered?(object_class) raise "not supported, successivly add unmerged object to a merge object" if merge_count(object2)>1 @@ -71,6 +69,11 @@ module Lib variance = nil if (avg=avg_attribute?(object_class, attribute)) || sum_attribute?(object_class, attribute) + # we string to numerics if wanted, value1 is no string anymore if weight>1 + if value2.is_a?(String) and ((weight1==1 and value1.is_a?(String)) or (weight1>1 and value1.is_a?(Numeric))) + value1 = value1.to_f + value2 = value2.to_f + end if (value1==nil and value2==nil ) #do nothing elsif value1.is_a?(Numeric) and value2.is_a?(Numeric) @@ -104,7 +107,8 @@ module Lib end end else - raise "invalid, cannot avg/sum non-numeric content for attribute: "+attribute.to_s+" contents: '"+value1.to_s+"', '"+value2.to_s+"'" + raise "invalid, cannot avg/sum non-numeric content for attribute: "+attribute.to_s+" contents: '"+value1.to_s+"' ("+ + value1.class.to_s+"), '"+value2.to_s+"' ("+value2.class.to_s+")" end elsif non_numeric_attribute?(object_class, attribute) if (value1.is_a?(Hash) and value2.is_a?(Hash)) diff --git a/lib/ohm_util.rb b/lib/ohm_util.rb new file mode 100644 index 0000000..856f9d2 --- /dev/null +++ b/lib/ohm_util.rb 
module Lib
  # Helpers for querying Ohm-style models with request-style parameter hashes.
  module OhmUtil
    # Request parameters that are never model attributes; they are dropped
    # from the filter instead of being reported as unknown.
    IGNORED_PARAMS = %w[subjectid media].freeze

    # Ordered rewrites applied to a parameter name until it matches an
    # attribute of the model. Each step operates on the result of the
    # previous one, so the candidates tried are:
    #   name, name.from_rdf_format, <that> + "_uri", <that> + "_uris"
    # NOTE(review): String#from_rdf_format is not defined in this file;
    # presumably a project-wide String extension - confirm it is loaded.
    ATTRIBUTE_NAME_FALLBACKS = [
      lambda { |name| name.from_rdf_format },
      lambda { |name| name + "_uri" },
      lambda { |name| name + "s" }
    ].freeze

    # Normalizes the keys of +params+ so that they can be used as a filter
    # for +model+.
    #
    # The hash is modified in place: ignored parameters (see IGNORED_PARAMS)
    # are removed, every other key is replaced by the symbol of the model
    # attribute it resolves to (see ATTRIBUTE_NAME_FALLBACKS).
    #
    # model  - object responding to +attributes+ (list of attribute names)
    # params - Hash with String or Symbol keys
    #
    # Returns the (mutated) +params+ hash.
    # Raises OpenTox::BadRequestError if a key cannot be resolved to an
    # attribute of the model.
    def self.check_params(model, params)
      prop_names = model.attributes.map { |p| p.to_s }
      # iterate over a snapshot of the keys, the hash is modified in the loop
      params.keys.each do |k|
        name = k.to_s
        if IGNORED_PARAMS.include?(name)
          params.delete(k)
          next
        end
        # fallbacks are evaluated lazily: stop as soon as the name is known
        ATTRIBUTE_NAME_FALLBACKS.each do |fallback|
          break if prop_names.include?(name)
          name = fallback.call(name)
        end
        unless prop_names.include?(name)
          raise OpenTox::BadRequestError.new("no attribute found: '" + k.to_s + "'")
        end
        params[name.to_sym] = params.delete(k)
      end
      params
    end

    # Looks up instances of +model+ matching +filter_params+.
    #
    # filter_params is normalized with check_params first (and therefore
    # modified in place). A nil or empty filter (after removal of ignored
    # parameters) returns +model.all+, otherwise +model.find+ is called with
    # the normalized hash.
    #
    # Raises OpenTox::BadRequestError for unknown filter keys.
    def self.find(model, filter_params)
      params = check_params(model, filter_params || {})
      params.empty? ? model.all : model.find(params)
    end
  end
end
\ No newline at end of file diff --git a/lib/ot_predictions.rb b/lib/ot_predictions.rb index f812854..22f9b20 100755 --- a/lib/ot_predictions.rb +++ b/lib/ot_predictions.rb @@ -54,13 +54,20 @@ module Lib @compounds = test_dataset.compounds LOGGER.debug "test dataset size: "+@compounds.size.to_s raise "test dataset is empty "+test_dataset_uri.to_s unless @compounds.size>0 - class_values = feature_type=="classification" ? OpenTox::Feature.find(prediction_feature, subjectid).domain : nil + + if feature_type=="classification" + accept_values = test_target_dataset.features[prediction_feature][OT.acceptValue] + raise "'"+OT.acceptValue.to_s+"' missing/invalid for feature '"+prediction_feature.to_s+"' in dataset '"+ + test_target_dataset_uri.to_s+"', acceptValues are: '"+accept_values.inspect+"'" if accept_values==nil or accept_values.length<2 + else + accept_values=nil + end actual_values = [] @compounds.each do |c| case feature_type when "classification" - actual_values << classification_value(test_target_dataset, c, prediction_feature, class_values) + actual_values << classification_value(test_target_dataset, c, prediction_feature, accept_values) when "regression" actual_values << regression_value(test_target_dataset, c, prediction_feature) end @@ -108,7 +115,7 @@ module Lib case feature_type when "classification" # TODO: remove LAZAR_PREDICTION_DATASET_HACK - predicted_values << classification_value(prediction_dataset, c, no_prediction_feature ? nil : predicted_variable, class_values) + predicted_values << classification_value(prediction_dataset, c, no_prediction_feature ? nil : predicted_variable, accept_values) when "regression" predicted_values << regression_value(prediction_dataset, c, no_prediction_feature ? 
nil : predicted_variable) end @@ -126,7 +133,7 @@ module Lib end task.progress(80) if task # loaded predicted values and confidence - super(predicted_values, actual_values, confidence_values, feature_type, class_values) + super(predicted_values, actual_values, confidence_values, feature_type, accept_values) raise "illegal num compounds "+num_info if @compounds.size != @predicted_values.size task.progress(100) if task # done with the mathmatics end @@ -143,11 +150,11 @@ module Lib end end - def classification_value(dataset, compound, feature, class_values) + def classification_value(dataset, compound, feature, accept_values) v = value(dataset, compound, feature) - i = class_values.index(v) - raise "illegal class_value of prediction (value is '"+v.to_s+"', class is '"+v.class.to_s+"'), possible values are "+ - class_values.inspect unless v==nil or i!=nil + i = accept_values.index(v.to_s) + raise "illegal class_value of prediction (value is '"+v.to_s+"'), accept values are "+ + accept_values.inspect unless v==nil or i!=nil i end @@ -184,9 +191,9 @@ module Lib res = {} case @feature_type when "classification" - (Lib::VAL_CLASS_PROPS).each{ |s| res[s] = send(s)} + (Validation::VAL_CLASS_PROPS).each{ |s| res[s] = send(s)} when "regression" - (Lib::VAL_REGR_PROPS).each{ |s| res[s] = send(s) } + (Validation::VAL_REGR_PROPS).each{ |s| res[s] = send(s) } end return res end @@ -206,7 +213,7 @@ module Lib begin #a.push( "http://ambit.uni-plovdiv.bg:8080/ambit2/depict/cdk?search="+ # URI.encode(OpenTox::Compound.new(:uri=>p.identifier(i)).smiles) ) if add_pic - a << p.identifier(i)+"/image" + a << p.identifier(i)+"?media=image/png" rescue => ex raise ex #a.push("Could not add pic: "+ex.message) diff --git a/lib/predictions.rb b/lib/predictions.rb index 5850024..420790e 100755 --- a/lib/predictions.rb +++ b/lib/predictions.rb @@ -23,13 +23,13 @@ module Lib actual_values, confidence_values, feature_type, - class_domain=nil ) + accept_values=nil ) @predicted_values = 
predicted_values @actual_values = actual_values @confidence_values = confidence_values @feature_type = feature_type - @class_domain = class_domain + @accept_values = accept_values @num_classes = 1 #puts "predicted: "+predicted_values.inspect @@ -58,15 +58,15 @@ module Lib case @feature_type when "classification" - raise "class_domain missing while performing classification" unless @class_domain - @num_classes = @class_domain.size + raise "accept_values missing while performing classification" unless @accept_values + @num_classes = @accept_values.size raise "num classes < 2" if @num_classes<2 { "predicted"=>@predicted_values, "actual"=>@actual_values }.each do |s,values| values.each{ |v| raise "illegal "+s+" classification-value ("+v.to_s+"),"+ "has to be either nil or index of predicted-values" if v!=nil and (!v.is_a?(Numeric) or v<0 or v>@num_classes)} end when "regresssion" - raise "class_domain != nil while performing regression" if @class_domain + raise "accept_values != nil while performing regression" if @accept_values { "predicted"=>@predicted_values, "actual"=>@actual_values }.each do |s,values| values.each{ |v| raise "illegal "+s+" regression-value ("+v.to_s+"),"+ "has to be either nil or number" unless v==nil or v.is_a?(Numeric)} @@ -88,8 +88,16 @@ module Lib case @feature_type when "classification" + + # confusion-matrix will contain counts for predictions in a 2d array: + # index of first dim: actual values + # index of second dim: predicited values + # example: + # * summing up over all i with fixed n + # * confusion_matrix[i][n] + # * will give the number of instances that are predicted as n @confusion_matrix = [] - @class_domain.each do |v| + @accept_values.each do |v| @confusion_matrix.push( Array.new( @num_classes, 0 ) ) end @@ -235,8 +243,8 @@ module Lib res = {} (0..@num_classes-1).each do |actual| (0..@num_classes-1).each do |predicted| - res[{:confusion_matrix_actual => @class_domain[actual], - :confusion_matrix_predicted => 
@class_domain[predicted]}] = @confusion_matrix[actual][predicted] + res[{:confusion_matrix_actual => @accept_values[actual], + :confusion_matrix_predicted => @accept_values[predicted]}] = @confusion_matrix[actual][predicted] end end return res @@ -289,8 +297,8 @@ module Lib def precision(class_index=nil) return prediction_feature_value_map( lambda{ |i| precision(i) } ) if class_index==nil - correct = 0 - total = 0 + correct = 0 # all instances with prediction class_index that are correctly classified + total = 0 # all instances with prediciton class_index (0..@num_classes-1).each do |i| correct += @confusion_matrix[i][class_index] if i == class_index total += @confusion_matrix[i][class_index] @@ -457,7 +465,7 @@ module Lib def mean_absolute_error return 0 if (@num_with_actual_value - @num_unpredicted)==0 - Math.sqrt(@sum_abs_error / (@num_with_actual_value - @num_unpredicted).to_f) + @sum_abs_error / (@num_with_actual_value - @num_unpredicted).to_f end def sum_squared_error @@ -487,21 +495,21 @@ module Lib return @variance_actual end - # data for roc-plots ################################################################################### + # data for (roc-)plots ################################################################################### - def get_roc_values(class_value) + def get_prediction_values(class_value) #puts "get_roc_values for class_value: "+class_value.to_s raise "no confidence values" if @confidence_values==nil - raise "no class-value specified" if class_value==nil + #raise "no class-value specified" if class_value==nil - class_index = @class_domain.index(class_value) - raise "class not found "+class_value.to_s if class_index==nil + class_index = @accept_values.index(class_value) if class_value!=nil + raise "class not found "+class_value.to_s if (class_value!=nil && class_index==nil) c = []; p = []; a = [] (0..@predicted_values.size-1).each do |i| # NOTE: not predicted instances are ignored here - if @predicted_values[i]!=nil and 
@predicted_values[i]==class_index + if @predicted_values[i]!=nil and (class_index==nil || @predicted_values[i]==class_index) c << @confidence_values[i] p << @predicted_values[i] a << @actual_values[i] @@ -529,7 +537,7 @@ module Lib def predicted_value(instance_index) case @feature_type when "classification" - @predicted_values[instance_index]==nil ? nil : @class_domain[@predicted_values[instance_index]] + @predicted_values[instance_index]==nil ? nil : @accept_values[@predicted_values[instance_index]] when "regression" @predicted_values[instance_index] end @@ -542,7 +550,7 @@ module Lib def actual_value(instance_index) case @feature_type when "classification" - @actual_values[instance_index]==nil ? nil : @class_domain[@actual_values[instance_index]] + @actual_values[instance_index]==nil ? nil : @accept_values[@actual_values[instance_index]] when "regression" @actual_values[instance_index] end @@ -576,7 +584,7 @@ module Lib def prediction_feature_value_map(proc) res = {} (0..@num_classes-1).each do |i| - res[@class_domain[i]] = proc.call(i) + res[@accept_values[i]] = proc.call(i) end return res end diff --git a/lib/validation_db.rb b/lib/validation_db.rb index 0d5db21..e2595c5 100755 --- a/lib/validation_db.rb +++ b/lib/validation_db.rb @@ -4,7 +4,7 @@ #end require "lib/merge.rb" -module Lib +module Validation VAL_PROPS_GENERAL = [ :validation_uri, :validation_type, :model_uri, :algorithm_uri, :training_dataset_uri, :prediction_feature, :test_dataset_uri, :test_target_dataset_uri, :prediction_dataset_uri, :date ] @@ -48,50 +48,62 @@ module Lib VAL_MERGE_AVG = VAL_PROPS_AVG + VAL_CLASS_PROPS_SINGLE_AVG + VAL_CLASS_PROPS_PER_CLASS_AVG + VAL_REGR_PROPS -# class Validation < ActiveRecord::Base -# serialize :classification_statistics -# serialize :regression_statistics -# -# alias_attribute :date, :created_at - - class Validation - include DataMapper::Resource + class Validation < Ohm::Model - property :id, Serial - property :validation_type, String, :length => 512 - 
property :model_uri, String, :length => 512 - property :algorithm_uri, String, :length => 512 - property :training_dataset_uri, String, :length => 512 - property :test_target_dataset_uri, String, :length => 512 - property :test_dataset_uri, String, :length => 512 - property :prediction_dataset_uri, String, :length => 512 - property :prediction_feature, String, :length => 512 - property :created_at, DateTime - property :num_instances, Integer - property :num_without_class, Integer - property :num_unpredicted, Integer - property :crossvalidation_id, Integer - property :crossvalidation_fold, Integer - property :real_runtime, Float - property :percent_without_class, Float - property :percent_unpredicted, Float - property :classification_statistics, Object - property :regression_statistics, Object - property :finished, Boolean, :default => false + attribute :validation_type + attribute :model_uri + attribute :algorithm_uri + attribute :training_dataset_uri + attribute :test_target_dataset_uri + attribute :test_dataset_uri + attribute :prediction_dataset_uri + attribute :prediction_feature + attribute :date + attribute :num_instances + attribute :num_without_class + attribute :num_unpredicted + attribute :crossvalidation_id + attribute :crossvalidation_fold + attribute :real_runtime + attribute :percent_without_class + attribute :percent_unpredicted + attribute :classification_statistics_yaml + attribute :regression_statistics_yaml + attribute :finished + + index :model_uri + index :validation_type + index :crossvalidation_id attr_accessor :subjectid - after :save, :check_policy - private - def check_policy - OpenTox::Authorization.check_policy(validation_uri, subjectid) + def self.create(params={}) + params[:date] = Time.new + super params end - public - def date - created_at + def classification_statistics + YAML.load(self.classification_statistics_yaml) if self.classification_statistics_yaml + end + + def classification_statistics=(cs) + 
self.classification_statistics_yaml = cs.to_yaml + end + + def regression_statistics + YAML.load(self.regression_statistics_yaml) if self.regression_statistics_yaml + end + + def regression_statistics=(rs) + self.regression_statistics_yaml = rs.to_yaml + end + + def save + super + OpenTox::Authorization.check_policy(validation_uri, subjectid) end + public def validation_uri raise "no id" if self.id==nil $url_provider.url_for("/"+self.id.to_s, :full) @@ -115,33 +127,36 @@ module Lib end -# class Crossvalidation < ActiveRecord::Base -# alias_attribute :date, :created_at - class Crossvalidation - include DataMapper::Resource + class Crossvalidation < Ohm::Model - property :id, Serial - property :algorithm_uri, String, :length => 512 - property :dataset_uri, String, :length => 512 - property :created_at, DateTime - property :num_folds, Integer, :default => 10 - property :random_seed, Integer, :default => 1 - property :finished, Boolean, :default => false - property :stratified, Boolean, :default => false + attribute :algorithm_uri + attribute :dataset_uri + attribute :date + attribute :num_folds + attribute :random_seed + attribute :finished + attribute :stratified attr_accessor :subjectid - - after :save, :check_policy - private - def check_policy - OpenTox::Authorization.check_policy(crossvalidation_uri, subjectid) + + index :algorithm_uri + index :dataset_uri + index :num_folds + index :random_seed + index :stratified + index :finished + + def self.create(params={}) + params[:date] = Time.new + super params end - public - def date - created_at + def save + super + OpenTox::Authorization.check_policy(crossvalidation_uri, subjectid) end + public def crossvalidation_uri raise "no id" if self.id==nil $url_provider.url_for("/crossvalidation/"+self.id.to_s, :full) if self.id @@ -152,7 +167,7 @@ module Lib # further conditions can be specified in __conditions__ def self.find_all_uniq(conditions={}, subjectid=nil ) #cvs = Lib::Crossvalidation.find(:all, :conditions => 
conditions) - cvs = Lib::Crossvalidation.all(:conditions => conditions) + cvs = Crossvalidation.find( conditions ) uniq = [] cvs.each do |cv| next if AA_SERVER and !OpenTox::Authorization.authorized?(cv.crossvalidation_uri,"GET",subjectid) @@ -171,8 +186,3 @@ module Lib end end - -Lib::Validation.auto_upgrade! -Lib::Validation.raise_on_save_failure = true -Lib::Crossvalidation.auto_upgrade! -Lib::Crossvalidation.raise_on_save_failure = true |