summary | refs | log | tree | commit | diff
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r-- lib/merge.rb 14
-rw-r--r-- lib/ohm_util.rb 40
-rwxr-xr-x lib/ot_predictions.rb 29
-rwxr-xr-x lib/predictions.rb 48
-rwxr-xr-x lib/validation_db.rb 136
5 files changed, 168 insertions, 99 deletions
diff --git a/lib/merge.rb b/lib/merge.rb
index 527415e..ecbe133 100644
--- a/lib/merge.rb
+++ b/lib/merge.rb
@@ -20,8 +20,7 @@ module Lib
def self.merge_array_objects( array )
return nil if array.size == nil
return array[0] if array.size==1
-
- m = self.merge_objects(array[0], array[1] )
+ m = self.merge_objects(array[0], array[1])
(2..array.size-1).each do |i|
m = self.merge_objects(m, array[i] )
end
@@ -33,8 +32,7 @@ module Lib
end
def self.merge_objects( object1, object2 )
-
- raise "classes not equal" if object1.class != object2.class
+ raise "classes not equal : "+object1.class.to_s+" != "+object2.class.to_s if object1.class != object2.class
object_class = object1.class
raise "register which attributes to merge first, nothing found for class "+object_class.to_s unless merge_attributes_registered?(object_class)
raise "not supported, successivly add unmerged object to a merge object" if merge_count(object2)>1
@@ -71,6 +69,11 @@ module Lib
variance = nil
if (avg=avg_attribute?(object_class, attribute)) || sum_attribute?(object_class, attribute)
+ # convert strings to numerics if wanted; value1 is no longer a string once weight1>1
+ if value2.is_a?(String) and ((weight1==1 and value1.is_a?(String)) or (weight1>1 and value1.is_a?(Numeric)))
+ value1 = value1.to_f
+ value2 = value2.to_f
+ end
if (value1==nil and value2==nil )
#do nothing
elsif value1.is_a?(Numeric) and value2.is_a?(Numeric)
@@ -104,7 +107,8 @@ module Lib
end
end
else
- raise "invalid, cannot avg/sum non-numeric content for attribute: "+attribute.to_s+" contents: '"+value1.to_s+"', '"+value2.to_s+"'"
+ raise "invalid, cannot avg/sum non-numeric content for attribute: "+attribute.to_s+" contents: '"+value1.to_s+"' ("+
+ value1.class.to_s+"), '"+value2.to_s+"' ("+value2.class.to_s+")"
end
elsif non_numeric_attribute?(object_class, attribute)
if (value1.is_a?(Hash) and value2.is_a?(Hash))
diff --git a/lib/ohm_util.rb b/lib/ohm_util.rb
new file mode 100644
index 0000000..856f9d2
--- /dev/null
+++ b/lib/ohm_util.rb
@@ -0,0 +1,40 @@
+
+module Lib
+ module OhmUtil
+
+ def self.check_params(model, params)
+ prop_names = model.attributes.collect{|p| p.to_s}
+ params.keys.each do |k|
+ key = k.to_s
+ if (key == "subjectid" || key == "media")
+ params.delete(k)
+ else
+ unless prop_names.include?(key)
+ key = key.from_rdf_format
+ unless prop_names.include?(key)
+ key = key+"_uri"
+ unless prop_names.include?(key)
+ key = key+"s"
+ unless prop_names.include?(key)
+ raise OpenTox::BadRequestError.new "no attribute found: '"+k.to_s+"'"
+ end
+ end
+ end
+ end
+ params[key.to_sym] = params.delete(k)
+ end
+ end
+ params
+ end
+
+ def self.find(model, filter_params)
+ params = check_params(model,filter_params)
+ if (params.size==0)
+ model.all
+ else
+ model.find(params)
+ end
+ end
+
+ end
+end
\ No newline at end of file
diff --git a/lib/ot_predictions.rb b/lib/ot_predictions.rb
index f812854..22f9b20 100755
--- a/lib/ot_predictions.rb
+++ b/lib/ot_predictions.rb
@@ -54,13 +54,20 @@ module Lib
@compounds = test_dataset.compounds
LOGGER.debug "test dataset size: "+@compounds.size.to_s
raise "test dataset is empty "+test_dataset_uri.to_s unless @compounds.size>0
- class_values = feature_type=="classification" ? OpenTox::Feature.find(prediction_feature, subjectid).domain : nil
+
+ if feature_type=="classification"
+ accept_values = test_target_dataset.features[prediction_feature][OT.acceptValue]
+ raise "'"+OT.acceptValue.to_s+"' missing/invalid for feature '"+prediction_feature.to_s+"' in dataset '"+
+ test_target_dataset_uri.to_s+"', acceptValues are: '"+accept_values.inspect+"'" if accept_values==nil or accept_values.length<2
+ else
+ accept_values=nil
+ end
actual_values = []
@compounds.each do |c|
case feature_type
when "classification"
- actual_values << classification_value(test_target_dataset, c, prediction_feature, class_values)
+ actual_values << classification_value(test_target_dataset, c, prediction_feature, accept_values)
when "regression"
actual_values << regression_value(test_target_dataset, c, prediction_feature)
end
@@ -108,7 +115,7 @@ module Lib
case feature_type
when "classification"
# TODO: remove LAZAR_PREDICTION_DATASET_HACK
- predicted_values << classification_value(prediction_dataset, c, no_prediction_feature ? nil : predicted_variable, class_values)
+ predicted_values << classification_value(prediction_dataset, c, no_prediction_feature ? nil : predicted_variable, accept_values)
when "regression"
predicted_values << regression_value(prediction_dataset, c, no_prediction_feature ? nil : predicted_variable)
end
@@ -126,7 +133,7 @@ module Lib
end
task.progress(80) if task # loaded predicted values and confidence
- super(predicted_values, actual_values, confidence_values, feature_type, class_values)
+ super(predicted_values, actual_values, confidence_values, feature_type, accept_values)
raise "illegal num compounds "+num_info if @compounds.size != @predicted_values.size
task.progress(100) if task # done with the mathmatics
end
@@ -143,11 +150,11 @@ module Lib
end
end
- def classification_value(dataset, compound, feature, class_values)
+ def classification_value(dataset, compound, feature, accept_values)
v = value(dataset, compound, feature)
- i = class_values.index(v)
- raise "illegal class_value of prediction (value is '"+v.to_s+"', class is '"+v.class.to_s+"'), possible values are "+
- class_values.inspect unless v==nil or i!=nil
+ i = accept_values.index(v.to_s)
+ raise "illegal class_value of prediction (value is '"+v.to_s+"'), accept values are "+
+ accept_values.inspect unless v==nil or i!=nil
i
end
@@ -184,9 +191,9 @@ module Lib
res = {}
case @feature_type
when "classification"
- (Lib::VAL_CLASS_PROPS).each{ |s| res[s] = send(s)}
+ (Validation::VAL_CLASS_PROPS).each{ |s| res[s] = send(s)}
when "regression"
- (Lib::VAL_REGR_PROPS).each{ |s| res[s] = send(s) }
+ (Validation::VAL_REGR_PROPS).each{ |s| res[s] = send(s) }
end
return res
end
@@ -206,7 +213,7 @@ module Lib
begin
#a.push( "http://ambit.uni-plovdiv.bg:8080/ambit2/depict/cdk?search="+
# URI.encode(OpenTox::Compound.new(:uri=>p.identifier(i)).smiles) ) if add_pic
- a << p.identifier(i)+"/image"
+ a << p.identifier(i)+"?media=image/png"
rescue => ex
raise ex
#a.push("Could not add pic: "+ex.message)
diff --git a/lib/predictions.rb b/lib/predictions.rb
index 5850024..420790e 100755
--- a/lib/predictions.rb
+++ b/lib/predictions.rb
@@ -23,13 +23,13 @@ module Lib
actual_values,
confidence_values,
feature_type,
- class_domain=nil )
+ accept_values=nil )
@predicted_values = predicted_values
@actual_values = actual_values
@confidence_values = confidence_values
@feature_type = feature_type
- @class_domain = class_domain
+ @accept_values = accept_values
@num_classes = 1
#puts "predicted: "+predicted_values.inspect
@@ -58,15 +58,15 @@ module Lib
case @feature_type
when "classification"
- raise "class_domain missing while performing classification" unless @class_domain
- @num_classes = @class_domain.size
+ raise "accept_values missing while performing classification" unless @accept_values
+ @num_classes = @accept_values.size
raise "num classes < 2" if @num_classes<2
{ "predicted"=>@predicted_values, "actual"=>@actual_values }.each do |s,values|
values.each{ |v| raise "illegal "+s+" classification-value ("+v.to_s+"),"+
"has to be either nil or index of predicted-values" if v!=nil and (!v.is_a?(Numeric) or v<0 or v>@num_classes)}
end
when "regresssion"
- raise "class_domain != nil while performing regression" if @class_domain
+ raise "accept_values != nil while performing regression" if @accept_values
{ "predicted"=>@predicted_values, "actual"=>@actual_values }.each do |s,values|
values.each{ |v| raise "illegal "+s+" regression-value ("+v.to_s+"),"+
"has to be either nil or number" unless v==nil or v.is_a?(Numeric)}
@@ -88,8 +88,16 @@ module Lib
case @feature_type
when "classification"
+
+ # confusion-matrix will contain counts for predictions in a 2d array:
+ # index of first dim: actual values
+ # index of second dim: predicted values
+ # example:
+ # * summing up over all i with fixed n
+ # * confusion_matrix[i][n]
+ # * will give the number of instances that are predicted as n
@confusion_matrix = []
- @class_domain.each do |v|
+ @accept_values.each do |v|
@confusion_matrix.push( Array.new( @num_classes, 0 ) )
end
@@ -235,8 +243,8 @@ module Lib
res = {}
(0..@num_classes-1).each do |actual|
(0..@num_classes-1).each do |predicted|
- res[{:confusion_matrix_actual => @class_domain[actual],
- :confusion_matrix_predicted => @class_domain[predicted]}] = @confusion_matrix[actual][predicted]
+ res[{:confusion_matrix_actual => @accept_values[actual],
+ :confusion_matrix_predicted => @accept_values[predicted]}] = @confusion_matrix[actual][predicted]
end
end
return res
@@ -289,8 +297,8 @@ module Lib
def precision(class_index=nil)
return prediction_feature_value_map( lambda{ |i| precision(i) } ) if class_index==nil
- correct = 0
- total = 0
+ correct = 0 # all instances with prediction class_index that are correctly classified
+ total = 0 # all instances with prediction class_index
(0..@num_classes-1).each do |i|
correct += @confusion_matrix[i][class_index] if i == class_index
total += @confusion_matrix[i][class_index]
@@ -457,7 +465,7 @@ module Lib
def mean_absolute_error
return 0 if (@num_with_actual_value - @num_unpredicted)==0
- Math.sqrt(@sum_abs_error / (@num_with_actual_value - @num_unpredicted).to_f)
+ @sum_abs_error / (@num_with_actual_value - @num_unpredicted).to_f
end
def sum_squared_error
@@ -487,21 +495,21 @@ module Lib
return @variance_actual
end
- # data for roc-plots ###################################################################################
+ # data for (roc-)plots ###################################################################################
- def get_roc_values(class_value)
+ def get_prediction_values(class_value)
#puts "get_roc_values for class_value: "+class_value.to_s
raise "no confidence values" if @confidence_values==nil
- raise "no class-value specified" if class_value==nil
+ #raise "no class-value specified" if class_value==nil
- class_index = @class_domain.index(class_value)
- raise "class not found "+class_value.to_s if class_index==nil
+ class_index = @accept_values.index(class_value) if class_value!=nil
+ raise "class not found "+class_value.to_s if (class_value!=nil && class_index==nil)
c = []; p = []; a = []
(0..@predicted_values.size-1).each do |i|
# NOTE: not predicted instances are ignored here
- if @predicted_values[i]!=nil and @predicted_values[i]==class_index
+ if @predicted_values[i]!=nil and (class_index==nil || @predicted_values[i]==class_index)
c << @confidence_values[i]
p << @predicted_values[i]
a << @actual_values[i]
@@ -529,7 +537,7 @@ module Lib
def predicted_value(instance_index)
case @feature_type
when "classification"
- @predicted_values[instance_index]==nil ? nil : @class_domain[@predicted_values[instance_index]]
+ @predicted_values[instance_index]==nil ? nil : @accept_values[@predicted_values[instance_index]]
when "regression"
@predicted_values[instance_index]
end
@@ -542,7 +550,7 @@ module Lib
def actual_value(instance_index)
case @feature_type
when "classification"
- @actual_values[instance_index]==nil ? nil : @class_domain[@actual_values[instance_index]]
+ @actual_values[instance_index]==nil ? nil : @accept_values[@actual_values[instance_index]]
when "regression"
@actual_values[instance_index]
end
@@ -576,7 +584,7 @@ module Lib
def prediction_feature_value_map(proc)
res = {}
(0..@num_classes-1).each do |i|
- res[@class_domain[i]] = proc.call(i)
+ res[@accept_values[i]] = proc.call(i)
end
return res
end
diff --git a/lib/validation_db.rb b/lib/validation_db.rb
index 0d5db21..e2595c5 100755
--- a/lib/validation_db.rb
+++ b/lib/validation_db.rb
@@ -4,7 +4,7 @@
#end
require "lib/merge.rb"
-module Lib
+module Validation
VAL_PROPS_GENERAL = [ :validation_uri, :validation_type, :model_uri, :algorithm_uri, :training_dataset_uri, :prediction_feature,
:test_dataset_uri, :test_target_dataset_uri, :prediction_dataset_uri, :date ]
@@ -48,50 +48,62 @@ module Lib
VAL_MERGE_AVG = VAL_PROPS_AVG + VAL_CLASS_PROPS_SINGLE_AVG + VAL_CLASS_PROPS_PER_CLASS_AVG + VAL_REGR_PROPS
-# class Validation < ActiveRecord::Base
-# serialize :classification_statistics
-# serialize :regression_statistics
-#
-# alias_attribute :date, :created_at
-
- class Validation
- include DataMapper::Resource
+ class Validation < Ohm::Model
- property :id, Serial
- property :validation_type, String, :length => 512
- property :model_uri, String, :length => 512
- property :algorithm_uri, String, :length => 512
- property :training_dataset_uri, String, :length => 512
- property :test_target_dataset_uri, String, :length => 512
- property :test_dataset_uri, String, :length => 512
- property :prediction_dataset_uri, String, :length => 512
- property :prediction_feature, String, :length => 512
- property :created_at, DateTime
- property :num_instances, Integer
- property :num_without_class, Integer
- property :num_unpredicted, Integer
- property :crossvalidation_id, Integer
- property :crossvalidation_fold, Integer
- property :real_runtime, Float
- property :percent_without_class, Float
- property :percent_unpredicted, Float
- property :classification_statistics, Object
- property :regression_statistics, Object
- property :finished, Boolean, :default => false
+ attribute :validation_type
+ attribute :model_uri
+ attribute :algorithm_uri
+ attribute :training_dataset_uri
+ attribute :test_target_dataset_uri
+ attribute :test_dataset_uri
+ attribute :prediction_dataset_uri
+ attribute :prediction_feature
+ attribute :date
+ attribute :num_instances
+ attribute :num_without_class
+ attribute :num_unpredicted
+ attribute :crossvalidation_id
+ attribute :crossvalidation_fold
+ attribute :real_runtime
+ attribute :percent_without_class
+ attribute :percent_unpredicted
+ attribute :classification_statistics_yaml
+ attribute :regression_statistics_yaml
+ attribute :finished
+
+ index :model_uri
+ index :validation_type
+ index :crossvalidation_id
attr_accessor :subjectid
- after :save, :check_policy
- private
- def check_policy
- OpenTox::Authorization.check_policy(validation_uri, subjectid)
+ def self.create(params={})
+ params[:date] = Time.new
+ super params
end
- public
- def date
- created_at
+ def classification_statistics
+ YAML.load(self.classification_statistics_yaml) if self.classification_statistics_yaml
+ end
+
+ def classification_statistics=(cs)
+ self.classification_statistics_yaml = cs.to_yaml
+ end
+
+ def regression_statistics
+ YAML.load(self.regression_statistics_yaml) if self.regression_statistics_yaml
+ end
+
+ def regression_statistics=(rs)
+ self.regression_statistics_yaml = rs.to_yaml
+ end
+
+ def save
+ super
+ OpenTox::Authorization.check_policy(validation_uri, subjectid)
end
+ public
def validation_uri
raise "no id" if self.id==nil
$url_provider.url_for("/"+self.id.to_s, :full)
@@ -115,33 +127,36 @@ module Lib
end
-# class Crossvalidation < ActiveRecord::Base
-# alias_attribute :date, :created_at
- class Crossvalidation
- include DataMapper::Resource
+ class Crossvalidation < Ohm::Model
- property :id, Serial
- property :algorithm_uri, String, :length => 512
- property :dataset_uri, String, :length => 512
- property :created_at, DateTime
- property :num_folds, Integer, :default => 10
- property :random_seed, Integer, :default => 1
- property :finished, Boolean, :default => false
- property :stratified, Boolean, :default => false
+ attribute :algorithm_uri
+ attribute :dataset_uri
+ attribute :date
+ attribute :num_folds
+ attribute :random_seed
+ attribute :finished
+ attribute :stratified
attr_accessor :subjectid
-
- after :save, :check_policy
- private
- def check_policy
- OpenTox::Authorization.check_policy(crossvalidation_uri, subjectid)
+
+ index :algorithm_uri
+ index :dataset_uri
+ index :num_folds
+ index :random_seed
+ index :stratified
+ index :finished
+
+ def self.create(params={})
+ params[:date] = Time.new
+ super params
end
- public
- def date
- created_at
+ def save
+ super
+ OpenTox::Authorization.check_policy(crossvalidation_uri, subjectid)
end
+ public
def crossvalidation_uri
raise "no id" if self.id==nil
$url_provider.url_for("/crossvalidation/"+self.id.to_s, :full) if self.id
@@ -152,7 +167,7 @@ module Lib
# further conditions can be specified in __conditions__
def self.find_all_uniq(conditions={}, subjectid=nil )
#cvs = Lib::Crossvalidation.find(:all, :conditions => conditions)
- cvs = Lib::Crossvalidation.all(:conditions => conditions)
+ cvs = Crossvalidation.find( conditions )
uniq = []
cvs.each do |cv|
next if AA_SERVER and !OpenTox::Authorization.authorized?(cv.crossvalidation_uri,"GET",subjectid)
@@ -171,8 +186,3 @@ module Lib
end
end
-
-Lib::Validation.auto_upgrade!
-Lib::Validation.raise_on_save_failure = true
-Lib::Crossvalidation.auto_upgrade!
-Lib::Crossvalidation.raise_on_save_failure = true