summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authormguetlein <martin.guetlein@gmail.com>2011-01-13 15:18:45 +0100
committermguetlein <martin.guetlein@gmail.com>2011-01-13 15:18:45 +0100
commiteeb0b6df2dbdae29ccf3f4ccd923002e0ed65506 (patch)
treec0b0956d5b107e613c3236ede71462fa7f18cf05 /lib
parent5786ebd5095451e928d9811bd6bbd98af72de99d (diff)
adjust to new wrapper, replace activerecord with datamapper (remove activerecord code), is_classification to feature_type
Diffstat (limited to 'lib')
-rwxr-xr-x[-rw-r--r--]lib/active_record_setup.rb19
-rwxr-xr-x[-rw-r--r--]lib/ot_predictions.rb136
-rwxr-xr-x[-rw-r--r--]lib/predictions.rb51
-rwxr-xr-x[-rw-r--r--]lib/test_util.rb4
-rwxr-xr-x[-rw-r--r--]lib/validation_db.rb65
5 files changed, 194 insertions, 81 deletions
diff --git a/lib/active_record_setup.rb b/lib/active_record_setup.rb
index 3682c7a..404bfec 100644..100755
--- a/lib/active_record_setup.rb
+++ b/lib/active_record_setup.rb
@@ -1,17 +1,17 @@
-gem "activerecord", "= 2.3.8"
-gem "ar-extensions", "= 0.9.2"
-['rubygems', 'logger', 'active_record', 'ar-extensions', 'opentox-ruby-api-wrapper' ].each do |g|
+#gem "activerecord", "= 2.3.8"
+#gem "ar-extensions", "= 0.9.2"
+['rubygems', 'logger', 'active_record', 'opentox-ruby' ].each do |g| #'ar-extensions',
require g
end
unless ActiveRecord::Base.connected?
ActiveRecord::Base.establish_connection(
- :adapter => @@config[:database][:adapter],
- :host => @@config[:database][:host],
- :database => @@config[:database][:database],
- :username => @@config[:database][:username],
- :password => @@config[:database][:password]
+ :adapter => CONFIG[:database][:adapter],
+ :host => CONFIG[:database][:host],
+ :database => CONFIG[:database][:database],
+ :username => CONFIG[:database][:username],
+ :password => CONFIG[:database][:password]
)
ActiveRecord::Base.logger = Logger.new("/dev/null")
end
@@ -19,6 +19,9 @@ end
class ActiveRecord::Base
def self.find_like(filter_params)
+
+ raise "find like removed"
+
#puts "params before "+filter_params.inspect
filter_params.keys.each do |k|
key = k.to_s
diff --git a/lib/ot_predictions.rb b/lib/ot_predictions.rb
index 0175a0c..52683a6 100644..100755
--- a/lib/ot_predictions.rb
+++ b/lib/ot_predictions.rb
@@ -15,7 +15,7 @@ module Lib
return @compounds[instance_index]
end
- def initialize(is_classification, test_dataset_uri, test_target_dataset_uri,
+ def initialize(feature_type, test_dataset_uri, test_target_dataset_uri,
prediction_feature, prediction_dataset_uri, predicted_variable, task=nil)
LOGGER.debug("loading prediciton via test-dataset:'"+test_dataset_uri.to_s+
@@ -53,38 +53,47 @@ module Lib
raise "prediction_feature not found in test_target_dataset\n"+
"prediction_feature: '"+prediction_feature.to_s+"'\n"+
"test_target_dataset: '"+test_target_dataset_uri.to_s+"'\n"+
- "available features are: "+test_target_dataset.features.inspect if test_target_dataset.features.index(prediction_feature)==nil
+ "available features are: "+test_target_dataset.features.inspect if test_target_dataset.features.keys.index(prediction_feature)==nil
end
+ test_dataset.load_all
@compounds = test_dataset.compounds
LOGGER.debug "test dataset size: "+@compounds.size.to_s
raise "test dataset is empty" unless @compounds.size>0
- class_values = is_classification ? OpenTox::Feature.domain(prediction_feature) : nil
+ class_values = feature_type=="classification" ? OpenTox::Feature.new(prediction_feature).domain : nil
actual_values = []
@compounds.each do |c|
- value = test_target_dataset.get_value(c, prediction_feature)
-
- if is_classification
- value = value.to_s unless value==nil
- raise "illegal class_value of actual value "+value.to_s+" class: "+
- value.class.to_s unless value==nil or class_values.index(value)!=nil
- actual_values.push class_values.index(value)
- else
- begin
- value = value.to_f unless value==nil or value.is_a?(Numeric)
- rescue
- LOGGER.warn "no numeric value for regression: '"+value.to_s+"'"
- value = nil
- end
- actual_values.push value
+ case feature_type
+ when "classification"
+ actual_values << classification_value(test_target_dataset, c, prediction_feature, class_values)
+ when "regression"
+ actual_values << regression_value(test_target_dataset, c, prediction_feature)
end
end
task.progress(40) if task # loaded actual values
prediction_dataset = OpenTox::Dataset.find prediction_dataset_uri
raise "prediction dataset not found: '"+prediction_dataset_uri.to_s+"'" unless prediction_dataset
- raise "prediction-feature not found: '"+predicted_variable+"' in prediction-dataset: "+prediction_dataset_uri.to_s+", available features: "+prediction_dataset.features.inspect if prediction_dataset.features.index(predicted_variable)==nil
+
+ # TODO: remove LAZAR_PREDICTION_DATASET_HACK
+ no_prediction_feature = prediction_dataset.features.keys.index(predicted_variable)==nil
+ if no_prediction_feature
+ one_entry_per_compound = prediction_dataset.data_entries.keys.size == @compounds.size
+ @compounds.each do |c|
+ if prediction_dataset.data_entries[c].size != 1
+ one_entry_per_compound = false
+ break
+ end
+ end
+ msg = "prediction-feature not found: '"+predicted_variable+"' in prediction-dataset: "+prediction_dataset_uri.to_s+", available features: "+
+ prediction_dataset.features.keys.inspect
+ if one_entry_per_compound
+ LOGGER.warn msg
+ else
+ raise msg
+ end
+ end
raise "more predicted than test compounds test:"+@compounds.size.to_s+" < prediction:"+
prediction_dataset.compounds.size.to_s if @compounds.size < prediction_dataset.compounds.size
@@ -102,38 +111,79 @@ module Lib
predicted_values << nil
confidence_values << nil
else
- if is_classification
- value = prediction_dataset.get_predicted_class(c, predicted_variable)
- value = value.to_s unless value==nil
- raise "illegal class_value of predicted value "+value.to_s+" class: "+value.class.to_s unless value==nil or class_values.index(value)!=nil
- predicted_values << class_values.index(value)
- else
- value = prediction_dataset.get_predicted_regression(c, predicted_variable)
- begin
- value = value.to_f unless value==nil or value.is_a?(Numeric)
- rescue
- LOGGER.warn "no numeric value for regression: '"+value.to_s+"'"
- value = nil
- end
- predicted_values << value
+ case feature_type
+ when "classification"
+ # TODO: remove LAZAR_PREDICTION_DATASET_HACK
+ predicted_values << classification_value(prediction_dataset, c, no_prediction_feature ? nil : predicted_variable, class_values)
+ when "regression"
+ predicted_values << regression_value(prediction_dataset, c, no_prediction_feature ? nil : predicted_variable)
+ end
+ # TODO confidence_values << prediction_dataset.get_prediction_confidence(c, predicted_variable)
+ conf = 1
+ begin
+ feature = prediction_dataset.data_entries[c].keys[0]
+ feature_data = prediction_dataset.features[feature]
+ conf = feature_data[OT.confidence] if feature_data[OT.confidence]!=nil
+ rescue
+ LOGGER.warn "could not get confidence"
end
- confidence_values << prediction_dataset.get_prediction_confidence(c, predicted_variable)
+ confidence_values << conf
end
end
task.progress(80) if task # loaded predicted values and confidence
- super(predicted_values, actual_values, confidence_values, is_classification, class_values)
+ super(predicted_values, actual_values, confidence_values, feature_type, class_values)
raise "illegal num compounds "+num_info if @compounds.size != @predicted_values.size
task.progress(100) if task # done with the mathmatics
end
+ private
+ def regression_value(dataset, compound, feature)
+ v = value(dataset, compound, feature)
+ begin
+ v = v.to_f unless v==nil or v.is_a?(Numeric)
+ v
+ rescue
+ LOGGER.warn "no numeric value for regression: '"+v.to_s+"'"
+ nil
+ end
+ end
+
+ def classification_value(dataset, compound, feature, class_values)
+ v = value(dataset, compound, feature)
+ i = class_values.index(v)
+ raise "illegal class_value of predicted value "+v.to_s+" class: "+v.class.to_s unless v==nil or i!=nil
+ i
+ end
+
+ def value(dataset, compound, feature)
+
+ if feature==nil
+ v = dataset.data_entries[compound].values[0]
+ else
+ v = dataset.data_entries[compound][feature]
+ end
+ raise "no array" unless v.is_a?(Array)
+ if v.size>1
+ raise "multiple values"
+ elsif v.size==1
+ v = v[0]
+ else
+ v = nil
+ end
+ raise "array" if v.is_a?(Array)
+ v = nil if v.to_s.size==0
+ v
+ end
+ public
def compute_stats
res = {}
- if @is_classification
+ case @feature_type
+ when "classification"
(Lib::VAL_CLASS_PROPS).each{ |s| res[s] = send(s)}
- else
+ when "regression"
(Lib::VAL_REGR_PROPS).each{ |s| res[s] = send(s) }
end
return res
@@ -152,16 +202,18 @@ module Lib
#PENDING!
begin
- a.push( "http://ambit.uni-plovdiv.bg:8080/ambit2/depict/cdk?search="+
- URI.encode(OpenTox::Compound.new(:uri=>p.identifier(i)).smiles) ) if add_pic
+ #a.push( "http://ambit.uni-plovdiv.bg:8080/ambit2/depict/cdk?search="+
+ # URI.encode(OpenTox::Compound.new(:uri=>p.identifier(i)).smiles) ) if add_pic
+ a << p.identifier(i)+"/image"
rescue => ex
+ raise ex
#a.push("Could not add pic: "+ex.message)
- a.push(p.identifier(i))
+ #a.push(p.identifier(i))
end
a << (format ? p.actual_value(i).to_nice_s : p.actual_value(i))
a << (format ? p.predicted_value(i).to_nice_s : p.predicted_value(i))
- if p.classification?
+ if p.feature_type=="classification"
if (p.predicted_value(i)!=nil and p.actual_value(i)!=nil)
a << (p.classification_miss?(i) ? 1 : 0)
else
@@ -180,7 +232,7 @@ module Lib
header << "compound" if add_pic
header << "actual value"
header << "predicted value"
- header << "missclassified" if predictions[0].classification?
+ header << "missclassified" if predictions[0].feature_type=="classification"
header << "confidence value" if predictions[0].confidence_values_available?
header << "compound-uri"
res.insert(0, header)
diff --git a/lib/predictions.rb b/lib/predictions.rb
index 6e50e94..e73dda6 100644..100755
--- a/lib/predictions.rb
+++ b/lib/predictions.rb
@@ -22,13 +22,13 @@ module Lib
def initialize( predicted_values,
actual_values,
confidence_values,
- is_classification,
+ feature_type,
class_domain=nil )
@predicted_values = predicted_values
@actual_values = actual_values
@confidence_values = confidence_values
- @is_classification = is_classification
+ @feature_type = feature_type
@class_domain = class_domain
@num_classes = 1
@@ -36,6 +36,8 @@ module Lib
#puts "actual: "+actual_values.inspect
#puts "confidence: "+confidence_values.inspect
+ raise "unknown feature_type: "+@feature_type.to_s unless
+ @feature_type=="classification" || @feature_type=="regression"
raise "no predictions" if @predicted_values.size == 0
num_info = "predicted:"+@predicted_values.size.to_s+
" confidence:"+@confidence_values.size.to_s+" actual:"+@actual_values.size.to_s
@@ -54,15 +56,16 @@ module Lib
# @confidence_values=nil
#end
- if @is_classification
+ case @feature_type
+ when "classification"
raise "class_domain missing while performing classification" unless @class_domain
@num_classes = @class_domain.size
raise "num classes < 2" if @num_classes<2
{ "predicted"=>@predicted_values, "actual"=>@actual_values }.each do |s,values|
values.each{ |v| raise "illegal "+s+" classification-value ("+v.to_s+"),"+
- "has to be either nil or index of predicted-values" if v!=nil and (v<0 or v>@num_classes)}
+ "has to be either nil or index of predicted-values" if v!=nil and (!v.is_a?(Numeric) or v<0 or v>@num_classes)}
end
- else
+ when "regresssion"
raise "class_domain != nil while performing regression" if @class_domain
{ "predicted"=>@predicted_values, "actual"=>@actual_values }.each do |s,values|
values.each{ |v| raise "illegal "+s+" regression-value ("+v.to_s+"),"+
@@ -83,7 +86,8 @@ module Lib
@num_predicted = 0
@num_unpredicted = 0
- if @is_classification
+ case @feature_type
+ when "classification"
@confusion_matrix = []
@class_domain.each do |v|
@confusion_matrix.push( Array.new( @num_classes, 0 ) )
@@ -91,7 +95,7 @@ module Lib
@num_correct = 0
@num_incorrect = 0
- else
+ when "regression"
@sum_error = 0
@sum_abs_error = 0
@sum_squared_error = 0
@@ -122,14 +126,15 @@ module Lib
else
@num_predicted += 1
- if @is_classification
+ case @feature_type
+ when "classification"
@confusion_matrix[actual_value][predicted_value] += 1
if (predicted_value == actual_value)
@num_correct += 1
else
@num_incorrect += 1
end
- else
+ when "regression"
delta = predicted_value - actual_value
@sum_error += delta
@sum_abs_error += delta.abs
@@ -156,13 +161,13 @@ module Lib
end
def percent_correct
- raise "no classification" unless @is_classification
+ raise "no classification" unless @feature_type=="classification"
return 0 if @num_with_actual_value==0
return 100 * @num_correct / @num_with_actual_value.to_f
end
def percent_incorrect
- raise "no classification" unless @is_classification
+ raise "no classification" unless @feature_type=="classification"
return 0 if @num_with_actual_value==0
return 100 * @num_incorrect / @num_with_actual_value.to_f
end
@@ -190,17 +195,17 @@ module Lib
end
def num_correct
- raise "no classification" unless @is_classification
+ raise "no classification" unless @feature_type=="classification"
return @num_correct
end
def num_incorrect
- raise "no classification" unless @is_classification
+ raise "no classification" unless @feature_type=="classification"
return @num_incorrect
end
def num_unclassified
- raise "no classification" unless @is_classification
+ raise "no classification" unless @feature_type=="classification"
return @num_unpredicted
end
@@ -209,7 +214,7 @@ module Lib
# and values: <int-value>
def confusion_matrix
- raise "no classification" unless @is_classification
+ raise "no classification" unless @feature_type=="classification"
res = {}
(0..@num_classes-1).each do |actual|
(0..@num_classes-1).each do |predicted|
@@ -505,9 +510,10 @@ module Lib
end
def predicted_value(instance_index)
- if @is_classification
+ case @feature_type
+ when "classification"
@predicted_values[instance_index]==nil ? nil : @class_domain[@predicted_values[instance_index]]
- else
+ when "regression"
@predicted_values[instance_index]
end
end
@@ -517,9 +523,10 @@ module Lib
end
def actual_value(instance_index)
- if @is_classification
+ case @feature_type
+ when "classification"
@actual_values[instance_index]==nil ? nil : @class_domain[@actual_values[instance_index]]
- else
+ when "regression"
@actual_values[instance_index]
end
end
@@ -529,13 +536,13 @@ module Lib
end
def classification_miss?(instance_index)
- raise "no classification" unless @is_classification
+ raise "no classification" unless @feature_type=="classification"
return false if predicted_value(instance_index)==nil or actual_value(instance_index)==nil
return predicted_value(instance_index) != actual_value(instance_index)
end
- def classification?
- @is_classification
+ def feature_type
+ @feature_type
end
def confidence_values_available?
diff --git a/lib/test_util.rb b/lib/test_util.rb
index ecab76c..0deee04 100644..100755
--- a/lib/test_util.rb
+++ b/lib/test_util.rb
@@ -10,11 +10,11 @@ module Lib
end
def self.wait_for_task(uri)
- if OpenTox::Utils.task_uri?(uri)
+ if uri.task_uri?
task = OpenTox::Task.find(uri)
task.wait_for_completion
raise "task failed: "+uri.to_s+", error is:\n"+task.description if task.error?
- uri = task.resultURI
+ uri = task.result_uri
end
return uri
end
diff --git a/lib/validation_db.rb b/lib/validation_db.rb
index cb3ece7..83b7e2f 100644..100755
--- a/lib/validation_db.rb
+++ b/lib/validation_db.rb
@@ -49,11 +49,40 @@ module Lib
VAL_MERGE_AVG = VAL_PROPS_AVG + VAL_CLASS_PROPS_SINGLE_AVG + VAL_CLASS_PROPS_PER_CLASS_AVG + VAL_REGR_PROPS
- class Validation < ActiveRecord::Base
- serialize :classification_statistics
- serialize :regression_statistics
+# class Validation < ActiveRecord::Base
+# serialize :classification_statistics
+# serialize :regression_statistics
+#
+# alias_attribute :date, :created_at
- alias_attribute :date, :created_at
+ class Validation
+ include DataMapper::Resource
+
+ property :id, Serial
+ property :validation_type, String, :length => 255
+ property :model_uri, String, :length => 255
+ property :algorithm_uri, String, :length => 255
+ property :training_dataset_uri, String, :length => 255
+ property :test_target_dataset_uri, String, :length => 255
+ property :test_dataset_uri, String, :length => 255
+ property :prediction_dataset_uri, String, :length => 255
+ property :prediction_feature, String, :length => 255
+ property :created_at, DateTime
+ property :num_instances, Integer
+ property :num_without_class, Integer
+ property :num_unpredicted, Integer
+ property :crossvalidation_id, Integer
+ property :crossvalidation_fold, Integer
+ property :real_runtime, Float
+ property :percent_without_class, Float
+ property :percent_unpredicted, Float
+ property :classification_statistics, Object
+ property :regression_statistics, Object
+ property :finished, Boolean, :default => false
+
+ def date
+ created_at
+ end
def validation_uri
$sinatra.url_for("/"+self.id.to_s, :full)
@@ -77,8 +106,23 @@ module Lib
end
- class Crossvalidation < ActiveRecord::Base
- alias_attribute :date, :created_at
+# class Crossvalidation < ActiveRecord::Base
+# alias_attribute :date, :created_at
+ class Crossvalidation
+ include DataMapper::Resource
+
+ property :id, Serial
+ property :algorithm_uri, String, :length => 255
+ property :dataset_uri, String, :length => 255
+ property :created_at, DateTime
+ property :num_folds, Integer, :default => 10
+ property :random_seed, Integer, :default => 1
+ property :finished, Boolean, :default => false
+ property :stratified, Boolean, :default => false
+
+ def date
+ created_at
+ end
def crossvalidation_uri
$sinatra.url_for("/crossvalidation/"+self.id.to_s, :full) if self.id
@@ -88,7 +132,8 @@ module Lib
# in terms of dataset_uri,num_folds,stratified,random_seed
# further conditions can be specified in __conditions__
def self.find_all_uniq(conditions={})
- cvs = Lib::Crossvalidation.find(:all, :conditions => conditions)
+ #cvs = Lib::Crossvalidation.find(:all, :conditions => conditions)
+ cvs = Lib::Crossvalidation.all(:conditions => conditions)
uniq = []
cvs.each do |cv|
match = false
@@ -105,3 +150,9 @@ module Lib
end
end
end
+
+
+Lib::Validation.auto_upgrade!
+Lib::Validation.raise_on_save_failure = true
+Lib::Crossvalidation.auto_upgrade!
+Lib::Crossvalidation.raise_on_save_failure = true \ No newline at end of file