diff options
author | mguetlein <martin.guetlein@gmail.com> | 2011-01-13 15:18:45 +0100 |
---|---|---|
committer | mguetlein <martin.guetlein@gmail.com> | 2011-01-13 15:18:45 +0100 |
commit | eeb0b6df2dbdae29ccf3f4ccd923002e0ed65506 (patch) | |
tree | c0b0956d5b107e613c3236ede71462fa7f18cf05 /lib | |
parent | 5786ebd5095451e928d9811bd6bbd98af72de99d (diff) |
adjust to new wrapper, replace activerecord with datamapper (remove activerecord code), is_classification to feature_type
Diffstat (limited to 'lib')
-rwxr-xr-x[-rw-r--r--] | lib/active_record_setup.rb | 19 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/ot_predictions.rb | 136 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/predictions.rb | 51 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/test_util.rb | 4 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/validation_db.rb | 65 |
5 files changed, 194 insertions, 81 deletions
diff --git a/lib/active_record_setup.rb b/lib/active_record_setup.rb index 3682c7a..404bfec 100644..100755 --- a/lib/active_record_setup.rb +++ b/lib/active_record_setup.rb @@ -1,17 +1,17 @@ -gem "activerecord", "= 2.3.8" -gem "ar-extensions", "= 0.9.2" -['rubygems', 'logger', 'active_record', 'ar-extensions', 'opentox-ruby-api-wrapper' ].each do |g| +#gem "activerecord", "= 2.3.8" +#gem "ar-extensions", "= 0.9.2" +['rubygems', 'logger', 'active_record', 'opentox-ruby' ].each do |g| #'ar-extensions', require g end unless ActiveRecord::Base.connected? ActiveRecord::Base.establish_connection( - :adapter => @@config[:database][:adapter], - :host => @@config[:database][:host], - :database => @@config[:database][:database], - :username => @@config[:database][:username], - :password => @@config[:database][:password] + :adapter => CONFIG[:database][:adapter], + :host => CONFIG[:database][:host], + :database => CONFIG[:database][:database], + :username => CONFIG[:database][:username], + :password => CONFIG[:database][:password] ) ActiveRecord::Base.logger = Logger.new("/dev/null") end @@ -19,6 +19,9 @@ end class ActiveRecord::Base def self.find_like(filter_params) + + raise "find like removed" + #puts "params before "+filter_params.inspect filter_params.keys.each do |k| key = k.to_s diff --git a/lib/ot_predictions.rb b/lib/ot_predictions.rb index 0175a0c..52683a6 100644..100755 --- a/lib/ot_predictions.rb +++ b/lib/ot_predictions.rb @@ -15,7 +15,7 @@ module Lib return @compounds[instance_index] end - def initialize(is_classification, test_dataset_uri, test_target_dataset_uri, + def initialize(feature_type, test_dataset_uri, test_target_dataset_uri, prediction_feature, prediction_dataset_uri, predicted_variable, task=nil) LOGGER.debug("loading prediciton via test-dataset:'"+test_dataset_uri.to_s+ @@ -53,38 +53,47 @@ module Lib raise "prediction_feature not found in test_target_dataset\n"+ "prediction_feature: '"+prediction_feature.to_s+"'\n"+ "test_target_dataset: '"+test_target_dataset_uri.to_s+"'\n"+ - "available features are: "+test_target_dataset.features.inspect if test_target_dataset.features.index(prediction_feature)==nil + "available features are: "+test_target_dataset.features.inspect if test_target_dataset.features.keys.index(prediction_feature)==nil end + test_dataset.load_all @compounds = test_dataset.compounds LOGGER.debug "test dataset size: "+@compounds.size.to_s raise "test dataset is empty" unless @compounds.size>0 - class_values = is_classification ? OpenTox::Feature.domain(prediction_feature) : nil + class_values = feature_type=="classification" ? OpenTox::Feature.new(prediction_feature).domain : nil actual_values = [] @compounds.each do |c| - value = test_target_dataset.get_value(c, prediction_feature) - - if is_classification - value = value.to_s unless value==nil - raise "illegal class_value of actual value "+value.to_s+" class: "+ - value.class.to_s unless value==nil or class_values.index(value)!=nil - actual_values.push class_values.index(value) - else - begin - value = value.to_f unless value==nil or value.is_a?(Numeric) - rescue - LOGGER.warn "no numeric value for regression: '"+value.to_s+"'" - value = nil - end - actual_values.push value + case feature_type + when "classification" + actual_values << classification_value(test_target_dataset, c, prediction_feature, class_values) + when "regression" + actual_values << regression_value(test_target_dataset, c, prediction_feature) end end task.progress(40) if task # loaded actual values prediction_dataset = OpenTox::Dataset.find prediction_dataset_uri raise "prediction dataset not found: '"+prediction_dataset_uri.to_s+"'" unless prediction_dataset - raise "prediction-feature not found: '"+predicted_variable+"' in prediction-dataset: "+prediction_dataset_uri.to_s+", available features: "+prediction_dataset.features.inspect if prediction_dataset.features.index(predicted_variable)==nil + + # TODO: remove LAZAR_PREDICTION_DATASET_HACK + no_prediction_feature = prediction_dataset.features.keys.index(predicted_variable)==nil + if no_prediction_feature + one_entry_per_compound = prediction_dataset.data_entries.keys.size == @compounds.size + @compounds.each do |c| + if prediction_dataset.data_entries[c].size != 1 + one_entry_per_compound = false + break + end + end + msg = "prediction-feature not found: '"+predicted_variable+"' in prediction-dataset: "+prediction_dataset_uri.to_s+", available features: "+ + prediction_dataset.features.keys.inspect + if one_entry_per_compound + LOGGER.warn msg + else + raise msg + end + end raise "more predicted than test compounds test:"+@compounds.size.to_s+" < prediction:"+ prediction_dataset.compounds.size.to_s if @compounds.size < prediction_dataset.compounds.size @@ -102,38 +111,79 @@ module Lib predicted_values << nil confidence_values << nil else - if is_classification - value = prediction_dataset.get_predicted_class(c, predicted_variable) - value = value.to_s unless value==nil - raise "illegal class_value of predicted value "+value.to_s+" class: "+value.class.to_s unless value==nil or class_values.index(value)!=nil - predicted_values << class_values.index(value) - else - value = prediction_dataset.get_predicted_regression(c, predicted_variable) - begin - value = value.to_f unless value==nil or value.is_a?(Numeric) - rescue - LOGGER.warn "no numeric value for regression: '"+value.to_s+"'" - value = nil - end - predicted_values << value + case feature_type + when "classification" + # TODO: remove LAZAR_PREDICTION_DATASET_HACK + predicted_values << classification_value(prediction_dataset, c, no_prediction_feature ? nil : predicted_variable, class_values) + when "regression" + predicted_values << regression_value(prediction_dataset, c, no_prediction_feature ? nil : predicted_variable) + end + # TODO confidence_values << prediction_dataset.get_prediction_confidence(c, predicted_variable) + conf = 1 + begin + feature = prediction_dataset.data_entries[c].keys[0] + feature_data = prediction_dataset.features[feature] + conf = feature_data[OT.confidence] if feature_data[OT.confidence]!=nil + rescue + LOGGER.warn "could not get confidence" end - confidence_values << prediction_dataset.get_prediction_confidence(c, predicted_variable) + confidence_values << conf end end task.progress(80) if task # loaded predicted values and confidence - super(predicted_values, actual_values, confidence_values, is_classification, class_values) + super(predicted_values, actual_values, confidence_values, feature_type, class_values) raise "illegal num compounds "+num_info if @compounds.size != @predicted_values.size task.progress(100) if task # done with the mathmatics end + private + def regression_value(dataset, compound, feature) + v = value(dataset, compound, feature) + begin + v = v.to_f unless v==nil or v.is_a?(Numeric) + v + rescue + LOGGER.warn "no numeric value for regression: '"+v.to_s+"'" + nil + end + end + + def classification_value(dataset, compound, feature, class_values) + v = value(dataset, compound, feature) + i = class_values.index(v) + raise "illegal class_value of predicted value "+v.to_s+" class: "+v.class.to_s unless v==nil or i!=nil + i + end + + def value(dataset, compound, feature) + + if feature==nil + v = dataset.data_entries[compound].values[0] + else + v = dataset.data_entries[compound][feature] + end + raise "no array" unless v.is_a?(Array) + if v.size>1 + raise "multiple values" + elsif v.size==1 + v = v[0] + else + v = nil + end + raise "array" if v.is_a?(Array) + v = nil if v.to_s.size==0 + v + end + public def compute_stats res = {} - if @is_classification + case @feature_type + when "classification" (Lib::VAL_CLASS_PROPS).each{ |s| res[s] = send(s)} - else + when "regression" (Lib::VAL_REGR_PROPS).each{ |s| res[s] = send(s) } end return res @@ -152,16 +202,18 @@ module Lib #PENDING! begin - a.push( "http://ambit.uni-plovdiv.bg:8080/ambit2/depict/cdk?search="+ - URI.encode(OpenTox::Compound.new(:uri=>p.identifier(i)).smiles) ) if add_pic + #a.push( "http://ambit.uni-plovdiv.bg:8080/ambit2/depict/cdk?search="+ + # URI.encode(OpenTox::Compound.new(:uri=>p.identifier(i)).smiles) ) if add_pic + a << p.identifier(i)+"/image" rescue => ex + raise ex #a.push("Could not add pic: "+ex.message) - a.push(p.identifier(i)) + #a.push(p.identifier(i)) end a << (format ? p.actual_value(i).to_nice_s : p.actual_value(i)) a << (format ? p.predicted_value(i).to_nice_s : p.predicted_value(i)) - if p.classification? + if p.feature_type=="classification" if (p.predicted_value(i)!=nil and p.actual_value(i)!=nil) a << (p.classification_miss?(i) ? 1 : 0) else @@ -180,7 +232,7 @@ module Lib header << "compound" if add_pic header << "actual value" header << "predicted value" - header << "missclassified" if predictions[0].classification? + header << "missclassified" if predictions[0].feature_type=="classification" header << "confidence value" if predictions[0].confidence_values_available? header << "compound-uri" res.insert(0, header) diff --git a/lib/predictions.rb b/lib/predictions.rb index 6e50e94..e73dda6 100644..100755 --- a/lib/predictions.rb +++ b/lib/predictions.rb @@ -22,13 +22,13 @@ module Lib def initialize( predicted_values, actual_values, confidence_values, - is_classification, + feature_type, class_domain=nil ) @predicted_values = predicted_values @actual_values = actual_values @confidence_values = confidence_values - @is_classification = is_classification + @feature_type = feature_type @class_domain = class_domain @num_classes = 1 @@ -36,6 +36,8 @@ module Lib #puts "actual: "+actual_values.inspect #puts "confidence: "+confidence_values.inspect + raise "unknown feature_type: "+@feature_type.to_s unless + @feature_type=="classification" || @feature_type=="regression" raise "no predictions" if @predicted_values.size == 0 num_info = "predicted:"+@predicted_values.size.to_s+ " confidence:"+@confidence_values.size.to_s+" actual:"+@actual_values.size.to_s @@ -54,15 +56,16 @@ module Lib # @confidence_values=nil #end - if @is_classification + case @feature_type + when "classification" raise "class_domain missing while performing classification" unless @class_domain @num_classes = @class_domain.size raise "num classes < 2" if @num_classes<2 { "predicted"=>@predicted_values, "actual"=>@actual_values }.each do |s,values| values.each{ |v| raise "illegal "+s+" classification-value ("+v.to_s+"),"+ - "has to be either nil or index of predicted-values" if v!=nil and (v<0 or v>@num_classes)} + "has to be either nil or index of predicted-values" if v!=nil and (!v.is_a?(Numeric) or v<0 or v>@num_classes)} end - else + when "regresssion" raise "class_domain != nil while performing regression" if @class_domain { "predicted"=>@predicted_values, "actual"=>@actual_values }.each do |s,values| values.each{ |v| raise "illegal "+s+" regression-value ("+v.to_s+"),"+ @@ -83,7 +86,8 @@ module Lib @num_predicted = 0 @num_unpredicted = 0 - if @is_classification + case @feature_type + when "classification" @confusion_matrix = [] @class_domain.each do |v| @confusion_matrix.push( Array.new( @num_classes, 0 ) ) @@ -91,7 +95,7 @@ module Lib @num_correct = 0 @num_incorrect = 0 - else + when "regression" @sum_error = 0 @sum_abs_error = 0 @sum_squared_error = 0 @@ -122,14 +126,15 @@ module Lib else @num_predicted += 1 - if @is_classification + case @feature_type + when "classification" @confusion_matrix[actual_value][predicted_value] += 1 if (predicted_value == actual_value) @num_correct += 1 else @num_incorrect += 1 end - else + when "regression" delta = predicted_value - actual_value @sum_error += delta @sum_abs_error += delta.abs @@ -156,13 +161,13 @@ module Lib end def percent_correct - raise "no classification" unless @is_classification + raise "no classification" unless @feature_type=="classification" return 0 if @num_with_actual_value==0 return 100 * @num_correct / @num_with_actual_value.to_f end def percent_incorrect - raise "no classification" unless @is_classification + raise "no classification" unless @feature_type=="classification" return 0 if @num_with_actual_value==0 return 100 * @num_incorrect / @num_with_actual_value.to_f end @@ -190,17 +195,17 @@ module Lib end def num_correct - raise "no classification" unless @is_classification + raise "no classification" unless @feature_type=="classification" return @num_correct end def num_incorrect - raise "no classification" unless @is_classification + raise "no classification" unless @feature_type=="classification" return @num_incorrect end def num_unclassified - raise "no classification" unless @is_classification + raise "no classification" unless @feature_type=="classification" return @num_unpredicted end @@ -209,7 +214,7 @@ module Lib # and values: <int-value> def confusion_matrix - raise "no classification" unless @is_classification + raise "no classification" unless @feature_type=="classification" res = {} (0..@num_classes-1).each do |actual| (0..@num_classes-1).each do |predicted| @@ -505,9 +510,10 @@ module Lib end def predicted_value(instance_index) - if @is_classification + case @feature_type + when "classification" @predicted_values[instance_index]==nil ? nil : @class_domain[@predicted_values[instance_index]] - else + when "regression" @predicted_values[instance_index] end end @@ -517,9 +523,10 @@ module Lib end def actual_value(instance_index) - if @is_classification + case @feature_type + when "classification" @actual_values[instance_index]==nil ? nil : @class_domain[@actual_values[instance_index]] - else + when "regression" @actual_values[instance_index] end end @@ -529,13 +536,13 @@ module Lib end def classification_miss?(instance_index) - raise "no classification" unless @is_classification + raise "no classification" unless @feature_type=="classification" return false if predicted_value(instance_index)==nil or actual_value(instance_index)==nil return predicted_value(instance_index) != actual_value(instance_index) end - def classification? - @is_classification + def feature_type + @feature_type end def confidence_values_available? diff --git a/lib/test_util.rb b/lib/test_util.rb index ecab76c..0deee04 100644..100755 --- a/lib/test_util.rb +++ b/lib/test_util.rb @@ -10,11 +10,11 @@ module Lib end def self.wait_for_task(uri) - if OpenTox::Utils.task_uri?(uri) + if uri.task_uri? task = OpenTox::Task.find(uri) task.wait_for_completion raise "task failed: "+uri.to_s+", error is:\n"+task.description if task.error? - uri = task.resultURI + uri = task.result_uri end return uri end diff --git a/lib/validation_db.rb b/lib/validation_db.rb index cb3ece7..83b7e2f 100644..100755 --- a/lib/validation_db.rb +++ b/lib/validation_db.rb @@ -49,11 +49,40 @@ module Lib VAL_MERGE_AVG = VAL_PROPS_AVG + VAL_CLASS_PROPS_SINGLE_AVG + VAL_CLASS_PROPS_PER_CLASS_AVG + VAL_REGR_PROPS - class Validation < ActiveRecord::Base - serialize :classification_statistics - serialize :regression_statistics +# class Validation < ActiveRecord::Base +# serialize :classification_statistics +# serialize :regression_statistics +# +# alias_attribute :date, :created_at - alias_attribute :date, :created_at + class Validation + include DataMapper::Resource + + property :id, Serial + property :validation_type, String, :length => 255 + property :model_uri, String, :length => 255 + property :algorithm_uri, String, :length => 255 + property :training_dataset_uri, String, :length => 255 + property :test_target_dataset_uri, String, :length => 255 + property :test_dataset_uri, String, :length => 255 + property :prediction_dataset_uri, String, :length => 255 + property :prediction_feature, String, :length => 255 + property :created_at, DateTime + property :num_instances, Integer + property :num_without_class, Integer + property :num_unpredicted, Integer + property :crossvalidation_id, Integer + property :crossvalidation_fold, Integer + property :real_runtime, Float + property :percent_without_class, Float + property :percent_unpredicted, Float + property :classification_statistics, Object + property :regression_statistics, Object + property :finished, Boolean, :default => false + + def date + created_at + end def validation_uri $sinatra.url_for("/"+self.id.to_s, :full) @@ -77,8 +106,23 @@ module Lib end - class Crossvalidation < ActiveRecord::Base - alias_attribute :date, :created_at +# class Crossvalidation < ActiveRecord::Base +# alias_attribute :date, :created_at + class Crossvalidation + include DataMapper::Resource + + property :id, Serial + property :algorithm_uri, String, :length => 255 + property :dataset_uri, String, :length => 255 + property :created_at, DateTime + property :num_folds, Integer, :default => 10 + property :random_seed, Integer, :default => 1 + property :finished, Boolean, :default => false + property :stratified, Boolean, :default => false + + def date + created_at + end def crossvalidation_uri $sinatra.url_for("/crossvalidation/"+self.id.to_s, :full) if self.id @@ -88,7 +132,8 @@ module Lib # in terms of dataset_uri,num_folds,stratified,random_seed # further conditions can be specified in __conditions__ def self.find_all_uniq(conditions={}) - cvs = Lib::Crossvalidation.find(:all, :conditions => conditions) + #cvs = Lib::Crossvalidation.find(:all, :conditions => conditions) + cvs = Lib::Crossvalidation.all(:conditions => conditions) uniq = [] cvs.each do |cv| match = false @@ -105,3 +150,9 @@ module Lib end end end + + +Lib::Validation.auto_upgrade! +Lib::Validation.raise_on_save_failure = true +Lib::Crossvalidation.auto_upgrade! +Lib::Crossvalidation.raise_on_save_failure = true
\ No newline at end of file |