diff options
author | Christoph Helma <helma@in-silico.ch> | 2011-03-03 13:00:47 +0100 |
---|---|---|
committer | Christoph Helma <helma@in-silico.ch> | 2011-03-03 13:00:47 +0100 |
commit | dcd0a5a659c303c50a59d1271947851245db10e7 (patch) | |
tree | eaae695edf72c3a99cde58b9caaa7825d6d4f31a /lib | |
parent | 8b46f5a4f389d7cd54f6e8b38025d275f9d3ed1b (diff) | |
parent | e57856a3c2cd10df207e722301c24a022e9fd802 (diff) |
Merge remote branch 'mguetlein/test' into development
Conflicts:
application.rb
example.rb
lib/validation_db.rb
nightly/nightly.rb
report/environment.rb
test/test_examples.rb
test/test_examples_util.rb
Diffstat (limited to 'lib')
-rwxr-xr-x | lib/active_record_setup.rb | 50 | ||||
-rw-r--r-- | lib/data_mapper_util.rb | 37 | ||||
-rw-r--r-- | lib/format_util.rb | 68 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/ot_predictions.rb | 202 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/predictions.rb | 143 | ||||
-rw-r--r-- | lib/rdf_provider.rb | 188 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/test_util.rb | 7 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/validation_db.rb | 154 |
8 files changed, 535 insertions, 314 deletions
diff --git a/lib/active_record_setup.rb b/lib/active_record_setup.rb new file mode 100755 index 0000000..b43e692 --- /dev/null +++ b/lib/active_record_setup.rb @@ -0,0 +1,50 @@ + +#gem "activerecord", "= 2.3.8" +#gem "ar-extensions", "= 0.9.2" +['rubygems', 'logger', 'active_record', 'opentox-ruby' ].each do |g| #'ar-extensions', + require g +end + +unless ActiveRecord::Base.connected? + ActiveRecord::Base.establish_connection( + :adapter => CONFIG[:database][:adapter], + :host => CONFIG[:database][:host], + :database => CONFIG[:database][:database], + :username => CONFIG[:database][:username], + :password => CONFIG[:database][:password] + ) + ActiveRecord::Base.logger = Logger.new("/dev/null") +end + +class ActiveRecord::Base + + def self.find_like(filter_params) + + raise "find like removed" + + #puts "params before "+filter_params.inspect + filter_params.keys.each do |k| + key = k.to_s + unless self.column_names.include?(key) + key = key.from_rdf_format + unless self.column_names.include?(key) + key = key+"_uri" + unless self.column_names.include?(key) + key = key+"s" + unless self.column_names.include?(key) + err = "no attribute found: '"+k.to_s+"'" +# if $sinatra +# $sinatra.halt 400,err +# else + raise err +# end + end + end + end + end + filter_params[key+"_like"] = filter_params.delete(k) + end + #puts "params after "+filter_params.inspect + self.find(:all, :conditions => filter_params) + end +end
\ No newline at end of file diff --git a/lib/data_mapper_util.rb b/lib/data_mapper_util.rb new file mode 100644 index 0000000..23f52f5 --- /dev/null +++ b/lib/data_mapper_util.rb @@ -0,0 +1,37 @@ + +#DataObjects::Mysql.logger = DataObjects::Logger.new(STDOUT, 0) + +module Lib + module DataMapperUtil + + def self.check_params(model, params) + prop_names = model.properties.collect{|p| p.name.to_s if p.is_a?DataMapper::Property::Object} + params.keys.each do |k| + key = k.to_s + if (key == "subjectid") + params.delete(k) + else + unless prop_names.include?(key) + key = key.from_rdf_format + unless prop_names.include?(key) + key = key+"_uri" + unless prop_names.include?(key) + key = key+"s" + unless prop_names.include?(key) + raise OpenTox::BadRequestError.new "no attribute found: '"+k.to_s+"'" + end + end + end + end + params[key.to_sym] = params.delete(k) + end + end + params + end + + def self.all(model, filter_params) + model.all(check_params(model,filter_params)) + end + + end +end
\ No newline at end of file diff --git a/lib/format_util.rb b/lib/format_util.rb new file mode 100644 index 0000000..3d3a3e6 --- /dev/null +++ b/lib/format_util.rb @@ -0,0 +1,68 @@ + + +class String + + # :prediction_feature -> predictionFeature + # :test_dataset_uri -> testDataset + # :validation_uris -> validation + def to_rdf_format + s = gsub(/_uri(s|)$/,"") + s.gsub(/_./) do |m| + m.gsub!(/^_/,"") + m.upcase + end + end + + def from_rdf_format + gsub(/[A-Z]/) do |m| + "_"+m.downcase + end + end + + DC_KEYS = [ "title", "creator", "date", "format" ] + RDF_KEYS = [ "type" ] + + def to_owl_uri + if DC_KEYS.include?(self) + return DC.send(self) + elsif RDF_KEYS.include?(self) + return RDF.send(self) + else + return OT.send(self) + end + end +end + +class Hash + + # applies to_rdf_format to all keys + def keys_to_rdf_format + res = {} + keys.each do |k| + v = self[k] + if v.is_a?(Hash) + v = v.keys_to_rdf_format + elsif v.is_a?(Array) + v = v.collect{ |vv| vv.is_a?(Hash) ? vv.keys_to_rdf_format : vv } + end + res[k.to_s.to_rdf_format] = v + end + return res + end + + def keys_to_owl_uris + res = {} + keys.each do |k| + v = self[k] + if v.is_a?(Hash) + v = v.keys_to_owl_uris + elsif v.is_a?(Array) + v = v.collect{ |vv| vv.is_a?(Hash) ? vv.keys_to_owl_uris : vv } + end + res[k.to_s.to_owl_uri] = v + end + return res + end + +end + diff --git a/lib/ot_predictions.rb b/lib/ot_predictions.rb index 63debc0..eb80205 100644..100755 --- a/lib/ot_predictions.rb +++ b/lib/ot_predictions.rb @@ -15,22 +15,18 @@ module Lib return @compounds[instance_index] end - def initialize(is_classification, test_dataset_uri, test_target_dataset_uri, prediction_feature, prediction_dataset_uri, predicted_variable) + def initialize(feature_type, test_dataset_uri, test_target_dataset_uri, + prediction_feature, prediction_dataset_uri, predicted_variable, subjectid=nil, task=nil) - LOGGER.debug("loading prediciton via test-dateset:'"+test_dataset_uri.to_s+ + LOGGER.debug("loading prediciton via test-dataset:'"+test_dataset_uri.to_s+ "', test-target-datset:'"+test_target_dataset_uri.to_s+ "', prediction-dataset:'"+prediction_dataset_uri.to_s+ "', prediction_feature: '"+prediction_feature.to_s+"' "+ "', predicted_variable: '"+predicted_variable.to_s+"'") - if prediction_feature =~ /ambit.uni-plovdiv.bg.*feature.*264185/ - LOGGER.warn "HACK for report example" - prediction_feature = "http://ambit.uni-plovdiv.bg:8080/ambit2/feature/264187" - end - predicted_variable=prediction_feature if predicted_variable==nil - test_dataset = OpenTox::Dataset.find test_dataset_uri + test_dataset = OpenTox::Dataset.find test_dataset_uri,subjectid raise "test dataset not found: '"+test_dataset_uri.to_s+"'" unless test_dataset raise "prediction_feature missing" unless prediction_feature @@ -40,9 +36,9 @@ module Lib raise "prediction_feature not found in test_dataset, specify a test_target_dataset\n"+ "prediction_feature: '"+prediction_feature.to_s+"'\n"+ "test_dataset: '"+test_target_dataset_uri.to_s+"'\n"+ - "available features are: "+test_target_dataset.features.inspect if test_target_dataset.features.index(prediction_feature)==nil + "available features are: "+test_target_dataset.features.inspect if test_target_dataset.features.keys.index(prediction_feature)==nil else - test_target_dataset = OpenTox::Dataset.find test_target_dataset_uri + test_target_dataset = OpenTox::Dataset.find test_target_dataset_uri,subjectid raise "test target datset not found: '"+test_target_dataset_uri.to_s+"'" unless test_target_dataset if CHECK_VALUES test_dataset.compounds.each do |c| @@ -52,38 +48,47 @@ module Lib raise "prediction_feature not found in test_target_dataset\n"+ "prediction_feature: '"+prediction_feature.to_s+"'\n"+ "test_target_dataset: '"+test_target_dataset_uri.to_s+"'\n"+ - "available features are: "+test_target_dataset.features.inspect if test_target_dataset.features.index(prediction_feature)==nil + "available features are: "+test_target_dataset.features.inspect if test_target_dataset.features.keys.index(prediction_feature)==nil end @compounds = test_dataset.compounds LOGGER.debug "test dataset size: "+@compounds.size.to_s - raise "test dataset is empty" unless @compounds.size>0 - class_values = is_classification ? OpenTox::Feature.domain(prediction_feature) : nil + raise "test dataset is empty "+test_dataset_uri.to_s unless @compounds.size>0 + class_values = feature_type=="classification" ? OpenTox::Feature.find(prediction_feature, subjectid).domain : nil actual_values = [] @compounds.each do |c| - value = test_target_dataset.get_value(c, prediction_feature) - - if is_classification - value = value.to_s unless value==nil - raise "illegal class_value of actual value "+value.to_s+" class: "+ - value.class.to_s unless value==nil or class_values.index(value)!=nil - actual_values.push class_values.index(value) - else - begin - value = value.to_f unless value==nil or value.is_a?(Numeric) - rescue - LOGGER.warn "no numeric value for regression: '"+value.to_s+"'" - value = nil - end - actual_values.push value + case feature_type + when "classification" + actual_values << classification_value(test_target_dataset, c, prediction_feature, class_values) + when "regression" + actual_values << regression_value(test_target_dataset, c, prediction_feature) end end + task.progress(40) if task # loaded actual values - prediction_dataset = OpenTox::Dataset.find prediction_dataset_uri + prediction_dataset = OpenTox::Dataset.find prediction_dataset_uri,subjectid raise "prediction dataset not found: '"+prediction_dataset_uri.to_s+"'" unless prediction_dataset - raise "prediction-feature not found: '"+predicted_variable+"' in prediction-dataset: "+prediction_dataset_uri.to_s+", available features: "+prediction_dataset.features.inspect if prediction_dataset.features.index(predicted_variable)==nil + # TODO: remove LAZAR_PREDICTION_DATASET_HACK + no_prediction_feature = prediction_dataset.features.keys.index(predicted_variable)==nil + if no_prediction_feature + one_entry_per_compound = true + @compounds.each do |c| + if prediction_dataset.data_entries[c] and prediction_dataset.data_entries[c].size != 1 + one_entry_per_compound = false + break + end + end + msg = "prediction-feature not found: '"+predicted_variable+"' in prediction-dataset: "+prediction_dataset_uri.to_s+", available features: "+ + prediction_dataset.features.keys.inspect + if one_entry_per_compound + LOGGER.warn msg + else + raise msg + end + end + raise "more predicted than test compounds test:"+@compounds.size.to_s+" < prediction:"+ prediction_dataset.compounds.size.to_s if @compounds.size < prediction_dataset.compounds.size if CHECK_VALUES @@ -100,41 +105,138 @@ module Lib predicted_values << nil confidence_values << nil else - if is_classification - value = prediction_dataset.get_predicted_class(c, predicted_variable) - value = value.to_s unless value==nil - raise "illegal class_value of predicted value "+value.to_s+" class: "+value.class.to_s unless value==nil or class_values.index(value)!=nil - predicted_values << class_values.index(value) - confidence_values << prediction_dataset.get_prediction_confidence(c, predicted_variable) - else - value = prediction_dataset.get_predicted_regression(c, predicted_variable) - begin - value = value.to_f unless value==nil or value.is_a?(Numeric) - rescue - LOGGER.warn "no numeric value for regression: '"+value.to_s+"'" - value = nil - end - predicted_values << value - confidence_values << nil + case feature_type + when "classification" + # TODO: remove LAZAR_PREDICTION_DATASET_HACK + predicted_values << classification_value(prediction_dataset, c, no_prediction_feature ? nil : predicted_variable, class_values) + when "regression" + predicted_values << regression_value(prediction_dataset, c, no_prediction_feature ? nil : predicted_variable) + end + # TODO confidence_values << prediction_dataset.get_prediction_confidence(c, predicted_variable) + conf = 1 + begin + feature = prediction_dataset.data_entries[c].keys[0] + feature_data = prediction_dataset.features[feature] + conf = feature_data[OT.confidence] if feature_data[OT.confidence]!=nil + rescue + LOGGER.warn "could not get confidence" end + confidence_values << conf end end + task.progress(80) if task # loaded predicted values and confidence - super(predicted_values, actual_values, confidence_values, is_classification, class_values) + super(predicted_values, actual_values, confidence_values, feature_type, class_values) raise "illegal num compounds "+num_info if @compounds.size != @predicted_values.size + task.progress(100) if task # done with the mathmatics end + private + def regression_value(dataset, compound, feature) + v = value(dataset, compound, feature) + begin + v = v.to_f unless v==nil or v.is_a?(Numeric) + v + rescue + LOGGER.warn "no numeric value for regression: '"+v.to_s+"'" + nil + end + end + + def classification_value(dataset, compound, feature, class_values) + v = value(dataset, compound, feature) + i = class_values.index(v) + raise "illegal class_value of prediction (value is '"+v.to_s+"', class is '"+v.class.to_s+"'), possible values are "+ + class_values.inspect unless v==nil or i!=nil + i + end + + def value(dataset, compound, feature) + return nil if dataset.data_entries[compound]==nil + if feature==nil + v = dataset.data_entries[compound].values[0] + else + v = dataset.data_entries[compound][feature] + end + return nil if v==nil + raise "no array "+v.class.to_s+" : '"+v.to_s+"'" unless v.is_a?(Array) + if v.size>1 + v.uniq! + raise "not yet implemented: multiple non-equal values "+compound.to_s+" "+v.inspect if v.size>1 + v = v[0] + elsif v.size==1 + v = v[0] + else + v = nil + end + raise "array" if v.is_a?(Array) + v = nil if v.to_s.size==0 + v + end + public def compute_stats res = {} - if @is_classification - (Lib::VAL_CLASS_PROPS_EXTENDED).each{ |s| res[s] = send(s)} - else + case @feature_type + when "classification" + (Lib::VAL_CLASS_PROPS).each{ |s| res[s] = send(s)} + when "regression" (Lib::VAL_REGR_PROPS).each{ |s| res[s] = send(s) } end return res end + def to_array() + OTPredictions.to_array( [self] ) + end + + def self.to_array( predictions, add_pic=false, format=false ) + + res = [] + predictions.each do |p| + (0..p.num_instances-1).each do |i| + a = [] + + #PENDING! + begin + #a.push( "http://ambit.uni-plovdiv.bg:8080/ambit2/depict/cdk?search="+ + # URI.encode(OpenTox::Compound.new(:uri=>p.identifier(i)).smiles) ) if add_pic + a << p.identifier(i)+"/image" + rescue => ex + raise ex + #a.push("Could not add pic: "+ex.message) + #a.push(p.identifier(i)) + end + + a << (format ? p.actual_value(i).to_nice_s : p.actual_value(i)) + a << (format ? p.predicted_value(i).to_nice_s : p.predicted_value(i)) + if p.feature_type=="classification" + if (p.predicted_value(i)!=nil and p.actual_value(i)!=nil) + a << (p.classification_miss?(i) ? 1 : 0) + else + a << nil + end + end + if p.confidence_values_available? + a << (format ? p.confidence_value(i).to_nice_s : p.confidence_value(i)) + end + a << p.identifier(i) + res << a + end + end + + header = [] + header << "compound" if add_pic + header << "actual value" + header << "predicted value" + header << "missclassified" if predictions[0].feature_type=="classification" + header << "confidence value" if predictions[0].confidence_values_available? + header << "compound-uri" + res.insert(0, header) + + return res + end + end end diff --git a/lib/predictions.rb b/lib/predictions.rb index f6351f8..5850024 100644..100755 --- a/lib/predictions.rb +++ b/lib/predictions.rb @@ -22,20 +22,22 @@ module Lib def initialize( predicted_values, actual_values, confidence_values, - is_classification, - prediction_feature_values=nil ) + feature_type, + class_domain=nil ) @predicted_values = predicted_values @actual_values = actual_values @confidence_values = confidence_values - @is_classification = is_classification - @prediction_feature_values = prediction_feature_values + @feature_type = feature_type + @class_domain = class_domain @num_classes = 1 #puts "predicted: "+predicted_values.inspect #puts "actual: "+actual_values.inspect #puts "confidence: "+confidence_values.inspect + raise "unknown feature_type: "+@feature_type.to_s unless + @feature_type=="classification" || @feature_type=="regression" raise "no predictions" if @predicted_values.size == 0 num_info = "predicted:"+@predicted_values.size.to_s+ " confidence:"+@confidence_values.size.to_s+" actual:"+@actual_values.size.to_s @@ -43,23 +45,28 @@ module Lib raise "illegal num confidence values "+num_info if @confidence_values.size != @predicted_values.size @confidence_values.each{ |c| raise "illegal confidence value: '"+c.to_s+"'" unless c==nil or (c.is_a?(Numeric) and c>=0 and c<=1) } - conf_val_tmp = {} - @confidence_values.each{ |c| conf_val_tmp[c] = nil } - if conf_val_tmp.keys.size<2 - LOGGER.warn("prediction w/o confidence values"); - @confidence_values=nil - end + ## check if there is more than one different conf value + ## DEPRECATED? not sure anymore what this was about, + ## I am pretty sure this was for r-plot of roc curves + ## roc curvers are now plotted manually + #conf_val_tmp = {} + #@confidence_values.each{ |c| conf_val_tmp[c] = nil } + #if conf_val_tmp.keys.size<2 + # LOGGER.warn("prediction w/o confidence values"); + # @confidence_values=nil + #end - if @is_classification - raise "prediction_feature_values missing while performing classification" unless @prediction_feature_values - @num_classes = @prediction_feature_values.size + case @feature_type + when "classification" + raise "class_domain missing while performing classification" unless @class_domain + @num_classes = @class_domain.size raise "num classes < 2" if @num_classes<2 { "predicted"=>@predicted_values, "actual"=>@actual_values }.each do |s,values| values.each{ |v| raise "illegal "+s+" classification-value ("+v.to_s+"),"+ - "has to be either nil or index of predicted-values" if v!=nil and (v<0 or v>@num_classes)} + "has to be either nil or index of predicted-values" if v!=nil and (!v.is_a?(Numeric) or v<0 or v>@num_classes)} end - else - raise "prediction_feature_values != nil while performing regression" if @prediction_feature_values + when "regresssion" + raise "class_domain != nil while performing regression" if @class_domain { "predicted"=>@predicted_values, "actual"=>@actual_values }.each do |s,values| values.each{ |v| raise "illegal "+s+" regression-value ("+v.to_s+"),"+ "has to be either nil or number" unless v==nil or v.is_a?(Numeric)} @@ -79,15 +86,16 @@ module Lib @num_predicted = 0 @num_unpredicted = 0 - if @is_classification + case @feature_type + when "classification" @confusion_matrix = [] - @prediction_feature_values.each do |v| + @class_domain.each do |v| @confusion_matrix.push( Array.new( @num_classes, 0 ) ) end @num_correct = 0 @num_incorrect = 0 - else + when "regression" @sum_error = 0 @sum_abs_error = 0 @sum_squared_error = 0 @@ -118,14 +126,15 @@ module Lib else @num_predicted += 1 - if @is_classification + case @feature_type + when "classification" @confusion_matrix[actual_value][predicted_value] += 1 if (predicted_value == actual_value) @num_correct += 1 else @num_incorrect += 1 end - else + when "regression" delta = predicted_value - actual_value @sum_error += delta @sum_abs_error += delta.abs @@ -152,21 +161,38 @@ module Lib end def percent_correct - raise "no classification" unless @is_classification + raise "no classification" unless @feature_type=="classification" return 0 if @num_with_actual_value==0 - return 100 * @num_correct / @num_with_actual_value.to_f + return 100 * @num_correct / (@num_with_actual_value - @num_unpredicted).to_f end def percent_incorrect - raise "no classification" unless @is_classification + raise "no classification" unless @feature_type=="classification" return 0 if @num_with_actual_value==0 - return 100 * @num_incorrect / @num_with_actual_value.to_f + return 100 * @num_incorrect / (@num_with_actual_value - @num_unpredicted).to_f end def accuracy return percent_correct / 100.0 end + def weighted_accuracy + raise "no classification" unless @feature_type=="classification" + total = 0 + correct = 0 + (0..@predicted_values.size-1).each do |i| + if @predicted_values[i]!=nil + total += @confidence_values[i] + correct += @confidence_values[i] if @actual_values[i]==@predicted_values[i] + end + end + if total==0 || correct == 0 + return 0 + else + return correct / total + end + end + def percent_unpredicted return 0 if @num_with_actual_value==0 return 100 * @num_unpredicted / @num_with_actual_value.to_f @@ -186,17 +212,17 @@ module Lib end def num_correct - raise "no classification" unless @is_classification + raise "no classification" unless @feature_type=="classification" return @num_correct end def num_incorrect - raise "no classification" unless @is_classification + raise "no classification" unless @feature_type=="classification" return @num_incorrect end def num_unclassified - raise "no classification" unless @is_classification + raise "no classification" unless @feature_type=="classification" return @num_unpredicted end @@ -205,35 +231,39 @@ module Lib # and values: <int-value> def confusion_matrix - raise "no classification" unless @is_classification + raise "no classification" unless @feature_type=="classification" res = {} (0..@num_classes-1).each do |actual| (0..@num_classes-1).each do |predicted| - res[{:confusion_matrix_actual => @prediction_feature_values[actual], - :confusion_matrix_predicted => @prediction_feature_values[predicted]}] = @confusion_matrix[actual][predicted] + res[{:confusion_matrix_actual => @class_domain[actual], + :confusion_matrix_predicted => @class_domain[predicted]}] = @confusion_matrix[actual][predicted] end end return res end def area_under_roc(class_index=nil) - return prediction_feature_value_map( lambda{ |i| area_under_roc(i) } ) if class_index==nil + return prediction_feature_value_map( lambda{ |i| area_under_roc(i) } ) if + class_index==nil return 0.0 if @confidence_values==nil LOGGER.warn("TODO: implement approx computiation of AUC,"+ - "so far Wilcoxon-Man-Whitney is used (exponential)") if @predicted_values.size>1000 + "so far Wilcoxon-Man-Whitney is used (exponential)") if + @predicted_values.size>1000 + #puts "COMPUTING AUC "+class_index.to_s tp_conf = [] fp_conf = [] (0..@predicted_values.size-1).each do |i| if @predicted_values[i]==class_index - if @actual_values[i]==class_index + if @actual_values[i]==@predicted_values[i] tp_conf.push(@confidence_values[i]) else fp_conf.push(@confidence_values[i]) end end end + #puts tp_conf.inspect+"\n"+fp_conf.inspect+"\n\n" return 0.0 if tp_conf.size == 0 return 1.0 if fp_conf.size == 0 @@ -241,9 +271,9 @@ module Lib tp_conf.each do |tp| fp_conf.each do |fp| sum += 1 if tp>fp + sum += 0.5 if tp==fp end end - return sum / (tp_conf.size * fp_conf.size).to_f end @@ -441,8 +471,8 @@ module Lib def sample_correlation_coefficient # formula see http://en.wikipedia.org/wiki/Correlation_and_dependence#Pearson.27s_product-moment_coefficient return ( @num_predicted * @sum_multiply - @sum_actual * @sum_predicted ) / - ( Math.sqrt( @num_predicted * @sum_squares_actual - @sum_actual**2 ) * - Math.sqrt( @num_predicted * @sum_squares_predicted - @sum_predicted**2 ) ) + ( Math.sqrt( [0, @num_predicted * @sum_squares_actual - @sum_actual**2].max ) * + Math.sqrt( [0, @num_predicted * @sum_squares_predicted - @sum_predicted**2].max ) ) end def total_sum_of_squares @@ -460,21 +490,30 @@ module Lib # data for roc-plots ################################################################################### def get_roc_values(class_value) + + #puts "get_roc_values for class_value: "+class_value.to_s raise "no confidence values" if @confidence_values==nil - class_index = @prediction_feature_values.index(class_value) - raise "class not found "+class_value.to_s if class_index==nil and class_value!=nil + raise "no class-value specified" if class_value==nil + + class_index = @class_domain.index(class_value) + raise "class not found "+class_value.to_s if class_index==nil c = []; p = []; a = [] (0..@predicted_values.size-1).each do |i| # NOTE: not predicted instances are ignored here - if (@predicted_values[i]!=nil and (class_value==nil or @predicted_values[i]==class_index)) + if @predicted_values[i]!=nil and @predicted_values[i]==class_index c << @confidence_values[i] p << @predicted_values[i] a << @actual_values[i] end end - return {:predicted_values => p, :actual_values => a, :confidence_values => c} + # DO NOT raise exception here, maybe different validations are concated + #raise "no instance predicted as '"+class_value+"'" if p.size == 0 + + h = {:predicted_values => p, :actual_values => a, :confidence_values => c} + #puts h.inspect + return h end ######################################################################################## @@ -488,9 +527,10 @@ module Lib end def predicted_value(instance_index) - if @is_classification - @predicted_values[instance_index]==nil ? nil : @prediction_feature_values[@predicted_values[instance_index]] - else + case @feature_type + when "classification" + @predicted_values[instance_index]==nil ? nil : @class_domain[@predicted_values[instance_index]] + when "regression" @predicted_values[instance_index] end end @@ -500,9 +540,10 @@ module Lib end def actual_value(instance_index) - if @is_classification - @actual_values[instance_index]==nil ? nil : @prediction_feature_values[@actual_values[instance_index]] - else + case @feature_type + when "classification" + @actual_values[instance_index]==nil ? nil : @class_domain[@actual_values[instance_index]] + when "regression" @actual_values[instance_index] end end @@ -512,13 +553,13 @@ module Lib end def classification_miss?(instance_index) - raise "no classification" unless @is_classification + raise "no classification" unless @feature_type=="classification" return false if predicted_value(instance_index)==nil or actual_value(instance_index)==nil return predicted_value(instance_index) != actual_value(instance_index) end - def classification? - @is_classification + def feature_type + @feature_type end def confidence_values_available? @@ -535,7 +576,7 @@ module Lib def prediction_feature_value_map(proc) res = {} (0..@num_classes-1).each do |i| - res[@prediction_feature_values[i]] = proc.call(i) + res[@class_domain[i]] = proc.call(i) end return res end diff --git a/lib/rdf_provider.rb b/lib/rdf_provider.rb deleted file mode 100644 index 7fa3ecc..0000000 --- a/lib/rdf_provider.rb +++ /dev/null @@ -1,188 +0,0 @@ - -class String - def convert_underscore - gsub(/_./) do |m| - m.gsub!(/^_/,"") - m.upcase - end - end -end - -module Lib - module RDFProvider - - def to_rdf - HashToOwl.to_rdf(self) - end - - def uri - raise "not implemented" - end - - def rdf_title - raise "not implemented" - end - - # the rdf output is generated from the hash that is provided by this method - # the keys in the hash structure are used to defined type of the resource (literal, objectProperty, dataProperty) - # example: if the structure should contain a literal named "size" with value 5 - # * add :property_xy => 5 to your hash - # * make sure literal?(:property_xy) returns true - # * literal_name(:property_xy) must return "size" - # - def get_content_as_hash - raise "not implemented" - end - - def to_yaml - get_content_as_hash.to_yaml - end - - def rdf_ignore?( prop ) - self.class::IGNORE.index( prop ) != nil - end - - def literal?( prop ) - self.class::LITERALS.index( prop ) != nil - end - - def literal_name( prop ) - if self.class::LITERAL_NAMES.has_key?(prop) - self.class::LITERAL_NAMES[prop] - else - OT[prop.to_s.convert_underscore] - end - end - - def object_property?( prop ) - self.class::OBJECT_PROPERTIES.has_key?( prop ) - end - - def object_property_name( prop ) - return self.class::OBJECT_PROPERTIES[ prop ] - end - - def object_type( prop ) - return self.class::OBJECTS[ prop ] - end - - def class?(prop) - self.class::CLASSES.has_key?( prop ) - end - - def class_name( prop ) - return self.class::CLASSES[ prop ] - end - - end - - class HashToOwl - #include OpenTox::Owl - - def self.to_rdf( rdf_provider ) - - owl = OpenTox::Owl.create(rdf_provider.rdf_title, rdf_provider.uri ) - toOwl = HashToOwl.new(owl) - toOwl.add_content(rdf_provider) - toOwl.rdf - end - - def add_content( rdf_provider ) - @rdf_provider = rdf_provider - recursiv_add_content( @rdf_provider.get_content_as_hash, @owl.root_node ) - end - - def rdf - @owl.rdf - end - - private - def initialize(owl) - @owl = owl - @model = owl.model - end - - def recursiv_add_content( output, node ) - output.each do |k,v| - if v==nil - LOGGER.warn "skipping nil value: "+k.to_s - next - end - if @rdf_provider.rdf_ignore?(k) - #do nothing - elsif v.is_a?(Hash) - new_node = add_class( k, node ) - recursiv_add_content( v, new_node ) - elsif v.is_a?(Array) - v.each do |value| - if @rdf_provider.class?(k) - new_node = add_class( k, node ) - recursiv_add_content( value, new_node ) - else - add_object_property( k, value, node) - end - end - elsif @rdf_provider.literal?(k) - set_literal( k, v, node) - elsif @rdf_provider.object_property?(k) - add_object_property( k, v, node) - else - raise "illegal value k:"+k.to_s+" v:"+v.to_s - end - end - end - - def add_class( property, node ) - raise "no object prop: "+property.to_s unless @rdf_provider.object_property?(property) - raise "no class name: "+property.to_s unless @rdf_provider.class_name(property) - # to avoid anonymous nodes, make up uris for sub-objects - # use counter to make sure each uri is unique - # for example we will get ../confusion_matrix_cell/1, ../confusion_matrix_cell/2, ... - count = 1 - while (true) - res = Redland::Resource.new( File.join(node.uri.to_s,property.to_s+"/"+count.to_s) ) - break if @model.subject(@rdf_provider.object_property_name(property), res).nil? - count += 1 - end - clazz = Redland::Resource.new(@rdf_provider.class_name(property)) - @model.add res, RDF['type'], clazz - @model.add res, DC['title'], clazz - @model.add clazz, RDF['type'], OWL['Class'] - @model.add DC['title'], RDF['type'],OWL['AnnotationProperty'] - - objectProp = Redland::Resource.new(@rdf_provider.object_property_name(property)) - @model.add objectProp, RDF['type'], OWL['ObjectProperty'] - @model.add node, objectProp, res - return res - end - - def set_literal(property, value, node ) - raise "empty literal value "+property.to_s if value==nil || value.to_s.size==0 - raise "no literal name "+propety.to_s unless @rdf_provider.literal_name(property) - begin - l = @model.object(subject, @rdf_provider.literal_name(property)) - @model.delete node, @rdf_provider.literal_name(property), l - rescue - end - literalProp = Redland::Resource.new(@rdf_provider.literal_name(property)) - @model.add literalProp, RDF['type'],OWL['AnnotationProperty'] - @model.add node, literalProp, Redland::Literal.create(value) - end - - def add_object_property(property, value, node ) - raise "empty object property value "+property.to_s if value==nil || value.to_s.size==0 - raise "no object property name "+propety.to_s unless @rdf_provider.object_property_name(property) - raise "no object type "+property.to_s unless @rdf_provider.object_type(property) - - objectProp = Redland::Resource.new(@rdf_provider.object_property_name(property)) - @model.add objectProp, RDF['type'], OWL['ObjectProperty'] - - val = Redland::Resource.new(value) - type = Redland::Resource.new(@rdf_provider.object_type(property)) - @model.add node, objectProp, val - @model.add val, RDF['type'], type - @model.add type, RDF['type'], OWL['Class'] - end - - end -end diff --git a/lib/test_util.rb b/lib/test_util.rb index ecab76c..590d295 100644..100755 --- a/lib/test_util.rb +++ b/lib/test_util.rb @@ -10,11 +10,12 @@ module Lib end def self.wait_for_task(uri) - if OpenTox::Utils.task_uri?(uri) + if uri.task_uri? task = OpenTox::Task.find(uri) task.wait_for_completion - raise "task failed: "+uri.to_s+", error is:\n"+task.description if task.error? - uri = task.resultURI + #raise "task failed: "+uri.to_s+", error is:\n"+task.description if task.error? + LOGGER.error "task failed :\n"+task.to_yaml if task.error? + uri = task.result_uri end return uri end diff --git a/lib/validation_db.rb b/lib/validation_db.rb index 7afab90..0d5db21 100644..100755 --- a/lib/validation_db.rb +++ b/lib/validation_db.rb @@ -4,21 +4,10 @@ #end require "lib/merge.rb" -unless ActiveRecord::Base.connected? - ActiveRecord::Base.establish_connection( - :adapter => CONFIG[:database][:adapter], - :host => CONFIG[:database][:host], - :database => CONFIG[:database][:database], - :username => CONFIG[:database][:username], - :password => CONFIG[:database][:password] - ) - ActiveRecord::Base.logger = Logger.new("/dev/null") -end - module Lib - VAL_PROPS_GENERAL = [ :validation_uri, :model_uri, :algorithm_uri, :training_dataset_uri, :prediction_feature, - :test_dataset_uri, :test_target_dataset_uri, :prediction_dataset_uri, :created_at ] + VAL_PROPS_GENERAL = [ :validation_uri, :validation_type, :model_uri, :algorithm_uri, :training_dataset_uri, :prediction_feature, + :test_dataset_uri, :test_target_dataset_uri, :prediction_dataset_uri, :date ] VAL_PROPS_SUM = [ :num_instances, :num_without_class, :num_unpredicted ] VAL_PROPS_AVG = [:real_runtime, :percent_without_class, :percent_unpredicted ] VAL_PROPS = VAL_PROPS_GENERAL + VAL_PROPS_SUM + VAL_PROPS_AVG @@ -28,7 +17,8 @@ module Lib # :classification_statistics VAL_CLASS_PROPS_SINGLE_SUM = [ :num_correct, :num_incorrect, :confusion_matrix ] - VAL_CLASS_PROPS_SINGLE_AVG = [ :percent_correct, :percent_incorrect, :weighted_area_under_roc ] + VAL_CLASS_PROPS_SINGLE_AVG = [ :percent_correct, :percent_incorrect, + :weighted_area_under_roc, :accuracy, :weighted_accuracy ] VAL_CLASS_PROPS_SINGLE = VAL_CLASS_PROPS_SINGLE_SUM + VAL_CLASS_PROPS_SINGLE_AVG # :class_value_statistics @@ -43,26 +33,146 @@ module Lib :true_negative_rate, :true_positive_rate ] #:precision, :recall, VAL_CLASS_PROPS = VAL_CLASS_PROPS_SINGLE + VAL_CLASS_PROPS_PER_CLASS - VAL_CLASS_PROPS_EXTENDED = VAL_CLASS_PROPS + [:accuracy] # :regression_statistics VAL_REGR_PROPS = [ :root_mean_squared_error, :mean_absolute_error, :r_square, :target_variance_actual, :target_variance_predicted, :sum_squared_error, :sample_correlation_coefficient ] CROSS_VAL_PROPS = [:dataset_uri, :num_folds, :stratified, :random_seed] - CROSS_VAL_PROPS_REDUNDANT = [:crossvalidation_uri, :algorithm_uri, :created_at] + CROSS_VAL_PROPS + CROSS_VAL_PROPS_REDUNDANT = [:crossvalidation_uri, :algorithm_uri, :date] + CROSS_VAL_PROPS - ALL_PROPS = VAL_PROPS + VAL_CV_PROPS + VAL_CLASS_PROPS_EXTENDED + VAL_REGR_PROPS + CROSS_VAL_PROPS + ALL_PROPS = VAL_PROPS + VAL_CV_PROPS + VAL_CLASS_PROPS + VAL_REGR_PROPS + CROSS_VAL_PROPS VAL_MERGE_GENERAL = VAL_PROPS_GENERAL + VAL_CV_PROPS + [:classification_statistics, :regression_statistics] + CROSS_VAL_PROPS VAL_MERGE_SUM = VAL_PROPS_SUM + VAL_CLASS_PROPS_SINGLE_SUM + VAL_CLASS_PROPS_PER_CLASS_SUM VAL_MERGE_AVG = VAL_PROPS_AVG + VAL_CLASS_PROPS_SINGLE_AVG + VAL_CLASS_PROPS_PER_CLASS_AVG + VAL_REGR_PROPS - - class Validation < ActiveRecord::Base - serialize :classification_statistics - serialize :regression_statistics + + +# class Validation < ActiveRecord::Base +# serialize :classification_statistics +# serialize :regression_statistics +# +# alias_attribute :date, :created_at + + class Validation + include DataMapper::Resource + + property :id, Serial + property :validation_type, String, :length => 512 + property :model_uri, String, :length => 512 + property :algorithm_uri, String, :length => 512 + property :training_dataset_uri, String, :length => 512 + property :test_target_dataset_uri, String, :length => 512 + property :test_dataset_uri, String, :length => 512 + property :prediction_dataset_uri, String, :length => 512 + property :prediction_feature, String, :length => 512 + property :created_at, DateTime + property :num_instances, Integer + property :num_without_class, Integer + property :num_unpredicted, Integer + property :crossvalidation_id, Integer + property :crossvalidation_fold, Integer + property :real_runtime, Float + property :percent_without_class, Float + property :percent_unpredicted, Float + property :classification_statistics, Object + property :regression_statistics, Object + property :finished, Boolean, :default => false + + attr_accessor :subjectid + + after :save, :check_policy + private + def check_policy + OpenTox::Authorization.check_policy(validation_uri, subjectid) + end + + public + def date + created_at + end + + def validation_uri + raise "no id" if self.id==nil + $url_provider.url_for("/"+self.id.to_s, :full) + end + + def crossvalidation_uri + $url_provider.url_for("/crossvalidation/"+self.crossvalidation_id.to_s, :full) if self.crossvalidation_id + end + + def self.classification_property?( property ) + VAL_CLASS_PROPS.include?( property ) + end + + def self.depends_on_class_value?( property ) + VAL_CLASS_PROPS_PER_CLASS.include?( property ) + end + + def self.complement_exists?( property ) + VAL_CLASS_PROPS_PER_CLASS_COMPLEMENT_EXISTS.include?( property ) + end + end - class Crossvalidation < ActiveRecord::Base +# class Crossvalidation < ActiveRecord::Base +# alias_attribute :date, :created_at + class Crossvalidation + include DataMapper::Resource + + property :id, Serial + property :algorithm_uri, String, :length => 512 + property :dataset_uri, String, :length => 512 + property :created_at, DateTime + property :num_folds, Integer, :default => 10 + property :random_seed, Integer, :default => 1 + property :finished, Boolean, :default => false + property :stratified, Boolean, :default => false + + attr_accessor :subjectid + + after :save, :check_policy + private + def check_policy + OpenTox::Authorization.check_policy(crossvalidation_uri, subjectid) + end + + public + def date + created_at + end + + def crossvalidation_uri + raise "no id" if self.id==nil + $url_provider.url_for("/crossvalidation/"+self.id.to_s, :full) if self.id + end + + # convenience method to list all crossvalidations that are unique + # in terms of dataset_uri,num_folds,stratified,random_seed + # further conditions can be specified in __conditions__ + def self.find_all_uniq(conditions={}, subjectid=nil ) + #cvs = Lib::Crossvalidation.find(:all, :conditions => conditions) + cvs = Lib::Crossvalidation.all(:conditions => conditions) + uniq = [] + cvs.each do |cv| + next if AA_SERVER and !OpenTox::Authorization.authorized?(cv.crossvalidation_uri,"GET",subjectid) + match = false + uniq.each do |cv2| + if cv.dataset_uri == cv2.dataset_uri and cv.num_folds == cv2.num_folds and + cv.stratified == cv2.stratified and cv.random_seed == cv2.random_seed + match = true + break + end + end + uniq << cv unless match + end + uniq + end end end + + +Lib::Validation.auto_upgrade! +Lib::Validation.raise_on_save_failure = true +Lib::Crossvalidation.auto_upgrade! +Lib::Crossvalidation.raise_on_save_failure = true |