summary | refs | log | tree | commit | diff
path: root/lib
diff options
context:
space:
mode:
author: Christoph Helma <helma@in-silico.ch> 2011-03-03 13:00:47 +0100
committer: Christoph Helma <helma@in-silico.ch> 2011-03-03 13:00:47 +0100
commit: dcd0a5a659c303c50a59d1271947851245db10e7 (patch)
tree: eaae695edf72c3a99cde58b9caaa7825d6d4f31a /lib
parent: 8b46f5a4f389d7cd54f6e8b38025d275f9d3ed1b (diff)
parent: e57856a3c2cd10df207e722301c24a022e9fd802 (diff)
Merge remote branch 'mguetlein/test' into development
Conflicts: application.rb example.rb lib/validation_db.rb nightly/nightly.rb report/environment.rb test/test_examples.rb test/test_examples_util.rb
Diffstat (limited to 'lib')
-rwxr-xr-x lib/active_record_setup.rb 50
-rw-r--r-- lib/data_mapper_util.rb 37
-rw-r--r-- lib/format_util.rb 68
-rwxr-xr-x [-rw-r--r--] lib/ot_predictions.rb 202
-rwxr-xr-x [-rw-r--r--] lib/predictions.rb 143
-rw-r--r-- lib/rdf_provider.rb 188
-rwxr-xr-x [-rw-r--r--] lib/test_util.rb 7
-rwxr-xr-x [-rw-r--r--] lib/validation_db.rb 154
8 files changed, 535 insertions, 314 deletions
diff --git a/lib/active_record_setup.rb b/lib/active_record_setup.rb
new file mode 100755
index 0000000..b43e692
--- /dev/null
+++ b/lib/active_record_setup.rb
@@ -0,0 +1,50 @@
+
+#gem "activerecord", "= 2.3.8"
+#gem "ar-extensions", "= 0.9.2"
+['rubygems', 'logger', 'active_record', 'opentox-ruby' ].each do |g| #'ar-extensions',
+ require g
+end
+
+# Connect ActiveRecord to the database described by CONFIG[:database], but only
+# if no connection has been established yet; ActiveRecord's own log output is
+# sent to /dev/null.
+if !ActiveRecord::Base.connected?
+  db_config = CONFIG[:database]
+  connection_options = {}
+  [:adapter, :host, :database, :username, :password].each do |option|
+    connection_options[option] = db_config[option]
+  end
+  ActiveRecord::Base.establish_connection(connection_options)
+  ActiveRecord::Base.logger = Logger.new("/dev/null")
+end
+
+# Reopens ActiveRecord::Base to add a class-level "like" finder to every model.
+class ActiveRecord::Base
+
+ # Maps each key of filter_params onto a column name of this model and runs a
+ # find with "<column>_like" conditions.
+ #
+ # NOTE: the method is currently disabled. The unconditional raise below fires
+ # on every call, so none of the code after it is ever executed.
+ def self.find_like(filter_params)
+
+ raise "find like removed"
+
+ # ---- unreachable from here on (see NOTE above) ----
+ #puts "params before "+filter_params.inspect
+ filter_params.keys.each do |k|
+ key = k.to_s
+ # Column lookup tries, in order: the key as given, its from_rdf_format form,
+ # that form with "_uri" appended, and finally with "_uris".
+ unless self.column_names.include?(key)
+ key = key.from_rdf_format
+ unless self.column_names.include?(key)
+ key = key+"_uri"
+ unless self.column_names.include?(key)
+ key = key+"s"
+ unless self.column_names.include?(key)
+ err = "no attribute found: '"+k.to_s+"'"
+# if $sinatra
+# $sinatra.halt 400,err
+# else
+ raise err
+# end
+ end
+ end
+ end
+ end
+ # re-key the entry in place under "<column>_like"
+ filter_params[key+"_like"] = filter_params.delete(k)
+ end
+ #puts "params after "+filter_params.inspect
+ self.find(:all, :conditions => filter_params)
+ end
+end \ No newline at end of file
diff --git a/lib/data_mapper_util.rb b/lib/data_mapper_util.rb
new file mode 100644
index 0000000..23f52f5
--- /dev/null
+++ b/lib/data_mapper_util.rb
@@ -0,0 +1,37 @@
+
+#DataObjects::Mysql.logger = DataObjects::Logger.new(STDOUT, 0)
+
+module Lib
+  # Helpers for querying DataMapper models with externally supplied filter params.
+  module DataMapperUtil
+
+    # Rewrites the keys of +params+ IN PLACE so that they are symbols naming
+    # properties of +model+, and returns the same hash.
+    # * a "subjectid" entry is removed
+    # * every other key must resolve to a property name, otherwise
+    #   OpenTox::BadRequestError is raised
+    def self.check_params(model, params)
+      prop_names = model.properties.collect do |p|
+        p.name.to_s if p.is_a?(DataMapper::Property::Object)
+      end
+      params.keys.each do |k|
+        if k.to_s == "subjectid"
+          params.delete(k)
+          next
+        end
+        params[resolve_property_name(prop_names, k).to_sym] = params.delete(k)
+      end
+      params
+    end
+
+    # Runs model.all with the filter params normalized by check_params.
+    def self.all(model, filter_params)
+      normalized = check_params(model, filter_params)
+      model.all(normalized)
+    end
+
+    # Finds the property name for key +k+. Tried in order: the key itself, its
+    # from_rdf_format form, that form plus "_uri", that form plus "_uris".
+    def self.resolve_property_name(prop_names, k)
+      key = k.to_s
+      return key if prop_names.include?(key)
+      key = key.from_rdf_format
+      return key if prop_names.include?(key)
+      key = key+"_uri"
+      return key if prop_names.include?(key)
+      key = key+"s"
+      return key if prop_names.include?(key)
+      raise OpenTox::BadRequestError.new("no attribute found: '"+k.to_s+"'")
+    end
+    private_class_method :resolve_property_name
+
+  end
+end
\ No newline at end of file
diff --git a/lib/format_util.rb b/lib/format_util.rb
new file mode 100644
index 0000000..3d3a3e6
--- /dev/null
+++ b/lib/format_util.rb
@@ -0,0 +1,68 @@
+
+
+class String
+
+  # Names served by the DC namespace and by the RDF namespace; every other
+  # name is looked up on OT (see to_owl_uri).
+  DC_KEYS = [ "title", "creator", "date", "format" ]
+  RDF_KEYS = [ "type" ]
+
+  # Underscore name -> camelCase name, dropping a trailing "_uri" / "_uris":
+  #   "prediction_feature" -> "predictionFeature"
+  #   "test_dataset_uri"   -> "testDataset"
+  #   "validation_uris"    -> "validation"
+  def to_rdf_format
+    without_suffix = gsub(/_uri(s|)$/,"")
+    without_suffix.gsub(/_./) { |m| m.gsub(/^_/,"").upcase }
+  end
+
+  # camelCase name -> underscore name ("predictionFeature" -> "prediction_feature").
+  # Does not restore a "_uri" suffix removed by to_rdf_format.
+  def from_rdf_format
+    gsub(/[A-Z]/) { |m| "_"+m.downcase }
+  end
+
+  # Sends this string as a method name to DC, RDF or OT, depending on
+  # whether it is listed in DC_KEYS, RDF_KEYS or neither.
+  def to_owl_uri
+    namespace =
+      if DC_KEYS.include?(self)
+        DC
+      elsif RDF_KEYS.include?(self)
+        RDF
+      else
+        OT
+      end
+    namespace.send(self)
+  end
+end
+
+class Hash
+
+  # Returns a new Hash in which every key k is replaced by k.to_s.to_rdf_format.
+  # Hash values, and Hashes found directly inside Array values, are converted
+  # recursively; all other values are copied over unchanged.
+  def keys_to_rdf_format
+    converted = {}
+    each do |key, value|
+      new_value =
+        case value
+        when Hash then value.keys_to_rdf_format
+        when Array then value.collect{ |e| e.is_a?(Hash) ? e.keys_to_rdf_format : e }
+        else value
+        end
+      converted[key.to_s.to_rdf_format] = new_value
+    end
+    converted
+  end
+
+  # Same traversal as keys_to_rdf_format, but keys become k.to_s.to_owl_uri.
+  def keys_to_owl_uris
+    converted = {}
+    each do |key, value|
+      new_value =
+        case value
+        when Hash then value.keys_to_owl_uris
+        when Array then value.collect{ |e| e.is_a?(Hash) ? e.keys_to_owl_uris : e }
+        else value
+        end
+      converted[key.to_s.to_owl_uri] = new_value
+    end
+    converted
+  end
+
+end
+
diff --git a/lib/ot_predictions.rb b/lib/ot_predictions.rb
index 63debc0..eb80205 100644..100755
--- a/lib/ot_predictions.rb
+++ b/lib/ot_predictions.rb
@@ -15,22 +15,18 @@ module Lib
return @compounds[instance_index]
end
- def initialize(is_classification, test_dataset_uri, test_target_dataset_uri, prediction_feature, prediction_dataset_uri, predicted_variable)
+ def initialize(feature_type, test_dataset_uri, test_target_dataset_uri,
+ prediction_feature, prediction_dataset_uri, predicted_variable, subjectid=nil, task=nil)
- LOGGER.debug("loading prediciton via test-dateset:'"+test_dataset_uri.to_s+
+ LOGGER.debug("loading prediciton via test-dataset:'"+test_dataset_uri.to_s+
"', test-target-datset:'"+test_target_dataset_uri.to_s+
"', prediction-dataset:'"+prediction_dataset_uri.to_s+
"', prediction_feature: '"+prediction_feature.to_s+"' "+
"', predicted_variable: '"+predicted_variable.to_s+"'")
- if prediction_feature =~ /ambit.uni-plovdiv.bg.*feature.*264185/
- LOGGER.warn "HACK for report example"
- prediction_feature = "http://ambit.uni-plovdiv.bg:8080/ambit2/feature/264187"
- end
-
predicted_variable=prediction_feature if predicted_variable==nil
- test_dataset = OpenTox::Dataset.find test_dataset_uri
+ test_dataset = OpenTox::Dataset.find test_dataset_uri,subjectid
raise "test dataset not found: '"+test_dataset_uri.to_s+"'" unless test_dataset
raise "prediction_feature missing" unless prediction_feature
@@ -40,9 +36,9 @@ module Lib
raise "prediction_feature not found in test_dataset, specify a test_target_dataset\n"+
"prediction_feature: '"+prediction_feature.to_s+"'\n"+
"test_dataset: '"+test_target_dataset_uri.to_s+"'\n"+
- "available features are: "+test_target_dataset.features.inspect if test_target_dataset.features.index(prediction_feature)==nil
+ "available features are: "+test_target_dataset.features.inspect if test_target_dataset.features.keys.index(prediction_feature)==nil
else
- test_target_dataset = OpenTox::Dataset.find test_target_dataset_uri
+ test_target_dataset = OpenTox::Dataset.find test_target_dataset_uri,subjectid
raise "test target datset not found: '"+test_target_dataset_uri.to_s+"'" unless test_target_dataset
if CHECK_VALUES
test_dataset.compounds.each do |c|
@@ -52,38 +48,47 @@ module Lib
raise "prediction_feature not found in test_target_dataset\n"+
"prediction_feature: '"+prediction_feature.to_s+"'\n"+
"test_target_dataset: '"+test_target_dataset_uri.to_s+"'\n"+
- "available features are: "+test_target_dataset.features.inspect if test_target_dataset.features.index(prediction_feature)==nil
+ "available features are: "+test_target_dataset.features.inspect if test_target_dataset.features.keys.index(prediction_feature)==nil
end
@compounds = test_dataset.compounds
LOGGER.debug "test dataset size: "+@compounds.size.to_s
- raise "test dataset is empty" unless @compounds.size>0
- class_values = is_classification ? OpenTox::Feature.domain(prediction_feature) : nil
+ raise "test dataset is empty "+test_dataset_uri.to_s unless @compounds.size>0
+ class_values = feature_type=="classification" ? OpenTox::Feature.find(prediction_feature, subjectid).domain : nil
actual_values = []
@compounds.each do |c|
- value = test_target_dataset.get_value(c, prediction_feature)
-
- if is_classification
- value = value.to_s unless value==nil
- raise "illegal class_value of actual value "+value.to_s+" class: "+
- value.class.to_s unless value==nil or class_values.index(value)!=nil
- actual_values.push class_values.index(value)
- else
- begin
- value = value.to_f unless value==nil or value.is_a?(Numeric)
- rescue
- LOGGER.warn "no numeric value for regression: '"+value.to_s+"'"
- value = nil
- end
- actual_values.push value
+ case feature_type
+ when "classification"
+ actual_values << classification_value(test_target_dataset, c, prediction_feature, class_values)
+ when "regression"
+ actual_values << regression_value(test_target_dataset, c, prediction_feature)
end
end
+ task.progress(40) if task # loaded actual values
- prediction_dataset = OpenTox::Dataset.find prediction_dataset_uri
+ prediction_dataset = OpenTox::Dataset.find prediction_dataset_uri,subjectid
raise "prediction dataset not found: '"+prediction_dataset_uri.to_s+"'" unless prediction_dataset
- raise "prediction-feature not found: '"+predicted_variable+"' in prediction-dataset: "+prediction_dataset_uri.to_s+", available features: "+prediction_dataset.features.inspect if prediction_dataset.features.index(predicted_variable)==nil
+ # TODO: remove LAZAR_PREDICTION_DATASET_HACK
+ no_prediction_feature = prediction_dataset.features.keys.index(predicted_variable)==nil
+ if no_prediction_feature
+ one_entry_per_compound = true
+ @compounds.each do |c|
+ if prediction_dataset.data_entries[c] and prediction_dataset.data_entries[c].size != 1
+ one_entry_per_compound = false
+ break
+ end
+ end
+ msg = "prediction-feature not found: '"+predicted_variable+"' in prediction-dataset: "+prediction_dataset_uri.to_s+", available features: "+
+ prediction_dataset.features.keys.inspect
+ if one_entry_per_compound
+ LOGGER.warn msg
+ else
+ raise msg
+ end
+ end
+
raise "more predicted than test compounds test:"+@compounds.size.to_s+" < prediction:"+
prediction_dataset.compounds.size.to_s if @compounds.size < prediction_dataset.compounds.size
if CHECK_VALUES
@@ -100,41 +105,138 @@ module Lib
predicted_values << nil
confidence_values << nil
else
- if is_classification
- value = prediction_dataset.get_predicted_class(c, predicted_variable)
- value = value.to_s unless value==nil
- raise "illegal class_value of predicted value "+value.to_s+" class: "+value.class.to_s unless value==nil or class_values.index(value)!=nil
- predicted_values << class_values.index(value)
- confidence_values << prediction_dataset.get_prediction_confidence(c, predicted_variable)
- else
- value = prediction_dataset.get_predicted_regression(c, predicted_variable)
- begin
- value = value.to_f unless value==nil or value.is_a?(Numeric)
- rescue
- LOGGER.warn "no numeric value for regression: '"+value.to_s+"'"
- value = nil
- end
- predicted_values << value
- confidence_values << nil
+ case feature_type
+ when "classification"
+ # TODO: remove LAZAR_PREDICTION_DATASET_HACK
+ predicted_values << classification_value(prediction_dataset, c, no_prediction_feature ? nil : predicted_variable, class_values)
+ when "regression"
+ predicted_values << regression_value(prediction_dataset, c, no_prediction_feature ? nil : predicted_variable)
+ end
+ # TODO confidence_values << prediction_dataset.get_prediction_confidence(c, predicted_variable)
+ conf = 1
+ begin
+ feature = prediction_dataset.data_entries[c].keys[0]
+ feature_data = prediction_dataset.features[feature]
+ conf = feature_data[OT.confidence] if feature_data[OT.confidence]!=nil
+ rescue
+ LOGGER.warn "could not get confidence"
end
+ confidence_values << conf
end
end
+ task.progress(80) if task # loaded predicted values and confidence
- super(predicted_values, actual_values, confidence_values, is_classification, class_values)
+ super(predicted_values, actual_values, confidence_values, feature_type, class_values)
raise "illegal num compounds "+num_info if @compounds.size != @predicted_values.size
+ task.progress(100) if task # done with the mathmatics
end
+ private
+ # Returns the value of +feature+ for +compound+ in +dataset+ as a number.
+ #
+ # nil and Numeric values are passed through unchanged. Anything else is
+ # converted with the strict Kernel#Float, so a value that is not a valid
+ # number (e.g. "abc") is logged and reported as nil. The previous to_f
+ # conversion never raised for strings and silently turned such values
+ # into 0.0, which made the warning below unreachable for them.
+ def regression_value(dataset, compound, feature)
+   v = value(dataset, compound, feature)
+   return v if v==nil or v.is_a?(Numeric)
+   begin
+     Float(v)
+   rescue ArgumentError, TypeError
+     LOGGER.warn "no numeric value for regression: '"+v.to_s+"'"
+     nil
+   end
+ end
+
+ # Returns the index within +class_values+ of the value that +dataset+ holds
+ # for +compound+ / +feature+, or nil when there is no value.
+ # Raises if a value is present but is not one of +class_values+.
+ def classification_value(dataset, compound, feature, class_values)
+   found = value(dataset, compound, feature)
+   position = class_values.index(found)
+   if position==nil and found!=nil
+     raise "illegal class_value of prediction (value is '"+found.to_s+"', class is '"+found.class.to_s+"'), possible values are "+
+       class_values.inspect
+   end
+   position
+ end
+
+ # Extracts the single value stored for +compound+ in +dataset+.
+ #
+ # feature:: key into dataset.data_entries[compound]; when nil, the first
+ #           entry of that compound is used instead
+ #
+ # Returns nil when the compound has no entries, the entry is missing, the
+ # stored array is empty, or the value's string form is empty.
+ # Raises when the stored entry is not an Array, when it holds several
+ # non-equal values, or when the single value is itself an Array.
+ def value(dataset, compound, feature)
+   entries = dataset.data_entries[compound]
+   return nil if entries==nil
+   v = feature==nil ? entries.values[0] : entries[feature]
+   return nil if v==nil
+   raise "no array "+v.class.to_s+" : '"+v.to_s+"'" unless v.is_a?(Array)
+   # non-destructive uniq: the array belongs to the dataset and must not be
+   # modified as a side effect of reading it (the former uniq! did that)
+   distinct = v.uniq
+   raise "not yet implemented: multiple non-equal values "+compound.to_s+" "+distinct.inspect if distinct.size>1
+   v = distinct[0] # nil for an empty array
+   raise "array" if v.is_a?(Array)
+   v = nil if v.to_s.size==0
+   v
+ end
+ public
def compute_stats
res = {}
- if @is_classification
- (Lib::VAL_CLASS_PROPS_EXTENDED).each{ |s| res[s] = send(s)}
- else
+ case @feature_type
+ when "classification"
+ (Lib::VAL_CLASS_PROPS).each{ |s| res[s] = send(s)}
+ when "regression"
(Lib::VAL_REGR_PROPS).each{ |s| res[s] = send(s) }
end
return res
end
+ # Table form of this single prediction object: delegates to
+ # OTPredictions.to_array with a one-element list and that method's defaults
+ # for add_pic and format.
+ def to_array()
+ OTPredictions.to_array( [self] )
+ end
+
+ # Builds a table (array of rows) from one or more prediction objects.
+ #
+ # predictions:: list of prediction objects; the first one decides which
+ #               optional columns appear in the header
+ # add_pic::     when true, each row starts with "<compound-uri>/image" and
+ #               the header starts with "compound"
+ # format::      when true, actual, predicted and confidence values are
+ #               rendered with to_nice_s
+ #
+ # Row 0 is the header. Columns: [compound], actual value, predicted value,
+ # [missclassified, classification only], [confidence value, if available],
+ # compound-uri. The misclassified cell is 1/0, or nil when the actual or
+ # the predicted value is missing.
+ #
+ # The image cell used to be added to every row regardless of add_pic, while
+ # the "compound" header was only added when add_pic was set, so rows and
+ # header were misaligned with the default arguments. Both now follow add_pic.
+ def self.to_array( predictions, add_pic=false, format=false )
+
+   res = []
+   predictions.each do |p|
+     (0..p.num_instances-1).each do |i|
+       a = []
+       a << p.identifier(i)+"/image" if add_pic
+       a << (format ? p.actual_value(i).to_nice_s : p.actual_value(i))
+       a << (format ? p.predicted_value(i).to_nice_s : p.predicted_value(i))
+       if p.feature_type=="classification"
+         if (p.predicted_value(i)!=nil and p.actual_value(i)!=nil)
+           a << (p.classification_miss?(i) ? 1 : 0)
+         else
+           a << nil
+         end
+       end
+       if p.confidence_values_available?
+         a << (format ? p.confidence_value(i).to_nice_s : p.confidence_value(i))
+       end
+       a << p.identifier(i)
+       res << a
+     end
+   end
+
+   # an empty prediction list yields just the mandatory header columns
+   first = predictions[0]
+   header = []
+   header << "compound" if add_pic
+   header << "actual value"
+   header << "predicted value"
+   header << "missclassified" if first and first.feature_type=="classification"
+   header << "confidence value" if first and first.confidence_values_available?
+   header << "compound-uri"
+   res.insert(0, header)
+
+   return res
+ end
+
end
end
diff --git a/lib/predictions.rb b/lib/predictions.rb
index f6351f8..5850024 100644..100755
--- a/lib/predictions.rb
+++ b/lib/predictions.rb
@@ -22,20 +22,22 @@ module Lib
def initialize( predicted_values,
actual_values,
confidence_values,
- is_classification,
- prediction_feature_values=nil )
+ feature_type,
+ class_domain=nil )
@predicted_values = predicted_values
@actual_values = actual_values
@confidence_values = confidence_values
- @is_classification = is_classification
- @prediction_feature_values = prediction_feature_values
+ @feature_type = feature_type
+ @class_domain = class_domain
@num_classes = 1
#puts "predicted: "+predicted_values.inspect
#puts "actual: "+actual_values.inspect
#puts "confidence: "+confidence_values.inspect
+ raise "unknown feature_type: "+@feature_type.to_s unless
+ @feature_type=="classification" || @feature_type=="regression"
raise "no predictions" if @predicted_values.size == 0
num_info = "predicted:"+@predicted_values.size.to_s+
" confidence:"+@confidence_values.size.to_s+" actual:"+@actual_values.size.to_s
@@ -43,23 +45,28 @@ module Lib
raise "illegal num confidence values "+num_info if @confidence_values.size != @predicted_values.size
@confidence_values.each{ |c| raise "illegal confidence value: '"+c.to_s+"'" unless c==nil or (c.is_a?(Numeric) and c>=0 and c<=1) }
- conf_val_tmp = {}
- @confidence_values.each{ |c| conf_val_tmp[c] = nil }
- if conf_val_tmp.keys.size<2
- LOGGER.warn("prediction w/o confidence values");
- @confidence_values=nil
- end
+ ## check if there is more than one different conf value
+ ## DEPRECATED? not sure anymore what this was about,
+ ## I am pretty sure this was for r-plot of roc curves
+ ## roc curvers are now plotted manually
+ #conf_val_tmp = {}
+ #@confidence_values.each{ |c| conf_val_tmp[c] = nil }
+ #if conf_val_tmp.keys.size<2
+ # LOGGER.warn("prediction w/o confidence values");
+ # @confidence_values=nil
+ #end
- if @is_classification
- raise "prediction_feature_values missing while performing classification" unless @prediction_feature_values
- @num_classes = @prediction_feature_values.size
+ case @feature_type
+ when "classification"
+ raise "class_domain missing while performing classification" unless @class_domain
+ @num_classes = @class_domain.size
raise "num classes < 2" if @num_classes<2
{ "predicted"=>@predicted_values, "actual"=>@actual_values }.each do |s,values|
values.each{ |v| raise "illegal "+s+" classification-value ("+v.to_s+"),"+
- "has to be either nil or index of predicted-values" if v!=nil and (v<0 or v>@num_classes)}
+ "has to be either nil or index of predicted-values" if v!=nil and (!v.is_a?(Numeric) or v<0 or v>@num_classes)}
end
- else
- raise "prediction_feature_values != nil while performing regression" if @prediction_feature_values
+ when "regression" # was misspelled "regresssion", so the regression checks below never ran
+ raise "class_domain != nil while performing regression" if @class_domain
{ "predicted"=>@predicted_values, "actual"=>@actual_values }.each do |s,values|
values.each{ |v| raise "illegal "+s+" regression-value ("+v.to_s+"),"+
"has to be either nil or number" unless v==nil or v.is_a?(Numeric)}
@@ -79,15 +86,16 @@ module Lib
@num_predicted = 0
@num_unpredicted = 0
- if @is_classification
+ case @feature_type
+ when "classification"
@confusion_matrix = []
- @prediction_feature_values.each do |v|
+ @class_domain.each do |v|
@confusion_matrix.push( Array.new( @num_classes, 0 ) )
end
@num_correct = 0
@num_incorrect = 0
- else
+ when "regression"
@sum_error = 0
@sum_abs_error = 0
@sum_squared_error = 0
@@ -118,14 +126,15 @@ module Lib
else
@num_predicted += 1
- if @is_classification
+ case @feature_type
+ when "classification"
@confusion_matrix[actual_value][predicted_value] += 1
if (predicted_value == actual_value)
@num_correct += 1
else
@num_incorrect += 1
end
- else
+ when "regression"
delta = predicted_value - actual_value
@sum_error += delta
@sum_abs_error += delta.abs
@@ -152,21 +161,38 @@ module Lib
end
def percent_correct
- raise "no classification" unless @is_classification
+ raise "no classification" unless @feature_type=="classification"
return 0 if @num_with_actual_value==0
- return 100 * @num_correct / @num_with_actual_value.to_f
+ return 100 * @num_correct / (@num_with_actual_value - @num_unpredicted).to_f
end
def percent_incorrect
- raise "no classification" unless @is_classification
+ raise "no classification" unless @feature_type=="classification"
return 0 if @num_with_actual_value==0
- return 100 * @num_incorrect / @num_with_actual_value.to_f
+ return 100 * @num_incorrect / (@num_with_actual_value - @num_unpredicted).to_f
end
def accuracy
return percent_correct / 100.0
end
+ # Confidence-weighted accuracy for classification: the summed confidence of
+ # the correctly predicted instances divided by the summed confidence of all
+ # predicted instances. Instances without a prediction or without a
+ # confidence value are skipped. Returns 0 when nothing can be weighted or
+ # nothing was predicted correctly.
+ #
+ # Sums are accumulated as floats: with Integer confidences the former
+ # "correct / total" was an integer division and truncated the result to 0
+ # or 1. A nil confidence used to raise a TypeError in the summation.
+ #
+ # Raises "no classification" for non-classification predictions.
+ def weighted_accuracy
+   raise "no classification" unless @feature_type=="classification"
+   total = 0.0
+   correct = 0.0
+   @predicted_values.each_with_index do |predicted, i|
+     confidence = @confidence_values[i]
+     next if predicted==nil or confidence==nil
+     total += confidence
+     correct += confidence if @actual_values[i]==predicted
+   end
+   return 0 if total==0 || correct==0
+   correct / total
+ end
+
def percent_unpredicted
return 0 if @num_with_actual_value==0
return 100 * @num_unpredicted / @num_with_actual_value.to_f
@@ -186,17 +212,17 @@ module Lib
end
def num_correct
- raise "no classification" unless @is_classification
+ raise "no classification" unless @feature_type=="classification"
return @num_correct
end
def num_incorrect
- raise "no classification" unless @is_classification
+ raise "no classification" unless @feature_type=="classification"
return @num_incorrect
end
def num_unclassified
- raise "no classification" unless @is_classification
+ raise "no classification" unless @feature_type=="classification"
return @num_unpredicted
end
@@ -205,35 +231,39 @@ module Lib
# and values: <int-value>
def confusion_matrix
- raise "no classification" unless @is_classification
+ raise "no classification" unless @feature_type=="classification"
res = {}
(0..@num_classes-1).each do |actual|
(0..@num_classes-1).each do |predicted|
- res[{:confusion_matrix_actual => @prediction_feature_values[actual],
- :confusion_matrix_predicted => @prediction_feature_values[predicted]}] = @confusion_matrix[actual][predicted]
+ res[{:confusion_matrix_actual => @class_domain[actual],
+ :confusion_matrix_predicted => @class_domain[predicted]}] = @confusion_matrix[actual][predicted]
end
end
return res
end
def area_under_roc(class_index=nil)
- return prediction_feature_value_map( lambda{ |i| area_under_roc(i) } ) if class_index==nil
+ return prediction_feature_value_map( lambda{ |i| area_under_roc(i) } ) if
+ class_index==nil
return 0.0 if @confidence_values==nil
LOGGER.warn("TODO: implement approx computiation of AUC,"+
- "so far Wilcoxon-Man-Whitney is used (exponential)") if @predicted_values.size>1000
+ "so far Wilcoxon-Man-Whitney is used (exponential)") if
+ @predicted_values.size>1000
+ #puts "COMPUTING AUC "+class_index.to_s
tp_conf = []
fp_conf = []
(0..@predicted_values.size-1).each do |i|
if @predicted_values[i]==class_index
- if @actual_values[i]==class_index
+ if @actual_values[i]==@predicted_values[i]
tp_conf.push(@confidence_values[i])
else
fp_conf.push(@confidence_values[i])
end
end
end
+ #puts tp_conf.inspect+"\n"+fp_conf.inspect+"\n\n"
return 0.0 if tp_conf.size == 0
return 1.0 if fp_conf.size == 0
@@ -241,9 +271,9 @@ module Lib
tp_conf.each do |tp|
fp_conf.each do |fp|
sum += 1 if tp>fp
+ sum += 0.5 if tp==fp
end
end
-
return sum / (tp_conf.size * fp_conf.size).to_f
end
@@ -441,8 +471,8 @@ module Lib
def sample_correlation_coefficient
# formula see http://en.wikipedia.org/wiki/Correlation_and_dependence#Pearson.27s_product-moment_coefficient
return ( @num_predicted * @sum_multiply - @sum_actual * @sum_predicted ) /
- ( Math.sqrt( @num_predicted * @sum_squares_actual - @sum_actual**2 ) *
- Math.sqrt( @num_predicted * @sum_squares_predicted - @sum_predicted**2 ) )
+ ( Math.sqrt( [0, @num_predicted * @sum_squares_actual - @sum_actual**2].max ) *
+ Math.sqrt( [0, @num_predicted * @sum_squares_predicted - @sum_predicted**2].max ) )
end
def total_sum_of_squares
@@ -460,21 +490,30 @@ module Lib
# data for roc-plots ###################################################################################
def get_roc_values(class_value)
+
+ #puts "get_roc_values for class_value: "+class_value.to_s
raise "no confidence values" if @confidence_values==nil
- class_index = @prediction_feature_values.index(class_value)
- raise "class not found "+class_value.to_s if class_index==nil and class_value!=nil
+ raise "no class-value specified" if class_value==nil
+
+ class_index = @class_domain.index(class_value)
+ raise "class not found "+class_value.to_s if class_index==nil
c = []; p = []; a = []
(0..@predicted_values.size-1).each do |i|
# NOTE: not predicted instances are ignored here
- if (@predicted_values[i]!=nil and (class_value==nil or @predicted_values[i]==class_index))
+ if @predicted_values[i]!=nil and @predicted_values[i]==class_index
c << @confidence_values[i]
p << @predicted_values[i]
a << @actual_values[i]
end
end
- return {:predicted_values => p, :actual_values => a, :confidence_values => c}
+ # DO NOT raise exception here, maybe different validations are concated
+ #raise "no instance predicted as '"+class_value+"'" if p.size == 0
+
+ h = {:predicted_values => p, :actual_values => a, :confidence_values => c}
+ #puts h.inspect
+ return h
end
########################################################################################
@@ -488,9 +527,10 @@ module Lib
end
def predicted_value(instance_index)
- if @is_classification
- @predicted_values[instance_index]==nil ? nil : @prediction_feature_values[@predicted_values[instance_index]]
- else
+ case @feature_type
+ when "classification"
+ @predicted_values[instance_index]==nil ? nil : @class_domain[@predicted_values[instance_index]]
+ when "regression"
@predicted_values[instance_index]
end
end
@@ -500,9 +540,10 @@ module Lib
end
def actual_value(instance_index)
- if @is_classification
- @actual_values[instance_index]==nil ? nil : @prediction_feature_values[@actual_values[instance_index]]
- else
+ case @feature_type
+ when "classification"
+ @actual_values[instance_index]==nil ? nil : @class_domain[@actual_values[instance_index]]
+ when "regression"
@actual_values[instance_index]
end
end
@@ -512,13 +553,13 @@ module Lib
end
def classification_miss?(instance_index)
- raise "no classification" unless @is_classification
+ raise "no classification" unless @feature_type=="classification"
return false if predicted_value(instance_index)==nil or actual_value(instance_index)==nil
return predicted_value(instance_index) != actual_value(instance_index)
end
- def classification?
- @is_classification
+ def feature_type
+ @feature_type
end
def confidence_values_available?
@@ -535,7 +576,7 @@ module Lib
def prediction_feature_value_map(proc)
res = {}
(0..@num_classes-1).each do |i|
- res[@prediction_feature_values[i]] = proc.call(i)
+ res[@class_domain[i]] = proc.call(i)
end
return res
end
diff --git a/lib/rdf_provider.rb b/lib/rdf_provider.rb
deleted file mode 100644
index 7fa3ecc..0000000
--- a/lib/rdf_provider.rb
+++ /dev/null
@@ -1,188 +0,0 @@
-
-class String
- def convert_underscore
- gsub(/_./) do |m|
- m.gsub!(/^_/,"")
- m.upcase
- end
- end
-end
-
-module Lib
- module RDFProvider
-
- def to_rdf
- HashToOwl.to_rdf(self)
- end
-
- def uri
- raise "not implemented"
- end
-
- def rdf_title
- raise "not implemented"
- end
-
- # the rdf output is generated from the hash that is provided by this method
- # the keys in the hash structure are used to defined type of the resource (literal, objectProperty, dataProperty)
- # example: if the structure should contain a literal named "size" with value 5
- # * add :property_xy => 5 to your hash
- # * make sure literal?(:property_xy) returns true
- # * literal_name(:property_xy) must return "size"
- #
- def get_content_as_hash
- raise "not implemented"
- end
-
- def to_yaml
- get_content_as_hash.to_yaml
- end
-
- def rdf_ignore?( prop )
- self.class::IGNORE.index( prop ) != nil
- end
-
- def literal?( prop )
- self.class::LITERALS.index( prop ) != nil
- end
-
- def literal_name( prop )
- if self.class::LITERAL_NAMES.has_key?(prop)
- self.class::LITERAL_NAMES[prop]
- else
- OT[prop.to_s.convert_underscore]
- end
- end
-
- def object_property?( prop )
- self.class::OBJECT_PROPERTIES.has_key?( prop )
- end
-
- def object_property_name( prop )
- return self.class::OBJECT_PROPERTIES[ prop ]
- end
-
- def object_type( prop )
- return self.class::OBJECTS[ prop ]
- end
-
- def class?(prop)
- self.class::CLASSES.has_key?( prop )
- end
-
- def class_name( prop )
- return self.class::CLASSES[ prop ]
- end
-
- end
-
- class HashToOwl
- #include OpenTox::Owl
-
- def self.to_rdf( rdf_provider )
-
- owl = OpenTox::Owl.create(rdf_provider.rdf_title, rdf_provider.uri )
- toOwl = HashToOwl.new(owl)
- toOwl.add_content(rdf_provider)
- toOwl.rdf
- end
-
- def add_content( rdf_provider )
- @rdf_provider = rdf_provider
- recursiv_add_content( @rdf_provider.get_content_as_hash, @owl.root_node )
- end
-
- def rdf
- @owl.rdf
- end
-
- private
- def initialize(owl)
- @owl = owl
- @model = owl.model
- end
-
- def recursiv_add_content( output, node )
- output.each do |k,v|
- if v==nil
- LOGGER.warn "skipping nil value: "+k.to_s
- next
- end
- if @rdf_provider.rdf_ignore?(k)
- #do nothing
- elsif v.is_a?(Hash)
- new_node = add_class( k, node )
- recursiv_add_content( v, new_node )
- elsif v.is_a?(Array)
- v.each do |value|
- if @rdf_provider.class?(k)
- new_node = add_class( k, node )
- recursiv_add_content( value, new_node )
- else
- add_object_property( k, value, node)
- end
- end
- elsif @rdf_provider.literal?(k)
- set_literal( k, v, node)
- elsif @rdf_provider.object_property?(k)
- add_object_property( k, v, node)
- else
- raise "illegal value k:"+k.to_s+" v:"+v.to_s
- end
- end
- end
-
- def add_class( property, node )
- raise "no object prop: "+property.to_s unless @rdf_provider.object_property?(property)
- raise "no class name: "+property.to_s unless @rdf_provider.class_name(property)
- # to avoid anonymous nodes, make up uris for sub-objects
- # use counter to make sure each uri is unique
- # for example we will get ../confusion_matrix_cell/1, ../confusion_matrix_cell/2, ...
- count = 1
- while (true)
- res = Redland::Resource.new( File.join(node.uri.to_s,property.to_s+"/"+count.to_s) )
- break if @model.subject(@rdf_provider.object_property_name(property), res).nil?
- count += 1
- end
- clazz = Redland::Resource.new(@rdf_provider.class_name(property))
- @model.add res, RDF['type'], clazz
- @model.add res, DC['title'], clazz
- @model.add clazz, RDF['type'], OWL['Class']
- @model.add DC['title'], RDF['type'],OWL['AnnotationProperty']
-
- objectProp = Redland::Resource.new(@rdf_provider.object_property_name(property))
- @model.add objectProp, RDF['type'], OWL['ObjectProperty']
- @model.add node, objectProp, res
- return res
- end
-
- def set_literal(property, value, node )
- raise "empty literal value "+property.to_s if value==nil || value.to_s.size==0
- raise "no literal name "+propety.to_s unless @rdf_provider.literal_name(property)
- begin
- l = @model.object(subject, @rdf_provider.literal_name(property))
- @model.delete node, @rdf_provider.literal_name(property), l
- rescue
- end
- literalProp = Redland::Resource.new(@rdf_provider.literal_name(property))
- @model.add literalProp, RDF['type'],OWL['AnnotationProperty']
- @model.add node, literalProp, Redland::Literal.create(value)
- end
-
- def add_object_property(property, value, node )
- raise "empty object property value "+property.to_s if value==nil || value.to_s.size==0
- raise "no object property name "+propety.to_s unless @rdf_provider.object_property_name(property)
- raise "no object type "+property.to_s unless @rdf_provider.object_type(property)
-
- objectProp = Redland::Resource.new(@rdf_provider.object_property_name(property))
- @model.add objectProp, RDF['type'], OWL['ObjectProperty']
-
- val = Redland::Resource.new(value)
- type = Redland::Resource.new(@rdf_provider.object_type(property))
- @model.add node, objectProp, val
- @model.add val, RDF['type'], type
- @model.add type, RDF['type'], OWL['Class']
- end
-
- end
-end
diff --git a/lib/test_util.rb b/lib/test_util.rb
index ecab76c..590d295 100644..100755
--- a/lib/test_util.rb
+++ b/lib/test_util.rb
@@ -10,11 +10,12 @@ module Lib
end
def self.wait_for_task(uri)
- if OpenTox::Utils.task_uri?(uri)
+ if uri.task_uri?
task = OpenTox::Task.find(uri)
task.wait_for_completion
- raise "task failed: "+uri.to_s+", error is:\n"+task.description if task.error?
- uri = task.resultURI
+ #raise "task failed: "+uri.to_s+", error is:\n"+task.description if task.error?
+ LOGGER.error "task failed :\n"+task.to_yaml if task.error?
+ uri = task.result_uri
end
return uri
end
diff --git a/lib/validation_db.rb b/lib/validation_db.rb
index 7afab90..0d5db21 100644..100755
--- a/lib/validation_db.rb
+++ b/lib/validation_db.rb
@@ -4,21 +4,10 @@
#end
require "lib/merge.rb"
-unless ActiveRecord::Base.connected?
- ActiveRecord::Base.establish_connection(
- :adapter => CONFIG[:database][:adapter],
- :host => CONFIG[:database][:host],
- :database => CONFIG[:database][:database],
- :username => CONFIG[:database][:username],
- :password => CONFIG[:database][:password]
- )
- ActiveRecord::Base.logger = Logger.new("/dev/null")
-end
-
module Lib
- VAL_PROPS_GENERAL = [ :validation_uri, :model_uri, :algorithm_uri, :training_dataset_uri, :prediction_feature,
- :test_dataset_uri, :test_target_dataset_uri, :prediction_dataset_uri, :created_at ]
+ VAL_PROPS_GENERAL = [ :validation_uri, :validation_type, :model_uri, :algorithm_uri, :training_dataset_uri, :prediction_feature,
+ :test_dataset_uri, :test_target_dataset_uri, :prediction_dataset_uri, :date ] # :date is the reader the models define on top of created_at
VAL_PROPS_SUM = [ :num_instances, :num_without_class, :num_unpredicted ]
VAL_PROPS_AVG = [:real_runtime, :percent_without_class, :percent_unpredicted ]
VAL_PROPS = VAL_PROPS_GENERAL + VAL_PROPS_SUM + VAL_PROPS_AVG
@@ -28,7 +17,8 @@ module Lib
# :classification_statistics
VAL_CLASS_PROPS_SINGLE_SUM = [ :num_correct, :num_incorrect, :confusion_matrix ]
- VAL_CLASS_PROPS_SINGLE_AVG = [ :percent_correct, :percent_incorrect, :weighted_area_under_roc ]
+ VAL_CLASS_PROPS_SINGLE_AVG = [ :percent_correct, :percent_incorrect,
+ :weighted_area_under_roc, :accuracy, :weighted_accuracy ] # this list is also part of VAL_MERGE_AVG
VAL_CLASS_PROPS_SINGLE = VAL_CLASS_PROPS_SINGLE_SUM + VAL_CLASS_PROPS_SINGLE_AVG
# :class_value_statistics
@@ -43,26 +33,146 @@ module Lib
:true_negative_rate, :true_positive_rate ] #:precision, :recall,
VAL_CLASS_PROPS = VAL_CLASS_PROPS_SINGLE + VAL_CLASS_PROPS_PER_CLASS
- VAL_CLASS_PROPS_EXTENDED = VAL_CLASS_PROPS + [:accuracy]
# :regression_statistics
VAL_REGR_PROPS = [ :root_mean_squared_error, :mean_absolute_error, :r_square,
:target_variance_actual, :target_variance_predicted, :sum_squared_error, :sample_correlation_coefficient ]
CROSS_VAL_PROPS = [:dataset_uri, :num_folds, :stratified, :random_seed]
- CROSS_VAL_PROPS_REDUNDANT = [:crossvalidation_uri, :algorithm_uri, :created_at] + CROSS_VAL_PROPS
+ CROSS_VAL_PROPS_REDUNDANT = [:crossvalidation_uri, :algorithm_uri, :date] + CROSS_VAL_PROPS
- ALL_PROPS = VAL_PROPS + VAL_CV_PROPS + VAL_CLASS_PROPS_EXTENDED + VAL_REGR_PROPS + CROSS_VAL_PROPS
+ ALL_PROPS = VAL_PROPS + VAL_CV_PROPS + VAL_CLASS_PROPS + VAL_REGR_PROPS + CROSS_VAL_PROPS # VAL_CLASS_PROPS already carries :accuracy through VAL_CLASS_PROPS_SINGLE_AVG
VAL_MERGE_GENERAL = VAL_PROPS_GENERAL + VAL_CV_PROPS + [:classification_statistics, :regression_statistics] + CROSS_VAL_PROPS
VAL_MERGE_SUM = VAL_PROPS_SUM + VAL_CLASS_PROPS_SINGLE_SUM + VAL_CLASS_PROPS_PER_CLASS_SUM
VAL_MERGE_AVG = VAL_PROPS_AVG + VAL_CLASS_PROPS_SINGLE_AVG + VAL_CLASS_PROPS_PER_CLASS_AVG + VAL_REGR_PROPS
-
- class Validation < ActiveRecord::Base
- serialize :classification_statistics
- serialize :regression_statistics
+
+
+# class Validation < ActiveRecord::Base
+# serialize :classification_statistics
+# serialize :regression_statistics
+#
+# alias_attribute :date, :created_at
+
+ class Validation
+   include DataMapper::Resource
+
+   property :id, Serial
+   property :validation_type, String, :length => 512
+   property :model_uri, String, :length => 512
+   property :algorithm_uri, String, :length => 512
+   property :training_dataset_uri, String, :length => 512
+   property :test_target_dataset_uri, String, :length => 512
+   property :test_dataset_uri, String, :length => 512
+   property :prediction_dataset_uri, String, :length => 512
+   property :prediction_feature, String, :length => 512
+   property :created_at, DateTime
+   property :num_instances, Integer
+   property :num_without_class, Integer
+   property :num_unpredicted, Integer
+   property :crossvalidation_id, Integer
+   property :crossvalidation_fold, Integer
+   property :real_runtime, Float
+   property :percent_without_class, Float
+   property :percent_unpredicted, Float
+   property :classification_statistics, Object
+   property :regression_statistics, Object
+   property :finished, Boolean, :default => false
+
+   attr_accessor :subjectid
+
+   after :save, :check_policy
+   private
+   # after-save hook: hands this record's URI and the subjectid to the policy check
+   def check_policy
+     uri = validation_uri
+     OpenTox::Authorization.check_policy(uri, subjectid)
+   end
+
+   public
+   # Returns created_at under the name date.
+   def date
+     self.created_at
+   end
+
+   # Full URI of this validation; raises "no id" while id is nil.
+   def validation_uri
+     raise "no id" if self.id.nil?
+     $url_provider.url_for("/#{self.id}", :full)
+   end
+
+   # Full URI of the referenced crossvalidation, or nil without a crossvalidation_id.
+   def crossvalidation_uri
+     return nil unless self.crossvalidation_id
+     $url_provider.url_for("/crossvalidation/#{self.crossvalidation_id}", :full)
+   end
+
+   # Class-level predicates: membership tests against the property lists of Lib.
+   class << self
+     def classification_property?( property ); VAL_CLASS_PROPS.include?( property ); end
+     def depends_on_class_value?( property ); VAL_CLASS_PROPS_PER_CLASS.include?( property ); end
+     def complement_exists?( property ); VAL_CLASS_PROPS_PER_CLASS_COMPLEMENT_EXISTS.include?( property ); end
+   end
end
- class Crossvalidation < ActiveRecord::Base
+# class Crossvalidation < ActiveRecord::Base
+# alias_attribute :date, :created_at
+ class Crossvalidation
+   include DataMapper::Resource
+
+   # Persisted columns; the defaults describe a 10-fold, unstratified run with seed 1.
+   property :id, Serial
+   property :algorithm_uri, String, :length => 512
+   property :dataset_uri, String, :length => 512
+   property :created_at, DateTime
+   property :num_folds, Integer, :default => 10
+   property :random_seed, Integer, :default => 1
+   property :finished, Boolean, :default => false
+   property :stratified, Boolean, :default => false
+
+   # Plain Ruby attribute (not a property); handed to the authorization calls below.
+   attr_accessor :subjectid
+
+   after :save, :check_policy
+   private
+   # after-save hook: hands this record's URI and the subjectid to the policy check
+   def check_policy
+     OpenTox::Authorization.check_policy(crossvalidation_uri, subjectid)
+   end
+
+   public
+   # Returns created_at under the name date.
+   def date
+     created_at
+   end
+
+   # Full URI of this crossvalidation; raises "no id" while id is nil.
+   def crossvalidation_uri
+     raise "no id" if self.id==nil
+     $url_provider.url_for("/crossvalidation/"+self.id.to_s, :full)
+   end
+
+   # Lists the crossvalidations that are unique in terms of
+   # dataset_uri, num_folds, stratified and random_seed; the first one found is kept.
+   # conditions:: further conditions, passed on to the DataMapper query
+   # subjectid::  used for the GET authorization check when AA_SERVER is set
+   def self.find_all_uniq(conditions={}, subjectid=nil )
+     uniq = []
+     Lib::Crossvalidation.all(:conditions => conditions).each do |cv|
+       next if AA_SERVER && !OpenTox::Authorization.authorized?(cv.crossvalidation_uri,"GET",subjectid)
+       known = uniq.any? do |cv2|
+         cv.dataset_uri == cv2.dataset_uri && cv.num_folds == cv2.num_folds &&
+           cv.stratified == cv2.stratified && cv.random_seed == cv2.random_seed
+       end
+       uniq << cv unless known
+     end
+     uniq
+   end
end
end
+
+
+[Lib::Validation, Lib::Crossvalidation].each do |model|
+  model.auto_upgrade!                  # let DataMapper adapt the table to the declared properties
+  model.raise_on_save_failure = true   # failed saves raise instead of returning false
+end