summaryrefslogtreecommitdiff
path: root/lib/ot_predictions.rb
diff options
context:
space:
mode:
authormguetlein <martin.guetlein@gmail.com>2011-01-13 15:18:45 +0100
committermguetlein <martin.guetlein@gmail.com>2011-01-13 15:18:45 +0100
commiteeb0b6df2dbdae29ccf3f4ccd923002e0ed65506 (patch)
treec0b0956d5b107e613c3236ede71462fa7f18cf05 /lib/ot_predictions.rb
parent5786ebd5095451e928d9811bd6bbd98af72de99d (diff)
adjust to new wrapper, replace activerecord with datamapper (remove activerecord code), is_classification to feature_type
Diffstat (limited to 'lib/ot_predictions.rb')
-rwxr-xr-x[-rw-r--r--]lib/ot_predictions.rb136
1 files changed, 94 insertions, 42 deletions
diff --git a/lib/ot_predictions.rb b/lib/ot_predictions.rb
index 0175a0c..52683a6 100644..100755
--- a/lib/ot_predictions.rb
+++ b/lib/ot_predictions.rb
@@ -15,7 +15,7 @@ module Lib
return @compounds[instance_index]
end
- def initialize(is_classification, test_dataset_uri, test_target_dataset_uri,
+ def initialize(feature_type, test_dataset_uri, test_target_dataset_uri,
prediction_feature, prediction_dataset_uri, predicted_variable, task=nil)
LOGGER.debug("loading prediciton via test-dataset:'"+test_dataset_uri.to_s+
@@ -53,38 +53,47 @@ module Lib
raise "prediction_feature not found in test_target_dataset\n"+
"prediction_feature: '"+prediction_feature.to_s+"'\n"+
"test_target_dataset: '"+test_target_dataset_uri.to_s+"'\n"+
- "available features are: "+test_target_dataset.features.inspect if test_target_dataset.features.index(prediction_feature)==nil
+ "available features are: "+test_target_dataset.features.inspect if test_target_dataset.features.keys.index(prediction_feature)==nil
end
+ test_dataset.load_all
@compounds = test_dataset.compounds
LOGGER.debug "test dataset size: "+@compounds.size.to_s
raise "test dataset is empty" unless @compounds.size>0
- class_values = is_classification ? OpenTox::Feature.domain(prediction_feature) : nil
+ class_values = feature_type=="classification" ? OpenTox::Feature.new(prediction_feature).domain : nil
actual_values = []
@compounds.each do |c|
- value = test_target_dataset.get_value(c, prediction_feature)
-
- if is_classification
- value = value.to_s unless value==nil
- raise "illegal class_value of actual value "+value.to_s+" class: "+
- value.class.to_s unless value==nil or class_values.index(value)!=nil
- actual_values.push class_values.index(value)
- else
- begin
- value = value.to_f unless value==nil or value.is_a?(Numeric)
- rescue
- LOGGER.warn "no numeric value for regression: '"+value.to_s+"'"
- value = nil
- end
- actual_values.push value
+ case feature_type
+ when "classification"
+ actual_values << classification_value(test_target_dataset, c, prediction_feature, class_values)
+ when "regression"
+ actual_values << regression_value(test_target_dataset, c, prediction_feature)
end
end
task.progress(40) if task # loaded actual values
prediction_dataset = OpenTox::Dataset.find prediction_dataset_uri
raise "prediction dataset not found: '"+prediction_dataset_uri.to_s+"'" unless prediction_dataset
- raise "prediction-feature not found: '"+predicted_variable+"' in prediction-dataset: "+prediction_dataset_uri.to_s+", available features: "+prediction_dataset.features.inspect if prediction_dataset.features.index(predicted_variable)==nil
+
+ # TODO: remove LAZAR_PREDICTION_DATASET_HACK
+ no_prediction_feature = prediction_dataset.features.keys.index(predicted_variable)==nil
+ if no_prediction_feature
+ one_entry_per_compound = prediction_dataset.data_entries.keys.size == @compounds.size
+ @compounds.each do |c|
+ if prediction_dataset.data_entries[c].size != 1
+ one_entry_per_compound = false
+ break
+ end
+ end
+ msg = "prediction-feature not found: '"+predicted_variable+"' in prediction-dataset: "+prediction_dataset_uri.to_s+", available features: "+
+ prediction_dataset.features.keys.inspect
+ if one_entry_per_compound
+ LOGGER.warn msg
+ else
+ raise msg
+ end
+ end
raise "more predicted than test compounds test:"+@compounds.size.to_s+" < prediction:"+
prediction_dataset.compounds.size.to_s if @compounds.size < prediction_dataset.compounds.size
@@ -102,38 +111,79 @@ module Lib
predicted_values << nil
confidence_values << nil
else
- if is_classification
- value = prediction_dataset.get_predicted_class(c, predicted_variable)
- value = value.to_s unless value==nil
- raise "illegal class_value of predicted value "+value.to_s+" class: "+value.class.to_s unless value==nil or class_values.index(value)!=nil
- predicted_values << class_values.index(value)
- else
- value = prediction_dataset.get_predicted_regression(c, predicted_variable)
- begin
- value = value.to_f unless value==nil or value.is_a?(Numeric)
- rescue
- LOGGER.warn "no numeric value for regression: '"+value.to_s+"'"
- value = nil
- end
- predicted_values << value
+ case feature_type
+ when "classification"
+ # TODO: remove LAZAR_PREDICTION_DATASET_HACK
+ predicted_values << classification_value(prediction_dataset, c, no_prediction_feature ? nil : predicted_variable, class_values)
+ when "regression"
+ predicted_values << regression_value(prediction_dataset, c, no_prediction_feature ? nil : predicted_variable)
+ end
+ # TODO confidence_values << prediction_dataset.get_prediction_confidence(c, predicted_variable)
+ conf = 1
+ begin
+ feature = prediction_dataset.data_entries[c].keys[0]
+ feature_data = prediction_dataset.features[feature]
+ conf = feature_data[OT.confidence] if feature_data[OT.confidence]!=nil
+ rescue
+ LOGGER.warn "could not get confidence"
end
- confidence_values << prediction_dataset.get_prediction_confidence(c, predicted_variable)
+ confidence_values << conf
end
end
task.progress(80) if task # loaded predicted values and confidence
- super(predicted_values, actual_values, confidence_values, is_classification, class_values)
+ super(predicted_values, actual_values, confidence_values, feature_type, class_values)
raise "illegal num compounds "+num_info if @compounds.size != @predicted_values.size
task.progress(100) if task # done with the mathmatics
end
+ private
+ def regression_value(dataset, compound, feature)
+ v = value(dataset, compound, feature)
+ begin
+ v = v.to_f unless v==nil or v.is_a?(Numeric)
+ v
+ rescue
+ LOGGER.warn "no numeric value for regression: '"+v.to_s+"'"
+ nil
+ end
+ end
+
+ def classification_value(dataset, compound, feature, class_values)
+ v = value(dataset, compound, feature)
+ i = class_values.index(v)
+ raise "illegal class_value of predicted value "+v.to_s+" class: "+v.class.to_s unless v==nil or i!=nil
+ i
+ end
+
+ def value(dataset, compound, feature)
+
+ if feature==nil
+ v = dataset.data_entries[compound].values[0]
+ else
+ v = dataset.data_entries[compound][feature]
+ end
+ raise "no array" unless v.is_a?(Array)
+ if v.size>1
+ raise "multiple values"
+ elsif v.size==1
+ v = v[0]
+ else
+ v = nil
+ end
+ raise "array" if v.is_a?(Array)
+ v = nil if v.to_s.size==0
+ v
+ end
+ public
def compute_stats
res = {}
- if @is_classification
+ case @feature_type
+ when "classification"
(Lib::VAL_CLASS_PROPS).each{ |s| res[s] = send(s)}
- else
+ when "regression"
(Lib::VAL_REGR_PROPS).each{ |s| res[s] = send(s) }
end
return res
@@ -152,16 +202,18 @@ module Lib
#PENDING!
begin
- a.push( "http://ambit.uni-plovdiv.bg:8080/ambit2/depict/cdk?search="+
- URI.encode(OpenTox::Compound.new(:uri=>p.identifier(i)).smiles) ) if add_pic
+ #a.push( "http://ambit.uni-plovdiv.bg:8080/ambit2/depict/cdk?search="+
+ # URI.encode(OpenTox::Compound.new(:uri=>p.identifier(i)).smiles) ) if add_pic
+ a << p.identifier(i)+"/image"
rescue => ex
+ raise ex
#a.push("Could not add pic: "+ex.message)
- a.push(p.identifier(i))
+ #a.push(p.identifier(i))
end
a << (format ? p.actual_value(i).to_nice_s : p.actual_value(i))
a << (format ? p.predicted_value(i).to_nice_s : p.predicted_value(i))
- if p.classification?
+ if p.feature_type=="classification"
if (p.predicted_value(i)!=nil and p.actual_value(i)!=nil)
a << (p.classification_miss?(i) ? 1 : 0)
else
@@ -180,7 +232,7 @@ module Lib
header << "compound" if add_pic
header << "actual value"
header << "predicted value"
- header << "missclassified" if predictions[0].classification?
+ header << "missclassified" if predictions[0].feature_type=="classification"
header << "confidence value" if predictions[0].confidence_values_available?
header << "compound-uri"
res.insert(0, header)