summaryrefslogtreecommitdiff
path: root/lib/ot_predictions.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/ot_predictions.rb')
-rwxr-xr-x[-rw-r--r--]lib/ot_predictions.rb206
1 files changed, 156 insertions, 50 deletions
diff --git a/lib/ot_predictions.rb b/lib/ot_predictions.rb
index 63debc0..f812854 100644..100755
--- a/lib/ot_predictions.rb
+++ b/lib/ot_predictions.rb
@@ -15,22 +15,18 @@ module Lib
return @compounds[instance_index]
end
- def initialize(is_classification, test_dataset_uri, test_target_dataset_uri, prediction_feature, prediction_dataset_uri, predicted_variable)
+ def initialize(feature_type, test_dataset_uri, test_target_dataset_uri,
+ prediction_feature, prediction_dataset_uri, predicted_variable, subjectid=nil, task=nil)
- LOGGER.debug("loading prediciton via test-dateset:'"+test_dataset_uri.to_s+
+ LOGGER.debug("loading prediciton via test-dataset:'"+test_dataset_uri.to_s+
"', test-target-datset:'"+test_target_dataset_uri.to_s+
"', prediction-dataset:'"+prediction_dataset_uri.to_s+
"', prediction_feature: '"+prediction_feature.to_s+"' "+
"', predicted_variable: '"+predicted_variable.to_s+"'")
- if prediction_feature =~ /ambit.uni-plovdiv.bg.*feature.*264185/
- LOGGER.warn "HACK for report example"
- prediction_feature = "http://ambit.uni-plovdiv.bg:8080/ambit2/feature/264187"
- end
-
predicted_variable=prediction_feature if predicted_variable==nil
- test_dataset = OpenTox::Dataset.find test_dataset_uri
+ test_dataset = OpenTox::Dataset.find test_dataset_uri,subjectid
raise "test dataset not found: '"+test_dataset_uri.to_s+"'" unless test_dataset
raise "prediction_feature missing" unless prediction_feature
@@ -40,9 +36,9 @@ module Lib
raise "prediction_feature not found in test_dataset, specify a test_target_dataset\n"+
"prediction_feature: '"+prediction_feature.to_s+"'\n"+
"test_dataset: '"+test_target_dataset_uri.to_s+"'\n"+
- "available features are: "+test_target_dataset.features.inspect if test_target_dataset.features.index(prediction_feature)==nil
+ "available features are: "+test_target_dataset.features.inspect if test_target_dataset.features.keys.index(prediction_feature)==nil
else
- test_target_dataset = OpenTox::Dataset.find test_target_dataset_uri
+ test_target_dataset = OpenTox::Dataset.find test_target_dataset_uri,subjectid
raise "test target datset not found: '"+test_target_dataset_uri.to_s+"'" unless test_target_dataset
if CHECK_VALUES
test_dataset.compounds.each do |c|
@@ -52,38 +48,47 @@ module Lib
raise "prediction_feature not found in test_target_dataset\n"+
"prediction_feature: '"+prediction_feature.to_s+"'\n"+
"test_target_dataset: '"+test_target_dataset_uri.to_s+"'\n"+
- "available features are: "+test_target_dataset.features.inspect if test_target_dataset.features.index(prediction_feature)==nil
+ "available features are: "+test_target_dataset.features.inspect if test_target_dataset.features.keys.index(prediction_feature)==nil
end
@compounds = test_dataset.compounds
LOGGER.debug "test dataset size: "+@compounds.size.to_s
- raise "test dataset is empty" unless @compounds.size>0
- class_values = is_classification ? OpenTox::Feature.domain(prediction_feature) : nil
+ raise "test dataset is empty "+test_dataset_uri.to_s unless @compounds.size>0
+ class_values = feature_type=="classification" ? OpenTox::Feature.find(prediction_feature, subjectid).domain : nil
actual_values = []
@compounds.each do |c|
- value = test_target_dataset.get_value(c, prediction_feature)
-
- if is_classification
- value = value.to_s unless value==nil
- raise "illegal class_value of actual value "+value.to_s+" class: "+
- value.class.to_s unless value==nil or class_values.index(value)!=nil
- actual_values.push class_values.index(value)
- else
- begin
- value = value.to_f unless value==nil or value.is_a?(Numeric)
- rescue
- LOGGER.warn "no numeric value for regression: '"+value.to_s+"'"
- value = nil
- end
- actual_values.push value
+ case feature_type
+ when "classification"
+ actual_values << classification_value(test_target_dataset, c, prediction_feature, class_values)
+ when "regression"
+ actual_values << regression_value(test_target_dataset, c, prediction_feature)
end
end
+ task.progress(40) if task # loaded actual values
- prediction_dataset = OpenTox::Dataset.find prediction_dataset_uri
+ prediction_dataset = OpenTox::Dataset.find prediction_dataset_uri,subjectid
raise "prediction dataset not found: '"+prediction_dataset_uri.to_s+"'" unless prediction_dataset
- raise "prediction-feature not found: '"+predicted_variable+"' in prediction-dataset: "+prediction_dataset_uri.to_s+", available features: "+prediction_dataset.features.inspect if prediction_dataset.features.index(predicted_variable)==nil
+ # TODO: remove LAZAR_PREDICTION_DATASET_HACK
+ no_prediction_feature = prediction_dataset.features.keys.index(predicted_variable)==nil
+ if no_prediction_feature
+ one_entry_per_compound = true
+ @compounds.each do |c|
+ if prediction_dataset.data_entries[c] and prediction_dataset.data_entries[c].size != 1
+ one_entry_per_compound = false
+ break
+ end
+ end
+ msg = "prediction-feature not found: '"+predicted_variable+"' in prediction-dataset: "+prediction_dataset_uri.to_s+", available features: "+
+ prediction_dataset.features.keys.inspect
+ if one_entry_per_compound
+ LOGGER.warn msg
+ else
+ raise msg
+ end
+ end
+
raise "more predicted than test compounds test:"+@compounds.size.to_s+" < prediction:"+
prediction_dataset.compounds.size.to_s if @compounds.size < prediction_dataset.compounds.size
if CHECK_VALUES
@@ -100,41 +105,142 @@ module Lib
predicted_values << nil
confidence_values << nil
else
- if is_classification
- value = prediction_dataset.get_predicted_class(c, predicted_variable)
- value = value.to_s unless value==nil
- raise "illegal class_value of predicted value "+value.to_s+" class: "+value.class.to_s unless value==nil or class_values.index(value)!=nil
- predicted_values << class_values.index(value)
- confidence_values << prediction_dataset.get_prediction_confidence(c, predicted_variable)
- else
- value = prediction_dataset.get_predicted_regression(c, predicted_variable)
- begin
- value = value.to_f unless value==nil or value.is_a?(Numeric)
- rescue
- LOGGER.warn "no numeric value for regression: '"+value.to_s+"'"
- value = nil
- end
- predicted_values << value
- confidence_values << nil
+ case feature_type
+ when "classification"
+ # TODO: remove LAZAR_PREDICTION_DATASET_HACK
+ predicted_values << classification_value(prediction_dataset, c, no_prediction_feature ? nil : predicted_variable, class_values)
+ when "regression"
+ predicted_values << regression_value(prediction_dataset, c, no_prediction_feature ? nil : predicted_variable)
+ end
+ # TODO confidence_values << prediction_dataset.get_prediction_confidence(c, predicted_variable)
+ conf = 1
+ begin
+ feature = prediction_dataset.data_entries[c].keys[0]
+ feature_data = prediction_dataset.features[feature]
+ conf = feature_data[OT.confidence] if feature_data[OT.confidence]!=nil
+ rescue
+ LOGGER.warn "could not get confidence"
end
+ confidence_values << conf
end
end
+ task.progress(80) if task # loaded predicted values and confidence
- super(predicted_values, actual_values, confidence_values, is_classification, class_values)
+ super(predicted_values, actual_values, confidence_values, feature_type, class_values)
raise "illegal num compounds "+num_info if @compounds.size != @predicted_values.size
+ task.progress(100) if task # done with the mathmatics
end
+ private
+ def regression_value(dataset, compound, feature)
+ v = value(dataset, compound, feature)
+ begin
+ v = v.to_f unless v==nil or v.is_a?(Numeric)
+ v
+ rescue
+ LOGGER.warn "no numeric value for regression: '"+v.to_s+"'"
+ nil
+ end
+ end
+
+ def classification_value(dataset, compound, feature, class_values)
+ v = value(dataset, compound, feature)
+ i = class_values.index(v)
+ raise "illegal class_value of prediction (value is '"+v.to_s+"', class is '"+v.class.to_s+"'), possible values are "+
+ class_values.inspect unless v==nil or i!=nil
+ i
+ end
+
+ def value(dataset, compound, feature)
+ return nil if dataset.data_entries[compound]==nil
+ if feature==nil
+ v = dataset.data_entries[compound].values[0]
+ else
+ v = dataset.data_entries[compound][feature]
+ end
+ return nil if v==nil
+ raise "no array "+v.class.to_s+" : '"+v.to_s+"'" unless v.is_a?(Array)
+ if v.size>1
+ v.uniq!
+ if v.size>1
+ v = nil
+ LOGGER.warn "not yet implemented: multiple non-equal values "+compound.to_s+" "+v.inspect
+ else
+ v = v[0]
+ end
+ elsif v.size==1
+ v = v[0]
+ else
+ v = nil
+ end
+ raise "array" if v.is_a?(Array)
+ v = nil if v.to_s.size==0
+ v
+ end
+ public
def compute_stats
res = {}
- if @is_classification
- (Lib::VAL_CLASS_PROPS_EXTENDED).each{ |s| res[s] = send(s)}
- else
+ case @feature_type
+ when "classification"
+ (Lib::VAL_CLASS_PROPS).each{ |s| res[s] = send(s)}
+ when "regression"
(Lib::VAL_REGR_PROPS).each{ |s| res[s] = send(s) }
end
return res
end
+ def to_array()
+ OTPredictions.to_array( [self] )
+ end
+
+ def self.to_array( predictions, add_pic=false, format=false )
+
+ res = []
+ predictions.each do |p|
+ (0..p.num_instances-1).each do |i|
+ a = []
+
+ #PENDING!
+ begin
+ #a.push( "http://ambit.uni-plovdiv.bg:8080/ambit2/depict/cdk?search="+
+ # URI.encode(OpenTox::Compound.new(:uri=>p.identifier(i)).smiles) ) if add_pic
+ a << p.identifier(i)+"/image"
+ rescue => ex
+ raise ex
+ #a.push("Could not add pic: "+ex.message)
+ #a.push(p.identifier(i))
+ end
+
+ a << (format ? p.actual_value(i).to_nice_s : p.actual_value(i))
+ a << (format ? p.predicted_value(i).to_nice_s : p.predicted_value(i))
+ if p.feature_type=="classification"
+ if (p.predicted_value(i)!=nil and p.actual_value(i)!=nil)
+ a << (p.classification_miss?(i) ? 1 : 0)
+ else
+ a << nil
+ end
+ end
+ if p.confidence_values_available?
+ a << (format ? p.confidence_value(i).to_nice_s : p.confidence_value(i))
+ end
+ a << p.identifier(i)
+ res << a
+ end
+ end
+
+ header = []
+ header << "compound" if add_pic
+ header << "actual value"
+ header << "predicted value"
+ header << "missclassified" if predictions[0].feature_type=="classification"
+ header << "confidence value" if predictions[0].confidence_values_available?
+ header << "compound-uri"
+ res.insert(0, header)
+
+ return res
+ end
+
end
end