diff options
Diffstat (limited to 'lib/ot_predictions.rb')
-rwxr-xr-x[-rw-r--r--] | lib/ot_predictions.rb | 206 |
1 files changed, 156 insertions, 50 deletions
diff --git a/lib/ot_predictions.rb b/lib/ot_predictions.rb index 63debc0..f812854 100644..100755 --- a/lib/ot_predictions.rb +++ b/lib/ot_predictions.rb @@ -15,22 +15,18 @@ module Lib return @compounds[instance_index] end - def initialize(is_classification, test_dataset_uri, test_target_dataset_uri, prediction_feature, prediction_dataset_uri, predicted_variable) + def initialize(feature_type, test_dataset_uri, test_target_dataset_uri, + prediction_feature, prediction_dataset_uri, predicted_variable, subjectid=nil, task=nil) - LOGGER.debug("loading prediciton via test-dateset:'"+test_dataset_uri.to_s+ + LOGGER.debug("loading prediciton via test-dataset:'"+test_dataset_uri.to_s+ "', test-target-datset:'"+test_target_dataset_uri.to_s+ "', prediction-dataset:'"+prediction_dataset_uri.to_s+ "', prediction_feature: '"+prediction_feature.to_s+"' "+ "', predicted_variable: '"+predicted_variable.to_s+"'") - if prediction_feature =~ /ambit.uni-plovdiv.bg.*feature.*264185/ - LOGGER.warn "HACK for report example" - prediction_feature = "http://ambit.uni-plovdiv.bg:8080/ambit2/feature/264187" - end - predicted_variable=prediction_feature if predicted_variable==nil - test_dataset = OpenTox::Dataset.find test_dataset_uri + test_dataset = OpenTox::Dataset.find test_dataset_uri,subjectid raise "test dataset not found: '"+test_dataset_uri.to_s+"'" unless test_dataset raise "prediction_feature missing" unless prediction_feature @@ -40,9 +36,9 @@ module Lib raise "prediction_feature not found in test_dataset, specify a test_target_dataset\n"+ "prediction_feature: '"+prediction_feature.to_s+"'\n"+ "test_dataset: '"+test_target_dataset_uri.to_s+"'\n"+ - "available features are: "+test_target_dataset.features.inspect if test_target_dataset.features.index(prediction_feature)==nil + "available features are: "+test_target_dataset.features.inspect if test_target_dataset.features.keys.index(prediction_feature)==nil else - test_target_dataset = OpenTox::Dataset.find test_target_dataset_uri + test_target_dataset = OpenTox::Dataset.find test_target_dataset_uri,subjectid raise "test target datset not found: '"+test_target_dataset_uri.to_s+"'" unless test_target_dataset if CHECK_VALUES test_dataset.compounds.each do |c| @@ -52,38 +48,47 @@ module Lib raise "prediction_feature not found in test_target_dataset\n"+ "prediction_feature: '"+prediction_feature.to_s+"'\n"+ "test_target_dataset: '"+test_target_dataset_uri.to_s+"'\n"+ - "available features are: "+test_target_dataset.features.inspect if test_target_dataset.features.index(prediction_feature)==nil + "available features are: "+test_target_dataset.features.inspect if test_target_dataset.features.keys.index(prediction_feature)==nil end @compounds = test_dataset.compounds LOGGER.debug "test dataset size: "+@compounds.size.to_s - raise "test dataset is empty" unless @compounds.size>0 - class_values = is_classification ? OpenTox::Feature.domain(prediction_feature) : nil + raise "test dataset is empty "+test_dataset_uri.to_s unless @compounds.size>0 + class_values = feature_type=="classification" ? OpenTox::Feature.find(prediction_feature, subjectid).domain : nil actual_values = [] @compounds.each do |c| - value = test_target_dataset.get_value(c, prediction_feature) - - if is_classification - value = value.to_s unless value==nil - raise "illegal class_value of actual value "+value.to_s+" class: "+ - value.class.to_s unless value==nil or class_values.index(value)!=nil - actual_values.push class_values.index(value) - else - begin - value = value.to_f unless value==nil or value.is_a?(Numeric) - rescue - LOGGER.warn "no numeric value for regression: '"+value.to_s+"'" - value = nil - end - actual_values.push value + case feature_type + when "classification" + actual_values << classification_value(test_target_dataset, c, prediction_feature, class_values) + when "regression" + actual_values << regression_value(test_target_dataset, c, prediction_feature) end end + task.progress(40) if task # loaded actual values - prediction_dataset = OpenTox::Dataset.find prediction_dataset_uri + prediction_dataset = OpenTox::Dataset.find prediction_dataset_uri,subjectid raise "prediction dataset not found: '"+prediction_dataset_uri.to_s+"'" unless prediction_dataset - raise "prediction-feature not found: '"+predicted_variable+"' in prediction-dataset: "+prediction_dataset_uri.to_s+", available features: "+prediction_dataset.features.inspect if prediction_dataset.features.index(predicted_variable)==nil + # TODO: remove LAZAR_PREDICTION_DATASET_HACK + no_prediction_feature = prediction_dataset.features.keys.index(predicted_variable)==nil + if no_prediction_feature + one_entry_per_compound = true + @compounds.each do |c| + if prediction_dataset.data_entries[c] and prediction_dataset.data_entries[c].size != 1 + one_entry_per_compound = false + break + end + end + msg = "prediction-feature not found: '"+predicted_variable+"' in prediction-dataset: "+prediction_dataset_uri.to_s+", available features: "+ + prediction_dataset.features.keys.inspect + if one_entry_per_compound + LOGGER.warn msg + else + raise msg + end + end + raise "more predicted than test compounds test:"+@compounds.size.to_s+" < prediction:"+ prediction_dataset.compounds.size.to_s if @compounds.size < prediction_dataset.compounds.size if CHECK_VALUES @@ -100,41 +105,142 @@ module Lib predicted_values << nil confidence_values << nil else - if is_classification - value = prediction_dataset.get_predicted_class(c, predicted_variable) - value = value.to_s unless value==nil - raise "illegal class_value of predicted value "+value.to_s+" class: "+value.class.to_s unless value==nil or class_values.index(value)!=nil - predicted_values << class_values.index(value) - confidence_values << prediction_dataset.get_prediction_confidence(c, predicted_variable) - else - value = prediction_dataset.get_predicted_regression(c, predicted_variable) - begin - value = value.to_f unless value==nil or value.is_a?(Numeric) - rescue - LOGGER.warn "no numeric value for regression: '"+value.to_s+"'" - value = nil - end - predicted_values << value - confidence_values << nil + case feature_type + when "classification" + # TODO: remove LAZAR_PREDICTION_DATASET_HACK + predicted_values << classification_value(prediction_dataset, c, no_prediction_feature ? nil : predicted_variable, class_values) + when "regression" + predicted_values << regression_value(prediction_dataset, c, no_prediction_feature ? nil : predicted_variable) + end + # TODO confidence_values << prediction_dataset.get_prediction_confidence(c, predicted_variable) + conf = 1 + begin + feature = prediction_dataset.data_entries[c].keys[0] + feature_data = prediction_dataset.features[feature] + conf = feature_data[OT.confidence] if feature_data[OT.confidence]!=nil + rescue + LOGGER.warn "could not get confidence" end + confidence_values << conf end end + task.progress(80) if task # loaded predicted values and confidence - super(predicted_values, actual_values, confidence_values, is_classification, class_values) + super(predicted_values, actual_values, confidence_values, feature_type, class_values) raise "illegal num compounds "+num_info if @compounds.size != @predicted_values.size + task.progress(100) if task # done with the mathmatics end + private + def regression_value(dataset, compound, feature) + v = value(dataset, compound, feature) + begin + v = v.to_f unless v==nil or v.is_a?(Numeric) + v + rescue + LOGGER.warn "no numeric value for regression: '"+v.to_s+"'" + nil + end + end + + def classification_value(dataset, compound, feature, class_values) + v = value(dataset, compound, feature) + i = class_values.index(v) + raise "illegal class_value of prediction (value is '"+v.to_s+"', class is '"+v.class.to_s+"'), possible values are "+ + class_values.inspect unless v==nil or i!=nil + i + end + + def value(dataset, compound, feature) + return nil if dataset.data_entries[compound]==nil + if feature==nil + v = dataset.data_entries[compound].values[0] + else + v = dataset.data_entries[compound][feature] + end + return nil if v==nil + raise "no array "+v.class.to_s+" : '"+v.to_s+"'" unless v.is_a?(Array) + if v.size>1 + v.uniq! + if v.size>1 + v = nil + LOGGER.warn "not yet implemented: multiple non-equal values "+compound.to_s+" "+v.inspect + else + v = v[0] + end + elsif v.size==1 + v = v[0] + else + v = nil + end + raise "array" if v.is_a?(Array) + v = nil if v.to_s.size==0 + v + end + public def compute_stats res = {} - if @is_classification - (Lib::VAL_CLASS_PROPS_EXTENDED).each{ |s| res[s] = send(s)} - else + case @feature_type + when "classification" + (Lib::VAL_CLASS_PROPS).each{ |s| res[s] = send(s)} + when "regression" (Lib::VAL_REGR_PROPS).each{ |s| res[s] = send(s) } end return res end + def to_array() + OTPredictions.to_array( [self] ) + end + + def self.to_array( predictions, add_pic=false, format=false ) + + res = [] + predictions.each do |p| + (0..p.num_instances-1).each do |i| + a = [] + + #PENDING! + begin + #a.push( "http://ambit.uni-plovdiv.bg:8080/ambit2/depict/cdk?search="+ + # URI.encode(OpenTox::Compound.new(:uri=>p.identifier(i)).smiles) ) if add_pic + a << p.identifier(i)+"/image" + rescue => ex + raise ex + #a.push("Could not add pic: "+ex.message) + #a.push(p.identifier(i)) + end + + a << (format ? p.actual_value(i).to_nice_s : p.actual_value(i)) + a << (format ? p.predicted_value(i).to_nice_s : p.predicted_value(i)) + if p.feature_type=="classification" + if (p.predicted_value(i)!=nil and p.actual_value(i)!=nil) + a << (p.classification_miss?(i) ? 1 : 0) + else + a << nil + end + end + if p.confidence_values_available? + a << (format ? p.confidence_value(i).to_nice_s : p.confidence_value(i)) + end + a << p.identifier(i) + res << a + end + end + + header = [] + header << "compound" if add_pic + header << "actual value" + header << "predicted value" + header << "missclassified" if predictions[0].feature_type=="classification" + header << "confidence value" if predictions[0].confidence_values_available? + header << "compound-uri" + res.insert(0, header) + + return res + end + end end |