diff options
author | mguetlein <martin.guetlein@gmail.com> | 2011-01-13 15:18:45 +0100 |
---|---|---|
committer | mguetlein <martin.guetlein@gmail.com> | 2011-01-13 15:18:45 +0100 |
commit | eeb0b6df2dbdae29ccf3f4ccd923002e0ed65506 (patch) | |
tree | c0b0956d5b107e613c3236ede71462fa7f18cf05 /lib/ot_predictions.rb | |
parent | 5786ebd5095451e928d9811bd6bbd98af72de99d (diff) |
adjust to new wrapper, replace activerecord with datamapper (remove activerecord code), is_classification to feature_type
Diffstat (limited to 'lib/ot_predictions.rb')
-rwxr-xr-x[-rw-r--r--] | lib/ot_predictions.rb | 136 |
1 files changed, 94 insertions, 42 deletions
diff --git a/lib/ot_predictions.rb b/lib/ot_predictions.rb index 0175a0c..52683a6 100644..100755 --- a/lib/ot_predictions.rb +++ b/lib/ot_predictions.rb @@ -15,7 +15,7 @@ module Lib return @compounds[instance_index] end - def initialize(is_classification, test_dataset_uri, test_target_dataset_uri, + def initialize(feature_type, test_dataset_uri, test_target_dataset_uri, prediction_feature, prediction_dataset_uri, predicted_variable, task=nil) LOGGER.debug("loading prediciton via test-dataset:'"+test_dataset_uri.to_s+ @@ -53,38 +53,47 @@ module Lib raise "prediction_feature not found in test_target_dataset\n"+ "prediction_feature: '"+prediction_feature.to_s+"'\n"+ "test_target_dataset: '"+test_target_dataset_uri.to_s+"'\n"+ - "available features are: "+test_target_dataset.features.inspect if test_target_dataset.features.index(prediction_feature)==nil + "available features are: "+test_target_dataset.features.inspect if test_target_dataset.features.keys.index(prediction_feature)==nil end + test_dataset.load_all @compounds = test_dataset.compounds LOGGER.debug "test dataset size: "+@compounds.size.to_s raise "test dataset is empty" unless @compounds.size>0 - class_values = is_classification ? OpenTox::Feature.domain(prediction_feature) : nil + class_values = feature_type=="classification" ? OpenTox::Feature.new(prediction_feature).domain : nil actual_values = [] @compounds.each do |c| - value = test_target_dataset.get_value(c, prediction_feature) - - if is_classification - value = value.to_s unless value==nil - raise "illegal class_value of actual value "+value.to_s+" class: "+ - value.class.to_s unless value==nil or class_values.index(value)!=nil - actual_values.push class_values.index(value) - else - begin - value = value.to_f unless value==nil or value.is_a?(Numeric) - rescue - LOGGER.warn "no numeric value for regression: '"+value.to_s+"'" - value = nil - end - actual_values.push value + case feature_type + when "classification" + actual_values << classification_value(test_target_dataset, c, prediction_feature, class_values) + when "regression" + actual_values << regression_value(test_target_dataset, c, prediction_feature) end end task.progress(40) if task # loaded actual values prediction_dataset = OpenTox::Dataset.find prediction_dataset_uri raise "prediction dataset not found: '"+prediction_dataset_uri.to_s+"'" unless prediction_dataset - raise "prediction-feature not found: '"+predicted_variable+"' in prediction-dataset: "+prediction_dataset_uri.to_s+", available features: "+prediction_dataset.features.inspect if prediction_dataset.features.index(predicted_variable)==nil + + # TODO: remove LAZAR_PREDICTION_DATASET_HACK + no_prediction_feature = prediction_dataset.features.keys.index(predicted_variable)==nil + if no_prediction_feature + one_entry_per_compound = prediction_dataset.data_entries.keys.size == @compounds.size + @compounds.each do |c| + if prediction_dataset.data_entries[c].size != 1 + one_entry_per_compound = false + break + end + end + msg = "prediction-feature not found: '"+predicted_variable+"' in prediction-dataset: "+prediction_dataset_uri.to_s+", available features: "+ + prediction_dataset.features.keys.inspect + if one_entry_per_compound + LOGGER.warn msg + else + raise msg + end + end raise "more predicted than test compounds test:"+@compounds.size.to_s+" < prediction:"+ prediction_dataset.compounds.size.to_s if @compounds.size < prediction_dataset.compounds.size @@ -102,38 +111,79 @@ module Lib predicted_values << nil confidence_values << nil else - if is_classification - value = prediction_dataset.get_predicted_class(c, predicted_variable) - value = value.to_s unless value==nil - raise "illegal class_value of predicted value "+value.to_s+" class: "+value.class.to_s unless value==nil or class_values.index(value)!=nil - predicted_values << class_values.index(value) - else - value = prediction_dataset.get_predicted_regression(c, predicted_variable) - begin - value = value.to_f unless value==nil or value.is_a?(Numeric) - rescue - LOGGER.warn "no numeric value for regression: '"+value.to_s+"'" - value = nil - end - predicted_values << value + case feature_type + when "classification" + # TODO: remove LAZAR_PREDICTION_DATASET_HACK + predicted_values << classification_value(prediction_dataset, c, no_prediction_feature ? nil : predicted_variable, class_values) + when "regression" + predicted_values << regression_value(prediction_dataset, c, no_prediction_feature ? nil : predicted_variable) + end + # TODO confidence_values << prediction_dataset.get_prediction_confidence(c, predicted_variable) + conf = 1 + begin + feature = prediction_dataset.data_entries[c].keys[0] + feature_data = prediction_dataset.features[feature] + conf = feature_data[OT.confidence] if feature_data[OT.confidence]!=nil + rescue + LOGGER.warn "could not get confidence" end - confidence_values << prediction_dataset.get_prediction_confidence(c, predicted_variable) + confidence_values << conf end end task.progress(80) if task # loaded predicted values and confidence - super(predicted_values, actual_values, confidence_values, is_classification, class_values) + super(predicted_values, actual_values, confidence_values, feature_type, class_values) raise "illegal num compounds "+num_info if @compounds.size != @predicted_values.size task.progress(100) if task # done with the mathmatics end + private + def regression_value(dataset, compound, feature) + v = value(dataset, compound, feature) + begin + v = v.to_f unless v==nil or v.is_a?(Numeric) + v + rescue + LOGGER.warn "no numeric value for regression: '"+v.to_s+"'" + nil + end + end + + def classification_value(dataset, compound, feature, class_values) + v = value(dataset, compound, feature) + i = class_values.index(v) + raise "illegal class_value of predicted value "+v.to_s+" class: "+v.class.to_s unless v==nil or i!=nil + i + end + + def value(dataset, compound, feature) + + if feature==nil + v = dataset.data_entries[compound].values[0] + else + v = dataset.data_entries[compound][feature] + end + raise "no array" unless v.is_a?(Array) + if v.size>1 + raise "multiple values" + elsif v.size==1 + v = v[0] + else + v = nil + end + raise "array" if v.is_a?(Array) + v = nil if v.to_s.size==0 + v + end + public def compute_stats res = {} - if @is_classification + case @feature_type + when "classification" (Lib::VAL_CLASS_PROPS).each{ |s| res[s] = send(s)} - else + when "regression" (Lib::VAL_REGR_PROPS).each{ |s| res[s] = send(s) } end return res @@ -152,16 +202,18 @@ module Lib #PENDING! begin - a.push( "http://ambit.uni-plovdiv.bg:8080/ambit2/depict/cdk?search="+ - URI.encode(OpenTox::Compound.new(:uri=>p.identifier(i)).smiles) ) if add_pic + #a.push( "http://ambit.uni-plovdiv.bg:8080/ambit2/depict/cdk?search="+ + # URI.encode(OpenTox::Compound.new(:uri=>p.identifier(i)).smiles) ) if add_pic + a << p.identifier(i)+"/image" rescue => ex + raise ex #a.push("Could not add pic: "+ex.message) - a.push(p.identifier(i)) + #a.push(p.identifier(i)) end a << (format ? p.actual_value(i).to_nice_s : p.actual_value(i)) a << (format ? p.predicted_value(i).to_nice_s : p.predicted_value(i)) - if p.classification? + if p.feature_type=="classification" if (p.predicted_value(i)!=nil and p.actual_value(i)!=nil) a << (p.classification_miss?(i) ? 1 : 0) else @@ -180,7 +232,7 @@ module Lib header << "compound" if add_pic header << "actual value" header << "predicted value" - header << "missclassified" if predictions[0].classification? + header << "missclassified" if predictions[0].feature_type=="classification" header << "confidence value" if predictions[0].confidence_values_available? header << "compound-uri" res.insert(0, header) |