diff options
Diffstat (limited to 'lib/ot_predictions.rb')
-rwxr-xr-x | lib/ot_predictions.rb | 119 |
1 files changed, 93 insertions, 26 deletions
diff --git a/lib/ot_predictions.rb b/lib/ot_predictions.rb index e9da980..42dd572 100755 --- a/lib/ot_predictions.rb +++ b/lib/ot_predictions.rb @@ -4,12 +4,15 @@ require "./lib/predictions.rb" module Lib class OTPredictions < Predictions - - def initialize(data, compounds=nil, training_values=nil) + + attr_reader :training_values, :prediction_feature_title + + def initialize(data, compounds=nil, training_values=nil, prediction_feature_title=nil) internal_server_error unless data.is_a?(Hash) super(data) @compounds = compounds @training_values = training_values + @prediction_feature_title = prediction_feature_title end def identifier(instance_index) @@ -20,10 +23,6 @@ module Lib @compounds[instance_index] end - def training_values() - @training_values - end - def compute_stats() res = {} case feature_type @@ -39,7 +38,7 @@ module Lib OTPredictions.to_array( [self] ) end - def self.to_array( predictions, add_pic=false, format=false, validation_uris=nil ) + def self.to_array( predictions, format=false, validation_uris=nil ) confidence_available = false training_data_available = false @@ -57,6 +56,7 @@ module Lib p.num_instances.times do |i| a = {} a["Compound"] = p.identifier(i)+"?media=image/png&size=150" + a["Compound URI"] = p.identifier(i) a["Training value"] = p.training_values[p.identifier(i)] if training_data_available a["Test value"] = p.actual_value(i) a["Predicted value"] = p.predicted_value(i) @@ -75,7 +75,6 @@ module Lib end a["Confidence value"] = p.confidence_value(i) if confidence_available a["Validation URI"] = v_uris[i] if validation_uris - a["Compound URI"] = p.identifier(i) idx = join_map["#{p.identifier(i)}#{v_uris ? v_uris[i] : ''}"] if (idx!=nil and format) # join equal compounds unless formatting is disabled @@ -91,7 +90,7 @@ module Lib end end - unless predictions[0].feature_type=="classification" + unless predictions.first.feature_type=="classification" # compute horziontal line step-width to make boxplots inter-comparable # step 1: compute max delta delta = 0 @@ -103,41 +102,50 @@ module Lib # e.g. delta > 100 -> stepwidth = 100, delta within [10-99.9] -> stepwidth = 10, delta within [1-9.99] -> stepwidth = 1 hline = 10**Math.log(delta,10).floor end - + + transformer = PredictionTransformer.new(res.collect{|r| r["Compound URI"]},predictions.first.prediction_feature_title) + res.size.times do |r| - unless predictions[0].feature_type=="classification" + # add boxplot + unless predictions.first.feature_type=="classification" # add boxplots including training, test and predicted values val_str = training_data_available ? "training=#{[res[r]["Training value"]].join(",")};" : "" val_str << "test=#{[res[r]["Test value"]].join(",")};predicted=#{[res[r]["Predicted value"]].join(",")}" - res[r]["Boxplot"] = File.join($validation[:uri],"/report/boxplot/#{val_str}?hline=#{hline}") + res[r]["Boxplot"] = File.join($validation[:uri],"/boxplot/#{val_str}?hline=#{hline}&size=150") end - # format values + # render missing values if format res[r]["Test value"] = "'missing'" unless res[r]["Test value"] - res[r]["Predicted value"] = (res[r]["Training value"] ? "'not-predicted'" : "'prediction-failed'") unless res[r]["Predicted value"] + res[r]["Predicted value"] = (res[r]["Training value"] ? "'in-training-data'" : "'outside-AD'") unless res[r]["Predicted value"] end + # handle arrays + # add transformed values ["Training value","Test value","Predicted value","Error","Confidence value","Validation URI"].each do |v| next unless res[r].has_key?(v) - if [res[r][v]].flatten.size==1 # if single value - res[r][v] = res[r][v].to_nice_s if format - else # else if multiple values (-> format is enabled) - unless predictions[0].feature_type=="classification" - # for regression, compute mean (to fix table sorting) - mean = res[r][v].inject(0.0) { |sum, el| sum + el } / res[r][v].size - res[r][v] = mean.to_nice_s + " (" + res[r][v].collect{|v| v.to_nice_s}.join(", ")+")" - else - res[r][v] = res[r][v].join(", ") - end + vals = [res[r][v]].flatten + do_transform = (transformer.do_transform? and ["Training value","Test value","Predicted value"].include?(v)) + if predictions.first.feature_type=="classification" or vals.any?{|x| !x.is_a?(Numeric)} + res[r][v] = vals.join(", ") + elsif vals.size==1 + res[r][v] = vals.first.to_nice_s + res[r][v] += "\n#{transformer.transform(vals.first,res[r]["Compound URI"])}" if do_transform + else # vals.size > 1 + mean = vals.inject(0.0) { |sum, el| sum + el } / vals.size + res[r][v] = "#{mean.to_nice_s} (mean)" + res[r][v] += "\n#{transformer.transform(mean,res[r]["Compound URI"])}" if do_transform + res[r][v] += "\n("+vals.collect{|v| v.to_nice_s}.join(", ") + res[r][v] += "\n#{transformer.transform(vals,res[r]["Compound URI"])}" if do_transform + res[r][v] += ")" end end end header = [] - header << "Compound" if add_pic + header << "Compound" if format header << "Training value" if training_data_available header << "Test value" header << "Predicted value" - if predictions[0].feature_type=="classification" + if predictions.first.feature_type=="classification" header << "Classification" else header << "Error" @@ -152,7 +160,66 @@ module Lib res.each do |a| array << header.collect{|h| a[h]} end + + if transformer.do_transform? + array[0].each_with_index do |v,i| + array[0][i] += "\n[#{transformer.unit}]" if ["Training value","Test value","Predicted value","Error"].include?(v) + end + end + array end end + + ########## HACK FOR LOEAL MODELS ############################## + + class PredictionTransformer + + def initialize(compounds, prediction_feature_title) + @prediction_feature_title = prediction_feature_title + case prediction_feature_title + when "LOAEL_log_mmol_kg_bw_day" + @mw = {} + OpenTox::Algorithm::Descriptor.physchem(compounds.collect{|c| OpenTox::Compound.new(c)},["Openbabel.mw"]).each do |uri,hash| + @mw[uri] = hash["Openbabel.mw"].to_f + end + end + end + + def do_transform? + case @prediction_feature_title + when /LOAEL_log_.mol_kg_bw_day/, "LOAEL_log_mg_kg_bw_day" + true + else + false + end + end + + def unit + case @prediction_feature_title + when /LOAEL_log_.mol_kg_bw_day/ + "-log mol/kg bw/day" + when "LOAEL_log_mg_kg_bw_day" + "log mg/kg bw/day" + else + nil + end + end + + def transform_single(val, c_uri) + case @prediction_feature_title + when /LOAEL_log_.mol_kg_bw_day/ + val = (10**(-1*val)) * (@mw[c_uri]*1000) + when "LOAEL_log_mg_kg_bw_day" + val = 10**val + else + nil + end + val ? (val*10).round/10.0 : nil + end + + def transform(val, c_uri) + "["+[val].flatten.collect{|v| transform_single(v,c_uri)}.join(", ")+" mg/kg bw/day]" + end + end end |