diff options
author | mguetlein <martin.guetlein@gmail.com> | 2014-10-30 15:44:31 +0100 |
---|---|---|
committer | mguetlein <martin.guetlein@gmail.com> | 2014-10-30 15:44:31 +0100 |
commit | c528b931f0b3718509d162ca73982e8367a62069 (patch) | |
tree | 7eeb8915f9b5f2143426a6f60f383cebf21ef239 | |
parent | e9315c965d8828d9c7566b472e236bfe71e77294 (diff) |
move plotting
-rwxr-xr-x | application.rb | 1 | ||||
-rwxr-xr-x | lib/ot_predictions.rb | 119 | ||||
-rw-r--r-- | lib/prediction_data.rb | 2 | ||||
-rw-r--r-- | report/plot_application.rb | 105 | ||||
-rwxr-xr-x | report/report_application.rb | 45 | ||||
-rwxr-xr-x | report/report_content.rb | 4 | ||||
-rw-r--r-- | report/report_format.rb | 8 | ||||
-rwxr-xr-x | report/validation_access.rb | 4 | ||||
-rwxr-xr-x | report/xml_report.rb | 59 |
9 files changed, 247 insertions, 100 deletions
diff --git a/application.rb b/application.rb index 2cc433c..0bb75b5 100755 --- a/application.rb +++ b/application.rb @@ -4,6 +4,7 @@ require 'opentox-server' require "./test/test_application.rb" require "./report/report_application.rb" +require "./report/plot_application.rb" require "./validation/validation_application.rb" diff --git a/lib/ot_predictions.rb b/lib/ot_predictions.rb index e9da980..42dd572 100755 --- a/lib/ot_predictions.rb +++ b/lib/ot_predictions.rb @@ -4,12 +4,15 @@ require "./lib/predictions.rb" module Lib class OTPredictions < Predictions - - def initialize(data, compounds=nil, training_values=nil) + + attr_reader :training_values, :prediction_feature_title + + def initialize(data, compounds=nil, training_values=nil, prediction_feature_title=nil) internal_server_error unless data.is_a?(Hash) super(data) @compounds = compounds @training_values = training_values + @prediction_feature_title = prediction_feature_title end def identifier(instance_index) @@ -20,10 +23,6 @@ module Lib @compounds[instance_index] end - def training_values() - @training_values - end - def compute_stats() res = {} case feature_type @@ -39,7 +38,7 @@ module Lib OTPredictions.to_array( [self] ) end - def self.to_array( predictions, add_pic=false, format=false, validation_uris=nil ) + def self.to_array( predictions, format=false, validation_uris=nil ) confidence_available = false training_data_available = false @@ -57,6 +56,7 @@ module Lib p.num_instances.times do |i| a = {} a["Compound"] = p.identifier(i)+"?media=image/png&size=150" + a["Compound URI"] = p.identifier(i) a["Training value"] = p.training_values[p.identifier(i)] if training_data_available a["Test value"] = p.actual_value(i) a["Predicted value"] = p.predicted_value(i) @@ -75,7 +75,6 @@ module Lib end a["Confidence value"] = p.confidence_value(i) if confidence_available a["Validation URI"] = v_uris[i] if validation_uris - a["Compound URI"] = p.identifier(i) idx = join_map["#{p.identifier(i)}#{v_uris ? 
v_uris[i] : ''}"] if (idx!=nil and format) # join equal compounds unless formatting is disabled @@ -91,7 +90,7 @@ module Lib end end - unless predictions[0].feature_type=="classification" + unless predictions.first.feature_type=="classification" # compute horziontal line step-width to make boxplots inter-comparable # step 1: compute max delta delta = 0 @@ -103,41 +102,50 @@ module Lib # e.g. delta > 100 -> stepwidth = 100, delta within [10-99.9] -> stepwidth = 10, delta within [1-9.99] -> stepwidth = 1 hline = 10**Math.log(delta,10).floor end - + + transformer = PredictionTransformer.new(res.collect{|r| r["Compound URI"]},predictions.first.prediction_feature_title) + res.size.times do |r| - unless predictions[0].feature_type=="classification" + # add boxplot + unless predictions.first.feature_type=="classification" # add boxplots including training, test and predicted values val_str = training_data_available ? "training=#{[res[r]["Training value"]].join(",")};" : "" val_str << "test=#{[res[r]["Test value"]].join(",")};predicted=#{[res[r]["Predicted value"]].join(",")}" - res[r]["Boxplot"] = File.join($validation[:uri],"/report/boxplot/#{val_str}?hline=#{hline}") + res[r]["Boxplot"] = File.join($validation[:uri],"/boxplot/#{val_str}?hline=#{hline}&size=150") end - # format values + # render missing values if format res[r]["Test value"] = "'missing'" unless res[r]["Test value"] - res[r]["Predicted value"] = (res[r]["Training value"] ? "'not-predicted'" : "'prediction-failed'") unless res[r]["Predicted value"] + res[r]["Predicted value"] = (res[r]["Training value"] ? 
"'in-training-data'" : "'outside-AD'") unless res[r]["Predicted value"] end + # handle arrays + # add transformed values ["Training value","Test value","Predicted value","Error","Confidence value","Validation URI"].each do |v| next unless res[r].has_key?(v) - if [res[r][v]].flatten.size==1 # if single value - res[r][v] = res[r][v].to_nice_s if format - else # else if multiple values (-> format is enabled) - unless predictions[0].feature_type=="classification" - # for regression, compute mean (to fix table sorting) - mean = res[r][v].inject(0.0) { |sum, el| sum + el } / res[r][v].size - res[r][v] = mean.to_nice_s + " (" + res[r][v].collect{|v| v.to_nice_s}.join(", ")+")" - else - res[r][v] = res[r][v].join(", ") - end + vals = [res[r][v]].flatten + do_transform = (transformer.do_transform? and ["Training value","Test value","Predicted value"].include?(v)) + if predictions.first.feature_type=="classification" or vals.any?{|x| !x.is_a?(Numeric)} + res[r][v] = vals.join(", ") + elsif vals.size==1 + res[r][v] = vals.first.to_nice_s + res[r][v] += "\n#{transformer.transform(vals.first,res[r]["Compound URI"])}" if do_transform + else # vals.size > 1 + mean = vals.inject(0.0) { |sum, el| sum + el } / vals.size + res[r][v] = "#{mean.to_nice_s} (mean)" + res[r][v] += "\n#{transformer.transform(mean,res[r]["Compound URI"])}" if do_transform + res[r][v] += "\n("+vals.collect{|v| v.to_nice_s}.join(", ") + res[r][v] += "\n#{transformer.transform(vals,res[r]["Compound URI"])}" if do_transform + res[r][v] += ")" end end end header = [] - header << "Compound" if add_pic + header << "Compound" if format header << "Training value" if training_data_available header << "Test value" header << "Predicted value" - if predictions[0].feature_type=="classification" + if predictions.first.feature_type=="classification" header << "Classification" else header << "Error" @@ -152,7 +160,66 @@ module Lib res.each do |a| array << header.collect{|h| a[h]} end + + if transformer.do_transform? 
+ array[0].each_with_index do |v,i| + array[0][i] += "\n[#{transformer.unit}]" if ["Training value","Test value","Predicted value","Error"].include?(v) + end + end + array end end + + ########## HACK FOR LOEAL MODELS ############################## + + class PredictionTransformer + + def initialize(compounds, prediction_feature_title) + @prediction_feature_title = prediction_feature_title + case prediction_feature_title + when "LOAEL_log_mmol_kg_bw_day" + @mw = {} + OpenTox::Algorithm::Descriptor.physchem(compounds.collect{|c| OpenTox::Compound.new(c)},["Openbabel.mw"]).each do |uri,hash| + @mw[uri] = hash["Openbabel.mw"].to_f + end + end + end + + def do_transform? + case @prediction_feature_title + when /LOAEL_log_.mol_kg_bw_day/, "LOAEL_log_mg_kg_bw_day" + true + else + false + end + end + + def unit + case @prediction_feature_title + when /LOAEL_log_.mol_kg_bw_day/ + "-log mol/kg bw/day" + when "LOAEL_log_mg_kg_bw_day" + "log mg/kg bw/day" + else + nil + end + end + + def transform_single(val, c_uri) + case @prediction_feature_title + when /LOAEL_log_.mol_kg_bw_day/ + val = (10**(-1*val)) * (@mw[c_uri]*1000) + when "LOAEL_log_mg_kg_bw_day" + val = 10**val + else + nil + end + val ? 
(val*10).round/10.0 : nil + end + + def transform(val, c_uri) + "["+[val].flatten.collect{|v| transform_single(v,c_uri)}.join(", ")+" mg/kg bw/day]" + end + end end diff --git a/lib/prediction_data.rb b/lib/prediction_data.rb index a6e79c6..58c5570 100644 --- a/lib/prediction_data.rb +++ b/lib/prediction_data.rb @@ -9,7 +9,7 @@ module Lib def self.filter_data( data, compounds, min_confidence, min_num_predictions, max_num_predictions, prediction_index=nil ) internal_server_error "cannot filter anything, no confidence values available" if data[:confidence_values][0]==nil - bad_request_error "please specify either min_confidence or max_num_predictions" if + bad_request_error "please specify either min_confidence XOR max_num_predictions" if (min_confidence!=nil and max_num_predictions!=nil) || (min_confidence==nil and max_num_predictions==nil) bad_request_error "min_num_predictions only valid for min_confidence" if (min_confidence==nil and min_num_predictions!=nil) diff --git a/report/plot_application.rb b/report/plot_application.rb new file mode 100644 index 0000000..c3c4d4f --- /dev/null +++ b/report/plot_application.rb @@ -0,0 +1,105 @@ +require 'digest/md5' + +class Validation::Application < OpenTox::Application + + helpers do + def eval_r(r, cmd) + $logger.debug cmd + r.eval cmd + end + end + + # produces a boxplot + # params should be given in URI, e.g. 
: ..boxplot/test_values=5.8,5.6,5.3;predicted=5.9 + # ; separates series (or categories) + # = seperates key and values for each series + # , seperates values for each series + # 'hline=<float>' can be given as optional param (with '?') to draw horizontal lines with the given stepwidth + # (default: no horizontal lines, lines allow to compare different plots) + get '/validation/boxplot/:vals' do + + filename = "#{Digest::MD5.hexdigest(params[:vals].inspect+params[:hline].inspect+params[:size].inspect)}.png" + unless (File.exists?("/tmp/#{filename}")) + # retrieve values + vals = {} + params[:vals].split(";").collect do |x| + y = x.split("=") + vals[y[0]] = (y[1] ? y[1].split(",").collect{|z| z.to_f} : nil) + end + names = "c(\""+vals.keys.join("\",\"")+"\")" + values = vals.values.collect{|a| "c("+(a ? a.join(",") : "")+")"}.join(",") + + # the min range is set to hline*2 to draw at least two horizontal lines + hline = params[:hline] ? params[:hline].to_f : nil + unless hline + ylim = "" + else + min = vals.values.flatten.compact.min + max = vals.values.flatten.compact.max + if (max-min<(hline*2)) + to_add = (hline*2)-(max-min) + min -= to_add/2.0 + max += to_add/2.0 + end + range = "c(#{min},#{max})" + ylim = ",ylim=#{range}" + end + + # return "boxplot(#{values},col=c('red','blue','green'),names=#{names}#{ylim})" + @r = RinRuby.new(true,false) + size = (params[:size] ? 
params[:size].to_i : 300) + eval_r(@r,"png(\"/tmp/#{filename}\",width=#{2*size},height=#{size})") + eval_r(@r,"par(mai=c(0.5,0.5,0.2,0.2))") + eval_r(@r,"boxplot(#{values},col=c('red','blue','green'),names=#{names}#{ylim})") + if hline + # seq defines were to draw hline + # example: min -9.5, max 10.5, hline = 2 -> seq(-10,12,by=2) -> produces lines from -10 to 12 with step-width 2 + eval_r(@r,"abline(h=seq(floor(#{min}/#{hline})*#{hline}, round(#{max}/#{hline})*#{hline}, by=#{hline}),lty=2,col='dimgrey')") + end + eval_r(@r,'dev.off()') + @r.quit + end + send_file("/tmp/#{filename}",:filename=>"#{params[:vals]}.png",:type=>'image/png',:disposition => 'inline') + end + + get '/validation/binplot/:vals' do + + filename = "#{Digest::MD5.hexdigest(params[:vals].inspect+params[:size].inspect)}.png" + unless (File.exists?("/tmp/#{filename}")) and false + + @r = RinRuby.new(true,false) + size = (params[:size] ? params[:size].to_i : 300) + + # each bin is given as x1,x2,y + xvals = [] + yvals = [] + params[:vals].split(";").each do |bin| + x1,x2,y = bin.split(",") + if xvals.size==0 # first add point at y=0 to add a vertrical line from 0 to x1 + xvals << x1.to_f + yvals << 0 + end + # for each bin, add an additional point at y=0 to draw a vertrical line to 0 + xvals += [x1.to_f, x2.to_f, x2.to_f] + yvals += [y.to_f, y.to_f, 0] + end + # add first point again to draw horizontal line at y=0 + xvals << xvals[0] + yvals << yvals[0] + + eval_r(@r,"x <- c(#{xvals.join(",")})") + eval_r(@r,"y <- c(#{yvals.join(",")})") + eval_r(@r,"png(\"/tmp/#{filename}\",width=#{2*size},height=#{size})") +# eval_r(@r,"par(mai=c(0.5,0.5,0.2,0.2))") +# eval_r(@r,"par(mfrow=c(2,4))") + eval_r(@r,"plot(x,y,type='n', xlim=rev(range(x)), ylim=c(0,max(y)), xlab='#{params[:xlab]}', ylab='#{params[:ylab]}')")# type='n', +# eval_r(@r,"par(pch=22, col='red')") + eval_r(@r,"lines(x,y, type='l', col='red')") + eval_r(@r,"title(main='#{params[:title]}')") + eval_r(@r,'dev.off()') + @r.quit + end + 
send_file("/tmp/#{filename}",:filename=>"#{params[:vals]}.png",:type=>'image/png',:disposition => 'inline') + end + +end diff --git a/report/report_application.rb b/report/report_application.rb index 2bddf97..4c2fe29 100755 --- a/report/report_application.rb +++ b/report/report_application.rb @@ -35,51 +35,6 @@ class Validation::Application < OpenTox::Application end end - # produces a boxplot - # params should be given in URI, e.g. : ..boxplot/test_values=5.8,5.6,5.3;predicted=5.9 - # ; separates series (or categories) - # = seperates key and values for each series - # , seperates values for each series - # 'hline=<float>' can be given as optional param (with '?') to specifiy the stepwidth of horizontal lines - # (default is 1.0, horizontal lines allow to compare different plots) - get '/validation/report/boxplot/:vals' do - - filename = "#{Base64.encode64(params[:vals].inspect+params[:hline].inspect)}.png" - unless (File.exists?("/tmp/#{filename}")) - # retrieve values - vals = {} - params[:vals].split(";").collect do |x| - y = x.split("=") - vals[y[0]] = (y[1] ? y[1].split(",").collect{|z| z.to_f} : nil) - end - names = "c(\""+vals.keys.join("\",\"")+"\")" - values = vals.values.collect{|a| "c("+(a ? a.join(",") : "")+")"}.join(",") - - # the min range is set to hline*2 to draw at least two horizontal lines - hline = params[:hline] ? 
params[:hline].to_f : 1.0 - min = vals.values.flatten.compact.min - max = vals.values.flatten.compact.max - if (max-min<(hline*2)) - to_add = (hline*2)-(max-min) - min -= to_add/2.0 - max += to_add/2.0 - end - range = "c(#{min},#{max})" - - # return "boxplot(#{values},col=c('red','blue','green'),names=#{names},ylim=#{range})" - @r = RinRuby.new(true,false) - @r.eval "png(\"/tmp/#{filename}\",width=300,height=150)" - @r.eval "par(mai=c(0.5,0.5,0.2,0.2))" - @r.eval "boxplot(#{values},col=c('red','blue','green'),names=#{names},ylim=#{range})" - # seq defines were to draw hline - # example: min -9.5, max 10.5, hline = 2 -> seq(-10,12,by=2) -> produces lines from -10 to 12 with step-width 2 - @r.eval "abline(h=seq(floor(#{min}/#{hline})*#{hline}, round(#{max}/#{hline})*#{hline}, by=#{hline}),lty=2,col='dimgrey')" - @r.eval 'dev.off()' - @r.quit - end - send_file("/tmp/#{filename}",:filename=>"#{params[:vals]}.png",:type=>'image/png',:disposition => 'inline') - end - get '/validation/report/?' do perform do |rs| case request.env['HTTP_ACCEPT'].to_s diff --git a/report/report_content.rb b/report/report_content.rb index eac44a3..98d8cbb 100755 --- a/report/report_content.rb +++ b/report/report_content.rb @@ -73,7 +73,7 @@ class Reports::ReportContent @xml_report.add_paragraph(section_table, section_text) if section_text v_uris = validation_set.validations.collect{|v| Array.new(v.num_instances.to_i,v.validation_uri)} if add_validation_uris @xml_report.add_table(section_table, table_title, Lib::OTPredictions.to_array(validation_set.validations.collect{|v| v.get_predictions}, - true, true, v_uris)) + true, v_uris)) else @xml_report.add_paragraph(section_table, "No prediction info available.") end @@ -412,4 +412,4 @@ class Reports::ReportContent @tmp_files.delete(tmp_file_name) if @tmp_files.has_key?(tmp_file_name) end -end
\ No newline at end of file +end diff --git a/report/report_format.rb b/report/report_format.rb index 8d9d55e..68781c6 100644 --- a/report/report_format.rb +++ b/report/report_format.rb @@ -85,9 +85,11 @@ module Reports::ReportFormat # HACK to add java script to html file (modifying the xsl would probably be the clean correct solution) html_file = File.join(directory,html_filename.to_s) - content = File.read(html_file, :encoding=>"ISO-8859-1").gsub(/\<\//, "\n</") - content = content.gsub(/\<body\s/,"\n#{JAVASCRIPT}\n<body ") - content = content.gsub(/table summary="Predic/,"table class=\"tablesorter\" summary=\"Predic") + content = File.read(html_file, :encoding=>"ISO-8859-1") + content.gsub!(/\<\//, "\n</") # for better readability of the html source + content.gsub!(/&lt;br&gt;/,"<br>") # there is no straightforward way to add line breaks in docbook + content.gsub!(/\<body\s/,"\n#{JAVASCRIPT}\n<body ") + content.gsub!(/table summary="Predic/,"table class=\"tablesorter\" summary=\"Predic") File.open(html_file, 'wb', :encoding=>"ISO-8859-1") { |file| file.write(content) } end diff --git a/report/validation_access.rb b/report/validation_access.rb index 6248ff2..c3a5121 100755 --- a/report/validation_access.rb +++ b/report/validation_access.rb @@ -201,11 +201,11 @@ class Reports::ValidationDB if validation.training_dataset_uri d = Lib::DatasetCache.find(validation.training_dataset_uri) data.compounds.each do |c| - training_values[c] = (d.compound_indices(c) ? 
d.compound_indices(c).collect{|idx| d.data_entry_value(idx,validation.prediction_feature)} : nil) + training_values[c] = d.compound_indices(c).collect{|idx| d.data_entry_value(idx,validation.prediction_feature)} if d.compound_indices(c) end end task.progress(90) if task - Lib::OTPredictions.new( data.data, data.compounds, training_values ) + Lib::OTPredictions.new( data.data, data.compounds, training_values, OpenTox::Feature.new(validation.prediction_feature).title ) end def get_accept_values( validation ) diff --git a/report/xml_report.rb b/report/xml_report.rb index 2fbbbbe..feb49f8 100755 --- a/report/xml_report.rb +++ b/report/xml_report.rb @@ -165,17 +165,29 @@ module Reports element << table table end - - def add_image( element, url ) #, scale=false ) - image = Element.new("imageobject") - params = {"fileref" => url, "format"=>"PNG"} + + def self.image( image_url, link_url=nil ) + img = Element.new("imageobject") + params = {"fileref" => image_url, "format"=>"PNG"} #params["contentwidth"] = "2in" imagedata = Reports::XMLReportUtil.attribute_element("imagedata",params) - image << imagedata - element << image - return image + img << imagedata + unless link_url + img + else + ulink = Element.new("ulink") + ulink.add_attributes({"url" => link_url }) + ulink << img + ulink + end end + def add_image( element, image_url, link_url=nil ) + img = XMLReport.image( image_url, link_url ) + element << img + img + end + # adds a table to a REXML:Element, _table_values_ should be a multi-dimensional-array, returns the table as element # # call-seq: @@ -210,7 +222,7 @@ module Reports if auto_link_urls && v.to_s =~ /^http(s?):\/\// add_url(entry, v.to_s) else - entry.text = v.to_s + entry.text = v.to_s.gsub(/\n/,"<br>") # will be xml-escaped, post-processing required end row << entry end @@ -223,8 +235,10 @@ module Reports row = Element.new("row") r.each do |v| entry = Element.new("entry") - if auto_link_urls && (v.to_s =~ /depict|boxplot|media=image|.png$/) #PENDING - 
add_image(entry, v.to_s) + if v.is_a?(Element) + entry << v + elsif auto_link_urls && (v.to_s =~ /depict|boxplot|media=image|.png$/) #PENDING + add_image(entry, v.to_s, (v.to_s=~/\?/ ? v.to_s.split("?").first : nil)) elsif auto_link_urls && v.to_s =~ /^http(s?):\/\// #add_url(entry, v.to_s, v.to_s) v.to_s.split(" ").each do |vv| @@ -236,6 +250,7 @@ module Reports else text = v.to_s text.gsub!(/\+\-/,"±") + text.gsub!(/\n/,"<br>") # is xml-escaped, post-processing required entry << Text.new(text, true, nil, true) end row << entry @@ -292,7 +307,8 @@ module Reports end end - @doc.write(out) #,2, true, true) + @doc.write(out) + # @doc.write(out, 2) out.flush end @@ -303,16 +319,17 @@ module Reports rep = Reports::XMLReport.new("Demo report", "subtitle" "Fistname", "Surname") section1 = rep.add_section(rep.get_root_element, "First Section") - rep.add_paragraph(section1, "some text") - rep.add_paragraph(section1, "even more text") - rep.add_imagefigure(section1, "Figure", "http://upload.wikimedia.org/wikipedia/commons/thumb/e/eb/Siegel_der_Albert-Ludwigs-Universit%C3%A4t_Freiburg.svg/354px-Siegel_der_Albert-Ludwigs-Universit%C3%A4t_Freiburg.svg", "SVG", 100, "this is the logo of freiburg university") - section2 = rep.add_section(rep.get_root_element,"Second Section") - rep.add_section(section2,"A Subsection") - rep.add_section(section2,"Another Subsection") - rep.add_url(section2,"www.google.de", "link zu google") - sec3 = rep.add_section(rep.get_root_element,"Third Section") - rep.add_paragraph(sec3, "some \n more text for section 3",true) - rep.add_image(sec3, "http://ambit.uni-plovdiv.bg:8080/ambit2/depict/cdk?search=c1ccccc1") + rep.add_table(section1, "Table", [["a\n1","b"],["c","d"]]) +# rep.add_paragraph(section1, "some text") +# rep.add_paragraph(section1, "even more text") +# rep.add_imagefigure(section1, "Figure", 
#"http://upload.wikimedia.org/wikipedia/commons/thumb/e/eb/Siegel_der_Albert-Ludwigs-Universit%C3%A4t_Freiburg.svg/354px-Siegel_der_Albert-Ludwigs-Universit%C3%A4t_Freiburg.svg", "SVG", 100, "this is the logo of freiburg university") +# section2 = rep.add_section(rep.get_root_element,"Second Section") +# rep.add_section(section2,"A Subsection") +# rep.add_section(section2,"Another Subsection") +# rep.add_url(section2,"www.google.de", "link zu google") +# sec3 = rep.add_section(rep.get_root_element,"Third Section") +# rep.add_paragraph(sec3, "some \n more text for section 3",true) +# rep.add_image(sec3, "http://ambit.uni-plovdiv.bg:8080/ambit2/depict/cdk?search=c1ccccc1") #vals= [["a", "b", "c"],["a2", "b2", "c2"],["1", "2", "http://3"]] #rep.add_table(rep.get_root_element, "demo-table", vals) |