diff options
author | Christoph Helma <helma@in-silico.ch> | 2010-08-06 13:27:13 +0200 |
---|---|---|
committer | Christoph Helma <helma@in-silico.ch> | 2010-08-06 13:27:13 +0200 |
commit | 8e74fbbe98b4ef13c3d7a52352ab0e5ec99b7675 (patch) | |
tree | e7331bdbe3ae9936b7acc09eec6e3a3efeb5ee0b | |
parent | fffc9e0b2657e29eeb7ee73f5ff51dafa0b6acfb (diff) |
prediction caching
-rw-r--r-- | application.rb | 15 | ||||
-rw-r--r-- | lazar.rb | 120 | ||||
-rw-r--r-- | views/prediction.haml | 133 |
3 files changed, 67 insertions, 201 deletions
diff --git a/application.rb b/application.rb index 1c94f24..4bf6e94 100644 --- a/application.rb +++ b/application.rb @@ -11,6 +11,15 @@ class Model property :created_at, DateTime end +class Prediction + # cache predictions + include DataMapper::Resource + property :id, Serial + property :compound_uri, String, :length => 255 + property :model_uri, String, :length => 255 + property :yaml, Text, :length => 2**32-1 +end + DataMapper.auto_upgrade! require 'lazar.rb' @@ -51,3 +60,9 @@ delete '/?' do response['Content-Type'] = 'text/plain' "All Models deleted." end + +delete '/prediction?' do + Prediction.auto_migrate! + response['Content-Type'] = 'text/plain' + "All datasets deleted." +end @@ -2,8 +2,8 @@ # workaround to initialize R non-interactively (former rinruby versions did this by default) # avoids compiling R with X R = nil -require ("rinruby") -require ("haml") +require "rinruby" +require "haml" class Lazar < Model @@ -12,7 +12,7 @@ class Lazar < Model # AM begin # regression function, created 06/10 # ch: please properly integrate this into the workflow. 
You will need some criterium for distinguishing regression/classification (hardcoded regression for testing) - def regression(compound_uri,prediction) + def regression(compound_uri,prediction,verbose=false) lazar = YAML.load self.yaml compound = OpenTox::Compound.new(:uri => compound_uri) @@ -21,9 +21,8 @@ class Lazar < Model compound_matches = compound.match lazar.features conf = 0.0 - similarities = {} - activities = {} - fragments = { :activating => {}, :deactivating => {} } + features = { :activating => [], :deactivating => [] } + neighbors = {} regression = nil regr_occurrences = [] # occurrence vector with {0,1} entries @@ -38,9 +37,22 @@ class Lazar < Model sim = OpenTox::Algorithm::Similarity.weighted_tanimoto(compound_matches,matches,lazar.p_values) lazar.activities[uri].each do |act| if sim > 0.3 - similarities[uri] = sim - activities[uri] = [] unless activities[uri] - activities[uri] << act + #similarities[uri] = sim + #activities[uri] = [] unless activities[uri] + #activities[uri] << act + neighbors[uri] = {:similarity => sim} + neighbors[uri][:features] = { :activating => [], :deactivating => [] } unless neighbors[uri][:features] + matches.each do |m| + if lazar.effects[m] == 'activating' + neighbors[uri][:features][:activating] << {:smarts => m, :p_value => lazar.p_values[m]} + elsif lazar.effects[m] == 'deactivating' + neighbors[uri][:features][:deactivating] << {:smarts => m, :p_value => lazar.p_values[m]} + end + end + lazar.activities[uri].each do |act| + neighbors[uri][:activities] = [] unless neighbors[uri][:activities] + neighbors[uri][:activities] << act + end conf += OpenTox::Utils.gauss(sim) sims << OpenTox::Utils.gauss(sim) #TODO check for 0 s @@ -51,7 +63,7 @@ class Lazar < Model end end end - conf = conf/similarities.size + conf = conf/neighbors.size LOGGER.debug "Regression: found " + neighbor_matches.size.to_s + " neighbors." 
@@ -107,32 +119,30 @@ class Lazar < Model prediction.compounds << compound_uri prediction.features << feature_uri prediction.data[compound_uri] = [] unless prediction.data[compound_uri] - compound_matches.each { |m| fragments[lazar.effects[m].to_sym][m] = lazar.p_values[m] } + compound_matches.each { |m| features[lazar.effects[m].to_sym] << {:smarts => m, :p_value => lazar.p_values[m] } } tuple = { File.join(@@config[:services]["opentox-model"],"lazar#regression") => regression, - File.join(@@config[:services]["opentox-model"],"lazar#confidence") => conf, - File.join(@@config[:services]["opentox-model"],"lazar#similarities") => similarities, - File.join(@@config[:services]["opentox-model"],"lazar#activities") => activities, - File.join(@@config[:services]["opentox-model"],"lazar#features") => fragments + File.join(@@config[:services]["opentox-model"],"lazar#confidence") => conf } + if verbose + tuple[File.join(@@config[:services]["opentox-model"],"lazar#neighbors")] = neighbors + tuple[File.join(@@config[:services]["opentox-model"],"lazar#features")] = features + end prediction.data[compound_uri] << {feature_uri => tuple} end - end # AM end - def classification(compound_uri,prediction) + def classification(compound_uri,prediction,verbose=false) lazar = YAML.load self.yaml compound = OpenTox::Compound.new(:uri => compound_uri) compound_matches = compound.match lazar.features conf = 0.0 - similarities = {} - #activities = {} - fragments = { :activating => {}, :deactivating => {} } + features = { :activating => [], :deactivating => [] } neighbors = {} classification = nil @@ -140,7 +150,6 @@ class Lazar < Model sim = OpenTox::Algorithm::Similarity.weighted_tanimoto(compound_matches,matches,lazar.p_values) if sim > 0.3 - similarities[uri] = sim neighbors[uri] = {:similarity => sim} neighbors[uri][:features] = { :activating => [], :deactivating => [] } unless neighbors[uri][:features] matches.each do |m| @@ -150,11 +159,7 @@ class Lazar < Model 
neighbors[uri][:features][:deactivating] << {:smarts => m, :p_value => lazar.p_values[m]} end end - #neighbors[uri][:features] = [] unless neighbors[uri][:features] - #neighbors[uri][:features] << matches lazar.activities[uri].each do |act| - #activities[uri] = [] unless activities[uri] - #activities[uri] << act neighbors[uri][:activities] = [] unless neighbors[uri][:activities] neighbors[uri][:activities] << act case act.to_s @@ -167,7 +172,7 @@ class Lazar < Model end end - conf = conf/similarities.size + conf = conf/neighbors.size if conf > 0.0 classification = true elsif conf < 0.0 @@ -178,17 +183,17 @@ class Lazar < Model prediction.compounds << compound_uri prediction.features << feature_uri prediction.data[compound_uri] = [] unless prediction.data[compound_uri] - compound_matches.each { |m| fragments[lazar.effects[m].to_sym][m] = lazar.p_values[m] } - #fragments[:activating] = fragments[:activating].sort{|a,b| b[1] <=> a[1]} + compound_matches.each { |m| features[lazar.effects[m].to_sym] << {:smarts => m, :p_value => lazar.p_values[m] } } tuple = { - File.join(@@config[:services]["opentox-model"],"lazar#classification") => classification, - File.join(@@config[:services]["opentox-model"],"lazar#confidence") => conf, - File.join(@@config[:services]["opentox-model"],"lazar#neighbors") => neighbors, - File.join(@@config[:services]["opentox-model"],"lazar#features") => fragments + File.join(@@config[:services]["opentox-model"],"lazar#classification") => classification, + File.join(@@config[:services]["opentox-model"],"lazar#confidence") => conf } + if verbose + tuple[File.join(@@config[:services]["opentox-model"],"lazar#neighbors")] = neighbors + tuple[File.join(@@config[:services]["opentox-model"],"lazar#features")] = features + end prediction.data[compound_uri] << {feature_uri => tuple} end - end def database_activity?(compound_uri,prediction) @@ -203,10 +208,6 @@ class Lazar < Model prediction.data[compound_uri] = [] unless prediction.data[compound_uri] 
db_activities.each do |act| prediction.data[compound_uri] << {feature_uri => act} - #tuple = { - # :classification => act} - #:confidence => "experimental"} - #prediction.data[compound_uri] << {feature_uri => tuple} end true else @@ -317,41 +318,25 @@ post '/:id/?' do # create prediction end if compound_uri - # AM: switch here between regression and classification - begin - eval "lazar.#{prediction_type}(compound_uri,@prediction) unless lazar.database_activity?(compound_uri,@prediction)" - rescue - LOGGER.error "#{prediction_type} failed for #{compound_uri} with #{$!} " - halt 500, "Prediction of #{compound_uri} failed." - end + # look for cached prediction first + if cached_prediction = Prediction.first(:model_uri => lazar.uri, :compound_uri => compound_uri) + @prediction = YAML.load(cached_prediction.yaml) + else + #unless @prediction = YAML.load(Prediction.first(:model_uri => lazar.uri, :compound_uri => compound_uri).yaml) + begin + # AM: switch here between regression and classification + eval "lazar.#{prediction_type}(compound_uri,@prediction,true) unless lazar.database_activity?(compound_uri,@prediction)" + Prediction.create(:model_uri => lazar.uri, :compound_uri => compound_uri, :yaml => @prediction.to_yaml) + rescue + LOGGER.error "#{prediction_type} failed for #{compound_uri} with #{$!} " + halt 500, "Prediction of #{compound_uri} failed." + end + end case request.env['HTTP_ACCEPT'] when /yaml/ @prediction.to_yaml when 'application/rdf+xml' @prediction.to_owl - when /html/ - @compound = OpenTox::Compound.new(:uri => compound_uri) - @title = @prediction.title - if @prediction.data[@compound.uri] - if @prediction.creator.to_s.match(/model/) # real prediction - p = @prediction.data[@compound.uri].first.values.first - if !p[File.join(@@config[:services]["opentox-model"],"lazar#classification")].nil? 
- feature = File.join(@@config[:services]["opentox-model"],"lazar#classification") - elsif !p[File.join(@@config[:services]["opentox-model"],"lazar#regression")].nil? - feature = File.join(@@config[:services]["opentox-model"],"lazar#regression") - end - @activity = p[feature] - @confidence = p[File.join(@@config[:services]["opentox-model"],"lazar#confidence")] - @neighbors = p[File.join(@@config[:services]["opentox-model"],"lazar#neighbors")]#.sort{|a,b| b.last[:similarity] <=> a.last[:similarity]} - #@training_activities = p[File.join(@@config[:services]["opentox-model"],"lazar#activities")] - @features = p[File.join(@@config[:services]["opentox-model"],"lazar#features")] - else # database value - @measured_activities = @prediction.data[@compound.uri].first.values - end - else - @activity = "not available (no similar compounds in the training dataset)" - end - haml :prediction end elsif dataset_uri @@ -376,4 +361,3 @@ post '/:id/?' do # create prediction end end - diff --git a/views/prediction.haml b/views/prediction.haml deleted file mode 100644 index 0187f87..0000000 --- a/views/prediction.haml +++ /dev/null @@ -1,133 +0,0 @@ -.predictions - %table - %tr - %th= @title.gsub(/_lazar_.*$/,' ').capitalize - %th Prediction - %th - %a{:href => "#", :id => "linkConfidence#{p.object_id}"} Confidence - :javascript - $("a#linkConfidence#{p.object_id}").click(function () { - $("dl#confidence").toggle(); - }); - %th Relevant features - %tr - %th - %img{:src => @compound.image_uri, :alt => @compound.smiles} - %td - - if @measured_activities - %br - - @measured_activities.each do |a| - - if activity(a) == 'active' - .active - = activity(a) - - elsif activity(a) == 'inactive' - .inactive - = activity(a) - - else - = a - %br - ( - %a{:href => "#", :id => "linkTrainingData#{p.object_id}"} Training data - :javascript - $("a#linkTrainingData#{p.object_id}").click(function () { - $("dl#training_data").toggle(); - }); - ) - - - else - - if activity(@activity) == 'active' - .active - 
= activity(@activity) - - elsif activity(@activity) == 'inactive' - .inactive - = activity(@activity) - - elsif @activity.is_a?(Float) - .other - = sprintf('%.03g', @activity) - - else - .other - %em= @activity.to_s - %td - = sprintf('%.03g', @confidence.to_f.abs) if @confidence - %td - %table - %tr - %th{:colspan => 2} activating - %th p value - - if @features[:activating] - - @features[:activating].sort{|a,b| b.last <=> a.last }.each do |f| - %tr - %th= f[0] - %td= f[1] - %tr - %th{:colspan => 2} deactivating - %th p value - - if @features[:deactivating] - - @features[:deactivating].sort{|a,b| b.last <=> a.last }.each do |f| - %tr - %th= f[0] - %td= f[1] - %tr - %th Neighbors - %th Activity - %th Similarity (activity specific) - %th Relevant features - - @neighbors.sort{|a,b| b.last[:similarity] <=> a.last[:similarity]}.each do |uri,data| - - c = OpenTox::Compound.new(:uri => uri) - %tr - %th - %br= c.smiles - %br - %a{:href => c.image_uri, :target => "_blank"} Image - %br - %img{:src => c.image_uri, :alt => c.smiles} - %td - - data[:activities].each do |act| - - if activity(act) == 'active' - .active - = activity(act) - - elsif activity(act) == 'inactive' - .inactive - = activity(act) - - elsif act.is_a?(Float) - .other - = sprintf('%.03g', act) - - else - .other - %em= act.to_s - %td - = sprintf('%.03g', data[:similarity]) - %td - %table - %tr - %th{:colspan => 2} activating - %th p value - -#%td= data[:features].inspect - -# data[:features][:activating].each do |f| - - data[:features][:activating].sort{|a,b| b.last[:p_value] <=> a.last[:p_value] }.each do |f| - -# f.inspect - %tr - %th= f[:smarts] - %td= f[:p_value] - %td=# f[:p_value] - %tr - %th{:colspan => 2} deactivating - %th p value - -# data[:features][:deactivating].sort{|a,b| b.last[:p_value] <=> a.last[:p_value] }.each do |f| - - data[:features][:deactivating].each do |f| - %tr - %th= f[:smarts] - %td= f[:p_value] - - - -%dl#confidence{ :style => "display: none;" } - %dt Confidence: - %dd Indicates 
the applicability domain of a model. Predictions with a high confidence can be expected to be more reliable than predictions with low confidence. Confidence values may take any value between 0 and 1. For most models confidence > 0.025 is a sensible (hard) cutoff to distinguish between reliable and unreliable predictions. - - -%dl#training_data{ :style => "display: none;" } - %dt Training data: - %dd Experimental result(s) from the training dataset are displayed here. - - |