summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: mr <mr@mrautenberg.de> 2010-08-23 14:06:17 +0200
committer: mr <mr@mrautenberg.de> 2010-08-23 14:06:17 +0200
commit: cb3fc6a27be73c9f8c08c31f555f181c43b50bb2 (patch)
tree: 1d0f583086be74b190ec11a1aff94b5a43a1a152
parent: 3de9e162beeb5f88d68505b560bc4dea7737e5f8 (diff)
parent: 0f47a05adb11fbd9dfd0977780d2daa1e434c7c8 (diff)
Merge remote branch 'helma/master' into development
-rw-r--r-- Rakefile 20
-rw-r--r-- application.rb 15
-rw-r--r-- [-rwxr-xr-x] lazar.rb 119
-rw-r--r-- views/prediction.haml 133
4 files changed, 62 insertions, 225 deletions
diff --git a/Rakefile b/Rakefile
deleted file mode 100644
index f4e10d6..0000000
--- a/Rakefile
+++ /dev/null
@@ -1,20 +0,0 @@
-require 'rubygems'
-require 'rake'
-require 'tasks/opentox'
-
-desc "Install required gems"
-task :install do
- puts `sudo gem sources -a http://gems.github.com`
- puts `sudo gem install sinatra datamapper dm-more builder helma-opentox-ruby-api-wrapper`
-end
-
-desc "Update gems"
-task :update do
- puts `sudo gem update sinatra datamapper dm-more builder helma-opentox-ruby-api-wrapper`
-end
-
-desc "Run tests"
-task :test do
- load 'test/test.rb'
-end
-
diff --git a/application.rb b/application.rb
index d1ff70e..0f762eb 100644
--- a/application.rb
+++ b/application.rb
@@ -1,7 +1,6 @@
require 'rubygems'
-gem "opentox-ruby-api-wrapper", "= 1.6.0"
+gem "opentox-ruby-api-wrapper", "= 1.6.3"
require 'opentox-ruby-api-wrapper'
-LOGGER.progname = File.expand_path(__FILE__)
class Model
include DataMapper::Resource
@@ -21,6 +20,15 @@ class Model
end
+class Prediction
+ # cache predictions
+ include DataMapper::Resource
+ property :id, Serial
+ property :compound_uri, String, :length => 255
+ property :model_uri, String, :length => 255
+ property :yaml, Text, :length => 2**32-1
+end
+
DataMapper.auto_upgrade!
require 'lazar.rb'
@@ -68,6 +76,7 @@ end
delete '/?' do
# TODO delete datasets
Model.auto_migrate!
+ Prediction.auto_migrate!
response['Content-Type'] = 'text/plain'
- "All Models deleted."
+ "All models and cached predictions deleted."
end
diff --git a/lazar.rb b/lazar.rb
index e43995d..0fd4634 100755..100644
--- a/lazar.rb
+++ b/lazar.rb
@@ -2,8 +2,8 @@
# workaround to initialize R non-interactively (former rinruby versions did this by default)
# avoids compiling R with X
R = nil
-require ("rinruby")
-require ("haml")
+require "rinruby"
+require "haml"
class Lazar < Model
@@ -12,7 +12,7 @@ class Lazar < Model
# AM begin
# regression function, created 06/10
# ch: please properly integrate this into the workflow. You will need some criterium for distinguishing regression/classification (hardcoded regression for testing)
- def regression(compound_uri,prediction)
+ def regression(compound_uri,prediction,verbose=false)
lazar = YAML.load self.yaml
compound = OpenTox::Compound.new(:uri => compound_uri)
@@ -21,9 +21,8 @@ class Lazar < Model
compound_matches = compound.match lazar.features
conf = 0.0
- similarities = {}
- activities = {}
- fragments = { :activating => {}, :deactivating => {} }
+ features = { :activating => [], :deactivating => [] }
+ neighbors = {}
regression = nil
regr_occurrences = [] # occurrence vector with {0,1} entries
@@ -38,9 +37,19 @@ class Lazar < Model
sim = OpenTox::Algorithm::Similarity.weighted_tanimoto(compound_matches,matches,lazar.p_values)
lazar.activities[uri].each do |act|
if sim > 0.3
- similarities[uri] = sim
- activities[uri] = [] unless activities[uri]
- activities[uri] << act
+ neighbors[uri] = {:similarity => sim}
+ neighbors[uri][:features] = { :activating => [], :deactivating => [] } unless neighbors[uri][:features]
+ matches.each do |m|
+ if lazar.effects[m] == 'activating'
+ neighbors[uri][:features][:activating] << {:smarts => m, :p_value => lazar.p_values[m]}
+ elsif lazar.effects[m] == 'deactivating'
+ neighbors[uri][:features][:deactivating] << {:smarts => m, :p_value => lazar.p_values[m]}
+ end
+ end
+ lazar.activities[uri].each do |act|
+ neighbors[uri][:activities] = [] unless neighbors[uri][:activities]
+ neighbors[uri][:activities] << act
+ end
conf += OpenTox::Utils.gauss(sim)
sims << OpenTox::Utils.gauss(sim)
#TODO check for 0 s
@@ -51,7 +60,7 @@ class Lazar < Model
end
end
end
- conf = conf/similarities.size
+ conf = conf/neighbors.size
LOGGER.debug "Regression: found " + neighbor_matches.size.to_s + " neighbors."
@@ -107,32 +116,30 @@ class Lazar < Model
prediction.compounds << compound_uri
prediction.features << feature_uri
prediction.data[compound_uri] = [] unless prediction.data[compound_uri]
- compound_matches.each { |m| fragments[lazar.effects[m].to_sym][m] = lazar.p_values[m] }
+ compound_matches.each { |m| features[lazar.effects[m].to_sym] << {:smarts => m, :p_value => lazar.p_values[m] } }
tuple = {
File.join(@@config[:services]["opentox-model"],"lazar#regression") => regression,
- File.join(@@config[:services]["opentox-model"],"lazar#confidence") => conf,
- #File.join(@@config[:services]["opentox-model"],"lazar#similarities") => similarities,
- #File.join(@@config[:services]["opentox-model"],"lazar#activities") => activities,
- #File.join(@@config[:services]["opentox-model"],"lazar#features") => fragments
+ File.join(@@config[:services]["opentox-model"],"lazar#confidence") => conf
}
+ if verbose
+ tuple[File.join(@@config[:services]["opentox-model"],"lazar#neighbors")] = neighbors
+ tuple[File.join(@@config[:services]["opentox-model"],"lazar#features")] = features
+ end
prediction.data[compound_uri] << {feature_uri => tuple}
end
-
end
# AM end
- def classification(compound_uri,prediction)
+ def classification(compound_uri,prediction,verbose=false)
lazar = YAML.load self.yaml
compound = OpenTox::Compound.new(:uri => compound_uri)
compound_matches = compound.match lazar.features
conf = 0.0
- similarities = {}
- #activities = {}
- fragments = { :activating => {}, :deactivating => {} }
+ features = { :activating => [], :deactivating => [] }
neighbors = {}
classification = nil
@@ -140,7 +147,6 @@ class Lazar < Model
sim = OpenTox::Algorithm::Similarity.weighted_tanimoto(compound_matches,matches,lazar.p_values)
if sim > 0.3
- similarities[uri] = sim
neighbors[uri] = {:similarity => sim}
neighbors[uri][:features] = { :activating => [], :deactivating => [] } unless neighbors[uri][:features]
matches.each do |m|
@@ -150,11 +156,7 @@ class Lazar < Model
neighbors[uri][:features][:deactivating] << {:smarts => m, :p_value => lazar.p_values[m]}
end
end
- #neighbors[uri][:features] = [] unless neighbors[uri][:features]
- #neighbors[uri][:features] << matches
lazar.activities[uri].each do |act|
- #activities[uri] = [] unless activities[uri]
- #activities[uri] << act
neighbors[uri][:activities] = [] unless neighbors[uri][:activities]
neighbors[uri][:activities] << act
case act.to_s
@@ -167,7 +169,7 @@ class Lazar < Model
end
end
- conf = conf/similarities.size
+ conf = conf/neighbors.size
if conf > 0.0
classification = true
elsif conf < 0.0
@@ -178,17 +180,17 @@ class Lazar < Model
prediction.compounds << compound_uri
prediction.features << feature_uri
prediction.data[compound_uri] = [] unless prediction.data[compound_uri]
- compound_matches.each { |m| fragments[lazar.effects[m].to_sym][m] = lazar.p_values[m] }
- #fragments[:activating] = fragments[:activating].sort{|a,b| b[1] <=> a[1]}
+ compound_matches.each { |m| features[lazar.effects[m].to_sym] << {:smarts => m, :p_value => lazar.p_values[m] } }
tuple = {
- File.join(@@config[:services]["opentox-model"],"lazar#classification") => classification,
- File.join(@@config[:services]["opentox-model"],"lazar#confidence") => conf,
- #File.join(@@config[:services]["opentox-model"],"lazar#neighbors") => neighbors,
- #File.join(@@config[:services]["opentox-model"],"lazar#features") => fragments
+ File.join(@@config[:services]["opentox-model"],"lazar#classification") => classification,
+ File.join(@@config[:services]["opentox-model"],"lazar#confidence") => conf
}
+ if verbose
+ tuple[File.join(@@config[:services]["opentox-model"],"lazar#neighbors")] = neighbors
+ tuple[File.join(@@config[:services]["opentox-model"],"lazar#features")] = features
+ end
prediction.data[compound_uri] << {feature_uri => tuple}
end
-
end
def database_activity?(compound_uri,prediction)
@@ -203,10 +205,6 @@ class Lazar < Model
prediction.data[compound_uri] = [] unless prediction.data[compound_uri]
db_activities.each do |act|
prediction.data[compound_uri] << {feature_uri => act}
- #tuple = {
- # :classification => act}
- #:confidence => "experimental"}
- #prediction.data[compound_uri] << {feature_uri => tuple}
end
true
else
@@ -220,7 +218,6 @@ class Lazar < Model
feature_dataset = YAML.load(RestClient.get(data.feature_dataset_uri, :accept => 'application/x-yaml').to_s)
owl = OpenTox::Owl.create 'Model', uri
owl.set("creator","http://github.com/helma/opentox-model")
- # TODO
owl.set("title", URI.decode(data.dependentVariables.split(/#/).last) )
#owl.set("title","#{URI.decode(activity_dataset.title)} lazar classification")
owl.set("date",created_at.to_s)
@@ -321,41 +318,26 @@ post '/:id/?' do # create prediction
end
if compound_uri
- # AM: switch here between regression and classification
- begin
- eval "lazar.#{prediction_type}(compound_uri,@prediction) unless lazar.database_activity?(compound_uri,@prediction)"
- rescue
- LOGGER.error "#{prediction_type} failed for #{compound_uri} with #{$!} "
- halt 500, "Prediction of #{compound_uri} failed."
- end
+ # look for cached prediction first
+ if cached_prediction = Prediction.first(:model_uri => lazar.uri, :compound_uri => compound_uri)
+ @prediction = YAML.load(cached_prediction.yaml)
+ else
+ begin
+ # AM: switch here between regression and classification
+ eval "lazar.#{prediction_type}(compound_uri,@prediction,true) unless lazar.database_activity?(compound_uri,@prediction)"
+ Prediction.create(:model_uri => lazar.uri, :compound_uri => compound_uri, :yaml => @prediction.to_yaml)
+ rescue
+ LOGGER.error "#{prediction_type} failed for #{compound_uri} with #{$!} "
+ halt 500, "Prediction of #{compound_uri} failed."
+ end
+ end
case request.env['HTTP_ACCEPT']
when /yaml/
@prediction.to_yaml
when 'application/rdf+xml'
@prediction.to_owl
- when /html/
- @compound = OpenTox::Compound.new(:uri => compound_uri)
- @title = @prediction.title
- if @prediction.data[@compound.uri]
- if @prediction.creator.to_s.match(/model/) # real prediction
- p = @prediction.data[@compound.uri].first.values.first
- if !p[File.join(@@config[:services]["opentox-model"],"lazar#classification")].nil?
- feature = File.join(@@config[:services]["opentox-model"],"lazar#classification")
- elsif !p[File.join(@@config[:services]["opentox-model"],"lazar#regression")].nil?
- feature = File.join(@@config[:services]["opentox-model"],"lazar#regression")
- end
- @activity = p[feature]
- @confidence = p[File.join(@@config[:services]["opentox-model"],"lazar#confidence")]
- @neighbors = p[File.join(@@config[:services]["opentox-model"],"lazar#neighbors")]#.sort{|a,b| b.last[:similarity] <=> a.last[:similarity]}
- #@training_activities = p[File.join(@@config[:services]["opentox-model"],"lazar#activities")]
- @features = p[File.join(@@config[:services]["opentox-model"],"lazar#features")]
- else # database value
- @measured_activities = @prediction.data[@compound.uri].first.values
- end
- else
- @activity = "not available (no similar compounds in the training dataset)"
- end
- haml :prediction
+ else
+ halt 400, "MIME type \"#{request.env['HTTP_ACCEPT']}\" not supported."
end
elsif dataset_uri
response['Content-Type'] = 'text/uri-list'
@@ -379,4 +361,3 @@ post '/:id/?' do # create prediction
end
end
-
diff --git a/views/prediction.haml b/views/prediction.haml
deleted file mode 100644
index 0187f87..0000000
--- a/views/prediction.haml
+++ /dev/null
@@ -1,133 +0,0 @@
-.predictions
- %table
- %tr
- %th= @title.gsub(/_lazar_.*$/,' ').capitalize
- %th Prediction
- %th
- %a{:href => "#", :id => "linkConfidence#{p.object_id}"} Confidence
- :javascript
- $("a#linkConfidence#{p.object_id}").click(function () {
- $("dl#confidence").toggle();
- });
- %th Relevant features
- %tr
- %th
- %img{:src => @compound.image_uri, :alt => @compound.smiles}
- %td
- - if @measured_activities
- %br
- - @measured_activities.each do |a|
- - if activity(a) == 'active'
- .active
- = activity(a)
- - elsif activity(a) == 'inactive'
- .inactive
- = activity(a)
- - else
- = a
- %br
- (
- %a{:href => "#", :id => "linkTrainingData#{p.object_id}"} Training data
- :javascript
- $("a#linkTrainingData#{p.object_id}").click(function () {
- $("dl#training_data").toggle();
- });
- )
-
- - else
- - if activity(@activity) == 'active'
- .active
- = activity(@activity)
- - elsif activity(@activity) == 'inactive'
- .inactive
- = activity(@activity)
- - elsif @activity.is_a?(Float)
- .other
- = sprintf('%.03g', @activity)
- - else
- .other
- %em= @activity.to_s
- %td
- = sprintf('%.03g', @confidence.to_f.abs) if @confidence
- %td
- %table
- %tr
- %th{:colspan => 2} activating
- %th p value
- - if @features[:activating]
- - @features[:activating].sort{|a,b| b.last <=> a.last }.each do |f|
- %tr
- %th= f[0]
- %td= f[1]
- %tr
- %th{:colspan => 2} deactivating
- %th p value
- - if @features[:deactivating]
- - @features[:deactivating].sort{|a,b| b.last <=> a.last }.each do |f|
- %tr
- %th= f[0]
- %td= f[1]
- %tr
- %th Neighbors
- %th Activity
- %th Similarity (activity specific)
- %th Relevant features
- - @neighbors.sort{|a,b| b.last[:similarity] <=> a.last[:similarity]}.each do |uri,data|
- - c = OpenTox::Compound.new(:uri => uri)
- %tr
- %th
- %br= c.smiles
- %br
- %a{:href => c.image_uri, :target => "_blank"} Image
- %br
- %img{:src => c.image_uri, :alt => c.smiles}
- %td
- - data[:activities].each do |act|
- - if activity(act) == 'active'
- .active
- = activity(act)
- - elsif activity(act) == 'inactive'
- .inactive
- = activity(act)
- - elsif act.is_a?(Float)
- .other
- = sprintf('%.03g', act)
- - else
- .other
- %em= act.to_s
- %td
- = sprintf('%.03g', data[:similarity])
- %td
- %table
- %tr
- %th{:colspan => 2} activating
- %th p value
- -#%td= data[:features].inspect
- -# data[:features][:activating].each do |f|
- - data[:features][:activating].sort{|a,b| b.last[:p_value] <=> a.last[:p_value] }.each do |f|
- -# f.inspect
- %tr
- %th= f[:smarts]
- %td= f[:p_value]
- %td=# f[:p_value]
- %tr
- %th{:colspan => 2} deactivating
- %th p value
- -# data[:features][:deactivating].sort{|a,b| b.last[:p_value] <=> a.last[:p_value] }.each do |f|
- - data[:features][:deactivating].each do |f|
- %tr
- %th= f[:smarts]
- %td= f[:p_value]
-
-
-
-%dl#confidence{ :style => "display: none;" }
- %dt Confidence:
- %dd Indicates the applicability domain of a model. Predictions with a high confidence can be expected to be more reliable than predictions with low confidence. Confidence values may take any value between 0 and 1. For most models confidence &gt; 0.025 is a sensible (hard) cutoff to distiguish between reliable and unreliable predictions.
-
-
-%dl#training_data{ :style => "display: none;" }
- %dt Training data:
- %dd Experimental result(s) from the training dataset are displayed here.
-
-