summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authormr <mr@mrautenberg.de>2010-12-03 09:53:25 +0100
committermr <mr@mrautenberg.de>2010-12-03 09:53:25 +0100
commit13272ba7507d2b856d329637b48a64255434fea0 (patch)
tree64ee5eadacd672c5d24eff7a701bf0bd7b25088b
parentcb3fc6a27be73c9f8c08c31f555f181c43b50bb2 (diff)
parent12220a1cc4b37fda3a6776c4d0fd787d90a16882 (diff)
merge with helma/development
-rw-r--r--application.rb28
-rw-r--r--config.ru3
-rw-r--r--lazar.rb364
3 files changed, 61 insertions, 334 deletions
diff --git a/application.rb b/application.rb
index 0f762eb..b522baf 100644
--- a/application.rb
+++ b/application.rb
@@ -1,14 +1,14 @@
require 'rubygems'
-gem "opentox-ruby-api-wrapper", "= 1.6.3"
-require 'opentox-ruby-api-wrapper'
+gem "opentox-ruby", "~> 0"
+require 'opentox-ruby'
-class Model
+class ModelStore
include DataMapper::Resource
+ attr_accessor :prediction_dataset
property :id, Serial
property :uri, String, :length => 255
- property :owl, Text, :length => 2**32-1
- property :yaml, Text, :length => 2**32-1
- property :token_id, String, :length => 255
+ property :yaml, Text, :length => 2**32-1
+ property :token_id, String, :length => 255
property :created_at, DateTime
after :save, :check_policy
@@ -20,18 +20,19 @@ class Model
end
-class Prediction
+class PredictionCache
# cache predictions
include DataMapper::Resource
property :id, Serial
property :compound_uri, String, :length => 255
property :model_uri, String, :length => 255
- property :yaml, Text, :length => 2**32-1
+ property :dataset_uri, String, :length => 255
end
DataMapper.auto_upgrade!
require 'lazar.rb'
+#require 'property_lazar.rb'
helpers do
@@ -50,14 +51,13 @@ end
get '/?' do # get index of models
response['Content-Type'] = 'text/uri-list'
- Model.all(params).collect{|m| m.uri}.join("\n") + "\n"
+ ModelStore.all(params).collect{|m| m.uri}.join("\n") + "\n"
end
delete '/:id/?' do
begin
- model = Model.get(params[:id])
- uri = model.uri
- model.destroy!
+ uri = ModelStore.get(params[:id]).uri
+ ModelStore.get(params[:id]).destroy!
"Model #{params[:id]} deleted."
if params[:token_id] and !Model.get(params[:id]) and uri
begin
@@ -75,8 +75,8 @@ end
delete '/?' do
# TODO delete datasets
- Model.auto_migrate!
- Prediction.auto_migrate!
+ ModelStore.auto_migrate!
+ #Prediction.auto_migrate!
response['Content-Type'] = 'text/plain'
"All models and cached predictions deleted."
end
diff --git a/config.ru b/config.ru
index 6f5db40..1616a96 100644
--- a/config.ru
+++ b/config.ru
@@ -1,6 +1,5 @@
require 'rubygems'
-require 'opentox-ruby-api-wrapper'
+require 'opentox-ruby'
require 'config/config_ru'
set :app_file, __FILE__ # to get the view path right
run Sinatra::Application
-
diff --git a/lazar.rb b/lazar.rb
index 0fd4634..13c3caa 100644
--- a/lazar.rb
+++ b/lazar.rb
@@ -1,246 +1,16 @@
-# R integration
-# workaround to initialize R non-interactively (former rinruby versions did this by default)
-# avoids compiling R with X
-R = nil
-require "rinruby"
require "haml"
-class Lazar < Model
-
- attr_accessor :prediction_dataset
-
- # AM begin
- # regression function, created 06/10
- # ch: please properly integrate this into the workflow. You will need some criterium for distinguishing regression/classification (hardcoded regression for testing)
- def regression(compound_uri,prediction,verbose=false)
-
- lazar = YAML.load self.yaml
- compound = OpenTox::Compound.new(:uri => compound_uri)
-
- # obtain X values for query compound
- compound_matches = compound.match lazar.features
-
- conf = 0.0
- features = { :activating => [], :deactivating => [] }
- neighbors = {}
- regression = nil
-
- regr_occurrences = [] # occurrence vector with {0,1} entries
- sims = [] # similarity values between query and neighbors
- acts = [] # activities of neighbors for supervised learning
- neighbor_matches = [] # as in classification: URIs of matches
- gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel
- i = 0
-
- # aquire data related to query structure
- lazar.fingerprints.each do |uri,matches|
- sim = OpenTox::Algorithm::Similarity.weighted_tanimoto(compound_matches,matches,lazar.p_values)
- lazar.activities[uri].each do |act|
- if sim > 0.3
- neighbors[uri] = {:similarity => sim}
- neighbors[uri][:features] = { :activating => [], :deactivating => [] } unless neighbors[uri][:features]
- matches.each do |m|
- if lazar.effects[m] == 'activating'
- neighbors[uri][:features][:activating] << {:smarts => m, :p_value => lazar.p_values[m]}
- elsif lazar.effects[m] == 'deactivating'
- neighbors[uri][:features][:deactivating] << {:smarts => m, :p_value => lazar.p_values[m]}
- end
- end
- lazar.activities[uri].each do |act|
- neighbors[uri][:activities] = [] unless neighbors[uri][:activities]
- neighbors[uri][:activities] << act
- end
- conf += OpenTox::Utils.gauss(sim)
- sims << OpenTox::Utils.gauss(sim)
- #TODO check for 0 s
- acts << Math.log10(act.to_f)
- #acts << act.to_f
- neighbor_matches[i] = matches
- i+=1
- end
- end
- end
- conf = conf/neighbors.size
- LOGGER.debug "Regression: found " + neighbor_matches.size.to_s + " neighbors."
-
-
- unless neighbor_matches.length == 0
- # gram matrix
- (0..(neighbor_matches.length-1)).each do |i|
- gram_matrix[i] = []
- # lower triangle
- (0..(i-1)).each do |j|
- sim = OpenTox::Algorithm::Similarity.weighted_tanimoto(neighbor_matches[i], neighbor_matches[j], lazar.p_values)
- gram_matrix[i] << OpenTox::Utils.gauss(sim)
- end
- # diagonal element
- gram_matrix[i][i] = 1.0
- # upper triangle
- ((i+1)..(neighbor_matches.length-1)).each do |j|
- sim = OpenTox::Algorithm::Similarity.weighted_tanimoto(neighbor_matches[i], neighbor_matches[j], lazar.p_values)
- gram_matrix[i] << OpenTox::Utils.gauss(sim)
- end
- end
-
- @r = RinRuby.new(false,false) # global R instance leads to Socket errors after a large number of requests
- @r.eval "library('kernlab')" # this requires R package "kernlab" to be installed
- LOGGER.debug "Setting R data ..."
- # set data
- @r.gram_matrix = gram_matrix.flatten
- @r.n = neighbor_matches.length
- @r.y = acts
- @r.sims = sims
-
- LOGGER.debug "Preparing R data ..."
- # prepare data
- @r.eval "y<-as.vector(y)"
- @r.eval "gram_matrix<-as.kernelMatrix(matrix(gram_matrix,n,n))"
- @r.eval "sims<-as.vector(sims)"
-
- # model + support vectors
- LOGGER.debug "Creating SVM model ..."
- @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"nu-svr\", nu=0.8)"
- @r.eval "sv<-as.vector(SVindex(model))"
- @r.eval "sims<-sims[sv]"
- @r.eval "sims<-as.kernelMatrix(matrix(sims,1))"
- LOGGER.debug "Predicting ..."
- @r.eval "p<-predict(model,sims)[1,1]"
- regression = 10**(@r.p.to_f)
- LOGGER.debug "Prediction is: '" + regression.to_s + "'."
- @r.quit # free R
-
- end
-
- if (regression != nil)
- feature_uri = lazar.dependentVariables
- prediction.compounds << compound_uri
- prediction.features << feature_uri
- prediction.data[compound_uri] = [] unless prediction.data[compound_uri]
- compound_matches.each { |m| features[lazar.effects[m].to_sym] << {:smarts => m, :p_value => lazar.p_values[m] } }
- tuple = {
- File.join(@@config[:services]["opentox-model"],"lazar#regression") => regression,
- File.join(@@config[:services]["opentox-model"],"lazar#confidence") => conf
- }
- if verbose
- tuple[File.join(@@config[:services]["opentox-model"],"lazar#neighbors")] = neighbors
- tuple[File.join(@@config[:services]["opentox-model"],"lazar#features")] = features
- end
- prediction.data[compound_uri] << {feature_uri => tuple}
- end
-
- end
- # AM end
-
-
- def classification(compound_uri,prediction,verbose=false)
-
- lazar = YAML.load self.yaml
- compound = OpenTox::Compound.new(:uri => compound_uri)
- compound_matches = compound.match lazar.features
-
- conf = 0.0
- features = { :activating => [], :deactivating => [] }
- neighbors = {}
- classification = nil
-
- lazar.fingerprints.each do |uri,matches|
-
- sim = OpenTox::Algorithm::Similarity.weighted_tanimoto(compound_matches,matches,lazar.p_values)
- if sim > 0.3
- neighbors[uri] = {:similarity => sim}
- neighbors[uri][:features] = { :activating => [], :deactivating => [] } unless neighbors[uri][:features]
- matches.each do |m|
- if lazar.effects[m] == 'activating'
- neighbors[uri][:features][:activating] << {:smarts => m, :p_value => lazar.p_values[m]}
- elsif lazar.effects[m] == 'deactivating'
- neighbors[uri][:features][:deactivating] << {:smarts => m, :p_value => lazar.p_values[m]}
- end
- end
- lazar.activities[uri].each do |act|
- neighbors[uri][:activities] = [] unless neighbors[uri][:activities]
- neighbors[uri][:activities] << act
- case act.to_s
- when 'true'
- conf += OpenTox::Utils.gauss(sim)
- when 'false'
- conf -= OpenTox::Utils.gauss(sim)
- end
- end
- end
- end
-
- conf = conf/neighbors.size
- if conf > 0.0
- classification = true
- elsif conf < 0.0
- classification = false
- end
- if (classification != nil)
- feature_uri = lazar.dependentVariables
- prediction.compounds << compound_uri
- prediction.features << feature_uri
- prediction.data[compound_uri] = [] unless prediction.data[compound_uri]
- compound_matches.each { |m| features[lazar.effects[m].to_sym] << {:smarts => m, :p_value => lazar.p_values[m] } }
- tuple = {
- File.join(@@config[:services]["opentox-model"],"lazar#classification") => classification,
- File.join(@@config[:services]["opentox-model"],"lazar#confidence") => conf
- }
- if verbose
- tuple[File.join(@@config[:services]["opentox-model"],"lazar#neighbors")] = neighbors
- tuple[File.join(@@config[:services]["opentox-model"],"lazar#features")] = features
- end
- prediction.data[compound_uri] << {feature_uri => tuple}
- end
- end
-
- def database_activity?(compound_uri,prediction)
- # find database activities
- lazar = YAML.load self.yaml
- db_activities = lazar.activities[compound_uri]
- if db_activities
- prediction.creator = lazar.trainingDataset
- feature_uri = lazar.dependentVariables
- prediction.compounds << compound_uri
- prediction.features << feature_uri
- prediction.data[compound_uri] = [] unless prediction.data[compound_uri]
- db_activities.each do |act|
- prediction.data[compound_uri] << {feature_uri => act}
- end
- true
- else
- false
- end
- end
-
- def to_owl
- data = YAML.load(yaml)
- activity_dataset = YAML.load(RestClient.get(data.trainingDataset, :accept => 'application/x-yaml').to_s)
- feature_dataset = YAML.load(RestClient.get(data.feature_dataset_uri, :accept => 'application/x-yaml').to_s)
- owl = OpenTox::Owl.create 'Model', uri
- owl.set("creator","http://github.com/helma/opentox-model")
- owl.set("title", URI.decode(data.dependentVariables.split(/#/).last) )
- #owl.set("title","#{URI.decode(activity_dataset.title)} lazar classification")
- owl.set("date",created_at.to_s)
- owl.set("algorithm",data.algorithm)
- owl.set("dependentVariables",activity_dataset.features.join(', '))
- owl.set("independentVariables",feature_dataset.features.join(', '))
- owl.set("predictedVariables", data.dependentVariables )
- #owl.set("predictedVariables",activity_dataset.features.join(', ') + "_lazar_classification")
- owl.set("trainingDataset",data.trainingDataset)
- owl.parameters = {
- "Dataset URI" =>
- { :scope => "mandatory", :value => data.trainingDataset },
- "Feature URI for dependent variable" =>
- { :scope => "mandatory", :value => activity_dataset.features.join(', ')},
- "Feature generation URI" =>
- { :scope => "mandatory", :value => feature_dataset.creator }
- }
-
- owl.rdf
- end
-
+helpers do
+ def uri_available?(urlStr)
+ url = URI.parse(urlStr)
+ Net::HTTP.start(urlStr.host, urlStr.port) do |http|
+ return http.head(urlStr.request_uri).code == "200"
+ end
+ end
end
+# Get model representation
+# @return [application/rdf+xml,application/x-yaml] Model representation
get '/:id/?' do
accept = request.env['HTTP_ACCEPT']
accept = "application/rdf+xml" if accept == '*/*' or accept == '' or accept.nil?
@@ -253,16 +23,14 @@ get '/:id/?' do
params[:id].sub!(/.rdf$/,'')
accept = 'application/rdf+xml'
end
- model = Lazar.get(params[:id])
- halt 404, "Model #{params[:id]} not found." unless model
+ halt 404, "Model #{params[:id]} not found." unless model = ModelStore.get(params[:id])
+ lazar = YAML.load model.yaml
case accept
- when "application/rdf+xml"
- response['Content-Type'] = 'application/rdf+xml'
- unless model.owl # lazy owl creation
- model.owl = model.to_owl
- model.save
- end
- model.owl
+ when /application\/rdf\+xml/
+ s = OpenTox::Serializer::Owl.new
+ s.add_model(url_for('/lazar',:full),lazar.metadata)
+ response['Content-Type'] = 'application/rdf+xml'
+ s.to_rdfxml
when /yaml/
response['Content-Type'] = 'application/x-yaml'
model.yaml
@@ -271,93 +39,53 @@ get '/:id/?' do
end
end
-get '/:id/algorithm/?' do
- response['Content-Type'] = 'text/plain'
- YAML.load(Lazar.get(params[:id]).yaml).algorithm
-end
-
-get '/:id/trainingDataset/?' do
- response['Content-Type'] = 'text/plain'
- YAML.load(Lazar.get(params[:id]).yaml).trainingDataset
-end
-
-get '/:id/feature_dataset/?' do
- response['Content-Type'] = 'text/plain'
- YAML.load(Lazar.get(params[:id]).yaml).feature_dataset_uri
-end
-
+# Store a lazar model. This method should not be called directly; use OpenTox::Algorithm::Lazar to create a lazar model
+# @param [Body] lazar Model representation in YAML format
+# @return [String] Model URI
post '/?' do # create model
halt 400, "MIME type \"#{request.content_type}\" not supported." unless request.content_type.match(/yaml/)
- model = Lazar.new
- model.save
+ model = ModelStore.create
model.token_id = params[:token_id] if params[:token_id]
model.token_id = request.env["HTTP_TOKEN_ID"] if !model.token_id and request.env["HTTP_TOKEN_ID"]
model.uri = url_for("/#{model.id}", :full)
- model.yaml = request.env["rack.input"].read
+ lazar = YAML.load request.env["rack.input"].read
+ lazar.uri = model.uri
+ model.yaml = lazar.to_yaml
model.save
model.uri
end
-post '/:id/?' do # create prediction
+# Make a lazar prediction. Predicts either a single compound or all compounds from a dataset
+# @param [optional,String] dataset_uri URI of the dataset to be predicted
+# @param [optional,String] compound_uri URI of the compound to be predicted
+# @param [optional,Header] Accept Content-type of prediction, can be either `application/rdf+xml` or `application/x-yaml`
+# @return [text/uri-list] URI of prediction task (dataset prediction) or prediction dataset (compound prediction)
+post '/:id/?' do
- lazar = Lazar.get(params[:id])
- halt 404, "Model #{params[:id]} does not exist." unless lazar
+ @lazar = YAML.load ModelStore.get(params[:id]).yaml
+
+ halt 404, "Model #{params[:id]} does not exist." unless @lazar
halt 404, "No compound_uri or dataset_uri parameter." unless compound_uri = params[:compound_uri] or dataset_uri = params[:dataset_uri]
- @prediction = OpenTox::Dataset.new
- @prediction.creator = lazar.uri
- @prediction.token_id = params[:token_id]
- @prediction.token_id = request.env["HTTP_TOKEN_ID"] if !@prediction.token_id and request.env["HTTP_TOKEN_ID"]
- dependent_variable = YAML.load(lazar.yaml).dependentVariables
- @prediction.title = URI.decode(dependent_variable.split(/#/).last)
- case dependent_variable
- when /classification/
- prediction_type = "classification"
- when /regression/
- prediction_type = "regression"
- end
+ response['Content-Type'] = 'text/uri-list'
if compound_uri
- # look for cached prediction first
- if cached_prediction = Prediction.first(:model_uri => lazar.uri, :compound_uri => compound_uri)
- @prediction = YAML.load(cached_prediction.yaml)
- else
- begin
- # AM: switch here between regression and classification
- eval "lazar.#{prediction_type}(compound_uri,@prediction,true) unless lazar.database_activity?(compound_uri,@prediction)"
- Prediction.create(:model_uri => lazar.uri, :compound_uri => compound_uri, :yaml => @prediction.to_yaml)
- rescue
- LOGGER.error "#{prediction_type} failed for #{compound_uri} with #{$!} "
- halt 500, "Prediction of #{compound_uri} failed."
- end
+ cache = PredictionCache.first(:model_uri => @lazar.uri, :compound_uri => compound_uri)
+ return cache.dataset_uri if cache and uri_available?(cache.dataset_uri)
+ begin
+ prediction_uri = @lazar.predict(compound_uri,true).uri
+ PredictionCache.create(:model_uri => @lazar.uri, :compound_uri => compound_uri, :dataset_uri => prediction_uri)
+ prediction_uri
+ rescue
+ LOGGER.error "Lazar prediction failed for #{compound_uri} with #{$!} "
+ halt 500, "Prediction of #{compound_uri} with #{@lazar.uri} failed."
end
- case request.env['HTTP_ACCEPT']
- when /yaml/
- @prediction.to_yaml
- when 'application/rdf+xml'
- @prediction.to_owl
- else
- halt 400, "MIME type \"#{request.env['HTTP_ACCEPT']}\" not supported."
- end
elsif dataset_uri
- response['Content-Type'] = 'text/uri-list'
- task_uri = OpenTox::Task.as_task do
- input_dataset = OpenTox::Dataset.find(dataset_uri)
- input_dataset.compounds.each do |compound_uri|
- # AM: switch here between regression and classification
- begin
- eval "lazar.#{prediction_type}(compound_uri,@prediction) unless lazar.database_activity?(compound_uri,@prediction)"
- rescue
- LOGGER.error "#{prediction_type} failed for #{compound_uri} with #{$!} "
- end
- end
- begin
- uri = @prediction.save.chomp
- rescue
- halt 500, "Could not save prediction dataset"
- end
+ task = OpenTox::Task.create("Predict dataset",url_for("/#{@lazar.id}", :full)) do
+ @lazar.predict_dataset(dataset_uri).uri
end
- halt 202,task_uri
+ halt 503,task.uri+"\n" if task.status == "Cancelled"
+ halt 202,task.uri
end
end