summary refs log tree commit diff
diff options
context:
space:
mode:
author Christoph Helma <helma@in-silico.ch> 2011-03-10 12:12:49 +0100
committer Christoph Helma <helma@in-silico.ch> 2011-03-10 12:12:49 +0100
commit 5e07313c11eb38c1e7d16a0b381a2deb4c93877f (patch)
tree 91f1123003d68084a3b129ff726f5bf8a56744cc
parent f4b6fcf2b50de0d11e3bcf7986ab9fba4e16208e (diff)
parent 854e3ff1d872300151a23f787adf72afc54071f5 (diff)
Merge branch 'release/v1.0.0'v1.0.0
-rw-r--r-- .gitignore 2
-rw-r--r-- README 17
-rw-r--r-- application.rb 114
-rw-r--r-- config.ru 5
-rw-r--r-- lazar.rb 419
-rw-r--r-- property_lazar.rb 303
6 files changed, 159 insertions, 701 deletions
diff --git a/.gitignore b/.gitignore
index b5729fb..4638e4f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,4 +2,4 @@ tmp/*
*.sqlite3
log/*
db/*
-models/*
+public/*
diff --git a/README b/README
index 81c96e4..e4d478c 100644
--- a/README
+++ b/README
@@ -5,7 +5,8 @@ OpenTox model
* Implements the OpenTox model API
* Ruby implementation of lazar prediction models
-REST operations:
+REST operations
+---------------
Get a list of all lazar models GET / - List of model URIs 200
Get the representation of a lazar model GET /{id} - Model representation 200,404
@@ -13,12 +14,14 @@ Predict a compound POST /{id} compound_uri Prediction
Predict a dataset POST /{id} dataset_uri Prediction dataset URI 200,404,500
Delete a model DELETE /{id} - - 200,404
-Supported MIME formats (http://chemical-mime.sourceforge.net/):
+Supported MIME formats (http://chemical-mime.sourceforge.net/)
+--------------------------------------------------------------
* application/rdf+xml (default): read/write OWL-DL
* application/x-yaml
-Examples:
+Examples
+--------
List all lazar models
curl http://webservices.in-silico.ch/model
@@ -38,6 +41,10 @@ Predict a dataset
Delete a model
curl -X DELETE http://webservices.in-silico.ch/model/{id}
-More documentation: Source code (application.rb, lazar.rb)
+API documentation
+-----------------
+
+http://rdoc.info/github/opentox/model
+
+Copyright (c) 2009-2011 Christoph Helma, Martin Guetlein, Micha Rautenberg, Andreas Maunz, David Vorgrimmler, Denis Gebele. See LICENSE for details.
-Copyright (c) 2009 Christoph Helma. See LICENSE for details.
diff --git a/application.rb b/application.rb
index 3e80e8b..866a267 100644
--- a/application.rb
+++ b/application.rb
@@ -1,64 +1,90 @@
require 'rubygems'
-gem "opentox-ruby-api-wrapper", "= 1.6.5"
-require 'opentox-ruby-api-wrapper'
+gem "opentox-ruby", "~> 1"
+require 'opentox-ruby'
-class Model
- include DataMapper::Resource
- property :id, Serial
- property :uri, String, :length => 255
- property :owl, Text, :length => 2**32-1
- property :yaml, Text, :length => 2**32-1
- property :created_at, DateTime
-end
+set :lock, true
+
+class PredictionCache < Ohm::Model
+ attribute :compound_uri
+ attribute :model_uri
+ attribute :dataset_uri
-class Prediction
- # cache predictions
- include DataMapper::Resource
- property :id, Serial
- property :compound_uri, String, :length => 255
- property :model_uri, String, :length => 255
- property :yaml, Text, :length => 2**32-1
+ index :compound_uri
+ index :model_uri
end
-DataMapper.auto_upgrade!
+before do
+ @accept = request.env['HTTP_ACCEPT']
+ @accept = 'application/rdf+xml' if @accept == '*/*' or @accept == '' or @accept.nil?
+ @id = request.path_info.match(/^\/\d+/)
+ unless @id.nil?
+ @id = @id.to_s.sub(/\//,'').to_i
-require 'lazar.rb'
-require 'property_lazar.rb'
+ @uri = uri @id
+ @yaml_file = "public/#{@id}.yaml"
+ halt 404, "Dataset #{@id} not found." unless File.exists? @yaml_file
+ end
+ # make sure subjectid is not included in params, subjectid is set as member variable
+ params.delete(:subjectid)
+end
+
+require 'lazar.rb'
helpers do
- def activity(a)
- case a.to_s
- when "true"
- act = "active"
- when "false"
- act = "inactive"
- else
- act = "not available"
- end
- act
- end
+
+ def next_id
+ id = Dir["./public/*yaml"].collect{|f| File.basename(f.sub(/.yaml/,'')).to_i}.sort.last
+ id = 0 if id.nil?
+ id + 1
+ end
+
+ def uri(id)
+ url_for "/#{id}", :full
+ end
+
+ def activity(a)
+ case a.to_s
+ when "true"
+ act = "active"
+ when "false"
+ act = "inactive"
+ else
+ act = "not available"
+ end
+ act
+ end
end
get '/?' do # get index of models
- response['Content-Type'] = 'text/uri-list'
- Model.all(params).collect{|m| m.uri}.join("\n") + "\n"
+ response['Content-Type'] = 'text/uri-list'
+ Dir["./public/*yaml"].collect{|f| File.basename(f.sub(/.yaml/,'')).to_i}.sort.collect{|n| uri n}.join("\n") + "\n"
end
delete '/:id/?' do
- begin
- Model.get(params[:id]).destroy!
- "Model #{params[:id]} deleted."
- rescue
- halt 404, "Model #{params[:id]} does not exist."
- end
+ LOGGER.debug "Deleting model with id "+@id.to_s
+ begin
+ FileUtils.rm @yaml_file
+ if @subjectid and !File.exists? @yaml_file and @uri
+ begin
+ res = OpenTox::Authorization.delete_policies_from_uri(@uri, @subjectid)
+ LOGGER.debug "Policy deleted for Dataset URI: #{@uri} with result: #{res}"
+ rescue
+ LOGGER.warn "Policy delete error for Dataset URI: #{@uri}"
+ end
+ end
+ response['Content-Type'] = 'text/plain'
+ "Model #{@id} deleted."
+ rescue
+ halt 404, "Model #{@id} does not exist."
+ end
end
delete '/?' do
- # TODO delete datasets
- Model.auto_migrate!
- Prediction.auto_migrate!
- response['Content-Type'] = 'text/plain'
- "All models and cached predictions deleted."
+ # TODO delete datasets
+ FileUtils.rm Dir["public/*.yaml"]
+ PredictionCache.all.each {|cache| cache.delete }
+ response['Content-Type'] = 'text/plain'
+ "All models and cached predictions deleted."
end
diff --git a/config.ru b/config.ru
index 6f5db40..ec39d1b 100644
--- a/config.ru
+++ b/config.ru
@@ -1,6 +1,7 @@
require 'rubygems'
-require 'opentox-ruby-api-wrapper'
+require 'opentox-ruby'
require 'config/config_ru'
set :app_file, __FILE__ # to get the view path right
run Sinatra::Application
-
+set :raise_errors, false
+set :show_exceptions, false
\ No newline at end of file
diff --git a/lazar.rb b/lazar.rb
index 7d2198a..ba85784 100644
--- a/lazar.rb
+++ b/lazar.rb
@@ -1,359 +1,86 @@
-# R integration
-# workaround to initialize R non-interactively (former rinruby versions did this by default)
-# avoids compiling R with X
-R = nil
-require "rinruby"
require "haml"
-class Lazar < Model
-
- attr_accessor :prediction_dataset
-
- # AM begin
- # regression function, created 06/10
- # ch: please properly integrate this into the workflow. You will need some criterium for distinguishing regression/classification (hardcoded regression for testing)
- def regression(compound_uri,prediction,verbose=false)
-
- lazar = YAML.load self.yaml
- compound = OpenTox::Compound.new(:uri => compound_uri)
-
- # obtain X values for query compound
- compound_matches = compound.match lazar.features
-
- conf = 0.0
- features = { :activating => [], :deactivating => [] }
- neighbors = {}
- regression = nil
-
- regr_occurrences = [] # occurrence vector with {0,1} entries
- sims = [] # similarity values between query and neighbors
- acts = [] # activities of neighbors for supervised learning
- neighbor_matches = [] # as in classification: URIs of matches
- gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel
- i = 0
-
- # aquire data related to query structure
- lazar.fingerprints.each do |uri,matches|
- sim = OpenTox::Algorithm::Similarity.weighted_tanimoto(compound_matches,matches,lazar.p_values)
- lazar.activities[uri].each do |act|
- if sim > 0.3
- neighbors[uri] = {:similarity => sim}
- neighbors[uri][:features] = { :activating => [], :deactivating => [] } unless neighbors[uri][:features]
- matches.each do |m|
- if lazar.effects[m] == 'activating'
- neighbors[uri][:features][:activating] << {:smarts => m, :p_value => lazar.p_values[m]}
- elsif lazar.effects[m] == 'deactivating'
- neighbors[uri][:features][:deactivating] << {:smarts => m, :p_value => lazar.p_values[m]}
- end
- end
- lazar.activities[uri].each do |act|
- neighbors[uri][:activities] = [] unless neighbors[uri][:activities]
- neighbors[uri][:activities] << act
- end
- conf += OpenTox::Utils.gauss(sim)
- sims << OpenTox::Utils.gauss(sim)
- #TODO check for 0 s
- acts << Math.log10(act.to_f)
- neighbor_matches[i] = matches
- i+=1
- end
- end
- end
- conf = conf/neighbors.size
- LOGGER.debug "Regression: found " + neighbor_matches.size.to_s + " neighbors."
-
-
- unless neighbor_matches.length == 0
- # gram matrix
- (0..(neighbor_matches.length-1)).each do |i|
- gram_matrix[i] = []
- # lower triangle
- (0..(i-1)).each do |j|
- sim = OpenTox::Algorithm::Similarity.weighted_tanimoto(neighbor_matches[i], neighbor_matches[j], lazar.p_values)
- gram_matrix[i] << OpenTox::Utils.gauss(sim)
- end
- # diagonal element
- gram_matrix[i][i] = 1.0
- # upper triangle
- ((i+1)..(neighbor_matches.length-1)).each do |j|
- sim = OpenTox::Algorithm::Similarity.weighted_tanimoto(neighbor_matches[i], neighbor_matches[j], lazar.p_values)
- gram_matrix[i] << OpenTox::Utils.gauss(sim)
- end
- end
-
- @r = RinRuby.new(false,false) # global R instance leads to Socket errors after a large number of requests
- @r.eval "library('kernlab')" # this requires R package "kernlab" to be installed
- LOGGER.debug "Setting R data ..."
- # set data
- @r.gram_matrix = gram_matrix.flatten
- @r.n = neighbor_matches.length
- @r.y = acts
- @r.sims = sims
-
- LOGGER.debug "Preparing R data ..."
- # prepare data
- @r.eval "y<-as.vector(y)"
- @r.eval "gram_matrix<-as.kernelMatrix(matrix(gram_matrix,n,n))"
- @r.eval "sims<-as.vector(sims)"
-
- # model + support vectors
- LOGGER.debug "Creating SVM model ..."
- @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"nu-svr\", nu=0.8)"
- @r.eval "sv<-as.vector(SVindex(model))"
- @r.eval "sims<-sims[sv]"
- @r.eval "sims<-as.kernelMatrix(matrix(sims,1))"
- LOGGER.debug "Predicting ..."
- @r.eval "p<-predict(model,sims)[1,1]"
- regression = 10**(@r.p.to_f)
- LOGGER.debug "Prediction is: '" + regression.to_s + "'."
- @r.quit # free R
-
- end
-
- if (regression != nil)
- feature_uri = lazar.dependentVariables
- prediction.compounds << compound_uri
- prediction.features << feature_uri
- prediction.data[compound_uri] = [] unless prediction.data[compound_uri]
- compound_matches.each { |m| features[lazar.effects[m].to_sym] << {:smarts => m, :p_value => lazar.p_values[m] } }
- tuple = {
- File.join(@@config[:services]["opentox-model"],"lazar#regression") => regression,
- File.join(@@config[:services]["opentox-model"],"lazar#confidence") => conf
- }
- if verbose
- tuple[File.join(@@config[:services]["opentox-model"],"lazar#neighbors")] = neighbors
- tuple[File.join(@@config[:services]["opentox-model"],"lazar#features")] = features
- end
- prediction.data[compound_uri] << {feature_uri => tuple}
- end
-
- end
- # AM end
-
-
- def classification(compound_uri,prediction,verbose=false)
-
- lazar = YAML.load self.yaml
- compound = OpenTox::Compound.new(:uri => compound_uri)
- compound_matches = compound.match lazar.features
-
- conf = 0.0
- features = { :activating => [], :deactivating => [] }
- neighbors = {}
- classification = nil
-
- lazar.fingerprints.each do |uri,matches|
-
- sim = OpenTox::Algorithm::Similarity.weighted_tanimoto(compound_matches,matches,lazar.p_values)
- if sim > 0.3
- neighbors[uri] = {:similarity => sim}
- neighbors[uri][:features] = { :activating => [], :deactivating => [] } unless neighbors[uri][:features]
- matches.each do |m|
- if lazar.effects[m] == 'activating'
- neighbors[uri][:features][:activating] << {:smarts => m, :p_value => lazar.p_values[m]}
- elsif lazar.effects[m] == 'deactivating'
- neighbors[uri][:features][:deactivating] << {:smarts => m, :p_value => lazar.p_values[m]}
- end
- end
- lazar.activities[uri].each do |act|
- neighbors[uri][:activities] = [] unless neighbors[uri][:activities]
- neighbors[uri][:activities] << act
- case act.to_s
- when 'true'
- conf += OpenTox::Utils.gauss(sim)
- when 'false'
- conf -= OpenTox::Utils.gauss(sim)
- end
- end
- end
- end
-
- conf = conf/neighbors.size
- if conf > 0.0
- classification = true
- elsif conf < 0.0
- classification = false
- end
- if (classification != nil)
- feature_uri = lazar.dependentVariables
- prediction.compounds << compound_uri
- prediction.features << feature_uri
- prediction.data[compound_uri] = [] unless prediction.data[compound_uri]
- compound_matches.each { |m| features[lazar.effects[m].to_sym] << {:smarts => m, :p_value => lazar.p_values[m] } }
- tuple = {
- File.join(@@config[:services]["opentox-model"],"lazar#classification") => classification,
- File.join(@@config[:services]["opentox-model"],"lazar#confidence") => conf
- }
- if verbose
- tuple[File.join(@@config[:services]["opentox-model"],"lazar#neighbors")] = neighbors
- tuple[File.join(@@config[:services]["opentox-model"],"lazar#features")] = features
- end
- prediction.data[compound_uri] << {feature_uri => tuple}
- end
- end
-
- def database_activity?(compound_uri,prediction)
- # find database activities
- lazar = YAML.load self.yaml
- db_activities = lazar.activities[compound_uri]
- if db_activities
- prediction.creator = lazar.trainingDataset
- feature_uri = lazar.dependentVariables
- prediction.compounds << compound_uri
- prediction.features << feature_uri
- prediction.data[compound_uri] = [] unless prediction.data[compound_uri]
- db_activities.each do |act|
- prediction.data[compound_uri] << {feature_uri => act}
- end
- true
- else
- false
- end
- end
-
- def to_owl
- data = YAML.load(yaml)
- activity_dataset = YAML.load(RestClient.get(data.trainingDataset, :accept => 'application/x-yaml').to_s)
- feature_dataset = YAML.load(RestClient.get(data.feature_dataset_uri, :accept => 'application/x-yaml').to_s)
- owl = OpenTox::Owl.create 'Model', uri
- owl.set("creator","http://github.com/helma/opentox-model")
- owl.set("title", URI.decode(data.dependentVariables.split(/#/).last) )
- #owl.set("title","#{URI.decode(activity_dataset.title)} lazar classification")
- owl.set("date",created_at.to_s)
- owl.set("algorithm",data.algorithm)
- owl.set("dependentVariables",activity_dataset.features.join(', '))
- owl.set("independentVariables",feature_dataset.features.join(', '))
- owl.set("predictedVariables", data.dependentVariables )
- #owl.set("predictedVariables",activity_dataset.features.join(', ') + "_lazar_classification")
- owl.set("trainingDataset",data.trainingDataset)
- owl.parameters = {
- "Dataset URI" =>
- { :scope => "mandatory", :value => data.trainingDataset },
- "Feature URI for dependent variable" =>
- { :scope => "mandatory", :value => activity_dataset.features.join(', ')},
- "Feature generation URI" =>
- { :scope => "mandatory", :value => feature_dataset.creator }
- }
-
- owl.rdf
- end
-
-end
-
+# Get model representation
+# @return [application/rdf+xml,application/x-yaml] Model representation
get '/:id/?' do
- accept = request.env['HTTP_ACCEPT']
- accept = "application/rdf+xml" if accept == '*/*' or accept == '' or accept.nil?
- # workaround for browser links
- case params[:id]
- when /.yaml$/
- params[:id].sub!(/.yaml$/,'')
- accept = 'application/x-yaml'
- when /.rdf$/
- params[:id].sub!(/.rdf$/,'')
- accept = 'application/rdf+xml'
- end
- model = Lazar.get(params[:id])
- halt 404, "Model #{params[:id]} not found." unless model
- case accept
- when "application/rdf+xml"
- response['Content-Type'] = 'application/rdf+xml'
- unless model.owl # lazy owl creation
- model.owl = model.to_owl
- model.save
- end
- model.owl
- when /yaml/
- response['Content-Type'] = 'application/x-yaml'
- model.yaml
- else
- halt 400, "Unsupported MIME type '#{accept}'"
- end
-end
-
-get '/:id/algorithm/?' do
- response['Content-Type'] = 'text/plain'
- YAML.load(Lazar.get(params[:id]).yaml).algorithm
-end
-
-get '/:id/trainingDataset/?' do
- response['Content-Type'] = 'text/plain'
- YAML.load(Lazar.get(params[:id]).yaml).trainingDataset
+ halt 404, "Model #{params[:id]} not found." unless File.exists? @yaml_file
+ response['Content-Type'] = @accept
+ case @accept
+ when /application\/rdf\+xml/
+ s = OpenTox::Serializer::Owl.new
+ s.add_model(@uri,YAML.load_file(@yaml_file).metadata)
+ response['Content-Type'] = 'application/rdf+xml'
+ s.to_rdfxml
+ when /yaml/
+ response['Content-Type'] = 'application/x-yaml'
+ File.read @yaml_file
+ when /html/
+ response['Content-Type'] = 'text/html'
+ OpenTox.text_to_html File.read(@yaml_file)
+ else
+ halt 400, "Unsupported MIME type '#{@accept}'"
+ end
end
-get '/:id/feature_dataset/?' do
- response['Content-Type'] = 'text/plain'
- YAML.load(Lazar.get(params[:id]).yaml).feature_dataset_uri
+get '/:id/metadata.?:ext?' do
+ metadata = YAML.load_file(@yaml_file).metadata
+ response['Content-Type'] = @accept
+ case @accept
+ when /yaml/
+ metadata.to_yaml
+ else #when /rdf/ and anything else
+ serializer = OpenTox::Serializer::Owl.new
+ serializer.add_metadata @uri, metadata
+ serializer.to_rdfxml
+ end
end
+# Store a lazar model. This method should not be called directly; use OpenTox::Algorithm::Lazar to create a lazar model
+# @param [Body] lazar Model representation in YAML format
+# @return [String] Model URI
post '/?' do # create model
- halt 400, "MIME type \"#{request.content_type}\" not supported." unless request.content_type.match(/yaml/)
- model = Lazar.new
- model.save
- model.uri = url_for("/#{model.id}", :full)
- model.yaml = request.env["rack.input"].read
- model.save
- model.uri
+ halt 400, "MIME type \"#{request.content_type}\" not supported." unless request.content_type.match(/yaml/)
+ @id = next_id
+ @uri = uri @id
+ @yaml_file = "public/#{@id}.yaml"
+ lazar = YAML.load request.env["rack.input"].read
+ lazar.uri = @uri
+ File.open(@yaml_file,"w+"){|f| f.puts lazar.to_yaml}
+ response['Content-Type'] = 'text/uri-list'
+ @uri
end
-post '/:id/?' do # create prediction
-
- lazar = Lazar.get(params[:id])
- halt 404, "Model #{params[:id]} does not exist." unless lazar
- halt 404, "No compound_uri or dataset_uri parameter." unless compound_uri = params[:compound_uri] or dataset_uri = params[:dataset_uri]
-
- @prediction = OpenTox::Dataset.new
- @prediction.creator = lazar.uri
- dependent_variable = YAML.load(lazar.yaml).dependentVariables
- @prediction.title = URI.decode(dependent_variable.split(/#/).last)
- case dependent_variable
- when /classification/
- prediction_type = "classification"
- when /regression/
- prediction_type = "regression"
- end
-
- if compound_uri
- # look for cached prediction first
- if cached_prediction = Prediction.first(:model_uri => lazar.uri, :compound_uri => compound_uri)
- @prediction = YAML.load(cached_prediction.yaml)
- else
- begin
- # AM: switch here between regression and classification
- eval "lazar.#{prediction_type}(compound_uri,@prediction,true) unless lazar.database_activity?(compound_uri,@prediction)"
- Prediction.create(:model_uri => lazar.uri, :compound_uri => compound_uri, :yaml => @prediction.to_yaml)
- rescue
- LOGGER.error "#{prediction_type} failed for #{compound_uri} with #{$!} "
- halt 500, "Prediction of #{compound_uri} failed."
- end
+# Make a lazar prediction. Predicts either a single compound or all compounds from a dataset
+# @param [optional,String] dataset_uri URI of the dataset to be predicted
+# @param [optional,String] compound_uri URI of the compound to be predicted
+# @param [optional,Header] Accept Content-type of prediction, can be either `application/rdf+xml` or `application/x-yaml`
+# @return [text/uri-list] URI of prediction task (dataset prediction) or prediction dataset (compound prediction)
+post '/:id/?' do
+
+ halt 404, "Model #{params[:id]} does not exist." unless File.exists? @yaml_file
+
+ halt 404, "No compound_uri or dataset_uri parameter." unless compound_uri = params[:compound_uri] or dataset_uri = params[:dataset_uri]
+ @lazar = YAML.load_file @yaml_file
+
+ response['Content-Type'] = 'text/uri-list'
+
+ if compound_uri
+ cache = PredictionCache.find(:model_uri => @lazar.uri, :compound_uri => compound_uri).first
+ return cache.dataset_uri if cache and uri_available?(cache.dataset_uri)
+ begin
+ prediction_uri = @lazar.predict(compound_uri,true,@subjectid).uri
+ PredictionCache.create(:model_uri => @lazar.uri, :compound_uri => compound_uri, :dataset_uri => prediction_uri)
+ prediction_uri
+ rescue
+ LOGGER.error "Lazar prediction failed for #{compound_uri} with #{$!} "
+ halt 500, "Prediction of #{compound_uri} with #{@lazar.uri} failed."
end
- case request.env['HTTP_ACCEPT']
- when /yaml/
- @prediction.to_yaml
- when 'application/rdf+xml'
- @prediction.to_owl
- else
- halt 400, "MIME type \"#{request.env['HTTP_ACCEPT']}\" not supported."
- end
-
- elsif dataset_uri
- response['Content-Type'] = 'text/uri-list'
- task_uri = OpenTox::Task.as_task("Predict dataset",url_for("/#{lazar.id}", :full)) do
- input_dataset = OpenTox::Dataset.find(dataset_uri)
- input_dataset.compounds.each do |compound_uri|
- # AM: switch here between regression and classification
- begin
- eval "lazar.#{prediction_type}(compound_uri,@prediction) unless lazar.database_activity?(compound_uri,@prediction)"
- rescue
- LOGGER.error "#{prediction_type} failed for #{compound_uri} with #{$!} "
- end
- end
- begin
- uri = @prediction.save.chomp
- rescue
- halt 500, "Could not save prediction dataset"
- end
- end
- halt 202,task_uri
- end
+ elsif dataset_uri
+ task = OpenTox::Task.create("Predict dataset",url_for("/#{@lazar.id}", :full)) do |task|
+ @lazar.predict_dataset(dataset_uri, @subjectid, task).uri
+ end
+ halt 503,task.uri+"\n" if task.status == "Cancelled"
+ halt 202,task.uri
+ end
end
diff --git a/property_lazar.rb b/property_lazar.rb
deleted file mode 100644
index 6e68718..0000000
--- a/property_lazar.rb
+++ /dev/null
@@ -1,303 +0,0 @@
-# R integration
-# workaround to initialize R non-interactively (former rinruby versions did this by default)
-# avoids compiling R with X
-R = nil
-require "rinruby"
-require "haml"
-
-class PropertyLazar < Model
-
- attr_accessor :prediction_dataset
-
-=begin
- # AM begin
- # regression function, created 06/10
- def regression(compound_uri,prediction,verbose=false)
-
- lazar = YAML.load self.yaml
- compound = OpenTox::Compound.new(:uri => compound_uri)
-
- # obtain X values for query compound
- compound_matches = compound.match lazar.features
-
- conf = 0.0
- features = { :activating => [], :deactivating => [] }
- neighbors = {}
- regression = nil
-
- regr_occurrences = [] # occurrence vector with {0,1} entries
- sims = [] # similarity values between query and neighbors
- acts = [] # activities of neighbors for supervised learning
- neighbor_matches = [] # as in classification: URIs of matches
- gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel
- i = 0
-
- # aquire data related to query structure
- lazar.fingerprints.each do |uri,matches|
- sim = OpenTox::Algorithm::Similarity.weighted_tanimoto(compound_matches,matches,lazar.p_values)
- lazar.activities[uri].each do |act|
- if sim > 0.3
- neighbors[uri] = {:similarity => sim}
- neighbors[uri][:features] = { :activating => [], :deactivating => [] } unless neighbors[uri][:features]
- matches.each do |m|
- if lazar.effects[m] == 'activating'
- neighbors[uri][:features][:activating] << {:smarts => m, :p_value => lazar.p_values[m]}
- elsif lazar.effects[m] == 'deactivating'
- neighbors[uri][:features][:deactivating] << {:smarts => m, :p_value => lazar.p_values[m]}
- end
- end
- lazar.activities[uri].each do |act|
- neighbors[uri][:activities] = [] unless neighbors[uri][:activities]
- neighbors[uri][:activities] << act
- end
- conf += OpenTox::Utils.gauss(sim)
- sims << OpenTox::Utils.gauss(sim)
- #TODO check for 0 s
- acts << Math.log10(act.to_f)
- neighbor_matches[i] = matches
- i+=1
- end
- end
- end
- conf = conf/neighbors.size
- LOGGER.debug "Regression: found " + neighbor_matches.size.to_s + " neighbors."
-
-
- unless neighbor_matches.length == 0
- # gram matrix
- (0..(neighbor_matches.length-1)).each do |i|
- gram_matrix[i] = []
- # lower triangle
- (0..(i-1)).each do |j|
- sim = OpenTox::Algorithm::Similarity.weighted_tanimoto(neighbor_matches[i], neighbor_matches[j], lazar.p_values)
- gram_matrix[i] << OpenTox::Utils.gauss(sim)
- end
- # diagonal element
- gram_matrix[i][i] = 1.0
- # upper triangle
- ((i+1)..(neighbor_matches.length-1)).each do |j|
- sim = OpenTox::Algorithm::Similarity.weighted_tanimoto(neighbor_matches[i], neighbor_matches[j], lazar.p_values)
- gram_matrix[i] << OpenTox::Utils.gauss(sim)
- end
- end
-
- @r = RinRuby.new(false,false) # global R instance leads to Socket errors after a large number of requests
- @r.eval "library('kernlab')" # this requires R package "kernlab" to be installed
- LOGGER.debug "Setting R data ..."
- # set data
- @r.gram_matrix = gram_matrix.flatten
- @r.n = neighbor_matches.length
- @r.y = acts
- @r.sims = sims
-
- LOGGER.debug "Preparing R data ..."
- # prepare data
- @r.eval "y<-as.vector(y)"
- @r.eval "gram_matrix<-as.kernelMatrix(matrix(gram_matrix,n,n))"
- @r.eval "sims<-as.vector(sims)"
-
- # model + support vectors
- LOGGER.debug "Creating SVM model ..."
- @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"nu-svr\", nu=0.8)"
- @r.eval "sv<-as.vector(SVindex(model))"
- @r.eval "sims<-sims[sv]"
- @r.eval "sims<-as.kernelMatrix(matrix(sims,1))"
- LOGGER.debug "Predicting ..."
- @r.eval "p<-predict(model,sims)[1,1]"
- regression = 10**(@r.p.to_f)
- LOGGER.debug "Prediction is: '" + regression.to_s + "'."
- @r.quit # free R
-
- end
-
- if (regression != nil)
- feature_uri = lazar.dependentVariables
- prediction.compounds << compound_uri
- prediction.features << feature_uri
- prediction.data[compound_uri] = [] unless prediction.data[compound_uri]
- compound_matches.each { |m| features[lazar.effects[m].to_sym] << {:smarts => m, :p_value => lazar.p_values[m] } }
- tuple = {
- File.join(@@config[:services]["opentox-model"],"lazar#regression") => regression,
- File.join(@@config[:services]["opentox-model"],"lazar#confidence") => conf
- }
- if verbose
- tuple[File.join(@@config[:services]["opentox-model"],"lazar#neighbors")] = neighbors
- tuple[File.join(@@config[:services]["opentox-model"],"lazar#features")] = features
- end
- prediction.data[compound_uri] << {feature_uri => tuple}
- end
-
- end
- # AM end
-=end
-
-
- def classification(compound_uri,prediction,verbose=false)
-
- lazar = YAML.load self.yaml
- compound = OpenTox::Compound.new(:uri => compound_uri)
- compound_properties = lazar.properties[compound.uri]
-
- conf = 0.0
- neighbors = {}
- features = []
- classification = nil
-
- lazar.properties.each do |uri,properties|
-
- sim = OpenTox::Algorithm::Similarity.euclidean(compound_properties,properties)
- if sim and sim > 0.001
- neighbors[uri] = {:similarity => sim}
- neighbors[uri][:features] = [] unless neighbors[uri][:features]
- properties.each do |p,v|
- neighbors[uri][:features] << {p => v}
- end
- lazar.activities[uri].each do |act|
- neighbors[uri][:activities] = [] unless neighbors[uri][:activities]
- neighbors[uri][:activities] << act
- case act.to_s
- when 'true'
- conf += OpenTox::Utils.gauss(sim)
- when 'false'
- conf -= OpenTox::Utils.gauss(sim)
- end
- end
- end
- end
-
- conf = conf/neighbors.size
- if conf > 0.0
- classification = true
- elsif conf < 0.0
- classification = false
- end
- if (classification != nil)
- feature_uri = lazar.dependentVariables
- prediction.compounds << compound_uri
- prediction.features << feature_uri
- prediction.data[compound_uri] = [] unless prediction.data[compound_uri]
- compound_properties.each { |p,v| features << {p => v} }
- tuple = {
- File.join(@@config[:services]["opentox-model"],"lazar#classification") => classification,
- File.join(@@config[:services]["opentox-model"],"lazar#confidence") => conf
- }
- if verbose
- tuple[File.join(@@config[:services]["opentox-model"],"lazar#neighbors")] = neighbors
- tuple[File.join(@@config[:services]["opentox-model"],"lazar#features")] = features
- end
- prediction.data[compound_uri] << {feature_uri => tuple}
- end
- end
-
- def database_activity?(compound_uri,prediction)
- # find database activities
- lazar = YAML.load self.yaml
- db_activities = lazar.activities[compound_uri]
- if db_activities
- prediction.creator = lazar.trainingDataset
- feature_uri = lazar.dependentVariables
- prediction.compounds << compound_uri
- prediction.features << feature_uri
- prediction.data[compound_uri] = [] unless prediction.data[compound_uri]
- db_activities.each do |act|
- prediction.data[compound_uri] << {feature_uri => act}
- end
- true
- else
- false
- end
- end
-
- def to_owl
- data = YAML.load(yaml)
- activity_dataset = YAML.load(RestClient.get(data.trainingDataset, :accept => 'application/x-yaml').to_s)
- feature_dataset = YAML.load(RestClient.get(data.feature_dataset_uri, :accept => 'application/x-yaml').to_s)
- owl = OpenTox::Owl.create 'Model', uri
- owl.set("creator","http://github.com/helma/opentox-model")
- owl.set("title", URI.decode(data.dependentVariables.split(/#/).last) )
- #owl.set("title","#{URI.decode(activity_dataset.title)} lazar classification")
- owl.set("date",created_at.to_s)
- owl.set("algorithm",data.algorithm)
- owl.set("dependentVariables",activity_dataset.features.join(', '))
- owl.set("independentVariables",feature_dataset.features.join(', '))
- owl.set("predictedVariables", data.dependentVariables )
- #owl.set("predictedVariables",activity_dataset.features.join(', ') + "_lazar_classification")
- owl.set("trainingDataset",data.trainingDataset)
- owl.parameters = {
- "Dataset URI" =>
- { :scope => "mandatory", :value => data.trainingDataset },
- "Feature URI for dependent variable" =>
- { :scope => "mandatory", :value => activity_dataset.features.join(', ')},
- "Feature generation URI" =>
- { :scope => "mandatory", :value => feature_dataset.creator }
- }
-
- owl.rdf
- end
-
-end
-
-post '/pl/:id/?' do # create prediction
-
- lazar = PropertyLazar.get(params[:id])
- LOGGER.debug lazar.to_yaml
- halt 404, "Model #{params[:id]} does not exist." unless lazar
- halt 404, "No compound_uri or dataset_uri parameter." unless compound_uri = params[:compound_uri] or dataset_uri = params[:dataset_uri]
-
- @prediction = OpenTox::Dataset.new
- @prediction.creator = lazar.uri
- dependent_variable = YAML.load(lazar.yaml).dependentVariables
- @prediction.title = URI.decode(dependent_variable.split(/#/).last)
- case dependent_variable
- when /classification/
- prediction_type = "classification"
- when /regression/
- prediction_type = "regression"
- end
-
- if compound_uri
- # look for cached prediction first
- #if cached_prediction = Prediction.first(:model_uri => lazar.uri, :compound_uri => compound_uri)
- #@prediction = YAML.load(cached_prediction.yaml)
- #else
- begin
- # AM: switch here between regression and classification
- lazar.classification(compound_uri,@prediction,true) #unless lazar.database_activity?(compound_uri,@prediction)"
- #eval "lazar.#{prediction_type}(compound_uri,@prediction,true) unless lazar.database_activity?(compound_uri,@prediction)"
- #Prediction.create(:model_uri => lazar.uri, :compound_uri => compound_uri, :yaml => @prediction.to_yaml)
- rescue
- LOGGER.error "#{prediction_type} failed for #{compound_uri} with #{$!} "
- halt 500, "Prediction of #{compound_uri} failed."
- end
- #end
- case request.env['HTTP_ACCEPT']
- when /yaml/
- @prediction.to_yaml
- when 'application/rdf+xml'
- @prediction.to_owl
- else
- halt 400, "MIME type \"#{request.env['HTTP_ACCEPT']}\" not supported."
- end
-
- elsif dataset_uri
- response['Content-Type'] = 'text/uri-list'
- task_uri = OpenTox::Task.as_task("Predict dataset",url_for("/#{lazar.id}", :full)) do
- input_dataset = OpenTox::Dataset.find(dataset_uri)
- input_dataset.compounds.each do |compound_uri|
- # AM: switch here between regression and classification
- begin
- eval "lazar.#{prediction_type}(compound_uri,@prediction) unless lazar.database_activity?(compound_uri,@prediction)"
- rescue
- LOGGER.error "#{prediction_type} failed for #{compound_uri} with #{$!} "
- end
- end
- begin
- uri = @prediction.save.chomp
- rescue
- halt 500, "Could not save prediction dataset"
- end
- end
- halt 202,task_uri
- end
-
-end