summaryrefslogtreecommitdiff
path: root/lazar.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lazar.rb')
-rw-r--r--lazar.rb419
1 files changed, 73 insertions, 346 deletions
diff --git a/lazar.rb b/lazar.rb
index 7d2198a..ba85784 100644
--- a/lazar.rb
+++ b/lazar.rb
@@ -1,359 +1,86 @@
-# R integration
-# workaround to initialize R non-interactively (former rinruby versions did this by default)
-# avoids compiling R with X
-R = nil
-require "rinruby"
require "haml"
-class Lazar < Model
-
- attr_accessor :prediction_dataset
-
- # AM begin
- # regression function, created 06/10
- # ch: please properly integrate this into the workflow. You will need some criterium for distinguishing regression/classification (hardcoded regression for testing)
- def regression(compound_uri,prediction,verbose=false)
-
- lazar = YAML.load self.yaml
- compound = OpenTox::Compound.new(:uri => compound_uri)
-
- # obtain X values for query compound
- compound_matches = compound.match lazar.features
-
- conf = 0.0
- features = { :activating => [], :deactivating => [] }
- neighbors = {}
- regression = nil
-
- regr_occurrences = [] # occurrence vector with {0,1} entries
- sims = [] # similarity values between query and neighbors
- acts = [] # activities of neighbors for supervised learning
- neighbor_matches = [] # as in classification: URIs of matches
- gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel
- i = 0
-
- # aquire data related to query structure
- lazar.fingerprints.each do |uri,matches|
- sim = OpenTox::Algorithm::Similarity.weighted_tanimoto(compound_matches,matches,lazar.p_values)
- lazar.activities[uri].each do |act|
- if sim > 0.3
- neighbors[uri] = {:similarity => sim}
- neighbors[uri][:features] = { :activating => [], :deactivating => [] } unless neighbors[uri][:features]
- matches.each do |m|
- if lazar.effects[m] == 'activating'
- neighbors[uri][:features][:activating] << {:smarts => m, :p_value => lazar.p_values[m]}
- elsif lazar.effects[m] == 'deactivating'
- neighbors[uri][:features][:deactivating] << {:smarts => m, :p_value => lazar.p_values[m]}
- end
- end
- lazar.activities[uri].each do |act|
- neighbors[uri][:activities] = [] unless neighbors[uri][:activities]
- neighbors[uri][:activities] << act
- end
- conf += OpenTox::Utils.gauss(sim)
- sims << OpenTox::Utils.gauss(sim)
- #TODO check for 0 s
- acts << Math.log10(act.to_f)
- neighbor_matches[i] = matches
- i+=1
- end
- end
- end
- conf = conf/neighbors.size
- LOGGER.debug "Regression: found " + neighbor_matches.size.to_s + " neighbors."
-
-
- unless neighbor_matches.length == 0
- # gram matrix
- (0..(neighbor_matches.length-1)).each do |i|
- gram_matrix[i] = []
- # lower triangle
- (0..(i-1)).each do |j|
- sim = OpenTox::Algorithm::Similarity.weighted_tanimoto(neighbor_matches[i], neighbor_matches[j], lazar.p_values)
- gram_matrix[i] << OpenTox::Utils.gauss(sim)
- end
- # diagonal element
- gram_matrix[i][i] = 1.0
- # upper triangle
- ((i+1)..(neighbor_matches.length-1)).each do |j|
- sim = OpenTox::Algorithm::Similarity.weighted_tanimoto(neighbor_matches[i], neighbor_matches[j], lazar.p_values)
- gram_matrix[i] << OpenTox::Utils.gauss(sim)
- end
- end
-
- @r = RinRuby.new(false,false) # global R instance leads to Socket errors after a large number of requests
- @r.eval "library('kernlab')" # this requires R package "kernlab" to be installed
- LOGGER.debug "Setting R data ..."
- # set data
- @r.gram_matrix = gram_matrix.flatten
- @r.n = neighbor_matches.length
- @r.y = acts
- @r.sims = sims
-
- LOGGER.debug "Preparing R data ..."
- # prepare data
- @r.eval "y<-as.vector(y)"
- @r.eval "gram_matrix<-as.kernelMatrix(matrix(gram_matrix,n,n))"
- @r.eval "sims<-as.vector(sims)"
-
- # model + support vectors
- LOGGER.debug "Creating SVM model ..."
- @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"nu-svr\", nu=0.8)"
- @r.eval "sv<-as.vector(SVindex(model))"
- @r.eval "sims<-sims[sv]"
- @r.eval "sims<-as.kernelMatrix(matrix(sims,1))"
- LOGGER.debug "Predicting ..."
- @r.eval "p<-predict(model,sims)[1,1]"
- regression = 10**(@r.p.to_f)
- LOGGER.debug "Prediction is: '" + regression.to_s + "'."
- @r.quit # free R
-
- end
-
- if (regression != nil)
- feature_uri = lazar.dependentVariables
- prediction.compounds << compound_uri
- prediction.features << feature_uri
- prediction.data[compound_uri] = [] unless prediction.data[compound_uri]
- compound_matches.each { |m| features[lazar.effects[m].to_sym] << {:smarts => m, :p_value => lazar.p_values[m] } }
- tuple = {
- File.join(@@config[:services]["opentox-model"],"lazar#regression") => regression,
- File.join(@@config[:services]["opentox-model"],"lazar#confidence") => conf
- }
- if verbose
- tuple[File.join(@@config[:services]["opentox-model"],"lazar#neighbors")] = neighbors
- tuple[File.join(@@config[:services]["opentox-model"],"lazar#features")] = features
- end
- prediction.data[compound_uri] << {feature_uri => tuple}
- end
-
- end
- # AM end
-
-
- def classification(compound_uri,prediction,verbose=false)
-
- lazar = YAML.load self.yaml
- compound = OpenTox::Compound.new(:uri => compound_uri)
- compound_matches = compound.match lazar.features
-
- conf = 0.0
- features = { :activating => [], :deactivating => [] }
- neighbors = {}
- classification = nil
-
- lazar.fingerprints.each do |uri,matches|
-
- sim = OpenTox::Algorithm::Similarity.weighted_tanimoto(compound_matches,matches,lazar.p_values)
- if sim > 0.3
- neighbors[uri] = {:similarity => sim}
- neighbors[uri][:features] = { :activating => [], :deactivating => [] } unless neighbors[uri][:features]
- matches.each do |m|
- if lazar.effects[m] == 'activating'
- neighbors[uri][:features][:activating] << {:smarts => m, :p_value => lazar.p_values[m]}
- elsif lazar.effects[m] == 'deactivating'
- neighbors[uri][:features][:deactivating] << {:smarts => m, :p_value => lazar.p_values[m]}
- end
- end
- lazar.activities[uri].each do |act|
- neighbors[uri][:activities] = [] unless neighbors[uri][:activities]
- neighbors[uri][:activities] << act
- case act.to_s
- when 'true'
- conf += OpenTox::Utils.gauss(sim)
- when 'false'
- conf -= OpenTox::Utils.gauss(sim)
- end
- end
- end
- end
-
- conf = conf/neighbors.size
- if conf > 0.0
- classification = true
- elsif conf < 0.0
- classification = false
- end
- if (classification != nil)
- feature_uri = lazar.dependentVariables
- prediction.compounds << compound_uri
- prediction.features << feature_uri
- prediction.data[compound_uri] = [] unless prediction.data[compound_uri]
- compound_matches.each { |m| features[lazar.effects[m].to_sym] << {:smarts => m, :p_value => lazar.p_values[m] } }
- tuple = {
- File.join(@@config[:services]["opentox-model"],"lazar#classification") => classification,
- File.join(@@config[:services]["opentox-model"],"lazar#confidence") => conf
- }
- if verbose
- tuple[File.join(@@config[:services]["opentox-model"],"lazar#neighbors")] = neighbors
- tuple[File.join(@@config[:services]["opentox-model"],"lazar#features")] = features
- end
- prediction.data[compound_uri] << {feature_uri => tuple}
- end
- end
-
- def database_activity?(compound_uri,prediction)
- # find database activities
- lazar = YAML.load self.yaml
- db_activities = lazar.activities[compound_uri]
- if db_activities
- prediction.creator = lazar.trainingDataset
- feature_uri = lazar.dependentVariables
- prediction.compounds << compound_uri
- prediction.features << feature_uri
- prediction.data[compound_uri] = [] unless prediction.data[compound_uri]
- db_activities.each do |act|
- prediction.data[compound_uri] << {feature_uri => act}
- end
- true
- else
- false
- end
- end
-
- def to_owl
- data = YAML.load(yaml)
- activity_dataset = YAML.load(RestClient.get(data.trainingDataset, :accept => 'application/x-yaml').to_s)
- feature_dataset = YAML.load(RestClient.get(data.feature_dataset_uri, :accept => 'application/x-yaml').to_s)
- owl = OpenTox::Owl.create 'Model', uri
- owl.set("creator","http://github.com/helma/opentox-model")
- owl.set("title", URI.decode(data.dependentVariables.split(/#/).last) )
- #owl.set("title","#{URI.decode(activity_dataset.title)} lazar classification")
- owl.set("date",created_at.to_s)
- owl.set("algorithm",data.algorithm)
- owl.set("dependentVariables",activity_dataset.features.join(', '))
- owl.set("independentVariables",feature_dataset.features.join(', '))
- owl.set("predictedVariables", data.dependentVariables )
- #owl.set("predictedVariables",activity_dataset.features.join(', ') + "_lazar_classification")
- owl.set("trainingDataset",data.trainingDataset)
- owl.parameters = {
- "Dataset URI" =>
- { :scope => "mandatory", :value => data.trainingDataset },
- "Feature URI for dependent variable" =>
- { :scope => "mandatory", :value => activity_dataset.features.join(', ')},
- "Feature generation URI" =>
- { :scope => "mandatory", :value => feature_dataset.creator }
- }
-
- owl.rdf
- end
-
-end
-
+# Get model representation
+# @return [application/rdf+xml,application/x-yaml] Model representation
get '/:id/?' do
- accept = request.env['HTTP_ACCEPT']
- accept = "application/rdf+xml" if accept == '*/*' or accept == '' or accept.nil?
- # workaround for browser links
- case params[:id]
- when /.yaml$/
- params[:id].sub!(/.yaml$/,'')
- accept = 'application/x-yaml'
- when /.rdf$/
- params[:id].sub!(/.rdf$/,'')
- accept = 'application/rdf+xml'
- end
- model = Lazar.get(params[:id])
- halt 404, "Model #{params[:id]} not found." unless model
- case accept
- when "application/rdf+xml"
- response['Content-Type'] = 'application/rdf+xml'
- unless model.owl # lazy owl creation
- model.owl = model.to_owl
- model.save
- end
- model.owl
- when /yaml/
- response['Content-Type'] = 'application/x-yaml'
- model.yaml
- else
- halt 400, "Unsupported MIME type '#{accept}'"
- end
-end
-
-get '/:id/algorithm/?' do
- response['Content-Type'] = 'text/plain'
- YAML.load(Lazar.get(params[:id]).yaml).algorithm
-end
-
-get '/:id/trainingDataset/?' do
- response['Content-Type'] = 'text/plain'
- YAML.load(Lazar.get(params[:id]).yaml).trainingDataset
+ halt 404, "Model #{params[:id]} not found." unless File.exists? @yaml_file
+ response['Content-Type'] = @accept
+ case @accept
+ when /application\/rdf\+xml/
+ s = OpenTox::Serializer::Owl.new
+ s.add_model(@uri,YAML.load_file(@yaml_file).metadata)
+ response['Content-Type'] = 'application/rdf+xml'
+ s.to_rdfxml
+ when /yaml/
+ response['Content-Type'] = 'application/x-yaml'
+ File.read @yaml_file
+ when /html/
+ response['Content-Type'] = 'text/html'
+ OpenTox.text_to_html File.read(@yaml_file)
+ else
+ halt 400, "Unsupported MIME type '#{@accept}'"
+ end
end
-get '/:id/feature_dataset/?' do
- response['Content-Type'] = 'text/plain'
- YAML.load(Lazar.get(params[:id]).yaml).feature_dataset_uri
+get '/:id/metadata.?:ext?' do
+ metadata = YAML.load_file(@yaml_file).metadata
+ response['Content-Type'] = @accept
+ case @accept
+ when /yaml/
+ metadata.to_yaml
+ else #when /rdf/ and anything else
+ serializer = OpenTox::Serializer::Owl.new
+ serializer.add_metadata @uri, metadata
+ serializer.to_rdfxml
+ end
end
+# Store a lazar model. This method should not be called directly, use OpenTox::Algorithm::Lazar to create a lazar model
+# @param [Body] lazar Model representation in YAML format
+# @return [String] Model URI
post '/?' do # create model
- halt 400, "MIME type \"#{request.content_type}\" not supported." unless request.content_type.match(/yaml/)
- model = Lazar.new
- model.save
- model.uri = url_for("/#{model.id}", :full)
- model.yaml = request.env["rack.input"].read
- model.save
- model.uri
+ halt 400, "MIME type \"#{request.content_type}\" not supported." unless request.content_type.match(/yaml/)
+ @id = next_id
+ @uri = uri @id
+ @yaml_file = "public/#{@id}.yaml"
+ lazar = YAML.load request.env["rack.input"].read
+ lazar.uri = @uri
+ File.open(@yaml_file,"w+"){|f| f.puts lazar.to_yaml}
+ response['Content-Type'] = 'text/uri-list'
+ @uri
end
-post '/:id/?' do # create prediction
-
- lazar = Lazar.get(params[:id])
- halt 404, "Model #{params[:id]} does not exist." unless lazar
- halt 404, "No compound_uri or dataset_uri parameter." unless compound_uri = params[:compound_uri] or dataset_uri = params[:dataset_uri]
-
- @prediction = OpenTox::Dataset.new
- @prediction.creator = lazar.uri
- dependent_variable = YAML.load(lazar.yaml).dependentVariables
- @prediction.title = URI.decode(dependent_variable.split(/#/).last)
- case dependent_variable
- when /classification/
- prediction_type = "classification"
- when /regression/
- prediction_type = "regression"
- end
-
- if compound_uri
- # look for cached prediction first
- if cached_prediction = Prediction.first(:model_uri => lazar.uri, :compound_uri => compound_uri)
- @prediction = YAML.load(cached_prediction.yaml)
- else
- begin
- # AM: switch here between regression and classification
- eval "lazar.#{prediction_type}(compound_uri,@prediction,true) unless lazar.database_activity?(compound_uri,@prediction)"
- Prediction.create(:model_uri => lazar.uri, :compound_uri => compound_uri, :yaml => @prediction.to_yaml)
- rescue
- LOGGER.error "#{prediction_type} failed for #{compound_uri} with #{$!} "
- halt 500, "Prediction of #{compound_uri} failed."
- end
+# Make a lazar prediction. Predicts either a single compound or all compounds from a dataset
+# @param [optional,String] dataset_uri URI of the dataset to be predicted
+# @param [optional,String] compound_uri URI of the compound to be predicted
+# @param [optional,Header] Accept Content-type of prediction, can be either `application/rdf+xml` or `application/x-yaml`
+# @return [text/uri-list] URI of prediction task (dataset prediction) or prediction dataset (compound prediction)
+post '/:id/?' do
+
+ halt 404, "Model #{params[:id]} does not exist." unless File.exists? @yaml_file
+
+ halt 404, "No compound_uri or dataset_uri parameter." unless compound_uri = params[:compound_uri] or dataset_uri = params[:dataset_uri]
+ @lazar = YAML.load_file @yaml_file
+
+ response['Content-Type'] = 'text/uri-list'
+
+ if compound_uri
+ cache = PredictionCache.find(:model_uri => @lazar.uri, :compound_uri => compound_uri).first
+ return cache.dataset_uri if cache and uri_available?(cache.dataset_uri)
+ begin
+ prediction_uri = @lazar.predict(compound_uri,true,@subjectid).uri
+ PredictionCache.create(:model_uri => @lazar.uri, :compound_uri => compound_uri, :dataset_uri => prediction_uri)
+ prediction_uri
+ rescue
+ LOGGER.error "Lazar prediction failed for #{compound_uri} with #{$!} "
+ halt 500, "Prediction of #{compound_uri} with #{@lazar.uri} failed."
end
- case request.env['HTTP_ACCEPT']
- when /yaml/
- @prediction.to_yaml
- when 'application/rdf+xml'
- @prediction.to_owl
- else
- halt 400, "MIME type \"#{request.env['HTTP_ACCEPT']}\" not supported."
- end
-
- elsif dataset_uri
- response['Content-Type'] = 'text/uri-list'
- task_uri = OpenTox::Task.as_task("Predict dataset",url_for("/#{lazar.id}", :full)) do
- input_dataset = OpenTox::Dataset.find(dataset_uri)
- input_dataset.compounds.each do |compound_uri|
- # AM: switch here between regression and classification
- begin
- eval "lazar.#{prediction_type}(compound_uri,@prediction) unless lazar.database_activity?(compound_uri,@prediction)"
- rescue
- LOGGER.error "#{prediction_type} failed for #{compound_uri} with #{$!} "
- end
- end
- begin
- uri = @prediction.save.chomp
- rescue
- halt 500, "Could not save prediction dataset"
- end
- end
- halt 202,task_uri
- end
+ elsif dataset_uri
+ task = OpenTox::Task.create("Predict dataset",url_for("/#{@lazar.id}", :full)) do |task|
+ @lazar.predict_dataset(dataset_uri, @subjectid, task).uri
+ end
+ halt 503,task.uri+"\n" if task.status == "Cancelled"
+ halt 202,task.uri
+ end
end