diff options
-rw-r--r-- | .gitignore | 2 | ||||
-rw-r--r-- | README | 17 | ||||
-rw-r--r-- | application.rb | 114 | ||||
-rw-r--r-- | config.ru | 5 | ||||
-rw-r--r-- | lazar.rb | 419 | ||||
-rw-r--r-- | property_lazar.rb | 303 |
6 files changed, 159 insertions, 701 deletions
@@ -2,4 +2,4 @@ tmp/* *.sqlite3 log/* db/* -models/* +public/* @@ -5,7 +5,8 @@ OpenTox model * Implements the OpenTox model API * Ruby implementation of lazar prediction models -REST operations: +REST operations +--------------- Get a list of all lazar models GET / - List of model URIs 200 Get the representation of a lazar model GET /{id} - Model representation 200,404 @@ -13,12 +14,14 @@ Predict a compound POST /{id} compound_uri Prediction Predict a dataset POST /{id} dataset_uri Prediction dataset URI 200,404,500 Delete a model DELETE /{id} - - 200,404 -Supported MIME formats (http://chemical-mime.sourceforge.net/): +Supported MIME formats (http://chemical-mime.sourceforge.net/) +-------------------------------------------------------------- * application/rdf+xml (default): read/write OWL-DL * application/x-yaml -Examples: +Examples +-------- List all lazar models curl http://webservices.in-silico.ch/model @@ -38,6 +41,10 @@ Predict a dataset Delete a model curl -X DELETE http://webservices.in-silico.ch/model/{id} -More documentation: Source code (application.rb, lazar.rb) +API documentation +----------------- + +http://rdoc.info/github/opentox/model + +Copyright (c) 2009-2011 Christoph Helma, Martin Guetlein, Micha Rautenberg, Andreas Maunz, David Vorgrimmler, Denis Gebele. See LICENSE for details. -Copyright (c) 2009 Christoph Helma. See LICENSE for details. diff --git a/application.rb b/application.rb index 3e80e8b..866a267 100644 --- a/application.rb +++ b/application.rb @@ -1,64 +1,90 @@ require 'rubygems' -gem "opentox-ruby-api-wrapper", "= 1.6.5" -require 'opentox-ruby-api-wrapper' +gem "opentox-ruby", "~> 1" +require 'opentox-ruby' -class Model - include DataMapper::Resource - property :id, Serial - property :uri, String, :length => 255 - property :owl, Text, :length => 2**32-1 - property :yaml, Text, :length => 2**32-1 - property :created_at, DateTime -end +set :lock, true + +class PredictionCache < Ohm::Model + attribute :compound_uri + attribute :model_uri + attribute :dataset_uri -class Prediction - # cache predictions - include DataMapper::Resource - property :id, Serial - property :compound_uri, String, :length => 255 - property :model_uri, String, :length => 255 - property :yaml, Text, :length => 2**32-1 + index :compound_uri + index :model_uri end -DataMapper.auto_upgrade! +before do + @accept = request.env['HTTP_ACCEPT'] + @accept = 'application/rdf+xml' if @accept == '*/*' or @accept == '' or @accept.nil? + @id = request.path_info.match(/^\/\d+/) + unless @id.nil? + @id = @id.to_s.sub(/\//,'').to_i -require 'lazar.rb' -require 'property_lazar.rb' + @uri = uri @id + @yaml_file = "public/#{@id}.yaml" + halt 404, "Dataset #{@id} not found." unless File.exists? @yaml_file + end + # make sure subjectid is not included in params, subjectid is set as member variable + params.delete(:subjectid) +end + +require 'lazar.rb' helpers do - def activity(a) - case a.to_s - when "true" - act = "active" - when "false" - act = "inactive" - else - act = "not available" - end - act - end + + def next_id + id = Dir["./public/*yaml"].collect{|f| File.basename(f.sub(/.yaml/,'')).to_i}.sort.last + id = 0 if id.nil? + id + 1 + end + + def uri(id) + url_for "/#{id}", :full + end + + def activity(a) + case a.to_s + when "true" + act = "active" + when "false" + act = "inactive" + else + act = "not available" + end + act + end end get '/?' do # get index of models - response['Content-Type'] = 'text/uri-list' - Model.all(params).collect{|m| m.uri}.join("\n") + "\n" + response['Content-Type'] = 'text/uri-list' + Dir["./public/*yaml"].collect{|f| File.basename(f.sub(/.yaml/,'')).to_i}.sort.collect{|n| uri n}.join("\n") + "\n" end delete '/:id/?' do - begin - Model.get(params[:id]).destroy! - "Model #{params[:id]} deleted." - rescue - halt 404, "Model #{params[:id]} does not exist." - end + LOGGER.debug "Deleting model with id "+@id.to_s + begin + FileUtils.rm @yaml_file + if @subjectid and !File.exists? @yaml_file and @uri + begin + res = OpenTox::Authorization.delete_policies_from_uri(@uri, @subjectid) + LOGGER.debug "Policy deleted for Dataset URI: #{@uri} with result: #{res}" + rescue + LOGGER.warn "Policy delete error for Dataset URI: #{@uri}" + end + end + response['Content-Type'] = 'text/plain' + "Model #{@id} deleted." + rescue + halt 404, "Model #{@id} does not exist." + end end delete '/?' do - # TODO delete datasets - Model.auto_migrate! - Prediction.auto_migrate! - response['Content-Type'] = 'text/plain' - "All models and cached predictions deleted." + # TODO delete datasets + FileUtils.rm Dir["public/*.yaml"] + PredictionCache.all.each {|cache| cache.delete } + response['Content-Type'] = 'text/plain' + "All models and cached predictions deleted." end @@ -1,6 +1,7 @@ require 'rubygems' -require 'opentox-ruby-api-wrapper' +require 'opentox-ruby' require 'config/config_ru' set :app_file, __FILE__ # to get the view path right run Sinatra::Application - +set :raise_errors, false +set :show_exceptions, false
\ No newline at end of file @@ -1,359 +1,86 @@ -# R integration -# workaround to initialize R non-interactively (former rinruby versions did this by default) -# avoids compiling R with X -R = nil -require "rinruby" require "haml" -class Lazar < Model - - attr_accessor :prediction_dataset - - # AM begin - # regression function, created 06/10 - # ch: please properly integrate this into the workflow. You will need some criterium for distinguishing regression/classification (hardcoded regression for testing) - def regression(compound_uri,prediction,verbose=false) - - lazar = YAML.load self.yaml - compound = OpenTox::Compound.new(:uri => compound_uri) - - # obtain X values for query compound - compound_matches = compound.match lazar.features - - conf = 0.0 - features = { :activating => [], :deactivating => [] } - neighbors = {} - regression = nil - - regr_occurrences = [] # occurrence vector with {0,1} entries - sims = [] # similarity values between query and neighbors - acts = [] # activities of neighbors for supervised learning - neighbor_matches = [] # as in classification: URIs of matches - gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel - i = 0 - - # aquire data related to query structure - lazar.fingerprints.each do |uri,matches| - sim = OpenTox::Algorithm::Similarity.weighted_tanimoto(compound_matches,matches,lazar.p_values) - lazar.activities[uri].each do |act| - if sim > 0.3 - neighbors[uri] = {:similarity => sim} - neighbors[uri][:features] = { :activating => [], :deactivating => [] } unless neighbors[uri][:features] - matches.each do |m| - if lazar.effects[m] == 'activating' - neighbors[uri][:features][:activating] << {:smarts => m, :p_value => lazar.p_values[m]} - elsif lazar.effects[m] == 'deactivating' - neighbors[uri][:features][:deactivating] << {:smarts => m, :p_value => lazar.p_values[m]} - end - end - lazar.activities[uri].each do |act| - neighbors[uri][:activities] = [] unless neighbors[uri][:activities] - neighbors[uri][:activities] << act - end - conf += OpenTox::Utils.gauss(sim) - sims << OpenTox::Utils.gauss(sim) - #TODO check for 0 s - acts << Math.log10(act.to_f) - neighbor_matches[i] = matches - i+=1 - end - end - end - conf = conf/neighbors.size - LOGGER.debug "Regression: found " + neighbor_matches.size.to_s + " neighbors." - - - unless neighbor_matches.length == 0 - # gram matrix - (0..(neighbor_matches.length-1)).each do |i| - gram_matrix[i] = [] - # lower triangle - (0..(i-1)).each do |j| - sim = OpenTox::Algorithm::Similarity.weighted_tanimoto(neighbor_matches[i], neighbor_matches[j], lazar.p_values) - gram_matrix[i] << OpenTox::Utils.gauss(sim) - end - # diagonal element - gram_matrix[i][i] = 1.0 - # upper triangle - ((i+1)..(neighbor_matches.length-1)).each do |j| - sim = OpenTox::Algorithm::Similarity.weighted_tanimoto(neighbor_matches[i], neighbor_matches[j], lazar.p_values) - gram_matrix[i] << OpenTox::Utils.gauss(sim) - end - end - - @r = RinRuby.new(false,false) # global R instance leads to Socket errors after a large number of requests - @r.eval "library('kernlab')" # this requires R package "kernlab" to be installed - LOGGER.debug "Setting R data ..." - # set data - @r.gram_matrix = gram_matrix.flatten - @r.n = neighbor_matches.length - @r.y = acts - @r.sims = sims - - LOGGER.debug "Preparing R data ..." - # prepare data - @r.eval "y<-as.vector(y)" - @r.eval "gram_matrix<-as.kernelMatrix(matrix(gram_matrix,n,n))" - @r.eval "sims<-as.vector(sims)" - - # model + support vectors - LOGGER.debug "Creating SVM model ..." - @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"nu-svr\", nu=0.8)" - @r.eval "sv<-as.vector(SVindex(model))" - @r.eval "sims<-sims[sv]" - @r.eval "sims<-as.kernelMatrix(matrix(sims,1))" - LOGGER.debug "Predicting ..." - @r.eval "p<-predict(model,sims)[1,1]" - regression = 10**(@r.p.to_f) - LOGGER.debug "Prediction is: '" + regression.to_s + "'." - @r.quit # free R - - end - - if (regression != nil) - feature_uri = lazar.dependentVariables - prediction.compounds << compound_uri - prediction.features << feature_uri - prediction.data[compound_uri] = [] unless prediction.data[compound_uri] - compound_matches.each { |m| features[lazar.effects[m].to_sym] << {:smarts => m, :p_value => lazar.p_values[m] } } - tuple = { - File.join(@@config[:services]["opentox-model"],"lazar#regression") => regression, - File.join(@@config[:services]["opentox-model"],"lazar#confidence") => conf - } - if verbose - tuple[File.join(@@config[:services]["opentox-model"],"lazar#neighbors")] = neighbors - tuple[File.join(@@config[:services]["opentox-model"],"lazar#features")] = features - end - prediction.data[compound_uri] << {feature_uri => tuple} - end - - end - # AM end - - - def classification(compound_uri,prediction,verbose=false) - - lazar = YAML.load self.yaml - compound = OpenTox::Compound.new(:uri => compound_uri) - compound_matches = compound.match lazar.features - - conf = 0.0 - features = { :activating => [], :deactivating => [] } - neighbors = {} - classification = nil - - lazar.fingerprints.each do |uri,matches| - - sim = OpenTox::Algorithm::Similarity.weighted_tanimoto(compound_matches,matches,lazar.p_values) - if sim > 0.3 - neighbors[uri] = {:similarity => sim} - neighbors[uri][:features] = { :activating => [], :deactivating => [] } unless neighbors[uri][:features] - matches.each do |m| - if lazar.effects[m] == 'activating' - neighbors[uri][:features][:activating] << {:smarts => m, :p_value => lazar.p_values[m]} - elsif lazar.effects[m] == 'deactivating' - neighbors[uri][:features][:deactivating] << {:smarts => m, :p_value => lazar.p_values[m]} - end - end - lazar.activities[uri].each do |act| - neighbors[uri][:activities] = [] unless neighbors[uri][:activities] - neighbors[uri][:activities] << act - case act.to_s - when 'true' - conf += OpenTox::Utils.gauss(sim) - when 'false' - conf -= OpenTox::Utils.gauss(sim) - end - end - end - end - - conf = conf/neighbors.size - if conf > 0.0 - classification = true - elsif conf < 0.0 - classification = false - end - if (classification != nil) - feature_uri = lazar.dependentVariables - prediction.compounds << compound_uri - prediction.features << feature_uri - prediction.data[compound_uri] = [] unless prediction.data[compound_uri] - compound_matches.each { |m| features[lazar.effects[m].to_sym] << {:smarts => m, :p_value => lazar.p_values[m] } } - tuple = { - File.join(@@config[:services]["opentox-model"],"lazar#classification") => classification, - File.join(@@config[:services]["opentox-model"],"lazar#confidence") => conf - } - if verbose - tuple[File.join(@@config[:services]["opentox-model"],"lazar#neighbors")] = neighbors - tuple[File.join(@@config[:services]["opentox-model"],"lazar#features")] = features - end - prediction.data[compound_uri] << {feature_uri => tuple} - end - end - - def database_activity?(compound_uri,prediction) - # find database activities - lazar = YAML.load self.yaml - db_activities = lazar.activities[compound_uri] - if db_activities - prediction.creator = lazar.trainingDataset - feature_uri = lazar.dependentVariables - prediction.compounds << compound_uri - prediction.features << feature_uri - prediction.data[compound_uri] = [] unless prediction.data[compound_uri] - db_activities.each do |act| - prediction.data[compound_uri] << {feature_uri => act} - end - true - else - false - end - end - - def to_owl - data = YAML.load(yaml) - activity_dataset = YAML.load(RestClient.get(data.trainingDataset, :accept => 'application/x-yaml').to_s) - feature_dataset = YAML.load(RestClient.get(data.feature_dataset_uri, :accept => 'application/x-yaml').to_s) - owl = OpenTox::Owl.create 'Model', uri - owl.set("creator","http://github.com/helma/opentox-model") - owl.set("title", URI.decode(data.dependentVariables.split(/#/).last) ) - #owl.set("title","#{URI.decode(activity_dataset.title)} lazar classification") - owl.set("date",created_at.to_s) - owl.set("algorithm",data.algorithm) - owl.set("dependentVariables",activity_dataset.features.join(', ')) - owl.set("independentVariables",feature_dataset.features.join(', ')) - owl.set("predictedVariables", data.dependentVariables ) - #owl.set("predictedVariables",activity_dataset.features.join(', ') + "_lazar_classification") - owl.set("trainingDataset",data.trainingDataset) - owl.parameters = { - "Dataset URI" => - { :scope => "mandatory", :value => data.trainingDataset }, - "Feature URI for dependent variable" => - { :scope => "mandatory", :value => activity_dataset.features.join(', ')}, - "Feature generation URI" => - { :scope => "mandatory", :value => feature_dataset.creator } - } - - owl.rdf - end - -end - +# Get model representation +# @return [application/rdf+xml,application/x-yaml] Model representation get '/:id/?' do - accept = request.env['HTTP_ACCEPT'] - accept = "application/rdf+xml" if accept == '*/*' or accept == '' or accept.nil? - # workaround for browser links - case params[:id] - when /.yaml$/ - params[:id].sub!(/.yaml$/,'') - accept = 'application/x-yaml' - when /.rdf$/ - params[:id].sub!(/.rdf$/,'') - accept = 'application/rdf+xml' - end - model = Lazar.get(params[:id]) - halt 404, "Model #{params[:id]} not found." unless model - case accept - when "application/rdf+xml" - response['Content-Type'] = 'application/rdf+xml' - unless model.owl # lazy owl creation - model.owl = model.to_owl - model.save - end - model.owl - when /yaml/ - response['Content-Type'] = 'application/x-yaml' - model.yaml - else - halt 400, "Unsupported MIME type '#{accept}'" - end -end - -get '/:id/algorithm/?' do - response['Content-Type'] = 'text/plain' - YAML.load(Lazar.get(params[:id]).yaml).algorithm -end - -get '/:id/trainingDataset/?' do - response['Content-Type'] = 'text/plain' - YAML.load(Lazar.get(params[:id]).yaml).trainingDataset + halt 404, "Model #{params[:id]} not found." unless File.exists? @yaml_file + response['Content-Type'] = @accept + case @accept + when /application\/rdf\+xml/ + s = OpenTox::Serializer::Owl.new + s.add_model(@uri,YAML.load_file(@yaml_file).metadata) + response['Content-Type'] = 'application/rdf+xml' + s.to_rdfxml + when /yaml/ + response['Content-Type'] = 'application/x-yaml' + File.read @yaml_file + when /html/ + response['Content-Type'] = 'text/html' + OpenTox.text_to_html File.read(@yaml_file) + else + halt 400, "Unsupported MIME type '#{@accept}'" + end end -get '/:id/feature_dataset/?' do - response['Content-Type'] = 'text/plain' - YAML.load(Lazar.get(params[:id]).yaml).feature_dataset_uri +get '/:id/metadata.?:ext?' do + metadata = YAML.load_file(@yaml_file).metadata + response['Content-Type'] = @accept + case @accept + when /yaml/ + metadata.to_yaml + else #when /rdf/ and anything else + serializer = OpenTox::Serializer::Owl.new + serializer.add_metadata @uri, metadata + serializer.to_rdfxml + end end +# Store a lazar model. This method should not be called directly, use OpenTox::Algorithm::Lazr to create a lazar model +# @param [Body] lazar Model representation in YAML format +# @return [String] Model URI post '/?' do # create model - halt 400, "MIME type \"#{request.content_type}\" not supported." unless request.content_type.match(/yaml/) - model = Lazar.new - model.save - model.uri = url_for("/#{model.id}", :full) - model.yaml = request.env["rack.input"].read - model.save - model.uri + halt 400, "MIME type \"#{request.content_type}\" not supported." unless request.content_type.match(/yaml/) + @id = next_id + @uri = uri @id + @yaml_file = "public/#{@id}.yaml" + lazar = YAML.load request.env["rack.input"].read + lazar.uri = @uri + File.open(@yaml_file,"w+"){|f| f.puts lazar.to_yaml} + response['Content-Type'] = 'text/uri-list' + @uri end -post '/:id/?' do # create prediction - - lazar = Lazar.get(params[:id]) - halt 404, "Model #{params[:id]} does not exist." unless lazar - halt 404, "No compound_uri or dataset_uri parameter." unless compound_uri = params[:compound_uri] or dataset_uri = params[:dataset_uri] - - @prediction = OpenTox::Dataset.new - @prediction.creator = lazar.uri - dependent_variable = YAML.load(lazar.yaml).dependentVariables - @prediction.title = URI.decode(dependent_variable.split(/#/).last) - case dependent_variable - when /classification/ - prediction_type = "classification" - when /regression/ - prediction_type = "regression" - end - - if compound_uri - # look for cached prediction first - if cached_prediction = Prediction.first(:model_uri => lazar.uri, :compound_uri => compound_uri) - @prediction = YAML.load(cached_prediction.yaml) - else - begin - # AM: switch here between regression and classification - eval "lazar.#{prediction_type}(compound_uri,@prediction,true) unless lazar.database_activity?(compound_uri,@prediction)" - Prediction.create(:model_uri => lazar.uri, :compound_uri => compound_uri, :yaml => @prediction.to_yaml) - rescue - LOGGER.error "#{prediction_type} failed for #{compound_uri} with #{$!} " - halt 500, "Prediction of #{compound_uri} failed." - end +# Make a lazar prediction. Predicts either a single compound or all compounds from a dataset +# @param [optional,String] dataset_uri URI of the dataset to be predicted +# @param [optional,String] compound_uri URI of the compound to be predicted +# @param [optional,Header] Accept Content-type of prediction, can be either `application/rdf+xml or application/x-yaml` +# @return [text/uri-list] URI of prediction task (dataset prediction) or prediction dataset (compound prediction) +post '/:id/?' do + + halt 404, "Model #{params[:id]} does not exist." unless File.exists? @yaml_file + + halt 404, "No compound_uri or dataset_uri parameter." unless compound_uri = params[:compound_uri] or dataset_uri = params[:dataset_uri] + @lazar = YAML.load_file @yaml_file + + response['Content-Type'] = 'text/uri-list' + + if compound_uri + cache = PredictionCache.find(:model_uri => @lazar.uri, :compound_uri => compound_uri).first + return cache.dataset_uri if cache and uri_available?(cache.dataset_uri) + begin + prediction_uri = @lazar.predict(compound_uri,true,@subjectid).uri + PredictionCache.create(:model_uri => @lazar.uri, :compound_uri => compound_uri, :dataset_uri => prediction_uri) + prediction_uri + rescue + LOGGER.error "Lazar prediction failed for #{compound_uri} with #{$!} " + halt 500, "Prediction of #{compound_uri} with #{@lazar.uri} failed." end - case request.env['HTTP_ACCEPT'] - when /yaml/ - @prediction.to_yaml - when 'application/rdf+xml' - @prediction.to_owl - else - halt 400, "MIME type \"#{request.env['HTTP_ACCEPT']}\" not supported." - end - - elsif dataset_uri - response['Content-Type'] = 'text/uri-list' - task_uri = OpenTox::Task.as_task("Predict dataset",url_for("/#{lazar.id}", :full)) do - input_dataset = OpenTox::Dataset.find(dataset_uri) - input_dataset.compounds.each do |compound_uri| - # AM: switch here between regression and classification - begin - eval "lazar.#{prediction_type}(compound_uri,@prediction) unless lazar.database_activity?(compound_uri,@prediction)" - rescue - LOGGER.error "#{prediction_type} failed for #{compound_uri} with #{$!} " - end - end - begin - uri = @prediction.save.chomp - rescue - halt 500, "Could not save prediction dataset" - end - end - halt 202,task_uri - end + elsif dataset_uri + task = OpenTox::Task.create("Predict dataset",url_for("/#{@lazar.id}", :full)) do |task| + @lazar.predict_dataset(dataset_uri, @subjectid, task).uri + end + halt 503,task.uri+"\n" if task.status == "Cancelled" + halt 202,task.uri + end end diff --git a/property_lazar.rb b/property_lazar.rb deleted file mode 100644 index 6e68718..0000000 --- a/property_lazar.rb +++ /dev/null @@ -1,303 +0,0 @@ -# R integration -# workaround to initialize R non-interactively (former rinruby versions did this by default) -# avoids compiling R with X -R = nil -require "rinruby" -require "haml" - -class PropertyLazar < Model - - attr_accessor :prediction_dataset - -=begin - # AM begin - # regression function, created 06/10 - def regression(compound_uri,prediction,verbose=false) - - lazar = YAML.load self.yaml - compound = OpenTox::Compound.new(:uri => compound_uri) - - # obtain X values for query compound - compound_matches = compound.match lazar.features - - conf = 0.0 - features = { :activating => [], :deactivating => [] } - neighbors = {} - regression = nil - - regr_occurrences = [] # occurrence vector with {0,1} entries - sims = [] # similarity values between query and neighbors - acts = [] # activities of neighbors for supervised learning - neighbor_matches = [] # as in classification: URIs of matches - gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel - i = 0 - - # aquire data related to query structure - lazar.fingerprints.each do |uri,matches| - sim = OpenTox::Algorithm::Similarity.weighted_tanimoto(compound_matches,matches,lazar.p_values) - lazar.activities[uri].each do |act| - if sim > 0.3 - neighbors[uri] = {:similarity => sim} - neighbors[uri][:features] = { :activating => [], :deactivating => [] } unless neighbors[uri][:features] - matches.each do |m| - if lazar.effects[m] == 'activating' - neighbors[uri][:features][:activating] << {:smarts => m, :p_value => lazar.p_values[m]} - elsif lazar.effects[m] == 'deactivating' - neighbors[uri][:features][:deactivating] << {:smarts => m, :p_value => lazar.p_values[m]} - end - end - lazar.activities[uri].each do |act| - neighbors[uri][:activities] = [] unless neighbors[uri][:activities] - neighbors[uri][:activities] << act - end - conf += OpenTox::Utils.gauss(sim) - sims << OpenTox::Utils.gauss(sim) - #TODO check for 0 s - acts << Math.log10(act.to_f) - neighbor_matches[i] = matches - i+=1 - end - end - end - conf = conf/neighbors.size - LOGGER.debug "Regression: found " + neighbor_matches.size.to_s + " neighbors." - - - unless neighbor_matches.length == 0 - # gram matrix - (0..(neighbor_matches.length-1)).each do |i| - gram_matrix[i] = [] - # lower triangle - (0..(i-1)).each do |j| - sim = OpenTox::Algorithm::Similarity.weighted_tanimoto(neighbor_matches[i], neighbor_matches[j], lazar.p_values) - gram_matrix[i] << OpenTox::Utils.gauss(sim) - end - # diagonal element - gram_matrix[i][i] = 1.0 - # upper triangle - ((i+1)..(neighbor_matches.length-1)).each do |j| - sim = OpenTox::Algorithm::Similarity.weighted_tanimoto(neighbor_matches[i], neighbor_matches[j], lazar.p_values) - gram_matrix[i] << OpenTox::Utils.gauss(sim) - end - end - - @r = RinRuby.new(false,false) # global R instance leads to Socket errors after a large number of requests - @r.eval "library('kernlab')" # this requires R package "kernlab" to be installed - LOGGER.debug "Setting R data ..." - # set data - @r.gram_matrix = gram_matrix.flatten - @r.n = neighbor_matches.length - @r.y = acts - @r.sims = sims - - LOGGER.debug "Preparing R data ..." - # prepare data - @r.eval "y<-as.vector(y)" - @r.eval "gram_matrix<-as.kernelMatrix(matrix(gram_matrix,n,n))" - @r.eval "sims<-as.vector(sims)" - - # model + support vectors - LOGGER.debug "Creating SVM model ..." - @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"nu-svr\", nu=0.8)" - @r.eval "sv<-as.vector(SVindex(model))" - @r.eval "sims<-sims[sv]" - @r.eval "sims<-as.kernelMatrix(matrix(sims,1))" - LOGGER.debug "Predicting ..." - @r.eval "p<-predict(model,sims)[1,1]" - regression = 10**(@r.p.to_f) - LOGGER.debug "Prediction is: '" + regression.to_s + "'." - @r.quit # free R - - end - - if (regression != nil) - feature_uri = lazar.dependentVariables - prediction.compounds << compound_uri - prediction.features << feature_uri - prediction.data[compound_uri] = [] unless prediction.data[compound_uri] - compound_matches.each { |m| features[lazar.effects[m].to_sym] << {:smarts => m, :p_value => lazar.p_values[m] } } - tuple = { - File.join(@@config[:services]["opentox-model"],"lazar#regression") => regression, - File.join(@@config[:services]["opentox-model"],"lazar#confidence") => conf - } - if verbose - tuple[File.join(@@config[:services]["opentox-model"],"lazar#neighbors")] = neighbors - tuple[File.join(@@config[:services]["opentox-model"],"lazar#features")] = features - end - prediction.data[compound_uri] << {feature_uri => tuple} - end - - end - # AM end -=end - - - def classification(compound_uri,prediction,verbose=false) - - lazar = YAML.load self.yaml - compound = OpenTox::Compound.new(:uri => compound_uri) - compound_properties = lazar.properties[compound.uri] - - conf = 0.0 - neighbors = {} - features = [] - classification = nil - - lazar.properties.each do |uri,properties| - - sim = OpenTox::Algorithm::Similarity.euclidean(compound_properties,properties) - if sim and sim > 0.001 - neighbors[uri] = {:similarity => sim} - neighbors[uri][:features] = [] unless neighbors[uri][:features] - properties.each do |p,v| - neighbors[uri][:features] << {p => v} - end - lazar.activities[uri].each do |act| - neighbors[uri][:activities] = [] unless neighbors[uri][:activities] - neighbors[uri][:activities] << act - case act.to_s - when 'true' - conf += OpenTox::Utils.gauss(sim) - when 'false' - conf -= OpenTox::Utils.gauss(sim) - end - end - end - end - - conf = conf/neighbors.size - if conf > 0.0 - classification = true - elsif conf < 0.0 - classification = false - end - if (classification != nil) - feature_uri = lazar.dependentVariables - prediction.compounds << compound_uri - prediction.features << feature_uri - prediction.data[compound_uri] = [] unless prediction.data[compound_uri] - compound_properties.each { |p,v| features << {p => v} } - tuple = { - File.join(@@config[:services]["opentox-model"],"lazar#classification") => classification, - File.join(@@config[:services]["opentox-model"],"lazar#confidence") => conf - } - if verbose - tuple[File.join(@@config[:services]["opentox-model"],"lazar#neighbors")] = neighbors - tuple[File.join(@@config[:services]["opentox-model"],"lazar#features")] = features - end - prediction.data[compound_uri] << {feature_uri => tuple} - end - end - - def database_activity?(compound_uri,prediction) - # find database activities - lazar = YAML.load self.yaml - db_activities = lazar.activities[compound_uri] - if db_activities - prediction.creator = lazar.trainingDataset - feature_uri = lazar.dependentVariables - prediction.compounds << compound_uri - prediction.features << feature_uri - prediction.data[compound_uri] = [] unless prediction.data[compound_uri] - db_activities.each do |act| - prediction.data[compound_uri] << {feature_uri => act} - end - true - else - false - end - end - - def to_owl - data = YAML.load(yaml) - activity_dataset = YAML.load(RestClient.get(data.trainingDataset, :accept => 'application/x-yaml').to_s) - feature_dataset = YAML.load(RestClient.get(data.feature_dataset_uri, :accept => 'application/x-yaml').to_s) - owl = OpenTox::Owl.create 'Model', uri - owl.set("creator","http://github.com/helma/opentox-model") - owl.set("title", URI.decode(data.dependentVariables.split(/#/).last) ) - #owl.set("title","#{URI.decode(activity_dataset.title)} lazar classification") - owl.set("date",created_at.to_s) - owl.set("algorithm",data.algorithm) - owl.set("dependentVariables",activity_dataset.features.join(', ')) - owl.set("independentVariables",feature_dataset.features.join(', ')) - owl.set("predictedVariables", data.dependentVariables ) - #owl.set("predictedVariables",activity_dataset.features.join(', ') + "_lazar_classification") - owl.set("trainingDataset",data.trainingDataset) - owl.parameters = { - "Dataset URI" => - { :scope => "mandatory", :value => data.trainingDataset }, - "Feature URI for dependent variable" => - { :scope => "mandatory", :value => activity_dataset.features.join(', ')}, - "Feature generation URI" => - { :scope => "mandatory", :value => feature_dataset.creator } - } - - owl.rdf - end - -end - -post '/pl/:id/?' do # create prediction - - lazar = PropertyLazar.get(params[:id]) - LOGGER.debug lazar.to_yaml - halt 404, "Model #{params[:id]} does not exist." unless lazar - halt 404, "No compound_uri or dataset_uri parameter." unless compound_uri = params[:compound_uri] or dataset_uri = params[:dataset_uri] - - @prediction = OpenTox::Dataset.new - @prediction.creator = lazar.uri - dependent_variable = YAML.load(lazar.yaml).dependentVariables - @prediction.title = URI.decode(dependent_variable.split(/#/).last) - case dependent_variable - when /classification/ - prediction_type = "classification" - when /regression/ - prediction_type = "regression" - end - - if compound_uri - # look for cached prediction first - #if cached_prediction = Prediction.first(:model_uri => lazar.uri, :compound_uri => compound_uri) - #@prediction = YAML.load(cached_prediction.yaml) - #else - begin - # AM: switch here between regression and classification - lazar.classification(compound_uri,@prediction,true) #unless lazar.database_activity?(compound_uri,@prediction)" - #eval "lazar.#{prediction_type}(compound_uri,@prediction,true) unless lazar.database_activity?(compound_uri,@prediction)" - #Prediction.create(:model_uri => lazar.uri, :compound_uri => compound_uri, :yaml => @prediction.to_yaml) - rescue - LOGGER.error "#{prediction_type} failed for #{compound_uri} with #{$!} " - halt 500, "Prediction of #{compound_uri} failed." - end - #end - case request.env['HTTP_ACCEPT'] - when /yaml/ - @prediction.to_yaml - when 'application/rdf+xml' - @prediction.to_owl - else - halt 400, "MIME type \"#{request.env['HTTP_ACCEPT']}\" not supported." - end - - elsif dataset_uri - response['Content-Type'] = 'text/uri-list' - task_uri = OpenTox::Task.as_task("Predict dataset",url_for("/#{lazar.id}", :full)) do - input_dataset = OpenTox::Dataset.find(dataset_uri) - input_dataset.compounds.each do |compound_uri| - # AM: switch here between regression and classification - begin - eval "lazar.#{prediction_type}(compound_uri,@prediction) unless lazar.database_activity?(compound_uri,@prediction)" - rescue - LOGGER.error "#{prediction_type} failed for #{compound_uri} with #{$!} " - end - end - begin - uri = @prediction.save.chomp - rescue - halt 500, "Could not save prediction dataset" - end - end - halt 202,task_uri - end - -end |