summaryrefslogtreecommitdiff
path: root/property_lazar.rb
diff options
context:
space:
mode:
Diffstat (limited to 'property_lazar.rb')
-rw-r--r--property_lazar.rb303
1 files changed, 0 insertions, 303 deletions
diff --git a/property_lazar.rb b/property_lazar.rb
deleted file mode 100644
index 6e68718..0000000
--- a/property_lazar.rb
+++ /dev/null
@@ -1,303 +0,0 @@
-# R integration
-# workaround to initialize R non-interactively (former rinruby versions did this by default)
-# avoids compiling R with X
-R = nil
-require "rinruby"
-require "haml"
-
-class PropertyLazar < Model
-
- attr_accessor :prediction_dataset
-
-=begin
- # AM begin
- # regression function, created 06/10
- def regression(compound_uri,prediction,verbose=false)
-
- lazar = YAML.load self.yaml
- compound = OpenTox::Compound.new(:uri => compound_uri)
-
- # obtain X values for query compound
- compound_matches = compound.match lazar.features
-
- conf = 0.0
- features = { :activating => [], :deactivating => [] }
- neighbors = {}
- regression = nil
-
- regr_occurrences = [] # occurrence vector with {0,1} entries
- sims = [] # similarity values between query and neighbors
- acts = [] # activities of neighbors for supervised learning
- neighbor_matches = [] # as in classification: URIs of matches
- gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel
- i = 0
-
- # aquire data related to query structure
- lazar.fingerprints.each do |uri,matches|
- sim = OpenTox::Algorithm::Similarity.weighted_tanimoto(compound_matches,matches,lazar.p_values)
- lazar.activities[uri].each do |act|
- if sim > 0.3
- neighbors[uri] = {:similarity => sim}
- neighbors[uri][:features] = { :activating => [], :deactivating => [] } unless neighbors[uri][:features]
- matches.each do |m|
- if lazar.effects[m] == 'activating'
- neighbors[uri][:features][:activating] << {:smarts => m, :p_value => lazar.p_values[m]}
- elsif lazar.effects[m] == 'deactivating'
- neighbors[uri][:features][:deactivating] << {:smarts => m, :p_value => lazar.p_values[m]}
- end
- end
- lazar.activities[uri].each do |act|
- neighbors[uri][:activities] = [] unless neighbors[uri][:activities]
- neighbors[uri][:activities] << act
- end
- conf += OpenTox::Utils.gauss(sim)
- sims << OpenTox::Utils.gauss(sim)
- #TODO check for 0 s
- acts << Math.log10(act.to_f)
- neighbor_matches[i] = matches
- i+=1
- end
- end
- end
- conf = conf/neighbors.size
- LOGGER.debug "Regression: found " + neighbor_matches.size.to_s + " neighbors."
-
-
- unless neighbor_matches.length == 0
- # gram matrix
- (0..(neighbor_matches.length-1)).each do |i|
- gram_matrix[i] = []
- # lower triangle
- (0..(i-1)).each do |j|
- sim = OpenTox::Algorithm::Similarity.weighted_tanimoto(neighbor_matches[i], neighbor_matches[j], lazar.p_values)
- gram_matrix[i] << OpenTox::Utils.gauss(sim)
- end
- # diagonal element
- gram_matrix[i][i] = 1.0
- # upper triangle
- ((i+1)..(neighbor_matches.length-1)).each do |j|
- sim = OpenTox::Algorithm::Similarity.weighted_tanimoto(neighbor_matches[i], neighbor_matches[j], lazar.p_values)
- gram_matrix[i] << OpenTox::Utils.gauss(sim)
- end
- end
-
- @r = RinRuby.new(false,false) # global R instance leads to Socket errors after a large number of requests
- @r.eval "library('kernlab')" # this requires R package "kernlab" to be installed
- LOGGER.debug "Setting R data ..."
- # set data
- @r.gram_matrix = gram_matrix.flatten
- @r.n = neighbor_matches.length
- @r.y = acts
- @r.sims = sims
-
- LOGGER.debug "Preparing R data ..."
- # prepare data
- @r.eval "y<-as.vector(y)"
- @r.eval "gram_matrix<-as.kernelMatrix(matrix(gram_matrix,n,n))"
- @r.eval "sims<-as.vector(sims)"
-
- # model + support vectors
- LOGGER.debug "Creating SVM model ..."
- @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"nu-svr\", nu=0.8)"
- @r.eval "sv<-as.vector(SVindex(model))"
- @r.eval "sims<-sims[sv]"
- @r.eval "sims<-as.kernelMatrix(matrix(sims,1))"
- LOGGER.debug "Predicting ..."
- @r.eval "p<-predict(model,sims)[1,1]"
- regression = 10**(@r.p.to_f)
- LOGGER.debug "Prediction is: '" + regression.to_s + "'."
- @r.quit # free R
-
- end
-
- if (regression != nil)
- feature_uri = lazar.dependentVariables
- prediction.compounds << compound_uri
- prediction.features << feature_uri
- prediction.data[compound_uri] = [] unless prediction.data[compound_uri]
- compound_matches.each { |m| features[lazar.effects[m].to_sym] << {:smarts => m, :p_value => lazar.p_values[m] } }
- tuple = {
- File.join(@@config[:services]["opentox-model"],"lazar#regression") => regression,
- File.join(@@config[:services]["opentox-model"],"lazar#confidence") => conf
- }
- if verbose
- tuple[File.join(@@config[:services]["opentox-model"],"lazar#neighbors")] = neighbors
- tuple[File.join(@@config[:services]["opentox-model"],"lazar#features")] = features
- end
- prediction.data[compound_uri] << {feature_uri => tuple}
- end
-
- end
- # AM end
-=end
-
-
- def classification(compound_uri,prediction,verbose=false)
-
- lazar = YAML.load self.yaml
- compound = OpenTox::Compound.new(:uri => compound_uri)
- compound_properties = lazar.properties[compound.uri]
-
- conf = 0.0
- neighbors = {}
- features = []
- classification = nil
-
- lazar.properties.each do |uri,properties|
-
- sim = OpenTox::Algorithm::Similarity.euclidean(compound_properties,properties)
- if sim and sim > 0.001
- neighbors[uri] = {:similarity => sim}
- neighbors[uri][:features] = [] unless neighbors[uri][:features]
- properties.each do |p,v|
- neighbors[uri][:features] << {p => v}
- end
- lazar.activities[uri].each do |act|
- neighbors[uri][:activities] = [] unless neighbors[uri][:activities]
- neighbors[uri][:activities] << act
- case act.to_s
- when 'true'
- conf += OpenTox::Utils.gauss(sim)
- when 'false'
- conf -= OpenTox::Utils.gauss(sim)
- end
- end
- end
- end
-
- conf = conf/neighbors.size
- if conf > 0.0
- classification = true
- elsif conf < 0.0
- classification = false
- end
- if (classification != nil)
- feature_uri = lazar.dependentVariables
- prediction.compounds << compound_uri
- prediction.features << feature_uri
- prediction.data[compound_uri] = [] unless prediction.data[compound_uri]
- compound_properties.each { |p,v| features << {p => v} }
- tuple = {
- File.join(@@config[:services]["opentox-model"],"lazar#classification") => classification,
- File.join(@@config[:services]["opentox-model"],"lazar#confidence") => conf
- }
- if verbose
- tuple[File.join(@@config[:services]["opentox-model"],"lazar#neighbors")] = neighbors
- tuple[File.join(@@config[:services]["opentox-model"],"lazar#features")] = features
- end
- prediction.data[compound_uri] << {feature_uri => tuple}
- end
- end
-
- def database_activity?(compound_uri,prediction)
- # find database activities
- lazar = YAML.load self.yaml
- db_activities = lazar.activities[compound_uri]
- if db_activities
- prediction.creator = lazar.trainingDataset
- feature_uri = lazar.dependentVariables
- prediction.compounds << compound_uri
- prediction.features << feature_uri
- prediction.data[compound_uri] = [] unless prediction.data[compound_uri]
- db_activities.each do |act|
- prediction.data[compound_uri] << {feature_uri => act}
- end
- true
- else
- false
- end
- end
-
- def to_owl
- data = YAML.load(yaml)
- activity_dataset = YAML.load(RestClient.get(data.trainingDataset, :accept => 'application/x-yaml').to_s)
- feature_dataset = YAML.load(RestClient.get(data.feature_dataset_uri, :accept => 'application/x-yaml').to_s)
- owl = OpenTox::Owl.create 'Model', uri
- owl.set("creator","http://github.com/helma/opentox-model")
- owl.set("title", URI.decode(data.dependentVariables.split(/#/).last) )
- #owl.set("title","#{URI.decode(activity_dataset.title)} lazar classification")
- owl.set("date",created_at.to_s)
- owl.set("algorithm",data.algorithm)
- owl.set("dependentVariables",activity_dataset.features.join(', '))
- owl.set("independentVariables",feature_dataset.features.join(', '))
- owl.set("predictedVariables", data.dependentVariables )
- #owl.set("predictedVariables",activity_dataset.features.join(', ') + "_lazar_classification")
- owl.set("trainingDataset",data.trainingDataset)
- owl.parameters = {
- "Dataset URI" =>
- { :scope => "mandatory", :value => data.trainingDataset },
- "Feature URI for dependent variable" =>
- { :scope => "mandatory", :value => activity_dataset.features.join(', ')},
- "Feature generation URI" =>
- { :scope => "mandatory", :value => feature_dataset.creator }
- }
-
- owl.rdf
- end
-
-end
-
-post '/pl/:id/?' do # create prediction
-
- lazar = PropertyLazar.get(params[:id])
- LOGGER.debug lazar.to_yaml
- halt 404, "Model #{params[:id]} does not exist." unless lazar
- halt 404, "No compound_uri or dataset_uri parameter." unless compound_uri = params[:compound_uri] or dataset_uri = params[:dataset_uri]
-
- @prediction = OpenTox::Dataset.new
- @prediction.creator = lazar.uri
- dependent_variable = YAML.load(lazar.yaml).dependentVariables
- @prediction.title = URI.decode(dependent_variable.split(/#/).last)
- case dependent_variable
- when /classification/
- prediction_type = "classification"
- when /regression/
- prediction_type = "regression"
- end
-
- if compound_uri
- # look for cached prediction first
- #if cached_prediction = Prediction.first(:model_uri => lazar.uri, :compound_uri => compound_uri)
- #@prediction = YAML.load(cached_prediction.yaml)
- #else
- begin
- # AM: switch here between regression and classification
- lazar.classification(compound_uri,@prediction,true) #unless lazar.database_activity?(compound_uri,@prediction)"
- #eval "lazar.#{prediction_type}(compound_uri,@prediction,true) unless lazar.database_activity?(compound_uri,@prediction)"
- #Prediction.create(:model_uri => lazar.uri, :compound_uri => compound_uri, :yaml => @prediction.to_yaml)
- rescue
- LOGGER.error "#{prediction_type} failed for #{compound_uri} with #{$!} "
- halt 500, "Prediction of #{compound_uri} failed."
- end
- #end
- case request.env['HTTP_ACCEPT']
- when /yaml/
- @prediction.to_yaml
- when 'application/rdf+xml'
- @prediction.to_owl
- else
- halt 400, "MIME type \"#{request.env['HTTP_ACCEPT']}\" not supported."
- end
-
- elsif dataset_uri
- response['Content-Type'] = 'text/uri-list'
- task_uri = OpenTox::Task.as_task("Predict dataset",url_for("/#{lazar.id}", :full)) do
- input_dataset = OpenTox::Dataset.find(dataset_uri)
- input_dataset.compounds.each do |compound_uri|
- # AM: switch here between regression and classification
- begin
- eval "lazar.#{prediction_type}(compound_uri,@prediction) unless lazar.database_activity?(compound_uri,@prediction)"
- rescue
- LOGGER.error "#{prediction_type} failed for #{compound_uri} with #{$!} "
- end
- end
- begin
- uri = @prediction.save.chomp
- rescue
- halt 500, "Could not save prediction dataset"
- end
- end
- halt 202,task_uri
- end
-
-end