summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-xlazar.rb145
1 files changed, 130 insertions, 15 deletions
diff --git a/lazar.rb b/lazar.rb
index 4e93143..0fa3794 100755
--- a/lazar.rb
+++ b/lazar.rb
@@ -1,8 +1,120 @@
+# R integration: creates the shared RinRuby session (@@r) used by Lazar#regression and loads the kernlab package into it
+# workaround to initialize R non-interactively (former rinruby versions did this by default)
+R = nil # NOTE(review): presumably pre-defines the constant R so that rinruby does not create its own default instance - TODO confirm
+require ("rinruby") # formerly this required R to be built with X11 support (implies package xorg-dev); no longer true with this workaround (ch)
+@@r = RinRuby.new(false,false) # NOTE(review): the two false arguments presumably switch off echo and interactive mode - confirm against the rinruby version in use
+@@r.eval "library('kernlab')" # this requires R package "kernlab" to be installed
+
class Lazar < Model
attr_accessor :prediction_dataset
- def classify(compound_uri,prediction)
+ # AM begin
+ # regression function, created 06/10: predicts a numeric activity for compound_uri from similar training compounds (nu-SVR fitted in R/kernlab) and appends the result to the prediction dataset
+ # ch: please properly integrate this into the workflow. You will need some criterion for distinguishing regression/classification (hardcoded regression for testing)
+ def regression(compound_uri,prediction)
+
+ lazar = YAML.load self.yaml
+ compound = OpenTox::Compound.new(:uri => compound_uri)
+
+ # obtain X values for query compound (the model features that match it)
+ compound_matches = compound.match lazar.features
+
+ conf = 0.0
+ similarities = {}
+ regression = nil
+
+ regr_occurrences = [] # occurrence vector with {0,1} entries; NOTE(review): never used further down in this method
+ sims = [] # gauss-transformed similarity values between query and neighbors
+ acts = [] # log10 activities of neighbors for supervised learning
+ neighbor_matches = [] # as in classification: feature matches of the neighbors
+ gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel
+ i = 0 # next free index in neighbor_matches
+
+ # acquire data related to query structure
+ lazar.fingerprints.each do |uri,matches|
+ sim = OpenTox::Algorithm::Similarity.weighted_tanimoto(compound_matches,matches,lazar.p_values)
+ lazar.activities[uri].each do |act|
+ if sim > 0.3 # only training compounds with a similarity above 0.3 are used as neighbors
+ similarities[uri] = sim # keyed by uri, so a compound with several activities is stored only once
+ conf += OpenTox::Utils.gauss(sim)
+ sims << OpenTox::Utils.gauss(sim)
+ #TODO check for activities of 0 (Math.log10(0.0) is -Infinity)
+ acts << Math.log10(act.to_f) # activities are modelled on a log10 scale
+ #acts << act.to_f
+ neighbor_matches[i] = matches
+ i+=1
+ end
+ end
+ end
+ conf = conf/similarities.size # NOTE(review): conf has one term per activity but is divided by the number of distinct uris; with no neighbors this is 0.0/0 (NaN), which is not reported because regression stays nil
+ LOGGER.debug "Regression: found " + neighbor_matches.size.to_s + " neighbors."
+
+
+ unless neighbor_matches.length == 0
+ # gram matrix: entry [i][j] is the gauss-transformed weighted tanimoto similarity between neighbors i and j
+ (0..(neighbor_matches.length-1)).each do |i| # block parameter i reuses the name of the counter above
+ gram_matrix[i] = []
+ # lower triangle
+ (0..(i-1)).each do |j|
+ sim = OpenTox::Algorithm::Similarity.weighted_tanimoto(neighbor_matches[i], neighbor_matches[j], lazar.p_values)
+ gram_matrix[i] << OpenTox::Utils.gauss(sim)
+ end
+ # diagonal element (fixed to 1.0 instead of being computed)
+ gram_matrix[i][i] = 1.0
+ # upper triangle
+ ((i+1)..(neighbor_matches.length-1)).each do |j|
+ sim = OpenTox::Algorithm::Similarity.weighted_tanimoto(neighbor_matches[i], neighbor_matches[j], lazar.p_values)
+ gram_matrix[i] << OpenTox::Utils.gauss(sim)
+ end
+ end
+
+ LOGGER.debug "Setting R data ..."
+ # set data: copy the Ruby values into variables of the shared R session
+ @@r.gram_matrix = gram_matrix.flatten # row-wise flattening; NOTE(review): R's matrix() fills column-wise, which presumably yields the same matrix because the similarity is symmetric - TODO confirm
+ @@r.n = neighbor_matches.length
+ @@r.y = acts
+ @@r.sims = sims
+
+ LOGGER.debug "Preparing R data ..."
+ # prepare data: convert to the R types used by kernlab below
+ @@r.eval "y<-as.vector(y)"
+ @@r.eval "gram_matrix<-as.kernelMatrix(matrix(gram_matrix,n,n))"
+ @@r.eval "sims<-as.vector(sims)"
+
+ # model + support vectors
+ LOGGER.debug "Creating SVM model ..."
+ @@r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"nu-svr\", nu=0.8)" # nu-SVR fitted on the precomputed kernel matrix
+ @@r.eval "sv<-as.vector(SVindex(model))" # indices of the support vectors
+ @@r.eval "sims<-sims[sv]" # keep only the query similarities to the support vectors
+ @@r.eval "sims<-as.kernelMatrix(matrix(sims,1))" # one-row kernel matrix for the single query compound
+ LOGGER.debug "Predicting ..."
+ @@r.eval "p<-predict(model,sims)[1,1]"
+ regression = 10**(@@r.p.to_f) # undo the log10 transformation applied to the activities
+ LOGGER.debug "Prediction is: '" + regression.to_s + "'."
+
+ end
+
+ if (regression != nil) # nil means no neighbor was found; prediction is then left unchanged
+ feature_uri = lazar.dependentVariables
+ prediction.compounds << compound_uri
+ prediction.features << feature_uri
+ prediction.data[compound_uri] = [] unless prediction.data[compound_uri]
+ tuple = {
+ File.join(@@config[:services]["opentox-model"],"lazar#regression") => regression,
+ File.join(@@config[:services]["opentox-model"],"lazar#confidence") => conf,
+ File.join(@@config[:services]["opentox-model"],"lazar#similarities") => similarities,
+ File.join(@@config[:services]["opentox-model"],"lazar#features") => compound_matches
+ }
+ prediction.data[compound_uri] << {feature_uri => tuple}
+ end
+
+
+ end
+ # AM end
+
+
+ def classification(compound_uri,prediction)
lazar = YAML.load self.yaml
compound = OpenTox::Compound.new(:uri => compound_uri)
@@ -40,15 +152,10 @@ class Lazar < Model
prediction.features << feature_uri
prediction.data[compound_uri] = [] unless prediction.data[compound_uri]
tuple = {
- :classification => classification,
- :confidence => conf,
- :similarities => similarities,
- :features => compound_matches
- # uncomment to enable owl-dl serialisation of predictions
- # url_for("/lazar#classification") => classification,
- # url_for("/lazar#confidence") => conf,
- # url_for("/lazar#similarities") => similarities,
- # url_for("/lazar#features") => compound_matches
+ File.join(@@config[:services]["opentox-model"],"lazar#classification") => classification,
+ File.join(@@config[:services]["opentox-model"],"lazar#confidence") => conf,
+ File.join(@@config[:services]["opentox-model"],"lazar#similarities") => similarities,
+ File.join(@@config[:services]["opentox-model"],"lazar#features") => compound_matches
}
prediction.data[compound_uri] << {feature_uri => tuple}
end
@@ -168,11 +275,18 @@ post '/:id/?' do # create prediction
prediction = OpenTox::Dataset.new
prediction.creator = lazar.uri
- prediction.title = URI.decode YAML.load(lazar.yaml).dependentVariables.split(/#/).last
- prediction.title += " lazar classification"
+ dependent_variable = YAML.load(lazar.yaml).dependentVariables
+ prediction.title = URI.decode(dependent_variable.split(/#/).last)
+ case dependent_variable
+ when /classification/
+ prediction_type = "classification"
+ when /regression/
+ prediction_type = "regression"
+ end
if compound_uri
- lazar.classify(compound_uri,prediction) unless lazar.database_activity?(compound_uri,prediction)
+ # AM: switch here between regression and classification
+ eval "lazar.#{prediction_type}(compound_uri,prediction) unless lazar.database_activity?(compound_uri,prediction)"
LOGGER.debug prediction.to_yaml
case request.env['HTTP_ACCEPT']
when /yaml/
@@ -183,12 +297,13 @@ post '/:id/?' do # create prediction
halt 404, "Content type #{request.env['HTTP_ACCEPT']} not available."
end
-elsif dataset_uri
+ elsif dataset_uri
response['Content-Type'] = 'text/uri-list'
task_uri = OpenTox::Task.as_task do
input_dataset = OpenTox::Dataset.find(dataset_uri)
input_dataset.compounds.each do |compound_uri|
- lazar.classify(compound_uri,prediction) unless lazar.database_activity?(compound_uri,prediction)
+ # AM: switch here between regression and classification
+ eval "lazar.#{prediction_type}(compound_uri,prediction) unless lazar.database_activity?(compound_uri,prediction)"
end
begin
uri = prediction.save.chomp