summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristoph Helma <helma@in-silico.ch>2010-08-25 12:29:42 +0200
committerChristoph Helma <helma@in-silico.ch>2010-08-25 12:29:42 +0200
commit48684b9438ff48772972375e9a97f981592da191 (patch)
treebae85e96c781fa039c482c565dbf8c20159f10a8
parentc499e94395f91aeb52ee36d755a4958a9b7942ae (diff)
property_lazar.rb added
-rw-r--r--property_lazar.rb303
1 files changed, 303 insertions, 0 deletions
diff --git a/property_lazar.rb b/property_lazar.rb
new file mode 100644
index 0000000..6e68718
--- /dev/null
+++ b/property_lazar.rb
@@ -0,0 +1,303 @@
+# R integration
+# workaround to initialize R non-interactively (former rinruby versions did this by default)
+# avoids compiling R with X
+R = nil
+require "rinruby"
+require "haml"
+
+class PropertyLazar < Model
+
+ attr_accessor :prediction_dataset
+
+=begin
+ # AM begin
+ # regression function, created 06/10
+ def regression(compound_uri,prediction,verbose=false)
+
+ lazar = YAML.load self.yaml
+ compound = OpenTox::Compound.new(:uri => compound_uri)
+
+ # obtain X values for query compound
+ compound_matches = compound.match lazar.features
+
+ conf = 0.0
+ features = { :activating => [], :deactivating => [] }
+ neighbors = {}
+ regression = nil
+
+ regr_occurrences = [] # occurrence vector with {0,1} entries
+ sims = [] # similarity values between query and neighbors
+ acts = [] # activities of neighbors for supervised learning
+ neighbor_matches = [] # as in classification: URIs of matches
+ gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel
+ i = 0
+
+ # acquire data related to query structure
+ lazar.fingerprints.each do |uri,matches|
+ sim = OpenTox::Algorithm::Similarity.weighted_tanimoto(compound_matches,matches,lazar.p_values)
+ lazar.activities[uri].each do |act|
+ if sim > 0.3
+ neighbors[uri] = {:similarity => sim}
+ neighbors[uri][:features] = { :activating => [], :deactivating => [] } unless neighbors[uri][:features]
+ matches.each do |m|
+ if lazar.effects[m] == 'activating'
+ neighbors[uri][:features][:activating] << {:smarts => m, :p_value => lazar.p_values[m]}
+ elsif lazar.effects[m] == 'deactivating'
+ neighbors[uri][:features][:deactivating] << {:smarts => m, :p_value => lazar.p_values[m]}
+ end
+ end
+ lazar.activities[uri].each do |act|
+ neighbors[uri][:activities] = [] unless neighbors[uri][:activities]
+ neighbors[uri][:activities] << act
+ end
+ conf += OpenTox::Utils.gauss(sim)
+ sims << OpenTox::Utils.gauss(sim)
+ #TODO check for 0 s
+ acts << Math.log10(act.to_f)
+ neighbor_matches[i] = matches
+ i+=1
+ end
+ end
+ end
+ conf = conf/neighbors.size
+ LOGGER.debug "Regression: found " + neighbor_matches.size.to_s + " neighbors."
+
+
+ unless neighbor_matches.length == 0
+ # gram matrix
+ (0..(neighbor_matches.length-1)).each do |i|
+ gram_matrix[i] = []
+ # lower triangle
+ (0..(i-1)).each do |j|
+ sim = OpenTox::Algorithm::Similarity.weighted_tanimoto(neighbor_matches[i], neighbor_matches[j], lazar.p_values)
+ gram_matrix[i] << OpenTox::Utils.gauss(sim)
+ end
+ # diagonal element
+ gram_matrix[i][i] = 1.0
+ # upper triangle
+ ((i+1)..(neighbor_matches.length-1)).each do |j|
+ sim = OpenTox::Algorithm::Similarity.weighted_tanimoto(neighbor_matches[i], neighbor_matches[j], lazar.p_values)
+ gram_matrix[i] << OpenTox::Utils.gauss(sim)
+ end
+ end
+
+ @r = RinRuby.new(false,false) # global R instance leads to Socket errors after a large number of requests
+ @r.eval "library('kernlab')" # this requires R package "kernlab" to be installed
+ LOGGER.debug "Setting R data ..."
+ # set data
+ @r.gram_matrix = gram_matrix.flatten
+ @r.n = neighbor_matches.length
+ @r.y = acts
+ @r.sims = sims
+
+ LOGGER.debug "Preparing R data ..."
+ # prepare data
+ @r.eval "y<-as.vector(y)"
+ @r.eval "gram_matrix<-as.kernelMatrix(matrix(gram_matrix,n,n))"
+ @r.eval "sims<-as.vector(sims)"
+
+ # model + support vectors
+ LOGGER.debug "Creating SVM model ..."
+ @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"nu-svr\", nu=0.8)"
+ @r.eval "sv<-as.vector(SVindex(model))"
+ @r.eval "sims<-sims[sv]"
+ @r.eval "sims<-as.kernelMatrix(matrix(sims,1))"
+ LOGGER.debug "Predicting ..."
+ @r.eval "p<-predict(model,sims)[1,1]"
+ regression = 10**(@r.p.to_f)
+ LOGGER.debug "Prediction is: '" + regression.to_s + "'."
+ @r.quit # free R
+
+ end
+
+ if (regression != nil)
+ feature_uri = lazar.dependentVariables
+ prediction.compounds << compound_uri
+ prediction.features << feature_uri
+ prediction.data[compound_uri] = [] unless prediction.data[compound_uri]
+ compound_matches.each { |m| features[lazar.effects[m].to_sym] << {:smarts => m, :p_value => lazar.p_values[m] } }
+ tuple = {
+ File.join(@@config[:services]["opentox-model"],"lazar#regression") => regression,
+ File.join(@@config[:services]["opentox-model"],"lazar#confidence") => conf
+ }
+ if verbose
+ tuple[File.join(@@config[:services]["opentox-model"],"lazar#neighbors")] = neighbors
+ tuple[File.join(@@config[:services]["opentox-model"],"lazar#features")] = features
+ end
+ prediction.data[compound_uri] << {feature_uri => tuple}
+ end
+
+ end
+ # AM end
+=end
+
+
+ def classification(compound_uri,prediction,verbose=false)
+
+ lazar = YAML.load self.yaml
+ compound = OpenTox::Compound.new(:uri => compound_uri)
+ compound_properties = lazar.properties[compound.uri]
+
+ conf = 0.0
+ neighbors = {}
+ features = []
+ classification = nil
+
+ lazar.properties.each do |uri,properties|
+
+ sim = OpenTox::Algorithm::Similarity.euclidean(compound_properties,properties)
+ if sim and sim > 0.001
+ neighbors[uri] = {:similarity => sim}
+ neighbors[uri][:features] = [] unless neighbors[uri][:features]
+ properties.each do |p,v|
+ neighbors[uri][:features] << {p => v}
+ end
+ lazar.activities[uri].each do |act|
+ neighbors[uri][:activities] = [] unless neighbors[uri][:activities]
+ neighbors[uri][:activities] << act
+ case act.to_s
+ when 'true'
+ conf += OpenTox::Utils.gauss(sim)
+ when 'false'
+ conf -= OpenTox::Utils.gauss(sim)
+ end
+ end
+ end
+ end
+
+ conf = conf/neighbors.size
+ if conf > 0.0
+ classification = true
+ elsif conf < 0.0
+ classification = false
+ end
+ if (classification != nil)
+ feature_uri = lazar.dependentVariables
+ prediction.compounds << compound_uri
+ prediction.features << feature_uri
+ prediction.data[compound_uri] = [] unless prediction.data[compound_uri]
+ compound_properties.each { |p,v| features << {p => v} }
+ tuple = {
+ File.join(@@config[:services]["opentox-model"],"lazar#classification") => classification,
+ File.join(@@config[:services]["opentox-model"],"lazar#confidence") => conf
+ }
+ if verbose
+ tuple[File.join(@@config[:services]["opentox-model"],"lazar#neighbors")] = neighbors
+ tuple[File.join(@@config[:services]["opentox-model"],"lazar#features")] = features
+ end
+ prediction.data[compound_uri] << {feature_uri => tuple}
+ end
+ end
+
+ def database_activity?(compound_uri,prediction)
+ # find database activities
+ lazar = YAML.load self.yaml
+ db_activities = lazar.activities[compound_uri]
+ if db_activities
+ prediction.creator = lazar.trainingDataset
+ feature_uri = lazar.dependentVariables
+ prediction.compounds << compound_uri
+ prediction.features << feature_uri
+ prediction.data[compound_uri] = [] unless prediction.data[compound_uri]
+ db_activities.each do |act|
+ prediction.data[compound_uri] << {feature_uri => act}
+ end
+ true
+ else
+ false
+ end
+ end
+
+ def to_owl
+ data = YAML.load(yaml)
+ activity_dataset = YAML.load(RestClient.get(data.trainingDataset, :accept => 'application/x-yaml').to_s)
+ feature_dataset = YAML.load(RestClient.get(data.feature_dataset_uri, :accept => 'application/x-yaml').to_s)
+ owl = OpenTox::Owl.create 'Model', uri
+ owl.set("creator","http://github.com/helma/opentox-model")
+ owl.set("title", URI.decode(data.dependentVariables.split(/#/).last) )
+ #owl.set("title","#{URI.decode(activity_dataset.title)} lazar classification")
+ owl.set("date",created_at.to_s)
+ owl.set("algorithm",data.algorithm)
+ owl.set("dependentVariables",activity_dataset.features.join(', '))
+ owl.set("independentVariables",feature_dataset.features.join(', '))
+ owl.set("predictedVariables", data.dependentVariables )
+ #owl.set("predictedVariables",activity_dataset.features.join(', ') + "_lazar_classification")
+ owl.set("trainingDataset",data.trainingDataset)
+ owl.parameters = {
+ "Dataset URI" =>
+ { :scope => "mandatory", :value => data.trainingDataset },
+ "Feature URI for dependent variable" =>
+ { :scope => "mandatory", :value => activity_dataset.features.join(', ')},
+ "Feature generation URI" =>
+ { :scope => "mandatory", :value => feature_dataset.creator }
+ }
+
+ owl.rdf
+ end
+
+end
+
+post '/pl/:id/?' do # create prediction
+
+ lazar = PropertyLazar.get(params[:id])
+ LOGGER.debug lazar.to_yaml
+ halt 404, "Model #{params[:id]} does not exist." unless lazar
+ halt 404, "No compound_uri or dataset_uri parameter." unless compound_uri = params[:compound_uri] or dataset_uri = params[:dataset_uri]
+
+ @prediction = OpenTox::Dataset.new
+ @prediction.creator = lazar.uri
+ dependent_variable = YAML.load(lazar.yaml).dependentVariables
+ @prediction.title = URI.decode(dependent_variable.split(/#/).last)
+ case dependent_variable
+ when /classification/
+ prediction_type = "classification"
+ when /regression/
+ prediction_type = "regression"
+ end
+
+ if compound_uri
+ # look for cached prediction first
+ #if cached_prediction = Prediction.first(:model_uri => lazar.uri, :compound_uri => compound_uri)
+ #@prediction = YAML.load(cached_prediction.yaml)
+ #else
+ begin
+ # AM: switch here between regression and classification
+ lazar.classification(compound_uri,@prediction,true) #unless lazar.database_activity?(compound_uri,@prediction)"
+ #eval "lazar.#{prediction_type}(compound_uri,@prediction,true) unless lazar.database_activity?(compound_uri,@prediction)"
+ #Prediction.create(:model_uri => lazar.uri, :compound_uri => compound_uri, :yaml => @prediction.to_yaml)
+ rescue
+ LOGGER.error "#{prediction_type} failed for #{compound_uri} with #{$!} "
+ halt 500, "Prediction of #{compound_uri} failed."
+ end
+ #end
+ case request.env['HTTP_ACCEPT']
+ when /yaml/
+ @prediction.to_yaml
+ when 'application/rdf+xml'
+ @prediction.to_owl
+ else
+ halt 400, "MIME type \"#{request.env['HTTP_ACCEPT']}\" not supported."
+ end
+
+ elsif dataset_uri
+ response['Content-Type'] = 'text/uri-list'
+ task_uri = OpenTox::Task.as_task("Predict dataset",url_for("/#{lazar.id}", :full)) do
+ input_dataset = OpenTox::Dataset.find(dataset_uri)
+ input_dataset.compounds.each do |compound_uri|
+ # AM: switch here between regression and classification
+ begin
+ eval "lazar.#{prediction_type}(compound_uri,@prediction) unless lazar.database_activity?(compound_uri,@prediction)"
+ rescue
+ LOGGER.error "#{prediction_type} failed for #{compound_uri} with #{$!} "
+ end
+ end
+ begin
+ uri = @prediction.save.chomp
+ rescue
+ halt 500, "Could not save prediction dataset"
+ end
+ end
+ halt 202,task_uri
+ end
+
+end