diff options
-rw-r--r-- | property_lazar.rb | 303 |
1 files changed, 303 insertions, 0 deletions
diff --git a/property_lazar.rb b/property_lazar.rb new file mode 100644 index 0000000..6e68718 --- /dev/null +++ b/property_lazar.rb @@ -0,0 +1,303 @@ +# R integration +# workaround to initialize R non-interactively (former rinruby versions did this by default) +# avoids compiling R with X +R = nil +require "rinruby" +require "haml" + +class PropertyLazar < Model + + attr_accessor :prediction_dataset + +=begin + # AM begin + # regression function, created 06/10 + def regression(compound_uri,prediction,verbose=false) + + lazar = YAML.load self.yaml + compound = OpenTox::Compound.new(:uri => compound_uri) + + # obtain X values for query compound + compound_matches = compound.match lazar.features + + conf = 0.0 + features = { :activating => [], :deactivating => [] } + neighbors = {} + regression = nil + + regr_occurrences = [] # occurrence vector with {0,1} entries + sims = [] # similarity values between query and neighbors + acts = [] # activities of neighbors for supervised learning + neighbor_matches = [] # as in classification: URIs of matches + gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel + i = 0 + + # aquire data related to query structure + lazar.fingerprints.each do |uri,matches| + sim = OpenTox::Algorithm::Similarity.weighted_tanimoto(compound_matches,matches,lazar.p_values) + lazar.activities[uri].each do |act| + if sim > 0.3 + neighbors[uri] = {:similarity => sim} + neighbors[uri][:features] = { :activating => [], :deactivating => [] } unless neighbors[uri][:features] + matches.each do |m| + if lazar.effects[m] == 'activating' + neighbors[uri][:features][:activating] << {:smarts => m, :p_value => lazar.p_values[m]} + elsif lazar.effects[m] == 'deactivating' + neighbors[uri][:features][:deactivating] << {:smarts => m, :p_value => lazar.p_values[m]} + end + end + lazar.activities[uri].each do |act| + neighbors[uri][:activities] = [] unless neighbors[uri][:activities] + neighbors[uri][:activities] << act + end + conf += OpenTox::Utils.gauss(sim) + sims << OpenTox::Utils.gauss(sim) + #TODO check for 0 s + acts << Math.log10(act.to_f) + neighbor_matches[i] = matches + i+=1 + end + end + end + conf = conf/neighbors.size + LOGGER.debug "Regression: found " + neighbor_matches.size.to_s + " neighbors." + + + unless neighbor_matches.length == 0 + # gram matrix + (0..(neighbor_matches.length-1)).each do |i| + gram_matrix[i] = [] + # lower triangle + (0..(i-1)).each do |j| + sim = OpenTox::Algorithm::Similarity.weighted_tanimoto(neighbor_matches[i], neighbor_matches[j], lazar.p_values) + gram_matrix[i] << OpenTox::Utils.gauss(sim) + end + # diagonal element + gram_matrix[i][i] = 1.0 + # upper triangle + ((i+1)..(neighbor_matches.length-1)).each do |j| + sim = OpenTox::Algorithm::Similarity.weighted_tanimoto(neighbor_matches[i], neighbor_matches[j], lazar.p_values) + gram_matrix[i] << OpenTox::Utils.gauss(sim) + end + end + + @r = RinRuby.new(false,false) # global R instance leads to Socket errors after a large number of requests + @r.eval "library('kernlab')" # this requires R package "kernlab" to be installed + LOGGER.debug "Setting R data ..." + # set data + @r.gram_matrix = gram_matrix.flatten + @r.n = neighbor_matches.length + @r.y = acts + @r.sims = sims + + LOGGER.debug "Preparing R data ..." + # prepare data + @r.eval "y<-as.vector(y)" + @r.eval "gram_matrix<-as.kernelMatrix(matrix(gram_matrix,n,n))" + @r.eval "sims<-as.vector(sims)" + + # model + support vectors + LOGGER.debug "Creating SVM model ..." + @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"nu-svr\", nu=0.8)" + @r.eval "sv<-as.vector(SVindex(model))" + @r.eval "sims<-sims[sv]" + @r.eval "sims<-as.kernelMatrix(matrix(sims,1))" + LOGGER.debug "Predicting ..." + @r.eval "p<-predict(model,sims)[1,1]" + regression = 10**(@r.p.to_f) + LOGGER.debug "Prediction is: '" + regression.to_s + "'." + @r.quit # free R + + end + + if (regression != nil) + feature_uri = lazar.dependentVariables + prediction.compounds << compound_uri + prediction.features << feature_uri + prediction.data[compound_uri] = [] unless prediction.data[compound_uri] + compound_matches.each { |m| features[lazar.effects[m].to_sym] << {:smarts => m, :p_value => lazar.p_values[m] } } + tuple = { + File.join(@@config[:services]["opentox-model"],"lazar#regression") => regression, + File.join(@@config[:services]["opentox-model"],"lazar#confidence") => conf + } + if verbose + tuple[File.join(@@config[:services]["opentox-model"],"lazar#neighbors")] = neighbors + tuple[File.join(@@config[:services]["opentox-model"],"lazar#features")] = features + end + prediction.data[compound_uri] << {feature_uri => tuple} + end + + end + # AM end +=end + + + def classification(compound_uri,prediction,verbose=false) + + lazar = YAML.load self.yaml + compound = OpenTox::Compound.new(:uri => compound_uri) + compound_properties = lazar.properties[compound.uri] + + conf = 0.0 + neighbors = {} + features = [] + classification = nil + + lazar.properties.each do |uri,properties| + + sim = OpenTox::Algorithm::Similarity.euclidean(compound_properties,properties) + if sim and sim > 0.001 + neighbors[uri] = {:similarity => sim} + neighbors[uri][:features] = [] unless neighbors[uri][:features] + properties.each do |p,v| + neighbors[uri][:features] << {p => v} + end + lazar.activities[uri].each do |act| + neighbors[uri][:activities] = [] unless neighbors[uri][:activities] + neighbors[uri][:activities] << act + case act.to_s + when 'true' + conf += OpenTox::Utils.gauss(sim) + when 'false' + conf -= OpenTox::Utils.gauss(sim) + end + end + end + end + + conf = conf/neighbors.size + if conf > 0.0 + classification = true + elsif conf < 0.0 + classification = false + end + if (classification != nil) + feature_uri = lazar.dependentVariables + prediction.compounds << compound_uri + prediction.features << feature_uri + prediction.data[compound_uri] = [] unless prediction.data[compound_uri] + compound_properties.each { |p,v| features << {p => v} } + tuple = { + File.join(@@config[:services]["opentox-model"],"lazar#classification") => classification, + File.join(@@config[:services]["opentox-model"],"lazar#confidence") => conf + } + if verbose + tuple[File.join(@@config[:services]["opentox-model"],"lazar#neighbors")] = neighbors + tuple[File.join(@@config[:services]["opentox-model"],"lazar#features")] = features + end + prediction.data[compound_uri] << {feature_uri => tuple} + end + end + + def database_activity?(compound_uri,prediction) + # find database activities + lazar = YAML.load self.yaml + db_activities = lazar.activities[compound_uri] + if db_activities + prediction.creator = lazar.trainingDataset + feature_uri = lazar.dependentVariables + prediction.compounds << compound_uri + prediction.features << feature_uri + prediction.data[compound_uri] = [] unless prediction.data[compound_uri] + db_activities.each do |act| + prediction.data[compound_uri] << {feature_uri => act} + end + true + else + false + end + end + + def to_owl + data = YAML.load(yaml) + activity_dataset = YAML.load(RestClient.get(data.trainingDataset, :accept => 'application/x-yaml').to_s) + feature_dataset = YAML.load(RestClient.get(data.feature_dataset_uri, :accept => 'application/x-yaml').to_s) + owl = OpenTox::Owl.create 'Model', uri + owl.set("creator","http://github.com/helma/opentox-model") + owl.set("title", URI.decode(data.dependentVariables.split(/#/).last) ) + #owl.set("title","#{URI.decode(activity_dataset.title)} lazar classification") + owl.set("date",created_at.to_s) + owl.set("algorithm",data.algorithm) + owl.set("dependentVariables",activity_dataset.features.join(', ')) + owl.set("independentVariables",feature_dataset.features.join(', ')) + owl.set("predictedVariables", data.dependentVariables ) + #owl.set("predictedVariables",activity_dataset.features.join(', ') + "_lazar_classification") + owl.set("trainingDataset",data.trainingDataset) + owl.parameters = { + "Dataset URI" => + { :scope => "mandatory", :value => data.trainingDataset }, + "Feature URI for dependent variable" => + { :scope => "mandatory", :value => activity_dataset.features.join(', ')}, + "Feature generation URI" => + { :scope => "mandatory", :value => feature_dataset.creator } + } + + owl.rdf + end + +end + +post '/pl/:id/?' do # create prediction + + lazar = PropertyLazar.get(params[:id]) + LOGGER.debug lazar.to_yaml + halt 404, "Model #{params[:id]} does not exist." unless lazar + halt 404, "No compound_uri or dataset_uri parameter." unless compound_uri = params[:compound_uri] or dataset_uri = params[:dataset_uri] + + @prediction = OpenTox::Dataset.new + @prediction.creator = lazar.uri + dependent_variable = YAML.load(lazar.yaml).dependentVariables + @prediction.title = URI.decode(dependent_variable.split(/#/).last) + case dependent_variable + when /classification/ + prediction_type = "classification" + when /regression/ + prediction_type = "regression" + end + + if compound_uri + # look for cached prediction first + #if cached_prediction = Prediction.first(:model_uri => lazar.uri, :compound_uri => compound_uri) + #@prediction = YAML.load(cached_prediction.yaml) + #else + begin + # AM: switch here between regression and classification + lazar.classification(compound_uri,@prediction,true) #unless lazar.database_activity?(compound_uri,@prediction)" + #eval "lazar.#{prediction_type}(compound_uri,@prediction,true) unless lazar.database_activity?(compound_uri,@prediction)" + #Prediction.create(:model_uri => lazar.uri, :compound_uri => compound_uri, :yaml => @prediction.to_yaml) + rescue + LOGGER.error "#{prediction_type} failed for #{compound_uri} with #{$!} " + halt 500, "Prediction of #{compound_uri} failed." + end + #end + case request.env['HTTP_ACCEPT'] + when /yaml/ + @prediction.to_yaml + when 'application/rdf+xml' + @prediction.to_owl + else + halt 400, "MIME type \"#{request.env['HTTP_ACCEPT']}\" not supported." + end + + elsif dataset_uri + response['Content-Type'] = 'text/uri-list' + task_uri = OpenTox::Task.as_task("Predict dataset",url_for("/#{lazar.id}", :full)) do + input_dataset = OpenTox::Dataset.find(dataset_uri) + input_dataset.compounds.each do |compound_uri| + # AM: switch here between regression and classification + begin + eval "lazar.#{prediction_type}(compound_uri,@prediction) unless lazar.database_activity?(compound_uri,@prediction)" + rescue + LOGGER.error "#{prediction_type} failed for #{compound_uri} with #{$!} " + end + end + begin + uri = @prediction.save.chomp + rescue + halt 500, "Could not save prediction dataset" + end + end + halt 202,task_uri + end + +end |