From 4f622dd05299c3812286e11e8fce3e656e21ef29 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Tue, 18 Aug 2015 13:19:37 +0200 Subject: first attempt at installing openbabel from github --- lib/lazar-model.rb | 134 +---------------------------------------------------- 1 file changed, 2 insertions(+), 132 deletions(-) (limited to 'lib') diff --git a/lib/lazar-model.rb b/lib/lazar-model.rb index aeaa515..1970401 100644 --- a/lib/lazar-model.rb +++ b/lib/lazar-model.rb @@ -129,8 +129,6 @@ module OpenTox end class LazarFminerClassification < LazarClassification - #field :feature_dataset_id, type: BSON::ObjectId - #field :feature_calculation_algorithm, type: String def self.create training_dataset model = super(training_dataset) @@ -145,136 +143,6 @@ module OpenTox model.save model end - -=begin - def predict object - - t = Time.now - at = Time.now - - @training_dataset = OpenTox::Dataset.find(training_dataset_id) - @feature_dataset = OpenTox::Dataset.find(feature_dataset_id) - - compounds = [] - case object.class.to_s - when "OpenTox::Compound" - compounds = [object] - when "Array" - compounds = object - when "OpenTox::Dataset" - compounds = object.compounds - else - bad_request_error "Please provide a OpenTox::Compound an Array of OpenTox::Compounds or an OpenTox::Dataset as parameter." - end - - $logger.debug "Setup: #{Time.now-t}" - t = Time.now - - @query_fingerprint = Algorithm.run(feature_calculation_algorithm, compounds, @feature_dataset.features.collect{|f| f.name} ) - - $logger.debug "Query fingerprint calculation: #{Time.now-t}" - t = Time.now - - predictions = [] - prediction_feature = OpenTox::Feature.find prediction_feature_id - tt = 0 - pt = 0 - nt = 0 - st = 0 - nit = 0 - @training_fingerprints ||= @feature_dataset.data_entries - compounds.each_with_index do |compound,c| - t = Time.new - - $logger.debug "predict compound #{c+1}/#{compounds.size} #{compound.inchi}" - - database_activities = @training_dataset.values(compound,prediction_feature) - if database_activities and !database_activities.empty? - database_activities = database_activities.first if database_activities.size == 1 - $logger.debug "Compound #{compound.inchi} occurs in training dataset with activity #{database_activities}" - predictions << {:compound => compound, :value => database_activities, :confidence => "measured"} - next - else - - #training_fingerprints = @feature_dataset.data_entries - query_fingerprint = @query_fingerprint[c] - neighbors = [] - tt += Time.now-t - t = Time.new - - - # find neighbors - @training_fingerprints.each_with_index do |fingerprint, i| - ts = Time.new - sim = Algorithm.run(similarity_algorithm,fingerprint, query_fingerprint) - st += Time.now-ts - ts = Time.new - if sim > self.min_sim - if prediction_algorithm =~ /Regression/ - neighbors << [@feature_dataset.compound_ids[i],sim,training_activities[i], fingerprint] - else - neighbors << [@feature_dataset.compound_ids[i],sim,training_activities[i]] # use compound_ids, instantiation of Compounds is too time consuming - end - end - nit += Time.now-ts - end - - if neighbors.empty? - predictions << {:compound => compound, :value => nil, :confidence => nil, :warning => "No neighbors with similarity > #{min_sim} in dataset #{training_dataset.id}"} - next - end - nt += Time.now-t - t = Time.new - - if prediction_algorithm =~ /Regression/ - prediction = Algorithm.run(prediction_algorithm, neighbors, :min_train_performance => self.min_train_performance) - else - prediction = Algorithm.run(prediction_algorithm, neighbors) - end - prediction[:compound] = compound - prediction[:neighbors] = neighbors.sort{|a,b| b[1] <=> a[1]} # sort with ascending similarities - - - # AM: transform to original space (TODO) - #confidence_value = ((confidence_value+1.0)/2.0).abs if prediction.first and similarity_algorithm =~ /cosine/ - - - $logger.debug "predicted value: #{prediction[:value]}, confidence: #{prediction[:confidence]}" - predictions << prediction - pt += Time.now-t - end - - end - $logger.debug "Transform time: #{tt}" - $logger.debug "Neighbor search time: #{nt} (Similarity calculation: #{st}, Neighbor insert: #{nit})" - $logger.debug "Prediction time: #{pt}" - $logger.debug "Total prediction time: #{Time.now-at}" - - # serialize result - case object.class.to_s - when "OpenTox::Compound" - return predictions.first - when "Array" - return predictions - when "OpenTox::Dataset" - # prepare prediction dataset - prediction_dataset = LazarPrediction.new( - :title => "Lazar prediction for #{prediction_feature.title}", - :creator => __FILE__, - :prediction_feature_id => prediction_feature.id - - ) - confidence_feature = OpenTox::NumericFeature.find_or_create_by( "title" => "Prediction confidence" ) - warning_feature = OpenTox::NominalFeature.find_or_create_by("title" => "Warnings") - prediction_dataset.features = [ prediction_feature, confidence_feature, warning_feature ] - prediction_dataset.compounds = compounds - prediction_dataset.data_entries = predictions.collect{|p| [p[:value], p[:confidence],p[:warning]]} - prediction_dataset.save_all - return prediction_dataset - end - - end -=end end class LazarRegression < Lazar @@ -291,6 +159,8 @@ module OpenTox class PredictionModel < Lazar field :category, type: String field :endpoint, type: String + field :unit, type: String + field :model_id, type: BSON::ObjectId field :crossvalidation_id, type: BSON::ObjectId end -- cgit v1.2.3