summary refs log tree commit diff
path: root/lib
diff options
context:
space:
mode:
author Christoph Helma <helma@in-silico.ch> 2015-08-18 13:19:37 +0200
committer Christoph Helma <helma@in-silico.ch> 2015-08-18 13:19:37 +0200
commit 4f622dd05299c3812286e11e8fce3e656e21ef29 (patch)
tree 4738cc6ea2e134578f6d19d9b0b58e6890bc6c40 /lib
parent c26112cbe94689da20688b4924c9025a995310ae (diff)
first attempt at installing openbabel from github
Diffstat (limited to 'lib')
-rw-r--r-- lib/lazar-model.rb 134
1 files changed, 2 insertions, 132 deletions
diff --git a/lib/lazar-model.rb b/lib/lazar-model.rb
index aeaa515..1970401 100644
--- a/lib/lazar-model.rb
+++ b/lib/lazar-model.rb
@@ -129,8 +129,6 @@ module OpenTox
end
class LazarFminerClassification < LazarClassification
- #field :feature_dataset_id, type: BSON::ObjectId
- #field :feature_calculation_algorithm, type: String
def self.create training_dataset
model = super(training_dataset)
@@ -145,136 +143,6 @@ module OpenTox
model.save
model
end
-
-=begin
- def predict object
-
- t = Time.now
- at = Time.now
-
- @training_dataset = OpenTox::Dataset.find(training_dataset_id)
- @feature_dataset = OpenTox::Dataset.find(feature_dataset_id)
-
- compounds = []
- case object.class.to_s
- when "OpenTox::Compound"
- compounds = [object]
- when "Array"
- compounds = object
- when "OpenTox::Dataset"
- compounds = object.compounds
- else
- bad_request_error "Please provide a OpenTox::Compound an Array of OpenTox::Compounds or an OpenTox::Dataset as parameter."
- end
-
- $logger.debug "Setup: #{Time.now-t}"
- t = Time.now
-
- @query_fingerprint = Algorithm.run(feature_calculation_algorithm, compounds, @feature_dataset.features.collect{|f| f.name} )
-
- $logger.debug "Query fingerprint calculation: #{Time.now-t}"
- t = Time.now
-
- predictions = []
- prediction_feature = OpenTox::Feature.find prediction_feature_id
- tt = 0
- pt = 0
- nt = 0
- st = 0
- nit = 0
- @training_fingerprints ||= @feature_dataset.data_entries
- compounds.each_with_index do |compound,c|
- t = Time.new
-
- $logger.debug "predict compound #{c+1}/#{compounds.size} #{compound.inchi}"
-
- database_activities = @training_dataset.values(compound,prediction_feature)
- if database_activities and !database_activities.empty?
- database_activities = database_activities.first if database_activities.size == 1
- $logger.debug "Compound #{compound.inchi} occurs in training dataset with activity #{database_activities}"
- predictions << {:compound => compound, :value => database_activities, :confidence => "measured"}
- next
- else
-
- #training_fingerprints = @feature_dataset.data_entries
- query_fingerprint = @query_fingerprint[c]
- neighbors = []
- tt += Time.now-t
- t = Time.new
-
-
- # find neighbors
- @training_fingerprints.each_with_index do |fingerprint, i|
- ts = Time.new
- sim = Algorithm.run(similarity_algorithm,fingerprint, query_fingerprint)
- st += Time.now-ts
- ts = Time.new
- if sim > self.min_sim
- if prediction_algorithm =~ /Regression/
- neighbors << [@feature_dataset.compound_ids[i],sim,training_activities[i], fingerprint]
- else
- neighbors << [@feature_dataset.compound_ids[i],sim,training_activities[i]] # use compound_ids, instantiation of Compounds is too time consuming
- end
- end
- nit += Time.now-ts
- end
-
- if neighbors.empty?
- predictions << {:compound => compound, :value => nil, :confidence => nil, :warning => "No neighbors with similarity > #{min_sim} in dataset #{training_dataset.id}"}
- next
- end
- nt += Time.now-t
- t = Time.new
-
- if prediction_algorithm =~ /Regression/
- prediction = Algorithm.run(prediction_algorithm, neighbors, :min_train_performance => self.min_train_performance)
- else
- prediction = Algorithm.run(prediction_algorithm, neighbors)
- end
- prediction[:compound] = compound
- prediction[:neighbors] = neighbors.sort{|a,b| b[1] <=> a[1]} # sort with ascending similarities
-
-
- # AM: transform to original space (TODO)
- #confidence_value = ((confidence_value+1.0)/2.0).abs if prediction.first and similarity_algorithm =~ /cosine/
-
-
- $logger.debug "predicted value: #{prediction[:value]}, confidence: #{prediction[:confidence]}"
- predictions << prediction
- pt += Time.now-t
- end
-
- end
- $logger.debug "Transform time: #{tt}"
- $logger.debug "Neighbor search time: #{nt} (Similarity calculation: #{st}, Neighbor insert: #{nit})"
- $logger.debug "Prediction time: #{pt}"
- $logger.debug "Total prediction time: #{Time.now-at}"
-
- # serialize result
- case object.class.to_s
- when "OpenTox::Compound"
- return predictions.first
- when "Array"
- return predictions
- when "OpenTox::Dataset"
- # prepare prediction dataset
- prediction_dataset = LazarPrediction.new(
- :title => "Lazar prediction for #{prediction_feature.title}",
- :creator => __FILE__,
- :prediction_feature_id => prediction_feature.id
-
- )
- confidence_feature = OpenTox::NumericFeature.find_or_create_by( "title" => "Prediction confidence" )
- warning_feature = OpenTox::NominalFeature.find_or_create_by("title" => "Warnings")
- prediction_dataset.features = [ prediction_feature, confidence_feature, warning_feature ]
- prediction_dataset.compounds = compounds
- prediction_dataset.data_entries = predictions.collect{|p| [p[:value], p[:confidence],p[:warning]]}
- prediction_dataset.save_all
- return prediction_dataset
- end
-
- end
-=end
end
class LazarRegression < Lazar
@@ -291,6 +159,8 @@ module OpenTox
class PredictionModel < Lazar
field :category, type: String
field :endpoint, type: String
+ field :unit, type: String
+ field :model_id, type: BSON::ObjectId
field :crossvalidation_id, type: BSON::ObjectId
end