From adefea0e78a4f05a2c9537e643873ad61fc22a0a Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Mon, 3 Oct 2016 19:49:55 +0200 Subject: initial model creation tests --- lib/classification.rb | 2 + lib/model.rb | 120 +++++++++++++++++++++++++++----------------------- lib/opentox.rb | 5 +-- 3 files changed, 67 insertions(+), 60 deletions(-) (limited to 'lib') diff --git a/lib/classification.rb b/lib/classification.rb index 2ccd7d1..03c32c4 100644 --- a/lib/classification.rb +++ b/lib/classification.rb @@ -25,7 +25,9 @@ module OpenTox prediction = probabilities.key(p_max) {:value => prediction,:probabilities => probabilities} end + end + end end diff --git a/lib/model.rb b/lib/model.rb index 5cf2cdb..749611e 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -13,31 +13,73 @@ module OpenTox field :creator, type: String, default: __FILE__ field :training_dataset_id, type: BSON::ObjectId field :prediction_feature_id, type: BSON::ObjectId - - field :prediction_algorithm, type: String - field :prediction_algorithm_parameters, type: Hash, default: {} - - field :neighbor_algorithm, type: String - field :neighbor_algorithm_parameters, type: Hash, default: {} - field :feature_selection_algorithm, type: String - field :feature_selection_algorithm_parameters, type: Hash, default: {} + field :algorithms, type: Hash field :relevant_features, type: Hash - - # Create a lazar model from a training_dataset and a feature_dataset - # @param [OpenTox::Dataset] training_dataset - # @return [OpenTox::Model::Lazar] Regression or classification model - def initialize prediction_feature, training_dataset, params={} - super params + + def self.create prediction_feature:nil, training_dataset:nil, algorithms:{} + bad_request_error "Please provide a prediction_feature and/or a training_dataset." unless prediction_feature or training_dataset + prediction_feature = training_dataset.features.first unless prediction_feature + # TODO: prediction_feature without training_dataset: use all available data + # explicit prediction algorithm + if algorithms[:prediction] and algorithms[:prediction][:method] + case algorithms[:prediction][:method] + when /Classifiction/ + model = LazarClassification.new + when /Regression/ + model = LazarRegression.new + end + # guess model type + elsif prediction_feature.numeric? + model = LazarRegression.new + else + model = LazarClassification.new + end + # set defaults + if model.class == LazarClassification + model.algorithms = { + :similarity => { + :descriptors => "fingerprint['MP2D']", + :method => "Algorithm::Similarity.tanimoto", + :min => 0.1 + }, + :prediction => { + :descriptors => "fingerprint['MP2D']", + :method => "Algorithm::Classification.weighted_majority_vote", + }, + :feature_selection => nil, + } + elsif model.class == LazarRegression + model.algorithms = { + :similarity => { + :descriptors => "fingerprint['MP2D']", + :method => "Algorithm::Similarity.tanimoto", + :min => 0.1 + }, + :prediction => { + :descriptors => "fingerprint['MP2D']", + :method => "Algorithm::Regression.local_caret", + :parameters => "pls", + }, + :feature_selection => nil, + } + end + + # overwrite defaults + algorithms.each do |type,parameters| + parameters.each do |p,v| + model.algorithms[type][p] = v + end if parameters + end # set defaults for empty parameters - self.prediction_feature_id ||= prediction_feature.id - self.training_dataset_id ||= training_dataset.id - self.name ||= "#{training_dataset.name} #{prediction_feature.name}" - self.neighbor_algorithm_parameters ||= {} - self.neighbor_algorithm_parameters[:dataset_id] = training_dataset.id - - send(feature_selection_algorithm.to_sym) if feature_selection_algorithm - save + model.prediction_feature_id = prediction_feature.id + model.training_dataset_id = training_dataset.id + model.name = "#{training_dataset.name} #{prediction_feature.name}" + + #send(feature_selection_algorithm.to_sym) if feature_selection_algorithm + model.save + p model + model end def correlation_filter @@ -181,45 +223,11 @@ module OpenTox end class LazarClassification < Lazar - - def self.create prediction_feature, training_dataset, params={} - model = self.new prediction_feature, training_dataset, params - model.prediction_algorithm = "OpenTox::Algorithm::Classification.weighted_majority_vote" unless model.prediction_algorithm - model.neighbor_algorithm ||= "fingerprint_neighbors" - model.neighbor_algorithm_parameters ||= {} - { - :type => "MP2D", - :dataset_id => training_dataset.id, - :prediction_feature_id => prediction_feature.id, - :min_sim => 0.1 - }.each do |key,value| - model.neighbor_algorithm_parameters[key] ||= value - end - model.save - model - end end class LazarRegression < Lazar - def self.create prediction_feature, training_dataset, params={} - model = self.new prediction_feature, training_dataset, params - model.neighbor_algorithm ||= "fingerprint_neighbors" - model.prediction_algorithm ||= "OpenTox::Algorithm::Regression.local_fingerprint_regression" - model.neighbor_algorithm_parameters ||= {} - { - :min_sim => 0.1, - :dataset_id => training_dataset.id, - :prediction_feature_id => prediction_feature.id, - }.each do |key,value| - model.neighbor_algorithm_parameters[key] ||= value - end - model.neighbor_algorithm_parameters[:type] ||= "MP2D" if training_dataset.substances.first.is_a? Compound - model.save - model - end - end class Prediction diff --git a/lib/opentox.rb b/lib/opentox.rb index 7d8a8a2..5c300cf 100644 --- a/lib/opentox.rb +++ b/lib/opentox.rb @@ -1,8 +1,6 @@ module OpenTox - # Ruby interface - - # create default OpenTox classes (defined in opentox-client.rb) + # create default OpenTox classes # provides Mongoid's query and persistence methods # http://mongoid.org/en/mongoid/docs/persistence.html # http://mongoid.org/en/mongoid/docs/querying.html @@ -25,4 +23,3 @@ module OpenTox end end - -- cgit v1.2.3