From cdab5069ded9490afe81095059e9a407faf864d9 Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Tue, 10 Jan 2017 13:44:43 +0100
Subject: independent_variables stored in GridFS to avoid Mongo database size
 limit problems

---
 lib/model.rb | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

(limited to 'lib/model.rb')
diff --git a/lib/model.rb b/lib/model.rb
index 9c4a93f..e5834ae 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -9,6 +9,8 @@ module OpenTox
       include Mongoid::Timestamps
       store_in collection: "models"
 
+      attr_writer :independent_variables # store in GridFS to avoid Mongo database size limit problems
+
       field :name, type: String
       field :creator, type: String, default: __FILE__
       field :algorithms, type: Hash, default:{}
@@ -17,7 +19,7 @@ module OpenTox
       field :prediction_feature_id, type: BSON::ObjectId
       field :dependent_variables, type: Array, default:[]
       field :descriptor_ids, type:Array, default:[]
-      field :independent_variables, type: Array, default:[]
+      field :independent_variables_id, type: BSON::ObjectId
       field :fingerprints, type: Array, default:[]
       field :descriptor_weights, type: Array, default:[]
       field :descriptor_means, type: Array, default:[]
@@ -119,6 +121,7 @@ module OpenTox
         end
 
         descriptor_method = model.algorithms[:descriptors][:method]
+        model.independent_variables = []
         case descriptor_method
         # parse fingerprints
         when "fingerprint"
@@ -179,6 +182,7 @@ module OpenTox
 
       def predict_substance substance
         
+        @independent_variables = Marshal.load $gridfs.find_one(_id: self.independent_variables_id).data
         case algorithms[:similarity][:method]
         when /tanimoto/ # binary features
           similarity_descriptors = substance.fingerprint algorithms[:descriptors][:type]
@@ -234,7 +238,7 @@ module OpenTox
               neighbor_dependent_variables << dependent_variables[i]
               independent_variables.each_with_index do |c,j|
                 neighbor_independent_variables[j] ||= []
-                neighbor_independent_variables[j] << independent_variables[j][i]
+                neighbor_independent_variables[j] << @independent_variables[j][i]
               end
             end
           end
@@ -302,6 +306,17 @@ module OpenTox
 
       end
 
+      def save # store independent_variables in GridFS to avoid Mongo database size limit problems
+        file = Mongo::Grid::File.new(Marshal.dump(@independent_variables), :filename => "#{id}.independent_variables")
+        self.independent_variables_id = $gridfs.insert_one(file)
+        super
+      end
+
+      def independent_variables 
+        @independent_variables ||= Marshal.load $gridfs.find_one(_id: self.independent_variables_id).data
+        @independent_variables
+      end
+
       def training_dataset
         Dataset.find(training_dataset_id)
       end
-- 
cgit v1.2.3


From d4e84b31bff853068f4f1602e3aac3d782558399 Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Tue, 10 Jan 2017 16:29:02 +0100
Subject: initial model documentation

---
 lib/model.rb | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'lib/model.rb')

diff --git a/lib/model.rb b/lib/model.rb
index e5834ae..7731705 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -180,6 +180,9 @@ module OpenTox
         model
       end
 
+      # Predict a substance 
+      # @param [OpenTox::Substance]
+      # @return [Hash]
       def predict_substance substance
         
         @independent_variables = Marshal.load $gridfs.find_one(_id: self.independent_variables_id).data
@@ -260,6 +263,9 @@ module OpenTox
         prediction
       end
 
+      # Predict a substance (compound or nanoparticle), an array of substances or a dataset
+      # @param [OpenTox::Compound, OpenTox::Nanoparticle, Array<OpenTox::Substance>, OpenTox::Dataset] object
+      # @return [Hash, Array<Hash>, OpenTox::Dataset]
       def predict object
 
         training_dataset = Dataset.find training_dataset_id
@@ -345,6 +351,7 @@ module OpenTox
     class LazarRegression < Lazar
     end
 
+    # Convenience class for generating and validating lazar models in a single step and predicting substances (compounds and nanoparticles), arrays of substances and datasets
     class Validation
 
       include OpenTox
@@ -358,6 +365,9 @@ module OpenTox
       field :model_id, type: BSON::ObjectId
       field :repeated_crossvalidation_id, type: BSON::ObjectId
 
+      # Predict a substance (compound or nanoparticle), an array of substances or a dataset
+      # @param [OpenTox::Compound, OpenTox::Nanoparticle, Array<OpenTox::Substance>, OpenTox::Dataset] object
+      # @return [Hash, Array<Hash>, OpenTox::Dataset]
       def predict object
         model.predict object
       end
@@ -394,6 +404,10 @@ module OpenTox
         model.is_a? LazarClassification
       end
 
+      # Create and validate a lazar model from a csv file with training data and a json file with metadata
+      #
+      # @param [File] CSV file with two columns. The first line should contain either SMILES or InChI (first column) and the endpoint (second column). The first column should contain either the SMILES or InChI of the training compounds, the second column the training compounds toxic activities (qualitative or quantitative). Use -log10 transformed values for regression datasets. Add metadata to a JSON file with the same basename containing the fields "species", "endpoint", "source" and "unit" (regression only). You can find example training data at [Github](https://github.com/opentox/lazar-public-data).
+      # @return [OpenTox::Model::Validation] lazar model with three independent 10-fold crossvalidations
       def self.from_csv_file file
         metadata_file = file.sub(/csv$/,"json")
         bad_request_error "No metadata file #{metadata_file}" unless File.exist? metadata_file
-- 
cgit v1.2.3


From b5d6446f058916d018139948002b6e9d1162d4fe Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Tue, 10 Jan 2017 16:56:48 +0100
Subject: model documentation

---
 lib/model.rb | 37 +++++++++++++++++++++++++++++++++++--
 1 file changed, 35 insertions(+), 2 deletions(-)

(limited to 'lib/model.rb')

diff --git a/lib/model.rb b/lib/model.rb
index 7731705..321636d 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -27,6 +27,11 @@ module OpenTox
       field :scaled_variables, type: Array, default:[]
       field :version, type: Hash, default:{}
       
+      # Create a lazar model
+      # @param [OpenTox::Dataset, nil] training_dataset
+      # @param [OpenTox::Feature, nil] prediction_feature
+      # @param [Hash] algorithms
+      # @return [OpenTox::Model::Lazar]
       def self.create prediction_feature:nil, training_dataset:nil, algorithms:{}
         bad_request_error "Please provide a prediction_feature and/or a training_dataset." unless prediction_feature or training_dataset
         prediction_feature = training_dataset.features.first unless prediction_feature
@@ -318,23 +323,33 @@ module OpenTox
         super
       end
 
+      # Get independent variables
+      # @return [Array<Array>]
       def independent_variables 
         @independent_variables ||= Marshal.load $gridfs.find_one(_id: self.independent_variables_id).data
         @independent_variables
       end
 
+      # Get training dataset
+      # @return [OpenTox::Dataset]
       def training_dataset
         Dataset.find(training_dataset_id)
       end
 
+      # Get prediction feature
+      # @return [OpenTox::Feature]
       def prediction_feature
         Feature.find(prediction_feature_id)
       end
 
+      # Get training descriptors
+      # @return [Array<OpenTox::Feature>]
       def descriptors
         descriptor_ids.collect{|id| Feature.find(id)}
       end
 
+      # Get training substances
+      # @return [Array<OpenTox::Substance>]
       def substances
         substance_ids.collect{|id| Substance.find(id)}
       end
@@ -345,9 +360,11 @@ module OpenTox
 
     end
 
+    # Classification model
     class LazarClassification < Lazar
     end
 
+    # Regression model
     class LazarRegression < Lazar
     end
 
@@ -372,26 +389,38 @@ module OpenTox
         model.predict object
       end
 
+      # Get training dataset
+      # @return [OpenTox::Dataset]
       def training_dataset
         model.training_dataset
       end
 
+      # Get lazar model
+      # @return [OpenTox::Model::Lazar]
       def model
         Lazar.find model_id
       end
 
+      # Get algorithms
+      # @return [Hash]
       def algorithms
         model.algorithms
       end
 
+      # Get prediction feature
+      # @return [OpenTox::Feature]
       def prediction_feature
         model.prediction_feature
       end
 
+      # Get repeated crossvalidations
+      # @return [OpenTox::Validation::RepeatedCrossValidation]
       def repeated_crossvalidation
         OpenTox::Validation::RepeatedCrossValidation.find repeated_crossvalidation_id # full class name required
       end
 
+      # Get crossvalidations
+      # @return [Array<OpenTox::CrossValidation>]
       def crossvalidations
         repeated_crossvalidation.crossvalidations
       end
@@ -405,8 +434,7 @@ module OpenTox
       end
 
       # Create and validate a lazar model from a csv file with training data and a json file with metadata
-      #
-      # @param [File] CSV file with two columns. The first line should contain either SMILES or InChI (first column) and the endpoint (second column). The first column should contain either the SMILES or InChI of the training compounds, the second column the training compounds toxic activities (qualitative or quantitative). Use -log10 transformed values for regression datasets. Add metadata to a JSON file with the same basename containing the fields "species", "endpoint", "source" and "unit" (regression only). You can find example training data at [Github](https://github.com/opentox/lazar-public-data).
+      # @param [File] file CSV file with two columns. The first line should contain either SMILES or InChI (first column) and the endpoint (second column). The first column should contain either the SMILES or InChI of the training compounds, the second column the training compounds toxic activities (qualitative or quantitative). Use -log10 transformed values for regression datasets. Add metadata to a JSON file with the same basename containing the fields "species", "endpoint", "source" and "unit" (regression only). You can find example training data at https://github.com/opentox/lazar-public-data.
       # @return [OpenTox::Model::Validation] lazar model with three independent 10-fold crossvalidations
       def self.from_csv_file file
         metadata_file = file.sub(/csv$/,"json")
@@ -420,6 +448,11 @@ module OpenTox
         model_validation
       end
 
+      # Create and validate a nano-lazar model, import data from eNanoMapper if necessary
+      # @param [OpenTox::Dataset, nil] training_dataset
+      # @param [OpenTox::Feature, nil] prediction_feature
+      # @param [Hash, nil] algorithms
+      # @return [OpenTox::Model::Validation] lazar model with five independent 10-fold crossvalidations
       def self.from_enanomapper training_dataset: nil, prediction_feature:nil, algorithms: nil
         
         # find/import training_dataset
-- 
cgit v1.2.3


From ed0d7edee4ac9831b58a01555de8bdba3534495e Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Wed, 11 Jan 2017 08:24:23 +0100
Subject: model documentation updated

---
 lib/model.rb | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'lib/model.rb')

diff --git a/lib/model.rb b/lib/model.rb
index 321636d..64edb76 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -28,11 +28,14 @@ module OpenTox
       field :version, type: Hash, default:{}
       
       # Create a lazar model
-      # @param [OpenTox::Dataset, nil] training_dataset
+      # @param [OpenTox::Dataset] training_dataset
       # @param [OpenTox::Feature, nil] prediction_feature
-      # @param [Hash] algorithms
+      #   By default the first feature of the training dataset will be predicted, specify a prediction_feature if you want to predict another feature
+      # @param [Hash, nil] algorithms
+      #   Default algorithms will be used, if no algorithms parameter is provided. The algorithms hash has the following keys: :descriptors (specifies the descriptors to be used for similarity calculations and local QSAR models), :similarity (similarity algorithm and threshold), :feature_selection (feature selection algorithm), :prediction (local QSAR algorithm). Default parameters are used for unspecified keys. 
+      #
       # @return [OpenTox::Model::Lazar]
-      def self.create prediction_feature:nil, training_dataset:nil, algorithms:{}
+      def self.create prediction_feature:nil, training_dataset:, algorithms:{}
         bad_request_error "Please provide a prediction_feature and/or a training_dataset." unless prediction_feature or training_dataset
         prediction_feature = training_dataset.features.first unless prediction_feature
         # TODO: prediction_feature without training_dataset: use all available data
@@ -185,7 +188,7 @@ module OpenTox
         model
       end
 
-      # Predict a substance 
+      # Predict a substance (compound or nanoparticle)
       # @param [OpenTox::Substance]
       # @return [Hash]
       def predict_substance substance
@@ -449,6 +452,7 @@ module OpenTox
       end
 
       # Create and validate a nano-lazar model, import data from eNanoMapper if necessary
+      # nano-lazar methods are described in detail in https://github.com/enanomapper/nano-lazar-paper/blob/master/nano-lazar.pdf
       # @param [OpenTox::Dataset, nil] training_dataset
       # @param [OpenTox::Feature, nil] prediction_feature
       # @param [Hash, nil] algorithms
-- 
cgit v1.2.3


From 04ebe0640ab6e566dfc316f80a020d1e78b10924 Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Wed, 11 Jan 2017 09:20:40 +0100
Subject: validation documentation

---
 lib/model.rb | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'lib/model.rb')

diff --git a/lib/model.rb b/lib/model.rb
index 64edb76..b18610d 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -320,7 +320,9 @@ module OpenTox
 
       end
 
-      def save # store independent_variables in GridFS to avoid Mongo database size limit problems
+      # Save the model
+      #   Stores independent_variables in GridFS to avoid Mongo database size limit problems
+      def save
         file = Mongo::Grid::File.new(Marshal.dump(@independent_variables), :filename => "#{id}.independent_variables")
         self.independent_variables_id = $gridfs.insert_one(file)
         super
@@ -357,6 +359,8 @@ module OpenTox
         substance_ids.collect{|id| Substance.find(id)}
       end
 
+      # Are fingerprints used as descriptors
+      # @return [TrueClass, FalseClass]
       def fingerprints?
         algorithms[:descriptors][:method] == "fingerprint" ? true : false
       end
@@ -428,10 +432,14 @@ module OpenTox
         repeated_crossvalidation.crossvalidations
       end
 
+      # Is it a regression model
+      # @return [TrueClass, FalseClass]
       def regression?
         model.is_a? LazarRegression
       end
 
+      # Is it a classification model
+      # @return [TrueClass, FalseClass]
       def classification?
         model.is_a? LazarClassification
       end
@@ -452,7 +460,7 @@ module OpenTox
       end
 
       # Create and validate a nano-lazar model, import data from eNanoMapper if necessary
-      # nano-lazar methods are described in detail in https://github.com/enanomapper/nano-lazar-paper/blob/master/nano-lazar.pdf
+      #   nano-lazar methods are described in detail in https://github.com/enanomapper/nano-lazar-paper/blob/master/nano-lazar.pdf
       # @param [OpenTox::Dataset, nil] training_dataset
       # @param [OpenTox::Feature, nil] prediction_feature
       # @param [Hash, nil] algorithms
-- 
cgit v1.2.3