From cdab5069ded9490afe81095059e9a407faf864d9 Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Tue, 10 Jan 2017 13:44:43 +0100
Subject: independent_variables stored in GridFS to avoid Mongo database size
 limit problems

---
 lib/compound.rb |  2 --
 lib/model.rb    | 19 +++++++++++++++++--
 test/setup.rb   |  6 +-----
 3 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/lib/compound.rb b/lib/compound.rb
index 8a1143b..1c308d8 100644
--- a/lib/compound.rb
+++ b/lib/compound.rb
@@ -209,7 +209,6 @@ module OpenTox
        update(:svg_id => $gridfs.insert_one(file))
       end
       $gridfs.find_one(_id: self.svg_id).data
-
     end
 
     # Get png image
@@ -223,7 +222,6 @@ module OpenTox
        update(:png_id => $gridfs.insert_one(file))
       end
       Base64.decode64($gridfs.find_one(_id: self.png_id).data)
-
     end
 
     # Get all known compound names. Relies on an external service for name lookups.
diff --git a/lib/model.rb b/lib/model.rb
index 9c4a93f..e5834ae 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -9,6 +9,8 @@ module OpenTox
       include Mongoid::Timestamps
       store_in collection: "models"
 
+      attr_writer :independent_variables # store in GridFS to avoid Mongo database size limit problems
+
       field :name, type: String
       field :creator, type: String, default: __FILE__
       field :algorithms, type: Hash, default:{}
@@ -17,7 +19,7 @@ module OpenTox
       field :prediction_feature_id, type: BSON::ObjectId
       field :dependent_variables, type: Array, default:[]
       field :descriptor_ids, type:Array, default:[]
-      field :independent_variables, type: Array, default:[]
+      field :independent_variables_id, type: BSON::ObjectId
       field :fingerprints, type: Array, default:[]
       field :descriptor_weights, type: Array, default:[]
       field :descriptor_means, type: Array, default:[]
@@ -119,6 +121,7 @@ module OpenTox
         end
 
         descriptor_method = model.algorithms[:descriptors][:method]
+        model.independent_variables = []
         case descriptor_method
         # parse fingerprints
         when "fingerprint"
@@ -179,6 +182,7 @@ module OpenTox
 
       def predict_substance substance
         
+        @independent_variables = Marshal.load $gridfs.find_one(_id: self.independent_variables_id).data
         case algorithms[:similarity][:method]
         when /tanimoto/ # binary features
           similarity_descriptors = substance.fingerprint algorithms[:descriptors][:type]
@@ -234,7 +238,7 @@ module OpenTox
               neighbor_dependent_variables << dependent_variables[i]
               independent_variables.each_with_index do |c,j|
                 neighbor_independent_variables[j] ||= []
-                neighbor_independent_variables[j] << independent_variables[j][i]
+                neighbor_independent_variables[j] << @independent_variables[j][i]
               end
             end
           end
@@ -302,6 +306,17 @@ module OpenTox
 
       end
 
+      def save # store independent_variables in GridFS to avoid Mongo database size limit problems
+        file = Mongo::Grid::File.new(Marshal.dump(@independent_variables), :filename => "#{id}.independent_variables")
+        self.independent_variables_id = $gridfs.insert_one(file)
+        super
+      end
+
+      def independent_variables 
+        @independent_variables ||= Marshal.load $gridfs.find_one(_id: self.independent_variables_id).data
+        @independent_variables
+      end
+
       def training_dataset
         Dataset.find(training_dataset_id)
       end
diff --git a/test/setup.rb b/test/setup.rb
index 63b59fb..40c8ebf 100644
--- a/test/setup.rb
+++ b/test/setup.rb
@@ -6,8 +6,4 @@ include OpenTox
 TEST_DIR ||= File.expand_path(File.dirname(__FILE__))
 DATA_DIR ||= File.join(TEST_DIR,"data")
 training_dataset = Dataset.where(:name => "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles").first
-unless training_dataset
-  Import::Enanomapper.import File.join(File.dirname(__FILE__),"data","enm")
-end
-#$mongo.database.drop
-#$gridfs = $mongo.database.fs
+Import::Enanomapper.import unless training_dataset
-- 
cgit v1.2.3


From 9636f06938619e7596ec19b65daba5dbe8c212c4 Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Tue, 10 Jan 2017 15:25:42 +0100
Subject: Brief tutorial in README.md

---
 README.md | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 64 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 431c8b9..6354606 100644
--- a/README.md
+++ b/README.md
@@ -26,10 +26,73 @@ Installation
 
   The output should give you more verbose information that can help in debugging (e.g. to identify missing libraries).
 
+Tutorial
+--------
+
+Execute the following commands either from an interactive Ruby shell or a Ruby script:
+
+Create and use `lazar` models for small molecules
+.................................................
+
+#### Create a training dataset
+
+  Create a CSV file with two columns. The first line should contain either SMILES or InChI (first column) and the endpoint (second column). The first column should contain either the SMILES or InChI of the training compounds, the second column the training compounds toxic activities (qualitative or quantitative). Use -log10 transformed values for regression datasets. Add metadata to a JSON file with the same basename containing the fields "species", "endpoint", "source" and "unit" (regression only). You can find example training data at [Github](https://github.com/opentox/lazar-public-data).
+
+#### Create and validate a `lazar` model with default algorithms and parameters
+
+  `validated_model = Model::Validation.create_from_csv_file EPAFHM_log10.csv`
+
+#### Inspect crossvalidation results
+
+  `validated_model.crossvalidations`
+
+#### Predict a new compound
+
+  Create a compound
+
+  `compound = Compound.from_smiles "NC(=O)OCCC"`
+
+  Predict Fathead Minnow Acute Toxicity
+
+  `validated_model.predict compound`
+
+#### Experiment with other algorithms
+
+  You can pass algorithms parameters to the `Model::Validation.create_from_csv_file` command. The [API documentation](http://rdoc.info/gems/lazar) provides detailed instructions.
+
+Create and use `lazar` nanoparticle models
+........................................
+
+#### Create and validate a `nano-lazar` model from eNanoMapper with default algorithms and parameters
+
+  `validated_model = Model::Validation.from_enanomapper`
+
+  This command will mirror the eNanoMapper database in the local database, create a `nano-lazar` model and validate it with five independent 10-fold crossvalidations.
+
+#### Inspect crossvalidation results
+
+  `validated_model.crossvalidations`
+
+#### Predict nanoparticle toxicities
+
+  Choose a random nanoparticle from the "Protein Corona" dataset
+  ```
+  training_dataset = Dataset.where(:name => "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles").first
+  nanoparticle = training_dataset.substances.shuffle.first
+  ```
+
+  Predict the "Net Cell Association" endpoint
+
+  `validated_model.predict nanoparticle`
+
+#### Experiment with other datasets, endpoints and algorithms
+
+  You can pass training_dataset, prediction_feature and algorithms parameters to the `Model::Validation.create_from_enanomapper` command. The [API documentation](http://rdoc.info/gems/lazar) provides detailed instructions.
+
 Documentation
 -------------
 * [API documentation](http://rdoc.info/gems/lazar)
 
 Copyright
 ---------
-Copyright (c) 2009-2015 Christoph Helma, Martin Guetlein, Micha Rautenberg, Andreas Maunz, David Vorgrimmler, Denis Gebele. See LICENSE for details.
+Copyright (c) 2009-2017 Christoph Helma, Martin Guetlein, Micha Rautenberg, Andreas Maunz, David Vorgrimmler, Denis Gebele. See LICENSE for details.
-- 
cgit v1.2.3


From 5db4a0d44073ddce8c1641a29450d31474b9e831 Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Tue, 10 Jan 2017 15:27:56 +0100
Subject: README.md headings fixed

---
 README.md | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 6354606..0cffb15 100644
--- a/README.md
+++ b/README.md
@@ -31,8 +31,7 @@ Tutorial
 
 Execute the following commands either from an interactive Ruby shell or a Ruby script:
 
-Create and use `lazar` models for small molecules
-.................................................
+### Create and use `lazar` models for small molecules
 
 #### Create a training dataset
 
@@ -60,8 +59,7 @@ Create and use `lazar` models for small molecules
 
   You can pass algorithms parameters to the `Model::Validation.create_from_csv_file` command. The [API documentation](http://rdoc.info/gems/lazar) provides detailed instructions.
 
-Create and use `lazar` nanoparticle models
-........................................
+### Create and use `lazar` nanoparticle models
 
 #### Create and validate a `nano-lazar` model from eNanoMapper with default algorithms and parameters
 
-- 
cgit v1.2.3


From 093583c8c23b20f18cf82fb15913fe257a3dd72b Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Tue, 10 Jan 2017 15:31:25 +0100
Subject: README.md CV information fixed

---
 README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/README.md b/README.md
index 0cffb15..7e0e420 100644
--- a/README.md
+++ b/README.md
@@ -41,6 +41,8 @@ Execute the following commands either from an interactive Ruby shell or a Ruby s
 
   `validated_model = Model::Validation.create_from_csv_file EPAFHM_log10.csv`
 
+  This command will create a `lazar` model and validate it with three independent 10-fold crossvalidations.
+
 #### Inspect crossvalidation results
 
   `validated_model.crossvalidations`
-- 
cgit v1.2.3


From d4e84b31bff853068f4f1602e3aac3d782558399 Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Tue, 10 Jan 2017 16:29:02 +0100
Subject: initial model documentation

---
 lib/model.rb | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/lib/model.rb b/lib/model.rb
index e5834ae..7731705 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -180,6 +180,9 @@ module OpenTox
         model
       end
 
+      # Predict a substance 
+      # @param [OpenTox::Substance]
+      # @return [Hash]
       def predict_substance substance
         
         @independent_variables = Marshal.load $gridfs.find_one(_id: self.independent_variables_id).data
@@ -260,6 +263,9 @@ module OpenTox
         prediction
       end
 
+      # Predict a substance (compound or nanoparticle), an array of substances or a dataset
+      # @param [OpenTox::Compound, OpenTox::Nanoparticle, Array<OpenTox::Substance>, OpenTox::Dataset]
+      # @return [Hash, Array<Hash>, OpenTox::Dataset]
       def predict object
 
         training_dataset = Dataset.find training_dataset_id
@@ -345,6 +351,7 @@ module OpenTox
     class LazarRegression < Lazar
     end
 
+    # Convenience class for generating and validating lazar models in a single step and predicting substances (compounds and nanoparticles), arrays of substances and datasets
     class Validation
 
       include OpenTox
@@ -358,6 +365,9 @@ module OpenTox
       field :model_id, type: BSON::ObjectId
       field :repeated_crossvalidation_id, type: BSON::ObjectId
 
+      # Predict a substance (compound or nanoparticle), an array of substances or a dataset
+      # @param [OpenTox::Compound, OpenTox::Nanoparticle, Array<OpenTox::Substance>, OpenTox::Dataset]
+      # @return [Hash, Array<Hash>, OpenTox::Dataset]
       def predict object
         model.predict object
       end
@@ -394,6 +404,10 @@ module OpenTox
         model.is_a? LazarClassification
       end
 
+      # Create and validate a lazar model from a csv file with training data and a json file with metadata
+      #
+      # @param [File] CSV file with two columns. The first line should contain either SMILES or InChI (first column) and the endpoint (second column). The first column should contain either the SMILES or InChI of the training compounds, the second column the training compounds toxic activities (qualitative or quantitative). Use -log10 transformed values for regression datasets. Add metadata to a JSON file with the same basename containing the fields "species", "endpoint", "source" and "unit" (regression only). You can find example training data at [Github](https://github.com/opentox/lazar-public-data).
+      # @return [OpenTox::Model::Validation] lazar model with three independent 10-fold crossvalidations
       def self.from_csv_file file
         metadata_file = file.sub(/csv$/,"json")
         bad_request_error "No metadata file #{metadata_file}" unless File.exist? metadata_file
-- 
cgit v1.2.3


From b5d6446f058916d018139948002b6e9d1162d4fe Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Tue, 10 Jan 2017 16:56:48 +0100
Subject: model documentation

---
 lib/model.rb | 37 +++++++++++++++++++++++++++++++++++--
 1 file changed, 35 insertions(+), 2 deletions(-)

diff --git a/lib/model.rb b/lib/model.rb
index 7731705..321636d 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -27,6 +27,11 @@ module OpenTox
       field :scaled_variables, type: Array, default:[]
       field :version, type: Hash, default:{}
       
+      # Create a lazar model
+      # @param [OpenTox::Dataset, nil] training_dataset
+      # @param [OpenTox::Feature, nil] prediction_feature
+      # @param [Hash] algorithms
+      # @return [OpenTox::Model::Lazar]
       def self.create prediction_feature:nil, training_dataset:nil, algorithms:{}
         bad_request_error "Please provide a prediction_feature and/or a training_dataset." unless prediction_feature or training_dataset
         prediction_feature = training_dataset.features.first unless prediction_feature
@@ -318,23 +323,33 @@ module OpenTox
         super
       end
 
+      # Get independent variables
+      # @return [Array<Array>]
       def independent_variables 
         @independent_variables ||= Marshal.load $gridfs.find_one(_id: self.independent_variables_id).data
         @independent_variables
       end
 
+      # Get training dataset
+      # @return [OpenTox::Dataset]
       def training_dataset
         Dataset.find(training_dataset_id)
       end
 
+      # Get prediction feature
+      # @return [OpenTox::Feature]
       def prediction_feature
         Feature.find(prediction_feature_id)
       end
 
+      # Get training descriptors
+      # @return [Array<OpenTox::Feature>]
       def descriptors
         descriptor_ids.collect{|id| Feature.find(id)}
       end
 
+      # Get training substances
+      # @return [Array<OpenTox::Substance>]
       def substances
         substance_ids.collect{|id| Substance.find(id)}
       end
@@ -345,9 +360,11 @@ module OpenTox
 
     end
 
+    # Classification model
     class LazarClassification < Lazar
     end
 
+    # Regression model
     class LazarRegression < Lazar
     end
 
@@ -372,26 +389,38 @@ module OpenTox
         model.predict object
       end
 
+      # Get training dataset
+      # @return [OpenTox::Dataset]
       def training_dataset
         model.training_dataset
       end
 
+      # Get lazar model
+      # @return [OpenTox::Model::Lazar]
       def model
         Lazar.find model_id
       end
 
+      # Get algorithms
+      # @return [Hash]
       def algorithms
         model.algorithms
       end
 
+      # Get prediction feature
+      # @return [OpenTox::Feature]
       def prediction_feature
         model.prediction_feature
       end
 
+      # Get repeated crossvalidations
+      # @return [OpenTox::Validation::RepeatedCrossValidation]
       def repeated_crossvalidation
         OpenTox::Validation::RepeatedCrossValidation.find repeated_crossvalidation_id # full class name required
       end
 
+      # Get crossvalidations
+      # @return [Array<OpenTox::Validation::CrossValidation>]
       def crossvalidations
         repeated_crossvalidation.crossvalidations
       end
@@ -405,8 +434,7 @@ module OpenTox
       end
 
       # Create and validate a lazar model from a csv file with training data and a json file with metadata
-      #
-      # @param [File] CSV file with two columns. The first line should contain either SMILES or InChI (first column) and the endpoint (second column). The first column should contain either the SMILES or InChI of the training compounds, the second column the training compounds toxic activities (qualitative or quantitative). Use -log10 transformed values for regression datasets. Add metadata to a JSON file with the same basename containing the fields "species", "endpoint", "source" and "unit" (regression only). You can find example training data at [Github](https://github.com/opentox/lazar-public-data).
+      # @param [File] CSV file with two columns. The first line should contain either SMILES or InChI (first column) and the endpoint (second column). The first column should contain either the SMILES or InChI of the training compounds, the second column the training compounds toxic activities (qualitative or quantitative). Use -log10 transformed values for regression datasets. Add metadata to a JSON file with the same basename containing the fields "species", "endpoint", "source" and "unit" (regression only). You can find example training data at https://github.com/opentox/lazar-public-data.
       # @return [OpenTox::Model::Validation] lazar model with three independent 10-fold crossvalidations
       def self.from_csv_file file
         metadata_file = file.sub(/csv$/,"json")
@@ -420,6 +448,11 @@ module OpenTox
         model_validation
       end
 
+      # Create and validate a nano-lazar model, import data from eNanoMapper if necessary
+      # @param [OpenTox::Dataset, nil] training_dataset
+      # @param [OpenTox::Feature, nil] prediction_feature
+      # @param [Hash, nil] algorithms
+      # @return [OpenTox::Model::Validation] lazar model with five independent 10-fold crossvalidations
       def self.from_enanomapper training_dataset: nil, prediction_feature:nil, algorithms: nil
         
         # find/import training_dataset
-- 
cgit v1.2.3


From ed0d7edee4ac9831b58a01555de8bdba3534495e Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Wed, 11 Jan 2017 08:24:23 +0100
Subject: model documentation updated

---
 README.md    |  2 +-
 lib/model.rb | 12 ++++++++----
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 7e0e420..28ed18f 100644
--- a/README.md
+++ b/README.md
@@ -87,7 +87,7 @@ Execute the following commands either from an interactive Ruby shell or a Ruby s
 
 #### Experiment with other datasets, endpoints and algorithms
 
-  You can pass training_dataset, prediction_feature and algorithms parameters to the `Model::Validation.create_from_enanomapper` command. The [API documentation](http://rdoc.info/gems/lazar) provides detailed instructions.
+  You can pass training_dataset, prediction_feature and algorithms parameters to the `Model::Validation.from_enanomapper` command. The [API documentation](http://rdoc.info/gems/lazar) provides detailed instructions. Detailed documentation and validation results can be found in this [publication](https://github.com/enanomapper/nano-lazar-paper/blob/master/nano-lazar.pdf).
 
 Documentation
 -------------
diff --git a/lib/model.rb b/lib/model.rb
index 321636d..64edb76 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -28,11 +28,14 @@ module OpenTox
       field :version, type: Hash, default:{}
       
       # Create a lazar model
-      # @param [OpenTox::Dataset, nil] training_dataset
+      # @param [OpenTox::Dataset] training_dataset
       # @param [OpenTox::Feature, nil] prediction_feature
-      # @param [Hash] algorithms
+      #   By default the first feature of the training dataset will be predicted, specify a prediction_feature if you want to predict another feature
+      # @param [Hash, nil] algorithms
+      #   Default algorithms will be used, if no algorithms parameter is provided. The algorithms hash has the following keys: :descriptors (specifies the descriptors to be used for similarity calculations and local QSAR models), :similarity (similarity algorithm and threshold), :feature_selection (feature selection algorithm), :prediction (local QSAR algorithm). Default parameters are used for unspecified keys. 
+      #
       # @return [OpenTox::Model::Lazar]
-      def self.create prediction_feature:nil, training_dataset:nil, algorithms:{}
+      def self.create prediction_feature:nil, training_dataset:, algorithms:{}
         bad_request_error "Please provide a prediction_feature and/or a training_dataset." unless prediction_feature or training_dataset
         prediction_feature = training_dataset.features.first unless prediction_feature
         # TODO: prediction_feature without training_dataset: use all available data
@@ -185,7 +188,7 @@ module OpenTox
         model
       end
 
-      # Predict a substance 
+      # Predict a substance (compound or nanoparticle)
       # @param [OpenTox::Substance]
       # @return [Hash]
       def predict_substance substance
@@ -449,6 +452,7 @@ module OpenTox
       end
 
       # Create and validate a nano-lazar model, import data from eNanoMapper if necessary
+      # nano-lazar methods are described in detail in https://github.com/enanomapper/nano-lazar-paper/blob/master/nano-lazar.pdf
       # @param [OpenTox::Dataset, nil] training_dataset
       # @param [OpenTox::Feature, nil] prediction_feature
       # @param [Hash, nil] algorithms
-- 
cgit v1.2.3


From 04ebe0640ab6e566dfc316f80a020d1e78b10924 Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Wed, 11 Jan 2017 09:20:40 +0100
Subject: validation documentation

---
 lib/crossvalidation.rb       | 22 ++++++++++++++++++++++
 lib/model.rb                 | 12 ++++++++++--
 lib/validation-statistics.rb | 19 +++++++++++++++++++
 lib/validation.rb            |  3 +++
 4 files changed, 54 insertions(+), 2 deletions(-)

diff --git a/lib/crossvalidation.rb b/lib/crossvalidation.rb
index bcb3ccf..75c5db5 100644
--- a/lib/crossvalidation.rb
+++ b/lib/crossvalidation.rb
@@ -1,10 +1,16 @@
 module OpenTox
 
   module Validation
+
+    # Crossvalidation
     class CrossValidation < Validation
       field :validation_ids, type: Array, default: []
       field :folds, type: Integer, default: 10
 
+      # Create a crossvalidation
+      # @param [OpenTox::Model::Lazar]
+      # @param [Fixnum] number of folds
+      # @return [OpenTox::Validation::CrossValidation]
       def self.create model, n=10
         $logger.debug model.algorithms
         klass = ClassificationCrossValidation if model.is_a? Model::LazarClassification
@@ -41,14 +47,20 @@ module OpenTox
         cv
       end
 
+      # Get execution time
+      # @return [Float]
       def time
         finished_at - created_at
       end
 
+      # Get individual validations
+      # @return [Array<OpenTox::Validation>]
       def validations
         validation_ids.collect{|vid| TrainTest.find vid}
       end
 
+      # Get predictions for all compounds
+      # @return [Array<Hash>]
       def predictions
         predictions = {}
         validations.each{|v| predictions.merge!(v.predictions)}
@@ -56,6 +68,7 @@ module OpenTox
       end
     end
 
+    # Crossvalidation of classification models
     class ClassificationCrossValidation < CrossValidation
       include ClassificationStatistics
       field :accept_values, type: Array
@@ -68,6 +81,7 @@ module OpenTox
       field :probability_plot_id, type: BSON::ObjectId
     end
 
+    # Crossvalidation of regression models
     class RegressionCrossValidation < CrossValidation
       include RegressionStatistics
       field :rmse, type: Float, default:0
@@ -78,10 +92,16 @@ module OpenTox
       field :correlation_plot_id, type: BSON::ObjectId
     end
 
+    # Independent repeated crossvalidations
     class RepeatedCrossValidation < Validation
       field :crossvalidation_ids, type: Array, default: []
       field :correlation_plot_id, type: BSON::ObjectId
 
+      # Create repeated crossvalidations
+      # @param [OpenTox::Model::Lazar]
+      # @param [Fixnum] number of folds
+      # @param [Fixnum] number of repeats
+      # @return [OpenTox::Validation::RepeatedCrossValidation]
       def self.create model, folds=10, repeats=3
         repeated_cross_validation = self.new
         repeats.times do |n|
@@ -92,6 +112,8 @@ module OpenTox
         repeated_cross_validation
       end
 
+      # Get crossvalidations
+      # @return [Array<OpenTox::Validation::CrossValidation>]
       def crossvalidations
         crossvalidation_ids.collect{|id| CrossValidation.find(id)}
       end
diff --git a/lib/model.rb b/lib/model.rb
index 64edb76..b18610d 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -320,7 +320,9 @@ module OpenTox
 
       end
 
-      def save # store independent_variables in GridFS to avoid Mongo database size limit problems
+      # Save the model
+      #   Stores independent_variables in GridFS to avoid Mongo database size limit problems
+      def save
         file = Mongo::Grid::File.new(Marshal.dump(@independent_variables), :filename => "#{id}.independent_variables")
         self.independent_variables_id = $gridfs.insert_one(file)
         super
@@ -357,6 +359,8 @@ module OpenTox
         substance_ids.collect{|id| Substance.find(id)}
       end
 
+      # Are fingerprints used as descriptors
+      # @return [TrueClass, FalseClass]
       def fingerprints?
         algorithms[:descriptors][:method] == "fingerprint" ? true : false
       end
@@ -428,10 +432,14 @@ module OpenTox
         repeated_crossvalidation.crossvalidations
       end
 
+      # Is it a regression model
+      # @return [TrueClass, FalseClass]
       def regression?
         model.is_a? LazarRegression
       end
 
+      # Is it a classification model
+      # @return [TrueClass, FalseClass]
       def classification?
         model.is_a? LazarClassification
       end
@@ -452,7 +460,7 @@ module OpenTox
       end
 
       # Create and validate a nano-lazar model, import data from eNanoMapper if necessary
-      # nano-lazar methods are described in detail in https://github.com/enanomapper/nano-lazar-paper/blob/master/nano-lazar.pdf
+      #   nano-lazar methods are described in detail in https://github.com/enanomapper/nano-lazar-paper/blob/master/nano-lazar.pdf
       # @param [OpenTox::Dataset, nil] training_dataset
       # @param [OpenTox::Feature, nil] prediction_feature
       # @param [Hash, nil] algorithms
diff --git a/lib/validation-statistics.rb b/lib/validation-statistics.rb
index 2202b79..553e6ac 100644
--- a/lib/validation-statistics.rb
+++ b/lib/validation-statistics.rb
@@ -1,7 +1,10 @@
 module OpenTox
   module Validation
+    # Statistical evaluation of classification validations
     module ClassificationStatistics
 
+      # Get statistics
+      # @return [Hash]
       def statistics 
         self.accept_values = model.prediction_feature.accept_values
         self.confusion_matrix = Array.new(accept_values.size){Array.new(accept_values.size,0)}
@@ -63,6 +66,9 @@ module OpenTox
         }
       end
 
+      # Plot accuracy vs prediction probability
+      # @param [String,nil] format
+      # @return [Blob]
       def probability_plot format: "pdf"
         #unless probability_plot_id
 
@@ -99,8 +105,11 @@ module OpenTox
       end
     end
 
+    # Statistical evaluation of regression validations
     module RegressionStatistics
 
+      # Get statistics
+      # @return [Hash]
       def statistics
         self.rmse = 0
         self.mae = 0
@@ -147,10 +156,15 @@ module OpenTox
         }
       end
 
+      # Get percentage of measurements within the prediction interval
+      # @return [Float]
       def percent_within_prediction_interval
         100*within_prediction_interval.to_f/(within_prediction_interval+out_of_prediction_interval)
       end
 
+      # Plot predicted vs measured values
+      # @param [String,nil] format
+      # @return [Blob]
       def correlation_plot format: "png"
         unless correlation_plot_id
           tmpfile = "/tmp/#{id.to_s}_correlation.#{format}"
@@ -177,6 +191,11 @@ module OpenTox
         $gridfs.find_one(_id: correlation_plot_id).data
       end
 
+      # Get predictions with the largest difference between predicted and measured values
+      # @param [Fixnum] number of predictions
+      # @param [TrueClass,FalseClass,nil] include neighbors
+      # @param [TrueClass,FalseClass,nil] show common descriptors
+      # @return [Hash]
       def worst_predictions n: 5, show_neigbors: true, show_common_descriptors: false
         worst_predictions = predictions.sort_by{|sid,p| -(p["value"] - p["measurements"].median).abs}[0,n]
         worst_predictions.collect do |p|
diff --git a/lib/validation.rb b/lib/validation.rb
index ced9596..c9954b6 100644
--- a/lib/validation.rb
+++ b/lib/validation.rb
@@ -2,6 +2,7 @@ module OpenTox
 
   module Validation
 
+    # Base validation class
     class Validation
       include OpenTox
       include Mongoid::Document
@@ -14,6 +15,8 @@ module OpenTox
       field :predictions, type: Hash, default: {}
       field :finished_at, type: Time 
 
+      # Get model
+      # @return [OpenTox::Model::Lazar]
       def model
         Model::Lazar.find model_id
       end
-- 
cgit v1.2.3


From fa4abf3dfc04abcd8bf21f29f73555a5090bc6eb Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Wed, 11 Jan 2017 09:25:33 +0100
Subject: LOO documentation

---
 lib/leave-one-out-validation.rb | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/lib/leave-one-out-validation.rb b/lib/leave-one-out-validation.rb
index 538b7b3..8d22018 100644
--- a/lib/leave-one-out-validation.rb
+++ b/lib/leave-one-out-validation.rb
@@ -2,8 +2,12 @@ module OpenTox
 
   module Validation
 
+    # Leave one out validation
     class LeaveOneOut < Validation
 
+      # Create a leave one out validation
+      # @param [OpenTox::Model::Lazar]
+      # @return [OpenTox::Validation::LeaveOneOut]
       def self.create model
         bad_request_error "Cannot create leave one out validation for models with supervised feature selection. Please use crossvalidation instead." if model.algorithms[:feature_selection]
         $logger.debug "#{model.name}: LOO validation started"
@@ -32,6 +36,7 @@ module OpenTox
 
     end
 
+    # Leave one out validation for classification models
     class ClassificationLeaveOneOut < LeaveOneOut
       include ClassificationStatistics
       field :accept_values, type: Array
@@ -44,6 +49,7 @@ module OpenTox
       field :confidence_plot_id, type: BSON::ObjectId
     end
     
+    # Leave one out validation for regression models
     class RegressionLeaveOneOut  < LeaveOneOut
       include RegressionStatistics
       field :rmse, type: Float, default: 0
-- 
cgit v1.2.3


From f522a1089af8775798450b3f9f0aa4b579a3e1b5 Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Wed, 11 Jan 2017 09:57:36 +0100
Subject: training test set validation documentation

---
 lib/train-test-validation.rb | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/lib/train-test-validation.rb b/lib/train-test-validation.rb
index 71abad2..034ae3a 100644
--- a/lib/train-test-validation.rb
+++ b/lib/train-test-validation.rb
@@ -2,11 +2,17 @@ module OpenTox
 
   module Validation
 
+    # Training test set validation
     class TrainTest < Validation
 
       field :training_dataset_id, type: BSON::ObjectId
       field :test_dataset_id, type: BSON::ObjectId
 
+      # Create a training test set validation
+      # @param [OpenTox::Model::Lazar]
+      # @param [OpenTox::Dataset] training dataset
+      # @param [OpenTox::Dataset] test dataset
+      # @return [OpenTox::Validation::TrainTest]
       def self.create model, training_set, test_set
         
         validation_model = model.class.create prediction_feature: model.prediction_feature, training_dataset: training_set, algorithms: model.algorithms
@@ -32,16 +38,21 @@ module OpenTox
         validation
       end
 
+      # Get test dataset
+      # @return [OpenTox::Dataset]
       def test_dataset
         Dataset.find test_dataset_id
       end
 
+      # Get training dataset
+      # @return [OpenTox::Dataset]
       def training_dataset
         Dataset.find training_dataset_id
       end
 
     end
 
+    # Training test set validation for classification models
     class ClassificationTrainTest < TrainTest
       include ClassificationStatistics
       field :accept_values, type: Array
@@ -54,6 +65,7 @@ module OpenTox
       field :probability_plot_id, type: BSON::ObjectId
     end
 
+    # Training test set validation for regression models
     class RegressionTrainTest < TrainTest
       include RegressionStatistics
       field :rmse, type: Float, default:0
-- 
cgit v1.2.3


From 85553b339acf3f9285a1c03b2fff342d9ddb9b6b Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Wed, 11 Jan 2017 16:00:07 +0100
Subject: documentation for all classes

---
 lib/algorithm.rb           |  1 +
 lib/caret.rb               | 13 +++++-
 lib/classification.rb      |  7 +++-
 lib/compound.rb            | 53 +++++++++++++++----------
 lib/dataset.rb             | 33 ++++++++++++++--
 lib/experiment.rb          | 99 ----------------------------------------------
 lib/feature.rb             |  7 ++++
 lib/feature_selection.rb   |  5 ++-
 lib/import.rb              |  4 +-
 lib/nanoparticle.rb        | 19 +++++++++
 lib/overwrite.rb           | 57 ++++++++++++++++++++------
 lib/physchem.rb            | 28 ++++++++++---
 lib/regression.rb          |  5 +++
 lib/rest-client-wrapper.rb |  1 +
 lib/similarity.rb          | 24 ++++++++++-
 lib/substance.rb           |  1 +
 16 files changed, 211 insertions(+), 146 deletions(-)
 delete mode 100644 lib/experiment.rb

diff --git a/lib/algorithm.rb b/lib/algorithm.rb
index 0e4b93a..f70ac1a 100644
--- a/lib/algorithm.rb
+++ b/lib/algorithm.rb
@@ -2,6 +2,7 @@ module OpenTox
 
   module Algorithm 
 
+    # Execute an algorithm with parameters
     def self.run algorithm, parameters=nil
       klass,method = algorithm.split('.')
       Object.const_get(klass).send(method,parameters) 
diff --git a/lib/caret.rb b/lib/caret.rb
index 7e4f771..f5c2bde 100644
--- a/lib/caret.rb
+++ b/lib/caret.rb
@@ -1,9 +1,17 @@
 module OpenTox
   module Algorithm
     
+    # Ruby interface for the R caret package
+    # Caret model list: https://topepo.github.io/caret/modelList.html
     class Caret
-      # model list: https://topepo.github.io/caret/modelList.html
 
+      # Create a local R caret model and make a prediction
+      # @param [Array<Float,Bool>] dependent_variables
+      # @param [Array<Array<Float,Bool>>] independent_variables
+      # @param [Array<Float>] weights
+      # @param [String] Caret method
+      # @param [Array<Float,Bool>] query_variables
+      # @return [Hash]
       def self.create_model_and_predict dependent_variables:, independent_variables:, weights:, method:, query_variables:
         remove = []
         # remove independent_variables with single values
@@ -77,12 +85,13 @@ module OpenTox
 
       end
 
-      # call caret methods dynamically, e.g. Caret.pls
+      # Call caret methods dynamically, e.g. Caret.pls
       def self.method_missing(sym, *args, &block)
         args.first[:method] = sym.to_s
         self.create_model_and_predict args.first
       end
 
+      # Convert Ruby values to R values
       def self.to_r v
         return "F" if v == false
         return "T" if v == true
diff --git a/lib/classification.rb b/lib/classification.rb
index e8c179f..638492b 100644
--- a/lib/classification.rb
+++ b/lib/classification.rb
@@ -1,9 +1,14 @@
 module OpenTox
   module Algorithm
     
+    # Classification algorithms
     class Classification
 
-      def self.weighted_majority_vote dependent_variables:, independent_variables:nil, weights:, query_variables:
+      # Weighted majority vote
+      # @param [Array<TrueClass,FalseClass>] dependent_variables
+      # @param [Array<Float>] weights
+      # @return [Hash]
+      def self.weighted_majority_vote dependent_variables:, independent_variables:nil, weights:, query_variables:nil
         class_weights = {}
         dependent_variables.each_with_index do |v,i|
           class_weights[v] ||= []
diff --git a/lib/compound.rb b/lib/compound.rb
index 1c308d8..bfe69e3 100644
--- a/lib/compound.rb
+++ b/lib/compound.rb
@@ -2,6 +2,7 @@ CACTUS_URI="https://cactus.nci.nih.gov/chemical/structure/"
 
 module OpenTox
 
+  # Small molecules with defined chemical structures
   class Compound < Substance
     require_relative "unique_descriptors.rb"
     DEFAULT_FINGERPRINT = "MP2D"
@@ -28,6 +29,9 @@ module OpenTox
       compound
     end
 
+    # Create chemical fingerprint
+    # @param [String] fingerprint type
+    # @return [Array<String>] 
     def fingerprint type=DEFAULT_FINGERPRINT
       unless fingerprints[type]
         return [] unless self.smiles
@@ -75,6 +79,9 @@ module OpenTox
       fingerprints[type]
     end
 
+    # Calculate physchem properties
+    # @param [Array<Hash>] list of descriptors
+    # @return [Array<Float>]
     def calculate_properties descriptors=PhysChem::OPENBABEL
       calculated_ids = properties.keys
       # BSON::ObjectId instances are not allowed as keys in a BSON document.
@@ -96,6 +103,10 @@ module OpenTox
       descriptors.collect{|d| properties[d.id.to_s]}
     end
 
+    # Match a SMARTS substructure
+    # @param [String] smarts
+    # @param [TrueClass,FalseClass] count matches or return true/false
+    # @return [TrueClass,FalseClass,Fixnum] 
     def smarts_match smarts, count=false
       obconversion = OpenBabel::OBConversion.new
       obmol = OpenBabel::OBMol.new
@@ -116,8 +127,8 @@ module OpenTox
     # Create a compound from smiles string
     # @example
     #   compound = OpenTox::Compound.from_smiles("c1ccccc1")
-    # @param [String] smiles Smiles string
-    # @return [OpenTox::Compound] Compound
+    # @param [String] smiles 
+    # @return [OpenTox::Compound]
     def self.from_smiles smiles
       if smiles.match(/\s/) # spaces seem to confuse obconversion and may lead to invalid smiles
         $logger.warn "SMILES parsing failed for '#{smiles}'', SMILES string contains whitespaces."
@@ -132,9 +143,9 @@ module OpenTox
       end
     end
 
-    # Create a compound from inchi string
-    # @param inchi [String] smiles InChI string
-    # @return [OpenTox::Compound] Compound
+    # Create a compound from InChI string
+    # @param [String] InChI 
+    # @return [OpenTox::Compound] 
     def self.from_inchi inchi
       #smiles = `echo "#{inchi}" | "#{File.join(File.dirname(__FILE__),"..","openbabel","bin","babel")}" -iinchi - -ocan`.chomp.strip
       smiles = obconversion(inchi,"inchi","can")
@@ -145,9 +156,9 @@ module OpenTox
       end
     end
 
-    # Create a compound from sdf string
-    # @param sdf [String] smiles SDF string
-    # @return [OpenTox::Compound] Compound
+    # Create a compound from SDF 
+    # @param [String] SDF 
+    # @return [OpenTox::Compound] 
     def self.from_sdf sdf
       # do not store sdf because it might be 2D
       Compound.from_smiles obconversion(sdf,"sdf","can")
@@ -156,40 +167,38 @@ module OpenTox
     # Create a compound from name. Relies on an external service for name lookups.
     # @example
     #   compound = OpenTox::Compound.from_name("Benzene")
-    # @param name [String] can be also an InChI/InChiKey, CAS number, etc
-    # @return [OpenTox::Compound] Compound
+    # @param [String] name, can be also an InChI/InChiKey, CAS number, etc
+    # @return [OpenTox::Compound]
     def self.from_name name
       Compound.from_smiles RestClientWrapper.get(File.join(CACTUS_URI,URI.escape(name),"smiles"))
     end
 
     # Get InChI
-    # @return [String] InChI string
+    # @return [String] 
     def inchi
       unless self["inchi"]
-
         result = obconversion(smiles,"smi","inchi")
-        #result = `echo "#{self.smiles}" | "#{File.join(File.dirname(__FILE__),"..","openbabel","bin","babel")}" -ismi - -oinchi`.chomp
         update(:inchi => result.chomp) if result and !result.empty?
       end
       self["inchi"]
     end
 
     # Get InChIKey
-    # @return [String] InChIKey string
+    # @return [String]
     def inchikey
       update(:inchikey => obconversion(smiles,"smi","inchikey")) unless self["inchikey"]
       self["inchikey"]
     end
 
     # Get (canonical) smiles
-    # @return [String] Smiles string
+    # @return [String]
     def smiles
       update(:smiles => obconversion(self["smiles"],"smi","can")) unless self["smiles"] 
       self["smiles"]
     end
 
-    # Get sdf
-    # @return [String] SDF string
+    # Get SDF
+    # @return [String]
     def sdf
       if self.sdf_id.nil? 
         sdf = obconversion(smiles,"smi","sdf")
@@ -227,20 +236,22 @@ module OpenTox
     # Get all known compound names. Relies on an external service for name lookups.
     # @example
     #   names = compound.names
-    # @return [String] Compound names
+    # @return [Array<String>] 
     def names
       update(:names => RestClientWrapper.get("#{CACTUS_URI}#{inchi}/names").split("\n")) unless self["names"] 
       self["names"]
     end
 
-    # @return [String] PubChem Compound Identifier (CID), derieved via restcall to pubchem
+    # Get PubChem Compound Identifier (CID), obtained via REST call to PubChem
+    # @return [String] 
     def cid
       pug_uri = "https://pubchem.ncbi.nlm.nih.gov/rest/pug/"
       update(:cid => RestClientWrapper.post(File.join(pug_uri, "compound", "inchi", "cids", "TXT"),{:inchi => inchi}).strip) unless self["cid"] 
       self["cid"]
     end
 
-    # @return [String] ChEMBL database compound id, derieved via restcall to chembl
+    # Get ChEMBL database compound id, obtained via REST call to ChEMBL
+    # @return [String] 
     def chemblid
       # https://www.ebi.ac.uk/chembldb/ws#individualCompoundByInChiKey
       uri = "https://www.ebi.ac.uk/chemblws/compounds/smiles/#{smiles}.json"
@@ -290,7 +301,7 @@ module OpenTox
       mg.to_f/molecular_weight
     end
     
-    # Calculate molecular weight of Compound with OB and store it in object
+    # Calculate molecular weight of Compound with OB and store it in compound object
     # @return [Float] molecular weight
     def molecular_weight
       mw_feature = PhysChem.find_or_create_by(:name => "Openbabel.MW")
diff --git a/lib/dataset.rb b/lib/dataset.rb
index ab55294..44690e1 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -3,32 +3,43 @@ require 'tempfile'
 
 module OpenTox
 
+  # Collection of substances and features
   class Dataset
 
     field :data_entries, type: Hash, default: {}
 
     # Readers
 
+    # Get all compounds
+    # @return [Array<OpenTox::Compound>]
     def compounds
       substances.select{|s| s.is_a? Compound}
     end
 
+    # Get all nanoparticles
+    # @return [Array<OpenTox::Nanoparticle>]
     def nanoparticles
       substances.select{|s| s.is_a? Nanoparticle}
     end
 
     # Get all substances
+    # @return [Array<OpenTox::Substance>]
     def substances
       @substances ||= data_entries.keys.collect{|id| OpenTox::Substance.find id}.uniq
       @substances
     end
 
     # Get all features
+    # @return [Array<OpenTox::Feature>]
     def features
       @features ||= data_entries.collect{|sid,data| data.keys.collect{|id| OpenTox::Feature.find(id)}}.flatten.uniq
       @features
     end
 
+    # Get all values for a given substance and feature
+    # @param [OpenTox::Substance,BSON::ObjectId,String] substance or substance id
+    # @param [OpenTox::Feature,BSON::ObjectId,String] feature or feature id
+    # @return [TrueClass,FalseClass,Float]
     def values substance,feature
       substance = substance.id if substance.is_a? Substance
       feature = feature.id if feature.is_a? Feature
@@ -41,6 +52,10 @@ module OpenTox
 
     # Writers
 
+    # Add a value for a given substance and feature
+    # @param [OpenTox::Substance,BSON::ObjectId,String] substance or substance id
+    # @param [OpenTox::Feature,BSON::ObjectId,String] feature or feature id
+    # @param [TrueClass,FalseClass,Float]
     def add(substance,feature,value)
       substance = substance.id if substance.is_a? Substance
       feature = feature.id if feature.is_a? Feature
@@ -87,7 +102,7 @@ module OpenTox
 
     # Serialisation
     
-    # converts dataset to csv format including compound smiles as first column, other column headers are feature names
+    # Convert dataset to csv format including compound smiles as first column, other column headers are feature names
     # @return [String]
     def to_csv(inchi=false)
       CSV.generate() do |csv| 
@@ -130,6 +145,9 @@ module OpenTox
     #end
     
     # Create a dataset from CSV file
+    # @param [File] 
+    # @param [TrueClass,FalseClass] accept or reject empty values
+    # @return [OpenTox::Dataset]
     def self.from_csv_file file, accept_empty_values=false
       source = file
       name = File.basename(file,".*")
@@ -145,8 +163,10 @@ module OpenTox
       dataset
     end
 
-    # parse data in tabular format (e.g. from csv)
-    # does a lot of guesswork in order to determine feature types
+    # Parse data in tabular format (e.g. from csv)
+    #   does a lot of guesswork in order to determine feature types
+    # @param [Array<Array>] 
+    # @param [TrueClass,FalseClass] accept or reject empty values
     def parse_table table, accept_empty_values
 
       # features
@@ -225,6 +245,7 @@ module OpenTox
       save
     end
 
+    # Delete dataset
     def delete
       compounds.each{|c| c.dataset_ids.delete id.to_s}
       super
@@ -238,14 +259,20 @@ module OpenTox
     field :prediction_feature_id, type: BSON::ObjectId
     field :predictions, type: Hash, default: {}
 
+    # Get prediction feature
+    # @return [OpenTox::Feature]
     def prediction_feature
       Feature.find prediction_feature_id
     end
 
+    # Get all compounds
+    # @return [Array<OpenTox::Compound>]
     def compounds
       substances.select{|s| s.is_a? Compound}
     end
 
+    # Get all substances
+    # @return [Array<OpenTox::Substance>]
     def substances
       predictions.keys.collect{|id| Substance.find id}
     end
diff --git a/lib/experiment.rb b/lib/experiment.rb
deleted file mode 100644
index 0dfdf86..0000000
--- a/lib/experiment.rb
+++ /dev/null
@@ -1,99 +0,0 @@
-module OpenTox
-
-  class Experiment
-    field :dataset_ids, type: Array
-    field :model_settings, type: Array, default: []
-    field :results, type: Hash, default: {}
-
-    def run 
-      dataset_ids.each do |dataset_id|
-        dataset = Dataset.find(dataset_id)
-        results[dataset_id.to_s] = []
-        model_settings.each do |setting|
-          setting = setting.dup
-          model_algorithm = setting.delete :model_algorithm #if setting[:model_algorithm]
-          model = Object.const_get(model_algorithm).create dataset, setting
-          $logger.debug model
-          model.save
-          repeated_crossvalidation = RepeatedCrossValidation.create model
-          results[dataset_id.to_s] << {:model_id => model.id, :repeated_crossvalidation_id => repeated_crossvalidation.id}
-        end
-      end
-      save
-    end
-
-    def report
-      # statistical significances http://www.r-bloggers.com/anova-and-tukeys-test-on-r/
-      report = {}
-      report[:name] = name
-      report[:experiment_id] = self.id.to_s
-      report[:results] = {}
-      parameters = []
-      dataset_ids.each do |dataset_id|
-        dataset_name = Dataset.find(dataset_id).name
-        report[:results][dataset_name] = {}
-        report[:results][dataset_name][:anova] = {}
-        report[:results][dataset_name][:data] = []
-        # TODO results[dataset_id.to_s] does not exist
-        results[dataset_id.to_s].each do |result|
-          model = Model::Lazar.find(result[:model_id])
-          repeated_cv = RepeatedCrossValidation.find(result[:repeated_crossvalidation_id])
-          crossvalidations = repeated_cv.crossvalidations
-          if crossvalidations.first.is_a? ClassificationCrossValidation
-            parameters = [:accuracy,:true_rate,:predictivity]
-          elsif crossvalidations.first.is_a? RegressionCrossValidation
-            parameters = [:rmse,:mae,:r_squared]
-          end
-          summary = {}
-          [:neighbor_algorithm, :neighbor_algorithm_parameters, :prediction_algorithm].each do |key|
-            summary[key] = model[key]
-          end
-          summary[:nr_instances] = crossvalidations.first.nr_instances
-          summary[:nr_unpredicted] = crossvalidations.collect{|cv| cv.nr_unpredicted}
-          summary[:time] = crossvalidations.collect{|cv| cv.time}
-          parameters.each do |param|
-            summary[param] = crossvalidations.collect{|cv| cv.send(param)}
-          end
-          report[:results][dataset_name][:data] << summary
-        end
-      end
-      report[:results].each do |dataset,results|
-        ([:time,:nr_unpredicted]+parameters).each do |param|
-          experiments = []
-          outcome = []
-          results[:data].each_with_index do |result,i|
-            result[param].each do |p|
-              experiments << i
-              p = nil if p.kind_of? Float and p.infinite? # TODO fix @ division by 0
-              outcome << p
-            end
-          end
-          begin
-          R.assign "experiment_nr",experiments.collect{|i| "Experiment #{i}"}
-          R.eval "experiment_nr = factor(experiment_nr)"
-          R.assign "outcome", outcome
-          R.eval "data = data.frame(experiment_nr,outcome)"
-          # one-way ANOVA
-          R.eval "fit = aov(outcome ~ experiment_nr, data=data,na.action='na.omit')"
-          # http://stackoverflow.com/questions/3366506/extract-p-value-from-aov
-          p_value = R.eval("summary(fit)[[1]][['Pr(>F)']][[1]]").to_ruby
-          # aequivalent
-          # sum = R.eval("summary(fit)")
-          #p_value = sum.to_ruby.first.last.first
-          rescue 
-            p_value = nil
-          end
-          report[:results][dataset][:anova][param] = p_value
-=begin
-=end
-        end
-      end
-      report
-    end
-
-    def summary
-      report[:results].collect{|dataset,data| {dataset => data[:anova].select{|param,p_val| p_val < 0.1}}}
-    end
-  end
-
-end
diff --git a/lib/feature.rb b/lib/feature.rb
index 0ca4d41..f811aef 100644
--- a/lib/feature.rb
+++ b/lib/feature.rb
@@ -8,10 +8,14 @@ module OpenTox
     field :unit, type: String
     field :conditions, type: Hash
 
+    # Is it a nominal feature
+    # @return [TrueClass,FalseClass]
     def nominal?
       self.class == NominalFeature
     end
 
+    # Is it a numeric feature
+    # @return [TrueClass,FalseClass]
     def numeric?
       self.class == NumericFeature
     end
@@ -30,6 +34,9 @@ module OpenTox
   class Smarts < NominalFeature
     field :smarts, type: String 
     index "smarts" => 1
+    # Create feature from SMARTS string
+    # @param [String]
+    # @return [OpenTox::Feature]
     def self.from_smarts smarts
       self.find_or_create_by :smarts => smarts
     end
diff --git a/lib/feature_selection.rb b/lib/feature_selection.rb
index 65f9752..c596b1f 100644
--- a/lib/feature_selection.rb
+++ b/lib/feature_selection.rb
@@ -1,13 +1,16 @@
 module OpenTox
   module Algorithm
     
+    # Feature selection algorithms
     class FeatureSelection
 
+      # Select features correlated to the model's prediction feature
+      # @param [OpenTox::Model::Lazar]
       def self.correlation_filter model
         relevant_features = {}
         R.assign "dependent", model.dependent_variables.collect{|v| to_r(v)}
         model.descriptor_weights = []
-        selected_variables = []
+        selected_variables = [] 
         selected_descriptor_ids = []
         model.independent_variables.each_with_index do |v,i|
           v.collect!{|n| to_r(n)}
diff --git a/lib/import.rb b/lib/import.rb
index 7a68335..fd00fbe 100644
--- a/lib/import.rb
+++ b/lib/import.rb
@@ -1,12 +1,14 @@
 module OpenTox
 
+  # Import data from external databases
   module Import
 
     class Enanomapper
       include OpenTox
 
-      # time critical step: JSON parsing (>99%), Oj brings only minor speed gains (~1%)
+      # Import from eNanoMapper
       def self.import
+        # time critical step: JSON parsing (>99%), Oj brings only minor speed gains (~1%)
         datasets = {}
         bundles = JSON.parse(RestClientWrapper.get('https://data.enanomapper.net/bundle?media=application%2Fjson'))["dataset"]
         bundles.each do |bundle|
diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb
index 06db4d2..73d5f8b 100644
--- a/lib/nanoparticle.rb
+++ b/lib/nanoparticle.rb
@@ -1,25 +1,36 @@
 module OpenTox
 
+  # Nanoparticles
   class Nanoparticle < Substance
     include OpenTox
 
     field :core_id, type: String, default: nil
     field :coating_ids, type: Array, default: []
 
+    # Get core compound
+    # @return [OpenTox::Compound]
     def core
       Compound.find core_id
     end
 
+    # Get coatings
+    # @return [Array<OpenTox::Compound>]
     def coating
       coating_ids.collect{|i| Compound.find i }
     end
 
+    # Get nanoparticle fingerprint (union of core and coating fingerprints)
+    # @param [String] fingerprint type
+    # @return [Array<String>] 
     def fingerprint type=DEFAULT_FINGERPRINT
       core_fp = core.fingerprint type
       coating_fp = coating.collect{|c| c.fingerprint type}.flatten.uniq.compact
       (core_fp.empty? or coating_fp.empty?) ? [] : (core_fp+coating_fp).uniq.compact
     end
 
+    # Calculate physchem properties
+    # @param [Array<Hash>] list of descriptors
+    # @return [Array<Float>]
     def calculate_properties descriptors=PhysChem::OPENBABEL
       if core.smiles and !coating.collect{|c| c.smiles}.compact.empty?
         core_prop = core.calculate_properties descriptors
@@ -28,6 +39,10 @@ module OpenTox
       end
     end
 
+    # Add (measured) feature values
+    # @param [OpenTox::Feature]
+    # @param [TrueClass,FalseClass,Float] 
+    # @param [OpenTox::Dataset]
     def add_feature feature, value, dataset
       unless feature.name == "ATOMIC COMPOSITION" or feature.name == "FUNCTIONAL GROUP" # redundand
         case feature.category
@@ -55,6 +70,10 @@ module OpenTox
       end
     end
 
+    # Parse values from Ambit database
+    # @param [OpenTox::Feature]
+    # @param [TrueClass,FalseClass,Float]
+    # @param [OpenTox::Dataset]
     def parse_ambit_value feature, v, dataset
       # TODO add study id to warnings
       v.delete "unit"
diff --git a/lib/overwrite.rb b/lib/overwrite.rb
index 31d30c9..91bc9e1 100644
--- a/lib/overwrite.rb
+++ b/lib/overwrite.rb
@@ -2,41 +2,51 @@ require "base64"
 class Object
   # An object is blank if it's false, empty, or a whitespace string.
   # For example, "", "   ", +nil+, [], and {} are all blank.
+  # @return [TrueClass,FalseClass]
   def blank?
     respond_to?(:empty?) ? empty? : !self
   end
 
+  # Is it a numeric object
+  # @return [TrueClass,FalseClass]
   def numeric?
     true if Float(self) rescue false
   end
 
   # Returns dimension of nested arrays
+  # @return [Fixnum]
   def dimension
     self.class == Array ? 1 + self[0].dimension : 0
   end
 end
 
 class Numeric
+  # Calculate what percentage this number is of n
+  # @return [Float]
   def percent_of(n)
     self.to_f / n.to_f * 100.0
   end
 end
 
 class Float
-  # round to n significant digits
-  # http://stackoverflow.com/questions/8382619/how-to-round-a-float-to-a-specified-number-of-significant-digits-in-ruby
+  # Round to n significant digits
+  #   http://stackoverflow.com/questions/8382619/how-to-round-a-float-to-a-specified-number-of-significant-digits-in-ruby
+  # @param [Fixnum]
+  # @return [Float]
   def signif(n)
     Float("%.#{n}g" % self)
   end
 
-  # converts -10 logarithmized values back
+  # Convert -10 log values to original values
+  # @return [Float]
   def delog10
     10**(-1*self)
   end
 end
 
 module Enumerable
-  # @return [Array] only the duplicates of an enumerable
+  # Get duplicates
+  # @return [Array] 
   def duplicates
     inject({}) {|h,v| h[v]=h[v].to_i+1; h}.reject{|k,v| v==1}.keys
   end
@@ -51,7 +61,10 @@ module Enumerable
 end
 
 class String
-  # @return [String] converts camel-case to underscore-case (OpenTox::SuperModel -> open_tox/super_model)
+  # Convert camel-case to underscore-case
+  # @example 
+  #   OpenTox::SuperModel -> open_tox/super_model
+  # @return [String]
   def underscore
     self.gsub(/::/, '/').
     gsub(/([A-Z]+)([A-Z][a-z])/,'\1_\2').
@@ -60,7 +73,7 @@ class String
     downcase
   end
 
-  # convert strings to boolean values
+  # Convert strings to boolean values
   # @return [TrueClass,FalseClass] true or false
   def to_boolean
     return true if self == true || self =~ (/(true|t|yes|y|1)$/i)
@@ -71,7 +84,8 @@ class String
 end
 
 class File
-  # @return [String] mime_type including charset using linux cmd command
+  # Get mime_type including charset using linux file command
+  # @return [String]
   def mime_type
     `file -ib '#{self.path}'`.chomp
   end
@@ -79,7 +93,7 @@ end
 
 class Array
 
-  # Sum up the size of single arrays in an array of arrays
+  # Sum the size of single arrays in an array of arrays
   # @param [Array] Array of arrays
   # @return [Integer] Sum of size of array elements
   def sum_size
@@ -92,33 +106,43 @@ class Array
     }
   end
 
-  # For symbolic features
+  # Check if the array has just one unique value.
   # @param [Array] Array to test.
-  # @return [Boolean] Whether the array has just one unique value.
+  # @return [TrueClass,FalseClass] 
   def zero_variance?
     return self.uniq.size == 1
   end
 
+  # Get the median of an array
+  # @return [Numeric]
   def median
     sorted = self.sort
     len = sorted.length
     (sorted[(len - 1) / 2] + sorted[len / 2]) / 2.0
   end
 
+  # Get the mean of an array
+  # @return [Numeric]
   def mean
     self.compact.inject{ |sum, el| sum + el }.to_f / self.compact.size
   end
 
+  # Get the sample variance (n-1 denominator) of an array
+  # @return [Numeric]
   def sample_variance
     m = self.mean
     sum = self.compact.inject(0){|accum, i| accum +(i-m)**2 }
     sum/(self.compact.length - 1).to_f
   end
 
+  # Get the standard deviation of an array
+  # @return [Numeric]
   def standard_deviation
     Math.sqrt(self.sample_variance)
   end
 
+  # Convert array values for R
+  # @return [Array]
   def for_R
     if self.first.is_a?(String) 
       #"\"#{self.collect{|v| v.sub('[','').sub(']','')}.join(" ")}\"" # quote and remove square brackets
@@ -128,6 +152,8 @@ class Array
     end
   end
 
+  # Collect array with index
+  #   in analogy to each_with_index
   def collect_with_index
     result = []
     self.each_with_index do |elt, idx|
@@ -139,11 +165,15 @@ end
 
 module URI
 
+  # Is it an HTTPS URI
+  # @param [String]
+  # @return [TrueClass,FalseClass]
   def self.ssl? uri
     URI.parse(uri).instance_of? URI::HTTPS
   end
 
-  # @return [Boolean] checks if resource exists by making a HEAD-request
+  # Check if a http resource exists by making a HEAD-request
+  # @return [TrueClass,FalseClass]
   def self.accessible?(uri)
     parsed_uri = URI.parse(uri + (OpenTox::RestClientWrapper.subjectid ? "?subjectid=#{CGI.escape OpenTox::RestClientWrapper.subjectid}" : ""))
     http_code = URI.task?(uri) ? 600 : 400
@@ -163,6 +193,9 @@ module URI
     false
   end
 
+  # Is the URI valid
+  # @param [String]
+  # @return [TrueClass,FalseClass]
   def self.valid? uri
     u = URI.parse(uri)
     u.scheme!=nil and u.host!=nil
@@ -170,6 +203,8 @@ module URI
     false
   end
 
+  # Is the URI a task URI
+  # @param [String]
   def self.task? uri
     uri =~ /task/ and URI.valid? uri
   end
diff --git a/lib/physchem.rb b/lib/physchem.rb
index 327acd8..07df867 100644
--- a/lib/physchem.rb
+++ b/lib/physchem.rb
@@ -39,6 +39,9 @@ module OpenTox
 
     require_relative "unique_descriptors.rb"
 
+    # Get descriptor features
+    # @param [Hash]
+    # @return [Array<OpenTox::PhysChem>]
     def self.descriptors desc=DESCRIPTORS
       desc.collect do |name,description|
         lib,desc = name.split('.',2)
@@ -46,6 +49,8 @@ module OpenTox
       end
     end
 
+    # Get unique descriptor features
+    # @return [Array<OpenTox::PhysChem>]
     def self.unique_descriptors
       udesc = []
       UNIQUEDESCRIPTORS.each do |name|
@@ -64,23 +69,28 @@ module OpenTox
       udesc
     end
 
+    # Get OpenBabel descriptor features
+    # @return [Array<OpenTox::PhysChem>]
     def self.openbabel_descriptors
       descriptors OPENBABEL
     end
 
+    # Get CDK descriptor features
+    # @return [Array<OpenTox::PhysChem>]
     def self.cdk_descriptors
       descriptors CDK
     end
 
+    # Get JOELIB descriptor features
+    # @return [Array<OpenTox::PhysChem>]
     def self.joelib_descriptors
       descriptors JOELIB
     end
 
-    def calculate compound
-      result = send library.downcase,descriptor,compound
-      result[self.name]
-    end
-
+    # Calculate OpenBabel descriptors
+    # @param [String] descriptor type
+    # @param [OpenTox::Compound]
+    # @return [Hash]
     def openbabel descriptor, compound
       obdescriptor = OpenBabel::OBDescriptor.find_type descriptor
       obmol = OpenBabel::OBMol.new
@@ -90,10 +100,18 @@ module OpenTox
       {"#{library.capitalize}.#{descriptor}" => fix_value(obdescriptor.predict(obmol))}
     end
 
+    # Calculate CDK descriptors
+    # @param [String] descriptor type
+    # @param [OpenTox::Compound]
+    # @return [Hash]
     def cdk descriptor, compound
       java_descriptor "cdk", descriptor, compound
     end
 
+    # Calculate JOELIB descriptors
+    # @param [String] descriptor type
+    # @param [OpenTox::Compound]
+    # @return [Hash]
     def joelib descriptor, compound
       java_descriptor "joelib", descriptor, compound
     end
diff --git a/lib/regression.rb b/lib/regression.rb
index 3890987..fd2855f 100644
--- a/lib/regression.rb
+++ b/lib/regression.rb
@@ -1,8 +1,13 @@
 module OpenTox
   module Algorithm
     
+    # Regression algorithms
     class Regression
 
+      # Weighted average
+      # @param [Array<Float>] dependent_variables
+      # @param [Array<Float>] weights
+      # @return [Hash]
       def self.weighted_average dependent_variables:, independent_variables:nil, weights:, query_variables:nil
         # TODO: prediction_interval
         weighted_sum = 0.0
diff --git a/lib/rest-client-wrapper.rb b/lib/rest-client-wrapper.rb
index 2073be2..f76a296 100644
--- a/lib/rest-client-wrapper.rb
+++ b/lib/rest-client-wrapper.rb
@@ -1,5 +1,6 @@
 module OpenTox
   
+  # Adjustments to the rest-client gem for OpenTox
   class RestClientWrapper
     
     attr_accessor :request, :response
diff --git a/lib/similarity.rb b/lib/similarity.rb
index 0901936..ccbc9d6 100644
--- a/lib/similarity.rb
+++ b/lib/similarity.rb
@@ -2,6 +2,10 @@ module OpenTox
   module Algorithm
 
     class Vector
+      # Get dot product 
+      # @param [Vector]
+      # @param [Vector]
+      # @return [Numeric]
       def self.dot_product(a, b)
         products = a.zip(b).map{|a, b| a * b}
         products.inject(0) {|s,p| s + p}
@@ -15,6 +19,9 @@ module OpenTox
 
     class Similarity
 
+      # Get Tanimoto similarity
+      # @param [Array<Array<Float>>]
+      # @return [Float]
       def self.tanimoto fingerprints
         ( fingerprints[0] & fingerprints[1]).size/(fingerprints[0]|fingerprints[1]).size.to_f
       end
@@ -23,18 +30,28 @@ module OpenTox
         #( fingerprints[0] & fingerprints[1]).size/(fingerprints[0]|fingerprints[1]).size.to_f
       #end
 
+      # Get Euclidean distance
+      # @param [Array<Array<Float>>] scaled_properties pair of property vectors [a,b]
+      # @return [Float]
       def self.euclid scaled_properties
         sq = scaled_properties[0].zip(scaled_properties[1]).map{|a,b| (a - b) ** 2}
         Math.sqrt(sq.inject(0) {|s,c| s + c})
       end
 
-      # http://stackoverflow.com/questions/1838806/euclidean-distance-vs-pearson-correlation-vs-cosine-similarity
+      # Get cosine similarity
+      #   http://stackoverflow.com/questions/1838806/euclidean-distance-vs-pearson-correlation-vs-cosine-similarity
+      # @param [Array<Array<Float>>] scaled_properties
+      # @return [Float]
       def self.cosine scaled_properties
         scaled_properties = remove_nils scaled_properties
         Algorithm::Vector.dot_product(scaled_properties[0], scaled_properties[1]) / (Algorithm::Vector.magnitude(scaled_properties[0]) * Algorithm::Vector.magnitude(scaled_properties[1]))
       end
 
-      def self.weighted_cosine scaled_properties # [a,b,weights]
+      # Get weighted cosine similarity
+      #   http://stackoverflow.com/questions/1838806/euclidean-distance-vs-pearson-correlation-vs-cosine-similarity
+      # @param [Array<Array<Float>>] [a,b,weights]
+      # @return [Float]
+      def self.weighted_cosine scaled_properties 
         a,b,w = remove_nils scaled_properties
         return cosine(scaled_properties) if w.uniq.size == 1
         dot_product = 0
@@ -48,6 +65,9 @@ module OpenTox
         dot_product/(Math.sqrt(magnitude_a)*Math.sqrt(magnitude_b))
       end
 
+      # Remove nil values
+      # @param [Array<Array<Float>>] [a,b,weights]
+      # @return [Array<Array<Float>>] [a,b,weights]
       def self.remove_nils scaled_properties
         a =[]; b = []; w = []
         (0..scaled_properties.first.size-1).each do |i|
diff --git a/lib/substance.rb b/lib/substance.rb
index 31c465e..ef49659 100644
--- a/lib/substance.rb
+++ b/lib/substance.rb
@@ -1,5 +1,6 @@
 module OpenTox
 
+  # Base class for substances (e.g. compounds, nanoparticles)
   class Substance
     field :properties, type: Hash, default: {}
     field :dataset_ids, type: Array, default: []
-- 
cgit v1.2.3


From d7504cc422bbaeee3546589d87e7baeb4e977c0b Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Thu, 12 Jan 2017 17:57:03 +0100
Subject: source uris for core and coating

---
 lib/import.rb                         | 2 ++
 test/feature.rb                       | 4 ++--
 test/model-nanoparticle.rb            | 7 +++++++
 test/nanomaterial-model-validation.rb | 5 ++---
 test/validation-regression.rb         | 5 ++---
 5 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/lib/import.rb b/lib/import.rb
index fd00fbe..96e7ad1 100644
--- a/lib/import.rb
+++ b/lib/import.rb
@@ -22,6 +22,7 @@ module OpenTox
               uri = c["component"]["compound"]["URI"]
               uri = CGI.escape File.join(uri,"&media=application/json")
               data = JSON.parse(RestClientWrapper.get "https://data.enanomapper.net/query/compound/url/all?media=application/json&search=#{uri}")
+              source = data["dataEntry"][0]["compound"]["URI"]
               smiles = data["dataEntry"][0]["values"]["https://data.enanomapper.net/feature/http%3A%2F%2Fwww.opentox.org%2Fapi%2F1.1%23SMILESDefault"]
               names = []
               names << data["dataEntry"][0]["values"]["https://data.enanomapper.net/feature/http%3A%2F%2Fwww.opentox.org%2Fapi%2F1.1%23ChemicalNameDefault"]
@@ -33,6 +34,7 @@ module OpenTox
               else
                 compound = Compound.find_or_create_by(:name => names.first,:names => names.compact)
               end
+              compound.source = source
               compound.save
               if c["relation"] == "HAS_CORE"
                 core_id = compound.id.to_s
diff --git a/test/feature.rb b/test/feature.rb
index 533ac0f..40edb9f 100644
--- a/test/feature.rb
+++ b/test/feature.rb
@@ -55,7 +55,7 @@ class FeatureTest < MiniTest::Test
   end
 
   def test_physchem_description
-    assert_equal 355, PhysChem.descriptors.size
+    assert_equal 346, PhysChem.descriptors.size
     assert_equal 15, PhysChem.openbabel_descriptors.size
     assert_equal 295, PhysChem.cdk_descriptors.size
     assert_equal 45, PhysChem.joelib_descriptors.size
@@ -63,7 +63,7 @@ class FeatureTest < MiniTest::Test
   end
 
   def test_physchem
-    assert_equal 355, PhysChem.descriptors.size
+    assert_equal 346, PhysChem.descriptors.size
     c = Compound.from_smiles "CC(=O)CC(C)C"
     logP = PhysChem.find_or_create_by :name => "Openbabel.logP"
     assert_equal 1.6215, logP.calculate(c)
diff --git a/test/model-nanoparticle.rb b/test/model-nanoparticle.rb
index 8dc6830..67bbfdd 100644
--- a/test/model-nanoparticle.rb
+++ b/test/model-nanoparticle.rb
@@ -8,6 +8,13 @@ class NanoparticleModelTest  < MiniTest::Test
     @prediction_feature = @training_dataset.features.select{|f| f["name"] == 'log2(Net cell association)'}.first
   end
 
+  def test_core_coating_source_uris
+    @training_dataset.nanoparticles.each do |np|
+      refute_nil np.core.source
+      np.coating.each{|c| refute_nil c.source}
+    end
+  end
+
   def test_nanoparticle_model
     assert true, @prediction_feature.measured
     model = Model::Lazar.create training_dataset: @training_dataset, prediction_feature: @prediction_feature
diff --git a/test/nanomaterial-model-validation.rb b/test/nanomaterial-model-validation.rb
index b91c389..9eaa17d 100644
--- a/test/nanomaterial-model-validation.rb
+++ b/test/nanomaterial-model-validation.rb
@@ -8,7 +8,7 @@ class NanomaterialValidationModelTest < MiniTest::Test
   end
 
   def test_default_nanomaterial_validation_model
-    validation_model = Model::NanoValidation.create
+    validation_model = Model::Validation.from_enanomapper
     [:endpoint,:species,:source].each do |p|
       refute_empty validation_model[p]
     end
@@ -39,7 +39,7 @@ class NanomaterialValidationModelTest < MiniTest::Test
       :prediction => { :method => "OpenTox::Algorithm::Regression.weighted_average" },
       :feature_selection => nil
     }
-    validation_model = Model::NanoValidation.create algorithms: algorithms
+    validation_model = Model::Validation.from_enanomapper algorithms: algorithms
     assert validation_model.regression?
     refute validation_model.classification?
     validation_model.crossvalidations.each do |cv|
@@ -50,6 +50,5 @@ class NanomaterialValidationModelTest < MiniTest::Test
     assert_includes nanoparticle.dataset_ids, @training_dataset.id
     prediction = validation_model.predict nanoparticle
     refute_nil prediction[:value]
-    assert_includes prediction[:prediction_interval][0]..prediction[:prediction_interval][1], prediction[:measurements].median, "This assertion assures that measured values are within the prediction interval. It may fail in 5% of the predictions."
   end
 end
diff --git a/test/validation-regression.rb b/test/validation-regression.rb
index 7630521..01ed644 100644
--- a/test/validation-regression.rb
+++ b/test/validation-regression.rb
@@ -83,10 +83,9 @@ class ValidationRegressionTest < MiniTest::Test
     model = Model::Lazar.create training_dataset: dataset
     repeated_cv = RepeatedCrossValidation.create model
     repeated_cv.crossvalidations.each do |cv|
-      #assert cv.r_squared > 0.34, "R^2 (#{cv.r_squared}) should be larger than 0.034"
-      #assert_operator cv.accuracy, :>, 0.7, "model accuracy < 0.7, this may happen by chance due to an unfavorable training/test set split"
+      assert cv.r_squared > 0.34, "R^2 (#{cv.r_squared}) should be larger than 0.034"
+      assert_operator cv.accuracy, :>, 0.7, "model accuracy < 0.7, this may happen by chance due to an unfavorable training/test set split"
     end
-    File.open("tmp.png","w+"){|f| f.puts repeated_cv.correlation_plot}
   end
 
 end
-- 
cgit v1.2.3