From fbded88db8b51f41ffbd5a02f601e4538ec87258 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Fri, 14 Oct 2016 09:55:51 +0200 Subject: git commit added to model metadata --- lib/caret.rb | 9 ++++++++- lib/compound.rb | 2 -- lib/dataset.rb | 1 - lib/model.rb | 11 +++++++++++ lib/rest-client-wrapper.rb | 6 ------ test/descriptor.rb | 3 --- test/model-regression.rb | 3 ++- 7 files changed, 21 insertions(+), 14 deletions(-) diff --git a/lib/caret.rb b/lib/caret.rb index 2c4cd0c..e24c943 100644 --- a/lib/caret.rb +++ b/lib/caret.rb @@ -2,10 +2,17 @@ module OpenTox module Algorithm class Caret - # TODO classification # model list: https://topepo.github.io/caret/modelList.html def self.create_model_and_predict dependent_variables:, independent_variables:, weights:, method:, query_variables: + remove = [] + # remove independent_variables with single values + independent_variables.each_with_index { |values,i| remove << i if values.uniq.size == 1} + remove.sort.reverse.each do |i| + independent_variables.delete_at i + weights.delete_at i + query_variables.delete_at i + end if independent_variables.flatten.uniq == ["NA"] prediction = Algorithm::Regression::weighted_average dependent_variables:dependent_variables, weights:weights prediction[:warning] = "No variables for regression model. Using weighted average of similar substances." diff --git a/lib/compound.rb b/lib/compound.rb index 6c53cde..e2a55ea 100644 --- a/lib/compound.rb +++ b/lib/compound.rb @@ -76,7 +76,6 @@ module OpenTox end def calculate_properties descriptors=PhysChem::OPENBABEL - # TODO: speedup java descriptors calculated_ids = properties.keys # BSON::ObjectId instances are not allowed as keys in a BSON document. new_ids = descriptors.collect{|d| d.id.to_s} - calculated_ids @@ -95,7 +94,6 @@ module OpenTox end save descriptors.collect{|d| properties[d.id.to_s]} - #properties.select{|id,v| descriptors.collect{|d| d.id.to_s}.include? id} end def smarts_match smarts, count=false diff --git a/lib/dataset.rb b/lib/dataset.rb index 453fc35..ab55294 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -130,7 +130,6 @@ module OpenTox #end # Create a dataset from CSV file - # TODO: document structure def self.from_csv_file file, accept_empty_values=false source = file name = File.basename(file,".*") diff --git a/lib/model.rb b/lib/model.rb index d7b072f..7503215 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -23,6 +23,7 @@ module OpenTox field :descriptor_means, type: Array, default:[] field :descriptor_sds, type: Array, default:[] field :scaled_variables, type: Array, default:[] + field :version, type: Hash, default:{} def self.create prediction_feature:nil, training_dataset:nil, algorithms:{} bad_request_error "Please provide a prediction_feature and/or a training_dataset." unless prediction_feature or training_dataset @@ -35,6 +36,16 @@ module OpenTox model.prediction_feature_id = prediction_feature.id model.training_dataset_id = training_dataset.id model.name = "#{prediction_feature.name} (#{training_dataset.name})" + # TODO: check if this works for gem version, add gem versioning? + dir = File.dirname(__FILE__) + commit = `cd #{dir}; git rev-parse HEAD`.chomp + branch = `cd #{dir}; git rev-parse --abbrev-ref HEAD`.chomp + url = `cd #{dir}; git config --get remote.origin.url`.chomp + if branch + model.version = {:url => url, :branch => branch, :commit => commit} + else + model.version = {:warning => "git is not installed"} + end # set defaults substance_classes = training_dataset.substances.collect{|s| s.class.to_s}.uniq diff --git a/lib/rest-client-wrapper.rb b/lib/rest-client-wrapper.rb index 9321a75..2073be2 100644 --- a/lib/rest-client-wrapper.rb +++ b/lib/rest-client-wrapper.rb @@ -55,14 +55,8 @@ module OpenTox if [301, 302, 307].include? response.code and request.method == :get response.follow_redirection(request, result) elsif response.code >= 400 and !URI.task?(uri) - #TODO add parameters to error-report - #parameters = request.args - #parameters[:headers][:subjectid] = "REMOVED" if parameters[:headers] and parameters[:headers][:subjectid] - #parameters[:url] = parameters[:url].gsub(/(http|https|)\:\/\/[a-zA-Z0-9\-]+\:[a-zA-Z0-9]+\@/, "REMOVED@") if parameters[:url] - #message += "\nREST parameters:\n#{parameters.inspect}" error = known_errors.collect{|e| e if e[:code] == response.code}.compact.first begin # errors are returned as error reports in json, try to parse - # TODO: may be the reason for failure of task.rb -n test_11_wait_for_error_task content = JSON.parse(response) msg = content["message"].to_s cause = content["errorCause"].to_s diff --git a/test/descriptor.rb b/test/descriptor.rb index 911f5c3..e5d8ff9 100644 --- a/test/descriptor.rb +++ b/test/descriptor.rb @@ -26,7 +26,6 @@ class DescriptorTest < MiniTest::Test def test_compound_openbabel_single c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N" - PhysChem.openbabel_descriptors # required for descriptor initialisation, TODO: move into libs feature = PhysChem.find_or_create_by(:name => "Openbabel.logP") result = c.calculate_properties([feature]) assert_equal 1.12518, result.first.round(5) @@ -34,7 +33,6 @@ class DescriptorTest < MiniTest::Test end def test_compound_cdk_single - PhysChem.cdk_descriptors # required for descriptor initialisation, TODO: move into libs c = OpenTox::Compound.from_smiles "c1ccccc1" feature = PhysChem.find_or_create_by(:name => "Cdk.AtomCount.nAtom") result = c.calculate_properties([feature]) @@ -50,7 +48,6 @@ class DescriptorTest < MiniTest::Test end def test_compound_joelib_single - PhysChem.joelib_descriptors # required for descriptor initialisation, TODO: move into libs c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N" result = c.calculate_properties [PhysChem.find_or_create_by(:name => "Joelib.LogP")] assert_equal 2.65908, result.first diff --git a/test/model-regression.rb b/test/model-regression.rb index 644ca1c..86b927c 100644 --- a/test/model-regression.rb +++ b/test/model-regression.rb @@ -122,7 +122,6 @@ class LazarRegressionTest < MiniTest::Test assert_equal algorithms[:descriptors], model.algorithms[:descriptors] prediction = model.predict training_dataset.substances[10] refute_nil prediction[:value] - # TODO test predictin end def test_regression_with_feature_selection @@ -138,6 +137,8 @@ class LazarRegressionTest < MiniTest::Test assert_equal "Algorithm::Similarity.tanimoto", model.algorithms[:similarity][:method] assert_equal 0.1, model.algorithms[:similarity][:min] assert_equal algorithms[:feature_selection][:method], model.algorithms[:feature_selection][:method] + prediction = model.predict training_dataset.substances[10] + refute_nil prediction[:value] end def test_regression_parameters -- cgit v1.2.3