From 5d4ab5553e1928e573aa6671ba6a273308b24e21 Mon Sep 17 00:00:00 2001 From: "helma@in-silico.ch" Date: Wed, 10 Oct 2018 14:58:44 +0200 Subject: more differentiated feature classes --- lib/feature.rb | 62 ++++++++++++++++++++++++++++++++++++++------------------- lib/lazar.rb | 1 - lib/physchem.rb | 8 ++++---- test/feature.rb | 6 +++--- 4 files changed, 48 insertions(+), 29 deletions(-) diff --git a/lib/feature.rb b/lib/feature.rb index 1d18a00..e6fede6 100644 --- a/lib/feature.rb +++ b/lib/feature.rb @@ -2,23 +2,11 @@ module OpenTox # Basic feature class class Feature - field :measured, type: Boolean - field :calculated, type: Boolean - field :category, type: String - field :unit, type: String - field :conditions, type: Hash - - # Is it a nominal feature - # @return [TrueClass,FalseClass] - def nominal? - self.class == NominalFeature - end + end - # Is it a numeric feature - # @return [TrueClass,FalseClass] - def numeric? - self.class == NumericFeature - end + # Original ID (e.g. from CSV input) + class OriginalId < Feature + field :dataset_id, type: BSON::ObjectId end # Feature for categorical variables @@ -28,10 +16,46 @@ module OpenTox # Feature for quantitative variables class NumericFeature < Feature + field :unit, type: String + end + + # Nominal biological activity + class NominalBioActivity < NominalFeature + field :original_feature_id, type: BSON::ObjectId + field :transformation, type: Hash + end + + # Numeric biological activity + class NumericBioActivity < NumericFeature + field :original_feature_id, type: BSON::ObjectId + field :transformation, type: String + end + + # Nominal lazar prediction + class NominalLazarPrediction < NominalFeature + field :model_id, type: BSON::ObjectId + field :training_feature_id, type: BSON::ObjectId + end + + # Numeric lazar prediction + class NumericLazarPrediction < NumericFeature + field :model_id, type: BSON::ObjectId + field :training_feature_id, type: BSON::ObjectId + end + + class NominalSubstanceProperty < NominalFeature + end + + class NumericSubstanceProperty < NumericFeature + end + + class NanoParticleProperty < NumericSubstanceProperty + field :category, type: String + field :conditions, type: Hash end # Feature for SMARTS fragments - class Smarts < NominalFeature + class Smarts < Feature field :smarts, type: String index "smarts" => 1 # Create feature from SMARTS string @@ -42,8 +66,4 @@ module OpenTox end end - class OriginalId < Feature - field :dataset_id, type: BSON::ObjectId - end - end diff --git a/lib/lazar.rb b/lib/lazar.rb index 32f0317..d032282 100644 --- a/lib/lazar.rb +++ b/lib/lazar.rb @@ -94,6 +94,5 @@ CLASSES = ["Feature","Substance","Dataset","LazarPrediction","CrossValidation"," "train-test-validation.rb", "leave-one-out-validation.rb", "crossvalidation.rb", - #"experiment.rb", "import.rb", ].each{ |f| require_relative f } diff --git a/lib/physchem.rb b/lib/physchem.rb index 07df867..2af043b 100644 --- a/lib/physchem.rb +++ b/lib/physchem.rb @@ -1,7 +1,7 @@ module OpenTox # Feature for physico-chemical descriptors - class PhysChem < NumericFeature + class PhysChem < NumericSubstanceProperty field :library, type: String field :descriptor, type: String @@ -45,7 +45,7 @@ module OpenTox def self.descriptors desc=DESCRIPTORS desc.collect do |name,description| lib,desc = name.split('.',2) - self.find_or_create_by(:name => name, :library => lib, :descriptor => desc, :description => description, :measured => false, :calculated => true) + self.find_or_create_by(:name => name, :library => lib, :descriptor => desc, :description => description) end end @@ -59,11 +59,11 @@ module OpenTox CDK_DESCRIPTIONS.select{|d| desc == d[:java_class].split('.').last.sub('Descriptor','') }.first[:names].each do |n| dname = "#{name}.#{n}" description = DESCRIPTORS[dname] - udesc << self.find_or_create_by(:name => dname, :library => lib, :descriptor => desc, :description => description, :measured => false, :calculated => true) + udesc << self.find_or_create_by(:name => dname, :library => lib, :descriptor => desc, :description => description) end else description = DESCRIPTORS[name] - udesc << self.find_or_create_by(:name => name, :library => lib, :descriptor => desc, :description => description, :measured => false, :calculated => true) + udesc << self.find_or_create_by(:name => name, :library => lib, :descriptor => desc, :description => description) end end udesc diff --git a/test/feature.rb b/test/feature.rb index 85ce588..c2bdbb5 100644 --- a/test/feature.rb +++ b/test/feature.rb @@ -55,15 +55,15 @@ class FeatureTest < MiniTest::Test end def test_physchem_description - assert_equal 346, PhysChem.descriptors.size - assert_equal 15, PhysChem.openbabel_descriptors.size + assert_equal 347, PhysChem.descriptors.size + assert_equal 16, PhysChem.openbabel_descriptors.size assert_equal 286, PhysChem.cdk_descriptors.size assert_equal 45, PhysChem.joelib_descriptors.size assert_equal 309, PhysChem.unique_descriptors.size end def test_physchem - assert_equal 346, PhysChem.descriptors.size + assert_equal 347, PhysChem.descriptors.size c = Compound.from_smiles "CC(=O)CC(C)C" logP = PhysChem.find_or_create_by :name => "Openbabel.logP" assert_equal 1.6215, c.calculate_properties([logP]).first -- cgit v1.2.3