From 85553b339acf3f9285a1c03b2fff342d9ddb9b6b Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Wed, 11 Jan 2017 16:00:07 +0100 Subject: documentation for all classes --- lib/algorithm.rb | 1 + lib/caret.rb | 13 +++++- lib/classification.rb | 7 +++- lib/compound.rb | 53 +++++++++++++++---------- lib/dataset.rb | 33 ++++++++++++++-- lib/experiment.rb | 99 ---------------------------------------------- lib/feature.rb | 7 ++++ lib/feature_selection.rb | 5 ++- lib/import.rb | 4 +- lib/nanoparticle.rb | 19 +++++++++ lib/overwrite.rb | 57 ++++++++++++++++++++------ lib/physchem.rb | 28 ++++++++++--- lib/regression.rb | 5 +++ lib/rest-client-wrapper.rb | 1 + lib/similarity.rb | 24 ++++++++++- lib/substance.rb | 1 + 16 files changed, 211 insertions(+), 146 deletions(-) delete mode 100644 lib/experiment.rb (limited to 'lib') diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 0e4b93a..f70ac1a 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -2,6 +2,7 @@ module OpenTox module Algorithm + # Execute an algorithm with parameters def self.run algorithm, parameters=nil klass,method = algorithm.split('.') Object.const_get(klass).send(method,parameters) diff --git a/lib/caret.rb b/lib/caret.rb index 7e4f771..f5c2bde 100644 --- a/lib/caret.rb +++ b/lib/caret.rb @@ -1,9 +1,17 @@ module OpenTox module Algorithm + # Ruby interface for the R caret package + # Caret model list: https://topepo.github.io/caret/modelList.html class Caret - # model list: https://topepo.github.io/caret/modelList.html + # Create a local R caret model and make a prediction + # @param [Array] dependent_variables + # @param [Array>] independent_variables + # @param [Array] weights + # @param [String] Caret method + # @param [Array] query_variables + # @return [Hash] def self.create_model_and_predict dependent_variables:, independent_variables:, weights:, method:, query_variables: remove = [] # remove independent_variables with single values @@ -77,12 +85,13 @@ module OpenTox end - # call caret 
methods dynamically, e.g. Caret.pls + # Call caret methods dynamically, e.g. Caret.pls def self.method_missing(sym, *args, &block) args.first[:method] = sym.to_s self.create_model_and_predict args.first end + # Convert Ruby values to R values def self.to_r v return "F" if v == false return "T" if v == true diff --git a/lib/classification.rb b/lib/classification.rb index e8c179f..638492b 100644 --- a/lib/classification.rb +++ b/lib/classification.rb @@ -1,9 +1,14 @@ module OpenTox module Algorithm + # Classification algorithms class Classification - def self.weighted_majority_vote dependent_variables:, independent_variables:nil, weights:, query_variables: + # Weighted majority vote + # @param [Array] dependent_variables + # @param [Array] weights + # @return [Hash] + def self.weighted_majority_vote dependent_variables:, independent_variables:nil, weights:, query_variables:nil class_weights = {} dependent_variables.each_with_index do |v,i| class_weights[v] ||= [] diff --git a/lib/compound.rb b/lib/compound.rb index 1c308d8..bfe69e3 100644 --- a/lib/compound.rb +++ b/lib/compound.rb @@ -2,6 +2,7 @@ CACTUS_URI="https://cactus.nci.nih.gov/chemical/structure/" module OpenTox + # Small molecules with defined chemical structures class Compound < Substance require_relative "unique_descriptors.rb" DEFAULT_FINGERPRINT = "MP2D" @@ -28,6 +29,9 @@ module OpenTox compound end + # Create chemical fingerprint + # @param [String] fingerprint type + # @return [Array] def fingerprint type=DEFAULT_FINGERPRINT unless fingerprints[type] return [] unless self.smiles @@ -75,6 +79,9 @@ module OpenTox fingerprints[type] end + # Calculate physchem properties + # @param [Array] list of descriptors + # @return [Array] def calculate_properties descriptors=PhysChem::OPENBABEL calculated_ids = properties.keys # BSON::ObjectId instances are not allowed as keys in a BSON document. 
@@ -96,6 +103,10 @@ module OpenTox descriptors.collect{|d| properties[d.id.to_s]} end + # Match a SMARTS substructure + # @param [String] smarts + # @param [TrueClass,FalseClass] count matches or return true/false + # @return [TrueClass,FalseClass,Fixnum] def smarts_match smarts, count=false obconversion = OpenBabel::OBConversion.new obmol = OpenBabel::OBMol.new @@ -116,8 +127,8 @@ module OpenTox # Create a compound from smiles string # @example # compound = OpenTox::Compound.from_smiles("c1ccccc1") - # @param [String] smiles Smiles string - # @return [OpenTox::Compound] Compound + # @param [String] smiles + # @return [OpenTox::Compound] def self.from_smiles smiles if smiles.match(/\s/) # spaces seem to confuse obconversion and may lead to invalid smiles $logger.warn "SMILES parsing failed for '#{smiles}'', SMILES string contains whitespaces." @@ -132,9 +143,9 @@ module OpenTox end end - # Create a compound from inchi string - # @param inchi [String] smiles InChI string - # @return [OpenTox::Compound] Compound + # Create a compound from InChI string + # @param [String] InChI + # @return [OpenTox::Compound] def self.from_inchi inchi #smiles = `echo "#{inchi}" | "#{File.join(File.dirname(__FILE__),"..","openbabel","bin","babel")}" -iinchi - -ocan`.chomp.strip smiles = obconversion(inchi,"inchi","can") @@ -145,9 +156,9 @@ module OpenTox end end - # Create a compound from sdf string - # @param sdf [String] smiles SDF string - # @return [OpenTox::Compound] Compound + # Create a compound from SDF + # @param [String] SDF + # @return [OpenTox::Compound] def self.from_sdf sdf # do not store sdf because it might be 2D Compound.from_smiles obconversion(sdf,"sdf","can") @@ -156,40 +167,38 @@ module OpenTox # Create a compound from name. Relies on an external service for name lookups. 
# @example # compound = OpenTox::Compound.from_name("Benzene") - # @param name [String] can be also an InChI/InChiKey, CAS number, etc - # @return [OpenTox::Compound] Compound + # @param [String] name, can be also an InChI/InChiKey, CAS number, etc + # @return [OpenTox::Compound] def self.from_name name Compound.from_smiles RestClientWrapper.get(File.join(CACTUS_URI,URI.escape(name),"smiles")) end # Get InChI - # @return [String] InChI string + # @return [String] def inchi unless self["inchi"] - result = obconversion(smiles,"smi","inchi") - #result = `echo "#{self.smiles}" | "#{File.join(File.dirname(__FILE__),"..","openbabel","bin","babel")}" -ismi - -oinchi`.chomp update(:inchi => result.chomp) if result and !result.empty? end self["inchi"] end # Get InChIKey - # @return [String] InChIKey string + # @return [String] def inchikey update(:inchikey => obconversion(smiles,"smi","inchikey")) unless self["inchikey"] self["inchikey"] end # Get (canonical) smiles - # @return [String] Smiles string + # @return [String] def smiles update(:smiles => obconversion(self["smiles"],"smi","can")) unless self["smiles"] self["smiles"] end - # Get sdf - # @return [String] SDF string + # Get SDF + # @return [String] def sdf if self.sdf_id.nil? sdf = obconversion(smiles,"smi","sdf") @@ -227,20 +236,22 @@ module OpenTox # Get all known compound names. Relies on an external service for name lookups. 
# @example # names = compound.names - # @return [String] Compound names + # @return [Array] def names update(:names => RestClientWrapper.get("#{CACTUS_URI}#{inchi}/names").split("\n")) unless self["names"] self["names"] end - # @return [String] PubChem Compound Identifier (CID), derieved via restcall to pubchem + # Get PubChem Compound Identifier (CID), obtained via REST call to PubChem + # @return [String] def cid pug_uri = "https://pubchem.ncbi.nlm.nih.gov/rest/pug/" update(:cid => RestClientWrapper.post(File.join(pug_uri, "compound", "inchi", "cids", "TXT"),{:inchi => inchi}).strip) unless self["cid"] self["cid"] end - # @return [String] ChEMBL database compound id, derieved via restcall to chembl + # Get ChEMBL database compound id, obtained via REST call to ChEMBL + # @return [String] def chemblid # https://www.ebi.ac.uk/chembldb/ws#individualCompoundByInChiKey uri = "https://www.ebi.ac.uk/chemblws/compounds/smiles/#{smiles}.json" @@ -290,7 +301,7 @@ module OpenTox mg.to_f/molecular_weight end - # Calculate molecular weight of Compound with OB and store it in object + # Calculate molecular weight of Compound with OB and store it in compound object # @return [Float] molecular weight def molecular_weight mw_feature = PhysChem.find_or_create_by(:name => "Openbabel.MW") diff --git a/lib/dataset.rb b/lib/dataset.rb index ab55294..44690e1 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -3,32 +3,43 @@ require 'tempfile' module OpenTox + # Collection of substances and features class Dataset field :data_entries, type: Hash, default: {} # Readers + # Get all compounds + # @return [Array] def compounds substances.select{|s| s.is_a? Compound} end + # Get all nanoparticles + # @return [Array] def nanoparticles substances.select{|s| s.is_a? 
Nanoparticle} end # Get all substances + # @return [Array] def substances @substances ||= data_entries.keys.collect{|id| OpenTox::Substance.find id}.uniq @substances end # Get all features + # @return [Array] def features @features ||= data_entries.collect{|sid,data| data.keys.collect{|id| OpenTox::Feature.find(id)}}.flatten.uniq @features end + # Get all values for a given substance and feature + # @param [OpenTox::Substance,BSON::ObjectId,String] substance or substance id + # @param [OpenTox::Feature,BSON::ObjectId,String] feature or feature id + # @return [TrueClass,FalseClass,Float] def values substance,feature substance = substance.id if substance.is_a? Substance feature = feature.id if feature.is_a? Feature @@ -41,6 +52,10 @@ module OpenTox # Writers + # Add a value for a given substance and feature + # @param [OpenTox::Substance,BSON::ObjectId,String] substance or substance id + # @param [OpenTox::Feature,BSON::ObjectId,String] feature or feature id + # @param [TrueClass,FalseClass,Float] def add(substance,feature,value) substance = substance.id if substance.is_a? Substance feature = feature.id if feature.is_a? Feature @@ -87,7 +102,7 @@ module OpenTox # Serialisation - # converts dataset to csv format including compound smiles as first column, other column headers are feature names + # Convert dataset to csv format including compound smiles as first column, other column headers are feature names # @return [String] def to_csv(inchi=false) CSV.generate() do |csv| @@ -130,6 +145,9 @@ module OpenTox #end # Create a dataset from CSV file + # @param [File] + # @param [TrueClass,FalseClass] accept or reject empty values + # @return [OpenTox::Dataset] def self.from_csv_file file, accept_empty_values=false source = file name = File.basename(file,".*") @@ -145,8 +163,10 @@ module OpenTox dataset end - # parse data in tabular format (e.g. from csv) - # does a lot of guesswork in order to determine feature types + # Parse data in tabular format (e.g. 
from csv) + # does a lot of guesswork in order to determine feature types + # @param [Array] + # @param [TrueClass,FalseClass] accept or reject empty values def parse_table table, accept_empty_values # features @@ -225,6 +245,7 @@ module OpenTox save end + # Delete dataset def delete compounds.each{|c| c.dataset_ids.delete id.to_s} super @@ -238,14 +259,20 @@ module OpenTox field :prediction_feature_id, type: BSON::ObjectId field :predictions, type: Hash, default: {} + # Get prediction feature + # @return [OpenTox::Feature] def prediction_feature Feature.find prediction_feature_id end + # Get all compounds + # @return [Array] def compounds substances.select{|s| s.is_a? Compound} end + # Get all substances + # @return [Array] def substances predictions.keys.collect{|id| Substance.find id} end diff --git a/lib/experiment.rb b/lib/experiment.rb deleted file mode 100644 index 0dfdf86..0000000 --- a/lib/experiment.rb +++ /dev/null @@ -1,99 +0,0 @@ -module OpenTox - - class Experiment - field :dataset_ids, type: Array - field :model_settings, type: Array, default: [] - field :results, type: Hash, default: {} - - def run - dataset_ids.each do |dataset_id| - dataset = Dataset.find(dataset_id) - results[dataset_id.to_s] = [] - model_settings.each do |setting| - setting = setting.dup - model_algorithm = setting.delete :model_algorithm #if setting[:model_algorithm] - model = Object.const_get(model_algorithm).create dataset, setting - $logger.debug model - model.save - repeated_crossvalidation = RepeatedCrossValidation.create model - results[dataset_id.to_s] << {:model_id => model.id, :repeated_crossvalidation_id => repeated_crossvalidation.id} - end - end - save - end - - def report - # statistical significances http://www.r-bloggers.com/anova-and-tukeys-test-on-r/ - report = {} - report[:name] = name - report[:experiment_id] = self.id.to_s - report[:results] = {} - parameters = [] - dataset_ids.each do |dataset_id| - dataset_name = Dataset.find(dataset_id).name - 
report[:results][dataset_name] = {} - report[:results][dataset_name][:anova] = {} - report[:results][dataset_name][:data] = [] - # TODO results[dataset_id.to_s] does not exist - results[dataset_id.to_s].each do |result| - model = Model::Lazar.find(result[:model_id]) - repeated_cv = RepeatedCrossValidation.find(result[:repeated_crossvalidation_id]) - crossvalidations = repeated_cv.crossvalidations - if crossvalidations.first.is_a? ClassificationCrossValidation - parameters = [:accuracy,:true_rate,:predictivity] - elsif crossvalidations.first.is_a? RegressionCrossValidation - parameters = [:rmse,:mae,:r_squared] - end - summary = {} - [:neighbor_algorithm, :neighbor_algorithm_parameters, :prediction_algorithm].each do |key| - summary[key] = model[key] - end - summary[:nr_instances] = crossvalidations.first.nr_instances - summary[:nr_unpredicted] = crossvalidations.collect{|cv| cv.nr_unpredicted} - summary[:time] = crossvalidations.collect{|cv| cv.time} - parameters.each do |param| - summary[param] = crossvalidations.collect{|cv| cv.send(param)} - end - report[:results][dataset_name][:data] << summary - end - end - report[:results].each do |dataset,results| - ([:time,:nr_unpredicted]+parameters).each do |param| - experiments = [] - outcome = [] - results[:data].each_with_index do |result,i| - result[param].each do |p| - experiments << i - p = nil if p.kind_of? Float and p.infinite? 
# TODO fix @ division by 0 - outcome << p - end - end - begin - R.assign "experiment_nr",experiments.collect{|i| "Experiment #{i}"} - R.eval "experiment_nr = factor(experiment_nr)" - R.assign "outcome", outcome - R.eval "data = data.frame(experiment_nr,outcome)" - # one-way ANOVA - R.eval "fit = aov(outcome ~ experiment_nr, data=data,na.action='na.omit')" - # http://stackoverflow.com/questions/3366506/extract-p-value-from-aov - p_value = R.eval("summary(fit)[[1]][['Pr(>F)']][[1]]").to_ruby - # aequivalent - # sum = R.eval("summary(fit)") - #p_value = sum.to_ruby.first.last.first - rescue - p_value = nil - end - report[:results][dataset][:anova][param] = p_value -=begin -=end - end - end - report - end - - def summary - report[:results].collect{|dataset,data| {dataset => data[:anova].select{|param,p_val| p_val < 0.1}}} - end - end - -end diff --git a/lib/feature.rb b/lib/feature.rb index 0ca4d41..f811aef 100644 --- a/lib/feature.rb +++ b/lib/feature.rb @@ -8,10 +8,14 @@ module OpenTox field :unit, type: String field :conditions, type: Hash + # Is it a nominal feature + # @return [TrueClass,FalseClass] def nominal? self.class == NominalFeature end + # Is it a numeric feature + # @return [TrueClass,FalseClass] def numeric? 
self.class == NumericFeature end @@ -30,6 +34,9 @@ module OpenTox class Smarts < NominalFeature field :smarts, type: String index "smarts" => 1 + # Create feature from SMARTS string + # @param [String] + # @return [OpenTox::Feature] def self.from_smarts smarts self.find_or_create_by :smarts => smarts end diff --git a/lib/feature_selection.rb b/lib/feature_selection.rb index 65f9752..c596b1f 100644 --- a/lib/feature_selection.rb +++ b/lib/feature_selection.rb @@ -1,13 +1,16 @@ module OpenTox module Algorithm + # Feature selection algorithms class FeatureSelection + # Select features correlated to the model's prediction feature + # @param [OpenTox::Model::Lazar] def self.correlation_filter model relevant_features = {} R.assign "dependent", model.dependent_variables.collect{|v| to_r(v)} model.descriptor_weights = [] - selected_variables = [] + selected_variables = [] selected_descriptor_ids = [] model.independent_variables.each_with_index do |v,i| v.collect!{|n| to_r(n)} diff --git a/lib/import.rb b/lib/import.rb index 7a68335..fd00fbe 100644 --- a/lib/import.rb +++ b/lib/import.rb @@ -1,12 +1,14 @@ module OpenTox + # Import data from external databases module Import class Enanomapper include OpenTox - # time critical step: JSON parsing (>99%), Oj brings only minor speed gains (~1%) + # Import from eNanoMapper def self.import + # time critical step: JSON parsing (>99%), Oj brings only minor speed gains (~1%) datasets = {} bundles = JSON.parse(RestClientWrapper.get('https://data.enanomapper.net/bundle?media=application%2Fjson'))["dataset"] bundles.each do |bundle| diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb index 06db4d2..73d5f8b 100644 --- a/lib/nanoparticle.rb +++ b/lib/nanoparticle.rb @@ -1,25 +1,36 @@ module OpenTox + # Nanoparticles class Nanoparticle < Substance include OpenTox field :core_id, type: String, default: nil field :coating_ids, type: Array, default: [] + # Get core compound + # @return [OpenTox::Compound] def core Compound.find core_id end +
# Get coatings + # @return [Array] def coating coating_ids.collect{|i| Compound.find i } end + # Get nanoparticle fingerprint (union of core and coating fingerprints) + # @param [String] fingerprint type + # @return [Array] def fingerprint type=DEFAULT_FINGERPRINT core_fp = core.fingerprint type coating_fp = coating.collect{|c| c.fingerprint type}.flatten.uniq.compact (core_fp.empty? or coating_fp.empty?) ? [] : (core_fp+coating_fp).uniq.compact end + # Calculate physchem properties + # @param [Array] list of descriptors + # @return [Array] def calculate_properties descriptors=PhysChem::OPENBABEL if core.smiles and !coating.collect{|c| c.smiles}.compact.empty? core_prop = core.calculate_properties descriptors @@ -28,6 +39,10 @@ module OpenTox end end + # Add (measured) feature values + # @param [OpenTox::Feature] + # @param [TrueClass,FalseClass,Float] + # @param [OpenTox::Dataset] def add_feature feature, value, dataset unless feature.name == "ATOMIC COMPOSITION" or feature.name == "FUNCTIONAL GROUP" # redundand case feature.category @@ -55,6 +70,10 @@ module OpenTox end end + # Parse values from Ambit database + # @param [OpenTox::Feature] + # @param [TrueClass,FalseClass,Float] + # @param [OpenTox::Dataset] def parse_ambit_value feature, v, dataset # TODO add study id to warnings v.delete "unit" diff --git a/lib/overwrite.rb b/lib/overwrite.rb index 31d30c9..91bc9e1 100644 --- a/lib/overwrite.rb +++ b/lib/overwrite.rb @@ -2,41 +2,51 @@ require "base64" class Object # An object is blank if it's false, empty, or a whitespace string. # For example, "", " ", +nil+, [], and {} are all blank. + # @return [TrueClass,FalseClass] def blank? respond_to?(:empty?) ? empty? : !self end + # Is it a numeric object + # @return [TrueClass,FalseClass] def numeric? true if Float(self) rescue false end # Returns dimension of nested arrays + # @return [Fixnum] def dimension self.class == Array ? 
1 + self[0].dimension : 0 end end class Numeric + # Convert number to percent + # @return [Float] def percent_of(n) self.to_f / n.to_f * 100.0 end end class Float - # round to n significant digits - # http://stackoverflow.com/questions/8382619/how-to-round-a-float-to-a-specified-number-of-significant-digits-in-ruby + # Round to n significant digits + # http://stackoverflow.com/questions/8382619/how-to-round-a-float-to-a-specified-number-of-significant-digits-in-ruby + # @param [Fixnum] + # @return [Float] def signif(n) Float("%.#{n}g" % self) end - # converts -10 logarithmized values back + # Convert -10 log values to original values + # @return [Float] def delog10 10**(-1*self) end end module Enumerable - # @return [Array] only the duplicates of an enumerable + # Get duplicates + # @return [Array] def duplicates inject({}) {|h,v| h[v]=h[v].to_i+1; h}.reject{|k,v| v==1}.keys end @@ -51,7 +61,10 @@ module Enumerable end class String - # @return [String] converts camel-case to underscore-case (OpenTox::SuperModel -> open_tox/super_model) + # Convert camel-case to underscore-case + # @example + # OpenTox::SuperModel -> open_tox/super_model + # @return [String] def underscore self.gsub(/::/, '/'). gsub(/([A-Z]+)([A-Z][a-z])/,'\1_\2'). 
@@ -60,7 +73,7 @@ class String downcase end - # convert strings to boolean values + # Convert strings to boolean values # @return [TrueClass,FalseClass] true or false def to_boolean return true if self == true || self =~ (/(true|t|yes|y|1)$/i) @@ -71,7 +84,8 @@ class String end class File - # @return [String] mime_type including charset using linux cmd command + # Get mime_type including charset using linux file command + # @return [String] def mime_type `file -ib '#{self.path}'`.chomp end @@ -79,7 +93,7 @@ end class Array - # Sum up the size of single arrays in an array of arrays + # Sum the size of single arrays in an array of arrays # @param [Array] Array of arrays # @return [Integer] Sum of size of array elements def sum_size @@ -92,33 +106,43 @@ class Array } end - # For symbolic features + # Check if the array has just one unique value. # @param [Array] Array to test. - # @return [Boolean] Whether the array has just one unique value. + # @return [TrueClass,FalseClass] def zero_variance? 
return self.uniq.size == 1 end + # Get the median of an array + # @return [Numeric] def median sorted = self.sort len = sorted.length (sorted[(len - 1) / 2] + sorted[len / 2]) / 2.0 end + # Get the mean of an array + # @return [Numeric] def mean self.compact.inject{ |sum, el| sum + el }.to_f / self.compact.size end + # Get the variance of an array + # @return [Numeric] def sample_variance m = self.mean sum = self.compact.inject(0){|accum, i| accum +(i-m)**2 } sum/(self.compact.length - 1).to_f end + # Get the standard deviation of an array + # @return [Numeric] def standard_deviation Math.sqrt(self.sample_variance) end + # Convert array values for R + # @return [Array] def for_R if self.first.is_a?(String) #"\"#{self.collect{|v| v.sub('[','').sub(']','')}.join(" ")}\"" # quote and remove square brackets @@ -128,6 +152,8 @@ class Array end end + # Collect array with index + # in analogy to each_with_index def collect_with_index result = [] self.each_with_index do |elt, idx| @@ -139,11 +165,15 @@ end module URI + # Is it a https connection + # @param [String] + # @return [TrueClass,FalseClass] def self.ssl? uri URI.parse(uri).instance_of? URI::HTTPS end - # @return [Boolean] checks if resource exists by making a HEAD-request + # Check if a http resource exists by making a HEAD-request + # @return [TrueClass,FalseClass] def self.accessible?(uri) parsed_uri = URI.parse(uri + (OpenTox::RestClientWrapper.subjectid ? "?subjectid=#{CGI.escape OpenTox::RestClientWrapper.subjectid}" : "")) http_code = URI.task?(uri) ? 600 : 400 @@ -163,6 +193,9 @@ module URI false end + # Is the URI valid + # @param [String] + # @return [TrueClass,FalseClass] def self.valid? uri u = URI.parse(uri) u.scheme!=nil and u.host!=nil @@ -170,6 +203,8 @@ module URI false end + # Is the URI a task URI + # @param [String] def self.task? uri uri =~ /task/ and URI.valid? 
uri end diff --git a/lib/physchem.rb b/lib/physchem.rb index 327acd8..07df867 100644 --- a/lib/physchem.rb +++ b/lib/physchem.rb @@ -39,6 +39,9 @@ module OpenTox require_relative "unique_descriptors.rb" + # Get descriptor features + # @param [Hash] + # @return [Array] def self.descriptors desc=DESCRIPTORS desc.collect do |name,description| lib,desc = name.split('.',2) @@ -46,6 +49,8 @@ module OpenTox end end + # Get unique descriptor features + # @return [Array] def self.unique_descriptors udesc = [] UNIQUEDESCRIPTORS.each do |name| @@ -64,23 +69,28 @@ module OpenTox udesc end + # Get OpenBabel descriptor features + # @return [Array] def self.openbabel_descriptors descriptors OPENBABEL end + # Get CDK descriptor features + # @return [Array] def self.cdk_descriptors descriptors CDK end + # Get JOELIB descriptor features + # @return [Array] def self.joelib_descriptors descriptors JOELIB end - def calculate compound - result = send library.downcase,descriptor,compound - result[self.name] - end - + # Calculate OpenBabel descriptors + # @param [String] descriptor type + # @param [OpenTox::Compound] + # @return [Hash] def openbabel descriptor, compound obdescriptor = OpenBabel::OBDescriptor.find_type descriptor obmol = OpenBabel::OBMol.new @@ -90,10 +100,18 @@ module OpenTox {"#{library.capitalize}.#{descriptor}" => fix_value(obdescriptor.predict(obmol))} end + # Calculate CDK descriptors + # @param [String] descriptor type + # @param [OpenTox::Compound] + # @return [Hash] def cdk descriptor, compound java_descriptor "cdk", descriptor, compound end + # Calculate JOELIB descriptors + # @param [String] descriptor type + # @param [OpenTox::Compound] + # @return [Hash] def joelib descriptor, compound java_descriptor "joelib", descriptor, compound end diff --git a/lib/regression.rb b/lib/regression.rb index 3890987..fd2855f 100644 --- a/lib/regression.rb +++ b/lib/regression.rb @@ -1,8 +1,13 @@ module OpenTox module Algorithm + # Regression algorithms class Regression + # 
Weighted average + # @param [Array] dependent_variables + # @param [Array] weights + # @return [Hash] def self.weighted_average dependent_variables:, independent_variables:nil, weights:, query_variables:nil # TODO: prediction_interval weighted_sum = 0.0 diff --git a/lib/rest-client-wrapper.rb b/lib/rest-client-wrapper.rb index 2073be2..f76a296 100644 --- a/lib/rest-client-wrapper.rb +++ b/lib/rest-client-wrapper.rb @@ -1,5 +1,6 @@ module OpenTox + # Adjustments to the rest-client gem for OpenTox class RestClientWrapper attr_accessor :request, :response diff --git a/lib/similarity.rb b/lib/similarity.rb index 0901936..ccbc9d6 100644 --- a/lib/similarity.rb +++ b/lib/similarity.rb @@ -2,6 +2,10 @@ module OpenTox module Algorithm class Vector + # Get dot product + # @param [Vector] + # @param [Vector] + # @return [Numeric] def self.dot_product(a, b) products = a.zip(b).map{|a, b| a * b} products.inject(0) {|s,p| s + p} @@ -15,6 +19,9 @@ module OpenTox class Similarity + # Get Tanimoto similarity + # @param [Array>] + # @return [Float] def self.tanimoto fingerprints ( fingerprints[0] & fingerprints[1]).size/(fingerprints[0]|fingerprints[1]).size.to_f end @@ -23,18 +30,28 @@ module OpenTox #( fingerprints[0] & fingerprints[1]).size/(fingerprints[0]|fingerprints[1]).size.to_f #end + # Get Euclidean distance + # @param [Array>] + # @return [Float] def self.euclid scaled_properties sq = scaled_properties[0].zip(scaled_properties[1]).map{|a,b| (a - b) ** 2} Math.sqrt(sq.inject(0) {|s,c| s + c}) end - # http://stackoverflow.com/questions/1838806/euclidean-distance-vs-pearson-correlation-vs-cosine-similarity + # Get cosine similarity + # http://stackoverflow.com/questions/1838806/euclidean-distance-vs-pearson-correlation-vs-cosine-similarity + # @param [Array>] + # @return [Float] def self.cosine scaled_properties scaled_properties = remove_nils scaled_properties Algorithm::Vector.dot_product(scaled_properties[0], scaled_properties[1]) / 
(Algorithm::Vector.magnitude(scaled_properties[0]) * Algorithm::Vector.magnitude(scaled_properties[1])) end - def self.weighted_cosine scaled_properties # [a,b,weights] + # Get weighted cosine similarity + # http://stackoverflow.com/questions/1838806/euclidean-distance-vs-pearson-correlation-vs-cosine-similarity + # @param [Array>] [a,b,weights] + # @return [Float] + def self.weighted_cosine scaled_properties a,b,w = remove_nils scaled_properties return cosine(scaled_properties) if w.uniq.size == 1 dot_product = 0 @@ -48,6 +65,9 @@ module OpenTox dot_product/(Math.sqrt(magnitude_a)*Math.sqrt(magnitude_b)) end + # Remove nil values + # @param [Array>] [a,b,weights] + # @return [Array>] [a,b,weights] def self.remove_nils scaled_properties a =[]; b = []; w = [] (0..scaled_properties.first.size-1).each do |i| diff --git a/lib/substance.rb b/lib/substance.rb index 31c465e..ef49659 100644 --- a/lib/substance.rb +++ b/lib/substance.rb @@ -1,5 +1,6 @@ module OpenTox + # Base class for substances (e.g. compounds, nanoparticles) class Substance field :properties, type: Hash, default: {} field :dataset_ids, type: Array, default: [] -- cgit v1.2.3