summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorChristoph Helma <helma@in-silico.ch>2017-01-11 16:00:07 +0100
committerChristoph Helma <helma@in-silico.ch>2017-01-11 16:00:07 +0100
commit85553b339acf3f9285a1c03b2fff342d9ddb9b6b (patch)
treea00b5879578ed07ed2bea7b67c8da3896b43bc5d /lib
parentf522a1089af8775798450b3f9f0aa4b579a3e1b5 (diff)
documentation for all classes
Diffstat (limited to 'lib')
-rw-r--r--lib/algorithm.rb1
-rw-r--r--lib/caret.rb13
-rw-r--r--lib/classification.rb7
-rw-r--r--lib/compound.rb53
-rw-r--r--lib/dataset.rb33
-rw-r--r--lib/experiment.rb99
-rw-r--r--lib/feature.rb7
-rw-r--r--lib/feature_selection.rb5
-rw-r--r--lib/import.rb4
-rw-r--r--lib/nanoparticle.rb19
-rw-r--r--lib/overwrite.rb57
-rw-r--r--lib/physchem.rb28
-rw-r--r--lib/regression.rb5
-rw-r--r--lib/rest-client-wrapper.rb1
-rw-r--r--lib/similarity.rb24
-rw-r--r--lib/substance.rb1
16 files changed, 211 insertions, 146 deletions
diff --git a/lib/algorithm.rb b/lib/algorithm.rb
index 0e4b93a..f70ac1a 100644
--- a/lib/algorithm.rb
+++ b/lib/algorithm.rb
@@ -2,6 +2,7 @@ module OpenTox
module Algorithm
+ # Execute an algorithm with parameters
def self.run algorithm, parameters=nil
klass,method = algorithm.split('.')
Object.const_get(klass).send(method,parameters)
diff --git a/lib/caret.rb b/lib/caret.rb
index 7e4f771..f5c2bde 100644
--- a/lib/caret.rb
+++ b/lib/caret.rb
@@ -1,9 +1,17 @@
module OpenTox
module Algorithm
+ # Ruby interface for the R caret package
+ # Caret model list: https://topepo.github.io/caret/modelList.html
class Caret
- # model list: https://topepo.github.io/caret/modelList.html
+ # Create a local R caret model and make a prediction
+ # @param [Array<Float,Bool>] dependent_variables
+ # @param [Array<Array<Float,Bool>>] independent_variables
+ # @param [Array<Float>] weights
+ # @param [String] Caret method
+ # @param [Array<Float,Bool>] query_variables
+ # @return [Hash]
def self.create_model_and_predict dependent_variables:, independent_variables:, weights:, method:, query_variables:
remove = []
# remove independent_variables with single values
@@ -77,12 +85,13 @@ module OpenTox
end
- # call caret methods dynamically, e.g. Caret.pls
+ # Call caret methods dynamically, e.g. Caret.pls
def self.method_missing(sym, *args, &block)
args.first[:method] = sym.to_s
self.create_model_and_predict args.first
end
+ # Convert Ruby values to R values
def self.to_r v
return "F" if v == false
return "T" if v == true
diff --git a/lib/classification.rb b/lib/classification.rb
index e8c179f..638492b 100644
--- a/lib/classification.rb
+++ b/lib/classification.rb
@@ -1,9 +1,14 @@
module OpenTox
module Algorithm
+ # Classification algorithms
class Classification
- def self.weighted_majority_vote dependent_variables:, independent_variables:nil, weights:, query_variables:
+ # Weighted majority vote
+ # @param [Array<TrueClass,FalseClass>] dependent_variables
+ # @param [Array<Float>] weights
+ # @return [Hash]
+ def self.weighted_majority_vote dependent_variables:, independent_variables:nil, weights:, query_variables:nil
class_weights = {}
dependent_variables.each_with_index do |v,i|
class_weights[v] ||= []
diff --git a/lib/compound.rb b/lib/compound.rb
index 1c308d8..bfe69e3 100644
--- a/lib/compound.rb
+++ b/lib/compound.rb
@@ -2,6 +2,7 @@ CACTUS_URI="https://cactus.nci.nih.gov/chemical/structure/"
module OpenTox
+ # Small molecules with defined chemical structures
class Compound < Substance
require_relative "unique_descriptors.rb"
DEFAULT_FINGERPRINT = "MP2D"
@@ -28,6 +29,9 @@ module OpenTox
compound
end
+ # Create chemical fingerprint
+ # @param [String] fingerprint type
+ # @return [Array<String>]
def fingerprint type=DEFAULT_FINGERPRINT
unless fingerprints[type]
return [] unless self.smiles
@@ -75,6 +79,9 @@ module OpenTox
fingerprints[type]
end
+ # Calculate physchem properties
+ # @param [Array<Hash>] list of descriptors
+ # @return [Array<Float>]
def calculate_properties descriptors=PhysChem::OPENBABEL
calculated_ids = properties.keys
# BSON::ObjectId instances are not allowed as keys in a BSON document.
@@ -96,6 +103,10 @@ module OpenTox
descriptors.collect{|d| properties[d.id.to_s]}
end
+ # Match a SMARTS substructure
+ # @param [String] smarts
+ # @param [TrueClass,FalseClass] count matches or return true/false
+ # @return [TrueClass,FalseClass,Fixnum]
def smarts_match smarts, count=false
obconversion = OpenBabel::OBConversion.new
obmol = OpenBabel::OBMol.new
@@ -116,8 +127,8 @@ module OpenTox
# Create a compound from smiles string
# @example
# compound = OpenTox::Compound.from_smiles("c1ccccc1")
- # @param [String] smiles Smiles string
- # @return [OpenTox::Compound] Compound
+ # @param [String] smiles
+ # @return [OpenTox::Compound]
def self.from_smiles smiles
if smiles.match(/\s/) # spaces seem to confuse obconversion and may lead to invalid smiles
$logger.warn "SMILES parsing failed for '#{smiles}'', SMILES string contains whitespaces."
@@ -132,9 +143,9 @@ module OpenTox
end
end
- # Create a compound from inchi string
- # @param inchi [String] smiles InChI string
- # @return [OpenTox::Compound] Compound
+ # Create a compound from InChI string
+ # @param [String] InChI
+ # @return [OpenTox::Compound]
def self.from_inchi inchi
#smiles = `echo "#{inchi}" | "#{File.join(File.dirname(__FILE__),"..","openbabel","bin","babel")}" -iinchi - -ocan`.chomp.strip
smiles = obconversion(inchi,"inchi","can")
@@ -145,9 +156,9 @@ module OpenTox
end
end
- # Create a compound from sdf string
- # @param sdf [String] smiles SDF string
- # @return [OpenTox::Compound] Compound
+ # Create a compound from SDF
+ # @param [String] SDF
+ # @return [OpenTox::Compound]
def self.from_sdf sdf
# do not store sdf because it might be 2D
Compound.from_smiles obconversion(sdf,"sdf","can")
@@ -156,40 +167,38 @@ module OpenTox
# Create a compound from name. Relies on an external service for name lookups.
# @example
# compound = OpenTox::Compound.from_name("Benzene")
- # @param name [String] can be also an InChI/InChiKey, CAS number, etc
- # @return [OpenTox::Compound] Compound
+ # @param [String] name, can be also an InChI/InChiKey, CAS number, etc
+ # @return [OpenTox::Compound]
def self.from_name name
Compound.from_smiles RestClientWrapper.get(File.join(CACTUS_URI,URI.escape(name),"smiles"))
end
# Get InChI
- # @return [String] InChI string
+ # @return [String]
def inchi
unless self["inchi"]
-
result = obconversion(smiles,"smi","inchi")
- #result = `echo "#{self.smiles}" | "#{File.join(File.dirname(__FILE__),"..","openbabel","bin","babel")}" -ismi - -oinchi`.chomp
update(:inchi => result.chomp) if result and !result.empty?
end
self["inchi"]
end
# Get InChIKey
- # @return [String] InChIKey string
+ # @return [String]
def inchikey
update(:inchikey => obconversion(smiles,"smi","inchikey")) unless self["inchikey"]
self["inchikey"]
end
# Get (canonical) smiles
- # @return [String] Smiles string
+ # @return [String]
def smiles
update(:smiles => obconversion(self["smiles"],"smi","can")) unless self["smiles"]
self["smiles"]
end
- # Get sdf
- # @return [String] SDF string
+ # Get SDF
+ # @return [String]
def sdf
if self.sdf_id.nil?
sdf = obconversion(smiles,"smi","sdf")
@@ -227,20 +236,22 @@ module OpenTox
# Get all known compound names. Relies on an external service for name lookups.
# @example
# names = compound.names
- # @return [String] Compound names
+ # @return [Array<String>]
def names
update(:names => RestClientWrapper.get("#{CACTUS_URI}#{inchi}/names").split("\n")) unless self["names"]
self["names"]
end
- # @return [String] PubChem Compound Identifier (CID), derieved via restcall to pubchem
+ # Get PubChem Compound Identifier (CID), obtained via REST call to PubChem
+ # @return [String]
def cid
pug_uri = "https://pubchem.ncbi.nlm.nih.gov/rest/pug/"
update(:cid => RestClientWrapper.post(File.join(pug_uri, "compound", "inchi", "cids", "TXT"),{:inchi => inchi}).strip) unless self["cid"]
self["cid"]
end
- # @return [String] ChEMBL database compound id, derieved via restcall to chembl
+ # Get ChEMBL database compound id, obtained via REST call to ChEMBL
+ # @return [String]
def chemblid
# https://www.ebi.ac.uk/chembldb/ws#individualCompoundByInChiKey
uri = "https://www.ebi.ac.uk/chemblws/compounds/smiles/#{smiles}.json"
@@ -290,7 +301,7 @@ module OpenTox
mg.to_f/molecular_weight
end
- # Calculate molecular weight of Compound with OB and store it in object
+ # Calculate molecular weight of Compound with OB and store it in compound object
# @return [Float] molecular weight
def molecular_weight
mw_feature = PhysChem.find_or_create_by(:name => "Openbabel.MW")
diff --git a/lib/dataset.rb b/lib/dataset.rb
index ab55294..44690e1 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -3,32 +3,43 @@ require 'tempfile'
module OpenTox
+ # Collection of substances and features
class Dataset
field :data_entries, type: Hash, default: {}
# Readers
+ # Get all compounds
+ # @return [Array<OpenTox::Compound>]
def compounds
substances.select{|s| s.is_a? Compound}
end
+ # Get all nanoparticles
+ # @return [Array<OpenTox::Nanoparticle>]
def nanoparticles
substances.select{|s| s.is_a? Nanoparticle}
end
# Get all substances
+ # @return [Array<OpenTox::Substance>]
def substances
@substances ||= data_entries.keys.collect{|id| OpenTox::Substance.find id}.uniq
@substances
end
# Get all features
+ # @return [Array<OpenTox::Feature>]
def features
@features ||= data_entries.collect{|sid,data| data.keys.collect{|id| OpenTox::Feature.find(id)}}.flatten.uniq
@features
end
+ # Get all values for a given substance and feature
+ # @param [OpenTox::Substance,BSON::ObjectId,String] substance or substance id
+ # @param [OpenTox::Feature,BSON::ObjectId,String] feature or feature id
+ # @return [TrueClass,FalseClass,Float]
def values substance,feature
substance = substance.id if substance.is_a? Substance
feature = feature.id if feature.is_a? Feature
@@ -41,6 +52,10 @@ module OpenTox
# Writers
+ # Add a value for a given substance and feature
+ # @param [OpenTox::Substance,BSON::ObjectId,String] substance or substance id
+ # @param [OpenTox::Feature,BSON::ObjectId,String] feature or feature id
+ # @param [TrueClass,FalseClass,Float]
def add(substance,feature,value)
substance = substance.id if substance.is_a? Substance
feature = feature.id if feature.is_a? Feature
@@ -87,7 +102,7 @@ module OpenTox
# Serialisation
- # converts dataset to csv format including compound smiles as first column, other column headers are feature names
+ # Convert dataset to csv format including compound smiles as first column, other column headers are feature names
# @return [String]
def to_csv(inchi=false)
CSV.generate() do |csv|
@@ -130,6 +145,9 @@ module OpenTox
#end
# Create a dataset from CSV file
+ # @param [File]
+ # @param [TrueClass,FalseClass] accept or reject empty values
+ # @return [OpenTox::Dataset]
def self.from_csv_file file, accept_empty_values=false
source = file
name = File.basename(file,".*")
@@ -145,8 +163,10 @@ module OpenTox
dataset
end
- # parse data in tabular format (e.g. from csv)
- # does a lot of guesswork in order to determine feature types
+ # Parse data in tabular format (e.g. from csv)
+ # does a lot of guesswork in order to determine feature types
+ # @param [Array<Array>]
+ # @param [TrueClass,FalseClass] accept or reject empty values
def parse_table table, accept_empty_values
# features
@@ -225,6 +245,7 @@ module OpenTox
save
end
+ # Delete dataset
def delete
compounds.each{|c| c.dataset_ids.delete id.to_s}
super
@@ -238,14 +259,20 @@ module OpenTox
field :prediction_feature_id, type: BSON::ObjectId
field :predictions, type: Hash, default: {}
+ # Get prediction feature
+ # @return [OpenTox::Feature]
def prediction_feature
Feature.find prediction_feature_id
end
+ # Get all compounds
+ # @return [Array<OpenTox::Compound>]
def compounds
substances.select{|s| s.is_a? Compound}
end
+ # Get all substances
+ # @return [Array<OpenTox::Substance>]
def substances
predictions.keys.collect{|id| Substance.find id}
end
diff --git a/lib/experiment.rb b/lib/experiment.rb
deleted file mode 100644
index 0dfdf86..0000000
--- a/lib/experiment.rb
+++ /dev/null
@@ -1,99 +0,0 @@
-module OpenTox
-
- class Experiment
- field :dataset_ids, type: Array
- field :model_settings, type: Array, default: []
- field :results, type: Hash, default: {}
-
- def run
- dataset_ids.each do |dataset_id|
- dataset = Dataset.find(dataset_id)
- results[dataset_id.to_s] = []
- model_settings.each do |setting|
- setting = setting.dup
- model_algorithm = setting.delete :model_algorithm #if setting[:model_algorithm]
- model = Object.const_get(model_algorithm).create dataset, setting
- $logger.debug model
- model.save
- repeated_crossvalidation = RepeatedCrossValidation.create model
- results[dataset_id.to_s] << {:model_id => model.id, :repeated_crossvalidation_id => repeated_crossvalidation.id}
- end
- end
- save
- end
-
- def report
- # statistical significances http://www.r-bloggers.com/anova-and-tukeys-test-on-r/
- report = {}
- report[:name] = name
- report[:experiment_id] = self.id.to_s
- report[:results] = {}
- parameters = []
- dataset_ids.each do |dataset_id|
- dataset_name = Dataset.find(dataset_id).name
- report[:results][dataset_name] = {}
- report[:results][dataset_name][:anova] = {}
- report[:results][dataset_name][:data] = []
- # TODO results[dataset_id.to_s] does not exist
- results[dataset_id.to_s].each do |result|
- model = Model::Lazar.find(result[:model_id])
- repeated_cv = RepeatedCrossValidation.find(result[:repeated_crossvalidation_id])
- crossvalidations = repeated_cv.crossvalidations
- if crossvalidations.first.is_a? ClassificationCrossValidation
- parameters = [:accuracy,:true_rate,:predictivity]
- elsif crossvalidations.first.is_a? RegressionCrossValidation
- parameters = [:rmse,:mae,:r_squared]
- end
- summary = {}
- [:neighbor_algorithm, :neighbor_algorithm_parameters, :prediction_algorithm].each do |key|
- summary[key] = model[key]
- end
- summary[:nr_instances] = crossvalidations.first.nr_instances
- summary[:nr_unpredicted] = crossvalidations.collect{|cv| cv.nr_unpredicted}
- summary[:time] = crossvalidations.collect{|cv| cv.time}
- parameters.each do |param|
- summary[param] = crossvalidations.collect{|cv| cv.send(param)}
- end
- report[:results][dataset_name][:data] << summary
- end
- end
- report[:results].each do |dataset,results|
- ([:time,:nr_unpredicted]+parameters).each do |param|
- experiments = []
- outcome = []
- results[:data].each_with_index do |result,i|
- result[param].each do |p|
- experiments << i
- p = nil if p.kind_of? Float and p.infinite? # TODO fix @ division by 0
- outcome << p
- end
- end
- begin
- R.assign "experiment_nr",experiments.collect{|i| "Experiment #{i}"}
- R.eval "experiment_nr = factor(experiment_nr)"
- R.assign "outcome", outcome
- R.eval "data = data.frame(experiment_nr,outcome)"
- # one-way ANOVA
- R.eval "fit = aov(outcome ~ experiment_nr, data=data,na.action='na.omit')"
- # http://stackoverflow.com/questions/3366506/extract-p-value-from-aov
- p_value = R.eval("summary(fit)[[1]][['Pr(>F)']][[1]]").to_ruby
- # aequivalent
- # sum = R.eval("summary(fit)")
- #p_value = sum.to_ruby.first.last.first
- rescue
- p_value = nil
- end
- report[:results][dataset][:anova][param] = p_value
-=begin
-=end
- end
- end
- report
- end
-
- def summary
- report[:results].collect{|dataset,data| {dataset => data[:anova].select{|param,p_val| p_val < 0.1}}}
- end
- end
-
-end
diff --git a/lib/feature.rb b/lib/feature.rb
index 0ca4d41..f811aef 100644
--- a/lib/feature.rb
+++ b/lib/feature.rb
@@ -8,10 +8,14 @@ module OpenTox
field :unit, type: String
field :conditions, type: Hash
+ # Is it a nominal feature
+ # @return [TrueClass,FalseClass]
def nominal?
self.class == NominalFeature
end
+ # Is it a numeric feature
+ # @return [TrueClass,FalseClass]
def numeric?
self.class == NumericFeature
end
@@ -30,6 +34,9 @@ module OpenTox
class Smarts < NominalFeature
field :smarts, type: String
index "smarts" => 1
+ # Create feature from SMARTS string
+ # @param [String]
+ # @return [OpenTox::Feature]
def self.from_smarts smarts
self.find_or_create_by :smarts => smarts
end
diff --git a/lib/feature_selection.rb b/lib/feature_selection.rb
index 65f9752..c596b1f 100644
--- a/lib/feature_selection.rb
+++ b/lib/feature_selection.rb
@@ -1,13 +1,16 @@
module OpenTox
module Algorithm
+ # Feature selection algorithms
class FeatureSelection
+ # Select features correlated to the model's prediction feature
+ # @param [OpenTox::Model::Lazar]
def self.correlation_filter model
relevant_features = {}
R.assign "dependent", model.dependent_variables.collect{|v| to_r(v)}
model.descriptor_weights = []
- selected_variables = []
+ selected_variables = []
selected_descriptor_ids = []
model.independent_variables.each_with_index do |v,i|
v.collect!{|n| to_r(n)}
diff --git a/lib/import.rb b/lib/import.rb
index 7a68335..fd00fbe 100644
--- a/lib/import.rb
+++ b/lib/import.rb
@@ -1,12 +1,14 @@
module OpenTox
+ # Import data from external databases
module Import
class Enanomapper
include OpenTox
- # time critical step: JSON parsing (>99%), Oj brings only minor speed gains (~1%)
+ # Import from eNanoMapper
def self.import
+ # time critical step: JSON parsing (>99%), Oj brings only minor speed gains (~1%)
datasets = {}
bundles = JSON.parse(RestClientWrapper.get('https://data.enanomapper.net/bundle?media=application%2Fjson'))["dataset"]
bundles.each do |bundle|
diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb
index 06db4d2..73d5f8b 100644
--- a/lib/nanoparticle.rb
+++ b/lib/nanoparticle.rb
@@ -1,25 +1,36 @@
module OpenTox
+ # Nanoparticles
class Nanoparticle < Substance
include OpenTox
field :core_id, type: String, default: nil
field :coating_ids, type: Array, default: []
+ # Get core compound
+ # @return [OpenTox::Compound]
def core
Compound.find core_id
end
+ # Get coatings
+ # @return [Array<OpenTox::Compound>]
def coating
coating_ids.collect{|i| Compound.find i }
end
+ # Get nanoparticle fingerprint (union of core and coating fingerprints)
+ # @param [String] fingerprint type
+ # @return [Array<String>]
def fingerprint type=DEFAULT_FINGERPRINT
core_fp = core.fingerprint type
coating_fp = coating.collect{|c| c.fingerprint type}.flatten.uniq.compact
(core_fp.empty? or coating_fp.empty?) ? [] : (core_fp+coating_fp).uniq.compact
end
+ # Calculate physchem properties
+ # @param [Array<Hash>] list of descriptors
+ # @return [Array<Float>]
def calculate_properties descriptors=PhysChem::OPENBABEL
if core.smiles and !coating.collect{|c| c.smiles}.compact.empty?
core_prop = core.calculate_properties descriptors
@@ -28,6 +39,10 @@ module OpenTox
end
end
+ # Add (measured) feature values
+ # @param [OpenTox::Feature]
+ # @param [TrueClass,FalseClass,Float]
+ # @param [OpenTox::Dataset]
def add_feature feature, value, dataset
unless feature.name == "ATOMIC COMPOSITION" or feature.name == "FUNCTIONAL GROUP" # redundand
case feature.category
@@ -55,6 +70,10 @@ module OpenTox
end
end
+ # Parse values from Ambit database
+ # @param [OpenTox::Feature]
+ # @param [TrueClass,FalseClass,Float]
+ # @param [OpenTox::Dataset]
def parse_ambit_value feature, v, dataset
# TODO add study id to warnings
v.delete "unit"
diff --git a/lib/overwrite.rb b/lib/overwrite.rb
index 31d30c9..91bc9e1 100644
--- a/lib/overwrite.rb
+++ b/lib/overwrite.rb
@@ -2,41 +2,51 @@ require "base64"
class Object
# An object is blank if it's false, empty, or a whitespace string.
# For example, "", " ", +nil+, [], and {} are all blank.
+ # @return [TrueClass,FalseClass]
def blank?
respond_to?(:empty?) ? empty? : !self
end
+ # Is it a numeric object
+ # @return [TrueClass,FalseClass]
def numeric?
true if Float(self) rescue false
end
# Returns dimension of nested arrays
+ # @return [Fixnum]
def dimension
self.class == Array ? 1 + self[0].dimension : 0
end
end
class Numeric
+ # Convert number to percent
+ # @return [Float]
def percent_of(n)
self.to_f / n.to_f * 100.0
end
end
class Float
- # round to n significant digits
- # http://stackoverflow.com/questions/8382619/how-to-round-a-float-to-a-specified-number-of-significant-digits-in-ruby
+ # Round to n significant digits
+ # http://stackoverflow.com/questions/8382619/how-to-round-a-float-to-a-specified-number-of-significant-digits-in-ruby
+ # @param [Fixnum]
+ # @return [Float]
def signif(n)
Float("%.#{n}g" % self)
end
- # converts -10 logarithmized values back
+ # Convert -10 log values to original values
+ # @return [Float]
def delog10
10**(-1*self)
end
end
module Enumerable
- # @return [Array] only the duplicates of an enumerable
+ # Get duplicates
+ # @return [Array]
def duplicates
inject({}) {|h,v| h[v]=h[v].to_i+1; h}.reject{|k,v| v==1}.keys
end
@@ -51,7 +61,10 @@ module Enumerable
end
class String
- # @return [String] converts camel-case to underscore-case (OpenTox::SuperModel -> open_tox/super_model)
+ # Convert camel-case to underscore-case
+ # @example
+ # OpenTox::SuperModel -> open_tox/super_model
+ # @return [String]
def underscore
self.gsub(/::/, '/').
gsub(/([A-Z]+)([A-Z][a-z])/,'\1_\2').
@@ -60,7 +73,7 @@ class String
downcase
end
- # convert strings to boolean values
+ # Convert strings to boolean values
# @return [TrueClass,FalseClass] true or false
def to_boolean
return true if self == true || self =~ (/(true|t|yes|y|1)$/i)
@@ -71,7 +84,8 @@ class String
end
class File
- # @return [String] mime_type including charset using linux cmd command
+ # Get mime_type including charset using linux file command
+ # @return [String]
def mime_type
`file -ib '#{self.path}'`.chomp
end
@@ -79,7 +93,7 @@ end
class Array
- # Sum up the size of single arrays in an array of arrays
+ # Sum the size of single arrays in an array of arrays
# @param [Array] Array of arrays
# @return [Integer] Sum of size of array elements
def sum_size
@@ -92,33 +106,43 @@ class Array
}
end
- # For symbolic features
+ # Check if the array has just one unique value.
# @param [Array] Array to test.
- # @return [Boolean] Whether the array has just one unique value.
+ # @return [TrueClass,FalseClass]
def zero_variance?
return self.uniq.size == 1
end
+ # Get the median of an array
+ # @return [Numeric]
def median
sorted = self.sort
len = sorted.length
(sorted[(len - 1) / 2] + sorted[len / 2]) / 2.0
end
+ # Get the mean of an array
+ # @return [Numeric]
def mean
self.compact.inject{ |sum, el| sum + el }.to_f / self.compact.size
end
+ # Get the variance of an array
+ # @return [Numeric]
def sample_variance
m = self.mean
sum = self.compact.inject(0){|accum, i| accum +(i-m)**2 }
sum/(self.compact.length - 1).to_f
end
+ # Get the standard deviation of an array
+ # @return [Numeric]
def standard_deviation
Math.sqrt(self.sample_variance)
end
+ # Convert array values for R
+ # @return [Array]
def for_R
if self.first.is_a?(String)
#"\"#{self.collect{|v| v.sub('[','').sub(']','')}.join(" ")}\"" # quote and remove square brackets
@@ -128,6 +152,8 @@ class Array
end
end
+ # Collect array with index
+ # in analogy to each_with_index
def collect_with_index
result = []
self.each_with_index do |elt, idx|
@@ -139,11 +165,15 @@ end
module URI
+ # Is it a https connection
+ # @param [String]
+ # @return [TrueClass,FalseClass]
def self.ssl? uri
URI.parse(uri).instance_of? URI::HTTPS
end
- # @return [Boolean] checks if resource exists by making a HEAD-request
+ # Check if a http resource exists by making a HEAD-request
+ # @return [TrueClass,FalseClass]
def self.accessible?(uri)
parsed_uri = URI.parse(uri + (OpenTox::RestClientWrapper.subjectid ? "?subjectid=#{CGI.escape OpenTox::RestClientWrapper.subjectid}" : ""))
http_code = URI.task?(uri) ? 600 : 400
@@ -163,6 +193,9 @@ module URI
false
end
+ # Is the URI valid
+ # @param [String]
+ # @return [TrueClass,FalseClass]
def self.valid? uri
u = URI.parse(uri)
u.scheme!=nil and u.host!=nil
@@ -170,6 +203,8 @@ module URI
false
end
+ # Is the URI a task URI
+ # @param [String]
def self.task? uri
uri =~ /task/ and URI.valid? uri
end
diff --git a/lib/physchem.rb b/lib/physchem.rb
index 327acd8..07df867 100644
--- a/lib/physchem.rb
+++ b/lib/physchem.rb
@@ -39,6 +39,9 @@ module OpenTox
require_relative "unique_descriptors.rb"
+ # Get descriptor features
+ # @param [Hash]
+ # @return [Array<OpenTox::PhysChem>]
def self.descriptors desc=DESCRIPTORS
desc.collect do |name,description|
lib,desc = name.split('.',2)
@@ -46,6 +49,8 @@ module OpenTox
end
end
+ # Get unique descriptor features
+ # @return [Array<OpenTox::PhysChem>]
def self.unique_descriptors
udesc = []
UNIQUEDESCRIPTORS.each do |name|
@@ -64,23 +69,28 @@ module OpenTox
udesc
end
+ # Get OpenBabel descriptor features
+ # @return [Array<OpenTox::PhysChem>]
def self.openbabel_descriptors
descriptors OPENBABEL
end
+ # Get CDK descriptor features
+ # @return [Array<OpenTox::PhysChem>]
def self.cdk_descriptors
descriptors CDK
end
+ # Get JOELIB descriptor features
+ # @return [Array<OpenTox::PhysChem>]
def self.joelib_descriptors
descriptors JOELIB
end
- def calculate compound
- result = send library.downcase,descriptor,compound
- result[self.name]
- end
-
+ # Calculate OpenBabel descriptors
+ # @param [String] descriptor type
+ # @param [OpenTox::Compound]
+ # @return [Hash]
def openbabel descriptor, compound
obdescriptor = OpenBabel::OBDescriptor.find_type descriptor
obmol = OpenBabel::OBMol.new
@@ -90,10 +100,18 @@ module OpenTox
{"#{library.capitalize}.#{descriptor}" => fix_value(obdescriptor.predict(obmol))}
end
+ # Calculate CDK descriptors
+ # @param [String] descriptor type
+ # @param [OpenTox::Compound]
+ # @return [Hash]
def cdk descriptor, compound
java_descriptor "cdk", descriptor, compound
end
+ # Calculate JOELIB descriptors
+ # @param [String] descriptor type
+ # @param [OpenTox::Compound]
+ # @return [Hash]
def joelib descriptor, compound
java_descriptor "joelib", descriptor, compound
end
diff --git a/lib/regression.rb b/lib/regression.rb
index 3890987..fd2855f 100644
--- a/lib/regression.rb
+++ b/lib/regression.rb
@@ -1,8 +1,13 @@
module OpenTox
module Algorithm
+ # Regression algorithms
class Regression
+ # Weighted average
+ # @param [Array<Float>] dependent_variables
+ # @param [Array<Float>] weights
+ # @return [Hash]
def self.weighted_average dependent_variables:, independent_variables:nil, weights:, query_variables:nil
# TODO: prediction_interval
weighted_sum = 0.0
diff --git a/lib/rest-client-wrapper.rb b/lib/rest-client-wrapper.rb
index 2073be2..f76a296 100644
--- a/lib/rest-client-wrapper.rb
+++ b/lib/rest-client-wrapper.rb
@@ -1,5 +1,6 @@
module OpenTox
+ # Adjustments to the rest-client gem for OpenTox
class RestClientWrapper
attr_accessor :request, :response
diff --git a/lib/similarity.rb b/lib/similarity.rb
index 0901936..ccbc9d6 100644
--- a/lib/similarity.rb
+++ b/lib/similarity.rb
@@ -2,6 +2,10 @@ module OpenTox
module Algorithm
class Vector
+ # Get dot product
+ # @param [Vector]
+ # @param [Vector]
+ # @return [Numeric]
def self.dot_product(a, b)
products = a.zip(b).map{|a, b| a * b}
products.inject(0) {|s,p| s + p}
@@ -15,6 +19,9 @@ module OpenTox
class Similarity
+ # Get Tanimoto similarity
+ # @param [Array<Array<Float>>]
+ # @return [Float]
def self.tanimoto fingerprints
( fingerprints[0] & fingerprints[1]).size/(fingerprints[0]|fingerprints[1]).size.to_f
end
@@ -23,18 +30,28 @@ module OpenTox
#( fingerprints[0] & fingerprints[1]).size/(fingerprints[0]|fingerprints[1]).size.to_f
#end
+ # Get Euclidean distance
+ # @param [Array<Array<Float>>]
+ # @return [Float]
def self.euclid scaled_properties
sq = scaled_properties[0].zip(scaled_properties[1]).map{|a,b| (a - b) ** 2}
Math.sqrt(sq.inject(0) {|s,c| s + c})
end
- # http://stackoverflow.com/questions/1838806/euclidean-distance-vs-pearson-correlation-vs-cosine-similarity
+ # Get cosine similarity
+ # http://stackoverflow.com/questions/1838806/euclidean-distance-vs-pearson-correlation-vs-cosine-similarity
+ # @param [Array<Array<Float>>]
+ # @return [Float]
def self.cosine scaled_properties
scaled_properties = remove_nils scaled_properties
Algorithm::Vector.dot_product(scaled_properties[0], scaled_properties[1]) / (Algorithm::Vector.magnitude(scaled_properties[0]) * Algorithm::Vector.magnitude(scaled_properties[1]))
end
- def self.weighted_cosine scaled_properties # [a,b,weights]
+ # Get weighted cosine similarity
+ # http://stackoverflow.com/questions/1838806/euclidean-distance-vs-pearson-correlation-vs-cosine-similarity
+ # @param [Array<Array<Float>>] [a,b,weights]
+ # @return [Float]
+ def self.weighted_cosine scaled_properties
a,b,w = remove_nils scaled_properties
return cosine(scaled_properties) if w.uniq.size == 1
dot_product = 0
@@ -48,6 +65,9 @@ module OpenTox
dot_product/(Math.sqrt(magnitude_a)*Math.sqrt(magnitude_b))
end
+ # Remove nil values
+ # @param [Array<Array<Float>>] [a,b,weights]
+ # @return [Array<Array<Float>>] [a,b,weights]
def self.remove_nils scaled_properties
a =[]; b = []; w = []
(0..scaled_properties.first.size-1).each do |i|
diff --git a/lib/substance.rb b/lib/substance.rb
index 31c465e..ef49659 100644
--- a/lib/substance.rb
+++ b/lib/substance.rb
@@ -1,5 +1,6 @@
module OpenTox
+ # Base class for substances (e.g. compounds, nanoparticles)
class Substance
field :properties, type: Hash, default: {}
field :dataset_ids, type: Array, default: []