diff options
author | Christoph Helma <helma@in-silico.ch> | 2016-10-12 21:32:27 +0200 |
---|---|---|
committer | Christoph Helma <helma@in-silico.ch> | 2016-10-12 21:32:27 +0200 |
commit | dc4ab1f4e64d738d6c0b70f0b690a2359685080f (patch) | |
tree | 054ae887bf978b519a95dce5dbead59bbc67a2bb /lib/similarity.rb | |
parent | 1ec5ad2c67f270287499980a794e51bc9a6bbd84 (diff) |
physchem regression, correlation_filter for fingerprints
Diffstat (limited to 'lib/similarity.rb')
-rw-r--r-- | lib/similarity.rb | 25 |
1 files changed, 19 insertions, 6 deletions
```diff
diff --git a/lib/similarity.rb b/lib/similarity.rb
index b9b4571..328d42a 100644
--- a/lib/similarity.rb
+++ b/lib/similarity.rb
@@ -19,18 +19,19 @@ module OpenTox
         ( fingerprints[0] & fingerprints[1]).size/(fingerprints[0]|fingerprints[1]).size.to_f
       end
 
-      def self.euclid fingerprints
-        sq = fingerprints[0].zip(fingerprints[1]).map{|a,b| (a - b) ** 2}
+      def self.euclid scaled_properties
+        sq = scaled_properties[0].zip(scaled_properties[1]).map{|a,b| (a - b) ** 2}
         Math.sqrt(sq.inject(0) {|s,c| s + c})
       end
 
       # http://stackoverflow.com/questions/1838806/euclidean-distance-vs-pearson-correlation-vs-cosine-similarity
-      def self.cosine fingerprints
-        Algorithm::Vector.dot_product(fingerprints[0], fingerprints[1]) / (Algorithm::Vector.magnitude(fingerprints[0]) * Algorithm::Vector.magnitude(fingerprints[1]))
+      def self.cosine scaled_properties
+        scaled_properties = remove_nils scaled_properties
+        Algorithm::Vector.dot_product(scaled_properties[0], scaled_properties[1]) / (Algorithm::Vector.magnitude(scaled_properties[0]) * Algorithm::Vector.magnitude(scaled_properties[1]))
       end
 
-      def self.weighted_cosine fingerprints # [a,b,weights]
-        a, b, w = fingerprints
+      def self.weighted_cosine scaled_properties # [a,b,weights]
+        a,b,w = remove_nils scaled_properties
         dot_product = 0
         magnitude_a = 0
         magnitude_b = 0
@@ -42,6 +43,18 @@ module OpenTox
         dot_product/(Math.sqrt(magnitude_a)*Math.sqrt(magnitude_b))
       end
 
+      def self.remove_nils scaled_properties
+        a =[]; b = []; w = []
+        (0..scaled_properties.first.size-1).each do |i|
+          if scaled_properties[0][i] and scaled_properties[1][i] and !scaled_properties[0][i].nan? and !scaled_properties[1][i].nan?
+            a << scaled_properties[0][i]
+            b << scaled_properties[1][i]
+            w << scaled_properties[2][i]
+          end
+        end
+        [a,b,w]
+      end
+
     end
   end
 end
```