summary | refs | log | tree | commit | diff
path: root/lib/similarity.rb
diff options
context:
space:
mode:
author: Christoph Helma <helma@in-silico.ch> 2016-05-13 13:38:24 +0200
committer: Christoph Helma <helma@in-silico.ch> 2016-05-13 13:38:24 +0200
commit: c90644211e214a50f6fdb3a936bf247f45f1f4be (patch)
tree: 9ae3f0b33feb55f3904c4d7a08e39567223b07aa /lib/similarity.rb
parent: b8bb12c8a163c238d7d4387c1914e2100bb660df (diff)
compound tests fixed
Diffstat (limited to 'lib/similarity.rb')
-rw-r--r-- lib/similarity.rb 46
1 files changed, 46 insertions, 0 deletions
diff --git a/lib/similarity.rb b/lib/similarity.rb
new file mode 100644
index 0000000..f25d4c3
--- /dev/null
+++ b/lib/similarity.rb
@@ -0,0 +1,46 @@
module OpenTox
  module Algorithm

    # Elementary vector arithmetic on plain sequences of numbers.
    class Vector

      # Dot product of two numeric sequences.
      #
      # Elements are paired by position with Array#zip, so the pairing is
      # driven by +a+: extra elements of +b+ are ignored, and a shorter +b+
      # yields nil partners (which makes the multiplication raise).
      # Callers are expected to pass sequences of equal length.
      #
      # @param a [Array<Numeric>] first vector
      # @param b [Array<Numeric>] second vector
      # @return [Numeric] sum of the pairwise products (0 for empty +a+)
      def self.dot_product(a, b)
        a.zip(b).inject(0) { |sum, (x, y)| sum + x * y }
      end

      # Euclidean length (L2 norm) of a numeric sequence.
      #
      # @param point [Array<Numeric>] vector components
      # @return [Float] square root of the sum of squared components
      def self.magnitude(point)
        Math.sqrt(point.inject(0) { |sum, x| sum + x**2 })
      end
    end

    # Similarity and distance measures.
    class Similarity

      # Tanimoto (Jaccard) coefficient: shared elements divided by the
      # number of distinct elements across both collections.
      #
      # @param a [Array] first collection (anything responding to & and |)
      # @param b [Array] second collection
      # @return [Float] value in 0.0..1.0; 0.0 when both collections are
      #   empty (the plain ratio would be 0/0 = NaN, which cannot be
      #   compared or sorted)
      def self.tanimoto(a, b)
        union_size = (a | b).size
        return 0.0 if union_size.zero?
        (a & b).size / union_size.to_f
      end

      # Euclidean distance between two numeric sequences.
      #
      # Elements are paired by position with Array#zip (see
      # Vector.dot_product for what that implies for unequal lengths).
      #
      # @param a [Array<Numeric>] first vector
      # @param b [Array<Numeric>] second vector
      # @return [Float] square root of the summed squared differences
      def self.euclid(a, b)
        Math.sqrt(a.zip(b).inject(0) { |sum, (x, y)| sum + (x - y)**2 })
      end

      # Cosine similarity of two numeric sequences.
      # http://stackoverflow.com/questions/1838806/euclidean-distance-vs-pearson-correlation-vs-cosine-similarity
      #
      # @param a [Array<Numeric>] first vector
      # @param b [Array<Numeric>] second vector
      # @return [Float] dot product divided by the product of both
      #   magnitudes; 0.0 when either magnitude is zero (the angle is
      #   undefined there and the plain ratio would be NaN)
      def self.cosine(a, b)
        dot = Algorithm::Vector.dot_product(a, b)
        norm = Algorithm::Vector.magnitude(a) * Algorithm::Vector.magnitude(b)
        return 0.0 if norm.zero?
        dot / norm
      end

      # Cosine similarity in which every dimension i contributes with the
      # absolute value of its weight w[i].
      #
      # Iterates over the indices of +a+; +b+ and +w+ are read at the same
      # indices and are expected to be at least as long as +a+.
      #
      # @param a [Array<Numeric>] first vector
      # @param b [Array<Numeric>] second vector
      # @param w [Array<Numeric>] per-dimension weights (sign is ignored)
      # @return [Float] weighted dot product divided by the square root of
      #   the product of the weighted squared magnitudes; 0.0 when that
      #   denominator is zero (plain ratio would be NaN)
      def self.weighted_cosine(a, b, w)
        dot_product = 0
        magnitude_a = 0
        magnitude_b = 0
        a.size.times do |i|
          weight = w[i].abs
          dot_product += weight * a[i] * b[i]
          magnitude_a += weight * a[i]**2
          magnitude_b += weight * b[i]**2
        end
        denominator = Math.sqrt(magnitude_a * magnitude_b)
        return 0.0 if denominator.zero?
        dot_product / denominator
      end

    end
  end
end