# Provenance (extracted from a cgit v1.2.3 patch view):
#   From c90644211e214a50f6fdb3a936bf247f45f1f4be Mon Sep 17 00:00:00 2001
#   From: Christoph Helma
#   Date: Fri, 13 May 2016 13:38:24 +0200
#   Subject: compound tests fixed
#   File: lib/similarity.rb (new file, mode 100644, index 0000000..f25d4c3)

module OpenTox
  module Algorithm
    # Elementary operations on numeric vectors represented as arrays.
    class Vector
      # Dot product of two numeric vectors.
      #
      # @param a [Array<Numeric>] first vector
      # @param b [Array<Numeric>] second vector, same size as +a+
      # @return [Numeric] sum of the element-wise products (0 for empty vectors)
      # @raise [ArgumentError] if the vectors differ in size
      def self.dot_product(a, b)
        # zip would silently drop surplus elements of b (or pad with nil),
        # so a size mismatch has to be rejected explicitly.
        raise ArgumentError, "vector sizes differ (#{a.size} vs #{b.size})" unless a.size == b.size

        a.zip(b).inject(0) { |total, (x, y)| total + x * y }
      end

      # Euclidean length of a numeric vector.
      #
      # @param point [Array<Numeric>] vector components
      # @return [Float] square root of the sum of squared components
      def self.magnitude(point)
        Math.sqrt(point.inject(0) { |total, x| total + x**2 })
      end
    end

    # Similarity and distance measures for feature collections and vectors.
    class Similarity
      # Tanimoto (Jaccard) coefficient: size of the intersection divided by
      # the size of the union.
      #
      # @param a [Array] first collection (must support +&+ and +|+)
      # @param b [Array] second collection
      # @return [Float] value between 0.0 and 1.0; NaN when the union is empty
      #   (the division is 0 / 0.0 in that case)
      def self.tanimoto(a, b)
        (a & b).size / (a | b).size.to_f
      end

      # Euclidean distance between two numeric vectors.
      #
      # @param a [Array<Numeric>] first vector
      # @param b [Array<Numeric>] second vector, same size as +a+
      # @return [Float] square root of the summed squared differences
      # @raise [ArgumentError] if the vectors differ in size
      def self.euclid(a, b)
        raise ArgumentError, "vector sizes differ (#{a.size} vs #{b.size})" unless a.size == b.size

        Math.sqrt(a.zip(b).inject(0) { |total, (x, y)| total + (x - y)**2 })
      end

      # Cosine similarity: dot product divided by the product of magnitudes.
      # http://stackoverflow.com/questions/1838806/euclidean-distance-vs-pearson-correlation-vs-cosine-similarity
      #
      # @param a [Array<Numeric>] first vector
      # @param b [Array<Numeric>] second vector, same size as +a+
      # @return [Float] cosine of the angle between the vectors; NaN when
      #   either vector has zero magnitude (zero divided by 0.0)
      # @raise [ArgumentError] if the vectors differ in size (from Vector.dot_product)
      def self.cosine(a, b)
        Vector.dot_product(a, b) / (Vector.magnitude(a) * Vector.magnitude(b))
      end

      # Cosine similarity in which every dimension is scaled by the absolute
      # value of its weight.
      #
      # @param a [Array<Numeric>] first vector
      # @param b [Array<Numeric>] second vector, same size as +a+
      # @param w [Array<Numeric>] per-dimension weights, same size as +a+;
      #   only their absolute values are used
      # @return [Float] weighted cosine similarity; NaN when a weighted
      #   magnitude is zero
      # @raise [ArgumentError] if the three arrays differ in size
      def self.weighted_cosine(a, b, w)
        unless a.size == b.size && a.size == w.size
          raise ArgumentError, "vector sizes differ (#{a.size}, #{b.size}, weights #{w.size})"
        end

        dot_product = 0
        magnitude_a = 0
        magnitude_b = 0
        a.zip(b, w).each do |x, y, weight|
          scale = weight.abs
          dot_product += scale * x * y
          magnitude_a += scale * x**2
          magnitude_b += scale * y**2
        end
        dot_product / Math.sqrt(magnitude_a * magnitude_b)
      end
    end
  end
end