summary refs log tree commit diff
path: root/lib/similarity.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/similarity.rb')
-rw-r--r-- lib/similarity.rb 25
1 files changed, 19 insertions, 6 deletions
diff --git a/lib/similarity.rb b/lib/similarity.rb
index b9b4571..328d42a 100644
--- a/lib/similarity.rb
+++ b/lib/similarity.rb
@@ -19,18 +19,19 @@ module OpenTox
( fingerprints[0] & fingerprints[1]).size/(fingerprints[0]|fingerprints[1]).size.to_f
end
- def self.euclid fingerprints
- sq = fingerprints[0].zip(fingerprints[1]).map{|a,b| (a - b) ** 2}
+ def self.euclid scaled_properties
+ sq = scaled_properties[0].zip(scaled_properties[1]).map{|a,b| (a - b) ** 2}
Math.sqrt(sq.inject(0) {|s,c| s + c})
end
# http://stackoverflow.com/questions/1838806/euclidean-distance-vs-pearson-correlation-vs-cosine-similarity
- def self.cosine fingerprints
- Algorithm::Vector.dot_product(fingerprints[0], fingerprints[1]) / (Algorithm::Vector.magnitude(fingerprints[0]) * Algorithm::Vector.magnitude(fingerprints[1]))
+ def self.cosine scaled_properties
+ scaled_properties = remove_nils scaled_properties
+ Algorithm::Vector.dot_product(scaled_properties[0], scaled_properties[1]) / (Algorithm::Vector.magnitude(scaled_properties[0]) * Algorithm::Vector.magnitude(scaled_properties[1]))
end
- def self.weighted_cosine fingerprints # [a,b,weights]
- a, b, w = fingerprints
+ def self.weighted_cosine scaled_properties # [a,b,weights]
+ a,b,w = remove_nils scaled_properties
dot_product = 0
magnitude_a = 0
magnitude_b = 0
@@ -42,6 +43,18 @@ module OpenTox
dot_product/(Math.sqrt(magnitude_a)*Math.sqrt(magnitude_b))
end
+ def self.remove_nils scaled_properties
+ a =[]; b = []; w = []
+ (0..scaled_properties.first.size-1).each do |i|
+ if scaled_properties[0][i] and scaled_properties[1][i] and !scaled_properties[0][i].nan? and !scaled_properties[1][i].nan?
+ a << scaled_properties[0][i]
+ b << scaled_properties[1][i]
+ w << scaled_properties[2][i]
+ end
+ end
+ [a,b,w]
+ end
+
end
end
end