blob: 346a342b091df646809456284d9e1b3fa6df0567 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
|
class Distance
  # Euclidean distance between two numeric vectors.
  #
  # The vectors are paired element by element with Array#zip, so the
  # length of the first vector decides how many pairs are compared.
  #
  # @param [Array<Array<Float>>] variables the two vectors, as [a, b]
  # @return [Float] square root of the summed squared differences
  def self.euclid(variables)
    first, second = variables[0], variables[1]
    # Accumulate left to right starting from 0 so the floating point
    # summation order stays fixed.
    sum_of_squares = first.zip(second).reduce(0) do |total, (x, y)|
      total + (x - y)**2
    end
    Math.sqrt(sum_of_squares)
  end
end
class Similarity
  # Tanimoto similarity: size of the intersection of two fingerprints
  # divided by the size of their union.
  #
  # Two empty fingerprints give 0 / 0.0, i.e. Float::NAN.
  #
  # @param [Array<Array<String>>] fingerprints the two fingerprints, as [a, b]
  # @return [Float]
  def self.tanimoto(fingerprints)
    first, second = fingerprints[0], fingerprints[1]
    shared = first & second
    combined = first | second
    shared.size / combined.size.to_f
  end

  # Cosine similarity: dot product of two vectors divided by the product
  # of their magnitudes.
  # http://stackoverflow.com/questions/1838806/euclidean-distance-vs-pearson-correlation-vs-cosine-similarity
  #
  # NOTE(review): relies on the vectors responding to #dot_product and
  # #magnitude; neither is defined in this part of the file, presumably
  # they are provided elsewhere in the project - confirm.
  #
  # @param [Array<Array<Float>>] variables the two vectors, as [a, b]
  # @return [Float]
  def self.cosine(variables)
    first, second = variables[0], variables[1]
    numerator = first.dot_product(second)
    denominator = first.magnitude * second.magnitude
    numerator / denominator
  end

  # The region between =begin and =end is disabled code kept for reference.
=begin
  # Get weighted cosine similarity
  # http://stackoverflow.com/questions/1838806/euclidean-distance-vs-pearson-correlation-vs-cosine-similarity
  # @param [Array<Array<Float>>] [a,b,weights]
  # @return [Float]
  def self.weighted_cosine scaled_properties
    a,b,w = remove_nils scaled_properties
    return cosine(scaled_properties) if w.uniq.size == 1
    dot_product = 0
    magnitude_a = 0
    magnitude_b = 0
    (0..a.size-1).each do |i|
      dot_product += w[i].abs*a[i]*b[i]
      magnitude_a += w[i].abs*a[i]**2
      magnitude_b += w[i].abs*b[i]**2
    end
    dot_product/(Math.sqrt(magnitude_a)*Math.sqrt(magnitude_b))
  end
  # Remove nil values
  # @param [Array<Array<Float>>] [a,b,weights]
  # @return [Array<Array<Float>>] [a,b,weights]
  def self.remove_nils scaled_properties
    a =[]; b = []; w = []
    (0..scaled_properties.first.size-1).each do |i|
      if scaled_properties[0][i] and scaled_properties[1][i] and !scaled_properties[0][i].nan? and !scaled_properties[1][i].nan?
        a << scaled_properties[0][i]
        b << scaled_properties[1][i]
        w << scaled_properties[2][i]
      end
    end
    [a,b,w]
  end
=end
end
|