From c90644211e214a50f6fdb3a936bf247f45f1f4be Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Fri, 13 May 2016 13:38:24 +0200 Subject: compound tests fixed --- lib/similarity.rb | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 lib/similarity.rb (limited to 'lib/similarity.rb') diff --git a/lib/similarity.rb b/lib/similarity.rb new file mode 100644 index 0000000..f25d4c3 --- /dev/null +++ b/lib/similarity.rb @@ -0,0 +1,46 @@ +module OpenTox + module Algorithm + + class Vector + def self.dot_product(a, b) + products = a.zip(b).map{|a, b| a * b} + products.inject(0) {|s,p| s + p} + end + + def self.magnitude(point) + squares = point.map{|x| x ** 2} + Math.sqrt(squares.inject(0) {|s, c| s + c}) + end + end + + class Similarity + + def self.tanimoto a, b + ( a & b).size/(a|b).size.to_f + end + + def self.euclid a, b + sq = a.zip(b).map{|a,b| (a - b) ** 2} + Math.sqrt(sq.inject(0) {|s,c| s + c}) + end + + # http://stackoverflow.com/questions/1838806/euclidean-distance-vs-pearson-correlation-vs-cosine-similarity + def self.cosine a, b + Algorithm::Vector.dot_product(a, b) / (Algorithm::Vector.magnitude(a) * Algorithm::Vector.magnitude(b)) + end + + def self.weighted_cosine(a, b, w) + dot_product = 0 + magnitude_a = 0 + magnitude_b = 0 + (0..a.size-1).each do |i| + dot_product += w[i].abs*a[i]*b[i] + magnitude_a += w[i].abs*a[i]**2 + magnitude_b += w[i].abs*b[i]**2 + end + dot_product/Math.sqrt(magnitude_a*magnitude_b) + end + + end + end +end -- cgit v1.2.3 From f46ba3b7262f5b551c81fc9396c5b7f0cac7f030 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Fri, 27 May 2016 19:16:16 +0200 Subject: first correlation of nanoparticle predictions --- lib/similarity.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/similarity.rb') diff --git a/lib/similarity.rb b/lib/similarity.rb index f25d4c3..00179c1 100644 --- a/lib/similarity.rb +++ b/lib/similarity.rb @@ -38,7 +38,7 @@ module OpenTox magnitude_a += w[i].abs*a[i]**2 magnitude_b += w[i].abs*b[i]**2 end - dot_product/Math.sqrt(magnitude_a*magnitude_b) + dot_product/(Math.sqrt(magnitude_a)*Math.sqrt(magnitude_b)) end end -- cgit v1.2.3 From 5d4e5e463c2b87241bbb56e4658e1e26c0ed084f Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Wed, 5 Oct 2016 13:22:12 +0200 Subject: substance and nanoparticle model creation and predictions --- lib/similarity.rb | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'lib/similarity.rb') diff --git a/lib/similarity.rb b/lib/similarity.rb index 00179c1..b9b4571 100644 --- a/lib/similarity.rb +++ b/lib/similarity.rb @@ -15,21 +15,22 @@ module OpenTox class Similarity - def self.tanimoto a, b - ( a & b).size/(a|b).size.to_f + def self.tanimoto fingerprints + ( fingerprints[0] & fingerprints[1]).size/(fingerprints[0]|fingerprints[1]).size.to_f end - def self.euclid a, b - sq = a.zip(b).map{|a,b| (a - b) ** 2} + def self.euclid fingerprints + sq = fingerprints[0].zip(fingerprints[1]).map{|a,b| (a - b) ** 2} Math.sqrt(sq.inject(0) {|s,c| s + c}) end # http://stackoverflow.com/questions/1838806/euclidean-distance-vs-pearson-correlation-vs-cosine-similarity - def self.cosine a, b - Algorithm::Vector.dot_product(a, b) / (Algorithm::Vector.magnitude(a) * Algorithm::Vector.magnitude(b)) + def self.cosine fingerprints + Algorithm::Vector.dot_product(fingerprints[0], fingerprints[1]) / (Algorithm::Vector.magnitude(fingerprints[0]) * Algorithm::Vector.magnitude(fingerprints[1])) end - def self.weighted_cosine(a, b, w) + def self.weighted_cosine fingerprints # [a,b,weights] + a, b, w = fingerprints dot_product = 0 magnitude_a = 0 magnitude_b = 0 -- cgit v1.2.3 From dc4ab1f4e64d738d6c0b70f0b690a2359685080f Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Wed, 12 Oct 2016 21:32:27 +0200 Subject: physchem regression, correlation_filter for fingerprints --- lib/similarity.rb | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) (limited to 'lib/similarity.rb') diff --git a/lib/similarity.rb b/lib/similarity.rb index b9b4571..328d42a 100644 --- a/lib/similarity.rb +++ b/lib/similarity.rb @@ -19,18 +19,19 @@ module OpenTox ( fingerprints[0] & fingerprints[1]).size/(fingerprints[0]|fingerprints[1]).size.to_f end - def self.euclid fingerprints - sq = fingerprints[0].zip(fingerprints[1]).map{|a,b| (a - b) ** 2} + def self.euclid scaled_properties + sq = scaled_properties[0].zip(scaled_properties[1]).map{|a,b| (a - b) ** 2} Math.sqrt(sq.inject(0) {|s,c| s + c}) end # http://stackoverflow.com/questions/1838806/euclidean-distance-vs-pearson-correlation-vs-cosine-similarity - def self.cosine fingerprints - Algorithm::Vector.dot_product(fingerprints[0], fingerprints[1]) / (Algorithm::Vector.magnitude(fingerprints[0]) * Algorithm::Vector.magnitude(fingerprints[1])) + def self.cosine scaled_properties + scaled_properties = remove_nils scaled_properties + Algorithm::Vector.dot_product(scaled_properties[0], scaled_properties[1]) / (Algorithm::Vector.magnitude(scaled_properties[0]) * Algorithm::Vector.magnitude(scaled_properties[1])) end - def self.weighted_cosine fingerprints # [a,b,weights] - a, b, w = fingerprints + def self.weighted_cosine scaled_properties # [a,b,weights] + a,b,w = remove_nils scaled_properties dot_product = 0 magnitude_a = 0 magnitude_b = 0 @@ -42,6 +43,18 @@ module OpenTox dot_product/(Math.sqrt(magnitude_a)*Math.sqrt(magnitude_b)) end + def self.remove_nils scaled_properties + a =[]; b = []; w = [] + (0..scaled_properties.first.size-1).each do |i| + if scaled_properties[0][i] and scaled_properties[1][i] and !scaled_properties[0][i].nan? and !scaled_properties[1][i].nan? + a << scaled_properties[0][i] + b << scaled_properties[1][i] + w << scaled_properties[2][i] + end + end + [a,b,w] + end + end end end -- cgit v1.2.3 From 160e75e696452ac61e651664ac56d16ce1c9c4b6 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Thu, 13 Oct 2016 19:17:03 +0200 Subject: model tests separated and cleaned --- lib/similarity.rb | 1 + 1 file changed, 1 insertion(+) (limited to 'lib/similarity.rb') diff --git a/lib/similarity.rb b/lib/similarity.rb index 328d42a..772e812 100644 --- a/lib/similarity.rb +++ b/lib/similarity.rb @@ -32,6 +32,7 @@ module OpenTox def self.weighted_cosine scaled_properties # [a,b,weights] a,b,w = remove_nils scaled_properties + return cosine(scaled_properties) if w.uniq.size == 1 dot_product = 0 magnitude_a = 0 magnitude_b = 0 -- cgit v1.2.3 From 9a06f2ff5ae6bdbe7dc90555599e186f1585e0d2 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Thu, 10 Nov 2016 15:27:26 +0100 Subject: Model::NanoPrediction parameters --- lib/similarity.rb | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'lib/similarity.rb') diff --git a/lib/similarity.rb b/lib/similarity.rb index 772e812..0901936 100644 --- a/lib/similarity.rb +++ b/lib/similarity.rb @@ -19,6 +19,10 @@ module OpenTox ( fingerprints[0] & fingerprints[1]).size/(fingerprints[0]|fingerprints[1]).size.to_f end + #def self.weighted_tanimoto fingerprints + #( fingerprints[0] & fingerprints[1]).size/(fingerprints[0]|fingerprints[1]).size.to_f + #end + def self.euclid scaled_properties sq = scaled_properties[0].zip(scaled_properties[1]).map{|a,b| (a - b) ** 2} Math.sqrt(sq.inject(0) {|s,c| s + c}) -- cgit v1.2.3