diff options
Diffstat (limited to 'transform.rb')
-rw-r--r-- | transform.rb | 243 |
1 files changed, 198 insertions, 45 deletions
diff --git a/transform.rb b/transform.rb index a00f87d..5754393 100644 --- a/transform.rb +++ b/transform.rb @@ -2,54 +2,207 @@ require 'rubygems' require 'opentox-ruby' require 'test/unit' +class Float + def round_to(x) + (self * 10**x).round.to_f / 10**x + end +end class TransformTest < Test::Unit::TestCase -def test_mlr - 2.times { - n_prop = [ [1,1], [2,2], [3,3] ] # erste WH - acts = [ 3,2,3 ] # should yield a constant y=2.8 - sims = [ 4,2,4 ] # move constant closer to 3.0 - q_prop = [0.5,0.5] # extrapolation - params={:n_prop => n_prop, :q_prop => q_prop, :sims => sims, :acts => acts} - - prediction = OpenTox::Algorithm::Neighbors.mlr(params) - assert_in_delta prediction, 2.8, 1.0E-10 # small deviations, don't know why - - q_prop = [1.5,1.5] # interpolation - prediction = OpenTox::Algorithm::Neighbors.mlr(params) - assert_in_delta prediction, 2.8, 1.0E-10 # small deviations, don't know why - } -end + #def test_mlr + # 2.times { + # n_prop = [ [1,1], [2,2], [3,3] ] # erste WH + # acts = [ 3,2,3 ] # should yield a constant y=2.8 + # sims = [ 4,2,4 ] # move constant closer to 3.0 + # q_prop = [0.5,0.5] # extrapolation + # params={:n_prop => n_prop, :q_prop => q_prop, :sims => sims, :acts => acts} + # + # prediction = OpenTox::Algorithm::Neighbors.mlr(params) + # assert_in_delta prediction, 2.8, 1.0E-10 # small deviations, don't know why + # + # q_prop = [1.5,1.5] # interpolation + # prediction = OpenTox::Algorithm::Neighbors.mlr(params) + # assert_in_delta prediction, 2.8, 1.0E-10 # small deviations, don't know why + # } + #end + + def test_pca + + d = GSL::Matrix.alloc([1.0, -5, 1.1, 2.0, -5, 1.9, 3.0, -5, 3.3], 3, 3) # 2nd col is const -5, gets removed + rd = GSL::Matrix.alloc([1.0, 1.1, 1.9, 2.0, 3.1, 3.2], 3, 2) + td = GSL::Matrix.alloc([-1.4142135623731, -0.14142135623731, 1.5556349186104],3,1) + ev = GSL::Matrix.alloc([0.707106781186548, 0.707106781186548], 2, 1) + + # Lossy + 2.times do # repeat to ensure idempotency + pca = OpenTox::Transform::PCA.new(d, 0.05) + assert_equal pca.data_matrix, d + assert_equal pca.data_transformed_matrix, td + assert_equal pca.transform(d), td + assert_equal pca.eigenvector_matrix, ev + assert_equal pca.restore, rd + end + + rd = GSL::Matrix.alloc([1.0, 1.1, 2.0, 1.9, 3.0, 3.3], 3, 2) # 2nd col of d is const -5, gets removed on rd + td = GSL::Matrix.alloc([-1.4142135623731, -7.84962372879505e-17, -0.14142135623731, -0.14142135623731, 1.5556349186104, 0.141421356237309],3,2) + ev = GSL::Matrix.alloc([0.707106781186548, -0.707106781186548, 0.707106781186548, 0.707106781186548], 2, 2) + + # Lossless + 2.times do + pca = OpenTox::Transform::PCA.new(d, 0.0) + assert_equal pca.data_matrix, d + assert_equal pca.data_transformed_matrix, td + assert_equal pca.transform(d), td + assert_equal pca.eigenvector_matrix, ev + assert_equal pca.restore, rd + end + + rd = GSL::Matrix.alloc([1.0, 1.1, 1.9, 2.0, 3.1, 3.2], 3, 2) + td = GSL::Matrix.alloc([-1.4142135623731, -0.14142135623731, 1.5556349186104],3,1) + ev = GSL::Matrix.alloc([0.707106781186548, 0.707106781186548], 2, 1) + # Lossy, but using maxcols constraint + 2.times do + pca = OpenTox::Transform::PCA.new(d, 0.0, 1) # 1 column + assert_equal pca.data_matrix, d + assert_equal pca.data_transformed_matrix, td + assert_equal pca.transform(d), td + assert_equal pca.eigenvector_matrix, ev + assert_equal pca.restore, rd + end + + + end + + def test_svd + + m = GSL::Matrix[ + [5,5,0,5], + [5,0,3,4], + [3,4,0,3], + [0,0,5,3], + [5,4,4,5], + [5,4,5,5] + ] + + + foo = GSL::Matrix[[5,5,3,0,5,5]] + bar = GSL::Matrix[[5,4,5,5]] + + + + # AutoScale (mean and center) to improve on representation + nr_cases, nr_features = m.size1, m.size2 + (0..nr_features-1).each { |i| + autoscaler = OpenTox::Transform::AutoScale.new(m.col(i)) + m.col(i)[0..nr_cases-1] = autoscaler.vs + bar.col(i)[0..0] = autoscaler.transform bar.col(i) + } + autoscaler = OpenTox::Transform::AutoScale.new(foo.transpose.col(0)) + foo = GSL::Matrix[autoscaler.vs] + + + #puts + #puts m.to_a.collect { |r| r.collect{ |v| sprintf("%.2f", v) }.join(", ") }.join("\n") + #puts + #puts foo.to_a.collect { |r| r.collect{ |v| sprintf("%.2f", v) }.join(", ") }.join("\n") + #puts + #puts bar.to_a.collect { |r| r.collect{ |v| sprintf("%.2f", v) }.join(", ") }.join("\n") + + + + # run SVD + svd = OpenTox::Algorithm::Transform::SVD.new m, 0.2 + #puts + #puts svd.restore.to_a.collect { |r| r.collect{ |v| sprintf("%.2f", v) }.join(", ") }.join("\n") + + + #puts + #puts svd.data_transformed_matrix.to_a.collect { |r| r.collect{ |v| sprintf("%.2f", v) }.join(", ") }.join("\n") + + # instance transform + bar = svd.transform bar # alias for svd.transform_instance bar + sim = [] + svd.uk.each_row { |x| + sim << OpenTox::Algorithm::Similarity.cosine_num(x,bar.row(0)) + } + + # # # NO AUTOSCALE + #assert_equal sim[0].round_to(3), 0.346 + #assert_equal sim[1].round_to(3), 0.966 + #assert_equal sim[2].round_to(3), 0.282 + #assert_equal sim[3].round_to(3), 0.599 + #assert_equal sim[4].round_to(3), 0.975 + #assert_equal sim[5].round_to(3), 1.000 + + # # # AUTOSCALE + assert_equal sim[0].round_to(3), -0.115 + assert_equal sim[1].round_to(3), 0.425 + assert_equal sim[2].round_to(3), -0.931 + assert_equal sim[3].round_to(3), -0.352 + assert_equal sim[4].round_to(3), 0.972 + assert_equal sim[5].round_to(3), 1.000 + + + # feature transform, only for demonstration of concept + foo = svd.transform_feature foo + sim = [] + svd.vk.each_row { |x| + sim << OpenTox::Algorithm::Similarity.cosine_num(x,foo.row(0)) + } + + # # # NO AUTOSCALE + #assert_equal sim[0].round_to(3), 1.000 + #assert_equal sim[1].round_to(3), 0.874 + #assert_equal sim[2].round_to(3), 0.064 + #assert_equal sim[3].round_to(3), 0.895 + + # # # AUTOSCALE + assert_equal sim[0].round_to(3), 1.000 + assert_equal sim[1].round_to(3), 0.705 + assert_equal sim[2].round_to(3), 0.023 + assert_equal sim[3].round_to(3), 0.934 + + end + + def test_logas + + d1 = [ 1,2,3 ].to_gv + d2 = [ -1,0,1 ].to_gv + d3 = [ -2,3,8 ].to_gv + d4 = [ -20,30,80 ].to_gv + d5 = [ 0.707, 0.7071].to_gv + + d1la = [ -1.31668596949013, 0.211405021140643, 1.10528094834949 ].to_gv + d2la = d1la + d3la = [ -1.37180016053906, 0.388203523926062, 0.983596636612997 ].to_gv + d4la = [ -1.40084731572532, 0.532435269814955, 0.868412045910369 ].to_gv + d5la = [ -1.0, 1.0 ].to_gv + + 2.times { + + logas = OpenTox::Transform::LogAutoScale.new(d1) + assert_equal logas.vs, d1la + assert_equal logas.restore(logas.vs), d1 + + logas = OpenTox::Transform::LogAutoScale.new(d2) + assert_equal logas.vs, d2la + assert_equal logas.restore(logas.vs), d2 + + logas = OpenTox::Transform::LogAutoScale.new(d3) + assert_equal logas.vs, d3la + assert_equal logas.restore(logas.vs), d3 + + logas = OpenTox::Transform::LogAutoScale.new(d4) + assert_equal logas.vs, d4la + assert_equal logas.restore(logas.vs), d4 -# def test_pca -# -# d = GSL::Matrix.alloc([1,1.1,2,1.9,3,3.3], 3, 2) -# td = GSL::Matrix.alloc([-1.3421074161875, -0.127000127000191, 1.46910754318769],3,1) -# ev = GSL::Matrix.alloc([0.707106781186548, 0.707106781186548], 2, 1) -# rd = GSL::Matrix.alloc([1.05098674493306, 1.043223563717, 1.91019734898661, 2.0, 3.03881590608033, 3.256776436283], 3, 2) -# -# # Lossy -# 2.times do # repeat to ensure idempotency -# pca = OpenTox::Algorithm::Transform::PCA.new(d, 0.05) -# assert_equal pca.data_matrix, d -# assert_equal pca.data_transformed_matrix, td -# assert_equal pca.eigenvector_matrix, ev -# assert_equal pca.restore, rd -# end -# -# td = GSL::Matrix.alloc([-1.3421074161875, 0.0721061461855949, -0.127000127000191, -0.127000127000191, 1.46910754318769, 0.0548939808145955],3,2) -# ev = GSL::Matrix.alloc([0.707106781186548, -0.707106781186548, 0.707106781186548, 0.707106781186548], 2, 2) -# -# # Lossless -# 2.times do -# pca = OpenTox::Algorithm::Transform::PCA.new(d, 0.0) -# assert_equal pca.data_matrix, d -# assert_equal pca.data_transformed_matrix, td -# assert_equal pca.eigenvector_matrix, ev -# assert_equal pca.restore, d -# end -# -# end + logas = OpenTox::Transform::LogAutoScale.new(d5) + assert_equal logas.vs, d5la + assert_equal logas.restore(logas.vs), d5 + + } + + end end |