diff options
-rw-r--r-- | algorithm.rb | 76 | ||||
-rw-r--r-- | lazar.rb | 197 | ||||
-rw-r--r-- | transform.rb | 152 |
3 files changed, 275 insertions, 150 deletions
diff --git a/algorithm.rb b/algorithm.rb index 6cecfca..3bdc6de 100644 --- a/algorithm.rb +++ b/algorithm.rb @@ -76,6 +76,82 @@ class AlgorithmTest < Test::Unit::TestCase assert_in_delta OpenTox::Algorithm::Similarity.tanimoto(features_a, features_c, weights, params), 0.235749338271022, 0.000001 } end + + def test_mlr + n_prop = [ [ -2.0, -2.0 ], + [ -1.0, -1.0 ], + [ 0.0, 0.0 ], + [ 1.0, 1.0 ], + [ 2.0, 2.0 ] ] + + q_prop = [ 1.0, 1.0 ] + + #acts = [ -2.0, + # -1.0, + # 0.0, + # 1.0, + # 2.0 ] + + acts = [ 0.0, + 0.5, + 1.0, + 1.5, + 2.0 ] + + maxcols = 2 + res1 = OpenTox::Algorithm::Neighbors::mlr(:n_prop => n_prop, :q_prop => q_prop, :acts => acts, :maxcols => maxcols) + maxcols = 1 + res2 = OpenTox::Algorithm::Neighbors::mlr(:n_prop => n_prop, :q_prop => q_prop, :acts => acts, :maxcols => maxcols) + assert_in_delta res1, 1.4958008960423, 10E-06 + assert_equal res1, res2 + end + + def test_pcr + n_prop = [ [ -2.0, -2.0 ], + [ -1.0, -1.0 ], + [ 0.0, 0.0 ], + [ 1.0, 1.0 ], + [ 2.0, 2.0 ] ] + + q_prop = [ 1.0, 1.0 ] + + #acts = [ -2.0, + # -1.0, + # 0.0, + # 1.0, + # 2.0 ] + + acts = [ 0.0, + 0.5, + 1.0, + 1.5, + 2.0 ] + + maxcols = 2 + res1 = OpenTox::Algorithm::Neighbors::pcr(:n_prop => n_prop, :q_prop => q_prop, :acts => acts, :maxcols => maxcols) + + maxcols = 1 + res2 = OpenTox::Algorithm::Neighbors::pcr(:n_prop => n_prop, :q_prop => q_prop, :acts => acts, :maxcols => maxcols) + + assert_in_delta res1, 1.4958008960423, 10E-06 + assert_equal res1, res2 + end + + def test_pc_descriptors + ds = OpenTox::Algorithm::Neighbors.get_props_pc({ :neighbors => [ {:compound => "http://toxcreate3.in-silico.ch:8082/compound/InChI=1S/C5H8O2/c1-4(2)5(6)7-3/h1H2,2-3H3"}, {:compound => "http://toxcreate3.in-silico.ch:8082/compound/InChI=1S/C4H8O/c1-4(2)3-5/h3-4H,1-2H3"} ], :compound => "http://toxcreate3.in-silico.ch:8082/compound/InChI=1S/C12H12N2O3/c1-2-12(8-6-4-3-5-7-8)9(15)13-11(17)14-10(12)16/h3-7H,2H2,1H3,(H2,13,14,15,16,17)", :pc_group => "constitutional" } ) + ds[0][0].each_with_index {|v,i| + assert_in_delta v, [1.78999996185303, 0.0, 1.0, 13.0, nil, 3.0, 4.0, 2.0, 0.0, 5.0, 0.0, 0.541899979114532, 0.0, 0.879999995231628, 0.293655604124069, 21.380500793457][i], 0.0001 + } + ds[0][1].each_with_index {|v,i| + assert_in_delta v, [1.78999996185303, 0.0, 2.0, 15.0, nil, 3.0, 6.0, 5.0, 0.0, 7.0, 0.0, 1.16509997844696, 0.0, 0.71399998664856, 1.35745799541473, 26.4176006317139][i], 0.0001 + } + ds[1].each_with_index {|v,i| + assert_in_delta v, [2.23000001907349, 0.0, 2.0, 29.0, nil, 2.0, 18.0, 8.0, 6.0, 2.0, 6.0, -1.12360000610352, 2.0, 3.76099991798401, 1.26247692108154, 34.4664001464844][i], 0.0001 + } + end + + + =begin def test_clustering # Parameters @@ -54,10 +54,10 @@ class LazarTest < Test::Unit::TestCase FileUtils.cp f, reference FileUtils.rm f end - @predictions.each do |dataset| - dataset.delete(@@subjectid) - end - @model.delete(@@subjectid) + #@predictions.each do |dataset| + # dataset.delete(@@subjectid) + #end + #@model.delete(@@subjectid) end =begin @@ -65,7 +65,7 @@ class LazarTest < Test::Unit::TestCase def test_create_regression_model create_model :dataset_uri => @@regression_training_dataset.uri predict_compound OpenTox::Compound.from_smiles("c1ccccc1NN") - assert_in_delta @predictions.first.value(@compounds.first), 1.095, 0.1 + assert_in_delta @predictions.first.value(@compounds.first), 0.15, 0.2 assert_equal 0.453.round_to(3), @predictions.first.confidence(@compounds.first).round_to(3) assert_equal 253, @predictions.first.neighbors(@compounds.first).size cleanup @@ -109,14 +109,14 @@ class LazarTest < Test::Unit::TestCase cleanup end - def test_classification_svm_model + def test_classification_svm_model create_model :dataset_uri => @@classification_training_dataset.uri, :prediction_algorithm => "local_svm_classification" predict_compound OpenTox::Compound.from_smiles("c1ccccc1NN") predict_dataset OpenTox::Dataset.create_from_csv_file("data/multicolumn.csv", @@subjectid) assert_equal "false", @predictions[0].value(@compounds[0]) - assert_equal 0.3952.round_to(4), @predictions[0].confidence(@compounds[0]).round_to(4) + assert_equal 0.3952, @predictions[0].confidence(@compounds[0]).round_to(4) assert_equal 16, @predictions[0].neighbors(@compounds[0]).size c = OpenTox::Compound.from_smiles("c1ccccc1NN") @@ -125,111 +125,98 @@ class LazarTest < Test::Unit::TestCase assert_equal 41, @model.features.size cleanup - end - - def test_classification_svm_prop_model - - create_model :dataset_uri => @@classification_training_dataset.uri, :prediction_algorithm => "local_svm_classification", :local_svm_kernel => "propositionalized" - predict_compound OpenTox::Compound.from_smiles("c1ccccc1NN") - predict_dataset OpenTox::Dataset.create_from_csv_file("data/multicolumn.csv", @@subjectid) - - assert_equal "false", @predictions[0].value(@compounds[0]) - #assert_equal 0.2938.round_to(4), @predictions[0].confidence(@compounds[0]).round_to(4) - assert_equal 0.3952.round_to(4), @predictions[0].confidence(@compounds[0]).round_to(4) - assert_equal 16, @predictions[0].neighbors(@compounds[0]).size - - c = OpenTox::Compound.from_smiles("c1ccccc1NN") - assert_equal 4, @predictions[1].compounds.size - assert_equal "false", @predictions[1].value(c) - - assert_equal 41, @model.features.size - cleanup - end - -# def test_regression_mlr_prop_model -# create_model :dataset_uri => @@regression_training_dataset.uri, :prediction_algorithm => "local_mlr_prop" -# predict_compound OpenTox::Compound.from_smiles("c1ccccc1NN") -# assert_equal 0.453, @predictions.first.confidence(@compounds.first).round_to(3) -# assert_equal 0.265, @predictions.first.value(@compounds.first).round_to(3) -# assert_equal 253, @predictions.first.neighbors(@compounds.first).size -# assert_equal 131, @model.features.size -# end -# -# def test_regression_mlr_prop_conf_stdev -# create_model :dataset_uri => @@regression_training_dataset.uri, :prediction_algorithm => "local_mlr_prop", :conf_stdev => "true" -# predict_compound OpenTox::Compound.from_smiles("c1ccccc1NN") -# assert_equal 0.154, @predictions.first.confidence(@compounds.first).round_to(3) -# assert_equal 0.265, @predictions.first.value(@compounds.first).round_to(3) -# assert_equal 253, @predictions.first.neighbors(@compounds.first).size -# assert_equal 131, @model.features.size -# end -# -# -# def test_regression_mlr_prop_weighted_model -# create_model :dataset_uri => @@regression_training_dataset.uri, :prediction_algorithm => "local_mlr_prop", :nr_hits => "true" -# predict_compound OpenTox::Compound.from_smiles("c1ccccc1NN") -# assert_equal 0.453, @predictions.first.confidence(@compounds.first).round_to(3) -# assert_equal 0.265, @predictions.first.value(@compounds.first).round_to(3) -# assert_equal 253, @predictions.first.neighbors(@compounds.first).size -# assert_equal 131, @model.features.size -# end - - def test_conf_stdev - params = {:sims => [0.6,0.72,0.8], :acts => [1,1,1], :neighbors => [1,1,1], :conf_stdev => true} # stdev = 0 - params2 = {:sims => [0.6,0.7,0.8], :acts => [3.4,2,0.6], :neighbors => [1,1,1,1], :conf_stdev => true } # stev ~ 1.4 - params3 = {:sims => [0.6,0.7,0.8], :acts => [1,1,1], :neighbors => [1,1,1], } - params4 = {:sims => [0.6,0.7,0.8], :acts => [3.4,2,0.6], :neighbors => [1,1,1] } - 2.times { - assert_in_delta OpenTox::Algorithm::Neighbors::get_confidence(params), 0.72, 0.0001 - assert_in_delta OpenTox::Algorithm::Neighbors::get_confidence(params2), 0.172617874759125, 0.0001 - assert_in_delta OpenTox::Algorithm::Neighbors::get_confidence(params3), 0.7, 0.0001 - assert_in_delta OpenTox::Algorithm::Neighbors::get_confidence(params4), 0.7, 0.0001 - } end + def test_classification_svm_prop_model + create_model :dataset_uri => @@classification_training_dataset.uri, :prediction_algorithm => "local_svm_classification", :local_svm_kernel => "propositionalized" + predict_compound OpenTox::Compound.from_smiles("c1ccccc1NN") + predict_dataset OpenTox::Dataset.create_from_csv_file("data/multicolumn.csv", @@subjectid) -=begin - def test_ambit_classification_model - - # create model - dataset_uri = "http://apps.ideaconsult.net:8080/ambit2/dataset/9?max=400" - feature_uri ="http://apps.ideaconsult.net:8080/ambit2/feature/21573" - #model_uri = OpenTox::Algorithm::Lazar.new.run({:dataset_uri => dataset_uri, :prediction_feature => feature_uri}).to_s - #lazar = OpenTox::Model::Lazar.find model_uri - model_uri = OpenTox::Algorithm::Lazar.new.run({:dataset_uri => dataset_uri, :prediction_feature => feature_uri, :subjectid => @@subjectid}).to_s - validate_owl model_uri,@@subjectid - lazar = OpenTox::Model::Lazar.find model_uri, @@subjectid - puts lazar.features.size - assert_equal lazar.features.size, 1874 - #puts "Model: #{lazar.uri}" - #puts lazar.features.size + assert_equal "false", @predictions[0].value(@compounds[0]) + assert_equal 0.3952, @predictions[0].confidence(@compounds[0]).round_to(4) + assert_equal 16, @predictions[0].neighbors(@compounds[0]).size - # single prediction - compound = OpenTox::Compound.from_smiles("c1ccccc1NN") - #prediction_uri = lazar.run(:compound_uri => compound.uri) - #prediction = OpenTox::LazarPrediction.find(prediction_uri) - prediction_uri = lazar.run(:compound_uri => compound.uri, :subjectid => @@subjectid) - prediction = OpenTox::LazarPrediction.find(prediction_uri, @@subjectid) - #puts "Prediction: #{prediction.uri}" - #puts prediction.value(compound) - assert_equal prediction.value(compound), "3.0" - #puts @prediction.confidence(compound).round_to(4) - #assert_equal @prediction.confidence(compound).round_to(4), 0.3005.round_to(4) - #assert_equal @prediction.neighbors(compound).size, 15 - #@prediction.delete(@@subjectid) + c = OpenTox::Compound.from_smiles("c1ccccc1NN") + assert_equal 4, @predictions[1].compounds.size + assert_equal "false", @predictions[1].value(c) - # dataset activity - #compound = OpenTox::Compound.from_smiles("CNN") - #prediction_uri = @lazar.run(:compound_uri => compound.uri, :subjectid => @@subjectid) - #@prediction = OpenTox::LazarPrediction.find prediction_uri, @@subjectid - #assert !@prediction.measured_activities(compound).empty? - #assert_equal @prediction.measured_activities(compound).first, true - #assert @prediction.value(compound).nil? - #@prediction.delete(@@subjectid) + assert_equal 41, @model.features.size + cleanup + end - # dataset prediction - #@lazar.delete(@@subjectid) + def test_regression_mlr_prop_model + create_model :dataset_uri => @@regression_training_dataset.uri, :prediction_algorithm => "local_mlr_prop" + predict_compound OpenTox::Compound.from_smiles("c1ccccc1NN") + assert_equal 0.453, @predictions.first.confidence(@compounds.first).round_to(3) + assert_equal 0.615, @predictions.first.value(@compounds.first).round_to(3) + assert_equal 253, @predictions.first.neighbors(@compounds.first).size + assert_equal 131, @model.features.size end + +## def test_regression_mlr_prop_conf_stdev +## create_model :dataset_uri => @@regression_training_dataset.uri, :prediction_algorithm => "local_mlr_prop", :conf_stdev => "true" +## predict_compound OpenTox::Compound.from_smiles("c1ccccc1NN") +## assert_equal 0.154, @predictions.first.confidence(@compounds.first).round_to(3) +## assert_equal 0.265, @predictions.first.value(@compounds.first).round_to(3) +## assert_equal 253, @predictions.first.neighbors(@compounds.first).size +## assert_equal 131, @model.features.size +## end + + def test_conf_stdev + params = {:sims => [0.6,0.72,0.8], :acts => [1,1,1], :neighbors => [1,1,1], :conf_stdev => true} + params2 = {:sims => [0.6,0.7,0.8], :acts => [3.4,2,0.6], :neighbors => [1,1,1,1], :conf_stdev => true } # stev ~ 1.4 + params3 = {:sims => [0.6,0.7,0.8], :acts => [1,1,1], :neighbors => [1,1,1], } + params4 = {:sims => [0.6,0.7,0.8], :acts => [3.4,2,0.6], :neighbors => [1,1,1] } + 2.times { + assert_in_delta OpenTox::Algorithm::Neighbors::get_confidence(params), 0.72, 0.0001 + assert_in_delta OpenTox::Algorithm::Neighbors::get_confidence(params2), 0.172617874759125, 0.0001 + assert_in_delta OpenTox::Algorithm::Neighbors::get_confidence(params3), 0.7, 0.0001 + assert_in_delta OpenTox::Algorithm::Neighbors::get_confidence(params4), 0.7, 0.0001 + } + end + +=begin + def test_ambit_classification_model + + # create model + dataset_uri = "http://apps.ideaconsult.net:8080/ambit2/dataset/9?max=400" + feature_uri ="http://apps.ideaconsult.net:8080/ambit2/feature/21573" + #model_uri = OpenTox::Algorithm::Lazar.new.run({:dataset_uri => dataset_uri, :prediction_feature => feature_uri}).to_s + #lazar = OpenTox::Model::Lazar.find model_uri + model_uri = OpenTox::Algorithm::Lazar.new.run({:dataset_uri => dataset_uri, :prediction_feature => feature_uri, :subjectid => @@subjectid}).to_s + validate_owl model_uri,@@subjectid + lazar = OpenTox::Model::Lazar.find model_uri, @@subjectid + puts lazar.features.size + assert_equal lazar.features.size, 1874 + #puts "Model: #{lazar.uri}" + #puts lazar.features.size + + # single prediction + compound = OpenTox::Compound.from_smiles("c1ccccc1NN") + #prediction_uri = lazar.run(:compound_uri => compound.uri) + #prediction = OpenTox::LazarPrediction.find(prediction_uri) + prediction_uri = lazar.run(:compound_uri => compound.uri, :subjectid => @@subjectid) + prediction = OpenTox::LazarPrediction.find(prediction_uri, @@subjectid) + #puts "Prediction: #{prediction.uri}" + #puts prediction.value(compound) + assert_equal prediction.value(compound), "3.0" + #puts @prediction.confidence(compound).round_to(4) + #assert_equal @prediction.confidence(compound).round_to(4), 0.3005.round_to(4) + #assert_equal @prediction.neighbors(compound).size, 15 + #@prediction.delete(@@subjectid) + + # dataset activity + #compound = OpenTox::Compound.from_smiles("CNN") + #prediction_uri = @lazar.run(:compound_uri => compound.uri, :subjectid => @@subjectid) + #@prediction = OpenTox::LazarPrediction.find prediction_uri, @@subjectid + #assert !@prediction.measured_activities(compound).empty? + #assert_equal @prediction.measured_activities(compound).first, true + #assert @prediction.value(compound).nil? + #@prediction.delete(@@subjectid) + + # dataset prediction + #@lazar.delete(@@subjectid) + end =end end diff --git a/transform.rb b/transform.rb index a00f87d..5b2bd48 100644 --- a/transform.rb +++ b/transform.rb @@ -5,51 +5,113 @@ require 'test/unit' class TransformTest < Test::Unit::TestCase -def test_mlr - 2.times { - n_prop = [ [1,1], [2,2], [3,3] ] # erste WH - acts = [ 3,2,3 ] # should yield a constant y=2.8 - sims = [ 4,2,4 ] # move constant closer to 3.0 - q_prop = [0.5,0.5] # extrapolation - params={:n_prop => n_prop, :q_prop => q_prop, :sims => sims, :acts => acts} - - prediction = OpenTox::Algorithm::Neighbors.mlr(params) - assert_in_delta prediction, 2.8, 1.0E-10 # small deviations, don't know why - - q_prop = [1.5,1.5] # interpolation - prediction = OpenTox::Algorithm::Neighbors.mlr(params) - assert_in_delta prediction, 2.8, 1.0E-10 # small deviations, don't know why - } -end + #def test_mlr + # 2.times { + # n_prop = [ [1,1], [2,2], [3,3] ] # erste WH + # acts = [ 3,2,3 ] # should yield a constant y=2.8 + # sims = [ 4,2,4 ] # move constant closer to 3.0 + # q_prop = [0.5,0.5] # extrapolation + # params={:n_prop => n_prop, :q_prop => q_prop, :sims => sims, :acts => acts} + # + # prediction = OpenTox::Algorithm::Neighbors.mlr(params) + # assert_in_delta prediction, 2.8, 1.0E-10 # small deviations, don't know why + # + # q_prop = [1.5,1.5] # interpolation + # prediction = OpenTox::Algorithm::Neighbors.mlr(params) + # assert_in_delta prediction, 2.8, 1.0E-10 # small deviations, don't know why + # } + #end + + def test_pca + + d = GSL::Matrix.alloc([1.0, -5, 1.1, 2.0, -5, 1.9, 3.0, -5, 3.3], 3, 3) # 2nd col is const -5, gets removed + rd = GSL::Matrix.alloc([1.0, 1.1, 1.9, 2.0, 3.1, 3.2], 3, 2) + td = GSL::Matrix.alloc([-1.4142135623731, -0.14142135623731, 1.5556349186104],3,1) + ev = GSL::Matrix.alloc([0.707106781186548, 0.707106781186548], 2, 1) + + # Lossy + 2.times do # repeat to ensure idempotency + pca = OpenTox::Transform::PCA.new(d, 0.05) + assert_equal pca.data_matrix, d + assert_equal pca.data_transformed_matrix, td + assert_equal pca.transform(d), td + assert_equal pca.eigenvector_matrix, ev + assert_equal pca.restore, rd + end + + rd = GSL::Matrix.alloc([1.0, 1.1, 2.0, 1.9, 3.0, 3.3], 3, 2) # 2nd col of d is const -5, gets removed on rd + td = GSL::Matrix.alloc([-1.4142135623731, -7.84962372879505e-17, -0.14142135623731, -0.14142135623731, 1.5556349186104, 0.141421356237309],3,2) + ev = GSL::Matrix.alloc([0.707106781186548, -0.707106781186548, 0.707106781186548, 0.707106781186548], 2, 2) + + # Lossless + 2.times do + pca = OpenTox::Transform::PCA.new(d, 0.0) + assert_equal pca.data_matrix, d + assert_equal pca.data_transformed_matrix, td + assert_equal pca.transform(d), td + assert_equal pca.eigenvector_matrix, ev + assert_equal pca.restore, rd + end + + rd = GSL::Matrix.alloc([1.0, 1.1, 1.9, 2.0, 3.1, 3.2], 3, 2) + td = GSL::Matrix.alloc([-1.4142135623731, -0.14142135623731, 1.5556349186104],3,1) + ev = GSL::Matrix.alloc([0.707106781186548, 0.707106781186548], 2, 1) + # Lossy, but using maxcols constraint + 2.times do + pca = OpenTox::Transform::PCA.new(d, 0.0, 1) # 1 column + assert_equal pca.data_matrix, d + assert_equal pca.data_transformed_matrix, td + assert_equal pca.transform(d), td + assert_equal pca.eigenvector_matrix, ev + assert_equal pca.restore, rd + end + + + end + + def test_logas + + d1 = [ 1,2,3 ].to_gv + d2 = [ -1,0,1 ].to_gv + d3 = [ -2,3,8 ].to_gv + d4 = [ -20,30,80 ].to_gv + d5 = [ 0.707, 0.7071].to_gv + + d1la = [ -1.31668596949013, 0.211405021140643, 1.10528094834949 ].to_gv + d2la = d1la + d3la = [ -1.37180016053906, 0.388203523926062, 0.983596636612997 ].to_gv + d4la = [ -1.40084731572532, 0.532435269814955, 0.868412045910369 ].to_gv + d5la = [ -1.0, 1.0 ].to_gv + + 2.times { + + logas = OpenTox::Transform::LogAutoScale.new(d1) + assert_equal logas.vs, d1la + assert_equal logas.transform(d1), logas.vs + assert_equal logas.restore(logas.vs), d1 + + logas = OpenTox::Transform::LogAutoScale.new(d2) + assert_equal logas.vs, d2la + assert_equal logas.transform(d2), d2la + assert_equal logas.restore(logas.vs), d2 + + logas = OpenTox::Transform::LogAutoScale.new(d3) + assert_equal logas.vs, d3la + assert_equal logas.transform(d3), logas.vs + assert_equal logas.restore(logas.vs), d3 + + logas = OpenTox::Transform::LogAutoScale.new(d4) + assert_equal logas.vs, d4la + assert_equal logas.transform(d4), logas.vs + assert_equal logas.restore(logas.vs), d4 -# def test_pca -# -# d = GSL::Matrix.alloc([1,1.1,2,1.9,3,3.3], 3, 2) -# td = GSL::Matrix.alloc([-1.3421074161875, -0.127000127000191, 1.46910754318769],3,1) -# ev = GSL::Matrix.alloc([0.707106781186548, 0.707106781186548], 2, 1) -# rd = GSL::Matrix.alloc([1.05098674493306, 1.043223563717, 1.91019734898661, 2.0, 3.03881590608033, 3.256776436283], 3, 2) -# -# # Lossy -# 2.times do # repeat to ensure idempotency -# pca = OpenTox::Algorithm::Transform::PCA.new(d, 0.05) -# assert_equal pca.data_matrix, d -# assert_equal pca.data_transformed_matrix, td -# assert_equal pca.eigenvector_matrix, ev -# assert_equal pca.restore, rd -# end -# -# td = GSL::Matrix.alloc([-1.3421074161875, 0.0721061461855949, -0.127000127000191, -0.127000127000191, 1.46910754318769, 0.0548939808145955],3,2) -# ev = GSL::Matrix.alloc([0.707106781186548, -0.707106781186548, 0.707106781186548, 0.707106781186548], 2, 2) -# -# # Lossless -# 2.times do -# pca = OpenTox::Algorithm::Transform::PCA.new(d, 0.0) -# assert_equal pca.data_matrix, d -# assert_equal pca.data_transformed_matrix, td -# assert_equal pca.eigenvector_matrix, ev -# assert_equal pca.restore, d -# end -# -# end + logas = OpenTox::Transform::LogAutoScale.new(d5) + assert_equal logas.vs, d5la + assert_equal logas.transform(d5), logas.vs + assert_equal logas.restore(logas.vs), d5 + + } + + end end |