From 2bb7e2b08a035b7419e5b280b2d93e1e4468c35d Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Thu, 23 Jun 2011 13:16:33 +0000 Subject: lazar predictions fixed --- data/multicolumn.csv | 2 +- fminer.rb | 12 ++++++++---- lazar.rb | 55 +++++++++++++++++++++++++++------------------------- validation.rb | 3 ++- 4 files changed, 40 insertions(+), 32 deletions(-) diff --git a/data/multicolumn.csv b/data/multicolumn.csv index 551429e..2fa9a1c 100644 --- a/data/multicolumn.csv +++ b/data/multicolumn.csv @@ -1,5 +1,5 @@ SMILES, Hamster Carcinogenicity, numeric feature, classification, mixed, string -CC=O , 1, 1, true , true , "test" +c1ccccc1NN , 1, 1, true , true , "test" C12C3=C(C=CC=C3)CC1=CC(=CC=2)NC(C)=O , 1, 2, false, 7.5 , "test" O=C(N)\C(C2=CC=CO2)=C/C1=CC=C([N+]([O-])=O)O1, 1, 3, true , 5 , "test" C1(N=CNN=1)N , 0, 4, false, false, "test" diff --git a/fminer.rb b/fminer.rb index 22fc945..1dbd1b6 100644 --- a/fminer.rb +++ b/fminer.rb @@ -26,7 +26,8 @@ class FminerTest < Test::Unit::TestCase feature = @@classification_training_dataset.features.keys.first @dataset_uri = OpenTox::Algorithm::Fminer::BBRC.new.run({:dataset_uri => @@classification_training_dataset.uri, :prediction_feature => feature, :subjectid => @@subjectid}).to_s dump - assert_equal 52, @dataset.features.size + assert_equal 41, @dataset.features.size # 32 bit + #assert_equal 52, @dataset.features.size cleanup end @@ -34,7 +35,8 @@ class FminerTest < Test::Unit::TestCase feature = File.join @@regression_training_dataset.uri,"feature/LC50_mmol" @dataset_uri = OpenTox::Algorithm::Fminer::BBRC.new.run({:dataset_uri => @@regression_training_dataset.uri, :prediction_feature => feature, :subjectid => @@subjectid, :feature_type=>"paths"}).to_s dump - assert_equal 219, @dataset.features.size + assert_equal 207, @dataset.features.size # 32 bit + #assert_equal 219, @dataset.features.size cleanup end @@ -42,7 +44,8 @@ class FminerTest < Test::Unit::TestCase feature = @@classification_training_dataset.features.keys.first @dataset_uri = OpenTox::Algorithm::Fminer::LAST.new.run({:dataset_uri => @@classification_training_dataset.uri, :prediction_feature => feature, :subjectid => @@subjectid}).to_s dump - assert_equal 23, @dataset.features.size + #assert_equal 23, @dataset.features.size + assert_equal 21, @dataset.features.size # 32 bit cleanup end @@ -55,7 +58,8 @@ class FminerTest < Test::Unit::TestCase "min_frequency" => 2, :subjectid => @@subjectid }) dump - assert_equal 52, @dataset.features.size + assert_equal 41, @dataset.features.size # 32 bit + #assert_equal 52, @dataset.features.size cleanup end diff --git a/lazar.rb b/lazar.rb index ea41f98..f4e7e44 100644 --- a/lazar.rb +++ b/lazar.rb @@ -41,7 +41,7 @@ class LazarTest < Test::Unit::TestCase end def predict_dataset(dataset) - prediction_uri = @model.run(:dataset_uri => dataset.uri, :subjectid => @@subjectid) + prediction_uri = @model.run(:dataset_uri => dataset.uri, :subjectid => @@subjectid) prediction = OpenTox::LazarPrediction.find(prediction_uri, @@subjectid) @predictions << prediction dump prediction, File.join(@dump_dir,caller[0][/`.*'/][1..-2],"dataset_prediction")+".yaml" @@ -67,24 +67,25 @@ class LazarTest < Test::Unit::TestCase def test_create_regression_model create_model :dataset_uri => @@regression_training_dataset.uri predict_compound OpenTox::Compound.from_smiles("c1ccccc1NN") - assert_equal 0.4.round_to(3), @predictions.first.value(@compounds.first).round_to(3) - assert_equal 0.276.round_to(3), @predictions.first.confidence(@compounds.first).round_to(3) - assert_equal 61, @predictions.first.neighbors(@compounds.first).size + assert_equal 0.541.round_to(3), @predictions.first.value(@compounds.first).round_to(3) + assert_equal 0.285.round_to(3), @predictions.first.confidence(@compounds.first).round_to(3) + assert_equal 58, @predictions.first.neighbors(@compounds.first).size cleanup end def test_create_regression_prop_model create_model :dataset_uri => @@regression_training_dataset.uri, :local_svm_kernel => "propositionalized" predict_compound OpenTox::Compound.from_smiles("c1ccccc1NN") - assert_equal 0.4.round_to(1), @predictions.first.value(@compounds.first).round_to(1) - assert_equal 0.276.round_to(3), @predictions.first.confidence(@compounds.first).round_to(3) - assert_equal 61, @predictions.first.neighbors(@compounds.first).size - assert_equal 219, @model.features.size + assert_equal 0.1.round_to(1), @predictions.first.value(@compounds.first).round_to(1) + assert_equal 0.285.round_to(3), @predictions.first.confidence(@compounds.first).round_to(3) + assert_equal 58, @predictions.first.neighbors(@compounds.first).size + assert_equal 207, @model.features.size cleanup end def test_classification_model create_model :dataset_uri => @@classification_training_dataset.uri + puts @model.uri # single prediction predict_compound OpenTox::Compound.from_smiles("c1ccccc1NN") # dataset activity @@ -93,9 +94,9 @@ class LazarTest < Test::Unit::TestCase predict_dataset OpenTox::Dataset.create_from_csv_file("data/multicolumn.csv", @@subjectid) # assertions # single prediction - assert_equal false, @predictions[0].value(@compounds[0]) - assert_equal 0.3067.round_to(4), @predictions[0].confidence(@compounds[0]).round_to(4) - assert_equal 14, @predictions[0].neighbors(@compounds[0]).size + assert_equal "false", @predictions[0].value(@compounds[0]) + assert_equal 0.2938.round_to(4), @predictions[0].confidence(@compounds[0]).round_to(4) + assert_equal 16, @predictions[0].neighbors(@compounds[0]).size # dataset activity assert !@predictions[1].measured_activities(@compounds[1]).empty? assert_equal "true", @predictions[1].measured_activities(@compounds[1]).first.to_s @@ -104,10 +105,11 @@ class LazarTest < Test::Unit::TestCase c = OpenTox::Compound.from_smiles("CC(=Nc1ccc2c(c1)Cc1ccccc21)O") assert_equal nil, @predictions[2].value(c) assert_equal "true", @predictions[2].measured_activities(c).first.to_s - c = OpenTox::Compound.new("http://ot-dev.in-silico.ch/compound/InChI=1S/C2H4N4/c3-2-4-1-5-6-2/h1H,(H3,3,4,5,6)") - assert_equal false, @predictions[2].value(c) + c = OpenTox::Compound.from_smiles("c1ccccc1NN") + assert_equal "false", @predictions[2].value(c) + assert_equal 0.2938.round_to(4) , @predictions[2].confidence(c).round_to(4) # model - assert_equal 52, @model.features.size + assert_equal 41, @model.features.size cleanup end @@ -117,15 +119,15 @@ class LazarTest < Test::Unit::TestCase predict_compound OpenTox::Compound.from_smiles("c1ccccc1NN") predict_dataset OpenTox::Dataset.create_from_csv_file("data/multicolumn.csv", @@subjectid) - assert_equal false, @predictions[0].value(@compounds[0]) - assert_equal 0.3067.round_to(4), @predictions[0].confidence(@compounds[0]).round_to(4) - assert_equal 14, @predictions[0].neighbors(@compounds[0]).size + assert_equal "false", @predictions[0].value(@compounds[0]) + assert_equal 0.3952.round_to(4), @predictions[0].confidence(@compounds[0]).round_to(4) + assert_equal 16, @predictions[0].neighbors(@compounds[0]).size - c = OpenTox::Compound.new("http://ot-dev.in-silico.ch/compound/InChI=1S/C2H4N4/c3-2-4-1-5-6-2/h1H,(H3,3,4,5,6)") + c = OpenTox::Compound.from_smiles("c1ccccc1NN") assert_equal 4, @predictions[1].compounds.size - assert_equal false, @predictions[1].value(c) + assert_equal "false", @predictions[1].value(c) - assert_equal 52, @model.features.size + assert_equal 41, @model.features.size cleanup end @@ -135,15 +137,16 @@ class LazarTest < Test::Unit::TestCase predict_compound OpenTox::Compound.from_smiles("c1ccccc1NN") predict_dataset OpenTox::Dataset.create_from_csv_file("data/multicolumn.csv", @@subjectid) - assert_equal false, @predictions[0].value(@compounds[0]) - assert_equal 0.3067.round_to(4), @predictions[0].confidence(@compounds[0]).round_to(4) - assert_equal 14, @predictions[0].neighbors(@compounds[0]).size + assert_equal "false", @predictions[0].value(@compounds[0]) + #assert_equal 0.2938.round_to(4), @predictions[0].confidence(@compounds[0]).round_to(4) + assert_equal 0.3952.round_to(4), @predictions[0].confidence(@compounds[0]).round_to(4) + assert_equal 16, @predictions[0].neighbors(@compounds[0]).size - c = OpenTox::Compound.new("http://ot-dev.in-silico.ch/compound/InChI=1S/C2H4N4/c3-2-4-1-5-6-2/h1H,(H3,3,4,5,6)") + c = OpenTox::Compound.from_smiles("c1ccccc1NN") assert_equal 4, @predictions[1].compounds.size - assert_equal false, @predictions[1].value(c) + assert_equal "false", @predictions[1].value(c) - assert_equal 52, @model.features.size + assert_equal 41, @model.features.size cleanup end diff --git a/validation.rb b/validation.rb index ff0725d..cf2a3bd 100644 --- a/validation.rb +++ b/validation.rb @@ -24,7 +24,8 @@ end class ValidationTest < Test::Unit::TestCase @@delete = true - @@feature_types = ["bbrc", "last"] + #@@feature_types = ["bbrc", "last"] + @@feature_types = ["bbrc"] @@qmrf_test = true @@data = [] @@data << { :type => :crossvalidation, -- cgit v1.2.3