From 983ad5cd9d1bc9f2ccd931b7d75fbb8b95e5f873 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Fri, 17 Jun 2011 15:21:14 +0200 Subject: lazar dumpfiles implemented --- fminer.rb | 19 +++-- lazar.rb | 211 +++++++++++++++++++++++++++----------------------------- validate-owl.rb | 6 +- 3 files changed, 117 insertions(+), 119 deletions(-) diff --git a/fminer.rb b/fminer.rb index 87ea4a4..22fc945 100644 --- a/fminer.rb +++ b/fminer.rb @@ -16,16 +16,16 @@ class FminerTest < Test::Unit::TestCase @dataset.delete(@@subjectid) end - def dump(method) + def dump @dataset = OpenTox::Dataset.find @dataset_uri, @@subjectid - @dumpfile = File.join(@dump_dir,method.to_s)+".yaml" + @dumpfile = File.join(@dump_dir,caller[0][/`.*'/][1..-2])+".yaml" File.open(@dumpfile,"w+"){|f| f.puts @dataset.to_yaml} end def test_bbrc feature = @@classification_training_dataset.features.keys.first @dataset_uri = OpenTox::Algorithm::Fminer::BBRC.new.run({:dataset_uri => @@classification_training_dataset.uri, :prediction_feature => feature, :subjectid => @@subjectid}).to_s - dump __method__ + dump assert_equal 52, @dataset.features.size cleanup end @@ -33,7 +33,7 @@ class FminerTest < Test::Unit::TestCase def test_regression_bbrc feature = File.join @@regression_training_dataset.uri,"feature/LC50_mmol" @dataset_uri = OpenTox::Algorithm::Fminer::BBRC.new.run({:dataset_uri => @@regression_training_dataset.uri, :prediction_feature => feature, :subjectid => @@subjectid, :feature_type=>"paths"}).to_s - dump __method__ + dump assert_equal 219, @dataset.features.size cleanup end @@ -41,7 +41,7 @@ class FminerTest < Test::Unit::TestCase def test_last feature = @@classification_training_dataset.features.keys.first @dataset_uri = OpenTox::Algorithm::Fminer::LAST.new.run({:dataset_uri => @@classification_training_dataset.uri, :prediction_feature => feature, :subjectid => @@subjectid}).to_s - dump __method__ + dump assert_equal 23, @dataset.features.size cleanup end @@ -54,7 +54,7 @@ class FminerTest < Test::Unit::TestCase "backbone" => true, "min_frequency" => 2, :subjectid => @@subjectid }) - dump __method__ + dump assert_equal 52, @dataset.features.size cleanup end @@ -62,11 +62,10 @@ class FminerTest < Test::Unit::TestCase # Deactivated by AM because of efficiency problems (does not return) # def test_regression_last # feature = File.join @@regression_training_dataset.uri,"feature/LC50_mmol" -# dataset_uri = OpenTox::Algorithm::Fminer::LAST.new.run({:dataset_uri => @@regression_training_dataset.uri, :prediction_feature => feature, :subjectid => @@subjectid}).to_s -# d =OpenTox::Dataset.new dataset_uri, @@subjectid -# d.load_features(@@subjectid) +# @dataset_uri = OpenTox::Algorithm::Fminer::LAST.new.run({:dataset_uri => @@regression_training_dataset.uri, :prediction_feature => feature, :subjectid => @@subjectid}).to_s +# dump # assert_equal 4, d.features.size -# d.delete(@@subjectid) +# cleanup # end end diff --git a/lazar.rb b/lazar.rb index b60794c..ea41f98 100644 --- a/lazar.rb +++ b/lazar.rb @@ -13,143 +13,138 @@ class LazarTest < Test::Unit::TestCase def setup @predictions = [] - @models = [] + @compounds = [] + @files = [] + @dump_dir = FileUtils.mkdir_p File.join(File.dirname(__FILE__),"dump",File.basename(__FILE__,".rb")) + FileUtils.mkdir_p File.join(File.dirname(__FILE__),"reference",File.basename(__FILE__,".rb")) end - def teardown - @predictions.each {|p| p.delete(@@subjectid)} - @models.each {|m| m.delete(@@subjectid)} + def dump(object,file) + @files << file + FileUtils.mkdir_p File.dirname(file) + File.open(file,"w+"){|f| f.puts object.to_yaml} end -=begin -=end - def test_create_regression_model - model_uri = OpenTox::Algorithm::Lazar.new.run({:dataset_uri => @@regression_training_dataset.uri, :subjectid => @@subjectid}).to_s - #puts model_uri - validate_owl model_uri,@@subjectid - lazar = OpenTox::Model::Lazar.find model_uri, @@subjectid - @models << lazar - compound = OpenTox::Compound.from_smiles("c1ccccc1NN") - prediction_uri = lazar.run(:compound_uri => compound.uri, :subjectid => @@subjectid).to_s + def create_model(params) + params[:subjectid] = @@subjectid + model_uri = OpenTox::Algorithm::Lazar.new.run(params).to_s + @model = OpenTox::Model::Lazar.find model_uri, @@subjectid + dump @model, File.join(@dump_dir,caller[0][/`.*'/][1..-2],"model")+".yaml" + end + + def predict_compound(compound) + @compounds << compound + prediction_uri = @model.run(:compound_uri => compound.uri, :subjectid => @@subjectid) prediction = OpenTox::LazarPrediction.find(prediction_uri, @@subjectid) @predictions << prediction - assert_equal prediction.value(compound).round_to(3),0.378.round_to(3) - assert_equal prediction.confidence(compound).round_to(3), 0.276.round_to(3) - #assert_equal prediction.value(compound).round_to(4), 0.2847.round_to(4) - #assert_equal prediction.confidence(compound).round_to(4), 0.3223.round_to(4) - assert_equal prediction.neighbors(compound).size, 61 + dump prediction, File.join(@dump_dir,caller[0][/`.*'/][1..-2],"compound_prediction")+@compounds.size.to_s+".yaml" end - def test_create_regression_prop_model - model_uri = OpenTox::Algorithm::Lazar.new.run({:dataset_uri => @@regression_training_dataset.uri, :subjectid => @@subjectid, :local_svm_kernel => "propositionalized"}).to_s - #puts model_uri - validate_owl model_uri,@@subjectid - lazar = OpenTox::Model::Lazar.find model_uri, @@subjectid - @models << lazar - assert_equal 219, lazar.features.size - compound = OpenTox::Compound.from_smiles("c1ccccc1NN") - prediction_uri = lazar.run(:compound_uri => compound.uri, :subjectid => @@subjectid).to_s + def predict_dataset(dataset) + prediction_uri = @model.run(:dataset_uri => dataset.uri, :subjectid => @@subjectid) prediction = OpenTox::LazarPrediction.find(prediction_uri, @@subjectid) @predictions << prediction - assert_equal prediction.value(compound).round_to(1),0.1.round_to(1) - assert_equal prediction.confidence(compound).round_to(3), 0.276.round_to(3) - #assert_equal prediction.value(compound).round_to(4), 0.2847.round_to(4) - #assert_equal prediction.confidence(compound).round_to(4), 0.3223.round_to(4) - assert_equal prediction.neighbors(compound).size, 61 + dump prediction, File.join(@dump_dir,caller[0][/`.*'/][1..-2],"dataset_prediction")+".yaml" end - def test_classification_model + def cleanup # executed only when assertions succeed (teardown is called even when assertions fail) + validate_owl @model.uri + @files.each do |f| + reference = f.sub(/dump/,"reference") + FileUtils.mkdir_p File.dirname(reference) + FileUtils.cp f, reference + FileUtils.rm f + end + @predictions.each do |dataset| + validate_owl @model.uri + dataset.delete(@@subjectid) + end + @model.delete(@@subjectid) + end - # create model - model_uri = OpenTox::Algorithm::Lazar.new.run({:dataset_uri => @@classification_training_dataset.uri, :subjectid => @@subjectid}).to_s - validate_owl model_uri,@@subjectid - lazar = OpenTox::Model::Lazar.find model_uri, @@subjectid - @models << lazar - assert_equal lazar.features.size, 52 +=begin +=end + def test_create_regression_model + create_model :dataset_uri => @@regression_training_dataset.uri + predict_compound OpenTox::Compound.from_smiles("c1ccccc1NN") + assert_equal 0.4.round_to(3), @predictions.first.value(@compounds.first).round_to(3) + assert_equal 0.276.round_to(3), @predictions.first.confidence(@compounds.first).round_to(3) + assert_equal 61, @predictions.first.neighbors(@compounds.first).size + cleanup + end - # single prediction - compound = OpenTox::Compound.from_smiles("c1ccccc1NN") - prediction_uri = lazar.run(:compound_uri => compound.uri, :subjectid => @@subjectid) - prediction = OpenTox::LazarPrediction.find(prediction_uri, @@subjectid) - @predictions << prediction - #puts prediction_uri - assert_equal prediction.value(compound), "false" - assert_equal prediction.confidence(compound).round_to(4), 0.3067.round_to(4) - assert_equal prediction.neighbors(compound).size, 14 + def test_create_regression_prop_model + create_model :dataset_uri => @@regression_training_dataset.uri, :local_svm_kernel => "propositionalized" + predict_compound OpenTox::Compound.from_smiles("c1ccccc1NN") + assert_equal 0.4.round_to(1), @predictions.first.value(@compounds.first).round_to(1) + assert_equal 0.276.round_to(3), @predictions.first.confidence(@compounds.first).round_to(3) + assert_equal 61, @predictions.first.neighbors(@compounds.first).size + assert_equal 219, @model.features.size + cleanup + end + def test_classification_model + create_model :dataset_uri => @@classification_training_dataset.uri + # single prediction + predict_compound OpenTox::Compound.from_smiles("c1ccccc1NN") # dataset activity - compound = OpenTox::Compound.from_smiles("CNN") - prediction_uri = lazar.run(:compound_uri => compound.uri, :subjectid => @@subjectid) - prediction = OpenTox::LazarPrediction.find prediction_uri, @@subjectid - @predictions << prediction - assert !prediction.measured_activities(compound).empty? - assert_equal prediction.measured_activities(compound).first.to_s, "true" - assert prediction.value(compound).nil? - + predict_compound OpenTox::Compound.from_smiles("CNN") # dataset prediction - test_dataset = OpenTox::Dataset.create_from_csv_file("data/multicolumn.csv", @@subjectid) - prediction = OpenTox::LazarPrediction.find lazar.run(:dataset_uri => test_dataset.uri, :subjectid => @@subjectid), @@subjectid - @predictions << prediction - assert_equal prediction.compounds.size, 4 - compound = OpenTox::Compound.from_smiles "CC(=Nc1ccc2c(c1)Cc1ccccc21)O" - assert_equal prediction.value(compound), nil - assert_equal prediction.measured_activities(compound).first.to_s, "true" + predict_dataset OpenTox::Dataset.create_from_csv_file("data/multicolumn.csv", @@subjectid) + # assertions + # single prediction + assert_equal false, @predictions[0].value(@compounds[0]) + assert_equal 0.3067.round_to(4), @predictions[0].confidence(@compounds[0]).round_to(4) + assert_equal 14, @predictions[0].neighbors(@compounds[0]).size + # dataset activity + assert !@predictions[1].measured_activities(@compounds[1]).empty? + assert_equal "true", @predictions[1].measured_activities(@compounds[1]).first.to_s + assert @predictions[1].value(@compounds[1]).nil? + # dataset prediction + c = OpenTox::Compound.from_smiles("CC(=Nc1ccc2c(c1)Cc1ccccc21)O") + assert_equal nil, @predictions[2].value(c) + assert_equal "true", @predictions[2].measured_activities(c).first.to_s + c = OpenTox::Compound.new("http://ot-dev.in-silico.ch/compound/InChI=1S/C2H4N4/c3-2-4-1-5-6-2/h1H,(H3,3,4,5,6)") + assert_equal false, @predictions[2].value(c) + # model + assert_equal 52, @model.features.size + cleanup end def test_classification_svm_model - # create model - model_uri = OpenTox::Algorithm::Lazar.new.run({:dataset_uri => @@classification_training_dataset.uri, :subjectid => @@subjectid, :prediction_algorithm => "local_svm_classification"}).to_s - lazar = OpenTox::Model::Lazar.find model_uri, @@subjectid - @models << lazar - assert_equal lazar.features.size, 52 + create_model :dataset_uri => @@classification_training_dataset.uri, :prediction_algorithm => "local_svm_classification" + predict_compound OpenTox::Compound.from_smiles("c1ccccc1NN") + predict_dataset OpenTox::Dataset.create_from_csv_file("data/multicolumn.csv", @@subjectid) - # single prediction - compound = OpenTox::Compound.from_smiles("c1ccccc1NN") - prediction_uri = lazar.run(:compound_uri => compound.uri, :subjectid => @@subjectid) - prediction = OpenTox::LazarPrediction.find(prediction_uri, @@subjectid) - @predictions << prediction - assert_equal prediction.value(compound), "false" - assert_equal prediction.confidence(compound).round_to(4), 0.4131.round_to(4) - assert_equal prediction.neighbors(compound).size, 14 + assert_equal false, @predictions[0].value(@compounds[0]) + assert_equal 0.3067.round_to(4), @predictions[0].confidence(@compounds[0]).round_to(4) + assert_equal 14, @predictions[0].neighbors(@compounds[0]).size - # dataset prediction - test_dataset = OpenTox::Dataset.create_from_csv_file("data/multicolumn.csv", @@subjectid) - prediction = OpenTox::LazarPrediction.find lazar.run(:dataset_uri => test_dataset.uri, :subjectid => @@subjectid), @@subjectid - @predictions << prediction - assert_equal prediction.compounds.size, 4 - compound = OpenTox::Compound.from_smiles "CC(=Nc1ccc2c(c1)Cc1ccccc21)O" - assert_equal prediction.value(compound), nil - assert_equal prediction.measured_activities(compound).first, true + c = OpenTox::Compound.new("http://ot-dev.in-silico.ch/compound/InChI=1S/C2H4N4/c3-2-4-1-5-6-2/h1H,(H3,3,4,5,6)") + assert_equal 4, @predictions[1].compounds.size + assert_equal false, @predictions[1].value(c) + + assert_equal 52, @model.features.size + cleanup end def test_classification_svm_prop_model + create_model :dataset_uri => @@classification_training_dataset.uri, :prediction_algorithm => "local_svm_classification", :local_svm_kernel => "propositionalized" + predict_compound OpenTox::Compound.from_smiles("c1ccccc1NN") + predict_dataset OpenTox::Dataset.create_from_csv_file("data/multicolumn.csv", @@subjectid) + + assert_equal false, @predictions[0].value(@compounds[0]) + assert_equal 0.3067.round_to(4), @predictions[0].confidence(@compounds[0]).round_to(4) + assert_equal 14, @predictions[0].neighbors(@compounds[0]).size - # create model - model_uri = OpenTox::Algorithm::Lazar.new.run({:dataset_uri => @@classification_training_dataset.uri, :subjectid => @@subjectid, :prediction_algorithm => "local_svm_classification", :local_svm_kernel => "propositionalized"}).to_s - lazar = OpenTox::Model::Lazar.find model_uri, @@subjectid - @models << lazar - assert_equal lazar.features.size, 52 - - # single prediction - compound = OpenTox::Compound.from_smiles("c1ccccc1NN") - prediction_uri = lazar.run(:compound_uri => compound.uri, :subjectid => @@subjectid) - prediction = OpenTox::LazarPrediction.find(prediction_uri, @@subjectid) - @predictions << prediction - assert_equal prediction.value(compound), "false" - assert_equal prediction.confidence(compound).round_to(4), 0.4131.round_to(4) - assert_equal prediction.neighbors(compound).size, 14 - - # dataset prediction - test_dataset = OpenTox::Dataset.create_from_csv_file("data/multicolumn.csv", @@subjectid) - prediction = OpenTox::LazarPrediction.find lazar.run(:dataset_uri => test_dataset.uri, :subjectid => @@subjectid), @@subjectid - @predictions << prediction - assert_equal prediction.compounds.size, 4 - compound = OpenTox::Compound.from_smiles "CC(=Nc1ccc2c(c1)Cc1ccccc21)O" - assert_equal prediction.value(compound), nil - assert_equal prediction.measured_activities(compound).first, true + c = OpenTox::Compound.new("http://ot-dev.in-silico.ch/compound/InChI=1S/C2H4N4/c3-2-4-1-5-6-2/h1H,(H3,3,4,5,6)") + assert_equal 4, @predictions[1].compounds.size + assert_equal false, @predictions[1].value(c) + assert_equal 52, @model.features.size + cleanup end =begin diff --git a/validate-owl.rb b/validate-owl.rb index e219d58..2152e7d 100644 --- a/validate-owl.rb +++ b/validate-owl.rb @@ -2,7 +2,11 @@ def validate_owl(uri, subjectid=nil) if validator_available? owl = OpenTox::RestClientWrapper.get(uri,{:accept => "application/rdf+xml",:subjectid => subjectid}, nil, false) html = OpenTox::RestClientWrapper.post("http://www.mygrid.org.uk/OWL/Validator",{:rdf => owl, :level => "DL",:subjectid => subjectid}) - assert_match(/YES/,html) + # assert_match(/YES/,html) + # avoid verbose html output if validation fails + owl_dl = false + owl_dl = true if html =~ /YES/ + assert_equal true, owl_dl, "Invalid OWL-DL: #{uri}" else puts "http://www.mygrid.org.uk/OWL/Validator offline" end -- cgit v1.2.3