From 59652d0dc832caf55c66c7bf0625fab0e801c3f6 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Fri, 10 Oct 2014 12:51:48 +0200 Subject: add test for usage of 'feature_dataset_uri' instead of 'feature_generation_uri' to lazar-physchem test and to validation test --- test/lazar-physchem-long.rb | 78 ++++++++++++++++++++++++++++++++------------- test/validation-long.rb | 15 +++++++-- 2 files changed, 67 insertions(+), 26 deletions(-) diff --git a/test/lazar-physchem-long.rb b/test/lazar-physchem-long.rb index a4cb0b3..7959601 100644 --- a/test/lazar-physchem-long.rb +++ b/test/lazar-physchem-long.rb @@ -5,36 +5,70 @@ class LazarPhyschemDescriptorTest < MiniTest::Test def test_lazar_pc_descriptors # check available descriptors - desc = OpenTox::Algorithm::Descriptor.physchem_descriptors.keys - assert_equal 111,desc.size,"wrong num physchem descriptors" + @descriptors = OpenTox::Algorithm::Descriptor.physchem_descriptors.keys + assert_equal 111,@descriptors.size,"wrong num physchem descriptors" sum = 0 {"Openbabel"=>16,"Cdk"=>50,"Joelib"=>45}.each do |k,v| - assert_equal v,desc.select{|x| x=~/^#{k}\./}.size,"wrong num #{k} descriptors" + assert_equal v,@descriptors.select{|x| x=~/^#{k}\./}.size,"wrong num #{k} descriptors" sum += v end assert_equal 111,sum # select descriptors for test - num_features_offset = 0 - desc.keep_if{|x| x=~/^Openbabel\./} - desc.delete("Openbabel.L5") # TODO Openbabel.L5 does not work, investigate!!! + @num_features_offset = 0 + @descriptors.keep_if{|x| x=~/^Openbabel\./} + @descriptors.delete("Openbabel.L5") # TODO Openbabel.L5 does not work, investigate!!! 
unless defined?($short_tests) # the actual descriptor calculation is rather fast, computing 3D structures takes time # A CDK descriptor can calculate serveral values, e.g., ALOGP produces ALOGP.ALogP, ALOGP.ALogp2, ALOGP.AMR # both is accepted (and tested here): Cdk.ALOGP (produces 3 features), or ALOGP.AMR (produces only 1 feature) - desc += ["Cdk.ALOGP.AMR", "Cdk.WienerNumbers", "Joelib.LogP", "Joelib.count.HeteroCycles"] - num_features_offset = 1 # Cdk.WienerNumbers produces 2 (instead of 1) features + @descriptors += ["Cdk.ALOGP.AMR", "Cdk.WienerNumbers", "Joelib.LogP", "Joelib.count.HeteroCycles"] + @num_features_offset = 1 # Cdk.WienerNumbers produces 2 (instead of 1) features end - puts "Descriptors: #{desc}" + puts "Descriptors: #{@descriptors}" # UPLOAD DATA - dataset = OpenTox::Dataset.new - dataset.upload File.join(DATA_DIR,"EPAFHM.medi.csv") - assert_equal dataset.uri.uri?, true - puts "Dataset: "+dataset.uri + @dataset = OpenTox::Dataset.new + @dataset.upload File.join(DATA_DIR,"EPAFHM.medi.csv") + assert_equal @dataset.uri.uri?, true + puts "Dataset: "+@dataset.uri + @compound_smiles = "CC(C)(C)CN" + @compound_inchi = "InChI=1S/C5H13N/c1-5(2,3)4-6/h4,6H2,1-3H3" + + prediction_a = build_model_and_predict(true) + prediction_b = build_model_and_predict(false) + + assert_equal prediction_a,prediction_b,"predicted value differs depending on calculation method" + puts "Predicted value: #{prediction_a}" + # the actual value (from the complete EPAFHM dataset) is 5.45, but it is predicted higher when tested + # do not expect a fixed value, this might vary with, e.g., the calculated 3d structure by OB + assert prediction_a > 5,"predicted values should be above 5, is #{prediction_a}" + assert prediction_a < 15,"predicted values should be below 15, is #{prediction_a}" + end + + def build_model_and_predict(precompute_feature_dataset=true) + + model_params = {:dataset_uri => @dataset.uri} + feat_gen_uri = File.join($algorithm[:uri],"descriptor","physchem") + + if 
precompute_feature_dataset + # PRECOMPUTE FEATURES + p = "/tmp/mergedfile.csv" + f = File.open(p,"w") + f.puts File.read(File.join(DATA_DIR,"EPAFHM.medi.csv")) + f.puts "\"#{@compound_smiles}\"," + f.close + d = OpenTox::Dataset.new + d.upload p + model_params[:feature_dataset_uri] = OpenTox::Algorithm::Generic.new(feat_gen_uri).run({:dataset_uri => d.uri, :descriptors => @descriptors}) + else + model_params[:feature_generation_uri] = feat_gen_uri + model_params[:descriptors] = @descriptors + end + # BUILD MODEL - model_uri = OpenTox::Model::Lazar.create :dataset_uri => dataset.uri, :feature_generation_uri => File.join($algorithm[:uri],"descriptor","physchem"), :descriptors => desc + model_uri = OpenTox::Model::Lazar.create model_params puts "Model: "+model_uri model = OpenTox::Model::Lazar.new model_uri assert_equal model_uri.uri?, true @@ -43,9 +77,11 @@ class LazarPhyschemDescriptorTest < MiniTest::Test # CHECK FEATURE DATASET feature_dataset_uri = model.metadata[RDF::OT.featureDataset].first puts "Feature dataset: #{feature_dataset_uri}" - features = OpenTox::Dataset.new(feature_dataset_uri).features + feature_dataset = OpenTox::Dataset.new(feature_dataset_uri) + assert_equal @dataset.compounds.size,feature_dataset.compounds.size-(precompute_feature_dataset ? 1 : 0),"num compounds in feature dataset not correct" + features = feature_dataset.features feature_titles = features.collect{|f| f.title} - desc.each do |d| + @descriptors.each do |d| if (d=~/^Cdk\./ and d.count(".")==1) # CDK descriptors (e.g. Cdk.ALOG are included as Cdk.ALOGP.ALogP, Cdk.ALOGP.ALogp2 ..) 
match = false feature_titles.each do |f| @@ -56,10 +92,10 @@ class LazarPhyschemDescriptorTest < MiniTest::Test assert feature_titles.include?(d),"feature not found #{d} in feature dataset #{feature_titles.inspect}" end end - assert_equal (desc.size+num_features_offset),features.size,"wrong num features in feature dataset" + assert_equal (@descriptors.size+@num_features_offset),features.size,"wrong num features in feature dataset" # predict compound - compound_uri = "#{$compound[:uri]}/InChI=1S/C13H8Cl2O2/c14-12-5-4-11(7-13(12)15)17-10-3-1-2-9(6-10)8-16/h1-8H" + compound_uri = "#{$compound[:uri]}/#{@compound_inchi}" prediction_uri = model.predict :compound_uri => compound_uri prediction = OpenTox::Dataset.new prediction_uri assert_equal prediction.uri.uri?, true @@ -69,11 +105,7 @@ class LazarPhyschemDescriptorTest < MiniTest::Test assert prediction.features.collect{|f| f.uri}.include?(model.predicted_variable),"prediction feature #{model.predicted_variable} not included prediction dataset #{prediction.features.collect{|f| f.uri}}" assert prediction.compounds.collect{|c| c.uri}.include?(compound_uri),"compound #{compound_uri} not included in prediction dataset #{prediction.compounds.collect{|c| c.uri}}" assert_equal 1,prediction.compound_indices(compound_uri).size,"compound should only be once in the dataset" - predicted_value = prediction.data_entry_value(prediction.compound_indices(compound_uri).first,model.predicted_variable) - puts "Predicted value: #{predicted_value}" - assert predicted_value > 0.005,"predicted values should be above 0.005, is #{predicted_value}" - assert predicted_value < 0.1,"predicted values should be below 0.1, is #{predicted_value}" - + prediction.data_entry_value(prediction.compound_indices(compound_uri).first,model.predicted_variable) end end diff --git a/test/validation-long.rb b/test/validation-long.rb index 88ee315..2f4fe58 100644 --- a/test/validation-long.rb +++ b/test/validation-long.rb @@ -65,9 +65,18 @@ class ValidationTest < 
MiniTest::Test :info => file.path, :delete => true} FEAT_GEN[file].each do |feat_gen| data[:alg_params] = "feature_generation_uri="+feat_gen -+ data[:alg_params] << ";backbone=false;min_chisq_significance=0.0" if feat_gen=~/fminer/ and data[:info] =~ /mini/ - data[:alg_params] << ";descriptors="+[ "Openbabel.atoms", "Openbabel.bonds", "Openbabel.dbonds", "Openbabel.HBA1", "Openbabel.HBA2", "Openbabel.HBD", "Openbabel.MP", "Openbabel.MR", "Openbabel.MW", "Openbabel.nF", "Openbabel.sbonds", "Openbabel.tbonds", "Openbabel.TPSA"].join(",") if feat_gen=~/physchem/ - DATA << data + data[:alg_params] << ";backbone=false;min_chisq_significance=0.0" if feat_gen=~/fminer/ and data[:info] =~ /mini/ + if feat_gen=~/physchem/ + # validation with physchem descriptors is performed twice, once with feature_generation_uri, once with feature_dataset_uri; each run is pushed as a copy (dup) because data[:alg_params] is reassigned afterwards and a shared Hash would overwrite the earlier entry + desc = [ "Openbabel.atoms", "Openbabel.bonds", "Openbabel.dbonds", "Openbabel.HBA1", "Openbabel.HBA2", "Openbabel.HBD", "Openbabel.MP", "Openbabel.MR", "Openbabel.MW", "Openbabel.nF", "Openbabel.sbonds", "Openbabel.tbonds", "Openbabel.TPSA"] + data[:alg_params] << ";descriptors="+desc.join(",") + DATA << data.dup + feature_dataset_uri = OpenTox::Algorithm::Generic.new(feat_gen).run({:dataset_uri => data[:data], :descriptors => desc}) + data[:alg_params] = "feature_dataset_uri="+feature_dataset_uri + DATA << data.dup + else + DATA << data.dup + end end end end -- cgit v1.2.3