summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: mguetlein <martin.guetlein@gmail.com> 2014-10-10 12:51:48 +0200
committer: mguetlein <martin.guetlein@gmail.com> 2014-10-10 12:51:48 +0200
commit: 59652d0dc832caf55c66c7bf0625fab0e801c3f6 (patch)
tree: dc6f93a6a0949c6aaad72a6093d31ac6423d8e2c
parent: 19068ca987318533acd31b11df3138c39bdcd012 (diff)
add test for usage of 'feature_dataset_uri' instead of 'feature_generation_uri' to lazar-physchem test and to validation test
-rw-r--r-- test/lazar-physchem-long.rb 78
-rw-r--r-- test/validation-long.rb 15
2 files changed, 67 insertions, 26 deletions
diff --git a/test/lazar-physchem-long.rb b/test/lazar-physchem-long.rb
index a4cb0b3..7959601 100644
--- a/test/lazar-physchem-long.rb
+++ b/test/lazar-physchem-long.rb
@@ -5,36 +5,70 @@ class LazarPhyschemDescriptorTest < MiniTest::Test
def test_lazar_pc_descriptors
# check available descriptors
- desc = OpenTox::Algorithm::Descriptor.physchem_descriptors.keys
- assert_equal 111,desc.size,"wrong num physchem descriptors"
+ @descriptors = OpenTox::Algorithm::Descriptor.physchem_descriptors.keys
+ assert_equal 111,@descriptors.size,"wrong num physchem descriptors"
sum = 0
{"Openbabel"=>16,"Cdk"=>50,"Joelib"=>45}.each do |k,v|
- assert_equal v,desc.select{|x| x=~/^#{k}\./}.size,"wrong num #{k} descriptors"
+ assert_equal v,@descriptors.select{|x| x=~/^#{k}\./}.size,"wrong num #{k} descriptors"
sum += v
end
assert_equal 111,sum
# select descriptors for test
- num_features_offset = 0
- desc.keep_if{|x| x=~/^Openbabel\./}
- desc.delete("Openbabel.L5") # TODO Openbabel.L5 does not work, investigate!!!
+ @num_features_offset = 0
+ @descriptors.keep_if{|x| x=~/^Openbabel\./}
+ @descriptors.delete("Openbabel.L5") # TODO Openbabel.L5 does not work, investigate!!!
unless defined?($short_tests)
# the actual descriptor calculation is rather fast, computing 3D structures takes time
# A CDK descriptor can calculate serveral values, e.g., ALOGP produces ALOGP.ALogP, ALOGP.ALogp2, ALOGP.AMR
# both is accepted (and tested here): Cdk.ALOGP (produces 3 features), or ALOGP.AMR (produces only 1 feature)
- desc += ["Cdk.ALOGP.AMR", "Cdk.WienerNumbers", "Joelib.LogP", "Joelib.count.HeteroCycles"]
- num_features_offset = 1 # Cdk.WienerNumbers produces 2 (instead of 1) features
+ @descriptors += ["Cdk.ALOGP.AMR", "Cdk.WienerNumbers", "Joelib.LogP", "Joelib.count.HeteroCycles"]
+ @num_features_offset = 1 # Cdk.WienerNumbers produces 2 (instead of 1) features
end
- puts "Descriptors: #{desc}"
+ puts "Descriptors: #{@descriptors}"
# UPLOAD DATA
- dataset = OpenTox::Dataset.new
- dataset.upload File.join(DATA_DIR,"EPAFHM.medi.csv")
- assert_equal dataset.uri.uri?, true
- puts "Dataset: "+dataset.uri
+ @dataset = OpenTox::Dataset.new
+ @dataset.upload File.join(DATA_DIR,"EPAFHM.medi.csv")
+ assert_equal @dataset.uri.uri?, true
+ puts "Dataset: "+@dataset.uri
+ @compound_smiles = "CC(C)(C)CN"
+ @compound_inchi = "InChI=1S/C5H13N/c1-5(2,3)4-6/h4,6H2,1-3H3"
+
+ prediction_a = build_model_and_predict(true)
+ prediction_b = build_model_and_predict(false)
+
+ assert_equal prediction_a,prediction_b,"predicted value differs depending on calculation method"
+ puts "Predicted value: #{prediction_a}"
+ # the actual value (from the complete EPAFHM dataset) is 5.45, but it is predicted higher when tested
+ # do not expect a fixed value, this might vary with, e.g., the calculated 3d structure by OB
+ assert prediction_a > 5,"predicted values should be above 5, is #{prediction_a}"
+ assert prediction_a < 15,"predicted values should be below 15, is #{prediction_a}"
+ end
+
+ def build_model_and_predict(precompute_feature_dataset=true)
+
+ model_params = {:dataset_uri => @dataset.uri}
+ feat_gen_uri = File.join($algorithm[:uri],"descriptor","physchem")
+
+ if precompute_feature_dataset
+ # PRECOMPUTE FEATURES
+ p = "/tmp/mergedfile.csv"
+ f = File.open(p,"w")
+ f.puts File.read(File.join(DATA_DIR,"EPAFHM.medi.csv"))
+ f.puts "\"#{@compound_smiles}\","
+ f.close
+ d = OpenTox::Dataset.new
+ d.upload p
+ model_params[:feature_dataset_uri] = OpenTox::Algorithm::Generic.new(feat_gen_uri).run({:dataset_uri => d.uri, :descriptors => @descriptors})
+ else
+ model_params[:feature_generation_uri] = feat_gen_uri
+ model_params[:descriptors] = @descriptors
+ end
+
# BUILD MODEL
- model_uri = OpenTox::Model::Lazar.create :dataset_uri => dataset.uri, :feature_generation_uri => File.join($algorithm[:uri],"descriptor","physchem"), :descriptors => desc
+ model_uri = OpenTox::Model::Lazar.create model_params
puts "Model: "+model_uri
model = OpenTox::Model::Lazar.new model_uri
assert_equal model_uri.uri?, true
@@ -43,9 +77,11 @@ class LazarPhyschemDescriptorTest < MiniTest::Test
# CHECK FEATURE DATASET
feature_dataset_uri = model.metadata[RDF::OT.featureDataset].first
puts "Feature dataset: #{feature_dataset_uri}"
- features = OpenTox::Dataset.new(feature_dataset_uri).features
+ feature_dataset = OpenTox::Dataset.new(feature_dataset_uri)
+ assert_equal @dataset.compounds.size,feature_dataset.compounds.size-(precompute_feature_dataset ? 1 : 0),"num compounds in feature dataset not correct"
+ features = feature_dataset.features
feature_titles = features.collect{|f| f.title}
- desc.each do |d|
+ @descriptors.each do |d|
if (d=~/^Cdk\./ and d.count(".")==1) # CDK descriptors (e.g. Cdk.ALOG are included as Cdk.ALOGP.ALogP, Cdk.ALOGP.ALogp2 ..)
match = false
feature_titles.each do |f|
@@ -56,10 +92,10 @@ class LazarPhyschemDescriptorTest < MiniTest::Test
assert feature_titles.include?(d),"feature not found #{d} in feature dataset #{feature_titles.inspect}"
end
end
- assert_equal (desc.size+num_features_offset),features.size,"wrong num features in feature dataset"
+ assert_equal (@descriptors.size+@num_features_offset),features.size,"wrong num features in feature dataset"
# predict compound
- compound_uri = "#{$compound[:uri]}/InChI=1S/C13H8Cl2O2/c14-12-5-4-11(7-13(12)15)17-10-3-1-2-9(6-10)8-16/h1-8H"
+ compound_uri = "#{$compound[:uri]}/#{@compound_inchi}"
prediction_uri = model.predict :compound_uri => compound_uri
prediction = OpenTox::Dataset.new prediction_uri
assert_equal prediction.uri.uri?, true
@@ -69,11 +105,7 @@ class LazarPhyschemDescriptorTest < MiniTest::Test
assert prediction.features.collect{|f| f.uri}.include?(model.predicted_variable),"prediction feature #{model.predicted_variable} not included prediction dataset #{prediction.features.collect{|f| f.uri}}"
assert prediction.compounds.collect{|c| c.uri}.include?(compound_uri),"compound #{compound_uri} not included in prediction dataset #{prediction.compounds.collect{|c| c.uri}}"
assert_equal 1,prediction.compound_indices(compound_uri).size,"compound should only be once in the dataset"
- predicted_value = prediction.data_entry_value(prediction.compound_indices(compound_uri).first,model.predicted_variable)
- puts "Predicted value: #{predicted_value}"
- assert predicted_value > 0.005,"predicted values should be above 0.005, is #{predicted_value}"
- assert predicted_value < 0.1,"predicted values should be below 0.1, is #{predicted_value}"
-
+ prediction.data_entry_value(prediction.compound_indices(compound_uri).first,model.predicted_variable)
end
end
diff --git a/test/validation-long.rb b/test/validation-long.rb
index 88ee315..2f4fe58 100644
--- a/test/validation-long.rb
+++ b/test/validation-long.rb
@@ -65,9 +65,18 @@ class ValidationTest < MiniTest::Test
:info => file.path, :delete => true}
FEAT_GEN[file].each do |feat_gen|
data[:alg_params] = "feature_generation_uri="+feat_gen
-+ data[:alg_params] << ";backbone=false;min_chisq_significance=0.0" if feat_gen=~/fminer/ and data[:info] =~ /mini/
- data[:alg_params] << ";descriptors="+[ "Openbabel.atoms", "Openbabel.bonds", "Openbabel.dbonds", "Openbabel.HBA1", "Openbabel.HBA2", "Openbabel.HBD", "Openbabel.MP", "Openbabel.MR", "Openbabel.MW", "Openbabel.nF", "Openbabel.sbonds", "Openbabel.tbonds", "Openbabel.TPSA"].join(",") if feat_gen=~/physchem/
- DATA << data
+ data[:alg_params] << ";backbone=false;min_chisq_significance=0.0" if feat_gen=~/fminer/ and data[:info] =~ /mini/
+ if feat_gen=~/physchem/
+ # validation with physchem descriptors is performed twice, once with feature_generation_uri, once with feature_dataset_uri
+ desc = [ "Openbabel.atoms", "Openbabel.bonds", "Openbabel.dbonds", "Openbabel.HBA1", "Openbabel.HBA2", "Openbabel.HBD", "Openbabel.MP", "Openbabel.MR", "Openbabel.MW", "Openbabel.nF", "Openbabel.sbonds", "Openbabel.tbonds", "Openbabel.TPSA"]
+ data[:alg_params] << ";descriptors="+desc.join(",")
+ DATA << data
+ feature_dataset_uri = OpenTox::Algorithm::Generic.new(feat_gen).run({:dataset_uri => data[:data], :descriptors => desc})
+ data[:alg_params] = "feature_dataset_uri="+feature_dataset_uri
+ DATA << data
+ else
+ DATA << data
+ end
end
end
end