diff options
author | mguetlein <martin.guetlein@gmail.com> | 2014-10-08 11:25:38 +0200 |
---|---|---|
committer | mguetlein <martin.guetlein@gmail.com> | 2014-10-08 11:25:38 +0200 |
commit | f0f2c5b5d6435f235fbd6520b97daa944fd9df8a (patch) | |
tree | 44226c4873403904451f7b40af58c48339942c8e | |
parent | aac621fb93064542e25bbd1e8581332558908f3c (diff) |
add feature dataset test and computation of single cdk feature value (e.g. Cdk.ALOGP.AMR instead of Cdk.ALOGP) to lazar-physchem test
-rw-r--r-- | test/lazar-physchem-long.rb | 45 |
1 files changed, 36 insertions, 9 deletions
diff --git a/test/lazar-physchem-long.rb b/test/lazar-physchem-long.rb index 0b2820d..a4cb0b3 100644 --- a/test/lazar-physchem-long.rb +++ b/test/lazar-physchem-long.rb @@ -15,38 +15,65 @@ class LazarPhyschemDescriptorTest < MiniTest::Test assert_equal 111,sum # select descriptors for test + num_features_offset = 0 desc.keep_if{|x| x=~/^Openbabel\./} + desc.delete("Openbabel.L5") # TODO Openbabel.L5 does not work, investigate!!! unless defined?($short_tests) # the actual descriptor calculation is rather fast, computing 3D structures takes time - desc += ["Cdk.XLogP", "Cdk.WienerNumbers", "Joelib.LogP", "Joelib.count.HeteroCycles"] + # A CDK descriptor can calculate serveral values, e.g., ALOGP produces ALOGP.ALogP, ALOGP.ALogp2, ALOGP.AMR + # both is accepted (and tested here): Cdk.ALOGP (produces 3 features), or ALOGP.AMR (produces only 1 feature) + desc += ["Cdk.ALOGP.AMR", "Cdk.WienerNumbers", "Joelib.LogP", "Joelib.count.HeteroCycles"] + num_features_offset = 1 # Cdk.WienerNumbers produces 2 (instead of 1) features end - puts "descriptors for modeling: #{desc}" + puts "Descriptors: #{desc}" + # UPLOAD DATA dataset = OpenTox::Dataset.new dataset.upload File.join(DATA_DIR,"EPAFHM.medi.csv") assert_equal dataset.uri.uri?, true - puts dataset.uri + puts "Dataset: "+dataset.uri + # BUILD MODEL model_uri = OpenTox::Model::Lazar.create :dataset_uri => dataset.uri, :feature_generation_uri => File.join($algorithm[:uri],"descriptor","physchem"), :descriptors => desc - puts model_uri + puts "Model: "+model_uri model = OpenTox::Model::Lazar.new model_uri assert_equal model_uri.uri?, true - puts model.predicted_variable + puts "Predicted variable: "+model.predicted_variable + + # CHECK FEATURE DATASET + feature_dataset_uri = model.metadata[RDF::OT.featureDataset].first + puts "Feature dataset: #{feature_dataset_uri}" + features = OpenTox::Dataset.new(feature_dataset_uri).features + feature_titles = features.collect{|f| f.title} + desc.each do |d| + if (d=~/^Cdk\./ and 
d.count(".")==1) # CDK descriptors (e.g. Cdk.ALOG are included as Cdk.ALOGP.ALogP, Cdk.ALOGP.ALogp2 ..) + match = false + feature_titles.each do |f| + match = true if f=~/d/ + end + assert match,"feature not found #{d} in feature dataset #{feature_titles.inspect}" + else + assert feature_titles.include?(d),"feature not found #{d} in feature dataset #{feature_titles.inspect}" + end + end + assert_equal (desc.size+num_features_offset),features.size,"wrong num features in feature dataset" + # predict compound compound_uri = "#{$compound[:uri]}/InChI=1S/C13H8Cl2O2/c14-12-5-4-11(7-13(12)15)17-10-3-1-2-9(6-10)8-16/h1-8H" prediction_uri = model.predict :compound_uri => compound_uri prediction = OpenTox::Dataset.new prediction_uri assert_equal prediction.uri.uri?, true - puts prediction.uri - + puts "Prediction "+prediction.uri + + # check prediction assert prediction.features.collect{|f| f.uri}.include?(model.predicted_variable),"prediction feature #{model.predicted_variable} not included prediction dataset #{prediction.features.collect{|f| f.uri}}" assert prediction.compounds.collect{|c| c.uri}.include?(compound_uri),"compound #{compound_uri} not included in prediction dataset #{prediction.compounds.collect{|c| c.uri}}" assert_equal 1,prediction.compound_indices(compound_uri).size,"compound should only be once in the dataset" - predicted_value = prediction.data_entry_value(prediction.compound_indices(compound_uri).first,model.predicted_variable) - puts predicted_value + puts "Predicted value: #{predicted_value}" assert predicted_value > 0.005,"predicted values should be above 0.005, is #{predicted_value}" assert predicted_value < 0.1,"predicted values should be below 0.1, is #{predicted_value}" + end end |