From 8edf76d23b169387b4652c5dd1571ba60b040f96 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Fri, 31 Jul 2015 19:25:23 +0200 Subject: intermediary commit --- test/lazar-physchem-long.rb | 52 ++++++++++++++++++++++++-------------------- test/lazar-physchem-short.rb | 28 ++++++++++++++++++++++-- 2 files changed, 54 insertions(+), 26 deletions(-) diff --git a/test/lazar-physchem-long.rb b/test/lazar-physchem-long.rb index 39e353e..280184d 100644 --- a/test/lazar-physchem-long.rb +++ b/test/lazar-physchem-long.rb @@ -23,7 +23,7 @@ class LazarPhyschemDescriptorTest < MiniTest::Test puts "Descriptors: #{@descriptors}" # UPLOAD DATA - @dataset = OpenTox::MeasuredDataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi.csv") + @dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi.csv") puts "Dataset: "+@dataset.id @compound_smiles = "CC(C)(C)CN" @@ -32,6 +32,9 @@ class LazarPhyschemDescriptorTest < MiniTest::Test prediction_a = build_model_and_predict(true) prediction_b = build_model_and_predict(false) + p prediction_a.data_entries + p prediction_b.data_entries + assert_equal prediction_a,prediction_b,"predicted value differs depending on calculation method" puts "Predicted value: #{prediction_a}" # the actual value (from the complete EPAFHM dataset) is 5.45, but it is predicted higher when tested @@ -45,6 +48,7 @@ class LazarPhyschemDescriptorTest < MiniTest::Test model_params = {:dataset => @dataset} #feat_gen_uri = File.join($algorithm[:uri],"descriptor","physchem") +=begin if precompute_feature_dataset # PRECOMPUTE FEATURES p = "/tmp/mergedfile.csv" @@ -59,27 +63,28 @@ class LazarPhyschemDescriptorTest < MiniTest::Test model_params[:feature_generation_uri] = feat_gen_uri model_params[:descriptors] = @descriptors end +=end # BUILD MODEL #p descriptors - feature_dataset = OpenTox::CalculatedDataset.new - feature_dataset.compounds = @dataset.compounds - feature_dataset.data_entries = descriptors - feature_dataset.features = @descriptors.collect{|d| OpenTox::Feature.find_or_create_by(:title => d)} - feature_dataset["inchis"].each do |inchi| - assert_kind_of String, inchi + feature_dataset = OpenTox::Algorithm::Descriptor.physchem(@dataset, @descriptors) + #feature_dataset = DescriptorDataset.new + #feature_dataset.compounds = @dataset.compounds + #feature_dataset.data_entries = descriptors + #feature_dataset.features = @descriptors.collect{|d| OpenTox::Feature.find_or_create_by(:title => d)} + feature_dataset.compounds.each do |compound| + assert_kind_of Compound, compound end - feature_dataset["feature_ids"].each do |id| + feature_dataset.feature_ids.each do |id| assert_kind_of BSON::ObjectId, id end feature_dataset.data_entries.each do |entry| - #p entry assert_kind_of Array, entry - entry.each do |e| + #entry.each do |e| #p e - assert_kind_of Float, e - end + # assert_kind_of Float, e + #end end feature_dataset.save model = OpenTox::Model::Lazar.create @dataset, feature_dataset @@ -93,7 +98,7 @@ class LazarPhyschemDescriptorTest < MiniTest::Test #feature_dataset = OpenTox::Dataset.new(feature_dataset_uri) assert_equal @dataset.compounds.size,feature_dataset.compounds.size,"Incorrect number of compounds in feature dataset" features = feature_dataset.features - feature_titles = features.collect{|f| f.title} + feature_titles = features.collect{|f| f.name} @descriptors.each do |d| if (d=~/^Cdk\./ and d.count(".")==1) # CDK descriptors (e.g. Cdk.ALOG are included as Cdk.ALOGP.ALogP, Cdk.ALOGP.ALogp2 ..) match = false @@ -105,23 +110,22 @@ class LazarPhyschemDescriptorTest < MiniTest::Test assert feature_titles.include?(d),"feature not found #{d} in feature dataset #{feature_titles.inspect}" end end - assert_equal @descriptors.size,features.size,"Incorrect number of features in feature dataset" - #assert_equal (@descriptors.size+@num_features_offset),features.size,"wrong num features in feature dataset" + # Cdk.WienerNumbers returns 2 features + assert_equal (@descriptors.size+@num_features_offset),features.size,"wrong num features in feature dataset" # predict compound - compound_uri = "#{$compound[:uri]}/#{@compound_inchi}" - compound = OpenTox::Compound.new @compound_inchi - prediction = model.predict :compound => compound - p prediction + compound = OpenTox::Compound.from_inchi @compound_inchi + prediction = model.predict compound + prediction #prediction = OpenTox::Dataset.new prediction_uri #assert_equal prediction.uri.uri?, true #puts "Prediction "+prediction.uri - # check prediction - assert prediction.features.collect{|f| f.uri}.include?(model.predicted_variable),"prediction feature #{model.predicted_variable} not included prediction dataset #{prediction.features.collect{|f| f.uri}}" - assert prediction.compounds.collect{|c| c.uri}.include?(compound_uri),"compound #{compound_uri} not included in prediction dataset #{prediction.compounds.collect{|c| c.uri}}" - assert_equal 1,prediction.compound_indices(compound_uri).size,"compound should only be once in the dataset" - prediction.data_entry_value(prediction.compound_indices(compound_uri).first,model.predicted_variable) + # TODO check prediction + #assert prediction.features.collect{|f| f.uri}.include?(model.predicted_variable),"prediction feature #{model.predicted_variable} not included prediction dataset #{prediction.features.collect{|f| f.uri}}" + #assert prediction.compounds.collect{|c| c.uri}.include?(compound_uri),"compound #{compound_uri} not included in prediction dataset #{prediction.compounds.collect{|c| c.uri}}" + #assert_equal 1,prediction.compound_indices(compound_uri).size,"compound should only be once in the dataset" + #prediction.data_entry_value(prediction.compound_indices(compound_uri).first,model.predicted_variable) end end diff --git a/test/lazar-physchem-short.rb b/test/lazar-physchem-short.rb index 5b00231..c9de60c 100644 --- a/test/lazar-physchem-short.rb +++ b/test/lazar-physchem-short.rb @@ -1,3 +1,27 @@ -$short_tests = true +require_relative "setup.rb" -require File.join(File.expand_path(File.dirname(__FILE__)),"lazar-physchem-long.rb") +class LazarPhyschemDescriptorTest < MiniTest::Test + def test_epafhm + # check available descriptors + @descriptors = OpenTox::Algorithm::Descriptor::DESCRIPTORS.keys + assert_equal 111,@descriptors.size,"wrong number of physchem descriptors" + @descriptor_values = OpenTox::Algorithm::Descriptor::DESCRIPTOR_VALUES + + # select descriptors for test + @num_features_offset = 0 + @descriptors.keep_if{|x| x=~/^Openbabel\./} + @descriptors.delete("Openbabel.L5") # TODO Openbabel.L5 does not work, investigate!!! + puts "Descriptors: #{@descriptors}" + + # UPLOAD DATA + training_dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi.csv") + puts "Dataset: "+training_dataset.id + feature_dataset = Algorithm::Descriptor.physchem training_dataset, @descriptors + model = Model::Lazar.create training_dataset, feature_dataset + #p model + compound = Compound.from_smiles "CC(C)(C)CN" + prediction = model.predict compound + p prediction + + end +end -- cgit v1.2.3