summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristoph Helma <helma@in-silico.ch>2015-07-31 19:25:23 +0200
committerChristoph Helma <helma@in-silico.ch>2015-07-31 19:25:23 +0200
commit8edf76d23b169387b4652c5dd1571ba60b040f96 (patch)
tree5d9f3de5441335d534c7d7ef9db1dafb37c67947
parent7d0149d445b2e613e91de5a0470a5c8e08f54057 (diff)
intermediary commit
-rw-r--r--test/lazar-physchem-long.rb52
-rw-r--r--test/lazar-physchem-short.rb28
2 files changed, 54 insertions, 26 deletions
diff --git a/test/lazar-physchem-long.rb b/test/lazar-physchem-long.rb
index 39e353e..280184d 100644
--- a/test/lazar-physchem-long.rb
+++ b/test/lazar-physchem-long.rb
@@ -23,7 +23,7 @@ class LazarPhyschemDescriptorTest < MiniTest::Test
puts "Descriptors: #{@descriptors}"
# UPLOAD DATA
- @dataset = OpenTox::MeasuredDataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi.csv")
+ @dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi.csv")
puts "Dataset: "+@dataset.id
@compound_smiles = "CC(C)(C)CN"
@@ -32,6 +32,9 @@ class LazarPhyschemDescriptorTest < MiniTest::Test
prediction_a = build_model_and_predict(true)
prediction_b = build_model_and_predict(false)
+ p prediction_a.data_entries
+ p prediction_b.data_entries
+
assert_equal prediction_a,prediction_b,"predicted value differs depending on calculation method"
puts "Predicted value: #{prediction_a}"
# the actual value (from the complete EPAFHM dataset) is 5.45, but it is predicted higher when tested
@@ -45,6 +48,7 @@ class LazarPhyschemDescriptorTest < MiniTest::Test
model_params = {:dataset => @dataset}
#feat_gen_uri = File.join($algorithm[:uri],"descriptor","physchem")
+=begin
if precompute_feature_dataset
# PRECOMPUTE FEATURES
p = "/tmp/mergedfile.csv"
@@ -59,27 +63,28 @@ class LazarPhyschemDescriptorTest < MiniTest::Test
model_params[:feature_generation_uri] = feat_gen_uri
model_params[:descriptors] = @descriptors
end
+=end
# BUILD MODEL
#p descriptors
- feature_dataset = OpenTox::CalculatedDataset.new
- feature_dataset.compounds = @dataset.compounds
- feature_dataset.data_entries = descriptors
- feature_dataset.features = @descriptors.collect{|d| OpenTox::Feature.find_or_create_by(:title => d)}
- feature_dataset["inchis"].each do |inchi|
- assert_kind_of String, inchi
+ feature_dataset = OpenTox::Algorithm::Descriptor.physchem(@dataset, @descriptors)
+ #feature_dataset = DescriptorDataset.new
+ #feature_dataset.compounds = @dataset.compounds
+ #feature_dataset.data_entries = descriptors
+ #feature_dataset.features = @descriptors.collect{|d| OpenTox::Feature.find_or_create_by(:title => d)}
+ feature_dataset.compounds.each do |compound|
+ assert_kind_of Compound, compound
end
- feature_dataset["feature_ids"].each do |id|
+ feature_dataset.feature_ids.each do |id|
assert_kind_of BSON::ObjectId, id
end
feature_dataset.data_entries.each do |entry|
- #p entry
assert_kind_of Array, entry
- entry.each do |e|
+ #entry.each do |e|
#p e
- assert_kind_of Float, e
- end
+ # assert_kind_of Float, e
+ #end
end
feature_dataset.save
model = OpenTox::Model::Lazar.create @dataset, feature_dataset
@@ -93,7 +98,7 @@ class LazarPhyschemDescriptorTest < MiniTest::Test
#feature_dataset = OpenTox::Dataset.new(feature_dataset_uri)
assert_equal @dataset.compounds.size,feature_dataset.compounds.size,"Incorrect number of compounds in feature dataset"
features = feature_dataset.features
- feature_titles = features.collect{|f| f.title}
+ feature_titles = features.collect{|f| f.name}
@descriptors.each do |d|
if (d=~/^Cdk\./ and d.count(".")==1) # CDK descriptors (e.g. Cdk.ALOG are included as Cdk.ALOGP.ALogP, Cdk.ALOGP.ALogp2 ..)
match = false
@@ -105,23 +110,22 @@ class LazarPhyschemDescriptorTest < MiniTest::Test
assert feature_titles.include?(d),"feature not found #{d} in feature dataset #{feature_titles.inspect}"
end
end
- assert_equal @descriptors.size,features.size,"Incorrect number of features in feature dataset"
- #assert_equal (@descriptors.size+@num_features_offset),features.size,"wrong num features in feature dataset"
+ # Cdk.WienerNumbers returns 2 features
+ assert_equal (@descriptors.size+@num_features_offset),features.size,"wrong num features in feature dataset"
# predict compound
- compound_uri = "#{$compound[:uri]}/#{@compound_inchi}"
- compound = OpenTox::Compound.new @compound_inchi
- prediction = model.predict :compound => compound
- p prediction
+ compound = OpenTox::Compound.from_inchi @compound_inchi
+ prediction = model.predict compound
+ prediction
#prediction = OpenTox::Dataset.new prediction_uri
#assert_equal prediction.uri.uri?, true
#puts "Prediction "+prediction.uri
- # check prediction
- assert prediction.features.collect{|f| f.uri}.include?(model.predicted_variable),"prediction feature #{model.predicted_variable} not included prediction dataset #{prediction.features.collect{|f| f.uri}}"
- assert prediction.compounds.collect{|c| c.uri}.include?(compound_uri),"compound #{compound_uri} not included in prediction dataset #{prediction.compounds.collect{|c| c.uri}}"
- assert_equal 1,prediction.compound_indices(compound_uri).size,"compound should only be once in the dataset"
- prediction.data_entry_value(prediction.compound_indices(compound_uri).first,model.predicted_variable)
+ # TODO check prediction
+ #assert prediction.features.collect{|f| f.uri}.include?(model.predicted_variable),"prediction feature #{model.predicted_variable} not included prediction dataset #{prediction.features.collect{|f| f.uri}}"
+ #assert prediction.compounds.collect{|c| c.uri}.include?(compound_uri),"compound #{compound_uri} not included in prediction dataset #{prediction.compounds.collect{|c| c.uri}}"
+ #assert_equal 1,prediction.compound_indices(compound_uri).size,"compound should only be once in the dataset"
+ #prediction.data_entry_value(prediction.compound_indices(compound_uri).first,model.predicted_variable)
end
end
diff --git a/test/lazar-physchem-short.rb b/test/lazar-physchem-short.rb
index 5b00231..c9de60c 100644
--- a/test/lazar-physchem-short.rb
+++ b/test/lazar-physchem-short.rb
@@ -1,3 +1,27 @@
-$short_tests = true
+require_relative "setup.rb"
-require File.join(File.expand_path(File.dirname(__FILE__)),"lazar-physchem-long.rb")
+class LazarPhyschemDescriptorTest < MiniTest::Test
+ def test_epafhm
+ # check available descriptors
+ @descriptors = OpenTox::Algorithm::Descriptor::DESCRIPTORS.keys
+ assert_equal 111,@descriptors.size,"wrong number of physchem descriptors"
+ @descriptor_values = OpenTox::Algorithm::Descriptor::DESCRIPTOR_VALUES
+
+ # select descriptors for test
+ @num_features_offset = 0
+ @descriptors.keep_if{|x| x=~/^Openbabel\./}
+ @descriptors.delete("Openbabel.L5") # TODO Openbabel.L5 does not work, investigate!!!
+ puts "Descriptors: #{@descriptors}"
+
+ # UPLOAD DATA
+ training_dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi.csv")
+ puts "Dataset: "+training_dataset.id
+ feature_dataset = Algorithm::Descriptor.physchem training_dataset, @descriptors
+ model = Model::Lazar.create training_dataset, feature_dataset
+ #p model
+ compound = Compound.from_smiles "CC(C)(C)CN"
+ prediction = model.predict compound
+ p prediction
+
+ end
+end