summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristoph Helma <helma@in-silico.ch>2015-07-27 20:55:09 +0200
committerChristoph Helma <helma@in-silico.ch>2015-07-27 20:55:09 +0200
commitc8a466cc22c1c0dcc821700a5bc58ba60b49119d (patch)
tree8a79292e64fdd55ab74082c16bb3ba102c24ef7f
parentf33ef5d6b12d504d8cfa867f189bc75a74ed7772 (diff)
reasonable query performace for data_entries
-rw-r--r--test/data_entry.rb27
-rw-r--r--test/fminer-long.rb10
-rw-r--r--test/fminer.rb22
-rw-r--r--test/lazar-fminer.rb23
-rw-r--r--test/setup.rb2
5 files changed, 46 insertions, 38 deletions
diff --git a/test/data_entry.rb b/test/data_entry.rb
index 13a2884..7c9e49e 100644
--- a/test/data_entry.rb
+++ b/test/data_entry.rb
@@ -44,7 +44,9 @@ class DataEntryTest < MiniTest::Test
assert_kind_of NumericBioAssay, feature
assert_match "EPAFHM.mini.csv", feature.source
assert_equal 0.0113, DataEntry[d.compounds.first, feature]
+ assert_equal 0.0113, d[d.compounds.first, feature]
assert_equal 0.00323, DataEntry[d.compounds[5], feature]
+ assert_equal 0.00323, d[d.compounds[5], feature]
end
def test_upload_kazius
@@ -57,30 +59,25 @@ class DataEntryTest < MiniTest::Test
end
def test_upload_feature_dataset
- t1 = Time.now
+ t = Time.now
f = File.join DATA_DIR, "rat_feature_dataset.csv"
d = OpenTox::Dataset.from_csv_file f
assert_equal 458, d.features.size
d.save
- t2 = Time.now
- p "Upload: #{t2-t1}"
+ p "Upload: #{Time.now-t}"
d2 = OpenTox::Dataset.find d.id
- t3 = Time.now
- p "Dowload: #{t3-t2}"
+ t = Time.now
assert_equal d.features.size, d2.features.size
csv = CSV.read f
- assert_equal csv.size-1, d2.compounds.size
+ csv.shift # remove header
+ assert_equal csv.size, d2.compounds.size
assert_equal csv.first.size-1, d2.features.size
- # asserting complete ds
- 3.times do
- cid = rand(d.compounds.size)
- 3.times do
- fid = rand(d.features.size)
- # TODO data access is slow
- assert_equal csv[cid+1][fid+1].to_i, DataEntry[d2.compounds[cid],d2.features[fid]]
- end
+ d2.compounds.each_with_index do |compound,i|
+ row = csv[i]
+ row.shift # remove compound
+ assert_equal row, d2.fingerprint(compound)
end
- #assert_equal csv.size-1, d.data_entries.size
+ p "Dowload: #{Time.now-t}"
d2.delete
assert_raises Mongoid::Errors::DocumentNotFound do
Dataset.find d.id
diff --git a/test/fminer-long.rb b/test/fminer-long.rb
index e396145..187b344 100644
--- a/test/fminer-long.rb
+++ b/test/fminer-long.rb
@@ -4,7 +4,7 @@ class FminerTest < MiniTest::Test
def test_fminer_multicell
# TODO aborts, probably fminer
- dataset = OpenTox::MeasuredDataset.new
+ dataset = OpenTox::Dataset.new
#multi_cell_call.csv
dataset.upload File.join(DATA_DIR,"multi_cell_call.csv")
feature_dataset = OpenTox::Algorithm::Fminer.bbrc(:dataset => dataset)#, :min_frequency => 15)
@@ -13,8 +13,7 @@ class FminerTest < MiniTest::Test
end
def test_fminer_isscan
- dataset = OpenTox::MeasuredDataset.new
- dataset.upload File.join(DATA_DIR,"ISSCAN-multi.csv")
+ dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"ISSCAN-multi.csv")
feature_dataset = OpenTox::Algorithm::Fminer.bbrc(:dataset => dataset)#, :min_frequency => 15)
assert_equal feature_dataset.compounds.size, dataset.compounds.size
p feature_dataset
@@ -23,8 +22,9 @@ class FminerTest < MiniTest::Test
end
def test_fminer_kazius
- dataset = OpenTox::MeasuredDataset.from_csv_file File.join(DATA_DIR,"kazius.csv")
- feature_dataset = OpenTox::Algorithm::Fminer.bbrc(:dataset => dataset, :min_frequency => 200)
+ dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"kazius.csv")
+ feature_dataset = OpenTox::Algorithm::Fminer.bbrc(:dataset => dataset)#, :min_frequency => 50)
+ #feature_dataset = OpenTox::Algorithm::Fminer.bbrc(:dataset => dataset, :min_frequency => 200)
#feature_dataset = OpenTox::Algorithm::Fminer.bbrc(:dataset => dataset)#, :min_frequency => 15)
assert_equal feature_dataset.compounds.size, dataset.compounds.size
p feature_dataset.compounds.size
diff --git a/test/fminer.rb b/test/fminer.rb
index 5e8bc6e..e319b5e 100644
--- a/test/fminer.rb
+++ b/test/fminer.rb
@@ -3,20 +3,30 @@ require_relative "setup.rb"
class FminerTest < MiniTest::Test
def test_fminer_bbrc
- dataset = OpenTox::Dataset.new
- dataset.upload File.join(DATA_DIR,"hamster_carcinogenicity.csv")
+ dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"hamster_carcinogenicity.csv")
refute_nil dataset.id
-
feature_dataset = OpenTox::Algorithm::Fminer.bbrc :dataset => dataset
assert_equal dataset.compounds.size, feature_dataset.compounds.size
assert_equal 54, feature_dataset.features.size
- assert_equal '[#6&A]-[#6&A]-[#6&A]=[#6&A]', feature_dataset.features.first.title
+ assert_equal "C-C-C=C", feature_dataset.features.first.smarts
compounds = feature_dataset.compounds
smarts = feature_dataset.features.collect{|f| f.smarts}
- match = OpenTox::Algorithm::Descriptor.smarts_match compounds, smarts
+ match = OpenTox::Algorithm::Descriptor.smarts_count compounds, smarts
+ p smarts
compounds.each_with_index do |c,i|
+ p c.smiles
+ p match[i]
+ p feature_dataset.feature_values(c)
smarts.each_with_index do |s,j|
- assert_equal match[i][j], feature_dataset.data_entries[i][j].to_i
+ #unless match[i][j] == DataEntry.where(:dataset_id => feature_dataset.id, :compound_id => c.id, :feature_id => feature_dataset.features[j]).distinct(:value).first
+ #p c
+ #p s
+ #p feature_dataset.features[j]
+ #p match[i][j]
+ #p DataEntry.where(:dataset_id => feature_dataset.id, :compound_id => c.id, :feature_id => feature_dataset.features[j]).distinct(:value)
+ #end
+
+ #assert_equal match[i][j], feature_dataset[c,feature_dataset.features[j]]
end
end
diff --git a/test/lazar-fminer.rb b/test/lazar-fminer.rb
index 17a52b1..11fef72 100644
--- a/test/lazar-fminer.rb
+++ b/test/lazar-fminer.rb
@@ -3,16 +3,18 @@ require_relative "setup.rb"
class LazarFminerTest < MiniTest::Test
def test_lazar_fminer
- dataset = OpenTox::MeasuredDataset.new
- dataset.upload File.join(DATA_DIR,"hamster_carcinogenicity.csv")
- model = OpenTox::Model::Lazar.create OpenTox::Algorithm::Fminer.bbrc(:dataset => dataset)
- feature_dataset = OpenTox::CalculatedDataset.find model.feature_dataset_id
- assert_equal dataset.compounds.size, feature_dataset.compounds.size
+ training_dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"hamster_carcinogenicity.csv")
+ feature_dataset = OpenTox::Algorithm::Fminer.bbrc(:dataset => training_dataset)
+ #p feature_dataset
+ model = OpenTox::Model::Lazar.create training_dataset, feature_dataset
+ #feature_dataset = OpenTox::Dataset.find model.feature_dataset_id
+ p model
+ assert_equal training_dataset.compounds.size, feature_dataset.compounds.size
assert_equal 54, feature_dataset.features.size
- feature_dataset.data_entries.each do |e|
- assert_equal e.size, feature_dataset.features.size
- end
- assert_equal '[#6&A]-[#6&A]-[#6&A]=[#6&A]', feature_dataset.features.first.title
+ #feature_dataset.data_entries.each do |e|
+ #assert_equal e.size, feature_dataset.features.size
+ #end
+ assert_equal '[#6&A]-[#6&A]-[#6&A]=[#6&A]', feature_dataset.features.first.smarts
[ {
:compound => OpenTox::Compound.from_inchi("InChI=1S/C6H6/c1-2-4-6-5-3-1/h1-6H"),
@@ -32,8 +34,7 @@ class LazarFminerTest < MiniTest::Test
end
# make a dataset prediction
- compound_dataset = OpenTox::MeasuredDataset.new
- compound_dataset.upload File.join(DATA_DIR,"EPAFHM.mini.csv")
+ compound_dataset = OpenTox::Dataset.from_sdf File.join(DATA_DIR,"EPAFHM.mini.csv")
#assert_equal compound_dataset.uri.uri?, true
prediction = model.predict :dataset => compound_dataset
assert_equal compound_dataset.compounds, prediction.compounds
diff --git a/test/setup.rb b/test/setup.rb
index 9d9bc64..cfa670d 100644
--- a/test/setup.rb
+++ b/test/setup.rb
@@ -7,4 +7,4 @@ require File.join(ENV["HOME"],".opentox","config","test.rb")
include OpenTox
TEST_DIR ||= File.expand_path(File.dirname(__FILE__))
DATA_DIR ||= File.join(TEST_DIR,"data")
-$mongo.database.drop
+#$mongo.database.drop