summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Christoph Helma <helma@in-silico.de> 2009-09-01 20:04:40 +0200
committer: Christoph Helma <helma@in-silico.de> 2009-09-01 20:04:40 +0200
commit: 34124d5b002f15980bf1c36ddfa837e8d123991f (patch)
tree: 55d939eef72a6111606ff72dc59c2ddcfeb47404
parent: 1c8d5c73c0e322f4594c365014990bf5f003e00a (diff)
similarity calculations added
-rw-r--r-- Rakefile | 4
-rw-r--r-- application.rb | 163
-rw-r--r-- config.ru | 2
-rw-r--r-- test/test.rb | 115
4 files changed, 219 insertions, 65 deletions
diff --git a/Rakefile b/Rakefile
index 16f597f..868e04a 100644
--- a/Rakefile
+++ b/Rakefile
@@ -5,3 +5,7 @@ require 'tasks/opentox'
@gems = "sinatra emk-sinatra-url-for builder opentox-ruby-api-wrapper"
+desc "Run local tests"
+task :test do
+ load 'test/test.rb'
+end
diff --git a/application.rb b/application.rb
index 26bb3c3..f26d664 100644
--- a/application.rb
+++ b/application.rb
@@ -15,33 +15,105 @@ end
set :default_content, :yaml
-helpers do
+class Dataset
- def create_dataset(uri)
- @@redis.set_add "datasets", uri
+ include OpenTox::Utils
+ attr_reader :uri, :name
+
+ def initialize(uri)
+ @name = File.basename(uri)
+ @uri = uri
end
- def add_feature(dataset_uri, compound_uri, feature_uri)
- @@redis.set_add dataset_uri + '::compounds', compound_uri
- @@redis.set_add dataset_uri + '::features', feature_uri
- @@redis.set_add dataset_uri + '::' + compound_uri + '::features', feature_uri
- @@redis.set_add dataset_uri + '::' + feature_uri + '::compounds', compound_uri
+ def self.create(uri)
+ dataset = Dataset.new(uri)
+ dataset.save
+ dataset
end
- def delete_dataset(uri)
- @@redis.set_members(uri + '::compounds').each do |compound_uri|
- @@redis.delete uri + '::' + compound_uri
+ def self.find(uri)
+ if @@redis.set_member? "datasets", uri
+ Dataset.new(uri)
+ else
+ nil
end
- @@redis.delete uri + '::compounds'
- @@redis.set_members(uri + '::features').each do |feature_uri|
- @@redis.delete uri + '::' + feature_uri
+ end
+
+ def self.exists?(uri)
+ @@redis.set_member? "datasets", uri
+ end
+
+ def self.find_all_uris
+ @@redis.set_members("datasets")
+ end
+
+ def save
+ @@redis.set_add "datasets", uri
+ end
+
+ def destroy
+ @@redis.set_members(@uri + '::compounds').each do |compound_uri|
+ @@redis.delete @uri + '::' + compound_uri
+ end
+ @@redis.delete @uri + '::compounds'
+ @@redis.set_members(@uri + '::features').each do |feature_uri|
+ @@redis.delete @uri + '::' + feature_uri
end
- @@redis.delete uri + '::features'
- @@redis.set_delete "datasets", uri
+ @@redis.delete @uri + '::features'
+ @@redis.set_delete "datasets", @uri
+ end
+
+ def add(compound_uri,feature_uri)
+ @@redis.set_add @uri + '::compounds', compound_uri
+ @@redis.set_add @uri + '::features', feature_uri
+ @@redis.set_add @uri + '::' + compound_uri + '::features', feature_uri
+ @@redis.set_add @uri + '::' + feature_uri + '::compounds', compound_uri
+ end
+
+ def compound_uris
+ @@redis.set_members(@uri + "::compounds")
end
+ def feature_uris
+ @@redis.set_members(@uri + "::features")
+ end
+
+ def feature_uris_for_compound(compound_uri)
+ @@redis.set_members(@uri + '::' + compound_uri + '::features')
+ end
+
+ def compound_uris_for_feature(feature_uri)
+ @@redis.set_members(@uri + '::' + feature_uri + '::compounds')
+ end
+
+ def tanimoto(compound_uris)
+ raise "Exactly 2 compounds are needed for similarity calculations" unless compound_uris.size == 2
+ compound_keys = compound_uris.collect{ |c| @uri + '::' + c + "::features" }
+ union_size = @@redis.set_union(compound_keys[0], compound_keys[1]).size
+ intersect_size = @@redis.set_intersect(compound_keys[0], compound_keys[1]).size
+ intersect_size.to_f/union_size.to_f
+ end
+
+ def weighted_tanimoto(compound_uris)
+ raise "Exactly 2 compounds are needed for similarity calculations" unless compound_uris.size == 2
+ compound_keys = compound_uris.collect{ |c| @uri + '::' + c + "::features" }
+ union = @@redis.set_union(compound_keys[0], compound_keys[1])
+ intersect = @@redis.set_intersect(compound_keys[0], compound_keys[1])
+
+ p_sum_union = 0.0
+ p_sum_intersect = 0.0
+
+ union.each{ |f| p_sum_union += gauss(OpenTox::Feature.new(:uri => f).value('p_value').to_f) }
+ intersect.each{ |f| p_sum_intersect += gauss(OpenTox::Feature.new(:uri => f).value('p_value').to_f) }
+ "#{p_sum_intersect/p_sum_union}"
+ end
+
+end
+
+helpers do
+
def not_found?
- halt 404, "Dataset \"#{params[:name]}\" not found." unless @@redis.set_member? "datasets", uri(params[:name])
+ halt 404, "Dataset \"#{params[:name]}\" not found." unless Dataset.exists? uri(params[:name])
end
def uri(name)
@@ -53,12 +125,12 @@ end
## REST API
get '/?' do
- @@redis.set_members("datasets").collect{|d| uri(d)}.join("\n")
+ Dataset.find_all_uris.join("\n")
end
get '/:name' do
not_found?
- @dataset = {:uri => uri(params[:name]), :name => params[:name]}
+ @dataset = Dataset.find(uri params[:name])
respond_to do |format|
format.yaml { @dataset.to_yaml }
format.xml { builder :dataset }
@@ -72,71 +144,66 @@ end
get '/:name/compounds' do
not_found?
- @@redis.set_members(uri(params[:name]) + "::compounds").join("\n")
+ Dataset.find(uri params[:name]).compound_uris.join("\n")
end
get '/:name/features' do
not_found?
- @@redis.set_members(uri(params[:name]) + "::features").join("\n")
+ Dataset.find(uri params[:name]).feature_uris.join("\n")
end
-=begin
-get '/:name/:type/*/*/intersection' do
- # CHECK/TEST
- @@redis.set_intersect(params[:splat][0], params[:splat][1], URI.encode(params[:name]) + '/' + params[:type]).join("\n")
+get '/:name/compound/*/features' do
+ not_found?
+ compound_uri = params[:splat].first.gsub(/ /,'+')
+ Dataset.find(uri params[:name]).feature_uris_for_compound(compound_uri).join("\n")
end
-get '/:name/:type/*/*/union' do
- # CHECK/TEST
- @@redis.set_union(params[:splat][0], params[:splat][1], URI.encode(params[:name]) + '/' + params[:type]).join("\n")
+get '/:name/feature/*/compounds' do
+ not_found?
+ Dataset.find(uri params[:name]).compound_uris_for_feature(params[:splat].first).join("\n")
end
-=end
-get '/:name/*/features' do
+get '/:name/tanimoto/compound/*/compound/*/?' do
not_found?
- # re-escape smiles (Sinatra unescapes params and splats)
- compound_uri = params[:splat].first.sub(%r{(http://[\w\.:]+/)(.*)$}) {|s| $1 + URI.escape($2, /[^#{URI::PATTERN::UNRESERVED}]/)}
-
- #puts compound_uri
- @@redis.set_members(uri(params[:name]) + '::' + compound_uri + '::features').join("\n")
+ compound_uris = params[:splat].collect{ |c| c.gsub(/ /,'+') }
+ "#{Dataset.find(uri params[:name]).tanimoto(compound_uris)}"
end
-get '/:name/*/compounds' do
+get '/:name/weighted_tanimoto/compound/*/compound/*/?' do
not_found?
- @@redis.set_members(uri(params[:name]) + '::' + params[:splat].first + '::compounds').join("\n")
+ compound_uris = params[:splat].collect{ |c| c.gsub(/ /,'+') }
+ Dataset.find(uri params[:name]).weighted_tanimoto(compound_uris)
end
post '/?' do
#protected!
- uri = uri(params[:name])
- halt 403, "Dataset \"#{name}\" exists - please choose another name." if @@redis.set_member?("datasets", uri)
+ halt 403, "Dataset \"#{name}\" exists - please choose another name." if Dataset.exists?(uri params[:name])
- @@redis.set_add "datasets", uri
+ dataset = Dataset.create(uri params[:name])
if params[:file]
File.open(params[:file][:tempfile].path).each_line do |line|
record = line.chomp.split(/,\s*/)
compound_uri = OpenTox::Compound.new(:smiles => record[0]).uri
feature_uri = OpenTox::Feature.new(:name => params[:name], :values => {:classification => record[1]}).uri
- add_feature(uri, compound_uri, feature_uri)
+ dataset.add(compound_uri, feature_uri)
end
end
- uri
+ dataset.uri
end
put '/:name/?' do
#protected!
not_found?
- uri = uri(params[:name])
- add_feature(uri, params[:compound_uri],params[:feature_uri])
- uri + " sucessfully updated."
+ dataset = Dataset.find(uri params[:name])
+ dataset.add(params[:compound_uri],params[:feature_uri])
+ dataset.uri + " sucessfully updated."
end
delete '/:name/?' do
# dangerous, because other datasets might refer to it
#protected!
not_found?
- uri = uri(params[:name])
- delete_dataset(uri)
- "Successfully deleted dataset \"#{uri}\"."
+ Dataset.find(uri params[:name]).destroy
+ "Successfully deleted dataset \"#{params[:name]}\"."
end
diff --git a/config.ru b/config.ru
index 961b2d8..63dd2ce 100644
--- a/config.ru
+++ b/config.ru
@@ -14,7 +14,7 @@ if ENV['RACK_ENV'] == 'production'
mail.to 'helma@in-silico.ch'
mail.subject '[ERROR] %s'
end
-else
+elsif ENV['RACK_ENV'] == 'development'
use Rack::Reloader
use Rack::ShowExceptions
end
diff --git a/test/test.rb b/test/test.rb
index 5c4b2e7..b1bae83 100644
--- a/test/test.rb
+++ b/test/test.rb
@@ -32,28 +32,39 @@ class DatasetsTest < Test::Unit::TestCase
def test_create_dataset_and_insert_data
name = "Test dataset"
- smiles = '[O-][N+](=O)C/C=C\C(=O)Cc1cc(C#N)ccc1'
- compound_uri = OpenTox::Compound.new(:smiles => smiles).uri
- feature_uri = OpenTox::Feature.new(:name => name, :values => {:classification => "true"}).uri
+ compounds = {
+ '[O-][N+](=O)C/C=C\C(=O)Cc1cc(C#N)ccc1' => 'true',
+ 'F[B-](F)(F)F.[Na+]' => 'false',
+ 'N#[N+]C1=CC=CC=C1.F[B-](F)(F)F' => 'false'
+ }
post '/', :name => name
assert last_response.ok?
uri = last_response.body.chomp
get uri
assert last_response.ok?
assert last_response.body.include?("Test_dataset")
- put uri, :compound_uri => compound_uri, :feature_uri => feature_uri
- assert last_response.ok?
+
+ compounds.each do |smiles,activity|
+
+ compound_uri = OpenTox::Compound.new(:smiles => smiles).uri
+ feature_uri = OpenTox::Feature.new(:name => name, :values => {:classification => activity}).uri
+ put uri, :compound_uri => compound_uri, :feature_uri => feature_uri
+
+ assert last_response.ok?
+ get uri + '/compounds'
+ assert last_response.ok?
+ assert last_response.body.include?(compound_uri)
+ get uri + '/features'
+ assert last_response.ok?
+ assert last_response.body.include?(activity)
+ assert last_response.body.include?(feature_uri)
+ get uri + '/compound/' + compound_uri + '/features'
+ assert last_response.ok?
+ assert last_response.body.include?(activity)
+ assert_equal feature_uri, last_response.body
+ end
get uri + '/compounds'
- assert last_response.ok?
- assert_equal compound_uri, last_response.body
- get uri + '/features'
- assert last_response.ok?
- assert last_response.body.include?("true")
- assert_equal feature_uri, last_response.body
- get uri + '/' + compound_uri + '/features'
- assert last_response.ok?
- assert last_response.body.include?("true")
- assert_equal feature_uri, last_response.body
+ #puts last_response.body
delete uri
assert last_response.ok?
get "/Test_dataset"
@@ -70,7 +81,11 @@ class DatasetsTest < Test::Unit::TestCase
get uri + '/compounds'
assert last_response.ok?
assert last_response.body.include?(compound_uri)
- get uri + '/' + compound_uri + '/features'
+ get uri + '/features'
+ assert last_response.ok?
+ assert last_response.body.include?("Hamster%20Carcinogenicity/classification/true")
+ assert last_response.body.include?("Hamster%20Carcinogenicity/classification/false")
+ get uri + '/compound/' + compound_uri + '/features'
assert last_response.ok?
assert last_response.body.include?("Hamster%20Carcinogenicity/classification/true")
delete uri
@@ -86,7 +101,75 @@ class DatasetsTest < Test::Unit::TestCase
get uri
assert last_response.ok?
end
+=end
+ def test_tanimoto_similarity
+ #@feature_set = OpenTox::Algorithms::Fminer.new :dataset_uri => @dataset
+ name = "Similarity test dataset"
+ data = {
+ '[O-][N+](=O)C/C=C\C(=O)Cc1cc(C#N)ccc1' =>
+ {
+ 'A' => 1.0,
+ 'B' => 0.9,
+ 'C' => 0.8,
+ 'D' => 0.7,
+ 'E' => 0.5
+ },
+ 'F[B-](F)(F)F.[Na+]' =>
+ {
+ 'F' => 0.9,
+ 'B' => 0.9,
+ 'C' => 0.8,
+ 'D' => 0.7,
+ 'E' => 0.5
+ },
+ 'N#[N+]C1=CC=CC=C1.F[B-](F)(F)F' =>
+ {
+ 'A' => 1.0,
+ 'B' => 0.9,
+ 'F' => 0.9,
+ }
+ }
+ post '/', :name => name
+ assert last_response.ok?
+ uri = last_response.body
+ get uri
+ assert last_response.ok?
+
+ data.each do |smiles,features|
+ compound_uri = OpenTox::Compound.new(:smiles => smiles).uri
+ features.each do |k,v|
+ feature_uri = OpenTox::Feature.new(:name => k, :values => {:p_value => v}).uri
+ put uri, :compound_uri => compound_uri, :feature_uri => feature_uri
+ assert last_response.ok?
+ end
+ end
+
+ data.each do |smiles,features|
+ compound_uri = OpenTox::Compound.new(:smiles => smiles).uri
+ data.each do |s,f|
+ unless s == smiles
+ neighbor_uri = OpenTox::Compound.new(:smiles => s).uri
+ get uri + "/tanimoto/compound/#{compound_uri}/compound/#{neighbor_uri}"
+ assert last_response.ok?
+ sim = last_response.body
+ features_a = data[smiles].keys
+ features_b = data[s].keys
+ union = features_a | features_b
+ intersect = features_a & features_b
+ mysim = intersect.size.to_f/union.size.to_f
+ assert_equal sim, mysim.to_s
+ puts "tanimoto::#{smiles}::#{s}::#{last_response.body}"
+ get uri + "/weighted_tanimoto/compound/#{compound_uri}/compound/#{neighbor_uri}"
+ assert last_response.ok?
+ puts "weighted_tanimoto::#{smiles}::#{s}::#{last_response.body}"
+ end
+ end
+ end
+
+ end
+
+=begin
def test_unauthorized_create
post '/', :name => "Test dataset"
assert !last_response.ok?