-rw-r--r--  Rakefile                            4
-rw-r--r--  application.rb                    217
-rw-r--r--  config.ru                          10
-rw-r--r--  environment.rb                     29
-rw-r--r--  model.rb                           83
-rw-r--r--  models.rb                          47
-rw-r--r--  test/hamster_carcinogenicity.csv   85
-rw-r--r--  test/test.rb                       44
8 files changed, 302 insertions, 217 deletions
diff --git a/Rakefile b/Rakefile
index bd2f675..f4e10d6 100644
--- a/Rakefile
+++ b/Rakefile
@@ -1,5 +1,6 @@
require 'rubygems'
require 'rake'
+require 'tasks/opentox'
desc "Install required gems"
task :install do
@@ -14,7 +15,6 @@ end
desc "Run tests"
task :test do
- puts "No tests for lazar."
- #load 'test.rb'
+ load 'test/test.rb'
end
diff --git a/application.rb b/application.rb
index d1aa61a..b5d8d4e 100644
--- a/application.rb
+++ b/application.rb
@@ -1,174 +1,103 @@
-load 'environment.rb'
-
-get '/models/?' do # get index of models
- Model.all.collect{ |m| m.uri }.join("\n")
+['rubygems', 'sinatra', 'redis', 'builder', 'opentox-ruby-api-wrapper'].each do |lib|
+ require lib
end
-get '/model/:id' do
- halt 404, "Model #{params[:id]} not found." unless model = Model.get(params[:id])
- halt 202, model.to_yaml unless model.finished
- model.to_yaml
-# builder do |xml|
-# xml.instruct!
-# end
- #xml model
+load File.join(File.dirname(__FILE__), 'model.rb')
+
+case ENV['RACK_ENV']
+when 'production'
+ @@redis = Redis.new :db => 0
+when 'development'
+ @@redis = Redis.new :db => 1
+when 'test'
+ @@redis = Redis.new :db => 2
+ @@redis.flush_db
end
-post '/models/?' do # create a model
+set :default_content, :yaml
- training_dataset = OpenTox::Dataset.new :uri => params[:dataset_uri]
- model = Model.create(:name => training_dataset.name, :training_dataset_uri => training_dataset.uri)
- model.update_attributes(:uri => url_for("/model/", :full) + model.id.to_s)
+helpers do
- Spork.spork do
- feature_generation = OpenTox::Fminer.new(training_dataset)
- feature_dataset = feature_generation.dataset
- model.feature_dataset_uri = feature_dataset.uri.chomp
- model.finished = true
- model.save
+ def find
+ uri = uri(params[:splat].first)
+ halt 404, "Dataset \"#{uri}\" not found." unless @model = Model.find(uri)
end
-
- model.uri.to_s
-end
-delete '/model/:id' do
- halt 404, "Model #{params[:id]} not found." unless model = Model.get(params[:id])
- model.predictions.each do |p|
- p.neighbors.each { |n| n.destroy }
- p.features.each { |n| f.destroy }
- p.destroy
+ def uri(name)
+ uri = url_for("/model/", :full) + URI.encode(name)
end
- model.destroy
- "Model #{params[:id]} succesfully deleted."
- # TODO: what happens with datasets, avoid stale datasets, but other components might need them
end
-post '/model/:id' do # create prediction
-
- halt 404, "Model #{params[:id]} not found." unless model = Model.get(params[:id])
- query_compound = OpenTox::Compound.new :uri => params[:compound_uri]
- activity_dataset = OpenTox::Dataset.new :uri => model.training_dataset_uri
-
-# database_activities = activity_dataset.features(query_compound)
-
-# if database_activities.size > 0 # return database values
-# database_activities.collect{ |f| f.uri }.join('\n')
-
-# else # make prediction
- prediction = Prediction.find_or_create(:model_uri => model.uri, :compound_uri => params[:compound_uri])
-
- unless prediction.finished # present cached prediction if finished
-
- prediction.update_attributes(:uri => url_for("/prediction/", :full) + prediction.id.to_s)
- Spork.spork do
- feature_dataset = OpenTox::Dataset.new :uri => model.feature_dataset_uri
- compound_descriptors = feature_dataset.all_compounds_and_features_uris
- training_features = feature_dataset.all_features
- compound_activities = activity_dataset.all_compounds_and_features_uris
- query_features = query_compound.match(training_features)
- query_features.each do |f|
- Feature.find_or_create(:feature_uri => f.uri, :prediction_uri => prediction.uri)
- end
- query_feature_uris = query_features.collect{|f| f.uri}
-
- conf = 0.0
- nr_neighbors = 0
-
- compound_descriptors.each do |compound_uri,feature_uris|
- sim = similarity(feature_uris,query_feature_uris)
- if sim > 0.0
- nr_neighbors += 1
- # datamapper default precision is 10, floats with higher precision are not saved
- n = Neighbor.create(:uri => compound_uri, :similarity => (1000*sim).round/1000.0, :prediction_uri => prediction.uri)
- compound_activities[compound_uri].each do |a|
- case OpenTox::Feature.new(:uri => a).value('classification').to_s
- when 'true'
- conf += gauss(sim)
- when 'false'
- conf -= gauss(sim)
- end
- end
- end
- end
- conf = conf/nr_neighbors
- if conf > 0.0
- classification = true
- elsif conf < 0.0
- classification = false
- end
- prediction.update_attributes(:confidence => (1000*conf).round/1000.0, :classification => classification, :finished => true)
-
- end
-
- end
-
- prediction.uri
-# end
+get '/algorithms' do
+ url_for("/algorithm/classification", :full)
end
-# PREDICTIONS
-get '/predictions?' do # get index of predictions
- Prediction.all.collect{ |p| p.uri }.join("\n")
+post '/algorithm/classification/?' do # create a model
+ #halt 403,
+ activity_dataset_uri = OpenTox::Dataset.find(:uri => params[:dataset_uri]).uri
+ feature_dataset_uri = OpenTox::Algorithm::Fminer.create(activity_dataset_uri)
+ Model.create(:activity_dataset_uri => activity_dataset_uri, :feature_dataset_uri => feature_dataset_uri).uri
end
-get '/prediction/:id' do # display prediction
- halt 404, "Prediction #{params[:id]} not found." unless prediction = Prediction.get(params[:id])
- halt 202, prediction.to_yaml unless prediction.finished
- prediction.to_yaml
- #xml prediction
+get '/models/?' do # get index of models
+ Model.find_all.join("\n")
end
-get '/prediction/:id/neighbors' do
- halt 404, "Prediction #{params[:id]} not found." unless prediction = Prediction.get(params[:id])
- halt 202, "Prediction #{params[:id]} not yet finished, please try again later." unless prediction.finished
- #xml Neighbor.all(:prediction_uri => prediction.uri)
- Neighbor.all(:prediction_uri => prediction.uri).to_yaml
+get '/model/*/?' do
+ #halt 404, "Model #{name} not found." unless @model = Model.find(request.url)
+ find
+ @model.to_yaml
end
-get '/prediction/:id/features' do
- halt 404, "Prediction #{params[:id]} not found." unless prediction = Prediction.get(params[:id])
- halt 202, "Prediction #{params[:id]} not yet finished, please try again later." unless prediction.finished
- #xml Feature.all(:prediction_uri => prediction.uri)
- Feature.all(:prediction_uri => prediction.uri).to_yaml
+delete '/model/*' do
+ name = params[:splat].first
+ halt 404, "Model #{name} not found." unless @model = Model.find(request.url)
+ @model.destroy
+ "Model #{params[:id]} succesfully deleted."
end
-delete '/prediction/:id' do
- halt 404, "Prediction #{params[:id]} not found." unless prediction = Prediction.get(params[:id])
- p.neighbors.each { |n| n.destroy }
- p.features.each { |f| f.destroy }
- p.destroy
- "Prediction #{params[:id]} succesfully deleted."
+post '/model/*' do # create prediction
+ name = params[:splat].first
+ halt 404, "Model #{name} not found." unless @model = Model.find(request.url)
+ compound = OpenTox::Compound.new :uri => params[:compound_uri]
+ @model.predict(compound)
end
-# Utility functions
-def similarity(neighbor_features, query_features)
-
- common_features = neighbor_features & query_features
- all_features = neighbor_features | query_features
-
- #common_features.size.to_f/all_features.size.to_f
- sum_p_common = 0.0
- sum_p_all = 0.0
+# PREDICTIONS
+get '/model/*/predictions?' do # get dataset URI
+ name = params[:splat].first
+ halt 404, "Model #{name} not found." unless @model = Model.find(request.url)
+ # Dataset.find
+end
- all_features.each do |f|
- sum_p_all += gauss(OpenTox::Feature.new(:uri => f).value('p_value').to_f)
- end
- common_features.each do |f|
- sum_p_common += gauss(OpenTox::Feature.new(:uri => f).value('p_value').to_f)
- end
- sum_p_common/sum_p_all
+get '/model/*/prediction/*' do # display prediction for a compound
+ name = params[:splat].first
+ compound_uri = params[:splat][1]
+ halt 404, "Model #{name} not found." unless @model = Model.find(request.url)
+ # prediction not found
+ #prediction.to_yaml
+ #xml prediction
+end
+get '/model/*/prediction/*/neighbors' do
+ name = params[:splat].first
+ compound_uri = params[:splat][1]
+ halt 404, "Model #{name} not found." unless @model = Model.find(request.url)
+ # prediction not found
+ # prediction.neighbors
end
-# gauss kernel
-def gauss(sim, sigma = 0.3)
- x = 1.0 - sim
- Math.exp(-(x*x)/(2*sigma*sigma))
+get '/model/*/prediction/*/features' do
+ name = params[:splat].first
+ compound_uri = params[:splat][1]
+ halt 404, "Model #{name} not found." unless @model = Model.find(request.url)
+ # prediction not found
+ # prediction not found
+ # prediction.features
end
-def xml(object)
- builder do |xml|
- xml.instruct!
- object.to_xml
- end
+delete '/model/*/prediction/*' do # delete prediction for a compound
+ name = params[:splat].first
+ halt 404, "Model #{name} not found." unless @model = Model.find(request.url)
+ # Prediction.destroy
end
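
The rewritten application.rb exposes model building and prediction as plain REST calls. A minimal client sketch, not part of this commit, assuming the service runs locally on port 4567, the rest-client gem is available, and the dataset and compound URIs below are placeholders:

require 'rubygems'
require 'rest_client'

base = 'http://localhost:4567'

# build a model from a training dataset; the response body is the model URI
model_uri = RestClient.post("#{base}/algorithm/classification",
  :dataset_uri => 'http://example.org/dataset/1').to_s   # placeholder dataset URI

# the new model appears in the model index
puts RestClient.get("#{base}/models")

# request a prediction for a query compound; the response is the prediction feature URI
puts RestClient.post(model_uri,
  :compound_uri => 'http://example.org/compound/1')       # placeholder compound URI
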
diff --git a/config.ru b/config.ru
index 95e58ae..02ca8b3 100644
--- a/config.ru
+++ b/config.ru
@@ -2,11 +2,9 @@ require 'rubygems'
require 'sinatra'
require 'application.rb'
-if ENV["RACK_ENV"] == 'production'
- FileUtils.mkdir_p 'log' unless File.exists?('log')
- log = File.new("log/sinatra.log", "a")
- $stdout.reopen(log)
- $stderr.reopen(log)
-end
+FileUtils.mkdir_p 'log' unless File.exists?('log')
+log = File.new("log/#{ENV["RACK_ENV"]}.log", "a")
+$stdout.reopen(log)
+$stderr.reopen(log)
run Sinatra::Application
diff --git a/environment.rb b/environment.rb
index 684d029..c6ebefb 100644
--- a/environment.rb
+++ b/environment.rb
@@ -1,23 +1,16 @@
-require 'rubygems'
-
-['sinatra', 'sinatra/url_for', 'dm-core', 'dm-more', 'builder', 'opentox-ruby-api-wrapper'].each do |lib|
+['rubygems', 'sinatra', 'redis', 'builder', 'opentox-ruby-api-wrapper'].each do |lib|
require lib
end
-require "openbabel"
-
-sqlite = "#{File.expand_path(File.dirname(__FILE__))}/#{Sinatra::Base.environment}.sqlite3"
-DataMapper.setup(:default, "sqlite3:///#{sqlite}")
-#DataMapper.setup(:default, 'sqlite3::memory:')
-
-DataMapper::Logger.new(STDOUT, 0)
-
-load 'models.rb'
-
-unless File.exists?(sqlite)
- Model.auto_migrate!
- Prediction.auto_migrate!
- Neighbor.auto_migrate!
- Feature.auto_migrate!
+case ENV['RACK_ENV']
+when 'production'
+ @@redis = Redis.new :db => 0
+when 'development'
+ @@redis = Redis.new :db => 1
+when 'test'
+ @@redis = Redis.new :db => 2
+ @@redis.flush_db
end
+set :default_content, :yaml
+load 'models.rb'
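
environment.rb now replaces the DataMapper/SQLite backend with one Redis database per environment, flushing the test database on startup. A small standalone sketch of the same selection logic, assuming a local Redis server on the default port and reusing the Redis method names from the code above; the URIs are placeholders:

require 'rubygems'
require 'redis'

ENV['RACK_ENV'] ||= 'development'
databases = { 'production' => 0, 'development' => 1, 'test' => 2 }
redis = Redis.new :db => databases[ENV['RACK_ENV']]
redis.flush_db if ENV['RACK_ENV'] == 'test' # start each test run with an empty database

# the Model class added in model.rb keeps a set of model URIs plus two keys per model
redis.set_add "models", "http://example.org/model/1"
redis.set File.join("http://example.org/model/1", "activity_dataset"), "http://example.org/dataset/1"
puts redis.set_members("models")
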
diff --git a/model.rb b/model.rb
new file mode 100644
index 0000000..3a84217
--- /dev/null
+++ b/model.rb
@@ -0,0 +1,83 @@
+class Model
+
+ include OpenTox::Utils
+ attr_accessor :uri, :activity_dataset_uri, :feature_dataset_uri, :name
+
+ def initialize(params)
+ @uri = params[:uri]
+ @activity_dataset_uri = params[:activity_dataset_uri]
+ @feature_dataset_uri = params[:feature_dataset_uri]
+ begin
+ @name = URI.split(@uri)[5]
+ rescue
+ puts "Bad URI #{@uri}"
+ end
+ end
+
+ def self.create(params)
+ params[:uri] = params[:activity_dataset_uri].sub(/dataset/,'model')
+ @@redis.set_add "models", params[:uri]
+ @@redis.set(File.join(params[:uri],"activity_dataset"), params[:activity_dataset_uri])
+ @@redis.set(File.join(params[:uri],"feature_dataset"), params[:feature_dataset_uri])
+ Model.new(params)
+ end
+
+ def self.find(uri)
+ if @@redis.set_member? "models", uri
+ activity_dataset_uri = @@redis.get File.join(uri,"activity_dataset")
+ feature_dataset_uri = @@redis.get File.join(uri,"feature_dataset")
+ Model.new(:uri => uri, :activity_dataset_uri => activity_dataset_uri, :feature_dataset_uri => feature_dataset_uri)
+ else
+ nil
+ end
+ end
+
+ def self.find_all
+ @@redis.set_members("models")
+ end
+
+ def predict(compound)
+
+ training_activities = OpenTox::Dataset.find :uri => @uri.sub(/model/,'dataset')
+ # find database activities
+ # find prediction
+ training_features = OpenTox::Dataset.find(:uri => @feature_dataset_uri)
+
+ prediction_dataset = OpenTox::Dataset.find_or_create(:name => training_activities.name + '_predictions')
+ prediction_neighbors = OpenTox::Dataset.find_or_create(:name => training_activities.name + '_neighbors')
+ prediction_features = OpenTox::Dataset.find_or_create(:name => training_activities.name + '_prediction_features')
+
+ feature_uris = compound.match(training_features)
+ prediction_features.add({compound.uri => feature_uris})
+
+ conf = 0.0
+ neighbors = []
+
+ training_features.compounds.each do |neighbor|
+ sim = OpenTox::Algorithm::Similarity.weighted_tanimoto(training_features,neighbor,prediction_features,compound).to_f
+ if sim > 0.3
+ neighbors << neighbor.uri
+ training_activities.features(neighbor).each do |a|
+ case OpenTox::Feature.new(:uri => a.uri).value('classification').to_s
+ when 'true'
+ conf += OpenTox::Utils.gauss(sim)
+ when 'false'
+ conf -= OpenTox::Utils.gauss(sim)
+ end
+ end
+ end
+ end
+ conf = conf/neighbors.size
+ if conf > 0.0
+ classification = true
+ elsif conf < 0.0
+ classification = false
+ end
+
+ prediction_neighbors.add({compound.uri => neighbors})
+ prediction_uri = OpenTox::Feature.new(:name => @name, :values => {:classification => classification, :confidence => conf}).uri
+ prediction_uri
+
+ end
+
+end
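
Model#predict classifies a query compound by a similarity-weighted vote over its neighbors: every training compound with weighted Tanimoto similarity above 0.3 contributes gauss(sim), positively for active and negatively for inactive measurements, and the sign of the averaged sum gives the classification. A self-contained sketch of that calculation, with the gauss kernel taken from the utility function removed from application.rb and made-up neighbor data:

def gauss(sim, sigma = 0.3)
  x = 1.0 - sim
  Math.exp(-(x * x) / (2 * sigma * sigma))
end

# each entry: [similarity to the query compound, measured activity of the neighbor]
neighbors = [[0.9, true], [0.7, true], [0.4, false]]

conf = 0.0
neighbors.each do |sim, active|
  conf += active ? gauss(sim) : -gauss(sim)
end
conf = conf / neighbors.size

classification = conf > 0.0 # positive averaged confidence predicts the active class
puts "classification: #{classification}, confidence: #{conf}"
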
diff --git a/models.rb b/models.rb
deleted file mode 100644
index c5b0bad..0000000
--- a/models.rb
+++ /dev/null
@@ -1,47 +0,0 @@
-class Model
- include DataMapper::Resource
- property :id, Serial
- property :name, String
- property :uri, String, :size => 255
- property :feature_dataset_uri, String, :size => 255
- property :training_dataset_uri, String, :size => 255
- property :finished, Boolean, :default => false
-
- def predictions
- Prediction.all(:model_uri => uri)
- end
-end
-
-class Prediction
- include DataMapper::Resource
- property :id, Serial
- property :uri, String, :size => 255
- property :model_uri, String, :size => 255
- property :compound_uri, String, :size => 255
- property :classification, Boolean
- property :confidence, Float
- property :finished, Boolean, :default => false
-
- def neighbors
- Neighbor.all(:prediction_uri => uri)
- end
-
- def features
- Feature.all(:prediction_uri => uri)
- end
-end
-
-class Neighbor
- include DataMapper::Resource
- property :id, Serial
- property :uri, String, :size => 255
- property :prediction_uri, String, :size => 255
- property :similarity, Float
-end
-
-class Feature
- include DataMapper::Resource
- property :id, Serial
- property :feature_uri, String, :size => 255
- property :prediction_uri, String, :size => 255
-end
diff --git a/test/hamster_carcinogenicity.csv b/test/hamster_carcinogenicity.csv
new file mode 100644
index 0000000..009808f
--- /dev/null
+++ b/test/hamster_carcinogenicity.csv
@@ -0,0 +1,85 @@
+CC=O,true
+C12C3=C(C=CC=C3)CC1=CC(=CC=2)NC(C)=O,true
+O=C(N)\C(C2=CC=CO2)=C/C1=CC=C([N+]([O-])=O)O1,true
+C1(N=CNN=1)N,false
+Br(=O)(=O)[O-].[K+],true
+[Cl-].[Cd+2].[Cl-],false
+O=S(=O)([O-])[O-].[Cd+2],false
+ClC1=CC(=NC(=N1)SCC(=O)O)NC2=CC=CC(=C2C)C,false
+ClCOC,true
+C=C(Cl)C=C,false
+Clc1ccc(cc1)c2ccc(COC(C)(C)C(O)=O)cc2,false
+O=C1OC2=C(C=CC=C2)C=C1,false
+ClC(=C(C1=CC=C(C=C1)Cl)C2=CC=C(C=C2)Cl)Cl,true
+ClC(C(C1=CC=C(C=C1)Cl)C2=CC=C(C=C2)Cl)(Cl)Cl,false
+C=CCN(CC=C)N=O,true
+Cl\C2=C(/Cl)C3(Cl)C1C4CC(C1C2(Cl)C3(Cl)Cl)C5OC45,false
+O=C(N(C)C)Cl,true
+CN(C)N,true
+N(NC)C.[H]Cl.[H]Cl,true
+CCO,false
+O=C(N(CC)N=O)NCCO,true
+O=C(N(CC)N=O)NCC(=O)C,true
+C=O,false
+[O-][N+](=O)C1=CC=C(O1)C2=CSC(=N2)NNC=O,true
+O=CC1=CC=CO1,false
+OCC1CO1,true
+O=C2C1=C(OC)C=C(OC)C(Cl)=C1O[C@]32C(OC)=CC(C[C@@](C)3[H])=O,false
+ClC1=C(C(=C(C(=C1Cl)Cl)Cl)Cl)Cl,true
+NN,true
+OS(=O)(=O)O.NN,true
+CC(=O)N(O)C1=CC2=C(C=C1)C3=CC=CC=C3C2,true
+OCCNN,false
+O=C(C1=CC=NC=C1)NN,false
+OC(=O)C1=CC=NC=C1,false
+O=C(NC1=CC=CC(=C1)Cl)OC(C)C,false
+O=C(NC1=CC=CC=C1)OC(C)C,false
+[O-]C(C)=O.[O-]C(C)=O.[Pb+2].[OH-].[OH-].[Pb+2].[OH-].[OH-].[Pb+2],false
+CN(C)CCN(CC2=CC=CS2)C1=NC=CC=C1.Cl,false
+NC1=C2C(=NC(=N1)N)N=CC(=N2)CN(C3=CC=C(C=C3)C(=O)N[C@@H](CCC(=O)O)C(=O)O)C,false
+CN(N)C=O,true
+O=C(C(=C)C)OC,false
+CNN,true
+O=C(C1=CC=CN=C1)CCCN(N=O)C,false
+CC1=CC(=O)NC(=S)N1,true
+CC(C(O)=O)(OC1=CC=C(C=C1)C2CCCC3=C2C=CC=C3)C,false
+O=N[O-].[Na+],false
+[O-][N+](C1=CC=C(C2=CSC(NC(C)=O)=N2)O1)=O,true
+[O-][N+](=O)C1=CC=C(O1)C2=CSC(=N2)NC=O,true
+O=[N+](C1=CC=C2C3=C1C=CC=C3CC2)[O-],false
+N(CC(CO)O)(CC(O)C)N=O,true
+N(CC(CO)O)(CC(C)=O)N=O,true
+N(CC(CO)O)(CCO)N=O,false
+O=C(C)CN(N=O)CCO,true
+C1C(N(C(CN1N=O)C)C)C,true
+N(CC(C)=O)(CC=C)N=O,true
+N(CC(CO)O)(C)N=O,true
+O=NN1CCOCC1,true
+N1C=CC=C(C=1)C2N(N=O)CCC2,true
+C1=CC=C(C=[N+]1[O-])C2CCCN2N=O,false
+O=NN1CCCCC1,true
+O=NN1CCCC1,true
+O=C(N(CC(C)=O)N=O)NCCCl,true
+N(C(=O)N)(N=O)CC(C)=O,true
+C1(CCN=C=S)=CC=CC=C1,false
+O=C1C(C2=CC=CC=C2)(C(=O)NC(=O)N1)CC,false
+C1=C2C(=CC=C1NC3=CC=CC=C3)C=CC=C2,false
+O=C1N2C(C3=C(C=CC=C3)CC2)CN(C1)C(=O)C4CCCCC4,false
+C1(=CC(=C(O)C=C1)O)C(O)=O,false
+O=C1C2=C(C=C(C=C2O)O)O/C(=C\1O)C3=CC(=C(C=C3)O)O.O.O,false
+C1=C(C=CC(=C1)C(C2=CC=C(N)C(=C2)C)=C3C=CC(=N)C=C3)N.[H]Cl,false
+C(C1=CC=C(C=C1)N)(C2=CC=C(C=C2)N)=C3C=CC(C=C3)=N.[H]Cl,false
+OC2=CC1=C(C(O)=C2)C(C(O[C@@H]4O[C@@H]([C@H]([C@H](O)[C@H]4O)O)CO[C@H]3[C@H](O)[C@H](O)[C@H]([C@H](C)O3)O)=C(C5=CC(O)=C(C=C5)O)O1)=O,false
+ClC(=CCl)Cl,false
+NC(=O)OCC,true
+C=CCl,true
+N#[N+]C1=CC=CC=C1.F[B-](F)(F)F,false
+C1(CN(CC(N1N=O)C)N=O)C,true
+N(CCN(C)C)(C)N=O,true
+C1(CN(N=O)CC(O1)C)C,true
+O1C(N(CC1C)N=O)=O,true
+CCOC(=O)N(C)N=O,true
+C1N(COC1)N=O,true
+O=C(N(CCC1=CC=CC=C1)N=O)N,true
+O=NN1CCC1,true
+F[B-](F)(F)F.[Na+],false
diff --git a/test/test.rb b/test/test.rb
new file mode 100644
index 0000000..f374bb4
--- /dev/null
+++ b/test/test.rb
@@ -0,0 +1,44 @@
+require 'application'
+require 'test/unit'
+require 'rack/test'
+
+set :environment, :test
+
+class LazarTest < Test::Unit::TestCase
+ include Rack::Test::Methods
+
+ def app
+ Sinatra::Application
+ end
+
+ def setup
+ @dataset = OpenTox::Dataset.create :name => "Hamster Carcinogenicity"
+ @dataset.import :csv => File.join(File.dirname(__FILE__), "hamster_carcinogenicity.csv"), :compound_format => "smiles", :feature_type => "activity"
+ end
+
+ def teardown
+ @dataset.delete
+ end
+
+ def test_algorithms
+ get '/algorithms'
+ assert last_response.body.include?("classification")
+ end
+
+ def test_create_model_and_predict
+ post '/algorithm/classification', :dataset_uri => @dataset.uri
+ assert last_response.ok?
+ model_uri = last_response.body
+ get model_uri
+ assert last_response.ok?
+ get '/models'
+ assert last_response.body.include? model_uri
+ query_structure = OpenTox::Compound.new :smiles => 'c1ccccc1NN'
+ #query_structure = OpenTox::Compound.new :smiles => '[O-]C(C)=O.[O-]C(C)=O.[Pb+2].[OH-].[OH-].[Pb+2].[OH-].[OH-].[Pb+2]'
+ post model_uri, :compound_uri => query_structure.uri
+ assert last_response.ok?
+ assert last_response.body.include? 'classification/true'
+ puts last_response.body
+ end
+
+end
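
With the Rakefile change above, this suite runs through the test task (rake test); it expects application.rb to be on the load path and a Redis server to be available for the test database, so the exact invocation may vary with the local setup.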