diff options
-rw-r--r-- | Rakefile | 4 | ||||
-rw-r--r-- | application.rb | 217 | ||||
-rw-r--r-- | config.ru | 10 | ||||
-rw-r--r-- | environment.rb | 29 | ||||
-rw-r--r-- | model.rb | 83 | ||||
-rw-r--r-- | models.rb | 47 | ||||
-rw-r--r-- | test/hamster_carcinogenicity.csv | 85 | ||||
-rw-r--r-- | test/test.rb | 44 |
8 files changed, 302 insertions, 217 deletions
@@ -1,5 +1,6 @@ require 'rubygems' require 'rake' +require 'tasks/opentox' desc "Install required gems" task :install do @@ -14,7 +15,6 @@ end desc "Run tests" task :test do - puts "No tests for lazar." - #load 'test.rb' + load 'test/test.rb' end diff --git a/application.rb b/application.rb index d1aa61a..b5d8d4e 100644 --- a/application.rb +++ b/application.rb @@ -1,174 +1,103 @@ -load 'environment.rb' - -get '/models/?' do # get index of models - Model.all.collect{ |m| m.uri }.join("\n") +['rubygems', 'sinatra', 'redis', 'builder', 'opentox-ruby-api-wrapper'].each do |lib| + require lib end -get '/model/:id' do - halt 404, "Model #{params[:id]} not found." unless model = Model.get(params[:id]) - halt 202, model.to_yaml unless model.finished - model.to_yaml -# builder do |xml| -# xml.instruct! -# end - #xml model +load File.join(File.dirname(__FILE__), 'model.rb') + +case ENV['RACK_ENV'] +when 'production' + @@redis = Redis.new :db => 0 +when 'development' + @@redis = Redis.new :db => 1 +when 'test' + @@redis = Redis.new :db => 2 + @@redis.flush_db end -post '/models/?' do # create a model +set :default_content, :yaml - training_dataset = OpenTox::Dataset.new :uri => params[:dataset_uri] - model = Model.create(:name => training_dataset.name, :training_dataset_uri => training_dataset.uri) - model.update_attributes(:uri => url_for("/model/", :full) + model.id.to_s) +helpers do - Spork.spork do - feature_generation = OpenTox::Fminer.new(training_dataset) - feature_dataset = feature_generation.dataset - model.feature_dataset_uri = feature_dataset.uri.chomp - model.finished = true - model.save + def find + uri = uri(params[:splat].first) + halt 404, "Dataset \"#{uri}\" not found." unless @model = Model.find(uri) end - - model.uri.to_s -end -delete '/model/:id' do - halt 404, "Model #{params[:id]} not found." unless model = Model.get(params[:id]) - model.predictions.each do |p| - p.neighbors.each { |n| n.destroy } - p.features.each { |n| f.destroy } - p.destroy + def uri(name) + uri = url_for("/model/", :full) + URI.encode(name) end - model.destroy - "Model #{params[:id]} succesfully deleted." - # TODO: what happens with datasets, avoid stale datasets, but other components might need them end -post '/model/:id' do # create prediction - - halt 404, "Model #{params[:id]} not found." unless model = Model.get(params[:id]) - query_compound = OpenTox::Compound.new :uri => params[:compound_uri] - activity_dataset = OpenTox::Dataset.new :uri => model.training_dataset_uri - -# database_activities = activity_dataset.features(query_compound) - -# if database_activities.size > 0 # return database values -# database_activities.collect{ |f| f.uri }.join('\n') - -# else # make prediction - prediction = Prediction.find_or_create(:model_uri => model.uri, :compound_uri => params[:compound_uri]) - - unless prediction.finished # present cached prediction if finished - - prediction.update_attributes(:uri => url_for("/prediction/", :full) + prediction.id.to_s) - Spork.spork do - feature_dataset = OpenTox::Dataset.new :uri => model.feature_dataset_uri - compound_descriptors = feature_dataset.all_compounds_and_features_uris - training_features = feature_dataset.all_features - compound_activities = activity_dataset.all_compounds_and_features_uris - query_features = query_compound.match(training_features) - query_features.each do |f| - Feature.find_or_create(:feature_uri => f.uri, :prediction_uri => prediction.uri) - end - query_feature_uris = query_features.collect{|f| f.uri} - - conf = 0.0 - nr_neighbors = 0 - - compound_descriptors.each do |compound_uri,feature_uris| - sim = similarity(feature_uris,query_feature_uris) - if sim > 0.0 - nr_neighbors += 1 - # datamapper default precision is 10, floats with higher precision are not saved - n = Neighbor.create(:uri => compound_uri, :similarity => (1000*sim).round/1000.0, :prediction_uri => prediction.uri) - compound_activities[compound_uri].each do |a| - case OpenTox::Feature.new(:uri => a).value('classification').to_s - when 'true' - conf += gauss(sim) - when 'false' - conf -= gauss(sim) - end - end - end - end - conf = conf/nr_neighbors - if conf > 0.0 - classification = true - elsif conf < 0.0 - classification = false - end - prediction.update_attributes(:confidence => (1000*conf).round/1000.0, :classification => classification, :finished => true) - - end - - end - - prediction.uri -# end +get '/algorithms' do + url_for("/algorithm/classification", :full) end -# PREDICTIONS -get '/predictions?' do # get index of predictions - Prediction.all.collect{ |p| p.uri }.join("\n") +post '/algorithm/classification/?' do # create a model + #halt 403, + activity_dataset_uri = OpenTox::Dataset.find(:uri => params[:dataset_uri]).uri + feature_dataset_uri = OpenTox::Algorithm::Fminer.create(activity_dataset_uri) + Model.create(:activity_dataset_uri => activity_dataset_uri, :feature_dataset_uri => feature_dataset_uri).uri end -get '/prediction/:id' do # display prediction - halt 404, "Prediction #{params[:id]} not found." unless prediction = Prediction.get(params[:id]) - halt 202, prediction.to_yaml unless prediction.finished - prediction.to_yaml - #xml prediction +get '/models/?' do # get index of models + Model.find_all.join("\n") end -get '/prediction/:id/neighbors' do - halt 404, "Prediction #{params[:id]} not found." unless prediction = Prediction.get(params[:id]) - halt 202, "Prediction #{params[:id]} not yet finished, please try again later." unless prediction.finished - #xml Neighbor.all(:prediction_uri => prediction.uri) - Neighbor.all(:prediction_uri => prediction.uri).to_yaml +get '/model/*/?' do + #halt 404, "Model #{name} not found." unless @model = Model.find(request.url) + find + @model.to_yaml end -get '/prediction/:id/features' do - halt 404, "Prediction #{params[:id]} not found." unless prediction = Prediction.get(params[:id]) - halt 202, "Prediction #{params[:id]} not yet finished, please try again later." unless prediction.finished - #xml Feature.all(:prediction_uri => prediction.uri) - Feature.all(:prediction_uri => prediction.uri).to_yaml +delete '/model/*' do + name = params[:splat].first + halt 404, "Model #{name} not found." unless @model = Model.find(request.url) + @model.destroy + "Model #{params[:id]} succesfully deleted." end -delete '/prediction/:id' do - halt 404, "Prediction #{params[:id]} not found." unless prediction = Prediction.get(params[:id]) - p.neighbors.each { |n| n.destroy } - p.features.each { |f| f.destroy } - p.destroy - "Prediction #{params[:id]} succesfully deleted." +post '/model/*' do # create prediction + name = params[:splat].first + halt 404, "Model #{name} not found." unless @model = Model.find(request.url) + compound = OpenTox::Compound.new :uri => params[:compound_uri] + @model.predict(compound) end -# Utility functions -def similarity(neighbor_features, query_features) - - common_features = neighbor_features & query_features - all_features = neighbor_features | query_features - - #common_features.size.to_f/all_features.size.to_f - sum_p_common = 0.0 - sum_p_all = 0.0 +# PREDICTIONS +get '/model/*/predictions?' do # get dataset URI + name = params[:splat].first + halt 404, "Model #{name} not found." unless @model = Model.find(request.url) + # Dataset.find +end - all_features.each do |f| - sum_p_all += gauss(OpenTox::Feature.new(:uri => f).value('p_value').to_f) - end - common_features.each do |f| - sum_p_common += gauss(OpenTox::Feature.new(:uri => f).value('p_value').to_f) - end - sum_p_common/sum_p_all +get '/model/*/prediction/*' do # display prediction for a compound + name = params[:splat].first + compound_uri = params[:splat][1] + halt 404, "Model #{name} not found." unless @model = Model.find(request.url) + # prediction not found + #prediction.to_yaml + #xml prediction +end +get '/model/*/prediction/*/neighbors' do + name = params[:splat].first + compound_uri = params[:splat][1] + halt 404, "Model #{name} not found." unless @model = Model.find(request.url) + # prediction not found + # prediction.neighbors end -# gauss kernel -def gauss(sim, sigma = 0.3) - x = 1.0 - sim - Math.exp(-(x*x)/(2*sigma*sigma)) +get '/model/*/prediction/*/features' do + name = params[:splat].first + compound_uri = params[:splat][1] + halt 404, "Model #{name} not found." unless @model = Model.find(request.url) + # prediction not found + # prediction not found + # prediction.features end -def xml(object) - builder do |xml| - xml.instruct! - object.to_xml - end +delete '/model/*/prediction/*' do # display prediction for a compound + name = params[:splat].first + halt 404, "Model #{name} not found." unless @model = Model.find(request.url) + # Prediction.destroy end @@ -2,11 +2,9 @@ require 'rubygems' require 'sinatra' require 'application.rb' -if ENV["RACK_ENV"] == 'production' - FileUtils.mkdir_p 'log' unless File.exists?('log') - log = File.new("log/sinatra.log", "a") - $stdout.reopen(log) - $stderr.reopen(log) -end +FileUtils.mkdir_p 'log' unless File.exists?('log') +log = File.new("log/#{ENV["RACK_ENV"]}.log", "a") +$stdout.reopen(log) +$stderr.reopen(log) run Sinatra::Application diff --git a/environment.rb b/environment.rb index 684d029..c6ebefb 100644 --- a/environment.rb +++ b/environment.rb @@ -1,23 +1,16 @@ -require 'rubygems' - -['sinatra', 'sinatra/url_for', 'dm-core', 'dm-more', 'builder', 'opentox-ruby-api-wrapper'].each do |lib| +['rubygems', 'sinatra', 'redis', 'builder', 'opentox-ruby-api-wrapper'].each do |lib| require lib end -require "openbabel" - -sqlite = "#{File.expand_path(File.dirname(__FILE__))}/#{Sinatra::Base.environment}.sqlite3" -DataMapper.setup(:default, "sqlite3:///#{sqlite}") -#DataMapper.setup(:default, 'sqlite3::memory:') - -DataMapper::Logger.new(STDOUT, 0) - -load 'models.rb' - -unless File.exists?(sqlite) - Model.auto_migrate! - Prediction.auto_migrate! - Neighbor.auto_migrate! - Feature.auto_migrate! +case ENV['RACK_ENV'] +when 'production' + @@redis = Redis.new :db => 0 +when 'development' + @@redis = Redis.new :db => 1 +when 'test' + @@redis = Redis.new :db => 2 + @@redis.flush_db end +set :default_content, :yaml +load 'models.rb' diff --git a/model.rb b/model.rb new file mode 100644 index 0000000..3a84217 --- /dev/null +++ b/model.rb @@ -0,0 +1,83 @@ +class Model + + include OpenTox::Utils + attr_accessor :uri, :activity_dataset_uri, :feature_dataset_uri, :name + + def initialize(params) + @uri = params[:uri] + @activity_dataset_uri = params[:activity_dataset_uri] + @feature_dataset_uri = params[:feature_dataset_uri] + begin + @name = URI.split(@uri)[5] + rescue + puts "Bad URI #{@uri}" + end + end + + def self.create(params) + params[:uri] = params[:activity_dataset_uri].sub(/dataset/,'model') + @@redis.set_add "models", params[:uri] + @@redis.set(File.join(params[:uri],"activity_dataset"), params[:activity_dataset_uri]) + @@redis.set(File.join(params[:uri],"feature_dataset"), params[:feature_dataset_uri]) + Model.new(params) + end + + def self.find(uri) + if @@redis.set_member? "models", uri + activity_dataset_uri = @@redis.get File.join(uri,"activity_dataset") + feature_dataset_uri = @@redis.get File.join(uri,"feature_dataset") + Model.new(:uri => uri, :activity_dataset_uri => activity_dataset_uri, :feature_dataset_uri => feature_dataset_uri) + else + nil + end + end + + def self.find_all + @@redis.set_members("models") + end + + def predict(compound) + + training_activities = OpenTox::Dataset.find :uri => @uri.sub(/model/,'dataset') + # find database activities + # find prediction + training_features = OpenTox::Dataset.find(:uri => @feature_dataset_uri) + + prediction_dataset = OpenTox::Dataset.find_or_create(:name => training_activities.name + '_predictions') + prediction_neighbors = OpenTox::Dataset.find_or_create(:name => training_activities.name + '_neighbors') + prediction_features = OpenTox::Dataset.find_or_create(:name => training_activities.name + '_prediction_features') + + feature_uris = compound.match(training_features) + prediction_features.add({compound.uri => feature_uris}) + + conf = 0.0 + neighbors = [] + + training_features.compounds.each do |neighbor| + sim = OpenTox::Algorithm::Similarity.weighted_tanimoto(training_features,neighbor,prediction_features,compound).to_f + if sim > 0.3 + neighbors << neighbor.uri + training_activities.features(neighbor).each do |a| + case OpenTox::Feature.new(:uri => a.uri).value('classification').to_s + when 'true' + conf += OpenTox::Utils.gauss(sim) + when 'false' + conf -= OpenTox::Utils.gauss(sim) + end + end + end + end + conf = conf/neighbors.size + if conf > 0.0 + classification = true + elsif conf < 0.0 + classification = false + end + + prediction_neighbors.add({compound.uri => neighbors}) + prediction_uri = OpenTox::Feature.new(:name => @name, :values => {:classification => classification, :confidence => conf}).uri + prediction_uri + + end + +end diff --git a/models.rb b/models.rb deleted file mode 100644 index c5b0bad..0000000 --- a/models.rb +++ /dev/null @@ -1,47 +0,0 @@ -class Model - include DataMapper::Resource - property :id, Serial - property :name, String - property :uri, String, :size => 255 - property :feature_dataset_uri, String, :size => 255 - property :training_dataset_uri, String, :size => 255 - property :finished, Boolean, :default => false - - def predictions - Prediction.all(:model_uri => uri) - end -end - -class Prediction - include DataMapper::Resource - property :id, Serial - property :uri, String, :size => 255 - property :model_uri, String, :size => 255 - property :compound_uri, String, :size => 255 - property :classification, Boolean - property :confidence, Float - property :finished, Boolean, :default => false - - def neighbors - Neighbor.all(:prediction_uri => uri) - end - - def features - Feature.all(:prediction_uri => uri) - end -end - -class Neighbor - include DataMapper::Resource - property :id, Serial - property :uri, String, :size => 255 - property :prediction_uri, String, :size => 255 - property :similarity, Float -end - -class Feature - include DataMapper::Resource - property :id, Serial - property :feature_uri, String, :size => 255 - property :prediction_uri, String, :size => 255 -end diff --git a/test/hamster_carcinogenicity.csv b/test/hamster_carcinogenicity.csv new file mode 100644 index 0000000..009808f --- /dev/null +++ b/test/hamster_carcinogenicity.csv @@ -0,0 +1,85 @@ +CC=O,true
+C12C3=C(C=CC=C3)CC1=CC(=CC=2)NC(C)=O,true
+O=C(N)\C(C2=CC=CO2)=C/C1=CC=C([N+]([O-])=O)O1,true
+C1(N=CNN=1)N,false
+Br(=O)(=O)[O-].[K+],true
+[Cl-].[Cd+2].[Cl-],false
+O=S(=O)([O-])[O-].[Cd+2],false
+ClC1=CC(=NC(=N1)SCC(=O)O)NC2=CC=CC(=C2C)C,false
+ClCOC,true
+C=C(Cl)C=C,false
+Clc1ccc(cc1)c2ccc(COC(C)(C)C(O)=O)cc2,false
+O=C1OC2=C(C=CC=C2)C=C1,false
+ClC(=C(C1=CC=C(C=C1)Cl)C2=CC=C(C=C2)Cl)Cl,true
+ClC(C(C1=CC=C(C=C1)Cl)C2=CC=C(C=C2)Cl)(Cl)Cl,false
+C=CCN(CC=C)N=O,true
+Cl\C2=C(/Cl)C3(Cl)C1C4CC(C1C2(Cl)C3(Cl)Cl)C5OC45,false
+O=C(N(C)C)Cl,true
+CN(C)N,true
+N(NC)C.[H]Cl.[H]Cl,true
+CCO,false
+O=C(N(CC)N=O)NCCO,true
+O=C(N(CC)N=O)NCC(=O)C,true
+C=O,false
+[O-][N+](=O)C1=CC=C(O1)C2=CSC(=N2)NNC=O,true
+O=CC1=CC=CO1,false
+OCC1CO1,true
+O=C2C1=C(OC)C=C(OC)C(Cl)=C1O[C@]32C(OC)=CC(C[C@@](C)3[H])=O,false
+ClC1=C(C(=C(C(=C1Cl)Cl)Cl)Cl)Cl,true
+NN,true
+OS(=O)(=O)O.NN,true
+CC(=O)N(O)C1=CC2=C(C=C1)C3=CC=CC=C3C2,true
+OCCNN,false
+O=C(C1=CC=NC=C1)NN,false
+OC(=O)C1=CC=NC=C1,false
+O=C(NC1=CC=CC(=C1)Cl)OC(C)C,false
+O=C(NC1=CC=CC=C1)OC(C)C,false
+[O-]C(C)=O.[O-]C(C)=O.[Pb+2].[OH-].[OH-].[Pb+2].[OH-].[OH-].[Pb+2],false
+CN(C)CCN(CC2=CC=CS2)C1=NC=CC=C1.Cl,false
+NC1=C2C(=NC(=N1)N)N=CC(=N2)CN(C3=CC=C(C=C3)C(=O)N[C@@H](CCC(=O)O)C(=O)O)C,false
+CN(N)C=O,true
+O=C(C(=C)C)OC,false
+CNN,true
+O=C(C1=CC=CN=C1)CCCN(N=O)C,false
+CC1=CC(=O)NC(=S)N1,true
+CC(C(O)=O)(OC1=CC=C(C=C1)C2CCCC3=C2C=CC=C3)C,false
+O=N[O-].[Na+],false
+[O-][N+](C1=CC=C(C2=CSC(NC(C)=O)=N2)O1)=O,true
+[O-][N+](=O)C1=CC=C(O1)C2=CSC(=N2)NC=O,true
+O=[N+](C1=CC=C2C3=C1C=CC=C3CC2)[O-],false
+N(CC(CO)O)(CC(O)C)N=O,true
+N(CC(CO)O)(CC(C)=O)N=O,true
+N(CC(CO)O)(CCO)N=O,false
+O=C(C)CN(N=O)CCO,true
+C1C(N(C(CN1N=O)C)C)C,true
+N(CC(C)=O)(CC=C)N=O,true
+N(CC(CO)O)(C)N=O,true
+O=NN1CCOCC1,true
+N1C=CC=C(C=1)C2N(N=O)CCC2,true
+C1=CC=C(C=[N+]1[O-])C2CCCN2N=O,false
+O=NN1CCCCC1,true
+O=NN1CCCC1,true
+O=C(N(CC(C)=O)N=O)NCCCl,true
+N(C(=O)N)(N=O)CC(C)=O,true
+C1(CCN=C=S)=CC=CC=C1,false
+O=C1C(C2=CC=CC=C2)(C(=O)NC(=O)N1)CC,false
+C1=C2C(=CC=C1NC3=CC=CC=C3)C=CC=C2,false
+O=C1N2C(C3=C(C=CC=C3)CC2)CN(C1)C(=O)C4CCCCC4,false
+C1(=CC(=C(O)C=C1)O)C(O)=O,false
+O=C1C2=C(C=C(C=C2O)O)O/C(=C\1O)C3=CC(=C(C=C3)O)O.O.O,false
+C1=C(C=CC(=C1)C(C2=CC=C(N)C(=C2)C)=C3C=CC(=N)C=C3)N.[H]Cl,false
+C(C1=CC=C(C=C1)N)(C2=CC=C(C=C2)N)=C3C=CC(C=C3)=N.[H]Cl,false
+OC2=CC1=C(C(O)=C2)C(C(O[C@@H]4O[C@@H]([C@H]([C@H](O)[C@H]4O)O)CO[C@H]3[C@H](O)[C@H](O)[C@H]([C@H](C)O3)O)=C(C5=CC(O)=C(C=C5)O)O1)=O,false
+ClC(=CCl)Cl,false
+NC(=O)OCC,true
+C=CCl,true
+N#[N+]C1=CC=CC=C1.F[B-](F)(F)F,false
+C1(CN(CC(N1N=O)C)N=O)C,true
+N(CCN(C)C)(C)N=O,true
+C1(CN(N=O)CC(O1)C)C,true
+O1C(N(CC1C)N=O)=O,true
+CCOC(=O)N(C)N=O,true
+C1N(COC1)N=O,true
+O=C(N(CCC1=CC=CC=C1)N=O)N,true
+O=NN1CCC1,true
+F[B-](F)(F)F.[Na+],false
diff --git a/test/test.rb b/test/test.rb new file mode 100644 index 0000000..f374bb4 --- /dev/null +++ b/test/test.rb @@ -0,0 +1,44 @@ +require 'application' +require 'test/unit' +require 'rack/test' + +set :environment, :test + +class LazarTest < Test::Unit::TestCase + include Rack::Test::Methods + + def app + Sinatra::Application + end + + def setup + @dataset = OpenTox::Dataset.create :name => "Hamster Carcinogenicity" + @dataset.import :csv => File.join(File.dirname(__FILE__), "hamster_carcinogenicity.csv"), :compound_format => "smiles", :feature_type => "activity" + end + + def teardown + @dataset.delete + end + + def test_algorithms + get '/algorithms' + assert last_response.body.include?("classification") + end + + def test_create_model_and_predict + post '/algorithm/classification', :dataset_uri => @dataset.uri + assert last_response.ok? + model_uri = last_response.body + get model_uri + assert last_response.ok? + get '/models' + assert last_response.body.include? model_uri + query_structure = OpenTox::Compound.new :smiles => 'c1ccccc1NN' + #query_structure = OpenTox::Compound.new :smiles => '[O-]C(C)=O.[O-]C(C)=O.[Pb+2].[OH-].[OH-].[Pb+2].[OH-].[OH-].[Pb+2]' + post model_uri, :compound_uri => query_structure.uri + assert last_response.ok? + assert last_response.body.include? 'classification/true' + puts last_response.body + end + +end |