From 1e5762d6328ba3d261a420adaeeff441cfbd5fa2 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Tue, 6 Oct 2009 09:59:50 +0200 Subject: Passes tests in opentox-test --- .gitignore | 1 + application.rb | 104 +----------------------------- config.ru | 2 + environment.rb | 15 +---- lazar.rb | 134 +++++++++++++++++++++++++++++++++++++++ model.rb | 83 ------------------------ test/hamster_carcinogenicity.csv | 85 ------------------------- test/test.rb | 44 ------------- 8 files changed, 142 insertions(+), 326 deletions(-) create mode 100644 lazar.rb delete mode 100644 model.rb delete mode 100644 test/hamster_carcinogenicity.csv delete mode 100644 test/test.rb diff --git a/.gitignore b/.gitignore index 8839a06..e4a5a44 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ tmp/* *.sqlite3 log/* +db/* diff --git a/application.rb b/application.rb index b5d8d4e..c86fe66 100644 --- a/application.rb +++ b/application.rb @@ -1,103 +1,5 @@ -['rubygems', 'sinatra', 'redis', 'builder', 'opentox-ruby-api-wrapper'].each do |lib| - require lib -end - -load File.join(File.dirname(__FILE__), 'model.rb') - -case ENV['RACK_ENV'] -when 'production' - @@redis = Redis.new :db => 0 -when 'development' - @@redis = Redis.new :db => 1 -when 'test' - @@redis = Redis.new :db => 2 - @@redis.flush_db -end +require 'rubygems' +require 'opentox-ruby-api-wrapper' +require 'lazar.rb' set :default_content, :yaml - -helpers do - - def find - uri = uri(params[:splat].first) - halt 404, "Dataset \"#{uri}\" not found." unless @model = Model.find(uri) - end - - def uri(name) - uri = url_for("/model/", :full) + URI.encode(name) - end -end - -get '/algorithms' do - url_for("/algorithm/classification", :full) -end - -post '/algorithm/classification/?' do # create a model - #halt 403, - activity_dataset_uri = OpenTox::Dataset.find(:uri => params[:dataset_uri]).uri - feature_dataset_uri = OpenTox::Algorithm::Fminer.create(activity_dataset_uri) - Model.create(:activity_dataset_uri => activity_dataset_uri, :feature_dataset_uri => feature_dataset_uri).uri -end - -get '/models/?' do # get index of models - Model.find_all.join("\n") -end - -get '/model/*/?' do - #halt 404, "Model #{name} not found." unless @model = Model.find(request.url) - find - @model.to_yaml -end - -delete '/model/*' do - name = params[:splat].first - halt 404, "Model #{name} not found." unless @model = Model.find(request.url) - @model.destroy - "Model #{params[:id]} succesfully deleted." -end - -post '/model/*' do # create prediction - name = params[:splat].first - halt 404, "Model #{name} not found." unless @model = Model.find(request.url) - compound = OpenTox::Compound.new :uri => params[:compound_uri] - @model.predict(compound) -end - -# PREDICTIONS -get '/model/*/predictions?' do # get dataset URI - name = params[:splat].first - halt 404, "Model #{name} not found." unless @model = Model.find(request.url) - # Dataset.find -end - -get '/model/*/prediction/*' do # display prediction for a compound - name = params[:splat].first - compound_uri = params[:splat][1] - halt 404, "Model #{name} not found." unless @model = Model.find(request.url) - # prediction not found - #prediction.to_yaml - #xml prediction -end - -get '/model/*/prediction/*/neighbors' do - name = params[:splat].first - compound_uri = params[:splat][1] - halt 404, "Model #{name} not found." unless @model = Model.find(request.url) - # prediction not found - # prediction.neighbors -end - -get '/model/*/prediction/*/features' do - name = params[:splat].first - compound_uri = params[:splat][1] - halt 404, "Model #{name} not found." unless @model = Model.find(request.url) - # prediction not found - # prediction not found - # prediction.features -end - -delete '/model/*/prediction/*' do # display prediction for a compound - name = params[:splat].first - halt 404, "Model #{name} not found." unless @model = Model.find(request.url) - # Prediction.destroy -end diff --git a/config.ru b/config.ru index 02ca8b3..c386f81 100644 --- a/config.ru +++ b/config.ru @@ -1,6 +1,8 @@ require 'rubygems' require 'sinatra' require 'application.rb' +require 'rack' +require 'rack/contrib' FileUtils.mkdir_p 'log' unless File.exists?('log') log = File.new("log/#{ENV["RACK_ENV"]}.log", "a") diff --git a/environment.rb b/environment.rb index c6ebefb..067fc18 100644 --- a/environment.rb +++ b/environment.rb @@ -1,16 +1,5 @@ -['rubygems', 'sinatra', 'redis', 'builder', 'opentox-ruby-api-wrapper'].each do |lib| - require lib -end - -case ENV['RACK_ENV'] -when 'production' - @@redis = Redis.new :db => 0 -when 'development' - @@redis = Redis.new :db => 1 -when 'test' - @@redis = Redis.new :db => 2 - @@redis.flush_db -end +require 'rubygems' +require 'opentox-ruby-api-wrapper' set :default_content, :yaml load 'models.rb' diff --git a/lazar.rb b/lazar.rb new file mode 100644 index 0000000..2b23495 --- /dev/null +++ b/lazar.rb @@ -0,0 +1,134 @@ +require 'datamapper' + +DataMapper::setup(:default, "sqlite3://#{Dir.pwd}/db/#{ENV['RACK_ENV']}.db") + +class LazarModel + include DataMapper::Resource + property :id, Serial + property :activity_dataset_uri, String, :length => 256 # default is too short for URIs + property :feature_dataset_uri, String, :length => 256 # default is too short for URIs + property :created_at, DateTime + + def uri + File.join(OpenTox::Model::LazarClassification.base_uri,"lazar_classification", self.id.to_s) + end + + def predict(compound) + + training_activities = OpenTox::Dataset.find :uri => @activity_dataset_uri + # TODO: find database activities + # TODO: find prediction + training_features = OpenTox::Dataset.find :uri => @feature_dataset_uri + + prediction_dataset = OpenTox::Dataset.find_or_create(:name => training_activities.name + '_predictions') + prediction_neighbors = OpenTox::Dataset.find_or_create(:name => training_activities.name + '_neighbors') + prediction_features = OpenTox::Dataset.find_or_create(:name => training_activities.name + '_prediction_features') + + feature_uris = compound.match(training_features) + prediction_features.add({compound.uri => feature_uris}.to_yaml) + + conf = 0.0 + neighbors = [] + + training_features.compounds.each do |neighbor| + sim = OpenTox::Algorithm::Similarity.weighted_tanimoto(training_features,neighbor,prediction_features,compound).to_f + if sim > 0.3 + neighbors << neighbor.uri + training_activities.features(neighbor).each do |a| + case OpenTox::Feature.new(:uri => a.uri).value('classification').to_s + when 'true' + conf += OpenTox::Utils.gauss(sim) + when 'false' + conf -= OpenTox::Utils.gauss(sim) + end + end + end + end + conf = conf/neighbors.size + if conf > 0.0 + classification = true + elsif conf < 0.0 + classification = false + end + + prediction = OpenTox::Feature.new(:name => training_activities.name + " prediction", :classification => classification, :confidence => conf) + prediction_neighbors.add({compound.uri => neighbors}.to_yaml) + prediction_dataset.add({compound.uri => [prediction.uri]}.to_yaml) + + prediction.uri + + end +end + +# automatically create the post table +LazarModel.auto_migrate! #unless LazarModel.table_exists? +LazarModel.auto_migrate! if ENV['RACK_ENV'] == 'test' + +get '/lazar_classification/?' do # get index of models + LazarModel.all.collect{|m| m.uri}.join("\n") +end + +get '/lazar_classification/:id/?' do + halt 404, "Model #{params[:id]} not found." unless @model = LazarModel.get(params[:id]) + @model.to_yaml +end + +delete '/lazar_classification/:id/?' do + halt 404, "Model #{params[:id]} not found." unless @model = LazarModel.get(params[:id]) + @model.destroy + "Model #{params[:id]} succesfully deleted." +end + +post '/lazar_classification/?' do # create model + halt 404, "Dataset #{params[:activity_dataset_uri]} not found" unless OpenTox::Dataset.find(:uri => params[:activity_dataset_uri]) + halt 404, "Dataset #{params[:feature_dataset_uri]} not found" unless OpenTox::Dataset.find(:uri => params[:feature_dataset_uri]) + model = LazarModel.new(params) + model.save + model.uri +end + +# PREDICTIONS +post '/lazar_classification/:id/?' do # create prediction + halt 404, "Model #{params[:id]} not found." unless @model = LazarModel.get(params[:id]) + compound = OpenTox::Compound.new :uri => params[:compound_uri] + @model.predict(compound) +end + +get '/lazar_classification/*/predictions?' do # get dataset URI + name = params[:splat].first + halt 404, "Model #{name} not found." unless @model = LazarModel.get(request.url) + # Dataset.find +end + +get '/lazar_classification/*/prediction/*' do # display prediction for a compound + name = params[:splat].first + compound_uri = params[:splat][1] + halt 404, "Model #{name} not found." unless @model = LazarModel.get(request.url) + # prediction not found + #prediction.to_yaml + #xml prediction +end + +get '/lazar_classification/*/prediction/*/neighbors' do + name = params[:splat].first + compound_uri = params[:splat][1] + halt 404, "Model #{name} not found." unless @model = LazarModel.get(request.url) + # prediction not found + # prediction.neighbors +end + +get '/lazar_classification/*/prediction/*/features' do + name = params[:splat].first + compound_uri = params[:splat][1] + halt 404, "Model #{name} not found." unless @model = LazarModel.get(request.url) + # prediction not found + # prediction not found + # prediction.features +end + +delete '/lazar_classification/*/prediction/*' do # display prediction for a compound + name = params[:splat].first + halt 404, "Model #{name} not found." unless @model = LazarModel.get(request.url) + # Prediction.destroy +end + diff --git a/model.rb b/model.rb deleted file mode 100644 index 3a84217..0000000 --- a/model.rb +++ /dev/null @@ -1,83 +0,0 @@ -class Model - - include OpenTox::Utils - attr_accessor :uri, :activity_dataset_uri, :feature_dataset_uri, :name - - def initialize(params) - @uri = params[:uri] - @activity_dataset_uri = params[:activity_dataset_uri] - @feature_dataset_uri = params[:feature_dataset_uri] - begin - @name = URI.split(@uri)[5] - rescue - puts "Bad URI #{@uri}" - end - end - - def self.create(params) - params[:uri] = params[:activity_dataset_uri].sub(/dataset/,'model') - @@redis.set_add "models", params[:uri] - @@redis.set(File.join(params[:uri],"activity_dataset"), params[:activity_dataset_uri]) - @@redis.set(File.join(params[:uri],"feature_dataset"), params[:feature_dataset_uri]) - Model.new(params) - end - - def self.find(uri) - if @@redis.set_member? "models", uri - activity_dataset_uri = @@redis.get File.join(uri,"activity_dataset") - feature_dataset_uri = @@redis.get File.join(uri,"feature_dataset") - Model.new(:uri => uri, :activity_dataset_uri => activity_dataset_uri, :feature_dataset_uri => feature_dataset_uri) - else - nil - end - end - - def self.find_all - @@redis.set_members("models") - end - - def predict(compound) - - training_activities = OpenTox::Dataset.find :uri => @uri.sub(/model/,'dataset') - # find database activities - # find prediction - training_features = OpenTox::Dataset.find(:uri => @feature_dataset_uri) - - prediction_dataset = OpenTox::Dataset.find_or_create(:name => training_activities.name + '_predictions') - prediction_neighbors = OpenTox::Dataset.find_or_create(:name => training_activities.name + '_neighbors') - prediction_features = OpenTox::Dataset.find_or_create(:name => training_activities.name + '_prediction_features') - - feature_uris = compound.match(training_features) - prediction_features.add({compound.uri => feature_uris}) - - conf = 0.0 - neighbors = [] - - training_features.compounds.each do |neighbor| - sim = OpenTox::Algorithm::Similarity.weighted_tanimoto(training_features,neighbor,prediction_features,compound).to_f - if sim > 0.3 - neighbors << neighbor.uri - training_activities.features(neighbor).each do |a| - case OpenTox::Feature.new(:uri => a.uri).value('classification').to_s - when 'true' - conf += OpenTox::Utils.gauss(sim) - when 'false' - conf -= OpenTox::Utils.gauss(sim) - end - end - end - end - conf = conf/neighbors.size - if conf > 0.0 - classification = true - elsif conf < 0.0 - classification = false - end - - prediction_neighbors.add({compound.uri => neighbors}) - prediction_uri = OpenTox::Feature.new(:name => @name, :values => {:classification => classification, :confidence => conf}).uri - prediction_uri - - end - -end diff --git a/test/hamster_carcinogenicity.csv b/test/hamster_carcinogenicity.csv deleted file mode 100644 index 009808f..0000000 --- a/test/hamster_carcinogenicity.csv +++ /dev/null @@ -1,85 +0,0 @@ -CC=O,true -C12C3=C(C=CC=C3)CC1=CC(=CC=2)NC(C)=O,true -O=C(N)\C(C2=CC=CO2)=C/C1=CC=C([N+]([O-])=O)O1,true -C1(N=CNN=1)N,false -Br(=O)(=O)[O-].[K+],true -[Cl-].[Cd+2].[Cl-],false -O=S(=O)([O-])[O-].[Cd+2],false -ClC1=CC(=NC(=N1)SCC(=O)O)NC2=CC=CC(=C2C)C,false -ClCOC,true -C=C(Cl)C=C,false -Clc1ccc(cc1)c2ccc(COC(C)(C)C(O)=O)cc2,false -O=C1OC2=C(C=CC=C2)C=C1,false -ClC(=C(C1=CC=C(C=C1)Cl)C2=CC=C(C=C2)Cl)Cl,true -ClC(C(C1=CC=C(C=C1)Cl)C2=CC=C(C=C2)Cl)(Cl)Cl,false -C=CCN(CC=C)N=O,true -Cl\C2=C(/Cl)C3(Cl)C1C4CC(C1C2(Cl)C3(Cl)Cl)C5OC45,false -O=C(N(C)C)Cl,true -CN(C)N,true -N(NC)C.[H]Cl.[H]Cl,true -CCO,false -O=C(N(CC)N=O)NCCO,true -O=C(N(CC)N=O)NCC(=O)C,true -C=O,false -[O-][N+](=O)C1=CC=C(O1)C2=CSC(=N2)NNC=O,true -O=CC1=CC=CO1,false -OCC1CO1,true -O=C2C1=C(OC)C=C(OC)C(Cl)=C1O[C@]32C(OC)=CC(C[C@@](C)3[H])=O,false -ClC1=C(C(=C(C(=C1Cl)Cl)Cl)Cl)Cl,true -NN,true -OS(=O)(=O)O.NN,true -CC(=O)N(O)C1=CC2=C(C=C1)C3=CC=CC=C3C2,true -OCCNN,false -O=C(C1=CC=NC=C1)NN,false -OC(=O)C1=CC=NC=C1,false -O=C(NC1=CC=CC(=C1)Cl)OC(C)C,false -O=C(NC1=CC=CC=C1)OC(C)C,false -[O-]C(C)=O.[O-]C(C)=O.[Pb+2].[OH-].[OH-].[Pb+2].[OH-].[OH-].[Pb+2],false -CN(C)CCN(CC2=CC=CS2)C1=NC=CC=C1.Cl,false -NC1=C2C(=NC(=N1)N)N=CC(=N2)CN(C3=CC=C(C=C3)C(=O)N[C@@H](CCC(=O)O)C(=O)O)C,false -CN(N)C=O,true -O=C(C(=C)C)OC,false -CNN,true -O=C(C1=CC=CN=C1)CCCN(N=O)C,false -CC1=CC(=O)NC(=S)N1,true -CC(C(O)=O)(OC1=CC=C(C=C1)C2CCCC3=C2C=CC=C3)C,false -O=N[O-].[Na+],false -[O-][N+](C1=CC=C(C2=CSC(NC(C)=O)=N2)O1)=O,true -[O-][N+](=O)C1=CC=C(O1)C2=CSC(=N2)NC=O,true -O=[N+](C1=CC=C2C3=C1C=CC=C3CC2)[O-],false -N(CC(CO)O)(CC(O)C)N=O,true -N(CC(CO)O)(CC(C)=O)N=O,true -N(CC(CO)O)(CCO)N=O,false -O=C(C)CN(N=O)CCO,true -C1C(N(C(CN1N=O)C)C)C,true -N(CC(C)=O)(CC=C)N=O,true -N(CC(CO)O)(C)N=O,true -O=NN1CCOCC1,true -N1C=CC=C(C=1)C2N(N=O)CCC2,true -C1=CC=C(C=[N+]1[O-])C2CCCN2N=O,false -O=NN1CCCCC1,true -O=NN1CCCC1,true -O=C(N(CC(C)=O)N=O)NCCCl,true -N(C(=O)N)(N=O)CC(C)=O,true -C1(CCN=C=S)=CC=CC=C1,false -O=C1C(C2=CC=CC=C2)(C(=O)NC(=O)N1)CC,false -C1=C2C(=CC=C1NC3=CC=CC=C3)C=CC=C2,false -O=C1N2C(C3=C(C=CC=C3)CC2)CN(C1)C(=O)C4CCCCC4,false -C1(=CC(=C(O)C=C1)O)C(O)=O,false -O=C1C2=C(C=C(C=C2O)O)O/C(=C\1O)C3=CC(=C(C=C3)O)O.O.O,false -C1=C(C=CC(=C1)C(C2=CC=C(N)C(=C2)C)=C3C=CC(=N)C=C3)N.[H]Cl,false -C(C1=CC=C(C=C1)N)(C2=CC=C(C=C2)N)=C3C=CC(C=C3)=N.[H]Cl,false -OC2=CC1=C(C(O)=C2)C(C(O[C@@H]4O[C@@H]([C@H]([C@H](O)[C@H]4O)O)CO[C@H]3[C@H](O)[C@H](O)[C@H]([C@H](C)O3)O)=C(C5=CC(O)=C(C=C5)O)O1)=O,false -ClC(=CCl)Cl,false -NC(=O)OCC,true -C=CCl,true -N#[N+]C1=CC=CC=C1.F[B-](F)(F)F,false -C1(CN(CC(N1N=O)C)N=O)C,true -N(CCN(C)C)(C)N=O,true -C1(CN(N=O)CC(O1)C)C,true -O1C(N(CC1C)N=O)=O,true -CCOC(=O)N(C)N=O,true -C1N(COC1)N=O,true -O=C(N(CCC1=CC=CC=C1)N=O)N,true -O=NN1CCC1,true -F[B-](F)(F)F.[Na+],false diff --git a/test/test.rb b/test/test.rb deleted file mode 100644 index f374bb4..0000000 --- a/test/test.rb +++ /dev/null @@ -1,44 +0,0 @@ -require 'application' -require 'test/unit' -require 'rack/test' - -set :environment, :test - -class LazarTest < Test::Unit::TestCase - include Rack::Test::Methods - - def app - Sinatra::Application - end - - def setup - @dataset = OpenTox::Dataset.create :name => "Hamster Carcinogenicity" - @dataset.import :csv => File.join(File.dirname(__FILE__), "hamster_carcinogenicity.csv"), :compound_format => "smiles", :feature_type => "activity" - end - - def teardown - @dataset.delete - end - - def test_algorithms - get '/algorithms' - assert last_response.body.include?("classification") - end - - def test_create_model_and_predict - post '/algorithm/classification', :dataset_uri => @dataset.uri - assert last_response.ok? - model_uri = last_response.body - get model_uri - assert last_response.ok? - get '/models' - assert last_response.body.include? model_uri - query_structure = OpenTox::Compound.new :smiles => 'c1ccccc1NN' - #query_structure = OpenTox::Compound.new :smiles => '[O-]C(C)=O.[O-]C(C)=O.[Pb+2].[OH-].[OH-].[Pb+2].[OH-].[OH-].[Pb+2]' - post model_uri, :compound_uri => query_structure.uri - assert last_response.ok? - assert last_response.body.include? 'classification/true' - puts last_response.body - end - -end -- cgit v1.2.3