diff options
author | Christoph Helma <helma@in-silico.de> | 2009-08-17 11:14:17 +0200 |
---|---|---|
committer | Christoph Helma <helma@in-silico.de> | 2009-08-17 11:14:17 +0200 |
commit | 62ae5207d6dd5f1bd293eaf777d4d0d61e0409c9 (patch) | |
tree | c04eb914d68d65d06079d0a241ad9ba06d6d16e7 | |
parent | f66e3c0d90672d9c3cf8d40798290cf8c04be178 (diff) |
production logging enabled, predictions working with documented algorithm
-rw-r--r-- | application.rb | 68 | ||||
-rw-r--r-- | config.ru | 7 | ||||
-rw-r--r-- | environment.rb | 2 | ||||
-rw-r--r-- | models.rb | 20 |
4 files changed, 56 insertions, 41 deletions
diff --git a/application.rb b/application.rb index 928a122..d1aa61a 100644 --- a/application.rb +++ b/application.rb @@ -6,15 +6,15 @@ end get '/model/:id' do halt 404, "Model #{params[:id]} not found." unless model = Model.get(params[:id]) - halt 202, "Model #{params[:id]} still under construction, please try again later." unless model.finished + halt 202, model.to_yaml unless model.finished + model.to_yaml # builder do |xml| # xml.instruct! - model.to_yaml # end #xml model end -post '/models' do # create a model +post '/models/?' do # create a model training_dataset = OpenTox::Dataset.new :uri => params[:dataset_uri] model = Model.create(:name => training_dataset.name, :training_dataset_uri => training_dataset.uri) @@ -59,45 +59,46 @@ post '/model/:id' do # create prediction unless prediction.finished # present cached prediction if finished - #Spork.spork do - prediction.update_attributes(:uri => url_for("/prediction/", :full) + prediction.id.to_s) + prediction.update_attributes(:uri => url_for("/prediction/", :full) + prediction.id.to_s) + Spork.spork do feature_dataset = OpenTox::Dataset.new :uri => model.feature_dataset_uri - compound_descriptors = feature_dataset.all_compounds_and_features + compound_descriptors = feature_dataset.all_compounds_and_features_uris training_features = feature_dataset.all_features - compound_activities = activity_dataset.all_compounds_and_features # TODO: returns nil/fix in gem + compound_activities = activity_dataset.all_compounds_and_features_uris query_features = query_compound.match(training_features) query_features.each do |f| Feature.find_or_create(:feature_uri => f.uri, :prediction_uri => prediction.uri) end + query_feature_uris = query_features.collect{|f| f.uri} conf = 0.0 + nr_neighbors = 0 - compound_descriptors.each do |compound_uri,features| - sim = similarity(features,query_features,model) + compound_descriptors.each do |compound_uri,feature_uris| + sim = similarity(feature_uris,query_feature_uris) if sim > 0.0 - puts sim - Neighbor.find_or_create(:compound_uri => compound_uri, :similarity => sim, :prediction_uri => prediction.uri) + nr_neighbors += 1 + # datamapper default precision is 10, floats with higher precision are not saved + n = Neighbor.create(:uri => compound_uri, :similarity => (1000*sim).round/1000.0, :prediction_uri => prediction.uri) compound_activities[compound_uri].each do |a| - case a.value('classification').to_s + case OpenTox::Feature.new(:uri => a).value('classification').to_s when 'true' - conf += sim #TODO gaussian + conf += gauss(sim) when 'false' - conf -= sim #TODO gaussian + conf -= gauss(sim) end end end end - + conf = conf/nr_neighbors if conf > 0.0 classification = true elsif conf < 0.0 classification = false end - prediction.update_attributes(:confidence => conf, :classification => classification, :finished => true) - prediction.save! - puts prediction.to_yaml + prediction.update_attributes(:confidence => (1000*conf).round/1000.0, :classification => classification, :finished => true) - #end + end end @@ -112,7 +113,6 @@ end get '/prediction/:id' do # display prediction halt 404, "Prediction #{params[:id]} not found." unless prediction = Prediction.get(params[:id]) - #halt 202, "Prediction #{params[:id]} not yet finished, please try again later." unless prediction.finished halt 202, prediction.to_yaml unless prediction.finished prediction.to_yaml #xml prediction @@ -141,23 +141,29 @@ delete '/prediction/:id' do end # Utility functions -def similarity(neighbor_features, query_features, model) +def similarity(neighbor_features, query_features) - nf = neighbor_features.collect{|f| f.uri } - qf = query_features.collect{|f| f.uri } - #common_features = neighbor_features & query_features - #all_features = neighbor_features | query_features - common_features = nf & qf - all_features = nf | qf + common_features = neighbor_features & query_features + all_features = neighbor_features | query_features + #common_features.size.to_f/all_features.size.to_f sum_p_common = 0.0 sum_p_all = 0.0 - #all_features.each { |f| sum_p_all += f.value.to_f } - #common_features.each { |f| sum_p_common += f.value.to_f } - #sum_p_common/sum_p_all - common_features.size.to_f/all_features.size.to_f + all_features.each do |f| + sum_p_all += gauss(OpenTox::Feature.new(:uri => f).value('p_value').to_f) + end + common_features.each do |f| + sum_p_common += gauss(OpenTox::Feature.new(:uri => f).value('p_value').to_f) + end + sum_p_common/sum_p_all + +end +# gauss kernel +def gauss(sim, sigma = 0.3) + x = 1.0 - sim + Math.exp(-(x*x)/(2*sigma*sigma)) end def xml(object) @@ -2,4 +2,11 @@ require 'rubygems' require 'sinatra' require 'application.rb' +if ENV["RACK_ENV"] == 'production' + FileUtils.mkdir_p 'log' unless File.exists?('log') + log = File.new("log/sinatra.log", "a") + $stdout.reopen(log) + $stderr.reopen(log) +end + run Sinatra::Application diff --git a/environment.rb b/environment.rb index aea0710..684d029 100644 --- a/environment.rb +++ b/environment.rb @@ -8,6 +8,8 @@ require "openbabel" sqlite = "#{File.expand_path(File.dirname(__FILE__))}/#{Sinatra::Base.environment}.sqlite3" DataMapper.setup(:default, "sqlite3:///#{sqlite}") +#DataMapper.setup(:default, 'sqlite3::memory:') + DataMapper::Logger.new(STDOUT, 0) load 'models.rb' @@ -2,9 +2,9 @@ class Model include DataMapper::Resource property :id, Serial property :name, String - property :uri, URI - property :feature_dataset_uri, URI - property :training_dataset_uri, URI + property :uri, String, :size => 255 + property :feature_dataset_uri, String, :size => 255 + property :training_dataset_uri, String, :size => 255 property :finished, Boolean, :default => false def predictions @@ -15,9 +15,9 @@ end class Prediction include DataMapper::Resource property :id, Serial - property :uri, URI - property :model_uri, URI - property :compound_uri, URI + property :uri, String, :size => 255 + property :model_uri, String, :size => 255 + property :compound_uri, String, :size => 255 property :classification, Boolean property :confidence, Float property :finished, Boolean, :default => false @@ -34,14 +34,14 @@ end class Neighbor include DataMapper::Resource property :id, Serial - property :compound_uri, URI - property :prediction_uri, URI + property :uri, String, :size => 255 + property :prediction_uri, String, :size => 255 property :similarity, Float end class Feature include DataMapper::Resource property :id, Serial - property :feature_uri, URI - property :prediction_uri, URI + property :feature_uri, String, :size => 255 + property :prediction_uri, String, :size => 255 end |