diff options
author | mguetlein <martin.guetlein@gmail.com> | 2010-06-02 12:10:11 +0200 |
---|---|---|
committer | mguetlein <martin.guetlein@gmail.com> | 2010-06-02 12:10:11 +0200 |
commit | 32c6fae5eba64293efd25bb7db177f1964ee2625 (patch) | |
tree | 0089a16f46ef8f9cdfb6fdcf64d936c56d71345d | |
parent | 01dbc229e6a3483bd18d028f5cab3483c10ca43c (diff) |
replacing datamapper with activerecord for validation objects
-rw-r--r-- | Rakefile | 44 | ||||
-rw-r--r-- | application.rb | 9 | ||||
-rw-r--r-- | db/migrate/000_drop_validations.rb | 13 | ||||
-rw-r--r-- | db/migrate/001_init_validation.rb | 64 | ||||
-rw-r--r-- | example.rb | 8 | ||||
-rw-r--r-- | lib/validation_db.rb | 67 | ||||
-rw-r--r-- | nightly/nightly.rb | 17 | ||||
-rw-r--r-- | report/validation_access.rb | 8 | ||||
-rw-r--r-- | test/test_examples.rb | 37 | ||||
-rw-r--r-- | validation/validation_application.rb | 16 | ||||
-rw-r--r-- | validation/validation_format.rb | 15 | ||||
-rw-r--r-- | validation/validation_service.rb | 151 | ||||
-rw-r--r-- | validation/validation_test.rb | 11 |
13 files changed, 283 insertions, 177 deletions
@@ -18,3 +18,47 @@ task :test do load 'test.rb' end +desc "load config" +task :load_config do + require 'yaml' + ENV['RACK_ENV'] = 'test' unless ENV['RACK_ENV'] + basedir = File.join(ENV['HOME'], ".opentox") + config_dir = File.join(basedir, "config") + config_file = File.join(config_dir, "#{ENV['RACK_ENV']}.yaml") + if File.exist?(config_file) + @@config = YAML.load_file(config_file) + raise "could not load config, config file: "+config_file.to_s unless @@config + end + puts "config loaded" +end + +# USER VERSION 0 instead +#desc "Clear database" +#task :clear_db => :load_config do +# if @@config[:database][:adapter]=="mysql" +# clear = nil +# IO.popen("locate clear_mysql.sh"){ |f| clear=f.gets.chomp("\n") } +# raise "clear_mysql.sh not found" unless clear +# cmd = clear+" "+@@config[:database][:username]+" "+@@config[:database][:password]+" "+@@config[:database][:database] +# IO.popen(cmd){ |f| puts f.gets } +# else +# raise "clear not implemented for database-type: "+@@config[:database][:adapter] +# end +#end + +desc "Migrate the database through scripts in db/migrate. Target specific version with VERSION=x" +task :migrate => :load_config do + require 'active_record' + ActiveRecord::Base.establish_connection( + :adapter => @@config[:database][:adapter], + :host => @@config[:database][:host], + :database => @@config[:database][:database], + :username => @@config[:database][:username], + :password => @@config[:database][:password] + ) + ActiveRecord::Base.logger = Logger.new($stdout) + ActiveRecord::Migrator.migrate('db/migrate', ENV["VERSION"] ? ENV["VERSION"].to_i : nil ) +end + + + diff --git a/application.rb b/application.rb index 236280c..3b0dc71 100644 --- a/application.rb +++ b/application.rb @@ -4,6 +4,15 @@ gem 'opentox-ruby-api-wrapper', '= 1.4.4.4' require lib end +require 'active_record' +ActiveRecord::Base.establish_connection( + :adapter => @@config[:database][:adapter], + :host => @@config[:database][:host], + :database => @@config[:database][:database], + :username => @@config[:database][:username], + :password => @@config[:database][:password] +) + #unless(defined? LOGGER) #LOGGER = Logger.new(STDOUT) #LOGGER.datetime_format = "%Y-%m-%d %H:%M:%S " diff --git a/db/migrate/000_drop_validations.rb b/db/migrate/000_drop_validations.rb new file mode 100644 index 0000000..4b0288d --- /dev/null +++ b/db/migrate/000_drop_validations.rb @@ -0,0 +1,13 @@ + +class DropValidations < ActiveRecord::Migration + def self.up + drop_table :validations if table_exists? :validations + drop_table :crossvalidations if table_exists? :crossvalidations + end + + def self.down + drop_table :validations if table_exists? :validations + drop_table :crossvalidations if table_exists? :crossvalidations + end +end + diff --git a/db/migrate/001_init_validation.rb b/db/migrate/001_init_validation.rb new file mode 100644 index 0000000..d38afd7 --- /dev/null +++ b/db/migrate/001_init_validation.rb @@ -0,0 +1,64 @@ + +class InitValidation < ActiveRecord::Migration + def self.up + + create_table :crossvalidations do |t| + + [:crossvalidation_uri, #accesss to :uri somehow does not work, create uri-function in object + :algorithm_uri, + :dataset_uri ].each do |p| + t.column p, :string, :limit => 255 + end + + [:created_at ].each do |p| + t.column p, :datetime + end + + [:num_folds, + :random_seed ].each do |p| + t.column p, :integer, :null => false + end + + [ :stratified ].each do |p| + t.column p, :boolean, :null => false + end + + end + + create_table :validations do |t| + + [:validation_uri, #accesss to :uri somehow does not work, create uri-function in obejct + :model_uri, + :algorithm_uri, + :training_dataset_uri, + :test_target_dataset_uri, + :test_dataset_uri, + :prediction_dataset_uri, + :prediction_feature ].each do |p| + t.column p, :string, :limit => 255 + end + + [:created_at ].each do |p| + t.column p, :datetime + end + + [:real_runtime, :num_instances, :num_without_class, :num_unpredicted, :crossvalidation_id, :crossvalidation_fold ].each do |p| + t.column p, :integer + end + + [:real_runtime, :percent_without_class, :percent_unpredicted ].each do |p| + t.column p, :float + end + + [:classification_statistics, :regression_statistics].each do |p| + t.column(p, :text, :limit => 16320) + end + end + end + + def self.down + drop_table :validations + drop_table :crossvalidations + end +end + @@ -51,6 +51,12 @@ class Example def self.prepare_example_resources @@summary = "" + #delete validations + log "delete validations" + ActiveRecord::Base.logger = Logger.new("/dev/null") + ActiveRecord::Migrator.migrate('db/migrate', 0 ) + ActiveRecord::Migrator.migrate('db/migrate', 1 ) + #delete_all(@@config[:services]["opentox-dataset"]) log OpenTox::RestClientWrapper.delete @@config[:services]["opentox-dataset"] @@ -60,7 +66,6 @@ class Example data_uri = OpenTox::RestClientWrapper.post(@@config[:services]["opentox-dataset"],{:content_type => @@file_type},data).chomp("\n") log "train-test-validation" - Lib::Validation.auto_migrate! #delete_all(@@config[:services]["opentox-model"]) OpenTox::RestClientWrapper.delete @@config[:services]["opentox-model"] @@ -73,7 +78,6 @@ class Example v.validate_algorithm( @@alg_params ) log "crossvalidation" - Lib::Crossvalidation.auto_migrate! cv = Validation::Crossvalidation.new({ :dataset_uri => data_uri, :algorithm_uri => @@alg, :num_folds => 5, :stratified => false }) cv.create_cv_datasets( URI.decode(@@feature) ) cv.perform_cv( @@alg_params ) diff --git a/lib/validation_db.rb b/lib/validation_db.rb index 9bca160..17e8f74 100644 --- a/lib/validation_db.rb +++ b/lib/validation_db.rb @@ -47,66 +47,15 @@ module Lib VAL_MERGE_SUM = VAL_PROPS_SUM + VAL_CLASS_PROPS_SINGLE_SUM + VAL_CLASS_PROPS_PER_CLASS_SUM VAL_MERGE_AVG = VAL_PROPS_AVG + VAL_CLASS_PROPS_SINGLE_AVG + VAL_CLASS_PROPS_PER_CLASS_AVG + VAL_REGR_PROPS - class Validation - include DataMapper::Resource - - property :id, Serial - property :uri, String, :length => 255 - property :model_uri, String, :length => 255 - property :algorithm_uri, String, :length => 255 - property :training_dataset_uri, String, :length => 255 - property :test_target_dataset_uri, String, :length => 255 - property :test_dataset_uri, String, :length => 255 - property :prediction_dataset_uri, String, :length => 255 - property :prediction_feature, String, :length => 255 - property :created_at, DateTime - property :real_runtime, Float - - property :num_instances, Integer - property :num_without_class, Integer - property :percent_without_class, Float - property :num_unpredicted, Integer - property :percent_unpredicted, Float - - property :classification_statistics, Object #Hash - property :regression_statistics, Object - - property :crossvalidation_id, Integer - property :crossvalidation_fold, Integer + class Validation < ActiveRecord::Base + def uri + self.validation_uri + end end - class Crossvalidation - include DataMapper::Resource - property :id, Serial - property :uri, String, :length => 255 - property :algorithm_uri, String, :length => 255 - property :dataset_uri, String, :length => 255 - property :num_folds, Integer, :default => 10 - property :stratified, Boolean, :default => false - property :random_seed, Integer, :default => 1 + class Crossvalidation < ActiveRecord::Base + def uri + self.crossvalidation_uri + end end end - -# sqlite is used for storing validations and crossvalidations -#sqlite = "#{File.expand_path(File.dirname(__FILE__))}/#{Sinatra::Base.environment}.sqlite3" -#DataMapper.setup(:default, "sqlite3:///#{sqlite}") -#unless FileTest.exists?("#{sqlite}") -# [Lib::Validation, Lib::Crossvalidation].each do |model| -# model.auto_migrate! -# end -#end - -#raise "':database:' configuration missing in config file" unless @@config.has_key?(:database) -#[ "adapter","database","username","password","host" ].each do |field| - #raise "field '"+field+":' missing in database configuration" unless @@config[:database].has_key?(field) -#end -#DataMapper.setup(:default, { - #:adapter => @@config[:database]["adapter"], - #:database => @@config[:database]["database"], - #:username => @@config[:database]["username"], - # :password => @@config[:database]["password"], -# :host => @@config[:database]["host"] - #}) -[Lib::Validation, Lib::Crossvalidation].each do |resource| - resource.auto_migrate! unless resource.storage_exists? -end diff --git a/nightly/nightly.rb b/nightly/nightly.rb index 594d4ee..558a5ea 100644 --- a/nightly/nightly.rb +++ b/nightly/nightly.rb @@ -28,15 +28,18 @@ class Nightly running = [] report = Reports::XMLReport.new("Nightly Validation", Time.now.strftime("Created at %m.%d.%Y - %H:%M")) + count = 1 benchmarks.each do |b| - running << b.class.to_s.gsub(/Nightly::/, "")+b.object_id.to_s + id = "["+count.to_s+"]-"+b.title + count += 1 + running << id Thread.new do begin b.build rescue => ex LOGGER.error "uncaught nightly build error: "+ex.message ensure - running.delete(b.class.to_s.gsub(/Nightly::/, "")+b.object_id.to_s) + running.delete id end end end @@ -126,7 +129,11 @@ class Nightly end def title - @validation_examples.collect{|e| e.title}.join(" + ") + if @validation_examples.size==0 + @validation_examples[0].class.humanize + else + @validation_examples[0].class.superclass.humanize + end end def result_table @@ -174,7 +181,7 @@ class Nightly @validation_examples.each do |v| - id = v.title+count.to_s + id = "["+count.to_s+"]-"+v.title count += 1 running << id LOGGER.debug "Uploading datasets: "+v.title @@ -200,7 +207,7 @@ class Nightly wait = 0 while running.size>0 - LOGGER.debug self.class.to_s.gsub(/Nightly::/, "")+" waiting for "+running.inspect if wait%20==0 + LOGGER.debug self.title+" waiting for "+running.inspect if wait%20==0 wait += 1 sleep 1 end diff --git a/report/validation_access.rb b/report/validation_access.rb index ac90b0f..f85698f 100644 --- a/report/validation_access.rb +++ b/report/validation_access.rb @@ -59,7 +59,7 @@ class Reports::ValidationDB < Reports::ValidationAccess validation_uris.each do |u| if u.to_s =~ /.*\/crossvalidation\/[0-9]+/ cv_id = u.split("/")[-1].to_i - res += Lib::Validation.all(:crossvalidation_id => cv_id).collect{|v| v.uri.to_s} + res += Lib::Validation.find( :all, :conditions => { :crossvalidation_id => cv_id } ).collect{|v| v.uri.to_s} else res += [u.to_s] end @@ -76,7 +76,7 @@ class Reports::ValidationDB < Reports::ValidationAccess (validation_id.to_i > 0 || validation_id.to_s=="0" ) v = nil begin - v = Lib::Validation.get(validation_id) + v = Lib::Validation.find(validation_id) rescue => ex raise "could not access validation with id "+validation_id.to_s+", error-msg: "+ex.message end @@ -88,14 +88,14 @@ class Reports::ValidationDB < Reports::ValidationAccess {:classification_statistics => Lib::VAL_CLASS_PROPS, :regression_statistics => Lib::VAL_REGR_PROPS}.each do |subset_name,subset_props| - subset = v[subset_name] + subset = YAML.load(v[subset_name].to_s) subset_props.each{ |prop| validation.send("#{prop.to_s}=".to_sym, subset[prop]) } if subset end end def init_cv(validation) - cv = Lib::Crossvalidation.get(validation.crossvalidation_id) + cv = Lib::Crossvalidation.find(validation.crossvalidation_id) raise Reports::BadRequest.new "no crossvalidation found with id "+validation.crossvalidation_id.to_s unless cv Lib::CROSS_VAL_PROPS.each do |p| diff --git a/test/test_examples.rb b/test/test_examples.rb index 0b817fb..beeaa30 100644 --- a/test/test_examples.rb +++ b/test/test_examples.rb @@ -11,14 +11,14 @@ module ValidationExamples ######################################################################################################## - class ABSTRACTHamsterSplit < SplitTestValidation + class HamsterSplit < SplitTestValidation def initialize @dataset_file = File.new("data/hamster_carcinogenicity.yaml","r") @prediction_feature = "http://localhost/toxmodel/feature#Hamster%20Carcinogenicity%20(DSSTOX/CPDB)" end end - class LazarHamsterSplit < ABSTRACTHamsterSplit + class LazarHamsterSplit < HamsterSplit def initialize @algorithm_uri = File.join(@@config[:services]["opentox-algorithm"],"lazar") @algorithm_params = "feature_generation_uri="+File.join(@@config[:services]["opentox-algorithm"],"fminer") @@ -26,7 +26,7 @@ module ValidationExamples end end - class MajorityHamsterSplit < ABSTRACTHamsterSplit + class MajorityHamsterSplit < HamsterSplit def initialize @algorithm_uri = File.join(@@config[:services]["opentox-majority"],"/class/algorithm") super @@ -35,7 +35,7 @@ module ValidationExamples ######################################################################################################## - class ABSTRACTHamsterTrainingTest < TrainingTestValidation + class HamsterTrainingTest < TrainingTestValidation def initialize @test_target_dataset_file = File.new("data/hamster_carcinogenicity.yaml","r") @training_dataset_file = File.new("data/hamster_carcinogenicity.train.yaml","r") @@ -44,14 +44,14 @@ module ValidationExamples end end - class MajorityHamsterTrainingTest < ABSTRACTHamsterTrainingTest + class MajorityHamsterTrainingTest < HamsterTrainingTest def initialize @algorithm_uri = File.join(@@config[:services]["opentox-majority"],"/class/algorithm") super end end - class LazarHamsterTrainingTest < ABSTRACTHamsterTrainingTest + class LazarHamsterTrainingTest < HamsterTrainingTest def initialize @algorithm_uri = File.join(@@config[:services]["opentox-algorithm"],"lazar") @algorithm_params = "feature_generation_uri="+File.join(@@config[:services]["opentox-algorithm"],"fminer") @@ -61,21 +61,22 @@ module ValidationExamples ######################################################################################################## - class ABSTRACTHamsterCrossvalidation < CrossValidation + class HamsterCrossvalidation < CrossValidation def initialize @dataset_file = File.new("data/hamster_carcinogenicity.yaml","r") @prediction_feature = "http://localhost/toxmodel/feature#Hamster%20Carcinogenicity%20(DSSTOX/CPDB)" + @num_folds = 10 end end - class MajorityHamsterCrossvalidation < ABSTRACTHamsterCrossvalidation + class MajorityHamsterCrossvalidation < HamsterCrossvalidation def initialize @algorithm_uri = File.join(@@config[:services]["opentox-majority"],"/class/algorithm") super end end - class LazarHamsterCrossvalidation < ABSTRACTHamsterCrossvalidation + class LazarHamsterCrossvalidation < HamsterCrossvalidation def initialize @algorithm_uri = File.join(@@config[:services]["opentox-algorithm"],"lazar") @algorithm_params = "feature_generation_uri="+File.join(@@config[:services]["opentox-algorithm"],"fminer") @@ -85,21 +86,21 @@ module ValidationExamples ######################################################################################################## - class ABSTRACT_ISTHamsterCrossvalidationInsilico < CrossValidation + class ISTHamsterCrossvalidationInsilico < CrossValidation def initialize @dataset_uri = "http://webservices.in-silico.ch/dataset/108" @prediction_feature = "http://toxcreate.org/feature#Hamster%20Carcinogenicity%20(DSSTOX/CPDB)" end end - class MajorityISTHamsterCrossvalidation < ABSTRACT_ISTHamsterCrossvalidationInsilico + class MajorityISTHamsterCrossvalidation < ISTHamsterCrossvalidationInsilico def initialize @algorithm_uri = File.join(@@config[:services]["opentox-majority"],"/class/algorithm") super end end - class LazarISTHamsterCrossvalidation < ABSTRACT_ISTHamsterCrossvalidationInsilico + class LazarISTHamsterCrossvalidation < ISTHamsterCrossvalidationInsilico def initialize @algorithm_uri = File.join(@@config[:services]["opentox-algorithm"],"lazar") @algorithm_params = "feature_generation_uri="+File.join(@@config[:services]["opentox-algorithm"],"fminer") @@ -107,7 +108,7 @@ module ValidationExamples end end - class ISTLazarISTHamsterCrossvalidation < ABSTRACT_ISTHamsterCrossvalidationInsilico + class ISTLazarISTHamsterCrossvalidation < ISTHamsterCrossvalidationInsilico def initialize @algorithm_uri = "http://webservices.in-silico.ch/algorithm/lazar" @algorithm_params = "feature_generation_uri=http://webservices.in-silico.ch/algorithm/fminer" @@ -128,7 +129,7 @@ module ValidationExamples ######################################################################################################## - class ABSTRACTCacoTrainingTest < TrainingTestValidation + class CacoTrainingTest < TrainingTestValidation def initialize @training_dataset_uri = "http://ambit.uni-plovdiv.bg:8080/ambit2/dataset/R7798" @test_dataset_uri = "http://ambit.uni-plovdiv.bg:8080/ambit2/dataset/R8353" @@ -136,28 +137,28 @@ module ValidationExamples end end - class LR_AmbitCacoTrainingTest < ABSTRACTCacoTrainingTest + class LR_AmbitCacoTrainingTest < CacoTrainingTest def initialize @algorithm_uri = "http://ambit.uni-plovdiv.bg:8080/ambit2/algorithm/LR" super end end - class MLR_NTUA_CacoTrainingTest < ABSTRACTCacoTrainingTest + class MLR_NTUA_CacoTrainingTest < CacoTrainingTest def initialize @algorithm_uri = "http://opentox.ntua.gr:3003/algorithm/mlr" super end end - class MLR_NTUA2_CacoTrainingTest < ABSTRACTCacoTrainingTest + class MLR_NTUA2_CacoTrainingTest < CacoTrainingTest def initialize @algorithm_uri = "http://opentox.ntua.gr:3004/algorithm/mlr" super end end - class MajorityCacoTrainingTest < ABSTRACTCacoTrainingTest + class MajorityCacoTrainingTest < CacoTrainingTest def initialize @algorithm_uri = File.join(@@config[:services]["opentox-majority"],"/regr/algorithm") super diff --git a/validation/validation_application.rb b/validation/validation_application.rb index f9e6f16..13dc26d 100644 --- a/validation/validation_application.rb +++ b/validation/validation_application.rb @@ -23,7 +23,7 @@ end get '/crossvalidation/:id' do LOGGER.info "get crossvalidation with id "+params[:id].to_s - halt 404, "Crossvalidation #{params[:id]} not found." unless crossvalidation = Validation::Crossvalidation.get(params[:id]) + halt 404, "Crossvalidation #{params[:id]} not found." unless crossvalidation = Validation::Crossvalidation.find(params[:id]) case request.env['HTTP_ACCEPT'].to_s when "application/rdf+xml" @@ -41,13 +41,13 @@ end delete '/crossvalidation/:id/?' do LOGGER.info "delete crossvalidation with id "+params[:id].to_s content_type "text/plain" - halt 404, "Crossvalidation #{params[:id]} not found." unless crossvalidation = Validation::Crossvalidation.get(params[:id]) + halt 404, "Crossvalidation #{params[:id]} not found." unless crossvalidation = Validation::Crossvalidation.find(params[:id]) crossvalidation.delete end get '/crossvalidation/:id/validations' do LOGGER.info "get all validations for crossvalidation with id "+params[:id].to_s - halt 404, "Crossvalidation #{params[:id]} not found." unless crossvalidation = Validation::Crossvalidation.get(params[:id]) + halt 404, "Crossvalidation #{params[:id]} not found." unless crossvalidation = Validation::Crossvalidation.find(params[:id]) content_type "text/uri-list" Validation::Validation.all(:crossvalidation_id => params[:id]).collect{ |v| v.uri.to_s }.join("\n")+"\n" end @@ -55,7 +55,7 @@ end get '/crossvalidation/:id/statistics' do LOGGER.info "get merged validation-result for crossvalidation with id "+params[:id].to_s - halt 404, "Crossvalidation #{params[:id]} not found." unless crossvalidation = Validation::Crossvalidation.get(params[:id]) + halt 404, "Crossvalidation #{params[:id]} not found." unless crossvalidation = Validation::Crossvalidation.find(params[:id]) Lib::MergeObjects.register_merge_attributes( Validation::Validation, Lib::VAL_MERGE_AVG,Lib::VAL_MERGE_SUM,Lib::VAL_MERGE_GENERAL) unless @@ -104,7 +104,7 @@ end get '/:id' do LOGGER.info "get validation with id "+params[:id].to_s+" '"+request.env['HTTP_ACCEPT'].to_s+"'" - halt 404, "Validation '#{params[:id]}' not found." unless validation = Validation::Validation.get(params[:id]) + halt 404, "Validation '#{params[:id]}' not found." unless validation = Validation::Validation.find(params[:id]) case request.env['HTTP_ACCEPT'].to_s when "application/rdf+xml" @@ -143,7 +143,7 @@ post '/?' do "params given: "+params.inspect end content_type "text/uri-list" - v.uri + v.validation_uri end halt 202,task_uri end @@ -198,7 +198,7 @@ end get '/:id/:attribute' do LOGGER.info "access validation attribute "+params.inspect - halt 404, "Validation #{params[:id]} not found." unless validation = Validation::Validation.get(params[:id]) + halt 404, "Validation #{params[:id]} not found." unless validation = Validation::Validation.find(params[:id]) begin raise unless validation.attribute_loaded?(params[:attribute]) rescue @@ -210,7 +210,7 @@ end delete '/:id' do LOGGER.info "delete validation with id "+params[:id].to_s - halt 404, "Validation #{params[:id]} not found." unless validation = Validation::Validation.get(params[:id]) + halt 404, "Validation #{params[:id]} not found." unless validation = Validation::Validation.find(params[:id]) content_type "text/plain" validation.delete end
\ No newline at end of file diff --git a/validation/validation_format.rb b/validation/validation_format.rb index f23d00e..357d96e 100644 --- a/validation/validation_format.rb +++ b/validation/validation_format.rb @@ -23,6 +23,8 @@ module Validation # the right properties, classes for to_rdf def get_content_as_hash + LOGGER.debug self.validation_uri + h = {} Lib::VAL_PROPS.each{|p| h[p] = self.send(p)} if crossvalidation_id!=nil @@ -35,14 +37,15 @@ module Validation h[:crossvalidation_info] = cv end if classification_statistics + class_stats = YAML.load(classification_statistics.to_s) clazz = {} - Lib::VAL_CLASS_PROPS_SINGLE.each{ |p| clazz[p] = classification_statistics[p] } + Lib::VAL_CLASS_PROPS_SINGLE.each{ |p| clazz[p] = class_stats[p] } # transpose results per class class_values = {} Lib::VAL_CLASS_PROPS_PER_CLASS.each do |p| - $sinatra.halt 500, "missing classification statitstics: "+p.to_s+" "+classification_statistics.inspect unless classification_statistics[p] - classification_statistics[p].each do |class_value, property_value| + $sinatra.halt 500, "missing classification statitstics: "+p.to_s+" "+class_stats.inspect unless class_stats[p] + class_stats[p].each do |class_value, property_value| class_values[class_value] = {:class_value => class_value} unless class_values.has_key?(class_value) map = class_values[class_value] map[p] = property_value @@ -52,8 +55,8 @@ module Validation #converting confusion matrix cells = [] - $sinatra.halt 500,"confusion matrix missing" unless classification_statistics[:confusion_matrix]!=nil - classification_statistics[:confusion_matrix].each do |k,v| + $sinatra.halt 500,"confusion matrix missing" unless class_stats[:confusion_matrix]!=nil + class_stats[:confusion_matrix].each do |k,v| cell = {} # key in confusion matrix is map with predicted and actual attribute k.each{ |kk,vv| cell[kk] = vv } @@ -153,7 +156,7 @@ module Validation Lib::CROSS_VAL_PROPS_REDUNDANT.each{|p| h[p] = self.send(p)} v = [] - Validation.all(:crossvalidation_id => self.id).each do |val| + Validation.find( :all, :conditions => { :crossvalidation_id => self.id } ).each do |val| v.push( val.uri.to_s ) end h[:validations] = v diff --git a/validation/validation_service.rb b/validation/validation_service.rb index 1b6678b..cdc0ac9 100644 --- a/validation/validation_service.rb +++ b/validation/validation_service.rb @@ -39,20 +39,21 @@ module Validation $sinatra.halt 500,"do not set id manually" if params[:id] $sinatra.halt 500,"do not set uri manually" if params[:uri] super params - # hack to overcome datamapper bug: save to set id - unless save - raise "error saving validation "+errors.inspect - end - reload - raise "internal error, validation-id not set "+to_yaml if @id==nil - update :uri => $sinatra.url_for("/"+@id.to_s, :full) + self.save + raise "internal error, validation-id not set "+to_yaml if self.id==nil + self.attributes = { :validation_uri => $sinatra.url_for("/"+self.id.to_s, :full).to_s } + self.save + end + + def uri + self.validation_uri end # deletes a validation # PENDING: model and referenced datasets are deleted as well, keep it that way? def delete - model = OpenTox::Model::PredictionModel.find(@model_uri) if @model_uri + model = OpenTox::Model::PredictionModel.find(self.model_uri) if self.model_uri model.destroy if model #[@test_dataset_uri, @training_dataset_uri, @prediction_dataset_uri].each do |d| @@ -60,19 +61,18 @@ module Validation #dataset.delete if dataset #end destroy - "Successfully deleted validation "+@id.to_s+"." + "Successfully deleted validation "+self.id.to_s+"." end # validates an algorithm by building a model and validating this model def validate_algorithm( algorithm_params=nil ) - $sinatra.halt 404, "no algorithm uri: '"+algorithm_uri.to_s+"'" if @algorithm_uri==nil or @algorithm_uri.to_s.size<1 + $sinatra.halt 404, "no algorithm uri: '"+self.algorithm_uri.to_s+"'" if self.algorithm_uri==nil or self.algorithm_uri.to_s.size<1 - params = { :dataset_uri => @training_dataset_uri, :prediction_feature => @prediction_feature } + params = { :dataset_uri => self.training_dataset_uri, :prediction_feature => self.prediction_feature } if (algorithm_params!=nil) algorithm_params.split(";").each do |alg_params| alg_param = alg_params.split("=") - #puts "param "+alg_param.to_s $sinatra.halt 404, "invalid algorithm param: '"+alg_params.to_s+"'" unless alg_param.size==2 or alg_param[0].to_s.size<1 or alg_param[1].to_s.size<1 LOGGER.warn "algorihtm param contains empty space, encode? "+alg_param[1].to_s if alg_param[1] =~ /\s/ params[alg_param[0].to_sym] = alg_param[1] @@ -82,10 +82,11 @@ module Validation model = OpenTox::Model::PredictionModel.build(algorithm_uri, params) $sinatra.halt 500,"model building failed" unless model - update :model_uri => model.uri + self.attributes = { :model_uri => model.uri } + self.save $sinatra.halt 500,"error after building model: model.dependent_variable != validation.prediciton_feature ("+ - model.dependentVariables.to_s+" != "+@prediction_feature+")" if @prediction_feature!=model.dependentVariables + model.dependentVariables.to_s+" != "+self.prediction_feature+")" if self.prediction_feature!=model.dependentVariables validate_model end @@ -94,59 +95,61 @@ module Validation # PENDING: a new dataset is created to store the predictions, this should be optional: delete predictions afterwards yes/no def validate_model - LOGGER.debug "validating model '"+@model_uri+"'" + LOGGER.debug "validating model '"+self.model_uri+"'" - #test_dataset = OpenTox::Dataset.find @test_dataset_uri - #$sinatra.halt 400, "test dataset no found: "+@test_dataset_uri.to_s unless test_dataset + model = OpenTox::Model::PredictionModel.find(self.model_uri) + $sinatra.halt 400, "model not found: "+self.model_uri.to_s unless model - model = OpenTox::Model::PredictionModel.find(@model_uri) - $sinatra.halt 400, "model not found: "+@model_uri.to_s unless model - - unless @algorithm_uri - update :algorithm_uri => model.algorithm + unless self.algorithm_uri + self.attributes = { :algorithm_uri => model.algorithm } + self.save end - if @prediction_feature + if self.prediction_feature $sinatra.halt 400, "error validating model: model.dependent_variable != validation.prediciton_feature ("+ - model.dependentVariables+" != "+@prediction_feature+")" if @prediction_feature!=model.dependentVariables + model.dependentVariables+" != "+self.prediction_feature+")" if self.prediction_feature!=model.dependentVariables else $sinatra.halt 400, "model has no dependentVariables specified, please give prediction feature for model validation" unless model.dependentVariables - update :prediction_feature => model.dependentVariables + self.attributes = { :prediction_feature => model.dependentVariables } + self.save end prediction_dataset_uri = "" benchmark = Benchmark.measure do - prediction_dataset_uri = model.predict_dataset(@test_dataset_uri) + prediction_dataset_uri = model.predict_dataset(self.test_dataset_uri) end - update :prediction_dataset_uri => prediction_dataset_uri, - :real_runtime => benchmark.real + self.attributes = { :prediction_dataset_uri => prediction_dataset_uri, + :real_runtime => benchmark.real } + self.save compute_validation_stats(model) end def compute_validation_stats(model = nil) - model = OpenTox::Model::PredictionModel.find(@model_uri) unless model - $sinatra.halt 400, "model not found: "+@model_uri.to_s unless model + model = OpenTox::Model::PredictionModel.find(self.model_uri) unless model + $sinatra.halt 400, "model not found: "+self.model_uri.to_s unless model - update :prediction_feature => model.dependentVariables unless @prediction_feature - update :algorithm_uri => model.algorithm unless @algorithm_uri + self.attributes = { :prediction_feature => model.dependentVariables } unless self.prediction_feature + self.attributes = { :algorithm_uri => model.algorithm } unless self.algorithm_uri + self.save LOGGER.debug "computing prediction stats" prediction = Lib::OTPredictions.new( model.classification?, - @test_dataset_uri, @test_target_dataset_uri, @prediction_feature, - @prediction_dataset_uri, model.predictedVariables ) + self.test_dataset_uri, self.test_target_dataset_uri, self.prediction_feature, + self.prediction_dataset_uri, model.predictedVariables ) if prediction.classification? - update :classification_statistics => prediction.compute_stats + self.attributes = { :classification_statistics => prediction.compute_stats.to_yaml } else - update :regression_statistics => prediction.compute_stats + self.attributes = { :regression_statistics => prediction.compute_stats.to_yaml } end - update :num_instances => prediction.num_instances, + self.attributes = { :num_instances => prediction.num_instances, :num_without_class => prediction.num_without_class, :percent_without_class => prediction.percent_without_class, :num_unpredicted => prediction.num_unpredicted, - :percent_unpredicted => prediction.percent_unpredicted + :percent_unpredicted => prediction.percent_unpredicted } + self.save end end @@ -157,21 +160,26 @@ module Validation $sinatra.halt 500,"do not set id manually" if params[:id] $sinatra.halt 500,"do not set uri manually" if params[:uri] - super params - unless save - raise "error saving crossvalidation "+errors.inspect - end - reload - raise "internal error, crossvalidation-id not set" if @id==nil - update :uri => $sinatra.url_for("/crossvalidation/"+@id.to_s, :full) + params[:num_folds] = 10 if params[:num_folds]==nil + params[:random_seed] = 1 if params[:random_seed]==nil + params[:stratified] = false if params[:stratified]==nil + super params + self.save + raise "internal error, crossvalidation-id not set" if self.id==nil + self.attributes = { :crossvalidation_uri => $sinatra.url_for("/crossvalidation/"+self.id.to_s, :full) } + self.save + end + + def uri + self.crossvalidation_uri end # deletes a crossvalidation, all validations are deleted as well def delete - Validation.all(:crossvalidation_id => @id).each{ |v| v.delete } + Validation.all(:crossvalidation_id => self.id).each{ |v| v.delete } destroy - "Successfully deleted crossvalidation "+@id.to_s+"." + "Successfully deleted crossvalidation "+self.id.to_s+"." end # creates the cv folds @@ -185,7 +193,7 @@ module Validation def perform_cv ( algorithm_params=nil ) LOGGER.debug "perform cv validations" - Validation.all( :crossvalidation_id => id ).each do |v| + Validation.find( :all, :conditions => { :crossvalidation_id => id } ).each do |v| v.validate_algorithm( algorithm_params ) #break end @@ -196,26 +204,26 @@ module Validation # returns true if successfull, false otherwise def copy_cv_datasets( prediction_feature ) - equal_cvs = Crossvalidation.all( { :dataset_uri => @dataset_uri, :num_folds => @num_folds, - :stratified => @stratified, :random_seed => @random_seed } ).reject{ |cv| cv.id == @id } + equal_cvs = Crossvalidation.all( { :dataset_uri => self.dataset_uri, :num_folds => self.num_folds, + :stratified => self.stratified, :random_seed => self.random_seed } ).reject{ |cv| cv.id == self.id } return false if equal_cvs.size == 0 cv = equal_cvs[0] Validation.all( :crossvalidation_id => cv.id ).each do |v| - if @stratified and v.prediction_feature != prediction_feature + if self.stratified and v.prediction_feature != prediction_feature return false; end unless (OpenTox::Dataset.find(v.training_dataset_uri) and OpenTox::Dataset.find(v.test_dataset_uri)) LOGGER.debug "dataset uris obsolete, aborting copy of datasets" - Validation.all( :crossvalidation_id => @id ).each{ |v| v.delete } + Validation.all( :crossvalidation_id => self.id ).each{ |v| v.delete } return false end - validation = Validation.new :crossvalidation_id => @id, + validation = Validation.new :crossvalidation_id => self.id, :crossvalidation_fold => v.crossvalidation_fold, :training_dataset_uri => v.training_dataset_uri, :test_dataset_uri => v.test_dataset_uri, - :algorithm_uri => @algorithm_uri + :algorithm_uri => self.algorithm_uri end LOGGER.debug "copyied dataset uris from cv "+cv.uri.to_s return true @@ -225,14 +233,15 @@ module Validation # stores uris in validation objects def create_new_cv_datasets( prediction_feature ) + $sinatra.halt(500,"random seed not set") unless self.random_seed LOGGER.debug "creating datasets for crossvalidation" - orig_dataset = OpenTox::Dataset.find(@dataset_uri) - $sinatra.halt 400, "Dataset not found: "+@dataset_uri.to_s unless orig_dataset + orig_dataset = OpenTox::Dataset.find(self.dataset_uri) + $sinatra.halt 400, "Dataset not found: "+self.dataset_uri.to_s unless orig_dataset - shuffled_compounds = orig_dataset.compounds.shuffle( @random_seed ) + shuffled_compounds = orig_dataset.compounds.shuffle( self.random_seed ) - unless @stratified - split_compounds = shuffled_compounds.chunk( @num_folds ) + unless self.stratified + split_compounds = shuffled_compounds.chunk( self.num_folds ) else class_compounds = {} # "inactive" => compounds[], "active" => compounds[] .. shuffled_compounds.each do |c| @@ -247,7 +256,7 @@ module Validation split_class_compounds = [] # inactive_compounds[fold_i][], active_compounds[fold_i][], .. class_compounds.values.each do |compounds| - split_class_compounds.push( compounds.chunk( @num_folds ) ) + split_class_compounds.push( compounds.chunk( self.num_folds ) ) end LOGGER.debug "stratified cv: splits for class values: "+split_class_compounds.collect{ |c| c.collect{ |cc| cc.size }.join("/") }.join(", ") @@ -258,7 +267,7 @@ module Validation # step 1: sort current split in ascending order split_comp.sort!{|x,y| x.size <=> y.size } # step 2: add splits - (0..@num_folds-1).each do |i| + (0..self.num_folds-1).each do |i| unless split_compounds[i] split_compounds[i] = split_comp[i] else @@ -273,19 +282,19 @@ module Validation test_features = orig_dataset.features.dclone - [prediction_feature] - (1..@num_folds).each do |n| + (1..self.num_folds).each do |n| - datasetname = 'cv'+@id.to_s + + datasetname = 'cv'+self.id.to_s + #'_d'+orig_dataset.name.to_s + - '_f'+n.to_s+'of'+@num_folds.to_s+ - '_r'+@random_seed.to_s+ - '_s'+@stratified.to_s + '_f'+n.to_s+'of'+self.num_folds.to_s+ + '_r'+self.random_seed.to_s+ + '_s'+self.stratified.to_s source = $sinatra.url_for('/crossvalidation',:full) test_compounds = [] train_compounds = [] - (1..@num_folds).each do |nn| + (1..self.num_folds).each do |nn| compounds = split_compounds.at(nn-1) if n == nn @@ -295,7 +304,7 @@ module Validation end end - $sinatra.halt 500,"internal error, num test compounds not correct" unless (shuffled_compounds.size/@num_folds - test_compounds.size).abs <= 1 + $sinatra.halt 500,"internal error, num test compounds not correct" unless (shuffled_compounds.size/self.num_folds - test_compounds.size).abs <= 1 $sinatra.halt 500,"internal error, num train compounds not correct" unless shuffled_compounds.size - test_compounds.size == train_compounds.size LOGGER.debug "training set: "+datasetname+"_train, compounds: "+train_compounds.size.to_s @@ -306,10 +315,10 @@ module Validation validation = Validation.new :training_dataset_uri => train_dataset_uri, :test_dataset_uri => test_dataset_uri, - :test_target_dataset_uri => @dataset_uri, - :crossvalidation_id => @id, :crossvalidation_fold => n, + :test_target_dataset_uri => self.dataset_uri, + :crossvalidation_id => self.id, :crossvalidation_fold => n, :prediction_feature => prediction_feature, - :algorithm_uri => @algorithm_uri + :algorithm_uri => self.algorithm_uri end end end diff --git a/validation/validation_test.rb b/validation/validation_test.rb index 0d59c42..1e3908a 100644 --- a/validation/validation_test.rb +++ b/validation/validation_test.rb @@ -15,16 +15,20 @@ class ValidationTest < Test::Unit::TestCase include Lib::TestUtil def test_it + $test_case = self + + #get "/1" + #puts last_response.body # post "/test_validation",:select=>"6d" #,:report=>"yes,please" # puts last_response.body - #run_test("6a") + run_test("2b") - #puts Nightly.build_nightly("6") + #puts Nightly.build_nightly("1", false) #prepare_examples - do_test_examples # USES CURL, DO NOT FORGET TO RESTART VALIDATION SERVICE + #do_test_examples # USES CURL, DO NOT FORGET TO RESTART VALIDATION SERVICE end def app @@ -32,7 +36,6 @@ class ValidationTest < Test::Unit::TestCase end def run_test(select) - $test_case = self validationExamples = ValidationExamples.select(select) validationExamples.each do |vv| vv.each do |v| |