From 3c5d86730bbd4e87d659a01896925628d06e26ff Mon Sep 17 00:00:00 2001 From: mguetlein Date: Fri, 8 Apr 2011 15:39:55 +0200 Subject: =?UTF-8?q?re=C3=BC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Rakefile | 11 ++- application.rb | 14 +-- data/EPAFHM.mini.csv | 21 +++++ example.rb | 24 ++--- lib/merge.rb | 14 +-- lib/ot_predictions.rb | 4 +- lib/validation_db.rb | 116 +++++++++++++----------- reach_reports/reach_persistance.rb | 5 +- reach_reports/reach_service.rb | 6 +- reach_reports/reach_test.rb | 2 +- report/environment.rb | 2 +- report/report_factory.rb | 6 +- report/report_persistance.rb | 49 +++++----- report/validation_access.rb | 20 ++--- report/validation_data.rb | 50 +++++------ test/test_examples.rb | 2 +- test/unit_test.rb | 170 +++++++++++++++++++---------------- validation/validation_application.rb | 63 +++++++------ validation/validation_format.rb | 21 ++--- validation/validation_service.rb | 53 ++++++----- validation/validation_test.rb | 20 +++-- 21 files changed, 363 insertions(+), 310 deletions(-) create mode 100644 data/EPAFHM.mini.csv diff --git a/Rakefile b/Rakefile index e8e2a92..de1e69d 100755 --- a/Rakefile +++ b/Rakefile @@ -1,8 +1,11 @@ require 'rubygems' require 'rake' -REPORT_GEMS = [ 'opentox-ruby', 'mime-types', 'ruby-plot', 'rinruby'] #'ar-extensions', 'activerecord', 'activesupport', -VALIDATION_GEMS = [ 'opentox-ruby', 'ruby-plot'] #'ar-extensions', 'activerecord', 'activesupport', + + +REPORT_GEMS = [ 'opentox-ruby', 'ruby-plot', 'rinruby', 'dm-core', 'dm-serializer', 'dm-timestamps', + 'dm-types', 'dm-migrations', 'dm-validations', 'dm-sqlite-adapter', 'benchmark'] +VALIDATION_GEMS = [ 'opentox-ruby', 'ruby-plot'] GEM_VERSIONS = { "ruby-plot" => "= 0.0.2" } GEM_INSTALL_OPTIONS = {} @@ -11,7 +14,6 @@ GEM_INSTALL_OPTIONS = {} ## this is needed because otherwihse ar-extensions adds activesupport 3.0.0 which confuses things #GEM_INSTALL_OPTIONS = { "ar-extensions" => "--ignore-dependencies" } - desc "Install required gems" task :install_gems do (REPORT_GEMS + VALIDATION_GEMS).uniq.each do |g| @@ -29,7 +31,8 @@ task :install_gems do options = "" options += "--version '"+GEM_VERSIONS[g]+"' " if GEM_VERSIONS.has_key?(g) options += GEM_INSTALL_OPTIONS[g]+" " if GEM_INSTALL_OPTIONS.has_key?(g) - cmd = "sudo env PATH=$PATH gem install "+options+" "+g + #cmd = "sudo env PATH=$PATH gem install "+options+" "+g + cmd = "gem install "+options+" "+g puts "installing gem, this may take some time..." puts cmd IO.popen(cmd){ |f| puts f.gets } diff --git a/application.rb b/application.rb index ad9653a..10c8fe6 100755 --- a/application.rb +++ b/application.rb @@ -1,16 +1,8 @@ require 'rubygems' -gem "opentox-ruby", "~> 0" -[ 'sinatra', 'sinatra/url_for', 'opentox-ruby' ].each do |lib| +gem "opentox-ruby" +[ 'sinatra', 'sinatra/url_for', 'opentox-ruby', 'benchmark' ].each do |lib| require lib end -['dm-core', 'dm-serializer', 'dm-timestamps', 'dm-types', 'dm-migrations', 'dm-validations' ].each{|lib| require lib } -DataMapper.setup(:default, { - :adapter => CONFIG[:database][:adapter], - :database => CONFIG[:database][:database], - :username => CONFIG[:database][:username], - :password => CONFIG[:database][:password], - :host => CONFIG[:database][:host]}) -set :lock, true #unless(defined? LOGGER) #LOGGER = Logger.new(STDOUT) @@ -37,7 +29,7 @@ end get '/prepare_examples/?' do LOGGER.info "prepare examples" content_type "text/plain" - Example.prepare_example_resources + return_task(Example.prepare_example_resources) end post '/test_examples/?' do diff --git a/data/EPAFHM.mini.csv b/data/EPAFHM.mini.csv new file mode 100644 index 0000000..c86cd33 --- /dev/null +++ b/data/EPAFHM.mini.csv @@ -0,0 +1,21 @@ +"STRUCTURE_SMILES","LC50_mmol" +"C1=CC(C=O)=CC(OC)=C1OCCCCCC",1.13E-02 +"C1(OC)=C([N+]([O-])=O)C(C=O)=CC(Br)=C1O",2.66E-01 +"CCCCCCCCOC(=O)C1=CC=CC(C(=O)OCCCCCCCC)=C1", +"C1=CC(Cl)=CC=C1OC2=C([N+](=O)[O-])C=CC=C2",7.69E-03 +"CC1=C(NC=O)C=CC=C1Cl",2.75E-01 +"CCCCOC(=O)C1=CC=CC(C(=O)OCCCC)=C1",3.23E-03 +"C(C1=CC=CC=C1)(C2=CC=CC=C2)(O)C#C",5.33E-02 +"CCCSCCSCCC",4.22E-02 +"CCCCCCCCOC(=O)C1=CC=C(C(=O)OCCCCCCCC)C=C1", +"OCCCCOC(=O)C1=CC=CC=C1C(=O)OCCCCOC(=O)C2=CC=CC=C2C(=O)OCCCCO", +"CCCSCCCCSCCC",1.45E-02 +"C1([N+](=O)[O-])=CC=C(C)C=C1OP(=O)(OC2=C([N+](=O)[O-])C=CC(C)=C2)OC3=C([N+]([O-])=O)C=CC(C)=C3", +"C1=C([N+]([O-])=O)C=CC=C1P(=O)(C2=CC([N+](=O)[O-])=CC=C2)C3=CC([N+](=O)[O-])=CC=C3", +"ClCCOC(=O)NC1CCCCC1",1.70E-01 +"O=C1C(C2=CC=CC=C2)(C(=O)NC(=O)N1)CC",2.08E+00 +"OC1=C(C=C(C=C1)[N+](=O)[O-])[N+](=O)[O-]",5.92E-02 +"NC(=O)OCC",5.88E+01 +"[O-]C(C1=CC=CC=C1O)=O.[Na+]",1.25E+01 +"C1=CC=CC=C1C(=O)N",5.46E+00 +"CC[N+](CC)(CC)CC1(=CC=CC=C1).[Cl-]",7.07E-01 diff --git a/example.rb b/example.rb index 07151a8..b0b591e 100755 --- a/example.rb +++ b/example.rb @@ -3,11 +3,11 @@ require 'lib/test_util.rb' class Example - @@file=File.new("data/hamster_carcinogenicity.yaml","r") + @@file=File.new("data/hamster_carcinogenicity.csv","r") @@file_type="text/x-yaml" @@model=File.join CONFIG[:services]["opentox-model"],"1" #@@feature= URI.encode("http://local-ot/toxmodel/feature#Hamster%20Carcinogenicity%20(DSSTOX/CPDB)") - @@feature= File.join CONFIG[:services]["opentox-dataset"],"1/feature/hamster_carcinogenicity" + @@feature= File.join CONFIG[:services]["opentox-dataset"],"1/feature/Hamster%2520Carcinogenicity" #@@predicted_feature= URI.encode("http://local-ot/toxmodel/feature#Hamster%20Carcinogenicity%20(DSSTOX/CPDB)_lazar_classification") @@alg = File.join CONFIG[:services]["opentox-algorithm"],"lazar" @@alg_params = "feature_generation_uri="+File.join(CONFIG[:services]["opentox-algorithm"],"fminer/bbrc") @@ -58,12 +58,15 @@ class Example #TODO subjectid = nil + Ohm.flush + task = OpenTox::Task.create("prepare examples", "n/a") do |task| @@summary = "" + #delete validations - log "delete validations" - Lib::Validation.auto_migrate! - Lib::Crossvalidation.auto_migrate! + #log "delete validations" + #Lib::Validation.auto_migrate! + #Lib::Crossvalidation.auto_migrate! #ActiveRecord::Base.logger = Logger.new("/dev/null") #ActiveRecord::Migrator.migrate('db/migrate', 0 ) #ActiveRecord::Migrator.migrate('db/migrate', 1 ) @@ -80,10 +83,7 @@ class Example halt 400,"File not found: "+@@file.path.to_s unless File.exist?(@@file.path) #data = File.read(@@file.path) #data_uri = OpenTox::RestClientWrapper.post(CONFIG[:services]["opentox-dataset"],{:content_type => @@file_type},data).chomp("\n") - data = File.read(@@file.path) - dataset = OpenTox::Dataset.create - dataset.load_yaml(data) - dataset.save + dataset = OpenTox::Dataset.create_from_csv_file(@@file.path,nil) data_uri = dataset.uri log "-> "+data_uri task.progress(20) @@ -92,7 +92,7 @@ class Example #delete_all(CONFIG[:services]["opentox-model"]) OpenTox::RestClientWrapper.delete CONFIG[:services]["opentox-model"] - split_params = Validation::Util.train_test_dataset_split(data_uri, URI.decode(@@feature), 0.9, 1) + split_params = Validation::Util.train_test_dataset_split(data_uri, URI.decode(@@feature), nil, 0.9, 1) v = Validation::Validation.new :training_dataset_uri => split_params[:training_dataset_uri], :validation_type => "test_set_validation", :test_dataset_uri => split_params[:test_dataset_uri], @@ -102,7 +102,7 @@ class Example v.validate_algorithm( @@alg_params, OpenTox::SubTask.new(task, 20, 40) ) log "crossvalidation" - cv = Validation::Crossvalidation.new({ :dataset_uri => data_uri, :algorithm_uri => @@alg, :num_folds => 5, :stratified => false }) + cv = Validation::Crossvalidation.create({ :dataset_uri => data_uri, :algorithm_uri => @@alg, :num_folds => 5, :stratified => false }) cv.perform_cv( URI.decode(@@feature), @@alg_params, OpenTox::SubTask.new(task, 40, 70) ) log "create validation report" @@ -124,7 +124,7 @@ class Example log "done" @@summary end - return_task(task) + task end # performs all curl calls listed in examples after ">>>", next line is added if line ends with "\" diff --git a/lib/merge.rb b/lib/merge.rb index 527415e..ecbe133 100644 --- a/lib/merge.rb +++ b/lib/merge.rb @@ -20,8 +20,7 @@ module Lib def self.merge_array_objects( array ) return nil if array.size == nil return array[0] if array.size==1 - - m = self.merge_objects(array[0], array[1] ) + m = self.merge_objects(array[0], array[1]) (2..array.size-1).each do |i| m = self.merge_objects(m, array[i] ) end @@ -33,8 +32,7 @@ module Lib end def self.merge_objects( object1, object2 ) - - raise "classes not equal" if object1.class != object2.class + raise "classes not equal : "+object1.class.to_s+" != "+object2.class.to_s if object1.class != object2.class object_class = object1.class raise "register which attributes to merge first, nothing found for class "+object_class.to_s unless merge_attributes_registered?(object_class) raise "not supported, successivly add unmerged object to a merge object" if merge_count(object2)>1 @@ -71,6 +69,11 @@ module Lib variance = nil if (avg=avg_attribute?(object_class, attribute)) || sum_attribute?(object_class, attribute) + # we string to numerics if wanted, value1 is no string anymore if weight>1 + if value2.is_a?(String) and ((weight1==1 and value1.is_a?(String)) or (weight1>1 and value1.is_a?(Numeric))) + value1 = value1.to_f + value2 = value2.to_f + end if (value1==nil and value2==nil ) #do nothing elsif value1.is_a?(Numeric) and value2.is_a?(Numeric) @@ -104,7 +107,8 @@ module Lib end end else - raise "invalid, cannot avg/sum non-numeric content for attribute: "+attribute.to_s+" contents: '"+value1.to_s+"', '"+value2.to_s+"'" + raise "invalid, cannot avg/sum non-numeric content for attribute: "+attribute.to_s+" contents: '"+value1.to_s+"' ("+ + value1.class.to_s+"), '"+value2.to_s+"' ("+value2.class.to_s+")" end elsif non_numeric_attribute?(object_class, attribute) if (value1.is_a?(Hash) and value2.is_a?(Hash)) diff --git a/lib/ot_predictions.rb b/lib/ot_predictions.rb index f812854..5033425 100755 --- a/lib/ot_predictions.rb +++ b/lib/ot_predictions.rb @@ -184,9 +184,9 @@ module Lib res = {} case @feature_type when "classification" - (Lib::VAL_CLASS_PROPS).each{ |s| res[s] = send(s)} + (Validation::VAL_CLASS_PROPS).each{ |s| res[s] = send(s)} when "regression" - (Lib::VAL_REGR_PROPS).each{ |s| res[s] = send(s) } + (Validation::VAL_REGR_PROPS).each{ |s| res[s] = send(s) } end return res end diff --git a/lib/validation_db.rb b/lib/validation_db.rb index 0d5db21..0beb73d 100755 --- a/lib/validation_db.rb +++ b/lib/validation_db.rb @@ -4,7 +4,7 @@ #end require "lib/merge.rb" -module Lib +module Validation VAL_PROPS_GENERAL = [ :validation_uri, :validation_type, :model_uri, :algorithm_uri, :training_dataset_uri, :prediction_feature, :test_dataset_uri, :test_target_dataset_uri, :prediction_dataset_uri, :date ] @@ -48,42 +48,53 @@ module Lib VAL_MERGE_AVG = VAL_PROPS_AVG + VAL_CLASS_PROPS_SINGLE_AVG + VAL_CLASS_PROPS_PER_CLASS_AVG + VAL_REGR_PROPS -# class Validation < ActiveRecord::Base -# serialize :classification_statistics -# serialize :regression_statistics -# -# alias_attribute :date, :created_at - - class Validation - include DataMapper::Resource + class Validation < Ohm::Model - property :id, Serial - property :validation_type, String, :length => 512 - property :model_uri, String, :length => 512 - property :algorithm_uri, String, :length => 512 - property :training_dataset_uri, String, :length => 512 - property :test_target_dataset_uri, String, :length => 512 - property :test_dataset_uri, String, :length => 512 - property :prediction_dataset_uri, String, :length => 512 - property :prediction_feature, String, :length => 512 - property :created_at, DateTime - property :num_instances, Integer - property :num_without_class, Integer - property :num_unpredicted, Integer - property :crossvalidation_id, Integer - property :crossvalidation_fold, Integer - property :real_runtime, Float - property :percent_without_class, Float - property :percent_unpredicted, Float - property :classification_statistics, Object - property :regression_statistics, Object - property :finished, Boolean, :default => false + attribute :validation_type + attribute :model_uri + attribute :algorithm_uri + attribute :training_dataset_uri + attribute :test_target_dataset_uri + attribute :test_dataset_uri + attribute :prediction_dataset_uri + attribute :prediction_feature + attribute :created_at + attribute :num_instances + attribute :num_without_class + attribute :num_unpredicted + attribute :crossvalidation_id + attribute :crossvalidation_fold + attribute :real_runtime + attribute :percent_without_class + attribute :percent_unpredicted + attribute :classification_statistics_yaml + attribute :regression_statistics_yaml + attribute :finished + + index :model_uri + index :validation_type + index :crossvalidation_id attr_accessor :subjectid - after :save, :check_policy - private - def check_policy + def classification_statistics + YAML.load(self.classification_statistics_yaml) if self.classification_statistics_yaml + end + + def classification_statistics=(cs) + self.classification_statistics_yaml = cs.to_yaml + end + + def regression_statistics + YAML.load(self.regression_statistics_yaml) if self.regression_statistics_yaml + end + + def regression_statistics=(rs) + self.regression_statistics_yaml = rs.to_yaml + end + + def save + super OpenTox::Authorization.check_policy(validation_uri, subjectid) end @@ -115,25 +126,27 @@ module Lib end -# class Crossvalidation < ActiveRecord::Base -# alias_attribute :date, :created_at - class Crossvalidation - include DataMapper::Resource + class Crossvalidation < Ohm::Model - property :id, Serial - property :algorithm_uri, String, :length => 512 - property :dataset_uri, String, :length => 512 - property :created_at, DateTime - property :num_folds, Integer, :default => 10 - property :random_seed, Integer, :default => 1 - property :finished, Boolean, :default => false - property :stratified, Boolean, :default => false + attribute :algorithm_uri + attribute :dataset_uri + attribute :created_at + attribute :num_folds + attribute :random_seed + attribute :finished + attribute :stratified attr_accessor :subjectid + + index :algorithm_uri + index :dataset_uri + index :num_folds + index :random_seed + index :stratified + index :finished - after :save, :check_policy - private - def check_policy + def save + super OpenTox::Authorization.check_policy(crossvalidation_uri, subjectid) end @@ -152,7 +165,7 @@ module Lib # further conditions can be specified in __conditions__ def self.find_all_uniq(conditions={}, subjectid=nil ) #cvs = Lib::Crossvalidation.find(:all, :conditions => conditions) - cvs = Lib::Crossvalidation.all(:conditions => conditions) + cvs = Crossvalidation.find( conditions ) uniq = [] cvs.each do |cv| next if AA_SERVER and !OpenTox::Authorization.authorized?(cv.crossvalidation_uri,"GET",subjectid) @@ -171,8 +184,3 @@ module Lib end end - -Lib::Validation.auto_upgrade! -Lib::Validation.raise_on_save_failure = true -Lib::Crossvalidation.auto_upgrade! -Lib::Crossvalidation.raise_on_save_failure = true diff --git a/reach_reports/reach_persistance.rb b/reach_reports/reach_persistance.rb index 3118809..b90778b 100755 --- a/reach_reports/reach_persistance.rb +++ b/reach_reports/reach_persistance.rb @@ -1,5 +1,8 @@ -require "dm-validations" +['dm-core', 'dm-serializer', 'dm-timestamps', 'dm-types', 'dm-migrations', 'dm-validations' ].each{|lib| require lib } +db_dir = File.join(File.join(ENV['HOME'], ".opentox"), "db") +FileUtils.mkdir_p db_dir +DataMapper::setup(:default, "sqlite3://#{db_dir}/reach_reports.sqlite3") DataMapper::Model.raise_on_save_failure = true diff --git a/reach_reports/reach_service.rb b/reach_reports/reach_service.rb index 53acb62..0cf4172 100755 --- a/reach_reports/reach_service.rb +++ b/reach_reports/reach_service.rb @@ -133,11 +133,11 @@ module ReachReports val_datasets = [] if algorithm - cvs = Lib::Crossvalidation.find_all_uniq({:algorithm_uri => algorithm.uri, :finished => true},r.subjectid) + cvs = Validation::Crossvalidation.find_all_uniq({:algorithm_uri => algorithm.uri, :finished => true},r.subjectid) # PENDING: cv classification/regression hack cvs = cvs.delete_if do |cv| #val = Validation::Validation.first( :all, :conditions => { :crossvalidation_id => cv.id } ) - val = Validation::Validation.first( :crossvalidation_id => cv.id ) + val = Validation::Validation.find( :crossvalidation_id => cv.id ).first raise "should not happen: no validations found for crossvalidation "+cv.id.to_s unless val (val.classification_statistics!=nil) != (feature_type=="classification") end @@ -196,7 +196,7 @@ module ReachReports LOGGER.debug "looking for validations with "+{:model_uri => model.uri}.inspect #vals = Lib::Validation.find(:all, :conditions => {:model_uri => model.uri}) - vals = Lib::Validation.all({:model_uri => model.uri}) + vals = Validation::Validation.find({:model_uri => model.uri}) uniq_vals = [] vals.each do |val| match = false diff --git a/reach_reports/reach_test.rb b/reach_reports/reach_test.rb index 87addfa..43aec28 100755 --- a/reach_reports/reach_test.rb +++ b/reach_reports/reach_test.rb @@ -150,7 +150,7 @@ class ReachTest < Test::Unit::TestCase #model_uri = "http://local-ot/model/104" - model_uri = "http://local-ot/majority/class/model/125" + model_uri = "http://local-ot/model/72" # m = OpenTox::Model::Generic.find(model_uri) diff --git a/report/environment.rb b/report/environment.rb index 12e3272..aa8ab56 100755 --- a/report/environment.rb +++ b/report/environment.rb @@ -13,7 +13,7 @@ end require "lib/ot_predictions.rb" #require "lib/active_record_setup.rb" -require "lib/data_mapper_util.rb" +#require "lib/data_mapper_util.rb" require "report/plot_factory.rb" require "report/xml_report.rb" diff --git a/report/report_factory.rb b/report/report_factory.rb index e770d2f..f48d11a 100755 --- a/report/report_factory.rb +++ b/report/report_factory.rb @@ -76,7 +76,7 @@ module Reports::ReportFactory end task.progress(90) if task - report.add_result(validation_set, Lib::ALL_PROPS, "All Results", "All Results") + report.add_result(validation_set, Validation::ALL_PROPS, "All Results", "All Results") report.add_predictions( validation_set ) task.progress(100) if task report @@ -89,7 +89,7 @@ module Reports::ReportFactory validation_set.get_values(:crossvalidation_id,false).inspect) if validation_set.unique_value(:crossvalidation_id)==nil validation_set.load_cv_attributes raise OpenTox::BadRequestError.new("num validations ("+validation_set.size.to_s+") is not equal to num folds ("+ - validation_set.unique_value(:num_folds).to_s+")") unless validation_set.unique_value(:num_folds)==validation_set.size + validation_set.unique_value(:num_folds).to_s+")") unless validation_set.unique_value(:num_folds).to_i==validation_set.size raise OpenTox::BadRequestError.new("num different folds is not equal to num validations") unless validation_set.num_different_values(:crossvalidation_fold)==validation_set.size raise OpenTox::BadRequestError.new("validations must have unique feature type, i.e. must be either all regression, "+ +"or all classification validations") unless validation_set.unique_feature_type @@ -116,7 +116,7 @@ module Reports::ReportFactory end task.progress(90) if task - report.add_result(validation_set, Lib::ALL_PROPS, "All Results", "All Results") + report.add_result(validation_set, Validation::ALL_PROPS, "All Results", "All Results") report.add_predictions( validation_set ) #, [:crossvalidation_fold] ) task.progress(100) if task report diff --git a/report/report_persistance.rb b/report/report_persistance.rb index df4930c..9097fa3 100755 --- a/report/report_persistance.rb +++ b/report/report_persistance.rb @@ -188,22 +188,21 @@ module Reports # serialize :model_uris # alias_attribute :date, :created_at - class ReportData - include DataMapper::Resource + class ReportData < Ohm::Model - property :id, Serial - property :report_type, String, :length => 255 - property :created_at, DateTime - property :validation_uris, Object - property :crossvalidation_uris, Object - property :model_uris, Object - property :algorithm_uris, Object + attribute :report_type + attribute :created_at + attribute :validation_uris + attribute :crossvalidation_uris + attribute :model_uris + attribute :algorithm_uris + + index :report_type attr_accessor :subjectid - after :save, :check_policy - private - def check_policy + def save + super OpenTox::Authorization.check_policy(report_uri, subjectid) end @@ -245,32 +244,30 @@ module Reports report.subjectid = subjectid report.report_type = type report.save + OpenTox::Authorization.check_policy(report.report_uri, subjectid) new_report_with_id(report_content, type, report.id) end def list_reports(type, filter_params={}) - filter_params["report_type"]=type unless filter_params.has_key?("report_type") - #ReportData.find_like(filter_params).delete_if{|r| r.report_type!=type}.collect{ |r| r.id } - - filter_params = Lib::DataMapperUtil.check_params(ReportData, filter_params) + LOGGER.debug "find reports for params: "+filter_params.inspect # unfortunately, datamapper does not allow searching in Objects # do filtering for list = Object params manually list_params = {} [:validation_uris, :crossvalidation_uris, :algorithm_uris, :model_uris].each do |l| list_params[l] = filter_params.delete(l) if filter_params.has_key?(l) end - - reports = ReportData.all(filter_params).delete_if{|r| r.report_type!=type} + reports = ReportData.find( :report_type => type ) list_params.each do |k,v| - reports = reports.delete_if{ |r| !r.send(k).include?(v) } + reports = reports.collect{|x| x}.delete_if{ |r| !r.send(k).include?(v) } end reports.collect{ |r| r.id } end def get_report(type, id, format, force_formating, params) - report = ReportData.first({:id => id, :report_type => type}) - raise OpenTox::NotFoundError.new("Report with id='"+id.to_s+"' and type='"+type.to_s+"' not found.") unless report + report = ReportData[id] + raise OpenTox::NotFoundError.new("Report with id='"+id.to_s+"' and type='"+type.to_s+"' not found.") if + report==nil or report.report_type!=type # begin # report = ReportData.find(:first, :conditions => {:id => id, :report_type => type}) # rescue ActiveRecord::RecordNotFound @@ -294,9 +291,10 @@ module Reports # raise OpenTox::NotFoundError.new("Report with id='"+id.to_s+"' and type='"+type.to_s+"' not found.") # end # ReportData.delete(id) - report = ReportData.first({:id => id, :report_type => type}) - raise OpenTox::NotFoundError.new("Report with id='"+id.to_s+"' and type='"+type.to_s+"' not found.") unless report - report.destroy + report = ReportData[id] + raise OpenTox::NotFoundError.new("Report with id='"+id.to_s+"' and type='"+type.to_s+"' not found.") if + report==nil || report.report_type!=type + report.delete if (subjectid) begin res = OpenTox::Authorization.delete_policies_from_uri(report.report_uri, subjectid) @@ -310,9 +308,6 @@ module Reports end end -Reports::ReportData.auto_upgrade! -Reports::ReportData.raise_on_save_failure = true - #module Reports # def self.check_filter_params(model, filter_params) # prop_names = model.properties.collect{|p| p.name.to_s} diff --git a/report/validation_access.rb b/report/validation_access.rb index 53ecc46..96dfbf3 100755 --- a/report/validation_access.rb +++ b/report/validation_access.rb @@ -21,11 +21,11 @@ class Reports::ValidationDB # rescue => ex # raise "could not access crossvalidation with id "+validation_id.to_s+", error-msg: "+ex.message # end - cv = Lib::Crossvalidation.get( cv_id ) + cv = Validation::Crossvalidation.get( cv_id ) raise OpenTox::NotFoundError.new "crossvalidation with id "+cv_id.to_s+" not found" unless cv raise OpenTox::BadRequestError.new("crossvalidation with id '"+cv_id.to_s+"' not finished") unless cv.finished - #res += Lib::Validation.find( :all, :conditions => { :crossvalidation_id => cv_id } ).collect{|v| v.validation_uri.to_s} - res += Lib::Validation.all( :crossvalidation_id => cv_id ).collect{|v| v.validation_uri.to_s } + #res += Validation::Validation.find( :all, :conditions => { :crossvalidation_id => cv_id } ).collect{|v| v.validation_uri.to_s} + res += Validation::Validation.find( :crossvalidation_id => cv_id ).collect{|v| v.validation_uri.to_s } else res += [u.to_s] end @@ -42,16 +42,16 @@ class Reports::ValidationDB v = nil raise OpenTox::NotAuthorizedError.new "Not authorized: GET "+uri.to_s if AA_SERVER and !OpenTox::Authorization.authorized?(uri,"GET",subjectid) - v = Lib::Validation.get(validation_id) + v = Validation::Validation.get(validation_id) raise OpenTox::NotFoundError.new "validation with id "+validation_id.to_s+" not found" unless v raise OpenTox::BadRequestError.new "validation with id "+validation_id.to_s+" is not finished yet" unless v.finished - (Lib::VAL_PROPS + Lib::VAL_CV_PROPS).each do |p| + (Validation::VAL_PROPS + Validation::VAL_CV_PROPS).each do |p| validation.send("#{p.to_s}=".to_sym, v.send(p)) end - {:classification_statistics => Lib::VAL_CLASS_PROPS, - :regression_statistics => Lib::VAL_REGR_PROPS}.each do |subset_name,subset_props| + {:classification_statistics => Validation::VAL_CLASS_PROPS, + :regression_statistics => Validation::VAL_REGR_PROPS}.each do |subset_name,subset_props| subset = v.send(subset_name) subset_props.each{ |prop| validation.send("#{prop.to_s}=".to_sym, subset[prop]) } if subset end @@ -60,11 +60,11 @@ class Reports::ValidationDB def init_cv(validation) #cv = Lib::Crossvalidation.find(validation.crossvalidation_id) - cv = Lib::Crossvalidation.get(validation.crossvalidation_id) + cv = Validation::Crossvalidation.get(validation.crossvalidation_id) raise OpenTox::BadRequestError.new "no crossvalidation found with id "+validation.crossvalidation_id.to_s unless cv - Lib::CROSS_VAL_PROPS.each do |p| - validation.send("#{p.to_s}=".to_sym, cv[p]) + Validation::CROSS_VAL_PROPS.each do |p| + validation.send("#{p.to_s}=".to_sym, cv.send(p.to_s)) end end diff --git a/report/validation_data.rb b/report/validation_data.rb index 15d51ec..9212c98 100755 --- a/report/validation_data.rb +++ b/report/validation_data.rb @@ -51,13 +51,13 @@ end module Reports - # = Reports::Validation + # = ReportValidation # # contains all values of a validation object # - class Validation + class ReportValidation - @@validation_access = Reports::ValidationDB.new + @@validation_access = ValidationDB.new # for overwriting validation source (other than using webservices) def self.reset_validation_access(validation_access) @@ -69,7 +69,7 @@ module Reports end # create member variables for all validation properties - @@validation_attributes = Lib::ALL_PROPS + + @@validation_attributes = Validation::ALL_PROPS + VAL_ATTR_VARIANCE.collect{ |a| (a.to_s+"_variance").to_sym } + VAL_ATTR_RANKING.collect{ |a| (a.to_s+"_ranking").to_sym } @@validation_attributes.each{ |a| attr_accessor a } @@ -85,7 +85,7 @@ module Reports # returns/creates predictions, cache to save rest-calls/computation time # # call-seq: - # get_predictions => Reports::Predictions + # get_predictions => Predictions # def get_predictions( task=nil ) if @predictions @@ -127,13 +127,13 @@ module Reports @@validation_access.init_cv(self) end - @@persistance = Reports::ReportService.persistance + @@persistance = ReportService.persistance def validation_report_uri #puts "searching for validation report: "+self.validation_uri.to_s return @validation_report_uri if @validation_report_uri!=nil ids = @@persistance.list_reports("validation",{:validation_uris=>validation_uri }) - @validation_report_uri = Reports::ReportService.instance.get_uri("validation",ids[-1]) if ids and ids.size>0 + @validation_report_uri = ReportService.instance.get_uri("validation",ids[-1]) if ids and ids.size>0 end def cv_report_uri @@ -142,7 +142,7 @@ module Reports raise "no cv uri "+to_yaml unless self.crossvalidation_uri ids = @@persistance.list_reports("crossvalidation",{:crossvalidation=>self.crossvalidation_uri.to_s }) #puts "-> "+ids.inspect - @cv_report_uri = Reports::ReportService.instance.get_uri("crossvalidation",ids[-1]) if ids and ids.size>0 + @cv_report_uri = ReportService.instance.get_uri("crossvalidation",ids[-1]) if ids and ids.size>0 end def clone_validation @@ -160,9 +160,9 @@ module Reports def initialize(validation_uris=nil, subjectid=nil) @unique_values = {} - validation_uris = Reports::Validation.resolve_cv_uris(validation_uris, subjectid) if validation_uris + validation_uris = ReportValidation.resolve_cv_uris(validation_uris, subjectid) if validation_uris @validations = Array.new - validation_uris.each{|u| @validations.push(Reports::Validation.new(u, subjectid))} if validation_uris + validation_uris.each{|u| @validations.push(ReportValidation.new(u, subjectid))} if validation_uris end @@ -249,12 +249,12 @@ module Reports def get_domain_for_attr( attribute ) class_domain = get_class_domain() - if Lib::Validation.classification_property?(attribute) and - !Lib::Validation.depends_on_class_value?(attribute) + if Validation::Validation.classification_property?(attribute) and + !Validation::Validation.depends_on_class_value?(attribute) [ nil ] - elsif Lib::Validation.classification_property?(attribute) and + elsif Validation::Validation.classification_property?(attribute) and class_domain.size==2 and - Lib::Validation.complement_exists?(attribute) + Validation::Validation.complement_exists?(attribute) [ class_domain[0] ] else class_domain @@ -270,10 +270,10 @@ module Reports # returns a new set with all validation that have values as specified in the map # # call-seq: - # filter(map) => Reports::ValidationSet + # filter(map) => ValidationSet # def filter(map) - new_set = Reports::ValidationSet.new + new_set = ValidationSet.new validations.each{ |v| new_set.validations.push(v) if v.has_values?(map) } return new_set end @@ -282,10 +282,10 @@ module Reports # e.g. create set with predictions: collect{ |validation| validation.get_predictions!=null } # # call-seq: - # filter_proc(proc) => Reports::ValidationSet + # filter_proc(proc) => ValidationSet # def collect - new_set = Reports::ValidationSet.new + new_set = ValidationSet.new validations.each{ |v| new_set.validations.push(v) if yield(v) } return new_set end @@ -398,19 +398,19 @@ module Reports # to_array(attributes) => array # def merge(equal_attributes) - new_set = Reports::ValidationSet.new + new_set = ValidationSet.new # unique values stay unique when merging # derive unique values before, because model dependent props cannot be accessed later (when mergin validations from different models) new_set.unique_values = @unique_values #compute grouping - grouping = Reports::Util.group(@validations, equal_attributes) + grouping = Util.group(@validations, equal_attributes) #puts "groups "+grouping.size.to_s - Lib::MergeObjects.register_merge_attributes( Reports::Validation, - Lib::VAL_MERGE_AVG,Lib::VAL_MERGE_SUM,Lib::VAL_MERGE_GENERAL) unless - Lib::MergeObjects.merge_attributes_registered?(Reports::Validation) + Lib::MergeObjects.register_merge_attributes( ReportValidation, + Validation::VAL_MERGE_AVG,Validation::VAL_MERGE_SUM,Validation::VAL_MERGE_GENERAL) unless + Lib::MergeObjects.merge_attributes_registered?(ReportValidation) #merge grouping.each do |g| @@ -438,12 +438,12 @@ module Reports def compute_ranking(equal_attributes, ranking_attribute, class_value=nil ) #puts "compute_ranking("+equal_attributes.inspect+", "+ranking_attribute.inspect+", "+class_value.to_s+" )" - new_set = Reports::ValidationSet.new + new_set = ValidationSet.new (0..@validations.size-1).each do |i| new_set.validations.push(@validations[i].clone_validation) end - grouping = Reports::Util.group(new_set.validations, equal_attributes) + grouping = Util.group(new_set.validations, equal_attributes) grouping.each do |group| # put indices and ranking values for current group into hash diff --git a/test/test_examples.rb b/test/test_examples.rb index 3c6f00e..49d7838 100755 --- a/test/test_examples.rb +++ b/test/test_examples.rb @@ -85,7 +85,7 @@ module ValidationExamples class EPAFHMCrossvalidation < CrossValidation def initialize - @dataset_file = File.new("data/EPAFHM.csv","r") + @dataset_file = File.new("data/EPAFHM.mini.csv","r") #@prediction_feature = "http://ot-dev.in-silico.ch/toxcreate/feature#IRIS%20unit%20risk" @num_folds = 10 end diff --git a/test/unit_test.rb b/test/unit_test.rb index 11b0759..c5a4a62 100644 --- a/test/unit_test.rb +++ b/test/unit_test.rb @@ -1,6 +1,6 @@ #TEST_USER = "mgtest" #TEST_PW = "mgpasswd" -#ENV['RACK_ENV'] = 'test' +ENV['RACK_ENV'] = 'production' require "rubygems" require "sinatra" @@ -44,108 +44,124 @@ class ValidationTest < Test::Unit::TestCase puts "AA disabled" @@subjectid = nil end - f = File.new("data/hamster_carcinogenicity.mini.csv") - @@data_class_mini = ValidationExamples::Util.upload_dataset(f, @@subjectid) - @@feat_class_mini = ValidationExamples::Util.prediction_feature_for_file(f) + + files = [ + File.new("data/hamster_carcinogenicity.mini.csv"), + File.new("data/EPAFHM.mini.csv") + ] + @@data = {} + files.each do |f| + d = ValidationExamples::Util.upload_dataset(f, @@subjectid) + @@data[d] = ValidationExamples::Util.prediction_feature_for_file(f) + end end def global_teardown puts "delete and logout" - OpenTox::Dataset.find(@@data_class_mini,@@subjectid).delete(@@subjectid) if defined?@@data_class_mini - @@cv.delete(@@subjectid) if defined?@@cv - @@report.delete(@@subjectid) if defined?@@report - @@qmrfReport.delete(@@subjectid) if defined?@@qmrfReport + #OpenTox::Dataset.find(@@data,@@subjectid).delete(@@subjectid) if defined?@@data + @@cvs.each{|cv| cv.delete(@@subjectid)} if defined?@@cvs + @@reports.each{|report| report.delete(@@subjectid)} if defined?@@reports + @@qmrfReports.each{|qmrfReport| qmrfReport.delete(@@subjectid)} if defined?@@qmrfReports OpenTox::Authorization.logout(@@subjectid) if AA_SERVER end def test_crossvalidation - puts "test_crossvalidation" + #assert_rest_call_error OpenTox::NotFoundError do # OpenTox::Crossvalidation.find(File.join(CONFIG[:services]["opentox-validation"],"crossvalidation/noexistingid")) #end - p = { - :dataset_uri => @@data_class_mini, - :algorithm_uri => File.join(CONFIG[:services]["opentox-algorithm"],"lazar"), - :algorithm_params => "feature_generation_uri="+File.join(CONFIG[:services]["opentox-algorithm"],"fminer/bbrc"), - :prediction_feature => @@feat_class_mini, - :num_folds => 2 } - t = OpenTox::SubTask.new(nil,0,1) - def t.progress(pct) - if !defined?@last_msg or @last_msg+3 data, + :algorithm_uri => File.join(CONFIG[:services]["opentox-algorithm"],"lazar"), + :algorithm_params => "feature_generation_uri="+File.join(CONFIG[:services]["opentox-algorithm"],"fminer/bbrc"), + :prediction_feature => feat, + :num_folds => 2 } + t = OpenTox::SubTask.new(nil,0,1) + def t.progress(pct) + if !defined?@last_msg or @last_msg+3 params[:dataset_uri], :algorithm_uri => params[:algorithm_uri] } [ :num_folds, :random_seed, :stratified ].each{ |sym| cv_params[sym] = params[sym] if params[sym] } - cv = Validation::Crossvalidation.new cv_params + cv = Validation::Crossvalidation.create cv_params cv.subjectid = @subjectid cv.perform_cv( params[:prediction_feature], params[:algorithm_params], task ) cv.crossvalidation_uri @@ -62,7 +63,7 @@ post '/crossvalidation/cleanup/?' do deleted << cv.crossvalidation_uri #Validation::Crossvalidation.delete(cv.id) cv.subjectid = @subjectid - cv.delete + cv.delete_crossvalidation #end end LOGGER.info "crossvalidation cleanup, deleted "+deleted.size.to_s+" cvs" @@ -124,13 +125,19 @@ get '/crossvalidation/:id/statistics' do raise OpenTox::BadRequestError.new "Crossvalidation '"+params[:id].to_s+"' not finished" unless crossvalidation.finished Lib::MergeObjects.register_merge_attributes( Validation::Validation, - Lib::VAL_MERGE_AVG,Lib::VAL_MERGE_SUM,Lib::VAL_MERGE_GENERAL-[:date,:validation_uri,:crossvalidation_uri]) unless + Validation::VAL_MERGE_AVG,Validation::VAL_MERGE_SUM,Validation::VAL_MERGE_GENERAL-[:date,:validation_uri,:crossvalidation_uri]) unless Lib::MergeObjects.merge_attributes_registered?(Validation::Validation) #v = Lib::MergeObjects.merge_array_objects( Validation::Validation.find( :all, :conditions => { :crossvalidation_id => params[:id] } ) ) - v = Lib::MergeObjects.merge_array_objects( Validation::Validation.all( :crossvalidation_id => params[:id] ) ) + # convert ohm:set into array, as ohm:set[0]=nil(!) + vals = Validation::Validation.find( :crossvalidation_id => params[:id] ).collect{|x| x} +# LOGGER.debug vals.collect{|v| v.validation_uri}.join("\n") +# LOGGER.debug vals.size +# LOGGER.debug vals.class + + v = Lib::MergeObjects.merge_array_objects( vals ) v.created_at = nil - v.id = nil + #v.id = nil case request.env['HTTP_ACCEPT'].to_s when /text\/html/ @@ -159,7 +166,7 @@ delete '/crossvalidation/:id/?' do cv = Validation::Crossvalidation.get(params[:id]) cv.subjectid = @subjectid raise OpenTox::NotFoundError.new "Crossvalidation '#{params[:id]}' not found." unless cv - cv.delete + cv.delete_crossvalidation end #get '/crossvalidation/:id/validations' do @@ -208,8 +215,8 @@ get '/?' do LOGGER.info "list all validations, params: "+params.inspect #uri_list = Validation::Validation.find_like(params).collect{ |v| v.validation_uri }.join("\n")+"\n" #uri_list = Validation::Validation.all(params).collect{ |v| v.validation_uri }.join("\n")+"\n" - uri_list = Lib::DataMapperUtil.all(Validation::Validation,params).collect{ |v| v.validation_uri }.join("\n")+"\n" - + #uri_list = Lib::DataMapperUtil.all(Validation::Validation,params).collect{ |v| v.validation_uri }.join("\n")+"\n" + uri_list = Validation::Validation.all.collect{|v| v.validation_uri}.join("\n") + "\n" if request.env['HTTP_ACCEPT'] =~ /text\/html/ related_links = @@ -241,7 +248,7 @@ post '/test_set_validation' do LOGGER.info "creating test-set-validation "+params.inspect if params[:model_uri] and params[:test_dataset_uri] and !params[:training_dataset_uri] and !params[:algorithm_uri] task = OpenTox::Task.create( "Perform test-set-validation", url_for("/", :full) ) do |task| #, params - v = Validation::Validation.new :validation_type => "test_set_validation", + v = Validation::Validation.create :validation_type => "test_set_validation", :model_uri => params[:model_uri], :test_dataset_uri => params[:test_dataset_uri], :test_target_dataset_uri => params[:test_target_dataset_uri], @@ -262,8 +269,9 @@ get '/test_set_validation' do #uri_list = Validation::Validation.find( :all, :conditions => { :validation_type => "test_set_validation" } ).collect{ |v| v.validation_uri }.join("\n")+"\n" #uri_list = Validation::Validation.all( :validation_type => "test_set_validation" ).collect{ |v| v.validation_uri }.join("\n")+"\n" - params[:validation_type] = "test_set_validation" - uri_list = Lib::DataMapperUtil.all(Validation::Validation,params).collect{ |v| v.validation_uri }.join("\n")+"\n" + #params[:validation_type] = "test_set_validation" + #uri_list = Lib::DataMapperUtil.all(Validation::Validation,params).collect{ |v| v.validation_uri }.join("\n")+"\n" + uri_list = Validation::Validation.find(:validation_type => "test_set_validation").collect{|v| v.validation_uri}.join("\n") + "\n" if request.env['HTTP_ACCEPT'] =~ /text\/html/ related_links = @@ -285,7 +293,7 @@ post '/training_test_validation/?' do LOGGER.info "creating training-test-validation "+params.inspect if params[:algorithm_uri] and params[:training_dataset_uri] and params[:test_dataset_uri] and params[:prediction_feature] and !params[:model_uri] task = OpenTox::Task.create( "Perform training-test-validation", url_for("/", :full) ) do |task| #, params - v = Validation::Validation.new :validation_type => "training_test_validation", + v = Validation::Validation.create :validation_type => "training_test_validation", :algorithm_uri => params[:algorithm_uri], :training_dataset_uri => params[:training_dataset_uri], :test_dataset_uri => params[:test_dataset_uri], @@ -306,8 +314,9 @@ get '/training_test_validation' do LOGGER.info "list all training-test-validations, params: "+params.inspect #uri_list = Validation::Validation.find( :all, :conditions => { :validation_type => "training_test_validation" } ).collect{ |v| v.validation_uri }.join("\n")+"\n" #uri_list = Validation::Validation.all( :validation_type => "training_test_validation" ).collect{ |v| v.validation_uri }.join("\n")+"\n" - params[:validation_type] = "training_test_validation" - uri_list = Lib::DataMapperUtil.all(Validation::Validation,params).collect{ |v| v.validation_uri }.join("\n")+"\n" + #params[:validation_type] = "training_test_validation" + #uri_list = Lib::DataMapperUtil.all(Validation::Validation,params).collect{ |v| v.validation_uri }.join("\n")+"\n" + uri_list = Validation::Validation.find(:validation_type => "training_test_validation").collect{|v| v.validation_uri}.join("\n") + "\n" if request.env['HTTP_ACCEPT'] =~ /text\/html/ related_links = @@ -340,7 +349,7 @@ post '/bootstrapping' do params.merge!( Validation::Util.bootstrapping( params[:dataset_uri], params[:prediction_feature], @subjectid, params[:random_seed], OpenTox::SubTask.create(task,0,33)) ) - v = Validation::Validation.new :validation_type => "bootstrapping", + v = Validation::Validation.create :validation_type => "bootstrapping", :test_target_dataset_uri => params[:dataset_uri], :prediction_feature => params[:prediction_feature], :algorithm_uri => params[:algorithm_uri] @@ -355,8 +364,9 @@ get '/bootstrapping' do LOGGER.info "list all bootstrapping-validations, params: "+params.inspect #uri_list = Validation::Validation.find( :all, :conditions => { :validation_type => "bootstrapping" } ).collect{ |v| v.validation_uri }.join("\n")+"\n" #uri_list = Validation::Validation.all( :validation_type => "bootstrapping" ).collect{ |v| v.validation_uri }.join("\n")+"\n" - params[:validation_type] = "bootstrapping" - uri_list = Lib::DataMapperUtil.all(Validation::Validation,params).collect{ |v| v.validation_uri }.join("\n")+"\n" + #params[:validation_type] = "bootstrapping" + #uri_list = Lib::DataMapperUtil.all(Validation::Validation,params).collect{ |v| v.validation_uri }.join("\n")+"\n" + uri_list = Validation::Validation.find(:validation_type => "bootstrapping").collect{|v| v.validation_uri}.join("\n") + "\n" if request.env['HTTP_ACCEPT'] =~ /text\/html/ related_links = @@ -388,7 +398,7 @@ post '/training_test_split' do params.merge!( Validation::Util.train_test_dataset_split(params[:dataset_uri], params[:prediction_feature], @subjectid, params[:split_ratio], params[:random_seed], OpenTox::SubTask.create(task,0,33))) - v = Validation::Validation.new :validation_type => "training_test_split", + v = Validation::Validation.create :validation_type => "training_test_split", :training_dataset_uri => params[:training_dataset_uri], :test_dataset_uri => params[:test_dataset_uri], :test_target_dataset_uri => params[:dataset_uri], @@ -406,8 +416,9 @@ get '/training_test_split' do LOGGER.info "list all training-test-split-validations, params: "+params.inspect #uri_list = Validation::Validation.find( :all, :conditions => { :validation_type => "training_test_split" } ).collect{ |v| v.validation_uri }.join("\n")+"\n" #uri_list = Validation::Validation.all( :validation_type => "training_test_split" ).collect{ |v| v.validation_uri }.join("\n")+"\n" - params[:validation_type] = "training_test_split" - uri_list = Lib::DataMapperUtil.all(Validation::Validation,params).collect{ |v| v.validation_uri }.join("\n")+"\n" + #params[:validation_type] = "training_test_split" + #uri_list = Lib::DataMapperUtil.all(Validation::Validation,params).collect{ |v| v.validation_uri }.join("\n")+"\n" + uri_list = Validation::Validation.find(:validation_type => "training_test_split").collect{|v| v.validation_uri}.join("\n") + "\n" if request.env['HTTP_ACCEPT'] =~ /text\/html/ related_links = @@ -440,7 +451,7 @@ post '/cleanup/?' do deleted << val.validation_uri #Validation::Validation.delete(val.id) val.subjectid = @subjectid - val.delete + val.delete_validation end LOGGER.info "validation cleanup, deleted "+deleted.size.to_s+" validations" deleted.join("\n")+"\n" @@ -463,7 +474,7 @@ post '/validate_datasets' do params[:validation_type] = "validate_datasets" if params[:model_uri] - v = Validation::Validation.new params + v = Validation::Validation.create params v.subjectid = @subjectid v.compute_validation_stats_with_model(nil,false,task) else @@ -475,7 +486,7 @@ post '/validate_datasets' do predicted_feature = params.delete("predicted_feature") feature_type = "classification" if params.delete("classification")!=nil feature_type = "regression" if params.delete("regression")!=nil - v = Validation::Validation.new params + v = Validation::Validation.create params v.subjectid = @subjectid v.compute_validation_stats(feature_type,predicted_feature,nil,nil,false,task) end @@ -532,7 +543,7 @@ get '/:id' do # rescue ActiveRecord::RecordNotFound => ex # raise OpenTox::NotFoundError.new "Validation '#{params[:id]}' not found." # end - validation = Validation::Validation.get(params[:id]) + validation = Validation::Validation[params[:id]] raise OpenTox::NotFoundError.new "Validation '#{params[:id]}' not found." unless validation case request.env['HTTP_ACCEPT'].to_s @@ -566,5 +577,5 @@ delete '/:id' do validation.subjectid = @subjectid raise OpenTox::NotFoundError.new "Validation '#{params[:id]}' not found." unless validation content_type "text/plain" - validation.delete + validation.delete_validation end \ No newline at end of file diff --git a/validation/validation_format.rb b/validation/validation_format.rb index f69ceac..6fdea61 100755 --- a/validation/validation_format.rb +++ b/validation/validation_format.rb @@ -4,15 +4,15 @@ require "lib/format_util.rb" module Validation # adding to_yaml and to_rdf functionality to validation - class Validation < Lib::Validation + class Validation # builds hash for valiation, as the internal presentation differs from the owl-object # the hash is directly printed in to_yaml, or added to the owl-structure def get_content_as_hash() h = {} - (Lib::VAL_PROPS - [:validation_uri]).each do |p| - h[p] = self.send(p) + (VAL_PROPS - [:validation_uri]).each do |p| + h[p] = self.send(p.to_s) end if crossvalidation_id!=nil cv = {:type => OT.CrossvalidationInfo} @@ -22,13 +22,14 @@ module Validation h[:crossvalidation_info] = cv end if classification_statistics - raise "classification_statistics is no has: "+classification_statistics.class.to_s unless classification_statistics.is_a?(Hash) + raise "classification_statistics is no hash: "+classification_statistics.class.to_s+" -> '"+ + classification_statistics.to_s+"'" unless classification_statistics.is_a?(Hash) clazz = { :type => OT.ClassificationStatistics } - Lib::VAL_CLASS_PROPS_SINGLE.each{ |p| clazz[p] = classification_statistics[p] } + VAL_CLASS_PROPS_SINGLE.each{ |p| clazz[p] = classification_statistics[p] } # transpose results per class class_values = {} - Lib::VAL_CLASS_PROPS_PER_CLASS.each do |p| + VAL_CLASS_PROPS_PER_CLASS.each do |p| raise "missing classification statitstics: "+p.to_s+" "+classification_statistics.inspect if classification_statistics[p]==nil classification_statistics[p].each do |class_value, property_value| class_values[class_value] = {:class_value => class_value, :type => OT.ClassValueStatistics} unless class_values.has_key?(class_value) @@ -54,7 +55,7 @@ module Validation h[:classification_statistics] = clazz elsif regression_statistics regr = {:type => OT.RegressionStatistics } - Lib::VAL_REGR_PROPS.each{ |p| regr[p] = regression_statistics[p]} + VAL_REGR_PROPS.each{ |p| regr[p] = regression_statistics[p]} h[:regression_statistics] = regr end return h @@ -72,17 +73,17 @@ module Validation end - class Crossvalidation < Lib::Crossvalidation + class Crossvalidation def get_content_as_hash h = {} - (Lib::CROSS_VAL_PROPS_REDUNDANT - [:crossvalidation_uri]).each do |p| + (CROSS_VAL_PROPS_REDUNDANT - [:crossvalidation_uri]).each do |p| h[p] = self.send(p) end v = [] #Validation.find( :all, :conditions => { :crossvalidation_id => self.id } ).each do |val| - Validation.all( :crossvalidation_id => self.id ).each do |val| + Validation.find( :crossvalidation_id => self.id ).each do |val| v.push( val.validation_uri.to_s ) end h[:validation_uris] = v diff --git a/validation/validation_service.rb b/validation/validation_service.rb index a1efba5..93c167f 100755 --- a/validation/validation_service.rb +++ b/validation/validation_service.rb @@ -30,7 +30,7 @@ end module Validation - class Validation < Lib::Validation + class Validation # constructs a validation object, Rsets id und uri #def initialize( params={} ) @@ -43,7 +43,7 @@ module Validation # deletes a validation # PENDING: model and referenced datasets are deleted as well, keep it that way? - def delete( delete_all=true ) + def delete_validation( delete_all=true ) if (delete_all) to_delete = [:model_uri, :training_dataset_uri, :test_dataset_uri, :test_target_dataset_uri, :prediction_dataset_uri ] case self.validation_type @@ -72,7 +72,7 @@ module Validation end end end - self.destroy + self.delete if (subjectid) begin res = OpenTox::Authorization.delete_policies_from_uri(validation_uri, subjectid) @@ -222,17 +222,13 @@ module Validation # :percent_unpredicted => prediction.percent_unpredicted, # :finished => true} # self.save! - self.attributes= {:num_instances => prediction.num_instances, + self.update :num_instances => prediction.num_instances, :num_without_class => prediction.num_without_class, :percent_without_class => prediction.percent_without_class, :num_unpredicted => prediction.num_unpredicted, :percent_unpredicted => prediction.percent_unpredicted, - :finished => true} - begin - self.save - rescue DataMapper::SaveFailureError => e - raise "could not save validation: "+e.resource.errors.inspect - end + :finished => true + raise unless self.valid? end task.progress(100) if task @@ -240,7 +236,7 @@ module Validation end end - class Crossvalidation < Lib::Crossvalidation + class Crossvalidation # constructs a crossvalidation, id and uri are set #def initialize( params={} ) @@ -262,12 +258,12 @@ module Validation end # deletes a crossvalidation, all validations are deleted as well - def delete - Validation.all(:crossvalidation_id => self.id).each do |v| + def delete_crossvalidation + Validation.find(:crossvalidation_id => self.id).each do |v| v.subjectid = self.subjectid - v.delete + v.delete_validation end - self.destroy + self.delete if (subjectid) begin res = OpenTox::Authorization.delete_policies_from_uri(crossvalidation_uri, subjectid) @@ -281,6 +277,9 @@ module Validation # creates the cv folds def create_cv_datasets( prediction_feature, task=nil ) + self.random_seed = 1 unless self.random_seed + self.num_folds = 10 unless self.num_folds + self.stratified = false unless self.stratified if copy_cv_datasets( prediction_feature ) # dataset folds of a previous crossvalidaiton could be used task.progress(100) if task @@ -296,7 +295,7 @@ module Validation i = 0 task_step = 100 / self.num_folds.to_f; @tmp_validations.each do | val | - validation = Validation.new val + validation = Validation.create val validation.subjectid = self.subjectid validation.validate_algorithm( algorithm_params, OpenTox::SubTask.create(task, i * task_step, ( i + 1 ) * task_step) ) @@ -316,8 +315,7 @@ module Validation # copies datasets from an older crossvalidation on the same dataset and the same folds # returns true if successfull, false otherwise def copy_cv_datasets( prediction_feature ) - - cvs = Crossvalidation.all( { + cvs = Crossvalidation.find( { :dataset_uri => self.dataset_uri, :num_folds => self.num_folds, :stratified => self.stratified, @@ -332,7 +330,7 @@ module Validation OpenTox::Dataset.exist?(v.training_dataset_uri,self.subjectid) and OpenTox::Dataset.exist?(v.test_dataset_uri,self.subjectid) #make sure self.id is set - self.save if self.new? + #self.save if self.new? tmp_val << { :validation_type => "crossvalidation", :training_dataset_uri => v.training_dataset_uri, :test_dataset_uri => v.test_dataset_uri, @@ -354,16 +352,14 @@ module Validation # creates cv folds (training and testdatasets) # stores uris in validation objects def create_new_cv_datasets( prediction_feature, task = nil ) - - raise "random seed not set "+self.inspect unless self.random_seed LOGGER.debug "creating datasets for crossvalidation" orig_dataset = OpenTox::Dataset.find(self.dataset_uri,self.subjectid) raise OpenTox::NotFoundError.new "Dataset not found: "+self.dataset_uri.to_s unless orig_dataset shuffled_compounds = orig_dataset.compounds.shuffle( self.random_seed ) - unless self.stratified - split_compounds = shuffled_compounds.chunk( self.num_folds ) + unless self.stratified + split_compounds = shuffled_compounds.chunk( self.num_folds.to_i ) else class_compounds = {} # "inactive" => compounds[], "active" => compounds[] .. shuffled_compounds.each do |c| @@ -406,7 +402,7 @@ module Validation @tmp_validations = [] - (1..self.num_folds).each do |n| + (1..self.num_folds.to_i).each do |n| datasetname = 'cv'+self.id.to_s + #'_d'+orig_dataset.name.to_s + @@ -418,7 +414,7 @@ module Validation test_compounds = [] train_compounds = [] - (1..self.num_folds).each do |nn| + (1..self.num_folds.to_i).each do |nn| compounds = split_compounds.at(nn-1) if n == nn @@ -428,7 +424,7 @@ module Validation end end - raise "internal error, num test compounds not correct" unless (shuffled_compounds.size/self.num_folds - test_compounds.size).abs <= 1 + raise "internal error, num test compounds not correct" unless (shuffled_compounds.size/self.num_folds.to_i - test_compounds.size).abs <= 1 raise "internal error, num train compounds not correct" unless shuffled_compounds.size - test_compounds.size == train_compounds.size LOGGER.debug "training set: "+datasetname+"_train, compounds: "+train_compounds.size.to_s @@ -442,7 +438,7 @@ module Validation { DC.title => datasetname + '_test', DC.creator => source }, self.subjectid ).uri #make sure self.id is set - self.save if self.new? + #self.save if self.new? tmp_validation = { :validation_type => "crossvalidation", :training_dataset_uri => train_dataset_uri, :test_dataset_uri => test_dataset_uri, @@ -551,9 +547,10 @@ module Validation # splits a dataset into test and training dataset # returns map with training_dataset_uri and test_dataset_uri def self.train_test_dataset_split( orig_dataset_uri, prediction_feature, subjectid, split_ratio=nil, random_seed=nil, task=nil ) - split_ratio=0.67 unless split_ratio + split_ratio = split_ratio.to_f random_seed=1 unless random_seed + random_seed = random_seed.to_i orig_dataset = OpenTox::Dataset.find orig_dataset_uri, subjectid orig_dataset.load_all subjectid diff --git a/validation/validation_test.rb b/validation/validation_test.rb index ffb25c4..efa8ad5 100755 --- a/validation/validation_test.rb +++ b/validation/validation_test.rb @@ -184,13 +184,13 @@ class ValidationTest < Test::Unit::TestCase #ambit_service = "https://ambit.uni-plovdiv.bg:8443/ambit2" #https%3A%2F%2Fambit.uni-plovdiv.bg%3A8443%2Fambit2 - + # post "/validate_datasets",{ -# :test_dataset_uri=>ambit_service+"/dataset/R401577?max=50", -# :prediction_dataset_uri=>ambit_service+"/dataset/R401577?max=50&feature_uris[]="+CGI.escape(ambit_service)+"%2Fmodel%2F35194%2Fpredicted", -# #:test_target_dataset_uri=>ambit_service+"/dataset/R401560", -# :prediction_feature=>ambit_service+"/feature/26221", -# :predicted_feature=>ambit_service+"/feature/218699", +# :test_dataset_uri=>"http://local-ot/dataset/94", +# :prediction_dataset_uri=>'http://local-ot/dataset/96', +# :test_target_dataset_uri=>'http://local-ot/dataset/92', +# :prediction_feature=>'http://local-ot/dataset/92/feature/Hamster%20Carcinogenicity', +# :predicted_feature=>"", # :classification=>"true", # :subjectid=>SUBJECTID} # #:model_uri=>"http://local-ot/model/31"} @@ -199,8 +199,8 @@ class ValidationTest < Test::Unit::TestCase # uri = last_response.body # val = wait_for_task(uri) # puts val -# #get "/"+val.split("/")[-1] - +# get "/"+val.split("/")[-1] +# puts last_response.body # d = OpenTox::Dataset.find("https://ambit.uni-plovdiv.bg:8443/ambit2/dataset/R545",SUBJECTID) # puts d.compounds.inspect @@ -257,7 +257,9 @@ class ValidationTest < Test::Unit::TestCase #delete "/1",:subjectid=>SUBJECTID - run_test("19i") + prepare_examples() + + #run_test("1a") #,{:validation_uri => "http://local-ot/validation/crossvalidation/1"}) #run_test("3a","http://local-ot/validation/crossvalidation/4") #run_test("3b","http://local-ot/validation/crossvalidation/3") -- cgit v1.2.3 From 78751b778a5dabfac142a017fc1e0d8d1c045acd Mon Sep 17 00:00:00 2001 From: mguetlein Date: Fri, 8 Apr 2011 15:39:55 +0200 Subject: switch from datamapper/mysql to redis (sqlite for qmrf) --- Rakefile | 11 ++- application.rb | 14 +-- data/EPAFHM.mini.csv | 21 +++++ example.rb | 24 ++--- lib/merge.rb | 14 +-- lib/ot_predictions.rb | 4 +- lib/validation_db.rb | 116 +++++++++++++----------- reach_reports/reach_persistance.rb | 5 +- reach_reports/reach_service.rb | 6 +- reach_reports/reach_test.rb | 2 +- report/environment.rb | 2 +- report/report_factory.rb | 6 +- report/report_persistance.rb | 49 +++++----- report/validation_access.rb | 20 ++--- report/validation_data.rb | 50 +++++------ test/test_examples.rb | 2 +- test/unit_test.rb | 170 +++++++++++++++++++---------------- validation/validation_application.rb | 63 +++++++------ validation/validation_format.rb | 21 ++--- validation/validation_service.rb | 53 ++++++----- validation/validation_test.rb | 20 +++-- 21 files changed, 363 insertions(+), 310 deletions(-) create mode 100644 data/EPAFHM.mini.csv diff --git a/Rakefile b/Rakefile index e8e2a92..de1e69d 100755 --- a/Rakefile +++ b/Rakefile @@ -1,8 +1,11 @@ require 'rubygems' require 'rake' -REPORT_GEMS = [ 'opentox-ruby', 'mime-types', 'ruby-plot', 'rinruby'] #'ar-extensions', 'activerecord', 'activesupport', -VALIDATION_GEMS = [ 'opentox-ruby', 'ruby-plot'] #'ar-extensions', 'activerecord', 'activesupport', + + +REPORT_GEMS = [ 'opentox-ruby', 'ruby-plot', 'rinruby', 'dm-core', 'dm-serializer', 'dm-timestamps', + 'dm-types', 'dm-migrations', 'dm-validations', 'dm-sqlite-adapter', 'benchmark'] +VALIDATION_GEMS = [ 'opentox-ruby', 'ruby-plot'] GEM_VERSIONS = { "ruby-plot" => "= 0.0.2" } GEM_INSTALL_OPTIONS = {} @@ -11,7 +14,6 @@ GEM_INSTALL_OPTIONS = {} ## this is needed because otherwihse ar-extensions adds activesupport 3.0.0 which confuses things #GEM_INSTALL_OPTIONS = { "ar-extensions" => "--ignore-dependencies" } - desc "Install required gems" task :install_gems do (REPORT_GEMS + VALIDATION_GEMS).uniq.each do |g| @@ -29,7 +31,8 @@ task :install_gems do options = "" options += "--version '"+GEM_VERSIONS[g]+"' " if GEM_VERSIONS.has_key?(g) options += GEM_INSTALL_OPTIONS[g]+" " if GEM_INSTALL_OPTIONS.has_key?(g) - cmd = "sudo env PATH=$PATH gem install "+options+" "+g + #cmd = "sudo env PATH=$PATH gem install "+options+" "+g + cmd = "gem install "+options+" "+g puts "installing gem, this may take some time..." puts cmd IO.popen(cmd){ |f| puts f.gets } diff --git a/application.rb b/application.rb index ad9653a..10c8fe6 100755 --- a/application.rb +++ b/application.rb @@ -1,16 +1,8 @@ require 'rubygems' -gem "opentox-ruby", "~> 0" -[ 'sinatra', 'sinatra/url_for', 'opentox-ruby' ].each do |lib| +gem "opentox-ruby" +[ 'sinatra', 'sinatra/url_for', 'opentox-ruby', 'benchmark' ].each do |lib| require lib end -['dm-core', 'dm-serializer', 'dm-timestamps', 'dm-types', 'dm-migrations', 'dm-validations' ].each{|lib| require lib } -DataMapper.setup(:default, { - :adapter => CONFIG[:database][:adapter], - :database => CONFIG[:database][:database], - :username => CONFIG[:database][:username], - :password => CONFIG[:database][:password], - :host => CONFIG[:database][:host]}) -set :lock, true #unless(defined? LOGGER) #LOGGER = Logger.new(STDOUT) @@ -37,7 +29,7 @@ end get '/prepare_examples/?' do LOGGER.info "prepare examples" content_type "text/plain" - Example.prepare_example_resources + return_task(Example.prepare_example_resources) end post '/test_examples/?' do diff --git a/data/EPAFHM.mini.csv b/data/EPAFHM.mini.csv new file mode 100644 index 0000000..c86cd33 --- /dev/null +++ b/data/EPAFHM.mini.csv @@ -0,0 +1,21 @@ +"STRUCTURE_SMILES","LC50_mmol" +"C1=CC(C=O)=CC(OC)=C1OCCCCCC",1.13E-02 +"C1(OC)=C([N+]([O-])=O)C(C=O)=CC(Br)=C1O",2.66E-01 +"CCCCCCCCOC(=O)C1=CC=CC(C(=O)OCCCCCCCC)=C1", +"C1=CC(Cl)=CC=C1OC2=C([N+](=O)[O-])C=CC=C2",7.69E-03 +"CC1=C(NC=O)C=CC=C1Cl",2.75E-01 +"CCCCOC(=O)C1=CC=CC(C(=O)OCCCC)=C1",3.23E-03 +"C(C1=CC=CC=C1)(C2=CC=CC=C2)(O)C#C",5.33E-02 +"CCCSCCSCCC",4.22E-02 +"CCCCCCCCOC(=O)C1=CC=C(C(=O)OCCCCCCCC)C=C1", +"OCCCCOC(=O)C1=CC=CC=C1C(=O)OCCCCOC(=O)C2=CC=CC=C2C(=O)OCCCCO", +"CCCSCCCCSCCC",1.45E-02 +"C1([N+](=O)[O-])=CC=C(C)C=C1OP(=O)(OC2=C([N+](=O)[O-])C=CC(C)=C2)OC3=C([N+]([O-])=O)C=CC(C)=C3", +"C1=C([N+]([O-])=O)C=CC=C1P(=O)(C2=CC([N+](=O)[O-])=CC=C2)C3=CC([N+](=O)[O-])=CC=C3", +"ClCCOC(=O)NC1CCCCC1",1.70E-01 +"O=C1C(C2=CC=CC=C2)(C(=O)NC(=O)N1)CC",2.08E+00 +"OC1=C(C=C(C=C1)[N+](=O)[O-])[N+](=O)[O-]",5.92E-02 +"NC(=O)OCC",5.88E+01 +"[O-]C(C1=CC=CC=C1O)=O.[Na+]",1.25E+01 +"C1=CC=CC=C1C(=O)N",5.46E+00 +"CC[N+](CC)(CC)CC1(=CC=CC=C1).[Cl-]",7.07E-01 diff --git a/example.rb b/example.rb index 07151a8..b0b591e 100755 --- a/example.rb +++ b/example.rb @@ -3,11 +3,11 @@ require 'lib/test_util.rb' class Example - @@file=File.new("data/hamster_carcinogenicity.yaml","r") + @@file=File.new("data/hamster_carcinogenicity.csv","r") @@file_type="text/x-yaml" @@model=File.join CONFIG[:services]["opentox-model"],"1" #@@feature= URI.encode("http://local-ot/toxmodel/feature#Hamster%20Carcinogenicity%20(DSSTOX/CPDB)") - @@feature= File.join CONFIG[:services]["opentox-dataset"],"1/feature/hamster_carcinogenicity" + @@feature= File.join CONFIG[:services]["opentox-dataset"],"1/feature/Hamster%2520Carcinogenicity" #@@predicted_feature= URI.encode("http://local-ot/toxmodel/feature#Hamster%20Carcinogenicity%20(DSSTOX/CPDB)_lazar_classification") @@alg = File.join CONFIG[:services]["opentox-algorithm"],"lazar" @@alg_params = "feature_generation_uri="+File.join(CONFIG[:services]["opentox-algorithm"],"fminer/bbrc") @@ -58,12 +58,15 @@ class Example #TODO subjectid = nil + Ohm.flush + task = OpenTox::Task.create("prepare examples", "n/a") do |task| @@summary = "" + #delete validations - log "delete validations" - Lib::Validation.auto_migrate! - Lib::Crossvalidation.auto_migrate! + #log "delete validations" + #Lib::Validation.auto_migrate! + #Lib::Crossvalidation.auto_migrate! #ActiveRecord::Base.logger = Logger.new("/dev/null") #ActiveRecord::Migrator.migrate('db/migrate', 0 ) #ActiveRecord::Migrator.migrate('db/migrate', 1 ) @@ -80,10 +83,7 @@ class Example halt 400,"File not found: "+@@file.path.to_s unless File.exist?(@@file.path) #data = File.read(@@file.path) #data_uri = OpenTox::RestClientWrapper.post(CONFIG[:services]["opentox-dataset"],{:content_type => @@file_type},data).chomp("\n") - data = File.read(@@file.path) - dataset = OpenTox::Dataset.create - dataset.load_yaml(data) - dataset.save + dataset = OpenTox::Dataset.create_from_csv_file(@@file.path,nil) data_uri = dataset.uri log "-> "+data_uri task.progress(20) @@ -92,7 +92,7 @@ class Example #delete_all(CONFIG[:services]["opentox-model"]) OpenTox::RestClientWrapper.delete CONFIG[:services]["opentox-model"] - split_params = Validation::Util.train_test_dataset_split(data_uri, URI.decode(@@feature), 0.9, 1) + split_params = Validation::Util.train_test_dataset_split(data_uri, URI.decode(@@feature), nil, 0.9, 1) v = Validation::Validation.new :training_dataset_uri => split_params[:training_dataset_uri], :validation_type => "test_set_validation", :test_dataset_uri => split_params[:test_dataset_uri], @@ -102,7 +102,7 @@ class Example v.validate_algorithm( @@alg_params, OpenTox::SubTask.new(task, 20, 40) ) log "crossvalidation" - cv = Validation::Crossvalidation.new({ :dataset_uri => data_uri, :algorithm_uri => @@alg, :num_folds => 5, :stratified => false }) + cv = Validation::Crossvalidation.create({ :dataset_uri => data_uri, :algorithm_uri => @@alg, :num_folds => 5, :stratified => false }) cv.perform_cv( URI.decode(@@feature), @@alg_params, OpenTox::SubTask.new(task, 40, 70) ) log "create validation report" @@ -124,7 +124,7 @@ class Example log "done" @@summary end - return_task(task) + task end # performs all curl calls listed in examples after ">>>", next line is added if line ends with "\" diff --git a/lib/merge.rb b/lib/merge.rb index 527415e..ecbe133 100644 --- a/lib/merge.rb +++ b/lib/merge.rb @@ -20,8 +20,7 @@ module Lib def self.merge_array_objects( array ) return nil if array.size == nil return array[0] if array.size==1 - - m = self.merge_objects(array[0], array[1] ) + m = self.merge_objects(array[0], array[1]) (2..array.size-1).each do |i| m = self.merge_objects(m, array[i] ) end @@ -33,8 +32,7 @@ module Lib end def self.merge_objects( object1, object2 ) - - raise "classes not equal" if object1.class != object2.class + raise "classes not equal : "+object1.class.to_s+" != "+object2.class.to_s if object1.class != object2.class object_class = object1.class raise "register which attributes to merge first, nothing found for class "+object_class.to_s unless merge_attributes_registered?(object_class) raise "not supported, successivly add unmerged object to a merge object" if merge_count(object2)>1 @@ -71,6 +69,11 @@ module Lib variance = nil if (avg=avg_attribute?(object_class, attribute)) || sum_attribute?(object_class, attribute) + # we string to numerics if wanted, value1 is no string anymore if weight>1 + if value2.is_a?(String) and ((weight1==1 and value1.is_a?(String)) or (weight1>1 and value1.is_a?(Numeric))) + value1 = value1.to_f + value2 = value2.to_f + end if (value1==nil and value2==nil ) #do nothing elsif value1.is_a?(Numeric) and value2.is_a?(Numeric) @@ -104,7 +107,8 @@ module Lib end end else - raise "invalid, cannot avg/sum non-numeric content for attribute: "+attribute.to_s+" contents: '"+value1.to_s+"', '"+value2.to_s+"'" + raise "invalid, cannot avg/sum non-numeric content for attribute: "+attribute.to_s+" contents: '"+value1.to_s+"' ("+ + value1.class.to_s+"), '"+value2.to_s+"' ("+value2.class.to_s+")" end elsif non_numeric_attribute?(object_class, attribute) if (value1.is_a?(Hash) and value2.is_a?(Hash)) diff --git a/lib/ot_predictions.rb b/lib/ot_predictions.rb index f812854..5033425 100755 --- a/lib/ot_predictions.rb +++ b/lib/ot_predictions.rb @@ -184,9 +184,9 @@ module Lib res = {} case @feature_type when "classification" - (Lib::VAL_CLASS_PROPS).each{ |s| res[s] = send(s)} + (Validation::VAL_CLASS_PROPS).each{ |s| res[s] = send(s)} when "regression" - (Lib::VAL_REGR_PROPS).each{ |s| res[s] = send(s) } + (Validation::VAL_REGR_PROPS).each{ |s| res[s] = send(s) } end return res end diff --git a/lib/validation_db.rb b/lib/validation_db.rb index 0d5db21..0beb73d 100755 --- a/lib/validation_db.rb +++ b/lib/validation_db.rb @@ -4,7 +4,7 @@ #end require "lib/merge.rb" -module Lib +module Validation VAL_PROPS_GENERAL = [ :validation_uri, :validation_type, :model_uri, :algorithm_uri, :training_dataset_uri, :prediction_feature, :test_dataset_uri, :test_target_dataset_uri, :prediction_dataset_uri, :date ] @@ -48,42 +48,53 @@ module Lib VAL_MERGE_AVG = VAL_PROPS_AVG + VAL_CLASS_PROPS_SINGLE_AVG + VAL_CLASS_PROPS_PER_CLASS_AVG + VAL_REGR_PROPS -# class Validation < ActiveRecord::Base -# serialize :classification_statistics -# serialize :regression_statistics -# -# alias_attribute :date, :created_at - - class Validation - include DataMapper::Resource + class Validation < Ohm::Model - property :id, Serial - property :validation_type, String, :length => 512 - property :model_uri, String, :length => 512 - property :algorithm_uri, String, :length => 512 - property :training_dataset_uri, String, :length => 512 - property :test_target_dataset_uri, String, :length => 512 - property :test_dataset_uri, String, :length => 512 - property :prediction_dataset_uri, String, :length => 512 - property :prediction_feature, String, :length => 512 - property :created_at, DateTime - property :num_instances, Integer - property :num_without_class, Integer - property :num_unpredicted, Integer - property :crossvalidation_id, Integer - property :crossvalidation_fold, Integer - property :real_runtime, Float - property :percent_without_class, Float - property :percent_unpredicted, Float - property :classification_statistics, Object - property :regression_statistics, Object - property :finished, Boolean, :default => false + attribute :validation_type + attribute :model_uri + attribute :algorithm_uri + attribute :training_dataset_uri + attribute :test_target_dataset_uri + attribute :test_dataset_uri + attribute :prediction_dataset_uri + attribute :prediction_feature + attribute :created_at + attribute :num_instances + attribute :num_without_class + attribute :num_unpredicted + attribute :crossvalidation_id + attribute :crossvalidation_fold + attribute :real_runtime + attribute :percent_without_class + attribute :percent_unpredicted + attribute :classification_statistics_yaml + attribute :regression_statistics_yaml + attribute :finished + + index :model_uri + index :validation_type + index :crossvalidation_id attr_accessor :subjectid - after :save, :check_policy - private - def check_policy + def classification_statistics + YAML.load(self.classification_statistics_yaml) if self.classification_statistics_yaml + end + + def classification_statistics=(cs) + self.classification_statistics_yaml = cs.to_yaml + end + + def regression_statistics + YAML.load(self.regression_statistics_yaml) if self.regression_statistics_yaml + end + + def regression_statistics=(rs) + self.regression_statistics_yaml = rs.to_yaml + end + + def save + super OpenTox::Authorization.check_policy(validation_uri, subjectid) end @@ -115,25 +126,27 @@ module Lib end -# class Crossvalidation < ActiveRecord::Base -# alias_attribute :date, :created_at - class Crossvalidation - include DataMapper::Resource + class Crossvalidation < Ohm::Model - property :id, Serial - property :algorithm_uri, String, :length => 512 - property :dataset_uri, String, :length => 512 - property :created_at, DateTime - property :num_folds, Integer, :default => 10 - property :random_seed, Integer, :default => 1 - property :finished, Boolean, :default => false - property :stratified, Boolean, :default => false + attribute :algorithm_uri + attribute :dataset_uri + attribute :created_at + attribute :num_folds + attribute :random_seed + attribute :finished + attribute :stratified attr_accessor :subjectid + + index :algorithm_uri + index :dataset_uri + index :num_folds + index :random_seed + index :stratified + index :finished - after :save, :check_policy - private - def check_policy + def save + super OpenTox::Authorization.check_policy(crossvalidation_uri, subjectid) end @@ -152,7 +165,7 @@ module Lib # further conditions can be specified in __conditions__ def self.find_all_uniq(conditions={}, subjectid=nil ) #cvs = Lib::Crossvalidation.find(:all, :conditions => conditions) - cvs = Lib::Crossvalidation.all(:conditions => conditions) + cvs = Crossvalidation.find( conditions ) uniq = [] cvs.each do |cv| next if AA_SERVER and !OpenTox::Authorization.authorized?(cv.crossvalidation_uri,"GET",subjectid) @@ -171,8 +184,3 @@ module Lib end end - -Lib::Validation.auto_upgrade! -Lib::Validation.raise_on_save_failure = true -Lib::Crossvalidation.auto_upgrade! -Lib::Crossvalidation.raise_on_save_failure = true diff --git a/reach_reports/reach_persistance.rb b/reach_reports/reach_persistance.rb index 3118809..b90778b 100755 --- a/reach_reports/reach_persistance.rb +++ b/reach_reports/reach_persistance.rb @@ -1,5 +1,8 @@ -require "dm-validations" +['dm-core', 'dm-serializer', 'dm-timestamps', 'dm-types', 'dm-migrations', 'dm-validations' ].each{|lib| require lib } +db_dir = File.join(File.join(ENV['HOME'], ".opentox"), "db") +FileUtils.mkdir_p db_dir +DataMapper::setup(:default, "sqlite3://#{db_dir}/reach_reports.sqlite3") DataMapper::Model.raise_on_save_failure = true diff --git a/reach_reports/reach_service.rb b/reach_reports/reach_service.rb index 53acb62..0cf4172 100755 --- a/reach_reports/reach_service.rb +++ b/reach_reports/reach_service.rb @@ -133,11 +133,11 @@ module ReachReports val_datasets = [] if algorithm - cvs = Lib::Crossvalidation.find_all_uniq({:algorithm_uri => algorithm.uri, :finished => true},r.subjectid) + cvs = Validation::Crossvalidation.find_all_uniq({:algorithm_uri => algorithm.uri, :finished => true},r.subjectid) # PENDING: cv classification/regression hack cvs = cvs.delete_if do |cv| #val = Validation::Validation.first( :all, :conditions => { :crossvalidation_id => cv.id } ) - val = Validation::Validation.first( :crossvalidation_id => cv.id ) + val = Validation::Validation.find( :crossvalidation_id => cv.id ).first raise "should not happen: no validations found for crossvalidation "+cv.id.to_s unless val (val.classification_statistics!=nil) != (feature_type=="classification") end @@ -196,7 +196,7 @@ module ReachReports LOGGER.debug "looking for validations with "+{:model_uri => model.uri}.inspect #vals = Lib::Validation.find(:all, :conditions => {:model_uri => model.uri}) - vals = Lib::Validation.all({:model_uri => model.uri}) + vals = Validation::Validation.find({:model_uri => model.uri}) uniq_vals = [] vals.each do |val| match = false diff --git a/reach_reports/reach_test.rb b/reach_reports/reach_test.rb index 87addfa..43aec28 100755 --- a/reach_reports/reach_test.rb +++ b/reach_reports/reach_test.rb @@ -150,7 +150,7 @@ class ReachTest < Test::Unit::TestCase #model_uri = "http://local-ot/model/104" - model_uri = "http://local-ot/majority/class/model/125" + model_uri = "http://local-ot/model/72" # m = OpenTox::Model::Generic.find(model_uri) diff --git a/report/environment.rb b/report/environment.rb index 12e3272..aa8ab56 100755 --- a/report/environment.rb +++ b/report/environment.rb @@ -13,7 +13,7 @@ end require "lib/ot_predictions.rb" #require "lib/active_record_setup.rb" -require "lib/data_mapper_util.rb" +#require "lib/data_mapper_util.rb" require "report/plot_factory.rb" require "report/xml_report.rb" diff --git a/report/report_factory.rb b/report/report_factory.rb index e770d2f..f48d11a 100755 --- a/report/report_factory.rb +++ b/report/report_factory.rb @@ -76,7 +76,7 @@ module Reports::ReportFactory end task.progress(90) if task - report.add_result(validation_set, Lib::ALL_PROPS, "All Results", "All Results") + report.add_result(validation_set, Validation::ALL_PROPS, "All Results", "All Results") report.add_predictions( validation_set ) task.progress(100) if task report @@ -89,7 +89,7 @@ module Reports::ReportFactory validation_set.get_values(:crossvalidation_id,false).inspect) if validation_set.unique_value(:crossvalidation_id)==nil validation_set.load_cv_attributes raise OpenTox::BadRequestError.new("num validations ("+validation_set.size.to_s+") is not equal to num folds ("+ - validation_set.unique_value(:num_folds).to_s+")") unless validation_set.unique_value(:num_folds)==validation_set.size + validation_set.unique_value(:num_folds).to_s+")") unless validation_set.unique_value(:num_folds).to_i==validation_set.size raise OpenTox::BadRequestError.new("num different folds is not equal to num validations") unless validation_set.num_different_values(:crossvalidation_fold)==validation_set.size raise OpenTox::BadRequestError.new("validations must have unique feature type, i.e. must be either all regression, "+ +"or all classification validations") unless validation_set.unique_feature_type @@ -116,7 +116,7 @@ module Reports::ReportFactory end task.progress(90) if task - report.add_result(validation_set, Lib::ALL_PROPS, "All Results", "All Results") + report.add_result(validation_set, Validation::ALL_PROPS, "All Results", "All Results") report.add_predictions( validation_set ) #, [:crossvalidation_fold] ) task.progress(100) if task report diff --git a/report/report_persistance.rb b/report/report_persistance.rb index df4930c..9097fa3 100755 --- a/report/report_persistance.rb +++ b/report/report_persistance.rb @@ -188,22 +188,21 @@ module Reports # serialize :model_uris # alias_attribute :date, :created_at - class ReportData - include DataMapper::Resource + class ReportData < Ohm::Model - property :id, Serial - property :report_type, String, :length => 255 - property :created_at, DateTime - property :validation_uris, Object - property :crossvalidation_uris, Object - property :model_uris, Object - property :algorithm_uris, Object + attribute :report_type + attribute :created_at + attribute :validation_uris + attribute :crossvalidation_uris + attribute :model_uris + attribute :algorithm_uris + + index :report_type attr_accessor :subjectid - after :save, :check_policy - private - def check_policy + def save + super OpenTox::Authorization.check_policy(report_uri, subjectid) end @@ -245,32 +244,30 @@ module Reports report.subjectid = subjectid report.report_type = type report.save + OpenTox::Authorization.check_policy(report.report_uri, subjectid) new_report_with_id(report_content, type, report.id) end def list_reports(type, filter_params={}) - filter_params["report_type"]=type unless filter_params.has_key?("report_type") - #ReportData.find_like(filter_params).delete_if{|r| r.report_type!=type}.collect{ |r| r.id } - - filter_params = Lib::DataMapperUtil.check_params(ReportData, filter_params) + LOGGER.debug "find reports for params: "+filter_params.inspect # unfortunately, datamapper does not allow searching in Objects # do filtering for list = Object params manually list_params = {} [:validation_uris, :crossvalidation_uris, :algorithm_uris, :model_uris].each do |l| list_params[l] = filter_params.delete(l) if filter_params.has_key?(l) end - - reports = ReportData.all(filter_params).delete_if{|r| r.report_type!=type} + reports = ReportData.find( :report_type => type ) list_params.each do |k,v| - reports = reports.delete_if{ |r| !r.send(k).include?(v) } + reports = reports.collect{|x| x}.delete_if{ |r| !r.send(k).include?(v) } end reports.collect{ |r| r.id } end def get_report(type, id, format, force_formating, params) - report = ReportData.first({:id => id, :report_type => type}) - raise OpenTox::NotFoundError.new("Report with id='"+id.to_s+"' and type='"+type.to_s+"' not found.") unless report + report = ReportData[id] + raise OpenTox::NotFoundError.new("Report with id='"+id.to_s+"' and type='"+type.to_s+"' not found.") if + report==nil or report.report_type!=type # begin # report = ReportData.find(:first, :conditions => {:id => id, :report_type => type}) # rescue ActiveRecord::RecordNotFound @@ -294,9 +291,10 @@ module Reports # raise OpenTox::NotFoundError.new("Report with id='"+id.to_s+"' and type='"+type.to_s+"' not found.") # end # ReportData.delete(id) - report = ReportData.first({:id => id, :report_type => type}) - raise OpenTox::NotFoundError.new("Report with id='"+id.to_s+"' and type='"+type.to_s+"' not found.") unless report - report.destroy + report = ReportData[id] + raise OpenTox::NotFoundError.new("Report with id='"+id.to_s+"' and type='"+type.to_s+"' not found.") if + report==nil || report.report_type!=type + report.delete if (subjectid) begin res = OpenTox::Authorization.delete_policies_from_uri(report.report_uri, subjectid) @@ -310,9 +308,6 @@ module Reports end end -Reports::ReportData.auto_upgrade! -Reports::ReportData.raise_on_save_failure = true - #module Reports # def self.check_filter_params(model, filter_params) # prop_names = model.properties.collect{|p| p.name.to_s} diff --git a/report/validation_access.rb b/report/validation_access.rb index 53ecc46..96dfbf3 100755 --- a/report/validation_access.rb +++ b/report/validation_access.rb @@ -21,11 +21,11 @@ class Reports::ValidationDB # rescue => ex # raise "could not access crossvalidation with id "+validation_id.to_s+", error-msg: "+ex.message # end - cv = Lib::Crossvalidation.get( cv_id ) + cv = Validation::Crossvalidation.get( cv_id ) raise OpenTox::NotFoundError.new "crossvalidation with id "+cv_id.to_s+" not found" unless cv raise OpenTox::BadRequestError.new("crossvalidation with id '"+cv_id.to_s+"' not finished") unless cv.finished - #res += Lib::Validation.find( :all, :conditions => { :crossvalidation_id => cv_id } ).collect{|v| v.validation_uri.to_s} - res += Lib::Validation.all( :crossvalidation_id => cv_id ).collect{|v| v.validation_uri.to_s } + #res += Validation::Validation.find( :all, :conditions => { :crossvalidation_id => cv_id } ).collect{|v| v.validation_uri.to_s} + res += Validation::Validation.find( :crossvalidation_id => cv_id ).collect{|v| v.validation_uri.to_s } else res += [u.to_s] end @@ -42,16 +42,16 @@ class Reports::ValidationDB v = nil raise OpenTox::NotAuthorizedError.new "Not authorized: GET "+uri.to_s if AA_SERVER and !OpenTox::Authorization.authorized?(uri,"GET",subjectid) - v = Lib::Validation.get(validation_id) + v = Validation::Validation.get(validation_id) raise OpenTox::NotFoundError.new "validation with id "+validation_id.to_s+" not found" unless v raise OpenTox::BadRequestError.new "validation with id "+validation_id.to_s+" is not finished yet" unless v.finished - (Lib::VAL_PROPS + Lib::VAL_CV_PROPS).each do |p| + (Validation::VAL_PROPS + Validation::VAL_CV_PROPS).each do |p| validation.send("#{p.to_s}=".to_sym, v.send(p)) end - {:classification_statistics => Lib::VAL_CLASS_PROPS, - :regression_statistics => Lib::VAL_REGR_PROPS}.each do |subset_name,subset_props| + {:classification_statistics => Validation::VAL_CLASS_PROPS, + :regression_statistics => Validation::VAL_REGR_PROPS}.each do |subset_name,subset_props| subset = v.send(subset_name) subset_props.each{ |prop| validation.send("#{prop.to_s}=".to_sym, subset[prop]) } if subset end @@ -60,11 +60,11 @@ class Reports::ValidationDB def init_cv(validation) #cv = Lib::Crossvalidation.find(validation.crossvalidation_id) - cv = Lib::Crossvalidation.get(validation.crossvalidation_id) + cv = Validation::Crossvalidation.get(validation.crossvalidation_id) raise OpenTox::BadRequestError.new "no crossvalidation found with id "+validation.crossvalidation_id.to_s unless cv - Lib::CROSS_VAL_PROPS.each do |p| - validation.send("#{p.to_s}=".to_sym, cv[p]) + Validation::CROSS_VAL_PROPS.each do |p| + validation.send("#{p.to_s}=".to_sym, cv.send(p.to_s)) end end diff --git a/report/validation_data.rb b/report/validation_data.rb index 15d51ec..9212c98 100755 --- a/report/validation_data.rb +++ b/report/validation_data.rb @@ -51,13 +51,13 @@ end module Reports - # = Reports::Validation + # = ReportValidation # # contains all values of a validation object # - class Validation + class ReportValidation - @@validation_access = Reports::ValidationDB.new + @@validation_access = ValidationDB.new # for overwriting validation source (other than using webservices) def self.reset_validation_access(validation_access) @@ -69,7 +69,7 @@ module Reports end # create member variables for all validation properties - @@validation_attributes = Lib::ALL_PROPS + + @@validation_attributes = Validation::ALL_PROPS + VAL_ATTR_VARIANCE.collect{ |a| (a.to_s+"_variance").to_sym } + VAL_ATTR_RANKING.collect{ |a| (a.to_s+"_ranking").to_sym } @@validation_attributes.each{ |a| attr_accessor a } @@ -85,7 +85,7 @@ module Reports # returns/creates predictions, cache to save rest-calls/computation time # # call-seq: - # get_predictions => Reports::Predictions + # get_predictions => Predictions # def get_predictions( task=nil ) if @predictions @@ -127,13 +127,13 @@ module Reports @@validation_access.init_cv(self) end - @@persistance = Reports::ReportService.persistance + @@persistance = ReportService.persistance def validation_report_uri #puts "searching for validation report: "+self.validation_uri.to_s return @validation_report_uri if @validation_report_uri!=nil ids = @@persistance.list_reports("validation",{:validation_uris=>validation_uri }) - @validation_report_uri = Reports::ReportService.instance.get_uri("validation",ids[-1]) if ids and ids.size>0 + @validation_report_uri = ReportService.instance.get_uri("validation",ids[-1]) if ids and ids.size>0 end def cv_report_uri @@ -142,7 +142,7 @@ module Reports raise "no cv uri "+to_yaml unless self.crossvalidation_uri ids = @@persistance.list_reports("crossvalidation",{:crossvalidation=>self.crossvalidation_uri.to_s }) #puts "-> "+ids.inspect - @cv_report_uri = Reports::ReportService.instance.get_uri("crossvalidation",ids[-1]) if ids and ids.size>0 + @cv_report_uri = ReportService.instance.get_uri("crossvalidation",ids[-1]) if ids and ids.size>0 end def clone_validation @@ -160,9 +160,9 @@ module Reports def initialize(validation_uris=nil, subjectid=nil) @unique_values = {} - validation_uris = Reports::Validation.resolve_cv_uris(validation_uris, subjectid) if validation_uris + validation_uris = ReportValidation.resolve_cv_uris(validation_uris, subjectid) if validation_uris @validations = Array.new - validation_uris.each{|u| @validations.push(Reports::Validation.new(u, subjectid))} if validation_uris + validation_uris.each{|u| @validations.push(ReportValidation.new(u, subjectid))} if validation_uris end @@ -249,12 +249,12 @@ module Reports def get_domain_for_attr( attribute ) class_domain = get_class_domain() - if Lib::Validation.classification_property?(attribute) and - !Lib::Validation.depends_on_class_value?(attribute) + if Validation::Validation.classification_property?(attribute) and + !Validation::Validation.depends_on_class_value?(attribute) [ nil ] - elsif Lib::Validation.classification_property?(attribute) and + elsif Validation::Validation.classification_property?(attribute) and class_domain.size==2 and - Lib::Validation.complement_exists?(attribute) + Validation::Validation.complement_exists?(attribute) [ class_domain[0] ] else class_domain @@ -270,10 +270,10 @@ module Reports # returns a new set with all validation that have values as specified in the map # # call-seq: - # filter(map) => Reports::ValidationSet + # filter(map) => ValidationSet # def filter(map) - new_set = Reports::ValidationSet.new + new_set = ValidationSet.new validations.each{ |v| new_set.validations.push(v) if v.has_values?(map) } return new_set end @@ -282,10 +282,10 @@ module Reports # e.g. create set with predictions: collect{ |validation| validation.get_predictions!=null } # # call-seq: - # filter_proc(proc) => Reports::ValidationSet + # filter_proc(proc) => ValidationSet # def collect - new_set = Reports::ValidationSet.new + new_set = ValidationSet.new validations.each{ |v| new_set.validations.push(v) if yield(v) } return new_set end @@ -398,19 +398,19 @@ module Reports # to_array(attributes) => array # def merge(equal_attributes) - new_set = Reports::ValidationSet.new + new_set = ValidationSet.new # unique values stay unique when merging # derive unique values before, because model dependent props cannot be accessed later (when mergin validations from different models) new_set.unique_values = @unique_values #compute grouping - grouping = Reports::Util.group(@validations, equal_attributes) + grouping = Util.group(@validations, equal_attributes) #puts "groups "+grouping.size.to_s - Lib::MergeObjects.register_merge_attributes( Reports::Validation, - Lib::VAL_MERGE_AVG,Lib::VAL_MERGE_SUM,Lib::VAL_MERGE_GENERAL) unless - Lib::MergeObjects.merge_attributes_registered?(Reports::Validation) + Lib::MergeObjects.register_merge_attributes( ReportValidation, + Validation::VAL_MERGE_AVG,Validation::VAL_MERGE_SUM,Validation::VAL_MERGE_GENERAL) unless + Lib::MergeObjects.merge_attributes_registered?(ReportValidation) #merge grouping.each do |g| @@ -438,12 +438,12 @@ module Reports def compute_ranking(equal_attributes, ranking_attribute, class_value=nil ) #puts "compute_ranking("+equal_attributes.inspect+", "+ranking_attribute.inspect+", "+class_value.to_s+" )" - new_set = Reports::ValidationSet.new + new_set = ValidationSet.new (0..@validations.size-1).each do |i| new_set.validations.push(@validations[i].clone_validation) end - grouping = Reports::Util.group(new_set.validations, equal_attributes) + grouping = Util.group(new_set.validations, equal_attributes) grouping.each do |group| # put indices and ranking values for current group into hash diff --git a/test/test_examples.rb b/test/test_examples.rb index 3c6f00e..49d7838 100755 --- a/test/test_examples.rb +++ b/test/test_examples.rb @@ -85,7 +85,7 @@ module ValidationExamples class EPAFHMCrossvalidation < CrossValidation def initialize - @dataset_file = File.new("data/EPAFHM.csv","r") + @dataset_file = File.new("data/EPAFHM.mini.csv","r") #@prediction_feature = "http://ot-dev.in-silico.ch/toxcreate/feature#IRIS%20unit%20risk" @num_folds = 10 end diff --git a/test/unit_test.rb b/test/unit_test.rb index 11b0759..c5a4a62 100644 --- a/test/unit_test.rb +++ b/test/unit_test.rb @@ -1,6 +1,6 @@ #TEST_USER = "mgtest" #TEST_PW = "mgpasswd" -#ENV['RACK_ENV'] = 'test' +ENV['RACK_ENV'] = 'production' require "rubygems" require "sinatra" @@ -44,108 +44,124 @@ class ValidationTest < Test::Unit::TestCase puts "AA disabled" @@subjectid = nil end - f = File.new("data/hamster_carcinogenicity.mini.csv") - @@data_class_mini = ValidationExamples::Util.upload_dataset(f, @@subjectid) - @@feat_class_mini = ValidationExamples::Util.prediction_feature_for_file(f) + + files = [ + File.new("data/hamster_carcinogenicity.mini.csv"), + File.new("data/EPAFHM.mini.csv") + ] + @@data = {} + files.each do |f| + d = ValidationExamples::Util.upload_dataset(f, @@subjectid) + @@data[d] = ValidationExamples::Util.prediction_feature_for_file(f) + end end def global_teardown puts "delete and logout" - OpenTox::Dataset.find(@@data_class_mini,@@subjectid).delete(@@subjectid) if defined?@@data_class_mini - @@cv.delete(@@subjectid) if defined?@@cv - @@report.delete(@@subjectid) if defined?@@report - @@qmrfReport.delete(@@subjectid) if defined?@@qmrfReport + #OpenTox::Dataset.find(@@data,@@subjectid).delete(@@subjectid) if defined?@@data + @@cvs.each{|cv| cv.delete(@@subjectid)} if defined?@@cvs + @@reports.each{|report| report.delete(@@subjectid)} if defined?@@reports + @@qmrfReports.each{|qmrfReport| qmrfReport.delete(@@subjectid)} if defined?@@qmrfReports OpenTox::Authorization.logout(@@subjectid) if AA_SERVER end def test_crossvalidation - puts "test_crossvalidation" + #assert_rest_call_error OpenTox::NotFoundError do # OpenTox::Crossvalidation.find(File.join(CONFIG[:services]["opentox-validation"],"crossvalidation/noexistingid")) #end - p = { - :dataset_uri => @@data_class_mini, - :algorithm_uri => File.join(CONFIG[:services]["opentox-algorithm"],"lazar"), - :algorithm_params => "feature_generation_uri="+File.join(CONFIG[:services]["opentox-algorithm"],"fminer/bbrc"), - :prediction_feature => @@feat_class_mini, - :num_folds => 2 } - t = OpenTox::SubTask.new(nil,0,1) - def t.progress(pct) - if !defined?@last_msg or @last_msg+3 data, + :algorithm_uri => File.join(CONFIG[:services]["opentox-algorithm"],"lazar"), + :algorithm_params => "feature_generation_uri="+File.join(CONFIG[:services]["opentox-algorithm"],"fminer/bbrc"), + :prediction_feature => feat, + :num_folds => 2 } + t = OpenTox::SubTask.new(nil,0,1) + def t.progress(pct) + if !defined?@last_msg or @last_msg+3 params[:dataset_uri], :algorithm_uri => params[:algorithm_uri] } [ :num_folds, :random_seed, :stratified ].each{ |sym| cv_params[sym] = params[sym] if params[sym] } - cv = Validation::Crossvalidation.new cv_params + cv = Validation::Crossvalidation.create cv_params cv.subjectid = @subjectid cv.perform_cv( params[:prediction_feature], params[:algorithm_params], task ) cv.crossvalidation_uri @@ -62,7 +63,7 @@ post '/crossvalidation/cleanup/?' do deleted << cv.crossvalidation_uri #Validation::Crossvalidation.delete(cv.id) cv.subjectid = @subjectid - cv.delete + cv.delete_crossvalidation #end end LOGGER.info "crossvalidation cleanup, deleted "+deleted.size.to_s+" cvs" @@ -124,13 +125,19 @@ get '/crossvalidation/:id/statistics' do raise OpenTox::BadRequestError.new "Crossvalidation '"+params[:id].to_s+"' not finished" unless crossvalidation.finished Lib::MergeObjects.register_merge_attributes( Validation::Validation, - Lib::VAL_MERGE_AVG,Lib::VAL_MERGE_SUM,Lib::VAL_MERGE_GENERAL-[:date,:validation_uri,:crossvalidation_uri]) unless + Validation::VAL_MERGE_AVG,Validation::VAL_MERGE_SUM,Validation::VAL_MERGE_GENERAL-[:date,:validation_uri,:crossvalidation_uri]) unless Lib::MergeObjects.merge_attributes_registered?(Validation::Validation) #v = Lib::MergeObjects.merge_array_objects( Validation::Validation.find( :all, :conditions => { :crossvalidation_id => params[:id] } ) ) - v = Lib::MergeObjects.merge_array_objects( Validation::Validation.all( :crossvalidation_id => params[:id] ) ) + # convert ohm:set into array, as ohm:set[0]=nil(!) + vals = Validation::Validation.find( :crossvalidation_id => params[:id] ).collect{|x| x} +# LOGGER.debug vals.collect{|v| v.validation_uri}.join("\n") +# LOGGER.debug vals.size +# LOGGER.debug vals.class + + v = Lib::MergeObjects.merge_array_objects( vals ) v.created_at = nil - v.id = nil + #v.id = nil case request.env['HTTP_ACCEPT'].to_s when /text\/html/ @@ -159,7 +166,7 @@ delete '/crossvalidation/:id/?' do cv = Validation::Crossvalidation.get(params[:id]) cv.subjectid = @subjectid raise OpenTox::NotFoundError.new "Crossvalidation '#{params[:id]}' not found." unless cv - cv.delete + cv.delete_crossvalidation end #get '/crossvalidation/:id/validations' do @@ -208,8 +215,8 @@ get '/?' do LOGGER.info "list all validations, params: "+params.inspect #uri_list = Validation::Validation.find_like(params).collect{ |v| v.validation_uri }.join("\n")+"\n" #uri_list = Validation::Validation.all(params).collect{ |v| v.validation_uri }.join("\n")+"\n" - uri_list = Lib::DataMapperUtil.all(Validation::Validation,params).collect{ |v| v.validation_uri }.join("\n")+"\n" - + #uri_list = Lib::DataMapperUtil.all(Validation::Validation,params).collect{ |v| v.validation_uri }.join("\n")+"\n" + uri_list = Validation::Validation.all.collect{|v| v.validation_uri}.join("\n") + "\n" if request.env['HTTP_ACCEPT'] =~ /text\/html/ related_links = @@ -241,7 +248,7 @@ post '/test_set_validation' do LOGGER.info "creating test-set-validation "+params.inspect if params[:model_uri] and params[:test_dataset_uri] and !params[:training_dataset_uri] and !params[:algorithm_uri] task = OpenTox::Task.create( "Perform test-set-validation", url_for("/", :full) ) do |task| #, params - v = Validation::Validation.new :validation_type => "test_set_validation", + v = Validation::Validation.create :validation_type => "test_set_validation", :model_uri => params[:model_uri], :test_dataset_uri => params[:test_dataset_uri], :test_target_dataset_uri => params[:test_target_dataset_uri], @@ -262,8 +269,9 @@ get '/test_set_validation' do #uri_list = Validation::Validation.find( :all, :conditions => { :validation_type => "test_set_validation" } ).collect{ |v| v.validation_uri }.join("\n")+"\n" #uri_list = Validation::Validation.all( :validation_type => "test_set_validation" ).collect{ |v| v.validation_uri }.join("\n")+"\n" - params[:validation_type] = "test_set_validation" - uri_list = Lib::DataMapperUtil.all(Validation::Validation,params).collect{ |v| v.validation_uri }.join("\n")+"\n" + #params[:validation_type] = "test_set_validation" + #uri_list = Lib::DataMapperUtil.all(Validation::Validation,params).collect{ |v| v.validation_uri }.join("\n")+"\n" + uri_list = Validation::Validation.find(:validation_type => "test_set_validation").collect{|v| v.validation_uri}.join("\n") + "\n" if request.env['HTTP_ACCEPT'] =~ /text\/html/ related_links = @@ -285,7 +293,7 @@ post '/training_test_validation/?' do LOGGER.info "creating training-test-validation "+params.inspect if params[:algorithm_uri] and params[:training_dataset_uri] and params[:test_dataset_uri] and params[:prediction_feature] and !params[:model_uri] task = OpenTox::Task.create( "Perform training-test-validation", url_for("/", :full) ) do |task| #, params - v = Validation::Validation.new :validation_type => "training_test_validation", + v = Validation::Validation.create :validation_type => "training_test_validation", :algorithm_uri => params[:algorithm_uri], :training_dataset_uri => params[:training_dataset_uri], :test_dataset_uri => params[:test_dataset_uri], @@ -306,8 +314,9 @@ get '/training_test_validation' do LOGGER.info "list all training-test-validations, params: "+params.inspect #uri_list = Validation::Validation.find( :all, :conditions => { :validation_type => "training_test_validation" } ).collect{ |v| v.validation_uri }.join("\n")+"\n" #uri_list = Validation::Validation.all( :validation_type => "training_test_validation" ).collect{ |v| v.validation_uri }.join("\n")+"\n" - params[:validation_type] = "training_test_validation" - uri_list = Lib::DataMapperUtil.all(Validation::Validation,params).collect{ |v| v.validation_uri }.join("\n")+"\n" + #params[:validation_type] = "training_test_validation" + #uri_list = Lib::DataMapperUtil.all(Validation::Validation,params).collect{ |v| v.validation_uri }.join("\n")+"\n" + uri_list = Validation::Validation.find(:validation_type => "training_test_validation").collect{|v| v.validation_uri}.join("\n") + "\n" if request.env['HTTP_ACCEPT'] =~ /text\/html/ related_links = @@ -340,7 +349,7 @@ post '/bootstrapping' do params.merge!( Validation::Util.bootstrapping( params[:dataset_uri], params[:prediction_feature], @subjectid, params[:random_seed], OpenTox::SubTask.create(task,0,33)) ) - v = Validation::Validation.new :validation_type => "bootstrapping", + v = Validation::Validation.create :validation_type => "bootstrapping", :test_target_dataset_uri => params[:dataset_uri], :prediction_feature => params[:prediction_feature], :algorithm_uri => params[:algorithm_uri] @@ -355,8 +364,9 @@ get '/bootstrapping' do LOGGER.info "list all bootstrapping-validations, params: "+params.inspect #uri_list = Validation::Validation.find( :all, :conditions => { :validation_type => "bootstrapping" } ).collect{ |v| v.validation_uri }.join("\n")+"\n" #uri_list = Validation::Validation.all( :validation_type => "bootstrapping" ).collect{ |v| v.validation_uri }.join("\n")+"\n" - params[:validation_type] = "bootstrapping" - uri_list = Lib::DataMapperUtil.all(Validation::Validation,params).collect{ |v| v.validation_uri }.join("\n")+"\n" + #params[:validation_type] = "bootstrapping" + #uri_list = Lib::DataMapperUtil.all(Validation::Validation,params).collect{ |v| v.validation_uri }.join("\n")+"\n" + uri_list = Validation::Validation.find(:validation_type => "bootstrapping").collect{|v| v.validation_uri}.join("\n") + "\n" if request.env['HTTP_ACCEPT'] =~ /text\/html/ related_links = @@ -388,7 +398,7 @@ post '/training_test_split' do params.merge!( Validation::Util.train_test_dataset_split(params[:dataset_uri], params[:prediction_feature], @subjectid, params[:split_ratio], params[:random_seed], OpenTox::SubTask.create(task,0,33))) - v = Validation::Validation.new :validation_type => "training_test_split", + v = Validation::Validation.create :validation_type => "training_test_split", :training_dataset_uri => params[:training_dataset_uri], :test_dataset_uri => params[:test_dataset_uri], :test_target_dataset_uri => params[:dataset_uri], @@ -406,8 +416,9 @@ get '/training_test_split' do LOGGER.info "list all training-test-split-validations, params: "+params.inspect #uri_list = Validation::Validation.find( :all, :conditions => { :validation_type => "training_test_split" } ).collect{ |v| v.validation_uri }.join("\n")+"\n" #uri_list = Validation::Validation.all( :validation_type => "training_test_split" ).collect{ |v| v.validation_uri }.join("\n")+"\n" - params[:validation_type] = "training_test_split" - uri_list = Lib::DataMapperUtil.all(Validation::Validation,params).collect{ |v| v.validation_uri }.join("\n")+"\n" + #params[:validation_type] = "training_test_split" + #uri_list = Lib::DataMapperUtil.all(Validation::Validation,params).collect{ |v| v.validation_uri }.join("\n")+"\n" + uri_list = Validation::Validation.find(:validation_type => "training_test_split").collect{|v| v.validation_uri}.join("\n") + "\n" if request.env['HTTP_ACCEPT'] =~ /text\/html/ related_links = @@ -440,7 +451,7 @@ post '/cleanup/?' do deleted << val.validation_uri #Validation::Validation.delete(val.id) val.subjectid = @subjectid - val.delete + val.delete_validation end LOGGER.info "validation cleanup, deleted "+deleted.size.to_s+" validations" deleted.join("\n")+"\n" @@ -463,7 +474,7 @@ post '/validate_datasets' do params[:validation_type] = "validate_datasets" if params[:model_uri] - v = Validation::Validation.new params + v = Validation::Validation.create params v.subjectid = @subjectid v.compute_validation_stats_with_model(nil,false,task) else @@ -475,7 +486,7 @@ post '/validate_datasets' do predicted_feature = params.delete("predicted_feature") feature_type = "classification" if params.delete("classification")!=nil feature_type = "regression" if params.delete("regression")!=nil - v = Validation::Validation.new params + v = Validation::Validation.create params v.subjectid = @subjectid v.compute_validation_stats(feature_type,predicted_feature,nil,nil,false,task) end @@ -532,7 +543,7 @@ get '/:id' do # rescue ActiveRecord::RecordNotFound => ex # raise OpenTox::NotFoundError.new "Validation '#{params[:id]}' not found." # end - validation = Validation::Validation.get(params[:id]) + validation = Validation::Validation[params[:id]] raise OpenTox::NotFoundError.new "Validation '#{params[:id]}' not found." unless validation case request.env['HTTP_ACCEPT'].to_s @@ -566,5 +577,5 @@ delete '/:id' do validation.subjectid = @subjectid raise OpenTox::NotFoundError.new "Validation '#{params[:id]}' not found." unless validation content_type "text/plain" - validation.delete + validation.delete_validation end \ No newline at end of file diff --git a/validation/validation_format.rb b/validation/validation_format.rb index f69ceac..6fdea61 100755 --- a/validation/validation_format.rb +++ b/validation/validation_format.rb @@ -4,15 +4,15 @@ require "lib/format_util.rb" module Validation # adding to_yaml and to_rdf functionality to validation - class Validation < Lib::Validation + class Validation # builds hash for valiation, as the internal presentation differs from the owl-object # the hash is directly printed in to_yaml, or added to the owl-structure def get_content_as_hash() h = {} - (Lib::VAL_PROPS - [:validation_uri]).each do |p| - h[p] = self.send(p) + (VAL_PROPS - [:validation_uri]).each do |p| + h[p] = self.send(p.to_s) end if crossvalidation_id!=nil cv = {:type => OT.CrossvalidationInfo} @@ -22,13 +22,14 @@ module Validation h[:crossvalidation_info] = cv end if classification_statistics - raise "classification_statistics is no has: "+classification_statistics.class.to_s unless classification_statistics.is_a?(Hash) + raise "classification_statistics is no hash: "+classification_statistics.class.to_s+" -> '"+ + classification_statistics.to_s+"'" unless classification_statistics.is_a?(Hash) clazz = { :type => OT.ClassificationStatistics } - Lib::VAL_CLASS_PROPS_SINGLE.each{ |p| clazz[p] = classification_statistics[p] } + VAL_CLASS_PROPS_SINGLE.each{ |p| clazz[p] = classification_statistics[p] } # transpose results per class class_values = {} - Lib::VAL_CLASS_PROPS_PER_CLASS.each do |p| + VAL_CLASS_PROPS_PER_CLASS.each do |p| raise "missing classification statitstics: "+p.to_s+" "+classification_statistics.inspect if classification_statistics[p]==nil classification_statistics[p].each do |class_value, property_value| class_values[class_value] = {:class_value => class_value, :type => OT.ClassValueStatistics} unless class_values.has_key?(class_value) @@ -54,7 +55,7 @@ module Validation h[:classification_statistics] = clazz elsif regression_statistics regr = {:type => OT.RegressionStatistics } - Lib::VAL_REGR_PROPS.each{ |p| regr[p] = regression_statistics[p]} + VAL_REGR_PROPS.each{ |p| regr[p] = regression_statistics[p]} h[:regression_statistics] = regr end return h @@ -72,17 +73,17 @@ module Validation end - class Crossvalidation < Lib::Crossvalidation + class Crossvalidation def get_content_as_hash h = {} - (Lib::CROSS_VAL_PROPS_REDUNDANT - [:crossvalidation_uri]).each do |p| + (CROSS_VAL_PROPS_REDUNDANT - [:crossvalidation_uri]).each do |p| h[p] = self.send(p) end v = [] #Validation.find( :all, :conditions => { :crossvalidation_id => self.id } ).each do |val| - Validation.all( :crossvalidation_id => self.id ).each do |val| + Validation.find( :crossvalidation_id => self.id ).each do |val| v.push( val.validation_uri.to_s ) end h[:validation_uris] = v diff --git a/validation/validation_service.rb b/validation/validation_service.rb index a1efba5..93c167f 100755 --- a/validation/validation_service.rb +++ b/validation/validation_service.rb @@ -30,7 +30,7 @@ end module Validation - class Validation < Lib::Validation + class Validation # constructs a validation object, Rsets id und uri #def initialize( params={} ) @@ -43,7 +43,7 @@ module Validation # deletes a validation # PENDING: model and referenced datasets are deleted as well, keep it that way? - def delete( delete_all=true ) + def delete_validation( delete_all=true ) if (delete_all) to_delete = [:model_uri, :training_dataset_uri, :test_dataset_uri, :test_target_dataset_uri, :prediction_dataset_uri ] case self.validation_type @@ -72,7 +72,7 @@ module Validation end end end - self.destroy + self.delete if (subjectid) begin res = OpenTox::Authorization.delete_policies_from_uri(validation_uri, subjectid) @@ -222,17 +222,13 @@ module Validation # :percent_unpredicted => prediction.percent_unpredicted, # :finished => true} # self.save! - self.attributes= {:num_instances => prediction.num_instances, + self.update :num_instances => prediction.num_instances, :num_without_class => prediction.num_without_class, :percent_without_class => prediction.percent_without_class, :num_unpredicted => prediction.num_unpredicted, :percent_unpredicted => prediction.percent_unpredicted, - :finished => true} - begin - self.save - rescue DataMapper::SaveFailureError => e - raise "could not save validation: "+e.resource.errors.inspect - end + :finished => true + raise unless self.valid? end task.progress(100) if task @@ -240,7 +236,7 @@ module Validation end end - class Crossvalidation < Lib::Crossvalidation + class Crossvalidation # constructs a crossvalidation, id and uri are set #def initialize( params={} ) @@ -262,12 +258,12 @@ module Validation end # deletes a crossvalidation, all validations are deleted as well - def delete - Validation.all(:crossvalidation_id => self.id).each do |v| + def delete_crossvalidation + Validation.find(:crossvalidation_id => self.id).each do |v| v.subjectid = self.subjectid - v.delete + v.delete_validation end - self.destroy + self.delete if (subjectid) begin res = OpenTox::Authorization.delete_policies_from_uri(crossvalidation_uri, subjectid) @@ -281,6 +277,9 @@ module Validation # creates the cv folds def create_cv_datasets( prediction_feature, task=nil ) + self.random_seed = 1 unless self.random_seed + self.num_folds = 10 unless self.num_folds + self.stratified = false unless self.stratified if copy_cv_datasets( prediction_feature ) # dataset folds of a previous crossvalidaiton could be used task.progress(100) if task @@ -296,7 +295,7 @@ module Validation i = 0 task_step = 100 / self.num_folds.to_f; @tmp_validations.each do | val | - validation = Validation.new val + validation = Validation.create val validation.subjectid = self.subjectid validation.validate_algorithm( algorithm_params, OpenTox::SubTask.create(task, i * task_step, ( i + 1 ) * task_step) ) @@ -316,8 +315,7 @@ module Validation # copies datasets from an older crossvalidation on the same dataset and the same folds # returns true if successfull, false otherwise def copy_cv_datasets( prediction_feature ) - - cvs = Crossvalidation.all( { + cvs = Crossvalidation.find( { :dataset_uri => self.dataset_uri, :num_folds => self.num_folds, :stratified => self.stratified, @@ -332,7 +330,7 @@ module Validation OpenTox::Dataset.exist?(v.training_dataset_uri,self.subjectid) and OpenTox::Dataset.exist?(v.test_dataset_uri,self.subjectid) #make sure self.id is set - self.save if self.new? + #self.save if self.new? tmp_val << { :validation_type => "crossvalidation", :training_dataset_uri => v.training_dataset_uri, :test_dataset_uri => v.test_dataset_uri, @@ -354,16 +352,14 @@ module Validation # creates cv folds (training and testdatasets) # stores uris in validation objects def create_new_cv_datasets( prediction_feature, task = nil ) - - raise "random seed not set "+self.inspect unless self.random_seed LOGGER.debug "creating datasets for crossvalidation" orig_dataset = OpenTox::Dataset.find(self.dataset_uri,self.subjectid) raise OpenTox::NotFoundError.new "Dataset not found: "+self.dataset_uri.to_s unless orig_dataset shuffled_compounds = orig_dataset.compounds.shuffle( self.random_seed ) - unless self.stratified - split_compounds = shuffled_compounds.chunk( self.num_folds ) + unless self.stratified + split_compounds = shuffled_compounds.chunk( self.num_folds.to_i ) else class_compounds = {} # "inactive" => compounds[], "active" => compounds[] .. shuffled_compounds.each do |c| @@ -406,7 +402,7 @@ module Validation @tmp_validations = [] - (1..self.num_folds).each do |n| + (1..self.num_folds.to_i).each do |n| datasetname = 'cv'+self.id.to_s + #'_d'+orig_dataset.name.to_s + @@ -418,7 +414,7 @@ module Validation test_compounds = [] train_compounds = [] - (1..self.num_folds).each do |nn| + (1..self.num_folds.to_i).each do |nn| compounds = split_compounds.at(nn-1) if n == nn @@ -428,7 +424,7 @@ module Validation end end - raise "internal error, num test compounds not correct" unless (shuffled_compounds.size/self.num_folds - test_compounds.size).abs <= 1 + raise "internal error, num test compounds not correct" unless (shuffled_compounds.size/self.num_folds.to_i - test_compounds.size).abs <= 1 raise "internal error, num train compounds not correct" unless shuffled_compounds.size - test_compounds.size == train_compounds.size LOGGER.debug "training set: "+datasetname+"_train, compounds: "+train_compounds.size.to_s @@ -442,7 +438,7 @@ module Validation { DC.title => datasetname + '_test', DC.creator => source }, self.subjectid ).uri #make sure self.id is set - self.save if self.new? + #self.save if self.new? tmp_validation = { :validation_type => "crossvalidation", :training_dataset_uri => train_dataset_uri, :test_dataset_uri => test_dataset_uri, @@ -551,9 +547,10 @@ module Validation # splits a dataset into test and training dataset # returns map with training_dataset_uri and test_dataset_uri def self.train_test_dataset_split( orig_dataset_uri, prediction_feature, subjectid, split_ratio=nil, random_seed=nil, task=nil ) - split_ratio=0.67 unless split_ratio + split_ratio = split_ratio.to_f random_seed=1 unless random_seed + random_seed = random_seed.to_i orig_dataset = OpenTox::Dataset.find orig_dataset_uri, subjectid orig_dataset.load_all subjectid diff --git a/validation/validation_test.rb b/validation/validation_test.rb index ffb25c4..efa8ad5 100755 --- a/validation/validation_test.rb +++ b/validation/validation_test.rb @@ -184,13 +184,13 @@ class ValidationTest < Test::Unit::TestCase #ambit_service = "https://ambit.uni-plovdiv.bg:8443/ambit2" #https%3A%2F%2Fambit.uni-plovdiv.bg%3A8443%2Fambit2 - + # post "/validate_datasets",{ -# :test_dataset_uri=>ambit_service+"/dataset/R401577?max=50", -# :prediction_dataset_uri=>ambit_service+"/dataset/R401577?max=50&feature_uris[]="+CGI.escape(ambit_service)+"%2Fmodel%2F35194%2Fpredicted", -# #:test_target_dataset_uri=>ambit_service+"/dataset/R401560", -# :prediction_feature=>ambit_service+"/feature/26221", -# :predicted_feature=>ambit_service+"/feature/218699", +# :test_dataset_uri=>"http://local-ot/dataset/94", +# :prediction_dataset_uri=>'http://local-ot/dataset/96', +# :test_target_dataset_uri=>'http://local-ot/dataset/92', +# :prediction_feature=>'http://local-ot/dataset/92/feature/Hamster%20Carcinogenicity', +# :predicted_feature=>"", # :classification=>"true", # :subjectid=>SUBJECTID} # #:model_uri=>"http://local-ot/model/31"} @@ -199,8 +199,8 @@ class ValidationTest < Test::Unit::TestCase # uri = last_response.body # val = wait_for_task(uri) # puts val -# #get "/"+val.split("/")[-1] - +# get "/"+val.split("/")[-1] +# puts last_response.body # d = OpenTox::Dataset.find("https://ambit.uni-plovdiv.bg:8443/ambit2/dataset/R545",SUBJECTID) # puts d.compounds.inspect @@ -257,7 +257,9 @@ class ValidationTest < Test::Unit::TestCase #delete "/1",:subjectid=>SUBJECTID - run_test("19i") + prepare_examples() + + #run_test("1a") #,{:validation_uri => "http://local-ot/validation/crossvalidation/1"}) #run_test("3a","http://local-ot/validation/crossvalidation/4") #run_test("3b","http://local-ot/validation/crossvalidation/3") -- cgit v1.2.3 From e0fe8f2d9195f91db9314e55c1b8c8d9a4efa8af Mon Sep 17 00:00:00 2001 From: mguetlein Date: Tue, 3 May 2011 15:10:36 +0200 Subject: remove install-gems task (moved to opentox-ruby), adding public folder --- Rakefile | 39 --------------------------------------- public/robots.txt | 2 ++ 2 files changed, 2 insertions(+), 39 deletions(-) create mode 100644 public/robots.txt diff --git a/Rakefile b/Rakefile index de1e69d..ca9856d 100755 --- a/Rakefile +++ b/Rakefile @@ -1,45 +1,6 @@ require 'rubygems' require 'rake' - - -REPORT_GEMS = [ 'opentox-ruby', 'ruby-plot', 'rinruby', 'dm-core', 'dm-serializer', 'dm-timestamps', - 'dm-types', 'dm-migrations', 'dm-validations', 'dm-sqlite-adapter', 'benchmark'] -VALIDATION_GEMS = [ 'opentox-ruby', 'ruby-plot'] - -GEM_VERSIONS = { "ruby-plot" => "= 0.0.2" } -GEM_INSTALL_OPTIONS = {} - -#GEM_VERSIONS = { "activerecord" => "= 2.3.8", "activesupport" => "= 2.3.8", "ar-extensions" => "= 0.9.2", "ruby-plot" => "= 0.0.2" } -## this is needed because otherwihse ar-extensions adds activesupport 3.0.0 which confuses things -#GEM_INSTALL_OPTIONS = { "ar-extensions" => "--ignore-dependencies" } - -desc "Install required gems" -task :install_gems do - (REPORT_GEMS + VALIDATION_GEMS).uniq.each do |g| - begin - if GEM_VERSIONS.has_key?(g) - print "> gem "+g+", '"+GEM_VERSIONS[g]+"' .. " - gem g, GEM_VERSIONS[g] - else - print "> gem "+g+" .. " - gem g - end - puts "ok" - rescue LoadError => ex - puts "NOT FOUND" - options = "" - options += "--version '"+GEM_VERSIONS[g]+"' " if GEM_VERSIONS.has_key?(g) - options += GEM_INSTALL_OPTIONS[g]+" " if GEM_INSTALL_OPTIONS.has_key?(g) - #cmd = "sudo env PATH=$PATH gem install "+options+" "+g - cmd = "gem install "+options+" "+g - puts "installing gem, this may take some time..." - puts cmd - IO.popen(cmd){ |f| puts f.gets } - end - end -end - desc "Perform unit tests" task :test do require 'test/unit_test.rb' diff --git a/public/robots.txt b/public/robots.txt new file mode 100644 index 0000000..1f53798 --- /dev/null +++ b/public/robots.txt @@ -0,0 +1,2 @@ +User-agent: * +Disallow: / -- cgit v1.2.3 From 584fbc8ad71f9487909c2e19f80c84ac91d33d10 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Fri, 6 May 2011 19:43:23 +0200 Subject: extended validation unit testing (to test new OpenTox::Validation functionality) --- data/EPAFHM.csv | 618 +++++++++++++++++++++++++++++++++++ data/StJudes-HepG2-testset_Class.csv | 173 ++++++++++ test/unit_test.rb | 140 +++++--- 3 files changed, 893 insertions(+), 38 deletions(-) create mode 100644 data/EPAFHM.csv create mode 100644 data/StJudes-HepG2-testset_Class.csv diff --git a/data/EPAFHM.csv b/data/EPAFHM.csv new file mode 100644 index 0000000..9092abc --- /dev/null +++ b/data/EPAFHM.csv @@ -0,0 +1,618 @@ +"STRUCTURE_SMILES","LC50_mmol" +"C1=CC(C=O)=CC(OC)=C1OCCCCCC",1.13E-02 +"C1(OC)=C([N+]([O-])=O)C(C=O)=CC(Br)=C1O",2.66E-01 +"CCCCCCCCOC(=O)C1=CC=CC(C(=O)OCCCCCCCC)=C1", +"C1=CC(Cl)=CC=C1OC2=C([N+](=O)[O-])C=CC=C2",7.69E-03 +"CC1=C(NC=O)C=CC=C1Cl",2.75E-01 +"CCCCOC(=O)C1=CC=CC(C(=O)OCCCC)=C1",3.23E-03 +"C(C1=CC=CC=C1)(C2=CC=CC=C2)(O)C#C",5.33E-02 +"CCCSCCSCCC",4.22E-02 +"CCCCCCCCOC(=O)C1=CC=C(C(=O)OCCCCCCCC)C=C1", +"OCCCCOC(=O)C1=CC=CC=C1C(=O)OCCCCOC(=O)C2=CC=CC=C2C(=O)OCCCCO", +"CCCSCCCCSCCC",1.45E-02 +"C1([N+](=O)[O-])=CC=C(C)C=C1OP(=O)(OC2=C([N+](=O)[O-])C=CC(C)=C2)OC3=C([N+]([O-])=O)C=CC(C)=C3", +"C1=C([N+]([O-])=O)C=CC=C1P(=O)(C2=CC([N+](=O)[O-])=CC=C2)C3=CC([N+](=O)[O-])=CC=C3", +"ClCCOC(=O)NC1CCCCC1",1.70E-01 +"O=C1C(C2=CC=CC=C2)(C(=O)NC(=O)N1)CC",2.08E+00 +"OC1=C(C=C(C=C1)[N+](=O)[O-])[N+](=O)[O-]",5.92E-02 +"NC(=O)OCC",5.88E+01 +"[O-]C(C1=CC=CC=C1O)=O.[Na+]",1.25E+01 +"C1=CC=CC=C1C(=O)N",5.46E+00 +"CC[N+](CC)(CC)CC1(=CC=CC=C1).[Cl-]",7.07E-01 +"CN(C)N",1.31E-01 +"CC(C(C(NC([O-])=N1)=O)(C1=O)CC)CCC.[Na+]",1.99E-01 +"N1C(=O)C(CC)(CCC(C)C)C(=O)NC1=O",3.77E-01 +"O=C1C2=C(N=CN2C)N(C(=O)N1C)C",7.78E-01 +"C1=CC=C2C(=C1)C(=O)C(C)=CC2=O",6.39E-04 +"OC1=C(Cl)C(Cl)=C(Cl)C=C1Cl",4.44E-03 +"OC1=CC(C)=C(Cl)C=C1",3.84E-02 +"[H]Cl.C1=CC=CC=C1CC2=NCCN2",1.80E+00 +"O=S(O)(O)=O.C1(=CC=CC=C1CC(N)C).C2=CC=CC=C2CC(N)C",7.82E-02 +"O(CC)CC",3.45E+01 +"O=C2N5[C@@]3([H])[C@@]1([H])[C@](C[C@]4([H])N(C7)CC[C@]34C6=C5C=CC=C6)([H])C7=CCO[C@]([H])1C2.O=C9N%12[C@@]%10([H])[C@@]8([H])[C@](C[C@]%11([H])N(C%14)CC[C@]%10%11C%13=C%12C=CC=C%13)([H])C%14=CCO[C@]([H])8C9.O=S(O)(O)=O",1.11E-03 +"NC1=CC=CC=C1",1.13E+00 +"O=C(OC1=C2C(=CC=C1)C=CC=C2)NC",4.35E-02 +"CCO",3.19E+02 +"C1(=NC=CC=C1C2CCCN2C).OS(O)(=O)=O",5.30E-02 +"C1(O)=CC=CC=C1C(=O)N",7.36E-01 +"O=C1NC(=O)NC=C1", +"CCCCCC=O",1.75E-01 +"O=C1OC2=CC=CC=C2C(O)=C1CC3=C(O)C4=CC=CC=C4OC3=O",1.52E-02 +"C1(C=O)=CC=C(OC2=CC=CC=C2)C=C1",2.32E-02 +"CO",9.17E+02 +"OC(C)C",1.44E+02 +"CC(=O)C",1.23E+02 +"ClC(Cl)Cl",5.92E-01 +"CS(=O)C",4.35E+02 +"ClC(C(Cl)(Cl)Cl)(Cl)Cl",6.00E-03 +"OC1=C(C=C(C(=C1CC2=C(C(=CC(=C2Cl)Cl)Cl)O)Cl)Cl)Cl",5.16E-05 +"C1=CC(=CC=C1N)C(=O)CC",9.79E-01 +"OCCC",7.57E+01 +"CCCCO",2.33E+01 +"CCCCCO",5.36E+00 +"C1=CC=CC=C1",2.25E-01 +"CC(Cl)(Cl)Cl",3.55E-01 +"[S-]C1=NC(C(C(C)CCC)(CC)C(N1)=O)=O.[Na+]",9.91E-02 +"CC#N",4.01E+01 +"CC=O",7.67E-01 +"ClCCl",3.89E+00 +"IC(I)I",7.42E-03 +"[N+](C)(C)(C)C.[Cl-]",4.22E+00 +"CC(C)(C)O",8.65E+01 +"C(F)(F)(F)CO",1.19E+00 +"CC(=O)C(C)(C)C",8.69E-01 +"ClC(C(Cl)Cl)(Cl)Cl",3.72E-02 +"CC1(C)NC(=O)NC1=O",1.29E+02 +"CCC(O)(C)CC",6.58E+00 +"C#CC(O)(C)CC",1.24E+01 +"C1CCCC(C#C)(O)C1",2.06E+00 +"CCCCOCCOP(=O)(OCCOCCCC)OCCOCCCC",2.81E-02 +"OCC(C)C",1.93E+01 +"CC(Cl)CCl",1.12E+00 +"NCC(N)C",1.36E+01 +"CC(O)CC",4.95E+01 +"CCC(=O)C",4.47E+01 +"OC(C)CN",3.36E+01 +"ClC(CCl)Cl",6.12E-01 +"ClC(=CCl)Cl",3.36E-01 +"CC(=O)OC",4.82E+00 +"ClC(C(Cl)Cl)Cl",1.21E-01 +"C1(C)(C)CCCC(C)=C1C=CC(C)=O",2.65E-02 +"ClC1=C(O)C(Cl)=CC(=C1)C(C2=CC(Cl)=C(O)C(=C2)Cl)(C)C",3.63E-03 +"C(C1C=CC(=CC=1)O)(CC)(C)C",1.58E-02 +"C1CC(CCC1(N)C)C(C)(N)C",3.83E-01 +"ClC(Cl)C1=C(Cl)C=CC=C1Cl",4.22E-03 +"C1=CC=C2C=CC=C3C2=C1CC3",1.12E-02 +"CC1=CNC2=C1C=CC=C2",6.74E-02 +"O=C([C@](C(C=C4OC)=C(C=C4OC)OC3)([H])[C@]3([H])O2)C(C=C5)=C2C1=C5O[C@@H]([C@@](C)=C)C1",1.32E-05 +"O=C2C1=NC3=C(C=C(C)C(C)=C3)N(C[C@H](O)[C@H](O)[C@H](O)CO)C1=NC(N2)=O", +"C1=CC=CC=C1OC(=O)C2=CC=CC=C2C(=O)OC3=CC=CC=C3",2.51E-04 +"O=C1C2=C(C=CC=C2)C(=O)C3=C1C=CC=C3", +"CCOC(=O)C1=CC=CC=C1C(=O)OCC",1.43E-01 +"C1=CC=C(C(=O)OCCCC)C(=C1)C(=O)OCCCC",3.5900E-03 +"CCC1=C(Br)C(Br)=C(Br)C(Br)=C1Br", +"O=C1C2=C(C=CC=C2)N=NN1CSP(=S)(OC)OC",2.02E-04 +"C1=CC=CC=C1NC(=O)C2=C(O)C=CC=C2",1.85E-02 +"Cl\C(Cl)=C(Cl)/C(Cl)=C(Cl)\Cl",3.45E-04 +"OC1=C(C(=C(C(=C1Cl)Cl)Cl)Cl)Cl",9.12E-04 +"OC1=C(C=C(C=C1Cl)Cl)Cl",2.48E-02 +"OC1=CC(C(F)(F)F)=C([N+]([O-])=O)C=C1",4.41E-02 +"C1(N)=CC=CC=C1C(=O)N",2.90E+00 +"OC1=C([N+]([O-])=O)C=CC=C1",1.15E+00 +"OC1=C(C=C(C=C1C(CC)C)[N+](=O)[O-])[N+](=O)[O-]",2.23E-03 +"O=CC1=CC=CC=C1O",1.88E-02 +"OC1=CC=CC2=CC=CC=C12",3.21E-02 +"OC1=C(C=CC=C1)C2=CC=CC=C2",3.61E-02 +"C12C(=O)C3=C(OC=1C=CC=C2)C=CC=C3", +"BrC1=C(O)C(C=O)=CC(Br)=C1",3.04E-03 +"C1=C2C(=CC=C1)C=CC=C2",4.79E-02 +"N1=CC=CC2=C1C=CC=C2",6.02E-01 +"CCN(CC)C1CCCCC1",1.38E-01 +"CCN(CC)C1=CC=CC=C1",1.10E-01 +"OCCN(CC)C1=CC(C)=CC=C1",2.95E-01 +"C1CCCCC1C2CCCCC2", +"C1=CC=CC=C1C(=O)CC(=O)C",6.78E-03 +"C1=CC(N)=CC=C1C(=O)OCC",2.16E-01 +"O1COC2=CC=C(/C=C/C=C/C(=O)N3CCCCC3)C=C12",2.75E-02 +"C1(C=O)=C(O)C=C(O)C=C1",9.50E-02 +"CC1=C(C)C=CC=C1",1.54E-01 +"OC1=C(C)C=CC=C1",1.29E-01 +"ClC1=C(C=CC=C1)Cl",6.40E-02 +"NC1=C(Cl)C=CC=C1",4.50E-02 +"CC1=C(F)C=CC=C1",1.76E-01 +"OC1=CC=CC=C1Cl",8.87E-02 +"CC1=C(C=CC(=C1)C)C",6.42E-02 +"CC1=CC(Cl)=C(Cl)C=C1",1.81E-02 +"NC1=CC(Cl)=C(Cl)C=C1",4.67E-02 +"C=C(C)C(=O)OCC=C",7.85E-03 +"BrCC(Br)CO",3.26E-01 +"CC(C=O)CC",1.16E-01 +"ClCC(Cl)CCl",3.91E-01 +"CCC(=O)CC",1.79E+01 +"CCC(C)=NO",9.68E+00 +"OCCN(C(C)C)C(C)C",1.38E+00 +"NC1=C([N+]([O-])=O)C=C([N+]([O-])=O)C=C1",8.08E-02 +"OC1=CC=C(Cl)C=C1CC2=CC(Cl)=CC=C2O",1.15E-03 +"C[N+](C1=CC=CC=C1)(C)C.[I-]",9.24E-01 +"C(C1=CC=C(O)C=C1)(C)(C)C",3.43E-02 +"C1=CC=CC=C1C(C)C",5.26E-02 +"C1=CC=CC=C1C(=O)C",1.35E+00 +"O=[N+](C1=CC=CC=C1)[O-]",9.67E-01 +"C1=C(C(=O)C)C=C(N)C=C1",2.83E+00 +"CC1=CC([N+](=O)[O-])=CC=C1",1.87E-01 +"CN(C)C1=CC=C(C)C=C1",3.62E-01 +"O=[N+](C1=CC=C(C=C1)N)[O-]",9.05E-01 +"OC1=CC=C([N+](=O)[O-])C=C1",3.22E-01 +"CN(C)C1=CC=C(C=O)C=C1",3.06E-01 +"[O-][N+](=O)C1=CC=C([N+]([O-])=O)C=C1",4.22E-03 +"CCN(CCO)CC",1.52E+01 +"CCC1=CC=CC=C1",9.89E-02 +"NCC1=CC=CC=C1",9.52E-01 +"O=CC1=CC=CC=C1",9.30E-02 +"C1=CC=C(NC)C=C1",9.33E-01 +"ON=C1CCCCC1",1.84E+00 +"N1=C(C#N)C=CC=C1",6.97E+00 +"N1=C(CC)C=CC=C1",3.86E+00 +"CC1(C)OCC(CO)O1",1.26E+02 +"C1N2CN3CN(C2)CN1C3",3.55E+02 +"C1=CC=CC=C1OC2=CC=CC=C2",2.35E-02 +"CCNC1=CC(C)=CC=C1",3.66E-01 +"CCCN(CCC)CCC",3.55E-01 +"OCCN(CCO)CCO",7.91E+01 +"C1=CC=CC=C1CCC(C)(C)O",4.04E-01 +"C1=CC(C)=CC=C1SSC2=CC=C(C)C=C2", +"OCCN1CCNCC1",4.92E+01 +"CN(C)CC1=CC=CC=C1",2.80E-01 +"C1(=CC=C(C=C1)O)NC(C)=O",5.39E+00 +"NC1=CC=C(CCCC)C=C1",6.80E-02 +"CCCCCCCCCC1=CC=C(O)C=C1",6.35E-04 +"NC1=CC=C(CCCCCCCCCCCC)C=C1", +"CCC(CCCC)CO",2.17E-01 +"ClC1=CC=C(C=O)C=C1",1.56E-02 +"N1=C(C)C=CC(CC)=C1",6.69E-01 +"CC(=O)CCCN(CC)CC",2.14E+00 +"CCOC(=O)CC(=O)OCC",9.18E-02 +"OC1=C(C)C=C(C)C=C1",1.36E-01 +"CCCCOC(=O)C=CC(=O)OCCCC",2.76E-03 +"CCCCOC(=O)CCCCC(=O)OCCCC",1.41E-02 +"NC1=CC=C(Br)C=C1",2.76E-01 +"CC1=CC=C(C)C=C1",8.35E-02 +"OC1=CC=C(C)C=C1",1.53E-01 +"NC1=CC=C(C=C1)Cl",2.46E-01 +"OC1=CC=C(Cl)C=C1",4.75E-02 +"NC1=CC=C(C)C=C1",1.49E+00 +"C=CC(=O)OCC(C)C",1.64E-02 +"BrCCC",5.47E-01 +"C=CC=O",3.03E-04 +"ClCCCl",1.37E+00 +"ClCCO",6.67E-01 +"CCCN",5.21E+00 +"CCC#N",2.76E+01 +"ClCC#N",1.78E-02 +"NCCN",3.66E+00 +"C=CCO",5.51E-03 +"C(O)C#C",2.64E-02 +"CC=NO",1.29E+00 +"C[C@](CC(O)C)(C)O",9.05E+01 +"CC(C)(C)CC(C)(C)N",1.90E-01 +"CC(C)(C)SC(C)(C)C",1.99E-01 +"CCCC(=O)C",1.44E+01 +"CC(=O)CC(C)C",5.21E+00 +"CC(C)OC(C)C",7.69E+00 +"CC1=CC=CC=C1",3.68E-01 +"N1=CC=C(C)C=C1",4.33E+00 +"ClC1=CC=CC=C1",1.50E-01 +"C1CCCCC1O",7.03E+00 +"O=C1CCCCC1",6.33E+00 +"OC1=CC=CC=C1",3.47E-01 +"N1=CC(C)=CC=C1",1.55E+00 +"CN1CCNCC1",2.30E+01 +"N1=C(C)C=CC=C1",9.63E+00 +"N1CC(C)NCC1",2.24E+01 +"CC(=O)OCCC",5.87E-01 +"BrCCCBr",1.04E-02 +"BrCCCC",2.68E-01 +"CCCCN",3.66E+00 +"C=CCC#N",2.71E+00 +"NCCCN",1.61E+01 +"N#CCC#N",8.48E-03 +"COCCN",6.98E+00 +"CCNCC",1.17E+01 +"N1C=CC=C1",3.13E+00 +"C1CCCO1",3.00E+01 +"C1=COC=C1",8.96E-01 +"CC(C)(C)SSC(C)(C)C",7.68E-03 +"CC(=O)CCC(C)C",1.39E+00 +"CCOC(=O)CCCCCCCCC(=O)OCC",1.05E-02 +"CCCCCC(=O)C",1.15E+00 +"CCCCCC",2.90E-02 +"ClCCCCCl",4.06E-01 +"CCCCCN",2.03E+00 +"CCCCC=O",1.50E-01 +"C(O)C#CC(O)",6.23E-01 +"CCNCCO",1.66E+01 +"C1CCCCC1",5.38E-02 +"N1=CC=CC=C1",1.26E+00 +"C1OCOCO1",6.61E+01 +"O=C(CC/C=C(C)/C)C",6.79E-01 +"CC(=O)CCCCCC",2.81E-01 +"CC(=O)OCCOCC",3.19E-01 +"BrCCCCCC",2.09E-02 +"CCCCCCN",5.59E-01 +"CCCCCCO",9.56E-01 +"OCCNCCO",4.48E+01 +"OCCOCCO",7.09E+02 +"CCCSCCC",1.84E-01 +"CCCCCCCN",1.89E-01 +"N#CCCCCC#N",1.79E+01 +"CCCCCCCO",2.97E-01 +"BrCCCCCCCC",4.34E-03 +"CCCCCCCCN",4.02E-02 +"CCCCCCCCO",1.04E-01 +"CCOCCOCCO",1.98E+02 +"CCCCCCCCC(=O)O",6.57E-01 +"CCCCCCCCCC(=O)C",8.81E-03 +"CCCCCCCCCN",1.50E-02 +"OCCOCCOCCO",4.59E+02 +"CCCCCCCCCCO",1.52E-02 +"CCCCCCCCCCCCCO", +"CC(C)OC1=CC=CC=C1OC(=O)NC",4.21E-02 +"CC(O)(C)C#C",3.91E+01 +"C(Cl)(Cl)(Cl)CO",2.00E+00 +"OC(C1=CC=C(C=C1)Cl)(C2=CC=C(C=C2)Cl)C(Cl)(Cl)Cl",1.67E-03 +"C1=CC=CC=C1OP(=O)(OC2=CC=CC=C2)OC3=CC=CC=C3",2.66E-03 +"S(=O)(C)C1=CC=C(OP(=S)(OCC)OCC)C=C1",1.40E-01 +"CC(C=NOC(=O)NC)(SC)C",4.52E-03 +"O=C(C1=C(C=CC=C1)C(=O)OCC(CCCC)CC)OCC(CCCC)CC", +"CCCCCCCCOC(=O)C1=CC=CC=C1C(=O)OCCCCCCCC", +"C1=CC=CC(O)=C1C(=O)OC2=CC=CC=C2",5.51E-03 +"C1=CC=CC(O)=C1C(=O)OCC",1.22E-01 +"OC1=C(Br)C=C(Br)C=C1Br",1.98E-02 +"OC1=C(C=C(C=C1)N)[N+](=O)[O-]",2.35E-01 +"C1=CC=CC=C1C(=O)C2=CC=CC=C2",8.07E-02 +"C1=CC=CC=C1N(CCO)CCO",4.06E+00 +"C1=CC(=CC=C1C=O)N(CC)CC",1.35E-01 +"OC1=C(C=CC=C1)O",8.37E-02 +"ClC1=C(Cl)C=C(Cl)C=C1",1.65E-02 +"ClC1=C(C=CC(=C1)Cl)O",4.75E-02 +"CC1=C(C=C(C=C1)[N+](=O)[O-])[N+](=O)[O-]",1.33E-01 +"O=CC1=CC(OCC)=C(O)C=C1",5.27E-01 +"C1(C=O)=CC(OC)=C(O)C=C1",5.51E-01 +"CN(C1=CC=CC=C1)C",5.29E-01 +"ClC1=CC([N+](=O)[O-])=CC=C1",1.19E-01 +"O=C(C(SP(=S)(OC)OC)CC(=O)OCC)OCC",4.27E-02 +"NC1=C(Cl)C=C([N+]([O-])=O)C=C1",1.16E-01 +"C1(C=O)=CC=C(C(C)C)C=C1",4.47E-02 +"C1=CC=CC=C1NC2=CC=CC=C2",2.24E-02 +"C1=CC=CC=C1OCCO",2.49E+00 +"OC1=CC=C(CC)C=C1",8.51E-02 +"CC(C=O)CCC",1.88E-01 +"CC(=O)CC(=O)C",1.35E+00 +"CCCCCC(=O)OCC",6.17E-02 +"CCCC=O",2.04E-01 +"CC(=O)OCCCC",1.55E-01 +"C1COCCO1",1.17E+02 +"CCCCCCCCCCCCN",5.56E-04 +"CCCCCCCCCCCCCC=O", +"CCCCOP(=O)(OCCCC)OCCCC",3.56E-02 +"O=C(CC(=O)C1)CC1(C)C",8.20E+01 +"OC(C)CCl",2.59E+00 +"ClC(=C(Cl)Cl)Cl",9.95E-02 +"CC(C1=CC=CC=C1)(O)C#C",7.73E-01 +"OC1=C(C=C(C=C1C(C)(C)C)C)C(C)(C)C",1.65E-03 +"O=S(C1=CC=CC=C1C([N-]2)=O)2=O.[Na+].[O]",8.20E+01 +"C1=CC=CC2C3=CC=CC=C3OC1=2",8.92E-03 +"C1=CC=CC=C1OC(=O)C2=C(O)C=C(N)C=C2",2.08E-02 +"CCN(CC)C(=O)C1=CC=CC(C)=C1",5.75E-01 +"CCC([O-])=O.[Na+]",4.99E+01 +"NCCN1CCNCC1",1.70E+01 +"CCCCOC(=O)CCC(=O)OCCCC",1.94E-02 +"CCOC(=O)CCCCC(=O)OCC",8.99E-02 +"OCCN",3.39E+01 +"CC(=O)OCC",2.61E+00 +"N1CC(C)OC(C)C1",3.36E+00 +"CCC1=CC(CC)=CC=C1",3.09E-02 +"ClCCCCl",9.82E-01 +"CCCCCC(O)=O",2.76E+00 +"CC(=O)OCCCCCC",3.05E-02 +"CCCCOCCCC",2.48E-01 +"CCCCCCCCCO",3.95E-02 +"CCCCCCNCCCCCC",4.21E-03 +"OCCCCCCCC\C=C/CCCCCCCC", +"C1(C=O)=C(O)C(OC)=CC=C1",1.58E-02 +"OC1=CC(OC)=CC=C1",5.96E-01 +"COC1=CC=C(C=C1)O",8.86E-01 +"COC1=CC=C(OC)C=C1",8.47E-01 +"C1=COC2=C1C=CC=C2",1.19E-01 +"C(CN(C1)C2)N(C1)C2",1.54E+01 +"C(CC(CC1CC23)C2)(C1)C3",2.06E-03 +"CCOP(OCC)(=S)SCCSCC",9.95E-03 +"[O-]C(N1)=NC(C(C(CCC)C)(CC=C)C1=O)=O.[Na+]",9.07E-02 +"BrC1=C(C)NC(=O)N(C(C)CC)C1=O",7.12E-01 +"OC1=C([N+](=O)[O-])C=CC([N+]([O-])=O)=C1",1.82E-02 +"ClC1=C(C=CC(=C1)NC(=O)N(C)C)Cl",6.09E-02 +"C1=CC(F)=CC=C1OC2=CC=C(F)C=C2",5.48E-03 +"S=P(OC1=NC(=NC(=C1)C)C(C)C)(OCC)OCC",3.07E-02 +"FC1=CC=C([N+](=O)[O-])C=C1",2.01E-01 +"C(F)(F)(F)C1=CC(C#N)=CC=C1",2.79E-01 +"NC1=CC=C(F)C=C1",1.52E-01 +"C1(C=O)=C(Cl)C=CC=C1F",5.93E-02 +"NC1=C(C(F)(F)F)C=C(F)C=C1",1.65E-01 +"C1(C=O)=C(F)C=CC=C1",1.09E-02 +"C1=CC=CC(=C1C(F)(F)F)C#N",2.47E-01 +"C1(C=O)=CC(C(F)(F)F)=CC=C1",5.31E-03 +"CNC1=CC=C(F)C=C1",3.07E-01 +"CCCCCCC(=O)CCCCCC", +"O[C@H]1[C@@]([C@@](C)2C)(C)CC[C@H]2C1",4.10E-01 +"O=C(CC1C2)C(C2)(C1(C)C)C",1.12E-01 +"CC2(C)OC1(C)CCC2CC1",6.61E-01 +"O=[C@](O)[C@@]1(C)[C@]([C@]([C@]([H])2CC3)(C)CCC1)([H])CCC2=C\C3=C(C)/C",4.93E-03 +"N1=C(C2=CC=CC=C2)NC(C3=CC=CC=C3)=C1C4=CC=CC=C4", +"C1COC2=CC=CC=C12",6.80E-01 +"O[C@H]1[C@H](CC2)C[C@H]2C1",2.03E+00 +"C1C2C=CC1CC2",1.06E-01 +"N1=C(C(=O)O)C=CC=C1C(=O)O",1.93E+00 +"N1=CC(C=O)=CC=C1",1.53E-01 +"CCCCC(=O)CCCC",2.18E-01 +"C=C(C)C(C)=C",8.41E-02 +"O=[C@](O)[C@@]3(C)[C@@]1([H])[C@@](CCC3)(C)[C@]2([H])C(C=[C@@]([C@@H](C)C)CC2)=CC1",7.87E-03 +"C1=CC=CC=C1C2=CC(=O)C3=CC=CC=C3O2",1.57E-02 +"OC1=C(C)C=C(C)C=C1C",9.54E-02 +"C1(C#N)=CC=CC=C1C",3.82E-01 +"C1(C=O)=C(C)C=CC=C1",4.40E-01 +"C1(=CC=CC=C1)C(=O)[O-].[Na+]",3.36E+00 +"OC1=C(C)C=C([N+]([O-])=O)C=C1[N+]([O-])=O",8.73E-03 +"C1=CC=CC=C1CCCCC",1.15E-02 +"O=C(OC(C)(C)C)C",2.82E+00 +"ClC1=CC(Cl)=CC=C1",5.46E-02 +"Cl\C=C\CCl",2.15E-03 +"CCCCSCCCC",2.45E-02 +"C1(O)=CC(OC)=CC=C1C(=O)C",4.18E-01 +"C1(C=O)=C([N+]([O-])=O)C=CC=C1",9.53E-02 +"O=CC1=CC=C([N+](=O)[O-])C=C1",6.68E-02 +"CC(=O)C(C)C",1.00E+01 +"OC1=C([N+]([O-])=O)C=CC=C1[N+]([O-])=O",2.16E-01 +"BrC1=C(Br)C=CC=C1",1.72E-02 +"C=CCNC1=CC=CC=C1",2.70E-01 +"NC1=CC=C(CC)C=C1",6.02E-01 +"CC(C)CC=O",3.77E-02 +"CCCCC(=O)C",4.27E+00 +"CC=CC=CC",2.43E-01 +"CCCCCCCCCCCC(=O)C",1.81E-03 +"C1=CC=CC=C1[Sn](C2=CC=CC=C2)(C3=CC=CC=C3)C4=CC=CC=C4", +"[H][C@]1(CC2)C(C)(C)CCC[C@@](C)1[C@@H](CC[C@@](O)(C)C=C)C2=C",4.13E-04 +"CC[Sn](CC)(CC)CC",4.68E-05 +"CC(C)C(C)N",3.26E+00 +"CC(C)C(O)C(C)C",1.40E+00 +"C1=CC=CC=C1N(C2=CC=CC=C2)C3=CC=CC=C3", +"C1=CC=CC=C1N(C2=CC=CC=C2)C=O",1.54E-01 +"CCOC(=O)C(CC1=CC=CC=C1)C(=O)OCC",2.17E-02 +"OC1=C(Br)C(Br)=C(Br)C(Br)=C1Br",1.90E-04 +"OC1=C(I)C=C(I)C=C1I",2.56E-03 +"C1(C=O)=C(OC)C=C(OC)C=C1",1.21E-01 +"OC1=C(NC(=O)C)C=CC=C1",1.79E-01 +"NC1=C(Cl)C=C(C)C=C1",2.54E-01 +"NC1=C([N+]([O-])=O)C=C(OCC)C=C1",1.43E-01 +"C1=CC([N+](=O)[O-])=CC=C1C(=O)OC",1.31E-01 +"C1=CC([N+]([O-])=O)=CC=C1C(=O)N",8.01E-01 +"C1=CC=CC=C1OC2=CC=C([N+](=O)[O-])C=C2",1.23E-02 +"C1=CC=C(CS(=O)CC2=CC=CC=C2)C=C1",3.48E-01 +"OC1=CC(NC(=O)C)=CC=C1",7.48E+00 +"OCCN1CCOCC1",2.07E+01 +"ClCC1=CC=C(CCl)C=C1",2.23E-04 +"IC1=CC=C(I)C=C1", +"O1C(C)=CC=C1C",7.40E-01 +"ClCCCCCCl",1.79E-01 +"BrCCCCCCC",8.21E-03 +"CCCSSCCC",1.70E-02 +"N#CCCCCCCC#N",3.88E+00 +"NC1=C(Cl)C(Cl)=C(Cl)C=C1",1.85E-02 +"C1(C=O)=C(O)C=CC(Cl)=C1",4.92E-03 +"OC1=CC=C(CCC)C=C1",8.08E-02 +"C1(C=O)=C(F)C(F)=C(F)C(F)=C1F",5.61E-03 +"C(Cl)(Cl)C(=O)N",1.88E+00 +"CCCCCCC(C)N",4.02E-02 +"CC(=O)CCCCCCCC",3.09E-02 +"CCCCCOCCCCC",1.98E-02 +"CC1=COC=N1",1.67E+01 +"CC1=NC=CN1",3.48E+00 +"O=C1C3CC2CC1CC(C3)C2",4.05E-01 +"CCCCCCC1OC(=O)CC1",1.06E-01 +"C1(C=O)=C(O)C=C(OC)C=C1OC",1.47E-02 +"C1=C(Cl)C(Cl)=CC=C1NC(=O)CC",3.94E-02 +"OC1=C(C=C(C=C1C(C)(C)C)C(C)(C)C)C(C)(C)C",2.32E-04 +"C=CC(Cl)C(Cl)",6.54E-02 +"C(=O)N(CCCC)CCCC",5.68E-01 +"C(O)C#CC",1.44E-01 +"CC(C)=CC=C(C)C",3.43E-02 +"NC13CC(CC(C3)C2)CC2C1",1.65E-01 +"N1=C(O)C(C#N)=C(C)C=C1C",1.06E+00 +"NC1=C(F)C(F)=C(F)C(F)=C1F",2.03E-01 +"ClC1=CC=C(SCSP(=S)(OCC)OCC)C=C1",7.00E-04 +"C1=CC=CC=C1P(=O)(C2=CC=CC=C2)C3=CC=CC=C3",1.93E-01 +"C1=CC(N(C)C)=CC=C1P(=O)(C2=CC=C(N(C)C)C=C2)C3=CC=C(N(C)C)C=C3", +"C=CC(=O)OCCO",4.14E-02 +"C#CC(O)CCCCC",3.27E-03 +"CCCCCCCC(=O)C",1.07E-01 +"Cl[C@@H]1CCCC[C@H]1Cl",1.20E-01 +"C1=CC=CC=C1OC2=CC=C(O)C=C2",2.66E-02 +"C=C(C)C(=O)OCCO",1.74E+00 +"C1(Br)=CSC=C1",3.80E-02 +"O=CC1=C(Cl)C=C(Cl)C=C1",1.03E-02 +"C1=CC=CC=C1SSC2=CC=CC=C2",5.04E-04 +"C1(=CC=CC=C1)/C=C/C=C/C2=CC=CC=C2", +"O=C(OCC)C1=CC(N)=CC=C1.OS(C)(=O)=O",3.02E-01 +"C(F)(F)(F)C(O)C(F)(F)(F)",1.45E+00 +"C=CC(O)CC=C",3.88E-01 +"C(O)CC#C",5.15E-01 +"CC\C=C/CCO",3.80E+00 +"CC/C=C/CCO",2.71E+00 +"CN1C(C(=O)C)=CC=C1",1.28E+00 +"N1=CC=C(C2=CC=CC=C2)C=C1",1.04E-01 +"C1=CC=CC=C1S(=O)C2=CC=CC=C2",4.32E-01 +"C1=CC=CC=C1C2=CC=C(C3=CC=CC=C3)O2", +"C=CC(=O)OCC(O)C",2.60E-02 +"N1=C(N)C=CC(Br)=C1",1.02E+00 +"CCOP(=O)(CC1=CC=CC=C1)OCC",1.47E+00 +"CCCCCCCCCCCC(=O)N", +"N1=CC=C(C(=O)C)C=C1",1.39E+00 +"C1=CC(Cl)=CC=C1C(=O)OC",6.40E-02 +"CCCCOC1=CC=CC=C1",3.80E-02 +"C1=CC(C#N)=CC=C1C(=O)OC",2.90E-01 +"OC1=C(O)C(Cl)=C(Cl)C(Cl)=C1Cl",5.12E-03 +"C1=C(C(=O)CBr)C(OC)=CC=C1OC",2.55E-03 +"CCCC[Sn](CCCC)(CCCC)CCCC",1.30E-04 +"C#CC(C)(O)C(C)C",1.83E+00 +"C1=CC=C2C3=CC=CC=C3N(C2=C1)C=C",1.66E-05 +"C1=C(N)C=CC=C1OCC2=CC=CC=C2",4.59E-02 +"O=C(NC)OC1=CC=CC(C2)=C1OC2(C)C",3.81E-03 +"CC(OC)(C)C",7.62E+00 +"C=CCCCCCCC=C",2.10E-03 +"C1=CC(O)=CC=C1/N=N/C2=CC=CC=C2",6.00E-03 +"C1=C(I)C(O)=C(I)C=C1C#N",1.83E-02 +"C1=C(Br)C(O)=C(Br)C=C1C#N",4.55E-02 +"O=[C@](O)[C@@]3(C)[C@@]2([H])[C@@](CCC3)(C)C1=C(CC2)C=[C@@]([C@@H](C)C)C=C1",6.99E-03 +"C=CCC1=CC=CC=C1O",1.12E-01 +"C1(C=O)=C(O)C=CC(Br)=C1",6.46E-03 +"C=C(CCl)C(Cl)",1.52E-03 +"C1=C(Cl)C(O)=C(Cl)C=C1C#N",1.29E-01 +"CCCCOC(=O)C1=CC=C(C(=O)OCCCC)C=C1",2.12E-03 +"C1=CC(O)=CC=C1OC2=CC=C(O)C=C2",2.86E-02 +"C1(Cl)=CC=CC(Cl)=C1C(=O)N",2.47E+00 +"CCCCCCCCCCN",6.55E-03 +"CNC(=O)OC1=CC(C)=C(N(C)C)C=C1",9.36E-03 +"N1=C(Br)NC(Br)=C1Br",2.01E-02 +"CCOP(=S)(OC1=CC=C(C=C1)[N+](=O)[O-])C2=CC=CC=C2",2.43E-04 +"OC(C)CC#C",4.17E-01 +"OC1=C(O)C=C(Cl)C=C1",1.09E-02 +"C1(O)=CC(O)=CC=C1C(=O)OC",2.72E-01 +"C=CC(=O)OCCCCCCCCCCCC", +"N1=C(Cl)C(Cl)=C(Cl)C(Cl)=C1Cl",1.87E-03 +"C1[C@H](C[C@H]([C@@H](C1)C(C)C)O)C",1.21E-01 +"C1=CN=CN1S(=O)(=O)C2=CC=C(C)C=C2",1.88E-01 +"C1=C(C(=O)C)C(Cl)=CC(Cl)=C1",6.89E-02 +"CCCCCCCCC#N",3.77E-02 +"NC1=CC(C(F)(F)F)=C(F)C=C1",1.68E-01 +"[C@H]1(CCCC[C@H]1O)C2=CC=CC=C2",2.52E-01 +"C=C(C)C(=O)OCCOCC",1.75E-01 +"OC1=C(C)C(C)=CC=C1C",6.02E-02 +"CCCCCCCCCCCC#N",2.37E-03 +"C1(OC)=CC=CC=C1C(=O)N",7.94E-01 +"C1(Cl)=CC(Cl)=CC=C1C(=O)N",5.03E-01 +"C=C(C)C(=O)OCC1OCCC1",2.04E-01 +"OC1=C(OC)C=C(Cl)C(Cl)=C1",2.32E-02 +"C=C(C)C(=O)OCC1=CC=CC=C1",2.65E-02 +"C=CC(=O)OCCCCCC",7.10E-03 +"CC(C)(C)C1=CC=C(OC(=O)NC)C=C1",4.82E-02 +"N1=C(CCN)C=CC=C1", +"C1=CC=CC=C1CN2CCNCC2",2.69E-01 +"N1=CC=CC(=C1)CCCO",1.09E+00 +"CCCCCCCCCCCCCN",3.28E-04 +"C1=CC=CC(N)=C1C(=O)C2=CC=C(Cl)C=C2",9.15E-03 +"C1(Cl)=CC=C(Cl)C=C1C(=O)OC",6.83E-02 +"S=P(OC1=NC(=C(C=C1Cl)Cl)Cl)(OCC)OCC",9.07E-04 +"C1(OC)=CC(C=O)=CC(Br)=C1O",2.58E-01 +"C=CC(=O)OC1CCCCC1",9.60E-03 +"S(C1=CC=C(Cl)C=C1)(=O)C2=CC=C(Cl)C=C2", +"CCOC(=O)N(C(=O)OCC)C(=O)OCC",5.70E-02 +"OC1=C(O)C=C(Cl)C(Cl)=C1",4.97E-03 +"NC1=C(Cl)C(Cl)=CC(Cl)=C1Cl",1.17E-03 +"N1=C(C2=CC=CC=C2)C=CC=C1C3=CC=CC=C3",9.08E-04 +"ClC1=CC(Cl)=C([N+]([O-])=O)C=C1[N+]([O-])=O",1.92E-04 +"C1(Cl)=CC(Cl)=C(Cl)C=C1SSC2=C(Cl)C=C(Cl)C(Cl)=C2", +"C1=C(C)C(C)=CC=C1OP(=O)(OC2=CC(C)=C(C)C=C2)OC3=CC(C)=C(C)C=C3", +"CCC(C)C(C)C=O",1.40E-01 +"C(O)C#CCCCCCCC",6.94E-03 +"N1=C(O)C=CC(Cl)=C1",8.80E+00 +"CC(C)SSC(C)C",5.53E-02 +"C1(C=O)=C(OC)C=C(OC)C(OC)=C1",2.52E-01 +"C=C(C)C(=O)OC(C)C",2.96E-01 +"C=CC(O)CCC",3.04E-01 +"OC1=C(Cl)C(Cl)=C(Cl)C(Cl)=C1",1.77E-03 +"C1=CN=CC=C1CCC2=CC=NC=C2",8.20E-01 +"C1C(=O)N(CC)C(=S)N(CC)C1=O",2.25E+01 +"COC(=O)C1=CC=C(C(=O)OC)C=C1[N+]([O-])=O",2.73E-02 +"C1=CC(Cl)=CC2N=C(S)SC1=2",1.59E-02 +"COC(=O)C1=CC=C(C(=O)OC)C=C1N",4.27E-02 +"CCSCCSCC",4.01E-01 +"CN(CCCCl)C.[H]Cl",8.41E-01 +"C1=C(C(=O)C)C=C([N+]([O-])=O)C(Cl)=C1",2.76E-02 +"CC1=CC(Cl)=NC(N)=N1",9.82E-01 +"CC1=C(OC)C=CC=C1OC",1.33E-01 +"N1=C(N(C)C)C=CC=C1",1.04E+00 +"CC(C)(C)CN",5.45E+00 +"O=[C@](O)[C@@]3(C)[C@@]1([H])[C@@](CCC3)(C)[C@]2([H])C(C[C@](C=C)(C)CC2)=CC1",2.88E-03 +"C1(N)=CC=C(Cl)C=C1C#N",1.87E-01 +"ClC(Cl)(C(C)(O)C)Cl.ClC(Cl)(C(C)(O)C)Cl.[H]O[H]",3.62E-01 +"CCCCCCCCCCC(=O)C",6.40E-03 +"C1=C(/C=C/C=O)C=CC(N(C)C)=C1",3.67E-02 +"C(C(=O)O)[N+]1(=CC=CC=C1).[Cl-]",9.33E-01 +"ClC1=C([N+]([O-])=O)C(Cl)=C([N+]([O-])=O)C(Cl)=C1",8.18E-04 +"ClC1=CC=C([N+](=O)[O-])C=C1C=O",2.09E-02 +"N#CC1=C(Cl)C=CC=C1C",9.96E-02 +"N1=C(Br)C(O)=CC=C1",2.70E+00 +"N1=C(Cl)C(O)=CC=C1",4.80E+00 +"C#CCN(CC#C)CC#C",2.26E+00 +"CCOC(OCC)CN(C)CC(OCC)OCC",2.41E+00 +"NCCCN1CCN(CCCN)CC1",1.55E+01 +"OC(CC/C=C(C)/CC/C=C(C)\C)(C)C=C",6.43E-03 +"ClCCN1CCCC1.[H]Cl",9.00E-01 +"CCCCCCCCCCCN",1.23E-03 +"C#CC(O)CCCC",1.57E-02 +"C1(C=O)=CC=C(OCC)C=C1",1.87E-01 +"O=C(C(C(C1C2)(C)C)(C2)C)C1Br",2.96E-01 +"CC(C)=CC1C(C)(C)C1C(=O)OCC2=COC(CC3=CC=CC=C3)=C2",1.82E-05 +"CCOP(=S)(OCC)SCSC(C)(C)C",4.61E-05 +"BrC(Br)C1=C(C(Br)Br)C=CC=C1",1.04E-03 +"C1=C(C(=O)C)C(Cl)=C(Cl)C(Cl)=C1",8.95E-03 +"C1(OC)=C(OC)C(OC)=CC=C1C(=O)C",9.47E-01 +"CCOC(=O)C(Cl)C(=O)OCC",4.88E-03 +"CCNCC1=CC=CC=C1",4.22E-01 +"ClC1=CC=CC=[N+]1C.[I-]",7.79E-01 +"C1=CC(Br)=CC=C1C(=O)C2=CC=CN=C2",7.78E-02 +"C1=CC=CC=C1C(=O)C2=CC=NC=C2",5.62E-01 +"CC1(C)CCC(C)(C)O1",1.31E+00 +"N1=C([N+]([O-])=O)C(O)=CC=C1",1.19E+00 +"C1(CC)=CC=CC(CC)=C1N(COC)C(=O)CCl",1.85E-02 +"NC1=CC=C(CCCCCCCC)C=C1",5.84E-04 +"CSC(C)=NOC(=O)NC",1.30E-02 +"N1=C(O)C=CC=C1Cl",1.65E+00 +"NC1=NN=C(C)C(C)=N1",7.67E+00 +"C1=CC([N+]([O-])=O)=CC([N+](=O)[O-])=C1OC2=CC=C(Br)C=C2", +"O=CC1=CC=C(N(CC)CC)C=C1O",2.77E-02 +"N1=C(C)C=CC=C1Cl",1.82E+00 +"C#CC(CCC(C)C)(C)O",3.49E-01 +"CC1=C(C)OC(C)=N1",4.04E+00 +"CC(=O)C(C)CN(C)C",6.58E-02 +"C1=CC([N+](=O)[O-])=CC=C1OC2=CC(C)=C(Cl)C=C2", +"C1=CC=C(Br)C=C1C(=O)N",4.63E-01 +"O=C(C(=NOC(=O)NC)SC)N(C)C",3.09E-02 +"NC1=C(C(C)C)C=CC=C1C(C)C",8.63E-02 +"[Na+].[N-]=[N+]=[N-]",8.40E-02 +"C[N+](C1=CC=CC=C1)(C)C.[O-]S(=O)(=O)OC",1.00E+00 +"ClC1=C(C(=C(C(=C1Cl)Cl)Cl)Cl)C(=C(Cl)Cl)Cl", +"CC(C)(O)C(F)(C(F)F)F",3.64E+00 +"CCC(N)C",3.76E+00 +"COCCCNCC1=CC(OC)=C(OC)C(OC)=C1",5.05E-01 +"BrCC1OCCCC1",1.15E+00 +"NC1=CC=C(CCCCCCCCCC)C=C1",2.66E-04 +"NC1=CC=C(OCCCCCC)C=C1",1.56E-02 +"C1([N+](=O)[O-])=CC(Cl)=CC=C1C(=O)OC",1.28E-01 +"C1(C=O)=C([N+](=O)[O-])C=CC(O)=C1",2.51E-01 +"O=C(C(C1=CC=C(C=C1)Cl)C(C)C)OC(C2=CC=CC(=C2)OC3=CC=CC=C3)C#N",1.21E-05 +"C1=CC=C(OC2=CC=CC=C2)C=C1COC(=O)C3C(C)(C)C3C=C(Cl)Cl",4.09E-05 +"CCSCCCCSCC",3.40E-02 +"CCCCCCCCOC1=CC=CC=C1NC(=O)C",1.71E-03 +"C1=CC(C(C)(C)C)=CC=C1C(=O)N",1.80E-01 +"CSCCCCCCSC",5.66E-02 +"CC(O)C#C",1.67E-01 +"C1(C)=C(C)C=CC=C1OP(=O)(OC2=C(C)C(C)=CC=C2)OC3=C(C)C(C)=CC=C3", +"C1C(=CC=C[N+]=1CC2C=CC=CC=2)S(=O)(=O)[O-]",9.67E+00 +"C1=CC(C(C)(C)C)=CC=C1OC2=CC=CC(C=O)=C2",1.45E-03 +"O=C(OC(C2=CC=CC(OC3=CC=CC=C3)=C2)C#N)[C@H](C1=CC=C(OC(F)F)C=C1)[C@H](C)C",4.21E-07 +"ClC1=CC=CC(Cl)=C1OP(=O)(OC2=C(Cl)C=CC=C2Cl)OC3=C(Cl)C=CC=C3Cl", +"C1=C(C=O)C=CC=C1OC2=CC(Cl)=C(Cl)C=C2",1.12E-03 +"[Na+].O.O.[O-]C1=C([N+]([O-])=O)C=C([N+]([O-])=O)C2=CC=CC=C12",1.45E-02 +"CC(C)(C)C1=CC=C(C=C)C=C1",3.06E-03 +"O=P(OCC)(SCCSCC)OCC",6.19E-02 +"ClCC1=CC(C=C)=CC=C1",2.03E-03 diff --git a/data/StJudes-HepG2-testset_Class.csv b/data/StJudes-HepG2-testset_Class.csv new file mode 100644 index 0000000..b563ce7 --- /dev/null +++ b/data/StJudes-HepG2-testset_Class.csv @@ -0,0 +1,173 @@ +SMILES,EC50-le-10um +CC[n+]1ccccc1\C=C\c2ccc(cc2)N(C)C,0 +C[n+]1ccccc1\C=C\c2ccc(cc2)[N+](=O)[O-],0 +CC(C)OC(=O)C1C(C2=C(CC(CC2=O)c3ccc(Cl)cc3)N=C1C)c4ccc(F)cc4,0 +CCOc1ccc2nc3cc(N)ccc3c(N)c2c1,0 +CCCOC(=O)C1C(C2=C(CC(CC2=O)c3ccc(OC)cc3)N=C1C)c4ccccc4Cl,0 +CCCOC(=O)C1C(C2=C(CC(CC2=O)c3ccc(OC)cc3)N=C1C)c4ccc(cc4)[N+](=O)[O-],0 +CCCCN1C(=N)N(CC(O)c2ccc(Br)cc2)c3ccccc13,0 +CN(C)c1cc[n+](CC(=O)c2ccc(Br)cc2)cc1,0 +OC(CN1C(=N)N(CC=C)c2ccccc12)c3ccc(Cl)c(Cl)c3,0 +Cc1ccc(NC(=O)c2c(I)cnn2C)cc1[N+](=O)[O-],0 +CCN(CC)CCN1C(=N)N(CC(O)c2ccc(Cl)c(Cl)c2)c3ccccc13,1 +COC(=O)C1C=NC=C(C1c2ccc(Cl)c(Cl)c2)C(=O)OC,0 +CCN(CC)CCN1C(Nc2ccccc2C1=O)c3ccc(Cl)cc3,0 +CN(C)C1=C(N(C(=O)C)c2ccc(F)cc2)C(=O)c3ccccc3C1=O,1 +CCCOC(=O)C1C(C2=C(CC(CC2=O)c3ccc(OC)cc3)N=C1C)c4ccccc4F,0 +CCCOC(=O)C1C(C2=C(CC(CC2=O)c3ccc(OC)c(OC)c3)N=C1C)c4ccccc4F,0 +CCCOC(=O)C1C(C2=C(CC(CC2=O)c3ccc(OC)c(OC)c3)N=C1C)c4cccc(C)c4,0 +COc1ccc(cc1OC)C2CC(=O)C3=C(C2)N=C(C)C(C3c4ccccc4Cl)C(=O)OC(C)C,0 +CCOC(=O)C1C(C2=C(CC(CC2=O)c3ccc(OC)c(OC)c3)N=C1C)c4cccc(C)c4,0 +CCOC(=O)C1C(C2=C(CC(CC2=O)c3ccc(OC)c(OC)c3)N=C1C)c4cccc(Cl)c4,0 +COC(=O)C1C(C2=C(CC(CC2=O)c3ccc(OC)c(OC)c3)N=C1C)c4ccccc4F,0 +COCCOC(=O)C1C(C2=C(CC(CC2=O)c3ccc(OC)c(OC)c3)N=C1C)c4ccccc4F,0 +CCCOC(=O)C1C(C2=C(CC(CC2=O)c3ccc(OC)c(OC)c3)N=C1C)c4ccc(F)cc4,0 +CCCOC(=O)C1C(C2=C(CC(CC2=O)c3ccc(OC)c(OC)c3)N=C1C)c4cccc(Cl)c4,0 +CCCOC(=O)C1C(C2=C(CC(CC2=O)c3ccc(OC)c(OC)c3)N=C1C)c4ccccc4OC,0 +COC(=O)C1C(C2=C(CC(CC2=O)c3ccc(OC)c(OC)c3)N=C1C)c4ccc(F)cc4,0 +CCOC(=O)C1C(C2=C(CC(CC2=O)c3ccc(OC)c(OC)c3)N=C1C)c4ccccc4C,0 +COC(=O)C1C(C2=C(CC(CC2=O)c3ccc(OC)c(OC)c3)N=C1C)c4ccccc4C,0 +CCOC(=O)C1C(C2=C(CC(CC2=O)c3ccc(OC)c(OC)c3)N=C1C)c4ccc(cc4)[N+](=O)[O-],0 +COCCOC(=O)C1C(C2=C(CC(CC2=O)c3ccc(OC)c(OC)c3)N=C1C)c4ccc(F)cc4,0 +CCCOC(=O)C1C(C2=C(CC(CC2=O)c3ccc(OC)c(OC)c3)N=C1C)c4ccccc4Cl,0 +COc1ccc(cc1OC)C2CC(=O)C3=C(C2)N=C(C)C(C3c4ccccc4OC)C(=O)OC(C)C,0 +CCOC(=O)C1C(C2=C(CC(CC2=O)c3ccc(OC)c(OC)c3)N=C1C)c4ccccc4Cl,0 +CCCOC(=O)C1C(C2=C(CC(CC2=O)c3ccc(OC)c(OC)c3)N=C1C)c4ccccc4C,0 +CCOc1ccc2nc(C)cc(Nc3ccc(Cl)cc3)c2c1,1 +COc1ccc2S\C(=C/c3cccc[n+]3C)\N(C)c2c1,0 +CCN1\C(=C\c2cccc[n+]2C)\Sc3ccc(OC)cc13,0 +CCN1\C(=C\c2cccc[n+]2C)\C=Cc3cc(C)ccc13,0 +CN1\C(=C\c2cccc[n+]2C)\Sc3ccccc13,0 +CN1\C(=C\c2cccc[n+]2C)\Sc3ccc(C)cc13,0 +Clc1ccc(Nc2nc(NCc3occc3)c4ccccc4n2)cc1,0 +Fc1ccc(Nc2nc(NCc3occc3)c4ccccc4n2)cc1,1 +CCOc1ccc(cc1)N(C(=O)C)C2=C(N3CCOCC3)C(=O)c4ccccc4C2=O,1 +CCOc1ccc(cc1)N(C(=O)C)C2=C(N(C)C)C(=O)c3ccccc3C2=O,1 +CCCN1C2=C(CCC2)C(=N)C3=C1CCCC3,0 +Oc1ccc(Br)cc1CN2CCN(Cc3ccccc3F)CC2,0 +Oc1ccc(Br)cc1CN2CCN(Cc3cccc(F)c3)CC2,0 +CCCCOc1ccc(cc1)N(C(=O)C)C2=C(N3CCCCC3)C(=O)c4ccccc4C2=O,0 +CCCCOc1ccc(cc1)N(C(=O)C)C2=C(N3CCOCC3)C(=O)c4ccccc4C2=O,1 +CCCCCOc1ccc(cc1)N(C(=O)C)C2=C(N3CCCCC3)C(=O)c4ccccc4C2=O,1 +CCCCCOc1ccc(cc1)N(C(=O)C)C2=C(N3CCOCC3)C(=O)c4ccccc4C2=O,0 +CCOc1ccc(\C=C\C2=NC(=O)c3ccccc3O2)cc1,0 +Cc1ccc(cc1)N2C(=NC(=NC2(C)C)N)N,0 +CCc1ccc(cc1)S(=O)(=O)N=C2CC(=Nc3ccccc3)C(=O)c4ccccc24,1 +Cc1ccccc1N=C2CC(=NS(=O)(=O)c3ccc(F)cc3)c4ccccc4C2=O,1 +CC(C)(C)c1ccc(cc1)S(=O)(=O)N=C2CC(=Nc3ccc(O)cc3)C(=O)c4ccccc24,1 +CC(C)c1ccc(cc1)S(=O)(=O)N=C2CC(=Nc3ccc(O)cc3)C(=O)c4ccccc24,1 +CCc1ccc(cc1)S(=O)(=O)N=C2CC(=Nc3ccccc3OC)C(=O)c4ccccc24,0 +COc1ccccc1N=C2CC(=NS(=O)(=O)c3ccc(cc3)C(C)C)c4ccccc4C2=O,0 +CCCCCCCCCC[n+]1ccccc1,0 +COc1ccccc1C2CC(=O)C3=C(C2)N=C(C)C(C3c4ccc(Cl)cc4)C(=O)OC(C)C,0 +Cc1ccc2S\C(=C/c3sc4ccccc4[n+]3C)\N(CCO)c2c1,0 +CN1\C(=C\c2sc3ccccc3[n+]2CCO)\Sc4ccc(C)cc14,0 +Cc1ccc2N(CCO)\C(=C\c3sc4ccccc4[n+]3C)\Sc2c1,0 +C[n+]1c(\C=C\2/C=CC=CN2CC=C)sc3ccccc13,0 +CN1\C(=C\c2sc3ccccc3[n+]2C)\C=Cc4cc(C)ccc14,1 +CCN1C=CC(=Cc2sc3ccccc3[n+]2CC=C)C=C1,0 +CC[n+]1c(\C=C\2/C=CC=CN2CC=C)sc3ccccc13,0 +CCCCCCCN1C=CC(=N)C=C1,0 +CC(C)c1ccc(NC(=O)\C=C\c2ccccc2Cl)cc1,0 +CCOc1cc2CCNC(c3cc(Cl)ccc3O)c2cc1OCC,0 +C[N+](C)(C)c1nc(nc(n1)N2CCOCC2)N(c3ccccc3)c4ccccc4,0 +CCOc1cc2CCNC(c3cc(C)c(O)c(C)c3)c2cc1OCC,0 +CC(C)(C)C(=O)N1CCN(CC1)c2ccc(c(NC3CC3)c2)[N+](=O)[O-],0 +Oc1ccc(cc1Cl)C2NCc3ccccc3n4cccc24,0 +CCOC(=O)c1cnc2c(OC)cccc2c1Nc3ccc(OC)c(OC)c3,0 +COc1ccc(cc1OC)C2CC(=O)C3=C(C2)NC(=O)CC3c4ccc(Br)cc4,0 +COc1ccc(cc1OC)C2CC(=O)N=C3CC(CC(=O)C23)c4ccc(F)cc4,0 +COc1ccc(cc1OC)C2CC(=O)C3=C(C2)NC(=O)CC3c4ccc(cc4)C(F)(F)F,0 +CCOc1cc(ccc1O)C2CC(=O)N=C3CC(CC(=O)C23)c4cc(OC)ccc4OC,0 +COc1cc(cc(Br)c1O)C2CC(=O)N=C3CC(CC(=O)C23)c4ccccc4Cl,0 +CCOc1cc(cc(Cl)c1O)C2CC(=O)N=C3CC(CC(=O)C23)c4ccc(OC)cc4,0 +COc1ccc(cc1)C2CC(=O)C3=C(C2)NC(=O)CC3c4ccc(OC)c(OC)c4,0 +CCOc1cc(cc(Cl)c1O)C2CC(=O)N=C3CC(CC(=O)C23)c4ccccc4Cl,0 +Cc1cc(Nc2ccc(Br)c(C)c2)n3nc(C)nc3n1,0 +CCc1nc2nc(C)cc(Nc3ccc(I)cc3)n2n1,0 +COc1ccc(Nc2cc(C)c3ccc(O)cc3n2)cc1,0 +CCOc1ccc(Nc2cc(C)c3ccc(O)cc3n2)cc1,0 +CCc1ccc(Nc2cc(C)c3ccc(O)cc3n2)cc1,0 +CC(C)CN1C(C(C(=O)Nc2ccc(C)c(C)c2)c3ccccc3C1=O)c4cccs4,0 +CCCN(C(=O)C(=O)Nc1ccc2N=C3CCCCCN3C(=O)c2c1)c4ccc(OCC)cc4,0 +CN(C(=O)c1cc2CS(=O)(=O)c3ccccc3c2s1)c4cccc(c4)C(F)(F)F,0 +CCN1N=C(N=C2C(=O)N(C)C(=O)N=C12)c3ccc(cc3)C(F)(F)F,1 +CN1C(=O)N(C)c2cn(c(c3ccc(cc3)C(C)(C)C)c2C1=O)c4cc(Cl)ccc4O,0 +CC(O)(C(CN1CCCCC1)c2ccccc2)c3ccc(Cl)cc3,0 +CC(C)CN1C(C(C(=O)Nc2cccc(c2)C(F)(F)F)c3ccccc3C1=O)c4cccs4,0 +CC(C)CN1C(C(C(=O)Nc2ccc(F)c(Cl)c2)c3ccccc3C1=O)c4cccs4,0 +COc1ccc(NC(=O)C2C(N(CC(C)C)C(=O)c3ccccc23)c4cccs4)cc1Cl,0 +CC(C)CN1C(C(C(=O)Nc2ccc(F)cc2F)c3ccccc3C1=O)c4cccs4,0 +COc1ccc(NC(=O)C2C(N(CC(C)C)C(=O)c3ccccc23)c4cccs4)cc1OC,0 +COc1cccc(NC(=O)C2C(N(CC(C)C)C(=O)c3ccccc23)c4cccs4)c1,0 +COc1cc(OC)c(NC(=O)C2C(N(CC(C)C)C(=O)c3ccccc23)c4cccs4)cc1Cl,0 +CCOc1ccc(NC(=O)C2C(N(CC(C)C)C(=O)c3ccccc23)c4cccs4)cc1,0 +CC(C)CN1C(C(C(=O)Nc2ccc(C)cc2Cl)c3ccccc3C1=O)c4cccs4,0 +CC(C)CN1C(C(C(=O)Nc2cccc(c2)C#N)c3ccccc3C1=O)c4cccs4,0 +CCc1ccccc1NC(=O)C2C(N(CC(C)C)C(=O)c3ccccc23)c4cccs4,0 +COc1ccc(NC(=O)C2C(N(CC(C)C)C(=O)c3ccccc23)c4cccs4)cc1,0 +CC(C)CN1C(C(C(=O)Nc2cccc(c2)C(=O)C)c3ccccc3C1=O)c4cccs4,0 +COC(=O)c1ccc(NC(=O)C2C(N(CC(C)C)C(=O)c3ccccc23)c4cccs4)cc1,0 +COc1ccc(NC(=O)C2C(N(C3CCCC3)C(=O)c4ccccc24)c5cccs5)cc1,0 +CCOC(=O)c1cccc(NC(=O)CC2N(CCC(C)C)C(=O)N(C2=O)c3ccc(F)cc3)c1,0 +CCN(CC)Cc1cc(Nc2ccnc3cc(Cl)ccc23)ccc1O,0 +OC(CNC1CCCCC1)Cn2c3ccc(Cl)cc3c4cc(Cl)ccc24,0 +CC(C)NCC(O)Cn1c2ccc(Cl)cc2c3cc(Cl)ccc13,1 +Cc1ccc(cc1)C2CC(n3nc(cc3N2)C(=O)N4CCN(CC4)C(c5ccccc5)c6ccccc6)C(F)(F)F,0 +Nc1ccc(cc1)c2nc3cc(ccc3[nH]2)c4ccc5[nH]c(nc5c4)c6ccc(N)cc6,1 +OC(CN1C(=N)N(CCN2CCCCC2)c3ccccc13)c4ccc(Cl)c(Cl)c4,1 +CCN1CCN(Cc2c(O)ccc3C(=O)C(=C(Oc23)C(F)(F)F)Oc4ccccc4C(C)C)CC1,0 +CC[n+]1c(\C=C\c2ccc(cc2)N(C)C)ccc3cc(OC)ccc13,1 +CCN(CC)c1ccc(\C=C\c2ccc3ccc(OC)cc3[n+]2CC)cc1,1 +CCN(CC)CCCOC(=O)c1c(C)oc2cc3c(C(=O)OCCCN(CC)CC)c(C)oc3cc12,1 +CCCCCCN1C(=N)N(CC(O)COc2ccccc2C)c3ccccc13,1 +Oc1c(cc(CN2CCN(CC2)c3cccc(c3)C(F)(F)F)c4cccnc14)[N+](=O)[O-],0 +CN1CCN(CC1)c2cc(C)c3cc(NC(=S)NCC4CCN(Cc5cccc(C)c5)CC4)ccc3n2,1 +CN1C(=O)\C(=C/c2ccccc2Cl)\Sc3ccc(cc13)C(=O)NC4CCN(Cc5ccccc5)CC4,0 +C\C=C\c1[n+](C)c2ccccc2n1C,0 +CSc1ccc2ccccc2[n+]1C,0 +Cc1c2[nH]c3ccccc3c2c4CCC(C)(C)Cc4[n+]1C,0 +OC(CNC1CCCC1)Cn2c3ccc(Cl)cc3c4cc(Cl)ccc24,1 +OC(CNC1CCCC1)Cn2c3ccccc3c4ccccc24,0 +Clc1ccc(cc1)S(=O)(=O)N2CCN(CC2)C(=S)NCc3occc3,0 +Cc1ccc(C)c(NC(=O)CC2N(Cc3occc3)C(=S)N(C2=O)c4ccccc4)c1,0 +[O-][N+](=O)c1ccc(cc1)S(=O)(=O)N2CCN(CC2)C(=S)NCc3occc3,0 +CCc1c2ccc(C)n2cc[n+]1CC#C,0 +CC(C)(C)c1cc(cc(c1O)C(C)(C)C)C2C(C#N)C(=N)OC3=C2C(=O)CCC3,1 +COc1ccc(O)c(CN2CCN(CC2)c3cc(C)ccc3C)c1,0 +Oc1ccc(Cl)cc1CN2CCN(CC2)c3cccc(c3)C(F)(F)F,0 +CCOc1ccccc1N2CCN(Cc3cc(Br)cc(OC)c3O)CC2,0 +Cc1cccc(N2CCN(Cc3ccccc3O)CC2)c1C,0 +Cc1cccc(N2CCN(Cc3cc(Br)ccc3O)CC2)c1C,0 +Oc1ccc(Br)cc1CN2CCN(Cc3ccccc3)CC2,0 +Cc1ccc(C)c(c1)N2CCN(Cc3cc(Br)ccc3O)CC2,0 +Cc1ccc(C)c(c1)N2CCN(Cc3cc(Cl)ccc3O)CC2,0 +CCN(C(=O)C)C1=C(N2CCOCC2)C(=O)c3ccccc3C1=O,1 +CCOC(=O)C1=C(C)N=C2S\C(=C/c3cc(Cl)ccc3O)\C(=O)N2C1c4ccc(cc4)N(C)C,0 +Fc1ccc(Nc2nc(NCc3ccccc3)c4ccccc4n2)cc1,1 +CCOc1ccc(Nc2nc(NCc3ccccc3)c4ccccc4n2)cc1,1 +CC(C)c1ccccc1OC2=C(Oc3c(CN4CCN(C)CC4)c(O)ccc3C2=O)C(F)(F)F,0 +ClC(Cl)(Cl)C(=O)Nc1ccccc1N2CCN(CC2)C(=O)c3ccccc3,0 +Clc1cccc(NC(=O)C(Cl)(Cl)Cl)c1N2CCOCC2,0 +CC(C)c1ccc(CNCC(O)c2ccccc2)cc1,0 +CCOC(=O)C1(Cc2cccc(F)c2)CCN(Cc3ccccc3O)CC1,0 +CCOC(=O)C1(Cc2ccc(Cl)cc2)CCN(Cc3ccccc3O)CC1,0 +CCOC(=O)C1(Cc2cccc(c2)C(F)(F)F)CCN(Cc3ccccc3O)CC1,0 +CCOC(=O)C1(Cc2ccccc2)CCN(Cc3cccc(OC)c3O)CC1,0 +CSC(=S)N=NC(c1ccccc1)c2ccccn2,0 +Brc1ccc(cc1)N=C2C(C(=O)c3ccccc3C2=O)n4nnc5ccccc45,0 +CCOC(=O)C1=C(C)N=C2S\C(=C\c3ccccc3O)\C(=O)N2C1c4ccccc4,0 +CC(C)CCNC(=O)c1c(C)oc2C(=O)c3ccccc3C(=O)c12,0 +CCOc1ccc(\C=C\2/Oc3c(CN4CCCCC4)c(O)ccc3C2=O)cc1,0 +COc1ccc(Nc2cc(C)nc(Nc3ccc(NS(=O)(=O)c4ccc(OC)cc4)cc3)n2)cc1,0 +COc1ccccc1Cn2ccnc2SCC(=O)Nc3cc(ccc3Cl)C(F)(F)F,0 +Cc1ccc(cc1)C(O)(CNCc2ccccc2)c3ccc(Br)cc3,0 +CCCCN(CCCC)CC(O)COc1ccccc1C(=O)Nc2ccccc2,0 +Oc1ccc(Br)cc1CN2CCN(CC2)C3CC4CCC3C4,0 +C(Cc1ccccc1)Nc2ccnc3cc4ccccc4cc23,1 +Cc1ccc(cc1)C2(CC2C(=O)Nc3ccc(cc3O)[N+](=O)[O-])c4ccc(C)cc4,0 +Cc1cc(C)nc(N=C(N)Nc2ccccc2O)n1,0 +CC(C)(C)c1cc(cc(c1O)C(C)(C)C)N=NC(=O)c2ccncc2,0 +Oc1ccccc1Cn2c3ccccc3c4cc(ccc24)[N+](=O)[O-],0 +CC(C)(C)NC(=O)C1=CN(C=C(C(=O)NC2CCN(Cc3ccccc3)C2)C1=O)C4CCCCC4,0 +CC(CNC(=O)C1=CN(C=C(C(=O)NC(C)(C)C)C1=O)C2CCCCC2)c3ccccc3,0 diff --git a/test/unit_test.rb b/test/unit_test.rb index c5a4a62..f51e376 100644 --- a/test/unit_test.rb +++ b/test/unit_test.rb @@ -45,72 +45,136 @@ class ValidationTest < Test::Unit::TestCase @@subjectid = nil end - files = [ - File.new("data/hamster_carcinogenicity.mini.csv"), - File.new("data/EPAFHM.mini.csv") - ] - @@data = {} - files.each do |f| - d = ValidationExamples::Util.upload_dataset(f, @@subjectid) - @@data[d] = ValidationExamples::Util.prediction_feature_for_file(f) + files = { File.new("data/hamster_carcinogenicity.mini.csv") => :crossvalidation, + File.new("data/EPAFHM.mini.csv") => :crossvalidation, + File.new("data/hamster_carcinogenicity.csv") => :validation, + File.new("data/EPAFHM.csv") => :validation, +# File.new("data/StJudes-HepG2-testset_Class.csv") => :crossvalidation + } + @@data = [] + files.each do |file,type| + @@data << { :type => type, + :data => ValidationExamples::Util.upload_dataset(file, @@subjectid), + :feat => ValidationExamples::Util.prediction_feature_for_file(file), + :file => file} end end def global_teardown puts "delete and logout" - #OpenTox::Dataset.find(@@data,@@subjectid).delete(@@subjectid) if defined?@@data + @@data.each{|data| OpenTox::Dataset.find(data[:data],@@subjectid).delete(@@subjectid)} + @@vs.each{|v| v.delete(@@subjectid)} if defined?@@vs @@cvs.each{|cv| cv.delete(@@subjectid)} if defined?@@cvs @@reports.each{|report| report.delete(@@subjectid)} if defined?@@reports @@qmrfReports.each{|qmrfReport| qmrfReport.delete(@@subjectid)} if defined?@@qmrfReports OpenTox::Authorization.logout(@@subjectid) if AA_SERVER end - def test_crossvalidation + def test_training_test_split - #assert_rest_call_error OpenTox::NotFoundError do - # OpenTox::Crossvalidation.find(File.join(CONFIG[:services]["opentox-validation"],"crossvalidation/noexistingid")) - #end - @@cvs = [] - @@data.each do |data,feat| - puts "test_crossvalidation" - p = { - :dataset_uri => data, - :algorithm_uri => File.join(CONFIG[:services]["opentox-algorithm"],"lazar"), - :algorithm_params => "feature_generation_uri="+File.join(CONFIG[:services]["opentox-algorithm"],"fminer/bbrc"), - :prediction_feature => feat, - :num_folds => 2 } - t = OpenTox::SubTask.new(nil,0,1) - def t.progress(pct) - if !defined?@last_msg or @last_msg+3 data[:data], + :algorithm_uri => File.join(CONFIG[:services]["opentox-algorithm"],"lazar"), + :algorithm_params => "feature_generation_uri="+File.join(CONFIG[:services]["opentox-algorithm"],"fminer/bbrc"), + :prediction_feature => data[:feat], + :split_ratio => 0.99, + :random_seed => 2} + v = OpenTox::Validation.create_training_test_split(p, @@subjectid) + assert v.uri.uri? + if @@subjectid + assert_rest_call_error OpenTox::NotAuthorizedError do + OpenTox::Crossvalidation.find(v.uri) + end end + v = OpenTox::Validation.find(v.uri, @@subjectid) + assert v.uri.uri? + @@vs << v end - def t.waiting_for(task_uri); end - cv = OpenTox::Crossvalidation.create(p, @@subjectid, t) - assert cv.uri.uri? + end + end + + def test_validation_report + #@@cv = OpenTox::Crossvalidation.find("http://local-ot/validation/crossvalidation/48", @@subjectid) + + @@reports = [] unless defined?@@reports + @@vs.each do |v| + puts "test_validation_report" + assert defined?v,"no validation defined" + assert_kind_of OpenTox::Validation,v if @@subjectid assert_rest_call_error OpenTox::NotAuthorizedError do - OpenTox::Crossvalidation.find(cv.uri) + OpenTox::CrossvalidationReport.create(v.uri) end end - cv = OpenTox::Crossvalidation.find(cv.uri, @@subjectid) - assert cv.uri.uri? + report = OpenTox::ValidationReport.create(v.uri,@@subjectid) + assert report.uri.uri? if @@subjectid assert_rest_call_error OpenTox::NotAuthorizedError do - cv.summary(cv) + OpenTox::CrossvalidationReport.find(report.uri) + end + end + report = OpenTox::ValidationReport.find(report.uri,@@subjectid) + assert report.uri.uri? + report2 = OpenTox::ValidationReport.find_for_validation(v.uri,@@subjectid) + assert_equal report.uri,report2.uri + report3_uri = v.find_or_create_report(@@subjectid) + assert_equal report.uri,report3_uri + @@reports << report2 + end + end + + def test_crossvalidation + + #assert_rest_call_error OpenTox::NotFoundError do + # OpenTox::Crossvalidation.find(File.join(CONFIG[:services]["opentox-validation"],"crossvalidation/noexistingid")) + #end + @@cvs = [] + @@data.each do |data| + if data[:type]==:crossvalidation + puts "test_crossvalidation "+data[:file].path.to_s + p = { + :dataset_uri => data[:data], + :algorithm_uri => File.join(CONFIG[:services]["opentox-algorithm"],"lazar"), + :algorithm_params => "feature_generation_uri="+File.join(CONFIG[:services]["opentox-algorithm"],"fminer/bbrc"), + :prediction_feature => data[:feat], + :num_folds => 2 } + t = OpenTox::SubTask.new(nil,0,1) + def t.progress(pct) + if !defined?@last_msg or @last_msg+3 Date: Fri, 6 May 2011 19:47:16 +0200 Subject: fix report encoding problems --- report/report_application.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/report/report_application.rb b/report/report_application.rb index 5a47063..258daa7 100755 --- a/report/report_application.rb +++ b/report/report_application.rb @@ -82,6 +82,8 @@ get '/report/:type/:id' do report = rs.get_report(params[:type],params[:id],accept_header) format = Reports::ReportFormat.get_format(accept_header) content_type format + # default encoding is utf-8, html conversion produces iso-8859-1 encoding + content_type "text/html", 'charset' => 'ISO-8859-1' if format=="text/html" #PENDING: get_report should return file or string, check for result.is_file instead of format if format=="application/x-yaml" or format=="application/rdf+xml" report -- cgit v1.2.3 From 710976325cd0e23297e07c0a2f2460573287a49b Mon Sep 17 00:00:00 2001 From: mguetlein Date: Fri, 6 May 2011 20:03:50 +0200 Subject: unit test slightly modified --- test/unit_test.rb | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/test/unit_test.rb b/test/unit_test.rb index f51e376..2462984 100644 --- a/test/unit_test.rb +++ b/test/unit_test.rb @@ -81,9 +81,17 @@ class ValidationTest < Test::Unit::TestCase :algorithm_uri => File.join(CONFIG[:services]["opentox-algorithm"],"lazar"), :algorithm_params => "feature_generation_uri="+File.join(CONFIG[:services]["opentox-algorithm"],"fminer/bbrc"), :prediction_feature => data[:feat], - :split_ratio => 0.99, + :split_ratio => 0.95, :random_seed => 2} - v = OpenTox::Validation.create_training_test_split(p, @@subjectid) + t = OpenTox::SubTask.new(nil,0,1) + def t.progress(pct) + if !defined?@last_msg or @last_msg+3 Date: Fri, 6 May 2011 20:05:04 +0200 Subject: validation utilizes/requires acceptValue for classification --- lib/ot_predictions.rb | 23 +++++++++++++++-------- lib/predictions.rb | 24 +++++++++++------------ report/report_content.rb | 14 +++++++------- report/validation_access.rb | 9 +++++++-- report/validation_data.rb | 46 +++++++++++++++++++++++---------------------- 5 files changed, 65 insertions(+), 51 deletions(-) diff --git a/lib/ot_predictions.rb b/lib/ot_predictions.rb index 5033425..1fd601c 100755 --- a/lib/ot_predictions.rb +++ b/lib/ot_predictions.rb @@ -54,13 +54,20 @@ module Lib @compounds = test_dataset.compounds LOGGER.debug "test dataset size: "+@compounds.size.to_s raise "test dataset is empty "+test_dataset_uri.to_s unless @compounds.size>0 - class_values = feature_type=="classification" ? OpenTox::Feature.find(prediction_feature, subjectid).domain : nil + + if feature_type=="classification" + accept_values = test_target_dataset.features[prediction_feature][OT.acceptValue] + raise "'"+OT.acceptValue.to_s+"' missing/invalid for feature '"+prediction_feature.to_s+"' in dataset '"+ + test_target_dataset_uri.to_s+"', acceptValues are: '"+accept_values.inspect+"'" if accept_values==nil or accept_values.length<2 + else + accept_values=nil + end actual_values = [] @compounds.each do |c| case feature_type when "classification" - actual_values << classification_value(test_target_dataset, c, prediction_feature, class_values) + actual_values << classification_value(test_target_dataset, c, prediction_feature, accept_values) when "regression" actual_values << regression_value(test_target_dataset, c, prediction_feature) end @@ -108,7 +115,7 @@ module Lib case feature_type when "classification" # TODO: remove LAZAR_PREDICTION_DATASET_HACK - predicted_values << classification_value(prediction_dataset, c, no_prediction_feature ? nil : predicted_variable, class_values) + predicted_values << classification_value(prediction_dataset, c, no_prediction_feature ? nil : predicted_variable, accept_values) when "regression" predicted_values << regression_value(prediction_dataset, c, no_prediction_feature ? nil : predicted_variable) end @@ -126,7 +133,7 @@ module Lib end task.progress(80) if task # loaded predicted values and confidence - super(predicted_values, actual_values, confidence_values, feature_type, class_values) + super(predicted_values, actual_values, confidence_values, feature_type, accept_values) raise "illegal num compounds "+num_info if @compounds.size != @predicted_values.size task.progress(100) if task # done with the mathmatics end @@ -143,11 +150,11 @@ module Lib end end - def classification_value(dataset, compound, feature, class_values) + def classification_value(dataset, compound, feature, accept_values) v = value(dataset, compound, feature) - i = class_values.index(v) - raise "illegal class_value of prediction (value is '"+v.to_s+"', class is '"+v.class.to_s+"'), possible values are "+ - class_values.inspect unless v==nil or i!=nil + i = accept_values.index(v.to_s) + raise "illegal class_value of prediction (value is '"+v.to_s+"'), accept values are "+ + accept_values.inspect unless v==nil or i!=nil i end diff --git a/lib/predictions.rb b/lib/predictions.rb index 5850024..db3c60c 100755 --- a/lib/predictions.rb +++ b/lib/predictions.rb @@ -23,13 +23,13 @@ module Lib actual_values, confidence_values, feature_type, - class_domain=nil ) + accept_values=nil ) @predicted_values = predicted_values @actual_values = actual_values @confidence_values = confidence_values @feature_type = feature_type - @class_domain = class_domain + @accept_values = accept_values @num_classes = 1 #puts "predicted: "+predicted_values.inspect @@ -58,15 +58,15 @@ module Lib case @feature_type when "classification" - raise "class_domain missing while performing classification" unless @class_domain - @num_classes = @class_domain.size + raise "accept_values missing while performing classification" unless @accept_values + @num_classes = @accept_values.size raise "num classes < 2" if @num_classes<2 { "predicted"=>@predicted_values, "actual"=>@actual_values }.each do |s,values| values.each{ |v| raise "illegal "+s+" classification-value ("+v.to_s+"),"+ "has to be either nil or index of predicted-values" if v!=nil and (!v.is_a?(Numeric) or v<0 or v>@num_classes)} end when "regresssion" - raise "class_domain != nil while performing regression" if @class_domain + raise "accept_values != nil while performing regression" if @accept_values { "predicted"=>@predicted_values, "actual"=>@actual_values }.each do |s,values| values.each{ |v| raise "illegal "+s+" regression-value ("+v.to_s+"),"+ "has to be either nil or number" unless v==nil or v.is_a?(Numeric)} @@ -89,7 +89,7 @@ module Lib case @feature_type when "classification" @confusion_matrix = [] - @class_domain.each do |v| + @accept_values.each do |v| @confusion_matrix.push( Array.new( @num_classes, 0 ) ) end @@ -235,8 +235,8 @@ module Lib res = {} (0..@num_classes-1).each do |actual| (0..@num_classes-1).each do |predicted| - res[{:confusion_matrix_actual => @class_domain[actual], - :confusion_matrix_predicted => @class_domain[predicted]}] = @confusion_matrix[actual][predicted] + res[{:confusion_matrix_actual => @accept_values[actual], + :confusion_matrix_predicted => @accept_values[predicted]}] = @confusion_matrix[actual][predicted] end end return res @@ -495,7 +495,7 @@ module Lib raise "no confidence values" if @confidence_values==nil raise "no class-value specified" if class_value==nil - class_index = @class_domain.index(class_value) + class_index = @accept_values.index(class_value) raise "class not found "+class_value.to_s if class_index==nil c = []; p = []; a = [] @@ -529,7 +529,7 @@ module Lib def predicted_value(instance_index) case @feature_type when "classification" - @predicted_values[instance_index]==nil ? nil : @class_domain[@predicted_values[instance_index]] + @predicted_values[instance_index]==nil ? nil : @accept_values[@predicted_values[instance_index]] when "regression" @predicted_values[instance_index] end @@ -542,7 +542,7 @@ module Lib def actual_value(instance_index) case @feature_type when "classification" - @actual_values[instance_index]==nil ? nil : @class_domain[@actual_values[instance_index]] + @actual_values[instance_index]==nil ? nil : @accept_values[@actual_values[instance_index]] when "regression" @actual_values[instance_index] end @@ -576,7 +576,7 @@ module Lib def prediction_feature_value_map(proc) res = {} (0..@num_classes-1).each do |i| - res[@class_domain[i]] = proc.call(i) + res[@accept_values[i]] = proc.call(i) end return res end diff --git a/report/report_content.rb b/report/report_content.rb index 1345e6f..36f9955 100755 --- a/report/report_content.rb +++ b/report/report_content.rb @@ -189,9 +189,9 @@ class Reports::ReportContent end @xml_report.add_paragraph(section_roc, section_text) if section_text - class_domain = validation_set.get_class_domain - class_domain.size.times do |i| - class_value = class_domain[i] + accept_values = validation_set.get_accept_values + accept_values.size.times do |i| + class_value = accept_values[i] image_title = image_titles ? image_titles[i] : "ROC Plot for class-value '"+class_value.to_s+"'" image_caption = image_captions ? image_captions[i] : nil plot_file_name = "roc_plot"+@tmp_file_count.to_s+".svg" @@ -236,11 +236,11 @@ class Reports::ReportContent image_titles=nil, image_captions=nil) - class_domain = validation_set.get_domain_for_attr(rank_attribute) - puts "ranking plot for "+rank_attribute.to_s+", class values: "+class_domain.to_s + accept_values = validation_set.get_class_values_for(rank_attribute) + puts "ranking plot for "+rank_attribute.to_s+", class values: "+accept_values.to_s - class_domain.size.times do |i| - class_value = class_domain[i] + accept_values.size.times do |i| + class_value = accept_values[i] if image_titles image_title = image_titles[i] else diff --git a/report/validation_access.rb b/report/validation_access.rb index 96dfbf3..22c7146 100755 --- a/report/validation_access.rb +++ b/report/validation_access.rb @@ -74,8 +74,13 @@ class Reports::ValidationDB validation.predicted_variable, subjectid, task) end - def get_class_domain( validation ) - OpenTox::Feature.new( validation.prediction_feature ).domain + def get_accept_values( validation ) + # PENDING So far, one has to load the whole dataset to get the accept_value from ambit + d = OpenTox::Dataset.find( validation.test_target_dataset_uri ) + accept_values = d.features[validation.prediction_feature][OT.acceptValue] + raise "cannot get accept values from dataset "+validation.test_target_dataset_uri.to_s+" for feature "+ + validation.prediction_feature+":\n"+d.features[validation.prediction_feature].to_yaml unless accept_values!=nil + accept_values end def feature_type( validation, subjectid=nil ) diff --git a/report/validation_data.rb b/report/validation_data.rb index 9212c98..fcb8fc0 100755 --- a/report/validation_data.rb +++ b/report/validation_data.rb @@ -104,9 +104,9 @@ module Reports # returns the predictions feature values (i.e. the domain of the class attribute) # - def get_class_domain() - @class_domain = @@validation_access.get_class_domain(self) unless @class_domain - @class_domain + def get_accept_values() + @accept_values = @@validation_access.get_accept_values(self) unless @accept_values + @accept_values end # is classification/regression validation? cache to save rest-calls @@ -233,7 +233,7 @@ module Reports # def get_true_prediction_feature_value # if all_classification? -# class_values = get_class_domain +# class_values = get_accept_values # if class_values.size == 2 # (0..1).each do |i| # return class_values[i] if (class_values[i].to_s.downcase == "true" || class_values[i].to_s.downcase == "active") @@ -243,21 +243,23 @@ module Reports # return nil # end - def get_class_domain( ) - return unique_value("get_class_domain") + def get_accept_values( ) + return unique_value("get_accept_values") end - def get_domain_for_attr( attribute ) - class_domain = get_class_domain() - if Validation::Validation.classification_property?(attribute) and - !Validation::Validation.depends_on_class_value?(attribute) - [ nil ] - elsif Validation::Validation.classification_property?(attribute) and - class_domain.size==2 and - Validation::Validation.complement_exists?(attribute) - [ class_domain[0] ] + def get_accept_values_for_attr( attribute ) + if !Validation::Validation.classification_property?(attribute) + [] else - class_domain + accept_values = get_accept_values() + if !Validation::Validation.depends_on_class_value?(attribute) + [ nil ] + elsif accept_values.size==2 and + Validation::Validation.complement_exists?(attribute) + [ accept_values[0] ] + else + accept_values + end end end @@ -298,10 +300,10 @@ module Reports #puts col_values.inspect # get domain for classification attribute, i.e. ["true","false"] - class_domain = get_domain_for_attr(attribute_val) + accept_values = get_accept_values_for_attr(attribute_val) # or the attribute has a complementary value, i.e. true_positive_rate # -> domain is reduced to one class value - first_value_elem = (class_domain.size==1 && class_domain[0]!=nil) + first_value_elem = (accept_values.size==1 && accept_values[0]!=nil) cell_values = {} row_values.each do |row| @@ -311,7 +313,7 @@ module Reports if v.send(attribute_row)==row and v.send(attribute_col)==col raise "two validation have equal row and column values"if val!=nil val = v.send(attribute_val) - val = val[class_domain[0]] if first_value_elem + val = val[accept_values[0]] if first_value_elem val = val.to_nice_s end end @@ -357,13 +359,13 @@ module Reports else attribute_not_nil[index] = true if remove_nil_attributes - class_domain = get_domain_for_attr(a) + accept_values = get_accept_values_for_attr(a) # get domain for classification attribute, i.e. ["true","false"] - if class_domain.size==1 && class_domain[0]!=nil + if accept_values.size==1 && accept_values[0]!=nil # or the attribute has a complementary value, i.e. true_positive_rate # -> domain is reduced to one class value raise "illegal state, value for "+a.to_s+" is no hash: '"+val.to_s+"'" unless (val.is_a?(Hash)) - val = val[class_domain[0]] + val = val[accept_values[0]] end if variance -- cgit v1.2.3 From e09012e01aa865900184bee186933b11f6fa1d3f Mon Sep 17 00:00:00 2001 From: mguetlein Date: Mon, 9 May 2011 14:06:48 +0200 Subject: fix search via param functionality for validation, add corresponding unit-tests --- lib/ohm_util.rb | 39 ++++++++++++++++++++++++ report/environment.rb | 3 +- report/report_persistance.rb | 13 ++------ test/unit_test.rb | 58 +++++++++++++++++++++++++++++++----- validation/validation_application.rb | 14 ++------- 5 files changed, 97 insertions(+), 30 deletions(-) create mode 100644 lib/ohm_util.rb diff --git a/lib/ohm_util.rb b/lib/ohm_util.rb new file mode 100644 index 0000000..21d7978 --- /dev/null +++ b/lib/ohm_util.rb @@ -0,0 +1,39 @@ + +module Lib + module OhmUtil + + def self.check_params(model, params) + prop_names = model.attributes.collect{|p| p.to_s} + params.keys.each do |k| + key = k.to_s + if (key == "subjectid") + params.delete(k) + else + unless prop_names.include?(key) + key = key.from_rdf_format + unless prop_names.include?(key) + key = key+"_uri" + unless prop_names.include?(key) + key = key+"s" + unless prop_names.include?(key) + raise OpenTox::BadRequestError.new "no attribute found: '"+k.to_s+"'" + end + end + end + end + params[key.to_sym] = params.delete(k) + end + end + params + end + + def self.find(model, filter_params) + if (filter_params.size==0) + model.all + else + model.find(check_params(model,filter_params)) + end + end + + end +end \ No newline at end of file diff --git a/report/environment.rb b/report/environment.rb index aa8ab56..4729dc8 100755 --- a/report/environment.rb +++ b/report/environment.rb @@ -12,8 +12,7 @@ module Reports end require "lib/ot_predictions.rb" -#require "lib/active_record_setup.rb" -#require "lib/data_mapper_util.rb" +require "lib/ohm_util.rb" require "report/plot_factory.rb" require "report/xml_report.rb" diff --git a/report/report_persistance.rb b/report/report_persistance.rb index 9097fa3..113c81b 100755 --- a/report/report_persistance.rb +++ b/report/report_persistance.rb @@ -198,6 +198,8 @@ module Reports attribute :algorithm_uris index :report_type + index :validation_uris + index :crossvalidation_uris attr_accessor :subjectid @@ -250,16 +252,7 @@ module Reports def list_reports(type, filter_params={}) LOGGER.debug "find reports for params: "+filter_params.inspect - # unfortunately, datamapper does not allow searching in Objects - # do filtering for list = Object params manually - list_params = {} - [:validation_uris, :crossvalidation_uris, :algorithm_uris, :model_uris].each do |l| - list_params[l] = filter_params.delete(l) if filter_params.has_key?(l) - end - reports = ReportData.find( :report_type => type ) - list_params.each do |k,v| - reports = reports.collect{|x| x}.delete_if{ |r| !r.send(k).include?(v) } - end + reports = Lib::OhmUtil.find( ReportData, filter_params ) reports.collect{ |r| r.id } end diff --git a/test/unit_test.rb b/test/unit_test.rb index 2462984..5984f42 100644 --- a/test/unit_test.rb +++ b/test/unit_test.rb @@ -44,38 +44,55 @@ class ValidationTest < Test::Unit::TestCase puts "AA disabled" @@subjectid = nil end - + + @@data = [] files = { File.new("data/hamster_carcinogenicity.mini.csv") => :crossvalidation, File.new("data/EPAFHM.mini.csv") => :crossvalidation, File.new("data/hamster_carcinogenicity.csv") => :validation, File.new("data/EPAFHM.csv") => :validation, # File.new("data/StJudes-HepG2-testset_Class.csv") => :crossvalidation } - @@data = [] files.each do |file,type| @@data << { :type => type, :data => ValidationExamples::Util.upload_dataset(file, @@subjectid), :feat => ValidationExamples::Util.prediction_feature_for_file(file), - :file => file} + :info => file.path, :delete => true} end +# @@data << { :type => :crossvalidation, +# :data => "http://apps.ideaconsult.net:8080/ambit2/dataset/9?max=50", +# :feat => "http://apps.ideaconsult.net:8080/ambit2/feature/21573", +# :info => "http://apps.ideaconsult.net:8080/ambit2/dataset/9?max=50" } +# @@data << { :type => :validation, +# :data => "http://apps.ideaconsult.net:8080/ambit2/dataset/272?max=50", +# :feat => "http://apps.ideaconsult.net:8080/ambit2/feature/26221", +# :info => "http://apps.ideaconsult.net:8080/ambit2/dataset/272?max=50" } end def global_teardown puts "delete and logout" - @@data.each{|data| OpenTox::Dataset.find(data[:data],@@subjectid).delete(@@subjectid)} + @@data.each{|data| OpenTox::Dataset.find(data[:data],@@subjectid).delete(@@subjectid) if data[:delete]} @@vs.each{|v| v.delete(@@subjectid)} if defined?@@vs @@cvs.each{|cv| cv.delete(@@subjectid)} if defined?@@cvs @@reports.each{|report| report.delete(@@subjectid)} if defined?@@reports @@qmrfReports.each{|qmrfReport| qmrfReport.delete(@@subjectid)} if defined?@@qmrfReports OpenTox::Authorization.logout(@@subjectid) if AA_SERVER end + + def test_validation_list + puts "test_validation_list" + list = OpenTox::Validation.list + assert list.is_a?(Array) + list.each do |l| + assert l.uri? + end + end def test_training_test_split @@vs = [] @@data.each do |data| if data[:type]==:validation - puts "test_training_test_split "+data[:file].path.to_s + puts "test_training_test_split "+data[:info].to_s p = { :dataset_uri => data[:data], :algorithm_uri => File.join(CONFIG[:services]["opentox-algorithm"],"lazar"), @@ -100,6 +117,11 @@ class ValidationTest < Test::Unit::TestCase end v = OpenTox::Validation.find(v.uri, @@subjectid) assert v.uri.uri? + + model = v.metadata[OT.model] + assert model.uri? + v_list = OpenTox::Validation.list( {:model => model} ) + assert v_list.size==1 and v_list.include?(v.uri) @@vs << v end end @@ -118,6 +140,8 @@ class ValidationTest < Test::Unit::TestCase OpenTox::CrossvalidationReport.create(v.uri) end end + report = OpenTox::ValidationReport.find_for_validation(v.uri,@@subjectid) + assert report==nil,"report already exists for validation\nreport: "+(report ? report.uri.to_s : "")+"\nvalidation: "+v.uri.to_s report = OpenTox::ValidationReport.create(v.uri,@@subjectid) assert report.uri.uri? if @@subjectid @@ -134,7 +158,16 @@ class ValidationTest < Test::Unit::TestCase @@reports << report2 end end - + + def test_crossvalidation_list + puts "test_crossvalidation_list" + list = OpenTox::Crossvalidation.list + assert list.is_a?(Array) + list.each do |l| + assert l.uri? + end + end + def test_crossvalidation #assert_rest_call_error OpenTox::NotFoundError do @@ -143,7 +176,7 @@ class ValidationTest < Test::Unit::TestCase @@cvs = [] @@data.each do |data| if data[:type]==:crossvalidation - puts "test_crossvalidation "+data[:file].path.to_s + puts "test_crossvalidation "+data[:info].to_s p = { :dataset_uri => data[:data], :algorithm_uri => File.join(CONFIG[:services]["opentox-algorithm"],"lazar"), @@ -174,6 +207,16 @@ class ValidationTest < Test::Unit::TestCase end summary = cv.summary(@@subjectid) assert_kind_of Hash,summary + + algorithm = cv.metadata[OT.algorithm] + assert algorithm.uri? + cv_list = OpenTox::Crossvalidation.list( {:algorithm => algorithm} ) + assert cv_list.include?(cv.uri) + cv_list.each do |cv_uri| + alg = OpenTox::Crossvalidation.find(cv_uri).metadata[OT.algorithm] + assert alg==algorithm,"wrong algorithm for filtered crossvalidation, should be: '"+algorithm.to_s+"', is: '"+alg.to_s+"'" + end + @@cvs << cv end end @@ -195,6 +238,7 @@ class ValidationTest < Test::Unit::TestCase OpenTox::CrossvalidationReport.create(cv.uri) end end + assert OpenTox::ValidationReport.find_for_validation(cv.uri,@@subjectid)==nil report = OpenTox::CrossvalidationReport.create(cv.uri,@@subjectid) assert report.uri.uri? if @@subjectid diff --git a/validation/validation_application.rb b/validation/validation_application.rb index 4ef05a9..2bd2fea 100755 --- a/validation/validation_application.rb +++ b/validation/validation_application.rb @@ -9,11 +9,7 @@ require 'validation/validation_service.rb' get '/crossvalidation/?' do LOGGER.info "list all crossvalidations" - #uri_list = Validation::Crossvalidation.all.collect{ |cv| cv.crossvalidation_uri }.join("\n")+"\n" - #uri_list = Lib::DataMapperUtil.all(Validation::Crossvalidation,params).collect{ |cv| cv.crossvalidation_uri }.join("\n")+"\n" - uri_list = Validation::Crossvalidation.all.collect{|cv| cv.crossvalidation_uri}.join("\n") + "\n" - - #uri_list = Validation::Crossvalidation.find_like(params).collect{ |cv| cv.crossvalidation_uri }.join("\n")+"\n" + uri_list = Lib::OhmUtil.find( Validation::Crossvalidation, params ).collect{|v| v.crossvalidation_uri}.join("\n") + "\n" if request.env['HTTP_ACCEPT'] =~ /text\/html/ related_links = "Single validations: "+url_for("/",:full)+"\n"+ @@ -134,7 +130,7 @@ get '/crossvalidation/:id/statistics' do # LOGGER.debug vals.collect{|v| v.validation_uri}.join("\n") # LOGGER.debug vals.size # LOGGER.debug vals.class - + raise "could not load all validations for crossvalidation" if vals.include?(nil) v = Lib::MergeObjects.merge_array_objects( vals ) v.created_at = nil #v.id = nil @@ -213,11 +209,7 @@ end get '/?' do LOGGER.info "list all validations, params: "+params.inspect - #uri_list = Validation::Validation.find_like(params).collect{ |v| v.validation_uri }.join("\n")+"\n" - #uri_list = Validation::Validation.all(params).collect{ |v| v.validation_uri }.join("\n")+"\n" - #uri_list = Lib::DataMapperUtil.all(Validation::Validation,params).collect{ |v| v.validation_uri }.join("\n")+"\n" - uri_list = Validation::Validation.all.collect{|v| v.validation_uri}.join("\n") + "\n" - + uri_list = Lib::OhmUtil.find( Validation::Validation, params ).collect{|v| v.validation_uri}.join("\n") + "\n" if request.env['HTTP_ACCEPT'] =~ /text\/html/ related_links = "To perform a validation:\n"+ -- cgit v1.2.3 From d798101491309cd65b0d49ed2993d439a74d7c5e Mon Sep 17 00:00:00 2001 From: mguetlein Date: Mon, 9 May 2011 16:31:29 +0200 Subject: remove unit tests from validation repository (move to test) --- lib/test_util.rb | 76 ------------- test/unit_test.rb | 321 ------------------------------------------------------ 2 files changed, 397 deletions(-) delete mode 100755 lib/test_util.rb delete mode 100644 test/unit_test.rb diff --git a/lib/test_util.rb b/lib/test_util.rb deleted file mode 100755 index 590d295..0000000 --- a/lib/test_util.rb +++ /dev/null @@ -1,76 +0,0 @@ - -require 'test/unit' - -module Lib - # test utitily, to be included rack unit tests - module TestUtil - - def wait_for_task(uri) - return TestUtil.wait_for_task(uri) - end - - def self.wait_for_task(uri) - if uri.task_uri? - task = OpenTox::Task.find(uri) - task.wait_for_completion - #raise "task failed: "+uri.to_s+", error is:\n"+task.description if task.error? - LOGGER.error "task failed :\n"+task.to_yaml if task.error? - uri = task.result_uri - end - return uri - end - - # updloads a dataset - def upload_data(ws, file) - - case file.path - when /yaml$/ - type = "application/x-yaml" - when /owl$/ - type = "application/rdf+xml" - else - raise "unknown type for file: "+file.path.to_s - end - - data = File.read(file.path) - task_uri = RestClient.post ws, data, :content_type => type - data_uri = task_uri.body - puts "done: "+data_uri.to_s - add_resource(data_uri) - return data_uri - end - - # adds a resource to delete it later on - def add_resource(res) - @to_delete = [] unless @to_delete - @to_delete.push(res) - end - - # deletes all resources - def delete_resources - if @to_delete - @to_delete.each do |d| - puts "deleting "+d.to_s - if d.to_s =~ /^http.*/ - ext("curl -X DELETE "+d.to_s) - else - delete d.to_s - end - end - end - end - - # execute an external program like curl - def ext(call, indent=" ") - response = "" - IO.popen(call.to_s+" 2> /dev/null") do |f| - while line = f.gets - response += indent.to_s+line - end - end - assert $?==0, "returns error "+call+" "+response - return response - end - - end -end diff --git a/test/unit_test.rb b/test/unit_test.rb deleted file mode 100644 index 5984f42..0000000 --- a/test/unit_test.rb +++ /dev/null @@ -1,321 +0,0 @@ -#TEST_USER = "mgtest" -#TEST_PW = "mgpasswd" -ENV['RACK_ENV'] = 'production' - -require "rubygems" -require "sinatra" -require "uri" -require "yaml" -require 'application.rb' -require 'test/unit' -require 'rack/test' -require 'lib/test_util.rb' -require 'test/test_examples.rb' - -TEST_USER = "guest" -TEST_PW = "guest" - -#LOGGER = OTLogger.new(STDOUT) -#LOGGER.datetime_format = "%Y-%m-%d %H:%M:%S " -#LOGGER.formatter = Logger::Formatter.new - -module Sinatra - set :raise_errors, false - set :show_exceptions, false -end - -class Exception - def message - errorCause ? errorCause.to_yaml : to_s - end -end - -class ValidationTest < Test::Unit::TestCase - include Rack::Test::Methods - include Lib::TestUtil - - def global_setup - puts "login and upload datasets" - if AA_SERVER - @@subjectid = OpenTox::Authorization.authenticate(TEST_USER,TEST_PW) - raise "could not log in" unless @@subjectid - puts "logged in: "+@@subjectid.to_s - else - puts "AA disabled" - @@subjectid = nil - end - - @@data = [] - files = { File.new("data/hamster_carcinogenicity.mini.csv") => :crossvalidation, - File.new("data/EPAFHM.mini.csv") => :crossvalidation, - File.new("data/hamster_carcinogenicity.csv") => :validation, - File.new("data/EPAFHM.csv") => :validation, -# File.new("data/StJudes-HepG2-testset_Class.csv") => :crossvalidation - } - files.each do |file,type| - @@data << { :type => type, - :data => ValidationExamples::Util.upload_dataset(file, @@subjectid), - :feat => ValidationExamples::Util.prediction_feature_for_file(file), - :info => file.path, :delete => true} - end -# @@data << { :type => :crossvalidation, -# :data => "http://apps.ideaconsult.net:8080/ambit2/dataset/9?max=50", -# :feat => "http://apps.ideaconsult.net:8080/ambit2/feature/21573", -# :info => "http://apps.ideaconsult.net:8080/ambit2/dataset/9?max=50" } -# @@data << { :type => :validation, -# :data => "http://apps.ideaconsult.net:8080/ambit2/dataset/272?max=50", -# :feat => "http://apps.ideaconsult.net:8080/ambit2/feature/26221", -# :info => "http://apps.ideaconsult.net:8080/ambit2/dataset/272?max=50" } - end - - def global_teardown - puts "delete and logout" - @@data.each{|data| OpenTox::Dataset.find(data[:data],@@subjectid).delete(@@subjectid) if data[:delete]} - @@vs.each{|v| v.delete(@@subjectid)} if defined?@@vs - @@cvs.each{|cv| cv.delete(@@subjectid)} if defined?@@cvs - @@reports.each{|report| report.delete(@@subjectid)} if defined?@@reports - @@qmrfReports.each{|qmrfReport| qmrfReport.delete(@@subjectid)} if defined?@@qmrfReports - OpenTox::Authorization.logout(@@subjectid) if AA_SERVER - end - - def test_validation_list - puts "test_validation_list" - list = OpenTox::Validation.list - assert list.is_a?(Array) - list.each do |l| - assert l.uri? - end - end - - def test_training_test_split - - @@vs = [] - @@data.each do |data| - if data[:type]==:validation - puts "test_training_test_split "+data[:info].to_s - p = { - :dataset_uri => data[:data], - :algorithm_uri => File.join(CONFIG[:services]["opentox-algorithm"],"lazar"), - :algorithm_params => "feature_generation_uri="+File.join(CONFIG[:services]["opentox-algorithm"],"fminer/bbrc"), - :prediction_feature => data[:feat], - :split_ratio => 0.95, - :random_seed => 2} - t = OpenTox::SubTask.new(nil,0,1) - def t.progress(pct) - if !defined?@last_msg or @last_msg+3 model} ) - assert v_list.size==1 and v_list.include?(v.uri) - @@vs << v - end - end - end - - def test_validation_report - #@@cv = OpenTox::Crossvalidation.find("http://local-ot/validation/crossvalidation/48", @@subjectid) - - @@reports = [] unless defined?@@reports - @@vs.each do |v| - puts "test_validation_report" - assert defined?v,"no validation defined" - assert_kind_of OpenTox::Validation,v - if @@subjectid - assert_rest_call_error OpenTox::NotAuthorizedError do - OpenTox::CrossvalidationReport.create(v.uri) - end - end - report = OpenTox::ValidationReport.find_for_validation(v.uri,@@subjectid) - assert report==nil,"report already exists for validation\nreport: "+(report ? report.uri.to_s : "")+"\nvalidation: "+v.uri.to_s - report = OpenTox::ValidationReport.create(v.uri,@@subjectid) - assert report.uri.uri? - if @@subjectid - assert_rest_call_error OpenTox::NotAuthorizedError do - OpenTox::CrossvalidationReport.find(report.uri) - end - end - report = OpenTox::ValidationReport.find(report.uri,@@subjectid) - assert report.uri.uri? - report2 = OpenTox::ValidationReport.find_for_validation(v.uri,@@subjectid) - assert_equal report.uri,report2.uri - report3_uri = v.find_or_create_report(@@subjectid) - assert_equal report.uri,report3_uri - @@reports << report2 - end - end - - def test_crossvalidation_list - puts "test_crossvalidation_list" - list = OpenTox::Crossvalidation.list - assert list.is_a?(Array) - list.each do |l| - assert l.uri? - end - end - - def test_crossvalidation - - #assert_rest_call_error OpenTox::NotFoundError do - # OpenTox::Crossvalidation.find(File.join(CONFIG[:services]["opentox-validation"],"crossvalidation/noexistingid")) - #end - @@cvs = [] - @@data.each do |data| - if data[:type]==:crossvalidation - puts "test_crossvalidation "+data[:info].to_s - p = { - :dataset_uri => data[:data], - :algorithm_uri => File.join(CONFIG[:services]["opentox-algorithm"],"lazar"), - :algorithm_params => "feature_generation_uri="+File.join(CONFIG[:services]["opentox-algorithm"],"fminer/bbrc"), - :prediction_feature => data[:feat], - :num_folds => 2 } - t = OpenTox::SubTask.new(nil,0,1) - def t.progress(pct) - if !defined?@last_msg or @last_msg+3 algorithm} ) - assert cv_list.include?(cv.uri) - cv_list.each do |cv_uri| - alg = OpenTox::Crossvalidation.find(cv_uri).metadata[OT.algorithm] - assert alg==algorithm,"wrong algorithm for filtered crossvalidation, should be: '"+algorithm.to_s+"', is: '"+alg.to_s+"'" - end - - @@cvs << cv - end - end - end - - def test_crossvalidation_report - #@@cv = OpenTox::Crossvalidation.find("http://local-ot/validation/crossvalidation/48", @@subjectid) - - @@reports = [] unless defined?@@reports - @@cvs.each do |cv| - puts "test_crossvalidation_report" - assert defined?cv,"no crossvalidation defined" - assert_kind_of OpenTox::Crossvalidation,cv - #assert_rest_call_error OpenTox::NotFoundError do - # OpenTox::CrossvalidationReport.find_for_crossvalidation(cv.uri) - #end - if @@subjectid - assert_rest_call_error OpenTox::NotAuthorizedError do - OpenTox::CrossvalidationReport.create(cv.uri) - end - end - assert OpenTox::ValidationReport.find_for_validation(cv.uri,@@subjectid)==nil - report = OpenTox::CrossvalidationReport.create(cv.uri,@@subjectid) - assert report.uri.uri? - if @@subjectid - assert_rest_call_error OpenTox::NotAuthorizedError do - OpenTox::CrossvalidationReport.find(report.uri) - end - end - report = OpenTox::CrossvalidationReport.find(report.uri,@@subjectid) - assert report.uri.uri? - report2 = OpenTox::CrossvalidationReport.find_for_crossvalidation(cv.uri,@@subjectid) - assert_equal report.uri,report2.uri - report3_uri = cv.find_or_create_report(@@subjectid) - assert_equal report.uri,report3_uri - @@reports << report2 - end - end - - def test_qmrf_report - #@@cv = OpenTox::Crossvalidation.find("http://local-ot/validation/crossvalidation/13", @@subjectid) - - @@qmrfReports = [] - @@cvs.each do |cv| - puts "test_qmrf_report" - assert defined?cv,"no crossvalidation defined" - validations = cv.metadata[OT.validation] - assert_kind_of Array,validations - assert validations.size==cv.metadata[OT.numFolds].to_i,validations.size.to_s+"!="+cv.metadata[OT.numFolds].to_s - val = OpenTox::Validation.find(validations[0], @@subjectid) - model_uri = val.metadata[OT.model] - - model = OpenTox::Model::Generic.find(model_uri, @@subjectid) - assert model!=nil - - #assert_rest_call_error OpenTox::NotFoundError do - # OpenTox::QMRFReport.find_for_model(model_uri, @@subjectid) - #end - - @@qmrfReports << OpenTox::QMRFReport.create(model_uri, @@subjectid) - end - end - - ################### utils and overrides ########################## - - def app - Sinatra::Application - end - - # checks RestCallError type - def assert_rest_call_error( ex ) - if ex==OpenTox::NotAuthorizedError and @@subjectid==nil - puts "AA disabled: skipping test for not authorized" - return - end - begin - yield - rescue OpenTox::RestCallError => e - report = e.errorCause - while report.errorCause - report = report.errorCause - end - assert_equal report.errorType,ex.to_s - end - end - - # hack to have a global_setup and global_teardown - def teardown - if((@@expected_test_count-=1) == 0) - global_teardown - end - end - def setup - unless defined?@@expected_test_count - @@expected_test_count = (self.class.instance_methods.reject{|method| method[0..3] != 'test'}).length - global_setup - end - end - -end - - -- cgit v1.2.3 From 81609ef5a360e54f26d92236853ec0121fed0d42 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Mon, 9 May 2011 16:48:10 +0200 Subject: restore test_util.rb (accidently removed with unit-tests) --- lib/test_util.rb | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100755 lib/test_util.rb diff --git a/lib/test_util.rb b/lib/test_util.rb new file mode 100755 index 0000000..590d295 --- /dev/null +++ b/lib/test_util.rb @@ -0,0 +1,76 @@ + +require 'test/unit' + +module Lib + # test utitily, to be included rack unit tests + module TestUtil + + def wait_for_task(uri) + return TestUtil.wait_for_task(uri) + end + + def self.wait_for_task(uri) + if uri.task_uri? + task = OpenTox::Task.find(uri) + task.wait_for_completion + #raise "task failed: "+uri.to_s+", error is:\n"+task.description if task.error? + LOGGER.error "task failed :\n"+task.to_yaml if task.error? + uri = task.result_uri + end + return uri + end + + # updloads a dataset + def upload_data(ws, file) + + case file.path + when /yaml$/ + type = "application/x-yaml" + when /owl$/ + type = "application/rdf+xml" + else + raise "unknown type for file: "+file.path.to_s + end + + data = File.read(file.path) + task_uri = RestClient.post ws, data, :content_type => type + data_uri = task_uri.body + puts "done: "+data_uri.to_s + add_resource(data_uri) + return data_uri + end + + # adds a resource to delete it later on + def add_resource(res) + @to_delete = [] unless @to_delete + @to_delete.push(res) + end + + # deletes all resources + def delete_resources + if @to_delete + @to_delete.each do |d| + puts "deleting "+d.to_s + if d.to_s =~ /^http.*/ + ext("curl -X DELETE "+d.to_s) + else + delete d.to_s + end + end + end + end + + # execute an external program like curl + def ext(call, indent=" ") + response = "" + IO.popen(call.to_s+" 2> /dev/null") do |f| + while line = f.gets + response += indent.to_s+line + end + end + assert $?==0, "returns error "+call+" "+response + return response + end + + end +end -- cgit v1.2.3 From 4951f2181a7f2c9c6e04431ff244a6a528dc245a Mon Sep 17 00:00:00 2001 From: mguetlein Date: Tue, 10 May 2011 09:50:18 +0200 Subject: fix report created-at date --- report/report_content.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/report/report_content.rb b/report/report_content.rb index 36f9955..674bb13 100755 --- a/report/report_content.rb +++ b/report/report_content.rb @@ -8,7 +8,7 @@ class Reports::ReportContent attr_accessor :xml_report, :tmp_files def initialize(title) - @xml_report = Reports::XMLReport.new(title, Time.now.strftime("Created at %m.%d.%Y - %H:%M")) + @xml_report = Reports::XMLReport.new(title, Time.now.strftime("Created at %d.%m.%Y - %H:%M")) @tmp_file_count = 0 @current_section = @xml_report.get_root_element end -- cgit v1.2.3 From b012dc2c3299a829e9fa1bc45b5134c90db451d0 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Tue, 10 May 2011 12:27:42 +0200 Subject: set date when creating validations/crossvalidtions/reports --- lib/validation_db.rb | 24 +++++++++++++----------- report/report_persistance.rb | 17 ++++++++--------- 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/lib/validation_db.rb b/lib/validation_db.rb index 0beb73d..e2595c5 100755 --- a/lib/validation_db.rb +++ b/lib/validation_db.rb @@ -58,7 +58,7 @@ module Validation attribute :test_dataset_uri attribute :prediction_dataset_uri attribute :prediction_feature - attribute :created_at + attribute :date attribute :num_instances attribute :num_without_class attribute :num_unpredicted @@ -77,6 +77,11 @@ module Validation attr_accessor :subjectid + def self.create(params={}) + params[:date] = Time.new + super params + end + def classification_statistics YAML.load(self.classification_statistics_yaml) if self.classification_statistics_yaml end @@ -99,10 +104,6 @@ module Validation end public - def date - created_at - end - def validation_uri raise "no id" if self.id==nil $url_provider.url_for("/"+self.id.to_s, :full) @@ -130,7 +131,7 @@ module Validation attribute :algorithm_uri attribute :dataset_uri - attribute :created_at + attribute :date attribute :num_folds attribute :random_seed attribute :finished @@ -144,17 +145,18 @@ module Validation index :random_seed index :stratified index :finished - + + def self.create(params={}) + params[:date] = Time.new + super params + end + def save super OpenTox::Authorization.check_policy(crossvalidation_uri, subjectid) end public - def date - created_at - end - def crossvalidation_uri raise "no id" if self.id==nil $url_provider.url_for("/crossvalidation/"+self.id.to_s, :full) if self.id diff --git a/report/report_persistance.rb b/report/report_persistance.rb index 113c81b..c85ad68 100755 --- a/report/report_persistance.rb +++ b/report/report_persistance.rb @@ -191,7 +191,7 @@ module Reports class ReportData < Ohm::Model attribute :report_type - attribute :created_at + attribute :date attribute :validation_uris attribute :crossvalidation_uris attribute :model_uris @@ -203,16 +203,16 @@ module Reports attr_accessor :subjectid + def self.create(params={}) + params[:date] = Time.new + super params + end + def save super OpenTox::Authorization.check_policy(report_uri, subjectid) end - public - def date - created_at - end - def report_uri raise "no id" if self.id==nil Reports::ReportService.instance.get_uri(self.report_type, self.id) @@ -242,10 +242,9 @@ module Reports def new_report(report_content, type, meta_data, uri_provider, subjectid=nil) raise "report meta data missing" unless meta_data - report = ReportData.new(meta_data) + meta_data[:report_type] = type + report = ReportData.create(meta_data) report.subjectid = subjectid - report.report_type = type - report.save OpenTox::Authorization.check_policy(report.report_uri, subjectid) new_report_with_id(report_content, type, report.id) end -- cgit v1.2.3 From 53f313788af0530943809a383734698faa569397 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Tue, 10 May 2011 12:37:25 +0200 Subject: fix (replace remainding created_at with date) --- validation/validation_application.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/validation/validation_application.rb b/validation/validation_application.rb index 2bd2fea..2617f7a 100755 --- a/validation/validation_application.rb +++ b/validation/validation_application.rb @@ -132,7 +132,7 @@ get '/crossvalidation/:id/statistics' do # LOGGER.debug vals.class raise "could not load all validations for crossvalidation" if vals.include?(nil) v = Lib::MergeObjects.merge_array_objects( vals ) - v.created_at = nil + v.date = nil #v.id = nil case request.env['HTTP_ACCEPT'].to_s -- cgit v1.2.3 From 5f3202531526283919c0707f11dcf37370f90b25 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Tue, 10 May 2011 16:08:13 +0200 Subject: fix stratified validation --- validation/validation_service.rb | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/validation/validation_service.rb b/validation/validation_service.rb index 93c167f..ce16213 100755 --- a/validation/validation_service.rb +++ b/validation/validation_service.rb @@ -362,11 +362,11 @@ module Validation split_compounds = shuffled_compounds.chunk( self.num_folds.to_i ) else class_compounds = {} # "inactive" => compounds[], "active" => compounds[] .. - shuffled_compounds.each do |c| - orig_dataset.features(c).each do |a| - value = OpenTox::Feature.new(:uri => a.uri).value(prediction_feature).to_s - class_compounds[value] = [] unless class_compounds.has_key?(value) - class_compounds[value].push(c) + accept_values = orig_dataset.features[prediction_feature][OT.acceptValue] + accept_values.each do |value| + class_compounds[value] = [] + shuffled_compounds.each do |c| + class_compounds[value] << c if orig_dataset.data_entries[c][prediction_feature].to_s==value end end LOGGER.debug "stratified cv: different class values: "+class_compounds.keys.join(", ") @@ -374,7 +374,7 @@ module Validation split_class_compounds = [] # inactive_compounds[fold_i][], active_compounds[fold_i][], .. class_compounds.values.each do |compounds| - split_class_compounds.push( compounds.chunk( self.num_folds ) ) + split_class_compounds << compounds.chunk( self.num_folds.to_i ) end LOGGER.debug "stratified cv: splits for class values: "+split_class_compounds.collect{ |c| c.collect{ |cc| cc.size }.join("/") }.join(", ") @@ -385,7 +385,7 @@ module Validation # step 1: sort current split in ascending order split_comp.sort!{|x,y| x.size <=> y.size } # step 2: add splits - (0..self.num_folds-1).each do |i| + (0..self.num_folds.to_i-1).each do |i| unless split_compounds[i] split_compounds[i] = split_comp[i] else -- cgit v1.2.3 From 9436a6e0c6b8cfc0ebbc742fd6568c233a75006d Mon Sep 17 00:00:00 2001 From: mguetlein Date: Wed, 11 May 2011 10:13:46 +0200 Subject: fix copy-exisiting-datasets function (removing datamapper syntax) --- validation/validation_service.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/validation/validation_service.rb b/validation/validation_service.rb index ce16213..247cdb3 100755 --- a/validation/validation_service.rb +++ b/validation/validation_service.rb @@ -324,7 +324,7 @@ module Validation cvs.each do |cv| next if AA_SERVER and !OpenTox::Authorization.authorized?(cv.crossvalidation_uri,"GET",self.subjectid) tmp_val = [] - Validation.all( :crossvalidation_id => cv.id ).each do |v| + Validation.find( :crossvalidation_id => cv.id ).each do |v| break unless v.prediction_feature == prediction_feature and OpenTox::Dataset.exist?(v.training_dataset_uri,self.subjectid) and @@ -340,7 +340,7 @@ module Validation :prediction_feature => prediction_feature, :algorithm_uri => self.algorithm_uri } end - if tmp_val.size == self.num_folds + if tmp_val.size == self.num_folds.to_i @tmp_validations = tmp_val LOGGER.debug "copied dataset uris from cv "+cv.crossvalidation_uri.to_s #+":\n"+tmp_val.inspect return true -- cgit v1.2.3 From 2f3bdab44bc49a65aa9843516c86337f26d4201d Mon Sep 17 00:00:00 2001 From: mguetlein Date: Wed, 11 May 2011 13:39:46 +0200 Subject: fix stratified cv for datasets with multiple compounds --- validation/validation_service.rb | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/validation/validation_service.rb b/validation/validation_service.rb index 247cdb3..dcfb8d7 100755 --- a/validation/validation_service.rb +++ b/validation/validation_service.rb @@ -366,7 +366,8 @@ module Validation accept_values.each do |value| class_compounds[value] = [] shuffled_compounds.each do |c| - class_compounds[value] << c if orig_dataset.data_entries[c][prediction_feature].to_s==value + #PENDING accept values are type string, data_entries may be boolean + class_compounds[value] << c if orig_dataset.data_entries[c][prediction_feature].collect{|v| v.to_s}.include?(value) end end LOGGER.debug "stratified cv: different class values: "+class_compounds.keys.join(", ") @@ -425,7 +426,8 @@ module Validation end raise "internal error, num test compounds not correct" unless (shuffled_compounds.size/self.num_folds.to_i - test_compounds.size).abs <= 1 - raise "internal error, num train compounds not correct" unless shuffled_compounds.size - test_compounds.size == train_compounds.size + raise "internal error, num train compounds not correct, should be '"+(shuffled_compounds.size-test_compounds.size).to_s+ + "', is '"+train_compounds.size.to_s+"'" unless shuffled_compounds.size - test_compounds.size == train_compounds.size LOGGER.debug "training set: "+datasetname+"_train, compounds: "+train_compounds.size.to_s #train_dataset_uri = orig_dataset.create_new_dataset( train_compounds, orig_dataset.features, datasetname + '_train', source ) -- cgit v1.2.3 From 05363717e6a2dd7fb768cd8a8fe8c0c5fda76931 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Wed, 11 May 2011 14:34:54 +0200 Subject: fix for crossvalidation//predictions (datamapper to redis syntax) --- validation/validation_application.rb | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/validation/validation_application.rb b/validation/validation_application.rb index 2617f7a..4bcd07d 100755 --- a/validation/validation_application.rb +++ b/validation/validation_application.rb @@ -187,8 +187,7 @@ get '/crossvalidation/:id/predictions' do raise OpenTox::BadRequestError.new "Crossvalidation '"+params[:id].to_s+"' not finished" unless crossvalidation.finished content_type "application/x-yaml" - #validations = Validation::Validation.find( :all, :conditions => { :crossvalidation_id => params[:id] } ) - validations = Validation::Validation.all( :crossvalidation_id => params[:id] ) + validations = Validation::Validation.find( :crossvalidation_id => params[:id] ) p = Lib::OTPredictions.to_array( validations.collect{ |v| v.compute_validation_stats_with_model(nil, true) } ).to_yaml case request.env['HTTP_ACCEPT'].to_s -- cgit v1.2.3 From 221eee5338ea7512bc87ab1209bcf1b955325ec6 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Wed, 11 May 2011 15:35:03 +0200 Subject: fix: subjectid needed for loading acceptValue with a&a --- report/validation_access.rb | 4 ++-- report/validation_data.rb | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/report/validation_access.rb b/report/validation_access.rb index 22c7146..e9b6e19 100755 --- a/report/validation_access.rb +++ b/report/validation_access.rb @@ -74,9 +74,9 @@ class Reports::ValidationDB validation.predicted_variable, subjectid, task) end - def get_accept_values( validation ) + def get_accept_values( validation, subjectid=nil ) # PENDING So far, one has to load the whole dataset to get the accept_value from ambit - d = OpenTox::Dataset.find( validation.test_target_dataset_uri ) + d = OpenTox::Dataset.find( validation.test_target_dataset_uri, subjectid ) accept_values = d.features[validation.prediction_feature][OT.acceptValue] raise "cannot get accept values from dataset "+validation.test_target_dataset_uri.to_s+" for feature "+ validation.prediction_feature+":\n"+d.features[validation.prediction_feature].to_yaml unless accept_values!=nil diff --git a/report/validation_data.rb b/report/validation_data.rb index fcb8fc0..42b179b 100755 --- a/report/validation_data.rb +++ b/report/validation_data.rb @@ -105,7 +105,7 @@ module Reports # returns the predictions feature values (i.e. the domain of the class attribute) # def get_accept_values() - @accept_values = @@validation_access.get_accept_values(self) unless @accept_values + @accept_values = @@validation_access.get_accept_values(self, @subjectid) unless @accept_values @accept_values end -- cgit v1.2.3 From a730ae5a2a34ece618ae7b66c50b0d5aca7c9120 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Thu, 12 May 2011 10:20:48 +0200 Subject: fix report to html conversion timeout --- report/report_service.rb | 3 +++ 1 file changed, 3 insertions(+) diff --git a/report/report_service.rb b/report/report_service.rb index 91eefe8..722c3d6 100644 --- a/report/report_service.rb +++ b/report/report_service.rb @@ -83,6 +83,9 @@ module Reports LOGGER.debug "report persisted with id: '"+id.to_s+"'" task.progress(100) if task + #HACK - format to html right after creation, as dynamically create html may cause deadlocks + get_report(type, id, "text/html") + return get_uri(type, id) end -- cgit v1.2.3 From c858faa20607125097a3ca36f9178c4b4076b071 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Thu, 12 May 2011 12:45:18 +0200 Subject: move to ruby-plot 0.0.3, use png instead of svg, use ?media for compound images --- lib/ohm_util.rb | 7 ++++--- lib/ot_predictions.rb | 2 +- report/environment.rb | 2 +- report/plot_factory.rb | 16 ++++++++-------- report/report_content.rb | 12 ++++++------ report/xml_report.rb | 2 +- 6 files changed, 21 insertions(+), 20 deletions(-) diff --git a/lib/ohm_util.rb b/lib/ohm_util.rb index 21d7978..856f9d2 100644 --- a/lib/ohm_util.rb +++ b/lib/ohm_util.rb @@ -6,7 +6,7 @@ module Lib prop_names = model.attributes.collect{|p| p.to_s} params.keys.each do |k| key = k.to_s - if (key == "subjectid") + if (key == "subjectid" || key == "media") params.delete(k) else unless prop_names.include?(key) @@ -28,10 +28,11 @@ module Lib end def self.find(model, filter_params) - if (filter_params.size==0) + params = check_params(model,filter_params) + if (params.size==0) model.all else - model.find(check_params(model,filter_params)) + model.find(params) end end diff --git a/lib/ot_predictions.rb b/lib/ot_predictions.rb index 1fd601c..22f9b20 100755 --- a/lib/ot_predictions.rb +++ b/lib/ot_predictions.rb @@ -213,7 +213,7 @@ module Lib begin #a.push( "http://ambit.uni-plovdiv.bg:8080/ambit2/depict/cdk?search="+ # URI.encode(OpenTox::Compound.new(:uri=>p.identifier(i)).smiles) ) if add_pic - a << p.identifier(i)+"/image" + a << p.identifier(i)+"?media=image/png" rescue => ex raise ex #a.push("Could not add pic: "+ex.message) diff --git a/report/environment.rb b/report/environment.rb index 4729dc8..a149a1d 100755 --- a/report/environment.rb +++ b/report/environment.rb @@ -4,7 +4,7 @@ 'rexml/document', 'ruby-plot', 'opentox-ruby' ].each do |g| require g end -gem 'ruby-plot', '= 0.0.2' +gem 'ruby-plot', '= 0.0.3' #R.quit diff --git a/report/plot_factory.rb b/report/plot_factory.rb index 43c45fc..deb1880 100644 --- a/report/plot_factory.rb +++ b/report/plot_factory.rb @@ -161,7 +161,7 @@ module Reports end - def self.create_ranking_plot( svg_out_file, validation_set, compare_attribute, equal_attribute, rank_attribute, class_value=nil ) + def self.create_ranking_plot( out_file, validation_set, compare_attribute, equal_attribute, rank_attribute, class_value=nil ) #compute ranks #puts "rank attibute is "+rank_attribute.to_s @@ -184,14 +184,14 @@ module Reports ranks, nil, #0.1, validation_set.num_different_values(equal_attribute), - svg_out_file) + out_file) end protected - def self.plot_ranking( title, comparables_array, ranks_array, confidence = nil, numdatasets = nil, svg_out_file = nil ) + def self.plot_ranking( title, comparables_array, ranks_array, confidence = nil, numdatasets = nil, out_file = nil ) (confidence and numdatasets) ? conf = "-q "+confidence.to_s+" -k "+numdatasets.to_s : conf = "" - svg_out_file ? show = "-o" : show = "" + out_file ? show = "-o" : show = "" (title and title.length > 0) ? tit = '-t "'+title+'"' : tit = "" #title = "-t \""+ranking_value_prop+"-Ranking ("+comparables.size.to_s+" "+comparable_prop+"s, "+num_groups.to_s+" "+ranking_group_prop+"s, p < "+p.to_s+")\" " @@ -208,12 +208,12 @@ module Reports end raise "rank plot failed" unless $?==0 - if svg_out_file - f = File.new(svg_out_file, "w") + if out_file + f = File.new(out_file, "w") f.puts res end - svg_out_file ? svg_out_file : res + out_file ? out_file : res end def self.demo_ranking_plot @@ -264,7 +264,7 @@ module Reports :actual_values => [0, 1, 0, 0, 1, 1]} tp_fp_rates = get_tp_fp_rates(roc_values) data = { :names => ["default"], :fp_rate => [tp_fp_rates[:fp_rate]], :tp_rate => [tp_fp_rates[:tp_rate]] } - RubyPlot::plot_lines("/tmp/plot.svg", + RubyPlot::plot_lines("/tmp/plot.png", "ROC-Plot", "False positive rate", "True Positive Rate", data[:names], data[:fp_rate], data[:tp_rate], data[:faint] ) diff --git a/report/report_content.rb b/report/report_content.rb index 674bb13..1e47cee 100755 --- a/report/report_content.rb +++ b/report/report_content.rb @@ -155,12 +155,12 @@ class Reports::ReportContent section_text += "\nWARNING: regression plot information not available for all validation results" if prediction_set.size!=validation_set.size @xml_report.add_paragraph(section_regr, section_text) if section_text - plot_file_name = "regr_plot"+@tmp_file_count.to_s+".svg" + plot_file_name = "regr_plot"+@tmp_file_count.to_s+".png" @tmp_file_count += 1 begin plot_file_path = add_tmp_file(plot_file_name) Reports::PlotFactory.create_regression_plot( plot_file_path, prediction_set, name_attribute ) - @xml_report.add_imagefigure(section_regr, image_title, plot_file_name, "SVG", 120, image_caption) + @xml_report.add_imagefigure(section_regr, image_title, plot_file_name, "PNG", 100, image_caption) rescue RuntimeError => ex LOGGER.error("Could not create regression plot: "+ex.message) rm_tmp_file(plot_file_name) @@ -194,12 +194,12 @@ class Reports::ReportContent class_value = accept_values[i] image_title = image_titles ? image_titles[i] : "ROC Plot for class-value '"+class_value.to_s+"'" image_caption = image_captions ? image_captions[i] : nil - plot_file_name = "roc_plot"+@tmp_file_count.to_s+".svg" + plot_file_name = "roc_plot"+@tmp_file_count.to_s+".png" @tmp_file_count += 1 begin plot_file_path = add_tmp_file(plot_file_name) Reports::PlotFactory.create_roc_plot( plot_file_path, prediction_set, class_value, split_set_attribute, false )#prediction_set.size>1 ) - @xml_report.add_imagefigure(section_roc, image_title, plot_file_name, "SVG", 120, image_caption) + @xml_report.add_imagefigure(section_roc, image_title, plot_file_name, "PNG", 100, image_caption) rescue RuntimeError => ex msg = "WARNING could not create roc plot for class value '"+class_value.to_s+"': "+ex.message LOGGER.error(msg) @@ -270,11 +270,11 @@ class Reports::ReportContent section_bar = @xml_report.add_section(@current_section, section_title) @xml_report.add_paragraph(section_bar, section_text) if section_text - plot_file_name = "bar_plot"+@tmp_file_count.to_s+".svg" + plot_file_name = "bar_plot"+@tmp_file_count.to_s+".png" @tmp_file_count += 1 plot_file_path = add_tmp_file(plot_file_name) Reports::PlotFactory.create_bar_plot(plot_file_path, validation_set, title_attribute, value_attributes ) - @xml_report.add_imagefigure(section_bar, image_title, plot_file_name, "SVG", 120, image_caption) + @xml_report.add_imagefigure(section_bar, image_title, plot_file_name, "PNG", 100, image_caption) end private diff --git a/report/xml_report.rb b/report/xml_report.rb index 4b9a11a..4fbfae3 100755 --- a/report/xml_report.rb +++ b/report/xml_report.rb @@ -184,7 +184,7 @@ module Reports row = Element.new("row") r.each do |v| entry = Element.new("entry") - if auto_link_urls && v.to_s =~ /depict/ || v.to_s =~ /image$/ #PENDING + if auto_link_urls && v.to_s =~ /depict/ || v.to_s =~ /image\/png$/ #PENDING add_image(entry, v.to_s) elsif auto_link_urls && v.to_s =~ /^http(s?):\/\// add_url(entry, v.to_s, v.to_s) -- cgit v1.2.3 From ac97115f25ca0e8ecc3ffb6baed3739353cadb6f Mon Sep 17 00:00:00 2001 From: mguetlein Date: Thu, 12 May 2011 13:09:17 +0200 Subject: rescue non-runtime error when plotting, more sensible error msg when no predictions for regression plot --- report/plot_factory.rb | 3 ++- report/report_content.rb | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/report/plot_factory.rb b/report/plot_factory.rb index deb1880..5fd20bb 100644 --- a/report/plot_factory.rb +++ b/report/plot_factory.rb @@ -77,7 +77,8 @@ module Reports x << x_i y << y_i end - + + raise "no predictions performed" if x.size==0 || x[0].size==0 RubyPlot::plot_points(out_file, "Regression plot", "Predicted values", "Actual values", names, x, y ) end diff --git a/report/report_content.rb b/report/report_content.rb index 1e47cee..6c8148e 100755 --- a/report/report_content.rb +++ b/report/report_content.rb @@ -161,7 +161,7 @@ class Reports::ReportContent plot_file_path = add_tmp_file(plot_file_name) Reports::PlotFactory.create_regression_plot( plot_file_path, prediction_set, name_attribute ) @xml_report.add_imagefigure(section_regr, image_title, plot_file_name, "PNG", 100, image_caption) - rescue RuntimeError => ex + rescue Exception => ex LOGGER.error("Could not create regression plot: "+ex.message) rm_tmp_file(plot_file_name) @xml_report.add_paragraph(section_regr, "could not create regression plot: "+ex.message) @@ -200,7 +200,7 @@ class Reports::ReportContent plot_file_path = add_tmp_file(plot_file_name) Reports::PlotFactory.create_roc_plot( plot_file_path, prediction_set, class_value, split_set_attribute, false )#prediction_set.size>1 ) @xml_report.add_imagefigure(section_roc, image_title, plot_file_name, "PNG", 100, image_caption) - rescue RuntimeError => ex + rescue Exception => ex msg = "WARNING could not create roc plot for class value '"+class_value.to_s+"': "+ex.message LOGGER.error(msg) rm_tmp_file(plot_file_name) -- cgit v1.2.3 From 5768eb36ee04727c5d88c0670b872b7621c1f59f Mon Sep 17 00:00:00 2001 From: mguetlein Date: Fri, 13 May 2011 08:59:31 +0200 Subject: reduce ruby-plot dependency to pessimistic version vonstraint (~>) --- report/environment.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/report/environment.rb b/report/environment.rb index a149a1d..19ea3a2 100755 --- a/report/environment.rb +++ b/report/environment.rb @@ -4,7 +4,7 @@ 'rexml/document', 'ruby-plot', 'opentox-ruby' ].each do |g| require g end -gem 'ruby-plot', '= 0.0.3' +gem 'ruby-plot', "~>0.3.0" #R.quit -- cgit v1.2.3 From 8afc018a179b254905f93ef8607338a7826baf4e Mon Sep 17 00:00:00 2001 From: mguetlein Date: Mon, 16 May 2011 14:02:33 +0200 Subject: fix mean_absolute_error computation --- lib/predictions.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/predictions.rb b/lib/predictions.rb index db3c60c..b40a8f1 100755 --- a/lib/predictions.rb +++ b/lib/predictions.rb @@ -457,7 +457,7 @@ module Lib def mean_absolute_error return 0 if (@num_with_actual_value - @num_unpredicted)==0 - Math.sqrt(@sum_abs_error / (@num_with_actual_value - @num_unpredicted).to_f) + @sum_abs_error / (@num_with_actual_value - @num_unpredicted).to_f end def sum_squared_error -- cgit v1.2.3 From eb5f8b5da9b247d62abc8a7b9eb2e44fe46a1c79 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Mon, 16 May 2011 14:46:50 +0200 Subject: add confidence plots --- lib/predictions.rb | 24 +++++--- report/environment.rb | 2 +- report/plot_factory.rb | 147 ++++++++++++++++++++++++++++++++++++++++++++--- report/report_content.rb | 50 +++++++++++++++- report/report_factory.rb | 40 +++++++++++-- 5 files changed, 240 insertions(+), 23 deletions(-) diff --git a/lib/predictions.rb b/lib/predictions.rb index b40a8f1..420790e 100755 --- a/lib/predictions.rb +++ b/lib/predictions.rb @@ -88,6 +88,14 @@ module Lib case @feature_type when "classification" + + # confusion-matrix will contain counts for predictions in a 2d array: + # index of first dim: actual values + # index of second dim: predicited values + # example: + # * summing up over all i with fixed n + # * confusion_matrix[i][n] + # * will give the number of instances that are predicted as n @confusion_matrix = [] @accept_values.each do |v| @confusion_matrix.push( Array.new( @num_classes, 0 ) ) @@ -289,8 +297,8 @@ module Lib def precision(class_index=nil) return prediction_feature_value_map( lambda{ |i| precision(i) } ) if class_index==nil - correct = 0 - total = 0 + correct = 0 # all instances with prediction class_index that are correctly classified + total = 0 # all instances with prediciton class_index (0..@num_classes-1).each do |i| correct += @confusion_matrix[i][class_index] if i == class_index total += @confusion_matrix[i][class_index] @@ -487,21 +495,21 @@ module Lib return @variance_actual end - # data for roc-plots ################################################################################### + # data for (roc-)plots ################################################################################### - def get_roc_values(class_value) + def get_prediction_values(class_value) #puts "get_roc_values for class_value: "+class_value.to_s raise "no confidence values" if @confidence_values==nil - raise "no class-value specified" if class_value==nil + #raise "no class-value specified" if class_value==nil - class_index = @accept_values.index(class_value) - raise "class not found "+class_value.to_s if class_index==nil + class_index = @accept_values.index(class_value) if class_value!=nil + raise "class not found "+class_value.to_s if (class_value!=nil && class_index==nil) c = []; p = []; a = [] (0..@predicted_values.size-1).each do |i| # NOTE: not predicted instances are ignored here - if @predicted_values[i]!=nil and @predicted_values[i]==class_index + if @predicted_values[i]!=nil and (class_index==nil || @predicted_values[i]==class_index) c << @confidence_values[i] p << @predicted_values[i] a << @actual_values[i] diff --git a/report/environment.rb b/report/environment.rb index 19ea3a2..59465aa 100755 --- a/report/environment.rb +++ b/report/environment.rb @@ -4,7 +4,7 @@ 'rexml/document', 'ruby-plot', 'opentox-ruby' ].each do |g| require g end -gem 'ruby-plot', "~>0.3.0" +gem 'ruby-plot', "~>0.4.0" #R.quit diff --git a/report/plot_factory.rb b/report/plot_factory.rb index 5fd20bb..a4e415a 100644 --- a/report/plot_factory.rb +++ b/report/plot_factory.rb @@ -79,7 +79,7 @@ module Reports end raise "no predictions performed" if x.size==0 || x[0].size==0 - RubyPlot::plot_points(out_file, "Regression plot", "Predicted values", "Actual values", names, x, y ) + RubyPlot::regression_point_plot(out_file, "Regression plot", "Predicted values", "Actual values", names, x, y ) end @@ -102,7 +102,7 @@ module Reports tp_rates = [] attribute_values.each do |value| begin - data = transform_predictions(validation_set.filter({split_set_attribute => value}), class_value, false) + data = transform_roc_predictions(validation_set.filter({split_set_attribute => value}), class_value, false) names << value.to_s fp_rates << data[:fp_rate][0] tp_rates << data[:tp_rate][0] @@ -112,11 +112,50 @@ module Reports end RubyPlot::plot_lines(out_file, "ROC-Plot", "False positive rate", "True Positive Rate", names, fp_rates, tp_rates ) else - data = transform_predictions(validation_set, class_value, show_single_curves) + data = transform_roc_predictions(validation_set, class_value, show_single_curves) RubyPlot::plot_lines(out_file, "ROC-Plot", "False positive rate", "True Positive Rate", data[:names], data[:fp_rate], data[:tp_rate], data[:faint] ) end end + + def self.create_confidence_plot( out_file, validation_set, class_value, split_set_attribute=nil, show_single_curves=false ) + + LOGGER.debug "creating confidence plot for '"+validation_set.size.to_s+"' validations, out-file:"+out_file.to_s + + if split_set_attribute + attribute_values = validation_set.get_values(split_set_attribute) + names = [] + confidence = [] + performance = [] + attribute_values.each do |value| + begin + data = transform_confidence_predictions(validation_set.filter({split_set_attribute => value}), class_value, false) + names << value.to_s + confidence << data[:confidence][0] + performance << data[:performance][0] + rescue + LOGGER.warn "could not create confidence plot for "+value.to_s + end + end + #RubyPlot::plot_lines(out_file, "Percent Correct vs Confidence Plot", "Confidence", "Percent Correct", names, fp_rates, tp_rates ) + case validation_set.unique_feature_type + when "classification" + RubyPlot::accuracy_confidence_plot(out_file, "Percent Correct vs Confidence Plot", "Confidence", "Percent Correct", names, confidence, performance) + when "regression" + RubyPlot::accuracy_confidence_plot(out_file, "RMSE vs Confidence Plot", "Confidence", "RMSE", names, confidence, performance, true) + end + else + data = transform_confidence_predictions(validation_set, class_value, show_single_curves) + case validation_set.unique_feature_type + when "classification" + RubyPlot::accuracy_confidence_plot(out_file, "Percent Correct vs Confidence Plot", "Confidence", "Percent Correct", data[:names], data[:confidence], data[:performance]) + when "regression" + RubyPlot::accuracy_confidence_plot(out_file, "RMSE vs Confidence Plot", "Confidence", "RMSE", data[:names], data[:confidence], data[:performance], true) + end + end + end + + def self.create_bar_plot( out_file, validation_set, title_attribute, value_attributes ) LOGGER.debug "creating bar plot, out-file:"+out_file.to_s @@ -128,7 +167,7 @@ module Reports validation_set.validations.each do |v| values = [] value_attributes.each do |a| - validation_set.get_domain_for_attr(a).each do |class_value| + validation_set.get_accept_values_for_attr(a).each do |class_value| value = v.send(a) if value.is_a?(Hash) if class_value==nil @@ -222,7 +261,7 @@ module Reports end private - def self.transform_predictions(validation_set, class_value, add_single_folds=false) + def self.transform_roc_predictions(validation_set, class_value, add_single_folds=false) if (validation_set.size > 1) @@ -230,7 +269,7 @@ module Reports sum_roc_values = { :predicted_values => [], :actual_values => [], :confidence_values => []} (0..validation_set.size-1).each do |i| - roc_values = validation_set.get(i).get_predictions.get_roc_values(class_value) + roc_values = validation_set.get(i).get_predictions.get_prediction_values(class_value) sum_roc_values[:predicted_values] += roc_values[:predicted_values] sum_roc_values[:confidence_values] += roc_values[:confidence_values] sum_roc_values[:actual_values] += roc_values[:actual_values] @@ -253,12 +292,51 @@ module Reports faint << false return { :names => names, :fp_rate => fp_rate, :tp_rate => tp_rate, :faint => faint } else - roc_values = validation_set.validations[0].get_predictions.get_roc_values(class_value) + roc_values = validation_set.validations[0].get_predictions.get_prediction_values(class_value) tp_fp_rates = get_tp_fp_rates(roc_values) return { :names => ["default"], :fp_rate => [tp_fp_rates[:fp_rate]], :tp_rate => [tp_fp_rates[:tp_rate]] } end end + def self.transform_confidence_predictions(validation_set, class_value, add_single_folds=false) + + if (validation_set.size > 1) + + names = []; performance = []; confidence = []; faint = [] + sum_confidence_values = { :predicted_values => [], :actual_values => [], :confidence_values => []} + + (0..validation_set.size-1).each do |i| + confidence_values = validation_set.get(i).get_predictions.get_prediction_values(class_value) + sum_confidence_values[:predicted_values] += confidence_values[:predicted_values] + sum_confidence_values[:confidence_values] += confidence_values[:confidence_values] + sum_confidence_values[:actual_values] += confidence_values[:actual_values] + + if add_single_folds + begin + pref_conf_rates = get_performance_confidence_rates(confidence_values) + names << "fold "+i.to_s + performance << pref_conf_rates[:performance] + confidence << pref_conf_rates[:confidence] + faint << true + rescue + LOGGER.warn "could not get confidence vals for fold "+i.to_s + end + end + end + pref_conf_rates = get_performance_confidence_rates(sum_confidence_values, validation_set.unique_feature_type) + names << nil # "all" + performance << pref_conf_rates[:performance] + confidence << pref_conf_rates[:confidence] + faint << false + return { :names => names, :performance => performance, :confidence => confidence, :faint => faint } + + else + confidence_values = validation_set.validations[0].get_predictions.get_prediction_values(class_value) + pref_conf_rates = get_performance_confidence_rates(confidence_values, validation_set.unique_feature_type) + return { :names => ["default"], :performance => [pref_conf_rates[:performance]], :confidence => [pref_conf_rates[:confidence]] } + end + end + def self.demo_rock_plot roc_values = {:confidence_values => [0.1, 0.9, 0.5, 0.6, 0.6, 0.6], :predicted_values => [1, 0, 0, 1, 0, 1], @@ -271,6 +349,61 @@ module Reports "True Positive Rate", data[:names], data[:fp_rate], data[:tp_rate], data[:faint] ) end + def self.get_performance_confidence_rates(roc_values, feature_type) + + c = roc_values[:confidence_values] + p = roc_values[:predicted_values] + a = roc_values[:actual_values] + raise "no prediction values for roc-plot" if p.size==0 + + (0..p.size-2).each do |i| + ((i+1)..p.size-1).each do |j| + if c[i]0 && (c[i]>=conf[-1]-0.00001) + perf.pop + conf.pop + end + perf << correct/count.to_f * 100 + conf << c[i] + end + when "regression" + count = 0 + sum_squared_error = 0 + (0..p.size-1).each do |i| + count += 1 + sum_squared_error += (p[i]-a[i])**2 + if i>0 && (c[i]>=conf[-1]-0.00001) + perf.pop + conf.pop + end + perf << Math.sqrt(sum_squared_error/count.to_f) + conf << c[i] + end + end + #puts perf.inspect + + return {:performance => perf,:confidence => conf} + end + + def self.get_tp_fp_rates(roc_values) c = roc_values[:confidence_values] diff --git a/report/report_content.rb b/report/report_content.rb index 6c8148e..ca04f25 100755 --- a/report/report_content.rb +++ b/report/report_content.rb @@ -147,8 +147,8 @@ class Reports::ReportContent image_caption=nil) image_title = "Regression plot" unless image_title - - section_regr = @xml_report.add_section(@current_section, section_title) + #section_regr = @xml_report.add_section(@current_section, section_title) + section_regr = @current_section prediction_set = validation_set.collect{ |v| v.get_predictions } if prediction_set.size>0 @@ -178,7 +178,8 @@ class Reports::ReportContent image_titles=nil, image_captions=nil) - section_roc = @xml_report.add_section(@current_section, section_title) + #section_roc = @xml_report.add_section(@current_section, section_title) + section_roc = @current_section prediction_set = validation_set.collect{ |v| v.get_predictions && v.get_predictions.confidence_values_available? } if prediction_set.size>0 @@ -213,6 +214,49 @@ class Reports::ReportContent end + def add_confidence_plot( validation_set, + split_set_attribute = nil, + section_title="Confidence plots", + section_text=nil, + image_titles=nil, + image_captions=nil) + + #section_conf = @xml_report.add_section(@current_section, section_title) + section_conf = @current_section + prediction_set = validation_set.collect{ |v| v.get_predictions && v.get_predictions.confidence_values_available? } + + if prediction_set.size>0 + if prediction_set.size!=validation_set.size + section_text += "\nWARNING: plot information not available for all validation results" + LOGGER.error "WARNING: plot information not available for all validation results:\n"+ + "validation set size: "+validation_set.size.to_s+", prediction set size: "+prediction_set.size.to_s + end + @xml_report.add_paragraph(section_conf, section_text) if section_text + + image_title = image_titles ? image_titles[i] : "Percent Correct vs Confidence Plot" + image_caption = image_captions ? image_captions[i] : nil + plot_file_name = "conf_plot"+@tmp_file_count.to_s+".png" + @tmp_file_count += 1 + + begin + + plot_file_path = add_tmp_file(plot_file_name) + Reports::PlotFactory.create_confidence_plot( plot_file_path, prediction_set, nil, split_set_attribute, false ) + @xml_report.add_imagefigure(section_conf, image_title, plot_file_name, "PNG", 100, image_caption) + + rescue Exception => ex + msg = "WARNING could not create confidence plot: "+ex.message + LOGGER.error(msg) + rm_tmp_file(plot_file_name) + @xml_report.add_paragraph(section_conf, msg) + end + + else + @xml_report.add_paragraph(section_conf, "No prediction-confidence info for confidence plot available.") + end + + end + def add_ranking_plots( validation_set, compare_attribute, equal_attribute, diff --git a/report/report_factory.rb b/report/report_factory.rb index f48d11a..08d9418 100755 --- a/report/report_factory.rb +++ b/report/report_factory.rb @@ -68,11 +68,17 @@ module Reports::ReportFactory case val.feature_type when "classification" report.add_result(validation_set, [:validation_uri] + VAL_ATTR_TRAIN_TEST + VAL_ATTR_CLASS, "Results", "Results") - report.add_roc_plot(validation_set) report.add_confusion_matrix(val) + report.add_section("Plots") + report.add_roc_plot(validation_set) + report.add_confidence_plot(validation_set) + report.end_section when "regression" report.add_result(validation_set, [:validation_uri] + VAL_ATTR_TRAIN_TEST + VAL_ATTR_REGR, "Results", "Results") + report.add_section("Plots") report.add_regression_plot(validation_set, :model_uri) + report.add_confidence_plot(validation_set) + report.end_section end task.progress(90) if task @@ -104,14 +110,22 @@ module Reports::ReportFactory case validation_set.unique_feature_type when "classification" report.add_result(merged, [:crossvalidation_uri]+VAL_ATTR_CV+VAL_ATTR_CLASS-[:crossvalidation_fold],"Mean Results","Mean Results") - report.add_roc_plot(validation_set, nil, "ROC Plots over all folds") - report.add_roc_plot(validation_set, :crossvalidation_fold) report.add_confusion_matrix(merged.validations[0]) + report.add_section("Plots") + report.add_roc_plot(validation_set) + report.add_roc_plot(validation_set, :crossvalidation_fold) + report.add_confidence_plot(validation_set) + report.add_confidence_plot(validation_set, :crossvalidation_fold) + report.end_section report.add_result(validation_set, VAL_ATTR_CV+VAL_ATTR_CLASS-[:num_folds], "Results","Results",nil,"validation") when "regression" report.add_result(merged, [:crossvalidation_uri]+VAL_ATTR_CV+VAL_ATTR_REGR-[:crossvalidation_fold],"Mean Results","Mean Results") + report.add_section("Plots") report.add_regression_plot(validation_set, :crossvalidation_fold) + report.add_confidence_plot(validation_set) + report.add_confidence_plot(validation_set, :crossvalidation_fold) + report.end_section report.add_result(validation_set, VAL_ATTR_CV+VAL_ATTR_REGR-[:num_folds], "Results","Results") end task.progress(90) if task @@ -194,7 +208,25 @@ module Reports::ReportFactory end when "regression" - raise OpenTox::BadRequestError.new("algorithm comparison for regression not yet implemented") + + attributes = VAL_ATTR_CV+VAL_ATTR_REGR-[:crossvalidation_fold] + attributes = ([ :dataset_uri ] + attributes).uniq + + dataset_grouping.each do |validations| + + set = Reports::ValidationSet.create(validations) + + dataset = validations[0].dataset_uri + merged = set.merge([:algorithm_uri, :dataset_uri, :crossvalidation_id, :crossvalidation_uri]) + merged.sort(:dataset_uri) + + report.add_section("Dataset: "+dataset) + report.add_result(merged,attributes, + "Mean Results","Mean Results",nil,"crossvalidation") + report.add_paired_ttest_table(set, :algorithm_uri, :r_square) + report.end_section + end + end task.progress(100) if task report -- cgit v1.2.3