-rw-r--r--  Rakefile                                     |    2
-rw-r--r--  RankPlotter/RankPlotter.jar                  |  bin 12309188 -> 12327740 bytes
-rw-r--r--  db/migrate/001_init_validation.rb            |   15
-rw-r--r--  example.rb                                   |    3
-rw-r--r--  lib/active_record_setup.rb                   |    9
-rw-r--r--  lib/ot_predictions.rb                        |   11
-rw-r--r--  lib/predictions.rb                           |   67
-rw-r--r--  lib/validation_db.rb                         |   57
-rw-r--r--  nightly/nightly.rb                           |    8
-rw-r--r--  reach_reports/reach_application.rb           |    1
-rw-r--r--  reach_reports/reach_service.rb               |  117
-rw-r--r--  reach_reports/reach_test.rb                  |   10
-rw-r--r--  report/environment.rb                        |    2
-rw-r--r--  report/plot_factory.rb                       |  153
-rw-r--r--  report/report_application.rb                 |   14
-rw-r--r--  report/report_content.rb                     |  259
-rw-r--r--  report/report_factory.rb                     |  411
-rw-r--r--  report/report_format.rb                      |   12
-rw-r--r--  report/report_persistance.rb                 |    2
-rw-r--r--  report/report_service.rb                     |   35
-rw-r--r--  report/report_test.rb                        |    6
-rw-r--r--  report/util.rb                               |    2
-rw-r--r--  report/validation_access.rb                  |   32
-rw-r--r--  report/validation_data.rb                    |  175
-rw-r--r--  report/xml_report.rb                         |   25
-rw-r--r--  saxonhe9-2-0-3j/doc/img/saxonica_logo.gif    |  bin 8603 -> 0 bytes
-rw-r--r--  saxonhe9-2-0-3j/doc/index.html               |   56
-rw-r--r--  saxonhe9-2-0-3j/doc/saxondocs.css            |  228
-rw-r--r--  saxonhe9-2-0-3j/notices/APACHE-ANT.txt       |   13
-rw-r--r--  saxonhe9-2-0-3j/notices/APACHE-RESOLVER.txt  |    9
-rw-r--r--  saxonhe9-2-0-3j/notices/APACHE-XERCES.txt    |   17
-rw-r--r--  saxonhe9-2-0-3j/notices/CERN.txt             |    7
-rw-r--r--  saxonhe9-2-0-3j/notices/FRIJTERS.txt         |   22
-rw-r--r--  saxonhe9-2-0-3j/notices/GPL+CLASSPATH.txt    |  347
-rw-r--r--  saxonhe9-2-0-3j/notices/JAMESCLARK.txt       |   25
-rw-r--r--  saxonhe9-2-0-3j/notices/LEGAL.txt            |   33
-rw-r--r--  saxonhe9-2-0-3j/notices/LICENSE.txt          |   15
-rw-r--r--  saxonhe9-2-0-3j/notices/THAI.txt             |   31
-rw-r--r--  saxonhe9-2-0-3j/notices/UNICODE.txt          |   29
-rw-r--r--  saxonhe9-2-0-3j/saxon9he.jar                 |  bin 5468048 -> 0 bytes
-rw-r--r--  test/test_examples_util.rb                   |    1
-rw-r--r--  validation/validation_application.rb         |   73
-rw-r--r--  validation/validation_service.rb             |   95
-rw-r--r--  validation/validation_test.rb                |   22
44 files changed, 1020 insertions(+), 1431 deletions(-)
diff --git a/Rakefile b/Rakefile
index 4225438..f764000 100644
--- a/Rakefile
+++ b/Rakefile
@@ -54,7 +54,7 @@ task :load_config do
puts "config loaded"
end
-# USER VERSION 0 instead
+# USE VERSION 0 instead
#desc "Clear database"
#task :clear_db => :load_config do
# if @@config[:database][:adapter]=="mysql"
diff --git a/RankPlotter/RankPlotter.jar b/RankPlotter/RankPlotter.jar
index 80d8e64..036c40a 100644
--- a/RankPlotter/RankPlotter.jar
+++ b/RankPlotter/RankPlotter.jar
Binary files differ
diff --git a/db/migrate/001_init_validation.rb b/db/migrate/001_init_validation.rb
index 93d8d2f..2189568 100644
--- a/db/migrate/001_init_validation.rb
+++ b/db/migrate/001_init_validation.rb
@@ -4,8 +4,7 @@ class InitValidation < ActiveRecord::Migration
create_table :crossvalidations do |t|
- [:crossvalidation_uri,
- :algorithm_uri,
+ [:algorithm_uri,
:dataset_uri ].each do |p|
t.column p, :string, :limit => 255
end
@@ -19,23 +18,21 @@ class InitValidation < ActiveRecord::Migration
t.column p, :integer, :null => false
end
- [ :stratified ].each do |p|
+ [ :stratified, :finished ].each do |p|
t.column p, :boolean, :null => false
end
-
end
create_table :validations do |t|
- [:validation_uri,
+ [:validation_type,
:model_uri,
:algorithm_uri,
:training_dataset_uri,
:test_target_dataset_uri,
:test_dataset_uri,
:prediction_dataset_uri,
- :prediction_feature,
- :crossvalidation_uri].each do |p|
+ :prediction_feature].each do |p|
t.column p, :string, :limit => 255
end
@@ -54,6 +51,10 @@ class InitValidation < ActiveRecord::Migration
[:classification_statistics, :regression_statistics].each do |p|
t.column(p, :text, :limit => 16320)
end
+
+ [ :finished ].each do |p|
+ t.column p, :boolean, :null => false
+ end
end
end
diff --git a/example.rb b/example.rb
index eb997c2..6417ae7 100644
--- a/example.rb
+++ b/example.rb
@@ -83,8 +83,7 @@ class Example
log "crossvalidation"
cv = Validation::Crossvalidation.new({ :dataset_uri => data_uri, :algorithm_uri => @@alg, :num_folds => 5, :stratified => false })
- cv.create_cv_datasets( URI.decode(@@feature) )
- cv.perform_cv( @@alg_params )
+ cv.perform_cv( URI.decode(@@feature), @@alg_params )
log "create validation report"
rep = Reports::ReportService.new(File.join(@@config[:services]["opentox-validation"],"report"))
diff --git a/lib/active_record_setup.rb b/lib/active_record_setup.rb
index cea4fb2..3682c7a 100644
--- a/lib/active_record_setup.rb
+++ b/lib/active_record_setup.rb
@@ -28,7 +28,14 @@ class ActiveRecord::Base
key = key+"_uri"
unless self.column_names.include?(key)
key = key+"s"
- $sinatra.halt 400,"no attribute found: '"+k.to_s+"'" unless self.column_names.include?(key)
+ unless self.column_names.include?(key)
+ err = "no attribute found: '"+k.to_s+"'"
+ if $sinatra
+ $sinatra.halt 400,err
+ else
+ raise err
+ end
+ end
end
end
end
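
The hunk above makes the attribute check usable outside of the web app: halt with HTTP 400 when a Sinatra context ($sinatra) is present, raise a plain exception otherwise. A minimal standalone sketch of that pattern (the helper name and the calling code are made up for illustration, not part of the patch):

  # halt in a web context, raise in a library/test context
  def report_error(msg)
    if $sinatra
      $sinatra.halt 400, msg    # returns a 400 response to the client
    else
      raise msg                 # raises a RuntimeError for callers/tests
    end
  end

  # report_error("no attribute found: 'foo'") unless self.column_names.include?(key)
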
diff --git a/lib/ot_predictions.rb b/lib/ot_predictions.rb
index dcd7d09..0175a0c 100644
--- a/lib/ot_predictions.rb
+++ b/lib/ot_predictions.rb
@@ -15,7 +15,8 @@ module Lib
return @compounds[instance_index]
end
- def initialize(is_classification, test_dataset_uri, test_target_dataset_uri, prediction_feature, prediction_dataset_uri, predicted_variable)
+ def initialize(is_classification, test_dataset_uri, test_target_dataset_uri,
+ prediction_feature, prediction_dataset_uri, predicted_variable, task=nil)
LOGGER.debug("loading prediciton via test-dataset:'"+test_dataset_uri.to_s+
"', test-target-datset:'"+test_target_dataset_uri.to_s+
@@ -79,6 +80,7 @@ module Lib
actual_values.push value
end
end
+ task.progress(40) if task # loaded actual values
prediction_dataset = OpenTox::Dataset.find prediction_dataset_uri
raise "prediction dataset not found: '"+prediction_dataset_uri.to_s+"'" unless prediction_dataset
@@ -118,9 +120,11 @@ module Lib
confidence_values << prediction_dataset.get_prediction_confidence(c, predicted_variable)
end
end
+ task.progress(80) if task # loaded predicted values and confidence
super(predicted_values, actual_values, confidence_values, is_classification, class_values)
raise "illegal num compounds "+num_info if @compounds.size != @predicted_values.size
+ task.progress(100) if task # done with the mathematics
end
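
The task.progress calls added above follow an optional-progress pattern: the loader accepts task=nil and only reports when a task object is handed in, so the same code runs with and without the OpenTox task wrapper. A self-contained sketch (DummyTask and load_predictions are illustrative, not part of the patch):

  class DummyTask
    def progress(percent)
      puts "progress: #{percent}%"
    end
  end

  def load_predictions(task=nil)
    # ... load actual values ...
    task.progress(40) if task
    # ... load predicted values and confidences ...
    task.progress(80) if task
    # ... compute statistics ...
    task.progress(100) if task
  end

  load_predictions(DummyTask.new)   # prints 40/80/100
  load_predictions                  # no task given, stays silent
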
@@ -128,7 +132,7 @@ module Lib
res = {}
if @is_classification
- (Lib::VAL_CLASS_PROPS_EXTENDED).each{ |s| res[s] = send(s)}
+ (Lib::VAL_CLASS_PROPS).each{ |s| res[s] = send(s)}
else
(Lib::VAL_REGR_PROPS).each{ |s| res[s] = send(s) }
end
@@ -151,7 +155,8 @@ module Lib
a.push( "http://ambit.uni-plovdiv.bg:8080/ambit2/depict/cdk?search="+
URI.encode(OpenTox::Compound.new(:uri=>p.identifier(i)).smiles) ) if add_pic
rescue => ex
- a.push("Could not add pic: "+ex.message)
+ #a.push("Could not add pic: "+ex.message)
+ a.push(p.identifier(i))
end
a << (format ? p.actual_value(i).to_nice_s : p.actual_value(i))
diff --git a/lib/predictions.rb b/lib/predictions.rb
index 2873689..6e50e94 100644
--- a/lib/predictions.rb
+++ b/lib/predictions.rb
@@ -23,13 +23,13 @@ module Lib
actual_values,
confidence_values,
is_classification,
- prediction_feature_values=nil )
+ class_domain=nil )
@predicted_values = predicted_values
@actual_values = actual_values
@confidence_values = confidence_values
@is_classification = is_classification
- @prediction_feature_values = prediction_feature_values
+ @class_domain = class_domain
@num_classes = 1
#puts "predicted: "+predicted_values.inspect
@@ -43,23 +43,27 @@ module Lib
raise "illegal num confidence values "+num_info if @confidence_values.size != @predicted_values.size
@confidence_values.each{ |c| raise "illegal confidence value: '"+c.to_s+"'" unless c==nil or (c.is_a?(Numeric) and c>=0 and c<=1) }
- conf_val_tmp = {}
- @confidence_values.each{ |c| conf_val_tmp[c] = nil }
- if conf_val_tmp.keys.size<2
- LOGGER.warn("prediction w/o confidence values");
- @confidence_values=nil
- end
+ ## check if there is more than one different conf value
+ ## DEPRECATED? not sure anymore what this was about,
+ ## I am pretty sure this was for r-plot of roc curves
+ ## roc curves are now plotted manually
+ #conf_val_tmp = {}
+ #@confidence_values.each{ |c| conf_val_tmp[c] = nil }
+ #if conf_val_tmp.keys.size<2
+ # LOGGER.warn("prediction w/o confidence values");
+ # @confidence_values=nil
+ #end
if @is_classification
- raise "prediction_feature_values missing while performing classification" unless @prediction_feature_values
- @num_classes = @prediction_feature_values.size
+ raise "class_domain missing while performing classification" unless @class_domain
+ @num_classes = @class_domain.size
raise "num classes < 2" if @num_classes<2
{ "predicted"=>@predicted_values, "actual"=>@actual_values }.each do |s,values|
values.each{ |v| raise "illegal "+s+" classification-value ("+v.to_s+"),"+
"has to be either nil or index of predicted-values" if v!=nil and (v<0 or v>@num_classes)}
end
else
- raise "prediction_feature_values != nil while performing regression" if @prediction_feature_values
+ raise "class_domain != nil while performing regression" if @class_domain
{ "predicted"=>@predicted_values, "actual"=>@actual_values }.each do |s,values|
values.each{ |v| raise "illegal "+s+" regression-value ("+v.to_s+"),"+
"has to be either nil or number" unless v==nil or v.is_a?(Numeric)}
@@ -81,7 +85,7 @@ module Lib
if @is_classification
@confusion_matrix = []
- @prediction_feature_values.each do |v|
+ @class_domain.each do |v|
@confusion_matrix.push( Array.new( @num_classes, 0 ) )
end
@@ -209,31 +213,35 @@ module Lib
res = {}
(0..@num_classes-1).each do |actual|
(0..@num_classes-1).each do |predicted|
- res[{:confusion_matrix_actual => @prediction_feature_values[actual],
- :confusion_matrix_predicted => @prediction_feature_values[predicted]}] = @confusion_matrix[actual][predicted]
+ res[{:confusion_matrix_actual => @class_domain[actual],
+ :confusion_matrix_predicted => @class_domain[predicted]}] = @confusion_matrix[actual][predicted]
end
end
return res
end
def area_under_roc(class_index=nil)
- return prediction_feature_value_map( lambda{ |i| area_under_roc(i) } ) if class_index==nil
+ return prediction_feature_value_map( lambda{ |i| area_under_roc(i) } ) if
+ class_index==nil
return 0.0 if @confidence_values==nil
LOGGER.warn("TODO: implement approx computiation of AUC,"+
- "so far Wilcoxon-Man-Whitney is used (exponential)") if @predicted_values.size>1000
+ "so far Wilcoxon-Man-Whitney is used (exponential)") if
+ @predicted_values.size>1000
+ #puts "COMPUTING AUC "+class_index.to_s
tp_conf = []
fp_conf = []
(0..@predicted_values.size-1).each do |i|
if @predicted_values[i]==class_index
- if @actual_values[i]==class_index
+ if @actual_values[i]==@predicted_values[i]
tp_conf.push(@confidence_values[i])
else
fp_conf.push(@confidence_values[i])
end
end
end
+ #puts tp_conf.inspect+"\n"+fp_conf.inspect+"\n\n"
return 0.0 if tp_conf.size == 0
return 1.0 if fp_conf.size == 0
@@ -241,9 +249,9 @@ module Lib
tp_conf.each do |tp|
fp_conf.each do |fp|
sum += 1 if tp>fp
+ sum += 0.5 if tp==fp
end
end
-
return sum / (tp_conf.size * fp_conf.size).to_f
end
@@ -460,21 +468,30 @@ module Lib
# data for roc-plots ###################################################################################
def get_roc_values(class_value)
+
+ #puts "get_roc_values for class_value: "+class_value.to_s
raise "no confidence values" if @confidence_values==nil
- class_index = @prediction_feature_values.index(class_value)
- raise "class not found "+class_value.to_s if class_index==nil and class_value!=nil
+ raise "no class-value specified" if class_value==nil
+
+ class_index = @class_domain.index(class_value)
+ raise "class not found "+class_value.to_s if class_index==nil
c = []; p = []; a = []
(0..@predicted_values.size-1).each do |i|
# NOTE: not predicted instances are ignored here
- if (@predicted_values[i]!=nil and (class_value==nil or @predicted_values[i]==class_index))
+ if @predicted_values[i]!=nil and @predicted_values[i]==class_index
c << @confidence_values[i]
p << @predicted_values[i]
a << @actual_values[i]
end
end
- return {:predicted_values => p, :actual_values => a, :confidence_values => c}
+ # DO NOT raise exception here, maybe different validations are concated
+ #raise "no instance predicted as '"+class_value+"'" if p.size == 0
+
+ h = {:predicted_values => p, :actual_values => a, :confidence_values => c}
+ #puts h.inspect
+ return h
end
########################################################################################
@@ -489,7 +506,7 @@ module Lib
def predicted_value(instance_index)
if @is_classification
- @predicted_values[instance_index]==nil ? nil : @prediction_feature_values[@predicted_values[instance_index]]
+ @predicted_values[instance_index]==nil ? nil : @class_domain[@predicted_values[instance_index]]
else
@predicted_values[instance_index]
end
@@ -501,7 +518,7 @@ module Lib
def actual_value(instance_index)
if @is_classification
- @actual_values[instance_index]==nil ? nil : @prediction_feature_values[@actual_values[instance_index]]
+ @actual_values[instance_index]==nil ? nil : @class_domain[@actual_values[instance_index]]
else
@actual_values[instance_index]
end
@@ -535,7 +552,7 @@ module Lib
def prediction_feature_value_map(proc)
res = {}
(0..@num_classes-1).each do |i|
- res[@prediction_feature_values[i]] = proc.call(i)
+ res[@class_domain[i]] = proc.call(i)
end
return res
end
diff --git a/lib/validation_db.rb b/lib/validation_db.rb
index 6de0bd4..cb3ece7 100644
--- a/lib/validation_db.rb
+++ b/lib/validation_db.rb
@@ -6,7 +6,7 @@ require "lib/merge.rb"
module Lib
- VAL_PROPS_GENERAL = [ :validation_uri, :model_uri, :algorithm_uri, :training_dataset_uri, :prediction_feature,
+ VAL_PROPS_GENERAL = [ :validation_uri, :validation_type, :model_uri, :algorithm_uri, :training_dataset_uri, :prediction_feature,
:test_dataset_uri, :test_target_dataset_uri, :prediction_dataset_uri, :date ]
VAL_PROPS_SUM = [ :num_instances, :num_without_class, :num_unpredicted ]
VAL_PROPS_AVG = [:real_runtime, :percent_without_class, :percent_unpredicted ]
@@ -17,9 +17,11 @@ module Lib
# :classification_statistics
VAL_CLASS_PROPS_SINGLE_SUM = [ :num_correct, :num_incorrect, :confusion_matrix ]
- VAL_CLASS_PROPS_SINGLE_AVG = [ :percent_correct, :percent_incorrect, :weighted_area_under_roc ]
+ VAL_CLASS_PROPS_SINGLE_AVG = [ :percent_correct, :percent_incorrect,
+ :weighted_area_under_roc, :accuracy ]
VAL_CLASS_PROPS_SINGLE = VAL_CLASS_PROPS_SINGLE_SUM + VAL_CLASS_PROPS_SINGLE_AVG
+
# :class_value_statistics
VAL_CLASS_PROPS_PER_CLASS_SUM = [ :num_false_positives, :num_false_negatives,
:num_true_positives, :num_true_negatives ]
@@ -32,7 +34,6 @@ module Lib
:true_negative_rate, :true_positive_rate ] #:precision, :recall,
VAL_CLASS_PROPS = VAL_CLASS_PROPS_SINGLE + VAL_CLASS_PROPS_PER_CLASS
- VAL_CLASS_PROPS_EXTENDED = VAL_CLASS_PROPS + [:accuracy]
# :regression_statistics
VAL_REGR_PROPS = [ :root_mean_squared_error, :mean_absolute_error, :r_square,
@@ -41,20 +42,66 @@ module Lib
CROSS_VAL_PROPS = [:dataset_uri, :num_folds, :stratified, :random_seed]
CROSS_VAL_PROPS_REDUNDANT = [:crossvalidation_uri, :algorithm_uri, :date] + CROSS_VAL_PROPS
- ALL_PROPS = VAL_PROPS + VAL_CV_PROPS + VAL_CLASS_PROPS_EXTENDED + VAL_REGR_PROPS + CROSS_VAL_PROPS
+ ALL_PROPS = VAL_PROPS + VAL_CV_PROPS + VAL_CLASS_PROPS + VAL_REGR_PROPS + CROSS_VAL_PROPS
VAL_MERGE_GENERAL = VAL_PROPS_GENERAL + VAL_CV_PROPS + [:classification_statistics, :regression_statistics] + CROSS_VAL_PROPS
VAL_MERGE_SUM = VAL_PROPS_SUM + VAL_CLASS_PROPS_SINGLE_SUM + VAL_CLASS_PROPS_PER_CLASS_SUM
VAL_MERGE_AVG = VAL_PROPS_AVG + VAL_CLASS_PROPS_SINGLE_AVG + VAL_CLASS_PROPS_PER_CLASS_AVG + VAL_REGR_PROPS
-
+
+
class Validation < ActiveRecord::Base
serialize :classification_statistics
serialize :regression_statistics
alias_attribute :date, :created_at
+
+ def validation_uri
+ $sinatra.url_for("/"+self.id.to_s, :full)
+ end
+
+ def crossvalidation_uri
+ $sinatra.url_for("/crossvalidation/"+self.crossvalidation_id.to_s, :full) if self.crossvalidation_id
+ end
+
+ def self.classification_property?( property )
+ VAL_CLASS_PROPS.include?( property )
+ end
+
+ def self.depends_on_class_value?( property )
+ VAL_CLASS_PROPS_PER_CLASS.include?( property )
+ end
+
+ def self.complement_exists?( property )
+ VAL_CLASS_PROPS_PER_CLASS_COMPLEMENT_EXISTS.include?( property )
+ end
+
end
class Crossvalidation < ActiveRecord::Base
alias_attribute :date, :created_at
+
+ def crossvalidation_uri
+ $sinatra.url_for("/crossvalidation/"+self.id.to_s, :full) if self.id
+ end
+
+ # convenience method to list all crossvalidations that are unique
+ # in terms of dataset_uri,num_folds,stratified,random_seed
+ # further conditions can be specified in __conditions__
+ def self.find_all_uniq(conditions={})
+ cvs = Lib::Crossvalidation.find(:all, :conditions => conditions)
+ uniq = []
+ cvs.each do |cv|
+ match = false
+ uniq.each do |cv2|
+ if cv.dataset_uri == cv2.dataset_uri and cv.num_folds == cv2.num_folds and
+ cv.stratified == cv2.stratified and cv.random_seed == cv2.random_seed
+ match = true
+ break
+ end
+ end
+ uniq << cv unless match
+ end
+ uniq
+ end
end
end
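
The new find_all_uniq above deduplicates crossvalidations by dataset_uri, num_folds, stratified and random_seed. A hypothetical call, assuming the service's ActiveRecord setup is loaded (the algorithm URI is made up; crossvalidation_uri additionally needs the Sinatra context because it uses $sinatra.url_for):

  cvs = Lib::Crossvalidation.find_all_uniq(:algorithm_uri => "http://localhost/algorithm/lazar")
  cvs.each do |cv|
    puts "cv #{cv.id}: #{cv.dataset_uri}, #{cv.num_folds} folds, seed #{cv.random_seed}"
  end
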
diff --git a/nightly/nightly.rb b/nightly/nightly.rb
index dca85d2..eb802a3 100644
--- a/nightly/nightly.rb
+++ b/nightly/nightly.rb
@@ -21,7 +21,7 @@ class Nightly
validationExamples = ValidationExamples.select(select)
return "please \"select\" validation examples:\n"+ValidationExamples.list if validationExamples.size==0
- task_uri = OpenTox::Task.as_task("Build nightly", "nightly-validation-test-service", {:select => select, :dry_run => dry_run}) do
+ task_uri = OpenTox::Task.as_task("Build nightly","nightly-validation-test-service",{:select => select, :dry_run => dry_run}) do |task|
LOGGER.info("Building nightly report")
benchmarks = validationExamples.collect{ |e| ValidationBenchmark.new(e) }
@@ -35,7 +35,7 @@ class Nightly
running << id
Thread.new do
begin
- b.build
+ b.build()
rescue => ex
LOGGER.error "uncaught nightly build error: "+ex.message
ensure
@@ -101,9 +101,9 @@ class Nightly
File.join(@@config[:services]["opentox-validation"],"nightly")
end
if defined?(halt)
- halt 202,task_uri
+ halt 202,task_uri+"\n"
else
- return task_uri
+ return task_uri+"\n"
end
end
diff --git a/reach_reports/reach_application.rb b/reach_reports/reach_application.rb
index e63c2a4..bbe4092 100644
--- a/reach_reports/reach_application.rb
+++ b/reach_reports/reach_application.rb
@@ -31,6 +31,7 @@ post '/reach_report/:type' do
content_type "text/uri-list"
LOGGER.info "creating "+type+" report "+params.inspect
+ #puts "creating "+type+" report "+params.inspect
result_uri = ReachReports.create_report(type,params,request.env["rack.input"])
if OpenTox::Utils.task_uri?(result_uri)
diff --git a/reach_reports/reach_service.rb b/reach_reports/reach_service.rb
index ef75621..f297f26 100644
--- a/reach_reports/reach_service.rb
+++ b/reach_reports/reach_service.rb
@@ -23,13 +23,14 @@ module ReachReports
def self.create_report( type, params, xml_data=nil )
- result_uri = ""
case type
when /(?i)QMRF/
if params[:model_uri]
- result_uri = OpenTox::Task.as_task( "Create "+type+" report", $sinatra.url_for("/reach_report/"+type, :full), params ) do
+ result_uri = OpenTox::Task.as_task( "Create "+type+" report",
+ $sinatra.url_for("/reach_report/"+type, :full), params ) do |task|
+
report = ReachReports::QmrfReport.new :model_uri => params[:model_uri]
- build_qmrf_report(report)
+ build_qmrf_report(report, task)
report.report_uri
end
elsif xml_data and (input = xml_data.read).to_s.size>0
@@ -57,8 +58,9 @@ module ReachReports
- def self.build_qmrf_report(r)
+ def self.build_qmrf_report(r, task=nil)
+ #puts r.model_uri
model = OpenTox::Model::PredictionModel.find(r.model_uri)
classification = model.classification?
@@ -66,9 +68,11 @@ module ReachReports
r.qsar_identifier = QsarIdentifier.new
r.qsar_identifier.qsar_title = model.title
# TODO QSAR_models -> sparql same endpoint
- r.qsar_identifier.qsar_software << QsarSoftware.new( :url => model.uri, :name => model.title, :contact => model.creator )
+ r.qsar_identifier.qsar_software << QsarSoftware.new( :url => model.uri,
+ :name => model.title, :contact => model.creator )
algorithm = OpenTox::Algorithm::Generic.find(model.algorithm) if model.algorithm
r.qsar_identifier.qsar_software << QsarSoftware.new( :url => algorithm.uri, :name => algorithm.title )
+ task.progress(10) if task
#chapter 2
r.qsar_general_information = QsarGeneralInformation.new
@@ -79,7 +83,8 @@ module ReachReports
# TODO: references?
# EMPTY: info_availablity
# TODO: related_models = find qmrf reports for QSAR_models
-
+ task.progress(20) if task
+
# chapter 3
# TODO "model_species" ?
r.qsar_endpoint = QsarEndpoint.new
@@ -89,12 +94,14 @@ module ReachReports
# TODO "endpoint_comments" => "3.3", "endpoint_units" => "3.4",
r.qsar_endpoint.endpoint_variable = model.dependentVariables if model.dependentVariables
# TODO "endpoint_protocol" => "3.6", "endpoint_data_quality" => "3.7",
-
+ task.progress(30) if task
+
# chapter 4
# TODO algorithm_type (='type of model')
# TODO algorithm_explicit.equation
# TODO algorithm_explicit.algorithms_catalog
# TODO algorithms_descriptors, descriptors_selection, descriptors_generation, descriptors_generation_software, descriptors_chemicals_ratio
+ task.progress(40) if task
# chapter 5
# TODO app_domain_description, app_domain_method, app_domain_software, applicability_limits
@@ -103,8 +110,9 @@ module ReachReports
begin
training_dataset = model.trainingDataset ? OpenTox::Dataset.find(model.trainingDataset) : nil
rescue
- LOGGER.warn "training_dataset not found "+model.trainingDataset.to_s
+ LOGGER.warn "build qmrf: training_dataset not found "+model.trainingDataset.to_s
end
+ task.progress(50) if task
# chapter 6
r.qsar_robustness = QsarRobustness.new
@@ -122,47 +130,56 @@ module ReachReports
val_datasets = []
if model.algorithm
- cvs = Lib::Crossvalidation.find(:all, :conditions => {:algorithm_uri => model.algorithm})
- cvs = [] unless cvs
- uniq_cvs = []
- cvs.each do |cv|
- # PENDING: cv classification hack
+ cvs = Lib::Crossvalidation.find_all_uniq({:algorithm_uri => model.algorithm})
+ # PENDING: cv classification/regression hack
+ cvs = cvs.delete_if do |cv|
val = Validation::Validation.first( :all, :conditions => { :crossvalidation_id => cv.id } )
- if (val.classification_statistics!=nil) == classification
- match = false
- uniq_cvs.each do |cv2|
- if cv.dataset_uri == cv2.dataset_uri and cv.num_folds == cv2.num_folds and cv.stratified == cv2.stratified and cv.random_seed == cv2.random_seed
- match = true
- break
- end
- end
- uniq_cvs << cv unless match
- end
+ (val.classification_statistics!=nil) != classification
end
-
- lmo = [ "found "+cvs.size.to_s+" crossvalidation/s for algorithm '"+model.algorithm ]
- lmo << ""
- uniq_cvs.each do |cv|
- lmo << "crossvalidation: "+cv.crossvalidation_uri
- lmo << "dataset (see 9.3 Validation data): "+cv.dataset_uri
- val_datasets << cv.dataset_uri
- lmo << "settings: num-folds="+cv.num_folds.to_s+", random-seed="+cv.random_seed.to_s+", stratified:"+cv.stratified.to_s
- val = YAML.load( OpenTox::RestClientWrapper.get File.join(cv.crossvalidation_uri,"statistics") )
- if classification
- lmo << "percent_correct: "+val[:classification_statistics][:percent_correct].to_s
- lmo << "weighted AUC: "+val[:classification_statistics][:weighted_area_under_roc].to_s
- else
- lmo << "root_mean_squared_error: "+val[:regression_statistics][:root_mean_squared_error].to_s
- lmo << "r_square "+val[:regression_statistics][:r_square].to_s
- end
- reports = OpenTox::RestClientWrapper.get File.join(CONFIG[:services]["opentox-validation"],"report/crossvalidation?crossvalidation_uris="+cv.crossvalidation_uri)
- if reports and reports.size>0
- lmo << "for more info see report: "+reports.split("\n")[0]
- else
- lmo << "for more info see report: not yet created for '"+cv.crossvalidation_uri+"'"
+
+ lmo = [ "found "+cvs.size.to_s+" crossvalidation/s for algorithm '"+model.algorithm+"'" ]
+ if cvs.size>0
+ cvs_same_data = []
+ cvs_other_data = []
+ cvs.each do |cv|
+ if cv.dataset_uri == model.trainingDataset
+ cvs_same_data << cv
+ else
+ cvs_other_data << cv
+ end
end
+ lmo << cvs_same_data.size.to_s+" crossvalidation/s were performed on the training dataset of the model ("+
+ model.trainingDataset.to_s+")"
+ lmo << cvs_other_data.size.to_s+" crossvalidation/s were performed on other datasets"
lmo << ""
+
+ {cvs_same_data => "training dataset", cvs_other_data => "other datasets"}.each do |cvs,desc|
+ next if cvs.size==0
+ lmo << "crossvalidation/s on "+desc
+ cvs.each do |cv|
+ lmo << "crossvalidation: "+cv.crossvalidation_uri
+ lmo << "dataset (see 9.3 Validation data): "+cv.dataset_uri
+ val_datasets << cv.dataset_uri
+ lmo << "settings: num-folds="+cv.num_folds.to_s+", random-seed="+cv.random_seed.to_s+", stratified:"+cv.stratified.to_s
+ val = YAML.load( OpenTox::RestClientWrapper.get File.join(cv.crossvalidation_uri,"statistics") )
+ if classification
+ lmo << "percent_correct: "+val[:classification_statistics][:percent_correct].to_s
+ lmo << "weighted AUC: "+val[:classification_statistics][:weighted_area_under_roc].to_s
+ else
+ lmo << "root_mean_squared_error: "+val[:regression_statistics][:root_mean_squared_error].to_s
+ lmo << "r_square "+val[:regression_statistics][:r_square].to_s
+ end
+ reports = OpenTox::RestClientWrapper.get File.join(CONFIG[:services]["opentox-validation"],"report/crossvalidation?crossvalidation_uris="+cv.crossvalidation_uri)
+ if reports and reports.size>0
+ lmo << "for more info see report: "+reports.split("\n")[0]
+ else
+ lmo << "for more info see report: not yet created for '"+cv.crossvalidation_uri+"'"
+ end
+ end
+ lmo << ""
+ end
end
+
else
lmo = [ "no prediction algortihm for model found, crossvalidation not possible" ]
end
@@ -202,7 +219,8 @@ module ReachReports
v << "root_mean_squared_error: "+validation.regression_statistics[:root_mean_squared_error].to_s
v << "r_square "+validation.regression_statistics[:r_square].to_s
end
- reports = OpenTox::RestClientWrapper.get File.join(CONFIG[:services]["opentox-validation"],"report/validation?validation_uris="+validation.validation_uri)
+ reports = OpenTox::RestClientWrapper.get(File.join(CONFIG[:services]["opentox-validation"],
+ "report/validation?validation_uris="+validation.validation_uri))
if reports and reports.size>0
v << "for more info see report: "+reports.split("\n")[0]
else
@@ -214,15 +232,18 @@ module ReachReports
v = [ "no validation for model '"+model.uri+"' found" ]
end
r.qsar_predictivity.validation_predictivity = v.to_html
+ task.progress(60) if task
# chapter 7
# "validation_set_availability" => "7.1", "validation_set_data" => "7.2", "validation_set_descriptors" => "7.3",
# "validation_dependent_var_availability" => "7.4", "validation_other_info" => "7.5", "experimental_design" => "7.6",
# "validation_predictivity" => "7.7", "validation_assessment" => "7.8", "validation_comments" => "7.9",
+ task.progress(70) if task
# chapter 8
# "mechanistic_basis" => "8.1", "mechanistic_basis_comments" => "8.2", "mechanistic_basis_info" => "8.3",
-
+ task.progress(80) if task
+
# chapter 9
# "comments" => "9.1", "bibliography" => "9.2", "attachments" => "9.3",
@@ -244,8 +265,10 @@ module ReachReports
LOGGER.warn "could not add dataset: "+data_uri.to_s
end
end
- r.save
+ task.progress(90) if task
+ r.save
+ task.progress(100) if task
end
# def self.get_report_content(type, id, *keys)
diff --git a/reach_reports/reach_test.rb b/reach_reports/reach_test.rb
index 44f37ff..4a18f6d 100644
--- a/reach_reports/reach_test.rb
+++ b/reach_reports/reach_test.rb
@@ -115,8 +115,9 @@ class ReachTest < Test::Unit::TestCase
# puts last_response.body
#model_uri = "http://ambit.uni-plovdiv.bg:8080/ambit2/model/173393"
- model_uri = "http://localhost/model/1"
- #model_uri = "http://localhost/majority/regr/model/12"
+ model_uri = "http://localhost/model/6"
+ #http://localhost/majority/class/model/15
+ #model_uri = "http://localhost/majority/class/model/15"
# model_uri = "http://localhost/majority/class/model/91"
#model_uri = "http://apps.ideaconsult.net:8080/ambit2/model/2"
post '/reach_report/qmrf',:model_uri=>model_uri #http://localhost/model/1"
@@ -148,8 +149,9 @@ class ReachTest < Test::Unit::TestCase
#r = ReachReports::QmrfReport.find_like( :QSAR_title => "Hamster")
#puts r.collect{|rr| "report with id:"+rr.id.to_s}.inspect
- File.new("/home/martin/win/home/qmr_rep_del_me.xml","w").puts last_response.body
- File.new("/home/martin/info_home/.public_html/qmr_rep_del_me.xml","w").puts last_response.body
+ File.new("/home/martin/tmp/qmr_rep_del_me.xml","w").puts last_response.body
+ #File.new("/home/martin/win/home/qmr_rep_del_me.xml","w").puts last_response.body
+ #File.new("/home/martin/info_home/.public_html/qmr_rep_del_me.xml","w").puts last_response.body
end
end
diff --git a/report/environment.rb b/report/environment.rb
index f2bbe66..3daf39d 100644
--- a/report/environment.rb
+++ b/report/environment.rb
@@ -16,13 +16,13 @@ require "report/plot_factory.rb"
require "report/xml_report.rb"
require "report/xml_report_util.rb"
require "report/report_persistance.rb"
+require "report/report_content.rb"
require "report/report_factory.rb"
require "report/report_service.rb"
require "report/report_format.rb"
require "report/validation_access.rb"
require "report/validation_data.rb"
require "report/util.rb"
-require "report/external/mimeparse.rb"
diff --git a/report/plot_factory.rb b/report/plot_factory.rb
index daaba52..43c45fc 100644
--- a/report/plot_factory.rb
+++ b/report/plot_factory.rb
@@ -8,6 +8,43 @@ class Array
self[i] = self[j]
self[j] = tmp
end
+
+ # summing up values of fields where array __groups__ has equal values
+ # EXAMPLE
+ # self: [1, 0, 1, 2, 3, 0, 2]
+ # __groups__: [100, 90, 70, 70, 30, 10, 0]
+ # returns:
+ # [ 1, 0, 3, 3, 0, 2]
+ # (fields with equal value 70 are compressed)
+ # PRECONDITION
+ # __groups__ has to be sorted
+ def compress_sum(groups)
+ compress(groups) do |a,b|
+ a+b
+ end
+ end
+
+ # see compress_sum, replace sum with max
+ def compress_max(groups)
+ compress(groups) do |a,b|
+ a > b ? a : b
+ end
+ end
+
+ private
+ def compress(groups)
+ raise "length not equal" unless self.size==groups.size
+ raise "to small" unless self.size>=2
+ a = [ self[0] ]
+ (1..groups.size-1).each do |i|
+ if groups[i]!=groups[i-1]
+ a << self[i]
+ else
+ a[-1] = yield a[-1],self[i]
+ end
+ end
+ a
+ end
end
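
A quick illustration of the Array#compress_sum / #compress_max helpers defined above, using the numbers from the doc comment (requires the monkey-patched Array from this file to be loaded):

  a      = [  1,  0,  1,  2,  3,  0, 2]
  groups = [100, 90, 70, 70, 30, 10, 0]

  puts a.compress_sum(groups).inspect   # => [1, 0, 3, 3, 0, 2]   (70-group summed)
  puts a.compress_max(groups).inspect   # => [1, 0, 2, 3, 0, 2]   (70-group max)
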
@@ -15,7 +52,7 @@ module Reports
module PlotFactory
- def self.create_regression_plot( out_file, validation_set )
+ def self.create_regression_plot( out_file, validation_set, name_attribute )
LOGGER.debug "Creating regression plot, out-file:"+out_file.to_s
@@ -23,14 +60,28 @@ module Reports
x = []
y = []
validation_set.validations.each do |v|
- names << v.algorithm_uri
- x << v.get_predictions.predicted_values
- y << v.get_predictions.actual_values
+ x_i = v.get_predictions.predicted_values
+ y_i = v.get_predictions.actual_values
+
+ # filter out nil-predictions
+ not_nil_indices = []
+ x_i.size.times do |i|
+ not_nil_indices << i if x_i[i]!=nil && y_i[i]!=nil
+ end
+ if not_nil_indices.size < x_i.size
+ x_i = not_nil_indices.collect{ |i| x_i[i] }
+ y_i = not_nil_indices.collect{ |i| y_i[i] }
+ end
+
+ names << ( name_attribute==:crossvalidation_fold ? "fold " : "" ) + v.send(name_attribute).to_s
+ x << x_i
+ y << y_i
end
RubyPlot::plot_points(out_file, "Regression plot", "Predicted values", "Actual values", names, x, y )
end
+
# creates a roc plot (result is plotted into out_file)
# * if (split_set_attributes == nil?)
# * the predictions of all validations in the validation set are plotted as one average roc-curve
@@ -41,19 +92,22 @@ module Reports
#
def self.create_roc_plot( out_file, validation_set, class_value, split_set_attribute=nil, show_single_curves=false )
- LOGGER.debug "creating roc plot, out-file:"+out_file.to_s
+ LOGGER.debug "creating roc plot for '"+validation_set.size.to_s+"' validations, out-file:"+out_file.to_s
if split_set_attribute
attribute_values = validation_set.get_values(split_set_attribute)
-
names = []
fp_rates = []
tp_rates = []
attribute_values.each do |value|
- data = transform_predictions(validation_set.filter({split_set_attribute => value}), class_value, false)
- names << value.to_s
- fp_rates << data[:fp_rate][0]
- tp_rates << data[:tp_rate][0]
+ begin
+ data = transform_predictions(validation_set.filter({split_set_attribute => value}), class_value, false)
+ names << value.to_s
+ fp_rates << data[:fp_rate][0]
+ tp_rates << data[:tp_rate][0]
+ rescue
+ LOGGER.warn "could not create ROC plot for "+value.to_s
+ end
end
RubyPlot::plot_lines(out_file, "ROC-Plot", "False positive rate", "True Positive Rate", names, fp_rates, tp_rates )
else
@@ -62,28 +116,33 @@ module Reports
end
end
- def self.create_bar_plot( out_file, validation_set, class_value, title_attribute, value_attributes )
+ def self.create_bar_plot( out_file, validation_set, title_attribute, value_attributes )
LOGGER.debug "creating bar plot, out-file:"+out_file.to_s
data = []
titles = []
+ labels = []
validation_set.validations.each do |v|
values = []
value_attributes.each do |a|
- value = v.send(a)
- if value.is_a?(Hash)
- if class_value==nil
- avg_value = 0
- value.values.each{ |val| avg_value+=val }
- value = avg_value/value.values.size.to_f
- else
- raise "bar plot value is hash, but no entry for class-value ("+class_value.to_s+"); value for "+a.to_s+" -> "+value.inspect unless value.key?(class_value)
- value = value[class_value]
+ validation_set.get_domain_for_attr(a).each do |class_value|
+ value = v.send(a)
+ if value.is_a?(Hash)
+ if class_value==nil
+ avg_value = 0
+ value.values.each{ |val| avg_value+=val }
+ value = avg_value/value.values.size.to_f
+ else
+ raise "bar plot value is hash, but no entry for class-value ("+class_value.to_s+"); value for "+a.to_s+" -> "+value.inspect unless value.key?(class_value)
+ value = value[class_value]
+ end
end
+ raise "value is nil\nattribute: "+a.to_s+"\nvalidation: "+v.inspect if value==nil
+ values.push(value)
+ labels.push(a.to_s.gsub("_","-") + ( class_value==nil ? "" : "("+class_value.to_s+")" ))
end
- values.push(value)
end
titles << v.send(title_attribute).to_s
@@ -95,8 +154,6 @@ module Reports
data[i] = [titles[i]] + data[i]
end
- labels = value_attributes.collect{|a| a.to_s.gsub("_","-")}
-
LOGGER.debug "bar plot labels: "+labels.inspect
LOGGER.debug "bar plot data: "+data.inspect
@@ -177,11 +234,15 @@ module Reports
sum_roc_values[:confidence_values] += roc_values[:confidence_values]
sum_roc_values[:actual_values] += roc_values[:actual_values]
if add_single_folds
- tp_fp_rates = get_tp_fp_rates(roc_values)
- names << "fold "+i.to_s
- fp_rate << tp_fp_rates[:fp_rate]
- tp_rate << tp_fp_rates[:tp_rate]
- faint << true
+ begin
+ tp_fp_rates = get_tp_fp_rates(roc_values)
+ names << "fold "+i.to_s
+ fp_rate << tp_fp_rates[:fp_rate]
+ tp_rate << tp_fp_rates[:tp_rate]
+ faint << true
+ rescue
+ LOGGER.warn "could not get ROC vals for fold "+i.to_s
+ end
end
end
tp_fp_rates = get_tp_fp_rates(sum_roc_values)
@@ -197,6 +258,18 @@ module Reports
end
end
+ def self.demo_rock_plot
+ roc_values = {:confidence_values => [0.1, 0.9, 0.5, 0.6, 0.6, 0.6],
+ :predicted_values => [1, 0, 0, 1, 0, 1],
+ :actual_values => [0, 1, 0, 0, 1, 1]}
+ tp_fp_rates = get_tp_fp_rates(roc_values)
+ data = { :names => ["default"], :fp_rate => [tp_fp_rates[:fp_rate]], :tp_rate => [tp_fp_rates[:tp_rate]] }
+ RubyPlot::plot_lines("/tmp/plot.svg",
+ "ROC-Plot",
+ "False positive rate",
+ "True Positive Rate", data[:names], data[:fp_rate], data[:tp_rate], data[:faint] )
+ end
+
def self.get_tp_fp_rates(roc_values)
c = roc_values[:confidence_values]
@@ -232,9 +305,11 @@ module Reports
end
end
#puts c.inspect+"\n"+a.inspect+"\n"+p.inspect+"\n\n"
-
+
tp_rate = [0]
fp_rate = [0]
+ w = [1]
+ c2 = [Float::MAX]
(0..p.size-1).each do |i|
if a[i]==p[i]
tp_rate << tp_rate[-1]+1
@@ -243,8 +318,15 @@ module Reports
fp_rate << fp_rate[-1]+1
tp_rate << tp_rate[-1]
end
+ w << 1
+ c2 << c[i]
end
- #puts tp_rate.inspect+"\n"+fp_rate.inspect+"\n\n"
+ #puts c2.inspect+"\n"+tp_rate.inspect+"\n"+fp_rate.inspect+"\n"+w.inspect+"\n\n"
+
+ tp_rate = tp_rate.compress_max(c2)
+ fp_rate = fp_rate.compress_max(c2)
+ w = w.compress_sum(c2)
+ #puts tp_rate.inspect+"\n"+fp_rate.inspect+"\n"+w.inspect+"\n\n"
(0..tp_rate.size-1).each do |i|
tp_rate[i] = tp_rate[-1]>0 ? tp_rate[i]/tp_rate[-1].to_f*100 : 100
@@ -256,5 +338,14 @@ module Reports
end
end
end
-
+
+#require "rubygems"
+#require "ruby-plot"
#Reports::PlotFactory::demo_ranking_plot
+#Reports::PlotFactory::demo_rock_plot
+
+#a = [1, 0, 1, 2, 3, 0, 2]
+#puts a.compress_sum([100, 90, 70, 70, 30, 10, 0]).inspect
+#puts a.compress_max([100, 90, 70, 70, 30, 10, 0]).inspect
+
+
diff --git a/report/report_application.rb b/report/report_application.rb
index baa91a0..f6b810c 100644
--- a/report/report_application.rb
+++ b/report/report_application.rb
@@ -80,15 +80,9 @@ end
post '/report/:type/:id/format_html' do
- task_uri = OpenTox::Task.as_task("Format report",url_for("/report/"+params[:type]+"/format_html", :full), params) do
- perform do |rs|
- rs.get_report(params[:type],params[:id],"text/html",true,params)
- content_type "text/uri-list"
- rs.get_uri(params[:type],params[:id])+"\n"
- end
- end
+ rs.get_report(params[:type],params[:id],"text/html",true,params)
content_type "text/uri-list"
- halt 202,task_uri+"\n"
+ rs.get_uri(params[:type],params[:id])+"\n"
end
@@ -128,9 +122,9 @@ delete '/report/:type/:id' do
end
post '/report/:type' do
- task_uri = OpenTox::Task.as_task("Create report",url_for("/report/"+params[:type], :full), params) do
+ task_uri = OpenTox::Task.as_task("Create report",url_for("/report/"+params[:type], :full), params) do |task|
perform do |rs|
- rs.create_report(params[:type],params[:validation_uris]?params[:validation_uris].split(/\n|,/):nil)
+ rs.create_report(params[:type],params[:validation_uris]?params[:validation_uris].split(/\n|,/):nil,task)
end
end
content_type "text/uri-list"
diff --git a/report/report_content.rb b/report/report_content.rb
new file mode 100644
index 0000000..98eaad6
--- /dev/null
+++ b/report/report_content.rb
@@ -0,0 +1,259 @@
+
+# = Reports::ReportContent
+#
+# wraps an xml-report, adds functionality for adding sections, adds a hash for tmp files
+#
+class Reports::ReportContent
+
+ attr_accessor :xml_report, :tmp_files
+
+ def initialize(title)
+ @xml_report = Reports::XMLReport.new(title, Time.now.strftime("Created at %m.%d.%Y - %H:%M"))
+ @tmp_file_count = 0
+ end
+
+ def add_section_predictions( validation_set,
+ validation_attributes=[],
+ section_title="Predictions",
+ section_text=nil,
+ table_title="Predictions")
+
+ #PENDING
+ raise "validation attributes not implemented in get prediction array" if validation_attributes.size>0
+
+ section_table = @xml_report.add_section(@xml_report.get_root_element, section_title)
+ if validation_set.validations[0].get_predictions
+ @xml_report.add_paragraph(section_table, section_text) if section_text
+ @xml_report.add_table(section_table, table_title, Lib::OTPredictions.to_array(validation_set.validations.collect{|v| v.get_predictions}, true, true))
+ else
+ @xml_report.add_paragraph(section_table, "No prediction info available.")
+ end
+ end
+
+
+ def add_section_result_overview( validation_set,
+ attribute_col,
+ attribute_row,
+ attribute_values,
+ table_titles=nil,
+ section_title="Result overview",
+ section_text=nil )
+
+
+ section_table = @xml_report.add_section(xml_report.get_root_element, section_title)
+ @xml_report.add_paragraph(section_table, section_text) if section_text
+
+ attribute_values.size.times do |i|
+ attribute_val = attribute_values[i]
+ table_title = table_titles ? table_titles[i] : "Result overview for "+attribute_val.to_s
+ vals = validation_set.to_table( attribute_col, attribute_row, attribute_val)
+ @xml_report.add_table(section_table, table_title, vals)
+ end
+ end
+
+ # result (could be transposed)
+ #
+ # attr1 | attr2 | attr3
+ # ===========|===========|===========
+ # val1-attr1 |val1-attr2 |val1-attr3
+ # val2-attr1 |val2-attr2 |val2-attr3
+ # val3-attr1 |val3-attr2 |val3-attr3
+ #
+ def add_section_result( validation_set,
+ validation_attributes,
+ table_title,
+ section_title="Results",
+ section_text=nil,
+ #rem_equal_vals_attr=[],
+ search_for_existing_report_type=nil)
+
+ section_table = @xml_report.add_section(xml_report.get_root_element, section_title)
+ @xml_report.add_paragraph(section_table, section_text) if section_text
+ vals = validation_set.to_array(validation_attributes, true)
+ vals = vals.collect{|a| a.collect{|v| v.to_s }}
+
+ if (search_for_existing_report_type)
+ vals.size.times do |i|
+ puts i
+ if (i==0)
+ vals[i] = [ "Reports" ] + vals[i]
+ puts vals[i].inspect
+ else
+ if search_for_existing_report_type=="validation"
+ vals[i] = [ validation_set.validations[i-1].validation_report_uri() ] + vals[i]
+ elsif search_for_existing_report_type=="crossvalidation"
+ vals[i] = [ validation_set.validations[i-1].cv_report_uri() ] + vals[i]
+ else
+ raise "illegal report type: "+search_for_existing_report_type.to_s
+ end
+ end
+ end
+ end
+ #PENDING transpose values if there are more than 4 columns and more columns than rows
+ transpose = vals[0].size>4 && vals[0].size>vals.size
+ @xml_report.add_table(section_table, table_title, vals, !transpose, transpose)
+ end
+
+ def add_section_confusion_matrix( validation,
+ section_title="Confusion Matrix",
+ section_text=nil,
+ table_title="Confusion Matrix")
+ section_confusion = @xml_report.add_section(xml_report.get_root_element, section_title)
+ @xml_report.add_paragraph(section_confusion, section_text) if section_text
+ @xml_report.add_table(section_confusion, table_title,
+ Reports::XMLReportUtil::create_confusion_matrix( validation.confusion_matrix ), false)
+ end
+
+ def add_section_regression_plot( validation_set,
+ name_attribute,
+ section_title="Regression Plot",
+ section_text=nil,
+ image_title=nil,
+ image_caption=nil)
+
+ image_title = "Regression plot" unless image_title
+
+ section_regr = @xml_report.add_section(@xml_report.get_root_element, section_title)
+ prediction_set = validation_set.collect{ |v| v.get_predictions }
+
+ if prediction_set.size>0
+
+ section_text += "\nWARNING: regression plot information not available for all validation results" if prediction_set.size!=validation_set.size
+ @xml_report.add_paragraph(section_regr, section_text) if section_text
+ plot_file_name = "regr_plot"+@tmp_file_count.to_s+".svg"
+ @tmp_file_count += 1
+ begin
+ plot_file_path = add_tmp_file(plot_file_name)
+ Reports::PlotFactory.create_regression_plot( plot_file_path, prediction_set, name_attribute )
+ @xml_report.add_imagefigure(section_regr, image_title, plot_file_name, "SVG", image_caption)
+ rescue RuntimeError => ex
+ LOGGER.error("Could not create regression plot: "+ex.message)
+ rm_tmp_file(plot_file_name)
+ @xml_report.add_paragraph(section_regr, "could not create regression plot: "+ex.message)
+ end
+ else
+ @xml_report.add_paragraph(section_regr, "No prediction info for regression available.")
+ end
+ end
+
+ def add_section_roc_plot( validation_set,
+ split_set_attribute = nil,
+ section_title="ROC Plots",
+ section_text=nil,
+ image_titles=nil,
+ image_captions=nil)
+
+ section_roc = @xml_report.add_section(@xml_report.get_root_element, section_title)
+ prediction_set = validation_set.collect{ |v| v.get_predictions && v.get_predictions.confidence_values_available? }
+
+ if prediction_set.size>0
+ if prediction_set.size!=validation_set.size
+ section_text += "\nWARNING: roc plot information not available for all validation results"
+ LOGGER.error "WARNING: roc plot information not available for all validation results:\n"+
+ "validation set size: "+validation_set.size.to_s+", prediction set size: "+prediction_set.size.to_s
+ end
+ @xml_report.add_paragraph(section_roc, section_text) if section_text
+
+ class_domain = validation_set.get_class_domain
+ class_domain.size.times do |i|
+ class_value = class_domain[i]
+ image_title = image_titles ? image_titles[i] : "ROC Plot for class-value '"+class_value+"'"
+ image_caption = image_captions ? image_captions[i] : nil
+ plot_file_name = "roc_plot"+@tmp_file_count.to_s+".svg"
+ @tmp_file_count += 1
+ begin
+ plot_file_path = add_tmp_file(plot_file_name)
+ Reports::PlotFactory.create_roc_plot( plot_file_path, prediction_set, class_value, split_set_attribute, false )#prediction_set.size>1 )
+ @xml_report.add_imagefigure(section_roc, image_title, plot_file_name, "SVG", image_caption)
+ rescue RuntimeError => ex
+ msg = "WARNING could not create roc plot for class value '"+class_value+"': "+ex.message
+ LOGGER.error(msg)
+ rm_tmp_file(plot_file_name)
+ @xml_report.add_paragraph(section_roc, msg)
+ end
+ end
+ else
+ @xml_report.add_paragraph(section_roc, "No prediction-confidence info for roc plot available.")
+ end
+
+ end
+
+ def add_section_ranking_plots( validation_set,
+ compare_attribute,
+ equal_attribute,
+ rank_attributes,
+ section_title="Ranking Plots",
+ section_text="This section contains the ranking plots.")
+
+ section_rank = @xml_report.add_section(@xml_report.get_root_element, section_title)
+ @xml_report.add_paragraph(section_rank, section_text) if section_text
+
+ rank_attributes.each do |a|
+ add_ranking_plot(section_rank, validation_set, compare_attribute, equal_attribute, a)
+ end
+ end
+
+ def add_ranking_plot( report_section,
+ validation_set,
+ compare_attribute,
+ equal_attribute,
+ rank_attribute,
+ image_titles=nil,
+ image_captions=nil)
+
+ class_domain = validation_set.get_domain_for_attr(rank_attribute)
+ puts "ranking plot for "+rank_attribute.to_s+", class values: "+class_domain.to_s
+
+ class_domain.size.times do |i|
+ class_value = class_domain[i]
+ if image_titles
+ image_title = image_titles[i]
+ else
+ if class_value!=nil
+ image_title = rank_attribute.to_s+" Ranking Plot for class-value '"+class_value+"'"
+ else
+ image_title = rank_attribute.to_s+" Ranking Plot"
+ end
+ end
+ image_caption = image_captions ? image_captions[i] : nil
+ plot_file_name = "ranking_plot"+@tmp_file_count.to_s+".svg"
+ @tmp_file_count += 1
+ plot_file_path = add_tmp_file(plot_file_name)
+ Reports::PlotFactory::create_ranking_plot(plot_file_path, validation_set, compare_attribute, equal_attribute, rank_attribute, class_value)
+ @xml_report.add_imagefigure(report_section, image_title, plot_file_name, "SVG", image_caption)
+ end
+ end
+
+ def add_section_bar_plot(validation_set,
+ title_attribute,
+ value_attributes,
+ section_title="Bar Plot",
+ section_text=nil,
+ image_title="Bar Plot",
+ image_caption=nil)
+
+ section_bar = @xml_report.add_section(@xml_report.get_root_element, section_title)
+ @xml_report.add_paragraph(section_bar, section_text) if section_text
+
+ plot_file_name = "bar_plot"+@tmp_file_count.to_s+".svg"
+ @tmp_file_count += 1
+ plot_file_path = add_tmp_file(plot_file_name)
+ Reports::PlotFactory.create_bar_plot(plot_file_path, validation_set, title_attribute, value_attributes )
+ @xml_report.add_imagefigure(section_bar, image_title, plot_file_name, "SVG", image_caption)
+ end
+
+ private
+ def add_tmp_file(tmp_file_name)
+
+ @tmp_files = {} unless @tmp_files
+ raise "file name already exits" if @tmp_files[tmp_file_name] || (@text_files && @text_files[tmp_file_name])
+ tmp_file_path = Reports::Util.create_tmp_file(tmp_file_name)
+ @tmp_files[tmp_file_name] = tmp_file_path
+ return tmp_file_path
+ end
+
+ def rm_tmp_file(tmp_file_name)
+ @tmp_files.delete(tmp_file_name) if @tmp_files.has_key?(tmp_file_name)
+ end
+
+end \ No newline at end of file
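
A hedged sketch of how the new Reports::ReportContent is meant to be driven (the report factory below uses exactly these calls): build the content object, add sections, then hand xml_report and tmp_files to the persistance layer. Assumes the report environment is loaded and validation_set is a populated Reports::ValidationSet:

  report = Reports::ReportContent.new("Validation report")
  report.add_section_result(validation_set,
                            [:validation_uri] + VAL_ATTR_TRAIN_TEST + VAL_ATTR_CLASS,
                            "Results", "Results")
  report.add_section_roc_plot(validation_set)
  report.add_section_predictions(validation_set)

  xml_report = report.xml_report   # Reports::XMLReport to be rendered/persisted
  tmp_files  = report.tmp_files    # { plot-file-name => tmp-path } for the images
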
diff --git a/report/report_factory.rb b/report/report_factory.rb
index 7e63e32..ef22be1 100644
--- a/report/report_factory.rb
+++ b/report/report_factory.rb
@@ -3,11 +3,14 @@
VAL_ATTR_TRAIN_TEST = [ :model_uri, :training_dataset_uri, :test_dataset_uri, :prediction_feature ]
# selected attributes of interest when generating the crossvalidation report
VAL_ATTR_CV = [ :algorithm_uri, :dataset_uri, :num_folds, :crossvalidation_fold ]
+
# selected attributes of interest when performing classification
-VAL_ATTR_CLASS = [ :percent_correct, :weighted_area_under_roc, :area_under_roc, :f_measure, :true_positive_rate, :true_negative_rate ]
+VAL_ATTR_CLASS = [ :percent_correct, :weighted_area_under_roc,
+ :area_under_roc, :f_measure, :true_positive_rate, :true_negative_rate ]
VAL_ATTR_REGR = [ :root_mean_squared_error, :mean_absolute_error, :r_square ]
-VAL_ATTR_BAR_PLOT_CLASS = [ :accuracy, :weighted_area_under_roc, :area_under_roc, :f_measure, :true_positive_rate, :true_negative_rate ]
+VAL_ATTR_BAR_PLOT_CLASS = [ :accuracy, :weighted_area_under_roc,
+ :area_under_roc, :f_measure, :true_positive_rate, :true_negative_rate ]
VAL_ATTR_BAR_PLOT_REGR = [ :root_mean_squared_error, :mean_absolute_error, :r_square ]
@@ -28,45 +31,57 @@ module Reports::ReportFactory
# call-seq:
# self.create_report(type, validation_set) => Reports::ReportContent
#
- def self.create_report(type, validation_set)
+ def self.create_report(type, validation_set, task=nil)
case type
when RT_VALIDATION
- create_report_validation(validation_set)
+ create_report_validation(validation_set, task)
when RT_CV
- create_report_crossvalidation(validation_set)
+ create_report_crossvalidation(validation_set, task)
when RT_ALG_COMP
- create_report_compare_algorithms(validation_set)
+ create_report_compare_algorithms(validation_set, task)
else
raise "unknown report type "+type.to_s
end
end
private
- def self.create_report_validation(validation_set)
+ # this function is only to set task progress accordingly
+ # loading predictions is time consuming, and is done dynamically ->
+ # pre-load and set task progress
+ def self.pre_load_predictions( validation_set, task=nil)
+ i = 0
+ task_step = 100 / validation_set.size.to_f
+ validation_set.validations.each do |v|
+ v.get_predictions( OpenTox::SubTask.create(task, i*task_step, (i+1)*task_step ) )
+ i += 1
+ end
+ end
+
+ def self.create_report_validation(validation_set, task=nil)
raise Reports::BadRequest.new("num validations is not equal to 1") unless validation_set.size==1
val = validation_set.validations[0]
-
+ pre_load_predictions( validation_set, OpenTox::SubTask.create(task,0,80) )
+
report = Reports::ReportContent.new("Validation report")
if (val.classification?)
- report.add_section_result(validation_set, VAL_ATTR_TRAIN_TEST + VAL_ATTR_CLASS, "Results", "Results")
- report.add_section_roc_plot(validation_set, nil, nil, "roc-plot.svg")
- #val.get_prediction_feature_values.each do |class_value|
- #report.add_section_roc_plot(validation_set, class_value, nil, "roc-plot-"+class_value+".svg")
- #end
+ report.add_section_result(validation_set, [:validation_uri] + VAL_ATTR_TRAIN_TEST + VAL_ATTR_CLASS, "Results", "Results")
+ report.add_section_roc_plot(validation_set)
report.add_section_confusion_matrix(val)
else #regression
- report.add_section_result(validation_set, VAL_ATTR_TRAIN_TEST + VAL_ATTR_REGR, "Results", "Results")
- report.add_section_regression_plot(validation_set)
+ report.add_section_result(validation_set, [:validation_uri] + VAL_ATTR_TRAIN_TEST + VAL_ATTR_REGR, "Results", "Results")
+ report.add_section_regression_plot(validation_set, :model_uri)
end
+ task.progress(90) if task
report.add_section_result(validation_set, Lib::ALL_PROPS, "All Results", "All Results")
- report.add_section_predictions( validation_set )
- return report
+ report.add_section_predictions( validation_set )
+ task.progress(100) if task
+ report
end
- def self.create_report_crossvalidation(validation_set)
+ def self.create_report_crossvalidation(validation_set, task=nil)
raise Reports::BadRequest.new("num validations is not >1") unless validation_set.size>1
raise Reports::BadRequest.new("crossvalidation-id not unique and != nil: "+
@@ -77,6 +92,7 @@ module Reports::ReportFactory
raise Reports::BadRequest.new("num different folds is not equal to num validations") unless validation_set.num_different_values(:crossvalidation_fold)==validation_set.size
raise Reports::BadRequest.new("validations must be either all regression, "+
+"or all classification validations") unless validation_set.all_classification? or validation_set.all_regression?
+ pre_load_predictions( validation_set, OpenTox::SubTask.create(task,0,80) )
merged = validation_set.merge([:crossvalidation_id])
raise unless merged.size==1
@@ -85,27 +101,26 @@ module Reports::ReportFactory
report = Reports::ReportContent.new("Crossvalidation report")
if (validation_set.all_classification?)
- report.add_section_result(merged, VAL_ATTR_CV+VAL_ATTR_CLASS-[:crossvalidation_fold],"Mean Results","Mean Results")
-
- report.add_section_roc_plot(validation_set, nil, nil, "roc-plot.svg", "Roc Plot", nil, "Roc plot")
- report.add_section_roc_plot(validation_set, nil, :crossvalidation_fold, "roc-plot-folds.svg", "Roc Plot", nil, "Roc plots for folds")
- #validation_set.first.get_prediction_feature_values.each do |class_value|
- #report.add_section_roc_plot(validation_set, class_value, nil, "roc-plot-"+class_value+".svg")
- #end
+ report.add_section_result(merged, [:crossvalidation_uri]+VAL_ATTR_CV+VAL_ATTR_CLASS-[:crossvalidation_fold],"Mean Results","Mean Results")
+ report.add_section_roc_plot(validation_set, nil, "ROC Plots over all folds")
+ report.add_section_roc_plot(validation_set, :crossvalidation_fold)
report.add_section_confusion_matrix(merged.validations[0])
- report.add_section_result(validation_set, VAL_ATTR_CV+VAL_ATTR_CLASS-[:num_folds], "Results","Results")
+ report.add_section_result(validation_set, VAL_ATTR_CV+VAL_ATTR_CLASS-[:num_folds],
+ "Results","Results",nil,"validation")
else #regression
- report.add_section_result(merged, VAL_ATTR_CV+VAL_ATTR_REGR-[:crossvalidation_fold],"Mean Results","Mean Results")
- #report.add_section_regression_plot(validation_set)
+ report.add_section_result(merged, [:crossvalidation_uri]+VAL_ATTR_CV+VAL_ATTR_REGR-[:crossvalidation_fold],"Mean Results","Mean Results")
+ report.add_section_regression_plot(validation_set, :crossvalidation_fold)
report.add_section_result(validation_set, VAL_ATTR_CV+VAL_ATTR_REGR-[:num_folds], "Results","Results")
end
+ task.progress(90) if task
report.add_section_result(validation_set, Lib::ALL_PROPS, "All Results", "All Results")
- report.add_section_predictions( validation_set ) #, [:crossvalidation_fold] )
- return report
+ report.add_section_predictions( validation_set ) #, [:crossvalidation_fold] )
+ task.progress(100) if task
+ report
end
- def self.create_report_compare_algorithms(validation_set)
+ def self.create_report_compare_algorithms(validation_set, task=nil)
#validation_set.to_array([:test_dataset_uri, :model_uri, :algorithm_uri], false).each{|a| puts a.inspect}
raise Reports::BadRequest.new("num validations is not >1") unless validation_set.size>1
@@ -115,309 +130,67 @@ module Reports::ReportFactory
validation_set.get_values(:algorithm_uri).inspect) if validation_set.num_different_values(:algorithm_uri)<2
if validation_set.has_nil_values?(:crossvalidation_id)
- if validation_set.num_different_values(:test_dataset_uri)>1
-
- # groups results into sets with equal test and training dataset
- dataset_grouping = Reports::Util.group(validation_set.validations, [:test_dataset_uri, :training_dataset_uri])
- # check if the same algorithms exists for each test and training dataset
- Reports::Util.check_group_matching(dataset_grouping, [:algorithm_uri])
-
- #merged = validation_set.merge([:algorithm_uri, :dataset_uri])
- report = Reports::ReportContent.new("Algorithm comparison report - Many datasets")
-
- if (validation_set.all_classification?)
- report.add_section_result(validation_set,[:algorithm_uri, :test_dataset_uri]+VAL_ATTR_CLASS,"Mean Results","Mean Results")
- report.add_section_ranking_plots(validation_set, :algorithm_uri, :test_dataset_uri,
- [:percent_correct, :true_positive_rate, :true_negative_rate], "true")
- else # regression
- raise Reports::BadRequest.new("not implemented yet for regression")
- end
- return report
- else
- # this groups all validations in x different groups (arrays) according to there algorithm-uri
- algorithm_grouping = Reports::Util.group(validation_set.validations, [:algorithm_uri])
- # we check if there are corresponding validations in each group that have equal attributes (folds, num-folds,..)
- Reports::Util.check_group_matching(algorithm_grouping, [:training_dataset_uri, :test_dataset_uri, :prediction_feature])
-
- report = Reports::ReportContent.new("Algorithm comparison report")
-
- if (validation_set.all_classification?)
- report.add_section_bar_plot(validation_set,nil,:algorithm_uri,VAL_ATTR_BAR_PLOT_CLASS, "bar-plot.svg")
- report.add_section_roc_plot(validation_set,nil, :algorithm_uri, "roc-plot.svg")
- #validation_set.first.get_prediction_feature_values.each do |class_value|
- #report.add_section_bar_plot(validation_set,class_value,:algorithm_uri,VAL_ATTR_CLASS, "bar-plot-"+class_value+".svg")
- #report.add_section_roc_plot(validation_set, class_value, :algorithm_uri, "roc-plot-"+class_value+".svg")
- #end
- report.add_section_result(validation_set,[:algorithm_uri]+VAL_ATTR_CLASS,"Results","Results")
- else
- #regression
- report.add_section_result(validation_set,[:algorithm_uri]+VAL_ATTR_REGR,"Results","Results")
- report.add_section_bar_plot(validation_set,nil,:algorithm_uri,VAL_ATTR_BAR_PLOT_REGR, "bar-plot.svg")
- report.add_section_regression_plot(validation_set)
-
- #report.add_section_result(merged, VAL_ATTR_CV+VAL_ATTR_REGR-[:crossvalidation_fold],"Mean Results","Mean Results")
- #report.add_section_result(validation_set, VAL_ATTR_CV+VAL_ATTR_REGR-[:num_folds], "Results","Results")
- end
- report.add_section_result(validation_set, Lib::ALL_PROPS, "All Results", "All Results")
- return report
- end
+        raise Reports::BadRequest.new("algorithm comparison for non-crossvalidation results not yet implemented")
else
raise Reports::BadRequest.new("num different cross-validation-ids <2") if validation_set.num_different_values(:crossvalidation_id)<2
validation_set.load_cv_attributes
-
- if validation_set.num_different_values(:dataset_uri)>1
- # groups results into sets with equal dataset
- dataset_grouping = Reports::Util.group(validation_set.validations, [:dataset_uri])
- # check if equal values in each group exist
- Reports::Util.check_group_matching(dataset_grouping, [:algorithm_uri, :crossvalidation_fold, :num_folds, :stratified, :random_seed])
- # we only checked that equal validations exist in each dataset group, now check for each algorithm
- dataset_grouping.each do |validations|
- algorithm_grouping = Reports::Util.group(validations, [:algorithm_uri])
- Reports::Util.check_group_matching(algorithm_grouping, [:crossvalidation_fold, :num_folds, :stratified, :random_seed])
- end
-
- merged = validation_set.merge([:algorithm_uri, :dataset_uri])
- report = Reports::ReportContent.new("Algorithm comparison report - Many datasets")
-
- if (validation_set.all_classification?)
- report.add_section_result(merged,VAL_ATTR_CV+VAL_ATTR_CLASS-[:crossvalidation_fold],"Mean Results","Mean Results")
- report.add_section_ranking_plots(merged, :algorithm_uri, :dataset_uri, [:acc, :auc, :sens, :spec], "true")
- else # regression
- report.add_section_result(merged,VAL_ATTR_CV+VAL_ATTR_REGR-[:crossvalidation_fold],"Mean Results","Mean Results")
- end
-
- return report
- else
- # this groups all validations in x different groups (arrays) according to there algorithm-uri
- algorithm_grouping = Reports::Util.group(validation_set.validations, [:algorithm_uri])
- # we check if there are corresponding validations in each group that have equal attributes (folds, num-folds,..)
- Reports::Util.check_group_matching(algorithm_grouping, [:crossvalidation_fold, :num_folds, :dataset_uri, :stratified, :random_seed])
- merged = validation_set.merge([:algorithm_uri])
-
- report = Reports::ReportContent.new("Algorithm comparison report")
-
- if (validation_set.all_classification?)
-
- report.add_section_result(merged,VAL_ATTR_CV+VAL_ATTR_CLASS-[:crossvalidation_fold],"Mean Results","Mean Results")
-
- true_class = validation_set.get_true_prediction_feature_value
- if true_class!=nil
- report.add_section_bar_plot(merged,true_class,:algorithm_uri,VAL_ATTR_BAR_PLOT_CLASS, "bar-plot.svg")
- report.add_section_roc_plot(validation_set, nil, :algorithm_uri, "roc-plot.svg")
- else
- validation_set.get_prediction_feature_values.each do |class_value|
- report.add_section_bar_plot(merged,class_value,:algorithm_uri,VAL_ATTR_BAR_PLOT_CLASS, "bar-plot-"+class_value+".svg")
- report.add_section_roc_plot(validation_set, class_value, :algorithm_uri, "roc-plot-"+class_value+".svg")
- end
- end
-
- report.add_section_result(validation_set,VAL_ATTR_CV+VAL_ATTR_CLASS-[:num_folds],"Results","Results")
- else #regression
- report.add_section_result(merged, VAL_ATTR_CV+VAL_ATTR_REGR-[:crossvalidation_fold],"Mean Results","Mean Results")
- report.add_section_result(validation_set, VAL_ATTR_CV+VAL_ATTR_REGR-[:num_folds], "Results","Results")
- end
-
- return report
- end
+ compare_algorithms_crossvalidation(validation_set, task)
end
- end
-
-end
-
-# = Reports::ReportContent
-#
-# wraps an xml-report, adds functionality for adding sections, adds a hash for tmp files
-#
-class Reports::ReportContent
-
- attr_accessor :xml_report, :tmp_files
-
- def initialize(title)
- @xml_report = Reports::XMLReport.new(title, Time.now.strftime("Created at %m.%d.%Y - %H:%M"))
- end
+ end
- def add_section_predictions( validation_set,
- validation_attributes=[],
- section_title="Predictions",
- section_text="This section contains predictions.",
- table_title="Predictions")
-
- #PENING
- raise "validation attributes not implemented in get prediction array" if validation_attributes.size>0
+ # create Algorithm Comparison report
+ # crossvalidations, 1-n datasets, 2-n algorithms
+ def self.compare_algorithms_crossvalidation(validation_set, task=nil)
- section_table = @xml_report.add_section(@xml_report.get_root_element, section_title)
- if validation_set.validations[0].get_predictions
- @xml_report.add_paragraph(section_table, section_text) if section_text
- @xml_report.add_table(section_table, table_title, Lib::OTPredictions.to_array(validation_set.validations.collect{|v| v.get_predictions}, true, true))
+    # group results into sets with equal dataset (only needed if the validations stem from more than one dataset)
+ if (validation_set.num_different_values(:dataset_uri)>1)
+ dataset_grouping = Reports::Util.group(validation_set.validations, [:dataset_uri])
+ # check if equal values in each group exist
+ Reports::Util.check_group_matching(dataset_grouping, [:algorithm_uri, :crossvalidation_fold, :num_folds, :stratified, :random_seed])
else
- @xml_report.add_paragraph(section_table, "No prediction info available.")
+ dataset_grouping = [ validation_set.validations ]
end
- end
-
- def add_section_result( validation_set,
- validation_attributes,
- table_title,
- section_title="Results",
- section_text="This section contains results.")
-
- section_table = @xml_report.add_section(xml_report.get_root_element, section_title)
- @xml_report.add_paragraph(section_table, section_text) if section_text
- vals = validation_set.to_array(validation_attributes,true,validation_set.get_true_prediction_feature_value)
- vals = vals.collect{|a| a.collect{|v| v.to_s }}
- #PENDING transpose values if there more than 4 columns, and there are more than columns than rows
- transpose = vals[0].size>4 && vals[0].size>vals.size
- @xml_report.add_table(section_table, table_title, vals, !transpose, transpose)
- end
-
- def add_section_confusion_matrix( validation,
- section_title="Confusion Matrix",
- section_text="This section contains the confusion matrix.",
- table_title="Confusion Matrix")
- section_confusion = @xml_report.add_section(xml_report.get_root_element, section_title)
- @xml_report.add_paragraph(section_confusion, section_text) if section_text
- @xml_report.add_table(section_confusion, table_title,
- Reports::XMLReportUtil::create_confusion_matrix( validation.confusion_matrix ), false)
- end
-
- def add_section_regression_plot( validation_set,
- split_set_attribute = nil,
- plot_file_name="regression-plot.svg",
- section_title="Regression Plot",
- section_text=nil,
- image_title=nil,
- image_caption=nil)
-
- section_text = "This section contains the regression plot." unless section_text
- image_title = "Regression plot" unless image_title
- section_regr = @xml_report.add_section(@xml_report.get_root_element, section_title)
- prediction_set = validation_set.collect{ |v| v.get_predictions }
-
- if prediction_set.size>0
-
- section_text += "\nWARNING: regression plot information not available for all validation results" if prediction_set.size!=validation_set.size
- @xml_report.add_paragraph(section_regr, section_text) if section_text
- begin
- plot_file_path = add_tmp_file(plot_file_name)
- Reports::PlotFactory.create_regression_plot( plot_file_path, prediction_set )
- @xml_report.add_imagefigure(section_regr, image_title, plot_file_name, "SVG", image_caption)
- rescue RuntimeError => ex
- LOGGER.error("Could not create regression plot: "+ex.message)
- rm_tmp_file(plot_file_name)
- @xml_report.add_paragraph(section_regr, "could not create regression plot: "+ex.message)
- end
- else
- @xml_report.add_paragraph(section_regr, "No prediction info for regression available.")
+ # we only checked that equal validations exist in each dataset group, now check for each algorithm
+ dataset_grouping.each do |validations|
+ algorithm_grouping = Reports::Util.group(validations, [:algorithm_uri])
+ Reports::Util.check_group_matching(algorithm_grouping, [:crossvalidation_fold, :num_folds, :stratified, :random_seed])
end
- end
-
- def add_section_roc_plot( validation_set,
- class_value = nil,
- split_set_attribute = nil,
- plot_file_name="roc-plot.svg",
- section_title="Roc Plot",
- section_text=nil,
- image_title=nil,
- image_caption=nil)
-
- if class_value
- section_text = "This section contains the roc plot for class '"+class_value+"'." unless section_text
- image_title = "Roc Plot for class-value '"+class_value+"'" unless image_title
- else
- section_text = "This section contains the roc plot." unless section_text
- image_title = "Roc Plot for all classes" unless image_title
- end
+ pre_load_predictions( validation_set, OpenTox::SubTask.create(task,0,80) )
+ report = Reports::ReportContent.new("Algorithm comparison report - Many datasets")
- section_roc = @xml_report.add_section(@xml_report.get_root_element, section_title)
- prediction_set = validation_set.collect{ |v| v.get_predictions && v.get_predictions.confidence_values_available? }
+ if (validation_set.num_different_values(:dataset_uri)>1)
+ all_merged = validation_set.merge([:algorithm_uri, :dataset_uri, :crossvalidation_id, :crossvalidation_uri])
+ report.add_section_ranking_plots(all_merged, :algorithm_uri, :dataset_uri,
+ [:percent_correct, :weighted_area_under_roc, :true_positive_rate, :true_negative_rate] )
+ report.add_section_result_overview(all_merged, :algorithm_uri, :dataset_uri, [:percent_correct, :weighted_area_under_roc])
+ end
+
+ if (validation_set.all_classification?)
+ attributes = VAL_ATTR_CV+VAL_ATTR_CLASS-[:crossvalidation_fold]
+ attributes = ([ :dataset_uri ] + attributes).uniq
+
+ dataset_grouping.each do |validations|
+
+ set = Reports::ValidationSet.create(validations)
+ dataset = validations[0].dataset_uri
+ merged = set.merge([:algorithm_uri, :dataset_uri, :crossvalidation_id, :crossvalidation_uri])
+ merged.sort(:algorithm_uri)
+ merged.sort(:dataset_uri)
- if prediction_set.size>0
+ report.add_section_result(merged,attributes,
+ "Mean Results","Dataset: "+dataset,nil,"crossvalidation")
+ report.add_section_bar_plot(merged, :algorithm_uri, VAL_ATTR_BAR_PLOT_CLASS)
+ report.add_section_roc_plot(set, :algorithm_uri)
+ end
- section_text += "\nWARNING: roc plot information not available for all validation results" if prediction_set.size!=validation_set.size
- @xml_report.add_paragraph(section_roc, section_text) if section_text
- begin
- plot_file_path = add_tmp_file(plot_file_name)
- Reports::PlotFactory.create_roc_plot( plot_file_path, prediction_set, class_value, split_set_attribute, false )#prediction_set.size>1 )
- @xml_report.add_imagefigure(section_roc, image_title, plot_file_name, "SVG", image_caption)
- rescue RuntimeError => ex
- LOGGER.error("could not create roc plot: "+ex.message)
- rm_tmp_file(plot_file_name)
- @xml_report.add_paragraph(section_roc, "could not create roc plot: "+ex.message)
- end
- else
- @xml_report.add_paragraph(section_roc, "No prediction-confidence info for roc plot available.")
+ else # regression
+ raise Reports::BadRequest.new("algorithm comparison for regression not yet implemented")
end
-
+ task.progress(100) if task
+ report
end
-
- def add_section_ranking_plots( validation_set,
- compare_attribute,
- equal_attribute,
- rank_attributes,
- class_value,
- section_title="Ranking Plots",
- section_text="This section contains the ranking plots.")
-
- section_rank = @xml_report.add_section(@xml_report.get_root_element, section_title)
- @xml_report.add_paragraph(section_rank, section_text) if section_text
- rank_attributes.each{|a| add_ranking_plot(section_rank, validation_set, compare_attribute, equal_attribute, a, class_value, a.to_s+"-ranking.svg")}
- end
-
- def add_ranking_plot( report_section,
- validation_set,
- compare_attribute,
- equal_attribute,
- rank_attribute,
- class_value=nil,
- plot_file_name="ranking.svg",
- image_title=nil,
- image_caption=nil)
+end
- image_title = "Ranking Plot for class value: '"+class_value.to_s+"'" if image_title==nil
- plot_file_path = add_tmp_file(plot_file_name)
- Reports::PlotFactory::create_ranking_plot(plot_file_path, validation_set, compare_attribute, equal_attribute, rank_attribute, class_value)
- @xml_report.add_imagefigure(report_section, image_title, plot_file_name, "SVG", image_caption)
-
- end
-
- def add_section_bar_plot(validation_set,
- class_value,
- title_attribute,
- value_attributes,
- plot_file_name="bar-plot.svg",
- section_title="Bar Plot",
- section_text=nil,
- image_title=nil,
- image_caption=nil)
- if class_value
- section_text = "This section contains the bar plot for class '"+class_value+"'." unless section_text
- image_title = "Bar Plot for class-value '"+class_value+"'" unless image_title
- else
- section_text = "This section contains the bar plot." unless section_text
- image_title = "Bar Plot for all classes" unless image_title
- end
- section_bar = @xml_report.add_section(@xml_report.get_root_element, section_title)
- @xml_report.add_paragraph(section_bar, section_text) if section_text
-
- plot_file_path = add_tmp_file(plot_file_name)
- Reports::PlotFactory.create_bar_plot(plot_file_path, validation_set, class_value, title_attribute, value_attributes )
- @xml_report.add_imagefigure(section_bar, image_title, plot_file_name, "SVG", image_caption)
- end
-
- private
- def add_tmp_file(tmp_file_name)
-
- @tmp_files = {} unless @tmp_files
- raise "file name already exits" if @tmp_files[tmp_file_name] || (@text_files && @text_files[tmp_file_name])
- tmp_file_path = Reports::Util.create_tmp_file(tmp_file_name)
- @tmp_files[tmp_file_name] = tmp_file_path
- return tmp_file_path
- end
-
- def rm_tmp_file(tmp_file_name)
- @tmp_files.delete(tmp_file_name) if @tmp_files.has_key?(tmp_file_name)
- end
-
-end \ No newline at end of file
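A hedged usage sketch (not part of the patch) of the reworked comparison-report entry point; the crossvalidation URIs are placeholders and, in the service, the set is built from the posted validation_uris.

# build an algorithm comparison report for two finished crossvalidations
validation_set = Reports::ValidationSet.new(
  [ "http://ot.validation.de/crossvalidation/1",     # hypothetical URIs
    "http://ot.validation.de/crossvalidation/2" ])
# the task argument may be nil; when given, progress is reported up to 80%
# while predictions are pre-loaded and 100% once the content is assembled
report_content = Reports::ReportFactory.create_report_compare_algorithms(validation_set, nil)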
diff --git a/report/report_format.rb b/report/report_format.rb
index aafa204..5368858 100644
--- a/report/report_format.rb
+++ b/report/report_format.rb
@@ -69,14 +69,18 @@ module Reports::ReportFormat
def self.format_report_to_html(directory, xml_filename, html_filename, css_style_sheet)
css_style_sheet = "http://opentox.informatik.uni-freiburg.de/simple_ot_stylesheet.css" unless css_style_sheet
- css = css_style_sheet ? " html.stylesheet=css_style_sheet?css_style_sheet="+URI.encode(css_style_sheet.to_s) : nil
+
+ css = css_style_sheet ? "--stringparam html.stylesheet "+URI.encode(css_style_sheet.to_s) : nil
+ cmd = "xsltproc "+css.to_s+" "+ENV['REPORT_XSL']+" "+File.join(directory,xml_filename.to_s)+" > "+File.join(directory,html_filename.to_s)
+ #css = css_style_sheet ? " html.stylesheet=css_style_sheet?css_style_sheet="+URI.encode(css_style_sheet.to_s) : nil
+ #cmd = "java -jar "+ENV['SAXON_JAR']+" -o:" + File.join(directory,html_filename.to_s)+
+ # " -s:"+File.join(directory,xml_filename.to_s)+" -xsl:"+ENV['REPORT_XSL']+" -versionmsg:off"+css.to_s
- cmd = "java -jar "+ENV['SAXON_JAR']+" -o:" + File.join(directory,html_filename.to_s)+
- " -s:"+File.join(directory,xml_filename.to_s)+" -xsl:"+ENV['REPORT_XSL']+" -versionmsg:off"+css.to_s
LOGGER.debug "Converting report to html: '"+cmd+"'"
IO.popen(cmd.to_s) do |f|
while line = f.gets do
- LOGGER.info "saxon-xslt> "+line
+ LOGGER.info "xsltproc> "+line
+ #LOGGER.info "saxon-xslt> "+line
end
end
raise "error during conversion" unless $?==0
diff --git a/report/report_persistance.rb b/report/report_persistance.rb
index 424d5ca..1bd37b0 100644
--- a/report/report_persistance.rb
+++ b/report/report_persistance.rb
@@ -223,7 +223,7 @@ module Reports
def list_reports(type, filter_params={})
filter_params["report_type"]=type unless filter_params.has_key?("report_type")
- ReportData.find_like(filter_params).collect{ |r| r.id }
+ ReportData.find_like(filter_params).delete_if{|r| r.report_type!=type}.collect{ |r| r.id }
end
def get_report(type, id, format, force_formating, params)
diff --git a/report/report_service.rb b/report/report_service.rb
index 2187493..51be248 100644
--- a/report/report_service.rb
+++ b/report/report_service.rb
@@ -6,10 +6,21 @@ module Reports
class ReportService
+ @@persistance = Reports::ExtendedFileReportPersistance.new
+
+ def self.persistance
+ @@persistance
+ end
+
+ def self.instance
+ @@instance
+ end
+
def initialize(home_uri)
+ raise "supposed to be a singleton" if defined?@@instance
LOGGER.info "init report service"
@home_uri = home_uri
- @persistance = Reports::ExtendedFileReportPersistance.new
+ @@instance = self
end
# lists all available report types, returns list of uris
@@ -32,7 +43,7 @@ module Reports
LOGGER.info "get all reports of type '"+type.to_s+"', filter_params: '"+filter_params.inspect+"'"
check_report_type(type)
- @persistance.list_reports(type, filter_params).collect{ |id| get_uri(type,id) }.join("\n")+"\n"
+ @@persistance.list_reports(type, filter_params).collect{ |id| get_uri(type,id) }.join("\n")+"\n"
end
# creates a report of a certain type, __validation_uris__ must contain a list of validation or cross-validation-uris
@@ -41,7 +52,7 @@ module Reports
# call-seq:
# create_report(type, validation_uris) => string
#
- def create_report(type, validation_uris)
+ def create_report(type, validation_uris, task=nil)
LOGGER.info "create report of type '"+type.to_s+"'"
check_report_type(type)
@@ -52,14 +63,17 @@ module Reports
validation_set = Reports::ValidationSet.new(validation_uris)
raise Reports::BadRequest.new("cannot get validations from validation_uris '"+validation_uris.inspect+"'") unless validation_set and validation_set.size > 0
LOGGER.debug "loaded "+validation_set.size.to_s+" validation/s"
+ task.progress(10) if task
#step 2: create report of type
- report_content = Reports::ReportFactory.create_report(type, validation_set)
+ report_content = Reports::ReportFactory.create_report(type, validation_set,
+ OpenTox::SubTask.create(task,10,90))
LOGGER.debug "report created"
#step 3: persist report if creation not failed
- id = @persistance.new_report(report_content, type, create_meta_data(type, validation_set, validation_uris), self)
+ id = @@persistance.new_report(report_content, type, create_meta_data(type, validation_set, validation_uris), self)
LOGGER.debug "report persisted with id: '"+id.to_s+"'"
+ task.progress(100) if task
return get_uri(type, id)
end
@@ -75,7 +89,7 @@ module Reports
accept_header_value.to_s+"', force-formating:"+force_formating.to_s+" params: '"+params.inspect+"')"
check_report_type(type)
format = Reports::ReportFormat.get_format(accept_header_value)
- return @persistance.get_report(type, id, format, force_formating, params)
+ return @@persistance.get_report(type, id, format, force_formating, params)
end
# returns a report resource (i.e. image)
@@ -87,7 +101,7 @@ module Reports
LOGGER.info "get resource '"+resource+"' for report '"+id.to_s+"' of type '"+type.to_s+"'"
check_report_type(type)
- return @persistance.get_report_resource(type, id, resource)
+ return @@persistance.get_report_resource(type, id, resource)
end
@@ -100,7 +114,7 @@ module Reports
LOGGER.info "delete report '"+id.to_s+"' of type '"+type.to_s+"'"
check_report_type(type)
- @persistance.delete_report(type, id)
+ @@persistance.delete_report(type, id)
end
# no api-access for this method
@@ -108,7 +122,7 @@ module Reports
LOGGER.info "deleting all reports of type '"+type.to_s+"'"
check_report_type(type)
- @persistance.list_reports(type).each{ |id| @persistance.delete_report(type, id) }
+ @@persistance.list_reports(type).each{ |id| @@persistance.delete_report(type, id) }
end
def parse_type( report_uri )
@@ -123,7 +137,7 @@ module Reports
raise "invalid uri" unless report_uri.to_s =~/^#{@home_uri}.*/
id = report_uri.squeeze("/").split("/")[-1]
- @persistance.check_report_id_format(id)
+ @@persistance.check_report_id_format(id)
return id
end
@@ -151,6 +165,7 @@ module Reports
cvs << v if v =~ /crossvalidation/ and !cvs.include?(v)
end
meta_data[:crossvalidation_uris] = cvs
+
meta_data
end
diff --git a/report/report_test.rb b/report/report_test.rb
index 9ea1a89..5351c7d 100644
--- a/report/report_test.rb
+++ b/report/report_test.rb
@@ -23,8 +23,8 @@ class Reports::ApplicationTest < Test::Unit::TestCase
# puts OpenTox::RestClientWrapper.post("http://localhost/validation/report/qmrf/1",{:content_type => "application/qmrf-xml"},data).to_s.chomp
#get "/report/qmrf/1",nil,'HTTP_ACCEPT' => "application/qmrf-xml"#"application/rdf+xml"#"application/x-yaml"
- get "/report/validation" # ?model=http://localhost/model/1" #,nil,'HTTP_ACCEPT' => "application/rdf+xml"#"application/x-yaml"
- puts last_response.body.to_s
+# get "/report/validation" # ?model=http://localhost/model/1" #,nil,'HTTP_ACCEPT' => "application/rdf+xml"#"application/x-yaml"
+# puts last_response.body.to_s
#Reports::XMLReport.generate_demo_xml_report.write_to
#raise "stop"
@@ -33,7 +33,7 @@ class Reports::ApplicationTest < Test::Unit::TestCase
#puts uri
#get uri
- #get '/report/validation/1',nil,'HTTP_ACCEPT' => "text/html"
+ get '/report/validation/117',nil,'HTTP_ACCEPT' => "text/html"
#post '/report/validation/1/format_html',:css_style_sheet=>"http://apps.ideaconsult.net:8180/ToxPredict/style/global.css"
#post 'http://ot.validation.de/report/validation',:validation_uris=>"http://ot.validation.de/1"
diff --git a/report/util.rb b/report/util.rb
index db783b3..9844fd8 100644
--- a/report/util.rb
+++ b/report/util.rb
@@ -1,4 +1,3 @@
-
# graph-files are generated in the tmp-dir before they are stored
ENV['TMP_DIR'] = File.join(FileUtils.pwd,"reports","tmp") unless ENV['TMP_DIR']
@@ -15,6 +14,7 @@ class Array
return self.collect{|word| word[prefix.size..-1]}
end
end
+ self
end
end
diff --git a/report/validation_access.rb b/report/validation_access.rb
index 7d318af..a0b6355 100644
--- a/report/validation_access.rb
+++ b/report/validation_access.rb
@@ -20,7 +20,7 @@ class Reports::ValidationAccess
# yields predictions (Lib::OTPredictions) if available
#
- def get_predictions(validation)
+ def get_predictions(validation, task=nil)
raise "not implemented"
end
@@ -32,12 +32,11 @@ class Reports::ValidationAccess
# get domain/class values of prediction feature
#
- def get_prediction_feature_values(validation)
+ def get_class_domain(validation)
raise "not implemented"
end
# is validation classification?
- #
def classification?(validation)
raise "not implemented"
end
@@ -50,8 +49,8 @@ end
class Reports::ValidationDB < Reports::ValidationAccess
- def initialize
- @model_store = {}
+ def initialize()
+ @model_store = {}
end
def resolve_cv_uris(validation_uris)
@@ -59,6 +58,14 @@ class Reports::ValidationDB < Reports::ValidationAccess
validation_uris.each do |u|
if u.to_s =~ /.*\/crossvalidation\/[0-9]+/
cv_id = u.split("/")[-1].to_i
+ cv = nil
+ begin
+ cv = Lib::Crossvalidation.find( cv_id )
+ rescue => ex
+        raise "could not access crossvalidation with id "+cv_id.to_s+", error-msg: "+ex.message
+ end
+ raise Reports::BadRequest.new("crossvalidation with id '"+cv_id.to_s+"' not found") unless cv
+ raise Reports::BadRequest.new("crossvalidation with id '"+cv_id.to_s+"' not finished") unless cv.finished
res += Lib::Validation.find( :all, :conditions => { :crossvalidation_id => cv_id } ).collect{|v| v.validation_uri.to_s}
else
res += [u.to_s]
@@ -67,7 +74,6 @@ class Reports::ValidationDB < Reports::ValidationAccess
res
end
-
def init_validation(validation, uri)
raise Reports::BadRequest.new "not a validation uri: "+uri.to_s unless uri =~ /.*\/[0-9]+/
@@ -81,6 +87,7 @@ class Reports::ValidationDB < Reports::ValidationAccess
raise "could not access validation with id "+validation_id.to_s+", error-msg: "+ex.message
end
raise Reports::BadRequest.new "no validation found with id "+validation_id.to_s unless v #+" and uri "+uri.to_s unless v
+ raise Reports::BadRequest.new "validation with id "+validation_id.to_s+" is not finished yet" unless v.finished
(Lib::VAL_PROPS + Lib::VAL_CV_PROPS).each do |p|
validation.send("#{p.to_s}=".to_sym, v.send(p))
@@ -103,12 +110,13 @@ class Reports::ValidationDB < Reports::ValidationAccess
end
end
- def get_predictions(validation)
- Lib::OTPredictions.new( validation.classification?, validation.test_dataset_uri, validation.test_target_dataset_uri,
- validation.prediction_feature, validation.prediction_dataset_uri, validation.predicted_variable)
+ def get_predictions(validation, task=nil)
+ Lib::OTPredictions.new( validation.classification?, validation.test_dataset_uri,
+ validation.test_target_dataset_uri, validation.prediction_feature, validation.prediction_dataset_uri,
+ validation.predicted_variable, task)
end
- def get_prediction_feature_values( validation )
+ def get_class_domain( validation )
OpenTox::Feature.domain( validation.prediction_feature )
end
@@ -197,7 +205,7 @@ class Reports::ValidationWebservice < Reports::ValidationAccess
end
end
- def get_predictions(validation)
+ def get_predictions(validation, task=nil)
Lib::Predictions.new( validation.prediction_feature, validation.test_dataset_uri, validation.prediction_dataset_uri)
end
end
@@ -283,7 +291,7 @@ class Reports::ValidationMockLayer < Reports::ValidationAccess
#validation.CV_dataset_name = @datasets[validation.crossvalidation_id.to_i * NUM_FOLDS]
end
- def get_predictions(validation)
+ def get_predictions(validation, task=nil)
p = Array.new
c = Array.new
diff --git a/report/validation_data.rb b/report/validation_data.rb
index 0a25e87..bd04554 100644
--- a/report/validation_data.rb
+++ b/report/validation_data.rb
@@ -1,7 +1,7 @@
# the variance is computed when merging results for these attributes
VAL_ATTR_VARIANCE = [ :area_under_roc, :percent_correct, :root_mean_squared_error, :mean_absolute_error, :r_square, :accuracy ]
-VAL_ATTR_RANKING = [ :area_under_roc, :percent_correct, :true_positive_rate, :true_negative_rate, :accuracy ]
+VAL_ATTR_RANKING = [ :area_under_roc, :percent_correct, :true_positive_rate, :true_negative_rate, :weighted_area_under_roc ] #:accuracy ]
ATTR_NICE_NAME = {}
@@ -20,10 +20,14 @@ class Object
def to_nice_s
if is_a?(Float)
- if self>0.01
+ if self==0
+ return "0"
+ elsif abs>0.1
return "%.2f" % self
+ elsif abs>0.01
+ return "%.3f" % self
else
- return self.to_s
+ return "%.2e" % self
end
end
return collect{ |i| i.to_nice_s }.join(", ") if is_a?(Array)
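Illustrative calls for the new Float branches (outputs follow directly from the format strings above):

0.0.to_nice_s            # => "0"
0.8731.to_nice_s         # => "0.87"     (abs > 0.1  -> two decimals)
0.0524.to_nice_s         # => "0.052"    (abs > 0.01 -> three decimals)
0.0004.to_nice_s         # => "4.00e-04" (otherwise scientific notation)
[ 0.5, 0.25 ].to_nice_s  # => "0.50, 0.25"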
@@ -81,20 +85,26 @@ module Reports
# call-seq:
# get_predictions => Reports::Predictions
#
- def get_predictions
- return @predictions if @predictions
- unless @prediction_dataset_uri
- LOGGER.info("no predictions available, prediction_dataset_uri not set")
- return nil
+ def get_predictions( task=nil )
+ if @predictions
+ task.progress(100) if task
+ @predictions
+ else
+ unless @prediction_dataset_uri
+ LOGGER.info("no predictions available, prediction_dataset_uri not set")
+ task.progress(100) if task
+ nil
+ else
+ @predictions = @@validation_access.get_predictions( self, task )
+ end
end
- @predictions = @@validation_access.get_predictions( self )
end
# returns the prediction feature values (i.e. the domain of the class attribute)
#
- def get_prediction_feature_values
- return @prediction_feature_values if @prediction_feature_values
- @prediction_feature_values = @@validation_access.get_prediction_feature_values(self)
+ def get_class_domain()
+ @class_domain = @@validation_access.get_class_domain(self) unless @class_domain
+ @class_domain
end
# is classification validation? cached to save rest-calls
@@ -115,6 +125,24 @@ module Reports
@@validation_access.init_cv(self)
end
+ @@persistance = Reports::ReportService.persistance
+
+ def validation_report_uri
+ #puts "searching for validation report: "+self.validation_uri.to_s
+ return @validation_report_uri if @validation_report_uri!=nil
+ ids = @@persistance.list_reports("validation",{:validation=>validation_uri })
+ @validation_report_uri = Reports::ReportService.instance.get_uri("validation",ids[-1]) if ids and ids.size>0
+ end
+
+ def cv_report_uri
+ #puts "searching for cv report: "+self.crossvalidation_uri.to_s
+ return @cv_report_uri if @cv_report_uri!=nil
+ raise "no cv uri "+to_yaml unless self.crossvalidation_uri
+ ids = @@persistance.list_reports("crossvalidation",{:crossvalidation=>self.crossvalidation_uri.to_s })
+ #puts "-> "+ids.inspect
+ @cv_report_uri = Reports::ReportService.instance.get_uri("crossvalidation",ids[-1]) if ids and ids.size>0
+ end
+
def clone_validation
new_val = clone
VAL_ATTR_VARIANCE.each { |a| new_val.send((a.to_s+"_variance=").to_sym,nil) }
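A hedged sketch of the report-lookup helpers added above; the validation URI is hypothetical and the validation is assumed to exist and be finished.

v = Reports::Validation.new("http://ot.validation.de/validation/1")   # hypothetical uri
v.validation_report_uri   # => uri of the most recently stored validation report, or nil if none
v.cv_report_uri           # raises unless the validation belongs to a crossvalidation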
@@ -134,6 +162,13 @@ module Reports
@validations = Array.new
validation_uris.each{|u| @validations.push(Reports::Validation.new(u))} if validation_uris
end
+
+
+ def self.create(validations)
+ set = ValidationSet.new
+ validations.each{ |v| set.validations.push(v) }
+ set
+ end
def get(index)
return @validations[index]
@@ -194,20 +229,34 @@ module Reports
return val
end
- def get_true_prediction_feature_value
- if all_classification?
- class_values = get_prediction_feature_values
- if class_values.size == 2
- (0..1).each do |i|
- return class_values[i] if (class_values[i].to_s.downcase == "true" || class_values[i].to_s.downcase == "active")
- end
- end
- end
- return nil
+# def get_true_prediction_feature_value
+# if all_classification?
+# class_values = get_class_domain
+# if class_values.size == 2
+# (0..1).each do |i|
+# return class_values[i] if (class_values[i].to_s.downcase == "true" || class_values[i].to_s.downcase == "active")
+# end
+# end
+# end
+# return nil
+# end
+
+ def get_class_domain( )
+ return unique_value("get_class_domain")
end
- def get_prediction_feature_values
- return unique_value("get_prediction_feature_values")
+ def get_domain_for_attr( attribute )
+ class_domain = get_class_domain()
+ if Lib::Validation.classification_property?(attribute) and
+ !Lib::Validation.depends_on_class_value?(attribute)
+ [ nil ]
+ elsif Lib::Validation.classification_property?(attribute) and
+ class_domain.size==2 and
+ Lib::Validation.complement_exists?(attribute)
+ [ class_domain[0] ]
+ else
+ class_domain
+ end
end
# checks whether all validations are classification validations
@@ -246,6 +295,39 @@ module Reports
return new_set
end
+ def to_table( attribute_col, attribute_row, attribute_val)
+
+ row_values = get_values(attribute_row)
+ #puts row_values.inspect
+ col_values = get_values(attribute_col)
+ #puts col_values.inspect
+
+ cell_values = {}
+ row_values.each do |row|
+ col_values.each do |col|
+ val = nil
+ @validations.each do |v|
+ if v.send(attribute_row)==row and v.send(attribute_col)==col
+            raise "two validations have equal row and column values" if val!=nil
+ val = v.send(attribute_val).to_nice_s
+ end
+ end
+ cell_values[row] = [] if cell_values[row]==nil
+ cell_values[row] << val
+ end
+ end
+ #puts cell_values.inspect
+
+ table = []
+ table << [ "" ] + col_values
+ row_values.each do |row|
+ table << [ row ] + cell_values[row]
+ end
+ #puts table.inspect
+
+ table
+ end
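A hedged usage sketch of to_table; attribute names and output values are illustrative, and merged stands for a validation set merged per algorithm and dataset as built in compare_algorithms_crossvalidation above.

# pivot the merged validation set: one row per algorithm, one column per dataset,
# cells holding the weighted AUC formatted via to_nice_s
table = merged.to_table(:dataset_uri, :algorithm_uri, :weighted_area_under_roc)
# => [ [ "",       "http://host/dataset/1", "http://host/dataset/2" ],
#      [ "algo_A", "0.83",                  "0.79" ],
#      [ "algo_B", "0.78",                  "0.81" ] ]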
+
# returns an array, with values for __attributes__, that can be used for a table
# * first row is header row
# * other rows are values
@@ -253,7 +335,7 @@ module Reports
# call-seq:
# to_array(attributes, remove_nil_attributes) => array
#
- def to_array(attributes, remove_nil_attributes=true, true_class_value=nil)
+ def to_array(attributes, remove_nil_attributes=true)
array = Array.new
array.push(attributes.collect{|a| a.to_s.nice_attr})
attribute_not_nil = Array.new(attributes.size)
@@ -263,19 +345,43 @@ module Reports
if VAL_ATTR_VARIANCE.index(a)
variance = v.send( (a.to_s+"_variance").to_sym )
end
- variance = " +- "+variance.to_nice_s if variance
+
+ #variance = " +- "+variance.to_nice_s if variance
attribute_not_nil[index] = true if remove_nil_attributes and v.send(a)!=nil
index += 1
val = v.send(a)
- val = val[true_class_value] if true_class_value!=nil && val.is_a?(Hash) && Lib::VAL_CLASS_PROPS_PER_CLASS_COMPLEMENT_EXISTS.index(a)!=nil
- val.to_nice_s + variance.to_s
+
+          # get domain for classification attribute, i.e. ["true","false"]
+          class_domain = get_domain_for_attr(a)
+          if class_domain.size==1 && class_domain[0]!=nil
+            # the attribute has a complementary value (i.e. true_positive_rate),
+            # so the domain is reduced to one class value
+ raise "illegal state" unless (val.is_a?(Hash))
+ val = val[class_domain[0]]
+ end
+
+ if variance
+ if (val.is_a?(Array))
+ raise "not implemented"
+ elsif (val.is_a?(Hash))
+ val.collect{ |i,j| i.to_nice_s+": "+j.to_nice_s + " +- " +
+ variance[i].to_nice_s }.join(", ")
+ else
+ val.to_nice_s + " +- " + variance.to_nice_s
+ end
+ else
+ val.to_nice_s
+ end
+
end)
end
+
if remove_nil_attributes #delete in reverse order to avoid shifting of indices
(0..attribute_not_nil.size-1).to_a.reverse.each do |i|
array.each{|row| row.delete_at(i)} unless attribute_not_nil[i]
end
end
+
return array
end
@@ -294,6 +400,7 @@ module Reports
#compute grouping
grouping = Reports::Util.group(@validations, equal_attributes)
+ #puts "groups "+grouping.size.to_s
Lib::MergeObjects.register_merge_attributes( Reports::Validation,
Lib::VAL_MERGE_AVG,Lib::VAL_MERGE_SUM,Lib::VAL_MERGE_GENERAL) unless
@@ -310,6 +417,10 @@ module Reports
return new_set
end
+ def sort(attribute, ascending=true)
+ @validations.sort!{ |a,b| a.send(attribute).to_s <=> b.send(attribute).to_s }
+ end
+
# creates a new validation set that contains a ranking for __ranking_attribute__
# (i.e. for ranking attribute :acc, :acc_ranking is calculated)
# all validations with equal values for __equal_attributes__ are compared
@@ -319,7 +430,8 @@ module Reports
# compute_ranking(equal_attributes, ranking_attribute) => array
#
def compute_ranking(equal_attributes, ranking_attribute, class_value=nil )
-
+
+ #puts "compute_ranking("+equal_attributes.inspect+", "+ranking_attribute.inspect+", "+class_value.to_s+" )"
new_set = Reports::ValidationSet.new
(0..@validations.size-1).each do |i|
new_set.validations.push(@validations[i].clone_validation)
@@ -337,14 +449,16 @@ module Reports
raise "no value for class value "+class_value.class.to_s+" "+class_value.to_s+" in hash "+val.inspect.to_s unless val.has_key?(class_value)
val = val[class_value]
else
- raise "is a hash "+ranking_attribute+", specify class value plz"
+          raise "value for '"+ranking_attribute.to_s+"' is a hash, please specify a class value"
end
end
rank_hash[i] = val
end
+ #puts rank_hash.inspect
# sort group according to second value (= ranking value)
rank_array = rank_hash.sort { |a, b| b[1] <=> a[1] }
+ #puts rank_array.inspect
# create ranks array
ranks = Array.new
@@ -370,6 +484,7 @@ module Reports
end
end
end
+ #puts ranks.inspect
# set rank as validation value
(0..rank_array.size-1).each do |j|
diff --git a/report/xml_report.rb b/report/xml_report.rb
index 110c2a9..d280345 100644
--- a/report/xml_report.rb
+++ b/report/xml_report.rb
@@ -1,3 +1,6 @@
+#['rubygems', 'rexml/document' ].each do |g|
+# require g
+#end
require "report/xml_report_util.rb"
@@ -18,13 +21,23 @@ module Reports
class XMLReport
include REXML
+ def self.dtd_directory
+ if $sinatra
+ $sinatra.url_for('/'+ENV['DOCBOOK_DIRECTORY']+'/'+ENV['REPORT_DTD'], :full)
+ else
+ f = File.expand_path(File.join(ENV['DOCBOOK_DIRECTORY'],ENV['REPORT_DTD']))
+ raise "cannot find dtd" unless File.exist?(f)
+ f
+ end
+ end
+
# create new xmlreport
def initialize(title, pubdate=nil, author_firstname = nil, author_surname = nil)
@doc = Document.new
decl = XMLDecl.new
@doc << decl
- type = DocType.new('article PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "'+$sinatra.url_for('/'+ENV['DOCBOOK_DIRECTORY']+'/'+ENV['REPORT_DTD'], :full)+'"')
+ type = DocType.new('article PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "'+XMLReport.dtd_directory+'"')
@doc << type
@root = Element.new("article")
@@ -94,12 +107,20 @@ module Reports
media = Element.new("mediaobject")
image = Element.new("imageobject")
imagedata = Reports::XMLReportUtil.attribute_element("imagedata",
- {"fileref" => path, "format"=>filetype, "contentwidth" => "6in", "contentdepth"=> "4in"
+ {"fileref" => path, "format"=>filetype, "contentwidth" => "100%",
+ #"contentdepth"=> "4in"
})#"width" => "6in", "height" => "5in"}) #"contentwidth" => "100%"})
#imagedata = Reports::XMLReportUtil.attribute_element("imagedata",{"width" => "6in", "fileref" => path, "format"=>filetype})
@resource_path_elements[imagedata] = "fileref"
image << imagedata
+
media << image
+
+# ulink = Element.new("ulink")
+# ulink.add_attributes({"url" => "http://google.de"})
+# ulink << image
+# media << ulink
+
media << Reports::XMLReportUtil.text_element("caption", caption) if caption
figure << media
element << figure
diff --git a/saxonhe9-2-0-3j/doc/img/saxonica_logo.gif b/saxonhe9-2-0-3j/doc/img/saxonica_logo.gif
deleted file mode 100644
index 8f0bd8d..0000000
--- a/saxonhe9-2-0-3j/doc/img/saxonica_logo.gif
+++ /dev/null
Binary files differ
diff --git a/saxonhe9-2-0-3j/doc/index.html b/saxonhe9-2-0-3j/doc/index.html
deleted file mode 100644
index ce4293a..0000000
--- a/saxonhe9-2-0-3j/doc/index.html
+++ /dev/null
@@ -1,56 +0,0 @@
-<!DOCTYPE html
- PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
-<html>
- <head>
- <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
- <title>Saxonica: XSLT and XQuery Processing: Welcome</title>
- <meta name="coverage" content="Worldwide">
- <meta name="copyright" content="Copyright Saxonica Ltd">
- <meta name="title" content="Saxonica: XSLT and XQuery Processing: About Saxon">
- <meta name="robots" content="noindex,nofollow">
- <link rel="stylesheet" href="saxondocs.css" type="text/css">
- </head>
- <body class="main">
- <div id="rhDkBlueArea" style="position:absolute; width:123px; height:800px; z-index:2; right: 0px; top: 0px; border: 1px none #000000; background-color: #C1CEDE; layer-background-color: #C1CEDE; visibility: visible;"></div>
- <div id="rhMdBlueArea" style="position:absolute; width:217px; height:800px; z-index:2; right: 340px; top: 0px; border: 1px none #000000; background-color: #E4EEF0; layer-background-color: #E4EEF0; visibility: visible;"></div>
- <div id="lhLightArea" style="position:absolute; width:34px; height:800px; z-index:2; left: 66px; top: 0px; border: 1px none #000000; background-color: #f6fffb; layer-background-color: #E4EEF0; visibility: visible;"></div>
- <div id="lhDkBlueArea" style="position:absolute; width:66px; height:800px; z-index:2; left: 0px; top: 0px; border: 1px none #000000; background-color: #C1CEDE; layer-background-color: #C1CEDE; visibility: visible;"></div>
- <div id="LogoArea" style="position:absolute; width:340px; height:72px; z-index:3; right: 0px; top: 0px; border: 1px none #000000; visibility: visible;"><a href="http://www.saxonica.com/"><img src="img/saxonica_logo.gif" width="340" height="72" border="0" alt="Saxonica.com"></a></div>
-
- <div id="MainTextArea" style="position:absolute; height:100%; z-index:5; left: 130px; right: 260px; top: 110px; border: 1px none #000000; visibility: visible;">
- <h1>Welcome to Saxon</h1>
-
-
- <p class="subhead">Online Documentation</p>
-
- <p>Saxon documentation for the current release is available online:</p>
-
- <ul>
- <li><p><a href="http://www.saxonica.com/documentation/documentation.html" class="bodylink">Saxon Documentation</a></p></li>
-
- <li><p><a href="http://www.saxonica.com/documentation/javadoc/index.html" class="bodylink">Javadoc API Documentation</a></p></li>
-
- <li><p><a href="http://www.saxonica.com/documentation/dotnetdoc/index.html" class="bodylink">.NET API Documentation</a></p></li>
- </ul>
-
-
- <p class="subhead">Downloads</p>
-
- <p>Saxon documentation, together with source code and sample applications
- can also be downloaded, both for the current release and for earlier releases.
- </p>
-
- <p>The same file <code>saxon-resources8-N.zip</code> covers both Saxon products
- (Saxon-B and Saxon-SA), and both platforms (Java and .NET).</p>
-
- <p>The file also contains sample applications and Saxon-B source code.</p>
-
-
- <ul>
- <li><p><a class="bodylink" href="http://sourceforge.net/project/showfiles.php?group_id=29872">Saxon-B downloads at SourceForge</a></li>
- <li><p><a class="bodylink" href="http://www.saxonica.com/">Saxon-SA downloads at www.saxonica.com</a></li>
- </ul>
-
- </div>
- </body>
-</html> \ No newline at end of file
diff --git a/saxonhe9-2-0-3j/doc/saxondocs.css b/saxonhe9-2-0-3j/doc/saxondocs.css
deleted file mode 100644
index 681c337..0000000
--- a/saxonhe9-2-0-3j/doc/saxondocs.css
+++ /dev/null
@@ -1,228 +0,0 @@
-
-
-/*
-Text blue: #3D5B96
-Dark blue: #c1cede
-Mid blue: #e4eef0
-Light blue: #f6fffb
-mid green #B1CCC7
-rust #96433D
-*/
-
-/* used for frameset holders */
-.bgnd {
- margin-top:0;
- margin-left:0;
- background: #f6fffb;
- }
-
-/* used for menu */
-
-.menu {
- background: #f6fffb;
- margin-top:20;
- margin-left:40;
- SCROLLBAR-FACE-COLOR: #c1cede;
- SCROLLBAR-HIGHLIGHT-COLOR: #e4eef0;
- SCROLLBAR-SHADOW-COLOR: #e4eef0;
- SCROLLBAR-ARROW-COLOR: #f6fffb;
- SCROLLBAR-BASE-COLOR: #e4eef0;
-}
-
-/* used for content pages */
-
-.main {
- background: #e4eef0;
- margin-top:10px;
- margin-left:5px;
- margin-right:5px;
- margin-bottom:20px;
- SCROLLBAR-FACE-COLOR: #c1cede;
- SCROLLBAR-HIGHLIGHT-COLOR: #e4eef0;
- SCROLLBAR-SHADOW-COLOR: #e4eef0;
- SCROLLBAR-ARROW-COLOR: #f6fffb;
- SCROLLBAR-BASE-COLOR: #e4eef0;
-}
-
-/* used for menu links */
-
-a {
- font-family: Verdana, Arial, Helvetica, sans-serif;
- font-size: 8pt;
- font-style:normal;
- color: #3D5B96;
- font-weight: normal;
- text-decoration: none;
-}
-
-/* used for in body links */
-
-a.bodylink {
- font-family: Verdana, Arial, Helvetica, sans-serif;
- font-size: 9pt;
- font-style:normal;
- color: #3D5B96;
- font-weight: normal;
- text-decoration: underline;
-}
-
-/* used for table of contents level 1 */
-
-a.toc1 {
- font-family: Verdana, Arial, Helvetica, sans-serif;
- font-size: 12pt;
- font-style:normal;
- color: #3D5B96;
- font-weight: bold;
- text-decoration: none;
-}
-
-/* used for table of contents level 2 */
-
-a.toc2 {
- font-family: Verdana, Arial, Helvetica, sans-serif;
- font-size: 10pt;
- font-style:normal;
- color: #3D5B96;
- font-weight: normal;
- text-decoration: none;
-}
-
-/* used for menu heading */
-.title {
- font-family: Verdana, Arial, Helvetica, sans-serif;
- font-size: 10pt;
- font-style:normal;
- color: #3D5B96;
- font-weight: bold;
- text-decoration: none;
- line-height: 1.3em;
-}
-
-/* used for main page headings */
-
-
-h1 {
- font-family: Verdana, Arial, Helvetica, sans-serif;
- font-size: 14pt;
- font-style: normal;
- color: #3D5B96;
- font-weight: bold;
- text-decoration: none;
- }
-
-/* used for subheads in pref. to H2 etc, to limit underlining width */
-
-.subhead {
- font-family: Verdana, Arial, Helvetica, sans-serif;
- font-size: 10pt;
- font-style: normal;
- color: #3D5B96;
- font-weight: bold;
- text-decoration: none;
- border-bottom : thin dashed #3D5B96;
- padding-right : 5px;
-}
-
-/* used for standard text */
-
-p {
- font-family: Verdana, Arial, Helvetica, sans-serif;
- font-size: 9pt;
- font-style: normal;
- color: #3D5B96;
- font-weight: normal;
- text-decoration: none;
- line-height: 1.3em;
- padding-right:15px;
-}
-
-code {
- font-family: lucida sans typewriter, courier, monospace;
- font-size: 8pt;
- font-style: normal;
- font-weight: normal;
- text-decoration: none;
- line-height: 1.3em;
-}
-
-ul {
- font-family: Verdana, Arial, Helvetica, sans-serif;
- font-size: 9pt;
- font-style: normal;
- color: #3D5B96;
- font-weight: normal;
- text-decoration: none;
-}
-
-li {
- font-family: Verdana, Arial, Helvetica, sans-serif;
- font-size: 9pt;
- font-style: normal;
- color: #3D5B96;
- font-weight: normal;
-
-}
-
-/* used for text in boxed areas */
-
-.boxed {
- font-family: Verdana, Arial, Helvetica, sans-serif;
- font-size: 8pt;
- font-style: normal;
- color: #96433D;
- font-weight: bold;
- text-decoration: none;
- margin-top:5px;
- margin-bottom:5px;
-}
-
-/* used for example code */
-
-.codeblock {
- background: #B1CCC7;
- /*background: #e4eef0;*/
- font-family: lucida sans typewriter, courier, monospace;
- font-size: 8pt;
- font-style: normal;
- color: #96433D;
- font-weight: normal;
- text-decoration: none;
- padding-right:15px;
-}
-
-/* used for example commands */
-
-.command {
- font-size: 8pt;
- font-style: normal;
- color: #96433D;
- font-weight: bold;
- text-decoration: none;
- padding-right:15px;
-}
-
-
-
-/* used for links in boxed areas */
-
-a.rust {
- font-family: Verdana, Arial, Helvetica, sans-serif;
- font-size: 8pt;
- font-style:normal;
- color: #96433D;
- font-weight: bold;
- text-decoration: underline;
-}
-
-/* used for links at the end of a page */
-
-a.nav {
- font-family: Verdana, Arial, Helvetica, sans-serif;
- font-size: 8pt;
- font-style:normal;
- color: #96433D;
- font-weight: bold;
-}
-
-
diff --git a/saxonhe9-2-0-3j/notices/APACHE-ANT.txt b/saxonhe9-2-0-3j/notices/APACHE-ANT.txt
deleted file mode 100644
index fc3ed76..0000000
--- a/saxonhe9-2-0-3j/notices/APACHE-ANT.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-Apache Ant
-Copyright 1999-2006 The Apache Software Foundation
-
-This product includes software developed by
-The Apache Software Foundation (http://www.apache.org/).
-
-This product includes also software developed by :
- - the W3C consortium (http://www.w3c.org) ,
- - the SAX project (http://www.saxproject.org)
-
-The <sync> task is based on code Copyright (c) 2002, Landmark
-Graphics Corp that has been kindly donated to the Apache Software
-Foundation.
diff --git a/saxonhe9-2-0-3j/notices/APACHE-RESOLVER.txt b/saxonhe9-2-0-3j/notices/APACHE-RESOLVER.txt
deleted file mode 100644
index 214dadd..0000000
--- a/saxonhe9-2-0-3j/notices/APACHE-RESOLVER.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-Apache XML Commons Resolver
-Copyright 2006 The Apache Software Foundation.
-
-This product includes software developed at
-The Apache Software Foundation http://www.apache.org/
-
-Portions of this code are derived from classes placed in the
-public domain by Arbortext on 10 Apr 2000. See:
-http://www.arbortext.com/customer_support/updates_and_technical_notes/catalogs/docs/README.htm
diff --git a/saxonhe9-2-0-3j/notices/APACHE-XERCES.txt b/saxonhe9-2-0-3j/notices/APACHE-XERCES.txt
deleted file mode 100644
index e99c2e6..0000000
--- a/saxonhe9-2-0-3j/notices/APACHE-XERCES.txt
+++ /dev/null
@@ -1,17 +0,0 @@
- =========================================================================
- == NOTICE file corresponding to section 4(d) of the Apache License, ==
- == Version 2.0, in this case for the Apache Xerces Java distribution. ==
- =========================================================================
-
- Apache Xerces Java
- Copyright 1999-2006 The Apache Software Foundation
-
- This product includes software developed at
- The Apache Software Foundation (http://www.apache.org/).
-
- Portions of this software were originally based on the following:
- - software copyright (c) 1999, IBM Corporation., http://www.ibm.com.
- - software copyright (c) 1999, Sun Microsystems., http://www.sun.com.
- - voluntary contributions made by Paul Eng on behalf of the
- Apache Software Foundation that were originally developed at iClick, Inc.,
- software copyright (c) 1999. \ No newline at end of file
diff --git a/saxonhe9-2-0-3j/notices/CERN.txt b/saxonhe9-2-0-3j/notices/CERN.txt
deleted file mode 100644
index f77ab1a..0000000
--- a/saxonhe9-2-0-3j/notices/CERN.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-Copyright © 1999 CERN - European Organization for Nuclear Research.<br/><br/>
-
-Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose
-is hereby granted without fee, provided that the above copyright notice appear in all copies and
-that both that copyright notice and this permission notice appear in supporting documentation.
-CERN makes no representations about the suitability of this software for any purpose.
-It is provided "as is" without expressed or implied warranty. \ No newline at end of file
diff --git a/saxonhe9-2-0-3j/notices/FRIJTERS.txt b/saxonhe9-2-0-3j/notices/FRIJTERS.txt
deleted file mode 100644
index 568f0bc..0000000
--- a/saxonhe9-2-0-3j/notices/FRIJTERS.txt
+++ /dev/null
@@ -1,22 +0,0 @@
-Copyright (C) 2002-2007 Jeroen Frijters
-
- This software is provided 'as-is', without any express or implied
- warranty. In no event will the authors be held liable for any damages
- arising from the use of this software.
-
- Permission is granted to anyone to use this software for any purpose,
- including commercial applications, and to alter it and redistribute it
- freely, subject to the following restrictions:
-
- 1. The origin of this software must not be misrepresented; you must not
- claim that you wrote the original software. If you use this software
- in a product, an acknowledgment in the product documentation would be
- appreciated but is not required.
-
- 2. Altered source versions must be plainly marked as such, and must not be
- misrepresented as being the original software.
-
- 3. This notice may not be removed or altered from any source distribution.
-
- Jeroen Frijters
- jeroen@frijters.net
diff --git a/saxonhe9-2-0-3j/notices/GPL+CLASSPATH.txt b/saxonhe9-2-0-3j/notices/GPL+CLASSPATH.txt
deleted file mode 100644
index eeab58c..0000000
--- a/saxonhe9-2-0-3j/notices/GPL+CLASSPATH.txt
+++ /dev/null
@@ -1,347 +0,0 @@
-The GNU General Public License (GPL)
-
-Version 2, June 1991
-
-Copyright (C) 1989, 1991 Free Software Foundation, Inc.
-59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-Everyone is permitted to copy and distribute verbatim copies of this license
-document, but changing it is not allowed.
-
-Preamble
-
-The licenses for most software are designed to take away your freedom to share
-and change it. By contrast, the GNU General Public License is intended to
-guarantee your freedom to share and change free software--to make sure the
-software is free for all its users. This General Public License applies to
-most of the Free Software Foundation's software and to any other program whose
-authors commit to using it. (Some other Free Software Foundation software is
-covered by the GNU Library General Public License instead.) You can apply it to
-your programs, too.
-
-When we speak of free software, we are referring to freedom, not price. Our
-General Public Licenses are designed to make sure that you have the freedom to
-distribute copies of free software (and charge for this service if you wish),
-that you receive source code or can get it if you want it, that you can change
-the software or use pieces of it in new free programs; and that you know you
-can do these things.
-
-To protect your rights, we need to make restrictions that forbid anyone to deny
-you these rights or to ask you to surrender the rights. These restrictions
-translate to certain responsibilities for you if you distribute copies of the
-software, or if you modify it.
-
-For example, if you distribute copies of such a program, whether gratis or for
-a fee, you must give the recipients all the rights that you have. You must
-make sure that they, too, receive or can get the source code. And you must
-show them these terms so they know their rights.
-
-We protect your rights with two steps: (1) copyright the software, and (2)
-offer you this license which gives you legal permission to copy, distribute
-and/or modify the software.
-
-Also, for each author's protection and ours, we want to make certain that
-everyone understands that there is no warranty for this free software. If the
-software is modified by someone else and passed on, we want its recipients to
-know that what they have is not the original, so that any problems introduced
-by others will not reflect on the original authors' reputations.
-
-Finally, any free program is threatened constantly by software patents. We
-wish to avoid the danger that redistributors of a free program will
-individually obtain patent licenses, in effect making the program proprietary.
-To prevent this, we have made it clear that any patent must be licensed for
-everyone's free use or not licensed at all.
-
-The precise terms and conditions for copying, distribution and modification
-follow.
-
-TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
-
-0. This License applies to any program or other work which contains a notice
-placed by the copyright holder saying it may be distributed under the terms of
-this General Public License. The "Program", below, refers to any such program
-or work, and a "work based on the Program" means either the Program or any
-derivative work under copyright law: that is to say, a work containing the
-Program or a portion of it, either verbatim or with modifications and/or
-translated into another language. (Hereinafter, translation is included
-without limitation in the term "modification".) Each licensee is addressed as
-"you".
-
-Activities other than copying, distribution and modification are not covered by
-this License; they are outside its scope. The act of running the Program is
-not restricted, and the output from the Program is covered only if its contents
-constitute a work based on the Program (independent of having been made by
-running the Program). Whether that is true depends on what the Program does.
-
-1. You may copy and distribute verbatim copies of the Program's source code as
-you receive it, in any medium, provided that you conspicuously and
-appropriately publish on each copy an appropriate copyright notice and
-disclaimer of warranty; keep intact all the notices that refer to this License
-and to the absence of any warranty; and give any other recipients of the
-Program a copy of this License along with the Program.
-
-You may charge a fee for the physical act of transferring a copy, and you may
-at your option offer warranty protection in exchange for a fee.
-
-2. You may modify your copy or copies of the Program or any portion of it, thus
-forming a work based on the Program, and copy and distribute such modifications
-or work under the terms of Section 1 above, provided that you also meet all of
-these conditions:
-
- a) You must cause the modified files to carry prominent notices stating
- that you changed the files and the date of any change.
-
- b) You must cause any work that you distribute or publish, that in whole or
- in part contains or is derived from the Program or any part thereof, to be
- licensed as a whole at no charge to all third parties under the terms of
- this License.
-
- c) If the modified program normally reads commands interactively when run,
- you must cause it, when started running for such interactive use in the
- most ordinary way, to print or display an announcement including an
- appropriate copyright notice and a notice that there is no warranty (or
- else, saying that you provide a warranty) and that users may redistribute
- the program under these conditions, and telling the user how to view a copy
- of this License. (Exception: if the Program itself is interactive but does
- not normally print such an announcement, your work based on the Program is
- not required to print an announcement.)
-
-These requirements apply to the modified work as a whole. If identifiable
-sections of that work are not derived from the Program, and can be reasonably
-considered independent and separate works in themselves, then this License, and
-its terms, do not apply to those sections when you distribute them as separate
-works. But when you distribute the same sections as part of a whole which is a
-work based on the Program, the distribution of the whole must be on the terms
-of this License, whose permissions for other licensees extend to the entire
-whole, and thus to each and every part regardless of who wrote it.
-
-Thus, it is not the intent of this section to claim rights or contest your
-rights to work written entirely by you; rather, the intent is to exercise the
-right to control the distribution of derivative or collective works based on
-the Program.
-
-In addition, mere aggregation of another work not based on the Program with the
-Program (or with a work based on the Program) on a volume of a storage or
-distribution medium does not bring the other work under the scope of this
-License.
-
-3. You may copy and distribute the Program (or a work based on it, under
-Section 2) in object code or executable form under the terms of Sections 1 and
-2 above provided that you also do one of the following:
-
- a) Accompany it with the complete corresponding machine-readable source
- code, which must be distributed under the terms of Sections 1 and 2 above
- on a medium customarily used for software interchange; or,
-
- b) Accompany it with a written offer, valid for at least three years, to
- give any third party, for a charge no more than your cost of physically
- performing source distribution, a complete machine-readable copy of the
- corresponding source code, to be distributed under the terms of Sections 1
- and 2 above on a medium customarily used for software interchange; or,
-
- c) Accompany it with the information you received as to the offer to
- distribute corresponding source code. (This alternative is allowed only
- for noncommercial distribution and only if you received the program in
- object code or executable form with such an offer, in accord with
- Subsection b above.)
-
-The source code for a work means the preferred form of the work for making
-modifications to it. For an executable work, complete source code means all
-the source code for all modules it contains, plus any associated interface
-definition files, plus the scripts used to control compilation and installation
-of the executable. However, as a special exception, the source code
-distributed need not include anything that is normally distributed (in either
-source or binary form) with the major components (compiler, kernel, and so on)
-of the operating system on which the executable runs, unless that component
-itself accompanies the executable.
-
-If distribution of executable or object code is made by offering access to copy
-from a designated place, then offering equivalent access to copy the source
-code from the same place counts as distribution of the source code, even though
-third parties are not compelled to copy the source along with the object code.
-
-4. You may not copy, modify, sublicense, or distribute the Program except as
-expressly provided under this License. Any attempt otherwise to copy, modify,
-sublicense or distribute the Program is void, and will automatically terminate
-your rights under this License. However, parties who have received copies, or
-rights, from you under this License will not have their licenses terminated so
-long as such parties remain in full compliance.
-
-5. You are not required to accept this License, since you have not signed it.
-However, nothing else grants you permission to modify or distribute the Program
-or its derivative works. These actions are prohibited by law if you do not
-accept this License. Therefore, by modifying or distributing the Program (or
-any work based on the Program), you indicate your acceptance of this License to
-do so, and all its terms and conditions for copying, distributing or modifying
-the Program or works based on it.
-
-6. Each time you redistribute the Program (or any work based on the Program),
-the recipient automatically receives a license from the original licensor to
-copy, distribute or modify the Program subject to these terms and conditions.
-You may not impose any further restrictions on the recipients' exercise of the
-rights granted herein. You are not responsible for enforcing compliance by
-third parties to this License.
-
-7. If, as a consequence of a court judgment or allegation of patent
-infringement or for any other reason (not limited to patent issues), conditions
-are imposed on you (whether by court order, agreement or otherwise) that
-contradict the conditions of this License, they do not excuse you from the
-conditions of this License. If you cannot distribute so as to satisfy
-simultaneously your obligations under this License and any other pertinent
-obligations, then as a consequence you may not distribute the Program at all.
-For example, if a patent license would not permit royalty-free redistribution
-of the Program by all those who receive copies directly or indirectly through
-you, then the only way you could satisfy both it and this License would be to
-refrain entirely from distribution of the Program.
-
-If any portion of this section is held invalid or unenforceable under any
-particular circumstance, the balance of the section is intended to apply and
-the section as a whole is intended to apply in other circumstances.
-
-It is not the purpose of this section to induce you to infringe any patents or
-other property right claims or to contest validity of any such claims; this
-section has the sole purpose of protecting the integrity of the free software
-distribution system, which is implemented by public license practices. Many
-people have made generous contributions to the wide range of software
-distributed through that system in reliance on consistent application of that
-system; it is up to the author/donor to decide if he or she is willing to
-distribute software through any other system and a licensee cannot impose that
-choice.
-
-This section is intended to make thoroughly clear what is believed to be a
-consequence of the rest of this License.
-
-8. If the distribution and/or use of the Program is restricted in certain
-countries either by patents or by copyrighted interfaces, the original
-copyright holder who places the Program under this License may add an explicit
-geographical distribution limitation excluding those countries, so that
-distribution is permitted only in or among countries not thus excluded. In
-such case, this License incorporates the limitation as if written in the body
-of this License.
-
-9. The Free Software Foundation may publish revised and/or new versions of the
-General Public License from time to time. Such new versions will be similar in
-spirit to the present version, but may differ in detail to address new problems
-or concerns.
-
-Each version is given a distinguishing version number. If the Program
-specifies a version number of this License which applies to it and "any later
-version", you have the option of following the terms and conditions either of
-that version or of any later version published by the Free Software Foundation.
-If the Program does not specify a version number of this License, you may
-choose any version ever published by the Free Software Foundation.
-
-10. If you wish to incorporate parts of the Program into other free programs
-whose distribution conditions are different, write to the author to ask for
-permission. For software which is copyrighted by the Free Software Foundation,
-write to the Free Software Foundation; we sometimes make exceptions for this.
-Our decision will be guided by the two goals of preserving the free status of
-all derivatives of our free software and of promoting the sharing and reuse of
-software generally.
-
-NO WARRANTY
-
-11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR
-THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE
-STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE
-PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED,
-INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
-FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND
-PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE,
-YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
-
-12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL
-ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE
-PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
-GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR
-INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA
-BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
-FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER
-OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
-
-END OF TERMS AND CONDITIONS
-
-How to Apply These Terms to Your New Programs
-
-If you develop a new program, and you want it to be of the greatest possible
-use to the public, the best way to achieve this is to make it free software
-which everyone can redistribute and change under these terms.
-
-To do so, attach the following notices to the program. It is safest to attach
-them to the start of each source file to most effectively convey the exclusion
-of warranty; and each file should have at least the "copyright" line and a
-pointer to where the full notice is found.
-
- One line to give the program's name and a brief idea of what it does.
-
- Copyright (C) <year> <name of author>
-
- This program is free software; you can redistribute it and/or modify it
- under the terms of the GNU General Public License as published by the Free
- Software Foundation; either version 2 of the License, or (at your option)
- any later version.
-
- This program is distributed in the hope that it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along
- with this program; if not, write to the Free Software Foundation, Inc., 59
- Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-Also add information on how to contact you by electronic and paper mail.
-
-If the program is interactive, make it output a short notice like this when it
-starts in an interactive mode:
-
- Gnomovision version 69, Copyright (C) year name of author Gnomovision comes
- with ABSOLUTELY NO WARRANTY; for details type 'show w'. This is free
- software, and you are welcome to redistribute it under certain conditions;
- type 'show c' for details.
-
-The hypothetical commands 'show w' and 'show c' should show the appropriate
-parts of the General Public License. Of course, the commands you use may be
-called something other than 'show w' and 'show c'; they could even be
-mouse-clicks or menu items--whatever suits your program.
-
-You should also get your employer (if you work as a programmer) or your school,
-if any, to sign a "copyright disclaimer" for the program, if necessary. Here
-is a sample; alter the names:
-
- Yoyodyne, Inc., hereby disclaims all copyright interest in the program
- 'Gnomovision' (which makes passes at compilers) written by James Hacker.
-
- signature of Ty Coon, 1 April 1989
-
- Ty Coon, President of Vice
-
-This General Public License does not permit incorporating your program into
-proprietary programs. If your program is a subroutine library, you may
-consider it more useful to permit linking proprietary applications with the
-library. If this is what you want to do, use the GNU Library General Public
-License instead of this License.
-
-
-"CLASSPATH" EXCEPTION TO THE GPL
-
-Certain source files distributed by Sun Microsystems, Inc. are subject to
-the following clarification and special exception to the GPL, but only where
-Sun has expressly included in the particular source file's header the words
-"Sun designates this particular file as subject to the "Classpath" exception
-as provided by Sun in the LICENSE file that accompanied this code."
-
- Linking this library statically or dynamically with other modules is making
- a combined work based on this library. Thus, the terms and conditions of
- the GNU General Public License cover the whole combination.
-
- As a special exception, the copyright holders of this library give you
- permission to link this library with independent modules to produce an
- executable, regardless of the license terms of these independent modules,
- and to copy and distribute the resulting executable under terms of your
- choice, provided that you also meet, for each linked independent module,
- the terms and conditions of the license of that module. An independent
- module is a module which is not derived from or based on this library. If
- you modify this library, you may extend this exception to your version of
- the library, but you are not obligated to do so. If you do not wish to do
- so, delete this exception statement from your version.
diff --git a/saxonhe9-2-0-3j/notices/JAMESCLARK.txt b/saxonhe9-2-0-3j/notices/JAMESCLARK.txt
deleted file mode 100644
index b3ffc4e..0000000
--- a/saxonhe9-2-0-3j/notices/JAMESCLARK.txt
+++ /dev/null
@@ -1,25 +0,0 @@
-Copyright (c) 1998, 1999 James Clark
-
-Permission is hereby granted, free of charge, to any person obtaining
-a copy of this software and associated documentation files (the
-"Software"), to deal in the Software without restriction, including
-without limitation the rights to use, copy, modify, merge, publish,
-distribute, sublicense, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject to
-the following conditions:
-
-The above copyright notice and this permission notice shall be included
-in all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
-OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-IN NO EVENT SHALL JAMES CLARK BE LIABLE FOR ANY CLAIM, DAMAGES OR
-OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-OTHER DEALINGS IN THE SOFTWARE.
-
-Except as contained in this notice, the name of James Clark shall
-not be used in advertising or otherwise to promote the sale, use or
-other dealings in this Software without prior written authorization
-from James Clark.
diff --git a/saxonhe9-2-0-3j/notices/LEGAL.txt b/saxonhe9-2-0-3j/notices/LEGAL.txt
deleted file mode 100644
index 8e2a59b..0000000
--- a/saxonhe9-2-0-3j/notices/LEGAL.txt
+++ /dev/null
@@ -1,33 +0,0 @@
-LEGAL NOTICE
-
-This notice is issued to fulfil the requirements of the Mozilla Public License version 1.0 ("MPL 1.0")
-sections 3.4(a) and 3.6. MPL 1.0 can be found at http://www.mozilla.org/MPL/MPL-1.0.html.
-
-Section 3.4(a) of MPL 1.0 states that any third party intellectual property rights in particular
-functionality or code must be notified in a text file named LEGAL that is issued with the source code. Saxon
-includes a number of such third party components, and the relevant claims are included in notices included
-in the same directory as this notice. Although MPL 1.0 requires this notice to be included only with source
-code, some of the third parties may also require notices to be included with executable code. Therefore, Saxon
-executable code must not be distributed separately from this notice and all the accompanying third
-party notices. The term "Distribution" here includes making the code available for download, and its
-inclusion in download repositories such as Maven.
-
-Section 3.6 of MPL 1.0 states:
-
-You may distribute Covered Code in Executable form only if the requirements of Section 3.1-3.5 have
-been met for that Covered Code, and if You include a notice stating that the Source Code version of
-the Covered Code is available under the terms of this License, including a description of how and
-where You have fulfilled the obligations of Section 3.2.
-
-Section 3.2 requires the Source Code of Covered Code to be made available via an accepted Electronic
-Distribution Mechanism.
-
-The Source Code version of the Covered Code (that is, the source code of Saxon-B) is available under the
-terms of the Mozilla Public License version 1.0, and may be obtained from the Subversion repository
-for the Saxon project on SourceForge, at https://sourceforge.net/svn/?group_id=29872.
-The precise version of the Subversion source for a particular Saxon maintenance release can be
-determined by referring to the release notes for the particular release in the SourceForge download area.
-
-Note that MPL 1.0 requires that any modifications to this source code must be made available under the terms
-of the MPL "to anyone to whom you made an executable version available". As a courtesy, it is also requested
-that you make such modifications available to Saxonica Limited. \ No newline at end of file
diff --git a/saxonhe9-2-0-3j/notices/LICENSE.txt b/saxonhe9-2-0-3j/notices/LICENSE.txt
deleted file mode 100644
index c57dd4c..0000000
--- a/saxonhe9-2-0-3j/notices/LICENSE.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-The contents of these file are subject to the Mozilla Public License Version 1.0 (the "License");
-you may not use these files except in compliance with the License. You may obtain a copy of the
-License at http://www.mozilla.org/MPL/
-
-Software distributed under the License is distributed on an "AS IS" basis,
-WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the
-specific language governing rights and limitations under the License.
-
-The Original Code is all Saxon modules labelled with a notice referring to this license.
-
-The Initial Developer of the Original Code is Michael Kay, except where otherwise specified in an individual module.
-
-Portions created by other named contributors are copyright as identified in the relevant module. All Rights Reserved.
-
-Contributor(s) are listed in the documentation: see notices/contributors. \ No newline at end of file
diff --git a/saxonhe9-2-0-3j/notices/THAI.txt b/saxonhe9-2-0-3j/notices/THAI.txt
deleted file mode 100644
index ccc7e42..0000000
--- a/saxonhe9-2-0-3j/notices/THAI.txt
+++ /dev/null
@@ -1,31 +0,0 @@
-Copyright (c) 2001-2003 Thai Open Source Software Center Ltd
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
- Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
-
- Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in
- the documentation and/or other materials provided with the
- distribution.
-
- Neither the name of the Thai Open Source Software Center Ltd nor
- the names of its contributors may be used to endorse or promote
- products derived from this software without specific prior written
- permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR
-CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/saxonhe9-2-0-3j/notices/UNICODE.txt b/saxonhe9-2-0-3j/notices/UNICODE.txt
deleted file mode 100644
index 58f0484..0000000
--- a/saxonhe9-2-0-3j/notices/UNICODE.txt
+++ /dev/null
@@ -1,29 +0,0 @@
-COPYRIGHT AND PERMISSION NOTICE
-Copyright © 1991-2007 Unicode, Inc. All rights reserved. Distributed under the Terms of Use
-in http://www.unicode.org/copyright.html.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy of the Unicode
-data files and any associated documentation (the "Data Files") or Unicode software and any
-associated documentation (the "Software") to deal in the Data Files or Software without
-restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute,
-and/or sell copies of the Data Files or Software, and to permit persons to whom the Data Files or
-Software are furnished to do so, provided that (a) the above copyright notice(s) and this
-permission notice appear with all copies of the Data Files or Software, (b) both the above
-copyright notice(s) and this permission notice appear in associated documentation, and
-(c) there is clear notice in each modified Data File or in the Software as well as in the
-documentation associated with the Data File(s) or Software that the data or software has
-been modified.
-
-THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
-IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE
-BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES,
-OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
-WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
-ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THE DATA
-FILES OR SOFTWARE.
-
-Except as contained in this notice, the name of a copyright holder shall not be used
-in advertising or otherwise to promote the sale, use or other dealings in these
-Data Files or Software without prior written authorization of the copyright holder.
diff --git a/saxonhe9-2-0-3j/saxon9he.jar b/saxonhe9-2-0-3j/saxon9he.jar
deleted file mode 100644
index 0719a94..0000000
--- a/saxonhe9-2-0-3j/saxon9he.jar
+++ /dev/null
Binary files differ
diff --git a/test/test_examples_util.rb b/test/test_examples_util.rb
index c5a9900..cc31fe5 100644
--- a/test/test_examples_util.rb
+++ b/test/test_examples_util.rb
@@ -249,6 +249,7 @@ module ValidationExamples
yaml = YAML.load(Util.validation_get(@report_uri.split("/")[-3..-1].join("/"),'application/x-yaml'))
owl = OpenTox::Owl.from_data(Util.validation_get(@report_uri.split("/")[-3..-1].join("/")),@report_uri,"ValidationReport")
Util.compare_yaml_and_owl(yaml,owl)
+ Util.validation_get(@report_uri.split("/")[-3..-1].join("/"),'text/html')
else
puts "no report"
end
diff --git a/validation/validation_application.rb b/validation/validation_application.rb
index cf6fc78..6785943 100644
--- a/validation/validation_application.rb
+++ b/validation/validation_application.rb
@@ -10,7 +10,7 @@ require 'validation/validation_service.rb'
get '/crossvalidation/?' do
LOGGER.info "list all crossvalidations"
- uri_list = Validation::Crossvalidation.find_like(params).collect{ |d| url_for("/crossvalidation/", :full) + d.id.to_s }.join("\n")+"\n"
+ uri_list = Validation::Crossvalidation.find_like(params).collect{ |cv| cv.crossvalidation_uri }.join("\n")+"\n"
if request.env['HTTP_ACCEPT'] =~ /text\/html/
related_links =
"Single validations: "+$sinatra.url_for("/",:full)+"\n"+
@@ -29,7 +29,7 @@ end
post '/crossvalidation/?' do
content_type "text/uri-list"
- task_uri = OpenTox::Task.as_task( "Perform crossvalidation", url_for("/crossvalidation", :full), params ) do
+ task_uri = OpenTox::Task.as_task( "Perform crossvalidation", url_for("/crossvalidation", :full), params ) do |task|
LOGGER.info "creating crossvalidation "+params.inspect
halt 400, "dataset_uri missing" unless params[:dataset_uri]
halt 400, "algorithm_uri missing" unless params[:algorithm_uri]
@@ -41,8 +41,7 @@ post '/crossvalidation/?' do
:algorithm_uri => params[:algorithm_uri] }
[ :num_folds, :random_seed, :stratified ].each{ |sym| cv_params[sym] = params[sym] if params[sym] }
cv = Validation::Crossvalidation.new cv_params
- cv.create_cv_datasets( params[:prediction_feature] )
- cv.perform_cv( params[:algorithm_params])
+ cv.perform_cv( params[:prediction_feature], params[:algorithm_params], task )
cv.crossvalidation_uri
end
halt 202,task_uri+"\n"
@@ -56,7 +55,7 @@ post '/crossvalidation/cleanup/?' do
num_vals = Validation::Validation.find( :all, :conditions => { :crossvalidation_id => cv.id } ).size
if cv.num_folds != num_vals
LOGGER.debug "delete cv with id:"+cv.id.to_s+" num-folds should be "+cv.num_folds.to_s+", is "+num_vals.to_s
- deleted << url_for("/crossvalidation/", :full) + cv.id.to_s
+ deleted << cv.crossvalidation_uri
Validation::Crossvalidation.delete(cv.id)
end
end
@@ -110,13 +109,13 @@ get '/crossvalidation/:id/statistics' do
rescue ActiveRecord::RecordNotFound => ex
halt 404, "Crossvalidation '#{params[:id]}' not found."
end
+ halt 400,"Crossvalidation '"+params[:id].to_s+"' not finished" unless crossvalidation.finished
Lib::MergeObjects.register_merge_attributes( Validation::Validation,
- Lib::VAL_MERGE_AVG,Lib::VAL_MERGE_SUM,Lib::VAL_MERGE_GENERAL-[:validation_uri]) unless
+ Lib::VAL_MERGE_AVG,Lib::VAL_MERGE_SUM,Lib::VAL_MERGE_GENERAL-[:validation_uri,:crossvalidation_uri]) unless
Lib::MergeObjects.merge_attributes_registered?(Validation::Validation)
v = Lib::MergeObjects.merge_array_objects( Validation::Validation.find( :all, :conditions => { :crossvalidation_id => params[:id] } ) )
- v.validation_uri = nil
v.date = nil
v.id = nil
@@ -163,6 +162,8 @@ get '/crossvalidation/:id/predictions' do
rescue ActiveRecord::RecordNotFound => ex
halt 404, "Crossvalidation '#{params[:id]}' not found."
end
+ halt 400,"Crossvalidation '"+params[:id].to_s+"' not finished" unless crossvalidation.finished
+
content_type "application/x-yaml"
validations = Validation::Validation.find( :all, :conditions => { :crossvalidation_id => params[:id] } )
p = Lib::OTPredictions.to_array( validations.collect{ |v| v.compute_validation_stats_with_model(nil, true) } ).to_yaml
@@ -184,7 +185,7 @@ end
get '/?' do
LOGGER.info "list all validations, params: "+params.inspect
- uri_list = Validation::Validation.find_like(params).collect{ |d| url_for("/", :full) + d.id.to_s }.join("\n")+"\n"
+ uri_list = Validation::Validation.find_like(params).collect{ |v| v.validation_uri }.join("\n")+"\n"
if request.env['HTTP_ACCEPT'] =~ /text\/html/
related_links =
"To perform a validation:\n"+
@@ -215,11 +216,12 @@ post '/test_set_validation' do
LOGGER.info "creating test-set-validation "+params.inspect
if params[:model_uri] and params[:test_dataset_uri] and !params[:training_dataset_uri] and !params[:algorithm_uri]
task_uri = OpenTox::Task.as_task( "Perform test-set-validation", url_for("/", :full), params ) do |task|
- v = Validation::Validation.new :model_uri => params[:model_uri],
+ v = Validation::Validation.new :validation_type => "test_set_validation",
+ :model_uri => params[:model_uri],
:test_dataset_uri => params[:test_dataset_uri],
:test_target_dataset_uri => params[:test_target_dataset_uri],
:prediction_feature => params[:prediction_feature]
- v.validate_model
+ v.validate_model( task )
v.validation_uri
end
halt 202,task_uri+"\n"
@@ -231,7 +233,7 @@ end
get '/test_set_validation' do
LOGGER.info "list all test-set-validations, params: "+params.inspect
- uri_list = "NOT YET IMPLEMENTED"
+ uri_list = Validation::Validation.find( :all, :conditions => { :validation_type => "test_set_validation" } ).collect{ |v| v.validation_uri }.join("\n")+"\n"
if request.env['HTTP_ACCEPT'] =~ /text\/html/
related_links =
"All validations: "+$sinatra.url_for("/",:full)+"\n"+
@@ -253,12 +255,13 @@ post '/training_test_validation/?' do
LOGGER.info "creating training-test-validation "+params.inspect
if params[:algorithm_uri] and params[:training_dataset_uri] and params[:test_dataset_uri] and params[:prediction_feature] and !params[:model_uri]
task_uri = OpenTox::Task.as_task( "Perform training-test-validation", url_for("/", :full), params ) do |task|
- v = Validation::Validation.new :algorithm_uri => params[:algorithm_uri],
+ v = Validation::Validation.new :validation_type => "training_test_validation",
+ :algorithm_uri => params[:algorithm_uri],
:training_dataset_uri => params[:training_dataset_uri],
:test_dataset_uri => params[:test_dataset_uri],
:test_target_dataset_uri => params[:test_target_dataset_uri],
:prediction_feature => params[:prediction_feature]
- v.validate_algorithm( params[:algorithm_params])
+ v.validate_algorithm( params[:algorithm_params], task )
v.validation_uri
end
halt 202,task_uri+"\n"
@@ -270,7 +273,7 @@ end
get '/training_test_validation' do
LOGGER.info "list all training-test-validations, params: "+params.inspect
- uri_list = "NOT YET IMPLEMENTED"
+ uri_list = Validation::Validation.find( :all, :conditions => { :validation_type => "training_test_validation" } ).collect{ |v| v.validation_uri }.join("\n")+"\n"
if request.env['HTTP_ACCEPT'] =~ /text\/html/
related_links =
"All validations: "+$sinatra.url_for("/",:full)+"\n"+
@@ -294,19 +297,21 @@ end
post '/bootstrapping' do
content_type "text/uri-list"
- task_uri = OpenTox::Task.as_task( "Perform bootstrapping validation", url_for("/bootstrapping", :full), params ) do
+ task_uri = OpenTox::Task.as_task( "Perform bootstrapping validation", url_for("/bootstrapping", :full), params ) do |task|
LOGGER.info "performing bootstrapping validation "+params.inspect
halt 400, "dataset_uri missing" unless params[:dataset_uri]
halt 400, "algorithm_uri missing" unless params[:algorithm_uri]
halt 400, "prediction_feature missing" unless params[:prediction_feature]
- params.merge!(Validation::Util.bootstrapping(params[:dataset_uri], params[:prediction_feature], params[:random_seed]))
- v = Validation::Validation.new :training_dataset_uri => params[:training_dataset_uri],
+ params.merge!( Validation::Util.bootstrapping( params[:dataset_uri],
+ params[:prediction_feature], params[:random_seed], OpenTox::SubTask.create(task,0,33)) )
+ v = Validation::Validation.new :validation_type => "bootstrapping",
+ :training_dataset_uri => params[:training_dataset_uri],
:test_dataset_uri => params[:test_dataset_uri],
:test_target_dataset_uri => params[:dataset_uri],
:prediction_feature => params[:prediction_feature],
:algorithm_uri => params[:algorithm_uri]
- v.validate_algorithm( params[:algorithm_params])
+ v.validate_algorithm( params[:algorithm_params], OpenTox::SubTask.create(task,33,100))
v.validation_uri
end
halt 202,task_uri+"\n"
@@ -314,7 +319,7 @@ end
get '/bootstrapping' do
LOGGER.info "list all bootstrapping-validations, params: "+params.inspect
- uri_list = "NOT YET IMPLEMENTED"
+ uri_list = Validation::Validation.find( :all, :conditions => { :validation_type => "bootstrapping" } ).collect{ |v| v.validation_uri }.join("\n")+"\n"
if request.env['HTTP_ACCEPT'] =~ /text\/html/
related_links =
"All validations: "+$sinatra.url_for("/",:full)+"\n"+
@@ -336,20 +341,24 @@ get '/bootstrapping' do
end
post '/training_test_split' do
+
content_type "text/uri-list"
- task_uri = OpenTox::Task.as_task( "Perform training test split validation", url_for("/training_test_split", :full), params ) do
+ task_uri = OpenTox::Task.as_task( "Perform training test split validation", url_for("/training_test_split", :full), params ) do |task|
+
LOGGER.info "creating training test split "+params.inspect
halt 400, "dataset_uri missing" unless params[:dataset_uri]
halt 400, "algorithm_uri missing" unless params[:algorithm_uri]
halt 400, "prediction_feature missing" unless params[:prediction_feature]
- params.merge!(Validation::Util.train_test_dataset_split(params[:dataset_uri], params[:prediction_feature], params[:split_ratio], params[:random_seed]))
- v = Validation::Validation.new :training_dataset_uri => params[:training_dataset_uri],
+ params.merge!( Validation::Util.train_test_dataset_split(params[:dataset_uri], params[:prediction_feature],
+ params[:split_ratio], params[:random_seed], OpenTox::SubTask.create(task,0,33)))
+ v = Validation::Validation.new :validation_type => "training_test_split",
+ :training_dataset_uri => params[:training_dataset_uri],
:test_dataset_uri => params[:test_dataset_uri],
:test_target_dataset_uri => params[:dataset_uri],
:prediction_feature => params[:prediction_feature],
:algorithm_uri => params[:algorithm_uri]
- v.validate_algorithm( params[:algorithm_params])
+ v.validate_algorithm( params[:algorithm_params], OpenTox::SubTask.create(task,33,100))
v.validation_uri
end
halt 202,task_uri+"\n"
@@ -357,7 +366,7 @@ end
get '/training_test_split' do
LOGGER.info "list all training-test-split-validations, params: "+params.inspect
- uri_list = "NOT YET IMPLEMENTED"
+ uri_list = Validation::Validation.find( :all, :conditions => { :validation_type => "training_test_split" } ).collect{ |v| v.validation_uri }.join("\n")+"\n"
if request.env['HTTP_ACCEPT'] =~ /text\/html/
related_links =
"All validations: "+$sinatra.url_for("/",:full)+"\n"+
@@ -385,19 +394,13 @@ post '/cleanup/?' do
deleted = []
Validation::Validation.find( :all, :conditions => { :prediction_dataset_uri => nil } ).each do |val|
LOGGER.debug "delete val with id:"+val.id.to_s+" prediction_dataset_uri is nil"
- deleted << url_for("/", :full) + val.id.to_s
+ deleted << val.validation_uri
Validation::Validation.delete(val.id)
end
LOGGER.info "validation cleanup, deleted "+deleted.size.to_s+" validations"
deleted.join("\n")+"\n"
end
-
-
-
-
-
-
post '/plain_training_test_split' do
LOGGER.info "creating pure training test split "+params.inspect
halt 400, "dataset_uri missing" unless params[:dataset_uri]
@@ -409,14 +412,15 @@ end
post '/validate_datasets' do
content_type "text/uri-list"
- task_uri = OpenTox::Task.as_task( "Perform dataset validation", url_for("/validate_datasets", :full), params ) do
+ task_uri = OpenTox::Task.as_task( "Perform dataset validation", url_for("/validate_datasets", :full), params ) do |task|
LOGGER.info "validating values "+params.inspect
halt 400, "test_dataset_uri missing" unless params[:test_dataset_uri]
halt 400, "prediction_datset_uri missing" unless params[:prediction_dataset_uri]
+ params[:validation_type] = "validate_datasets"
if params[:model_uri]
v = Validation::Validation.new params
- v.compute_validation_stats_with_model()
+ v.compute_validation_stats_with_model(nil,false,task)
else
halt 400, "please specify 'model_uri' or 'prediction_feature'" unless params[:prediction_feature]
halt 400, "please specify 'model_uri' or 'predicted_feature'" unless params[:predicted_feature]
@@ -427,7 +431,7 @@ post '/validate_datasets' do
clazz = params.delete("classification")!=nil
regr = params.delete("regression")!=nil
v = Validation::Validation.new params
- v.compute_validation_stats((clazz and !regr),predicted_feature)
+ v.compute_validation_stats((clazz and !regr),predicted_feature,nil,false,task)
end
v.validation_uri
end
@@ -441,6 +445,7 @@ get '/:id/predictions' do
rescue ActiveRecord::RecordNotFound => ex
halt 404, "Validation '#{params[:id]}' not found."
end
+ halt 400,"Validation '"+params[:id].to_s+"' not finished" unless validation.finished
p = validation.compute_validation_stats_with_model(nil, true)
case request.env['HTTP_ACCEPT'].to_s
when /text\/html/
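
For orientation, a minimal, purely illustrative sketch of calling the asynchronous endpoints added above. The parameter names, the 202 status, and the text/uri-list task URI in the response are taken from the routes in this diff; the service root and the dataset/algorithm/feature URIs are hypothetical placeholders.

  require 'net/http'
  require 'uri'

  service = "http://localhost/validation"   # hypothetical service root

  # POST /training_test_split with the parameters the route checks for
  res = Net::HTTP.post_form(URI("#{service}/training_test_split"),
    "dataset_uri"        => "http://localhost/dataset/1",                  # placeholder
    "algorithm_uri"      => "http://localhost/algorithm/lazar",            # placeholder
    "prediction_feature" => "http://localhost/dataset/1/feature/endpoint", # placeholder
    "split_ratio"        => "0.67",
    "random_seed"        => "1")

  # the route halts with 202 and returns the URI of the running task (text/uri-list);
  # the validation URI becomes available once that task has finished
  task_uri = res.body.strip
  puts "validation task: #{task_uri}"
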
diff --git a/validation/validation_service.rb b/validation/validation_service.rb
index 67fdbee..0907edb 100644
--- a/validation/validation_service.rb
+++ b/validation/validation_service.rb
@@ -37,12 +37,10 @@ module Validation
  # constructs a validation object, sets id and uri
def initialize( params={} )
$sinatra.halt 500,"do not set id manually" if params[:id]
- $sinatra.halt 500,"do not set uri manually" if params[:validation_uri]
+ params[:finished] = false
super params
self.save!
raise "internal error, validation-id not set "+to_yaml if self.id==nil
- self.attributes = { :validation_uri => $sinatra.url_for("/"+self.id.to_s, :full).to_s }
- self.save!
end
# deletes a validation
@@ -62,7 +60,7 @@ module Validation
end
# validates an algorithm by building a model and validating this model
- def validate_algorithm( algorithm_params=nil )
+ def validate_algorithm( algorithm_params=nil, task=nil )
$sinatra.halt 404, "no algorithm uri: '"+self.algorithm_uri.to_s+"'" if self.algorithm_uri==nil or self.algorithm_uri.to_s.size<1
@@ -77,7 +75,8 @@ module Validation
end
LOGGER.debug "building model '"+algorithm_uri.to_s+"' "+params.inspect
- model = OpenTox::Model::PredictionModel.build(algorithm_uri, params)
+ model = OpenTox::Model::PredictionModel.build(algorithm_uri, params,
+ OpenTox::SubTask.create(task, 0, 33) )
$sinatra.halt 500,"model building failed" unless model
self.attributes = { :model_uri => model.uri }
self.save!
@@ -85,12 +84,12 @@ module Validation
$sinatra.halt 500,"error after building model: model.dependent_variable != validation.prediciton_feature ("+
model.dependentVariables.to_s+" != "+self.prediction_feature+")" if self.prediction_feature!=model.dependentVariables
- validate_model
+ validate_model OpenTox::SubTask.create(task, 33, 100)
end
# validates a model
# PENDING: a new dataset is created to store the predictions, this should be optional: delete predictions afterwards yes/no
- def validate_model
+ def validate_model( task=nil )
LOGGER.debug "validating model '"+self.model_uri+"'"
@@ -113,25 +112,27 @@ module Validation
prediction_dataset_uri = ""
benchmark = Benchmark.measure do
- prediction_dataset_uri = model.predict_dataset(self.test_dataset_uri)
+ prediction_dataset_uri = model.predict_dataset(self.test_dataset_uri, OpenTox::SubTask.create(task, 0, 50))
end
self.attributes = { :prediction_dataset_uri => prediction_dataset_uri,
:real_runtime => benchmark.real }
self.save!
- compute_validation_stats_with_model( model )
+ compute_validation_stats_with_model( model, false, OpenTox::SubTask.create(task, 50, 100) )
end
- def compute_validation_stats_with_model( model=nil, dry_run=false )
+ def compute_validation_stats_with_model( model=nil, dry_run=false, task=nil )
model = OpenTox::Model::PredictionModel.find(self.model_uri) if model==nil and self.model_uri
$sinatra.halt 400, "model not found: "+self.model_uri.to_s unless model
prediction_feature = self.prediction_feature ? nil : model.dependentVariables
algorithm_uri = self.algorithm_uri ? nil : model.algorithm
- compute_validation_stats( model.classification?, model.predictedVariables, prediction_feature, algorithm_uri, dry_run )
+ compute_validation_stats( model.classification?, model.predictedVariables,
+ prediction_feature, algorithm_uri, dry_run, task )
end
- def compute_validation_stats( classification, predicted_feature, prediction_feature=nil, algorithm_uri=nil, dry_run=false)
+ def compute_validation_stats( classification, predicted_feature, prediction_feature=nil,
+ algorithm_uri=nil, dry_run=false, task=nil )
self.attributes = { :prediction_feature => prediction_feature } if self.prediction_feature==nil && prediction_feature
self.attributes = { :algorithm_uri => algorithm_uri } if self.algorithm_uri==nil && algorithm_uri
@@ -140,7 +141,8 @@ module Validation
LOGGER.debug "computing prediction stats"
prediction = Lib::OTPredictions.new( classification,
self.test_dataset_uri, self.test_target_dataset_uri, self.prediction_feature,
- self.prediction_dataset_uri, predicted_feature )
+ self.prediction_dataset_uri, predicted_feature, OpenTox::SubTask.create(task, 0, 80) )
+      # reading datasets and computing the main stats is 80% of the work
unless dry_run
if prediction.classification?
@@ -152,9 +154,12 @@ module Validation
:num_without_class => prediction.num_without_class,
:percent_without_class => prediction.percent_without_class,
:num_unpredicted => prediction.num_unpredicted,
- :percent_unpredicted => prediction.percent_unpredicted }
+ :percent_unpredicted => prediction.percent_unpredicted,
+ :finished => true}
self.save!
end
+
+ task.progress(100) if task
prediction
end
end
@@ -165,16 +170,19 @@ module Validation
def initialize( params={} )
$sinatra.halt 500,"do not set id manually" if params[:id]
- $sinatra.halt 500,"do not set uri manually" if params[:crossvalidation_uri]
-
params[:num_folds] = 10 if params[:num_folds]==nil
params[:random_seed] = 1 if params[:random_seed]==nil
params[:stratified] = false if params[:stratified]==nil
+ params[:finished] = false
super params
self.save!
raise "internal error, crossvalidation-id not set" if self.id==nil
- self.attributes = { :crossvalidation_uri => $sinatra.url_for("/crossvalidation/"+self.id.to_s, :full) }
- self.save!
+ end
+
+ def perform_cv ( prediction_feature, algorithm_params=nil, task=nil )
+
+ create_cv_datasets( prediction_feature, OpenTox::SubTask.create(task, 0, 0.33) )
+ perform_cv_validations( algorithm_params, OpenTox::SubTask.create(task, 0.33, 1) )
end
# deletes a crossvalidation, all validations are deleted as well
@@ -186,20 +194,28 @@ module Validation
# creates the cv folds
# PENDING copying datasets of an equal (same dataset, same params) crossvalidation is disabled for now
- def create_cv_datasets( prediction_feature )
+ def create_cv_datasets( prediction_feature, task=nil )
- create_new_cv_datasets( prediction_feature ) #unless copy_cv_datasets( prediction_feature )
+ create_new_cv_datasets( prediction_feature, task ) #unless copy_cv_datasets( prediction_feature )
end
# executes the cross-validation (build models and validates them)
- def perform_cv ( algorithm_params=nil )
+ def perform_cv_validations( algorithm_params, task=nil )
- LOGGER.debug "perform cv validations"
+ LOGGER.debug "perform cv validations "+algorithm_params.inspect
+ i = 0
+ task_step = 100 / self.num_folds.to_f;
@tmp_validations.each do | val |
validation = Validation.new val
- validation.validate_algorithm( algorithm_params )
- #break
+ validation.validate_algorithm( algorithm_params,
+ OpenTox::SubTask.create(task, i * task_step, ( i + 1 ) * task_step) )
+        raise "validation '"+validation.validation_uri+"' for crossvalidation could not be finished" unless
+ validation.finished
+ i += 1
end
+
+ self.attributes = { :finished => true }
+ self.save!
end
private
@@ -222,7 +238,8 @@ module Validation
Validation.all( :crossvalidation_id => self.id ).each{ |v| v.delete }
return false
end
- validation = Validation.new :crossvalidation_id => self.id,
+ validation = Validation.new :validation_type => "crossvalidation",
+ :crossvalidation_id => self.id,
:crossvalidation_fold => v.crossvalidation_fold,
:training_dataset_uri => v.training_dataset_uri,
:test_dataset_uri => v.test_dataset_uri,
@@ -234,7 +251,7 @@ module Validation
  # creates cv folds (training and test datasets)
# stores uris in validation objects
- def create_new_cv_datasets( prediction_feature )
+ def create_new_cv_datasets( prediction_feature, task = nil )
$sinatra.halt(500,"random seed not set") unless self.random_seed
LOGGER.debug "creating datasets for crossvalidation"
@@ -318,13 +335,16 @@ module Validation
LOGGER.debug "test set: "+datasetname+"_test, compounds: "+test_compounds.size.to_s
test_dataset_uri = orig_dataset.create_new_dataset( test_compounds, test_features, datasetname + '_test', source )
- tmp_validation = { :training_dataset_uri => train_dataset_uri,
+ tmp_validation = { :validation_type => "crossvalidation",
+ :training_dataset_uri => train_dataset_uri,
:test_dataset_uri => test_dataset_uri,
:test_target_dataset_uri => self.dataset_uri,
:crossvalidation_id => self.id, :crossvalidation_fold => n,
:prediction_feature => prediction_feature,
:algorithm_uri => self.algorithm_uri }
@tmp_validations << tmp_validation
+
+ task.progress( n / self.num_folds.to_f * 100 ) if task
end
end
end
@@ -335,7 +355,7 @@ module Validation
# splits a dataset into test and training dataset via bootstrapping
# (training dataset-size is n, sampling from orig dataset with replacement)
# returns map with training_dataset_uri and test_dataset_uri
- def self.bootstrapping( orig_dataset_uri, prediction_feature, random_seed=nil )
+ def self.bootstrapping( orig_dataset_uri, prediction_feature, random_seed=nil, task=nil )
random_seed=1 unless random_seed
@@ -372,16 +392,20 @@ module Validation
LOGGER.debug "bootstrapping on dataset "+orig_dataset_uri+
" into training ("+training_compounds.size.to_s+") and test ("+test_compounds.size.to_s+")"+
", duplicates in training dataset: "+test_compounds.size.to_s
+ task.progress(33) if task
result = {}
result[:training_dataset_uri] = orig_dataset.create_new_dataset( training_compounds,
orig_dataset.features,
"Bootstrapping training dataset of "+orig_dataset.title.to_s,
$sinatra.url_for('/bootstrapping',:full) )
+ task.progress(66) if task
+
result[:test_dataset_uri] = orig_dataset.create_new_dataset( test_compounds,
orig_dataset.features.dclone - [prediction_feature],
"Bootstrapping test dataset of "+orig_dataset.title.to_s,
$sinatra.url_for('/bootstrapping',:full) )
+ task.progress(100) if task
if ENV['RACK_ENV'] =~ /test|debug/
training_dataset = OpenTox::Dataset.find result[:training_dataset_uri]
@@ -390,7 +414,6 @@ module Validation
$sinatra.halt 500, "training compounds error" unless training_compounds_verify==training_compounds
$sinatra.halt 400, "Test dataset not found: '"+result[:test_dataset_uri].to_s+"'" unless OpenTox::Dataset.find result[:test_dataset_uri]
end
-
LOGGER.debug "bootstrapping done, training dataset: '"+result[:training_dataset_uri].to_s+"', test dataset: '"+result[:test_dataset_uri].to_s+"'"
return result
@@ -398,7 +421,7 @@ module Validation
# splits a dataset into test and training dataset
# returns map with training_dataset_uri and test_dataset_uri
- def self.train_test_dataset_split( orig_dataset_uri, prediction_feature, split_ratio=nil, random_seed=nil )
+ def self.train_test_dataset_split( orig_dataset_uri, prediction_feature, split_ratio=nil, random_seed=nil, task=nil )
split_ratio=0.67 unless split_ratio
random_seed=1 unless random_seed
@@ -424,24 +447,28 @@ module Validation
LOGGER.debug "splitting dataset "+orig_dataset_uri+
" into train:0-"+split.to_s+" and test:"+(split+1).to_s+"-"+(compounds.size-1).to_s+
" (shuffled with seed "+random_seed.to_s+")"
-
compounds.shuffle!( random_seed )
+ task.progress(33) if task
result = {}
result[:training_dataset_uri] = orig_dataset.create_new_dataset( compounds[0..split],
orig_dataset.features,
"Training dataset split of "+orig_dataset.title.to_s,
$sinatra.url_for('/training_test_split',:full) )
+ task.progress(66) if task
+
result[:test_dataset_uri] = orig_dataset.create_new_dataset( compounds[(split+1)..-1],
orig_dataset.features.dclone - [prediction_feature],
"Test dataset split of "+orig_dataset.title.to_s,
$sinatra.url_for('/training_test_split',:full) )
+ task.progress(100) if task
- $sinatra.halt 400, "Training dataset not found: '"+result[:training_dataset_uri].to_s+"'" unless OpenTox::Dataset.find result[:training_dataset_uri]
- $sinatra.halt 400, "Test dataset not found: '"+result[:test_dataset_uri].to_s+"'" unless OpenTox::Dataset.find result[:test_dataset_uri]
+ if ENV['RACK_ENV'] =~ /test|debug/
+ $sinatra.halt 400, "Training dataset not found: '"+result[:training_dataset_uri].to_s+"'" unless OpenTox::Dataset.find result[:training_dataset_uri]
+ $sinatra.halt 400, "Test dataset not found: '"+result[:test_dataset_uri].to_s+"'" unless OpenTox::Dataset.find result[:test_dataset_uri]
+ end
LOGGER.debug "split done, training dataset: '"+result[:training_dataset_uri].to_s+"', test dataset: '"+result[:test_dataset_uri].to_s+"'"
-
return result
end
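
A rough sketch, in plain Ruby, of the seeded shuffle-and-split idea behind Util.train_test_dataset_split and of the sampling-with-replacement step of Util.bootstrapping. The defaults 0.67 and 1 match the diff above; the rounding of the split index and the use of Ruby's built-in Random are assumptions, and the real service works on compound URIs and creates new OpenTox datasets, which is not reproduced here.

  # deterministic split of a compound list into training and test parts
  def split_compounds(compounds, split_ratio = 0.67, random_seed = 1)
    shuffled = compounds.shuffle(random: Random.new(random_seed)) # same seed => same split
    split    = (shuffled.size * split_ratio).round - 1            # last index of the training part
    { :training => shuffled[0..split], :test => shuffled[(split + 1)..-1] }
  end

  # bootstrapping instead draws n training compounds *with replacement*
  def bootstrap_training(compounds, random_seed = 1)
    rng = Random.new(random_seed)
    Array.new(compounds.size) { compounds[rng.rand(compounds.size)] }
  end

  split_compounds((1..10).to_a)   # => 7 training compounds, 3 test compounds
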
diff --git a/validation/validation_test.rb b/validation/validation_test.rb
index 5e068a3..31495a2 100644
--- a/validation/validation_test.rb
+++ b/validation/validation_test.rb
@@ -34,9 +34,9 @@ class ValidationTest < Test::Unit::TestCase
def test_it
$test_case = self
- #get "/report/crossvalidation/15",nil,'HTTP_ACCEPT' => "text/html"
+ #get "/1",nil,'HTTP_ACCEPT' => "text/html"
# get "/",nil,'HTTP_ACCEPT' => "text/html"
-# puts last_response.body
+ #puts last_response.body
# d = OpenTox::Dataset.find("http://ot-dev.in-silico.ch/dataset/307")
# puts d.compounds.inspect
@@ -64,7 +64,7 @@ class ValidationTest < Test::Unit::TestCase
# :test_target_dataset_uri=>"http://localhost/dataset/1"
# get "/crossvalidation/2",nil,'HTTP_ACCEPT' => "application/rdf+xml"
-# puts last_response.body
+ #puts last_response.body
#get "/crossvalidation?model_uri=lazar"
# post "/test_validation",:select=>"6d" #,:report=>"yes,please"
@@ -80,18 +80,24 @@ class ValidationTest < Test::Unit::TestCase
# #:classification=>"true"}
# puts last_response.body
- #run_test("1b")#,"http://localhost/validation/394");
+ #run_test("1b","http://localhost/validation/20")#,"http://localhost/validation/394");
- #run_test("11b", "http://localhost/validation/crossvalidation/2" )# //localhost/validation/42")#, "http://localhost/validation/report/validation/8") #,"http://localhost/validation/report/validation/36") #, "http://localhost/validation/321")
+ #run_test("7b","http://localhost/validation/21")
- # run_test("7a","http://localhost/validation/40") #,"http://localhost/validation/crossvalidation/10") #, "http://localhost/validation/321")
+ #run_test("3a","http://localhost/validation/crossvalidation/4")
+ #run_test("3b","http://localhost/validation/crossvalidation/3")
+ run_test("8a", "http://localhost/validation/crossvalidation/6")
+ #run_test("8b", "http://localhost/validation/crossvalidation/5")
+
+ #run_test("11b", "http://localhost/validation/crossvalidation/2" )# //localhost/validation/42")#, "http://localhost/validation/report/validation/8") #,"http://localhost/validation/report/validation/36") #, "http://localhost/validation/321")
+ # run_test("7a","http://localhost/validation/40") #,"http://localhost/validation/crossvalidation/10") #, "http://localhost/validation/321")
#run_test("8b", "http://localhost/validation/crossvalidation/4")
#puts Nightly.build_nightly("1")
- #prepare_examples
- do_test_examples # USES CURL, DO NOT FORGET TO RESTART VALIDATION SERVICE
+ # prepare_examples
+ #do_test_examples # USES CURL, DO NOT FORGET TO RESTART VALIDATION SERVICE
#do_test_examples_ortona
end