diff options
44 files changed, 1020 insertions, 1431 deletions
@@ -54,7 +54,7 @@ task :load_config do puts "config loaded" end -# USER VERSION 0 instead +# USE VERSION 0 instead #desc "Clear database" #task :clear_db => :load_config do # if @@config[:database][:adapter]=="mysql" diff --git a/RankPlotter/RankPlotter.jar b/RankPlotter/RankPlotter.jar Binary files differindex 80d8e64..036c40a 100644 --- a/RankPlotter/RankPlotter.jar +++ b/RankPlotter/RankPlotter.jar diff --git a/db/migrate/001_init_validation.rb b/db/migrate/001_init_validation.rb index 93d8d2f..2189568 100644 --- a/db/migrate/001_init_validation.rb +++ b/db/migrate/001_init_validation.rb @@ -4,8 +4,7 @@ class InitValidation < ActiveRecord::Migration create_table :crossvalidations do |t| - [:crossvalidation_uri, - :algorithm_uri, + [:algorithm_uri, :dataset_uri ].each do |p| t.column p, :string, :limit => 255 end @@ -19,23 +18,21 @@ class InitValidation < ActiveRecord::Migration t.column p, :integer, :null => false end - [ :stratified ].each do |p| + [ :stratified, :finished ].each do |p| t.column p, :boolean, :null => false end - end create_table :validations do |t| - [:validation_uri, + [:validation_type, :model_uri, :algorithm_uri, :training_dataset_uri, :test_target_dataset_uri, :test_dataset_uri, :prediction_dataset_uri, - :prediction_feature, - :crossvalidation_uri].each do |p| + :prediction_feature].each do |p| t.column p, :string, :limit => 255 end @@ -54,6 +51,10 @@ class InitValidation < ActiveRecord::Migration [:classification_statistics, :regression_statistics].each do |p| t.column(p, :text, :limit => 16320) end + + [ :finished ].each do |p| + t.column p, :boolean, :null => false + end end end @@ -83,8 +83,7 @@ class Example log "crossvalidation" cv = Validation::Crossvalidation.new({ :dataset_uri => data_uri, :algorithm_uri => @@alg, :num_folds => 5, :stratified => false }) - cv.create_cv_datasets( URI.decode(@@feature) ) - cv.perform_cv( @@alg_params ) + cv.perform_cv( URI.decode(@@feature), @@alg_params ) log "create validation report" rep = Reports::ReportService.new(File.join(@@config[:services]["opentox-validation"],"report")) diff --git a/lib/active_record_setup.rb b/lib/active_record_setup.rb index cea4fb2..3682c7a 100644 --- a/lib/active_record_setup.rb +++ b/lib/active_record_setup.rb @@ -28,7 +28,14 @@ class ActiveRecord::Base key = key+"_uri" unless self.column_names.include?(key) key = key+"s" - $sinatra.halt 400,"no attribute found: '"+k.to_s+"'" unless self.column_names.include?(key) + unless self.column_names.include?(key) + err = "no attribute found: '"+k.to_s+"'" + if $sinatra + $sinatra.halt 400,err + else + raise err + end + end end end end diff --git a/lib/ot_predictions.rb b/lib/ot_predictions.rb index dcd7d09..0175a0c 100644 --- a/lib/ot_predictions.rb +++ b/lib/ot_predictions.rb @@ -15,7 +15,8 @@ module Lib return @compounds[instance_index] end - def initialize(is_classification, test_dataset_uri, test_target_dataset_uri, prediction_feature, prediction_dataset_uri, predicted_variable) + def initialize(is_classification, test_dataset_uri, test_target_dataset_uri, + prediction_feature, prediction_dataset_uri, predicted_variable, task=nil) LOGGER.debug("loading prediciton via test-dataset:'"+test_dataset_uri.to_s+ "', test-target-datset:'"+test_target_dataset_uri.to_s+ @@ -79,6 +80,7 @@ module Lib actual_values.push value end end + task.progress(40) if task # loaded actual values prediction_dataset = OpenTox::Dataset.find prediction_dataset_uri raise "prediction dataset not found: '"+prediction_dataset_uri.to_s+"'" unless prediction_dataset @@ -118,9 +120,11 @@ module Lib confidence_values << prediction_dataset.get_prediction_confidence(c, predicted_variable) end end + task.progress(80) if task # loaded predicted values and confidence super(predicted_values, actual_values, confidence_values, is_classification, class_values) raise "illegal num compounds "+num_info if @compounds.size != @predicted_values.size + task.progress(100) if task # done with the mathmatics end @@ -128,7 +132,7 @@ module Lib res = {} if @is_classification - (Lib::VAL_CLASS_PROPS_EXTENDED).each{ |s| res[s] = send(s)} + (Lib::VAL_CLASS_PROPS).each{ |s| res[s] = send(s)} else (Lib::VAL_REGR_PROPS).each{ |s| res[s] = send(s) } end @@ -151,7 +155,8 @@ module Lib a.push( "http://ambit.uni-plovdiv.bg:8080/ambit2/depict/cdk?search="+ URI.encode(OpenTox::Compound.new(:uri=>p.identifier(i)).smiles) ) if add_pic rescue => ex - a.push("Could not add pic: "+ex.message) + #a.push("Could not add pic: "+ex.message) + a.push(p.identifier(i)) end a << (format ? p.actual_value(i).to_nice_s : p.actual_value(i)) diff --git a/lib/predictions.rb b/lib/predictions.rb index 2873689..6e50e94 100644 --- a/lib/predictions.rb +++ b/lib/predictions.rb @@ -23,13 +23,13 @@ module Lib actual_values, confidence_values, is_classification, - prediction_feature_values=nil ) + class_domain=nil ) @predicted_values = predicted_values @actual_values = actual_values @confidence_values = confidence_values @is_classification = is_classification - @prediction_feature_values = prediction_feature_values + @class_domain = class_domain @num_classes = 1 #puts "predicted: "+predicted_values.inspect @@ -43,23 +43,27 @@ module Lib raise "illegal num confidence values "+num_info if @confidence_values.size != @predicted_values.size @confidence_values.each{ |c| raise "illegal confidence value: '"+c.to_s+"'" unless c==nil or (c.is_a?(Numeric) and c>=0 and c<=1) } - conf_val_tmp = {} - @confidence_values.each{ |c| conf_val_tmp[c] = nil } - if conf_val_tmp.keys.size<2 - LOGGER.warn("prediction w/o confidence values"); - @confidence_values=nil - end + ## check if there is more than one different conf value + ## DEPRECATED? not sure anymore what this was about, + ## I am pretty sure this was for r-plot of roc curves + ## roc curvers are now plotted manually + #conf_val_tmp = {} + #@confidence_values.each{ |c| conf_val_tmp[c] = nil } + #if conf_val_tmp.keys.size<2 + # LOGGER.warn("prediction w/o confidence values"); + # @confidence_values=nil + #end if @is_classification - raise "prediction_feature_values missing while performing classification" unless @prediction_feature_values - @num_classes = @prediction_feature_values.size + raise "class_domain missing while performing classification" unless @class_domain + @num_classes = @class_domain.size raise "num classes < 2" if @num_classes<2 { "predicted"=>@predicted_values, "actual"=>@actual_values }.each do |s,values| values.each{ |v| raise "illegal "+s+" classification-value ("+v.to_s+"),"+ "has to be either nil or index of predicted-values" if v!=nil and (v<0 or v>@num_classes)} end else - raise "prediction_feature_values != nil while performing regression" if @prediction_feature_values + raise "class_domain != nil while performing regression" if @class_domain { "predicted"=>@predicted_values, "actual"=>@actual_values }.each do |s,values| values.each{ |v| raise "illegal "+s+" regression-value ("+v.to_s+"),"+ "has to be either nil or number" unless v==nil or v.is_a?(Numeric)} @@ -81,7 +85,7 @@ module Lib if @is_classification @confusion_matrix = [] - @prediction_feature_values.each do |v| + @class_domain.each do |v| @confusion_matrix.push( Array.new( @num_classes, 0 ) ) end @@ -209,31 +213,35 @@ module Lib res = {} (0..@num_classes-1).each do |actual| (0..@num_classes-1).each do |predicted| - res[{:confusion_matrix_actual => @prediction_feature_values[actual], - :confusion_matrix_predicted => @prediction_feature_values[predicted]}] = @confusion_matrix[actual][predicted] + res[{:confusion_matrix_actual => @class_domain[actual], + :confusion_matrix_predicted => @class_domain[predicted]}] = @confusion_matrix[actual][predicted] end end return res end def area_under_roc(class_index=nil) - return prediction_feature_value_map( lambda{ |i| area_under_roc(i) } ) if class_index==nil + return prediction_feature_value_map( lambda{ |i| area_under_roc(i) } ) if + class_index==nil return 0.0 if @confidence_values==nil LOGGER.warn("TODO: implement approx computiation of AUC,"+ - "so far Wilcoxon-Man-Whitney is used (exponential)") if @predicted_values.size>1000 + "so far Wilcoxon-Man-Whitney is used (exponential)") if + @predicted_values.size>1000 + #puts "COMPUTING AUC "+class_index.to_s tp_conf = [] fp_conf = [] (0..@predicted_values.size-1).each do |i| if @predicted_values[i]==class_index - if @actual_values[i]==class_index + if @actual_values[i]==@predicted_values[i] tp_conf.push(@confidence_values[i]) else fp_conf.push(@confidence_values[i]) end end end + #puts tp_conf.inspect+"\n"+fp_conf.inspect+"\n\n" return 0.0 if tp_conf.size == 0 return 1.0 if fp_conf.size == 0 @@ -241,9 +249,9 @@ module Lib tp_conf.each do |tp| fp_conf.each do |fp| sum += 1 if tp>fp + sum += 0.5 if tp==fp end end - return sum / (tp_conf.size * fp_conf.size).to_f end @@ -460,21 +468,30 @@ module Lib # data for roc-plots ################################################################################### def get_roc_values(class_value) + + #puts "get_roc_values for class_value: "+class_value.to_s raise "no confidence values" if @confidence_values==nil - class_index = @prediction_feature_values.index(class_value) - raise "class not found "+class_value.to_s if class_index==nil and class_value!=nil + raise "no class-value specified" if class_value==nil + + class_index = @class_domain.index(class_value) + raise "class not found "+class_value.to_s if class_index==nil c = []; p = []; a = [] (0..@predicted_values.size-1).each do |i| # NOTE: not predicted instances are ignored here - if (@predicted_values[i]!=nil and (class_value==nil or @predicted_values[i]==class_index)) + if @predicted_values[i]!=nil and @predicted_values[i]==class_index c << @confidence_values[i] p << @predicted_values[i] a << @actual_values[i] end end - return {:predicted_values => p, :actual_values => a, :confidence_values => c} + # DO NOT raise exception here, maybe different validations are concated + #raise "no instance predicted as '"+class_value+"'" if p.size == 0 + + h = {:predicted_values => p, :actual_values => a, :confidence_values => c} + #puts h.inspect + return h end ######################################################################################## @@ -489,7 +506,7 @@ module Lib def predicted_value(instance_index) if @is_classification - @predicted_values[instance_index]==nil ? nil : @prediction_feature_values[@predicted_values[instance_index]] + @predicted_values[instance_index]==nil ? nil : @class_domain[@predicted_values[instance_index]] else @predicted_values[instance_index] end @@ -501,7 +518,7 @@ module Lib def actual_value(instance_index) if @is_classification - @actual_values[instance_index]==nil ? nil : @prediction_feature_values[@actual_values[instance_index]] + @actual_values[instance_index]==nil ? nil : @class_domain[@actual_values[instance_index]] else @actual_values[instance_index] end @@ -535,7 +552,7 @@ module Lib def prediction_feature_value_map(proc) res = {} (0..@num_classes-1).each do |i| - res[@prediction_feature_values[i]] = proc.call(i) + res[@class_domain[i]] = proc.call(i) end return res end diff --git a/lib/validation_db.rb b/lib/validation_db.rb index 6de0bd4..cb3ece7 100644 --- a/lib/validation_db.rb +++ b/lib/validation_db.rb @@ -6,7 +6,7 @@ require "lib/merge.rb" module Lib - VAL_PROPS_GENERAL = [ :validation_uri, :model_uri, :algorithm_uri, :training_dataset_uri, :prediction_feature, + VAL_PROPS_GENERAL = [ :validation_uri, :validation_type, :model_uri, :algorithm_uri, :training_dataset_uri, :prediction_feature, :test_dataset_uri, :test_target_dataset_uri, :prediction_dataset_uri, :date ] VAL_PROPS_SUM = [ :num_instances, :num_without_class, :num_unpredicted ] VAL_PROPS_AVG = [:real_runtime, :percent_without_class, :percent_unpredicted ] @@ -17,9 +17,11 @@ module Lib # :classification_statistics VAL_CLASS_PROPS_SINGLE_SUM = [ :num_correct, :num_incorrect, :confusion_matrix ] - VAL_CLASS_PROPS_SINGLE_AVG = [ :percent_correct, :percent_incorrect, :weighted_area_under_roc ] + VAL_CLASS_PROPS_SINGLE_AVG = [ :percent_correct, :percent_incorrect, + :weighted_area_under_roc, :accuracy ] VAL_CLASS_PROPS_SINGLE = VAL_CLASS_PROPS_SINGLE_SUM + VAL_CLASS_PROPS_SINGLE_AVG + # :class_value_statistics VAL_CLASS_PROPS_PER_CLASS_SUM = [ :num_false_positives, :num_false_negatives, :num_true_positives, :num_true_negatives ] @@ -32,7 +34,6 @@ module Lib :true_negative_rate, :true_positive_rate ] #:precision, :recall, VAL_CLASS_PROPS = VAL_CLASS_PROPS_SINGLE + VAL_CLASS_PROPS_PER_CLASS - VAL_CLASS_PROPS_EXTENDED = VAL_CLASS_PROPS + [:accuracy] # :regression_statistics VAL_REGR_PROPS = [ :root_mean_squared_error, :mean_absolute_error, :r_square, @@ -41,20 +42,66 @@ module Lib CROSS_VAL_PROPS = [:dataset_uri, :num_folds, :stratified, :random_seed] CROSS_VAL_PROPS_REDUNDANT = [:crossvalidation_uri, :algorithm_uri, :date] + CROSS_VAL_PROPS - ALL_PROPS = VAL_PROPS + VAL_CV_PROPS + VAL_CLASS_PROPS_EXTENDED + VAL_REGR_PROPS + CROSS_VAL_PROPS + ALL_PROPS = VAL_PROPS + VAL_CV_PROPS + VAL_CLASS_PROPS + VAL_REGR_PROPS + CROSS_VAL_PROPS VAL_MERGE_GENERAL = VAL_PROPS_GENERAL + VAL_CV_PROPS + [:classification_statistics, :regression_statistics] + CROSS_VAL_PROPS VAL_MERGE_SUM = VAL_PROPS_SUM + VAL_CLASS_PROPS_SINGLE_SUM + VAL_CLASS_PROPS_PER_CLASS_SUM VAL_MERGE_AVG = VAL_PROPS_AVG + VAL_CLASS_PROPS_SINGLE_AVG + VAL_CLASS_PROPS_PER_CLASS_AVG + VAL_REGR_PROPS - + + class Validation < ActiveRecord::Base serialize :classification_statistics serialize :regression_statistics alias_attribute :date, :created_at + + def validation_uri + $sinatra.url_for("/"+self.id.to_s, :full) + end + + def crossvalidation_uri + $sinatra.url_for("/crossvalidation/"+self.crossvalidation_id.to_s, :full) if self.crossvalidation_id + end + + def self.classification_property?( property ) + VAL_CLASS_PROPS.include?( property ) + end + + def self.depends_on_class_value?( property ) + VAL_CLASS_PROPS_PER_CLASS.include?( property ) + end + + def self.complement_exists?( property ) + VAL_CLASS_PROPS_PER_CLASS_COMPLEMENT_EXISTS.include?( property ) + end + end class Crossvalidation < ActiveRecord::Base alias_attribute :date, :created_at + + def crossvalidation_uri + $sinatra.url_for("/crossvalidation/"+self.id.to_s, :full) if self.id + end + + # convenience method to list all crossvalidations that are unique + # in terms of dataset_uri,num_folds,stratified,random_seed + # further conditions can be specified in __conditions__ + def self.find_all_uniq(conditions={}) + cvs = Lib::Crossvalidation.find(:all, :conditions => conditions) + uniq = [] + cvs.each do |cv| + match = false + uniq.each do |cv2| + if cv.dataset_uri == cv2.dataset_uri and cv.num_folds == cv2.num_folds and + cv.stratified == cv2.stratified and cv.random_seed == cv2.random_seed + match = true + break + end + end + uniq << cv unless match + end + uniq + end end end diff --git a/nightly/nightly.rb b/nightly/nightly.rb index dca85d2..eb802a3 100644 --- a/nightly/nightly.rb +++ b/nightly/nightly.rb @@ -21,7 +21,7 @@ class Nightly validationExamples = ValidationExamples.select(select) return "please \"select\" validation examples:\n"+ValidationExamples.list if validationExamples.size==0 - task_uri = OpenTox::Task.as_task("Build nightly", "nightly-validation-test-service", {:select => select, :dry_run => dry_run}) do + task_uri = OpenTox::Task.as_task("Build nightly","nightly-validation-test-service",{:select => select, :dry_run => dry_run}) do |task| LOGGER.info("Building nightly report") benchmarks = validationExamples.collect{ |e| ValidationBenchmark.new(e) } @@ -35,7 +35,7 @@ class Nightly running << id Thread.new do begin - b.build + b.build() rescue => ex LOGGER.error "uncaught nightly build error: "+ex.message ensure @@ -101,9 +101,9 @@ class Nightly File.join(@@config[:services]["opentox-validation"],"nightly") end if defined?(halt) - halt 202,task_uri + halt 202,task_uri+"\n" else - return task_uri + return task_uri+"\n" end end diff --git a/reach_reports/reach_application.rb b/reach_reports/reach_application.rb index e63c2a4..bbe4092 100644 --- a/reach_reports/reach_application.rb +++ b/reach_reports/reach_application.rb @@ -31,6 +31,7 @@ post '/reach_report/:type' do content_type "text/uri-list" LOGGER.info "creating "+type+" report "+params.inspect + #puts "creating "+type+" report "+params.inspect result_uri = ReachReports.create_report(type,params,request.env["rack.input"]) if OpenTox::Utils.task_uri?(result_uri) diff --git a/reach_reports/reach_service.rb b/reach_reports/reach_service.rb index ef75621..f297f26 100644 --- a/reach_reports/reach_service.rb +++ b/reach_reports/reach_service.rb @@ -23,13 +23,14 @@ module ReachReports def self.create_report( type, params, xml_data=nil ) - result_uri = "" case type when /(?i)QMRF/ if params[:model_uri] - result_uri = OpenTox::Task.as_task( "Create "+type+" report", $sinatra.url_for("/reach_report/"+type, :full), params ) do + result_uri = OpenTox::Task.as_task( "Create "+type+" report", + $sinatra.url_for("/reach_report/"+type, :full), params ) do |task| + report = ReachReports::QmrfReport.new :model_uri => params[:model_uri] - build_qmrf_report(report) + build_qmrf_report(report, task) report.report_uri end elsif xml_data and (input = xml_data.read).to_s.size>0 @@ -57,8 +58,9 @@ module ReachReports - def self.build_qmrf_report(r) + def self.build_qmrf_report(r, task=nil) + #puts r.model_uri model = OpenTox::Model::PredictionModel.find(r.model_uri) classification = model.classification? @@ -66,9 +68,11 @@ module ReachReports r.qsar_identifier = QsarIdentifier.new r.qsar_identifier.qsar_title = model.title # TODO QSAR_models -> sparql same endpoint - r.qsar_identifier.qsar_software << QsarSoftware.new( :url => model.uri, :name => model.title, :contact => model.creator ) + r.qsar_identifier.qsar_software << QsarSoftware.new( :url => model.uri, + :name => model.title, :contact => model.creator ) algorithm = OpenTox::Algorithm::Generic.find(model.algorithm) if model.algorithm r.qsar_identifier.qsar_software << QsarSoftware.new( :url => algorithm.uri, :name => algorithm.title ) + task.progress(10) if task #chpater 2 r.qsar_general_information = QsarGeneralInformation.new @@ -79,7 +83,8 @@ module ReachReports # TODO: references? # EMPTY: info_availablity # TODO: related_models = find qmrf reports for QSAR_models - + task.progress(20) if task + # chapter 3 # TODO "model_species" ? r.qsar_endpoint = QsarEndpoint.new @@ -89,12 +94,14 @@ module ReachReports # TODO "endpoint_comments" => "3.3", "endpoint_units" => "3.4", r.qsar_endpoint.endpoint_variable = model.dependentVariables if model.dependentVariables # TODO "endpoint_protocol" => "3.6", "endpoint_data_quality" => "3.7", - + task.progress(30) if task + # chapter 4 # TODO algorithm_type (='type of model') # TODO algorithm_explicit.equation # TODO algorithm_explicit.algorithms_catalog # TODO algorithms_descriptors, descriptors_selection, descriptors_generation, descriptors_generation_software, descriptors_chemicals_ratio + task.progress(40) if task # chapter 5 # TODO app_domain_description, app_domain_method, app_domain_software, applicability_limits @@ -103,8 +110,9 @@ module ReachReports begin training_dataset = model.trainingDataset ? OpenTox::Dataset.find(model.trainingDataset) : nil rescue - LOGGER.warn "training_dataset not found "+model.trainingDataset.to_s + LOGGER.warn "build qmrf: training_dataset not found "+model.trainingDataset.to_s end + task.progress(50) if task # chapter 6 r.qsar_robustness = QsarRobustness.new @@ -122,47 +130,56 @@ module ReachReports val_datasets = [] if model.algorithm - cvs = Lib::Crossvalidation.find(:all, :conditions => {:algorithm_uri => model.algorithm}) - cvs = [] unless cvs - uniq_cvs = [] - cvs.each do |cv| - # PENDING: cv classification hack + cvs = Lib::Crossvalidation.find_all_uniq({:algorithm_uri => model.algorithm}) + # PENDING: cv classification/regression hack + cvs = cvs.delete_if do |cv| val = Validation::Validation.first( :all, :conditions => { :crossvalidation_id => cv.id } ) - if (val.classification_statistics!=nil) == classification - match = false - uniq_cvs.each do |cv2| - if cv.dataset_uri == cv2.dataset_uri and cv.num_folds == cv2.num_folds and cv.stratified == cv2.stratified and cv.random_seed == cv2.random_seed - match = true - break - end - end - uniq_cvs << cv unless match - end + (val.classification_statistics!=nil) != classification end - - lmo = [ "found "+cvs.size.to_s+" crossvalidation/s for algorithm '"+model.algorithm ] - lmo << "" - uniq_cvs.each do |cv| - lmo << "crossvalidation: "+cv.crossvalidation_uri - lmo << "dataset (see 9.3 Validation data): "+cv.dataset_uri - val_datasets << cv.dataset_uri - lmo << "settings: num-folds="+cv.num_folds.to_s+", random-seed="+cv.random_seed.to_s+", stratified:"+cv.stratified.to_s - val = YAML.load( OpenTox::RestClientWrapper.get File.join(cv.crossvalidation_uri,"statistics") ) - if classification - lmo << "percent_correct: "+val[:classification_statistics][:percent_correct].to_s - lmo << "weighted AUC: "+val[:classification_statistics][:weighted_area_under_roc].to_s - else - lmo << "root_mean_squared_error: "+val[:regression_statistics][:root_mean_squared_error].to_s - lmo << "r_square "+val[:regression_statistics][:r_square].to_s - end - reports = OpenTox::RestClientWrapper.get File.join(CONFIG[:services]["opentox-validation"],"report/crossvalidation?crossvalidation_uris="+cv.crossvalidation_uri) - if reports and reports.size>0 - lmo << "for more info see report: "+reports.split("\n")[0] - else - lmo << "for more info see report: not yet created for '"+cv.crossvalidation_uri+"'" + + lmo = [ "found "+cvs.size.to_s+" crossvalidation/s for algorithm '"+model.algorithm+"'" ] + if cvs.size>0 + cvs_same_data = [] + cvs_other_data = [] + cvs.each do |cv| + if cv.dataset_uri == model.trainingDataset + cvs_same_data << cv + else + cvs_other_data << cv + end end + lmo << cvs_same_data.size.to_s+" crossvalidations/s where performed on the training dataset of the model ("+ + model.trainingDataset.to_s+")" + lmo << cvs_other_data.size.to_s+" crossvalidations/s where performed on the other datasets" lmo << "" + + {cvs_same_data => "training dataset", cvs_other_data => "other datasets"}.each do |cvs,desc| + next if cvs.size==0 + lmo << "crossvalidation/s on "+desc + cvs.each do |cv| + lmo << "crossvalidation: "+cv.crossvalidation_uri + lmo << "dataset (see 9.3 Validation data): "+cv.dataset_uri + val_datasets << cv.dataset_uri + lmo << "settings: num-folds="+cv.num_folds.to_s+", random-seed="+cv.random_seed.to_s+", stratified:"+cv.stratified.to_s + val = YAML.load( OpenTox::RestClientWrapper.get File.join(cv.crossvalidation_uri,"statistics") ) + if classification + lmo << "percent_correct: "+val[:classification_statistics][:percent_correct].to_s + lmo << "weighted AUC: "+val[:classification_statistics][:weighted_area_under_roc].to_s + else + lmo << "root_mean_squared_error: "+val[:regression_statistics][:root_mean_squared_error].to_s + lmo << "r_square "+val[:regression_statistics][:r_square].to_s + end + reports = OpenTox::RestClientWrapper.get File.join(CONFIG[:services]["opentox-validation"],"report/crossvalidation?crossvalidation_uris="+cv.crossvalidation_uri) + if reports and reports.size>0 + lmo << "for more info see report: "+reports.split("\n")[0] + else + lmo << "for more info see report: not yet created for '"+cv.crossvalidation_uri+"'" + end + end + lmo << "" + end end + else lmo = [ "no prediction algortihm for model found, crossvalidation not possible" ] end @@ -202,7 +219,8 @@ module ReachReports v << "root_mean_squared_error: "+validation.regression_statistics[:root_mean_squared_error].to_s v << "r_square "+validation.regression_statistics[:r_square].to_s end - reports = OpenTox::RestClientWrapper.get File.join(CONFIG[:services]["opentox-validation"],"report/validation?validation_uris="+validation.validation_uri) + reports = OpenTox::RestClientWrapper.get(File.join(CONFIG[:services]["opentox-validation"], + "report/validation?validation_uris="+validation.validation_uri)) if reports and reports.size>0 v << "for more info see report: "+reports.split("\n")[0] else @@ -214,15 +232,18 @@ module ReachReports v = [ "no validation for model '"+model.uri+"' found" ] end r.qsar_predictivity.validation_predictivity = v.to_html + task.progress(60) if task # chapter 7 # "validation_set_availability" => "7.1", "validation_set_data" => "7.2", "validation_set_descriptors" => "7.3", # "validation_dependent_var_availability" => "7.4", "validation_other_info" => "7.5", "experimental_design" => "7.6", # "validation_predictivity" => "7.7", "validation_assessment" => "7.8", "validation_comments" => "7.9", + task.progress(70) if task # chapter 8 # "mechanistic_basis" => "8.1", "mechanistic_basis_comments" => "8.2", "mechanistic_basis_info" => "8.3", - + task.progress(80) if task + # chapter 9 # "comments" => "9.1", "bibliography" => "9.2", "attachments" => "9.3", @@ -244,8 +265,10 @@ module ReachReports LOGGER.warn "could not add dataset: "+data_uri.to_s end end - r.save + task.progress(90) if task + r.save + task.progress(100) if task end # def self.get_report_content(type, id, *keys) diff --git a/reach_reports/reach_test.rb b/reach_reports/reach_test.rb index 44f37ff..4a18f6d 100644 --- a/reach_reports/reach_test.rb +++ b/reach_reports/reach_test.rb @@ -115,8 +115,9 @@ class ReachTest < Test::Unit::TestCase # puts last_response.body #model_uri = "http://ambit.uni-plovdiv.bg:8080/ambit2/model/173393" - model_uri = "http://localhost/model/1" - #model_uri = "http://localhost/majority/regr/model/12" + model_uri = "http://localhost/model/6" + #http://localhost/majority/class/model/15 + #model_uri = "http://localhost/majority/class/model/15" # model_uri = "http://localhost/majority/class/model/91" #model_uri = "http://apps.ideaconsult.net:8080/ambit2/model/2" post '/reach_report/qmrf',:model_uri=>model_uri #http://localhost/model/1" @@ -148,8 +149,9 @@ class ReachTest < Test::Unit::TestCase #r = ReachReports::QmrfReport.find_like( :QSAR_title => "Hamster") #puts r.collect{|rr| "report with id:"+rr.id.to_s}.inspect - File.new("/home/martin/win/home/qmr_rep_del_me.xml","w").puts last_response.body - File.new("/home/martin/info_home/.public_html/qmr_rep_del_me.xml","w").puts last_response.body + File.new("/home/martin/tmp/qmr_rep_del_me.xml","w").puts last_response.body + #File.new("/home/martin/win/home/qmr_rep_del_me.xml","w").puts last_response.body + #File.new("/home/martin/info_home/.public_html/qmr_rep_del_me.xml","w").puts last_response.body end end diff --git a/report/environment.rb b/report/environment.rb index f2bbe66..3daf39d 100644 --- a/report/environment.rb +++ b/report/environment.rb @@ -16,13 +16,13 @@ require "report/plot_factory.rb" require "report/xml_report.rb" require "report/xml_report_util.rb" require "report/report_persistance.rb" +require "report/report_content.rb" require "report/report_factory.rb" require "report/report_service.rb" require "report/report_format.rb" require "report/validation_access.rb" require "report/validation_data.rb" require "report/util.rb" -require "report/external/mimeparse.rb" diff --git a/report/plot_factory.rb b/report/plot_factory.rb index daaba52..43c45fc 100644 --- a/report/plot_factory.rb +++ b/report/plot_factory.rb @@ -8,6 +8,43 @@ class Array self[i] = self[j] self[j] = tmp end + + # summing up values of fields where array __groups__ has equal values + # EXAMPLE + # self: [1, 0, 1, 2, 3, 0, 2] + # __groups__: [100, 90, 70, 70, 30, 10, 0] + # returns: + # [ 1, 0, 3, 3, 0, 2] + # (fields with equal value 70 are compressed) + # PRECONDITION + # __groups__ has to be sorted + def compress_sum(groups) + compress(groups) do |a,b| + a+b + end + end + + # see compress_sum, replace sum with max + def compress_max(groups) + compress(groups) do |a,b| + a > b ? a : b + end + end + + private + def compress(groups) + raise "length not equal" unless self.size==groups.size + raise "to small" unless self.size>=2 + a = [ self[0] ] + (1..groups.size-1).each do |i| + if groups[i]!=groups[i-1] + a << self[i] + else + a[-1] = yield a[-1],self[i] + end + end + a + end end @@ -15,7 +52,7 @@ module Reports module PlotFactory - def self.create_regression_plot( out_file, validation_set ) + def self.create_regression_plot( out_file, validation_set, name_attribute ) LOGGER.debug "Creating regression plot, out-file:"+out_file.to_s @@ -23,14 +60,28 @@ module Reports x = [] y = [] validation_set.validations.each do |v| - names << v.algorithm_uri - x << v.get_predictions.predicted_values - y << v.get_predictions.actual_values + x_i = v.get_predictions.predicted_values + y_i = v.get_predictions.actual_values + + # filter out nil-predictions + not_nil_indices = [] + x_i.size.times do |i| + not_nil_indices << i if x_i[i]!=nil && y_i[i]!=nil + end + if not_nil_indices.size < x_i.size + x_i = not_nil_indices.collect{ |i| x_i[i] } + y_i = not_nil_indices.collect{ |i| y_i[i] } + end + + names << ( name_attribute==:crossvalidation_fold ? "fold " : "" ) + v.send(name_attribute).to_s + x << x_i + y << y_i end RubyPlot::plot_points(out_file, "Regression plot", "Predicted values", "Actual values", names, x, y ) end + # creates a roc plot (result is plotted into out_file) # * if (split_set_attributes == nil?) # * the predictions of all validations in the validation set are plotted as one average roc-curve @@ -41,19 +92,22 @@ module Reports # def self.create_roc_plot( out_file, validation_set, class_value, split_set_attribute=nil, show_single_curves=false ) - LOGGER.debug "creating roc plot, out-file:"+out_file.to_s + LOGGER.debug "creating roc plot for '"+validation_set.size.to_s+"' validations, out-file:"+out_file.to_s if split_set_attribute attribute_values = validation_set.get_values(split_set_attribute) - names = [] fp_rates = [] tp_rates = [] attribute_values.each do |value| - data = transform_predictions(validation_set.filter({split_set_attribute => value}), class_value, false) - names << value.to_s - fp_rates << data[:fp_rate][0] - tp_rates << data[:tp_rate][0] + begin + data = transform_predictions(validation_set.filter({split_set_attribute => value}), class_value, false) + names << value.to_s + fp_rates << data[:fp_rate][0] + tp_rates << data[:tp_rate][0] + rescue + LOGGER.warn "could not create ROC plot for "+value.to_s + end end RubyPlot::plot_lines(out_file, "ROC-Plot", "False positive rate", "True Positive Rate", names, fp_rates, tp_rates ) else @@ -62,28 +116,33 @@ module Reports end end - def self.create_bar_plot( out_file, validation_set, class_value, title_attribute, value_attributes ) + def self.create_bar_plot( out_file, validation_set, title_attribute, value_attributes ) LOGGER.debug "creating bar plot, out-file:"+out_file.to_s data = [] titles = [] + labels = [] validation_set.validations.each do |v| values = [] value_attributes.each do |a| - value = v.send(a) - if value.is_a?(Hash) - if class_value==nil - avg_value = 0 - value.values.each{ |val| avg_value+=val } - value = avg_value/value.values.size.to_f - else - raise "bar plot value is hash, but no entry for class-value ("+class_value.to_s+"); value for "+a.to_s+" -> "+value.inspect unless value.key?(class_value) - value = value[class_value] + validation_set.get_domain_for_attr(a).each do |class_value| + value = v.send(a) + if value.is_a?(Hash) + if class_value==nil + avg_value = 0 + value.values.each{ |val| avg_value+=val } + value = avg_value/value.values.size.to_f + else + raise "bar plot value is hash, but no entry for class-value ("+class_value.to_s+"); value for "+a.to_s+" -> "+value.inspect unless value.key?(class_value) + value = value[class_value] + end end + raise "value is nil\nattribute: "+a.to_s+"\nvalidation: "+v.inspect if value==nil + values.push(value) + labels.push(a.to_s.gsub("_","-") + ( class_value==nil ? "" : "("+class_value.to_s+")" )) end - values.push(value) end titles << v.send(title_attribute).to_s @@ -95,8 +154,6 @@ module Reports data[i] = [titles[i]] + data[i] end - labels = value_attributes.collect{|a| a.to_s.gsub("_","-")} - LOGGER.debug "bar plot labels: "+labels.inspect LOGGER.debug "bar plot data: "+data.inspect @@ -177,11 +234,15 @@ module Reports sum_roc_values[:confidence_values] += roc_values[:confidence_values] sum_roc_values[:actual_values] += roc_values[:actual_values] if add_single_folds - tp_fp_rates = get_tp_fp_rates(roc_values) - names << "fold "+i.to_s - fp_rate << tp_fp_rates[:fp_rate] - tp_rate << tp_fp_rates[:tp_rate] - faint << true + begin + tp_fp_rates = get_tp_fp_rates(roc_values) + names << "fold "+i.to_s + fp_rate << tp_fp_rates[:fp_rate] + tp_rate << tp_fp_rates[:tp_rate] + faint << true + rescue + LOGGER.warn "could not get ROC vals for fold "+i.to_s + end end end tp_fp_rates = get_tp_fp_rates(sum_roc_values) @@ -197,6 +258,18 @@ module Reports end end + def self.demo_rock_plot + roc_values = {:confidence_values => [0.1, 0.9, 0.5, 0.6, 0.6, 0.6], + :predicted_values => [1, 0, 0, 1, 0, 1], + :actual_values => [0, 1, 0, 0, 1, 1]} + tp_fp_rates = get_tp_fp_rates(roc_values) + data = { :names => ["default"], :fp_rate => [tp_fp_rates[:fp_rate]], :tp_rate => [tp_fp_rates[:tp_rate]] } + RubyPlot::plot_lines("/tmp/plot.svg", + "ROC-Plot", + "False positive rate", + "True Positive Rate", data[:names], data[:fp_rate], data[:tp_rate], data[:faint] ) + end + def self.get_tp_fp_rates(roc_values) c = roc_values[:confidence_values] @@ -232,9 +305,11 @@ module Reports end end #puts c.inspect+"\n"+a.inspect+"\n"+p.inspect+"\n\n" - + tp_rate = [0] fp_rate = [0] + w = [1] + c2 = [Float::MAX] (0..p.size-1).each do |i| if a[i]==p[i] tp_rate << tp_rate[-1]+1 @@ -243,8 +318,15 @@ module Reports fp_rate << fp_rate[-1]+1 tp_rate << tp_rate[-1] end + w << 1 + c2 << c[i] end - #puts tp_rate.inspect+"\n"+fp_rate.inspect+"\n\n" + #puts c2.inspect+"\n"+tp_rate.inspect+"\n"+fp_rate.inspect+"\n"+w.inspect+"\n\n" + + tp_rate = tp_rate.compress_max(c2) + fp_rate = fp_rate.compress_max(c2) + w = w.compress_sum(c2) + #puts tp_rate.inspect+"\n"+fp_rate.inspect+"\n"+w.inspect+"\n\n" (0..tp_rate.size-1).each do |i| tp_rate[i] = tp_rate[-1]>0 ? tp_rate[i]/tp_rate[-1].to_f*100 : 100 @@ -256,5 +338,14 @@ module Reports end end end - + +#require "rubygems" +#require "ruby-plot" #Reports::PlotFactory::demo_ranking_plot +#Reports::PlotFactory::demo_rock_plot + +#a = [1, 0, 1, 2, 3, 0, 2] +#puts a.compress_sum([100, 90, 70, 70, 30, 10, 0]).inspect +#puts a.compress_max([100, 90, 70, 70, 30, 10, 0]).inspect + + diff --git a/report/report_application.rb b/report/report_application.rb index baa91a0..f6b810c 100644 --- a/report/report_application.rb +++ b/report/report_application.rb @@ -80,15 +80,9 @@ end post '/report/:type/:id/format_html' do - task_uri = OpenTox::Task.as_task("Format report",url_for("/report/"+params[:type]+"/format_html", :full), params) do - perform do |rs| - rs.get_report(params[:type],params[:id],"text/html",true,params) - content_type "text/uri-list" - rs.get_uri(params[:type],params[:id])+"\n" - end - end + rs.get_report(params[:type],params[:id],"text/html",true,params) content_type "text/uri-list" - halt 202,task_uri+"\n" + rs.get_uri(params[:type],params[:id])+"\n" end @@ -128,9 +122,9 @@ delete '/report/:type/:id' do end post '/report/:type' do - task_uri = OpenTox::Task.as_task("Create report",url_for("/report/"+params[:type], :full), params) do + task_uri = OpenTox::Task.as_task("Create report",url_for("/report/"+params[:type], :full), params) do |task| perform do |rs| - rs.create_report(params[:type],params[:validation_uris]?params[:validation_uris].split(/\n|,/):nil) + rs.create_report(params[:type],params[:validation_uris]?params[:validation_uris].split(/\n|,/):nil,task) end end content_type "text/uri-list" diff --git a/report/report_content.rb b/report/report_content.rb new file mode 100644 index 0000000..98eaad6 --- /dev/null +++ b/report/report_content.rb @@ -0,0 +1,259 @@ + +# = Reports::ReportContent +# +# wraps an xml-report, adds functionality for adding sections, adds a hash for tmp files +# +class Reports::ReportContent + + attr_accessor :xml_report, :tmp_files + + def initialize(title) + @xml_report = Reports::XMLReport.new(title, Time.now.strftime("Created at %m.%d.%Y - %H:%M")) + @tmp_file_count = 0 + end + + def add_section_predictions( validation_set, + validation_attributes=[], + section_title="Predictions", + section_text=nil, + table_title="Predictions") + + #PENING + raise "validation attributes not implemented in get prediction array" if validation_attributes.size>0 + + section_table = @xml_report.add_section(@xml_report.get_root_element, section_title) + if validation_set.validations[0].get_predictions + @xml_report.add_paragraph(section_table, section_text) if section_text + @xml_report.add_table(section_table, table_title, Lib::OTPredictions.to_array(validation_set.validations.collect{|v| v.get_predictions}, true, true)) + else + @xml_report.add_paragraph(section_table, "No prediction info available.") + end + end + + + def add_section_result_overview( validation_set, + attribute_col, + attribute_row, + attribute_values, + table_titles=nil, + section_title="Result overview", + section_text=nil ) + + + section_table = @xml_report.add_section(xml_report.get_root_element, section_title) + @xml_report.add_paragraph(section_table, section_text) if section_text + + attribute_values.size.times do |i| + attribute_val = attribute_values[i] + table_title = table_titles ? table_titles[i] : "Result overview for "+attribute_val.to_s + vals = validation_set.to_table( attribute_col, attribute_row, attribute_val) + @xml_report.add_table(section_table, table_title, vals) + end + end + + # result (could be transposed) + # + # attr1 | attr2 | attr3 + # ===========|===========|=========== + # val1-attr1 |val1-attr2 |val1-attr3 + # val2-attr1 |val2-attr2 |val2-attr3 + # val3-attr1 |val3-attr2 |val3-attr3 + # + def add_section_result( validation_set, + validation_attributes, + table_title, + section_title="Results", + section_text=nil, + #rem_equal_vals_attr=[], + search_for_existing_report_type=nil) + + section_table = @xml_report.add_section(xml_report.get_root_element, section_title) + @xml_report.add_paragraph(section_table, section_text) if section_text + vals = validation_set.to_array(validation_attributes, true) + vals = vals.collect{|a| a.collect{|v| v.to_s }} + + if (search_for_existing_report_type) + vals.size.times do |i| + puts i + if (i==0) + vals[i] = [ "Reports" ] + vals[i] + puts vals[i].inspect + else + if search_for_existing_report_type=="validation" + vals[i] = [ validation_set.validations[i-1].validation_report_uri() ] + vals[i] + elsif search_for_existing_report_type=="crossvalidation" + vals[i] = [ validation_set.validations[i-1].cv_report_uri() ] + vals[i] + else + raise "illegal report type: "+search_for_existing_report_type.to_s + end + end + end + end + #PENDING transpose values if there more than 4 columns, and there are more than columns than rows + transpose = vals[0].size>4 && vals[0].size>vals.size + @xml_report.add_table(section_table, table_title, vals, !transpose, transpose) + end + + def add_section_confusion_matrix( validation, + section_title="Confusion Matrix", + section_text=nil, + table_title="Confusion Matrix") + section_confusion = @xml_report.add_section(xml_report.get_root_element, section_title) + @xml_report.add_paragraph(section_confusion, section_text) if section_text + @xml_report.add_table(section_confusion, table_title, + Reports::XMLReportUtil::create_confusion_matrix( validation.confusion_matrix ), false) + end + + def add_section_regression_plot( validation_set, + name_attribute, + section_title="Regression Plot", + section_text=nil, + image_title=nil, + image_caption=nil) + + image_title = "Regression plot" unless image_title + + section_regr = @xml_report.add_section(@xml_report.get_root_element, section_title) + prediction_set = validation_set.collect{ |v| v.get_predictions } + + if prediction_set.size>0 + + section_text += "\nWARNING: regression plot information not available for all validation results" if prediction_set.size!=validation_set.size + @xml_report.add_paragraph(section_regr, section_text) if section_text + plot_file_name = "regr_plot"+@tmp_file_count.to_s+".svg" + @tmp_file_count += 1 + begin + plot_file_path = add_tmp_file(plot_file_name) + Reports::PlotFactory.create_regression_plot( plot_file_path, prediction_set, name_attribute ) + @xml_report.add_imagefigure(section_regr, image_title, plot_file_name, "SVG", image_caption) + rescue RuntimeError => ex + LOGGER.error("Could not create regression plot: "+ex.message) + rm_tmp_file(plot_file_name) + @xml_report.add_paragraph(section_regr, "could not create regression plot: "+ex.message) + end + else + @xml_report.add_paragraph(section_regr, "No prediction info for regression available.") + end + end + + def add_section_roc_plot( validation_set, + split_set_attribute = nil, + section_title="ROC Plots", + section_text=nil, + image_titles=nil, + image_captions=nil) + + section_roc = @xml_report.add_section(@xml_report.get_root_element, section_title) + prediction_set = validation_set.collect{ |v| v.get_predictions && v.get_predictions.confidence_values_available? } + + if prediction_set.size>0 + if prediction_set.size!=validation_set.size + section_text += "\nWARNING: roc plot information not available for all validation results" + LOGGER.error "WARNING: roc plot information not available for all validation results:\n"+ + "validation set size: "+validation_set.size.to_s+", prediction set size: "+prediction_set.size.to_s + end + @xml_report.add_paragraph(section_roc, section_text) if section_text + + class_domain = validation_set.get_class_domain + class_domain.size.times do |i| + class_value = class_domain[i] + image_title = image_titles ? image_titles[i] : "ROC Plot for class-value '"+class_value+"'" + image_caption = image_captions ? image_captions[i] : nil + plot_file_name = "roc_plot"+@tmp_file_count.to_s+".svg" + @tmp_file_count += 1 + begin + plot_file_path = add_tmp_file(plot_file_name) + Reports::PlotFactory.create_roc_plot( plot_file_path, prediction_set, class_value, split_set_attribute, false )#prediction_set.size>1 ) + @xml_report.add_imagefigure(section_roc, image_title, plot_file_name, "SVG", image_caption) + rescue RuntimeError => ex + msg = "WARNING could not create roc plot for class value '"+class_value+"': "+ex.message + LOGGER.error(msg) + rm_tmp_file(plot_file_name) + @xml_report.add_paragraph(section_roc, msg) + end + end + else + @xml_report.add_paragraph(section_roc, "No prediction-confidence info for roc plot available.") + end + + end + + def add_section_ranking_plots( validation_set, + compare_attribute, + equal_attribute, + rank_attributes, + section_title="Ranking Plots", + section_text="This section contains the ranking plots.") + + section_rank = @xml_report.add_section(@xml_report.get_root_element, section_title) + @xml_report.add_paragraph(section_rank, section_text) if section_text + + rank_attributes.each do |a| + add_ranking_plot(section_rank, validation_set, compare_attribute, equal_attribute, a) + end + end + + def add_ranking_plot( report_section, + validation_set, + compare_attribute, + equal_attribute, + rank_attribute, + image_titles=nil, + image_captions=nil) + + class_domain = validation_set.get_domain_for_attr(rank_attribute) + puts "ranking plot for "+rank_attribute.to_s+", class values: "+class_domain.to_s + + class_domain.size.times do |i| + class_value = class_domain[i] + if image_titles + image_title = image_titles[i] + else + if class_value!=nil + image_title = rank_attribute.to_s+" Ranking Plot for class-value '"+class_value+"'" + else + image_title = rank_attribute.to_s+" Ranking Plot" + end + end + image_caption = image_captions ? image_captions[i] : nil + plot_file_name = "ranking_plot"+@tmp_file_count.to_s+".svg" + @tmp_file_count += 1 + plot_file_path = add_tmp_file(plot_file_name) + Reports::PlotFactory::create_ranking_plot(plot_file_path, validation_set, compare_attribute, equal_attribute, rank_attribute, class_value) + @xml_report.add_imagefigure(report_section, image_title, plot_file_name, "SVG", image_caption) + end + end + + def add_section_bar_plot(validation_set, + title_attribute, + value_attributes, + section_title="Bar Plot", + section_text=nil, + image_title="Bar Plot", + image_caption=nil) + + section_bar = @xml_report.add_section(@xml_report.get_root_element, section_title) + @xml_report.add_paragraph(section_bar, section_text) if section_text + + plot_file_name = "bar_plot"+@tmp_file_count.to_s+".svg" + @tmp_file_count += 1 + plot_file_path = add_tmp_file(plot_file_name) + Reports::PlotFactory.create_bar_plot(plot_file_path, validation_set, title_attribute, value_attributes ) + @xml_report.add_imagefigure(section_bar, image_title, plot_file_name, "SVG", image_caption) + end + + private + def add_tmp_file(tmp_file_name) + + @tmp_files = {} unless @tmp_files + raise "file name already exits" if @tmp_files[tmp_file_name] || (@text_files && @text_files[tmp_file_name]) + tmp_file_path = Reports::Util.create_tmp_file(tmp_file_name) + @tmp_files[tmp_file_name] = tmp_file_path + return tmp_file_path + end + + def rm_tmp_file(tmp_file_name) + @tmp_files.delete(tmp_file_name) if @tmp_files.has_key?(tmp_file_name) + end + +end
\ No newline at end of file diff --git a/report/report_factory.rb b/report/report_factory.rb index 7e63e32..ef22be1 100644 --- a/report/report_factory.rb +++ b/report/report_factory.rb @@ -3,11 +3,14 @@ VAL_ATTR_TRAIN_TEST = [ :model_uri, :training_dataset_uri, :test_dataset_uri, :prediction_feature ] # selected attributes of interest when generating the crossvalidation report VAL_ATTR_CV = [ :algorithm_uri, :dataset_uri, :num_folds, :crossvalidation_fold ] + # selected attributes of interest when performing classification -VAL_ATTR_CLASS = [ :percent_correct, :weighted_area_under_roc, :area_under_roc, :f_measure, :true_positive_rate, :true_negative_rate ] +VAL_ATTR_CLASS = [ :percent_correct, :weighted_area_under_roc, + :area_under_roc, :f_measure, :true_positive_rate, :true_negative_rate ] VAL_ATTR_REGR = [ :root_mean_squared_error, :mean_absolute_error, :r_square ] -VAL_ATTR_BAR_PLOT_CLASS = [ :accuracy, :weighted_area_under_roc, :area_under_roc, :f_measure, :true_positive_rate, :true_negative_rate ] +VAL_ATTR_BAR_PLOT_CLASS = [ :accuracy, :weighted_area_under_roc, + :area_under_roc, :f_measure, :true_positive_rate, :true_negative_rate ] VAL_ATTR_BAR_PLOT_REGR = [ :root_mean_squared_error, :mean_absolute_error, :r_square ] @@ -28,45 +31,57 @@ module Reports::ReportFactory # call-seq: # self.create_report(type, validation_set) => Reports::ReportContent # - def self.create_report(type, validation_set) + def self.create_report(type, validation_set, task=nil) case type when RT_VALIDATION - create_report_validation(validation_set) + create_report_validation(validation_set, task) when RT_CV - create_report_crossvalidation(validation_set) + create_report_crossvalidation(validation_set, task) when RT_ALG_COMP - create_report_compare_algorithms(validation_set) + create_report_compare_algorithms(validation_set, task) else raise "unknown report type "+type.to_s end end private - def self.create_report_validation(validation_set) + # this function is only to set task progress accordingly + # loading predicitons is time consuming, and is done dynamically -> + # pre-load and set task progress + def self.pre_load_predictions( validation_set, task=nil) + i = 0 + task_step = 100 / validation_set.size.to_f + validation_set.validations.each do |v| + v.get_predictions( OpenTox::SubTask.create(task, i*task_step, (i+1)*task_step ) ) + i += 1 + end + end + + def self.create_report_validation(validation_set, task=nil) raise Reports::BadRequest.new("num validations is not equal to 1") unless validation_set.size==1 val = validation_set.validations[0] - + pre_load_predictions( validation_set, OpenTox::SubTask.create(task,0,80) ) + report = Reports::ReportContent.new("Validation report") if (val.classification?) - report.add_section_result(validation_set, VAL_ATTR_TRAIN_TEST + VAL_ATTR_CLASS, "Results", "Results") - report.add_section_roc_plot(validation_set, nil, nil, "roc-plot.svg") - #val.get_prediction_feature_values.each do |class_value| - #report.add_section_roc_plot(validation_set, class_value, nil, "roc-plot-"+class_value+".svg") - #end + report.add_section_result(validation_set, [:validation_uri] + VAL_ATTR_TRAIN_TEST + VAL_ATTR_CLASS, "Results", "Results") + report.add_section_roc_plot(validation_set) report.add_section_confusion_matrix(val) else #regression - report.add_section_result(validation_set, VAL_ATTR_TRAIN_TEST + VAL_ATTR_REGR, "Results", "Results") - report.add_section_regression_plot(validation_set) + report.add_section_result(validation_set, [:validation_uri] + VAL_ATTR_TRAIN_TEST + VAL_ATTR_REGR, "Results", "Results") + report.add_section_regression_plot(validation_set, :model_uri) end + task.progress(90) if task report.add_section_result(validation_set, Lib::ALL_PROPS, "All Results", "All Results") - report.add_section_predictions( validation_set ) - return report + report.add_section_predictions( validation_set ) + task.progress(100) if task + report end - def self.create_report_crossvalidation(validation_set) + def self.create_report_crossvalidation(validation_set, task=nil) raise Reports::BadRequest.new("num validations is not >1") unless validation_set.size>1 raise Reports::BadRequest.new("crossvalidation-id not unique and != nil: "+ @@ -77,6 +92,7 @@ module Reports::ReportFactory raise Reports::BadRequest.new("num different folds is not equal to num validations") unless validation_set.num_different_values(:crossvalidation_fold)==validation_set.size raise Reports::BadRequest.new("validations must be either all regression, "+ +"or all classification validations") unless validation_set.all_classification? or validation_set.all_regression? + pre_load_predictions( validation_set, OpenTox::SubTask.create(task,0,80) ) merged = validation_set.merge([:crossvalidation_id]) raise unless merged.size==1 @@ -85,27 +101,26 @@ module Reports::ReportFactory report = Reports::ReportContent.new("Crossvalidation report") if (validation_set.all_classification?) - report.add_section_result(merged, VAL_ATTR_CV+VAL_ATTR_CLASS-[:crossvalidation_fold],"Mean Results","Mean Results") - - report.add_section_roc_plot(validation_set, nil, nil, "roc-plot.svg", "Roc Plot", nil, "Roc plot") - report.add_section_roc_plot(validation_set, nil, :crossvalidation_fold, "roc-plot-folds.svg", "Roc Plot", nil, "Roc plots for folds") - #validation_set.first.get_prediction_feature_values.each do |class_value| - #report.add_section_roc_plot(validation_set, class_value, nil, "roc-plot-"+class_value+".svg") - #end + report.add_section_result(merged, [:crossvalidation_uri]+VAL_ATTR_CV+VAL_ATTR_CLASS-[:crossvalidation_fold],"Mean Results","Mean Results") + report.add_section_roc_plot(validation_set, nil, "ROC Plots over all folds") + report.add_section_roc_plot(validation_set, :crossvalidation_fold) report.add_section_confusion_matrix(merged.validations[0]) - report.add_section_result(validation_set, VAL_ATTR_CV+VAL_ATTR_CLASS-[:num_folds], "Results","Results") + report.add_section_result(validation_set, VAL_ATTR_CV+VAL_ATTR_CLASS-[:num_folds], + "Results","Results",nil,"validation") else #regression - report.add_section_result(merged, VAL_ATTR_CV+VAL_ATTR_REGR-[:crossvalidation_fold],"Mean Results","Mean Results") - #report.add_section_regression_plot(validation_set) + report.add_section_result(merged, [:crossvalidation_uri]+VAL_ATTR_CV+VAL_ATTR_REGR-[:crossvalidation_fold],"Mean Results","Mean Results") + report.add_section_regression_plot(validation_set, :crossvalidation_fold) report.add_section_result(validation_set, VAL_ATTR_CV+VAL_ATTR_REGR-[:num_folds], "Results","Results") end + task.progress(90) if task report.add_section_result(validation_set, Lib::ALL_PROPS, "All Results", "All Results") - report.add_section_predictions( validation_set ) #, [:crossvalidation_fold] ) - return report + report.add_section_predictions( validation_set ) #, [:crossvalidation_fold] ) + task.progress(100) if task + report end - def self.create_report_compare_algorithms(validation_set) + def self.create_report_compare_algorithms(validation_set, task=nil) #validation_set.to_array([:test_dataset_uri, :model_uri, :algorithm_uri], false).each{|a| puts a.inspect} raise Reports::BadRequest.new("num validations is not >1") unless validation_set.size>1 @@ -115,309 +130,67 @@ module Reports::ReportFactory validation_set.get_values(:algorithm_uri).inspect) if validation_set.num_different_values(:algorithm_uri)<2 if validation_set.has_nil_values?(:crossvalidation_id) - if validation_set.num_different_values(:test_dataset_uri)>1 - - # groups results into sets with equal test and training dataset - dataset_grouping = Reports::Util.group(validation_set.validations, [:test_dataset_uri, :training_dataset_uri]) - # check if the same algorithms exists for each test and training dataset - Reports::Util.check_group_matching(dataset_grouping, [:algorithm_uri]) - - #merged = validation_set.merge([:algorithm_uri, :dataset_uri]) - report = Reports::ReportContent.new("Algorithm comparison report - Many datasets") - - if (validation_set.all_classification?) - report.add_section_result(validation_set,[:algorithm_uri, :test_dataset_uri]+VAL_ATTR_CLASS,"Mean Results","Mean Results") - report.add_section_ranking_plots(validation_set, :algorithm_uri, :test_dataset_uri, - [:percent_correct, :true_positive_rate, :true_negative_rate], "true") - else # regression - raise Reports::BadRequest.new("not implemented yet for regression") - end - return report - else - # this groups all validations in x different groups (arrays) according to there algorithm-uri - algorithm_grouping = Reports::Util.group(validation_set.validations, [:algorithm_uri]) - # we check if there are corresponding validations in each group that have equal attributes (folds, num-folds,..) - Reports::Util.check_group_matching(algorithm_grouping, [:training_dataset_uri, :test_dataset_uri, :prediction_feature]) - - report = Reports::ReportContent.new("Algorithm comparison report") - - if (validation_set.all_classification?) - report.add_section_bar_plot(validation_set,nil,:algorithm_uri,VAL_ATTR_BAR_PLOT_CLASS, "bar-plot.svg") - report.add_section_roc_plot(validation_set,nil, :algorithm_uri, "roc-plot.svg") - #validation_set.first.get_prediction_feature_values.each do |class_value| - #report.add_section_bar_plot(validation_set,class_value,:algorithm_uri,VAL_ATTR_CLASS, "bar-plot-"+class_value+".svg") - #report.add_section_roc_plot(validation_set, class_value, :algorithm_uri, "roc-plot-"+class_value+".svg") - #end - report.add_section_result(validation_set,[:algorithm_uri]+VAL_ATTR_CLASS,"Results","Results") - else - #regression - report.add_section_result(validation_set,[:algorithm_uri]+VAL_ATTR_REGR,"Results","Results") - report.add_section_bar_plot(validation_set,nil,:algorithm_uri,VAL_ATTR_BAR_PLOT_REGR, "bar-plot.svg") - report.add_section_regression_plot(validation_set) - - #report.add_section_result(merged, VAL_ATTR_CV+VAL_ATTR_REGR-[:crossvalidation_fold],"Mean Results","Mean Results") - #report.add_section_result(validation_set, VAL_ATTR_CV+VAL_ATTR_REGR-[:num_folds], "Results","Results") - end - report.add_section_result(validation_set, Lib::ALL_PROPS, "All Results", "All Results") - return report - end + raise Reports::BadRequest.new("algorithm comparison for non crossvalidation not yet implemented") else raise Reports::BadRequest.new("num different cross-validation-ids <2") if validation_set.num_different_values(:crossvalidation_id)<2 validation_set.load_cv_attributes - - if validation_set.num_different_values(:dataset_uri)>1 - # groups results into sets with equal dataset - dataset_grouping = Reports::Util.group(validation_set.validations, [:dataset_uri]) - # check if equal values in each group exist - Reports::Util.check_group_matching(dataset_grouping, [:algorithm_uri, :crossvalidation_fold, :num_folds, :stratified, :random_seed]) - # we only checked that equal validations exist in each dataset group, now check for each algorithm - dataset_grouping.each do |validations| - algorithm_grouping = Reports::Util.group(validations, [:algorithm_uri]) - Reports::Util.check_group_matching(algorithm_grouping, [:crossvalidation_fold, :num_folds, :stratified, :random_seed]) - end - - merged = validation_set.merge([:algorithm_uri, :dataset_uri]) - report = Reports::ReportContent.new("Algorithm comparison report - Many datasets") - - if (validation_set.all_classification?) - report.add_section_result(merged,VAL_ATTR_CV+VAL_ATTR_CLASS-[:crossvalidation_fold],"Mean Results","Mean Results") - report.add_section_ranking_plots(merged, :algorithm_uri, :dataset_uri, [:acc, :auc, :sens, :spec], "true") - else # regression - report.add_section_result(merged,VAL_ATTR_CV+VAL_ATTR_REGR-[:crossvalidation_fold],"Mean Results","Mean Results") - end - - return report - else - # this groups all validations in x different groups (arrays) according to there algorithm-uri - algorithm_grouping = Reports::Util.group(validation_set.validations, [:algorithm_uri]) - # we check if there are corresponding validations in each group that have equal attributes (folds, num-folds,..) - Reports::Util.check_group_matching(algorithm_grouping, [:crossvalidation_fold, :num_folds, :dataset_uri, :stratified, :random_seed]) - merged = validation_set.merge([:algorithm_uri]) - - report = Reports::ReportContent.new("Algorithm comparison report") - - if (validation_set.all_classification?) - - report.add_section_result(merged,VAL_ATTR_CV+VAL_ATTR_CLASS-[:crossvalidation_fold],"Mean Results","Mean Results") - - true_class = validation_set.get_true_prediction_feature_value - if true_class!=nil - report.add_section_bar_plot(merged,true_class,:algorithm_uri,VAL_ATTR_BAR_PLOT_CLASS, "bar-plot.svg") - report.add_section_roc_plot(validation_set, nil, :algorithm_uri, "roc-plot.svg") - else - validation_set.get_prediction_feature_values.each do |class_value| - report.add_section_bar_plot(merged,class_value,:algorithm_uri,VAL_ATTR_BAR_PLOT_CLASS, "bar-plot-"+class_value+".svg") - report.add_section_roc_plot(validation_set, class_value, :algorithm_uri, "roc-plot-"+class_value+".svg") - end - end - - report.add_section_result(validation_set,VAL_ATTR_CV+VAL_ATTR_CLASS-[:num_folds],"Results","Results") - else #regression - report.add_section_result(merged, VAL_ATTR_CV+VAL_ATTR_REGR-[:crossvalidation_fold],"Mean Results","Mean Results") - report.add_section_result(validation_set, VAL_ATTR_CV+VAL_ATTR_REGR-[:num_folds], "Results","Results") - end - - return report - end + compare_algorithms_crossvalidation(validation_set, task) end - end - -end - -# = Reports::ReportContent -# -# wraps an xml-report, adds functionality for adding sections, adds a hash for tmp files -# -class Reports::ReportContent - - attr_accessor :xml_report, :tmp_files - - def initialize(title) - @xml_report = Reports::XMLReport.new(title, Time.now.strftime("Created at %m.%d.%Y - %H:%M")) - end + end - def add_section_predictions( validation_set, - validation_attributes=[], - section_title="Predictions", - section_text="This section contains predictions.", - table_title="Predictions") - - #PENING - raise "validation attributes not implemented in get prediction array" if validation_attributes.size>0 + # create Algorithm Comparison report + # crossvalidations, 1-n datasets, 2-n algorithms + def self.compare_algorithms_crossvalidation(validation_set, task=nil) - section_table = @xml_report.add_section(@xml_report.get_root_element, section_title) - if validation_set.validations[0].get_predictions - @xml_report.add_paragraph(section_table, section_text) if section_text - @xml_report.add_table(section_table, table_title, Lib::OTPredictions.to_array(validation_set.validations.collect{|v| v.get_predictions}, true, true)) + # groups results into sets with equal dataset + if (validation_set.num_different_values(:dataset_uri)>1) + dataset_grouping = Reports::Util.group(validation_set.validations, [:dataset_uri]) + # check if equal values in each group exist + Reports::Util.check_group_matching(dataset_grouping, [:algorithm_uri, :crossvalidation_fold, :num_folds, :stratified, :random_seed]) else - @xml_report.add_paragraph(section_table, "No prediction info available.") + dataset_grouping = [ validation_set.validations ] end - end - - def add_section_result( validation_set, - validation_attributes, - table_title, - section_title="Results", - section_text="This section contains results.") - - section_table = @xml_report.add_section(xml_report.get_root_element, section_title) - @xml_report.add_paragraph(section_table, section_text) if section_text - vals = validation_set.to_array(validation_attributes,true,validation_set.get_true_prediction_feature_value) - vals = vals.collect{|a| a.collect{|v| v.to_s }} - #PENDING transpose values if there more than 4 columns, and there are more than columns than rows - transpose = vals[0].size>4 && vals[0].size>vals.size - @xml_report.add_table(section_table, table_title, vals, !transpose, transpose) - end - - def add_section_confusion_matrix( validation, - section_title="Confusion Matrix", - section_text="This section contains the confusion matrix.", - table_title="Confusion Matrix") - section_confusion = @xml_report.add_section(xml_report.get_root_element, section_title) - @xml_report.add_paragraph(section_confusion, section_text) if section_text - @xml_report.add_table(section_confusion, table_title, - Reports::XMLReportUtil::create_confusion_matrix( validation.confusion_matrix ), false) - end - - def add_section_regression_plot( validation_set, - split_set_attribute = nil, - plot_file_name="regression-plot.svg", - section_title="Regression Plot", - section_text=nil, - image_title=nil, - image_caption=nil) - - section_text = "This section contains the regression plot." unless section_text - image_title = "Regression plot" unless image_title - section_regr = @xml_report.add_section(@xml_report.get_root_element, section_title) - prediction_set = validation_set.collect{ |v| v.get_predictions } - - if prediction_set.size>0 - - section_text += "\nWARNING: regression plot information not available for all validation results" if prediction_set.size!=validation_set.size - @xml_report.add_paragraph(section_regr, section_text) if section_text - begin - plot_file_path = add_tmp_file(plot_file_name) - Reports::PlotFactory.create_regression_plot( plot_file_path, prediction_set ) - @xml_report.add_imagefigure(section_regr, image_title, plot_file_name, "SVG", image_caption) - rescue RuntimeError => ex - LOGGER.error("Could not create regression plot: "+ex.message) - rm_tmp_file(plot_file_name) - @xml_report.add_paragraph(section_regr, "could not create regression plot: "+ex.message) - end - else - @xml_report.add_paragraph(section_regr, "No prediction info for regression available.") + # we only checked that equal validations exist in each dataset group, now check for each algorithm + dataset_grouping.each do |validations| + algorithm_grouping = Reports::Util.group(validations, [:algorithm_uri]) + Reports::Util.check_group_matching(algorithm_grouping, [:crossvalidation_fold, :num_folds, :stratified, :random_seed]) end - end - - def add_section_roc_plot( validation_set, - class_value = nil, - split_set_attribute = nil, - plot_file_name="roc-plot.svg", - section_title="Roc Plot", - section_text=nil, - image_title=nil, - image_caption=nil) - - if class_value - section_text = "This section contains the roc plot for class '"+class_value+"'." unless section_text - image_title = "Roc Plot for class-value '"+class_value+"'" unless image_title - else - section_text = "This section contains the roc plot." unless section_text - image_title = "Roc Plot for all classes" unless image_title - end + pre_load_predictions( validation_set, OpenTox::SubTask.create(task,0,80) ) + report = Reports::ReportContent.new("Algorithm comparison report - Many datasets") - section_roc = @xml_report.add_section(@xml_report.get_root_element, section_title) - prediction_set = validation_set.collect{ |v| v.get_predictions && v.get_predictions.confidence_values_available? } + if (validation_set.num_different_values(:dataset_uri)>1) + all_merged = validation_set.merge([:algorithm_uri, :dataset_uri, :crossvalidation_id, :crossvalidation_uri]) + report.add_section_ranking_plots(all_merged, :algorithm_uri, :dataset_uri, + [:percent_correct, :weighted_area_under_roc, :true_positive_rate, :true_negative_rate] ) + report.add_section_result_overview(all_merged, :algorithm_uri, :dataset_uri, [:percent_correct, :weighted_area_under_roc]) + end + + if (validation_set.all_classification?) + attributes = VAL_ATTR_CV+VAL_ATTR_CLASS-[:crossvalidation_fold] + attributes = ([ :dataset_uri ] + attributes).uniq + + dataset_grouping.each do |validations| + + set = Reports::ValidationSet.create(validations) + dataset = validations[0].dataset_uri + merged = set.merge([:algorithm_uri, :dataset_uri, :crossvalidation_id, :crossvalidation_uri]) + merged.sort(:algorithm_uri) + merged.sort(:dataset_uri) - if prediction_set.size>0 + report.add_section_result(merged,attributes, + "Mean Results","Dataset: "+dataset,nil,"crossvalidation") + report.add_section_bar_plot(merged, :algorithm_uri, VAL_ATTR_BAR_PLOT_CLASS) + report.add_section_roc_plot(set, :algorithm_uri) + end - section_text += "\nWARNING: roc plot information not available for all validation results" if prediction_set.size!=validation_set.size - @xml_report.add_paragraph(section_roc, section_text) if section_text - begin - plot_file_path = add_tmp_file(plot_file_name) - Reports::PlotFactory.create_roc_plot( plot_file_path, prediction_set, class_value, split_set_attribute, false )#prediction_set.size>1 ) - @xml_report.add_imagefigure(section_roc, image_title, plot_file_name, "SVG", image_caption) - rescue RuntimeError => ex - LOGGER.error("could not create roc plot: "+ex.message) - rm_tmp_file(plot_file_name) - @xml_report.add_paragraph(section_roc, "could not create roc plot: "+ex.message) - end - else - @xml_report.add_paragraph(section_roc, "No prediction-confidence info for roc plot available.") + else # regression + raise Reports::BadRequest.new("algorithm comparison for regression not yet implemented") end - + task.progress(100) if task + report end - - def add_section_ranking_plots( validation_set, - compare_attribute, - equal_attribute, - rank_attributes, - class_value, - section_title="Ranking Plots", - section_text="This section contains the ranking plots.") - - section_rank = @xml_report.add_section(@xml_report.get_root_element, section_title) - @xml_report.add_paragraph(section_rank, section_text) if section_text - rank_attributes.each{|a| add_ranking_plot(section_rank, validation_set, compare_attribute, equal_attribute, a, class_value, a.to_s+"-ranking.svg")} - end - - def add_ranking_plot( report_section, - validation_set, - compare_attribute, - equal_attribute, - rank_attribute, - class_value=nil, - plot_file_name="ranking.svg", - image_title=nil, - image_caption=nil) +end - image_title = "Ranking Plot for class value: '"+class_value.to_s+"'" if image_title==nil - plot_file_path = add_tmp_file(plot_file_name) - Reports::PlotFactory::create_ranking_plot(plot_file_path, validation_set, compare_attribute, equal_attribute, rank_attribute, class_value) - @xml_report.add_imagefigure(report_section, image_title, plot_file_name, "SVG", image_caption) - - end - - def add_section_bar_plot(validation_set, - class_value, - title_attribute, - value_attributes, - plot_file_name="bar-plot.svg", - section_title="Bar Plot", - section_text=nil, - image_title=nil, - image_caption=nil) - if class_value - section_text = "This section contains the bar plot for class '"+class_value+"'." unless section_text - image_title = "Bar Plot for class-value '"+class_value+"'" unless image_title - else - section_text = "This section contains the bar plot." unless section_text - image_title = "Bar Plot for all classes" unless image_title - end - section_bar = @xml_report.add_section(@xml_report.get_root_element, section_title) - @xml_report.add_paragraph(section_bar, section_text) if section_text - - plot_file_path = add_tmp_file(plot_file_name) - Reports::PlotFactory.create_bar_plot(plot_file_path, validation_set, class_value, title_attribute, value_attributes ) - @xml_report.add_imagefigure(section_bar, image_title, plot_file_name, "SVG", image_caption) - end - - private - def add_tmp_file(tmp_file_name) - - @tmp_files = {} unless @tmp_files - raise "file name already exits" if @tmp_files[tmp_file_name] || (@text_files && @text_files[tmp_file_name]) - tmp_file_path = Reports::Util.create_tmp_file(tmp_file_name) - @tmp_files[tmp_file_name] = tmp_file_path - return tmp_file_path - end - - def rm_tmp_file(tmp_file_name) - @tmp_files.delete(tmp_file_name) if @tmp_files.has_key?(tmp_file_name) - end - -end
\ No newline at end of file diff --git a/report/report_format.rb b/report/report_format.rb index aafa204..5368858 100644 --- a/report/report_format.rb +++ b/report/report_format.rb @@ -69,14 +69,18 @@ module Reports::ReportFormat def self.format_report_to_html(directory, xml_filename, html_filename, css_style_sheet) css_style_sheet = "http://opentox.informatik.uni-freiburg.de/simple_ot_stylesheet.css" unless css_style_sheet - css = css_style_sheet ? " html.stylesheet=css_style_sheet?css_style_sheet="+URI.encode(css_style_sheet.to_s) : nil + + css = css_style_sheet ? "--stringparam html.stylesheet "+URI.encode(css_style_sheet.to_s) : nil + cmd = "xsltproc "+css.to_s+" "+ENV['REPORT_XSL']+" "+File.join(directory,xml_filename.to_s)+" > "+File.join(directory,html_filename.to_s) + #css = css_style_sheet ? " html.stylesheet=css_style_sheet?css_style_sheet="+URI.encode(css_style_sheet.to_s) : nil + #cmd = "java -jar "+ENV['SAXON_JAR']+" -o:" + File.join(directory,html_filename.to_s)+ + # " -s:"+File.join(directory,xml_filename.to_s)+" -xsl:"+ENV['REPORT_XSL']+" -versionmsg:off"+css.to_s - cmd = "java -jar "+ENV['SAXON_JAR']+" -o:" + File.join(directory,html_filename.to_s)+ - " -s:"+File.join(directory,xml_filename.to_s)+" -xsl:"+ENV['REPORT_XSL']+" -versionmsg:off"+css.to_s LOGGER.debug "Converting report to html: '"+cmd+"'" IO.popen(cmd.to_s) do |f| while line = f.gets do - LOGGER.info "saxon-xslt> "+line + LOGGER.info "xsltproc> "+line + #LOGGER.info "saxon-xslt> "+line end end raise "error during conversion" unless $?==0 diff --git a/report/report_persistance.rb b/report/report_persistance.rb index 424d5ca..1bd37b0 100644 --- a/report/report_persistance.rb +++ b/report/report_persistance.rb @@ -223,7 +223,7 @@ module Reports def list_reports(type, filter_params={}) filter_params["report_type"]=type unless filter_params.has_key?("report_type") - ReportData.find_like(filter_params).collect{ |r| r.id } + ReportData.find_like(filter_params).delete_if{|r| r.report_type!=type}.collect{ |r| r.id } end def get_report(type, id, format, force_formating, params) diff --git a/report/report_service.rb b/report/report_service.rb index 2187493..51be248 100644 --- a/report/report_service.rb +++ b/report/report_service.rb @@ -6,10 +6,21 @@ module Reports class ReportService + @@persistance = Reports::ExtendedFileReportPersistance.new + + def self.persistance + @@persistance + end + + def self.instance + @@instance + end + def initialize(home_uri) + raise "supposed to be a singleton" if defined?@@instance LOGGER.info "init report service" @home_uri = home_uri - @persistance = Reports::ExtendedFileReportPersistance.new + @@instance = self end # lists all available report types, returns list of uris @@ -32,7 +43,7 @@ module Reports LOGGER.info "get all reports of type '"+type.to_s+"', filter_params: '"+filter_params.inspect+"'" check_report_type(type) - @persistance.list_reports(type, filter_params).collect{ |id| get_uri(type,id) }.join("\n")+"\n" + @@persistance.list_reports(type, filter_params).collect{ |id| get_uri(type,id) }.join("\n")+"\n" end # creates a report of a certain type, __validation_uris__ must contain be a list of validation or cross-validation-uris @@ -41,7 +52,7 @@ module Reports # call-seq: # create_report(type, validation_uris) => string # - def create_report(type, validation_uris) + def create_report(type, validation_uris, task=nil) LOGGER.info "create report of type '"+type.to_s+"'" check_report_type(type) @@ -52,14 +63,17 @@ module Reports validation_set = Reports::ValidationSet.new(validation_uris) raise Reports::BadRequest.new("cannot get validations from validation_uris '"+validation_uris.inspect+"'") unless validation_set and validation_set.size > 0 LOGGER.debug "loaded "+validation_set.size.to_s+" validation/s" + task.progress(10) if task #step 2: create report of type - report_content = Reports::ReportFactory.create_report(type, validation_set) + report_content = Reports::ReportFactory.create_report(type, validation_set, + OpenTox::SubTask.create(task,10,90)) LOGGER.debug "report created" #step 3: persist report if creation not failed - id = @persistance.new_report(report_content, type, create_meta_data(type, validation_set, validation_uris), self) + id = @@persistance.new_report(report_content, type, create_meta_data(type, validation_set, validation_uris), self) LOGGER.debug "report persisted with id: '"+id.to_s+"'" + task.progress(100) if task return get_uri(type, id) end @@ -75,7 +89,7 @@ module Reports accept_header_value.to_s+"', force-formating:"+force_formating.to_s+" params: '"+params.inspect+"')" check_report_type(type) format = Reports::ReportFormat.get_format(accept_header_value) - return @persistance.get_report(type, id, format, force_formating, params) + return @@persistance.get_report(type, id, format, force_formating, params) end # returns a report resource (i.e. image) @@ -87,7 +101,7 @@ module Reports LOGGER.info "get resource '"+resource+"' for report '"+id.to_s+"' of type '"+type.to_s+"'" check_report_type(type) - return @persistance.get_report_resource(type, id, resource) + return @@persistance.get_report_resource(type, id, resource) end @@ -100,7 +114,7 @@ module Reports LOGGER.info "delete report '"+id.to_s+"' of type '"+type.to_s+"'" check_report_type(type) - @persistance.delete_report(type, id) + @@persistance.delete_report(type, id) end # no api-access for this method @@ -108,7 +122,7 @@ module Reports LOGGER.info "deleting all reports of type '"+type.to_s+"'" check_report_type(type) - @persistance.list_reports(type).each{ |id| @persistance.delete_report(type, id) } + @@persistance.list_reports(type).each{ |id| @@persistance.delete_report(type, id) } end def parse_type( report_uri ) @@ -123,7 +137,7 @@ module Reports raise "invalid uri" unless report_uri.to_s =~/^#{@home_uri}.*/ id = report_uri.squeeze("/").split("/")[-1] - @persistance.check_report_id_format(id) + @@persistance.check_report_id_format(id) return id end @@ -151,6 +165,7 @@ module Reports cvs << v if v =~ /crossvalidation/ and !cvs.include?(v) end meta_data[:crossvalidation_uris] = cvs + meta_data end diff --git a/report/report_test.rb b/report/report_test.rb index 9ea1a89..5351c7d 100644 --- a/report/report_test.rb +++ b/report/report_test.rb @@ -23,8 +23,8 @@ class Reports::ApplicationTest < Test::Unit::TestCase # puts OpenTox::RestClientWrapper.post("http://localhost/validation/report/qmrf/1",{:content_type => "application/qmrf-xml"},data).to_s.chomp #get "/report/qmrf/1",nil,'HTTP_ACCEPT' => "application/qmrf-xml"#"application/rdf+xml"#"application/x-yaml" - get "/report/validation" # ?model=http://localhost/model/1" #,nil,'HTTP_ACCEPT' => "application/rdf+xml"#"application/x-yaml" - puts last_response.body.to_s +# get "/report/validation" # ?model=http://localhost/model/1" #,nil,'HTTP_ACCEPT' => "application/rdf+xml"#"application/x-yaml" +# puts last_response.body.to_s #Reports::XMLReport.generate_demo_xml_report.write_to #raise "stop" @@ -33,7 +33,7 @@ class Reports::ApplicationTest < Test::Unit::TestCase #puts uri #get uri - #get '/report/validation/1',nil,'HTTP_ACCEPT' => "text/html" + get '/report/validation/117',nil,'HTTP_ACCEPT' => "text/html" #post '/report/validation/1/format_html',:css_style_sheet=>"http://apps.ideaconsult.net:8180/ToxPredict/style/global.css" #post 'http://ot.validation.de/report/validation',:validation_uris=>"http://ot.validation.de/1" diff --git a/report/util.rb b/report/util.rb index db783b3..9844fd8 100644 --- a/report/util.rb +++ b/report/util.rb @@ -1,4 +1,3 @@ - # graph-files are generated in the tmp-dir before they are stored ENV['TMP_DIR'] = File.join(FileUtils.pwd,"reports","tmp") unless ENV['TMP_DIR'] @@ -15,6 +14,7 @@ class Array return self.collect{|word| word[prefix.size..-1]} end end + self end end diff --git a/report/validation_access.rb b/report/validation_access.rb index 7d318af..a0b6355 100644 --- a/report/validation_access.rb +++ b/report/validation_access.rb @@ -20,7 +20,7 @@ class Reports::ValidationAccess # yields predictions (Lib::OTPredictions) if available # - def get_predictions(validation) + def get_predictions(validation, task=nil) raise "not implemented" end @@ -32,12 +32,11 @@ class Reports::ValidationAccess # get domain/class values of prediction feature # - def get_prediction_feature_values(validation) + def get_class_domain(validation) raise "not implemented" end # is validation classification? - # def classification?(validation) raise "not implemented" end @@ -50,8 +49,8 @@ end class Reports::ValidationDB < Reports::ValidationAccess - def initialize - @model_store = {} + def initialize() + @model_store = {} end def resolve_cv_uris(validation_uris) @@ -59,6 +58,14 @@ class Reports::ValidationDB < Reports::ValidationAccess validation_uris.each do |u| if u.to_s =~ /.*\/crossvalidation\/[0-9]+/ cv_id = u.split("/")[-1].to_i + cv = nil + begin + cv = Lib::Crossvalidation.find( cv_id ) + rescue => ex + raise "could not access crossvalidation with id "+validation_id.to_s+", error-msg: "+ex.message + end + raise Reports::BadRequest.new("crossvalidation with id '"+cv_id.to_s+"' not found") unless cv + raise Reports::BadRequest.new("crossvalidation with id '"+cv_id.to_s+"' not finished") unless cv.finished res += Lib::Validation.find( :all, :conditions => { :crossvalidation_id => cv_id } ).collect{|v| v.validation_uri.to_s} else res += [u.to_s] @@ -67,7 +74,6 @@ class Reports::ValidationDB < Reports::ValidationAccess res end - def init_validation(validation, uri) raise Reports::BadRequest.new "not a validation uri: "+uri.to_s unless uri =~ /.*\/[0-9]+/ @@ -81,6 +87,7 @@ class Reports::ValidationDB < Reports::ValidationAccess raise "could not access validation with id "+validation_id.to_s+", error-msg: "+ex.message end raise Reports::BadRequest.new "no validation found with id "+validation_id.to_s unless v #+" and uri "+uri.to_s unless v + raise Reports::BadRequest.new "validation with id "+validation_id.to_s+" is not finished yet" unless v.finished (Lib::VAL_PROPS + Lib::VAL_CV_PROPS).each do |p| validation.send("#{p.to_s}=".to_sym, v.send(p)) @@ -103,12 +110,13 @@ class Reports::ValidationDB < Reports::ValidationAccess end end - def get_predictions(validation) - Lib::OTPredictions.new( validation.classification?, validation.test_dataset_uri, validation.test_target_dataset_uri, - validation.prediction_feature, validation.prediction_dataset_uri, validation.predicted_variable) + def get_predictions(validation, task=nil) + Lib::OTPredictions.new( validation.classification?, validation.test_dataset_uri, + validation.test_target_dataset_uri, validation.prediction_feature, validation.prediction_dataset_uri, + validation.predicted_variable, task) end - def get_prediction_feature_values( validation ) + def get_class_domain( validation ) OpenTox::Feature.domain( validation.prediction_feature ) end @@ -197,7 +205,7 @@ class Reports::ValidationWebservice < Reports::ValidationAccess end end - def get_predictions(validation) + def get_predictions(validation, task=nil) Lib::Predictions.new( validation.prediction_feature, validation.test_dataset_uri, validation.prediction_dataset_uri) end end @@ -283,7 +291,7 @@ class Reports::ValidationMockLayer < Reports::ValidationAccess #validation.CV_dataset_name = @datasets[validation.crossvalidation_id.to_i * NUM_FOLDS] end - def get_predictions(validation) + def get_predictions(validation, task=nil) p = Array.new c = Array.new diff --git a/report/validation_data.rb b/report/validation_data.rb index 0a25e87..bd04554 100644 --- a/report/validation_data.rb +++ b/report/validation_data.rb @@ -1,7 +1,7 @@ # the variance is computed when merging results for these attributes VAL_ATTR_VARIANCE = [ :area_under_roc, :percent_correct, :root_mean_squared_error, :mean_absolute_error, :r_square, :accuracy ] -VAL_ATTR_RANKING = [ :area_under_roc, :percent_correct, :true_positive_rate, :true_negative_rate, :accuracy ] +VAL_ATTR_RANKING = [ :area_under_roc, :percent_correct, :true_positive_rate, :true_negative_rate, :weighted_area_under_roc ] #:accuracy ] ATTR_NICE_NAME = {} @@ -20,10 +20,14 @@ class Object def to_nice_s if is_a?(Float) - if self>0.01 + if self==0 + return "0" + elsif abs>0.1 return "%.2f" % self + elsif abs>0.01 + return "%.3f" % self else - return self.to_s + return "%.2e" % self end end return collect{ |i| i.to_nice_s }.join(", ") if is_a?(Array) @@ -81,20 +85,26 @@ module Reports # call-seq: # get_predictions => Reports::Predictions # - def get_predictions - return @predictions if @predictions - unless @prediction_dataset_uri - LOGGER.info("no predictions available, prediction_dataset_uri not set") - return nil + def get_predictions( task=nil ) + if @predictions + task.progress(100) if task + @predictions + else + unless @prediction_dataset_uri + LOGGER.info("no predictions available, prediction_dataset_uri not set") + task.progress(100) if task + nil + else + @predictions = @@validation_access.get_predictions( self, task ) + end end - @predictions = @@validation_access.get_predictions( self ) end # returns the predictions feature values (i.e. the domain of the class attribute) # - def get_prediction_feature_values - return @prediction_feature_values if @prediction_feature_values - @prediction_feature_values = @@validation_access.get_prediction_feature_values(self) + def get_class_domain() + @class_domain = @@validation_access.get_class_domain(self) unless @class_domain + @class_domain end # is classification validation? cache to save resr-calls @@ -115,6 +125,24 @@ module Reports @@validation_access.init_cv(self) end + @@persistance = Reports::ReportService.persistance + + def validation_report_uri + #puts "searching for validation report: "+self.validation_uri.to_s + return @validation_report_uri if @validation_report_uri!=nil + ids = @@persistance.list_reports("validation",{:validation=>validation_uri }) + @validation_report_uri = Reports::ReportService.instance.get_uri("validation",ids[-1]) if ids and ids.size>0 + end + + def cv_report_uri + #puts "searching for cv report: "+self.crossvalidation_uri.to_s + return @cv_report_uri if @cv_report_uri!=nil + raise "no cv uri "+to_yaml unless self.crossvalidation_uri + ids = @@persistance.list_reports("crossvalidation",{:crossvalidation=>self.crossvalidation_uri.to_s }) + #puts "-> "+ids.inspect + @cv_report_uri = Reports::ReportService.instance.get_uri("crossvalidation",ids[-1]) if ids and ids.size>0 + end + def clone_validation new_val = clone VAL_ATTR_VARIANCE.each { |a| new_val.send((a.to_s+"_variance=").to_sym,nil) } @@ -134,6 +162,13 @@ module Reports @validations = Array.new validation_uris.each{|u| @validations.push(Reports::Validation.new(u))} if validation_uris end + + + def self.create(validations) + set = ValidationSet.new + validations.each{ |v| set.validations.push(v) } + set + end def get(index) return @validations[index] @@ -194,20 +229,34 @@ module Reports return val end - def get_true_prediction_feature_value - if all_classification? - class_values = get_prediction_feature_values - if class_values.size == 2 - (0..1).each do |i| - return class_values[i] if (class_values[i].to_s.downcase == "true" || class_values[i].to_s.downcase == "active") - end - end - end - return nil +# def get_true_prediction_feature_value +# if all_classification? +# class_values = get_class_domain +# if class_values.size == 2 +# (0..1).each do |i| +# return class_values[i] if (class_values[i].to_s.downcase == "true" || class_values[i].to_s.downcase == "active") +# end +# end +# end +# return nil +# end + + def get_class_domain( ) + return unique_value("get_class_domain") end - def get_prediction_feature_values - return unique_value("get_prediction_feature_values") + def get_domain_for_attr( attribute ) + class_domain = get_class_domain() + if Lib::Validation.classification_property?(attribute) and + !Lib::Validation.depends_on_class_value?(attribute) + [ nil ] + elsif Lib::Validation.classification_property?(attribute) and + class_domain.size==2 and + Lib::Validation.complement_exists?(attribute) + [ class_domain[0] ] + else + class_domain + end end # checks weather all validations are classification validations @@ -246,6 +295,39 @@ module Reports return new_set end + def to_table( attribute_col, attribute_row, attribute_val) + + row_values = get_values(attribute_row) + #puts row_values.inspect + col_values = get_values(attribute_col) + #puts col_values.inspect + + cell_values = {} + row_values.each do |row| + col_values.each do |col| + val = nil + @validations.each do |v| + if v.send(attribute_row)==row and v.send(attribute_col)==col + raise "two validation have equal row and column values"if val!=nil + val = v.send(attribute_val).to_nice_s + end + end + cell_values[row] = [] if cell_values[row]==nil + cell_values[row] << val + end + end + #puts cell_values.inspect + + table = [] + table << [ "" ] + col_values + row_values.each do |row| + table << [ row ] + cell_values[row] + end + #puts table.inspect + + table + end + # returns an array, with values for __attributes__, that can be use for a table # * first row is header row # * other rows are values @@ -253,7 +335,7 @@ module Reports # call-seq: # to_array(attributes, remove_nil_attributes) => array # - def to_array(attributes, remove_nil_attributes=true, true_class_value=nil) + def to_array(attributes, remove_nil_attributes=true) array = Array.new array.push(attributes.collect{|a| a.to_s.nice_attr}) attribute_not_nil = Array.new(attributes.size) @@ -263,19 +345,43 @@ module Reports if VAL_ATTR_VARIANCE.index(a) variance = v.send( (a.to_s+"_variance").to_sym ) end - variance = " +- "+variance.to_nice_s if variance + + #variance = " +- "+variance.to_nice_s if variance attribute_not_nil[index] = true if remove_nil_attributes and v.send(a)!=nil index += 1 val = v.send(a) - val = val[true_class_value] if true_class_value!=nil && val.is_a?(Hash) && Lib::VAL_CLASS_PROPS_PER_CLASS_COMPLEMENT_EXISTS.index(a)!=nil - val.to_nice_s + variance.to_s + + class_domain = get_domain_for_attr(a) + # get domain for classification attribute, i.e. ["true","false"] + if class_domain.size==1 && class_domain[0]!=nil + # or the attribute has a complementary value, i.e. true_positive_rate + # -> domain is reduced to one class value + raise "illegal state" unless (val.is_a?(Hash)) + val = val[class_domain[0]] + end + + if variance + if (val.is_a?(Array)) + raise "not implemented" + elsif (val.is_a?(Hash)) + val.collect{ |i,j| i.to_nice_s+": "+j.to_nice_s + " +- " + + variance[i].to_nice_s }.join(", ") + else + val.to_nice_s + " +- " + variance.to_nice_s + end + else + val.to_nice_s + end + end) end + if remove_nil_attributes #delete in reverse order to avoid shifting of indices (0..attribute_not_nil.size-1).to_a.reverse.each do |i| array.each{|row| row.delete_at(i)} unless attribute_not_nil[i] end end + return array end @@ -294,6 +400,7 @@ module Reports #compute grouping grouping = Reports::Util.group(@validations, equal_attributes) + #puts "groups "+grouping.size.to_s Lib::MergeObjects.register_merge_attributes( Reports::Validation, Lib::VAL_MERGE_AVG,Lib::VAL_MERGE_SUM,Lib::VAL_MERGE_GENERAL) unless @@ -310,6 +417,10 @@ module Reports return new_set end + def sort(attribute, ascending=true) + @validations.sort!{ |a,b| a.send(attribute).to_s <=> b.send(attribute).to_s } + end + # creates a new validaiton set, that contains a ranking for __ranking_attribute__ # (i.e. for ranking attribute :acc, :acc_ranking is calculated) # all validation with equal values for __equal_attributes__ are compared @@ -319,7 +430,8 @@ module Reports # compute_ranking(equal_attributes, ranking_attribute) => array # def compute_ranking(equal_attributes, ranking_attribute, class_value=nil ) - + + #puts "compute_ranking("+equal_attributes.inspect+", "+ranking_attribute.inspect+", "+class_value.to_s+" )" new_set = Reports::ValidationSet.new (0..@validations.size-1).each do |i| new_set.validations.push(@validations[i].clone_validation) @@ -337,14 +449,16 @@ module Reports raise "no value for class value "+class_value.class.to_s+" "+class_value.to_s+" in hash "+val.inspect.to_s unless val.has_key?(class_value) val = val[class_value] else - raise "is a hash "+ranking_attribute+", specify class value plz" + raise "value for '"+ranking_attribute.to_s+"' is a hash, specify class value plz" end end rank_hash[i] = val end + #puts rank_hash.inspect # sort group accrording to second value (= ranking value) rank_array = rank_hash.sort { |a, b| b[1] <=> a[1] } + #puts rank_array.inspect # create ranks array ranks = Array.new @@ -370,6 +484,7 @@ module Reports end end end + #puts ranks.inspect # set rank as validation value (0..rank_array.size-1).each do |j| diff --git a/report/xml_report.rb b/report/xml_report.rb index 110c2a9..d280345 100644 --- a/report/xml_report.rb +++ b/report/xml_report.rb @@ -1,3 +1,6 @@ +#['rubygems', 'rexml/document' ].each do |g| +# require g +#end require "report/xml_report_util.rb" @@ -18,13 +21,23 @@ module Reports class XMLReport include REXML + def self.dtd_directory + if $sinatra + $sinatra.url_for('/'+ENV['DOCBOOK_DIRECTORY']+'/'+ENV['REPORT_DTD'], :full) + else + f = File.expand_path(File.join(ENV['DOCBOOK_DIRECTORY'],ENV['REPORT_DTD'])) + raise "cannot find dtd" unless File.exist?(f) + f + end + end + # create new xmlreport def initialize(title, pubdate=nil, author_firstname = nil, author_surname = nil) @doc = Document.new decl = XMLDecl.new @doc << decl - type = DocType.new('article PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "'+$sinatra.url_for('/'+ENV['DOCBOOK_DIRECTORY']+'/'+ENV['REPORT_DTD'], :full)+'"') + type = DocType.new('article PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "'+XMLReport.dtd_directory+'"') @doc << type @root = Element.new("article") @@ -94,12 +107,20 @@ module Reports media = Element.new("mediaobject") image = Element.new("imageobject") imagedata = Reports::XMLReportUtil.attribute_element("imagedata", - {"fileref" => path, "format"=>filetype, "contentwidth" => "6in", "contentdepth"=> "4in" + {"fileref" => path, "format"=>filetype, "contentwidth" => "100%", + #"contentdepth"=> "4in" })#"width" => "6in", "height" => "5in"}) #"contentwidth" => "100%"}) #imagedata = Reports::XMLReportUtil.attribute_element("imagedata",{"width" => "6in", "fileref" => path, "format"=>filetype}) @resource_path_elements[imagedata] = "fileref" image << imagedata + media << image + +# ulink = Element.new("ulink") +# ulink.add_attributes({"url" => "http://google.de"}) +# ulink << image +# media << ulink + media << Reports::XMLReportUtil.text_element("caption", caption) if caption figure << media element << figure diff --git a/saxonhe9-2-0-3j/doc/img/saxonica_logo.gif b/saxonhe9-2-0-3j/doc/img/saxonica_logo.gif Binary files differdeleted file mode 100644 index 8f0bd8d..0000000 --- a/saxonhe9-2-0-3j/doc/img/saxonica_logo.gif +++ /dev/null diff --git a/saxonhe9-2-0-3j/doc/index.html b/saxonhe9-2-0-3j/doc/index.html deleted file mode 100644 index ce4293a..0000000 --- a/saxonhe9-2-0-3j/doc/index.html +++ /dev/null @@ -1,56 +0,0 @@ -<!DOCTYPE html - PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> -<html> - <head> - <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> - <title>Saxonica: XSLT and XQuery Processing: Welcome</title> - <meta name="coverage" content="Worldwide"> - <meta name="copyright" content="Copyright Saxonica Ltd"> - <meta name="title" content="Saxonica: XSLT and XQuery Processing: About Saxon"> - <meta name="robots" content="noindex,nofollow"> - <link rel="stylesheet" href="saxondocs.css" type="text/css"> - </head> - <body class="main"> - <div id="rhDkBlueArea" style="position:absolute; width:123px; height:800px; z-index:2; right: 0px; top: 0px; border: 1px none #000000; background-color: #C1CEDE; layer-background-color: #C1CEDE; visibility: visible;"></div> - <div id="rhMdBlueArea" style="position:absolute; width:217px; height:800px; z-index:2; right: 340px; top: 0px; border: 1px none #000000; background-color: #E4EEF0; layer-background-color: #E4EEF0; visibility: visible;"></div> - <div id="lhLightArea" style="position:absolute; width:34px; height:800px; z-index:2; left: 66px; top: 0px; border: 1px none #000000; background-color: #f6fffb; layer-background-color: #E4EEF0; visibility: visible;"></div> - <div id="lhDkBlueArea" style="position:absolute; width:66px; height:800px; z-index:2; left: 0px; top: 0px; border: 1px none #000000; background-color: #C1CEDE; layer-background-color: #C1CEDE; visibility: visible;"></div> - <div id="LogoArea" style="position:absolute; width:340px; height:72px; z-index:3; right: 0px; top: 0px; border: 1px none #000000; visibility: visible;"><a href="http://www.saxonica.com/"><img src="img/saxonica_logo.gif" width="340" height="72" border="0" alt="Saxonica.com"></a></div> - - <div id="MainTextArea" style="position:absolute; height:100%; z-index:5; left: 130px; right: 260px; top: 110px; border: 1px none #000000; visibility: visible;"> - <h1>Welcome to Saxon</h1> - - - <p class="subhead">Online Documentation</p> - - <p>Saxon documentation for the current release is available online:</p> - - <ul> - <li><p><a href="http://www.saxonica.com/documentation/documentation.html" class="bodylink">Saxon Documentation</a></p></li> - - <li><p><a href="http://www.saxonica.com/documentation/javadoc/index.html" class="bodylink">Javadoc API Documentation</a></p></li> - - <li><p><a href="http://www.saxonica.com/documentation/dotnetdoc/index.html" class="bodylink">.NET API Documentation</a></p></li> - </ul> - - - <p class="subhead">Downloads</p> - - <p>Saxon documentation, together with source code and sample applications - can also be downloaded, both for the current release and for earlier releases. - </p> - - <p>The same file <code>saxon-resources8-N.zip</code> covers both Saxon products - (Saxon-B and Saxon-SA), and both platforms (Java and .NET).</p> - - <p>The file also contains sample applications and Saxon-B source code.</p> - - - <ul> - <li><p><a class="bodylink" href="http://sourceforge.net/project/showfiles.php?group_id=29872">Saxon-B downloads at SourceForge</a></li> - <li><p><a class="bodylink" href="http://www.saxonica.com/">Saxon-SA downloads at www.saxonica.com</a></li> - </ul> - - </div> - </body> -</html>
\ No newline at end of file diff --git a/saxonhe9-2-0-3j/doc/saxondocs.css b/saxonhe9-2-0-3j/doc/saxondocs.css deleted file mode 100644 index 681c337..0000000 --- a/saxonhe9-2-0-3j/doc/saxondocs.css +++ /dev/null @@ -1,228 +0,0 @@ -
-
-/*
-Text blue: #3D5B96
-Dark blue: #c1cede
-Mid blue: #e4eef0
-Light blue: #f6fffb
-mid green #B1CCC7
-rust #96433D
-*/
-
-/* used for frameset holders */
-.bgnd {
- margin-top:0;
- margin-left:0;
- background: #f6fffb;
- }
-
-/* used for menu */
-
-.menu {
- background: #f6fffb;
- margin-top:20;
- margin-left:40;
- SCROLLBAR-FACE-COLOR: #c1cede;
- SCROLLBAR-HIGHLIGHT-COLOR: #e4eef0;
- SCROLLBAR-SHADOW-COLOR: #e4eef0;
- SCROLLBAR-ARROW-COLOR: #f6fffb;
- SCROLLBAR-BASE-COLOR: #e4eef0;
-}
-
-/* used for content pages */
-
-.main {
- background: #e4eef0;
- margin-top:10px;
- margin-left:5px;
- margin-right:5px;
- margin-bottom:20px;
- SCROLLBAR-FACE-COLOR: #c1cede;
- SCROLLBAR-HIGHLIGHT-COLOR: #e4eef0;
- SCROLLBAR-SHADOW-COLOR: #e4eef0;
- SCROLLBAR-ARROW-COLOR: #f6fffb;
- SCROLLBAR-BASE-COLOR: #e4eef0;
-}
-
-/* used for menu links */
-
-a {
- font-family: Verdana, Arial, Helvetica, sans-serif;
- font-size: 8pt;
- font-style:normal;
- color: #3D5B96;
- font-weight: normal;
- text-decoration: none;
-}
-
-/* used for in body links */
-
-a.bodylink {
- font-family: Verdana, Arial, Helvetica, sans-serif;
- font-size: 9pt;
- font-style:normal;
- color: #3D5B96;
- font-weight: normal;
- text-decoration: underline;
-}
-
-/* used for table of contents level 1 */
-
-a.toc1 {
- font-family: Verdana, Arial, Helvetica, sans-serif;
- font-size: 12pt;
- font-style:normal;
- color: #3D5B96;
- font-weight: bold;
- text-decoration: none;
-}
-
-/* used for table of contents level 2 */
-
-a.toc2 {
- font-family: Verdana, Arial, Helvetica, sans-serif;
- font-size: 10pt;
- font-style:normal;
- color: #3D5B96;
- font-weight: normal;
- text-decoration: none;
-}
-
-/* used for menu heading */
-.title {
- font-family: Verdana, Arial, Helvetica, sans-serif;
- font-size: 10pt;
- font-style:normal;
- color: #3D5B96;
- font-weight: bold;
- text-decoration: none;
- line-height: 1.3em;
-}
-
-/* used for main page headings */
-
-
-h1 {
- font-family: Verdana, Arial, Helvetica, sans-serif;
- font-size: 14pt;
- font-style: normal;
- color: #3D5B96;
- font-weight: bold;
- text-decoration: none;
- }
-
-/* used for subheads in pref. to H2 etc, to limit underlining width */
-
-.subhead {
- font-family: Verdana, Arial, Helvetica, sans-serif;
- font-size: 10pt;
- font-style: normal;
- color: #3D5B96;
- font-weight: bold;
- text-decoration: none;
- border-bottom : thin dashed #3D5B96;
- padding-right : 5px;
-}
-
-/* used for standard text */
-
-p {
- font-family: Verdana, Arial, Helvetica, sans-serif;
- font-size: 9pt;
- font-style: normal;
- color: #3D5B96;
- font-weight: normal;
- text-decoration: none;
- line-height: 1.3em;
- padding-right:15px;
-}
-
-code {
- font-family: lucida sans typewriter, courier, monospace;
- font-size: 8pt;
- font-style: normal;
- font-weight: normal;
- text-decoration: none;
- line-height: 1.3em;
-}
-
-ul {
- font-family: Verdana, Arial, Helvetica, sans-serif;
- font-size: 9pt;
- font-style: normal;
- color: #3D5B96;
- font-weight: normal;
- text-decoration: none;
-}
-
-li {
- font-family: Verdana, Arial, Helvetica, sans-serif;
- font-size: 9pt;
- font-style: normal;
- color: #3D5B96;
- font-weight: normal;
-
-}
-
-/* used for text in boxed areas */
-
-.boxed {
- font-family: Verdana, Arial, Helvetica, sans-serif;
- font-size: 8pt;
- font-style: normal;
- color: #96433D;
- font-weight: bold;
- text-decoration: none;
- margin-top:5px;
- margin-bottom:5px;
-}
-
-/* used for example code */
-
-.codeblock {
- background: #B1CCC7;
- /*background: #e4eef0;*/
- font-family: lucida sans typewriter, courier, monospace;
- font-size: 8pt;
- font-style: normal;
- color: #96433D;
- font-weight: normal;
- text-decoration: none;
- padding-right:15px;
-}
-
-/* used for example commands */
-
-.command {
- font-size: 8pt;
- font-style: normal;
- color: #96433D;
- font-weight: bold;
- text-decoration: none;
- padding-right:15px;
-}
-
-
-
-/* used for links in boxed areas */
-
-a.rust {
- font-family: Verdana, Arial, Helvetica, sans-serif;
- font-size: 8pt;
- font-style:normal;
- color: #96433D;
- font-weight: bold;
- text-decoration: underline;
-}
-
-/* used for links at the end of a page */
-
-a.nav {
- font-family: Verdana, Arial, Helvetica, sans-serif;
- font-size: 8pt;
- font-style:normal;
- color: #96433D;
- font-weight: bold;
-}
-
-
diff --git a/saxonhe9-2-0-3j/notices/APACHE-ANT.txt b/saxonhe9-2-0-3j/notices/APACHE-ANT.txt deleted file mode 100644 index fc3ed76..0000000 --- a/saxonhe9-2-0-3j/notices/APACHE-ANT.txt +++ /dev/null @@ -1,13 +0,0 @@ -Apache Ant
-Copyright 1999-2006 The Apache Software Foundation
-
-This product includes software developed by
-The Apache Software Foundation (http://www.apache.org/).
-
-This product includes also software developed by :
- - the W3C consortium (http://www.w3c.org) ,
- - the SAX project (http://www.saxproject.org)
-
-The <sync> task is based on code Copyright (c) 2002, Landmark
-Graphics Corp that has been kindly donated to the Apache Software
-Foundation.
diff --git a/saxonhe9-2-0-3j/notices/APACHE-RESOLVER.txt b/saxonhe9-2-0-3j/notices/APACHE-RESOLVER.txt deleted file mode 100644 index 214dadd..0000000 --- a/saxonhe9-2-0-3j/notices/APACHE-RESOLVER.txt +++ /dev/null @@ -1,9 +0,0 @@ -Apache XML Commons Resolver
-Copyright 2006 The Apache Software Foundation.
-
-This product includes software developed at
-The Apache Software Foundation http://www.apache.org/
-
-Portions of this code are derived from classes placed in the
-public domain by Arbortext on 10 Apr 2000. See:
-http://www.arbortext.com/customer_support/updates_and_technical_notes/catalogs/docs/README.htm
diff --git a/saxonhe9-2-0-3j/notices/APACHE-XERCES.txt b/saxonhe9-2-0-3j/notices/APACHE-XERCES.txt deleted file mode 100644 index e99c2e6..0000000 --- a/saxonhe9-2-0-3j/notices/APACHE-XERCES.txt +++ /dev/null @@ -1,17 +0,0 @@ - =========================================================================
- == NOTICE file corresponding to section 4(d) of the Apache License, ==
- == Version 2.0, in this case for the Apache Xerces Java distribution. ==
- =========================================================================
-
- Apache Xerces Java
- Copyright 1999-2006 The Apache Software Foundation
-
- This product includes software developed at
- The Apache Software Foundation (http://www.apache.org/).
-
- Portions of this software were originally based on the following:
- - software copyright (c) 1999, IBM Corporation., http://www.ibm.com.
- - software copyright (c) 1999, Sun Microsystems., http://www.sun.com.
- - voluntary contributions made by Paul Eng on behalf of the
- Apache Software Foundation that were originally developed at iClick, Inc.,
- software copyright (c) 1999.
\ No newline at end of file diff --git a/saxonhe9-2-0-3j/notices/CERN.txt b/saxonhe9-2-0-3j/notices/CERN.txt deleted file mode 100644 index f77ab1a..0000000 --- a/saxonhe9-2-0-3j/notices/CERN.txt +++ /dev/null @@ -1,7 +0,0 @@ -Copyright © 1999 CERN - European Organization for Nuclear Research.<br/><br/>
-
-Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose
-is hereby granted without fee, provided that the above copyright notice appear in all copies and
-that both that copyright notice and this permission notice appear in supporting documentation.
-CERN makes no representations about the suitability of this software for any purpose.
-It is provided "as is" without expressed or implied warranty.
\ No newline at end of file diff --git a/saxonhe9-2-0-3j/notices/FRIJTERS.txt b/saxonhe9-2-0-3j/notices/FRIJTERS.txt deleted file mode 100644 index 568f0bc..0000000 --- a/saxonhe9-2-0-3j/notices/FRIJTERS.txt +++ /dev/null @@ -1,22 +0,0 @@ -Copyright (C) 2002-2007 Jeroen Frijters
-
- This software is provided 'as-is', without any express or implied
- warranty. In no event will the authors be held liable for any damages
- arising from the use of this software.
-
- Permission is granted to anyone to use this software for any purpose,
- including commercial applications, and to alter it and redistribute it
- freely, subject to the following restrictions:
-
- 1. The origin of this software must not be misrepresented; you must not
- claim that you wrote the original software. If you use this software
- in a product, an acknowledgment in the product documentation would be
- appreciated but is not required.
-
- 2. Altered source versions must be plainly marked as such, and must not be
- misrepresented as being the original software.
-
- 3. This notice may not be removed or altered from any source distribution.
-
- Jeroen Frijters
- jeroen@frijters.net
diff --git a/saxonhe9-2-0-3j/notices/GPL+CLASSPATH.txt b/saxonhe9-2-0-3j/notices/GPL+CLASSPATH.txt deleted file mode 100644 index eeab58c..0000000 --- a/saxonhe9-2-0-3j/notices/GPL+CLASSPATH.txt +++ /dev/null @@ -1,347 +0,0 @@ -The GNU General Public License (GPL) - -Version 2, June 1991 - -Copyright (C) 1989, 1991 Free Software Foundation, Inc. -59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -Everyone is permitted to copy and distribute verbatim copies of this license -document, but changing it is not allowed. - -Preamble - -The licenses for most software are designed to take away your freedom to share -and change it. By contrast, the GNU General Public License is intended to -guarantee your freedom to share and change free software--to make sure the -software is free for all its users. This General Public License applies to -most of the Free Software Foundation's software and to any other program whose -authors commit to using it. (Some other Free Software Foundation software is -covered by the GNU Library General Public License instead.) You can apply it to -your programs, too. - -When we speak of free software, we are referring to freedom, not price. Our -General Public Licenses are designed to make sure that you have the freedom to -distribute copies of free software (and charge for this service if you wish), -that you receive source code or can get it if you want it, that you can change -the software or use pieces of it in new free programs; and that you know you -can do these things. - -To protect your rights, we need to make restrictions that forbid anyone to deny -you these rights or to ask you to surrender the rights. These restrictions -translate to certain responsibilities for you if you distribute copies of the -software, or if you modify it. - -For example, if you distribute copies of such a program, whether gratis or for -a fee, you must give the recipients all the rights that you have. You must -make sure that they, too, receive or can get the source code. And you must -show them these terms so they know their rights. - -We protect your rights with two steps: (1) copyright the software, and (2) -offer you this license which gives you legal permission to copy, distribute -and/or modify the software. - -Also, for each author's protection and ours, we want to make certain that -everyone understands that there is no warranty for this free software. If the -software is modified by someone else and passed on, we want its recipients to -know that what they have is not the original, so that any problems introduced -by others will not reflect on the original authors' reputations. - -Finally, any free program is threatened constantly by software patents. We -wish to avoid the danger that redistributors of a free program will -individually obtain patent licenses, in effect making the program proprietary. -To prevent this, we have made it clear that any patent must be licensed for -everyone's free use or not licensed at all. - -The precise terms and conditions for copying, distribution and modification -follow. - -TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION - -0. This License applies to any program or other work which contains a notice -placed by the copyright holder saying it may be distributed under the terms of -this General Public License. The "Program", below, refers to any such program -or work, and a "work based on the Program" means either the Program or any -derivative work under copyright law: that is to say, a work containing the -Program or a portion of it, either verbatim or with modifications and/or -translated into another language. (Hereinafter, translation is included -without limitation in the term "modification".) Each licensee is addressed as -"you". - -Activities other than copying, distribution and modification are not covered by -this License; they are outside its scope. The act of running the Program is -not restricted, and the output from the Program is covered only if its contents -constitute a work based on the Program (independent of having been made by -running the Program). Whether that is true depends on what the Program does. - -1. You may copy and distribute verbatim copies of the Program's source code as -you receive it, in any medium, provided that you conspicuously and -appropriately publish on each copy an appropriate copyright notice and -disclaimer of warranty; keep intact all the notices that refer to this License -and to the absence of any warranty; and give any other recipients of the -Program a copy of this License along with the Program. - -You may charge a fee for the physical act of transferring a copy, and you may -at your option offer warranty protection in exchange for a fee. - -2. You may modify your copy or copies of the Program or any portion of it, thus -forming a work based on the Program, and copy and distribute such modifications -or work under the terms of Section 1 above, provided that you also meet all of -these conditions: - - a) You must cause the modified files to carry prominent notices stating - that you changed the files and the date of any change. - - b) You must cause any work that you distribute or publish, that in whole or - in part contains or is derived from the Program or any part thereof, to be - licensed as a whole at no charge to all third parties under the terms of - this License. - - c) If the modified program normally reads commands interactively when run, - you must cause it, when started running for such interactive use in the - most ordinary way, to print or display an announcement including an - appropriate copyright notice and a notice that there is no warranty (or - else, saying that you provide a warranty) and that users may redistribute - the program under these conditions, and telling the user how to view a copy - of this License. (Exception: if the Program itself is interactive but does - not normally print such an announcement, your work based on the Program is - not required to print an announcement.) - -These requirements apply to the modified work as a whole. If identifiable -sections of that work are not derived from the Program, and can be reasonably -considered independent and separate works in themselves, then this License, and -its terms, do not apply to those sections when you distribute them as separate -works. But when you distribute the same sections as part of a whole which is a -work based on the Program, the distribution of the whole must be on the terms -of this License, whose permissions for other licensees extend to the entire -whole, and thus to each and every part regardless of who wrote it. - -Thus, it is not the intent of this section to claim rights or contest your -rights to work written entirely by you; rather, the intent is to exercise the -right to control the distribution of derivative or collective works based on -the Program. - -In addition, mere aggregation of another work not based on the Program with the -Program (or with a work based on the Program) on a volume of a storage or -distribution medium does not bring the other work under the scope of this -License. - -3. You may copy and distribute the Program (or a work based on it, under -Section 2) in object code or executable form under the terms of Sections 1 and -2 above provided that you also do one of the following: - - a) Accompany it with the complete corresponding machine-readable source - code, which must be distributed under the terms of Sections 1 and 2 above - on a medium customarily used for software interchange; or, - - b) Accompany it with a written offer, valid for at least three years, to - give any third party, for a charge no more than your cost of physically - performing source distribution, a complete machine-readable copy of the - corresponding source code, to be distributed under the terms of Sections 1 - and 2 above on a medium customarily used for software interchange; or, - - c) Accompany it with the information you received as to the offer to - distribute corresponding source code. (This alternative is allowed only - for noncommercial distribution and only if you received the program in - object code or executable form with such an offer, in accord with - Subsection b above.) - -The source code for a work means the preferred form of the work for making -modifications to it. For an executable work, complete source code means all -the source code for all modules it contains, plus any associated interface -definition files, plus the scripts used to control compilation and installation -of the executable. However, as a special exception, the source code -distributed need not include anything that is normally distributed (in either -source or binary form) with the major components (compiler, kernel, and so on) -of the operating system on which the executable runs, unless that component -itself accompanies the executable. - -If distribution of executable or object code is made by offering access to copy -from a designated place, then offering equivalent access to copy the source -code from the same place counts as distribution of the source code, even though -third parties are not compelled to copy the source along with the object code. - -4. You may not copy, modify, sublicense, or distribute the Program except as -expressly provided under this License. Any attempt otherwise to copy, modify, -sublicense or distribute the Program is void, and will automatically terminate -your rights under this License. However, parties who have received copies, or -rights, from you under this License will not have their licenses terminated so -long as such parties remain in full compliance. - -5. You are not required to accept this License, since you have not signed it. -However, nothing else grants you permission to modify or distribute the Program -or its derivative works. These actions are prohibited by law if you do not -accept this License. Therefore, by modifying or distributing the Program (or -any work based on the Program), you indicate your acceptance of this License to -do so, and all its terms and conditions for copying, distributing or modifying -the Program or works based on it. - -6. Each time you redistribute the Program (or any work based on the Program), -the recipient automatically receives a license from the original licensor to -copy, distribute or modify the Program subject to these terms and conditions. -You may not impose any further restrictions on the recipients' exercise of the -rights granted herein. You are not responsible for enforcing compliance by -third parties to this License. - -7. If, as a consequence of a court judgment or allegation of patent -infringement or for any other reason (not limited to patent issues), conditions -are imposed on you (whether by court order, agreement or otherwise) that -contradict the conditions of this License, they do not excuse you from the -conditions of this License. If you cannot distribute so as to satisfy -simultaneously your obligations under this License and any other pertinent -obligations, then as a consequence you may not distribute the Program at all. -For example, if a patent license would not permit royalty-free redistribution -of the Program by all those who receive copies directly or indirectly through -you, then the only way you could satisfy both it and this License would be to -refrain entirely from distribution of the Program. - -If any portion of this section is held invalid or unenforceable under any -particular circumstance, the balance of the section is intended to apply and -the section as a whole is intended to apply in other circumstances. - -It is not the purpose of this section to induce you to infringe any patents or -other property right claims or to contest validity of any such claims; this -section has the sole purpose of protecting the integrity of the free software -distribution system, which is implemented by public license practices. Many -people have made generous contributions to the wide range of software -distributed through that system in reliance on consistent application of that -system; it is up to the author/donor to decide if he or she is willing to -distribute software through any other system and a licensee cannot impose that -choice. - -This section is intended to make thoroughly clear what is believed to be a -consequence of the rest of this License. - -8. If the distribution and/or use of the Program is restricted in certain -countries either by patents or by copyrighted interfaces, the original -copyright holder who places the Program under this License may add an explicit -geographical distribution limitation excluding those countries, so that -distribution is permitted only in or among countries not thus excluded. In -such case, this License incorporates the limitation as if written in the body -of this License. - -9. The Free Software Foundation may publish revised and/or new versions of the -General Public License from time to time. Such new versions will be similar in -spirit to the present version, but may differ in detail to address new problems -or concerns. - -Each version is given a distinguishing version number. If the Program -specifies a version number of this License which applies to it and "any later -version", you have the option of following the terms and conditions either of -that version or of any later version published by the Free Software Foundation. -If the Program does not specify a version number of this License, you may -choose any version ever published by the Free Software Foundation. - -10. If you wish to incorporate parts of the Program into other free programs -whose distribution conditions are different, write to the author to ask for -permission. For software which is copyrighted by the Free Software Foundation, -write to the Free Software Foundation; we sometimes make exceptions for this. -Our decision will be guided by the two goals of preserving the free status of -all derivatives of our free software and of promoting the sharing and reuse of -software generally. - -NO WARRANTY - -11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR -THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE -STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE -PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, -INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND -FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND -PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, -YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. - -12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL -ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE -PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY -GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR -INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA -BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A -FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER -OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. - -END OF TERMS AND CONDITIONS - -How to Apply These Terms to Your New Programs - -If you develop a new program, and you want it to be of the greatest possible -use to the public, the best way to achieve this is to make it free software -which everyone can redistribute and change under these terms. - -To do so, attach the following notices to the program. It is safest to attach -them to the start of each source file to most effectively convey the exclusion -of warranty; and each file should have at least the "copyright" line and a -pointer to where the full notice is found. - - One line to give the program's name and a brief idea of what it does. - - Copyright (C) <year> <name of author> - - This program is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the Free - Software Foundation; either version 2 of the License, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., 59 - Temple Place, Suite 330, Boston, MA 02111-1307 USA - -Also add information on how to contact you by electronic and paper mail. - -If the program is interactive, make it output a short notice like this when it -starts in an interactive mode: - - Gnomovision version 69, Copyright (C) year name of author Gnomovision comes - with ABSOLUTELY NO WARRANTY; for details type 'show w'. This is free - software, and you are welcome to redistribute it under certain conditions; - type 'show c' for details. - -The hypothetical commands 'show w' and 'show c' should show the appropriate -parts of the General Public License. Of course, the commands you use may be -called something other than 'show w' and 'show c'; they could even be -mouse-clicks or menu items--whatever suits your program. - -You should also get your employer (if you work as a programmer) or your school, -if any, to sign a "copyright disclaimer" for the program, if necessary. Here -is a sample; alter the names: - - Yoyodyne, Inc., hereby disclaims all copyright interest in the program - 'Gnomovision' (which makes passes at compilers) written by James Hacker. - - signature of Ty Coon, 1 April 1989 - - Ty Coon, President of Vice - -This General Public License does not permit incorporating your program into -proprietary programs. If your program is a subroutine library, you may -consider it more useful to permit linking proprietary applications with the -library. If this is what you want to do, use the GNU Library General Public -License instead of this License. - - -"CLASSPATH" EXCEPTION TO THE GPL - -Certain source files distributed by Sun Microsystems, Inc. are subject to -the following clarification and special exception to the GPL, but only where -Sun has expressly included in the particular source file's header the words -"Sun designates this particular file as subject to the "Classpath" exception -as provided by Sun in the LICENSE file that accompanied this code." - - Linking this library statically or dynamically with other modules is making - a combined work based on this library. Thus, the terms and conditions of - the GNU General Public License cover the whole combination. - - As a special exception, the copyright holders of this library give you - permission to link this library with independent modules to produce an - executable, regardless of the license terms of these independent modules, - and to copy and distribute the resulting executable under terms of your - choice, provided that you also meet, for each linked independent module, - the terms and conditions of the license of that module. An independent - module is a module which is not derived from or based on this library. If - you modify this library, you may extend this exception to your version of - the library, but you are not obligated to do so. If you do not wish to do - so, delete this exception statement from your version. diff --git a/saxonhe9-2-0-3j/notices/JAMESCLARK.txt b/saxonhe9-2-0-3j/notices/JAMESCLARK.txt deleted file mode 100644 index b3ffc4e..0000000 --- a/saxonhe9-2-0-3j/notices/JAMESCLARK.txt +++ /dev/null @@ -1,25 +0,0 @@ -Copyright (c) 1998, 1999 James Clark
-
-Permission is hereby granted, free of charge, to any person obtaining
-a copy of this software and associated documentation files (the
-"Software"), to deal in the Software without restriction, including
-without limitation the rights to use, copy, modify, merge, publish,
-distribute, sublicense, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject to
-the following conditions:
-
-The above copyright notice and this permission notice shall be included
-in all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
-OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-IN NO EVENT SHALL JAMES CLARK BE LIABLE FOR ANY CLAIM, DAMAGES OR
-OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-OTHER DEALINGS IN THE SOFTWARE.
-
-Except as contained in this notice, the name of James Clark shall
-not be used in advertising or otherwise to promote the sale, use or
-other dealings in this Software without prior written authorization
-from James Clark.
diff --git a/saxonhe9-2-0-3j/notices/LEGAL.txt b/saxonhe9-2-0-3j/notices/LEGAL.txt deleted file mode 100644 index 8e2a59b..0000000 --- a/saxonhe9-2-0-3j/notices/LEGAL.txt +++ /dev/null @@ -1,33 +0,0 @@ -LEGAL NOTICE - -This notice is issued to fulfil the requirements of the Mozilla Public License version 1.0 ("MPL 1.0") -sections 3.4(a) and 3.6. MPL 1.0 can be found at http://www.mozilla.org/MPL/MPL-1.0.html. - -Section 3.4(a) of MPL 1.0 states that any third party intellectual property rights in particular -functionality or code must be notified in a text file named LEGAL that is issued with the source code. Saxon -includes a number of such third party components, and the relevant claims are included in notices included -in the same directory as this notice. Although MPL 1.0 requires this notice to be included only with source -code, some of the third parties may also require notices to be included with executable code. Therefore, Saxon -executable code must not be distributed separately from this notice and all the accompanying third -party notices. The term "Distribution" here includes making the code available for download, and its -inclusion in download repositories such as Maven. - -Section 3.6 of MPL 1.0 states: - -You may distribute Covered Code in Executable form only if the requirements of Section 3.1-3.5 have -been met for that Covered Code, and if You include a notice stating that the Source Code version of -the Covered Code is available under the terms of this License, including a description of how and -where You have fulfilled the obligations of Section 3.2. - -Section 3.2 requires the Source Code of Covered Code to be made available via an accepted Electronic -Distribution Mechanism. - -The Source Code version of the Covered Code (that is, the source code of Saxon-B) is available under the -terms of the Mozilla Public License version 1.0, and may be obtained from the Subversion repository -for the Saxon project on SourceForge, at https://sourceforge.net/svn/?group_id=29872. -The precise version of the Subversion source for a particular Saxon maintenance release can be -determined by referring to the release notes for the particular release in the SourceForge download area. - -Note that MPL 1.0 requires that any modifications to this source code must be made available under the terms -of the MPL "to anyone to whom you made an executable version available". As a courtesy, it is also requested -that you make such modifications available to Saxonica Limited.
\ No newline at end of file diff --git a/saxonhe9-2-0-3j/notices/LICENSE.txt b/saxonhe9-2-0-3j/notices/LICENSE.txt deleted file mode 100644 index c57dd4c..0000000 --- a/saxonhe9-2-0-3j/notices/LICENSE.txt +++ /dev/null @@ -1,15 +0,0 @@ -The contents of these file are subject to the Mozilla Public License Version 1.0 (the "License");
-you may not use these files except in compliance with the License. You may obtain a copy of the
-License at http://www.mozilla.org/MPL/
-
-Software distributed under the License is distributed on an "AS IS" basis,
-WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the
-specific language governing rights and limitations under the License.
-
-The Original Code is all Saxon modules labelled with a notice referring to this license.
-
-The Initial Developer of the Original Code is Michael Kay, except where otherwise specified in an individual module.
-
-Portions created by other named contributors are copyright as identified in the relevant module. All Rights Reserved.
-
-Contributor(s) are listed in the documentation: see notices/contributors.
\ No newline at end of file diff --git a/saxonhe9-2-0-3j/notices/THAI.txt b/saxonhe9-2-0-3j/notices/THAI.txt deleted file mode 100644 index ccc7e42..0000000 --- a/saxonhe9-2-0-3j/notices/THAI.txt +++ /dev/null @@ -1,31 +0,0 @@ -Copyright (c) 2001-2003 Thai Open Source Software Center Ltd
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
- Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
-
- Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in
- the documentation and/or other materials provided with the
- distribution.
-
- Neither the name of the Thai Open Source Software Center Ltd nor
- the names of its contributors may be used to endorse or promote
- products derived from this software without specific prior written
- permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR
-CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/saxonhe9-2-0-3j/notices/UNICODE.txt b/saxonhe9-2-0-3j/notices/UNICODE.txt deleted file mode 100644 index 58f0484..0000000 --- a/saxonhe9-2-0-3j/notices/UNICODE.txt +++ /dev/null @@ -1,29 +0,0 @@ -COPYRIGHT AND PERMISSION NOTICE
-Copyright © 1991-2007 Unicode, Inc. All rights reserved. Distributed under the Terms of Use
-in http://www.unicode.org/copyright.html.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy of the Unicode
-data files and any associated documentation (the "Data Files") or Unicode software and any
-associated documentation (the "Software") to deal in the Data Files or Software without
-restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute,
-and/or sell copies of the Data Files or Software, and to permit persons to whom the Data Files or
-Software are furnished to do so, provided that (a) the above copyright notice(s) and this
-permission notice appear with all copies of the Data Files or Software, (b) both the above
-copyright notice(s) and this permission notice appear in associated documentation, and
-(c) there is clear notice in each modified Data File or in the Software as well as in the
-documentation associated with the Data File(s) or Software that the data or software has
-been modified.
-
-THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
-IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE
-BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES,
-OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
-WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
-ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THE DATA
-FILES OR SOFTWARE.
-
-Except as contained in this notice, the name of a copyright holder shall not be used
-in advertising or otherwise to promote the sale, use or other dealings in these
-Data Files or Software without prior written authorization of the copyright holder.
diff --git a/saxonhe9-2-0-3j/saxon9he.jar b/saxonhe9-2-0-3j/saxon9he.jar Binary files differdeleted file mode 100644 index 0719a94..0000000 --- a/saxonhe9-2-0-3j/saxon9he.jar +++ /dev/null diff --git a/test/test_examples_util.rb b/test/test_examples_util.rb index c5a9900..cc31fe5 100644 --- a/test/test_examples_util.rb +++ b/test/test_examples_util.rb @@ -249,6 +249,7 @@ module ValidationExamples yaml = YAML.load(Util.validation_get(@report_uri.split("/")[-3..-1].join("/"),'application/x-yaml')) owl = OpenTox::Owl.from_data(Util.validation_get(@report_uri.split("/")[-3..-1].join("/")),@report_uri,"ValidationReport") Util.compare_yaml_and_owl(yaml,owl) + Util.validation_get(@report_uri.split("/")[-3..-1].join("/"),'text/html') else puts "no report" end diff --git a/validation/validation_application.rb b/validation/validation_application.rb index cf6fc78..6785943 100644 --- a/validation/validation_application.rb +++ b/validation/validation_application.rb @@ -10,7 +10,7 @@ require 'validation/validation_service.rb' get '/crossvalidation/?' do LOGGER.info "list all crossvalidations" - uri_list = Validation::Crossvalidation.find_like(params).collect{ |d| url_for("/crossvalidation/", :full) + d.id.to_s }.join("\n")+"\n" + uri_list = Validation::Crossvalidation.find_like(params).collect{ |cv| cv.crossvalidation_uri }.join("\n")+"\n" if request.env['HTTP_ACCEPT'] =~ /text\/html/ related_links = "Single validations: "+$sinatra.url_for("/",:full)+"\n"+ @@ -29,7 +29,7 @@ end post '/crossvalidation/?' do content_type "text/uri-list" - task_uri = OpenTox::Task.as_task( "Perform crossvalidation", url_for("/crossvalidation", :full), params ) do + task_uri = OpenTox::Task.as_task( "Perform crossvalidation", url_for("/crossvalidation", :full), params ) do |task| LOGGER.info "creating crossvalidation "+params.inspect halt 400, "dataset_uri missing" unless params[:dataset_uri] halt 400, "algorithm_uri missing" unless params[:algorithm_uri] @@ -41,8 +41,7 @@ post '/crossvalidation/?' do :algorithm_uri => params[:algorithm_uri] } [ :num_folds, :random_seed, :stratified ].each{ |sym| cv_params[sym] = params[sym] if params[sym] } cv = Validation::Crossvalidation.new cv_params - cv.create_cv_datasets( params[:prediction_feature] ) - cv.perform_cv( params[:algorithm_params]) + cv.perform_cv( params[:prediction_feature], params[:algorithm_params], task ) cv.crossvalidation_uri end halt 202,task_uri+"\n" @@ -56,7 +55,7 @@ post '/crossvalidation/cleanup/?' do num_vals = Validation::Validation.find( :all, :conditions => { :crossvalidation_id => cv.id } ).size if cv.num_folds != num_vals LOGGER.debug "delete cv with id:"+cv.id.to_s+" num-folds should be "+cv.num_folds.to_s+", is "+num_vals.to_s - deleted << url_for("/crossvalidation/", :full) + cv.id.to_s + deleted << cv.crossvalidation_uri Validation::Crossvalidation.delete(cv.id) end end @@ -110,13 +109,13 @@ get '/crossvalidation/:id/statistics' do rescue ActiveRecord::RecordNotFound => ex halt 404, "Crossvalidation '#{params[:id]}' not found." end + halt 400,"Crossvalidation '"+params[:id].to_s+"' not finished" unless crossvalidation.finished Lib::MergeObjects.register_merge_attributes( Validation::Validation, - Lib::VAL_MERGE_AVG,Lib::VAL_MERGE_SUM,Lib::VAL_MERGE_GENERAL-[:validation_uri]) unless + Lib::VAL_MERGE_AVG,Lib::VAL_MERGE_SUM,Lib::VAL_MERGE_GENERAL-[:validation_uri,:crossvalidation_uri]) unless Lib::MergeObjects.merge_attributes_registered?(Validation::Validation) v = Lib::MergeObjects.merge_array_objects( Validation::Validation.find( :all, :conditions => { :crossvalidation_id => params[:id] } ) ) - v.validation_uri = nil v.date = nil v.id = nil @@ -163,6 +162,8 @@ get '/crossvalidation/:id/predictions' do rescue ActiveRecord::RecordNotFound => ex halt 404, "Crossvalidation '#{params[:id]}' not found." end + halt 400,"Crossvalidation '"+params[:id].to_s+"' not finished" unless crossvalidation.finished + content_type "application/x-yaml" validations = Validation::Validation.find( :all, :conditions => { :crossvalidation_id => params[:id] } ) p = Lib::OTPredictions.to_array( validations.collect{ |v| v.compute_validation_stats_with_model(nil, true) } ).to_yaml @@ -184,7 +185,7 @@ end get '/?' do LOGGER.info "list all validations, params: "+params.inspect - uri_list = Validation::Validation.find_like(params).collect{ |d| url_for("/", :full) + d.id.to_s }.join("\n")+"\n" + uri_list = Validation::Validation.find_like(params).collect{ |v| v.validation_uri }.join("\n")+"\n" if request.env['HTTP_ACCEPT'] =~ /text\/html/ related_links = "To perform a validation:\n"+ @@ -215,11 +216,12 @@ post '/test_set_validation' do LOGGER.info "creating test-set-validation "+params.inspect if params[:model_uri] and params[:test_dataset_uri] and !params[:training_dataset_uri] and !params[:algorithm_uri] task_uri = OpenTox::Task.as_task( "Perform test-set-validation", url_for("/", :full), params ) do |task| - v = Validation::Validation.new :model_uri => params[:model_uri], + v = Validation::Validation.new :validation_type => "test_set_validation", + :model_uri => params[:model_uri], :test_dataset_uri => params[:test_dataset_uri], :test_target_dataset_uri => params[:test_target_dataset_uri], :prediction_feature => params[:prediction_feature] - v.validate_model + v.validate_model( task ) v.validation_uri end halt 202,task_uri+"\n" @@ -231,7 +233,7 @@ end get '/test_set_validation' do LOGGER.info "list all test-set-validations, params: "+params.inspect - uri_list = "NOT YET IMPLEMENTED" + uri_list = Validation::Validation.find( :all, :conditions => { :validation_type => "test_set_validation" } ).collect{ |v| v.validation_uri }.join("\n")+"\n" if request.env['HTTP_ACCEPT'] =~ /text\/html/ related_links = "All validations: "+$sinatra.url_for("/",:full)+"\n"+ @@ -253,12 +255,13 @@ post '/training_test_validation/?' do LOGGER.info "creating training-test-validation "+params.inspect if params[:algorithm_uri] and params[:training_dataset_uri] and params[:test_dataset_uri] and params[:prediction_feature] and !params[:model_uri] task_uri = OpenTox::Task.as_task( "Perform training-test-validation", url_for("/", :full), params ) do |task| - v = Validation::Validation.new :algorithm_uri => params[:algorithm_uri], + v = Validation::Validation.new :validation_type => "training_test_validation", + :algorithm_uri => params[:algorithm_uri], :training_dataset_uri => params[:training_dataset_uri], :test_dataset_uri => params[:test_dataset_uri], :test_target_dataset_uri => params[:test_target_dataset_uri], :prediction_feature => params[:prediction_feature] - v.validate_algorithm( params[:algorithm_params]) + v.validate_algorithm( params[:algorithm_params], task ) v.validation_uri end halt 202,task_uri+"\n" @@ -270,7 +273,7 @@ end get '/training_test_validation' do LOGGER.info "list all training-test-validations, params: "+params.inspect - uri_list = "NOT YET IMPLEMENTED" + uri_list = Validation::Validation.find( :all, :conditions => { :validation_type => "training_test_validation" } ).collect{ |v| v.validation_uri }.join("\n")+"\n" if request.env['HTTP_ACCEPT'] =~ /text\/html/ related_links = "All validations: "+$sinatra.url_for("/",:full)+"\n"+ @@ -294,19 +297,21 @@ end post '/bootstrapping' do content_type "text/uri-list" - task_uri = OpenTox::Task.as_task( "Perform bootstrapping validation", url_for("/bootstrapping", :full), params ) do + task_uri = OpenTox::Task.as_task( "Perform bootstrapping validation", url_for("/bootstrapping", :full), params ) do |task| LOGGER.info "performing bootstrapping validation "+params.inspect halt 400, "dataset_uri missing" unless params[:dataset_uri] halt 400, "algorithm_uri missing" unless params[:algorithm_uri] halt 400, "prediction_feature missing" unless params[:prediction_feature] - params.merge!(Validation::Util.bootstrapping(params[:dataset_uri], params[:prediction_feature], params[:random_seed])) - v = Validation::Validation.new :training_dataset_uri => params[:training_dataset_uri], + params.merge!( Validation::Util.bootstrapping( params[:dataset_uri], + params[:prediction_feature], params[:random_seed], OpenTox::SubTask.create(task,0,33)) ) + v = Validation::Validation.new :validation_type => "bootstrapping", + :training_dataset_uri => params[:training_dataset_uri], :test_dataset_uri => params[:test_dataset_uri], :test_target_dataset_uri => params[:dataset_uri], :prediction_feature => params[:prediction_feature], :algorithm_uri => params[:algorithm_uri] - v.validate_algorithm( params[:algorithm_params]) + v.validate_algorithm( params[:algorithm_params], OpenTox::SubTask.create(task,33,100)) v.validation_uri end halt 202,task_uri+"\n" @@ -314,7 +319,7 @@ end get '/bootstrapping' do LOGGER.info "list all bootstrapping-validations, params: "+params.inspect - uri_list = "NOT YET IMPLEMENTED" + uri_list = Validation::Validation.find( :all, :conditions => { :validation_type => "bootstrapping" } ).collect{ |v| v.validation_uri }.join("\n")+"\n" if request.env['HTTP_ACCEPT'] =~ /text\/html/ related_links = "All validations: "+$sinatra.url_for("/",:full)+"\n"+ @@ -336,20 +341,24 @@ get '/bootstrapping' do end post '/training_test_split' do + content_type "text/uri-list" - task_uri = OpenTox::Task.as_task( "Perform training test split validation", url_for("/training_test_split", :full), params ) do + task_uri = OpenTox::Task.as_task( "Perform training test split validation", url_for("/training_test_split", :full), params ) do |task| + LOGGER.info "creating training test split "+params.inspect halt 400, "dataset_uri missing" unless params[:dataset_uri] halt 400, "algorithm_uri missing" unless params[:algorithm_uri] halt 400, "prediction_feature missing" unless params[:prediction_feature] - params.merge!(Validation::Util.train_test_dataset_split(params[:dataset_uri], params[:prediction_feature], params[:split_ratio], params[:random_seed])) - v = Validation::Validation.new :training_dataset_uri => params[:training_dataset_uri], + params.merge!( Validation::Util.train_test_dataset_split(params[:dataset_uri], params[:prediction_feature], + params[:split_ratio], params[:random_seed], OpenTox::SubTask.create(task,0,33))) + v = Validation::Validation.new :validation_type => "training_test_split", + :training_dataset_uri => params[:training_dataset_uri], :test_dataset_uri => params[:test_dataset_uri], :test_target_dataset_uri => params[:dataset_uri], :prediction_feature => params[:prediction_feature], :algorithm_uri => params[:algorithm_uri] - v.validate_algorithm( params[:algorithm_params]) + v.validate_algorithm( params[:algorithm_params], OpenTox::SubTask.create(task,33,100)) v.validation_uri end halt 202,task_uri+"\n" @@ -357,7 +366,7 @@ end get '/training_test_split' do LOGGER.info "list all training-test-split-validations, params: "+params.inspect - uri_list = "NOT YET IMPLEMENTED" + uri_list = Validation::Validation.find( :all, :conditions => { :validation_type => "training_test_split" } ).collect{ |v| v.validation_uri }.join("\n")+"\n" if request.env['HTTP_ACCEPT'] =~ /text\/html/ related_links = "All validations: "+$sinatra.url_for("/",:full)+"\n"+ @@ -385,19 +394,13 @@ post '/cleanup/?' do deleted = [] Validation::Validation.find( :all, :conditions => { :prediction_dataset_uri => nil } ).each do |val| LOGGER.debug "delete val with id:"+val.id.to_s+" prediction_dataset_uri is nil" - deleted << url_for("/", :full) + val.id.to_s + deleted << val.validation_uri Validation::Validation.delete(val.id) end LOGGER.info "validation cleanup, deleted "+deleted.size.to_s+" validations" deleted.join("\n")+"\n" end - - - - - - post '/plain_training_test_split' do LOGGER.info "creating pure training test split "+params.inspect halt 400, "dataset_uri missing" unless params[:dataset_uri] @@ -409,14 +412,15 @@ end post '/validate_datasets' do content_type "text/uri-list" - task_uri = OpenTox::Task.as_task( "Perform dataset validation", url_for("/validate_datasets", :full), params ) do + task_uri = OpenTox::Task.as_task( "Perform dataset validation", url_for("/validate_datasets", :full), params ) do |task| LOGGER.info "validating values "+params.inspect halt 400, "test_dataset_uri missing" unless params[:test_dataset_uri] halt 400, "prediction_datset_uri missing" unless params[:prediction_dataset_uri] + params[:validation_type] = "validate_datasets" if params[:model_uri] v = Validation::Validation.new params - v.compute_validation_stats_with_model() + v.compute_validation_stats_with_model(nil,false,task) else halt 400, "please specify 'model_uri' or 'prediction_feature'" unless params[:prediction_feature] halt 400, "please specify 'model_uri' or 'predicted_feature'" unless params[:predicted_feature] @@ -427,7 +431,7 @@ post '/validate_datasets' do clazz = params.delete("classification")!=nil regr = params.delete("regression")!=nil v = Validation::Validation.new params - v.compute_validation_stats((clazz and !regr),predicted_feature) + v.compute_validation_stats((clazz and !regr),predicted_feature,nil,false,task) end v.validation_uri end @@ -441,6 +445,7 @@ get '/:id/predictions' do rescue ActiveRecord::RecordNotFound => ex halt 404, "Validation '#{params[:id]}' not found." end + halt 400,"Validation '"+params[:id].to_s+"' not finished" unless validation.finished p = validation.compute_validation_stats_with_model(nil, true) case request.env['HTTP_ACCEPT'].to_s when /text\/html/ diff --git a/validation/validation_service.rb b/validation/validation_service.rb index 67fdbee..0907edb 100644 --- a/validation/validation_service.rb +++ b/validation/validation_service.rb @@ -37,12 +37,10 @@ module Validation # constructs a validation object, Rsets id und uri def initialize( params={} ) $sinatra.halt 500,"do not set id manually" if params[:id] - $sinatra.halt 500,"do not set uri manually" if params[:validation_uri] + params[:finished] = false super params self.save! raise "internal error, validation-id not set "+to_yaml if self.id==nil - self.attributes = { :validation_uri => $sinatra.url_for("/"+self.id.to_s, :full).to_s } - self.save! end # deletes a validation @@ -62,7 +60,7 @@ module Validation end # validates an algorithm by building a model and validating this model - def validate_algorithm( algorithm_params=nil ) + def validate_algorithm( algorithm_params=nil, task=nil ) $sinatra.halt 404, "no algorithm uri: '"+self.algorithm_uri.to_s+"'" if self.algorithm_uri==nil or self.algorithm_uri.to_s.size<1 @@ -77,7 +75,8 @@ module Validation end LOGGER.debug "building model '"+algorithm_uri.to_s+"' "+params.inspect - model = OpenTox::Model::PredictionModel.build(algorithm_uri, params) + model = OpenTox::Model::PredictionModel.build(algorithm_uri, params, + OpenTox::SubTask.create(task, 0, 33) ) $sinatra.halt 500,"model building failed" unless model self.attributes = { :model_uri => model.uri } self.save! @@ -85,12 +84,12 @@ module Validation $sinatra.halt 500,"error after building model: model.dependent_variable != validation.prediciton_feature ("+ model.dependentVariables.to_s+" != "+self.prediction_feature+")" if self.prediction_feature!=model.dependentVariables - validate_model + validate_model OpenTox::SubTask.create(task, 33, 100) end # validates a model # PENDING: a new dataset is created to store the predictions, this should be optional: delete predictions afterwards yes/no - def validate_model + def validate_model( task=nil ) LOGGER.debug "validating model '"+self.model_uri+"'" @@ -113,25 +112,27 @@ module Validation prediction_dataset_uri = "" benchmark = Benchmark.measure do - prediction_dataset_uri = model.predict_dataset(self.test_dataset_uri) + prediction_dataset_uri = model.predict_dataset(self.test_dataset_uri, OpenTox::SubTask.create(task, 0, 50)) end self.attributes = { :prediction_dataset_uri => prediction_dataset_uri, :real_runtime => benchmark.real } self.save! - compute_validation_stats_with_model( model ) + compute_validation_stats_with_model( model, false, OpenTox::SubTask.create(task, 50, 100) ) end - def compute_validation_stats_with_model( model=nil, dry_run=false ) + def compute_validation_stats_with_model( model=nil, dry_run=false, task=nil ) model = OpenTox::Model::PredictionModel.find(self.model_uri) if model==nil and self.model_uri $sinatra.halt 400, "model not found: "+self.model_uri.to_s unless model prediction_feature = self.prediction_feature ? nil : model.dependentVariables algorithm_uri = self.algorithm_uri ? nil : model.algorithm - compute_validation_stats( model.classification?, model.predictedVariables, prediction_feature, algorithm_uri, dry_run ) + compute_validation_stats( model.classification?, model.predictedVariables, + prediction_feature, algorithm_uri, dry_run, task ) end - def compute_validation_stats( classification, predicted_feature, prediction_feature=nil, algorithm_uri=nil, dry_run=false) + def compute_validation_stats( classification, predicted_feature, prediction_feature=nil, + algorithm_uri=nil, dry_run=false, task=nil ) self.attributes = { :prediction_feature => prediction_feature } if self.prediction_feature==nil && prediction_feature self.attributes = { :algorithm_uri => algorithm_uri } if self.algorithm_uri==nil && algorithm_uri @@ -140,7 +141,8 @@ module Validation LOGGER.debug "computing prediction stats" prediction = Lib::OTPredictions.new( classification, self.test_dataset_uri, self.test_target_dataset_uri, self.prediction_feature, - self.prediction_dataset_uri, predicted_feature ) + self.prediction_dataset_uri, predicted_feature, OpenTox::SubTask.create(task, 0, 80) ) + #reading datasets and computing the main stats is 80% the work unless dry_run if prediction.classification? @@ -152,9 +154,12 @@ module Validation :num_without_class => prediction.num_without_class, :percent_without_class => prediction.percent_without_class, :num_unpredicted => prediction.num_unpredicted, - :percent_unpredicted => prediction.percent_unpredicted } + :percent_unpredicted => prediction.percent_unpredicted, + :finished => true} self.save! end + + task.progress(100) if task prediction end end @@ -165,16 +170,19 @@ module Validation def initialize( params={} ) $sinatra.halt 500,"do not set id manually" if params[:id] - $sinatra.halt 500,"do not set uri manually" if params[:crossvalidation_uri] - params[:num_folds] = 10 if params[:num_folds]==nil params[:random_seed] = 1 if params[:random_seed]==nil params[:stratified] = false if params[:stratified]==nil + params[:finished] = false super params self.save! raise "internal error, crossvalidation-id not set" if self.id==nil - self.attributes = { :crossvalidation_uri => $sinatra.url_for("/crossvalidation/"+self.id.to_s, :full) } - self.save! + end + + def perform_cv ( prediction_feature, algorithm_params=nil, task=nil ) + + create_cv_datasets( prediction_feature, OpenTox::SubTask.create(task, 0, 0.33) ) + perform_cv_validations( algorithm_params, OpenTox::SubTask.create(task, 0.33, 1) ) end # deletes a crossvalidation, all validations are deleted as well @@ -186,20 +194,28 @@ module Validation # creates the cv folds # PENDING copying datasets of an equal (same dataset, same params) crossvalidation is disabled for now - def create_cv_datasets( prediction_feature ) + def create_cv_datasets( prediction_feature, task=nil ) - create_new_cv_datasets( prediction_feature ) #unless copy_cv_datasets( prediction_feature ) + create_new_cv_datasets( prediction_feature, task ) #unless copy_cv_datasets( prediction_feature ) end # executes the cross-validation (build models and validates them) - def perform_cv ( algorithm_params=nil ) + def perform_cv_validations( algorithm_params, task=nil ) - LOGGER.debug "perform cv validations" + LOGGER.debug "perform cv validations "+algorithm_params.inspect + i = 0 + task_step = 100 / self.num_folds.to_f; @tmp_validations.each do | val | validation = Validation.new val - validation.validate_algorithm( algorithm_params ) - #break + validation.validate_algorithm( algorithm_params, + OpenTox::SubTask.create(task, i * task_step, ( i + 1 ) * task_step) ) + raise "validation '"+validation.validation_uri+"' for crossvaldation could not be finished" unless + validation.finished + i += 1 end + + self.attributes = { :finished => true } + self.save! end private @@ -222,7 +238,8 @@ module Validation Validation.all( :crossvalidation_id => self.id ).each{ |v| v.delete } return false end - validation = Validation.new :crossvalidation_id => self.id, + validation = Validation.new :validation_type => "crossvalidation", + :crossvalidation_id => self.id, :crossvalidation_fold => v.crossvalidation_fold, :training_dataset_uri => v.training_dataset_uri, :test_dataset_uri => v.test_dataset_uri, @@ -234,7 +251,7 @@ module Validation # creates cv folds (training and testdatasets) # stores uris in validation objects - def create_new_cv_datasets( prediction_feature ) + def create_new_cv_datasets( prediction_feature, task = nil ) $sinatra.halt(500,"random seed not set") unless self.random_seed LOGGER.debug "creating datasets for crossvalidation" @@ -318,13 +335,16 @@ module Validation LOGGER.debug "test set: "+datasetname+"_test, compounds: "+test_compounds.size.to_s test_dataset_uri = orig_dataset.create_new_dataset( test_compounds, test_features, datasetname + '_test', source ) - tmp_validation = { :training_dataset_uri => train_dataset_uri, + tmp_validation = { :validation_type => "crossvalidation", + :training_dataset_uri => train_dataset_uri, :test_dataset_uri => test_dataset_uri, :test_target_dataset_uri => self.dataset_uri, :crossvalidation_id => self.id, :crossvalidation_fold => n, :prediction_feature => prediction_feature, :algorithm_uri => self.algorithm_uri } @tmp_validations << tmp_validation + + task.progress( n / self.num_folds.to_f * 100 ) if task end end end @@ -335,7 +355,7 @@ module Validation # splits a dataset into test and training dataset via bootstrapping # (training dataset-size is n, sampling from orig dataset with replacement) # returns map with training_dataset_uri and test_dataset_uri - def self.bootstrapping( orig_dataset_uri, prediction_feature, random_seed=nil ) + def self.bootstrapping( orig_dataset_uri, prediction_feature, random_seed=nil, task=nil ) random_seed=1 unless random_seed @@ -372,16 +392,20 @@ module Validation LOGGER.debug "bootstrapping on dataset "+orig_dataset_uri+ " into training ("+training_compounds.size.to_s+") and test ("+test_compounds.size.to_s+")"+ ", duplicates in training dataset: "+test_compounds.size.to_s + task.progress(33) if task result = {} result[:training_dataset_uri] = orig_dataset.create_new_dataset( training_compounds, orig_dataset.features, "Bootstrapping training dataset of "+orig_dataset.title.to_s, $sinatra.url_for('/bootstrapping',:full) ) + task.progress(66) if task + result[:test_dataset_uri] = orig_dataset.create_new_dataset( test_compounds, orig_dataset.features.dclone - [prediction_feature], "Bootstrapping test dataset of "+orig_dataset.title.to_s, $sinatra.url_for('/bootstrapping',:full) ) + task.progress(100) if task if ENV['RACK_ENV'] =~ /test|debug/ training_dataset = OpenTox::Dataset.find result[:training_dataset_uri] @@ -390,7 +414,6 @@ module Validation $sinatra.halt 500, "training compounds error" unless training_compounds_verify==training_compounds $sinatra.halt 400, "Test dataset not found: '"+result[:test_dataset_uri].to_s+"'" unless OpenTox::Dataset.find result[:test_dataset_uri] end - LOGGER.debug "bootstrapping done, training dataset: '"+result[:training_dataset_uri].to_s+"', test dataset: '"+result[:test_dataset_uri].to_s+"'" return result @@ -398,7 +421,7 @@ module Validation # splits a dataset into test and training dataset # returns map with training_dataset_uri and test_dataset_uri - def self.train_test_dataset_split( orig_dataset_uri, prediction_feature, split_ratio=nil, random_seed=nil ) + def self.train_test_dataset_split( orig_dataset_uri, prediction_feature, split_ratio=nil, random_seed=nil, task=nil ) split_ratio=0.67 unless split_ratio random_seed=1 unless random_seed @@ -424,24 +447,28 @@ module Validation LOGGER.debug "splitting dataset "+orig_dataset_uri+ " into train:0-"+split.to_s+" and test:"+(split+1).to_s+"-"+(compounds.size-1).to_s+ " (shuffled with seed "+random_seed.to_s+")" - compounds.shuffle!( random_seed ) + task.progress(33) if task result = {} result[:training_dataset_uri] = orig_dataset.create_new_dataset( compounds[0..split], orig_dataset.features, "Training dataset split of "+orig_dataset.title.to_s, $sinatra.url_for('/training_test_split',:full) ) + task.progress(66) if task + result[:test_dataset_uri] = orig_dataset.create_new_dataset( compounds[(split+1)..-1], orig_dataset.features.dclone - [prediction_feature], "Test dataset split of "+orig_dataset.title.to_s, $sinatra.url_for('/training_test_split',:full) ) + task.progress(100) if task - $sinatra.halt 400, "Training dataset not found: '"+result[:training_dataset_uri].to_s+"'" unless OpenTox::Dataset.find result[:training_dataset_uri] - $sinatra.halt 400, "Test dataset not found: '"+result[:test_dataset_uri].to_s+"'" unless OpenTox::Dataset.find result[:test_dataset_uri] + if ENV['RACK_ENV'] =~ /test|debug/ + $sinatra.halt 400, "Training dataset not found: '"+result[:training_dataset_uri].to_s+"'" unless OpenTox::Dataset.find result[:training_dataset_uri] + $sinatra.halt 400, "Test dataset not found: '"+result[:test_dataset_uri].to_s+"'" unless OpenTox::Dataset.find result[:test_dataset_uri] + end LOGGER.debug "split done, training dataset: '"+result[:training_dataset_uri].to_s+"', test dataset: '"+result[:test_dataset_uri].to_s+"'" - return result end diff --git a/validation/validation_test.rb b/validation/validation_test.rb index 5e068a3..31495a2 100644 --- a/validation/validation_test.rb +++ b/validation/validation_test.rb @@ -34,9 +34,9 @@ class ValidationTest < Test::Unit::TestCase def test_it $test_case = self - #get "/report/crossvalidation/15",nil,'HTTP_ACCEPT' => "text/html" + #get "/1",nil,'HTTP_ACCEPT' => "text/html" # get "/",nil,'HTTP_ACCEPT' => "text/html" -# puts last_response.body + #puts last_response.body # d = OpenTox::Dataset.find("http://ot-dev.in-silico.ch/dataset/307") # puts d.compounds.inspect @@ -64,7 +64,7 @@ class ValidationTest < Test::Unit::TestCase # :test_target_dataset_uri=>"http://localhost/dataset/1" # get "/crossvalidation/2",nil,'HTTP_ACCEPT' => "application/rdf+xml" -# puts last_response.body + #puts last_response.body #get "/crossvalidation?model_uri=lazar" # post "/test_validation",:select=>"6d" #,:report=>"yes,please" @@ -80,18 +80,24 @@ class ValidationTest < Test::Unit::TestCase # #:classification=>"true"} # puts last_response.body - #run_test("1b")#,"http://localhost/validation/394"); + #run_test("1b","http://localhost/validation/20")#,"http://localhost/validation/394"); - #run_test("11b", "http://localhost/validation/crossvalidation/2" )# //localhost/validation/42")#, "http://localhost/validation/report/validation/8") #,"http://localhost/validation/report/validation/36") #, "http://localhost/validation/321") + #run_test("7b","http://localhost/validation/21") - # run_test("7a","http://localhost/validation/40") #,"http://localhost/validation/crossvalidation/10") #, "http://localhost/validation/321") + #run_test("3a","http://localhost/validation/crossvalidation/4") + #run_test("3b","http://localhost/validation/crossvalidation/3") + run_test("8a", "http://localhost/validation/crossvalidation/6") + #run_test("8b", "http://localhost/validation/crossvalidation/5") + + #run_test("11b", "http://localhost/validation/crossvalidation/2" )# //localhost/validation/42")#, "http://localhost/validation/report/validation/8") #,"http://localhost/validation/report/validation/36") #, "http://localhost/validation/321") + # run_test("7a","http://localhost/validation/40") #,"http://localhost/validation/crossvalidation/10") #, "http://localhost/validation/321") #run_test("8b", "http://localhost/validation/crossvalidation/4") #puts Nightly.build_nightly("1") - #prepare_examples - do_test_examples # USES CURL, DO NOT FORGET TO RESTART VALIDATION SERVICE + # prepare_examples + #do_test_examples # USES CURL, DO NOT FORGET TO RESTART VALIDATION SERVICE #do_test_examples_ortona end |