From 7870cd69eaa1f0c62d980fb40c45abaacd8c71d2 Mon Sep 17 00:00:00 2001 From: gebele Date: Thu, 13 Jul 2017 07:41:33 +0000 Subject: updated qmrf report; consistent use of uri list for features --- lib/feature.rb | 7 +- lib/report.rb | 335 ++++++++++++++++++++++++++++----------------------------- 2 files changed, 170 insertions(+), 172 deletions(-) diff --git a/lib/feature.rb b/lib/feature.rb index 140cc25..06a5b37 100644 --- a/lib/feature.rb +++ b/lib/feature.rb @@ -7,10 +7,9 @@ get "/feature/?" do return uri_list.join("\n") + "\n" when "application/json" features = JSON.parse features.to_json - features.each_index do |idx| - features[idx][:URI] = uri("/feature/#{features[idx]["_id"]["$oid"]}") - end - return features.to_json + list = [] + features.each{|f| list << uri("/feature/#{f["_id"]["$oid"]}")} + return list.to_json else bad_request_error "Mime type #{@accept} is not supported." end diff --git a/lib/report.rb b/lib/report.rb index ae2526c..f576106 100644 --- a/lib/report.rb +++ b/lib/report.rb @@ -8,202 +8,201 @@ get "/report/?" do uri_list = models.collect{|model| uri("/report/#{model.model_id}")} return uri_list.join("\n") + "\n" when "application/json" - reports = [{}] - #models = JSON.parse models.to_json - models.each_index do |idx| - reports[idx] = {} - reports[idx][:URI] = uri("/report/#{models[idx]["model_id"]}") - reports[idx][:repeated_crossvalidation_uri] = uri("/validation/repeatedcrossvalidation/#{models[idx]["repeated_crossvalidation_id"]}") if models[idx]["repeated_crossvalidation_id"] - reports[idx][:leave_one_out_validation_uri] = uri("/validation/leaveoneoutvalidation/#{models[idx]["leave_one_out_validation_id"]}") if models[idx]["leave_one_out_validation_id"] - reports[idx][:training_dataset_URI] = uri("/dataset/#{models[idx].training_dataset.id}") if models[idx].training_dataset.id - end - return reports.to_json + models = JSON.parse models.to_json + list = [] + models.each{|m| list << uri("/report/#{m["model_id"]["$oid"]}")} + return list.to_json else bad_request_error "Mime type #{@accept} is not supported." end end get "/report/:id/?" do - model = Model::Lazar.find params[:id] - not_found_error "Model with id: #{params[:id]} not found." unless model - prediction_model = Model::Validation.find_by :model_id => params[:id] - validation_template = File.join(File.dirname(__FILE__),"../views/model_details.haml") - - if File.directory?("#{File.dirname(__FILE__)}/../../lazar") - lazar_commit = `cd #{File.dirname(__FILE__)}/../../lazar; git rev-parse HEAD`.strip - lazar_commit = "https://github.com/opentox/lazar/tree/#{lazar_commit}" - else - lazar_commit = "https://github.com/opentox/lazar/releases/tag/v#{Gem.loaded_specs["lazar"].version}" - end + case @accept + when "application/xml" + model = Model::Lazar.find params[:id] + not_found_error "Model with id: #{params[:id]} not found." unless model + prediction_model = Model::Validation.find_by :model_id => params[:id] + validation_template = File.join(File.dirname(__FILE__),"../views/model_details.haml") + + if File.directory?("#{File.dirname(__FILE__)}/../../lazar") + lazar_commit = `cd #{File.dirname(__FILE__)}/../../lazar; git rev-parse HEAD`.strip + lazar_commit = "https://github.com/opentox/lazar/tree/#{lazar_commit}" + else + lazar_commit = "https://github.com/opentox/lazar/releases/tag/v#{Gem.loaded_specs["lazar"].version}" + end - report = OpenTox::QMRFReport.new + report = OpenTox::QMRFReport.new - # QSAR Identifier Title 1.1 - report.value "QSAR_title", "Lazar model for #{prediction_model.species} #{prediction_model.endpoint}" + # QSAR Identifier Title 1.1 + report.value "QSAR_title", "Lazar model for #{prediction_model.species} #{prediction_model.endpoint}" - # Software coding the model 1.3 - report.change_catalog :software_catalog, :firstsoftware, {:name => "lazar", :description => "lazar Lazy Structure- Activity Relationships", :number => "1", :url => "https://lazar.in-silico.ch", :contact => "info@in-silico.ch"} - report.ref_catalog :QSAR_software, :software_catalog, :firstsoftware + # Software coding the model 1.3 + report.change_catalog :software_catalog, :firstsoftware, {:name => "lazar", :description => "lazar Lazy Structure- Activity Relationships", :number => "1", :url => "https://lazar.in-silico.ch", :contact => "info@in-silico.ch"} + report.ref_catalog :QSAR_software, :software_catalog, :firstsoftware - # Date of QMRF 2.1 - report.value "qmrf_date", "#{Time.now.strftime('%d %B %Y')}" + # Date of QMRF 2.1 + report.value "qmrf_date", "#{Time.now.strftime('%d %B %Y')}" - # QMRF author(s) and contact details 2.1 - report.change_catalog :authors_catalog, :firstauthor, {:name => "Christoph Helma", :affiliation => "in silico toxicology gmbh", :contact => "Rastatterstr. 41, CH-4057 Basel", :email => "info@in-silico.ch", :number => "1", :url => "www.in-silico.ch"} - report.ref_catalog :qmrf_authors, :authors_catalog, :firstauthor + # QMRF author(s) and contact details 2.1 + report.change_catalog :authors_catalog, :firstauthor, {:name => "Christoph Helma", :affiliation => "in silico toxicology gmbh", :contact => "Rastatterstr. 41, CH-4057 Basel", :email => "info@in-silico.ch", :number => "1", :url => "www.in-silico.ch"} + report.ref_catalog :qmrf_authors, :authors_catalog, :firstauthor - # Model developer(s) and contact details 2.5 - report.change_catalog :authors_catalog, :modelauthor, {:name => "Christoph Helma", :affiliation => "in silico toxicology gmbh", :contact => "Rastatterstr. 41, CH-4057 Basel", :email => "info@in-silico.ch", :number => "1", :url => "www.in-silico.ch"} - report.ref_catalog :model_authors, :authors_catalog, :modelauthor + # Model developer(s) and contact details 2.5 + report.change_catalog :authors_catalog, :modelauthor, {:name => "Christoph Helma", :affiliation => "in silico toxicology gmbh", :contact => "Rastatterstr. 41, CH-4057 Basel", :email => "info@in-silico.ch", :number => "1", :url => "www.in-silico.ch"} + report.ref_catalog :model_authors, :authors_catalog, :modelauthor - # Date of model development and/or publication 2.6 - report.value "model_date", "#{Time.parse(model.created_at.to_s).strftime('%Y')}" + # Date of model development and/or publication 2.6 + report.value "model_date", "#{Time.parse(model.created_at.to_s).strftime('%Y')}" - # Reference(s) to main scientific papers and/or software package 2.7 - report.change_catalog :publications_catalog, :publications_catalog_1, {:title => "Maunz, Guetlein, Rautenberg, Vorgrimmler, Gebele and Helma (2013), lazar: a modular predictive toxicology framework ", :url => "http://dx.doi.org/10.3389/fphar.2013.00038"} - report.ref_catalog :references, :publications_catalog, :publications_catalog_1 + # Reference(s) to main scientific papers and/or software package 2.7 + report.change_catalog :publications_catalog, :publications_catalog_1, {:title => "Maunz, Guetlein, Rautenberg, Vorgrimmler, Gebele and Helma (2013), lazar: a modular predictive toxicology framework ", :url => "http://dx.doi.org/10.3389/fphar.2013.00038"} + report.ref_catalog :references, :publications_catalog, :publications_catalog_1 - # Reference(s) to main scientific papers and/or software package 2.7 - report.change_catalog :publications_catalog, :publications_catalog_2, {:title => "Maunz A and Helma C (2008) Prediction of chemical toxicity with local support vector regression and activity-specific kernels. SAR & QSAR in Environmental Research 19 (5-6), 413-431", :url => "http://dx.doi.org/10.1080/10629360802358430"} - report.ref_catalog :references, :publications_catalog, :publications_catalog_2 + # Reference(s) to main scientific papers and/or software package 2.7 + report.change_catalog :publications_catalog, :publications_catalog_2, {:title => "Maunz A and Helma C (2008) Prediction of chemical toxicity with local support vector regression and activity-specific kernels. SAR & QSAR in Environmental Research 19 (5-6), 413-431", :url => "http://dx.doi.org/10.1080/10629360802358430"} + report.ref_catalog :references, :publications_catalog, :publications_catalog_2 - # Species 3.1 - report.value "model_species", prediction_model.species + # Species 3.1 + report.value "model_species", prediction_model.species - # Endpoint 3.2 - report.change_catalog :endpoints_catalog, :endpoints_catalog_1, {:name => prediction_model.endpoint, :group => ""} - report.ref_catalog :model_endpoint, :endpoints_catalog, :endpoints_catalog_1 + # Endpoint 3.2 + report.change_catalog :endpoints_catalog, :endpoints_catalog_1, {:name => prediction_model.endpoint, :group => ""} + report.ref_catalog :model_endpoint, :endpoints_catalog, :endpoints_catalog_1 - # Endpoint Units 3.4 - report.value "endpoint_units", "#{prediction_model.unit}" + # Endpoint Units 3.4 + report.value "endpoint_units", "#{prediction_model.unit}" - model_type = model.class.to_s.gsub('OpenTox::Model::Lazar','') + model_type = model.class.to_s.gsub('OpenTox::Model::Lazar','') - # Type of model 4.1 - report.value "algorithm_type", "#{model_type}" + # Type of model 4.1 + report.value "algorithm_type", "#{model_type}" - # Explicit algorithm 4.2 - report.change_catalog :algorithms_catalog, :algorithms_catalog_1, {:definition => "see Helma 2016 and lazar.in-silico.ch, submitted version: #{lazar_commit}", :description => "Neighbor algorithm: #{model.neighbor_algorithm.gsub('_',' ').titleize}#{(model.neighbor_algorithm_parameters[:min_sim] ? ' with similarity > ' + model.neighbor_algorithm_parameters[:min_sim].to_s : '')}"} - report.ref_catalog :algorithm_explicit, :algorithms_catalog, :algorithms_catalog_1 - report.change_catalog :algorithms_catalog, :algorithms_catalog_3, {:definition => "see Helma 2016 and lazar.in-silico.ch, submitted version: #{lazar_commit}", :description => "modified k-nearest neighbor #{model_type}"} - report.ref_catalog :algorithm_explicit, :algorithms_catalog, :algorithms_catalog_3 - if model.prediction_algorithm_parameters - pred_algorithm_params = (model.prediction_algorithm_parameters[:method] == "rf" ? "random forest" : model.prediction_algorithm_parameters[:method]) - end - report.change_catalog :algorithms_catalog, :algorithms_catalog_2, {:definition => "see Helma 2016 and lazar.in-silico.ch, submitted version: #{lazar_commit}", :description => "Prediction algorithm: #{model.prediction_algorithm.gsub('OpenTox::Algorithm::','').gsub('_',' ').gsub('.', ' with ')} #{(pred_algorithm_params ? pred_algorithm_params : '')}"} - report.ref_catalog :algorithm_explicit, :algorithms_catalog, :algorithms_catalog_2 + # Explicit algorithm 4.2 + report.change_catalog :algorithms_catalog, :algorithms_catalog_1, {:definition => "see Helma 2016 and lazar.in-silico.ch, submitted version: #{lazar_commit}", :description => "Neighbor algorithm: #{model.algorithms["similarity"]["method"].gsub('_',' ').titleize}#{(model.algorithms["similarity"][:min] ? ' with similarity > ' + model.algorithms["similarity"][:min].to_s : '')}"} + report.ref_catalog :algorithm_explicit, :algorithms_catalog, :algorithms_catalog_1 + report.change_catalog :algorithms_catalog, :algorithms_catalog_3, {:definition => "see Helma 2016 and lazar.in-silico.ch, submitted version: #{lazar_commit}", :description => "modified k-nearest neighbor #{model_type}"} + report.ref_catalog :algorithm_explicit, :algorithms_catalog, :algorithms_catalog_3 + if model.algorithms["prediction"] + pred_algorithm_params = (model.algorithms["prediction"][:method] == "rf" ? "random forest" : model.algorithms["prediction"][:method]) + end + report.change_catalog :algorithms_catalog, :algorithms_catalog_2, {:definition => "see Helma 2016 and lazar.in-silico.ch, submitted version: #{lazar_commit}", :description => "Prediction algorithm: #{model.algorithms["prediction"].to_s.gsub('OpenTox::Algorithm::','').gsub('_',' ').gsub('.', ' with ')} #{(pred_algorithm_params ? pred_algorithm_params : '')}"} + report.ref_catalog :algorithm_explicit, :algorithms_catalog, :algorithms_catalog_2 - # Descriptors in the model 4.3 - if model.neighbor_algorithm_parameters[:type] - report.change_catalog :descriptors_catalog, :descriptors_catalog_1, {:description => "", :name => "#{model.neighbor_algorithm_parameters[:type]}", :publication_ref => "", :units => ""} - report.ref_catalog :algorithms_descriptors, :descriptors_catalog, :descriptors_catalog_1 - end + # Descriptors in the model 4.3 + if model.algorithms["descriptors"][:type] + report.change_catalog :descriptors_catalog, :descriptors_catalog_1, {:description => "", :name => "#{model.algorithms["descriptors"][:type]}", :publication_ref => "", :units => ""} + report.ref_catalog :algorithms_descriptors, :descriptors_catalog, :descriptors_catalog_1 + end - # Descriptor selection 4.4 - report.value "descriptors_selection", "#{model.feature_selection_algorithm.gsub('_',' ')} #{model.feature_selection_algorithm_parameters.collect{|k,v| k.to_s + ': ' + v.to_s}.join(', ')}" if model.feature_selection_algorithm - - # Algorithm and descriptor generation 4.5 - report.value "descriptors_generation", "exhaustive breadth first search for paths in chemical graphs (simplified MolFea algorithm)" - - # Software name and version for descriptor generation 4.6 - report.change_catalog :software_catalog, :software_catalog_2, {:name => "lazar, submitted version: #{lazar_commit}", :description => "simplified MolFea algorithm", :number => "2", :url => "https://lazar.in-silico.ch", :contact => "info@in-silico.ch"} - report.ref_catalog :descriptors_generation_software, :software_catalog, :software_catalog_2 - - # Chemicals/Descriptors ratio 4.7 - report.value "descriptors_chemicals_ratio", "not applicable (classification based on activities of neighbors, descriptors are used for similarity calculation)" - - # Description of the applicability domain of the model 5.1 - report.value "app_domain_description", " -

- The applicability domain (AD) of the training set is characterized by - the confidence index of a prediction (high confidence index: close to - the applicability domain of the training set/reliable prediction, low - confidence: far from the applicability domain of the - trainingset/unreliable prediction). The confidence index considers (i) - the similarity and number of neighbors and (ii) contradictory examples - within the neighbors. A formal definition can be found in Helma 2006. -

+ # Descriptor selection 4.4 + report.value "descriptors_selection", "#{model.algorithms["feature_selection"].gsub('_',' ')} #{model.algorithms["feature_selection"].collect{|k,v| k.to_s + ': ' + v.to_s}.join(', ')}" if model.algorithms["feature_selection"] + + # Algorithm and descriptor generation 4.5 + report.value "descriptors_generation", "exhaustive breadth first search for paths in chemical graphs (simplified MolFea algorithm)" + + # Software name and version for descriptor generation 4.6 + report.change_catalog :software_catalog, :software_catalog_2, {:name => "lazar, submitted version: #{lazar_commit}", :description => "simplified MolFea algorithm", :number => "2", :url => "https://lazar.in-silico.ch", :contact => "info@in-silico.ch"} + report.ref_catalog :descriptors_generation_software, :software_catalog, :software_catalog_2 + + # Chemicals/Descriptors ratio 4.7 + report.value "descriptors_chemicals_ratio", "not applicable (classification based on activities of neighbors, descriptors are used for similarity calculation)" + + # Description of the applicability domain of the model 5.1 + report.value "app_domain_description", " +

+ The applicability domain (AD) of the training set is characterized by + the confidence index of a prediction (high confidence index: close to + the applicability domain of the training set/reliable prediction, low + confidence: far from the applicability domain of the + trainingset/unreliable prediction). The confidence index considers (i) + the similarity and number of neighbors and (ii) contradictory examples + within the neighbors. A formal definition can be found in Helma 2006. +

+

+ The reliability of predictions decreases gradually with increasing + distance from the applicability domain (i.e. decreasing confidence index) +

+ + " + + # Method used to assess the applicability domain 5.2 + report.value "app_domain_method", "see Helma 2006 and Maunz 2008" + + # Software name and version for applicability domain assessment 5.3 + report.change_catalog :software_catalog, :software_catalog_3, {:name => "lazar, submitted version: #{lazar_commit}", :description => "integrated into main lazar algorithm", :number => "3", :url => "https://lazar.in-silico.ch", :contact => "info@in-silico.ch"} + report.ref_catalog :app_domain_software, :software_catalog, :software_catalog_3 + + # Limits of applicability 5.4 + report.value "applicability_limits", "Predictions with low confidence index, unknown substructures and neighbors that might act by different mechanisms" + + # Availability of the training set 6.1 + report.change_attributes "training_set_availability", {:answer => "Yes"} + + # Available information for the training set 6.2 + report.change_attributes "training_set_data", {:cas => "Yes", :chemname => "Yes", :formula => "Yes", :inchi => "Yes", :mol => "Yes", :smiles => "Yes"} + + # Data for each descriptor variable for the training set 6.3 + report.change_attributes "training_set_descriptors", {:answer => "No"} + + # Data for the dependent variable for the training set 6.4 + report.change_attributes "dependent_var_availability", {:answer => "All"} + + # Other information about the training set 6.5 + report.value "other_info", "#{prediction_model.source}" + + # Pre-processing of data before modelling 6.6 + report.value "preprocessing", (model.class == OpenTox::Model::LazarRegression ? "-log10 transformation" : "none") + + # Robustness - Statistics obtained by leave-many-out cross-validation 6.9 + if prediction_model.repeated_crossvalidation + crossvalidations = prediction_model.crossvalidations + out = haml File.read(validation_template), :layout=> false, :locals => {:model => prediction_model} + report.value "lmo", out + end + + # Mechanistic basis of the model 8.1 + report.value "mechanistic_basis","

- The reliability of predictions decreases gradually with increasing - distance from the applicability domain (i.e. decreasing confidence index) + Compounds with similar structures (neighbors) are assumed to have + similar activities as the query compound. For the determination of + activity specific similarities only statistically relevant subtructures + (paths) are used. For this reason there is a priori no bias towards + specific mechanistic hypothesis.

" - # Method used to assess the applicability domain 5.2 - report.value "app_domain_method", "see Helma 2006 and Maunz 2008" - - # Software name and version for applicability domain assessment 5.3 - report.change_catalog :software_catalog, :software_catalog_3, {:name => "lazar, submitted version: #{lazar_commit}", :description => "integrated into main lazar algorithm", :number => "3", :url => "https://lazar.in-silico.ch", :contact => "info@in-silico.ch"} - report.ref_catalog :app_domain_software, :software_catalog, :software_catalog_3 - - # Limits of applicability 5.4 - report.value "applicability_limits", "Predictions with low confidence index, unknown substructures and neighbors that might act by different mechanisms" - - # Availability of the training set 6.1 - report.change_attributes "training_set_availability", {:answer => "Yes"} - - # Available information for the training set 6.2 - report.change_attributes "training_set_data", {:cas => "Yes", :chemname => "Yes", :formula => "Yes", :inchi => "Yes", :mol => "Yes", :smiles => "Yes"} - - # Data for each descriptor variable for the training set 6.3 - report.change_attributes "training_set_descriptors", {:answer => "No"} - - # Data for the dependent variable for the training set 6.4 - report.change_attributes "dependent_var_availability", {:answer => "All"} - - # Other information about the training set 6.5 - report.value "other_info", "#{prediction_model.source}" - - # Pre-processing of data before modelling 6.6 - report.value "preprocessing", (model.class == OpenTox::Model::LazarRegression ? "-log10 transformation" : "none") - - # Robustness - Statistics obtained by leave-many-out cross-validation 6.9 - if prediction_model.repeated_crossvalidation - crossvalidations = prediction_model.crossvalidations - out = haml File.read(validation_template), :layout=> false, :locals => {:model => prediction_model} - report.value "lmo", out + # A priori or a posteriori mechanistic interpretation 8.2 + report.value "mechanistic_basis_comments","a posteriori for individual predictions" + + # Other information about the mechanistic interpretation 8.3 + report.value "mechanistic_basis_info","

Hypothesis about biochemical mechanisms can be derived from individual + predictions by inspecting neighbors and relevant fragments.

+

Neighbors are compounds that are similar in respect to a certain + endpoint and it is likely that compounds with high similarity act by + similar mechanisms as the query compound. Links at the webinterface + prove an easy access to additional experimental data and literature + citations for the neighbors and the query structure.

+

Activating and deactivating parts of the query compound are highlighted + in red and green on the webinterface. Fragments that are unknown (or too + infrequent for statistical evaluation are marked in yellow and + additional statistical information about the individual fragments can be + retrieved. Please note that lazar predictions are based on neighbors and + not on fragments. Fragments and their statistical significance are used + for the calculation of activity specific similarities.

" + + # Bibliography 9.2 + report.ref_catalog :bibliography, :publications_catalog, :publications_catalog_1 + report.ref_catalog :bibliography, :publications_catalog, :publications_catalog_2 + report.change_catalog :publications_catalog, :publications_catalog_3, {:title => "Helma (2006), Lazy structure-activity relationships (lazar) for the prediction of rodent carcinogenicity and Salmonella mutagenicity.", :url => "http://dx.doi.org/10.1007/s11030-005-9001-5"} + report.ref_catalog :bibliography, :publications_catalog, :publications_catalog_3 + + # output + response['Content-Type'] = "application/xml" + return report.to_xml + else + bad_request_error "Mime type #{@accept} is not supported." end - # Mechanistic basis of the model 8.1 - report.value "mechanistic_basis"," -

- Compounds with similar structures (neighbors) are assumed to have - similar activities as the query compound. For the determination of - activity specific similarities only statistically relevant subtructures - (paths) are used. For this reason there is a priori no bias towards - specific mechanistic hypothesis. -

- -" - - # A priori or a posteriori mechanistic interpretation 8.2 - report.value "mechanistic_basis_comments","a posteriori for individual predictions" - - # Other information about the mechanistic interpretation 8.3 - report.value "mechanistic_basis_info","

Hypothesis about biochemical mechanisms can be derived from individual - predictions by inspecting neighbors and relevant fragments.

-

Neighbors are compounds that are similar in respect to a certain - endpoint and it is likely that compounds with high similarity act by - similar mechanisms as the query compound. Links at the webinterface - prove an easy access to additional experimental data and literature - citations for the neighbors and the query structure.

-

Activating and deactivating parts of the query compound are highlighted - in red and green on the webinterface. Fragments that are unknown (or too - infrequent for statistical evaluation are marked in yellow and - additional statistical information about the individual fragments can be - retrieved. Please note that lazar predictions are based on neighbors and - not on fragments. Fragments and their statistical significance are used - for the calculation of activity specific similarities.

" - - # Bibliography 9.2 - report.ref_catalog :bibliography, :publications_catalog, :publications_catalog_1 - report.ref_catalog :bibliography, :publications_catalog, :publications_catalog_2 - report.change_catalog :publications_catalog, :publications_catalog_3, {:title => "Helma (2006), Lazy structure-activity relationships (lazar) for the prediction of rodent carcinogenicity and Salmonella mutagenicity.", :url => "http://dx.doi.org/10.1007/s11030-005-9001-5"} - report.ref_catalog :bibliography, :publications_catalog, :publications_catalog_3 - - # output - response['Content-Type'] = "application/xml" - return report.to_xml - end -- cgit v1.2.3