summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: gebele <gebele@in-silico.ch> 2017-08-08 14:51:28 +0000
committer: gebele <gebele@in-silico.ch> 2017-08-08 14:51:28 +0000
commit: 42e9f0f9a50372b3767c496436acad82340f8de7 (patch)
tree: 0b916ed5100a8be8eb08d076b2af0eed2e4894bd
parent: 292ffcd5eccb05b2bea1aab64504134f5cdd0834 (diff)
fixes for batch and qmrf
-rw-r--r-- application.rb 236
-rw-r--r-- views/batch.haml 11
-rw-r--r-- views/neighbors.haml 2
-rw-r--r-- views/prediction.haml 18
4 files changed, 163 insertions, 104 deletions
diff --git a/application.rb b/application.rb
index 1694b6e..ecc9ff6 100644
--- a/application.rb
+++ b/application.rb
@@ -135,7 +135,7 @@ post '/predict/?' do
@warnings.each do |w|
substring = w.match(/line .* of/)
unless substring.nil?
- delEntries += "\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"#{w.sub(/\b(tmp\/)\b/,"")}\"\n"
+ delEntries += "\"#{w.sub(/\b(tmp\/)\b/,"")}\"\n"
end
substring = w.match(/rows .* Entries/)
unless substring.nil?
@@ -149,49 +149,61 @@ post '/predict/?' do
@batch.each_with_index do |hash, idx|
@csvhash[idx] = ""
model = hash[0]
+ # create header
+ if model.regression?
+ predAunit = "(#{model.unit})"
+ predBunit = "(#{model.unit =~ /mmol\/L/ ? "(mol/L)" : "(mg/kg_bw/day)"})"
+ @csvhash[idx] = "\"ID\",\"Endpoint\",\"Type\",\"Unique SMILES\",\"Prediction #{predAunit}\",\"Prediction #{predBunit}\",\"95% Prediction interval (low) #{predAunit}\",\"95% Prediction interval (high) #{predAunit}\",\"95% Prediction interval (low) #{predBunit}\",\"95% Prediction interval (high) #{predBunit}\",\"inApplicabilityDomain\",\"inTrainningSet\",\"Note\"\n"
+ else #classification
+ av = model.prediction_feature.accept_values
+ probFirst = av[0].capitalize
+ probLast = av[1].capitalize
+ @csvhash[idx] = "\"ID\",\"Endpoint\",\"Type\",\"Unique SMILES\",\"Prediction\",\"predProbability#{probFirst}\",\"predProbability#{probLast}\",\"inApplicabilityDomain\",\"inTrainningSet\",\"Note\"\n"
+ end
values = hash[1]
dupEntries.keys.each{|k| values.insert(k-1, dupEntries[k])}.compact!
+
values.each_with_index do |array, id|
+ type = (model.regression? ? "Regression" : "Classification")
+ endpoint = "#{model.endpoint.gsub('_', ' ')} (#{model.species})"
+
+ if id == 0
+ @csvhash[idx] += delEntries unless delEntries.blank?
+ end
unless array.kind_of? String
compound = array[0]
prediction = array[1]
smiles = compound.smiles
- type = model.model.class.to_s.match("Classification") ? "Classification" : "Regression"
- endpoint = "#{model.endpoint.gsub('_', ' ')} (#{model.species})"
- pred = propA = propB = interval = inApp = inT = note = ""
+
if prediction[:neighbors]
if prediction[:value]
- pred = prediction[:value].numeric? ? "#{prediction[:value].delog10.signif(3)} (#{model.unit}), #{compound.mmol_to_mg(prediction[:value].delog10.signif(3))} #{(model.unit =~ /\b(mol\/L)\b/) ? "(mg/L)" : "(mg/kg_bw/day)"}" : prediction[:value]
+ pred = prediction[:value].numeric? ? "#{prediction[:value].delog10.signif(3)}" : prediction[:value]
+ predA = prediction[:value].numeric? ? "#{prediction[:value].delog10.signif(3)}" : prediction[:value]
+ predAunit = prediction[:value].numeric? ? "(#{model.unit})" : ""
+ predB = prediction[:value].numeric? ? "#{compound.mmol_to_mg(prediction[:value].delog10).signif(3)}" : prediction[:value]
+ predBunit = prediction[:value].numeric? ? "#{model.unit =~ /\b(mmol\/L)\b/ ? "(mg/L)" : "(mg/kg_bw/day)"}" : ""
int = (prediction[:prediction_interval].nil? ? nil : prediction[:prediction_interval])
- interval = (int.nil? ? "" : "#{int[1].delog10.signif(3)} - #{int[0].delog10.signif(3)} (#{model.unit})")
+ intervalLow = (int.nil? ? "" : "#{int[1].delog10.signif(3)}")
+ intervalHigh = (int.nil? ? "" : "#{int[0].delog10.signif(3)}")
+ intervalLowMg = (int.nil? ? "" : "#{compound.mmol_to_mg(int[1].delog10).signif(3)}")
+ intervalHighMg = (int.nil? ? "" : "#{compound.mmol_to_mg(int[0].delog10).signif(3)}")
inApp = "yes"
inT = prediction[:info] =~ /\b(identical)\b/i ? "yes" : "no"
note = prediction[:warnings].join("\n") + ( prediction[:info] ? prediction[:info].sub(/\'.*\'/,"") : "\n" )
+
unless prediction[:probabilities].nil?
- if id == 0
- probFirst = probLast = ""
- probFirst = prediction[:probabilities].keys.first.capitalize
- prediction[:probabilities].keys.last.split("-").each{|s| probLast += s.capitalize}
- @csvhash[idx] = "\"ID\",\"Endpoint\",\"Type\",\"Unique SMILES\",\"Prediction\",\"predProbability#{probFirst}\",\"predProbability#{probLast}\",\"95% Prediction interval\",\"inApplicabilityDomain\",\"inTrainningSet\",\"Note\"\n"
- unless delEntries.blank? and id == 0
- @csvhash[idx] += delEntries
- end
- end
- propA = "#{prediction[:probabilities].values_at(prediction[:probabilities].keys.first)[0].to_f.signif(3)}"
- propB = "#{prediction[:probabilities].values_at(prediction[:probabilities].keys.last)[0].to_f.signif(3)}"
- else
- @csvhash[idx] = "\"ID\",\"Endpoint\",\"Type\",\"Unique SMILES\",\"Prediction\",\"predProbability\",\"predProbability\",\"95% Prediction interval\",\"inApplicabilityDomain\",\"inTrainningSet\",\"Note\"\n"
- unless delEntries.blank? and id == 0
- @csvhash[idx] += delEntries
- end
+ av = model.prediction_feature.accept_values
+ propA = "#{prediction[:probabilities][av[0]].to_f.signif(3)}"
+ propB = "#{prediction[:probabilities][av[1]].to_f.signif(3)}"
end
- # only one neighbor
else
+ # no prediction value only one neighbor
inApp = "no"
inT = prediction[:info] =~ /\b(identical)\b/i ? "yes" : "no"
note = prediction[:warnings].join("\n") + ( prediction[:info] ? prediction[:info].sub(/\'.*\'/,"") : "\n" )
end
- else # no prediction value
+ else
+ # no prediction value
inApp = "no"
inT = prediction[:info] =~ /\b(identical)\b/i ? "yes" : "no"
note = prediction[:warnings].join("\n") + ( prediction[:info] ? prediction[:info].sub(/\'.*\'/,"") : "\n" )
@@ -202,10 +214,15 @@ post '/predict/?' do
end
end
else
- endpoint = type = smiles = pred = propA = propB = interval = inApp = inT = ""
+ # string note for duplicates
+ endpoint = type = smiles = pred = predA = predB = propA = propB = intervalLow = intervalHigh = intervalLowMg = intervalHighMg = inApp = inT = ""
note = array
end
- @csvhash[idx] += "\"#{id+1}\",\"#{endpoint}\",\"#{type}\",\"#{smiles}\",\"#{pred}\",\"#{propA}\",\"#{propB}\",\"#{interval}\",\"#{inApp}\",\"#{inT}\",\"#{note.chomp}\"\n"
+ if model.regression?
+ @csvhash[idx] += "\"#{id+1}\",\"#{endpoint}\",\"#{type}\",\"#{smiles}\",\"#{predA}\",\"#{predB}\",\"#{intervalLow}\",\"#{intervalHigh}\",\"#{intervalLowMg}\",\"#{intervalHighMg}\",\"#{inApp}\",\"#{inT}\",\"#{note.chomp}\"\n"
+ else
+ @csvhash[idx] += "\"#{id+1}\",\"#{endpoint}\",\"#{type}\",\"#{smiles}\",\"#{pred}\",\"#{propA}\",\"#{propB}\",\"#{inApp}\",\"#{inT}\",\"#{note.chomp}\"\n"
+ end
end
end
t = Tempfile.new
@@ -264,10 +281,10 @@ get "/report/:id/?" do
report = OpenTox::QMRFReport.new
# QSAR Identifier Title 1.1
- report.value "QSAR_title", "Lazar model for #{prediction_model.species} #{prediction_model.endpoint}"
+ report.value "QSAR_title", "Lazar model for #{prediction_model.species} #{prediction_model.endpoint.downcase}"
# Software coding the model 1.3
- report.change_catalog :software_catalog, :firstsoftware, {:name => "lazar", :description => "lazar Lazy Structure- Activity Relationships", :number => "1", :url => "https://lazar.in-silico.ch", :contact => "info@in-silico.ch"}
+ report.change_catalog :software_catalog, :firstsoftware, {:name => "lazar", :description => "lazar Lazy Structure- Activity Relationships. See #{lazar_commit}", :number => "1", :url => "https://lazar.in-silico.ch", :contact => "info@in-silico.ch"}
report.ref_catalog :QSAR_software, :software_catalog, :firstsoftware
# Date of QMRF 2.1
@@ -285,85 +302,79 @@ get "/report/:id/?" do
report.value "model_date", "#{Time.parse(model.created_at.to_s).strftime('%Y')}"
# Reference(s) to main scientific papers and/or software package 2.7
- report.change_catalog :publications_catalog, :publications_catalog_1, {:title => "Maunz, Guetlein, Rautenberg, Vorgrimmler, Gebele and Helma (2013), lazar: a modular predictive toxicology framework ", :url => "http://dx.doi.org/10.3389/fphar.2013.00038"}
+ report.change_catalog :publications_catalog, :publications_catalog_1, {:title => "Maunz A., Guetlein M., Rautenberg M., Vorgrimmler D., Gebele D. and Helma C. (2013), lazar: a modular predictive toxicology framework ", :number => "1", :url => "http://dx.doi.org/10.3389/fphar.2013.00038"}
report.ref_catalog :references, :publications_catalog, :publications_catalog_1
- # Reference(s) to main scientific papers and/or software package 2.7
- report.change_catalog :publications_catalog, :publications_catalog_2, {:title => "Maunz A and Helma C (2008) Prediction of chemical toxicity with local support vector regression and activity-specific kernels. SAR & QSAR in Environmental Research 19 (5-6), 413-431", :url => "http://dx.doi.org/10.1080/10629360802358430"}
+ report.change_catalog :publications_catalog, :publications_catalog_2, {:title => "Helma C, Gebele D, Rautenberg M (2017) lazar, software available at https://lazar.in-silico.ch,source code available at #{lazar_commit}", :number => "2", :url => "https://doi.org/10.5281/zenodo.215483"}
report.ref_catalog :references, :publications_catalog, :publications_catalog_2
+ # Availability of information about the model 2.8
+ report.value "info_availability", "Prediction interface and validation results available at https://lazar.in-silico.ch"
+
# Species 3.1
report.value "model_species", prediction_model.species
- # Endpoint 3.2
- report.change_catalog :endpoints_catalog, :endpoints_catalog_1, {:name => prediction_model.endpoint, :group => ""}
+ # Endpoint 3.2
+ report.change_catalog :endpoints_catalog, :endpoints_catalog_1, {:name => prediction_model.qmrf["name"], :group => "#{prediction_model.qmrf["group"]}"}
report.ref_catalog :model_endpoint, :endpoints_catalog, :endpoints_catalog_1
# Endpoint Units 3.4
report.value "endpoint_units", "#{prediction_model.unit}"
- model_type = model.class.to_s.gsub('OpenTox::Model::Lazar','')
+ # Dependent variable 3.5
+ report.value "endpoint_variable", "#{prediction_model.endpoint} #{prediction_model.regression? ? "regression" : "classification"}"
# Type of model 4.1
+ model_type = model.class.to_s.gsub('OpenTox::Model::Lazar','')
report.value "algorithm_type", "#{model_type}"
# Explicit algorithm 4.2
- report.change_catalog :algorithms_catalog, :algorithms_catalog_1, {:definition => "see Helma 2016 and lazar.in-silico.ch, submitted version: #{lazar_commit}", :description => "Neighbor algorithm: #{model.algorithms["similarity"]["method"].gsub('_',' ').titleize}#{(model.algorithms["similarity"][:min] ? ' with similarity > ' + model.algorithms["similarity"][:min].to_s : '')}"}
report.ref_catalog :algorithm_explicit, :algorithms_catalog, :algorithms_catalog_1
- report.change_catalog :algorithms_catalog, :algorithms_catalog_3, {:definition => "see Helma 2016 and lazar.in-silico.ch, submitted version: #{lazar_commit}", :description => "modified k-nearest neighbor #{model_type}"}
- report.ref_catalog :algorithm_explicit, :algorithms_catalog, :algorithms_catalog_3
- if model.algorithms["prediction"]
- pred_algorithm_params = (model.algorithms["prediction"][:method] == "rf" ? "random forest" : model.algorithms["prediction"][:method])
- end
- report.change_catalog :algorithms_catalog, :algorithms_catalog_2, {:definition => "see Helma 2016 and lazar.in-silico.ch, submitted version: #{lazar_commit}", :description => "Prediction algorithm: #{model.algorithms["prediction"].to_s.gsub('OpenTox::Algorithm::','').gsub('_',' ').gsub('.', ' with ')} #{(pred_algorithm_params ? pred_algorithm_params : '')}"}
- report.ref_catalog :algorithm_explicit, :algorithms_catalog, :algorithms_catalog_2
+ report.change_catalog :algorithms_catalog, :algorithms_catalog_1, {:definition => "", :description => "modified k-nearest neighbor #{model_type.downcase} (#{model_type =~ /regression/i ? "local random forest" : "weighted majority vote"}), see #{lazar_commit}" }
# Descriptors in the model 4.3
if model.algorithms["descriptors"][:type]
- report.change_catalog :descriptors_catalog, :descriptors_catalog_1, {:description => "", :name => "#{model.algorithms["descriptors"][:type]}", :publication_ref => "", :units => ""}
+ report.change_catalog :descriptors_catalog, :descriptors_catalog_1, {:description => "Molprint 2D (Bender et al. 2004)", :name => "#{model.algorithms["descriptors"][:type]} fingerprints", :publication_ref => "", :units => ""}
report.ref_catalog :algorithms_descriptors, :descriptors_catalog, :descriptors_catalog_1
end
# Descriptor selection 4.4
- report.value "descriptors_selection", "#{model.algorithms["feature_selection"].gsub('_',' ')} #{model.algorithms["feature_selection"].collect{|k,v| k.to_s + ': ' + v.to_s}.join(', ')}" if model.algorithms["feature_selection"]
+ report.value "descriptors_selection", (model.class == OpenTox::Model::LazarRegression ? "Correlation with dependent variable (Pearson p <= 0.05)" : "none")
# Algorithm and descriptor generation 4.5
- report.value "descriptors_generation", "exhaustive breadth first search for paths in chemical graphs (simplified MolFea algorithm)"
+ report.value "descriptors_generation", "lazar"
# Software name and version for descriptor generation 4.6
- report.change_catalog :software_catalog, :software_catalog_2, {:name => "lazar, submitted version: #{lazar_commit}", :description => "simplified MolFea algorithm", :number => "2", :url => "https://lazar.in-silico.ch", :contact => "info@in-silico.ch"}
+ report.change_catalog :software_catalog, :software_catalog_2, {:name => "lazar, submitted version: #{lazar_commit}", :description => "", :number => "2", :url => "", :contact => ""}
report.ref_catalog :descriptors_generation_software, :software_catalog, :software_catalog_2
# Chemicals/Descriptors ratio 4.7
- report.value "descriptors_chemicals_ratio", "not applicable (classification based on activities of neighbors, descriptors are used for similarity calculation)"
+ report.value "descriptors_chemicals_ratio", (model.class == OpenTox::Model::LazarRegression ? "variable (local regression models)" : "not applicable (classification based on activities of neighbors, descriptors are used for similarity calculation)")
# Description of the applicability domain of the model 5.1
report.value "app_domain_description", "<html><head></head><body>
<p>
- The applicability domain (AD) of the training set is characterized by
- the confidence index of a prediction (high confidence index: close to
- the applicability domain of the training set/reliable prediction, low
- confidence: far from the applicability domain of the
- trainingset/unreliable prediction). The confidence index considers (i)
- the similarity and number of neighbors and (ii) contradictory examples
- within the neighbors. A formal definition can be found in Helma 2006.
+ No predictions are made for query compounds without similar structures
+ in the training data. Similarity is determined as the Tanimoto coefficient of
+ Molprint 2D fingerprints with a threshold of 0.1.
</p>
<p>
- The reliability of predictions decreases gradually with increasing
- distance from the applicability domain (i.e. decreasing confidence index)
+ Predictions based on a low number and/or very dissimilar neighbors or
+ on neighbors with conflicting experimental measurements
+ should be treated with caution.
</p>
</body>
</html>"
# Method used to assess the applicability domain 5.2
- report.value "app_domain_method", "see Helma 2006 and Maunz 2008"
+ report.value "app_domain_method", "Number and similarity of training set compounds (part of the main lazar algorithm)"
# Software name and version for applicability domain assessment 5.3
- report.change_catalog :software_catalog, :software_catalog_3, {:name => "lazar, submitted version: #{lazar_commit}", :description => "integrated into main lazar algorithm", :number => "3", :url => "https://lazar.in-silico.ch", :contact => "info@in-silico.ch"}
+ report.change_catalog :software_catalog, :software_catalog_3, {:name => "lazar, submitted version: #{lazar_commit}", :description => "", :number => "3", :url => "", :contact => ""}
report.ref_catalog :app_domain_software, :software_catalog, :software_catalog_3
# Limits of applicability 5.4
- report.value "applicability_limits", "Predictions with low confidence index, unknown substructures and neighbors that might act by different mechanisms"
+ report.value "applicability_limits", "Compounds without similar substances in the training dataset"
# Availability of the training set 6.1
report.change_attributes "training_set_availability", {:answer => "Yes"}
@@ -372,66 +383,119 @@ get "/report/:id/?" do
report.change_attributes "training_set_data", {:cas => "Yes", :chemname => "Yes", :formula => "Yes", :inchi => "Yes", :mol => "Yes", :smiles => "Yes"}
# Data for each descriptor variable for the training set 6.3
- report.change_attributes "training_set_descriptors", {:answer => "No"}
+ report.change_attributes "training_set_descriptors", {:answer => "on demand"}
# Data for the dependent variable for the training set 6.4
- report.change_attributes "dependent_var_availability", {:answer => "All"}
+ report.change_attributes "dependent_var_availability", {:answer => "Yes"}
# Other information about the training set 6.5
- report.value "other_info", "#{prediction_model.source}"
+ report.value "other_info", "Original data from: #{prediction_model.source}"
# Pre-processing of data before modelling 6.6
report.value "preprocessing", (model.class == OpenTox::Model::LazarRegression ? "-log10 transformation" : "none")
# Robustness - Statistics obtained by leave-many-out cross-validation 6.9
if prediction_model.repeated_crossvalidation
- $logger.error "#####################{prediction_model}"
crossvalidations = prediction_model.crossvalidations
- out = haml File.read(validation_template), :layout=> false, :locals => {:model => prediction_model, :crossvalidations => crossvalidations}
- report.value "lmo", out
+ block = ""
+ crossvalidations.each do |cv|
+ block += "<p>
+ <p>Num folds: #{cv.folds}</p>
+ <p>Num instances: #{cv.nr_instances}</p>
+ <p>Num unpredicted: #{cv.nr_unpredicted}</p>"
+ if model_type =~ /classification/i
+ block += "<p>Accuracy: #{cv.accuracy.signif(3)}</p>
+ <p>Weighted accuracy: #{cv.weighted_accuracy.signif(3)}</p>
+ <p>True positive rate: #{cv.true_rate[cv.accept_values[0]].signif(3)}</p>
+ <p>True negative rate: #{cv.true_rate[cv.accept_values[1]].signif(3)}</p>
+ <p>Positive predictive value: #{cv.predictivity[cv.accept_values[0]].signif(3)}</p>
+ <p>Negative predictive value: #{cv.predictivity[cv.accept_values[1]].signif(3)}</p>"
+ end
+ if model_type =~ /regression/i
+ block += "<p>RMSE: #{cv.rmse.signif(3)}</p>
+ <p>MAE: #{cv.mae.signif(3)}</p>
+ <p>R<sup>2</sup>: #{cv.r_squared.signif(3)}</p>"
+ end
+ block += "</p>"
+ end
+ report.value "lmo", "<html><head></head><body><b>3 independent 10-fold crossvalidations:</b>"+block+"</body></html>"
end
+ # Availability of the external validation set 7.1
+ report.change_attributes "validation_set_availability", {:answer => "No"}
+
+ # Available information for the external validation set 7.2
+ report.change_attributes "validation_set_data", {:cas => "", :chemname => "", :formula => "", :inchi => "", :mol => "", :smiles => ""}
+
+ # Data for each descriptor variable for the external validation set 7.3
+ report.change_attributes "validation_set_descriptors", {:answer => "Unknown"}
+
+ # Data for the dependent variable for the external validation set 7.4
+ report.change_attributes "validation_dependent_var_availability", {:answer => "Unknown"}
+
# Mechanistic basis of the model 8.1
report.value "mechanistic_basis","<html><head></head><body>
<p>
- Compounds with similar structures (neighbors) are assumed to have
- similar activities as the query compound. For the determination of
- activity specific similarities only statistically relevant subtructures
- (paths) are used. For this reason there is a priori no bias towards
- specific mechanistic hypothesis.
+ Compounds with similar structures (neighbors) are assumed to have
+ similar activities as the query compound.
</p>
</body>
</html>"
# A priori or a posteriori mechanistic interpretation 8.2
- report.value "mechanistic_basis_comments","a posteriori for individual predictions"
+ report.value "mechanistic_basis_comments","A posteriori for individual predictions"
# Other information about the mechanistic interpretation 8.3
- report.value "mechanistic_basis_info","<html><head></head><body><p>Hypothesis about biochemical mechanisms can be derived from individual
- predictions by inspecting neighbors and relevant fragments.</p>
- <p>Neighbors are compounds that are similar in respect to a certain
+ report.value "mechanistic_basis_info","<html><head></head><body>
+ <p>
+ Hypothesis about biochemical mechanisms can be derived from individual
+ predictions by inspecting neighbors and relevant descriptors.
+ </p>
+ <p>
+ Neighbors are compounds that are similar in respect to a certain
endpoint and it is likely that compounds with high similarity act by
similar mechanisms as the query compound. Links at the webinterface
prove an easy access to additional experimental data and literature
- citations for the neighbors and the query structure.</p>
- <p>Activating and deactivating parts of the query compound are highlighted
- in red and green on the webinterface. Fragments that are unknown (or too
- infrequent for statistical evaluation are marked in yellow and
- additional statistical information about the individual fragments can be
- retrieved. Please note that lazar predictions are based on neighbors and
- not on fragments. Fragments and their statistical significance are used
- for the calculation of activity specific similarities.</p>"
-
- # Bibliography 9.2
+ citations for the neighbors and the query structure.
+ </p>
+ <p>
+ Please note that lazar predictions are based on neighbors.
+ Descriptors are only used for the calculation of similarities.
+ </p>
+ </body>
+</html>"
+
+ # Comments 9.1
+ report.value "comments", "<html><head></head><body>
+ <p>
+ Public model interface: https://lazar.in-silico.ch
+ </p>
+ <p>
+ Source code: #{lazar_commit}
+ </p>
+ <p>
+ Docker image: https://hub.docker.com/r/insilicotox/lazar/
+ </p>
+ </body>
+</html>"
+
+ # Bibliography 9.2
+ report.change_catalog :publications_catalog, :publications_catalog_1, {:title => "Helma (2017), Nano-Lazar: Read across Predictions for Nanoparticle Toxicities with Calculated and Measured Properties", :url => "https://dx.doi.org/10.3389%2Ffphar.2017.00377"}
+ report.change_catalog :publications_catalog, :publications_catalog_2, {:title => "Lo Piparo (2014), Automated and reproducible read-across like models for predicting carcinogenic potency", :url => "https://doi.org/10.1016/j.yrtph.2014.07.010"}
+ report.change_catalog :publications_catalog, :publications_catalog_3, {:title => "Helma (2006), Lazy structure-activity relationships (lazar) for the prediction of rodent carcinogenicity and Salmonella mutagenicity.", :url => "http://dx.doi.org/10.1007/s11030-005-9001-5"}
+ report.change_catalog :publications_catalog, :publications_catalog_4, {:title => "Bender et al. (2004), Molecular similarity searching using atom environments, information-based feature selection, and a nave bayesian classifier.", :url => "https://doi.org/10.1021/ci034207y"}
+
report.ref_catalog :bibliography, :publications_catalog, :publications_catalog_1
report.ref_catalog :bibliography, :publications_catalog, :publications_catalog_2
- report.change_catalog :publications_catalog, :publications_catalog_3, {:title => "Helma (2006), Lazy structure-activity relationships (lazar) for the prediction of rodent carcinogenicity and Salmonella mutagenicity.", :url => "http://dx.doi.org/10.1007/s11030-005-9001-5"}
report.ref_catalog :bibliography, :publications_catalog, :publications_catalog_3
+ report.ref_catalog :bibliography, :publications_catalog, :publications_catalog_4
+
# output
t = Tempfile.new
t << report.to_xml
- send_file t.path, :filename => "QMRF_report_#{model.name}.xml", :type => "application/xml", :disposition => "attachment"
+ name = prediction_model.species.sub(/\s/,"-")+"-"+prediction_model.endpoint.downcase.sub(/\s/,"-")
+ send_file t.path, :filename => "QMRF_report_#{name.gsub!(/[^0-9A-Za-z]/, '_')}.xml", :type => "application/xml", :disposition => "attachment"
end
get '/license' do
diff --git a/views/batch.haml b/views/batch.haml
index 38c8c6e..0e7efc7 100644
--- a/views/batch.haml
+++ b/views/batch.haml
@@ -55,19 +55,16 @@
%b Measured activity:
%br
- if prediction[:measurements].is_a?(Array)
- = (type == "Regression") ? prediction[:measurements].collect{|value| $logger.debug value ; "#{value.delog10.signif(3)} (#{unit})</br>#{compound.mmol_to_mg(value.delog10.signif(3))} #{unit =~ /mmol\/L/ ? "(mg/L)" : "(mg/kg_bw/day)"}"}.join("</br>") : prediction[:measurements].join(", ")
+ = (type == "Regression") ? prediction[:measurements].collect{|value| "#{value.delog10.signif(3)} (#{unit})</br>#{compound.mmol_to_mg(value.delog10).signif(3)} #{unit =~ /mmol\/L/ ? "(mg/L)" : "(mg/kg_bw/day)"}"}.join("</br>") : prediction[:measurements].join(", ")
- else
- - $logger.debug prediction[:measurements]
- - $logger.debug prediction[:measurements].delog10
- - $logger.debug prediction[:measurements].delog10.signif(3)
- = (type == "Regression") ? "#{prediction[:measurements].delog10.signif(3)} (#{unit})</br>#{compound.mmol_to_mg(prediction[:measurements].delog10.signif(3))} #{(unit =~ /\b(mol\/L)\b/) ? "(mg/L)" : "(mg/kg_bw/day)"}" : prediction[:measurements]
+ = (type == "Regression") ? "#{prediction[:measurements].delog10.signif(3)} (#{unit})</br>#{compound.mmol_to_mg(prediction[:measurements].delog10).signif(3)} #{(unit =~ /\b(mmol\/L)\b/) ? "(mg/L)" : "(mg/kg_bw/day)"}" : prediction[:measurements]
/ show prediction
%p
%b Prediction:
%br
- = (type == "Regression") ? "#{prediction[:value].delog10.signif(3)} (#{unit})</br>#{compound.mmol_to_mg(prediction[:value].delog10.signif(3))} #{(unit =~ /\b(mol\/L)\b/) ? "(mg/L)" : "(mg/kg_bw/day)"}" : prediction[:value]
+ = (type == "Regression") ? "#{prediction[:value].delog10.signif(3)} (#{unit})</br>#{compound.mmol_to_mg(prediction[:value].delog10).signif(3)} #{(unit =~ /\b(mmol\/L)\b/) ? "(mg/L)" : "(mg/kg_bw/day)"}" : prediction[:value]
/ show prediction interval or probability
%p
@@ -77,7 +74,7 @@
%br
= interval.nil? ? "" : "#{interval[1].delog10.signif(3)} - #{interval[0].delog10.signif(3)} (#{unit})"
%br
- = "#{compound.mmol_to_mg(interval[1].delog10.signif(3))} - #{compound.mmol_to_mg(interval[0].delog10.signif(3))} #{(unit =~ /\b(mol\/L)\b/) ? "(mg/L)" : "(mg/kg_bw/day)"}" if !prediction[:prediction_interval].nil?
+ = "#{compound.mmol_to_mg(interval[1].delog10).signif(3)} - #{compound.mmol_to_mg(interval[0].delog10).signif(3)} #{(unit =~ /\b(mmol\/L)\b/) ? "(mg/L)" : "(mg/kg_bw/day)"}" if !prediction[:prediction_interval].nil?
- else
%b Probability:
- unless prediction[:probabilities].nil?
diff --git a/views/neighbors.haml b/views/neighbors.haml
index 7c814dc..dd22e71 100644
--- a/views/neighbors.haml
+++ b/views/neighbors.haml
@@ -68,7 +68,7 @@
= (type == "Regression") ? neighbor[:measurement].collect{|value| "#{value.delog10.signif(3)} (#{unit})</br>#{c.mmol_to_mg(value.delog10).signif(3)} #{unit =~ /mmol\/L/ ? "(mg/L)" : "(mg/kg_bw/day)"}"}.join("</br>") : neighbor[:measurement].join(", ")
- else
- if !neighbor[:measurement].nil?
- = (type == "Regression") ? "#{neighbor[:measurement].delog10.signif(3)} (#{unit})</br>#{c.mmol_to_mg(neighbor[:measurement].delog10).signif(3)} #{(unit =~ /\b(mol\/L)\b/) ? "(mg/L)" : "(mg/kg_bw/day)"}" : neighbor[:measurement]
+ = (type == "Regression") ? "#{neighbor[:measurement].delog10.signif(3)} (#{unit})</br>#{c.mmol_to_mg(neighbor[:measurement].delog10).signif(3)} #{(unit =~ /\b(mmol\/L)\b/) ? "(mg/L)" : "(mg/kg_bw/day)"}" : neighbor[:measurement]
/ Similarity = tanimoto
%td{:style =>"vertical-align:middle;padding-left:1em;width:20%;"}
= neighbor[:similarity].round(3)
diff --git a/views/prediction.haml b/views/prediction.haml
index a657dba..9cdedde 100644
--- a/views/prediction.haml
+++ b/views/prediction.haml
@@ -23,7 +23,6 @@
= "#{@models[i].endpoint.gsub('_', ' ')} (#{@models[i].species})"
/ check for prediction
- /- if prediction[:neighbors].size > 0
- if prediction[:neighbors] and !prediction[:value].nil?
%p
/ show model type (classification|regression)
@@ -43,7 +42,7 @@
- if prediction[:measurements].is_a?(Array)
= (type == "Regression") ? prediction[:measurements].collect{|value| "#{value.delog10.signif(3)} (#{unit})</br>#{@compound.mmol_to_mg(value.delog10).signif(3)} #{unit =~ /mmol\/L/ ? "(mg/L)" : "(mg/kg_bw/day)"}"}.join("</br>") : prediction[:measurements].join(", ")
- else
- = (type == "Regression") ? "#{prediction[:measurements].delog10.signif(3)} (#{unit})</br>#{@compound.mmol_to_mg(prediction[:measurements].delog10).signif(3)} #{(unit =~ /\b(mol\/L)\b/) ? "(mg/L)" : "(mg/kg_bw/day)"}" : prediction[:measurements]
+ = (type == "Regression") ? "#{prediction[:measurements].delog10.signif(3)} (#{unit})</br>#{@compound.mmol_to_mg(prediction[:measurements].delog10).signif(3)} #{(unit =~ /\b(mmol\/L)\b/) ? "(mg/L)" : "(mg/kg_bw/day)"}" : prediction[:measurements]
- else
- @dbhit[i] = false
@@ -54,7 +53,7 @@
/ prediction popover
%a.btn.glyphicon.glyphicon-info-sign{:href=>"javascript:void(0)", :title=>"Prediction", :tabindex=>"0", data: {trigger:"focus", toggle:"popover", placement:"left", html:"true", content:"<p>lazar searches the training dataset for similar compounds (neighbors) and calculates the prediction from their experimental activities.<p><b>Classification:</b></br>Majority vote of neighbor activities weighted by similarity.<p><b>Regression:</b></br>Prediction from a local partial least squares regression model with neighbor activities weighted by similarity.<p><a href=\"http://www.frontiersin.org/Journal/10.3389/fphar.2013.00038/abstract\", target=\"_blank\"> Original publication</a>.<hr></hr><a href=\"https://doi.org/10.3389/fphar.2013.00038\", target=\"_blank\"><img src=\"https://zenodo.org/badge/DOI/10.3389/zenodo.10.3389.svg\" alt=\"DOI\"></a>"}}
%br
- = (type == "Regression") ? "#{prediction[:value].delog10.signif(3)} (#{unit})</br>#{@compound.mmol_to_mg(prediction[:value].delog10).signif(3)} #{(unit =~ /\b(mol\/L)\b/) ? "(mg/L)" : "(mg/kg_bw/day)"}" : prediction[:value]
+ = (type == "Regression") ? "#{prediction[:value].delog10.signif(3)} (#{unit})</br>#{@compound.mmol_to_mg(prediction[:value].delog10).signif(3)} #{(unit =~ /\b(mmol\/L)\b/) ? "(mg/L)" : "(mg/kg_bw/day)"}" : prediction[:value]
/ show prediction interval or probability
%p
@@ -66,7 +65,7 @@
%br
= interval.nil? ? "--" : "#{interval[1].delog10.signif(3)} - #{interval[0].delog10.signif(3)} (#{unit})"
%br
- = "#{@compound.mmol_to_mg(interval[1].delog10).signif(3)} - #{@compound.mmol_to_mg(interval[0].delog10).signif(3)} #{(unit =~ /\b(mol\/L)\b/) ? "(mg/L)" : "(mg/kg_bw/day)"}" if !prediction[:prediction_interval].nil?
+ = "#{@compound.mmol_to_mg(interval[1].delog10).signif(3)} - #{@compound.mmol_to_mg(interval[0].delog10).signif(3)} #{(unit =~ /\b(mmol\/L)\b/) ? "(mg/L)" : "(mg/kg_bw/day)"}" if !interval.nil?
- else
%b Probability:
/ probability popover
@@ -83,25 +82,24 @@
- if !prediction[:info].blank?
%b Info:
%br
- %p=prediction[:info].sub(/excluded/, "excluded<br>")
+ %p=prediction[:info].sub(/\'.*\'/,"").sub(/,/, ",<br>")
- if !prediction[:warnings].blank?
%b Warnings:
- prediction[:warnings].uniq.each do |warning|
%br
- %p=warning
- %p=warning.sub(/substances/, "substances<br>").sub(/prediction\:/, "prediction\:<br>")
+ %p=warning.sub(/,/, ",<br>")
+ /%p=warning.sub(/substances/, "substances<br>").sub(/prediction\:/, "prediction\:<br>")
- else
%br
- - @dbhit[i] = false
- if !prediction[:info].blank?
%b Info:
%br
- %p=prediction[:info].sub(/excluded/, "excluded<br>")
+ %p=prediction[:info].sub(/\'.*\'/,"").sub(/,/, ",<br>")
- if !prediction[:warnings].blank?
%b Warnings:
- prediction[:warnings].uniq.each do |warning|
%br
- %p=warning.sub(/substances/, "substances<br>").sub(/prediction\:/, "prediction\:<br>")
+ %p=warning.sub(/,/, ",<br>")
/ always show the neighbors table, message is given there
= haml :neighbors, :layout => false, :model_type => @model_types, :dbhit => @dbhit