From 6f527daf4875ce2ed864e8a6f4f30e44b4370561 Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Thu, 13 Apr 2017 19:18:13 +0200
Subject: figure legends improved

---
 figures/crossvalidation0.pdf               | Bin 12908 -> 12852 bytes
 figures/crossvalidation1.pdf               | Bin 12931 -> 12878 bytes
 figures/crossvalidation2.pdf               | Bin 12862 -> 12844 bytes
 figures/prediction-test-correlation.pdf    | Bin 6316 -> 6303 bytes
 figures/test-prediction.pdf                | Bin 9892 -> 9892 bytes
 loael.Rmd                                  |  32 ++++++++++++++---------------
 loael.md                                   |  32 ++++++++++++++---------------
 loael.pdf                                  | Bin 348721 -> 348755 bytes
 scripts/crossvalidation-plots.R            |   2 +-
 scripts/prediction-test-correlation-plot.R |   2 +-
 10 files changed, 34 insertions(+), 34 deletions(-)

diff --git a/figures/crossvalidation0.pdf b/figures/crossvalidation0.pdf
index 7ed5aa7..6c98522 100644
Binary files a/figures/crossvalidation0.pdf and b/figures/crossvalidation0.pdf differ
diff --git a/figures/crossvalidation1.pdf b/figures/crossvalidation1.pdf
index a3ee34c..0a27b9c 100644
Binary files a/figures/crossvalidation1.pdf and b/figures/crossvalidation1.pdf differ
diff --git a/figures/crossvalidation2.pdf b/figures/crossvalidation2.pdf
index f288110..560ff03 100644
Binary files a/figures/crossvalidation2.pdf and b/figures/crossvalidation2.pdf differ
diff --git a/figures/prediction-test-correlation.pdf b/figures/prediction-test-correlation.pdf
index 7ca7447..3566d46 100644
Binary files a/figures/prediction-test-correlation.pdf and b/figures/prediction-test-correlation.pdf differ
diff --git a/figures/test-prediction.pdf b/figures/test-prediction.pdf
index 5e0e2ff..62a70c8 100644
Binary files a/figures/test-prediction.pdf and b/figures/test-prediction.pdf differ
diff --git a/loael.Rmd b/loael.Rmd
index e75feab..dda3e38 100644
--- a/loael.Rmd
+++ b/loael.Rmd
@@ -233,7 +233,7 @@ for the prediction of quantitative
 properties.  First all uninformative fingerprints (i.e. features with identical
 values across all neighbors) are removed.  The remaining set of features is
 used as descriptors for creating a local weighted RF model with atom
-environments as descriptors and model similarities as weights. The `rf` method
+environments as descriptors and model similarities as weights. The RF method
 from the `caret` R package [@Kuhn08] is used for this purpose.  Models are
 trained with the default `caret` settings, optimizing the number of RF
 components by bootstrap resampling.
@@ -251,9 +251,9 @@ weighted by its similarity to the query compound. In this case the prediction is
 
 ### Applicability domain
 
-The applicability domain of lazar models is determined by the structural
+The applicability domain (AD) of lazar models is determined by the structural
 diversity of the training data. If no similar compounds are found in the
-training data no predictions will be generated. Warnings are issued if the similarity threshold has to be lowered from 0.5 to 0.2 in order to enable predictions and if lazar has to resort to weighted average predictions, because local random forests fail.
+training data no predictions will be generated. Warnings are issued if the similarity threshold has to be lowered from 0.5 to 0.2 in order to enable predictions and if lazar has to resort to weighted average predictions, because local random forests fail. Thus, predictions without warnings can be considered close to the applicability domain, and predictions with warnings more distant from it. Quantitative applicability domain information can be obtained from the similarities of individual neighbors.
 
 Local regression models consider neighbor similarities to the query compound,
 by weighting the contribution of each neighbor is by its similarity.
@@ -407,8 +407,6 @@ The combined test set has a mean standard deviation of `r round(mean(10^(-1*c.du
 
 [@fig:comp] shows the experimental LOAEL variability of compounds occurring in both datasets (i.e. the *test* dataset) colored in red (experimental). This is the baseline reference for the comparison with predicted values.
 
-##### LOAEL correlation between datasets
-
 ```{r echo=F}
 data <- read.csv("data/median-correlation.csv",header=T)
 cor <- cor.test(data$mazzatorta,data$swiss)
@@ -463,7 +461,7 @@ Experimental data and 95\% prediction intervals did not overlap in `r incorrect_
 
 [@fig:comp] shows a comparison of predicted with experimental values:
 
-![Comparison of experimental with predicted LOAEL values. Each vertical line represents a compound, dots are individual measurements (red), predictions (green) or prdictions with warnings (blue).](figures/test-prediction.pdf){#fig:comp}
+![Comparison of experimental with predicted LOAEL values. Each vertical line represents a compound, dots are individual measurements (blue), predictions (green) or predictions far from the applicability domain, i.e. with warnings (red).](figures/test-prediction.pdf){#fig:comp}
 
 Correlation analysis was performed between individual predictions and the
 median of experimental data.  All correlations are statistically highly
@@ -474,13 +472,13 @@ multiple measurements into a single median value hides experimental variability.
 Comparison    | $r^2$                     | RMSE    |  Nr. predicted
 --------------|---------------------------|---------|---------------
 Mazzatorta vs. Swiss dataset | `r median.r.square`      | `r median.rmse`           
-Predictions without warnings vs. test median             | `r nowarnings.r_square` | `r nowarnings.rmse` | `r length(nowarnings$LOAEL_predicted)`/`r  length(unique(t$SMILES))`
-Predictions with warnings vs. test median             | `r warnings.r_square` | `r warnings.rmse`  | `r length(warnings$LOAEL_predicted)`/`r  length(unique(t$SMILES))`
+AD close predictions vs. test median             | `r nowarnings.r_square` | `r nowarnings.rmse` | `r length(nowarnings$LOAEL_predicted)`/`r  length(unique(t$SMILES))`
+AD distant predictions vs. test median             | `r warnings.r_square` | `r warnings.rmse`  | `r length(warnings$LOAEL_predicted)`/`r  length(unique(t$SMILES))`
 All predictions vs. test median             | `r training.r_square` | `r training.rmse`  | `r length(training$LOAEL_predicted)`/`r  length(unique(t$SMILES))`
 
 : Comparison of model predictions with experimental variability. {#tbl:common-pred}
 
-![Correlation of experimental with predicted LOAEL values (test set)](figures/prediction-test-correlation.pdf){#fig:corr}
+![Correlation of experimental with predicted LOAEL values (test set). Green dots indicate predictions close to the applicability domain (i.e. without warnings), red dots indicate predictions far from the applicability domain (i.e. with warnings).](figures/prediction-test-correlation.pdf){#fig:corr}
 
 ```{r echo=F}
 t0all = read.csv("data/training_log10-cv-0.csv",header=T)
@@ -520,16 +518,16 @@ All correlations of predicted with experimental values are statistically highly
 
 Predictions  | $r^2$ | RMSE | Nr. predicted
 --|-------|------|----------------
-No warnings | `r round(cv.t0nowarnings.r_square,2)`  | `r round(cv.t0nowarnings.rmse,2)` | `r length(unique(t0nowarnings$SMILES))`/`r length(unique(c$SMILES))`
-Warnings | `r round(cv.t0warnings.r_square,2)`  | `r round(cv.t0warnings.rmse,2)` | `r length(unique(t0warnings$SMILES))`/`r length(unique(c$SMILES))`
+AD close | `r round(cv.t0nowarnings.r_square,2)`  | `r round(cv.t0nowarnings.rmse,2)` | `r length(unique(t0nowarnings$SMILES))`/`r length(unique(c$SMILES))`
+AD distant | `r round(cv.t0warnings.r_square,2)`  | `r round(cv.t0warnings.rmse,2)` | `r length(unique(t0warnings$SMILES))`/`r length(unique(c$SMILES))`
 All | `r round(cv.t0all.r_square,2)`  | `r round(cv.t0all.rmse,2)` | `r length(unique(t0all$SMILES))`/`r length(unique(c$SMILES))`
   |  |  |
-No warnings | `r round(cv.t1nowarnings.r_square,2)`  | `r round(cv.t1nowarnings.rmse,2)` | `r length(unique(t1nowarnings$SMILES))`/`r length(unique(c$SMILES))`
-Warnings | `r round(cv.t1warnings.r_square,2)`  | `r round(cv.t1warnings.rmse,2)` | `r length(unique(t1warnings$SMILES))`/`r length(unique(c$SMILES))`
+AD close | `r round(cv.t1nowarnings.r_square,2)`  | `r round(cv.t1nowarnings.rmse,2)` | `r length(unique(t1nowarnings$SMILES))`/`r length(unique(c$SMILES))`
+AD distant | `r round(cv.t1warnings.r_square,2)`  | `r round(cv.t1warnings.rmse,2)` | `r length(unique(t1warnings$SMILES))`/`r length(unique(c$SMILES))`
 All | `r round(cv.t1all.r_square,2)`  | `r round(cv.t1all.rmse,2)` | `r length(unique(t1all$SMILES))`/`r length(unique(c$SMILES))`
   |  |  |
-No warnings | `r round(cv.t2nowarnings.r_square,2)`  | `r round(cv.t2nowarnings.rmse,2)` | `r length(unique(t2nowarnings$SMILES))`/`r length(unique(c$SMILES))`
-Warnings | `r round(cv.t2warnings.r_square,2)`  | `r round(cv.t2warnings.rmse,2)` | `r length(unique(t2warnings$SMILES))`/`r length(unique(c$SMILES))`
+AD close | `r round(cv.t2nowarnings.r_square,2)`  | `r round(cv.t2nowarnings.rmse,2)` | `r length(unique(t2nowarnings$SMILES))`/`r length(unique(c$SMILES))`
+AD distant | `r round(cv.t2warnings.r_square,2)`  | `r round(cv.t2warnings.rmse,2)` | `r length(unique(t2warnings$SMILES))`/`r length(unique(c$SMILES))`
 All | `r round(cv.t2all.r_square,2)`  | `r round(cv.t2all.rmse,2)` | `r length(unique(t2all$SMILES))`/`r length(unique(c$SMILES))`
 
 : Results from 3 independent 10-fold crossvalidations {#tbl:cv}
@@ -545,7 +543,7 @@ All | `r round(cv.t2all.r_square,2)`  | `r round(cv.t2all.rmse,2)` | `r length(u
 
 ![](figures/crossvalidation2.pdf){#fig:cv2 height=30%}
 
-Correlation of predicted vs. measured values for five independent crossvalidations with *MP2D* fingerprint descriptors and local *random forest* models
+Correlation of predicted vs. measured values for three independent crossvalidations with *MP2D* fingerprint descriptors and local *random forest* models
 </div>
 
 Discussion
@@ -594,6 +592,8 @@ Finally there is a substantial number of compounds
 where no predictions can be made, because there are no similar compounds in the training data. These compounds clearly fall beyond the applicability domain of the training dataset 
  and in such cases it is preferable to avoid predictions instead of random guessing.
 
+Elena: Should we add a GUI screenshot?
+
 <!--
 is covered in
 prediction interval shows that `lazar` read across predictions fit well into
diff --git a/loael.md b/loael.md
index 2548306..7202dbc 100644
--- a/loael.md
+++ b/loael.md
@@ -225,7 +225,7 @@ for the prediction of quantitative
 properties.  First all uninformative fingerprints (i.e. features with identical
 values across all neighbors) are removed.  The remaining set of features is
 used as descriptors for creating a local weighted RF model with atom
-environments as descriptors and model similarities as weights. The `rf` method
+environments as descriptors and model similarities as weights. The RF method
 from the `caret` R package [@Kuhn08] is used for this purpose.  Models are
 trained with the default `caret` settings, optimizing the number of RF
 components by bootstrap resampling.
@@ -243,9 +243,9 @@ weighted by its similarity to the query compound. In this case the prediction is
 
 ### Applicability domain
 
-The applicability domain of lazar models is determined by the structural
+The applicability domain (AD) of lazar models is determined by the structural
 diversity of the training data. If no similar compounds are found in the
-training data no predictions will be generated. Warnings are issued if the similarity threshold has to be lowered from 0.5 to 0.2 in order to enable predictions and if lazar has to resort to weighted average predictions, because local random forests fail.
+training data no predictions will be generated. Warnings are issued if the similarity threshold has to be lowered from 0.5 to 0.2 in order to enable predictions and if lazar has to resort to weighted average predictions, because local random forests fail. Thus, predictions without warnings can be considered close to the applicability domain, and predictions with warnings more distant from it. Quantitative applicability domain information can be obtained from the similarities of individual neighbors.
 
 Local regression models consider neighbor similarities to the query compound,
 by weighting the contribution of each neighbor is by its similarity.
@@ -378,8 +378,6 @@ The combined test set has a mean standard deviation of 0.55 mmol/kg_bw/day (0.33
 
 [@fig:comp] shows the experimental LOAEL variability of compounds occurring in both datasets (i.e. the *test* dataset) colored in red (experimental). This is the baseline reference for the comparison with predicted values.
 
-##### LOAEL correlation between datasets
-
 
 
 [@fig:datacorr] depicts the correlation between LOAEL values from both datasets. As
@@ -415,7 +413,7 @@ Experimental data and 95\% prediction intervals did not overlap in 0 cases
 
 [@fig:comp] shows a comparison of predicted with experimental values:
 
-![Comparison of experimental with predicted LOAEL values. Each vertical line represents a compound, dots are individual measurements (red), predictions (green) or prdictions with warnings (blue).](figures/test-prediction.pdf){#fig:comp}
+![Comparison of experimental with predicted LOAEL values. Each vertical line represents a compound, dots are individual measurements (blue), predictions (green) or predictions far from the applicability domain, i.e. with warnings (red).](figures/test-prediction.pdf){#fig:comp}
 
 Correlation analysis was performed between individual predictions and the
 median of experimental data.  All correlations are statistically highly
@@ -426,13 +424,13 @@ multiple measurements into a single median value hides experimental variability.
 Comparison    | $r^2$                     | RMSE    |  Nr. predicted
 --------------|---------------------------|---------|---------------
 Mazzatorta vs. Swiss dataset | 0.52      | 0.59           
-Predictions without warnings vs. test median             | 0.48 | 0.56 | 34/155
-Predictions with warnings vs. test median             | 0.38 | 0.68  | 84/155
+AD close predictions vs. test median             | 0.48 | 0.56 | 34/155
+AD distant predictions vs. test median             | 0.38 | 0.68  | 84/155
 All predictions vs. test median             | 0.4 | 0.65  | 118/155
 
 : Comparison of model predictions with experimental variability. {#tbl:common-pred}
 
-![Correlation of experimental with predicted LOAEL values (test set)](figures/prediction-test-correlation.pdf){#fig:corr}
+![Correlation of experimental with predicted LOAEL values (test set). Green dots indicate predictions close to the applicability domain (i.e. without warnings), red dots indicate predictions far from the applicability domain (i.e. with warnings).](figures/prediction-test-correlation.pdf){#fig:corr}
 
 
 
@@ -442,16 +440,16 @@ All correlations of predicted with experimental values are statistically highly
 
 Predictions  | $r^2$ | RMSE | Nr. predicted
 --|-------|------|----------------
-No warnings | 0.61  | 0.58 | 102/671
-Warnings | 0.45  | 0.78 | 374/671
+AD close | 0.61  | 0.58 | 102/671
+AD distant | 0.45  | 0.78 | 374/671
 All | 0.47  | 0.74 | 476/671
   |  |  |
-No warnings | 0.59  | 0.6 | 101/671
-Warnings | 0.45  | 0.77 | 376/671
+AD close | 0.59  | 0.6 | 101/671
+AD distant | 0.45  | 0.77 | 376/671
 All | 0.47  | 0.74 | 477/671
   |  |  |
-No warnings | 0.59  | 0.57 | 93/671
-Warnings | 0.43  | 0.81 | 384/671
+AD close | 0.59  | 0.57 | 93/671
+AD distant | 0.43  | 0.81 | 384/671
 All | 0.45  | 0.77 | 477/671
 
 : Results from 3 independent 10-fold crossvalidations {#tbl:cv}
@@ -467,7 +465,7 @@ All | 0.45  | 0.77 | 477/671
 
 ![](figures/crossvalidation2.pdf){#fig:cv2 height=30%}
 
-Correlation of predicted vs. measured values for five independent crossvalidations with *MP2D* fingerprint descriptors and local *random forest* models
+Correlation of predicted vs. measured values for three independent crossvalidations with *MP2D* fingerprint descriptors and local *random forest* models
 </div>
 
 Discussion
@@ -516,6 +514,8 @@ Finally there is a substantial number of compounds
 where no predictions can be made, because there are no similar compounds in the training data. These compounds clearly fall beyond the applicability domain of the training dataset 
  and in such cases it is preferable to avoid predictions instead of random guessing.
 
+Elena: Should we add a GUI screenshot?
+
 <!--
 is covered in
 prediction interval shows that `lazar` read across predictions fit well into
diff --git a/loael.pdf b/loael.pdf
index 53112a0..ebf09fd 100644
Binary files a/loael.pdf and b/loael.pdf differ
diff --git a/scripts/crossvalidation-plots.R b/scripts/crossvalidation-plots.R
index 8a4f76e..de713f1 100755
--- a/scripts/crossvalidation-plots.R
+++ b/scripts/crossvalidation-plots.R
@@ -4,5 +4,5 @@ library(ggplot2)
 
 nr = commandArgs(TRUE)[1]
 data = read.csv(paste("data/training_log10-cv-",nr,".csv",sep=""))
-img = qplot(LOAEL_predicted,LOAEL_measured_median,data=data,xlab="-log10(LOAEL predicted)",ylab="-log10(LOAEL measured median)",colour=Warnings) + geom_point() + geom_abline(intercept=0.0)  + xlim(-2,4.5) + ylim(-2,4.5) + scale_color_manual(values=c("#00BFC4", "#F8766D"))
+img = qplot(LOAEL_predicted,LOAEL_measured_median,data=data,xlab="-log10(LOAEL predicted)",ylab="-log10(LOAEL measured median)",colour=Warnings) + geom_point() + geom_abline(intercept=0.0)  + xlim(-2,4.5) + ylim(-2,4.5) + scale_color_manual(name  = "Applicability domain",values=c("FALSE"="#00BFC4", "TRUE"="#F8766D"), breaks=c(TRUE,FALSE), labels=c("distant","close")) # colours keyed by Warnings value (assumes the column is read as logical) so they do not depend on the order of breaks
 ggsave(file=paste('figures/crossvalidation',nr,'.pdf',sep=""), plot=img,width=12, height=8)
diff --git a/scripts/prediction-test-correlation-plot.R b/scripts/prediction-test-correlation-plot.R
index 648e864..eaa72c9 100755
--- a/scripts/prediction-test-correlation-plot.R
+++ b/scripts/prediction-test-correlation-plot.R
@@ -4,6 +4,6 @@ library(ggplot2)
 
 training = read.csv("data/training-test-predictions.csv",header=T)
 
-img = qplot(LOAEL_predicted,LOAEL_measured_median,data=training,xlab="-log10(LOAEL predicted)",ylab="-log10(LOAEL measured median)", colour = Warnings) + geom_point() + geom_abline(intercept=0.0)  + xlim(-1,4) + ylim(-1,4) + scale_color_manual(values=c("#00BFC4", "#F8766D"))
+img = qplot(LOAEL_predicted,LOAEL_measured_median,data=training,xlab="-log10(LOAEL predicted)",ylab="-log10(LOAEL measured median)", colour = Warnings) + geom_point() + geom_abline(intercept=0.0)  + xlim(-1,4) + ylim(-1,4) + scale_color_manual(name  = "Applicability domain",values=c("FALSE"="#00BFC4", "TRUE"="#F8766D"), breaks=c(TRUE,FALSE), labels=c("distant","close")) # colours keyed by Warnings value (assumes the column is read as logical) so they do not depend on the order of breaks
 
 ggsave(file='figures/prediction-test-correlation.pdf', plot=img,width=12, height=8)
-- 
cgit v1.2.3