summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristoph Helma <helma@in-silico.ch>2020-10-19 23:54:19 +0200
committerChristoph Helma <helma@in-silico.ch>2020-10-19 23:54:19 +0200
commit2c3bc133700f7e1e1ea8d038d87da1f3095ed103 (patch)
tree3e6ee6cf30cfd00cf8af3103851ad464bcb9ca7e
parent6b05cf7e4f5c0489325b496cbeeb332ec321c5a7 (diff)
PA prediction summary
-rw-r--r--10-fold-crossvalidations/summary.yaml375
-rw-r--r--Makefile23
-rw-r--r--figures/roc.pngbin134226 -> 146544 bytes
-rw-r--r--mutagenicity.md16
-rw-r--r--pyrrolizidine-alkaloids/summary.yaml83
-rwxr-xr-xscripts/confusion-matrix-summary.rb2
-rwxr-xr-xscripts/summary2roc.rb2
-rwxr-xr-xscripts/summary2table.rb2
8 files changed, 301 insertions, 202 deletions
diff --git a/10-fold-crossvalidations/summary.yaml b/10-fold-crossvalidations/summary.yaml
index c05db63..08c0b40 100644
--- a/10-fold-crossvalidations/summary.yaml
+++ b/10-fold-crossvalidations/summary.yaml
@@ -1,188 +1,189 @@
---
-lazar-all:
- :tp: 3326
- :fp: 833
- :tn: 3039
- :fn: 583
- :n: 7781
- :acc: 0.82
- :tpr: 0.85
- :fpr: 0.22
- :tnr: 0.78
- :ppv: 0.8
- :npv: 0.84
- :acc_perc: 82
- :tpr_perc: 85
- :tnr_perc: 78
- :ppv_perc: 80
- :npv_perc: 84
-lazar-high-confidence:
- :tp: 2816
- :fp: 571
- :tn: 2138
- :fn: 365
- :n: 5890
- :acc: 0.84
- :tpr: 0.89
- :fpr: 0.21
- :tnr: 0.79
- :ppv: 0.83
- :npv: 0.85
- :acc_perc: 84
- :tpr_perc: 89
- :tnr_perc: 79
- :ppv_perc: 83
- :npv_perc: 85
-lazar-padel-all:
- :tp: 593
- :fp: 466
- :tn: 1777
- :fn: 1253
- :n: 4089
- :acc: 0.58
- :tpr: 0.32
- :fpr: 0.21
- :tnr: 0.79
- :ppv: 0.56
- :npv: 0.59
- :acc_perc: 58
- :tpr_perc: 32
- :tnr_perc: 79
- :ppv_perc: 56
- :npv_perc: 59
-lazar-padel-high-confidence:
- :tp: 593
- :fp: 466
- :tn: 1771
- :fn: 1251
- :n: 4081
- :acc: 0.58
- :tpr: 0.32
- :fpr: 0.21
- :tnr: 0.79
- :ppv: 0.56
- :npv: 0.59
- :acc_perc: 58
- :tpr_perc: 32
- :tnr_perc: 79
- :ppv_perc: 56
- :npv_perc: 59
-R-RF:
- :tp: 2259
- :fp: 1173
- :tn: 2897
- :fn: 1741
- :n: 8070
- :acc: 0.64
- :tpr: 0.56
- :fpr: 0.29
- :tnr: 0.71
- :ppv: 0.66
- :npv: 0.62
- :acc_perc: 64
- :tpr_perc: 56
- :tnr_perc: 71
- :ppv_perc: 66
- :npv_perc: 62
-R-SVM:
- :tp: 2243
- :fp: 1353
- :tn: 2717
- :fn: 1757
- :n: 8070
- :acc: 0.61
- :tpr: 0.56
- :fpr: 0.33
- :tnr: 0.67
- :ppv: 0.62
- :npv: 0.61
- :acc_perc: 61
- :tpr_perc: 56
- :tnr_perc: 67
- :ppv_perc: 62
- :npv_perc: 61
-R-DL:
- :tp: 3517
- :fp: 3099
- :tn: 971
- :fn: 483
- :n: 8070
- :acc: 0.56
- :tpr: 0.88
- :fpr: 0.76
- :tnr: 0.24
- :ppv: 0.53
- :npv: 0.67
- :acc_perc: 56
- :tpr_perc: 88
- :tnr_perc: 24
- :ppv_perc: 53
- :npv_perc: 67
-tensorflow-rf.v3:
- :tp: 2362
- :fp: 1243
- :tn: 2835
- :fn: 1640
- :n: 8080
- :acc: 0.64
- :tpr: 0.59
- :fpr: 0.3
- :tnr: 0.7
- :ppv: 0.66
- :npv: 0.63
- :acc_perc: 64
- :tpr_perc: 59
- :tnr_perc: 70
- :ppv_perc: 66
- :npv_perc: 63
-tensorflow-lr.v3:
- :tp: 2395
- :fp: 1427
- :tn: 2651
- :fn: 1607
- :n: 8080
- :acc: 0.62
- :tpr: 0.6
- :fpr: 0.35
- :tnr: 0.65
- :ppv: 0.63
- :npv: 0.62
- :acc_perc: 62
- :tpr_perc: 60
- :tnr_perc: 65
- :ppv_perc: 63
- :npv_perc: 62
-tensorflow-lr2.v3:
- :tp: 2487
- :fp: 1497
- :tn: 2581
- :fn: 1515
- :n: 8080
- :acc: 0.63
- :tpr: 0.62
- :fpr: 0.37
- :tnr: 0.63
- :ppv: 0.62
- :npv: 0.63
- :acc_perc: 63
- :tpr_perc: 62
- :tnr_perc: 63
- :ppv_perc: 62
- :npv_perc: 63
-tensorflow-nn.v3:
- :tp: 2452
- :fp: 1468
- :tn: 2610
- :fn: 1550
- :n: 8080
- :acc: 0.63
- :tpr: 0.61
- :fpr: 0.36
- :tnr: 0.64
- :ppv: 0.63
- :npv: 0.63
- :acc_perc: 63
- :tpr_perc: 61
- :tnr_perc: 64
- :ppv_perc: 63
- :npv_perc: 63
+:cv:
+ lazar-all:
+ :tp: 3326
+ :fp: 833
+ :tn: 3039
+ :fn: 583
+ :n: 7781
+ :acc: 0.82
+ :tpr: 0.85
+ :fpr: 0.22
+ :tnr: 0.78
+ :ppv: 0.8
+ :npv: 0.84
+ :acc_perc: 82
+ :tpr_perc: 85
+ :tnr_perc: 78
+ :ppv_perc: 80
+ :npv_perc: 84
+ lazar-high-confidence:
+ :tp: 2816
+ :fp: 571
+ :tn: 2138
+ :fn: 365
+ :n: 5890
+ :acc: 0.84
+ :tpr: 0.89
+ :fpr: 0.21
+ :tnr: 0.79
+ :ppv: 0.83
+ :npv: 0.85
+ :acc_perc: 84
+ :tpr_perc: 89
+ :tnr_perc: 79
+ :ppv_perc: 83
+ :npv_perc: 85
+ lazar-padel-all:
+ :tp: 593
+ :fp: 466
+ :tn: 1777
+ :fn: 1253
+ :n: 4089
+ :acc: 0.58
+ :tpr: 0.32
+ :fpr: 0.21
+ :tnr: 0.79
+ :ppv: 0.56
+ :npv: 0.59
+ :acc_perc: 58
+ :tpr_perc: 32
+ :tnr_perc: 79
+ :ppv_perc: 56
+ :npv_perc: 59
+ lazar-padel-high-confidence:
+ :tp: 593
+ :fp: 466
+ :tn: 1771
+ :fn: 1251
+ :n: 4081
+ :acc: 0.58
+ :tpr: 0.32
+ :fpr: 0.21
+ :tnr: 0.79
+ :ppv: 0.56
+ :npv: 0.59
+ :acc_perc: 58
+ :tpr_perc: 32
+ :tnr_perc: 79
+ :ppv_perc: 56
+ :npv_perc: 59
+ R-RF:
+ :tp: 2259
+ :fp: 1173
+ :tn: 2897
+ :fn: 1741
+ :n: 8070
+ :acc: 0.64
+ :tpr: 0.56
+ :fpr: 0.29
+ :tnr: 0.71
+ :ppv: 0.66
+ :npv: 0.62
+ :acc_perc: 64
+ :tpr_perc: 56
+ :tnr_perc: 71
+ :ppv_perc: 66
+ :npv_perc: 62
+ R-SVM:
+ :tp: 2243
+ :fp: 1353
+ :tn: 2717
+ :fn: 1757
+ :n: 8070
+ :acc: 0.61
+ :tpr: 0.56
+ :fpr: 0.33
+ :tnr: 0.67
+ :ppv: 0.62
+ :npv: 0.61
+ :acc_perc: 61
+ :tpr_perc: 56
+ :tnr_perc: 67
+ :ppv_perc: 62
+ :npv_perc: 61
+ R-DL:
+ :tp: 3517
+ :fp: 3099
+ :tn: 971
+ :fn: 483
+ :n: 8070
+ :acc: 0.56
+ :tpr: 0.88
+ :fpr: 0.76
+ :tnr: 0.24
+ :ppv: 0.53
+ :npv: 0.67
+ :acc_perc: 56
+ :tpr_perc: 88
+ :tnr_perc: 24
+ :ppv_perc: 53
+ :npv_perc: 67
+ tensorflow-rf.v3:
+ :tp: 2362
+ :fp: 1243
+ :tn: 2835
+ :fn: 1640
+ :n: 8080
+ :acc: 0.64
+ :tpr: 0.59
+ :fpr: 0.3
+ :tnr: 0.7
+ :ppv: 0.66
+ :npv: 0.63
+ :acc_perc: 64
+ :tpr_perc: 59
+ :tnr_perc: 70
+ :ppv_perc: 66
+ :npv_perc: 63
+ tensorflow-lr.v3:
+ :tp: 2395
+ :fp: 1427
+ :tn: 2651
+ :fn: 1607
+ :n: 8080
+ :acc: 0.62
+ :tpr: 0.6
+ :fpr: 0.35
+ :tnr: 0.65
+ :ppv: 0.63
+ :npv: 0.62
+ :acc_perc: 62
+ :tpr_perc: 60
+ :tnr_perc: 65
+ :ppv_perc: 63
+ :npv_perc: 62
+ tensorflow-lr2.v3:
+ :tp: 2487
+ :fp: 1497
+ :tn: 2581
+ :fn: 1515
+ :n: 8080
+ :acc: 0.63
+ :tpr: 0.62
+ :fpr: 0.37
+ :tnr: 0.63
+ :ppv: 0.62
+ :npv: 0.63
+ :acc_perc: 63
+ :tpr_perc: 62
+ :tnr_perc: 63
+ :ppv_perc: 62
+ :npv_perc: 63
+ tensorflow-nn.v3:
+ :tp: 2452
+ :fp: 1468
+ :tn: 2610
+ :fn: 1550
+ :n: 8080
+ :acc: 0.63
+ :tpr: 0.61
+ :fpr: 0.36
+ :tnr: 0.64
+ :ppv: 0.63
+ :npv: 0.63
+ :acc_perc: 63
+ :tpr_perc: 61
+ :tnr_perc: 64
+ :ppv_perc: 63
+ :npv_perc: 63
diff --git a/Makefile b/Makefile
index 5ceb4aa..fb5eb30 100644
--- a/Makefile
+++ b/Makefile
@@ -13,6 +13,7 @@ TEMPLATE_FILE_LATEX = pandoc-scholar.latex
# Experiments
# crossvalidations
+
LAZAR_CONFUSION_MATRIX_DIR = 10-fold-crossvalidations/lazar/crossvalidation/confusion_matrices
LAZAR_PADEL_CONFUSION_MATRIX_DIR = 10-fold-crossvalidations/lazar-padel/crossvalidation/confusion_matrices
R_CV_DIR = 10-fold-crossvalidations/R
@@ -22,8 +23,10 @@ CONFUSION_MATRICES_DIR = 10-fold-crossvalidations/confusion-matrices
CONFUSION_MATRICES = $(CONFUSION_MATRICES_DIR)/lazar-all.csv $(CONFUSION_MATRICES_DIR)/lazar-high-confidence.csv $(CONFUSION_MATRICES_DIR)/lazar-padel-all.csv $(CONFUSION_MATRICES_DIR)/lazar-padel-high-confidence.csv $(CONFUSION_MATRICES_DIR)/R-RF.csv $(CONFUSION_MATRICES_DIR)/R-SVM.csv $(CONFUSION_MATRICES_DIR)/R-DL.csv $(CONFUSION_MATRICES_DIR)/tensorflow-rf.v3.csv $(CONFUSION_MATRICES_DIR)/tensorflow-lr.v3.csv $(CONFUSION_MATRICES_DIR)/tensorflow-lr2.v3.csv $(CONFUSION_MATRICES_DIR)/tensorflow-nn.v3.csv
CV_SUMMARY = 10-fold-crossvalidations/summary.yaml
+PA_SUMMARY = pyrrolizidine-alkaloids/summary.yaml
# PA predictions
+
PA_DIR = pyrrolizidine-alkaloids
PA_LAZAR_DIR = $(PA_DIR)/lazar
PA_R_DIR = $(PA_DIR)/R
@@ -32,13 +35,14 @@ PA_TF_DIR = $(PA_DIR)/tensorflow
PA_PREDICTIONS = $(PA_LAZAR_DIR)/pa-mp2d-predictions.csv $(PA_LAZAR_DIR)/pa-padel-predictions.csv $(PA_R_DIR)/PA.RF.outcome.csv $(PA_R_DIR)/PA.SVM.outcome.csv $(PA_R_DIR)/PA.DL.outcome.csv $(PA_TF_DIR)/pred.lr.v3-ext-Padel-2D.csv $(PA_TF_DIR)/pred.lr2.v3-ext-Padel-2D.csv $(PA_TF_DIR)/pred.rf.v3-ext-Padel-2D.csv $(PA_TF_DIR)/pred.nn.v3-ext-Padel-2D.csv
# manuscript
+
TABLES = tables/lazar-summary.csv tables/r-summary.csv tables/tensorflow-summary.csv tables/pa-tab.tex
FIGURES = figures/roc.png figures/tsne-mp2d.png figures/tsne-padel.png
-all: $(TABLES) $(FIGURES) mutagenicity.pdf
+all: $(TABLES) $(FIGURES) $(CV_SUMMARY) mutagenicity.pdf
include $(PANDOC_SCHOLAR_PATH)/Makefile
-mutagenicity.mustache.md: $(CV_SUMMARY) mutagenicity.md $(TABLES) $(FIGURES)
+mutagenicity.mustache.md: $(CV_SUMMARY) $(PA_SUMMARY) mutagenicity.md $(TABLES) $(FIGURES)
mustache $^ > $@
# figures
@@ -62,6 +66,7 @@ figures/roc.csv: $(CV_SUMMARY)
scripts/summary2roc.rb $< > $@
# tables
+
tables/pa-tab.tex: tables/pa-table.csv
scripts/pa-tex-table.rb $< > $@
@@ -77,10 +82,10 @@ tables/r-summary.csv: $(CV_SUMMARY)
tables/tensorflow-summary.csv: $(CV_SUMMARY)
scripts/summary2table.rb tensorflow > $@
-# crossvalidation summary
+# PA summary
-$(CV_SUMMARY): $(CONFUSION_MATRICES)
- scripts/confusion-matrix-summary.rb $^ > $@
+$(PA_SUMMARY): tables/pa-table.csv
+ scripts/pa-summary.rb $< > $@
# PA predictions
@@ -93,9 +98,15 @@ $(PA_LAZAR_DIR)/pa-mp2d-predictions.csv: $(PA_LAZAR_DIR)/pa-smiles.csv
$(PA_LAZAR_DIR)/pa-smiles.csv: pyrrolizidine-alkaloids/180920_PA_complete_SMILES.csv
cut -f1,4 -d ';' $< | sed 's/;/,/' > $@
+# crossvalidation summary
+
+$(CV_SUMMARY): $(CONFUSION_MATRICES)
+ scripts/confusion-matrix-summary.rb $^ > $@
+
# confusion matrices
## lazar
+
$(CONFUSION_MATRICES_DIR)/lazar-all.csv: $(LAZAR_CONFUSION_MATRIX_DIR)
cp $</all $@
@@ -109,6 +120,7 @@ $(CONFUSION_MATRICES_DIR)/lazar-padel-high-confidence.csv: $(LAZAR_PADEL_CONFUSI
cp $</high_confidence $@
## R
+
$(CONFUSION_MATRICES_DIR)/R-SVM.csv: $(R_CV_DIR)/Sgl-Observations-SVM.csv
scripts/cv-r-confusion-matrix.rb $< > $@
@@ -119,6 +131,7 @@ $(CONFUSION_MATRICES_DIR)/R-DL.csv: $(R_CV_DIR)/Sgl-Observations-DL.csv
scripts/cv-r-confusion-matrix.rb $< > $@
## tensorflow
+
$(TENSORFLOW_CV_DIR)/pred.lr.v3.norm.sorted.csv: $(TENSORFLOW_CV_DIR)/pred.lr.v3.norm.csv
sort -n $< > $@
diff --git a/figures/roc.png b/figures/roc.png
index 732299b..24a9dfb 100644
--- a/figures/roc.png
+++ b/figures/roc.png
Binary files differ
diff --git a/mutagenicity.md b/mutagenicity.md
index 9f7e349..c278142 100644
--- a/mutagenicity.md
+++ b/mutagenicity.md
@@ -42,7 +42,7 @@ Abstract
Random forest, support vector machine, logistic regression, neural networks and k-nearest neighbor
(`lazar`) algorithms were applied to a new *Salmonella* mutagenicity dataset
with 8309 unique chemical structures. The best prediction accuracies in
-10-fold-crossvalidation were obtained with `lazar` models and MolPrint2D descriptors, that gave accuracies ({{lazar-high-confidence.acc_perc}}%)
+10-fold-crossvalidation were obtained with `lazar` models and MolPrint2D descriptors, that gave accuracies ({{cv.lazar-high-confidence.acc_perc}}%)
similar to the interlaboratory variability of the Ames test.
**TODO**: PA results
@@ -497,13 +497,15 @@ Crossvalidation results are summarized in the following tables: @tbl:lazar shows
Confusion matrices for all models are available from the git repository http://git.in-silico.ch/mutagenicity-paper/10-fold-crossvalidations/confusion-matrices/, individual predictions can be found in
http://git.in-silico.ch/mutagenicity-paper/10-fold-crossvalidations/predictions/.
-The most accurate crossvalidation predictions have been obtained with standard `lazar` models using MolPrint2D descriptors ({{lazar-high-confidence.acc}} for predictions with high confidence, {{lazar-all.acc}} for all predictions). Models utilizing PaDEL descriptors have generally lower accuracies ranging from {{R-DL.acc}} (R deep learning) to {{R-RF.acc}} (R/Tensorflow random forests). Sensitivity and specificity are generally well balanced with the exception of `lazar`-PaDEL (low sensitivity) and R deep learning (low specificity) models.
+The most accurate crossvalidation predictions have been obtained with standard `lazar` models using MolPrint2D descriptors ({{cv.lazar-high-confidence.acc}} for predictions with high confidence, {{cv.lazar-all.acc}} for all predictions). Models utilizing PaDEL descriptors have generally lower accuracies ranging from {{cv.R-DL.acc}} (R deep learning) to {{cv.R-RF.acc}} (R/Tensorflow random forests). Sensitivity and specificity are generally well balanced with the exception of `lazar`-PaDEL (low sensitivity) and R deep learning (low specificity) models.
Pyrrolizidine alkaloid mutagenicity predictions
-----------------------------------------------
Mutagenicity predictions from all investigated models for 602 pyrrolizidine alkaloids are summarized in Table 4.
+**TODO** **Verena und Philipp** Koennt Ihr bitte stichprobenweise die Tabelle ueberpruefen, mir verrutscht bei der Auswertung immer gerne etwas.
+
\input{tables/pa-tab.tex}
Training data and
@@ -546,16 +548,16 @@ models have low specificity.
The accuracy of `lazar` *in-silico* predictions is comparable to the
interlaboratory variability of the Ames test (80-85% according to
@Benigni1988), especially for predictions with high confidence
-({{lazar-high-confidence.acc_perc}}%). This is a clear indication that
+({{cv.lazar-high-confidence.acc_perc}}%). This is a clear indication that
*in-silico* predictions can be as reliable as the bioassays, if the compounds
are close to the applicability domain. This conclusion is also supported by our
analysis of `lazar` lowest observed effect level predictions, which are also
similar to the experimental variability (@Helma2018).
-The lowest number of predictions ({{lazar-padel-high-confidence.n}}) has been
+The lowest number of predictions ({{cv.lazar-padel-high-confidence.n}}) has been
obtained from `lazar`-PaDEL high confidence predictions, the largest number of
-predictions comes from Tensorflow models ({{tensorflow-rf.v3.n}}). Standard
-`lazar` give a slightly lower number of predictions ({{lazar-all.n}}) than R
+predictions comes from Tensorflow models ({{cv.tensorflow-rf.v3.n}}). Standard
+`lazar` give a slightly lower number of predictions ({{cv.lazar-all.n}}) than R
and Tensorflow models. This is not necessarily a disadvantage, because `lazar`
abstains from predictions, if the query compound is very dissimilar from the
compounds in the training set and thus avoids making predictions for compounds
@@ -751,7 +753,7 @@ A new public *Salmonella* mutagenicity training dataset with 8309 compounds was
created and used to train `lazar`, R and Tensorflow models with MolPrint2D
and PaDEL descriptors. The best performance was obtained with `lazar` models
using MolPrint2D descriptors, with prediction accuracies
-({{lazar-high-confidence.acc_perc}}%) comparable to the interlaboratory variability
+({{cv.lazar-high-confidence.acc_perc}}%) comparable to the interlaboratory variability
of the Ames test (80-85%). Models based on PaDEL descriptors had lower
accuracies than MolPrint2D models, but only the `lazar` algorithm could use
MolPrint2D descriptors.
diff --git a/pyrrolizidine-alkaloids/summary.yaml b/pyrrolizidine-alkaloids/summary.yaml
new file mode 100644
index 0000000..66c5030
--- /dev/null
+++ b/pyrrolizidine-alkaloids/summary.yaml
@@ -0,0 +1,83 @@
+---
+:pa:
+ :n: 602
+ :lazar:
+ :mp2d:
+ :all:
+ :n: 560
+ :mut: 111
+ :non_mut: 449
+ :n_perc: 93
+ :mut_perc: 19
+ :non_mut_perc: 80
+ :high_confidence:
+ :n: 301
+ :mut: 76
+ :non_mut: 225
+ :n_perc: 50
+ :mut_perc: 25
+ :non_mut_perc: 74
+ :padel:
+ :all:
+ :n: 600
+ :mut: 83
+ :non_mut: 517
+ :n_perc: 99
+ :mut_perc: 13
+ :non_mut_perc: 86
+ :high_confidence:
+ :n: 0
+ :mut: 0
+ :non_mut: 0
+ :n_perc: 0
+ :r:
+ :rf:
+ :n: 602
+ :mut: 18
+ :non_mut: 584
+ :n_perc: 100
+ :mut_perc: 2
+ :non_mut_perc: 97
+ :svm:
+ :n: 602
+ :mut: 11
+ :non_mut: 591
+ :n_perc: 100
+ :mut_perc: 1
+ :non_mut_perc: 98
+ :dl:
+ :n: 602
+ :mut: 521
+ :non_mut: 81
+ :n_perc: 100
+ :mut_perc: 86
+ :non_mut_perc: 13
+ :tf:
+ :rf:
+ :n: 602
+ :mut: 186
+ :non_mut: 416
+ :n_perc: 100
+ :mut_perc: 30
+ :non_mut_perc: 69
+ :lr_sgd:
+ :n: 602
+ :mut: 286
+ :non_mut: 316
+ :n_perc: 100
+ :mut_perc: 47
+ :non_mut_perc: 52
+ :lr_scikit:
+ :n: 602
+ :mut: 395
+ :non_mut: 207
+ :n_perc: 100
+ :mut_perc: 65
+ :non_mut_perc: 34
+ :nn:
+ :n: 602
+ :mut: 295
+ :non_mut: 307
+ :n_perc: 100
+ :mut_perc: 49
+ :non_mut_perc: 50
diff --git a/scripts/confusion-matrix-summary.rb b/scripts/confusion-matrix-summary.rb
index 129d69a..8a32f79 100755
--- a/scripts/confusion-matrix-summary.rb
+++ b/scripts/confusion-matrix-summary.rb
@@ -30,5 +30,5 @@ ARGV.each do |f|
}
results[File.basename(f,".csv")] = result
end
-
+results = {:cv => results}
puts results.to_yaml
diff --git a/scripts/summary2roc.rb b/scripts/summary2roc.rb
index e50d97a..e692d74 100755
--- a/scripts/summary2roc.rb
+++ b/scripts/summary2roc.rb
@@ -1,7 +1,7 @@
#!/usr/bin/env ruby
require "yaml"
-data = YAML.load(File.read ARGV[0])
+data = YAML.load(File.read ARGV[0])[:cv]
puts "tpr,fpr"
data.each do |algo,values|
algo = algo.sub("tensorflow","Tensorflow").sub("selected","FS").sub(".v3","").sub("-all"," (all)").sub("-high-confidence"," (high confidence)").sub("padel","PaDEL").sub("lazar ","lazar-MP2D ").sub("lr2","LR-scikit").sub("lr","LR-sgd").sub("nn","NN").sub("-rf","-RF")
diff --git a/scripts/summary2table.rb b/scripts/summary2table.rb
index 555097c..267bb97 100755
--- a/scripts/summary2table.rb
+++ b/scripts/summary2table.rb
@@ -2,7 +2,7 @@
require 'yaml'
rows = {:acc => "Accuracy", :tpr => "True positive rate/Sensitivity", :tnr => "True negative rate/Specificity", :ppv => "Positive predictive value/Precision", :npv => "Negative predictive value", :n => "Nr. predictions"}
-data = YAML.load_file "10-fold-crossvalidations/summary.yaml"
+data = YAML.load_file("10-fold-crossvalidations/summary.yaml")[:cv]
case ARGV[0]
when "R"