mustache preprocessing

author: Christoph Helma <helma@in-silico.ch> 2019-10-21 17:29:52 +0200
committer: Christoph Helma <helma@in-silico.ch> 2019-10-21 17:29:52 +0200
commit: 93f2fb17788b9d02b00935e0d1be7cd1d81ff555 (patch)
tree: 95ea869bf48bd41bb0d6d341e6cee7f3e01d2c81
parent: 1035124b854e21998d3fd9de4935780a19a2d3d3 (diff)
20 files changed, 176 insertions, 95 deletions
diff --git a/10-fold-crossvalidations/confusion-matrices/tensorflow-all.csv b/10-fold-crossvalidations/confusion-matrices/tensorflow-all.csv
index 329eae9..f4b80d7 100644
--- a/10-fold-crossvalidations/confusion-matrices/tensorflow-all.csv
+++ b/10-fold-crossvalidations/confusion-matrices/tensorflow-all.csv
@@ -1,2 +1,2 @@
-1991,2044
-2011,2034
+2507,1528
+1495,2550
diff --git a/10-fold-crossvalidations/confusion-matrices/tensorflow-selected.csv b/10-fold-crossvalidations/confusion-matrices/tensorflow-selected.csv
index 93b18af..9d5102e 100644
--- a/10-fold-crossvalidations/confusion-matrices/tensorflow-selected.csv
+++ b/10-fold-crossvalidations/confusion-matrices/tensorflow-selected.csv
@@ -1,2 +1,2 @@
-1928,1979
-2074,2099
+2453,1454
+1549,2624
diff --git a/10-fold-crossvalidations/summaries/R-DL.csv b/10-fold-crossvalidations/summaries/R-DL.json
index 8a48d30..8a48d30 100644
--- a/10-fold-crossvalidations/summaries/R-DL.csv
+++ b/10-fold-crossvalidations/summaries/R-DL.json
diff --git a/10-fold-crossvalidations/summaries/R-RF.csv b/10-fold-crossvalidations/summaries/R-RF.json
index ab7d6e7..ab7d6e7 100644
--- a/10-fold-crossvalidations/summaries/R-RF.csv
+++ b/10-fold-crossvalidations/summaries/R-RF.json
diff --git a/10-fold-crossvalidations/summaries/R-SVM.csv b/10-fold-crossvalidations/summaries/R-SVM.json
index a038447..a038447 100644
--- a/10-fold-crossvalidations/summaries/R-SVM.csv
+++ b/10-fold-crossvalidations/summaries/R-SVM.json
diff --git a/10-fold-crossvalidations/summaries/lazar-all.csv b/10-fold-crossvalidations/summaries/lazar-all.json
index e68ff79..e68ff79 100644
--- a/10-fold-crossvalidations/summaries/lazar-all.csv
+++ b/10-fold-crossvalidations/summaries/lazar-all.json
diff --git a/10-fold-crossvalidations/summaries/lazar-high-confidence.csv b/10-fold-crossvalidations/summaries/lazar-high-confidence.json
index a9f852e..a9f852e 100644
--- a/10-fold-crossvalidations/summaries/lazar-high-confidence.csv
+++ b/10-fold-crossvalidations/summaries/lazar-high-confidence.json
diff --git a/10-fold-crossvalidations/summaries/lazar-padel-all.csv b/10-fold-crossvalidations/summaries/lazar-padel-all.json
index d8ce18a..d8ce18a 100644
--- a/10-fold-crossvalidations/summaries/lazar-padel-all.csv
+++ b/10-fold-crossvalidations/summaries/lazar-padel-all.json
diff --git a/10-fold-crossvalidations/summaries/lazar-padel-high-confidence.csv b/10-fold-crossvalidations/summaries/lazar-padel-high-confidence.json
index 7ec0b1e..7ec0b1e 100644
--- a/10-fold-crossvalidations/summaries/lazar-padel-high-confidence.csv
+++ b/10-fold-crossvalidations/summaries/lazar-padel-high-confidence.json
diff --git a/10-fold-crossvalidations/summaries/results.json b/10-fold-crossvalidations/summaries/results.json
new file mode 100644
index 0000000..033c728
--- /dev/null
+++ b/10-fold-crossvalidations/summaries/results.json
@@ -0,0 +1 @@
+{"programs":[{"name":"R","algos":[{"accuracy":0.61,"true_positive_rate":0.56,"true_negative_rate":0.67,"positive_predictive_value":0.62,"negative_predictive_value":0.61,"accuracy_perc":61,"true_positive_rate_perc":56,"true_negative_rate_perc":67,"positive_predictive_value_perc":62,"negative_predictive_value_perc":61,"name":"SVM","abbrev":"R-SVM"},{"accuracy":0.64,"true_positive_rate":0.56,"true_negative_rate":0.71,"positive_predictive_value":0.66,"negative_predictive_value":0.62,"accuracy_perc":64,"true_positive_rate_perc":56,"true_negative_rate_perc":71,"positive_predictive_value_perc":66,"negative_predictive_value_perc":62,"name":"RF","abbrev":"R-RF"},{"accuracy":0.56,"true_positive_rate":0.88,"true_negative_rate":0.24,"positive_predictive_value":0.53,"negative_predictive_value":0.67,"accuracy_perc":56,"true_positive_rate_perc":88,"true_negative_rate_perc":24,"positive_predictive_value_perc":53,"negative_predictive_value_perc":67,"name":"DL","abbrev":"R-DL"}]},{"name":"tensorflow","algos":[{"accuracy":0.63,"true_positive_rate":0.63,"true_negative_rate":0.63,"positive_predictive_value":0.62,"negative_predictive_value":0.63,"accuracy_perc":63,"true_positive_rate_perc":63,"true_negative_rate_perc":63,"positive_predictive_value_perc":62,"negative_predictive_value_perc":63,"name":"without feature selection","abbrev":"tensorflow-without feature selection"},{"accuracy":0.63,"true_positive_rate":0.61,"true_negative_rate":0.64,"positive_predictive_value":0.63,"negative_predictive_value":0.63,"accuracy_perc":63,"true_positive_rate_perc":61,"true_negative_rate_perc":64,"positive_predictive_value_perc":63,"negative_predictive_value_perc":63,"name":"with feature selection","abbrev":"tensorflow-with feature selection"}]},{"name":"lazar","algos":[{"accuracy":0.82,"true_positive_rate":0.85,"true_negative_rate":0.78,"positive_predictive_value":0.8,"negative_predictive_value":0.84,"accuracy_perc":82,"true_positive_rate_perc":85,"true_negative_rate_perc":78,"positive_predictive_value_perc":80,"negative_predictive_value_perc":84,"name":"all","abbrev":"lazar-all"},{"accuracy":0.84,"true_positive_rate":0.89,"true_negative_rate":0.79,"positive_predictive_value":0.83,"negative_predictive_value":0.85,"accuracy_perc":84,"true_positive_rate_perc":89,"true_negative_rate_perc":79,"positive_predictive_value_perc":83,"negative_predictive_value_perc":85,"name":"high-confidence","abbrev":"lazar-high-confidence"},{"accuracy":0.58,"true_positive_rate":0.32,"true_negative_rate":0.79,"positive_predictive_value":0.56,"negative_predictive_value":0.59,"accuracy_perc":58,"true_positive_rate_perc":32,"true_negative_rate_perc":79,"positive_predictive_value_perc":56,"negative_predictive_value_perc":59,"name":"PaDEL all","abbrev":"lazar-PaDEL all"},{"accuracy":0.58,"true_positive_rate":0.32,"true_negative_rate":0.79,"positive_predictive_value":0.56,"negative_predictive_value":0.59,"accuracy_perc":58,"true_positive_rate_perc":32,"true_negative_rate_perc":79,"positive_predictive_value_perc":56,"negative_predictive_value_perc":59,"name":"PaDEL high-confidence","abbrev":"lazar-PaDEL high-confidence"}]}]}
diff --git a/10-fold-crossvalidations/summaries/tensorflow-all.csv b/10-fold-crossvalidations/summaries/tensorflow-all.csv
deleted file mode 100644
index 804b900..0000000
--- a/10-fold-crossvalidations/summaries/tensorflow-all.csv
+++ /dev/null
@@ -1 +0,0 @@
-{"accuracy":0.49814356435643564,"true_positive_rate":0.49750124937531237,"true_negative_rate":0.49877390877881317,"positive_predictive_value":0.49343246592317225,"negative_predictive_value":0.5028430160692212}
diff --git a/10-fold-crossvalidations/summaries/tensorflow-all.json b/10-fold-crossvalidations/summaries/tensorflow-all.json
new file mode 100644
index 0000000..a605a4d
--- /dev/null
+++ b/10-fold-crossvalidations/summaries/tensorflow-all.json
@@ -0,0 +1 @@
+{"accuracy":0.6258663366336633,"true_positive_rate":0.6264367816091954,"true_negative_rate":0.6253065228052967,"positive_predictive_value":0.6213135068153656,"negative_predictive_value":0.630407911001236}
diff --git a/10-fold-crossvalidations/summaries/tensorflow-selected.csv b/10-fold-crossvalidations/summaries/tensorflow-selected.csv
deleted file mode 100644
index 321dfc3..0000000
--- a/10-fold-crossvalidations/summaries/tensorflow-selected.csv
+++ /dev/null
@@ -1 +0,0 @@
-{"accuracy":0.4983910891089109,"true_positive_rate":0.4817591204397801,"true_negative_rate":0.5147130946542423,"positive_predictive_value":0.493473253135398,"negative_predictive_value":0.5029954469206805}
diff --git a/10-fold-crossvalidations/summaries/tensorflow-selected.json b/10-fold-crossvalidations/summaries/tensorflow-selected.json
new file mode 100644
index 0000000..93c54ef
--- /dev/null
+++ b/10-fold-crossvalidations/summaries/tensorflow-selected.json
@@ -0,0 +1 @@
+{"accuracy":0.6283415841584158,"true_positive_rate":0.612943528235882,"true_negative_rate":0.6434526728788622,"positive_predictive_value":0.6278474532889685,"negative_predictive_value":0.6288042175892643}
diff --git a/Makefile b/Makefile
index 421d75e..a95eb32 100644
--- a/Makefile
+++ b/Makefile
@@ -1,16 +1,16 @@
 # Manuscript
-# please install pandoc-scholar (https://github.com/pandoc-scholar/pandoc-scholar) in the pandoc-scholar directory or point PANDOC_SCHOLAR_PATH to your installation
+# Requirements:
+# pandoc-scholar (https://github.com/pandoc-scholar/pandoc-scholar) in PANDOC_SCHOLAR_PATH
+# pandoc-placetable (https://github.com/mb21/pandoc-placetable)
 
-ARTICLE_FILE          = mutagenicity.md
+ARTICLE_FILE          = mutagenicity.mustache.md
 PANDOC_SCHOLAR_PATH   = pandoc-scholar
 OUTFILE_PREFIX        = mutagenicity
-DEFAULT_EXTENSIONS    = latex pdf docx #odt epub html
-PANDOC_WRITER_OPTIONS = --filter=pandoc-citeproc
-#PANDOC_WRITER_OPTIONS = --filter=pandoc-placetable --filter=pandoc-citeproc
+DEFAULT_EXTENSIONS    = pdf #latex docx html #odt epub
+#PANDOC_WRITER_OPTIONS = --filter=panpipe --filter=pandoc-placetable --filter=pandoc-citeproc -M tmpvar=test
+PANDOC_WRITER_OPTIONS = --filter=pandoc-crossref --filter=pandoc-placetable --filter=pandoc-citeproc 
 TEMPLATE_FILE_LATEX   = pandoc-scholar.latex
 
-include $(PANDOC_SCHOLAR_PATH)/Makefile
-
 # Lazar
 
 LAZAR_DIR = ../lazar
@@ -28,29 +28,55 @@ CONFUSION_MATRICES_DIR = 10-fold-crossvalidations/confusion-matrices
 R_CV_DIR = 10-fold-crossvalidations/R
 TENSORFLOW_CV_DIR = 10-fold-crossvalidations/tensorflow
 
-#tables = tables/R-SVM.csv tables/R-RF.csv tables/R-DL.csv
+TABLES = tables/r-summary.csv tables/tf-summary.csv tables/lazar-summary.csv tables/R-SVM.csv tables/R-RF.csv tables/R-DL.csv tables/tensorflow-all.csv tables/tensorflow-selected.csv tables/lazar-all.csv tables/lazar-high-confidence.csv tables/lazar-padel-all.csv tables/lazar-padel-high-confidence.csv
+
+R_SUMMARIES = $(SUMMARIES_DIR)/R-SVM.json $(SUMMARIES_DIR)/R-RF.json $(SUMMARIES_DIR)/R-DL.json
+TF_SUMMARIES = $(SUMMARIES_DIR)/tensorflow-all.json $(SUMMARIES_DIR)/tensorflow-selected.json
+LAZAR_SUMMARIES = $(SUMMARIES_DIR)/lazar-all.json $(SUMMARIES_DIR)/lazar-high-confidence.json $(SUMMARIES_DIR)/lazar-padel-all.json $(SUMMARIES_DIR)/lazar-padel-high-confidence.json
 
-SUMMARIES = $(SUMMARIES_DIR)/R-SVM.csv $(SUMMARIES_DIR)/R-RF.csv $(SUMMARIES_DIR)/R-DL.csv $(SUMMARIES_DIR)/tensorflow-all.csv $(SUMMARIES_DIR)/tensorflow-selected.csv $(SUMMARIES_DIR)/lazar-all.csv $(SUMMARIES_DIR)/lazar-high-confidence.csv $(SUMMARIES_DIR)/lazar-padel-all.csv $(SUMMARIES_DIR)/lazar-padel-high-confidence.csv
+SUMMARIES = $(R_SUMMARIES) $(TF_SUMMARIES) $(LAZAR_SUMMARIES)
 
 CONFUSION_MATRICES = $(CONFUSION_MATRICES_DIR)/R-SVM.csv $(CONFUSION_MATRICES_DIR)/R-RF.csv $(CONFUSION_MATRICES_DIR)/R-DL.csv $(CONFUSION_MATRICES_DIR)/tensorflow-all.csv $(CONFUSION_MATRICES_DIR)/tensorflow-selected.csv $(CONFUSION_MATRICES_DIR)/lazar-all.csv $(CONFUSION_MATRICES_DIR)/lazar-high-confidence.csv $(CONFUSION_MATRICES_DIR)/lazar-padel-all.csv $(CONFUSION_MATRICES_DIR)/lazar-padel-high-confidence.csv
 
 DATA = data/mutagenicity.sdf data/mutagenicity.csv data/mutagenicity-fingerprints.csv
 
-all: $(SUMMARIES) $(DATA) #$(tables)
+all: $(DATA) $(TABLES) mutagenicity.pdf $(SUMMARIES_DIR)/results.json
+#all: $(SUMMARIES) $(DATA) $(TABLES) mutagenicity.pdf
+include $(PANDOC_SCHOLAR_PATH)/Makefile
 export: $(DATA) 
 
+mutagenicity.mustache.md: $(SUMMARIES_DIR)/results.json mutagenicity.md
+	mustache $^ > $@
+
+# tables
+
+tables/r-summary.csv: $(R_SUMMARIES)
+	scripts/summaries2table.rb $^ > $@
+
+tables/tf-summary.csv: $(TF_SUMMARIES)
+	scripts/summaries2table.rb $^ > $@
+
+tables/lazar-summary.csv: $(LAZAR_SUMMARIES)
+	scripts/summaries2table.rb $^ > $@
+
+tables/%.csv: $(CONFUSION_MATRICES_DIR)/%.csv
+	scripts/confusion-matrix2table.rb $< > $@
+
 # summaries
 
-$(SUMMARIES_DIR)/%.csv: $(CONFUSION_MATRICES_DIR)/%.csv
+$(SUMMARIES_DIR)/results.json: $(SUMMARIES)
+	scripts/results.rb $^ > $@
+
+$(SUMMARIES_DIR)/%.json: $(CONFUSION_MATRICES_DIR)/%.csv
 	scripts/confusion-matrix-summary.rb $< > $@
 
 # confusion matrices
 
 ## tensorflow
-$(CONFUSION_MATRICES_DIR)/tensorflow-selected.csv: $(TENSORFLOW_CV_DIR)/pred.csv
+$(CONFUSION_MATRICES_DIR)/tensorflow-selected.csv: $(TENSORFLOW_CV_DIR)/pred.sorted.csv
 	scripts/cv-tensorflow-confusion-matrix.rb $< > $@
 
-$(CONFUSION_MATRICES_DIR)/tensorflow-all.csv: $(TENSORFLOW_CV_DIR)/pred_ext.csv
+$(CONFUSION_MATRICES_DIR)/tensorflow-all.csv: $(TENSORFLOW_CV_DIR)/pred_ext.sorted.csv
 	scripts/cv-tensorflow-confusion-matrix.rb $< > $@
 
 ## R
diff --git a/mutagenicity.md b/mutagenicity.md
index bf4f6d1..2f80bad 100644
--- a/mutagenicity.md
+++ b/mutagenicity.md
@@ -134,8 +134,8 @@ of a compound can be constructed that can be used to calculate chemical
 similarities.
 
 The chemical similarity between two compounds a and b is expressed as
-the proportion between atom environments common in both structures A ∩ B
-and the total number of atom environments A U B (Jaccard/Tanimoto
+the proportion between atom environments common in both structures $A \cap B$
+and the total number of atom environments $A \cup B$ (Jaccard/Tanimoto
 index).
 
 $$sim = \frac{\left| A\  \cap B \right|}{\left| A\  \cup B \right|}$$
@@ -335,117 +335,106 @@ Validation
 Results
 =======
 
-`lazar`
------
+{{#programs}}
+{{name}} Models
+--------
+{{#algos}}
 
-Random Forest
--------------
+### {{name}}
 
-The validation showed that the RF model has an accuracy of 64%, a
-sensitivity of 66% and a specificity of 63%. The confusion matrix of the
+10-fold crossvalidation of the {{abbrev}} model gave an accuracy of
+{{accuracy_perc}}%,
+a sensitivity of
+{{true_positive_rate_perc}}%
+and a specificity of
+{{true_negative_rate_perc}}%.
+The confusion matrix of the
 model, calculated for 8080 instances, is provided in Table 1.
 
-Table 1: Confusion matrix of the RF model
+```{.table file="tables/R-RF.csv" caption="Confusion matrix for R Random Forest predictions"}
+```
+{{/algos}}
+{{/programs}}
 
-                          Predicted genotoxicity                         
-  ----------------------- ------------------------ ---------- ---------- -------------
-  Measured genotoxicity                            ***PP***   ***PN***   ***Total***
-                          ***TP***                 2274       1163       3437
-                          ***TN***                 1736       2907       4643
-                          ***Total***              4010       4070       8080
+R Models
+--------
 
-PP: Predicted positive; PN: Predicted negative, TP: True positive, TN:
-True negative
+### Random Forest
 
-Support Vector Machines
------------------------
+The validation showed that the RF model has an accuracy of
+{{R-RF.accuracy}}%
+`cat /home/ch/src/mutagenicity-paper/10-fold-crossvalidations/summaries/R-RF.json|jq '.accuracy * 100 | round'`{pipe="sh"}%,
+a sensitivity of
+`cat /home/ch/src/mutagenicity-paper/10-fold-crossvalidations/summaries/R-RF.json|jq '.true_positive_rate * 100 | round'`{pipe="sh"}%,
+and a specificity of
+`cat /home/ch/src/mutagenicity-paper/10-fold-crossvalidations/summaries/R-RF.json|jq '.true_negative_rate * 100 | round'`{pipe="sh"}%.
+The confusion matrix of the
+model, calculated for 8080 instances, is provided in Table 1.
+
+```{.table file="tables/R-RF.csv" caption="Confusion matrix for R Random Forest predictions"}
+```
+
+### Support Vector Machines
 
 The validation showed that the SVM model has an accuracy of 62%, a
 sensitivity of 65% and a specificity of 60%. The confusion matrix of SVM
 model, calculated for 8080 instances, is provided in Table 2.
 
-Table 2: Confusion matrix of the SVM model
-
-                          Predicted genotoxicity                         
-  ----------------------- ------------------------ ---------- ---------- -------------
-  Measured genotoxicity                            ***PP***   ***PN***   ***Total***
-                          ***TP***                 2057       1107       3164
-                          ***TN***                 1953       2963       4916
-                          ***Total***              4010       4070       8080
 
-PP: Predicted positive; PN: Predicted negative, TP: True positive, TN:
-True negative
+```{.table file="tables/R-SVM.csv" caption="Confusion matrix for R Support Vector Machine predictions"}
+```
 
-Deep Learning (R-project)
--------------------------
+### Deep Learning
 
 The validation showed that the DL model generated in R has an accuracy
 of 59%, a sensitivity of 89% and a specificity of 30%. The confusion
 matrix of the model, normalised to 8080 instances, is provided in Table
 3.
 
-Table 3: Confusion matrix of the DL model (R-project)
+```{.table file="tables/R-DL.csv" caption="Confusion matrix for R Deep Learning predictions"}
+```
 
-                          Predicted genotoxicity                         
-  ----------------------- ------------------------ ---------- ---------- -------------
-  Measured genotoxicity                            ***PP***   ***PN***   ***Total***
-                          ***TP***                 3575       435        4010
-                          ***TN***                 2853       1217       4070
-                          ***Total***              6428       1652       8080
+```{.table file="tables/r-summary.csv" caption="Summary of R model validations"}
+```
 
-PP: Predicted positive; PN: Predicted negative, TP: True positive, TN:
-True negative
-
-DL model (TensorFlow)
----------------------
+TensorFlow Models
+-----------------
 
 The validation showed that the DL model generated in TensorFlow has an
 accuracy of 68%, a sensitivity of 70% and a specificity of 46%. The
 confusion matrix of the model, normalised to 8080 instances, is provided
 in Table 4.
 
-Table 4: Confusion matrix of the DL model (TensorFlow)
-
-                          Predicted genotoxicity                         
-  ----------------------- ------------------------ ---------- ---------- -------------
-  Measured genotoxicity                            ***PP***   ***PN***   ***Total***
-                          ***TP***                 2851       1227       4078
-                          ***TN***                 1825       2177       4002
-                          ***Total***              4676       3404       8080
-
-PP: Predicted positive; PN: Predicted negative, TP: True positive, TN:
-True negative
-
-The ROC curves from the 6-fold validation are shown in Figure 7.
+```{.table file="tables/tensorflow-all.csv" caption="Confusion matrix for TensorFlow predictions without variable selection"}
+```
 
-![](figures/image7.png){width="3.825in"
-height="2.7327045056867894in"}
+```{.table file="tables/tensorflow-selected.csv" caption="Confusion matrix for TensorFlow predictions with variable selection"}
+```
 
-Figure 7: Six-fold cross-validation of TensorFlow DL model show an
-average area under the ROC-curve (ROC-AUC; measure of accuracy) of 68%.
+```{.table file="tables/tf-summary.csv" caption="Summary of TensorFlow model validations"}
+```
 
-In summary, the validation results of the four methods are presented in
-the following table.
+`lazar` Models
+--------------
 
-Table 5 Results of the cross-validation of the four models and after
-y-randomisation
+### MolPrint2D Descriptors
 
-  ----------------------------------------------------------------------
-                          Accuracy   CCR     Sensitivity   Specificity
-  ----------------------- ---------- ------- ------------- -------------
-  RF model                64.1%      64.4%   66.2%         62.6%
+```{.table file="tables/lazar-all.csv" caption="Confusion matrix for lazar predictions with MolPrint2D descriptors"}
+```
 
-  SVM model               62.1%      62.6%   65.0%         60.3%
+```{.table file="tables/lazar-high-confidence.csv" caption="Confusion matrix for high confidence lazar predictions with MolPrint2D descriptors"}
+```
 
-  DL model\               59.3%      59.5%   89.2%         29.9%
-  (R-project)                                              
+### PaDEL Descriptors
 
-  DL model (TensorFlow)   68%        62.2%   69.9%         45.6%
+```{.table file="tables/lazar-padel-all.csv" caption="Confusion matrix for lazar predictions with PaDEL descriptors"}
+```
 
-  y-randomisation         50.5%      50.4%   50.3%         50.6%
-  ----------------------------------------------------------------------
+```{.table file="tables/lazar-padel-high-confidence.csv" caption="Confusion matrix for high confidence lazar predictions with PaDEL descriptors"}
+```
 
-CCR (correct classification rate)
+```{.table file="tables/lazar-summary.csv" caption="Summary of lazar model validations"}
+```
 
 Discussion
 ==========
diff --git a/scripts/confusion-matrix2table.rb b/scripts/confusion-matrix2table.rb
new file mode 100755
index 0000000..ccb4817
--- /dev/null
+++ b/scripts/confusion-matrix2table.rb
@@ -0,0 +1,10 @@
+#!/usr/bin/env ruby
+
+mat = []
+File.readlines(ARGV[0]).each do |l|
+  mat << l.chomp.split(",")
+end
+puts ",,Predictions,"
+puts ",,mutagenic,non-mutagenic"
+puts "Measurements,mutagenic,#{mat[0][0]},#{mat[0][1]}"
+puts ",non-mutagenic,#{mat[1][0]},#{mat[1][1]}"
diff --git a/scripts/cv-tensorflow-confusion-matrix.rb b/scripts/cv-tensorflow-confusion-matrix.rb
index 067519b..2b0ee58 100755
--- a/scripts/cv-tensorflow-confusion-matrix.rb
+++ b/scripts/cv-tensorflow-confusion-matrix.rb
@@ -7,7 +7,7 @@ tn = 0
 fn = 0
 
 pred = CSV.read(ARGV[0],headers: true,:col_sep => ",")
-act = CSV.read(File.join(File.dirname(ARGV[0]),"GenoTox-database.csv"),headers: true,:col_sep => ",")
+act = CSV.read(File.join("data","GenoTox-database.csv"),headers: true,:col_sep => ",")
 
 pred.each_with_index do |row,i|
 
diff --git a/scripts/results.rb b/scripts/results.rb
new file mode 100755
index 0000000..1a36278
--- /dev/null
+++ b/scripts/results.rb
@@ -0,0 +1,36 @@
+#!/usr/bin/env ruby
+require 'json'
+
+result = {}
+ARGV.each do |f|
+  fname = File.basename(f,".json")
+  program,algo = fname.split('-')
+  case program
+  when "tensorflow"
+    algo == "all" ? algo = "without feature selection" : algo = "with feature selection"
+  when "lazar"
+    algo = "high-confidence" if algo == "high"
+    if algo == "padel"
+      algo = "PaDEL" 
+      fname.match("high") ? algo += " high-confidence" : algo += " all"
+    end
+  end
+  result[program] ||= {}
+  result[program][algo] = JSON.parse(File.read(f)).collect{|k,v| [k,v.round(2)]}.to_h
+end
+
+out = {:programs => []}
+result.keys.each do |prog|
+  out[:programs] << {:name => prog, :algos => []}
+  result[prog].keys.each do |algo|
+    r = result[prog][algo].dup
+    result[prog][algo].each do |k,v|
+      r[k+"_perc"] = (v*100).round
+    end
+    r[:name] = algo
+    r[:abbrev] = prog+"-"+algo
+    out[:programs].last[:algos] << r
+  end
+end
+
+puts out.to_json
diff --git a/scripts/summaries2table.rb b/scripts/summaries2table.rb
new file mode 100755
index 0000000..5470b26
--- /dev/null
+++ b/scripts/summaries2table.rb
@@ -0,0 +1,19 @@
+#!/usr/bin/env ruby
+require 'json'
+
+results = {}
+
+ARGV.each do |f|
+  results[File.basename(f,".json")] = JSON.parse(File.read(f))
+end
+
+print ","
+puts results.keys.collect{|k| k.sub("tensorflow","TF")}.join(",")
+["accuracy","true_positive_rate","true_negative_rate","positive_predictive_value","negative_predictive_value"].each do |m|
+  line = [m.gsub("_"," ")]
+  results.each do |k,v|
+    line << v[m].round(2)
+  end
+  puts line.join(",")
+end
+
author	Christoph Helma <helma@in-silico.ch>	2019-10-21 17:29:52 +0200
committer	Christoph Helma <helma@in-silico.ch>	2019-10-21 17:29:52 +0200
commit	93f2fb17788b9d02b00935e0d1be7cd1d81ff555 (patch)
tree	95ea869bf48bd41bb0d6d341e6cee7f3e01d2c81
parent	1035124b854e21998d3fd9de4935780a19a2d3d3 (diff)