diff options
author | Christoph Helma <helma@in-silico.ch> | 2019-10-21 17:29:52 +0200 |
---|---|---|
committer | Christoph Helma <helma@in-silico.ch> | 2019-10-21 17:29:52 +0200 |
commit | 93f2fb17788b9d02b00935e0d1be7cd1d81ff555 (patch) | |
tree | 95ea869bf48bd41bb0d6d341e6cee7f3e01d2c81 | |
parent | 1035124b854e21998d3fd9de4935780a19a2d3d3 (diff) |
mustache preprocessing
20 files changed, 176 insertions, 95 deletions
diff --git a/10-fold-crossvalidations/confusion-matrices/tensorflow-all.csv b/10-fold-crossvalidations/confusion-matrices/tensorflow-all.csv index 329eae9..f4b80d7 100644 --- a/10-fold-crossvalidations/confusion-matrices/tensorflow-all.csv +++ b/10-fold-crossvalidations/confusion-matrices/tensorflow-all.csv @@ -1,2 +1,2 @@ -1991,2044 -2011,2034 +2507,1528 +1495,2550 diff --git a/10-fold-crossvalidations/confusion-matrices/tensorflow-selected.csv b/10-fold-crossvalidations/confusion-matrices/tensorflow-selected.csv index 93b18af..9d5102e 100644 --- a/10-fold-crossvalidations/confusion-matrices/tensorflow-selected.csv +++ b/10-fold-crossvalidations/confusion-matrices/tensorflow-selected.csv @@ -1,2 +1,2 @@ -1928,1979 -2074,2099 +2453,1454 +1549,2624 diff --git a/10-fold-crossvalidations/summaries/R-DL.csv b/10-fold-crossvalidations/summaries/R-DL.json index 8a48d30..8a48d30 100644 --- a/10-fold-crossvalidations/summaries/R-DL.csv +++ b/10-fold-crossvalidations/summaries/R-DL.json diff --git a/10-fold-crossvalidations/summaries/R-RF.csv b/10-fold-crossvalidations/summaries/R-RF.json index ab7d6e7..ab7d6e7 100644 --- a/10-fold-crossvalidations/summaries/R-RF.csv +++ b/10-fold-crossvalidations/summaries/R-RF.json diff --git a/10-fold-crossvalidations/summaries/R-SVM.csv b/10-fold-crossvalidations/summaries/R-SVM.json index a038447..a038447 100644 --- a/10-fold-crossvalidations/summaries/R-SVM.csv +++ b/10-fold-crossvalidations/summaries/R-SVM.json diff --git a/10-fold-crossvalidations/summaries/lazar-all.csv b/10-fold-crossvalidations/summaries/lazar-all.json index e68ff79..e68ff79 100644 --- a/10-fold-crossvalidations/summaries/lazar-all.csv +++ b/10-fold-crossvalidations/summaries/lazar-all.json diff --git a/10-fold-crossvalidations/summaries/lazar-high-confidence.csv b/10-fold-crossvalidations/summaries/lazar-high-confidence.json index a9f852e..a9f852e 100644 --- a/10-fold-crossvalidations/summaries/lazar-high-confidence.csv +++ 
b/10-fold-crossvalidations/summaries/lazar-high-confidence.json diff --git a/10-fold-crossvalidations/summaries/lazar-padel-all.csv b/10-fold-crossvalidations/summaries/lazar-padel-all.json index d8ce18a..d8ce18a 100644 --- a/10-fold-crossvalidations/summaries/lazar-padel-all.csv +++ b/10-fold-crossvalidations/summaries/lazar-padel-all.json diff --git a/10-fold-crossvalidations/summaries/lazar-padel-high-confidence.csv b/10-fold-crossvalidations/summaries/lazar-padel-high-confidence.json index 7ec0b1e..7ec0b1e 100644 --- a/10-fold-crossvalidations/summaries/lazar-padel-high-confidence.csv +++ b/10-fold-crossvalidations/summaries/lazar-padel-high-confidence.json diff --git a/10-fold-crossvalidations/summaries/results.json b/10-fold-crossvalidations/summaries/results.json new file mode 100644 index 0000000..033c728 --- /dev/null +++ b/10-fold-crossvalidations/summaries/results.json @@ -0,0 +1 @@ +{"programs":[{"name":"R","algos":[{"accuracy":0.61,"true_positive_rate":0.56,"true_negative_rate":0.67,"positive_predictive_value":0.62,"negative_predictive_value":0.61,"accuracy_perc":61,"true_positive_rate_perc":56,"true_negative_rate_perc":67,"positive_predictive_value_perc":62,"negative_predictive_value_perc":61,"name":"SVM","abbrev":"R-SVM"},{"accuracy":0.64,"true_positive_rate":0.56,"true_negative_rate":0.71,"positive_predictive_value":0.66,"negative_predictive_value":0.62,"accuracy_perc":64,"true_positive_rate_perc":56,"true_negative_rate_perc":71,"positive_predictive_value_perc":66,"negative_predictive_value_perc":62,"name":"RF","abbrev":"R-RF"},{"accuracy":0.56,"true_positive_rate":0.88,"true_negative_rate":0.24,"positive_predictive_value":0.53,"negative_predictive_value":0.67,"accuracy_perc":56,"true_positive_rate_perc":88,"true_negative_rate_perc":24,"positive_predictive_value_perc":53,"negative_predictive_value_perc":67,"name":"DL","abbrev":"R-DL"}]},{"name":"tensorflow","algos":[{"accuracy":0.63,"true_positive_rate":0.63,"true_negative_rate":0.63,"positive_predic
tive_value":0.62,"negative_predictive_value":0.63,"accuracy_perc":63,"true_positive_rate_perc":63,"true_negative_rate_perc":63,"positive_predictive_value_perc":62,"negative_predictive_value_perc":63,"name":"without feature selection","abbrev":"tensorflow-without feature selection"},{"accuracy":0.63,"true_positive_rate":0.61,"true_negative_rate":0.64,"positive_predictive_value":0.63,"negative_predictive_value":0.63,"accuracy_perc":63,"true_positive_rate_perc":61,"true_negative_rate_perc":64,"positive_predictive_value_perc":63,"negative_predictive_value_perc":63,"name":"with feature selection","abbrev":"tensorflow-with feature selection"}]},{"name":"lazar","algos":[{"accuracy":0.82,"true_positive_rate":0.85,"true_negative_rate":0.78,"positive_predictive_value":0.8,"negative_predictive_value":0.84,"accuracy_perc":82,"true_positive_rate_perc":85,"true_negative_rate_perc":78,"positive_predictive_value_perc":80,"negative_predictive_value_perc":84,"name":"all","abbrev":"lazar-all"},{"accuracy":0.84,"true_positive_rate":0.89,"true_negative_rate":0.79,"positive_predictive_value":0.83,"negative_predictive_value":0.85,"accuracy_perc":84,"true_positive_rate_perc":89,"true_negative_rate_perc":79,"positive_predictive_value_perc":83,"negative_predictive_value_perc":85,"name":"high-confidence","abbrev":"lazar-high-confidence"},{"accuracy":0.58,"true_positive_rate":0.32,"true_negative_rate":0.79,"positive_predictive_value":0.56,"negative_predictive_value":0.59,"accuracy_perc":58,"true_positive_rate_perc":32,"true_negative_rate_perc":79,"positive_predictive_value_perc":56,"negative_predictive_value_perc":59,"name":"PaDEL all","abbrev":"lazar-PaDEL all"},{"accuracy":0.58,"true_positive_rate":0.32,"true_negative_rate":0.79,"positive_predictive_value":0.56,"negative_predictive_value":0.59,"accuracy_perc":58,"true_positive_rate_perc":32,"true_negative_rate_perc":79,"positive_predictive_value_perc":56,"negative_predictive_value_perc":59,"name":"PaDEL 
high-confidence","abbrev":"lazar-PaDEL high-confidence"}]}]} diff --git a/10-fold-crossvalidations/summaries/tensorflow-all.csv b/10-fold-crossvalidations/summaries/tensorflow-all.csv deleted file mode 100644 index 804b900..0000000 --- a/10-fold-crossvalidations/summaries/tensorflow-all.csv +++ /dev/null @@ -1 +0,0 @@ -{"accuracy":0.49814356435643564,"true_positive_rate":0.49750124937531237,"true_negative_rate":0.49877390877881317,"positive_predictive_value":0.49343246592317225,"negative_predictive_value":0.5028430160692212} diff --git a/10-fold-crossvalidations/summaries/tensorflow-all.json b/10-fold-crossvalidations/summaries/tensorflow-all.json new file mode 100644 index 0000000..a605a4d --- /dev/null +++ b/10-fold-crossvalidations/summaries/tensorflow-all.json @@ -0,0 +1 @@ +{"accuracy":0.6258663366336633,"true_positive_rate":0.6264367816091954,"true_negative_rate":0.6253065228052967,"positive_predictive_value":0.6213135068153656,"negative_predictive_value":0.630407911001236} diff --git a/10-fold-crossvalidations/summaries/tensorflow-selected.csv b/10-fold-crossvalidations/summaries/tensorflow-selected.csv deleted file mode 100644 index 321dfc3..0000000 --- a/10-fold-crossvalidations/summaries/tensorflow-selected.csv +++ /dev/null @@ -1 +0,0 @@ -{"accuracy":0.4983910891089109,"true_positive_rate":0.4817591204397801,"true_negative_rate":0.5147130946542423,"positive_predictive_value":0.493473253135398,"negative_predictive_value":0.5029954469206805} diff --git a/10-fold-crossvalidations/summaries/tensorflow-selected.json b/10-fold-crossvalidations/summaries/tensorflow-selected.json new file mode 100644 index 0000000..93c54ef --- /dev/null +++ b/10-fold-crossvalidations/summaries/tensorflow-selected.json @@ -0,0 +1 @@ +{"accuracy":0.6283415841584158,"true_positive_rate":0.612943528235882,"true_negative_rate":0.6434526728788622,"positive_predictive_value":0.6278474532889685,"negative_predictive_value":0.6288042175892643} @@ -1,16 +1,16 @@ # Manuscript -# please 
install pandoc-scholar (https://github.com/pandoc-scholar/pandoc-scholar) in the pandoc-scholar directory or point PANDOC_SCHOLAR_PATH to your installation +# Requirements: +# pandoc-scholar (https://github.com/pandoc-scholar/pandoc-scholar) in PANDOC_SCHOLAR_PATH +# pandoc-placetable (https://github.com/mb21/pandoc-placetable) -ARTICLE_FILE = mutagenicity.md +ARTICLE_FILE = mutagenicity.mustache.md PANDOC_SCHOLAR_PATH = pandoc-scholar OUTFILE_PREFIX = mutagenicity -DEFAULT_EXTENSIONS = latex pdf docx #odt epub html -PANDOC_WRITER_OPTIONS = --filter=pandoc-citeproc -#PANDOC_WRITER_OPTIONS = --filter=pandoc-placetable --filter=pandoc-citeproc +DEFAULT_EXTENSIONS = pdf #latex docx html #odt epub +#PANDOC_WRITER_OPTIONS = --filter=panpipe --filter=pandoc-placetable --filter=pandoc-citeproc -M tmpvar=test +PANDOC_WRITER_OPTIONS = --filter=pandoc-crossref --filter=pandoc-placetable --filter=pandoc-citeproc TEMPLATE_FILE_LATEX = pandoc-scholar.latex -include $(PANDOC_SCHOLAR_PATH)/Makefile - # Lazar LAZAR_DIR = ../lazar @@ -28,29 +28,55 @@ CONFUSION_MATRICES_DIR = 10-fold-crossvalidations/confusion-matrices R_CV_DIR = 10-fold-crossvalidations/R TENSORFLOW_CV_DIR = 10-fold-crossvalidations/tensorflow -#tables = tables/R-SVM.csv tables/R-RF.csv tables/R-DL.csv +TABLES = tables/r-summary.csv tables/tf-summary.csv tables/lazar-summary.csv tables/R-SVM.csv tables/R-RF.csv tables/R-DL.csv tables/tensorflow-all.csv tables/tensorflow-selected.csv tables/lazar-all.csv tables/lazar-high-confidence.csv tables/lazar-padel-all.csv tables/lazar-padel-high-confidence.csv + +R_SUMMARIES = $(SUMMARIES_DIR)/R-SVM.json $(SUMMARIES_DIR)/R-RF.json $(SUMMARIES_DIR)/R-DL.json +TF_SUMMARIES = $(SUMMARIES_DIR)/tensorflow-all.json $(SUMMARIES_DIR)/tensorflow-selected.json +LAZAR_SUMMARIES = $(SUMMARIES_DIR)/lazar-all.json $(SUMMARIES_DIR)/lazar-high-confidence.json $(SUMMARIES_DIR)/lazar-padel-all.json $(SUMMARIES_DIR)/lazar-padel-high-confidence.json -SUMMARIES = $(SUMMARIES_DIR)/R-SVM.csv 
$(SUMMARIES_DIR)/R-RF.csv $(SUMMARIES_DIR)/R-DL.csv $(SUMMARIES_DIR)/tensorflow-all.csv $(SUMMARIES_DIR)/tensorflow-selected.csv $(SUMMARIES_DIR)/lazar-all.csv $(SUMMARIES_DIR)/lazar-high-confidence.csv $(SUMMARIES_DIR)/lazar-padel-all.csv $(SUMMARIES_DIR)/lazar-padel-high-confidence.csv +SUMMARIES = $(R_SUMMARIES) $(TF_SUMMARIES) $(LAZAR_SUMMARIES) CONFUSION_MATRICES = $(CONFUSION_MATRICES_DIR)/R-SVM.csv $(CONFUSION_MATRICES_DIR)/R-RF.csv $(CONFUSION_MATRICES_DIR)/R-DL.csv $(CONFUSION_MATRICES_DIR)/tensorflow-all.csv $(CONFUSION_MATRICES_DIR)/tensorflow-selected.csv $(CONFUSION_MATRICES_DIR)/lazar-all.csv $(CONFUSION_MATRICES_DIR)/lazar-high-confidence.csv $(CONFUSION_MATRICES_DIR)/lazar-padel-all.csv $(CONFUSION_MATRICES_DIR)/lazar-padel-high-confidence.csv DATA = data/mutagenicity.sdf data/mutagenicity.csv data/mutagenicity-fingerprints.csv -all: $(SUMMARIES) $(DATA) #$(tables) +all: $(DATA) $(TABLES) mutagenicity.pdf $(SUMMARIES_DIR)/results.json +#all: $(SUMMARIES) $(DATA) $(TABLES) mutagenicity.pdf +include $(PANDOC_SCHOLAR_PATH)/Makefile export: $(DATA) +mutagenicity.mustache.md: $(SUMMARIES_DIR)/results.json mutagenicity.md + mustache $^ > $@ + +# tables + +tables/r-summary.csv: $(R_SUMMARIES) + scripts/summaries2table.rb $^ > $@ + +tables/tf-summary.csv: $(TF_SUMMARIES) + scripts/summaries2table.rb $^ > $@ + +tables/lazar-summary.csv: $(LAZAR_SUMMARIES) + scripts/summaries2table.rb $^ > $@ + +tables/%.csv: $(CONFUSION_MATRICES_DIR)/%.csv + scripts/confusion-matrix2table.rb $< > $@ + # summaries -$(SUMMARIES_DIR)/%.csv: $(CONFUSION_MATRICES_DIR)/%.csv +$(SUMMARIES_DIR)/results.json: $(SUMMARIES) + scripts/results.rb $^ > $@ + +$(SUMMARIES_DIR)/%.json: $(CONFUSION_MATRICES_DIR)/%.csv scripts/confusion-matrix-summary.rb $< > $@ # confusion matrices ## tensorflow -$(CONFUSION_MATRICES_DIR)/tensorflow-selected.csv: $(TENSORFLOW_CV_DIR)/pred.csv +$(CONFUSION_MATRICES_DIR)/tensorflow-selected.csv: $(TENSORFLOW_CV_DIR)/pred.sorted.csv 
scripts/cv-tensorflow-confusion-matrix.rb $< > $@ -$(CONFUSION_MATRICES_DIR)/tensorflow-all.csv: $(TENSORFLOW_CV_DIR)/pred_ext.csv +$(CONFUSION_MATRICES_DIR)/tensorflow-all.csv: $(TENSORFLOW_CV_DIR)/pred_ext.sorted.csv scripts/cv-tensorflow-confusion-matrix.rb $< > $@ ## R diff --git a/mutagenicity.md b/mutagenicity.md index bf4f6d1..2f80bad 100644 --- a/mutagenicity.md +++ b/mutagenicity.md @@ -134,8 +134,8 @@ of a compound can be constructed that can be used to calculate chemical similarities. The chemical similarity between two compounds a and b is expressed as -the proportion between atom environments common in both structures A ∩ B -and the total number of atom environments A U B (Jaccard/Tanimoto +the proportion between atom environments common in both structures $A \cap B$ +and the total number of atom environments $A \cup B$ (Jaccard/Tanimoto index). $$sim = \frac{\left| A\ \cap B \right|}{\left| A\ \cup B \right|}$$ @@ -335,117 +335,106 @@ Validation Results ======= -`lazar` ------ +{{#programs}} +{{name}} Models +-------- +{{#algos}} -Random Forest -------------- +### {{name}} -The validation showed that the RF model has an accuracy of 64%, a -sensitivity of 66% and a specificity of 63%. The confusion matrix of the +10-fold crossvalidation of the {{abbrev}} model gave an accuracy of +{{accuracy_perc}}%, +a sensitivity of +{{true_positive_rate_perc}}% +and a specificity of +{{true_negative_rate_perc}}%. +The confusion matrix of the model, calculated for 8080 instances, is provided in Table 1. 
-Table 1: Confusion matrix of the RF model +```{.table file="tables/R-RF.csv" caption="Confusion matrix for R Random Forest predictions"} +``` +{{/algos}} +{{/programs}} - Predicted genotoxicity - ----------------------- ------------------------ ---------- ---------- ------------- - Measured genotoxicity ***PP*** ***PN*** ***Total*** - ***TP*** 2274 1163 3437 - ***TN*** 1736 2907 4643 - ***Total*** 4010 4070 8080 +R Models +-------- -PP: Predicted positive; PN: Predicted negative, TP: True positive, TN: -True negative +### Random Forest -Support Vector Machines ------------------------ +The validation showed that the RF model has an accuracy of +{{R-RF.accuracy}}% +`cat /home/ch/src/mutagenicity-paper/10-fold-crossvalidations/summaries/R-RF.json|jq '.accuracy * 100 | round'`{pipe="sh"}%, +a sensitivity of +`cat /home/ch/src/mutagenicity-paper/10-fold-crossvalidations/summaries/R-RF.json|jq '.true_positive_rate * 100 | round'`{pipe="sh"}%, +and a specificity of +`cat /home/ch/src/mutagenicity-paper/10-fold-crossvalidations/summaries/R-RF.json|jq '.true_negative_rate * 100 | round'`{pipe="sh"}%, +The confusion matrix of the +model, calculated for 8080 instances, is provided in Table 1. + +```{.table file="tables/R-RF.csv" caption="Confusion matrix for R Random Forest predictions"} +``` + +### Support Vector Machines The validation showed that the SVM model has an accuracy of 62%, a sensitivity of 65% and a specificity of 60%. The confusion matrix of SVM model, calculated for 8080 instances, is provided in Table 2. 
-Table 2: Confusion matrix of the SVM model - - Predicted genotoxicity - ----------------------- ------------------------ ---------- ---------- ------------- - Measured genotoxicity ***PP*** ***PN*** ***Total*** - ***TP*** 2057 1107 3164 - ***TN*** 1953 2963 4916 - ***Total*** 4010 4070 8080 -PP: Predicted positive; PN: Predicted negative, TP: True positive, TN: -True negative +```{.table file="tables/R-SVM.csv" caption="Confusion matrix for R Support Vector Machine predictions"} +``` -Deep Learning (R-project) -------------------------- +### Deep Learning The validation showed that the DL model generated in R has an accuracy of 59%, a sensitivity of 89% and a specificity of 30%. The confusion matrix of the model, normalised to 8080 instances, is provided in Table 3. -Table 3: Confusion matrix of the DL model (R-project) +```{.table file="tables/R-DL.csv" caption="Confusion matrix for R Deep Learning predictions"} +``` - Predicted genotoxicity - ----------------------- ------------------------ ---------- ---------- ------------- - Measured genotoxicity ***PP*** ***PN*** ***Total*** - ***TP*** 3575 435 4010 - ***TN*** 2853 1217 4070 - ***Total*** 6428 1652 8080 +```{.table file="tables/r-summary.csv" caption="Summary of R model validations"} +``` -PP: Predicted positive; PN: Predicted negative, TP: True positive, TN: -True negative - -DL model (TensorFlow) ---------------------- +TensorFlow Models +----------------- The validation showed that the DL model generated in TensorFlow has an accuracy of 68%, a sensitivity of 70% and a specificity of 46%. The confusion matrix of the model, normalised to 8080 instances, is provided in Table 4. 
-Table 4: Confusion matrix of the DL model (TensorFlow) - - Predicted genotoxicity - ----------------------- ------------------------ ---------- ---------- ------------- - Measured genotoxicity ***PP*** ***PN*** ***Total*** - ***TP*** 2851 1227 4078 - ***TN*** 1825 2177 4002 - ***Total*** 4676 3404 8080 - -PP: Predicted positive; PN: Predicted negative, TP: True positive, TN: -True negative - -The ROC curves from the 6-fold validation are shown in Figure 7. +```{.table file="tables/tensorflow-all.csv" caption="Confusion matrix for Tensorflow predictions without variable selection"} +``` -![](figures/image7.png){width="3.825in" -height="2.7327045056867894in"} +```{.table file="tables/tensorflow-selected.csv" caption="Confusion matrix for Tensorflow predictions with variable selection"} +``` -Figure 7: Six-fold cross-validation of TensorFlow DL model show an -average area under the ROC-curve (ROC-AUC; measure of accuracy) of 68%. +```{.table file="tables/tf-summary.csv" caption="Summary of TensorFlow model validations"} +``` -In summary, the validation results of the four methods are presented in -the following table. 
+`lazar` Models +-------------- -Table 5 Results of the cross-validation of the four models and after -y-randomisation +### MolPrint2D Descriptors - ---------------------------------------------------------------------- - Accuracy CCR Sensitivity Specificity - ----------------------- ---------- ------- ------------- ------------- - RF model 64.1% 64.4% 66.2% 62.6% +```{.table file="tables/lazar-all.csv" caption="Confusion matrix for lazar predictions with MolPrint2D descriptors"} +``` - SVM model 62.1% 62.6% 65.0% 60.3% +```{.table file="tables/lazar-high-confidence.csv" caption="Confusion matrix for high confidence lazar predictions with MolPrint2D descriptors"} +``` - DL model\ 59.3% 59.5% 89.2% 29.9% - (R-project) +### PaDEL Descriptors - DL model (TensorFlow) 68% 62.2% 69.9% 45.6% +```{.table file="tables/lazar-padel-all.csv" caption="Confusion matrix for lazar predictions with PaDEL descriptors"} +``` - y-randomisation 50.5% 50.4% 50.3% 50.6% - ---------------------------------------------------------------------- +```{.table file="tables/lazar-padel-high-confidence.csv" caption="Confusion matrix for high confidence lazar predictions with PaDEL descriptors"} +``` -CCR (correct classification rate) +```{.table file="tables/lazar-summary.csv" caption="Summary of lazar model validations"} +``` Discussion ========== diff --git a/scripts/confusion-matrix2table.rb b/scripts/confusion-matrix2table.rb new file mode 100755 index 0000000..ccb4817 --- /dev/null +++ b/scripts/confusion-matrix2table.rb @@ -0,0 +1,10 @@ +#!/usr/bin/env ruby + +mat = [] +File.readlines(ARGV[0]).each do |l| + mat << l.chomp.split(",") +end +puts ",,Predictions," +puts ",,mutagenic,non-mutagenic" +puts "Measurements,mutagenic,#{mat[0][0]},#{mat[0][1]}" +puts ",non-mutagenic,#{mat[1][0]},#{mat[1][1]}" diff --git a/scripts/cv-tensorflow-confusion-matrix.rb b/scripts/cv-tensorflow-confusion-matrix.rb index 067519b..2b0ee58 100755 --- a/scripts/cv-tensorflow-confusion-matrix.rb +++ 
b/scripts/cv-tensorflow-confusion-matrix.rb @@ -7,7 +7,7 @@ tn = 0 fn = 0 pred = CSV.read(ARGV[0],headers: true,:col_sep => ",") -act = CSV.read(File.join(File.dirname(ARGV[0]),"GenoTox-database.csv"),headers: true,:col_sep => ",") +act = CSV.read(File.join("data","GenoTox-database.csv"),headers: true,:col_sep => ",") pred.each_with_index do |row,i| diff --git a/scripts/results.rb b/scripts/results.rb new file mode 100755 index 0000000..1a36278 --- /dev/null +++ b/scripts/results.rb @@ -0,0 +1,36 @@ +#!/usr/bin/env ruby +require 'json' + +result = {} +ARGV.each do |f| + fname = File.basename(f,".json") + program,algo = fname.split('-') + case program + when "tensorflow" + algo == "all" ? algo = "without feature selection" : algo = "with feature selection" + when "lazar" + algo = "high-confidence" if algo == "high" + if algo == "padel" + algo = "PaDEL" + fname.match("high") ? algo += " high-confidence" : algo += " all" + end + end + result[program] ||= {} + result[program][algo] = JSON.parse(File.read(f)).collect{|k,v| [k,v.round(2)]}.to_h +end + +out = {:programs => []} +result.keys.each do |prog| + out[:programs] << {:name => prog, :algos => []} + result[prog].keys.each do |algo| + r = result[prog][algo].dup + result[prog][algo].each do |k,v| + r[k+"_perc"] = (v*100).round + end + r[:name] = algo + r[:abbrev] = prog+"-"+algo + out[:programs].last[:algos] << r + end +end + +puts out.to_json diff --git a/scripts/summaries2table.rb b/scripts/summaries2table.rb new file mode 100755 index 0000000..5470b26 --- /dev/null +++ b/scripts/summaries2table.rb @@ -0,0 +1,19 @@ +#!/usr/bin/env ruby +require 'json' + +results = {} + +ARGV.each do |f| + results[File.basename(f,".json")] = JSON.parse(File.read(f)) +end + +print "," +puts results.keys.collect{|k| k.sub("tensorflow","TF")}.join(",") +["accuracy","true_positive_rate","true_negative_rate","positive_predictive_value","negative_predictive_value"].each do |m| + line = [m.gsub("_"," ")] + results.each do |k,v| + line << 
v[m].round(2) + end + puts line.join(",") +end + |