From c94ac24d68c137e93d11f0a7d7621ab0b2e808d7 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Thu, 9 Aug 2018 20:00:01 +0200 Subject: merged validation --- Makefile | 11 +- experiments/weighted_majority_merged.id | 1 + scripts/repeated_crossvalidation.rb | 8 +- summaries/weighted_majority_merged.json | 182 ++++++++++++++++++++++++++++++++ 4 files changed, 197 insertions(+), 5 deletions(-) create mode 100644 experiments/weighted_majority_merged.id mode change 100644 => 100755 scripts/repeated_crossvalidation.rb create mode 100644 summaries/weighted_majority_merged.json diff --git a/Makefile b/Makefile index 1bbcb57..be4fa18 100644 --- a/Makefile +++ b/Makefile @@ -9,8 +9,17 @@ data = data/kazius.csv data/efsa.csv data/hansen.csv #all: $(data) # experiments +summaries/weighted_majority_kazius.json: experiments/weighted_majority_kazius.id + scripts/repeated_crossvalidation_summary.rb $< > $@ + +experiments/weighted_majority_kazius.id: data/kazius.csv + scripts/repeated_crossvalidation.rb $< > $@ + +summaries/weighted_majority_merged.json: experiments/weighted_majority_merged.id + scripts/repeated_crossvalidation_summary.rb $< > $@ + experiments/weighted_majority_merged.id: data/merged.csv - scripts/repeated_crossvalidation.rb data/merged.csv > $@ + scripts/repeated_crossvalidation.rb $< > $@ # training data diff --git a/experiments/weighted_majority_merged.id b/experiments/weighted_majority_merged.id new file mode 100644 index 0000000..6e09c92 --- /dev/null +++ b/experiments/weighted_majority_merged.id @@ -0,0 +1 @@ +5b6c55beca626902ea7561ee diff --git a/scripts/repeated_crossvalidation.rb b/scripts/repeated_crossvalidation.rb old mode 100644 new mode 100755 index 9783c03..8631245 --- a/scripts/repeated_crossvalidation.rb +++ b/scripts/repeated_crossvalidation.rb @@ -2,9 +2,9 @@ require_relative '../../lazar/lib/lazar' include OpenTox -file = ARGV[0] -dataset = Dataset.from_csv_file file -model = Model::LazarRegression.create(training_dataset: dataset) +dataset = Dataset.from_csv_file ARGV[0] -rcv = Validation::RepeatedCrossValidation.create model, :repeats => 5 +model = Model::LazarClassification.create(training_dataset: dataset) + +rcv = Validation::RepeatedCrossValidation.create(model) puts "#{rcv.id}" diff --git a/summaries/weighted_majority_merged.json b/summaries/weighted_majority_merged.json new file mode 100644 index 0000000..810dbca --- /dev/null +++ b/summaries/weighted_majority_merged.json @@ -0,0 +1,182 @@ +[ + { + "accept_values": [ + "1", + "0" + ], + "confusion_matrix": [ + [ + 3373, + 1316 + ], + [ + 669, + 2828 + ] + ], + "weighted_confusion_matrix": [ + [ + 1822.433712652638, + 560.6063864210176 + ], + [ + 242.65797492287484, + 1318.160515235499 + ] + ], + "accuracy": 0.7575128267774248, + "weighted_accuracy": 0.7963252628942994, + "true_rate": { + "1": 0.7193431435274046, + "0": 0.808693165570489 + }, + "predictivity": { + "1": 0.8344878772884711, + "0": 0.6824324324324325 + } + }, + { + "accept_values": [ + "1", + "0" + ], + "confusion_matrix": [ + [ + 3386, + 1307 + ], + [ + 658, + 2838 + ] + ], + "weighted_confusion_matrix": [ + [ + 1824.264631237564, + 559.2324128586179 + ], + [ + 241.24201179451697, + 1309.7465905628842 + ] + ], + "accuracy": 0.7600439614116498, + "weighted_accuracy": 0.7965491562093621, + "true_rate": { + "1": 0.7215001065416577, + "0": 0.8117848970251716 + }, + "predictivity": { + "1": 0.8372898120672602, + "0": 0.684680337756333 + } + }, + { + "accept_values": [ + "1", + "0" + ], + "confusion_matrix": [ + [ + 3381, + 1314 + ], + [ + 663, + 2828 + ] + ], + "weighted_confusion_matrix": [ + [ + 1814.3112605631964, + 561.4771018317434 + ], + [ + 242.7105117609447, + 1310.7038716089296 + ] + ], + "accuracy": 0.7584901050574151, + "weighted_accuracy": 0.7953305885120077, + "true_rate": { + "1": 0.7201277955271566, + "0": 0.8100830707533658 + }, + "predictivity": { + "1": 0.836053412462908, + "0": 0.6827619507484307 + } + }, + { + "accept_values": [ + "1", + "0" + ], + "confusion_matrix": [ + [ + 3405, + 1328 + ], + [ + 638, + 2816 + ] + ], + "weighted_confusion_matrix": [ + [ + 1822.5516779697866, + 564.4178168671694 + ], + [ + 235.65618795776263, + 1310.0938586478562 + ] + ], + "accuracy": 0.7598631977525345, + "weighted_accuracy": 0.7965596080793866, + "true_rate": { + "1": 0.7194168603422776, + "0": 0.8152866242038217 + }, + "predictivity": { + "1": 0.842196388820183, + "0": 0.6795366795366795 + } + }, + { + "accept_values": [ + "1", + "0" + ], + "confusion_matrix": [ + [ + 3370, + 1316 + ], + [ + 673, + 2827 + ] + ], + "weighted_confusion_matrix": [ + [ + 1809.4488473160145, + 560.9971312333264 + ], + [ + 247.69910247012456, + 1306.990335940227 + ] + ], + "accuracy": 0.7570241876374297, + "weighted_accuracy": 0.7939698512797181, + "true_rate": { + "1": 0.7191634656423389, + "0": 0.8077142857142857 + }, + "predictivity": { + "1": 0.8335394509027949, + "0": 0.6823557808351436 + } + } +] -- cgit v1.2.3