From c94ac24d68c137e93d11f0a7d7621ab0b2e808d7 Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Thu, 9 Aug 2018 20:00:01 +0200
Subject: merged validation

---
 Makefile                                |  11 +-
 experiments/weighted_majority_merged.id |   1 +
 scripts/repeated_crossvalidation.rb     |   8 +-
 summaries/weighted_majority_merged.json | 182 ++++++++++++++++++++++++++++++++
 4 files changed, 197 insertions(+), 5 deletions(-)
 create mode 100644 experiments/weighted_majority_merged.id
 mode change 100644 => 100755 scripts/repeated_crossvalidation.rb
 create mode 100644 summaries/weighted_majority_merged.json

diff --git a/Makefile b/Makefile
index 1bbcb57..be4fa18 100644
--- a/Makefile
+++ b/Makefile
@@ -9,8 +9,17 @@ data  = data/kazius.csv data/efsa.csv data/hansen.csv
 #all: $(data)
 # experiments
 
+summaries/weighted_majority_kazius.json: experiments/weighted_majority_kazius.id
+	scripts/repeated_crossvalidation_summary.rb $< > $@
+
+experiments/weighted_majority_kazius.id: data/kazius.csv
+	scripts/repeated_crossvalidation.rb $< > $@
+
+summaries/weighted_majority_merged.json: experiments/weighted_majority_merged.id
+	scripts/repeated_crossvalidation_summary.rb $< > $@
+
 experiments/weighted_majority_merged.id: data/merged.csv
-	scripts/repeated_crossvalidation.rb data/merged.csv > $@
+	scripts/repeated_crossvalidation.rb $< > $@
 	
 # training data
 
diff --git a/experiments/weighted_majority_merged.id b/experiments/weighted_majority_merged.id
new file mode 100644
index 0000000..6e09c92
--- /dev/null
+++ b/experiments/weighted_majority_merged.id
@@ -0,0 +1 @@
+5b6c55beca626902ea7561ee
diff --git a/scripts/repeated_crossvalidation.rb b/scripts/repeated_crossvalidation.rb
old mode 100644
new mode 100755
index 9783c03..8631245
--- a/scripts/repeated_crossvalidation.rb
+++ b/scripts/repeated_crossvalidation.rb
@@ -2,9 +2,9 @@
 require_relative '../../lazar/lib/lazar'
 include OpenTox
 
-file = ARGV[0]
-dataset = Dataset.from_csv_file file
-model = Model::LazarRegression.create(training_dataset: dataset)
+dataset = Dataset.from_csv_file ARGV[0]
 
-rcv = Validation::RepeatedCrossValidation.create model, :repeats => 5
+model = Model::LazarClassification.create(training_dataset: dataset)
+
+rcv = Validation::RepeatedCrossValidation.create(model)
 puts "#{rcv.id}"
diff --git a/summaries/weighted_majority_merged.json b/summaries/weighted_majority_merged.json
new file mode 100644
index 0000000..810dbca
--- /dev/null
+++ b/summaries/weighted_majority_merged.json
@@ -0,0 +1,182 @@
+[
+  {
+    "accept_values": [
+      "1",
+      "0"
+    ],
+    "confusion_matrix": [
+      [
+        3373,
+        1316
+      ],
+      [
+        669,
+        2828
+      ]
+    ],
+    "weighted_confusion_matrix": [
+      [
+        1822.433712652638,
+        560.6063864210176
+      ],
+      [
+        242.65797492287484,
+        1318.160515235499
+      ]
+    ],
+    "accuracy": 0.7575128267774248,
+    "weighted_accuracy": 0.7963252628942994,
+    "true_rate": {
+      "1": 0.7193431435274046,
+      "0": 0.808693165570489
+    },
+    "predictivity": {
+      "1": 0.8344878772884711,
+      "0": 0.6824324324324325
+    }
+  },
+  {
+    "accept_values": [
+      "1",
+      "0"
+    ],
+    "confusion_matrix": [
+      [
+        3386,
+        1307
+      ],
+      [
+        658,
+        2838
+      ]
+    ],
+    "weighted_confusion_matrix": [
+      [
+        1824.264631237564,
+        559.2324128586179
+      ],
+      [
+        241.24201179451697,
+        1309.7465905628842
+      ]
+    ],
+    "accuracy": 0.7600439614116498,
+    "weighted_accuracy": 0.7965491562093621,
+    "true_rate": {
+      "1": 0.7215001065416577,
+      "0": 0.8117848970251716
+    },
+    "predictivity": {
+      "1": 0.8372898120672602,
+      "0": 0.684680337756333
+    }
+  },
+  {
+    "accept_values": [
+      "1",
+      "0"
+    ],
+    "confusion_matrix": [
+      [
+        3381,
+        1314
+      ],
+      [
+        663,
+        2828
+      ]
+    ],
+    "weighted_confusion_matrix": [
+      [
+        1814.3112605631964,
+        561.4771018317434
+      ],
+      [
+        242.7105117609447,
+        1310.7038716089296
+      ]
+    ],
+    "accuracy": 0.7584901050574151,
+    "weighted_accuracy": 0.7953305885120077,
+    "true_rate": {
+      "1": 0.7201277955271566,
+      "0": 0.8100830707533658
+    },
+    "predictivity": {
+      "1": 0.836053412462908,
+      "0": 0.6827619507484307
+    }
+  },
+  {
+    "accept_values": [
+      "1",
+      "0"
+    ],
+    "confusion_matrix": [
+      [
+        3405,
+        1328
+      ],
+      [
+        638,
+        2816
+      ]
+    ],
+    "weighted_confusion_matrix": [
+      [
+        1822.5516779697866,
+        564.4178168671694
+      ],
+      [
+        235.65618795776263,
+        1310.0938586478562
+      ]
+    ],
+    "accuracy": 0.7598631977525345,
+    "weighted_accuracy": 0.7965596080793866,
+    "true_rate": {
+      "1": 0.7194168603422776,
+      "0": 0.8152866242038217
+    },
+    "predictivity": {
+      "1": 0.842196388820183,
+      "0": 0.6795366795366795
+    }
+  },
+  {
+    "accept_values": [
+      "1",
+      "0"
+    ],
+    "confusion_matrix": [
+      [
+        3370,
+        1316
+      ],
+      [
+        673,
+        2827
+      ]
+    ],
+    "weighted_confusion_matrix": [
+      [
+        1809.4488473160145,
+        560.9971312333264
+      ],
+      [
+        247.69910247012456,
+        1306.990335940227
+      ]
+    ],
+    "accuracy": 0.7570241876374297,
+    "weighted_accuracy": 0.7939698512797181,
+    "true_rate": {
+      "1": 0.7191634656423389,
+      "0": 0.8077142857142857
+    },
+    "predictivity": {
+      "1": 0.8335394509027949,
+      "0": 0.6823557808351436
+    }
+  }
+]
-- 
cgit v1.2.3