summary | refs | log | tree | commit | diff
path: root/bin/crossvalidation-folds.rb
diff options
context:
space:
mode:
author Christoph Helma <helma@in-silico.ch> 2021-03-08 17:41:26 +0100
committer Christoph Helma <helma@in-silico.ch> 2021-03-08 17:41:26 +0100
commit 08e5768e9a446db8ab95152d2e9403a0e635ec63 (patch)
tree 6f4486c6bfd84b69febcb9d3a4d9de8fee1b1a26 /bin/crossvalidation-folds.rb
parent a29eb3e38414cd252850c9c4fb356f8b2bef6fb4 (diff)
cdk predictions fixed
Diffstat (limited to 'bin/crossvalidation-folds.rb')
-rwxr-xr-x bin/crossvalidation-folds.rb 54
1 files changed, 54 insertions, 0 deletions
diff --git a/bin/crossvalidation-folds.rb b/bin/crossvalidation-folds.rb
new file mode 100755
index 0000000..0c765f7
--- /dev/null
+++ b/bin/crossvalidation-folds.rb
@@ -0,0 +1,70 @@
+#!/usr/bin/env ruby
+# Splits the training data of a model into crossvalidation folds.
+#
+# Usage: crossvalidation-folds.rb MODEL [FOLDS]
+#   MODEL  argument passed to Model.new; fold directories are created
+#          beside it as crossvalidation/<fold number>/
+#   FOLDS  number of folds (default: 10)
+#
+# Files written per fold:
+#   train.csv              header plus the complete training rows
+#   test.csv               test rows without the column at index 1
+#   test-experimental.csv  columns at index 0 and 1 of the test rows
+#
+# NOTE(review): the CSV headers imply that a row is laid out as
+# [SMILES, dependent variable, independent variables...] - confirm in Model.
+require "fileutils"
+require_relative "../lib/lazar"
+
+model = Model.new ARGV[0]
+folds = ARGV[1] ? ARGV[1].to_i : 10
+abort "Number of folds must be a positive integer" if folds < 1
+
+rows = model.train
+nr_instances = rows.size
+indices = (0...nr_instances).to_a.shuffle
+base_size = nr_instances / folds
+remainder = nr_instances % folds
+cv_root = File.join(File.dirname(ARGV[0]), "crossvalidation")
+
+start = 0
+folds.times do |fold|
+  # The first `remainder` folds take one extra instance, so every instance
+  # lands in exactly one test set.
+  size = base_size + (fold < remainder ? 1 : 0)
+  test_idxs = indices[start, size] || []
+  train_idxs = indices - test_idxs
+  # Advance the offset here in the parent: a forked child works on a copy of
+  # the process, so an assignment inside the fork block would be lost and
+  # every fold would start at offset 0 again.
+  start += size
+
+  fork do
+    puts "Creating fold #{fold}"
+    cv_dir = File.join(cv_root, fold.to_s)
+    # FileUtils avoids passing the path through a shell.
+    FileUtils.mkdir_p cv_dir
+    test_rows = test_idxs.map { |idx| rows[idx] }
+
+    File.open(File.join(cv_dir, "train.csv"), "w") do |f|
+      f.puts((["Canonical SMILES", model.dependent_variable_name] + model.independent_variable_names).join(","))
+      train_idxs.each { |idx| f.puts rows[idx].join(",") }
+    end
+
+    File.open(File.join(cv_dir, "test.csv"), "w") do |f|
+      f.puts((["Canonical SMILES"] + model.independent_variable_names).join(","))
+      # Build each line without column 1 instead of deleting it in place, so
+      # the rows stay intact for the experimental file written below.
+      test_rows.each { |row| f.puts((row.take(1) + row.drop(2)).join(",")) }
+    end
+
+    File.open(File.join(cv_dir, "test-experimental.csv"), "w") do |f|
+      f.puts(["Canonical SMILES", model.dependent_variable_name].join(","))
+      test_rows.each { |row| f.puts row[0..1].join(",") }
+    end
+  end
+end
+
+# Wait in the parent, which owns the forked children, and report failures.
+failed = Process.waitall.reject { |_pid, status| status.success? }
+abort "#{failed.size} fold(s) could not be written" unless failed.empty?