diff options
author | Christoph Helma <helma@in-silico.ch> | 2021-03-08 17:41:26 +0100 |
---|---|---|
committer | Christoph Helma <helma@in-silico.ch> | 2021-03-08 17:41:26 +0100 |
commit | 08e5768e9a446db8ab95152d2e9403a0e635ec63 (patch) | |
tree | 6f4486c6bfd84b69febcb9d3a4d9de8fee1b1a26 /bin/crossvalidation-folds.rb | |
parent | a29eb3e38414cd252850c9c4fb356f8b2bef6fb4 (diff) |
cdk predictions fixed
Diffstat (limited to 'bin/crossvalidation-folds.rb')
-rwxr-xr-x | bin/crossvalidation-folds.rb | 54 |
1 files changed, 54 insertions, 0 deletions
#!/usr/bin/env ruby
# frozen_string_literal: true

# Splits a model's training data into crossvalidation folds.
#
# Usage: crossvalidation-folds.rb MODEL [FOLDS]
#   MODEL  argument handed to Model.new; output is written below its dirname
#   FOLDS  number of folds (default: 10)
#
# For every fold <i> three files are written to
# <dirname of MODEL>/crossvalidation/<i>/ :
#   train.csv              SMILES, dependent variable, independent variables
#   test.csv               SMILES, independent variables
#   test-experimental.csv  SMILES, dependent variable
#
# NOTE(review): assumes every row of model.train is laid out as
# [smiles, dependent value, *independent values], which is what the headers
# written below imply -- confirm against lib/lazar.
require "fileutils"
require_relative "../lib/lazar"

# Partitions +indices+ into +folds+ consecutive, non-overlapping slices whose
# sizes differ by at most one; the first (indices.size % folds) slices receive
# the extra element. Returns an Array of +folds+ Arrays.
def fold_slices(indices, folds)
  base, extra = indices.size.divmod(folds)
  start = 0
  Array.new(folds) do |fold|
    size = fold < extra ? base + 1 : base
    slice = indices[start, size] || []
    start += size
    slice
  end
end

# Writes +header+ followed by +rows+ to +file+, one comma-joined line each.
def write_csv(file, header, rows)
  File.open(file, "w") do |f|
    f.puts header.join(",")
    rows.each { |row| f.puts row.join(",") }
  end
end

abort "Usage: #{File.basename($PROGRAM_NAME)} MODEL [FOLDS]" if ARGV.empty?

model = Model.new ARGV[0]
folds = ARGV[1] ? ARGV[1].to_i : 10
# to_i yields 0 for non-numeric input, which would divide by zero below.
abort "Number of folds must be a positive integer" if folds < 1

train = model.train
dependent = model.dependent_variable_name
independent = model.independent_variable_names
# Shuffle once in the parent so every child works on the same permutation.
indices = (0...train.size).to_a.shuffle

# The split is computed in the parent: state changed inside a forked child is
# invisible to the parent, so advancing the offset there would hand every
# fold the same test slice.
fold_slices(indices, folds).each_with_index do |test_idxs, fold|
  fork do
    puts "Creating fold #{fold}"
    cv_dir = File.join(File.dirname(ARGV[0]), "crossvalidation", fold.to_s)
    FileUtils.mkdir_p cv_dir

    train_rows = (indices - test_idxs).map { |idx| train[idx] }
    test_rows = test_idxs.map { |idx| train[idx] }

    write_csv File.join(cv_dir, "train.csv"),
              ["Canonical SMILES", dependent] + independent,
              train_rows
    # Drop the dependent value (column 1) without mutating the row, so the
    # experimental file below still sees it.
    write_csv File.join(cv_dir, "test.csv"),
              ["Canonical SMILES"] + independent,
              test_rows.map { |row| row.first(1) + row.drop(2) }
    write_csv File.join(cv_dir, "test-experimental.csv"),
              ["Canonical SMILES", dependent],
              test_rows.map { |row| row.first(2) }
  end
end

# Wait for all fold workers in the parent and report failures.
failed = Process.waitall.reject { |_pid, status| status.success? }
abort "#{failed.size} crossvalidation fold(s) failed" unless failed.empty?