#!/usr/bin/env ruby
# Split the training data of a model into crossvalidation folds.
#
# Usage: crossvalidation-folds.rb MODEL [FOLDS]
#
#   MODEL  argument passed to Model.new; output goes below its directory
#   FOLDS  number of folds (default: 10)
#
# For every fold i three files are written to
# <dirname MODEL>/crossvalidation/<i>/:
#
#   train.csv              all columns of the training rows
#   test.csv               test rows without column 1
#   test-experimental.csv  columns 0 and 1 of the test rows
#
# The CSV headers imply that rows of model.train are laid out as
# [SMILES, dependent variable, independent variables...], i.e. column 1 is
# the measured value -- TODO confirm against lib/lazar.
#
# Provenance: added as bin/crossvalidation-folds.rb by commit
# 08e5768e9a446db8ab95152d2e9403a0e635ec63 ("cdk predictions fixed",
# Christoph Helma, Mon, 8 Mar 2021 17:41:26 +0100); text recovered from a
# cgit v1.2.3 patch view.

require "fileutils"
require_relative "../lib/lazar"

# Number of test instances per fold: the first (nr_instances % folds) folds
# receive one extra instance, so the sizes add up to nr_instances.
def fold_sizes(nr_instances, folds)
  size, remainder = nr_instances.divmod(folds)
  Array.new(folds) { |fold| fold < remainder ? size + 1 : size }
end

# Writes +header+ followed by one comma-joined line per row to +file+.
# Values are joined verbatim (no CSV quoting), as in the original script.
def write_csv(file, header, rows)
  File.open(file, "w") do |f|
    f.puts header
    rows.each { |row| f.puts row.join(",") }
  end
end

model_file = ARGV[0]
abort "Usage: #{File.basename($PROGRAM_NAME)} MODEL [FOLDS]" unless model_file
folds = ARGV[1] ? ARGV[1].to_i : 10
abort "FOLDS must be a positive integer" unless folds.positive?

model = Model.new model_file
rows = model.train
indices = (0...rows.size).to_a.shuffle
cv_root = File.join(File.dirname(model_file), "crossvalidation")

train_header = (["Canonical SMILES", model.dependent_variable_name] + model.independent_variable_names).join(",")
test_header = (["Canonical SMILES"] + model.independent_variable_names).join(",")
experimental_header = ["Canonical SMILES", model.dependent_variable_name].join(",")

# The split is computed in the parent process. Advancing the offset inside
# the forked child would only change the child's copy and every fold would
# start at index 0 again.
offset = 0
fold_sizes(indices.size, folds).each_with_index do |count, fold|
  test_idxs = indices[offset, count]
  train_idxs = indices - test_idxs
  offset += count

  fork do
    puts "Creating fold #{fold}"
    cv_dir = File.join(cv_root, fold.to_s)
    # FileUtils instead of a shell call: safe for paths with spaces or
    # shell metacharacters.
    FileUtils.mkdir_p cv_dir

    train_rows = train_idxs.map { |i| rows[i] }
    test_rows = test_idxs.map { |i| rows[i] }

    write_csv File.join(cv_dir, "train.csv"), train_header, train_rows
    # Drop column 1 on a copy; mutating the row itself would shift the
    # columns used for test-experimental.csv below.
    write_csv File.join(cv_dir, "test.csv"), test_header,
              test_rows.map { |row| row.dup.tap { |copy| copy.delete_at(1) } }
    write_csv File.join(cv_dir, "test-experimental.csv"), experimental_header,
              test_rows.map { |row| row[0..1] }
  end
end

# Wait in the parent so the script only returns once all folds are written,
# and report folds whose child process failed.
failed = Process.waitall.reject { |_pid, status| status.success? }
abort "#{failed.size} crossvalidation fold(s) failed" unless failed.empty?