#!/usr/bin/env ruby
# Splits a model's training instances into cross-validation folds.
#
# Usage: <this script> FILE [FOLDS]
#
#   FILE   argument handed to Model.new; the output is written next to it
#   FOLDS  number of folds (default: 10)
#
# For every fold i the directory <dirname of FILE>/crossvalidation/<i>/ receives
#   train.csv - all instances that are not in the fold's test set
#   test.csv  - the fold's test instances with column 1 removed
#               (the column that train.csv labels with dependent_variable_name)
#
# Each fold is written by its own child process.
require "fileutils"
require_relative "../lib/lazar"

DEFAULT_FOLDS = 10

# Returns one index Range per fold, partitioning 0...nr_instances.
# The first (nr_instances % folds) folds receive one extra instance, so fold
# sizes differ by at most one and every instance lands in exactly one fold.
def fold_ranges(nr_instances, folds)
  base, remainder = nr_instances.divmod(folds)
  start = 0
  Array.new(folds) do |fold|
    size = fold < remainder ? base + 1 : base
    range = start...(start + size)
    start += size
    range
  end
end

# Writes train.csv and test.csv for a single fold into cv_dir.
#
# model  - object providing train, dependent_variable_name and
#          independent_variable_names
# cv_dir - output directory, created if missing
# idxs   - { :train => [row indices], :test => [row indices] } into model.train
def write_fold(model, cv_dir, idxs)
  # FileUtils instead of shelling out to `mkdir -p`: safe for paths that
  # contain spaces or shell metacharacters.
  FileUtils.mkdir_p(cv_dir)
  features = model.independent_variable_names

  File.open(File.join(cv_dir, "train.csv"), "w") do |f|
    f.puts((["Canonical SMILES", model.dependent_variable_name] + features).join(","))
    idxs[:train].each { |row_idx| f.puts model.train[row_idx].join(",") }
  end

  File.open(File.join(cv_dir, "test.csv"), "w") do |f|
    f.puts((["Canonical SMILES"] + features).join(","))
    idxs[:test].each do |row_idx|
      # Work on a copy so the model's own rows are never mutated.
      row = model.train[row_idx].dup
      row.delete_at(1) # drop the column that holds the dependent variable
      f.puts row.join(",")
    end
  end
end

abort "Usage: #{File.basename($PROGRAM_NAME)} FILE [FOLDS]" if ARGV.empty?

input = ARGV[0]
folds = ARGV[1] ? ARGV[1].to_i : DEFAULT_FOLDS
abort "FOLDS must be a positive integer (got #{ARGV[1].inspect})" unless folds > 0

model = Model.new input
indices = (0...model.train.size).to_a.shuffle
cv_root = File.join(File.dirname(input), "crossvalidation")

# The split is computed in the parent. A forked child only sees a copy of the
# parent's variables, so advancing the fold offset inside the child would never
# reach the next iteration and all folds would share the same test set.
fold_ranges(indices.size, folds).each_with_index do |range, fold|
  test_idxs = indices[range]
  idxs = {
    :train => indices - test_idxs,
    :test => test_idxs
  }
  fork do
    puts "Creating fold #{fold}"
    p idxs
    write_fold(model, File.join(cv_root, fold.to_s), idxs)
  end
end

# Wait in the parent until every fold has been written, and report failures.
failed = Process.waitall.reject { |_pid, status| status.success? }
abort "#{failed.size} fold(s) could not be written" unless failed.empty?