diff options
author | Christoph Helma <helma@in-silico.ch> | 2016-03-02 11:20:26 +0100 |
---|---|---|
committer | Christoph Helma <helma@in-silico.ch> | 2016-03-02 11:20:26 +0100 |
commit | d3071896a7116670756199f0df7c2a618de2aea3 (patch) | |
tree | 2cf71d47232c08da2973452950e1f969c733478a /create-training.rb | |
parent | 7424234dbf1d7ebdb7a15adaec71c8b6fb53890f (diff) |
repeated crossvalidations
Diffstat (limited to 'create-training.rb')
-rw-r--r-- | create-training.rb | 34 |
1 files changed, 34 insertions, 0 deletions
# create-training.rb
#
# Builds DATA/training.csv from the mazzatorta and swiss datasets. For every
# compound id present in either dataset, the values of each dataset's first
# feature are compared, and every value becomes one output row tagged with the
# dataset(s) it was found in.
require_relative 'include.rb'

mazzatorta = Dataset.from_csv_file(File.join(DATA, "mazzatorta.csv"))
swiss = Dataset.from_csv_file(File.join(DATA, "swiss.csv"))

# Union (not intersection) of both datasets' compound ids, duplicates removed.
all_compound_ids = (mazzatorta.compound_ids + swiss.compound_ids).uniq

rows = all_compound_ids.flat_map do |cid|
  compound = Compound.find(cid)
  mazzatorta_values = mazzatorta.values(compound, mazzatorta.features.first)
  swiss_values = swiss.values(compound, swiss.features.first)

  # Array#& yields each shared value once; Array#- then removes every
  # occurrence of a shared value from the per-dataset lists.
  shared = mazzatorta_values & swiss_values
  labelled_groups = [
    [shared, "mazzatorta, swiss"],
    [mazzatorta_values - shared, "mazzatorta"],
    [swiss_values - shared, "swiss"]
  ]

  # Row order per compound: shared values, then mazzatorta-only, then swiss-only.
  labelled_groups.flat_map do |values, label|
    values.map { |value| [compound.smiles, value, label] }
  end
end

# Order rows by the value column (index 1).
rows.sort! { |left, right| left[1] <=> right[1] }

CSV.open(File.join(DATA, "training.csv"), "w+") do |csv|
  csv << ["SMILES", "LOAEL", "Dataset"]
  rows.each { |row| csv << row }
end