summaryrefslogtreecommitdiff
path: root/create-training.rb
diff options
context:
space:
mode:
Diffstat (limited to 'create-training.rb')
-rw-r--r-- create-training.rb 34
1 files changed, 34 insertions, 0 deletions
diff --git a/create-training.rb b/create-training.rb
new file mode 100644
index 0000000..acba25b
--- /dev/null
+++ b/create-training.rb
@@ -0,0 +1,34 @@
+require_relative 'include.rb'
+# Build DATA/training.csv from the mazzatorta and swiss datasets, labelling each value with the dataset(s) it came from.
+old = Dataset.from_csv_file File.join(DATA,"mazzatorta.csv")
+new = Dataset.from_csv_file File.join(DATA,"swiss.csv")
+# NOTE(review): despite its name, the list below is the union of both datasets' compound ids (concatenation + uniq), not their intersection.
+common_compound_ids = (old.compound_ids + new.compound_ids).uniq
+# Rows collected for the CSV; each row is [smiles, value, source label].
+data = []
+common_compound_ids.each do |cid|
+ c = Compound.find cid
+ old_values = old.values(c,old.features.first) # only the first feature of each dataset is read
+ new_values = new.values(c,new.features.first) # NOTE(review): assumes `values` returns an Array even for a compound missing from this dataset - TODO confirm
+ identical = old_values & new_values # values present in both lists for this compound
+ unless identical.empty? # remove the shared values so the per-dataset loops below do not emit them again
+ old_values -= identical
+ new_values -= identical
+ end # NOTE(review): if these are plain Arrays, subtracting an empty list is a no-op, so this guard looks redundant
+ identical.each do |v| # shared values get a row labelled with both sources
+ data << [c.smiles,v,"mazzatorta, swiss"]
+ end
+ old_values.each do |v| # remaining values seen only in the mazzatorta file
+ data << [c.smiles,v,"mazzatorta"]
+ end
+ new_values.each do |v| # remaining values seen only in the swiss file
+ data << [c.smiles,v,"swiss"]
+ end
+end
+# Order the rows in place by their value column (index 1).
+data.sort!{|a,b| a[1] <=> b[1]}
+# Write the header row followed by every collected row. NOTE(review): opened with "w+" although the file is never read back here.
+CSV.open(File.join(DATA,"training.csv"),"w+") do |csv|
+ csv << ["SMILES","LOAEL","Dataset"]
+ data.each{|r| csv << r}
+end