diff options
Diffstat (limited to 'scripts/create-training.rb')
-rwxr-xr-x | scripts/create-training.rb | 21 |
1 files changed, 9 insertions, 12 deletions
diff --git a/scripts/create-training.rb b/scripts/create-training.rb index d05bc1c..8fca3f4 100755 --- a/scripts/create-training.rb +++ b/scripts/create-training.rb @@ -2,14 +2,14 @@ require_relative '../../lazar/lib/lazar' include OpenTox -old = Dataset.from_csv_file File.join(DATA,"mazzatorta.csv") -new = Dataset.from_csv_file File.join(DATA,"swiss.csv") +old = Dataset.from_csv_file File.join("data","mazzatorta_log10.csv") +new = Dataset.from_csv_file File.join("data","swiss_log10.csv") -common_compound_ids = (old.compound_ids + new.compound_ids).uniq +common_compounds = (old.compounds + new.compounds).uniq +puts ["SMILES","-log10(LOAEL)","Dataset"].join "," data = [] -common_compound_ids.each do |cid| - c = Compound.find cid +common_compounds.each do |c| old_values = old.values(c,old.features.first) new_values = new.values(c,new.features.first) identical = old_values & new_values @@ -18,19 +18,16 @@ common_compound_ids.each do |cid| new_values -= identical end identical.each do |v| - data << [c.smiles,v,"mazzatorta, swiss"] + data << [c.smiles,v,"mazzatorta and swiss"] if v end old_values.each do |v| - data << [c.smiles,v,"mazzatorta"] + data << [c.smiles,v,"mazzatorta"] if v end new_values.each do |v| - data << [c.smiles,v,"swiss"] + data << [c.smiles,v,"swiss"] if v end end data.sort!{|a,b| a[1] <=> b[1]} -CSV.open(File.join(DATA,"training.csv"),"w+") do |csv| - csv << ["SMILES","LOAEL","Dataset"] - data.each{|r| csv << r} -end +puts data.collect{|r| r.join ","}.join "\n" |