diff options
Diffstat (limited to 'unique-smiles.rb')
-rw-r--r-- | unique-smiles.rb | 19 |
1 files changed, 19 insertions, 0 deletions
# Source: unique-smiles.rb (new file, mode 100644, blob d35ce90)
#
# Writes a CSV with one SMILES / LOAEL / dataset-name row per compound of an
# input dataset, sorted by ascending LOAEL value.
#
# Usage: ruby unique-smiles.rb INPUT_CSV DATASET_NAME
#   INPUT_CSV    - path of the CSV file loaded with Dataset.from_csv_file
#   DATASET_NAME - label written into the "Dataset" column; also the basename
#                  of the output file, which is created next to INPUT_CSV as
#                  DATASET_NAME.csv
require_relative "include.rb"

infile, name = ARGV

# Both arguments are required. Without this check a missing DATASET_NAME
# silently produced a file called ".csv" with an empty Dataset column, and a
# missing INPUT_CSV surfaced as a TypeError raised by File.join.
if [infile, name].any? { |arg| arg.nil? || arg.empty? }
  abort "usage: ruby unique-smiles.rb INPUT_CSV DATASET_NAME"
end

input = Dataset.from_csv_file infile
outname = File.join(File.dirname(infile), "#{name}.csv")

# Build one [SMILES, value, dataset name] row per compound. Only the first
# data entry of each compound is used.
rows = input.compounds.each_with_index.map do |cid, i|
  compound = Compound.find cid
  # round to 5 significant digits in order to detect duplicates
  value = input.data_entries[i].first.signif(5)
  [compound.smiles, value, name]
end

# Ascending by the rounded value (second column).
rows.sort! { |a, b| a[1] <=> b[1] }

CSV.open(outname, "w") do |csv|
  csv << ["SMILES", "LOAEL", "Dataset"]
  rows.each { |row| csv << row }
end