summaryrefslogtreecommitdiff
path: root/scripts/tsne-cdk-descriptors.rb
blob: a994c29ddfc17e9f5d577cca5450a9e083e52821 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
#!/usr/bin/env ruby
# Restrict two descriptor CSV files to the descriptor columns they share.
#
# Usage: tsne-cdk-descriptors.rb TRAIN_CSV PA_CSV
#
# Both files are read as plain comma-separated text (no CSV quoting rules are
# applied to data rows). The first line of each file is a header; the first
# column of every line is an identifier (called `smi` in the original script,
# presumably a SMILES string - TODO confirm) and the remaining columns are
# descriptor values.
#
# For every data row - all rows of TRAIN_CSV first, then all rows of PA_CSV -
# one line is written to stdout: the identifier followed by the values of the
# descriptors present in both headers, in the column order of the TRAIN_CSV
# header. No header line is written.

# Split a CSV header line into descriptor names.
#
# Double quotes are removed from every name and the leading identifier column
# is dropped. Both input files go through this same normalisation so that a
# quoted header in one file still matches an unquoted header in the other.
#
# @param header_line [String, nil] first line of a file (nil for an empty file)
# @return [Array<String>] descriptor names in file order
def descriptor_names(header_line)
  header_line.to_s.chomp.split(",").map { |name| name.delete('"') }.drop(1)
end

# Reduce one data line to its identifier plus the shared descriptor values.
#
# Values are converted with String#to_f, so non-numeric text becomes 0.0.
# A shared descriptor for which the line has no field is emitted as an empty
# field (String#split drops trailing empty fields, so short rows are possible).
#
# @param line [String] one data line of the file
# @param header [Array<String>] descriptor names of the file the line is from
# @param common [Array<String>] descriptor names to keep, in output order
# @return [String] comma-joined output row
def project_row(line, header, common)
  identifier, *values = line.chomp.split(",")
  # Iterate over the values (not the header) so that absent trailing fields
  # stay absent instead of being coerced to 0.0.
  descriptors = values.zip(header).each_with_object({}) do |(value, name), by_name|
    by_name[name] = value.to_f
  end
  [identifier, *descriptors.values_at(*common)].join(",")
end

# Project every non-blank data line of one file onto the shared descriptors.
#
# @param lines [Array<String>] data lines (header already removed)
# @param header [Array<String>] descriptor names of that file
# @param common [Array<String>] descriptor names to keep, in output order
# @return [Array<String>] output rows
def project_rows(lines, header, common)
  lines.reject { |line| line.strip.empty? }
       .map { |line| project_row(line, header, common) }
end

# Build the output rows for both files.
#
# @param train_lines [Array<String>] all lines of the first file, header first
# @param pa_lines [Array<String>] all lines of the second file, header first
# @return [Array<String>] rows of the first file followed by rows of the second
def shared_descriptor_rows(train_lines, pa_lines)
  train_header = descriptor_names(train_lines.first)
  pa_header = descriptor_names(pa_lines.first)
  # Array#& keeps the order of the receiver, i.e. of the first file's header.
  common = train_header & pa_header

  project_rows(train_lines.drop(1), train_header, common) +
    project_rows(pa_lines.drop(1), pa_header, common)
end

abort "usage: #{File.basename($PROGRAM_NAME)} TRAIN_CSV PA_CSV" if ARGV.size < 2

shared_descriptor_rows(File.readlines(ARGV[0]), File.readlines(ARGV[1])).each do |row|
  puts row
end