summaryrefslogtreecommitdiff
path: root/scripts/data.rb
blob: d24e46bc1eee043845fb5ae27ab0410c6381f5f0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#!/usr/bin/env ruby
require 'yaml'

# Collect the summary statistics produced by the individual analyses into one
# hash. Each listed directory must contain a summary.yaml; when two files
# define the same top-level key, the later file wins (Hash#merge!).
data = {}
["crossvalidations","pyrrolizidine-alkaloids"].each do |dir|
  path = File.join(dir,"summary.yaml")
  # The summaries are keyed by Symbols (data[:cv] and data[:pa] are used
  # further down). Since Psych 4 (Ruby >= 3.1) YAML.load_file is a safe load
  # and raises Psych::DisallowedClass for Symbols, so use the explicit legacy
  # loader where it exists and fall back to load_file on older Psych versions.
  # These files are generated inside this repository; do not point this loader
  # at YAML from untrusted sources.
  summary = YAML.respond_to?(:unsafe_load_file) ? YAML.unsafe_load_file(path) : YAML.load_file(path)
  data.merge!(summary)
end

# Index the mutagenicity data by its first CSV column (the SMILES string):
# mut maps each first-column value to the list of second-column values found
# for it. The file is read once and every count below is derived from that
# single read, instead of re-reading it through `cut | sort -u | wc -l`
# (which depends on external tools and on the locale's collation rules, and
# does not count a final line that lacks a trailing newline).
mut = {}
mut_lines = File.readlines("mutagenicity/mutagenicity.csv")
mut_lines.each do |line|
  smi, m = line.chomp.split(",")
  (mut[smi] ||= []) << m
end

# Number of first-column values that occur on more than one line.
data[:cv][:n_mult] = mut.count { |_smi, values| values.size > 1 }

# Both totals subtract one line, presumably the CSV header row -- the header is
# part of mut_lines and of mut's keys, exactly as it was part of the shell
# pipeline's input.
data[:cv][:n] = mut_lines.size - 1
data[:cv][:n_uniq] = mut.size - 1

# Size of the descriptor table in mutagenicity-cdk.csv.
# The first line is treated as the header: n_descriptors is the number of its
# comma-separated fields minus one (presumably the leading compound column --
# confirm against the file); n_compounds is the number of remaining lines.
cdk_stats = data[:cv][:cdk] = {}
cdk_header, *cdk_rows = File.readlines("mutagenicity/mutagenicity-cdk.csv")
cdk_stats[:n_descriptors] = cdk_header.split(",").length - 1
cdk_stats[:n_compounds] = cdk_rows.length

# Per-group statistics for the pyrrolizidine alkaloids.
#
# pa-groups.csv: header row with group names (first column skipped), then one
# row per compound: first column SMILES, following columns one value per
# group, where "1" marks membership.
# pa-predictions.csv: header row (skipped), then one row per compound: first
# column SMILES, following columns one prediction each ("1" is counted as
# mutagenic, "0" as non-mutagenic, anything else is ignored).
#
# For every group this fills data[:pa][:groups][group] with
#   :n            sum of the group's column in pa-groups.csv
#   :mut          number of "1" predictions over all members and columns
#   :non_mut      number of "0" predictions over all members and columns
#   :n_pred       :mut + :non_mut
#   :mut_perc     :mut as a rounded percentage of :n_pred
#   :non_mut_perc :non_mut as a rounded percentage of :n_pred
data[:pa][:groups] = {}
lines = File.readlines("pyrrolizidine-alkaloids/pa-groups.csv")
# NOTE(review): sub replaces only the FIRST blank or hyphen of a group name;
# a name with several separators keeps the rest. Left unchanged because the
# resulting symbols are the keys of the emitted YAML -- confirm whether gsub
# was intended before changing it.
pa_groups = lines.shift.chomp.split(",")[1..-1].collect{|g| g.sub(/[ -]/,"_").to_sym}

# groups[group] is a SMILES => true lookup of the group's members. All counters
# start at zero here so that groups without members or predictions still get
# complete, numeric entries.
groups = {}
pa_groups.each do |g|
  data[:pa][:groups][g] = {:n => 0, :mut => 0, :non_mut => 0}
  groups[g] = {}
end

lines.each do |l|
  items = l.chomp.split(",")
  next if items.empty? # blank line
  smi = items[0]
  items[1..-1].each_with_index do |v,i|
    group = pa_groups[i]
    data[:pa][:groups][group][:n] += v.to_i
    groups[group][smi] = true if v == "1"
  end
end

lines = File.readlines("pyrrolizidine-alkaloids/pa-predictions.csv")
lines.shift # drop the header row; the column names are not used
lines.each do |l|
  items = l.chomp.split(",")
  next if items.empty? # blank line
  smi = items[0]
  predictions = items[1..-1]
  n_mut = predictions.count("1")
  n_non_mut = predictions.count("0")
  groups.each do |group,members|
    next unless members.key?(smi)
    data[:pa][:groups][group][:mut] += n_mut
    data[:pa][:groups][group][:non_mut] += n_non_mut
  end
end

data[:pa][:groups].each_value do |values|
  n_pred = values[:mut] + values[:non_mut]
  values[:n_pred] = n_pred
  if n_pred.zero?
    # No usable predictions for this group: report 0 instead of dividing by zero.
    values[:mut_perc] = 0
    values[:non_mut_perc] = 0
  else
    # Divide as Float: Integer division would truncate before round is applied.
    values[:mut_perc] = (100.0*values[:mut]/n_pred).round
    values[:non_mut_perc] = (100.0*values[:non_mut]/n_pred).round
  end
end
# Write the merged statistics to standard output as a YAML document.
$stdout.puts(YAML.dump(data))