summaryrefslogtreecommitdiff
path: root/lib/kazius-alerts.rb
blob: 2068b3434e2d9ce4d882b63f9b9ef92c6b637501 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
re 'openbabel'

class KaziusAlerts

  SMARTS = [ # name, smarts, excluded smarts
    ["specific arom nitro", 'O=N(~O)a', 'O=N(O)c[$(aS(=O)=O),$(aaS(=O)=O),$(aaaS(=O)=O),$(aC((F)F)F),$(aaC((F)F)F),$(aaaC((F)F)F)]'],
    ["specific arom amine", 'a[NH2]', '[NH2]a[$(a[$(C((F)F)F),$(S(=O)=O),$(C(=O)O)]),$(aa[$(C((F)F)F),$(S(=O)=O),$(C(=O)O)]),$(aaa[$(C((F)F)F),$(S(=O)=O),$(C(=O)O)])]'],
    ["aromatic nitroso", 'a[N;X2]=O'],
    ["alkyl nitrite", 'CO[N;X2]=O'],
    ["nitrosamine", 'N[N;X2]=O'],
    ["epoxide", 'O1[c,C]-[c,C]1'],
    ["aziridine", 'C1NC1'],
    ["azide", 'N=[N+]=[N-]'],
    ["diazo", 'C=[N+]=[N-]'],
    ["triazene", 'N=N-N'],
    ["aromatic azo", 'c[N;X2]!@;=[N,X2]c', '[$([N;X2]([$(acS((=O)=O)),$(aacS((=O)=O)),$(aaacS((=O)=O)),$(aaaacS((=O)=O))])=[N;X2][$(acS((=O)=O)),$(aacS((=O)=O)),$(aaacS((=O)=O)),$(aaaacS((=O)=O))])]'],
    ["unsubstituted heteroatom-bonded heteroatom", '[OH,NH2][N,O]', 'O=N(O)[O-]'],
    ["aromatic", '[OH]Na'],
    ["aliphatic halide", '[Cl,Br,I]C'],
    ["carboxylic acid halide", '[Cl,Br,I]C=O'],
    ["nitrogen or sulphur mustard", '[N,S]!@[C;X4]!@[CH2][Cl,Br,I]'],
    ["bay region in PAHs", '[cH]1[cH]ccc2c1c3c(cc2)cc[cH][cH]3'],
    ["k-region in PAHs", '[cH]1cccc2c1[cH][cH]c3c2ccc[cH]3'],
    ["polycyclic aromatic system", '[$(a13~a~a~a~a2~a1~a(~a~a~a3)~a~a~a2),$(a1~a~a~a2~a1~a~a3~a(~a2)~a~a~a3),$(a1~a~a~a2~a1~a~a~a3~a2~a~a~a3),$(a1~a~a~a~a2~a1~a3~a(~a2)~a~a~a~a3),$(a1~a~a~a~a2~a1~a~a3~a(~a2)~a~a~a3),$(a1~a~a~a~a2~a1~a~a3~a(~a2)~a~a~a~a3),$(a1~a~a~a~a2~a1~a~a~a3~a2~a~a~a3),$(a1~a~a~a~a2~a1~a~a~a3~a2~a~a~a~a3),$(a13~a~a~a~a2~a1~a(~a~a~a3)~a~a2)]'], # smarts error of original smarts fixed
    ["sulphonate bonded carbon", '[$([C,c]OS((=O)=O)O!@[c,C]),$([c,C]S((=O)=O)O!@[c,C])]'],
    ["aliphatic N-nitro", 'O=N(~O)N'],
    ["alpha,beta unsaturated aldehyde", '[$(O=[CH]C=C),$(O=[CH]C=O)]', '[$(O=[CH]C([N,O,S])=C),$(O=[CH]C=C[N,O,S]),$(O=[CH]C=Ca)]'],
    ["diazonium", '[N;v4]#N'],
    ["beta-propriolactone", 'O=C1CCO1'],
    ["alpha,beta unsaturated alkoxy group", '[CH]=[CH]O'],
    ["1-aryl-2-monoalkyl hydrazine", '[NH;!R][NH;!R]a'],
    ["aromatic methylamine", '[CH3][NH]a', '[CH3][NH]a[$(a[$(C((F)F)F),$(S=O),$(C(=O)O)]),$(aa[$(C((F)F)F),$(S=O),$(C(=O)O)]),$(aaa[$(C((F)F)F),$(S=O),$(C(=O)O)])]'],
    ["ester derivative of aromatic hydroxylamine", 'aN([$([OH]),$(O*=O)])[$([#1]),$(C(=O)[CH3]),$([CH3]),$([OH]),$(O*=O)]'],
    ["polycyclic planar system", '[$([X2,X3]13~[X2,X3]~[X2,X3]~[X2,X3]~[X2,X3]2~[X2,X3]1~[X2,X3
](~[X2,X3]~[X2,X3]~[X2,X3]3)~[X2,X3]~[X2,X3]~[X2,X3]2),$([X2,X3]1~[X2,X3]~[X2,X3]~[X2,X3]2~[X2,
X3]1~[X2,X3]~[X2,X3]3~[X2,X3](~[X2,X3]2)~[X2,X3]~[X2,X3]~[X2,X3]3),$([X2,X3]1~[X2,X3]~[X2,X3]~[
X2,X3]2~[X2,X3]1~[X2,X3]~[X2,X3]~[X2,X3]3~[X2,X3]2~[X2,X3]~[X2,X3]~[X2,X3]3),$([X2,X3]1~[X2,X3]
~[X2,X3]~[X2,X3]~[X2,X3]2~[X2,X3]1~[X2,X3]3~[X2,X3](~[X2,X3]2)~[X2,X3]~[X2,X3]~[X2,X3]~[X2,X3]3),$([X2,X3]1~[X2,X3]~[X2,X3]~[X2,X3]~[X2,X3]2~[X2,X3]1~[X2,X3]~[X2,X3]3~[X2,X3](~[X2,X3]2)~[X2,X3]~[X2,X3]~[X2,X3]3),$([X2,X3]1~[X2,X3]~[X2,X3]~[X2,X3]~[X2,X3]2~[X2,X3]1~[X2,X3]~[X2,X3]3~[X2,X3](~[X2,X3]2)~[X2,X3]~[X2,X3]~[X2,X3]~[X2,X3]3),$([X2,X3]1~[X2,X3]~[X2,X3]~[X2,X3]~[X2,X3]2~[X2,X3]1~[X2,X3]~[X2,X3]~[X2,X3]3~[X2,X3]2~[X2,X3]~[X2,X3]~[X2,X3]3),$([X2,X3]1~[X2,X3]~[X2,X3]~[X2,X3]~[X2,X3]2~[X2,X3]1~[X2,X3]~[X2,X3]~[X2,X3]3~[X2,X3]2~[X2,X3]~[X2,X3]~[X2,X3]~[X2,X3]3),$([X2,X3]13~[X2,X3]~[X2,X3]~[X2,X3]~[X2,X3]2~[X2,X3]1~[X2,X3](~[X2,X3]~[X2,X3]~[X2,X3]3)~[X2,X3]~[X2,X3]2)]', '[S]1-*~*-[N,O,S]-*~*-1'] # smarts error of original smarts fixed
  ]

  CONFIDENCES = {
    "specific arom nitro" => 0.81,
    "specific arom amine" => 0.79,
    "nitrosamine" => 0.90,
    "epoxide" => 0.85,
    "aliphatic halide" => 0.79,
    "polycyclic aromatic system" => 0.90,
    "other SAs" => 0.81
  }

  def self.predict smiles
    smi2mol = OpenBabel::OBConversion.new
    smi2mol.set_in_format("smi")
    mol = OpenBabel::OBMol.new
    smi2mol.read_string(mol, smiles)

    matches = []
    prediction = false
    error_product = 1

    smarts_pattern = OpenBabel::OBSmartsPattern.new
    SMARTS.each do |sma|
      if sma[2]
        smarts_pattern.init sma[1]
        if smarts_pattern.match(mol)
          smarts_pattern.init sma[2]
          matches << sma if !smarts_pattern.match(mol)
        end
      else
        smarts_pattern.init sma[1]
        matches << sma if smarts_pattern.match(mol)
      end

    matches.each { |m| error_product *= error(m) }

    prediction = true if matches.size > 0
    {:prediction => prediction, :error_product => error_product, :matches => matches}
  end

  def self.error(alert)
    if CONFIDENCES[alert[0]]
      return 1 - CONFIDENCES[alert[0]]
    else
      return 1 - CONFIDENCES["other SAs"]
    end
  end

end

class ConsensusMutagenicity

  def self.predict smiles
    sa_prediction = KaziusAlerts.predict smiles
    lazar_prediction = Lazar.predict smiles
    confidence = 0
    if sa_prediction[:prediction] == false && lazar_mutagenicity.prediction == 0
      confidence = 0.85
    elsif sa_prediction[:prediction] == true && lazar_mutagenicity.prediction == 1
      confidence = 0.85 * ( 1 - sa_prediction[:error_product] )
    elsif sa_prediction[:prediction] == false && lazar_mutagenicity.prediction == 1
      confidence = 0.11
    elsif sa_prediction[:prediction] == true && lazar_mutagenicity.prediction == 0
      confidence = ( 1 - sa_prediction[:error_product] ) - 0.57
    end
    {:prediction => prediction, :confidence => confidence}
  end

end