From 61a0b59a491bbb26192050a1049e7f80998f125c Mon Sep 17 00:00:00 2001
From: Christoph Helma
Date: Tue, 12 Sep 2017 15:46:46 +0200
Subject: initial commit

---
Review notes (changes relative to the patch as originally recorded):
 * lib/kazius-alerts.rb: "re 'openbabel'" completed to "require"; the
   unterminated SMARTS.each block in KaziusAlerts.predict is closed; the
   "polycyclic planar system" SMARTS is a single-line literal again;
   ConsensusMutagenicity.predict no longer reads undefined locals.
 * test/kazius-alerts.rb: uses Minitest::Test instead of the MiniTest alias.
 * The *~ files are editor backups recorded by the original commit; their
   content is kept as committed.
 * Blob hashes (index lines) are omitted for the two files changed here.

 lib/kazius-alerts.rb   | 121 +++++++++++++++++++++++++++++++++++++++++++++++++
 lib/kazius-alerts.rb~  |  99 ++++++++++++++++++++++++++++++++++++++++
 test/kazius-alerts.rb  |  17 +++++++
 test/kazius-alerts.rb~ |  15 ++++++
 4 files changed, 252 insertions(+)
 create mode 100644 lib/kazius-alerts.rb
 create mode 100644 lib/kazius-alerts.rb~
 create mode 100644 test/kazius-alerts.rb
 create mode 100644 test/kazius-alerts.rb~

diff --git a/lib/kazius-alerts.rb b/lib/kazius-alerts.rb
new file mode 100644
--- /dev/null
+++ b/lib/kazius-alerts.rb
@@ -0,0 +1,121 @@
+require 'openbabel'
+
+# Structural alert screen: matches a molecule against a fixed table of SMARTS
+# patterns with OpenBabel and reports which alerts fire.
+class KaziusAlerts
+
+  # Alert table, one entry per alert: [name, smarts, excluded smarts].
+  # The excluded pattern is optional; when present, the alert only counts if
+  # the molecule matches the alert pattern and not the excluded pattern.
+  SMARTS = [ # name, smarts, excluded smarts
+    ["specific arom nitro", 'O=N(~O)a', 'O=N(O)c[$(aS(=O)=O),$(aaS(=O)=O),$(aaaS(=O)=O),$(aC((F)F)F),$(aaC((F)F)F),$(aaaC((F)F)F)]'],
+    ["specific arom amine", 'a[NH2]', '[NH2]a[$(a[$(C((F)F)F),$(S(=O)=O),$(C(=O)O)]),$(aa[$(C((F)F)F),$(S(=O)=O),$(C(=O)O)]),$(aaa[$(C((F)F)F),$(S(=O)=O),$(C(=O)O)])]'],
+    ["aromatic nitroso", 'a[N;X2]=O'],
+    ["alkyl nitrite", 'CO[N;X2]=O'],
+    ["nitrosamine", 'N[N;X2]=O'],
+    ["epoxide", 'O1[c,C]-[c,C]1'],
+    ["aziridine", 'C1NC1'],
+    ["azide", 'N=[N+]=[N-]'],
+    ["diazo", 'C=[N+]=[N-]'],
+    ["triazene", 'N=N-N'],
+    ["aromatic azo", 'c[N;X2]!@;=[N,X2]c', '[$([N;X2]([$(acS((=O)=O)),$(aacS((=O)=O)),$(aaacS((=O)=O)),$(aaaacS((=O)=O))])=[N;X2][$(acS((=O)=O)),$(aacS((=O)=O)),$(aaacS((=O)=O)),$(aaaacS((=O)=O))])]'],
+    ["unsubstituted heteroatom-bonded heteroatom", '[OH,NH2][N,O]', 'O=N(O)[O-]'],
+    ["aromatic", '[OH]Na'],
+    ["aliphatic halide", '[Cl,Br,I]C'],
+    ["carboxylic acid halide", '[Cl,Br,I]C=O'],
+    ["nitrogen or sulphur mustard", '[N,S]!@[C;X4]!@[CH2][Cl,Br,I]'],
+    ["bay region in PAHs", '[cH]1[cH]ccc2c1c3c(cc2)cc[cH][cH]3'],
+    ["k-region in PAHs", '[cH]1cccc2c1[cH][cH]c3c2ccc[cH]3'],
+    ["polycyclic aromatic system", '[$(a13~a~a~a~a2~a1~a(~a~a~a3)~a~a~a2),$(a1~a~a~a2~a1~a~a3~a(~a2)~a~a~a3),$(a1~a~a~a2~a1~a~a~a3~a2~a~a~a3),$(a1~a~a~a~a2~a1~a3~a(~a2)~a~a~a~a3),$(a1~a~a~a~a2~a1~a~a3~a(~a2)~a~a~a3),$(a1~a~a~a~a2~a1~a~a3~a(~a2)~a~a~a~a3),$(a1~a~a~a~a2~a1~a~a~a3~a2~a~a~a3),$(a1~a~a~a~a2~a1~a~a~a3~a2~a~a~a~a3),$(a13~a~a~a~a2~a1~a(~a~a~a3)~a~a2)]'], # smarts error of original smarts fixed
+    ["sulphonate bonded carbon", '[$([C,c]OS((=O)=O)O!@[c,C]),$([c,C]S((=O)=O)O!@[c,C])]'],
+    ["aliphatic N-nitro", 'O=N(~O)N'],
+    ["alpha,beta unsaturated aldehyde", '[$(O=[CH]C=C),$(O=[CH]C=O)]', '[$(O=[CH]C([N,O,S])=C),$(O=[CH]C=C[N,O,S]),$(O=[CH]C=Ca)]'],
+    ["diazonium", '[N;v4]#N'],
+    ["beta-propriolactone", 'O=C1CCO1'],
+    ["alpha,beta unsaturated alkoxy group", '[CH]=[CH]O'],
+    ["1-aryl-2-monoalkyl hydrazine", '[NH;!R][NH;!R]a'],
+    ["aromatic methylamine", '[CH3][NH]a', '[CH3][NH]a[$(a[$(C((F)F)F),$(S=O),$(C(=O)O)]),$(aa[$(C((F)F)F),$(S=O),$(C(=O)O)]),$(aaa[$(C((F)F)F),$(S=O),$(C(=O)O)])]'],
+    ["ester derivative of aromatic hydroxylamine", 'aN([$([OH]),$(O*=O)])[$([#1]),$(C(=O)[CH3]),$([CH3]),$([OH]),$(O*=O)]'],
+    ["polycyclic planar system", '[$([X2,X3]13~[X2,X3]~[X2,X3]~[X2,X3]~[X2,X3]2~[X2,X3]1~[X2,X3](~[X2,X3]~[X2,X3]~[X2,X3]3)~[X2,X3]~[X2,X3]~[X2,X3]2),$([X2,X3]1~[X2,X3]~[X2,X3]~[X2,X3]2~[X2,X3]1~[X2,X3]~[X2,X3]3~[X2,X3](~[X2,X3]2)~[X2,X3]~[X2,X3]~[X2,X3]3),$([X2,X3]1~[X2,X3]~[X2,X3]~[X2,X3]2~[X2,X3]1~[X2,X3]~[X2,X3]~[X2,X3]3~[X2,X3]2~[X2,X3]~[X2,X3]~[X2,X3]3),$([X2,X3]1~[X2,X3]~[X2,X3]~[X2,X3]~[X2,X3]2~[X2,X3]1~[X2,X3]3~[X2,X3](~[X2,X3]2)~[X2,X3]~[X2,X3]~[X2,X3]~[X2,X3]3),$([X2,X3]1~[X2,X3]~[X2,X3]~[X2,X3]~[X2,X3]2~[X2,X3]1~[X2,X3]~[X2,X3]3~[X2,X3](~[X2,X3]2)~[X2,X3]~[X2,X3]~[X2,X3]3),$([X2,X3]1~[X2,X3]~[X2,X3]~[X2,X3]~[X2,X3]2~[X2,X3]1~[X2,X3]~[X2,X3]3~[X2,X3](~[X2,X3]2)~[X2,X3]~[X2,X3]~[X2,X3]~[X2,X3]3),$([X2,X3]1~[X2,X3]~[X2,X3]~[X2,X3]~[X2,X3]2~[X2,X3]1~[X2,X3]~[X2,X3]~[X2,X3]3~[X2,X3]2~[X2,X3]~[X2,X3]~[X2,X3]3),$([X2,X3]1~[X2,X3]~[X2,X3]~[X2,X3]~[X2,X3]2~[X2,X3]1~[X2,X3]~[X2,X3]~[X2,X3]3~[X2,X3]2~[X2,X3]~[X2,X3]~[X2,X3]~[X2,X3]3),$([X2,X3]13~[X2,X3]~[X2,X3]~[X2,X3]~[X2,X3]2~[X2,X3]1~[X2,X3](~[X2,X3]~[X2,X3]~[X2,X3]3)~[X2,X3]~[X2,X3]2)]', '[S]1-*~*-[N,O,S]-*~*-1'] # smarts error of original smarts fixed
+  ].freeze
+
+  # Confidence by alert name; names missing here fall back to "other SAs".
+  CONFIDENCES = {
+    "specific arom nitro" => 0.81,
+    "specific arom amine" => 0.79,
+    "nitrosamine" => 0.90,
+    "epoxide" => 0.85,
+    "aliphatic halide" => 0.79,
+    "polycyclic aromatic system" => 0.90,
+    "other SAs" => 0.81
+  }.freeze
+
+  # Screens a molecule for structural alerts.
+  #
+  # @param smiles [String] molecule in SMILES notation
+  # @return [Hash] :prediction (true if any alert fired), :error_product
+  #   (product of error(alert) over the matches, 1 without matches) and
+  #   :matches (the SMARTS entries that fired)
+  def self.predict(smiles)
+    smi2mol = OpenBabel::OBConversion.new
+    smi2mol.set_in_format("smi")
+    mol = OpenBabel::OBMol.new
+    smi2mol.read_string(mol, smiles)
+
+    # A single pattern object is re-initialised for every SMARTS string.
+    smarts_pattern = OpenBabel::OBSmartsPattern.new
+    matches = SMARTS.select do |_name, smarts, excluded|
+      smarts_pattern.init smarts
+      next false unless smarts_pattern.match(mol)
+      next true unless excluded
+
+      smarts_pattern.init excluded
+      !smarts_pattern.match(mol)
+    end
+
+    error_product = matches.reduce(1) { |product, alert| product * error(alert) }
+    {:prediction => !matches.empty?, :error_product => error_product, :matches => matches}
+  end
+
+  # Error of a single alert, i.e. 1 - its confidence.
+  #
+  # @param alert [Array] entry of SMARTS; alert[0] is the alert name
+  # @return [Float]
+  def self.error(alert)
+    1 - CONFIDENCES.fetch(alert[0], CONFIDENCES["other SAs"])
+  end
+
+end
+
+# Combines the structural alert screen with a lazar prediction into one
+# confidence value.
+class ConsensusMutagenicity
+
+  # Derives a confidence value from the agreement of both predictions.
+  #
+  # @param smiles [String] molecule in SMILES notation
+  # @return [Hash] :prediction and :confidence
+  def self.predict(smiles)
+    sa_prediction = KaziusAlerts.predict smiles
+    # NOTE(review): Lazar is not defined or required in this file; its result
+    # is assumed to respond to #prediction with 0 or 1 - confirm.
+    lazar_prediction = Lazar.predict smiles
+    alert = sa_prediction[:prediction]
+    sa_confidence = 1 - sa_prediction[:error_product]
+
+    confidence =
+      case [alert, lazar_prediction.prediction]
+      when [false, 0] then 0.85
+      when [true, 1] then 0.85 * sa_confidence
+      when [false, 1] then 0.11
+      when [true, 0] then sa_confidence - 0.57
+      else 0
+      end
+
+    # NOTE(review): the original returned an undefined local `prediction`;
+    # the structural alert result is returned here - confirm the intended
+    # consensus rule.
+    {:prediction => alert, :confidence => confidence}
+  end
+
+end
diff --git a/lib/kazius-alerts.rb~ b/lib/kazius-alerts.rb~
new file mode 100644
index 0000000..ccc888c
--- /dev/null
+++ b/lib/kazius-alerts.rb~
@@ -0,0 +1,99 @@
+require 'openbabel'
+
+class KaziusAlerts
+
+  SMARTS = [ # name, smarts, excluded smarts
+    ["specific arom nitro", 'O=N(~O)a', 'O=N(O)c[$(aS(=O)=O),$(aaS(=O)=O),$(aaaS(=O)=O),$(aC((F)F)F),$(aaC((F)F)F),$(aaaC((F)F)F)]'],
+    ["specific arom amine", 'a[NH2]', '[NH2]a[$(a[$(C((F)F)F),$(S(=O)=O),$(C(=O)O)]),$(aa[$(C((F)F)F),$(S(=O)=O),$(C(=O)O)]),$(aaa[$(C((F)F)F),$(S(=O)=O),$(C(=O)O)])]'],
+    ["aromatic nitroso", 'a[N;X2]=O'],
+    ["alkyl nitrite", 'CO[N;X2]=O'],
+    ["nitrosamine", 'N[N;X2]=O'],
+    ["epoxide", 'O1[c,C]-[c,C]1'],
+    ["aziridine", 'C1NC1'],
+    ["azide", 'N=[N+]=[N-]'],
+    ["diazo", 'C=[N+]=[N-]'],
+    ["triazene", 'N=N-N'],
+    ["aromatic azo", 'c[N;X2]!@;=[N,X2]c', '[$([N;X2]([$(acS((=O)=O)),$(aacS((=O)=O)),$(aaacS((=O)=O)),$(aaaacS((=O)=O))])=[N;X2][$(acS((=O)=O)),$(aacS((=O)=O)),$(aaacS((=O)=O)),$(aaaacS((=O)=O))])]'],
+    ["unsubstituted heteroatom-bonded heteroatom", '[OH,NH2][N,O]', 'O=N(O)[O-]'],
+    ["aromatic", '[OH]Na'],
+    ["aliphatic halide", '[Cl,Br,I]C'],
+    ["carboxylic acid halide", '[Cl,Br,I]C=O'],
+    ["nitrogen or sulphur mustard", '[N,S]!@[C;X4]!@[CH2][Cl,Br,I]'],
+    ["bay region in PAHs", '[cH]1[cH]ccc2c1c3c(cc2)cc[cH][cH]3'],
+    ["k-region in PAHs", '[cH]1cccc2c1[cH][cH]c3c2ccc[cH]3'],
+    ["polycyclic aromatic system", '[$(a13~a~a~a~a2~a1~a(~a~a~a3)~a~a~a2),$(a1~a~a~a2~a1~a~a3~a(~a2)~a~a~a3),$(a1~a~a~a2~a1~a~a~a3~a2~a~a~a3),$(a1~a~a~a~a2~a1~a3~a(~a2)~a~a~a~a3),$(a1~a~a~a~a2~a1~a~a3~a(~a2)~a~a~a3),$(a1~a~a~a~a2~a1~a~a3~a(~a2)~a~a~a~a3),$(a1~a~a~a~a2~a1~a~a~a3~a2~a~a~a3),$(a1~a~a~a~a2~a1~a~a~a3~a2~a~a~a~a3),$(a13~a~a~a~a2~a1~a(~a~a~a3)~a~a2)]'], # smarts error of original smarts fixed
+    ["sulphonate bonded carbon", '[$([C,c]OS((=O)=O)O!@[c,C]),$([c,C]S((=O)=O)O!@[c,C])]'],
+    ["aliphatic N-nitro", 'O=N(~O)N'],
+    ["alpha,beta unsaturated aldehyde", '[$(O=[CH]C=C),$(O=[CH]C=O)]', '[$(O=[CH]C([N,O,S])=C),$(O=[CH]C=C[N,O,S]),$(O=[CH]C=Ca)]'],
+    ["diazonium", '[N;v4]#N'],
+    ["beta-propriolactone", 'O=C1CCO1'],
+    ["alpha,beta unsaturated alkoxy group", '[CH]=[CH]O'],
+    ["1-aryl-2-monoalkyl hydrazine", '[NH;!R][NH;!R]a'],
+    ["aromatic methylamine", '[CH3][NH]a', '[CH3][NH]a[$(a[$(C((F)F)F),$(S=O),$(C(=O)O)]),$(aa[$(C((F)F)F),$(S=O),$(C(=O)O)]),$(aaa[$(C((F)F)F),$(S=O),$(C(=O)O)])]'],
+    ["ester derivative of aromatic hydroxylamine", 'aN([$([OH]),$(O*=O)])[$([#1]),$(C(=O)[CH3]),$([CH3]),$([OH]),$(O*=O)]'],
+    ["polycyclic planar system", '[$([X2,X3]13~[X2,X3]~[X2,X3]~[X2,X3]~[X2,X3]2~[X2,X3]1~[X2,X3](~[X2,X3]~[X2,X3]~[X2,X3]3)~[X2,X3]~[X2,X3]~[X2,X3]2),$([X2,X3]1~[X2,X3]~[X2,X3]~[X2,X3]2~[X2,X3]1~[X2,X3]~[X2,X3]3~[X2,X3](~[X2,X3]2)~[X2,X3]~[X2,X3]~[X2,X3]3),$([X2,X3]1~[X2,X3]~[X2,X3]~[X2,X3]2~[X2,X3]1~[X2,X3]~[X2,X3]~[X2,X3]3~[X2,X3]2~[X2,X3]~[X2,X3]~[X2,X3]3),$([X2,X3]1~[X2,X3]~[X2,X3]~[X2,X3]~[X2,X3]2~[X2,X3]1~[X2,X3]3~[X2,X3](~[X2,X3]2)~[X2,X3]~[X2,X3]~[X2,X3]~[X2,X3]3),$([X2,X3]1~[X2,X3]~[X2,X3]~[X2,X3]~[X2,X3]2~[X2,X3]1~[X2,X3]~[X2,X3]3~[X2,X3](~[X2,X3]2)~[X2,X3]~[X2,X3]~[X2,X3]3),$([X2,X3]1~[X2,X3]~[X2,X3]~[X2,X3]~[X2,X3]2~[X2,X3]1~[X2,X3]~[X2,X3]3~[X2,X3](~[X2,X3]2)~[X2,X3]~[X2,X3]~[X2,X3]~[X2,X3]3),$([X2,X3]1~[X2,X3]~[X2,X3]~[X2,X3]~[X2,X3]2~[X2,X3]1~[X2,X3]~[X2,X3]~[X2,X3]3~[X2,X3]2~[X2,X3]~[X2,X3]~[X2,X3]3),$([X2,X3]1~[X2,X3]~[X2,X3]~[X2,X3]~[X2,X3]2~[X2,X3]1~[X2,X3]~[X2,X3]~[X2,X3]3~[X2,X3]2~[X2,X3]~[X2,X3]~[X2,X3]~[X2,X3]3),$([X2,X3]13~[X2,X3]~[X2,X3]~[X2,X3]~[X2,X3]2~[X2,X3]1~[X2,X3](~[X2,X3]~[X2,X3]~[X2,X3]3)~[X2,X3]~[X2,X3]2)]', '[S]1-*~*-[N,O,S]-*~*-1'] # smarts error of original smarts fixed
+  ]
+
+  CONFIDENCES = {
+    "specific arom nitro" => 0.81,
+    "specific arom amine" => 0.79,
+    "nitrosamine" => 0.90,
+    "epoxide" => 0.85,
+    "aliphatic halide" => 0.79,
+    "polycyclic aromatic system" => 0.90,
+    "other SAs" => 0.81
+  }
+
+  def self.predict smiles
+    smi2mol = OBConversion.new
+    smi2mol.set_in_format("smi")
+    mol = OBMol.new
+    smi2mol.read_string(mol, smiles)
+
+    matches = []
+    prediction = false
+    error_product = 1
+
+    SMARTS.each do |sma|
+      if sma[2]
+        matches << sma if mol.matches_smarts?(sma[1]) && !mol.matches_smarts?(sma[2])
+      else
+        matches << sma if mol.matches_smarts?(sma[1])
+      end
+    end
+
+    matches.each { |m| error_product *= error(m) }
+
+    prediction = true if matches.size > 0
+    {:prediction => prediction, :error_product => error_product, :matches => matches}
+  end
+
+  def self.error(alert)
+    if CONFIDENCES[alert[0]]
+      return 1 - CONFIDENCES[alert[0]]
+    else
+      return 1 - CONFIDENCES["other SAs"]
+    end
+  end
+
+end
+
+class ConsensusMutagenicity
+
+  def self.predict smiles
+    sa_prediction = KaziusAlerts.predict smiles
+    lazar_prediction = Lazar.predict smiles
+    confidence = 0
+    if sa_prediction[:prediction] == false && lazar_mutagenicity.prediction == 0
+      confidence = 0.85
+    elsif sa_prediction[:prediction] == true && lazar_mutagenicity.prediction == 1
+      confidence = 0.85 * ( 1 - sa_prediction[:error_product] )
+    elsif sa_prediction[:prediction] == false && lazar_mutagenicity.prediction == 1
+      confidence = 0.11
+    elsif sa_prediction[:prediction] == true && lazar_mutagenicity.prediction == 0
+      confidence = ( 1 - sa_prediction[:error_product] ) - 0.57
+    end
+    {:prediction => prediction, :confidence => confidence}
+  end
+
+end
diff --git a/test/kazius-alerts.rb b/test/kazius-alerts.rb
new file mode 100644
--- /dev/null
+++ b/test/kazius-alerts.rb
@@ -0,0 +1,17 @@
+require 'minitest/autorun'
+#require 'kazius-alerts'
+require_relative '../lib/kazius-alerts.rb'
+
+class KaziusAlertsTest < Minitest::Test
+
+  def test_alerts_size
+    assert_equal 29, KaziusAlerts::SMARTS.size
+  end
+
+  def test_kazius_alerts_prediction
+    prediction = KaziusAlerts.predict("c1ccccc1NN")
+    assert prediction[:prediction]
+    assert_equal [["unsubstituted heteroatom-bonded heteroatom", "[OH,NH2][N,O]", "O=N(O)[O-]"]], prediction[:matches]
+  end
+
+end
diff --git a/test/kazius-alerts.rb~ b/test/kazius-alerts.rb~
new file mode 100644
index 0000000..e0e0acf
--- /dev/null
+++ b/test/kazius-alerts.rb~
@@ -0,0 +1,15 @@
+require 'minitest/autorun'
+#require 'kazius-alerts'
+require_relative '../lib/kazius-alerts.rb'
+
+class KaziusAlertsTest < MiniTest::Test
+
+  def test_alerts_size
+    assert_equal 29, KaziusAlerts::SMARTS.size
+  end
+
+  def test_kazius_alerts_prediction
+    assert_equal 29, KaziusAlerts::SMARTS.size
+  end
+
+end
--
cgit v1.2.3