summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristoph Helma <helma@in-silico.ch>2019-06-20 22:01:50 +0200
committerChristoph Helma <helma@in-silico.ch>2019-06-20 22:01:50 +0200
commit455da06aa6459da0d25b286ca6cb866ff64c4c34 (patch)
treed0ed8fcf720a02742da781669251f379b8fd07f0
parent1b44e0cd76f2ead93b8b3fa0f970c85ef32a4b14 (diff)
separate csv serialisations for batch predictions and training data, repeated measurements in mutagenicity dataset fixed, daphnia import fixed, CENTRAL_MONGO_IP removed
-rw-r--r--data/Mutagenicity-Salmonella_typhimurium.csv96
-rw-r--r--ext/lazar/extconf.rb2
-rw-r--r--ext/lazar/rinstall.R22
-rw-r--r--lib/dataset.rb75
-rw-r--r--lib/download.rb3
-rw-r--r--lib/feature.rb11
-rw-r--r--lib/lazar.rb9
-rw-r--r--lib/model.rb8
-rw-r--r--lib/validation-statistics.rb4
-rw-r--r--test/classification-model.rb10
-rw-r--r--test/dataset.rb18
-rw-r--r--test/regression-model.rb17
12 files changed, 184 insertions, 91 deletions
diff --git a/data/Mutagenicity-Salmonella_typhimurium.csv b/data/Mutagenicity-Salmonella_typhimurium.csv
index 0694a94..331de54 100644
--- a/data/Mutagenicity-Salmonella_typhimurium.csv
+++ b/data/Mutagenicity-Salmonella_typhimurium.csv
@@ -696,7 +696,8 @@ Clc1cc(Cl)c(c(c1)S(=O)c1cc(Cl)cc(c1O)Cl)O,mutagenic
O=C1C=C(C(=O)C(=C1)C1=CC(=O)C=C(C1=O)C(C)(C)C)C(C)(C)C,mutagenic
OC(=O)Cc1ccccc1Cl,non-mutagenic
C=CCCC(C=O)CC,non-mutagenic
-Nc1cnn(c(=O)c1Cl)c1ccccc1,non-mutagenic mutagenic
+Nc1cnn(c(=O)c1Cl)c1ccccc1,non-mutagenic
+Nc1cnn(c(=O)c1Cl)c1ccccc1,mutagenic
COc1cccc2c1C(=O)c1c(C2=O)c(O)c2c(c1O)C(OC1CC(N)C(C(O1)C)O)CC(C2)(O)C(=O)C,mutagenic
c1ccc2c(-c3ccccc3C3C2N3)c1,mutagenic
c1ccc2c(c1)cc1c3c2[C@H]2O[C@H]2c3cc2c1cccc2,mutagenic
@@ -856,7 +857,8 @@ O[C@@H]1[C@H](O)[C@@H](O)CO[C@H]1N(c1ccc(cc1)[N+](=O)[O-])N=O,mutagenic
OC(=O)C(CC(=O)c1cccc(c1N)O)N,non-mutagenic
C=O,mutagenic
O=NN1CC[C@H](C1)O,mutagenic
-Oc1ccccc1c1ccccc1,non-mutagenic mutagenic
+Oc1ccccc1c1ccccc1,non-mutagenic
+Oc1ccccc1c1ccccc1,mutagenic
CC(=O)C1=C(O)C2N(C1=O)C(C1C2c2c[nH]c3c2c(C1)ccc3)(C)C,mutagenic
[O-][N+](=O)C1=Cc2c3c1cccc3cc1c2c2ccccc2cc1,mutagenic
c1cc2ccc3c4c2c(c1)ccc4nc1c3cccc1,mutagenic
@@ -1501,7 +1503,8 @@ CCCCOC(=O)c1ccc(cc1)O,non-mutagenic
OC1C=Cc2c(C1O)ccc1c2cc2ccccc2c1[N+](=O)[O-],mutagenic
O/N=C(/c1ccccc1)\N,mutagenic
Clc1cc(N)c(c(c1)C(=O)O)Cl,mutagenic
-CCOP(=O)(O/C(=C\Cl)/c1ccc(cc1Cl)Cl)OCC,mutagenic non-mutagenic
+CCOP(=O)(O/C(=C\Cl)/c1ccc(cc1Cl)Cl)OCC,mutagenic
+CCOP(=O)(O/C(=C\Cl)/c1ccc(cc1Cl)Cl)OCC,non-mutagenic
Nc1ccc(cc1)N=Nc1cccc(c1N)N,mutagenic
CC1=C[C@]2(O[C@@H](C1)[C@@H](/C=C/[C@H]1CC[C@]3(O1)CC[C@@H]1[C@@H](O3)[C@H](O)C(=C)[C@H](O1)[C@H](C[C@@H]([C@H]1O[C@@]3(CCCCO3)CC[C@H]1C)C)O)C)O[C@@H](CC[C@@H]2O)C[C@](C(=O)O)(O)C,non-mutagenic
OC(=O)C(Oc1ccc(cc1)[C@@H]1CC1(Cl)Cl)(C)C,non-mutagenic
@@ -1532,7 +1535,8 @@ ClCC(Cl)(Cl)Cl,non-mutagenic
O=C(c1csc(c1)[N+](=O)[O-])Nc1ccccc1[N+](=O)[O-],mutagenic
CC(=C)C=O,mutagenic
c1ccc2c(c1)cc1c(c2)c2Oc2c2c1cccc2,mutagenic
-Clc1cc(Cl)cc(c1Oc1ccc(cc1)[N+](=O)[O-])Cl,mutagenic non-mutagenic
+Clc1cc(Cl)cc(c1Oc1ccc(cc1)[N+](=O)[O-])Cl,mutagenic
+Clc1cc(Cl)cc(c1Oc1ccc(cc1)[N+](=O)[O-])Cl,non-mutagenic
CC[C@H](c1ccccc1O)C,non-mutagenic
c1ccc(cc1)Cc1ccccc1OCC1CO1,mutagenic
CC[n+]1c2ccccc2nc2c1cccc2,mutagenic
@@ -1708,7 +1712,8 @@ Oc1ccc2c(c1N=Nc1ccc(cc1)S(=O)(=O)O)c(cc(c2)S(=O)(=O)O)S(=O)(=O)O,non-mutagenic
CCCCCC(=O)OC1(CCC2C1(C)CCC1C2CCC2=CC(=O)CCC12)C(=O)C,non-mutagenic
O=NN1CCC[C@@H](C1)O,mutagenic
ClCC(=O)c1ccc(cc1Cl)Cl,mutagenic
-[O-][N+](=O)c1ccc(cc1)CNc1[nH]cnc2-c1ncn2,mutagenic non-mutagenic
+[O-][N+](=O)c1ccc(cc1)CNc1[nH]cnc2-c1ncn2,mutagenic
+[O-][N+](=O)c1ccc(cc1)CNc1[nH]cnc2-c1ncn2,non-mutagenic
Nc1ccc2c3c1-c1ccccc1-c3ccc2,mutagenic
CC(=O)OCc1ccc(cc1)N=Nc1ccc(cc1)COC(=O)C,mutagenic
BrCC(C(OP(=O)(OC(C(CBr)Br)C)OC(C(CBr)Br)C)C)Br,mutagenic
@@ -1757,7 +1762,8 @@ NCCN,mutagenic
Nc1sc2c(n1)C1CCCNC1CC2,non-mutagenic
O[C@@H]([C@@H](C(=O)O)O)C(=O)O,non-mutagenic
OC[C@H]1O[C@@H](Oc2cc(O)cc3c2c(=O)c2c(o3)c(O)ccc2O)[C@@H]([C@H]([C@@H]1O)O)O,mutagenic
-c1scc(n1)c1nc2c([nH]1)cccc2,mutagenic non-mutagenic
+c1scc(n1)c1nc2c([nH]1)cccc2,mutagenic
+c1scc(n1)c1nc2c([nH]1)cccc2,non-mutagenic
CCCCOc1ccc(cc1)CC(=O)NO,mutagenic
[O-][N+](=O)c1nc2c([nH]1)cccc2,mutagenic
Nc1ccc2c(n1)n1cccc(c1n2)C,mutagenic
@@ -2102,7 +2108,8 @@ N#Cc1cc(I)c(c(c1)[N+](=O)[O-])O,non-mutagenic
OCCOc1ccccc1,non-mutagenic
CC(OC(=O)COc1ccc(cc1Cl)Cl)C,non-mutagenic
CCCCCCOC(=O)c1ccccc1,non-mutagenic
-COC(=O)c1cc(ccc1[N+](=O)[O-])Oc1ccc(cc1Cl)Cl,mutagenic non-mutagenic
+COC(=O)c1cc(ccc1[N+](=O)[O-])Oc1ccc(cc1Cl)Cl,mutagenic
+COC(=O)c1cc(ccc1[N+](=O)[O-])Oc1ccc(cc1Cl)Cl,non-mutagenic
Nc1cc(Cl)c(c(c1)Cl)N,mutagenic
CNC(=O)/C=C(/OP(=O)(OC)OC)\C,mutagenic
COC(=C1C(=NC(=C([C@@H]1c1cccc(c1)[N+](=O)[O-])C(=O)OC/C=C/c1ccccc1)C)C)O,non-mutagenic
@@ -2538,7 +2545,8 @@ C=CC(=O)NC(CC(=O)C)(C)C,non-mutagenic
CCOP(=S)(SCn1c(=O)oc2c1ccc(c2)Cl)OCC,non-mutagenic
CCOP(=O)(SCCN(C(C)C)C(C)C)C,non-mutagenic
COc1cc(CNC(=O)C(Br)C)ccc1O,mutagenic
-CN1CN(C)CSC1=S,mutagenic non-mutagenic
+CN1CN(C)CSC1=S,mutagenic
+CN1CN(C)CSC1=S,non-mutagenic
[O-][N+](=O)c1cccc(c1C)N=[N+](c1cccc(c1C)[N+](=O)[O-])[O-],non-mutagenic
OC(C(Cl)(Cl)Cl)(c1ccc(cc1)Cl)c1ccc(cc1)Cl,non-mutagenic
C=CCN(CC=C)N=O,mutagenic
@@ -2680,7 +2688,8 @@ ONc1ccc(cc1C(C)(C)C)c1ccccc1,mutagenic
O/N=C/c1ccccn1,non-mutagenic
ClCCCCN(COC(=O)C)N=O,mutagenic
COc1cc2O[C@@H]3[C@H](c2c2c1c1[C@@H](O)C[C@@H](c1c(=O)o2)O)C=CO3,mutagenic
-CNC(=O)Oc1cccc2c1cccc2,mutagenic non-mutagenic
+CNC(=O)Oc1cccc2c1cccc2,mutagenic
+CNC(=O)Oc1cccc2c1cccc2,non-mutagenic
Nc1cc(C)c(c(c1)[N+](=O)[O-])N,mutagenic
ClCc1ccc2c3c1ccc1c3c(cc2)cc2c1cccc2,non-mutagenic
CC(=O)Nc1nc(NC(=O)C)nc(n1)c1ccc(o1)[N+](=O)[O-],mutagenic
@@ -2722,7 +2731,8 @@ Cc1ccc(c(c1)[N+](=O)[O-])C,mutagenic
O=C1C=CC(=O)C=C1c1ccccc1,non-mutagenic
O=C(c1ccccc1)N(OC(=O)C)OCc1ccc(cc1)C(C)(C)C,mutagenic
CC(=O)Nc1scc(n1)c1scc(c1)[N+](=O)[O-],mutagenic
-CCCN(c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-])CCC,mutagenic non-mutagenic
+CCCN(c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-])CCC,mutagenic
+CCCN(c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-])CCC,non-mutagenic
OC(=O)c1ccco1,non-mutagenic
[O-][N+](=O)OOC(=O)C,mutagenic
Cc1ccccc1N=Nc1c(O)ccc2c1cccc2,mutagenic
@@ -3440,7 +3450,8 @@ CCc1ccccc1[N+](=O)[O-],non-mutagenic
ClC(c1ccccc1)C(=O)Cl,mutagenic
OC(=O)Cc1ccc(cc1)N,mutagenic
CCCSC(=O)Cl,mutagenic
-[O-][N+](=O)NC(=N)NC,mutagenic non-mutagenic
+[O-][N+](=O)NC(=N)NC,mutagenic
+[O-][N+](=O)NC(=N)NC,non-mutagenic
Oc1ccc2c(c1N=Nc1ccccc1)ccc(c2)S(=O)(=O)O,non-mutagenic
COC(C1=C(N2CC2)C(=O)C(=C(C1=O)N1CC1)C)COC(=O)N,mutagenic
COCC12OOC2(C)c2c(O1)cccc2,mutagenic
@@ -3634,7 +3645,8 @@ O=NN1CCCCCCCCCCCC1,mutagenic
Nc1cc([N+](=O)[O-])c(c(c1C)C)N,mutagenic
O=NN(Cc1ccc(cc1)C)C,non-mutagenic
O=Nc1cc(ccc1C)[N+](=O)[O-],mutagenic
-Cc1cccc(c1N)C,mutagenic non-mutagenic
+Cc1cccc(c1N)C,mutagenic
+Cc1cccc(c1N)C,non-mutagenic
[O-][N+](=O)c1ccc(c(c1)C)N,mutagenic
Clc1ccc(cc1)C(=O)c1ccc(cc1)OC(C(=O)O)(C)C,non-mutagenic
CC(=O)Nc1ccc(cc1)Oc1ccc(cc1)N,mutagenic
@@ -4066,7 +4078,8 @@ Nc1ccc2c(c1)cns2,mutagenic
Sc1nc2c(s1)cccc2,non-mutagenic
N#CCC[C@](C#N)(CBr)Br,non-mutagenic
COc1ccc(cc1)N=[N+](c1ccc(cc1)OC)[O-],mutagenic
-COP(=S)(Oc1ccc(c(c1)C)[N+](=O)[O-])OC,mutagenic non-mutagenic
+COP(=S)(Oc1ccc(c(c1)C)[N+](=O)[O-])OC,mutagenic
+COP(=S)(Oc1ccc(c(c1)C)[N+](=O)[O-])OC,non-mutagenic
CCOc1ccc(cc1)[N+](=O)[O-],mutagenic
NCCCC[C@@H](C(=O)N1CCC[C@H]1C(=O)O)N[C@H](C(=O)O)CCc1ccccc1,non-mutagenic
Clc1cc(N)c(cc1c1cc(Cl)c(cc1Cl)N)Cl,mutagenic
@@ -4737,7 +4750,8 @@ CCCCOC(=O)c1ccccc1C(=O)OC1CCCCC1,non-mutagenic
CCc1[nH]c2c(n1)c1c(cc2)ccc2c1cc(O)cc2,mutagenic
ClC(=O)c1ccccc1C(=O)Cl,mutagenic
CCc1cccc2c1nccc2,mutagenic
-O=C(N(C)C)Nc1ccc(c(c1)Cl)Cl,mutagenic non-mutagenic
+O=C(N(C)C)Nc1ccc(c(c1)Cl)Cl,mutagenic
+O=C(N(C)C)Nc1ccc(c(c1)Cl)Cl,non-mutagenic
Nc1ccc(c(c1)Cl)C,mutagenic
[O-][N+](=O)c1ccc2c3c1cccc3CC2,mutagenic
Nc1ccc2c(c1)nc1c(c2)ccc(c1)N,mutagenic
@@ -4753,7 +4767,8 @@ Oc1ccc(c(c1)C)Cl,non-mutagenic
CCCCN(CC(=O)CC)N=O,mutagenic
Oc1ccc(cc1)c1ccc(cc1)O,non-mutagenic
c1ccc(cc1)c1ccccc1OCC1CO1,mutagenic
-COc1c(C/C=C(/CCC(=O)O)\C)c(O)c2c(c1C)COC2=O,mutagenic non-mutagenic
+COc1c(C/C=C(/CCC(=O)O)\C)c(O)c2c(c1C)COC2=O,mutagenic
+COc1c(C/C=C(/CCC(=O)O)\C)c(O)c2c(c1C)COC2=O,non-mutagenic
OCc1ccc(cc1)Br,non-mutagenic
OCCN(c1ccc(cc1)N=Nc1cccnc1)CCO,non-mutagenic
ClCCSCC(C(=O)NCC(=O)OC)NC(=O)CCC(C(=O)O)N,mutagenic
@@ -5364,7 +5379,8 @@ Clc1ccc(c(c1)Cl)S(=O)(=O)n1ncc(c(c1=O)Cl)Cl,non-mutagenic
OC(COc1ccc(cc1)NC(=O)C)CNC(C)C,non-mutagenic
O=C(C(=C)C)OCC(COC(=O)C(=C)C)(C)C,non-mutagenic
OCC1OC(OC23C=C(C)C4(C(C3C(=O)C(C2)(C)C)(C)O)CC4)C(C(C1OC(=O)/C=C/c1ccc(cc1)O)O)OC(=O)C,mutagenic
-CO/C(=C\C(=O)O)/C(=O)C(=C)C,mutagenic non-mutagenic
+CO/C(=C\C(=O)O)/C(=O)C(=C)C,mutagenic
+CO/C(=C\C(=O)O)/C(=O)C(=C)C,non-mutagenic
O=C1CCc2c1c1c(cc2)ccc2c1cccc2,non-mutagenic
CCCCOCCCC,non-mutagenic
CCNC(=N)N([N+](=O)[O-])N=O,mutagenic
@@ -5798,7 +5814,8 @@ C=CCOC(=O)c1ccccc1C(=O)OCC=C,non-mutagenic
CCC(=O)Nc1ccc(c(c1)Cl)Cl,non-mutagenic
Cc1cccc2c1c1ccc3c(c1cc2)cccc3,mutagenic
CC(=O)Nc1scc(n1)/C=C\c1ccc(o1)[N+](=O)[O-],mutagenic
-NC(=O)Cc1cccc2c1cccc2,mutagenic non-mutagenic
+NC(=O)Cc1cccc2c1cccc2,mutagenic
+NC(=O)Cc1cccc2c1cccc2,non-mutagenic
[O-][N+](=O)c1ccc(cc1)n1cnc2c1ncnc2N,mutagenic
Cc1cc2n(C)c(nc2c2c1nccn2)N,mutagenic
O=C(N(C)C)Nc1ccc(c(c1)Cl)C,non-mutagenic
@@ -6199,7 +6216,8 @@ Cc1nc(C)cc(c1)c1cc2c(cc1F)n1c(n2C2CC2)cc(=O)n(c1=O)O,mutagenic
ClCCN(c1ccc(cc1)c1[nH]c2c(n1)cc(cc2)CCCCCCc1nc2c([nH]1)ccc(c2)N1CCN(CC1)C)CCCl,non-mutagenic
Cn1c(N)nc2c1cc1ncccc1n2,non-mutagenic
COC(=O)C[C@H]1[C@@]2(C)[C@H](OC3C2=C(C)[C@@H](C3)c2cocc2)[C@H]2C3[C@]1(C)C(=O)C=C[C@@]3(C)C(=O)O2,non-mutagenic
-N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,non-mutagenic mutagenic
+N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,non-mutagenic
+N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,mutagenic
OC1CC=Cc2c1cc1ccc3c4c1c2ccc4ccc3,mutagenic
COc1ccc(cc1)C(C=C)O,non-mutagenic
BrCC(=O)N(Cc1ccccc1)C,non-mutagenic
@@ -6423,7 +6441,8 @@ O[C@@H]1C=Cc2c([C@@H]1O)c1cc3ccc(c4c3c(c1cc2)CC4)C,mutagenic
CC(=O)Nc1cccc2c1ncc(c2)F,mutagenic
Nc1ccc(cc1)/C=C/c1cccc(c1)N,mutagenic
CC(OC(=O)/C=C/c1ccc(o1)[N+](=O)[O-])C,mutagenic
-O=C1C=C(O)c2c(C1=O)cccc2,mutagenic non-mutagenic
+O=C1C=C(O)c2c(C1=O)cccc2,mutagenic
+O=C1C=C(O)c2c(C1=O)cccc2,non-mutagenic
Cl/C=C\C[N+]12CN3CN(C2)CN(C1)C3,mutagenic
[O-][N+](=O)c1cc(ccc1C)C(=O)O,mutagenic
ClCCN(c1ccc(cc1)CC(=O)O[C@H]1CC[C@]2([C@H](C1)CC[C@@H]1[C@@H]2CC[C@]2([C@H]1CCC(=O)N2)C)C)CCCl,mutagenic
@@ -7270,7 +7289,7 @@ OC[C@H]1O[C@@H](O[C@@]23C=C(C)C4([C@]([C@@H]3C(=O)C(C2)(C)C)(C)O)CC4)[C@@H]([C@H
Oc1ccc(cc1)/N=N/c1ccc(cc1)c1ccc(cc1)/N=N/c1c(N)c2c(cc1S(=O)(=O)O)cc(c(c2O)/N=N/c1ccccc1)S(=O)(=O)O.[Na+].[Na+],mutagenic
N[C@@H]1CCC[C@H](C1)N,non-mutagenic
O=C1c2ccccc2N/C/1=C\1/Nc2c(C1=O)cccc2,mutagenic
-C1CCC(CC1)N1[C@@H]2[C@H]1c1ccccc1-c1c2cccc1,mutagenic
+C1CCC(CC1)[N@@]1[C@@H]2[C@H]1c1ccccc1-c1c2cccc1,mutagenic
Cl[C@@H](C=C)CCl,non-mutagenic
COc1ccccc1N.Cl,mutagenic
BrC[C@H]1CN(C)[C@H]2[C@H](C1)c1cccc3c1c(C2)c[nH]3,mutagenic
@@ -7343,7 +7362,7 @@ O=CCC=O,mutagenic
Cc1nsc(c1)N.Cl,mutagenic
CCCCN1[C@@H]2[C@H]1c1ccccc1-c1c2cccc1,mutagenic
O[C@@H]1CC[C@]2([C@@H](C1)CC[C@@H]1[C@@H]2CC[C@]2([C@H]1CC[C@@H]2[C@@H](CCC(=O)O)C)C)C,non-mutagenic
-CCOC(=O)O[C@H](c1ccnc2c1cc(OC)cc2)[C@@H]1C[C@@H]2CCN1C[C@@H]2C=C,non-mutagenic
+CCOC(=O)O[C@H](c1ccnc2c1cc(OC)cc2)[C@@H]1C[C@@H]2CC[N@]1C[C@@H]2C=C,non-mutagenic
OS(=O)(=O)O.OC[C@@H]1O[C@@H](O[C@H]([C@@H](C(=O)N[C@@H]([C@@H]([C@H](C(=O)N[C@@H](C(=O)NCCc2scc(n2)c2ncc(s2)C(=O)NCCC[S+](C)C)[C@@H](O)C)C)O)C)NC(=O)c2nc(nc(c2C)N)[C@H](CC(=O)N)NC[C@@H](C(=O)N)N)c2nc[nH]c2)[C@H]([C@@H]([C@@H]1O)O)O[C@@H]1O[C@H](CO)[C@H]([C@H]([C@@H]1O)OC(=O)N)O.OC[C@@H]1O[C@@H](O[C@H]([C@H](C(=O)N[C@@H]([C@@H]([C@@H](C(=O)N[C@@H](C(=O)NCCc2scc(n2)c2ncc(s2)C(=O)NCCC[S+](C)C)[C@@H](O)C)C)O)C)NC(=O)c2nc(nc(c2C)N)[C@H](CC(=O)N)NC[C@@H](C(=O)N)N)c2nc[nH]c2)[C@H]([C@@H]([C@H]1O)O)O[C@@H]1O[C@H](CO)[C@H]([C@H]([C@@H]1O)OC(=O)N)O,mutagenic
O[C@@H]1C[C@@]2(C)[C@H]([C@]([C@@H]1O)(C)C(=O)O)CC[C@@]1([C@@H]2CC=C2[C@]1(C)CC[C@]1([C@H]2CC(C)(C)CC1)C(=O)O)C,non-mutagenic
CN(CCN(c1ccccn1)Cc1cscc1)C.Cl,non-mutagenic
@@ -7435,7 +7454,7 @@ Clc1ccc(cc1)O[C@@H](C(=O)C(C)(C)C)n1cncc1,non-mutagenic
OC[C@H]1O[C@H](C[C@H]1O)n1cc(CC)c(=O)[nH]c1=O,non-mutagenic
OC(=O)[C@H](c1ccc(cc1)Oc1nccs1)C,non-mutagenic
CCNCC#CC(OC(=O)[C@@](c1ccccc1)(C1CCCCC1)O)(C)C.Cl,non-mutagenic
-C=C[C@H]1CN2CC[C@H]1C[C@H]2[C@@H](c1ccnc2c1cc(OC)cc2)O.Cl.Cl,non-mutagenic
+C=C[C@H]1C[N@@]2CC[C@H]1C[C@H]2[C@@H](c1ccnc2c1cc(OC)cc2)O.Cl.Cl,non-mutagenic
OCCNc1ccc(cc1)/N=N/c1ccc(cc1)NCCO,mutagenic
CC(=C[C@H]1[C@@H](C1(C)C)C(=O)OCc1cccc(c1)Oc1ccccc1)C,mutagenic
CC[C@H](OS(=O)(=O)C)C,mutagenic
@@ -7631,6 +7650,7 @@ CC(=O)OCc1ccccc1/N=N/c1ccc(cc1)N(C)C,mutagenic
COc1nsc2c1cccc2OC[C@H]1CO1,mutagenic
CNNC,non-mutagenic
[N-]=[N+]=Nc1ccc(cc1)Nc1c2ccccc2nc2c1cccc2,mutagenic
+N#C[C@@H]1COCC[N@]1[C@H]1C[C@@H](O[C@H]([C@H]1O)C)O[C@H]1C[C@@](O)(Cc2c1c(O)c1c(c2O)C(=O)c2c(C1=O)c(OC)ccc2)C(=O)CO,non-mutagenic
C[C@@H](c1ccccc1)N(C)C,non-mutagenic
OC(=O)c1cn2[C@@H](C)COc3c2c(c1=O)cc(c3C1(N)CC1)F,non-mutagenic
CCc1cccc2c1[nH]c1c2CCO[C@@]1(C)CC,non-mutagenic
@@ -7722,6 +7742,7 @@ Sc1ncnc2c1[nH]cn2.O,mutagenic
SCCC(=O)N1[C@@H](CS[C@H]1c1ccccc1O)C(=O)O,non-mutagenic
COc1cc(ccc1OC)C[C@H]1CO1,mutagenic
CCCCOc1ccc2c(n1)c(NCCCNCCCl)c1c(n2)cc(cc1)Cl.Cl.Cl.O,mutagenic
+CC(CCC[C@H]([C@@H]1CC[C@@H]2[C@]1(C)CC[C@H]1[C@H]2C[C@H]2[C@@]3([C@]1(C)CC[C@@H](C3)OC(=O)C)[N@@]2N1C(=O)c2c(C1=O)cccc2)C)C,mutagenic
OS(=O)(=O)OCc1c2ccccc2c2c3c1ccc1c3c(cc2)ccc1.[Na+],mutagenic
C[n+]1c2ccccc2cc2c1cccc2N.Cl,mutagenic
CCCn1cc2c3c1cccc3[C@@H]1[C@@H](C2)N(C#N)C[C@@H](C1)C,mutagenic
@@ -7843,7 +7864,7 @@ O=[P@@]1(OCc2c(O1)cccc2)Oc1ccccc1,mutagenic
COC(=O)C1=C(CC)[C@@H](OC1=O)C,mutagenic
Fc1ccc(cc1)Cn1c(nc2c1cccc2)N1CCC(CC1)N(c1nccc(=O)[nH]1)C,non-mutagenic
Oc1ccc(cc1)[C@@H]1CC(=O)c2c(O1)cc(cc2O)O,non-mutagenic
-C=C[C@H]1CN2CC[C@H]1C[C@@H]2[C@H](c1ccnc2c1cc(OC)cc2)O,non-mutagenic
+C=C[C@H]1C[N@@]2CC[C@H]1C[C@@H]2[C@H](c1ccnc2c1cc(OC)cc2)O,non-mutagenic
Nc1cc(N)c(cc1/N=N/c1ccccc1C)C,mutagenic
CC[S@@](=O)CCSP(=O)(OC)OC,mutagenic
c1cc2[C@@H]3O[C@@H]3c3c2c(c1)c1cc2ccccc2cc1c3,mutagenic
@@ -7913,13 +7934,14 @@ CNC(=O)Oc1ccc(cc1)c1ccccc1,mutagenic
C[C@@H]1CCC[C@@H](N1CCC[C@](c1ccccn1)(c1ccccc1)O)C.Cl,non-mutagenic
BrC[C@H](CO[P@@](=O)(OC[C@@H](CBr)Br)O)Br,mutagenic
O=Nc1ccc2c(c1)cccc2,mutagenic
+OC[C@H]1O[C@@H](O[C@H]2CC[C@]3(C(=CC[C@@H]4[C@@H]3CC[C@]3([C@H]4C[C@@H]4[C@@H]3[C@H](C)[C@@H]3[N@@]4C[C@H](CC3)C)C)C2)C)[C@H]([C@H]([C@H]1O)O[C@@H]1O[C@@H](CO)[C@H]([C@H]([C@H]1O)O)O)O[C@@H]1O[C@@H](C)[C@@H]([C@H]([C@H]1O)O)O,non-mutagenic
c1cc2ccc3c4c2c(c1)ccc4ccc3,mutagenic
NCC(=O)O.Cl,non-mutagenic
COc1cc(/N=N/c2ccccc2)ccc1N,mutagenic
C=CCOc1ccccc1OC[C@@H](CNC(C)C)O,non-mutagenic
C/C(=N\O)/C(=O)C,non-mutagenic
c1ccc2c(-c3ccccc3[C@@H]3[C@H]2N3)c1,mutagenic
-COc1cc2c(cc1OC)N1[C@@H]3[C@@]42CCN2[C@H]4C[C@@H]4[C@H]3[C@H](CC1=O)OCC=C4C2,non-mutagenic
+COc1cc2c(cc1OC)N1[C@@H]3[C@@]42CC[N@@]2[C@H]4C[C@@H]4[C@H]3[C@H](CC1=O)OCC=C4C2,non-mutagenic
OCc1cc(ccc1O)C(=O)CN(C(C)(C)C)Cc1ccccc1.Cl,mutagenic
Oc1cc(O)c2c(c1)oc(c(c2=O)O)c1ccc(c(c1)O)O.O.O,mutagenic
ClCc1cccc(c1)/N=N/c1ccc(cc1)N(C)C,mutagenic
@@ -7941,7 +7963,7 @@ O=C1CN(CCCN2CC(=O)NC(=O)C2)CC(=O)N1,non-mutagenic
CC(c1ccc(c2-c(c1)c(C)cc2S(=O)(=O)O)C)C.[Na+],non-mutagenic
ClC[C@@]12[C@H](Cl)[C@H]([C@H](C2(CCl)CCl)CC1(Cl)Cl)Cl,non-mutagenic
OC(=O)COc1ccc(cc1Cl)Cl.CC(N)C,non-mutagenic
-C=C[C@@H]1CN2CC[C@H]1C[C@H]2[C@@H](c1ccnc2c1cc(OC)cc2)O.Br,non-mutagenic
+C=C[C@@H]1C[N@@]2CC[C@H]1C[C@H]2[C@@H](c1ccnc2c1cc(OC)cc2)O.Br,non-mutagenic
O[C@H]1[C@H]2O[C@H]2c2c([C@@H]1O)ccc1c2cc2ccccc2c1C,mutagenic
Nc1cc(N)c(cc1/N=N/c1ccc(cc1)c1ccc(cc1)/N=N/c1ccc(c(c1)C(=O)O)O)/N=N/c1ccc(cc1)S(=O)(=O)O,mutagenic
COc1ccc2c(c1)[nH]c1c2CCN=C1C.Cl.O.O,non-mutagenic
@@ -7972,11 +7994,12 @@ C[C@H](C(=O)O)Oc1cc(Cl)c(cc1Cl)Cl,non-mutagenic
[N-]=[N+]=Nc1ccc2c(c1)nc1c(c2Nc2ccc(cc2OC)NS(=O)(=O)C)cccc1,mutagenic
O[C@@H]1[C@@H](O)[C@@H](O[C@@H]1n1ccc(=N)[nH]c1=O)COP(=O)(O)O,non-mutagenic
O=CC1=C[C@@]2(O)CC(C[C@H]2[C@]2([C@]1(C=O)C2)C)(C)C,mutagenic
-C/C=C\1/CC(=C)[C@](O)(CO)C(=O)OCC2=CCN3[C@H]2[C@H](OC1=O)CC3,mutagenic
+C/C=C\1/CC(=C)[C@](O)(CO)C(=O)OCC2=CC[N@@]3[C@H]2[C@H](OC1=O)CC3,mutagenic
CC(=O)O[C@@H]1C(=O)O[C@H]2[C@H]1OC(=O)[C@@H]2OC(=O)C,non-mutagenic
CC(=O)N/N=C/c1c[n+]([O-])c2c([n+]1[O-])cccc2,mutagenic
OC(=O)CN(CC(=O)O)CCN(CC(=O)O)CC(=O)O.[Na+].[Na+].[Na+].[Na+],non-mutagenic
O=C1NC(=O)[C@](N1)(c1ccc(cc1)O)c1ccccc1,non-mutagenic
+C1C[N@@]2CC[N@]1CC2,non-mutagenic
BrC[C@H]([C@H](O[P@](=O)(O[C@@H]([C@@H](CBr)Br)C)O[C@@H]([C@@H](CBr)Br)C)C)Br,mutagenic
Nc1ccc2c(c1)cc1c(c2)cccc1,mutagenic
Br/C=C(\c1ccc(cc1Cl)Cl)/OP(=O)(OC)OC,non-mutagenic
@@ -7987,6 +8010,7 @@ ClCC=CCCl,mutagenic
[O-][N+](=O)c1ccc2c(c1)ccc1c2ccc(c1)[N+](=O)[O-],mutagenic
CCC(c1cc(N2Nc3c(N2)cccc3)c(c(c1)C(CC)(C)C)O)(C)C,non-mutagenic
C[C@]1(OC1)c1ccc(cc1)c1ccccc1,mutagenic
+OC[C@H]1O[C@@H](O[C@H]2CC[C@]3(C(=CC[C@H]4[C@H]3CC[C@]3([C@H]4C[C@@H]4[C@@H]3[C@H](C)[C@H]3[N@]4C[C@@H](CC3)C)C)C2)C)[C@@H]([C@H]([C@@H]1O[C@H]1O[C@@H](C)[C@@H]([C@H]([C@H]1O[C@H]1O[C@@H](C)[C@@H]([C@H]([C@H]1O)O)O)O)O)O)O,non-mutagenic
O[C@@H]1C[C@@]23[C@](C1)(O3)C=CC=C2,non-mutagenic
COc1ccccc1C[C@H]1CO1,mutagenic
ClCC(=O)O[C@H](P(=O)(OC)OC)C(Cl)(Cl)Cl,non-mutagenic
@@ -8041,6 +8065,7 @@ CCCCCCCCCCCC(=O)OCCS(=O)(=O)O.[Na+],non-mutagenic
C[C@@H](C(C)(C)C)O[P@@](=O)(Cl)C,non-mutagenic
OC(=O)/C=C\C(=O)O.COc1ccc(cc1)CN(c1ccccn1)CCN(C)C,non-mutagenic
Oc1ccc(cc1)/C=N/n1nnc2c(c1=O)[nH]c1c2cccc1,mutagenic
+C1[N@@]2C[N@@]3C[N@]1C[N@](C2)C3,mutagenic
CC(=O)[C@@H]1C(=O)C=C2[C@](C1=O)(C)c1c(O)c(C)c(c(c1O2)C(=O)C)O,non-mutagenic
O=C1C(=O)[C@]2(C([C@@H]1CC2)(C)C)C,non-mutagenic
CCC/C=C/C(=O)O[C@@H]1C(C)(C)C[C@@H]2[C@]1(O)C=C(C=O)[C@@]13[C@@]2(C1)C(=O)O[C@@H]3O,mutagenic
@@ -8065,7 +8090,7 @@ CN(c1ccc(cc1)/C(=C\1/C=CC(=[N+](C)C)C=C1)/c1c2ccc(cc2cc(c1O)S(=O)(=O)O)S(=O)(=O)
Cc1cc(ccc1/N=N/c1ccc2c(c1O)c(N)c(cc2S(=O)(=O)O)S(=O)(=O)O)c1ccc(c(c1)C)/N=N/c1ccc2c(c1O)c(N)c(cc2S(=O)(=O)O)S(=O)(=O)O,mutagenic
Nc1cc(cc2c1c(O)c(c(c2)S(=O)(=O)O)/N=N/c1ccccc1)S(=O)(=O)O,non-mutagenic
Cc1ccc(c(c1)C)/N=N/c1c2ccc(cc2cc(c1O)S(=O)(=O)O)S(=O)(=O)O,mutagenic
-Cl/C=C/C[N@+]12CN3CN(C2)CN(C1)C3,mutagenic
+Cl/C=C/C[N@+]12C[N@]3C[N@@](C2)C[N@@](C1)C3,mutagenic
ClCCN(CCCl)CCC[C@H](Nc1c2cc(OC)ccc2nc2c1ccc(c2)Cl)C,mutagenic
Oc1ccc(cc1)/N=N/c1ccc(cc1)c1ccc(cc1)/N=N/c1c(O)c2c(cc1S(=O)(=O)O)cc(c(c2N)/N=N/c1ccc(cc1)[N+](=O)[O-])S(=O)(=O)O,mutagenic
ClCCCN(C)C,mutagenic
@@ -8077,7 +8102,7 @@ Cc1cc(ccc1/N=N/c1c(O)c2c(N)cc(cc2cc1S(=O)(=O)O)S(=O)(=O)O)c1ccc(c(c1)C)/N=N/c1c(
[N-]=[N+]=CC(=O)OC[C@@H](C(=O)O)N,mutagenic
O=c1[nH]ncc2c1cccc2,non-mutagenic
CC(CCC[C@H]([C@@H]1CC[C@@H]2[C@]1(C)CC[C@H]1[C@H]2C[C@H]2[C@@]3([C@]1(C)CC[C@@H](C3)O)O2)C)C,mutagenic
-C/C=C\1/C[C@@H](C)[C@@](C)(O)C(=O)OCC2=CCN3[C@H]2[C@@H](OC1=O)CC3,non-mutagenic
+C/C=C\1/C[C@@H](C)[C@@](C)(O)C(=O)OCC2=CC[N@@]3[C@H]2[C@@H](OC1=O)CC3,non-mutagenic
Nc1[nH]c(=O)c2c(n1)[nH]nn2,non-mutagenic
CCCCCCCCCCCCCCCC(=O)OC[C@@H]([C@@H]1OC(=O)C(=C1O)O)O,non-mutagenic
ClCCN(c1ccc(cc1)C[C@@H](C(=O)O)N)CCCl,mutagenic
@@ -8089,9 +8114,11 @@ CC(CC(=O)O[C@H]1C[C@@]2(OC(=O)C)[C@@H](C=C1C)O[C@@H]1[C@]3([C@@]2(C)[C@H](OC(=O)
CCCCCC[C@@H]([C@H]1C2=C(C[C@@H](CC3=C([C@H]1O)C(=O)OC3=O)[C@H]([C@@H]1CC=CC(=O)O1)O)C(=O)OC2=O)O,non-mutagenic
CC(=O)OC[C@@]12CCC(=C[C@H]1O[C@@H]1[C@@]3([C@@]2(C)[C@@H](OC(=O)C)[C@@H]1O)CO3)C,non-mutagenic
OC[C@@]12[C@H](C=C(C(=O)[C@@H]1O)C)O[C@@H]1[C@@]3([C@@]2(C)[C@H](O)[C@H]1O)OC3,non-mutagenic
+COc1ccc2c(c1)[nH]c1c2CC[N@@]2[C@@H]1C[C@H]1[C@H](C2)C[C@H]([C@@H]([C@H]1C(=O)OC)OC)OC(=O)/C=C/c1cc(OC)c(c(c1)OC)OC,non-mutagenic
ClC[C@@H]1[C@H](CCl)[C@@]2(C([C@@]1(Cl)C(=C2Cl)Cl)(Cl)Cl)Cl,non-mutagenic
Clc1ccc2c(c1)C(=NC=C1N2C=NN1)c1ccccc1,non-mutagenic
C=C[C@@H]1C[C@@H]2C[C@H]1C=C2,non-mutagenic
+O=C1O[C@H]2CC[N@]3[C@@H]2C(=CC3)COC(=O)[C@]([C@]([C@H]1C)(C)O)(C)O,non-mutagenic
O=C(O[C@@H]1C[C@@](O)(C[C@H]([C@H]1O)O)C(=O)O)/C=C/c1ccc(c(c1)O)O,non-mutagenic
Brc1cccc2c1cc1ccc3c(c1c2)cccc3,mutagenic
COc1cc(cc(c1O)OC)[C@@H]1[C@H]2C(=O)OC[C@@H]2[C@@H](c2c1cc1OCOc1c2)O[C@@H]1O[C@@H]2CO[C@H](O[C@H]2[C@@H]([C@H]1O)O)C,non-mutagenic
@@ -8107,6 +8134,7 @@ CN[C@H]1CCc2c(-c3c1cc(=O)c(cc3)OC)c(OC)c(c(c2)OC)OC,non-mutagenic
BrC[C@H]([C@H]([C@@H]([C@@H](CBr)O)O)O)O,mutagenic
Oc1ccc2c(c1)CC[C@@H]1[C@H]2CC[C@]2([C@H]1C[C@H]([C@@H]2O)O)C,non-mutagenic
Sc1ncnc2c1[nH]cn2,mutagenic
+COc1ccc2c(c1)[nH]c1c2CC[N@@]2[C@@H]1C[C@H]1[C@H](C2)C[C@H]([C@@H]([C@H]1C(=O)OC)OC)OC(=O)c1cc(OC)c(c(c1)OC)OC,non-mutagenic
OC[C@H]1O[C@H](C[C@H]1O)n1cc(C)c(=O)[nH]c1=O,non-mutagenic
Fc1c[nH]c(=O)[nH]c1=O,non-mutagenic
C[C@@H]1CC[C@@]2(OC1)O[C@H]1[C@H]([C@@H]2C)[C@@]2([C@@H](C1)[C@@H]1CC=C3[C@]([C@H]1CC2)(C)CC[C@@H](C3)O)C,non-mutagenic
@@ -8135,7 +8163,7 @@ C#C[C@@]1(O)CC[C@@H]2[C@]1(C)CC[C@H]1[C@H]2CCC2=CC(=O)CC[C@H]12,non-mutagenic
CO[C@H]([C@H]1Cc2cc3cc(O[C@H]4C[C@H](O[C@H]5C[C@H](O)[C@H]([C@@H](O5)C)OC)[C@@H]([C@@H](O4)C)OC(=O)C)c(c(c3c(c2C(=O)[C@H]1O[C@H]1C[C@H](O[C@@H]2C[C@H](O[C@@H]3O[C@@H](C)[C@H]([C@@](C3)(C)O)OC(=O)C)[C@@H]([C@@H](O2)C)O)[C@@H]([C@@H](O1)C)O)O)O)C)C(=O)[C@@H]([C@@H](O)C)O,non-mutagenic
O=C1CC[C@]2(C(=C1)[C@@H](C)C[C@@H]1[C@@H]2CC[C@]2([C@H]1CC[C@]2(OC(=O)C)C(=O)C)C)C,non-mutagenic
OC[C@@H]1C[C@H]([C@@H](O1)n1cnc2c1ncnc2N)O,mutagenic
-O[C@H]1CC[C@]2(C(=CC[C@@H]3[C@@H]2CC[C@]2([C@@H]3C[C@@H]3[C@@H]2[C@H](C)[C@@H]2N3C[C@H](CC2)C)C)C1)C,non-mutagenic
+O[C@H]1CC[C@]2(C(=CC[C@@H]3[C@@H]2CC[C@]2([C@@H]3C[C@@H]3[C@@H]2[C@H](C)[C@@H]2[N@@]3C[C@H](CC2)C)C)C1)C,non-mutagenic
O[C@@H]1CC[C@]2([C@@H](C1)C[C@H]([C@@H]1[C@@H]2C[C@H](O)[C@]2([C@H]1CC[C@@H]2[C@@H](CCC(=O)O)C)C)O)C,non-mutagenic
C/C/1=C\CCC(=C)C2C(CC1)C(C2)(C)C,non-mutagenic
OC[C@H]([C@H]1OC(=O)C(=C1O)O)O,non-mutagenic
@@ -8163,7 +8191,7 @@ OC(=O)CN(CC(=O)O)CCN(CC(=O)O)CC(=O)O.[Na+].[Na+],non-mutagenic
NC(=N)c1ccc(cc1)OCCCCCOc1ccc(cc1)C(=N)N.OCCS(=O)(=O)O.OCCS(=O)(=O)O,non-mutagenic
Nc1ccccc1.Cl,non-mutagenic
OC(=O)CC[C@@H](C(=O)O)N.[Na+],non-mutagenic
-OS(=O)(=O)O.COc1cc2N(C)[C@H]3[C@@]4(c2cc1[C@]1(C[C@H]2CN(CCc5c1[nH]c1c5cccc1)C[C@](C2)(O)CC)C(=O)OC)CCN1[C@H]4[C@@]([C@H]([C@]3(O)C(=O)OC)OC(=O)C)(CC)C=CC1,non-mutagenic
+OS(=O)(=O)O.COc1cc2N(C)[C@H]3[C@@]4(c2cc1[C@]1(C[C@H]2C[N@@](CCc5c1[nH]c1c5cccc1)C[C@](C2)(O)CC)C(=O)OC)CC[N@@]1[C@H]4[C@@]([C@H]([C@]3(O)C(=O)OC)OC(=O)C)(CC)C=CC1,non-mutagenic
OC(=O)O.[Na+],non-mutagenic
N/N=c/1\sc2c(n1C)cccc2.Cl,mutagenic
COc1c2N(C)[C@@H]3[C@](c2cc(c1OC)Cl)(O)[C@H]([C@@]12N3C(=O)[C@@](C)(SS1)N(C2=O)C)O,non-mutagenic
@@ -8178,7 +8206,7 @@ OC(=O)C1=NN(C(=O)[C@H]1/N=N/c1ccc(cc1)S(=O)(=O)O)c1ccc(cc1)S(=O)(=O)O.[Na+].[Na+
CSCC[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N)Cc1ccccc1)CC(=O)O)NC(=O)[C@H](Cc1c[nH]c2c1cccc2)NC(=O)OCc1ccccc1,non-mutagenic
COc1cc(ccc1N)c1ccc(c(c1)OC)N.Cl.Cl,mutagenic
CN([C@H]1C(=C(C(=O)N)C(=O)[C@]2([C@H]1[C@H](O)[C@H]1C(=C2O)C(=O)c2c([C@]1(C)O)cccc2O)O)O)C.Cl,non-mutagenic
-OS(=O)(=O)O.O=CN1c2cc(OC)c(cc2[C@]23[C@H]1[C@@](O)(C(=O)OC)[C@H](OC(=O)C)[C@]1([C@@H]3N(CC2)CC=C1)CC)[C@]1(C[C@H]2CN(CCc3c1[nH]c1c3cccc1)C[C@](C2)(O)CC)C(=O)OC,non-mutagenic
+OS(=O)(=O)O.O=CN1c2cc(OC)c(cc2[C@]23[C@H]1[C@@](O)(C(=O)OC)[C@H](OC(=O)C)[C@]1([C@@H]3[N@@](CC2)CC=C1)CC)[C@]1(C[C@H]2C[N@@](CCc3c1[nH]c1c3cccc1)C[C@](C2)(O)CC)C(=O)OC,non-mutagenic
COc1cccc2c1C(=O)c1c(C2=O)c(O)c2c(c1O)[C@@H](O[C@H]1C[C@H](N)[C@H]([C@@H](O1)C)O)C[C@](C2)(O)C(=O)C,mutagenic
CCNc1nc(NC(C)C)[nH]c(=O)n1,non-mutagenic
O[C@@H]1[C@@H]2C[C@@]34[C@@H]5[C@H]1[C@@]1(C(=C2O)C(=O)c2c(C1=O)c(O)c(cc2O)C)C[C@@H]([C@H]5O)C(=C3C(=O)c1c(C4=O)c(O)c(cc1O)C)O,non-mutagenic
@@ -8201,7 +8229,7 @@ NC(=N)C(/N=N/C(C(=N)N)(C)C)(C)C.Cl.Cl,mutagenic
ClCC[N+](CCCl)(C)[O-].Cl,mutagenic
N/N=C/1\N=NC=C2[C@@H]1C=CC=C2.Cl,mutagenic
CCNC(=O)CC[C@@H](C(=O)O)N,non-mutagenic
-CC[C@H]1CN2CCc3c([C@H]2C[C@H]1C[C@H]1NCCc2c1cc(OC)c(c2)OC)cc(c(c3)OC)OC.Cl.Cl,non-mutagenic
+CC[C@H]1C[N@]2CCc3c([C@H]2C[C@H]1C[C@H]1NCCc2c1cc(OC)c(c2)OC)cc(c(c3)OC)OC.Cl.Cl,non-mutagenic
[O-][N+](=O)c1ccc(s1)NC(=O)NCCCl,mutagenic
CCN(CCCN(C1Cc2c(C1)cccc2)c1ccccc1)CC.Cl,non-mutagenic
O[C@H]1[C@H](O)[C@H](O[C@H]1n1ccc(=O)[nH]c1=O)COP(=O)(O)O.[Na+].[Na+],non-mutagenic
@@ -8220,10 +8248,10 @@ OC[C@H]1O[C@H]([C@@H]([C@@H]1O)O)n1cnc2c1ncnc2NCc1ccc(cc1)[N+](=O)[O-],non-mutag
OC(=O)[C@](Cc1ccc(c(c1)O)O)(N)C.O,non-mutagenic
OC[C@H]1O[C@H](C[C@H]1O)n1cc(C=O)c(=O)[nH]c1=O,non-mutagenic
O[C@@H]1[C@H](O)[C@H](O[C@H]1n1cnc2c1nc[nH]c2=O)COP(=O)(O)O.[Na+].[Na+],non-mutagenic
-C/C=C/1\CC(=C)[C@@](C)(O)C(=O)OCC2=CCN3[C@H]2[C@@H](OC1=O)CC3,mutagenic
+C/C=C/1\CC(=C)[C@@](C)(O)C(=O)OCC2=CC[N@@]3[C@H]2[C@@H](OC1=O)CC3,mutagenic
NCC(=O)Nc1ccccc1.Cl,mutagenic
O=c1[nH]cnc2c1cccc2,non-mutagenic
-CCC1=C(C[C@H]2NCCc3c2cc(OC)c(c3)OC)C[C@@H]2N(C1)CCc1c2cc(c(c1)OC)OC,non-mutagenic
+CCC1=C(C[C@H]2NCCc3c2cc(OC)c(c3)OC)C[C@@H]2[N@@](C1)CCc1c2cc(c(c1)OC)OC,non-mutagenic
OP(=O)(O)O.OP(=O)(O)O.CCN(CCC[C@H](Nc1ccnc2c1ccc(c2)Cl)C)CC,mutagenic
CC(C1=CC2=CC[C@H]3[C@]([C@H]2CC1)(C)CCC[C@@]3(C)C(=O)O)C,non-mutagenic
Oc1ccc2c(c1)Oc1c(C32OC(=O)c2c3cccc2)ccc(c1)O.[Na+].[Na+],non-mutagenic
diff --git a/ext/lazar/extconf.rb b/ext/lazar/extconf.rb
index 7cd85e0..aa031e5 100644
--- a/ext/lazar/extconf.rb
+++ b/ext/lazar/extconf.rb
@@ -10,7 +10,7 @@ programs.each do |program|
abort "Please install #{program} on your system." unless find_executable program
end
-abort "Please install Rserve on your system. Execute 'install.packages('Rserve')' in a R console running as root ('sudo R')." unless `R CMD Rserve --version`.match(/^Rserve/)
+abort "Please install the latest Rserve version on your system (the CRAN version is outdated). Execute 'install.packages('Rserve',,'http://www.rforge.net/')' in a R console running as root ('sudo R')." unless `R CMD Rserve --version`.match(/^Rserve v1.8/)
# install R packages
r_dir = File.join main_dir, "R"
diff --git a/ext/lazar/rinstall.R b/ext/lazar/rinstall.R
index 98e612d..17c2e61 100644
--- a/ext/lazar/rinstall.R
+++ b/ext/lazar/rinstall.R
@@ -1,12 +1,14 @@
libdir = commandArgs(trailingOnly=TRUE)[1]
repo = "https://stat.ethz.ch/CRAN/"
-#install.packages("Rserve",lib=libdir,repos=repo,dependencies=TRUE)
-install.packages("stringi",lib=libdir,repos=repo,dependencies=TRUE);
-install.packages("iterators",lib=libdir,repos=repo,dependencies=TRUE);
-install.packages("foreach",lib=libdir,repos=repo,dependencies=TRUE);
-install.packages("gridExtra",lib=libdir,repos=repo,dependencies=TRUE);
-install.packages("ggplot2",lib=libdir,repos=repo,dependencies=TRUE);
-install.packages("pls",lib=libdir,repos=repo,dependencies=TRUE);
-install.packages("randomForest",lib=libdir,repos=repo,dependencies=TRUE);
-install.packages("caret",lib=libdir,repos=repo,dependencies=TRUE);
-install.packages("doMC",lib=libdir,repos=repo,dependencies=TRUE);
+#install.packages("Rserve",lib=libdir,repos=)
+# dependencies=TRUE installs unnecessary Suggests packages. The default, NA, means c("Depends", "Imports", "LinkingTo").
+install.packages("caret",lib=libdir,repos=repo);
+install.packages("stringi",lib=libdir,repos=repo);
+install.packages("iterators",lib=libdir,repos=repo);
+install.packages("labeling",lib=libdir,repos=repo);
+install.packages("foreach",lib=libdir,repos=repo);
+install.packages("gridExtra",lib=libdir,repos=repo);
+install.packages("ggplot2",lib=libdir,repos=repo);
+install.packages("pls",lib=libdir,repos=repo);
+install.packages("randomForest",lib=libdir,repos=repo);
+install.packages("doMC",lib=libdir,repos=repo);
diff --git a/lib/dataset.rb b/lib/dataset.rb
index df17569..596c53c 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -96,8 +96,14 @@ module OpenTox
# Get nominal and numeric prediction features
# @return [Array<OpenTox::NominalLazarPrediction,OpenTox::NumericLazarPrediction>]
- def prediction_features
- features.select{|f| f._type.match("Prediction")}
+ def prediction_feature
+ features.select{|f| f._type.match(/Prediction$/)}.first
+ end
+
+ # Get supporting nominal and numeric prediction features (class probabilities, prediction interval)
+ # @return [Array<OpenTox::LazarPredictionProbability,OpenTox::LazarPredictionInterval>]
+ def prediction_supporting_features
+ features.select{|f| f.is_a?(LazarPredictionProbability) or f.is_a?(LazarPredictionInterval)}
end
# Get nominal and numeric merged features
@@ -259,7 +265,7 @@ module OpenTox
feature_names = table.shift.collect{|f| f.strip}
raise ArgumentError, "Duplicated features in table header." unless feature_names.size == feature_names.uniq.size
- if feature_names[0] =~ /ID/i # check ID column
+ if feature_names[0] !~ /SMILES|InChI/i # check ID column
original_id = OriginalId.find_or_create_by(:dataset_id => self.id,:name => feature_names.shift)
else
original_id = OriginalId.find_or_create_by(:dataset_id => self.id,:name => "LineID")
@@ -343,30 +349,52 @@ module OpenTox
# Serialisation
- # Convert dataset to csv format
+ # Convert lazar prediction dataset to csv format
# @return [String]
- def to_csv #inchi=false
- CSV.generate() do |csv|
-
- compound = substances.first.is_a? Compound
- f = features - original_id_features - original_smiles_features - warnings_features
- header = original_id_features.collect{|f| "ID "+Dataset.find(f.dataset_id).name}
- header += original_smiles_features.collect{|f| "SMILES "+Dataset.find(f.dataset_id).name} if compound
- compound ? header << "Canonical SMILES" : header << "Name"
- header += f.collect{|f| f.name}
- header += warnings_features.collect{|f| "Warnings "+Dataset.find(f.dataset_id).name}
- csv << header
-
- substances.each do |substance|
- row = original_id_features.collect{|f| values(substance,f).join(" ")}
- row += original_smiles_features.collect{|f| values(substance,f).join(" ")} if compound
- compound ? row << substance.smiles : row << substance.name
- row += f.collect{|f| values(substance,f).join(" ")}
- row += warnings_features.collect{|f| values(substance,f).uniq.join(" ")}
+ def to_prediction_csv
+
+ compound = substances.first.is_a? Compound
+ header = ["ID"]
+ header << "Original SMILES" if compound
+ compound ? header << "Canonical SMILES" : header << "Name"
+ header << "Prediction" if prediction_feature
+ header << "Confidence" if confidence_feature
+ header += prediction_supporting_features.collect{|f| f.name}
+ header << "Measurements"
+ csv = [header]
+
+ substances.each do |substance|
+ row = original_id_features.collect{|f| values(substance,f).join(" ")}
+ row += original_smiles_features.collect{|f| values(substance,f).join(" ")} if compound
+ compound ? row << substance.smiles : row << substance.name
+ row << values(substance,prediction_feature).join(" ")
+ row << values(substance,confidence_feature).join(" ")
+ row += prediction_supporting_features.collect{|f| values(substance,f).join(" ")}
+ row << values(substance,bioactivity_features[0]).join(" ")
+ csv << row
+ end
+ csv.collect{|r| r.join(",")}.join("\n")
+ end
+
+ # Convert dataset into csv formatted training data
+ # @return [String]
+ def to_training_csv
+
+ p features
+ p bioactivity_features
+ header = ["Canonical SMILES"]
+ header << bioactivity_features[0].name
+ csv = [header]
+
+ substances.each do |substance|
+ nr_activities = values(substance,bioactivity_features.first).size
+ (0..nr_activities-1).each do |n| # new row for each value
+ row = [substance.smiles]
+ row << values(substance,bioactivity_features[0])[n]
csv << row
end
-
end
+ csv.collect{|r| r.join(",")}.join("\n")
end
# Convert dataset to SDF format
@@ -396,7 +424,6 @@ module OpenTox
predictions = {}
substances.each do |s|
predictions[s] ||= {}
- prediction_feature = prediction_features.first
predictions[s][:value] = values(s,prediction_feature).first
#predictions[s][:warnings] = []
#warnings_features.each { |w| predictions[s][:warnings] += values(s,w) }
diff --git a/lib/download.rb b/lib/download.rb
index f17d060..2546dc4 100644
--- a/lib/download.rb
+++ b/lib/download.rb
@@ -122,7 +122,6 @@ module OpenTox
# Combine mutagenicity data from Kazius, Hansen and EFSA and download into the data folder
def self.mutagenicity
$logger.debug "Mutagenicity"
- # TODO add download/conversion programs to lazar dependencies
hansen_url = "http://doc.ml.tu-berlin.de/toxbenchmark/Mutagenicity_N6512.csv"
kazius_url = "http://cheminformatics.org/datasets/bursi/cas_4337.zip"
efsa_url = "https://data.europa.eu/euodp/data/storage/f/2017-07-19T142131/GENOTOX data and dictionary.xls"
@@ -185,7 +184,7 @@ module OpenTox
map = {"mutagen" => "mutagenic", "nonmutagen" => "non-mutagenic"}
dataset = Dataset.merge datasets: datasets, features: datasets.collect{|d| d.bioactivity_features.first}, value_maps: [nil,nil,map], keep_original_features: false, remove_duplicates: true
dataset.merged_features.first.name = "Mutagenicity"
- File.open(File.join(DATA,"Mutagenicity-Salmonella_typhimurium.csv"),"w+"){|f| f.puts dataset.to_csv}
+ File.open(File.join(DATA,"Mutagenicity-Salmonella_typhimurium.csv"),"w+"){|f| f.puts dataset.to_training_csv}
meta = {
:species => "Salmonella typhimurium",
:endpoint => "Mutagenicity",
diff --git a/lib/feature.rb b/lib/feature.rb
index 72c26d7..296a174 100644
--- a/lib/feature.rb
+++ b/lib/feature.rb
@@ -18,6 +18,9 @@ module OpenTox
# Confidence
class Confidence < Feature
field :dataset_id, type: BSON::ObjectId
+ def name
+ "Confidence"
+ end
end
# Categorical variables
@@ -66,13 +69,13 @@ module OpenTox
field :model_id, type: BSON::ObjectId
field :training_feature_id, type: BSON::ObjectId
def name
- "#{self[:name]} Prediction"
+ "Prediction: #{self[:name]}"
end
end
class LazarPredictionProbability < NominalLazarPrediction
def name
- "probability(#{self[:name]})"
+ "Probability: #{self[:name]}"
end
end
@@ -81,13 +84,13 @@ module OpenTox
field :model_id, type: BSON::ObjectId
field :training_feature_id, type: BSON::ObjectId
def name
- "#{self[:name]} Prediction"
+ "Prediction: #{self[:name]}"
end
end
class LazarPredictionInterval < NumericLazarPrediction
def name
- "prediction_interval_#{self[:name]}"
+ "#{self[:name].capitalize} prediction interval"
end
end
diff --git a/lib/lazar.rb b/lib/lazar.rb
index 2a3f749..e77de9d 100644
--- a/lib/lazar.rb
+++ b/lib/lazar.rb
@@ -17,19 +17,22 @@ raise "Incorrect lazar environment variable LAZAR_ENV '#{ENV["LAZAR_ENV"]}', ple
ENV["MONGOID_ENV"] = ENV["LAZAR_ENV"]
ENV["RACK_ENV"] = ENV["LAZAR_ENV"] # should set sinatra environment
+# CH: this interferes with /etc/hosts on my machine
# search for a central mongo database in use
# http://opentox.github.io/installation/2017/03/07/use-central-mongodb-in-docker-environment
-CENTRAL_MONGO_IP = `grep -oP '^\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}(?=.*mongodb)' /etc/hosts`.chomp
+# CENTRAL_MONGO_IP = `grep -oP '^\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}(?=.*mongodb)' /etc/hosts`.chomp
Mongoid.load_configuration({
:clients => {
:default => {
:database => ENV["LAZAR_ENV"],
- :hosts => (CENTRAL_MONGO_IP.blank? ? ["localhost:27017"] : ["#{CENTRAL_MONGO_IP}:27017"]),
+ #:hosts => (CENTRAL_MONGO_IP.blank? ? ["localhost:27017"] : ["#{CENTRAL_MONGO_IP}:27017"]),
+ :hosts => ["localhost:27017"]
}
}
})
Mongoid.raise_not_found_error = false # return nil if no document is found
-$mongo = Mongo::Client.new("mongodb://#{(CENTRAL_MONGO_IP.blank? ? "127.0.0.1" : CENTRAL_MONGO_IP)}:27017/#{ENV['LAZAR_ENV']}")
+#$mongo = Mongo::Client.new("mongodb://#{(CENTRAL_MONGO_IP.blank? ? "127.0.0.1" : CENTRAL_MONGO_IP)}:27017/#{ENV['LAZAR_ENV']}")
+$mongo = Mongo::Client.new("mongodb://127.0.0.1:27017/#{ENV['LAZAR_ENV']}")
$gridfs = $mongo.database.fs
# Logger setup
diff --git a/lib/model.rb b/lib/model.rb
index cbfefe3..05cd113 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -286,14 +286,14 @@ module OpenTox
end
if threshold == algorithms[:similarity][:min].first
if prediction[:warnings].empty?
- prediction[:confidence] = "High (close to bioassay results)"
+ prediction[:confidence] = "Similar to bioassay results"
return prediction
else # try again with a lower threshold
prediction[:warnings] << "Lowering similarity threshold to #{algorithms[:similarity][:min].last}."
predict_substance substance, algorithms[:similarity][:min].last, prediction
end
elsif threshold < algorithms[:similarity][:min].first
- prediction[:confidence] = "Low (lower than bioassay results)"
+ prediction[:confidence] = "Lower than bioassay results"
return prediction
end
end
@@ -348,9 +348,9 @@ module OpenTox
end
elsif prediction_feature.is_a? NumericBioActivity
f = NumericLazarPrediction.find_or_create_by(:name => prediction_feature.name, :unit => prediction_feature.unit, :model_id => self.id, :training_feature_id => prediction_feature.id)
- prediction_interval = {}
+ prediction_interval = []
["lower","upper"].each do |v|
- prediction_interval[v] = LazarPredictionInterval.find_or_create_by(:name => v, :model_id => self.id, :training_feature_id => prediction_feature.id)
+ prediction_interval << LazarPredictionInterval.find_or_create_by(:name => v, :model_id => self.id, :training_feature_id => prediction_feature.id)
end
end
diff --git a/lib/validation-statistics.rb b/lib/validation-statistics.rb
index 8a8970e..d603294 100644
--- a/lib/validation-statistics.rb
+++ b/lib/validation-statistics.rb
@@ -18,7 +18,7 @@ module OpenTox
if pred[:value] == v
confusion_matrix[:all][i][i] += 1
self.nr_predictions[:all] += 1
- if pred[:confidence].match(/High/i)
+ if pred[:confidence].match(/Similar/i)
confusion_matrix[:confidence_high][i][i] += 1
self.nr_predictions[:confidence_high] += 1
elsif pred[:confidence].match(/Low/i)
@@ -32,7 +32,7 @@ module OpenTox
if pred[:value] == v
confusion_matrix[:all][i][(i+1)%2] += 1
self.nr_predictions[:all] += 1
- if pred[:confidence].match(/High/i)
+ if pred[:confidence].match(/Similar/i)
confusion_matrix[:confidence_high][i][(i+1)%2] += 1
self.nr_predictions[:confidence_high] += 1
elsif pred[:confidence].match(/Low/i)
diff --git a/test/classification-model.rb b/test/classification-model.rb
index 79ccb98..c41b211 100644
--- a/test/classification-model.rb
+++ b/test/classification-model.rb
@@ -84,13 +84,19 @@ class ClassificationModelTest < MiniTest::Test
assert_kind_of Dataset, result
assert_equal 7, result.features.size
assert_equal 85, result.compounds.size
- prediction_feature = result.prediction_features.first
+ prediction_feature = result.prediction_feature
assert_equal ["carcinogenic"], result.values(result.compounds[1], prediction_feature)
assert_equal ["non-carcinogenic"], result.values(result.compounds[5], prediction_feature)
assert_nil result.predictions[result.compounds.first][:value]
assert_equal "carcinogenic", result.predictions[result.compounds[1]][:value]
assert_equal 0.27, result.predictions[result.compounds[1]][:probabilities]["non-carcinogenic"].round(2)
- assert_match /High/i, result.predictions[result.compounds[1]][:confidence]
+ assert_match /Similar/i, result.predictions[result.compounds[1]][:confidence]
+ csv = result.to_prediction_csv
+ rows = csv.split("\n")
+ assert_equal "ID,Original SMILES,Canonical SMILES,Prediction,Confidence,Probability: carcinogenic,Probability: non-carcinogenic,Measurements", rows[0]
+ items = rows[2].split(",")
+ assert_equal "carcinogenic", items[3]
+ assert_equal 0.27, items[6].to_f.round(2) # probabilities
end
def test_carcinogenicity_rf_classification
diff --git a/test/dataset.rb b/test/dataset.rb
index 8e230e0..b978512 100644
--- a/test/dataset.rb
+++ b/test/dataset.rb
@@ -137,7 +137,6 @@ class DatasetTest < MiniTest::Test
d = Dataset.from_csv_file File.join(DATA_DIR,"batch_prediction_#{type}_small.csv")
assert_equal Dataset, d.class
refute_nil d.id
- dataset = Dataset.find d.id
assert_equal 3, d.compounds.size
end
end
@@ -175,10 +174,16 @@ class DatasetTest < MiniTest::Test
datasets = [hansen,efsa,kazius]
map = {"mutagen" => "mutagenic", "nonmutagen" => "non-mutagenic"}
dataset = Dataset.merge datasets: datasets, features: datasets.collect{|d| d.bioactivity_features.first}, value_maps: [nil,nil,map], keep_original_features: true, remove_duplicates: true
- assert_equal 8281, dataset.compounds.size
- assert_equal 9, dataset.features.size
+ csv = dataset.to_training_csv
+ rows = csv.split("\n")
+ header = rows.shift
+ assert_equal "Canonical SMILES,Mutagenicity",header
+ values = rows.collect{|r| r.split(",")[1]}.uniq
+ assert_equal 2, values.size
+ assert_equal 8290, dataset.compounds.size
c = Compound.from_smiles("C/C=C/C=O")
assert_equal ["mutagenic"], dataset.values(c,dataset.merged_features.first)
+ assert_equal 9, dataset.features.size
end
# serialisation
@@ -203,6 +208,13 @@ class DatasetTest < MiniTest::Test
end
# special cases/details
+
+ def test_daphnia_import
+ d = Dataset.from_csv_file File.join(File.dirname(__FILE__),"..","data", "Acute_toxicity-Daphnia_magna.csv")
+ assert_equal 3, d.features.size # assert_equal compares; a bare assert passes for any truthy first argument
+ assert_equal 546, d.compounds.size
+ puts d.to_training_csv
+ end
def test_dataset_accessors
d = Dataset.from_csv_file "#{DATA_DIR}/multicolumn.csv"
diff --git a/test/regression-model.rb b/test/regression-model.rb
index 7f667dc..3b41171 100644
--- a/test/regression-model.rb
+++ b/test/regression-model.rb
@@ -173,13 +173,26 @@ class LazarRegressionTest < MiniTest::Test
model = Model::Lazar.create training_dataset: training_dataset
result = model.predict training_dataset
assert_kind_of Dataset, result
- assert_equal 6, result.features.size
+ assert_equal 8, result.features.size
assert_equal 88, result.compounds.size
assert_equal [1.95], result.values(result.compounds.first, result.bioactivity_features[0]).collect{|v| v.round(2)}
assert_equal [1.37], result.values(result.compounds[6], result.bioactivity_features[0]).collect{|v| v.round(2)}
- assert_equal [1.79], result.values(result.compounds[6], result.prediction_features[0]).collect{|v| v.round(2)}
+ assert_equal [1.79], result.values(result.compounds[6], result.prediction_feature).collect{|v| v.round(2)}
assert_equal [1.84,1.73], result.values(result.compounds[7], result.bioactivity_features[0]).collect{|v| v.round(2)}
assert_match /Low/i, result.predictions[result.compounds[6]][:confidence]
+ csv = result.to_prediction_csv
+ rows = csv.split("\n")
+ assert_equal "ID,Original SMILES,Canonical SMILES,Prediction,Confidence,Lower prediction interval,Upper prediction interval,Measurements", rows[0]
+ items = rows[3].split(",")
+ # prediction and measurement within prediction interval
+ prediction = items[3].to_f
+ pi_low = items[5].to_f
+ pi_hi = items[6].to_f
+ measurement = items[7].to_f
+ [prediction,measurement].each do |v|
+ assert(v > pi_low)
+ assert(v < pi_hi)
+ end
end
end