From 488ce9fe6d4b715680675861105b8c52a7535140 Mon Sep 17 00:00:00 2001 From: gebele Date: Tue, 23 Jul 2019 11:30:52 +0000 Subject: updated training data without Mutagenicity because xls2csv is not recognized on my system --- data/Carcinogenicity-Rat_(TD50).csv | 50 ++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 25 deletions(-) (limited to 'data/Carcinogenicity-Rat_(TD50).csv') diff --git a/data/Carcinogenicity-Rat_(TD50).csv b/data/Carcinogenicity-Rat_(TD50).csv index 2e5460f..cdab2ed 100644 --- a/data/Carcinogenicity-Rat_(TD50).csv +++ b/data/Carcinogenicity-Rat_(TD50).csv @@ -41,7 +41,7 @@ SID,SMILES,-log10(Carcinogenicity-Rat_(TD50) [mmol/kg-bw/day]) 48413241,CCNC1=NC(=NC(=N1)Cl)NC(C)C,0.7695510786217261 48413243,CN(C)C1=CC=C(C=C1)C(=N)C2=CC=C(C=C2)N(C)C.Cl,1.4412914294668342 48413245,C1=NC(=NC(=O)N1C2C(C(C(O2)CO)O)O)N,3.157390760389438 -48413247,C(C(C(=O)O)N)OC(=C[N+]#N)[O-],2.339134521996131 +48413247,C(C(C(=O)O)N)OC(=O)C=[N+]=[N-],2.339134521996131 48413252,C1=CC=C(C=C1)N=NC2=CC=CC=C2,0.8794260687941502 48413253,CN=[N+](C)[O-],3.201349354554731 48413254,CCCN=[N+](CCC)[O-],5.732828271596986 @@ -58,17 +58,17 @@ SID,SMILES,-log10(Carcinogenicity-Rat_(TD50) [mmol/kg-bw/day]) 48413302,C(OCCl)Cl,4.507239610973162 48413303,C(C(CBr)Br)OP(=O)([O-])OCC(CBr)Br.C(C(CBr)Br)OP(=O)([O-])OCC(CBr)Br.[Mg+2],1.5030703519267852 48413305,C1=CC=C2C(=C1)C(=NC(=N2)C3=CC=C(S3)[N+](=O)[O-])N(CCO)CCO,2.0599818449923366 -48413313,C1=CC=C(C=C1)NN=C2C(=CC3=CC(=C(C(=C3C2=O)N)N=NC4=CC=C(C=C4)C5=CC=C(C=C5)N=NC6=C(C=C(C=C6)N)N)S(=O)(=O)[O-])S(=O)(=O)[O-].[Na+].[Na+],2.749579997691106 -48413314,C1=CC(=CC=C1C2=CC=C(C=C2)NN=C3C(=CC4=CC(=CC(=C4C3=O)N)S(=O)(=O)[O-])S(=O)(=O)[O-])NN=C5C(=CC6=CC(=CC(=C6C5=O)N)S(=O)(=O)[O-])S(=O)(=O)[O-].[Na+].[Na+].[Na+].[Na+],2.7328282715969863 -48413315,COC1=C(C=CC(=C1)C2=CC(=C(C=C2)NN=C3C(=CC4=CC(=CC(=C4C3=O)N)S(=O)(=O)[O-])S(=O)(=O)[O-])OC)NN=C5C(=CC6=CC(=CC(=C6C5=O)N)S(=O)(=O)[O-])S(=O)(=O)[O-].[Na+].[Na+].[Na+].[Na+],1.5575202309355514 -48413316,C1=CC(=C(C=C1C2=CC(=C(C=C2)NN=C3C(=CC4=CC(=CC(=C4C3=O)N)S(=O)(=O)[O-])S(=O)(=O)[O-])O)O)NN=C5C(=CC6=CC(=CC(=C6C5=O)N)S(=O)(=O)[O-])S(=O)(=O)[O-].[Na+].[Na+].[Na+].[Na+].[Cu].[Cu],-0.15836249209524964 +48413313,C1=CC=C(C=C1)N=NC2=C(C3=C(C(=C(C=C3C=C2S(=O)(=O)[O-])S(=O)(=O)[O-])N=NC4=CC=C(C=C4)C5=CC=C(C=C5)N=NC6=C(C=C(C=C6)N)N)N)O.[Na+].[Na+],2.749579997691106 +48413314,C1=CC(=CC=C1C2=CC=C(C=C2)N=NC3=C(C4=C(C=C(C=C4C=C3S(=O)(=O)[O-])S(=O)(=O)[O-])N)O)N=NC5=C(C6=C(C=C(C=C6C=C5S(=O)(=O)[O-])S(=O)(=O)[O-])N)O.[Na+].[Na+].[Na+].[Na+],2.7328282715969863 +48413315,COC1=C(C=CC(=C1)C2=CC(=C(C=C2)N=NC3=C(C4=C(C=C(C=C4C=C3S(=O)(=O)[O-])S(=O)(=O)[O-])N)O)OC)N=NC5=C(C6=C(C=C(C=C6C=C5S(=O)(=O)[O-])S(=O)(=O)[O-])N)O.[Na+].[Na+].[Na+].[Na+],1.5575202309355514 +48413316,C1=CC(=C(C=C1C2=CC(=C(C=C2)N=NC3=C(C4=C(C=C(C=C4C=C3S(=O)(=O)[O-])S(=O)(=O)[O-])N)O)O)O)N=NC5=C(C6=C(C=C(C=C6C=C5S(=O)(=O)[O-])S(=O)(=O)[O-])N)O.[Na+].[Na+].[Na+].[Na+].[Cu].[Cu],-0.15836249209524964 48413317,C1=CC(=C2C(=C1N)C(=O)C3=C(C=CC(=C3C2=O)N)N)N,0.23507701535011152 48413320,CNC1=C(C=C(C=C1)N(CCO)CCO)[N+](=O)[O-],-0.43933269383026263 48413325,[O-]Br(=O)=O.[K+],1.2306226739238615 48413327,CC(C)CC1C(=O)N2CCCC2C3(N1C(=O)C(O3)(C(C)C)NC(=O)C4CN(C5CC6=C(NC7=CC=CC(=C67)C5=C4)Br)C)O.CS(=O)(=O)O,1.3477536589966768 48413328,C(Cl)(Cl)Br,0.3535962737769304 48413329,CCBr,-0.1367205671564068 -48413331,C1=CC(=CC=C1C2=CC=C(C=C2)N=NC3=CC(=C(C=C3)[O-])C(=O)[O-])NN=C4C(=O)C=CC(=NNC5=C(C=CC(=C5)S(=O)(=O)[O-])[O-])C4=O.[Na+].[Na+].[Cu+2],2.5654310959658013 +48413331,C1=CC(=CC=C1C2=CC=C(C=C2)N=NC3=C(C=CC(=C3[O-])N=NC4=C(C=CC(=C4)S(=O)(=O)[O-])[O-])O)N=NC5=CC(=C(C=C5)O)C(=O)[O-].[Na+].[Na+].[Cu+2],2.5654310959658013 48413332,CCCC1OC2CC3C4CCC5=CC(=O)C=CC5(C4C(CC3(C2(O1)C(=O)CO)C)O)C,3.170053304058364 48413333,C=CC=C,-0.6839471307515121 48413335,CC(C)(C)O,0.059483515067432775 @@ -122,13 +122,13 @@ SID,SMILES,-log10(Carcinogenicity-Rat_(TD50) [mmol/kg-bw/day]) 48413488,CC1=C(C=CC(=C1)OC)N,-0.5352941200427705 48413489,CC1=CC(=C(C=C1)OC)N,0.14630178822382564 48413490,CC=CC=O,1.2225731776106885 -48413491,C1=CC=C(C=C1)N(N=O)[O-].[NH4+],1.269217724333611 +48413491,C1=CC=C(C=C1)[N+](=N[O-])[O-].[NH4+],1.269217724333611 48413493,CCNC1=NC(=NC(=N1)Cl)NC(C)(C)C#N,1.5800442515102422 48413503,C1CCC(=NO)C1,0.384049948343599 48413504,COC1=C(C=C(C=C1)C(=O)NC2=C(C=NC=C2Cl)Cl)OC3CCCC3,2.393618634889395 48413505,C1CNP(=O)(OC1)N(CCCl)CCCl,2.0726296369609765 48413509,COC1=CC=C(C=C1)C(=O)C(=CC(=O)[O-])Br.[Na+],2.0447934624580584 -48413510,CN(C)NN=C1C(=NC=N1)C(=O)N,2.408935392973501 +48413510,CN(C)N=NC1=C(NC=N1)C(=O)N,2.408935392973501 48413511,CN(C)NC(=O)CCC(=O)O,-1.1931245983544616 48413512,C1=CC(=CC=C1N)S(=O)(=O)C2=CC=C(C=C2)N,1.0447934624580584 48413516,C1=CC(=CC=C1C(C2=CC=C(C=C2)Cl)C(Cl)(Cl)Cl)Cl,0.6216020990518624 @@ -284,7 +284,7 @@ SID,SMILES,-log10(Carcinogenicity-Rat_(TD50) [mmol/kg-bw/day]) 48413950,CC1=NC=C(C=C1)CC2=CN=C(NC2=O)NCCSCC3=CC=C(O3)CN(C)C.Cl.Cl.Cl,-0.8082109729242221 48413957,C(=C[O-])C=O.[Na+],-0.11394335230683678 48413958,C(CNC(=S)[S-])NC(=S)[S-].[Mn+2],0.22767829327708025 -48413963,CC1=C(N=C2C(=C1)C3=CC=CC=C3N2)[NH3+].CC(=O)[O-],1.585026652029182 +48413963,CC1=CC2=C(NC3=CC=CC=C32)[NH+]=C1N.CC(=O)[O-],1.585026652029182 48413965,CC1=CN=C2C=CC3=C(C2=N1)N=C(N3C)N,2.109020403010311 48413966,C1(=NC(=NC(=N1)N)N)N,-0.7656685547590141 48413968,C1=CC(=CC=C1CC(C(=O)O)N)N(CCCl)CCCl,3.5128616245228135 @@ -303,7 +303,7 @@ SID,SMILES,-log10(Carcinogenicity-Rat_(TD50) [mmol/kg-bw/day]) 48414002,CC(C)(C(=O)OC)OC1=CC=C(C=C1)C2=CC=C(C=C2)Cl,1.8041003475907664 48414004,CC1=CC(=CC=C1)N=NC2=CC=C(C=C2)N(C)C,1.8632794328435933 48414005,CN(C1=CC=C(C=C1)N=O)N=O,2.1040252676409352 -48414012,CN(C(=N[N+](=O)[O-])N)N=O,2.2628073572952627 +48414012,CN(C(=N)N[N+](=O)[O-])N=O,2.2628073572952627 48414013,CC1=C(C2=C(C=C1)C(=O)C3=CC=CC=C3C2=O)[N+](=O)[O-],0.49894073778224846 48414014,CC1CN(C(=O)N1)N=CC2=CC=C(O2)[N+](=O)[O-],1.6497519816658373 48414015,CN(C1=CC=C(C=C1)C=CC2=CC=NC3=CC=CC=C23)N=O,2.616184634019569 @@ -355,7 +355,7 @@ SID,SMILES,-log10(Carcinogenicity-Rat_(TD50) [mmol/kg-bw/day]) 48414124,C1=C(OC(=C1)[N+](=O)[O-])C2=CSC(=N2)NC=O,1.749579997691106 48414125,CC(=O)NC1=NC(=NC(=N1)C2=CC=C(O2)[N+](=O)[O-])NC(=O)C,1.337242168318426 48414126,CCC=C(CC)[N+](=O)[O-],1.1739251972991736 -48414131,C1CC2=CC=CC3=C(C=CC1=C23)[N+](=O)[O-],1.3615107430453628 +48414131,C1CC2=CC=C(C3=CC=CC1=C23)[N+](=O)[O-],1.3615107430453628 48414133,COC1=CC=CC=C1[N+](=O)[O-],0.9913998282380825 48414135,C1=CC=C(C=C1)[N+](=O)[O-],0.6840296545430823 48414137,C1=CC(=CC=C1C(=O)O)[N+](=O)[O-],-0.23552844690754893 @@ -459,7 +459,7 @@ SID,SMILES,-log10(Carcinogenicity-Rat_(TD50) [mmol/kg-bw/day]) 48414304,CC(COC1=CC=CC=C1)N(CCCl)CC2=CC=CC=C2.Cl,2.494850021680094 48414305,CN(C)N=NC1=CC=CC=C1,1.8096683018297086 48414310,C1=CC=C(C=C1)C(COC(=O)N)COC(=O)N,-0.6608654780038692 -48414313,C1=CC=C(C=C1)NN=C2C(=O)C=CC3=CC=CC=C32,0.9281179926938746 +48414313,C1=CC=C(C=C1)N=NC2=C(C=CC3=CC=CC=C32)O,0.9281179926938746 48414314,CCCCC1C(=O)N(N(C1=O)C2=CC=CC=C2)C3=CC=CC=C3,-0.5751878449276611 48414318,C1=CC=C(C(=C1)N)N.Cl.Cl,-0.1367205671564068 48414323,C1C(O1)COC2=CC=CC=C2,0.5331323796458906 @@ -477,23 +477,23 @@ SID,SMILES,-log10(Carcinogenicity-Rat_(TD50) [mmol/kg-bw/day]) 48414373,CC(C)NC(=O)C1=CC=C(C=C1)CNNC.Cl,2.8664610916297826 48414378,C1COS(=O)(=O)C1,1.5030703519267852 48414380,C1COC1=O,1.692503962086787 -48414386,CCCN(C(=N[N+](=O)[O-])N)N=O,2.1260984021355385 +48414386,CCCN(C(=N)N[N+](=O)[O-])N=O,2.1260984021355385 48414387,CCCN(C(=O)N)N=O,1.5421181032660076 48414391,CC1CO1,-0.10720996964786837 48414393,CCCC1=CC(=O)NC(=S)N1,1.0942041196321315 -48414397,C1=CC=C2C(=C1)C(=CC=C2S(=O)(=O)[O-])NN=C3C(=O)C=CC4=CC(=CC(=C43)S(=O)(=O)[O-])S(=O)(=O)[O-].[Na+].[Na+].[Na+],-1.6074550232146685 +48414397,C1=CC=C2C(=C1)C(=CC=C2S(=O)(=O)[O-])N=NC3=C(C=CC4=CC(=CC(=C43)S(=O)(=O)[O-])S(=O)(=O)[O-])O.[Na+].[Na+].[Na+],-1.6074550232146685 48414398,C1=CC=C2C(=C1)C(=O)C3=C(C2=O)C(=C(C=C3O)O)O,-0.42324587393680785 48414400,C1=CC=NC=C1,0.07007043991541213 48414401,CN(C)CCN(CC1=CC=C(C=C1)OC)C2=CC=CC=N2.C(=CC(=O)O)C(=O)O,0.1567672219019906 48414403,C1=CC(=C(C=C1C2=C(C(=O)C3=C(C=C(C=C3O2)O)O)O)O)O,1.4762535331884354 48414407,C1=CC(=CC=C1NO)N=O,0.11520463605101904 -48414409,CC1=CC=C(C=C1)S(=O)(=O)OC2=CC=C(C=C2)N=NC3=C(C=C(C=C3)C4=CC(=C(C=C4)NN=C5C(=O)C=CC6=CC(=CC(=C65)S(=O)(=O)[O-])S(=O)(=O)[O-])C)C.[Na+].[Na+],2.329754146925876 -48414411,CC1=CC(=C(C=C1)NN=C2C(=O)C=CC3=CC=CC=C32)[N+](=O)[O-],-0.5809249756756193 -48414413,CC1=CC(=C(C=C1)NN=C2C3=C(C=CC(=C3)S(=O)(=O)[O-])C=C(C2=O)S(=O)(=O)[O-])C.[Na+].[Na+],0.06348625752110672 -48414414,CC1=CC(=C(C=C1Cl)S(=O)(=O)[O-])NN=C2C(=O)C=CC3=CC=CC=C32.CC1=CC(=C(C=C1Cl)S(=O)(=O)[O-])NN=C2C(=O)C=CC3=CC=CC=C32.[Ba+2],0.785156151952302 -48414416,CC1=CC(=C(C=C1C)NN=C2C3=C(C=C(C=C3)S(=O)(=O)[O-])C=C(C2=O)S(=O)(=O)[O-])C.[Na+].[Na+],-0.021189299069938092 -48414417,C1=CC=C2C(=C1)C(=CC=C2S(=O)(=O)[O-])NN=C3C4=C(C=C(C=C4)S(=O)(=O)[O-])C=C(C3=O)S(=O)(=O)[O-].[Na+].[Na+].[Na+],-0.38560627359831223 -48414419,CC1=CC(=C(C=C1NN=C2C=C(C3=CC=CC=C3C2=O)S(=O)(=O)[O-])S(=O)(=O)[O-])C.[Na+].[Na+],-1.2278867046136734 +48414409,CC1=CC=C(C=C1)S(=O)(=O)OC2=CC=C(C=C2)N=NC3=C(C=C(C=C3)C4=CC(=C(C=C4)N=NC5=C(C=CC6=CC(=CC(=C65)S(=O)(=O)[O-])S(=O)(=O)[O-])O)C)C.[Na+].[Na+],2.329754146925876 +48414411,CC1=CC(=C(C=C1)N=NC2=C(C=CC3=CC=CC=C32)O)[N+](=O)[O-],-0.5809249756756193 +48414413,CC1=CC(=C(C=C1)N=NC2=C3C=C(C=CC3=CC(=C2O)S(=O)(=O)[O-])S(=O)(=O)[O-])C.[Na+].[Na+],0.06348625752110672 +48414414,CC1=CC(=C(C=C1Cl)S(=O)(=O)[O-])N=NC2=C(C=CC3=CC=CC=C32)O.CC1=CC(=C(C=C1Cl)S(=O)(=O)[O-])N=NC2=C(C=CC3=CC=CC=C32)O.[Ba+2],0.785156151952302 +48414416,CC1=CC(=C(C=C1C)N=NC2=C3C=CC(=CC3=CC(=C2O)S(=O)(=O)[O-])S(=O)(=O)[O-])C.[Na+].[Na+],-0.021189299069938092 +48414417,C1=CC=C2C(=C1)C(=CC=C2S(=O)(=O)[O-])N=NC3=C4C=CC(=CC4=CC(=C3O)S(=O)(=O)[O-])S(=O)(=O)[O-].[Na+].[Na+].[Na+],-0.38560627359831223 +48414419,CC1=CC(=C(C=C1N=NC2=C(C3=CC=CC=C3C(=C2)S(=O)(=O)[O-])O)S(=O)(=O)[O-])C.[Na+].[Na+],-1.2278867046136734 48414422,COC1C(CC2CN3CCC4=C(C3CC2C1C(=O)OC)NC5=C4C=CC(=C5)OC)OC(=O)C6=CC(=C(C(=C6)OC)OC)OC,3.2984320149440727 48414425,CC1=C(C(CCC1)(C)C)C=CC(=CC=CC(=CCOC(=O)C)C)C,0.4190750243243807 48414427,CC=C1CC(C(C(=O)OCC2=CCN3C2C(CC3)OC1=O)(CO)O)C,2.6108339156354674 @@ -502,11 +502,11 @@ SID,SMILES,-log10(Carcinogenicity-Rat_(TD50) [mmol/kg-bw/day]) 48414439,C1=CC=C2C(=C1)C(=O)[N-]S2(=O)=O.[Na+],-1.0170333392987803 48414440,C=CCC1=CC2=C(C=C1)OCO2,-0.43456890403419873 48414441,CC(C)(C)NCC(C1=CC(=C(C=C1)O)CO)O,0.7772835288524167 -48414442,C1=CC=NC(=C1)NS(=O)(=O)C2=CC=C(C=C2)NN=C3C=CC(=O)C(=C3)C(=O)O,-0.6009728956867483 +48414442,C1=CC=NC(=C1)NS(=O)(=O)C2=CC=C(C=C2)N=NC3=CC(=C(C=C3)O)C(=O)O,-0.6009728956867483 48414450,S=[Se],1.142064735280571 48414451,CC=C1CC(C(C(=O)OCC2=CCN(CCC(C2=O)OC1=O)C)(C)O)C,2.332547047110046 48414453,C1OC2=C(O1)C=C(C=C2)O,-0.989894563718773 -48414466,COC1=C2C(=C3C4C=COC4OC3=C1)OC5=C(C2=O)C(=CC=C5)O,3.3288271572849166 +48414466,COC1=C2C(=C3C4C=COC4OC3=C1)OC5=CC=CC(=C5C2=O)O,3.3288271572849166 48414469,CN(C(=O)NC1C(C(C(OC1O)CO)O)O)N=O,2.4400933749638876 48414471,C=CC1=CC=CC=C1,0.6497519816658371 48414473,C1C(O1)C2=CC=CC=C2,0.33629907461035186 @@ -555,7 +555,7 @@ SID,SMILES,-log10(Carcinogenicity-Rat_(TD50) [mmol/kg-bw/day]) 48414601,C(CCl)OP(=O)(OCCCl)OCCCl,0.5171264163912462 48414603,C(C(CBr)Br)OP(=O)(OCC(CBr)Br)OCC(CBr)Br,2.260427655549908 48414604,C(C(CCl)OP(=O)(OC(CCl)CCl)OC(CCl)CCl)Cl,0.9665762445130504 -48414608,CC1=C(N=C(C2=C1NC3=CC=CC=C32)C)N.CC(=O)O,2.673664139071249 +48414608,CC1=C2C(=C(N=C1N)C)C3=CC=CC=C3N2.CC(=O)O,2.673664139071249 48414609,CC1=C2C3=CC=CC=C3NC2=CC(=N1)N.CC(=O)O,1.586700235918748 48414616,C1=CNC(=O)NC1=O,-0.7774268223893114 48414619,CCOC(=O)N,0.3334820194451191 @@ -568,6 +568,6 @@ SID,SMILES,-log10(Carcinogenicity-Rat_(TD50) [mmol/kg-bw/day]) 48414639,CC1=CC(=CC=C1)C,-1.4668676203541096 48414639,CC1=CC(=CC=C1)C,-1.4668676203541096 48414642,CC1=CC(=C(C=C1)C)N.Cl,0.015922966097169238 -48414643,CC1=CC(=NNC2=CC=C(C=C2)NC(=O)C)C(=O)C=C1,-0.14921911265537988 +48414643,CC1=CC(=C(C=C1)O)N=NC2=CC=C(C=C2)NC(=O)C,-0.14921911265537988 48414657,CN(C)C(=S)[S-].CN(C)C(=S)[S-].[Zn+2],0.8761483590329142 48414658,C(CNC(=S)[S-])NC(=S)[S-].[Zn+2],0.03385826726096737 -- cgit v1.2.3