From bb8797e0047f02768033cf6839dc926d30c016d2 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Tue, 14 Feb 2017 15:02:17 +0100 Subject: rf models, sim 0.5, no weighted average --- data/predictions-measurements.csv | 77 ++++++++++++------ data/training-test-predictions.csv | 60 ++++++++------ data/training-test-predictions.id | 2 +- data/training_log10-cv-0.csv | 150 +++++++++++++++++++++++------------ data/training_log10-cv-0.id | 2 +- data/training_log10-cv-1.csv | 155 ++++++++++++++++++++++-------------- data/training_log10-cv-1.id | 2 +- data/training_log10-cv-2.csv | 157 +++++++++++++++++++++++-------------- data/training_log10-cv-2.id | 2 +- figures/crossvalidation.pdf | Bin 6667 -> 7754 bytes figures/dataset-variability.pdf | Bin 10212 -> 10212 bytes figures/functional-groups.pdf | Bin 6376 -> 6376 bytes figures/test-correlation.pdf | Bin 6512 -> 6583 bytes figures/test-prediction.pdf | Bin 5840 -> 6197 bytes loael.md | 10 +-- loael.pdf | Bin 643486 -> 645001 bytes scripts/crossvalidation.rb | 6 +- scripts/test-validation.rb | 2 +- 18 files changed, 392 insertions(+), 233 deletions(-) diff --git a/data/predictions-measurements.csv b/data/predictions-measurements.csv index 9c6bede..d19097c 100644 --- a/data/predictions-measurements.csv +++ b/data/predictions-measurements.csv @@ -1,90 +1,115 @@ SMILES,LOAEL,Origin -COCC(N(c1c(C)cccc1CC)C(=O)CCl)C,0.6440145091827779,Prediction +COc1cccc(c1C)C(=O)NN(C(C)(C)C)C(=O)c1cc(C)cc(c1)C,0.6879561843647223,Prediction +COc1cccc(c1C)C(=O)NN(C(C)(C)C)C(=O)c1cc(C)cc(c1)C,-0.04743064015554219,Measurement +COC(=O)Nc1nc2c([nH]1)cccc2,1.5392859574286677,Prediction +COC(=O)Nc1nc2c([nH]1)cccc2,-0.11647491190833141,Measurement +COC(=O)Nc1nc2c([nH]1)cccc2,0.40639276248013095,Measurement +COCC(N(c1c(C)cccc1CC)C(=O)CCl)C,0.7153960864733138,Prediction COCC(N(c1c(C)cccc1CC)C(=O)CCl)C,0.27691392283721084,Measurement -COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1C(=O)OC,1.08729219715294,Prediction +COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1C(=O)OC,0.9841230105681268,Prediction COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1C(=O)OC,0.18340080321223973,Measurement COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1C(=O)OC,0.5057897307706736,Measurement -CC(=CC1C(C1(C)C)C(=O)OCc1cccc(c1)Oc1ccccc1)C,0.9936908951508763,Prediction +CC(=CC1C(C1(C)C)C(=O)OCc1cccc(c1)Oc1ccccc1)C,0.9999740698329931,Prediction CC(=CC1C(C1(C)C)C(=O)OCc1cccc(c1)Oc1ccccc1)C,0.36853593732630685,Measurement -OC(=O)COc1ccc(cc1Cl)Cl,1.2903965824234715,Prediction +OC(=O)COc1ccc(cc1Cl)Cl,1.2847669118955252,Prediction OC(=O)COc1ccc(cc1Cl)Cl,0.46940334082401963,Measurement OC(=O)COc1ccc(cc1Cl)Cl,0.5520671344078197,Measurement OC(=O)COc1ccc(cc1Cl)Cl,1.6454882002557272,Measurement -CCOCN(c1c(C)cccc1CC)C(=O)CCl,0.6614835102658171,Prediction +CCOCN(c1c(C)cccc1CC)C(=O)CCl,0.7337346143256055,Prediction CCOCN(c1c(C)cccc1CC)C(=O)CCl,0.5921334169692242,Measurement CCOCN(c1c(C)cccc1CC)C(=O)CCl,0.6055658314011255,Measurement CCOCN(c1c(C)cccc1CC)C(=O)CCl,0.7320074096344176,Measurement CCOCN(c1c(C)cccc1CC)C(=O)CCl,0.7542899704547085,Measurement -ClCCOc1ccccc1S(=O)(=O)NC(=O)Nc1nc(C)nc(n1)OC,0.8402417574704084,Prediction +ClCCOc1ccccc1S(=O)(=O)NC(=O)Nc1nc(C)nc(n1)OC,0.646480877887512,Prediction ClCCOc1ccccc1S(=O)(=O)NC(=O)Nc1nc(C)nc(n1)OC,0.26004020676066664,Measurement ClCCOc1ccccc1S(=O)(=O)NC(=O)Nc1nc(C)nc(n1)OC,1.1269178314119968,Measurement -COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1CCC(F)(F)F,0.8387439163561423,Prediction +COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1CCC(F)(F)F,0.7486691451711134,Prediction COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1CCC(F)(F)F,0.6781324193692118,Measurement COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1CCC(F)(F)F,0.7200594271604471,Measurement -N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,1.4738604918495701,Prediction +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,1.5673147534071126,Prediction N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.7443416268287478,Measurement N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.9204208330298691,Measurement -CCOc1cc(ccc1[N+](=O)[O-])Oc1ccc(cc1Cl)C(F)(F)F,1.2881028599130595,Prediction +CCCC1COC(O1)(Cn1cncn1)c1ccc(cc1Cl)Cl,1.3750196575989226,Prediction +CCCC1COC(O1)(Cn1cncn1)c1ccc(cc1Cl)Cl,0.5520361697732551,Measurement +CCCC1COC(O1)(Cn1cncn1)c1ccc(cc1Cl)Cl,1.13636788955756,Measurement +CCOc1cc(ccc1[N+](=O)[O-])Oc1ccc(cc1Cl)C(F)(F)F,1.3457613975775877,Prediction CCOc1cc(ccc1[N+](=O)[O-])Oc1ccc(cc1Cl)C(F)(F)F,0.8340734503909036,Measurement CCOc1cc(ccc1[N+](=O)[O-])Oc1ccc(cc1Cl)C(F)(F)F,0.9562841419387937,Measurement -ClC(=CC1C(C1(C)C)C(=O)OCc1cccc(c1)Oc1ccccc1)Cl,0.571082862420873,Prediction +ClC(=CC1C(C1(C)C)C(=O)OCc1cccc(c1)Oc1ccccc1)Cl,0.7288845774295261,Prediction ClC(=CC1C(C1(C)C)C(=O)OCc1cccc(c1)Oc1ccccc1)Cl,0.8935371162038789,Measurement ClC(=CC1C(C1(C)C)C(=O)OCc1cccc(c1)Oc1ccccc1)Cl,1.1945535170115598,Measurement -OC(=O)C(Oc1ccc(cc1Cl)Cl)C,1.1647860825776641,Prediction +OC(=O)C(Oc1ccc(cc1Cl)Cl)C,1.0558069650022,Prediction OC(=O)C(Oc1ccc(cc1Cl)Cl)C,0.8088844776196935,Measurement OC(=O)C(Oc1ccc(cc1Cl)Cl)C,1.4169486617141684,Measurement -CCNc1nc(NC(C)C)nc(n1)Cl,1.158750561362179,Prediction +CCNc1nc(NC(C)C)nc(n1)Cl,1.3332962711923355,Prediction CCNc1nc(NC(C)C)nc(n1)Cl,0.9358790941703782,Measurement CCNc1nc(NC(C)C)nc(n1)Cl,0.9609032895585495,Measurement CCNc1nc(NC(C)C)nc(n1)Cl,1.3338191028424158,Measurement CCNc1nc(NC(C)C)nc(n1)Cl,1.8424525744221922,Measurement -O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(c(c1)Cl)OC(C(OC(F)(F)F)F)(F)F,1.9775960252398304,Prediction +O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(c(c1)Cl)OC(C(OC(F)(F)F)F)(F)F,1.334274883779086,Prediction O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(c(c1)Cl)OC(C(OC(F)(F)F)F)(F)F,1.1362846674663016,Measurement O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(c(c1)Cl)OC(C(OC(F)(F)F)F)(F)F,1.2068664410308745,Measurement -COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1Cl,0.6696281692176134,Prediction +CCCCN(SN(C(=O)Oc1cccc2c1OC(C2)(C)C)C)CCCC,1.7277657827120518,Prediction +CCCCN(SN(C(=O)Oc1cccc2c1OC(C2)(C)C)C)CCCC,1.182467683028378,Measurement +COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1Cl,0.6488113179178151,Prediction COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1Cl,1.155665748672126,Measurement COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1Cl,1.2525804230510118,Measurement -N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)(C)C,1.7084151495523532,Prediction +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)(C)C,1.6489407526760536,Prediction N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)(C)C,1.1454085716982032,Measurement N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)(C)C,1.2211327542550814,Measurement N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)(C)C,1.2544333907376006,Measurement -N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C(c1ccc(cc1)Cl)C(C)C,1.6704801123412765,Prediction +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C(c1ccc(cc1)Cl)C(C)C,1.4981503249943258,Prediction N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C(c1ccc(cc1)Cl)C(C)C,1.225205758260884,Measurement -COCN(c1c(CC)cccc1CC)C(=O)CCl,0.5174704176880593,Prediction +COCN(c1c(CC)cccc1CC)C(=O)CCl,0.5402155092707276,Prediction COCN(c1c(CC)cccc1CC)C(=O)CCl,0.3306180266382941,Measurement COCN(c1c(CC)cccc1CC)C(=O)CCl,1.2548939653344497,Measurement COCN(c1c(CC)cccc1CC)C(=O)CCl,1.2848577466043891,Measurement COCN(c1c(CC)cccc1CC)C(=O)CCl,2.033046777818888,Measurement -Clc1cc(ccc1Oc1ccc(c(c1)C(=O)NS(=O)(=O)C)[N+](=O)[O-])C(F)(F)F,0.6588453197521718,Prediction +OC(=O)COc1ccc(cc1C)Cl,1.1504814913912227,Prediction +OC(=O)COc1ccc(cc1C)Cl,1.0236179201622833,Measurement +OC(=O)COc1ccc(cc1C)Cl,1.7003184083376452,Measurement +Clc1ccc(c(c1)Cl)C1(OCCO1)Cn1cncn1,0.9636674654171898,Prediction +Clc1ccc(c(c1)Cl)C1(OCCO1)Cn1cncn1,1.370111513834565,Measurement +Clc1ccc(c(c1)Cl)C1(OCCO1)Cn1cncn1,1.4773210761437898,Measurement +Clc1cc(ccc1Oc1ccc(c(c1)C(=O)NS(=O)(=O)C)[N+](=O)[O-])C(F)(F)F,0.7871047528022255,Prediction Clc1cc(ccc1Oc1ccc(c(c1)C(=O)NS(=O)(=O)C)[N+](=O)[O-])C(F)(F)F,0.9432475594325607,Measurement Clc1cc(ccc1Oc1ccc(c(c1)C(=O)NS(=O)(=O)C)[N+](=O)[O-])C(F)(F)F,1.9432475594325607,Measurement -N#CC(c1ccc(c(c1)Oc1ccccc1)F)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,1.1173489675704753,Prediction +N#CC(c1ccc(c(c1)Oc1ccccc1)F)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,1.0686805193571414,Prediction N#CC(c1ccc(c(c1)Oc1ccccc1)F)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,1.2798406965940432,Measurement N#CC(c1ccc(c(c1)Oc1ccccc1)F)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,1.354479485094126,Measurement N#CC(c1ccc(c(c1)Oc1ccccc1)F)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,1.5408639423123824,Measurement N#CC(c1ccc(c(c1)Oc1ccccc1)F)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,1.558603397145632,Measurement N#CC(c1ccc(c(c1)Oc1ccccc1)F)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,1.7627076624325413,Measurement N#CC(c1ccc(c(c1)Oc1ccccc1)F)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,1.8596176754405975,Measurement -CON(C(=O)Nc1ccc(cc1)Br)C,1.8538504780796292,Prediction +CON(C(=O)Nc1ccc(cc1)Br)C,1.7546132261934178,Prediction CON(C(=O)Nc1ccc(cc1)Br)C,1.3165566912097522,Measurement CON(C(=O)Nc1ccc(cc1)Br)C,1.8429362996173768,Measurement -O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(cc1)Cl,1.071108200096774,Prediction +O=C(N(C)C)Nc1ccc(c(c1)Cl)Cl,0.861099197081431,Prediction +O=C(N(C)C)Nc1ccc(c(c1)Cl)Cl,1.571654591815115,Measurement +O=C(N(C)C)Nc1ccc(c(c1)Cl)Cl,1.5893745854018753,Measurement +O=C(N(C)C)Nc1ccc(c(c1)Cl)Cl,2.137081876700897,Measurement +O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(cc1)Cl,1.068969719707985,Prediction O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(cc1)Cl,0.3954067884187704,Measurement O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(cc1)Cl,1.5892227666227903,Measurement O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(cc1)Cl,1.600222475511201,Measurement O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(cc1)Cl,1.6472195324286747,Measurement -CON(C(=O)Nc1ccc(c(c1)Cl)Cl)C,1.9059788494124326,Prediction +CON(C(=O)Nc1ccc(c(c1)Cl)Cl)C,1.7885657616300024,Prediction CON(C(=O)Nc1ccc(c(c1)Cl)Cl)C,0.9015293343493707,Measurement CON(C(=O)Nc1ccc(c(c1)Cl)Cl)C,1.6004820295651263,Measurement CON(C(=O)Nc1ccc(c(c1)Cl)Cl)C,1.9984393473574271,Measurement -CCOC(=O)C(Oc1ccc(cc1)Oc1cnc2c(n1)ccc(c2)Cl)C,1.397894682807939,Prediction +CCOC(=O)C(Oc1ccc(cc1)Oc1cnc2c(n1)ccc(c2)Cl)C,1.4366578577188656,Prediction CCOC(=O)C(Oc1ccc(cc1)Oc1cnc2c(n1)ccc(c2)Cl)C,1.3811468505131965,Measurement CCOC(=O)C(Oc1ccc(cc1)Oc1cnc2c(n1)ccc(c2)Cl)C,2.0032782361791086,Measurement -N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Br)Br,1.3629249579291256,Prediction +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Br)Br,1.3537390250015546,Prediction N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Br)Br,1.971062922929322,Measurement N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Br)Br,2.3055264253989747,Measurement -CCOP(=S)(OCC)SCSP(=S)(OCC)OCC,3.238495980111839,Prediction +CCOP(=S)(OCC)SCSP(=S)(OCC)OCC,3.2386342421675964,Prediction CCOP(=S)(OCC)SCSP(=S)(OCC)OCC,2.2838380008263486,Measurement CCOP(=S)(OCC)SCSP(=S)(OCC)OCC,2.3295964190287313,Measurement -CCSCSP(=S)(OCC)OCC,3.063482062180425,Prediction +CCSCSP(=S)(OCC)OCC,3.298628321529855,Prediction CCSCSP(=S)(OCC)OCC,3.211485180279816,Measurement +CCOP(=S)(SCSC(C)(C)C)OCC,2.558822312316036,Prediction +CCOP(=S)(SCSC(C)(C)C)OCC,2.159009899464302,Measurement +CCOP(=S)(SCSC(C)(C)C)OCC,3.4600461583436033,Measurement +CCOP(=S)(SCSC(C)(C)C)OCC,3.68189490795996,Measurement +CCOP(=S)(SCSC(C)(C)C)OCC,3.7610761540075845,Measurement diff --git a/data/training-test-predictions.csv b/data/training-test-predictions.csv index 4a54539..44dfc34 100644 --- a/data/training-test-predictions.csv +++ b/data/training-test-predictions.csv @@ -1,27 +1,35 @@ SMILES,LOAEL_measured_median,LOAEL_predicted,Error,Dataset -COCC(N(c1c(C)cccc1CC)C(=O)CCl)C,0.27691392283721084,0.6440145091827779,0.3671005863455671,test-prediction -COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1C(=O)OC,0.34459526699145665,1.08729219715294,0.7426969301614832,test-prediction -CC(=CC1C(C1(C)C)C(=O)OCc1cccc(c1)Oc1ccccc1)C,0.36853593732630685,0.9936908951508763,0.6251549578245694,test-prediction -OC(=O)COc1ccc(cc1Cl)Cl,0.5520671344078197,1.2903965824234715,0.7383294480156518,test-prediction -CCOCN(c1c(C)cccc1CC)C(=O)CCl,0.6687866205177715,0.6614835102658171,0.007303110251954403,test-prediction -ClCCOc1ccccc1S(=O)(=O)NC(=O)Nc1nc(C)nc(n1)OC,0.6934790190863317,0.8402417574704084,0.14676273838407672,test-prediction -COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1CCC(F)(F)F,0.6990959232648295,0.8387439163561423,0.13964799309131282,test-prediction -N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.8323812299293085,1.4738604918495701,0.6414792619202616,test-prediction -CCOc1cc(ccc1[N+](=O)[O-])Oc1ccc(cc1Cl)C(F)(F)F,0.8951787961648487,1.2881028599130595,0.39292406374821076,test-prediction -ClC(=CC1C(C1(C)C)C(=O)OCc1cccc(c1)Oc1ccccc1)Cl,1.0440453166077193,0.571082862420873,0.47296245418684624,test-prediction -OC(=O)C(Oc1ccc(cc1Cl)Cl)C,1.112916569666931,1.1647860825776641,0.05186951291073316,test-prediction -CCNc1nc(NC(C)C)nc(n1)Cl,1.1473611962004826,1.158750561362179,0.011389365161696308,test-prediction -O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(c(c1)Cl)OC(C(OC(F)(F)F)F)(F)F,1.1715755542485882,1.9775960252398304,0.8060204709912422,test-prediction -COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1Cl,1.204123085861569,0.6696281692176134,0.5344949166439557,test-prediction -N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)(C)C,1.2211327542550814,1.7084151495523532,0.4872823952972718,test-prediction -N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C(c1ccc(cc1)Cl)C(C)C,1.225205758260884,1.6704801123412765,0.4452743540803925,test-prediction -COCN(c1c(CC)cccc1CC)C(=O)CCl,1.2698758559694194,0.5174704176880593,0.7524054382813601,test-prediction -Clc1cc(ccc1Oc1ccc(c(c1)C(=O)NS(=O)(=O)C)[N+](=O)[O-])C(F)(F)F,1.4432475594325607,0.6588453197521718,0.7844022396803889,test-prediction -N#CC(c1ccc(c(c1)Oc1ccccc1)F)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,1.5497336697290072,1.1173489675704753,0.43238470215853186,test-prediction -CON(C(=O)Nc1ccc(cc1)Br)C,1.5797464954135645,1.8538504780796292,0.2741039826660647,test-prediction -O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(cc1)Cl,1.5947226210669956,1.071108200096774,0.5236144209702216,test-prediction -CON(C(=O)Nc1ccc(c(c1)Cl)Cl)C,1.6004820295651263,1.9059788494124326,0.30549681984730626,test-prediction -CCOC(=O)C(Oc1ccc(cc1)Oc1cnc2c(n1)ccc(c2)Cl)C,1.6922125433461526,1.397894682807939,0.2943178605382135,test-prediction -N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Br)Br,2.138294674164148,1.3629249579291256,0.7753697162350226,test-prediction -CCOP(=S)(OCC)SCSP(=S)(OCC)OCC,2.30671720992754,3.238495980111839,0.931778770184299,test-prediction -CCSCSP(=S)(OCC)OCC,3.211485180279816,3.063482062180425,0.14800311809939126,test-prediction +COc1cccc(c1C)C(=O)NN(C(C)(C)C)C(=O)c1cc(C)cc(c1)C,-0.04743064015554219,0.6879561843647223,0.7353868245202645,test-prediction +COC(=O)Nc1nc2c([nH]1)cccc2,0.14495892528589976,1.5392859574286677,1.3943270321427679,test-prediction +COCC(N(c1c(C)cccc1CC)C(=O)CCl)C,0.27691392283721084,0.7153960864733138,0.43848216363610293,test-prediction +COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1C(=O)OC,0.34459526699145665,0.9841230105681268,0.6395277435766702,test-prediction +CC(=CC1C(C1(C)C)C(=O)OCc1cccc(c1)Oc1ccccc1)C,0.36853593732630685,0.9999740698329931,0.6314381325066862,test-prediction +OC(=O)COc1ccc(cc1Cl)Cl,0.5520671344078197,1.2847669118955252,0.7326997774877054,test-prediction +CCOCN(c1c(C)cccc1CC)C(=O)CCl,0.6687866205177715,0.7337346143256055,0.064947993807834,test-prediction +ClCCOc1ccccc1S(=O)(=O)NC(=O)Nc1nc(C)nc(n1)OC,0.6934790190863317,0.646480877887512,0.04699814119881973,test-prediction +COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1CCC(F)(F)F,0.6990959232648295,0.7486691451711134,0.04957322190628388,test-prediction +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.8323812299293085,1.5673147534071126,0.734933523477804,test-prediction +CCCC1COC(O1)(Cn1cncn1)c1ccc(cc1Cl)Cl,0.8442020296654076,1.3750196575989226,0.5308176279335151,test-prediction +CCOc1cc(ccc1[N+](=O)[O-])Oc1ccc(cc1Cl)C(F)(F)F,0.8951787961648487,1.3457613975775877,0.450582601412739,test-prediction +ClC(=CC1C(C1(C)C)C(=O)OCc1cccc(c1)Oc1ccccc1)Cl,1.0440453166077193,0.7288845774295261,0.3151607391781932,test-prediction +OC(=O)C(Oc1ccc(cc1Cl)Cl)C,1.112916569666931,1.0558069650022,0.05710960466473103,test-prediction +CCNc1nc(NC(C)C)nc(n1)Cl,1.1473611962004826,1.3332962711923355,0.1859350749918529,test-prediction +O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(c(c1)Cl)OC(C(OC(F)(F)F)F)(F)F,1.1715755542485882,1.334274883779086,0.16269932953049793,test-prediction +CCCCN(SN(C(=O)Oc1cccc2c1OC(C2)(C)C)C)CCCC,1.182467683028378,1.7277657827120518,0.5452980996836738,test-prediction +COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1Cl,1.204123085861569,0.6488113179178151,0.5553117679437539,test-prediction +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)(C)C,1.2211327542550814,1.6489407526760536,0.42780799842097217,test-prediction +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C(c1ccc(cc1)Cl)C(C)C,1.225205758260884,1.4981503249943258,0.2729445667334418,test-prediction +COCN(c1c(CC)cccc1CC)C(=O)CCl,1.2698758559694194,0.5402155092707276,0.7296603466986918,test-prediction +OC(=O)COc1ccc(cc1C)Cl,1.3619681642499644,1.1504814913912227,0.21148667285874168,test-prediction +Clc1ccc(c(c1)Cl)C1(OCCO1)Cn1cncn1,1.4237162949891773,0.9636674654171898,0.46004882957198745,test-prediction +Clc1cc(ccc1Oc1ccc(c(c1)C(=O)NS(=O)(=O)C)[N+](=O)[O-])C(F)(F)F,1.4432475594325607,0.7871047528022255,0.6561428066303352,test-prediction +N#CC(c1ccc(c(c1)Oc1ccccc1)F)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,1.5497336697290072,1.0686805193571414,0.4810531503718658,test-prediction +CON(C(=O)Nc1ccc(cc1)Br)C,1.5797464954135645,1.7546132261934178,0.17486673077985326,test-prediction +O=C(N(C)C)Nc1ccc(c(c1)Cl)Cl,1.5893745854018753,0.861099197081431,0.7282753883204443,test-prediction +O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(cc1)Cl,1.5947226210669956,1.068969719707985,0.5257529013590105,test-prediction +CON(C(=O)Nc1ccc(c(c1)Cl)Cl)C,1.6004820295651263,1.7885657616300024,0.18808373206487605,test-prediction +CCOC(=O)C(Oc1ccc(cc1)Oc1cnc2c(n1)ccc(c2)Cl)C,1.6922125433461526,1.4366578577188656,0.255554685627287,test-prediction +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Br)Br,2.138294674164148,1.3537390250015546,0.7845556491625936,test-prediction +CCOP(=S)(OCC)SCSP(=S)(OCC)OCC,2.30671720992754,3.2386342421675964,0.9319170322400563,test-prediction +CCSCSP(=S)(OCC)OCC,3.211485180279816,3.298628321529855,0.08714314125003897,test-prediction +CCOP(=S)(SCSC(C)(C)C)OCC,3.5709705331517814,2.558822312316036,1.0121482208357455,test-prediction diff --git a/data/training-test-predictions.id b/data/training-test-predictions.id index 1b077bb..b771961 100644 --- a/data/training-test-predictions.id +++ b/data/training-test-predictions.id @@ -1 +1 @@ -58a2f570a8ae8b14eb8dd1e8 +58a306bda8ae8b1fca02fc29 diff --git a/data/training_log10-cv-0.csv b/data/training_log10-cv-0.csv index c78df72..23cf90a 100644 --- a/data/training_log10-cv-0.csv +++ b/data/training_log10-cv-0.csv @@ -1,52 +1,100 @@ SMILES,LOAEL_measured_median,LOAEL_predicted,Prediction_interval_low,Prediction_interval_high -C[N]1(C)CCCCC1,-0.4564837711150284,-0.11839701195157529,-1.2540506461294998,0.341083103899443 -COC(=O)Nc1nc2c([nH]1)cc(cc2)Sc1ccccc1,0.3894094818235073,1.3000842641666306,-1.488379990455892,2.2671989541029065 -COC(=O)c1ccccc1S(=O)(=O)NC(=O)Nc1nc(OC(F)F)cc(n1)OC(F)F,0.40170482112025646,0.5244304978725236,-0.20331378220709578,1.0067234244476087 -Clc1cc(ccc1Oc1ccc(c(c1)C(=O)[O-])[N+](=O)[O-])C(F)(F)F.[Na+],0.4505879438444319,0.3286512527182731,-0.9297971779981761,1.83097306568704 -COCN(c1c(CC)cccc1CC)C(=O)CCl,0.5174704176880593,1.2698758559694194,0.017118181671745725,1.017822653704373 -ClC(=CC1C(C1(C)C)C(=O)OCc1cccc(c1)Oc1ccccc1)Cl,0.5673267770133072,1.0440453166077193,0.030671832566891544,1.103981721459723 -CCCN(c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-])CC1CC1,0.5682759537448641,1.8417279953479162,-1.147303708191009,2.283855615680737 -COc1nc(C)nc(n1)N(C(=O)NS(=O)(=O)c1ccccc1C(=O)OC)C,0.5794383468632692,1.500120551043568,-0.47514545811233455,1.634022151838873 -COCC(N(c1c(C)cccc1CC)C(=O)CCl)C,0.5821868826277775,0.27691392283721084,-0.7583573829222893,1.9227311481778444 -CCCCN(c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-])CC,0.6187153955546366,1.7930123052435918,-0.9928047860685697,2.230235577177843 -COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1Cl,0.6564462360302253,1.204123085861569,-0.43913369362676946,1.7520261656872202 -Clc1cc(ccc1Oc1ccc(c(c1)C(=O)NS(=O)(=O)C)[N+](=O)[O-])C(F)(F)F,0.6588453197521718,1.4432475594325607,0.09070098683869166,1.2269896526656519 -CCCN(c1c(cc(c(c1[N+](=O)[O-])N)C(F)(F)F)[N+](=O)[O-])CCC,0.6720069540158988,1.0760827768868946,-1.2282790747865044,2.5722929828183023 -CCOc1nc(nc(n1)NC)NC(=O)NS(=O)(=O)c1ccccc1C(=O)OC,0.6953202141870661,0.29099307039793554,0.21014764733349445,1.1804927810406378 -CCCN(c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-])CCCl,0.7304363334427151,0.2448724686893053,-1.2103597498190666,2.6712324167044965 -Clc1cc(ccc1Oc1ccc(c(c1)C(=O)O)[N+](=O)[O-])C(F)(F)F,0.7378914052598666,0.4613885686555222,-0.3525496586756094,1.8283324691953426 -CCOCN(c1c(C)cccc1CC)C(=O)CCl,0.7555482458124941,0.6687866205177715,-0.7471139332295637,2.258210424854552 -ClCC(=O)N(c1c(C)cccc1CC)COC(C)C,0.8038710801176221,0.40281883334761076,-0.18512146439994015,1.7928636246351843 -ClCCOc1ccccc1S(=O)(=O)NC(=O)Nc1nc(C)nc(n1)OC,0.8402417574704084,0.6934790190863317,0.3786754777225548,1.301808037218262 -CCOC(=O)COC(=O)c1cc(ccc1[N+](=O)[O-])Oc1ccc(cc1Cl)C(F)(F)F,0.8868841902441673,1.253093479525902,-0.14274711591395284,1.9165154964022875 -COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1C(=O)OC,0.9677351246684518,0.34459526699145665,0.04553056162186986,1.8899396877150338 -OC(=O)C(Oc1ccc(cc1C)Cl)C,0.9955461440956712,1.5913530743442346,-0.28350181265456587,2.274594100845908 -CCOC(=O)C(OC(=O)c1cc(ccc1[N+](=O)[O-])Oc1ccc(cc1Cl)C(F)(F)F)C,1.0244020567794645,0.9654517529015312,0.380780533500658,1.668023580058271 -O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(cc1)Cl,1.071108200096774,1.5947226210669956,0.41141725553915964,1.7307991446543882 -N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C(C(Br)(Br)Br)Br,1.1133389866550485,2.3457079184197034,0.9437977008856246,1.2828802724244723 -N#CC(c1ccc(c(c1)Oc1ccccc1)F)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,1.1173489675704755,1.5497336697290072,0.40109926364797166,1.8335986714929793 -OC(=O)COc1cc(Cl)c(cc1Cl)Cl,1.1398473897523569,1.4073569873961393,-1.0572649111176238,3.3369596906223373 -CCNc1nc(NC(C)C)nc(n1)Cl,1.1587505613621794,1.1473611962004826,0.6898662262448538,1.6276348964795049 -OC(=O)C(Oc1ccc(cc1Cl)Cl)C,1.1647860825776644,1.112916569666931,-0.6614091745601773,2.990981339715506 -CCCN(c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-])CCC,1.1739902884363427,0.446231021019864,-0.27700060835513773,2.6249811852278233 -CC1COC(O1)(Cn1cncn1)c1ccc(cc1Cl)Oc1ccc(cc1)Cl,1.1753530384218474,1.2267915364902278,0.9652222962957666,1.385483780547928 -N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Br)Br,1.2266643241259165,2.138294674164148,0.6614648605098293,1.7918637877420038 -CNC(=O)Oc1cc(C)c(c(c1)C)C,1.228849942742214,1.286098038116031,-0.33458629149800845,2.7922861769824365 -CCNc1nc(Cl)nc(n1)NC(C)(C)C,1.2410244406637643,2.1597809534601558,0.24233011594345077,2.2397187653840778 -CCOc1cc(ccc1[N+](=O)[O-])Oc1ccc(cc1Cl)C(F)(F)F,1.2717840633302147,0.8951787961648487,-0.30034412776724717,2.8439122544276767 -OC(=O)COc1ccc(cc1Cl)Cl,1.2903965824234715,0.5520671344078197,0.13369906168946888,2.4470941031574744 -N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)/C=C(/C(F)(F)F)\Cl,1.4575572216521004,1.5738792912533845,0.8538474401537863,2.0612670031504146 -N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(C(F)(F)F)Cl,1.4773270760988695,1.5561583389288367,0.8520414557358483,2.1026126964618905 -CON(C(=O)Nc1ccc(cc1)Cl)C,1.5228920251829052,2.0307005985741227,0.37029166896409005,2.67549238140172 -N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,1.5474775316205376,0.8323812299293085,0.9381184985173618,2.1568365647237133 -N#CC(c1ccc(cc1)Oc1ccccc1)OC(=O)C1C(C1(C)C)/C=C\C(=O)OC(C(F)(F)F)C(F)(F)F,1.549603437800714,1.8250684064715574,0.815566035782989,2.283640839818439 -CSC(=NOC(=O)N(SN(C(=O)ON=C(SC)C)C)C)C,1.623526826109727,1.549581519063189,1.188899525933145,2.0581541262863094 -N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C(c1ccc(cc1)Cl)C(C)C,1.6704801123412765,1.225205758260884,1.4865084368254098,1.8544517878571432 -N#CC(c1ccc(c(c1)Oc1ccccc1)F)OC(=O)C1C(C1(C)C)/C=C(/c1ccc(cc1)Cl)\Cl,1.834091237605563,2.309954619710552,1.3319719672734738,2.3362105079376523 -O=C(C1C(C1(C)C)C=C(C(F)(F)F)Cl)OCc1c(F)c(F)c(c(c1F)F)C,1.8510890068906438,1.9591604053146598,1.717736617504523,1.9844413962767646 -CON(C(=O)Nc1ccc(cc1)Br)C,1.8538504780796292,1.5797464954135645,0.6141407359223952,3.0935602202368635 -N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)(C)C,1.8585191232084395,1.2211327542550814,1.1591012306038144,2.5579370158130645 -O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(c(c1)Cl)OC(C(OC(F)(F)F)F)(F)F,1.9775960252398304,1.1715755542485882,1.1236391732419282,2.8315528772377325 -CCOP(=S)(Oc1ccc(cc1)[N+](=O)[O-])OCC,2.435408990943892,2.1632497125104866,1.4936115921860988,3.3772063897016853 -CCSCSP(=S)(OCC)OCC,3.063482062180425,3.211485180279816,1.7060561051684917,4.420908019192358 -CCOP(=S)(OCC)SCSP(=S)(OCC)OCC,3.238495980111839,2.30671720992754,1.3932433009904641,5.083748659233214 +CCOC(=O)c1ccccc1C(=O)OCC,-0.8025078028037981,-1.300073495267144,-1.7643541536650869,0.1593385480574906 +CCCCCCCCCC[N+](CCCCCCCCCC)(C)C.[Cl-],-0.4523093558402334,0.7526160071097443,-1.1486970067003368,0.24407829501987005 +C[N]1(C)CCCCC1,-0.4078951621232227,-0.11839701195157529,-1.921923802734729,1.1061334784882835 +COC(=O)c1ccccc1C(=O)OC,-0.395980366966349,-1.0128372247051722,-1.931061006625142,1.1391002726924442 +CCCCOC(=O)c1ccccc1C(=O)OCCCC,-0.3277659620337507,-0.3335681749239878,-2.487800303138573,1.8322683790710714 +COC(=O)c1ccc(cc1)O,-0.2850870472688833,-0.9938284615355355,-1.7889481781869567,1.21877408364919 +CON=C(c1ccccc1COc1ccccc1C)C(=O)OC,-0.1863320943149324,-0.07509896249967626,-1.6687835258256438,1.2961193371957789 +ClCC[N+](C)(C)C,-0.1860801154641762,-0.025387799890407722,-0.6739263829103855,0.30176615198203316 +CO/N=C(\c1ccccc1COc1ccccc1C)/C(=O)OC,-0.043272116542567574,-0.2291345059654671,-1.1542003318018443,1.0676560987167092 +CC(OC(=O)Nc1ccccc1)C,-0.0027835595633655154,0.7909678031634652,-1.561948454694262,1.5563813355675307 +ClCC[N](C)(C)C,0.03374761054485871,-0.3507905474264569,-0.28132410236409594,0.34881932345381333 +COC(=O)Nc1nc2c([nH]1)cc(cc2)S(=O)c1ccccc1,0.2001824877751192,2.197760066703943,-1.3307182373789743,1.7310832129292129 +COC(=O)c1ccccc1S(=O)(=O)NC(=O)Nc1nc(OC(F)F)cc(n1)OC(F)F,0.38347464789102037,0.5244304978725236,-0.030215691760235253,0.7971649875422759 +Cc1cc(C)cc(c1)C(=O)N(C(C)(C)C)NC(=O)c1ccc2c(c1C)CCCO2,0.44298913145109764,0.4343882750979413,-0.3943894517877705,1.280367714689966 +Clc1ccc(cc1)Cl,0.5033454743566039,-0.3098004456017336,-0.6263877532264377,1.6330787019396455 +COc1nc(C)nc(n1)N(C(=O)NS(=O)(=O)c1ccccc1C(=O)OC)C,0.5268727796678758,1.500120551043568,-0.4938190744435693,1.547564633779321 +COCN(c1c(CC)cccc1CC)C(=O)CCl,0.5378125188757769,1.2698758559694194,0.11482382374698508,0.9608012140045686 +CCOCN(c1c(C)cccc1CC)C(=O)CCl,0.5641635950559667,0.6687866205177715,-0.935415374028574,2.063742564140507 +COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccsc1C(=O)OC,0.571120503057232,1.1902114152572187,-0.5624462365772057,1.7046872426916697 +CCOc1nc(nc(n1)NC)NC(=O)NS(=O)(=O)c1ccccc1C(=O)OC,0.5738607905791832,0.29099307039793554,-0.0245889206879496,1.172310501846316 +CCCSc1ccc2c(c1)[nH]c(n2)NC(=O)OC,0.5744096282076099,1.1227611514363303,-0.5917366312526837,1.7405558876679035 +COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1Cl,0.5751475463326001,1.204123085861569,-0.16104928781752337,1.3113443804827236 +COC(=O)Nc1nc2c([nH]1)cc(cc2)Sc1ccccc1,0.6122308015623443,1.3000842641666306,-1.4232949013945055,2.647756504519194 +COc1nc(nc(n1)C)NC(=O)[N-]S(=O)(=O)c1cc(I)ccc1C(=O)OC.[Na+],0.6256852292162783,1.2508978152709322,-0.46941431748010565,1.7207847759126622 +ClCCOc1ccccc1S(=O)(=O)NC(=O)Nc1nc(C)nc(n1)OC,0.6375482224998074,0.6934790190863317,0.14937936055556394,1.1257170844440507 +COc1cccc(c1C)C(=O)NN(C(C)(C)C)C(=O)c1cc(C)cc(c1)C,0.69818422275262,-0.04743064015554219,0.2984533992646649,1.0979150462405751 +COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1CCC(F)(F)F,0.7052445584030766,0.6990959232648295,-0.2945493831529916,1.7050384999591448 +CCCN(c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-])CC1CC1,0.707922957773828,1.8417279953479162,-1.140692817636872,2.556538733184528 +COCC(N(c1c(C)cccc1CC)C(=O)CCl)C,0.7222236447614959,0.27691392283721084,-0.39373928624199583,1.8381865757649876 +ClC(=CC1C(C1(C)C)C(=O)OCc1cccc(c1)Oc1ccccc1)Cl,0.7265391683074566,1.0440453166077193,0.21646633772142443,1.2366119988934887 +O=C(N(C)C)Nc1ccc(c(c1)Cl)Cl,0.7908991449057705,1.5893745854018753,-1.033012111668208,2.6148104014797493 +Clc1cc(ccc1Oc1ccc(c(c1)C(=O)NS(=O)(=O)C)[N+](=O)[O-])C(F)(F)F,0.7929525459919748,1.4432475594325607,0.16909630269159237,1.4168087892923573 +CCCN(c1c(cc(c(c1[N+](=O)[O-])N)C(F)(F)F)[N+](=O)[O-])CCC,0.8238704600370966,1.0760827768868946,-1.5717141591389834,3.219455079213177 +ClCC(=O)N(c1c(C)cccc1CC)COC(C)C,0.8386026030513786,0.40281883334761076,-0.2551568896870394,1.9323620957897965 +CCCCN(c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-])CC,0.8452394137523537,1.7930123052435918,-0.5704133445801335,2.2608921720848407 +CCCN(c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-])CCCl,0.8495361619871143,0.2448724686893053,-1.4236619673207351,3.122734291294964 +CCSC(=O)N1CCCCCC1,0.8966760010606655,1.5367491723403042,-0.12183158753273038,1.9151835896540614 +CCOC(=O)COC(=O)c1cc(ccc1[N+](=O)[O-])Oc1ccc(cc1Cl)C(F)(F)F,0.9095013680771945,1.253093479525902,0.01793597911698397,1.801066757037405 +C=CCC1=C(C)C(CC1=O)OC(=O)C1C(C1(C)C)C=C(C)C,0.9580439931473148,1.0914292030286552,-0.24640643441358967,2.162494420708219 +N#CC(c1ccc(c(c1)Oc1ccccc1)F)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.9593269519958577,1.5497336697290072,0.3270519446129584,1.5916019593787571 +CC(=CC1C(C1(C)C)C(=O)OCc1cccc(c1)Oc1ccccc1)C,0.9833397647961664,0.36853593732630685,-0.189840653075539,2.1565201826678715 +COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1C(=O)OC,1.0040458896075388,0.34459526699145665,0.2151347036457547,1.792957075569323 +OC(=O)C(Oc1ccc(cc1Cl)Cl)C,1.0616880045369097,1.112916569666931,-0.4610895903699117,2.584465599443731 +O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(cc1)Cl,1.0633347810421627,1.5947226210669956,0.4809632193717639,1.6457063427125616 +OC(=O)COc1cc(Cl)c(cc1Cl)Cl,1.0727153354550625,1.4073569873961393,-0.44855369303024384,2.593984363940369 +OC(=O)C(Oc1ccc(cc1C)Cl)C,1.120512151949517,1.5913530743442346,-0.2487713949882142,2.489795698887248 +OC(=O)COc1ccc(cc1C)Cl,1.125686290401133,1.3619681642499644,-0.22819428340111392,2.47956686420338 +CNC(=O)ON=C(SC)C,1.1307383083177946,1.0595589658676712,0.4644693127362094,1.79700730389938 +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C(C(Br)(Br)Br)Br,1.132172117673495,2.3457079184197034,0.07750238037343915,2.186841854973551 +Clc1cc(ccc1Oc1ccc(c(c1)C(=O)[O-])[N+](=O)[O-])C(F)(F)F.[Na+],1.1531209179324986,0.3286512527182731,0.12181405917496968,2.1844277766900273 +Clc1cc(ccc1Oc1ccc(c(c1)C(=O)O)[N+](=O)[O-])C(F)(F)F,1.1594137569164724,0.4613885686555222,0.3485349019057663,1.9702926119271784 +CC1COC(O1)(Cn1cncn1)c1ccc(cc1Cl)Oc1ccc(cc1)Cl,1.1738059844312232,1.2267915364902278,0.3020063985190974,2.045605570343349 +CNC(=O)Oc1cc(C)c(c(c1)C)C,1.1822621846267198,1.286098038116031,-0.1710231748573523,2.535547544110792 +COc1nc(C)nc(n1)N(C(=O)NS(=O)(=O)c1ccccc1C(=O)O)C,1.185817673781389,0.8409745087750948,-0.13976635635020696,2.511401703912985 +CC(C(=O)O)Oc1cc(Cl)c(cc1Cl)Cl,1.1859403190651523,1.4910530201781254,0.37533228306083066,1.996548355069474 +CCCN(C(=O)SCC)CCC,1.189073537317779,1.1011057762548884,-0.9173673341065174,3.2955144087420756 +CNC(=O)Oc1cc(C)c(c(c1)C)N(C)C,1.2183905415592224,2.170818490231342,0.4637602249081815,1.9730208582102633 +CC(OC(=O)C(c1ccc(cc1)Cl)(c1ccc(cc1)Cl)O)C,1.2289080622278439,1.5762300113736614,1.1962995035553659,1.261516620900322 +CCCN(c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-])CCC,1.241773235813927,0.446231021019864,-0.029785091365845506,2.5133315629936996 +OC(=O)C(Oc1cccc(c1)Cl)C,1.2700935935194824,0.30236968685337956,0.21211847358862035,2.328068713450344 +CCNc1nc(NC(C)C)nc(n1)Cl,1.2988398346222096,1.1473611962004826,0.6391631883730443,1.958516480871375 +CCSC(=O)N(CC(C)C)CC(C)C,1.3059015946869588,0.33720440522271905,0.2669331551404108,2.344870034233507 +OC(=O)COc1ccc(cc1Cl)Cl,1.3132228457253756,0.5520671344078197,0.4580219001141508,2.1684237913366005 +O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(c(c1)Cl)OC(C(OC(F)(F)F)F)(F)F,1.3492644508493083,1.1715755542485882,0.4100006609960879,2.288528240702529 +CCOc1cc(ccc1[N+](=O)[O-])Oc1ccc(cc1Cl)C(F)(F)F,1.3669962857587108,0.8951787961648487,0.2234758404774262,2.5105167310399956 +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(C(F)(F)F)Cl,1.3730348855122099,1.5561583389288367,0.6289470217345903,2.1171227492898295 +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)/C=C(/C(F)(F)F)\Cl,1.39702635178543,1.5738792912533845,0.6263982746651467,2.1676544289057134 +N#CC(c1ccc(cc1)Oc1ccccc1)OC(=O)C1C(C1(C)C)/C=C\C(=O)OC(C(F)(F)F)C(F)(F)F,1.410534558569072,1.8250684064715574,0.6938190379040367,2.1272500792341074 +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Br)Br,1.4254546187473904,2.138294674164148,0.5636942962626837,2.2872149412320972 +CCOC(=O)C(Oc1ccc(cc1)Oc1cnc2c(n1)ccc(c2)Cl)C,1.466915293527425,1.6922125433461526,0.46563733871306434,2.4681932483417857 +CCNc1nc(Cl)nc(n1)NC(C)(C)C,1.4722639674351332,2.1597809534601558,0.72053161669756,2.2239963181727065 +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,1.5078856849187368,0.8323812299293085,0.7780582109942299,2.2377131588432437 +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)(C)C,1.5170392450643782,1.2211327542550814,0.6188727639850882,2.415205726143668 +CON(C(=O)Nc1ccc(cc1)Cl)C,1.5254925367311198,2.0307005985741227,0.3067442325901706,2.7442408408720693 +O=C(C(Oc1ccc(cc1)Oc1cnc2c(n1)ccc(c2)Cl)C)OCCON=C(C)C,1.5428097494483122,1.598814644976232,0.3690843553729859,2.7165351435236387 +COC(=O)Nc1nc2c([nH]1)cccc2,1.5446498917661196,0.14495892528589976,0.4442473622269525,2.6450524213052864 +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C(c1ccc(cc1)Cl)C(C)C,1.544815855341314,1.225205758260884,1.1033950901095004,1.9862366205731277 +CCNc1nc(nc(n1)Cl)NC(C#N)(C)C,1.5803564502056193,2.2845565889047834,1.018126185702696,2.1425867147085427 +[S]C(=S)NCCNC(=S)S[Mn],1.5980898153536156,0.597669823868334,1.3265266418792125,1.8696529888280187 +N#CC(c1ccc(c(c1)Oc1ccccc1)F)OC(=O)C1C(C1(C)C)/C=C(/c1ccc(cc1)Cl)\Cl,1.6285100898436353,2.309954619710552,0.909061982686879,2.3479581970003918 +CCOC(=O)C(Oc1ccc(cc1)Oc1nc2c(o1)cc(cc2)Cl)C,1.6305580473483157,1.6042019938366077,0.6568955043189723,2.604220590377659 +Clc1ccc2c(c1)ncc(n2)Oc1ccc(cc1)OC(C(=O)OCC1CCCO1)C,1.6707144848465831,1.0357262236334814,0.6039971849442818,2.7374317847488845 +CCCCN(SN(C(=O)Oc1cccc2c1OC(C2)(C)C)C)CCCC,1.7256110847698114,1.182467683028378,0.7183646205617393,2.7328575489778837 +CON(C(=O)Nc1ccc(cc1)Br)C,1.7353559838645514,1.5797464954135645,0.6676304305284868,2.803081537200616 +CON(C(=O)Nc1ccc(c(c1)Cl)Cl)C,1.7876596295719032,1.6004820295651263,0.9969495009142936,2.578369758229513 +O=C(C1C(C1(C)C)C=C(C(F)(F)F)Cl)OCc1c(F)c(F)c(c(c1F)F)C,1.8532517508371147,1.9591604053146598,1.6238835395320574,2.082619962142172 +O=C(C1C(C1(C)C)/C=C(/C(F)(F)F)\Cl)OCc1c(F)c(F)c(c(c1F)F)C,1.9314814660721966,1.8510890068906436,1.6540060704656363,2.208956861678757 +CCNc1nc(NCC)nc(n1)Cl,2.05731503573624,1.5929887382061456,1.1522591169089416,2.962370954563539 +COP(=S)(Oc1ccc(cc1)[N+](=O)[O-])OC,2.2598509476536943,2.67993684040502,0.16317460544317175,4.356527289864217 +COP(=S)(Oc1ccc(cc1)N(=O)=O)OC,2.4094774109929284,3.0223586898833017,1.3074109005099437,3.511543921475913 +CCOP(=S)(Oc1ccc(cc1)[N+](=O)[O-])OCC,2.448284557340227,2.1632497125104866,1.290207089479298,3.6063620252011557 +CCOP(=S)(Oc1ccc(cc1)N(=O)=O)OCC,2.5191669376773373,2.3806193392221147,1.6379462510007847,3.4003876243538897 +CCOP(=S)(SCSC(C)(C)C)OCC,2.5774210143233094,3.5709705331517814,1.6601635866300595,3.4946784420165593 +COC(=O)C(Oc1ccc(cc1)Oc1ncc(cc1Cl)C(F)(F)F)C,2.716714062392793,2.574873529491226,0.8451600729081084,4.588268051877478 +COC1CC(OC2C(C)C=CC=C3COC4C3(O)C(C=C(C4O)C)C(=O)OC3CC(CC=C2C)OC2(C3)C=CC(C(O2)C(CC)C)C)OC(C1OC1CC(OC)C(C(O1)C)O)C.COC1CC(OC(C1OC1CC(OC)C(C(O1)C)O)C)OC1C(C)C=CC=C2COC3C2(O)C(C=C(C3O)C)C(=O)OC2CC(CC=C1C)OC1(C2)C=CC(C(O1)C(C)C)C,2.778240115180643,2.9375684468373877,2.1251915094205005,3.4312887209407856 +OC(=O)c1ccccc1.CCC(C1OC2(C=CC1C)OC1CC=C(C)C(OC3CC(OC)C(C(O3)C)OC3CC(OC)C(C(O3)C)NC)C(C)C=CC=C3C4(C(C(=O)OC(C2)C1)C=C(C)C(C4OC3)O)O)C,2.8676859744872543,2.804590566004922,2.282579812604683,3.4527921363698257 +COC1CC(OC2C(C)C=CC=C3COC4C3(O)C(C=C(C4O)C)C(=O)OC3CC(CC=C2C)OC2(C3)C=CC(C(O2)C(C)C)C)OC(C1OC1CC(OC)C(C(O1)C)NC(=O)C)C,2.871759894155651,2.7553159367231137,2.26551389974309,3.4780058885682115 +CCOP(=S)(OCC)SCSP(=S)(OCC)OCC,3.246064258981671,2.30671720992754,2.003154758799517,4.488973759163825 +CCSCSP(=S)(OCC)OCC,3.3175935043721014,3.211485180279816,2.137535349661088,4.497651659083115 diff --git a/data/training_log10-cv-0.id b/data/training_log10-cv-0.id index 1aa42ca..4a3a1ec 100644 --- a/data/training_log10-cv-0.id +++ b/data/training_log10-cv-0.id @@ -1 +1 @@ -58a2f585a8ae8b161ed85090 +58a306d6a8ae8b212dcf11f1 diff --git a/data/training_log10-cv-1.csv b/data/training_log10-cv-1.csv index db88cc8..226e5ee 100644 --- a/data/training_log10-cv-1.csv +++ b/data/training_log10-cv-1.csv @@ -1,58 +1,99 @@ SMILES,LOAEL_measured_median,LOAEL_predicted,Prediction_interval_low,Prediction_interval_high -C[N]1(C)CCCCC1,-0.4564837711150284,-0.11839701195157529,-1.2540506461294996,0.3410831038994428 -CC(OC(=O)Nc1ccccc1)C,-0.009097936702771729,0.7909678031634652,-2.268952003381956,2.2507561299764123 -Clc1cc(N)c(c(n1)C(=O)O)Cl,0.17198133130954318,-0.6840010770259624,-0.13480364285591773,0.4787663054750041 -COC(=O)Nc1nc2c([nH]1)cc(cc2)Sc1ccccc1,0.3894094818235073,1.3000842641666306,-0.797956750391183,1.5767757140381975 -Clc1cc(ccc1Oc1ccc(c(c1)C(=O)[O-])[N+](=O)[O-])C(F)(F)F.[Na+],0.4419532382565241,0.3286512527182731,-0.13410324032338283,1.018009716836431 -Clc1cc(ccc1Oc1ccc(c(c1)C(=O)O)[N+](=O)[O-])C(F)(F)F,0.44294144160350096,0.4613885686555222,-1.051008001272546,1.9368908844795478 -COCN(c1c(CC)cccc1CC)C(=O)CCl,0.5174704176880593,1.2698758559694194,0.04782181860817114,0.9871190167679476 -O=C(N(C)C)Nc1ccc(c(c1)Cl)Cl,0.5554422652280658,1.5893745854018753,-1.018664512452463,2.129549042908595 -ClC(=CC1C(C1(C)C)C(=O)OCc1cccc(c1)Oc1ccccc1)Cl,0.5673267770133078,1.0440453166077193,0.08599310710730707,1.0486604469193086 -CCCN(c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-])CC1CC1,0.5682759537448641,1.8417279953479162,-1.012374904760441,2.148926812250169 -COCC(N(c1c(C)cccc1CC)C(=O)CCl)C,0.5736011478664845,0.27691392283721084,-0.3936769645117293,1.5408792602446983 -COc1nc(C)nc(n1)N(C(=O)NS(=O)(=O)c1ccccc1C(=O)OC)C,0.5762389131571544,1.500120551043568,-0.2686944680725718,1.4211722943868805 -COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccsc1C(=O)OC,0.6571508374896793,1.1902114152572187,0.4671980757070634,0.8471035992722952 -CCOCN(c1c(C)cccc1CC)C(=O)CCl,0.6614835102658171,0.6687866205177715,-0.614080100789186,1.9370471213208202 -CCCN(c1c(cc(c(c1[N+](=O)[O-])N)C(F)(F)F)[N+](=O)[O-])CCC,0.6720069540158988,1.0760827768868946,-1.1020099433378698,2.4460238513696675 -CCOc1nc(nc(n1)NC)NC(=O)NS(=O)(=O)c1ccccc1C(=O)OC,0.6953202141870661,0.29099307039793554,0.12285844509673549,1.2677819832773967 -Clc1cc(ccc1Oc1ccc(c(c1)C(=O)NS(=O)(=O)C)[N+](=O)[O-])C(F)(F)F,0.7284223016096832,1.4432475594325607,0.5183582233224583,0.9384863798969082 -CCCN(c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-])CCCl,0.7304363334427153,0.2448724686893053,-1.1475250784331081,2.6083977453185385 -CCCCN(c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-])CC,0.796853084648671,1.7930123052435918,-0.72785495456293,2.3215611238602722 -ClCC(=O)N(c1c(C)cccc1CC)COC(C)C,0.8076520704595649,0.40281883334761076,-0.3711101556131535,1.9864142965322833 -COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1Cl,0.8155096673615454,1.204123085861569,-0.15978697238487938,1.79080630710797 -COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1CCC(F)(F)F,0.8315405085251539,0.6990959232648295,-0.25008251448050234,1.9131635315308102 -ClCCOc1ccccc1S(=O)(=O)NC(=O)Nc1nc(C)nc(n1)OC,0.8402417574704084,0.6934790190863317,0.31377627701211597,1.3667072379287009 -CCOC(=O)COC(=O)c1cc(ccc1[N+](=O)[O-])Oc1ccc(cc1Cl)C(F)(F)F,0.8868841902441673,1.253093479525902,-0.10277135991185771,1.8765397404001924 -CNC(=O)ON=C(SC)C,0.9267583523880016,1.0595589658676712,0.7544373579213387,1.0990793468546647 -CCOC(=O)C(OC(=O)c1cc(ccc1[N+](=O)[O-])Oc1ccc(cc1Cl)C(F)(F)F)C,1.0244020567794645,0.9654517529015312,0.5924683584633463,1.4563357550955827 -OC(=O)C(Oc1ccc(cc1C)Cl)C,1.0547136516060283,1.5913530743442346,-0.17631317652150802,2.2857404797335645 -COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1C(=O)OC,1.067644022492981,0.34459526699145665,0.15707261594235233,1.9782154290436098 -N#CC(c1ccc(c(c1)Oc1ccccc1)F)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,1.117348967570475,1.5497336697290072,0.3644893728007613,1.870208562340189 -CCNc1nc(NC(C)C)nc(n1)Cl,1.158750561362179,1.1473611962004826,0.5817785310842738,1.735722591640084 -CCCN(c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-])CCC,1.1739902884363425,0.446231021019864,-0.2238682158102845,2.571848792682969 -CC1COC(O1)(Cn1cncn1)c1ccc(cc1Cl)Oc1ccc(cc1)Cl,1.1753530384218474,1.2267915364902278,0.9652222962957666,1.385483780547928 -OC(=O)C(Oc1ccc(cc1Cl)Cl)C,1.1838034627625664,1.112916569666931,-0.4194751857633423,2.787082111288475 -OC(=O)COc1ccc(cc1Cl)Cl,1.2903965824234715,0.5520671344078197,0.09433694027599415,2.486456224570949 -N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C(C(Br)(Br)Br)Br,1.2926366121738129,2.3457079184197034,0.8600488542476945,1.7252243700999312 -CCOc1cc(ccc1[N+](=O)[O-])Oc1ccc(cc1Cl)C(F)(F)F,1.325697462064925,0.8951787961648487,-0.1503769558421717,2.8017718799720215 -O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(c(c1)Cl)OC(C(OC(F)(F)F)F)(F)F,1.3454474595862016,1.1715755542485882,0.28189028506922376,2.409004634103179 -N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,1.3792626480407972,0.8323812299293085,0.7701398555861736,1.9883854404954209 -N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)/C=C(/C(F)(F)F)\Cl,1.4489810762170605,1.5738792912533845,0.8277685782419199,2.070193574192201 -N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(C(F)(F)F)Cl,1.4773270760988693,1.5561583389288367,0.9164916147796413,2.0381625374180974 -CCNc1nc(Cl)nc(n1)NC(C)(C)C,1.4857434193443673,2.1597809534601558,0.6291500483807473,2.3423367903079875 -N#CC(c1ccc(cc1)Oc1ccccc1)OC(=O)C1C(C1(C)C)/C=C\C(=O)OC(C(F)(F)F)C(F)(F)F,1.5496034378007137,1.8250684064715574,0.9002193891264287,2.1989874864749988 -CCNc1nc(nc(n1)Cl)NC(C#N)(C)C,1.5929887382061456,2.2845565889047834,0.9539530758856821,2.232024400526609 -N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Br)Br,1.6331579105042295,2.138294674164148,0.9039915016733127,2.3623243193351464 -Clc1ccc2c(c1)ncc(n2)Oc1ccc(cc1)OC(C(=O)OCC1CCCO1)C,1.6688630687536725,1.0357262236334814,0.4038576015162354,2.9338685359911096 -N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C(c1ccc(cc1)Cl)C(C)C,1.6704801123412765,1.225205758260884,1.521170204249966,1.819790020432587 -N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)(C)C,1.7053412847612943,1.2211327542550814,0.815720269567251,2.5949622999553377 -CCCCN(SN(C(=O)Oc1cccc2c1OC(C2)(C)C)C)CCCC,1.7312256228644531,1.182467683028378,0.9577247620768303,2.504726483652076 -CON(C(=O)Nc1ccc(cc1)Br)C,1.853850478079629,1.5797464954135645,0.709147692438419,2.998553263720839 -CCNc1nc(NCC)nc(n1)Cl,1.8558032695189919,1.5929887382061456,1.0420007334206187,2.669605805617365 -CON(C(=O)Nc1ccc(c(c1)Cl)Cl)C,1.9059788494124328,1.6004820295651263,1.0393661655879431,2.7725915332369224 -O=C(C1C(C1(C)C)/C=C(/C(F)(F)F)\Cl)OCc1c(F)c(F)c(c(c1F)F)C,1.9591604053146598,1.8510890068906436,1.8258080159285386,2.092512794700781 -N#CC(c1ccc(c(c1)Oc1ccccc1)F)OC(=O)C1C(C1(C)C)/C=C(/c1ccc(cc1)Cl)\Cl,2.2306967567590874,2.309954619710552,1.451308861557522,3.0100846519606526 -COP(=S)(Oc1ccc(cc1)[N+](=O)[O-])OC,2.284416857948971,2.67993684040502,0.5401652093234526,4.02866850657449 -COP(=S)(Oc1ccc(cc1)N(=O)=O)OC,2.41975480473767,3.0223586898833017,0.8394855032976698,4.00002410617767 -CCOP(=S)(Oc1ccc(cc1)[N+](=O)[O-])OCC,2.4354089909438916,2.1632497125104866,0.72440945859261,4.1464085232951735 -CCSCSP(=S)(OCC)OCC,3.0097062381173734,3.211485180279816,1.7615847018726656,4.257827774362081 +CCOC(=O)c1ccccc1C(=O)OCC,-0.8370100645050937,-1.300073495267144,-1.6741483492890255,0.00012822027883785392 +CCCCCCCCCC[N+](CCCCCCCCCC)(C)C.[Cl-],-0.44403527771508333,0.7526160071097443,-1.112159357685514,0.22408880225534733 +C[N]1(C)CCCCC1,-0.411033878655308,-0.11839701195157529,-1.745889181815833,0.923821424505217 +COC(=O)c1ccccc1C(=O)OC,-0.3546310887442844,-1.0128372247051722,-2.0032155722907925,1.293953394802224 +COC(=O)c1ccc(cc1)O,-0.34093997851180363,-0.9938284615355355,-1.6142351761037723,0.9323552190801649 +CCCCOC(=O)c1ccccc1C(=O)OCCCC,-0.2999820599797061,-0.3335681749239878,-2.336449199784935,1.7364850798255227 +ClCC[N+](C)(C)C,-0.18180692836685125,-0.025387799890407722,-0.7811049423214849,0.4174910855877824 +CC(OC(=O)Nc1ccccc1)C,0.005061882632518843,0.7909678031634652,-1.686986895870403,1.6971106611354407 +ClCC[N](C)(C)C,0.03309339253515783,-0.3507905474264569,-0.30900749229323377,0.3751942773635494 +Clc1cc(N)c(c(n1)C(=O)O)Cl,0.17341834123842617,-0.6840010770259624,-0.369665197128178,0.7165018796050303 +COC(=O)Nc1nc2c([nH]1)cc(cc2)S(=O)c1ccccc1,0.18148904996651877,2.197760066703943,-1.2728772662405146,1.635855366173552 +COC(=O)c1ccccc1S(=O)(=O)NC(=O)Nc1nc(OC(F)F)cc(n1)OC(F)F,0.33002089975400073,0.5244304978725236,-0.08233397491751665,0.7423757744255182 +Cc1cc(C)cc(c1)C(=O)N(C(C)(C)C)NC(=O)c1ccc2c(c1C)CCCO2,0.42281650039615654,0.4343882750979413,-0.586413789224745,1.4320467900170581 +Clc1ccc(cc1)Cl,0.5382154369801656,-0.3098004456017336,-0.9711767615077024,2.0476076354680335 +COCC(N(c1c(C)cccc1CC)C(=O)CCl)C,0.5443211634833082,0.27691392283721084,-0.5343409688764665,1.622983295843083 +COCN(c1c(CC)cccc1CC)C(=O)CCl,0.5538077526920319,1.2698758559694194,0.12889054391746335,0.9787249614666004 +CCOc1nc(nc(n1)NC)NC(=O)NS(=O)(=O)c1ccccc1C(=O)OC,0.5834656394321274,0.29099307039793554,-0.25174948988069,1.4186807687449448 +CCCSc1ccc2c(c1)[nH]c(n2)NC(=O)OC,0.5929732675468257,1.1227611514363303,-0.9665991079596404,2.152545643053292 +CCCN(c1c(cc(c(c1[N+](=O)[O-])N)C(F)(F)F)[N+](=O)[O-])CCC,0.5931369798207013,1.0760827768868946,-1.3295308367921879,2.5158047964335903 +COC(=O)Nc1nc2c([nH]1)cc(cc2)Sc1ccccc1,0.6180707138323848,1.3000842641666306,-1.5771439168696664,2.8132853445344357 +COc1nc(C)nc(n1)N(C(=O)NS(=O)(=O)c1ccccc1C(=O)OC)C,0.6183181033986996,1.500120551043568,-0.3121155459893188,1.548751752786718 +COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1Cl,0.6406512321432409,1.204123085861569,-0.31625524939671246,1.5975577136831944 +ClCCOc1ccccc1S(=O)(=O)NC(=O)Nc1nc(C)nc(n1)OC,0.6461493943447365,0.6934790190863317,0.07541723029792047,1.2168815583915524 +CCCN(c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-])CC1CC1,0.6487348903076077,1.8417279953479162,-0.7216821167060651,2.0191518973212803 +COc1nc(nc(n1)C)NC(=O)[N-]S(=O)(=O)c1cc(I)ccc1C(=O)OC.[Na+],0.6747364710921058,1.2508978152709322,-0.764599966004883,2.1140729081890948 +CCOCN(c1c(C)cccc1CC)C(=O)CCl,0.7048465404320757,0.6687866205177715,-0.5099146079632925,1.919607688827444 +ClC(=CC1C(C1(C)C)C(=O)OCc1cccc(c1)Oc1ccccc1)Cl,0.7257069022311738,1.0440453166077193,0.1757681819071122,1.2756456225552355 +Clc1cc(ccc1Oc1ccc(c(c1)C(=O)O)[N+](=O)[O-])C(F)(F)F,0.7323369621101146,0.4613885686555222,-0.22008287660862857,1.6847568008288576 +Clc1cc(ccc1Oc1ccc(c(c1)C(=O)NS(=O)(=O)C)[N+](=O)[O-])C(F)(F)F,0.7758677121083559,1.4432475594325607,0.1143538152057666,1.4373816090109452 +ClCC(=O)N(c1c(C)cccc1CC)COC(C)C,0.8078598568427287,0.40281883334761076,-0.08477368193916246,1.7004933956246198 +COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccsc1C(=O)OC,0.8154576385757448,1.1902114152572187,-0.19086759880796644,1.821782875959456 +Clc1cc(ccc1Oc1ccc(c(c1)C(=O)[O-])[N+](=O)[O-])C(F)(F)F.[Na+],0.8208769675021408,0.3286512527182731,-0.485285492755831,2.1270394277601126 +CCCCN(c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-])CC,0.867238375377393,1.7930123052435918,-0.764742769639524,2.49921952039431 +O=C(N(C)C)Nc1ccc(c(c1)Cl)Cl,0.8810886106643333,1.5893745854018753,-0.8243499767178145,2.5865271980464812 +CCOC(=O)COC(=O)c1cc(ccc1[N+](=O)[O-])Oc1ccc(cc1Cl)C(F)(F)F,0.8854430922488048,1.253093479525902,-0.06331177548445188,1.8341979599820615 +CCSC(=O)N1CCCCCC1,0.8871157093057233,1.5367491723403042,-0.2797384558560406,2.0539698744674872 +Clc1ccc(c(c1)Cl)C1(OCCO1)Cn1cncn1,0.932127966504802,1.4237162949891773,-0.05245178414146956,1.9167077171510736 +CCOC(=O)C(OC(=O)c1cc(ccc1[N+](=O)[O-])Oc1ccc(cc1Cl)C(F)(F)F)C,0.9607654770388614,0.9654517529015312,0.32720660375034827,1.5943243503273745 +CCCN(c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-])CCCl,0.9933505027218723,0.2448724686893053,-0.9734910303008493,2.960192035744594 +COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1CCC(F)(F)F,0.9957458449505873,0.6990959232648295,-0.11073567963559239,2.102227369536767 +CC(=CC1C(C1(C)C)C(=O)OCc1cccc(c1)Oc1ccccc1)C,1.0025319682304328,0.36853593732630685,-0.1470349837265199,2.1520989201873855 +COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1C(=O)OC,1.018617710205671,0.34459526699145665,0.08695419919871661,1.9502812212126255 +OC(=O)COc1cc(Cl)c(cc1Cl)Cl,1.0423643522103792,1.4073569873961393,-0.47459650016547217,2.5593252045862305 +OC(=O)C(Oc1ccc(cc1Cl)Cl)C,1.0434239900090239,1.112916569666931,-0.4417339929238311,2.528581972941879 +NC(=N)NCCCCCCCCNC(=N)N,1.046904900541644,1.0914077611524307,0.6024978100381186,1.4913119910451693 +O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(cc1)Cl,1.065580587089146,1.5947226210669956,0.4637497132041484,1.6674114609741435 +OC(=O)C(Oc1ccc(cc1C)Cl)C,1.0803355604159302,1.5913530743442346,-0.09544597671889532,2.2561170975507556 +N#CC(c1ccc(c(c1)Oc1ccccc1)F)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,1.093756761539861,1.5497336697290072,0.3417001334831383,1.8458133895965836 +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C(C(Br)(Br)Br)Br,1.1251341708050022,2.3457079184197034,0.5567196088996671,1.6935487327103373 +OC(=O)COc1ccc(cc1C)Cl,1.1300715902953602,1.3619681642499644,-0.3928808207623249,2.653024001353045 +COc1nc(C)nc(n1)N(C(=O)NS(=O)(=O)c1ccccc1C(=O)O)C,1.140350096424813,0.8409745087750948,-0.15906866486714555,2.4397688577167713 +CC1COC(O1)(Cn1cncn1)c1ccc(cc1Cl)Oc1ccc(cc1)Cl,1.1687137375772116,1.2267915364902278,0.442963891184023,1.8944635839704003 +CC(C(=O)O)Oc1cc(Cl)c(cc1Cl)Cl,1.1962965935433945,1.4910530201781254,0.2952203028004401,2.097372884286349 +CNC(=O)Oc1cc(C)c(c(c1)C)N(C)C,1.2078108177156763,2.170818490231342,0.46028992433000593,1.9553317111013468 +CCCN(C(=O)SCC)CCC,1.2181208342441197,1.1011057762548884,-0.5366844291118507,2.97292609760009 +CCCN(c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-])CCC,1.2267892522452761,0.446231021019864,0.059070044179798,2.394508460310754 +CC(OC(=O)C(c1ccc(cc1)Cl)(c1ccc(cc1)Cl)O)C,1.2290584642368096,1.5762300113736614,1.2017561724094663,1.256360756064153 +OC(=O)C(Oc1cccc(c1)Cl)C,1.2688868701917368,0.30236968685337956,0.3043608891206412,2.2334128512628326 +CNC(=O)Oc1cc(C)c(c(c1)C)C,1.272568405830823,1.286098038116031,-0.18000203188304198,2.725138843544688 +OC(=O)COc1ccc(cc1Cl)Cl,1.2940536702099834,0.5520671344078197,0.3747254683044451,2.2133818721155216 +N#CC(c1ccc(cc1)Oc1ccccc1)OC(=O)C1C(C1(C)C)/C=C\C(=O)OC(C(F)(F)F)C(F)(F)F,1.307968555194375,1.8250684064715574,0.6457831402199155,1.9701539701688344 +CCSC(=O)N(CC(C)C)CC(C)C,1.3176810455469876,0.33720440522271905,0.16931613118438915,2.466045959909586 +O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(c(c1)Cl)OC(C(OC(F)(F)F)F)(F)F,1.3410620100937982,1.1715755542485882,0.05999749578898128,2.622126524398615 +CS/C(=N/OC(=O)N(SN(C(=O)O/N=C(/SC)\C)C)C)/C,1.3653813446023095,1.549581519063189,0.8188742574761888,1.9118884317284301 +CCCC1COC(O1)(Cn1cncn1)c1ccc(cc1Cl)Cl,1.3793884865951995,0.8442020296654076,1.1366953113639247,1.6220816618264744 +CCOc1cc(ccc1[N+](=O)[O-])Oc1ccc(cc1Cl)C(F)(F)F,1.400290210280719,0.8951787961648487,0.2664802078726458,2.5341002126887924 +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(C(F)(F)F)Cl,1.406640920274055,1.5561583389288367,0.6127107129295228,2.2005711276185873 +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Br)Br,1.4138993066545986,2.138294674164148,0.5423466210077708,2.2854519923014265 +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)/C=C(/C(F)(F)F)\Cl,1.4241136067719982,1.5738792912533845,0.7382120946330134,2.110015118910983 +CCOC(=O)C(Oc1ccc(cc1)Oc1cnc2c(n1)ccc(c2)Cl)C,1.4522720983578052,1.6922125433461526,0.4250157646187651,2.4795284320968456 +CCNc1nc(Cl)nc(n1)NC(C)(C)C,1.467812596328255,2.1597809534601558,0.5500010433463091,2.3856241493102006 +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,1.4833009345009622,0.8323812299293085,0.6545171788417207,2.3120846901602037 +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C(c1ccc(cc1)Cl)C(C)C,1.4933798777263922,1.225205758260884,1.038021442245198,1.9487383132075864 +COC(=O)Nc1nc2c([nH]1)cccc2,1.5423004503450832,0.14495892528589976,0.3031609568942335,2.7814399437959327 +O=C(C(Oc1ccc(cc1)Oc1cnc2c(n1)ccc(c2)Cl)C)OCCON=C(C)C,1.5600158134970257,1.598814644976232,0.28652565860942425,2.8335059683846273 +CCNc1nc(nc(n1)Cl)NC(C#N)(C)C,1.578759944445773,2.2845565889047834,0.9052269084437082,2.2522929804478378 +[S]C(=S)NCCNC(=S)S[Mn],1.598089815353616,0.597669823868334,1.2691929083833997,1.9269867223238324 +N#CC(c1ccc(c(c1)Oc1ccccc1)F)OC(=O)C1C(C1(C)C)/C=C(/c1ccc(cc1)Cl)\Cl,1.6015245366090984,2.309954619710552,0.8160849390186764,2.3869641341995207 +CSC(=NOC(=O)N(SN(C(=O)ON=C(SC)C)C)C)C,1.606001016974945,1.549581519063189,1.138191844665517,2.073810189284373 +CCOC(=O)C(Oc1ccc(cc1)Oc1nc2c(o1)cc(cc2)Cl)C,1.6192213500716994,1.6042019938366077,0.7549829911066055,2.4834597090367936 +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)(C)C,1.6307594953831186,1.2211327542550814,0.6861175644528081,2.5754014263134293 +CON(C(=O)Nc1ccc(c(c1)Cl)Cl)C,1.6691782694201736,1.6004820295651263,0.7859624152148486,2.552394123625499 +Clc1ccc2c(c1)ncc(n2)Oc1ccc(cc1)OC(C(=O)OCC1CCCO1)C,1.6696069536776386,1.0357262236334814,0.5692653267315289,2.7699485806237485 +CON(C(=O)Nc1ccc(cc1)Br)C,1.7348206757027769,1.5797464954135645,0.5087961919000095,2.9608451595055443 +CCNc1nc(NC(C)C)nc(n1)Cl,1.8004917568753196,1.1473611962004826,1.1700073376559643,2.430976176094675 +O=C(C1C(C1(C)C)C=C(C(F)(F)F)Cl)OCc1c(F)c(F)c(c(c1F)F)C,1.8544388171886226,1.9591604053146598,1.6257768521215825,2.0831007822556624 +CCOP(=O)(OC(=CCl)c1ccc(cc1Cl)Cl)OCC,1.857166570019257,2.3796869305976593,0.1919247243015445,3.5224084157369697 +CCOP(=O)(O/C(=C/Cl)/c1ccc(cc1Cl)Cl)OCC,1.8579692742920217,2.3796869305976593,-0.0882655157655905,3.804204064349634 +O=C(C1C(C1(C)C)/C=C(/C(F)(F)F)\Cl)OCc1c(F)c(F)c(c(c1F)F)C,1.927806431227453,1.8510890068906436,1.6680194216191897,2.187593440835716 +CCNc1nc(NCC)nc(n1)Cl,1.9996561418144898,1.5929887382061456,1.1295855648969666,2.869726718732013 +COP(=S)(Oc1ccc(cc1)[N+](=O)[O-])OC,2.2669139426386247,2.67993684040502,0.3836651127368931,4.150162772540356 +CCOP(=S)(Oc1ccc(cc1)[N+](=O)[O-])OCC,2.4227546396361834,2.1632497125104866,1.3599539204685467,3.48555535880382 +COP(=S)(Oc1ccc(cc1)N(=O)=O)OC,2.427757642463682,3.0223586898833017,1.1891520929913195,3.6663631919360444 +CCOP(=S)(Oc1ccc(cc1)N(=O)=O)OCC,2.512306381495352,2.3806193392221147,1.5470183885383975,3.4775943744523063 +CCSCSP(=S)(OCC)OCC,2.790882716310415,3.211485180279816,1.9632813663657116,3.6184840662551183 +COC(=O)C(Oc1ccc(cc1)Oc1ncc(cc1Cl)C(F)(F)F)C,2.8325650714917345,2.574873529491226,1.403562824560694,4.261567318422776 +OC(=O)c1ccccc1.CCC(C1OC2(C=CC1C)OC1CC=C(C)C(OC3CC(OC)C(C(O3)C)OC3CC(OC)C(C(O3)C)NC)C(C)C=CC=C3C4(C(C(=O)OC(C2)C1)C=C(C)C(C4OC3)O)O)C,2.8584341426066855,2.804590566004922,2.2424799337925334,3.4743883514208376 +CCOP(=S)(OCC)SCSP(=S)(OCC)OCC,3.242009158195384,2.30671720992754,1.7565807831170788,4.727437533273688 diff --git a/data/training_log10-cv-1.id b/data/training_log10-cv-1.id index 32cae05..915bf98 100644 --- a/data/training_log10-cv-1.id +++ b/data/training_log10-cv-1.id @@ -1 +1 @@ -58a2f674a8ae8b18150b7005 +58a307dda8ae8b232c684047 diff --git a/data/training_log10-cv-2.csv b/data/training_log10-cv-2.csv index 7c3e56a..9f4f83b 100644 --- a/data/training_log10-cv-2.csv +++ b/data/training_log10-cv-2.csv @@ -1,59 +1,100 @@ SMILES,LOAEL_measured_median,LOAEL_predicted,Prediction_interval_low,Prediction_interval_high -C[N]1(C)CCCCC1,-0.4564837711150284,-0.11839701195157529,-1.2540506461294998,0.341083103899443 -CC(OC(=O)Nc1ccccc1)C,-0.009097936702771729,0.7909678031634652,-2.5591028972725183,2.5409070238669744 -Clc1cc(N)c(c(n1)C(=O)O)Cl,0.17198133130954318,-0.6840010770259624,0.006897993720497625,0.33706466889858877 -COC(=O)Nc1nc2c([nH]1)cc(cc2)Sc1ccccc1,0.38940948182350754,1.3000842641666306,-1.482681831697949,2.261500795344964 -CCCN(c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-])CC1CC1,0.4462310210198641,1.8417279953479162,-1.748497182577673,2.640959224617401 -Clc1cc(ccc1Oc1ccc(c(c1)C(=O)[O-])[N+](=O)[O-])C(F)(F)F.[Na+],0.45415167208588125,0.3286512527182731,-0.8341071327426074,1.7424104769143698 -COCN(c1c(CC)cccc1CC)C(=O)CCl,0.5174704176880593,1.2698758559694194,0.014774464079029714,1.020166371297089 -O=C(N(C)C)Nc1ccc(c(c1)Cl)Cl,0.5554422652280655,1.5893745854018753,-1.0857583331921425,2.1966428636482735 -ClC(=CC1C(C1(C)C)C(=O)OCc1cccc(c1)Oc1ccccc1)Cl,0.5673267770133079,1.0440453166077193,0.04915883701864843,1.0854947170079674 -CCCN(c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-])CCCl,0.5729261041791303,0.2448724686893053,-1.4770424724418703,2.6228946808001306 -COCC(N(c1c(C)cccc1CC)C(=O)CCl)C,0.6440145091827779,0.27691392283721084,-0.3460068343010406,1.6340358526665963 -COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1Cl,0.6696281692176134,1.204123085861569,-0.3032177242539539,1.6424740626891805 -CCCN(c1c(cc(c(c1[N+](=O)[O-])N)C(F)(F)F)[N+](=O)[O-])CCC,0.6720069540158989,1.0760827768868946,-1.396949639954025,2.7409635479858228 -CCOC(=O)COC(=O)c1cc(ccc1[N+](=O)[O-])Oc1ccc(cc1Cl)C(F)(F)F,0.7410560458270756,1.253093479525902,0.2719243477222782,1.2101877439318731 -ClCCOc1ccccc1S(=O)(=O)NC(=O)Nc1nc(C)nc(n1)OC,0.8402417574704084,0.6934790190863317,0.3369897561529681,1.3434937587878486 -CCCCN(c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-])CC,0.914967946962185,1.7930123052435918,-0.7669710015593527,2.5969068954837224 -COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccsc1C(=O)OC,0.9160944227123866,1.1902114152572187,0.058202322271559415,1.773986523153214 -CNC(=O)ON=C(SC)C,0.9267583523880014,1.0595589658676712,0.6678210732877135,1.1856956314882894 -ClCC(=O)N(c1c(C)cccc1CC)COC(C)C,0.9411720672204669,0.40281883334761076,-0.1718130654343506,2.0541571998752843 -COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1C(=O)OC,0.9882625780424082,0.34459526699145665,0.031318598259950536,1.945206557824866 -CC(=CC1C(C1(C)C)C(=O)OCc1cccc(c1)Oc1ccccc1)C,0.9936908951508764,0.36853593732630685,-0.3489552012536389,2.3363369915553918 -OC(=O)C(Oc1ccc(cc1C)Cl)C,0.9955461440956712,1.5913530743442346,-0.2736709910625508,2.264763279253893 -COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1CCC(F)(F)F,1.018434334306937,0.6990959232648295,0.5737407837319753,1.463127884881899 -CNC(=O)O/N=C(\SC)/C,1.0595589658676712,0.9267583523880014,0.5772329086434385,1.541885023091904 -O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(cc1)Cl,1.071108200096774,1.5947226210669956,0.4140738338544866,1.7281425663390615 -N#CC(c1ccc(c(c1)Oc1ccccc1)F)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,1.117348967570475,1.5497336697290072,0.3674966507186219,1.8672012844223282 -OC(=O)COc1cc(Cl)c(cc1Cl)Cl,1.1398473897523569,1.4073569873961393,-1.0572649111176238,3.3369596906223373 -O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(c(c1)Cl)OC(C(OC(F)(F)F)F)(F)F,1.1429580825833277,1.1715755542485882,0.16519910713260066,2.120717058034055 -CC1COC(O1)(Cn1cncn1)c1ccc(cc1Cl)Oc1ccc(cc1)Cl,1.1753530384218476,1.2267915364902278,0.03006286764460997,2.320643209199085 -OC(=O)C(Oc1ccc(cc1Cl)Cl)C,1.1838034627625664,1.112916569666931,-0.5488297310291974,2.91643665655433 -CCNc1nc(NC(C)C)nc(n1)Cl,1.1884535763302977,1.1473611962004826,0.7076587506313603,1.669248402029235 -CNC(=O)Oc1cc(C)c(c(c1)C)C,1.228849942742214,1.286098038116031,0.22391197632806836,2.2337879091563595 -CCCN(c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-])CCC,1.2451096252659142,0.446231021019864,-0.35639061838429353,2.846609868916122 -CCOc1cc(ccc1[N+](=O)[O-])Oc1ccc(cc1Cl)C(F)(F)F,1.2720120279431697,0.8951787961648487,0.08438812194123058,2.459635933945109 -OC(=O)COc1ccc(cc1Cl)Cl,1.2903965824234715,0.5520671344078197,0.11331611803719333,2.4674770468097496 -N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C(C(Br)(Br)Br)Br,1.2926366121738129,2.3457079184197034,0.6087189674957241,1.9765542568519017 -N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,1.3745857524755498,0.8323812299293085,0.570334630499469,2.1788368744516307 -CCOC(=O)C(Oc1ccc(cc1)Oc1cnc2c(n1)ccc(c2)Cl)C,1.397894682807939,1.6922125433461526,0.11334079746883519,2.682448568147043 -N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(C(F)(F)F)Cl,1.4764466094375324,1.5561583389288367,0.9318300971363,2.0210631217387647 -CCNc1nc(Cl)nc(n1)NC(C)(C)C,1.4857434193443673,2.1597809534601558,0.6651358877934408,2.3063509508952937 -N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)/C=C(/C(F)(F)F)\Cl,1.4877923506899162,1.5738792912533845,0.7998305174133207,2.1757541839665118 -N#CC(c1ccc(cc1)Oc1ccccc1)OC(=O)C1C(C1(C)C)/C=C\C(=O)OC(C(F)(F)F)C(F)(F)F,1.507110119093896,1.8250684064715574,0.7442886164722401,2.269931621715552 -CON(C(=O)Nc1ccc(cc1)Cl)C,1.5228920251829052,2.0307005985741227,0.2084754820696353,2.837308568296175 -CS/C(=N/OC(=O)N(SN(C(=O)O/N=C(/SC)\C)C)C)/C,1.5495815190631888,1.549581519063189,1.3876333843857924,1.711529653740585 -N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Br)Br,1.6175150185701135,2.138294674164148,0.9376686901219012,2.2973613470183256 -CSC(=NOC(=O)N(SN(C(=O)ON=C(SC)C)C)C)C,1.623526826109727,1.549581519063189,1.204531205097886,2.042522447121568 -CCNc1nc(NCC)nc(n1)Cl,1.6671292160263464,1.5929887382061456,0.8596358386526425,2.4746225934000505 -Clc1ccc2c(c1)ncc(n2)Oc1ccc(cc1)OC(C(=O)OCC1CCCO1)C,1.6688630687536725,1.0357262236334814,0.44948555284848446,2.8882405846588606 -N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C(c1ccc(cc1)Cl)C(C)C,1.6704801123412765,1.225205758260884,1.4865084368254098,1.8544517878571432 -N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)(C)C,1.708415149552353,1.2211327542550814,0.7903635313818305,2.6264667677228752 -CCNc1nc(nc(n1)Cl)NC(C#N)(C)C,1.727177273703254,2.2845565889047834,1.2355841119854445,2.218770435421064 -CON(C(=O)Nc1ccc(c(c1)Cl)Cl)C,1.7860327006117207,1.6004820295651263,0.8665563871316834,2.705509014091758 -CON(C(=O)Nc1ccc(cc1)Br)C,1.853850478079629,1.5797464954135645,0.4157690760298165,3.2919318801294413 -CCOP(=S)(OCC)SCSP(=S)(OCC)OCC,2.1044767111432936,2.30671720992754,0.2820963165000967,3.9268571057864907 -N#CC(c1ccc(c(c1)Oc1ccccc1)F)OC(=O)C1C(C1(C)C)/C=C(/c1ccc(cc1)Cl)\Cl,2.1735153216338507,2.309954619710552,1.5177048605200936,2.829325782747608 -COP(=S)(Oc1ccc(cc1)[N+](=O)[O-])OC,2.284377493641497,2.67993684040502,-0.051140260267335425,4.61989524755033 -CCOP(=S)(Oc1ccc(cc1)[N+](=O)[O-])OCC,2.435408990943892,2.1632497125104866,0.8731565084388309,3.9976614734489533 -CCSCSP(=S)(OCC)OCC,3.0097062381173734,3.211485180279816,1.7203888462710328,4.299023629963714 +CCOC(=O)c1ccccc1C(=O)OCC,-0.8131598695559016,-1.300073495267144,-1.644551796314825,0.018232057203022012 +CCCCCCCCCC[N+](CCCCCCCCCC)(C)C.[Cl-],-0.42818562065664917,0.7526160071097443,-1.170930976154624,0.31455973484132566 +C[N]1(C)CCCCC1,-0.42210539698175426,-0.11839701195157529,-2.1946273738586184,1.3504165798951098 +COC(=O)c1ccccc1C(=O)OC,-0.4043170573535165,-1.0128372247051722,-2.1119697598652816,1.3033356451582487 +COC(=O)c1ccc(cc1)O,-0.3036696074406008,-0.9938284615355355,-1.624349538450672,1.0170103235694707 +CCCCOC(=O)c1ccccc1C(=O)OCCCC,-0.23663053363705333,-0.3335681749239878,-2.4317710987955623,1.9585100315214559 +CON=C(c1ccccc1COc1ccccc1C)C(=O)OC,-0.1867090466786571,-0.07509896249967626,-1.6445984790236408,1.2711803856663264 +ClCC[N+](C)(C)C,-0.18373554933678096,-0.025387799890407722,-0.6312999612174768,0.2638288625439149 +CO/N=C(\c1ccccc1COc1ccccc1C)/C(=O)OC,-0.028492276804183828,-0.2291345059654671,-1.2012815131584889,1.1442969595501213 +ClCC[N](C)(C)C,0.042299230530483414,-0.3507905474264569,-0.2854430496578721,0.3700415107188389 +COc1cc(OC)nc(n1)NC(=O)NS(=O)(=O)c1ncccc1S(=O)(=O)CC,0.042333046288222424,0.5521445556544645,-3.42566057269631,3.5103266652727547 +COC(=O)Nc1nc2c([nH]1)cc(cc2)S(=O)c1ccccc1,0.21369908718894576,2.197760066703943,-1.4495190929241075,1.876917267301999 +Cc1cc(C)cc(c1)C(=O)N(C(C)(C)C)NC(=O)c1ccc2c(c1C)CCCO2,0.38997810916054315,0.4343882750979413,-0.5393277535965919,1.3192839719176783 +COC(=O)c1ccccc1S(=O)(=O)NC(=O)Nc1nc(OC(F)F)cc(n1)OC(F)F,0.40030931761078414,0.5244304978725236,-0.09927455098886301,0.8998931862104314 +COC(=O)Nc1nc2c([nH]1)cc(cc2)Sc1ccccc1,0.4802466000915624,1.3000842641666306,-1.5934506240454733,2.553943824228598 +COCC(N(c1c(C)cccc1CC)C(=O)CCl)C,0.4886267795976623,0.27691392283721084,0.14121557724582212,0.8360379819495025 +CCCSc1ccc2c(c1)[nH]c(n2)NC(=O)OC,0.5744059544107609,1.1227611514363303,-0.9852622079584847,2.1340741167800066 +CCOc1nc(nc(n1)NC)NC(=O)NS(=O)(=O)c1ccccc1C(=O)OC,0.6170049302210167,0.29099307039793554,-0.39862895121947417,1.6326388116615074 +ClC(=CC1C(C1(C)C)C(=O)OCc1cccc(c1)Oc1ccccc1)Cl,0.6310381773960064,1.0440453166077193,0.21693517329554335,1.0451411814964695 +ClCCOc1ccccc1S(=O)(=O)NC(=O)Nc1nc(C)nc(n1)OC,0.6349128797535077,0.6934790190863317,0.028579697948489247,1.2412460615585261 +COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1Cl,0.6422639518374699,1.204123085861569,-0.20719540805090697,1.4917233117258468 +COc1nc(nc(n1)C)NC(=O)[N-]S(=O)(=O)c1cc(I)ccc1C(=O)OC.[Na+],0.6442304094919054,1.2508978152709322,-0.9028261419985124,2.191286960982323 +COCN(c1c(CC)cccc1CC)C(=O)CCl,0.6756633842708125,1.2698758559694194,0.5220499999470909,0.8292767685945341 +COc1cccc(c1C)C(=O)NN(C(C)(C)C)C(=O)c1cc(C)cc(c1)C,0.6835446546831545,-0.04743064015554219,0.3310166370695789,1.0360726722967302 +CCCCN(c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-])CC,0.7196008914256098,1.7930123052435918,-1.2317599404530128,2.6709617233042326 +COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1CCC(F)(F)F,0.7381701557067727,0.6990959232648295,-0.19482850940717555,1.671168820820721 +Clc1cc(ccc1Oc1ccc(c(c1)C(=O)NS(=O)(=O)C)[N+](=O)[O-])C(F)(F)F,0.7527216406234662,1.4432475594325607,0.17890464023405872,1.3265386410128737 +CCOCN(c1c(C)cccc1CC)C(=O)CCl,0.7664585214730067,0.6687866205177715,-0.3297895910420391,1.8627066339880525 +CCCN(c1c(cc(c(c1[N+](=O)[O-])N)C(F)(F)F)[N+](=O)[O-])CCC,0.7908333753033355,1.0760827768868946,-1.3557682381552314,2.9374349887619027 +COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccsc1C(=O)OC,0.8363110220717199,1.1902114152572187,-0.2167676290112318,1.8893896731546715 +CCOC(=O)COC(=O)c1cc(ccc1[N+](=O)[O-])Oc1ccc(cc1Cl)C(F)(F)F,0.8692150950922544,1.253093479525902,-0.22320767006397757,1.9616378602484863 +COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1C(=O)OC,0.8809390658163835,0.34459526699145665,0.12018317892087416,1.6416949527118927 +O=C(N(C)C)Nc1ccc(c(c1)Cl)Cl,0.8817289982831876,1.5893745854018753,-0.4174242759597603,2.1808822725261354 +CCSC(=O)N1CCCCCC1,0.9065822981190435,1.5367491723403042,-0.3339673868532065,2.1471319830912936 +CCCN(c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-])CCCl,0.9262819274002431,0.2448724686893053,-0.5327183985514182,2.3852822533519045 +Clc1ccc(c(c1)Cl)C1(OCCO1)Cn1cncn1,0.9588122063700005,1.4237162949891773,-0.055069583621282336,1.9726939963612833 +CCOC(=O)C(OC(=O)c1cc(ccc1[N+](=O)[O-])Oc1ccc(cc1Cl)C(F)(F)F)C,0.9683598069933811,0.9654517529015312,0.17804105066837583,1.7586785633183863 +CCCN(c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-])CCC,0.9752704734295072,0.446231021019864,-0.9007755849960473,2.851316531855062 +C=CCC1=C(C)C(CC1=O)OC(=O)C1C(C1(C)C)C=C(C)C,0.982443323405749,1.0914292030286552,-0.2094450280095539,2.174331674821052 +CC(=CC1C(C1(C)C)C(=O)OCc1cccc(c1)Oc1ccccc1)C,0.9951050735443849,0.36853593732630685,-0.0651592006297741,2.055369347718544 +CCCN(c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-])CC1CC1,1.0128517077768624,1.8417279953479162,-0.6074298367319317,2.6331332522856563 +NC(=N)NCCCCCCCCNC(=N)N,1.0512935333986806,1.0914077611524307,0.5122701545833607,1.5903169122140004 +OC(=O)C(Oc1ccc(cc1Cl)Cl)C,1.0618482744607656,1.112916569666931,-0.40438963643188797,2.528086185353419 +OC(=O)COc1cc(Cl)c(cc1Cl)Cl,1.072370576010839,1.4073569873961393,-0.41869825714357245,2.563439409165251 +OC(=O)COc1ccc(cc1C)Cl,1.106510009085713,1.3619681642499644,-0.29541791575151266,2.508437933922939 +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C(C(Br)(Br)Br)Br,1.1178142148236576,2.3457079184197034,0.4212896665723077,1.8143387630750074 +CNC(=O)O/N=C(\SC)/C,1.1193750914462024,0.9267583523880014,0.6578564346214911,1.5808937482709138 +Clc1cc(ccc1Oc1ccc(c(c1)C(=O)[O-])[N+](=O)[O-])C(F)(F)F.[Na+],1.1209655358097124,0.3286512527182731,0.1249337251581163,2.1169973464613085 +COc1nc(C)nc(n1)N(C(=O)NS(=O)(=O)c1ccccc1C(=O)O)C,1.129223502313166,0.8409745087750948,-0.2659401258628258,2.524387130489158 +CNC(=O)ON=C(SC)C,1.1349650728335707,1.0595589658676712,0.5555317935566112,1.7143983521105302 +Clc1cc(ccc1Oc1ccc(c(c1)C(=O)O)[N+](=O)[O-])C(F)(F)F,1.1658484576929986,0.4613885686555222,0.4664325000471832,1.865264415338814 +CCCN(C(=O)SCC)CCC,1.1774970415117871,1.1011057762548884,-0.804091109415924,3.1590851924394983 +CC1COC(O1)(Cn1cncn1)c1ccc(cc1Cl)Oc1ccc(cc1)Cl,1.1828965862316103,1.2267915364902278,0.5115920921016062,1.8542010803616145 +CNC(=O)Oc1cc(C)c(c(c1)C)N(C)C,1.2151746688934792,2.170818490231342,0.589278651667687,1.8410706861192714 +CC(C(=O)O)Oc1cc(Cl)c(cc1Cl)Cl,1.2256013002731179,1.4910530201781254,0.3219247184651157,2.1292778820811202 +CC(OC(=O)C(c1ccc(cc1)Cl)(c1ccc(cc1)Cl)O)C,1.22860582584693,1.5762300113736614,1.2019820460345405,1.2552296056593193 +OC(=O)C(Oc1ccc(cc1C)Cl)C,1.2367872169410257,1.5913530743442346,0.0870668745605867,2.3865075593214646 +CNC(=O)Oc1cc(C)c(c(c1)C)C,1.2377272232668657,1.286098038116031,-0.22981535190929092,2.7052697984430223 +CCNc1nc(NC(C)C)nc(n1)Cl,1.2541614082392798,1.1473611962004826,0.45077494648454797,2.0575478699940115 +OC(=O)COc1ccc(cc1Cl)Cl,1.2886519760701258,0.5520671344078197,0.4097610511559311,2.1675429009843206 +N#CC(c1ccc(cc1)Oc1ccccc1)OC(=O)C1C(C1(C)C)/C=C\C(=O)OC(C(F)(F)F)C(F)(F)F,1.3154581773400171,1.8250684064715574,0.572005684135983,2.0589106705440514 +N#CC(c1ccc(c(c1)Oc1ccccc1)F)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,1.3369628204949657,1.5497336697290072,0.5572927608087364,2.116632880181195 +CCSC(=O)N(CC(C)C)CC(C)C,1.3411935450348622,0.33720440522271905,0.07121779051530974,2.6111692995544145 +O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(c(c1)Cl)OC(C(OC(F)(F)F)F)(F)F,1.3520609627577913,1.1715755542485882,0.031484984731280585,2.672636940784302 +CCOc1cc(ccc1[N+](=O)[O-])Oc1ccc(cc1Cl)C(F)(F)F,1.3567580079773038,0.8951787961648487,0.31777033299483937,2.3957456829597685 +CS/C(=N/OC(=O)N(SN(C(=O)O/N=C(/SC)\C)C)C)/C,1.364144558983451,1.549581519063189,0.6784227436388685,2.0498663743280336 +CCCC1COC(O1)(Cn1cncn1)c1ccc(cc1Cl)Cl,1.3794964439460404,0.8442020296654076,1.1229316984783138,1.636061189413767 +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Br)Br,1.4030812331325997,2.138294674164148,0.2962543719596751,2.5099080943055245 +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)/C=C(/C(F)(F)F)\Cl,1.4199791739776855,1.5738792912533845,0.711837569549499,2.128120778405872 +CCOC(=O)C(Oc1ccc(cc1)Oc1cnc2c(n1)ccc(c2)Cl)C,1.4391162449114059,1.6922125433461526,0.49913587960052597,2.3790966102222857 +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(C(F)(F)F)Cl,1.4399915948594795,1.5561583389288367,0.6357211105500314,2.2442620791689274 +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C(c1ccc(cc1)Cl)C(C)C,1.5119517654481116,1.225205758260884,1.0919096151282504,1.9319939157679729 +CCNc1nc(Cl)nc(n1)NC(C)(C)C,1.5155846606187175,2.1597809534601558,0.813227204455287,2.2179421167821483 +CON(C(=O)Nc1ccc(cc1)Cl)C,1.5178978741073856,2.0307005985741227,0.4762730945172329,2.559522653697538 +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)(C)C,1.5263205180142954,1.2211327542550814,0.6882656614598811,2.3643753745687097 +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,1.5440338302248031,0.8323812299293085,0.7911710322018068,2.2968966282477994 +CCNc1nc(nc(n1)Cl)NC(C#N)(C)C,1.5543689690137,2.2845565889047834,0.8258778692648472,2.2828600687625524 +COC(=O)Nc1nc2c([nH]1)cccc2,1.5672359292256253,0.14495892528589976,0.4375302967367667,2.6969415617144836 +O=C(C(Oc1ccc(cc1)Oc1cnc2c(n1)ccc(c2)Cl)C)OCCON=C(C)C,1.5739088335859583,1.598814644976232,0.3066026967733091,2.8412149703986076 +N#CC(c1ccc(c(c1)Oc1ccccc1)F)OC(=O)C1C(C1(C)C)/C=C(/c1ccc(cc1)Cl)\Cl,1.6093541340303636,2.309954619710552,0.8826476739538424,2.336060594106885 +CSC(=NOC(=O)N(SN(C(=O)ON=C(SC)C)C)C)C,1.6116492765141213,1.549581519063189,1.1527630247009217,2.070535528327321 +Clc1ccc2c(c1)ncc(n2)Oc1ccc(cc1)OC(C(=O)OCC1CCCO1)C,1.667604999751091,1.0357262236334814,0.5462726974552439,2.7889373020469383 +CCCCN(SN(C(=O)Oc1cccc2c1OC(C2)(C)C)C)CCCC,1.752840193196464,1.182467683028378,0.7906485880259838,2.715031798366944 +CON(C(=O)Nc1ccc(cc1)Br)C,1.7571679113543246,1.5797464954135645,0.5604334581110146,2.9539023645976346 +CON(C(=O)Nc1ccc(c(c1)Cl)Cl)C,1.7753929686891388,1.6004820295651263,0.8970922541735924,2.6536936832046853 +O=C(C1C(C1(C)C)C=C(C(F)(F)F)Cl)OCc1c(F)c(F)c(c(c1F)F)C,1.8547778421020618,1.9591604053146598,1.5938586082589188,2.1156970759452047 +CCOP(=O)(O/C(=C/Cl)/c1ccc(cc1Cl)Cl)OCC,1.8985287387234222,2.3796869305976593,-0.007818501679409673,3.804875979126254 +CCOP(=O)(OC(=CCl)c1ccc(cc1Cl)Cl)OCC,1.9104499266466934,2.3796869305976593,-0.03394921637321335,3.8548490696666002 +O=C(C1C(C1(C)C)/C=C(/C(F)(F)F)\Cl)OCc1c(F)c(F)c(c(c1F)F)C,1.9271428110163682,1.8510890068906436,1.6365832479041758,2.2177023741285606 +CCNc1nc(NCC)nc(n1)Cl,2.021562134651509,1.5929887382061456,1.2188638190648284,2.8242604502381896 +COP(=S)(Oc1ccc(cc1)[N+](=O)[O-])OC,2.2378430576367956,2.67993684040502,0.6239467432437766,3.8517393720298143 +CCOP(=S)(Oc1ccc(cc1)[N+](=O)[O-])OCC,2.436375824086263,2.1632497125104866,1.0858860454349613,3.7868656027375653 +CCOP(=S)(SCSC(C)(C)C)OCC,2.580201181871007,3.5709705331517814,1.9195844235632218,3.240817940178792 +COC(=O)C(Oc1ccc(cc1)Oc1ncc(cc1Cl)C(F)(F)F)C,2.759464280847568,2.574873529491226,1.3525432808553357,4.166385280839799 +COC1CC(OC2C(C)C=CC=C3COC4C3(O)C(C=C(C4O)C)C(=O)OC3CC(CC=C2C)OC2(C3)C=CC(C(O2)C(CC)C)C)OC(C1OC1CC(OC)C(C(O1)C)O)C.COC1CC(OC(C1OC1CC(OC)C(C(O1)C)O)C)OC1C(C)C=CC=C2COC3C2(O)C(C=C(C3O)C)C(=O)OC2CC(CC=C1C)OC1(C2)C=CC(C(O1)C(C)C)C,2.7803035814504313,2.9375684468373877,2.117898730596729,3.4427084323041335 +OC(=O)c1ccccc1.CCC(C1OC2(C=CC1C)OC1CC=C(C)C(OC3CC(OC)C(C(O3)C)OC3CC(OC)C(C(O3)C)NC)C(C)C=CC=C3C4(C(C(=O)OC(C2)C1)C=C(C)C(C4OC3)O)O)C,2.858933480864903,2.804590566004922,2.2698253153929735,3.4480416463368324 +COC1CC(OC2C(C)C=CC=C3COC4C3(O)C(C=C(C4O)C)C(=O)OC3CC(CC=C2C)OC2(C3)C=CC(C(O2)C(C)C)C)OC(C1OC1CC(OC)C(C(O1)C)NC(=O)C)C,2.881727629439025,2.7553159367231137,2.3168280230497458,3.4466272358283043 +CCOP(=S)(OCC)SCSP(=S)(OCC)OCC,3.253153163127129,2.30671720992754,1.77956663308394,4.726739693170318 +CCSCSP(=S)(OCC)OCC,3.3036263178407395,3.211485180279816,1.9525492024515643,4.654703433229915 diff --git a/data/training_log10-cv-2.id b/data/training_log10-cv-2.id index 80b2642..775286b 100644 --- a/data/training_log10-cv-2.id +++ b/data/training_log10-cv-2.id @@ -1 +1 @@ -58a2f75ba8ae8b19e4b1dc28 +58a308dca8ae8b2527237d21 diff --git a/figures/crossvalidation.pdf b/figures/crossvalidation.pdf index 5393ef4..b4a779b 100644 Binary files a/figures/crossvalidation.pdf and b/figures/crossvalidation.pdf differ diff --git a/figures/dataset-variability.pdf b/figures/dataset-variability.pdf index deede55..43ba4db 100644 Binary files a/figures/dataset-variability.pdf and b/figures/dataset-variability.pdf differ diff --git a/figures/functional-groups.pdf b/figures/functional-groups.pdf index 7a285e5..2750751 100644 Binary files a/figures/functional-groups.pdf and b/figures/functional-groups.pdf differ diff --git a/figures/test-correlation.pdf b/figures/test-correlation.pdf index e36263f..390214a 100644 Binary files a/figures/test-correlation.pdf and b/figures/test-correlation.pdf differ diff --git a/figures/test-prediction.pdf b/figures/test-prediction.pdf index bb9f329..8d024a8 100644 Binary files a/figures/test-prediction.pdf and b/figures/test-prediction.pdf differ diff --git a/loael.md b/loael.md index aa1764e..3caacf5 100644 --- a/loael.md +++ b/loael.md @@ -345,7 +345,7 @@ In order to compare the performance of in silico read across models with experim variability we are using compounds that occur in both datasets as a test set (375 measurements, 155 compounds). `lazar` read across predictions -were obtained for 155 compounds, 129 +were obtained for 155 compounds, 121 predictions failed, because no similar compounds were found in the training data (i.e. they were not covered by the applicability domain of the training data). @@ -367,7 +367,7 @@ experimental data into a single median value hides experimental variability. Comparison | $r^2$ | RMSE --------------|---------------------------|------------------------- Mazzatorta vs. Swiss | 0.52 | 0.59 -Prediction vs. Test median | 0.48 | 0.53 +Prediction vs. Test median | 0.47 | 0.57 : Comparison of model predictions with experimental variability. {#tbl:common-pred} @@ -381,9 +381,9 @@ All correlations of predicted with experimental values are statistically highly $r^2$ | RMSE | Nr. predicted -------|------|---------------- -0.41 | 0.57 | 51/671 -0.52 | 0.54 | 57/671 -0.51 | 0.52 | 58/671 +0.6 | 0.6 | 99/671 +0.55 | 0.6 | 98/671 +0.63 | 0.55 | 99/671 : Results from 3 independent 10-fold crossvalidations {#tbl:cv} diff --git a/loael.pdf b/loael.pdf index 220718a..4ccc792 100644 Binary files a/loael.pdf and b/loael.pdf differ diff --git a/scripts/crossvalidation.rb b/scripts/crossvalidation.rb index 32b9950..e02c5ca 100755 --- a/scripts/crossvalidation.rb +++ b/scripts/crossvalidation.rb @@ -5,13 +5,11 @@ require 'yaml' name = File.basename ARGV[0], ".csv" file = File.join "data",ARGV[0] dataset = Dataset.from_csv_file file -#model = Model::LazarRegression.create(training_dataset: dataset)#, :prediction_algorithm => "OpenTox::Algorithm::Regression.local_fingerprint_regression") -model = Model::LazarRegression.create(training_dataset: dataset, algorithms: { :similarity => { :min => 0.5 }}) +model = Model::LazarRegression.create(training_dataset: dataset, algorithms: { :prediction => {:method => "Algorithm::Caret.rf"}, :similarity => { :min => 0.5 }}) csv_file = File.join("data",ARGV[0].sub(/.csv/,"-cv-#{ARGV[1]}.csv")) id_file = File.join("data",ARGV[0].sub(/.csv/,"-cv-#{ARGV[1]}.id")) cv = Validation::RegressionCrossValidation.create model File.open(id_file,"w+"){|f| f.puts cv.id} -#cv = Validation::RegressionCrossValidation.first p cv.id data = [] cv.predictions.each do |cid,p| @@ -29,5 +27,3 @@ CSV.open(csv_file,"w+") do |csv| csv << ["SMILES","LOAEL_measured_median","LOAEL_predicted","Prediction_interval_low","Prediction_interval_high"] data.each{|r| csv << r} end -=begin -=end diff --git a/scripts/test-validation.rb b/scripts/test-validation.rb index b64edd6..0b8c0a7 100755 --- a/scripts/test-validation.rb +++ b/scripts/test-validation.rb @@ -5,6 +5,6 @@ include OpenTox test = Dataset.from_csv_file(File.join("data","test_log10.csv")) train = Dataset.from_csv_file(File.join("data","training_log10.csv")) -model = Model::LazarRegression.create(training_dataset: train, algorithms: { :similarity => { :min => 0.5 }}) +model = Model::LazarRegression.create(training_dataset: train, algorithms: { :prediction => {:method => "Algorithm::Caret.rf"}, :similarity => { :min => 0.5 }}) validation = Validation::TrainTest.create model, train, test File.open(File.join("data","training-test-predictions.id"),"w+") { |f| f.puts validation.id } -- cgit v1.2.3