From 4d67ddb9fe72cb4cba42e508a943e1d4d04fef8c Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Wed, 17 Feb 2016 21:13:23 +0100 Subject: validations, figures, text finalisation started --- paper/Makefile | 60 +- paper/Rakefile | 12 - paper/SMARTS_InteLigand.txt | 983 --------------------- paper/appendix/functional-groups.csv | 1 - paper/create-median-correlation.rb | 4 +- paper/crossvalidation-plots.R | 20 + paper/crossvalidation.R | 16 + paper/crossvalidation.rb | 23 + paper/crossvalidations.R | 9 - paper/crossvalidations.rb | 18 - paper/data/SMARTS_InteLigand.txt | 983 +++++++++++++++++++++ paper/data/combined-cv.csv | 923 +++++++++++++++++++ paper/data/combined-cv.id | 1 + paper/data/combined-test-predictions.csv | 302 +++---- paper/data/combined-test-predictions.id | 1 + paper/data/common-median.csv | 34 +- paper/data/functional-groups-reduced.csv | 34 + paper/data/functional-groups-reduced4R.csv | 68 ++ paper/data/functional-groups.csv | 138 +++ paper/data/mazzatorta-cv.csv | 519 +++++++++++ paper/data/mazzatorta-cv.id | 1 + paper/data/mazzatorta-test-predictions.csv | 298 +++---- paper/data/mazzatorta-test-predictions.id | 1 + paper/data/mazzatorta.csv~ | 568 ------------ paper/data/median-correlation.csv | 208 ++--- paper/data/swiss-cv.csv | 447 ++++++++++ paper/data/swiss-cv.id | 1 + paper/data/swiss-test-predictions.csv | 300 +++---- paper/data/swiss-test-predictions.id | 1 + paper/data/swiss.csv~ | 494 ----------- paper/figure/corr-1.png | Bin 8183 -> 0 bytes paper/figure/crossvalidation.pdf | Bin 0 -> 24785 bytes paper/figure/functional-groups.pdf | Bin 0 -> 6389 bytes paper/figure/predictions-1.png | Bin 11720 -> 0 bytes paper/figure/test-correlation.pdf | Bin 0 -> 10885 bytes paper/figure/test-prediction.pdf | Bin 0 -> 27063 bytes paper/figure/unnamed-chunk-2-1.png | Bin 8183 -> 0 bytes paper/figure/unnamed-chunk-5-1.png | Bin 11706 -> 0 bytes paper/functional-groups-reduced.csv | 34 - paper/functional-groups-reduced4R.csv | 68 -- 
paper/functional-groups.R | 11 +- paper/functional-groups.csv | 138 --- paper/loael-dataset-comparison-all-compounds.pdf | Bin 67739 -> 0 bytes .../loael-dataset-comparison-common-compounds.pdf | Bin 71753 -> 0 bytes paper/loael-dataset-comparison.rb | 75 -- paper/loael-dataset-correlation.pdf | Bin 6050 -> 0 bytes paper/loael.Rmd | 236 ++--- paper/loael.md | 231 ++--- paper/loael.pdf | Bin 356362 -> 272191 bytes paper/references.bib | 85 -- paper/references.bibtex | 116 +++ paper/test-correlation-plot.R | 21 + paper/test-correlation.R | 15 + paper/test-prediction-plot.R | 32 + paper/test-set-validation.rb | 22 - paper/test-validation.rb | 24 + paper/unique-smiles.rb | 18 + 57 files changed, 4259 insertions(+), 3335 deletions(-) delete mode 100644 paper/Rakefile delete mode 100644 paper/SMARTS_InteLigand.txt delete mode 120000 paper/appendix/functional-groups.csv create mode 100644 paper/crossvalidation-plots.R create mode 100644 paper/crossvalidation.R create mode 100644 paper/crossvalidation.rb delete mode 100644 paper/crossvalidations.R delete mode 100644 paper/crossvalidations.rb create mode 100644 paper/data/SMARTS_InteLigand.txt create mode 100644 paper/data/combined-cv.csv create mode 100644 paper/data/combined-cv.id create mode 100644 paper/data/combined-test-predictions.id create mode 100644 paper/data/functional-groups-reduced.csv create mode 100644 paper/data/functional-groups-reduced4R.csv create mode 100644 paper/data/functional-groups.csv create mode 100644 paper/data/mazzatorta-cv.csv create mode 100644 paper/data/mazzatorta-cv.id create mode 100644 paper/data/mazzatorta-test-predictions.id delete mode 100644 paper/data/mazzatorta.csv~ create mode 100644 paper/data/swiss-cv.csv create mode 100644 paper/data/swiss-cv.id create mode 100644 paper/data/swiss-test-predictions.id delete mode 100644 paper/data/swiss.csv~ delete mode 100644 paper/figure/corr-1.png create mode 100644 paper/figure/crossvalidation.pdf create mode 100644 
paper/figure/functional-groups.pdf delete mode 100644 paper/figure/predictions-1.png create mode 100644 paper/figure/test-correlation.pdf create mode 100644 paper/figure/test-prediction.pdf delete mode 100644 paper/figure/unnamed-chunk-2-1.png delete mode 100644 paper/figure/unnamed-chunk-5-1.png delete mode 100644 paper/functional-groups-reduced.csv delete mode 100644 paper/functional-groups-reduced4R.csv delete mode 100644 paper/functional-groups.csv delete mode 100644 paper/loael-dataset-comparison-all-compounds.pdf delete mode 100644 paper/loael-dataset-comparison-common-compounds.pdf delete mode 100644 paper/loael-dataset-comparison.rb delete mode 100644 paper/loael-dataset-correlation.pdf delete mode 100644 paper/references.bib create mode 100644 paper/references.bibtex create mode 100644 paper/test-correlation-plot.R create mode 100644 paper/test-correlation.R create mode 100644 paper/test-prediction-plot.R delete mode 100644 paper/test-set-validation.rb create mode 100644 paper/test-validation.rb create mode 100644 paper/unique-smiles.rb diff --git a/paper/Makefile b/paper/Makefile index d66bf36..4aa5ab3 100644 --- a/paper/Makefile +++ b/paper/Makefile @@ -1,51 +1,55 @@ # Paper -loael.pdf: loael.md functional-groups.pdf loael-dataset-correlation.pdf rmse.R test-set-validation.csv crossvalidations.R data/common-median.csv +loael.pdf: loael.md references.bibtex + pandoc -r markdown+simple_tables+table_captions+yaml_metadata_block -s -S --bibliography=references.bibtex --latex-engine=pdflatex --filter pandoc-crossref --filter pandoc-citeproc -o loael.pdf loael.md -loael.md: loael.Rmd rmse.R +loael.md: loael.Rmd figures validations + Rscript --vanilla -e "library(knitr); knit('loael.Rmd');" -loael.docx: loael.md functional-groups.pdf loael-dataset-correlation.pdf +loael.docx: loael.md pandoc --filter pandoc-crossref --filter pandoc-citeproc loael.md -s -o loael.docx rmse.R: rmse.rb ruby rmse.rb -crossvalidations.R: crossvalidations.rb - ruby crossvalidations.rb 
+# Figures -test-set-validation.csv: test-set-validation.rb - ruby test-set-validation.rb +figures: datasets validations figure/functional-groups.pdf figure/test-prediction.pdf figure/test-correlation.pdf figure/crossvalidation.pdf -%.md: %.Rmd - Rscript --vanilla -e "library(knitr); knit('$<');" +figure/functional-groups.pdf: data/functional-groups-reduced4R.csv functional-groups.R + Rscript functional-groups.R -%.pdf: %.md - pandoc --filter pandoc-crossref --filter pandoc-citeproc $< -s -o $@ +figure/crossvalidation.pdf: data/mazzatorta-cv.csv data/swiss-cv.csv data/combined-cv.csv + Rscript crossvalidation-plots.R -functional-groups.pdf: functional-groups-reduced4R.csv functional-groups.R - R CMD BATCH functional-groups.R +figure/test-prediction.pdf: data/mazzatorta-test-predictions.csv data/swiss-test-predictions.csv data/combined-test-predictions.csv data/median-correlation.csv test-prediction-plot.R + Rscript test-prediction-plot.R -loael-dataset-correlation.pdf: loael-dataset-comparison.rb - ruby loael-dataset-comparison.rb +figure/test-correlation.pdf: data/mazzatorta-test-predictions.csv data/swiss-test-predictions.csv data/combined-test-predictions.csv data/median-correlation.csv test-correlation-plot.R + Rscript test-correlation-plot.R -#data/common-test.csv: create-test-set.rb - #ruby create-test-set.rb +# Validations -dataset-correlation.R: data/common-test.csv dataset-correlation.R - Rscript dataset-correlation.R +validations: test-predictions crossvalidations -# Validations +crossvalidations: data/mazzatorta-cv.csv data/swiss-cv.csv data/combined-cv.csv + +data/mazzatorta-cv.csv: crossvalidation.rb data/mazzatorta.csv + ruby crossvalidation.rb mazzatorta.csv + +data/swiss-cv.csv: crossvalidation.rb data/swiss.csv + ruby crossvalidation.rb swiss.csv -crossvalidation.R: data/mazzatorta.csv,data/swiss.csv,data/combined.csv crossvalidation.rb - crossvalidation.rb +data/combined-cv.csv: crossvalidation.rb data/combined.csv + ruby crossvalidation.rb 
combined.csv test-predictions: data/mazzatorta-test-predictions.csv data/swiss-test-predictions.csv data/combined-test-predictions.csv -data/mazzatorta-test-predictions.csv: test-set-validation.rb data/test.csv data/mazzatorta.csv - ruby test-set-validation.rb mazzatorta.csv -data/swiss-test-predictions.csv: test-set-validation.rb data/test.csv data/swiss.csv - ruby test-set-validation.rb swiss.csv -data/combined-test-predictions.csv: test-set-validation.rb data/test.csv data/combined.csv - ruby test-set-validation.rb combined.csv +data/mazzatorta-test-predictions.csv: test-validation.rb data/test.csv data/mazzatorta.csv + ruby test-validation.rb mazzatorta.csv +data/swiss-test-predictions.csv: test-validation.rb data/test.csv data/swiss.csv + ruby test-validation.rb swiss.csv +data/combined-test-predictions.csv: test-validation.rb data/test.csv data/combined.csv + ruby test-validation.rb combined.csv # Datasets diff --git a/paper/Rakefile b/paper/Rakefile deleted file mode 100644 index 1d45b9e..0000000 --- a/paper/Rakefile +++ /dev/null @@ -1,12 +0,0 @@ -task :default => "loael.pdf" - -file "loael.pdf" => ["loael.md","functional-groups.pdf", "loael-dataset-correlation.pdf"] - `pandoc --filter pandoc-citeproc loael.md -s -o loael.pdf` - pid=`pidof mupdf`.chomp - `kill -s SIGHUP #{pid}` - -file "functional-groups.pdf" => ["functional-groups-reduced4R.csv","functional-groups.R"] - `R CMD BATCH functional-groups.R` - -file "loael-dataset-correlation.pdf" => ["loael-dataset-comparison.rb"] - `ruby loael-dataset-comparison.rb` diff --git a/paper/SMARTS_InteLigand.txt b/paper/SMARTS_InteLigand.txt deleted file mode 100644 index 23bc6e2..0000000 --- a/paper/SMARTS_InteLigand.txt +++ /dev/null @@ -1,983 +0,0 @@ -# -# SMARTS Patterns for Functional Group Classification -# -# written by Christian Laggner -# Copyright 2005 Inte:Ligand Software-Entwicklungs und Consulting GmbH -# -# Released under the Lesser General Public License (LGPL license) -# see 
http://www.gnu.org/copyleft/lesser.html -# Modified from Version 221105 -##################################################################################################### - -# General Stuff: -# These patters were written in an attempt to represent the classification of organic compounds -# from the viewpoint of an organic chemist. -# They are often very restrictive. This may be generally a good thing, but it also takes some time -# for filtering/indexing large compound sets. -# For filtering undesired groups (in druglike compounds) one will want to have more general patterns -# (e.g. you don't want *any* halide of *any* acid, *neither* aldehyde *nor* formyl esters and amides, ...). -# - -# Part I: Carbon -# ============== - - -# I.1: Carbon-Carbon Bonds -# ------------------------ - -# I.1.1 Alkanes: - -Primary_carbon: [CX4H3][#6] - -Secondary_carbon: [CX4H2]([#6])[#6] - -Tertiary_carbon: [CX4H1]([#6])([#6])[#6] - -Quaternary_carbon: [CX4]([#6])([#6])([#6])[#6] - - -# I.1.2 C-C double and Triple Bonds - -Alkene: [CX3;$([H2]),$([H1][#6]),$(C([#6])[#6])]=[CX3;$([H2]),$([H1][#6]),$(C([#6])[#6])] -# sp2 C may be substituted only by C or H - -# does not hit ketenes and allenes, nor enamines, enols and the like - -Alkyne: [CX2]#[CX2] -# non-carbon substituents (e.g. alkynol ethers) are rather rare, thus no further discrimination - -Allene: [CX3]=[CX2]=[CX3] - - -# I.2: One Carbon-Hetero Bond -# --------------------------- - - -# I.2.1 Alkyl Halogenides - -Alkylchloride: [ClX1][CX4] -# will also hit chloromethylethers and the like, but no chloroalkenes, -alkynes or -aromats -# a more restrictive version can be obtained by modifying the Alcohol string. 
- -Alkylfluoride: [FX1][CX4] - -Alkylbromide: [BrX1][CX4] - -Alkyliodide: [IX1][CX4] - - -# I.2.2 Alcohols and Ethers - -Alcohol: [OX2H][CX4;!$(C([OX2H])[O,S,#7,#15])] -# nonspecific definition, no acetals, aminals, and the like - -Primary_alcohol: [OX2H][CX4H2;!$(C([OX2H])[O,S,#7,#15])] - -Secondary_alcohol: [OX2H][CX4H;!$(C([OX2H])[O,S,#7,#15])] - -Tertiary_alcohol: [OX2H][CX4D4;!$(C([OX2H])[O,S,#7,#15])] - -Dialkylether: [OX2]([CX4;!$(C([OX2])[O,S,#7,#15,F,Cl,Br,I])])[CX4;!$(C([OX2])[O,S,#7,#15])] -# no acetals and the like; no enolethers - -Dialkylthioether: [SX2]([CX4;!$(C([OX2])[O,S,#7,#15,F,Cl,Br,I])])[CX4;!$(C([OX2])[O,S,#7,#15])] -# no acetals and the like; no enolethers - -Alkylarylether: [OX2](c)[CX4;!$(C([OX2])[O,S,#7,#15,F,Cl,Br,I])] -# no acetals and the like; no enolethers - -Diarylether: [c][OX2][c] - -Alkylarylthioether: [SX2](c)[CX4;!$(C([OX2])[O,S,#7,#15,F,Cl,Br,I])] - -Diarylthioether: [c][SX2][c] - -Oxonium: [O+;!$([O]~[!#6]);!$([S]*~[#7,#8,#15,#16])] -# can't be aromatic, thus O and not #8 - -# I.2.3 Amines - -Amine: [NX3+0,NX4+;!$([N]~[!#6]);!$([N]*~[#7,#8,#15,#16])] -# hits all amines (prim/sec/tert/quart), including ammonium salts, also enamines, but not amides, imides, aminals, ... 
- -# the following amines include also the protonated forms - -Primary_aliph_amine: [NX3H2+0,NX4H3+;!$([N][!C]);!$([N]*~[#7,#8,#15,#16])] - -Secondary_aliph_amine: [NX3H1+0,NX4H2+;!$([N][!C]);!$([N]*~[#7,#8,#15,#16])] - -Tertiary_aliph_amine: [NX3H0+0,NX4H1+;!$([N][!C]);!$([N]*~[#7,#8,#15,#16])] - -Quaternary_aliph_ammonium: [NX4H0+;!$([N][!C]);!$([N]*~[#7,#8,#15,#16])] - -Primary_arom_amine: [NX3H2+0,NX4H3+]c - -Secondary_arom_amine: [NX3H1+0,NX4H2+;!$([N][!c]);!$([N]*~[#7,#8,#15,#16])] - -Tertiary_arom_amine: [NX3H0+0,NX4H1+;!$([N][!c]);!$([N]*~[#7,#8,#15,#16])] - -Quaternary_arom_ammonium: [NX4H0+;!$([N][!c]);!$([N]*~[#7,#8,#15,#16])] - -Secondary_mixed_amine: [NX3H1+0,NX4H2+;$([N]([c])[C]);!$([N]*~[#7,#8,#15,#16])] - -Tertiary_mixed_amine: [NX3H0+0,NX4H1+;$([N]([c])([C])[#6]);!$([N]*~[#7,#8,#15,#16])] - -Quaternary_mixed_ammonium: [NX4H0+;$([N]([c])([C])[#6][#6]);!$([N]*~[#7,#8,#15,#16])] - -Ammonium: [N+;!$([N]~[!#6]);!$(N=*);!$([N]*~[#7,#8,#15,#16])] -# only C and H substituents allowed. Quaternary or protonated amines -# NX4+ or Nv4+ is not recognized by Daylight's depictmatch if less than four C are present - - -# I.2.4 Others - -Alkylthiol: [SX2H][CX4;!$(C([SX2H])~[O,S,#7,#15])] - -Dialkylthioether: [SX2]([CX4;!$(C([SX2])[O,S,#7,#15,F,Cl,Br,I])])[CX4;!$(C([SX2])[O,S,#7,#15])] - -Alkylarylthioether: [SX2](c)[CX4;!$(C([SX2])[O,S,#7,#15])] - -Disulfide: [SX2D2][SX2D2] - -1,2-Aminoalcohol: [OX2H][CX4;!$(C([OX2H])[O,S,#7,#15,F,Cl,Br,I])][CX4;!$(C([N])[O,S,#7,#15])][NX3;!$(NC=[O,S,N])] -# does not hit alpha-amino acids, enaminoalcohols, 1,2-aminoacetals, o-aminophenols, etc. - -1,2-Diol: [OX2H][CX4;!$(C([OX2H])[O,S,#7,#15])][CX4;!$(C([OX2H])[O,S,#7,#15])][OX2H] -# does not hit alpha-hydroxy acids, enolalcohols, 1,2-hydroxyacetals, 1,2-diphenols, etc. 
- -1,1-Diol: [OX2H][CX4;!$(C([OX2H])([OX2H])[O,S,#7,#15])][OX2H] - -Hydroperoxide: [OX2H][OX2] -#does not neccessarily have to be connected to a carbon atom, includes also hydrotrioxides - -Peroxo: [OX2D2][OX2D2] - -Organolithium_compounds: [LiX1][#6,#14] - -Organomagnesium_compounds: [MgX2][#6,#14] -# not restricted to Grignard compounds, also dialkyl Mg - -Organometallic_compounds: [!#1;!#5;!#6;!#7;!#8;!#9;!#14;!#15;!#16;!#17;!#33;!#34;!#35;!#52;!#53;!#85]~[#6;!-] -# very general, includes all metals covalently bound to carbon - - -# I.3: Two Carbon-Hetero Bonds (Carbonyl and Derivatives) -# ---------------------------- - -# I.3.1 Double Bond to Hetero - -Aldehyde: [$([CX3H][#6]),$([CX3H2])]=[OX1] -# hits aldehydes including formaldehyde - -Ketone: [#6][CX3](=[OX1])[#6] -# does not include oxo-groups connected to a (hetero-) aromatic ring - -Thioaldehyde: [$([CX3H][#6]),$([CX3H2])]=[SX1] - -Thioketone: [#6][CX3](=[SX1])[#6] -# does not include thioxo-groups connected to a (hetero-) aromatic ring - -Imine: [NX2;$([N][#6]),$([NH]);!$([N][CX3]=[#7,#8,#15,#16])]=[CX3;$([CH2]),$([CH][#6]),$([C]([#6])[#6])] -# nitrogen is not part of an amidelike strukture, nor of an aromatic ring, but can be part of an aminal or similar - -Immonium: [NX3+;!$([N][!#6]);!$([N][CX3]=[#7,#8,#15,#16])] - -Oxime: [NX2](=[CX3;$([CH2]),$([CH][#6]),$([C]([#6])[#6])])[OX2H] - -Oximether: [NX2](=[CX3;$([CH2]),$([CH][#6]),$([C]([#6])[#6])])[OX2][#6;!$(C=[#7,#8])] -# ether, not ester or amide; does not hit isoxazole - - -# I.3.2. Two Single Bonds to Hetero - -Acetal: [OX2]([#6;!$(C=[O,S,N])])[CX4;!$(C(O)(O)[!#6])][OX2][#6;!$(C=[O,S,N])] -# does not hit hydroxy-methylesters, ketenacetals, hemiacetals, orthoesters, etc. - -Hemiacetal: [OX2H][CX4;!$(C(O)(O)[!#6])][OX2][#6;!$(C=[O,S,N])] - -Aminal: [NX3v3;!$(NC=[#7,#8,#15,#16])]([#6])[CX4;!$(C(N)(N)[!#6])][NX3v3;!$(NC=[#7,#8,#15,#16])][#6] -# Ns are not part of an amide or similar. 
v3 ist to exclude nitro and similar groups - -Hemiaminal: [NX3v3;!$(NC=[#7,#8,#15,#16])]([#6])[CX4;!$(C(N)(N)[!#6])][OX2H] - -Thioacetal: [SX2]([#6;!$(C=[O,S,N])])[CX4;!$(C(S)(S)[!#6])][SX2][#6;!$(C=[O,S,N])] - -Thiohemiacetal: [SX2]([#6;!$(C=[O,S,N])])[CX4;!$(C(S)(S)[!#6])][OX2H] - -Halogen_acetal_like: [NX3v3,SX2,OX2;!$(*C=[#7,#8,#15,#16])][CX4;!$(C([N,S,O])([N,S,O])[!#6])][FX1,ClX1,BrX1,IX1] -# hits chloromethylenethers and other reactive alkylating agents - -Acetal_like: [NX3v3,SX2,OX2;!$(*C=[#7,#8,#15,#16])][CX4;!$(C([N,S,O])([N,S,O])[!#6])][FX1,ClX1,BrX1,IX1,NX3v3,SX2,OX2;!$(*C=[#7,#8,#15,#16])] -# includes all of the above and other combinations (S-C-N, hydrates, ...), but still no aminomethylenesters and similar - -Halogenmethylen_ester_and_similar: [NX3v3,SX2,OX2;$(**=[#7,#8,#15,#16])][CX4;!$(C([N,S,O])([N,S,O])[!#6])][FX1,ClX1,BrX1,IX1] -# also reactive alkylating agents. Acid does not have to be carboxylic acid, also S- and P-based acids allowed - -NOS_methylen_ester_and_similar: [NX3v3,SX2,OX2;$(**=[#7,#8,#15,#16])][CX4;!$(C([N,S,O])([N,S,O])[!#6])][NX3v3,SX2,OX2;!$(*C=[#7,#8,#15,#16])] -# Same as above, but N,O or S instead of halogen. Ester/amide allowed only on one side - -Hetero_methylen_ester_and_similar: [NX3v3,SX2,OX2;$(**=[#7,#8,#15,#16])][CX4;!$(C([N,S,O])([N,S,O])[!#6])][FX1,ClX1,BrX1,IX1,NX3v3,SX2,OX2;!$(*C=[#7,#8,#15,#16])] -# Combination of the last two patterns - -Cyanhydrine: [NX1]#[CX2][CX4;$([CH2]),$([CH]([CX2])[#6]),$(C([CX2])([#6])[#6])][OX2H] - - -# I.3.3 Single Bond to Hetero, C=C Double Bond (Enols and Similar) - -Chloroalkene: [ClX1][CX3]=[CX3] - -Fluoroalkene: [FX1][CX3]=[CX3] - -Bromoalkene: [BrX1][CX3]=[CX3] - -Iodoalkene: [IX1][CX3]=[CX3] - -Enol: [OX2H][CX3;$([H1]),$(C[#6])]=[CX3] -# no phenols - -Endiol: [OX2H][CX3;$([H1]),$(C[#6])]=[CX3;$([H1]),$(C[#6])][OX2H] -# no 1,2-diphenols, ketenacetals, ... 
- -Enolether: [OX2]([#6;!$(C=[N,O,S])])[CX3;$([H0][#6]),$([H1])]=[CX3] -# finds also endiodiethers, but not enolesters, no aromats - -Enolester: [OX2]([CX3]=[OX1])[#6X3;$([#6][#6]),$([H1])]=[#6X3;!$(C[OX2H])] - - -Enamine: [NX3;$([NH2][CX3]),$([NH1]([CX3])[#6]),$([N]([CX3])([#6])[#6]);!$([N]*=[#7,#8,#15,#16])][CX3;$([CH]),$([C][#6])]=[CX3] -# does not hit amines attached to aromatic rings, nor may the nitrogen be aromatic - -Thioenol: [SX2H][CX3;$([H1]),$(C[#6])]=[CX3] - -Thioenolether: [SX2]([#6;!$(C=[N,O,S])])[CX3;$(C[#6]),$([CH])]=[CX3] - - -# I.4: Three Carbon-Hetero Bonds (Carboxyl and Derivatives) -# ------------------------------ - -Acylchloride: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[ClX1] - -Acylfluoride: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[FX1] - -Acylbromide: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[BrX1] - -Acyliodide: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[IX1] - -Acylhalide: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[FX1,ClX1,BrX1,IX1] -# all of the above - - -# The following contains all simple carboxylic combinations of O, N, S, & Hal - -# - acids, esters, amides, ... as well as a few extra cases (anhydride, hydrazide...) -# Cyclic structures (including aromats) like lactones, lactames, ... got their own -# definitions. Structures where both heteroatoms are part of an aromatic ring -# (oxazoles, imidazoles, ...) were excluded. - -Carboxylic_acid: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[$([OX2H]),$([OX1-])] -# includes carboxylate anions - -Carboxylic_ester: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[OX2][#6;!$(C=[O,N,S])] -# does not hit anhydrides or lactones - -Lactone: [#6][#6X3R](=[OX1])[#8X2][#6;!$(C=[O,N,S])] -# may also be aromatic - -Carboxylic_anhydride: [CX3;$([H0][#6]),$([H1])](=[OX1])[#8X2][CX3;$([H0][#6]),$([H1])](=[OX1]) -# anhydride formed by two carboxylic acids, no mixed anhydrides (e.g. 
between carboxylic acid and sulfuric acid); may be part of a ring, even aromatic - -Carboxylic_acid_derivative: [$([#6X3H0][#6]),$([#6X3H])](=[!#6])[!#6] -# includes most of the structures of I.4 and many more, also 1,3-heteroaromatics such as isoxazole - -Carbothioic_acid: [CX3;!R;$([C][#6]),$([CH]);$([C](=[OX1])[$([SX2H]),$([SX1-])]),$([C](=[SX1])[$([OX2H]),$([OX1-])])] -# hits both tautomeric forms, as well as anions - -Carbothioic_S_ester: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[SX2][#6;!$(C=[O,N,S])] - -Carbothioic_S_lactone: [#6][#6X3R](=[OX1])[#16X2][#6;!$(C=[O,N,S])] -# may also be aromatic - -Carbothioic_O_ester: [CX3;$([H0][#6]),$([H1])](=[SX1])[OX2][#6;!$(C=[O,N,S])] - -Carbothioic_O_lactone: [#6][#6X3R](=[SX1])[#8X2][#6;!$(C=[O,N,S])] - -Carbothioic_halide: [CX3;$([H0][#6]),$([H1])](=[SX1])[FX1,ClX1,BrX1,IX1] - -Carbodithioic_acid: [CX3;!R;$([C][#6]),$([CH]);$([C](=[SX1])[SX2H])] - -Carbodithioic_ester: [CX3;!R;$([C][#6]),$([CH]);$([C](=[SX1])[SX2][#6;!$(C=[O,N,S])])] - -Carbodithiolactone: [#6][#6X3R](=[SX1])[#16X2][#6;!$(C=[O,N,S])] - - -Amide: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] -# does not hit lactames - -Primary_amide: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[NX3H2] - -Secondary_amide: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[#7X3H1][#6;!$(C=[O,N,S])] - -Tertiary_amide: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[#7X3H0]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])] - -Lactam: [#6R][#6X3R](=[OX1])[#7X3;$([H1][#6;!$(C=[O,N,S])]),$([H0]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] -# cyclic amides, may also be aromatic - -Alkyl_imide: [#6X3;$([H0][#6]),$([H1])](=[OX1])[#7X3H0]([#6])[#6X3;$([H0][#6]),$([H1])](=[OX1]) -# may be part of a ring, even aromatic. only C allowed at central N. 
May also be triacyl amide - -N_hetero_imide: [#6X3;$([H0][#6]),$([H1])](=[OX1])[#7X3H0]([!#6])[#6X3;$([H0][#6]),$([H1])](=[OX1]) -# everything else than H or C at central N - -Imide_acidic: [#6X3;$([H0][#6]),$([H1])](=[OX1])[#7X3H1][#6X3;$([H0][#6]),$([H1])](=[OX1]) -# can be deprotonated - -Thioamide: [$([CX3;!R][#6]),$([CX3H;!R])](=[SX1])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] -# does not hit thiolactames - -Thiolactam: [#6R][#6X3R](=[SX1])[#7X3;$([H1][#6;!$(C=[O,N,S])]),$([H0]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] -# cyclic thioamides, may also be aromatic - - -Oximester: [#6X3;$([H0][#6]),$([H1])](=[OX1])[#8X2][#7X2]=,:[#6X3;$([H0]([#6])[#6]),$([H1][#6]),$([H2])] -# may also be part of a ring / aromatic - -Amidine: [NX3;!$(NC=[O,S])][CX3;$([CH]),$([C][#6])]=[NX2;!$(NC=[O,S])] -# only basic amidines, not as part of aromatic ring (e.g. imidazole) - -Hydroxamic_acid: [CX3;$([H0][#6]),$([H1])](=[OX1])[#7X3;$([H1]),$([H0][#6;!$(C=[O,N,S])])][$([OX2H]),$([OX1-])] - -Hydroxamic_acid_ester: [CX3;$([H0][#6]),$([H1])](=[OX1])[#7X3;$([H1]),$([H0][#6;!$(C=[O,N,S])])][OX2][#6;!$(C=[O,N,S])] -# does not hit anhydrides of carboxylic acids withs hydroxamic acids - - -Imidoacid: [CX3R0;$([H0][#6]),$([H1])](=[NX2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[$([OX2H]),$([OX1-])] -# not cyclic - -Imidoacid_cyclic: [#6R][#6X3R](=,:[#7X2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[$([OX2H]),$([OX1-])] -# the enamide-form of lactames. may be aromatic like 2-hydroxypyridine - -Imidoester: [CX3R0;$([H0][#6]),$([H1])](=[NX2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[OX2][#6;!$(C=[O,N,S])] -# esters of the above structures. no anhydrides. 
- -Imidolactone: [#6R][#6X3R](=,:[#7X2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[OX2][#6;!$(C=[O,N,S])] -# no oxazoles and similar - -Imidothioacid: [CX3R0;$([H0][#6]),$([H1])](=[NX2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[$([SX2H]),$([SX1-])] -# not cyclic - -Imidothioacid_cyclic: [#6R][#6X3R](=,:[#7X2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[$([SX2H]),$([SX1-])] -# the enamide-form of thiolactames. may be aromatic like 2-thiopyridine - -Imidothioester: [CX3R0;$([H0][#6]),$([H1])](=[NX2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[SX2][#6;!$(C=[O,N,S])] -# thioesters of the above structures. no anhydrides. - -Imidothiolactone: [#6R][#6X3R](=,:[#7X2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[SX2][#6;!$(C=[O,N,S])] -# no thioxazoles and similar - -Amidine: [#7X3v3;!$(N([#6X3]=[#7X2])C=[O,S])][CX3R0;$([H1]),$([H0][#6])]=[NX2v3;!$(N(=[#6X3][#7X3])C=[O,S])] -# only basic amidines, not substituted by carbonyl or thiocarbonyl, not as part of a ring - -Imidolactam: [#6][#6X3R;$([H0](=[NX2;!$(N(=[#6X3][#7X3])C=[O,S])])[#7X3;!$(N([#6X3]=[#7X2])C=[O,S])]),$([H0](-[NX3;!$(N([#6X3]=[#7X2])C=[O,S])])=,:[#7X2;!$(N(=[#6X3][#7X3])C=[O,S])])] -# one of the two C~N bonds is part of a ring (may be aromatic), but not both - thus no imidazole - -Imidoylhalide: [CX3R0;$([H0][#6]),$([H1])](=[NX2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[FX1,ClX1,BrX1,IX1] -# not cyclic - -Imidoylhalide_cyclic: [#6R][#6X3R](=,:[#7X2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[FX1,ClX1,BrX1,IX1] -# may also be aromatic - -# may be ring, aromatic, substituted with carbonyls, hetero, ... -# (everything else would get too complicated) - -Amidrazone: [$([$([#6X3][#6]),$([#6X3H])](=[#7X2v3])[#7X3v3][#7X3v3]),$([$([#6X3][#6]),$([#6X3H])]([#7X3v3])=[#7X2v3][#7X3v3])] -# hits both tautomers. as above, it may be ring, aromatic, substituted with carbonyls, hetero, ... 
- - -Alpha_aminoacid: [NX3,NX4+;!$([N]~[!#6]);!$([N]*~[#7,#8,#15,#16])][C][CX3](=[OX1])[OX2H,OX1-] -# N may be alkylated, but not part of an amide (as in peptides), ionic forms are included -# includes also non-natural aminoacids with double-bonded or two aliph./arom. substituents at alpha-C -# N may not be aromatic as in 1H-pyrrole-2-carboxylic acid - -Alpha_hydroxyacid: [OX2H][C][CX3](=[OX1])[OX2H,OX1-] - -Peptide_middle: [NX3;$([N][CX3](=[OX1])[C][NX3,NX4+])][C][CX3](=[OX1])[NX3;$([N][C][CX3](=[OX1])[NX3,OX2,OX1-])] -# finds peptidic structures which are neither C- nor N-terminal. Both neighbours must be amino-acids/peptides - -Peptide_C_term: [NX3;$([N][CX3](=[OX1])[C][NX3,NX4+])][C][CX3](=[OX1])[OX2H,OX1-] -# finds C-terminal amino acids - -Peptide_N_term: [NX3,NX4+;!$([N]~[!#6]);!$([N]*~[#7,#8,#15,#16])][C][CX3](=[OX1])[NX3;$([N][C][CX3](=[OX1])[NX3,OX2,OX1-])] -# finds N-terminal amino acids. As above, N may be substituted, but not part of an amide-bond. - - -Carboxylic_orthoester: [#6][OX2][CX4;$(C[#6]),$([CH])]([OX2][#6])[OX2][#6] -# hits also anhydride like struktures (e. g. HC(OMe)2-OC=O residues) - -Ketene: [CX3]=[CX2]=[OX1] - -Ketenacetal: [#7X2,#8X3,#16X2;$(*[#6,#14])][#6X3]([#7X2,#8X3,#16X2;$(*[#6,#14])])=[#6X3] -# includes aminals, silylacetals, ketenesters, etc. 
C=C DB is not aromatic, everything else may be - -Nitrile: [NX1]#[CX2] -# includes cyanhydrines - -Isonitrile: [CX1-]#[NX2+] - - -Vinylogous_carbonyl_or_carboxyl_derivative: [#6X3](=[OX1])[#6X3]=,:[#6X3][#7,#8,#16,F,Cl,Br,I] -# may be part of a ring, even aromatic - -Vinylogous_acid: [#6X3](=[OX1])[#6X3]=,:[#6X3][$([OX2H]),$([OX1-])] - -Vinylogous_ester: [#6X3](=[OX1])[#6X3]=,:[#6X3][#6;!$(C=[O,N,S])] - -Vinylogous_amide: [#6X3](=[OX1])[#6X3]=,:[#6X3][#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] - -Vinylogous_halide: [#6X3](=[OX1])[#6X3]=,:[#6X3][FX1,ClX1,BrX1,IX1] - - - -# I.5: Four Carbon-Hetero Bonds (Carbonic Acid and Derivatives) -# ----------------------------- - -Carbonic_acid_dieester: [#6;!$(C=[O,N,S])][#8X2][#6X3](=[OX1])[#8X2][#6;!$(C=[O,N,S])] -# may be part of a ring, even aromatic - -Carbonic_acid_esterhalide: [#6;!$(C=[O,N,S])][OX2;!R][CX3](=[OX1])[OX2][FX1,ClX1,BrX1,IX1] - -Carbonic_acid_monoester: [#6;!$(C=[O,N,S])][OX2;!R][CX3](=[OX1])[$([OX2H]),$([OX1-])] -# unstable - -Carbonic_acid_derivatives: [!#6][#6X3](=[!#6])[!#6] - - -Thiocarbonic_acid_dieester: [#6;!$(C=[O,N,S])][#8X2][#6X3](=[SX1])[#8X2][#6;!$(C=[O,N,S])] -# may be part of a ring, even aromatic - -Thiocarbonic_acid_esterhalide: [#6;!$(C=[O,N,S])][OX2;!R][CX3](=[SX1])[OX2][FX1,ClX1,BrX1,IX1] - -Thiocarbonic_acid_monoester: [#6;!$(C=[O,N,S])][OX2;!R][CX3](=[SX1])[$([OX2H]),$([OX1-])] - - -Urea:[#7X3;!$([#7][!#6])][#6X3](=[OX1])[#7X3;!$([#7][!#6])] -# no check whether part of imide, biuret, etc. Aromatic structures are only hit if -# both N share no double bonds, like in the dioxo-form of uracil - -Thiourea: [#7X3;!$([#7][!#6])][#6X3](=[SX1])[#7X3;!$([#7][!#6])] - -Isourea: [#7X2;!$([#7][!#6])]=,:[#6X3]([#8X2&!$([#8][!#6]),OX1-])[#7X3;!$([#7][!#6])] -# O may be substituted. no check whether further amide-like bonds are present. 
Aromatic -# structures are only hit if single bonded N shares no additional double bond, like in -# the 1-hydroxy-3-oxo form of uracil - -Isothiourea: [#7X2;!$([#7][!#6])]=,:[#6X3]([#16X2&!$([#16][!#6]),SX1-])[#7X3;!$([#7][!#6])] - -Guanidine: [N;v3X3,v4X4+][CX3](=[N;v3X2,v4X3+])[N;v3X3,v4X4+] -# also hits guanidinium salts. v3 and v4 to avoid nitroamidines - -Carbaminic_acid: [NX3]C(=[OX1])[O;X2H,X1-] -# quite unstable, unlikely to be found. Also hits salts - -Urethan: [#7X3][#6](=[OX1])[#8X2][#6] -# also hits when part of a ring, no check whether the last C is part of carbonyl - -Biuret: [#7X3][#6](=[OX1])[#7X3][#6](=[OX1])[#7X3] - -Semicarbazide: [#7X3][#7X3][#6X3]([#7X3;!$([#7][#7])])=[OX1] - -Carbazide: [#7X3][#7X3][#6X3]([#7X3][#7X3])=[OX1] - -Semicarbazone: [#7X2](=[#6])[#7X3][#6X3]([#7X3;!$([#7][#7])])=[OX1] - -Carbazone: [#7X2](=[#6])[#7X3][#6X3]([#7X3][#7X3])=[OX1] - -Thiosemicarbazide: [#7X3][#7X3][#6X3]([#7X3;!$([#7][#7])])=[SX1] - -Thiocarbazide: [#7X3][#7X3][#6X3]([#7X3][#7X3])=[SX1] - -Thiosemicarbazone: [#7X2](=[#6])[#7X3][#6X3]([#7X3;!$([#7][#7])])=[SX1] - -Thiocarbazone: [#7X2](=[#6])[#7X3][#6X3]([#7X3][#7X3])=[SX1] - - -Isocyanate: [NX2]=[CX2]=[OX1] - -Cyanate: [OX2][CX2]#[NX1] - -Isothiocyanate: [NX2]=[CX2]=[SX1] - -Thiocyanate: [SX2][CX2]#[NX1] - -Carbodiimide: [NX2]=[CX2]=[NX2] - -Orthocarbonic_derivatives: [CX4H0]([O,S,#7])([O,S,#7])([O,S,#7])[O,S,#7,F,Cl,Br,I] -# halogen allowed just once, to avoid mapping to -OCF3 and similar groups (much more -# stable as for example C(OCH3)4) - - -# I.6 Aromatics -# ------------- - -# I know that this classification is not very logical, arylamines are found under I.2 ... 
- -Phenol: [OX2H][c] - -1,2-Diphenol: [OX2H][c][c][OX2H] - -Arylchloride: [Cl][c] - -Arylfluoride: [F][c] - -Arylbromide: [Br][c] - -Aryliodide: [I][c] - -Arylthiol: [SX2H][c] - -Iminoarene: [c]=[NX2;$([H1]),$([H0][#6;!$([C]=[N,S,O])])] -# N may be substituted with H or C, but not carbonyl or similar -# aromatic atom is always C, not S or P (these are not planar when substituted) - -Oxoarene: [c]=[OX1] - -Thioarene: [c]=[SX1] - -Hetero_N_basic_H: [nX3H1+0] -# as in pyrole. uncharged to exclude pyridinium ions - -Hetero_N_basic_no_H: [nX3H0+0] -# as in N-methylpyrole. uncharged to exclude pyridinium ions - -Hetero_N_nonbasic: [nX2,nX3+] -# as in pyridine, pyridinium - -Hetero_O: [o] - -Hetero_S: [sX2] -# X2 because Daylight's depictmatch falsely describes C1=CS(=O)C=C1 as aromatic -# (is not planar because of lonepair at S) - -Heteroaromatic: [a;!c] - - -# Part II: N, S, P, Si, B -# ======================= - - -# II.1 Nitrogen -# ------------- - -Nitrite: [NX2](=[OX1])[O;$([X2]),$([X1-])] -# hits nitrous acid, its anion, esters, and other O-substituted derivatives - -Thionitrite: [SX2][NX2]=[OX1] - -Nitrate: [$([NX3](=[OX1])(=[OX1])[O;$([X2]),$([X1-])]),$([NX3+]([OX1-])(=[OX1])[O;$([X2]),$([X1-])])] -# hits nitric acid, its anion, esters, and other O-substituted derivatives - -Nitro: [$([NX3](=O)=O),$([NX3+](=O)[O-])][!#8] -# hits nitro groups attached to C,N, ... but not nitrates - -Nitroso: [NX2](=[OX1])[!#7;!#8] -# no nitrites, no nitrosamines - -Azide: [NX1]~[NX2]~[NX2,NX1] -# hits both mesomeric forms, also anion - -Acylazide: [CX3](=[OX1])[NX2]~[NX2]~[NX1] - -Diazo: [$([#6]=[NX2+]=[NX1-]),$([#6-]-[NX2+]#[NX1])] - -Diazonium: [#6][NX2+]#[NX1] - -Nitrosamine: [#7;!$(N*=O)][NX2]=[OX1] - -Nitrosamide: [NX2](=[OX1])N-*=O -# includes nitrososulfonamides - -N-Oxide: [$([#7+][OX1-]),$([#7v5]=[OX1]);!$([#7](~[O])~[O]);!$([#7]=[#7])] -# Hits both forms. Won't hit azoxy, nitro, nitroso, or nitrate. 
- - -Hydrazine: [NX3;$([H2]),$([H1][#6]),$([H0]([#6])[#6]);!$(NC=[O,N,S])][NX3;$([H2]),$([H1][#6]),$([H0]([#6])[#6]);!$(NC=[O,N,S])] -# no hydrazides - -Hydrazone: [NX3;$([H2]),$([H1][#6]),$([H0]([#6])[#6]);!$(NC=[O,N,S])][NX2]=[#6] - -Hydroxylamine: [NX3;$([H2]),$([H1][#6]),$([H0]([#6])[#6]);!$(NC=[O,N,S])][OX2;$([H1]),$(O[#6;!$(C=[N,O,S])])] -# no discrimination between O-, N-, and O,N-substitution - - -# II.2 Sulfur -# ----------- - -Sulfon: [$([SX4](=[OX1])(=[OX1])([#6])[#6]),$([SX4+2]([OX1-])([OX1-])([#6])[#6])] -# can't be aromatic, thus S and not #16 - -Sulfoxide: [$([SX3](=[OX1])([#6])[#6]),$([SX3+]([OX1-])([#6])[#6])] - -Sulfonium: [S+;!$([S]~[!#6]);!$([S]*~[#7,#8,#15,#16])] -# can't be aromatic, thus S and not #16 - -Sulfuric_acid: [SX4](=[OX1])(=[OX1])([$([OX2H]),$([OX1-])])[$([OX2H]),$([OX1-])] -# includes anions - -Sulfuric_monoester: [SX4](=[OX1])(=[OX1])([$([OX2H]),$([OX1-])])[OX2][#6;!$(C=[O,N,S])] - -Sulfuric_diester: [SX4](=[OX1])(=[OX1])([OX2][#6;!$(C=[O,N,S])])[OX2][#6;!$(C=[O,N,S])] - -Sulfuric_monoamide: [SX4](=[OX1])(=[OX1])([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])[$([OX2H]),$([OX1-])] - -Sulfuric_diamide: [SX4](=[OX1])(=[OX1])([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] - -Sulfuric_esteramide: [SX4](=[OX1])(=[OX1])([#7X3][#6;!$(C=[O,N,S])])[OX2][#6;!$(C=[O,N,S])] - -Sulfuric_derivative: [SX4D4](=[!#6])(=[!#6])([!#6])[!#6] -# everything else (would not be a "true" derivative of sulfuric acid, if one of the substituents were less electronegative -# than sulfur, but this should be very very rare, anyway) - - - -#### sulfurous acid and derivatives missing!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
- - - - -Sulfonic_acid: [SX4;$([H1]),$([H0][#6])](=[OX1])(=[OX1])[$([OX2H]),$([OX1-])] - -Sulfonamide: [SX4;$([H1]),$([H0][#6])](=[OX1])(=[OX1])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] - -Sulfonic_ester: [SX4;$([H1]),$([H0][#6])](=[OX1])(=[OX1])[OX2][#6;!$(C=[O,N,S])] - -Sulfonic_halide: [SX4;$([H1]),$([H0][#6])](=[OX1])(=[OX1])[FX1,ClX1,BrX1,IX1] - -Sulfonic_derivative: [SX4;$([H1]),$([H0][#6])](=[!#6])(=[!#6])[!#6] -# includes all of the above and many more -# for comparison: this is what "all sulfonic derivatives but not the ones above" would look like: -# [$([SX4;$([H1]),$([H0][#6])](=[!#6])(=[!#6;!O])[!#6]),$([SX4;$([H1]),$([H0][#6])](=[OX1])(=[OX1])[!$([FX1,ClX1,BrX1,IX1]);!$([#6]);!$([OX2H]);!$([OX1-]);!$([OX2][#6;!$(C=[O,N,S])]);!$([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])])] - - -Sulfinic_acid: [SX3;$([H1]),$([H0][#6])](=[OX1])[$([OX2H]),$([OX1-])] - -Sulfinic_amide: [SX3;$([H1]),$([H0][#6])](=[OX1])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] - -Sulfinic_ester: [SX3;$([H1]),$([H0][#6])](=[OX1])[OX2][#6;!$(C=[O,N,S])] - -Sulfinic_halide: [SX3;$([H1]),$([H0][#6])](=[OX1])[FX1,ClX1,BrX1,IX1] - -Sulfinic_derivative: [SX3;$([H1]),$([H0][#6])](=[!#6])[!#6] - -Sulfenic_acid: [SX2;$([H1]),$([H0][#6])][$([OX2H]),$([OX1-])] - -Sulfenic_amide: [SX2;$([H1]),$([H0][#6])][#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] - -Sulfenic_ester: [SX2;$([H1]),$([H0][#6])][OX2][#6;!$(C=[O,N,S])] - -Sulfenic_halide: [SX2;$([H1]),$([H0][#6])][FX1,ClX1,BrX1,IX1] - -Sulfenic_derivative: [SX2;$([H1]),$([H0][#6])][!#6] - - -# II.3 Phosphorous -# ---------------- - -Phosphine: [PX3;$([H3]),$([H2][#6]),$([H1]([#6])[#6]),$([H0]([#6])([#6])[#6])] -# similar to amine, but less restrictive: includes also amide- and aminal-analogues - -Phosphine_oxide: 
[PX4;$([H3]=[OX1]),$([H2](=[OX1])[#6]),$([H1](=[OX1])([#6])[#6]),$([H0](=[OX1])([#6])([#6])[#6])] - -Phosphonium: [P+;!$([P]~[!#6]);!$([P]*~[#7,#8,#15,#16])] -# similar to Ammonium - -Phosphorylen: [PX4;$([H3]=[CX3]),$([H2](=[CX3])[#6]),$([H1](=[CX3])([#6])[#6]),$([H0](=[CX3])([#6])([#6])[#6])] - - -# conventions for the following acids and derivatives: -# acids find protonated and deprotonated acids -# esters do not find mixed anhydrides ( ...P-O-C(=O)) -# derivatives: subtituents which go in place of the OH and =O are not H or C (may also be O, -# thus including acids and esters) - -Phosphonic_acid: [PX4;$([H1]),$([H0][#6])](=[OX1])([$([OX2H]),$([OX1-])])[$([OX2H]),$([OX1-])] -# includes anions - -Phosphonic_monoester: [PX4;$([H1]),$([H0][#6])](=[OX1])([$([OX2H]),$([OX1-])])[OX2][#6;!$(C=[O,N,S])] - -Phosphonic_diester: [PX4;$([H1]),$([H0][#6])](=[OX1])([OX2][#6;!$(C=[O,N,S])])[OX2][#6;!$(C=[O,N,S])] - -Phosphonic_monoamide: [PX4;$([H1]),$([H0][#6])](=[OX1])([$([OX2H]),$([OX1-])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] - -Phosphonic_diamide: [PX4;$([H1]),$([H0][#6])](=[OX1])([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] - -Phosphonic_esteramide: [PX4;$([H1]),$([H0][#6])](=[OX1])([OX2][#6;!$(C=[O,N,S])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] - -Phosphonic_acid_derivative: [PX4;$([H1]),$([H0][#6])](=[!#6])([!#6])[!#6] -# all of the above and much more - - -Phosphoric_acid: [PX4D4](=[OX1])([$([OX2H]),$([OX1-])])([$([OX2H]),$([OX1-])])[$([OX2H]),$([OX1-])] -# includes anions - -Phosphoric_monoester: [PX4D4](=[OX1])([$([OX2H]),$([OX1-])])([$([OX2H]),$([OX1-])])[OX2][#6;!$(C=[O,N,S])] - -Phosphoric_diester: [PX4D4](=[OX1])([$([OX2H]),$([OX1-])])([OX2][#6;!$(C=[O,N,S])])[OX2][#6;!$(C=[O,N,S])] - -Phosphoric_triester: 
[PX4D4](=[OX1])([OX2][#6;!$(C=[O,N,S])])([OX2][#6;!$(C=[O,N,S])])[OX2][#6;!$(C=[O,N,S])] - -Phosphoric_monoamide: [PX4D4](=[OX1])([$([OX2H]),$([OX1-])])([$([OX2H]),$([OX1-])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] - -Phosphoric_diamide: [PX4D4](=[OX1])([$([OX2H]),$([OX1-])])([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] - -Phosphoric_triamide: [PX4D4](=[OX1])([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] - -Phosphoric_monoestermonoamide: [PX4D4](=[OX1])([$([OX2H]),$([OX1-])])([OX2][#6;!$(C=[O,N,S])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] - -Phosphoric_diestermonoamide: [PX4D4](=[OX1])([OX2][#6;!$(C=[O,N,S])])([OX2][#6;!$(C=[O,N,S])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] - -Phosphoric_monoesterdiamide: [PX4D4](=[OX1])([OX2][#6;!$(C=[O,N,S])])([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] - -Phosphoric_acid_derivative: [PX4D4](=[!#6])([!#6])([!#6])[!#6] - - -Phosphinic_acid: [PX4;$([H2]),$([H1][#6]),$([H0]([#6])[#6])](=[OX1])[$([OX2H]),$([OX1-])] - -Phosphinic_ester: [PX4;$([H2]),$([H1][#6]),$([H0]([#6])[#6])](=[OX1])[OX2][#6;!$(C=[O,N,S])] - -Phosphinic_amide: [PX4;$([H2]),$([H1][#6]),$([H0]([#6])[#6])](=[OX1])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] - -Phosphinic_acid_derivative: [PX4;$([H2]),$([H1][#6]),$([H0]([#6])[#6])](=[!#6])[!#6] - - -Phosphonous_acid: 
[PX3;$([H1]),$([H0][#6])]([$([OX2H]),$([OX1-])])[$([OX2H]),$([OX1-])] - -Phosphonous_monoester: [PX3;$([H1]),$([H0][#6])]([$([OX2H]),$([OX1-])])[OX2][#6;!$(C=[O,N,S])] - -Phosphonous_diester: [PX3;$([H1]),$([H0][#6])]([OX2][#6;!$(C=[O,N,S])])[OX2][#6;!$(C=[O,N,S])] - -Phosphonous_monoamide: [PX3;$([H1]),$([H0][#6])]([$([OX2H]),$([OX1-])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] - -Phosphonous_diamide: [PX3;$([H1]),$([H0][#6])]([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] - -Phosphonous_esteramide: [PX3;$([H1]),$([H0][#6])]([OX2][#6;!$(C=[O,N,S])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] - -Phosphonous_derivatives: [PX3;$([D2]),$([D3][#6])]([!#6])[!#6] - - -Phosphinous_acid: [PX3;$([H2]),$([H1][#6]),$([H0]([#6])[#6])][$([OX2H]),$([OX1-])] - -Phosphinous_ester: [PX3;$([H2]),$([H1][#6]),$([H0]([#6])[#6])][OX2][#6;!$(C=[O,N,S])] - -Phosphinous_amide: [PX3;$([H2]),$([H1][#6]),$([H0]([#6])[#6])][#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] - -Phosphinous_derivatives: [PX3;$([H2]),$([H1][#6]),$([H0]([#6])[#6])][!#6] - - -# II.4 Silicon -# ------------ - -Quart_silane: [SiX4]([#6])([#6])([#6])[#6] -# four C-substituents. non-reactive, non-toxic, in experimental phase for drug development - -Non-quart_silane: [SiX4;$([H1]([#6])([#6])[#6]),$([H2]([#6])[#6]),$([H3][#6]),$([H4])] -# has 1-4 hydride(s), reactive. 
Daylight's depictmatch does not add hydrogens automatically to -# the free positions at Si, thus Hs had to be added implicitly - -Silylmonohalide: [SiX4]([FX1,ClX1,BrX1,IX1])([#6])([#6])[#6] -# reagents for inserting protection groups - -Het_trialkylsilane: [SiX4]([!#6])([#6])([#6])[#6] -# mostly acid-labile protection groups such as trimethylsilyl-ethers - -Dihet_dialkylsilane: [SiX4]([!#6])([!#6])([#6])[#6] - -Trihet_alkylsilane: [SiX4]([!#6])([!#6])([!#6])[#6] - -Silicic_acid_derivative: [SiX4]([!#6])([!#6])([!#6])[!#6] -# four substituent which are neither C nor H - - -# II.5 Boron -# ---------- - -Trialkylborane: [BX3]([#6])([#6])[#6] -# also carbonyls allowed - -Boric_acid_derivatives: [BX3]([!#6])([!#6])[!#6] -# includes acids, esters, amides, ... H-substituent at B is very rare. - -Boronic_acid_derivative: [BX3]([!#6])([!#6])[!#6] -# # includes acids, esters, amides, ... - -Borohydride: [BH1,BH2,BH3,BH4] -# at least one H attached to B - -Quaternary_boron: [BX4] -# mostly borates (negative charge), in complex with Lewis-base - - - -# Part III: Some Special Patterns -# =============================== - - -# III.1 Chains -# ------------ - -# some simple chains - - - -# III.2 Rings -# ----------- - -Aromatic: a - -Heterocyclic: [!#6;!R0] -# may be aromatic or not - -Epoxide: [OX2r3]1[#6r3][#6r3]1 -# toxic/reactive. may be annelated to aromat, but must not be aromatic itself (oxirane-2,3-dione) - -NH_aziridine: [NX3H1r3]1[#6r3][#6r3]1 -# toxic/reactive according to Maybridge's garbage filter - -Spiro: [D4R;$(*(@*)(@*)(@*)@*)] -# at least two different rings can be found which are sharing just one atom. 
-# these two rings can be connected by a third ring, so it matches also some -# bridged systems, like morphine - -Annelated_rings: [R;$(*(@*)(@*)@*);!$([R2;$(*(@*)(@*)(@*)@*)])]@[R;$(*(@*)(@*)@*);!$([R2;$(*(@*)(@*)(@*)@*)])] -# two different rings sharing exactly two atoms - -Bridged_rings: [R;$(*(@*)(@*)@*);!$([D4R;$(*(@*)(@*)(@*)@*)]);!$([R;$(*(@*)(@*)@*);!$([R2;$(*(@*)(@*)(@*)@*)])]@[R;$(*(@*)(@*)@*);!$([R2;$(*(@*)(@*)(@*)@*)])])] -# part of two or more rings, not spiro, not annelated -> finds bridgehead atoms, -# but only if they are not annelated at the same time - otherwise impossible (?) -# to distinguish from non-bridgehead annelated atoms - -# some basic ring-patterns (just size, no other information): - - - - - -# III.3 Sugars and Nucleosides/Nucleotides, Steroids -# -------------------------------------------------- - -# because of the large variety of sugar derivatives, different patterns can be applied. -# The choice of patterns and their combinations will depend on the contents of the database -# e.g. natural products, nucleoside analoges with modified sugars, ... as well as on the -# desired restriction - - -Sugar_pattern_1: [OX2;$([r5]1@C@C@C(O)@C1),$([r6]1@C@C@C(O)@C(O)@C1)] -# 5 or 6-membered ring containing one O and at least one (r5) or two (r6) oxygen-substituents. - -Sugar_pattern_2: [OX2;$([r5]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C1),$([r6]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C@C1)] -# 5 or 6-membered ring containing one O and an acetal-like bond at postion 2. 
- -Sugar_pattern_combi: [OX2;$([r5]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C(O)@C1),$([r6]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C(O)@C(O)@C1)] -# combination of the two above - -Sugar_pattern_2_reducing: [OX2;$([r5]1@C(!@[OX2H1])@C@C@C1),$([r6]1@C(!@[OX2H1])@C@C@C@C1)] -# 5 or 6-membered cyclic hemi-acetal - -Sugar_pattern_2_alpha: [OX2;$([r5]1@[C@@](!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C1),$([r6]1@[C@@](!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C@C1)] -# 5 or 6-membered cyclic hemi-acetal - -Sugar_pattern_2_beta: [OX2;$([r5]1@[C@](!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C1),$([r6]1@[C@](!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C@C1)] -# 5 or 6-membered cyclic hemi-acetal - -##Poly_sugar_1: ([OX2;$([r5]1@C@C@C(O)@C1),$([r6]1@C@C@C(O)@C(O)@C1)].[OX2;$([r5]1@C@C@C(O)@C1),$([r6]1@C@C@C(O)@C(O)@C1)]) -# pattern1 occours more than once (in same molecule, but moieties don't have to be adjacent!) - -##Poly_sugar_2: ([OX2;$([r5]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C1),$([r6]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C@C1)].[OX2;$([r5]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C1),$([r6]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C@C1)]) -# pattern2 occours more than once (in same molecule, but moieties don't have to be adjacent!) - - -# III.4 Everything else... -# ------------------------ - -Conjugated_double_bond: *=*[*]=,#,:[*] - -Conjugated_tripple_bond: *#*[*]=,#,:[*] - -Cis_double_bond: */[D2]=[D2]\* -# only one single-bonded substituent on each DB-atom. no aromats. -# only found when character of DB is explicitely stated. - -Trans_double_bond: */[D2]=[D2]/* -# analog - -Mixed_anhydrides: [$(*=O),$([#16,#14,#5]),$([#7]([#6]=[OX1]))][#8X2][$(*=O),$([#16,#14,#5]),$([#7]([#6]=[OX1]))] -# should hits all combinations of two acids - -Halogen_on_hetero: [FX1,ClX1,BrX1,IX1][!#6] - -Halogen_multi_subst: [F,Cl,Br,I;!$([X1]);!$([X0-])] -# Halogen which is not mono-substituted nor an anion, e.g. chlorate. -# Most of these cases should be also filtered by Halogen_on_hetero. 
- -Trifluoromethyl: [FX1][CX4;!$([H0][Cl,Br,I]);!$([F][C]([F])([F])[F])]([FX1])([FX1]) -# C with three F attached, connected to anything which is not another halogen - -C_ONS_bond: [#6]~[#7,#8,#16] -# probably all drug-like molecules have at least one O, N, or S connected to a C -> nice filter - -## Mixture: (*).(*) -# two or more seperate parts, may also be salt -# component-level grouping is not yet supported in Open Babel Version 2.0 - - -Charged: [!+0] - -Anion: [-1,-2,-3,-4,-5,-6,-7] - -Kation: [+1,+2,+3,+4,+5,+6,+7] - -Salt: ([-1,-2,-3,-4,-5,-6,-7]).([+1,+2,+3,+4,+5,+6,+7]) -# two or more seperate components with opposite charges - -##Zwitterion: ([-1,-2,-3,-4,-5,-6,-7].[+1,+2,+3,+4,+5,+6,+7]) -# both negative and positive charges somewhere within the same molecule. - -1,3-Tautomerizable: [$([#7X2,OX1,SX1]=*[!H0;!$([a;!n])]),$([#7X3,OX2,SX2;!H0]*=*),$([#7X3,OX2,SX2;!H0]*:n)] -# 1,3 migration of H allowed. Includes keto/enol and amide/enamide. -# Aromatic rings must stay aromatic - no keto form of phenol - -1,5-Tautomerizable: [$([#7X2,OX1,SX1]=,:**=,:*[!H0;!$([a;!n])]),$([#7X3,OX2,SX2;!H0]*=**=*),$([#7X3,OX2,SX2;!H0]*=,:**:n)] - -Rotatable_bond: [!$(*#*)&!D1]-!@[!$(*#*)&!D1] -# taken from http://www.daylight.com/support/contrib/smarts/content.html - -Michael_acceptor: [CX3]=[CX3][$([CX3]=[O,N,S]),$(C#[N]),$([S,P]=[OX1]),$([NX3]=O),$([NX3+](=O)[O-])] -# the classical case: C=C near carbonyl, nitrile, nitro, or similar -# Oxo-heteroaromats and similar are not included. - -Dicarbodiazene: [CX3](=[OX1])[NX2]=[NX2][CX3](=[OX1]) -# Michael-like acceptor, see Mitsunobu reaction - -# H-Bond_donor: - -# H-Bond_acceptor: - -# Pos_ionizable: - -# Neg_ionizable: - -# Unlikely_ions: -# O+,N-,C+,C-, ... 
- -CH-acidic: [$([CX4;!$([H0]);!$(C[!#6;!$([P,S]=O);!$(N(~O)~O)])][$([CX3]=[O,N,S]),$(C#[N]),$([S,P]=[OX1]),$([NX3]=O),$([NX3+](=O)[O-]);!$(*[S,O,N;H1,H2]);!$([*+0][S,O;X1-])]),$([CX4;!$([H0])]1[CX3]=[CX3][CX3]=[CX3]1)] -# C-H alpha to carbony, nitro or similar, C is not double-bonded, only C, H, S,P=O and nitro substituents allowed. -# pentadiene is included. acids, their salts, prim./sec. amides, and imides are excluded. -# hits also CH-acidic_strong - -CH-acidic_strong: [CX4;!$([H0]);!$(C[!#6;!$([P,S]=O);!$(N(~O)~O)])]([$([CX3]=[O,N,S]),$(C#[N]),$([S,P]=[OX1]),$([NX3]=O),$([NX3+](=O)[O-]);!$(*[S,O,N;H1,H2]);!$([*+0][S,O;X1-])])[$([CX3]=[O,N,S]),$(C#[N]),$([S,P]=[OX1]),$([NX3]=O),$([NX3+](=O)[O-]);!$(*[S,O,N;H1,H2]);!$([*+0][S,O;X1-])] -# same as above (without pentadiene), but carbonyl or similar on two or three sides - -Chiral_center_specified: [$([*@](~*)(~*)(*)*),$([*@H](*)(*)*),$([*@](~*)(*)*),$([*@H](~*)~*)] -# Hits atoms with tetrahedral chirality, if chiral center is specified in the SMILES string -# depictmach does not find oxonium, sulfonium, or sulfoxides! - -# Chiral_center_unspecified: [$([*@?](~*)(~*)(*)*),$([*@?H](*)(*)*),$([*@?](~*)(*)*),$([*@?H](~*)~*)] -# Hits atoms with tetrahedral chirality, if chiral center is not specified in the SMILES string -# "@?" (unspecified chirality) is not yet supported in Open Babel Version 2.0 - \ No newline at end of file diff --git a/paper/appendix/functional-groups.csv b/paper/appendix/functional-groups.csv deleted file mode 120000 index ce02e14..0000000 --- a/paper/appendix/functional-groups.csv +++ /dev/null @@ -1 +0,0 @@ -functional-groups.csv \ No newline at end of file diff --git a/paper/create-median-correlation.rb b/paper/create-median-correlation.rb index 6aeee11..9a2f6f5 100644 --- a/paper/create-median-correlation.rb +++ b/paper/create-median-correlation.rb @@ -16,13 +16,13 @@ common_compound_ids.each do |cid| new_values -= identical end unless old_values.empty? or new_values.empty? 
- data << [c.smiles,old_values.mean,new_values.mean] + data << [c.smiles,old_values.median,new_values.median] end end data.sort!{|a,b| a[1] <=> b[1]} -CSV.open(File.join(DATA,"common-median.csv"),"w+") do |csv| +CSV.open(File.join(DATA,"median-correlation.csv"),"w+") do |csv| csv << ["SMILES","mazzatorta","swiss"] data.each{|r| csv << r} end diff --git a/paper/crossvalidation-plots.R b/paper/crossvalidation-plots.R new file mode 100644 index 0000000..6665fdb --- /dev/null +++ b/paper/crossvalidation-plots.R @@ -0,0 +1,20 @@ +library(ggplot2) +library(grid) +library(gridExtra) + +mazzatorta = read.csv("data/mazzatorta-cv.csv",header=T) +swiss = read.csv("data/swiss-cv.csv",header=T) +combined = read.csv("data/combined-cv.csv",header=T) + +#experimental <- read.csv("data/median-correlation.csv",header=T) +#p1 = qplot(-log10(mazzatorta),-log10(swiss),data=experimental,xlab="-log10(LOAEL Mazzatorta median)",ylab="-log10(LOAEL Swiss Federal Office median)",main="Experimental data") + geom_point() + geom_abline(intercept=0.0) + xlim(-2,4.5) + ylim(-2,4.5) + +p2 = qplot(-log10(LOAEL_predicted),-log10(LOAEL_measured_median),data=mazzatorta,xlab="-log10(LOAEL predicted)",ylab="-log10(LOAEL measured median)",main="Mazzatorta") + geom_point() + geom_abline(intercept=0.0) + xlim(-2,4.5) + ylim(-2,4.5) + +p3 = qplot(-log10(LOAEL_predicted),-log10(LOAEL_measured_median),data=swiss,xlab="-log10(LOAEL predicted)",ylab="-log10(LOAEL measured median)",main="Swiss Federal Office") + geom_point() + geom_abline(intercept=0.0) + xlim(-2,4.5) + ylim(-2,4.5) + +p4 = qplot(-log10(LOAEL_predicted),-log10(LOAEL_measured_median),data=combined,xlab="-log10(LOAEL predicted)",ylab="-log10(LOAEL measured median)",main="Combined") + geom_point() + geom_abline(intercept=0.0) + xlim(-2,4.5) + ylim(-2,4.5) + +pdf('figure/crossvalidation.pdf') +grid.arrange(p2,p3,p4,ncol=2) +dev.off() diff --git a/paper/crossvalidation.R b/paper/crossvalidation.R new file mode 100644 index 0000000..a32f608 --- 
/dev/null +++ b/paper/crossvalidation.R @@ -0,0 +1,16 @@ +mazzatorta = read.csv("data/mazzatorta-cv.csv",header=T) +swiss = read.csv("data/swiss-cv.csv",header=T) +combined = read.csv("data/combined-cv.csv",header=T) + +cv.mazzatorta.p = round(cor.test(-log(mazzatorta$LOAEL_measured_median),-log(mazzatorta$LOAEL_predicted))$p.value,2) +cv.mazzatorta.r_square = round(cor(-log(mazzatorta$LOAEL_measured_median),-log(mazzatorta$LOAEL_predicted))^2,2) +cv.mazzatorta.rmse = round(sqrt(mean((-log(mazzatorta$LOAEL_measured_median)+log(mazzatorta$LOAEL_predicted))^2)),2) + +cv.swiss.p = round(cor.test(-log(swiss$LOAEL_measured_median),-log(swiss$LOAEL_predicted))$p.value,2) +cv.swiss.r_square = round(cor(-log(swiss$LOAEL_measured_median),-log(swiss$LOAEL_predicted))^2,2) +cv.swiss.rmse = round(sqrt(mean((-log(swiss$LOAEL_measured_median)+log(swiss$LOAEL_predicted))^2)),2) + +cv.combined.p = round(cor.test(-log(combined$LOAEL_measured_median),-log(combined$LOAEL_predicted))$p.value,2) +cv.combined.r_square = round(cor(-log(combined$LOAEL_measured_median),-log(combined$LOAEL_predicted))^2,2) +cv.combined.rmse = round(sqrt(mean((-log(combined$LOAEL_measured_median)+log(combined$LOAEL_predicted))^2)),2) + diff --git a/paper/crossvalidation.rb b/paper/crossvalidation.rb new file mode 100644 index 0000000..1f4c023 --- /dev/null +++ b/paper/crossvalidation.rb @@ -0,0 +1,23 @@ +require_relative 'include.rb' + +name = File.basename ARGV[0], ".csv" +file = File.join DATA,ARGV[0] +csv_file = File.join(DATA,ARGV[0].sub(/.csv/,'-cv.csv')) +id_file = File.join(DATA,ARGV[0].sub(/.csv/,'-cv.id')) +dataset = Dataset.from_csv_file file +model = Model::LazarRegression.create dataset +cv = RegressionCrossValidation.create model +File.open(id_file,"w+"){|f| f.puts cv.id} + +data = [] +cv.predictions.each do |p| + smi = Compound.find(p[0]).smiles + data << [smi,p[1].median,p[2],p[3]] +end + +data.sort!{|a,b| a[1] <=> b[1]} + +CSV.open(csv_file,"w+") do |csv| + csv << 
["SMILES","LOAEL_measured_median","LOAEL_predicted","Confidence"] + data.each{|r| csv << r} +end diff --git a/paper/crossvalidations.R b/paper/crossvalidations.R deleted file mode 100644 index cdc4c7e..0000000 --- a/paper/crossvalidations.R +++ /dev/null @@ -1,9 +0,0 @@ -cv.mazzatorta.rmse <- 0.8439115008205602 -cv.mazzatorta.r.squared <- 0.3730663179459023 -cv.mazzatorta.mae <- 0.6546182843884356 -cv.swiss.rmse <- 0.7507139457130771 -cv.swiss.r.squared <- 0.2507741939299348 -cv.swiss.mae <- 0.6068918271561476 -cv.combined.rmse <- 1.4536329017938434 -cv.combined.r.squared <- 0.12100621490895397 -cv.combined.mae <- 1.2096846143410287 diff --git a/paper/crossvalidations.rb b/paper/crossvalidations.rb deleted file mode 100644 index f6a5143..0000000 --- a/paper/crossvalidations.rb +++ /dev/null @@ -1,18 +0,0 @@ -require_relative 'include.rb' -file = File.join(DATA,ARGV[0]) -dataset = Dataset.from_csv_file file -model = Model::LazarRegression.create dataset -cv = RegressionCrossValidation.create model -=begin -=end - -datasets = ["mazzatorta","swiss","combined"] -File.open("crossvalidations.R","w+") do |f| - [0,1,5].each do |i| - dataset = datasets.shift - cv = OpenTox::RegressionCrossValidation.all[i] - f.puts "cv.#{dataset}.rmse <- #{cv.rmse}" - f.puts "cv.#{dataset}.r.squared <- #{cv.r_squared}" - f.puts "cv.#{dataset}.mae <- #{cv.mae}" - end -end diff --git a/paper/data/SMARTS_InteLigand.txt b/paper/data/SMARTS_InteLigand.txt new file mode 100644 index 0000000..23bc6e2 --- /dev/null +++ b/paper/data/SMARTS_InteLigand.txt @@ -0,0 +1,983 @@ +# +# SMARTS Patterns for Functional Group Classification +# +# written by Christian Laggner +# Copyright 2005 Inte:Ligand Software-Entwicklungs und Consulting GmbH +# +# Released under the Lesser General Public License (LGPL license) +# see http://www.gnu.org/copyleft/lesser.html +# Modified from Version 221105 +##################################################################################################### + +# General 
Stuff: +# These patterns were written in an attempt to represent the classification of organic compounds +# from the viewpoint of an organic chemist. +# They are often very restrictive. This may be generally a good thing, but it also takes some time +# for filtering/indexing large compound sets. +# For filtering undesired groups (in druglike compounds) one will want to have more general patterns +# (e.g. you don't want *any* halide of *any* acid, *neither* aldehyde *nor* formyl esters and amides, ...). +# + +# Part I: Carbon +# ============== + + +# I.1: Carbon-Carbon Bonds +# ------------------------ + +# I.1.1 Alkanes: + +Primary_carbon: [CX4H3][#6] + +Secondary_carbon: [CX4H2]([#6])[#6] + +Tertiary_carbon: [CX4H1]([#6])([#6])[#6] + +Quaternary_carbon: [CX4]([#6])([#6])([#6])[#6] + + +# I.1.2 C-C double and Triple Bonds + +Alkene: [CX3;$([H2]),$([H1][#6]),$(C([#6])[#6])]=[CX3;$([H2]),$([H1][#6]),$(C([#6])[#6])] +# sp2 C may be substituted only by C or H - +# does not hit ketenes and allenes, nor enamines, enols and the like + +Alkyne: [CX2]#[CX2] +# non-carbon substituents (e.g. alkynol ethers) are rather rare, thus no further discrimination + +Allene: [CX3]=[CX2]=[CX3] + + +# I.2: One Carbon-Hetero Bond +# --------------------------- + + +# I.2.1 Alkyl Halogenides + +Alkylchloride: [ClX1][CX4] +# will also hit chloromethylethers and the like, but no chloroalkenes, -alkynes or -aromats +# a more restrictive version can be obtained by modifying the Alcohol string.
+ +Alkylfluoride: [FX1][CX4] + +Alkylbromide: [BrX1][CX4] + +Alkyliodide: [IX1][CX4] + + +# I.2.2 Alcohols and Ethers + +Alcohol: [OX2H][CX4;!$(C([OX2H])[O,S,#7,#15])] +# nonspecific definition, no acetals, aminals, and the like + +Primary_alcohol: [OX2H][CX4H2;!$(C([OX2H])[O,S,#7,#15])] + +Secondary_alcohol: [OX2H][CX4H;!$(C([OX2H])[O,S,#7,#15])] + +Tertiary_alcohol: [OX2H][CX4D4;!$(C([OX2H])[O,S,#7,#15])] + +Dialkylether: [OX2]([CX4;!$(C([OX2])[O,S,#7,#15,F,Cl,Br,I])])[CX4;!$(C([OX2])[O,S,#7,#15])] +# no acetals and the like; no enolethers + +Dialkylthioether: [SX2]([CX4;!$(C([OX2])[O,S,#7,#15,F,Cl,Br,I])])[CX4;!$(C([OX2])[O,S,#7,#15])] +# no acetals and the like; no enolethers + +Alkylarylether: [OX2](c)[CX4;!$(C([OX2])[O,S,#7,#15,F,Cl,Br,I])] +# no acetals and the like; no enolethers + +Diarylether: [c][OX2][c] + +Alkylarylthioether: [SX2](c)[CX4;!$(C([OX2])[O,S,#7,#15,F,Cl,Br,I])] + +Diarylthioether: [c][SX2][c] + +Oxonium: [O+;!$([O]~[!#6]);!$([S]*~[#7,#8,#15,#16])] +# can't be aromatic, thus O and not #8 + +# I.2.3 Amines + +Amine: [NX3+0,NX4+;!$([N]~[!#6]);!$([N]*~[#7,#8,#15,#16])] +# hits all amines (prim/sec/tert/quart), including ammonium salts, also enamines, but not amides, imides, aminals, ... 
+ +# the following amines include also the protonated forms + +Primary_aliph_amine: [NX3H2+0,NX4H3+;!$([N][!C]);!$([N]*~[#7,#8,#15,#16])] + +Secondary_aliph_amine: [NX3H1+0,NX4H2+;!$([N][!C]);!$([N]*~[#7,#8,#15,#16])] + +Tertiary_aliph_amine: [NX3H0+0,NX4H1+;!$([N][!C]);!$([N]*~[#7,#8,#15,#16])] + +Quaternary_aliph_ammonium: [NX4H0+;!$([N][!C]);!$([N]*~[#7,#8,#15,#16])] + +Primary_arom_amine: [NX3H2+0,NX4H3+]c + +Secondary_arom_amine: [NX3H1+0,NX4H2+;!$([N][!c]);!$([N]*~[#7,#8,#15,#16])] + +Tertiary_arom_amine: [NX3H0+0,NX4H1+;!$([N][!c]);!$([N]*~[#7,#8,#15,#16])] + +Quaternary_arom_ammonium: [NX4H0+;!$([N][!c]);!$([N]*~[#7,#8,#15,#16])] + +Secondary_mixed_amine: [NX3H1+0,NX4H2+;$([N]([c])[C]);!$([N]*~[#7,#8,#15,#16])] + +Tertiary_mixed_amine: [NX3H0+0,NX4H1+;$([N]([c])([C])[#6]);!$([N]*~[#7,#8,#15,#16])] + +Quaternary_mixed_ammonium: [NX4H0+;$([N]([c])([C])[#6][#6]);!$([N]*~[#7,#8,#15,#16])] + +Ammonium: [N+;!$([N]~[!#6]);!$(N=*);!$([N]*~[#7,#8,#15,#16])] +# only C and H substituents allowed. Quaternary or protonated amines +# NX4+ or Nv4+ is not recognized by Daylight's depictmatch if less than four C are present + + +# I.2.4 Others + +Alkylthiol: [SX2H][CX4;!$(C([SX2H])~[O,S,#7,#15])] + +Dialkylthioether: [SX2]([CX4;!$(C([SX2])[O,S,#7,#15,F,Cl,Br,I])])[CX4;!$(C([SX2])[O,S,#7,#15])] + +Alkylarylthioether: [SX2](c)[CX4;!$(C([SX2])[O,S,#7,#15])] + +Disulfide: [SX2D2][SX2D2] + +1,2-Aminoalcohol: [OX2H][CX4;!$(C([OX2H])[O,S,#7,#15,F,Cl,Br,I])][CX4;!$(C([N])[O,S,#7,#15])][NX3;!$(NC=[O,S,N])] +# does not hit alpha-amino acids, enaminoalcohols, 1,2-aminoacetals, o-aminophenols, etc. + +1,2-Diol: [OX2H][CX4;!$(C([OX2H])[O,S,#7,#15])][CX4;!$(C([OX2H])[O,S,#7,#15])][OX2H] +# does not hit alpha-hydroxy acids, enolalcohols, 1,2-hydroxyacetals, 1,2-diphenols, etc. 
+ +1,1-Diol: [OX2H][CX4;!$(C([OX2H])([OX2H])[O,S,#7,#15])][OX2H] + +Hydroperoxide: [OX2H][OX2] +#does not necessarily have to be connected to a carbon atom, includes also hydrotrioxides + +Peroxo: [OX2D2][OX2D2] + +Organolithium_compounds: [LiX1][#6,#14] + +Organomagnesium_compounds: [MgX2][#6,#14] +# not restricted to Grignard compounds, also dialkyl Mg + +Organometallic_compounds: [!#1;!#5;!#6;!#7;!#8;!#9;!#14;!#15;!#16;!#17;!#33;!#34;!#35;!#52;!#53;!#85]~[#6;!-] +# very general, includes all metals covalently bound to carbon + + +# I.3: Two Carbon-Hetero Bonds (Carbonyl and Derivatives) +# ---------------------------- + +# I.3.1 Double Bond to Hetero + +Aldehyde: [$([CX3H][#6]),$([CX3H2])]=[OX1] +# hits aldehydes including formaldehyde + +Ketone: [#6][CX3](=[OX1])[#6] +# does not include oxo-groups connected to a (hetero-) aromatic ring + +Thioaldehyde: [$([CX3H][#6]),$([CX3H2])]=[SX1] + +Thioketone: [#6][CX3](=[SX1])[#6] +# does not include thioxo-groups connected to a (hetero-) aromatic ring + +Imine: [NX2;$([N][#6]),$([NH]);!$([N][CX3]=[#7,#8,#15,#16])]=[CX3;$([CH2]),$([CH][#6]),$([C]([#6])[#6])] +# nitrogen is not part of an amidelike structure, nor of an aromatic ring, but can be part of an aminal or similar + +Immonium: [NX3+;!$([N][!#6]);!$([N][CX3]=[#7,#8,#15,#16])] + +Oxime: [NX2](=[CX3;$([CH2]),$([CH][#6]),$([C]([#6])[#6])])[OX2H] + +Oximether: [NX2](=[CX3;$([CH2]),$([CH][#6]),$([C]([#6])[#6])])[OX2][#6;!$(C=[#7,#8])] +# ether, not ester or amide; does not hit isoxazole + + +# I.3.2. Two Single Bonds to Hetero + +Acetal: [OX2]([#6;!$(C=[O,S,N])])[CX4;!$(C(O)(O)[!#6])][OX2][#6;!$(C=[O,S,N])] +# does not hit hydroxy-methylesters, ketenacetals, hemiacetals, orthoesters, etc. + +Hemiacetal: [OX2H][CX4;!$(C(O)(O)[!#6])][OX2][#6;!$(C=[O,S,N])] + +Aminal: [NX3v3;!$(NC=[#7,#8,#15,#16])]([#6])[CX4;!$(C(N)(N)[!#6])][NX3v3;!$(NC=[#7,#8,#15,#16])][#6] +# Ns are not part of an amide or similar.
v3 is to exclude nitro and similar groups + +Hemiaminal: [NX3v3;!$(NC=[#7,#8,#15,#16])]([#6])[CX4;!$(C(N)(N)[!#6])][OX2H] + +Thioacetal: [SX2]([#6;!$(C=[O,S,N])])[CX4;!$(C(S)(S)[!#6])][SX2][#6;!$(C=[O,S,N])] + +Thiohemiacetal: [SX2]([#6;!$(C=[O,S,N])])[CX4;!$(C(S)(S)[!#6])][OX2H] + +Halogen_acetal_like: [NX3v3,SX2,OX2;!$(*C=[#7,#8,#15,#16])][CX4;!$(C([N,S,O])([N,S,O])[!#6])][FX1,ClX1,BrX1,IX1] +# hits chloromethylenethers and other reactive alkylating agents + +Acetal_like: [NX3v3,SX2,OX2;!$(*C=[#7,#8,#15,#16])][CX4;!$(C([N,S,O])([N,S,O])[!#6])][FX1,ClX1,BrX1,IX1,NX3v3,SX2,OX2;!$(*C=[#7,#8,#15,#16])] +# includes all of the above and other combinations (S-C-N, hydrates, ...), but still no aminomethylenesters and similar + +Halogenmethylen_ester_and_similar: [NX3v3,SX2,OX2;$(**=[#7,#8,#15,#16])][CX4;!$(C([N,S,O])([N,S,O])[!#6])][FX1,ClX1,BrX1,IX1] +# also reactive alkylating agents. Acid does not have to be carboxylic acid, also S- and P-based acids allowed + +NOS_methylen_ester_and_similar: [NX3v3,SX2,OX2;$(**=[#7,#8,#15,#16])][CX4;!$(C([N,S,O])([N,S,O])[!#6])][NX3v3,SX2,OX2;!$(*C=[#7,#8,#15,#16])] +# Same as above, but N,O or S instead of halogen. Ester/amide allowed only on one side + +Hetero_methylen_ester_and_similar: [NX3v3,SX2,OX2;$(**=[#7,#8,#15,#16])][CX4;!$(C([N,S,O])([N,S,O])[!#6])][FX1,ClX1,BrX1,IX1,NX3v3,SX2,OX2;!$(*C=[#7,#8,#15,#16])] +# Combination of the last two patterns + +Cyanhydrine: [NX1]#[CX2][CX4;$([CH2]),$([CH]([CX2])[#6]),$(C([CX2])([#6])[#6])][OX2H] + + +# I.3.3 Single Bond to Hetero, C=C Double Bond (Enols and Similar) + +Chloroalkene: [ClX1][CX3]=[CX3] + +Fluoroalkene: [FX1][CX3]=[CX3] + +Bromoalkene: [BrX1][CX3]=[CX3] + +Iodoalkene: [IX1][CX3]=[CX3] + +Enol: [OX2H][CX3;$([H1]),$(C[#6])]=[CX3] +# no phenols + +Endiol: [OX2H][CX3;$([H1]),$(C[#6])]=[CX3;$([H1]),$(C[#6])][OX2H] +# no 1,2-diphenols, ketenacetals, ...
+ +Enolether: [OX2]([#6;!$(C=[N,O,S])])[CX3;$([H0][#6]),$([H1])]=[CX3] +# finds also endiodiethers, but not enolesters, no aromats + +Enolester: [OX2]([CX3]=[OX1])[#6X3;$([#6][#6]),$([H1])]=[#6X3;!$(C[OX2H])] + + +Enamine: [NX3;$([NH2][CX3]),$([NH1]([CX3])[#6]),$([N]([CX3])([#6])[#6]);!$([N]*=[#7,#8,#15,#16])][CX3;$([CH]),$([C][#6])]=[CX3] +# does not hit amines attached to aromatic rings, nor may the nitrogen be aromatic + +Thioenol: [SX2H][CX3;$([H1]),$(C[#6])]=[CX3] + +Thioenolether: [SX2]([#6;!$(C=[N,O,S])])[CX3;$(C[#6]),$([CH])]=[CX3] + + +# I.4: Three Carbon-Hetero Bonds (Carboxyl and Derivatives) +# ------------------------------ + +Acylchloride: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[ClX1] + +Acylfluoride: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[FX1] + +Acylbromide: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[BrX1] + +Acyliodide: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[IX1] + +Acylhalide: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[FX1,ClX1,BrX1,IX1] +# all of the above + + +# The following contains all simple carboxylic combinations of O, N, S, & Hal - +# - acids, esters, amides, ... as well as a few extra cases (anhydride, hydrazide...) +# Cyclic structures (including aromats) like lactones, lactames, ... got their own +# definitions. Structures where both heteroatoms are part of an aromatic ring +# (oxazoles, imidazoles, ...) were excluded. + +Carboxylic_acid: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[$([OX2H]),$([OX1-])] +# includes carboxylate anions + +Carboxylic_ester: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[OX2][#6;!$(C=[O,N,S])] +# does not hit anhydrides or lactones + +Lactone: [#6][#6X3R](=[OX1])[#8X2][#6;!$(C=[O,N,S])] +# may also be aromatic + +Carboxylic_anhydride: [CX3;$([H0][#6]),$([H1])](=[OX1])[#8X2][CX3;$([H0][#6]),$([H1])](=[OX1]) +# anhydride formed by two carboxylic acids, no mixed anhydrides (e.g. 
between carboxylic acid and sulfuric acid); may be part of a ring, even aromatic + +Carboxylic_acid_derivative: [$([#6X3H0][#6]),$([#6X3H])](=[!#6])[!#6] +# includes most of the structures of I.4 and many more, also 1,3-heteroaromatics such as isoxazole + +Carbothioic_acid: [CX3;!R;$([C][#6]),$([CH]);$([C](=[OX1])[$([SX2H]),$([SX1-])]),$([C](=[SX1])[$([OX2H]),$([OX1-])])] +# hits both tautomeric forms, as well as anions + +Carbothioic_S_ester: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[SX2][#6;!$(C=[O,N,S])] + +Carbothioic_S_lactone: [#6][#6X3R](=[OX1])[#16X2][#6;!$(C=[O,N,S])] +# may also be aromatic + +Carbothioic_O_ester: [CX3;$([H0][#6]),$([H1])](=[SX1])[OX2][#6;!$(C=[O,N,S])] + +Carbothioic_O_lactone: [#6][#6X3R](=[SX1])[#8X2][#6;!$(C=[O,N,S])] + +Carbothioic_halide: [CX3;$([H0][#6]),$([H1])](=[SX1])[FX1,ClX1,BrX1,IX1] + +Carbodithioic_acid: [CX3;!R;$([C][#6]),$([CH]);$([C](=[SX1])[SX2H])] + +Carbodithioic_ester: [CX3;!R;$([C][#6]),$([CH]);$([C](=[SX1])[SX2][#6;!$(C=[O,N,S])])] + +Carbodithiolactone: [#6][#6X3R](=[SX1])[#16X2][#6;!$(C=[O,N,S])] + + +Amide: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] +# does not hit lactames + +Primary_amide: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[NX3H2] + +Secondary_amide: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[#7X3H1][#6;!$(C=[O,N,S])] + +Tertiary_amide: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[#7X3H0]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])] + +Lactam: [#6R][#6X3R](=[OX1])[#7X3;$([H1][#6;!$(C=[O,N,S])]),$([H0]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] +# cyclic amides, may also be aromatic + +Alkyl_imide: [#6X3;$([H0][#6]),$([H1])](=[OX1])[#7X3H0]([#6])[#6X3;$([H0][#6]),$([H1])](=[OX1]) +# may be part of a ring, even aromatic. only C allowed at central N. 
May also be triacyl amide + +N_hetero_imide: [#6X3;$([H0][#6]),$([H1])](=[OX1])[#7X3H0]([!#6])[#6X3;$([H0][#6]),$([H1])](=[OX1]) +# everything else than H or C at central N + +Imide_acidic: [#6X3;$([H0][#6]),$([H1])](=[OX1])[#7X3H1][#6X3;$([H0][#6]),$([H1])](=[OX1]) +# can be deprotonated + +Thioamide: [$([CX3;!R][#6]),$([CX3H;!R])](=[SX1])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] +# does not hit thiolactames + +Thiolactam: [#6R][#6X3R](=[SX1])[#7X3;$([H1][#6;!$(C=[O,N,S])]),$([H0]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] +# cyclic thioamides, may also be aromatic + + +Oximester: [#6X3;$([H0][#6]),$([H1])](=[OX1])[#8X2][#7X2]=,:[#6X3;$([H0]([#6])[#6]),$([H1][#6]),$([H2])] +# may also be part of a ring / aromatic + +Amidine: [NX3;!$(NC=[O,S])][CX3;$([CH]),$([C][#6])]=[NX2;!$(NC=[O,S])] +# only basic amidines, not as part of aromatic ring (e.g. imidazole) + +Hydroxamic_acid: [CX3;$([H0][#6]),$([H1])](=[OX1])[#7X3;$([H1]),$([H0][#6;!$(C=[O,N,S])])][$([OX2H]),$([OX1-])] + +Hydroxamic_acid_ester: [CX3;$([H0][#6]),$([H1])](=[OX1])[#7X3;$([H1]),$([H0][#6;!$(C=[O,N,S])])][OX2][#6;!$(C=[O,N,S])] +# does not hit anhydrides of carboxylic acids withs hydroxamic acids + + +Imidoacid: [CX3R0;$([H0][#6]),$([H1])](=[NX2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[$([OX2H]),$([OX1-])] +# not cyclic + +Imidoacid_cyclic: [#6R][#6X3R](=,:[#7X2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[$([OX2H]),$([OX1-])] +# the enamide-form of lactames. may be aromatic like 2-hydroxypyridine + +Imidoester: [CX3R0;$([H0][#6]),$([H1])](=[NX2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[OX2][#6;!$(C=[O,N,S])] +# esters of the above structures. no anhydrides. 
+ +Imidolactone: [#6R][#6X3R](=,:[#7X2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[OX2][#6;!$(C=[O,N,S])] +# no oxazoles and similar + +Imidothioacid: [CX3R0;$([H0][#6]),$([H1])](=[NX2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[$([SX2H]),$([SX1-])] +# not cyclic + +Imidothioacid_cyclic: [#6R][#6X3R](=,:[#7X2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[$([SX2H]),$([SX1-])] +# the enamide-form of thiolactames. may be aromatic like 2-thiopyridine + +Imidothioester: [CX3R0;$([H0][#6]),$([H1])](=[NX2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[SX2][#6;!$(C=[O,N,S])] +# thioesters of the above structures. no anhydrides. + +Imidothiolactone: [#6R][#6X3R](=,:[#7X2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[SX2][#6;!$(C=[O,N,S])] +# no thioxazoles and similar + +Amidine: [#7X3v3;!$(N([#6X3]=[#7X2])C=[O,S])][CX3R0;$([H1]),$([H0][#6])]=[NX2v3;!$(N(=[#6X3][#7X3])C=[O,S])] +# only basic amidines, not substituted by carbonyl or thiocarbonyl, not as part of a ring + +Imidolactam: [#6][#6X3R;$([H0](=[NX2;!$(N(=[#6X3][#7X3])C=[O,S])])[#7X3;!$(N([#6X3]=[#7X2])C=[O,S])]),$([H0](-[NX3;!$(N([#6X3]=[#7X2])C=[O,S])])=,:[#7X2;!$(N(=[#6X3][#7X3])C=[O,S])])] +# one of the two C~N bonds is part of a ring (may be aromatic), but not both - thus no imidazole + +Imidoylhalide: [CX3R0;$([H0][#6]),$([H1])](=[NX2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[FX1,ClX1,BrX1,IX1] +# not cyclic + +Imidoylhalide_cyclic: [#6R][#6X3R](=,:[#7X2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[FX1,ClX1,BrX1,IX1] +# may also be aromatic + +# may be ring, aromatic, substituted with carbonyls, hetero, ... +# (everything else would get too complicated) + +Amidrazone: [$([$([#6X3][#6]),$([#6X3H])](=[#7X2v3])[#7X3v3][#7X3v3]),$([$([#6X3][#6]),$([#6X3H])]([#7X3v3])=[#7X2v3][#7X3v3])] +# hits both tautomers. as above, it may be ring, aromatic, substituted with carbonyls, hetero, ... 
+ + +Alpha_aminoacid: [NX3,NX4+;!$([N]~[!#6]);!$([N]*~[#7,#8,#15,#16])][C][CX3](=[OX1])[OX2H,OX1-] +# N may be alkylated, but not part of an amide (as in peptides), ionic forms are included +# includes also non-natural aminoacids with double-bonded or two aliph./arom. substituents at alpha-C +# N may not be aromatic as in 1H-pyrrole-2-carboxylic acid + +Alpha_hydroxyacid: [OX2H][C][CX3](=[OX1])[OX2H,OX1-] + +Peptide_middle: [NX3;$([N][CX3](=[OX1])[C][NX3,NX4+])][C][CX3](=[OX1])[NX3;$([N][C][CX3](=[OX1])[NX3,OX2,OX1-])] +# finds peptidic structures which are neither C- nor N-terminal. Both neighbours must be amino-acids/peptides + +Peptide_C_term: [NX3;$([N][CX3](=[OX1])[C][NX3,NX4+])][C][CX3](=[OX1])[OX2H,OX1-] +# finds C-terminal amino acids + +Peptide_N_term: [NX3,NX4+;!$([N]~[!#6]);!$([N]*~[#7,#8,#15,#16])][C][CX3](=[OX1])[NX3;$([N][C][CX3](=[OX1])[NX3,OX2,OX1-])] +# finds N-terminal amino acids. As above, N may be substituted, but not part of an amide-bond. + + +Carboxylic_orthoester: [#6][OX2][CX4;$(C[#6]),$([CH])]([OX2][#6])[OX2][#6] +# hits also anhydride-like structures (e. g. HC(OMe)2-OC=O residues) + +Ketene: [CX3]=[CX2]=[OX1] + +Ketenacetal: [#7X2,#8X3,#16X2;$(*[#6,#14])][#6X3]([#7X2,#8X3,#16X2;$(*[#6,#14])])=[#6X3] +# includes aminals, silylacetals, ketenesters, etc. 
C=C DB is not aromatic, everything else may be + +Nitrile: [NX1]#[CX2] +# includes cyanhydrines + +Isonitrile: [CX1-]#[NX2+] + + +Vinylogous_carbonyl_or_carboxyl_derivative: [#6X3](=[OX1])[#6X3]=,:[#6X3][#7,#8,#16,F,Cl,Br,I] +# may be part of a ring, even aromatic + +Vinylogous_acid: [#6X3](=[OX1])[#6X3]=,:[#6X3][$([OX2H]),$([OX1-])] + +Vinylogous_ester: [#6X3](=[OX1])[#6X3]=,:[#6X3][#6;!$(C=[O,N,S])] + +Vinylogous_amide: [#6X3](=[OX1])[#6X3]=,:[#6X3][#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] + +Vinylogous_halide: [#6X3](=[OX1])[#6X3]=,:[#6X3][FX1,ClX1,BrX1,IX1] + + + +# I.5: Four Carbon-Hetero Bonds (Carbonic Acid and Derivatives) +# ----------------------------- + +Carbonic_acid_dieester: [#6;!$(C=[O,N,S])][#8X2][#6X3](=[OX1])[#8X2][#6;!$(C=[O,N,S])] +# may be part of a ring, even aromatic + +Carbonic_acid_esterhalide: [#6;!$(C=[O,N,S])][OX2;!R][CX3](=[OX1])[OX2][FX1,ClX1,BrX1,IX1] + +Carbonic_acid_monoester: [#6;!$(C=[O,N,S])][OX2;!R][CX3](=[OX1])[$([OX2H]),$([OX1-])] +# unstable + +Carbonic_acid_derivatives: [!#6][#6X3](=[!#6])[!#6] + + +Thiocarbonic_acid_dieester: [#6;!$(C=[O,N,S])][#8X2][#6X3](=[SX1])[#8X2][#6;!$(C=[O,N,S])] +# may be part of a ring, even aromatic + +Thiocarbonic_acid_esterhalide: [#6;!$(C=[O,N,S])][OX2;!R][CX3](=[SX1])[OX2][FX1,ClX1,BrX1,IX1] + +Thiocarbonic_acid_monoester: [#6;!$(C=[O,N,S])][OX2;!R][CX3](=[SX1])[$([OX2H]),$([OX1-])] + + +Urea:[#7X3;!$([#7][!#6])][#6X3](=[OX1])[#7X3;!$([#7][!#6])] +# no check whether part of imide, biuret, etc. Aromatic structures are only hit if +# both N share no double bonds, like in the dioxo-form of uracil + +Thiourea: [#7X3;!$([#7][!#6])][#6X3](=[SX1])[#7X3;!$([#7][!#6])] + +Isourea: [#7X2;!$([#7][!#6])]=,:[#6X3]([#8X2&!$([#8][!#6]),OX1-])[#7X3;!$([#7][!#6])] +# O may be substituted. no check whether further amide-like bonds are present. 
Aromatic +# structures are only hit if single bonded N shares no additional double bond, like in +# the 1-hydroxy-3-oxo form of uracil + +Isothiourea: [#7X2;!$([#7][!#6])]=,:[#6X3]([#16X2&!$([#16][!#6]),SX1-])[#7X3;!$([#7][!#6])] + +Guanidine: [N;v3X3,v4X4+][CX3](=[N;v3X2,v4X3+])[N;v3X3,v4X4+] +# also hits guanidinium salts. v3 and v4 to avoid nitroamidines + +Carbaminic_acid: [NX3]C(=[OX1])[O;X2H,X1-] +# quite unstable, unlikely to be found. Also hits salts + +Urethan: [#7X3][#6](=[OX1])[#8X2][#6] +# also hits when part of a ring, no check whether the last C is part of carbonyl + +Biuret: [#7X3][#6](=[OX1])[#7X3][#6](=[OX1])[#7X3] + +Semicarbazide: [#7X3][#7X3][#6X3]([#7X3;!$([#7][#7])])=[OX1] + +Carbazide: [#7X3][#7X3][#6X3]([#7X3][#7X3])=[OX1] + +Semicarbazone: [#7X2](=[#6])[#7X3][#6X3]([#7X3;!$([#7][#7])])=[OX1] + +Carbazone: [#7X2](=[#6])[#7X3][#6X3]([#7X3][#7X3])=[OX1] + +Thiosemicarbazide: [#7X3][#7X3][#6X3]([#7X3;!$([#7][#7])])=[SX1] + +Thiocarbazide: [#7X3][#7X3][#6X3]([#7X3][#7X3])=[SX1] + +Thiosemicarbazone: [#7X2](=[#6])[#7X3][#6X3]([#7X3;!$([#7][#7])])=[SX1] + +Thiocarbazone: [#7X2](=[#6])[#7X3][#6X3]([#7X3][#7X3])=[SX1] + + +Isocyanate: [NX2]=[CX2]=[OX1] + +Cyanate: [OX2][CX2]#[NX1] + +Isothiocyanate: [NX2]=[CX2]=[SX1] + +Thiocyanate: [SX2][CX2]#[NX1] + +Carbodiimide: [NX2]=[CX2]=[NX2] + +Orthocarbonic_derivatives: [CX4H0]([O,S,#7])([O,S,#7])([O,S,#7])[O,S,#7,F,Cl,Br,I] +# halogen allowed just once, to avoid mapping to -OCF3 and similar groups (much more +# stable as for example C(OCH3)4) + + +# I.6 Aromatics +# ------------- + +# I know that this classification is not very logical, arylamines are found under I.2 ... 
+ +Phenol: [OX2H][c] + +1,2-Diphenol: [OX2H][c][c][OX2H] + +Arylchloride: [Cl][c] + +Arylfluoride: [F][c] + +Arylbromide: [Br][c] + +Aryliodide: [I][c] + +Arylthiol: [SX2H][c] + +Iminoarene: [c]=[NX2;$([H1]),$([H0][#6;!$([C]=[N,S,O])])] +# N may be substituted with H or C, but not carbonyl or similar +# aromatic atom is always C, not S or P (these are not planar when substituted) + +Oxoarene: [c]=[OX1] + +Thioarene: [c]=[SX1] + +Hetero_N_basic_H: [nX3H1+0] +# as in pyrrole. uncharged to exclude pyridinium ions + +Hetero_N_basic_no_H: [nX3H0+0] +# as in N-methylpyrrole. uncharged to exclude pyridinium ions + +Hetero_N_nonbasic: [nX2,nX3+] +# as in pyridine, pyridinium + +Hetero_O: [o] + +Hetero_S: [sX2] +# X2 because Daylight's depictmatch falsely describes C1=CS(=O)C=C1 as aromatic +# (is not planar because of lone pair at S) + +Heteroaromatic: [a;!c] + + +# Part II: N, S, P, Si, B +# ======================= + + +# II.1 Nitrogen +# ------------- + +Nitrite: [NX2](=[OX1])[O;$([X2]),$([X1-])] +# hits nitrous acid, its anion, esters, and other O-substituted derivatives + +Thionitrite: [SX2][NX2]=[OX1] + +Nitrate: [$([NX3](=[OX1])(=[OX1])[O;$([X2]),$([X1-])]),$([NX3+]([OX1-])(=[OX1])[O;$([X2]),$([X1-])])] +# hits nitric acid, its anion, esters, and other O-substituted derivatives + +Nitro: [$([NX3](=O)=O),$([NX3+](=O)[O-])][!#8] +# hits nitro groups attached to C,N, ... but not nitrates + +Nitroso: [NX2](=[OX1])[!#7;!#8] +# no nitrites, no nitrosamines + +Azide: [NX1]~[NX2]~[NX2,NX1] +# hits both mesomeric forms, also anion + +Acylazide: [CX3](=[OX1])[NX2]~[NX2]~[NX1] + +Diazo: [$([#6]=[NX2+]=[NX1-]),$([#6-]-[NX2+]#[NX1])] + +Diazonium: [#6][NX2+]#[NX1] + +Nitrosamine: [#7;!$(N*=O)][NX2]=[OX1] + +Nitrosamide: [NX2](=[OX1])N-*=O +# includes nitrososulfonamides + +N-Oxide: [$([#7+][OX1-]),$([#7v5]=[OX1]);!$([#7](~[O])~[O]);!$([#7]=[#7])] +# Hits both forms. Won't hit azoxy, nitro, nitroso, or nitrate. 
+ + +Hydrazine: [NX3;$([H2]),$([H1][#6]),$([H0]([#6])[#6]);!$(NC=[O,N,S])][NX3;$([H2]),$([H1][#6]),$([H0]([#6])[#6]);!$(NC=[O,N,S])] +# no hydrazides + +Hydrazone: [NX3;$([H2]),$([H1][#6]),$([H0]([#6])[#6]);!$(NC=[O,N,S])][NX2]=[#6] + +Hydroxylamine: [NX3;$([H2]),$([H1][#6]),$([H0]([#6])[#6]);!$(NC=[O,N,S])][OX2;$([H1]),$(O[#6;!$(C=[N,O,S])])] +# no discrimination between O-, N-, and O,N-substitution + + +# II.2 Sulfur +# ----------- + +Sulfon: [$([SX4](=[OX1])(=[OX1])([#6])[#6]),$([SX4+2]([OX1-])([OX1-])([#6])[#6])] +# can't be aromatic, thus S and not #16 + +Sulfoxide: [$([SX3](=[OX1])([#6])[#6]),$([SX3+]([OX1-])([#6])[#6])] + +Sulfonium: [S+;!$([S]~[!#6]);!$([S]*~[#7,#8,#15,#16])] +# can't be aromatic, thus S and not #16 + +Sulfuric_acid: [SX4](=[OX1])(=[OX1])([$([OX2H]),$([OX1-])])[$([OX2H]),$([OX1-])] +# includes anions + +Sulfuric_monoester: [SX4](=[OX1])(=[OX1])([$([OX2H]),$([OX1-])])[OX2][#6;!$(C=[O,N,S])] + +Sulfuric_diester: [SX4](=[OX1])(=[OX1])([OX2][#6;!$(C=[O,N,S])])[OX2][#6;!$(C=[O,N,S])] + +Sulfuric_monoamide: [SX4](=[OX1])(=[OX1])([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])[$([OX2H]),$([OX1-])] + +Sulfuric_diamide: [SX4](=[OX1])(=[OX1])([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] + +Sulfuric_esteramide: [SX4](=[OX1])(=[OX1])([#7X3][#6;!$(C=[O,N,S])])[OX2][#6;!$(C=[O,N,S])] + +Sulfuric_derivative: [SX4D4](=[!#6])(=[!#6])([!#6])[!#6] +# everything else (would not be a "true" derivative of sulfuric acid, if one of the substituents were less electronegative +# than sulfur, but this should be very very rare, anyway) + + + +#### sulfurous acid and derivatives missing!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
+ + + + +Sulfonic_acid: [SX4;$([H1]),$([H0][#6])](=[OX1])(=[OX1])[$([OX2H]),$([OX1-])] + +Sulfonamide: [SX4;$([H1]),$([H0][#6])](=[OX1])(=[OX1])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] + +Sulfonic_ester: [SX4;$([H1]),$([H0][#6])](=[OX1])(=[OX1])[OX2][#6;!$(C=[O,N,S])] + +Sulfonic_halide: [SX4;$([H1]),$([H0][#6])](=[OX1])(=[OX1])[FX1,ClX1,BrX1,IX1] + +Sulfonic_derivative: [SX4;$([H1]),$([H0][#6])](=[!#6])(=[!#6])[!#6] +# includes all of the above and many more +# for comparison: this is what "all sulfonic derivatives but not the ones above" would look like: +# [$([SX4;$([H1]),$([H0][#6])](=[!#6])(=[!#6;!O])[!#6]),$([SX4;$([H1]),$([H0][#6])](=[OX1])(=[OX1])[!$([FX1,ClX1,BrX1,IX1]);!$([#6]);!$([OX2H]);!$([OX1-]);!$([OX2][#6;!$(C=[O,N,S])]);!$([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])])] + + +Sulfinic_acid: [SX3;$([H1]),$([H0][#6])](=[OX1])[$([OX2H]),$([OX1-])] + +Sulfinic_amide: [SX3;$([H1]),$([H0][#6])](=[OX1])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] + +Sulfinic_ester: [SX3;$([H1]),$([H0][#6])](=[OX1])[OX2][#6;!$(C=[O,N,S])] + +Sulfinic_halide: [SX3;$([H1]),$([H0][#6])](=[OX1])[FX1,ClX1,BrX1,IX1] + +Sulfinic_derivative: [SX3;$([H1]),$([H0][#6])](=[!#6])[!#6] + +Sulfenic_acid: [SX2;$([H1]),$([H0][#6])][$([OX2H]),$([OX1-])] + +Sulfenic_amide: [SX2;$([H1]),$([H0][#6])][#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] + +Sulfenic_ester: [SX2;$([H1]),$([H0][#6])][OX2][#6;!$(C=[O,N,S])] + +Sulfenic_halide: [SX2;$([H1]),$([H0][#6])][FX1,ClX1,BrX1,IX1] + +Sulfenic_derivative: [SX2;$([H1]),$([H0][#6])][!#6] + + +# II.3 Phosphorous +# ---------------- + +Phosphine: [PX3;$([H3]),$([H2][#6]),$([H1]([#6])[#6]),$([H0]([#6])([#6])[#6])] +# similar to amine, but less restrictive: includes also amide- and aminal-analogues + +Phosphine_oxide: 
[PX4;$([H3]=[OX1]),$([H2](=[OX1])[#6]),$([H1](=[OX1])([#6])[#6]),$([H0](=[OX1])([#6])([#6])[#6])] + +Phosphonium: [P+;!$([P]~[!#6]);!$([P]*~[#7,#8,#15,#16])] +# similar to Ammonium + +Phosphorylen: [PX4;$([H3]=[CX3]),$([H2](=[CX3])[#6]),$([H1](=[CX3])([#6])[#6]),$([H0](=[CX3])([#6])([#6])[#6])] + + +# conventions for the following acids and derivatives: +# acids find protonated and deprotonated acids +# esters do not find mixed anhydrides ( ...P-O-C(=O)) +# derivatives: subtituents which go in place of the OH and =O are not H or C (may also be O, +# thus including acids and esters) + +Phosphonic_acid: [PX4;$([H1]),$([H0][#6])](=[OX1])([$([OX2H]),$([OX1-])])[$([OX2H]),$([OX1-])] +# includes anions + +Phosphonic_monoester: [PX4;$([H1]),$([H0][#6])](=[OX1])([$([OX2H]),$([OX1-])])[OX2][#6;!$(C=[O,N,S])] + +Phosphonic_diester: [PX4;$([H1]),$([H0][#6])](=[OX1])([OX2][#6;!$(C=[O,N,S])])[OX2][#6;!$(C=[O,N,S])] + +Phosphonic_monoamide: [PX4;$([H1]),$([H0][#6])](=[OX1])([$([OX2H]),$([OX1-])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] + +Phosphonic_diamide: [PX4;$([H1]),$([H0][#6])](=[OX1])([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] + +Phosphonic_esteramide: [PX4;$([H1]),$([H0][#6])](=[OX1])([OX2][#6;!$(C=[O,N,S])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] + +Phosphonic_acid_derivative: [PX4;$([H1]),$([H0][#6])](=[!#6])([!#6])[!#6] +# all of the above and much more + + +Phosphoric_acid: [PX4D4](=[OX1])([$([OX2H]),$([OX1-])])([$([OX2H]),$([OX1-])])[$([OX2H]),$([OX1-])] +# includes anions + +Phosphoric_monoester: [PX4D4](=[OX1])([$([OX2H]),$([OX1-])])([$([OX2H]),$([OX1-])])[OX2][#6;!$(C=[O,N,S])] + +Phosphoric_diester: [PX4D4](=[OX1])([$([OX2H]),$([OX1-])])([OX2][#6;!$(C=[O,N,S])])[OX2][#6;!$(C=[O,N,S])] + +Phosphoric_triester: 
[PX4D4](=[OX1])([OX2][#6;!$(C=[O,N,S])])([OX2][#6;!$(C=[O,N,S])])[OX2][#6;!$(C=[O,N,S])] + +Phosphoric_monoamide: [PX4D4](=[OX1])([$([OX2H]),$([OX1-])])([$([OX2H]),$([OX1-])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] + +Phosphoric_diamide: [PX4D4](=[OX1])([$([OX2H]),$([OX1-])])([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] + +Phosphoric_triamide: [PX4D4](=[OX1])([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] + +Phosphoric_monoestermonoamide: [PX4D4](=[OX1])([$([OX2H]),$([OX1-])])([OX2][#6;!$(C=[O,N,S])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] + +Phosphoric_diestermonoamide: [PX4D4](=[OX1])([OX2][#6;!$(C=[O,N,S])])([OX2][#6;!$(C=[O,N,S])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] + +Phosphoric_monoesterdiamide: [PX4D4](=[OX1])([OX2][#6;!$(C=[O,N,S])])([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] + +Phosphoric_acid_derivative: [PX4D4](=[!#6])([!#6])([!#6])[!#6] + + +Phosphinic_acid: [PX4;$([H2]),$([H1][#6]),$([H0]([#6])[#6])](=[OX1])[$([OX2H]),$([OX1-])] + +Phosphinic_ester: [PX4;$([H2]),$([H1][#6]),$([H0]([#6])[#6])](=[OX1])[OX2][#6;!$(C=[O,N,S])] + +Phosphinic_amide: [PX4;$([H2]),$([H1][#6]),$([H0]([#6])[#6])](=[OX1])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] + +Phosphinic_acid_derivative: [PX4;$([H2]),$([H1][#6]),$([H0]([#6])[#6])](=[!#6])[!#6] + + +Phosphonous_acid: 
[PX3;$([H1]),$([H0][#6])]([$([OX2H]),$([OX1-])])[$([OX2H]),$([OX1-])] + +Phosphonous_monoester: [PX3;$([H1]),$([H0][#6])]([$([OX2H]),$([OX1-])])[OX2][#6;!$(C=[O,N,S])] + +Phosphonous_diester: [PX3;$([H1]),$([H0][#6])]([OX2][#6;!$(C=[O,N,S])])[OX2][#6;!$(C=[O,N,S])] + +Phosphonous_monoamide: [PX3;$([H1]),$([H0][#6])]([$([OX2H]),$([OX1-])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] + +Phosphonous_diamide: [PX3;$([H1]),$([H0][#6])]([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] + +Phosphonous_esteramide: [PX3;$([H1]),$([H0][#6])]([OX2][#6;!$(C=[O,N,S])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] + +Phosphonous_derivatives: [PX3;$([D2]),$([D3][#6])]([!#6])[!#6] + + +Phosphinous_acid: [PX3;$([H2]),$([H1][#6]),$([H0]([#6])[#6])][$([OX2H]),$([OX1-])] + +Phosphinous_ester: [PX3;$([H2]),$([H1][#6]),$([H0]([#6])[#6])][OX2][#6;!$(C=[O,N,S])] + +Phosphinous_amide: [PX3;$([H2]),$([H1][#6]),$([H0]([#6])[#6])][#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] + +Phosphinous_derivatives: [PX3;$([H2]),$([H1][#6]),$([H0]([#6])[#6])][!#6] + + +# II.4 Silicon +# ------------ + +Quart_silane: [SiX4]([#6])([#6])([#6])[#6] +# four C-substituents. non-reactive, non-toxic, in experimental phase for drug development + +Non-quart_silane: [SiX4;$([H1]([#6])([#6])[#6]),$([H2]([#6])[#6]),$([H3][#6]),$([H4])] +# has 1-4 hydride(s), reactive. 
Daylight's depictmatch does not add hydrogens automatically to +# the free positions at Si, thus Hs had to be added implicitly + +Silylmonohalide: [SiX4]([FX1,ClX1,BrX1,IX1])([#6])([#6])[#6] +# reagents for inserting protection groups + +Het_trialkylsilane: [SiX4]([!#6])([#6])([#6])[#6] +# mostly acid-labile protection groups such as trimethylsilyl-ethers + +Dihet_dialkylsilane: [SiX4]([!#6])([!#6])([#6])[#6] + +Trihet_alkylsilane: [SiX4]([!#6])([!#6])([!#6])[#6] + +Silicic_acid_derivative: [SiX4]([!#6])([!#6])([!#6])[!#6] +# four substituents which are neither C nor H + + +# II.5 Boron +# ---------- + +Trialkylborane: [BX3]([#6])([#6])[#6] +# also carbonyls allowed + +Boric_acid_derivatives: [BX3]([!#6])([!#6])[!#6] +# includes acids, esters, amides, ... H-substituent at B is very rare. + +Boronic_acid_derivative: [BX3]([!#6])([!#6])[!#6] +# includes acids, esters, amides, ... + +Borohydride: [BH1,BH2,BH3,BH4] +# at least one H attached to B + +Quaternary_boron: [BX4] +# mostly borates (negative charge), in complex with Lewis-base + + + +# Part III: Some Special Patterns +# =============================== + + +# III.1 Chains +# ------------ + +# some simple chains + + + +# III.2 Rings +# ----------- + +Aromatic: a + +Heterocyclic: [!#6;!R0] +# may be aromatic or not + +Epoxide: [OX2r3]1[#6r3][#6r3]1 +# toxic/reactive. may be annelated to aromat, but must not be aromatic itself (oxirane-2,3-dione) + +NH_aziridine: [NX3H1r3]1[#6r3][#6r3]1 +# toxic/reactive according to Maybridge's garbage filter + +Spiro: [D4R;$(*(@*)(@*)(@*)@*)] +# at least two different rings can be found which are sharing just one atom. 
+# these two rings can be connected by a third ring, so it matches also some +# bridged systems, like morphine + +Annelated_rings: [R;$(*(@*)(@*)@*);!$([R2;$(*(@*)(@*)(@*)@*)])]@[R;$(*(@*)(@*)@*);!$([R2;$(*(@*)(@*)(@*)@*)])] +# two different rings sharing exactly two atoms + +Bridged_rings: [R;$(*(@*)(@*)@*);!$([D4R;$(*(@*)(@*)(@*)@*)]);!$([R;$(*(@*)(@*)@*);!$([R2;$(*(@*)(@*)(@*)@*)])]@[R;$(*(@*)(@*)@*);!$([R2;$(*(@*)(@*)(@*)@*)])])] +# part of two or more rings, not spiro, not annelated -> finds bridgehead atoms, +# but only if they are not annelated at the same time - otherwise impossible (?) +# to distinguish from non-bridgehead annelated atoms + +# some basic ring-patterns (just size, no other information): + + + + + +# III.3 Sugars and Nucleosides/Nucleotides, Steroids +# -------------------------------------------------- + +# because of the large variety of sugar derivatives, different patterns can be applied. +# The choice of patterns and their combinations will depend on the contents of the database +# e.g. natural products, nucleoside analogues with modified sugars, ... as well as on the +# desired restriction + + +Sugar_pattern_1: [OX2;$([r5]1@C@C@C(O)@C1),$([r6]1@C@C@C(O)@C(O)@C1)] +# 5 or 6-membered ring containing one O and at least one (r5) or two (r6) oxygen-substituents. + +Sugar_pattern_2: [OX2;$([r5]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C1),$([r6]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C@C1)] +# 5 or 6-membered ring containing one O and an acetal-like bond at position 2. 
+ +Sugar_pattern_combi: [OX2;$([r5]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C(O)@C1),$([r6]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C(O)@C(O)@C1)] +# combination of the two above + +Sugar_pattern_2_reducing: [OX2;$([r5]1@C(!@[OX2H1])@C@C@C1),$([r6]1@C(!@[OX2H1])@C@C@C@C1)] +# 5 or 6-membered cyclic hemi-acetal + +Sugar_pattern_2_alpha: [OX2;$([r5]1@[C@@](!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C1),$([r6]1@[C@@](!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C@C1)] +# 5 or 6-membered cyclic hemi-acetal + +Sugar_pattern_2_beta: [OX2;$([r5]1@[C@](!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C1),$([r6]1@[C@](!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C@C1)] +# 5 or 6-membered cyclic hemi-acetal + +##Poly_sugar_1: ([OX2;$([r5]1@C@C@C(O)@C1),$([r6]1@C@C@C(O)@C(O)@C1)].[OX2;$([r5]1@C@C@C(O)@C1),$([r6]1@C@C@C(O)@C(O)@C1)]) +# pattern1 occurs more than once (in same molecule, but moieties don't have to be adjacent!) + +##Poly_sugar_2: ([OX2;$([r5]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C1),$([r6]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C@C1)].[OX2;$([r5]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C1),$([r6]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C@C1)]) +# pattern2 occurs more than once (in same molecule, but moieties don't have to be adjacent!) + + +# III.4 Everything else... +# ------------------------ + +Conjugated_double_bond: *=*[*]=,#,:[*] + +Conjugated_tripple_bond: *#*[*]=,#,:[*] + +Cis_double_bond: */[D2]=[D2]\* +# only one single-bonded substituent on each DB-atom. no aromats. +# only found when character of DB is explicitly stated. + +Trans_double_bond: */[D2]=[D2]/* +# analog + +Mixed_anhydrides: [$(*=O),$([#16,#14,#5]),$([#7]([#6]=[OX1]))][#8X2][$(*=O),$([#16,#14,#5]),$([#7]([#6]=[OX1]))] +# should hit all combinations of two acids + +Halogen_on_hetero: [FX1,ClX1,BrX1,IX1][!#6] + +Halogen_multi_subst: [F,Cl,Br,I;!$([X1]);!$([X0-])] +# Halogen which is not mono-substituted nor an anion, e.g. chlorate. +# Most of these cases should be also filtered by Halogen_on_hetero. 
+ +Trifluoromethyl: [FX1][CX4;!$([H0][Cl,Br,I]);!$([F][C]([F])([F])[F])]([FX1])([FX1]) +# C with three F attached, connected to anything which is not another halogen + +C_ONS_bond: [#6]~[#7,#8,#16] +# probably all drug-like molecules have at least one O, N, or S connected to a C -> nice filter + +## Mixture: (*).(*) +# two or more separate parts, may also be salt +# component-level grouping is not yet supported in Open Babel Version 2.0 + + +Charged: [!+0] + +Anion: [-1,-2,-3,-4,-5,-6,-7] + +Kation: [+1,+2,+3,+4,+5,+6,+7] + +Salt: ([-1,-2,-3,-4,-5,-6,-7]).([+1,+2,+3,+4,+5,+6,+7]) +# two or more separate components with opposite charges + +##Zwitterion: ([-1,-2,-3,-4,-5,-6,-7].[+1,+2,+3,+4,+5,+6,+7]) +# both negative and positive charges somewhere within the same molecule. + +1,3-Tautomerizable: [$([#7X2,OX1,SX1]=*[!H0;!$([a;!n])]),$([#7X3,OX2,SX2;!H0]*=*),$([#7X3,OX2,SX2;!H0]*:n)] +# 1,3 migration of H allowed. Includes keto/enol and amide/enamide. +# Aromatic rings must stay aromatic - no keto form of phenol + +1,5-Tautomerizable: [$([#7X2,OX1,SX1]=,:**=,:*[!H0;!$([a;!n])]),$([#7X3,OX2,SX2;!H0]*=**=*),$([#7X3,OX2,SX2;!H0]*=,:**:n)] + +Rotatable_bond: [!$(*#*)&!D1]-!@[!$(*#*)&!D1] +# taken from http://www.daylight.com/support/contrib/smarts/content.html + +Michael_acceptor: [CX3]=[CX3][$([CX3]=[O,N,S]),$(C#[N]),$([S,P]=[OX1]),$([NX3]=O),$([NX3+](=O)[O-])] +# the classical case: C=C near carbonyl, nitrile, nitro, or similar +# Oxo-heteroaromats and similar are not included. + +Dicarbodiazene: [CX3](=[OX1])[NX2]=[NX2][CX3](=[OX1]) +# Michael-like acceptor, see Mitsunobu reaction + +# H-Bond_donor: + +# H-Bond_acceptor: + +# Pos_ionizable: + +# Neg_ionizable: + +# Unlikely_ions: +# O+,N-,C+,C-, ... 
+ +CH-acidic: [$([CX4;!$([H0]);!$(C[!#6;!$([P,S]=O);!$(N(~O)~O)])][$([CX3]=[O,N,S]),$(C#[N]),$([S,P]=[OX1]),$([NX3]=O),$([NX3+](=O)[O-]);!$(*[S,O,N;H1,H2]);!$([*+0][S,O;X1-])]),$([CX4;!$([H0])]1[CX3]=[CX3][CX3]=[CX3]1)] +# C-H alpha to carbonyl, nitro or similar, C is not double-bonded, only C, H, S,P=O and nitro substituents allowed. +# pentadiene is included. acids, their salts, prim./sec. amides, and imides are excluded. +# hits also CH-acidic_strong + +CH-acidic_strong: [CX4;!$([H0]);!$(C[!#6;!$([P,S]=O);!$(N(~O)~O)])]([$([CX3]=[O,N,S]),$(C#[N]),$([S,P]=[OX1]),$([NX3]=O),$([NX3+](=O)[O-]);!$(*[S,O,N;H1,H2]);!$([*+0][S,O;X1-])])[$([CX3]=[O,N,S]),$(C#[N]),$([S,P]=[OX1]),$([NX3]=O),$([NX3+](=O)[O-]);!$(*[S,O,N;H1,H2]);!$([*+0][S,O;X1-])] +# same as above (without pentadiene), but carbonyl or similar on two or three sides + +Chiral_center_specified: [$([*@](~*)(~*)(*)*),$([*@H](*)(*)*),$([*@](~*)(*)*),$([*@H](~*)~*)] +# Hits atoms with tetrahedral chirality, if chiral center is specified in the SMILES string +# depictmatch does not find oxonium, sulfonium, or sulfoxides! + +# Chiral_center_unspecified: [$([*@?](~*)(~*)(*)*),$([*@?H](*)(*)*),$([*@?](~*)(*)*),$([*@?H](~*)~*)] +# Hits atoms with tetrahedral chirality, if chiral center is not specified in the SMILES string +# "@?" 
(unspecified chirality) is not yet supported in Open Babel Version 2.0 + \ No newline at end of file diff --git a/paper/data/combined-cv.csv b/paper/data/combined-cv.csv new file mode 100644 index 0000000..4de9b72 --- /dev/null +++ b/paper/data/combined-cv.csv @@ -0,0 +1,923 @@ +SMILES,LOAEL_measured_median,LOAEL_predicted,Confidence +ClC12C3C4(C(C1(Cl)Cl)(C1(C2(C3(Cl)C(C41Cl)(Cl)Cl)Cl)Cl)Cl)Cl,1.9565721591442926e-05,0.0014218133641616987,0.625 +ClC1=C(Cl)C2(C(C1(Cl)C1C2C2CC1C=C2)(Cl)Cl)Cl,2.7404023436797774e-05,0.0012794179116857743,1 +ClC1C2OC2C2C1C1(Cl)C(=C(C2(C1(Cl)Cl)Cl)Cl)Cl,6.421500622500271e-05,0.0006312904946719587,1 +N#Cc1nn(c(c1S(=O)C(F)(F)F)N)c1c(Cl)cc(cc1Cl)C(F)(F)F,0.00013496580117055152,0.034974567888840846,1 +N#Cc1nn(c(c1S(=O)C(F)(F)F)N)c1c(Cl)cc(cc1Cl)C(F)(F)F,0.0001372533562906347,0.02698619904377642,1 +CCSCCSP(=S)(OCC)OCC,0.00014577045919371006,0.0029396972764529202,1 +CCOP(=S)(SCSC(C)(C)C)OCC,0.00019068711849574906,0.0034018170593390737,1 +Clc1ccc2c(c1)[n+]([O-])nc(n2)n1cncc1,0.00020190555530632425,0.03094282625719898,1 +ClC1=C(Cl)C2(C(C1(Cl)C1C2C2CC1C1C2O1)(Cl)Cl)Cl,0.0002625296750418184,0.0014370243329576793,1 +OC(=O)C(Oc1ccc(cc1)Oc1ncc(cc1Cl)C(F)(F)F)C,0.00027647194701359843,0.01497899097540358,1 +OC(=O)C(Oc1ccc(cc1)Oc1ncc(cc1Cl)C(F)(F)F)C,0.0002764719511333511,0.018693951262222713,1 +ClC1=C(Cl)C2(C(C1(Cl)C1C2C2CC1C1C2O1)(Cl)Cl)Cl,0.000328162093802273,0.0009393540815108845,1 +CCOP(=S)(SCSC(C)(C)C)OCC,0.00034670385697674235,0.0034018170593390737,1 +CCSCCSP(=S)(OCC)OCC,0.00036442614798427517,0.0032087834044491653,1 +ClC1=C(Cl)C2(C(C1(Cl)C1C2C2CC1C1C2O1)(Cl)Cl)Cl,0.00039379451256272755,0.0009393540815108845,1 +ClC1C2OC2C2C1C1(Cl)C(=C(C2(C1(Cl)Cl)Cl)Cl)Cl,0.0005137200498000217,0.0006312904946719587,1 +CNC(=O)ON=CC(SC)(C)C,0.0005255875464343458,0.03526134931259953,1 +COC(=O)C(Oc1ccc(cc1)Oc1ccc(cc1Cl)Cl)C,0.0005861906011027885,0.03401894999785191,1 +CCSCSP(=S)(OCC)OCC,0.0006144925475253195,0.0015689457511481922,1 
+CCSCSP(=S)(OCC)OCC,0.0006144925612602997,0.0015930132454562913,1 +OC1CCCCCc2cc(O)cc(c2C(=O)OC(CCC1)C)O,0.0006203550142861557,1.1513016524808917,1 +ClC1CC2C(C1Cl)C1(C(C2(Cl)C(=C1Cl)Cl)(Cl)Cl)Cl,0.000634488903569986,0.0005835576184954016,1 +ClC(C(c1ccc(cc1)Cl)c1ccc(cc1)Cl)(Cl)Cl,0.0007052459522690667,0.043153175434619336,1 +CCSCCSP(=S)(OCC)OCC,0.0008017375255654054,0.0029396972764529202,1 +CCOP(=O)(SC(CC)C)SC(CC)C,0.0008210296720157477,0.012277367120843022,1 +ClC1C=CC2C1C1(Cl)C(=C(C2(C1(Cl)Cl)Cl)Cl)Cl,0.0008437853335108407,0.0008952202189140214,1 +c1ccc(cc1)[Sn](c1ccccc1)c1ccccc1,0.0008571117562305596,0.3621072701107427,1 +CNC(=O)CSP(=S)(OC)OC,0.000872381733741038,0.011977939066676562,1 +CCS(=O)CCSP(=O)(OC)OC,0.0008932752807580748,0.001392123295168118,1 +CCOP(=O)(SC(CC)C)SC(CC)C,0.0009245829520661433,0.01634116998391423,1 +COP(=S)(Oc1ccc(cc1)N(=O)=O)OC,0.0009498211030948742,0.025384429146594292,1 +OC(=O)c1ccccc1.CCC(C1OC2(C=CC1C)OC1CC=C(C)C(OC3CC(OC)C(C(O3)C)OC3CC(OC)C(C(O3)C)NC)C(C)C=CC=C3C4(C(C(=O)OC(C2)C1)C=C(C)C(C4OC3)O)O)C,0.0009918273033473258,0.0020195689898693642,1 +Clc1c(Cl)c(Cl)c(c(c1Cl)Cl)Cl,0.0010183220720957982,0.09921198034267042,1 +COC1CC(OC2C(C)C=CC=C3COC4C3(O)C(C=C(C4O)C)C(=O)OC3CC(CC=C2C)OC2(C3)C=CC(C(O2)C(C)C)C)OC(C1OC1CC(OC)C(C(O1)C)NC(=O)C)C,0.0011109849279118543,0.0018866442387745258,1 +COP(=O)(SCCS(=O)(=O)CC)OC,0.0011437981092748413,0.0012462691090840692,1 +COC1CC(OC2C(C)C=CC=C3COC4C3(O)C(C=C(C4O)C)C(=O)OC3CC(CC=C2C)OC2(C3)C=CC(C(O2)C(CC)C)C)OC(C1OC1CC(OC)C(C(O1)C)O)C.COC1CC(OC(C1OC1CC(OC)C(C(O1)C)O)C)OC1C(C)C=CC=C2COC3C2(O)C(C=C(C3O)C)C(=O)OC2CC(CC=C1C)OC1(C2)C=CC(C(O1)C(C)C)C,0.0011546496256700967,0.0021478186033654857,1 +O=C1CCCC(=O)C1C(=O)c1ccc(cc1Cl)S(=O)(=O)C,0.0012166633663470796,0.017372736621606367,1 +ClC1CC2C(C1Cl)C1(C(C2(Cl)C(=C1Cl)Cl)(Cl)Cl)Cl,0.0012201709684038192,0.0005942117597564336,1 +ClC12C(Cl)(Cl)C3(C4(C1(Cl)C1(C2(Cl)C3(C4(C1(Cl)Cl)Cl)Cl)Cl)Cl)Cl,0.0012831252531881078,6.239999085601705e-05,0.625 
+CCOP(=O)(N1CCSC1=O)SC(CC)C,0.001341107599716744,0.01204374051145044,1 +COP(=O)(SC)N,0.0013817128392583306,0.33442367385922134,1 +O=C1CCCC(=O)C1C(=O)c1ccc(cc1[N+](=O)[O-])S(=O)(=O)C,0.001414591694222218,0.01480091228679537,1 +CCOP(=S)(Oc1ccc(cc1)N(=O)=O)OCC,0.001442007505168395,0.020357599749119566,1 +CCOP(=S)(Oc1ccccc1C(=O)OC(C)C)NC(C)C,0.0014476216329334154,0.11305876392072264,1 +CCSCCSP(=O)(OC)OC,0.001519854088965729,0.0009444327450089038,1 +CCOc1cc(nc(n1)CC)OP(=S)(OC)OC,0.0015395577035464635,0.008340344661164758,1 +COC(=O)/C=C(/OP(=O)(OC)OC)\C,0.0015614663384413926,0.02855485815997305,1 +COC(=O)C=C(OP(=O)(OC)OC)C,0.001561466365033004,0.019128379067914394,1 +CSc1ccc(cc1C)OP(=S)(OC)OC,0.001616797099077973,0.009022712543242823,1 +COP(=S)(Oc1ccc(c(c1)C)[N+](=O)[O-])OC,0.001659247904766673,0.012452573244964524,1 +C1CCC(CC1)[Sn](n1ncnc1)(C1CCCCC1)C1CCCCC1,0.0018110419025972907,0.013846742083373052,1 +ClC1C2(Cl)C3C4C5C1(Cl)C(C2(Cl)C5C3C1C4O1)(Cl)Cl,0.0018377077252927285,0.00013082348029644925,1 +CNC(=O)CCSCCSP(=O)(OC)OC,0.001879329112916984,0.002248599256352105,1 +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)[C@H](C(C)C)Nc1ccc(cc1Cl)C(F)(F)F,0.001988416717024977,0.011949513448740262,1 +COP(=S)(Oc1ccc(cc1)[N+](=O)[O-])OC,0.0019946243391447357,0.016643996746722955,1 +CNC(=O)C=C(OP(=O)(OC)OC)C,0.0020164586039868883,0.01114229082863414,1 +COP(=O)(SC)N,0.0020548549325897737,0.33442367385922134,1 +CCCSP(=O)(SCCC)OCC,0.002063225311384027,0.004637753717447314,1 +COc1sc(=O)n(n1)CSP(=S)(OC)OC,0.0021168829879502555,0.011073447351926287,1 +CCOP(=S)(Oc1ccc2c(c1)oc(=O)c(c2C)Cl)OCC,0.0022052807653206367,0.01194888189741255,1 +CO[C@H]1C[C@H](O[C@H]2[C@@H](C)C=CC=C3CO[C@H]4[C@]3(O)[C@@H](C=C([C@H]4O)C)C(=O)O[C@H]3C[C@@H](CC=C2C)O[C@]2(C3)C=C[C@@H]([C@H](O2)[C@H](CC)C)C)O[C@H]([C@@H]1O[C@H]1C[C@H](OC)[C@H]([C@@H](O1)C)O)C,0.002290749011702154,0.01125080791253125,1 +O=C1CCCC(=O)C1C(=O)c1ccc(c(c1Cl)COCC(F)(F)F)S(=O)(=O)C,0.002381932321850521,0.008233311333722891,1 
+S=C1NCCN1,0.0024471862937206963,0.1028640018189016,1 +OC(=O)c1ccccc1.CCC(C1OC2(C=CC1C)OC1CC=C(C)C(OC3CC(OC)C(C(O3)C)OC3CC(OC)C(C(O3)C)NC)C(C)C=CC=C3C4(C(C(=O)OC(C2)C1)C=C(C)C(C4OC3)O)O)C,0.0024795682583683147,0.0020195689898693642,1 +CSc1ccc(cc1C)OP(=S)(OC)OC,0.0025868753585247565,0.00964124005965057,1 +CSc1ccc(cc1C)OP(=S)(OC)OC,0.0025868754613179463,0.00964124005965057,1 +O[Sn](C1CCCCC1)(C1CCCCC1)C1CCCCC1,0.002596303652874617,0.20819551253516297,1 +COC(=O)C(Oc1ccc(cc1)Oc1ncc(cc1Cl)C(F)(F)F)C,0.0026615073878255148,0.001110815102353126,1 +COC1CC(OC2C(C)C=CC=C3COC4C3(O)C(C=C(C4O)C)C(=O)OC3CC(CC=C2C)OC2(C3)C=CC(C(O2)C(C)C)C)OC(C1OC1CC(OC)C(C(O1)C)NC(=O)C)C,0.0027774623197796356,0.0016355023479626871,1 +CCOP(=S)(Oc1nc(Cl)c(cc1Cl)Cl)OCC,0.002852364738724816,0.012867798419937399,1 +CCOP(=S)(OCC)SCSc1ccc(cc1)Cl,0.0029165972759564764,0.007851952924790793,1 +C1CCN2C(C1)C1CCCCN1CC2,0.002933359023382885,0.19425171774419603,1 +C1CCN2C(C1)C1CCCCN1CC2,0.002984821462389602,0.1549258324104941,1 +COP(=S)(Oc1nc(Cl)c(cc1Cl)Cl)OC,0.003100456591840454,0.02511976746876867,1 +Fc1ccc(cc1)N(C(=O)COc1nnc(s1)C(F)(F)F)C(C)C,0.0033027779077186826,0.04258949053140572,1 +CCOP(=S)(Oc1ccc2c(c1)oc(=O)c(c2C)Cl)OCC,0.0033630532459809582,0.01194888189741255,1 +CCOP(=O)(Oc1ccc(c(c1)C)SC)NC(C)C,0.0035601567181414275,0.007466085017812861,1 +CNC(=O)Oc1cccc2c1OC(O2)(C)C,0.0035838244976124515,0.07694918518284904,1 +O[Sn](C1CCCCC1)(C1CCCCC1)C1CCCCC1,0.003608862040355308,0.2852919932509755,1 +OC(=O)COc1nc(Cl)c(cc1Cl)Cl,0.0038990829980641837,0.04457170010150825,1 +CCc1nn(c(c1Cl)C(=O)NCc1ccc(cc1)Oc1ccc(cc1)C)C,0.003907559846623587,0.09156997000962404,1 +CCCCSP(=O)(SCCCC)SCCCC,0.003974424546249488,0.08968357544503174,1 +COP(=S)(SCn1nnc2c(c1=O)cccc2)OC,0.0041125115079416845,0.022792432288964708,1 +COc1sc(=O)n(n1)CSP(=S)(OC)OC,0.004134537178254452,0.011073447351926287,1 +CCOP(=S)(Oc1ncn(n1)c1ccccc1)OCC,0.004149211896481245,0.020459303699418275,1 
+CCOP(=S)(Oc1ncn(n1)c1ccccc1)OCC,0.004149212048673449,0.019916396281095625,1 +COc1sc(=O)n(n1)CSP(=S)(OC)OC,0.00416761352299651,0.011073447351926287,1 +CCOP(=O)(OC(=CCl)c1ccc(cc1Cl)Cl)OCC,0.004171650398342553,0.034753601670754815,1 +CCOP(=O)(O/C(=C/Cl)/c1ccc(cc1Cl)Cl)OCC,0.004171650398342553,0.027041195785532685,1 +Clc1nc(nc(n1)Cl)Nc1ccccc1Cl,0.004173898399328111,0.12718506389486406,1 +Clc1cccc(n1)C(Cl)(Cl)Cl,0.00433075312836283,0.31356490536288883,1 +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C(C(Br)(Br)Br)Br,0.004511229623452476,0.034342832130992346,1 +CCOP(=S)(OCC)SCSP(=S)(OCC)OCC,0.004681695305160139,0.0018870676558296706,1 +CCOP(=S)(Oc1ccc2c(c1)oc(=O)c(c2C)Cl)OCC,0.004686221626306353,0.01194888189741255,1 +N#CC(c1ccc(c(c1)Oc1ccccc1)F)OC(=O)C1C(C1(C)C)/C=C(/c1ccc(cc1)Cl)\Cl,0.004898276703964497,0.012215934999635499,1 +CCOP(=S)(Oc1cc(C)nc(n1)C(C)C)OCC,0.004928609097226672,0.013579132085509897,1 +CCOP(=O)(Oc1ccc(c(c1)C)SC)NC(C)C,0.004944661980269876,0.008370828170108842,1 +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Br)Br,0.004948543461552866,0.028969815332537945,1 +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C(C(C)C)Nc1ccc(cc1Cl)C(F)(F)F,0.004971041792562443,0.008927560449046905,1 +CCN(C(=O)C(=C(OP(=O)(OC)OC)C)Cl)CC,0.005005200069191886,0.0031029291827278202,1 +CCNc1nc(nc(n1)Cl)NC(C#N)(C)C,0.005193343612552968,0.02498552169568395,1 +CCOP(=S)(OCC)SCSP(=S)(OCC)OCC,0.005201883810203027,0.0019642241382633774,1 +COc1sc(=O)n(n1)CSP(=S)(OC)OC,0.005292207588165698,0.011073447351926287,1 +CCOP(=S)(SCn1c(=O)oc2c1ccc(c2)Cl)OCC,0.0054376113486863924,0.007139222358353881,1 +CCOP(=S)(Oc1ccc(cc1)[N+](=O)[O-])OCC,0.005493362006308507,0.016722640185006663,1 +CC(Cc1ccc(cc1)C(C)(C)C)CN1CC(C)OC(C1)C,0.005601647965290344,0.1536898021522977,1 +CC(Cc1ccc(cc1)C(C)(C)C)CN1CC(C)OC(C1)C,0.005601648122412352,0.1562686252062576,1 +CCOP(=O)(Oc1ccc(c(c1)C)SC)NC(C)C,0.005603950244305859,0.008370828170108842,1 +COP(=S)(SCN1C(=O)c2c(C1=O)cccc2)OC,0.005672488506643871,0.05202092805434199,1 
+OC(C(Cl)(Cl)Cl)(c1ccc(cc1)Cl)c1ccc(cc1)Cl,0.005938151689011985,0.05029432604141614,1 +Clc1ccc(cc1)C(c1ccccc1Cl)(c1cncnc1)O,0.006038720639060896,0.05060086507650378,1 +Fc1ccc(cc1)[Si](c1ccc(cc1)F)(Cn1cncn1)C,0.006341300659739408,0.03675874806038504,1 +COC(=O)Nc1nc2c([nH]1)cc(cc2)S(=O)c1ccccc1,0.006342219438128827,0.2672054856199581,1 +ClCC(N1C(=O)c2c(C1=O)cccc2)SP(=S)(OCC)OCC,0.006347661308292605,0.017422784587449506,1 +COP(=O)(SC)N,0.006377136181192296,0.33442367385922134,1 +CCNc1nc(Cl)nc(n1)NC(C)(C)C,0.006399377704543392,0.03380755287596103,1 +CCP(=S)(Sc1ccccc1)OCC,0.006414179135682054,0.005075504695397763,1 +COc1sc(=O)n(n1)CSP(=S)(OC)OC,0.006615259485207122,0.011073447351926287,1 +CNC(=O)Oc1cc(C)c(c(c1)C)N(C)C,0.0067481385934503825,0.07218409438237827,1 +COC(=O)N(C(=O)N1COC2(C(=N1)c1ccc(cc1C2)Cl)C(=O)OC)c1ccc(cc1)OC(F)(F)F,0.006820319575237628,0.06339245177977164,1 +COC(=O)N(C(=O)N1COC2(C(=N1)c1ccc(cc1C2)Cl)C(=O)OC)c1ccc(cc1)OC(F)(F)F,0.006820319755914397,0.05472978349709951,1 +CCN(c1nc(cc(n1)C)OP(=S)(OC)OC)CC,0.0068777238395693234,0.01923940928152079,1 +CCNc1nc(Cl)nc(n1)NC(C)(C)C,0.006921775895097049,0.036341284639957824,1 +CCOP(=S)(SCSC(C)(C)C)OCC,0.006934077036209056,0.0034018170593390737,1 +Clc1ccc(cc1)C(c1ccccc1Cl)(c1cncnc1)O,0.00694452873492003,0.05368556487660718,1 +CSc1ccc(cc1C)OP(=S)(OC)OC,0.007095942829084915,0.00964124005965057,1 +Cc1nn(c(c1/C=N/OCc1ccc(cc1)C(=O)OC(C)(C)C)Oc1ccccc1)C,0.0071176254993963305,0.35851467476737114,1 +O=S1OCC2C(CO1)C1(C(C2(Cl)C(=C1Cl)Cl)(Cl)Cl)Cl,0.007126618167084564,0.00036386924695734017,1 +S=C1NCCN1,0.00724367142941326,0.1028640018189016,1 +O=C(N(C)C)Nc1ccc(c(c1)Cl)Cl,0.007293179580314936,0.09848470718294207,1 +Cc1nn(c(c1C=NOCc1ccc(cc1)C(=O)OC(C)(C)C)Oc1ccccc1)C,0.0073074288460468996,0.27349694800258606,1 +Clc1ccc(cc1)C(c1ccccc1Cl)(c1cncnc1)O,0.007548400798826121,0.05192051124148076,1 +Fc1ccc(cc1)[Si](c1ccc(cc1)F)Cn1cncn1,0.007657523838454347,0.015872285245794083,1 
+CCCCOC(=O)C(Oc1ccc(cc1)Oc1ccc(cn1)C(F)(F)F)C,0.007825509706097071,0.0135847101618635,1 +Fc1ccc(cc1)C(c1ccccc1Cl)(c1cncnc1)O,0.007943029289634557,0.01679270358592001,1 +N#Cc1nn(c(c1S(=O)CC)N)c1c(Cl)cc(cc1Cl)C(F)(F)F,0.008056334643428573,0.00038395497183535377,1 +COP(=S)(SCn1nnc2c(c1=O)cccc2)OC,0.00813048252144793,0.023715894144774607,1 +CN(C(=S)S[Zn]SC(=S)N(C)C)C,0.00817493363915869,0.060687914488021834,1 +CCN(c1nc(cc(n1)C)OP(=S)(OC)OC)CC,0.008187766847509327,0.020367652110695248,1 +Clc1cc(cnc1Nc1c(cc(c(c1[N+](=O)[O-])Cl)C(F)(F)F)[N+](=O)[O-])C(F)(F)F,0.00821343424858256,0.043234794638137425,1 +Clc1ccc(cc1)OS(=O)(=O)c1ccc(cc1)Cl,0.008246440044818412,0.05297844739133817,1 +Clc1cc(cnc1Nc1c(cc(c(c1[N+](=O)[O-])Cl)C(F)(F)F)[N+](=O)[O-])C(F)(F)F,0.008385443694386083,0.04319062832350276,1 +COP(=S)(SCn1nnc2c(c1=O)cccc2)OC,0.008508644649457775,0.023715894144774607,1 +CCOP(=S)(Oc1ccc(cc1)[N+](=O)[O-])OCC,0.008583378006954733,0.017244476111998752,1 +[O-][N+](=O)c1cc([N+](=O)[O-])c(c(c1)[N+](=O)[O-])C,0.008805487227420639,0.102957030662423,1 +CSC(=O)c1c(nc(c(c1CC(C)C)C(=O)SC)C(F)(F)F)C(F)F,0.00904300899921393,0.11151045292283462,0.4074074074074074 +Clc1ccc(c(c1)Cl)C(Cn1cncn1)COC(C(F)F)(F)F,0.00913621053742932,0.051596683516273174,1 +CCCN(C(=O)SCc1ccccc1)CCC,0.009149216533940492,0.07936016289079004,1 +COCN(c1c(CC)cccc1CC)C(=O)CCl,0.009267253123156974,0.3058322031788613,1 +COP(=S)(Oc1nc(Cl)c(cc1Cl)Cl)OC,0.009301369775521361,0.023420014304830983,1 +CON(C(=O)Nc1ccc(cc1)Cl)C,0.00931754394759366,0.04327784599272855,1 +C=CC1(C)OC(=O)N(C1=O)c1cc(Cl)cc(c1)Cl,0.009436904951368202,0.10602678152830673,1 +COP(=S)(Oc1ccc(cc1)[N+](=O)[O-])OC,0.009498211030948741,0.01628204746038644,1 +CCCCOC(=O)C(Oc1ccc(cc1)Oc1ccc(cc1F)C#N)C,0.009625729959721526,0.03360318733595453,1 +COP(=O)(OC=C(Cl)Cl)OC,0.009729574839301364,0.030641286118982685,1 +CCCCOC(=O)C(Oc1ccc(cc1)Oc1ccc(cn1)C(F)(F)F)C,0.009886227162529472,0.009826009376979397,1 
+N#C/N=C\1/SCCN1Cc1ccc(nc1)Cl,0.009892243396986886,0.06554310510256599,1 +Cc1c(ccc(c1C1=NOCC1)S(=O)(=O)C)C(=O)c1cnn(c1O)C,0.009906758425540224,0.01953854834171629,1 +CC1(C)CNC(=NC1)NN=C(C=Cc1ccc(cc1)C(F)(F)F)C=Cc1ccc(cc1)C(F)(F)F,0.009909494556264633,0.06380780596868028,1 +CCOC(=O)C(Oc1ccc(cc1)Oc1cnc2c(n1)ccc(c2)Cl)C,0.009924832004782804,0.017223073095094944,1 +c1scc(n1)c1nc2c([nH]1)cccc2,0.009938002763559809,0.07816579105343396,1 +OCC(CCl)O,0.009951195933270719,8.18897175780498,1 +CON(C(=O)Nc1ccc(c(c1)Cl)Cl)C,0.010036375840596658,0.04188056937044515,1 +CCCSP(=O)(SCCC)OCC,0.010068539755671456,0.007260057210269471,1 +FC(c1ccc(cc1)C=CC(=NN=C1NCC(CN1)(C)C)C=Cc1ccc(cc1)C(F)(F)F)(F)F,0.010111728942243584,0.10365462498638998,1 +COP(=O)(OC=C(Cl)Cl)OC,0.010408382170442241,0.024813902049782052,1 +COP(=O)(OC=C(Cl)Cl)OC,0.010408382386229365,0.024813902049782052,1 +CC(Oc1cc(c(cc1Cl)Cl)n1nc(oc1=O)C(C)(C)C)C,0.010428101697378017,0.12913282716460453,1 +CCOC(=O)Nc1cccc(c1)OC(=O)Nc1ccccc1,0.010655682947629983,0.21208880713068504,1 +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Br)Br,0.010688854065726137,0.027254476379320505,1 +CCOC(=O)c1cn2nc(cc2nc1C)OP(=S)(OCC)OCC,0.010713392485187262,0.04161641779803941,1 +O=C(C1C(C1(C)C)C=C(C(F)(F)F)Cl)OCc1c(F)c(F)c(c(c1F)F)C,0.010985502766340648,0.05759405368015889,1 +CCCSP(=O)(SCCC)OCC,0.011141416681473747,0.0060229627834060846,1 +O=C(C(Oc1ccc(cc1)Oc1cnc2c(n1)ccc(c2)Cl)C)OCCON=C(C)C,0.011264301100355506,0.012923335664014797,1 +Clc1cc(ccc1Oc1ccc(c(c1)C(=O)NS(=O)(=O)C)[N+](=O)[O-])C(F)(F)F,0.011395676083924233,0.08349876089402122,1 +CNC(=O)CSP(=S)(OC)OC,0.011450010084732691,0.012794716153570413,1 +O=C(C1C(C1(C)C)C=C(C(F)(F)F)Cl)OCc1cccc(c1C)c1ccccc1,0.011824026606519262,0.15137022050268478,1 +O=C(C1C(C1(C)C)C=C(C(F)(F)F)Cl)OCc1cccc(c1C)c1ccccc1,0.011824026606519262,0.143812768795011,1 +CCOP(=S)(Oc1ccc(cc1)N(=O)=O)OCC,0.012016729209736626,0.018791316556032868,1 
+O=S1OCC2C(CO1)C1(C(C2(Cl)C(=C1Cl)Cl)(Cl)Cl)Cl,0.01228727229779905,0.00036386924695734017,1 +Clc1cc(Cl)c(c(c1O)Cc1c(O)c(Cl)cc(c1Cl)Cl)Cl,0.012287924553322883,0.054451504972635976,1 +N#Cc1c(Cl)c(C#N)c(c(c1Cl)Cl)Cl,0.012410167132297197,0.06894214569190218,1 +COP(=S)(SCN1C(=O)c2c(C1=O)cccc2)OC,0.012605530348696702,0.05298278025145332,1 +Clc1ccc(c(c1)Cl)n1c(nc2c(c1=O)cc(cc2)F)n1cncn1,0.01268036889326992,0.027097912477256368,1 +CCC1CCCC(OC2CCC(C(O2)C)N(C)C)C(C)C(=O)C2=CC3C(C2CC(=O)O1)C=C(C)C1C3CC(C1)OC1OC(C)C(C(C1OC)OC)OC,0.012734890360905185,0.019305450727286316,1 +CC1C=CC=C2COC3C2(O)C(C=C(C3O)C)C(=O)OC2CC(CC=C(C1)C)OC1(C2)CCC(C(O1)C)C,0.01286229964885329,0.004182757927211324,1 +COc1ccc(cc1NNC(=O)OC(C)C)c1ccccc1,0.01298475189092086,0.7046634001865686,1 +CN1CCC(CC1)C1CCN(CC1)C,0.012988179839533329,0.07921931623605762,1 +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C(c1ccc(cc1)OC(F)F)C(C)C,0.013290157156772887,0.03653238913423377,1 +CCCN(C(=O)n1cncc1)CCOc1c(Cl)cc(cc1Cl)Cl,0.013539867103284017,0.05900731758603697,1 +COP(=O)(NC(=O)C)SC,0.013648831720059621,0.01603420284847195,1 +C[n+]1ccc(cc1)c1cc[n+](cc1)C,0.013691057325028715,0.08721751069244073,1 +O=c1c(Cl)c(SCc2ccc(cc2)C(C)(C)C)cnn1C(C)(C)C,0.013701160159437661,0.03738986432484401,1 +CNP(=O)(Oc1ccc(cc1Cl)C(C)(C)C)OC,0.013712205220154254,0.045144130422190586,1 +NC1=C(Cl)C(=O)c2c(C1=O)cccc2,0.013920121360835688,0.9171968702966398,1 +CCN(C(=O)SCC)C1CCCCC1,0.013930451940080113,0.0635433548789227,1 +CNC(=O)Oc1cccc2c1OC(C2)(C)C,0.0139433514779606,0.06747705209462423,1 +O=C(C1C(C1(C)C)/C=C(/C(F)(F)F)\Cl)OCc1c(F)c(F)c(c(c1F)F)C,0.01409010160197152,0.06064324783216236,1 +CS/C(=N/OC(=O)N(SN(C(=O)O/N=C(/SC)\C)C)C)/C,0.014105593115928905,0.10857811869982233,1 +CC(c1ccccc1)(C[Sn](O[Sn](CC(c1ccccc1)(C)C)(CC(c1ccccc1)(C)C)CC(c1ccccc1)(C)C)(CC(c1ccccc1)(C)C)CC(c1ccccc1)(C)C)C,0.014249578440471417,0.10151914399441331,1 +CON(C(=O)Nc1ccc(cc1)Br)C,0.014357399945172603,0.04331377020795739,1 
+CCCN(c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-])CC1CC1,0.014397200032537671,0.15891148422161222,1 +CC(Oc1cc(c(cc1Cl)Cl)n1nc(oc1=O)C(C)(C)C)C,0.01448347496337274,0.12913282716460453,1 +N#Cc1c(Cl)cccc1Cl,0.014533918736325764,0.08170904638795044,1 +ClC1CC2C(C1Cl)C1(C(C2(Cl)C(=C1Cl)Cl)(Cl)Cl)Cl,0.014642051620845831,0.0008687145669203877,1 +CC(c1ccc(cc1)CCOc1ncnc2c1cccc2)(C)C,0.014686613132547533,0.06701642332516593,1 +CCCCC(c1ccc(cc1Cl)Cl)(Cn1cncn1)O,0.014958135679074535,0.043118381206081816,1 +N#CC(c1ccc(cc1)Oc1ccccc1)OC(=O)C1C(C1(C)C)/C=C\C(=O)OC(C(F)(F)F)C(F)(F)F,0.014960133059978587,0.03591110528319521,1 +N#Cc1c(Cl)c(C#N)c(c(c1Cl)Cl)Cl,0.015042627044387032,0.07089396189028405,1 +N#CC(c1cc(C)c(cc1Cl)NC(=O)c1cc(I)cc(c1O)I)c1ccc(cc1)Cl,0.015081279803436631,0.08223009030815089,1 +Clc1cc(cnc1CCNC(=O)c1ccccc1C(F)(F)F)C(F)(F)F,0.015124216704213374,0.04198917544443029,1 +Fc1ccc(cc1)C1(Cn2cncn2)OC1c1ccccc1Cl,0.015162725459871818,0.03914579999970951,1 +N#CN=S(=O)(C(c1ccc(nc1)C(F)(F)F)C)C,0.015292167409562457,0.046420554340929926,1 +CC(C1C2CCC1c1c2cccc1NC(=O)c1cn(nc1C(F)F)C)C,0.015302732709143212,0.07596715525228381,1 +O[Sn](C1CCCCC1)(C1CCCCC1)C1CCCCC1,0.015577821917247702,0.20819551253516297,1 +c1cc[n+]2c(c1)c1cccc[n+]1CC2,0.015794866515636753,0.10910993361405372,1 +Fc1ccc(cc1)[Si](c1ccc(cc1)F)(Cn1cncn1)C,0.01585325164934852,0.033295448823770106,1 +ClCC1CN(C(=O)C1Cl)c1cccc(c1)C(F)(F)F,0.016019730669239306,0.09987678658093127,1 +CCCCN(c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-])CC,0.016105987222784814,0.14199505491303127,1 +CCOP(=S)(Oc1cc(C)nc(n1)C(C)C)OCC,0.01642869699075557,0.013579132085509897,1 +CCN(C(=O)SCc1ccc(cc1)Cl)CC,0.016680921188449865,0.0507760144199629,1 +O=S1OCC2C(CO1)C1(C(C2(Cl)C(=C1Cl)Cl)(Cl)Cl)Cl,0.01707930849394068,0.00028693011525711496,1 +CCOP(=S)(Oc1nc(Cl)c(cc1Cl)Cl)OCC,0.0171141884323489,0.013006855002501155,1 +BrC1COC(C1)(Cn1cncn1)c1ccc(cc1Cl)Cl,0.017185416989653705,0.048587566189001355,1 
+CC(NC(=S)[S])CNC(=S)S[Zn],0.017255039351497643,0.16793286430045296,1 +CC(NC(=S)[S])CNC(=S)S[Zn],0.017255039351497643,0.25122749958492463,1 +COCC(N(c1c(C)csc1C)C(=O)CCl)C,0.018129419544573026,0.2688602203826675,1 +Fc1ccc(cc1)C1(Cn2cncn2)OC1c1ccccc1Cl,0.018195270551846183,0.04116692662855389,1 +N#Cc1c(Cl)cccc1Cl,0.0186034162597095,0.09362570930859937,1 +O=C(C1C(C1(C)C)C=C(C(F)(F)F)Cl)OCc1cccc(c1C)c1ccccc1,0.018918442570430818,0.14587700628362912,1 +CCOP(=S)(Oc1cc(C)nc(n1)C(C)C)OCC,0.019057288509276463,0.011024013658939326,1 +CN1CCC(CC1)C1CCN(CC1)C,0.019100264469901956,0.07921931623605762,1 +CNC(=O)ON=C(C(=O)N(C)C)SC,0.019109609238234706,0.030626742070780578,1 +CCN(C(=O)SCc1ccc(cc1)Cl)CC,0.019396419126203733,0.04165007467236668,1 +CCc1nn(c(c1Cl)C(=O)NCc1ccc(cc1)C(C)(C)C)C,0.019469491695902355,0.021113366139126708,1 +CCO/N=C(\C1=C(O)CC(CC1=O)C1CCCSC1)/CCC,0.019664101798126703,0.07762178550268999,1 +CCCN(C(=O)n1cncc1)CCOc1c(Cl)cc(cc1Cl)Cl,0.01991156926953532,0.05960631504385691,1 +OC(=O)COc1ccc(cc1C)Cl,0.019938294964743114,0.06346773142597349,1 +C[n+]1ccc(cc1)c1cc[n+](cc1)C,0.020133908207418557,0.08721751069244073,1 +N#Cc1sc2=c(sc1C#N)c(=O)c1c(c2=O)cccc1,0.020248123201460456,0.15366918020134765,1 +CCP(=S)(Sc1ccccc1)OCC,0.020298035239500172,0.007022624901791193,1 +ClC=C,0.020800592400871575,0.04595842510750215,0.14285714285714285 +c1cc[n+]2c(c1)c1cccc[n+]1CC2,0.021168377697732887,0.10910993361405372,1 +Clc1cccc(c1)c1ccccc1,0.021202965065040626,0.13009825979015374,1 +CN1CN(C)CSC1=S,0.022184384932566064,0.05143501540726455,1 +CNC(=O)ON=C(C(=O)N(C)C)SC,0.022347753176858155,0.033050819536401606,1 +O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(cc1)Cl,0.022530984690614337,0.14263910849947523,1 +OC(=O)COc1ccc(cc1Cl)Cl,0.022620602193004043,0.04097134152841404,1 +CSC1=NC(C(=O)N1Nc1ccccc1)(C)c1ccccc1,0.022800155556897562,0.12539808663217103,1 +CNC(=O)ON=C(C(=O)N(C)C)SC,0.02280382932847922,0.027408561103509445,1 
+COC(=O)N(c1ccccc1COc1ccn(n1)c1ccc(cc1)Cl)OC,0.02320682656135787,0.14507030195358356,1 +CCCCCCCCCCCCC1=C(OC(=O)C)C(=O)c2c(C1=O)cccc2,0.02340650588512378,0.9676223412028344,1 +OC(COc1cccc2c1c1ccccc1[nH]2)CNC(C)C,0.023460058312320942,0.4421518817692824,1 +O=C(NC(=O)c1ccccc1Cl)Nc1ccc(cc1)OC(F)(F)F,0.023557308728421166,0.08799294295544582,1 +CCNc1nc(NCC)nc(n1)Cl,0.024794616275543167,0.023199124921492648,1 +CCOC(=O)C(Oc1ccc(cc1)Oc1nc2c(o1)cc(cc2)Cl)C,0.02487724874434851,0.02121434089596425,1 +CON(C(=O)Nc1ccc(c(c1)Cl)Cl)C,0.025090939601491648,0.04429754960600537,1 +O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(cc1)Cl,0.02510595436954169,0.14478308433451992,1 +OC(=O)C(Oc1ccc(cc1C)Cl)C,0.02562363979237584,0.021707289602534444,1 +Clc1ccccc1CC(C1(Cl)CC1)(Cn1cncn1)O,0.025625059257949535,0.06836698901333005,1 +O=C(N(C)C)Nc1ccc(c(c1)Cl)Cl,0.02574063309087087,0.10030552885026119,1 +O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(cc1)Cl,0.025749696789273527,0.13729423490475642,1 +CCNc1nc(NCC)nc(n1)Cl,0.026282293252075754,0.024182292204448317,1 +CC(OC(=O)C(c1ccc(cc1)Cl)(c1ccc(cc1)Cl)O)C,0.026531991066147967,0.07402452017139158,1 +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)/C=C(/C(F)(F)F)\Cl,0.026675554368592185,0.02013792188022764,1 +CCOC(=O)CCN(C(C)C)SN(C(=O)Oc1cccc2c1OC(C2)(C)C)C,0.02679478797527864,0.046797017478298335,1 +O=C(N(C)C)Nc1ccc(c(c1)Cl)Cl,0.026813159469657157,0.10298053181047498,1 +O=C(C1C(C1(C)C)C=C(Cl)Cl)OCc1c(F)c(F)cc(c1F)F,0.026942980220700186,0.07895997963188385,1 +CCOC(=O)c1ccccc1C1=c2cc(C)c(cc2=[O]c2c1cc(C)c(c2)NCC)NCC,0.027053999376946393,0.558940736971508,1 +CSCC(=NOC(=O)NC)C(C)(C)C,0.027483045022449526,0.02814269771044187,1 +Cc1nn(c(c1C(=O)c1ccc(cc1S(=O)(=O)C)C(F)(F)F)O)C,0.027599589461626675,0.01939274876574121,1 +N#CC(c1ccc(c(c1)Oc1ccccc1)F)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.02763145769616919,0.016213422691065498,1 +CCOc1cc(ccc1N(=O)=O)Oc1ccc(cc1Cl)C(F)(F)F,0.02764719470135984,0.0817358759734112,1 +[O-][N+](=O)c1cc(C(=O)N)c(c(c1)[N+](=O)[O-])C,0.027758250773633555,0.14450417327731357,1 
+N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(C(F)(F)F)Cl,0.02778703580061686,0.019431401179377007,1 +OC(=O)C(Oc1ccc(cc1)Oc1nc2c(o1)cc(cc2)Cl)C,0.028167056356499628,0.01657291399146401,1 +CSC(=NOC(=O)N(SN(C(=O)ON=C(SC)C)C)C)C,0.02821118623185781,0.06462052763667345,1 +CS/C(=N/OC(=O)N(SN(C(=O)O/N=C(/SC)\C)C)C)/C,0.02821118623185781,0.10857811869982233,1 +CC(N(C(=O)SCC(=C(Cl)Cl)Cl)C(C)C)C,0.028227806467376604,0.07944134555128374,1 +COP(=S)(SCN1C(=O)c2c(C1=O)cccc2)OC,0.02836244328456758,0.055364745981801994,1 +CC(N1C(=NC(C)(C)C)SCN(C1=O)c1ccccc1)C,0.02848365588181601,0.05869546609799135,1 +CCOP(=S)(Oc1nc(Cl)c(cc1Cl)Cl)OCC,0.028523647387248163,0.012656906925039336,1 +N#CC(c1ccc(c(c1)Oc1ccccc1)F)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.028782768433509572,0.015746986073988292,1 +CCO[C@H]1[C@@H](OC)[C@H](O[C@@H]2C[C@@H]3[C@@H](C2)[C@@H]2C=C4[C@H]([C@@H]2CC3)CC(=O)O[C@@H](CC)CCC[C@@H]([C@H](C4=O)C)O[C@H]2CC[C@@H]([C@H](O2)C)N(C)C)O[C@H]([C@@H]1OC)C,0.028877084613265123,0.0022907490117021535,0.20238095238095238 +N#Cc1cc(Br)c(c(c1)Br)O,0.028889958940868102,0.036947076319497126,1 +CCOC(=O)C(Cc1cc(c(cc1Cl)F)n1nc(n(c1=O)C(F)F)C)Cl,0.029112705155716945,0.13407963594673467,1 +C#CCOC(=O)C(Oc1ccc(cc1)Oc1ncc(cc1F)Cl)C,0.029164453292198207,0.015479254307699006,1 +Fc1ccc(c(c1)c1ccc(c(c1)Cl)Cl)NC(=O)c1cn(nc1C(F)F)C,0.02921233570136655,0.07619243803311933,1 +CCOC(=O)c1cn2nc(cc2nc1C)OP(=S)(OCC)OCC,0.02946182933426497,0.05714513450842352,1 +CC(N1/C(=N/C(C)(C)C)/SCN(C1=O)c1ccccc1)C,0.029465850912223458,0.05807349906709352,1 +Nc1n[nH]cn1,0.029733601205328832,0.042049662082769036,0.2727272727272727 +COC(=O)c1c(Cl)c(Cl)c(c(c1Cl)Cl)C(=O)OC,0.030123726579706293,0.7349282276874579,1 +Clc1c(ccc(c1N)[N+](=O)[O-])Oc1ccccc1,0.030226952270055448,0.14853625939827544,1 +CN(C(=S)SSC(=S)N(C)C)C,0.03036190470594063,0.052029910797683425,1 +OC(C(Cl)(Cl)Cl)(c1ccc(cc1)Cl)c1ccc(cc1)Cl,0.030365547751564796,0.04860032343819596,1 +CCNc1nc(NC(C)C)nc(n1)Cl,0.03036860603978392,0.04120360138919732,1 
+COc1nc(Oc2cccc(c2C(=O)[O-])Oc2nc(OC)cc(n2)OC)nc(c1)OC.[Na+],0.030507347552487064,0.5473869319708509,1 +N#CC(c1ccc(c(c1)Oc1ccccc1)F)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.03073999756536866,0.0168735264811073,1 +CC(C(c1cncnc1)(c1ccc(cc1)OC(F)(F)F)O)C,0.03138138916099924,0.026593616679335002,1 +COc1nc(C)nc(n1)N(C(=O)NS(=O)(=O)c1ccccc1C(=O)OC)C,0.031614325062739264,0.24608427091801371,1 +Fc1ccc(cc1)[Si](c1ccc(cc1)F)(Cn1cncn1)C,0.03170650329869704,0.03561255473876881,1 +Clc1ccc(cc1)C(c1ccccc1Cl)(c1cncnc1)O,0.03185425122707191,0.04655895302624533,1 +Cc1ccc2c(c1)nc1c(n2)sc(=O)s1,0.03201059303080734,0.07026663535161724,1 +CCN(c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-])CC(=C)C,0.03210675757919814,0.09640323913286399,1 +Fc1ccc(cc1)NC(=O)c1cccc(n1)Oc1cccc(c1)C(F)(F)F,0.032154821211279785,0.07351158254926493,1 +CCCn1c(OCCC)nc2c(c1=O)cc(cc2)I,0.03224060518839999,0.11241236083791278,0.10810810810810811 +CC(C(=O)O)Oc1cc(Cl)c(cc1Cl)Cl,0.03228091610123117,0.028342965522937247,1 +COc1cc(OC)nc(n1)NC(=O)NS(=O)(=O)c1ncccc1C(F)(F)F,0.0326520524201809,0.42156125408141487,1 +CN1CN(C)CSC1=S,0.03266034652463028,0.05143501540726455,1 +CCC1CCCC(OC2CCC(C(O2)C)N(C)C)C(C)C(=O)C2C(CC(=O)O1)C1CCC3C(C1C2)CC(C3)OC1CC(C)C(C(C1OC)OC)OC,0.03269690443692089,0.12862257019212,1 +N#CC(c1ccc(c(c1)Oc1ccccc1)F)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.03315774835702259,0.017722336281145168,1 +CCOC(=O)NCCOc1ccc(cc1)Oc1ccccc1,0.03318543029523152,0.18996129431743372,1 +Clc1ccc(c(c1)Cl)C1(OCCO1)Cn1cncn1,0.03331771398901528,0.04900048298881555,1 +CCOCn1c(c2ccc(cc2)Cl)c(c(c1C(F)(F)F)Br)C#N,0.03336499327732185,0.04693645636437956,1 +N#Cc1sc2=c(sc1C#N)c(=O)c1c(c2=O)cccc1,0.03374687200243409,0.16793514728653292,1 +CS/C(=N/OC(=O)N(SN(C(=O)O/N=C(/SC)\C)C)C)/C,0.03385342347822937,0.10857811869982233,1 +Fc1ccc(cc1)C(c1ccccc1F)(Cn1cncn1)O,0.03385434330908588,0.03999277860738707,1 +CCCCC(c1ccc(cc1)Cl)(Cn1cncn1)C#N,0.033936422812922216,0.08410265973865252,1 +CN(C=Nc1ccc(cc1C)C)C=Nc1ccc(cc1C)C,0.03408246361134649,0.06779996390642057,1 
+CN(/C=N/c1ccc(cc1C)C)/C=N/c1ccc(cc1C)C,0.03408246361134649,0.056616345813375844,1 +CCCCC(c1ccc(cc1)Cl)(Cn1cncn1)C#N,0.03428271152063386,0.07913702747112562,1 +ClC(C(SN1C(=O)C2C(C1=O)CC=CC2)(Cl)Cl)Cl,0.034377949341570596,0.32963878644864847,0.42857142857142855 +CN(C=Nc1ccc(cc1C)C)C=Nc1ccc(cc1C)C,0.034764112883573416,0.06034262025387581,1 +ClC(=CCOc1cc(Cl)c(c(c1)Cl)OCCCOc1ccc(cn1)C(F)(F)F)Cl,0.034818667907167616,0.030963599852862136,1 +O=C(C1=C(C)OCCS1)Nc1ccccc1,0.034848813981213346,0.23922425558098437,1 +CCCSP(=S)(Oc1ccc(cc1)SC)OCC,0.03566479582586673,0.0028959670740657155,1 +CCC(C(=O)OC1=C(C(=O)OC21CCCCC2)c1ccc(cc1Cl)Cl)(C)C,0.03578732146400678,0.05428588647784607,1 +CC(NC(=O)N1CC(=O)N(C1=O)c1cc(Cl)cc(c1)Cl)C,0.03634528529867737,0.0681906147697324,1 +N#CC(c1c(Cl)ccc(c1Cl)n1ncc(=O)[nH]c1=O)c1ccc(cc1)Cl,0.03679735812631385,0.04692561833297489,1 +CCOCn1c(c2ccc(cc2)Cl)c(c(c1C(F)(F)F)Br)C#N,0.036799624938222635,0.047490155350518225,1 +C#CCOC(c1ccc(cc1)Cl)C(=O)NCCc1ccc(c(c1)OC)OCC#C,0.0369041241749624,0.14271941307426342,1 +CC(Cc1ccccc1)N,0.036980547196719206,0.1383071475607304,1 +OC(C(C)(C)C)C(n1ncnc1)Cc1ccc(cc1)Cl,0.03744148066760202,0.06917897069352194,1 +CCN(c1c(cc(cc1N(=O)=O)C(F)(F)F)N(=O)=O)CC(=C)C,0.0375078950368263,0.14694818194557713,1 +Clc1c(O)c(Cl)c(c(c1Cl)Cl)Cl,0.037546481605565646,0.2687854579582351,1 +CC(OP(=S)(OC(C)C)SCCNS(=O)(=O)c1ccccc1)C,0.03773457509937652,0.003947387860828739,1 +Clc1ccc(cc1)c1ccccc1NC(=O)c1cccnc1Cl,0.03787805062535496,0.15771531798304803,1 +CCOC(=O)CSc1nc(nn1C(=O)N(C)C)C(C)(C)C,0.03816748004747272,0.12937755361522535,1 +OC(=O)C(Oc1ccc(cc1Cl)Cl)C,0.03828744186371015,0.017916585548049518,1 +CC(C(c1cncnc1)(c1ccc(cc1)OC(F)(F)F)O)C,0.038746408312020406,0.025608931320986156,1 +OC(=O)COc1cc(Cl)c(cc1Cl)Cl,0.03914162418169542,0.10092531552039558,1 +CCOP(=S)(Oc1nn(c(n1)Cl)C(C)C)OCC,0.039841737145637234,0.009533681952914956,1 +O=C(NC(=O)c1c(F)cccc1F)Nc1cc(Cl)c(cc1Cl)OC(C(C(F)(F)F)F)(F)F,0.03990998658130422,0.09428444360947194,1 
+O=C(CC(C)(C)C)OC1=C(C(=O)OC21CCCC2)c1c(C)cc(cc1C)C,0.039947970982482275,0.06756139796521358,1 +CNC(=S)S,0.04011276528748593,0.055889921503454794,1 +CCCN(c1c(cc(cc1[N+](=O)[O-])S(=O)(=O)N)[N+](=O)[O-])CCC,0.04042042788372036,0.14423663839522555,1 +CCCOC/C(=N\c1ccc(cc1C(F)(F)F)Cl)/n1cncc1,0.04049199977868229,0.013123381063449912,1 +OC1(Cn2ncnc2)C(CCC1(C)C)Cc1ccc(cc1)Cl,0.04095937862019833,0.0520426556409229,1 +CC(N(C(=O)SCC(=C(Cl)Cl)Cl)C(C)C)C,0.04102878665011248,0.03101114141686048,1 +CCN(C(=O)C(=C(OP(=O)(OC)OC)C)Cl)CC,0.041042640567373466,0.005254440579764089,1 +CCC(=O)Nc1ccc(c(c1)Cl)Cl,0.041269285481015994,0.10474606679531039,1 +CNC(=O)Oc1cc(C)c(c(c1)C)SC,0.041276958181115306,0.02137860199550009,1 +CCOC(=O)C(Oc1ccc(cc1)Oc1cnc2c(n1)ccc(c2)Cl)C,0.04157699893895499,0.017607383250018305,1 +Fc1ccc(c(c1)c1ccc(c(c1)Cl)Cl)NC(=O)c1cn(nc1C(F)F)C,0.04200781934177246,0.08272377649064482,1 +CN(C(=S)S[Zn]SC(=S)N(C)C)C,0.04250965492362519,0.060687914488021834,1 +Clc1ccc(c(c1)Cl)C1(OCCO1)Cn1cncn1,0.042646674541424644,0.04484975320989537,1 +CC(N(C(=O)SCC(=C(Cl)Cl)Cl)C(C)C)C,0.04266993811611698,0.03101114141686048,1 +C#CCOS(=O)OC1CCCCC1Oc1ccc(cc1)C(C)(C)C,0.04279938325518071,0.08305408428735889,1 +O=C(CC(C)(C)C)OC1=C(C(=O)OC21CCCC2)c1c(C)cc(cc1C)C,0.042917075351131324,0.11500417228402214,1 +ClC(C(SN1C(=O)C2C(C1=O)CC=CC2)(Cl)Cl)Cl,0.04297243667696324,0.32963878644864847,0.42857142857142855 +S=C1NCCCN1,0.04303491887745652,0.013843826474382414,1 +OC1(Cn2ncnc2)C(CCC1(C)C)Cc1ccc(cc1)Cl,0.043148047046675374,0.05035795195547215,1 +CC(Cc1ccc(cc1)C(C)(C)C)CN1CCCCC1,0.04326105065224025,0.008362846917186934,1 +CC(=CC1C(C1(C)C)C(=O)OCc1coc(c1)Cc1ccccc1)C,0.04432099700732809,0.12785635803715648,1 +Nc1ncn[nH]1,0.04460040180799325,0.042049662082769036,0.2727272727272727 +[S-]C(=S)NCCNC(=S)[S-].[Zn+2],0.04460661819584039,0.03222668255774988,1 +O=C1OC(C(=O)N1Nc1ccccc1)(C)c1ccc(cc1)Oc1ccccc1,0.044873074905021335,0.11984363339466098,1 +CCSC(=O)N1CCCCCC1,0.04487396262663614,0.0674393124471613,1 
+CCN(c1nc(cc(n1)C)OP(=S)(OC)OC)CC,0.04519647299825149,0.01978012974291905,1 +O=C1OC(C(=O)N1Nc1ccccc1)(C)c1ccc(cc1)Oc1ccccc1,0.045407278177700156,0.12814432090874833,1 +ClCC=CCl,0.045958425107502164,0.020800592400871572,0.14285714285714285 +CCOC(=O)Cn1c(=O)sc2c1c(Cl)ccc2,0.046003238627999404,0.13549038572812314,1 +Nc1ccc(cc1)Cl,0.047032433723070206,0.18361498193404419,1 +CCCN(C(=O)SCC)CCC,0.047538995974292175,0.03231666295357693,1 +C[n+]1ccc(cc1)c1cc[n+](cc1)C,0.04762340359884257,0.08721751069244073,1 +CN(C(=S)SSC(=S)N(C)C)C,0.04783039657471141,0.03890835518730275,1 +S=C1NCCCN1,0.04819910832192538,0.013843826474382414,1 +CON(C(=O)Nc1ccc(cc1)Br)C,0.048243951057630914,0.04331377020795739,1 +Cc1cccc(c1O)C,0.04911414454620167,0.25122554015626974,1 +CCC(C(=O)NCc1ccccc1)Oc1ccc(c(c1)C(F)(F)F)F,0.049813316199071624,0.07230599259525354,1 +O=C(c1cccc(c1C(=O)NC(CS(=O)(=O)C)(C)C)I)Nc1ccc(cc1C)C(C(F)(F)F)(C(F)(F)F)F,0.04982487508940451,0.1722962430290261,1 +CN(C(=S)SSC(=S)N(C)C)C,0.04990997903448147,0.03027061366730548,1 +COC(=O)Nc1nc2c([nH]1)cc(cc2)Sc1ccccc1,0.050108966959550236,0.19492200103766277,1 +ClCC1CN(C(=O)C1Cl)c1cccc(c1)C(F)(F)F,0.05030195369030707,0.10208585777687137,1 +C=CCOC(c1ccc(cc1Cl)Cl)Cn1cncc1,0.05047450068604942,0.02261325719862112,1 +CCSC(CC1CC(=O)C(C(=O)C1)C(=NOCC)CCC)C,0.05056765552287047,0.11190017893703338,1 +C#CCN1C(=O)COc2c1cc(c(c2)F)N1C(=O)C2=C(C1=O)CCCC2,0.05079984353648191,0.053941765550951655,0.11764705882352941 +CN(C(=O)Oc1nc(nc(c1C)C)N(C)C)C,0.051618595485714625,0.03602454025477674,1 +CN(C(=O)Oc1nc(nc(c1C)C)N(C)C)C,0.05161859628615915,0.03602454025477674,1 +Clc1ccc(cc1)CCC(C(C)(C)C)(Cn1cncn1)O,0.05165383561566402,0.054611602947941554,1 +CNC(=O)Oc1cc(C)c(c(c1)C)C,0.05174850433885335,0.0778264984093653,1 +Cc1ccc(cc1)N(S(=O)(=O)N(C)C)SC(Cl)(Cl)F,0.051834835094095484,0.07395889559292834,1 +CNC(=O)Oc1cccc(c1)/N=C/N(C)C,0.051976062085632144,0.06722824407685239,1 +CN(C(=O)Oc1nc(nc(c1C)C)N(C)C)C,0.0524579222415799,0.03602454025477674,1 
+O=N(=O)c1ccc(c(c1)N)C,0.05257947683683445,0.22402691528565083,1 +O=C1N(c2cc(Cl)cc(c2)Cl)C(=O)C2(C1(C)C2)C,0.05279126047017867,0.09049118545197148,1 +CC(C(c1ccc(cc1)Cl)(Cn1ncnc1)O)C1CC1,0.05326004956767166,0.03794754117008268,1 +O=C(N/C(=N\OCC1CC1)/c1c(F)c(F)ccc1C(F)(F)F)Cc1ccccc1,0.053352320292409515,0.0645264280345271,1 +NC(=NCCCCCCCCNCCCCCCCCN=C(N)N)N,0.053436074592710235,0.05644161644026265,1 +C=CCOC(c1ccc(cc1Cl)Cl)Cn1cncc1,0.05350296944357954,0.019176634380673414,1 +COCN(c1c(CC)cccc1CC)C(=O)CCl,0.05375006811431045,0.30362184184365276,1 +C#CCC1=C(C)C(CC1=O)OC(=O)C1C(C1(C)C)C=C(C)C,0.05492821614526029,0.15407527700390677,1 +CNC1=C(c2cccc(c2)C(F)(F)F)C(=O)C(O1)c1ccccc1,0.055204779037407746,0.186848969258529,1 +CCOc1ccc2c(c1)C(=CC(N2)(C)C)C,0.05522147585284508,0.20461649895602257,1 +O=C(c1ccc(cc1S(=O)(=O)C)C(F)(F)F)c1cnoc1C1CC1,0.05566064749641608,0.028872253041882734,1 +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)(C)C,0.05566320606558952,0.026489557558876053,1 +CCOC(=O)COC(=O)c1cc(ccc1[N+](=O)[O-])Oc1ccc(cc1Cl)C(F)(F)F,0.05583516191627437,0.10425225781099558,1 +COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1Cl,0.05590140200157206,0.24111855236121182,1 +COc1nc(nc(n1)C)NC(=O)[N-]S(=O)(=O)c1cc(I)ccc1C(=O)OC.[Na+],0.05611797964648073,0.21978363812693485,1 +O=C(C(Oc1ccc(cc1)Oc1cnc2c(n1)ccc(c2)Cl)C)OCCON=C(C)C,0.05632150550177753,0.0126188830300113,1 +N#CC(c1c(Cl)ccc(c1Cl)n1ncc(=O)[nH]c1=O)c1ccc(cc1)Cl,0.056422615793681234,0.047374255673794156,1 +CNC(=O)Oc1cccc(c1)N=CN(C)C,0.056495719658295813,0.06685758904726949,1 +CCOC(=O)C(c1ccc(cc1)Cl)(c1ccc(cc1)Cl)O,0.056582904287311254,0.07321730385685384,1 +Clc1ccccc1c1nnc(nn1)c1ccccc1Cl,0.05706818624978773,0.08823825917757558,1 +Clc1ccccc1c1nnc(nn1)c1ccccc1Cl,0.05706818876652619,0.07955524614517077,1 +CCCN(C(=O)n1cncc1)CCOc1c(Cl)cc(cc1Cl)Cl,0.05707983190600125,0.06413029578988182,1 +CCOCCN(C(=C(C)C)c1ccccc1)C(=O)CCl,0.057470413386035736,0.8644625103771973,1 +CN(C(CN1c2ccccc2Sc2c1cccc2)C)C,0.058364575374860554,0.0733243982471679,1 
+CCOC(=O)CCN(C(C)C)SN(C(=O)Oc1cccc2c1OC(C2)(C)C)C,0.05846135558242613,0.047500388629521745,1 +CC1COC(O1)(Cn1cncn1)c1ccc(cc1Cl)Oc1ccc(cc1)Cl,0.05932124091140686,0.07528051740350367,1 +Nc1n[nH]cn1,0.059467202410657664,0.042049662082769036,0.2727272727272727 +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C(c1ccc(cc1)Cl)C(C)C,0.05953797389131243,0.01695755720477578,1 +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)(C)C,0.06009909138187043,0.028551020286732454,1 +CC(OC(=O)C(c1ccc(cc1)Br)(c1ccc(cc1)Br)O)C,0.060497742776698574,0.09289647796203974,1 +CSc1nnc(c(=O)n1N)C(C)(C)C,0.060666030886662975,0.013701160159437665,0.11538461538461539 +CC(OC(=O)C(c1ccc(cc1)Br)(c1ccc(cc1)Br)O)C,0.06073132568962639,0.09289647796203974,1 +C=CC=CCC1=C(C)C(CC1=O)OC(=O)C1C(C1(C)C)C=C(C)C.C=CC=CCC1=C(C)C(CC1=O)OC(=O)C1C(C1(C)C)C=C(C(=O)OC)C,0.06134969850332702,0.18835039966107628,1 +C#CCOS(=O)OC1CCCCC1Oc1ccc(cc1)C(C)(C)C,0.0616311129759049,0.06290623779987833,1 +CNC(=O)ON=C(SC)C,0.061648442359631114,0.020019468411940664,1 +CNc1cnn(c(=O)c1Cl)c1cccc(c1)C(F)(F)F,0.06174515112035177,0.04547266109787749,1 +O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(c(c1)Cl)OC(C(OC(F)(F)F)F)(F)F,0.062106180868884746,0.06886154953419707,1 +COC(=O)c1cccc(c1S(=O)(=O)NC(=O)Nc1nc(OCC(F)(F)F)nc(n1)N(C)C)C,0.062140866929396014,0.23793955635016453,1 +CCNc1nc(SC)nc(n1)NC(C)(C)C,0.06214876624755196,0.03210078098954097,1 +[O-][N+](=O)c1cc(cc(c1)[N+](=O)[O-])[N+](=O)[O-],0.06245761469536169,0.12314474995874793,1 +Clc1cc(F)c(cc1C(=O)NS(=O)(=O)N(C(C)C)C)n1c(=O)cc(n(c1=O)C)C(F)(F)F,0.06269313377509025,0.029112705155716952,0.10416666666666667 +COP(=S)(SCN1C(=O)c2c(C1=O)cccc2)OC,0.06302765174348351,0.052741117655533944,1 +C=CCC1=C(C)C(CC1=O)OC(=O)C1C(C1(C)C)C=C(C)C,0.06332505687516009,0.15319090542803218,1 +ClCC(=O)N(c1c(C)cccc1C)Cn1cccn1,0.06336648858092589,0.2009430958791048,1 +CCOP(=S)(SCn1c(=O)oc2c1ccc(c2)Cl)OCC,0.0636200517424888,0.007268854919151466,1 +ClC(=CC1C(C1(C)C)C(=O)OCc1cccc(c1)Oc1ccccc1)Cl,0.06389160712181856,0.05152334138082678,1 
+O=C(NS(=O)(=O)c1c(C)cccc1C(=O)O)Nc1nc(OCC(F)(F)F)nc(n1)N(C)C,0.06396281173215432,0.19918021134354338,1 +CC(C1(C)N=C(NC1=O)c1nc2ccccc2cc1C(=O)O)C,0.06423944765895072,0.6484736290157593,1 +OC(C(C)(C)C)C(n1ncnc1)Oc1ccc(cc1)Cl,0.06424027322808253,0.06532519308478073,1 +COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccsc1C(=O)OC,0.06453419527613821,0.17678677136859147,1 +O=C(N(C)C)Nc1cccc(c1)C(F)(F)F,0.06459882942614491,0.05921475394254172,1 +O=C(N(C)C)Nc1ccc(c(c1)Cl)n1nc(oc1=O)C(C)(C)C,0.06493710428214157,0.015942173623299506,1 +O=C(NC(=O)c1c(F)cccc1F)Nc1cc(Cl)c(c(c1F)Cl)F,0.06507320207279278,0.05286631638058653,1 +CC1N(C(=O)NC2CCCCC2)C(=O)SC1c1ccc(cc1)Cl,0.06546156290207059,0.04247781166837751,1 +CCCCN(SN(C(=O)Oc1cccc2c1OC(C2)(C)C)C)CCCC,0.06569530810416269,0.09306700536140901,1 +Clc1ccccc1c1nnc(nn1)c1ccccc1Cl,0.06597478470118634,0.0852179108472748,1 +[O-][N+](=O)NC1=NCCN1Cc1ccc(nc1)Cl,0.0664943030028045,0.0767295442612898,1 +CSc1nnc(c(=O)n1N)C(C)(C)C,0.06719929397120725,0.013701160159437665,0.11538461538461539 +CCOc1ccc(cc1)C(COCc1cccc(c1)Oc1ccccc1)(C)C,0.06773123883198195,0.13345614868935765,1 +O=C(NC(=O)c1c(F)cccc1F)Nc1cc(Cl)c(c(c1F)Cl)F,0.06822190749765324,0.05286631638058653,1 +CCOc1ccc(cc1)C(COCc1cccc(c1)Oc1ccccc1)(C)C,0.0690593023384914,0.11750268224676094,1 +CCO/N=C(/C1=C(O)CC(CC1=O)c1c(C)cc(cc1C)C)\CC,0.06981686853252955,0.10822472760447657,1 +COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1Cl,0.06987675250196507,0.25518897842008476,1 +CSc1nnc(c(=O)n1N)C(C)(C)C,0.06999926640768805,0.013701160159437665,0.11538461538461539 +CO/N=C(/c1ccccc1COc1cc(C)ccc1C)\C(=O)NC,0.07046793589427701,0.35915435645836064,1 +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)(C)C,0.07154653735936956,0.02658541539960583,1 +COc1cc(OC)n2c(n1)nc(n2)S(=O)(=O)Nc1c(Cl)ccc(c1Cl)C,0.07172655770478076,0.20222775129603407,1 +C#CCOS(=O)OC1CCCCC1Oc1ccc(cc1)C(C)(C)C,0.07190296604559293,0.06217697904984631,1 +CCN1CCN(CC1)c1cc2c(cc1F)c(=O)c(cn2C1CC1)C(=O)O,0.07234386441112595,0.37000179744555806,1 
+CO/N=C(\c1ccccc1CO/N=C(/c1cccc(c1)C(F)(F)F)\C)/C(=O)OC,0.07272797449373557,0.31053460388440923,1 +CCCC1COC(O1)(Cn1cncn1)c1ccc(cc1Cl)Cl,0.07305234130123987,0.03935031057771622,1 +O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(c(c1)Cl)OC(C(OC(F)(F)F)F)(F)F,0.07306609422899836,0.07390850442771738,1 +O=C(N(C)C)Nc1cccc(c1)C(F)(F)F,0.07395704796137248,0.05990809332390995,1 +OC(C(C)(C)C)C(n1cncn1)Oc1ccc(cc1)c1ccccc1,0.07409262028018154,0.16009035449920442,1 +ClCCOc1ccccc1S(=O)(=O)NC(=O)Nc1nc(C)nc(n1)OC,0.07465930346752149,0.16636976770957124,1 +CCCSc1ccc2c(c1)[nH]c(n2)NC(=O)OC,0.07537743365466734,0.3352032975296077,1 +Cn1nc(c(c1)C(=O)Nc1cccc2c1C1CCC2C1=C(Cl)Cl)C(F)F,0.07583481070072216,0.055676398132991405,1 +Cn1cc(c2cccc(c2)C(F)(F)F)c(=O)c(c1)c1ccccc1,0.07591497971688389,0.09032237501386235,1 +Clc1ccc(cc1)CN(C(=O)Nc1ccccc1)C1CCCC1,0.0760257762657501,0.06594162430630518,1 +CNC(=O)Oc1cccc2c1cccc2,0.07752660703214034,0.12181893049236775,1 +N#C/N=C(/N(Cc1ccc(nc1)Cl)C)\C,0.07859017665904088,0.02837256498884179,1 +CC(c1cc(ccc1O)C(c1ccc(c(c1)C(C)C)O)(C)C)C,0.08001387248515598,0.4789486600597755,1 +NC(=N)NCCCCCCCCNC(=N)N,0.08102032708037427,0.2959746109899016,1 +CCOC(=O)NCCOc1ccc(cc1)Oc1ccccc1,0.08196801536106943,0.21672697643629135,1 +Clc1cc(cnc1CNC(=O)c1c(Cl)cccc1Cl)C(F)(F)F,0.08212099927021806,0.017595171727997856,1 +O=C(C(C)(C)C)C(n1ncnc1)Oc1ccc(cc1)Cl,0.08272375649019124,0.0611989580609177,1 +ClCCP(=O)(O)O,0.08304843107672291,2.424380344082731,1 +COC(=O)Nc1cccc(c1)OC(=O)Nc1cccc(c1)C,0.0832475217878744,0.15841056524633793,1 +COP(=S)(Oc1ccc(c(c1)C)[N+](=O)[O-])OC,0.08332310268057162,0.011938651697814882,1 +CCCN(c1c(cc(c(c1[N+](=O)[O-])N)C(F)(F)F)[N+](=O)[O-])CCC,0.08392957349588569,0.17738314568013785,1 +ClC(SN1C(=O)c2c(C1=O)cccc2)(Cl)Cl,0.08430066662269543,0.21959456105712238,1 +O=C(C(C)(C)C)C(n1ncnc1)Oc1ccc(cc1)Cl,0.08510674803234901,0.06216876566048836,1 +CCO/N=C(\C1=C(O)CC(CC1=O)C1CCCSC1)/CCC,0.08603044408485085,0.08133431394836685,1 
+CCCC(=C1C(=O)CC(CC1=O)C1CCCSC1)NOCC,0.08603044408485085,0.029262026512538307,1 +CCN(c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-])CC(=C)C,0.08701831648543702,0.10401259406145555,1 +COc1cc(ccc1OC)/C(=C/C(=O)N1CCOCC1)/c1ccc(cc1)Cl,0.08766124641710438,0.1664019969917766,1 +CCOC(=O)CC(C(=O)OCC)SP(=S)(OC)OC,0.08778355070659401,0.02087282021134488,1 +CC(=O)Nc1cc(NS(=O)(=O)C(F)(F)F)c(cc1C)C,0.08894826507859208,1.3357335474250853,1 +CCCN(c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-])CCC,0.08947770521301585,0.09458038139224523,1 +COCC(=O)Nc1cc(ccc1NC(=NC(=O)OC)NC(=O)OC)Sc1ccccc1,0.08959030532555236,0.15753779629122847,1 +O=C1N(OCC1(C)C)Cc1ccccc1Cl,0.08969617860069455,0.06602590336420336,1 +Nc1nc(NC2CC2)nc(n1)N,0.09026150563412319,0.10982169517930987,1 +ClC(C(c1ccc(cc1)Cl)c1ccc(cc1)Cl)(Cl)Cl,0.09027148189044054,0.042908725101945995,1 +Fc1ccc(cc1)C(=O)CCCN1CCN(CC1)c1ccccn1,0.09163218547527233,0.11110252941847366,1 +CCC(=O)Nc1ccc(c(c1)Cl)Cl,0.09170952329114665,0.11334998639980184,1 +N#CC(c1ccccc1)(Cn1cncn1)CCc1ccc(cc1)Cl,0.09203781459712614,0.04994324105131357,1 +Clc1ccc2c(c1)ncc(n2)Oc1ccc(cc1)OC(C(=O)OCC1CCCO1)C,0.09210345974638111,0.0127703526163709,1 +NC(=O)c1c(Cl)cccc1Cl,0.09261856560930491,0.11495532711221508,1 +COc1cc(ccc1OC)/C(=C/C(=O)N1CCOCC1)/c1ccc(cc1)Cl,0.09281779032399287,0.1584749483368686,1 +Clc1ccc(c(c1)Cl)NC(=O)C1(CC1)C(=O)O,0.09303171987631087,0.10818304574151816,1 +COC(=O)NC(=S)Nc1ccccc1NC(=S)NC(=O)OC,0.09345959256991566,0.23311870284543604,1 +Clc1cc(Cl)cc(c1)C1(CO1)CC(Cl)(Cl)Cl,0.09362507489225783,0.0431096794497545,1 +OC(=O)COc1ccc(cc1C)Cl,0.0947069010825298,0.0596687595644816,1 +Cc1nnc(c(=O)n1N)c1ccccc1,0.09643315995145703,0.16643101472065308,1 +[O-][N+](=O)/N=C/1\NCCN1Cc1ccc(nc1)Cl,0.0973945952590747,0.11879102876531308,1 +Nc1ccc(cc1)Cl,0.09798423692306293,0.18636555783036246,1 +Cn1cc(c2cccc(c2)C(F)(F)F)c(=O)c(c1)c1ccccc1,0.09868947363194906,0.09346150569723749,1 +COc1c(OC)cc(c(c1OC)C(=O)c1c(OC)ncc(c1C)Cl)C,0.09950572862076837,0.5022089453626634,1 
+NC(=N)NCCCCCCCCCCCCOC(=O)C,0.10160268068512719,0.3976348891342203,1 +OC1CC2(O)CC(O)C(C(O2)(C)CC(C=CC=CC=CC=CCC(OC(=O)C=CC2C(C1)(C)O2)C)OC1(C)OC(C)C(C(C1O)N)O)C(=O)O,0.10172294366080416,0.3945818999004983,1 +COc1ncc(c2n1nc(n2)NS(=O)(=O)c1c(cccc1C(F)(F)F)OCC(F)F)OC,0.1034404543369562,0.17566048716063784,1 +N#CC(c1ccccc1)(Cn1cncn1)CCc1ccc(cc1)Cl,0.10391366164191661,0.048327010230331986,1 +CCCN(c1c(cc(cc1[N+](=O)[O-])S(=O)(=O)N)[N+](=O)[O-])CCC,0.10393824312956665,0.1243653010335676,1 +COCc1c(F)c(F)c(c(c1F)F)COC(=O)C1C(C1(C)C)/C=C/C,0.10573252781458294,0.07439706043879453,1 +[O-][N+](=O)c1cnc(n1C)C,0.10628650675790867,0.19401311372646204,1 +CC(N(c1c(cc(cc1N(=O)=O)S(=O)(=O)N)N(=O)=O)C(C)C)C,0.10642121227099519,0.21225805280325383,1 +CCOC(=O)C(OC(=O)c1cc(ccc1[N+](=O)[O-])Oc1ccc(cc1Cl)C(F)(F)F)C,0.10827828411229923,0.06648436472564069,1 +CCOC(=O)C(OC(=O)c1cc(ccc1N(=O)=O)Oc1cc(ccc1Cl)C(F)(F)F)C,0.10827828411229923,0.061062681077149544,1 +S=C1NCCNC(=S)S[Mn+2]S1,0.10855557507359069,0.017281371487307694,1 +ClCC(=O)N(c1ccccc1)C(C)C,0.10865048725491992,0.09496861063144992,1 +CCNc1nc(NC(C)C)nc(n1)Cl,0.10941971287651023,0.043173870512644,1 +CCOc1cc(ccc1[N+](=O)[O-])Oc1ccc(cc1Cl)C(F)(F)F,0.11058877880543937,0.04678204170857044,1 +COC(=O)c1c(nc(c(c1CC(C)C)C1=NCCS1)C(F)(F)F)C(F)F,0.11151045196043953,0.037382248593410286,1 +COC(=O)c1c(nc(c(c1CC(C)C)C1=NCCS1)C(F)(F)F)C(F)F,0.11151045388522976,0.03983577416044405,1 +Clc1cc(ccc1Oc1ccc(c(c1)C(=O)NS(=O)(=O)C)[N+](=O)[O-])C(F)(F)F,0.11395676083924232,0.0793295655760358,1 +Oc1ccc(c(c1)C)C,0.1145996706078039,0.11410846546983064,1 +N#Cc1c(N)nc(nc1N)NC1CC1,0.11566455596376966,0.06515502205741146,1 +O=C(c1cccc(c1C(=O)NC(CS(=O)(=O)C)(C)C)I)Nc1ccc(cc1C)C(C(F)(F)F)(C(F)(F)F)F,0.115769562707734,0.1353589335978793,1 +CCNc1nc(NC(C)C)nc(n1)Cl,0.11591071091933607,0.043902127532966045,1 +COc1c(OC)cc(c(c1OC)C(=O)c1c(OC)ncc(c1C)Cl)C,0.11727460798675288,0.49714109722368416,1 +CNC(=O)O/N=C(\SC)/C,0.11836501403389492,0.027383715681877,1 
+CCN(c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-])Cc1c(F)cccc1Cl,0.1185590456888386,0.09933213815884388,1 +Nc1ccc(cc1)S(=O)(=O)Nc1nc(C)cc(n1)C,0.1185642260256668,0.3465603274803372,1 +CCNC(=O)NC(=O)/C(=N\OC)/C#N,0.11857948837239812,0.15289185096526228,1 +CC(N(c1c(cc(cc1N(=O)=O)C(F)(F)F)N(=O)=O)C(C)C)C,0.1193036069506878,0.06759688882241817,1 +COc1cc(ccc1OC)C(=CC(=O)N1CCOCC1)c1ccc(cc1)Cl,0.11937399144446861,0.1639515356772961,1 +CCCCc1c(=O)nc([nH]c1C)NCC,0.1194525860672606,0.13288110687759513,1 +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.12010651237688001,0.01578324760281871,1 +CNC(=O)ON=C(SC)C,0.12329688471926223,0.024637179457617557,1 +CNC(=O)Oc1cc(C)c(c(c1)C)SC,0.1242747128033579,0.022377478984048814,1 +c1coc(c1)c1nc2c([nH]1)cccc2,0.12486833177320307,0.06283286902314578,1 +CO/C=C(\c1ccccc1COc1cccc(n1)C(F)(F)F)/C(=O)OC,0.1252316956521325,0.17628948629366487,1 +CN(C(=O)C(c1ccccc1)c1ccccc1)C,0.1253592168358431,0.11980652421596152,1 +CON(C(=O)Nc1ccc(c(c1)Cl)Cl)C,0.12545469800745823,0.042874763652812176,1 +O=C(C1=C(C)OCCS1)Nc1ccccc1,0.1274956638724717,0.22358836955729258,1 +ClC(=CC1C(C1(C)C)C(=O)OCc1cccc(c1)Oc1ccccc1)Cl,0.12778321424363712,0.042877873409045086,1 +O=C(N(C)C)Nc1cccc(c1)C(F)(F)F,0.12919765885228982,0.05872275009746561,1 +CC(N(c1c(cc(cc1N(=O)=O)S(=O)(=O)N)N(=O)=O)C(C)C)C,0.12992280391195832,0.21225805280325383,1 +[O-][N+](=O)/N=C(\NC)/NCc1cnc(s1)Cl,0.13016764551401042,0.0664943030028045,0.10344827586206896 +CCCN(C(=O)SCC)CCC,0.13205276659525605,0.025649453338359905,1 +C=CCOC(c1ccc(cc1Cl)Cl)Cn1cncc1,0.13459866849613178,0.020586813499091246,1 +OC(C(C)(C)C)C(=Cc1ccc(cc1)Cl)n1ncnc1,0.13506940531624406,0.09513382772182304,1 +CCc1ccc(cc1)C(=O)NN(C(C)(C)C)C(=O)c1cc(C)cc(c1)C,0.13618183361575933,0.8946497962290012,1 +O=C(Nc1cnns1)Nc1ccccc1,0.13620822278144273,0.08999508813305773,1 +CCCCCCCCc1cc(N(=O)=O)c(c(c1)N(=O)=O)OC(=O)C=CC,0.1372145060102149,0.2884147588603236,1 
+CCc1ccc(cc1)C(=O)NN(C(C)(C)C)C(=O)c1cc(C)cc(c1)C,0.13731668655832788,0.36134549162412616,1 +Cc1cccc2c1n1cnnc1s2,0.1373938645607217,0.4649462320529616,1 +CCC(Nc1c(cc(cc1[N+](=O)[O-])C(C)(C)C)[N+](=O)[O-])C,0.13747135609511818,0.08194730919474813,1 +CC(C(c1cncnc1)(c1ccc(cc1)OC(F)(F)F)O)C,0.13801406108477293,0.02334539398625156,1 +CC(NC(=O)N1CC(=O)N(C1=O)c1cc(Cl)cc(c1)Cl)C,0.13932359364492994,0.07115407630403497,1 +CN1CC2CC1CN2c1cc2c(cc1F)c(=O)c(cn2C1CC1)C(=O)O,0.13990757146198934,0.2090948708768445,0.5862068965517241 +OC(=O)COc1nc(Cl)c(cc1Cl)Cl,0.1403669879303106,0.04327585927626025,1 +CC(OC(=O)Nc1cccc(c1)Cl)C,0.14040948460452124,0.09113750740767616,1 +COC(=O)C(N(c1c(C)cccc1C)C(=O)Cc1ccccc1)C,0.14136381415796706,0.2135515487520386,1 +C=CC1(C)OC(=O)N(C1=O)c1cc(Cl)cc(c1)Cl,0.1413788142745837,0.10756688196876787,1 +COc1nc(C)nc(n1)N(C(=O)NS(=O)(=O)c1ccccc1C(=O)O)C,0.14421924681891674,0.26729546067178,1 +ClC(=C)Cl,0.14441434207714035,0.010177007878307786,0.1 +Clc1ccccc1CC(C1(Cl)CC1)(Cn1nc[nH]c1=S)O,0.1452393166315865,0.04660628941940032,1 +CC(N1C(=O)c2ccccc2NS1(=O)=O)C,0.14566407168203882,0.4056883000934699,1 +CC(=O)O.CCCCCCCCCCCCNC(=N)N,0.1461167287581588,0.2239202257852629,1 +CCOc1cc(ccc1[N+](=O)[O-])Oc1ccc(cc1Cl)C(F)(F)F,0.14653013191720715,0.05205345386490815,1 +CON=C(c1ccccc1CON=C(c1cccc(c1)C(F)(F)F)C)C(=O)OC,0.14692519722320194,0.2857188922342546,1 +c1ccc(cc1)Nc1ccccc1,0.14773454395291782,0.20378273649665343,1 +COC(CCCC(CC=CC(=CC(=O)OC(C)C)C)C)(C)C,0.14816176662421726,0.7514525775875477,1 +c1scc(n1)c1nc2c([nH]1)cccc2,0.1490700414533971,0.04999034581341369,1 +CC(N1C(=O)c2ccccc2NS1(=O)=O)C,0.14982590230152565,0.48891072826169246,1 +N#Cc1c(Cl)c(C#N)c(c(c1Cl)Cl)Cl,0.15042627044387033,0.07089396189028405,1 +ClC(SN1C(=O)c2c(C1=O)cccc2)(Cl)Cl,0.15174119992085178,0.24009429524650522,1 +CON=C(c1ccc(cc1Cl)Cl)Cc1cccnc1,0.15245767876475944,0.07528095892566167,1 +CCC(c1noc(c1)NC(=O)c1c(OC)cccc1OC)(CC)C,0.15252975563710267,0.22465071517314014,1 
+CCNC(=O)NC(=O)C(=NOC)C#N,0.15289185096526225,0.13464708317401125,1 +Clc1ccc(c(c1)Cl)C=C(C(C(C)(C)C)O)n1cncn1,0.15327033840680634,0.08718428281810346,1 +COC=C(c1ccccc1Oc1ncnc(c1)Oc1ccccc1C#N)C(=O)OC,0.15431812608561873,0.18865576740539436,1 +COC(=O)NC(=S)Nc1ccccc1NC(=S)NC(=O)OC,0.15479245019392282,0.23311870284543604,1 +OC(=O)C(Oc1ccc(cc1Cl)Cl)C,0.15527684755838006,0.019604411057235267,1 +COP(=S)(Oc1cc(Cl)c(cc1Cl)Cl)OC,0.15549919159080278,0.014323208545850756,1 +Cc1nc(Nc2ccccc2)nc(c1)C1CC1,0.15801925188118618,0.19454416629038565,1 +CCOC(=O)CN(c1c(CC)cccc1CC)C(=O)CCl,0.1603572605822803,0.18455943460404955,1 +CC(OC(=O)Nc1ccccc1)C,0.16181616210899355,0.4576296267631658,1 +Cc1cccc2c1n1cnnc1s2,0.16381576159162972,0.4344528846227351,1 +C#CC(NC(=O)c1cc(Cl)cc(c1)Cl)(C)C,0.16593276232681306,0.06064270880188062,1 +CC(N1C(=O)c2ccccc2NS1(=O)=O)C,0.16647322477947293,0.41085861196208356,1 +CNc1cnn(c(=O)c1Cl)c1cccc(c1)C(F)(F)F,0.1687700797289615,0.04724600485885422,1 +O=C1N(c2cc(Cl)cc(c2)Cl)C(=O)C2(C1(C)C2)C,0.16893203350457175,0.0828914722158967,1 +CCC(Nc1c(cc(cc1[N+](=O)[O-])C(C)(C)C)[N+](=O)[O-])C,0.16929970598735858,0.07890908052701984,1 +Cc1ccc(c2c1cccc2)C,0.1728291127183792,0.12707622740780478,1 +Clc1cc(Cl)c(cc1n1nc(n(c1=O)C(F)F)C)NS(=O)(=O)C,0.1730416993562668,0.024541035827570765,1 +COC(=O)c1ccc(cc1C1=NC(C(=O)N1)(C)C(C)C)C,0.1734054330003024,0.32766976057445574,1 +CNC(=O)N(c1nnc(s1)C(C)(C)C)C,0.1751969016077557,0.15654611790372291,0.4 +CCN(C(=O)C(Oc1cccc2c1cccc2)C)CC,0.17526912017369997,0.25559916952159206,1 +CCCCCCCCc1cc(N(=O)=O)c(c(c1)N(=O)=O)OC(=O)C=CC,0.17563456769307506,0.2542481616501888,1 +CCCCCCC(c1cc(cc(c1OC(=O)/C=C/C)[N+](=O)[O-])[N+](=O)[O-])C,0.17563456769307506,0.26463567647786024,1 +CCOCN(c1c(C)cccc1CC)C(=O)CCl,0.17607780933998252,0.08322210554037121,1 +CCCCCCCCCC[N+](CCCCCCCCCC)(C)C.[Cl-],0.1767583631976715,0.777859424387322,1 +CCCCCCCCSC(=O)Oc1cc(Cl)nnc1c1ccccc1,0.17813968959673715,0.15669364902191532,1 
+Clc1ccc(cc1)CCC(C(C)(C)C)(Cn1cncn1)O,0.17867678986550448,0.05298126176351619,1 +N#Cc1cc(C)c(c(c1)C(=O)NC)NC(=O)c1cc(nn1c1ncccc1Cl)Br,0.17901230859828976,0.02537036919235666,1 +COCC(=O)N(c1c(C)cccc1C)N1CCOC1=O,0.17965983350851364,0.11870027623897902,1 +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.18015976856532,0.014790752702768572,1 +CC1=NNC(=O)N(C1)/N=C/c1cccnc1,0.18091653347462547,0.6560215843907876,1 +c1ccc(cc1)Nc1ccccc1,0.1831908345016181,0.2623047551120752,1 +CN1CN(C)CSC1=S,0.18486987933542975,0.02964017440587401,1 +CCOCN(c1c(C)cccc1CC)C(=O)CCl,0.18534506246313948,0.0688130158812099,1 +O=N(=O)c1ccc(c(c1)N(=O)=O)C,0.1866762157041476,0.11487634907046192,1 +COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1CCC(F)(F)F,0.19051986050321804,0.13616466229510565,1 +COP(=O)(NC(=O)C)SC,0.1910836440808347,0.01603420284847195,1 +N#CCNC(=O)c1cnccc1C(F)(F)F,0.19244308898713228,0.4221312557184933,1 +O=C(NC(=O)c1c(F)cccc1F)Nc1cc(Cl)c(c(c1F)Cl)F,0.19351406453661255,0.057013512580576416,1 +CCCCCCC(c1cc(cc(c1OC(=O)/C=C\C)[N+](=O)[O-])[N+](=O)[O-])C,0.19484459853450517,0.16294718993200522,1 +OC1CN(C(=O)N1c1nnc(s1)C(C)(C)C)C,0.19506513302817866,0.17519690160775567,0.4 +OC(=O)C(Cl)(Cl)C,0.1970361896096669,0.42676977936996974,0.125 +O=c1nc(N(C)C)n(c(=O)n1C1CCCCC1)C,0.19816672003956992,0.503640251987437,0.16666666666666666 +c1scc(n1)c1nc2c([nH]1)cccc2,0.19876005527119617,0.07124290059304189,1 +CCCCC(c1ccc(cc1)Cl)(Cn1cncn1)C#N,0.20057118462494436,0.08323848349319968,1 +Nc1ccc(c(c1)N)O,0.2013846888993215,0.8190054209287416,1 +C=Cc1ccccc1,0.20163396483810905,0.4669300291718144,1 +Cn1nc(c(c1)C(=O)Nc1ccccc1C1CC1C1CC1)C(F)F,0.2021971466240455,0.050901264305063164,1 +O=C(NS(=O)(=O)c1ccccc1C(=O)OC1COC1)Nc1nc(C)cc(n1)C,0.20422574060250331,0.3857400812695065,1 +CO/C=C(\c1ccccc1Oc1ncnc(c1)Oc1ccccc1C#N)/C(=O)OC,0.20427010160523304,0.19465114260586,1 +ClC=C(c1cc(Cl)c(cc1Cl)Cl)OP(=O)(OC)OC,0.20493941143914957,0.017923200540319623,1 
+COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1CCC(F)(F)F,0.2098341392275743,0.13665038085545952,1 +ClCC(=O)N(c1c(CC)cccc1CC)CNC(=O)C,0.21058487877925733,0.17555052706620422,1 +O=C1CCCC(=O)C1C(=O)c1ccc(cc1Cl)S(=O)(=O)C,0.2189994026791292,0.002847075141623806,1 +CC(c1ccc(cc1)O)(c1ccc(cc1)O)C,0.21902317939829427,0.7987528952107646,1 +OC(C(C)(C)C)C(n1ncnc1)Oc1ccc(cc1)Cl,0.21976935578028234,0.07219258757735694,1 +CC(Nc1nc(NC(C)C)nc(n1)Cl)C,0.2198425631222415,0.057817340618821475,1 +ClCC(=O)N(c1c(C)cccc1CC)COC(C)C,0.22199225860138957,0.0986751773034067,1 +COCC(=O)N(c1c(C)cccc1C)C(C(=O)OC)C,0.22374845318219344,0.2272789169439581,1 +Nc1ccc2c(c1)nc1c(c2)ccc(c1)N,0.22461542255370148,0.5127318005761181,1 +Nc1cnn(c(=O)c1Cl)c1ccccc1,0.2255879747532767,0.06819037903102541,1 +CNC(=O)Oc1ccccc1OC(C)C,0.22939978025412716,0.05128012589016116,1 +CC(CC(c1sccc1NC(=O)c1cn(nc1C(F)(F)F)C)C)C,0.23093421710838027,0.09605024222468138,1 +CCNC(=O)C(OC(=O)Nc1ccccc1)C,0.23278744254805916,0.31163505026311,1 +Clc1c(ccc(c1N)[N+](=O)[O-])Oc1ccccc1,0.23425888009292972,0.15238507364192602,1 +Cl/C=C/CO/N=C(\C1=C(O)CC(CC1=O)CC(SCC)C)/CC,0.2389478027971563,0.253664493846163,1 +CCSC(CC1CC(=O)C(=C(NOCC=CCl)CC)C(=O)C1)C,0.2389478027971563,0.25672688702175106,0.18181818181818182 +CNC(=O)Oc1ccccc1OC(C)C,0.23895810443138246,0.0540412050930721,1 +CCC(n1c(=O)[nH]c(c(c1=O)Br)C)C,0.23935747721355113,0.2948426093660432,0.25 +CCOCN(c1c(C)cccc1CC)C(=O)CCl,0.24799169923196304,0.07857893254252615,1 +C=CCC1=C(C)C(CC1=O)OC(=O)C1C(C1(C)C)C=C(C)C,0.24800936112986982,0.171834951313278,1 +OC(=O)c1nc(Cl)c(c(c1Cl)N)Cl,0.24848916516834604,0.3714104972948829,1 +CCc1cc(C)cc(c1c1c(OC(=O)C(C)(C)C)n2n(c1=O)CCOCC2)CC,0.24968092026794356,0.07242815295555487,1 +CC1OC(C)OC(OC(O1)C)C,0.249701719945447,0.7175892491582392,0.25 +[S]C(=S)NCCNC(=S)S[Mn],0.2525424903682367,0.03648895915666588,1 +CCOCN(c1c(C)cccc1CC)C(=O)CCl,0.2557761861991325,0.07996202101917224,1 +Cc1ccc(cc1)N(S(=O)(=O)N(C)C)SC(Cl)(Cl)F,0.25917417547047744,0.07402684850899094,1 
+Fc1ccc(cc1)Oc1ccnc2c1c(Cl)cc(c2)Cl,0.25962686686321285,0.058384643171399436,1 +CC(Oc1cccc(c1)NC(=O)c1ccccc1C(F)(F)F)C,0.2690918752347788,0.1149002721824295,1 +Nc1cnn(c(=O)c1Cl)c1ccccc1,0.270705569703932,0.0723832343431398,1 +OCC(CCl)O,0.27139624684320934,8.18897175780498,1 +CCSC(CC1CC(=O)C(=C(NOCC=CCl)CC)C(=O)C1)C,0.27784628232227476,0.18075913870978183,1 +CC(CC(c1sccc1NC(=O)c1cn(nc1C(F)(F)F)C)C)C,0.2782339965161208,0.09605024222468138,1 +COc1cc(OC)nc(n1)NC(=O)NS(=O)(=O)c1ncccc1S(=O)(=O)CC,0.2804534946915948,0.6913408558790916,1 +OC(=O)COc1ccc(cc1Cl)Cl,0.28049546719325014,0.041836193207807806,1 +CCCC1COC(O1)(Cn1cncn1)c1ccc(cc1Cl)Cl,0.2805209905967611,0.03936504164521463,1 +C#CCOS(=O)OC1CCCCC1Oc1ccc(cc1)C(C)(C)C,0.2853292217012047,0.07235320420788187,1 +Cc1ccc(cc1)N(S(=O)(=O)N(C)C)SC(Cl)(Cl)F,0.2879713060783083,0.0672060941474649,1 +CCOC(=O)CC(C(=O)OCC)SP(=S)(OC)OC,0.2921073325236663,0.02307472490573678,1 +CCCCOCCOCCOCc1cc2OCOc2cc1CCC,0.29547465787728056,0.3350153436602428,1 +CNC(=O)Oc1cccc2c1cccc2,0.2981792578159244,0.12885391431706575,1 +COC(=O)c1ccccc1S(=O)(=O)NC(=O)Nc1nc(OC(F)F)cc(n1)OC(F)F,0.2989300503468667,0.4021371024540237,1 +CNC(=O)Oc1cccc2c1cccc2,0.2991731924668564,0.11663521010507076,1 +CN(C(=S)[S-])C.CN(C(=S)[S-])C.CN(C(=S)[S-])C.[Fe+3],0.30012414094866885,0.03664917430602502,1 +CCOC(=O)c1ccccc1S(=O)(=O)NC(=O)Nc1nc(Cl)cc(n1)OC,0.30133493788161053,0.4908814012760744,1 +Fc1ccc(c(c1)F)NC(=O)c1cccnc1Oc1cccc(c1)C(F)(F)F,0.3033262936121485,0.08329374437288468,1 +CCCCCCCCSC(=O)Oc1cc(Cl)nnc1c1ccccc1,0.3034972489425892,0.1275872230209465,1 +NC1CC(N=C(C(=O)O)N)C(OC1OC1C(O)C(O)C(C(C1O)O)O)C,0.3057757345866624,2.5745022532058908,1 +CNC(=O)Oc1cc(C)cc(c1C)C,0.30635114568601185,0.06817593429629851,1 +COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1C(=O)OC,0.31203800675365617,0.10672040100126456,1 +ClCC(=O)N(c1c(C)cccc1C)Cn1cccn1,0.31323206744613685,0.21419826576830403,1 +C#CCC1=C(C)C(CC1=O)OC(=O)C1C(C1(C)C)C=C(C)C,0.316253365684832,0.15407527700390677,1 
+OC(=O)CCl,0.317470328693963,0.38399352507880175,1 +ClC(SN1C(=O)C2C(C1=O)CC=CC2)(Cl)Cl,0.3260262207586085,0.07976114599708196,1 +CCCCCCC(c1cc(cc(c1OC(=O)/C=C\C)[N+](=O)[O-])[N+](=O)[O-])C,0.3315102548955885,0.15393856181627197,1 +ClC(SN1C(=O)C2C(C1=O)CC=CC2)(Cl)Cl,0.3326798171006209,0.07976114599708196,1 +CN(C1C(=O)C(=C(O)N)C(=O)C2(C1CC1C(=C(O)c3c(C1(C)O)cccc3O)C2=O)O)C,0.33750750616693714,7.376190802377752,1 +OC(=O)COc1ccc(cc1Cl)Cl,0.33930903289506065,0.04146720868611565,1 +Clc1cc(ccc1Oc1ccc(c(c1)C(=O)O)[N+](=O)[O-])C(F)(F)F,0.34563108073944815,0.09371047325644374,1 +CCC(=O)Nc1ccc(c(c1)Cl)Cl,0.3484961885063573,0.1283708489726044,1 +OC(=O)C(Cl)(Cl)C,0.3497269961122948,0.42676977936996974,0.125 +Fc1ccc(cc1)C(=O)CCCN1CCN(CC1)c1ccccn1,0.35125671098854394,0.1949888828070151,1 +N=C(NC(=N)N)NCCc1ccccc1,0.35564719019232227,0.2989014799552515,1 +COc1ccc(cc1)C(C(Cl)(Cl)Cl)c1ccc(cc1)OC,0.36163948246786254,0.12070782067019675,1 +Cc1cc(C)cc(c1)C(=O)N(C(C)(C)C)NC(=O)c1ccc2c(c1C)CCCO2,0.3678012132205545,0.23474791298621292,1 +CCN(C(=O)C(Oc1cccc2c1cccc2)C)CC,0.36852210915226874,0.24365961404385508,1 +Cc1nnc(c(=O)n1N)c1ccccc1,0.36891864539658303,0.1923581389323803,1 +CC(=CC1C(C1(C)C)C(=O)OCc1coc(c1)Cc1ccccc1)C,0.3693416417277341,0.17187893661334366,1 +O=C(Nc1ccnc(c1)Cl)Nc1ccccc1,0.37548404132262436,0.07959376639892965,1 +O=C(C(C)(C)C)C(n1ncnc1)Oc1ccc(cc1)Cl,0.3880867710275115,0.06307401130325499,1 +COC(=O)Nc1nc2c([nH]1)cccc2,0.3922867840256219,0.11674597711463099,1 +CCC(n1c(=O)[nH]c(c(c1=O)Br)C)C,0.39446112244793224,0.2948426093660432,0.25 +CO/N=C(\c1ccccc1Oc1ncnc(c1F)Oc1ccccc1Cl)/C1=NOCCO1,0.39448424715427566,0.09988973974944607,1 +N#Cc1ccc(cc1)C/C(=N/NC(=O)Nc1ccc(cc1)OC(F)(F)F)/c1ccc(cc1)C(F)(F)F,0.394944816927872,0.055871495745498795,1 +CCCCNC(=O)n1c(NC(=O)OC)nc2c1cccc2,0.3961177430023906,0.18977813725506376,1 +O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(cc1)Cl,0.4023390123323988,0.14297640125707387,1 +O=C1N(OCC1(C)C)Cc1ccccc1Cl,0.41719152837532353,0.07091709112656906,1 
+Clc1c(Cl)c([N+](=O)[O-])c(c(c1Cl)Cl)Cl,0.423248605734443,0.08300342479617458,1 +NCCNc1cccc2c1cccc2,0.4241543329029509,0.2252360875684565,1 +CC(=CC1C(C1(C)C)C(=O)OCc1cccc(c1)Oc1ccccc1)C,0.42802021191337764,0.06014092276203531,1 +CC(Oc1ccccn1)COc1ccc(cc1)Oc1ccccc1,0.43361266621389954,0.13385739744024536,1 +CO/C=C(\c1ccccc1COc1cccc(n1)C(F)(F)F)/C(=O)OC,0.4410333629488144,0.18118444726245583,1 +N#Cc1c[nH]cc1c1cccc2c1OC(O2)(F)F,0.443217671652664,0.1779144876306056,1 +O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(cc1F)Oc1ccc(cc1Cl)C(F)(F)F,0.4460202371248177,0.07230910035578181,1 +CC1N(C(=O)NC2CCCCC2)C(=O)SC1c1ccc(cc1)Cl,0.4534134152107278,0.04034944223996247,1 +N#Cc1c[nH]cc1c1cccc2c1OC(O2)(F)F,0.4553054263341003,0.17957314863237633,1 +CCSC(=O)N(CC(C)C)CC(C)C,0.4600420791288938,0.030069785449812297,1 +CSC(=O)c1cccc2c1snn2,0.4608228380460223,1.504300581444509,1 +Cc1cc(N)c(cc1C)C,0.46595489467866197,0.053899581356752935,1 +COCN(c1c(CC)cccc1CC)C(=O)CCl,0.4670695574071115,0.30055949596754833,1 +Clc1cc(ccc1Oc1ccc(c(c1)C(=O)[O-])[N+](=O)[O-])C(F)(F)F.[Na+],0.46919094173712006,0.06702828193704673,1 +O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(cc1F)Oc1ccc(cc1Cl)C(F)(F)F,0.4705718098105875,0.08241832742445686,1 +Clc1c(Cl)c([N+](=O)[O-])c(c(c1Cl)Cl)Cl,0.47403843842257615,0.0740572717714695,1 +ClCC[N+](C)(C)C.[Cl-],0.47447507557122687,2.3396956191028,1 +Clc1c(Cl)c([N+](=O)[O-])c(c(c1Cl)Cl)Cl,0.4774244272684517,0.07406861996239202,1 +CC1OC(C)CN(C1)C1CCCCCCCCCCC1,0.48316627385722294,0.06345349249676542,1 +CCCCc1c(C)nc(nc1OS(=O)(=O)N(C)C)NCC,0.4930161419173511,0.04492074603746372,1 +CN1N(C)C(CC1c1ccccc1)c1ccccc1,0.49533572071941767,0.13524048454968857,1 +OC(=O)C(Oc1cccc(c1)Cl)C,0.4984573741185779,0.027013763409090032,1 +COC(=O)C(NC(=O)C(CC(=O)O)N)Cc1ccccc1,0.4994850207500349,0.5695813389794603,1 +O=c1[nH]c2CCCc2c(=O)n1C1CCCCC1,0.503640251987437,0.20429718941494676,1 +CCOC(=O)OC1=C(C(=O)NC21CCC(CC2)OC)c1cc(C)ccc1C,0.5061016308843888,0.08215925774965492,1 
+CCOC(=O)OC1=C(C(=O)NC21CCC(CC2)OC)c1cc(C)ccc1C,0.5061016308843888,0.06592558590239668,1 +N#Cc1c[nH]cc1c1cccc(c1Cl)Cl,0.5061481392686851,0.1482202723687745,1 +CCCOC(=O)NCCCN(C)C.Cl,0.5072793699625824,0.5109577674385912,1 +CCOc1nc(nc(n1)NC)NC(=O)NS(=O)(=O)c1ccccc1C(=O)OC,0.5116896474609399,0.2846622352597012,1 +CCCCC(COC(=O)c1ccccc1C(=O)OCC(CCCC)CC)CC,0.5120902983161549,0.36436394331206917,1 +COc1c(Cl)ccc(c1C(=O)O)Cl,0.520273850439093,0.2583948966616764,1 +CCOc1cc(ccc1C1COC(=N1)c1c(F)cccc1F)C(C)(C)C,0.5202976892967504,0.04918878971844987,1 +COCC(N(c1c(C)cccc1CC)C(=O)CCl)C,0.5285529966699751,0.07490420627303267,1 +O=CCC1CC(C)C(=O)C=CC(=CC(C(OC(=O)CC(C(C1OC1(C)OC(C)C(C(C1O)N(C)C)OC1(C)OC(C)C(C(C1)(C)O)O)C)O)CC)COC1OC(C)C(C(C1OC)OC)O)C,0.5295750507618869,0.3246343630441296,1 +[O-][N+](=O)/N=C\1/N(Cc2cnc(s2)Cl)COCN1C,0.5313410671453993,0.09739459525907473,0.13333333333333333 +COC(=O)C1(O)c2cc(Cl)ccc2c2c1cccc2,0.546052144921948,0.05087196650796755,1 +COC(=O)c1cc(ccc1[N+](=O)[O-])Oc1ccc(cc1Cl)Cl,0.5465743293153008,0.10739545351005231,1 +CC(C12CCC(O2)(C(C1)OCc1ccccc1C)C)C,0.5466515334085721,0.08211852030367763,1 +Clc1ccc(cc1)CN(C(=O)Nc1ccccc1)C1CCCC1,0.5473855891134007,0.05943549532064574,1 +Oc1ccc2c(c1N=Nc1ccccc1)ccc(c2)S(=O)(=O)O,0.5482080783455129,1.478344656846752,1 +ClCCOc1ccccc1S(=O)(=O)NC(=O)Nc1nc(C)nc(n1)OC,0.5494924735209582,0.14729599082809905,1 +[O-][N+](=O)c1cc(Cl)c(c(c1)Cl)N,0.5651787298028309,0.13483685160949663,1 +Nc1ccc(c(c1)N(=O)=O)N,0.5681125108300529,0.35354400012085735,1 +CCCN(c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-])CCCl,0.5690227874227859,0.0997282984823185,1 +NC1CCCCC1,0.5898716318329822,0.09220486462697597,1 +COC(=O)c1csc(c1S(=O)(=O)NC(=O)n1nc(n(c1=O)C)OC)C,0.5993972829782238,0.974697517038119,1 +COc1cc(Cl)c(cc1Cl)OC,0.6037074787089276,0.10235720239559151,1 +NC1CCCCC1,0.6049965454697254,0.10993035550045605,1 +COCCN(c1c(C)cccc1C)C(=O)CCl,0.6139034987494355,0.1286461396675361,1 +OC(=O)C1C2CCC(C1C(=O)O)O2,0.6177415369409439,0.43214806325138994,1 
+CN(C(=O)Nc1ccc(cc1)Cl)C,0.6292491939569526,0.05347112024655942,1 +COc1c(OC)cc(c(c1OC)C(=O)c1c(OC)ccc(c1C)Br)C,0.6352758309016929,0.11898261682472457,1 +OC1(Cn2ncnc2)/C(=C/c2ccc(cc2)Cl)/CCC1(C)C,0.6406279100538178,0.07245691711531918,1 +COC(=O)c1ccc(cc1)C(=O)OC,0.6437193589585136,0.6869017557459655,1 +Clc1ccc(cc1)S(=O)(=O)c1cc(Cl)c(cc1Cl)Cl,0.6459733503975151,0.041979418428373126,1 +CC(OC(=O)NC(C(=O)NC(c1nc2c(s1)cc(cc2)F)C)C(C)C)C,0.6543197874203039,0.11406505535619535,1 +COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1C(=O)OC,0.655542030995076,0.13112768667837227,1 +CCCOC(=O)NCCCN(C)C.Cl,0.6674728552139242,0.5109577674385912,1 +CCCCOCC(OCC(O)C)C,0.6726932978936081,0.5751760289817799,1 +ClCC(=O)N(c1c(C)cccc1CC)COC(C)C,0.7047373288933002,0.09958077993964556,1 +CC1OC(C)OC(C1)OC(=O)C,0.7175892491582392,0.1800593102021387,0.25 +[O-][N+](=O)c1cc(Cl)c(c(c1)Cl)N,0.7245881151318344,0.16130841961480835,1 +CCCCOCCOCCOCc1cc2OCOc2cc1CCC,0.7386866446932013,0.3350153436602428,1 +COc1nc(nc(c1)OC)NC(=O)NS(=O)(=O)Cc1ccccc1C(=O)OC,0.7529208210920754,0.5028214604937333,1 +O=C(C1C(C1(C)C)C=C(C)C)OCN1C(=O)C2=C(C1=O)CCCC2,0.7543614918373561,0.10396626070058967,1 +CCOc1nc(F)cc2n1nc(n2)S(=O)(=O)Nc1c(Cl)cccc1C(=O)OC,0.7561469746838736,0.2345262145021008,1 +COc1cc(OC)nc(n1)NC(=O)NS(=O)(=O)Cc1ccccc1C(=O)O,0.7795645307119917,0.5813782889511574,1 +Clc1ccc(c(n1)C(=O)O)Cl,0.7812519531298828,0.3047209470891338,1 +COC(=O)NS(=O)(=O)c1ccc(cc1)N,0.7817895162025876,0.48006440533268346,1 +ClCC[N+](C)(C)C.[Cl-],0.7907917926187115,2.3566838959846437,1 +COC(CCCC(C/C=C/C(=C/C(=O)OC(C)C)/C)C)(C)C,0.8052269925229198,0.7514525775875477,1 +CCOc1cc(ccc1OCC)NC(=O)OC(C)C,0.8241033622809132,0.3090825648890777,1 +OC(=O)c1nc(Cl)c(c(c1Cl)N)Cl,0.8282972172278201,0.4720460499425296,1 +Nc1ccc(c(c1)C)NOS(=O)(=O)O,0.8431459792705229,0.3729699848772478,1 +CCOC(=O)C1OC1(C)c1ccccc1,0.8485352051922984,0.15204782183286927,1 +ClCC[N+](C)(C)C.[Cl-],0.860381470369158,2.3566838959846437,1 
+CCCCNC(=O)n1c(NC(=O)OC)nc2c1cccc2,0.8611255282660666,0.1627783573692901,1 +OCCn1c(C)ncc1[N+](=O)[O-],0.8764039114257128,0.07062719125960476,1 +ClCCP(=O)(O)O,0.9066120392542251,2.424380344082731,1 +COP(=O)OC,0.9086866261501474,0.0020899832476404022,0.1 +Nc1nc(NC2CC2)nc(n1)N,0.9387196585948812,0.10982169517930987,1 +OCCNc1ccc(cc1OCCO)N(=O)=O,0.9453881078267568,0.9356572196349415,1 +O=N(=O)c1cccc2c1cccc2,0.952831491808421,0.19375245039704106,1 +O=C(C1(C)CCCCC1)Nc1ccc(c(c1Cl)Cl)O,0.9662594125910484,0.42312539665837845,1 +Oc1cccc2c1nccc2,0.9851335765350275,0.5509106089207596,1 +CCCOC(=O)c1ccc(cn1)C(=O)OCCC,0.9949124950582696,1.1075161098582462,1 +Oc1noc(c1)C,0.9991119005328597,4.460830164062197,0.1 +CC[N](=C1C=CC(=C(c2ccc(cc2)N(Cc2cccc(c2)S(=O)(=O)O)CC)c2ccc(cc2)N(C)C)C=C1)Cc1cccc(c1)S(=O)(=O)O,1.009963174498295,0.18540142003081284,1 +OC(=O)c1nc(Cl)c(c(c1Cl)N)Cl,1.0353715215347752,0.3638306361396559,1 +ClCC[N+](C)(C)C,1.0602168942789227,0.9100716288762254,1 +Clc1ccccc1,1.0661274430976688,0.09929943773759063,1 +CCOC(=O)CC(C(=O)OCC)SP(=S)(OC)OC,1.0897268363577188,0.024525489375934365,1 +O=C1CCCCCN1,1.10465364954589,0.7540424650828417,1 +Cc1cc(C)nc(n1)Nc1ccccc1,1.1091497729605546,0.12083480234381865,1 +COc1cccc(c1C)C(=O)NN(C(C)(C)C)C(=O)c1cc(C)cc(c1)C,1.1154252951100516,0.2559914298530539,1 +COC(=O)C(=CC=CC(=CC=CC=C(C=CC=C(C=CC1=C(C)CCCC1(C)C)C)C)C)C,1.119409718240544,0.022228348031877943,1 +[O-][N+](=O)c1cc(Cl)c(c(c1)Cl)N,1.159340984210935,0.15334536959372352,1 +Oc1ccc(cc1Cl)C(C)(C)C,1.1697007223226876,0.10384161634159869,1 +Oc1ccccc1c1ccccc1,1.1750384237564568,0.20553569827566362,1 +COc1cc(OC)nc(n1)NC(=O)NS(=O)(=O)Nc1ccccc1C(=O)N(C)C,1.1780461209768547,0.42683956236105325,1 +CON=C(c1ccccc1COc1ccccc1C)C(=O)OC,1.1807966969350603,0.3457416736197563,1 +CCCOc1nn(c(=O)n1C)C(=O)[N-]S(=O)(=O)c1ccccc1C(=O)OC.[Na+],1.1894202967675005,0.7650789414872524,1 +CO/N=C(\c1ccccc1COc1ccccc1C)/C(=O)OC,1.1967534090558043,0.34191903724770456,1 
+CON=C(c1ccccc1COc1ccccc1C)C(=O)OC,1.1967534090558043,0.3631304872247199,1 +OC(=O)Cc1cccc2c1cccc2,1.205650068257516,0.17731115397814587,1 +OCc1cc(N=Nc2ccc(c3c2cccc3)S(=O)(=O)O)c(c(c1O)N=Nc1ccc(c2c1cccc2)S(=O)(=O)O)O,1.2093346835379808,1.490670783637784,1 +CC1=CC(=O)CC(C1)(C)C,1.295160023171064,0.11315631785675133,1 +COC(=O)Nc1nc2c([nH]1)cccc2,1.3076226134187396,0.083049663729908,1 +C[N]1(C)CCCCC1,1.3133857473480115,0.44032207102935567,1 +OC1CCC2(C(C1)CCC1C2CCC2(C1CCC2C(CCC(=O)O)C)C)C,1.3277652171188237,0.8209063397614011,0.21052631578947367 +COc1cc(OC)nc(n1)NC(=O)NS(=O)(=O)N(S(=O)(=O)C)C,1.3411855059279478,0.4773030778075665,1 +Oc1ccc(c(c1)C(C)(C)C)O,1.3536524792656537,0.2280341915527015,1 +COc1ncc(c2n1nc(n2)S(=O)(=O)Nc1c(F)cccc1F)F,1.391657397996453,0.18238639424428663,1 +OCC1OC2OC3C(CO)OC(C(C3O)O)OC3C(CO)OC(C(C3O)O)OC3C(CO)OC(C(C3O)O)OC3C(OC(OC4C(OC(OC5C(OC(OC1C(C2O)O)C(O)C5O)CO)C(O)C4O)CO)C(O)C3O)CO,1.4097112541302337,1.9926642160138068,1 +O=C(Nc1cc(F)cc(c1)F)N/N=C(/c1ncccc1C(=O)O)\C,1.4120001283962829,0.4813337208316472,1 +CCCN(c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-])CCC,1.4316432834082535,0.08660753713188539,1 +CCCCOC(=O)c1ccccc1C(=O)OCc1ccccc1,1.504675539130048,0.2949966180640334,1 +COC(=O)c1c(Cl)c(Cl)c(c(c1Cl)Cl)C(=O)OC,1.5061863289853148,0.7244629133093408,1 +Fc1cc2CCC(n3c2c(c1)c(=O)c(c3)C(=O)O)C,1.531109972815908,0.23409347986251686,1 +CC(Oc1cccc(c1)NC(=O)c1ccccc1C(F)(F)F)C,1.5465050300849357,0.10785437781127803,1 +CCC(c1noc(c1)NC(=O)c1c(OC)cccc1OC)(CC)C,1.5854670852219546,0.600658613403707,1 +CC(C1(C)N=C(NC1=O)c1nc2ccccc2cc1C(=O)O)C,1.605986191473768,0.8661050233290373,1 +c1ccc(cc1)c1ccccc1,1.6211890708511503,0.19623937242557016,1 +NCC(c1ccc(cc1)O)O,1.6320834707547616,0.7275430591610657,1 +ClC(SN1C(=O)c2c(C1=O)cccc2)(Cl)Cl,1.6860133324539086,0.2279278063389893,1 +CCc1c(C(=O)O)c(=O)cnn1c1ccc(cc1)Cl,1.6864553664875628,0.06988049924178537,1 +ClCC#CCOC(=O)Nc1cccc(c1)Cl,1.743505808935165,0.17838785643679636,1 
+O/C(=C\1/C(=O)CC(CC1=O)C(=O)O)/C1CC1,1.752821172367082,1.8740405032498018,0.35294117647058826 +OC(=O)CNCP(=O)(O)O,1.7743806406081915,0.700841565636653,0.16666666666666666 +C[N+]1(C)CCCCC1.[Cl-],1.790706021930536,0.39535589379894426,1 +COc1ccc(c(c1)OC)N,1.8018201517132568,0.3052747819868152,1 +CCC(c1noc(c1)NC(=O)c1c(OC)cccc1OC)(CC)C,1.8050858655278421,0.6950927673017638,1 +CCC(=O)C1=C([O-])CC(CC1=O)C(=O)[O-].[Ca+2],1.874040503249802,0.7843481454913989,1 +CC(C1(C)N=C(NC1=O)c1ncccc1C(=O)O)C,1.913681483026602,0.5005571515667719,1 +OC(=O)COc1nc(F)c(c(c1Cl)N)Cl,1.9605490478397496,0.13193009603279973,1 +Clc1ccc(cc1)Cl,2.0407891160090657,0.059940824641567726,1 +ClCCP(=O)(O)O,2.062369371738619,2.424380344082731,1 +CCCCOC(=O)c1ccccc1C(=O)OCCCC,2.1556100397968727,0.3306107423417943,1 +Clc1ccccc1CC(C1(Cl)CC1)(Cn1nc[nH]c1=S)O,2.178589749473798,0.04476029229671277,1 +c1ccc(cc1)c1ccccc1OCC1CO1,2.209744922072461,0.8308076307932614,1 +ClCC[N](C)(C)C,2.2427665071284903,0.7132413319358359,1 +COc1cc(OC)n2c(n1)nc(n2)NS(=O)(=O)c1c(OC)nccc1C(F)(F)F,2.302288500094267,0.07863543399156463,1 +CC=Cc1ccc(cc1)OC,2.3211612715861247,0.57509958551583,1 +CC(OC(=O)Nc1cccc(c1)Cl)C,2.340158076742021,0.09379184389027877,1 +COC(=O)c1ccccc1O,2.366127776683809,0.40910477089720465,1 +CO/N=C(\c1ccccc1COc1ccccc1C)/C(=O)OC,2.4002085592886893,0.30308910509520615,1 +CCOC(=O)C=C,2.477130986890983,0.07252011054930413,1 +COC(=O)CC(c1ccc(cc1)Cl)NC(=O)C(C(C)C)NC(=O)OC(C)C,2.5070128670931195,0.1392906426524743,1 +COc1nn(c(=O)n1C)C(=O)NS(=O)(=O)c1ccccc1OC(F)(F)F,2.5233463155295692,0.17079237831250552,1 +CCc1ccccc1,2.741016342485753,0.10816844160527843,1 +CC(c1ccccc1)C,2.7539366734341955,0.10052257452618389,1 +COc1cc(OC)nc(n1)NC(=O)NS(=O)(=O)c1n2ccccc2nc1S(=O)(=O)CC,2.7556956072872962,0.47934744084043035,1 +CC(=C)C(=O)O,2.8807316686731115,9.313172081918692,0.14285714285714285 +CC(N(c1c(cc(cc1N(=O)=O)C(F)(F)F)N(=O)=O)C(C)C)C,2.982590173767195,0.06936563821486286,1 
+CN(NC(=O)CCC(=O)O)C,3.0342556221759884,0.4994850207500349,0.13043478260869565 +CCCOC(=O)NCCCN(C)C.Cl,3.0347765817059753,0.4951806205403354,1 +Oc1ccccc1c1ccccc1,3.119727015073393,0.20208034019115165,1 +Clc1cnc2c(c1)ccc(c2C(=O)O)Cl,3.127347059508829,0.22941129754989037,1 +C=Cc1ccccc1,3.2885539503358263,0.40610034701537245,1 +OC(=O)CNCP(=O)(O)O,3.3121771958019575,0.700841565636653,0.16666666666666666 +Cc1cnc2c(c1)ccc(c2C(=O)O)Cl,3.3387517363764943,0.12532539229532583,1 +CCCOC(=O)NCCCN(C)C,3.611885866531256,0.6205586301556104,1 +CCOP(=O)O,3.6347465046005896,0.0037493977240957736,1 +Oc1ccccc1,3.655248831064175,0.4577004151346716,1 +[O-]P(=O)OCC.[O-]P(=O)OCC.[O-]P(=O)OCC.[Al+3],3.6853523538557287,0.0036565143470414633,1 +CC1CCC(C(C1)O)C(C)C,3.7948308388559964,7.0995880579775275,1 +CCc1ccccc1,3.843074459567654,0.10475847999476977,1 +CC(c1ccccc1)C,3.8438632722857955,0.10445904296431191,1 +COc1ccc(cc1)N,3.8488877932280037,0.316595477102011,1 +OCCO,4.027850816139244,2.4679094429571533,1 +CCCCC(COC(=O)CCCCC(=O)OCC(CCCC)CC)CC,4.047856676081442,0.32235006632991803,1 +CCCOC(=O)c1cc(O)c(c(c1)O)O,4.071644352421931,0.8719186417792422,1 +CC(CCCC1(C)CCc2c(O1)c(C)c(c(c2C)OC(=O)C)C)CCCC(CCCC(C)C)C,4.230630449818821,1.228920788624059,1 +COc1ccc(cc1N=Nc1c(O)c(cc2c1cccc2)C(=O)Nc1cccc(c1)N(=O)=O)N(=O)=O,4.308389780762046,0.29938908034808137,1 +Oc1ccc(nn1)O,4.460830164062196,0.9991119005328597,0.1 +S=c1sc2c([nH]1)cccc2,4.484270077422418,0.08651938034196377,1 +C[N+]1(C)CCCCC1.[Cl-],4.570309399255547,0.4822129846769693,1 +CC(OC(=O)Nc1cccc(c1)Cl)C,4.680316153484042,0.09870482366830721,1 +Clc1cc(N)c(c(n1)C(=O)O)Cl,4.830587434212229,0.3382938372048412,1 +COc1cc(OC)nc(n1)NC(=O)NS(=O)(=O)c1ncccc1C(=O)N(C)C,5.08765706618306,0.7202216754770163,1 +OC(=O)CNCP(=O)(O)O,5.559726007239,0.700841565636653,0.16666666666666666 +Oc1ccccc1c1ccccc1,5.875192118782284,0.2034781041782699,1 +OC(=O)CNCP(=O)(O)O,5.914602135360638,0.700841565636653,0.16666666666666666 
+CCOc1ccc(cc1N)NC(=O)C,6.1010029534002825,0.21386700637051745,1 +Nc1ccc(cc1)O,6.286318149278613,0.5281758108186807,1 +NC(=S)NNC(=S)N,6.303842268414009,0.009908374299988254,0.14285714285714285 +NC(=O)c1cnccn1,6.408762052980724,0.08275360610326041,1 +OCCO,6.44456130582279,2.4679094429571533,1 +OC(=O)c1ccc(cc1N)N(=O)=O,6.506215164982792,0.3198789458458921,1 +Oc1cc(O)c2c(c1)oc(c(c2=O)O)c1ccc(c(c1)O)O,6.729846937340625,0.7372084952076587,1 +OC(=O)CNCP(=O)(O)O,7.180326992327815,0.700841565636653,0.16666666666666666 +ClCC(=O)c1ccc(cc1)NC(=O)C,7.465334624174738,0.1673576108507557,1 +O=C1OC(=O)c2c1cccc2,8.000509872156579,0.5274835013205029,1 +CCCOC(=O)c1ccc(cc1)O,8.324062177858794,0.6944698273376053,1 +OCC(C1OC(=O)C(=C1O)O)O,8.82332300652517,3.7042082617048484,1 +CCOC(=O)COC(=O)c1ccccc1C(=O)OCC,8.919866912731305,0.19875027484458077,1 +O=C1CCCCC1,9.272184465524795,0.1823661785242976,1 +OC(=O)C=CC(=O)O,9.313172081918696,2.8807316686731115,0.14285714285714285 +COC(=O)c1ccc(cc1)O,9.858865736182537,0.4737443305504875,1 +COC(=O)c1ccccc1C(=O)OC,10.299509743336218,0.5316830719957618,1 +OC1C2C(N(C)C)C(=O)C(=C(O)N)C(=O)C2(O)C(=O)C2=C(O)c3c(C(C12)(C)O)c(Cl)ccc3O,10.50761860949369,0.3375075061669371,0.6216216216216216 +OCCO,14.822491003392418,3.7042082617048484,1 +OCCO,16.111403264556976,2.4679094429571533,1 +CCCCCCCCCCCCCCCCCC(=O)OCC(C1OCC(C1O)O)O,16.727105323218392,0.40281615341572896,1 +OCC(C1OC(=O)C(=C1O)O)O,17.323010613197102,12.13655283247625,1 +[O-]S(=O)(=O)NC1CCCCC1.[Na+],17.900880706433757,0.19005758519114205,1 +O=C1NS(=O)(=O)c2c1cccc2,19.66323569952698,0.3155082567836235,1 +CCCCCCCCCCCC(=O)OCC(C1OCC(C1O)O)O,19.866710908558982,0.3736091886918899,1 +CCOC(=O)c1ccccc1C(=O)OCC,19.95615854702247,0.23974536678467762,1 +OC(=O)c1ccccc1N,20.060380944519448,0.5033545540660739,1 +OCCO,32.22280652911395,0.2885556079481661,1 +OCC(CO)O,74.73899985905678,1.1811816236152828,1 diff --git a/paper/data/combined-cv.id b/paper/data/combined-cv.id new file mode 100644 index 0000000..0591b35 --- 
/dev/null +++ b/paper/data/combined-cv.id @@ -0,0 +1 @@ +56c42e262b72ed11e7000001 diff --git a/paper/data/combined-test-predictions.csv b/paper/data/combined-test-predictions.csv index d378693..976ca41 100644 --- a/paper/data/combined-test-predictions.csv +++ b/paper/data/combined-test-predictions.csv @@ -1,151 +1,151 @@ -SMILES,LOAEL,Confidence,Dataset -O=S1OCC2C(CO1)C1(C(C2(Cl)C(=C1Cl)Cl)(Cl)Cl)Cl,0.0003638692469573398,1,combined-prediction -CCSCSP(=S)(OCC)OCC,0.0016526156453431225,1,combined-prediction -CCOP(=S)(OCC)SCSP(=S)(OCC)OCC,0.0019642241382633796,1,combined-prediction -CCOP(=S)(SCSC(C)(C)C)OCC,0.0034018170593390737,1,combined-prediction -CCOP(=S)(SCn1c(=O)oc2c1ccc(c2)Cl)OCC,0.007284468451831286,1,combined-prediction -CCCSP(=O)(SCCC)OCC,0.007302272401231296,1,combined-prediction -CCOP(=O)(Oc1ccc(c(c1)C)SC)NC(C)C,0.008370828170108851,1,combined-prediction -CSc1ccc(cc1C)OP(=S)(OC)OC,0.00964124005965057,1,combined-prediction -COc1sc(=O)n(n1)CSP(=S)(OC)OC,0.011073447351926287,1,combined-prediction -CCOP(=S)(Oc1ccc2c(c1)oc(=O)c(c2C)Cl)OCC,0.01194888189741255,1,combined-prediction -CNC(=O)CSP(=S)(OC)OC,0.011977939066676562,1,combined-prediction -COP(=S)(Oc1ccc(c(c1)C)[N+](=O)[O-])OC,0.01253495184268662,1,combined-prediction -CCCCOC(=O)C(Oc1ccc(cc1)Oc1ccc(cn1)C(F)(F)F)C,0.012661569287564291,1,combined-prediction -CCOP(=S)(Oc1nc(Cl)c(cc1Cl)Cl)OCC,0.013006855002501155,1,combined-prediction -CCOP(=S)(Oc1cc(C)nc(n1)C(C)C)OCC,0.01357913208550989,1,combined-prediction -CSc1nnc(c(=O)n1N)C(C)(C)C,0.013701160159437665,0.11538461538461539,combined-prediction -CCOP(=O)(SC(CC)C)SC(CC)C,0.014239911275829733,1,combined-prediction -N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.014761743547403035,1,combined-prediction -OC(=O)C(Oc1ccc(cc1)Oc1ncc(cc1Cl)C(F)(F)F)C,0.015808512748976208,1,combined-prediction -N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C(c1ccc(cc1)Cl)C(C)C,0.016001687478085606,1,combined-prediction -COP(=O)(NC(=O)C)SC,0.01603420284847195,1,combined-prediction 
-N#CC(c1ccc(c(c1)Oc1ccccc1)F)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.016181663783863653,1,combined-prediction -CCOC(=O)C(Oc1ccc(cc1)Oc1cnc2c(n1)ccc(c2)Cl)C,0.01709608560436501,1,combined-prediction -C=CCOC(c1ccc(cc1Cl)Cl)Cn1cncc1,0.01934170105611924,1,combined-prediction -OC(=O)C(Oc1ccc(cc1Cl)Cl)C,0.01969166251145765,1,combined-prediction -CCN(c1nc(cc(n1)C)OP(=S)(OC)OC)CC,0.02036765211069526,1,combined-prediction -CCOP(=S)(Oc1ncn(n1)c1ccccc1)OCC,0.021095802363679313,1,combined-prediction -CNC(=O)Oc1cc(C)c(c(c1)C)SC,0.02137860199550009,1,combined-prediction -COP(=S)(SCn1nnc2c(c1=O)cccc2)OC,0.023715894144774617,1,combined-prediction -CCOC(=O)CC(C(=O)OCC)SP(=S)(OC)OC,0.02452548937593434,1,combined-prediction -Clc1cc(Cl)c(cc1n1nc(n(c1=O)C(F)F)C)NS(=O)(=O)C,0.024541035827570765,1,combined-prediction -COP(=O)(OC=C(Cl)Cl)OC,0.024813902049782052,1,combined-prediction -COP(=S)(Oc1nc(Cl)c(cc1Cl)Cl)OC,0.02511976746876867,1,combined-prediction -N#Cc1nn(c(c1S(=O)C(F)(F)F)N)c1c(Cl)cc(cc1Cl)C(F)(F)F,0.026175977495430238,1,combined-prediction -Fc1ccc(cc1)[Si](c1ccc(cc1)F)(Cn1cncn1)C,0.026214170808255548,1,combined-prediction -CC(C(c1cncnc1)(c1ccc(cc1)OC(F)(F)F)O)C,0.026593616679335016,1,combined-prediction -N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)(C)C,0.027078346141040016,1,combined-prediction -N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Br)Br,0.02720741121158317,1,combined-prediction -CNC(=O)ON=C(C(=O)N(C)C)SC,0.02723422893053635,1,combined-prediction -O=C(c1ccc(cc1S(=O)(=O)C)C(F)(F)F)c1cnoc1C1CC1,0.028785189377791072,1,combined-prediction -CCCCN(SN(C(=O)Oc1cccc2c1OC(C2)(C)C)C)CCCC,0.02933672191817046,1,combined-prediction -CC(N(C(=O)SCC(=C(Cl)Cl)Cl)C(C)C)C,0.03101114141686048,1,combined-prediction -CN(C(=O)Oc1nc(nc(c1C)C)N(C)C)C,0.03602454025477674,1,combined-prediction -COC(=O)c1c(nc(c(c1CC(C)C)C1=NCCS1)C(F)(F)F)C(F)F,0.03652048573032131,1,combined-prediction -CCCC1COC(O1)(Cn1cncn1)c1ccc(cc1Cl)Cl,0.03831103939366667,1,combined-prediction 
-CC1N(C(=O)NC2CCCCC2)C(=O)SC1c1ccc(cc1)Cl,0.0404095891170599,1,combined-prediction -CCNc1nc(NC(C)C)nc(n1)Cl,0.04120360138919732,1,combined-prediction -OC(=O)COc1ccc(cc1Cl)Cl,0.04220782045663331,1,combined-prediction -CCN(C(=O)SCc1ccc(cc1)Cl)CC,0.042584273505466334,1,combined-prediction -BrC1COC(C1)(Cn1cncn1)c1ccc(cc1Cl)Cl,0.042806163072444406,1,combined-prediction -OC(=O)COc1nc(Cl)c(cc1Cl)Cl,0.04327585927626025,1,combined-prediction -CON(C(=O)Nc1ccc(cc1)Br)C,0.04331377020795739,1,combined-prediction -CON(C(=O)Nc1ccc(c(c1)Cl)Cl)C,0.04491567539992076,1,combined-prediction -Clc1ccc(c(c1)Cl)C1(OCCO1)Cn1cncn1,0.0456310249928634,1,combined-prediction -CCOCn1c(c2ccc(cc2)Cl)c(c(c1C(F)(F)F)Br)C#N,0.047490155350518225,1,combined-prediction -N#CC(c1ccccc1)(Cn1cncn1)CCc1ccc(cc1)Cl,0.04952824330064722,1,combined-prediction -CCOC(=O)c1cn2nc(cc2nc1C)OP(=S)(OCC)OCC,0.04976438552720587,1,combined-prediction -OC(C(Cl)(Cl)Cl)(c1ccc(cc1)Cl)c1ccc(cc1)Cl,0.049903283432057355,1,combined-prediction -Clc1ccc(cc1)C(c1ccccc1Cl)(c1cncnc1)O,0.0502876438433783,1,combined-prediction -ClC(=CC1C(C1(C)C)C(=O)OCc1cccc(c1)Oc1ccccc1)Cl,0.05112367301257726,1,combined-prediction -CN1CN(C)CSC1=S,0.05143501540726455,1,combined-prediction -Clc1ccc(cc1)CCC(C(C)(C)C)(Cn1cncn1)O,0.05167638607399666,1,combined-prediction -CN(C(=S)SSC(=S)N(C)C)C,0.052029910797683425,1,combined-prediction -CNC(=O)Oc1cccc2c1OC(C2)(C)C,0.05274344365338553,1,combined-prediction -O=C(NC(=O)c1c(F)cccc1F)Nc1cc(Cl)c(c(c1F)Cl)F,0.05286631638058653,1,combined-prediction -CCOc1cc(ccc1[N+](=O)[O-])Oc1ccc(cc1Cl)C(F)(F)F,0.053208362874294673,1,combined-prediction -CNC(=O)Oc1ccccc1OC(C)C,0.05328869177586038,1,combined-prediction -COC(=O)N(C(=O)N1COC2(C(=N1)c1ccc(cc1C2)Cl)C(=O)OC)c1ccc(cc1)OC(F)(F)F,0.054676171737019746,1,combined-prediction -COP(=S)(SCN1C(=O)c2c(C1=O)cccc2)OC,0.05536474598180194,1,combined-prediction -CC(Nc1nc(NC(C)C)nc(n1)Cl)C,0.057817340618821475,1,combined-prediction 
-O=C(N(C)C)Nc1cccc(c1)C(F)(F)F,0.057843156201256546,1,combined-prediction -CCCN(C(=O)n1cncc1)CCOc1c(Cl)cc(cc1Cl)Cl,0.05900731758603697,1,combined-prediction -CC(=CC1C(C1(C)C)C(=O)OCc1cccc(c1)Oc1ccccc1)C,0.059860013602209265,1,combined-prediction -O=C(C(C)(C)C)C(n1ncnc1)Oc1ccc(cc1)Cl,0.061223837435072606,1,combined-prediction -OC(=O)COc1ccc(cc1C)Cl,0.06409863417711698,1,combined-prediction -Clc1ccc(cc1)CN(C(=O)Nc1ccccc1)C1CCCC1,0.06564925552956072,1,combined-prediction -Cc1ccc(cc1)N(S(=O)(=O)N(C)C)SC(Cl)(Cl)F,0.0672060941474649,1,combined-prediction -O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(c(c1)Cl)OC(C(OC(F)(F)F)F)(F)F,0.06936283864904705,1,combined-prediction -CC(NC(=O)N1CC(=O)N(C1=O)c1cc(Cl)cc(c1)Cl)C,0.07084460220094288,1,combined-prediction -N#Cc1c(Cl)c(C#N)c(c(c1Cl)Cl)Cl,0.07089396189028405,1,combined-prediction -c1scc(n1)c1nc2c([nH]1)cccc2,0.07124290059304189,1,combined-prediction -O=C1N(OCC1(C)C)Cc1ccccc1Cl,0.07178252950433608,1,combined-prediction -C#CCOS(=O)OC1CCCCC1Oc1ccc(cc1)C(C)(C)C,0.07255184301853296,1,combined-prediction -OC(C(C)(C)C)C(n1ncnc1)Oc1ccc(cc1)Cl,0.07311717098654685,1,combined-prediction -Clc1c(Cl)c([N+](=O)[O-])c(c(c1Cl)Cl)Cl,0.07316751274390274,1,combined-prediction -CCC(Nc1c(cc(cc1[N+](=O)[O-])C(C)(C)C)[N+](=O)[O-])C,0.07663751118571578,1,combined-prediction -CCOCN(c1c(C)cccc1CC)C(=O)CCl,0.07899149299378629,1,combined-prediction -Clc1cc(ccc1Oc1ccc(c(c1)C(=O)NS(=O)(=O)C)[N+](=O)[O-])C(F)(F)F,0.07907394396694326,1,combined-prediction -ClC(SN1C(=O)C2C(C1=O)CC=CC2)(Cl)Cl,0.07976114599708196,1,combined-prediction -CCCCC(c1ccc(cc1)Cl)(Cn1cncn1)C#N,0.08067037428729226,1,combined-prediction -COC(=O)Nc1nc2c([nH]1)cccc2,0.0814533769149189,1,combined-prediction -COCC(N(c1c(C)cccc1CC)C(=O)CCl)C,0.08251097640354867,1,combined-prediction -Clc1ccccc1c1nnc(nn1)c1ccccc1Cl,0.08591820939424631,1,combined-prediction -O=C1N(c2cc(Cl)cc(c2)Cl)C(=O)C2(C1(C)C2)C,0.08912049056135805,1,combined-prediction -N#Cc1c(Cl)cccc1Cl,0.0921129946367937,1,combined-prediction 
-CC(OC(=O)C(c1ccc(cc1)Br)(c1ccc(cc1)Br)O)C,0.09289647796203974,1,combined-prediction -ClCC1CN(C(=O)C1Cl)c1cccc(c1)C(F)(F)F,0.0941350572749445,1,combined-prediction -CC(OC(=O)Nc1cccc(c1)Cl)C,0.10044437465404737,1,combined-prediction -O=C(N(C)C)Nc1ccc(c(c1)Cl)Cl,0.10144600229996162,1,combined-prediction -CC(Oc1cccc(c1)NC(=O)c1ccccc1C(F)(F)F)C,0.10535546107335386,1,combined-prediction -C=CC1(C)OC(=O)N(C1=O)c1cc(Cl)cc(c1)Cl,0.10756688196876785,1,combined-prediction -Nc1nc(NC2CC2)nc(n1)N,0.10982169517930987,1,combined-prediction -O=C1OC(C(=O)N1Nc1ccccc1)(C)c1ccc(cc1)Oc1ccccc1,0.11918717728199056,1,combined-prediction -CNC(=O)Oc1cccc2c1cccc2,0.1209025875895827,1,combined-prediction -COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1C(=O)OC,0.12263485736061217,1,combined-prediction -O=Cc1ccco1,0.12486833177320306,0.1111111111111111,combined-prediction -CCOc1ccc(cc1)C(COCc1cccc(c1)Oc1ccccc1)(C)C,0.12843709655351573,1,combined-prediction -COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1CCC(F)(F)F,0.1291134993044419,1,combined-prediction -CC(Oc1cc(c(cc1Cl)Cl)n1nc(oc1=O)C(C)(C)C)C,0.12913282716460453,1,combined-prediction -OC(=O)COc1nc(F)c(c(c1Cl)N)Cl,0.13193009603279973,1,combined-prediction -CCC(=O)Nc1ccc(c(c1)Cl)Cl,0.13348596957039296,1,combined-prediction -CC(Oc1ccccn1)COc1ccc(cc1)Oc1ccccc1,0.13723730603736453,1,combined-prediction -CC(Cc1ccc(cc1)C(C)(C)C)CN1CC(C)OC(C1)C,0.14135429894506185,1,combined-prediction -COC(=O)N(c1ccccc1COc1ccn(n1)c1ccc(cc1)Cl)OC,0.1422803945334266,1,combined-prediction -O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(cc1)Cl,0.14262489512256243,1,combined-prediction -CCOC(=O)C(Cc1cc(c(cc1Cl)F)n1nc(n(c1=O)C(F)F)C)Cl,0.14416196256229533,1,combined-prediction -ClCCOc1ccccc1S(=O)(=O)NC(=O)Nc1nc(C)nc(n1)OC,0.14729599082809905,1,combined-prediction -[O-][N+](=O)c1cc(Cl)c(c(c1)Cl)N,0.15109322689681717,1,combined-prediction -C#CCC1=C(C)C(CC1=O)OC(=O)C1C(C1(C)C)C=C(C)C,0.15407527700390689,1,combined-prediction 
-OC(C(C)(C)C)C(n1cncn1)Oc1ccc(cc1)c1ccccc1,0.1554702671615349,1,combined-prediction -CCCCCCCCSC(=O)Oc1cc(Cl)nnc1c1ccccc1,0.15669364902191535,1,combined-prediction -CC(=CC1C(C1(C)C)C(=O)OCc1coc(c1)Cc1ccccc1)C,0.16342863567669363,1,combined-prediction -N#Cc1sc2=c(sc1C#N)c(=O)c1c(c2=O)cccc1,0.167935147286533,1,combined-prediction -N#Cc1c[nH]cc1c1cccc2c1OC(O2)(F)F,0.17957314863237633,1,combined-prediction -CCOC(=O)NCCOc1ccc(cc1)Oc1ccccc1,0.18966068443126202,1,combined-prediction -Cc1nc(Nc2ccccc2)nc(c1)C1CC1,0.19454416629038565,1,combined-prediction -Oc1ccccc1c1ccccc1,0.20273166023828132,1,combined-prediction -CCOc1ccc2c(c1)C(=CC(N2)(C)C)C,0.20999292062465813,1,combined-prediction -O=C(C1=C(C)OCCS1)Nc1ccccc1,0.21865371953286197,1,combined-prediction -COC(=O)NC(=S)Nc1ccccc1NC(=S)NC(=O)OC,0.23311870284543604,1,combined-prediction -COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1Cl,0.2405933419678417,1,combined-prediction -ClC(SN1C(=O)c2c(C1=O)cccc2)(Cl)Cl,0.24709110501373716,1,combined-prediction -CCN(C(=O)C(Oc1cccc2c1cccc2)C)CC,0.24709794800636262,1,combined-prediction -COc1cccc(c1C)C(=O)NN(C(C)(C)C)C(=O)c1cc(C)cc(c1)C,0.2488946852404996,1,combined-prediction -c1ccc(cc1)Nc1ccccc1,0.25118644343506413,1,combined-prediction -CCC(n1c(=O)[nH]c(c(c1=O)Br)C)C,0.2948426093660432,0.25,combined-prediction -COCN(c1c(CC)cccc1CC)C(=O)CCl,0.30333289283418186,1,combined-prediction -COP(=O)(SC)N,0.33442367385922134,1,combined-prediction -CCCCOCCOCCOCc1cc2OCOc2cc1CCC,0.3350153436602428,1,combined-prediction -Cc1cccc2c1n1cnnc1s2,0.3856935237852282,1,combined-prediction -O=C(C1(C)CCCCC1)Nc1ccc(c(c1Cl)Cl)O,0.42312539665837845,1,combined-prediction -CCC(c1noc(c1)NC(=O)c1c(OC)cccc1OC)(CC)C,0.4306936653208536,1,combined-prediction -OC(=O)c1nc(Cl)c(c(c1Cl)N)Cl,0.4720460499425296,1,combined-prediction -CC(N1C(=O)c2ccccc2NS1(=O)=O)C,0.4889107282616924,1,combined-prediction -COC(=O)NS(=O)(=O)c1ccc(cc1)N,0.4978392275403079,1,combined-prediction 
-OC(=O)CNCP(=O)(O)O,0.700841565636653,0.16666666666666666,combined-prediction -COC(=O)c1c(Cl)c(Cl)c(c(c1Cl)Cl)C(=O)OC,0.7477534452700889,1,combined-prediction -CC(C1(C)N=C(NC1=O)c1nc2ccccc2cc1C(=O)O)C,0.8106254748473309,1,combined-prediction -CCc1ccc(cc1)C(=O)NN(C(C)(C)C)C(=O)c1cc(C)cc(c1)C,1.0000960182418923,1,combined-prediction -ClCCP(=O)(O)O,2.424380344082731,1,combined-prediction +SMILES,LOAEL_measured_median,LOAEL_predicted,Confidence,Dataset +N#Cc1nn(c(c1S(=O)C(F)(F)F)N)c1c(Cl)cc(cc1Cl)C(F)(F)F,0.0001361095787305931,0.026175977495430252,1,combined-prediction +OC(=O)C(Oc1ccc(cc1)Oc1ncc(cc1Cl)C(F)(F)F)C,0.0002764719490734748,0.015808512748976208,1,combined-prediction +CCOP(=S)(SCSC(C)(C)C)OCC,0.000277363084031507,0.0034018170593390737,1,combined-prediction +CCSCSP(=S)(OCC)OCC,0.0006144925543928096,0.0016526156453431208,1,combined-prediction +CCOP(=O)(SC(CC)C)SC(CC)C,0.0008728063120409454,0.014239911275829733,1,combined-prediction +CNC(=O)CSP(=S)(OC)OC,0.001090477150926923,0.011977939066676562,1,combined-prediction +COP(=O)(SC)N,0.0020548549621536454,0.33442367385922134,1,combined-prediction +CSc1ccc(cc1C)OP(=S)(OC)OC,0.0025868753585247565,0.00964124005965057,1,combined-prediction +CCOP(=S)(Oc1ccc2c(c1)oc(=O)c(c2C)Cl)OCC,0.0033630532459809582,0.01194888189741255,1,combined-prediction +CCOP(=S)(Oc1ncn(n1)c1ccccc1)OCC,0.004149211972577347,0.021095802363679302,1,combined-prediction +COc1sc(=O)n(n1)CSP(=S)(OC)OC,0.004713372383210075,0.011073447351926287,1,combined-prediction +CCOP(=S)(OCC)SCSP(=S)(OCC)OCC,0.0049417895576815835,0.0019642241382633796,1,combined-prediction +CCOP(=O)(Oc1ccc(c(c1)C)SC)NC(C)C,0.005274306112287868,0.008370828170108851,1,combined-prediction +CC(Cc1ccc(cc1)C(C)(C)C)CN1CC(C)OC(C1)C,0.005601648043851348,0.14135429894506182,1,combined-prediction +COP(=S)(Oc1nc(Cl)c(cc1Cl)Cl)OC,0.006200913183680908,0.025119767468768657,1,combined-prediction 
+OC(C(Cl)(Cl)Cl)(c1ccc(cc1)Cl)c1ccc(cc1)Cl,0.006747899500347733,0.049903283432057355,1,combined-prediction +COC(=O)N(C(=O)N1COC2(C(=N1)c1ccc(cc1C2)Cl)C(=O)OC)c1ccc(cc1)OC(F)(F)F,0.006820319665576013,0.05467617173701966,1,combined-prediction +Clc1ccc(cc1)C(c1ccccc1Cl)(c1cncnc1)O,0.007548400798826121,0.0502876438433783,1,combined-prediction +COP(=S)(SCn1nnc2c(c1=O)cccc2)OC,0.0076105098020530036,0.023715894144774607,1,combined-prediction +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Br)Br,0.007818698763639501,0.027207411211583155,1,combined-prediction +CCN(c1nc(cc(n1)C)OP(=S)(OC)OC)CC,0.008187766847509327,0.02036765211069526,1,combined-prediction +CCCCOC(=O)C(Oc1ccc(cc1)Oc1ccc(cn1)C(F)(F)F)C,0.008855868434313272,0.012661569287564251,1,combined-prediction +O=S1OCC2C(CO1)C1(C(C2(Cl)C(=C1Cl)Cl)(Cl)Cl)Cl,0.009706945232441807,0.00036386924695734017,1,combined-prediction +CCCSP(=O)(SCCC)OCC,0.010068539755671456,0.007302272401231296,1,combined-prediction +COP(=O)(OC=C(Cl)Cl)OC,0.010408382170442241,0.024813902049782052,1,combined-prediction +CC(Oc1cc(c(cc1Cl)Cl)n1nc(oc1=O)C(C)(C)C)C,0.012455788330375379,0.12913282716460453,1,combined-prediction +CNC(=O)Oc1cccc2c1OC(C2)(C)C,0.0139433514779606,0.05274344365338547,1,combined-prediction +N#Cc1c(Cl)c(C#N)c(c(c1Cl)Cl)Cl,0.015042627044387032,0.07089396189028405,1,combined-prediction +Fc1ccc(cc1)[Si](c1ccc(cc1)F)(Cn1cncn1)C,0.01585325164934852,0.026214170808255468,1,combined-prediction +CCOP(=S)(Oc1cc(C)nc(n1)C(C)C)OCC,0.01642869699075557,0.01357913208550989,1,combined-prediction +N#Cc1c(Cl)cccc1Cl,0.016568667498017633,0.0921129946367937,1,combined-prediction +CCOP(=S)(Oc1nc(Cl)c(cc1Cl)Cl)OCC,0.0171141884323489,0.013006855002501155,1,combined-prediction +BrC1COC(C1)(Cn1cncn1)c1ccc(cc1Cl)Cl,0.017185416989653705,0.042806163072444434,1,combined-prediction +CCN(C(=O)SCc1ccc(cc1)Cl)CC,0.018038670157326797,0.042584273505466334,1,combined-prediction 
+CCCN(C(=O)n1cncc1)CCOc1c(Cl)cc(cc1Cl)Cl,0.01991156926953532,0.05900731758603697,1,combined-prediction +CCOC(=O)c1cn2nc(cc2nc1C)OP(=S)(OCC)OCC,0.020087610909726116,0.04976438552720587,1,combined-prediction +COP(=S)(SCN1C(=O)c2c(C1=O)cccc2)OC,0.02048398681663214,0.055364745981801966,1,combined-prediction +CNC(=O)ON=C(C(=O)N(C)C)SC,0.022347753176858155,0.027234228930536335,1,combined-prediction +COC(=O)N(c1ccccc1COc1ccn(n1)c1ccc(cc1)Cl)OC,0.02320682656135787,0.14228039453342656,1,combined-prediction +CON(C(=O)Nc1ccc(c(c1)Cl)Cl)C,0.025090939601491648,0.04491567539992076,1,combined-prediction +O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(cc1)Cl,0.025427825579407606,0.14262489512256243,1,combined-prediction +O=C(N(C)C)Nc1ccc(c(c1)Cl)Cl,0.02574063309087087,0.10144600229996162,1,combined-prediction +CCOC(=O)C(Oc1ccc(cc1)Oc1cnc2c(n1)ccc(c2)Cl)C,0.025750915471868897,0.01709608560436501,1,combined-prediction +N#Cc1sc2=c(sc1C#N)c(=O)c1c(c2=O)cccc1,0.026997497601947272,0.167935147286533,1,combined-prediction +C=CC1(C)OC(=O)N(C1=O)c1cc(Cl)cc(c1)Cl,0.027961199362093195,0.10756688196876785,1,combined-prediction +N#CC(c1ccc(c(c1)Oc1ccccc1)F)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.028207113064839383,0.016181663783863653,1,combined-prediction +CCOC(=O)C(Cc1cc(c(cc1Cl)F)n1nc(n(c1=O)C(F)F)C)Cl,0.029112705155716945,0.14416196256229533,1,combined-prediction +CON(C(=O)Nc1ccc(cc1)Br)C,0.03130067550140176,0.04331377020795739,1,combined-prediction +CN1CN(C)CSC1=S,0.03266034652463028,0.05143501540726455,1,combined-prediction +ClCC1CN(C(=O)C1Cl)c1cccc(c1)C(F)(F)F,0.03316084217977319,0.09413505727494445,1,combined-prediction +CCCCC(c1ccc(cc1)Cl)(Cn1cncn1)C#N,0.0341788251725187,0.08067037428729226,1,combined-prediction +CCOP(=S)(SCn1c(=O)oc2c1ccc(c2)Cl)OCC,0.0345288315455876,0.007284468451831286,1,combined-prediction +CCOCn1c(c2ccc(cc2)Cl)c(c(c1C(F)(F)F)Br)C#N,0.03508230910777224,0.047490155350518225,1,combined-prediction +Clc1ccc(c(c1)Cl)C1(OCCO1)Cn1cncn1,0.03798219426521996,0.04563102499286343,1,combined-prediction 
+CC(C(c1cncnc1)(c1ccc(cc1)OC(F)(F)F)O)C,0.038746408312020406,0.026593616679335016,1,combined-prediction +CC(N(C(=O)SCC(=C(Cl)Cl)Cl)C(C)C)C,0.04102878665011248,0.03101114141686048,1,combined-prediction +COP(=S)(Oc1ccc(c(c1)C)[N+](=O)[O-])OC,0.042491175292669145,0.012534951842686624,1,combined-prediction +O=C1OC(C(=O)N1Nc1ccccc1)(C)c1ccc(cc1)Oc1ccccc1,0.045140176541360745,0.11918717728199056,1,combined-prediction +CN(C(=S)SSC(=S)N(C)C)C,0.04783039657471141,0.052029910797683425,1,combined-prediction +CN(C(=O)Oc1nc(nc(c1C)C)N(C)C)C,0.05161859628615915,0.03602454025477674,1,combined-prediction +C=CCOC(c1ccc(cc1Cl)Cl)Cn1cncc1,0.05350296944357954,0.01934170105611924,1,combined-prediction +COCN(c1c(CC)cccc1CC)C(=O)CCl,0.05375006811431045,0.30333289283418186,1,combined-prediction +CCOc1ccc2c(c1)C(=CC(N2)(C)C)C,0.05522147585284508,0.20999292062465813,1,combined-prediction +O=C(c1ccc(cc1S(=O)(=O)C)C(F)(F)F)c1cnoc1C1CC1,0.05566064749641608,0.028785189377791072,1,combined-prediction +Clc1ccccc1c1nnc(nn1)c1ccccc1Cl,0.05706818876652619,0.08591820939424627,1,combined-prediction +OC(=O)COc1ccc(cc1C)Cl,0.057322598023636456,0.06409863417711698,1,combined-prediction +CCOC(=O)NCCOc1ccc(cc1)Oc1ccccc1,0.057576722828150476,0.1896606844312623,1,combined-prediction +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C(c1ccc(cc1)Cl)C(C)C,0.05953797389131243,0.016001687478085606,1,combined-prediction +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)(C)C,0.06009909138187043,0.027078346141040016,1,combined-prediction +CC(OC(=O)C(c1ccc(cc1)Br)(c1ccc(cc1)Br)O)C,0.06061453423316249,0.09289647796203974,1,combined-prediction +Clc1cc(ccc1Oc1ccc(c(c1)C(=O)NS(=O)(=O)C)[N+](=O)[O-])C(F)(F)F,0.06267621846158328,0.07907394396694326,1,combined-prediction +COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1Cl,0.06288907725176857,0.24059334196784166,1,combined-prediction +CCCCN(SN(C(=O)Oc1cccc2c1OC(C2)(C)C)C)CCCC,0.06569530810416269,0.029336721918170473,1,combined-prediction 
+O=C(NC(=O)c1c(F)cccc1F)Nc1cc(Cl)c(c(c1F)Cl)F,0.06690994773808298,0.05286631638058653,1,combined-prediction +CSc1nnc(c(=O)n1N)C(C)(C)C,0.06719929397120725,0.013701160159437665,0.11538461538461539,combined-prediction +O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(c(c1)Cl)OC(C(OC(F)(F)F)F)(F)F,0.06758613754894155,0.06936283864904705,1,combined-prediction +CCOc1ccc(cc1)C(COCc1cccc(c1)Oc1ccccc1)(C)C,0.06839527058523667,0.12843709655351573,1,combined-prediction +C#CCOS(=O)OC1CCCCC1Oc1ccc(cc1)C(C)(C)C,0.06904967382858089,0.07255184301853296,1,combined-prediction +OC(=O)COc1nc(Cl)c(cc1Cl)Cl,0.0721330354641874,0.04327585927626025,1,combined-prediction +O=C(N(C)C)Nc1cccc(c1)C(F)(F)F,0.07395704796137248,0.057843156201256546,1,combined-prediction +OC(C(C)(C)C)C(n1cncn1)Oc1ccc(cc1)c1ccccc1,0.07409262028018154,0.1554702671615349,1,combined-prediction +CCNc1nc(NC(C)C)nc(n1)Cl,0.07789199862212233,0.04120360138919732,1,combined-prediction +O=C(C1=C(C)OCCS1)Nc1ccccc1,0.08117223892684251,0.21865371953286192,1,combined-prediction +CNC(=O)Oc1cc(C)c(c(c1)C)SC,0.0827758354922366,0.02137860199550009,1,combined-prediction +OC(C(C)(C)C)C(n1ncnc1)Oc1ccc(cc1)Cl,0.08452667530010859,0.07311717098654681,1,combined-prediction +O=C(C(C)(C)C)C(n1ncnc1)Oc1ccc(cc1)Cl,0.08510674803234901,0.061223837435072606,1,combined-prediction +CC(NC(=O)N1CC(=O)N(C1=O)c1cc(Cl)cc(c1)Cl)C,0.08783443947180365,0.07084460220094288,1,combined-prediction +CCC(=O)Nc1ccc(c(c1)Cl)Cl,0.09170952329114665,0.13348596957039296,1,combined-prediction +N#CC(c1ccccc1)(Cn1cncn1)CCc1ccc(cc1)Cl,0.09203781459712614,0.049528243300647194,1,combined-prediction +ClC(=CC1C(C1(C)C)C(=O)OCc1cccc(c1)Oc1ccccc1)Cl,0.09583741068272783,0.05112367301257726,1,combined-prediction +OC(=O)C(Oc1ccc(cc1Cl)Cl)C,0.0967821447110451,0.01969166251145765,1,combined-prediction +COP(=O)(NC(=O)C)SC,0.10236623790044716,0.01603420284847195,1,combined-prediction +O=C1N(c2cc(Cl)cc(c2)Cl)C(=O)C2(C1(C)C2)C,0.11086164698737522,0.08912049056135805,1,combined-prediction 
+COC(=O)c1c(nc(c(c1CC(C)C)C1=NCCS1)C(F)(F)F)C(F)F,0.11151045292283465,0.03652048573032131,1,combined-prediction +Clc1ccc(cc1)CCC(C(C)(C)C)(Cn1cncn1)O,0.11516531274058425,0.05167638607399666,1,combined-prediction +COC(=O)NC(=S)Nc1ccccc1NC(=S)NC(=O)OC,0.12412602138191925,0.23311870284543604,1,combined-prediction +CCOc1cc(ccc1[N+](=O)[O-])Oc1ccc(cc1Cl)C(F)(F)F,0.12855945536132327,0.0532083628742947,1,combined-prediction +CCc1ccc(cc1)C(=O)NN(C(C)(C)C)C(=O)c1cc(C)cc(c1)C,0.1367492600870436,1.000096018241897,1,combined-prediction +c1scc(n1)c1nc2c([nH]1)cccc2,0.1490700414533971,0.07124290059304189,1,combined-prediction +CC(N1C(=O)c2ccccc2NS1(=O)=O)C,0.14982590230152565,0.4889107282616925,1,combined-prediction +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.15013314047110002,0.014761743547403035,1,combined-prediction +Cc1cccc2c1n1cnnc1s2,0.1506048130761757,0.3856935237852282,1,combined-prediction +ClC(SN1C(=O)c2c(C1=O)cccc2)(Cl)Cl,0.15174119992085178,0.24709110501373716,1,combined-prediction +CCC(Nc1c(cc(cc1[N+](=O)[O-])C(C)(C)C)[N+](=O)[O-])C,0.15338553104123837,0.07663751118571574,1,combined-prediction +Cc1nc(Nc2ccccc2)nc(c1)C1CC1,0.15801925188118618,0.19454416629038565,1,combined-prediction +c1ccc(cc1)Nc1ccccc1,0.16546268922726798,0.251186443435064,1,combined-prediction +Clc1cc(Cl)c(cc1n1nc(n(c1=O)C(F)F)C)NS(=O)(=O)C,0.1730416993562668,0.024541035827570765,1,combined-prediction +CCCC1COC(O1)(Cn1cncn1)c1ccc(cc1Cl)Cl,0.1767866659490005,0.03831103939366669,1,combined-prediction +C#CCC1=C(C)C(CC1=O)OC(=O)C1C(C1(C)C)C=C(C)C,0.18559079091504613,0.15407527700390689,1,combined-prediction +COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1CCC(F)(F)F,0.20017699986539617,0.1291134993044419,1,combined-prediction +CC(=CC1C(C1(C)C)C(=O)OCc1coc(c1)Cc1ccccc1)C,0.2068313193675311,0.16342863567669358,1,combined-prediction +CCOCN(c1c(C)cccc1CC)C(=O)CCl,0.21666838084755125,0.07899149299378624,1,combined-prediction 
+CC(Nc1nc(NC(C)C)nc(n1)Cl)C,0.2198425631222415,0.057817340618821475,1,combined-prediction +CNC(=O)Oc1ccccc1OC(C)C,0.23417894234275483,0.05328869177586038,1,combined-prediction +CCCCCCCCSC(=O)Oc1cc(Cl)nnc1c1ccccc1,0.2408184692696632,0.15669364902191532,1,combined-prediction +O=C1N(OCC1(C)C)Cc1ccccc1Cl,0.253443853488009,0.07178252950433611,1,combined-prediction +Cc1ccc(cc1)N(S(=O)(=O)N(C)C)SC(Cl)(Cl)F,0.25917417547047744,0.0672060941474649,1,combined-prediction +CC1N(C(=O)NC2CCCCC2)C(=O)SC1c1ccc(cc1)Cl,0.2594374890563992,0.04040958911705992,1,combined-prediction +CCN(C(=O)C(Oc1cccc2c1cccc2)C)CC,0.27189561466298434,0.2470979480063625,1,combined-prediction +OC(=O)COc1ccc(cc1Cl)Cl,0.28049546719325014,0.04220782045663329,1,combined-prediction +CCOC(=O)CC(C(=O)OCC)SP(=S)(OC)OC,0.2921073325236663,0.024525489375934365,1,combined-prediction +CNC(=O)Oc1cccc2c1cccc2,0.2981792578159244,0.1209025875895827,1,combined-prediction +Clc1ccc(cc1)CN(C(=O)Nc1ccccc1)C1CCCC1,0.31170568268957544,0.06564925552956072,1,combined-prediction +ClCCOc1ccccc1S(=O)(=O)NC(=O)Nc1nc(C)nc(n1)OC,0.31207588849423984,0.14729599082809905,1,combined-prediction +CCC(n1c(=O)[nH]c(c(c1=O)Br)C)C,0.3169092998307417,0.2948426093660432,0.25,combined-prediction +ClC(SN1C(=O)C2C(C1=O)CC=CC2)(Cl)Cl,0.32935301892961466,0.07976114599708196,1,combined-prediction +CC(=CC1C(C1(C)C)C(=O)OCc1cccc(c1)Oc1ccccc1)C,0.42802021191337764,0.059860013602209265,1,combined-prediction +CC(Oc1ccccn1)COc1ccc(cc1)Oc1ccccc1,0.43361266621389954,0.13723730603736453,1,combined-prediction +N#Cc1c[nH]cc1c1cccc2c1OC(O2)(F)F,0.44926154899338216,0.1795731486323763,1,combined-prediction +Clc1c(Cl)c([N+](=O)[O-])c(c(c1Cl)Cl)Cl,0.47403843842257615,0.0731675127439027,1,combined-prediction +COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1C(=O)OC,0.4837900188743661,0.12263485736061217,1,combined-prediction +Nc1nc(NC2CC2)nc(n1)N,0.5144905821145022,0.10982169517930987,1,combined-prediction 
+CCCCOCCOCCOCc1cc2OCOc2cc1CCC,0.5170806512852409,0.3350153436602428,1,combined-prediction +COCC(N(c1c(C)cccc1CC)C(=O)CCl)C,0.5285529966699751,0.08251097640354867,1,combined-prediction +O=Cc1ccco1,0.624453213155231,0.12486833177320306,0.1111111111111111,combined-prediction +[O-][N+](=O)c1cc(Cl)c(c(c1)Cl)N,0.7245881151318344,0.15109322689681717,1,combined-prediction +COC(=O)c1c(Cl)c(Cl)c(c(c1Cl)Cl)C(=O)OC,0.7681550277825105,0.747753445270089,1,combined-prediction +COC(=O)NS(=O)(=O)c1ccc(cc1)N,0.7817895162025876,0.49783922754030796,1,combined-prediction +OC(=O)c1nc(Cl)c(c(c1Cl)N)Cl,0.8282972172278201,0.4720460499425296,1,combined-prediction +CC(C1(C)N=C(NC1=O)c1nc2ccccc2cc1C(=O)O)C,0.8351128195663594,0.8106254748473309,1,combined-prediction +COC(=O)Nc1nc2c([nH]1)cccc2,0.8499546987221808,0.08145337691491894,1,combined-prediction +CC(Oc1cccc(c1)NC(=O)c1ccccc1C(F)(F)F)C,0.9077984526598573,0.1053554610733538,1,combined-prediction +O=C(C1(C)CCCCC1)Nc1ccc(c(c1Cl)Cl)O,0.9662594125910484,0.4231253966583784,1,combined-prediction +ClCCP(=O)(O)O,0.9723587138566308,2.424380344082731,1,combined-prediction +COc1cccc(c1C)C(=O)NN(C(C)(C)C)C(=O)c1cc(C)cc(c1)C,1.1154252951100516,0.2488946852404996,1,combined-prediction +CCC(c1noc(c1)NC(=O)c1c(OC)cccc1OC)(CC)C,1.5854670852219546,0.43069366532085357,1,combined-prediction +OC(=O)COc1nc(F)c(c(c1Cl)N)Cl,1.9605490478397496,0.13193009603279973,1,combined-prediction +CC(OC(=O)Nc1cccc(c1)Cl)C,2.340158076742021,0.10044437465404735,1,combined-prediction +Oc1ccccc1c1ccccc1,3.119727015073393,0.20273166023828124,1,combined-prediction +OC(=O)CNCP(=O)(O)O,5.559726007239,0.700841565636653,0.16666666666666666,combined-prediction diff --git a/paper/data/combined-test-predictions.id b/paper/data/combined-test-predictions.id new file mode 100644 index 0000000..27835d9 --- /dev/null +++ b/paper/data/combined-test-predictions.id @@ -0,0 +1 @@ +56c42aa82b72ed1054000004 diff --git a/paper/data/common-median.csv b/paper/data/common-median.csv index 
f1244b0..e83961d 100644 --- a/paper/data/common-median.csv +++ b/paper/data/common-median.csv @@ -1,24 +1,24 @@ SMILES,mazzatorta,swiss N#Cc1nn(c(c1S(=O)C(F)(F)F)N)c1c(Cl)cc(cc1Cl)C(F)(F)F,0.0001372533562906347,0.00013496580117055152 +CCOP(=S)(SCSC(C)(C)C)OCC,0.0002080223110862717,0.00034670385697674235 OC(=O)C(Oc1ccc(cc1)Oc1ncc(cc1Cl)C(F)(F)F)C,0.00027647194701359843,0.0002764719511333511 CCSCSP(=S)(OCC)OCC,0.0006144925612602997,0.0006144925475253195 CCOP(=O)(SC(CC)C)SC(CC)C,0.0009245829520661433,0.0008210296720157477 COP(=S)(Oc1ccc(c(c1)C)[N+](=O)[O-])OC,0.001659247904766673,0.08332310268057162 -CCOP(=S)(SCSC(C)(C)C)OCC,0.002438483757733518,0.00034670385697674235 -COP(=O)(SC)N,0.003046853953236319,0.0020548549325897737 +COP(=O)(SC)N,0.002054854991717517,0.0020548549325897737 +CSc1ccc(cc1C)OP(=S)(OC)OC,0.0025868753585247565,0.0016527259802523342 CCOP(=S)(Oc1ccc2c(c1)oc(=O)c(c2C)Cl)OCC,0.003445751195813495,0.0033630532459809582 CCOP(=S)(Oc1ncn(n1)c1ccccc1)OCC,0.004149212048673449,0.004149211896481245 -COc1sc(=O)n(n1)CSP(=S)(OC)OC,0.00467202701142753,0.0039030031199302137 +COc1sc(=O)n(n1)CSP(=S)(OC)OC,0.004713372383210075,0.0039030031199302137 CCOP(=S)(OCC)SCSP(=S)(OCC)OCC,0.005201883810203027,0.004681695305160139 CCOP(=O)(Oc1ccc(c(c1)C)SC)NC(C)C,0.005274306112287868,0.0035601567181414275 CCOP(=S)(SCn1c(=O)oc2c1ccc(c2)Cl)OCC,0.0054376113486863924,0.0636200517424888 -COP(=S)(SCn1nnc2c(c1=O)cccc2)OC,0.005451835179110433,0.008508644649457775 CC(Cc1ccc(cc1)C(C)(C)C)CN1CC(C)OC(C1)C,0.005601647965290344,0.005601648122412352 -CSc1ccc(cc1C)OP(=S)(OC)OC,0.005892327205528613,0.0016527259802523342 Fc1ccc(cc1)[Si](c1ccc(cc1)F)(Cn1cncn1)C,0.006341300659739408,0.023779877474022784 COC(=O)N(C(=O)N1COC2(C(=N1)c1ccc(cc1C2)Cl)C(=O)OC)c1ccc(cc1)OC(F)(F)F,0.006820319755914397,0.006820319575237628 -Clc1ccc(cc1)C(c1ccccc1Cl)(c1cncnc1)O,0.00694452873492003,0.01932390597300771 -O=S1OCC2C(CO1)C1(C(C2(Cl)C(=C1Cl)Cl)(Cl)Cl)Cl,0.007126617932723449,0.015481963173347177 
+Clc1ccc(cc1)C(c1ccccc1Cl)(c1cncnc1)O,0.00694452873492003,0.00920904883059355 +COP(=S)(SCn1nnc2c(c1=O)cccc2)OC,0.0070905370826580775,0.008508644649457775 +O=S1OCC2C(CO1)C1(C(C2(Cl)C(=C1Cl)Cl)(Cl)Cl)Cl,0.007126617932723449,0.01228727229779905 CCCCOC(=O)C(Oc1ccc(cc1)Oc1ccc(cn1)C(F)(F)F)C,0.007825509706097071,0.009886227162529472 COP(=S)(Oc1nc(Cl)c(cc1Cl)Cl)OC,0.009301369775521361,0.003100456591840454 CCOC(=O)C(Oc1ccc(cc1)Oc1cnc2c(n1)ccc(c2)Cl)C,0.009924832004782804,0.04157699893895499 @@ -31,15 +31,15 @@ CC(Oc1cc(c(cc1Cl)Cl)n1nc(oc1=O)C(C)(C)C)C,0.01448347496337274,0.0104281016973780 N#Cc1c(Cl)cccc1Cl,0.014533918736325764,0.0186034162597095 N#Cc1c(Cl)c(C#N)c(c(c1Cl)Cl)Cl,0.015042627044387032,0.08141821878808377 ClCC1CN(C(=O)C1Cl)c1cccc(c1)C(F)(F)F,0.016019730669239306,0.05030195369030707 +ClC1C(Cl)C(Cl)C(C(C1Cl)Cl)Cl,0.016160652565775233,0.01616065190994549 CCCN(C(=O)n1cncc1)CCOc1c(Cl)cc(cc1Cl)Cl,0.01672571818640967,0.05707983190600125 BrC1COC(C1)(Cn1cncn1)c1ccc(cc1Cl)Cl,0.017185416964361586,0.017185417014945824 -ClC1C(Cl)C(Cl)C(C(C1Cl)Cl)Cl,0.01808617712680377,0.01616065190994549 OC(=O)C(CCP(=O)(O)C)N,0.019323475195614302,1.2637552440957067 CCN(C(=O)SCc1ccc(cc1)Cl)CC,0.019396419126203733,0.016680921188449865 OC(=O)COc1ccc(cc1C)Cl,0.019938294964743114,0.0947069010825298 -N#CC(c1ccc(c(c1)Oc1ccccc1)F)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.021874904009467275,0.04835505096829608 +N#CC(c1ccc(c(c1)Oc1ccccc1)F)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.022450559378137468,0.04835505096829608 CNC(=O)ON=C(C(=O)N(C)C)SC,0.02280382932847922,0.02072868120754643 -O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(cc1)Cl,0.02446221194980985,0.4023390123323988 +O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(cc1)Cl,0.02510595436954169,0.4023390123323988 O=C(N(C)C)Nc1ccc(c(c1)Cl)Cl,0.026276896280264014,0.007293179580314936 CCN(c1nc(cc(n1)C)OP(=S)(OC)OC)CC,0.026692119922880408,0.0068777238395693234 OC(C(Cl)(Cl)Cl)(c1ccc(cc1)Cl)c1ccc(cc1)Cl,0.030365547751564796,0.005938151689011985 @@ -51,10 +51,10 @@ 
OC(=O)C(Oc1ccc(cc1Cl)Cl)C,0.03828744186371015,0.15527684755838006 CC(C(c1cncnc1)(c1ccc(cc1)OC(F)(F)F)O)C,0.038746408312020406,0.08469772512288609 CC(N(C(=O)SCC(=C(Cl)Cl)Cl)C(C)C)C,0.04102878665011248,0.03544887229174679 CNC(=O)Oc1cc(C)c(c(c1)C)SC,0.041276958181115306,0.1242747128033579 -CN(C(=S)SSC(=S)N(C)C)C,0.04432283415923257,0.03036190470594063 O=C1OC(C(=O)N1Nc1ccccc1)(C)c1ccc(cc1)Oc1ccccc1,0.044873074905021335,0.045407278177700156 COP(=S)(SCN1C(=O)c2c(C1=O)cccc2)OC,0.04569504751402555,0.009139009427670286 CC1=C(C)S(=O)(=O)CCS1(=O)=O,0.047557630336441704,0.23778815168220852 +CN(C(=S)SSC(=S)N(C)C)C,0.04783039657471141,0.03036190470594063 CON(C(=O)Nc1ccc(cc1)Br)C,0.048243951057630914,0.014357399945172603 Clc1ccc(cc1)CCC(C(C)(C)C)(Cn1cncn1)O,0.05165383561566402,0.17867678986550448 Cc1ccc(cc1)N(S(=O)(=O)N(C)C)SC(Cl)(Cl)F,0.051834835094095484,0.27357274077439286 @@ -80,7 +80,7 @@ N#CC(c1ccccc1)(Cn1cncn1)CCc1ccc(cc1)Cl,0.10391366164191661,0.09203781459712614 CCOc1cc(ccc1[N+](=O)[O-])Oc1ccc(cc1Cl)C(F)(F)F,0.11058877880543937,0.14653013191720715 COC(=O)c1c(nc(c(c1CC(C)C)C1=NCCS1)C(F)(F)F)C(F)F,0.11151045196043953,0.11151045388522976 Clc1cc(ccc1Oc1ccc(c(c1)C(=O)NS(=O)(=O)C)[N+](=O)[O-])C(F)(F)F,0.11395676083924232,0.011395676083924233 -CCNc1nc(NC(C)C)nc(n1)Cl,0.11591071091933607,0.056718974985359355 +CCNc1nc(NC(C)C)nc(n1)Cl,0.11591071091933607,0.04636428436773443 O=C(C1=C(C)OCCS1)Nc1ccccc1,0.1274956638724717,0.034848813981213346 C=CCOC(c1ccc(cc1Cl)Cl)Cn1cncc1,0.13459866849613178,0.05350296944357954 CCc1ccc(cc1)C(=O)NN(C(C)(C)C)C(=O)c1cc(C)cc(c1)C,0.13618183361575933,0.13731668655832788 @@ -90,11 +90,12 @@ CCC(c1noc(c1)NC(=O)c1c(OC)cccc1OC)(CC)C,0.15252975563710267,1.6952764753748983 CC(N1C(=O)c2ccccc2NS1(=O)=O)C,0.1560686482307559,0.14982590230152565 Cc1nc(Nc2ccccc2)nc(c1)C1CC1,0.15801925526767843,0.15801924849469393 Cc1cccc2c1n1cnnc1s2,0.16381576159162972,0.1373938645607217 +ClC(SN1C(=O)c2c(C1=O)cccc2)(Cl)Cl,0.16860133324539087,0.08430066662269543 
CCC(Nc1c(cc(cc1[N+](=O)[O-])C(C)(C)C)[N+](=O)[O-])C,0.16929970598735858,0.13747135609511818 -C#CCOS(=O)OC1CCCCC1Oc1ccc(cc1)C(C)(C)C,0.1697708869122168,0.061250674376451514 +C#CCOS(=O)OC1CCCCC1Oc1ccc(cc1)C(C)(C)C,0.1697708869122168,0.06904967382858089 CCCCCCCCSC(=O)Oc1cc(Cl)nnc1c1ccccc1,0.17813968959673715,0.3034972489425892 CN1CN(C)CSC1=S,0.18486987933542975,0.027422365728598172 -CCOCN(c1c(C)cccc1CC)C(=O)CCl,0.18534506246313948,0.22661523159035935 +CCOCN(c1c(C)cccc1CC)C(=O)CCl,0.18534506246313948,0.24799169923196304 CNC(=O)Oc1cccc2c1cccc2,0.1878529324240324,0.2991731924668564 COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1CCC(F)(F)F,0.19051986050321804,0.2098341392275743 CC(Nc1nc(NC(C)C)nc(n1)Cl)C,0.21766590408142725,0.22201922216305578 @@ -114,8 +115,7 @@ CC1N(C(=O)NC2CCCCC2)C(=O)SC1c1ccc(cc1)Cl,0.4534134152107278,0.06546156290207059 ClCCOc1ccccc1S(=O)(=O)NC(=O)Nc1nc(C)nc(n1)OC,0.5494924735209582,0.07465930346752149 CCOC(=O)CC(C(=O)OCC)SP(=S)(OC)OC,0.6205388929259232,0.2603236331298995 COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1C(=O)OC,0.655542030995076,0.31203800675365617 -ClC(SN1C(=O)c2c(C1=O)cccc2)(Cl)Cl,0.6631652440985374,0.08430066662269543 [O-][N+](=O)c1cc(Cl)c(c(c1)Cl)N,0.9419645496713847,0.5651787298028309 -ClCCP(=O)(O)O,1.4025957248513201,0.9066120392542251 +ClCCP(=O)(O)O,1.0381053884590363,0.9066120392542251 CC(OC(=O)Nc1cccc(c1)Cl)C,3.510237115113031,0.14040948460452124 -OC(=O)CNCP(=O)(O)O,5.914602135360638,5.350743398456257 +OC(=O)CNCP(=O)(O)O,5.914602135360638,5.559726007239 diff --git a/paper/data/functional-groups-reduced.csv b/paper/data/functional-groups-reduced.csv new file mode 100644 index 0000000..525acbf --- /dev/null +++ b/paper/data/functional-groups-reduced.csv @@ -0,0 +1,34 @@ +Alkene,39,30 +Alkyne,5,8 +Alcohol,44,27 +Dialkylether,35,32 +Amine,66,41 +Aldehyde,3,1 +Ketone,21,25 +Enol,4,5 +Carboxylic_acid,33,38 +Lactone,11,10 +Carboxylic_acid_derivative,215,227 +Amide,38,60 +Lactam,12,18 +Amidine,3,4 +Nitrile,35,39 
+Vinylogous_carbonyl_or_carboxyl_derivative,70,97 +Vinylogous_ester,113,120 +Carbonic_acid_derivatives,109,131 +Phenol,27,9 +Arylchloride,142,163 +Arylfluoride,22,41 +Heteroaromatic,147,205 +Nitro,42,31 +Sulfonic_derivative,24,29 +Sulfenic_derivative,48,34 +Phosphoric_acid_derivative,70,44 +Aromatic,402,396 +Heterocyclic,228,272 +Epoxide,11,2 +Annelated_rings,97,73 +Conjugated_double_bond,207,213 +Trifluoromethyl,44,63 +C_ONS_bond,520,484 +Salt,26,45 \ No newline at end of file diff --git a/paper/data/functional-groups-reduced4R.csv b/paper/data/functional-groups-reduced4R.csv new file mode 100644 index 0000000..e37c41d --- /dev/null +++ b/paper/data/functional-groups-reduced4R.csv @@ -0,0 +1,68 @@ +Alkene, 39, Mazzatorta +Alkene, 30, Swiss Federal Office +Alkyne, 5, Mazzatorta +Alkyne, 8, Swiss Federal Office +Alcohol, 44, Mazzatorta +Alcohol, 27, Swiss Federal Office +Dialkylether, 35, Mazzatorta +Dialkylether, 32, Swiss Federal Office +Amine, 66, Mazzatorta +Amine, 41, Swiss Federal Office +Aldehyde, 3, Mazzatorta +Aldehyde, 1, Swiss Federal Office +Ketone, 21, Mazzatorta +Ketone, 25, Swiss Federal Office +Enol, 4, Mazzatorta +Enol, 5, Swiss Federal Office +Carboxylic_acid, 33, Mazzatorta +Carboxylic_acid, 38, Swiss Federal Office +Lactone, 11, Mazzatorta +Lactone, 10, Swiss Federal Office +Carboxylic_acid_derivative, 215, Mazzatorta +Carboxylic_acid_derivative, 227, Swiss Federal Office +Amide, 38, Mazzatorta +Amide, 60, Swiss Federal Office +Lactam, 12, Mazzatorta +Lactam, 18, Swiss Federal Office +Amidine, 3, Mazzatorta +Amidine, 4, Swiss Federal Office +Nitrile, 35, Mazzatorta +Nitrile, 39, Swiss Federal Office +Vinylogous_carbonyl_or_carboxyl_derivative, 70, Mazzatorta +Vinylogous_carbonyl_or_carboxyl_derivative, 97, Swiss Federal Office +Vinylogous_ester, 113, Mazzatorta +Vinylogous_ester, 120, Swiss Federal Office +Carbonic_acid_derivatives, 109, Mazzatorta +Carbonic_acid_derivatives, 131, Swiss Federal Office +Phenol, 27, Mazzatorta +Phenol, 9, Swiss 
Federal Office +Arylchloride, 142, Mazzatorta +Arylchloride, 163, Swiss Federal Office +Arylfluoride, 22, Mazzatorta +Arylfluoride, 41, Swiss Federal Office +Heteroaromatic, 147, Mazzatorta +Heteroaromatic, 205, Swiss Federal Office +Nitro, 42, Mazzatorta +Nitro, 31, Swiss Federal Office +Sulfonic_derivative, 24, Mazzatorta +Sulfonic_derivative, 29, Swiss Federal Office +Sulfenic_derivative, 48, Mazzatorta +Sulfenic_derivative, 34, Swiss Federal Office +Phosphoric_acid_derivative, 70, Mazzatorta +Phosphoric_acid_derivative, 44, Swiss Federal Office +Aromatic, 402, Mazzatorta +Aromatic, 396, Swiss Federal Office +Heterocyclic, 228, Mazzatorta +Heterocyclic, 272, Swiss Federal Office +Epoxide, 11, Mazzatorta +Epoxide, 2, Swiss Federal Office +Annelated_rings, 97, Mazzatorta +Annelated_rings, 73, Swiss Federal Office +Conjugated_double_bond, 207, Mazzatorta +Conjugated_double_bond, 213, Swiss Federal Office +Trifluoromethyl, 44, Mazzatorta +Trifluoromethyl, 63, Swiss Federal Office +C_ONS_bond, 520, Mazzatorta +C_ONS_bond, 484, Swiss Federal Office +Salt, 26, Mazzatorta +Salt, 45, Swiss Federal Office diff --git a/paper/data/functional-groups.csv b/paper/data/functional-groups.csv new file mode 100644 index 0000000..0e2b4ca --- /dev/null +++ b/paper/data/functional-groups.csv @@ -0,0 +1,138 @@ +Primary_carbon, 303, 287 +Secondary_carbon, 147, 149 +Tertiary_carbon, 91, 73 +Quaternary_carbon, 58, 60 +Alkene, 39, 30 +Alkyne, 5, 8 +Alkylchloride, 71, 41 +Alkylfluoride, 52, 74 +Alkylbromide, 5, 3 +Alcohol, 44, 27 +Primary_alcohol, 12, 2 +Secondary_alcohol, 23, 10 +Tertiary_alcohol, 18, 21 +Dialkylether, 35, 32 +Dialkylthioether, 14, 6 +Alkylarylether, 62, 90 +Diarylether, 35, 47 +Alkylarylthioether, 11, 10 +Amine, 66, 41 +Primary_aliph_amine, 8, 3 +Secondary_aliph_amine, 2, 2 +Tertiary_aliph_amine, 16, 9 +Quaternary_aliph_ammonium, 1, 6 +Primary_arom_amine, 26, 15 +Secondary_arom_amine, 2, 1 +Secondary_mixed_amine, 8, 3 +Tertiary_mixed_amine, 14, 7 +Ammonium, 1, 6 
+Dialkylthioether, 10, 4 +Alkylarylthioether, 10, 10 +Disulfide, 4, 2 +1,2-Diol, 12, 3 +Organometallic_compounds, 1, 5 +Aldehyde, 3, 1 +Ketone, 21, 25 +Oximether, 7, 15 +Acetal, 13, 13 +Halogen_acetal_like, 13, 14 +Acetal_like, 34, 30 +NOS_methylen_ester_and_similar, 18, 19 +Hetero_methylen_ester_and_similar, 19, 19 +Chloroalkene, 39, 21 +Bromoalkene, 1, 2 +Enol, 4, 5 +Enolether, 2, 4 +Thioenolether, 1, 1 +Carboxylic_acid, 33, 38 +Lactone, 11, 10 +Carboxylic_acid_derivative, 215, 227 +Carbothioic_S_ester, 1, 1 +Amide, 38, 60 +Primary_amide, 2, 1 +Secondary_amide, 22, 36 +Tertiary_amide, 15, 23 +Lactam, 12, 18 +Alkyl_imide, 5, 4 +N_hetero_imide, 6, 3 +Amidine, 3, 4 +Hydroxamic_acid_ester, 1, 1 +Imidolactone, 13, 32 +Imidothiolactone, 1, 1 +Amidine, 3, 5 +Imidolactam, 3, 2 +Imidoylhalide_cyclic, 10, 19 +Alpha_aminoacid, 1, 1 +Nitrile, 35, 39 +Vinylogous_carbonyl_or_carboxyl_derivative, 70, 97 +Vinylogous_acid, 9, 8 +Vinylogous_ester, 113, 120 +Vinylogous_amide, 19, 24 +Vinylogous_halide, 11, 27 +Carbonic_acid_derivatives, 109, 131 +Urea, 23, 21 +Thiourea, 4, 4 +Isothiourea, 1, 3 +Guanidine, 6, 7 +Urethan, 34, 35 +Biuret, 1, 4 +Semicarbazone, 1, 3 +Phenol, 27, 9 +Arylchloride, 142, 163 +Arylfluoride, 22, 41 +Arylbromide, 4, 8 +Aryliodide, 1, 4 +Oxoarene, 32, 29 +Thioarene, 1, 2 +Hetero_N_basic_H, 17, 11 +Hetero_N_basic_no_H, 59, 90 +Hetero_N_nonbasic, 127, 191 +Hetero_O, 10, 12 +Hetero_S, 17, 18 +Heteroaromatic, 147, 205 +Nitro, 42, 31 +Sulfon, 3, 14 +Sulfoxide, 2, 3 +Sulfuric_derivative, 5, 8 +Sulfonamide, 6, 9 +Sulfonic_derivative, 24, 29 +Sulfenic_derivative, 48, 34 +Phosphonic_acid, 5, 5 +Phosphonic_monoester, 1, 1 +Phosphonic_diester, 2, 1 +Phosphonic_acid_derivative, 10, 7 +Phosphoric_triester, 10, 3 +Phosphoric_diestermonoamide, 3, 2 +Phosphoric_acid_derivative, 70, 44 +Phosphinic_acid, 1, 1 +Phosphinic_acid_derivative, 1, 1 +Quart_silane, 1, 2 +Aromatic, 402, 396 +Heterocyclic, 228, 272 +Epoxide, 11, 2 +Spiro, 3, 10 +Annelated_rings, 97, 73 +Bridged_rings, 8, 
4 +Sugar_pattern_1, 10, 9 +Sugar_pattern_2, 6, 7 +Sugar_pattern_combi, 4, 5 +Sugar_pattern_2_alpha, 1, 1 +Sugar_pattern_2_beta, 1, 1 +Conjugated_double_bond, 207, 213 +Conjugated_tripple_bond, 10, 19 +Cis_double_bond, 31, 19 +Trans_double_bond, 31, 19 +Halogen_multi_subst, 1, 6 +Trifluoromethyl, 44, 63 +C_ONS_bond, 520, 484 +Charged, 27, 51 +Anion, 26, 45 +Kation, 25, 51 +Salt, 26, 45 +1,3-Tautomerizable, 265, 296 +1,5-Tautomerizable, 124, 148 +Rotatable_bond, 488, 462 +Michael_acceptor, 33, 33 +CH-acidic, 60, 73 +CH-acidic_strong, 1, 4 +Chiral_center_specified, 1, 2 diff --git a/paper/data/mazzatorta-cv.csv b/paper/data/mazzatorta-cv.csv new file mode 100644 index 0000000..200058c --- /dev/null +++ b/paper/data/mazzatorta-cv.csv @@ -0,0 +1,519 @@ +SMILES,LOAEL_measured_median,LOAEL_predicted,Confidence +ClC12C3C4(C(C1(Cl)Cl)(C1(C2(C3(Cl)C(C41Cl)(Cl)Cl)Cl)Cl)Cl)Cl,1.9565721591442926e-05,0.0014218133641616987,0.625 +ClC1=C(Cl)C2(C(C1(Cl)C1C2C2CC1C=C2)(Cl)Cl)Cl,2.7404023436797774e-05,0.0012794179116857743,1 +ClC1C2OC2C2C1C1(Cl)C(=C(C2(C1(Cl)Cl)Cl)Cl)Cl,6.421500622500271e-05,0.0006312904946719587,1 +ClC1=C(Cl)C2(C(C1(Cl)C1C2C2CC1C1C2O1)(Cl)Cl)Cl,0.0001312648375209092,0.0009393540815108845,1 +N#Cc1nn(c(c1S(=O)C(F)(F)F)N)c1c(Cl)cc(cc1Cl)C(F)(F)F,0.0001372533562906347,0.030320302552666413,1 +CCSCCSP(=S)(OCC)OCC,0.00014577045919371006,0.002904296720614731,1 +CCOP(=S)(SCSC(C)(C)C)OCC,0.0001733519259052264,0.004705691894690802,1 +CCOP(=S)(SCSC(C)(C)C)OCC,0.0002080223110862717,0.0031198946651464694,1 +ClC1=C(Cl)C2(C(C1(Cl)C1C2C2CC1C1C2O1)(Cl)Cl)Cl,0.0002625296750418184,0.0009393540815108845,1 +OC(=O)C(Oc1ccc(cc1)Oc1ncc(cc1Cl)C(F)(F)F)C,0.00027647194701359843,0.016349996939185575,1 +ClC1=C(Cl)C2(C(C1(Cl)C1C2C2CC1C1C2O1)(Cl)Cl)Cl,0.000328162093802273,0.00023577843419090474,1 +CCSCCSP(=S)(OCC)OCC,0.00036442614798427517,0.002759857629239433,1 +ClC1C2OC2C2C1C1(Cl)C(=C(C2(C1(Cl)Cl)Cl)Cl)Cl,0.0005137200498000217,0.0008261557263644955,1 
+CNC(=O)ON=CC(SC)(C)C,0.0005255875464343458,0.0394300510866583,1 +ClC1CC2C(C1Cl)C1(C(C2(Cl)C(=C1Cl)Cl)(Cl)Cl)Cl,0.0006100854842019096,0.0005942117597564336,1 +CCSCSP(=S)(OCC)OCC,0.0006144925612602997,0.0013675281812878628,1 +OC1CCCCCc2cc(O)cc(c2C(=O)OC(CCC1)C)O,0.0006203550142861557,0.7144075963289822,1 +ClC1=C(Cl)C2(C(C1(Cl)C1C2C2CC1C1C2O1)(Cl)Cl)Cl,0.000656324187604546,0.0009393540815108845,1 +ClC1CC2C(C1Cl)C1(C(C2(Cl)C(=C1Cl)Cl)(Cl)Cl)Cl,0.0006588923229380624,0.0008687145669203877,1 +ClC1C=CC2C1C1(Cl)C(=C(C2(C1(Cl)Cl)Cl)Cl)Cl,0.0006696708996117783,0.0008952202189140214,1 +ClC(C(c1ccc(cc1)Cl)c1ccc(cc1)Cl)(Cl)Cl,0.0007052459522690667,0.04147328692582911,1 +COP(=O)(SC)N,0.000708570686799144,0.33442367385922134,1 +CCSCCSP(=S)(OCC)OCC,0.0008017375255654054,0.0029306716037033644,1 +c1ccc(cc1)[Sn](c1ccccc1)c1ccccc1,0.0008571117562305596,0.6096639268673245,1 +CCOP(=O)(SC(CC)C)SC(CC)C,0.0009245829520661433,0.0119018935050396,1 +COP(=S)(Oc1ccc(cc1)N(=O)=O)OC,0.0009498211030948742,0.03193259568430547,1 +ClC1C=CC2C1C1(Cl)C(=C(C2(C1(Cl)Cl)Cl)Cl)Cl,0.001017899767409903,0.0008952202189140214,1 +Clc1c(Cl)c(Cl)c(c(c1Cl)Cl)Cl,0.0010183220720957982,0.09587934918066965,1 +CNC(=O)CSP(=S)(OC)OC,0.001090477150926923,0.011977939066676569,1 +COC1CC(OC2C(C)C=CC=C3COC4C3(O)C(C=C(C4O)C)C(=O)OC3CC(CC=C2C)OC2(C3)C=CC(C(O2)C(C)C)C)OC(C1OC1CC(OC)C(C(O1)C)NC(=O)C)C,0.0011109849279118543,0.030023844212028958,1 +COP(=S)(SCn1nnc2c(c1=O)cccc2)OC,0.0011344859332252924,0.02189964718237587,1 +ClC1CC2C(C1Cl)C1(C(C2(Cl)C(=C1Cl)Cl)(Cl)Cl)Cl,0.0012201709684038192,0.0005942117597564336,1 +ClC12C(Cl)(Cl)C3(C4(C1(Cl)C1(C2(Cl)C3(C4(C1(Cl)Cl)Cl)Cl)Cl)Cl)Cl,0.0012831252531881078,6.239999085601705e-05,0.625 +CCOP(=S)(Oc1ccc(cc1)N(=O)=O)OCC,0.001442007505168395,0.02324515370361023,1 +CCOP(=S)(Oc1ccccc1C(=O)OC(C)C)NC(C)C,0.0014476216329334154,0.10541114923752212,1 +CCOc1cc(nc(n1)CC)OP(=S)(OC)OC,0.0015395577035464635,0.008933340243284798,1 +COC(=O)C=C(OP(=O)(OC)OC)C,0.001561466365033004,0.05630681195605883,1 
+CSc1ccc(cc1C)OP(=S)(OC)OC,0.001616797099077973,0.00998805136771544,1 +COP(=S)(Oc1ccc(c(c1)C)[N+](=O)[O-])OC,0.001659247904766673,0.015728201435628045,1 +ClC1C2(Cl)C3C4C5C1(Cl)C(C2(Cl)C5C3C1C4O1)(Cl)Cl,0.0018377077252927285,0.00013082348029644925,1 +CNC(=O)CCSCCSP(=O)(OC)OC,0.001879329112916984,0.004845236789812529,1 +CNC(=O)C=C(OP(=O)(OC)OC)C,0.0020164586039868883,0.010849425248724263,1 +COP(=O)(SC)N,0.002054854991717517,0.33442367385922134,1 +CCOP(=S)(Oc1ccc2c(c1)oc(=O)c(c2C)Cl)OCC,0.0022052807653206367,0.010561978243550603,1 +S=C1NCCN1,0.0022514113902230405,6.303842268414008,0.14285714285714285 +CO[C@H]1C[C@H](O[C@H]2[C@@H](C)C=CC=C3CO[C@H]4[C@]3(O)[C@@H](C=C([C@H]4O)C)C(=O)O[C@H]3C[C@@H](CC=C2C)O[C@]2(C3)C=C[C@@H]([C@H](O2)[C@H](CC)C)C)O[C@H]([C@@H]1O[C@H]1C[C@H](OC)[C@H]([C@@H](O1)C)O)C,0.002290749011702154,0.0017566214091597164,0.11827956989247312 +S=C1NCCN1,0.0024471862937206963,6.303842268414008,0.14285714285714285 +COc1sc(=O)n(n1)CSP(=S)(OC)OC,0.002646103794082849,0.013342851700514885,1 +COC(=O)C(Oc1ccc(cc1)Oc1ncc(cc1Cl)C(F)(F)F)C,0.0026615073878255148,0.0012344748927784325,1 +COC1CC(OC2C(C)C=CC=C3COC4C3(O)C(C=C(C4O)C)C(=O)OC3CC(CC=C2C)OC2(C3)C=CC(C(O2)C(C)C)C)OC(C1OC1CC(OC)C(C(O1)C)NC(=O)C)C,0.0027774623197796356,0.04161465926335706,0.1348314606741573 +CCOP(=S)(Oc1nc(Cl)c(cc1Cl)Cl)OCC,0.002852364738724816,0.012560097152432495,1 +CCOP(=S)(OCC)SCSc1ccc(cc1)Cl,0.0029165972759564764,0.007657588282036417,1 +C1CCN2C(C1)C1CCCCN1CC2,0.002933359023382885,0.16703078340793057,1 +C1CCN2C(C1)C1CCCCN1CC2,0.002984821462389602,0.16703078340793057,1 +CCCCSP(=O)(SCCCC)SCCCC,0.003974424546249488,0.1504496631045886,1 +COc1sc(=O)n(n1)CSP(=S)(OC)OC,0.004134537178254452,0.011073447351926287,1 +CCOP(=S)(Oc1ncn(n1)c1ccccc1)OCC,0.004149212048673449,0.020215395668456124,1 +CCOP(=O)(OC(=CCl)c1ccc(cc1Cl)Cl)OCC,0.004171650398342553,0.04995900954278815,1 +Clc1nc(nc(n1)Cl)Nc1ccccc1Cl,0.004173898399328111,0.1093606955215401,1 
+Clc1cccc(n1)C(Cl)(Cl)Cl,0.00433075312836283,0.0664943030028045,0.13043478260869565 +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C(C(Br)(Br)Br)Br,0.004511229623452476,0.0348345939452587,1 +CCOP(=S)(Oc1ccc2c(c1)oc(=O)c(c2C)Cl)OCC,0.004686221626306353,0.010714479147398627,1 +CCOP(=S)(Oc1cc(C)nc(n1)C(C)C)OCC,0.004928609097226672,0.010980300528105117,1 +CCOP(=O)(Oc1ccc(c(c1)C)SC)NC(C)C,0.004944661980269876,0.008165508970666315,1 +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Br)Br,0.004948543461552866,0.03481040749532821,1 +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C(C(C)C)Nc1ccc(cc1Cl)C(F)(F)F,0.004971041792562443,0.011146277874077666,1 +CCN(C(=O)C(=C(OP(=O)(OC)OC)C)Cl)CC,0.005005200069191886,0.006725562763289336,1 +CCNc1nc(nc(n1)Cl)NC(C#N)(C)C,0.005193343612552968,0.03818729902070168,1 +CCOP(=S)(OCC)SCSP(=S)(OCC)OCC,0.005201883810203027,0.001847030797857757,1 +COc1sc(=O)n(n1)CSP(=S)(OC)OC,0.005292207588165698,0.011073447351926287,1 +CCOP(=S)(SCn1c(=O)oc2c1ccc(c2)Cl)OCC,0.0054376113486863924,0.0072971278933458734,1 +CC(Cc1ccc(cc1)C(C)(C)C)CN1CC(C)OC(C1)C,0.005601647965290344,0.24428343783187767,1 +CCOP(=O)(Oc1ccc(c(c1)C)SC)NC(C)C,0.005603950244305859,0.008071940830053378,1 +Fc1ccc(cc1)[Si](c1ccc(cc1)F)(Cn1cncn1)C,0.006341300659739408,0.08859636009379407,1 +COC(=O)Nc1nc2c([nH]1)cc(cc2)S(=O)c1ccccc1,0.006342219438128827,0.3094692255753645,1 +ClCC(N1C(=O)c2c(C1=O)cccc2)SP(=S)(OCC)OCC,0.006347661308292605,0.01655335793163268,1 +COP(=O)(SC)N,0.006377136181192296,0.045296304153967855,0.13333333333333333 +CCP(=S)(Sc1ccccc1)OCC,0.006414179135682054,0.003722191492864322,1 +COc1sc(=O)n(n1)CSP(=S)(OC)OC,0.006615259485207122,0.011073447351926287,1 +OC(C(Cl)(Cl)Cl)(c1ccc(cc1)Cl)c1ccc(cc1)Cl,0.006747899500347733,0.049047172630179624,1 +CNC(=O)Oc1cc(C)c(c(c1)C)N(C)C,0.0067481385934503825,0.07806475331674674,1 +COC(=O)N(C(=O)N1COC2(C(=N1)c1ccc(cc1C2)Cl)C(=O)OC)c1ccc(cc1)OC(F)(F)F,0.006820319755914397,0.05460021012041988,1 +CCOP(=S)(SCSC(C)(C)C)OCC,0.006934077036209056,0.0029602477287608174,1 
+Clc1ccc(cc1)C(c1ccccc1Cl)(c1cncnc1)O,0.00694452873492003,0.05356866802000862,1 +COP(=S)(SCn1nnc2c(c1=O)cccc2)OC,0.0070905370826580775,0.018322295358547312,1 +O=S1OCC2C(CO1)C1(C(C2(Cl)C(=C1Cl)Cl)(Cl)Cl)Cl,0.007126617932723449,0.00036386924695734017,1 +Cc1nn(c(c1C=NOCc1ccc(cc1)C(=O)OC(C)(C)C)Oc1ccccc1)C,0.0073074288460468996,0.48191774605001353,1 +Fc1ccc(cc1)[Si](c1ccc(cc1)F)Cn1cncn1,0.007657523838454347,0.08859636009379407,1 +CCCCOC(=O)C(Oc1ccc(cc1)Oc1ccc(cn1)C(F)(F)F)C,0.007825509706097071,0.01255029551403124,1 +Fc1ccc(cc1)C(c1ccccc1Cl)(c1cncnc1)O,0.007943029289634557,0.014438054300148512,1 +CSc1ccc(cc1C)OP(=S)(OC)OC,0.008030092258753932,0.011338986062901048,1 +COP(=S)(SCn1nnc2c(c1=O)cccc2)OC,0.00813048252144793,0.021687531960393556,1 +CCN(c1nc(cc(n1)C)OP(=S)(OC)OC)CC,0.008187766847509327,0.017844597124334886,1 +Clc1ccc(cc1)OS(=O)(=O)c1ccc(cc1)Cl,0.008246440044818412,0.05907193607395852,1 +[O-][N+](=O)c1cc([N+](=O)[O-])c(c(c1)[N+](=O)[O-])C,0.008805487227420639,0.16464376372059966,1 +COP(=S)(Oc1nc(Cl)c(cc1Cl)Cl)OC,0.009301369775521361,0.024139165440739862,1 +COP(=O)(OC=C(Cl)Cl)OC,0.009729574839301364,0.027361947682508048,1 +CCOC(=O)C(Oc1ccc(cc1)Oc1cnc2c(n1)ccc(c2)Cl)C,0.009924832004782804,0.022894519521138888,1 +c1scc(n1)c1nc2c([nH]1)cccc2,0.009938002763559809,0.12605015112782628,1 +CON(C(=O)Nc1ccc(c(c1)Cl)Cl)C,0.010036375840596658,0.07530660632375387,1 +FC(c1ccc(cc1)C=CC(=NN=C1NCC(CN1)(C)C)C=Cc1ccc(cc1)C(F)(F)F)(F)F,0.010111728942243584,0.18296746014371312,1 +COP(=O)(OC=C(Cl)Cl)OC,0.010408382386229365,0.027361947682508048,1 +CCSC(=O)N1CCCCCC1,0.010677920910561842,0.1420990237195984,1 +CCOC(=O)c1cn2nc(cc2nc1C)OP(=S)(OCC)OCC,0.010713392485187262,0.058687722933369206,1 +O=C(C1C(C1(C)C)C=C(C(F)(F)F)Cl)OCc1c(F)c(F)c(c(c1F)F)C,0.010985502766340648,0.07572819478774503,1 +CCCSP(=O)(SCCC)OCC,0.011141416681473747,0.005788126717953556,1 +O=C(C1C(C1(C)C)C=C(C(F)(F)F)Cl)OCc1cccc(c1C)c1ccccc1,0.011824026606519262,0.16129123255733774,1 
+O=C(C1C(C1(C)C)C=C(C(F)(F)F)Cl)OCc1cccc(c1C)c1ccccc1,0.011824026606519262,0.22030867390239214,1 +CCOP(=S)(Oc1ccc(cc1)N(=O)=O)OCC,0.012016729209736626,0.022496877806644597,1 +S=C1NCCN1,0.012235931468603481,6.303842268414008,0.14285714285714285 +Clc1cc(Cl)c(c(c1O)Cc1c(O)c(Cl)cc(c1Cl)Cl)Cl,0.012287924553322883,0.05189799091313671,1 +CN1CCC(CC1)C1CCN(CC1)C,0.012988179839533329,0.11626874970227083,1 +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C(c1ccc(cc1)OC(F)F)C(C)C,0.013290157156772887,0.043440290068252216,1 +CCCN(C(=O)n1cncc1)CCOc1c(Cl)cc(cc1Cl)Cl,0.013539867103284017,0.08089999015583163,1 +COP(=O)(NC(=O)C)SC,0.013648831720059621,0.01603420284847195,1 +CNP(=O)(Oc1ccc(cc1Cl)C(C)(C)C)OC,0.013712205220154254,0.05218409380418495,1 +CCN(C(=O)SCC)C1CCCCC1,0.013930451940080113,0.10603844418623201,1 +CCCN(c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-])CC1CC1,0.014397200032537671,0.16269252962824168,1 +CC(Oc1cc(c(cc1Cl)Cl)n1nc(oc1=O)C(C)(C)C)C,0.01448347496337274,0.1493882391935513,1 +N#Cc1c(Cl)cccc1Cl,0.014533918736325764,0.07166060251297335,1 +ClC1CC2C(C1Cl)C1(C(C2(Cl)C(=C1Cl)Cl)(Cl)Cl)Cl,0.014642051620845831,0.0005942117597564336,1 +CCCCC(c1ccc(cc1Cl)Cl)(Cn1cncn1)O,0.014958135679074535,0.05648294085533073,1 +N#Cc1c(Cl)c(C#N)c(c(c1Cl)Cl)Cl,0.015042627044387032,0.0678327800715719,1 +N#CC(c1cc(C)c(cc1Cl)NC(=O)c1cc(I)cc(c1O)I)c1ccc(cc1)Cl,0.015081279803436631,0.09331510618768761,1 +ClCC1CN(C(=O)C1Cl)c1cccc(c1)C(F)(F)F,0.016019730669239306,0.12327721586222236,1 +BrC1COC(C1)(Cn1cncn1)c1ccc(cc1Cl)Cl,0.017185416964361586,0.051356609948269835,1 +N#CC(c1ccc(c(c1)Oc1ccccc1)F)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.017269661060105742,0.020383124560822165,1 +O=C(C1C(C1(C)C)C=C(C(F)(F)F)Cl)OCc1cccc(c1C)c1ccccc1,0.018918442570430818,0.18549608847080756,1 +CCOP(=S)(Oc1cc(C)nc(n1)C(C)C)OCC,0.019057288509276463,0.01216124318722561,1 +CN1CCC(CC1)C1CCN(CC1)C,0.019100264469901956,0.15374244717438296,1 +CCN(C(=O)SCc1ccc(cc1)Cl)CC,0.019396419126203733,0.05408292051478764,1 
+CCCN(C(=O)n1cncc1)CCOc1c(Cl)cc(cc1Cl)Cl,0.01991156926953532,0.07632294114171351,1 +OC(=O)COc1ccc(cc1C)Cl,0.019938294964743114,0.0399652589799207,1 +N#Cc1sc2=c(sc1C#N)c(=O)c1c(c2=O)cccc1,0.020248123201460456,0.15445645494051066,1 +CCP(=S)(Sc1ccccc1)OCC,0.020298035239500172,0.002754326808265995,1 +ClC=C,0.020800592400871575,0.04595842510750215,0.14285714285714285 +Clc1cccc(c1)c1ccccc1,0.021202965065040626,0.1352285536055998,1 +N#CC(c1ccc(c(c1)Oc1ccccc1)F)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.021299248640797082,0.02247265000158328,1 +CNC(=O)CSP(=S)(OC)OC,0.02180954301853846,0.011977939066676569,1 +O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(cc1)Cl,0.022530984690614337,0.1232662493071642,1 +CNC(=O)Oc1cccc2c1OC(C2)(C)C,0.022598624918870935,0.09740277479679331,1 +OC(=O)COc1ccc(cc1Cl)Cl,0.022620602193004043,0.041009690437328015,1 +CN(C(=S)SSC(=S)N(C)C)C,0.02275063210988447,0.051165604885929104,0.16666666666666666 +CNC(=O)ON=C(C(=O)N(C)C)SC,0.02280382932847922,0.031079812793433585,1 +COC(=O)N(c1ccccc1COc1ccn(n1)c1ccc(cc1)Cl)OC,0.02320682656135787,0.14426832315094906,1 +OC(COc1cccc2c1c1ccccc1[nH]2)CNC(C)C,0.023460058312320942,0.369466939624125,1 +CCNc1nc(NCC)nc(n1)Cl,0.024794616275543167,0.021316557238817504,1 +CCOC(=O)C(Oc1ccc(cc1)Oc1nc2c(o1)cc(cc2)Cl)C,0.02487724874434851,0.02323272314653561,1 +CON(C(=O)Nc1ccc(c(c1)Cl)Cl)C,0.025090939601491648,0.05083711820179249,1 +O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(cc1)Cl,0.02510595436954169,0.12628718875372724,1 +O=C(N(C)C)Nc1ccc(c(c1)Cl)Cl,0.02574063309087087,0.08860938474145841,1 +O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(cc1)Cl,0.025749696789273527,0.12641058047221496,1 +CCNc1nc(NCC)nc(n1)Cl,0.026282293252075754,0.07780743018432164,1 +CC(OC(=O)C(c1ccc(cc1)Cl)(c1ccc(cc1)Cl)O)C,0.026531991066147967,0.05646245319768563,1 +O=C(N(C)C)Nc1ccc(c(c1)Cl)Cl,0.026813159469657157,0.10088477692803749,1 +CCOC(=O)c1ccccc1C1=c2cc(C)c(cc2=[O]c2c1cc(C)c(c2)NCC)NCC,0.027053999376946393,0.6380027934805974,1 +CSCC(=NOC(=O)NC)C(C)(C)C,0.027483045022449526,0.029438466688171353,1 
+N#CC(c1ccc(c(c1)Oc1ccccc1)F)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.02763145769616919,0.024234096209191043,1 +CCOc1cc(ccc1N(=O)=O)Oc1ccc(cc1Cl)C(F)(F)F,0.02764719470135984,0.07890874815143262,1 +[O-][N+](=O)c1cc(C(=O)N)c(c(c1)[N+](=O)[O-])C,0.027758250773633555,0.142247474970479,1 +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(C(F)(F)F)Cl,0.02778703580061686,0.02381165262416268,1 +CSC(=NOC(=O)N(SN(C(=O)ON=C(SC)C)C)C)C,0.02821118623185781,0.06400593479605718,1 +COP(=S)(SCN1C(=O)c2c(C1=O)cccc2)OC,0.02836244328456758,0.06797949317882583,1 +CC(N1C(=NC(C)(C)C)SCN(C1=O)c1ccccc1)C,0.02848365588181601,0.0689459121736827,1 +CCOP(=S)(Oc1nc(Cl)c(cc1Cl)Cl)OCC,0.028523647387248163,0.01285055734967491,1 +CCOC(=O)C(Cc1cc(c(cc1Cl)F)n1nc(n(c1=O)C(F)F)C)Cl,0.029112705155716945,0.14860201500770867,1 +COC(=O)c1c(Cl)c(Cl)c(c(c1Cl)Cl)C(=O)OC,0.030123726579706293,0.57695845139982,1 +COc1nc(C)nc(n1)N(C(=O)NS(=O)(=O)c1ccccc1C(=O)OC)C,0.031614325062739264,0.2832573792096585,1 +Cc1ccc2c(c1)nc1c(n2)sc(=O)s1,0.03201059303080734,0.08614003067869859,1 +CC(C(=O)O)Oc1cc(Cl)c(cc1Cl)Cl,0.03228091610123117,0.02898082429359809,1 +CCC1CCCC(OC2CCC(C(O2)C)N(C)C)C(C)C(=O)C2C(CC(=O)O1)C1CCC3C(C1C2)CC(C3)OC1CC(C)C(C(C1OC)OC)OC,0.03269690443692089,0.21141397929967962,1 +CCOC(=O)NCCOc1ccc(cc1)Oc1ccccc1,0.03318543029523152,0.17738294181594516,1 +Clc1ccc(c(c1)Cl)C1(OCCO1)Cn1cncn1,0.03331771398901528,0.04815264437273454,1 +CCOCn1c(c2ccc(cc2)Cl)c(c(c1C(F)(F)F)Br)C#N,0.03336499327732185,0.04261363346757391,1 +N#Cc1sc2=c(sc1C#N)c(=O)c1c(c2=O)cccc1,0.03374687200243409,0.2942994099520108,1 +CCCCC(c1ccc(cc1)Cl)(Cn1cncn1)C#N,0.033936422812922216,0.09882354692414844,1 +CCCCC(c1ccc(cc1)Cl)(Cn1cncn1)C#N,0.03407493882440353,0.06962158879997161,1 +CN(C=Nc1ccc(cc1C)C)C=Nc1ccc(cc1C)C,0.03408246361134649,0.13262968511235923,1 +ClC(C(SN1C(=O)C2C(C1=O)CC=CC2)(Cl)Cl)Cl,0.034377949341570596,0.32963878644864847,0.42857142857142855 +CN(C=Nc1ccc(cc1C)C)C=Nc1ccc(cc1C)C,0.034764112883573416,0.16021803876523436,1 
+CCCSP(=S)(Oc1ccc(cc1)SC)OCC,0.03566479582586673,0.0033372571615199595,1 +N#CC(c1c(Cl)ccc(c1Cl)n1ncc(=O)[nH]c1=O)c1ccc(cc1)Cl,0.03679735812631385,0.04044424911778916,1 +CC(Cc1ccccc1)N,0.036980547196719206,0.15298008942484267,1 +CCN(c1c(cc(cc1N(=O)=O)C(F)(F)F)N(=O)=O)CC(=C)C,0.0375078950368263,0.16811251283481254,1 +Clc1c(O)c(Cl)c(c(c1Cl)Cl)Cl,0.037546481605565646,0.2093085161156731,1 +CC(OP(=S)(OC(C)C)SCCNS(=O)(=O)c1ccccc1)C,0.03773457509937652,0.003947387860828739,1 +OC(=O)C(Oc1ccc(cc1Cl)Cl)C,0.03828744186371015,0.018695785718636188,1 +CC(C(c1cncnc1)(c1ccc(cc1)OC(F)(F)F)O)C,0.038746408312020406,0.02067135597373708,1 +OC(=O)COc1cc(Cl)c(cc1Cl)Cl,0.03914162418169542,0.09184075863798795,1 +CCOP(=S)(Oc1nn(c(n1)Cl)C(C)C)OCC,0.039841737145637234,0.007638665240055393,1 +CC(N(C(=O)SCC(=C(Cl)Cl)Cl)C(C)C)C,0.04102878665011248,0.11851229785685428,1 +CCN(C(=O)C(=C(OP(=O)(OC)OC)C)Cl)CC,0.041042640567373466,0.006725562763289336,1 +CNC(=O)Oc1cc(C)c(c(c1)C)SC,0.041276958181115306,0.01244898356693107,1 +ClC(C(SN1C(=O)C2C(C1=O)CC=CC2)(Cl)Cl)Cl,0.04297243667696324,0.32963878644864847,0.42857142857142855 +O=C1OC(C(=O)N1Nc1ccccc1)(C)c1ccc(cc1)Oc1ccccc1,0.044873074905021335,0.135992755390313,1 +CCN(c1nc(cc(n1)C)OP(=S)(OC)OC)CC,0.04519647299825149,0.019762932791303964,1 +C=CCC1=C(C)C(CC1=O)OC(=O)C1C(C1(C)C)C=C(C)C,0.04563372244789605,0.17688013246322926,1 +ClCC=CCl,0.045958425107502164,0.0742053426494004,0.14285714285714285 +CCOC(=O)Cn1c(=O)sc2c1c(Cl)ccc2,0.046003238627999404,0.13903555779637022,1 +CCCN(C(=O)SCC)CCC,0.047538995974292175,0.03568739809152346,1 +CON(C(=O)Nc1ccc(cc1)Br)C,0.048243951057630914,0.0387394680026393,1 +CN(C(=S)SSC(=S)N(C)C)C,0.04887018780459644,0.051165604885929104,0.16666666666666666 +Cc1cccc(c1O)C,0.04911414454620167,0.2990053132281964,1 +COC(=O)Nc1nc2c([nH]1)cc(cc2)Sc1ccccc1,0.050108966959550236,0.2040829902451164,1 +C=CCOC(c1ccc(cc1Cl)Cl)Cn1cncc1,0.05047450068604942,0.026552818063037695,1 
+CCSC(CC1CC(=O)C(C(=O)C1)C(=NOCC)CCC)C,0.05056765552287047,0.2576640422648569,0.21875 +CN(C(=O)Oc1nc(nc(c1C)C)N(C)C)C,0.051618595485714625,0.00947421966200617,1 +Clc1ccc(cc1)CCC(C(C)(C)C)(Cn1cncn1)O,0.05165383561566402,0.06031770103514649,1 +CNC(=O)Oc1cc(C)c(c(c1)C)C,0.05174850433885335,0.08220731490776746,1 +Cc1ccc(cc1)N(S(=O)(=O)N(C)C)SC(Cl)(Cl)F,0.051834835094095484,0.051179522110690205,1 +CN(C(=O)Oc1nc(nc(c1C)C)N(C)C)C,0.0524579222415799,0.00947421966200617,1 +O=N(=O)c1ccc(c(c1)N)C,0.05257947683683445,0.2130456994423962,1 +O=C1N(c2cc(Cl)cc(c2)Cl)C(=O)C2(C1(C)C2)C,0.05279126047017867,0.08848370769665356,1 +NC(=NCCCCCCCCNCCCCCCCCN=C(N)N)N,0.053436074592710235,0.04339969605964679,1 +COCN(c1c(CC)cccc1CC)C(=O)CCl,0.05375006811431045,0.3416422958308449,1 +OC(C(Cl)(Cl)Cl)(c1ccc(cc1)Cl)c1ccc(cc1)Cl,0.05398319600278186,0.04775054042433673,1 +C#CCOS(=O)OC1CCCCC1Oc1ccc(cc1)C(C)(C)C,0.0542125521232289,0.0882738484824748,1 +CCOc1ccc2c(c1)C(=CC(N2)(C)C)C,0.05522147585284508,0.3448543217147861,1 +O=C(c1ccc(cc1S(=O)(=O)C)C(F)(F)F)c1cnoc1C1CC1,0.05566064749641608,0.1418188285865031,1 +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)(C)C,0.05566320606558952,0.032070039079003125,1 +CCOC(=O)COC(=O)c1cc(ccc1[N+](=O)[O-])Oc1ccc(cc1Cl)C(F)(F)F,0.05583516191627437,0.08304633486416381,1 +N#CC(c1c(Cl)ccc(c1Cl)n1ncc(=O)[nH]c1=O)c1ccc(cc1)Cl,0.056422615793681234,0.04201701712817465,1 +CNC(=O)Oc1cccc(c1)N=CN(C)C,0.056495719658295813,0.08543967286780094,1 +CCOC(=O)C(c1ccc(cc1)Cl)(c1ccc(cc1)Cl)O,0.056582904287311254,0.0760610772949622,1 +Clc1ccccc1c1nnc(nn1)c1ccccc1Cl,0.05706818876652619,0.06524247107291589,1 +CN(C(CN1c2ccccc2Sc2c1cccc2)C)C,0.058364575374860554,0.07462370481313495,1 +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C(c1ccc(cc1)Cl)C(C)C,0.05953797389131243,0.02239458456128216,1 +CC(OC(=O)C(c1ccc(cc1)Br)(c1ccc(cc1)Br)O)C,0.06073132568962639,0.04246445180870021,1 +CNC(=O)ON=C(SC)C,0.061648442359631114,0.02965108174786982,1 
+CNc1cnn(c(=O)c1Cl)c1cccc(c1)C(F)(F)F,0.06174515112035177,0.030447515039680465,1 +CCNc1nc(SC)nc(n1)NC(C)(C)C,0.06214876624755196,0.0764957405369865,1 +CN(C(=S)SSC(=S)N(C)C)C,0.06238747379310184,0.051165604885929104,0.16666666666666666 +[O-][N+](=O)c1cc(cc(c1)[N+](=O)[O-])[N+](=O)[O-],0.06245761469536169,0.23539870476616265,1 +COP(=S)(SCN1C(=O)c2c(C1=O)cccc2)OC,0.06302765174348351,0.06346841632986405,1 +ClC(=CC1C(C1(C)C)C(=O)OCc1cccc(c1)Oc1ccccc1)Cl,0.06389160712181856,0.048329611043453576,1 +CC(C1(C)N=C(NC1=O)c1nc2ccccc2cc1C(=O)O)C,0.06423944765895072,0.9871399288405841,1 +COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccsc1C(=O)OC,0.06453419527613821,0.20114055010294407,1 +O=C(N(C)C)Nc1cccc(c1)C(F)(F)F,0.06459882942614491,0.054230294082185056,1 +O=C(NC(=O)c1c(F)cccc1F)Nc1cc(Cl)c(c(c1F)Cl)F,0.06559798797851273,0.040528628452314384,1 +CCCCN(SN(C(=O)Oc1cccc2c1OC(C2)(C)C)C)CCCC,0.06569530810416269,0.0400926736559006,1 +Clc1ccccc1c1nnc(nn1)c1ccccc1Cl,0.06597478470118634,0.06738207410701946,1 +[O-][N+](=O)NC1=NCCN1Cc1ccc(nc1)Cl,0.0664943030028045,0.004330753128362828,0.13043478260869565 +O=C(NC(=O)c1c(F)cccc1F)Nc1cc(Cl)c(c(c1F)Cl)F,0.06822190749765324,0.04160579288164088,1 +CCOc1ccc(cc1)C(COCc1cccc(c1)Oc1ccccc1)(C)C,0.0690593023384914,0.12059148862626623,1 +COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1Cl,0.06987675250196507,0.18894976588375237,1 +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)(C)C,0.07154653735936956,0.039897050062012586,1 +CCN1CCN(CC1)c1cc2c(cc1F)c(=O)c(cn2C1CC1)C(=O)O,0.07234386441112595,0.37000179744555806,1 +Nc1ccc(cc1)Cl,0.07250833532306657,0.17342070934363113,1 +CCCC1COC(O1)(Cn1cncn1)c1ccc(cc1Cl)Cl,0.07305234130123987,0.0459174084353172,1 +O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(c(c1)Cl)OC(C(OC(F)(F)F)F)(F)F,0.07306609422899836,0.0639912743902412,1 +OC(C(C)(C)C)C(n1cncn1)Oc1ccc(cc1)c1ccccc1,0.07409262028018154,0.1920790850026332,1 +CCCSc1ccc2c(c1)[nH]c(n2)NC(=O)OC,0.07537743365466734,0.30320218104074037,1 
+Cn1cc(c2cccc(c2)C(F)(F)F)c(=O)c(c1)c1ccccc1,0.07591497971688389,0.10916794690519636,1 +Clc1ccc(cc1)CN(C(=O)Nc1ccccc1)C1CCCC1,0.0760257762657501,0.05887962606246405,1 +CCSC(=O)N1CCCCCC1,0.07907000434271044,0.1420990237195984,1 +CC(c1cc(ccc1O)C(c1ccc(c(c1)C(C)C)O)(C)C)C,0.08001387248515598,0.5422932490073515,1 +C=CCC1=C(C)C(CC1=O)OC(=O)C1C(C1(C)C)C=C(C)C,0.08101639130242413,0.1649358203743006,1 +ClCCP(=O)(O)O,0.08304843107672291,2.424380344082731,1 +COC(=O)Nc1cccc(c1)OC(=O)Nc1cccc(c1)C,0.0832475217878744,0.19354884508408218,1 +CCCN(c1c(cc(c(c1[N+](=O)[O-])N)C(F)(F)F)[N+](=O)[O-])CCC,0.08392957349588569,0.18727220561469346,1 +OC(C(C)(C)C)C(n1ncnc1)Oc1ccc(cc1)Cl,0.08452667530010859,0.07205142115160633,1 +O=C(C(C)(C)C)C(n1ncnc1)Oc1ccc(cc1)Cl,0.08510674803234901,0.06511854133132516,1 +CCCC(=C1C(=O)CC(CC1=O)C1CCCSC1)NOCC,0.08603044408485085,0.009979530982780172,1 +CC(=O)Nc1cc(NS(=O)(=O)C(F)(F)F)c(cc1C)C,0.08894826507859208,1.144357215930057,1 +N#CC(c1ccccc1)(Cn1cncn1)CCc1ccc(cc1)Cl,0.08906885283592852,0.06989004489788962,1 +COCC(=O)Nc1cc(ccc1NC(=NC(=O)OC)NC(=O)OC)Sc1ccccc1,0.08959030532555236,0.15048437937513895,1 +O=C1N(OCC1(C)C)Cc1ccccc1Cl,0.08969617860069455,0.06563526849047481,1 +Nc1nc(NC2CC2)nc(n1)N,0.09026150563412319,0.1456955513263534,1 +ClC(C(c1ccc(cc1)Cl)c1ccc(cc1)Cl)(Cl)Cl,0.09027148189044054,0.04785917219329116,1 +Fc1ccc(cc1)C(=O)CCCN1CCN(CC1)c1ccccn1,0.09163218547527233,0.3441994467725427,1 +CCC(=O)Nc1ccc(c(c1)Cl)Cl,0.09170952329114665,0.13723522769182628,1 +COC(=O)NC(=S)Nc1ccccc1NC(=S)NC(=O)OC,0.09345959256991566,0.3200955745826286,1 +Clc1cc(Cl)cc(c1)C1(CO1)CC(Cl)(Cl)Cl,0.09362507489225783,0.049819983746996574,1 +Cn1cc(c2cccc(c2)C(F)(F)F)c(=O)c(c1)c1ccccc1,0.09868947363194906,0.11320935441784255,1 +NC(=N)NCCCCCCCCCCCCOC(=O)C,0.10160268068512719,0.713383542089078,1 +OC1CC2(O)CC(O)C(C(O2)(C)CC(C=CC=CC=CC=CCC(OC(=O)C=CC2C(C1)(C)O2)C)OC1(C)OC(C)C(C(C1O)N)O)C(=O)O,0.10172294366080416,0.2884908443749386,0.12658227848101267 
+[O-][N+](=O)c1cnc(n1C)C,0.10628650675790867,0.25583359344399015,1 +CC(N(c1c(cc(cc1N(=O)=O)S(=O)(=O)N)N(=O)=O)C(C)C)C,0.10642121227099519,0.8110931516114431,1 +CCOC(=O)C(OC(=O)c1cc(ccc1[N+](=O)[O-])Oc1ccc(cc1Cl)C(F)(F)F)C,0.10827828411229923,0.060150877044639596,1 +CCOC(=O)C(OC(=O)c1cc(ccc1N(=O)=O)Oc1cc(ccc1Cl)C(F)(F)F)C,0.10827828411229923,0.06391354072273552,1 +ClCC(=O)N(c1ccccc1)C(C)C,0.10865048725491992,0.14063355647533457,1 +CCOc1cc(ccc1[N+](=O)[O-])Oc1ccc(cc1Cl)C(F)(F)F,0.11058877880543937,0.055855601003025876,1 +COC(=O)c1c(nc(c(c1CC(C)C)C1=NCCS1)C(F)(F)F)C(F)F,0.11151045196043953,0.6030561295372162,1 +Clc1cc(ccc1Oc1ccc(c(c1)C(=O)NS(=O)(=O)C)[N+](=O)[O-])C(F)(F)F,0.11395676083924232,0.07874258062509669,1 +Oc1ccc(c(c1)C)C,0.1145996706078039,0.14058064499529827,1 +N#Cc1c(N)nc(nc1N)NC1CC1,0.11566455596376966,0.0671588260211166,1 +CCNc1nc(NC(C)C)nc(n1)Cl,0.11591071091933607,0.07264693165285359,1 +CCN(c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-])Cc1c(F)cccc1Cl,0.1185590456888386,0.10968410897370563,1 +Nc1ccc(cc1)S(=O)(=O)Nc1nc(C)cc(n1)C,0.1185642260256668,0.3488361590116861,1 +N#CC(c1ccccc1)(Cn1cncn1)CCc1ccc(cc1)Cl,0.11875847044790469,0.06325306833995115,1 +CC(N(c1c(cc(cc1N(=O)=O)C(F)(F)F)N(=O)=O)C(C)C)C,0.1193036069506878,0.07647943573166908,1 +COc1cc(ccc1OC)C(=CC(=O)N1CCOCC1)c1ccc(cc1)Cl,0.11937399144446861,0.15938750502122703,1 +CCCCc1c(=O)nc([nH]c1C)NCC,0.1194525860672606,0.13601548650468212,1 +CNC(=O)ON=C(SC)C,0.12329688471926223,0.019043488181468577,1 +CN(C(=O)C(c1ccccc1)c1ccccc1)C,0.1253592168358431,0.16058478564428405,1 +O=C(C1=C(C)OCCS1)Nc1ccccc1,0.1274956638724717,0.34245069872781153,1 +CC(N(c1c(cc(cc1N(=O)=O)S(=O)(=O)N)N(=O)=O)C(C)C)C,0.12992280391195832,0.8110931516114431,1 +CCCN(C(=O)SCC)CCC,0.13205276659525605,0.06906850635347721,1 +C=CCOC(c1ccc(cc1Cl)Cl)Cn1cncc1,0.13459866849613178,0.024048983960135852,1 +ClC(SN1C(=O)c2c(C1=O)cccc2)(Cl)Cl,0.1348810665963127,0.35545219964034264,1 
+OC(C(C)(C)C)C(=Cc1ccc(cc1)Cl)n1ncnc1,0.13506940531624406,0.09325300787311752,1 +CCc1ccc(cc1)C(=O)NN(C(C)(C)C)C(=O)c1cc(C)cc(c1)C,0.13618183361575933,0.8007493146491557,1 +O=C(Nc1cnns1)Nc1ccccc1,0.13620822278144273,0.08031296188431028,1 +ClC=C(c1cc(Cl)c(cc1Cl)Cl)OP(=O)(OC)OC,0.1366262742927664,0.01695886379908282,1 +CC(NC(=O)N1CC(=O)N(C1=O)c1cc(Cl)cc(c1)Cl)C,0.13932359364492994,0.07765239200013897,1 +CN1CC2CC1CN2c1cc2c(cc1F)c(=O)c(cn2C1CC1)C(=O)O,0.13990757146198934,0.2090948708768445,0.5862068965517241 +OC(=O)COc1nc(Cl)c(cc1Cl)Cl,0.1403669879303106,0.04466802484258431,1 +COC(=O)C(N(c1c(C)cccc1C)C(=O)Cc1ccccc1)C,0.14136381415796706,0.23276538536699015,1 +ClC(=C)Cl,0.14441434207714035,0.010177007878307786,0.1 +CC(N1C(=O)c2ccccc2NS1(=O)=O)C,0.14566407168203882,0.5073265138326565,1 +CON=C(c1ccccc1CON=C(c1cccc(c1)C(F)(F)F)C)C(=O)OC,0.14692519722320194,0.24556842297038736,1 +c1ccc(cc1)Nc1ccccc1,0.14773454395291782,0.48409780026481714,1 +COC(CCCC(CC=CC(=CC(=O)OC(C)C)C)C)(C)C,0.14816176662421726,1.0038894881647953,1 +c1scc(n1)c1nc2c([nH]1)cccc2,0.1490700414533971,0.06053075972454769,1 +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.15013314047110002,0.017720839447182994,1 +CCOC(=O)CC(C(=O)OCC)SP(=S)(OC)OC,0.1513509494941276,0.025399859207357336,1 +CON=C(c1ccc(cc1Cl)Cl)Cc1cccnc1,0.15245767876475944,0.06053667496108435,1 +CCC(c1noc(c1)NC(=O)c1c(OC)cccc1OC)(CC)C,0.15252975563710267,1.224432087935241,1 +Clc1ccc(c(c1)Cl)C=C(C(C(C)(C)C)O)n1cncn1,0.15327033840680634,0.10122954702097303,1 +COC=C(c1ccccc1Oc1ncnc(c1)Oc1ccccc1C#N)C(=O)OC,0.15431812608561873,0.38732816429606964,1 +COP(=S)(Oc1cc(Cl)c(cc1Cl)Cl)OC,0.15549919159080278,0.016427144246218724,1 +CCCCCCCCc1cc(N(=O)=O)c(c(c1)N(=O)=O)OC(=O)C=CC,0.15642453685164498,0.7353871066554781,1 +Cc1nc(Nc2ccccc2)nc(c1)C1CC1,0.15801925526767843,0.14258833778495594,1 +CCOC(=O)CN(c1c(CC)cccc1CC)C(=O)CCl,0.1603572605822803,0.18868594261143684,1 +Cc1cccc2c1n1cnnc1s2,0.16381576159162972,0.6851471667728956,1 
+CC(N1C(=O)c2ccccc2NS1(=O)=O)C,0.16647322477947293,0.5248637450337764,1 +ClC(SN1C(=O)c2c(C1=O)cccc2)(Cl)Cl,0.16860133324539087,0.35545219964034264,1 +CNc1cnn(c(=O)c1Cl)c1cccc(c1)C(F)(F)F,0.1687700797289615,0.031597288914811124,1 +CCC(Nc1c(cc(cc1[N+](=O)[O-])C(C)(C)C)[N+](=O)[O-])C,0.16929970598735858,0.1270677771191105,1 +Clc1cc(Cl)c(cc1n1nc(n(c1=O)C(F)F)C)NS(=O)(=O)C,0.1730416993562668,0.022002424130460663,1 +COC(=O)c1ccc(cc1C1=NC(C(=O)N1)(C)C(C)C)C,0.1734054330003024,0.27706226280243773,1 +CNC(=O)N(c1nnc(s1)C(C)(C)C)C,0.1751969016077557,0.19506513302817866,0.4 +CCCCCCCCSC(=O)Oc1cc(Cl)nnc1c1ccccc1,0.17813968959673715,0.18686850946296205,1 +COCC(=O)N(c1c(C)cccc1C)N1CCOC1=O,0.17965983350851364,0.11878355690291638,1 +c1ccc(cc1)Nc1ccccc1,0.1831908345016181,0.2809891238813933,1 +CN1CN(C)CSC1=S,0.18486987933542975,0.040377923983948856,0.16666666666666666 +CCOCN(c1c(C)cccc1CC)C(=O)CCl,0.18534506246313948,0.07277588371973029,1 +O=N(=O)c1ccc(c(c1)N(=O)=O)C,0.1866762157041476,0.10265714109290516,1 +CNC(=O)Oc1cccc2c1cccc2,0.1878529324240324,0.13059631278321485,1 +COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1CCC(F)(F)F,0.19051986050321804,0.142337016902528,1 +COP(=O)(NC(=O)C)SC,0.1910836440808347,0.01603420284847195,1 +OC1CN(C(=O)N1c1nnc(s1)C(C)(C)C)C,0.19506513302817866,0.17519690160775567,0.4 +OC(=O)C(Cl)(Cl)C,0.1970361896096669,0.42676977936996974,0.125 +c1scc(n1)c1nc2c([nH]1)cccc2,0.19876005527119617,0.06053075972454769,1 +Nc1ccc(c(c1)N)O,0.2013846888993215,0.6976394362438932,1 +O=C(NS(=O)(=O)c1ccccc1C(=O)OC1COC1)Nc1nc(C)cc(n1)C,0.20422574060250331,0.3749650612729717,1 +ClCC(=O)N(c1c(CC)cccc1CC)CNC(=O)C,0.21058487877925733,0.3814418917881575,1 +CC(Nc1nc(NC(C)C)nc(n1)Cl)C,0.21766590408142725,0.05784177240937466,1 +CC(c1ccc(cc1)O)(c1ccc(cc1)O)C,0.21902317939829427,0.7237798298552831,1 +COCC(=O)N(c1c(C)cccc1C)C(C(=O)OC)C,0.22374845318219344,0.2149499838579308,1 +Nc1ccc2c(c1)nc1c(c2)ccc(c1)N,0.22461542255370148,0.6782252254224121,1 
+CCSC(CC1CC(=O)C(=C(NOCC=CCl)CC)C(=O)C1)C,0.2389478027971563,0.06436113994662118,0.21875 +CNC(=O)Oc1ccccc1OC(C)C,0.23895810443138246,0.05797512153929558,1 +CCC(n1c(=O)[nH]c(c(c1=O)Br)C)C,0.23935747721355113,0.1194525860672606,0.14814814814814814 +C=CCC1=C(C)C(CC1=O)OC(=O)C1C(C1(C)C)C=C(C)C,0.24800936112986982,0.19013970584234022,1 +OC(=O)c1nc(Cl)c(c(c1Cl)N)Cl,0.24848916516834604,0.21532428756816943,1 +C=CC1(C)OC(=O)N(C1=O)c1cc(Cl)cc(c1)Cl,0.25479642918707424,0.10104185355147247,1 +CC(Oc1cccc(c1)NC(=O)c1ccccc1C(F)(F)F)C,0.2690918752347788,0.10495954737172976,1 +ClC=C(c1cc(Cl)c(cc1Cl)Cl)OP(=O)(OC)OC,0.2732525485855328,0.015157957101805187,1 +CCSC(CC1CC(=O)C(=C(NOCC=CCl)CC)C(=O)C1)C,0.27784628232227476,0.06436113994662118,0.21875 +CCCC1COC(O1)(Cn1cncn1)c1ccc(cc1Cl)Cl,0.2805209905967611,0.055344746560950404,1 +C#CCOS(=O)OC1CCCCC1Oc1ccc(cc1)C(C)(C)C,0.2853292217012047,0.07713406742523517,1 +CCCCOCCOCCOCc1cc2OCOc2cc1CCC,0.29547465787728056,0.3350153436602428,1 +COC(=O)c1ccccc1S(=O)(=O)NC(=O)Nc1nc(OC(F)F)cc(n1)OC(F)F,0.2989300503468667,0.4105408249862013,1 +CCOC(=O)c1ccccc1S(=O)(=O)NC(=O)Nc1nc(Cl)cc(n1)OC,0.30133493788161053,0.3808470469587571,1 +CNC(=O)Oc1cc(C)cc(c1C)C,0.30635114568601185,0.0976602822721586,1 +C#CCC1=C(C)C(CC1=O)OC(=O)C1C(C1(C)C)C=C(C)C,0.316253365684832,0.19212808361152056,1 +OC(=O)CCl,0.317470328693963,0.1630859508654308,1 +ClC(SN1C(=O)C2C(C1=O)CC=CC2)(Cl)Cl,0.3326798171006209,0.07976114599708196,1 +CN(C1C(=O)C(=C(O)N)C(=O)C2(C1CC1C(=C(O)c3c(C1(C)O)cccc3O)C2=O)O)C,0.33750750616693714,8.335917849904947,0.6216216216216216 +Clc1cc(ccc1Oc1ccc(c(c1)C(=O)O)[N+](=O)[O-])C(F)(F)F,0.34563108073944815,0.08634724402138849,1 +CCC(=O)Nc1ccc(c(c1)Cl)Cl,0.3484961885063573,0.10641333146048552,1 +OC(=O)C(Cl)(Cl)C,0.3497269961122948,0.42676977936996974,0.125 +Fc1ccc(cc1)C(=O)CCCN1CCN(CC1)c1ccccn1,0.35125671098854394,0.07554116271775944,1 +OC(C(C)(C)C)C(n1ncnc1)Oc1ccc(cc1)Cl,0.3550120362604561,0.05298039495100552,1 
+N=C(NC(=N)N)NCCc1ccccc1,0.35564719019232227,0.4293116814818228,1 +COc1ccc(cc1)C(C(Cl)(Cl)Cl)c1ccc(cc1)OC,0.36163948246786254,0.12900055509999012,1 +CCN(C(=O)C(Oc1cccc2c1cccc2)C)CC,0.36852210915226874,0.2666090555504209,1 +CC(=CC1C(C1(C)C)C(=O)OCc1coc(c1)Cc1ccccc1)C,0.3693416417277341,0.1741052713034425,1 +O=C(C(C)(C)C)C(n1ncnc1)Oc1ccc(cc1)Cl,0.3880867710275115,0.06530531261415101,1 +COC(=O)Nc1nc2c([nH]1)cccc2,0.3922867840256219,0.08503054904294756,1 +CCCCNC(=O)n1c(NC(=O)OC)nc2c1cccc2,0.3961177430023906,0.13425870370019663,1 +Clc1c(Cl)c([N+](=O)[O-])c(c(c1Cl)Cl)Cl,0.423248605734443,0.12324532538914809,1 +NCCNc1cccc2c1cccc2,0.4241543329029509,0.23852536609869093,1 +CC(=CC1C(C1(C)C)C(=O)OCc1cccc(c1)Oc1ccccc1)C,0.42802021191337764,0.06507172681898275,1 +CC(Oc1ccccn1)COc1ccc(cc1)Oc1ccccc1,0.4356352632556343,0.18762505282704417,1 +N#Cc1c[nH]cc1c1cccc2c1OC(O2)(F)F,0.443217671652664,0.1582577446590667,1 +CC1N(C(=O)NC2CCCCC2)C(=O)SC1c1ccc(cc1)Cl,0.4534134152107278,0.04044915666609474,1 +CCSC(=O)N(CC(C)C)CC(C)C,0.4600420791288938,0.03228951683176119,1 +Cc1cc(N)c(cc1C)C,0.46595489467866197,0.06597029300388296,1 +Clc1cc(ccc1Oc1ccc(c(c1)C(=O)[O-])[N+](=O)[O-])C(F)(F)F.[Na+],0.46919094173712006,0.06314263015489453,1 +Clc1c(Cl)c([N+](=O)[O-])c(c(c1Cl)Cl)Cl,0.47403843842257615,0.12305192134429824,1 +CN1N(C)C(CC1c1ccccc1)c1ccccc1,0.49533572071941767,0.15296953131716254,1 +OC(=O)C(Oc1cccc(c1)Cl)C,0.4984573741185779,0.031571537397910326,1 +COC(=O)C(NC(=O)C(CC(=O)O)N)Cc1ccccc1,0.4994850207500349,0.3915748574119684,1 +CCCCC(COC(=O)c1ccccc1C(=O)OCC(CCCC)CC)CC,0.5120902983161549,0.5490084227192451,1 +COc1c(Cl)ccc(c1C(=O)O)Cl,0.520273850439093,0.27032818788451163,1 +COCC(N(c1c(C)cccc1CC)C(=O)CCl)C,0.5285529966699751,0.09022939400978072,1 +O=CCC1CC(C)C(=O)C=CC(=CC(C(OC(=O)CC(C(C1OC1(C)OC(C)C(C(C1O)N(C)C)OC1(C)OC(C)C(C(C1)(C)O)O)C)O)CC)COC1OC(C)C(C(C1OC)OC)O)C,0.5295750507618869,0.32043182213812477,1 +COC(=O)C1(O)c2cc(Cl)ccc2c2c1cccc2,0.546052144921948,0.05128735297771426,1 
+CC(C12CCC(O2)(C(C1)OCc1ccccc1C)C)C,0.5466515334085721,0.07979167550031513,1 +Oc1ccc2c(c1N=Nc1ccccc1)ccc(c2)S(=O)(=O)O,0.5482080783455129,2.226070226693587,1 +ClCCOc1ccccc1S(=O)(=O)NC(=O)Nc1nc(C)nc(n1)OC,0.5494924735209582,0.14061208442890083,1 +Nc1ccc(c(c1)N(=O)=O)N,0.5681125108300529,0.407842963979915,1 +CCCN(c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-])CCCl,0.5690227874227859,0.10229430897526087,1 +NC1CCCCC1,0.5898716318329822,0.27111140054462934,1 +COc1cc(Cl)c(cc1Cl)OC,0.6037074787089276,0.14551736437093893,1 +NC1CCCCC1,0.6049965454697254,0.27111140054462934,1 +OC(=O)C1C2CCC(C1C(=O)O)O2,0.6177415369409439,0.3762966519177018,1 +CN(C(=O)Nc1ccc(cc1)Cl)C,0.6292491939569526,0.05314085450837755,1 +COC(=O)c1ccc(cc1)C(=O)OC,0.6437193589585136,0.6359402307400387,1 +Clc1ccc(cc1)S(=O)(=O)c1cc(Cl)c(cc1Cl)Cl,0.6459733503975151,0.039780209619230206,1 +COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1C(=O)OC,0.655542030995076,0.1510565040147731,1 +CCCCOCC(OCC(O)C)C,0.6726932978936081,0.7345230884100764,1 +CC1OC(C)OC(C1)OC(=O)C,0.7175892491582392,0.1016026806851272,0.14285714285714285 +CCCCOCCOCCOCc1cc2OCOc2cc1CCC,0.7386866446932013,0.43725287577182737,1 +COc1nc(nc(c1)OC)NC(=O)NS(=O)(=O)Cc1ccccc1C(=O)OC,0.7529208210920754,0.43020251605384513,1 +O=C(C1C(C1(C)C)C=C(C)C)OCN1C(=O)C2=C(C1=O)CCCC2,0.7543614918373561,0.11072031885625554,1 +COC(=O)NS(=O)(=O)c1ccc(cc1)N,0.7817895162025876,0.3433567591603662,1 +Nc1ccc(c(c1)C)NOS(=O)(=O)O,0.8431459792705229,0.402820671344419,1 +CCOC(=O)C1OC1(C)c1ccccc1,0.8485352051922984,0.1653576500580577,1 +CCCCNC(=O)n1c(NC(=O)OC)nc2c1cccc2,0.8611255282660666,0.1563339051142994,1 +OCCn1c(C)ncc1[N+](=O)[O-],0.8764039114257128,0.05770780251062409,1 +COP(=O)OC,0.9086866261501474,0.11612790576142619,1 +[O-][N+](=O)c1cc(Cl)c(c(c1)Cl)N,0.9419645496713847,0.21199123476247797,1 +OCCNc1ccc(cc1OCCO)N(=O)=O,0.9453881078267568,0.9356572196349415,1 +O=N(=O)c1cccc2c1cccc2,0.952831491808421,0.1614476326965743,1 
+O=C(C1(C)CCCCC1)Nc1ccc(c(c1Cl)Cl)O,0.9662594125910484,0.4457496787338429,1 +Oc1cccc2c1nccc2,0.9851335765350275,0.721989181638648,1 +CCCOC(=O)c1ccc(cn1)C(=O)OCCC,0.9949124950582696,0.8759044368876665,1 +CC[N](=C1C=CC(=C(c2ccc(cc2)N(Cc2cccc(c2)S(=O)(=O)O)CC)c2ccc(cc2)N(C)C)C=C1)Cc1cccc(c1)S(=O)(=O)O,1.009963174498295,0.18540142003081284,1 +ClCCP(=O)(O)O,1.0381053884590363,2.9662351498622144,1 +ClCC[N+](C)(C)C,1.0602168942789227,1.7717264844452583,1 +Clc1ccccc1,1.0661274430976688,0.09347672424517633,1 +CCOC(=O)CC(C(=O)OCC)SP(=S)(OC)OC,1.0897268363577188,0.021360738953880024,1 +O=C1CCCCCN1,1.10465364954589,0.11720225163340323,1 +COc1cccc(c1C)C(=O)NN(C(C)(C)C)C(=O)c1cc(C)cc(c1)C,1.1154252951100516,0.2509122725796672,1 +COC(=O)C(=CC=CC(=CC=CC=C(C=CC=C(C=CC1=C(C)CCCC1(C)C)C)C)C)C,1.119409718240544,0.01608733804096794,0.12121212121212122 +Oc1ccc(cc1Cl)C(C)(C)C,1.1697007223226876,0.11220135445041438,1 +CON=C(c1ccccc1COc1ccccc1C)C(=O)OC,1.1807966969350603,0.15177727689808973,1 +CON=C(c1ccccc1COc1ccccc1C)C(=O)OC,1.1967534090558043,0.16001530012602447,1 +OCc1cc(N=Nc2ccc(c3c2cccc3)S(=O)(=O)O)c(c(c1O)N=Nc1ccc(c2c1cccc2)S(=O)(=O)O)O,1.2093346835379808,1.319500758990477,1 +CC1=CC(=O)CC(C1)(C)C,1.295160023171064,0.11315631785675133,1 +C[N]1(C)CCCCC1,1.3133857473480115,1.7551096434605418,1 +OC1CCC2(C(C1)CCC1C2CCC2(C1CCC2C(CCC(=O)O)C)C)C,1.3277652171188237,1.387743406118009,1 +Oc1ccc(c(c1)C(C)(C)C)O,1.3536524792656537,0.22216113939448204,1 +OCC1OC2OC3C(CO)OC(C(C3O)O)OC3C(CO)OC(C(C3O)O)OC3C(CO)OC(C(C3O)O)OC3C(OC(OC4C(OC(OC5C(OC(OC1C(C2O)O)C(O)C5O)CO)C(O)C4O)CO)C(O)C3O)CO,1.4097112541302337,20.11975049013973,1 +CCCCOC(=O)c1ccccc1C(=O)OCc1ccccc1,1.504675539130048,0.37909491054043404,1 +COC(=O)c1c(Cl)c(Cl)c(c(c1Cl)Cl)C(=O)OC,1.5061863289853148,0.7284064393720566,1 +Fc1cc2CCC(n3c2c(c1)c(=O)c(c3)C(=O)O)C,1.531109972815908,0.0995408783900811,0.3333333333333333 +CC(Oc1cccc(c1)NC(=O)c1ccccc1C(F)(F)F)C,1.5465050300849357,0.10755408570863038,1 
+c1ccc(cc1)c1ccccc1,1.6211890708511503,0.26000708215571994,1 +NCC(c1ccc(cc1)O)O,1.6320834707547616,0.6023190083757878,1 +ClC(SN1C(=O)c2c(C1=O)cccc2)(Cl)Cl,1.6860133324539086,0.35545219964034264,1 +ClCC#CCOC(=O)Nc1cccc(c1)Cl,1.743505808935165,0.17719995498834726,1 +OC(=O)CNCP(=O)(O)O,1.7743806406081915,0.700841565636653,0.16666666666666666 +COc1ccc(c(c1)OC)N,1.8018201517132568,0.4406371111426593,1 +CC(C1(C)N=C(NC1=O)c1ncccc1C(=O)O)C,1.913681483026602,1.0116079695709865,1 +OC(=O)COc1nc(F)c(c(c1Cl)N)Cl,1.9605490478397496,0.14062735149788871,1 +C=Cc1ccccc1,2.021140457067712,0.5581719843674564,1 +Clc1ccc(cc1)Cl,2.0407891160090657,0.058396786076860684,1 +CCCCOC(=O)c1ccccc1C(=O)OCCCC,2.1556100397968727,0.4824557089800792,1 +c1ccc(cc1)c1ccccc1OCC1CO1,2.209744922072461,0.47297048589951723,1 +ClCC[N](C)(C)C,2.2427665071284903,1.1121371793371502,1 +CC=Cc1ccc(cc1)OC,2.3211612715861247,1.2223687229695321,1 +CC(OC(=O)Nc1cccc(c1)Cl)C,2.340158076742021,0.07885333594015259,1 +COC(=O)c1ccccc1O,2.366127776683809,0.6131172056131932,1 +CCOC(=O)C=C,2.477130986890983,0.160637047130486,1 +C=Cc1ccccc1,2.736460951374337,0.48345391227477846,1 +CCc1ccccc1,2.741016342485753,0.12025839466555979,1 +CC(c1ccccc1)C,2.7539366734341955,0.12179927831297671,1 +CC(=C)C(=O)O,2.8807316686731115,9.313172081918692,0.14285714285714285 +CC(N(c1c(cc(cc1N(=O)=O)C(F)(F)F)N(=O)=O)C(C)C)C,2.982590173767195,0.10106093503191066,1 +ClCCP(=O)(O)O,3.0866333550182015,2.424380344082731,1 +Clc1cnc2c(c1)ccc(c2C(=O)O)Cl,3.127347059508829,0.11793510219271723,1 +CCCOC(=O)NCCCN(C)C,3.611885866531256,0.24217162746131807,1 +CCOP(=O)O,3.6347465046005896,0.003842329850641841,1 +Oc1ccccc1,3.655248831064175,0.5726822872379717,1 +CC1CCC(C(C1)O)C(C)C,3.7948308388559964,7.011629148921578,1 +CCc1ccccc1,3.843074459567654,0.11981018705088699,1 +CC(c1ccccc1)C,3.8438632722857955,0.1197379646857681,1 +COc1ccc(cc1)N,3.8488877932280037,0.36732614842795186,1 +OCCO,4.027850816139244,8.499181435776759,1 
+CCCCC(COC(=O)CCCCC(=O)OCC(CCCC)CC)CC,4.047856676081442,0.3479573670205783,1 +CCCOC(=O)c1cc(O)c(c(c1)O)O,4.071644352421931,0.7702810499269743,1 +CC(CCCC1(C)CCc2c(O1)c(C)c(c(c2C)OC(=O)C)C)CCCC(CCCC(C)C)C,4.230630449818821,0.43643913496013176,1 +COc1ccc(cc1N=Nc1c(O)c(cc2c1cccc2)C(=O)Nc1cccc(c1)N(=O)=O)N(=O)=O,4.308389780762046,0.411182841264013,1 +S=c1sc2c([nH]1)cccc2,4.484270077422418,0.10787015001421449,1 +CC(OC(=O)Nc1cccc(c1)Cl)C,4.680316153484042,0.08345354088958273,1 +Oc1ccccc1c1ccccc1,5.875192118782284,0.2214370368151079,1 +OC(=O)CNCP(=O)(O)O,5.914602135360638,0.700841565636653,0.16666666666666666 +CCOc1ccc(cc1N)NC(=O)C,6.1010029534002825,0.3233024548760301,1 +Nc1ccc(cc1)O,6.286318149278613,0.612495359780696,1 +NC(=S)NNC(=S)N,6.303842268414009,0.004069924087402051,0.14285714285714285 +NC(=O)c1cnccn1,6.408762052980724,0.027758250773633534,0.10526315789473684 +OC(=O)c1ccc(cc1N)N(=O)=O,6.506215164982792,0.5563741830253752,1 +Oc1cc(O)c2c(c1)oc(c(c2=O)O)c1ccc(c(c1)O)O,6.729846937340625,0.6908804048917031,1 +ClCC(=O)c1ccc(cc1)NC(=O)C,7.465334624174738,0.20790177229212267,1 +O=C1OC(=O)c2c1cccc2,8.000509872156579,0.6713795852396433,1 +CCCOC(=O)c1ccc(cc1)O,8.324062177858794,0.6396720070179559,1 +OCC(C1OC(=O)C(=C1O)O)O,8.82332300652517,1.4097112541302337,0.17647058823529413 +CCOC(=O)COC(=O)c1ccccc1C(=O)OCC,8.919866912731305,0.21304859946913612,1 +O=C1CCCCC1,9.272184465524795,1.272174495388276,1 +OC(=O)C=CC(=O)O,9.313172081918696,2.8807316686731115,0.14285714285714285 +COC(=O)c1ccc(cc1)O,9.858865736182537,0.46643220721186857,1 +COC(=O)c1ccccc1C(=O)OC,10.299509743336218,0.5164877342007698,1 +OC1C2C(N(C)C)C(=O)C(=C(O)N)C(=O)C2(O)C(=O)C2=C(O)c3c(C(C12)(C)O)c(Cl)ccc3O,10.50761860949369,0.3375075061669371,0.6216216216216216 +OCCO,10.633526154607605,18.100227209506496,0.2 +OCCO,16.111403264556976,12.098739817238384,0.2 +CCCCCCCCCCCCCCCCCC(=O)OCC(C1OCC(C1O)O)O,16.727105323218392,0.5487265062934492,1 
+OCC(C1OC(=O)C(=C1O)O)O,17.323010613197102,13.375187084226921,0.23076923076923078 +[O-]S(=O)(=O)NC1CCCCC1.[Na+],17.900880706433757,0.1311146954310952,1 +O=C1NS(=O)(=O)c2c1cccc2,19.66323569952698,0.30457294474070207,1 +CCCCCCCCCCCC(=O)OCC(C1OCC(C1O)O)O,19.866710908558982,0.4802563354164847,1 +CCOC(=O)c1ccccc1C(=O)OCC,19.95615854702247,0.2835725804367756,1 +OC(=O)c1ccccc1N,20.060380944519448,0.6470696879448438,1 +OCCO,32.22280652911395,8.499181435776759,1 +OCC(CO)O,74.73899985905678,5.776891986788403,1 diff --git a/paper/data/mazzatorta-cv.id b/paper/data/mazzatorta-cv.id new file mode 100644 index 0000000..516d987 --- /dev/null +++ b/paper/data/mazzatorta-cv.id @@ -0,0 +1 @@ +56c42aab2b72ed10be000001 diff --git a/paper/data/mazzatorta-test-predictions.csv b/paper/data/mazzatorta-test-predictions.csv index 6c61db5..e508e33 100644 --- a/paper/data/mazzatorta-test-predictions.csv +++ b/paper/data/mazzatorta-test-predictions.csv @@ -1,149 +1,149 @@ -SMILES,LOAEL,Confidence,Dataset -O=S1OCC2C(CO1)C1(C(C2(Cl)C(=C1Cl)Cl)(Cl)Cl)Cl,0.0003638692469573398,1,mazzatorta-prediction -CCSCSP(=S)(OCC)OCC,0.0016300938762789745,1,mazzatorta-prediction -CCOP(=S)(OCC)SCSP(=S)(OCC)OCC,0.001847030797857755,1,mazzatorta-prediction -CCOP(=S)(SCSC(C)(C)C)OCC,0.0031106867605998826,1,mazzatorta-prediction -CCCSP(=O)(SCCC)OCC,0.0061411247977180205,1,mazzatorta-prediction -CCOP(=S)(SCn1c(=O)oc2c1ccc(c2)Cl)OCC,0.007526804342298479,1,mazzatorta-prediction -CCOP(=O)(Oc1ccc(c(c1)C)SC)NC(C)C,0.008291760373156038,1,mazzatorta-prediction -CN(C(=O)Oc1nc(nc(c1C)C)N(C)C)C,0.00947421966200617,1,mazzatorta-prediction -CSc1ccc(cc1C)OP(=S)(OC)OC,0.00998805136771544,1,mazzatorta-prediction -CCOP(=S)(Oc1ccc2c(c1)oc(=O)c(c2C)Cl)OCC,0.010714479147398627,1,mazzatorta-prediction -CCOP(=S)(Oc1cc(C)nc(n1)C(C)C)OCC,0.010980300528105117,1,mazzatorta-prediction -COc1sc(=O)n(n1)CSP(=S)(OC)OC,0.011073447351926287,1,mazzatorta-prediction -CCOP(=O)(SC(CC)C)SC(CC)C,0.011316358861878211,1,mazzatorta-prediction 
-CNC(=O)CSP(=S)(OC)OC,0.011977939066676562,1,mazzatorta-prediction -CCOP(=S)(Oc1nc(Cl)c(cc1Cl)Cl)OCC,0.01285055734967491,1,mazzatorta-prediction -COP(=S)(Oc1ccc(c(c1)C)[N+](=O)[O-])OC,0.015728201435628045,1,mazzatorta-prediction -COP(=O)(NC(=O)C)SC,0.01603420284847195,1,mazzatorta-prediction -OC(=O)C(Oc1ccc(cc1)Oc1ncc(cc1Cl)C(F)(F)F)C,0.01643167623693211,1,mazzatorta-prediction -N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.017186299494700404,1,mazzatorta-prediction -N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C(c1ccc(cc1)Cl)C(C)C,0.017235945805185275,1,mazzatorta-prediction -CCN(c1nc(cc(n1)C)OP(=S)(OC)OC)CC,0.017635466228081265,1,mazzatorta-prediction -CCCCOC(=O)C(Oc1ccc(cc1)Oc1ccc(cn1)C(F)(F)F)C,0.018612600873944375,1,mazzatorta-prediction -CCOP(=S)(Oc1ncn(n1)c1ccccc1)OCC,0.019680297281264553,1,mazzatorta-prediction -CC(C(c1cncnc1)(c1ccc(cc1)OC(F)(F)F)O)C,0.02067135597373707,1,mazzatorta-prediction -OC(=O)C(Oc1ccc(cc1Cl)Cl)C,0.020745537156134756,1,mazzatorta-prediction -COP(=S)(SCn1nnc2c(c1=O)cccc2)OC,0.021687531960393556,1,mazzatorta-prediction -CNC(=O)Oc1cc(C)c(c(c1)C)SC,0.0217719179484974,1,mazzatorta-prediction -N#CC(c1ccc(c(c1)Oc1ccccc1)F)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.02241011099945114,1,mazzatorta-prediction -CCOC(=O)C(Oc1ccc(cc1)Oc1cnc2c(n1)ccc(c2)Cl)C,0.022838553346053704,1,mazzatorta-prediction -Clc1cc(Cl)c(cc1n1nc(n(c1=O)C(F)F)C)NS(=O)(=O)C,0.024541035827570765,1,mazzatorta-prediction -C=CCOC(c1ccc(cc1Cl)Cl)Cn1cncc1,0.024853325579571102,1,mazzatorta-prediction -CCOC(=O)CC(C(=O)OCC)SP(=S)(OC)OC,0.025399859207357323,1,mazzatorta-prediction -COP(=S)(Oc1nc(Cl)c(cc1Cl)Cl)OC,0.02586178816777326,1,mazzatorta-prediction -COP(=O)(OC=C(Cl)Cl)OC,0.027361947682508048,1,mazzatorta-prediction -CNC(=O)ON=C(C(=O)N(C)C)SC,0.02790918990194414,1,mazzatorta-prediction -N#Cc1nn(c(c1S(=O)C(F)(F)F)N)c1c(Cl)cc(cc1Cl)C(F)(F)F,0.03032030255266643,1,mazzatorta-prediction -N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)(C)C,0.03405677944151583,1,mazzatorta-prediction 
-N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Br)Br,0.03520125762569351,1,mazzatorta-prediction -COC(=O)c1c(nc(c(c1CC(C)C)C1=NCCS1)C(F)(F)F)C(F)F,0.0369855483661329,1,mazzatorta-prediction -CON(C(=O)Nc1ccc(cc1)Br)C,0.0387394680026393,1,mazzatorta-prediction -CN1CN(C)CSC1=S,0.040377923983948856,0.16666666666666666,mazzatorta-prediction -CC1N(C(=O)NC2CCCCC2)C(=O)SC1c1ccc(cc1)Cl,0.040449156666094756,1,mazzatorta-prediction -O=C(NC(=O)c1c(F)cccc1F)Nc1cc(Cl)c(c(c1F)Cl)F,0.040528628452314384,1,mazzatorta-prediction -OC(=O)COc1ccc(cc1Cl)Cl,0.041293632648700326,1,mazzatorta-prediction -CCOCn1c(c2ccc(cc2)Cl)c(c(c1C(F)(F)F)Br)C#N,0.04261363346757391,1,mazzatorta-prediction -CCCCN(SN(C(=O)Oc1cccc2c1OC(C2)(C)C)C)CCCC,0.04291166973357382,1,mazzatorta-prediction -CCCC1COC(O1)(Cn1cncn1)c1ccc(cc1Cl)Cl,0.0434822264129367,1,mazzatorta-prediction -OC(=O)COc1nc(Cl)c(cc1Cl)Cl,0.04466802484258436,1,mazzatorta-prediction -OC(C(Cl)(Cl)Cl)(c1ccc(cc1)Cl)c1ccc(cc1)Cl,0.04857785280417766,1,mazzatorta-prediction -CCN(C(=O)SCc1ccc(cc1)Cl)CC,0.050018494066167395,1,mazzatorta-prediction -CON(C(=O)Nc1ccc(c(c1)Cl)Cl)C,0.050114646105433334,1,mazzatorta-prediction -CCOC(=O)c1cn2nc(cc2nc1C)OP(=S)(OCC)OCC,0.05083350716627098,1,mazzatorta-prediction -CN(C(=S)SSC(=S)N(C)C)C,0.051165604885929104,0.16666666666666666,mazzatorta-prediction -O=C(N(C)C)Nc1cccc(c1)C(F)(F)F,0.052337960737326904,1,mazzatorta-prediction -Clc1ccc(c(c1)Cl)C1(OCCO1)Cn1cncn1,0.052851131392490244,1,mazzatorta-prediction -CCNc1nc(NC(C)C)nc(n1)Cl,0.053039565463993625,1,mazzatorta-prediction -CCOc1cc(ccc1[N+](=O)[O-])Oc1ccc(cc1Cl)C(F)(F)F,0.05314229228135397,1,mazzatorta-prediction -COC(=O)N(C(=O)N1COC2(C(=N1)c1ccc(cc1C2)Cl)C(=O)OC)c1ccc(cc1)OC(F)(F)F,0.0535200267253048,1,mazzatorta-prediction -BrC1COC(C1)(Cn1cncn1)c1ccc(cc1Cl)Cl,0.053693231227279335,1,mazzatorta-prediction -Clc1ccc(cc1)C(c1ccccc1Cl)(c1cncnc1)O,0.0538394393290607,1,mazzatorta-prediction -OC(=O)COc1ccc(cc1C)Cl,0.05445622621994639,1,mazzatorta-prediction 
-Clc1ccc(cc1)CCC(C(C)(C)C)(Cn1cncn1)O,0.05584570953801489,1,mazzatorta-prediction -CC(Nc1nc(NC(C)C)nc(n1)Cl)C,0.056357793591707304,1,mazzatorta-prediction -ClC(=CC1C(C1(C)C)C(=O)OCc1cccc(c1)Oc1ccccc1)Cl,0.05722075950509786,1,mazzatorta-prediction -N#CC(c1ccccc1)(Cn1cncn1)CCc1ccc(cc1)Cl,0.058271163381068226,1,mazzatorta-prediction -Clc1ccc(cc1)CN(C(=O)Nc1ccccc1)C1CCCC1,0.059370957979920064,1,mazzatorta-prediction -CC(OC(=O)C(c1ccc(cc1)Br)(c1ccc(cc1)Br)O)C,0.059446571641332435,1,mazzatorta-prediction -Fc1ccc(cc1)[Si](c1ccc(cc1)F)(Cn1cncn1)C,0.06017129137104992,1,mazzatorta-prediction -c1scc(n1)c1nc2c([nH]1)cccc2,0.06053075972454769,1,mazzatorta-prediction -Cc1ccc(cc1)N(S(=O)(=O)N(C)C)SC(Cl)(Cl)F,0.06221038764609867,1,mazzatorta-prediction -O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(c(c1)Cl)OC(C(OC(F)(F)F)F)(F)F,0.06365856284683262,1,mazzatorta-prediction -CNC(=O)Oc1ccccc1OC(C)C,0.06370353086320016,1,mazzatorta-prediction -O=C(C(C)(C)C)C(n1ncnc1)Oc1ccc(cc1)Cl,0.06511854133132516,1,mazzatorta-prediction -CC(N(C(=O)SCC(=C(Cl)Cl)Cl)C(C)C)C,0.06648016528067341,1,mazzatorta-prediction -CC(=CC1C(C1(C)C)C(=O)OCc1cccc(c1)Oc1ccccc1)C,0.06733522342267834,1,mazzatorta-prediction -N#Cc1c(Cl)c(C#N)c(c(c1Cl)Cl)Cl,0.0678327800715719,1,mazzatorta-prediction -COP(=S)(SCN1C(=O)c2c(C1=O)cccc2)OC,0.06797949317882576,1,mazzatorta-prediction -O=C1N(OCC1(C)C)Cc1ccccc1Cl,0.06808330607768283,1,mazzatorta-prediction -Clc1ccccc1c1nnc(nn1)c1ccccc1Cl,0.06951066613764369,1,mazzatorta-prediction -N#Cc1c(Cl)cccc1Cl,0.07075139304586898,1,mazzatorta-prediction -OC(C(C)(C)C)C(n1ncnc1)Oc1ccc(cc1)Cl,0.07314761133650725,1,mazzatorta-prediction -Clc1cc(ccc1Oc1ccc(c(c1)C(=O)NS(=O)(=O)C)[N+](=O)[O-])C(F)(F)F,0.07458280632191289,1,mazzatorta-prediction -CC(NC(=O)N1CC(=O)N(C1=O)c1cc(Cl)cc(c1)Cl)C,0.07509600041356945,1,mazzatorta-prediction -ClC(SN1C(=O)C2C(C1=O)CC=CC2)(Cl)Cl,0.07976114599708196,1,mazzatorta-prediction -CCCN(C(=O)n1cncc1)CCOc1c(Cl)cc(cc1Cl)Cl,0.08041755256984288,1,mazzatorta-prediction 
-CC(OC(=O)Nc1cccc(c1)Cl)C,0.08082419839147705,1,mazzatorta-prediction -COC(=O)Nc1nc2c([nH]1)cccc2,0.08503054904294756,1,mazzatorta-prediction -O=C1N(c2cc(Cl)cc(c2)Cl)C(=O)C2(C1(C)C2)C,0.08848370769665356,1,mazzatorta-prediction -CCOCN(c1c(C)cccc1CC)C(=O)CCl,0.08899189256538585,1,mazzatorta-prediction -CNC(=O)Oc1cccc2c1OC(C2)(C)C,0.09304673991992557,1,mazzatorta-prediction -C#CCOS(=O)OC1CCCCC1Oc1ccc(cc1)C(C)(C)C,0.09394776953418806,1,mazzatorta-prediction -CCCCC(c1ccc(cc1)Cl)(Cn1cncn1)C#N,0.09995117906018544,1,mazzatorta-prediction -O=C(N(C)C)Nc1ccc(c(c1)Cl)Cl,0.1009416101848442,1,mazzatorta-prediction -Clc1c(Cl)c([N+](=O)[O-])c(c(c1Cl)Cl)Cl,0.10273222601735031,1,mazzatorta-prediction -C=CC1(C)OC(=O)N(C1=O)c1cc(Cl)cc(c1)Cl,0.10370887199340015,1,mazzatorta-prediction -CC(Oc1cccc(c1)NC(=O)c1ccccc1C(F)(F)F)C,0.10985201253951346,1,mazzatorta-prediction -COCC(N(c1c(C)cccc1CC)C(=O)CCl)C,0.11629727690023284,1,mazzatorta-prediction -O=C(c1ccc(cc1S(=O)(=O)C)C(F)(F)F)c1cnoc1C1CC1,0.11723725716301076,1,mazzatorta-prediction -CCC(n1c(=O)[nH]c(c(c1=O)Br)C)C,0.1194525860672606,0.14814814814814814,mazzatorta-prediction -CCC(Nc1c(cc(cc1[N+](=O)[O-])C(C)(C)C)[N+](=O)[O-])C,0.1270677771191105,1,mazzatorta-prediction -O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(cc1)Cl,0.12890443143014826,1,mazzatorta-prediction -COC(=O)N(c1ccccc1COc1ccn(n1)c1ccc(cc1)Cl)OC,0.13042675416012312,1,mazzatorta-prediction -OC(=O)COc1nc(F)c(c(c1Cl)N)Cl,0.13193009603279973,1,mazzatorta-prediction -ClCCOc1ccccc1S(=O)(=O)NC(=O)Nc1nc(C)nc(n1)OC,0.1364078153236936,1,mazzatorta-prediction -ClCC1CN(C(=O)C1Cl)c1cccc(c1)C(F)(F)F,0.13694971527814467,1,mazzatorta-prediction -CCOc1ccc(cc1)C(COCc1cccc(c1)Oc1ccccc1)(C)C,0.13831022672047752,1,mazzatorta-prediction -CCC(=O)Nc1ccc(c(c1)Cl)Cl,0.13989660392944153,1,mazzatorta-prediction -O=C1OC(C(=O)N1Nc1ccccc1)(C)c1ccc(cc1)Oc1ccccc1,0.1401261123626703,1,mazzatorta-prediction -COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1CCC(F)(F)F,0.142337016902528,1,mazzatorta-prediction 
-Cc1nc(Nc2ccccc2)nc(c1)C1CC1,0.14373018518177136,1,mazzatorta-prediction -Nc1nc(NC2CC2)nc(n1)N,0.1456955513263534,1,mazzatorta-prediction -CNC(=O)Oc1cccc2c1cccc2,0.14826054249092455,1,mazzatorta-prediction -COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1C(=O)OC,0.1482947332858024,1,mazzatorta-prediction -CC(Oc1cc(c(cc1Cl)Cl)n1nc(oc1=O)C(C)(C)C)C,0.1493882391935513,1,mazzatorta-prediction -N#Cc1sc2=c(sc1C#N)c(=O)c1c(c2=O)cccc1,0.15445645494051075,1,mazzatorta-prediction -N#Cc1c[nH]cc1c1cccc2c1OC(O2)(F)F,0.1582577446590667,1,mazzatorta-prediction -CC(Oc1ccccn1)COc1ccc(cc1)Oc1ccccc1,0.15949214928142758,1,mazzatorta-prediction -OC(C(C)(C)C)C(n1cncn1)Oc1ccc(cc1)c1ccccc1,0.16221937215111784,1,mazzatorta-prediction -CCOC(=O)C(Cc1cc(c(cc1Cl)F)n1nc(n(c1=O)C(F)F)C)Cl,0.1642031063051573,1,mazzatorta-prediction -C#CCC1=C(C)C(CC1=O)OC(=O)C1C(C1(C)C)C=C(C)C,0.1739298872266669,1,mazzatorta-prediction -CC(=CC1C(C1(C)C)C(=O)OCc1coc(c1)Cc1ccccc1)C,0.1881056272435862,1,mazzatorta-prediction -[O-][N+](=O)c1cc(Cl)c(c(c1)Cl)N,0.19997575454195834,1,mazzatorta-prediction -CCOC(=O)NCCOc1ccc(cc1)Oc1ccccc1,0.20212320440807907,1,mazzatorta-prediction -Oc1ccccc1c1ccccc1,0.2236833070650602,1,mazzatorta-prediction -CCCCCCCCSC(=O)Oc1cc(Cl)nnc1c1ccccc1,0.2246734303430016,1,mazzatorta-prediction -COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1Cl,0.22642545612510342,1,mazzatorta-prediction -CC(Cc1ccc(cc1)C(C)(C)C)CN1CC(C)OC(C1)C,0.24428343783187767,1,mazzatorta-prediction -COc1cccc(c1C)C(=O)NN(C(C)(C)C)C(=O)c1cc(C)cc(c1)C,0.2509122725796671,1,mazzatorta-prediction -CCOc1ccc2c(c1)C(=CC(N2)(C)C)C,0.25786920018026926,1,mazzatorta-prediction -O=C(C1=C(C)OCCS1)Nc1ccccc1,0.2689999596587689,1,mazzatorta-prediction -COC(=O)NC(=S)Nc1ccccc1NC(=S)NC(=O)OC,0.26940550668026203,1,mazzatorta-prediction -OC(=O)c1nc(Cl)c(c(c1Cl)N)Cl,0.2719107573679272,1,mazzatorta-prediction -CCN(C(=O)C(Oc1cccc2c1cccc2)C)CC,0.2825970996116866,1,mazzatorta-prediction -c1ccc(cc1)Nc1ccccc1,0.3003220074311764,1,mazzatorta-prediction 
-COCN(c1c(CC)cccc1CC)C(=O)CCl,0.32452363754907937,1,mazzatorta-prediction -COP(=O)(SC)N,0.33442367385922134,1,mazzatorta-prediction -CCCCOCCOCCOCc1cc2OCOc2cc1CCC,0.3350153436602428,1,mazzatorta-prediction -ClC(SN1C(=O)c2c(C1=O)cccc2)(Cl)Cl,0.35545219964034264,1,mazzatorta-prediction -O=C(C1(C)CCCCC1)Nc1ccc(c(c1Cl)Cl)O,0.4457496787338429,1,mazzatorta-prediction -COC(=O)NS(=O)(=O)c1ccc(cc1)N,0.46002982126481345,1,mazzatorta-prediction -CC(N1C(=O)c2ccccc2NS1(=O)=O)C,0.5248637450337764,1,mazzatorta-prediction -Cc1cccc2c1n1cnnc1s2,0.5255899798851922,1,mazzatorta-prediction -OC(=O)CNCP(=O)(O)O,0.700841565636653,0.16666666666666666,mazzatorta-prediction -COC(=O)c1c(Cl)c(Cl)c(c(c1Cl)Cl)C(=O)OC,0.7284064393720566,1,mazzatorta-prediction -CCc1ccc(cc1)C(=O)NN(C(C)(C)C)C(=O)c1cc(C)cc(c1)C,0.8007493146491558,1,mazzatorta-prediction -CC(C1(C)N=C(NC1=O)c1nc2ccccc2cc1C(=O)O)C,0.9871399288405841,1,mazzatorta-prediction -CCC(c1noc(c1)NC(=O)c1c(OC)cccc1OC)(CC)C,1.7292009012474114,1,mazzatorta-prediction -ClCCP(=O)(O)O,2.424380344082731,1,mazzatorta-prediction +SMILES,LOAEL_measured_median,LOAEL_predicted,Confidence,Dataset +N#Cc1nn(c(c1S(=O)C(F)(F)F)N)c1c(Cl)cc(cc1Cl)C(F)(F)F,0.0001361095787305931,0.030320302552666413,1,mazzatorta-prediction +OC(=O)C(Oc1ccc(cc1)Oc1ncc(cc1Cl)C(F)(F)F)C,0.0002764719490734748,0.01643167623693211,1,mazzatorta-prediction +CCOP(=S)(SCSC(C)(C)C)OCC,0.000277363084031507,0.0031106867605998826,1,mazzatorta-prediction +CCSCSP(=S)(OCC)OCC,0.0006144925543928096,0.0016300938762789745,1,mazzatorta-prediction +CCOP(=O)(SC(CC)C)SC(CC)C,0.0008728063120409454,0.011316358861878211,1,mazzatorta-prediction +CNC(=O)CSP(=S)(OC)OC,0.001090477150926923,0.011977939066676562,1,mazzatorta-prediction +COP(=O)(SC)N,0.0020548549621536454,0.33442367385922134,1,mazzatorta-prediction +CSc1ccc(cc1C)OP(=S)(OC)OC,0.0025868753585247565,0.00998805136771544,1,mazzatorta-prediction +CCOP(=S)(Oc1ccc2c(c1)oc(=O)c(c2C)Cl)OCC,0.0033630532459809582,0.010714479147398627,1,mazzatorta-prediction 
+CCOP(=S)(Oc1ncn(n1)c1ccccc1)OCC,0.004149211972577347,0.019680297281264553,1,mazzatorta-prediction +COc1sc(=O)n(n1)CSP(=S)(OC)OC,0.004713372383210075,0.011073447351926287,1,mazzatorta-prediction +CCOP(=S)(OCC)SCSP(=S)(OCC)OCC,0.0049417895576815835,0.001847030797857757,1,mazzatorta-prediction +CCOP(=O)(Oc1ccc(c(c1)C)SC)NC(C)C,0.005274306112287868,0.008291760373156038,1,mazzatorta-prediction +CC(Cc1ccc(cc1)C(C)(C)C)CN1CC(C)OC(C1)C,0.005601648043851348,0.24428343783187767,1,mazzatorta-prediction +COP(=S)(Oc1nc(Cl)c(cc1Cl)Cl)OC,0.006200913183680908,0.02586178816777326,1,mazzatorta-prediction +OC(C(Cl)(Cl)Cl)(c1ccc(cc1)Cl)c1ccc(cc1)Cl,0.006747899500347733,0.04857785280417766,1,mazzatorta-prediction +COC(=O)N(C(=O)N1COC2(C(=N1)c1ccc(cc1C2)Cl)C(=O)OC)c1ccc(cc1)OC(F)(F)F,0.006820319665576013,0.053520026725304856,1,mazzatorta-prediction +Clc1ccc(cc1)C(c1ccccc1Cl)(c1cncnc1)O,0.007548400798826121,0.05383943932906067,1,mazzatorta-prediction +COP(=S)(SCn1nnc2c(c1=O)cccc2)OC,0.0076105098020530036,0.021687531960393556,1,mazzatorta-prediction +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Br)Br,0.007818698763639501,0.03520125762569349,1,mazzatorta-prediction +CCN(c1nc(cc(n1)C)OP(=S)(OC)OC)CC,0.008187766847509327,0.017635466228081265,1,mazzatorta-prediction +CCCCOC(=O)C(Oc1ccc(cc1)Oc1ccc(cn1)C(F)(F)F)C,0.008855868434313272,0.018612600873944365,1,mazzatorta-prediction +O=S1OCC2C(CO1)C1(C(C2(Cl)C(=C1Cl)Cl)(Cl)Cl)Cl,0.009706945232441807,0.00036386924695734017,1,mazzatorta-prediction +CCCSP(=O)(SCCC)OCC,0.010068539755671456,0.0061411247977180205,1,mazzatorta-prediction +COP(=O)(OC=C(Cl)Cl)OC,0.010408382170442241,0.027361947682508048,1,mazzatorta-prediction +CC(Oc1cc(c(cc1Cl)Cl)n1nc(oc1=O)C(C)(C)C)C,0.012455788330375379,0.1493882391935513,1,mazzatorta-prediction +CNC(=O)Oc1cccc2c1OC(C2)(C)C,0.0139433514779606,0.09304673991992557,1,mazzatorta-prediction +N#Cc1c(Cl)c(C#N)c(c(c1Cl)Cl)Cl,0.015042627044387032,0.0678327800715719,1,mazzatorta-prediction 
+Fc1ccc(cc1)[Si](c1ccc(cc1)F)(Cn1cncn1)C,0.01585325164934852,0.06017129137104992,1,mazzatorta-prediction +CCOP(=S)(Oc1cc(C)nc(n1)C(C)C)OCC,0.01642869699075557,0.010980300528105117,1,mazzatorta-prediction +N#Cc1c(Cl)cccc1Cl,0.016568667498017633,0.07075139304586898,1,mazzatorta-prediction +CCOP(=S)(Oc1nc(Cl)c(cc1Cl)Cl)OCC,0.0171141884323489,0.01285055734967491,1,mazzatorta-prediction +BrC1COC(C1)(Cn1cncn1)c1ccc(cc1Cl)Cl,0.017185416989653705,0.053693231227279314,1,mazzatorta-prediction +CCN(C(=O)SCc1ccc(cc1)Cl)CC,0.018038670157326797,0.050018494066167395,1,mazzatorta-prediction +CCCN(C(=O)n1cncc1)CCOc1c(Cl)cc(cc1Cl)Cl,0.01991156926953532,0.08041755256984288,1,mazzatorta-prediction +CCOC(=O)c1cn2nc(cc2nc1C)OP(=S)(OCC)OCC,0.020087610909726116,0.05083350716627104,1,mazzatorta-prediction +COP(=S)(SCN1C(=O)c2c(C1=O)cccc2)OC,0.02048398681663214,0.06797949317882583,1,mazzatorta-prediction +CNC(=O)ON=C(C(=O)N(C)C)SC,0.022347753176858155,0.02790918990194414,1,mazzatorta-prediction +COC(=O)N(c1ccccc1COc1ccn(n1)c1ccc(cc1)Cl)OC,0.02320682656135787,0.13042675416012312,1,mazzatorta-prediction +CON(C(=O)Nc1ccc(c(c1)Cl)Cl)C,0.025090939601491648,0.050114646105433334,1,mazzatorta-prediction +O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(cc1)Cl,0.025427825579407606,0.1289044314301482,1,mazzatorta-prediction +O=C(N(C)C)Nc1ccc(c(c1)Cl)Cl,0.02574063309087087,0.10094161018484414,1,mazzatorta-prediction +CCOC(=O)C(Oc1ccc(cc1)Oc1cnc2c(n1)ccc(c2)Cl)C,0.025750915471868897,0.022838553346053694,1,mazzatorta-prediction +N#Cc1sc2=c(sc1C#N)c(=O)c1c(c2=O)cccc1,0.026997497601947272,0.15445645494051072,1,mazzatorta-prediction +C=CC1(C)OC(=O)N(C1=O)c1cc(Cl)cc(c1)Cl,0.027961199362093195,0.10370887199340012,1,mazzatorta-prediction +N#CC(c1ccc(c(c1)Oc1ccccc1)F)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.028207113064839383,0.022410110999451165,1,mazzatorta-prediction +CCOC(=O)C(Cc1cc(c(cc1Cl)F)n1nc(n(c1=O)C(F)F)C)Cl,0.029112705155716945,0.1642031063051573,1,mazzatorta-prediction 
+CON(C(=O)Nc1ccc(cc1)Br)C,0.03130067550140176,0.0387394680026393,1,mazzatorta-prediction +CN1CN(C)CSC1=S,0.03266034652463028,0.040377923983948856,0.16666666666666666,mazzatorta-prediction +ClCC1CN(C(=O)C1Cl)c1cccc(c1)C(F)(F)F,0.03316084217977319,0.1369497152781446,1,mazzatorta-prediction +CCCCC(c1ccc(cc1)Cl)(Cn1cncn1)C#N,0.0341788251725187,0.09995117906018544,1,mazzatorta-prediction +CCOP(=S)(SCn1c(=O)oc2c1ccc(c2)Cl)OCC,0.0345288315455876,0.007526804342298486,1,mazzatorta-prediction +CCOCn1c(c2ccc(cc2)Cl)c(c(c1C(F)(F)F)Br)C#N,0.03508230910777224,0.04261363346757391,1,mazzatorta-prediction +Clc1ccc(c(c1)Cl)C1(OCCO1)Cn1cncn1,0.03798219426521996,0.05285113139249019,1,mazzatorta-prediction +CC(C(c1cncnc1)(c1ccc(cc1)OC(F)(F)F)O)C,0.038746408312020406,0.02067135597373707,1,mazzatorta-prediction +CC(N(C(=O)SCC(=C(Cl)Cl)Cl)C(C)C)C,0.04102878665011248,0.06648016528067341,1,mazzatorta-prediction +COP(=S)(Oc1ccc(c(c1)C)[N+](=O)[O-])OC,0.042491175292669145,0.015728201435628038,1,mazzatorta-prediction +O=C1OC(C(=O)N1Nc1ccccc1)(C)c1ccc(cc1)Oc1ccccc1,0.045140176541360745,0.14012611236267028,1,mazzatorta-prediction +CN(C(=S)SSC(=S)N(C)C)C,0.04783039657471141,0.051165604885929104,0.16666666666666666,mazzatorta-prediction +CN(C(=O)Oc1nc(nc(c1C)C)N(C)C)C,0.05161859628615915,0.00947421966200617,1,mazzatorta-prediction +C=CCOC(c1ccc(cc1Cl)Cl)Cn1cncc1,0.05350296944357954,0.024853325579571102,1,mazzatorta-prediction +COCN(c1c(CC)cccc1CC)C(=O)CCl,0.05375006811431045,0.3245236375490794,1,mazzatorta-prediction +CCOc1ccc2c(c1)C(=CC(N2)(C)C)C,0.05522147585284508,0.2578692001802692,1,mazzatorta-prediction +O=C(c1ccc(cc1S(=O)(=O)C)C(F)(F)F)c1cnoc1C1CC1,0.05566064749641608,0.11723725716301076,1,mazzatorta-prediction +Clc1ccccc1c1nnc(nn1)c1ccccc1Cl,0.05706818876652619,0.06951066613764371,1,mazzatorta-prediction +OC(=O)COc1ccc(cc1C)Cl,0.057322598023636456,0.05445622621994636,1,mazzatorta-prediction +CCOC(=O)NCCOc1ccc(cc1)Oc1ccccc1,0.057576722828150476,0.20212320440807913,1,mazzatorta-prediction 
+N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C(c1ccc(cc1)Cl)C(C)C,0.05953797389131243,0.017235945805185275,1,mazzatorta-prediction +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)(C)C,0.06009909138187043,0.034056779441515854,1,mazzatorta-prediction +CC(OC(=O)C(c1ccc(cc1)Br)(c1ccc(cc1)Br)O)C,0.06061453423316249,0.059446571641332435,1,mazzatorta-prediction +Clc1cc(ccc1Oc1ccc(c(c1)C(=O)NS(=O)(=O)C)[N+](=O)[O-])C(F)(F)F,0.06267621846158328,0.07458280632191285,1,mazzatorta-prediction +COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1Cl,0.06288907725176857,0.2264254561251038,1,mazzatorta-prediction +CCCCN(SN(C(=O)Oc1cccc2c1OC(C2)(C)C)C)CCCC,0.06569530810416269,0.04291166973357382,1,mazzatorta-prediction +O=C(NC(=O)c1c(F)cccc1F)Nc1cc(Cl)c(c(c1F)Cl)F,0.06690994773808298,0.040528628452314384,1,mazzatorta-prediction +O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(c(c1)Cl)OC(C(OC(F)(F)F)F)(F)F,0.06758613754894155,0.06365856284683262,1,mazzatorta-prediction +CCOc1ccc(cc1)C(COCc1cccc(c1)Oc1ccccc1)(C)C,0.06839527058523667,0.13831022672047752,1,mazzatorta-prediction +C#CCOS(=O)OC1CCCCC1Oc1ccc(cc1)C(C)(C)C,0.06904967382858089,0.09394776953418806,1,mazzatorta-prediction +OC(=O)COc1nc(Cl)c(cc1Cl)Cl,0.0721330354641874,0.04466802484258431,1,mazzatorta-prediction +O=C(N(C)C)Nc1cccc(c1)C(F)(F)F,0.07395704796137248,0.052337960737326904,1,mazzatorta-prediction +OC(C(C)(C)C)C(n1cncn1)Oc1ccc(cc1)c1ccccc1,0.07409262028018154,0.16221937215111784,1,mazzatorta-prediction +CCNc1nc(NC(C)C)nc(n1)Cl,0.07789199862212233,0.0530395654639936,1,mazzatorta-prediction +O=C(C1=C(C)OCCS1)Nc1ccccc1,0.08117223892684251,0.2689999596587689,1,mazzatorta-prediction +CNC(=O)Oc1cc(C)c(c(c1)C)SC,0.0827758354922366,0.0217719179484974,1,mazzatorta-prediction +OC(C(C)(C)C)C(n1ncnc1)Oc1ccc(cc1)Cl,0.08452667530010859,0.07314761133650725,1,mazzatorta-prediction +O=C(C(C)(C)C)C(n1ncnc1)Oc1ccc(cc1)Cl,0.08510674803234901,0.06511854133132516,1,mazzatorta-prediction 
+CC(NC(=O)N1CC(=O)N(C1=O)c1cc(Cl)cc(c1)Cl)C,0.08783443947180365,0.07509600041356941,1,mazzatorta-prediction +CCC(=O)Nc1ccc(c(c1)Cl)Cl,0.09170952329114665,0.1398966039294415,1,mazzatorta-prediction +N#CC(c1ccccc1)(Cn1cncn1)CCc1ccc(cc1)Cl,0.09203781459712614,0.058271163381068254,1,mazzatorta-prediction +ClC(=CC1C(C1(C)C)C(=O)OCc1cccc(c1)Oc1ccccc1)Cl,0.09583741068272783,0.05722075950509786,1,mazzatorta-prediction +OC(=O)C(Oc1ccc(cc1Cl)Cl)C,0.0967821447110451,0.020745537156134766,1,mazzatorta-prediction +COP(=O)(NC(=O)C)SC,0.10236623790044716,0.01603420284847195,1,mazzatorta-prediction +O=C1N(c2cc(Cl)cc(c2)Cl)C(=O)C2(C1(C)C2)C,0.11086164698737522,0.08848370769665356,1,mazzatorta-prediction +COC(=O)c1c(nc(c(c1CC(C)C)C1=NCCS1)C(F)(F)F)C(F)F,0.11151045292283465,0.0369855483661329,1,mazzatorta-prediction +Clc1ccc(cc1)CCC(C(C)(C)C)(Cn1cncn1)O,0.11516531274058425,0.05584570953801489,1,mazzatorta-prediction +COC(=O)NC(=S)Nc1ccccc1NC(=S)NC(=O)OC,0.12412602138191925,0.26940550668026203,1,mazzatorta-prediction +CCOc1cc(ccc1[N+](=O)[O-])Oc1ccc(cc1Cl)C(F)(F)F,0.12855945536132327,0.05314229228135397,1,mazzatorta-prediction +CCc1ccc(cc1)C(=O)NN(C(C)(C)C)C(=O)c1cc(C)cc(c1)C,0.1367492600870436,0.8007493146491557,1,mazzatorta-prediction +c1scc(n1)c1nc2c([nH]1)cccc2,0.1490700414533971,0.06053075972454769,1,mazzatorta-prediction +CC(N1C(=O)c2ccccc2NS1(=O)=O)C,0.14982590230152565,0.5248637450337764,1,mazzatorta-prediction +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.15013314047110002,0.017186299494700404,1,mazzatorta-prediction +Cc1cccc2c1n1cnnc1s2,0.1506048130761757,0.5255899798851918,1,mazzatorta-prediction +ClC(SN1C(=O)c2c(C1=O)cccc2)(Cl)Cl,0.15174119992085178,0.35545219964034264,1,mazzatorta-prediction +CCC(Nc1c(cc(cc1[N+](=O)[O-])C(C)(C)C)[N+](=O)[O-])C,0.15338553104123837,0.1270677771191105,1,mazzatorta-prediction +Cc1nc(Nc2ccccc2)nc(c1)C1CC1,0.15801925188118618,0.1437301851817713,1,mazzatorta-prediction 
+c1ccc(cc1)Nc1ccccc1,0.16546268922726798,0.30032200743117654,1,mazzatorta-prediction +Clc1cc(Cl)c(cc1n1nc(n(c1=O)C(F)F)C)NS(=O)(=O)C,0.1730416993562668,0.024541035827570765,1,mazzatorta-prediction +CCCC1COC(O1)(Cn1cncn1)c1ccc(cc1Cl)Cl,0.1767866659490005,0.0434822264129367,1,mazzatorta-prediction +C#CCC1=C(C)C(CC1=O)OC(=O)C1C(C1(C)C)C=C(C)C,0.18559079091504613,0.1739298872266669,1,mazzatorta-prediction +COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1CCC(F)(F)F,0.20017699986539617,0.142337016902528,1,mazzatorta-prediction +CC(=CC1C(C1(C)C)C(=O)OCc1coc(c1)Cc1ccccc1)C,0.2068313193675311,0.188105627243586,1,mazzatorta-prediction +CCOCN(c1c(C)cccc1CC)C(=O)CCl,0.21666838084755125,0.08899189256538591,1,mazzatorta-prediction +CC(Nc1nc(NC(C)C)nc(n1)Cl)C,0.2198425631222415,0.056357793591707304,1,mazzatorta-prediction +CNC(=O)Oc1ccccc1OC(C)C,0.23417894234275483,0.06370353086320013,1,mazzatorta-prediction +CCCCCCCCSC(=O)Oc1cc(Cl)nnc1c1ccccc1,0.2408184692696632,0.2246734303430016,1,mazzatorta-prediction +O=C1N(OCC1(C)C)Cc1ccccc1Cl,0.253443853488009,0.06808330607768283,1,mazzatorta-prediction +Cc1ccc(cc1)N(S(=O)(=O)N(C)C)SC(Cl)(Cl)F,0.25917417547047744,0.06221038764609867,1,mazzatorta-prediction +CC1N(C(=O)NC2CCCCC2)C(=O)SC1c1ccc(cc1)Cl,0.2594374890563992,0.04044915666609474,1,mazzatorta-prediction +CCN(C(=O)C(Oc1cccc2c1cccc2)C)CC,0.27189561466298434,0.2825970996116866,1,mazzatorta-prediction +OC(=O)COc1ccc(cc1Cl)Cl,0.28049546719325014,0.041293632648700326,1,mazzatorta-prediction +CCOC(=O)CC(C(=O)OCC)SP(=S)(OC)OC,0.2921073325236663,0.025399859207357336,1,mazzatorta-prediction +CNC(=O)Oc1cccc2c1cccc2,0.2981792578159244,0.14826054249092452,1,mazzatorta-prediction +Clc1ccc(cc1)CN(C(=O)Nc1ccccc1)C1CCCC1,0.31170568268957544,0.0593709579799201,1,mazzatorta-prediction +ClCCOc1ccccc1S(=O)(=O)NC(=O)Nc1nc(C)nc(n1)OC,0.31207588849423984,0.1364078153236936,1,mazzatorta-prediction +CCC(n1c(=O)[nH]c(c(c1=O)Br)C)C,0.3169092998307417,0.1194525860672606,0.14814814814814814,mazzatorta-prediction 
+ClC(SN1C(=O)C2C(C1=O)CC=CC2)(Cl)Cl,0.32935301892961466,0.07976114599708196,1,mazzatorta-prediction +CC(=CC1C(C1(C)C)C(=O)OCc1cccc(c1)Oc1ccccc1)C,0.42802021191337764,0.0673352234226783,1,mazzatorta-prediction +CC(Oc1ccccn1)COc1ccc(cc1)Oc1ccccc1,0.43361266621389954,0.1594921492814276,1,mazzatorta-prediction +N#Cc1c[nH]cc1c1cccc2c1OC(O2)(F)F,0.44926154899338216,0.1582577446590667,1,mazzatorta-prediction +Clc1c(Cl)c([N+](=O)[O-])c(c(c1Cl)Cl)Cl,0.47403843842257615,0.10273222601735031,1,mazzatorta-prediction +COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1C(=O)OC,0.4837900188743661,0.1482947332858024,1,mazzatorta-prediction +Nc1nc(NC2CC2)nc(n1)N,0.5144905821145022,0.1456955513263534,1,mazzatorta-prediction +CCCCOCCOCCOCc1cc2OCOc2cc1CCC,0.5170806512852409,0.3350153436602428,1,mazzatorta-prediction +COCC(N(c1c(C)cccc1CC)C(=O)CCl)C,0.5285529966699751,0.11629727690023284,1,mazzatorta-prediction +[O-][N+](=O)c1cc(Cl)c(c(c1)Cl)N,0.7245881151318344,0.19997575454195834,1,mazzatorta-prediction +COC(=O)c1c(Cl)c(Cl)c(c(c1Cl)Cl)C(=O)OC,0.7681550277825105,0.7284064393720566,1,mazzatorta-prediction +COC(=O)NS(=O)(=O)c1ccc(cc1)N,0.7817895162025876,0.46002982126481323,1,mazzatorta-prediction +OC(=O)c1nc(Cl)c(c(c1Cl)N)Cl,0.8282972172278201,0.2719107573679272,1,mazzatorta-prediction +CC(C1(C)N=C(NC1=O)c1nc2ccccc2cc1C(=O)O)C,0.8351128195663594,0.9871399288405841,1,mazzatorta-prediction +COC(=O)Nc1nc2c([nH]1)cccc2,0.8499546987221808,0.08503054904294756,1,mazzatorta-prediction +CC(Oc1cccc(c1)NC(=O)c1ccccc1C(F)(F)F)C,0.9077984526598573,0.1098520125395136,1,mazzatorta-prediction +O=C(C1(C)CCCCC1)Nc1ccc(c(c1Cl)Cl)O,0.9662594125910484,0.44574967873384286,1,mazzatorta-prediction +ClCCP(=O)(O)O,0.9723587138566308,2.424380344082731,1,mazzatorta-prediction +COc1cccc(c1C)C(=O)NN(C(C)(C)C)C(=O)c1cc(C)cc(c1)C,1.1154252951100516,0.2509122725796672,1,mazzatorta-prediction +CCC(c1noc(c1)NC(=O)c1c(OC)cccc1OC)(CC)C,1.5854670852219546,1.729200901247411,1,mazzatorta-prediction 
+OC(=O)COc1nc(F)c(c(c1Cl)N)Cl,1.9605490478397496,0.13193009603279973,1,mazzatorta-prediction +CC(OC(=O)Nc1cccc(c1)Cl)C,2.340158076742021,0.08082419839147705,1,mazzatorta-prediction +Oc1ccccc1c1ccccc1,3.119727015073393,0.22368330706506026,1,mazzatorta-prediction +OC(=O)CNCP(=O)(O)O,5.559726007239,0.700841565636653,0.16666666666666666,mazzatorta-prediction diff --git a/paper/data/mazzatorta-test-predictions.id b/paper/data/mazzatorta-test-predictions.id new file mode 100644 index 0000000..65e6336 --- /dev/null +++ b/paper/data/mazzatorta-test-predictions.id @@ -0,0 +1 @@ +56c429252b72ed0afe000004 diff --git a/paper/data/mazzatorta.csv~ b/paper/data/mazzatorta.csv~ deleted file mode 100644 index 921a53b..0000000 --- a/paper/data/mazzatorta.csv~ +++ /dev/null @@ -1,568 +0,0 @@ -SMILES,LOAEL_mmol_kg_bw_day -C1=C(C(=CC(=C1NN=C3C2=C(C=C([S]([O-])(=O)=O)C=C2)C=CC3=O)OC)[S]([O-])(=O)=O)C.[Na+].[Na+],7.531899781214326 -O1C(=O)C(O)=C(O)C1C(O)CO,17.323010613197102 -C1(C)=C(C=CC(C)=CC=CC(C)=CC=CC=C(C)C=CC=C(C)C(=O)OC)C(C)(C)CCC1,1.119409718240544 -c(cccc1)(c1)C(C)C,3.8438632722857955 -O=C(OCCCC)c(c(ccc1)C(=O)OCCCC)c1,2.1556100397968727 -O=C(OCC)c(c(ccc1)C(=O)OCC)c1,19.95615854702247 -O=C(OC(OC(OC1C)C)C1)C,0.7175892491582392 -Oc(c(ccc1)C)c1C,0.04911414454620167 -Oc(ccc(c1C)C)c1,0.1145996706078039 -O=C(OCC)C=C,2.477130986890983 -c(cccc1)(c1)CC,3.843074459567654 -OCCO,4.027850816139244 -c(ccc1C(=O)OCC(=O)OCC)cc1C(=O)OCC,8.919866912731305 -O=C,2.73096831477274 -O=C(O)C=CC(=O)O,9.313172081918696 -OCC(O)CO,74.73899985905678 -O=C(OC)c(ccc(O)c1)c1,9.858865736182537 -O=C(OCCC)c(ccc(O)c1)c1,8.324062177858794 -CC(CCC(=O)(O))C3CCC4C2CCC1CC(O)CCC1(C)C2CCC34C,1.3277652171188237 -OC(C(CCC1C)C(C)C)C1,3.7948308388559964 -O=C(O)C(=C)C,2.8807316686731115 -O=C(OC)c(c(O)ccc1)c1,2.366127776683809 -Oc(cccc1)c1,3.655248831064175 -O=C(OCCC)c(cc(O)c(O)c1O)c1,4.071644352421931 -OCC(O)C1C(O)=C(O)C(=O)O1,8.82332300652517 -c(cccc1)(c1)C=C,0.20163396483810905 -O=Cc(occ1)c1,0.624453213155231 
-NCCNc1cccc2ccccc12,0.4241543329029509 -CN(C)(C)CCCl,2.2427665071284903 -O=C(Nc(ccc(c1)C(=O)CCl)c1)C,7.465334624174738 -c(ccc(c1)Cl)(c1)C(c(ccc(c2)Cl)c2)C(Cl)(Cl)Cl,0.09027148189044054 -CC(Oc1cc(Cl)c(Cl)cc1Cl)C(=O)(O),0.03228091610123117 -O=N(=O)C(=CC=C1OC)C=C1N=NC(C(O)=C2C(=O)NC(=CC=C4)C=C4N(=O)=O)=C(C=C3)C(=C2)C=C3,4.308389780762046 -O=N(=O)C(C=C1)=CC(OCCO)=C1NCCO,0.9453881078267568 -Cc1cccc(CC)c1N(C(=O)CCl)COCC,0.18534506246313948 -C1=C(C(=CC=C1OC2=CC=C(C=C2Cl)C(F)(F)F)[N+](=O)[O-])C(=O)[O-].[Na+],0.46919094173712006 -CCc1cccc(CC)c1N(COC)C(=O)CCl,0.05560351873894184 -O=C(Nc(ccc(OCC)c1N)c1)C,6.1010029534002825 -Oc(ccc(N)c1)c1,6.286318149278613 -CC(N)CC(=CC=C1)C=C1,0.036980547196719206 -O(c(ccc(c1)C=CC)c1)C,2.3211612715861247 -COc1ccc(N)cc1,3.8488877932280037 -O=C(O)c(c(N)ccc1)c1,20.060380944519448 -Clc2cccc(c2)c1ccccc1,0.021202965065040626 -O=C(NC(C(=O)OC)Cc(cccc1)c1)C(N)CC(=O)O,0.4994850207500349 -n1c2ccc(Cl)cc2ncc1Oc3ccc(OC(C)C(=O)OCC)cc3,0.009924832004782804 -COC(=O)NS(=O)(=O)c1ccc(N)cc1,0.7817895162025876 -S=P(OC)(OC)SCN1N=Nc2ccccc2C1(=O),0.0011344859332252924 -CNC(=O)Oc1ccccc1OC(C)C,0.23895810443138246 -CC(C)(C)C(=O)C(Oc1ccc(Cl)cc1)n2cncn2,0.08510674803234901 -O=S(O)(=O)C(=CC=C1)C=C1CN(CC)=C(C=C2)C=CC2=C(C(C=C3)=CC=C3N(C)C)C(C=C4)=CC=C4N(CC)CC(C=C5)=CC(=C5)S(=O)(=O)O,1.009963174498295 -c(c(cccc1)c1)(cccc2)c2,1.6211890708511503 -BrC(Cl)Cl,0.7935120501519148 -ClC(Cl)C(Cl)(Cl)SN2C(=O)C1CC=CCC1C2(=O),0.034377949341570596 -O=C(Oc(c(c(ccc1)cc2)c1)c2)NC,0.07752660703214034 -CC1=C(SCCO1)C(=O)Nc2ccccc2,0.1274956638724717 -ClC1CC2C(C1Cl)C3(Cl)C(=C(Cl)C2(Cl)C3(Cl)Cl)Cl,0.0006588923229380624 -O=C(O)CCl,0.317470328693963 -ClC(=CC=C1N)C=C1,0.047032433723070206 -CC(C)OC(=O)C(O)(c1ccc(Cl)cc1)c2ccc(Cl)cc2,0.026531991066147967 -n1c(OC)nc(C)nc1NC(=O)NS(=O)(=O)c2ccccc2Cl,0.06987675250196507 -OS(=O)(=O)C(C(=CC=C2)C1=C2)=CC=C1N=NC(C(O)=C3N=NC(C(C=C5)=C4C=C5)=CC=C4S(O)(=O)=O)=CC(=C3O)CO,1.2093346835379808 -S=P(OCC)(OCC)Oc1ccc2C(C)=C(Cl)C(=O)Oc2c1,0.0022052807653206367 
-CNP(=O)(OC)Oc1ccc(cc1Cl)C(C)(C)C,0.013712205220154254 -C(C1C2C(C(O)C(O1)OC8C(OC(OC7C(OC(OC6C(OC(OC5C(C(C(OC4C(C(C(OC3C(C(C(O2)OC3CO)O)O)OC4CO)O)O)OC5CO)O)O)C(C6O)O)CO)C(C7O)O)CO)C(C8O)O)CO)O)O,1.4097112541302337 -n1c(N)nc(N)nc1NC2CC2,0.09026150563412319 -COC(=O)c1c(Cl)c(Cl)c(C(=O)OC)c(Cl)c1Cl,1.5061863289853148 -O=C(O)C(Cl)(Cl)C,0.1970361896096669 -Nc1cc(N)c(O)cc1,0.2013846888993215 -FC(F)(Cl)Cl,1.2405561628307704 -ClCCl,0.5887022388817106 -O=P(OC)(OC)OC=C(Cl)Cl,0.010408382386229365 -OC(c1ccc(Cl)cc1)(c2ccc(Cl)cc2)C(Cl)(Cl)Cl,0.05398319600278186 -ClC4=C(Cl)C5(Cl)C3C1CC(C2OC12)C3C4(Cl)C5(Cl)Cl,0.0001312648375209092 -CN(=C1C(C=C2)=CC=C2)N(C)C(=C1)C(C=C3)=CC=C3,0.49533572071941767 -O=C(NC(=O)c(c(F)ccc1)c1F)Nc(ccc(c2)Cl)c2,0.025749696789273527 -CC1=C(C)S(=O)(=O)CCS1(=O)=O,0.047557630336441704 -O=C(NC)CSP(OC)(OC)=S,0.001090477150926923 -COc1ccc(N)c(OC)c1,1.8018201517132568 -COP(=O)OC,0.9086866261501474 -CC(=C(N(=O)=O)C=C1N(=O)=O)C=C1,0.1866762157041476 -CN(C)C(=O)C(c1ccccc1)c2ccccc2,0.1253592168358431 -N(c(cccc1)c1)c(cccc2)c2,0.1831908345016181 -C(C=C1)(=N(C=C1)CC2)C(N2=C3)=CC=C3,0.002984821462389602 -CCOP(=S)(OCC)SCCSCC,0.00036442614798427517 -NC(=S)NNC(N)=S,6.303842268414009 -O=C(N(C)C)Nc(ccc(c1Cl)Cl)c1,0.02574063309087087 -O=P(O)(O)CCCl,1.0381053884590363 -O=C(OCC)C(O1)C1(c(cccc2)c2)C,0.8485352051922984 -COC(=O)NC(=NC1=C2)NC1=CC(=C2)SC(C=C3)=CC=C3,0.050108966959550236 -CN1C=C(c2ccccc2)C(=O)C(c3cc(C(F)(F)F)ccc3)=C1,0.07591497971688389 -c1cc(C(F)(F)F)cc(Cl)c1NC(C(C)C)C(=O)OC(C(#N))c2cccc(Oc3ccccc3)c2,0.004971041792562443 -S=P(OCC)(Sc1ccccc1)CC,0.006414179135682054 -n1c(C)nc(OC)nc1NC(=O)NS(=O)(=O)c2ccsc2C(=O)OC,0.06453419527613821 -C(C(C(C(C1Cl)Cl)Cl)Cl)(C1Cl)Cl,0.017192183580611947 -O=N(=O)N(CN1N(=O)=O)CN(C1)N(=O)=O,0.006753217705640206 -O=C(N=C(N(C1(=O))C)N(C)C)N1C(CCCC2)C2,0.19816672003956992 -n(c(c(ccc1)cc2)c1O)c2,0.9851335765350275 -c1cc(Cl)cc(Cl)c1C(OCC=C)Cn2cncc2,0.13459866849613178 -COc1cccc(OC)c1C(=O)Nc2onc(C(C)(CC)CC)c2,0.15252975563710267 
-n1c(OC)cc(OC)nc1NC(=O)NS(=O)(=O)Cc2ccccc2C(=O)OC,0.7529208210920754 -CCOC(=O)CC(SP(=S)(OC)OC)C(=O)OCC,0.1513509494941276 -CN(C)(CCC1)CC1,1.3133857473480115 -O=P(SCCCC)(SCCCC)SCCCC,0.003974424546249488 -COCC(=O)N(C(C)C(=O)OC)c1c(C)cccc1C,0.22374845318219344 -S=P(OC)(OC)SCN1C(=O)SC(OC)=N1,0.006615259485207122 -CNC(=O)ON=C(C)SC,0.061648442359631114 -COP(=S)(OC)Oc1ccc(cc1)N(=O)(=O),0.0009498211030948742 -O=C1N(N)C(SC)=NN=C1C(C)(C)C,0.06999926640768805 -COP(=O)(OC)OC(Br)C(Cl)(Cl)Br,0.005252325112411575 -OC(C(N)C1O)C(C)OC1(C)OC(CC(C)(C(C2O)C(O)=O)OC(O)(C2)CC(O)CC(C)(O3)C3C=C4)C=CC=CC=CC=CCC(C)OC4=O,0.10172294366080416 -O=N(=O)c(ccc(c1N)C)c1,0.05257947683683445 -O=N(=O)c(c(N)ccc1N)c1,0.5681125108300529 -O=C(O)C(=C(N)C=C1N(=O)=O)C=C1,6.506215164982792 -O=N(=O)c(c(c(ccc1)cc2)c1)c2,0.952831491808421 -c12c(N=Nc3ccccc3)c(O)ccc1cc(S(=O)(=O)O)cc2,0.5482080783455129 -CC(C)Oc1cc(c(Cl)cc1Cl)N2N=C(OC2(=O))C(C)(C)C,0.01448347496337274 -CNC(=O)ON=C(SC)C(=O)N(C)C,0.02280382932847922 -CCOP(=S)(OCC)Oc1ccc(cc1)N(=O)=O,0.012016729209736626 -Oc(c(c(c(c1Cl)Cl)Cl)Cl)c1Cl,0.037546481605565646 -NC(=N)NC(=N)NCCc1ccccc1,0.35564719019232227 -COP(=S)(OC)SCN2C(=O)c1ccccc1C2(=O),0.06302765174348351 -CCN(CC)C(=O)C(Cl)=C(C)OP(=O)(OC)OC,0.041042640567373466 -ClC3C6(Cl)C4C2C1OC1C5C2C3(Cl)C(Cl)(C45)C6(Cl)Cl,0.0018377077252927285 -O=C(OC(=O)c1cccc2)c12,8.000509872156579 -Nc1c(Cl)c(Cl)nc(C(=O)(O))c1Cl,0.24848916516834604 -CCN(CC)c1nc(C)cc(OP(=S)(OC)OC)n1,0.008187766847509327 -Nc3ccc2cc1ccc(N)cc1nc2c3,0.22461542255370148 -CCC(=O)Nc1ccc(Cl)c(Cl)c1,0.09170952329114665 -Clc1cc(Cl)ccc1C2(Cn3ncnc3)OC(CCC)CO2,0.07305234130123987 -O=C(N)c(nccn1)c1,6.408762052980724 -Oc1cc(O)c2C(=O)C(O)=C(c3cc(O)c(O)cc3)Oc2c1,6.729846937340625 -CCC(O)(C)C#C,0.4687038301254292 -CC(C(NCC)=C1)=CC(C1=O2)=C(C(C2=C3)=CC(C)=C3NCC)C(=CC=C4)C(=C4)C(=O)OCC,0.027053999376946393 -O=C(NS(=O)(=O)c1cccc2)c12,19.66323569952698 -c1cc(Cl)ccc1C2SC(=O)N(C(=O)NC3CCCCC3)C2C,0.4534134152107278 -n(c(nc(n1)NCC)NCC)c1Cl,0.024794616275543167 
-O=[S](NC1CCCCC1)(=O)[O-].[Na+],17.900880706433757 -O=C(OCC(C1OCC(C1O)O)O)CCCCCCCCCCC,19.866710908558982 -O(CC1O)C(C1O)C(O)COC(=O)CCCCCCCCCCCCCCCCC,16.727105323218392 -O=S(=O)(Nc(nc(cc1C)C)n1)c(ccc(N)c2)c2,0.1185642260256668 -CCNc1nc(NC(C)(C)C)nc(SC)n1,0.06214876624755196 -Oc(c(cc(c1)C(C)(C)C)Cl)c1,1.1697007223226876 -C(C(Cl)Cl)(Cl)Cl,0.6434343954290421 -COP(=O)(OC)OC(=CCl)c1cc(Cl)c(Cl)cc1Cl,0.2732525485855328 -CCN(CC)C(=O)SCc1ccc(Cl)cc1,0.019396419126203733 -COC(=O)NC(=S)Nc1ccccc1NC(=S)NC(=O)OC,0.09345959256991566 -N(C(=S)SSC(N(C)C)=S)(C)C,0.06238747379310184 -c12OC(CCCC(C)CCCC(C)CCCC(C)C)(C)CCc1c(C)c(OC(=O)C)c(C)c2C,4.230630449818821 -Cc1cc(N)ccc1NOS(O)(=O)=O,0.8431459792705229 -C(Br)(C(Br)(Br)Br)C1C(C)(C)C1C(=O)OC(C(#N))c2cccc(Oc3ccccc3)c2,0.004511229623452476 -O=C(O)COc(c(cc(c1Cl)Cl)Cl)c1,0.03914162418169542 -FC(F)(F)C(=CC(N(=O)=O)=C1N(C(C)C)C(C)C)C=C1N(=O)=O,0.1193036069506878 -Cc1cc(C)c(N)cc1C,0.46595489467866197 -CC(O)(C(O)C(O1)C)CC1(C)OC(C(C)O2)C(C(O)C2(C)OC(C(C)C(O)CC(=O)OC(CC)C3COC(C(OC)C4OC)OC(C)C4O)C(CC=O)CC(C)C(=O)C=CC(=C3)C)N(C)C,0.5295750507618869 -c1c(Cl)cc(Cl)cc1N2C(=O)C(C)(C=C)OC2(=O),0.25479642918707424 -O=C(OC(CCCC(O)CCCCCc1cc(O)cc2O)C)c12,0.0006203550142861557 -COC(=O)C1(C2=CC=CC=C2C3=C1C=C(C=C3)Cl)O,0.546052144921948 -CC(C(=O)O)OC1=CC(=CC=C1)Cl,0.4984573741185779 -P12P3P1P23,11.881024454247726 -C(CO)O,6.44456130582279 -CCCCOCC(C)OCC(C)O,0.6726932978936081 -C(CO)O,32.22280652911395 -C(CO)O,14.822491003392418 -[O-][As](=O)([O-])[O-],0.044990181342823746 -[Si](CN1C=NC=N1)(C2=CC=C(C=C2)F)C3=CC=C(C=C3)F,0.007657523838454347 -N(C(=S)SSC(N(C)C)=S)(C)C,0.04783039657471141 -COP(=O)(N)SC,0.006377136181192296 -N(C(=S)SSC(N(C)C)=S)(C)C,0.02275063210988447 -COP(=O)(NC(=O)(C))SC,0.1910836440808347 -C1=CC=C(C=C1)NC(=O)NC2=CN=NS2,0.13620822278144273 -CCOP(=S)(NC(C)C)OC1=CC=CC=C1C(=O)OC(C)C,0.0014476216329334154 -CC(=NOC(=O)N(C)SN(C)C(=O)ON=C(C)SC)SC,0.02821118623185781 -CCOP(=S)(OCC)OC1=NC(=NC(=C1)C)C(C)C,0.004928609097226672 -NC(CCCC1)C1,0.5898716318329822 
-CN1C=C(c2ccccc2)C(=O)C(c3cc(C(F)(F)F)ccc3)=C1,0.09868947363194906 -ClC1CC2C(C1Cl)C3(Cl)C(=C(Cl)C2(Cl)C3(Cl)Cl)Cl,0.0012201709684038192 -CCCCC(CC)COC(=O)C1=CC=CC=C1C(=O)OCC(CC)CCCC,0.5120902983161549 -OC(=O)CNCP(O)(O)=O,5.914602135360638 -C1CNC(=S)N1,0.0022514113902230405 -O=C(N(OC)C)Nc(ccc(c1Cl)Cl)c1,0.025090939601491648 -C1=CC=C2C(=C1)NC(=S)S2,4.484270077422418 -CCOP(=S)(OCC)OC1=NC(=C(C=C1Cl)Cl)Cl,0.028523647387248163 -c(c(c(c(c1Cl)Cl)Cl)Cl)(c1Cl)Cl,0.0010183220720957982 -COc1ccc(cc1)C(c2ccc(OC)cc2)C(Cl)(Cl)Cl,0.36163948246786254 -C1=CC(=CC=C1Cl)Cl,2.0407891160090657 -CC(C)OC(=O)NC1=CC(=CC=C1)Cl,2.340158076742021 -COP(=O)(OC)OC=C(Cl)Cl,0.009729574839301364 -CCOP(=S)(OCC)Oc1ccc(cc1)N(=O)=O,0.001442007505168395 -CNC(=O)N(C)c1nnc(s1)C(C)(C)C,0.1751969016077557 -CCCCOCCOCCOCC1=CC2=C(C=C1CCC)OCO2,0.7386866446932013 -CC(C(=O)O)OC1=C(C=C(C=C1)Cl)Cl,0.03828744186371015 -CC1(C(C1C(=O)OC(C#N)C2=CC(=C(C=C2)F)OC3=CC=CC=C3)C=C(Cl)Cl)C,0.028782768433509572 -C(#N)c(c(c(c(c1C(#N))Cl)Cl)Cl)c1Cl,0.015042627044387032 -O=C(OCC)C(O)(c(ccc(c1)Cl)c1)c(ccc(c2)Cl)c2,0.056582904287311254 -O=C(N(C)C)Nc(ccc(c1)Cl)c1,0.6292491939569526 -O=C(N(SC(Cl)(Cl)Cl)C(=O)C1CC=CC2)C12,0.3326798171006209 -CCc1cccc(C)c1N(C(C)COC)C(=O)CCl,0.5285529966699751 -C1=CC(=C(C=C1Cl)Cl)OCC(=O)O,0.022620602193004043 -CCNC1=NC(=NC(=N1)Cl)NC(C)(C)C#N,0.005193343612552968 -C(C(C(C(C1Cl)Cl)Cl)Cl)(C1Cl)Cl,0.015816808894162992 -COP(=S)(OC)OC1=CC(=C(C=C1Cl)Cl)Cl,0.15549919159080278 -C1C2C=CC1C3C2C4(C(=C(C3(C4(Cl)Cl)Cl)Cl)Cl)Cl,2.7404023436797774e-05 -CC1(C(C1C(=O)OCC2=CC(=CC=C2)OC3=CC=CC=C3)C=C(Cl)Cl)C,0.06389160712181856 -CCN(CC)C(=O)C(C)OC1=CC=CC2=CC=CC=C21,0.36852210915226874 -ClC1C=CC2C1C3(Cl)C(=C(Cl)C2(Cl)C3(Cl)Cl)Cl,0.0006696708996117783 -CC(=CC(=O)NC)OP(=O)(OC)OC,0.0020164586039868883 -CC(C)C1(C)N=C(NC1(=O))c3nc2ccccc2cc3C(=O)(O),0.06423944765895072 -CC(C)Nc1nc(Cl)nc(NC(C)C)n1,0.21766590408142725 -CC(C(=O)O)(Cl)Cl,0.3497269961122948 -CC1(C(C1C(=O)OC(C#N)C2=CC(=C(C=C2)F)OC3=CC=CC=C3)C=C(Cl)Cl)C,0.017269661060105742 
-O=C(NC)CSP(OC)(OC)=S,0.02180954301853846 -C12C3(C4(C5(C3(C(C1(C5(C2(C4(Cl)Cl)Cl)Cl)Cl)(Cl)Cl)Cl)Cl)Cl)Cl,1.9565721591442926e-05 -COC(=O)C1=CC=CC=C1C(=O)OC,10.299509743336218 -CCOP(=S)(OCC)SCSC(C)(C)C,0.0001733519259052264 -CCC(C)SP(=O)(OCC)SC(C)CC,0.0009245829520661433 -CCOP(=S)(OCC)SCSC(C)(C)C,0.006934077036209056 -C1C(C(C(=O)N1C2=CC=CC(=C2)C(F)(F)F)Cl)CCl,0.016019730669239306 -O=C(O)C(C(C(=O)O)C(O1)CC2)C12,0.6177415369409439 -O=C(Oc(c(OC(C1)(C)C)c1cc2)c2)NC,0.022598624918870935 -Oc(c(c(c(c1)Cl)Cl)Cc(c(c(cc2Cl)Cl)Cl)c2O)c1Cl,0.012287924553322883 -CC1(CON(C1=O)CC2=CC=CC=C2Cl)C,0.08969617860069455 -CCC(C)N1C(=O)C(=C(NC1=O)C)Br,0.23935747721355113 -CC1=CC(=CC(=C1N(C)C)C)OC(=O)NC,0.0067481385934503825 -CNC(=O)OC1=CC=CC(=C1)N=CN(C)C,0.056495719658295813 -CC1=NN(C(=O)N1C(F)F)C2=CC(=C(C=C2Cl)Cl)NS(=O)(=O)C,0.1730416993562668 -CCOP(=S)(CC)SC1=CC=CC=C1,0.020298035239500172 -CCOP(=S)(OCC)SC(CCl)N1C(=O)C2=CC=CC=C2C1=O,0.006347661308292605 -N1CC(C)(C)CNC1=NN=C(C=Cc2ccc(C(F)(F)F)cc2)C=Cc3ccc(C(F)(F)F)cc3,0.010111728942243584 -CC1=C(C(=C(C(=C1F)F)COC(=O)C2C(C2(C)C)C=C(C(F)(F)F)Cl)F)F,0.010985502766340648 -CC1=CC(=C(C=C1)N=CN(C)C=NC2=C(C=C(C=C2)C)C)C,0.034764112883573416 -S=P(OC)(OC)SCN1C(=O)SC(OC)=N1,0.002646103794082849 -CC(C)N(C(C)C)C(=O)SCC(Cl)=C(Cl)Cl,0.04102878665011248 -S=P(OC)(OC)SCN1N=Nc2ccccc2C1(=O),0.0070905370826580775 -CC1=NC(=NC(=N1)OC)NC(=O)NS(=O)(=O)C2=CC=CC=C2CCC(F)(F)F,0.19051986050321804 -O=C(N(S(=O)(=O)Nc1cccc2)C(C)C)c12,0.16647322477947293 -CCN(CC)c1nc(C)cc(OP(=S)(OC)OC)n1,0.04519647299825149 -O=C(ON=CC(SC)(C)C)NC,0.0005255875464343458 -ClC(Cl)(Cl)CC1(OC1)c2cc(Cl)cc(Cl)c2,0.09362507489225783 -CCC1CCCC(C(C(=O)C2CC3C(C2CC(=O)O1)CCC4C3CC(C4)OC5CC(C(C(C5OC)OC)OC)C)C)OC6CCC(C(O6)C)N(C)C,0.03269690443692089 -CC(C)(C)C(=NOC(=O)NC)CSC,0.027483045022449526 -CON=C(CC1=CN=CC=C1)C2=C(C=C(C=C2)Cl)Cl,0.15245767876475944 -CC(=CC1C(C1(C)C)C(=O)OCN2C(=O)C3=C(C2=O)CCCC3)C,0.7543614918373561 -C1COC(O1)(CN2C=NC=N2)C3=C(C=C(C=C3)Cl)Cl,0.03331771398901528 
-CCCOC(=O)C1=CN=C(C=C1)C(=O)OCCC,0.9949124950582696 -CC1=CC(=NC(=N1)NC(=O)NS(=O)(=O)C2=CC=CC=C2C(=O)OC3COC3)C,0.20422574060250331 -C1=C(C(=NC(=C1Cl)Cl)OCC(=O)O)Cl,0.1403669879303106 -CCOCN1C(=C(C(=C1C(F)(F)F)Br)C#N)C2=CC=C(C=C2)Cl,0.03336499327732185 -CC(C)CC1=C(C(=NC(=C1C(=O)SC)C(F)(F)F)C(F)F)C(=O)SC,0.00904300899921393 -CC12CC1(C(=O)N(C2=O)C3=CC(=CC(=C3)Cl)Cl)C,0.05279126047017867 -CC1=CC(=C(C=C1)C(=O)OC)C2=NC(C(=O)N2)(C)C(C)C,0.1734054330003024 -COP(=S)(OC)OC1=NC(=C(C=C1Cl)Cl)Cl,0.009301369775521361 -CC(C)CC1=C(C(=NC(=C1C(=O)OC)C(F)F)C(F)(F)F)C2=NCCS2,0.11151045196043953 -CCOC(=O)C(CC1=CC(=C(C=C1Cl)F)N2C(=O)N(C(=N2)C)C(F)F)Cl,0.029112705155716945 -CC(C)=CC3C(C(=O)OCc2coc(Cc1ccccc1)c2)C3(C)C,0.3693416417277341 -CCCSP(=S)(OCC)OC1=CC=C(C=C1)SC,0.03566479582586673 -CC1=CC(=C(C(=C1)OC(=O)NC)C)C,0.30635114568601185 -CC1=CC=CC=C1COC2CC3(CCC2(O3)C)C(C)C,0.5466515334085721 -CC1=C2C(=CC=C1)SC3=NN=CN23,0.16381576159162972 -CCC(=C1C(=O)CC(CC1=O)CC(C)SCC)NOCC=CCl,0.27784628232227476 -CCCN(CCC)C(=O)SCC,0.047538995974292175 -CC(C)OC(=O)C=C(C)C=CCC(C)CCCC(C)(C)OC,0.14816176662421726 -COP(=S)(OC)Oc1ccc(SC)c(C)c1,0.013473309158983109 -COC1=C(C=C(C=C1)C(=CC(=O)N2CCOCC2)C3=CC=C(C=C3)Cl)OC,0.11937399144446861 -CCSC(=O)N(CC(C)C)CC(C)C,0.4600420791288938 -CC(C)OP(=S)(OC(C)C)SCCNS(=O)(=O)C1=CC=CC=C1,0.03773457509937652 -CC(=CC1C(C1(C)C)C(=O)OCC2=CC(=CC=C2)OC3=CC=CC=C3)C,0.42802021191337764 -CC1=CC(=CC(=C1C)C)OC(=O)NC,0.05174850433885335 -CCOP(=S)(OCC)SCSC1=CC=C(C=C1)Cl,0.0029165972759564764 -C1CN(CCN1C(C(Cl)(Cl)Cl)NC=O)C(C(Cl)(Cl)Cl)NC=O,0.22990526799413355 -C(=CC=C1)(C2=C1)NC(=N2)C(=CS3)N=C3,0.009938002763559809 -C1(=C(C(=C(C(=C1Cl)Cl)Cl)Cl)Cl)[N+](=O)[O-],0.423248605734443 -CCCC(=NOCC)C1C(=O)CC(CC1=O)CC(C)SCC,0.05056765552287047 -ClC2(Cl)C4(Cl)C1(Cl)C5(Cl)C(Cl)(Cl)C3(Cl)C1(Cl)C2(Cl)C3(Cl)C45Cl,0.0012831252531881078 -CCN(C1CCCCC1)C(=O)SCC,0.013930451940080113 -CC1=NC(=NC(=N1)OC)NC(=O)NS(=O)(=O)C2=CC=CC=C2OCCCl,0.5494924735209582 -C(C(=O)O)OC1=NC(=C(C(=C1Cl)N)Cl)F,1.9605490478397496 
-CC1=C(C(=O)CC1OC(=O)C2C(C2(C)C)C=C(C)C)CC#C,0.316253365684832 -C1C(COC1(CN2C=NC=N2)C3=C(C=C(C=C3)Cl)Cl)Br,0.017185416964361586 -C1=NNC(=N1)N,0.029733601205328832 -C1=CC(=C(C(=C1)F)C(=O)NC(=O)NC2=CC(=C(C(=C2F)Cl)F)Cl)F,0.06822190749765324 -C1=CC(=CC=C1OS(=O)(=O)C2=CC=C(C=C2)Cl)Cl,0.008246440044818412 -FC(F)(F)C(=CC(N(=O)=O)=C1N(C(C)C)C(C)C)C=C1N(=O)=O,2.982590173767195 -CC1=C(C(=O)CC1OC(=O)C2C(C2(C)C)C=C(C)C)CC=C,0.24800936112986982 -C1=CC(=C(C(=C1)Cl)C#N)Cl,0.014533918736325764 -C1C(O1)COC2=CC=CC=C2C3=CC=CC=C3,2.209744922072461 -CC1=CC=CC=C1OCC2=CC=CC=C2C(=NOC)C(=O)OC,1.1967534090558043 -O=N(=O)C(C(=C1N(=O)=O)N(C(C)C)C(C)C)=CC(=C1)S(=O)(=O)N,0.10642121227099519 -C1=CC(=C(C2=NC=C(C=C21)Cl)C(=O)O)Cl,3.127347059508829 -CC(C)NC(=O)N1CC(=O)N(C1=O)C2=CC(=CC(=C2)Cl)Cl,0.13932359364492994 -CCCN(CCCl)C1=C(C=C(C=C1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-],0.5690227874227859 -C1=CC=C(C=C1)C2=CC=CC=C2O,5.875192118782284 -O=C(N(C)C)Nc(cccc1C(F)(F)F)c1,0.06459882942614491 -C1=CC(=NC(=C1)Cl)C(Cl)(Cl)Cl,0.00433075312836283 -Clc1cc(Cl)cc(Cl)c1OCCN(CCC)C(=O)n2cncc2,0.01991156926953532 -CC1=CC(=C(C=C1NC(=O)C)NS(=O)(=O)C(F)(F)F)C,0.08894826507859208 -C(=C(I)I)(I)I,0.09404873168890004 -C1=C(C=C(C(=C1Cl)N2C(=C(C(=N2)C#N)S(=O)C(F)(F)F)N)Cl)C(F)(F)F,0.0001372533562906347 -O=C(N(C)C)Nc(ccc(c1Cl)Cl)c1,0.026813159469657157 -CC1=C(C(=O)CC1OC(=O)C2C(C2(C)C)C=C(C)C)CC=C,0.08101639130242413 -CC1=CC2=C(C=C1)N=C3C(=N2)SC(=O)S3,0.03201059303080734 -CC(C)N(C(=O)CCl)c1ccccc1,0.10865048725491992 -CC(C)C1(C(=O)NC(=N1)C2=C(C=CC=N2)C(=O)O)C,1.913681483026602 -CC1(C(C1(C)C)C(=O)OC(C#N)C2=CC(=CC=C2)OC3=CC=CC=C3)C,0.05566320606558952 -CN1CN(C(=S)SC1)C,0.18486987933542975 -ClC(Cl)=CC1C(C)(C)C1C(=O)OC(C(#N))c2cccc(Oc3ccccc3)c2,0.18015976856532 -ClC2C1OC1C3C2C4(Cl)C(=C(Cl)C3(Cl)C4(Cl)Cl)Cl,0.0005137200498000217 -CCOC(=O)CN1C2=C(C=CC=C2Cl)SC1=O,0.046003238627999404 -CCCN(CCC)C1=C(C=C(C(=C1[N+](=O)[O-])N)C(F)(F)F)[N+](=O)[O-],0.08392957349588569 -C1=CC=C(C=C1)C(CCC2=CC=C(C=C2)Cl)(CN3C=NC=N3)C#N,0.11875847044790469 
-CC(C)(C)C(CCC1=CC=C(C=C1)Cl)(CN2C=NC=N2)O,0.05165383561566402 -CC1=C(C=CC=C1COC(=O)C2C(C2(C)C)C=C(C(F)(F)F)Cl)C3=CC=CC=C3,0.011824026606519262 -C(=CC=C1)(C2=C1)NC(=N2)C(=CS3)N=C3,0.19876005527119617 -C1=C(C=C(C(=C1Cl)N)Cl)[N+](=O)[O-],0.7245881151318344 -CC1=C(C=C(C=C1C(=O)N)[N+](=O)[O-])[N+](=O)[O-],0.027758250773633555 -CC(C)OC1=CC=CC(=C1)NC(=O)C2=CC=CC=C2C(F)(F)F,1.5465050300849357 -OC(c1ccc(Cl)cc1)(c2ccc(Cl)cc2)C(Cl)(Cl)Cl,0.006747899500347733 -O=P(O)(O)CCCl,3.0866333550182015 -CN(C(=O)NC1=CC=C(C=C1)Br)OC,0.048243951057630914 -CC1=CC(=CC=C1)NC(=O)OC2=CC=CC(=C2)NC(=O)OC,0.0832475217878744 -CC(C)(C)C1=NN=C(S1)N2C(CN(C2=O)C)O,0.19506513302817866 -S=P(OCC)(OCC)Oc1ccc2C(C)=C(Cl)C(=O)Oc2c1,0.004686221626306353 -COC(=O)c1ccccc1S(=O)(=O)NC(=O)N(C)c2nc(OC)nc(C)n2,0.031614325062739264 -C1=CC=C(C(=C1)NC2=NC(=NC(=N2)Cl)Cl)Cl,0.004173898399328111 -CC1=C(C=CC(=C1)OP(=S)(OC)OC)[N+](=O)[O-],0.001659247904766673 -COc1c(Cl)ccc(Cl)c1C(=O)(O),0.520273850439093 -CC1(C(C1C(=O)OC(C#N)C2=CC(=CC=C2)OC3=CC=CC=C3)C=C(Br)Br)C,0.004948543461552866 -C1=CC=C(C(=C1)C(C2=CC=C(C=C2)F)(C3=CN=CN=C3)O)Cl,0.007943029289634557 -c1ccc2nc(NC(=O)OC)n(C(=O)NCCCC)c2c1,0.3961177430023906 -CCOC(=O)COC(=O)C1=C(C=CC(=C1)OC2=C(C=C(C=C2)C(F)(F)F)Cl)[N+](=O)[O-],0.05583516191627437 -CC(C)(C)C(C(N1C=NC=N1)OC2=CC=C(C=C2)C3=CC=CC=C3)O,0.07409262028018154 -CCNC(=O)NC(=O)C(=NOC)C#N,0.15289185096526225 -CCOC1=C(C=CC(=C1)OC2=C(C=C(C=C2)C(F)(F)F)Cl)[N+](=O)[O-],0.11058877880543937 -CC1=NC=C(N1CCO)[N+](=O)[O-],0.8764039114257128 -O=C(N(SC(Cl)(Cl)Cl)C(=O)c1cccc2)c12,1.6860133324539086 -CC(C)(C)c2ccc(OC1CCCCC1OS(=O)OCC#C)cc2,0.2853292217012047 -CCCCC(CN1C=NC=N1)(C2=C(C=C(C=C2)Cl)Cl)O,0.014958135679074535 -CC(C)(C)C(C(=CC1=C(C=C(C=C1)Cl)Cl)N2C=NC=N2)O,0.15327033840680634 -C1=CC(C2C1C3(C(=C(C2(C3(Cl)Cl)Cl)Cl)Cl)Cl)Cl,0.001017899767409903 -C1=CC=C2C(=C1)C(=O)C3=C(C2=O)SC(=C(S3)C#N)C#N,0.03374687200243409 -CC1=NN(C(=C1C=NOCC2=CC=C(C=C2)C(=O)OC(C)(C)C)OC3=CC=CC=C3)C,0.0073074288460468996 
-CS(=O)(=O)C1=C(C=CC(=C1)C(F)(F)F)C(=O)C2=C(ON=C2)C3CC3,0.05566064749641608 -CC1=C(N=C(N=C1OC(=O)N(C)C)N(C)C)C,0.0524579222415799 -CCN(CC)C(=O)C(Cl)=C(C)OP(=O)(OC)OC,0.005005200069191886 -C1=CC(=CC=C1C(CN)O)O,1.6320834707547616 -CC1=C(C(=CC=C1)C)N(C(=O)COC)N2CCOC2=O,0.17965983350851364 -c1c(C(F)(F)F)cccc1N2C(=O)C(Cl)=C(NC)C=N2,0.06174515112035177 -CCC(C)NC1=C(C=C(C=C1[N+](=O)[O-])C(C)(C)C)[N+](=O)[O-],0.16929970598735858 -C1=CC(=CC=C1S(=O)(=O)C2=CC(=C(C=C2Cl)Cl)Cl)Cl,0.6459733503975151 -CCCCC1=C(NC(=NC1=O)NCC)C,0.1194525860672606 -n(c(nc(n1)NCC)NCC)c1Cl,0.026282293252075754 -FC(F)(F)C(C=C1N(=O)=O)=CC(N(=O)=O)=C1N(CC)CC(C)=C,0.0375078950368263 -C1CCC(C1)N(CC2=CC=C(C=C2)Cl)C(=O)NC3=CC=CC=C3,0.0760257762657501 -CS(=O)(=O)NC(=O)C1=C(C=CC(=C1)OC2=C(C=C(C=C2)C(F)(F)F)Cl)[N+](=O)[O-],0.11395676083924232 -CCOC(=O)C(C)OC1=CC=C(C=C1)OC2=NC3=C(O2)C=C(C=C3)Cl,0.02487724874434851 -CCC1=C(C(=CC=C1)CC)N(CNC(=O)C)C(=O)CCl,0.21058487877925733 -NC(=N)NCCCCCCCCCCCC(OC(=O)C),0.10160268068512719 -C1=CC(=CC(=C1)Cl)NC(=O)OCC#CCCl,1.743505808935165 -CC(C)C(C(=O)OC(C(#N))c2cccc(Oc1ccccc1)c2)c3ccc(Cl)cc3,0.05953797389131243 -CC(C)C1=C(C=CC(=C1)C(C)(C)C2=CC(=C(C=C2)O)C(C)C)O,0.08001387248515598 -CCN(CC1=C(C=CC=C1Cl)F)C2=C(C=C(C=C2[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-],0.1185590456888386 -CCCCCCCCSC(=O)OC1=CC(=NN=C1C2=CC=CC=C2)Cl,0.17813968959673715 -CC1=C(C(=O)CC1OC(=O)C2C(C2(C)C)C=C(C)C)CC=C,0.04563372244789605 -CCCCNC(=O)N1C2=CC=CC=C2N=C1NC(=O)OC,0.8611255282660666 -CC1=C(C(=CC=C1)C)N(C(C)C(=O)OC)C(=O)CC2=CC=CC=C2,0.14136381415796706 -CC(C)C(C1=CC=C(C=C1)OC(F)F)C(=O)OC(C#N)C2=CC(=CC=C2)OC3=CC=CC=C3,0.013290157156772887 -Clc1ccccc1c2nnc(c3ccccc3Cl)nn2,0.06597478470118634 -CCOC(=O)NCCOC1=CC=C(C=C1)OC2=CC=CC=C2,0.03318543029523152 -CCC1=C(C(=CC=C1)CC)N(CC(=O)OCC)C(=O)CCl,0.1603572605822803 -c1(O2)c(CC2(C)C)cccc1OC(=O)N(C)SN(CCCC)CCCC,0.06569530810416269 -CCCCOC(=O)C(C)OC1=CC=C(C=C1)OC2=NC=C(C=C2)C(F)(F)F,0.007825509706097071 -COC1=CC(=C(C=C1Cl)OC)Cl,0.6037074787089276 
-CCOP(=S)(OCC)OC1=NN(C(=N1)Cl)C(C)C,0.039841737145637234 -n(c(nc(n1)NC(C)C)NCC)c1Cl,0.11591071091933607 -CC(C)(C)C(C(=CC1=CC=C(C=C1)Cl)N2C=NC=N2)O,0.13506940531624406 -CCCCCCCCc1cc(N(=O)(=O))c(OC(=O)C=CC)c(c1)N(=O)(=O),0.1372145060102149 -c1cc(OC(F)(F)F)ccc1C(O)(C(C)C)c2cncnc2,0.038746408312020406 -COP(=O)(C(C(Cl)(Cl)Cl)O)OC,0.07768900686568829 -C1=CC(=C(C=C1C(F)(F)F)Cl)OC2=CC(=C(C=C2)[N+](=O)[O-])C(=O)O,0.34563108073944815 -CCCN(CC1CC1)C2=C(C=C(C=C2[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-],0.014397200032537671 -CCOC(=O)C(C)OC(=O)C1=C(C=CC(=C1)OC2=C(C=C(C=C2)C(F)(F)F)Cl)[N+](=O)[O-],0.10827828411229923 -CCSC(=O)N1CCCCCC1,0.07907000434271044 -CCC1=CC=C(C=C1)C(=O)NN(C(=O)C2=CC(=CC(=C2)C)C)C(C)(C)C,0.13618183361575933 -ClC(Cl)C(Cl)(Cl)SN2C(=O)C1CC=CCC1C2(=O),0.04297243667696324 -COC(=O)C1=CC=CC=C1S(=O)(=O)NC(=O)NC2=NC(=CC(=N2)OC(F)F)OC(F)F,0.2989300503468667 -CC(C)(C)C(C(N1C=NC=N1)OC2=CC=C(C=C2)Cl)O,0.08452667530010859 -CC1=C(C=CC(=C1)Cl)OCC(=O)O,0.019938294964743114 -COC=C(C1=CC=CC=C1OC2=NC=NC(=C2)OC3=CC=CC=C3C#N)C(=O)OC,0.15431812608561873 -C1=CC=C(C(=C1)C(C2=CC=C(C=C2)Cl)(C3=CN=CN=C3)O)Cl,0.00694452873492003 -[O-]Br(=O)=O,0.047692690196102956 -OP(=O)OCC,3.6347465046005896 -COP(N)(=O)SC,0.000708570686799144 -CCOP(=O)(NC(C)C)Oc1ccc(SC)c(C)c1,0.004944661980269876 -CCOP(=S)(OCC)SCCSCC,0.00014577045919371006 -CCOP(=S)(OCC)SCSP(=S)(OCC)OCC,0.005201883810203027 -O=C(OCC(CCCC)CC)CCCCC(=O)OCC(CCCC)CC,4.047856676081442 -CN(C=Nc1ccc(C)cc1C)C=Nc2ccc(C)cc2C,0.03408246361134649 -[C@@]14([C@@H]5OCC1=CC=C[C@@H]([C@H](O[C@H]2C[C@@H]([C@H]([C@@H](O2)C)O[C@H]3C[C@@H]([C@H]([C@@H](O3)C)O)OC)OC)C(=CC[C@@H]6C[C@H](OC([C@@H]4C=C([C@H]5O)C)=O)C[C@]7(O6)O[C@@H]([C@H](C=C7)C)[C@H](CC)C)C)C)O,0.002290749011702154 -O=C(N(S(=O)(=O)Nc1cccc2)C(C)C)c12,0.14566407168203882 -S=P(OC)(OC)SCN1C(=O)SC(OC)=N1,0.004134537178254452 -C(#N)Cl,1.1387594679715767 -C(#N)Br,1.1517974649126617 -C1=CC(=CC=C1C(C2=CC=C(C=C2)Cl)C(Cl)(Cl)Cl)Cl,0.0007052459522690667 -c(cccc1)(c1)C(C)C,2.7539366734341955 
-CCCN(CCC)C(=O)SCC,0.13205276659525605 -NC(CCCC1)C1,0.6049965454697254 -ClC1CC2C(C1Cl)C3(Cl)C(=C(Cl)C2(Cl)C3(Cl)Cl)Cl,0.014642051620845831 -CC(C)OC(=O)NC1=CC(=CC=C1)Cl,4.680316153484042 -COC(=O)c1c(Cl)c(Cl)c(C(=O)OC)c(Cl)c1Cl,0.030123726579706293 -COC(=O)C1=CC=C(C=C1)C(=O)OC,0.6437193589585136 -N(C(=S)NC1)C1,0.0024471862937206963 -O=C(N(OC)C)Nc(ccc(c1Cl)Cl)c1,0.010036375840596658 -c(cccc1)(c1)C=C,2.736460951374337 -COC(=O)c1ccccc1S(=O)(=O)NC(=O)Nc2nc(OC)nc(C)n2,0.655542030995076 -C1=CC(=CC=C1N)Cl,0.09798423692306293 -FC(F)(F)C(Cl)=CC1C(C)(C)C1C(=O)OC(C(#N))c2cc(Oc3ccccc3)ccc2,0.02778703580061686 -C(Cl)(Br)Br,0.13683526627950768 -C=C(Cl)Cl,0.14441434207714035 -C(C=CCl)Cl,0.045958425107502164 -COP(=O)(OC)OC(=CCl)c1cc(Cl)c(Cl)cc1Cl,0.1366262742927664 -Oc(ccc(c1)C(c(ccc(O)c2)c2)(C)C)c1,0.21902317939829427 -O=C(OCc(cccc1)c1)c(c(ccc2)C(=O)OCCCC)c2,1.504675539130048 -O=C(NCCCC1)C1,1.10465364954589 -c(cccc1)(c1)Cl,1.0661274430976688 -C(Cl)(Cl)Cl,0.502606685808163 -ClCCl,0.6190792744080069 -C1C2C3C(C1C4C2O4)C5(C(=C(C3(C5(Cl)Cl)Cl)Cl)Cl)Cl,0.000328162093802273 -OCCO,16.111403264556976 -O=C(C=C(CC1(C)C)C)C1,1.295160023171064 -C(F)(Cl)(Cl)Cl,2.540618964665013 -CCc1cccc(CC)c1N(COC)C(=O)CCl,0.05189661748967905 -c1ccccc1c2c(C)c(COC(=O)C3C(C)(C)C3C=C(Cl)C(F)(F)F)ccc2,0.011824026606519262 -n1c(Cl)cc(OC)nc1NC(=O)NS(=O)(=O)c2ccccc2C(=O)OCC,0.30133493788161053 -O=C(NC(=O)c(c(F)ccc1)c1F)Nc(ccc(c2)Cl)c2,0.02510595436954169 -c(cccc1)(c1)CC,2.741016342485753 -O=C(N(SC(Cl)(Cl)Cl)C(=O)c1cccc2)c12,0.1348810665963127 -Clc1cc(C(F)(F)F)cnc1Oc2ccc(OC(C)C(=O)OC)cc2,0.0026615073878255148 -C#N,1.1470716002092851 -ClC(C(OC(C=C2C(=O)OC(C)C(=O)OCC)=CC=C2N(=O)=O)=C1)=CC=C1C(F)(F)F,0.10827828411229923 -c1c(C(F)(F)F)cccc1N2C(=O)C(Cl)=C(NC)C=N2,0.1687700797289615 -O=N(=O)C(C(=C1N(=O)=O)N(C(C)C)C(C)C)=CC(=C1)S(=O)(=O)N,0.12992280391195832 -CN(=CC=C1C(C=C2)=CC=N2C)C=C1,0.019100264469901956 -C1=C(C=C(C=C1[N+](=O)[O-])[N+](=O)[O-])[N+](=O)[O-],0.06245761469536169 -C(=C)Cl,0.020800592400871575 
-C1CCC(=O)CC1,9.272184465524795 -CC1(C(C1(C)C)C(=O)OC(C#N)C2=CC(=CC=C2)OC3=CC=CC=C3)C,0.07154653735936956 -C1=CC(=O)NNC1(=O),4.460830164062196 -CCSC(=O)N1CCCCCC1,0.010677920910561842 -C(C(Cl)(Cl)Cl)(O)O,0.8161882413029702 -ClC2C1OC1C3C2C4(Cl)C(=C(Cl)C3(Cl)C4(Cl)Cl)Cl,6.421500622500271e-05 -Clc1cc(C(F)(F)F)ccc1Oc2cc(OCC)c(N(=O)(=O))cc2,0.02764719470135984 -c1cc(Cl)ccc1C(C(#N))(CCCC)Cn2ncnc2,0.03407493882440353 -CC1=C(C=C(C=C1[N+](=O)[O-])[N+](=O)[O-])[N+](=O)[O-],0.008805487227420639 -CC(C)OC(=O)C(C1=CC=C(C=C1)Br)(C2=CC=C(C=C2)Br)O,0.06073132568962639 -C[N+](C)(C)CCCl,1.0602168942789227 -CCC(=C1C(=O)CC(CC1=O)CC(C)SCC)NOCC=CCl,0.2389478027971563 -CC1=NC(=NC(=C1)C2CC2)NC3=CC=CC=C3,0.15801925526767843 -N(c(cccc1)c1)c(cccc2)c2,0.14773454395291782 -CC1(C(=O)N(C(=O)O1)NC2=CC=CC=C2)C3=CC=C(C=C3)OC4=CC=CC=C4,0.044873074905021335 -CC1(CCCCC1)C(=O)NC2=C(C(=C(C=C2)O)Cl)Cl,0.9662594125910484 -C1=CC(=C2C(=C1)OC(O2)(F)F)C3=CNC=C3C#N,0.443217671652664 -C(F)(F)(F)c1ccccc1C(=O)Nc2cccc(OC(C)C)c2,0.2690918752347788 -C(CCCCN=C(N)N)CCCNCCCCCCCCN=C(N)N,0.053436074592710235 -C1CN(C(=N1)N[N+](=O)[O-])CC2=CN=C(C=C2)Cl,0.0664943030028045 -COC(=O)C12CC3=C(C1=NN(CO2)C(=O)N(C4=CC=C(C=C4)OC(F)(F)F)C(=O)OC)C=CC(=C3)Cl,0.006820319755914397 -CC1=CC=CC=C1OCC2=CC=CC=C2C(=NOC)C(=O)OC,1.1807966969350603 -CC1=CC(=CC(=C1)C(=O)N(C(C)(C)C)NC(=O)C2=C(C(=CC=C2)OC)C)C,1.1154252951100516 -C1=CC(=C(C(=C1)F)C(=O)NC(=O)NC2=CC(=C(C=C2)OC(C(OC(F)(F)F)F)(F)F)Cl)F,0.07306609422899836 -CC1=CC=C(C=C1)N(SC(F)(Cl)Cl)S(=O)(=O)N(C)C,0.051834835094095484 -CC(C)(C)C1=C(C=CC(=C1)O)O,1.3536524792656537 -CC(=NOCC1=CC=CC=C1C(=NOC)C(=O)OC)C2=CC(=CC=C2)C(F)(F)F,0.14692519722320194 -COC(=O)N(C1=CC=CC=C1COC2=NN(C=C2)C3=CC=C(C=C3)Cl)OC,0.02320682656135787 -CC(C)N1C(=NC(C)(C)C)SCN(C1=O)C2=CC=CC=C2,0.02848365588181601 -C1=CC=C(C(=C1)C2=NN=C(N=N2)C3=CC=CC=C3Cl)Cl,0.05706818876652619 -CCCC(=C1C(=O)CC(CC1=O)C2CCCSC2)NOCC,0.08603044408485085 -C1CC1NC2=NC(=C(C(=N2)N)C#N)N,0.11566455596376966 
-C(C=C1)(=N(C=C1)CC2)C(N2=C3)=CC=C3,0.002933359023382885 -C1=CC=C2C(=C1)C(=O)C3=C(C2=O)SC(=C(S3)C#N)C#N,0.020248123201460456 -CCOC1=CC2=C(C=C1)NC(C=C2C)(C)C,0.05522147585284508 -C1=CC=C(C=C1)C(CCC2=CC=C(C=C2)Cl)(CN3C=NC=N3)C#N,0.08906885283592852 -CC1CN(CC(O1)C)CC(C)CC2=CC=C(C=C2)C(C)(C)C,0.005601647965290344 -C[Si](CN1C=NC=N1)(C2=CC=C(C=C2)F)C3=CC=C(C=C3)F,0.006341300659739408 -c1cc(Cl)cc(Cl)c1C(OCC=C)Cn2cncc2,0.05047450068604942 -CN(=CC=C1C(C=C2)=CC=N2C)C=C1,0.012988179839533329 -CCCCOCCOCCOCC1=CC2=C(C=C1CCC)OCO2,0.29547465787728056 -Clc1cc(Cl)ccc1C2(Cn3ncnc3)OC(CCC)CO2,0.2805209905967611 -C(=CC=C1)(C2=C1)NC(=N2)C(=CS3)N=C3,0.1490700414533971 -CCOC1=CC=C(C=C1)C(C)(C)COCC2=CC(=CC=C2)OC3=CC=CC=C3,0.0690593023384914 -CC(C)(C)c2ccc(OC1CCCCC1OS(=O)OCC#C)cc2,0.0542125521232289 -CC(COC1=CC=C(C=C1)OC2=CC=CC=C2)OC3=CC=CC=N3,0.4356352632556343 -CC(C)(C)C(=O)C(N1C=NC=N1)OC2=CC=C(C=C2)Cl,0.3880867710275115 -CC(C)(C)C(C(N1C=NC=N1)OC2=CC=C(C=C2)Cl)O,0.3550120362604561 -c1ccccc1c2c(C)c(COC(=O)C3C(C)(C)C3C=C(Cl)C(F)(F)F)ccc2,0.018918442570430818 -CC1(C(C1C(=O)OC(C#N)C2=CC(=C(C=C2)F)OC3=CC=CC=C3)C=C(Cl)Cl)C,0.013815728848084595 -CC1(C(C1C(=O)OC(C#N)C2=CC(=C(C=C2)F)OC3=CC=CC=C3)C=C(Cl)Cl)C,0.02763145769616919 -ClC(Cl)=CC1C(C)(C)C1C(=O)OC(C(#N))c2cccc(Oc3ccccc3)c2,0.12010651237688001 -ClC1CC2C(C1Cl)C3(Cl)C(=C(Cl)C2(Cl)C3(Cl)Cl)Cl,0.0006100854842019096 -ClC4=C(Cl)C5(Cl)C3C1CC(C2OC12)C3C4(Cl)C5(Cl)Cl,0.0002625296750418184 -C1C2C3C(C1C4C2O4)C5(C(=C(C3(C5(Cl)Cl)Cl)Cl)Cl)Cl,0.000656324187604546 -C1(C(C(C(C(C1Cl)Cl)Cl)Cl)Cl)Cl,0.016160652565775233 -CCC(=O)Nc1ccc(Cl)c(Cl)c1,0.3484961885063573 -C1(=C(C(=C(C(=C1Cl)Cl)Cl)Cl)Cl)[N+](=O)[O-],0.47403843842257615 -C1C2C(COS(=O)O1)C3(C(=C(C2(C3(Cl)Cl)Cl)Cl)Cl)Cl,0.007126617932723449 -O=C(N(SC(Cl)(Cl)Cl)C(=O)c1cccc2)c12,0.16860133324539087 -CC(C(=O)O)OC1=CC=C(C=C1)OC2=C(C=C(C=N2)C(F)(F)F)Cl,0.00027647194701359843 -CCCCC(CN1C=NC=N1)(C#N)C2=CC=C(C=C2)Cl,0.033936422812922216 -Clc1cc(Cl)cc(Cl)c1OCCN(CCC)C(=O)n2cncc2,0.013539867103284017 
-COP(=O)(NC(=O)(C))SC,0.013648831720059621 -CCOP(=O)(OCC)OC(=CCl)C1=C(C=C(C=C1)Cl)Cl,0.004171650398342553 -CCOP(=S)(OCC)SCCSCC,0.0008017375255654054 -O=P(O)(O)CCCl,0.08304843107672291 -CCCSP(=O)(OCC)SCCC,0.011141416681473747 -CCOP(=O)(NC(C)C)Oc1ccc(SC)c(C)c1,0.005603950244305859 -COP(=S)(OC)Oc1ccc(SC)c(C)c1,0.0025868753585247565 -OC(=O)C(N)CCP(C)(=O)O,0.019323475195614302 -OC(=O)CNCP(O)(O)=O,1.7743806406081915 -CCOC(=O)CC(SP(=S)(OC)OC)C(=O)OCC,1.0897268363577188 -COP(N)(=O)SC,0.002054854991717517 -CC(=CC(=O)OC)OP(=O)(OC)OC,0.001561466365033004 -CCOP(=S)(OCC)SCSCC,0.0006144925612602997 -CCOP(=S)(OCC)SCSC(C)(C)C,0.0002080223110862717 -S=P(OC)(OC)SCN1N=Nc2ccccc2C1(=O),0.00813048252144793 -CCOP(=S)(OCC)OC1=NC(=C(C=C1Cl)Cl)Cl,0.002852364738724816 -CCOP(=S)(OCC)OC1=NC(=NC(=C1)C)C(C)C,0.019057288509276463 -CCC1=NC(=CC(=N1)OP(=S)(OC)OC)OCC,0.0015395577035464635 -S=P(OC)(OC)SCN1C(=O)SC(OC)=N1,0.005292207588165698 -CCOP(=S)(OCC)SCN1C2=C(C=C(C=C2)Cl)OC1=O,0.0054376113486863924 -COP(=S)(OC)SCN2C(=O)c1ccccc1C2(=O),0.02836244328456758 -CCOC(=O)C1=CN2C(=CC(=N2)OP(=S)(OCC)OCC)N=C1C,0.010713392485187262 -CCOP(=S)(OCC)OC1=NN(C=N1)C2=CC=CC=C2,0.004149212048673449 -O=C(Oc(c(c(ccc1)cc2)c1)c2)NC,0.2981792578159244 -CC1=CC(=CC(=C1SC)C)OC(=O)NC,0.041276958181115306 -CNC(=O)ON=C(C)SC,0.12329688471926223 -CCCOC(=O)NCCCN(C)C,3.611885866531256 -COC(=O)NC1=NC2=CC=CC=C2N1,0.3922867840256219 -CC1=C(N=C(N=C1OC(=O)N(C)C)N(C)C)C,0.051618595485714625 -CNC(=O)CCSCCSP(=O)(OC)OC,0.001879329112916984 -N(C(=S)SSC(N(C)C)=S)(C)C,0.04990997903448147 -C1=NNC(=N1)N,0.059467202410657664 -CCCCCCCCc1cc(N(=O)(=O))c(OC(=O)C=CC)c(c1)N(=O)(=O),0.17563456769307506 -C1=CC=C(C=C1)[Sn](C2=CC=CC=C2)C3=CC=CC=C3,0.0008571117562305596 -O=C(NC(=O)c(c(F)ccc1)c1F)Nc(ccc(c2)Cl)c2,0.022530984690614337 -N(C(=S)NC1)C1,0.012235931468603481 -C1=CC(=C(C(=C1)F)C(=O)NC(=O)NC2=CC(=C(C(=C2F)Cl)F)Cl)F,0.06559798797851273 -CCCSC1=CC2=C(C=C1)N=C(N2)NC(=O)OC,0.07537743365466734 -C1CN(CCN1CCCC(=O)C2=CC=C(C=C2)F)C3=CC=CC=N3,0.35125671098854394 
-CC1(C2C(C3C(C(=O)C(=C(N)O)C(=O)C3(C(=O)C2=C(C4=C(C=CC(=C41)Cl)O)O)O)N(C)C)O)O,10.50761860949369 -CC1=CC(=C(C=C1NC(=O)C2=CC(=CC(=C2O)I)I)Cl)C(C#N)C3=CC=C(C=C3)Cl,0.015081279803436631 -CN1CC2CC1CN2C3=C(C=C4C(=C3)N(C=C(C4=O)C(=O)O)C5CC5)F,0.13990757146198934 -C1=CC(=CC=C1C(C#N)C2=C(C=CC(=C2Cl)N3C(=O)NC(=O)C=N3)Cl)Cl,0.03679735812631385 -CC1=NC=C(N1C)[N+](=O)[O-],0.10628650675790867 -CCN1CCN(CC1)C2=C(C=C3C(=C2)N(C=C(C3=O)C(=O)O)C4CC4)F,0.07234386441112595 -CC1C=CC=C2COC3C2(C(C=C(C3O)C)C(=O)OC4CC(CC=C(C1OC5CC(C(C(O5)C)OC6CC(C(C(O6)C)NC(=O)C)OC)OC)C)OC7(C4)C=CC(C(O7)C(C)C)C)O,0.0011109849279118543 -COCC(=O)NC1=C(C=CC(=C1)SC2=CC=CC=C2)NC(=NC(=O)OC)NC(=O)OC,0.08959030532555236 -CC1CCC2=C3N1C=C(C(=O)C3=CC(=C2)F)C(=O)O,1.531109972815908 -COC(=O)NC1=NC2=C(N1)C=C(C=C2)S(=O)C3=CC=CC=C3,0.006342219438128827 -CC1(C2CC3C(C(=O)C(=C(N)O)C(=O)C3(C(=O)C2=C(C4=C1C=CC=C4O)O)O)N(C)C)O,0.33750750616693714 -C1CN(CCN1CCCC(=O)C2=CC=C(C=C2)F)C3=CC=CC=N3,0.09163218547527233 -CC(C)NCC(COC1=CC=CC2=C1C3=CC=CC=C3N2)O,0.023460058312320942 -C1=CC(=CC=C1C(C#N)C2=C(C=CC(=C2Cl)N3C(=O)NC(=O)C=N3)Cl)Cl,0.056422615793681234 -CC1C=CC=C2COC3C2(C(C=C(C3O)C)C(=O)OC4CC(CC=C(C1OC5CC(C(C(O5)C)OC6CC(C(C(O6)C)NC(=O)C)OC)OC)C)OC7(C4)C=CC(C(O7)C(C)C)C)O,0.0027774623197796356 -COP(=S)(OC)Oc1ccc(SC)c(C)c1,0.001616797099077973 -C(C(C(C(C1Cl)Cl)Cl)Cl)(C1Cl)Cl,0.027507493728979118 -C(C(C(C(C1Cl)Cl)Cl)Cl)(C1Cl)Cl,0.013753746864489559 -CC(N(C)C)CN(C(=CC=C3)C1=C3)C(=CC=C2)C(=C2)S1,0.058364575374860554 -C1=C(C=C(C(=C1Cl)N)Cl)[N+](=O)[O-],1.159340984210935 -c(cccc1)(c1)C=C,3.8406469492973154 diff --git a/paper/data/median-correlation.csv b/paper/data/median-correlation.csv index 889453d..e83961d 100644 --- a/paper/data/median-correlation.csv +++ b/paper/data/median-correlation.csv @@ -1,121 +1,121 @@ SMILES,mazzatorta,swiss -CCOCN(c1c(C)cccc1CC)C(=O)CCl,0.18534506246313948,0.22661523159035935 -COCN(c1c(CC)cccc1CC)C(=O)CCl,0.05560351873894184,0.23816840526513422 
-CCOC(=O)C(Oc1ccc(cc1)Oc1cnc2c(n1)ccc(c2)Cl)C,0.009924832004782804,0.04157699893895499 -COP(=S)(SCn1nnc2c(c1=O)cccc2)OC,0.005451835179110433,0.008508644649457775 -CNC(=O)Oc1ccccc1OC(C)C,0.23895810443138246,0.22939978025412716 -O=C(C(C)(C)C)C(n1ncnc1)Oc1ccc(cc1)Cl,0.08510674803234901,0.08272375649019124 -CNC(=O)Oc1cccc2c1cccc2,0.1878529324240324,0.2991731924668564 -O=C(C1=C(C)OCCS1)Nc1ccccc1,0.1274956638724717,0.034848813981213346 -COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1Cl,0.06987675250196507,0.05590140200157206 +N#Cc1nn(c(c1S(=O)C(F)(F)F)N)c1c(Cl)cc(cc1Cl)C(F)(F)F,0.0001372533562906347,0.00013496580117055152 +CCOP(=S)(SCSC(C)(C)C)OCC,0.0002080223110862717,0.00034670385697674235 +OC(=O)C(Oc1ccc(cc1)Oc1ncc(cc1Cl)C(F)(F)F)C,0.00027647194701359843,0.0002764719511333511 +CCSCSP(=S)(OCC)OCC,0.0006144925612602997,0.0006144925475253195 +CCOP(=O)(SC(CC)C)SC(CC)C,0.0009245829520661433,0.0008210296720157477 +COP(=S)(Oc1ccc(c(c1)C)[N+](=O)[O-])OC,0.001659247904766673,0.08332310268057162 +COP(=O)(SC)N,0.002054854991717517,0.0020548549325897737 +CSc1ccc(cc1C)OP(=S)(OC)OC,0.0025868753585247565,0.0016527259802523342 CCOP(=S)(Oc1ccc2c(c1)oc(=O)c(c2C)Cl)OCC,0.003445751195813495,0.0033630532459809582 -Nc1nc(NC2CC2)nc(n1)N,0.09026150563412319,0.9387196585948812 +CCOP(=S)(Oc1ncn(n1)c1ccccc1)OCC,0.004149212048673449,0.004149211896481245 +COc1sc(=O)n(n1)CSP(=S)(OC)OC,0.004713372383210075,0.0039030031199302137 +CCOP(=S)(OCC)SCSP(=S)(OCC)OCC,0.005201883810203027,0.004681695305160139 +CCOP(=O)(Oc1ccc(c(c1)C)SC)NC(C)C,0.005274306112287868,0.0035601567181414275 +CCOP(=S)(SCn1c(=O)oc2c1ccc(c2)Cl)OCC,0.0054376113486863924,0.0636200517424888 +CC(Cc1ccc(cc1)C(C)(C)C)CN1CC(C)OC(C1)C,0.005601647965290344,0.005601648122412352 +Fc1ccc(cc1)[Si](c1ccc(cc1)F)(Cn1cncn1)C,0.006341300659739408,0.023779877474022784 +COC(=O)N(C(=O)N1COC2(C(=N1)c1ccc(cc1C2)Cl)C(=O)OC)c1ccc(cc1)OC(F)(F)F,0.006820319755914397,0.006820319575237628 +Clc1ccc(cc1)C(c1ccccc1Cl)(c1cncnc1)O,0.00694452873492003,0.00920904883059355 
+COP(=S)(SCn1nnc2c(c1=O)cccc2)OC,0.0070905370826580775,0.008508644649457775 +O=S1OCC2C(CO1)C1(C(C2(Cl)C(=C1Cl)Cl)(Cl)Cl)Cl,0.007126617932723449,0.01228727229779905 +CCCCOC(=O)C(Oc1ccc(cc1)Oc1ccc(cn1)C(F)(F)F)C,0.007825509706097071,0.009886227162529472 +COP(=S)(Oc1nc(Cl)c(cc1Cl)Cl)OC,0.009301369775521361,0.003100456591840454 +CCOC(=O)C(Oc1ccc(cc1)Oc1cnc2c(n1)ccc(c2)Cl)C,0.009924832004782804,0.04157699893895499 +CON(C(=O)Nc1ccc(c(c1)Cl)Cl)C,0.010036375840596658,0.12545469800745823 COP(=O)(OC=C(Cl)Cl)OC,0.010068978612765365,0.010408382170442241 -OC(C(Cl)(Cl)Cl)(c1ccc(cc1)Cl)c1ccc(cc1)Cl,0.030365547751564796,0.005938151689011985 -O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(cc1)Cl,0.02446221194980985,0.4023390123323988 -CC1=C(C)S(=O)(=O)CCS1(=O)=O,0.047557630336441704,0.23778815168220852 +CCCSP(=O)(SCCC)OCC,0.011141416681473747,0.006065882533527741 CNC(=O)CSP(=S)(OC)OC,0.011450010084732691,0.000872381733741038 -O=C(N(C)C)Nc1ccc(c(c1)Cl)Cl,0.026276896280264014,0.007293179580314936 -ClCCP(=O)(O)O,1.4025957248513201,0.9066120392542251 -ClC1C(Cl)C(Cl)C(C(C1Cl)Cl)Cl,0.01808617712680377,0.01616065190994549 -C=CCOC(c1ccc(cc1Cl)Cl)Cn1cncc1,0.13459866849613178,0.05350296944357954 -CCC(c1noc(c1)NC(=O)c1c(OC)cccc1OC)(CC)C,0.15252975563710267,1.6952764753748983 -CCOC(=O)CC(C(=O)OCC)SP(=S)(OC)OC,0.6205388929259232,0.2603236331298995 -COc1sc(=O)n(n1)CSP(=S)(OC)OC,0.00467202701142753,0.0039030031199302137 -CSc1nnc(c(=O)n1N)C(C)(C)C,0.06999926640768805,0.06393266242893511 -CC(Oc1cc(c(cc1Cl)Cl)n1nc(oc1=O)C(C)(C)C)C,0.01448347496337274,0.010428101697378017 -CNC(=O)ON=C(C(=O)N(C)C)SC,0.02280382932847922,0.02072868120754643 -COP(=S)(SCN1C(=O)c2c(C1=O)cccc2)OC,0.04569504751402555,0.009139009427670286 -OC(=O)c1nc(Cl)c(c(c1Cl)N)Cl,0.24848916516834604,0.9318343693812976 -CCN(c1nc(cc(n1)C)OP(=S)(OC)OC)CC,0.026692119922880408,0.0068777238395693234 -CCC(=O)Nc1ccc(c(c1)Cl)Cl,0.22010285589875195,0.041269285481015994 -CC1N(C(=O)NC2CCCCC2)C(=O)SC1c1ccc(cc1)Cl,0.4534134152107278,0.06546156290207059 
-CCN(C(=O)SCc1ccc(cc1)Cl)CC,0.019396419126203733,0.016680921188449865 -CN(C(=S)SSC(=S)N(C)C)C,0.04432283415923257,0.03036190470594063 -C=CC1(C)OC(=O)N(C1=O)c1cc(Cl)cc(c1)Cl,0.25479642918707424,0.0186990521567307 -COP(=O)(SC)N,0.003046853953236319,0.0020548549325897737 CCOP(=S)(Oc1cc(C)nc(n1)C(C)C)OCC,0.011992948803251567,0.01642869699075557 -OC(=O)CNCP(=O)(O)O,5.914602135360638,5.350743398456257 -CON(C(=O)Nc1ccc(c(c1)Cl)Cl)C,0.010036375840596658,0.12545469800745823 -CC(OC(=O)Nc1cccc(c1)Cl)C,3.510237115113031,0.14040948460452124 -OC(=O)C(Oc1ccc(cc1Cl)Cl)C,0.03828744186371015,0.15527684755838006 -N#CC(c1ccc(c(c1)Oc1ccccc1)F)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.021874904009467275,0.04835505096829608 +CC(Oc1cc(c(cc1Cl)Cl)n1nc(oc1=O)C(C)(C)C)C,0.01448347496337274,0.010428101697378017 +N#Cc1c(Cl)cccc1Cl,0.014533918736325764,0.0186034162597095 N#Cc1c(Cl)c(C#N)c(c(c1Cl)Cl)Cl,0.015042627044387032,0.08141821878808377 -CC(C1(C)N=C(NC1=O)c1nc2ccccc2cc1C(=O)O)C,0.06423944765895072,1.605986191473768 -CC(Nc1nc(NC(C)C)nc(n1)Cl)C,0.21766590408142725,0.22201922216305578 -CCOP(=S)(SCSC(C)(C)C)OCC,0.002438483757733518,0.00034670385697674235 -CCOP(=O)(SC(CC)C)SC(CC)C,0.0009245829520661433,0.0008210296720157477 ClCC1CN(C(=O)C1Cl)c1cccc(c1)C(F)(F)F,0.016019730669239306,0.05030195369030707 -O=C1N(OCC1(C)C)Cc1ccccc1Cl,0.08969617860069455,0.41719152837532353 -CCC(n1c(=O)[nH]c(c(c1=O)Br)C)C,0.23935747721355113,0.39446112244793224 -CC(N(C(=O)SCC(=C(Cl)Cl)Cl)C(C)C)C,0.04102878665011248,0.03544887229174679 -COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1CCC(F)(F)F,0.19051986050321804,0.2098341392275743 -CC(N1C(=O)c2ccccc2NS1(=O)=O)C,0.1560686482307559,0.14982590230152565 +ClC1C(Cl)C(Cl)C(C(C1Cl)Cl)Cl,0.016160652565775233,0.01616065190994549 +CCCN(C(=O)n1cncc1)CCOc1c(Cl)cc(cc1Cl)Cl,0.01672571818640967,0.05707983190600125 +BrC1COC(C1)(Cn1cncn1)c1ccc(cc1Cl)Cl,0.017185416964361586,0.017185417014945824 +OC(=O)C(CCP(=O)(O)C)N,0.019323475195614302,1.2637552440957067 
+CCN(C(=O)SCc1ccc(cc1)Cl)CC,0.019396419126203733,0.016680921188449865 +OC(=O)COc1ccc(cc1C)Cl,0.019938294964743114,0.0947069010825298 +N#CC(c1ccc(c(c1)Oc1ccccc1)F)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.022450559378137468,0.04835505096829608 +CNC(=O)ON=C(C(=O)N(C)C)SC,0.02280382932847922,0.02072868120754643 +O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(cc1)Cl,0.02510595436954169,0.4023390123323988 +O=C(N(C)C)Nc1ccc(c(c1)Cl)Cl,0.026276896280264014,0.007293179580314936 +CCN(c1nc(cc(n1)C)OP(=S)(OC)OC)CC,0.026692119922880408,0.0068777238395693234 +OC(C(Cl)(Cl)Cl)(c1ccc(cc1)Cl)c1ccc(cc1)Cl,0.030365547751564796,0.005938151689011985 +CCOC(=O)NCCOc1ccc(cc1)Oc1ccccc1,0.03318543029523152,0.08196801536106943 Clc1ccc(c(c1)Cl)C1(OCCO1)Cn1cncn1,0.03331771398901528,0.042646674541424644 -OC(=O)COc1nc(Cl)c(cc1Cl)Cl,0.1403669879303106,0.0038990829980641837 CCOCn1c(c2ccc(cc2)Cl)c(c(c1C(F)(F)F)Br)C#N,0.03336499327732185,0.036799624938222635 +CCCCC(c1ccc(cc1)Cl)(Cn1cncn1)C#N,0.03400568081866287,0.20067507097305953 +OC(=O)C(Oc1ccc(cc1Cl)Cl)C,0.03828744186371015,0.15527684755838006 +CC(C(c1cncnc1)(c1ccc(cc1)OC(F)(F)F)O)C,0.038746408312020406,0.08469772512288609 +CC(N(C(=O)SCC(=C(Cl)Cl)Cl)C(C)C)C,0.04102878665011248,0.03544887229174679 +CNC(=O)Oc1cc(C)c(c(c1)C)SC,0.041276958181115306,0.1242747128033579 +O=C1OC(C(=O)N1Nc1ccccc1)(C)c1ccc(cc1)Oc1ccccc1,0.044873074905021335,0.045407278177700156 +COP(=S)(SCN1C(=O)c2c(C1=O)cccc2)OC,0.04569504751402555,0.009139009427670286 +CC1=C(C)S(=O)(=O)CCS1(=O)=O,0.047557630336441704,0.23778815168220852 +CN(C(=S)SSC(=S)N(C)C)C,0.04783039657471141,0.03036190470594063 +CON(C(=O)Nc1ccc(cc1)Br)C,0.048243951057630914,0.014357399945172603 +Clc1ccc(cc1)CCC(C(C)(C)C)(Cn1cncn1)O,0.05165383561566402,0.17867678986550448 +Cc1ccc(cc1)N(S(=O)(=O)N(C)C)SC(Cl)(Cl)F,0.051834835094095484,0.27357274077439286 +CN(C(=O)Oc1nc(nc(c1C)C)N(C)C)C,0.05203825886364726,0.05161859628615915 O=C1N(c2cc(Cl)cc(c2)Cl)C(=O)C2(C1(C)C2)C,0.05279126047017867,0.16893203350457175 
-COP(=S)(Oc1nc(Cl)c(cc1Cl)Cl)OC,0.009301369775521361,0.003100456591840454 -COC(=O)c1c(nc(c(c1CC(C)C)C1=NCCS1)C(F)(F)F)C(F)F,0.11151045196043953,0.11151045388522976 -CC(=CC1C(C1(C)C)C(=O)OCc1coc(c1)Cc1ccccc1)C,0.3693416417277341,0.04432099700732809 -Cc1cccc2c1n1cnnc1s2,0.16381576159162972,0.1373938645607217 -CSc1ccc(cc1C)OP(=S)(OC)OC,0.005892327205528613,0.0016527259802523342 -Clc1c(Cl)c([N+](=O)[O-])c(c(c1Cl)Cl)Cl,0.44864352207850955,0.4774244272684517 -ClCCOc1ccccc1S(=O)(=O)NC(=O)Nc1nc(C)nc(n1)OC,0.5494924735209582,0.07465930346752149 -C#CCC1=C(C)C(CC1=O)OC(=O)C1C(C1(C)C)C=C(C)C,0.316253365684832,0.05492821614526029 -BrC1COC(C1)(Cn1cncn1)c1ccc(cc1Cl)Cl,0.017185416964361586,0.017185417014945824 -O=C(NC(=O)c1c(F)cccc1F)Nc1cc(Cl)c(c(c1F)Cl)F,0.06690994773808298,0.19325167158375256 -N#Cc1c(Cl)cccc1Cl,0.014533918736325764,0.0186034162597095 -CC(NC(=O)N1CC(=O)N(C1=O)c1cc(Cl)cc(c1)Cl)C,0.13932359364492994,0.03634528529867737 -O=C(N(C)C)Nc1cccc(c1)C(F)(F)F,0.06459882942614491,0.10157735340683115 -CCCN(C(=O)n1cncc1)CCOc1c(Cl)cc(cc1Cl)Cl,0.01672571818640967,0.05707983190600125 -N#Cc1nn(c(c1S(=O)C(F)(F)F)N)c1c(Cl)cc(cc1Cl)C(F)(F)F,0.0001372533562906347,0.00013496580117055152 +COCN(c1c(CC)cccc1CC)C(=O)CCl,0.05560351873894184,0.23816840526513422 +CC(OC(=O)C(c1ccc(cc1)Br)(c1ccc(cc1)Br)O)C,0.06073132568962639,0.060497742776698574 +Clc1ccccc1c1nnc(nn1)c1ccccc1Cl,0.06152148673385627,0.05706818624978773 N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)(C)C,0.06360487171247954,0.06009909138187043 -CN1CN(C)CSC1=S,0.18486987933542975,0.027422365728598172 +CC(C1(C)N=C(NC1=O)c1nc2ccccc2cc1C(=O)O)C,0.06423944765895072,1.605986191473768 +O=C(N(C)C)Nc1cccc(c1)C(F)(F)F,0.06459882942614491,0.10157735340683115 +O=C(NC(=O)c1c(F)cccc1F)Nc1cc(Cl)c(c(c1F)Cl)F,0.06690994773808298,0.19325167158375256 +CCOc1ccc(cc1)C(COCc1cccc(c1)Oc1ccccc1)(C)C,0.0690593023384914,0.06773123883198195 +COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1Cl,0.06987675250196507,0.05590140200157206 
+CSc1nnc(c(=O)n1N)C(C)(C)C,0.06999926640768805,0.06393266242893511 +O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(c(c1)Cl)OC(C(OC(F)(F)F)F)(F)F,0.07306609422899836,0.062106180868884746 +Clc1ccc(cc1)CN(C(=O)Nc1ccccc1)C1CCCC1,0.0760257762657501,0.5473855891134007 +COP(=O)(C(C(Cl)(Cl)Cl)O)OC,0.07768900686568829,0.05166319030658296 +O=C(C(C)(C)C)C(n1ncnc1)Oc1ccc(cc1)Cl,0.08510674803234901,0.08272375649019124 +O=C1N(OCC1(C)C)Cc1ccccc1Cl,0.08969617860069455,0.41719152837532353 +Nc1nc(NC2CC2)nc(n1)N,0.09026150563412319,0.9387196585948812 N#CC(c1ccccc1)(Cn1cncn1)CCc1ccc(cc1)Cl,0.10391366164191661,0.09203781459712614 -Clc1ccc(cc1)CCC(C(C)(C)C)(Cn1cncn1)O,0.05165383561566402,0.17867678986550448 -[O-][N+](=O)c1cc(Cl)c(c(c1)Cl)N,0.9419645496713847,0.5651787298028309 -CON(C(=O)Nc1ccc(cc1)Br)C,0.048243951057630914,0.014357399945172603 -COP(=S)(Oc1ccc(c(c1)C)[N+](=O)[O-])OC,0.001659247904766673,0.08332310268057162 CCOc1cc(ccc1[N+](=O)[O-])Oc1ccc(cc1Cl)C(F)(F)F,0.11058877880543937,0.14653013191720715 -ClC(SN1C(=O)c2c(C1=O)cccc2)(Cl)Cl,0.6631652440985374,0.08430066662269543 -C#CCOS(=O)OC1CCCCC1Oc1ccc(cc1)C(C)(C)C,0.1697708869122168,0.061250674376451514 -CN(C(=O)Oc1nc(nc(c1C)C)N(C)C)C,0.05203825886364726,0.05161859628615915 -CCC(Nc1c(cc(cc1[N+](=O)[O-])C(C)(C)C)[N+](=O)[O-])C,0.16929970598735858,0.13747135609511818 -Clc1ccc(cc1)CN(C(=O)Nc1ccccc1)C1CCCC1,0.0760257762657501,0.5473855891134007 +COC(=O)c1c(nc(c(c1CC(C)C)C1=NCCS1)C(F)(F)F)C(F)F,0.11151045196043953,0.11151045388522976 Clc1cc(ccc1Oc1ccc(c(c1)C(=O)NS(=O)(=O)C)[N+](=O)[O-])C(F)(F)F,0.11395676083924232,0.011395676083924233 -CCCCCCCCSC(=O)Oc1cc(Cl)nnc1c1ccccc1,0.17813968959673715,0.3034972489425892 -Clc1ccccc1c1nnc(nn1)c1ccccc1Cl,0.06152148673385627,0.05706818624978773 -CCOC(=O)NCCOc1ccc(cc1)Oc1ccccc1,0.03318543029523152,0.08196801536106943 -CCCCOC(=O)C(Oc1ccc(cc1)Oc1ccc(cn1)C(F)(F)F)C,0.007825509706097071,0.009886227162529472 -CCNc1nc(NC(C)C)nc(n1)Cl,0.11591071091933607,0.056718974985359355 
-CC(C(c1cncnc1)(c1ccc(cc1)OC(F)(F)F)O)C,0.038746408312020406,0.08469772512288609 -COP(=O)(C(C(Cl)(Cl)Cl)O)OC,0.07768900686568829,0.05166319030658296 +CCNc1nc(NC(C)C)nc(n1)Cl,0.11591071091933607,0.04636428436773443 +O=C(C1=C(C)OCCS1)Nc1ccccc1,0.1274956638724717,0.034848813981213346 +C=CCOC(c1ccc(cc1Cl)Cl)Cn1cncc1,0.13459866849613178,0.05350296944357954 CCc1ccc(cc1)C(=O)NN(C(C)(C)C)C(=O)c1cc(C)cc(c1)C,0.13618183361575933,0.13731668655832788 -OC(C(C)(C)C)C(n1ncnc1)Oc1ccc(cc1)Cl,0.21976935578028234,0.06424027322808253 -OC(=O)COc1ccc(cc1C)Cl,0.019938294964743114,0.0947069010825298 -Clc1ccc(cc1)C(c1ccccc1Cl)(c1cncnc1)O,0.00694452873492003,0.01932390597300771 -CCOP(=O)(Oc1ccc(c(c1)C)SC)NC(C)C,0.005274306112287868,0.0035601567181414275 -CCOP(=S)(OCC)SCSP(=S)(OCC)OCC,0.005201883810203027,0.004681695305160139 -COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1C(=O)OC,0.655542030995076,0.31203800675365617 -CCCCC(c1ccc(cc1)Cl)(Cn1cncn1)C#N,0.03400568081866287,0.20067507097305953 -CC(OC(=O)C(c1ccc(cc1)Br)(c1ccc(cc1)Br)O)C,0.06073132568962639,0.060497742776698574 +CC(NC(=O)N1CC(=O)N(C1=O)c1cc(Cl)cc(c1)Cl)C,0.13932359364492994,0.03634528529867737 +OC(=O)COc1nc(Cl)c(cc1Cl)Cl,0.1403669879303106,0.0038990829980641837 +CCC(c1noc(c1)NC(=O)c1c(OC)cccc1OC)(CC)C,0.15252975563710267,1.6952764753748983 +CC(N1C(=O)c2ccccc2NS1(=O)=O)C,0.1560686482307559,0.14982590230152565 Cc1nc(Nc2ccccc2)nc(c1)C1CC1,0.15801925526767843,0.15801924849469393 -O=C1OC(C(=O)N1Nc1ccccc1)(C)c1ccc(cc1)Oc1ccccc1,0.044873074905021335,0.045407278177700156 -N#Cc1c[nH]cc1c1cccc2c1OC(O2)(F)F,0.443217671652664,0.4553054263341003 -COC(=O)N(C(=O)N1COC2(C(=N1)c1ccc(cc1C2)Cl)C(=O)OC)c1ccc(cc1)OC(F)(F)F,0.006820319755914397,0.006820319575237628 -O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(c(c1)Cl)OC(C(OC(F)(F)F)F)(F)F,0.07306609422899836,0.062106180868884746 -Cc1ccc(cc1)N(S(=O)(=O)N(C)C)SC(Cl)(Cl)F,0.051834835094095484,0.27357274077439286 -CC(Cc1ccc(cc1)C(C)(C)C)CN1CC(C)OC(C1)C,0.005601647965290344,0.005601648122412352 
-Fc1ccc(cc1)[Si](c1ccc(cc1)F)(Cn1cncn1)C,0.006341300659739408,0.023779877474022784 -CCOc1ccc(cc1)C(COCc1cccc(c1)Oc1ccccc1)(C)C,0.0690593023384914,0.06773123883198195 -CC(Oc1ccccn1)COc1ccc(cc1)Oc1ccccc1,0.4356352632556343,0.4315900691721648 -O=S1OCC2C(CO1)C1(C(C2(Cl)C(=C1Cl)Cl)(Cl)Cl)Cl,0.007126617932723449,0.015481963173347177 -OC(=O)C(Oc1ccc(cc1)Oc1ncc(cc1Cl)C(F)(F)F)C,0.00027647194701359843,0.0002764719511333511 -CCCSP(=O)(SCCC)OCC,0.011141416681473747,0.006065882533527741 -OC(=O)C(CCP(=O)(O)C)N,0.019323475195614302,1.2637552440957067 -CCSCSP(=S)(OCC)OCC,0.0006144925612602997,0.0006144925475253195 -CCOP(=S)(SCn1c(=O)oc2c1ccc(c2)Cl)OCC,0.0054376113486863924,0.0636200517424888 -CCOP(=S)(Oc1ncn(n1)c1ccccc1)OCC,0.004149212048673449,0.004149211896481245 -CNC(=O)Oc1cc(C)c(c(c1)C)SC,0.041276958181115306,0.1242747128033579 +Cc1cccc2c1n1cnnc1s2,0.16381576159162972,0.1373938645607217 +ClC(SN1C(=O)c2c(C1=O)cccc2)(Cl)Cl,0.16860133324539087,0.08430066662269543 +CCC(Nc1c(cc(cc1[N+](=O)[O-])C(C)(C)C)[N+](=O)[O-])C,0.16929970598735858,0.13747135609511818 +C#CCOS(=O)OC1CCCCC1Oc1ccc(cc1)C(C)(C)C,0.1697708869122168,0.06904967382858089 +CCCCCCCCSC(=O)Oc1cc(Cl)nnc1c1ccccc1,0.17813968959673715,0.3034972489425892 +CN1CN(C)CSC1=S,0.18486987933542975,0.027422365728598172 +CCOCN(c1c(C)cccc1CC)C(=O)CCl,0.18534506246313948,0.24799169923196304 +CNC(=O)Oc1cccc2c1cccc2,0.1878529324240324,0.2991731924668564 +COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1CCC(F)(F)F,0.19051986050321804,0.2098341392275743 +CC(Nc1nc(NC(C)C)nc(n1)Cl)C,0.21766590408142725,0.22201922216305578 +OC(C(C)(C)C)C(n1ncnc1)Oc1ccc(cc1)Cl,0.21976935578028234,0.06424027322808253 +CCC(=O)Nc1ccc(c(c1)Cl)Cl,0.22010285589875195,0.041269285481015994 +CNC(=O)Oc1ccccc1OC(C)C,0.23895810443138246,0.22939978025412716 +CCC(n1c(=O)[nH]c(c(c1=O)Br)C)C,0.23935747721355113,0.39446112244793224 +OC(=O)c1nc(Cl)c(c(c1Cl)N)Cl,0.24848916516834604,0.9318343693812976 +C=CC1(C)OC(=O)N(C1=O)c1cc(Cl)cc(c1)Cl,0.25479642918707424,0.0186990521567307 
+C#CCC1=C(C)C(CC1=O)OC(=O)C1C(C1(C)C)C=C(C)C,0.316253365684832,0.05492821614526029 +CC(=CC1C(C1(C)C)C(=O)OCc1coc(c1)Cc1ccccc1)C,0.3693416417277341,0.04432099700732809 COC(=O)Nc1nc2c([nH]1)cccc2,0.3922867840256219,1.3076226134187396 +CC(Oc1ccccn1)COc1ccc(cc1)Oc1ccccc1,0.4356352632556343,0.4315900691721648 +N#Cc1c[nH]cc1c1cccc2c1OC(O2)(F)F,0.443217671652664,0.4553054263341003 +Clc1c(Cl)c([N+](=O)[O-])c(c(c1Cl)Cl)Cl,0.44864352207850955,0.4774244272684517 +CC1N(C(=O)NC2CCCCC2)C(=O)SC1c1ccc(cc1)Cl,0.4534134152107278,0.06546156290207059 +ClCCOc1ccccc1S(=O)(=O)NC(=O)Nc1nc(C)nc(n1)OC,0.5494924735209582,0.07465930346752149 +CCOC(=O)CC(C(=O)OCC)SP(=S)(OC)OC,0.6205388929259232,0.2603236331298995 +COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1C(=O)OC,0.655542030995076,0.31203800675365617 +[O-][N+](=O)c1cc(Cl)c(c(c1)Cl)N,0.9419645496713847,0.5651787298028309 +ClCCP(=O)(O)O,1.0381053884590363,0.9066120392542251 +CC(OC(=O)Nc1cccc(c1)Cl)C,3.510237115113031,0.14040948460452124 +OC(=O)CNCP(=O)(O)O,5.914602135360638,5.559726007239 diff --git a/paper/data/swiss-cv.csv b/paper/data/swiss-cv.csv new file mode 100644 index 0000000..309ba82 --- /dev/null +++ b/paper/data/swiss-cv.csv @@ -0,0 +1,447 @@ +SMILES,LOAEL_measured_median,LOAEL_predicted,Confidence +N#Cc1nn(c(c1S(=O)C(F)(F)F)N)c1c(Cl)cc(cc1Cl)C(F)(F)F,0.00013496580117055152,0.017344813680253365,1 +Clc1ccc2c(c1)[n+]([O-])nc(n2)n1cncc1,0.00020190555530632425,0.03523100134958194,1 +OC(=O)C(Oc1ccc(cc1)Oc1ncc(cc1Cl)C(F)(F)F)C,0.0002764719511333511,0.018009645835635024,1 +CCOP(=S)(SCSC(C)(C)C)OCC,0.00034670385697674235,0.004425660789408393,1 +COC(=O)C(Oc1ccc(cc1)Oc1ccc(cc1Cl)Cl)C,0.0005861906011027885,0.03191598936338135,1 +CCSCSP(=S)(OCC)OCC,0.0006144925475253195,0.001840915966735963,1 +CSc1ccc(cc1C)OP(=S)(OC)OC,0.0007185764991867223,0.005508449832284467,1 +CCOP(=O)(SC(CC)C)SC(CC)C,0.0008210296720157477,0.008579634171466552,1 +CNC(=O)CSP(=S)(OC)OC,0.000872381733741038,0.014593717469688315,1 
+CCS(=O)CCSP(=O)(OC)OC,0.0008932752807580748,0.0015198540889657284,0.3125 +OC(=O)c1ccccc1.CCC(C1OC2(C=CC1C)OC1CC=C(C)C(OC3CC(OC)C(C(O3)C)OC3CC(OC)C(C(O3)C)NC)C(C)C=CC=C3C4(C(C(=O)OC(C2)C1)C=C(C)C(C4OC3)O)O)C,0.0009918273033473258,0.0018793779503536868,0.7758620689655172 +COP(=O)(SCCS(=O)(=O)CC)OC,0.0011437981092748413,0.0015198540889657284,0.3125 +COC1CC(OC2C(C)C=CC=C3COC4C3(O)C(C=C(C4O)C)C(=O)OC3CC(CC=C2C)OC2(C3)C=CC(C(O2)C(CC)C)C)OC(C1OC1CC(OC)C(C(O1)C)O)C.COC1CC(OC(C1OC1CC(OC)C(C(O1)C)O)C)OC1C(C)C=CC=C2COC3C2(O)C(C=C(C3O)C)C(=O)OC2CC(CC=C1C)OC1(C2)C=CC(C(O1)C(C)C)C,0.0011546496256700967,0.0028270088965558233,1 +O=C1CCCC(=O)C1C(=O)c1ccc(cc1Cl)S(=O)(=O)C,0.0012166633663470796,0.002407097420013963,1 +CCOP(=O)(N1CCSC1=O)SC(CC)C,0.001341107599716744,0.0011515824980518601,1 +O=C1CCCC(=O)C1C(=O)c1ccc(cc1[N+](=O)[O-])S(=O)(=O)C,0.001414591694222218,0.019307092486906843,1 +CCOP(=O)(Oc1ccc(c(c1)C)SC)NC(C)C,0.001516363034790411,0.007657024526944333,1 +CCSCCSP(=O)(OC)OC,0.001519854088965729,0.000883234745514374,1 +COC(=O)/C=C(/OP(=O)(OC)OC)\C,0.0015614663384413926,0.038332425152094556,1 +C1CCC(CC1)[Sn](n1ncnc1)(C1CCCCC1)C1CCCCC1,0.0018110419025972907,0.014704825918970935,1 +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)[C@H](C(C)C)Nc1ccc(cc1Cl)C(F)(F)F,0.001988416717024977,0.012590156321812351,1 +COP(=O)(SC)N,0.0020548549325897737,0.045296304153967855,0.13333333333333333 +CCCSP(=O)(SCCC)OCC,0.002063225311384027,0.0018731882921710285,1 +COP(=S)(Oc1ccc(cc1)[N+](=O)[O-])OC,0.002089606472099723,0.01737984618635041,1 +COc1sc(=O)n(n1)CSP(=S)(OC)OC,0.0021168829879502555,0.011073447351926287,1 +O=C1CCCC(=O)C1C(=O)c1ccc(c(c1Cl)COCC(F)(F)F)S(=O)(=O)C,0.002381932321850521,0.007692682105248146,1 +OC(=O)c1ccccc1.CCC(C1OC2(C=CC1C)OC1CC=C(C)C(OC3CC(OC)C(C(O3)C)OC3CC(OC)C(C(O3)C)NC)C(C)C=CC=C3C4(C(C(=O)OC(C2)C1)C=C(C)C(C4OC3)O)O)C,0.0024795682583683147,0.001880233874102364,1 +CSc1ccc(cc1C)OP(=S)(OC)OC,0.0025868754613179463,0.008959434615561151,1 
+O[Sn](C1CCCCC1)(C1CCCCC1)C1CCCCC1,0.002596303652874617,0.07470222573840693,1 +CCOP(=S)(Oc1nc(Cl)c(cc1Cl)Cl)OCC,0.002852364738724816,0.014300523748580053,1 +COP(=S)(Oc1nc(Cl)c(cc1Cl)Cl)OC,0.003100456591840454,0.02321316597224481,1 +Fc1ccc(cc1)N(C(=O)COc1nnc(s1)C(F)(F)F)C(C)C,0.0033027779077186826,0.031034162092124017,1 +CCOP(=S)(Oc1ccc2c(c1)oc(=O)c(c2C)Cl)OCC,0.0033630532459809582,0.009462543754698434,1 +CNC(=O)Oc1cccc2c1OC(O2)(C)C,0.0035838244976124515,0.08761345281390893,1 +O[Sn](C1CCCCC1)(C1CCCCC1)C1CCCCC1,0.003608862040355308,0.07470222573840693,1 +OC(=O)COc1nc(Cl)c(cc1Cl)Cl,0.0038990829980641837,0.06919651159726646,1 +CCc1nn(c(c1Cl)C(=O)NCc1ccc(cc1)Oc1ccc(cc1)C)C,0.003907559846623587,0.09871158498928112,1 +CCOP(=S)(Oc1ncn(n1)c1ccccc1)OCC,0.004149211896481245,0.022355398180114477,1 +CCOP(=O)(O/C(=C/Cl)/c1ccc(cc1Cl)Cl)OCC,0.004171650398342553,0.03530064138051918,1 +CCOP(=S)(OCC)SCSP(=S)(OCC)OCC,0.004681695305160139,0.006211804536307154,1 +N#CC(c1ccc(c(c1)Oc1ccccc1)F)OC(=O)C1C(C1(C)C)/C=C(/c1ccc(cc1)Cl)\Cl,0.004898276703964497,0.012756681713004479,1 +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Br)Br,0.004948543461552866,0.03386160134712949,1 +CNC(=O)Oc1cccc2c1OC(C2)(C)C,0.005288078037050265,0.0617750690364596,1 +CCNc1nc(Cl)nc(n1)NC(C)(C)C,0.005398114462735858,0.07999443961831189,1 +CC(Cc1ccc(cc1)C(C)(C)C)CN1CC(C)OC(C1)C,0.005601648122412352,0.0686569170847295,1 +CCOP(=O)(Oc1ccc(c(c1)C)SC)NC(C)C,0.005603950401492444,0.007657024526944333,1 +COP(=S)(SCN1C(=O)c2c(C1=O)cccc2)OC,0.005672488506643871,0.03943927185787346,1 +COc1sc(=O)n(n1)CSP(=S)(OC)OC,0.005689123251910172,0.011073447351926287,1 +COP(=S)(Oc1ccc(cc1)[N+](=O)[O-])OC,0.005698926618569244,0.018344682252482723,1 +OC(C(Cl)(Cl)Cl)(c1ccc(cc1)Cl)c1ccc(cc1)Cl,0.005938151689011985,0.04868409643292804,1 +Clc1ccc(cc1)C(c1ccccc1Cl)(c1cncnc1)O,0.006038720639060896,0.05532402173688333,1 +COC(=O)N(C(=O)N1COC2(C(=N1)c1ccc(cc1C2)Cl)C(=O)OC)c1ccc(cc1)OC(F)(F)F,0.006820319575237628,0.05716874508521694,1 
+CCN(c1nc(cc(n1)C)OP(=S)(OC)OC)CC,0.0068777238395693234,0.018493234339046313,1 +CCNc1nc(Cl)nc(n1)NC(C)(C)C,0.006921775895097049,0.09426207720335626,1 +CCOP(=S)(Oc1ccc(cc1)[N+](=O)[O-])OCC,0.00703837000663162,0.017057539160016816,1 +Cc1nn(c(c1/C=N/OCc1ccc(cc1)C(=O)OC(C)(C)C)Oc1ccccc1)C,0.0071176254993963305,0.21775855137076283,1 +O=C(N(C)C)Nc1ccc(c(c1)Cl)Cl,0.007293179580314936,0.08327089627070684,1 +CCNc1nc(Cl)nc(n1)NC(C)(C)C,0.0074006409463509264,0.08321807469197018,1 +Clc1ccc(cc1)C(c1ccccc1Cl)(c1cncnc1)O,0.007548400798826121,0.0471479016716324,1 +N#Cc1nn(c(c1S(=O)CC)N)c1c(Cl)cc(cc1Cl)C(F)(F)F,0.008056334643428573,0.0003583362548250535,1 +CN(C(=S)S[Zn]SC(=S)N(C)C)C,0.00817493363915869,0.060687914488021834,1 +Clc1cc(cnc1Nc1c(cc(c(c1[N+](=O)[O-])Cl)C(F)(F)F)[N+](=O)[O-])C(F)(F)F,0.00821343424858256,0.06022444999031304,1 +Clc1cc(cnc1Nc1c(cc(c(c1[N+](=O)[O-])Cl)C(F)(F)F)[N+](=O)[O-])C(F)(F)F,0.008385443694386083,0.037891775077278596,1 +COP(=S)(SCn1nnc2c(c1=O)cccc2)OC,0.008508644649457775,0.022637038229098648,1 +Clc1ccc(c(c1)Cl)C(Cn1cncn1)COC(C(F)F)(F)F,0.00913621053742932,0.055026926191950536,1 +CCCN(C(=O)SCc1ccccc1)CCC,0.009149216533940492,0.026602989294595297,1 +COCN(c1c(CC)cccc1CC)C(=O)CCl,0.009267253123156974,0.22739182733921706,1 +CON(C(=O)Nc1ccc(cc1)Cl)C,0.00931754394759366,0.03688265812273081,1 +C=CC1(C)OC(=O)N(C1=O)c1cc(Cl)cc(c1)Cl,0.009436904951368202,0.12399084292970325,1 +CCCCOC(=O)C(Oc1ccc(cc1)Oc1ccc(cc1F)C#N)C,0.009625729959721526,0.012480268779678033,1 +CCCCOC(=O)C(Oc1ccc(cc1)Oc1ccc(cn1)C(F)(F)F)C,0.009886227162529472,0.005677758509221912,1 +N#C/N=C\1/SCCN1Cc1ccc(nc1)Cl,0.009892243396986886,0.1373774325740661,1 +Cc1c(ccc(c1C1=NOCC1)S(=O)(=O)C)C(=O)c1cnn(c1O)C,0.009906758425540224,0.01665216481560526,1 +CC1(C)CNC(=NC1)NN=C(C=Cc1ccc(cc1)C(F)(F)F)C=Cc1ccc(cc1)C(F)(F)F,0.009909494556264633,0.07981845994371692,1 +CCCSP(=O)(SCCC)OCC,0.010068539755671456,0.004650206638789641,1 +COP(=O)(OC=C(Cl)Cl)OC,0.010408382170442241,0.0015614663384413924,0.25 
+CC(Oc1cc(c(cc1Cl)Cl)n1nc(oc1=O)C(C)(C)C)C,0.010428101697378017,0.12778348323867558,1 +CCOC(=O)Nc1cccc(c1)OC(=O)Nc1ccccc1,0.010655682947629983,0.19281355438109932,1 +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Br)Br,0.010688854065726137,0.0346981875702237,1 +CCOC(=O)c1cn2nc(cc2nc1C)OP(=S)(OCC)OCC,0.010713392485187262,0.006900785651842258,1 +Clc1ccc(cc1)C(c1ccccc1Cl)(c1cncnc1)O,0.01086969686236098,0.046116554883113524,1 +O=C(C(Oc1ccc(cc1)Oc1cnc2c(n1)ccc(c2)Cl)C)OCCON=C(C)C,0.011264301100355506,0.011573916415728037,1 +Clc1cc(ccc1Oc1ccc(c(c1)C(=O)NS(=O)(=O)C)[N+](=O)[O-])C(F)(F)F,0.011395676083924233,0.04346838792923881,1 +N#Cc1c(Cl)c(C#N)c(c(c1Cl)Cl)Cl,0.012410167132297197,0.08243149771542137,1 +COP(=S)(SCN1C(=O)c2c(C1=O)cccc2)OC,0.012605530348696702,0.0444225657944749,1 +Clc1ccc(c(c1)Cl)n1c(nc2c(c1=O)cc(cc2)F)n1cncn1,0.01268036889326992,0.029111840582303525,1 +CCC1CCCC(OC2CCC(C(O2)C)N(C)C)C(C)C(=O)C2=CC3C(C2CC(=O)O1)C=C(C)C1C3CC(C1)OC1OC(C)C(C(C1OC)OC)OC,0.012734890360905185,0.004194655459081704,1 +CC1C=CC=C2COC3C2(O)C(C=C(C3O)C)C(=O)OC2CC(CC=C(C1)C)OC1(C2)CCC(C(O1)C)C,0.01286229964885329,0.002492034563523696,1 +COc1ccc(cc1NNC(=O)OC(C)C)c1ccccc1,0.01298475189092086,0.6426764188081437,1 +COP(=O)(NC(=O)C)SC,0.013648831720059621,0.0023317481611294373,1 +O=c1c(Cl)c(SCc2ccc(cc2)C(C)(C)C)cnn1C(C)(C)C,0.013701160159437661,0.03149493417295726,1 +NC1=C(Cl)C(=O)c2c(C1=O)cccc2,0.013920121360835688,0.0767015036114862,1 +O=C(C1C(C1(C)C)/C=C(/C(F)(F)F)\Cl)OCc1c(F)c(F)c(c(c1F)F)C,0.01409010160197152,0.09735069347835236,1 +CS/C(=N/OC(=O)N(SN(C(=O)O/N=C(/SC)\C)C)C)/C,0.014105593115928905,0.02135491357652788,0.14285714285714285 +CC(c1ccccc1)(C[Sn](O[Sn](CC(c1ccccc1)(C)C)(CC(c1ccccc1)(C)C)CC(c1ccccc1)(C)C)(CC(c1ccccc1)(C)C)CC(c1ccccc1)(C)C)C,0.014249578440471417,0.05519865342588798,1 +CON(C(=O)Nc1ccc(cc1)Br)C,0.014357399945172603,0.04262134693069911,1 +CCNc1nc(NC(C)C)nc(n1)Cl,0.014372927711833409,0.051943767855990995,1 
+CC(c1ccc(cc1)CCOc1ncnc2c1cccc2)(C)C,0.014686613132547533,0.04855108106681143,1 +N#CC(c1ccc(cc1)Oc1ccccc1)OC(=O)C1C(C1(C)C)/C=C\C(=O)OC(C(F)(F)F)C(F)(F)F,0.014960133059978587,0.04345047588412717,1 +Clc1cc(cnc1CCNC(=O)c1ccccc1C(F)(F)F)C(F)(F)F,0.015124216704213374,0.03310157945598653,1 +Fc1ccc(cc1)C1(Cn2cncn2)OC1c1ccccc1Cl,0.015162725459871818,0.058218089172169,1 +N#CN=S(=O)(C(c1ccc(nc1)C(F)(F)F)C)C,0.015292167409562457,0.08044125093401416,1 +CC(C1C2CCC1c1c2cccc1NC(=O)c1cn(nc1C(F)F)C)C,0.015302732709143212,0.0659916700840019,1 +O[Sn](C1CCCCC1)(C1CCCCC1)C1CCCCC1,0.015577821917247702,0.48143700649247756,1 +c1cc[n+]2c(c1)c1cccc[n+]1CC2,0.015794866515636753,0.11000206815300977,1 +Fc1ccc(cc1)[Si](c1ccc(cc1)F)(Cn1cncn1)C,0.01585325164934852,0.030000496093829605,1 +CCCCN(c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-])CC,0.016105987222784814,0.1189023058328751,1 +CCOP(=S)(Oc1cc(C)nc(n1)C(C)C)OCC,0.01642869699075557,0.011528157789546231,1 +CCN(C(=O)SCc1ccc(cc1)Cl)CC,0.016680921188449865,0.04306025403236089,1 +CCOP(=S)(Oc1nc(Cl)c(cc1Cl)Cl)OCC,0.0171141884323489,0.012566395107108207,1 +BrC1COC(C1)(Cn1cncn1)c1ccc(cc1Cl)Cl,0.017185417014945824,0.053028769050118635,1 +CC(NC(=S)[S])CNC(=S)S[Zn],0.017255039351497643,0.07465412245481072,1 +CC(NC(=S)[S])CNC(=S)S[Zn],0.017255039351497643,0.16793286430045296,1 +COCC(N(c1c(C)csc1C)C(=O)CCl)C,0.018129419544573026,0.2826580402554426,1 +Fc1ccc(cc1)C1(Cn2cncn2)OC1c1ccccc1Cl,0.018195270551846183,0.04421228477468209,1 +N#Cc1c(Cl)cccc1Cl,0.0186034162597095,0.07890493307638533,1 +CNC(=O)ON=C(C(=O)N(C)C)SC,0.019109609238234706,0.056548471383657296,1 +CCc1nn(c(c1Cl)C(=O)NCc1ccc(cc1)C(C)(C)C)C,0.019469491695902355,0.01932744862005293,1 +C[n+]1ccc(cc1)c1cc[n+](cc1)C,0.020133908207418557,0.018285286437140467,0.2 +N#Cc1sc2=c(sc1C#N)c(=O)c1c(c2=O)cccc1,0.020248123201460456,0.23063198145893238,1 +c1cc[n+]2c(c1)c1cccc[n+]1CC2,0.021168377697732887,0.07893105200824276,1 +CN1CN(C)CSC1=S,0.022184384932566064,0.05143501540726455,1 
+CNC(=O)ON=C(C(=O)N(C)C)SC,0.022347753176858155,0.0525615588790517,1 +CNC(=O)Oc1cccc2c1OC(C2)(C)C,0.022598624918870935,0.06327057843197975,1 +OC(=O)COc1ccc(cc1Cl)Cl,0.022620602193004043,0.0343951090528196,1 +CSC1=NC(C(=O)N1Nc1ccccc1)(C)c1ccccc1,0.022800155556897562,0.0769113775658633,1 +COC(=O)N(c1ccccc1COc1ccn(n1)c1ccc(cc1)Cl)OC,0.02320682656135787,0.09470190703489169,1 +CCCCCCCCCCCCC1=C(OC(=O)C)C(=O)c2c(C1=O)cccc2,0.02340650588512378,0.060675637920424984,1 +O=C(NC(=O)c1ccccc1Cl)Nc1ccc(cc1)OC(F)(F)F,0.023557308728421166,0.08557931600835092,1 +CON(C(=O)Nc1ccc(c(c1)Cl)Cl)C,0.025090939601491648,0.03923477121002939,1 +CON(C(=O)Nc1ccc(c(c1)Cl)Cl)C,0.025090939601491648,0.028754208201110545,1 +OC(=O)C(Oc1ccc(cc1C)Cl)C,0.02562363979237584,0.016643468119522744,1 +Clc1ccccc1CC(C1(Cl)CC1)(Cn1cncn1)O,0.025625059257949535,0.0660230798561165,1 +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)/C=C(/C(F)(F)F)\Cl,0.026675554368592185,0.02509109884427809,1 +O=C(C1C(C1(C)C)C=C(Cl)Cl)OCc1c(F)c(F)cc(c1F)F,0.026942980220700186,0.09663897903564027,1 +Cc1nn(c(c1C(=O)c1ccc(cc1S(=O)(=O)C)C(F)(F)F)O)C,0.027599589461626675,0.025378541876685114,1 +C=CC1(C)OC(=O)N(C1=O)c1cc(Cl)cc(c1)Cl,0.027961199362093195,0.10035495983582812,1 +OC(=O)C(Oc1ccc(cc1)Oc1nc2c(o1)cc(cc2)Cl)C,0.028167056356499628,0.012381567627824417,1 +CC(N(C(=O)SCC(=C(Cl)Cl)Cl)C(C)C)C,0.028227806467376604,0.009149216533940489,0.1 +CCOP(=S)(Oc1nc(Cl)c(cc1Cl)Cl)OCC,0.028523647387248163,0.01260929482868455,1 +N#Cc1cc(Br)c(c(c1)Br)O,0.028889958940868102,0.029869344404748466,1 +CCOC(=O)C(Cc1cc(c(cc1Cl)F)n1nc(n(c1=O)C(F)F)C)Cl,0.029112705155716945,0.16848705182588955,1 +C#CCOC(=O)C(Oc1ccc(cc1)Oc1ncc(cc1F)Cl)C,0.029164453292198207,0.014784205836078774,1 +Fc1ccc(c(c1)c1ccc(c(c1)Cl)Cl)NC(=O)c1cn(nc1C(F)F)C,0.02921233570136655,0.06361277589588017,1 +CCOC(=O)c1cn2nc(cc2nc1C)OP(=S)(OCC)OCC,0.02946182933426497,0.007637073565493268,1 +CC(N1/C(=N/C(C)(C)C)/SCN(C1=O)c1ccccc1)C,0.029465850912223458,0.07039595644138076,1 
+COC(=O)c1c(Cl)c(Cl)c(c(c1Cl)Cl)C(=O)OC,0.030123726579706293,0.2828087742644706,1 +Clc1c(ccc(c1N)[N+](=O)[O-])Oc1ccccc1,0.030226952270055448,0.1706598767161664,1 +CN(C(=S)SSC(=S)N(C)C)C,0.03036190470594063,0.052029910797683425,1 +COc1nc(Oc2cccc(c2C(=O)[O-])Oc2nc(OC)cc(n2)OC)nc(c1)OC.[Na+],0.030507347552487064,0.38910433529085675,1 +C[n+]1ccc(cc1)c1cc[n+](cc1)C,0.030657230461935643,0.018285286437140467,0.2 +CS/C(=N/OC(=O)N(SN(C(=O)O/N=C(/SC)\C)C)C)/C,0.03103230485504359,0.11836501403389493,1 +CC(C(c1cncnc1)(c1ccc(cc1)OC(F)(F)F)O)C,0.03138138916099924,0.02071602054046362,1 +Fc1ccc(cc1)[Si](c1ccc(cc1)F)(Cn1cncn1)C,0.03170650329869704,0.029386017466380077,1 +CCN(c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-])CC(=C)C,0.03210675757919814,0.1016500364317196,1 +Fc1ccc(cc1)NC(=O)c1cccc(n1)Oc1cccc(c1)C(F)(F)F,0.032154821211279785,0.06431687769950017,1 +CCCn1c(OCCC)nc2c(c1=O)cc(cc2)I,0.03224060518839999,0.11241236083791278,0.10810810810810811 +COc1cc(OC)nc(n1)NC(=O)NS(=O)(=O)c1ncccc1C(F)(F)F,0.0326520524201809,0.7659687318664031,1 +CN1CN(C)CSC1=S,0.03266034652463028,0.05143501540726455,1 +Fc1ccc(cc1)C(c1ccccc1F)(Cn1cncn1)O,0.03385434330908588,0.038703944827712805,1 +CN(/C=N/c1ccc(cc1C)C)/C=N/c1ccc(cc1C)C,0.03408246361134649,0.08953376139918832,1 +CCCCC(c1ccc(cc1)Cl)(Cn1cncn1)C#N,0.03428271152063386,0.07298099293681495,1 +ClC(=CCOc1cc(Cl)c(c(c1)Cl)OCCCOc1ccc(cn1)C(F)(F)F)Cl,0.034818667907167616,0.0362288141006914,1 +O=C(C1=C(C)OCCS1)Nc1ccccc1,0.034848813981213346,0.21864734359359156,1 +CCC(C(=O)OC1=C(C(=O)OC21CCCCC2)c1ccc(cc1Cl)Cl)(C)C,0.03578732146400678,0.04379943535631055,1 +CC(NC(=O)N1CC(=O)N(C1=O)c1cc(Cl)cc(c1)Cl)C,0.03634528529867737,0.07637115370975499,1 +CCOCn1c(c2ccc(cc2)Cl)c(c(c1C(F)(F)F)Br)C#N,0.036799624938222635,0.03897860826866038,1 +C#CCOC(c1ccc(cc1)Cl)C(=O)NCCc1ccc(c(c1)OC)OCC#C,0.0369041241749624,0.09203593406976318,1 +OC(C(C)(C)C)C(n1ncnc1)Cc1ccc(cc1)Cl,0.03744148066760202,0.0613731142050769,1 
+Clc1ccc(cc1)c1ccccc1NC(=O)c1cccnc1Cl,0.03787805062535496,0.16131302749798718,1 +CCOC(=O)CSc1nc(nn1C(=O)N(C)C)C(C)(C)C,0.03816748004747272,0.1637892862116553,1 +O=C(NC(=O)c1c(F)cccc1F)Nc1cc(Cl)c(cc1Cl)OC(C(C(F)(F)F)F)(F)F,0.03990998658130422,0.08948423022911094,1 +O=C(CC(C)(C)C)OC1=C(C(=O)OC21CCCC2)c1c(C)cc(cc1C)C,0.039947970982482275,0.03534257976629232,0.3225806451612903 +CNC(=S)S,0.04011276528748593,0.08008768834713341,1 +CCCN(c1c(cc(cc1[N+](=O)[O-])S(=O)(=O)N)[N+](=O)[O-])CCC,0.04042042788372036,0.12945843909761384,1 +CCCOC/C(=N\c1ccc(cc1C(F)(F)F)Cl)/n1cncc1,0.04049199977868229,0.015563764732395404,1 +OC1(Cn2ncnc2)C(CCC1(C)C)Cc1ccc(cc1)Cl,0.04095937862019833,0.04921201205383799,1 +CCC(=O)Nc1ccc(c(c1)Cl)Cl,0.041269285481015994,0.09057740215879534,1 +CCOC(=O)C(Oc1ccc(cc1)Oc1cnc2c(n1)ccc(c2)Cl)C,0.04157699893895499,0.014607562266177505,1 +Fc1ccc(c(c1)c1ccc(c(c1)Cl)Cl)NC(=O)c1cn(nc1C(F)F)C,0.04200781934177246,0.05498513608625931,1 +CN(C(=S)S[Zn]SC(=S)N(C)C)C,0.04250965492362519,0.060687914488021834,1 +CCOC(=O)CCN(C(C)C)SN(C(=O)Oc1cccc2c1OC(C2)(C)C)C,0.04262807177885238,0.040276083030728126,1 +Clc1ccc(c(c1)Cl)C1(OCCO1)Cn1cncn1,0.042646674541424644,0.04899254563307668,1 +CC(N(C(=O)SCC(=C(Cl)Cl)Cl)C(C)C)C,0.04266993811611698,0.009149216533940489,0.1 +C#CCOS(=O)OC1CCCCC1Oc1ccc(cc1)C(C)(C)C,0.04279938325518071,0.055408460312378546,1 +O=C(CC(C)(C)C)OC1=C(C(=O)OC21CCCC2)c1c(C)cc(cc1C)C,0.042917075351131324,0.046464409855751024,1 +OC1(Cn2ncnc2)C(CCC1(C)C)Cc1ccc(cc1)Cl,0.043148047046675374,0.056602427368467184,1 +CC(Cc1ccc(cc1)C(C)(C)C)CN1CCCCC1,0.04326105065224025,0.013280675424547358,1 +CC(=CC1C(C1(C)C)C(=O)OCc1coc(c1)Cc1ccccc1)C,0.04432099700732809,0.134338708765534,1 +[S-]C(=S)NCCNC(=S)[S-].[Zn+2],0.04460661819584039,0.04860046355249439,1 +O=C1OC(C(=O)N1Nc1ccccc1)(C)c1ccc(cc1)Oc1ccccc1,0.045407278177700156,0.10857645875840657,1 +CCNc1nc(NC(C)C)nc(n1)Cl,0.04636428436773443,0.051943767855990995,1 
+N#CC(c1ccc(c(c1)Oc1ccccc1)F)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.04835505096829608,0.01697256463740768,1 +CCC(C(=O)NCc1ccccc1)Oc1ccc(c(c1)C(F)(F)F)F,0.049813316199071624,0.0631047049249134,1 +O=C(c1cccc(c1C(=O)NC(CS(=O)(=O)C)(C)C)I)Nc1ccc(cc1C)C(C(F)(F)F)(C(F)(F)F)F,0.04982487508940451,0.16092687388776916,1 +CN(C(=S)SSC(=S)N(C)C)C,0.04990997903448147,0.052029910797683425,1 +ClCC1CN(C(=O)C1Cl)c1cccc(c1)C(F)(F)F,0.05030195369030707,0.08103248684079799,1 +C=CCOC(c1ccc(cc1Cl)Cl)Cn1cncc1,0.05047450068604942,0.023118608097902237,1 +CN(C(=O)Oc1nc(nc(c1C)C)N(C)C)C,0.05161859628615915,0.04977891267988713,1 +COCN(c1c(CC)cccc1CC)C(=O)CCl,0.05189661748967905,0.20188678565232013,1 +CNC(=O)Oc1cccc(c1)/N=C/N(C)C,0.051976062085632144,0.09195186539535166,1 +Clc1ccc(cc1)C(c1ccccc1Cl)(c1cncnc1)O,0.05283880559178284,0.04189686717166363,1 +CCO/N=C(\C1=C(O)CC(CC1=O)C1CCCSC1)/CCC,0.052847272941488777,0.11265754814947855,0.2857142857142857 +CC(C(c1ccc(cc1)Cl)(Cn1ncnc1)O)C1CC1,0.05326004956767166,0.03798066345599283,1 +O=C(N/C(=N\OCC1CC1)/c1c(F)c(F)ccc1C(F)(F)F)Cc1ccccc1,0.053352320292409515,0.056521863313852456,1 +C=CCOC(c1ccc(cc1Cl)Cl)Cn1cncc1,0.05350296944357954,0.019218559674619703,1 +C#CCC1=C(C)C(CC1=O)OC(=O)C1C(C1(C)C)C=C(C)C,0.05492821614526029,0.10390334684834944,1 +CNC1=C(c2cccc(c2)C(F)(F)F)C(=O)C(O1)c1ccccc1,0.055204779037407746,0.08269089323283962,1 +CCOc1ccc2c(c1)C(=CC(N2)(C)C)C,0.05522147585284508,0.1387754887585445,1 +O=C(c1ccc(cc1S(=O)(=O)C)C(F)(F)F)c1cnoc1C1CC1,0.05566064749641608,0.02298512893865435,1 +COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1Cl,0.05590140200157206,0.2350095494055934,1 +COc1nc(nc(n1)C)NC(=O)[N-]S(=O)(=O)c1cc(I)ccc1C(=O)OC.[Na+],0.05611797964648073,0.2676949335249149,1 +O=C(C(Oc1ccc(cc1)Oc1cnc2c(n1)ccc(c2)Cl)C)OCCON=C(C)C,0.05632150550177753,0.015406382072853698,1 +Clc1ccccc1c1nnc(nn1)c1ccccc1Cl,0.05706818624978773,0.08896364017396666,1 +CCCN(C(=O)n1cncc1)CCOc1c(Cl)cc(cc1Cl)Cl,0.05707983190600125,0.061911009434330255,1 
+CCOCCN(C(=C(C)C)c1ccccc1)C(=O)CCl,0.057470413386035736,0.1275914675240323,1 +CC1COC(O1)(Cn1cncn1)c1ccc(cc1Cl)Oc1ccc(cc1)Cl,0.05932124091140686,0.0727699851914961,1 +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C(c1ccc(cc1)Cl)C(C)C,0.05953797389131243,0.020978319280864646,1 +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C(c1ccc(cc1)Cl)C(C)C,0.05953797389131243,0.020417506344953266,1 +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C(c1ccc(cc1)Cl)C(C)C,0.05953797389131243,0.019365048465195316,1 +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)(C)C,0.06009909138187043,0.032860975562319344,1 +CC(OC(=O)C(c1ccc(cc1)Br)(c1ccc(cc1)Br)O)C,0.060497742776698574,0.1495196727139019,1 +C=CC=CCC1=C(C)C(CC1=O)OC(=O)C1C(C1(C)C)C=C(C)C.C=CC=CCC1=C(C)C(CC1=O)OC(=O)C1C(C1(C)C)C=C(C(=O)OC)C,0.06134969850332702,0.12306134086604247,1 +O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(c(c1)Cl)OC(C(OC(F)(F)F)F)(F)F,0.062106180868884746,0.07069517565485765,1 +COC(=O)c1cccc(c1S(=O)(=O)NC(=O)Nc1nc(OCC(F)(F)F)nc(n1)N(C)C)C,0.062140866929396014,0.248120536258991,1 +Clc1cc(F)c(cc1C(=O)NS(=O)(=O)N(C(C)C)C)n1c(=O)cc(n(c1=O)C)C(F)(F)F,0.06269313377509025,0.029112705155716952,0.10416666666666667 +ClCC(=O)N(c1c(C)cccc1C)Cn1cccn1,0.06336648858092589,0.1332496670549599,1 +CCOP(=S)(SCn1c(=O)oc2c1ccc(c2)Cl)OCC,0.0636200517424888,0.008434010240467348,1 +ClC(=CC1C(C1(C)C)C(=O)OCc1cccc(c1)Oc1ccccc1)Cl,0.06389160712181856,0.056430499830820414,1 +CSc1nnc(c(=O)n1N)C(C)(C)C,0.06393266242893511,0.013701160159437665,0.11538461538461539 +O=C(NS(=O)(=O)c1c(C)cccc1C(=O)O)Nc1nc(OCC(F)(F)F)nc(n1)N(C)C,0.06396281173215432,0.15839611977121426,1 +OC(C(C)(C)C)C(n1ncnc1)Oc1ccc(cc1)Cl,0.06424027322808253,0.0714520296319689,1 +O=C(N(C)C)Nc1ccc(c(c1)Cl)n1nc(oc1=O)C(C)(C)C,0.06493710428214157,0.021477732951960638,1 +O=C(NC(=O)c1c(F)cccc1F)Nc1cc(Cl)c(c(c1F)Cl)F,0.06507320207279278,0.06084688873507753,1 +CC1N(C(=O)NC2CCCCC2)C(=O)SC1c1ccc(cc1)Cl,0.06546156290207059,0.041977666914404244,1 +CCCCN(SN(C(=O)Oc1cccc2c1OC(C2)(C)C)C)CCCC,0.06569530810416269,0.021138552025194924,1 
+CCOc1ccc(cc1)C(COCc1cccc(c1)Oc1ccccc1)(C)C,0.06773123883198195,0.1279190797644839,1 +C#CCOS(=O)OC1CCCCC1Oc1ccc(cc1)C(C)(C)C,0.06904967382858089,0.08650638636803425,1 +CCO/N=C(/C1=C(O)CC(CC1=O)c1c(C)cc(cc1C)C)\CC,0.06981686853252955,0.11290411668826239,1 +CO/N=C(/c1ccccc1COc1cc(C)ccc1C)\C(=O)NC,0.07046793589427701,0.314493998741278,1 +COc1cc(OC)n2c(n1)nc(n2)S(=O)(=O)Nc1c(Cl)ccc(c1Cl)C,0.07172655770478076,0.24864999556659206,1 +C#CCOS(=O)OC1CCCCC1Oc1ccc(cc1)C(C)(C)C,0.07190296604559293,0.05421753035009689,1 +CO/N=C(\c1ccccc1CO/N=C(/c1cccc(c1)C(F)(F)F)\C)/C(=O)OC,0.07272797449373557,0.17533172033775735,1 +O=C(N(C)C)Nc1cccc(c1)C(F)(F)F,0.07395704796137248,0.053248039540905706,1 +OC(C(C)(C)C)C(n1cncn1)Oc1ccc(cc1)c1ccccc1,0.07409262028018154,0.1535614611373911,1 +ClCCOc1ccccc1S(=O)(=O)NC(=O)Nc1nc(C)nc(n1)OC,0.07465930346752149,0.27773056338281416,1 +Cn1nc(c(c1)C(=O)Nc1cccc2c1C1CCC2C1=C(Cl)Cl)C(F)F,0.07583481070072216,0.0461951218232978,1 +N#C/N=C(/N(Cc1ccc(nc1)Cl)C)\C,0.07859017665904088,0.04505453842232625,1 +NC(=N)NCCCCCCCCNC(=N)N,0.08102032708037427,0.04459021281877348,1 +CCOC(=O)NCCOc1ccc(cc1)Oc1ccccc1,0.08196801536106943,0.1646969251307165,1 +Clc1cc(cnc1CNC(=O)c1c(Cl)cccc1Cl)C(F)(F)F,0.08212099927021806,0.024768757321864513,1 +O=C(C(C)(C)C)C(n1ncnc1)Oc1ccc(cc1)Cl,0.08272375649019124,0.06543705860142954,1 +COP(=S)(Oc1ccc(c(c1)C)[N+](=O)[O-])OC,0.08332310268057162,0.011289815763514382,1 +ClC(SN1C(=O)c2c(C1=O)cccc2)(Cl)Cl,0.08430066662269543,0.09817808803261632,1 +CCN(c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-])CC(=C)C,0.08701831648543702,0.10048074651217331,1 +COc1cc(ccc1OC)/C(=C/C(=O)N1CCOCC1)/c1ccc(cc1)Cl,0.08766124641710438,0.09875624800076958,1 +CCOC(=O)CC(C(=O)OCC)SP(=S)(OC)OC,0.08778355070659401,0.004737027498363926,1 +CCCN(c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-])CCC,0.08947770521301585,0.04727481758871042,1 +N#CC(c1ccccc1)(Cn1cncn1)CCc1ccc(cc1)Cl,0.09203781459712614,0.053374219703886394,1 
+Clc1ccc2c(c1)ncc(n2)Oc1ccc(cc1)OC(C(=O)OCC1CCCO1)C,0.09210345974638111,0.011353292778411973,1 +NC(=O)c1c(Cl)cccc1Cl,0.09261856560930491,0.08591160820223859,1 +COc1cc(ccc1OC)/C(=C/C(=O)N1CCOCC1)/c1ccc(cc1)Cl,0.09281779032399287,0.08717488588981356,1 +Clc1ccc(c(c1)Cl)NC(=O)C1(CC1)C(=O)O,0.09303171987631087,0.07557961997756538,1 +COC(=O)NC(=S)Nc1ccccc1NC(=S)NC(=O)OC,0.09345959256991566,0.26870233958934475,1 +OC(=O)COc1ccc(cc1C)Cl,0.0947069010825298,0.04426982712431076,1 +Cc1nnc(c(=O)n1N)c1ccccc1,0.09643315995145703,0.14758316159354984,1 +[O-][N+](=O)/N=C/1\NCCN1Cc1ccc(nc1)Cl,0.0973945952590747,0.13032534293969533,1 +COc1c(OC)cc(c(c1OC)C(=O)c1c(OC)ncc(c1C)Cl)C,0.09950572862076837,0.303134327475591,1 +COc1ncc(c2n1nc(n2)NS(=O)(=O)c1c(cccc1C(F)(F)F)OCC(F)F)OC,0.1034404543369562,0.21197350920915925,1 +CCCN(c1c(cc(cc1[N+](=O)[O-])S(=O)(=O)N)[N+](=O)[O-])CCC,0.10393824312956665,0.13107642839933267,1 +COCc1c(F)c(F)c(c(c1F)F)COC(=O)C1C(C1(C)C)/C=C/C,0.10573252781458294,0.08851686119004981,1 +S=C1NCCNC(=S)S[Mn+2]S1,0.10855557507359069,0.038417173954574634,1 +CCNc1nc(NC(C)C)nc(n1)Cl,0.10941971287651023,0.023699434768191727,1 +COC(=O)c1c(nc(c(c1CC(C)C)C1=NCCS1)C(F)(F)F)C(F)F,0.11151045388522976,0.1783067523541709,0.16 +O=C(c1cccc(c1C(=O)NC(CS(=O)(=O)C)(C)C)I)Nc1ccc(cc1C)C(C(F)(F)F)(C(F)(F)F)F,0.115769562707734,0.16092687388776916,1 +COc1c(OC)cc(c(c1OC)C(=O)c1c(OC)ncc(c1C)Cl)C,0.11727460798675288,0.303134327475591,1 +CNC(=O)O/N=C(\SC)/C,0.11836501403389492,0.026052127905166686,1 +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.12010651237688001,0.017629061431206107,1 +CNC(=O)Oc1cc(C)c(c(c1)C)SC,0.1242747128033579,0.01659260666947232,1 +c1coc(c1)c1nc2c([nH]1)cccc2,0.12486833177320307,0.14928260732342855,1 +CO/C=C(\c1ccccc1COc1cccc(n1)C(F)(F)F)/C(=O)OC,0.1252316956521325,0.10792834060304858,1 +CON(C(=O)Nc1ccc(c(c1)Cl)Cl)C,0.12545469800745823,0.08572873540501698,1 +ClC(=CC1C(C1(C)C)C(=O)OCc1cccc(c1)Oc1ccccc1)Cl,0.12778321424363712,0.05749565678663053,1 
+O=C(N(C)C)Nc1cccc(c1)C(F)(F)F,0.12919765885228982,0.045845688053057596,1 +[O-][N+](=O)/N=C(\NC)/NCc1cnc(s1)Cl,0.13016764551401042,1.1362376254520026,0.23076923076923078 +CCc1ccc(cc1)C(=O)NN(C(C)(C)C)C(=O)c1cc(C)cc(c1)C,0.13731668655832788,0.5134702813140367,1 +Cc1cccc2c1n1cnnc1s2,0.1373938645607217,0.19681387136689427,1 +CCC(Nc1c(cc(cc1[N+](=O)[O-])C(C)(C)C)[N+](=O)[O-])C,0.13747135609511818,0.04812730013014119,1 +CC(C(c1cncnc1)(c1ccc(cc1)OC(F)(F)F)O)C,0.13801406108477293,0.02238566497087926,1 +CC(OC(=O)Nc1cccc(c1)Cl)C,0.14040948460452124,0.08671233605999264,1 +COc1nc(C)nc(n1)N(C(=O)NS(=O)(=O)c1ccccc1C(=O)O)C,0.14421924681891674,0.24337175539925646,1 +Clc1ccccc1CC(C1(Cl)CC1)(Cn1nc[nH]c1=S)O,0.1452393166315865,0.03879388107080294,1 +CC(=O)O.CCCCCCCCCCCCNC(=N)N,0.1461167287581588,0.10263505101799683,1 +CCOc1cc(ccc1[N+](=O)[O-])Oc1ccc(cc1Cl)C(F)(F)F,0.14653013191720715,0.024823910652283242,1 +c1ccc(cc1)Nc1ccccc1,0.14773454395291782,0.12245503297742702,1 +CC(N1C(=O)c2ccccc2NS1(=O)=O)C,0.14982590230152565,0.12640643328259163,1 +N#Cc1c(Cl)c(C#N)c(c(c1Cl)Cl)Cl,0.15042627044387033,0.08243149771542137,1 +COC(=O)NC(=S)Nc1ccccc1NC(=S)NC(=O)OC,0.15479245019392282,0.28657183120817714,1 +OC(=O)C(Oc1ccc(cc1Cl)Cl)C,0.15527684755838006,0.016086093578697225,1 +Cc1nc(Nc2ccccc2)nc(c1)C1CC1,0.15801924849469393,0.16887323351376893,1 +CC(OC(=O)Nc1ccccc1)C,0.16181616210899355,0.14633879628836383,1 +C#CC(NC(=O)c1cc(Cl)cc(c1)Cl)(C)C,0.16593276232681306,0.05314488180888698,1 +O=C1N(c2cc(Cl)cc(c2)Cl)C(=O)C2(C1(C)C2)C,0.16893203350457175,0.08671261321187769,1 +Cc1ccc(c2c1cccc2)C,0.1728291127183792,0.10352660953011326,1 +Clc1cc(Cl)c(cc1n1nc(n(c1=O)C(F)F)C)NS(=O)(=O)C,0.1730416993562668,0.019936495077846474,1 +CCN(C(=O)C(Oc1cccc2c1cccc2)C)CC,0.17526912017369997,0.20026950731392412,1 +CCCCCCC(c1cc(cc(c1OC(=O)/C=C/C)[N+](=O)[O-])[N+](=O)[O-])C,0.17563456769307506,0.22483093246893712,1 +CCOCN(c1c(C)cccc1CC)C(=O)CCl,0.17607780933998252,0.10612134197624272,1 
+CCCCCCCCCC[N+](CCCCCCCCCC)(C)C.[Cl-],0.1767583631976715,0.5202137438013086,1 +Clc1ccc(cc1)CCC(C(C)(C)C)(Cn1cncn1)O,0.17867678986550448,0.05314189537646974,1 +N#Cc1cc(C)c(c(c1)C(=O)NC)NC(=O)c1cc(nn1c1ncccc1Cl)Br,0.17901230859828976,0.030845016985805107,1 +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.18015976856532,0.016307999222054163,1 +CC1=NNC(=O)N(C1)/N=C/c1cccnc1,0.18091653347462547,0.8264782088052985,1 +N#CCNC(=O)c1cnccc1C(F)(F)F,0.19244308898713228,0.18396100074583865,1 +CCCCCCC(c1cc(cc(c1OC(=O)/C=C\C)[N+](=O)[O-])[N+](=O)[O-])C,0.19484459853450517,0.12123612571900029,1 +c1scc(n1)c1nc2c([nH]1)cccc2,0.19876005527119617,0.0875690712634714,1 +Cn1nc(c(c1)C(=O)Nc1ccccc1C1CC1C1CC1)C(F)F,0.2021971466240455,0.04672471563475691,1 +CO/C=C(\c1ccccc1Oc1ncnc(c1)Oc1ccccc1C#N)/C(=O)OC,0.20427010160523304,0.1924110484439188,1 +COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1CCC(F)(F)F,0.2098341392275743,0.10359077423092204,1 +O=C1CCCC(=O)C1C(=O)c1ccc(cc1Cl)S(=O)(=O)C,0.2189994026791292,0.002447483645886486,1 +ClCC(=O)N(c1c(C)cccc1CC)COC(C)C,0.22199225860138957,0.1280135192798158,1 +CC(Nc1nc(NC(C)C)nc(n1)Cl)C,0.22201922216305578,0.07003774391915464,1 +Nc1cnn(c(=O)c1Cl)c1ccccc1,0.2255879747532767,0.05570688290625887,1 +CNC(=O)Oc1ccccc1OC(C)C,0.22939978025412716,0.05695835209106234,1 +CCNC(=O)C(OC(=O)Nc1ccccc1)C,0.23278744254805916,0.2688088403129588,1 +Clc1c(ccc(c1N)[N+](=O)[O-])Oc1ccccc1,0.23425888009292972,0.16213639684416756,1 +Cl/C=C/CO/N=C(\C1=C(O)CC(CC1=O)CC(SCC)C)/CC,0.2389478027971563,0.19671826575084897,1 +CCOCN(c1c(C)cccc1CC)C(=O)CCl,0.24799169923196304,0.07165946917369154,1 +CCc1cc(C)cc(c1c1c(OC(=O)C(C)(C)C)n2n(c1=O)CCOCC2)CC,0.24968092026794356,0.06646847904503225,1 +[S]C(=S)NCCNC(=S)S[Mn],0.2525424903682367,0.04075145933254611,1 +CC(CC(c1sccc1NC(=O)c1cn(nc1C(F)(F)F)C)C)C,0.2545841068122505,0.13075689109193567,1 +CCOCN(c1c(C)cccc1CC)C(=O)CCl,0.2557761861991325,0.10157267923887335,1 +Cc1ccc(cc1)N(S(=O)(=O)N(C)C)SC(Cl)(Cl)F,0.25917417547047744,0.07855841127983834,1 
+Fc1ccc(cc1)Oc1ccnc2c1c(Cl)cc(c2)Cl,0.25962686686321285,0.04972859326379051,1 +CC(Oc1cccc(c1)NC(=O)c1ccccc1C(F)(F)F)C,0.2690918752347788,0.08909548105359592,1 +Nc1cnn(c(=O)c1Cl)c1ccccc1,0.270705569703932,0.06995787809045063,1 +COc1cc(OC)nc(n1)NC(=O)NS(=O)(=O)c1ncccc1S(=O)(=O)CC,0.2804534946915948,0.6959703502985611,1 +OC(=O)COc1ccc(cc1Cl)Cl,0.28049546719325014,0.034331532886187406,1 +CCCC1COC(O1)(Cn1cncn1)c1ccc(cc1Cl)Cl,0.2805209905967611,0.03954696343660991,1 +Cc1ccc(cc1)N(S(=O)(=O)N(C)C)SC(Cl)(Cl)F,0.2879713060783083,0.07855841127983834,1 +CCCCOCCOCCOCc1cc2OCOc2cc1CCC,0.29547465787728056,0.0683920470541598,0.1 +CNC(=O)Oc1cccc2c1cccc2,0.2991731924668564,0.15220135307866764,1 +CN(C(=S)[S-])C.CN(C(=S)[S-])C.CN(C(=S)[S-])C.[Fe+3],0.30012414094866885,0.03664069651809872,1 +Fc1ccc(c(c1)F)NC(=O)c1cccnc1Oc1cccc(c1)C(F)(F)F,0.3033262936121485,0.040464380681156376,1 +CCCCCCCCSC(=O)Oc1cc(Cl)nnc1c1ccccc1,0.3034972489425892,0.10177125384326928,1 +COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1C(=O)OC,0.31203800675365617,0.10306738554824552,1 +ClCC(=O)N(c1c(C)cccc1C)Cn1cccn1,0.31323206744613685,0.1388909198134489,1 +O=C(NC(=O)c1c(F)cccc1F)Nc1cc(Cl)c(c(c1F)Cl)F,0.32143014109471235,0.058379476811434815,1 +ClC(SN1C(=O)C2C(C1=O)CC=CC2)(Cl)Cl,0.3260262207586085,0.23843829210981765,0.2 +CCCCCCC(c1cc(cc(c1OC(=O)/C=C\C)[N+](=O)[O-])[N+](=O)[O-])C,0.3315102548955885,0.1454764467989611,1 +OC(=O)COc1ccc(cc1Cl)Cl,0.33930903289506065,0.03674313560429932,1 +CCCCC(c1ccc(cc1)Cl)(Cn1cncn1)C#N,0.3670674304254852,0.07196366656054391,1 +Cc1cc(C)cc(c1)C(=O)N(C(C)(C)C)NC(=O)c1ccc2c(c1C)CCCO2,0.3678012132205545,0.27389970955162496,1 +CCN(C(=O)C(Oc1cccc2c1cccc2)C)CC,0.36852210915226874,0.16923914313012559,1 +Cc1nnc(c(=O)n1N)c1ccccc1,0.36891864539658303,0.14647471195254017,1 +O=C(Nc1ccnc(c1)Cl)Nc1ccccc1,0.37548404132262436,0.08265962969203304,1 +O=C(C(C)(C)C)C(n1ncnc1)Oc1ccc(cc1)Cl,0.3880867710275115,0.07120229065098448,1 +CCC(n1c(=O)[nH]c(c(c1=O)Br)C)C,0.39446112244793224,0.503640251987437,0.25 
+CO/N=C(\c1ccccc1Oc1ncnc(c1F)Oc1ccccc1Cl)/C1=NOCCO1,0.39448424715427566,0.08162616264267387,1 +N#Cc1ccc(cc1)C/C(=N/NC(=O)Nc1ccc(cc1)OC(F)(F)F)/c1ccc(cc1)C(F)(F)F,0.394944816927872,0.04926211530037183,1 +O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(cc1)Cl,0.4023390123323988,0.07991360285522836,1 +O=C1N(OCC1(C)C)Cc1ccccc1Cl,0.41719152837532353,0.05991367718355013,1 +CC(=CC1C(C1(C)C)C(=O)OCc1cccc(c1)Oc1ccccc1)C,0.42802021191337764,0.06812573964857704,1 +CC(Oc1ccccn1)COc1ccc(cc1)Oc1ccccc1,0.4315900691721648,0.12243583387510579,1 +CCOC(=O)CC(C(=O)OCC)SP(=S)(OC)OC,0.43286371555320496,0.008236162894755514,1 +CO/C=C(\c1ccccc1COc1cccc(n1)C(F)(F)F)/C(=O)OC,0.4410333629488144,0.10792834060304858,1 +O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(cc1F)Oc1ccc(cc1Cl)C(F)(F)F,0.4460202371248177,0.049224333757024806,1 +N#Cc1c[nH]cc1c1cccc2c1OC(O2)(F)F,0.4553054263341003,0.12725725033199659,1 +CSC(=O)c1cccc2c1snn2,0.4608228380460223,0.0549637606627435,1 +COCN(c1c(CC)cccc1CC)C(=O)CCl,0.4670695574071115,0.21177533873169335,1 +O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(cc1F)Oc1ccc(cc1Cl)C(F)(F)F,0.4705718098105875,0.06418396495173338,1 +ClCC[N+](C)(C)C.[Cl-],0.47447507557122687,1.261437789326052,0.2 +Clc1c(Cl)c([N+](=O)[O-])c(c(c1Cl)Cl)Cl,0.4774244272684517,0.07224143410034059,1 +CC1OC(C)CN(C1)C1CCCCCCCCCCC1,0.48316627385722294,0.005601648043851357,0.2222222222222222 +CCCCc1c(C)nc(nc1OS(=O)(=O)N(C)C)NCC,0.4930161419173511,0.03660545255794927,1 +O=c1[nH]c2CCCc2c(=O)n1C1CCCCC1,0.503640251987437,0.30727385038099597,0.25 +CCOC(=O)OC1=C(C(=O)NC21CCC(CC2)OC)c1cc(C)ccc1C,0.5061016308843888,0.1015896148452947,1 +CCOC(=O)OC1=C(C(=O)NC21CCC(CC2)OC)c1cc(C)ccc1C,0.5061016308843888,0.1015896148452947,1 +N#Cc1c[nH]cc1c1cccc(c1Cl)Cl,0.5061481392686851,0.06860514832511305,1 +CCCOC(=O)NCCCN(C)C.Cl,0.5072793699625824,0.1014922037802734,1 +CCOc1nc(nc(n1)NC)NC(=O)NS(=O)(=O)c1ccccc1C(=O)OC,0.5116896474609399,0.2833675875893271,1 +CCOc1cc(ccc1C1COC(=N1)c1c(F)cccc1F)C(C)(C)C,0.5202976892967504,0.03331836205820965,1 
+COCC(N(c1c(C)cccc1CC)C(=O)CCl)C,0.5285529966699751,0.09081986063352195,1 +[O-][N+](=O)/N=C\1/N(Cc2cnc(s2)Cl)COCN1C,0.5313410671453993,0.11705065605473255,0.23076923076923078 +COC(=O)c1cc(ccc1[N+](=O)[O-])Oc1ccc(cc1Cl)Cl,0.5465743293153008,0.06711842505159077,1 +Clc1ccc(cc1)CN(C(=O)Nc1ccccc1)C1CCCC1,0.5473855891134007,0.048361251776754224,1 +[O-][N+](=O)c1cc(Cl)c(c(c1)Cl)N,0.5651787298028309,0.1751390284551102,1 +COC(=O)c1csc(c1S(=O)(=O)NC(=O)n1nc(n(c1=O)C)OC)C,0.5993972829782238,1.0148375272967118,1 +COCCN(c1c(C)cccc1C)C(=O)CCl,0.6139034987494355,0.1390796686283413,1 +O=Cc1ccco1,0.624453213155231,0.12486833177320306,0.1111111111111111 +COc1c(OC)cc(c(c1OC)C(=O)c1c(OC)ccc(c1C)Br)C,0.6352758309016929,0.06875499955650785,1 +OC1(Cn2ncnc2)/C(=C/c2ccc(cc2)Cl)/CCC1(C)C,0.6406279100538178,0.0659303175711405,1 +CC(OC(=O)NC(C(=O)NC(c1nc2c(s1)cc(cc2)F)C)C(C)C)C,0.6543197874203039,0.3913277156537192,1 +CCCOC(=O)NCCCN(C)C.Cl,0.6674728552139242,0.1014922037802734,1 +ClCC(=O)N(c1c(C)cccc1CC)COC(C)C,0.7047373288933002,0.18006974794657446,1 +CCOc1nc(F)cc2n1nc(n2)S(=O)(=O)Nc1c(Cl)cccc1C(=O)OC,0.7561469746838736,0.15887585713526345,1 +COc1cc(OC)nc(n1)NC(=O)NS(=O)(=O)Cc1ccccc1C(=O)O,0.7795645307119917,0.370537825344271,1 +Clc1ccc(c(n1)C(=O)O)Cl,0.7812519531298828,0.28910519558310543,1 +Clc1ccc(c(n1)C(=O)O)Cl,0.7812519531298828,0.29250825632761424,1 +COC(=O)NS(=O)(=O)c1ccc(cc1)N,0.7817895162025876,0.32172060768096433,1 +ClCC[N+](C)(C)C.[Cl-],0.7907917926187115,2.8607832080275912,0.2 +CCOc1cc(ccc1OCC)NC(=O)OC(C)C,0.8241033622809132,0.35721426039200926,1 +OC(=O)c1nc(Cl)c(c(c1Cl)N)Cl,0.8282972172278201,0.3802071151378962,1 +ClCC[N+](C)(C)C.[Cl-],0.860381470369158,1.261437789326052,0.2 +ClCCP(=O)(O)O,0.9066120392542251,4.1102099979069795,0.16666666666666666 +Nc1nc(NC2CC2)nc(n1)N,0.9387196585948812,0.09185975434766332,1 +O=C(C1(C)CCCCC1)Nc1ccc(c(c1Cl)Cl)O,0.9662594125910484,0.4032551433167842,1 +OC(=O)c1nc(Cl)c(c(c1Cl)N)Cl,1.0353715215347752,0.3802071151378962,1 
+Cc1cc(C)nc(n1)Nc1ccccc1,1.1091497729605546,0.10587990647536236,1 +COc1cccc(c1C)C(=O)NN(C(C)(C)C)C(=O)c1cc(C)cc(c1)C,1.1154252951100516,0.18143584072688565,1 +Oc1ccccc1c1ccccc1,1.1750384237564568,0.12270365808404284,1 +COc1cc(OC)nc(n1)NC(=O)NS(=O)(=O)Nc1ccccc1C(=O)N(C)C,1.1780461209768547,0.4643030652501503,1 +CCCOc1nn(c(=O)n1C)C(=O)[N-]S(=O)(=O)c1ccccc1C(=O)OC.[Na+],1.1894202967675005,0.3690275404021419,1 +CO/N=C(\c1ccccc1COc1ccccc1C)/C(=O)OC,1.1967534090558043,0.10555973307076402,1 +OC(=O)Cc1cccc2c1cccc2,1.205650068257516,0.12789294483841956,1 +COC(=O)Nc1nc2c([nH]1)cccc2,1.3076226134187396,0.06947884063218772,1 +COc1cc(OC)nc(n1)NC(=O)NS(=O)(=O)N(S(=O)(=O)C)C,1.3411855059279478,0.5688398904999331,1 +COc1ncc(c2n1nc(n2)S(=O)(=O)Nc1c(F)cccc1F)F,1.391657397996453,0.2568559278403449,0.24324324324324326 +O=C(Nc1cc(F)cc(c1)F)N/N=C(/c1ncccc1C(=O)O)\C,1.4120001283962829,0.16386253811184753,1 +CCCN(c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-])CCC,1.4316432834082535,0.059544784520966634,1 +CCC(c1noc(c1)NC(=O)c1c(OC)cccc1OC)(CC)C,1.5854670852219546,0.18859272947054131,1 +CC(C1(C)N=C(NC1=O)c1nc2ccccc2cc1C(=O)O)C,1.605986191473768,0.2428137170977884,1 +CCc1c(C(=O)O)c(=O)cnn1c1ccc(cc1)Cl,1.6864553664875628,0.05046250020331599,1 +O/C(=C\1/C(=O)CC(CC1=O)C(=O)O)/C1CC1,1.752821172367082,1.8740405032498018,0.35294117647058826 +C[N+]1(C)CCCCC1.[Cl-],1.790706021930536,0.10999358670499064,1 +CCC(c1noc(c1)NC(=O)c1c(OC)cccc1OC)(CC)C,1.8050858655278421,0.07580850240005735,1 +CCC(=O)C1=C([O-])CC(CC1=O)C(=O)[O-].[Ca+2],1.874040503249802,0.8805851892527955,1 +OC(=O)COc1nc(F)c(c(c1Cl)N)Cl,1.9605490478397496,0.14404836568635776,1 +Clc1ccccc1CC(C1(Cl)CC1)(Cn1nc[nH]c1=S)O,2.178589749473798,0.044494450061192195,1 +COc1cc(OC)n2c(n1)nc(n2)NS(=O)(=O)c1c(OC)nccc1C(F)(F)F,2.302288500094267,0.08283257780127506,1 +CO/N=C(\c1ccccc1COc1ccccc1C)/C(=O)OC,2.4002085592886893,0.07353278767027785,1 +COC(=O)CC(c1ccc(cc1)Cl)NC(=O)C(C(C)C)NC(=O)OC(C)C,2.5070128670931195,0.10806290920415802,1 
+COc1nn(c(=O)n1C)C(=O)NS(=O)(=O)c1ccccc1OC(F)(F)F,2.5233463155295692,0.107945327220545,1 +COc1cc(OC)nc(n1)NC(=O)NS(=O)(=O)c1n2ccccc2nc1S(=O)(=O)CC,2.7556956072872962,0.5237598413587932,1 +CCCOC(=O)NCCCN(C)C.Cl,3.0347765817059753,0.1014922037802734,1 +Oc1ccccc1c1ccccc1,3.119727015073393,0.13804976267570115,1 +OC(=O)CNCP(=O)(O)O,3.3121771958019575,0.700841565636653,0.16666666666666666 +Cc1cnc2c(c1)ccc(c2C(=O)O)Cl,3.3387517363764943,0.06061841733867432,1 +[O-]P(=O)OCC.[O-]P(=O)OCC.[O-]P(=O)OCC.[Al+3],3.6853523538557287,0.0028016911425965685,1 +C[N+]1(C)CCCCC1.[Cl-],4.570309399255547,0.10999358670499064,1 +Clc1cc(N)c(c(n1)C(=O)O)Cl,4.830587434212229,0.6461033364698784,1 +CNC(=N[N](=O)[O])NCC1COCC1,4.900819965040488,0.13016764551401044,0.12 +COc1cc(OC)nc(n1)NC(=O)NS(=O)(=O)c1ncccc1C(=O)N(C)C,5.08765706618306,0.5851124569365994,1 +OC(=O)CNCP(=O)(O)O,5.559726007239,0.700841565636653,0.16666666666666666 +Oc1ccccc1c1ccccc1,5.875192118782284,0.15340790034125226,1 +OC(=O)CNCP(=O)(O)O,7.180326992327815,0.700841565636653,0.16666666666666666 diff --git a/paper/data/swiss-cv.id b/paper/data/swiss-cv.id new file mode 100644 index 0000000..7c4d169 --- /dev/null +++ b/paper/data/swiss-cv.id @@ -0,0 +1 @@ +56c42c7e2b72ed1141000001 diff --git a/paper/data/swiss-test-predictions.csv b/paper/data/swiss-test-predictions.csv index e29dfc8..b570805 100644 --- a/paper/data/swiss-test-predictions.csv +++ b/paper/data/swiss-test-predictions.csv @@ -1,150 +1,150 @@ -SMILES,LOAEL,Confidence,Dataset -COP(=O)(OC=C(Cl)Cl)OC,0.0015614663384413924,0.25,swiss-prediction -CCOP(=S)(OCC)SCSP(=S)(OCC)OCC,0.0021000613932692083,1,swiss-prediction -CCSCSP(=S)(OCC)OCC,0.0021391291181705988,1,swiss-prediction -COP(=O)(NC(=O)C)SC,0.0023317481611294373,1,swiss-prediction -CCOP(=S)(SCSC(C)(C)C)OCC,0.0038932756645553637,1,swiss-prediction -CCCSP(=O)(SCCC)OCC,0.00465020663878965,1,swiss-prediction -CCCCOC(=O)C(Oc1ccc(cc1)Oc1ccc(cn1)C(F)(F)F)C,0.006594681734518869,1,swiss-prediction 
-CCOP(=S)(SCn1c(=O)oc2c1ccc(c2)Cl)OCC,0.007407343935315939,1,swiss-prediction -CCOC(=O)c1cn2nc(cc2nc1C)OP(=S)(OCC)OCC,0.007637073565493268,1,swiss-prediction -CCOP(=O)(Oc1ccc(c(c1)C)SC)NC(C)C,0.007657024526944333,1,swiss-prediction -CCOC(=O)CC(C(=O)OCC)SP(=S)(OC)OC,0.007932473192811151,1,swiss-prediction -CCOP(=O)(SC(CC)C)SC(CC)C,0.008579634171466552,1,swiss-prediction -CSc1ccc(cc1C)OP(=S)(OC)OC,0.008959434615561151,1,swiss-prediction -CC(N(C(=O)SCC(=C(Cl)Cl)Cl)C(C)C)C,0.009149216533940489,0.1,swiss-prediction -CCOP(=S)(Oc1ccc2c(c1)oc(=O)c(c2C)Cl)OCC,0.009462543754698434,1,swiss-prediction -COc1sc(=O)n(n1)CSP(=S)(OC)OC,0.011073447351926287,1,swiss-prediction -CCOP(=S)(Oc1cc(C)nc(n1)C(C)C)OCC,0.011528157789546231,1,swiss-prediction -CCOP(=S)(Oc1nc(Cl)c(cc1Cl)Cl)OCC,0.01260929482868455,1,swiss-prediction -COP(=S)(Oc1ccc(c(c1)C)[N+](=O)[O-])OC,0.01284173174704369,1,swiss-prediction -CSc1nnc(c(=O)n1N)C(C)(C)C,0.013701160159437665,0.11538461538461539,swiss-prediction -CNC(=O)CSP(=S)(OC)OC,0.014593717469688338,1,swiss-prediction -CCOC(=O)C(Oc1ccc(cc1)Oc1cnc2c(n1)ccc(c2)Cl)C,0.015316912166292015,1,swiss-prediction -N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.015809693317509144,1,swiss-prediction -OC(=O)C(Oc1ccc(cc1Cl)Cl)C,0.017069971996617916,1,swiss-prediction -N#CC(c1ccc(c(c1)Oc1ccccc1)F)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.017779187285422816,1,swiss-prediction -N#Cc1nn(c(c1S(=O)C(F)(F)F)N)c1c(Cl)cc(cc1Cl)C(F)(F)F,0.018032363548935507,1,swiss-prediction -N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C(c1ccc(cc1)Cl)C(C)C,0.018130739436787953,1,swiss-prediction -OC(=O)C(Oc1ccc(cc1)Oc1ncc(cc1Cl)C(F)(F)F)C,0.018461826477538752,1,swiss-prediction -C=CCOC(c1ccc(cc1Cl)Cl)Cn1cncc1,0.019218559674619714,1,swiss-prediction -Clc1cc(Cl)c(cc1n1nc(n(c1=O)C(F)F)C)NS(=O)(=O)C,0.019936495077846474,1,swiss-prediction -CNC(=O)Oc1cc(C)c(c(c1)C)SC,0.020160859255457103,1,swiss-prediction -CCN(c1nc(cc(n1)C)OP(=S)(OC)OC)CC,0.0206066733541471,1,swiss-prediction 
-CC(C(c1cncnc1)(c1ccc(cc1)OC(F)(F)F)O)C,0.02071602054046363,1,swiss-prediction -CCOP(=S)(Oc1ncn(n1)c1ccccc1)OCC,0.0209058951160662,1,swiss-prediction -CCCCN(SN(C(=O)Oc1cccc2c1OC(C2)(C)C)C)CCCC,0.02176055906933658,1,swiss-prediction -COP(=S)(SCn1nnc2c(c1=O)cccc2)OC,0.022759394852161156,1,swiss-prediction -COP(=S)(Oc1nc(Cl)c(cc1Cl)Cl)OC,0.023258622289143393,1,swiss-prediction -N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)(C)C,0.027036059762033778,1,swiss-prediction -O=C(c1ccc(cc1S(=O)(=O)C)C(F)(F)F)c1cnoc1C1CC1,0.027386062506459995,1,swiss-prediction -CCOc1cc(ccc1[N+](=O)[O-])Oc1ccc(cc1Cl)C(F)(F)F,0.029097658324682158,1,swiss-prediction -Fc1ccc(cc1)[Si](c1ccc(cc1)F)(Cn1cncn1)C,0.030000496093829623,1,swiss-prediction -N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Br)Br,0.033983552550211814,1,swiss-prediction -OC(=O)COc1ccc(cc1Cl)Cl,0.03674313560429932,1,swiss-prediction -COP(=S)(SCN1C(=O)c2c(C1=O)cccc2)OC,0.03862209230395826,1,swiss-prediction -CCOCn1c(c2ccc(cc2)Cl)c(c(c1C(F)(F)F)Br)C#N,0.040036364983302354,1,swiss-prediction -CON(C(=O)Nc1ccc(c(c1)Cl)Cl)C,0.04005668682016992,1,swiss-prediction -CCN(C(=O)SCc1ccc(cc1)Cl)CC,0.04008581787410761,1,swiss-prediction -CON(C(=O)Nc1ccc(cc1)Br)C,0.041158230988177814,1,swiss-prediction -CC1N(C(=O)NC2CCCCC2)C(=O)SC1c1ccc(cc1)Cl,0.041544797190431346,1,swiss-prediction -CCCC1COC(O1)(Cn1cncn1)c1ccc(cc1Cl)Cl,0.042629025508833636,1,swiss-prediction -Clc1cc(ccc1Oc1ccc(c(c1)C(=O)NS(=O)(=O)C)[N+](=O)[O-])C(F)(F)F,0.04346838792923881,1,swiss-prediction -O=C(N(C)C)Nc1cccc(c1)C(F)(F)F,0.045035773822150356,1,swiss-prediction -OC(=O)COc1nc(Cl)c(cc1Cl)Cl,0.04528624604587442,1,swiss-prediction -COP(=O)(SC)N,0.045296304153967855,0.13333333333333333,swiss-prediction -BrC1COC(C1)(Cn1cncn1)c1ccc(cc1Cl)Cl,0.04710913483831366,1,swiss-prediction -OC(C(Cl)(Cl)Cl)(c1ccc(cc1)Cl)c1ccc(cc1)Cl,0.04843867148274343,1,swiss-prediction -N#CC(c1ccccc1)(Cn1cncn1)CCc1ccc(cc1)Cl,0.04878763252869801,1,swiss-prediction 
-OC(=O)COc1ccc(cc1C)Cl,0.04905212014498487,1,swiss-prediction -Clc1ccc(cc1)C(c1ccccc1Cl)(c1cncnc1)O,0.049460296466489834,1,swiss-prediction -CN(C(=O)Oc1nc(nc(c1C)C)N(C)C)C,0.04977891267988713,1,swiss-prediction -CN1CN(C)CSC1=S,0.05143501540726455,1,swiss-prediction -CCNc1nc(NC(C)C)nc(n1)Cl,0.051943767855990995,1,swiss-prediction -CN(C(=S)SSC(=S)N(C)C)C,0.052029910797683425,1,swiss-prediction -CCCN(C(=O)n1cncc1)CCOc1c(Cl)cc(cc1Cl)Cl,0.05287343441882649,1,swiss-prediction -Clc1ccc(c(c1)Cl)C1(OCCO1)Cn1cncn1,0.053013598502549705,1,swiss-prediction -C#CCOS(=O)OC1CCCCC1Oc1ccc(cc1)C(C)(C)C,0.05421753035009689,1,swiss-prediction -CC(Cc1ccc(cc1)C(C)(C)C)CN1CC(C)OC(C1)C,0.05500615548608445,1,swiss-prediction -Clc1ccc(cc1)CCC(C(C)(C)C)(Cn1cncn1)O,0.05500705393091043,1,swiss-prediction -COC(=O)N(C(=O)N1COC2(C(=N1)c1ccc(cc1C2)Cl)C(=O)OC)c1ccc(cc1)OC(F)(F)F,0.056463299399384,1,swiss-prediction -CNC(=O)ON=C(C(=O)N(C)C)SC,0.056548471383657296,1,swiss-prediction -ClC(=CC1C(C1(C)C)C(=O)OCc1cccc(c1)Oc1ccccc1)Cl,0.058035645801953625,1,swiss-prediction -O=C(NC(=O)c1c(F)cccc1F)Nc1cc(Cl)c(c(c1F)Cl)F,0.058379476811434815,1,swiss-prediction -Clc1ccc(cc1)CN(C(=O)Nc1ccccc1)C1CCCC1,0.058445620408330214,1,swiss-prediction -CNC(=O)Oc1ccccc1OC(C)C,0.05950240879470488,1,swiss-prediction -O=C1N(OCC1(C)C)Cc1ccccc1Cl,0.060568960163232294,1,swiss-prediction -CCC(Nc1c(cc(cc1[N+](=O)[O-])C(C)(C)C)[N+](=O)[O-])C,0.06059577630169314,1,swiss-prediction -CNC(=O)Oc1cccc2c1OC(C2)(C)C,0.06131182057778625,1,swiss-prediction -O=C(C(C)(C)C)C(n1ncnc1)Oc1ccc(cc1)Cl,0.06552995555017295,1,swiss-prediction -CC(=CC1C(C1(C)C)C(=O)OCc1cccc(c1)Oc1ccccc1)C,0.06621783334677835,1,swiss-prediction -CCCCOCCOCCOCc1cc2OCOc2cc1CCC,0.0683920470541598,0.1,swiss-prediction -COC(=O)Nc1nc2c([nH]1)cccc2,0.06899951625487367,1,swiss-prediction -CC(Nc1nc(NC(C)C)nc(n1)Cl)C,0.07003774391915464,1,swiss-prediction -CC(NC(=O)N1CC(=O)N(C1=O)c1cc(Cl)cc(c1)Cl)C,0.0706115065820919,1,swiss-prediction 
-O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(c(c1)Cl)OC(C(OC(F)(F)F)F)(F)F,0.07121509909913679,1,swiss-prediction -Clc1ccccc1c1nnc(nn1)c1ccccc1Cl,0.07184435965438675,1,swiss-prediction -CCCCC(c1ccc(cc1)Cl)(Cn1cncn1)C#N,0.07298099293681495,1,swiss-prediction -CCOc1ccc2c(c1)C(=CC(N2)(C)C)C,0.07326983970840215,1,swiss-prediction -OC(C(C)(C)C)C(n1ncnc1)Oc1ccc(cc1)Cl,0.07342623822245768,1,swiss-prediction -ClCC1CN(C(=O)C1Cl)c1cccc(c1)C(F)(F)F,0.07688860200812843,1,swiss-prediction -CCC(=O)Nc1ccc(c(c1)Cl)Cl,0.07845200347176315,1,swiss-prediction -Cc1ccc(cc1)N(S(=O)(=O)N(C)C)SC(Cl)(Cl)F,0.07855841127983834,1,swiss-prediction -Clc1c(Cl)c([N+](=O)[O-])c(c(c1Cl)Cl)Cl,0.07983484928350948,1,swiss-prediction -N#Cc1c(Cl)c(C#N)c(c(c1Cl)Cl)Cl,0.08243149771542137,1,swiss-prediction -O=C(N(C)C)Nc1ccc(c(c1)Cl)Cl,0.08391380469075903,1,swiss-prediction -O=C1N(c2cc(Cl)cc(c2)Cl)C(=O)C2(C1(C)C2)C,0.08671261321187769,1,swiss-prediction -c1scc(n1)c1nc2c([nH]1)cccc2,0.0875690712634714,1,swiss-prediction -CC(OC(=O)Nc1cccc(c1)Cl)C,0.08873389182433263,1,swiss-prediction -N#Cc1c(Cl)cccc1Cl,0.0895509304878687,1,swiss-prediction -COCC(N(c1c(C)cccc1CC)C(=O)CCl)C,0.09081986063352185,1,swiss-prediction -Nc1nc(NC2CC2)nc(n1)N,0.09185975434766332,1,swiss-prediction -CC(Oc1cccc(c1)NC(=O)c1ccccc1C(F)(F)F)C,0.09329533119250687,1,swiss-prediction -CCOCN(c1c(C)cccc1CC)C(=O)CCl,0.09353524776797524,1,swiss-prediction -CCC(c1noc(c1)NC(=O)c1c(OC)cccc1OC)(CC)C,0.1002087398883717,1,swiss-prediction -C=CC1(C)OC(=O)N(C1=O)c1cc(Cl)cc(c1)Cl,0.10035495983582815,1,swiss-prediction -C#CCC1=C(C)C(CC1=O)OC(=O)C1C(C1(C)C)C=C(C)C,0.10390334684834944,1,swiss-prediction -CCOC(=O)C(Cc1cc(c(cc1Cl)F)n1nc(n(c1=O)C(F)F)C)Cl,0.11119329800179159,1,swiss-prediction -CC(Oc1ccccn1)COc1ccc(cc1)Oc1ccccc1,0.11147220458900187,1,swiss-prediction -O=C1OC(C(=O)N1Nc1ccccc1)(C)c1ccc(cc1)Oc1ccccc1,0.11177541637848143,1,swiss-prediction -ClC(SN1C(=O)c2c(C1=O)cccc2)(Cl)Cl,0.11359098888992505,1,swiss-prediction 
-CCCCCCCCSC(=O)Oc1cc(Cl)nnc1c1ccccc1,0.12117285527596837,1,swiss-prediction -c1ccc(cc1)Nc1ccccc1,0.12245503297742702,1,swiss-prediction -O=Cc1ccco1,0.12486833177320306,0.1111111111111111,swiss-prediction -CCOc1ccc(cc1)C(COCc1cccc(c1)Oc1ccccc1)(C)C,0.12674868328005046,1,swiss-prediction -O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(cc1)Cl,0.12765610432548405,1,swiss-prediction -CC(Oc1cc(c(cc1Cl)Cl)n1nc(oc1=O)C(C)(C)C)C,0.12778348323867558,1,swiss-prediction -COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1C(=O)OC,0.12938220260072822,1,swiss-prediction -CNC(=O)Oc1cccc2c1cccc2,0.13169394021834496,1,swiss-prediction -Oc1ccccc1c1ccccc1,0.13372301955518534,1,swiss-prediction -COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1CCC(F)(F)F,0.13759637912715172,1,swiss-prediction -CC(=CC1C(C1(C)C)C(=O)OCc1coc(c1)Cc1ccccc1)C,0.13967461554721775,1,swiss-prediction -COC(=O)N(c1ccccc1COc1ccn(n1)c1ccc(cc1)Cl)OC,0.14037755043662442,1,swiss-prediction -OC(=O)COc1nc(F)c(c(c1Cl)N)Cl,0.14404836568635776,1,swiss-prediction -CC(OC(=O)C(c1ccc(cc1)Br)(c1ccc(cc1)Br)O)C,0.1495196727139019,1,swiss-prediction -CCOC(=O)NCCOc1ccc(cc1)Oc1ccccc1,0.15662666843714387,1,swiss-prediction -OC(C(C)(C)C)C(n1cncn1)Oc1ccc(cc1)c1ccccc1,0.1588970703544765,1,swiss-prediction -N#Cc1c[nH]cc1c1cccc2c1OC(O2)(F)F,0.16073556275027362,1,swiss-prediction -N#Cc1sc2=c(sc1C#N)c(=O)c1c(c2=O)cccc1,0.1622542917276107,1,swiss-prediction -ClCCOc1ccccc1S(=O)(=O)NC(=O)Nc1nc(C)nc(n1)OC,0.16698214361799124,1,swiss-prediction -Cc1nc(Nc2ccccc2)nc(c1)C1CC1,0.16887323351376893,1,swiss-prediction -CC(N1C(=O)c2ccccc2NS1(=O)=O)C,0.1869332752762242,1,swiss-prediction -CCN(C(=O)C(Oc1cccc2c1cccc2)C)CC,0.1870996604409719,1,swiss-prediction -COc1cccc(c1C)C(=O)NN(C(C)(C)C)C(=O)c1cc(C)cc(c1)C,0.19454287979711238,1,swiss-prediction -[O-][N+](=O)c1cc(Cl)c(c(c1)Cl)N,0.19490539203138116,1,swiss-prediction -COC(=O)c1c(nc(c(c1CC(C)C)C1=NCCS1)C(F)(F)F)C(F)F,0.20342339615295477,1,swiss-prediction -COCN(c1c(CC)cccc1CC)C(=O)CCl,0.2117753387316933,1,swiss-prediction 
-O=C(C1=C(C)OCCS1)Nc1ccccc1,0.21864734359359142,1,swiss-prediction -COC(=O)NC(=S)Nc1ccccc1NC(=S)NC(=O)OC,0.2320365574545069,1,swiss-prediction -ClC(SN1C(=O)C2C(C1=O)CC=CC2)(Cl)Cl,0.23843829210981765,0.2,swiss-prediction -CC(C1(C)N=C(NC1=O)c1nc2ccccc2cc1C(=O)O)C,0.2428137170977884,1,swiss-prediction -COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1Cl,0.2515974159735541,1,swiss-prediction -COC(=O)c1c(Cl)c(Cl)c(c(c1Cl)Cl)C(=O)OC,0.2828087742644706,1,swiss-prediction -Cc1cccc2c1n1cnnc1s2,0.28403797861303776,1,swiss-prediction -COC(=O)NS(=O)(=O)c1ccc(cc1)N,0.30649366939843004,1,swiss-prediction -O=C(C1(C)CCCCC1)Nc1ccc(c(c1Cl)Cl)O,0.31288159348953604,1,swiss-prediction -OC(=O)c1nc(Cl)c(c(c1Cl)N)Cl,0.3802071151378962,1,swiss-prediction -CCC(n1c(=O)[nH]c(c(c1=O)Br)C)C,0.503640251987437,0.25,swiss-prediction -CCc1ccc(cc1)C(=O)NN(C(C)(C)C)C(=O)c1cc(C)cc(c1)C,0.5121170140168649,1,swiss-prediction -OC(=O)CNCP(=O)(O)O,0.700841565636653,0.16666666666666666,swiss-prediction -ClCCP(=O)(O)O,4.1102099979069795,0.16666666666666666,swiss-prediction +SMILES,LOAEL_measured_median,LOAEL_predicted,Confidence,Dataset +N#Cc1nn(c(c1S(=O)C(F)(F)F)N)c1c(Cl)cc(cc1Cl)C(F)(F)F,0.0001361095787305931,0.018032363548935507,1,swiss-prediction +OC(=O)C(Oc1ccc(cc1)Oc1ncc(cc1Cl)C(F)(F)F)C,0.0002764719490734748,0.01846182647753877,1,swiss-prediction +CCOP(=S)(SCSC(C)(C)C)OCC,0.000277363084031507,0.0038932756645553637,1,swiss-prediction +CCSCSP(=S)(OCC)OCC,0.0006144925543928096,0.0021391291181705988,1,swiss-prediction +CCOP(=O)(SC(CC)C)SC(CC)C,0.0008728063120409454,0.008579634171466552,1,swiss-prediction +CNC(=O)CSP(=S)(OC)OC,0.001090477150926923,0.014593717469688338,1,swiss-prediction +COP(=O)(SC)N,0.0020548549621536454,0.045296304153967855,0.13333333333333333,swiss-prediction +CSc1ccc(cc1C)OP(=S)(OC)OC,0.0025868753585247565,0.008959434615561151,1,swiss-prediction +CCOP(=S)(Oc1ccc2c(c1)oc(=O)c(c2C)Cl)OCC,0.0033630532459809582,0.009462543754698434,1,swiss-prediction 
+CCOP(=S)(Oc1ncn(n1)c1ccccc1)OCC,0.004149211972577347,0.0209058951160662,1,swiss-prediction +COc1sc(=O)n(n1)CSP(=S)(OC)OC,0.004713372383210075,0.011073447351926287,1,swiss-prediction +CCOP(=S)(OCC)SCSP(=S)(OCC)OCC,0.0049417895576815835,0.0021000613932692083,1,swiss-prediction +CCOP(=O)(Oc1ccc(c(c1)C)SC)NC(C)C,0.005274306112287868,0.007657024526944333,1,swiss-prediction +CC(Cc1ccc(cc1)C(C)(C)C)CN1CC(C)OC(C1)C,0.005601648043851348,0.05500615548608445,1,swiss-prediction +COP(=S)(Oc1nc(Cl)c(cc1Cl)Cl)OC,0.006200913183680908,0.023258622289143393,1,swiss-prediction +OC(C(Cl)(Cl)Cl)(c1ccc(cc1)Cl)c1ccc(cc1)Cl,0.006747899500347733,0.04843867148274346,1,swiss-prediction +COC(=O)N(C(=O)N1COC2(C(=N1)c1ccc(cc1C2)Cl)C(=O)OC)c1ccc(cc1)OC(F)(F)F,0.006820319665576013,0.056463299399384,1,swiss-prediction +Clc1ccc(cc1)C(c1ccccc1Cl)(c1cncnc1)O,0.007548400798826121,0.049460296466489855,1,swiss-prediction +COP(=S)(SCn1nnc2c(c1=O)cccc2)OC,0.0076105098020530036,0.022759394852161142,1,swiss-prediction +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Br)Br,0.007818698763639501,0.033983552550211814,1,swiss-prediction +CCN(c1nc(cc(n1)C)OP(=S)(OC)OC)CC,0.008187766847509327,0.0206066733541471,1,swiss-prediction +CCCCOC(=O)C(Oc1ccc(cc1)Oc1ccc(cn1)C(F)(F)F)C,0.008855868434313272,0.006594681734518869,1,swiss-prediction +CCCSP(=O)(SCCC)OCC,0.010068539755671456,0.004650206638789641,1,swiss-prediction +COP(=O)(OC=C(Cl)Cl)OC,0.010408382170442241,0.0015614663384413924,0.25,swiss-prediction +CC(Oc1cc(c(cc1Cl)Cl)n1nc(oc1=O)C(C)(C)C)C,0.012455788330375379,0.12778348323867558,1,swiss-prediction +CNC(=O)Oc1cccc2c1OC(C2)(C)C,0.0139433514779606,0.06131182057778638,1,swiss-prediction +N#Cc1c(Cl)c(C#N)c(c(c1Cl)Cl)Cl,0.015042627044387032,0.08243149771542137,1,swiss-prediction +Fc1ccc(cc1)[Si](c1ccc(cc1)F)(Cn1cncn1)C,0.01585325164934852,0.030000496093829623,1,swiss-prediction +CCOP(=S)(Oc1cc(C)nc(n1)C(C)C)OCC,0.01642869699075557,0.011528157789546231,1,swiss-prediction 
+N#Cc1c(Cl)cccc1Cl,0.016568667498017633,0.08955093048786876,1,swiss-prediction +CCOP(=S)(Oc1nc(Cl)c(cc1Cl)Cl)OCC,0.0171141884323489,0.01260929482868455,1,swiss-prediction +BrC1COC(C1)(Cn1cncn1)c1ccc(cc1Cl)Cl,0.017185416989653705,0.047109134838313686,1,swiss-prediction +CCN(C(=O)SCc1ccc(cc1)Cl)CC,0.018038670157326797,0.04008581787410763,1,swiss-prediction +CCCN(C(=O)n1cncc1)CCOc1c(Cl)cc(cc1Cl)Cl,0.01991156926953532,0.05287343441882649,1,swiss-prediction +CCOC(=O)c1cn2nc(cc2nc1C)OP(=S)(OCC)OCC,0.020087610909726116,0.007637073565493268,1,swiss-prediction +COP(=S)(SCN1C(=O)c2c(C1=O)cccc2)OC,0.02048398681663214,0.03862209230395829,1,swiss-prediction +CNC(=O)ON=C(C(=O)N(C)C)SC,0.022347753176858155,0.056548471383657296,1,swiss-prediction +COC(=O)N(c1ccccc1COc1ccn(n1)c1ccc(cc1)Cl)OC,0.02320682656135787,0.14037755043662448,1,swiss-prediction +CON(C(=O)Nc1ccc(c(c1)Cl)Cl)C,0.025090939601491648,0.04005668682016994,1,swiss-prediction +O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(cc1)Cl,0.025427825579407606,0.12765610432548405,1,swiss-prediction +O=C(N(C)C)Nc1ccc(c(c1)Cl)Cl,0.02574063309087087,0.08391380469075903,1,swiss-prediction +CCOC(=O)C(Oc1ccc(cc1)Oc1cnc2c(n1)ccc(c2)Cl)C,0.025750915471868897,0.015316912166292006,1,swiss-prediction +N#Cc1sc2=c(sc1C#N)c(=O)c1c(c2=O)cccc1,0.026997497601947272,0.1622542917276107,1,swiss-prediction +C=CC1(C)OC(=O)N(C1=O)c1cc(Cl)cc(c1)Cl,0.027961199362093195,0.10035495983582815,1,swiss-prediction +N#CC(c1ccc(c(c1)Oc1ccccc1)F)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.028207113064839383,0.017779187285422805,1,swiss-prediction +CCOC(=O)C(Cc1cc(c(cc1Cl)F)n1nc(n(c1=O)C(F)F)C)Cl,0.029112705155716945,0.11119329800179159,1,swiss-prediction +CON(C(=O)Nc1ccc(cc1)Br)C,0.03130067550140176,0.041158230988177856,1,swiss-prediction +CN1CN(C)CSC1=S,0.03266034652463028,0.05143501540726455,1,swiss-prediction +ClCC1CN(C(=O)C1Cl)c1cccc(c1)C(F)(F)F,0.03316084217977319,0.07688860200812843,1,swiss-prediction 
+CCCCC(c1ccc(cc1)Cl)(Cn1cncn1)C#N,0.0341788251725187,0.07298099293681495,1,swiss-prediction +CCOP(=S)(SCn1c(=O)oc2c1ccc(c2)Cl)OCC,0.0345288315455876,0.007407343935315931,1,swiss-prediction +CCOCn1c(c2ccc(cc2)Cl)c(c(c1C(F)(F)F)Br)C#N,0.03508230910777224,0.040036364983302354,1,swiss-prediction +Clc1ccc(c(c1)Cl)C1(OCCO1)Cn1cncn1,0.03798219426521996,0.05301359850254968,1,swiss-prediction +CC(C(c1cncnc1)(c1ccc(cc1)OC(F)(F)F)O)C,0.038746408312020406,0.02071602054046363,1,swiss-prediction +CC(N(C(=O)SCC(=C(Cl)Cl)Cl)C(C)C)C,0.04102878665011248,0.009149216533940489,0.1,swiss-prediction +COP(=S)(Oc1ccc(c(c1)C)[N+](=O)[O-])OC,0.042491175292669145,0.01284173174704369,1,swiss-prediction +O=C1OC(C(=O)N1Nc1ccccc1)(C)c1ccc(cc1)Oc1ccccc1,0.045140176541360745,0.1117754163784813,1,swiss-prediction +CN(C(=S)SSC(=S)N(C)C)C,0.04783039657471141,0.052029910797683425,1,swiss-prediction +CN(C(=O)Oc1nc(nc(c1C)C)N(C)C)C,0.05161859628615915,0.04977891267988713,1,swiss-prediction +C=CCOC(c1ccc(cc1Cl)Cl)Cn1cncc1,0.05350296944357954,0.019218559674619703,1,swiss-prediction +COCN(c1c(CC)cccc1CC)C(=O)CCl,0.05375006811431045,0.21177533873169335,1,swiss-prediction +CCOc1ccc2c(c1)C(=CC(N2)(C)C)C,0.05522147585284508,0.07326983970840215,1,swiss-prediction +O=C(c1ccc(cc1S(=O)(=O)C)C(F)(F)F)c1cnoc1C1CC1,0.05566064749641608,0.027386062506459995,1,swiss-prediction +Clc1ccccc1c1nnc(nn1)c1ccccc1Cl,0.05706818876652619,0.07184435965438675,1,swiss-prediction +OC(=O)COc1ccc(cc1C)Cl,0.057322598023636456,0.0490521201449849,1,swiss-prediction +CCOC(=O)NCCOc1ccc(cc1)Oc1ccccc1,0.057576722828150476,0.1566266684371439,1,swiss-prediction +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C(c1ccc(cc1)Cl)C(C)C,0.05953797389131243,0.018130739436787953,1,swiss-prediction +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)(C)C,0.06009909138187043,0.027036059762033778,1,swiss-prediction +CC(OC(=O)C(c1ccc(cc1)Br)(c1ccc(cc1)Br)O)C,0.06061453423316249,0.1495196727139019,1,swiss-prediction 
+Clc1cc(ccc1Oc1ccc(c(c1)C(=O)NS(=O)(=O)C)[N+](=O)[O-])C(F)(F)F,0.06267621846158328,0.04346838792923878,1,swiss-prediction +COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1Cl,0.06288907725176857,0.2515974159735541,1,swiss-prediction +CCCCN(SN(C(=O)Oc1cccc2c1OC(C2)(C)C)C)CCCC,0.06569530810416269,0.02176055906933658,1,swiss-prediction +O=C(NC(=O)c1c(F)cccc1F)Nc1cc(Cl)c(c(c1F)Cl)F,0.06690994773808298,0.058379476811434815,1,swiss-prediction +CSc1nnc(c(=O)n1N)C(C)(C)C,0.06719929397120725,0.013701160159437665,0.11538461538461539,swiss-prediction +O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(c(c1)Cl)OC(C(OC(F)(F)F)F)(F)F,0.06758613754894155,0.07121509909913679,1,swiss-prediction +CCOc1ccc(cc1)C(COCc1cccc(c1)Oc1ccccc1)(C)C,0.06839527058523667,0.12674868328005046,1,swiss-prediction +C#CCOS(=O)OC1CCCCC1Oc1ccc(cc1)C(C)(C)C,0.06904967382858089,0.05421753035009689,1,swiss-prediction +OC(=O)COc1nc(Cl)c(cc1Cl)Cl,0.0721330354641874,0.04528624604587442,1,swiss-prediction +O=C(N(C)C)Nc1cccc(c1)C(F)(F)F,0.07395704796137248,0.04503577382215038,1,swiss-prediction +OC(C(C)(C)C)C(n1cncn1)Oc1ccc(cc1)c1ccccc1,0.07409262028018154,0.15889707035447642,1,swiss-prediction +CCNc1nc(NC(C)C)nc(n1)Cl,0.07789199862212233,0.051943767855990995,1,swiss-prediction +O=C(C1=C(C)OCCS1)Nc1ccccc1,0.08117223892684251,0.21864734359359156,1,swiss-prediction +CNC(=O)Oc1cc(C)c(c(c1)C)SC,0.0827758354922366,0.020160859255457103,1,swiss-prediction +OC(C(C)(C)C)C(n1ncnc1)Oc1ccc(cc1)Cl,0.08452667530010859,0.07342623822245768,1,swiss-prediction +O=C(C(C)(C)C)C(n1ncnc1)Oc1ccc(cc1)Cl,0.08510674803234901,0.06552995555017291,1,swiss-prediction +CC(NC(=O)N1CC(=O)N(C1=O)c1cc(Cl)cc(c1)Cl)C,0.08783443947180365,0.0706115065820919,1,swiss-prediction +CCC(=O)Nc1ccc(c(c1)Cl)Cl,0.09170952329114665,0.07845200347176311,1,swiss-prediction +N#CC(c1ccccc1)(Cn1cncn1)CCc1ccc(cc1)Cl,0.09203781459712614,0.048787632528698034,1,swiss-prediction +ClC(=CC1C(C1(C)C)C(=O)OCc1cccc(c1)Oc1ccccc1)Cl,0.09583741068272783,0.058035645801953625,1,swiss-prediction 
+OC(=O)C(Oc1ccc(cc1Cl)Cl)C,0.0967821447110451,0.017069971996617916,1,swiss-prediction +COP(=O)(NC(=O)C)SC,0.10236623790044716,0.0023317481611294373,1,swiss-prediction +O=C1N(c2cc(Cl)cc(c2)Cl)C(=O)C2(C1(C)C2)C,0.11086164698737522,0.08671261321187769,1,swiss-prediction +COC(=O)c1c(nc(c(c1CC(C)C)C1=NCCS1)C(F)(F)F)C(F)F,0.11151045292283465,0.20342339615295477,1,swiss-prediction +Clc1ccc(cc1)CCC(C(C)(C)C)(Cn1cncn1)O,0.11516531274058425,0.05500705393091043,1,swiss-prediction +COC(=O)NC(=S)Nc1ccccc1NC(=S)NC(=O)OC,0.12412602138191925,0.2320365574545069,1,swiss-prediction +CCOc1cc(ccc1[N+](=O)[O-])Oc1ccc(cc1Cl)C(F)(F)F,0.12855945536132327,0.029097658324682158,1,swiss-prediction +CCc1ccc(cc1)C(=O)NN(C(C)(C)C)C(=O)c1cc(C)cc(c1)C,0.1367492600870436,0.5121170140168657,1,swiss-prediction +c1scc(n1)c1nc2c([nH]1)cccc2,0.1490700414533971,0.0875690712634714,1,swiss-prediction +CC(N1C(=O)c2ccccc2NS1(=O)=O)C,0.14982590230152565,0.18693327527622422,1,swiss-prediction +N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.15013314047110002,0.015809693317509144,1,swiss-prediction +Cc1cccc2c1n1cnnc1s2,0.1506048130761757,0.28403797861303787,1,swiss-prediction +ClC(SN1C(=O)c2c(C1=O)cccc2)(Cl)Cl,0.15174119992085178,0.11359098888992508,1,swiss-prediction +CCC(Nc1c(cc(cc1[N+](=O)[O-])C(C)(C)C)[N+](=O)[O-])C,0.15338553104123837,0.06059577630169314,1,swiss-prediction +Cc1nc(Nc2ccccc2)nc(c1)C1CC1,0.15801925188118618,0.16887323351376893,1,swiss-prediction +c1ccc(cc1)Nc1ccccc1,0.16546268922726798,0.12245503297742702,1,swiss-prediction +Clc1cc(Cl)c(cc1n1nc(n(c1=O)C(F)F)C)NS(=O)(=O)C,0.1730416993562668,0.019936495077846474,1,swiss-prediction +CCCC1COC(O1)(Cn1cncn1)c1ccc(cc1Cl)Cl,0.1767866659490005,0.042629025508833636,1,swiss-prediction +C#CCC1=C(C)C(CC1=O)OC(=O)C1C(C1(C)C)C=C(C)C,0.18559079091504613,0.10390334684834952,1,swiss-prediction +COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1CCC(F)(F)F,0.20017699986539617,0.13759637912715172,1,swiss-prediction 
+CC(=CC1C(C1(C)C)C(=O)OCc1coc(c1)Cc1ccccc1)C,0.2068313193675311,0.13967461554721777,1,swiss-prediction +CCOCN(c1c(C)cccc1CC)C(=O)CCl,0.21666838084755125,0.09353524776797524,1,swiss-prediction +CC(Nc1nc(NC(C)C)nc(n1)Cl)C,0.2198425631222415,0.07003774391915464,1,swiss-prediction +CNC(=O)Oc1ccccc1OC(C)C,0.23417894234275483,0.05950240879470488,1,swiss-prediction +CCCCCCCCSC(=O)Oc1cc(Cl)nnc1c1ccccc1,0.2408184692696632,0.12117285527596833,1,swiss-prediction +O=C1N(OCC1(C)C)Cc1ccccc1Cl,0.253443853488009,0.060568960163232294,1,swiss-prediction +Cc1ccc(cc1)N(S(=O)(=O)N(C)C)SC(Cl)(Cl)F,0.25917417547047744,0.07855841127983834,1,swiss-prediction +CC1N(C(=O)NC2CCCCC2)C(=O)SC1c1ccc(cc1)Cl,0.2594374890563992,0.041544797190431366,1,swiss-prediction +CCN(C(=O)C(Oc1cccc2c1cccc2)C)CC,0.27189561466298434,0.1870996604409719,1,swiss-prediction +OC(=O)COc1ccc(cc1Cl)Cl,0.28049546719325014,0.03674313560429932,1,swiss-prediction +CCOC(=O)CC(C(=O)OCC)SP(=S)(OC)OC,0.2921073325236663,0.007932473192811151,1,swiss-prediction +CNC(=O)Oc1cccc2c1cccc2,0.2981792578159244,0.13169394021834496,1,swiss-prediction +Clc1ccc(cc1)CN(C(=O)Nc1ccccc1)C1CCCC1,0.31170568268957544,0.05844562040833024,1,swiss-prediction +ClCCOc1ccccc1S(=O)(=O)NC(=O)Nc1nc(C)nc(n1)OC,0.31207588849423984,0.16698214361799124,1,swiss-prediction +CCC(n1c(=O)[nH]c(c(c1=O)Br)C)C,0.3169092998307417,0.503640251987437,0.25,swiss-prediction +ClC(SN1C(=O)C2C(C1=O)CC=CC2)(Cl)Cl,0.32935301892961466,0.23843829210981765,0.2,swiss-prediction +CC(=CC1C(C1(C)C)C(=O)OCc1cccc(c1)Oc1ccccc1)C,0.42802021191337764,0.06621783334677835,1,swiss-prediction +CC(Oc1ccccn1)COc1ccc(cc1)Oc1ccccc1,0.43361266621389954,0.11147220458900187,1,swiss-prediction +N#Cc1c[nH]cc1c1cccc2c1OC(O2)(F)F,0.44926154899338216,0.16073556275027356,1,swiss-prediction +Clc1c(Cl)c([N+](=O)[O-])c(c(c1Cl)Cl)Cl,0.47403843842257615,0.07983484928350948,1,swiss-prediction +COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1C(=O)OC,0.4837900188743661,0.12938220260072822,1,swiss-prediction 
+Nc1nc(NC2CC2)nc(n1)N,0.5144905821145022,0.09185975434766332,1,swiss-prediction +CCCCOCCOCCOCc1cc2OCOc2cc1CCC,0.5170806512852409,0.0683920470541598,0.1,swiss-prediction +COCC(N(c1c(C)cccc1CC)C(=O)CCl)C,0.5285529966699751,0.09081986063352195,1,swiss-prediction +O=Cc1ccco1,0.624453213155231,0.12486833177320306,0.1111111111111111,swiss-prediction +[O-][N+](=O)c1cc(Cl)c(c(c1)Cl)N,0.7245881151318344,0.19490539203138116,1,swiss-prediction +COC(=O)c1c(Cl)c(Cl)c(c(c1Cl)Cl)C(=O)OC,0.7681550277825105,0.2828087742644706,1,swiss-prediction +COC(=O)NS(=O)(=O)c1ccc(cc1)N,0.7817895162025876,0.30649366939843004,1,swiss-prediction +OC(=O)c1nc(Cl)c(c(c1Cl)N)Cl,0.8282972172278201,0.3802071151378962,1,swiss-prediction +CC(C1(C)N=C(NC1=O)c1nc2ccccc2cc1C(=O)O)C,0.8351128195663594,0.2428137170977884,1,swiss-prediction +COC(=O)Nc1nc2c([nH]1)cccc2,0.8499546987221808,0.06899951625487367,1,swiss-prediction +CC(Oc1cccc(c1)NC(=O)c1ccccc1C(F)(F)F)C,0.9077984526598573,0.09329533119250691,1,swiss-prediction +O=C(C1(C)CCCCC1)Nc1ccc(c(c1Cl)Cl)O,0.9662594125910484,0.31288159348953604,1,swiss-prediction +ClCCP(=O)(O)O,0.9723587138566308,4.1102099979069795,0.16666666666666666,swiss-prediction +COc1cccc(c1C)C(=O)NN(C(C)(C)C)C(=O)c1cc(C)cc(c1)C,1.1154252951100516,0.19454287979711238,1,swiss-prediction +CCC(c1noc(c1)NC(=O)c1c(OC)cccc1OC)(CC)C,1.5854670852219546,0.1002087398883717,1,swiss-prediction +OC(=O)COc1nc(F)c(c(c1Cl)N)Cl,1.9605490478397496,0.14404836568635776,1,swiss-prediction +CC(OC(=O)Nc1cccc(c1)Cl)C,2.340158076742021,0.08873389182433254,1,swiss-prediction +Oc1ccccc1c1ccccc1,3.119727015073393,0.1337230195551853,1,swiss-prediction +OC(=O)CNCP(=O)(O)O,5.559726007239,0.700841565636653,0.16666666666666666,swiss-prediction diff --git a/paper/data/swiss-test-predictions.id b/paper/data/swiss-test-predictions.id new file mode 100644 index 0000000..fc28534 --- /dev/null +++ b/paper/data/swiss-test-predictions.id @@ -0,0 +1 @@ +56c429cc2b72ed0d79000004 diff --git a/paper/data/swiss.csv~ 
b/paper/data/swiss.csv~ deleted file mode 100644 index 997d8a4..0000000 --- a/paper/data/swiss.csv~ +++ /dev/null @@ -1,494 +0,0 @@ -SMILES,LOAEL parental as dose (mmol/kg bw per day) -[O-][N+]2=NC(N3C=CN=C3)=NC1=CC=C(Cl)C=C12,0.00020190555530632425 -ClC(C=C(C=C1Cl)C(F)(F)F)=C1N2N=C(C(S(C(F)(F)F)=O)=C2N)C#N,0.00013496580117055152 -CC(C(O)=O)OC2=CC=C(C=C2)OC1=C(Cl)C=C(C(F)(F)F)C=N1,0.0002764719511333511 -S=P(OCC)(OCC)SCSC(C)(C)C,0.00034670385697674235 -CCSCSP(OCC)(OCC)=S,0.0006144925475253195 -CC(OC1=CC=C(OC2=CC=C(Cl)C=C2Cl)C=C1)C(OC)=O,0.0005861906011027885 -S=P(OC)(OC)SCC(NC)=O,0.000872381733741038 -S=P(OC)(OC)OC1=CC(C)=C(SC)C=C1,0.0007185764991867223 -O=P(OC)(OC)SCCS(CC)=O,0.0008932752807580748 -O=P(OCC)(SC(C)CC)SC(C)CC,0.0008210296720157477 -O=P(SC)(OC)N,0.0020548549325897737 -CCS(CCSP(OC)(OC)=O)(=O)=O,0.0011437981092748413 -O=P(OC)(OC)SCCSCC,0.001519854088965729 -O=P(OC)(OC)O/C(C)=C/C(OC)=O,0.0015614663384413926 -O=C1N(P(OCC)(SC(C)CC)=O)CCS1,0.001341107599716744 -CS(C(C=C2Cl)=CC=C2C(C(C(CCC1)=O)C1=O)=O)(=O)=O,0.0012166633663470796 -O=P(OCC)(OC1=CC=C(SC)C(C)=C1)NC(C)C,0.001516363034790411 -O=C(C2=CC=C(S(=O)(C)=O)C=C2[N+]([O-])=O)C1C(CCCC1=O)=O,0.001414591694222218 -O=P(SCCC)(OCC)SCCC,0.002063225311384027 -S=P(OC)(OC)OC1=CC=C([N+]([O-])=O)C=C1,0.0018996422061897484 -S=P(OC)(OC)OC1=CC=C([N+]([O-])=O)C=C1,0.002089606472099723 -S=P(OC)(OC)SCN1C(SC(OC)=N1)=O,0.0021168829879502555 -S=P(OC)(OC)OC1=CC(C)=C(SC)C=C1,0.0025868754613179463 -N4(N=CN=C4)[Sn](C2CCCCC2)(C3CCCCC3)C1CCCCC1,0.0018110419025972907 -CC2(C)OC1=CC=CC(OC(NC)=O)=C1O2,0.0035838244976124515 -O=[N+]([O-])C(Cl)(Cl)Cl,0.006083642787963147 -S=P(OC1=NC(Cl)=C(Cl)C=C1Cl)(OCC)OCC,0.002852364738724816 -ClC1=CC(Cl)=C(OP(OC)(OC)=S)N=C1Cl,0.003100456591840454 -O[Sn](C2CCCCC2)(C3CCCCC3)C1CCCCC1,0.002596303652874617 -CCC(C)C1C(C=CC2(O1)CC3CC(O2)CC=C(C(C(C=CC=C4COC5C4(C(C=C(C5O)C)C(=O)O3)O)C)OC6CC(C(C(O6)C)OC7CC(C(C(O7)C)NC)OC)OC)C)C.C1=CC=C(C=C1)C(=O)O,0.0009918273033473258 
-FC(F)(F)C1=CC=C(N[C@@H]([C@H](C)C)C(OC(C#N)C2=CC(OC3=CC=CC=C3)=CC=C2)=O)C(Cl)=C1,0.001988416717024977 -O=C(O)COC1=NC(Cl)=C(Cl)C=C1Cl,0.0038990829980641837 -O=C(CCC1)C(C(C2=CC=C(S(=O)(C)=O)C(COCC(F)(F)F)=C2Cl)=O)C1=O,0.002381932321850521 -C(C(CCl)O)O,0.009951195933270719 -O=C(NC)OC1=C(OC(C)(C)C2)C2=CC=C1,0.005288078037050265 -FC(F)(F)C(S2)=NN=C2OCC(N(C(C)C)C1=CC=C(F)C=C1)=O,0.0033027779077186826 -S=P(OCC)(OCC)OC(C=C2)=CC1=C2C(C)=C(Cl)C(O1)=O,0.0033630532459809582 -ClC1=NC(NCC)=NC(NC(C)(C)C)=N1,0.005398114462735858 -CCOP(OC2=NN(C=N2)C1=CC=CC=C1)(OCC)=S,0.004149211896481245 -O[Sn](C2CCCCC2)(C3CCCCC3)C1CCCCC1,0.003608862040355308 -O=P(OCC)(OCC)O/C(C1=CC=C(Cl)C=C1Cl)=C/Cl,0.004171650398342553 -CCC1=NN(C(=C1Cl)C(=O)NCC2=CC=C(C=C2)OC3=CC=C(C=C3)C)C,0.003907559846623587 -ClC1=NC(NCC)=NC(NC(C)(C)C)=N1,0.006921775895097049 -CCOP(=S)(OCC)OC1=CC=C(C=C1)[N+](=O)[O-],0.005493362006308507 -O=C(N(C)C)NC1=CC(Cl)=C(Cl)C=C1,0.007293179580314936 -O=P(OCC)(OC1=CC=C(SC)C(C)=C1)NC(C)C,0.005603950401492444 -CC(CC2=CC=C(C(C)(C)C)C=C2)CN1CC(OC(C1)C)C,0.005601648122412352 -ClC1=NC(NCC)=NC(NC(C)(C)C)=N1,0.0074006409463509264 -S=P(OC)(OC)SCN1C(SC(OC)=N1)=O,0.005689123251910172 -S=P(OCC)(OCC)SCSP(OCC)(OCC)=S,0.004681695305160139 -S=P(OC)(OC)SCN1C(C(C=CC=C2)=C2C1=O)=O,0.005672488506643871 -CCC(C)C1C(C=CC2(O1)CC3CC(O2)CC=C(C(C(C=CC=C4COC5C4(C(C=C(C5O)C)C(=O)O3)O)C)OC6CC(C(C(O6)C)OC7CC(C(C(O7)C)O)OC)OC)C)C.CC1C=CC=C2COC3C2(C(C=C(C3O)C)C(=O)OC4CC(CC=C(C1OC5CC(C(C(O5)C)OC6CC(C(C(O6)C)O)OC)OC)C)OC7(C4)C=CC(C(O7)C(C)C)C)O,0.0011546496256700967 -OC(C2=C(Cl)C=CC=C2)(C3=CN=CN=C3)C1=CC=C(Cl)C=C1,0.006038720639060896 -O=C(N(OC)C)NC1=CC=C(Cl)C=C1,0.00931754394759366 -S=P(OC)(OC)OC1=NC(N(CC)CC)=NC(C)=C1,0.0068777238395693234 -OC(C1=CC=C(Cl)C=C1)(C2=CC=C(Cl)C=C2)C(Cl)(Cl)Cl,0.005938151689011985 -O=P(OC)(OC)O/C=C(Cl)\Cl,0.010408382170442241 -O=C(SCC1=CC=CC=C1)N(CCC)CCC,0.009149216533940492 -O=P(SCCC)(OCC)SCCC,0.010068539755671456 -O=P(SC)(OC)NC(C)=O,0.013648831720059621 
-O=C(N(C1=C(CC)C=CC=C1CC)COC)CCl,0.009267253123156974 -NC1=NNC=N1,0.029733601205328832 -NC#N,0.059467202410657664 -CC1(C)C(/C=C(Br)/Br)C1C(OC(C#N)C2=CC(OC3=CC=CC=C3)=CC=C2)=O,0.004948543461552866 -CCC(C)C1C(C=CC2(O1)CC3CC(O2)CC=C(C(C(C=CC=C4COC5C4(C(C=C(C5O)C)C(=O)O3)O)C)OC6CC(C(C(O6)C)OC7CC(C(C(O7)C)NC)OC)OC)C)C.C1=CC=C(C=C1)C(=O)O,0.0024795682583683147 -OC(C2=C(Cl)C=CC=C2)(C3=CN=CN=C3)C1=CC=C(Cl)C=C1,0.007548400798826121 -O=C(OC(C#N)C3=CC(OC4=CC=CC=C4)=C(F)C=C3)C(C(C)2C)C2/C=C(Cl)/C1=CC=C(Cl)C=C1,0.004898276703964497 -CCOP(=S)(OCC)OC1=CC=C(C=C1)[N+](=O)[O-],0.008583378006954733 -S=P(OC)(OC)OC1=CC=C([N+]([O-])=O)C=C1,0.009498211030948741 -N#C/N=C1SCCN/1CC2=CC=C(Cl)N=C2,0.009892243396986886 -CN(C)C(S[Zn]SC(N(C)C)=S)=S,0.00817493363915869 -C[N+](C=C2)=CC=C2C1=CC=[N+](C)C=C1,0.013691057325028715 -S=P(OC)(OC)SCN1N=NC(C=CC=C2)=C2C1=O,0.008508644649457775 -O=C(OC(C)1C=C)N(C2=CC(Cl)=CC(Cl)=C2)C1=O,0.009436904951368202 -O=C2C(N)=C(Cl)C(C1=CC=CC=C12)=O,0.013920121360835688 -O=S3OCC1C(CO3)C2(Cl)C(Cl)=C(Cl)C(Cl)1C(Cl)2Cl,0.007126618167084564 -[N+]12=CC=CC=C1C3=[N+](C=CC=C3)CC2,0.015794866515636753 -O=C(OC(C)(C)C)C1=CC=C(CO/N=C/C2=C(OC3=CC=CC=C3)N(C)N=C2C)C=C1,0.0071176254993963305 -CCNC1=NC(NC(C)C)=NC(Cl)=N1,0.014372927711833409 -O=C(OCC)NC1=CC=CC(OC(NC2=CC=CC=C2)=O)=C1,0.010655682947629983 -N#CC(C(Cl)=CC=C1)=C1Cl,0.0186034162597095 -CCS(=O)C1=C(N(N=C1C#N)C2=C(C=C(C=C2Cl)C(F)(F)F)Cl)N,0.008056334643428573 -N#CC1=C(Cl)C(Cl)=C(Cl)C(C#N)=C1Cl,0.012410167132297197 -FC(C(F)(OCC(C2=CC=C(C=C2Cl)Cl)CN1C=NC=N1)F)F,0.00913621053742932 -N#CC1=CC(F)=C(OC2=CC=C(OC(C)C(OCCCC)=O)C=C2)C=C1,0.009625729959721526 -CN(CN1C)CSC1=S,0.022184384932566064 -OC(C2=C(Cl)C=CC=C2)(C3=CN=CN=C3)C1=CC=C(Cl)C=C1,0.01086969686236098 -ClC1=CC=C(C2=NN(C(N(C(OC)=O)C4=CC=C(OC(F)(F)F)C=C4)=O)COC(C(OC)=O)2C3)C3=C1,0.006820319575237628 -O=C(OC(C(C)(C)C)=N2)N2C1=CC(OC(C)C)=C(Cl)C=C1Cl,0.010428101697378017 -CC1=C(C2=NOCC2)C(S(C)(=O)=O)=CC=C1C(C3=C(O)N(C)N=C3)=O,0.009906758425540224 
-BrC1=CC=C(NC(N(C)OC)=O)C=C1,0.014357399945172603 -C[N+](C=C2)=CC=C2C1=CC=[N+](C)C=C1,0.020133908207418557 -CC(C(OCCCC)=O)OC1=CC=C(OC2=CC=C(C(F)(F)F)C=N2)C=C1,0.009886227162529472 -ClC1=C([N+]([O-])=O)C(NC2=NC=C(C(F)(F)F)C=C2Cl)=C([N+]([O-])=O)C=C1C(F)(F)F,0.00821343424858256 -COC(C(NNC(OC(C)C)=O)=C2)=CC=C2C1=CC=CC=C1,0.01298475189092086 -[N+]12=CC=CC=C1C3=[N+](C=CC=C3)CC2,0.021168377697732887 -ClC1=C([N+]([O-])=O)C(NC2=NC=C(C(F)(F)F)C=C2Cl)=C([N+]([O-])=O)C=C1C(F)(F)F,0.008385443694386083 -S=P(OC)(OC)SCN1C(C(C=CC=C2)=C2C1=O)=O,0.012605530348696702 -O=C(C(C(C)=N2)=CN1C2=CC(OP(OCC)(OCC)=S)=N1)OCC,0.010713392485187262 -CNC(ON=C(SC)C(N(C)C)=O)=O,0.019109609238234706 -FC(F)(F)C1=CC=C(C(S(C)(=O)=NC#N)C)C=N1,0.015292167409562457 -S=C(S)NC,0.04011276528748593 -O=C(N(CC)CC)SCC1=CC=C(Cl)C=C1,0.016680921188449865 -CC(C)(C1=CC=C(CCOC2=NC=NC3=CC=CC=C23)C=C1)C,0.014686613132547533 -ClC1C(Cl)C(Cl)C(Cl)C(Cl)C1Cl,0.01616065190994549 -O=C2C1=CC(F)=CC=C1N=C(N4N=CN=C4)N2C3=C(Cl)C=C(Cl)C=C3,0.01268036889326992 -CC1(CNC(=NC1)NN=C(C=CC2=CC=C(C=C2)C(F)(F)F)C=CC3=CC=C(C=C3)C(F)(F)F)C,0.009909494556264633 -CNC(ON=C(SC)C(N(C)C)=O)=O,0.022347753176858155 -ClC1=CC(Cl)=C(OCC(O)=O)C=C1,0.022620602193004043 -NC1=NNC=N1,0.059467202410657664 -O=C(NC)OC1=C(OC(C)(C)C2)C2=CC=C1,0.022598624918870935 -S=P(OCC)(OCC)OC1=NC(C(C)C)=NC(C)=C1,0.01642869699075557 -O=C(N(C1=C(C)SC=C1C)C(C)COC)CCl,0.018129419544573026 -O=S3OCC1C(CO3)C2(Cl)C(Cl)=C(Cl)C(Cl)1C(Cl)2Cl,0.01228727229779905 -FC1=CC=C(C2(CN4C=NC=N4)C(C3=CC=CC=C3Cl)O2)C=C1,0.015162725459871818 -ClC(Cl)(Cl)C1=NSC(OCC)=N1,0.020199571769078495 -C[Si](C2=CC=C(C=C2)F)(C3=CC=C(F)C=C3)CN1C=NC=N1,0.01585325164934852 -ClC1=CC(C(F)(F)F)=CC=C1OC2=CC=C([N+]([O-])=O)C(C(NS(C)(=O)=O)=O)=C2,0.011395676083924233 -ClC1=CC=C2C(N=CC(OC3=CC=C(OC(C)C(OCCO/N=C(C)/C)=O)C=C3)=N2)=C1,0.011264301100355506 -[S]C(NC(C)CNC(S[Zn])=S)=S,0.017255039351497643 -[S]C(NC(C)CNC(S[Zn])=S)=S,0.017255039351497643 -C1CNC(=S)NC1,0.04303491887745652 
-CC(C)(C)C1=CC=C(CSC2=C(Cl)C(N(C(C)(C)C)N=C2)=O)C=C1,0.013701160159437661 -CS/C(C)=N/OC(N(SN(C(O/N=C(SC)\C)=O)C)C)=O,0.014105593115928905 -CN(CN1C)CSC1=S,0.03266034652463028 -CCN(CCCC)C1=C([N+]([O-])=O)C=C(C(F)(F)F)C=C1[N+]([O-])=O,0.016105987222784814 -CC1(C)C(/C=C(Br)/Br)C1C(OC(C#N)C2=CC(OC3=CC=CC=C3)=CC=C2)=O,0.010688854065726137 -CC(C)C1C2CCC1C3=C2C(NC(C4=CN(C)N=C4C(F)F)=O)=CC=C3,0.015302732709143212 -O=C(O)C(C)OC1=C(C)C=C(Cl)C=C1,0.02562363979237584 -C1CNC(=S)NC1,0.04819910832192538 -FC(F)(F)/C(Cl)=C/C1C(C)(C)C1C(OCC2=C(F)C(F)=C(C)C(F)=C2F)=O,0.01409010160197152 -S=P(OC1=NC(Cl)=C(Cl)C=C1Cl)(OCC)OCC,0.0171141884323489 -O[Sn](C2CCCCC2)(C3CCCCC3)C1CCCCC1,0.015577821917247702 -O=C(C2=C1C=CC=C2)C(SC(C#N)=C(C#N)S3)=C3C1=O,0.020248123201460456 -FC1=CC=C(C2(CN4C=NC=N4)C(C3=CC=CC=C3Cl)O2)C=C1,0.018195270551846183 -O=C(NCCC2=NC=C(C(F)(F)F)C=C2Cl)C1=CC=CC=C1C(F)(F)F,0.015124216704213374 -O=C(NC1=CC(Cl)=C(Cl)C=C1)N(C)OC,0.025090939601491648 -O=C(NC1=CC(Cl)=C(Cl)C=C1)N(C)OC,0.025090939601491648 -O=C1C(/C(CCC)=N\OCC)=C(O)CC(C2CSCCC2)C1,0.019664101798126703 -BrC(C3)COC(CN2C=NC=N2)3C1=C(Cl)C=C(Cl)C=C1,0.017185417014945824 -CN1N=C(C(Cl)=C1C(NCC2=CC=C(C=C2)C(C)(C)C)=O)CC,0.019469491695902355 -CC1CCC2(CC3CC(O2)CC=C(CC(C=CC=C4COC5C4(C(C=C(C5O)C)C(=O)O3)O)C)C)OC1C,0.01286229964885329 -O=C1N(NC3=CC=CC=C3)C(SC)=NC(C)1C2=CC=CC=C2,0.022800155556897562 -S=C(SSC(N(C)C)=S)N(C)C,0.03036190470594063 -NC1=C(C(OC2=CC=CC=C2)=CC=C1[N+]([O-])=O)Cl,0.030226952270055448 -BrC1=CC(C#N)=CC(Br)=C1O,0.028889958940868102 -C1CC1(C(CC2=CC=CC=C2Cl)(CN3C=NC=N3)O)Cl,0.025625059257949535 -O=C(OC(C)1C=C)N(C2=CC(Cl)=CC(Cl)=C2)C1=O,0.027961199362093195 -FC(F)(F)C(C(F)(F)F)OC(/C=C\C1C(C)(C)C1C(OC(C#N)C2=CC=C(OC3=CC=CC=C3)C=C2)=O)=O,0.014960133059978587 -O=C(C(SCCO2)=C2C)NC1=CC=CC=C1,0.034848813981213346 -FC(F)(F)OC(C=C2)=CC=C2NC(NC(C1=C(Cl)C=CC=C1)=O)=O,0.023557308728421166 -O=C(SC/C(Cl)=C(Cl)\Cl)N(C(C)C)C(C)C,0.028227806467376604 -C[N+](C=C2)=CC=C2C1=CC=[N+](C)C=C1,0.04762340359884257 
-O=C2C(CCCCCCCCCCCC)=C(OC(C)=O)C(C1=CC=CC=C12)=O,0.02340650588512378 -O=C(N1C(C)C)N(C2=CC=CC=C2)CS/C1=N\C(C)(C)C,0.029465850912223458 -O=C(CC)NC1=CC(Cl)=C(Cl)C=C1,0.041269285481015994 -CON(C(OC)=O)C(C=CC=C3)=C3COC2=NN(C=C2)C1=CC=C(Cl)C=C1,0.02320682656135787 -CC(C(O)=O)OC(C=C3)=CC=C3OC2=NC1=CC=C(Cl)C=C1O2,0.028167056356499628 -CCC1CCCC(C(C(=O)C2=CC3C4CC(CC4C(=CC3C2CC(=O)O1)C)OC5C(C(C(C(O5)C)OC)OC)OC)C)OC6CCC(C(O6)C)N(C)C,0.012734890360905185 -C1=C(SC(=N1)S(=O)(=O)CCC(=C(F)F)F)Cl,0.03291071649153446 -CC(C)C(C2=CN=CN=C2)(O)C1=CC=C(OC(F)(F)F)C=C1,0.03138138916099924 -CCCCC(C#N)(C2=CC=C(C=C2)Cl)CN1C=NC=N1,0.03428271152063386 -CN(/C=N/C(C=CC(C)=C2)=C2C)/C=N/C(C=CC(C)=C1)=C1C,0.03408246361134649 -CCNC1=NC(NC(C)C)=NC(Cl)=N1,0.04636428436773443 -S=P(OC1=NC(Cl)=C(Cl)C=C1Cl)(OCC)OCC,0.028523647387248163 -O=C(C(C(Cl)=C(Cl)C(C(OC)=O)=C1Cl)=C1Cl)OC,0.030123726579706293 -C[Si](C2=CC=C(C=C2)F)(C3=CC=C(F)C=C3)CN1C=NC=N1,0.03170650329869704 -CS(C1=CC(C(F)(F)F)=CC=C1C(C2=C(O)N(C)N=C2C)=O)(=O)=O,0.027599589461626675 -CS/C(C)=N/OC(N(SN(C(O/N=C(SC)\C)=O)C)C)=O,0.02821118623185781 -O=C(C2C(/C=C(Cl)/Cl)C(C)2C)OCC1=C(F)C(F)=CC(F)=C1F,0.026942980220700186 -CC(C(OCC#C)=O)OC1=CC=C(OC2=C(F)C=C(Cl)C=N2)C=C1,0.029164453292198207 -OC(C2=CC=C(F)C=C2)(C3=C(F)C=CC=C3)CN1C=NC=N1,0.03385434330908588 -FC(F)(F)C1=CC([N+]([O-])=O)=C(N(CC(C)=C)CC)C([N+]([O-])=O)=C1,0.03210675757919814 -CCOC(CCN(C(C)C)SN(C)C(OC1=CC=CC2=C1OC(C)(C)C2)=O)=O,0.02679478797527864 -O=S3OCC1C(CO3)C2(Cl)C(Cl)=C(Cl)C(Cl)1C(Cl)2Cl,0.02703199905515791 -BrC([H])([H])[H],0.1158644562818127 -CC(C)(C(C(N2C=NC=N2)CC1=CC=C(C=C1)Cl)O)C,0.03744148066760202 -O=C(C(C(C)=N2)=CN1C2=CC(OP(OCC)(OCC)=S)=N1)OCC,0.02946182933426497 -CNC(OC1=CC=CC(/N=C/N(C)C)=C1)=O,0.051976062085632144 -CC(CC2=CC=C(C(C)(C)C)C=C2)CN1CCCCC1,0.04326105065224025 -O=C(OCC)C(Cl)CC1=CC(N2N=C(C)N(C(F)F)C2=O)=C(F)C=C1Cl,0.029112705155716945 -CC(C(C1C(OC(C2=CC=CC(OC3=CC=CC=C3)=C2)C#N)=O)/C=C(C(F)(F)F)\Cl)1C,0.026675554368592185 
-CC2(C)C=C(C)C1=CC(OCC)=CC=C1N2,0.05522147585284508 -CC(NC(N1CC(N(C2=CC(Cl)=CC(Cl)=C2)C1=O)=O)=O)C,0.03634528529867737 -IC1=CC=C(N=C(OCCC)N(CCC)C2=O)C2=C1,0.03224060518839999 -CS/C(C)=N/OC(N(SN(C(O/N=C(SC)\C)=O)C)C)=O,0.03385342347822937 -S=C(SSC(N(C)C)=S)N(C)C,0.04990997903448147 -O=C(OCC)CSC1=NC(C(C)(C)C)=NN1C(N(C)C)=O,0.03816748004747272 -ClC1=CC=C(C2=CC(F)=CC=C2NC(C3=CN(C)N=C3C(F)F)=O)C=C1Cl,0.02921233570136655 -FC(F)(F)C1=CC(OC2=CC=CC(C(NC3=CC=C(F)C=C3)=O)=N2)=CC=C1,0.032154821211279785 -C(CNC(=S)[S-])NC(=S)[S-].[Zn+2],0.04460661819584039 -CN(C)C1=NC(C)=C(C)C(OC(N(C)C)=O)=N1,0.05161859628615915 -ClC1=C(C3(OCCO3)CN2C=NC=N2)C=CC(Cl)=C1,0.042646674541424644 -ClC(N=CC=C3)=C3C(NC1=CC=CC=C1C2=CC=C(Cl)C=C2)=O,0.03787805062535496 -O=C1C(C(C)(C)C)=NN=C(SC)N1N,0.060666030886662975 -O=C(SC/C(Cl)=C(Cl)\Cl)N(C(C)C)C(C)C,0.04266993811611698 -CN(C)C(S[Zn]SC(N(C)C)=S)=S,0.04250965492362519 -OC1(CN3C=NC=N3)C(C)(C)CCC1CC2=CC=C(Cl)C=C2,0.04095937862019833 -O=S(C1=C(C(F)(F)F)C=CC=N1)(NC(NC2=NC(OC)=CC(OC)=N2)=O)=O,0.0326520524201809 -OC(C(Cl)(Cl)Cl)P(OC)(OC)=O,0.05166319030658296 -COC1=CC(=NC(=N1)OC2=C(C(=CC=C2)OC3=NC(=CC(=N3)OC)OC)C(=O)[O-])OC.[Na+],0.030507347552487064 -OC1(CN3C=NC=N3)C(C)(C)CCC1CC2=CC=C(Cl)C=C2,0.043148047046675374 -O=C(N(C1=C(CC)C=CC=C1CC)COC)CCl,0.05189661748967905 -NS(C1=CC([N+]([O-])=O)=C(N(CCC)CCC)C([N+]([O-])=O)=C1)(=O)=O,0.04042042788372036 -CCCOC/C(N2C=CN=C2)=N\C1=CC=C(C=C1C(F)(F)F)Cl,0.04049199977868229 -O=C1C(C(C)(C)C)=NN=C(SC)N1N,0.06719929397120725 -O=C1OC3(CCCCC3)C(OC(C(C)(C)CC)=O)=C1C2=C(Cl)C=C(Cl)C=C2,0.03578732146400678 -CC1=CC(C)=CC(C)=C1C2=C(OC(CC(C)(C)C)=O)C3(CCCC3)OC2=O,0.039947970982482275 -CC1(C)C(C(OCC2=COC(CC3=CC=CC=C3)=C2)=O)C1/C=C(C)\C,0.04432099700732809 -ClC(C=C2)=CC=C2C1=C(C#N)C(Br)=C(C(F)(F)F)N1COCC,0.036799624938222635 -CC(C1=CC=CC=C1)(C)C[Sn](CC(C)(C2=CC=CC=C2)C)(CC(C)(C3=CC=CC=C3)C)O[Sn](CC(C)(C4=CC=CC=C4)C)(CC(C)(C6=CC=CC=C6)C)CC(C)(C5=CC=CC=C5)C,0.014249578440471417 -ClC(C=C(Cl)C=C2)=C2C(OCC=C)CN1C=NC=C1,0.05047450068604942 
-CC(C)(C2=CC=C(C=C2)OC1CCCCC1OS(OCC#C)=O)C,0.04279938325518071 -C#CCOC(C(NCCC2=CC=C(OCC#C)C(OC)=C2)=O)C1=CC=C(Cl)C=C1,0.0369041241749624 -CCOC(C(OC3=CC=C(C=C3)OC2=NC1=CC=C(Cl)C=C1N=C2)C)=O,0.04157699893895499 -CC(C(CN2C=NC=N2)(C3=CC=C(C=C3)Cl)O)C1CC1,0.05326004956767166 -FC(F)(C1=CC=CC(N2CC(C(C2=O)Cl)CCl)=C1)F,0.05030195369030707 -ClC(C=C(Cl)C=C2)=C2C(OCC=C)CN1C=NC=C1,0.05350296944357954 -CC1=CC(C)=CC(C)=C1C2=C(OC(CC(C)(C)C)=O)C3(CCCC3)OC2=O,0.042917075351131324 -CCC1=C(C(NC(C2=CC=CS2)C#N)=O)SC(NCC)=N1,0.05118073847356783 -O=C(C2C(/C=C(C)/C)C(C)2C)OC1CC(C(CC#C)=C1C)=O,0.05492821614526029 -O=C(C(O3)(C)C(C=C2)=CC=C2OC1=CC=CC=C1)N(NC4=CC=CC=C4)C3=O,0.045407278177700156 -BrC([H])([H])[H],0.1790632506173469 -C/C(C)=C(N(C(CCl)=O)CCOCC)/C1=CC=CC=C1,0.057470413386035736 -FC(F)(F)C1=CC=C(OCCCOC2=C(Cl)C=C(OC/C=C(Cl)\Cl)C=C2Cl)N=C1,0.034818667907167616 -FC(F)(F)C1=CC(NC(N(C)C)=O)=CC=C1,0.07395704796137248 -ClC1=C(C2=NN=C(C3=C(Cl)C=CC=C3)N=N2)C=CC=C1,0.05706818624978773 -ClC1=CC=C(C2=CC(F)=CC=C2NC(C3=CN(C)N=C3C(F)F)=O)C=C1Cl,0.04200781934177246 -ClC1=CC=C(CN(C)/C(C)=N/C#N)C=N1,0.07859017665904088 -OC(C2=C(Cl)C=CC=C2)(C3=CN=CN=C3)C1=CC=C(Cl)C=C1,0.05283880559178284 -NC(C1=C(Cl)C=CC=C1Cl)=O,0.09261856560930491 -O=C(N(C2=C(C)C=CC=C2C)CN1N=CC=C1)CCl,0.06336648858092589 -FC(F)(F)C1=C(F)C=CC(OC(CC)C(NCC2=CC=CC=C2)=O)=C1,0.049813316199071624 -O=C(C(CCCC4)=C4C3=O)N3C(C(F)=C2)=CC1=C2OCC(N1CC#C)=O,0.05079984353648191 -O=C1C(C3=CC=CC(C(F)(F)F)=C3)=C(NC)OC1C2=CC=CC=C2,0.055204779037407746 -NC(NCCCCCCCCNC(N)=N)=N,0.08102032708037427 -O=C(O)COC(C=CC(Cl)=C1)=C1C,0.0947069010825298 -ClC1=CC=C(OC(N2N=CN=C2)C(O)C(C)(C)C)C=C1,0.06424027322808253 -O=C(OC(C#N)C2=CC(OC3=CC=CC=C3)=C(F)C=C2)C1C(/C=C(Cl)/Cl)C(C)1C,0.044210334070631574 -CNC(O/N=C(C)\SC)=O,0.11836501403389492 -O=C1C(C2=CC=CC=C2)=NN=C(C)N1N,0.09643315995145703 -O=C(NS(C2=CC=CC=C2Cl)(=O)=O)NC1=NC(C)=NC(OC)=N1,0.05590140200157206 -FC(F)(F)C(C=C3S(C)(=O)=O)=CC=C3C(C1=C(C2CC2)ON=C1)=O,0.05566064749641608 
-O=C(NC(NC2=CC(Cl)=C(OC(F)(F)C(F)C(F)(F)F)C=C2Cl)=O)C1=C(F)C=CC=C1F,0.03990998658130422 -CC(C(C(C)3C)C(OC(C1=CC=CC(OC2=CC=CC=C2)=C1)C#N)=O)3C,0.06009909138187043 -CCCN(C(N2C=CN=C2)=O)CCOC1=C(Cl)C=C(Cl)C=C1Cl,0.05707983190600125 -C[C@H]([C@@H](N(C)C)CC5)O[C@H]5O[C@@H]2[C@@H](C)C(C1=C[C@]3([H])[C@](CC[C@@]4([H])[C@]([H])3C[C@H](O[C@@H]6O[C@@H](C)[C@H](OC)[C@@H](OCC)[C@H]6OC)C4)([H])[C@@]([H])1CC(O[C@@H](CC)CCC2)=O)=O,0.028877084613265123 -O=C(CC3=CC=CC=C3)N/C(C2=C(F)C(F)=CC=C2C(F)(F)F)=N\OCC1CC1,0.053352320292409515 -O=C(NC1=CC(Cl)=C(N2N=C(C(C)(C)C)OC2=O)C=C1)N(C)C,0.06493710428214157 -O=C(OC(C#N)C2=CC(OC3=CC=CC=C3)=C(F)C=C2)C1C(/C=C(Cl)/Cl)C(C)1C,0.052499767865960584 -CC1=CC=C(C)C=C1OCC2=CC=CC=C2/C(C(NC)=O)=N/OC,0.07046793589427701 -C2(C3=CC=CO3)=NC1=CC=CC=C1N2,0.12486833177320307 -O=C1C(/C(CC)=N/OCC)=C(O)CC(C2=C(C)C=C(C)C=C2C)C1,0.06981686853252955 -S=P(OC)(OC)OC1=CC(C)=C([N+]([O-])=O)C=C1,0.08332310268057162 -CC1C(C3=CC=C(C=C3)Cl)SC(N1C(NC2CCCCC2)=O)=O,0.06546156290207059 -CCOP(OCC)(SCN1C(OC2=C1C=CC(Cl)=C2)=O)=S,0.0636200517424888 -O=C(NC(NCC)=O)/C(C#N)=N\OC,0.11857948837239812 -CCNC1=NC(NC(C)C)=NC(Cl)=N1,0.10941971287651023 -CCOC(CCN(C(C)C)SN(C)C(OC1=CC=CC2=C1OC(C)(C)C2)=O)=O,0.05846135558242613 -CC2COC(O2)(C4=CC=C(C=C4Cl)OC3=CC=C(Cl)C=C3)CN1C=NC=N1,0.05932124091140686 -CC(C)(C2=CC=C(C=C2)OC1CCCCC1OS(OCC#C)=O)C,0.06904967382858089 -O=C(C(C)(C)C)C(N2C=NC=N2)OC1=CC=C(Cl)C=C1,0.08272375649019124 -O=C(OCC)NCCOC1=CC=C(OC2=CC=CC=C2)C=C1,0.08196801536106943 -FC1=CC=CC(F)=C1C(NC(NC2=CC(Cl)=C(C(Cl)=C2F)F)=O)=O,0.06507320207279278 -ClC2=CC=C(C=N2)CN1CCN/C1=N\[N+]([O-])=O,0.0973945952590747 -CC(C)(C(C(N3C=NC=N3)OC1=CC=C(C2=CC=CC=C2)C=C1)O)C,0.07409262028018154 -CCCCN(SN(C(OC2=C1OC(C)(C)CC1=CC=C2)=O)C)CCCC,0.06569530810416269 -C1(NC2=CC=CC=C2)=CC=CC=C1,0.14773454395291782 -N#CC(C2=CC(OC3=CC=CC=C3)=CC=C2)OC(C(C(C)C)C1=CC=C(Cl)C=C1)=O,0.05953797389131243 -ClC1=CC=C(C(C(C)C)C(OC(C#N)C2=CC(OC3=CC=CC=C3)=CC=C2)=O)C=C1,0.05953797389131243 
-ClC1=CC=C(C(C(C)C)C(OC(C#N)C2=CC(OC3=CC=CC=C3)=CC=C2)=O)C=C1,0.05953797389131243 -O=C2N(SC(Cl)(Cl)Cl)C(C1=CC=CC=C12)=O,0.08430066662269543 -O=C(C2C(/C=C(Cl)/Cl)C(C)2C)OCC1=CC(OC3=CC=CC=C3)=CC=C1,0.06389160712181856 -ClC1=CC=C2C(N=CC(OC3=CC=C(OC(C)C(OCCO/N=C(C)/C)=O)C=C3)=N2)=C1,0.05632150550177753 -CC(C)(C2=CC=C(C=C2)OC1CCCCC1OS(OCC#C)=O)C,0.07190296604559293 -CC(C)C1=NN(C(=O)N1N)C(=O)NC(C)(C)C,0.10485300866417636 -OC(C2(CC2)C(NC1=C(Cl)C=C(Cl)C=C1)=O)=O,0.09303171987631087 -CCOC1=CC=C(C(C)(C)COCC2=CC(OC3=CC=CC=C3)=CC=C2)C=C1,0.06773123883198195 -CC(OC(C(C1=CC=C(Br)C=C1)(C2=CC=C(Br)C=C2)O)=O)C,0.060497742776698574 -CC1=C(N2C(S3)=NN=C2)C3=CC=C1,0.1373938645607217 -CC1=CC=C(C2=CC=CC=C12)C,0.1728291127183792 -O=C1C(/C(CCC)=N\OCC)=C(O)CC(C2CSCCC2)C1,0.08603044408485085 -O=C(OC1=CC(C)=C(SC)C(C)=C1)NC,0.1242747128033579 -S=C1S[Mn+2]SC(NCCN1)=S,0.10855557507359069 -FC(F)(F)C1=CC([N+]([O-])=O)=C(N(CC(C)=C)CC)C([N+]([O-])=O)=C1,0.08701831648543702 -O=C(OCC)C(CC(OCC)=O)SP(OC)(OC)=S,0.08778355070659401 -O=C(OC(C)C)NC1=CC=CC=C1,0.16181616210899355 -IC1=CC=C(C(OC)=O)C(S([N-]C(NC2=NC(C)=NC(OC)=N2)=O)(=O)=O)=C1.[Na+],0.05611797964648073 -CO\N=C(C(OC)=O)/C1=C(CO/N=C(C)/C2=CC=CC(C(F)(F)F)=C2)C=CC=C1,0.07272797449373557 -C(C(CCl)O)O,0.27139624684320934 -ClC1=CC=CC(NC(OC(C)C)=O)=C1,0.14040948460452124 -FC(F)(F)C1=CC(NC(N(C)C)=O)=CC=C1,0.12919765885228982 -CC1=C(Cl)C(NS(C2=NN3C(N=C(OC)C=C3OC)=N2)(=O)=O)=C(Cl)C=C1,0.07172655770478076 -ClCCOC1=CC=CC=C1S(NC(NC2=NC(OC)=NC(C)=N2)=O)(=O)=O,0.07465930346752149 -O=[N+]([O-])C1=CC(C(F)(F)F)=CC([N+]([O-])=O)=C1N(CCC)CCC,0.08947770521301585 -CN1C=C(C(=N1)C(F)F)C(=O)NC2=CC=CC3=C2C4CCC3C4=C(Cl)Cl,0.07583481070072216 -O=C(NC(NCC)=O)/C(C#N)=N\OC,0.15289184711551862 -FC(C=CC=C2F)=C2C(NC(NC1=CC=C(OC(F)(F)C(F)OC(F)(F)F)C(Cl)=C1)=O)=O,0.062106180868884746 -CC1=CC=CC(=C1S(=O)(=O)NC(=O)NC2=NC(=NC(=N2)OCC(F)(F)F)N(C)C)C(=O)O,0.06396281173215432 -O=S(C1=C(C)C=CC=C1C(OC)=O)(NC(NC2=NC(OCC(F)(F)F)=NC(N(C)C)=N2)=O)=O,0.062140866929396014 
-ClC(C=C2)=CC=C2CCC(C#N)(CN3C=NC=N3)C1=CC=CC=C1,0.09203781459712614 -O=C(NC1=CC(Cl)=C(Cl)C=C1)N(C)OC,0.12545469800745823 -O=C(NS(N(C(C)C)C)(=O)=O)C1=CC(N2C(C=C(C(F)(F)F)N(C)C2=O)=O)=C(F)C=C1Cl,0.06269313377509025 -O=C(C2=C(Cl)C=CC=C2Cl)NCC1=NC=C(C(F)(F)F)C=C1Cl,0.08212099927021806 -S=C(NC(OC)=O)NC(C=CC=C1)=C1NC(NC(OC)=O)=S,0.09345959256991566 -CN/C(NCC1=CN=C(Cl)S1)=N\[N+]([O-])=O,0.13016764551401042 -CC(C)(C)C1CCC2(OC(CN(CCC)CC)CO2)CC1,0.11029461661878942 -O=C(N2CCOCC2)/C=C(C3=CC=C(Cl)C=C3)/C1=CC=C(OC)C(OC)=C1,0.08766124641710438 -O=C(NC2=CC=C(C(C(F)(F)F)(F)C(F)(F)F)C=C2C)C1=CC=CC(I)=C1C(NC(C)(C)CS(C)(=O)=O)=O,0.04982487508940451 -CC1=CC(C3CC3)=NC(NC2=CC=CC=C2)=N1,0.15801924849469393 -CC(C)N2C(C1=CC=CC=C1NS(=O)2=O)=O,0.14982590230152565 -O=C(N2CCOCC2)/C=C(C3=CC=C(Cl)C=C3)/C1=CC=C(OC)C(OC)=C1,0.09281779032399287 -NS(C1=CC([N+]([O-])=O)=C(N(CCC)CCC)C([N+]([O-])=O)=C1)(=O)=O,0.10393824312956665 -CC1=CC(=C(C(=C1C(=O)C2=C(C(=CN=C2OC)Cl)C)OC)OC)OC,0.09950572862076837 -CC(C(O)=O)OC(C=CC(Cl)=C1)=C1Cl,0.15527684755838006 -COCC1=C(F)C(F)=C(COC(C2C(/C=C/C)C(C)2C)=O)C(F)=C1F,0.10573252781458294 -O=C1N(/N=C/C2=CC=CN=C2)CC(C)=NN1,0.18091653347462547 -CC(C(=O)OCC1CCCO1)OC2=CC=C(C=C2)OC3=CN=C4C=C(C=CC4=N3)Cl,0.09210345974638111 -N#CC1=C(Cl)C(Cl)=C(Cl)C(C#N)=C1Cl,0.15042627044387033 -C2(C3=CSC=N3)=NC1=CC=CC=C1N2,0.19876005527119617 -CCC(NC1=C([N+]([O-])=O)C=C(C(C)(C)C)C=C1[N+]([O-])=O)C,0.13747135609511818 -N=C(N)NCCCCCCCCCCCC.OC(C)=O,0.1461167287581588 -O=C(NC(C)(C)C#C)C1=CC(Cl)=CC(Cl)=C1,0.16593276232681306 -CC1=CC(=C(C(=C1C(=O)C2=C(C(=CN=C2OC)Cl)C)OC)OC)OC,0.11727460798675288 -CC1=C(C(=O)CC1OC(=O)C2C(C2(C)C)C=C(C)C)CC=CC=C.CC1=C(C(=O)CC1OC(=O)C2C(C2(C)C)C=C(C)C(=O)OC)CC=CC=C,0.06134969850332702 -CC(C)C(C2=CN=CN=C2)(O)C1=CC=C(OC(F)(F)F)C=C1,0.13801406108477293 -CC1OC(C)OC(C)OC(C)O1,0.249701719945447 -O=C(NCC#N)C1=CN=CC=C1C(F)(F)F,0.19244308898713228 -FC(F)C1=NC(C(F)(F)F)=C(C2=NCCS2)C(CC(C)C)=C1C(OC)=O,0.11151045388522976 
-FC(F)(F)C1=CC=CC(OCC2=C(/C(C(OC)=O)=C\OC)C=CC=C2)=N1,0.1252316956521325 -O=C(N(COCC)C1=C(CC)C=CC=C1C)CCl,0.17607780933998252 -O=C(N(CC)CC)C(OC1=CC=CC2=C1C=CC=C2)C,0.17526912017369997 -CC12CC(C(N(C3=CC(Cl)=CC(Cl)=C3)C2=O)=O)1C,0.16893203350457175 -O=C(NC)OC(C=CC=C1)=C1OC(C)C,0.22939978025412716 -O=C(NN(C(C)(C)C)C(C2=CC(C)=CC(C)=C2)=O)C1=CC=C(C=C1)CC,0.13731668655832788 -O=C1C(Cl)=C(N)C=NN1C2=CC=CC=C2,0.2255879747532767 -CC1(C(C1C(=O)OC(C#N)C2=CC(=CC=C2)OC3=CC=CC=C3)C=C(Cl)Cl)C,0.12010651237688001 -O=S(C(C)=C(C)S(CC1)(=O)=O)1=O,0.23778815168220852 -O=S(NC1=NN2C(C(OC)=CN=C2OC)=N1)(C3=C(OCC(F)F)C=CC=C3C(F)(F)F)=O,0.1034404543369562 -O=C(C2C(/C=C(Cl)/Cl)C(C)2C)OCC1=CC(OC3=CC=CC=C3)=CC=C1,0.12778321424363712 -ClC1=C(CC(C3(CC3)Cl)(O)CN2N=CNC2=S)C=CC=C1,0.1452393166315865 -CC(C)NC1=NC(=NC(=N1)Cl)NC(C)C,0.22201922216305578 -FC(F)(F)C1=CC(Cl)=C(OC2=CC=C([N+]([O-])=O)C(OCC)=C2)C=C1,0.14653013191720715 -S=C(NC(OC)=O)NC(C=CC=C1)=C1NC(NC(OC)=O)=S,0.15479245019392282 -O=C(NCC)C(OC(NC1=CC=CC=C1)=O)C,0.23278744254805916 -CC(C)(C(CCC1=CC=C(Cl)C=C1)(CN2C=NC=N2)O)C,0.17867678986550448 -O=C(O)C1=CC=CC=C1S(=O)(NC(N(C2=NC(OC)=NC(C)=N2)C)=O)=O,0.14421924681891674 -O=C1C(Cl)=C(N)C=NN1C2=CC=CC=C2,0.270705569703932 -O=CC1=CC=CO1,0.624453213155231 -O=C(NC)OC1=C(C=CC=C2)C2=CC=C1,0.2991731924668564 -ClC1=CC(Cl)=C(OCC(O)=O)C=C1,0.28049546719325014 -NC1=C(C(OC2=CC=CC=C2)=CC=C1[N+]([O-])=O)Cl,0.23425888009292972 -O=C(N(C1=C(C)C=CC=C1CC)COC(C)C)CCl,0.22199225860138957 -CCCCCCCCCC[N+](C)(C)CCCCCCCCCC.[Cl-],0.1767583631976715 -C/C=C/C(OC1=C([N+]([O-])=O)C=C([N+]([O-])=O)C=C1C(C)CCCCCC)=O,0.17563456769307506 -O=C(N(COCC)C1=C(CC)C=CC=C1C)CCl,0.24799169923196304 -[S]C(NCCNC(S[Mn])=S)=S,0.2525424903682367 -O=C(C2=CN(C)N=C2C(F)F)NC1=C(C3CC3C4CC4)C=CC=C1,0.2021971466240455 -CS(NC1=C(Cl)C=C(Cl)C(N2C(N(C(F)F)C(C)=N2)=O)=C1)(=O)=O,0.1730416993562668 -O=C(N(COCC)C1=C(CC)C=CC=C1C)CCl,0.2557761861991325 -CC(CCCCCC)C1=C(OC(/C=C\C)=O)C([N+]([O-])=O)=CC([N+]([O-])=O)=C1,0.19484459853450517 
-CS(C(C=C2Cl)=CC=C2C(C(C(CCC1)=O)C1=O)=O)(=O)=O,0.2189994026791292 -O=C1C(C2=CC=CC=C2)=NN=C(C)N1N,0.36891864539658303 -ClC1=CC(Cl)=C(OCC(O)=O)C=C1,0.33930903289506065 -C[N+](C)(C)CCCl.[Cl-],0.47447507557122687 -CC1(C(C1C(=O)OC(C#N)C2=CC(=CC=C2)OC3=CC=CC=C3)C=C(Cl)Cl)C,0.18015976856532 -O=C(NC2=CC=C(C(C(F)(F)F)(F)C(F)(F)F)C=C2C)C1=CC=CC(I)=C1C(NC(C)(C)CS(C)(=O)=O)=O,0.115769562707734 -FC(C=C3)=CC=C3OC1=CC=NC2=C1C(Cl)=CC(Cl)=C2,0.25962686686321285 -O=C(OC)/C(C1=CC=CC=C1OC2=CC(OC3=CC=CC=C3C#N)=NC=N2)=C/OC,0.20427010160523304 -O=C(C2=CN(C)N=C2C(F)(F)F)NC1=C(C(C)CC(C)C)SC=C1,0.23093421710838027 -BrC1=NN(C3=C(Cl)C=CC=N3)C(C(NC2=C(C)C=C(C#N)C=C2C(NC)=O)=O)=C1,0.17901230859828976 -O=C1C(/C(CC)=N\OC/C=C/Cl)=C(O)CC(CC(C)SCC)C1,0.2389478027971563 -O=C(NC2=CC(OC(C)C)=CC=C2)C1=C(C(F)(F)F)C=CC=C1,0.2690918752347788 -O=C(N(C2=C(C)C=CC=C2C)CN1N=CC=C1)CCl,0.31323206744613685 -O=S(C1=C(CCC(F)(F)F)C=CC=C1)(NC(NC2=NC(C)=NC(OC)=N2)=O)=O,0.2098341392275743 -CN(C)S(N(SC(F)(Cl)Cl)C1=CC=C(C)C=C1)(=O)=O,0.25917417547047744 -ClC1=CC(NC(NC2=CC=CC=C2)=O)=CC=N1,0.37548404132262436 -CCCC1COC(C2=CC=C(Cl)C=C2Cl)(CN3N=CN=C3)O1,0.2805209905967611 -CSC(C1=C2C(N=NS2)=CC=C1)=O,0.4608228380460223 -ClC(Cl)(Cl)SN(C1=O)C(C2C1CC=CC2)=O,0.3260262207586085 -OC1=NOC(C)=C1,0.9991119005328597 -ClC(Cl)(Cl)SN(C1=O)C(C2C1CC=CC2)=O,0.3326798171006209 -ClC1=CC=CC=C1CN2C(C(C)(C)CO2)=O,0.41719152837532353 -O=C(N(CC)CC)C(OC1=CC=CC2=C1C=CC=C2)C,0.36852210915226874 -O=C(C2=CN(C)N=C2C(F)(F)F)NC1=C(C(C)CC(C)C)SC=C1,0.2782339965161208 -CCC1=C(C2=C(OC(C(C)(C)C)=O)N(CCOCC3)N3C2=O)C(CC)=CC(C)=C1,0.24968092026794356 -CCCCOCCOCCOCC1=C(CCC)C=C(OCO2)C2=C1,0.29547465787728056 -CN(C)S(N(SC(F)(Cl)Cl)C1=CC=C(C)C=C1)(=O)=O,0.2879713060783083 -ClC(Cl)(Cl)C(N1CCN(C(NC=O)C(Cl)(Cl)Cl)CC1)NC=O,0.22990526799413355 -CCC(C)N1C(=O)C(=C(NC1=O)C)Br,0.39446112244793224 -CCCCC(C#N)(C2=CC=C(C=C2)Cl)CN1C=NC=N1,0.3670674304254852 -CN(C)S(N(C=N3)N=C3S(N2C(C)=C(Br)C1=CC=C(F)C=C12)(=O)=O)(=O)=O,0.24018572189384213 
-FC(O3)(F)OC(C3=CC=C2)=C2C1=CNC=C1C#N,0.4553054263341003 -O=C(OCCC)NCCCN(C)C.[H]Cl,0.5072793699625824 -O=C(C(C)(C)C)C(N2C=NC=N2)OC1=CC=C(Cl)C=C1,0.3880867710275115 -CCCCCCCCSC(OC1=C(C2=CC=CC=C2)N=NC(Cl)=C1)=O,0.3034972489425892 -CC1C(CC(C(O1)OC2C(C(C(C(C2O)O)O)O)O)N)N=C(C(=O)O)N,0.3057757345866624 -[O-][N+](C1=CC(Cl)=C(N)C(Cl)=C1)=O,0.5651787298028309 -O=C2NC1=C(C(N2C3CCCCC3)=O)CCC1,0.503640251987437 -O=C(OC)C1=CC=CC=C1S(NC(NC2=NC(C)=NC(OC)=N2)=O)(=O)=O,0.31203800675365617 -O=C(C2=CC=CN=C2OC3=CC=CC(C(F)(F)F)=C3)NC1=CC=C(F)C=C1F,0.3033262936121485 -ClC1=CC=CC(C2=CNC=C2C#N)=C1Cl,0.5061481392686851 -CC(CCCCCC)C1=C(OC(/C=C\C)=O)C([N+]([O-])=O)=CC([N+]([O-])=O)=C1,0.3315102548955885 -O=S(C1=C(S(CC)(=O)=O)C=CC=N1)(NC(NC2=NC(OC)=CC(OC)=N2)=O)=O,0.2804534946915948 -FC1=CC=CC(F)=C1C(NC(NC2=CC(Cl)=C(C(Cl)=C2F)F)=O)=O,0.32143014109471235 -C[N+](C)(C)CCCl.[Cl-],0.7907917926187115 -O=C(NC(NC2=CC=C(Cl)C=C2)=O)C1=C(F)C=CC=C1F,0.4023390123323988 -CN(C)C(=S)[S-].CN(C)C(=S)[S-].CN(C)C(=S)[S-].[Fe+3],0.30012414094866885 -O=C(N(C1=C(CC)C=CC=C1CC)COC)CCl,0.4670695574071115 -ClC(C(SCC(O)=O)=C3)=CC(F)=C3\N=C2/SC(N1CCCCN12)=O,0.33345926123075403 -O=P(O)(O)CCCl,0.9066120392542251 -C[N+](C)(C)CCCl.[Cl-],0.860381470369158 -CC(C2)OC(C)CN2C1CCCCCCCCCCC1,0.48316627385722294 -CC(COC2=CC=C(OC3=CC=CC=C3)C=C2)OC1=CC=CC=N1,0.4315900691721648 -[O-][N+](C(C(Cl)=C(Cl)C(Cl)=C1Cl)=C1Cl)=O,0.4774244272684517 -O=C(OCC)C(CC(OCC)=O)SP(OC)(OC)=S,0.43286371555320496 -CC1=CC(=CC(=C1)C(=O)N(C(C)(C)C)NC(=O)C2=C(C3=C(C=C2)OCCC3)C)C,0.3678012132205545 -ClC1=CC=C(Cl)C(C(O)=O)=N1,0.7812519531298828 -ClC1=CC=C(Cl)C(C(O)=O)=N1,0.7812519531298828 -O=C(N(C1=C(C)C=CC=C1CC)C(C)COC)CCl,0.5285529966699751 -O=C(C(C(C)3C)C3/C=C(C)/C)OCC1=CC=CC(OC2=CC=CC=C2)=C1,0.42802021191337764 -O=C(OCCC)NCCCN(C)C.[H]Cl,0.6674728552139242 -ClC1=NC=C(CN2/C(N(C)COC2)=N/[N+]([O-])=O)S1,0.5313410671453993 -CCCCC1=C(OS(=O)(N(C)C)=O)N=C(NCC)N=C1C,0.4930161419173511 -NC1=NC(N)=NC(NC2CC2)=N1,0.9387196585948812 
-COCCN(C1=C(C)C=CC=C1C)C(CCl)=O,0.6139034987494355 -ClC(Cl)(Cl)C(N1CCN(C(NC=O)C(Cl)(Cl)Cl)CC1)NC=O,0.36784842879061364 -FC(F)(F)C1=CC=CC(OCC2=C(/C(C(OC)=O)=C\OC)C=CC=C2)=N1,0.4410333629488144 -O=C(OC)NS(C1=CC=C(N)C=C1)(=O)=O,0.7817895162025876 -ClC1=CC=C(CN(C(NC3=CC=CC=C3)=O)C2CCCC2)C=C1,0.5473855891134007 -CO\N=C(C4=NOCCO4)/C(C=CC=C3)=C3OC1=C(F)C(OC2=CC=CC=C2Cl)=NC=N1,0.39448424715427566 -COC(C1=C([N+]([O-])=O)C=CC(OC2=C(Cl)C=C(Cl)C=C2)=C1)=O,0.5465743293153008 -FC1=CC=CC(F)=C1C(OC3)=NC3C2=CC=C(C(C)(C)C)C=C2OCC,0.5202976892967504 -CC1=CC=C(C)C=C1C(C(N3)=O)=C(OC(OCC)=O)C23CCC(OC)CC2,0.5061016308843888 -CC1=CC=C(C)C=C1C(C(N3)=O)=C(OC(OCC)=O)C23CCC(OC)CC2,0.5061016308843888 -O=C(NC3=CC=C(OC(F)(F)F)C=C3)N/N=C(C2=CC=C(C(F)(F)F)C=C2)/CC1=CC=C(C#N)C=C1,0.394944816927872 -OC1=C(C2=CC=CC=C2)C=CC=C1,1.1750384237564568 -O=C(O)C1=NC(Cl)=C(Cl)C(N)=C1Cl,0.8282972172278201 -O=C(N(C1=C(C)C=CC=C1CC)COC(C)C)CCl,0.7047373288933002 -OC1(CN3N=CN=C3)C(C)(C)CC/C1=C\C2=CC=C(Cl)C=C2,0.6406279100538178 -CCOC1=NC(=NC(=N1)NC)NC(=O)NS(=O)(=O)C2=CC=CC=C2C(=O)OC,0.5116896474609399 -FC1=C(NC(NC(C3=C(F)C=CC=C3F)=O)=O)C=CC(OC2=C(Cl)C=C(C(F)(F)F)C=C2)=C1,0.4460202371248177 -CCOC1=CC=C(C=C1OCC)NC(OC(C)C)=O,0.8241033622809132 -CC1=NC(NC2=CC=CC=C2)=NC(C)=C1,1.1091497729605546 -O=C(O)CC2=CC=CC1=CC=CC=C12,1.205650068257516 -CP(CCC(N)C(O)=O)(O)=O,1.2637552440957067 -FC1=C(NC(NC(C3=C(F)C=CC=C3F)=O)=O)C=CC(OC2=C(Cl)C=C(C(F)(F)F)C=C2)=C1,0.4705718098105875 -O=S(NC(N1N=C(OC)N(C)C1=O)=O)(C2=C(C)SC=C2C(OC)=O)=O,0.5993972829782238 -FC1=CC=C2C(SC(C(C)NC(C(C(C)C)NC(OC(C)C)=O)=O)=N2)=C1,0.6543197874203039 -O=C(OC)NC2=NC1=CC=CC=C1N2,1.3076226134187396 -CC(C)OC(/C=C(C)/C=C/CC(C)CCCC(C)(C)OC)=O,0.8052269925229198 -O=C(O)C1=NC(Cl)=C(Cl)C(N)=C1Cl,1.0353715215347752 -BrC1=C(C)C(C(C2=C(C)C=C(OC)C(OC)=C2OC)=O)=C(OC)C=C1,0.6352758309016929 -C[N+]1(C)CCCCC1.[Cl-],1.790706021930536 -CC1(C(NC2=CC=C(O)C(Cl)=C2Cl)=O)CCCCC1,0.9662594125910484 -OC(CNCP(O)(O)=O)=O,1.7743806406081915 
-COC1=CC(OC)=NC(NC(NS(CC2=CC=CC=C2C(O)=O)(=O)=O)=O)=N1,0.7795645307119917 -CCOC1=NC(=CC2=NC(=NN21)S(=O)(=O)NC3=C(C=CC=C3Cl)C(=O)OC)F,0.7561469746838736 -COC(/C(C(C=CC=C2)=C2COC1=C(C)C=CC=C1)=N/OC)=O,1.1967534090558043 -O=C/1CC(C(O)=O)CC(C1=C(C2CC2)\O)=O,1.752821172367082 -CC1=C(OC)C=CC=C1C(NN(C(C)(C)C)C(C2=CC(C)=CC(C)=C2)=O)=O,1.1154252951100516 -O=C([O-])C(CC([O-])=C1C(CC)=O)CC1=O.[Ca+2],1.874040503249802 -ClC(C=C2)=CC=C2N1C(CC)=C(C(O)=O)C(C=N1)=O,1.6864553664875628 -C/C(C1=NC=CC=C1C(O)=O)=N\NC(NC2=CC(F)=CC(F)=C2)=O,1.4120001283962829 -O=[N+]([O-])C1=CC(C(F)(F)F)=CC([N+]([O-])=O)=C1N(CCC)CCC,1.4316432834082535 -CN(NC(CCC(O)=O)=O)C,3.0342556221759884 -O=S(N(C)S(C)(=O)=O)(NC(NC1=NC(OC)=CC(OC)=N1)=O)=O,1.3411855059279478 -FC1=CC=CC(F)=C1NS(C3=NN2C(OC)=NC=C(F)C2=N3)(=O)=O,1.391657397996453 -OC(COC1=C(C(N)=C(C(F)=N1)Cl)Cl)=O,1.9605490478397496 -O=C(O)C2=C(N=C(C=CC=C3)C3=C2)C(N1)=NC(C)(C(C)C)C1=O,1.605986191473768 -OC1=CC=C(O)N=N1,4.460830164062196 -CN(C)C(C(C=CC=C2)=C2NS(NC(NC1=NC(OC)=CC(OC)=N1)=O)(=O)=O)=O,1.1780461209768547 -O=S(C1=C(C(OC)=O)C=CC=C1)([N-]C(N2N=C(OCCC)N(C)C2=O)=O)=O.[Na+],1.1894202967675005 -COC1=CC=CC(OC)=C1C(NC2=CC(C(CC)(C)CC)=NO2)=O,1.5854670852219546 -OC1=C(C2=CC=CC=C2)C=CC=C1,3.119727015073393 -OC(CNCP(O)(O)=O)=O,3.3121771958019575 -COC1=CC=CC(OC)=C1C(NC2=CC(C(CC)(C)CC)=NO2)=O,1.8050858655278421 -O=C(OCCC)NCCCN(C)C.[H]Cl,3.0347765817059753 -C[N+]1(C)CCCCC1.[Cl-],4.570309399255547 -ClC1=CC=C(C=C(C)C=N2)C2=C1C(O)=O,3.3387517363764943 -ClC1=C(CC(C3(CC3)Cl)(O)CN2N=CNC2=S)C=CC=C1,2.178589749473798 -COC(/C(C(C=CC=C2)=C2COC1=C(C)C=CC=C1)=N/OC)=O,2.4002085592886893 -OC(CNCP(O)(O)=O)=O,5.559726007239 -[O][N](N=C(NC)NCC1COCC1)=O,4.900819965040488 -OC(C1=NC(Cl)=CC(N)=C1Cl)=O,4.830587434212229 -FC(F)(F)OC(C=CC=C2)=C2S(NC(N1C(N(C)C(OC)=N1)=O)=O)(=O)=O,2.5233463155295692 -OC1=C(C2=CC=CC=C2)C=CC=C1,5.875192118782284 -O=S(NC1=NN2C(N=C(OC)C=C2OC)=N1)(C3=C(OC)N=CC=C3C(F)(F)F)=O,2.302288500094267 
-CC(C)OC(NC(C(C)C)C(NC(C1=CC=C(Cl)C=C1)CC(OC)=O)=O)=O,2.5070128670931195 -OC(CNCP(O)(O)=O)=O,7.180326992327815 -O=S(C1=C(S(CC)(=O)=O)N=C2N1C=CC=C2)(NC(NC3=NC(OC)=CC(OC)=N3)=O)=O,2.7556956072872962 -[O-]P(OCC)([H])=O.[O-]P(OCC)([H])=O.[O-]P(OCC)([H])=O.[Al+3],3.6853523538557287 -O=S(C1=C(C(N(C)C)=O)C=CC=N1)(NC(NC2=NC(OC)=CC(OC)=N2)=O)=O,5.08765706618306 diff --git a/paper/figure/corr-1.png b/paper/figure/corr-1.png deleted file mode 100644 index 8c26052..0000000 Binary files a/paper/figure/corr-1.png and /dev/null differ diff --git a/paper/figure/crossvalidation.pdf b/paper/figure/crossvalidation.pdf new file mode 100644 index 0000000..19a9913 Binary files /dev/null and b/paper/figure/crossvalidation.pdf differ diff --git a/paper/figure/functional-groups.pdf b/paper/figure/functional-groups.pdf new file mode 100644 index 0000000..65ca730 Binary files /dev/null and b/paper/figure/functional-groups.pdf differ diff --git a/paper/figure/predictions-1.png b/paper/figure/predictions-1.png deleted file mode 100644 index 759c327..0000000 Binary files a/paper/figure/predictions-1.png and /dev/null differ diff --git a/paper/figure/test-correlation.pdf b/paper/figure/test-correlation.pdf new file mode 100644 index 0000000..835c927 Binary files /dev/null and b/paper/figure/test-correlation.pdf differ diff --git a/paper/figure/test-prediction.pdf b/paper/figure/test-prediction.pdf new file mode 100644 index 0000000..f02330e Binary files /dev/null and b/paper/figure/test-prediction.pdf differ diff --git a/paper/figure/unnamed-chunk-2-1.png b/paper/figure/unnamed-chunk-2-1.png deleted file mode 100644 index 8c26052..0000000 Binary files a/paper/figure/unnamed-chunk-2-1.png and /dev/null differ diff --git a/paper/figure/unnamed-chunk-5-1.png b/paper/figure/unnamed-chunk-5-1.png deleted file mode 100644 index 50699fd..0000000 Binary files a/paper/figure/unnamed-chunk-5-1.png and /dev/null differ diff --git a/paper/functional-groups-reduced.csv b/paper/functional-groups-reduced.csv 
deleted file mode 100644 index 525acbf..0000000 --- a/paper/functional-groups-reduced.csv +++ /dev/null @@ -1,34 +0,0 @@ -Alkene,39,30 -Alkyne,5,8 -Alcohol,44,27 -Dialkylether,35,32 -Amine,66,41 -Aldehyde,3,1 -Ketone,21,25 -Enol,4,5 -Carboxylic_acid,33,38 -Lactone,11,10 -Carboxylic_acid_derivative,215,227 -Amide,38,60 -Lactam,12,18 -Amidine,3,4 -Nitrile,35,39 -Vinylogous_carbonyl_or_carboxyl_derivative,70,97 -Vinylogous_ester,113,120 -Carbonic_acid_derivatives,109,131 -Phenol,27,9 -Arylchloride,142,163 -Arylfluoride,22,41 -Heteroaromatic,147,205 -Nitro,42,31 -Sulfonic_derivative,24,29 -Sulfenic_derivative,48,34 -Phosphoric_acid_derivative,70,44 -Aromatic,402,396 -Heterocyclic,228,272 -Epoxide,11,2 -Annelated_rings,97,73 -Conjugated_double_bond,207,213 -Trifluoromethyl,44,63 -C_ONS_bond,520,484 -Salt,26,45 \ No newline at end of file diff --git a/paper/functional-groups-reduced4R.csv b/paper/functional-groups-reduced4R.csv deleted file mode 100644 index e37c41d..0000000 --- a/paper/functional-groups-reduced4R.csv +++ /dev/null @@ -1,68 +0,0 @@ -Alkene, 39, Mazzatorta -Alkene, 30, Swiss Federal Office -Alkyne, 5, Mazzatorta -Alkyne, 8, Swiss Federal Office -Alcohol, 44, Mazzatorta -Alcohol, 27, Swiss Federal Office -Dialkylether, 35, Mazzatorta -Dialkylether, 32, Swiss Federal Office -Amine, 66, Mazzatorta -Amine, 41, Swiss Federal Office -Aldehyde, 3, Mazzatorta -Aldehyde, 1, Swiss Federal Office -Ketone, 21, Mazzatorta -Ketone, 25, Swiss Federal Office -Enol, 4, Mazzatorta -Enol, 5, Swiss Federal Office -Carboxylic_acid, 33, Mazzatorta -Carboxylic_acid, 38, Swiss Federal Office -Lactone, 11, Mazzatorta -Lactone, 10, Swiss Federal Office -Carboxylic_acid_derivative, 215, Mazzatorta -Carboxylic_acid_derivative, 227, Swiss Federal Office -Amide, 38, Mazzatorta -Amide, 60, Swiss Federal Office -Lactam, 12, Mazzatorta -Lactam, 18, Swiss Federal Office -Amidine, 3, Mazzatorta -Amidine, 4, Swiss Federal Office -Nitrile, 35, Mazzatorta -Nitrile, 39, Swiss Federal Office 
-Vinylogous_carbonyl_or_carboxyl_derivative, 70, Mazzatorta -Vinylogous_carbonyl_or_carboxyl_derivative, 97, Swiss Federal Office -Vinylogous_ester, 113, Mazzatorta -Vinylogous_ester, 120, Swiss Federal Office -Carbonic_acid_derivatives, 109, Mazzatorta -Carbonic_acid_derivatives, 131, Swiss Federal Office -Phenol, 27, Mazzatorta -Phenol, 9, Swiss Federal Office -Arylchloride, 142, Mazzatorta -Arylchloride, 163, Swiss Federal Office -Arylfluoride, 22, Mazzatorta -Arylfluoride, 41, Swiss Federal Office -Heteroaromatic, 147, Mazzatorta -Heteroaromatic, 205, Swiss Federal Office -Nitro, 42, Mazzatorta -Nitro, 31, Swiss Federal Office -Sulfonic_derivative, 24, Mazzatorta -Sulfonic_derivative, 29, Swiss Federal Office -Sulfenic_derivative, 48, Mazzatorta -Sulfenic_derivative, 34, Swiss Federal Office -Phosphoric_acid_derivative, 70, Mazzatorta -Phosphoric_acid_derivative, 44, Swiss Federal Office -Aromatic, 402, Mazzatorta -Aromatic, 396, Swiss Federal Office -Heterocyclic, 228, Mazzatorta -Heterocyclic, 272, Swiss Federal Office -Epoxide, 11, Mazzatorta -Epoxide, 2, Swiss Federal Office -Annelated_rings, 97, Mazzatorta -Annelated_rings, 73, Swiss Federal Office -Conjugated_double_bond, 207, Mazzatorta -Conjugated_double_bond, 213, Swiss Federal Office -Trifluoromethyl, 44, Mazzatorta -Trifluoromethyl, 63, Swiss Federal Office -C_ONS_bond, 520, Mazzatorta -C_ONS_bond, 484, Swiss Federal Office -Salt, 26, Mazzatorta -Salt, 45, Swiss Federal Office diff --git a/paper/functional-groups.R b/paper/functional-groups.R index 765817a..47cec0b 100755 --- a/paper/functional-groups.R +++ b/paper/functional-groups.R @@ -1,7 +1,8 @@ library("ggplot2") -#functional_groups <- read.csv("functional-groups-reduced.csv",header=F,row.names = 1) -functional_groups <- read.csv("functional-groups-reduced4R.csv",header=F) -print(functional_groups) -ggplot(functional_groups,aes(x=V1,y=V2,fill=V3),legendTitle="Dataset") + geom_bar(stat="identity", position=position_dodge()) + xlab("") + ylab("") 
+ coord_flip() -ggsave("functional-groups.pdf") +functional_groups <- read.csv("data/functional-groups-reduced4R.csv",header=F) + +names(functional_groups) = c("V1","V2","Dataset") + +ggplot(functional_groups,aes(x=V1,y=V2,fill=Dataset)) + geom_bar(stat="identity", position=position_dodge()) + xlab("") + ylab("") + coord_flip() +ggsave("figure/functional-groups.pdf") diff --git a/paper/functional-groups.csv b/paper/functional-groups.csv deleted file mode 100644 index 0e2b4ca..0000000 --- a/paper/functional-groups.csv +++ /dev/null @@ -1,138 +0,0 @@ -Primary_carbon, 303, 287 -Secondary_carbon, 147, 149 -Tertiary_carbon, 91, 73 -Quaternary_carbon, 58, 60 -Alkene, 39, 30 -Alkyne, 5, 8 -Alkylchloride, 71, 41 -Alkylfluoride, 52, 74 -Alkylbromide, 5, 3 -Alcohol, 44, 27 -Primary_alcohol, 12, 2 -Secondary_alcohol, 23, 10 -Tertiary_alcohol, 18, 21 -Dialkylether, 35, 32 -Dialkylthioether, 14, 6 -Alkylarylether, 62, 90 -Diarylether, 35, 47 -Alkylarylthioether, 11, 10 -Amine, 66, 41 -Primary_aliph_amine, 8, 3 -Secondary_aliph_amine, 2, 2 -Tertiary_aliph_amine, 16, 9 -Quaternary_aliph_ammonium, 1, 6 -Primary_arom_amine, 26, 15 -Secondary_arom_amine, 2, 1 -Secondary_mixed_amine, 8, 3 -Tertiary_mixed_amine, 14, 7 -Ammonium, 1, 6 -Dialkylthioether, 10, 4 -Alkylarylthioether, 10, 10 -Disulfide, 4, 2 -1,2-Diol, 12, 3 -Organometallic_compounds, 1, 5 -Aldehyde, 3, 1 -Ketone, 21, 25 -Oximether, 7, 15 -Acetal, 13, 13 -Halogen_acetal_like, 13, 14 -Acetal_like, 34, 30 -NOS_methylen_ester_and_similar, 18, 19 -Hetero_methylen_ester_and_similar, 19, 19 -Chloroalkene, 39, 21 -Bromoalkene, 1, 2 -Enol, 4, 5 -Enolether, 2, 4 -Thioenolether, 1, 1 -Carboxylic_acid, 33, 38 -Lactone, 11, 10 -Carboxylic_acid_derivative, 215, 227 -Carbothioic_S_ester, 1, 1 -Amide, 38, 60 -Primary_amide, 2, 1 -Secondary_amide, 22, 36 -Tertiary_amide, 15, 23 -Lactam, 12, 18 -Alkyl_imide, 5, 4 -N_hetero_imide, 6, 3 -Amidine, 3, 4 -Hydroxamic_acid_ester, 1, 1 -Imidolactone, 13, 32 -Imidothiolactone, 1, 1 -Amidine, 3, 5 
-Imidolactam, 3, 2 -Imidoylhalide_cyclic, 10, 19 -Alpha_aminoacid, 1, 1 -Nitrile, 35, 39 -Vinylogous_carbonyl_or_carboxyl_derivative, 70, 97 -Vinylogous_acid, 9, 8 -Vinylogous_ester, 113, 120 -Vinylogous_amide, 19, 24 -Vinylogous_halide, 11, 27 -Carbonic_acid_derivatives, 109, 131 -Urea, 23, 21 -Thiourea, 4, 4 -Isothiourea, 1, 3 -Guanidine, 6, 7 -Urethan, 34, 35 -Biuret, 1, 4 -Semicarbazone, 1, 3 -Phenol, 27, 9 -Arylchloride, 142, 163 -Arylfluoride, 22, 41 -Arylbromide, 4, 8 -Aryliodide, 1, 4 -Oxoarene, 32, 29 -Thioarene, 1, 2 -Hetero_N_basic_H, 17, 11 -Hetero_N_basic_no_H, 59, 90 -Hetero_N_nonbasic, 127, 191 -Hetero_O, 10, 12 -Hetero_S, 17, 18 -Heteroaromatic, 147, 205 -Nitro, 42, 31 -Sulfon, 3, 14 -Sulfoxide, 2, 3 -Sulfuric_derivative, 5, 8 -Sulfonamide, 6, 9 -Sulfonic_derivative, 24, 29 -Sulfenic_derivative, 48, 34 -Phosphonic_acid, 5, 5 -Phosphonic_monoester, 1, 1 -Phosphonic_diester, 2, 1 -Phosphonic_acid_derivative, 10, 7 -Phosphoric_triester, 10, 3 -Phosphoric_diestermonoamide, 3, 2 -Phosphoric_acid_derivative, 70, 44 -Phosphinic_acid, 1, 1 -Phosphinic_acid_derivative, 1, 1 -Quart_silane, 1, 2 -Aromatic, 402, 396 -Heterocyclic, 228, 272 -Epoxide, 11, 2 -Spiro, 3, 10 -Annelated_rings, 97, 73 -Bridged_rings, 8, 4 -Sugar_pattern_1, 10, 9 -Sugar_pattern_2, 6, 7 -Sugar_pattern_combi, 4, 5 -Sugar_pattern_2_alpha, 1, 1 -Sugar_pattern_2_beta, 1, 1 -Conjugated_double_bond, 207, 213 -Conjugated_tripple_bond, 10, 19 -Cis_double_bond, 31, 19 -Trans_double_bond, 31, 19 -Halogen_multi_subst, 1, 6 -Trifluoromethyl, 44, 63 -C_ONS_bond, 520, 484 -Charged, 27, 51 -Anion, 26, 45 -Kation, 25, 51 -Salt, 26, 45 -1,3-Tautomerizable, 265, 296 -1,5-Tautomerizable, 124, 148 -Rotatable_bond, 488, 462 -Michael_acceptor, 33, 33 -CH-acidic, 60, 73 -CH-acidic_strong, 1, 4 -Chiral_center_specified, 1, 2 diff --git a/paper/loael-dataset-comparison-all-compounds.pdf b/paper/loael-dataset-comparison-all-compounds.pdf deleted file mode 100644 index ee34390..0000000 Binary files 
a/paper/loael-dataset-comparison-all-compounds.pdf and /dev/null differ diff --git a/paper/loael-dataset-comparison-common-compounds.pdf b/paper/loael-dataset-comparison-common-compounds.pdf deleted file mode 100644 index 4dba051..0000000 Binary files a/paper/loael-dataset-comparison-common-compounds.pdf and /dev/null differ diff --git a/paper/loael-dataset-comparison.rb b/paper/loael-dataset-comparison.rb deleted file mode 100644 index 5850236..0000000 --- a/paper/loael-dataset-comparison.rb +++ /dev/null @@ -1,75 +0,0 @@ -require_relative '../../lazar/lib/lazar' -include OpenTox - -old = Dataset.from_csv_file File.join(File.dirname(__FILE__),"..","regression","LOAEL_mg_corrected_smiles_mmol.csv") -new = Dataset.from_csv_file File.join(File.dirname(__FILE__),"..","regression","swissRat_chron_LOAEL_mmol.csv") - -combined_compounds = old.compound_ids & new.compound_ids - -compound_vector = [] -value_vector = [] -dataset_vector = [] - -old_median = [] -new_median = [] - -errors = [] -combined_compounds.each do |cid| - c = Compound.find cid - old_values = old.values(c,old.features.first) - old_median << -Math.log(old_values.mean) - old_values.each do |v| - compound_vector << c.smiles - value_vector << -Math.log(v.to_f) - dataset_vector << old.name - end - new_values = new.values(c,new.features.first) - new_median << -Math.log(new_values.mean) - new_values.each do |v| - compound_vector << c.smiles - value_vector << -Math.log(v) - dataset_vector << new.name - end -end -old_median.each_index do |i| - errors[i] = (old_median[i] - new_median[i]).abs unless old_median[i] == new_median[i] -end -rmse = 0 -mae = 0 -errors.compact.each do |e| - rmse += e**2 - mae += e -end -rmse = Math.sqrt(rmse/errors.size) -mae = mae/errors.size - -=begin -R.assign "smi", compound_vector -R.assign "values", value_vector -R.assign "dataset", dataset_vector -R.eval "df <- data.frame(factor(smi),values,factor(dataset))" -R.eval "df$smi <- reorder(df$factor.smi,df$values)" -R.eval "img <- 
ggplot(df, aes(smi,values,ymin = min(values), ymax=max(values),color=dataset))" -R.eval "img <- img + ylab('-log(LOAEL mg/kg_bw/day)') + xlab('Compound') + theme(axis.text.x = element_blank())" -R.eval "img <- img + geom_point()" - -R.eval "ggsave(file='/home/ch/opentox/lazar-nestec-data/loael-dataset-comparison-mmol_kg_day.svg', plot=img,width=12, height=8)" -=end -#img <- ggplot(data, aes(SMILES,LOAEL,ymin = min(LOAEL), ymax=max(LOAEL),color=Dataset)) - -#img <- img + ylab('-log(LOAEL mg/kg_bw/day)') + xlab('Compound') + theme(axis.text.x = element_blank()) - -#img <- img + geom_point() - -#print(img) - -R.assign "Mazzatorta", old_median -R.assign "SwissFederalOffice", new_median -R.eval "df <- data.frame(Mazzatorta,SwissFederalOffice)" -R.eval "ggplot(df, aes(Mazzatorta,SwissFederalOffice)) + geom_point() + geom_abline(intercept=0.0) " -R.eval "ggsave(file='/home/ch/src/lazar-nestec-data/paper/loael-dataset-correlation.pdf')" - -puts "Correlation Mazzatorta/SwissFederalOffice:" -puts "\tr^2: #{R.eval("cor(Mazzatorta,SwissFederalOffice,use='complete')").to_f**2}" -puts "\tRMSE: #{rmse}" -puts "\tMAE: #{mae}" diff --git a/paper/loael-dataset-correlation.pdf b/paper/loael-dataset-correlation.pdf deleted file mode 100644 index 33dcfdf..0000000 Binary files a/paper/loael-dataset-correlation.pdf and /dev/null differ diff --git a/paper/loael.Rmd b/paper/loael.Rmd index a94e88a..29456a6 100644 --- a/paper/loael.Rmd +++ b/paper/loael.Rmd @@ -8,9 +8,8 @@ keywords: (Q)SAR, read-across, LOAEL date: \today abstract: " " documentclass: achemso -bibliography: references.bib +bibliography: references.bibtex bibliographystyle: achemso -biblio-style: achemso output: pdf_document: fig_caption: yes @@ -21,14 +20,36 @@ Introduction Christoph + Elena + Benoit -The main objectives of this study are +The quality and reproducibility of (Q)SAR and read-across predictions is a controversial topic in the toxicological risk-assessment community. 
Although model predictions can be validated with various procedures it is rarely possible to put the results into the context of experimental variability, because replicate experiments are rarely available. -- to investigate the experimental variability of LOAEL data +With missing information about the variability of experimental toxicity data it is hard to judge the performance of predictive models and it is tempting for model developers to use aggressive model optimisation methods that lead to impressive validation results, but also to overfitted models with little practical relevance. -- develop predictive model for lowest observed effect levels +In this study we intend to compare model predictions with experimental variability with chronic oral rat lowest observed adverse effect levels (LOAEL) as toxicity endpoint. +We are using two datasets, one from [@mazzatorta08] (*Mazzatorta* dataset) and one from the Swiss Federal Office of TODO (*Swiss Federal Office* dataset). -- compare the performance of model predictions with experimental - variability +Elena: do you have a reference and the name of the department? + +```{r echo=F} +t = read.csv("data/test.csv") +``` + +`r length(unique(t$SMILES))` compounds are common in both datasets and we use them as a test set in our investigation. For this test set we will + +- compare the structural diversity of both datasets +- compare the LOAEL values in both datasets +- build prediction models based on the Mazzatorta, Swiss Federal Office datasets and a combination of both +- predict LOAELs of the test set +- compare predictions with experimental variability + +With this investigation we also want to support the idea of reproducible research, by providing all datasets and programs that have been used to generate this manuscript under a TODO license. + +A self-contained docker image with all program dependencies required for the reproduction of these results is available from TODO.
+ +Source code and datasets for the reproduction of this manuscript can be downloaded from the GitHub repository TODO. The lazar framework [@Maunz2013] is also available under a GPL License from https://github.com/opentox/lazar. + +TODO: github tags + +Elena: please check if this publication strategy is ok for the Swiss Federal Office Materials and Methods ===================== @@ -36,69 +57,87 @@ Materials and Methods Datasets -------- +```{r echo=F} +m = read.csv("data/mazzatorta.csv",header=T) +s = read.csv("data/swiss.csv",header=T) +t = read.csv("data/test.csv",header=T) +c = read.csv("data/combined.csv",header=T) +``` + ### Mazzatorta dataset -Just referred to the paper 2008. +The first dataset (*Mazzatorta* dataset for further reference) originates from +the publication of [@mazzatorta08]. It contains chronic (> 180 days) lowest +observed adverse effect levels (LOAEL) for rats (*Rattus norvegicus*) after oral +(gavage, diet, drinking water) administration. The Mazzatorta dataset consists +of `r length(m$SMILES)` LOAEL values for `r length(unique(m$SMILES))` unique +chemical structures. ### Swiss Federal Office dataset Elena + Swiss Federal Office contribution (input) -Only rat LOAEL values were used for the current investigation, because -they correspond directly to the Mazzatorta dataset. +The Swiss Federal Office dataset consists of `r length(s$SMILES)` LOAEL values +for `r length(unique(s$SMILES))` unique chemical structures. ### Preprocessing -Christoph +Chemical structures in both datasets were initially represented as SMILES strings +[@doi:10.1021/ci00057a005]. Syntactically incorrect and missing SMILES were +generated from other identifiers (e.g. names, CAS numbers). Unique SMILES from the OpenBabel library [@OBoyle2011] were used for the identification of duplicated structures. + +Studies with undefined or empty LOAEL entries were removed from the datasets. LOAEL values were converted to mmol/kg_bw/day units.
For prediction, validation and visualisation purposes -log10 transformations are used. -Chemical structures in both datasets are represented as SMILES strings -(Weininger 1988). Syntactically incorrect and missing SMILES were -generated from other identifiers (e.g names, CAS numbers) when possible. -Studies with undefined (“0”) or empty LOAEL entries were removed for -this study. +David: please check if we have missed something + +### Derived datasets + +Two derived datasets were obtained from the original datasets: + +The *test* dataset contains data of compounds that occur in both datasets. Exact duplications of LOAEL values were removed, because it is very likely that they originate from the same study. +The test dataset has `r length(t$SMILES)` LOAEL values for `r length(unique(t$SMILES))` unique chemical structures. + +The *combined* dataset is the union of the Mazzatorta and the Swiss Federal Office dataset and it is used to build predictive models. Exact LOAEL duplications were removed, as for the test dataset. +The combined dataset has `r length(c$SMILES)` LOAEL values for `r length(unique(c$SMILES))` unique chemical structures. Algorithms ---------- -Christoph - -For this study we are using the modular lazar (*la*zy *s*tructure -*a*ctivity *r*elationships) framework (Maunz et al. 2013) for model +In this study we are using the modular lazar (*la*zy *s*tructure +*a*ctivity *r*elationships) framework [@Maunz2013] for model development and validation. lazar follows the following basic workflow: For a given chemical -structure it searches in a database for similar structures (neighbors) -with experimental data, builds a local (Q)SAR model with these neighbors -and uses this model to predict the unknown activity of the query -compound. 
This procedure resembles an automated version of *read across* +structure lazar + +- searches in a database for similar structures (*neighbors*) +with experimental data, +- builds a local QSAR model with these neighbors +and +- uses this model to predict the unknown activity of the query +compound. + +This procedure resembles an automated version of *read across* predictions in toxicology, in machine learning terms it would be classified as a *k-nearest-neighbor* algorithm. Apart from this basic workflow lazar is completely modular and allows -the researcher to use any algorithm for neighbor identification and -local (Q)SAR modelling. Within this study we are using the following +the researcher to use any algorithm for similarity searches and +local QSAR modelling. Within this study we are using the following algorithms: ### Neighbor identification -Christoph - -Similarity calculations are based on MolPrint2D fingerprints (Bender et -al. 2004) from the OpenBabel chemoinformatics library (OBoyle et al. -2011). +Similarity calculations are based on MolPrint2D fingerprints [@doi:10.1021/ci034207y] from the OpenBabel chemoinformatics library [@OBoyle2011]. The MolPrint2D fingerprint uses atom environments as molecular representation, which resemble basically the chemical concept of functional groups. For each atom in a molecule it represents the -chemical environment with the atom types of connected atoms. +chemical environment using the atom types of connected atoms. -The main advantage of MolPrint2D fingerprints over fingerprints with -predefined substructures (such as OpenBabel FP3, FP4 or MACCs -fingerprints) is that it may capture substructures of toxicological -relevance that are not included in predefined substructure lists. +MolPrint2D fingerprints are generated dynamically from chemical structures and do not rely on predefined lists of fragments (such as OpenBabel FP3, FP4 or MACCs fingerprints or lists of toxocophores/toxicophobes). 
This has the advantage that they may capture substructures of toxicological relevance that are not included in other fingerprints. Preliminary experiments have shown that predictions with MolPrint2D -fingerprints are indeed more accurate than fingerprints with predefined -substructures. +fingerprints are indeed more accurate than other OpenBabel fingerprints. From MolPrint2D fingerprints we can construct a feature vector with all atom environments of a compound, which can be used to calculate chemical @@ -106,27 +145,38 @@ similarities. [//]: # https://openbabel.org/docs/dev/FileFormats/MolPrint2D_format.html#molprint2d-format -The chemical similarity between two compounds is expressed as the -proportion between atom environments common in both structures and the -total number of atom environments (Jaccard/Tanimoto index, [@eq:jaccard]). +The chemical similarity between two compounds A and B is expressed as the +proportion between atom environments common in both structures $A \cap B$ and the +total number of atom environments $A \cup B$ (Jaccard/Tanimoto index, [@eq:jaccard]). $$ sim = \frac{|A \cap B|}{|A \cup B|} $$ {#eq:jaccard} -$A$ atom environments of compound A, $B$ atom environments of compound B. +A threshold of $sim > 0.1$ is used for the identification of neighbors for local QSAR models. +Compounds with the same structure as the query structure are eliminated from the neighbors to obtain an unbiased prediction. -### Local (Q)SAR models +### Local QSAR models and predictions -Christoph +Only similar compounds (*neighbors*) are used for local QSAR models. +In this investigation we are using a weighted partial least squares regression (PLS) algorithm for the prediction of quantitative properties. +First all fingerprint features with identical values across all neighbors are removed. +The remaining set of features is used as descriptors for creating a local weighted PLS model with atom environments as descriptors and model similarities as weights.
The `plsr` function of the `pls` R package [@pls] is used for this purpose. +Finally the local PLS model is applied to predict the activity of the query compound. -As soon as neighbors for a query compound have been identified, we can -use their experimental LOAEL values to predict the activity of the -untested compound. In this case we are using the weighted mean of the +If PLS modelling or prediction fails, the program resorts to using the weighted mean of the neighbors LOAEL values, where the contribution of each neighbor is weighted by its similarity to the query compound. ### Validation -Christoph +Two types of validations are used within this study: + +For the comparison of experimental variability with predictive accuracies we are using a test set of compounds that occur in both datasets. The *Mazzatorta*, *Swiss Federal Office* and *combined* datasets are used as training data for read across predictions. In order to obtain unbiased predictions *all* information from the test compound is removed from the training set prior to predictions. This procedure is hardcoded into the prediction algorithm in order to prevent validation errors. + +TODO: treatment of duplicates + +In addition traditional 10-fold crossvalidation results are provided. + +Christoph: check if these specifications have changed at submission Results ======= @@ -147,7 +197,7 @@ baseline for evaluating prediction performance. Martin CheS-Mapper (Chemical Space Mapping and Visualization in 3D, -http://ches-mapper.org/, (Gutlein, Karwath, and Kramer 2012)) can be +http://ches-mapper.org/, @Gütlein2012) can be used to analyze the relationship between the structure of chemical compounds, their physico-chemical properties, and biological or toxic effects. CheS-Mapper embeds a dataset into 3D space, such that compounds @@ -164,7 +214,7 @@ Christoph datasets. A complete table for 138 functional groups from OpenBabel FP4 fingerprints can be found in the appendix. 
-![Frequency of functional groups.](functional-groups.pdf){#fig:fg} +![Frequency of functional groups.](figure/functional-groups.pdf){#fig:fg} ### Experimental variability versus prediction uncertainty @@ -179,6 +229,8 @@ substantial overlap of compounds, with LOAEL values in both datasets. ##### Intra dataset variability +TODO: read data from files + The Mazzatorta dataset has 562 LOAEL values with 439 unique structures, the Swiss Federal Office dataset has 493 rat LOAEL values with 381 unique structures. [@fig:intra] shows the intra-dataset variability, where @@ -188,7 +240,7 @@ similar in both datasets (p-value: 0.48). [//]: # p-value: 0.4750771581019402 -![Intra dataset variability: Each vertical line represents a compound, dots are individual LOAEL values.](loael-dataset-comparison-all-compounds.pdf){#fig:intra} +[//]: # ![Intra dataset variability: Each vertical line represents a compound, dots are individual LOAEL values.](loael-dataset-comparison-all-compounds.pdf){#fig:intra} ##### Inter dataset variability @@ -196,30 +248,29 @@ similar in both datasets (p-value: 0.48). and Swiss Federal Office datasets. Obviously the experimental variability is larger than for individual datasets. -![Inter dataset variability](loael-dataset-comparison-common-compounds.pdf){#fig:inter} +[//]: # ![Inter dataset variability](loael-dataset-comparison-common-compounds.pdf){#fig:inter} ##### LOAEL correlation between datasets -[@fig:corr-1] depicts the correlation between LOAEL data from both datasets +[@fig:corr] depicts the correlation between LOAEL data from both datasets (using means for multiple measurements). Identical values were removed from analysis. 
[//]: # MAE: 0.801626064534318 [//]: # with identical values -```{r fig.cap="Correlation of dataset medians (-log10(LOAEL [mmol/kg_bw])", fig.lp="fig:", echo=F} -library(ggplot2) + +```{r echo=F} data <- read.csv("data/common-median.csv",header=T) -print(qplot(-log10(mazzatorta),-log10(swiss),data=data,xlab="Mazzatorta",ylab="Swiss Federal Office") + geom_point() + geom_abline(intercept=0.0) ) cor <- cor.test(-log(data$mazzatorta),-log(data$swiss)) -median.r.square <- cor(-log(data$mazzatorta),-log(data$swiss),use='complete')**2 -median.rmse <- sqrt(mean((-log(data$mazzatorta)+log(data$swiss))^2)) +median.r.square <- round(cor(-log(data$mazzatorta),-log(data$swiss),use='complete')^2,2) +median.rmse <- round(sqrt(mean((-log(data$mazzatorta)+log(data$swiss))^2)),2) ``` Correlation analysis shows a significant correlation (p-value < 2.2e-16) with r\^2: `r round(median.r.square,2)`, RMSE: `r round(median.rmse,2)` -### Local (Q)SAR models +### Local QSAR models Christoph @@ -227,39 +278,38 @@ In order to compare the perfomance of in silico models with experimental variabi The Mazzatorta, the Swiss Federal Office dataset and a combined dataset were used as training data. Predictions for the test set compounds were made after eliminating all information from the test compound from the corresponding training dataset. 
[@tbl:common-pred] summarizes the results: +![Comparison of experimental with predicted LOAEL values, each vertical line represents a compound.](figure/test-prediction.pdf){#fig:comp} + ```{r echo=F} -validation <- read.csv("test-set-validation.csv",header=T) +source("test-correlation.R") ``` -Training data | Model prediction | Experimental variability ---------------|------------------|------------------------- -Mazzatorta | `r round(validation$rmse[1],2)` | `r round(mazzatorta.rmse,2)` -Swiss Federal Office |`r round(validation$rmse[2],2)` | `r round(swiss.rmse,2)` -Commmon | `r round(validation$rmse[3],2)`| `r common.rmse` -Combined | | `r combined.rmse` + +Training data | $r^2$ | RMSE +--------------|---------------------------|------------------------- +Experimental | `r median.r.square` | `r median.rmse` +Mazzatorta | `r mazzatorta.r_square` | `r mazzatorta.rmse` +Swiss Federal Office |`r swiss.r_square` | `r swiss.rmse` +Combined | `r combined.r_square` | `r combined.rmse` : Comparison of model predictions with experimental variability. 
{#tbl:common-pred} ```{r echo=F} -source("crossvalidations.R") +source("crossvalidation.R") ``` Traditional 10-fold cross-validation results are summarised in [@tbl:cv]: -Training dataset | $r^2$ | RMSE | MAE ------------------|-------|------|---- -Mazzatorta | `r round(cv.mazzatorta.r.squared,2)` | `r round(cv.mazzatorta.rmse,2)`| `r round(cv.mazzatorta.mae,2)` -Swiss Federal Office | `r round(cv.swiss.r.squared,2)` | `r round(cv.swiss.rmse,2)`| `r round(cv.swiss.mae,2)` -Combined | `r round(cv.combined.r.squared,2)` | `r round(cv.combined.rmse,2)`| `r round(cv.combined.mae,2)` +Training dataset | $r^2$ | RMSE +-----------------|-------|------ +Mazzatorta | `r round(cv.mazzatorta.r_square,2)` | `r round(cv.mazzatorta.rmse,2)` +Swiss Federal Office | `r round(cv.swiss.r_square,2)` | `r round(cv.swiss.rmse,2)` +Combined | `r round(cv.combined.r_square,2)` | `r round(cv.combined.rmse,2)` : 10-fold crossvalidation results {#tbl:cv} -[//]: # ```{r fig.cap="Comparison of predictions with measured values (-log10(LOAEL [mmol/kg_bw])", fig.lp="fig:", echo=F} +![Correlation of experimental with predicted LOAEL values (test set)](figure/test-correlation.pdf){} + +![Correlation of experimental with predicted LOAEL values (10-fold crossvalidation)](figure/crossvalidation.pdf){} -```{r predictions, fig.cap='Comparison of predictions with measured values (-log10(LOAEL [mmol/kg_bw])', echo=F} -library(ggplot2) -data <- read.csv("data/common-test.csv",header=T) -sorted = data[ order(-log10(data$LOAEL)), ] -ggplot(sorted, aes(SMILES,-log10(LOAEL),ymin = min(-log10(LOAEL)), ymax=max(-log10(LOAEL)),color=Dataset)) + geom_point() + ylab('-log(LOAEL mg/kg_bw/day)') + xlab('Compound') + theme(axis.text.x = element_blank()) -``` Discussion ========== @@ -273,31 +323,3 @@ Summary References ========== - -Bender, Andreas, Hamse Y. Mussa, and Robert C. Glen, and Stephan -Reiling. 2004. 
“Molecular Similarity Searching Using Atom Environments, -Information-Based Feature Selection, and a Naïve Bayesian Classifier.” -*Journal of Chemical Information and Computer Sciences* 44 (1): 170–78. -doi:[10.1021/ci034207y](https://doi.org/10.1021/ci034207y). - -Gütlein, Martin, Andreas Karwath, and Stefan Kramer. 2012. “CheS-Mapper -- Chemical Space Mapping and Visualization in 3D.” *Journal of -Cheminformatics* 4 (1): 7. -doi:[10.1186/1758-2946-4-7](https://doi.org/10.1186/1758-2946-4-7). - -Maunz, Andreas, Martin Gütlein, Micha Rautenberg, David Vorgrimmler, -Denis Gebele, and Christoph Helma. 2013. “Lazar: A Modular Predictive -Toxicology Framework.” *Frontiers in Pharmacology* 4. Frontiers Media -SA. -doi:[10.3389/fphar.2013.00038](https://doi.org/10.3389/fphar.2013.00038). - -OBoyle, Noel M, Michael Banck, Craig A James, Chris Morley, Tim -Vandermeersch, and Geoffrey R Hutchison. 2011. “Open Babel: An Open -Chemical Toolbox.” *Journal of Cheminformatics* 3 (1). Springer Science; -Business Media: 33. -doi:[10.1186/1758-2946-3-33](https://doi.org/10.1186/1758-2946-3-33). - -Weininger, David. 1988. “SMILES, a Chemical Language and Information -System. 1. Introduction to Methodology and Encoding Rules.” *Journal of -Chemical Information and Computer Sciences* 28 (1): 31–36. -doi:[10.1021/ci00057a005](https://doi.org/10.1021/ci00057a005). diff --git a/paper/loael.md b/paper/loael.md index a2fc458..69ee1ff 100644 --- a/paper/loael.md +++ b/paper/loael.md @@ -8,9 +8,8 @@ keywords: (Q)SAR, read-across, LOAEL date: \today abstract: " " documentclass: achemso -bibliography: references.bib +bibliography: references.bibtex bibliographystyle: achemso -biblio-style: achemso output: pdf_document: fig_caption: yes @@ -21,14 +20,34 @@ Introduction Christoph + Elena + Benoit -The main objectives of this study are +The quality and reproducibility of (Q)SAR and read-across predictions is a controversial topic in the toxicological risk-assessment community. 
Although model predictions can be validated with various procedures it is rarely possible to put the results into the context of experimental variability, because replicate experiments are rarely available. -- to investigate the experimental variability of LOAEL data +With missing information about the variability of experimental toxicity data it is hard to judge the performance of predictive models and it is tempting for model developers to use aggressive model optimisation methods that lead to impressive validation results, but also to overfitted models with little practical relevance. -- develop predictive model for lowest observed effect levels +In this study we intend to compare model predictions with experimental variability with chronic oral rat lowest observed adverse effect levels (LOAEL) as toxicity endpoint. +We are using two datasets, one from [@mazzatorta08] (*Mazzatorta* dataset) and one from the Swiss Federal Office of TODO (*Swiss Federal Office* dataset). -- compare the performance of model predictions with experimental - variability +Elena: do you have a reference and the name of the department? + + + +155 compounds are common in both datasets and we use them as a test set in our investigation. For this test set we will + +- compare the structural diversity of both datasets +- compare the LOAEL values in both datasets +- build prediction models based on the Mazzatorta, Swiss Federal Office datasets and a combination of both +- predict LOAELs of the test set +- compare predictions with experimental variability + +With this investigation we also want to support the idea of reproducible research, by providing all datasets and programs that have been used to generate this manuscript under a TODO license. + +A self-contained docker image with all program dependencies required for the reproduction of these results is available from TODO. + +Source code and datasets for the reproduction of this manuscript can be downloaded from the GitHub repository TODO.
The lazar framework [@Maunz2013] is also available under a GPL License from https://github.com/opentox/lazar. + +TODO: github tags + +Elena: please check if this publication strategy is ok for the Swiss Federal Office Materials and Methods ===================== @@ -36,69 +55,82 @@ Materials and Methods Datasets -------- + + ### Mazzatorta dataset -Just referred to the paper 2008. +The first dataset (*Mazzatorta* dataset for further reference) originates from +the publication of [@mazzatorta08]. It contains chronic (> 180 days) lowest +observed adverse effect levels (LOAEL) for rats (*Rattus norvegicus*) after oral +(gavage, diet, drinking water) administration. The Mazzatorta dataset consists +of 567 LOAEL values for 445 unique +chemical structures. ### Swiss Federal Office dataset Elena + Swiss Federal Office contribution (input) -Only rat LOAEL values were used for the current investigation, because -they correspond directly to the Mazzatorta dataset. +The Swiss Federal Office dataset consists of 493 LOAEL values +for 381 unique chemical structures. ### Preprocessing -Christoph +Chemical structures in both datasets were initially represented as SMILES strings +[@doi:10.1021/ci00057a005]. Syntactically incorrect and missing SMILES were +generated from other identifiers (e.g. names, CAS numbers). Unique SMILES from the OpenBabel library [@OBoyle2011] were used for the identification of duplicated structures. + +Studies with undefined or empty LOAEL entries were removed from the datasets. LOAEL values were converted to mmol/kg_bw/day units. For prediction, validation and visualisation purposes -log10 transformations are used. + +David: please check if we have missed something -Chemical structures in both datasets are represented as SMILES strings -(Weininger 1988). Syntactically incorrect and missing SMILES were -generated from other identifiers (e.g names, CAS numbers) when possible. -Studies with undefined (“0”) or empty LOAEL entries were removed for -this study.
+### Derived datasets + +Two derived datasets were obtained from the original datasets: + +The *test* dataset contains data of compounds that occur in both datasets. Exact duplications of LOAEL values were removed, because it is very likely that they originate from the same study. +The test dataset has 391 LOAEL values for 155 unique chemical structures. + +The *combined* dataset is the union of the Mazzatorta and the Swiss Federal Office dataset and it is used to build predictive models. Exact LOAEL duplications were removed, as for the test dataset. +The combined dataset has 1014 LOAEL values for 671 unique chemical structures. Algorithms ---------- -Christoph - -For this study we are using the modular lazar (*la*zy *s*tructure -*a*ctivity *r*elationships) framework (Maunz et al. 2013) for model +In this study we are using the modular lazar (*la*zy *s*tructure +*a*ctivity *r*elationships) framework [@Maunz2013] for model development and validation. lazar follows the following basic workflow: For a given chemical -structure it searches in a database for similar structures (neighbors) -with experimental data, builds a local (Q)SAR model with these neighbors -and uses this model to predict the unknown activity of the query -compound. This procedure resembles an automated version of *read across* +structure lazar + +- searches in a database for similar structures (*neighbors*) +with experimental data, +- builds a local QSAR model with these neighbors +and +- uses this model to predict the unknown activity of the query +compound. + +This procedure resembles an automated version of *read across* predictions in toxicology, in machine learning terms it would be classified as a *k-nearest-neighbor* algorithm. Apart from this basic workflow lazar is completely modular and allows -the researcher to use any algorithm for neighbor identification and -local (Q)SAR modelling. 
Within this study we are using the following +the researcher to use any algorithm for similarity searches and +local QSAR modelling. Within this study we are using the following algorithms: ### Neighbor identification -Christoph - -Similarity calculations are based on MolPrint2D fingerprints (Bender et -al. 2004) from the OpenBabel chemoinformatics library (OBoyle et al. -2011). +Similarity calculations are based on MolPrint2D fingerprints [@doi:10.1021/ci034207y] from the OpenBabel chemoinformatics library [@OBoyle2011]. The MolPrint2D fingerprint uses atom environments as molecular representation, which resemble basically the chemical concept of functional groups. For each atom in a molecule it represents the -chemical environment with the atom types of connected atoms. +chemical environment using the atom types of connected atoms. -The main advantage of MolPrint2D fingerprints over fingerprints with -predefined substructures (such as OpenBabel FP3, FP4 or MACCs -fingerprints) is that it may capture substructures of toxicological -relevance that are not included in predefined substructure lists. +MolPrint2D fingerprints are generated dynamically from chemical structures and do not rely on predefined lists of fragments (such as OpenBabel FP3, FP4 or MACCS fingerprints or lists of toxicophores/toxicophobes). This has the advantage that they may capture substructures of toxicological relevance that are not included in other fingerprints. Preliminary experiments have shown that predictions with MolPrint2D -fingerprints are indeed more accurate than fingerprints with predefined -substructures. +fingerprints are indeed more accurate than other OpenBabel fingerprints. From MolPrint2D fingerprints we can construct a feature vector with all atom environments of a compound, which can be used to calculate chemical @@ -106,27 +138,38 @@ similarities.
[//]: # https://openbabel.org/docs/dev/FileFormats/MolPrint2D_format.html#molprint2d-format -The chemical similarity between two compounds is expressed as the -proportion between atom environments common in both structures and the -total number of atom environments (Jaccard/Tanimoto index, [@eq:jaccard]). +The chemical similarity between two compounds A and B is expressed as the +proportion between atom environments common in both structures $A \cap B$ and the +total number of atom environments $A \cup B$ (Jaccard/Tanimoto index, [@eq:jaccard]). $$ sim = \frac{|A \cap B|}{|A \cup B|} $$ {#eq:jaccard} -$A$ atom environments of compound A, $B$ atom environments of compound B. +A threshold of $sim > 0.1$ is used for the identification of neighbors for local QSAR models. +Compounds with the same structure as the query structure are eliminated from the neighbors to obtain an unbiased prediction. -### Local (Q)SAR models +### Local QSAR models and predictions -Christoph +Only similar compounds (*neighbors*) are used for local QSAR models. +In this investigation we are using a weighted partial least squares regression (PLS) algorithm for the prediction of quantitative properties. +First all fingerprint features with identical values across all neighbors are removed. +The remaining set of features is used as descriptors for creating a local weighted PLS model with atom environments as descriptors and model similarities as weights. The `plsr` function of the `pls` R package [@pls] is used for this purpose. +Finally the local PLS model is applied to predict the activity of the query compound. -As soon as neighbors for a query compound have been identified, we can -use their experimental LOAEL values to predict the activity of the -untested compound.
In this case we are using the weighted mean of the +If PLS modelling or prediction fails, the program resorts to using the weighted mean of the neighbors LOAEL values, where the contribution of each neighbor is weighted by its similarity to the query compound. ### Validation -Christoph +Two types of validations are used within this study: + +For the comparison of experimental variability with predictive accuracies we are using a test set of compounds that occur in both datasets. The *Mazzatorta*, *Swiss Federal Office* and *combined* datasets are used as training data for read across predictions. In order to obtain unbiased predictions *all* information from the test compound is removed from the training set prior to predictions. This is hardcoded into the prediction algorithm in order to prevent validation errors. + +TODO: treatment of duplicates + +In addition traditional 10-fold crossvalidation results are provided. + +Christoph: check if these specifications have changed at submission Results ======= @@ -147,7 +190,7 @@ baseline for evaluating prediction performance. Martin CheS-Mapper (Chemical Space Mapping and Visualization in 3D, -http://ches-mapper.org/, (Gutlein, Karwath, and Kramer 2012)) can be +http://ches-mapper.org/, @Gütlein2012) can be used to analyze the relationship between the structure of chemical compounds, their physico-chemical properties, and biological or toxic effects. CheS-Mapper embeds a dataset into 3D space, such that compounds @@ -164,7 +207,7 @@ Christoph datasets. A complete table for 138 functional groups from OpenBabel FP4 fingerprints can be found in the appendix. -![Frequency of functional groups.](functional-groups.pdf){#fig:fg} +![Frequency of functional groups.](figure/functional-groups.pdf){#fig:fg} ### Experimental variability versus prediction uncertainty @@ -177,6 +220,8 @@ substantial overlap of compounds, with LOAEL values in both datasets. 
##### Intra dataset variability +TODO: read data from files + The Mazzatorta dataset has 562 LOAEL values with 439 unique structures, the Swiss Federal Office dataset has 493 rat LOAEL values with 381 unique structures. [@fig:intra] shows the intra-dataset variability, where @@ -186,7 +231,7 @@ similar in both datasets (p-value: 0.48). [//]: # p-value: 0.4750771581019402 -![Intra dataset variability: Each vertical line represents a compound, dots are individual LOAEL values.](loael-dataset-comparison-all-compounds.pdf){#fig:intra} +[//]: # ![Intra dataset variability: Each vertical line represents a compound, dots are individual LOAEL values.](loael-dataset-comparison-all-compounds.pdf){#fig:intra} ##### Inter dataset variability @@ -194,11 +239,11 @@ similar in both datasets (p-value: 0.48). and Swiss Federal Office datasets. Obviously the experimental variability is larger than for individual datasets. -![Inter dataset variability](loael-dataset-comparison-common-compounds.pdf){#fig:inter} +[//]: # ![Inter dataset variability](loael-dataset-comparison-common-compounds.pdf){#fig:inter} ##### LOAEL correlation between datasets -[@fig:corr-1] depicts the correlation between LOAEL data from both datasets +[@fig:corr] depicts the correlation between LOAEL data from both datasets (using means for multiple measurements). Identical values were removed from analysis. @@ -206,16 +251,12 @@ Identical values were removed from analysis. 
[//]: # with identical values -``` -## Loading required package: methods -``` -![Correlation of dataset medians (-log10(LOAEL [mmol/kg_bw])](figure/unnamed-chunk-2-1.png) Correlation analysis shows a -significant correlation (p-value < 2.2e-16) with r\^2: 0.55, RMSE: 1.34 +significant correlation (p-value < 2.2e-16) with r\^2: 0.58, RMSE: 1.3 -### Local (Q)SAR models +### Local QSAR models Christoph @@ -223,46 +264,34 @@ In order to compare the perfomance of in silico models with experimental variabi The Mazzatorta, the Swiss Federal Office dataset and a combined dataset were used as training data. Predictions for the test set compounds were made after eliminating all information from the test compound from the corresponding training dataset. [@tbl:common-pred] summarizes the results: +![Comparison of experimental with predicted LOAEL values, each vertical line represents a compound.](figure/test-prediction.pdf){#fig:comp} + + -Training data | Model prediction | Experimental variability ---------------|------------------|------------------------- -Mazzatorta | 0.88 | 0.87 -Swiss Federal Office |0.65 | 0.76 -Commmon | 1.28| 0.8314774 -Combined | | 0.8242536 +Training data | $r^2$ | RMSE +--------------|---------------------------|------------------------- +Experimental | 0.58 | 1.3 +Mazzatorta | 0.38 | 1.49 +Swiss Federal Office |0.38 | 1.47 +Combined | 0.38 | 1.47 : Comparison of model predictions with experimental variability. 
{#tbl:common-pred} Traditional 10-fold cross-validation results are summarised in [@tbl:cv]: -Training dataset | $r^2$ | RMSE | MAE ------------------|-------|------|---- -Mazzatorta | 0.37 | 0.84| 0.65 -Swiss Federal Office | 0.25 | 0.75| 0.61 -Combined | 0.12 | 1.45| 1.21 +Training dataset | $r^2$ | RMSE +-----------------|-------|------ +Mazzatorta | 0.38 | 2.01 +Swiss Federal Office | 0.3 | 1.67 +Combined | 0.38 | 1.81 : 10-fold crossvalidation results {#tbl:cv} -[//]: # ```{r fig.cap="Comparison of predictions with measured values (-log10(LOAEL [mmol/kg_bw])", fig.lp="fig:", echo=F} +![Correlation of experimental with predicted LOAEL values (test set)](figure/test-correlation.pdf){} +![Correlation of experimental with predicted LOAEL values (10-fold crossvalidation)](figure/crossvalidation.pdf){} -``` -## Warning in file(file, "rt"): cannot open file 'data/common-test.csv': No -## such file or directory -``` - -``` -## Error in file(file, "rt"): cannot open the connection -``` - -``` -## Error in log10(data$LOAEL): non-numeric argument to mathematical function -``` - -``` -## Error in ggplot(sorted, aes(SMILES, -log10(LOAEL), ymin = min(-log10(LOAEL)), : object 'sorted' not found -``` Discussion ========== @@ -276,31 +305,3 @@ Summary References ========== - -Bender, Andreas, Hamse Y. Mussa, and Robert C. Glen, and Stephan -Reiling. 2004. “Molecular Similarity Searching Using Atom Environments, -Information-Based Feature Selection, and a Naïve Bayesian Classifier.” -*Journal of Chemical Information and Computer Sciences* 44 (1): 170–78. -doi:[10.1021/ci034207y](https://doi.org/10.1021/ci034207y). - -Gütlein, Martin, Andreas Karwath, and Stefan Kramer. 2012. “CheS-Mapper -- Chemical Space Mapping and Visualization in 3D.” *Journal of -Cheminformatics* 4 (1): 7. -doi:[10.1186/1758-2946-4-7](https://doi.org/10.1186/1758-2946-4-7). - -Maunz, Andreas, Martin Gütlein, Micha Rautenberg, David Vorgrimmler, -Denis Gebele, and Christoph Helma. 2013. 
“Lazar: A Modular Predictive -Toxicology Framework.” *Frontiers in Pharmacology* 4. Frontiers Media -SA. -doi:[10.3389/fphar.2013.00038](https://doi.org/10.3389/fphar.2013.00038). - -OBoyle, Noel M, Michael Banck, Craig A James, Chris Morley, Tim -Vandermeersch, and Geoffrey R Hutchison. 2011. “Open Babel: An Open -Chemical Toolbox.” *Journal of Cheminformatics* 3 (1). Springer Science; -Business Media: 33. -doi:[10.1186/1758-2946-3-33](https://doi.org/10.1186/1758-2946-3-33). - -Weininger, David. 1988. “SMILES, a Chemical Language and Information -System. 1. Introduction to Methodology and Encoding Rules.” *Journal of -Chemical Information and Computer Sciences* 28 (1): 31–36. -doi:[10.1021/ci00057a005](https://doi.org/10.1021/ci00057a005). diff --git a/paper/loael.pdf b/paper/loael.pdf index c937d62..cef90e5 100644 Binary files a/paper/loael.pdf and b/paper/loael.pdf differ diff --git a/paper/references.bib b/paper/references.bib deleted file mode 100644 index 6b40541..0000000 --- a/paper/references.bib +++ /dev/null @@ -1,85 +0,0 @@ -@Article{Gütlein2012, -AUTHOR = {Gutlein, Martin and Karwath, Andreas and Kramer, Stefan}, -TITLE = {CheS-Mapper - Chemical Space Mapping and Visualization in 3D}, -JOURNAL = {Journal of Cheminformatics}, -VOLUME = {4}, -YEAR = {2012}, -NUMBER = {1}, -PAGES = {7}, -URL = {http://www.jcheminf.com/content/4/1/7}, -DOI = {10.1186/1758-2946-4-7}, -PubMedID = {22424447}, -ISSN = {1758-2946}, -ABSTRACT = {Analyzing chemical datasets is a challenging task for scientific researchers in the field of chemoinformatics. It is important, yet difficult to understand the relationship between the structure of chemical compounds, their physico-chemical properties, and biological or toxic effects. To that respect, visualization tools can help to better comprehend the underlying correlations. 
Our recently developed 3D molecular viewer CheS-Mapper (Chemical Space Mapper) divides large datasets into clusters of similar compounds and consequently arranges them in 3D space, such that their spatial proximity reflects their similarity. The user can indirectly determine similarity, by selecting which features to employ in the process. The tool can use and calculate different kind of features, like structural fragments as well as quantitative chemical descriptors. These features can be highlighted within CheS-Mapper, which aids the chemist to better understand patterns and regularities and relate the observations to established scientific knowledge. As a final function, the tool can also be used to select and export specific subsets of a given dataset for further analysis.}, -} - -@article{doi:10.1021/ci034207y, -author = {Andreas Bender and Hamse Y. Mussa, and and Robert C. Glen and Stephan Reiling}, -title = {Molecular Similarity Searching Using Atom Environments, Information-Based Feature Selection, and a Naïve Bayesian Classifier}, -journal = {Journal of Chemical Information and Computer Sciences}, -volume = {44}, -number = {1}, -pages = {170-178}, -year = {2004}, -doi = {10.1021/ci034207y}, - note ={PMID: 14741025}, - -URL = { - http://dx.doi.org/10.1021/ci034207y - -}, -eprint = { - http://dx.doi.org/10.1021/ci034207y - -} - -} - - -@article{Maunz2013, - doi = {10.3389/fphar.2013.00038}, - url = {http://dx.doi.org/10.3389/fphar.2013.00038}, - year = {2013}, - publisher = {Frontiers Media {SA}}, - volume = {4}, - author = {Andreas Maunz and Martin G\"{u}tlein and Micha Rautenberg and David Vorgrimmler and Denis Gebele and Christoph Helma}, - title = {lazar: a modular predictive toxicology framework}, - journal = {Frontiers in Pharmacology} -} - - - - -@article{doi:10.1021/ci00057a005, -author = {David Weininger}, -title = {SMILES, a chemical language and information system. 1. 
Introduction to methodology and encoding rules}, -journal = {Journal of Chemical Information and Computer Sciences}, -volume = {28}, -number = {1}, -pages = {31-36}, -year = {1988}, -doi = {10.1021/ci00057a005}, - -URL = { - http://dx.doi.org/10.1021/ci00057a005 - -}, -eprint = { - http://dx.doi.org/10.1021/ci00057a005 - -} - -} - -@article{OBoyle2011, - doi = {10.1186/1758-2946-3-33}, - url = {http://dx.doi.org/10.1186/1758-2946-3-33}, - year = {2011}, - publisher = {Springer Science and Business Media}, - volume = {3}, - number = {1}, - pages = {33}, - author = {Noel M OBoyle and Michael Banck and Craig A James and Chris Morley and Tim Vandermeersch and Geoffrey R Hutchison}, - title = {Open Babel: An open chemical toolbox}, - journal = {Journal of Cheminformatics} -} diff --git a/paper/references.bibtex b/paper/references.bibtex new file mode 100644 index 0000000..735a52f --- /dev/null +++ b/paper/references.bibtex @@ -0,0 +1,116 @@ +@Article{Gütlein2012, +AUTHOR = {Gütlein, Martin and Karwath, Andreas and Kramer, Stefan}, +TITLE = {CheS-Mapper - Chemical Space Mapping and Visualization in 3D}, +JOURNAL = {Journal of Cheminformatics}, +VOLUME = {4}, +YEAR = {2012}, +NUMBER = {1}, +PAGES = {7}, +URL = {http://www.jcheminf.com/content/4/1/7}, +DOI = {10.1186/1758-2946-4-7}, +PubMedID = {22424447}, +ISSN = {1758-2946}, +ABSTRACT = {Analyzing chemical datasets is a challenging task for scientific researchers in the field of chemoinformatics. It is important, yet difficult to understand the relationship between the structure of chemical compounds, their physico-chemical properties, and biological or toxic effects. To that respect, visualization tools can help to better comprehend the underlying correlations. Our recently developed 3D molecular viewer CheS-Mapper (Chemical Space Mapper) divides large datasets into clusters of similar compounds and consequently arranges them in 3D space, such that their spatial proximity reflects their similarity. 
The user can indirectly determine similarity, by selecting which features to employ in the process. The tool can use and calculate different kind of features, like structural fragments as well as quantitative chemical descriptors. These features can be highlighted within CheS-Mapper, which aids the chemist to better understand patterns and regularities and relate the observations to established scientific knowledge. As a final function, the tool can also be used to select and export specific subsets of a given dataset for further analysis.}, +} + +@article{doi:10.1021/ci034207y, +author = {Andreas Bender and Hamse Y. Mussa and Robert C. Glen and Stephan Reiling}, +title = {Molecular Similarity Searching Using Atom Environments, Information-Based Feature Selection, and a Naïve Bayesian Classifier}, +journal = {Journal of Chemical Information and Computer Sciences}, +volume = {44}, +number = {1}, +pages = {170-178}, +year = {2004}, +doi = {10.1021/ci034207y}, + note ={PMID: 14741025}, + +URL = { + http://dx.doi.org/10.1021/ci034207y + +}, +eprint = { + http://dx.doi.org/10.1021/ci034207y + +} + +} + + +@article{Maunz2013, + doi = {10.3389/fphar.2013.00038}, + url = {http://dx.doi.org/10.3389/fphar.2013.00038}, + year = {2013}, + publisher = {Frontiers Media {SA}}, + volume = {4}, + author = {Andreas Maunz and Martin G\"{u}tlein and Micha Rautenberg and David Vorgrimmler and Denis Gebele and Christoph Helma}, + title = {lazar: a modular predictive toxicology framework}, + journal = {Frontiers in Pharmacology} +} + + + + +@article{doi:10.1021/ci00057a005, +author = {David Weininger}, +title = {SMILES, a chemical language and information system. 1.
Introduction to methodology and encoding rules}, +journal = {Journal of Chemical Information and Computer Sciences}, +volume = {28}, +number = {1}, +pages = {31-36}, +year = {1988}, +doi = {10.1021/ci00057a005}, + +URL = { + http://dx.doi.org/10.1021/ci00057a005 + +}, +eprint = { + http://dx.doi.org/10.1021/ci00057a005 + +} + +} + +@article{OBoyle2011, + doi = {10.1186/1758-2946-3-33}, + url = {http://dx.doi.org/10.1186/1758-2946-3-33}, + year = {2011}, + publisher = {Springer Science and Business Media}, + volume = {3}, + number = {1}, + pages = {33}, + author = {Noel M OBoyle and Michael Banck and Craig A James and Chris Morley and Tim Vandermeersch and Geoffrey R Hutchison}, + title = {Open Babel: An open chemical toolbox}, + journal = {Journal of Cheminformatics} +} + +@article{mazzatorta08, +author = {Paolo Mazzatorta and Manuel Dominguez Estevez and Myriam Coulet and Benoit Schilter}, +title = {Modeling Oral Rat Chronic Toxicity}, +journal = {Journal of Chemical Information and Modeling}, +volume = {48}, +number = {10}, +pages = {1949-1954}, +year = {2008}, +doi = {10.1021/ci8001974}, + note ={PMID: 18803370}, + +URL = { + http://dx.doi.org/10.1021/ci8001974 + +}, +eprint = { + http://dx.doi.org/10.1021/ci8001974 + +} + +} + +@Manual{pls, + title = {pls: Partial Least Squares and Principal Component Regression}, + author = {Bjørn-Helge Mevik and Ron Wehrens and Kristian Hovde Liland}, + year = {2015}, + note = {R package version 2.5-0}, + url = {https://CRAN.R-project.org/package=pls}, + } + diff --git a/paper/test-correlation-plot.R b/paper/test-correlation-plot.R new file mode 100644 index 0000000..0626bd4 --- /dev/null +++ b/paper/test-correlation-plot.R @@ -0,0 +1,21 @@ +library(ggplot2) +library(grid) +library(gridExtra) + +experimental <- read.csv("data/median-correlation.csv",header=T) +p1 = qplot(-log10(mazzatorta),-log10(swiss),data=experimental,xlab="-log10(LOAEL Mazzatorta median)",ylab="-log10(LOAEL Swiss Federal Office median)",main="Experimental 
data") + geom_point() + geom_abline(intercept=0.0) + xlim(-1,4) + ylim(-1,4) + +mazzatorta = read.csv("data/mazzatorta-test-predictions.csv",header=T) +swiss = read.csv("data/swiss-test-predictions.csv",header=T) +combined = read.csv("data/combined-test-predictions.csv",header=T) + +p2 = qplot(-log10(LOAEL_predicted),-log10(LOAEL_measured_median),data=mazzatorta,xlab="-log10(LOAEL predicted)",ylab="-log10(LOAEL measured median)",main="Mazzatorta") + geom_point() + geom_abline(intercept=0.0) + xlim(-1,4) + ylim(-1,4) + +p3 = qplot(-log10(LOAEL_predicted),-log10(LOAEL_measured_median),data=swiss,xlab="-log10(LOAEL predicted)",ylab="-log10(LOAEL measured median)",main="Swiss Federal Office") + geom_point() + geom_abline(intercept=0.0) + xlim(-1,4) + ylim(-1,4) + +p4 = qplot(-log10(LOAEL_predicted),-log10(LOAEL_measured_median),data=combined,xlab="-log10(LOAEL predicted)",ylab="-log10(LOAEL measured median)",main="Combined") + geom_point() + geom_abline(intercept=0.0) + xlim(-1,4) + ylim(-1,4) + +pdf('figure/test-correlation.pdf') +grid.arrange(p1,p2,p3,p4,ncol=2) +dev.off() + diff --git a/paper/test-correlation.R b/paper/test-correlation.R new file mode 100644 index 0000000..99d113a --- /dev/null +++ b/paper/test-correlation.R @@ -0,0 +1,15 @@ +mazzatorta = read.csv("data/mazzatorta-test-predictions.csv",header=T) +swiss = read.csv("data/swiss-test-predictions.csv",header=T) +combined = read.csv("data/combined-test-predictions.csv",header=T) + +mazzatorta.p = round(cor.test(-log(mazzatorta$LOAEL_measured_median),-log(mazzatorta$LOAEL_predicted))$p.value,2) +mazzatorta.r_square = round(cor(-log(mazzatorta$LOAEL_measured_median),-log(mazzatorta$LOAEL_predicted))^2,2) +mazzatorta.rmse = round(sqrt(mean((-log(mazzatorta$LOAEL_measured_median)+log(mazzatorta$LOAEL_predicted))^2)),2) + +swiss.p = round(cor.test(-log(swiss$LOAEL_measured_median),-log(swiss$LOAEL_predicted))$p.value,2) +swiss.r_square = 
round(cor(-log(swiss$LOAEL_measured_median),-log(swiss$LOAEL_predicted))^2,2) +swiss.rmse = round(sqrt(mean((-log(swiss$LOAEL_measured_median)+log(swiss$LOAEL_predicted))^2)),2) + +combined.p = round(cor.test(-log(combined$LOAEL_measured_median),-log(combined$LOAEL_predicted))$p.value,2) +combined.r_square = round(cor(-log(combined$LOAEL_measured_median),-log(combined$LOAEL_predicted))^2,2) +combined.rmse = round(sqrt(mean((-log(combined$LOAEL_measured_median)+log(combined$LOAEL_predicted))^2)),2) diff --git a/paper/test-prediction-plot.R b/paper/test-prediction-plot.R new file mode 100644 index 0000000..c43737f --- /dev/null +++ b/paper/test-prediction-plot.R @@ -0,0 +1,32 @@ +library(ggplot2) +library(grid) +library(gridExtra) + +mazzatorta = read.csv("data/mazzatorta-test-predictions.csv",header=T) +swiss = read.csv("data/swiss-test-predictions.csv",header=T) +combined = read.csv("data/combined-test-predictions.csv",header=T) +test <- read.csv("data/test.csv",header=T) +n = c("SMILES","LOAEL","Source") + +data = data.frame(factor(test$SMILES),test$LOAEL,factor(test$Dataset)) +names(data) = n +data$Type = "experimental" +maz = data.frame(factor(mazzatorta$SMILES),mazzatorta$LOAEL_predicted,factor(mazzatorta$Dataset)) +names(maz) = n +maz$Type = "predicted" +data = rbind(data,maz) +swi = data.frame(factor(swiss$SMILES),swiss$LOAEL_predicted,factor(swiss$Dataset)) +names(swi) = n +swi$Type = "predicted" +data = rbind(data,swi) +comb = data.frame(factor(combined$SMILES),combined$LOAEL_predicted,factor(combined$Dataset)) +names(comb) = n +comb$Type = "predicted" +data = rbind(data,comb) +data$LOAEL = -log(data$LOAEL) +data$SMILES <- reorder(data$SMILES,data$LOAEL) +img <- ggplot(data, aes(SMILES,LOAEL,ymin = min(LOAEL), ymax=max(LOAEL),shape=Source,color=Type)) +img <- img + ylab('-log(LOAEL mg/kg_bw/day)') + xlab('Compound') + theme(axis.text.x = element_blank()) +img <- img + geom_point() + +ggsave(file='figure/test-prediction.pdf', plot=img,width=12, height=8) 
diff --git a/paper/test-set-validation.rb b/paper/test-set-validation.rb deleted file mode 100644 index d842d47..0000000 --- a/paper/test-set-validation.rb +++ /dev/null @@ -1,22 +0,0 @@ -require_relative "include.rb" - -test = Dataset.from_csv_file(File.join(DATA,"common-test.csv")) - -file = File.join(DATA,ARGV[0]) -dataset = Dataset.from_csv_file file -model = Model::LazarRegression.create dataset -validation = Validation.create model, dataset, test -csv_file = file.sub(".csv","-test-predictions.csv") -name = File.basename(ARGV[0],".csv") - -data = [] -validation.predictions.each do |p| - data << [Compound.find(p[0]).smiles, p[2], p[3],"#{name}-prediction"] -end - -data.sort!{|a,b| a[1] <=> b[1]} - -CSV.open(csv_file,"w+") do |csv| - csv << ["SMILES","LOAEL","Confidence","Dataset"] - data.each{|r| csv << r} -end diff --git a/paper/test-validation.rb b/paper/test-validation.rb new file mode 100644 index 0000000..b748a8d --- /dev/null +++ b/paper/test-validation.rb @@ -0,0 +1,24 @@ +require_relative "include.rb" + +test = Dataset.from_csv_file(File.join(DATA,"common-test.csv")) + +file = File.join(DATA,ARGV[0]) +dataset = Dataset.from_csv_file file +model = Model::LazarRegression.create dataset +validation = RegressionValidation.create model, dataset, test +csv_file = file.sub(".csv","-test-predictions.csv") +id_file = file.sub(".csv","-test-predictions.id") +File.open(id_file,"w+"){|f| f.puts validation.id} +name = File.basename(ARGV[0],".csv") + +data = [] +validation.predictions.each do |p| + data << [Compound.find(p[0]).smiles, p[1].median, p[2], p[3],"#{name}-prediction"] +end + +data.sort!{|a,b| a[1] <=> b[1]} + +CSV.open(csv_file,"w+") do |csv| + csv << ["SMILES","LOAEL_measured_median","LOAEL_predicted","Confidence","Dataset"] + data.each{|r| csv << r} +end diff --git a/paper/unique-smiles.rb b/paper/unique-smiles.rb new file mode 100644 index 0000000..d316c3f --- /dev/null +++ b/paper/unique-smiles.rb @@ -0,0 +1,18 @@ +require_relative "include.rb" + 
+input = Dataset.from_csv_file File.join(ARGV[0]) +outname = File.join(File.dirname(ARGV[0]),"#{ARGV[1]}.csv") + +data = [] +input.compounds.each_with_index do |cid,i| + c = Compound.find cid + v = input.data_entries[i].first + data << [c.smiles,v,ARGV[1]] +end + +data.sort!{|a,b| a[1] <=> b[1]} + +CSV.open(outname,"w+") do |csv| + csv << ["SMILES","LOAEL","Dataset"] + data.each{|r| csv << r} +end -- cgit v1.2.3