summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristoph Helma <helma@in-silico.ch>2016-02-17 21:13:23 +0100
committerChristoph Helma <helma@in-silico.ch>2016-02-17 21:13:23 +0100
commit4d67ddb9fe72cb4cba42e508a943e1d4d04fef8c (patch)
tree7495d315e55521adcb1818bc97214b1096915d16
parent0ab42bc04fefd4dd67b62c87dc651c90dc7988f0 (diff)
validations, figures, text finalisation started
-rw-r--r--paper/Makefile60
-rw-r--r--paper/Rakefile12
l---------paper/appendix/functional-groups.csv1
-rw-r--r--paper/create-median-correlation.rb4
-rw-r--r--paper/crossvalidation-plots.R20
-rw-r--r--paper/crossvalidation.R16
-rw-r--r--paper/crossvalidation.rb23
-rw-r--r--paper/crossvalidations.R9
-rw-r--r--paper/crossvalidations.rb18
-rw-r--r--paper/data/SMARTS_InteLigand.txt (renamed from paper/SMARTS_InteLigand.txt)0
-rw-r--r--paper/data/combined-cv.csv923
-rw-r--r--paper/data/combined-cv.id1
-rw-r--r--paper/data/combined-test-predictions.csv302
-rw-r--r--paper/data/combined-test-predictions.id1
-rw-r--r--paper/data/common-median.csv34
-rw-r--r--paper/data/functional-groups-reduced.csv (renamed from paper/functional-groups-reduced.csv)0
-rw-r--r--paper/data/functional-groups-reduced4R.csv (renamed from paper/functional-groups-reduced4R.csv)0
-rw-r--r--paper/data/functional-groups.csv (renamed from paper/functional-groups.csv)0
-rw-r--r--paper/data/mazzatorta-cv.csv519
-rw-r--r--paper/data/mazzatorta-cv.id1
-rw-r--r--paper/data/mazzatorta-test-predictions.csv298
-rw-r--r--paper/data/mazzatorta-test-predictions.id1
-rw-r--r--paper/data/mazzatorta.csv~568
-rw-r--r--paper/data/median-correlation.csv208
-rw-r--r--paper/data/swiss-cv.csv447
-rw-r--r--paper/data/swiss-cv.id1
-rw-r--r--paper/data/swiss-test-predictions.csv300
-rw-r--r--paper/data/swiss-test-predictions.id1
-rw-r--r--paper/data/swiss.csv~494
-rw-r--r--paper/figure/corr-1.pngbin8183 -> 0 bytes
-rw-r--r--paper/figure/crossvalidation.pdfbin0 -> 24785 bytes
-rw-r--r--paper/figure/functional-groups.pdf (renamed from paper/loael-dataset-correlation.pdf)bin6050 -> 6389 bytes
-rw-r--r--paper/figure/predictions-1.pngbin11720 -> 0 bytes
-rw-r--r--paper/figure/test-correlation.pdfbin0 -> 10885 bytes
-rw-r--r--paper/figure/test-prediction.pdfbin0 -> 27063 bytes
-rw-r--r--paper/figure/unnamed-chunk-2-1.pngbin8183 -> 0 bytes
-rw-r--r--paper/figure/unnamed-chunk-5-1.pngbin11706 -> 0 bytes
-rwxr-xr-xpaper/functional-groups.R11
-rw-r--r--paper/loael-dataset-comparison-all-compounds.pdfbin67739 -> 0 bytes
-rw-r--r--paper/loael-dataset-comparison-common-compounds.pdfbin71753 -> 0 bytes
-rw-r--r--paper/loael-dataset-comparison.rb75
-rw-r--r--paper/loael.Rmd236
-rw-r--r--paper/loael.md231
-rw-r--r--paper/loael.pdfbin356362 -> 272191 bytes
-rw-r--r--paper/references.bibtex (renamed from paper/references.bib)33
-rw-r--r--paper/test-correlation-plot.R21
-rw-r--r--paper/test-correlation.R15
-rw-r--r--paper/test-prediction-plot.R32
-rw-r--r--paper/test-validation.rb (renamed from paper/test-set-validation.rb)8
-rw-r--r--paper/unique-smiles.rb18
50 files changed, 2933 insertions, 2009 deletions
diff --git a/paper/Makefile b/paper/Makefile
index d66bf36..4aa5ab3 100644
--- a/paper/Makefile
+++ b/paper/Makefile
@@ -1,51 +1,55 @@
# Paper
-loael.pdf: loael.md functional-groups.pdf loael-dataset-correlation.pdf rmse.R test-set-validation.csv crossvalidations.R data/common-median.csv
+loael.pdf: loael.md references.bibtex
+ pandoc -r markdown+simple_tables+table_captions+yaml_metadata_block -s -S --bibliography=references.bibtex --latex-engine=pdflatex --filter pandoc-crossref --filter pandoc-citeproc -o loael.pdf loael.md
-loael.md: loael.Rmd rmse.R
+loael.md: loael.Rmd figures validations
+ Rscript --vanilla -e "library(knitr); knit('loael.Rmd');"
-loael.docx: loael.md functional-groups.pdf loael-dataset-correlation.pdf
+loael.docx: loael.md
pandoc --filter pandoc-crossref --filter pandoc-citeproc loael.md -s -o loael.docx
rmse.R: rmse.rb
ruby rmse.rb
-crossvalidations.R: crossvalidations.rb
- ruby crossvalidations.rb
+# Figures
-test-set-validation.csv: test-set-validation.rb
- ruby test-set-validation.rb
+figures: datasets validations figure/functional-groups.pdf figure/test-prediction.pdf figure/test-correlation.pdf figure/crossvalidation.pdf
-%.md: %.Rmd
- Rscript --vanilla -e "library(knitr); knit('$<');"
+figure/functional-groups.pdf: data/functional-groups-reduced4R.csv functional-groups.R
+ Rscript functional-groups.R
-%.pdf: %.md
- pandoc --filter pandoc-crossref --filter pandoc-citeproc $< -s -o $@
+figure/crossvalidation.pdf: data/mazzatorta-cv.csv data/swiss-cv.csv data/combined-cv.csv
+ Rscript crossvalidation-plots.R
-functional-groups.pdf: functional-groups-reduced4R.csv functional-groups.R
- R CMD BATCH functional-groups.R
+figure/test-prediction.pdf: data/mazzatorta-test-predictions.csv data/swiss-test-predictions.csv data/combined-test-predictions.csv data/median-correlation.csv test-prediction-plot.R
+ Rscript test-prediction-plot.R
-loael-dataset-correlation.pdf: loael-dataset-comparison.rb
- ruby loael-dataset-comparison.rb
+figure/test-correlation.pdf: data/mazzatorta-test-predictions.csv data/swiss-test-predictions.csv data/combined-test-predictions.csv data/median-correlation.csv test-correlation-plot.R
+ Rscript test-correlation-plot.R
-#data/common-test.csv: create-test-set.rb
- #ruby create-test-set.rb
+# Validations
-dataset-correlation.R: data/common-test.csv dataset-correlation.R
- Rscript dataset-correlation.R
+validations: test-predictions crossvalidations
-# Validations
+crossvalidations: data/mazzatorta-cv.csv data/swiss-cv.csv data/combined-cv.csv
+
+data/mazzatorta-cv.csv: crossvalidation.rb data/mazzatorta.csv
+ ruby crossvalidation.rb mazzatorta.csv
+
+data/swiss-cv.csv: crossvalidation.rb data/swiss.csv
+ ruby crossvalidation.rb swiss.csv
-crossvalidation.R: data/mazzatorta.csv,data/swiss.csv,data/combined.csv crossvalidation.rb
- crossvalidation.rb
+data/combined-cv.csv: crossvalidation.rb data/combined.csv
+ ruby crossvalidation.rb combined.csv
test-predictions: data/mazzatorta-test-predictions.csv data/swiss-test-predictions.csv data/combined-test-predictions.csv
-data/mazzatorta-test-predictions.csv: test-set-validation.rb data/test.csv data/mazzatorta.csv
- ruby test-set-validation.rb mazzatorta.csv
-data/swiss-test-predictions.csv: test-set-validation.rb data/test.csv data/swiss.csv
- ruby test-set-validation.rb swiss.csv
-data/combined-test-predictions.csv: test-set-validation.rb data/test.csv data/combined.csv
- ruby test-set-validation.rb combined.csv
+data/mazzatorta-test-predictions.csv: test-validation.rb data/test.csv data/mazzatorta.csv
+ ruby test-validation.rb mazzatorta.csv
+data/swiss-test-predictions.csv: test-validation.rb data/test.csv data/swiss.csv
+ ruby test-validation.rb swiss.csv
+data/combined-test-predictions.csv: test-validation.rb data/test.csv data/combined.csv
+ ruby test-validation.rb combined.csv
# Datasets
diff --git a/paper/Rakefile b/paper/Rakefile
deleted file mode 100644
index 1d45b9e..0000000
--- a/paper/Rakefile
+++ /dev/null
@@ -1,12 +0,0 @@
-task :default => "loael.pdf"
-
-file "loael.pdf" => ["loael.md","functional-groups.pdf", "loael-dataset-correlation.pdf"]
- `pandoc --filter pandoc-citeproc loael.md -s -o loael.pdf`
- pid=`pidof mupdf`.chomp
- `kill -s SIGHUP #{pid}`
-
-file "functional-groups.pdf" => ["functional-groups-reduced4R.csv","functional-groups.R"]
- `R CMD BATCH functional-groups.R`
-
-file "loael-dataset-correlation.pdf" => ["loael-dataset-comparison.rb"]
- `ruby loael-dataset-comparison.rb`
diff --git a/paper/appendix/functional-groups.csv b/paper/appendix/functional-groups.csv
deleted file mode 120000
index ce02e14..0000000
--- a/paper/appendix/functional-groups.csv
+++ /dev/null
@@ -1 +0,0 @@
-functional-groups.csv \ No newline at end of file
diff --git a/paper/create-median-correlation.rb b/paper/create-median-correlation.rb
index 6aeee11..9a2f6f5 100644
--- a/paper/create-median-correlation.rb
+++ b/paper/create-median-correlation.rb
@@ -16,13 +16,13 @@ common_compound_ids.each do |cid|
new_values -= identical
end
unless old_values.empty? or new_values.empty?
- data << [c.smiles,old_values.mean,new_values.mean]
+ data << [c.smiles,old_values.median,new_values.median]
end
end
data.sort!{|a,b| a[1] <=> b[1]}
-CSV.open(File.join(DATA,"common-median.csv"),"w+") do |csv|
+CSV.open(File.join(DATA,"median-correlation.csv"),"w+") do |csv|
csv << ["SMILES","mazzatorta","swiss"]
data.each{|r| csv << r}
end
diff --git a/paper/crossvalidation-plots.R b/paper/crossvalidation-plots.R
new file mode 100644
index 0000000..6665fdb
--- /dev/null
+++ b/paper/crossvalidation-plots.R
@@ -0,0 +1,20 @@
+library(ggplot2)
+library(grid)
+library(gridExtra)
+
+mazzatorta = read.csv("data/mazzatorta-cv.csv",header=T)
+swiss = read.csv("data/swiss-cv.csv",header=T)
+combined = read.csv("data/combined-cv.csv",header=T)
+
+#experimental <- read.csv("data/median-correlation.csv",header=T)
+#p1 = qplot(-log10(mazzatorta),-log10(swiss),data=experimental,xlab="-log10(LOAEL Mazzatorta median)",ylab="-log10(LOAEL Swiss Federal Office median)",main="Experimental data") + geom_point() + geom_abline(intercept=0.0) + xlim(-2,4.5) + ylim(-2,4.5)
+
+p2 = qplot(-log10(LOAEL_predicted),-log10(LOAEL_measured_median),data=mazzatorta,xlab="-log10(LOAEL predicted)",ylab="-log10(LOAEL measured median)",main="Mazzatorta") + geom_point() + geom_abline(intercept=0.0) + xlim(-2,4.5) + ylim(-2,4.5)
+
+p3 = qplot(-log10(LOAEL_predicted),-log10(LOAEL_measured_median),data=swiss,xlab="-log10(LOAEL predicted)",ylab="-log10(LOAEL measured median)",main="Swiss Federal Office") + geom_point() + geom_abline(intercept=0.0) + xlim(-2,4.5) + ylim(-2,4.5)
+
+p4 = qplot(-log10(LOAEL_predicted),-log10(LOAEL_measured_median),data=combined,xlab="-log10(LOAEL predicted)",ylab="-log10(LOAEL measured median)",main="Combined") + geom_point() + geom_abline(intercept=0.0) + xlim(-2,4.5) + ylim(-2,4.5)
+
+pdf('figure/crossvalidation.pdf')
+grid.arrange(p2,p3,p4,ncol=2)
+dev.off()
diff --git a/paper/crossvalidation.R b/paper/crossvalidation.R
new file mode 100644
index 0000000..a32f608
--- /dev/null
+++ b/paper/crossvalidation.R
@@ -0,0 +1,16 @@
+mazzatorta = read.csv("data/mazzatorta-cv.csv",header=T)
+swiss = read.csv("data/swiss-cv.csv",header=T)
+combined = read.csv("data/combined-cv.csv",header=T)
+
+cv.mazzatorta.p = round(cor.test(-log(mazzatorta$LOAEL_measured_median),-log(mazzatorta$LOAEL_predicted))$p.value,2)
+cv.mazzatorta.r_square = round(cor(-log(mazzatorta$LOAEL_measured_median),-log(mazzatorta$LOAEL_predicted))^2,2)
+cv.mazzatorta.rmse = round(sqrt(mean((-log(mazzatorta$LOAEL_measured_median)+log(mazzatorta$LOAEL_predicted))^2)),2)
+
+cv.swiss.p = round(cor.test(-log(swiss$LOAEL_measured_median),-log(swiss$LOAEL_predicted))$p.value,2)
+cv.swiss.r_square = round(cor(-log(swiss$LOAEL_measured_median),-log(swiss$LOAEL_predicted))^2,2)
+cv.swiss.rmse = round(sqrt(mean((-log(swiss$LOAEL_measured_median)+log(swiss$LOAEL_predicted))^2)),2)
+
+cv.combined.p = round(cor.test(-log(combined$LOAEL_measured_median),-log(combined$LOAEL_predicted))$p.value,2)
+cv.combined.r_square = round(cor(-log(combined$LOAEL_measured_median),-log(combined$LOAEL_predicted))^2,2)
+cv.combined.rmse = round(sqrt(mean((-log(combined$LOAEL_measured_median)+log(combined$LOAEL_predicted))^2)),2)
+
diff --git a/paper/crossvalidation.rb b/paper/crossvalidation.rb
new file mode 100644
index 0000000..1f4c023
--- /dev/null
+++ b/paper/crossvalidation.rb
@@ -0,0 +1,23 @@
+require_relative 'include.rb'
+
+name = File.basename ARGV[0], ".csv"
+file = File.join DATA,ARGV[0]
+csv_file = File.join(DATA,ARGV[0].sub(/.csv/,'-cv.csv'))
+id_file = File.join(DATA,ARGV[0].sub(/.csv/,'-cv.id'))
+dataset = Dataset.from_csv_file file
+model = Model::LazarRegression.create dataset
+cv = RegressionCrossValidation.create model
+File.open(id_file,"w+"){|f| f.puts cv.id}
+
+data = []
+cv.predictions.each do |p|
+ smi = Compound.find(p[0]).smiles
+ data << [smi,p[1].median,p[2],p[3]]
+end
+
+data.sort!{|a,b| a[1] <=> b[1]}
+
+CSV.open(csv_file,"w+") do |csv|
+ csv << ["SMILES","LOAEL_measured_median","LOAEL_predicted","Confidence"]
+ data.each{|r| csv << r}
+end
diff --git a/paper/crossvalidations.R b/paper/crossvalidations.R
deleted file mode 100644
index cdc4c7e..0000000
--- a/paper/crossvalidations.R
+++ /dev/null
@@ -1,9 +0,0 @@
-cv.mazzatorta.rmse <- 0.8439115008205602
-cv.mazzatorta.r.squared <- 0.3730663179459023
-cv.mazzatorta.mae <- 0.6546182843884356
-cv.swiss.rmse <- 0.7507139457130771
-cv.swiss.r.squared <- 0.2507741939299348
-cv.swiss.mae <- 0.6068918271561476
-cv.combined.rmse <- 1.4536329017938434
-cv.combined.r.squared <- 0.12100621490895397
-cv.combined.mae <- 1.2096846143410287
diff --git a/paper/crossvalidations.rb b/paper/crossvalidations.rb
deleted file mode 100644
index f6a5143..0000000
--- a/paper/crossvalidations.rb
+++ /dev/null
@@ -1,18 +0,0 @@
-require_relative 'include.rb'
-file = File.join(DATA,ARGV[0])
-dataset = Dataset.from_csv_file file
-model = Model::LazarRegression.create dataset
-cv = RegressionCrossValidation.create model
-=begin
-=end
-
-datasets = ["mazzatorta","swiss","combined"]
-File.open("crossvalidations.R","w+") do |f|
- [0,1,5].each do |i|
- dataset = datasets.shift
- cv = OpenTox::RegressionCrossValidation.all[i]
- f.puts "cv.#{dataset}.rmse <- #{cv.rmse}"
- f.puts "cv.#{dataset}.r.squared <- #{cv.r_squared}"
- f.puts "cv.#{dataset}.mae <- #{cv.mae}"
- end
-end
diff --git a/paper/SMARTS_InteLigand.txt b/paper/data/SMARTS_InteLigand.txt
index 23bc6e2..23bc6e2 100644
--- a/paper/SMARTS_InteLigand.txt
+++ b/paper/data/SMARTS_InteLigand.txt
diff --git a/paper/data/combined-cv.csv b/paper/data/combined-cv.csv
new file mode 100644
index 0000000..4de9b72
--- /dev/null
+++ b/paper/data/combined-cv.csv
@@ -0,0 +1,923 @@
+SMILES,LOAEL_measured_median,LOAEL_predicted,Confidence
+ClC12C3C4(C(C1(Cl)Cl)(C1(C2(C3(Cl)C(C41Cl)(Cl)Cl)Cl)Cl)Cl)Cl,1.9565721591442926e-05,0.0014218133641616987,0.625
+ClC1=C(Cl)C2(C(C1(Cl)C1C2C2CC1C=C2)(Cl)Cl)Cl,2.7404023436797774e-05,0.0012794179116857743,1
+ClC1C2OC2C2C1C1(Cl)C(=C(C2(C1(Cl)Cl)Cl)Cl)Cl,6.421500622500271e-05,0.0006312904946719587,1
+N#Cc1nn(c(c1S(=O)C(F)(F)F)N)c1c(Cl)cc(cc1Cl)C(F)(F)F,0.00013496580117055152,0.034974567888840846,1
+N#Cc1nn(c(c1S(=O)C(F)(F)F)N)c1c(Cl)cc(cc1Cl)C(F)(F)F,0.0001372533562906347,0.02698619904377642,1
+CCSCCSP(=S)(OCC)OCC,0.00014577045919371006,0.0029396972764529202,1
+CCOP(=S)(SCSC(C)(C)C)OCC,0.00019068711849574906,0.0034018170593390737,1
+Clc1ccc2c(c1)[n+]([O-])nc(n2)n1cncc1,0.00020190555530632425,0.03094282625719898,1
+ClC1=C(Cl)C2(C(C1(Cl)C1C2C2CC1C1C2O1)(Cl)Cl)Cl,0.0002625296750418184,0.0014370243329576793,1
+OC(=O)C(Oc1ccc(cc1)Oc1ncc(cc1Cl)C(F)(F)F)C,0.00027647194701359843,0.01497899097540358,1
+OC(=O)C(Oc1ccc(cc1)Oc1ncc(cc1Cl)C(F)(F)F)C,0.0002764719511333511,0.018693951262222713,1
+ClC1=C(Cl)C2(C(C1(Cl)C1C2C2CC1C1C2O1)(Cl)Cl)Cl,0.000328162093802273,0.0009393540815108845,1
+CCOP(=S)(SCSC(C)(C)C)OCC,0.00034670385697674235,0.0034018170593390737,1
+CCSCCSP(=S)(OCC)OCC,0.00036442614798427517,0.0032087834044491653,1
+ClC1=C(Cl)C2(C(C1(Cl)C1C2C2CC1C1C2O1)(Cl)Cl)Cl,0.00039379451256272755,0.0009393540815108845,1
+ClC1C2OC2C2C1C1(Cl)C(=C(C2(C1(Cl)Cl)Cl)Cl)Cl,0.0005137200498000217,0.0006312904946719587,1
+CNC(=O)ON=CC(SC)(C)C,0.0005255875464343458,0.03526134931259953,1
+COC(=O)C(Oc1ccc(cc1)Oc1ccc(cc1Cl)Cl)C,0.0005861906011027885,0.03401894999785191,1
+CCSCSP(=S)(OCC)OCC,0.0006144925475253195,0.0015689457511481922,1
+CCSCSP(=S)(OCC)OCC,0.0006144925612602997,0.0015930132454562913,1
+OC1CCCCCc2cc(O)cc(c2C(=O)OC(CCC1)C)O,0.0006203550142861557,1.1513016524808917,1
+ClC1CC2C(C1Cl)C1(C(C2(Cl)C(=C1Cl)Cl)(Cl)Cl)Cl,0.000634488903569986,0.0005835576184954016,1
+ClC(C(c1ccc(cc1)Cl)c1ccc(cc1)Cl)(Cl)Cl,0.0007052459522690667,0.043153175434619336,1
+CCSCCSP(=S)(OCC)OCC,0.0008017375255654054,0.0029396972764529202,1
+CCOP(=O)(SC(CC)C)SC(CC)C,0.0008210296720157477,0.012277367120843022,1
+ClC1C=CC2C1C1(Cl)C(=C(C2(C1(Cl)Cl)Cl)Cl)Cl,0.0008437853335108407,0.0008952202189140214,1
+c1ccc(cc1)[Sn](c1ccccc1)c1ccccc1,0.0008571117562305596,0.3621072701107427,1
+CNC(=O)CSP(=S)(OC)OC,0.000872381733741038,0.011977939066676562,1
+CCS(=O)CCSP(=O)(OC)OC,0.0008932752807580748,0.001392123295168118,1
+CCOP(=O)(SC(CC)C)SC(CC)C,0.0009245829520661433,0.01634116998391423,1
+COP(=S)(Oc1ccc(cc1)N(=O)=O)OC,0.0009498211030948742,0.025384429146594292,1
+OC(=O)c1ccccc1.CCC(C1OC2(C=CC1C)OC1CC=C(C)C(OC3CC(OC)C(C(O3)C)OC3CC(OC)C(C(O3)C)NC)C(C)C=CC=C3C4(C(C(=O)OC(C2)C1)C=C(C)C(C4OC3)O)O)C,0.0009918273033473258,0.0020195689898693642,1
+Clc1c(Cl)c(Cl)c(c(c1Cl)Cl)Cl,0.0010183220720957982,0.09921198034267042,1
+COC1CC(OC2C(C)C=CC=C3COC4C3(O)C(C=C(C4O)C)C(=O)OC3CC(CC=C2C)OC2(C3)C=CC(C(O2)C(C)C)C)OC(C1OC1CC(OC)C(C(O1)C)NC(=O)C)C,0.0011109849279118543,0.0018866442387745258,1
+COP(=O)(SCCS(=O)(=O)CC)OC,0.0011437981092748413,0.0012462691090840692,1
+COC1CC(OC2C(C)C=CC=C3COC4C3(O)C(C=C(C4O)C)C(=O)OC3CC(CC=C2C)OC2(C3)C=CC(C(O2)C(CC)C)C)OC(C1OC1CC(OC)C(C(O1)C)O)C.COC1CC(OC(C1OC1CC(OC)C(C(O1)C)O)C)OC1C(C)C=CC=C2COC3C2(O)C(C=C(C3O)C)C(=O)OC2CC(CC=C1C)OC1(C2)C=CC(C(O1)C(C)C)C,0.0011546496256700967,0.0021478186033654857,1
+O=C1CCCC(=O)C1C(=O)c1ccc(cc1Cl)S(=O)(=O)C,0.0012166633663470796,0.017372736621606367,1
+ClC1CC2C(C1Cl)C1(C(C2(Cl)C(=C1Cl)Cl)(Cl)Cl)Cl,0.0012201709684038192,0.0005942117597564336,1
+ClC12C(Cl)(Cl)C3(C4(C1(Cl)C1(C2(Cl)C3(C4(C1(Cl)Cl)Cl)Cl)Cl)Cl)Cl,0.0012831252531881078,6.239999085601705e-05,0.625
+CCOP(=O)(N1CCSC1=O)SC(CC)C,0.001341107599716744,0.01204374051145044,1
+COP(=O)(SC)N,0.0013817128392583306,0.33442367385922134,1
+O=C1CCCC(=O)C1C(=O)c1ccc(cc1[N+](=O)[O-])S(=O)(=O)C,0.001414591694222218,0.01480091228679537,1
+CCOP(=S)(Oc1ccc(cc1)N(=O)=O)OCC,0.001442007505168395,0.020357599749119566,1
+CCOP(=S)(Oc1ccccc1C(=O)OC(C)C)NC(C)C,0.0014476216329334154,0.11305876392072264,1
+CCSCCSP(=O)(OC)OC,0.001519854088965729,0.0009444327450089038,1
+CCOc1cc(nc(n1)CC)OP(=S)(OC)OC,0.0015395577035464635,0.008340344661164758,1
+COC(=O)/C=C(/OP(=O)(OC)OC)\C,0.0015614663384413926,0.02855485815997305,1
+COC(=O)C=C(OP(=O)(OC)OC)C,0.001561466365033004,0.019128379067914394,1
+CSc1ccc(cc1C)OP(=S)(OC)OC,0.001616797099077973,0.009022712543242823,1
+COP(=S)(Oc1ccc(c(c1)C)[N+](=O)[O-])OC,0.001659247904766673,0.012452573244964524,1
+C1CCC(CC1)[Sn](n1ncnc1)(C1CCCCC1)C1CCCCC1,0.0018110419025972907,0.013846742083373052,1
+ClC1C2(Cl)C3C4C5C1(Cl)C(C2(Cl)C5C3C1C4O1)(Cl)Cl,0.0018377077252927285,0.00013082348029644925,1
+CNC(=O)CCSCCSP(=O)(OC)OC,0.001879329112916984,0.002248599256352105,1
+N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)[C@H](C(C)C)Nc1ccc(cc1Cl)C(F)(F)F,0.001988416717024977,0.011949513448740262,1
+COP(=S)(Oc1ccc(cc1)[N+](=O)[O-])OC,0.0019946243391447357,0.016643996746722955,1
+CNC(=O)C=C(OP(=O)(OC)OC)C,0.0020164586039868883,0.01114229082863414,1
+COP(=O)(SC)N,0.0020548549325897737,0.33442367385922134,1
+CCCSP(=O)(SCCC)OCC,0.002063225311384027,0.004637753717447314,1
+COc1sc(=O)n(n1)CSP(=S)(OC)OC,0.0021168829879502555,0.011073447351926287,1
+CCOP(=S)(Oc1ccc2c(c1)oc(=O)c(c2C)Cl)OCC,0.0022052807653206367,0.01194888189741255,1
+CO[C@H]1C[C@H](O[C@H]2[C@@H](C)C=CC=C3CO[C@H]4[C@]3(O)[C@@H](C=C([C@H]4O)C)C(=O)O[C@H]3C[C@@H](CC=C2C)O[C@]2(C3)C=C[C@@H]([C@H](O2)[C@H](CC)C)C)O[C@H]([C@@H]1O[C@H]1C[C@H](OC)[C@H]([C@@H](O1)C)O)C,0.002290749011702154,0.01125080791253125,1
+O=C1CCCC(=O)C1C(=O)c1ccc(c(c1Cl)COCC(F)(F)F)S(=O)(=O)C,0.002381932321850521,0.008233311333722891,1
+S=C1NCCN1,0.0024471862937206963,0.1028640018189016,1
+OC(=O)c1ccccc1.CCC(C1OC2(C=CC1C)OC1CC=C(C)C(OC3CC(OC)C(C(O3)C)OC3CC(OC)C(C(O3)C)NC)C(C)C=CC=C3C4(C(C(=O)OC(C2)C1)C=C(C)C(C4OC3)O)O)C,0.0024795682583683147,0.0020195689898693642,1
+CSc1ccc(cc1C)OP(=S)(OC)OC,0.0025868753585247565,0.00964124005965057,1
+CSc1ccc(cc1C)OP(=S)(OC)OC,0.0025868754613179463,0.00964124005965057,1
+O[Sn](C1CCCCC1)(C1CCCCC1)C1CCCCC1,0.002596303652874617,0.20819551253516297,1
+COC(=O)C(Oc1ccc(cc1)Oc1ncc(cc1Cl)C(F)(F)F)C,0.0026615073878255148,0.001110815102353126,1
+COC1CC(OC2C(C)C=CC=C3COC4C3(O)C(C=C(C4O)C)C(=O)OC3CC(CC=C2C)OC2(C3)C=CC(C(O2)C(C)C)C)OC(C1OC1CC(OC)C(C(O1)C)NC(=O)C)C,0.0027774623197796356,0.0016355023479626871,1
+CCOP(=S)(Oc1nc(Cl)c(cc1Cl)Cl)OCC,0.002852364738724816,0.012867798419937399,1
+CCOP(=S)(OCC)SCSc1ccc(cc1)Cl,0.0029165972759564764,0.007851952924790793,1
+C1CCN2C(C1)C1CCCCN1CC2,0.002933359023382885,0.19425171774419603,1
+C1CCN2C(C1)C1CCCCN1CC2,0.002984821462389602,0.1549258324104941,1
+COP(=S)(Oc1nc(Cl)c(cc1Cl)Cl)OC,0.003100456591840454,0.02511976746876867,1
+Fc1ccc(cc1)N(C(=O)COc1nnc(s1)C(F)(F)F)C(C)C,0.0033027779077186826,0.04258949053140572,1
+CCOP(=S)(Oc1ccc2c(c1)oc(=O)c(c2C)Cl)OCC,0.0033630532459809582,0.01194888189741255,1
+CCOP(=O)(Oc1ccc(c(c1)C)SC)NC(C)C,0.0035601567181414275,0.007466085017812861,1
+CNC(=O)Oc1cccc2c1OC(O2)(C)C,0.0035838244976124515,0.07694918518284904,1
+O[Sn](C1CCCCC1)(C1CCCCC1)C1CCCCC1,0.003608862040355308,0.2852919932509755,1
+OC(=O)COc1nc(Cl)c(cc1Cl)Cl,0.0038990829980641837,0.04457170010150825,1
+CCc1nn(c(c1Cl)C(=O)NCc1ccc(cc1)Oc1ccc(cc1)C)C,0.003907559846623587,0.09156997000962404,1
+CCCCSP(=O)(SCCCC)SCCCC,0.003974424546249488,0.08968357544503174,1
+COP(=S)(SCn1nnc2c(c1=O)cccc2)OC,0.0041125115079416845,0.022792432288964708,1
+COc1sc(=O)n(n1)CSP(=S)(OC)OC,0.004134537178254452,0.011073447351926287,1
+CCOP(=S)(Oc1ncn(n1)c1ccccc1)OCC,0.004149211896481245,0.020459303699418275,1
+CCOP(=S)(Oc1ncn(n1)c1ccccc1)OCC,0.004149212048673449,0.019916396281095625,1
+COc1sc(=O)n(n1)CSP(=S)(OC)OC,0.00416761352299651,0.011073447351926287,1
+CCOP(=O)(OC(=CCl)c1ccc(cc1Cl)Cl)OCC,0.004171650398342553,0.034753601670754815,1
+CCOP(=O)(O/C(=C/Cl)/c1ccc(cc1Cl)Cl)OCC,0.004171650398342553,0.027041195785532685,1
+Clc1nc(nc(n1)Cl)Nc1ccccc1Cl,0.004173898399328111,0.12718506389486406,1
+Clc1cccc(n1)C(Cl)(Cl)Cl,0.00433075312836283,0.31356490536288883,1
+N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C(C(Br)(Br)Br)Br,0.004511229623452476,0.034342832130992346,1
+CCOP(=S)(OCC)SCSP(=S)(OCC)OCC,0.004681695305160139,0.0018870676558296706,1
+CCOP(=S)(Oc1ccc2c(c1)oc(=O)c(c2C)Cl)OCC,0.004686221626306353,0.01194888189741255,1
+N#CC(c1ccc(c(c1)Oc1ccccc1)F)OC(=O)C1C(C1(C)C)/C=C(/c1ccc(cc1)Cl)\Cl,0.004898276703964497,0.012215934999635499,1
+CCOP(=S)(Oc1cc(C)nc(n1)C(C)C)OCC,0.004928609097226672,0.013579132085509897,1
+CCOP(=O)(Oc1ccc(c(c1)C)SC)NC(C)C,0.004944661980269876,0.008370828170108842,1
+N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Br)Br,0.004948543461552866,0.028969815332537945,1
+N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C(C(C)C)Nc1ccc(cc1Cl)C(F)(F)F,0.004971041792562443,0.008927560449046905,1
+CCN(C(=O)C(=C(OP(=O)(OC)OC)C)Cl)CC,0.005005200069191886,0.0031029291827278202,1
+CCNc1nc(nc(n1)Cl)NC(C#N)(C)C,0.005193343612552968,0.02498552169568395,1
+CCOP(=S)(OCC)SCSP(=S)(OCC)OCC,0.005201883810203027,0.0019642241382633774,1
+COc1sc(=O)n(n1)CSP(=S)(OC)OC,0.005292207588165698,0.011073447351926287,1
+CCOP(=S)(SCn1c(=O)oc2c1ccc(c2)Cl)OCC,0.0054376113486863924,0.007139222358353881,1
+CCOP(=S)(Oc1ccc(cc1)[N+](=O)[O-])OCC,0.005493362006308507,0.016722640185006663,1
+CC(Cc1ccc(cc1)C(C)(C)C)CN1CC(C)OC(C1)C,0.005601647965290344,0.1536898021522977,1
+CC(Cc1ccc(cc1)C(C)(C)C)CN1CC(C)OC(C1)C,0.005601648122412352,0.1562686252062576,1
+CCOP(=O)(Oc1ccc(c(c1)C)SC)NC(C)C,0.005603950244305859,0.008370828170108842,1
+COP(=S)(SCN1C(=O)c2c(C1=O)cccc2)OC,0.005672488506643871,0.05202092805434199,1
+OC(C(Cl)(Cl)Cl)(c1ccc(cc1)Cl)c1ccc(cc1)Cl,0.005938151689011985,0.05029432604141614,1
+Clc1ccc(cc1)C(c1ccccc1Cl)(c1cncnc1)O,0.006038720639060896,0.05060086507650378,1
+Fc1ccc(cc1)[Si](c1ccc(cc1)F)(Cn1cncn1)C,0.006341300659739408,0.03675874806038504,1
+COC(=O)Nc1nc2c([nH]1)cc(cc2)S(=O)c1ccccc1,0.006342219438128827,0.2672054856199581,1
+ClCC(N1C(=O)c2c(C1=O)cccc2)SP(=S)(OCC)OCC,0.006347661308292605,0.017422784587449506,1
+COP(=O)(SC)N,0.006377136181192296,0.33442367385922134,1
+CCNc1nc(Cl)nc(n1)NC(C)(C)C,0.006399377704543392,0.03380755287596103,1
+CCP(=S)(Sc1ccccc1)OCC,0.006414179135682054,0.005075504695397763,1
+COc1sc(=O)n(n1)CSP(=S)(OC)OC,0.006615259485207122,0.011073447351926287,1
+CNC(=O)Oc1cc(C)c(c(c1)C)N(C)C,0.0067481385934503825,0.07218409438237827,1
+COC(=O)N(C(=O)N1COC2(C(=N1)c1ccc(cc1C2)Cl)C(=O)OC)c1ccc(cc1)OC(F)(F)F,0.006820319575237628,0.06339245177977164,1
+COC(=O)N(C(=O)N1COC2(C(=N1)c1ccc(cc1C2)Cl)C(=O)OC)c1ccc(cc1)OC(F)(F)F,0.006820319755914397,0.05472978349709951,1
+CCN(c1nc(cc(n1)C)OP(=S)(OC)OC)CC,0.0068777238395693234,0.01923940928152079,1
+CCNc1nc(Cl)nc(n1)NC(C)(C)C,0.006921775895097049,0.036341284639957824,1
+CCOP(=S)(SCSC(C)(C)C)OCC,0.006934077036209056,0.0034018170593390737,1
+Clc1ccc(cc1)C(c1ccccc1Cl)(c1cncnc1)O,0.00694452873492003,0.05368556487660718,1
+CSc1ccc(cc1C)OP(=S)(OC)OC,0.007095942829084915,0.00964124005965057,1
+Cc1nn(c(c1/C=N/OCc1ccc(cc1)C(=O)OC(C)(C)C)Oc1ccccc1)C,0.0071176254993963305,0.35851467476737114,1
+O=S1OCC2C(CO1)C1(C(C2(Cl)C(=C1Cl)Cl)(Cl)Cl)Cl,0.007126618167084564,0.00036386924695734017,1
+S=C1NCCN1,0.00724367142941326,0.1028640018189016,1
+O=C(N(C)C)Nc1ccc(c(c1)Cl)Cl,0.007293179580314936,0.09848470718294207,1
+Cc1nn(c(c1C=NOCc1ccc(cc1)C(=O)OC(C)(C)C)Oc1ccccc1)C,0.0073074288460468996,0.27349694800258606,1
+Clc1ccc(cc1)C(c1ccccc1Cl)(c1cncnc1)O,0.007548400798826121,0.05192051124148076,1
+Fc1ccc(cc1)[Si](c1ccc(cc1)F)Cn1cncn1,0.007657523838454347,0.015872285245794083,1
+CCCCOC(=O)C(Oc1ccc(cc1)Oc1ccc(cn1)C(F)(F)F)C,0.007825509706097071,0.0135847101618635,1
+Fc1ccc(cc1)C(c1ccccc1Cl)(c1cncnc1)O,0.007943029289634557,0.01679270358592001,1
+N#Cc1nn(c(c1S(=O)CC)N)c1c(Cl)cc(cc1Cl)C(F)(F)F,0.008056334643428573,0.00038395497183535377,1
+COP(=S)(SCn1nnc2c(c1=O)cccc2)OC,0.00813048252144793,0.023715894144774607,1
+CN(C(=S)S[Zn]SC(=S)N(C)C)C,0.00817493363915869,0.060687914488021834,1
+CCN(c1nc(cc(n1)C)OP(=S)(OC)OC)CC,0.008187766847509327,0.020367652110695248,1
+Clc1cc(cnc1Nc1c(cc(c(c1[N+](=O)[O-])Cl)C(F)(F)F)[N+](=O)[O-])C(F)(F)F,0.00821343424858256,0.043234794638137425,1
+Clc1ccc(cc1)OS(=O)(=O)c1ccc(cc1)Cl,0.008246440044818412,0.05297844739133817,1
+Clc1cc(cnc1Nc1c(cc(c(c1[N+](=O)[O-])Cl)C(F)(F)F)[N+](=O)[O-])C(F)(F)F,0.008385443694386083,0.04319062832350276,1
+COP(=S)(SCn1nnc2c(c1=O)cccc2)OC,0.008508644649457775,0.023715894144774607,1
+CCOP(=S)(Oc1ccc(cc1)[N+](=O)[O-])OCC,0.008583378006954733,0.017244476111998752,1
+[O-][N+](=O)c1cc([N+](=O)[O-])c(c(c1)[N+](=O)[O-])C,0.008805487227420639,0.102957030662423,1
+CSC(=O)c1c(nc(c(c1CC(C)C)C(=O)SC)C(F)(F)F)C(F)F,0.00904300899921393,0.11151045292283462,0.4074074074074074
+Clc1ccc(c(c1)Cl)C(Cn1cncn1)COC(C(F)F)(F)F,0.00913621053742932,0.051596683516273174,1
+CCCN(C(=O)SCc1ccccc1)CCC,0.009149216533940492,0.07936016289079004,1
+COCN(c1c(CC)cccc1CC)C(=O)CCl,0.009267253123156974,0.3058322031788613,1
+COP(=S)(Oc1nc(Cl)c(cc1Cl)Cl)OC,0.009301369775521361,0.023420014304830983,1
+CON(C(=O)Nc1ccc(cc1)Cl)C,0.00931754394759366,0.04327784599272855,1
+C=CC1(C)OC(=O)N(C1=O)c1cc(Cl)cc(c1)Cl,0.009436904951368202,0.10602678152830673,1
+COP(=S)(Oc1ccc(cc1)[N+](=O)[O-])OC,0.009498211030948741,0.01628204746038644,1
+CCCCOC(=O)C(Oc1ccc(cc1)Oc1ccc(cc1F)C#N)C,0.009625729959721526,0.03360318733595453,1
+COP(=O)(OC=C(Cl)Cl)OC,0.009729574839301364,0.030641286118982685,1
+CCCCOC(=O)C(Oc1ccc(cc1)Oc1ccc(cn1)C(F)(F)F)C,0.009886227162529472,0.009826009376979397,1
+N#C/N=C\1/SCCN1Cc1ccc(nc1)Cl,0.009892243396986886,0.06554310510256599,1
+Cc1c(ccc(c1C1=NOCC1)S(=O)(=O)C)C(=O)c1cnn(c1O)C,0.009906758425540224,0.01953854834171629,1
+CC1(C)CNC(=NC1)NN=C(C=Cc1ccc(cc1)C(F)(F)F)C=Cc1ccc(cc1)C(F)(F)F,0.009909494556264633,0.06380780596868028,1
+CCOC(=O)C(Oc1ccc(cc1)Oc1cnc2c(n1)ccc(c2)Cl)C,0.009924832004782804,0.017223073095094944,1
+c1scc(n1)c1nc2c([nH]1)cccc2,0.009938002763559809,0.07816579105343396,1
+OCC(CCl)O,0.009951195933270719,8.18897175780498,1
+CON(C(=O)Nc1ccc(c(c1)Cl)Cl)C,0.010036375840596658,0.04188056937044515,1
+CCCSP(=O)(SCCC)OCC,0.010068539755671456,0.007260057210269471,1
+FC(c1ccc(cc1)C=CC(=NN=C1NCC(CN1)(C)C)C=Cc1ccc(cc1)C(F)(F)F)(F)F,0.010111728942243584,0.10365462498638998,1
+COP(=O)(OC=C(Cl)Cl)OC,0.010408382170442241,0.024813902049782052,1
+COP(=O)(OC=C(Cl)Cl)OC,0.010408382386229365,0.024813902049782052,1
+CC(Oc1cc(c(cc1Cl)Cl)n1nc(oc1=O)C(C)(C)C)C,0.010428101697378017,0.12913282716460453,1
+CCOC(=O)Nc1cccc(c1)OC(=O)Nc1ccccc1,0.010655682947629983,0.21208880713068504,1
+N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Br)Br,0.010688854065726137,0.027254476379320505,1
+CCOC(=O)c1cn2nc(cc2nc1C)OP(=S)(OCC)OCC,0.010713392485187262,0.04161641779803941,1
+O=C(C1C(C1(C)C)C=C(C(F)(F)F)Cl)OCc1c(F)c(F)c(c(c1F)F)C,0.010985502766340648,0.05759405368015889,1
+CCCSP(=O)(SCCC)OCC,0.011141416681473747,0.0060229627834060846,1
+O=C(C(Oc1ccc(cc1)Oc1cnc2c(n1)ccc(c2)Cl)C)OCCON=C(C)C,0.011264301100355506,0.012923335664014797,1
+Clc1cc(ccc1Oc1ccc(c(c1)C(=O)NS(=O)(=O)C)[N+](=O)[O-])C(F)(F)F,0.011395676083924233,0.08349876089402122,1
+CNC(=O)CSP(=S)(OC)OC,0.011450010084732691,0.012794716153570413,1
+O=C(C1C(C1(C)C)C=C(C(F)(F)F)Cl)OCc1cccc(c1C)c1ccccc1,0.011824026606519262,0.15137022050268478,1
+O=C(C1C(C1(C)C)C=C(C(F)(F)F)Cl)OCc1cccc(c1C)c1ccccc1,0.011824026606519262,0.143812768795011,1
+CCOP(=S)(Oc1ccc(cc1)N(=O)=O)OCC,0.012016729209736626,0.018791316556032868,1
+O=S1OCC2C(CO1)C1(C(C2(Cl)C(=C1Cl)Cl)(Cl)Cl)Cl,0.01228727229779905,0.00036386924695734017,1
+Clc1cc(Cl)c(c(c1O)Cc1c(O)c(Cl)cc(c1Cl)Cl)Cl,0.012287924553322883,0.054451504972635976,1
+N#Cc1c(Cl)c(C#N)c(c(c1Cl)Cl)Cl,0.012410167132297197,0.06894214569190218,1
+COP(=S)(SCN1C(=O)c2c(C1=O)cccc2)OC,0.012605530348696702,0.05298278025145332,1
+Clc1ccc(c(c1)Cl)n1c(nc2c(c1=O)cc(cc2)F)n1cncn1,0.01268036889326992,0.027097912477256368,1
+CCC1CCCC(OC2CCC(C(O2)C)N(C)C)C(C)C(=O)C2=CC3C(C2CC(=O)O1)C=C(C)C1C3CC(C1)OC1OC(C)C(C(C1OC)OC)OC,0.012734890360905185,0.019305450727286316,1
+CC1C=CC=C2COC3C2(O)C(C=C(C3O)C)C(=O)OC2CC(CC=C(C1)C)OC1(C2)CCC(C(O1)C)C,0.01286229964885329,0.004182757927211324,1
+COc1ccc(cc1NNC(=O)OC(C)C)c1ccccc1,0.01298475189092086,0.7046634001865686,1
+CN1CCC(CC1)C1CCN(CC1)C,0.012988179839533329,0.07921931623605762,1
+N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C(c1ccc(cc1)OC(F)F)C(C)C,0.013290157156772887,0.03653238913423377,1
+CCCN(C(=O)n1cncc1)CCOc1c(Cl)cc(cc1Cl)Cl,0.013539867103284017,0.05900731758603697,1
+COP(=O)(NC(=O)C)SC,0.013648831720059621,0.01603420284847195,1
+C[n+]1ccc(cc1)c1cc[n+](cc1)C,0.013691057325028715,0.08721751069244073,1
+O=c1c(Cl)c(SCc2ccc(cc2)C(C)(C)C)cnn1C(C)(C)C,0.013701160159437661,0.03738986432484401,1
+CNP(=O)(Oc1ccc(cc1Cl)C(C)(C)C)OC,0.013712205220154254,0.045144130422190586,1
+NC1=C(Cl)C(=O)c2c(C1=O)cccc2,0.013920121360835688,0.9171968702966398,1
+CCN(C(=O)SCC)C1CCCCC1,0.013930451940080113,0.0635433548789227,1
+CNC(=O)Oc1cccc2c1OC(C2)(C)C,0.0139433514779606,0.06747705209462423,1
+O=C(C1C(C1(C)C)/C=C(/C(F)(F)F)\Cl)OCc1c(F)c(F)c(c(c1F)F)C,0.01409010160197152,0.06064324783216236,1
+CS/C(=N/OC(=O)N(SN(C(=O)O/N=C(/SC)\C)C)C)/C,0.014105593115928905,0.10857811869982233,1
+CC(c1ccccc1)(C[Sn](O[Sn](CC(c1ccccc1)(C)C)(CC(c1ccccc1)(C)C)CC(c1ccccc1)(C)C)(CC(c1ccccc1)(C)C)CC(c1ccccc1)(C)C)C,0.014249578440471417,0.10151914399441331,1
+CON(C(=O)Nc1ccc(cc1)Br)C,0.014357399945172603,0.04331377020795739,1
+CCCN(c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-])CC1CC1,0.014397200032537671,0.15891148422161222,1
+CC(Oc1cc(c(cc1Cl)Cl)n1nc(oc1=O)C(C)(C)C)C,0.01448347496337274,0.12913282716460453,1
+N#Cc1c(Cl)cccc1Cl,0.014533918736325764,0.08170904638795044,1
+ClC1CC2C(C1Cl)C1(C(C2(Cl)C(=C1Cl)Cl)(Cl)Cl)Cl,0.014642051620845831,0.0008687145669203877,1
+CC(c1ccc(cc1)CCOc1ncnc2c1cccc2)(C)C,0.014686613132547533,0.06701642332516593,1
+CCCCC(c1ccc(cc1Cl)Cl)(Cn1cncn1)O,0.014958135679074535,0.043118381206081816,1
+N#CC(c1ccc(cc1)Oc1ccccc1)OC(=O)C1C(C1(C)C)/C=C\C(=O)OC(C(F)(F)F)C(F)(F)F,0.014960133059978587,0.03591110528319521,1
+N#Cc1c(Cl)c(C#N)c(c(c1Cl)Cl)Cl,0.015042627044387032,0.07089396189028405,1
+N#CC(c1cc(C)c(cc1Cl)NC(=O)c1cc(I)cc(c1O)I)c1ccc(cc1)Cl,0.015081279803436631,0.08223009030815089,1
+Clc1cc(cnc1CCNC(=O)c1ccccc1C(F)(F)F)C(F)(F)F,0.015124216704213374,0.04198917544443029,1
+Fc1ccc(cc1)C1(Cn2cncn2)OC1c1ccccc1Cl,0.015162725459871818,0.03914579999970951,1
+N#CN=S(=O)(C(c1ccc(nc1)C(F)(F)F)C)C,0.015292167409562457,0.046420554340929926,1
+CC(C1C2CCC1c1c2cccc1NC(=O)c1cn(nc1C(F)F)C)C,0.015302732709143212,0.07596715525228381,1
+O[Sn](C1CCCCC1)(C1CCCCC1)C1CCCCC1,0.015577821917247702,0.20819551253516297,1
+c1cc[n+]2c(c1)c1cccc[n+]1CC2,0.015794866515636753,0.10910993361405372,1
+Fc1ccc(cc1)[Si](c1ccc(cc1)F)(Cn1cncn1)C,0.01585325164934852,0.033295448823770106,1
+ClCC1CN(C(=O)C1Cl)c1cccc(c1)C(F)(F)F,0.016019730669239306,0.09987678658093127,1
+CCCCN(c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-])CC,0.016105987222784814,0.14199505491303127,1
+CCOP(=S)(Oc1cc(C)nc(n1)C(C)C)OCC,0.01642869699075557,0.013579132085509897,1
+CCN(C(=O)SCc1ccc(cc1)Cl)CC,0.016680921188449865,0.0507760144199629,1
+O=S1OCC2C(CO1)C1(C(C2(Cl)C(=C1Cl)Cl)(Cl)Cl)Cl,0.01707930849394068,0.00028693011525711496,1
+CCOP(=S)(Oc1nc(Cl)c(cc1Cl)Cl)OCC,0.0171141884323489,0.013006855002501155,1
+BrC1COC(C1)(Cn1cncn1)c1ccc(cc1Cl)Cl,0.017185416989653705,0.048587566189001355,1
+CC(NC(=S)[S])CNC(=S)S[Zn],0.017255039351497643,0.16793286430045296,1
+CC(NC(=S)[S])CNC(=S)S[Zn],0.017255039351497643,0.25122749958492463,1
+COCC(N(c1c(C)csc1C)C(=O)CCl)C,0.018129419544573026,0.2688602203826675,1
+Fc1ccc(cc1)C1(Cn2cncn2)OC1c1ccccc1Cl,0.018195270551846183,0.04116692662855389,1
+N#Cc1c(Cl)cccc1Cl,0.0186034162597095,0.09362570930859937,1
+O=C(C1C(C1(C)C)C=C(C(F)(F)F)Cl)OCc1cccc(c1C)c1ccccc1,0.018918442570430818,0.14587700628362912,1
+CCOP(=S)(Oc1cc(C)nc(n1)C(C)C)OCC,0.019057288509276463,0.011024013658939326,1
+CN1CCC(CC1)C1CCN(CC1)C,0.019100264469901956,0.07921931623605762,1
+CNC(=O)ON=C(C(=O)N(C)C)SC,0.019109609238234706,0.030626742070780578,1
+CCN(C(=O)SCc1ccc(cc1)Cl)CC,0.019396419126203733,0.04165007467236668,1
+CCc1nn(c(c1Cl)C(=O)NCc1ccc(cc1)C(C)(C)C)C,0.019469491695902355,0.021113366139126708,1
+CCO/N=C(\C1=C(O)CC(CC1=O)C1CCCSC1)/CCC,0.019664101798126703,0.07762178550268999,1
+CCCN(C(=O)n1cncc1)CCOc1c(Cl)cc(cc1Cl)Cl,0.01991156926953532,0.05960631504385691,1
+OC(=O)COc1ccc(cc1C)Cl,0.019938294964743114,0.06346773142597349,1
+C[n+]1ccc(cc1)c1cc[n+](cc1)C,0.020133908207418557,0.08721751069244073,1
+N#Cc1sc2=c(sc1C#N)c(=O)c1c(c2=O)cccc1,0.020248123201460456,0.15366918020134765,1
+CCP(=S)(Sc1ccccc1)OCC,0.020298035239500172,0.007022624901791193,1
+ClC=C,0.020800592400871575,0.04595842510750215,0.14285714285714285
+c1cc[n+]2c(c1)c1cccc[n+]1CC2,0.021168377697732887,0.10910993361405372,1
+Clc1cccc(c1)c1ccccc1,0.021202965065040626,0.13009825979015374,1
+CN1CN(C)CSC1=S,0.022184384932566064,0.05143501540726455,1
+CNC(=O)ON=C(C(=O)N(C)C)SC,0.022347753176858155,0.033050819536401606,1
+O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(cc1)Cl,0.022530984690614337,0.14263910849947523,1
+OC(=O)COc1ccc(cc1Cl)Cl,0.022620602193004043,0.04097134152841404,1
+CSC1=NC(C(=O)N1Nc1ccccc1)(C)c1ccccc1,0.022800155556897562,0.12539808663217103,1
+CNC(=O)ON=C(C(=O)N(C)C)SC,0.02280382932847922,0.027408561103509445,1
+COC(=O)N(c1ccccc1COc1ccn(n1)c1ccc(cc1)Cl)OC,0.02320682656135787,0.14507030195358356,1
+CCCCCCCCCCCCC1=C(OC(=O)C)C(=O)c2c(C1=O)cccc2,0.02340650588512378,0.9676223412028344,1
+OC(COc1cccc2c1c1ccccc1[nH]2)CNC(C)C,0.023460058312320942,0.4421518817692824,1
+O=C(NC(=O)c1ccccc1Cl)Nc1ccc(cc1)OC(F)(F)F,0.023557308728421166,0.08799294295544582,1
+CCNc1nc(NCC)nc(n1)Cl,0.024794616275543167,0.023199124921492648,1
+CCOC(=O)C(Oc1ccc(cc1)Oc1nc2c(o1)cc(cc2)Cl)C,0.02487724874434851,0.02121434089596425,1
+CON(C(=O)Nc1ccc(c(c1)Cl)Cl)C,0.025090939601491648,0.04429754960600537,1
+O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(cc1)Cl,0.02510595436954169,0.14478308433451992,1
+OC(=O)C(Oc1ccc(cc1C)Cl)C,0.02562363979237584,0.021707289602534444,1
+Clc1ccccc1CC(C1(Cl)CC1)(Cn1cncn1)O,0.025625059257949535,0.06836698901333005,1
+O=C(N(C)C)Nc1ccc(c(c1)Cl)Cl,0.02574063309087087,0.10030552885026119,1
+O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(cc1)Cl,0.025749696789273527,0.13729423490475642,1
+CCNc1nc(NCC)nc(n1)Cl,0.026282293252075754,0.024182292204448317,1
+CC(OC(=O)C(c1ccc(cc1)Cl)(c1ccc(cc1)Cl)O)C,0.026531991066147967,0.07402452017139158,1
+N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)/C=C(/C(F)(F)F)\Cl,0.026675554368592185,0.02013792188022764,1
+CCOC(=O)CCN(C(C)C)SN(C(=O)Oc1cccc2c1OC(C2)(C)C)C,0.02679478797527864,0.046797017478298335,1
+O=C(N(C)C)Nc1ccc(c(c1)Cl)Cl,0.026813159469657157,0.10298053181047498,1
+O=C(C1C(C1(C)C)C=C(Cl)Cl)OCc1c(F)c(F)cc(c1F)F,0.026942980220700186,0.07895997963188385,1
+CCOC(=O)c1ccccc1C1=c2cc(C)c(cc2=[O]c2c1cc(C)c(c2)NCC)NCC,0.027053999376946393,0.558940736971508,1
+CSCC(=NOC(=O)NC)C(C)(C)C,0.027483045022449526,0.02814269771044187,1
+Cc1nn(c(c1C(=O)c1ccc(cc1S(=O)(=O)C)C(F)(F)F)O)C,0.027599589461626675,0.01939274876574121,1
+N#CC(c1ccc(c(c1)Oc1ccccc1)F)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.02763145769616919,0.016213422691065498,1
+CCOc1cc(ccc1N(=O)=O)Oc1ccc(cc1Cl)C(F)(F)F,0.02764719470135984,0.0817358759734112,1
+[O-][N+](=O)c1cc(C(=O)N)c(c(c1)[N+](=O)[O-])C,0.027758250773633555,0.14450417327731357,1
+N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(C(F)(F)F)Cl,0.02778703580061686,0.019431401179377007,1
+OC(=O)C(Oc1ccc(cc1)Oc1nc2c(o1)cc(cc2)Cl)C,0.028167056356499628,0.01657291399146401,1
+CSC(=NOC(=O)N(SN(C(=O)ON=C(SC)C)C)C)C,0.02821118623185781,0.06462052763667345,1
+CS/C(=N/OC(=O)N(SN(C(=O)O/N=C(/SC)\C)C)C)/C,0.02821118623185781,0.10857811869982233,1
+CC(N(C(=O)SCC(=C(Cl)Cl)Cl)C(C)C)C,0.028227806467376604,0.07944134555128374,1
+COP(=S)(SCN1C(=O)c2c(C1=O)cccc2)OC,0.02836244328456758,0.055364745981801994,1
+CC(N1C(=NC(C)(C)C)SCN(C1=O)c1ccccc1)C,0.02848365588181601,0.05869546609799135,1
+CCOP(=S)(Oc1nc(Cl)c(cc1Cl)Cl)OCC,0.028523647387248163,0.012656906925039336,1
+N#CC(c1ccc(c(c1)Oc1ccccc1)F)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.028782768433509572,0.015746986073988292,1
+CCO[C@H]1[C@@H](OC)[C@H](O[C@@H]2C[C@@H]3[C@@H](C2)[C@@H]2C=C4[C@H]([C@@H]2CC3)CC(=O)O[C@@H](CC)CCC[C@@H]([C@H](C4=O)C)O[C@H]2CC[C@@H]([C@H](O2)C)N(C)C)O[C@H]([C@@H]1OC)C,0.028877084613265123,0.0022907490117021535,0.20238095238095238
+N#Cc1cc(Br)c(c(c1)Br)O,0.028889958940868102,0.036947076319497126,1
+CCOC(=O)C(Cc1cc(c(cc1Cl)F)n1nc(n(c1=O)C(F)F)C)Cl,0.029112705155716945,0.13407963594673467,1
+C#CCOC(=O)C(Oc1ccc(cc1)Oc1ncc(cc1F)Cl)C,0.029164453292198207,0.015479254307699006,1
+Fc1ccc(c(c1)c1ccc(c(c1)Cl)Cl)NC(=O)c1cn(nc1C(F)F)C,0.02921233570136655,0.07619243803311933,1
+CCOC(=O)c1cn2nc(cc2nc1C)OP(=S)(OCC)OCC,0.02946182933426497,0.05714513450842352,1
+CC(N1/C(=N/C(C)(C)C)/SCN(C1=O)c1ccccc1)C,0.029465850912223458,0.05807349906709352,1
+Nc1n[nH]cn1,0.029733601205328832,0.042049662082769036,0.2727272727272727
+COC(=O)c1c(Cl)c(Cl)c(c(c1Cl)Cl)C(=O)OC,0.030123726579706293,0.7349282276874579,1
+Clc1c(ccc(c1N)[N+](=O)[O-])Oc1ccccc1,0.030226952270055448,0.14853625939827544,1
+CN(C(=S)SSC(=S)N(C)C)C,0.03036190470594063,0.052029910797683425,1
+OC(C(Cl)(Cl)Cl)(c1ccc(cc1)Cl)c1ccc(cc1)Cl,0.030365547751564796,0.04860032343819596,1
+CCNc1nc(NC(C)C)nc(n1)Cl,0.03036860603978392,0.04120360138919732,1
+COc1nc(Oc2cccc(c2C(=O)[O-])Oc2nc(OC)cc(n2)OC)nc(c1)OC.[Na+],0.030507347552487064,0.5473869319708509,1
+N#CC(c1ccc(c(c1)Oc1ccccc1)F)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.03073999756536866,0.0168735264811073,1
+CC(C(c1cncnc1)(c1ccc(cc1)OC(F)(F)F)O)C,0.03138138916099924,0.026593616679335002,1
+COc1nc(C)nc(n1)N(C(=O)NS(=O)(=O)c1ccccc1C(=O)OC)C,0.031614325062739264,0.24608427091801371,1
+Fc1ccc(cc1)[Si](c1ccc(cc1)F)(Cn1cncn1)C,0.03170650329869704,0.03561255473876881,1
+Clc1ccc(cc1)C(c1ccccc1Cl)(c1cncnc1)O,0.03185425122707191,0.04655895302624533,1
+Cc1ccc2c(c1)nc1c(n2)sc(=O)s1,0.03201059303080734,0.07026663535161724,1
+CCN(c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-])CC(=C)C,0.03210675757919814,0.09640323913286399,1
+Fc1ccc(cc1)NC(=O)c1cccc(n1)Oc1cccc(c1)C(F)(F)F,0.032154821211279785,0.07351158254926493,1
+CCCn1c(OCCC)nc2c(c1=O)cc(cc2)I,0.03224060518839999,0.11241236083791278,0.10810810810810811
+CC(C(=O)O)Oc1cc(Cl)c(cc1Cl)Cl,0.03228091610123117,0.028342965522937247,1
+COc1cc(OC)nc(n1)NC(=O)NS(=O)(=O)c1ncccc1C(F)(F)F,0.0326520524201809,0.42156125408141487,1
+CN1CN(C)CSC1=S,0.03266034652463028,0.05143501540726455,1
+CCC1CCCC(OC2CCC(C(O2)C)N(C)C)C(C)C(=O)C2C(CC(=O)O1)C1CCC3C(C1C2)CC(C3)OC1CC(C)C(C(C1OC)OC)OC,0.03269690443692089,0.12862257019212,1
+N#CC(c1ccc(c(c1)Oc1ccccc1)F)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.03315774835702259,0.017722336281145168,1
+CCOC(=O)NCCOc1ccc(cc1)Oc1ccccc1,0.03318543029523152,0.18996129431743372,1
+Clc1ccc(c(c1)Cl)C1(OCCO1)Cn1cncn1,0.03331771398901528,0.04900048298881555,1
+CCOCn1c(c2ccc(cc2)Cl)c(c(c1C(F)(F)F)Br)C#N,0.03336499327732185,0.04693645636437956,1
+N#Cc1sc2=c(sc1C#N)c(=O)c1c(c2=O)cccc1,0.03374687200243409,0.16793514728653292,1
+CS/C(=N/OC(=O)N(SN(C(=O)O/N=C(/SC)\C)C)C)/C,0.03385342347822937,0.10857811869982233,1
+Fc1ccc(cc1)C(c1ccccc1F)(Cn1cncn1)O,0.03385434330908588,0.03999277860738707,1
+CCCCC(c1ccc(cc1)Cl)(Cn1cncn1)C#N,0.033936422812922216,0.08410265973865252,1
+CN(C=Nc1ccc(cc1C)C)C=Nc1ccc(cc1C)C,0.03408246361134649,0.06779996390642057,1
+CN(/C=N/c1ccc(cc1C)C)/C=N/c1ccc(cc1C)C,0.03408246361134649,0.056616345813375844,1
+CCCCC(c1ccc(cc1)Cl)(Cn1cncn1)C#N,0.03428271152063386,0.07913702747112562,1
+ClC(C(SN1C(=O)C2C(C1=O)CC=CC2)(Cl)Cl)Cl,0.034377949341570596,0.32963878644864847,0.42857142857142855
+CN(C=Nc1ccc(cc1C)C)C=Nc1ccc(cc1C)C,0.034764112883573416,0.06034262025387581,1
+ClC(=CCOc1cc(Cl)c(c(c1)Cl)OCCCOc1ccc(cn1)C(F)(F)F)Cl,0.034818667907167616,0.030963599852862136,1
+O=C(C1=C(C)OCCS1)Nc1ccccc1,0.034848813981213346,0.23922425558098437,1
+CCCSP(=S)(Oc1ccc(cc1)SC)OCC,0.03566479582586673,0.0028959670740657155,1
+CCC(C(=O)OC1=C(C(=O)OC21CCCCC2)c1ccc(cc1Cl)Cl)(C)C,0.03578732146400678,0.05428588647784607,1
+CC(NC(=O)N1CC(=O)N(C1=O)c1cc(Cl)cc(c1)Cl)C,0.03634528529867737,0.0681906147697324,1
+N#CC(c1c(Cl)ccc(c1Cl)n1ncc(=O)[nH]c1=O)c1ccc(cc1)Cl,0.03679735812631385,0.04692561833297489,1
+CCOCn1c(c2ccc(cc2)Cl)c(c(c1C(F)(F)F)Br)C#N,0.036799624938222635,0.047490155350518225,1
+C#CCOC(c1ccc(cc1)Cl)C(=O)NCCc1ccc(c(c1)OC)OCC#C,0.0369041241749624,0.14271941307426342,1
+CC(Cc1ccccc1)N,0.036980547196719206,0.1383071475607304,1
+OC(C(C)(C)C)C(n1ncnc1)Cc1ccc(cc1)Cl,0.03744148066760202,0.06917897069352194,1
+CCN(c1c(cc(cc1N(=O)=O)C(F)(F)F)N(=O)=O)CC(=C)C,0.0375078950368263,0.14694818194557713,1
+Clc1c(O)c(Cl)c(c(c1Cl)Cl)Cl,0.037546481605565646,0.2687854579582351,1
+CC(OP(=S)(OC(C)C)SCCNS(=O)(=O)c1ccccc1)C,0.03773457509937652,0.003947387860828739,1
+Clc1ccc(cc1)c1ccccc1NC(=O)c1cccnc1Cl,0.03787805062535496,0.15771531798304803,1
+CCOC(=O)CSc1nc(nn1C(=O)N(C)C)C(C)(C)C,0.03816748004747272,0.12937755361522535,1
+OC(=O)C(Oc1ccc(cc1Cl)Cl)C,0.03828744186371015,0.017916585548049518,1
+CC(C(c1cncnc1)(c1ccc(cc1)OC(F)(F)F)O)C,0.038746408312020406,0.025608931320986156,1
+OC(=O)COc1cc(Cl)c(cc1Cl)Cl,0.03914162418169542,0.10092531552039558,1
+CCOP(=S)(Oc1nn(c(n1)Cl)C(C)C)OCC,0.039841737145637234,0.009533681952914956,1
+O=C(NC(=O)c1c(F)cccc1F)Nc1cc(Cl)c(cc1Cl)OC(C(C(F)(F)F)F)(F)F,0.03990998658130422,0.09428444360947194,1
+O=C(CC(C)(C)C)OC1=C(C(=O)OC21CCCC2)c1c(C)cc(cc1C)C,0.039947970982482275,0.06756139796521358,1
+CNC(=S)S,0.04011276528748593,0.055889921503454794,1
+CCCN(c1c(cc(cc1[N+](=O)[O-])S(=O)(=O)N)[N+](=O)[O-])CCC,0.04042042788372036,0.14423663839522555,1
+CCCOC/C(=N\c1ccc(cc1C(F)(F)F)Cl)/n1cncc1,0.04049199977868229,0.013123381063449912,1
+OC1(Cn2ncnc2)C(CCC1(C)C)Cc1ccc(cc1)Cl,0.04095937862019833,0.0520426556409229,1
+CC(N(C(=O)SCC(=C(Cl)Cl)Cl)C(C)C)C,0.04102878665011248,0.03101114141686048,1
+CCN(C(=O)C(=C(OP(=O)(OC)OC)C)Cl)CC,0.041042640567373466,0.005254440579764089,1
+CCC(=O)Nc1ccc(c(c1)Cl)Cl,0.041269285481015994,0.10474606679531039,1
+CNC(=O)Oc1cc(C)c(c(c1)C)SC,0.041276958181115306,0.02137860199550009,1
+CCOC(=O)C(Oc1ccc(cc1)Oc1cnc2c(n1)ccc(c2)Cl)C,0.04157699893895499,0.017607383250018305,1
+Fc1ccc(c(c1)c1ccc(c(c1)Cl)Cl)NC(=O)c1cn(nc1C(F)F)C,0.04200781934177246,0.08272377649064482,1
+CN(C(=S)S[Zn]SC(=S)N(C)C)C,0.04250965492362519,0.060687914488021834,1
+Clc1ccc(c(c1)Cl)C1(OCCO1)Cn1cncn1,0.042646674541424644,0.04484975320989537,1
+CC(N(C(=O)SCC(=C(Cl)Cl)Cl)C(C)C)C,0.04266993811611698,0.03101114141686048,1
+C#CCOS(=O)OC1CCCCC1Oc1ccc(cc1)C(C)(C)C,0.04279938325518071,0.08305408428735889,1
+O=C(CC(C)(C)C)OC1=C(C(=O)OC21CCCC2)c1c(C)cc(cc1C)C,0.042917075351131324,0.11500417228402214,1
+ClC(C(SN1C(=O)C2C(C1=O)CC=CC2)(Cl)Cl)Cl,0.04297243667696324,0.32963878644864847,0.42857142857142855
+S=C1NCCCN1,0.04303491887745652,0.013843826474382414,1
+OC1(Cn2ncnc2)C(CCC1(C)C)Cc1ccc(cc1)Cl,0.043148047046675374,0.05035795195547215,1
+CC(Cc1ccc(cc1)C(C)(C)C)CN1CCCCC1,0.04326105065224025,0.008362846917186934,1
+CC(=CC1C(C1(C)C)C(=O)OCc1coc(c1)Cc1ccccc1)C,0.04432099700732809,0.12785635803715648,1
+Nc1ncn[nH]1,0.04460040180799325,0.042049662082769036,0.2727272727272727
+[S-]C(=S)NCCNC(=S)[S-].[Zn+2],0.04460661819584039,0.03222668255774988,1
+O=C1OC(C(=O)N1Nc1ccccc1)(C)c1ccc(cc1)Oc1ccccc1,0.044873074905021335,0.11984363339466098,1
+CCSC(=O)N1CCCCCC1,0.04487396262663614,0.0674393124471613,1
+CCN(c1nc(cc(n1)C)OP(=S)(OC)OC)CC,0.04519647299825149,0.01978012974291905,1
+O=C1OC(C(=O)N1Nc1ccccc1)(C)c1ccc(cc1)Oc1ccccc1,0.045407278177700156,0.12814432090874833,1
+ClCC=CCl,0.045958425107502164,0.020800592400871572,0.14285714285714285
+CCOC(=O)Cn1c(=O)sc2c1c(Cl)ccc2,0.046003238627999404,0.13549038572812314,1
+Nc1ccc(cc1)Cl,0.047032433723070206,0.18361498193404419,1
+CCCN(C(=O)SCC)CCC,0.047538995974292175,0.03231666295357693,1
+C[n+]1ccc(cc1)c1cc[n+](cc1)C,0.04762340359884257,0.08721751069244073,1
+CN(C(=S)SSC(=S)N(C)C)C,0.04783039657471141,0.03890835518730275,1
+S=C1NCCCN1,0.04819910832192538,0.013843826474382414,1
+CON(C(=O)Nc1ccc(cc1)Br)C,0.048243951057630914,0.04331377020795739,1
+Cc1cccc(c1O)C,0.04911414454620167,0.25122554015626974,1
+CCC(C(=O)NCc1ccccc1)Oc1ccc(c(c1)C(F)(F)F)F,0.049813316199071624,0.07230599259525354,1
+O=C(c1cccc(c1C(=O)NC(CS(=O)(=O)C)(C)C)I)Nc1ccc(cc1C)C(C(F)(F)F)(C(F)(F)F)F,0.04982487508940451,0.1722962430290261,1
+CN(C(=S)SSC(=S)N(C)C)C,0.04990997903448147,0.03027061366730548,1
+COC(=O)Nc1nc2c([nH]1)cc(cc2)Sc1ccccc1,0.050108966959550236,0.19492200103766277,1
+ClCC1CN(C(=O)C1Cl)c1cccc(c1)C(F)(F)F,0.05030195369030707,0.10208585777687137,1
+C=CCOC(c1ccc(cc1Cl)Cl)Cn1cncc1,0.05047450068604942,0.02261325719862112,1
+CCSC(CC1CC(=O)C(C(=O)C1)C(=NOCC)CCC)C,0.05056765552287047,0.11190017893703338,1
+C#CCN1C(=O)COc2c1cc(c(c2)F)N1C(=O)C2=C(C1=O)CCCC2,0.05079984353648191,0.053941765550951655,0.11764705882352941
+CN(C(=O)Oc1nc(nc(c1C)C)N(C)C)C,0.051618595485714625,0.03602454025477674,1
+CN(C(=O)Oc1nc(nc(c1C)C)N(C)C)C,0.05161859628615915,0.03602454025477674,1
+Clc1ccc(cc1)CCC(C(C)(C)C)(Cn1cncn1)O,0.05165383561566402,0.054611602947941554,1
+CNC(=O)Oc1cc(C)c(c(c1)C)C,0.05174850433885335,0.0778264984093653,1
+Cc1ccc(cc1)N(S(=O)(=O)N(C)C)SC(Cl)(Cl)F,0.051834835094095484,0.07395889559292834,1
+CNC(=O)Oc1cccc(c1)/N=C/N(C)C,0.051976062085632144,0.06722824407685239,1
+CN(C(=O)Oc1nc(nc(c1C)C)N(C)C)C,0.0524579222415799,0.03602454025477674,1
+O=N(=O)c1ccc(c(c1)N)C,0.05257947683683445,0.22402691528565083,1
+O=C1N(c2cc(Cl)cc(c2)Cl)C(=O)C2(C1(C)C2)C,0.05279126047017867,0.09049118545197148,1
+CC(C(c1ccc(cc1)Cl)(Cn1ncnc1)O)C1CC1,0.05326004956767166,0.03794754117008268,1
+O=C(N/C(=N\OCC1CC1)/c1c(F)c(F)ccc1C(F)(F)F)Cc1ccccc1,0.053352320292409515,0.0645264280345271,1
+NC(=NCCCCCCCCNCCCCCCCCN=C(N)N)N,0.053436074592710235,0.05644161644026265,1
+C=CCOC(c1ccc(cc1Cl)Cl)Cn1cncc1,0.05350296944357954,0.019176634380673414,1
+COCN(c1c(CC)cccc1CC)C(=O)CCl,0.05375006811431045,0.30362184184365276,1
+C#CCC1=C(C)C(CC1=O)OC(=O)C1C(C1(C)C)C=C(C)C,0.05492821614526029,0.15407527700390677,1
+CNC1=C(c2cccc(c2)C(F)(F)F)C(=O)C(O1)c1ccccc1,0.055204779037407746,0.186848969258529,1
+CCOc1ccc2c(c1)C(=CC(N2)(C)C)C,0.05522147585284508,0.20461649895602257,1
+O=C(c1ccc(cc1S(=O)(=O)C)C(F)(F)F)c1cnoc1C1CC1,0.05566064749641608,0.028872253041882734,1
+N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)(C)C,0.05566320606558952,0.026489557558876053,1
+CCOC(=O)COC(=O)c1cc(ccc1[N+](=O)[O-])Oc1ccc(cc1Cl)C(F)(F)F,0.05583516191627437,0.10425225781099558,1
+COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1Cl,0.05590140200157206,0.24111855236121182,1
+COc1nc(nc(n1)C)NC(=O)[N-]S(=O)(=O)c1cc(I)ccc1C(=O)OC.[Na+],0.05611797964648073,0.21978363812693485,1
+O=C(C(Oc1ccc(cc1)Oc1cnc2c(n1)ccc(c2)Cl)C)OCCON=C(C)C,0.05632150550177753,0.0126188830300113,1
+N#CC(c1c(Cl)ccc(c1Cl)n1ncc(=O)[nH]c1=O)c1ccc(cc1)Cl,0.056422615793681234,0.047374255673794156,1
+CNC(=O)Oc1cccc(c1)N=CN(C)C,0.056495719658295813,0.06685758904726949,1
+CCOC(=O)C(c1ccc(cc1)Cl)(c1ccc(cc1)Cl)O,0.056582904287311254,0.07321730385685384,1
+Clc1ccccc1c1nnc(nn1)c1ccccc1Cl,0.05706818624978773,0.08823825917757558,1
+Clc1ccccc1c1nnc(nn1)c1ccccc1Cl,0.05706818876652619,0.07955524614517077,1
+CCCN(C(=O)n1cncc1)CCOc1c(Cl)cc(cc1Cl)Cl,0.05707983190600125,0.06413029578988182,1
+CCOCCN(C(=C(C)C)c1ccccc1)C(=O)CCl,0.057470413386035736,0.8644625103771973,1
+CN(C(CN1c2ccccc2Sc2c1cccc2)C)C,0.058364575374860554,0.0733243982471679,1
+CCOC(=O)CCN(C(C)C)SN(C(=O)Oc1cccc2c1OC(C2)(C)C)C,0.05846135558242613,0.047500388629521745,1
+CC1COC(O1)(Cn1cncn1)c1ccc(cc1Cl)Oc1ccc(cc1)Cl,0.05932124091140686,0.07528051740350367,1
+Nc1n[nH]cn1,0.059467202410657664,0.042049662082769036,0.2727272727272727
+N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C(c1ccc(cc1)Cl)C(C)C,0.05953797389131243,0.01695755720477578,1
+N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)(C)C,0.06009909138187043,0.028551020286732454,1
+CC(OC(=O)C(c1ccc(cc1)Br)(c1ccc(cc1)Br)O)C,0.060497742776698574,0.09289647796203974,1
+CSc1nnc(c(=O)n1N)C(C)(C)C,0.060666030886662975,0.013701160159437665,0.11538461538461539
+CC(OC(=O)C(c1ccc(cc1)Br)(c1ccc(cc1)Br)O)C,0.06073132568962639,0.09289647796203974,1
+C=CC=CCC1=C(C)C(CC1=O)OC(=O)C1C(C1(C)C)C=C(C)C.C=CC=CCC1=C(C)C(CC1=O)OC(=O)C1C(C1(C)C)C=C(C(=O)OC)C,0.06134969850332702,0.18835039966107628,1
+C#CCOS(=O)OC1CCCCC1Oc1ccc(cc1)C(C)(C)C,0.0616311129759049,0.06290623779987833,1
+CNC(=O)ON=C(SC)C,0.061648442359631114,0.020019468411940664,1
+CNc1cnn(c(=O)c1Cl)c1cccc(c1)C(F)(F)F,0.06174515112035177,0.04547266109787749,1
+O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(c(c1)Cl)OC(C(OC(F)(F)F)F)(F)F,0.062106180868884746,0.06886154953419707,1
+COC(=O)c1cccc(c1S(=O)(=O)NC(=O)Nc1nc(OCC(F)(F)F)nc(n1)N(C)C)C,0.062140866929396014,0.23793955635016453,1
+CCNc1nc(SC)nc(n1)NC(C)(C)C,0.06214876624755196,0.03210078098954097,1
+[O-][N+](=O)c1cc(cc(c1)[N+](=O)[O-])[N+](=O)[O-],0.06245761469536169,0.12314474995874793,1
+Clc1cc(F)c(cc1C(=O)NS(=O)(=O)N(C(C)C)C)n1c(=O)cc(n(c1=O)C)C(F)(F)F,0.06269313377509025,0.029112705155716952,0.10416666666666667
+COP(=S)(SCN1C(=O)c2c(C1=O)cccc2)OC,0.06302765174348351,0.052741117655533944,1
+C=CCC1=C(C)C(CC1=O)OC(=O)C1C(C1(C)C)C=C(C)C,0.06332505687516009,0.15319090542803218,1
+ClCC(=O)N(c1c(C)cccc1C)Cn1cccn1,0.06336648858092589,0.2009430958791048,1
+CCOP(=S)(SCn1c(=O)oc2c1ccc(c2)Cl)OCC,0.0636200517424888,0.007268854919151466,1
+ClC(=CC1C(C1(C)C)C(=O)OCc1cccc(c1)Oc1ccccc1)Cl,0.06389160712181856,0.05152334138082678,1
+O=C(NS(=O)(=O)c1c(C)cccc1C(=O)O)Nc1nc(OCC(F)(F)F)nc(n1)N(C)C,0.06396281173215432,0.19918021134354338,1
+CC(C1(C)N=C(NC1=O)c1nc2ccccc2cc1C(=O)O)C,0.06423944765895072,0.6484736290157593,1
+OC(C(C)(C)C)C(n1ncnc1)Oc1ccc(cc1)Cl,0.06424027322808253,0.06532519308478073,1
+COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccsc1C(=O)OC,0.06453419527613821,0.17678677136859147,1
+O=C(N(C)C)Nc1cccc(c1)C(F)(F)F,0.06459882942614491,0.05921475394254172,1
+O=C(N(C)C)Nc1ccc(c(c1)Cl)n1nc(oc1=O)C(C)(C)C,0.06493710428214157,0.015942173623299506,1
+O=C(NC(=O)c1c(F)cccc1F)Nc1cc(Cl)c(c(c1F)Cl)F,0.06507320207279278,0.05286631638058653,1
+CC1N(C(=O)NC2CCCCC2)C(=O)SC1c1ccc(cc1)Cl,0.06546156290207059,0.04247781166837751,1
+CCCCN(SN(C(=O)Oc1cccc2c1OC(C2)(C)C)C)CCCC,0.06569530810416269,0.09306700536140901,1
+Clc1ccccc1c1nnc(nn1)c1ccccc1Cl,0.06597478470118634,0.0852179108472748,1
+[O-][N+](=O)NC1=NCCN1Cc1ccc(nc1)Cl,0.0664943030028045,0.0767295442612898,1
+CSc1nnc(c(=O)n1N)C(C)(C)C,0.06719929397120725,0.013701160159437665,0.11538461538461539
+CCOc1ccc(cc1)C(COCc1cccc(c1)Oc1ccccc1)(C)C,0.06773123883198195,0.13345614868935765,1
+O=C(NC(=O)c1c(F)cccc1F)Nc1cc(Cl)c(c(c1F)Cl)F,0.06822190749765324,0.05286631638058653,1
+CCOc1ccc(cc1)C(COCc1cccc(c1)Oc1ccccc1)(C)C,0.0690593023384914,0.11750268224676094,1
+CCO/N=C(/C1=C(O)CC(CC1=O)c1c(C)cc(cc1C)C)\CC,0.06981686853252955,0.10822472760447657,1
+COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1Cl,0.06987675250196507,0.25518897842008476,1
+CSc1nnc(c(=O)n1N)C(C)(C)C,0.06999926640768805,0.013701160159437665,0.11538461538461539
+CO/N=C(/c1ccccc1COc1cc(C)ccc1C)\C(=O)NC,0.07046793589427701,0.35915435645836064,1
+N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)(C)C,0.07154653735936956,0.02658541539960583,1
+COc1cc(OC)n2c(n1)nc(n2)S(=O)(=O)Nc1c(Cl)ccc(c1Cl)C,0.07172655770478076,0.20222775129603407,1
+C#CCOS(=O)OC1CCCCC1Oc1ccc(cc1)C(C)(C)C,0.07190296604559293,0.06217697904984631,1
+CCN1CCN(CC1)c1cc2c(cc1F)c(=O)c(cn2C1CC1)C(=O)O,0.07234386441112595,0.37000179744555806,1
+CO/N=C(\c1ccccc1CO/N=C(/c1cccc(c1)C(F)(F)F)\C)/C(=O)OC,0.07272797449373557,0.31053460388440923,1
+CCCC1COC(O1)(Cn1cncn1)c1ccc(cc1Cl)Cl,0.07305234130123987,0.03935031057771622,1
+O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(c(c1)Cl)OC(C(OC(F)(F)F)F)(F)F,0.07306609422899836,0.07390850442771738,1
+O=C(N(C)C)Nc1cccc(c1)C(F)(F)F,0.07395704796137248,0.05990809332390995,1
+OC(C(C)(C)C)C(n1cncn1)Oc1ccc(cc1)c1ccccc1,0.07409262028018154,0.16009035449920442,1
+ClCCOc1ccccc1S(=O)(=O)NC(=O)Nc1nc(C)nc(n1)OC,0.07465930346752149,0.16636976770957124,1
+CCCSc1ccc2c(c1)[nH]c(n2)NC(=O)OC,0.07537743365466734,0.3352032975296077,1
+Cn1nc(c(c1)C(=O)Nc1cccc2c1C1CCC2C1=C(Cl)Cl)C(F)F,0.07583481070072216,0.055676398132991405,1
+Cn1cc(c2cccc(c2)C(F)(F)F)c(=O)c(c1)c1ccccc1,0.07591497971688389,0.09032237501386235,1
+Clc1ccc(cc1)CN(C(=O)Nc1ccccc1)C1CCCC1,0.0760257762657501,0.06594162430630518,1
+CNC(=O)Oc1cccc2c1cccc2,0.07752660703214034,0.12181893049236775,1
+N#C/N=C(/N(Cc1ccc(nc1)Cl)C)\C,0.07859017665904088,0.02837256498884179,1
+CC(c1cc(ccc1O)C(c1ccc(c(c1)C(C)C)O)(C)C)C,0.08001387248515598,0.4789486600597755,1
+NC(=N)NCCCCCCCCNC(=N)N,0.08102032708037427,0.2959746109899016,1
+CCOC(=O)NCCOc1ccc(cc1)Oc1ccccc1,0.08196801536106943,0.21672697643629135,1
+Clc1cc(cnc1CNC(=O)c1c(Cl)cccc1Cl)C(F)(F)F,0.08212099927021806,0.017595171727997856,1
+O=C(C(C)(C)C)C(n1ncnc1)Oc1ccc(cc1)Cl,0.08272375649019124,0.0611989580609177,1
+ClCCP(=O)(O)O,0.08304843107672291,2.424380344082731,1
+COC(=O)Nc1cccc(c1)OC(=O)Nc1cccc(c1)C,0.0832475217878744,0.15841056524633793,1
+COP(=S)(Oc1ccc(c(c1)C)[N+](=O)[O-])OC,0.08332310268057162,0.011938651697814882,1
+CCCN(c1c(cc(c(c1[N+](=O)[O-])N)C(F)(F)F)[N+](=O)[O-])CCC,0.08392957349588569,0.17738314568013785,1
+ClC(SN1C(=O)c2c(C1=O)cccc2)(Cl)Cl,0.08430066662269543,0.21959456105712238,1
+O=C(C(C)(C)C)C(n1ncnc1)Oc1ccc(cc1)Cl,0.08510674803234901,0.06216876566048836,1
+CCO/N=C(\C1=C(O)CC(CC1=O)C1CCCSC1)/CCC,0.08603044408485085,0.08133431394836685,1
+CCCC(=C1C(=O)CC(CC1=O)C1CCCSC1)NOCC,0.08603044408485085,0.029262026512538307,1
+CCN(c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-])CC(=C)C,0.08701831648543702,0.10401259406145555,1
+COc1cc(ccc1OC)/C(=C/C(=O)N1CCOCC1)/c1ccc(cc1)Cl,0.08766124641710438,0.1664019969917766,1
+CCOC(=O)CC(C(=O)OCC)SP(=S)(OC)OC,0.08778355070659401,0.02087282021134488,1
+CC(=O)Nc1cc(NS(=O)(=O)C(F)(F)F)c(cc1C)C,0.08894826507859208,1.3357335474250853,1
+CCCN(c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-])CCC,0.08947770521301585,0.09458038139224523,1
+COCC(=O)Nc1cc(ccc1NC(=NC(=O)OC)NC(=O)OC)Sc1ccccc1,0.08959030532555236,0.15753779629122847,1
+O=C1N(OCC1(C)C)Cc1ccccc1Cl,0.08969617860069455,0.06602590336420336,1
+Nc1nc(NC2CC2)nc(n1)N,0.09026150563412319,0.10982169517930987,1
+ClC(C(c1ccc(cc1)Cl)c1ccc(cc1)Cl)(Cl)Cl,0.09027148189044054,0.042908725101945995,1
+Fc1ccc(cc1)C(=O)CCCN1CCN(CC1)c1ccccn1,0.09163218547527233,0.11110252941847366,1
+CCC(=O)Nc1ccc(c(c1)Cl)Cl,0.09170952329114665,0.11334998639980184,1
+N#CC(c1ccccc1)(Cn1cncn1)CCc1ccc(cc1)Cl,0.09203781459712614,0.04994324105131357,1
+Clc1ccc2c(c1)ncc(n2)Oc1ccc(cc1)OC(C(=O)OCC1CCCO1)C,0.09210345974638111,0.0127703526163709,1
+NC(=O)c1c(Cl)cccc1Cl,0.09261856560930491,0.11495532711221508,1
+COc1cc(ccc1OC)/C(=C/C(=O)N1CCOCC1)/c1ccc(cc1)Cl,0.09281779032399287,0.1584749483368686,1
+Clc1ccc(c(c1)Cl)NC(=O)C1(CC1)C(=O)O,0.09303171987631087,0.10818304574151816,1
+COC(=O)NC(=S)Nc1ccccc1NC(=S)NC(=O)OC,0.09345959256991566,0.23311870284543604,1
+Clc1cc(Cl)cc(c1)C1(CO1)CC(Cl)(Cl)Cl,0.09362507489225783,0.0431096794497545,1
+OC(=O)COc1ccc(cc1C)Cl,0.0947069010825298,0.0596687595644816,1
+Cc1nnc(c(=O)n1N)c1ccccc1,0.09643315995145703,0.16643101472065308,1
+[O-][N+](=O)/N=C/1\NCCN1Cc1ccc(nc1)Cl,0.0973945952590747,0.11879102876531308,1
+Nc1ccc(cc1)Cl,0.09798423692306293,0.18636555783036246,1
+Cn1cc(c2cccc(c2)C(F)(F)F)c(=O)c(c1)c1ccccc1,0.09868947363194906,0.09346150569723749,1
+COc1c(OC)cc(c(c1OC)C(=O)c1c(OC)ncc(c1C)Cl)C,0.09950572862076837,0.5022089453626634,1
+NC(=N)NCCCCCCCCCCCCOC(=O)C,0.10160268068512719,0.3976348891342203,1
+OC1CC2(O)CC(O)C(C(O2)(C)CC(C=CC=CC=CC=CCC(OC(=O)C=CC2C(C1)(C)O2)C)OC1(C)OC(C)C(C(C1O)N)O)C(=O)O,0.10172294366080416,0.3945818999004983,1
+COc1ncc(c2n1nc(n2)NS(=O)(=O)c1c(cccc1C(F)(F)F)OCC(F)F)OC,0.1034404543369562,0.17566048716063784,1
+N#CC(c1ccccc1)(Cn1cncn1)CCc1ccc(cc1)Cl,0.10391366164191661,0.048327010230331986,1
+CCCN(c1c(cc(cc1[N+](=O)[O-])S(=O)(=O)N)[N+](=O)[O-])CCC,0.10393824312956665,0.1243653010335676,1
+COCc1c(F)c(F)c(c(c1F)F)COC(=O)C1C(C1(C)C)/C=C/C,0.10573252781458294,0.07439706043879453,1
+[O-][N+](=O)c1cnc(n1C)C,0.10628650675790867,0.19401311372646204,1
+CC(N(c1c(cc(cc1N(=O)=O)S(=O)(=O)N)N(=O)=O)C(C)C)C,0.10642121227099519,0.21225805280325383,1
+CCOC(=O)C(OC(=O)c1cc(ccc1[N+](=O)[O-])Oc1ccc(cc1Cl)C(F)(F)F)C,0.10827828411229923,0.06648436472564069,1
+CCOC(=O)C(OC(=O)c1cc(ccc1N(=O)=O)Oc1cc(ccc1Cl)C(F)(F)F)C,0.10827828411229923,0.061062681077149544,1
+S=C1NCCNC(=S)S[Mn+2]S1,0.10855557507359069,0.017281371487307694,1
+ClCC(=O)N(c1ccccc1)C(C)C,0.10865048725491992,0.09496861063144992,1
+CCNc1nc(NC(C)C)nc(n1)Cl,0.10941971287651023,0.043173870512644,1
+CCOc1cc(ccc1[N+](=O)[O-])Oc1ccc(cc1Cl)C(F)(F)F,0.11058877880543937,0.04678204170857044,1
+COC(=O)c1c(nc(c(c1CC(C)C)C1=NCCS1)C(F)(F)F)C(F)F,0.11151045196043953,0.037382248593410286,1
+COC(=O)c1c(nc(c(c1CC(C)C)C1=NCCS1)C(F)(F)F)C(F)F,0.11151045388522976,0.03983577416044405,1
+Clc1cc(ccc1Oc1ccc(c(c1)C(=O)NS(=O)(=O)C)[N+](=O)[O-])C(F)(F)F,0.11395676083924232,0.0793295655760358,1
+Oc1ccc(c(c1)C)C,0.1145996706078039,0.11410846546983064,1
+N#Cc1c(N)nc(nc1N)NC1CC1,0.11566455596376966,0.06515502205741146,1
+O=C(c1cccc(c1C(=O)NC(CS(=O)(=O)C)(C)C)I)Nc1ccc(cc1C)C(C(F)(F)F)(C(F)(F)F)F,0.115769562707734,0.1353589335978793,1
+CCNc1nc(NC(C)C)nc(n1)Cl,0.11591071091933607,0.043902127532966045,1
+COc1c(OC)cc(c(c1OC)C(=O)c1c(OC)ncc(c1C)Cl)C,0.11727460798675288,0.49714109722368416,1
+CNC(=O)O/N=C(\SC)/C,0.11836501403389492,0.027383715681877,1
+CCN(c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-])Cc1c(F)cccc1Cl,0.1185590456888386,0.09933213815884388,1
+Nc1ccc(cc1)S(=O)(=O)Nc1nc(C)cc(n1)C,0.1185642260256668,0.3465603274803372,1
+CCNC(=O)NC(=O)/C(=N\OC)/C#N,0.11857948837239812,0.15289185096526228,1
+CC(N(c1c(cc(cc1N(=O)=O)C(F)(F)F)N(=O)=O)C(C)C)C,0.1193036069506878,0.06759688882241817,1
+COc1cc(ccc1OC)C(=CC(=O)N1CCOCC1)c1ccc(cc1)Cl,0.11937399144446861,0.1639515356772961,1
+CCCCc1c(=O)nc([nH]c1C)NCC,0.1194525860672606,0.13288110687759513,1
+N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.12010651237688001,0.01578324760281871,1
+CNC(=O)ON=C(SC)C,0.12329688471926223,0.024637179457617557,1
+CNC(=O)Oc1cc(C)c(c(c1)C)SC,0.1242747128033579,0.022377478984048814,1
+c1coc(c1)c1nc2c([nH]1)cccc2,0.12486833177320307,0.06283286902314578,1
+CO/C=C(\c1ccccc1COc1cccc(n1)C(F)(F)F)/C(=O)OC,0.1252316956521325,0.17628948629366487,1
+CN(C(=O)C(c1ccccc1)c1ccccc1)C,0.1253592168358431,0.11980652421596152,1
+CON(C(=O)Nc1ccc(c(c1)Cl)Cl)C,0.12545469800745823,0.042874763652812176,1
+O=C(C1=C(C)OCCS1)Nc1ccccc1,0.1274956638724717,0.22358836955729258,1
+ClC(=CC1C(C1(C)C)C(=O)OCc1cccc(c1)Oc1ccccc1)Cl,0.12778321424363712,0.042877873409045086,1
+O=C(N(C)C)Nc1cccc(c1)C(F)(F)F,0.12919765885228982,0.05872275009746561,1
+CC(N(c1c(cc(cc1N(=O)=O)S(=O)(=O)N)N(=O)=O)C(C)C)C,0.12992280391195832,0.21225805280325383,1
+[O-][N+](=O)/N=C(\NC)/NCc1cnc(s1)Cl,0.13016764551401042,0.0664943030028045,0.10344827586206896
+CCCN(C(=O)SCC)CCC,0.13205276659525605,0.025649453338359905,1
+C=CCOC(c1ccc(cc1Cl)Cl)Cn1cncc1,0.13459866849613178,0.020586813499091246,1
+OC(C(C)(C)C)C(=Cc1ccc(cc1)Cl)n1ncnc1,0.13506940531624406,0.09513382772182304,1
+CCc1ccc(cc1)C(=O)NN(C(C)(C)C)C(=O)c1cc(C)cc(c1)C,0.13618183361575933,0.8946497962290012,1
+O=C(Nc1cnns1)Nc1ccccc1,0.13620822278144273,0.08999508813305773,1
+CCCCCCCCc1cc(N(=O)=O)c(c(c1)N(=O)=O)OC(=O)C=CC,0.1372145060102149,0.2884147588603236,1
+CCc1ccc(cc1)C(=O)NN(C(C)(C)C)C(=O)c1cc(C)cc(c1)C,0.13731668655832788,0.36134549162412616,1
+Cc1cccc2c1n1cnnc1s2,0.1373938645607217,0.4649462320529616,1
+CCC(Nc1c(cc(cc1[N+](=O)[O-])C(C)(C)C)[N+](=O)[O-])C,0.13747135609511818,0.08194730919474813,1
+CC(C(c1cncnc1)(c1ccc(cc1)OC(F)(F)F)O)C,0.13801406108477293,0.02334539398625156,1
+CC(NC(=O)N1CC(=O)N(C1=O)c1cc(Cl)cc(c1)Cl)C,0.13932359364492994,0.07115407630403497,1
+CN1CC2CC1CN2c1cc2c(cc1F)c(=O)c(cn2C1CC1)C(=O)O,0.13990757146198934,0.2090948708768445,0.5862068965517241
+OC(=O)COc1nc(Cl)c(cc1Cl)Cl,0.1403669879303106,0.04327585927626025,1
+CC(OC(=O)Nc1cccc(c1)Cl)C,0.14040948460452124,0.09113750740767616,1
+COC(=O)C(N(c1c(C)cccc1C)C(=O)Cc1ccccc1)C,0.14136381415796706,0.2135515487520386,1
+C=CC1(C)OC(=O)N(C1=O)c1cc(Cl)cc(c1)Cl,0.1413788142745837,0.10756688196876787,1
+COc1nc(C)nc(n1)N(C(=O)NS(=O)(=O)c1ccccc1C(=O)O)C,0.14421924681891674,0.26729546067178,1
+ClC(=C)Cl,0.14441434207714035,0.010177007878307786,0.1
+Clc1ccccc1CC(C1(Cl)CC1)(Cn1nc[nH]c1=S)O,0.1452393166315865,0.04660628941940032,1
+CC(N1C(=O)c2ccccc2NS1(=O)=O)C,0.14566407168203882,0.4056883000934699,1
+CC(=O)O.CCCCCCCCCCCCNC(=N)N,0.1461167287581588,0.2239202257852629,1
+CCOc1cc(ccc1[N+](=O)[O-])Oc1ccc(cc1Cl)C(F)(F)F,0.14653013191720715,0.05205345386490815,1
+CON=C(c1ccccc1CON=C(c1cccc(c1)C(F)(F)F)C)C(=O)OC,0.14692519722320194,0.2857188922342546,1
+c1ccc(cc1)Nc1ccccc1,0.14773454395291782,0.20378273649665343,1
+COC(CCCC(CC=CC(=CC(=O)OC(C)C)C)C)(C)C,0.14816176662421726,0.7514525775875477,1
+c1scc(n1)c1nc2c([nH]1)cccc2,0.1490700414533971,0.04999034581341369,1
+CC(N1C(=O)c2ccccc2NS1(=O)=O)C,0.14982590230152565,0.48891072826169246,1
+N#Cc1c(Cl)c(C#N)c(c(c1Cl)Cl)Cl,0.15042627044387033,0.07089396189028405,1
+ClC(SN1C(=O)c2c(C1=O)cccc2)(Cl)Cl,0.15174119992085178,0.24009429524650522,1
+CON=C(c1ccc(cc1Cl)Cl)Cc1cccnc1,0.15245767876475944,0.07528095892566167,1
+CCC(c1noc(c1)NC(=O)c1c(OC)cccc1OC)(CC)C,0.15252975563710267,0.22465071517314014,1
+CCNC(=O)NC(=O)C(=NOC)C#N,0.15289185096526225,0.13464708317401125,1
+Clc1ccc(c(c1)Cl)C=C(C(C(C)(C)C)O)n1cncn1,0.15327033840680634,0.08718428281810346,1
+COC=C(c1ccccc1Oc1ncnc(c1)Oc1ccccc1C#N)C(=O)OC,0.15431812608561873,0.18865576740539436,1
+COC(=O)NC(=S)Nc1ccccc1NC(=S)NC(=O)OC,0.15479245019392282,0.23311870284543604,1
+OC(=O)C(Oc1ccc(cc1Cl)Cl)C,0.15527684755838006,0.019604411057235267,1
+COP(=S)(Oc1cc(Cl)c(cc1Cl)Cl)OC,0.15549919159080278,0.014323208545850756,1
+Cc1nc(Nc2ccccc2)nc(c1)C1CC1,0.15801925188118618,0.19454416629038565,1
+CCOC(=O)CN(c1c(CC)cccc1CC)C(=O)CCl,0.1603572605822803,0.18455943460404955,1
+CC(OC(=O)Nc1ccccc1)C,0.16181616210899355,0.4576296267631658,1
+Cc1cccc2c1n1cnnc1s2,0.16381576159162972,0.4344528846227351,1
+C#CC(NC(=O)c1cc(Cl)cc(c1)Cl)(C)C,0.16593276232681306,0.06064270880188062,1
+CC(N1C(=O)c2ccccc2NS1(=O)=O)C,0.16647322477947293,0.41085861196208356,1
+CNc1cnn(c(=O)c1Cl)c1cccc(c1)C(F)(F)F,0.1687700797289615,0.04724600485885422,1
+O=C1N(c2cc(Cl)cc(c2)Cl)C(=O)C2(C1(C)C2)C,0.16893203350457175,0.0828914722158967,1
+CCC(Nc1c(cc(cc1[N+](=O)[O-])C(C)(C)C)[N+](=O)[O-])C,0.16929970598735858,0.07890908052701984,1
+Cc1ccc(c2c1cccc2)C,0.1728291127183792,0.12707622740780478,1
+Clc1cc(Cl)c(cc1n1nc(n(c1=O)C(F)F)C)NS(=O)(=O)C,0.1730416993562668,0.024541035827570765,1
+COC(=O)c1ccc(cc1C1=NC(C(=O)N1)(C)C(C)C)C,0.1734054330003024,0.32766976057445574,1
+CNC(=O)N(c1nnc(s1)C(C)(C)C)C,0.1751969016077557,0.15654611790372291,0.4
+CCN(C(=O)C(Oc1cccc2c1cccc2)C)CC,0.17526912017369997,0.25559916952159206,1
+CCCCCCCCc1cc(N(=O)=O)c(c(c1)N(=O)=O)OC(=O)C=CC,0.17563456769307506,0.2542481616501888,1
+CCCCCCC(c1cc(cc(c1OC(=O)/C=C/C)[N+](=O)[O-])[N+](=O)[O-])C,0.17563456769307506,0.26463567647786024,1
+CCOCN(c1c(C)cccc1CC)C(=O)CCl,0.17607780933998252,0.08322210554037121,1
+CCCCCCCCCC[N+](CCCCCCCCCC)(C)C.[Cl-],0.1767583631976715,0.777859424387322,1
+CCCCCCCCSC(=O)Oc1cc(Cl)nnc1c1ccccc1,0.17813968959673715,0.15669364902191532,1
+Clc1ccc(cc1)CCC(C(C)(C)C)(Cn1cncn1)O,0.17867678986550448,0.05298126176351619,1
+N#Cc1cc(C)c(c(c1)C(=O)NC)NC(=O)c1cc(nn1c1ncccc1Cl)Br,0.17901230859828976,0.02537036919235666,1
+COCC(=O)N(c1c(C)cccc1C)N1CCOC1=O,0.17965983350851364,0.11870027623897902,1
+N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.18015976856532,0.014790752702768572,1
+CC1=NNC(=O)N(C1)/N=C/c1cccnc1,0.18091653347462547,0.6560215843907876,1
+c1ccc(cc1)Nc1ccccc1,0.1831908345016181,0.2623047551120752,1
+CN1CN(C)CSC1=S,0.18486987933542975,0.02964017440587401,1
+CCOCN(c1c(C)cccc1CC)C(=O)CCl,0.18534506246313948,0.0688130158812099,1
+O=N(=O)c1ccc(c(c1)N(=O)=O)C,0.1866762157041476,0.11487634907046192,1
+COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1CCC(F)(F)F,0.19051986050321804,0.13616466229510565,1
+COP(=O)(NC(=O)C)SC,0.1910836440808347,0.01603420284847195,1
+N#CCNC(=O)c1cnccc1C(F)(F)F,0.19244308898713228,0.4221312557184933,1
+O=C(NC(=O)c1c(F)cccc1F)Nc1cc(Cl)c(c(c1F)Cl)F,0.19351406453661255,0.057013512580576416,1
+CCCCCCC(c1cc(cc(c1OC(=O)/C=C\C)[N+](=O)[O-])[N+](=O)[O-])C,0.19484459853450517,0.16294718993200522,1
+OC1CN(C(=O)N1c1nnc(s1)C(C)(C)C)C,0.19506513302817866,0.17519690160775567,0.4
+OC(=O)C(Cl)(Cl)C,0.1970361896096669,0.42676977936996974,0.125
+O=c1nc(N(C)C)n(c(=O)n1C1CCCCC1)C,0.19816672003956992,0.503640251987437,0.16666666666666666
+c1scc(n1)c1nc2c([nH]1)cccc2,0.19876005527119617,0.07124290059304189,1
+CCCCC(c1ccc(cc1)Cl)(Cn1cncn1)C#N,0.20057118462494436,0.08323848349319968,1
+Nc1ccc(c(c1)N)O,0.2013846888993215,0.8190054209287416,1
+C=Cc1ccccc1,0.20163396483810905,0.4669300291718144,1
+Cn1nc(c(c1)C(=O)Nc1ccccc1C1CC1C1CC1)C(F)F,0.2021971466240455,0.050901264305063164,1
+O=C(NS(=O)(=O)c1ccccc1C(=O)OC1COC1)Nc1nc(C)cc(n1)C,0.20422574060250331,0.3857400812695065,1
+CO/C=C(\c1ccccc1Oc1ncnc(c1)Oc1ccccc1C#N)/C(=O)OC,0.20427010160523304,0.19465114260586,1
+ClC=C(c1cc(Cl)c(cc1Cl)Cl)OP(=O)(OC)OC,0.20493941143914957,0.017923200540319623,1
+COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1CCC(F)(F)F,0.2098341392275743,0.13665038085545952,1
+ClCC(=O)N(c1c(CC)cccc1CC)CNC(=O)C,0.21058487877925733,0.17555052706620422,1
+O=C1CCCC(=O)C1C(=O)c1ccc(cc1Cl)S(=O)(=O)C,0.2189994026791292,0.002847075141623806,1
+CC(c1ccc(cc1)O)(c1ccc(cc1)O)C,0.21902317939829427,0.7987528952107646,1
+OC(C(C)(C)C)C(n1ncnc1)Oc1ccc(cc1)Cl,0.21976935578028234,0.07219258757735694,1
+CC(Nc1nc(NC(C)C)nc(n1)Cl)C,0.2198425631222415,0.057817340618821475,1
+ClCC(=O)N(c1c(C)cccc1CC)COC(C)C,0.22199225860138957,0.0986751773034067,1
+COCC(=O)N(c1c(C)cccc1C)C(C(=O)OC)C,0.22374845318219344,0.2272789169439581,1
+Nc1ccc2c(c1)nc1c(c2)ccc(c1)N,0.22461542255370148,0.5127318005761181,1
+Nc1cnn(c(=O)c1Cl)c1ccccc1,0.2255879747532767,0.06819037903102541,1
+CNC(=O)Oc1ccccc1OC(C)C,0.22939978025412716,0.05128012589016116,1
+CC(CC(c1sccc1NC(=O)c1cn(nc1C(F)(F)F)C)C)C,0.23093421710838027,0.09605024222468138,1
+CCNC(=O)C(OC(=O)Nc1ccccc1)C,0.23278744254805916,0.31163505026311,1
+Clc1c(ccc(c1N)[N+](=O)[O-])Oc1ccccc1,0.23425888009292972,0.15238507364192602,1
+Cl/C=C/CO/N=C(\C1=C(O)CC(CC1=O)CC(SCC)C)/CC,0.2389478027971563,0.253664493846163,1
+CCSC(CC1CC(=O)C(=C(NOCC=CCl)CC)C(=O)C1)C,0.2389478027971563,0.25672688702175106,0.18181818181818182
+CNC(=O)Oc1ccccc1OC(C)C,0.23895810443138246,0.0540412050930721,1
+CCC(n1c(=O)[nH]c(c(c1=O)Br)C)C,0.23935747721355113,0.2948426093660432,0.25
+CCOCN(c1c(C)cccc1CC)C(=O)CCl,0.24799169923196304,0.07857893254252615,1
+C=CCC1=C(C)C(CC1=O)OC(=O)C1C(C1(C)C)C=C(C)C,0.24800936112986982,0.171834951313278,1
+OC(=O)c1nc(Cl)c(c(c1Cl)N)Cl,0.24848916516834604,0.3714104972948829,1
+CCc1cc(C)cc(c1c1c(OC(=O)C(C)(C)C)n2n(c1=O)CCOCC2)CC,0.24968092026794356,0.07242815295555487,1
+CC1OC(C)OC(OC(O1)C)C,0.249701719945447,0.7175892491582392,0.25
+[S]C(=S)NCCNC(=S)S[Mn],0.2525424903682367,0.03648895915666588,1
+CCOCN(c1c(C)cccc1CC)C(=O)CCl,0.2557761861991325,0.07996202101917224,1
+Cc1ccc(cc1)N(S(=O)(=O)N(C)C)SC(Cl)(Cl)F,0.25917417547047744,0.07402684850899094,1
+Fc1ccc(cc1)Oc1ccnc2c1c(Cl)cc(c2)Cl,0.25962686686321285,0.058384643171399436,1
+CC(Oc1cccc(c1)NC(=O)c1ccccc1C(F)(F)F)C,0.2690918752347788,0.1149002721824295,1
+Nc1cnn(c(=O)c1Cl)c1ccccc1,0.270705569703932,0.0723832343431398,1
+OCC(CCl)O,0.27139624684320934,8.18897175780498,1
+CCSC(CC1CC(=O)C(=C(NOCC=CCl)CC)C(=O)C1)C,0.27784628232227476,0.18075913870978183,1
+CC(CC(c1sccc1NC(=O)c1cn(nc1C(F)(F)F)C)C)C,0.2782339965161208,0.09605024222468138,1
+COc1cc(OC)nc(n1)NC(=O)NS(=O)(=O)c1ncccc1S(=O)(=O)CC,0.2804534946915948,0.6913408558790916,1
+OC(=O)COc1ccc(cc1Cl)Cl,0.28049546719325014,0.041836193207807806,1
+CCCC1COC(O1)(Cn1cncn1)c1ccc(cc1Cl)Cl,0.2805209905967611,0.03936504164521463,1
+C#CCOS(=O)OC1CCCCC1Oc1ccc(cc1)C(C)(C)C,0.2853292217012047,0.07235320420788187,1
+Cc1ccc(cc1)N(S(=O)(=O)N(C)C)SC(Cl)(Cl)F,0.2879713060783083,0.0672060941474649,1
+CCOC(=O)CC(C(=O)OCC)SP(=S)(OC)OC,0.2921073325236663,0.02307472490573678,1
+CCCCOCCOCCOCc1cc2OCOc2cc1CCC,0.29547465787728056,0.3350153436602428,1
+CNC(=O)Oc1cccc2c1cccc2,0.2981792578159244,0.12885391431706575,1
+COC(=O)c1ccccc1S(=O)(=O)NC(=O)Nc1nc(OC(F)F)cc(n1)OC(F)F,0.2989300503468667,0.4021371024540237,1
+CNC(=O)Oc1cccc2c1cccc2,0.2991731924668564,0.11663521010507076,1
+CN(C(=S)[S-])C.CN(C(=S)[S-])C.CN(C(=S)[S-])C.[Fe+3],0.30012414094866885,0.03664917430602502,1
+CCOC(=O)c1ccccc1S(=O)(=O)NC(=O)Nc1nc(Cl)cc(n1)OC,0.30133493788161053,0.4908814012760744,1
+Fc1ccc(c(c1)F)NC(=O)c1cccnc1Oc1cccc(c1)C(F)(F)F,0.3033262936121485,0.08329374437288468,1
+CCCCCCCCSC(=O)Oc1cc(Cl)nnc1c1ccccc1,0.3034972489425892,0.1275872230209465,1
+NC1CC(N=C(C(=O)O)N)C(OC1OC1C(O)C(O)C(C(C1O)O)O)C,0.3057757345866624,2.5745022532058908,1
+CNC(=O)Oc1cc(C)cc(c1C)C,0.30635114568601185,0.06817593429629851,1
+COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1C(=O)OC,0.31203800675365617,0.10672040100126456,1
+ClCC(=O)N(c1c(C)cccc1C)Cn1cccn1,0.31323206744613685,0.21419826576830403,1
+C#CCC1=C(C)C(CC1=O)OC(=O)C1C(C1(C)C)C=C(C)C,0.316253365684832,0.15407527700390677,1
+OC(=O)CCl,0.317470328693963,0.38399352507880175,1
+ClC(SN1C(=O)C2C(C1=O)CC=CC2)(Cl)Cl,0.3260262207586085,0.07976114599708196,1
+CCCCCCC(c1cc(cc(c1OC(=O)/C=C\C)[N+](=O)[O-])[N+](=O)[O-])C,0.3315102548955885,0.15393856181627197,1
+ClC(SN1C(=O)C2C(C1=O)CC=CC2)(Cl)Cl,0.3326798171006209,0.07976114599708196,1
+CN(C1C(=O)C(=C(O)N)C(=O)C2(C1CC1C(=C(O)c3c(C1(C)O)cccc3O)C2=O)O)C,0.33750750616693714,7.376190802377752,1
+OC(=O)COc1ccc(cc1Cl)Cl,0.33930903289506065,0.04146720868611565,1
+Clc1cc(ccc1Oc1ccc(c(c1)C(=O)O)[N+](=O)[O-])C(F)(F)F,0.34563108073944815,0.09371047325644374,1
+CCC(=O)Nc1ccc(c(c1)Cl)Cl,0.3484961885063573,0.1283708489726044,1
+OC(=O)C(Cl)(Cl)C,0.3497269961122948,0.42676977936996974,0.125
+Fc1ccc(cc1)C(=O)CCCN1CCN(CC1)c1ccccn1,0.35125671098854394,0.1949888828070151,1
+N=C(NC(=N)N)NCCc1ccccc1,0.35564719019232227,0.2989014799552515,1
+COc1ccc(cc1)C(C(Cl)(Cl)Cl)c1ccc(cc1)OC,0.36163948246786254,0.12070782067019675,1
+Cc1cc(C)cc(c1)C(=O)N(C(C)(C)C)NC(=O)c1ccc2c(c1C)CCCO2,0.3678012132205545,0.23474791298621292,1
+CCN(C(=O)C(Oc1cccc2c1cccc2)C)CC,0.36852210915226874,0.24365961404385508,1
+Cc1nnc(c(=O)n1N)c1ccccc1,0.36891864539658303,0.1923581389323803,1
+CC(=CC1C(C1(C)C)C(=O)OCc1coc(c1)Cc1ccccc1)C,0.3693416417277341,0.17187893661334366,1
+O=C(Nc1ccnc(c1)Cl)Nc1ccccc1,0.37548404132262436,0.07959376639892965,1
+O=C(C(C)(C)C)C(n1ncnc1)Oc1ccc(cc1)Cl,0.3880867710275115,0.06307401130325499,1
+COC(=O)Nc1nc2c([nH]1)cccc2,0.3922867840256219,0.11674597711463099,1
+CCC(n1c(=O)[nH]c(c(c1=O)Br)C)C,0.39446112244793224,0.2948426093660432,0.25
+CO/N=C(\c1ccccc1Oc1ncnc(c1F)Oc1ccccc1Cl)/C1=NOCCO1,0.39448424715427566,0.09988973974944607,1
+N#Cc1ccc(cc1)C/C(=N/NC(=O)Nc1ccc(cc1)OC(F)(F)F)/c1ccc(cc1)C(F)(F)F,0.394944816927872,0.055871495745498795,1
+CCCCNC(=O)n1c(NC(=O)OC)nc2c1cccc2,0.3961177430023906,0.18977813725506376,1
+O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(cc1)Cl,0.4023390123323988,0.14297640125707387,1
+O=C1N(OCC1(C)C)Cc1ccccc1Cl,0.41719152837532353,0.07091709112656906,1
+Clc1c(Cl)c([N+](=O)[O-])c(c(c1Cl)Cl)Cl,0.423248605734443,0.08300342479617458,1
+NCCNc1cccc2c1cccc2,0.4241543329029509,0.2252360875684565,1
+CC(=CC1C(C1(C)C)C(=O)OCc1cccc(c1)Oc1ccccc1)C,0.42802021191337764,0.06014092276203531,1
+CC(Oc1ccccn1)COc1ccc(cc1)Oc1ccccc1,0.43361266621389954,0.13385739744024536,1
+CO/C=C(\c1ccccc1COc1cccc(n1)C(F)(F)F)/C(=O)OC,0.4410333629488144,0.18118444726245583,1
+N#Cc1c[nH]cc1c1cccc2c1OC(O2)(F)F,0.443217671652664,0.1779144876306056,1
+O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(cc1F)Oc1ccc(cc1Cl)C(F)(F)F,0.4460202371248177,0.07230910035578181,1
+CC1N(C(=O)NC2CCCCC2)C(=O)SC1c1ccc(cc1)Cl,0.4534134152107278,0.04034944223996247,1
+N#Cc1c[nH]cc1c1cccc2c1OC(O2)(F)F,0.4553054263341003,0.17957314863237633,1
+CCSC(=O)N(CC(C)C)CC(C)C,0.4600420791288938,0.030069785449812297,1
+CSC(=O)c1cccc2c1snn2,0.4608228380460223,1.504300581444509,1
+Cc1cc(N)c(cc1C)C,0.46595489467866197,0.053899581356752935,1
+COCN(c1c(CC)cccc1CC)C(=O)CCl,0.4670695574071115,0.30055949596754833,1
+Clc1cc(ccc1Oc1ccc(c(c1)C(=O)[O-])[N+](=O)[O-])C(F)(F)F.[Na+],0.46919094173712006,0.06702828193704673,1
+O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(cc1F)Oc1ccc(cc1Cl)C(F)(F)F,0.4705718098105875,0.08241832742445686,1
+Clc1c(Cl)c([N+](=O)[O-])c(c(c1Cl)Cl)Cl,0.47403843842257615,0.0740572717714695,1
+ClCC[N+](C)(C)C.[Cl-],0.47447507557122687,2.3396956191028,1
+Clc1c(Cl)c([N+](=O)[O-])c(c(c1Cl)Cl)Cl,0.4774244272684517,0.07406861996239202,1
+CC1OC(C)CN(C1)C1CCCCCCCCCCC1,0.48316627385722294,0.06345349249676542,1
+CCCCc1c(C)nc(nc1OS(=O)(=O)N(C)C)NCC,0.4930161419173511,0.04492074603746372,1
+CN1N(C)C(CC1c1ccccc1)c1ccccc1,0.49533572071941767,0.13524048454968857,1
+OC(=O)C(Oc1cccc(c1)Cl)C,0.4984573741185779,0.027013763409090032,1
+COC(=O)C(NC(=O)C(CC(=O)O)N)Cc1ccccc1,0.4994850207500349,0.5695813389794603,1
+O=c1[nH]c2CCCc2c(=O)n1C1CCCCC1,0.503640251987437,0.20429718941494676,1
+CCOC(=O)OC1=C(C(=O)NC21CCC(CC2)OC)c1cc(C)ccc1C,0.5061016308843888,0.08215925774965492,1
+CCOC(=O)OC1=C(C(=O)NC21CCC(CC2)OC)c1cc(C)ccc1C,0.5061016308843888,0.06592558590239668,1
+N#Cc1c[nH]cc1c1cccc(c1Cl)Cl,0.5061481392686851,0.1482202723687745,1
+CCCOC(=O)NCCCN(C)C.Cl,0.5072793699625824,0.5109577674385912,1
+CCOc1nc(nc(n1)NC)NC(=O)NS(=O)(=O)c1ccccc1C(=O)OC,0.5116896474609399,0.2846622352597012,1
+CCCCC(COC(=O)c1ccccc1C(=O)OCC(CCCC)CC)CC,0.5120902983161549,0.36436394331206917,1
+COc1c(Cl)ccc(c1C(=O)O)Cl,0.520273850439093,0.2583948966616764,1
+CCOc1cc(ccc1C1COC(=N1)c1c(F)cccc1F)C(C)(C)C,0.5202976892967504,0.04918878971844987,1
+COCC(N(c1c(C)cccc1CC)C(=O)CCl)C,0.5285529966699751,0.07490420627303267,1
+O=CCC1CC(C)C(=O)C=CC(=CC(C(OC(=O)CC(C(C1OC1(C)OC(C)C(C(C1O)N(C)C)OC1(C)OC(C)C(C(C1)(C)O)O)C)O)CC)COC1OC(C)C(C(C1OC)OC)O)C,0.5295750507618869,0.3246343630441296,1
+[O-][N+](=O)/N=C\1/N(Cc2cnc(s2)Cl)COCN1C,0.5313410671453993,0.09739459525907473,0.13333333333333333
+COC(=O)C1(O)c2cc(Cl)ccc2c2c1cccc2,0.546052144921948,0.05087196650796755,1
+COC(=O)c1cc(ccc1[N+](=O)[O-])Oc1ccc(cc1Cl)Cl,0.5465743293153008,0.10739545351005231,1
+CC(C12CCC(O2)(C(C1)OCc1ccccc1C)C)C,0.5466515334085721,0.08211852030367763,1
+Clc1ccc(cc1)CN(C(=O)Nc1ccccc1)C1CCCC1,0.5473855891134007,0.05943549532064574,1
+Oc1ccc2c(c1N=Nc1ccccc1)ccc(c2)S(=O)(=O)O,0.5482080783455129,1.478344656846752,1
+ClCCOc1ccccc1S(=O)(=O)NC(=O)Nc1nc(C)nc(n1)OC,0.5494924735209582,0.14729599082809905,1
+[O-][N+](=O)c1cc(Cl)c(c(c1)Cl)N,0.5651787298028309,0.13483685160949663,1
+Nc1ccc(c(c1)N(=O)=O)N,0.5681125108300529,0.35354400012085735,1
+CCCN(c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-])CCCl,0.5690227874227859,0.0997282984823185,1
+NC1CCCCC1,0.5898716318329822,0.09220486462697597,1
+COC(=O)c1csc(c1S(=O)(=O)NC(=O)n1nc(n(c1=O)C)OC)C,0.5993972829782238,0.974697517038119,1
+COc1cc(Cl)c(cc1Cl)OC,0.6037074787089276,0.10235720239559151,1
+NC1CCCCC1,0.6049965454697254,0.10993035550045605,1
+COCCN(c1c(C)cccc1C)C(=O)CCl,0.6139034987494355,0.1286461396675361,1
+OC(=O)C1C2CCC(C1C(=O)O)O2,0.6177415369409439,0.43214806325138994,1
+CN(C(=O)Nc1ccc(cc1)Cl)C,0.6292491939569526,0.05347112024655942,1
+COc1c(OC)cc(c(c1OC)C(=O)c1c(OC)ccc(c1C)Br)C,0.6352758309016929,0.11898261682472457,1
+OC1(Cn2ncnc2)/C(=C/c2ccc(cc2)Cl)/CCC1(C)C,0.6406279100538178,0.07245691711531918,1
+COC(=O)c1ccc(cc1)C(=O)OC,0.6437193589585136,0.6869017557459655,1
+Clc1ccc(cc1)S(=O)(=O)c1cc(Cl)c(cc1Cl)Cl,0.6459733503975151,0.041979418428373126,1
+CC(OC(=O)NC(C(=O)NC(c1nc2c(s1)cc(cc2)F)C)C(C)C)C,0.6543197874203039,0.11406505535619535,1
+COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1C(=O)OC,0.655542030995076,0.13112768667837227,1
+CCCOC(=O)NCCCN(C)C.Cl,0.6674728552139242,0.5109577674385912,1
+CCCCOCC(OCC(O)C)C,0.6726932978936081,0.5751760289817799,1
+ClCC(=O)N(c1c(C)cccc1CC)COC(C)C,0.7047373288933002,0.09958077993964556,1
+CC1OC(C)OC(C1)OC(=O)C,0.7175892491582392,0.1800593102021387,0.25
+[O-][N+](=O)c1cc(Cl)c(c(c1)Cl)N,0.7245881151318344,0.16130841961480835,1
+CCCCOCCOCCOCc1cc2OCOc2cc1CCC,0.7386866446932013,0.3350153436602428,1
+COc1nc(nc(c1)OC)NC(=O)NS(=O)(=O)Cc1ccccc1C(=O)OC,0.7529208210920754,0.5028214604937333,1
+O=C(C1C(C1(C)C)C=C(C)C)OCN1C(=O)C2=C(C1=O)CCCC2,0.7543614918373561,0.10396626070058967,1
+CCOc1nc(F)cc2n1nc(n2)S(=O)(=O)Nc1c(Cl)cccc1C(=O)OC,0.7561469746838736,0.2345262145021008,1
+COc1cc(OC)nc(n1)NC(=O)NS(=O)(=O)Cc1ccccc1C(=O)O,0.7795645307119917,0.5813782889511574,1
+Clc1ccc(c(n1)C(=O)O)Cl,0.7812519531298828,0.3047209470891338,1
+COC(=O)NS(=O)(=O)c1ccc(cc1)N,0.7817895162025876,0.48006440533268346,1
+ClCC[N+](C)(C)C.[Cl-],0.7907917926187115,2.3566838959846437,1
+COC(CCCC(C/C=C/C(=C/C(=O)OC(C)C)/C)C)(C)C,0.8052269925229198,0.7514525775875477,1
+CCOc1cc(ccc1OCC)NC(=O)OC(C)C,0.8241033622809132,0.3090825648890777,1
+OC(=O)c1nc(Cl)c(c(c1Cl)N)Cl,0.8282972172278201,0.4720460499425296,1
+Nc1ccc(c(c1)C)NOS(=O)(=O)O,0.8431459792705229,0.3729699848772478,1
+CCOC(=O)C1OC1(C)c1ccccc1,0.8485352051922984,0.15204782183286927,1
+ClCC[N+](C)(C)C.[Cl-],0.860381470369158,2.3566838959846437,1
+CCCCNC(=O)n1c(NC(=O)OC)nc2c1cccc2,0.8611255282660666,0.1627783573692901,1
+OCCn1c(C)ncc1[N+](=O)[O-],0.8764039114257128,0.07062719125960476,1
+ClCCP(=O)(O)O,0.9066120392542251,2.424380344082731,1
+COP(=O)OC,0.9086866261501474,0.0020899832476404022,0.1
+Nc1nc(NC2CC2)nc(n1)N,0.9387196585948812,0.10982169517930987,1
+OCCNc1ccc(cc1OCCO)N(=O)=O,0.9453881078267568,0.9356572196349415,1
+O=N(=O)c1cccc2c1cccc2,0.952831491808421,0.19375245039704106,1
+O=C(C1(C)CCCCC1)Nc1ccc(c(c1Cl)Cl)O,0.9662594125910484,0.42312539665837845,1
+Oc1cccc2c1nccc2,0.9851335765350275,0.5509106089207596,1
+CCCOC(=O)c1ccc(cn1)C(=O)OCCC,0.9949124950582696,1.1075161098582462,1
+Oc1noc(c1)C,0.9991119005328597,4.460830164062197,0.1
+CC[N](=C1C=CC(=C(c2ccc(cc2)N(Cc2cccc(c2)S(=O)(=O)O)CC)c2ccc(cc2)N(C)C)C=C1)Cc1cccc(c1)S(=O)(=O)O,1.009963174498295,0.18540142003081284,1
+OC(=O)c1nc(Cl)c(c(c1Cl)N)Cl,1.0353715215347752,0.3638306361396559,1
+ClCC[N+](C)(C)C,1.0602168942789227,0.9100716288762254,1
+Clc1ccccc1,1.0661274430976688,0.09929943773759063,1
+CCOC(=O)CC(C(=O)OCC)SP(=S)(OC)OC,1.0897268363577188,0.024525489375934365,1
+O=C1CCCCCN1,1.10465364954589,0.7540424650828417,1
+Cc1cc(C)nc(n1)Nc1ccccc1,1.1091497729605546,0.12083480234381865,1
+COc1cccc(c1C)C(=O)NN(C(C)(C)C)C(=O)c1cc(C)cc(c1)C,1.1154252951100516,0.2559914298530539,1
+COC(=O)C(=CC=CC(=CC=CC=C(C=CC=C(C=CC1=C(C)CCCC1(C)C)C)C)C)C,1.119409718240544,0.022228348031877943,1
+[O-][N+](=O)c1cc(Cl)c(c(c1)Cl)N,1.159340984210935,0.15334536959372352,1
+Oc1ccc(cc1Cl)C(C)(C)C,1.1697007223226876,0.10384161634159869,1
+Oc1ccccc1c1ccccc1,1.1750384237564568,0.20553569827566362,1
+COc1cc(OC)nc(n1)NC(=O)NS(=O)(=O)Nc1ccccc1C(=O)N(C)C,1.1780461209768547,0.42683956236105325,1
+CON=C(c1ccccc1COc1ccccc1C)C(=O)OC,1.1807966969350603,0.3457416736197563,1
+CCCOc1nn(c(=O)n1C)C(=O)[N-]S(=O)(=O)c1ccccc1C(=O)OC.[Na+],1.1894202967675005,0.7650789414872524,1
+CO/N=C(\c1ccccc1COc1ccccc1C)/C(=O)OC,1.1967534090558043,0.34191903724770456,1
+CON=C(c1ccccc1COc1ccccc1C)C(=O)OC,1.1967534090558043,0.3631304872247199,1
+OC(=O)Cc1cccc2c1cccc2,1.205650068257516,0.17731115397814587,1
+OCc1cc(N=Nc2ccc(c3c2cccc3)S(=O)(=O)O)c(c(c1O)N=Nc1ccc(c2c1cccc2)S(=O)(=O)O)O,1.2093346835379808,1.490670783637784,1
+CC1=CC(=O)CC(C1)(C)C,1.295160023171064,0.11315631785675133,1
+COC(=O)Nc1nc2c([nH]1)cccc2,1.3076226134187396,0.083049663729908,1
+C[N]1(C)CCCCC1,1.3133857473480115,0.44032207102935567,1
+OC1CCC2(C(C1)CCC1C2CCC2(C1CCC2C(CCC(=O)O)C)C)C,1.3277652171188237,0.8209063397614011,0.21052631578947367
+COc1cc(OC)nc(n1)NC(=O)NS(=O)(=O)N(S(=O)(=O)C)C,1.3411855059279478,0.4773030778075665,1
+Oc1ccc(c(c1)C(C)(C)C)O,1.3536524792656537,0.2280341915527015,1
+COc1ncc(c2n1nc(n2)S(=O)(=O)Nc1c(F)cccc1F)F,1.391657397996453,0.18238639424428663,1
+OCC1OC2OC3C(CO)OC(C(C3O)O)OC3C(CO)OC(C(C3O)O)OC3C(CO)OC(C(C3O)O)OC3C(OC(OC4C(OC(OC5C(OC(OC1C(C2O)O)C(O)C5O)CO)C(O)C4O)CO)C(O)C3O)CO,1.4097112541302337,1.9926642160138068,1
+O=C(Nc1cc(F)cc(c1)F)N/N=C(/c1ncccc1C(=O)O)\C,1.4120001283962829,0.4813337208316472,1
+CCCN(c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-])CCC,1.4316432834082535,0.08660753713188539,1
+CCCCOC(=O)c1ccccc1C(=O)OCc1ccccc1,1.504675539130048,0.2949966180640334,1
+COC(=O)c1c(Cl)c(Cl)c(c(c1Cl)Cl)C(=O)OC,1.5061863289853148,0.7244629133093408,1
+Fc1cc2CCC(n3c2c(c1)c(=O)c(c3)C(=O)O)C,1.531109972815908,0.23409347986251686,1
+CC(Oc1cccc(c1)NC(=O)c1ccccc1C(F)(F)F)C,1.5465050300849357,0.10785437781127803,1
+CCC(c1noc(c1)NC(=O)c1c(OC)cccc1OC)(CC)C,1.5854670852219546,0.600658613403707,1
+CC(C1(C)N=C(NC1=O)c1nc2ccccc2cc1C(=O)O)C,1.605986191473768,0.8661050233290373,1
+c1ccc(cc1)c1ccccc1,1.6211890708511503,0.19623937242557016,1
+NCC(c1ccc(cc1)O)O,1.6320834707547616,0.7275430591610657,1
+ClC(SN1C(=O)c2c(C1=O)cccc2)(Cl)Cl,1.6860133324539086,0.2279278063389893,1
+CCc1c(C(=O)O)c(=O)cnn1c1ccc(cc1)Cl,1.6864553664875628,0.06988049924178537,1
+ClCC#CCOC(=O)Nc1cccc(c1)Cl,1.743505808935165,0.17838785643679636,1
+O/C(=C\1/C(=O)CC(CC1=O)C(=O)O)/C1CC1,1.752821172367082,1.8740405032498018,0.35294117647058826
+OC(=O)CNCP(=O)(O)O,1.7743806406081915,0.700841565636653,0.16666666666666666
+C[N+]1(C)CCCCC1.[Cl-],1.790706021930536,0.39535589379894426,1
+COc1ccc(c(c1)OC)N,1.8018201517132568,0.3052747819868152,1
+CCC(c1noc(c1)NC(=O)c1c(OC)cccc1OC)(CC)C,1.8050858655278421,0.6950927673017638,1
+CCC(=O)C1=C([O-])CC(CC1=O)C(=O)[O-].[Ca+2],1.874040503249802,0.7843481454913989,1
+CC(C1(C)N=C(NC1=O)c1ncccc1C(=O)O)C,1.913681483026602,0.5005571515667719,1
+OC(=O)COc1nc(F)c(c(c1Cl)N)Cl,1.9605490478397496,0.13193009603279973,1
+Clc1ccc(cc1)Cl,2.0407891160090657,0.059940824641567726,1
+ClCCP(=O)(O)O,2.062369371738619,2.424380344082731,1
+CCCCOC(=O)c1ccccc1C(=O)OCCCC,2.1556100397968727,0.3306107423417943,1
+Clc1ccccc1CC(C1(Cl)CC1)(Cn1nc[nH]c1=S)O,2.178589749473798,0.04476029229671277,1
+c1ccc(cc1)c1ccccc1OCC1CO1,2.209744922072461,0.8308076307932614,1
+ClCC[N](C)(C)C,2.2427665071284903,0.7132413319358359,1
+COc1cc(OC)n2c(n1)nc(n2)NS(=O)(=O)c1c(OC)nccc1C(F)(F)F,2.302288500094267,0.07863543399156463,1
+CC=Cc1ccc(cc1)OC,2.3211612715861247,0.57509958551583,1
+CC(OC(=O)Nc1cccc(c1)Cl)C,2.340158076742021,0.09379184389027877,1
+COC(=O)c1ccccc1O,2.366127776683809,0.40910477089720465,1
+CO/N=C(\c1ccccc1COc1ccccc1C)/C(=O)OC,2.4002085592886893,0.30308910509520615,1
+CCOC(=O)C=C,2.477130986890983,0.07252011054930413,1
+COC(=O)CC(c1ccc(cc1)Cl)NC(=O)C(C(C)C)NC(=O)OC(C)C,2.5070128670931195,0.1392906426524743,1
+COc1nn(c(=O)n1C)C(=O)NS(=O)(=O)c1ccccc1OC(F)(F)F,2.5233463155295692,0.17079237831250552,1
+CCc1ccccc1,2.741016342485753,0.10816844160527843,1
+CC(c1ccccc1)C,2.7539366734341955,0.10052257452618389,1
+COc1cc(OC)nc(n1)NC(=O)NS(=O)(=O)c1n2ccccc2nc1S(=O)(=O)CC,2.7556956072872962,0.47934744084043035,1
+CC(=C)C(=O)O,2.8807316686731115,9.313172081918692,0.14285714285714285
+CC(N(c1c(cc(cc1N(=O)=O)C(F)(F)F)N(=O)=O)C(C)C)C,2.982590173767195,0.06936563821486286,1
+CN(NC(=O)CCC(=O)O)C,3.0342556221759884,0.4994850207500349,0.13043478260869565
+CCCOC(=O)NCCCN(C)C.Cl,3.0347765817059753,0.4951806205403354,1
+Oc1ccccc1c1ccccc1,3.119727015073393,0.20208034019115165,1
+Clc1cnc2c(c1)ccc(c2C(=O)O)Cl,3.127347059508829,0.22941129754989037,1
+C=Cc1ccccc1,3.2885539503358263,0.40610034701537245,1
+OC(=O)CNCP(=O)(O)O,3.3121771958019575,0.700841565636653,0.16666666666666666
+Cc1cnc2c(c1)ccc(c2C(=O)O)Cl,3.3387517363764943,0.12532539229532583,1
+CCCOC(=O)NCCCN(C)C,3.611885866531256,0.6205586301556104,1
+CCOP(=O)O,3.6347465046005896,0.0037493977240957736,1
+Oc1ccccc1,3.655248831064175,0.4577004151346716,1
+[O-]P(=O)OCC.[O-]P(=O)OCC.[O-]P(=O)OCC.[Al+3],3.6853523538557287,0.0036565143470414633,1
+CC1CCC(C(C1)O)C(C)C,3.7948308388559964,7.0995880579775275,1
+CCc1ccccc1,3.843074459567654,0.10475847999476977,1
+CC(c1ccccc1)C,3.8438632722857955,0.10445904296431191,1
+COc1ccc(cc1)N,3.8488877932280037,0.316595477102011,1
+OCCO,4.027850816139244,2.4679094429571533,1
+CCCCC(COC(=O)CCCCC(=O)OCC(CCCC)CC)CC,4.047856676081442,0.32235006632991803,1
+CCCOC(=O)c1cc(O)c(c(c1)O)O,4.071644352421931,0.8719186417792422,1
+CC(CCCC1(C)CCc2c(O1)c(C)c(c(c2C)OC(=O)C)C)CCCC(CCCC(C)C)C,4.230630449818821,1.228920788624059,1
+COc1ccc(cc1N=Nc1c(O)c(cc2c1cccc2)C(=O)Nc1cccc(c1)N(=O)=O)N(=O)=O,4.308389780762046,0.29938908034808137,1
+Oc1ccc(nn1)O,4.460830164062196,0.9991119005328597,0.1
+S=c1sc2c([nH]1)cccc2,4.484270077422418,0.08651938034196377,1
+C[N+]1(C)CCCCC1.[Cl-],4.570309399255547,0.4822129846769693,1
+CC(OC(=O)Nc1cccc(c1)Cl)C,4.680316153484042,0.09870482366830721,1
+Clc1cc(N)c(c(n1)C(=O)O)Cl,4.830587434212229,0.3382938372048412,1
+COc1cc(OC)nc(n1)NC(=O)NS(=O)(=O)c1ncccc1C(=O)N(C)C,5.08765706618306,0.7202216754770163,1
+OC(=O)CNCP(=O)(O)O,5.559726007239,0.700841565636653,0.16666666666666666
+Oc1ccccc1c1ccccc1,5.875192118782284,0.2034781041782699,1
+OC(=O)CNCP(=O)(O)O,5.914602135360638,0.700841565636653,0.16666666666666666
+CCOc1ccc(cc1N)NC(=O)C,6.1010029534002825,0.21386700637051745,1
+Nc1ccc(cc1)O,6.286318149278613,0.5281758108186807,1
+NC(=S)NNC(=S)N,6.303842268414009,0.009908374299988254,0.14285714285714285
+NC(=O)c1cnccn1,6.408762052980724,0.08275360610326041,1
+OCCO,6.44456130582279,2.4679094429571533,1
+OC(=O)c1ccc(cc1N)N(=O)=O,6.506215164982792,0.3198789458458921,1
+Oc1cc(O)c2c(c1)oc(c(c2=O)O)c1ccc(c(c1)O)O,6.729846937340625,0.7372084952076587,1
+OC(=O)CNCP(=O)(O)O,7.180326992327815,0.700841565636653,0.16666666666666666
+ClCC(=O)c1ccc(cc1)NC(=O)C,7.465334624174738,0.1673576108507557,1
+O=C1OC(=O)c2c1cccc2,8.000509872156579,0.5274835013205029,1
+CCCOC(=O)c1ccc(cc1)O,8.324062177858794,0.6944698273376053,1
+OCC(C1OC(=O)C(=C1O)O)O,8.82332300652517,3.7042082617048484,1
+CCOC(=O)COC(=O)c1ccccc1C(=O)OCC,8.919866912731305,0.19875027484458077,1
+O=C1CCCCC1,9.272184465524795,0.1823661785242976,1
+OC(=O)C=CC(=O)O,9.313172081918696,2.8807316686731115,0.14285714285714285
+COC(=O)c1ccc(cc1)O,9.858865736182537,0.4737443305504875,1
+COC(=O)c1ccccc1C(=O)OC,10.299509743336218,0.5316830719957618,1
+OC1C2C(N(C)C)C(=O)C(=C(O)N)C(=O)C2(O)C(=O)C2=C(O)c3c(C(C12)(C)O)c(Cl)ccc3O,10.50761860949369,0.3375075061669371,0.6216216216216216
+OCCO,14.822491003392418,3.7042082617048484,1
+OCCO,16.111403264556976,2.4679094429571533,1
+CCCCCCCCCCCCCCCCCC(=O)OCC(C1OCC(C1O)O)O,16.727105323218392,0.40281615341572896,1
+OCC(C1OC(=O)C(=C1O)O)O,17.323010613197102,12.13655283247625,1
+[O-]S(=O)(=O)NC1CCCCC1.[Na+],17.900880706433757,0.19005758519114205,1
+O=C1NS(=O)(=O)c2c1cccc2,19.66323569952698,0.3155082567836235,1
+CCCCCCCCCCCC(=O)OCC(C1OCC(C1O)O)O,19.866710908558982,0.3736091886918899,1
+CCOC(=O)c1ccccc1C(=O)OCC,19.95615854702247,0.23974536678467762,1
+OC(=O)c1ccccc1N,20.060380944519448,0.5033545540660739,1
+OCCO,32.22280652911395,0.2885556079481661,1
+OCC(CO)O,74.73899985905678,1.1811816236152828,1
diff --git a/paper/data/combined-cv.id b/paper/data/combined-cv.id
new file mode 100644
index 0000000..0591b35
--- /dev/null
+++ b/paper/data/combined-cv.id
@@ -0,0 +1 @@
+56c42e262b72ed11e7000001
diff --git a/paper/data/combined-test-predictions.csv b/paper/data/combined-test-predictions.csv
index d378693..976ca41 100644
--- a/paper/data/combined-test-predictions.csv
+++ b/paper/data/combined-test-predictions.csv
@@ -1,151 +1,151 @@
-SMILES,LOAEL,Confidence,Dataset
-O=S1OCC2C(CO1)C1(C(C2(Cl)C(=C1Cl)Cl)(Cl)Cl)Cl,0.0003638692469573398,1,combined-prediction
-CCSCSP(=S)(OCC)OCC,0.0016526156453431225,1,combined-prediction
-CCOP(=S)(OCC)SCSP(=S)(OCC)OCC,0.0019642241382633796,1,combined-prediction
-CCOP(=S)(SCSC(C)(C)C)OCC,0.0034018170593390737,1,combined-prediction
-CCOP(=S)(SCn1c(=O)oc2c1ccc(c2)Cl)OCC,0.007284468451831286,1,combined-prediction
-CCCSP(=O)(SCCC)OCC,0.007302272401231296,1,combined-prediction
-CCOP(=O)(Oc1ccc(c(c1)C)SC)NC(C)C,0.008370828170108851,1,combined-prediction
-CSc1ccc(cc1C)OP(=S)(OC)OC,0.00964124005965057,1,combined-prediction
-COc1sc(=O)n(n1)CSP(=S)(OC)OC,0.011073447351926287,1,combined-prediction
-CCOP(=S)(Oc1ccc2c(c1)oc(=O)c(c2C)Cl)OCC,0.01194888189741255,1,combined-prediction
-CNC(=O)CSP(=S)(OC)OC,0.011977939066676562,1,combined-prediction
-COP(=S)(Oc1ccc(c(c1)C)[N+](=O)[O-])OC,0.01253495184268662,1,combined-prediction
-CCCCOC(=O)C(Oc1ccc(cc1)Oc1ccc(cn1)C(F)(F)F)C,0.012661569287564291,1,combined-prediction
-CCOP(=S)(Oc1nc(Cl)c(cc1Cl)Cl)OCC,0.013006855002501155,1,combined-prediction
-CCOP(=S)(Oc1cc(C)nc(n1)C(C)C)OCC,0.01357913208550989,1,combined-prediction
-CSc1nnc(c(=O)n1N)C(C)(C)C,0.013701160159437665,0.11538461538461539,combined-prediction
-CCOP(=O)(SC(CC)C)SC(CC)C,0.014239911275829733,1,combined-prediction
-N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.014761743547403035,1,combined-prediction
-OC(=O)C(Oc1ccc(cc1)Oc1ncc(cc1Cl)C(F)(F)F)C,0.015808512748976208,1,combined-prediction
-N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C(c1ccc(cc1)Cl)C(C)C,0.016001687478085606,1,combined-prediction
-COP(=O)(NC(=O)C)SC,0.01603420284847195,1,combined-prediction
-N#CC(c1ccc(c(c1)Oc1ccccc1)F)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.016181663783863653,1,combined-prediction
-CCOC(=O)C(Oc1ccc(cc1)Oc1cnc2c(n1)ccc(c2)Cl)C,0.01709608560436501,1,combined-prediction
-C=CCOC(c1ccc(cc1Cl)Cl)Cn1cncc1,0.01934170105611924,1,combined-prediction
-OC(=O)C(Oc1ccc(cc1Cl)Cl)C,0.01969166251145765,1,combined-prediction
-CCN(c1nc(cc(n1)C)OP(=S)(OC)OC)CC,0.02036765211069526,1,combined-prediction
-CCOP(=S)(Oc1ncn(n1)c1ccccc1)OCC,0.021095802363679313,1,combined-prediction
-CNC(=O)Oc1cc(C)c(c(c1)C)SC,0.02137860199550009,1,combined-prediction
-COP(=S)(SCn1nnc2c(c1=O)cccc2)OC,0.023715894144774617,1,combined-prediction
-CCOC(=O)CC(C(=O)OCC)SP(=S)(OC)OC,0.02452548937593434,1,combined-prediction
-Clc1cc(Cl)c(cc1n1nc(n(c1=O)C(F)F)C)NS(=O)(=O)C,0.024541035827570765,1,combined-prediction
-COP(=O)(OC=C(Cl)Cl)OC,0.024813902049782052,1,combined-prediction
-COP(=S)(Oc1nc(Cl)c(cc1Cl)Cl)OC,0.02511976746876867,1,combined-prediction
-N#Cc1nn(c(c1S(=O)C(F)(F)F)N)c1c(Cl)cc(cc1Cl)C(F)(F)F,0.026175977495430238,1,combined-prediction
-Fc1ccc(cc1)[Si](c1ccc(cc1)F)(Cn1cncn1)C,0.026214170808255548,1,combined-prediction
-CC(C(c1cncnc1)(c1ccc(cc1)OC(F)(F)F)O)C,0.026593616679335016,1,combined-prediction
-N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)(C)C,0.027078346141040016,1,combined-prediction
-N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Br)Br,0.02720741121158317,1,combined-prediction
-CNC(=O)ON=C(C(=O)N(C)C)SC,0.02723422893053635,1,combined-prediction
-O=C(c1ccc(cc1S(=O)(=O)C)C(F)(F)F)c1cnoc1C1CC1,0.028785189377791072,1,combined-prediction
-CCCCN(SN(C(=O)Oc1cccc2c1OC(C2)(C)C)C)CCCC,0.02933672191817046,1,combined-prediction
-CC(N(C(=O)SCC(=C(Cl)Cl)Cl)C(C)C)C,0.03101114141686048,1,combined-prediction
-CN(C(=O)Oc1nc(nc(c1C)C)N(C)C)C,0.03602454025477674,1,combined-prediction
-COC(=O)c1c(nc(c(c1CC(C)C)C1=NCCS1)C(F)(F)F)C(F)F,0.03652048573032131,1,combined-prediction
-CCCC1COC(O1)(Cn1cncn1)c1ccc(cc1Cl)Cl,0.03831103939366667,1,combined-prediction
-CC1N(C(=O)NC2CCCCC2)C(=O)SC1c1ccc(cc1)Cl,0.0404095891170599,1,combined-prediction
-CCNc1nc(NC(C)C)nc(n1)Cl,0.04120360138919732,1,combined-prediction
-OC(=O)COc1ccc(cc1Cl)Cl,0.04220782045663331,1,combined-prediction
-CCN(C(=O)SCc1ccc(cc1)Cl)CC,0.042584273505466334,1,combined-prediction
-BrC1COC(C1)(Cn1cncn1)c1ccc(cc1Cl)Cl,0.042806163072444406,1,combined-prediction
-OC(=O)COc1nc(Cl)c(cc1Cl)Cl,0.04327585927626025,1,combined-prediction
-CON(C(=O)Nc1ccc(cc1)Br)C,0.04331377020795739,1,combined-prediction
-CON(C(=O)Nc1ccc(c(c1)Cl)Cl)C,0.04491567539992076,1,combined-prediction
-Clc1ccc(c(c1)Cl)C1(OCCO1)Cn1cncn1,0.0456310249928634,1,combined-prediction
-CCOCn1c(c2ccc(cc2)Cl)c(c(c1C(F)(F)F)Br)C#N,0.047490155350518225,1,combined-prediction
-N#CC(c1ccccc1)(Cn1cncn1)CCc1ccc(cc1)Cl,0.04952824330064722,1,combined-prediction
-CCOC(=O)c1cn2nc(cc2nc1C)OP(=S)(OCC)OCC,0.04976438552720587,1,combined-prediction
-OC(C(Cl)(Cl)Cl)(c1ccc(cc1)Cl)c1ccc(cc1)Cl,0.049903283432057355,1,combined-prediction
-Clc1ccc(cc1)C(c1ccccc1Cl)(c1cncnc1)O,0.0502876438433783,1,combined-prediction
-ClC(=CC1C(C1(C)C)C(=O)OCc1cccc(c1)Oc1ccccc1)Cl,0.05112367301257726,1,combined-prediction
-CN1CN(C)CSC1=S,0.05143501540726455,1,combined-prediction
-Clc1ccc(cc1)CCC(C(C)(C)C)(Cn1cncn1)O,0.05167638607399666,1,combined-prediction
-CN(C(=S)SSC(=S)N(C)C)C,0.052029910797683425,1,combined-prediction
-CNC(=O)Oc1cccc2c1OC(C2)(C)C,0.05274344365338553,1,combined-prediction
-O=C(NC(=O)c1c(F)cccc1F)Nc1cc(Cl)c(c(c1F)Cl)F,0.05286631638058653,1,combined-prediction
-CCOc1cc(ccc1[N+](=O)[O-])Oc1ccc(cc1Cl)C(F)(F)F,0.053208362874294673,1,combined-prediction
-CNC(=O)Oc1ccccc1OC(C)C,0.05328869177586038,1,combined-prediction
-COC(=O)N(C(=O)N1COC2(C(=N1)c1ccc(cc1C2)Cl)C(=O)OC)c1ccc(cc1)OC(F)(F)F,0.054676171737019746,1,combined-prediction
-COP(=S)(SCN1C(=O)c2c(C1=O)cccc2)OC,0.05536474598180194,1,combined-prediction
-CC(Nc1nc(NC(C)C)nc(n1)Cl)C,0.057817340618821475,1,combined-prediction
-O=C(N(C)C)Nc1cccc(c1)C(F)(F)F,0.057843156201256546,1,combined-prediction
-CCCN(C(=O)n1cncc1)CCOc1c(Cl)cc(cc1Cl)Cl,0.05900731758603697,1,combined-prediction
-CC(=CC1C(C1(C)C)C(=O)OCc1cccc(c1)Oc1ccccc1)C,0.059860013602209265,1,combined-prediction
-O=C(C(C)(C)C)C(n1ncnc1)Oc1ccc(cc1)Cl,0.061223837435072606,1,combined-prediction
-OC(=O)COc1ccc(cc1C)Cl,0.06409863417711698,1,combined-prediction
-Clc1ccc(cc1)CN(C(=O)Nc1ccccc1)C1CCCC1,0.06564925552956072,1,combined-prediction
-Cc1ccc(cc1)N(S(=O)(=O)N(C)C)SC(Cl)(Cl)F,0.0672060941474649,1,combined-prediction
-O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(c(c1)Cl)OC(C(OC(F)(F)F)F)(F)F,0.06936283864904705,1,combined-prediction
-CC(NC(=O)N1CC(=O)N(C1=O)c1cc(Cl)cc(c1)Cl)C,0.07084460220094288,1,combined-prediction
-N#Cc1c(Cl)c(C#N)c(c(c1Cl)Cl)Cl,0.07089396189028405,1,combined-prediction
-c1scc(n1)c1nc2c([nH]1)cccc2,0.07124290059304189,1,combined-prediction
-O=C1N(OCC1(C)C)Cc1ccccc1Cl,0.07178252950433608,1,combined-prediction
-C#CCOS(=O)OC1CCCCC1Oc1ccc(cc1)C(C)(C)C,0.07255184301853296,1,combined-prediction
-OC(C(C)(C)C)C(n1ncnc1)Oc1ccc(cc1)Cl,0.07311717098654685,1,combined-prediction
-Clc1c(Cl)c([N+](=O)[O-])c(c(c1Cl)Cl)Cl,0.07316751274390274,1,combined-prediction
-CCC(Nc1c(cc(cc1[N+](=O)[O-])C(C)(C)C)[N+](=O)[O-])C,0.07663751118571578,1,combined-prediction
-CCOCN(c1c(C)cccc1CC)C(=O)CCl,0.07899149299378629,1,combined-prediction
-Clc1cc(ccc1Oc1ccc(c(c1)C(=O)NS(=O)(=O)C)[N+](=O)[O-])C(F)(F)F,0.07907394396694326,1,combined-prediction
-ClC(SN1C(=O)C2C(C1=O)CC=CC2)(Cl)Cl,0.07976114599708196,1,combined-prediction
-CCCCC(c1ccc(cc1)Cl)(Cn1cncn1)C#N,0.08067037428729226,1,combined-prediction
-COC(=O)Nc1nc2c([nH]1)cccc2,0.0814533769149189,1,combined-prediction
-COCC(N(c1c(C)cccc1CC)C(=O)CCl)C,0.08251097640354867,1,combined-prediction
-Clc1ccccc1c1nnc(nn1)c1ccccc1Cl,0.08591820939424631,1,combined-prediction
-O=C1N(c2cc(Cl)cc(c2)Cl)C(=O)C2(C1(C)C2)C,0.08912049056135805,1,combined-prediction
-N#Cc1c(Cl)cccc1Cl,0.0921129946367937,1,combined-prediction
-CC(OC(=O)C(c1ccc(cc1)Br)(c1ccc(cc1)Br)O)C,0.09289647796203974,1,combined-prediction
-ClCC1CN(C(=O)C1Cl)c1cccc(c1)C(F)(F)F,0.0941350572749445,1,combined-prediction
-CC(OC(=O)Nc1cccc(c1)Cl)C,0.10044437465404737,1,combined-prediction
-O=C(N(C)C)Nc1ccc(c(c1)Cl)Cl,0.10144600229996162,1,combined-prediction
-CC(Oc1cccc(c1)NC(=O)c1ccccc1C(F)(F)F)C,0.10535546107335386,1,combined-prediction
-C=CC1(C)OC(=O)N(C1=O)c1cc(Cl)cc(c1)Cl,0.10756688196876785,1,combined-prediction
-Nc1nc(NC2CC2)nc(n1)N,0.10982169517930987,1,combined-prediction
-O=C1OC(C(=O)N1Nc1ccccc1)(C)c1ccc(cc1)Oc1ccccc1,0.11918717728199056,1,combined-prediction
-CNC(=O)Oc1cccc2c1cccc2,0.1209025875895827,1,combined-prediction
-COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1C(=O)OC,0.12263485736061217,1,combined-prediction
-O=Cc1ccco1,0.12486833177320306,0.1111111111111111,combined-prediction
-CCOc1ccc(cc1)C(COCc1cccc(c1)Oc1ccccc1)(C)C,0.12843709655351573,1,combined-prediction
-COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1CCC(F)(F)F,0.1291134993044419,1,combined-prediction
-CC(Oc1cc(c(cc1Cl)Cl)n1nc(oc1=O)C(C)(C)C)C,0.12913282716460453,1,combined-prediction
-OC(=O)COc1nc(F)c(c(c1Cl)N)Cl,0.13193009603279973,1,combined-prediction
-CCC(=O)Nc1ccc(c(c1)Cl)Cl,0.13348596957039296,1,combined-prediction
-CC(Oc1ccccn1)COc1ccc(cc1)Oc1ccccc1,0.13723730603736453,1,combined-prediction
-CC(Cc1ccc(cc1)C(C)(C)C)CN1CC(C)OC(C1)C,0.14135429894506185,1,combined-prediction
-COC(=O)N(c1ccccc1COc1ccn(n1)c1ccc(cc1)Cl)OC,0.1422803945334266,1,combined-prediction
-O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(cc1)Cl,0.14262489512256243,1,combined-prediction
-CCOC(=O)C(Cc1cc(c(cc1Cl)F)n1nc(n(c1=O)C(F)F)C)Cl,0.14416196256229533,1,combined-prediction
-ClCCOc1ccccc1S(=O)(=O)NC(=O)Nc1nc(C)nc(n1)OC,0.14729599082809905,1,combined-prediction
-[O-][N+](=O)c1cc(Cl)c(c(c1)Cl)N,0.15109322689681717,1,combined-prediction
-C#CCC1=C(C)C(CC1=O)OC(=O)C1C(C1(C)C)C=C(C)C,0.15407527700390689,1,combined-prediction
-OC(C(C)(C)C)C(n1cncn1)Oc1ccc(cc1)c1ccccc1,0.1554702671615349,1,combined-prediction
-CCCCCCCCSC(=O)Oc1cc(Cl)nnc1c1ccccc1,0.15669364902191535,1,combined-prediction
-CC(=CC1C(C1(C)C)C(=O)OCc1coc(c1)Cc1ccccc1)C,0.16342863567669363,1,combined-prediction
-N#Cc1sc2=c(sc1C#N)c(=O)c1c(c2=O)cccc1,0.167935147286533,1,combined-prediction
-N#Cc1c[nH]cc1c1cccc2c1OC(O2)(F)F,0.17957314863237633,1,combined-prediction
-CCOC(=O)NCCOc1ccc(cc1)Oc1ccccc1,0.18966068443126202,1,combined-prediction
-Cc1nc(Nc2ccccc2)nc(c1)C1CC1,0.19454416629038565,1,combined-prediction
-Oc1ccccc1c1ccccc1,0.20273166023828132,1,combined-prediction
-CCOc1ccc2c(c1)C(=CC(N2)(C)C)C,0.20999292062465813,1,combined-prediction
-O=C(C1=C(C)OCCS1)Nc1ccccc1,0.21865371953286197,1,combined-prediction
-COC(=O)NC(=S)Nc1ccccc1NC(=S)NC(=O)OC,0.23311870284543604,1,combined-prediction
-COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1Cl,0.2405933419678417,1,combined-prediction
-ClC(SN1C(=O)c2c(C1=O)cccc2)(Cl)Cl,0.24709110501373716,1,combined-prediction
-CCN(C(=O)C(Oc1cccc2c1cccc2)C)CC,0.24709794800636262,1,combined-prediction
-COc1cccc(c1C)C(=O)NN(C(C)(C)C)C(=O)c1cc(C)cc(c1)C,0.2488946852404996,1,combined-prediction
-c1ccc(cc1)Nc1ccccc1,0.25118644343506413,1,combined-prediction
-CCC(n1c(=O)[nH]c(c(c1=O)Br)C)C,0.2948426093660432,0.25,combined-prediction
-COCN(c1c(CC)cccc1CC)C(=O)CCl,0.30333289283418186,1,combined-prediction
-COP(=O)(SC)N,0.33442367385922134,1,combined-prediction
-CCCCOCCOCCOCc1cc2OCOc2cc1CCC,0.3350153436602428,1,combined-prediction
-Cc1cccc2c1n1cnnc1s2,0.3856935237852282,1,combined-prediction
-O=C(C1(C)CCCCC1)Nc1ccc(c(c1Cl)Cl)O,0.42312539665837845,1,combined-prediction
-CCC(c1noc(c1)NC(=O)c1c(OC)cccc1OC)(CC)C,0.4306936653208536,1,combined-prediction
-OC(=O)c1nc(Cl)c(c(c1Cl)N)Cl,0.4720460499425296,1,combined-prediction
-CC(N1C(=O)c2ccccc2NS1(=O)=O)C,0.4889107282616924,1,combined-prediction
-COC(=O)NS(=O)(=O)c1ccc(cc1)N,0.4978392275403079,1,combined-prediction
-OC(=O)CNCP(=O)(O)O,0.700841565636653,0.16666666666666666,combined-prediction
-COC(=O)c1c(Cl)c(Cl)c(c(c1Cl)Cl)C(=O)OC,0.7477534452700889,1,combined-prediction
-CC(C1(C)N=C(NC1=O)c1nc2ccccc2cc1C(=O)O)C,0.8106254748473309,1,combined-prediction
-CCc1ccc(cc1)C(=O)NN(C(C)(C)C)C(=O)c1cc(C)cc(c1)C,1.0000960182418923,1,combined-prediction
-ClCCP(=O)(O)O,2.424380344082731,1,combined-prediction
+SMILES,LOAEL_measured_median,LOAEL_predicted,Confidence,Dataset
+N#Cc1nn(c(c1S(=O)C(F)(F)F)N)c1c(Cl)cc(cc1Cl)C(F)(F)F,0.0001361095787305931,0.026175977495430252,1,combined-prediction
+OC(=O)C(Oc1ccc(cc1)Oc1ncc(cc1Cl)C(F)(F)F)C,0.0002764719490734748,0.015808512748976208,1,combined-prediction
+CCOP(=S)(SCSC(C)(C)C)OCC,0.000277363084031507,0.0034018170593390737,1,combined-prediction
+CCSCSP(=S)(OCC)OCC,0.0006144925543928096,0.0016526156453431208,1,combined-prediction
+CCOP(=O)(SC(CC)C)SC(CC)C,0.0008728063120409454,0.014239911275829733,1,combined-prediction
+CNC(=O)CSP(=S)(OC)OC,0.001090477150926923,0.011977939066676562,1,combined-prediction
+COP(=O)(SC)N,0.0020548549621536454,0.33442367385922134,1,combined-prediction
+CSc1ccc(cc1C)OP(=S)(OC)OC,0.0025868753585247565,0.00964124005965057,1,combined-prediction
+CCOP(=S)(Oc1ccc2c(c1)oc(=O)c(c2C)Cl)OCC,0.0033630532459809582,0.01194888189741255,1,combined-prediction
+CCOP(=S)(Oc1ncn(n1)c1ccccc1)OCC,0.004149211972577347,0.021095802363679302,1,combined-prediction
+COc1sc(=O)n(n1)CSP(=S)(OC)OC,0.004713372383210075,0.011073447351926287,1,combined-prediction
+CCOP(=S)(OCC)SCSP(=S)(OCC)OCC,0.0049417895576815835,0.0019642241382633796,1,combined-prediction
+CCOP(=O)(Oc1ccc(c(c1)C)SC)NC(C)C,0.005274306112287868,0.008370828170108851,1,combined-prediction
+CC(Cc1ccc(cc1)C(C)(C)C)CN1CC(C)OC(C1)C,0.005601648043851348,0.14135429894506182,1,combined-prediction
+COP(=S)(Oc1nc(Cl)c(cc1Cl)Cl)OC,0.006200913183680908,0.025119767468768657,1,combined-prediction
+OC(C(Cl)(Cl)Cl)(c1ccc(cc1)Cl)c1ccc(cc1)Cl,0.006747899500347733,0.049903283432057355,1,combined-prediction
+COC(=O)N(C(=O)N1COC2(C(=N1)c1ccc(cc1C2)Cl)C(=O)OC)c1ccc(cc1)OC(F)(F)F,0.006820319665576013,0.05467617173701966,1,combined-prediction
+Clc1ccc(cc1)C(c1ccccc1Cl)(c1cncnc1)O,0.007548400798826121,0.0502876438433783,1,combined-prediction
+COP(=S)(SCn1nnc2c(c1=O)cccc2)OC,0.0076105098020530036,0.023715894144774607,1,combined-prediction
+N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Br)Br,0.007818698763639501,0.027207411211583155,1,combined-prediction
+CCN(c1nc(cc(n1)C)OP(=S)(OC)OC)CC,0.008187766847509327,0.02036765211069526,1,combined-prediction
+CCCCOC(=O)C(Oc1ccc(cc1)Oc1ccc(cn1)C(F)(F)F)C,0.008855868434313272,0.012661569287564251,1,combined-prediction
+O=S1OCC2C(CO1)C1(C(C2(Cl)C(=C1Cl)Cl)(Cl)Cl)Cl,0.009706945232441807,0.00036386924695734017,1,combined-prediction
+CCCSP(=O)(SCCC)OCC,0.010068539755671456,0.007302272401231296,1,combined-prediction
+COP(=O)(OC=C(Cl)Cl)OC,0.010408382170442241,0.024813902049782052,1,combined-prediction
+CC(Oc1cc(c(cc1Cl)Cl)n1nc(oc1=O)C(C)(C)C)C,0.012455788330375379,0.12913282716460453,1,combined-prediction
+CNC(=O)Oc1cccc2c1OC(C2)(C)C,0.0139433514779606,0.05274344365338547,1,combined-prediction
+N#Cc1c(Cl)c(C#N)c(c(c1Cl)Cl)Cl,0.015042627044387032,0.07089396189028405,1,combined-prediction
+Fc1ccc(cc1)[Si](c1ccc(cc1)F)(Cn1cncn1)C,0.01585325164934852,0.026214170808255468,1,combined-prediction
+CCOP(=S)(Oc1cc(C)nc(n1)C(C)C)OCC,0.01642869699075557,0.01357913208550989,1,combined-prediction
+N#Cc1c(Cl)cccc1Cl,0.016568667498017633,0.0921129946367937,1,combined-prediction
+CCOP(=S)(Oc1nc(Cl)c(cc1Cl)Cl)OCC,0.0171141884323489,0.013006855002501155,1,combined-prediction
+BrC1COC(C1)(Cn1cncn1)c1ccc(cc1Cl)Cl,0.017185416989653705,0.042806163072444434,1,combined-prediction
+CCN(C(=O)SCc1ccc(cc1)Cl)CC,0.018038670157326797,0.042584273505466334,1,combined-prediction
+CCCN(C(=O)n1cncc1)CCOc1c(Cl)cc(cc1Cl)Cl,0.01991156926953532,0.05900731758603697,1,combined-prediction
+CCOC(=O)c1cn2nc(cc2nc1C)OP(=S)(OCC)OCC,0.020087610909726116,0.04976438552720587,1,combined-prediction
+COP(=S)(SCN1C(=O)c2c(C1=O)cccc2)OC,0.02048398681663214,0.055364745981801966,1,combined-prediction
+CNC(=O)ON=C(C(=O)N(C)C)SC,0.022347753176858155,0.027234228930536335,1,combined-prediction
+COC(=O)N(c1ccccc1COc1ccn(n1)c1ccc(cc1)Cl)OC,0.02320682656135787,0.14228039453342656,1,combined-prediction
+CON(C(=O)Nc1ccc(c(c1)Cl)Cl)C,0.025090939601491648,0.04491567539992076,1,combined-prediction
+O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(cc1)Cl,0.025427825579407606,0.14262489512256243,1,combined-prediction
+O=C(N(C)C)Nc1ccc(c(c1)Cl)Cl,0.02574063309087087,0.10144600229996162,1,combined-prediction
+CCOC(=O)C(Oc1ccc(cc1)Oc1cnc2c(n1)ccc(c2)Cl)C,0.025750915471868897,0.01709608560436501,1,combined-prediction
+N#Cc1sc2=c(sc1C#N)c(=O)c1c(c2=O)cccc1,0.026997497601947272,0.167935147286533,1,combined-prediction
+C=CC1(C)OC(=O)N(C1=O)c1cc(Cl)cc(c1)Cl,0.027961199362093195,0.10756688196876785,1,combined-prediction
+N#CC(c1ccc(c(c1)Oc1ccccc1)F)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.028207113064839383,0.016181663783863653,1,combined-prediction
+CCOC(=O)C(Cc1cc(c(cc1Cl)F)n1nc(n(c1=O)C(F)F)C)Cl,0.029112705155716945,0.14416196256229533,1,combined-prediction
+CON(C(=O)Nc1ccc(cc1)Br)C,0.03130067550140176,0.04331377020795739,1,combined-prediction
+CN1CN(C)CSC1=S,0.03266034652463028,0.05143501540726455,1,combined-prediction
+ClCC1CN(C(=O)C1Cl)c1cccc(c1)C(F)(F)F,0.03316084217977319,0.09413505727494445,1,combined-prediction
+CCCCC(c1ccc(cc1)Cl)(Cn1cncn1)C#N,0.0341788251725187,0.08067037428729226,1,combined-prediction
+CCOP(=S)(SCn1c(=O)oc2c1ccc(c2)Cl)OCC,0.0345288315455876,0.007284468451831286,1,combined-prediction
+CCOCn1c(c2ccc(cc2)Cl)c(c(c1C(F)(F)F)Br)C#N,0.03508230910777224,0.047490155350518225,1,combined-prediction
+Clc1ccc(c(c1)Cl)C1(OCCO1)Cn1cncn1,0.03798219426521996,0.04563102499286343,1,combined-prediction
+CC(C(c1cncnc1)(c1ccc(cc1)OC(F)(F)F)O)C,0.038746408312020406,0.026593616679335016,1,combined-prediction
+CC(N(C(=O)SCC(=C(Cl)Cl)Cl)C(C)C)C,0.04102878665011248,0.03101114141686048,1,combined-prediction
+COP(=S)(Oc1ccc(c(c1)C)[N+](=O)[O-])OC,0.042491175292669145,0.012534951842686624,1,combined-prediction
+O=C1OC(C(=O)N1Nc1ccccc1)(C)c1ccc(cc1)Oc1ccccc1,0.045140176541360745,0.11918717728199056,1,combined-prediction
+CN(C(=S)SSC(=S)N(C)C)C,0.04783039657471141,0.052029910797683425,1,combined-prediction
+CN(C(=O)Oc1nc(nc(c1C)C)N(C)C)C,0.05161859628615915,0.03602454025477674,1,combined-prediction
+C=CCOC(c1ccc(cc1Cl)Cl)Cn1cncc1,0.05350296944357954,0.01934170105611924,1,combined-prediction
+COCN(c1c(CC)cccc1CC)C(=O)CCl,0.05375006811431045,0.30333289283418186,1,combined-prediction
+CCOc1ccc2c(c1)C(=CC(N2)(C)C)C,0.05522147585284508,0.20999292062465813,1,combined-prediction
+O=C(c1ccc(cc1S(=O)(=O)C)C(F)(F)F)c1cnoc1C1CC1,0.05566064749641608,0.028785189377791072,1,combined-prediction
+Clc1ccccc1c1nnc(nn1)c1ccccc1Cl,0.05706818876652619,0.08591820939424627,1,combined-prediction
+OC(=O)COc1ccc(cc1C)Cl,0.057322598023636456,0.06409863417711698,1,combined-prediction
+CCOC(=O)NCCOc1ccc(cc1)Oc1ccccc1,0.057576722828150476,0.1896606844312623,1,combined-prediction
+N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C(c1ccc(cc1)Cl)C(C)C,0.05953797389131243,0.016001687478085606,1,combined-prediction
+N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)(C)C,0.06009909138187043,0.027078346141040016,1,combined-prediction
+CC(OC(=O)C(c1ccc(cc1)Br)(c1ccc(cc1)Br)O)C,0.06061453423316249,0.09289647796203974,1,combined-prediction
+Clc1cc(ccc1Oc1ccc(c(c1)C(=O)NS(=O)(=O)C)[N+](=O)[O-])C(F)(F)F,0.06267621846158328,0.07907394396694326,1,combined-prediction
+COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1Cl,0.06288907725176857,0.24059334196784166,1,combined-prediction
+CCCCN(SN(C(=O)Oc1cccc2c1OC(C2)(C)C)C)CCCC,0.06569530810416269,0.029336721918170473,1,combined-prediction
+O=C(NC(=O)c1c(F)cccc1F)Nc1cc(Cl)c(c(c1F)Cl)F,0.06690994773808298,0.05286631638058653,1,combined-prediction
+CSc1nnc(c(=O)n1N)C(C)(C)C,0.06719929397120725,0.013701160159437665,0.11538461538461539,combined-prediction
+O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(c(c1)Cl)OC(C(OC(F)(F)F)F)(F)F,0.06758613754894155,0.06936283864904705,1,combined-prediction
+CCOc1ccc(cc1)C(COCc1cccc(c1)Oc1ccccc1)(C)C,0.06839527058523667,0.12843709655351573,1,combined-prediction
+C#CCOS(=O)OC1CCCCC1Oc1ccc(cc1)C(C)(C)C,0.06904967382858089,0.07255184301853296,1,combined-prediction
+OC(=O)COc1nc(Cl)c(cc1Cl)Cl,0.0721330354641874,0.04327585927626025,1,combined-prediction
+O=C(N(C)C)Nc1cccc(c1)C(F)(F)F,0.07395704796137248,0.057843156201256546,1,combined-prediction
+OC(C(C)(C)C)C(n1cncn1)Oc1ccc(cc1)c1ccccc1,0.07409262028018154,0.1554702671615349,1,combined-prediction
+CCNc1nc(NC(C)C)nc(n1)Cl,0.07789199862212233,0.04120360138919732,1,combined-prediction
+O=C(C1=C(C)OCCS1)Nc1ccccc1,0.08117223892684251,0.21865371953286192,1,combined-prediction
+CNC(=O)Oc1cc(C)c(c(c1)C)SC,0.0827758354922366,0.02137860199550009,1,combined-prediction
+OC(C(C)(C)C)C(n1ncnc1)Oc1ccc(cc1)Cl,0.08452667530010859,0.07311717098654681,1,combined-prediction
+O=C(C(C)(C)C)C(n1ncnc1)Oc1ccc(cc1)Cl,0.08510674803234901,0.061223837435072606,1,combined-prediction
+CC(NC(=O)N1CC(=O)N(C1=O)c1cc(Cl)cc(c1)Cl)C,0.08783443947180365,0.07084460220094288,1,combined-prediction
+CCC(=O)Nc1ccc(c(c1)Cl)Cl,0.09170952329114665,0.13348596957039296,1,combined-prediction
+N#CC(c1ccccc1)(Cn1cncn1)CCc1ccc(cc1)Cl,0.09203781459712614,0.049528243300647194,1,combined-prediction
+ClC(=CC1C(C1(C)C)C(=O)OCc1cccc(c1)Oc1ccccc1)Cl,0.09583741068272783,0.05112367301257726,1,combined-prediction
+OC(=O)C(Oc1ccc(cc1Cl)Cl)C,0.0967821447110451,0.01969166251145765,1,combined-prediction
+COP(=O)(NC(=O)C)SC,0.10236623790044716,0.01603420284847195,1,combined-prediction
+O=C1N(c2cc(Cl)cc(c2)Cl)C(=O)C2(C1(C)C2)C,0.11086164698737522,0.08912049056135805,1,combined-prediction
+COC(=O)c1c(nc(c(c1CC(C)C)C1=NCCS1)C(F)(F)F)C(F)F,0.11151045292283465,0.03652048573032131,1,combined-prediction
+Clc1ccc(cc1)CCC(C(C)(C)C)(Cn1cncn1)O,0.11516531274058425,0.05167638607399666,1,combined-prediction
+COC(=O)NC(=S)Nc1ccccc1NC(=S)NC(=O)OC,0.12412602138191925,0.23311870284543604,1,combined-prediction
+CCOc1cc(ccc1[N+](=O)[O-])Oc1ccc(cc1Cl)C(F)(F)F,0.12855945536132327,0.0532083628742947,1,combined-prediction
+CCc1ccc(cc1)C(=O)NN(C(C)(C)C)C(=O)c1cc(C)cc(c1)C,0.1367492600870436,1.000096018241897,1,combined-prediction
+c1scc(n1)c1nc2c([nH]1)cccc2,0.1490700414533971,0.07124290059304189,1,combined-prediction
+CC(N1C(=O)c2ccccc2NS1(=O)=O)C,0.14982590230152565,0.4889107282616925,1,combined-prediction
+N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.15013314047110002,0.014761743547403035,1,combined-prediction
+Cc1cccc2c1n1cnnc1s2,0.1506048130761757,0.3856935237852282,1,combined-prediction
+ClC(SN1C(=O)c2c(C1=O)cccc2)(Cl)Cl,0.15174119992085178,0.24709110501373716,1,combined-prediction
+CCC(Nc1c(cc(cc1[N+](=O)[O-])C(C)(C)C)[N+](=O)[O-])C,0.15338553104123837,0.07663751118571574,1,combined-prediction
+Cc1nc(Nc2ccccc2)nc(c1)C1CC1,0.15801925188118618,0.19454416629038565,1,combined-prediction
+c1ccc(cc1)Nc1ccccc1,0.16546268922726798,0.251186443435064,1,combined-prediction
+Clc1cc(Cl)c(cc1n1nc(n(c1=O)C(F)F)C)NS(=O)(=O)C,0.1730416993562668,0.024541035827570765,1,combined-prediction
+CCCC1COC(O1)(Cn1cncn1)c1ccc(cc1Cl)Cl,0.1767866659490005,0.03831103939366669,1,combined-prediction
+C#CCC1=C(C)C(CC1=O)OC(=O)C1C(C1(C)C)C=C(C)C,0.18559079091504613,0.15407527700390689,1,combined-prediction
+COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1CCC(F)(F)F,0.20017699986539617,0.1291134993044419,1,combined-prediction
+CC(=CC1C(C1(C)C)C(=O)OCc1coc(c1)Cc1ccccc1)C,0.2068313193675311,0.16342863567669358,1,combined-prediction
+CCOCN(c1c(C)cccc1CC)C(=O)CCl,0.21666838084755125,0.07899149299378624,1,combined-prediction
+CC(Nc1nc(NC(C)C)nc(n1)Cl)C,0.2198425631222415,0.057817340618821475,1,combined-prediction
+CNC(=O)Oc1ccccc1OC(C)C,0.23417894234275483,0.05328869177586038,1,combined-prediction
+CCCCCCCCSC(=O)Oc1cc(Cl)nnc1c1ccccc1,0.2408184692696632,0.15669364902191532,1,combined-prediction
+O=C1N(OCC1(C)C)Cc1ccccc1Cl,0.253443853488009,0.07178252950433611,1,combined-prediction
+Cc1ccc(cc1)N(S(=O)(=O)N(C)C)SC(Cl)(Cl)F,0.25917417547047744,0.0672060941474649,1,combined-prediction
+CC1N(C(=O)NC2CCCCC2)C(=O)SC1c1ccc(cc1)Cl,0.2594374890563992,0.04040958911705992,1,combined-prediction
+CCN(C(=O)C(Oc1cccc2c1cccc2)C)CC,0.27189561466298434,0.2470979480063625,1,combined-prediction
+OC(=O)COc1ccc(cc1Cl)Cl,0.28049546719325014,0.04220782045663329,1,combined-prediction
+CCOC(=O)CC(C(=O)OCC)SP(=S)(OC)OC,0.2921073325236663,0.024525489375934365,1,combined-prediction
+CNC(=O)Oc1cccc2c1cccc2,0.2981792578159244,0.1209025875895827,1,combined-prediction
+Clc1ccc(cc1)CN(C(=O)Nc1ccccc1)C1CCCC1,0.31170568268957544,0.06564925552956072,1,combined-prediction
+ClCCOc1ccccc1S(=O)(=O)NC(=O)Nc1nc(C)nc(n1)OC,0.31207588849423984,0.14729599082809905,1,combined-prediction
+CCC(n1c(=O)[nH]c(c(c1=O)Br)C)C,0.3169092998307417,0.2948426093660432,0.25,combined-prediction
+ClC(SN1C(=O)C2C(C1=O)CC=CC2)(Cl)Cl,0.32935301892961466,0.07976114599708196,1,combined-prediction
+CC(=CC1C(C1(C)C)C(=O)OCc1cccc(c1)Oc1ccccc1)C,0.42802021191337764,0.059860013602209265,1,combined-prediction
+CC(Oc1ccccn1)COc1ccc(cc1)Oc1ccccc1,0.43361266621389954,0.13723730603736453,1,combined-prediction
+N#Cc1c[nH]cc1c1cccc2c1OC(O2)(F)F,0.44926154899338216,0.1795731486323763,1,combined-prediction
+Clc1c(Cl)c([N+](=O)[O-])c(c(c1Cl)Cl)Cl,0.47403843842257615,0.0731675127439027,1,combined-prediction
+COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1C(=O)OC,0.4837900188743661,0.12263485736061217,1,combined-prediction
+Nc1nc(NC2CC2)nc(n1)N,0.5144905821145022,0.10982169517930987,1,combined-prediction
+CCCCOCCOCCOCc1cc2OCOc2cc1CCC,0.5170806512852409,0.3350153436602428,1,combined-prediction
+COCC(N(c1c(C)cccc1CC)C(=O)CCl)C,0.5285529966699751,0.08251097640354867,1,combined-prediction
+O=Cc1ccco1,0.624453213155231,0.12486833177320306,0.1111111111111111,combined-prediction
+[O-][N+](=O)c1cc(Cl)c(c(c1)Cl)N,0.7245881151318344,0.15109322689681717,1,combined-prediction
+COC(=O)c1c(Cl)c(Cl)c(c(c1Cl)Cl)C(=O)OC,0.7681550277825105,0.747753445270089,1,combined-prediction
+COC(=O)NS(=O)(=O)c1ccc(cc1)N,0.7817895162025876,0.49783922754030796,1,combined-prediction
+OC(=O)c1nc(Cl)c(c(c1Cl)N)Cl,0.8282972172278201,0.4720460499425296,1,combined-prediction
+CC(C1(C)N=C(NC1=O)c1nc2ccccc2cc1C(=O)O)C,0.8351128195663594,0.8106254748473309,1,combined-prediction
+COC(=O)Nc1nc2c([nH]1)cccc2,0.8499546987221808,0.08145337691491894,1,combined-prediction
+CC(Oc1cccc(c1)NC(=O)c1ccccc1C(F)(F)F)C,0.9077984526598573,0.1053554610733538,1,combined-prediction
+O=C(C1(C)CCCCC1)Nc1ccc(c(c1Cl)Cl)O,0.9662594125910484,0.4231253966583784,1,combined-prediction
+ClCCP(=O)(O)O,0.9723587138566308,2.424380344082731,1,combined-prediction
+COc1cccc(c1C)C(=O)NN(C(C)(C)C)C(=O)c1cc(C)cc(c1)C,1.1154252951100516,0.2488946852404996,1,combined-prediction
+CCC(c1noc(c1)NC(=O)c1c(OC)cccc1OC)(CC)C,1.5854670852219546,0.43069366532085357,1,combined-prediction
+OC(=O)COc1nc(F)c(c(c1Cl)N)Cl,1.9605490478397496,0.13193009603279973,1,combined-prediction
+CC(OC(=O)Nc1cccc(c1)Cl)C,2.340158076742021,0.10044437465404735,1,combined-prediction
+Oc1ccccc1c1ccccc1,3.119727015073393,0.20273166023828124,1,combined-prediction
+OC(=O)CNCP(=O)(O)O,5.559726007239,0.700841565636653,0.16666666666666666,combined-prediction
diff --git a/paper/data/combined-test-predictions.id b/paper/data/combined-test-predictions.id
new file mode 100644
index 0000000..27835d9
--- /dev/null
+++ b/paper/data/combined-test-predictions.id
@@ -0,0 +1 @@
+56c42aa82b72ed1054000004
diff --git a/paper/data/common-median.csv b/paper/data/common-median.csv
index f1244b0..e83961d 100644
--- a/paper/data/common-median.csv
+++ b/paper/data/common-median.csv
@@ -1,24 +1,24 @@
SMILES,mazzatorta,swiss
N#Cc1nn(c(c1S(=O)C(F)(F)F)N)c1c(Cl)cc(cc1Cl)C(F)(F)F,0.0001372533562906347,0.00013496580117055152
+CCOP(=S)(SCSC(C)(C)C)OCC,0.0002080223110862717,0.00034670385697674235
OC(=O)C(Oc1ccc(cc1)Oc1ncc(cc1Cl)C(F)(F)F)C,0.00027647194701359843,0.0002764719511333511
CCSCSP(=S)(OCC)OCC,0.0006144925612602997,0.0006144925475253195
CCOP(=O)(SC(CC)C)SC(CC)C,0.0009245829520661433,0.0008210296720157477
COP(=S)(Oc1ccc(c(c1)C)[N+](=O)[O-])OC,0.001659247904766673,0.08332310268057162
-CCOP(=S)(SCSC(C)(C)C)OCC,0.002438483757733518,0.00034670385697674235
-COP(=O)(SC)N,0.003046853953236319,0.0020548549325897737
+COP(=O)(SC)N,0.002054854991717517,0.0020548549325897737
+CSc1ccc(cc1C)OP(=S)(OC)OC,0.0025868753585247565,0.0016527259802523342
CCOP(=S)(Oc1ccc2c(c1)oc(=O)c(c2C)Cl)OCC,0.003445751195813495,0.0033630532459809582
CCOP(=S)(Oc1ncn(n1)c1ccccc1)OCC,0.004149212048673449,0.004149211896481245
-COc1sc(=O)n(n1)CSP(=S)(OC)OC,0.00467202701142753,0.0039030031199302137
+COc1sc(=O)n(n1)CSP(=S)(OC)OC,0.004713372383210075,0.0039030031199302137
CCOP(=S)(OCC)SCSP(=S)(OCC)OCC,0.005201883810203027,0.004681695305160139
CCOP(=O)(Oc1ccc(c(c1)C)SC)NC(C)C,0.005274306112287868,0.0035601567181414275
CCOP(=S)(SCn1c(=O)oc2c1ccc(c2)Cl)OCC,0.0054376113486863924,0.0636200517424888
-COP(=S)(SCn1nnc2c(c1=O)cccc2)OC,0.005451835179110433,0.008508644649457775
CC(Cc1ccc(cc1)C(C)(C)C)CN1CC(C)OC(C1)C,0.005601647965290344,0.005601648122412352
-CSc1ccc(cc1C)OP(=S)(OC)OC,0.005892327205528613,0.0016527259802523342
Fc1ccc(cc1)[Si](c1ccc(cc1)F)(Cn1cncn1)C,0.006341300659739408,0.023779877474022784
COC(=O)N(C(=O)N1COC2(C(=N1)c1ccc(cc1C2)Cl)C(=O)OC)c1ccc(cc1)OC(F)(F)F,0.006820319755914397,0.006820319575237628
-Clc1ccc(cc1)C(c1ccccc1Cl)(c1cncnc1)O,0.00694452873492003,0.01932390597300771
-O=S1OCC2C(CO1)C1(C(C2(Cl)C(=C1Cl)Cl)(Cl)Cl)Cl,0.007126617932723449,0.015481963173347177
+Clc1ccc(cc1)C(c1ccccc1Cl)(c1cncnc1)O,0.00694452873492003,0.00920904883059355
+COP(=S)(SCn1nnc2c(c1=O)cccc2)OC,0.0070905370826580775,0.008508644649457775
+O=S1OCC2C(CO1)C1(C(C2(Cl)C(=C1Cl)Cl)(Cl)Cl)Cl,0.007126617932723449,0.01228727229779905
CCCCOC(=O)C(Oc1ccc(cc1)Oc1ccc(cn1)C(F)(F)F)C,0.007825509706097071,0.009886227162529472
COP(=S)(Oc1nc(Cl)c(cc1Cl)Cl)OC,0.009301369775521361,0.003100456591840454
CCOC(=O)C(Oc1ccc(cc1)Oc1cnc2c(n1)ccc(c2)Cl)C,0.009924832004782804,0.04157699893895499
@@ -31,15 +31,15 @@ CC(Oc1cc(c(cc1Cl)Cl)n1nc(oc1=O)C(C)(C)C)C,0.01448347496337274,0.0104281016973780
N#Cc1c(Cl)cccc1Cl,0.014533918736325764,0.0186034162597095
N#Cc1c(Cl)c(C#N)c(c(c1Cl)Cl)Cl,0.015042627044387032,0.08141821878808377
ClCC1CN(C(=O)C1Cl)c1cccc(c1)C(F)(F)F,0.016019730669239306,0.05030195369030707
+ClC1C(Cl)C(Cl)C(C(C1Cl)Cl)Cl,0.016160652565775233,0.01616065190994549
CCCN(C(=O)n1cncc1)CCOc1c(Cl)cc(cc1Cl)Cl,0.01672571818640967,0.05707983190600125
BrC1COC(C1)(Cn1cncn1)c1ccc(cc1Cl)Cl,0.017185416964361586,0.017185417014945824
-ClC1C(Cl)C(Cl)C(C(C1Cl)Cl)Cl,0.01808617712680377,0.01616065190994549
OC(=O)C(CCP(=O)(O)C)N,0.019323475195614302,1.2637552440957067
CCN(C(=O)SCc1ccc(cc1)Cl)CC,0.019396419126203733,0.016680921188449865
OC(=O)COc1ccc(cc1C)Cl,0.019938294964743114,0.0947069010825298
-N#CC(c1ccc(c(c1)Oc1ccccc1)F)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.021874904009467275,0.04835505096829608
+N#CC(c1ccc(c(c1)Oc1ccccc1)F)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.022450559378137468,0.04835505096829608
CNC(=O)ON=C(C(=O)N(C)C)SC,0.02280382932847922,0.02072868120754643
-O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(cc1)Cl,0.02446221194980985,0.4023390123323988
+O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(cc1)Cl,0.02510595436954169,0.4023390123323988
O=C(N(C)C)Nc1ccc(c(c1)Cl)Cl,0.026276896280264014,0.007293179580314936
CCN(c1nc(cc(n1)C)OP(=S)(OC)OC)CC,0.026692119922880408,0.0068777238395693234
OC(C(Cl)(Cl)Cl)(c1ccc(cc1)Cl)c1ccc(cc1)Cl,0.030365547751564796,0.005938151689011985
@@ -51,10 +51,10 @@ OC(=O)C(Oc1ccc(cc1Cl)Cl)C,0.03828744186371015,0.15527684755838006
CC(C(c1cncnc1)(c1ccc(cc1)OC(F)(F)F)O)C,0.038746408312020406,0.08469772512288609
CC(N(C(=O)SCC(=C(Cl)Cl)Cl)C(C)C)C,0.04102878665011248,0.03544887229174679
CNC(=O)Oc1cc(C)c(c(c1)C)SC,0.041276958181115306,0.1242747128033579
-CN(C(=S)SSC(=S)N(C)C)C,0.04432283415923257,0.03036190470594063
O=C1OC(C(=O)N1Nc1ccccc1)(C)c1ccc(cc1)Oc1ccccc1,0.044873074905021335,0.045407278177700156
COP(=S)(SCN1C(=O)c2c(C1=O)cccc2)OC,0.04569504751402555,0.009139009427670286
CC1=C(C)S(=O)(=O)CCS1(=O)=O,0.047557630336441704,0.23778815168220852
+CN(C(=S)SSC(=S)N(C)C)C,0.04783039657471141,0.03036190470594063
CON(C(=O)Nc1ccc(cc1)Br)C,0.048243951057630914,0.014357399945172603
Clc1ccc(cc1)CCC(C(C)(C)C)(Cn1cncn1)O,0.05165383561566402,0.17867678986550448
Cc1ccc(cc1)N(S(=O)(=O)N(C)C)SC(Cl)(Cl)F,0.051834835094095484,0.27357274077439286
@@ -80,7 +80,7 @@ N#CC(c1ccccc1)(Cn1cncn1)CCc1ccc(cc1)Cl,0.10391366164191661,0.09203781459712614
CCOc1cc(ccc1[N+](=O)[O-])Oc1ccc(cc1Cl)C(F)(F)F,0.11058877880543937,0.14653013191720715
COC(=O)c1c(nc(c(c1CC(C)C)C1=NCCS1)C(F)(F)F)C(F)F,0.11151045196043953,0.11151045388522976
Clc1cc(ccc1Oc1ccc(c(c1)C(=O)NS(=O)(=O)C)[N+](=O)[O-])C(F)(F)F,0.11395676083924232,0.011395676083924233
-CCNc1nc(NC(C)C)nc(n1)Cl,0.11591071091933607,0.056718974985359355
+CCNc1nc(NC(C)C)nc(n1)Cl,0.11591071091933607,0.04636428436773443
O=C(C1=C(C)OCCS1)Nc1ccccc1,0.1274956638724717,0.034848813981213346
C=CCOC(c1ccc(cc1Cl)Cl)Cn1cncc1,0.13459866849613178,0.05350296944357954
CCc1ccc(cc1)C(=O)NN(C(C)(C)C)C(=O)c1cc(C)cc(c1)C,0.13618183361575933,0.13731668655832788
@@ -90,11 +90,12 @@ CCC(c1noc(c1)NC(=O)c1c(OC)cccc1OC)(CC)C,0.15252975563710267,1.6952764753748983
CC(N1C(=O)c2ccccc2NS1(=O)=O)C,0.1560686482307559,0.14982590230152565
Cc1nc(Nc2ccccc2)nc(c1)C1CC1,0.15801925526767843,0.15801924849469393
Cc1cccc2c1n1cnnc1s2,0.16381576159162972,0.1373938645607217
+ClC(SN1C(=O)c2c(C1=O)cccc2)(Cl)Cl,0.16860133324539087,0.08430066662269543
CCC(Nc1c(cc(cc1[N+](=O)[O-])C(C)(C)C)[N+](=O)[O-])C,0.16929970598735858,0.13747135609511818
-C#CCOS(=O)OC1CCCCC1Oc1ccc(cc1)C(C)(C)C,0.1697708869122168,0.061250674376451514
+C#CCOS(=O)OC1CCCCC1Oc1ccc(cc1)C(C)(C)C,0.1697708869122168,0.06904967382858089
CCCCCCCCSC(=O)Oc1cc(Cl)nnc1c1ccccc1,0.17813968959673715,0.3034972489425892
CN1CN(C)CSC1=S,0.18486987933542975,0.027422365728598172
-CCOCN(c1c(C)cccc1CC)C(=O)CCl,0.18534506246313948,0.22661523159035935
+CCOCN(c1c(C)cccc1CC)C(=O)CCl,0.18534506246313948,0.24799169923196304
CNC(=O)Oc1cccc2c1cccc2,0.1878529324240324,0.2991731924668564
COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1CCC(F)(F)F,0.19051986050321804,0.2098341392275743
CC(Nc1nc(NC(C)C)nc(n1)Cl)C,0.21766590408142725,0.22201922216305578
@@ -114,8 +115,7 @@ CC1N(C(=O)NC2CCCCC2)C(=O)SC1c1ccc(cc1)Cl,0.4534134152107278,0.06546156290207059
ClCCOc1ccccc1S(=O)(=O)NC(=O)Nc1nc(C)nc(n1)OC,0.5494924735209582,0.07465930346752149
CCOC(=O)CC(C(=O)OCC)SP(=S)(OC)OC,0.6205388929259232,0.2603236331298995
COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1C(=O)OC,0.655542030995076,0.31203800675365617
-ClC(SN1C(=O)c2c(C1=O)cccc2)(Cl)Cl,0.6631652440985374,0.08430066662269543
[O-][N+](=O)c1cc(Cl)c(c(c1)Cl)N,0.9419645496713847,0.5651787298028309
-ClCCP(=O)(O)O,1.4025957248513201,0.9066120392542251
+ClCCP(=O)(O)O,1.0381053884590363,0.9066120392542251
CC(OC(=O)Nc1cccc(c1)Cl)C,3.510237115113031,0.14040948460452124
-OC(=O)CNCP(=O)(O)O,5.914602135360638,5.350743398456257
+OC(=O)CNCP(=O)(O)O,5.914602135360638,5.559726007239
diff --git a/paper/functional-groups-reduced.csv b/paper/data/functional-groups-reduced.csv
index 525acbf..525acbf 100644
--- a/paper/functional-groups-reduced.csv
+++ b/paper/data/functional-groups-reduced.csv
diff --git a/paper/functional-groups-reduced4R.csv b/paper/data/functional-groups-reduced4R.csv
index e37c41d..e37c41d 100644
--- a/paper/functional-groups-reduced4R.csv
+++ b/paper/data/functional-groups-reduced4R.csv
diff --git a/paper/functional-groups.csv b/paper/data/functional-groups.csv
index 0e2b4ca..0e2b4ca 100644
--- a/paper/functional-groups.csv
+++ b/paper/data/functional-groups.csv
diff --git a/paper/data/mazzatorta-cv.csv b/paper/data/mazzatorta-cv.csv
new file mode 100644
index 0000000..200058c
--- /dev/null
+++ b/paper/data/mazzatorta-cv.csv
@@ -0,0 +1,519 @@
+SMILES,LOAEL_measured_median,LOAEL_predicted,Confidence
+ClC12C3C4(C(C1(Cl)Cl)(C1(C2(C3(Cl)C(C41Cl)(Cl)Cl)Cl)Cl)Cl)Cl,1.9565721591442926e-05,0.0014218133641616987,0.625
+ClC1=C(Cl)C2(C(C1(Cl)C1C2C2CC1C=C2)(Cl)Cl)Cl,2.7404023436797774e-05,0.0012794179116857743,1
+ClC1C2OC2C2C1C1(Cl)C(=C(C2(C1(Cl)Cl)Cl)Cl)Cl,6.421500622500271e-05,0.0006312904946719587,1
+ClC1=C(Cl)C2(C(C1(Cl)C1C2C2CC1C1C2O1)(Cl)Cl)Cl,0.0001312648375209092,0.0009393540815108845,1
+N#Cc1nn(c(c1S(=O)C(F)(F)F)N)c1c(Cl)cc(cc1Cl)C(F)(F)F,0.0001372533562906347,0.030320302552666413,1
+CCSCCSP(=S)(OCC)OCC,0.00014577045919371006,0.002904296720614731,1
+CCOP(=S)(SCSC(C)(C)C)OCC,0.0001733519259052264,0.004705691894690802,1
+CCOP(=S)(SCSC(C)(C)C)OCC,0.0002080223110862717,0.0031198946651464694,1
+ClC1=C(Cl)C2(C(C1(Cl)C1C2C2CC1C1C2O1)(Cl)Cl)Cl,0.0002625296750418184,0.0009393540815108845,1
+OC(=O)C(Oc1ccc(cc1)Oc1ncc(cc1Cl)C(F)(F)F)C,0.00027647194701359843,0.016349996939185575,1
+ClC1=C(Cl)C2(C(C1(Cl)C1C2C2CC1C1C2O1)(Cl)Cl)Cl,0.000328162093802273,0.00023577843419090474,1
+CCSCCSP(=S)(OCC)OCC,0.00036442614798427517,0.002759857629239433,1
+ClC1C2OC2C2C1C1(Cl)C(=C(C2(C1(Cl)Cl)Cl)Cl)Cl,0.0005137200498000217,0.0008261557263644955,1
+CNC(=O)ON=CC(SC)(C)C,0.0005255875464343458,0.0394300510866583,1
+ClC1CC2C(C1Cl)C1(C(C2(Cl)C(=C1Cl)Cl)(Cl)Cl)Cl,0.0006100854842019096,0.0005942117597564336,1
+CCSCSP(=S)(OCC)OCC,0.0006144925612602997,0.0013675281812878628,1
+OC1CCCCCc2cc(O)cc(c2C(=O)OC(CCC1)C)O,0.0006203550142861557,0.7144075963289822,1
+ClC1=C(Cl)C2(C(C1(Cl)C1C2C2CC1C1C2O1)(Cl)Cl)Cl,0.000656324187604546,0.0009393540815108845,1
+ClC1CC2C(C1Cl)C1(C(C2(Cl)C(=C1Cl)Cl)(Cl)Cl)Cl,0.0006588923229380624,0.0008687145669203877,1
+ClC1C=CC2C1C1(Cl)C(=C(C2(C1(Cl)Cl)Cl)Cl)Cl,0.0006696708996117783,0.0008952202189140214,1
+ClC(C(c1ccc(cc1)Cl)c1ccc(cc1)Cl)(Cl)Cl,0.0007052459522690667,0.04147328692582911,1
+COP(=O)(SC)N,0.000708570686799144,0.33442367385922134,1
+CCSCCSP(=S)(OCC)OCC,0.0008017375255654054,0.0029306716037033644,1
+c1ccc(cc1)[Sn](c1ccccc1)c1ccccc1,0.0008571117562305596,0.6096639268673245,1
+CCOP(=O)(SC(CC)C)SC(CC)C,0.0009245829520661433,0.0119018935050396,1
+COP(=S)(Oc1ccc(cc1)N(=O)=O)OC,0.0009498211030948742,0.03193259568430547,1
+ClC1C=CC2C1C1(Cl)C(=C(C2(C1(Cl)Cl)Cl)Cl)Cl,0.001017899767409903,0.0008952202189140214,1
+Clc1c(Cl)c(Cl)c(c(c1Cl)Cl)Cl,0.0010183220720957982,0.09587934918066965,1
+CNC(=O)CSP(=S)(OC)OC,0.001090477150926923,0.011977939066676569,1
+COC1CC(OC2C(C)C=CC=C3COC4C3(O)C(C=C(C4O)C)C(=O)OC3CC(CC=C2C)OC2(C3)C=CC(C(O2)C(C)C)C)OC(C1OC1CC(OC)C(C(O1)C)NC(=O)C)C,0.0011109849279118543,0.030023844212028958,1
+COP(=S)(SCn1nnc2c(c1=O)cccc2)OC,0.0011344859332252924,0.02189964718237587,1
+ClC1CC2C(C1Cl)C1(C(C2(Cl)C(=C1Cl)Cl)(Cl)Cl)Cl,0.0012201709684038192,0.0005942117597564336,1
+ClC12C(Cl)(Cl)C3(C4(C1(Cl)C1(C2(Cl)C3(C4(C1(Cl)Cl)Cl)Cl)Cl)Cl)Cl,0.0012831252531881078,6.239999085601705e-05,0.625
+CCOP(=S)(Oc1ccc(cc1)N(=O)=O)OCC,0.001442007505168395,0.02324515370361023,1
+CCOP(=S)(Oc1ccccc1C(=O)OC(C)C)NC(C)C,0.0014476216329334154,0.10541114923752212,1
+CCOc1cc(nc(n1)CC)OP(=S)(OC)OC,0.0015395577035464635,0.008933340243284798,1
+COC(=O)C=C(OP(=O)(OC)OC)C,0.001561466365033004,0.05630681195605883,1
+CSc1ccc(cc1C)OP(=S)(OC)OC,0.001616797099077973,0.00998805136771544,1
+COP(=S)(Oc1ccc(c(c1)C)[N+](=O)[O-])OC,0.001659247904766673,0.015728201435628045,1
+ClC1C2(Cl)C3C4C5C1(Cl)C(C2(Cl)C5C3C1C4O1)(Cl)Cl,0.0018377077252927285,0.00013082348029644925,1
+CNC(=O)CCSCCSP(=O)(OC)OC,0.001879329112916984,0.004845236789812529,1
+CNC(=O)C=C(OP(=O)(OC)OC)C,0.0020164586039868883,0.010849425248724263,1
+COP(=O)(SC)N,0.002054854991717517,0.33442367385922134,1
+CCOP(=S)(Oc1ccc2c(c1)oc(=O)c(c2C)Cl)OCC,0.0022052807653206367,0.010561978243550603,1
+S=C1NCCN1,0.0022514113902230405,6.303842268414008,0.14285714285714285
+CO[C@H]1C[C@H](O[C@H]2[C@@H](C)C=CC=C3CO[C@H]4[C@]3(O)[C@@H](C=C([C@H]4O)C)C(=O)O[C@H]3C[C@@H](CC=C2C)O[C@]2(C3)C=C[C@@H]([C@H](O2)[C@H](CC)C)C)O[C@H]([C@@H]1O[C@H]1C[C@H](OC)[C@H]([C@@H](O1)C)O)C,0.002290749011702154,0.0017566214091597164,0.11827956989247312
+S=C1NCCN1,0.0024471862937206963,6.303842268414008,0.14285714285714285
+COc1sc(=O)n(n1)CSP(=S)(OC)OC,0.002646103794082849,0.013342851700514885,1
+COC(=O)C(Oc1ccc(cc1)Oc1ncc(cc1Cl)C(F)(F)F)C,0.0026615073878255148,0.0012344748927784325,1
+COC1CC(OC2C(C)C=CC=C3COC4C3(O)C(C=C(C4O)C)C(=O)OC3CC(CC=C2C)OC2(C3)C=CC(C(O2)C(C)C)C)OC(C1OC1CC(OC)C(C(O1)C)NC(=O)C)C,0.0027774623197796356,0.04161465926335706,0.1348314606741573
+CCOP(=S)(Oc1nc(Cl)c(cc1Cl)Cl)OCC,0.002852364738724816,0.012560097152432495,1
+CCOP(=S)(OCC)SCSc1ccc(cc1)Cl,0.0029165972759564764,0.007657588282036417,1
+C1CCN2C(C1)C1CCCCN1CC2,0.002933359023382885,0.16703078340793057,1
+C1CCN2C(C1)C1CCCCN1CC2,0.002984821462389602,0.16703078340793057,1
+CCCCSP(=O)(SCCCC)SCCCC,0.003974424546249488,0.1504496631045886,1
+COc1sc(=O)n(n1)CSP(=S)(OC)OC,0.004134537178254452,0.011073447351926287,1
+CCOP(=S)(Oc1ncn(n1)c1ccccc1)OCC,0.004149212048673449,0.020215395668456124,1
+CCOP(=O)(OC(=CCl)c1ccc(cc1Cl)Cl)OCC,0.004171650398342553,0.04995900954278815,1
+Clc1nc(nc(n1)Cl)Nc1ccccc1Cl,0.004173898399328111,0.1093606955215401,1
+Clc1cccc(n1)C(Cl)(Cl)Cl,0.00433075312836283,0.0664943030028045,0.13043478260869565
+N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C(C(Br)(Br)Br)Br,0.004511229623452476,0.0348345939452587,1
+CCOP(=S)(Oc1ccc2c(c1)oc(=O)c(c2C)Cl)OCC,0.004686221626306353,0.010714479147398627,1
+CCOP(=S)(Oc1cc(C)nc(n1)C(C)C)OCC,0.004928609097226672,0.010980300528105117,1
+CCOP(=O)(Oc1ccc(c(c1)C)SC)NC(C)C,0.004944661980269876,0.008165508970666315,1
+N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Br)Br,0.004948543461552866,0.03481040749532821,1
+N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C(C(C)C)Nc1ccc(cc1Cl)C(F)(F)F,0.004971041792562443,0.011146277874077666,1
+CCN(C(=O)C(=C(OP(=O)(OC)OC)C)Cl)CC,0.005005200069191886,0.006725562763289336,1
+CCNc1nc(nc(n1)Cl)NC(C#N)(C)C,0.005193343612552968,0.03818729902070168,1
+CCOP(=S)(OCC)SCSP(=S)(OCC)OCC,0.005201883810203027,0.001847030797857757,1
+COc1sc(=O)n(n1)CSP(=S)(OC)OC,0.005292207588165698,0.011073447351926287,1
+CCOP(=S)(SCn1c(=O)oc2c1ccc(c2)Cl)OCC,0.0054376113486863924,0.0072971278933458734,1
+CC(Cc1ccc(cc1)C(C)(C)C)CN1CC(C)OC(C1)C,0.005601647965290344,0.24428343783187767,1
+CCOP(=O)(Oc1ccc(c(c1)C)SC)NC(C)C,0.005603950244305859,0.008071940830053378,1
+Fc1ccc(cc1)[Si](c1ccc(cc1)F)(Cn1cncn1)C,0.006341300659739408,0.08859636009379407,1
+COC(=O)Nc1nc2c([nH]1)cc(cc2)S(=O)c1ccccc1,0.006342219438128827,0.3094692255753645,1
+ClCC(N1C(=O)c2c(C1=O)cccc2)SP(=S)(OCC)OCC,0.006347661308292605,0.01655335793163268,1
+COP(=O)(SC)N,0.006377136181192296,0.045296304153967855,0.13333333333333333
+CCP(=S)(Sc1ccccc1)OCC,0.006414179135682054,0.003722191492864322,1
+COc1sc(=O)n(n1)CSP(=S)(OC)OC,0.006615259485207122,0.011073447351926287,1
+OC(C(Cl)(Cl)Cl)(c1ccc(cc1)Cl)c1ccc(cc1)Cl,0.006747899500347733,0.049047172630179624,1
+CNC(=O)Oc1cc(C)c(c(c1)C)N(C)C,0.0067481385934503825,0.07806475331674674,1
+COC(=O)N(C(=O)N1COC2(C(=N1)c1ccc(cc1C2)Cl)C(=O)OC)c1ccc(cc1)OC(F)(F)F,0.006820319755914397,0.05460021012041988,1
+CCOP(=S)(SCSC(C)(C)C)OCC,0.006934077036209056,0.0029602477287608174,1
+Clc1ccc(cc1)C(c1ccccc1Cl)(c1cncnc1)O,0.00694452873492003,0.05356866802000862,1
+COP(=S)(SCn1nnc2c(c1=O)cccc2)OC,0.0070905370826580775,0.018322295358547312,1
+O=S1OCC2C(CO1)C1(C(C2(Cl)C(=C1Cl)Cl)(Cl)Cl)Cl,0.007126617932723449,0.00036386924695734017,1
+Cc1nn(c(c1C=NOCc1ccc(cc1)C(=O)OC(C)(C)C)Oc1ccccc1)C,0.0073074288460468996,0.48191774605001353,1
+Fc1ccc(cc1)[Si](c1ccc(cc1)F)Cn1cncn1,0.007657523838454347,0.08859636009379407,1
+CCCCOC(=O)C(Oc1ccc(cc1)Oc1ccc(cn1)C(F)(F)F)C,0.007825509706097071,0.01255029551403124,1
+Fc1ccc(cc1)C(c1ccccc1Cl)(c1cncnc1)O,0.007943029289634557,0.014438054300148512,1
+CSc1ccc(cc1C)OP(=S)(OC)OC,0.008030092258753932,0.011338986062901048,1
+COP(=S)(SCn1nnc2c(c1=O)cccc2)OC,0.00813048252144793,0.021687531960393556,1
+CCN(c1nc(cc(n1)C)OP(=S)(OC)OC)CC,0.008187766847509327,0.017844597124334886,1
+Clc1ccc(cc1)OS(=O)(=O)c1ccc(cc1)Cl,0.008246440044818412,0.05907193607395852,1
+[O-][N+](=O)c1cc([N+](=O)[O-])c(c(c1)[N+](=O)[O-])C,0.008805487227420639,0.16464376372059966,1
+COP(=S)(Oc1nc(Cl)c(cc1Cl)Cl)OC,0.009301369775521361,0.024139165440739862,1
+COP(=O)(OC=C(Cl)Cl)OC,0.009729574839301364,0.027361947682508048,1
+CCOC(=O)C(Oc1ccc(cc1)Oc1cnc2c(n1)ccc(c2)Cl)C,0.009924832004782804,0.022894519521138888,1
+c1scc(n1)c1nc2c([nH]1)cccc2,0.009938002763559809,0.12605015112782628,1
+CON(C(=O)Nc1ccc(c(c1)Cl)Cl)C,0.010036375840596658,0.07530660632375387,1
+FC(c1ccc(cc1)C=CC(=NN=C1NCC(CN1)(C)C)C=Cc1ccc(cc1)C(F)(F)F)(F)F,0.010111728942243584,0.18296746014371312,1
+COP(=O)(OC=C(Cl)Cl)OC,0.010408382386229365,0.027361947682508048,1
+CCSC(=O)N1CCCCCC1,0.010677920910561842,0.1420990237195984,1
+CCOC(=O)c1cn2nc(cc2nc1C)OP(=S)(OCC)OCC,0.010713392485187262,0.058687722933369206,1
+O=C(C1C(C1(C)C)C=C(C(F)(F)F)Cl)OCc1c(F)c(F)c(c(c1F)F)C,0.010985502766340648,0.07572819478774503,1
+CCCSP(=O)(SCCC)OCC,0.011141416681473747,0.005788126717953556,1
+O=C(C1C(C1(C)C)C=C(C(F)(F)F)Cl)OCc1cccc(c1C)c1ccccc1,0.011824026606519262,0.16129123255733774,1
+O=C(C1C(C1(C)C)C=C(C(F)(F)F)Cl)OCc1cccc(c1C)c1ccccc1,0.011824026606519262,0.22030867390239214,1
+CCOP(=S)(Oc1ccc(cc1)N(=O)=O)OCC,0.012016729209736626,0.022496877806644597,1
+S=C1NCCN1,0.012235931468603481,6.303842268414008,0.14285714285714285
+Clc1cc(Cl)c(c(c1O)Cc1c(O)c(Cl)cc(c1Cl)Cl)Cl,0.012287924553322883,0.05189799091313671,1
+CN1CCC(CC1)C1CCN(CC1)C,0.012988179839533329,0.11626874970227083,1
+N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C(c1ccc(cc1)OC(F)F)C(C)C,0.013290157156772887,0.043440290068252216,1
+CCCN(C(=O)n1cncc1)CCOc1c(Cl)cc(cc1Cl)Cl,0.013539867103284017,0.08089999015583163,1
+COP(=O)(NC(=O)C)SC,0.013648831720059621,0.01603420284847195,1
+CNP(=O)(Oc1ccc(cc1Cl)C(C)(C)C)OC,0.013712205220154254,0.05218409380418495,1
+CCN(C(=O)SCC)C1CCCCC1,0.013930451940080113,0.10603844418623201,1
+CCCN(c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-])CC1CC1,0.014397200032537671,0.16269252962824168,1
+CC(Oc1cc(c(cc1Cl)Cl)n1nc(oc1=O)C(C)(C)C)C,0.01448347496337274,0.1493882391935513,1
+N#Cc1c(Cl)cccc1Cl,0.014533918736325764,0.07166060251297335,1
+ClC1CC2C(C1Cl)C1(C(C2(Cl)C(=C1Cl)Cl)(Cl)Cl)Cl,0.014642051620845831,0.0005942117597564336,1
+CCCCC(c1ccc(cc1Cl)Cl)(Cn1cncn1)O,0.014958135679074535,0.05648294085533073,1
+N#Cc1c(Cl)c(C#N)c(c(c1Cl)Cl)Cl,0.015042627044387032,0.0678327800715719,1
+N#CC(c1cc(C)c(cc1Cl)NC(=O)c1cc(I)cc(c1O)I)c1ccc(cc1)Cl,0.015081279803436631,0.09331510618768761,1
+ClCC1CN(C(=O)C1Cl)c1cccc(c1)C(F)(F)F,0.016019730669239306,0.12327721586222236,1
+BrC1COC(C1)(Cn1cncn1)c1ccc(cc1Cl)Cl,0.017185416964361586,0.051356609948269835,1
+N#CC(c1ccc(c(c1)Oc1ccccc1)F)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.017269661060105742,0.020383124560822165,1
+O=C(C1C(C1(C)C)C=C(C(F)(F)F)Cl)OCc1cccc(c1C)c1ccccc1,0.018918442570430818,0.18549608847080756,1
+CCOP(=S)(Oc1cc(C)nc(n1)C(C)C)OCC,0.019057288509276463,0.01216124318722561,1
+CN1CCC(CC1)C1CCN(CC1)C,0.019100264469901956,0.15374244717438296,1
+CCN(C(=O)SCc1ccc(cc1)Cl)CC,0.019396419126203733,0.05408292051478764,1
+CCCN(C(=O)n1cncc1)CCOc1c(Cl)cc(cc1Cl)Cl,0.01991156926953532,0.07632294114171351,1
+OC(=O)COc1ccc(cc1C)Cl,0.019938294964743114,0.0399652589799207,1
+N#Cc1sc2=c(sc1C#N)c(=O)c1c(c2=O)cccc1,0.020248123201460456,0.15445645494051066,1
+CCP(=S)(Sc1ccccc1)OCC,0.020298035239500172,0.002754326808265995,1
+ClC=C,0.020800592400871575,0.04595842510750215,0.14285714285714285
+Clc1cccc(c1)c1ccccc1,0.021202965065040626,0.1352285536055998,1
+N#CC(c1ccc(c(c1)Oc1ccccc1)F)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.021299248640797082,0.02247265000158328,1
+CNC(=O)CSP(=S)(OC)OC,0.02180954301853846,0.011977939066676569,1
+O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(cc1)Cl,0.022530984690614337,0.1232662493071642,1
+CNC(=O)Oc1cccc2c1OC(C2)(C)C,0.022598624918870935,0.09740277479679331,1
+OC(=O)COc1ccc(cc1Cl)Cl,0.022620602193004043,0.041009690437328015,1
+CN(C(=S)SSC(=S)N(C)C)C,0.02275063210988447,0.051165604885929104,0.16666666666666666
+CNC(=O)ON=C(C(=O)N(C)C)SC,0.02280382932847922,0.031079812793433585,1
+COC(=O)N(c1ccccc1COc1ccn(n1)c1ccc(cc1)Cl)OC,0.02320682656135787,0.14426832315094906,1
+OC(COc1cccc2c1c1ccccc1[nH]2)CNC(C)C,0.023460058312320942,0.369466939624125,1
+CCNc1nc(NCC)nc(n1)Cl,0.024794616275543167,0.021316557238817504,1
+CCOC(=O)C(Oc1ccc(cc1)Oc1nc2c(o1)cc(cc2)Cl)C,0.02487724874434851,0.02323272314653561,1
+CON(C(=O)Nc1ccc(c(c1)Cl)Cl)C,0.025090939601491648,0.05083711820179249,1
+O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(cc1)Cl,0.02510595436954169,0.12628718875372724,1
+O=C(N(C)C)Nc1ccc(c(c1)Cl)Cl,0.02574063309087087,0.08860938474145841,1
+O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(cc1)Cl,0.025749696789273527,0.12641058047221496,1
+CCNc1nc(NCC)nc(n1)Cl,0.026282293252075754,0.07780743018432164,1
+CC(OC(=O)C(c1ccc(cc1)Cl)(c1ccc(cc1)Cl)O)C,0.026531991066147967,0.05646245319768563,1
+O=C(N(C)C)Nc1ccc(c(c1)Cl)Cl,0.026813159469657157,0.10088477692803749,1
+CCOC(=O)c1ccccc1C1=c2cc(C)c(cc2=[O]c2c1cc(C)c(c2)NCC)NCC,0.027053999376946393,0.6380027934805974,1
+CSCC(=NOC(=O)NC)C(C)(C)C,0.027483045022449526,0.029438466688171353,1
+N#CC(c1ccc(c(c1)Oc1ccccc1)F)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.02763145769616919,0.024234096209191043,1
+CCOc1cc(ccc1N(=O)=O)Oc1ccc(cc1Cl)C(F)(F)F,0.02764719470135984,0.07890874815143262,1
+[O-][N+](=O)c1cc(C(=O)N)c(c(c1)[N+](=O)[O-])C,0.027758250773633555,0.142247474970479,1
+N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(C(F)(F)F)Cl,0.02778703580061686,0.02381165262416268,1
+CSC(=NOC(=O)N(SN(C(=O)ON=C(SC)C)C)C)C,0.02821118623185781,0.06400593479605718,1
+COP(=S)(SCN1C(=O)c2c(C1=O)cccc2)OC,0.02836244328456758,0.06797949317882583,1
+CC(N1C(=NC(C)(C)C)SCN(C1=O)c1ccccc1)C,0.02848365588181601,0.0689459121736827,1
+CCOP(=S)(Oc1nc(Cl)c(cc1Cl)Cl)OCC,0.028523647387248163,0.01285055734967491,1
+CCOC(=O)C(Cc1cc(c(cc1Cl)F)n1nc(n(c1=O)C(F)F)C)Cl,0.029112705155716945,0.14860201500770867,1
+COC(=O)c1c(Cl)c(Cl)c(c(c1Cl)Cl)C(=O)OC,0.030123726579706293,0.57695845139982,1
+COc1nc(C)nc(n1)N(C(=O)NS(=O)(=O)c1ccccc1C(=O)OC)C,0.031614325062739264,0.2832573792096585,1
+Cc1ccc2c(c1)nc1c(n2)sc(=O)s1,0.03201059303080734,0.08614003067869859,1
+CC(C(=O)O)Oc1cc(Cl)c(cc1Cl)Cl,0.03228091610123117,0.02898082429359809,1
+CCC1CCCC(OC2CCC(C(O2)C)N(C)C)C(C)C(=O)C2C(CC(=O)O1)C1CCC3C(C1C2)CC(C3)OC1CC(C)C(C(C1OC)OC)OC,0.03269690443692089,0.21141397929967962,1
+CCOC(=O)NCCOc1ccc(cc1)Oc1ccccc1,0.03318543029523152,0.17738294181594516,1
+Clc1ccc(c(c1)Cl)C1(OCCO1)Cn1cncn1,0.03331771398901528,0.04815264437273454,1
+CCOCn1c(c2ccc(cc2)Cl)c(c(c1C(F)(F)F)Br)C#N,0.03336499327732185,0.04261363346757391,1
+N#Cc1sc2=c(sc1C#N)c(=O)c1c(c2=O)cccc1,0.03374687200243409,0.2942994099520108,1
+CCCCC(c1ccc(cc1)Cl)(Cn1cncn1)C#N,0.033936422812922216,0.09882354692414844,1
+CCCCC(c1ccc(cc1)Cl)(Cn1cncn1)C#N,0.03407493882440353,0.06962158879997161,1
+CN(C=Nc1ccc(cc1C)C)C=Nc1ccc(cc1C)C,0.03408246361134649,0.13262968511235923,1
+ClC(C(SN1C(=O)C2C(C1=O)CC=CC2)(Cl)Cl)Cl,0.034377949341570596,0.32963878644864847,0.42857142857142855
+CN(C=Nc1ccc(cc1C)C)C=Nc1ccc(cc1C)C,0.034764112883573416,0.16021803876523436,1
+CCCSP(=S)(Oc1ccc(cc1)SC)OCC,0.03566479582586673,0.0033372571615199595,1
+N#CC(c1c(Cl)ccc(c1Cl)n1ncc(=O)[nH]c1=O)c1ccc(cc1)Cl,0.03679735812631385,0.04044424911778916,1
+CC(Cc1ccccc1)N,0.036980547196719206,0.15298008942484267,1
+CCN(c1c(cc(cc1N(=O)=O)C(F)(F)F)N(=O)=O)CC(=C)C,0.0375078950368263,0.16811251283481254,1
+Clc1c(O)c(Cl)c(c(c1Cl)Cl)Cl,0.037546481605565646,0.2093085161156731,1
+CC(OP(=S)(OC(C)C)SCCNS(=O)(=O)c1ccccc1)C,0.03773457509937652,0.003947387860828739,1
+OC(=O)C(Oc1ccc(cc1Cl)Cl)C,0.03828744186371015,0.018695785718636188,1
+CC(C(c1cncnc1)(c1ccc(cc1)OC(F)(F)F)O)C,0.038746408312020406,0.02067135597373708,1
+OC(=O)COc1cc(Cl)c(cc1Cl)Cl,0.03914162418169542,0.09184075863798795,1
+CCOP(=S)(Oc1nn(c(n1)Cl)C(C)C)OCC,0.039841737145637234,0.007638665240055393,1
+CC(N(C(=O)SCC(=C(Cl)Cl)Cl)C(C)C)C,0.04102878665011248,0.11851229785685428,1
+CCN(C(=O)C(=C(OP(=O)(OC)OC)C)Cl)CC,0.041042640567373466,0.006725562763289336,1
+CNC(=O)Oc1cc(C)c(c(c1)C)SC,0.041276958181115306,0.01244898356693107,1
+ClC(C(SN1C(=O)C2C(C1=O)CC=CC2)(Cl)Cl)Cl,0.04297243667696324,0.32963878644864847,0.42857142857142855
+O=C1OC(C(=O)N1Nc1ccccc1)(C)c1ccc(cc1)Oc1ccccc1,0.044873074905021335,0.135992755390313,1
+CCN(c1nc(cc(n1)C)OP(=S)(OC)OC)CC,0.04519647299825149,0.019762932791303964,1
+C=CCC1=C(C)C(CC1=O)OC(=O)C1C(C1(C)C)C=C(C)C,0.04563372244789605,0.17688013246322926,1
+ClCC=CCl,0.045958425107502164,0.0742053426494004,0.14285714285714285
+CCOC(=O)Cn1c(=O)sc2c1c(Cl)ccc2,0.046003238627999404,0.13903555779637022,1
+CCCN(C(=O)SCC)CCC,0.047538995974292175,0.03568739809152346,1
+CON(C(=O)Nc1ccc(cc1)Br)C,0.048243951057630914,0.0387394680026393,1
+CN(C(=S)SSC(=S)N(C)C)C,0.04887018780459644,0.051165604885929104,0.16666666666666666
+Cc1cccc(c1O)C,0.04911414454620167,0.2990053132281964,1
+COC(=O)Nc1nc2c([nH]1)cc(cc2)Sc1ccccc1,0.050108966959550236,0.2040829902451164,1
+C=CCOC(c1ccc(cc1Cl)Cl)Cn1cncc1,0.05047450068604942,0.026552818063037695,1
+CCSC(CC1CC(=O)C(C(=O)C1)C(=NOCC)CCC)C,0.05056765552287047,0.2576640422648569,0.21875
+CN(C(=O)Oc1nc(nc(c1C)C)N(C)C)C,0.051618595485714625,0.00947421966200617,1
+Clc1ccc(cc1)CCC(C(C)(C)C)(Cn1cncn1)O,0.05165383561566402,0.06031770103514649,1
+CNC(=O)Oc1cc(C)c(c(c1)C)C,0.05174850433885335,0.08220731490776746,1
+Cc1ccc(cc1)N(S(=O)(=O)N(C)C)SC(Cl)(Cl)F,0.051834835094095484,0.051179522110690205,1
+CN(C(=O)Oc1nc(nc(c1C)C)N(C)C)C,0.0524579222415799,0.00947421966200617,1
+O=N(=O)c1ccc(c(c1)N)C,0.05257947683683445,0.2130456994423962,1
+O=C1N(c2cc(Cl)cc(c2)Cl)C(=O)C2(C1(C)C2)C,0.05279126047017867,0.08848370769665356,1
+NC(=NCCCCCCCCNCCCCCCCCN=C(N)N)N,0.053436074592710235,0.04339969605964679,1
+COCN(c1c(CC)cccc1CC)C(=O)CCl,0.05375006811431045,0.3416422958308449,1
+OC(C(Cl)(Cl)Cl)(c1ccc(cc1)Cl)c1ccc(cc1)Cl,0.05398319600278186,0.04775054042433673,1
+C#CCOS(=O)OC1CCCCC1Oc1ccc(cc1)C(C)(C)C,0.0542125521232289,0.0882738484824748,1
+CCOc1ccc2c(c1)C(=CC(N2)(C)C)C,0.05522147585284508,0.3448543217147861,1
+O=C(c1ccc(cc1S(=O)(=O)C)C(F)(F)F)c1cnoc1C1CC1,0.05566064749641608,0.1418188285865031,1
+N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)(C)C,0.05566320606558952,0.032070039079003125,1
+CCOC(=O)COC(=O)c1cc(ccc1[N+](=O)[O-])Oc1ccc(cc1Cl)C(F)(F)F,0.05583516191627437,0.08304633486416381,1
+N#CC(c1c(Cl)ccc(c1Cl)n1ncc(=O)[nH]c1=O)c1ccc(cc1)Cl,0.056422615793681234,0.04201701712817465,1
+CNC(=O)Oc1cccc(c1)N=CN(C)C,0.056495719658295813,0.08543967286780094,1
+CCOC(=O)C(c1ccc(cc1)Cl)(c1ccc(cc1)Cl)O,0.056582904287311254,0.0760610772949622,1
+Clc1ccccc1c1nnc(nn1)c1ccccc1Cl,0.05706818876652619,0.06524247107291589,1
+CN(C(CN1c2ccccc2Sc2c1cccc2)C)C,0.058364575374860554,0.07462370481313495,1
+N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C(c1ccc(cc1)Cl)C(C)C,0.05953797389131243,0.02239458456128216,1
+CC(OC(=O)C(c1ccc(cc1)Br)(c1ccc(cc1)Br)O)C,0.06073132568962639,0.04246445180870021,1
+CNC(=O)ON=C(SC)C,0.061648442359631114,0.02965108174786982,1
+CNc1cnn(c(=O)c1Cl)c1cccc(c1)C(F)(F)F,0.06174515112035177,0.030447515039680465,1
+CCNc1nc(SC)nc(n1)NC(C)(C)C,0.06214876624755196,0.0764957405369865,1
+CN(C(=S)SSC(=S)N(C)C)C,0.06238747379310184,0.051165604885929104,0.16666666666666666
+[O-][N+](=O)c1cc(cc(c1)[N+](=O)[O-])[N+](=O)[O-],0.06245761469536169,0.23539870476616265,1
+COP(=S)(SCN1C(=O)c2c(C1=O)cccc2)OC,0.06302765174348351,0.06346841632986405,1
+ClC(=CC1C(C1(C)C)C(=O)OCc1cccc(c1)Oc1ccccc1)Cl,0.06389160712181856,0.048329611043453576,1
+CC(C1(C)N=C(NC1=O)c1nc2ccccc2cc1C(=O)O)C,0.06423944765895072,0.9871399288405841,1
+COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccsc1C(=O)OC,0.06453419527613821,0.20114055010294407,1
+O=C(N(C)C)Nc1cccc(c1)C(F)(F)F,0.06459882942614491,0.054230294082185056,1
+O=C(NC(=O)c1c(F)cccc1F)Nc1cc(Cl)c(c(c1F)Cl)F,0.06559798797851273,0.040528628452314384,1
+CCCCN(SN(C(=O)Oc1cccc2c1OC(C2)(C)C)C)CCCC,0.06569530810416269,0.0400926736559006,1
+Clc1ccccc1c1nnc(nn1)c1ccccc1Cl,0.06597478470118634,0.06738207410701946,1
+[O-][N+](=O)NC1=NCCN1Cc1ccc(nc1)Cl,0.0664943030028045,0.004330753128362828,0.13043478260869565
+O=C(NC(=O)c1c(F)cccc1F)Nc1cc(Cl)c(c(c1F)Cl)F,0.06822190749765324,0.04160579288164088,1
+CCOc1ccc(cc1)C(COCc1cccc(c1)Oc1ccccc1)(C)C,0.0690593023384914,0.12059148862626623,1
+COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1Cl,0.06987675250196507,0.18894976588375237,1
+N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)(C)C,0.07154653735936956,0.039897050062012586,1
+CCN1CCN(CC1)c1cc2c(cc1F)c(=O)c(cn2C1CC1)C(=O)O,0.07234386441112595,0.37000179744555806,1
+Nc1ccc(cc1)Cl,0.07250833532306657,0.17342070934363113,1
+CCCC1COC(O1)(Cn1cncn1)c1ccc(cc1Cl)Cl,0.07305234130123987,0.0459174084353172,1
+O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(c(c1)Cl)OC(C(OC(F)(F)F)F)(F)F,0.07306609422899836,0.0639912743902412,1
+OC(C(C)(C)C)C(n1cncn1)Oc1ccc(cc1)c1ccccc1,0.07409262028018154,0.1920790850026332,1
+CCCSc1ccc2c(c1)[nH]c(n2)NC(=O)OC,0.07537743365466734,0.30320218104074037,1
+Cn1cc(c2cccc(c2)C(F)(F)F)c(=O)c(c1)c1ccccc1,0.07591497971688389,0.10916794690519636,1
+Clc1ccc(cc1)CN(C(=O)Nc1ccccc1)C1CCCC1,0.0760257762657501,0.05887962606246405,1
+CCSC(=O)N1CCCCCC1,0.07907000434271044,0.1420990237195984,1
+CC(c1cc(ccc1O)C(c1ccc(c(c1)C(C)C)O)(C)C)C,0.08001387248515598,0.5422932490073515,1
+C=CCC1=C(C)C(CC1=O)OC(=O)C1C(C1(C)C)C=C(C)C,0.08101639130242413,0.1649358203743006,1
+ClCCP(=O)(O)O,0.08304843107672291,2.424380344082731,1
+COC(=O)Nc1cccc(c1)OC(=O)Nc1cccc(c1)C,0.0832475217878744,0.19354884508408218,1
+CCCN(c1c(cc(c(c1[N+](=O)[O-])N)C(F)(F)F)[N+](=O)[O-])CCC,0.08392957349588569,0.18727220561469346,1
+OC(C(C)(C)C)C(n1ncnc1)Oc1ccc(cc1)Cl,0.08452667530010859,0.07205142115160633,1
+O=C(C(C)(C)C)C(n1ncnc1)Oc1ccc(cc1)Cl,0.08510674803234901,0.06511854133132516,1
+CCCC(=C1C(=O)CC(CC1=O)C1CCCSC1)NOCC,0.08603044408485085,0.009979530982780172,1
+CC(=O)Nc1cc(NS(=O)(=O)C(F)(F)F)c(cc1C)C,0.08894826507859208,1.144357215930057,1
+N#CC(c1ccccc1)(Cn1cncn1)CCc1ccc(cc1)Cl,0.08906885283592852,0.06989004489788962,1
+COCC(=O)Nc1cc(ccc1NC(=NC(=O)OC)NC(=O)OC)Sc1ccccc1,0.08959030532555236,0.15048437937513895,1
+O=C1N(OCC1(C)C)Cc1ccccc1Cl,0.08969617860069455,0.06563526849047481,1
+Nc1nc(NC2CC2)nc(n1)N,0.09026150563412319,0.1456955513263534,1
+ClC(C(c1ccc(cc1)Cl)c1ccc(cc1)Cl)(Cl)Cl,0.09027148189044054,0.04785917219329116,1
+Fc1ccc(cc1)C(=O)CCCN1CCN(CC1)c1ccccn1,0.09163218547527233,0.3441994467725427,1
+CCC(=O)Nc1ccc(c(c1)Cl)Cl,0.09170952329114665,0.13723522769182628,1
+COC(=O)NC(=S)Nc1ccccc1NC(=S)NC(=O)OC,0.09345959256991566,0.3200955745826286,1
+Clc1cc(Cl)cc(c1)C1(CO1)CC(Cl)(Cl)Cl,0.09362507489225783,0.049819983746996574,1
+Cn1cc(c2cccc(c2)C(F)(F)F)c(=O)c(c1)c1ccccc1,0.09868947363194906,0.11320935441784255,1
+NC(=N)NCCCCCCCCCCCCOC(=O)C,0.10160268068512719,0.713383542089078,1
+OC1CC2(O)CC(O)C(C(O2)(C)CC(C=CC=CC=CC=CCC(OC(=O)C=CC2C(C1)(C)O2)C)OC1(C)OC(C)C(C(C1O)N)O)C(=O)O,0.10172294366080416,0.2884908443749386,0.12658227848101267
+[O-][N+](=O)c1cnc(n1C)C,0.10628650675790867,0.25583359344399015,1
+CC(N(c1c(cc(cc1N(=O)=O)S(=O)(=O)N)N(=O)=O)C(C)C)C,0.10642121227099519,0.8110931516114431,1
+CCOC(=O)C(OC(=O)c1cc(ccc1[N+](=O)[O-])Oc1ccc(cc1Cl)C(F)(F)F)C,0.10827828411229923,0.060150877044639596,1
+CCOC(=O)C(OC(=O)c1cc(ccc1N(=O)=O)Oc1cc(ccc1Cl)C(F)(F)F)C,0.10827828411229923,0.06391354072273552,1
+ClCC(=O)N(c1ccccc1)C(C)C,0.10865048725491992,0.14063355647533457,1
+CCOc1cc(ccc1[N+](=O)[O-])Oc1ccc(cc1Cl)C(F)(F)F,0.11058877880543937,0.055855601003025876,1
+COC(=O)c1c(nc(c(c1CC(C)C)C1=NCCS1)C(F)(F)F)C(F)F,0.11151045196043953,0.6030561295372162,1
+Clc1cc(ccc1Oc1ccc(c(c1)C(=O)NS(=O)(=O)C)[N+](=O)[O-])C(F)(F)F,0.11395676083924232,0.07874258062509669,1
+Oc1ccc(c(c1)C)C,0.1145996706078039,0.14058064499529827,1
+N#Cc1c(N)nc(nc1N)NC1CC1,0.11566455596376966,0.0671588260211166,1
+CCNc1nc(NC(C)C)nc(n1)Cl,0.11591071091933607,0.07264693165285359,1
+CCN(c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-])Cc1c(F)cccc1Cl,0.1185590456888386,0.10968410897370563,1
+Nc1ccc(cc1)S(=O)(=O)Nc1nc(C)cc(n1)C,0.1185642260256668,0.3488361590116861,1
+N#CC(c1ccccc1)(Cn1cncn1)CCc1ccc(cc1)Cl,0.11875847044790469,0.06325306833995115,1
+CC(N(c1c(cc(cc1N(=O)=O)C(F)(F)F)N(=O)=O)C(C)C)C,0.1193036069506878,0.07647943573166908,1
+COc1cc(ccc1OC)C(=CC(=O)N1CCOCC1)c1ccc(cc1)Cl,0.11937399144446861,0.15938750502122703,1
+CCCCc1c(=O)nc([nH]c1C)NCC,0.1194525860672606,0.13601548650468212,1
+CNC(=O)ON=C(SC)C,0.12329688471926223,0.019043488181468577,1
+CN(C(=O)C(c1ccccc1)c1ccccc1)C,0.1253592168358431,0.16058478564428405,1
+O=C(C1=C(C)OCCS1)Nc1ccccc1,0.1274956638724717,0.34245069872781153,1
+CC(N(c1c(cc(cc1N(=O)=O)S(=O)(=O)N)N(=O)=O)C(C)C)C,0.12992280391195832,0.8110931516114431,1
+CCCN(C(=O)SCC)CCC,0.13205276659525605,0.06906850635347721,1
+C=CCOC(c1ccc(cc1Cl)Cl)Cn1cncc1,0.13459866849613178,0.024048983960135852,1
+ClC(SN1C(=O)c2c(C1=O)cccc2)(Cl)Cl,0.1348810665963127,0.35545219964034264,1
+OC(C(C)(C)C)C(=Cc1ccc(cc1)Cl)n1ncnc1,0.13506940531624406,0.09325300787311752,1
+CCc1ccc(cc1)C(=O)NN(C(C)(C)C)C(=O)c1cc(C)cc(c1)C,0.13618183361575933,0.8007493146491557,1
+O=C(Nc1cnns1)Nc1ccccc1,0.13620822278144273,0.08031296188431028,1
+ClC=C(c1cc(Cl)c(cc1Cl)Cl)OP(=O)(OC)OC,0.1366262742927664,0.01695886379908282,1
+CC(NC(=O)N1CC(=O)N(C1=O)c1cc(Cl)cc(c1)Cl)C,0.13932359364492994,0.07765239200013897,1
+CN1CC2CC1CN2c1cc2c(cc1F)c(=O)c(cn2C1CC1)C(=O)O,0.13990757146198934,0.2090948708768445,0.5862068965517241
+OC(=O)COc1nc(Cl)c(cc1Cl)Cl,0.1403669879303106,0.04466802484258431,1
+COC(=O)C(N(c1c(C)cccc1C)C(=O)Cc1ccccc1)C,0.14136381415796706,0.23276538536699015,1
+ClC(=C)Cl,0.14441434207714035,0.010177007878307786,0.1
+CC(N1C(=O)c2ccccc2NS1(=O)=O)C,0.14566407168203882,0.5073265138326565,1
+CON=C(c1ccccc1CON=C(c1cccc(c1)C(F)(F)F)C)C(=O)OC,0.14692519722320194,0.24556842297038736,1
+c1ccc(cc1)Nc1ccccc1,0.14773454395291782,0.48409780026481714,1
+COC(CCCC(CC=CC(=CC(=O)OC(C)C)C)C)(C)C,0.14816176662421726,1.0038894881647953,1
+c1scc(n1)c1nc2c([nH]1)cccc2,0.1490700414533971,0.06053075972454769,1
+N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.15013314047110002,0.017720839447182994,1
+CCOC(=O)CC(C(=O)OCC)SP(=S)(OC)OC,0.1513509494941276,0.025399859207357336,1
+CON=C(c1ccc(cc1Cl)Cl)Cc1cccnc1,0.15245767876475944,0.06053667496108435,1
+CCC(c1noc(c1)NC(=O)c1c(OC)cccc1OC)(CC)C,0.15252975563710267,1.224432087935241,1
+Clc1ccc(c(c1)Cl)C=C(C(C(C)(C)C)O)n1cncn1,0.15327033840680634,0.10122954702097303,1
+COC=C(c1ccccc1Oc1ncnc(c1)Oc1ccccc1C#N)C(=O)OC,0.15431812608561873,0.38732816429606964,1
+COP(=S)(Oc1cc(Cl)c(cc1Cl)Cl)OC,0.15549919159080278,0.016427144246218724,1
+CCCCCCCCc1cc(N(=O)=O)c(c(c1)N(=O)=O)OC(=O)C=CC,0.15642453685164498,0.7353871066554781,1
+Cc1nc(Nc2ccccc2)nc(c1)C1CC1,0.15801925526767843,0.14258833778495594,1
+CCOC(=O)CN(c1c(CC)cccc1CC)C(=O)CCl,0.1603572605822803,0.18868594261143684,1
+Cc1cccc2c1n1cnnc1s2,0.16381576159162972,0.6851471667728956,1
+CC(N1C(=O)c2ccccc2NS1(=O)=O)C,0.16647322477947293,0.5248637450337764,1
+ClC(SN1C(=O)c2c(C1=O)cccc2)(Cl)Cl,0.16860133324539087,0.35545219964034264,1
+CNc1cnn(c(=O)c1Cl)c1cccc(c1)C(F)(F)F,0.1687700797289615,0.031597288914811124,1
+CCC(Nc1c(cc(cc1[N+](=O)[O-])C(C)(C)C)[N+](=O)[O-])C,0.16929970598735858,0.1270677771191105,1
+Clc1cc(Cl)c(cc1n1nc(n(c1=O)C(F)F)C)NS(=O)(=O)C,0.1730416993562668,0.022002424130460663,1
+COC(=O)c1ccc(cc1C1=NC(C(=O)N1)(C)C(C)C)C,0.1734054330003024,0.27706226280243773,1
+CNC(=O)N(c1nnc(s1)C(C)(C)C)C,0.1751969016077557,0.19506513302817866,0.4
+CCCCCCCCSC(=O)Oc1cc(Cl)nnc1c1ccccc1,0.17813968959673715,0.18686850946296205,1
+COCC(=O)N(c1c(C)cccc1C)N1CCOC1=O,0.17965983350851364,0.11878355690291638,1
+c1ccc(cc1)Nc1ccccc1,0.1831908345016181,0.2809891238813933,1
+CN1CN(C)CSC1=S,0.18486987933542975,0.040377923983948856,0.16666666666666666
+CCOCN(c1c(C)cccc1CC)C(=O)CCl,0.18534506246313948,0.07277588371973029,1
+O=N(=O)c1ccc(c(c1)N(=O)=O)C,0.1866762157041476,0.10265714109290516,1
+CNC(=O)Oc1cccc2c1cccc2,0.1878529324240324,0.13059631278321485,1
+COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1CCC(F)(F)F,0.19051986050321804,0.142337016902528,1
+COP(=O)(NC(=O)C)SC,0.1910836440808347,0.01603420284847195,1
+OC1CN(C(=O)N1c1nnc(s1)C(C)(C)C)C,0.19506513302817866,0.17519690160775567,0.4
+OC(=O)C(Cl)(Cl)C,0.1970361896096669,0.42676977936996974,0.125
+c1scc(n1)c1nc2c([nH]1)cccc2,0.19876005527119617,0.06053075972454769,1
+Nc1ccc(c(c1)N)O,0.2013846888993215,0.6976394362438932,1
+O=C(NS(=O)(=O)c1ccccc1C(=O)OC1COC1)Nc1nc(C)cc(n1)C,0.20422574060250331,0.3749650612729717,1
+ClCC(=O)N(c1c(CC)cccc1CC)CNC(=O)C,0.21058487877925733,0.3814418917881575,1
+CC(Nc1nc(NC(C)C)nc(n1)Cl)C,0.21766590408142725,0.05784177240937466,1
+CC(c1ccc(cc1)O)(c1ccc(cc1)O)C,0.21902317939829427,0.7237798298552831,1
+COCC(=O)N(c1c(C)cccc1C)C(C(=O)OC)C,0.22374845318219344,0.2149499838579308,1
+Nc1ccc2c(c1)nc1c(c2)ccc(c1)N,0.22461542255370148,0.6782252254224121,1
+CCSC(CC1CC(=O)C(=C(NOCC=CCl)CC)C(=O)C1)C,0.2389478027971563,0.06436113994662118,0.21875
+CNC(=O)Oc1ccccc1OC(C)C,0.23895810443138246,0.05797512153929558,1
+CCC(n1c(=O)[nH]c(c(c1=O)Br)C)C,0.23935747721355113,0.1194525860672606,0.14814814814814814
+C=CCC1=C(C)C(CC1=O)OC(=O)C1C(C1(C)C)C=C(C)C,0.24800936112986982,0.19013970584234022,1
+OC(=O)c1nc(Cl)c(c(c1Cl)N)Cl,0.24848916516834604,0.21532428756816943,1
+C=CC1(C)OC(=O)N(C1=O)c1cc(Cl)cc(c1)Cl,0.25479642918707424,0.10104185355147247,1
+CC(Oc1cccc(c1)NC(=O)c1ccccc1C(F)(F)F)C,0.2690918752347788,0.10495954737172976,1
+ClC=C(c1cc(Cl)c(cc1Cl)Cl)OP(=O)(OC)OC,0.2732525485855328,0.015157957101805187,1
+CCSC(CC1CC(=O)C(=C(NOCC=CCl)CC)C(=O)C1)C,0.27784628232227476,0.06436113994662118,0.21875
+CCCC1COC(O1)(Cn1cncn1)c1ccc(cc1Cl)Cl,0.2805209905967611,0.055344746560950404,1
+C#CCOS(=O)OC1CCCCC1Oc1ccc(cc1)C(C)(C)C,0.2853292217012047,0.07713406742523517,1
+CCCCOCCOCCOCc1cc2OCOc2cc1CCC,0.29547465787728056,0.3350153436602428,1
+COC(=O)c1ccccc1S(=O)(=O)NC(=O)Nc1nc(OC(F)F)cc(n1)OC(F)F,0.2989300503468667,0.4105408249862013,1
+CCOC(=O)c1ccccc1S(=O)(=O)NC(=O)Nc1nc(Cl)cc(n1)OC,0.30133493788161053,0.3808470469587571,1
+CNC(=O)Oc1cc(C)cc(c1C)C,0.30635114568601185,0.0976602822721586,1
+C#CCC1=C(C)C(CC1=O)OC(=O)C1C(C1(C)C)C=C(C)C,0.316253365684832,0.19212808361152056,1
+OC(=O)CCl,0.317470328693963,0.1630859508654308,1
+ClC(SN1C(=O)C2C(C1=O)CC=CC2)(Cl)Cl,0.3326798171006209,0.07976114599708196,1
+CN(C1C(=O)C(=C(O)N)C(=O)C2(C1CC1C(=C(O)c3c(C1(C)O)cccc3O)C2=O)O)C,0.33750750616693714,8.335917849904947,0.6216216216216216
+Clc1cc(ccc1Oc1ccc(c(c1)C(=O)O)[N+](=O)[O-])C(F)(F)F,0.34563108073944815,0.08634724402138849,1
+CCC(=O)Nc1ccc(c(c1)Cl)Cl,0.3484961885063573,0.10641333146048552,1
+OC(=O)C(Cl)(Cl)C,0.3497269961122948,0.42676977936996974,0.125
+Fc1ccc(cc1)C(=O)CCCN1CCN(CC1)c1ccccn1,0.35125671098854394,0.07554116271775944,1
+OC(C(C)(C)C)C(n1ncnc1)Oc1ccc(cc1)Cl,0.3550120362604561,0.05298039495100552,1
+N=C(NC(=N)N)NCCc1ccccc1,0.35564719019232227,0.4293116814818228,1
+COc1ccc(cc1)C(C(Cl)(Cl)Cl)c1ccc(cc1)OC,0.36163948246786254,0.12900055509999012,1
+CCN(C(=O)C(Oc1cccc2c1cccc2)C)CC,0.36852210915226874,0.2666090555504209,1
+CC(=CC1C(C1(C)C)C(=O)OCc1coc(c1)Cc1ccccc1)C,0.3693416417277341,0.1741052713034425,1
+O=C(C(C)(C)C)C(n1ncnc1)Oc1ccc(cc1)Cl,0.3880867710275115,0.06530531261415101,1
+COC(=O)Nc1nc2c([nH]1)cccc2,0.3922867840256219,0.08503054904294756,1
+CCCCNC(=O)n1c(NC(=O)OC)nc2c1cccc2,0.3961177430023906,0.13425870370019663,1
+Clc1c(Cl)c([N+](=O)[O-])c(c(c1Cl)Cl)Cl,0.423248605734443,0.12324532538914809,1
+NCCNc1cccc2c1cccc2,0.4241543329029509,0.23852536609869093,1
+CC(=CC1C(C1(C)C)C(=O)OCc1cccc(c1)Oc1ccccc1)C,0.42802021191337764,0.06507172681898275,1
+CC(Oc1ccccn1)COc1ccc(cc1)Oc1ccccc1,0.4356352632556343,0.18762505282704417,1
+N#Cc1c[nH]cc1c1cccc2c1OC(O2)(F)F,0.443217671652664,0.1582577446590667,1
+CC1N(C(=O)NC2CCCCC2)C(=O)SC1c1ccc(cc1)Cl,0.4534134152107278,0.04044915666609474,1
+CCSC(=O)N(CC(C)C)CC(C)C,0.4600420791288938,0.03228951683176119,1
+Cc1cc(N)c(cc1C)C,0.46595489467866197,0.06597029300388296,1
+Clc1cc(ccc1Oc1ccc(c(c1)C(=O)[O-])[N+](=O)[O-])C(F)(F)F.[Na+],0.46919094173712006,0.06314263015489453,1
+Clc1c(Cl)c([N+](=O)[O-])c(c(c1Cl)Cl)Cl,0.47403843842257615,0.12305192134429824,1
+CN1N(C)C(CC1c1ccccc1)c1ccccc1,0.49533572071941767,0.15296953131716254,1
+OC(=O)C(Oc1cccc(c1)Cl)C,0.4984573741185779,0.031571537397910326,1
+COC(=O)C(NC(=O)C(CC(=O)O)N)Cc1ccccc1,0.4994850207500349,0.3915748574119684,1
+CCCCC(COC(=O)c1ccccc1C(=O)OCC(CCCC)CC)CC,0.5120902983161549,0.5490084227192451,1
+COc1c(Cl)ccc(c1C(=O)O)Cl,0.520273850439093,0.27032818788451163,1
+COCC(N(c1c(C)cccc1CC)C(=O)CCl)C,0.5285529966699751,0.09022939400978072,1
+O=CCC1CC(C)C(=O)C=CC(=CC(C(OC(=O)CC(C(C1OC1(C)OC(C)C(C(C1O)N(C)C)OC1(C)OC(C)C(C(C1)(C)O)O)C)O)CC)COC1OC(C)C(C(C1OC)OC)O)C,0.5295750507618869,0.32043182213812477,1
+COC(=O)C1(O)c2cc(Cl)ccc2c2c1cccc2,0.546052144921948,0.05128735297771426,1
+CC(C12CCC(O2)(C(C1)OCc1ccccc1C)C)C,0.5466515334085721,0.07979167550031513,1
+Oc1ccc2c(c1N=Nc1ccccc1)ccc(c2)S(=O)(=O)O,0.5482080783455129,2.226070226693587,1
+ClCCOc1ccccc1S(=O)(=O)NC(=O)Nc1nc(C)nc(n1)OC,0.5494924735209582,0.14061208442890083,1
+Nc1ccc(c(c1)N(=O)=O)N,0.5681125108300529,0.407842963979915,1
+CCCN(c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-])CCCl,0.5690227874227859,0.10229430897526087,1
+NC1CCCCC1,0.5898716318329822,0.27111140054462934,1
+COc1cc(Cl)c(cc1Cl)OC,0.6037074787089276,0.14551736437093893,1
+NC1CCCCC1,0.6049965454697254,0.27111140054462934,1
+OC(=O)C1C2CCC(C1C(=O)O)O2,0.6177415369409439,0.3762966519177018,1
+CN(C(=O)Nc1ccc(cc1)Cl)C,0.6292491939569526,0.05314085450837755,1
+COC(=O)c1ccc(cc1)C(=O)OC,0.6437193589585136,0.6359402307400387,1
+Clc1ccc(cc1)S(=O)(=O)c1cc(Cl)c(cc1Cl)Cl,0.6459733503975151,0.039780209619230206,1
+COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1C(=O)OC,0.655542030995076,0.1510565040147731,1
+CCCCOCC(OCC(O)C)C,0.6726932978936081,0.7345230884100764,1
+CC1OC(C)OC(C1)OC(=O)C,0.7175892491582392,0.1016026806851272,0.14285714285714285
+CCCCOCCOCCOCc1cc2OCOc2cc1CCC,0.7386866446932013,0.43725287577182737,1
+COc1nc(nc(c1)OC)NC(=O)NS(=O)(=O)Cc1ccccc1C(=O)OC,0.7529208210920754,0.43020251605384513,1
+O=C(C1C(C1(C)C)C=C(C)C)OCN1C(=O)C2=C(C1=O)CCCC2,0.7543614918373561,0.11072031885625554,1
+COC(=O)NS(=O)(=O)c1ccc(cc1)N,0.7817895162025876,0.3433567591603662,1
+Nc1ccc(c(c1)C)NOS(=O)(=O)O,0.8431459792705229,0.402820671344419,1
+CCOC(=O)C1OC1(C)c1ccccc1,0.8485352051922984,0.1653576500580577,1
+CCCCNC(=O)n1c(NC(=O)OC)nc2c1cccc2,0.8611255282660666,0.1563339051142994,1
+OCCn1c(C)ncc1[N+](=O)[O-],0.8764039114257128,0.05770780251062409,1
+COP(=O)OC,0.9086866261501474,0.11612790576142619,1
+[O-][N+](=O)c1cc(Cl)c(c(c1)Cl)N,0.9419645496713847,0.21199123476247797,1
+OCCNc1ccc(cc1OCCO)N(=O)=O,0.9453881078267568,0.9356572196349415,1
+O=N(=O)c1cccc2c1cccc2,0.952831491808421,0.1614476326965743,1
+O=C(C1(C)CCCCC1)Nc1ccc(c(c1Cl)Cl)O,0.9662594125910484,0.4457496787338429,1
+Oc1cccc2c1nccc2,0.9851335765350275,0.721989181638648,1
+CCCOC(=O)c1ccc(cn1)C(=O)OCCC,0.9949124950582696,0.8759044368876665,1
+CC[N](=C1C=CC(=C(c2ccc(cc2)N(Cc2cccc(c2)S(=O)(=O)O)CC)c2ccc(cc2)N(C)C)C=C1)Cc1cccc(c1)S(=O)(=O)O,1.009963174498295,0.18540142003081284,1
+ClCCP(=O)(O)O,1.0381053884590363,2.9662351498622144,1
+ClCC[N+](C)(C)C,1.0602168942789227,1.7717264844452583,1
+Clc1ccccc1,1.0661274430976688,0.09347672424517633,1
+CCOC(=O)CC(C(=O)OCC)SP(=S)(OC)OC,1.0897268363577188,0.021360738953880024,1
+O=C1CCCCCN1,1.10465364954589,0.11720225163340323,1
+COc1cccc(c1C)C(=O)NN(C(C)(C)C)C(=O)c1cc(C)cc(c1)C,1.1154252951100516,0.2509122725796672,1
+COC(=O)C(=CC=CC(=CC=CC=C(C=CC=C(C=CC1=C(C)CCCC1(C)C)C)C)C)C,1.119409718240544,0.01608733804096794,0.12121212121212122
+Oc1ccc(cc1Cl)C(C)(C)C,1.1697007223226876,0.11220135445041438,1
+CON=C(c1ccccc1COc1ccccc1C)C(=O)OC,1.1807966969350603,0.15177727689808973,1
+CON=C(c1ccccc1COc1ccccc1C)C(=O)OC,1.1967534090558043,0.16001530012602447,1
+OCc1cc(N=Nc2ccc(c3c2cccc3)S(=O)(=O)O)c(c(c1O)N=Nc1ccc(c2c1cccc2)S(=O)(=O)O)O,1.2093346835379808,1.319500758990477,1
+CC1=CC(=O)CC(C1)(C)C,1.295160023171064,0.11315631785675133,1
+C[N]1(C)CCCCC1,1.3133857473480115,1.7551096434605418,1
+OC1CCC2(C(C1)CCC1C2CCC2(C1CCC2C(CCC(=O)O)C)C)C,1.3277652171188237,1.387743406118009,1
+Oc1ccc(c(c1)C(C)(C)C)O,1.3536524792656537,0.22216113939448204,1
+OCC1OC2OC3C(CO)OC(C(C3O)O)OC3C(CO)OC(C(C3O)O)OC3C(CO)OC(C(C3O)O)OC3C(OC(OC4C(OC(OC5C(OC(OC1C(C2O)O)C(O)C5O)CO)C(O)C4O)CO)C(O)C3O)CO,1.4097112541302337,20.11975049013973,1
+CCCCOC(=O)c1ccccc1C(=O)OCc1ccccc1,1.504675539130048,0.37909491054043404,1
+COC(=O)c1c(Cl)c(Cl)c(c(c1Cl)Cl)C(=O)OC,1.5061863289853148,0.7284064393720566,1
+Fc1cc2CCC(n3c2c(c1)c(=O)c(c3)C(=O)O)C,1.531109972815908,0.0995408783900811,0.3333333333333333
+CC(Oc1cccc(c1)NC(=O)c1ccccc1C(F)(F)F)C,1.5465050300849357,0.10755408570863038,1
+c1ccc(cc1)c1ccccc1,1.6211890708511503,0.26000708215571994,1
+NCC(c1ccc(cc1)O)O,1.6320834707547616,0.6023190083757878,1
+ClC(SN1C(=O)c2c(C1=O)cccc2)(Cl)Cl,1.6860133324539086,0.35545219964034264,1
+ClCC#CCOC(=O)Nc1cccc(c1)Cl,1.743505808935165,0.17719995498834726,1
+OC(=O)CNCP(=O)(O)O,1.7743806406081915,0.700841565636653,0.16666666666666666
+COc1ccc(c(c1)OC)N,1.8018201517132568,0.4406371111426593,1
+CC(C1(C)N=C(NC1=O)c1ncccc1C(=O)O)C,1.913681483026602,1.0116079695709865,1
+OC(=O)COc1nc(F)c(c(c1Cl)N)Cl,1.9605490478397496,0.14062735149788871,1
+C=Cc1ccccc1,2.021140457067712,0.5581719843674564,1
+Clc1ccc(cc1)Cl,2.0407891160090657,0.058396786076860684,1
+CCCCOC(=O)c1ccccc1C(=O)OCCCC,2.1556100397968727,0.4824557089800792,1
+c1ccc(cc1)c1ccccc1OCC1CO1,2.209744922072461,0.47297048589951723,1
+ClCC[N](C)(C)C,2.2427665071284903,1.1121371793371502,1
+CC=Cc1ccc(cc1)OC,2.3211612715861247,1.2223687229695321,1
+CC(OC(=O)Nc1cccc(c1)Cl)C,2.340158076742021,0.07885333594015259,1
+COC(=O)c1ccccc1O,2.366127776683809,0.6131172056131932,1
+CCOC(=O)C=C,2.477130986890983,0.160637047130486,1
+C=Cc1ccccc1,2.736460951374337,0.48345391227477846,1
+CCc1ccccc1,2.741016342485753,0.12025839466555979,1
+CC(c1ccccc1)C,2.7539366734341955,0.12179927831297671,1
+CC(=C)C(=O)O,2.8807316686731115,9.313172081918692,0.14285714285714285
+CC(N(c1c(cc(cc1N(=O)=O)C(F)(F)F)N(=O)=O)C(C)C)C,2.982590173767195,0.10106093503191066,1
+ClCCP(=O)(O)O,3.0866333550182015,2.424380344082731,1
+Clc1cnc2c(c1)ccc(c2C(=O)O)Cl,3.127347059508829,0.11793510219271723,1
+CCCOC(=O)NCCCN(C)C,3.611885866531256,0.24217162746131807,1
+CCOP(=O)O,3.6347465046005896,0.003842329850641841,1
+Oc1ccccc1,3.655248831064175,0.5726822872379717,1
+CC1CCC(C(C1)O)C(C)C,3.7948308388559964,7.011629148921578,1
+CCc1ccccc1,3.843074459567654,0.11981018705088699,1
+CC(c1ccccc1)C,3.8438632722857955,0.1197379646857681,1
+COc1ccc(cc1)N,3.8488877932280037,0.36732614842795186,1
+OCCO,4.027850816139244,8.499181435776759,1
+CCCCC(COC(=O)CCCCC(=O)OCC(CCCC)CC)CC,4.047856676081442,0.3479573670205783,1
+CCCOC(=O)c1cc(O)c(c(c1)O)O,4.071644352421931,0.7702810499269743,1
+CC(CCCC1(C)CCc2c(O1)c(C)c(c(c2C)OC(=O)C)C)CCCC(CCCC(C)C)C,4.230630449818821,0.43643913496013176,1
+COc1ccc(cc1N=Nc1c(O)c(cc2c1cccc2)C(=O)Nc1cccc(c1)N(=O)=O)N(=O)=O,4.308389780762046,0.411182841264013,1
+S=c1sc2c([nH]1)cccc2,4.484270077422418,0.10787015001421449,1
+CC(OC(=O)Nc1cccc(c1)Cl)C,4.680316153484042,0.08345354088958273,1
+Oc1ccccc1c1ccccc1,5.875192118782284,0.2214370368151079,1
+OC(=O)CNCP(=O)(O)O,5.914602135360638,0.700841565636653,0.16666666666666666
+CCOc1ccc(cc1N)NC(=O)C,6.1010029534002825,0.3233024548760301,1
+Nc1ccc(cc1)O,6.286318149278613,0.612495359780696,1
+NC(=S)NNC(=S)N,6.303842268414009,0.004069924087402051,0.14285714285714285
+NC(=O)c1cnccn1,6.408762052980724,0.027758250773633534,0.10526315789473684
+OC(=O)c1ccc(cc1N)N(=O)=O,6.506215164982792,0.5563741830253752,1
+Oc1cc(O)c2c(c1)oc(c(c2=O)O)c1ccc(c(c1)O)O,6.729846937340625,0.6908804048917031,1
+ClCC(=O)c1ccc(cc1)NC(=O)C,7.465334624174738,0.20790177229212267,1
+O=C1OC(=O)c2c1cccc2,8.000509872156579,0.6713795852396433,1
+CCCOC(=O)c1ccc(cc1)O,8.324062177858794,0.6396720070179559,1
+OCC(C1OC(=O)C(=C1O)O)O,8.82332300652517,1.4097112541302337,0.17647058823529413
+CCOC(=O)COC(=O)c1ccccc1C(=O)OCC,8.919866912731305,0.21304859946913612,1
+O=C1CCCCC1,9.272184465524795,1.272174495388276,1
+OC(=O)C=CC(=O)O,9.313172081918696,2.8807316686731115,0.14285714285714285
+COC(=O)c1ccc(cc1)O,9.858865736182537,0.46643220721186857,1
+COC(=O)c1ccccc1C(=O)OC,10.299509743336218,0.5164877342007698,1
+OC1C2C(N(C)C)C(=O)C(=C(O)N)C(=O)C2(O)C(=O)C2=C(O)c3c(C(C12)(C)O)c(Cl)ccc3O,10.50761860949369,0.3375075061669371,0.6216216216216216
+OCCO,10.633526154607605,18.100227209506496,0.2
+OCCO,16.111403264556976,12.098739817238384,0.2
+CCCCCCCCCCCCCCCCCC(=O)OCC(C1OCC(C1O)O)O,16.727105323218392,0.5487265062934492,1
+OCC(C1OC(=O)C(=C1O)O)O,17.323010613197102,13.375187084226921,0.23076923076923078
+[O-]S(=O)(=O)NC1CCCCC1.[Na+],17.900880706433757,0.1311146954310952,1
+O=C1NS(=O)(=O)c2c1cccc2,19.66323569952698,0.30457294474070207,1
+CCCCCCCCCCCC(=O)OCC(C1OCC(C1O)O)O,19.866710908558982,0.4802563354164847,1
+CCOC(=O)c1ccccc1C(=O)OCC,19.95615854702247,0.2835725804367756,1
+OC(=O)c1ccccc1N,20.060380944519448,0.6470696879448438,1
+OCCO,32.22280652911395,8.499181435776759,1
+OCC(CO)O,74.73899985905678,5.776891986788403,1
diff --git a/paper/data/mazzatorta-cv.id b/paper/data/mazzatorta-cv.id
new file mode 100644
index 0000000..516d987
--- /dev/null
+++ b/paper/data/mazzatorta-cv.id
@@ -0,0 +1 @@
+56c42aab2b72ed10be000001
diff --git a/paper/data/mazzatorta-test-predictions.csv b/paper/data/mazzatorta-test-predictions.csv
index 6c61db5..e508e33 100644
--- a/paper/data/mazzatorta-test-predictions.csv
+++ b/paper/data/mazzatorta-test-predictions.csv
@@ -1,149 +1,149 @@
-SMILES,LOAEL,Confidence,Dataset
-O=S1OCC2C(CO1)C1(C(C2(Cl)C(=C1Cl)Cl)(Cl)Cl)Cl,0.0003638692469573398,1,mazzatorta-prediction
-CCSCSP(=S)(OCC)OCC,0.0016300938762789745,1,mazzatorta-prediction
-CCOP(=S)(OCC)SCSP(=S)(OCC)OCC,0.001847030797857755,1,mazzatorta-prediction
-CCOP(=S)(SCSC(C)(C)C)OCC,0.0031106867605998826,1,mazzatorta-prediction
-CCCSP(=O)(SCCC)OCC,0.0061411247977180205,1,mazzatorta-prediction
-CCOP(=S)(SCn1c(=O)oc2c1ccc(c2)Cl)OCC,0.007526804342298479,1,mazzatorta-prediction
-CCOP(=O)(Oc1ccc(c(c1)C)SC)NC(C)C,0.008291760373156038,1,mazzatorta-prediction
-CN(C(=O)Oc1nc(nc(c1C)C)N(C)C)C,0.00947421966200617,1,mazzatorta-prediction
-CSc1ccc(cc1C)OP(=S)(OC)OC,0.00998805136771544,1,mazzatorta-prediction
-CCOP(=S)(Oc1ccc2c(c1)oc(=O)c(c2C)Cl)OCC,0.010714479147398627,1,mazzatorta-prediction
-CCOP(=S)(Oc1cc(C)nc(n1)C(C)C)OCC,0.010980300528105117,1,mazzatorta-prediction
-COc1sc(=O)n(n1)CSP(=S)(OC)OC,0.011073447351926287,1,mazzatorta-prediction
-CCOP(=O)(SC(CC)C)SC(CC)C,0.011316358861878211,1,mazzatorta-prediction
-CNC(=O)CSP(=S)(OC)OC,0.011977939066676562,1,mazzatorta-prediction
-CCOP(=S)(Oc1nc(Cl)c(cc1Cl)Cl)OCC,0.01285055734967491,1,mazzatorta-prediction
-COP(=S)(Oc1ccc(c(c1)C)[N+](=O)[O-])OC,0.015728201435628045,1,mazzatorta-prediction
-COP(=O)(NC(=O)C)SC,0.01603420284847195,1,mazzatorta-prediction
-OC(=O)C(Oc1ccc(cc1)Oc1ncc(cc1Cl)C(F)(F)F)C,0.01643167623693211,1,mazzatorta-prediction
-N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.017186299494700404,1,mazzatorta-prediction
-N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C(c1ccc(cc1)Cl)C(C)C,0.017235945805185275,1,mazzatorta-prediction
-CCN(c1nc(cc(n1)C)OP(=S)(OC)OC)CC,0.017635466228081265,1,mazzatorta-prediction
-CCCCOC(=O)C(Oc1ccc(cc1)Oc1ccc(cn1)C(F)(F)F)C,0.018612600873944375,1,mazzatorta-prediction
-CCOP(=S)(Oc1ncn(n1)c1ccccc1)OCC,0.019680297281264553,1,mazzatorta-prediction
-CC(C(c1cncnc1)(c1ccc(cc1)OC(F)(F)F)O)C,0.02067135597373707,1,mazzatorta-prediction
-OC(=O)C(Oc1ccc(cc1Cl)Cl)C,0.020745537156134756,1,mazzatorta-prediction
-COP(=S)(SCn1nnc2c(c1=O)cccc2)OC,0.021687531960393556,1,mazzatorta-prediction
-CNC(=O)Oc1cc(C)c(c(c1)C)SC,0.0217719179484974,1,mazzatorta-prediction
-N#CC(c1ccc(c(c1)Oc1ccccc1)F)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.02241011099945114,1,mazzatorta-prediction
-CCOC(=O)C(Oc1ccc(cc1)Oc1cnc2c(n1)ccc(c2)Cl)C,0.022838553346053704,1,mazzatorta-prediction
-Clc1cc(Cl)c(cc1n1nc(n(c1=O)C(F)F)C)NS(=O)(=O)C,0.024541035827570765,1,mazzatorta-prediction
-C=CCOC(c1ccc(cc1Cl)Cl)Cn1cncc1,0.024853325579571102,1,mazzatorta-prediction
-CCOC(=O)CC(C(=O)OCC)SP(=S)(OC)OC,0.025399859207357323,1,mazzatorta-prediction
-COP(=S)(Oc1nc(Cl)c(cc1Cl)Cl)OC,0.02586178816777326,1,mazzatorta-prediction
-COP(=O)(OC=C(Cl)Cl)OC,0.027361947682508048,1,mazzatorta-prediction
-CNC(=O)ON=C(C(=O)N(C)C)SC,0.02790918990194414,1,mazzatorta-prediction
-N#Cc1nn(c(c1S(=O)C(F)(F)F)N)c1c(Cl)cc(cc1Cl)C(F)(F)F,0.03032030255266643,1,mazzatorta-prediction
-N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)(C)C,0.03405677944151583,1,mazzatorta-prediction
-N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Br)Br,0.03520125762569351,1,mazzatorta-prediction
-COC(=O)c1c(nc(c(c1CC(C)C)C1=NCCS1)C(F)(F)F)C(F)F,0.0369855483661329,1,mazzatorta-prediction
-CON(C(=O)Nc1ccc(cc1)Br)C,0.0387394680026393,1,mazzatorta-prediction
-CN1CN(C)CSC1=S,0.040377923983948856,0.16666666666666666,mazzatorta-prediction
-CC1N(C(=O)NC2CCCCC2)C(=O)SC1c1ccc(cc1)Cl,0.040449156666094756,1,mazzatorta-prediction
-O=C(NC(=O)c1c(F)cccc1F)Nc1cc(Cl)c(c(c1F)Cl)F,0.040528628452314384,1,mazzatorta-prediction
-OC(=O)COc1ccc(cc1Cl)Cl,0.041293632648700326,1,mazzatorta-prediction
-CCOCn1c(c2ccc(cc2)Cl)c(c(c1C(F)(F)F)Br)C#N,0.04261363346757391,1,mazzatorta-prediction
-CCCCN(SN(C(=O)Oc1cccc2c1OC(C2)(C)C)C)CCCC,0.04291166973357382,1,mazzatorta-prediction
-CCCC1COC(O1)(Cn1cncn1)c1ccc(cc1Cl)Cl,0.0434822264129367,1,mazzatorta-prediction
-OC(=O)COc1nc(Cl)c(cc1Cl)Cl,0.04466802484258436,1,mazzatorta-prediction
-OC(C(Cl)(Cl)Cl)(c1ccc(cc1)Cl)c1ccc(cc1)Cl,0.04857785280417766,1,mazzatorta-prediction
-CCN(C(=O)SCc1ccc(cc1)Cl)CC,0.050018494066167395,1,mazzatorta-prediction
-CON(C(=O)Nc1ccc(c(c1)Cl)Cl)C,0.050114646105433334,1,mazzatorta-prediction
-CCOC(=O)c1cn2nc(cc2nc1C)OP(=S)(OCC)OCC,0.05083350716627098,1,mazzatorta-prediction
-CN(C(=S)SSC(=S)N(C)C)C,0.051165604885929104,0.16666666666666666,mazzatorta-prediction
-O=C(N(C)C)Nc1cccc(c1)C(F)(F)F,0.052337960737326904,1,mazzatorta-prediction
-Clc1ccc(c(c1)Cl)C1(OCCO1)Cn1cncn1,0.052851131392490244,1,mazzatorta-prediction
-CCNc1nc(NC(C)C)nc(n1)Cl,0.053039565463993625,1,mazzatorta-prediction
-CCOc1cc(ccc1[N+](=O)[O-])Oc1ccc(cc1Cl)C(F)(F)F,0.05314229228135397,1,mazzatorta-prediction
-COC(=O)N(C(=O)N1COC2(C(=N1)c1ccc(cc1C2)Cl)C(=O)OC)c1ccc(cc1)OC(F)(F)F,0.0535200267253048,1,mazzatorta-prediction
-BrC1COC(C1)(Cn1cncn1)c1ccc(cc1Cl)Cl,0.053693231227279335,1,mazzatorta-prediction
-Clc1ccc(cc1)C(c1ccccc1Cl)(c1cncnc1)O,0.0538394393290607,1,mazzatorta-prediction
-OC(=O)COc1ccc(cc1C)Cl,0.05445622621994639,1,mazzatorta-prediction
-Clc1ccc(cc1)CCC(C(C)(C)C)(Cn1cncn1)O,0.05584570953801489,1,mazzatorta-prediction
-CC(Nc1nc(NC(C)C)nc(n1)Cl)C,0.056357793591707304,1,mazzatorta-prediction
-ClC(=CC1C(C1(C)C)C(=O)OCc1cccc(c1)Oc1ccccc1)Cl,0.05722075950509786,1,mazzatorta-prediction
-N#CC(c1ccccc1)(Cn1cncn1)CCc1ccc(cc1)Cl,0.058271163381068226,1,mazzatorta-prediction
-Clc1ccc(cc1)CN(C(=O)Nc1ccccc1)C1CCCC1,0.059370957979920064,1,mazzatorta-prediction
-CC(OC(=O)C(c1ccc(cc1)Br)(c1ccc(cc1)Br)O)C,0.059446571641332435,1,mazzatorta-prediction
-Fc1ccc(cc1)[Si](c1ccc(cc1)F)(Cn1cncn1)C,0.06017129137104992,1,mazzatorta-prediction
-c1scc(n1)c1nc2c([nH]1)cccc2,0.06053075972454769,1,mazzatorta-prediction
-Cc1ccc(cc1)N(S(=O)(=O)N(C)C)SC(Cl)(Cl)F,0.06221038764609867,1,mazzatorta-prediction
-O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(c(c1)Cl)OC(C(OC(F)(F)F)F)(F)F,0.06365856284683262,1,mazzatorta-prediction
-CNC(=O)Oc1ccccc1OC(C)C,0.06370353086320016,1,mazzatorta-prediction
-O=C(C(C)(C)C)C(n1ncnc1)Oc1ccc(cc1)Cl,0.06511854133132516,1,mazzatorta-prediction
-CC(N(C(=O)SCC(=C(Cl)Cl)Cl)C(C)C)C,0.06648016528067341,1,mazzatorta-prediction
-CC(=CC1C(C1(C)C)C(=O)OCc1cccc(c1)Oc1ccccc1)C,0.06733522342267834,1,mazzatorta-prediction
-N#Cc1c(Cl)c(C#N)c(c(c1Cl)Cl)Cl,0.0678327800715719,1,mazzatorta-prediction
-COP(=S)(SCN1C(=O)c2c(C1=O)cccc2)OC,0.06797949317882576,1,mazzatorta-prediction
-O=C1N(OCC1(C)C)Cc1ccccc1Cl,0.06808330607768283,1,mazzatorta-prediction
-Clc1ccccc1c1nnc(nn1)c1ccccc1Cl,0.06951066613764369,1,mazzatorta-prediction
-N#Cc1c(Cl)cccc1Cl,0.07075139304586898,1,mazzatorta-prediction
-OC(C(C)(C)C)C(n1ncnc1)Oc1ccc(cc1)Cl,0.07314761133650725,1,mazzatorta-prediction
-Clc1cc(ccc1Oc1ccc(c(c1)C(=O)NS(=O)(=O)C)[N+](=O)[O-])C(F)(F)F,0.07458280632191289,1,mazzatorta-prediction
-CC(NC(=O)N1CC(=O)N(C1=O)c1cc(Cl)cc(c1)Cl)C,0.07509600041356945,1,mazzatorta-prediction
-ClC(SN1C(=O)C2C(C1=O)CC=CC2)(Cl)Cl,0.07976114599708196,1,mazzatorta-prediction
-CCCN(C(=O)n1cncc1)CCOc1c(Cl)cc(cc1Cl)Cl,0.08041755256984288,1,mazzatorta-prediction
-CC(OC(=O)Nc1cccc(c1)Cl)C,0.08082419839147705,1,mazzatorta-prediction
-COC(=O)Nc1nc2c([nH]1)cccc2,0.08503054904294756,1,mazzatorta-prediction
-O=C1N(c2cc(Cl)cc(c2)Cl)C(=O)C2(C1(C)C2)C,0.08848370769665356,1,mazzatorta-prediction
-CCOCN(c1c(C)cccc1CC)C(=O)CCl,0.08899189256538585,1,mazzatorta-prediction
-CNC(=O)Oc1cccc2c1OC(C2)(C)C,0.09304673991992557,1,mazzatorta-prediction
-C#CCOS(=O)OC1CCCCC1Oc1ccc(cc1)C(C)(C)C,0.09394776953418806,1,mazzatorta-prediction
-CCCCC(c1ccc(cc1)Cl)(Cn1cncn1)C#N,0.09995117906018544,1,mazzatorta-prediction
-O=C(N(C)C)Nc1ccc(c(c1)Cl)Cl,0.1009416101848442,1,mazzatorta-prediction
-Clc1c(Cl)c([N+](=O)[O-])c(c(c1Cl)Cl)Cl,0.10273222601735031,1,mazzatorta-prediction
-C=CC1(C)OC(=O)N(C1=O)c1cc(Cl)cc(c1)Cl,0.10370887199340015,1,mazzatorta-prediction
-CC(Oc1cccc(c1)NC(=O)c1ccccc1C(F)(F)F)C,0.10985201253951346,1,mazzatorta-prediction
-COCC(N(c1c(C)cccc1CC)C(=O)CCl)C,0.11629727690023284,1,mazzatorta-prediction
-O=C(c1ccc(cc1S(=O)(=O)C)C(F)(F)F)c1cnoc1C1CC1,0.11723725716301076,1,mazzatorta-prediction
-CCC(n1c(=O)[nH]c(c(c1=O)Br)C)C,0.1194525860672606,0.14814814814814814,mazzatorta-prediction
-CCC(Nc1c(cc(cc1[N+](=O)[O-])C(C)(C)C)[N+](=O)[O-])C,0.1270677771191105,1,mazzatorta-prediction
-O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(cc1)Cl,0.12890443143014826,1,mazzatorta-prediction
-COC(=O)N(c1ccccc1COc1ccn(n1)c1ccc(cc1)Cl)OC,0.13042675416012312,1,mazzatorta-prediction
-OC(=O)COc1nc(F)c(c(c1Cl)N)Cl,0.13193009603279973,1,mazzatorta-prediction
-ClCCOc1ccccc1S(=O)(=O)NC(=O)Nc1nc(C)nc(n1)OC,0.1364078153236936,1,mazzatorta-prediction
-ClCC1CN(C(=O)C1Cl)c1cccc(c1)C(F)(F)F,0.13694971527814467,1,mazzatorta-prediction
-CCOc1ccc(cc1)C(COCc1cccc(c1)Oc1ccccc1)(C)C,0.13831022672047752,1,mazzatorta-prediction
-CCC(=O)Nc1ccc(c(c1)Cl)Cl,0.13989660392944153,1,mazzatorta-prediction
-O=C1OC(C(=O)N1Nc1ccccc1)(C)c1ccc(cc1)Oc1ccccc1,0.1401261123626703,1,mazzatorta-prediction
-COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1CCC(F)(F)F,0.142337016902528,1,mazzatorta-prediction
-Cc1nc(Nc2ccccc2)nc(c1)C1CC1,0.14373018518177136,1,mazzatorta-prediction
-Nc1nc(NC2CC2)nc(n1)N,0.1456955513263534,1,mazzatorta-prediction
-CNC(=O)Oc1cccc2c1cccc2,0.14826054249092455,1,mazzatorta-prediction
-COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1C(=O)OC,0.1482947332858024,1,mazzatorta-prediction
-CC(Oc1cc(c(cc1Cl)Cl)n1nc(oc1=O)C(C)(C)C)C,0.1493882391935513,1,mazzatorta-prediction
-N#Cc1sc2=c(sc1C#N)c(=O)c1c(c2=O)cccc1,0.15445645494051075,1,mazzatorta-prediction
-N#Cc1c[nH]cc1c1cccc2c1OC(O2)(F)F,0.1582577446590667,1,mazzatorta-prediction
-CC(Oc1ccccn1)COc1ccc(cc1)Oc1ccccc1,0.15949214928142758,1,mazzatorta-prediction
-OC(C(C)(C)C)C(n1cncn1)Oc1ccc(cc1)c1ccccc1,0.16221937215111784,1,mazzatorta-prediction
-CCOC(=O)C(Cc1cc(c(cc1Cl)F)n1nc(n(c1=O)C(F)F)C)Cl,0.1642031063051573,1,mazzatorta-prediction
-C#CCC1=C(C)C(CC1=O)OC(=O)C1C(C1(C)C)C=C(C)C,0.1739298872266669,1,mazzatorta-prediction
-CC(=CC1C(C1(C)C)C(=O)OCc1coc(c1)Cc1ccccc1)C,0.1881056272435862,1,mazzatorta-prediction
-[O-][N+](=O)c1cc(Cl)c(c(c1)Cl)N,0.19997575454195834,1,mazzatorta-prediction
-CCOC(=O)NCCOc1ccc(cc1)Oc1ccccc1,0.20212320440807907,1,mazzatorta-prediction
-Oc1ccccc1c1ccccc1,0.2236833070650602,1,mazzatorta-prediction
-CCCCCCCCSC(=O)Oc1cc(Cl)nnc1c1ccccc1,0.2246734303430016,1,mazzatorta-prediction
-COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1Cl,0.22642545612510342,1,mazzatorta-prediction
-CC(Cc1ccc(cc1)C(C)(C)C)CN1CC(C)OC(C1)C,0.24428343783187767,1,mazzatorta-prediction
-COc1cccc(c1C)C(=O)NN(C(C)(C)C)C(=O)c1cc(C)cc(c1)C,0.2509122725796671,1,mazzatorta-prediction
-CCOc1ccc2c(c1)C(=CC(N2)(C)C)C,0.25786920018026926,1,mazzatorta-prediction
-O=C(C1=C(C)OCCS1)Nc1ccccc1,0.2689999596587689,1,mazzatorta-prediction
-COC(=O)NC(=S)Nc1ccccc1NC(=S)NC(=O)OC,0.26940550668026203,1,mazzatorta-prediction
-OC(=O)c1nc(Cl)c(c(c1Cl)N)Cl,0.2719107573679272,1,mazzatorta-prediction
-CCN(C(=O)C(Oc1cccc2c1cccc2)C)CC,0.2825970996116866,1,mazzatorta-prediction
-c1ccc(cc1)Nc1ccccc1,0.3003220074311764,1,mazzatorta-prediction
-COCN(c1c(CC)cccc1CC)C(=O)CCl,0.32452363754907937,1,mazzatorta-prediction
-COP(=O)(SC)N,0.33442367385922134,1,mazzatorta-prediction
-CCCCOCCOCCOCc1cc2OCOc2cc1CCC,0.3350153436602428,1,mazzatorta-prediction
-ClC(SN1C(=O)c2c(C1=O)cccc2)(Cl)Cl,0.35545219964034264,1,mazzatorta-prediction
-O=C(C1(C)CCCCC1)Nc1ccc(c(c1Cl)Cl)O,0.4457496787338429,1,mazzatorta-prediction
-COC(=O)NS(=O)(=O)c1ccc(cc1)N,0.46002982126481345,1,mazzatorta-prediction
-CC(N1C(=O)c2ccccc2NS1(=O)=O)C,0.5248637450337764,1,mazzatorta-prediction
-Cc1cccc2c1n1cnnc1s2,0.5255899798851922,1,mazzatorta-prediction
-OC(=O)CNCP(=O)(O)O,0.700841565636653,0.16666666666666666,mazzatorta-prediction
-COC(=O)c1c(Cl)c(Cl)c(c(c1Cl)Cl)C(=O)OC,0.7284064393720566,1,mazzatorta-prediction
-CCc1ccc(cc1)C(=O)NN(C(C)(C)C)C(=O)c1cc(C)cc(c1)C,0.8007493146491558,1,mazzatorta-prediction
-CC(C1(C)N=C(NC1=O)c1nc2ccccc2cc1C(=O)O)C,0.9871399288405841,1,mazzatorta-prediction
-CCC(c1noc(c1)NC(=O)c1c(OC)cccc1OC)(CC)C,1.7292009012474114,1,mazzatorta-prediction
-ClCCP(=O)(O)O,2.424380344082731,1,mazzatorta-prediction
+SMILES,LOAEL_measured_median,LOAEL_predicted,Confidence,Dataset
+N#Cc1nn(c(c1S(=O)C(F)(F)F)N)c1c(Cl)cc(cc1Cl)C(F)(F)F,0.0001361095787305931,0.030320302552666413,1,mazzatorta-prediction
+OC(=O)C(Oc1ccc(cc1)Oc1ncc(cc1Cl)C(F)(F)F)C,0.0002764719490734748,0.01643167623693211,1,mazzatorta-prediction
+CCOP(=S)(SCSC(C)(C)C)OCC,0.000277363084031507,0.0031106867605998826,1,mazzatorta-prediction
+CCSCSP(=S)(OCC)OCC,0.0006144925543928096,0.0016300938762789745,1,mazzatorta-prediction
+CCOP(=O)(SC(CC)C)SC(CC)C,0.0008728063120409454,0.011316358861878211,1,mazzatorta-prediction
+CNC(=O)CSP(=S)(OC)OC,0.001090477150926923,0.011977939066676562,1,mazzatorta-prediction
+COP(=O)(SC)N,0.0020548549621536454,0.33442367385922134,1,mazzatorta-prediction
+CSc1ccc(cc1C)OP(=S)(OC)OC,0.0025868753585247565,0.00998805136771544,1,mazzatorta-prediction
+CCOP(=S)(Oc1ccc2c(c1)oc(=O)c(c2C)Cl)OCC,0.0033630532459809582,0.010714479147398627,1,mazzatorta-prediction
+CCOP(=S)(Oc1ncn(n1)c1ccccc1)OCC,0.004149211972577347,0.019680297281264553,1,mazzatorta-prediction
+COc1sc(=O)n(n1)CSP(=S)(OC)OC,0.004713372383210075,0.011073447351926287,1,mazzatorta-prediction
+CCOP(=S)(OCC)SCSP(=S)(OCC)OCC,0.0049417895576815835,0.001847030797857757,1,mazzatorta-prediction
+CCOP(=O)(Oc1ccc(c(c1)C)SC)NC(C)C,0.005274306112287868,0.008291760373156038,1,mazzatorta-prediction
+CC(Cc1ccc(cc1)C(C)(C)C)CN1CC(C)OC(C1)C,0.005601648043851348,0.24428343783187767,1,mazzatorta-prediction
+COP(=S)(Oc1nc(Cl)c(cc1Cl)Cl)OC,0.006200913183680908,0.02586178816777326,1,mazzatorta-prediction
+OC(C(Cl)(Cl)Cl)(c1ccc(cc1)Cl)c1ccc(cc1)Cl,0.006747899500347733,0.04857785280417766,1,mazzatorta-prediction
+COC(=O)N(C(=O)N1COC2(C(=N1)c1ccc(cc1C2)Cl)C(=O)OC)c1ccc(cc1)OC(F)(F)F,0.006820319665576013,0.053520026725304856,1,mazzatorta-prediction
+Clc1ccc(cc1)C(c1ccccc1Cl)(c1cncnc1)O,0.007548400798826121,0.05383943932906067,1,mazzatorta-prediction
+COP(=S)(SCn1nnc2c(c1=O)cccc2)OC,0.0076105098020530036,0.021687531960393556,1,mazzatorta-prediction
+N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Br)Br,0.007818698763639501,0.03520125762569349,1,mazzatorta-prediction
+CCN(c1nc(cc(n1)C)OP(=S)(OC)OC)CC,0.008187766847509327,0.017635466228081265,1,mazzatorta-prediction
+CCCCOC(=O)C(Oc1ccc(cc1)Oc1ccc(cn1)C(F)(F)F)C,0.008855868434313272,0.018612600873944365,1,mazzatorta-prediction
+O=S1OCC2C(CO1)C1(C(C2(Cl)C(=C1Cl)Cl)(Cl)Cl)Cl,0.009706945232441807,0.00036386924695734017,1,mazzatorta-prediction
+CCCSP(=O)(SCCC)OCC,0.010068539755671456,0.0061411247977180205,1,mazzatorta-prediction
+COP(=O)(OC=C(Cl)Cl)OC,0.010408382170442241,0.027361947682508048,1,mazzatorta-prediction
+CC(Oc1cc(c(cc1Cl)Cl)n1nc(oc1=O)C(C)(C)C)C,0.012455788330375379,0.1493882391935513,1,mazzatorta-prediction
+CNC(=O)Oc1cccc2c1OC(C2)(C)C,0.0139433514779606,0.09304673991992557,1,mazzatorta-prediction
+N#Cc1c(Cl)c(C#N)c(c(c1Cl)Cl)Cl,0.015042627044387032,0.0678327800715719,1,mazzatorta-prediction
+Fc1ccc(cc1)[Si](c1ccc(cc1)F)(Cn1cncn1)C,0.01585325164934852,0.06017129137104992,1,mazzatorta-prediction
+CCOP(=S)(Oc1cc(C)nc(n1)C(C)C)OCC,0.01642869699075557,0.010980300528105117,1,mazzatorta-prediction
+N#Cc1c(Cl)cccc1Cl,0.016568667498017633,0.07075139304586898,1,mazzatorta-prediction
+CCOP(=S)(Oc1nc(Cl)c(cc1Cl)Cl)OCC,0.0171141884323489,0.01285055734967491,1,mazzatorta-prediction
+BrC1COC(C1)(Cn1cncn1)c1ccc(cc1Cl)Cl,0.017185416989653705,0.053693231227279314,1,mazzatorta-prediction
+CCN(C(=O)SCc1ccc(cc1)Cl)CC,0.018038670157326797,0.050018494066167395,1,mazzatorta-prediction
+CCCN(C(=O)n1cncc1)CCOc1c(Cl)cc(cc1Cl)Cl,0.01991156926953532,0.08041755256984288,1,mazzatorta-prediction
+CCOC(=O)c1cn2nc(cc2nc1C)OP(=S)(OCC)OCC,0.020087610909726116,0.05083350716627104,1,mazzatorta-prediction
+COP(=S)(SCN1C(=O)c2c(C1=O)cccc2)OC,0.02048398681663214,0.06797949317882583,1,mazzatorta-prediction
+CNC(=O)ON=C(C(=O)N(C)C)SC,0.022347753176858155,0.02790918990194414,1,mazzatorta-prediction
+COC(=O)N(c1ccccc1COc1ccn(n1)c1ccc(cc1)Cl)OC,0.02320682656135787,0.13042675416012312,1,mazzatorta-prediction
+CON(C(=O)Nc1ccc(c(c1)Cl)Cl)C,0.025090939601491648,0.050114646105433334,1,mazzatorta-prediction
+O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(cc1)Cl,0.025427825579407606,0.1289044314301482,1,mazzatorta-prediction
+O=C(N(C)C)Nc1ccc(c(c1)Cl)Cl,0.02574063309087087,0.10094161018484414,1,mazzatorta-prediction
+CCOC(=O)C(Oc1ccc(cc1)Oc1cnc2c(n1)ccc(c2)Cl)C,0.025750915471868897,0.022838553346053694,1,mazzatorta-prediction
+N#Cc1sc2=c(sc1C#N)c(=O)c1c(c2=O)cccc1,0.026997497601947272,0.15445645494051072,1,mazzatorta-prediction
+C=CC1(C)OC(=O)N(C1=O)c1cc(Cl)cc(c1)Cl,0.027961199362093195,0.10370887199340012,1,mazzatorta-prediction
+N#CC(c1ccc(c(c1)Oc1ccccc1)F)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.028207113064839383,0.022410110999451165,1,mazzatorta-prediction
+CCOC(=O)C(Cc1cc(c(cc1Cl)F)n1nc(n(c1=O)C(F)F)C)Cl,0.029112705155716945,0.1642031063051573,1,mazzatorta-prediction
+CON(C(=O)Nc1ccc(cc1)Br)C,0.03130067550140176,0.0387394680026393,1,mazzatorta-prediction
+CN1CN(C)CSC1=S,0.03266034652463028,0.040377923983948856,0.16666666666666666,mazzatorta-prediction
+ClCC1CN(C(=O)C1Cl)c1cccc(c1)C(F)(F)F,0.03316084217977319,0.1369497152781446,1,mazzatorta-prediction
+CCCCC(c1ccc(cc1)Cl)(Cn1cncn1)C#N,0.0341788251725187,0.09995117906018544,1,mazzatorta-prediction
+CCOP(=S)(SCn1c(=O)oc2c1ccc(c2)Cl)OCC,0.0345288315455876,0.007526804342298486,1,mazzatorta-prediction
+CCOCn1c(c2ccc(cc2)Cl)c(c(c1C(F)(F)F)Br)C#N,0.03508230910777224,0.04261363346757391,1,mazzatorta-prediction
+Clc1ccc(c(c1)Cl)C1(OCCO1)Cn1cncn1,0.03798219426521996,0.05285113139249019,1,mazzatorta-prediction
+CC(C(c1cncnc1)(c1ccc(cc1)OC(F)(F)F)O)C,0.038746408312020406,0.02067135597373707,1,mazzatorta-prediction
+CC(N(C(=O)SCC(=C(Cl)Cl)Cl)C(C)C)C,0.04102878665011248,0.06648016528067341,1,mazzatorta-prediction
+COP(=S)(Oc1ccc(c(c1)C)[N+](=O)[O-])OC,0.042491175292669145,0.015728201435628038,1,mazzatorta-prediction
+O=C1OC(C(=O)N1Nc1ccccc1)(C)c1ccc(cc1)Oc1ccccc1,0.045140176541360745,0.14012611236267028,1,mazzatorta-prediction
+CN(C(=S)SSC(=S)N(C)C)C,0.04783039657471141,0.051165604885929104,0.16666666666666666,mazzatorta-prediction
+CN(C(=O)Oc1nc(nc(c1C)C)N(C)C)C,0.05161859628615915,0.00947421966200617,1,mazzatorta-prediction
+C=CCOC(c1ccc(cc1Cl)Cl)Cn1cncc1,0.05350296944357954,0.024853325579571102,1,mazzatorta-prediction
+COCN(c1c(CC)cccc1CC)C(=O)CCl,0.05375006811431045,0.3245236375490794,1,mazzatorta-prediction
+CCOc1ccc2c(c1)C(=CC(N2)(C)C)C,0.05522147585284508,0.2578692001802692,1,mazzatorta-prediction
+O=C(c1ccc(cc1S(=O)(=O)C)C(F)(F)F)c1cnoc1C1CC1,0.05566064749641608,0.11723725716301076,1,mazzatorta-prediction
+Clc1ccccc1c1nnc(nn1)c1ccccc1Cl,0.05706818876652619,0.06951066613764371,1,mazzatorta-prediction
+OC(=O)COc1ccc(cc1C)Cl,0.057322598023636456,0.05445622621994636,1,mazzatorta-prediction
+CCOC(=O)NCCOc1ccc(cc1)Oc1ccccc1,0.057576722828150476,0.20212320440807913,1,mazzatorta-prediction
+N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C(c1ccc(cc1)Cl)C(C)C,0.05953797389131243,0.017235945805185275,1,mazzatorta-prediction
+N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)(C)C,0.06009909138187043,0.034056779441515854,1,mazzatorta-prediction
+CC(OC(=O)C(c1ccc(cc1)Br)(c1ccc(cc1)Br)O)C,0.06061453423316249,0.059446571641332435,1,mazzatorta-prediction
+Clc1cc(ccc1Oc1ccc(c(c1)C(=O)NS(=O)(=O)C)[N+](=O)[O-])C(F)(F)F,0.06267621846158328,0.07458280632191285,1,mazzatorta-prediction
+COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1Cl,0.06288907725176857,0.2264254561251038,1,mazzatorta-prediction
+CCCCN(SN(C(=O)Oc1cccc2c1OC(C2)(C)C)C)CCCC,0.06569530810416269,0.04291166973357382,1,mazzatorta-prediction
+O=C(NC(=O)c1c(F)cccc1F)Nc1cc(Cl)c(c(c1F)Cl)F,0.06690994773808298,0.040528628452314384,1,mazzatorta-prediction
+O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(c(c1)Cl)OC(C(OC(F)(F)F)F)(F)F,0.06758613754894155,0.06365856284683262,1,mazzatorta-prediction
+CCOc1ccc(cc1)C(COCc1cccc(c1)Oc1ccccc1)(C)C,0.06839527058523667,0.13831022672047752,1,mazzatorta-prediction
+C#CCOS(=O)OC1CCCCC1Oc1ccc(cc1)C(C)(C)C,0.06904967382858089,0.09394776953418806,1,mazzatorta-prediction
+OC(=O)COc1nc(Cl)c(cc1Cl)Cl,0.0721330354641874,0.04466802484258431,1,mazzatorta-prediction
+O=C(N(C)C)Nc1cccc(c1)C(F)(F)F,0.07395704796137248,0.052337960737326904,1,mazzatorta-prediction
+OC(C(C)(C)C)C(n1cncn1)Oc1ccc(cc1)c1ccccc1,0.07409262028018154,0.16221937215111784,1,mazzatorta-prediction
+CCNc1nc(NC(C)C)nc(n1)Cl,0.07789199862212233,0.0530395654639936,1,mazzatorta-prediction
+O=C(C1=C(C)OCCS1)Nc1ccccc1,0.08117223892684251,0.2689999596587689,1,mazzatorta-prediction
+CNC(=O)Oc1cc(C)c(c(c1)C)SC,0.0827758354922366,0.0217719179484974,1,mazzatorta-prediction
+OC(C(C)(C)C)C(n1ncnc1)Oc1ccc(cc1)Cl,0.08452667530010859,0.07314761133650725,1,mazzatorta-prediction
+O=C(C(C)(C)C)C(n1ncnc1)Oc1ccc(cc1)Cl,0.08510674803234901,0.06511854133132516,1,mazzatorta-prediction
+CC(NC(=O)N1CC(=O)N(C1=O)c1cc(Cl)cc(c1)Cl)C,0.08783443947180365,0.07509600041356941,1,mazzatorta-prediction
+CCC(=O)Nc1ccc(c(c1)Cl)Cl,0.09170952329114665,0.1398966039294415,1,mazzatorta-prediction
+N#CC(c1ccccc1)(Cn1cncn1)CCc1ccc(cc1)Cl,0.09203781459712614,0.058271163381068254,1,mazzatorta-prediction
+ClC(=CC1C(C1(C)C)C(=O)OCc1cccc(c1)Oc1ccccc1)Cl,0.09583741068272783,0.05722075950509786,1,mazzatorta-prediction
+OC(=O)C(Oc1ccc(cc1Cl)Cl)C,0.0967821447110451,0.020745537156134766,1,mazzatorta-prediction
+COP(=O)(NC(=O)C)SC,0.10236623790044716,0.01603420284847195,1,mazzatorta-prediction
+O=C1N(c2cc(Cl)cc(c2)Cl)C(=O)C2(C1(C)C2)C,0.11086164698737522,0.08848370769665356,1,mazzatorta-prediction
+COC(=O)c1c(nc(c(c1CC(C)C)C1=NCCS1)C(F)(F)F)C(F)F,0.11151045292283465,0.0369855483661329,1,mazzatorta-prediction
+Clc1ccc(cc1)CCC(C(C)(C)C)(Cn1cncn1)O,0.11516531274058425,0.05584570953801489,1,mazzatorta-prediction
+COC(=O)NC(=S)Nc1ccccc1NC(=S)NC(=O)OC,0.12412602138191925,0.26940550668026203,1,mazzatorta-prediction
+CCOc1cc(ccc1[N+](=O)[O-])Oc1ccc(cc1Cl)C(F)(F)F,0.12855945536132327,0.05314229228135397,1,mazzatorta-prediction
+CCc1ccc(cc1)C(=O)NN(C(C)(C)C)C(=O)c1cc(C)cc(c1)C,0.1367492600870436,0.8007493146491557,1,mazzatorta-prediction
+c1scc(n1)c1nc2c([nH]1)cccc2,0.1490700414533971,0.06053075972454769,1,mazzatorta-prediction
+CC(N1C(=O)c2ccccc2NS1(=O)=O)C,0.14982590230152565,0.5248637450337764,1,mazzatorta-prediction
+N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.15013314047110002,0.017186299494700404,1,mazzatorta-prediction
+Cc1cccc2c1n1cnnc1s2,0.1506048130761757,0.5255899798851918,1,mazzatorta-prediction
+ClC(SN1C(=O)c2c(C1=O)cccc2)(Cl)Cl,0.15174119992085178,0.35545219964034264,1,mazzatorta-prediction
+CCC(Nc1c(cc(cc1[N+](=O)[O-])C(C)(C)C)[N+](=O)[O-])C,0.15338553104123837,0.1270677771191105,1,mazzatorta-prediction
+Cc1nc(Nc2ccccc2)nc(c1)C1CC1,0.15801925188118618,0.1437301851817713,1,mazzatorta-prediction
+c1ccc(cc1)Nc1ccccc1,0.16546268922726798,0.30032200743117654,1,mazzatorta-prediction
+Clc1cc(Cl)c(cc1n1nc(n(c1=O)C(F)F)C)NS(=O)(=O)C,0.1730416993562668,0.024541035827570765,1,mazzatorta-prediction
+CCCC1COC(O1)(Cn1cncn1)c1ccc(cc1Cl)Cl,0.1767866659490005,0.0434822264129367,1,mazzatorta-prediction
+C#CCC1=C(C)C(CC1=O)OC(=O)C1C(C1(C)C)C=C(C)C,0.18559079091504613,0.1739298872266669,1,mazzatorta-prediction
+COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1CCC(F)(F)F,0.20017699986539617,0.142337016902528,1,mazzatorta-prediction
+CC(=CC1C(C1(C)C)C(=O)OCc1coc(c1)Cc1ccccc1)C,0.2068313193675311,0.188105627243586,1,mazzatorta-prediction
+CCOCN(c1c(C)cccc1CC)C(=O)CCl,0.21666838084755125,0.08899189256538591,1,mazzatorta-prediction
+CC(Nc1nc(NC(C)C)nc(n1)Cl)C,0.2198425631222415,0.056357793591707304,1,mazzatorta-prediction
+CNC(=O)Oc1ccccc1OC(C)C,0.23417894234275483,0.06370353086320013,1,mazzatorta-prediction
+CCCCCCCCSC(=O)Oc1cc(Cl)nnc1c1ccccc1,0.2408184692696632,0.2246734303430016,1,mazzatorta-prediction
+O=C1N(OCC1(C)C)Cc1ccccc1Cl,0.253443853488009,0.06808330607768283,1,mazzatorta-prediction
+Cc1ccc(cc1)N(S(=O)(=O)N(C)C)SC(Cl)(Cl)F,0.25917417547047744,0.06221038764609867,1,mazzatorta-prediction
+CC1N(C(=O)NC2CCCCC2)C(=O)SC1c1ccc(cc1)Cl,0.2594374890563992,0.04044915666609474,1,mazzatorta-prediction
+CCN(C(=O)C(Oc1cccc2c1cccc2)C)CC,0.27189561466298434,0.2825970996116866,1,mazzatorta-prediction
+OC(=O)COc1ccc(cc1Cl)Cl,0.28049546719325014,0.041293632648700326,1,mazzatorta-prediction
+CCOC(=O)CC(C(=O)OCC)SP(=S)(OC)OC,0.2921073325236663,0.025399859207357336,1,mazzatorta-prediction
+CNC(=O)Oc1cccc2c1cccc2,0.2981792578159244,0.14826054249092452,1,mazzatorta-prediction
+Clc1ccc(cc1)CN(C(=O)Nc1ccccc1)C1CCCC1,0.31170568268957544,0.0593709579799201,1,mazzatorta-prediction
+ClCCOc1ccccc1S(=O)(=O)NC(=O)Nc1nc(C)nc(n1)OC,0.31207588849423984,0.1364078153236936,1,mazzatorta-prediction
+CCC(n1c(=O)[nH]c(c(c1=O)Br)C)C,0.3169092998307417,0.1194525860672606,0.14814814814814814,mazzatorta-prediction
+ClC(SN1C(=O)C2C(C1=O)CC=CC2)(Cl)Cl,0.32935301892961466,0.07976114599708196,1,mazzatorta-prediction
+CC(=CC1C(C1(C)C)C(=O)OCc1cccc(c1)Oc1ccccc1)C,0.42802021191337764,0.0673352234226783,1,mazzatorta-prediction
+CC(Oc1ccccn1)COc1ccc(cc1)Oc1ccccc1,0.43361266621389954,0.1594921492814276,1,mazzatorta-prediction
+N#Cc1c[nH]cc1c1cccc2c1OC(O2)(F)F,0.44926154899338216,0.1582577446590667,1,mazzatorta-prediction
+Clc1c(Cl)c([N+](=O)[O-])c(c(c1Cl)Cl)Cl,0.47403843842257615,0.10273222601735031,1,mazzatorta-prediction
+COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1C(=O)OC,0.4837900188743661,0.1482947332858024,1,mazzatorta-prediction
+Nc1nc(NC2CC2)nc(n1)N,0.5144905821145022,0.1456955513263534,1,mazzatorta-prediction
+CCCCOCCOCCOCc1cc2OCOc2cc1CCC,0.5170806512852409,0.3350153436602428,1,mazzatorta-prediction
+COCC(N(c1c(C)cccc1CC)C(=O)CCl)C,0.5285529966699751,0.11629727690023284,1,mazzatorta-prediction
+[O-][N+](=O)c1cc(Cl)c(c(c1)Cl)N,0.7245881151318344,0.19997575454195834,1,mazzatorta-prediction
+COC(=O)c1c(Cl)c(Cl)c(c(c1Cl)Cl)C(=O)OC,0.7681550277825105,0.7284064393720566,1,mazzatorta-prediction
+COC(=O)NS(=O)(=O)c1ccc(cc1)N,0.7817895162025876,0.46002982126481323,1,mazzatorta-prediction
+OC(=O)c1nc(Cl)c(c(c1Cl)N)Cl,0.8282972172278201,0.2719107573679272,1,mazzatorta-prediction
+CC(C1(C)N=C(NC1=O)c1nc2ccccc2cc1C(=O)O)C,0.8351128195663594,0.9871399288405841,1,mazzatorta-prediction
+COC(=O)Nc1nc2c([nH]1)cccc2,0.8499546987221808,0.08503054904294756,1,mazzatorta-prediction
+CC(Oc1cccc(c1)NC(=O)c1ccccc1C(F)(F)F)C,0.9077984526598573,0.1098520125395136,1,mazzatorta-prediction
+O=C(C1(C)CCCCC1)Nc1ccc(c(c1Cl)Cl)O,0.9662594125910484,0.44574967873384286,1,mazzatorta-prediction
+ClCCP(=O)(O)O,0.9723587138566308,2.424380344082731,1,mazzatorta-prediction
+COc1cccc(c1C)C(=O)NN(C(C)(C)C)C(=O)c1cc(C)cc(c1)C,1.1154252951100516,0.2509122725796672,1,mazzatorta-prediction
+CCC(c1noc(c1)NC(=O)c1c(OC)cccc1OC)(CC)C,1.5854670852219546,1.729200901247411,1,mazzatorta-prediction
+OC(=O)COc1nc(F)c(c(c1Cl)N)Cl,1.9605490478397496,0.13193009603279973,1,mazzatorta-prediction
+CC(OC(=O)Nc1cccc(c1)Cl)C,2.340158076742021,0.08082419839147705,1,mazzatorta-prediction
+Oc1ccccc1c1ccccc1,3.119727015073393,0.22368330706506026,1,mazzatorta-prediction
+OC(=O)CNCP(=O)(O)O,5.559726007239,0.700841565636653,0.16666666666666666,mazzatorta-prediction
diff --git a/paper/data/mazzatorta-test-predictions.id b/paper/data/mazzatorta-test-predictions.id
new file mode 100644
index 0000000..65e6336
--- /dev/null
+++ b/paper/data/mazzatorta-test-predictions.id
@@ -0,0 +1 @@
+56c429252b72ed0afe000004
diff --git a/paper/data/mazzatorta.csv~ b/paper/data/mazzatorta.csv~
deleted file mode 100644
index 921a53b..0000000
--- a/paper/data/mazzatorta.csv~
+++ /dev/null
@@ -1,568 +0,0 @@
-SMILES,LOAEL_mmol_kg_bw_day
-C1=C(C(=CC(=C1NN=C3C2=C(C=C([S]([O-])(=O)=O)C=C2)C=CC3=O)OC)[S]([O-])(=O)=O)C.[Na+].[Na+],7.531899781214326
-O1C(=O)C(O)=C(O)C1C(O)CO,17.323010613197102
-C1(C)=C(C=CC(C)=CC=CC(C)=CC=CC=C(C)C=CC=C(C)C(=O)OC)C(C)(C)CCC1,1.119409718240544
-c(cccc1)(c1)C(C)C,3.8438632722857955
-O=C(OCCCC)c(c(ccc1)C(=O)OCCCC)c1,2.1556100397968727
-O=C(OCC)c(c(ccc1)C(=O)OCC)c1,19.95615854702247
-O=C(OC(OC(OC1C)C)C1)C,0.7175892491582392
-Oc(c(ccc1)C)c1C,0.04911414454620167
-Oc(ccc(c1C)C)c1,0.1145996706078039
-O=C(OCC)C=C,2.477130986890983
-c(cccc1)(c1)CC,3.843074459567654
-OCCO,4.027850816139244
-c(ccc1C(=O)OCC(=O)OCC)cc1C(=O)OCC,8.919866912731305
-O=C,2.73096831477274
-O=C(O)C=CC(=O)O,9.313172081918696
-OCC(O)CO,74.73899985905678
-O=C(OC)c(ccc(O)c1)c1,9.858865736182537
-O=C(OCCC)c(ccc(O)c1)c1,8.324062177858794
-CC(CCC(=O)(O))C3CCC4C2CCC1CC(O)CCC1(C)C2CCC34C,1.3277652171188237
-OC(C(CCC1C)C(C)C)C1,3.7948308388559964
-O=C(O)C(=C)C,2.8807316686731115
-O=C(OC)c(c(O)ccc1)c1,2.366127776683809
-Oc(cccc1)c1,3.655248831064175
-O=C(OCCC)c(cc(O)c(O)c1O)c1,4.071644352421931
-OCC(O)C1C(O)=C(O)C(=O)O1,8.82332300652517
-c(cccc1)(c1)C=C,0.20163396483810905
-O=Cc(occ1)c1,0.624453213155231
-NCCNc1cccc2ccccc12,0.4241543329029509
-CN(C)(C)CCCl,2.2427665071284903
-O=C(Nc(ccc(c1)C(=O)CCl)c1)C,7.465334624174738
-c(ccc(c1)Cl)(c1)C(c(ccc(c2)Cl)c2)C(Cl)(Cl)Cl,0.09027148189044054
-CC(Oc1cc(Cl)c(Cl)cc1Cl)C(=O)(O),0.03228091610123117
-O=N(=O)C(=CC=C1OC)C=C1N=NC(C(O)=C2C(=O)NC(=CC=C4)C=C4N(=O)=O)=C(C=C3)C(=C2)C=C3,4.308389780762046
-O=N(=O)C(C=C1)=CC(OCCO)=C1NCCO,0.9453881078267568
-Cc1cccc(CC)c1N(C(=O)CCl)COCC,0.18534506246313948
-C1=C(C(=CC=C1OC2=CC=C(C=C2Cl)C(F)(F)F)[N+](=O)[O-])C(=O)[O-].[Na+],0.46919094173712006
-CCc1cccc(CC)c1N(COC)C(=O)CCl,0.05560351873894184
-O=C(Nc(ccc(OCC)c1N)c1)C,6.1010029534002825
-Oc(ccc(N)c1)c1,6.286318149278613
-CC(N)CC(=CC=C1)C=C1,0.036980547196719206
-O(c(ccc(c1)C=CC)c1)C,2.3211612715861247
-COc1ccc(N)cc1,3.8488877932280037
-O=C(O)c(c(N)ccc1)c1,20.060380944519448
-Clc2cccc(c2)c1ccccc1,0.021202965065040626
-O=C(NC(C(=O)OC)Cc(cccc1)c1)C(N)CC(=O)O,0.4994850207500349
-n1c2ccc(Cl)cc2ncc1Oc3ccc(OC(C)C(=O)OCC)cc3,0.009924832004782804
-COC(=O)NS(=O)(=O)c1ccc(N)cc1,0.7817895162025876
-S=P(OC)(OC)SCN1N=Nc2ccccc2C1(=O),0.0011344859332252924
-CNC(=O)Oc1ccccc1OC(C)C,0.23895810443138246
-CC(C)(C)C(=O)C(Oc1ccc(Cl)cc1)n2cncn2,0.08510674803234901
-O=S(O)(=O)C(=CC=C1)C=C1CN(CC)=C(C=C2)C=CC2=C(C(C=C3)=CC=C3N(C)C)C(C=C4)=CC=C4N(CC)CC(C=C5)=CC(=C5)S(=O)(=O)O,1.009963174498295
-c(c(cccc1)c1)(cccc2)c2,1.6211890708511503
-BrC(Cl)Cl,0.7935120501519148
-ClC(Cl)C(Cl)(Cl)SN2C(=O)C1CC=CCC1C2(=O),0.034377949341570596
-O=C(Oc(c(c(ccc1)cc2)c1)c2)NC,0.07752660703214034
-CC1=C(SCCO1)C(=O)Nc2ccccc2,0.1274956638724717
-ClC1CC2C(C1Cl)C3(Cl)C(=C(Cl)C2(Cl)C3(Cl)Cl)Cl,0.0006588923229380624
-O=C(O)CCl,0.317470328693963
-ClC(=CC=C1N)C=C1,0.047032433723070206
-CC(C)OC(=O)C(O)(c1ccc(Cl)cc1)c2ccc(Cl)cc2,0.026531991066147967
-n1c(OC)nc(C)nc1NC(=O)NS(=O)(=O)c2ccccc2Cl,0.06987675250196507
-OS(=O)(=O)C(C(=CC=C2)C1=C2)=CC=C1N=NC(C(O)=C3N=NC(C(C=C5)=C4C=C5)=CC=C4S(O)(=O)=O)=CC(=C3O)CO,1.2093346835379808
-S=P(OCC)(OCC)Oc1ccc2C(C)=C(Cl)C(=O)Oc2c1,0.0022052807653206367
-CNP(=O)(OC)Oc1ccc(cc1Cl)C(C)(C)C,0.013712205220154254
-C(C1C2C(C(O)C(O1)OC8C(OC(OC7C(OC(OC6C(OC(OC5C(C(C(OC4C(C(C(OC3C(C(C(O2)OC3CO)O)O)OC4CO)O)O)OC5CO)O)O)C(C6O)O)CO)C(C7O)O)CO)C(C8O)O)CO)O)O,1.4097112541302337
-n1c(N)nc(N)nc1NC2CC2,0.09026150563412319
-COC(=O)c1c(Cl)c(Cl)c(C(=O)OC)c(Cl)c1Cl,1.5061863289853148
-O=C(O)C(Cl)(Cl)C,0.1970361896096669
-Nc1cc(N)c(O)cc1,0.2013846888993215
-FC(F)(Cl)Cl,1.2405561628307704
-ClCCl,0.5887022388817106
-O=P(OC)(OC)OC=C(Cl)Cl,0.010408382386229365
-OC(c1ccc(Cl)cc1)(c2ccc(Cl)cc2)C(Cl)(Cl)Cl,0.05398319600278186
-ClC4=C(Cl)C5(Cl)C3C1CC(C2OC12)C3C4(Cl)C5(Cl)Cl,0.0001312648375209092
-CN(=C1C(C=C2)=CC=C2)N(C)C(=C1)C(C=C3)=CC=C3,0.49533572071941767
-O=C(NC(=O)c(c(F)ccc1)c1F)Nc(ccc(c2)Cl)c2,0.025749696789273527
-CC1=C(C)S(=O)(=O)CCS1(=O)=O,0.047557630336441704
-O=C(NC)CSP(OC)(OC)=S,0.001090477150926923
-COc1ccc(N)c(OC)c1,1.8018201517132568
-COP(=O)OC,0.9086866261501474
-CC(=C(N(=O)=O)C=C1N(=O)=O)C=C1,0.1866762157041476
-CN(C)C(=O)C(c1ccccc1)c2ccccc2,0.1253592168358431
-N(c(cccc1)c1)c(cccc2)c2,0.1831908345016181
-C(C=C1)(=N(C=C1)CC2)C(N2=C3)=CC=C3,0.002984821462389602
-CCOP(=S)(OCC)SCCSCC,0.00036442614798427517
-NC(=S)NNC(N)=S,6.303842268414009
-O=C(N(C)C)Nc(ccc(c1Cl)Cl)c1,0.02574063309087087
-O=P(O)(O)CCCl,1.0381053884590363
-O=C(OCC)C(O1)C1(c(cccc2)c2)C,0.8485352051922984
-COC(=O)NC(=NC1=C2)NC1=CC(=C2)SC(C=C3)=CC=C3,0.050108966959550236
-CN1C=C(c2ccccc2)C(=O)C(c3cc(C(F)(F)F)ccc3)=C1,0.07591497971688389
-c1cc(C(F)(F)F)cc(Cl)c1NC(C(C)C)C(=O)OC(C(#N))c2cccc(Oc3ccccc3)c2,0.004971041792562443
-S=P(OCC)(Sc1ccccc1)CC,0.006414179135682054
-n1c(C)nc(OC)nc1NC(=O)NS(=O)(=O)c2ccsc2C(=O)OC,0.06453419527613821
-C(C(C(C(C1Cl)Cl)Cl)Cl)(C1Cl)Cl,0.017192183580611947
-O=N(=O)N(CN1N(=O)=O)CN(C1)N(=O)=O,0.006753217705640206
-O=C(N=C(N(C1(=O))C)N(C)C)N1C(CCCC2)C2,0.19816672003956992
-n(c(c(ccc1)cc2)c1O)c2,0.9851335765350275
-c1cc(Cl)cc(Cl)c1C(OCC=C)Cn2cncc2,0.13459866849613178
-COc1cccc(OC)c1C(=O)Nc2onc(C(C)(CC)CC)c2,0.15252975563710267
-n1c(OC)cc(OC)nc1NC(=O)NS(=O)(=O)Cc2ccccc2C(=O)OC,0.7529208210920754
-CCOC(=O)CC(SP(=S)(OC)OC)C(=O)OCC,0.1513509494941276
-CN(C)(CCC1)CC1,1.3133857473480115
-O=P(SCCCC)(SCCCC)SCCCC,0.003974424546249488
-COCC(=O)N(C(C)C(=O)OC)c1c(C)cccc1C,0.22374845318219344
-S=P(OC)(OC)SCN1C(=O)SC(OC)=N1,0.006615259485207122
-CNC(=O)ON=C(C)SC,0.061648442359631114
-COP(=S)(OC)Oc1ccc(cc1)N(=O)(=O),0.0009498211030948742
-O=C1N(N)C(SC)=NN=C1C(C)(C)C,0.06999926640768805
-COP(=O)(OC)OC(Br)C(Cl)(Cl)Br,0.005252325112411575
-OC(C(N)C1O)C(C)OC1(C)OC(CC(C)(C(C2O)C(O)=O)OC(O)(C2)CC(O)CC(C)(O3)C3C=C4)C=CC=CC=CC=CCC(C)OC4=O,0.10172294366080416
-O=N(=O)c(ccc(c1N)C)c1,0.05257947683683445
-O=N(=O)c(c(N)ccc1N)c1,0.5681125108300529
-O=C(O)C(=C(N)C=C1N(=O)=O)C=C1,6.506215164982792
-O=N(=O)c(c(c(ccc1)cc2)c1)c2,0.952831491808421
-c12c(N=Nc3ccccc3)c(O)ccc1cc(S(=O)(=O)O)cc2,0.5482080783455129
-CC(C)Oc1cc(c(Cl)cc1Cl)N2N=C(OC2(=O))C(C)(C)C,0.01448347496337274
-CNC(=O)ON=C(SC)C(=O)N(C)C,0.02280382932847922
-CCOP(=S)(OCC)Oc1ccc(cc1)N(=O)=O,0.012016729209736626
-Oc(c(c(c(c1Cl)Cl)Cl)Cl)c1Cl,0.037546481605565646
-NC(=N)NC(=N)NCCc1ccccc1,0.35564719019232227
-COP(=S)(OC)SCN2C(=O)c1ccccc1C2(=O),0.06302765174348351
-CCN(CC)C(=O)C(Cl)=C(C)OP(=O)(OC)OC,0.041042640567373466
-ClC3C6(Cl)C4C2C1OC1C5C2C3(Cl)C(Cl)(C45)C6(Cl)Cl,0.0018377077252927285
-O=C(OC(=O)c1cccc2)c12,8.000509872156579
-Nc1c(Cl)c(Cl)nc(C(=O)(O))c1Cl,0.24848916516834604
-CCN(CC)c1nc(C)cc(OP(=S)(OC)OC)n1,0.008187766847509327
-Nc3ccc2cc1ccc(N)cc1nc2c3,0.22461542255370148
-CCC(=O)Nc1ccc(Cl)c(Cl)c1,0.09170952329114665
-Clc1cc(Cl)ccc1C2(Cn3ncnc3)OC(CCC)CO2,0.07305234130123987
-O=C(N)c(nccn1)c1,6.408762052980724
-Oc1cc(O)c2C(=O)C(O)=C(c3cc(O)c(O)cc3)Oc2c1,6.729846937340625
-CCC(O)(C)C#C,0.4687038301254292
-CC(C(NCC)=C1)=CC(C1=O2)=C(C(C2=C3)=CC(C)=C3NCC)C(=CC=C4)C(=C4)C(=O)OCC,0.027053999376946393
-O=C(NS(=O)(=O)c1cccc2)c12,19.66323569952698
-c1cc(Cl)ccc1C2SC(=O)N(C(=O)NC3CCCCC3)C2C,0.4534134152107278
-n(c(nc(n1)NCC)NCC)c1Cl,0.024794616275543167
-O=[S](NC1CCCCC1)(=O)[O-].[Na+],17.900880706433757
-O=C(OCC(C1OCC(C1O)O)O)CCCCCCCCCCC,19.866710908558982
-O(CC1O)C(C1O)C(O)COC(=O)CCCCCCCCCCCCCCCCC,16.727105323218392
-O=S(=O)(Nc(nc(cc1C)C)n1)c(ccc(N)c2)c2,0.1185642260256668
-CCNc1nc(NC(C)(C)C)nc(SC)n1,0.06214876624755196
-Oc(c(cc(c1)C(C)(C)C)Cl)c1,1.1697007223226876
-C(C(Cl)Cl)(Cl)Cl,0.6434343954290421
-COP(=O)(OC)OC(=CCl)c1cc(Cl)c(Cl)cc1Cl,0.2732525485855328
-CCN(CC)C(=O)SCc1ccc(Cl)cc1,0.019396419126203733
-COC(=O)NC(=S)Nc1ccccc1NC(=S)NC(=O)OC,0.09345959256991566
-N(C(=S)SSC(N(C)C)=S)(C)C,0.06238747379310184
-c12OC(CCCC(C)CCCC(C)CCCC(C)C)(C)CCc1c(C)c(OC(=O)C)c(C)c2C,4.230630449818821
-Cc1cc(N)ccc1NOS(O)(=O)=O,0.8431459792705229
-C(Br)(C(Br)(Br)Br)C1C(C)(C)C1C(=O)OC(C(#N))c2cccc(Oc3ccccc3)c2,0.004511229623452476
-O=C(O)COc(c(cc(c1Cl)Cl)Cl)c1,0.03914162418169542
-FC(F)(F)C(=CC(N(=O)=O)=C1N(C(C)C)C(C)C)C=C1N(=O)=O,0.1193036069506878
-Cc1cc(C)c(N)cc1C,0.46595489467866197
-CC(O)(C(O)C(O1)C)CC1(C)OC(C(C)O2)C(C(O)C2(C)OC(C(C)C(O)CC(=O)OC(CC)C3COC(C(OC)C4OC)OC(C)C4O)C(CC=O)CC(C)C(=O)C=CC(=C3)C)N(C)C,0.5295750507618869
-c1c(Cl)cc(Cl)cc1N2C(=O)C(C)(C=C)OC2(=O),0.25479642918707424
-O=C(OC(CCCC(O)CCCCCc1cc(O)cc2O)C)c12,0.0006203550142861557
-COC(=O)C1(C2=CC=CC=C2C3=C1C=C(C=C3)Cl)O,0.546052144921948
-CC(C(=O)O)OC1=CC(=CC=C1)Cl,0.4984573741185779
-P12P3P1P23,11.881024454247726
-C(CO)O,6.44456130582279
-CCCCOCC(C)OCC(C)O,0.6726932978936081
-C(CO)O,32.22280652911395
-C(CO)O,14.822491003392418
-[O-][As](=O)([O-])[O-],0.044990181342823746
-[Si](CN1C=NC=N1)(C2=CC=C(C=C2)F)C3=CC=C(C=C3)F,0.007657523838454347
-N(C(=S)SSC(N(C)C)=S)(C)C,0.04783039657471141
-COP(=O)(N)SC,0.006377136181192296
-N(C(=S)SSC(N(C)C)=S)(C)C,0.02275063210988447
-COP(=O)(NC(=O)(C))SC,0.1910836440808347
-C1=CC=C(C=C1)NC(=O)NC2=CN=NS2,0.13620822278144273
-CCOP(=S)(NC(C)C)OC1=CC=CC=C1C(=O)OC(C)C,0.0014476216329334154
-CC(=NOC(=O)N(C)SN(C)C(=O)ON=C(C)SC)SC,0.02821118623185781
-CCOP(=S)(OCC)OC1=NC(=NC(=C1)C)C(C)C,0.004928609097226672
-NC(CCCC1)C1,0.5898716318329822
-CN1C=C(c2ccccc2)C(=O)C(c3cc(C(F)(F)F)ccc3)=C1,0.09868947363194906
-ClC1CC2C(C1Cl)C3(Cl)C(=C(Cl)C2(Cl)C3(Cl)Cl)Cl,0.0012201709684038192
-CCCCC(CC)COC(=O)C1=CC=CC=C1C(=O)OCC(CC)CCCC,0.5120902983161549
-OC(=O)CNCP(O)(O)=O,5.914602135360638
-C1CNC(=S)N1,0.0022514113902230405
-O=C(N(OC)C)Nc(ccc(c1Cl)Cl)c1,0.025090939601491648
-C1=CC=C2C(=C1)NC(=S)S2,4.484270077422418
-CCOP(=S)(OCC)OC1=NC(=C(C=C1Cl)Cl)Cl,0.028523647387248163
-c(c(c(c(c1Cl)Cl)Cl)Cl)(c1Cl)Cl,0.0010183220720957982
-COc1ccc(cc1)C(c2ccc(OC)cc2)C(Cl)(Cl)Cl,0.36163948246786254
-C1=CC(=CC=C1Cl)Cl,2.0407891160090657
-CC(C)OC(=O)NC1=CC(=CC=C1)Cl,2.340158076742021
-COP(=O)(OC)OC=C(Cl)Cl,0.009729574839301364
-CCOP(=S)(OCC)Oc1ccc(cc1)N(=O)=O,0.001442007505168395
-CNC(=O)N(C)c1nnc(s1)C(C)(C)C,0.1751969016077557
-CCCCOCCOCCOCC1=CC2=C(C=C1CCC)OCO2,0.7386866446932013
-CC(C(=O)O)OC1=C(C=C(C=C1)Cl)Cl,0.03828744186371015
-CC1(C(C1C(=O)OC(C#N)C2=CC(=C(C=C2)F)OC3=CC=CC=C3)C=C(Cl)Cl)C,0.028782768433509572
-C(#N)c(c(c(c(c1C(#N))Cl)Cl)Cl)c1Cl,0.015042627044387032
-O=C(OCC)C(O)(c(ccc(c1)Cl)c1)c(ccc(c2)Cl)c2,0.056582904287311254
-O=C(N(C)C)Nc(ccc(c1)Cl)c1,0.6292491939569526
-O=C(N(SC(Cl)(Cl)Cl)C(=O)C1CC=CC2)C12,0.3326798171006209
-CCc1cccc(C)c1N(C(C)COC)C(=O)CCl,0.5285529966699751
-C1=CC(=C(C=C1Cl)Cl)OCC(=O)O,0.022620602193004043
-CCNC1=NC(=NC(=N1)Cl)NC(C)(C)C#N,0.005193343612552968
-C(C(C(C(C1Cl)Cl)Cl)Cl)(C1Cl)Cl,0.015816808894162992
-COP(=S)(OC)OC1=CC(=C(C=C1Cl)Cl)Cl,0.15549919159080278
-C1C2C=CC1C3C2C4(C(=C(C3(C4(Cl)Cl)Cl)Cl)Cl)Cl,2.7404023436797774e-05
-CC1(C(C1C(=O)OCC2=CC(=CC=C2)OC3=CC=CC=C3)C=C(Cl)Cl)C,0.06389160712181856
-CCN(CC)C(=O)C(C)OC1=CC=CC2=CC=CC=C21,0.36852210915226874
-ClC1C=CC2C1C3(Cl)C(=C(Cl)C2(Cl)C3(Cl)Cl)Cl,0.0006696708996117783
-CC(=CC(=O)NC)OP(=O)(OC)OC,0.0020164586039868883
-CC(C)C1(C)N=C(NC1(=O))c3nc2ccccc2cc3C(=O)(O),0.06423944765895072
-CC(C)Nc1nc(Cl)nc(NC(C)C)n1,0.21766590408142725
-CC(C(=O)O)(Cl)Cl,0.3497269961122948
-CC1(C(C1C(=O)OC(C#N)C2=CC(=C(C=C2)F)OC3=CC=CC=C3)C=C(Cl)Cl)C,0.017269661060105742
-O=C(NC)CSP(OC)(OC)=S,0.02180954301853846
-C12C3(C4(C5(C3(C(C1(C5(C2(C4(Cl)Cl)Cl)Cl)Cl)(Cl)Cl)Cl)Cl)Cl)Cl,1.9565721591442926e-05
-COC(=O)C1=CC=CC=C1C(=O)OC,10.299509743336218
-CCOP(=S)(OCC)SCSC(C)(C)C,0.0001733519259052264
-CCC(C)SP(=O)(OCC)SC(C)CC,0.0009245829520661433
-CCOP(=S)(OCC)SCSC(C)(C)C,0.006934077036209056
-C1C(C(C(=O)N1C2=CC=CC(=C2)C(F)(F)F)Cl)CCl,0.016019730669239306
-O=C(O)C(C(C(=O)O)C(O1)CC2)C12,0.6177415369409439
-O=C(Oc(c(OC(C1)(C)C)c1cc2)c2)NC,0.022598624918870935
-Oc(c(c(c(c1)Cl)Cl)Cc(c(c(cc2Cl)Cl)Cl)c2O)c1Cl,0.012287924553322883
-CC1(CON(C1=O)CC2=CC=CC=C2Cl)C,0.08969617860069455
-CCC(C)N1C(=O)C(=C(NC1=O)C)Br,0.23935747721355113
-CC1=CC(=CC(=C1N(C)C)C)OC(=O)NC,0.0067481385934503825
-CNC(=O)OC1=CC=CC(=C1)N=CN(C)C,0.056495719658295813
-CC1=NN(C(=O)N1C(F)F)C2=CC(=C(C=C2Cl)Cl)NS(=O)(=O)C,0.1730416993562668
-CCOP(=S)(CC)SC1=CC=CC=C1,0.020298035239500172
-CCOP(=S)(OCC)SC(CCl)N1C(=O)C2=CC=CC=C2C1=O,0.006347661308292605
-N1CC(C)(C)CNC1=NN=C(C=Cc2ccc(C(F)(F)F)cc2)C=Cc3ccc(C(F)(F)F)cc3,0.010111728942243584
-CC1=C(C(=C(C(=C1F)F)COC(=O)C2C(C2(C)C)C=C(C(F)(F)F)Cl)F)F,0.010985502766340648
-CC1=CC(=C(C=C1)N=CN(C)C=NC2=C(C=C(C=C2)C)C)C,0.034764112883573416
-S=P(OC)(OC)SCN1C(=O)SC(OC)=N1,0.002646103794082849
-CC(C)N(C(C)C)C(=O)SCC(Cl)=C(Cl)Cl,0.04102878665011248
-S=P(OC)(OC)SCN1N=Nc2ccccc2C1(=O),0.0070905370826580775
-CC1=NC(=NC(=N1)OC)NC(=O)NS(=O)(=O)C2=CC=CC=C2CCC(F)(F)F,0.19051986050321804
-O=C(N(S(=O)(=O)Nc1cccc2)C(C)C)c12,0.16647322477947293
-CCN(CC)c1nc(C)cc(OP(=S)(OC)OC)n1,0.04519647299825149
-O=C(ON=CC(SC)(C)C)NC,0.0005255875464343458
-ClC(Cl)(Cl)CC1(OC1)c2cc(Cl)cc(Cl)c2,0.09362507489225783
-CCC1CCCC(C(C(=O)C2CC3C(C2CC(=O)O1)CCC4C3CC(C4)OC5CC(C(C(C5OC)OC)OC)C)C)OC6CCC(C(O6)C)N(C)C,0.03269690443692089
-CC(C)(C)C(=NOC(=O)NC)CSC,0.027483045022449526
-CON=C(CC1=CN=CC=C1)C2=C(C=C(C=C2)Cl)Cl,0.15245767876475944
-CC(=CC1C(C1(C)C)C(=O)OCN2C(=O)C3=C(C2=O)CCCC3)C,0.7543614918373561
-C1COC(O1)(CN2C=NC=N2)C3=C(C=C(C=C3)Cl)Cl,0.03331771398901528
-CCCOC(=O)C1=CN=C(C=C1)C(=O)OCCC,0.9949124950582696
-CC1=CC(=NC(=N1)NC(=O)NS(=O)(=O)C2=CC=CC=C2C(=O)OC3COC3)C,0.20422574060250331
-C1=C(C(=NC(=C1Cl)Cl)OCC(=O)O)Cl,0.1403669879303106
-CCOCN1C(=C(C(=C1C(F)(F)F)Br)C#N)C2=CC=C(C=C2)Cl,0.03336499327732185
-CC(C)CC1=C(C(=NC(=C1C(=O)SC)C(F)(F)F)C(F)F)C(=O)SC,0.00904300899921393
-CC12CC1(C(=O)N(C2=O)C3=CC(=CC(=C3)Cl)Cl)C,0.05279126047017867
-CC1=CC(=C(C=C1)C(=O)OC)C2=NC(C(=O)N2)(C)C(C)C,0.1734054330003024
-COP(=S)(OC)OC1=NC(=C(C=C1Cl)Cl)Cl,0.009301369775521361
-CC(C)CC1=C(C(=NC(=C1C(=O)OC)C(F)F)C(F)(F)F)C2=NCCS2,0.11151045196043953
-CCOC(=O)C(CC1=CC(=C(C=C1Cl)F)N2C(=O)N(C(=N2)C)C(F)F)Cl,0.029112705155716945
-CC(C)=CC3C(C(=O)OCc2coc(Cc1ccccc1)c2)C3(C)C,0.3693416417277341
-CCCSP(=S)(OCC)OC1=CC=C(C=C1)SC,0.03566479582586673
-CC1=CC(=C(C(=C1)OC(=O)NC)C)C,0.30635114568601185
-CC1=CC=CC=C1COC2CC3(CCC2(O3)C)C(C)C,0.5466515334085721
-CC1=C2C(=CC=C1)SC3=NN=CN23,0.16381576159162972
-CCC(=C1C(=O)CC(CC1=O)CC(C)SCC)NOCC=CCl,0.27784628232227476
-CCCN(CCC)C(=O)SCC,0.047538995974292175
-CC(C)OC(=O)C=C(C)C=CCC(C)CCCC(C)(C)OC,0.14816176662421726
-COP(=S)(OC)Oc1ccc(SC)c(C)c1,0.013473309158983109
-COC1=C(C=C(C=C1)C(=CC(=O)N2CCOCC2)C3=CC=C(C=C3)Cl)OC,0.11937399144446861
-CCSC(=O)N(CC(C)C)CC(C)C,0.4600420791288938
-CC(C)OP(=S)(OC(C)C)SCCNS(=O)(=O)C1=CC=CC=C1,0.03773457509937652
-CC(=CC1C(C1(C)C)C(=O)OCC2=CC(=CC=C2)OC3=CC=CC=C3)C,0.42802021191337764
-CC1=CC(=CC(=C1C)C)OC(=O)NC,0.05174850433885335
-CCOP(=S)(OCC)SCSC1=CC=C(C=C1)Cl,0.0029165972759564764
-C1CN(CCN1C(C(Cl)(Cl)Cl)NC=O)C(C(Cl)(Cl)Cl)NC=O,0.22990526799413355
-C(=CC=C1)(C2=C1)NC(=N2)C(=CS3)N=C3,0.009938002763559809
-C1(=C(C(=C(C(=C1Cl)Cl)Cl)Cl)Cl)[N+](=O)[O-],0.423248605734443
-CCCC(=NOCC)C1C(=O)CC(CC1=O)CC(C)SCC,0.05056765552287047
-ClC2(Cl)C4(Cl)C1(Cl)C5(Cl)C(Cl)(Cl)C3(Cl)C1(Cl)C2(Cl)C3(Cl)C45Cl,0.0012831252531881078
-CCN(C1CCCCC1)C(=O)SCC,0.013930451940080113
-CC1=NC(=NC(=N1)OC)NC(=O)NS(=O)(=O)C2=CC=CC=C2OCCCl,0.5494924735209582
-C(C(=O)O)OC1=NC(=C(C(=C1Cl)N)Cl)F,1.9605490478397496
-CC1=C(C(=O)CC1OC(=O)C2C(C2(C)C)C=C(C)C)CC#C,0.316253365684832
-C1C(COC1(CN2C=NC=N2)C3=C(C=C(C=C3)Cl)Cl)Br,0.017185416964361586
-C1=NNC(=N1)N,0.029733601205328832
-C1=CC(=C(C(=C1)F)C(=O)NC(=O)NC2=CC(=C(C(=C2F)Cl)F)Cl)F,0.06822190749765324
-C1=CC(=CC=C1OS(=O)(=O)C2=CC=C(C=C2)Cl)Cl,0.008246440044818412
-FC(F)(F)C(=CC(N(=O)=O)=C1N(C(C)C)C(C)C)C=C1N(=O)=O,2.982590173767195
-CC1=C(C(=O)CC1OC(=O)C2C(C2(C)C)C=C(C)C)CC=C,0.24800936112986982
-C1=CC(=C(C(=C1)Cl)C#N)Cl,0.014533918736325764
-C1C(O1)COC2=CC=CC=C2C3=CC=CC=C3,2.209744922072461
-CC1=CC=CC=C1OCC2=CC=CC=C2C(=NOC)C(=O)OC,1.1967534090558043
-O=N(=O)C(C(=C1N(=O)=O)N(C(C)C)C(C)C)=CC(=C1)S(=O)(=O)N,0.10642121227099519
-C1=CC(=C(C2=NC=C(C=C21)Cl)C(=O)O)Cl,3.127347059508829
-CC(C)NC(=O)N1CC(=O)N(C1=O)C2=CC(=CC(=C2)Cl)Cl,0.13932359364492994
-CCCN(CCCl)C1=C(C=C(C=C1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-],0.5690227874227859
-C1=CC=C(C=C1)C2=CC=CC=C2O,5.875192118782284
-O=C(N(C)C)Nc(cccc1C(F)(F)F)c1,0.06459882942614491
-C1=CC(=NC(=C1)Cl)C(Cl)(Cl)Cl,0.00433075312836283
-Clc1cc(Cl)cc(Cl)c1OCCN(CCC)C(=O)n2cncc2,0.01991156926953532
-CC1=CC(=C(C=C1NC(=O)C)NS(=O)(=O)C(F)(F)F)C,0.08894826507859208
-C(=C(I)I)(I)I,0.09404873168890004
-C1=C(C=C(C(=C1Cl)N2C(=C(C(=N2)C#N)S(=O)C(F)(F)F)N)Cl)C(F)(F)F,0.0001372533562906347
-O=C(N(C)C)Nc(ccc(c1Cl)Cl)c1,0.026813159469657157
-CC1=C(C(=O)CC1OC(=O)C2C(C2(C)C)C=C(C)C)CC=C,0.08101639130242413
-CC1=CC2=C(C=C1)N=C3C(=N2)SC(=O)S3,0.03201059303080734
-CC(C)N(C(=O)CCl)c1ccccc1,0.10865048725491992
-CC(C)C1(C(=O)NC(=N1)C2=C(C=CC=N2)C(=O)O)C,1.913681483026602
-CC1(C(C1(C)C)C(=O)OC(C#N)C2=CC(=CC=C2)OC3=CC=CC=C3)C,0.05566320606558952
-CN1CN(C(=S)SC1)C,0.18486987933542975
-ClC(Cl)=CC1C(C)(C)C1C(=O)OC(C(#N))c2cccc(Oc3ccccc3)c2,0.18015976856532
-ClC2C1OC1C3C2C4(Cl)C(=C(Cl)C3(Cl)C4(Cl)Cl)Cl,0.0005137200498000217
-CCOC(=O)CN1C2=C(C=CC=C2Cl)SC1=O,0.046003238627999404
-CCCN(CCC)C1=C(C=C(C(=C1[N+](=O)[O-])N)C(F)(F)F)[N+](=O)[O-],0.08392957349588569
-C1=CC=C(C=C1)C(CCC2=CC=C(C=C2)Cl)(CN3C=NC=N3)C#N,0.11875847044790469
-CC(C)(C)C(CCC1=CC=C(C=C1)Cl)(CN2C=NC=N2)O,0.05165383561566402
-CC1=C(C=CC=C1COC(=O)C2C(C2(C)C)C=C(C(F)(F)F)Cl)C3=CC=CC=C3,0.011824026606519262
-C(=CC=C1)(C2=C1)NC(=N2)C(=CS3)N=C3,0.19876005527119617
-C1=C(C=C(C(=C1Cl)N)Cl)[N+](=O)[O-],0.7245881151318344
-CC1=C(C=C(C=C1C(=O)N)[N+](=O)[O-])[N+](=O)[O-],0.027758250773633555
-CC(C)OC1=CC=CC(=C1)NC(=O)C2=CC=CC=C2C(F)(F)F,1.5465050300849357
-OC(c1ccc(Cl)cc1)(c2ccc(Cl)cc2)C(Cl)(Cl)Cl,0.006747899500347733
-O=P(O)(O)CCCl,3.0866333550182015
-CN(C(=O)NC1=CC=C(C=C1)Br)OC,0.048243951057630914
-CC1=CC(=CC=C1)NC(=O)OC2=CC=CC(=C2)NC(=O)OC,0.0832475217878744
-CC(C)(C)C1=NN=C(S1)N2C(CN(C2=O)C)O,0.19506513302817866
-S=P(OCC)(OCC)Oc1ccc2C(C)=C(Cl)C(=O)Oc2c1,0.004686221626306353
-COC(=O)c1ccccc1S(=O)(=O)NC(=O)N(C)c2nc(OC)nc(C)n2,0.031614325062739264
-C1=CC=C(C(=C1)NC2=NC(=NC(=N2)Cl)Cl)Cl,0.004173898399328111
-CC1=C(C=CC(=C1)OP(=S)(OC)OC)[N+](=O)[O-],0.001659247904766673
-COc1c(Cl)ccc(Cl)c1C(=O)(O),0.520273850439093
-CC1(C(C1C(=O)OC(C#N)C2=CC(=CC=C2)OC3=CC=CC=C3)C=C(Br)Br)C,0.004948543461552866
-C1=CC=C(C(=C1)C(C2=CC=C(C=C2)F)(C3=CN=CN=C3)O)Cl,0.007943029289634557
-c1ccc2nc(NC(=O)OC)n(C(=O)NCCCC)c2c1,0.3961177430023906
-CCOC(=O)COC(=O)C1=C(C=CC(=C1)OC2=C(C=C(C=C2)C(F)(F)F)Cl)[N+](=O)[O-],0.05583516191627437
-CC(C)(C)C(C(N1C=NC=N1)OC2=CC=C(C=C2)C3=CC=CC=C3)O,0.07409262028018154
-CCNC(=O)NC(=O)C(=NOC)C#N,0.15289185096526225
-CCOC1=C(C=CC(=C1)OC2=C(C=C(C=C2)C(F)(F)F)Cl)[N+](=O)[O-],0.11058877880543937
-CC1=NC=C(N1CCO)[N+](=O)[O-],0.8764039114257128
-O=C(N(SC(Cl)(Cl)Cl)C(=O)c1cccc2)c12,1.6860133324539086
-CC(C)(C)c2ccc(OC1CCCCC1OS(=O)OCC#C)cc2,0.2853292217012047
-CCCCC(CN1C=NC=N1)(C2=C(C=C(C=C2)Cl)Cl)O,0.014958135679074535
-CC(C)(C)C(C(=CC1=C(C=C(C=C1)Cl)Cl)N2C=NC=N2)O,0.15327033840680634
-C1=CC(C2C1C3(C(=C(C2(C3(Cl)Cl)Cl)Cl)Cl)Cl)Cl,0.001017899767409903
-C1=CC=C2C(=C1)C(=O)C3=C(C2=O)SC(=C(S3)C#N)C#N,0.03374687200243409
-CC1=NN(C(=C1C=NOCC2=CC=C(C=C2)C(=O)OC(C)(C)C)OC3=CC=CC=C3)C,0.0073074288460468996
-CS(=O)(=O)C1=C(C=CC(=C1)C(F)(F)F)C(=O)C2=C(ON=C2)C3CC3,0.05566064749641608
-CC1=C(N=C(N=C1OC(=O)N(C)C)N(C)C)C,0.0524579222415799
-CCN(CC)C(=O)C(Cl)=C(C)OP(=O)(OC)OC,0.005005200069191886
-C1=CC(=CC=C1C(CN)O)O,1.6320834707547616
-CC1=C(C(=CC=C1)C)N(C(=O)COC)N2CCOC2=O,0.17965983350851364
-c1c(C(F)(F)F)cccc1N2C(=O)C(Cl)=C(NC)C=N2,0.06174515112035177
-CCC(C)NC1=C(C=C(C=C1[N+](=O)[O-])C(C)(C)C)[N+](=O)[O-],0.16929970598735858
-C1=CC(=CC=C1S(=O)(=O)C2=CC(=C(C=C2Cl)Cl)Cl)Cl,0.6459733503975151
-CCCCC1=C(NC(=NC1=O)NCC)C,0.1194525860672606
-n(c(nc(n1)NCC)NCC)c1Cl,0.026282293252075754
-FC(F)(F)C(C=C1N(=O)=O)=CC(N(=O)=O)=C1N(CC)CC(C)=C,0.0375078950368263
-C1CCC(C1)N(CC2=CC=C(C=C2)Cl)C(=O)NC3=CC=CC=C3,0.0760257762657501
-CS(=O)(=O)NC(=O)C1=C(C=CC(=C1)OC2=C(C=C(C=C2)C(F)(F)F)Cl)[N+](=O)[O-],0.11395676083924232
-CCOC(=O)C(C)OC1=CC=C(C=C1)OC2=NC3=C(O2)C=C(C=C3)Cl,0.02487724874434851
-CCC1=C(C(=CC=C1)CC)N(CNC(=O)C)C(=O)CCl,0.21058487877925733
-NC(=N)NCCCCCCCCCCCC(OC(=O)C),0.10160268068512719
-C1=CC(=CC(=C1)Cl)NC(=O)OCC#CCCl,1.743505808935165
-CC(C)C(C(=O)OC(C(#N))c2cccc(Oc1ccccc1)c2)c3ccc(Cl)cc3,0.05953797389131243
-CC(C)C1=C(C=CC(=C1)C(C)(C)C2=CC(=C(C=C2)O)C(C)C)O,0.08001387248515598
-CCN(CC1=C(C=CC=C1Cl)F)C2=C(C=C(C=C2[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-],0.1185590456888386
-CCCCCCCCSC(=O)OC1=CC(=NN=C1C2=CC=CC=C2)Cl,0.17813968959673715
-CC1=C(C(=O)CC1OC(=O)C2C(C2(C)C)C=C(C)C)CC=C,0.04563372244789605
-CCCCNC(=O)N1C2=CC=CC=C2N=C1NC(=O)OC,0.8611255282660666
-CC1=C(C(=CC=C1)C)N(C(C)C(=O)OC)C(=O)CC2=CC=CC=C2,0.14136381415796706
-CC(C)C(C1=CC=C(C=C1)OC(F)F)C(=O)OC(C#N)C2=CC(=CC=C2)OC3=CC=CC=C3,0.013290157156772887
-Clc1ccccc1c2nnc(c3ccccc3Cl)nn2,0.06597478470118634
-CCOC(=O)NCCOC1=CC=C(C=C1)OC2=CC=CC=C2,0.03318543029523152
-CCC1=C(C(=CC=C1)CC)N(CC(=O)OCC)C(=O)CCl,0.1603572605822803
-c1(O2)c(CC2(C)C)cccc1OC(=O)N(C)SN(CCCC)CCCC,0.06569530810416269
-CCCCOC(=O)C(C)OC1=CC=C(C=C1)OC2=NC=C(C=C2)C(F)(F)F,0.007825509706097071
-COC1=CC(=C(C=C1Cl)OC)Cl,0.6037074787089276
-CCOP(=S)(OCC)OC1=NN(C(=N1)Cl)C(C)C,0.039841737145637234
-n(c(nc(n1)NC(C)C)NCC)c1Cl,0.11591071091933607
-CC(C)(C)C(C(=CC1=CC=C(C=C1)Cl)N2C=NC=N2)O,0.13506940531624406
-CCCCCCCCc1cc(N(=O)(=O))c(OC(=O)C=CC)c(c1)N(=O)(=O),0.1372145060102149
-c1cc(OC(F)(F)F)ccc1C(O)(C(C)C)c2cncnc2,0.038746408312020406
-COP(=O)(C(C(Cl)(Cl)Cl)O)OC,0.07768900686568829
-C1=CC(=C(C=C1C(F)(F)F)Cl)OC2=CC(=C(C=C2)[N+](=O)[O-])C(=O)O,0.34563108073944815
-CCCN(CC1CC1)C2=C(C=C(C=C2[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-],0.014397200032537671
-CCOC(=O)C(C)OC(=O)C1=C(C=CC(=C1)OC2=C(C=C(C=C2)C(F)(F)F)Cl)[N+](=O)[O-],0.10827828411229923
-CCSC(=O)N1CCCCCC1,0.07907000434271044
-CCC1=CC=C(C=C1)C(=O)NN(C(=O)C2=CC(=CC(=C2)C)C)C(C)(C)C,0.13618183361575933
-ClC(Cl)C(Cl)(Cl)SN2C(=O)C1CC=CCC1C2(=O),0.04297243667696324
-COC(=O)C1=CC=CC=C1S(=O)(=O)NC(=O)NC2=NC(=CC(=N2)OC(F)F)OC(F)F,0.2989300503468667
-CC(C)(C)C(C(N1C=NC=N1)OC2=CC=C(C=C2)Cl)O,0.08452667530010859
-CC1=C(C=CC(=C1)Cl)OCC(=O)O,0.019938294964743114
-COC=C(C1=CC=CC=C1OC2=NC=NC(=C2)OC3=CC=CC=C3C#N)C(=O)OC,0.15431812608561873
-C1=CC=C(C(=C1)C(C2=CC=C(C=C2)Cl)(C3=CN=CN=C3)O)Cl,0.00694452873492003
-[O-]Br(=O)=O,0.047692690196102956
-OP(=O)OCC,3.6347465046005896
-COP(N)(=O)SC,0.000708570686799144
-CCOP(=O)(NC(C)C)Oc1ccc(SC)c(C)c1,0.004944661980269876
-CCOP(=S)(OCC)SCCSCC,0.00014577045919371006
-CCOP(=S)(OCC)SCSP(=S)(OCC)OCC,0.005201883810203027
-O=C(OCC(CCCC)CC)CCCCC(=O)OCC(CCCC)CC,4.047856676081442
-CN(C=Nc1ccc(C)cc1C)C=Nc2ccc(C)cc2C,0.03408246361134649
-[C@@]14([C@@H]5OCC1=CC=C[C@@H]([C@H](O[C@H]2C[C@@H]([C@H]([C@@H](O2)C)O[C@H]3C[C@@H]([C@H]([C@@H](O3)C)O)OC)OC)C(=CC[C@@H]6C[C@H](OC([C@@H]4C=C([C@H]5O)C)=O)C[C@]7(O6)O[C@@H]([C@H](C=C7)C)[C@H](CC)C)C)C)O,0.002290749011702154
-O=C(N(S(=O)(=O)Nc1cccc2)C(C)C)c12,0.14566407168203882
-S=P(OC)(OC)SCN1C(=O)SC(OC)=N1,0.004134537178254452
-C(#N)Cl,1.1387594679715767
-C(#N)Br,1.1517974649126617
-C1=CC(=CC=C1C(C2=CC=C(C=C2)Cl)C(Cl)(Cl)Cl)Cl,0.0007052459522690667
-c(cccc1)(c1)C(C)C,2.7539366734341955
-CCCN(CCC)C(=O)SCC,0.13205276659525605
-NC(CCCC1)C1,0.6049965454697254
-ClC1CC2C(C1Cl)C3(Cl)C(=C(Cl)C2(Cl)C3(Cl)Cl)Cl,0.014642051620845831
-CC(C)OC(=O)NC1=CC(=CC=C1)Cl,4.680316153484042
-COC(=O)c1c(Cl)c(Cl)c(C(=O)OC)c(Cl)c1Cl,0.030123726579706293
-COC(=O)C1=CC=C(C=C1)C(=O)OC,0.6437193589585136
-N(C(=S)NC1)C1,0.0024471862937206963
-O=C(N(OC)C)Nc(ccc(c1Cl)Cl)c1,0.010036375840596658
-c(cccc1)(c1)C=C,2.736460951374337
-COC(=O)c1ccccc1S(=O)(=O)NC(=O)Nc2nc(OC)nc(C)n2,0.655542030995076
-C1=CC(=CC=C1N)Cl,0.09798423692306293
-FC(F)(F)C(Cl)=CC1C(C)(C)C1C(=O)OC(C(#N))c2cc(Oc3ccccc3)ccc2,0.02778703580061686
-C(Cl)(Br)Br,0.13683526627950768
-C=C(Cl)Cl,0.14441434207714035
-C(C=CCl)Cl,0.045958425107502164
-COP(=O)(OC)OC(=CCl)c1cc(Cl)c(Cl)cc1Cl,0.1366262742927664
-Oc(ccc(c1)C(c(ccc(O)c2)c2)(C)C)c1,0.21902317939829427
-O=C(OCc(cccc1)c1)c(c(ccc2)C(=O)OCCCC)c2,1.504675539130048
-O=C(NCCCC1)C1,1.10465364954589
-c(cccc1)(c1)Cl,1.0661274430976688
-C(Cl)(Cl)Cl,0.502606685808163
-ClCCl,0.6190792744080069
-C1C2C3C(C1C4C2O4)C5(C(=C(C3(C5(Cl)Cl)Cl)Cl)Cl)Cl,0.000328162093802273
-OCCO,16.111403264556976
-O=C(C=C(CC1(C)C)C)C1,1.295160023171064
-C(F)(Cl)(Cl)Cl,2.540618964665013
-CCc1cccc(CC)c1N(COC)C(=O)CCl,0.05189661748967905
-c1ccccc1c2c(C)c(COC(=O)C3C(C)(C)C3C=C(Cl)C(F)(F)F)ccc2,0.011824026606519262
-n1c(Cl)cc(OC)nc1NC(=O)NS(=O)(=O)c2ccccc2C(=O)OCC,0.30133493788161053
-O=C(NC(=O)c(c(F)ccc1)c1F)Nc(ccc(c2)Cl)c2,0.02510595436954169
-c(cccc1)(c1)CC,2.741016342485753
-O=C(N(SC(Cl)(Cl)Cl)C(=O)c1cccc2)c12,0.1348810665963127
-Clc1cc(C(F)(F)F)cnc1Oc2ccc(OC(C)C(=O)OC)cc2,0.0026615073878255148
-C#N,1.1470716002092851
-ClC(C(OC(C=C2C(=O)OC(C)C(=O)OCC)=CC=C2N(=O)=O)=C1)=CC=C1C(F)(F)F,0.10827828411229923
-c1c(C(F)(F)F)cccc1N2C(=O)C(Cl)=C(NC)C=N2,0.1687700797289615
-O=N(=O)C(C(=C1N(=O)=O)N(C(C)C)C(C)C)=CC(=C1)S(=O)(=O)N,0.12992280391195832
-CN(=CC=C1C(C=C2)=CC=N2C)C=C1,0.019100264469901956
-C1=C(C=C(C=C1[N+](=O)[O-])[N+](=O)[O-])[N+](=O)[O-],0.06245761469536169
-C(=C)Cl,0.020800592400871575
-C1CCC(=O)CC1,9.272184465524795
-CC1(C(C1(C)C)C(=O)OC(C#N)C2=CC(=CC=C2)OC3=CC=CC=C3)C,0.07154653735936956
-C1=CC(=O)NNC1(=O),4.460830164062196
-CCSC(=O)N1CCCCCC1,0.010677920910561842
-C(C(Cl)(Cl)Cl)(O)O,0.8161882413029702
-ClC2C1OC1C3C2C4(Cl)C(=C(Cl)C3(Cl)C4(Cl)Cl)Cl,6.421500622500271e-05
-Clc1cc(C(F)(F)F)ccc1Oc2cc(OCC)c(N(=O)(=O))cc2,0.02764719470135984
-c1cc(Cl)ccc1C(C(#N))(CCCC)Cn2ncnc2,0.03407493882440353
-CC1=C(C=C(C=C1[N+](=O)[O-])[N+](=O)[O-])[N+](=O)[O-],0.008805487227420639
-CC(C)OC(=O)C(C1=CC=C(C=C1)Br)(C2=CC=C(C=C2)Br)O,0.06073132568962639
-C[N+](C)(C)CCCl,1.0602168942789227
-CCC(=C1C(=O)CC(CC1=O)CC(C)SCC)NOCC=CCl,0.2389478027971563
-CC1=NC(=NC(=C1)C2CC2)NC3=CC=CC=C3,0.15801925526767843
-N(c(cccc1)c1)c(cccc2)c2,0.14773454395291782
-CC1(C(=O)N(C(=O)O1)NC2=CC=CC=C2)C3=CC=C(C=C3)OC4=CC=CC=C4,0.044873074905021335
-CC1(CCCCC1)C(=O)NC2=C(C(=C(C=C2)O)Cl)Cl,0.9662594125910484
-C1=CC(=C2C(=C1)OC(O2)(F)F)C3=CNC=C3C#N,0.443217671652664
-C(F)(F)(F)c1ccccc1C(=O)Nc2cccc(OC(C)C)c2,0.2690918752347788
-C(CCCCN=C(N)N)CCCNCCCCCCCCN=C(N)N,0.053436074592710235
-C1CN(C(=N1)N[N+](=O)[O-])CC2=CN=C(C=C2)Cl,0.0664943030028045
-COC(=O)C12CC3=C(C1=NN(CO2)C(=O)N(C4=CC=C(C=C4)OC(F)(F)F)C(=O)OC)C=CC(=C3)Cl,0.006820319755914397
-CC1=CC=CC=C1OCC2=CC=CC=C2C(=NOC)C(=O)OC,1.1807966969350603
-CC1=CC(=CC(=C1)C(=O)N(C(C)(C)C)NC(=O)C2=C(C(=CC=C2)OC)C)C,1.1154252951100516
-C1=CC(=C(C(=C1)F)C(=O)NC(=O)NC2=CC(=C(C=C2)OC(C(OC(F)(F)F)F)(F)F)Cl)F,0.07306609422899836
-CC1=CC=C(C=C1)N(SC(F)(Cl)Cl)S(=O)(=O)N(C)C,0.051834835094095484
-CC(C)(C)C1=C(C=CC(=C1)O)O,1.3536524792656537
-CC(=NOCC1=CC=CC=C1C(=NOC)C(=O)OC)C2=CC(=CC=C2)C(F)(F)F,0.14692519722320194
-COC(=O)N(C1=CC=CC=C1COC2=NN(C=C2)C3=CC=C(C=C3)Cl)OC,0.02320682656135787
-CC(C)N1C(=NC(C)(C)C)SCN(C1=O)C2=CC=CC=C2,0.02848365588181601
-C1=CC=C(C(=C1)C2=NN=C(N=N2)C3=CC=CC=C3Cl)Cl,0.05706818876652619
-CCCC(=C1C(=O)CC(CC1=O)C2CCCSC2)NOCC,0.08603044408485085
-C1CC1NC2=NC(=C(C(=N2)N)C#N)N,0.11566455596376966
-C(C=C1)(=N(C=C1)CC2)C(N2=C3)=CC=C3,0.002933359023382885
-C1=CC=C2C(=C1)C(=O)C3=C(C2=O)SC(=C(S3)C#N)C#N,0.020248123201460456
-CCOC1=CC2=C(C=C1)NC(C=C2C)(C)C,0.05522147585284508
-C1=CC=C(C=C1)C(CCC2=CC=C(C=C2)Cl)(CN3C=NC=N3)C#N,0.08906885283592852
-CC1CN(CC(O1)C)CC(C)CC2=CC=C(C=C2)C(C)(C)C,0.005601647965290344
-C[Si](CN1C=NC=N1)(C2=CC=C(C=C2)F)C3=CC=C(C=C3)F,0.006341300659739408
-c1cc(Cl)cc(Cl)c1C(OCC=C)Cn2cncc2,0.05047450068604942
-CN(=CC=C1C(C=C2)=CC=N2C)C=C1,0.012988179839533329
-CCCCOCCOCCOCC1=CC2=C(C=C1CCC)OCO2,0.29547465787728056
-Clc1cc(Cl)ccc1C2(Cn3ncnc3)OC(CCC)CO2,0.2805209905967611
-C(=CC=C1)(C2=C1)NC(=N2)C(=CS3)N=C3,0.1490700414533971
-CCOC1=CC=C(C=C1)C(C)(C)COCC2=CC(=CC=C2)OC3=CC=CC=C3,0.0690593023384914
-CC(C)(C)c2ccc(OC1CCCCC1OS(=O)OCC#C)cc2,0.0542125521232289
-CC(COC1=CC=C(C=C1)OC2=CC=CC=C2)OC3=CC=CC=N3,0.4356352632556343
-CC(C)(C)C(=O)C(N1C=NC=N1)OC2=CC=C(C=C2)Cl,0.3880867710275115
-CC(C)(C)C(C(N1C=NC=N1)OC2=CC=C(C=C2)Cl)O,0.3550120362604561
-c1ccccc1c2c(C)c(COC(=O)C3C(C)(C)C3C=C(Cl)C(F)(F)F)ccc2,0.018918442570430818
-CC1(C(C1C(=O)OC(C#N)C2=CC(=C(C=C2)F)OC3=CC=CC=C3)C=C(Cl)Cl)C,0.013815728848084595
-CC1(C(C1C(=O)OC(C#N)C2=CC(=C(C=C2)F)OC3=CC=CC=C3)C=C(Cl)Cl)C,0.02763145769616919
-ClC(Cl)=CC1C(C)(C)C1C(=O)OC(C(#N))c2cccc(Oc3ccccc3)c2,0.12010651237688001
-ClC1CC2C(C1Cl)C3(Cl)C(=C(Cl)C2(Cl)C3(Cl)Cl)Cl,0.0006100854842019096
-ClC4=C(Cl)C5(Cl)C3C1CC(C2OC12)C3C4(Cl)C5(Cl)Cl,0.0002625296750418184
-C1C2C3C(C1C4C2O4)C5(C(=C(C3(C5(Cl)Cl)Cl)Cl)Cl)Cl,0.000656324187604546
-C1(C(C(C(C(C1Cl)Cl)Cl)Cl)Cl)Cl,0.016160652565775233
-CCC(=O)Nc1ccc(Cl)c(Cl)c1,0.3484961885063573
-C1(=C(C(=C(C(=C1Cl)Cl)Cl)Cl)Cl)[N+](=O)[O-],0.47403843842257615
-C1C2C(COS(=O)O1)C3(C(=C(C2(C3(Cl)Cl)Cl)Cl)Cl)Cl,0.007126617932723449
-O=C(N(SC(Cl)(Cl)Cl)C(=O)c1cccc2)c12,0.16860133324539087
-CC(C(=O)O)OC1=CC=C(C=C1)OC2=C(C=C(C=N2)C(F)(F)F)Cl,0.00027647194701359843
-CCCCC(CN1C=NC=N1)(C#N)C2=CC=C(C=C2)Cl,0.033936422812922216
-Clc1cc(Cl)cc(Cl)c1OCCN(CCC)C(=O)n2cncc2,0.013539867103284017
-COP(=O)(NC(=O)(C))SC,0.013648831720059621
-CCOP(=O)(OCC)OC(=CCl)C1=C(C=C(C=C1)Cl)Cl,0.004171650398342553
-CCOP(=S)(OCC)SCCSCC,0.0008017375255654054
-O=P(O)(O)CCCl,0.08304843107672291
-CCCSP(=O)(OCC)SCCC,0.011141416681473747
-CCOP(=O)(NC(C)C)Oc1ccc(SC)c(C)c1,0.005603950244305859
-COP(=S)(OC)Oc1ccc(SC)c(C)c1,0.0025868753585247565
-OC(=O)C(N)CCP(C)(=O)O,0.019323475195614302
-OC(=O)CNCP(O)(O)=O,1.7743806406081915
-CCOC(=O)CC(SP(=S)(OC)OC)C(=O)OCC,1.0897268363577188
-COP(N)(=O)SC,0.002054854991717517
-CC(=CC(=O)OC)OP(=O)(OC)OC,0.001561466365033004
-CCOP(=S)(OCC)SCSCC,0.0006144925612602997
-CCOP(=S)(OCC)SCSC(C)(C)C,0.0002080223110862717
-S=P(OC)(OC)SCN1N=Nc2ccccc2C1(=O),0.00813048252144793
-CCOP(=S)(OCC)OC1=NC(=C(C=C1Cl)Cl)Cl,0.002852364738724816
-CCOP(=S)(OCC)OC1=NC(=NC(=C1)C)C(C)C,0.019057288509276463
-CCC1=NC(=CC(=N1)OP(=S)(OC)OC)OCC,0.0015395577035464635
-S=P(OC)(OC)SCN1C(=O)SC(OC)=N1,0.005292207588165698
-CCOP(=S)(OCC)SCN1C2=C(C=C(C=C2)Cl)OC1=O,0.0054376113486863924
-COP(=S)(OC)SCN2C(=O)c1ccccc1C2(=O),0.02836244328456758
-CCOC(=O)C1=CN2C(=CC(=N2)OP(=S)(OCC)OCC)N=C1C,0.010713392485187262
-CCOP(=S)(OCC)OC1=NN(C=N1)C2=CC=CC=C2,0.004149212048673449
-O=C(Oc(c(c(ccc1)cc2)c1)c2)NC,0.2981792578159244
-CC1=CC(=CC(=C1SC)C)OC(=O)NC,0.041276958181115306
-CNC(=O)ON=C(C)SC,0.12329688471926223
-CCCOC(=O)NCCCN(C)C,3.611885866531256
-COC(=O)NC1=NC2=CC=CC=C2N1,0.3922867840256219
-CC1=C(N=C(N=C1OC(=O)N(C)C)N(C)C)C,0.051618595485714625
-CNC(=O)CCSCCSP(=O)(OC)OC,0.001879329112916984
-N(C(=S)SSC(N(C)C)=S)(C)C,0.04990997903448147
-C1=NNC(=N1)N,0.059467202410657664
-CCCCCCCCc1cc(N(=O)(=O))c(OC(=O)C=CC)c(c1)N(=O)(=O),0.17563456769307506
-C1=CC=C(C=C1)[Sn](C2=CC=CC=C2)C3=CC=CC=C3,0.0008571117562305596
-O=C(NC(=O)c(c(F)ccc1)c1F)Nc(ccc(c2)Cl)c2,0.022530984690614337
-N(C(=S)NC1)C1,0.012235931468603481
-C1=CC(=C(C(=C1)F)C(=O)NC(=O)NC2=CC(=C(C(=C2F)Cl)F)Cl)F,0.06559798797851273
-CCCSC1=CC2=C(C=C1)N=C(N2)NC(=O)OC,0.07537743365466734
-C1CN(CCN1CCCC(=O)C2=CC=C(C=C2)F)C3=CC=CC=N3,0.35125671098854394
-CC1(C2C(C3C(C(=O)C(=C(N)O)C(=O)C3(C(=O)C2=C(C4=C(C=CC(=C41)Cl)O)O)O)N(C)C)O)O,10.50761860949369
-CC1=CC(=C(C=C1NC(=O)C2=CC(=CC(=C2O)I)I)Cl)C(C#N)C3=CC=C(C=C3)Cl,0.015081279803436631
-CN1CC2CC1CN2C3=C(C=C4C(=C3)N(C=C(C4=O)C(=O)O)C5CC5)F,0.13990757146198934
-C1=CC(=CC=C1C(C#N)C2=C(C=CC(=C2Cl)N3C(=O)NC(=O)C=N3)Cl)Cl,0.03679735812631385
-CC1=NC=C(N1C)[N+](=O)[O-],0.10628650675790867
-CCN1CCN(CC1)C2=C(C=C3C(=C2)N(C=C(C3=O)C(=O)O)C4CC4)F,0.07234386441112595
-CC1C=CC=C2COC3C2(C(C=C(C3O)C)C(=O)OC4CC(CC=C(C1OC5CC(C(C(O5)C)OC6CC(C(C(O6)C)NC(=O)C)OC)OC)C)OC7(C4)C=CC(C(O7)C(C)C)C)O,0.0011109849279118543
-COCC(=O)NC1=C(C=CC(=C1)SC2=CC=CC=C2)NC(=NC(=O)OC)NC(=O)OC,0.08959030532555236
-CC1CCC2=C3N1C=C(C(=O)C3=CC(=C2)F)C(=O)O,1.531109972815908
-COC(=O)NC1=NC2=C(N1)C=C(C=C2)S(=O)C3=CC=CC=C3,0.006342219438128827
-CC1(C2CC3C(C(=O)C(=C(N)O)C(=O)C3(C(=O)C2=C(C4=C1C=CC=C4O)O)O)N(C)C)O,0.33750750616693714
-C1CN(CCN1CCCC(=O)C2=CC=C(C=C2)F)C3=CC=CC=N3,0.09163218547527233
-CC(C)NCC(COC1=CC=CC2=C1C3=CC=CC=C3N2)O,0.023460058312320942
-C1=CC(=CC=C1C(C#N)C2=C(C=CC(=C2Cl)N3C(=O)NC(=O)C=N3)Cl)Cl,0.056422615793681234
-CC1C=CC=C2COC3C2(C(C=C(C3O)C)C(=O)OC4CC(CC=C(C1OC5CC(C(C(O5)C)OC6CC(C(C(O6)C)NC(=O)C)OC)OC)C)OC7(C4)C=CC(C(O7)C(C)C)C)O,0.0027774623197796356
-COP(=S)(OC)Oc1ccc(SC)c(C)c1,0.001616797099077973
-C(C(C(C(C1Cl)Cl)Cl)Cl)(C1Cl)Cl,0.027507493728979118
-C(C(C(C(C1Cl)Cl)Cl)Cl)(C1Cl)Cl,0.013753746864489559
-CC(N(C)C)CN(C(=CC=C3)C1=C3)C(=CC=C2)C(=C2)S1,0.058364575374860554
-C1=C(C=C(C(=C1Cl)N)Cl)[N+](=O)[O-],1.159340984210935
-c(cccc1)(c1)C=C,3.8406469492973154
diff --git a/paper/data/median-correlation.csv b/paper/data/median-correlation.csv
index 889453d..e83961d 100644
--- a/paper/data/median-correlation.csv
+++ b/paper/data/median-correlation.csv
@@ -1,121 +1,121 @@
SMILES,mazzatorta,swiss
-CCOCN(c1c(C)cccc1CC)C(=O)CCl,0.18534506246313948,0.22661523159035935
-COCN(c1c(CC)cccc1CC)C(=O)CCl,0.05560351873894184,0.23816840526513422
-CCOC(=O)C(Oc1ccc(cc1)Oc1cnc2c(n1)ccc(c2)Cl)C,0.009924832004782804,0.04157699893895499
-COP(=S)(SCn1nnc2c(c1=O)cccc2)OC,0.005451835179110433,0.008508644649457775
-CNC(=O)Oc1ccccc1OC(C)C,0.23895810443138246,0.22939978025412716
-O=C(C(C)(C)C)C(n1ncnc1)Oc1ccc(cc1)Cl,0.08510674803234901,0.08272375649019124
-CNC(=O)Oc1cccc2c1cccc2,0.1878529324240324,0.2991731924668564
-O=C(C1=C(C)OCCS1)Nc1ccccc1,0.1274956638724717,0.034848813981213346
-COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1Cl,0.06987675250196507,0.05590140200157206
+N#Cc1nn(c(c1S(=O)C(F)(F)F)N)c1c(Cl)cc(cc1Cl)C(F)(F)F,0.0001372533562906347,0.00013496580117055152
+CCOP(=S)(SCSC(C)(C)C)OCC,0.0002080223110862717,0.00034670385697674235
+OC(=O)C(Oc1ccc(cc1)Oc1ncc(cc1Cl)C(F)(F)F)C,0.00027647194701359843,0.0002764719511333511
+CCSCSP(=S)(OCC)OCC,0.0006144925612602997,0.0006144925475253195
+CCOP(=O)(SC(CC)C)SC(CC)C,0.0009245829520661433,0.0008210296720157477
+COP(=S)(Oc1ccc(c(c1)C)[N+](=O)[O-])OC,0.001659247904766673,0.08332310268057162
+COP(=O)(SC)N,0.002054854991717517,0.0020548549325897737
+CSc1ccc(cc1C)OP(=S)(OC)OC,0.0025868753585247565,0.0016527259802523342
CCOP(=S)(Oc1ccc2c(c1)oc(=O)c(c2C)Cl)OCC,0.003445751195813495,0.0033630532459809582
-Nc1nc(NC2CC2)nc(n1)N,0.09026150563412319,0.9387196585948812
+CCOP(=S)(Oc1ncn(n1)c1ccccc1)OCC,0.004149212048673449,0.004149211896481245
+COc1sc(=O)n(n1)CSP(=S)(OC)OC,0.004713372383210075,0.0039030031199302137
+CCOP(=S)(OCC)SCSP(=S)(OCC)OCC,0.005201883810203027,0.004681695305160139
+CCOP(=O)(Oc1ccc(c(c1)C)SC)NC(C)C,0.005274306112287868,0.0035601567181414275
+CCOP(=S)(SCn1c(=O)oc2c1ccc(c2)Cl)OCC,0.0054376113486863924,0.0636200517424888
+CC(Cc1ccc(cc1)C(C)(C)C)CN1CC(C)OC(C1)C,0.005601647965290344,0.005601648122412352
+Fc1ccc(cc1)[Si](c1ccc(cc1)F)(Cn1cncn1)C,0.006341300659739408,0.023779877474022784
+COC(=O)N(C(=O)N1COC2(C(=N1)c1ccc(cc1C2)Cl)C(=O)OC)c1ccc(cc1)OC(F)(F)F,0.006820319755914397,0.006820319575237628
+Clc1ccc(cc1)C(c1ccccc1Cl)(c1cncnc1)O,0.00694452873492003,0.00920904883059355
+COP(=S)(SCn1nnc2c(c1=O)cccc2)OC,0.0070905370826580775,0.008508644649457775
+O=S1OCC2C(CO1)C1(C(C2(Cl)C(=C1Cl)Cl)(Cl)Cl)Cl,0.007126617932723449,0.01228727229779905
+CCCCOC(=O)C(Oc1ccc(cc1)Oc1ccc(cn1)C(F)(F)F)C,0.007825509706097071,0.009886227162529472
+COP(=S)(Oc1nc(Cl)c(cc1Cl)Cl)OC,0.009301369775521361,0.003100456591840454
+CCOC(=O)C(Oc1ccc(cc1)Oc1cnc2c(n1)ccc(c2)Cl)C,0.009924832004782804,0.04157699893895499
+CON(C(=O)Nc1ccc(c(c1)Cl)Cl)C,0.010036375840596658,0.12545469800745823
COP(=O)(OC=C(Cl)Cl)OC,0.010068978612765365,0.010408382170442241
-OC(C(Cl)(Cl)Cl)(c1ccc(cc1)Cl)c1ccc(cc1)Cl,0.030365547751564796,0.005938151689011985
-O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(cc1)Cl,0.02446221194980985,0.4023390123323988
-CC1=C(C)S(=O)(=O)CCS1(=O)=O,0.047557630336441704,0.23778815168220852
+CCCSP(=O)(SCCC)OCC,0.011141416681473747,0.006065882533527741
CNC(=O)CSP(=S)(OC)OC,0.011450010084732691,0.000872381733741038
-O=C(N(C)C)Nc1ccc(c(c1)Cl)Cl,0.026276896280264014,0.007293179580314936
-ClCCP(=O)(O)O,1.4025957248513201,0.9066120392542251
-ClC1C(Cl)C(Cl)C(C(C1Cl)Cl)Cl,0.01808617712680377,0.01616065190994549
-C=CCOC(c1ccc(cc1Cl)Cl)Cn1cncc1,0.13459866849613178,0.05350296944357954
-CCC(c1noc(c1)NC(=O)c1c(OC)cccc1OC)(CC)C,0.15252975563710267,1.6952764753748983
-CCOC(=O)CC(C(=O)OCC)SP(=S)(OC)OC,0.6205388929259232,0.2603236331298995
-COc1sc(=O)n(n1)CSP(=S)(OC)OC,0.00467202701142753,0.0039030031199302137
-CSc1nnc(c(=O)n1N)C(C)(C)C,0.06999926640768805,0.06393266242893511
-CC(Oc1cc(c(cc1Cl)Cl)n1nc(oc1=O)C(C)(C)C)C,0.01448347496337274,0.010428101697378017
-CNC(=O)ON=C(C(=O)N(C)C)SC,0.02280382932847922,0.02072868120754643
-COP(=S)(SCN1C(=O)c2c(C1=O)cccc2)OC,0.04569504751402555,0.009139009427670286
-OC(=O)c1nc(Cl)c(c(c1Cl)N)Cl,0.24848916516834604,0.9318343693812976
-CCN(c1nc(cc(n1)C)OP(=S)(OC)OC)CC,0.026692119922880408,0.0068777238395693234
-CCC(=O)Nc1ccc(c(c1)Cl)Cl,0.22010285589875195,0.041269285481015994
-CC1N(C(=O)NC2CCCCC2)C(=O)SC1c1ccc(cc1)Cl,0.4534134152107278,0.06546156290207059
-CCN(C(=O)SCc1ccc(cc1)Cl)CC,0.019396419126203733,0.016680921188449865
-CN(C(=S)SSC(=S)N(C)C)C,0.04432283415923257,0.03036190470594063
-C=CC1(C)OC(=O)N(C1=O)c1cc(Cl)cc(c1)Cl,0.25479642918707424,0.0186990521567307
-COP(=O)(SC)N,0.003046853953236319,0.0020548549325897737
CCOP(=S)(Oc1cc(C)nc(n1)C(C)C)OCC,0.011992948803251567,0.01642869699075557
-OC(=O)CNCP(=O)(O)O,5.914602135360638,5.350743398456257
-CON(C(=O)Nc1ccc(c(c1)Cl)Cl)C,0.010036375840596658,0.12545469800745823
-CC(OC(=O)Nc1cccc(c1)Cl)C,3.510237115113031,0.14040948460452124
-OC(=O)C(Oc1ccc(cc1Cl)Cl)C,0.03828744186371015,0.15527684755838006
-N#CC(c1ccc(c(c1)Oc1ccccc1)F)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.021874904009467275,0.04835505096829608
+CC(Oc1cc(c(cc1Cl)Cl)n1nc(oc1=O)C(C)(C)C)C,0.01448347496337274,0.010428101697378017
+N#Cc1c(Cl)cccc1Cl,0.014533918736325764,0.0186034162597095
N#Cc1c(Cl)c(C#N)c(c(c1Cl)Cl)Cl,0.015042627044387032,0.08141821878808377
-CC(C1(C)N=C(NC1=O)c1nc2ccccc2cc1C(=O)O)C,0.06423944765895072,1.605986191473768
-CC(Nc1nc(NC(C)C)nc(n1)Cl)C,0.21766590408142725,0.22201922216305578
-CCOP(=S)(SCSC(C)(C)C)OCC,0.002438483757733518,0.00034670385697674235
-CCOP(=O)(SC(CC)C)SC(CC)C,0.0009245829520661433,0.0008210296720157477
ClCC1CN(C(=O)C1Cl)c1cccc(c1)C(F)(F)F,0.016019730669239306,0.05030195369030707
-O=C1N(OCC1(C)C)Cc1ccccc1Cl,0.08969617860069455,0.41719152837532353
-CCC(n1c(=O)[nH]c(c(c1=O)Br)C)C,0.23935747721355113,0.39446112244793224
-CC(N(C(=O)SCC(=C(Cl)Cl)Cl)C(C)C)C,0.04102878665011248,0.03544887229174679
-COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1CCC(F)(F)F,0.19051986050321804,0.2098341392275743
-CC(N1C(=O)c2ccccc2NS1(=O)=O)C,0.1560686482307559,0.14982590230152565
+ClC1C(Cl)C(Cl)C(C(C1Cl)Cl)Cl,0.016160652565775233,0.01616065190994549
+CCCN(C(=O)n1cncc1)CCOc1c(Cl)cc(cc1Cl)Cl,0.01672571818640967,0.05707983190600125
+BrC1COC(C1)(Cn1cncn1)c1ccc(cc1Cl)Cl,0.017185416964361586,0.017185417014945824
+OC(=O)C(CCP(=O)(O)C)N,0.019323475195614302,1.2637552440957067
+CCN(C(=O)SCc1ccc(cc1)Cl)CC,0.019396419126203733,0.016680921188449865
+OC(=O)COc1ccc(cc1C)Cl,0.019938294964743114,0.0947069010825298
+N#CC(c1ccc(c(c1)Oc1ccccc1)F)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.022450559378137468,0.04835505096829608
+CNC(=O)ON=C(C(=O)N(C)C)SC,0.02280382932847922,0.02072868120754643
+O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(cc1)Cl,0.02510595436954169,0.4023390123323988
+O=C(N(C)C)Nc1ccc(c(c1)Cl)Cl,0.026276896280264014,0.007293179580314936
+CCN(c1nc(cc(n1)C)OP(=S)(OC)OC)CC,0.026692119922880408,0.0068777238395693234
+OC(C(Cl)(Cl)Cl)(c1ccc(cc1)Cl)c1ccc(cc1)Cl,0.030365547751564796,0.005938151689011985
+CCOC(=O)NCCOc1ccc(cc1)Oc1ccccc1,0.03318543029523152,0.08196801536106943
Clc1ccc(c(c1)Cl)C1(OCCO1)Cn1cncn1,0.03331771398901528,0.042646674541424644
-OC(=O)COc1nc(Cl)c(cc1Cl)Cl,0.1403669879303106,0.0038990829980641837
CCOCn1c(c2ccc(cc2)Cl)c(c(c1C(F)(F)F)Br)C#N,0.03336499327732185,0.036799624938222635
+CCCCC(c1ccc(cc1)Cl)(Cn1cncn1)C#N,0.03400568081866287,0.20067507097305953
+OC(=O)C(Oc1ccc(cc1Cl)Cl)C,0.03828744186371015,0.15527684755838006
+CC(C(c1cncnc1)(c1ccc(cc1)OC(F)(F)F)O)C,0.038746408312020406,0.08469772512288609
+CC(N(C(=O)SCC(=C(Cl)Cl)Cl)C(C)C)C,0.04102878665011248,0.03544887229174679
+CNC(=O)Oc1cc(C)c(c(c1)C)SC,0.041276958181115306,0.1242747128033579
+O=C1OC(C(=O)N1Nc1ccccc1)(C)c1ccc(cc1)Oc1ccccc1,0.044873074905021335,0.045407278177700156
+COP(=S)(SCN1C(=O)c2c(C1=O)cccc2)OC,0.04569504751402555,0.009139009427670286
+CC1=C(C)S(=O)(=O)CCS1(=O)=O,0.047557630336441704,0.23778815168220852
+CN(C(=S)SSC(=S)N(C)C)C,0.04783039657471141,0.03036190470594063
+CON(C(=O)Nc1ccc(cc1)Br)C,0.048243951057630914,0.014357399945172603
+Clc1ccc(cc1)CCC(C(C)(C)C)(Cn1cncn1)O,0.05165383561566402,0.17867678986550448
+Cc1ccc(cc1)N(S(=O)(=O)N(C)C)SC(Cl)(Cl)F,0.051834835094095484,0.27357274077439286
+CN(C(=O)Oc1nc(nc(c1C)C)N(C)C)C,0.05203825886364726,0.05161859628615915
O=C1N(c2cc(Cl)cc(c2)Cl)C(=O)C2(C1(C)C2)C,0.05279126047017867,0.16893203350457175
-COP(=S)(Oc1nc(Cl)c(cc1Cl)Cl)OC,0.009301369775521361,0.003100456591840454
-COC(=O)c1c(nc(c(c1CC(C)C)C1=NCCS1)C(F)(F)F)C(F)F,0.11151045196043953,0.11151045388522976
-CC(=CC1C(C1(C)C)C(=O)OCc1coc(c1)Cc1ccccc1)C,0.3693416417277341,0.04432099700732809
-Cc1cccc2c1n1cnnc1s2,0.16381576159162972,0.1373938645607217
-CSc1ccc(cc1C)OP(=S)(OC)OC,0.005892327205528613,0.0016527259802523342
-Clc1c(Cl)c([N+](=O)[O-])c(c(c1Cl)Cl)Cl,0.44864352207850955,0.4774244272684517
-ClCCOc1ccccc1S(=O)(=O)NC(=O)Nc1nc(C)nc(n1)OC,0.5494924735209582,0.07465930346752149
-C#CCC1=C(C)C(CC1=O)OC(=O)C1C(C1(C)C)C=C(C)C,0.316253365684832,0.05492821614526029
-BrC1COC(C1)(Cn1cncn1)c1ccc(cc1Cl)Cl,0.017185416964361586,0.017185417014945824
-O=C(NC(=O)c1c(F)cccc1F)Nc1cc(Cl)c(c(c1F)Cl)F,0.06690994773808298,0.19325167158375256
-N#Cc1c(Cl)cccc1Cl,0.014533918736325764,0.0186034162597095
-CC(NC(=O)N1CC(=O)N(C1=O)c1cc(Cl)cc(c1)Cl)C,0.13932359364492994,0.03634528529867737
-O=C(N(C)C)Nc1cccc(c1)C(F)(F)F,0.06459882942614491,0.10157735340683115
-CCCN(C(=O)n1cncc1)CCOc1c(Cl)cc(cc1Cl)Cl,0.01672571818640967,0.05707983190600125
-N#Cc1nn(c(c1S(=O)C(F)(F)F)N)c1c(Cl)cc(cc1Cl)C(F)(F)F,0.0001372533562906347,0.00013496580117055152
+COCN(c1c(CC)cccc1CC)C(=O)CCl,0.05560351873894184,0.23816840526513422
+CC(OC(=O)C(c1ccc(cc1)Br)(c1ccc(cc1)Br)O)C,0.06073132568962639,0.060497742776698574
+Clc1ccccc1c1nnc(nn1)c1ccccc1Cl,0.06152148673385627,0.05706818624978773
N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)(C)C,0.06360487171247954,0.06009909138187043
-CN1CN(C)CSC1=S,0.18486987933542975,0.027422365728598172
+CC(C1(C)N=C(NC1=O)c1nc2ccccc2cc1C(=O)O)C,0.06423944765895072,1.605986191473768
+O=C(N(C)C)Nc1cccc(c1)C(F)(F)F,0.06459882942614491,0.10157735340683115
+O=C(NC(=O)c1c(F)cccc1F)Nc1cc(Cl)c(c(c1F)Cl)F,0.06690994773808298,0.19325167158375256
+CCOc1ccc(cc1)C(COCc1cccc(c1)Oc1ccccc1)(C)C,0.0690593023384914,0.06773123883198195
+COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1Cl,0.06987675250196507,0.05590140200157206
+CSc1nnc(c(=O)n1N)C(C)(C)C,0.06999926640768805,0.06393266242893511
+O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(c(c1)Cl)OC(C(OC(F)(F)F)F)(F)F,0.07306609422899836,0.062106180868884746
+Clc1ccc(cc1)CN(C(=O)Nc1ccccc1)C1CCCC1,0.0760257762657501,0.5473855891134007
+COP(=O)(C(C(Cl)(Cl)Cl)O)OC,0.07768900686568829,0.05166319030658296
+O=C(C(C)(C)C)C(n1ncnc1)Oc1ccc(cc1)Cl,0.08510674803234901,0.08272375649019124
+O=C1N(OCC1(C)C)Cc1ccccc1Cl,0.08969617860069455,0.41719152837532353
+Nc1nc(NC2CC2)nc(n1)N,0.09026150563412319,0.9387196585948812
N#CC(c1ccccc1)(Cn1cncn1)CCc1ccc(cc1)Cl,0.10391366164191661,0.09203781459712614
-Clc1ccc(cc1)CCC(C(C)(C)C)(Cn1cncn1)O,0.05165383561566402,0.17867678986550448
-[O-][N+](=O)c1cc(Cl)c(c(c1)Cl)N,0.9419645496713847,0.5651787298028309
-CON(C(=O)Nc1ccc(cc1)Br)C,0.048243951057630914,0.014357399945172603
-COP(=S)(Oc1ccc(c(c1)C)[N+](=O)[O-])OC,0.001659247904766673,0.08332310268057162
CCOc1cc(ccc1[N+](=O)[O-])Oc1ccc(cc1Cl)C(F)(F)F,0.11058877880543937,0.14653013191720715
-ClC(SN1C(=O)c2c(C1=O)cccc2)(Cl)Cl,0.6631652440985374,0.08430066662269543
-C#CCOS(=O)OC1CCCCC1Oc1ccc(cc1)C(C)(C)C,0.1697708869122168,0.061250674376451514
-CN(C(=O)Oc1nc(nc(c1C)C)N(C)C)C,0.05203825886364726,0.05161859628615915
-CCC(Nc1c(cc(cc1[N+](=O)[O-])C(C)(C)C)[N+](=O)[O-])C,0.16929970598735858,0.13747135609511818
-Clc1ccc(cc1)CN(C(=O)Nc1ccccc1)C1CCCC1,0.0760257762657501,0.5473855891134007
+COC(=O)c1c(nc(c(c1CC(C)C)C1=NCCS1)C(F)(F)F)C(F)F,0.11151045196043953,0.11151045388522976
Clc1cc(ccc1Oc1ccc(c(c1)C(=O)NS(=O)(=O)C)[N+](=O)[O-])C(F)(F)F,0.11395676083924232,0.011395676083924233
-CCCCCCCCSC(=O)Oc1cc(Cl)nnc1c1ccccc1,0.17813968959673715,0.3034972489425892
-Clc1ccccc1c1nnc(nn1)c1ccccc1Cl,0.06152148673385627,0.05706818624978773
-CCOC(=O)NCCOc1ccc(cc1)Oc1ccccc1,0.03318543029523152,0.08196801536106943
-CCCCOC(=O)C(Oc1ccc(cc1)Oc1ccc(cn1)C(F)(F)F)C,0.007825509706097071,0.009886227162529472
-CCNc1nc(NC(C)C)nc(n1)Cl,0.11591071091933607,0.056718974985359355
-CC(C(c1cncnc1)(c1ccc(cc1)OC(F)(F)F)O)C,0.038746408312020406,0.08469772512288609
-COP(=O)(C(C(Cl)(Cl)Cl)O)OC,0.07768900686568829,0.05166319030658296
+CCNc1nc(NC(C)C)nc(n1)Cl,0.11591071091933607,0.04636428436773443
+O=C(C1=C(C)OCCS1)Nc1ccccc1,0.1274956638724717,0.034848813981213346
+C=CCOC(c1ccc(cc1Cl)Cl)Cn1cncc1,0.13459866849613178,0.05350296944357954
CCc1ccc(cc1)C(=O)NN(C(C)(C)C)C(=O)c1cc(C)cc(c1)C,0.13618183361575933,0.13731668655832788
-OC(C(C)(C)C)C(n1ncnc1)Oc1ccc(cc1)Cl,0.21976935578028234,0.06424027322808253
-OC(=O)COc1ccc(cc1C)Cl,0.019938294964743114,0.0947069010825298
-Clc1ccc(cc1)C(c1ccccc1Cl)(c1cncnc1)O,0.00694452873492003,0.01932390597300771
-CCOP(=O)(Oc1ccc(c(c1)C)SC)NC(C)C,0.005274306112287868,0.0035601567181414275
-CCOP(=S)(OCC)SCSP(=S)(OCC)OCC,0.005201883810203027,0.004681695305160139
-COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1C(=O)OC,0.655542030995076,0.31203800675365617
-CCCCC(c1ccc(cc1)Cl)(Cn1cncn1)C#N,0.03400568081866287,0.20067507097305953
-CC(OC(=O)C(c1ccc(cc1)Br)(c1ccc(cc1)Br)O)C,0.06073132568962639,0.060497742776698574
+CC(NC(=O)N1CC(=O)N(C1=O)c1cc(Cl)cc(c1)Cl)C,0.13932359364492994,0.03634528529867737
+OC(=O)COc1nc(Cl)c(cc1Cl)Cl,0.1403669879303106,0.0038990829980641837
+CCC(c1noc(c1)NC(=O)c1c(OC)cccc1OC)(CC)C,0.15252975563710267,1.6952764753748983
+CC(N1C(=O)c2ccccc2NS1(=O)=O)C,0.1560686482307559,0.14982590230152565
Cc1nc(Nc2ccccc2)nc(c1)C1CC1,0.15801925526767843,0.15801924849469393
-O=C1OC(C(=O)N1Nc1ccccc1)(C)c1ccc(cc1)Oc1ccccc1,0.044873074905021335,0.045407278177700156
-N#Cc1c[nH]cc1c1cccc2c1OC(O2)(F)F,0.443217671652664,0.4553054263341003
-COC(=O)N(C(=O)N1COC2(C(=N1)c1ccc(cc1C2)Cl)C(=O)OC)c1ccc(cc1)OC(F)(F)F,0.006820319755914397,0.006820319575237628
-O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(c(c1)Cl)OC(C(OC(F)(F)F)F)(F)F,0.07306609422899836,0.062106180868884746
-Cc1ccc(cc1)N(S(=O)(=O)N(C)C)SC(Cl)(Cl)F,0.051834835094095484,0.27357274077439286
-CC(Cc1ccc(cc1)C(C)(C)C)CN1CC(C)OC(C1)C,0.005601647965290344,0.005601648122412352
-Fc1ccc(cc1)[Si](c1ccc(cc1)F)(Cn1cncn1)C,0.006341300659739408,0.023779877474022784
-CCOc1ccc(cc1)C(COCc1cccc(c1)Oc1ccccc1)(C)C,0.0690593023384914,0.06773123883198195
-CC(Oc1ccccn1)COc1ccc(cc1)Oc1ccccc1,0.4356352632556343,0.4315900691721648
-O=S1OCC2C(CO1)C1(C(C2(Cl)C(=C1Cl)Cl)(Cl)Cl)Cl,0.007126617932723449,0.015481963173347177
-OC(=O)C(Oc1ccc(cc1)Oc1ncc(cc1Cl)C(F)(F)F)C,0.00027647194701359843,0.0002764719511333511
-CCCSP(=O)(SCCC)OCC,0.011141416681473747,0.006065882533527741
-OC(=O)C(CCP(=O)(O)C)N,0.019323475195614302,1.2637552440957067
-CCSCSP(=S)(OCC)OCC,0.0006144925612602997,0.0006144925475253195
-CCOP(=S)(SCn1c(=O)oc2c1ccc(c2)Cl)OCC,0.0054376113486863924,0.0636200517424888
-CCOP(=S)(Oc1ncn(n1)c1ccccc1)OCC,0.004149212048673449,0.004149211896481245
-CNC(=O)Oc1cc(C)c(c(c1)C)SC,0.041276958181115306,0.1242747128033579
+Cc1cccc2c1n1cnnc1s2,0.16381576159162972,0.1373938645607217
+ClC(SN1C(=O)c2c(C1=O)cccc2)(Cl)Cl,0.16860133324539087,0.08430066662269543
+CCC(Nc1c(cc(cc1[N+](=O)[O-])C(C)(C)C)[N+](=O)[O-])C,0.16929970598735858,0.13747135609511818
+C#CCOS(=O)OC1CCCCC1Oc1ccc(cc1)C(C)(C)C,0.1697708869122168,0.06904967382858089
+CCCCCCCCSC(=O)Oc1cc(Cl)nnc1c1ccccc1,0.17813968959673715,0.3034972489425892
+CN1CN(C)CSC1=S,0.18486987933542975,0.027422365728598172
+CCOCN(c1c(C)cccc1CC)C(=O)CCl,0.18534506246313948,0.24799169923196304
+CNC(=O)Oc1cccc2c1cccc2,0.1878529324240324,0.2991731924668564
+COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1CCC(F)(F)F,0.19051986050321804,0.2098341392275743
+CC(Nc1nc(NC(C)C)nc(n1)Cl)C,0.21766590408142725,0.22201922216305578
+OC(C(C)(C)C)C(n1ncnc1)Oc1ccc(cc1)Cl,0.21976935578028234,0.06424027322808253
+CCC(=O)Nc1ccc(c(c1)Cl)Cl,0.22010285589875195,0.041269285481015994
+CNC(=O)Oc1ccccc1OC(C)C,0.23895810443138246,0.22939978025412716
+CCC(n1c(=O)[nH]c(c(c1=O)Br)C)C,0.23935747721355113,0.39446112244793224
+OC(=O)c1nc(Cl)c(c(c1Cl)N)Cl,0.24848916516834604,0.9318343693812976
+C=CC1(C)OC(=O)N(C1=O)c1cc(Cl)cc(c1)Cl,0.25479642918707424,0.0186990521567307
+C#CCC1=C(C)C(CC1=O)OC(=O)C1C(C1(C)C)C=C(C)C,0.316253365684832,0.05492821614526029
+CC(=CC1C(C1(C)C)C(=O)OCc1coc(c1)Cc1ccccc1)C,0.3693416417277341,0.04432099700732809
COC(=O)Nc1nc2c([nH]1)cccc2,0.3922867840256219,1.3076226134187396
+CC(Oc1ccccn1)COc1ccc(cc1)Oc1ccccc1,0.4356352632556343,0.4315900691721648
+N#Cc1c[nH]cc1c1cccc2c1OC(O2)(F)F,0.443217671652664,0.4553054263341003
+Clc1c(Cl)c([N+](=O)[O-])c(c(c1Cl)Cl)Cl,0.44864352207850955,0.4774244272684517
+CC1N(C(=O)NC2CCCCC2)C(=O)SC1c1ccc(cc1)Cl,0.4534134152107278,0.06546156290207059
+ClCCOc1ccccc1S(=O)(=O)NC(=O)Nc1nc(C)nc(n1)OC,0.5494924735209582,0.07465930346752149
+CCOC(=O)CC(C(=O)OCC)SP(=S)(OC)OC,0.6205388929259232,0.2603236331298995
+COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1C(=O)OC,0.655542030995076,0.31203800675365617
+[O-][N+](=O)c1cc(Cl)c(c(c1)Cl)N,0.9419645496713847,0.5651787298028309
+ClCCP(=O)(O)O,1.0381053884590363,0.9066120392542251
+CC(OC(=O)Nc1cccc(c1)Cl)C,3.510237115113031,0.14040948460452124
+OC(=O)CNCP(=O)(O)O,5.914602135360638,5.559726007239
diff --git a/paper/data/swiss-cv.csv b/paper/data/swiss-cv.csv
new file mode 100644
index 0000000..309ba82
--- /dev/null
+++ b/paper/data/swiss-cv.csv
@@ -0,0 +1,447 @@
+SMILES,LOAEL_measured_median,LOAEL_predicted,Confidence
+N#Cc1nn(c(c1S(=O)C(F)(F)F)N)c1c(Cl)cc(cc1Cl)C(F)(F)F,0.00013496580117055152,0.017344813680253365,1
+Clc1ccc2c(c1)[n+]([O-])nc(n2)n1cncc1,0.00020190555530632425,0.03523100134958194,1
+OC(=O)C(Oc1ccc(cc1)Oc1ncc(cc1Cl)C(F)(F)F)C,0.0002764719511333511,0.018009645835635024,1
+CCOP(=S)(SCSC(C)(C)C)OCC,0.00034670385697674235,0.004425660789408393,1
+COC(=O)C(Oc1ccc(cc1)Oc1ccc(cc1Cl)Cl)C,0.0005861906011027885,0.03191598936338135,1
+CCSCSP(=S)(OCC)OCC,0.0006144925475253195,0.001840915966735963,1
+CSc1ccc(cc1C)OP(=S)(OC)OC,0.0007185764991867223,0.005508449832284467,1
+CCOP(=O)(SC(CC)C)SC(CC)C,0.0008210296720157477,0.008579634171466552,1
+CNC(=O)CSP(=S)(OC)OC,0.000872381733741038,0.014593717469688315,1
+CCS(=O)CCSP(=O)(OC)OC,0.0008932752807580748,0.0015198540889657284,0.3125
+OC(=O)c1ccccc1.CCC(C1OC2(C=CC1C)OC1CC=C(C)C(OC3CC(OC)C(C(O3)C)OC3CC(OC)C(C(O3)C)NC)C(C)C=CC=C3C4(C(C(=O)OC(C2)C1)C=C(C)C(C4OC3)O)O)C,0.0009918273033473258,0.0018793779503536868,0.7758620689655172
+COP(=O)(SCCS(=O)(=O)CC)OC,0.0011437981092748413,0.0015198540889657284,0.3125
+COC1CC(OC2C(C)C=CC=C3COC4C3(O)C(C=C(C4O)C)C(=O)OC3CC(CC=C2C)OC2(C3)C=CC(C(O2)C(CC)C)C)OC(C1OC1CC(OC)C(C(O1)C)O)C.COC1CC(OC(C1OC1CC(OC)C(C(O1)C)O)C)OC1C(C)C=CC=C2COC3C2(O)C(C=C(C3O)C)C(=O)OC2CC(CC=C1C)OC1(C2)C=CC(C(O1)C(C)C)C,0.0011546496256700967,0.0028270088965558233,1
+O=C1CCCC(=O)C1C(=O)c1ccc(cc1Cl)S(=O)(=O)C,0.0012166633663470796,0.002407097420013963,1
+CCOP(=O)(N1CCSC1=O)SC(CC)C,0.001341107599716744,0.0011515824980518601,1
+O=C1CCCC(=O)C1C(=O)c1ccc(cc1[N+](=O)[O-])S(=O)(=O)C,0.001414591694222218,0.019307092486906843,1
+CCOP(=O)(Oc1ccc(c(c1)C)SC)NC(C)C,0.001516363034790411,0.007657024526944333,1
+CCSCCSP(=O)(OC)OC,0.001519854088965729,0.000883234745514374,1
+COC(=O)/C=C(/OP(=O)(OC)OC)\C,0.0015614663384413926,0.038332425152094556,1
+C1CCC(CC1)[Sn](n1ncnc1)(C1CCCCC1)C1CCCCC1,0.0018110419025972907,0.014704825918970935,1
+N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)[C@H](C(C)C)Nc1ccc(cc1Cl)C(F)(F)F,0.001988416717024977,0.012590156321812351,1
+COP(=O)(SC)N,0.0020548549325897737,0.045296304153967855,0.13333333333333333
+CCCSP(=O)(SCCC)OCC,0.002063225311384027,0.0018731882921710285,1
+COP(=S)(Oc1ccc(cc1)[N+](=O)[O-])OC,0.002089606472099723,0.01737984618635041,1
+COc1sc(=O)n(n1)CSP(=S)(OC)OC,0.0021168829879502555,0.011073447351926287,1
+O=C1CCCC(=O)C1C(=O)c1ccc(c(c1Cl)COCC(F)(F)F)S(=O)(=O)C,0.002381932321850521,0.007692682105248146,1
+OC(=O)c1ccccc1.CCC(C1OC2(C=CC1C)OC1CC=C(C)C(OC3CC(OC)C(C(O3)C)OC3CC(OC)C(C(O3)C)NC)C(C)C=CC=C3C4(C(C(=O)OC(C2)C1)C=C(C)C(C4OC3)O)O)C,0.0024795682583683147,0.001880233874102364,1
+CSc1ccc(cc1C)OP(=S)(OC)OC,0.0025868754613179463,0.008959434615561151,1
+O[Sn](C1CCCCC1)(C1CCCCC1)C1CCCCC1,0.002596303652874617,0.07470222573840693,1
+CCOP(=S)(Oc1nc(Cl)c(cc1Cl)Cl)OCC,0.002852364738724816,0.014300523748580053,1
+COP(=S)(Oc1nc(Cl)c(cc1Cl)Cl)OC,0.003100456591840454,0.02321316597224481,1
+Fc1ccc(cc1)N(C(=O)COc1nnc(s1)C(F)(F)F)C(C)C,0.0033027779077186826,0.031034162092124017,1
+CCOP(=S)(Oc1ccc2c(c1)oc(=O)c(c2C)Cl)OCC,0.0033630532459809582,0.009462543754698434,1
+CNC(=O)Oc1cccc2c1OC(O2)(C)C,0.0035838244976124515,0.08761345281390893,1
+O[Sn](C1CCCCC1)(C1CCCCC1)C1CCCCC1,0.003608862040355308,0.07470222573840693,1
+OC(=O)COc1nc(Cl)c(cc1Cl)Cl,0.0038990829980641837,0.06919651159726646,1
+CCc1nn(c(c1Cl)C(=O)NCc1ccc(cc1)Oc1ccc(cc1)C)C,0.003907559846623587,0.09871158498928112,1
+CCOP(=S)(Oc1ncn(n1)c1ccccc1)OCC,0.004149211896481245,0.022355398180114477,1
+CCOP(=O)(O/C(=C/Cl)/c1ccc(cc1Cl)Cl)OCC,0.004171650398342553,0.03530064138051918,1
+CCOP(=S)(OCC)SCSP(=S)(OCC)OCC,0.004681695305160139,0.006211804536307154,1
+N#CC(c1ccc(c(c1)Oc1ccccc1)F)OC(=O)C1C(C1(C)C)/C=C(/c1ccc(cc1)Cl)\Cl,0.004898276703964497,0.012756681713004479,1
+N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Br)Br,0.004948543461552866,0.03386160134712949,1
+CNC(=O)Oc1cccc2c1OC(C2)(C)C,0.005288078037050265,0.0617750690364596,1
+CCNc1nc(Cl)nc(n1)NC(C)(C)C,0.005398114462735858,0.07999443961831189,1
+CC(Cc1ccc(cc1)C(C)(C)C)CN1CC(C)OC(C1)C,0.005601648122412352,0.0686569170847295,1
+CCOP(=O)(Oc1ccc(c(c1)C)SC)NC(C)C,0.005603950401492444,0.007657024526944333,1
+COP(=S)(SCN1C(=O)c2c(C1=O)cccc2)OC,0.005672488506643871,0.03943927185787346,1
+COc1sc(=O)n(n1)CSP(=S)(OC)OC,0.005689123251910172,0.011073447351926287,1
+COP(=S)(Oc1ccc(cc1)[N+](=O)[O-])OC,0.005698926618569244,0.018344682252482723,1
+OC(C(Cl)(Cl)Cl)(c1ccc(cc1)Cl)c1ccc(cc1)Cl,0.005938151689011985,0.04868409643292804,1
+Clc1ccc(cc1)C(c1ccccc1Cl)(c1cncnc1)O,0.006038720639060896,0.05532402173688333,1
+COC(=O)N(C(=O)N1COC2(C(=N1)c1ccc(cc1C2)Cl)C(=O)OC)c1ccc(cc1)OC(F)(F)F,0.006820319575237628,0.05716874508521694,1
+CCN(c1nc(cc(n1)C)OP(=S)(OC)OC)CC,0.0068777238395693234,0.018493234339046313,1
+CCNc1nc(Cl)nc(n1)NC(C)(C)C,0.006921775895097049,0.09426207720335626,1
+CCOP(=S)(Oc1ccc(cc1)[N+](=O)[O-])OCC,0.00703837000663162,0.017057539160016816,1
+Cc1nn(c(c1/C=N/OCc1ccc(cc1)C(=O)OC(C)(C)C)Oc1ccccc1)C,0.0071176254993963305,0.21775855137076283,1
+O=C(N(C)C)Nc1ccc(c(c1)Cl)Cl,0.007293179580314936,0.08327089627070684,1
+CCNc1nc(Cl)nc(n1)NC(C)(C)C,0.0074006409463509264,0.08321807469197018,1
+Clc1ccc(cc1)C(c1ccccc1Cl)(c1cncnc1)O,0.007548400798826121,0.0471479016716324,1
+N#Cc1nn(c(c1S(=O)CC)N)c1c(Cl)cc(cc1Cl)C(F)(F)F,0.008056334643428573,0.0003583362548250535,1
+CN(C(=S)S[Zn]SC(=S)N(C)C)C,0.00817493363915869,0.060687914488021834,1
+Clc1cc(cnc1Nc1c(cc(c(c1[N+](=O)[O-])Cl)C(F)(F)F)[N+](=O)[O-])C(F)(F)F,0.00821343424858256,0.06022444999031304,1
+Clc1cc(cnc1Nc1c(cc(c(c1[N+](=O)[O-])Cl)C(F)(F)F)[N+](=O)[O-])C(F)(F)F,0.008385443694386083,0.037891775077278596,1
+COP(=S)(SCn1nnc2c(c1=O)cccc2)OC,0.008508644649457775,0.022637038229098648,1
+Clc1ccc(c(c1)Cl)C(Cn1cncn1)COC(C(F)F)(F)F,0.00913621053742932,0.055026926191950536,1
+CCCN(C(=O)SCc1ccccc1)CCC,0.009149216533940492,0.026602989294595297,1
+COCN(c1c(CC)cccc1CC)C(=O)CCl,0.009267253123156974,0.22739182733921706,1
+CON(C(=O)Nc1ccc(cc1)Cl)C,0.00931754394759366,0.03688265812273081,1
+C=CC1(C)OC(=O)N(C1=O)c1cc(Cl)cc(c1)Cl,0.009436904951368202,0.12399084292970325,1
+CCCCOC(=O)C(Oc1ccc(cc1)Oc1ccc(cc1F)C#N)C,0.009625729959721526,0.012480268779678033,1
+CCCCOC(=O)C(Oc1ccc(cc1)Oc1ccc(cn1)C(F)(F)F)C,0.009886227162529472,0.005677758509221912,1
+N#C/N=C\1/SCCN1Cc1ccc(nc1)Cl,0.009892243396986886,0.1373774325740661,1
+Cc1c(ccc(c1C1=NOCC1)S(=O)(=O)C)C(=O)c1cnn(c1O)C,0.009906758425540224,0.01665216481560526,1
+CC1(C)CNC(=NC1)NN=C(C=Cc1ccc(cc1)C(F)(F)F)C=Cc1ccc(cc1)C(F)(F)F,0.009909494556264633,0.07981845994371692,1
+CCCSP(=O)(SCCC)OCC,0.010068539755671456,0.004650206638789641,1
+COP(=O)(OC=C(Cl)Cl)OC,0.010408382170442241,0.0015614663384413924,0.25
+CC(Oc1cc(c(cc1Cl)Cl)n1nc(oc1=O)C(C)(C)C)C,0.010428101697378017,0.12778348323867558,1
+CCOC(=O)Nc1cccc(c1)OC(=O)Nc1ccccc1,0.010655682947629983,0.19281355438109932,1
+N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Br)Br,0.010688854065726137,0.0346981875702237,1
+CCOC(=O)c1cn2nc(cc2nc1C)OP(=S)(OCC)OCC,0.010713392485187262,0.006900785651842258,1
+Clc1ccc(cc1)C(c1ccccc1Cl)(c1cncnc1)O,0.01086969686236098,0.046116554883113524,1
+O=C(C(Oc1ccc(cc1)Oc1cnc2c(n1)ccc(c2)Cl)C)OCCON=C(C)C,0.011264301100355506,0.011573916415728037,1
+Clc1cc(ccc1Oc1ccc(c(c1)C(=O)NS(=O)(=O)C)[N+](=O)[O-])C(F)(F)F,0.011395676083924233,0.04346838792923881,1
+N#Cc1c(Cl)c(C#N)c(c(c1Cl)Cl)Cl,0.012410167132297197,0.08243149771542137,1
+COP(=S)(SCN1C(=O)c2c(C1=O)cccc2)OC,0.012605530348696702,0.0444225657944749,1
+Clc1ccc(c(c1)Cl)n1c(nc2c(c1=O)cc(cc2)F)n1cncn1,0.01268036889326992,0.029111840582303525,1
+CCC1CCCC(OC2CCC(C(O2)C)N(C)C)C(C)C(=O)C2=CC3C(C2CC(=O)O1)C=C(C)C1C3CC(C1)OC1OC(C)C(C(C1OC)OC)OC,0.012734890360905185,0.004194655459081704,1
+CC1C=CC=C2COC3C2(O)C(C=C(C3O)C)C(=O)OC2CC(CC=C(C1)C)OC1(C2)CCC(C(O1)C)C,0.01286229964885329,0.002492034563523696,1
+COc1ccc(cc1NNC(=O)OC(C)C)c1ccccc1,0.01298475189092086,0.6426764188081437,1
+COP(=O)(NC(=O)C)SC,0.013648831720059621,0.0023317481611294373,1
+O=c1c(Cl)c(SCc2ccc(cc2)C(C)(C)C)cnn1C(C)(C)C,0.013701160159437661,0.03149493417295726,1
+NC1=C(Cl)C(=O)c2c(C1=O)cccc2,0.013920121360835688,0.0767015036114862,1
+O=C(C1C(C1(C)C)/C=C(/C(F)(F)F)\Cl)OCc1c(F)c(F)c(c(c1F)F)C,0.01409010160197152,0.09735069347835236,1
+CS/C(=N/OC(=O)N(SN(C(=O)O/N=C(/SC)\C)C)C)/C,0.014105593115928905,0.02135491357652788,0.14285714285714285
+CC(c1ccccc1)(C[Sn](O[Sn](CC(c1ccccc1)(C)C)(CC(c1ccccc1)(C)C)CC(c1ccccc1)(C)C)(CC(c1ccccc1)(C)C)CC(c1ccccc1)(C)C)C,0.014249578440471417,0.05519865342588798,1
+CON(C(=O)Nc1ccc(cc1)Br)C,0.014357399945172603,0.04262134693069911,1
+CCNc1nc(NC(C)C)nc(n1)Cl,0.014372927711833409,0.051943767855990995,1
+CC(c1ccc(cc1)CCOc1ncnc2c1cccc2)(C)C,0.014686613132547533,0.04855108106681143,1
+N#CC(c1ccc(cc1)Oc1ccccc1)OC(=O)C1C(C1(C)C)/C=C\C(=O)OC(C(F)(F)F)C(F)(F)F,0.014960133059978587,0.04345047588412717,1
+Clc1cc(cnc1CCNC(=O)c1ccccc1C(F)(F)F)C(F)(F)F,0.015124216704213374,0.03310157945598653,1
+Fc1ccc(cc1)C1(Cn2cncn2)OC1c1ccccc1Cl,0.015162725459871818,0.058218089172169,1
+N#CN=S(=O)(C(c1ccc(nc1)C(F)(F)F)C)C,0.015292167409562457,0.08044125093401416,1
+CC(C1C2CCC1c1c2cccc1NC(=O)c1cn(nc1C(F)F)C)C,0.015302732709143212,0.0659916700840019,1
+O[Sn](C1CCCCC1)(C1CCCCC1)C1CCCCC1,0.015577821917247702,0.48143700649247756,1
+c1cc[n+]2c(c1)c1cccc[n+]1CC2,0.015794866515636753,0.11000206815300977,1
+Fc1ccc(cc1)[Si](c1ccc(cc1)F)(Cn1cncn1)C,0.01585325164934852,0.030000496093829605,1
+CCCCN(c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-])CC,0.016105987222784814,0.1189023058328751,1
+CCOP(=S)(Oc1cc(C)nc(n1)C(C)C)OCC,0.01642869699075557,0.011528157789546231,1
+CCN(C(=O)SCc1ccc(cc1)Cl)CC,0.016680921188449865,0.04306025403236089,1
+CCOP(=S)(Oc1nc(Cl)c(cc1Cl)Cl)OCC,0.0171141884323489,0.012566395107108207,1
+BrC1COC(C1)(Cn1cncn1)c1ccc(cc1Cl)Cl,0.017185417014945824,0.053028769050118635,1
+CC(NC(=S)[S])CNC(=S)S[Zn],0.017255039351497643,0.07465412245481072,1
+CC(NC(=S)[S])CNC(=S)S[Zn],0.017255039351497643,0.16793286430045296,1
+COCC(N(c1c(C)csc1C)C(=O)CCl)C,0.018129419544573026,0.2826580402554426,1
+Fc1ccc(cc1)C1(Cn2cncn2)OC1c1ccccc1Cl,0.018195270551846183,0.04421228477468209,1
+N#Cc1c(Cl)cccc1Cl,0.0186034162597095,0.07890493307638533,1
+CNC(=O)ON=C(C(=O)N(C)C)SC,0.019109609238234706,0.056548471383657296,1
+CCc1nn(c(c1Cl)C(=O)NCc1ccc(cc1)C(C)(C)C)C,0.019469491695902355,0.01932744862005293,1
+C[n+]1ccc(cc1)c1cc[n+](cc1)C,0.020133908207418557,0.018285286437140467,0.2
+N#Cc1sc2=c(sc1C#N)c(=O)c1c(c2=O)cccc1,0.020248123201460456,0.23063198145893238,1
+c1cc[n+]2c(c1)c1cccc[n+]1CC2,0.021168377697732887,0.07893105200824276,1
+CN1CN(C)CSC1=S,0.022184384932566064,0.05143501540726455,1
+CNC(=O)ON=C(C(=O)N(C)C)SC,0.022347753176858155,0.0525615588790517,1
+CNC(=O)Oc1cccc2c1OC(C2)(C)C,0.022598624918870935,0.06327057843197975,1
+OC(=O)COc1ccc(cc1Cl)Cl,0.022620602193004043,0.0343951090528196,1
+CSC1=NC(C(=O)N1Nc1ccccc1)(C)c1ccccc1,0.022800155556897562,0.0769113775658633,1
+COC(=O)N(c1ccccc1COc1ccn(n1)c1ccc(cc1)Cl)OC,0.02320682656135787,0.09470190703489169,1
+CCCCCCCCCCCCC1=C(OC(=O)C)C(=O)c2c(C1=O)cccc2,0.02340650588512378,0.060675637920424984,1
+O=C(NC(=O)c1ccccc1Cl)Nc1ccc(cc1)OC(F)(F)F,0.023557308728421166,0.08557931600835092,1
+CON(C(=O)Nc1ccc(c(c1)Cl)Cl)C,0.025090939601491648,0.03923477121002939,1
+CON(C(=O)Nc1ccc(c(c1)Cl)Cl)C,0.025090939601491648,0.028754208201110545,1
+OC(=O)C(Oc1ccc(cc1C)Cl)C,0.02562363979237584,0.016643468119522744,1
+Clc1ccccc1CC(C1(Cl)CC1)(Cn1cncn1)O,0.025625059257949535,0.0660230798561165,1
+N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)/C=C(/C(F)(F)F)\Cl,0.026675554368592185,0.02509109884427809,1
+O=C(C1C(C1(C)C)C=C(Cl)Cl)OCc1c(F)c(F)cc(c1F)F,0.026942980220700186,0.09663897903564027,1
+Cc1nn(c(c1C(=O)c1ccc(cc1S(=O)(=O)C)C(F)(F)F)O)C,0.027599589461626675,0.025378541876685114,1
+C=CC1(C)OC(=O)N(C1=O)c1cc(Cl)cc(c1)Cl,0.027961199362093195,0.10035495983582812,1
+OC(=O)C(Oc1ccc(cc1)Oc1nc2c(o1)cc(cc2)Cl)C,0.028167056356499628,0.012381567627824417,1
+CC(N(C(=O)SCC(=C(Cl)Cl)Cl)C(C)C)C,0.028227806467376604,0.009149216533940489,0.1
+CCOP(=S)(Oc1nc(Cl)c(cc1Cl)Cl)OCC,0.028523647387248163,0.01260929482868455,1
+N#Cc1cc(Br)c(c(c1)Br)O,0.028889958940868102,0.029869344404748466,1
+CCOC(=O)C(Cc1cc(c(cc1Cl)F)n1nc(n(c1=O)C(F)F)C)Cl,0.029112705155716945,0.16848705182588955,1
+C#CCOC(=O)C(Oc1ccc(cc1)Oc1ncc(cc1F)Cl)C,0.029164453292198207,0.014784205836078774,1
+Fc1ccc(c(c1)c1ccc(c(c1)Cl)Cl)NC(=O)c1cn(nc1C(F)F)C,0.02921233570136655,0.06361277589588017,1
+CCOC(=O)c1cn2nc(cc2nc1C)OP(=S)(OCC)OCC,0.02946182933426497,0.007637073565493268,1
+CC(N1/C(=N/C(C)(C)C)/SCN(C1=O)c1ccccc1)C,0.029465850912223458,0.07039595644138076,1
+COC(=O)c1c(Cl)c(Cl)c(c(c1Cl)Cl)C(=O)OC,0.030123726579706293,0.2828087742644706,1
+Clc1c(ccc(c1N)[N+](=O)[O-])Oc1ccccc1,0.030226952270055448,0.1706598767161664,1
+CN(C(=S)SSC(=S)N(C)C)C,0.03036190470594063,0.052029910797683425,1
+COc1nc(Oc2cccc(c2C(=O)[O-])Oc2nc(OC)cc(n2)OC)nc(c1)OC.[Na+],0.030507347552487064,0.38910433529085675,1
+C[n+]1ccc(cc1)c1cc[n+](cc1)C,0.030657230461935643,0.018285286437140467,0.2
+CS/C(=N/OC(=O)N(SN(C(=O)O/N=C(/SC)\C)C)C)/C,0.03103230485504359,0.11836501403389493,1
+CC(C(c1cncnc1)(c1ccc(cc1)OC(F)(F)F)O)C,0.03138138916099924,0.02071602054046362,1
+Fc1ccc(cc1)[Si](c1ccc(cc1)F)(Cn1cncn1)C,0.03170650329869704,0.029386017466380077,1
+CCN(c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-])CC(=C)C,0.03210675757919814,0.1016500364317196,1
+Fc1ccc(cc1)NC(=O)c1cccc(n1)Oc1cccc(c1)C(F)(F)F,0.032154821211279785,0.06431687769950017,1
+CCCn1c(OCCC)nc2c(c1=O)cc(cc2)I,0.03224060518839999,0.11241236083791278,0.10810810810810811
+COc1cc(OC)nc(n1)NC(=O)NS(=O)(=O)c1ncccc1C(F)(F)F,0.0326520524201809,0.7659687318664031,1
+CN1CN(C)CSC1=S,0.03266034652463028,0.05143501540726455,1
+Fc1ccc(cc1)C(c1ccccc1F)(Cn1cncn1)O,0.03385434330908588,0.038703944827712805,1
+CN(/C=N/c1ccc(cc1C)C)/C=N/c1ccc(cc1C)C,0.03408246361134649,0.08953376139918832,1
+CCCCC(c1ccc(cc1)Cl)(Cn1cncn1)C#N,0.03428271152063386,0.07298099293681495,1
+ClC(=CCOc1cc(Cl)c(c(c1)Cl)OCCCOc1ccc(cn1)C(F)(F)F)Cl,0.034818667907167616,0.0362288141006914,1
+O=C(C1=C(C)OCCS1)Nc1ccccc1,0.034848813981213346,0.21864734359359156,1
+CCC(C(=O)OC1=C(C(=O)OC21CCCCC2)c1ccc(cc1Cl)Cl)(C)C,0.03578732146400678,0.04379943535631055,1
+CC(NC(=O)N1CC(=O)N(C1=O)c1cc(Cl)cc(c1)Cl)C,0.03634528529867737,0.07637115370975499,1
+CCOCn1c(c2ccc(cc2)Cl)c(c(c1C(F)(F)F)Br)C#N,0.036799624938222635,0.03897860826866038,1
+C#CCOC(c1ccc(cc1)Cl)C(=O)NCCc1ccc(c(c1)OC)OCC#C,0.0369041241749624,0.09203593406976318,1
+OC(C(C)(C)C)C(n1ncnc1)Cc1ccc(cc1)Cl,0.03744148066760202,0.0613731142050769,1
+Clc1ccc(cc1)c1ccccc1NC(=O)c1cccnc1Cl,0.03787805062535496,0.16131302749798718,1
+CCOC(=O)CSc1nc(nn1C(=O)N(C)C)C(C)(C)C,0.03816748004747272,0.1637892862116553,1
+O=C(NC(=O)c1c(F)cccc1F)Nc1cc(Cl)c(cc1Cl)OC(C(C(F)(F)F)F)(F)F,0.03990998658130422,0.08948423022911094,1
+O=C(CC(C)(C)C)OC1=C(C(=O)OC21CCCC2)c1c(C)cc(cc1C)C,0.039947970982482275,0.03534257976629232,0.3225806451612903
+CNC(=S)S,0.04011276528748593,0.08008768834713341,1
+CCCN(c1c(cc(cc1[N+](=O)[O-])S(=O)(=O)N)[N+](=O)[O-])CCC,0.04042042788372036,0.12945843909761384,1
+CCCOC/C(=N\c1ccc(cc1C(F)(F)F)Cl)/n1cncc1,0.04049199977868229,0.015563764732395404,1
+OC1(Cn2ncnc2)C(CCC1(C)C)Cc1ccc(cc1)Cl,0.04095937862019833,0.04921201205383799,1
+CCC(=O)Nc1ccc(c(c1)Cl)Cl,0.041269285481015994,0.09057740215879534,1
+CCOC(=O)C(Oc1ccc(cc1)Oc1cnc2c(n1)ccc(c2)Cl)C,0.04157699893895499,0.014607562266177505,1
+Fc1ccc(c(c1)c1ccc(c(c1)Cl)Cl)NC(=O)c1cn(nc1C(F)F)C,0.04200781934177246,0.05498513608625931,1
+CN(C(=S)S[Zn]SC(=S)N(C)C)C,0.04250965492362519,0.060687914488021834,1
+CCOC(=O)CCN(C(C)C)SN(C(=O)Oc1cccc2c1OC(C2)(C)C)C,0.04262807177885238,0.040276083030728126,1
+Clc1ccc(c(c1)Cl)C1(OCCO1)Cn1cncn1,0.042646674541424644,0.04899254563307668,1
+CC(N(C(=O)SCC(=C(Cl)Cl)Cl)C(C)C)C,0.04266993811611698,0.009149216533940489,0.1
+C#CCOS(=O)OC1CCCCC1Oc1ccc(cc1)C(C)(C)C,0.04279938325518071,0.055408460312378546,1
+O=C(CC(C)(C)C)OC1=C(C(=O)OC21CCCC2)c1c(C)cc(cc1C)C,0.042917075351131324,0.046464409855751024,1
+OC1(Cn2ncnc2)C(CCC1(C)C)Cc1ccc(cc1)Cl,0.043148047046675374,0.056602427368467184,1
+CC(Cc1ccc(cc1)C(C)(C)C)CN1CCCCC1,0.04326105065224025,0.013280675424547358,1
+CC(=CC1C(C1(C)C)C(=O)OCc1coc(c1)Cc1ccccc1)C,0.04432099700732809,0.134338708765534,1
+[S-]C(=S)NCCNC(=S)[S-].[Zn+2],0.04460661819584039,0.04860046355249439,1
+O=C1OC(C(=O)N1Nc1ccccc1)(C)c1ccc(cc1)Oc1ccccc1,0.045407278177700156,0.10857645875840657,1
+CCNc1nc(NC(C)C)nc(n1)Cl,0.04636428436773443,0.051943767855990995,1
+N#CC(c1ccc(c(c1)Oc1ccccc1)F)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.04835505096829608,0.01697256463740768,1
+CCC(C(=O)NCc1ccccc1)Oc1ccc(c(c1)C(F)(F)F)F,0.049813316199071624,0.0631047049249134,1
+O=C(c1cccc(c1C(=O)NC(CS(=O)(=O)C)(C)C)I)Nc1ccc(cc1C)C(C(F)(F)F)(C(F)(F)F)F,0.04982487508940451,0.16092687388776916,1
+CN(C(=S)SSC(=S)N(C)C)C,0.04990997903448147,0.052029910797683425,1
+ClCC1CN(C(=O)C1Cl)c1cccc(c1)C(F)(F)F,0.05030195369030707,0.08103248684079799,1
+C=CCOC(c1ccc(cc1Cl)Cl)Cn1cncc1,0.05047450068604942,0.023118608097902237,1
+CN(C(=O)Oc1nc(nc(c1C)C)N(C)C)C,0.05161859628615915,0.04977891267988713,1
+COCN(c1c(CC)cccc1CC)C(=O)CCl,0.05189661748967905,0.20188678565232013,1
+CNC(=O)Oc1cccc(c1)/N=C/N(C)C,0.051976062085632144,0.09195186539535166,1
+Clc1ccc(cc1)C(c1ccccc1Cl)(c1cncnc1)O,0.05283880559178284,0.04189686717166363,1
+CCO/N=C(\C1=C(O)CC(CC1=O)C1CCCSC1)/CCC,0.052847272941488777,0.11265754814947855,0.2857142857142857
+CC(C(c1ccc(cc1)Cl)(Cn1ncnc1)O)C1CC1,0.05326004956767166,0.03798066345599283,1
+O=C(N/C(=N\OCC1CC1)/c1c(F)c(F)ccc1C(F)(F)F)Cc1ccccc1,0.053352320292409515,0.056521863313852456,1
+C=CCOC(c1ccc(cc1Cl)Cl)Cn1cncc1,0.05350296944357954,0.019218559674619703,1
+C#CCC1=C(C)C(CC1=O)OC(=O)C1C(C1(C)C)C=C(C)C,0.05492821614526029,0.10390334684834944,1
+CNC1=C(c2cccc(c2)C(F)(F)F)C(=O)C(O1)c1ccccc1,0.055204779037407746,0.08269089323283962,1
+CCOc1ccc2c(c1)C(=CC(N2)(C)C)C,0.05522147585284508,0.1387754887585445,1
+O=C(c1ccc(cc1S(=O)(=O)C)C(F)(F)F)c1cnoc1C1CC1,0.05566064749641608,0.02298512893865435,1
+COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1Cl,0.05590140200157206,0.2350095494055934,1
+COc1nc(nc(n1)C)NC(=O)[N-]S(=O)(=O)c1cc(I)ccc1C(=O)OC.[Na+],0.05611797964648073,0.2676949335249149,1
+O=C(C(Oc1ccc(cc1)Oc1cnc2c(n1)ccc(c2)Cl)C)OCCON=C(C)C,0.05632150550177753,0.015406382072853698,1
+Clc1ccccc1c1nnc(nn1)c1ccccc1Cl,0.05706818624978773,0.08896364017396666,1
+CCCN(C(=O)n1cncc1)CCOc1c(Cl)cc(cc1Cl)Cl,0.05707983190600125,0.061911009434330255,1
+CCOCCN(C(=C(C)C)c1ccccc1)C(=O)CCl,0.057470413386035736,0.1275914675240323,1
+CC1COC(O1)(Cn1cncn1)c1ccc(cc1Cl)Oc1ccc(cc1)Cl,0.05932124091140686,0.0727699851914961,1
+N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C(c1ccc(cc1)Cl)C(C)C,0.05953797389131243,0.020978319280864646,1
+N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C(c1ccc(cc1)Cl)C(C)C,0.05953797389131243,0.020417506344953266,1
+N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C(c1ccc(cc1)Cl)C(C)C,0.05953797389131243,0.019365048465195316,1
+N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)(C)C,0.06009909138187043,0.032860975562319344,1
+CC(OC(=O)C(c1ccc(cc1)Br)(c1ccc(cc1)Br)O)C,0.060497742776698574,0.1495196727139019,1
+C=CC=CCC1=C(C)C(CC1=O)OC(=O)C1C(C1(C)C)C=C(C)C.C=CC=CCC1=C(C)C(CC1=O)OC(=O)C1C(C1(C)C)C=C(C(=O)OC)C,0.06134969850332702,0.12306134086604247,1
+O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(c(c1)Cl)OC(C(OC(F)(F)F)F)(F)F,0.062106180868884746,0.07069517565485765,1
+COC(=O)c1cccc(c1S(=O)(=O)NC(=O)Nc1nc(OCC(F)(F)F)nc(n1)N(C)C)C,0.062140866929396014,0.248120536258991,1
+Clc1cc(F)c(cc1C(=O)NS(=O)(=O)N(C(C)C)C)n1c(=O)cc(n(c1=O)C)C(F)(F)F,0.06269313377509025,0.029112705155716952,0.10416666666666667
+ClCC(=O)N(c1c(C)cccc1C)Cn1cccn1,0.06336648858092589,0.1332496670549599,1
+CCOP(=S)(SCn1c(=O)oc2c1ccc(c2)Cl)OCC,0.0636200517424888,0.008434010240467348,1
+ClC(=CC1C(C1(C)C)C(=O)OCc1cccc(c1)Oc1ccccc1)Cl,0.06389160712181856,0.056430499830820414,1
+CSc1nnc(c(=O)n1N)C(C)(C)C,0.06393266242893511,0.013701160159437665,0.11538461538461539
+O=C(NS(=O)(=O)c1c(C)cccc1C(=O)O)Nc1nc(OCC(F)(F)F)nc(n1)N(C)C,0.06396281173215432,0.15839611977121426,1
+OC(C(C)(C)C)C(n1ncnc1)Oc1ccc(cc1)Cl,0.06424027322808253,0.0714520296319689,1
+O=C(N(C)C)Nc1ccc(c(c1)Cl)n1nc(oc1=O)C(C)(C)C,0.06493710428214157,0.021477732951960638,1
+O=C(NC(=O)c1c(F)cccc1F)Nc1cc(Cl)c(c(c1F)Cl)F,0.06507320207279278,0.06084688873507753,1
+CC1N(C(=O)NC2CCCCC2)C(=O)SC1c1ccc(cc1)Cl,0.06546156290207059,0.041977666914404244,1
+CCCCN(SN(C(=O)Oc1cccc2c1OC(C2)(C)C)C)CCCC,0.06569530810416269,0.021138552025194924,1
+CCOc1ccc(cc1)C(COCc1cccc(c1)Oc1ccccc1)(C)C,0.06773123883198195,0.1279190797644839,1
+C#CCOS(=O)OC1CCCCC1Oc1ccc(cc1)C(C)(C)C,0.06904967382858089,0.08650638636803425,1
+CCO/N=C(/C1=C(O)CC(CC1=O)c1c(C)cc(cc1C)C)\CC,0.06981686853252955,0.11290411668826239,1
+CO/N=C(/c1ccccc1COc1cc(C)ccc1C)\C(=O)NC,0.07046793589427701,0.314493998741278,1
+COc1cc(OC)n2c(n1)nc(n2)S(=O)(=O)Nc1c(Cl)ccc(c1Cl)C,0.07172655770478076,0.24864999556659206,1
+C#CCOS(=O)OC1CCCCC1Oc1ccc(cc1)C(C)(C)C,0.07190296604559293,0.05421753035009689,1
+CO/N=C(\c1ccccc1CO/N=C(/c1cccc(c1)C(F)(F)F)\C)/C(=O)OC,0.07272797449373557,0.17533172033775735,1
+O=C(N(C)C)Nc1cccc(c1)C(F)(F)F,0.07395704796137248,0.053248039540905706,1
+OC(C(C)(C)C)C(n1cncn1)Oc1ccc(cc1)c1ccccc1,0.07409262028018154,0.1535614611373911,1
+ClCCOc1ccccc1S(=O)(=O)NC(=O)Nc1nc(C)nc(n1)OC,0.07465930346752149,0.27773056338281416,1
+Cn1nc(c(c1)C(=O)Nc1cccc2c1C1CCC2C1=C(Cl)Cl)C(F)F,0.07583481070072216,0.0461951218232978,1
+N#C/N=C(/N(Cc1ccc(nc1)Cl)C)\C,0.07859017665904088,0.04505453842232625,1
+NC(=N)NCCCCCCCCNC(=N)N,0.08102032708037427,0.04459021281877348,1
+CCOC(=O)NCCOc1ccc(cc1)Oc1ccccc1,0.08196801536106943,0.1646969251307165,1
+Clc1cc(cnc1CNC(=O)c1c(Cl)cccc1Cl)C(F)(F)F,0.08212099927021806,0.024768757321864513,1
+O=C(C(C)(C)C)C(n1ncnc1)Oc1ccc(cc1)Cl,0.08272375649019124,0.06543705860142954,1
+COP(=S)(Oc1ccc(c(c1)C)[N+](=O)[O-])OC,0.08332310268057162,0.011289815763514382,1
+ClC(SN1C(=O)c2c(C1=O)cccc2)(Cl)Cl,0.08430066662269543,0.09817808803261632,1
+CCN(c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-])CC(=C)C,0.08701831648543702,0.10048074651217331,1
+COc1cc(ccc1OC)/C(=C/C(=O)N1CCOCC1)/c1ccc(cc1)Cl,0.08766124641710438,0.09875624800076958,1
+CCOC(=O)CC(C(=O)OCC)SP(=S)(OC)OC,0.08778355070659401,0.004737027498363926,1
+CCCN(c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-])CCC,0.08947770521301585,0.04727481758871042,1
+N#CC(c1ccccc1)(Cn1cncn1)CCc1ccc(cc1)Cl,0.09203781459712614,0.053374219703886394,1
+Clc1ccc2c(c1)ncc(n2)Oc1ccc(cc1)OC(C(=O)OCC1CCCO1)C,0.09210345974638111,0.011353292778411973,1
+NC(=O)c1c(Cl)cccc1Cl,0.09261856560930491,0.08591160820223859,1
+COc1cc(ccc1OC)/C(=C/C(=O)N1CCOCC1)/c1ccc(cc1)Cl,0.09281779032399287,0.08717488588981356,1
+Clc1ccc(c(c1)Cl)NC(=O)C1(CC1)C(=O)O,0.09303171987631087,0.07557961997756538,1
+COC(=O)NC(=S)Nc1ccccc1NC(=S)NC(=O)OC,0.09345959256991566,0.26870233958934475,1
+OC(=O)COc1ccc(cc1C)Cl,0.0947069010825298,0.04426982712431076,1
+Cc1nnc(c(=O)n1N)c1ccccc1,0.09643315995145703,0.14758316159354984,1
+[O-][N+](=O)/N=C/1\NCCN1Cc1ccc(nc1)Cl,0.0973945952590747,0.13032534293969533,1
+COc1c(OC)cc(c(c1OC)C(=O)c1c(OC)ncc(c1C)Cl)C,0.09950572862076837,0.303134327475591,1
+COc1ncc(c2n1nc(n2)NS(=O)(=O)c1c(cccc1C(F)(F)F)OCC(F)F)OC,0.1034404543369562,0.21197350920915925,1
+CCCN(c1c(cc(cc1[N+](=O)[O-])S(=O)(=O)N)[N+](=O)[O-])CCC,0.10393824312956665,0.13107642839933267,1
+COCc1c(F)c(F)c(c(c1F)F)COC(=O)C1C(C1(C)C)/C=C/C,0.10573252781458294,0.08851686119004981,1
+S=C1NCCNC(=S)S[Mn+2]S1,0.10855557507359069,0.038417173954574634,1
+CCNc1nc(NC(C)C)nc(n1)Cl,0.10941971287651023,0.023699434768191727,1
+COC(=O)c1c(nc(c(c1CC(C)C)C1=NCCS1)C(F)(F)F)C(F)F,0.11151045388522976,0.1783067523541709,0.16
+O=C(c1cccc(c1C(=O)NC(CS(=O)(=O)C)(C)C)I)Nc1ccc(cc1C)C(C(F)(F)F)(C(F)(F)F)F,0.115769562707734,0.16092687388776916,1
+COc1c(OC)cc(c(c1OC)C(=O)c1c(OC)ncc(c1C)Cl)C,0.11727460798675288,0.303134327475591,1
+CNC(=O)O/N=C(\SC)/C,0.11836501403389492,0.026052127905166686,1
+N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.12010651237688001,0.017629061431206107,1
+CNC(=O)Oc1cc(C)c(c(c1)C)SC,0.1242747128033579,0.01659260666947232,1
+c1coc(c1)c1nc2c([nH]1)cccc2,0.12486833177320307,0.14928260732342855,1
+CO/C=C(\c1ccccc1COc1cccc(n1)C(F)(F)F)/C(=O)OC,0.1252316956521325,0.10792834060304858,1
+CON(C(=O)Nc1ccc(c(c1)Cl)Cl)C,0.12545469800745823,0.08572873540501698,1
+ClC(=CC1C(C1(C)C)C(=O)OCc1cccc(c1)Oc1ccccc1)Cl,0.12778321424363712,0.05749565678663053,1
+O=C(N(C)C)Nc1cccc(c1)C(F)(F)F,0.12919765885228982,0.045845688053057596,1
+[O-][N+](=O)/N=C(\NC)/NCc1cnc(s1)Cl,0.13016764551401042,1.1362376254520026,0.23076923076923078
+CCc1ccc(cc1)C(=O)NN(C(C)(C)C)C(=O)c1cc(C)cc(c1)C,0.13731668655832788,0.5134702813140367,1
+Cc1cccc2c1n1cnnc1s2,0.1373938645607217,0.19681387136689427,1
+CCC(Nc1c(cc(cc1[N+](=O)[O-])C(C)(C)C)[N+](=O)[O-])C,0.13747135609511818,0.04812730013014119,1
+CC(C(c1cncnc1)(c1ccc(cc1)OC(F)(F)F)O)C,0.13801406108477293,0.02238566497087926,1
+CC(OC(=O)Nc1cccc(c1)Cl)C,0.14040948460452124,0.08671233605999264,1
+COc1nc(C)nc(n1)N(C(=O)NS(=O)(=O)c1ccccc1C(=O)O)C,0.14421924681891674,0.24337175539925646,1
+Clc1ccccc1CC(C1(Cl)CC1)(Cn1nc[nH]c1=S)O,0.1452393166315865,0.03879388107080294,1
+CC(=O)O.CCCCCCCCCCCCNC(=N)N,0.1461167287581588,0.10263505101799683,1
+CCOc1cc(ccc1[N+](=O)[O-])Oc1ccc(cc1Cl)C(F)(F)F,0.14653013191720715,0.024823910652283242,1
+c1ccc(cc1)Nc1ccccc1,0.14773454395291782,0.12245503297742702,1
+CC(N1C(=O)c2ccccc2NS1(=O)=O)C,0.14982590230152565,0.12640643328259163,1
+N#Cc1c(Cl)c(C#N)c(c(c1Cl)Cl)Cl,0.15042627044387033,0.08243149771542137,1
+COC(=O)NC(=S)Nc1ccccc1NC(=S)NC(=O)OC,0.15479245019392282,0.28657183120817714,1
+OC(=O)C(Oc1ccc(cc1Cl)Cl)C,0.15527684755838006,0.016086093578697225,1
+Cc1nc(Nc2ccccc2)nc(c1)C1CC1,0.15801924849469393,0.16887323351376893,1
+CC(OC(=O)Nc1ccccc1)C,0.16181616210899355,0.14633879628836383,1
+C#CC(NC(=O)c1cc(Cl)cc(c1)Cl)(C)C,0.16593276232681306,0.05314488180888698,1
+O=C1N(c2cc(Cl)cc(c2)Cl)C(=O)C2(C1(C)C2)C,0.16893203350457175,0.08671261321187769,1
+Cc1ccc(c2c1cccc2)C,0.1728291127183792,0.10352660953011326,1
+Clc1cc(Cl)c(cc1n1nc(n(c1=O)C(F)F)C)NS(=O)(=O)C,0.1730416993562668,0.019936495077846474,1
+CCN(C(=O)C(Oc1cccc2c1cccc2)C)CC,0.17526912017369997,0.20026950731392412,1
+CCCCCCC(c1cc(cc(c1OC(=O)/C=C/C)[N+](=O)[O-])[N+](=O)[O-])C,0.17563456769307506,0.22483093246893712,1
+CCOCN(c1c(C)cccc1CC)C(=O)CCl,0.17607780933998252,0.10612134197624272,1
+CCCCCCCCCC[N+](CCCCCCCCCC)(C)C.[Cl-],0.1767583631976715,0.5202137438013086,1
+Clc1ccc(cc1)CCC(C(C)(C)C)(Cn1cncn1)O,0.17867678986550448,0.05314189537646974,1
+N#Cc1cc(C)c(c(c1)C(=O)NC)NC(=O)c1cc(nn1c1ncccc1Cl)Br,0.17901230859828976,0.030845016985805107,1
+N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.18015976856532,0.016307999222054163,1
+CC1=NNC(=O)N(C1)/N=C/c1cccnc1,0.18091653347462547,0.8264782088052985,1
+N#CCNC(=O)c1cnccc1C(F)(F)F,0.19244308898713228,0.18396100074583865,1
+CCCCCCC(c1cc(cc(c1OC(=O)/C=C\C)[N+](=O)[O-])[N+](=O)[O-])C,0.19484459853450517,0.12123612571900029,1
+c1scc(n1)c1nc2c([nH]1)cccc2,0.19876005527119617,0.0875690712634714,1
+Cn1nc(c(c1)C(=O)Nc1ccccc1C1CC1C1CC1)C(F)F,0.2021971466240455,0.04672471563475691,1
+CO/C=C(\c1ccccc1Oc1ncnc(c1)Oc1ccccc1C#N)/C(=O)OC,0.20427010160523304,0.1924110484439188,1
+COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1CCC(F)(F)F,0.2098341392275743,0.10359077423092204,1
+O=C1CCCC(=O)C1C(=O)c1ccc(cc1Cl)S(=O)(=O)C,0.2189994026791292,0.002447483645886486,1
+ClCC(=O)N(c1c(C)cccc1CC)COC(C)C,0.22199225860138957,0.1280135192798158,1
+CC(Nc1nc(NC(C)C)nc(n1)Cl)C,0.22201922216305578,0.07003774391915464,1
+Nc1cnn(c(=O)c1Cl)c1ccccc1,0.2255879747532767,0.05570688290625887,1
+CNC(=O)Oc1ccccc1OC(C)C,0.22939978025412716,0.05695835209106234,1
+CCNC(=O)C(OC(=O)Nc1ccccc1)C,0.23278744254805916,0.2688088403129588,1
+Clc1c(ccc(c1N)[N+](=O)[O-])Oc1ccccc1,0.23425888009292972,0.16213639684416756,1
+Cl/C=C/CO/N=C(\C1=C(O)CC(CC1=O)CC(SCC)C)/CC,0.2389478027971563,0.19671826575084897,1
+CCOCN(c1c(C)cccc1CC)C(=O)CCl,0.24799169923196304,0.07165946917369154,1
+CCc1cc(C)cc(c1c1c(OC(=O)C(C)(C)C)n2n(c1=O)CCOCC2)CC,0.24968092026794356,0.06646847904503225,1
+[S]C(=S)NCCNC(=S)S[Mn],0.2525424903682367,0.04075145933254611,1
+CC(CC(c1sccc1NC(=O)c1cn(nc1C(F)(F)F)C)C)C,0.2545841068122505,0.13075689109193567,1
+CCOCN(c1c(C)cccc1CC)C(=O)CCl,0.2557761861991325,0.10157267923887335,1
+Cc1ccc(cc1)N(S(=O)(=O)N(C)C)SC(Cl)(Cl)F,0.25917417547047744,0.07855841127983834,1
+Fc1ccc(cc1)Oc1ccnc2c1c(Cl)cc(c2)Cl,0.25962686686321285,0.04972859326379051,1
+CC(Oc1cccc(c1)NC(=O)c1ccccc1C(F)(F)F)C,0.2690918752347788,0.08909548105359592,1
+Nc1cnn(c(=O)c1Cl)c1ccccc1,0.270705569703932,0.06995787809045063,1
+COc1cc(OC)nc(n1)NC(=O)NS(=O)(=O)c1ncccc1S(=O)(=O)CC,0.2804534946915948,0.6959703502985611,1
+OC(=O)COc1ccc(cc1Cl)Cl,0.28049546719325014,0.034331532886187406,1
+CCCC1COC(O1)(Cn1cncn1)c1ccc(cc1Cl)Cl,0.2805209905967611,0.03954696343660991,1
+Cc1ccc(cc1)N(S(=O)(=O)N(C)C)SC(Cl)(Cl)F,0.2879713060783083,0.07855841127983834,1
+CCCCOCCOCCOCc1cc2OCOc2cc1CCC,0.29547465787728056,0.0683920470541598,0.1
+CNC(=O)Oc1cccc2c1cccc2,0.2991731924668564,0.15220135307866764,1
+CN(C(=S)[S-])C.CN(C(=S)[S-])C.CN(C(=S)[S-])C.[Fe+3],0.30012414094866885,0.03664069651809872,1
+Fc1ccc(c(c1)F)NC(=O)c1cccnc1Oc1cccc(c1)C(F)(F)F,0.3033262936121485,0.040464380681156376,1
+CCCCCCCCSC(=O)Oc1cc(Cl)nnc1c1ccccc1,0.3034972489425892,0.10177125384326928,1
+COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1C(=O)OC,0.31203800675365617,0.10306738554824552,1
+ClCC(=O)N(c1c(C)cccc1C)Cn1cccn1,0.31323206744613685,0.1388909198134489,1
+O=C(NC(=O)c1c(F)cccc1F)Nc1cc(Cl)c(c(c1F)Cl)F,0.32143014109471235,0.058379476811434815,1
+ClC(SN1C(=O)C2C(C1=O)CC=CC2)(Cl)Cl,0.3260262207586085,0.23843829210981765,0.2
+CCCCCCC(c1cc(cc(c1OC(=O)/C=C\C)[N+](=O)[O-])[N+](=O)[O-])C,0.3315102548955885,0.1454764467989611,1
+OC(=O)COc1ccc(cc1Cl)Cl,0.33930903289506065,0.03674313560429932,1
+CCCCC(c1ccc(cc1)Cl)(Cn1cncn1)C#N,0.3670674304254852,0.07196366656054391,1
+Cc1cc(C)cc(c1)C(=O)N(C(C)(C)C)NC(=O)c1ccc2c(c1C)CCCO2,0.3678012132205545,0.27389970955162496,1
+CCN(C(=O)C(Oc1cccc2c1cccc2)C)CC,0.36852210915226874,0.16923914313012559,1
+Cc1nnc(c(=O)n1N)c1ccccc1,0.36891864539658303,0.14647471195254017,1
+O=C(Nc1ccnc(c1)Cl)Nc1ccccc1,0.37548404132262436,0.08265962969203304,1
+O=C(C(C)(C)C)C(n1ncnc1)Oc1ccc(cc1)Cl,0.3880867710275115,0.07120229065098448,1
+CCC(n1c(=O)[nH]c(c(c1=O)Br)C)C,0.39446112244793224,0.503640251987437,0.25
+CO/N=C(\c1ccccc1Oc1ncnc(c1F)Oc1ccccc1Cl)/C1=NOCCO1,0.39448424715427566,0.08162616264267387,1
+N#Cc1ccc(cc1)C/C(=N/NC(=O)Nc1ccc(cc1)OC(F)(F)F)/c1ccc(cc1)C(F)(F)F,0.394944816927872,0.04926211530037183,1
+O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(cc1)Cl,0.4023390123323988,0.07991360285522836,1
+O=C1N(OCC1(C)C)Cc1ccccc1Cl,0.41719152837532353,0.05991367718355013,1
+CC(=CC1C(C1(C)C)C(=O)OCc1cccc(c1)Oc1ccccc1)C,0.42802021191337764,0.06812573964857704,1
+CC(Oc1ccccn1)COc1ccc(cc1)Oc1ccccc1,0.4315900691721648,0.12243583387510579,1
+CCOC(=O)CC(C(=O)OCC)SP(=S)(OC)OC,0.43286371555320496,0.008236162894755514,1
+CO/C=C(\c1ccccc1COc1cccc(n1)C(F)(F)F)/C(=O)OC,0.4410333629488144,0.10792834060304858,1
+O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(cc1F)Oc1ccc(cc1Cl)C(F)(F)F,0.4460202371248177,0.049224333757024806,1
+N#Cc1c[nH]cc1c1cccc2c1OC(O2)(F)F,0.4553054263341003,0.12725725033199659,1
+CSC(=O)c1cccc2c1snn2,0.4608228380460223,0.0549637606627435,1
+COCN(c1c(CC)cccc1CC)C(=O)CCl,0.4670695574071115,0.21177533873169335,1
+O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(cc1F)Oc1ccc(cc1Cl)C(F)(F)F,0.4705718098105875,0.06418396495173338,1
+ClCC[N+](C)(C)C.[Cl-],0.47447507557122687,1.261437789326052,0.2
+Clc1c(Cl)c([N+](=O)[O-])c(c(c1Cl)Cl)Cl,0.4774244272684517,0.07224143410034059,1
+CC1OC(C)CN(C1)C1CCCCCCCCCCC1,0.48316627385722294,0.005601648043851357,0.2222222222222222
+CCCCc1c(C)nc(nc1OS(=O)(=O)N(C)C)NCC,0.4930161419173511,0.03660545255794927,1
+O=c1[nH]c2CCCc2c(=O)n1C1CCCCC1,0.503640251987437,0.30727385038099597,0.25
+CCOC(=O)OC1=C(C(=O)NC21CCC(CC2)OC)c1cc(C)ccc1C,0.5061016308843888,0.1015896148452947,1
+CCOC(=O)OC1=C(C(=O)NC21CCC(CC2)OC)c1cc(C)ccc1C,0.5061016308843888,0.1015896148452947,1
+N#Cc1c[nH]cc1c1cccc(c1Cl)Cl,0.5061481392686851,0.06860514832511305,1
+CCCOC(=O)NCCCN(C)C.Cl,0.5072793699625824,0.1014922037802734,1
+CCOc1nc(nc(n1)NC)NC(=O)NS(=O)(=O)c1ccccc1C(=O)OC,0.5116896474609399,0.2833675875893271,1
+CCOc1cc(ccc1C1COC(=N1)c1c(F)cccc1F)C(C)(C)C,0.5202976892967504,0.03331836205820965,1
+COCC(N(c1c(C)cccc1CC)C(=O)CCl)C,0.5285529966699751,0.09081986063352195,1
+[O-][N+](=O)/N=C\1/N(Cc2cnc(s2)Cl)COCN1C,0.5313410671453993,0.11705065605473255,0.23076923076923078
+COC(=O)c1cc(ccc1[N+](=O)[O-])Oc1ccc(cc1Cl)Cl,0.5465743293153008,0.06711842505159077,1
+Clc1ccc(cc1)CN(C(=O)Nc1ccccc1)C1CCCC1,0.5473855891134007,0.048361251776754224,1
+[O-][N+](=O)c1cc(Cl)c(c(c1)Cl)N,0.5651787298028309,0.1751390284551102,1
+COC(=O)c1csc(c1S(=O)(=O)NC(=O)n1nc(n(c1=O)C)OC)C,0.5993972829782238,1.0148375272967118,1
+COCCN(c1c(C)cccc1C)C(=O)CCl,0.6139034987494355,0.1390796686283413,1
+O=Cc1ccco1,0.624453213155231,0.12486833177320306,0.1111111111111111
+COc1c(OC)cc(c(c1OC)C(=O)c1c(OC)ccc(c1C)Br)C,0.6352758309016929,0.06875499955650785,1
+OC1(Cn2ncnc2)/C(=C/c2ccc(cc2)Cl)/CCC1(C)C,0.6406279100538178,0.0659303175711405,1
+CC(OC(=O)NC(C(=O)NC(c1nc2c(s1)cc(cc2)F)C)C(C)C)C,0.6543197874203039,0.3913277156537192,1
+CCCOC(=O)NCCCN(C)C.Cl,0.6674728552139242,0.1014922037802734,1
+ClCC(=O)N(c1c(C)cccc1CC)COC(C)C,0.7047373288933002,0.18006974794657446,1
+CCOc1nc(F)cc2n1nc(n2)S(=O)(=O)Nc1c(Cl)cccc1C(=O)OC,0.7561469746838736,0.15887585713526345,1
+COc1cc(OC)nc(n1)NC(=O)NS(=O)(=O)Cc1ccccc1C(=O)O,0.7795645307119917,0.370537825344271,1
+Clc1ccc(c(n1)C(=O)O)Cl,0.7812519531298828,0.28910519558310543,1
+Clc1ccc(c(n1)C(=O)O)Cl,0.7812519531298828,0.29250825632761424,1
+COC(=O)NS(=O)(=O)c1ccc(cc1)N,0.7817895162025876,0.32172060768096433,1
+ClCC[N+](C)(C)C.[Cl-],0.7907917926187115,2.8607832080275912,0.2
+CCOc1cc(ccc1OCC)NC(=O)OC(C)C,0.8241033622809132,0.35721426039200926,1
+OC(=O)c1nc(Cl)c(c(c1Cl)N)Cl,0.8282972172278201,0.3802071151378962,1
+ClCC[N+](C)(C)C.[Cl-],0.860381470369158,1.261437789326052,0.2
+ClCCP(=O)(O)O,0.9066120392542251,4.1102099979069795,0.16666666666666666
+Nc1nc(NC2CC2)nc(n1)N,0.9387196585948812,0.09185975434766332,1
+O=C(C1(C)CCCCC1)Nc1ccc(c(c1Cl)Cl)O,0.9662594125910484,0.4032551433167842,1
+OC(=O)c1nc(Cl)c(c(c1Cl)N)Cl,1.0353715215347752,0.3802071151378962,1
+Cc1cc(C)nc(n1)Nc1ccccc1,1.1091497729605546,0.10587990647536236,1
+COc1cccc(c1C)C(=O)NN(C(C)(C)C)C(=O)c1cc(C)cc(c1)C,1.1154252951100516,0.18143584072688565,1
+Oc1ccccc1c1ccccc1,1.1750384237564568,0.12270365808404284,1
+COc1cc(OC)nc(n1)NC(=O)NS(=O)(=O)Nc1ccccc1C(=O)N(C)C,1.1780461209768547,0.4643030652501503,1
+CCCOc1nn(c(=O)n1C)C(=O)[N-]S(=O)(=O)c1ccccc1C(=O)OC.[Na+],1.1894202967675005,0.3690275404021419,1
+CO/N=C(\c1ccccc1COc1ccccc1C)/C(=O)OC,1.1967534090558043,0.10555973307076402,1
+OC(=O)Cc1cccc2c1cccc2,1.205650068257516,0.12789294483841956,1
+COC(=O)Nc1nc2c([nH]1)cccc2,1.3076226134187396,0.06947884063218772,1
+COc1cc(OC)nc(n1)NC(=O)NS(=O)(=O)N(S(=O)(=O)C)C,1.3411855059279478,0.5688398904999331,1
+COc1ncc(c2n1nc(n2)S(=O)(=O)Nc1c(F)cccc1F)F,1.391657397996453,0.2568559278403449,0.24324324324324326
+O=C(Nc1cc(F)cc(c1)F)N/N=C(/c1ncccc1C(=O)O)\C,1.4120001283962829,0.16386253811184753,1
+CCCN(c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-])CCC,1.4316432834082535,0.059544784520966634,1
+CCC(c1noc(c1)NC(=O)c1c(OC)cccc1OC)(CC)C,1.5854670852219546,0.18859272947054131,1
+CC(C1(C)N=C(NC1=O)c1nc2ccccc2cc1C(=O)O)C,1.605986191473768,0.2428137170977884,1
+CCc1c(C(=O)O)c(=O)cnn1c1ccc(cc1)Cl,1.6864553664875628,0.05046250020331599,1
+O/C(=C\1/C(=O)CC(CC1=O)C(=O)O)/C1CC1,1.752821172367082,1.8740405032498018,0.35294117647058826
+C[N+]1(C)CCCCC1.[Cl-],1.790706021930536,0.10999358670499064,1
+CCC(c1noc(c1)NC(=O)c1c(OC)cccc1OC)(CC)C,1.8050858655278421,0.07580850240005735,1
+CCC(=O)C1=C([O-])CC(CC1=O)C(=O)[O-].[Ca+2],1.874040503249802,0.8805851892527955,1
+OC(=O)COc1nc(F)c(c(c1Cl)N)Cl,1.9605490478397496,0.14404836568635776,1
+Clc1ccccc1CC(C1(Cl)CC1)(Cn1nc[nH]c1=S)O,2.178589749473798,0.044494450061192195,1
+COc1cc(OC)n2c(n1)nc(n2)NS(=O)(=O)c1c(OC)nccc1C(F)(F)F,2.302288500094267,0.08283257780127506,1
+CO/N=C(\c1ccccc1COc1ccccc1C)/C(=O)OC,2.4002085592886893,0.07353278767027785,1
+COC(=O)CC(c1ccc(cc1)Cl)NC(=O)C(C(C)C)NC(=O)OC(C)C,2.5070128670931195,0.10806290920415802,1
+COc1nn(c(=O)n1C)C(=O)NS(=O)(=O)c1ccccc1OC(F)(F)F,2.5233463155295692,0.107945327220545,1
+COc1cc(OC)nc(n1)NC(=O)NS(=O)(=O)c1n2ccccc2nc1S(=O)(=O)CC,2.7556956072872962,0.5237598413587932,1
+CCCOC(=O)NCCCN(C)C.Cl,3.0347765817059753,0.1014922037802734,1
+Oc1ccccc1c1ccccc1,3.119727015073393,0.13804976267570115,1
+OC(=O)CNCP(=O)(O)O,3.3121771958019575,0.700841565636653,0.16666666666666666
+Cc1cnc2c(c1)ccc(c2C(=O)O)Cl,3.3387517363764943,0.06061841733867432,1
+[O-]P(=O)OCC.[O-]P(=O)OCC.[O-]P(=O)OCC.[Al+3],3.6853523538557287,0.0028016911425965685,1
+C[N+]1(C)CCCCC1.[Cl-],4.570309399255547,0.10999358670499064,1
+Clc1cc(N)c(c(n1)C(=O)O)Cl,4.830587434212229,0.6461033364698784,1
+CNC(=N[N](=O)[O])NCC1COCC1,4.900819965040488,0.13016764551401044,0.12
+COc1cc(OC)nc(n1)NC(=O)NS(=O)(=O)c1ncccc1C(=O)N(C)C,5.08765706618306,0.5851124569365994,1
+OC(=O)CNCP(=O)(O)O,5.559726007239,0.700841565636653,0.16666666666666666
+Oc1ccccc1c1ccccc1,5.875192118782284,0.15340790034125226,1
+OC(=O)CNCP(=O)(O)O,7.180326992327815,0.700841565636653,0.16666666666666666
diff --git a/paper/data/swiss-cv.id b/paper/data/swiss-cv.id
new file mode 100644
index 0000000..7c4d169
--- /dev/null
+++ b/paper/data/swiss-cv.id
@@ -0,0 +1 @@
+56c42c7e2b72ed1141000001
diff --git a/paper/data/swiss-test-predictions.csv b/paper/data/swiss-test-predictions.csv
index e29dfc8..b570805 100644
--- a/paper/data/swiss-test-predictions.csv
+++ b/paper/data/swiss-test-predictions.csv
@@ -1,150 +1,150 @@
-SMILES,LOAEL,Confidence,Dataset
-COP(=O)(OC=C(Cl)Cl)OC,0.0015614663384413924,0.25,swiss-prediction
-CCOP(=S)(OCC)SCSP(=S)(OCC)OCC,0.0021000613932692083,1,swiss-prediction
-CCSCSP(=S)(OCC)OCC,0.0021391291181705988,1,swiss-prediction
-COP(=O)(NC(=O)C)SC,0.0023317481611294373,1,swiss-prediction
-CCOP(=S)(SCSC(C)(C)C)OCC,0.0038932756645553637,1,swiss-prediction
-CCCSP(=O)(SCCC)OCC,0.00465020663878965,1,swiss-prediction
-CCCCOC(=O)C(Oc1ccc(cc1)Oc1ccc(cn1)C(F)(F)F)C,0.006594681734518869,1,swiss-prediction
-CCOP(=S)(SCn1c(=O)oc2c1ccc(c2)Cl)OCC,0.007407343935315939,1,swiss-prediction
-CCOC(=O)c1cn2nc(cc2nc1C)OP(=S)(OCC)OCC,0.007637073565493268,1,swiss-prediction
-CCOP(=O)(Oc1ccc(c(c1)C)SC)NC(C)C,0.007657024526944333,1,swiss-prediction
-CCOC(=O)CC(C(=O)OCC)SP(=S)(OC)OC,0.007932473192811151,1,swiss-prediction
-CCOP(=O)(SC(CC)C)SC(CC)C,0.008579634171466552,1,swiss-prediction
-CSc1ccc(cc1C)OP(=S)(OC)OC,0.008959434615561151,1,swiss-prediction
-CC(N(C(=O)SCC(=C(Cl)Cl)Cl)C(C)C)C,0.009149216533940489,0.1,swiss-prediction
-CCOP(=S)(Oc1ccc2c(c1)oc(=O)c(c2C)Cl)OCC,0.009462543754698434,1,swiss-prediction
-COc1sc(=O)n(n1)CSP(=S)(OC)OC,0.011073447351926287,1,swiss-prediction
-CCOP(=S)(Oc1cc(C)nc(n1)C(C)C)OCC,0.011528157789546231,1,swiss-prediction
-CCOP(=S)(Oc1nc(Cl)c(cc1Cl)Cl)OCC,0.01260929482868455,1,swiss-prediction
-COP(=S)(Oc1ccc(c(c1)C)[N+](=O)[O-])OC,0.01284173174704369,1,swiss-prediction
-CSc1nnc(c(=O)n1N)C(C)(C)C,0.013701160159437665,0.11538461538461539,swiss-prediction
-CNC(=O)CSP(=S)(OC)OC,0.014593717469688338,1,swiss-prediction
-CCOC(=O)C(Oc1ccc(cc1)Oc1cnc2c(n1)ccc(c2)Cl)C,0.015316912166292015,1,swiss-prediction
-N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.015809693317509144,1,swiss-prediction
-OC(=O)C(Oc1ccc(cc1Cl)Cl)C,0.017069971996617916,1,swiss-prediction
-N#CC(c1ccc(c(c1)Oc1ccccc1)F)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.017779187285422816,1,swiss-prediction
-N#Cc1nn(c(c1S(=O)C(F)(F)F)N)c1c(Cl)cc(cc1Cl)C(F)(F)F,0.018032363548935507,1,swiss-prediction
-N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C(c1ccc(cc1)Cl)C(C)C,0.018130739436787953,1,swiss-prediction
-OC(=O)C(Oc1ccc(cc1)Oc1ncc(cc1Cl)C(F)(F)F)C,0.018461826477538752,1,swiss-prediction
-C=CCOC(c1ccc(cc1Cl)Cl)Cn1cncc1,0.019218559674619714,1,swiss-prediction
-Clc1cc(Cl)c(cc1n1nc(n(c1=O)C(F)F)C)NS(=O)(=O)C,0.019936495077846474,1,swiss-prediction
-CNC(=O)Oc1cc(C)c(c(c1)C)SC,0.020160859255457103,1,swiss-prediction
-CCN(c1nc(cc(n1)C)OP(=S)(OC)OC)CC,0.0206066733541471,1,swiss-prediction
-CC(C(c1cncnc1)(c1ccc(cc1)OC(F)(F)F)O)C,0.02071602054046363,1,swiss-prediction
-CCOP(=S)(Oc1ncn(n1)c1ccccc1)OCC,0.0209058951160662,1,swiss-prediction
-CCCCN(SN(C(=O)Oc1cccc2c1OC(C2)(C)C)C)CCCC,0.02176055906933658,1,swiss-prediction
-COP(=S)(SCn1nnc2c(c1=O)cccc2)OC,0.022759394852161156,1,swiss-prediction
-COP(=S)(Oc1nc(Cl)c(cc1Cl)Cl)OC,0.023258622289143393,1,swiss-prediction
-N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)(C)C,0.027036059762033778,1,swiss-prediction
-O=C(c1ccc(cc1S(=O)(=O)C)C(F)(F)F)c1cnoc1C1CC1,0.027386062506459995,1,swiss-prediction
-CCOc1cc(ccc1[N+](=O)[O-])Oc1ccc(cc1Cl)C(F)(F)F,0.029097658324682158,1,swiss-prediction
-Fc1ccc(cc1)[Si](c1ccc(cc1)F)(Cn1cncn1)C,0.030000496093829623,1,swiss-prediction
-N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Br)Br,0.033983552550211814,1,swiss-prediction
-OC(=O)COc1ccc(cc1Cl)Cl,0.03674313560429932,1,swiss-prediction
-COP(=S)(SCN1C(=O)c2c(C1=O)cccc2)OC,0.03862209230395826,1,swiss-prediction
-CCOCn1c(c2ccc(cc2)Cl)c(c(c1C(F)(F)F)Br)C#N,0.040036364983302354,1,swiss-prediction
-CON(C(=O)Nc1ccc(c(c1)Cl)Cl)C,0.04005668682016992,1,swiss-prediction
-CCN(C(=O)SCc1ccc(cc1)Cl)CC,0.04008581787410761,1,swiss-prediction
-CON(C(=O)Nc1ccc(cc1)Br)C,0.041158230988177814,1,swiss-prediction
-CC1N(C(=O)NC2CCCCC2)C(=O)SC1c1ccc(cc1)Cl,0.041544797190431346,1,swiss-prediction
-CCCC1COC(O1)(Cn1cncn1)c1ccc(cc1Cl)Cl,0.042629025508833636,1,swiss-prediction
-Clc1cc(ccc1Oc1ccc(c(c1)C(=O)NS(=O)(=O)C)[N+](=O)[O-])C(F)(F)F,0.04346838792923881,1,swiss-prediction
-O=C(N(C)C)Nc1cccc(c1)C(F)(F)F,0.045035773822150356,1,swiss-prediction
-OC(=O)COc1nc(Cl)c(cc1Cl)Cl,0.04528624604587442,1,swiss-prediction
-COP(=O)(SC)N,0.045296304153967855,0.13333333333333333,swiss-prediction
-BrC1COC(C1)(Cn1cncn1)c1ccc(cc1Cl)Cl,0.04710913483831366,1,swiss-prediction
-OC(C(Cl)(Cl)Cl)(c1ccc(cc1)Cl)c1ccc(cc1)Cl,0.04843867148274343,1,swiss-prediction
-N#CC(c1ccccc1)(Cn1cncn1)CCc1ccc(cc1)Cl,0.04878763252869801,1,swiss-prediction
-OC(=O)COc1ccc(cc1C)Cl,0.04905212014498487,1,swiss-prediction
-Clc1ccc(cc1)C(c1ccccc1Cl)(c1cncnc1)O,0.049460296466489834,1,swiss-prediction
-CN(C(=O)Oc1nc(nc(c1C)C)N(C)C)C,0.04977891267988713,1,swiss-prediction
-CN1CN(C)CSC1=S,0.05143501540726455,1,swiss-prediction
-CCNc1nc(NC(C)C)nc(n1)Cl,0.051943767855990995,1,swiss-prediction
-CN(C(=S)SSC(=S)N(C)C)C,0.052029910797683425,1,swiss-prediction
-CCCN(C(=O)n1cncc1)CCOc1c(Cl)cc(cc1Cl)Cl,0.05287343441882649,1,swiss-prediction
-Clc1ccc(c(c1)Cl)C1(OCCO1)Cn1cncn1,0.053013598502549705,1,swiss-prediction
-C#CCOS(=O)OC1CCCCC1Oc1ccc(cc1)C(C)(C)C,0.05421753035009689,1,swiss-prediction
-CC(Cc1ccc(cc1)C(C)(C)C)CN1CC(C)OC(C1)C,0.05500615548608445,1,swiss-prediction
-Clc1ccc(cc1)CCC(C(C)(C)C)(Cn1cncn1)O,0.05500705393091043,1,swiss-prediction
-COC(=O)N(C(=O)N1COC2(C(=N1)c1ccc(cc1C2)Cl)C(=O)OC)c1ccc(cc1)OC(F)(F)F,0.056463299399384,1,swiss-prediction
-CNC(=O)ON=C(C(=O)N(C)C)SC,0.056548471383657296,1,swiss-prediction
-ClC(=CC1C(C1(C)C)C(=O)OCc1cccc(c1)Oc1ccccc1)Cl,0.058035645801953625,1,swiss-prediction
-O=C(NC(=O)c1c(F)cccc1F)Nc1cc(Cl)c(c(c1F)Cl)F,0.058379476811434815,1,swiss-prediction
-Clc1ccc(cc1)CN(C(=O)Nc1ccccc1)C1CCCC1,0.058445620408330214,1,swiss-prediction
-CNC(=O)Oc1ccccc1OC(C)C,0.05950240879470488,1,swiss-prediction
-O=C1N(OCC1(C)C)Cc1ccccc1Cl,0.060568960163232294,1,swiss-prediction
-CCC(Nc1c(cc(cc1[N+](=O)[O-])C(C)(C)C)[N+](=O)[O-])C,0.06059577630169314,1,swiss-prediction
-CNC(=O)Oc1cccc2c1OC(C2)(C)C,0.06131182057778625,1,swiss-prediction
-O=C(C(C)(C)C)C(n1ncnc1)Oc1ccc(cc1)Cl,0.06552995555017295,1,swiss-prediction
-CC(=CC1C(C1(C)C)C(=O)OCc1cccc(c1)Oc1ccccc1)C,0.06621783334677835,1,swiss-prediction
-CCCCOCCOCCOCc1cc2OCOc2cc1CCC,0.0683920470541598,0.1,swiss-prediction
-COC(=O)Nc1nc2c([nH]1)cccc2,0.06899951625487367,1,swiss-prediction
-CC(Nc1nc(NC(C)C)nc(n1)Cl)C,0.07003774391915464,1,swiss-prediction
-CC(NC(=O)N1CC(=O)N(C1=O)c1cc(Cl)cc(c1)Cl)C,0.0706115065820919,1,swiss-prediction
-O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(c(c1)Cl)OC(C(OC(F)(F)F)F)(F)F,0.07121509909913679,1,swiss-prediction
-Clc1ccccc1c1nnc(nn1)c1ccccc1Cl,0.07184435965438675,1,swiss-prediction
-CCCCC(c1ccc(cc1)Cl)(Cn1cncn1)C#N,0.07298099293681495,1,swiss-prediction
-CCOc1ccc2c(c1)C(=CC(N2)(C)C)C,0.07326983970840215,1,swiss-prediction
-OC(C(C)(C)C)C(n1ncnc1)Oc1ccc(cc1)Cl,0.07342623822245768,1,swiss-prediction
-ClCC1CN(C(=O)C1Cl)c1cccc(c1)C(F)(F)F,0.07688860200812843,1,swiss-prediction
-CCC(=O)Nc1ccc(c(c1)Cl)Cl,0.07845200347176315,1,swiss-prediction
-Cc1ccc(cc1)N(S(=O)(=O)N(C)C)SC(Cl)(Cl)F,0.07855841127983834,1,swiss-prediction
-Clc1c(Cl)c([N+](=O)[O-])c(c(c1Cl)Cl)Cl,0.07983484928350948,1,swiss-prediction
-N#Cc1c(Cl)c(C#N)c(c(c1Cl)Cl)Cl,0.08243149771542137,1,swiss-prediction
-O=C(N(C)C)Nc1ccc(c(c1)Cl)Cl,0.08391380469075903,1,swiss-prediction
-O=C1N(c2cc(Cl)cc(c2)Cl)C(=O)C2(C1(C)C2)C,0.08671261321187769,1,swiss-prediction
-c1scc(n1)c1nc2c([nH]1)cccc2,0.0875690712634714,1,swiss-prediction
-CC(OC(=O)Nc1cccc(c1)Cl)C,0.08873389182433263,1,swiss-prediction
-N#Cc1c(Cl)cccc1Cl,0.0895509304878687,1,swiss-prediction
-COCC(N(c1c(C)cccc1CC)C(=O)CCl)C,0.09081986063352185,1,swiss-prediction
-Nc1nc(NC2CC2)nc(n1)N,0.09185975434766332,1,swiss-prediction
-CC(Oc1cccc(c1)NC(=O)c1ccccc1C(F)(F)F)C,0.09329533119250687,1,swiss-prediction
-CCOCN(c1c(C)cccc1CC)C(=O)CCl,0.09353524776797524,1,swiss-prediction
-CCC(c1noc(c1)NC(=O)c1c(OC)cccc1OC)(CC)C,0.1002087398883717,1,swiss-prediction
-C=CC1(C)OC(=O)N(C1=O)c1cc(Cl)cc(c1)Cl,0.10035495983582815,1,swiss-prediction
-C#CCC1=C(C)C(CC1=O)OC(=O)C1C(C1(C)C)C=C(C)C,0.10390334684834944,1,swiss-prediction
-CCOC(=O)C(Cc1cc(c(cc1Cl)F)n1nc(n(c1=O)C(F)F)C)Cl,0.11119329800179159,1,swiss-prediction
-CC(Oc1ccccn1)COc1ccc(cc1)Oc1ccccc1,0.11147220458900187,1,swiss-prediction
-O=C1OC(C(=O)N1Nc1ccccc1)(C)c1ccc(cc1)Oc1ccccc1,0.11177541637848143,1,swiss-prediction
-ClC(SN1C(=O)c2c(C1=O)cccc2)(Cl)Cl,0.11359098888992505,1,swiss-prediction
-CCCCCCCCSC(=O)Oc1cc(Cl)nnc1c1ccccc1,0.12117285527596837,1,swiss-prediction
-c1ccc(cc1)Nc1ccccc1,0.12245503297742702,1,swiss-prediction
-O=Cc1ccco1,0.12486833177320306,0.1111111111111111,swiss-prediction
-CCOc1ccc(cc1)C(COCc1cccc(c1)Oc1ccccc1)(C)C,0.12674868328005046,1,swiss-prediction
-O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(cc1)Cl,0.12765610432548405,1,swiss-prediction
-CC(Oc1cc(c(cc1Cl)Cl)n1nc(oc1=O)C(C)(C)C)C,0.12778348323867558,1,swiss-prediction
-COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1C(=O)OC,0.12938220260072822,1,swiss-prediction
-CNC(=O)Oc1cccc2c1cccc2,0.13169394021834496,1,swiss-prediction
-Oc1ccccc1c1ccccc1,0.13372301955518534,1,swiss-prediction
-COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1CCC(F)(F)F,0.13759637912715172,1,swiss-prediction
-CC(=CC1C(C1(C)C)C(=O)OCc1coc(c1)Cc1ccccc1)C,0.13967461554721775,1,swiss-prediction
-COC(=O)N(c1ccccc1COc1ccn(n1)c1ccc(cc1)Cl)OC,0.14037755043662442,1,swiss-prediction
-OC(=O)COc1nc(F)c(c(c1Cl)N)Cl,0.14404836568635776,1,swiss-prediction
-CC(OC(=O)C(c1ccc(cc1)Br)(c1ccc(cc1)Br)O)C,0.1495196727139019,1,swiss-prediction
-CCOC(=O)NCCOc1ccc(cc1)Oc1ccccc1,0.15662666843714387,1,swiss-prediction
-OC(C(C)(C)C)C(n1cncn1)Oc1ccc(cc1)c1ccccc1,0.1588970703544765,1,swiss-prediction
-N#Cc1c[nH]cc1c1cccc2c1OC(O2)(F)F,0.16073556275027362,1,swiss-prediction
-N#Cc1sc2=c(sc1C#N)c(=O)c1c(c2=O)cccc1,0.1622542917276107,1,swiss-prediction
-ClCCOc1ccccc1S(=O)(=O)NC(=O)Nc1nc(C)nc(n1)OC,0.16698214361799124,1,swiss-prediction
-Cc1nc(Nc2ccccc2)nc(c1)C1CC1,0.16887323351376893,1,swiss-prediction
-CC(N1C(=O)c2ccccc2NS1(=O)=O)C,0.1869332752762242,1,swiss-prediction
-CCN(C(=O)C(Oc1cccc2c1cccc2)C)CC,0.1870996604409719,1,swiss-prediction
-COc1cccc(c1C)C(=O)NN(C(C)(C)C)C(=O)c1cc(C)cc(c1)C,0.19454287979711238,1,swiss-prediction
-[O-][N+](=O)c1cc(Cl)c(c(c1)Cl)N,0.19490539203138116,1,swiss-prediction
-COC(=O)c1c(nc(c(c1CC(C)C)C1=NCCS1)C(F)(F)F)C(F)F,0.20342339615295477,1,swiss-prediction
-COCN(c1c(CC)cccc1CC)C(=O)CCl,0.2117753387316933,1,swiss-prediction
-O=C(C1=C(C)OCCS1)Nc1ccccc1,0.21864734359359142,1,swiss-prediction
-COC(=O)NC(=S)Nc1ccccc1NC(=S)NC(=O)OC,0.2320365574545069,1,swiss-prediction
-ClC(SN1C(=O)C2C(C1=O)CC=CC2)(Cl)Cl,0.23843829210981765,0.2,swiss-prediction
-CC(C1(C)N=C(NC1=O)c1nc2ccccc2cc1C(=O)O)C,0.2428137170977884,1,swiss-prediction
-COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1Cl,0.2515974159735541,1,swiss-prediction
-COC(=O)c1c(Cl)c(Cl)c(c(c1Cl)Cl)C(=O)OC,0.2828087742644706,1,swiss-prediction
-Cc1cccc2c1n1cnnc1s2,0.28403797861303776,1,swiss-prediction
-COC(=O)NS(=O)(=O)c1ccc(cc1)N,0.30649366939843004,1,swiss-prediction
-O=C(C1(C)CCCCC1)Nc1ccc(c(c1Cl)Cl)O,0.31288159348953604,1,swiss-prediction
-OC(=O)c1nc(Cl)c(c(c1Cl)N)Cl,0.3802071151378962,1,swiss-prediction
-CCC(n1c(=O)[nH]c(c(c1=O)Br)C)C,0.503640251987437,0.25,swiss-prediction
-CCc1ccc(cc1)C(=O)NN(C(C)(C)C)C(=O)c1cc(C)cc(c1)C,0.5121170140168649,1,swiss-prediction
-OC(=O)CNCP(=O)(O)O,0.700841565636653,0.16666666666666666,swiss-prediction
-ClCCP(=O)(O)O,4.1102099979069795,0.16666666666666666,swiss-prediction
+SMILES,LOAEL_measured_median,LOAEL_predicted,Confidence,Dataset
+N#Cc1nn(c(c1S(=O)C(F)(F)F)N)c1c(Cl)cc(cc1Cl)C(F)(F)F,0.0001361095787305931,0.018032363548935507,1,swiss-prediction
+OC(=O)C(Oc1ccc(cc1)Oc1ncc(cc1Cl)C(F)(F)F)C,0.0002764719490734748,0.01846182647753877,1,swiss-prediction
+CCOP(=S)(SCSC(C)(C)C)OCC,0.000277363084031507,0.0038932756645553637,1,swiss-prediction
+CCSCSP(=S)(OCC)OCC,0.0006144925543928096,0.0021391291181705988,1,swiss-prediction
+CCOP(=O)(SC(CC)C)SC(CC)C,0.0008728063120409454,0.008579634171466552,1,swiss-prediction
+CNC(=O)CSP(=S)(OC)OC,0.001090477150926923,0.014593717469688338,1,swiss-prediction
+COP(=O)(SC)N,0.0020548549621536454,0.045296304153967855,0.13333333333333333,swiss-prediction
+CSc1ccc(cc1C)OP(=S)(OC)OC,0.0025868753585247565,0.008959434615561151,1,swiss-prediction
+CCOP(=S)(Oc1ccc2c(c1)oc(=O)c(c2C)Cl)OCC,0.0033630532459809582,0.009462543754698434,1,swiss-prediction
+CCOP(=S)(Oc1ncn(n1)c1ccccc1)OCC,0.004149211972577347,0.0209058951160662,1,swiss-prediction
+COc1sc(=O)n(n1)CSP(=S)(OC)OC,0.004713372383210075,0.011073447351926287,1,swiss-prediction
+CCOP(=S)(OCC)SCSP(=S)(OCC)OCC,0.0049417895576815835,0.0021000613932692083,1,swiss-prediction
+CCOP(=O)(Oc1ccc(c(c1)C)SC)NC(C)C,0.005274306112287868,0.007657024526944333,1,swiss-prediction
+CC(Cc1ccc(cc1)C(C)(C)C)CN1CC(C)OC(C1)C,0.005601648043851348,0.05500615548608445,1,swiss-prediction
+COP(=S)(Oc1nc(Cl)c(cc1Cl)Cl)OC,0.006200913183680908,0.023258622289143393,1,swiss-prediction
+OC(C(Cl)(Cl)Cl)(c1ccc(cc1)Cl)c1ccc(cc1)Cl,0.006747899500347733,0.04843867148274346,1,swiss-prediction
+COC(=O)N(C(=O)N1COC2(C(=N1)c1ccc(cc1C2)Cl)C(=O)OC)c1ccc(cc1)OC(F)(F)F,0.006820319665576013,0.056463299399384,1,swiss-prediction
+Clc1ccc(cc1)C(c1ccccc1Cl)(c1cncnc1)O,0.007548400798826121,0.049460296466489855,1,swiss-prediction
+COP(=S)(SCn1nnc2c(c1=O)cccc2)OC,0.0076105098020530036,0.022759394852161142,1,swiss-prediction
+N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Br)Br,0.007818698763639501,0.033983552550211814,1,swiss-prediction
+CCN(c1nc(cc(n1)C)OP(=S)(OC)OC)CC,0.008187766847509327,0.0206066733541471,1,swiss-prediction
+CCCCOC(=O)C(Oc1ccc(cc1)Oc1ccc(cn1)C(F)(F)F)C,0.008855868434313272,0.006594681734518869,1,swiss-prediction
+CCCSP(=O)(SCCC)OCC,0.010068539755671456,0.004650206638789641,1,swiss-prediction
+COP(=O)(OC=C(Cl)Cl)OC,0.010408382170442241,0.0015614663384413924,0.25,swiss-prediction
+CC(Oc1cc(c(cc1Cl)Cl)n1nc(oc1=O)C(C)(C)C)C,0.012455788330375379,0.12778348323867558,1,swiss-prediction
+CNC(=O)Oc1cccc2c1OC(C2)(C)C,0.0139433514779606,0.06131182057778638,1,swiss-prediction
+N#Cc1c(Cl)c(C#N)c(c(c1Cl)Cl)Cl,0.015042627044387032,0.08243149771542137,1,swiss-prediction
+Fc1ccc(cc1)[Si](c1ccc(cc1)F)(Cn1cncn1)C,0.01585325164934852,0.030000496093829623,1,swiss-prediction
+CCOP(=S)(Oc1cc(C)nc(n1)C(C)C)OCC,0.01642869699075557,0.011528157789546231,1,swiss-prediction
+N#Cc1c(Cl)cccc1Cl,0.016568667498017633,0.08955093048786876,1,swiss-prediction
+CCOP(=S)(Oc1nc(Cl)c(cc1Cl)Cl)OCC,0.0171141884323489,0.01260929482868455,1,swiss-prediction
+BrC1COC(C1)(Cn1cncn1)c1ccc(cc1Cl)Cl,0.017185416989653705,0.047109134838313686,1,swiss-prediction
+CCN(C(=O)SCc1ccc(cc1)Cl)CC,0.018038670157326797,0.04008581787410763,1,swiss-prediction
+CCCN(C(=O)n1cncc1)CCOc1c(Cl)cc(cc1Cl)Cl,0.01991156926953532,0.05287343441882649,1,swiss-prediction
+CCOC(=O)c1cn2nc(cc2nc1C)OP(=S)(OCC)OCC,0.020087610909726116,0.007637073565493268,1,swiss-prediction
+COP(=S)(SCN1C(=O)c2c(C1=O)cccc2)OC,0.02048398681663214,0.03862209230395829,1,swiss-prediction
+CNC(=O)ON=C(C(=O)N(C)C)SC,0.022347753176858155,0.056548471383657296,1,swiss-prediction
+COC(=O)N(c1ccccc1COc1ccn(n1)c1ccc(cc1)Cl)OC,0.02320682656135787,0.14037755043662448,1,swiss-prediction
+CON(C(=O)Nc1ccc(c(c1)Cl)Cl)C,0.025090939601491648,0.04005668682016994,1,swiss-prediction
+O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(cc1)Cl,0.025427825579407606,0.12765610432548405,1,swiss-prediction
+O=C(N(C)C)Nc1ccc(c(c1)Cl)Cl,0.02574063309087087,0.08391380469075903,1,swiss-prediction
+CCOC(=O)C(Oc1ccc(cc1)Oc1cnc2c(n1)ccc(c2)Cl)C,0.025750915471868897,0.015316912166292006,1,swiss-prediction
+N#Cc1sc2=c(sc1C#N)c(=O)c1c(c2=O)cccc1,0.026997497601947272,0.1622542917276107,1,swiss-prediction
+C=CC1(C)OC(=O)N(C1=O)c1cc(Cl)cc(c1)Cl,0.027961199362093195,0.10035495983582815,1,swiss-prediction
+N#CC(c1ccc(c(c1)Oc1ccccc1)F)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.028207113064839383,0.017779187285422805,1,swiss-prediction
+CCOC(=O)C(Cc1cc(c(cc1Cl)F)n1nc(n(c1=O)C(F)F)C)Cl,0.029112705155716945,0.11119329800179159,1,swiss-prediction
+CON(C(=O)Nc1ccc(cc1)Br)C,0.03130067550140176,0.041158230988177856,1,swiss-prediction
+CN1CN(C)CSC1=S,0.03266034652463028,0.05143501540726455,1,swiss-prediction
+ClCC1CN(C(=O)C1Cl)c1cccc(c1)C(F)(F)F,0.03316084217977319,0.07688860200812843,1,swiss-prediction
+CCCCC(c1ccc(cc1)Cl)(Cn1cncn1)C#N,0.0341788251725187,0.07298099293681495,1,swiss-prediction
+CCOP(=S)(SCn1c(=O)oc2c1ccc(c2)Cl)OCC,0.0345288315455876,0.007407343935315931,1,swiss-prediction
+CCOCn1c(c2ccc(cc2)Cl)c(c(c1C(F)(F)F)Br)C#N,0.03508230910777224,0.040036364983302354,1,swiss-prediction
+Clc1ccc(c(c1)Cl)C1(OCCO1)Cn1cncn1,0.03798219426521996,0.05301359850254968,1,swiss-prediction
+CC(C(c1cncnc1)(c1ccc(cc1)OC(F)(F)F)O)C,0.038746408312020406,0.02071602054046363,1,swiss-prediction
+CC(N(C(=O)SCC(=C(Cl)Cl)Cl)C(C)C)C,0.04102878665011248,0.009149216533940489,0.1,swiss-prediction
+COP(=S)(Oc1ccc(c(c1)C)[N+](=O)[O-])OC,0.042491175292669145,0.01284173174704369,1,swiss-prediction
+O=C1OC(C(=O)N1Nc1ccccc1)(C)c1ccc(cc1)Oc1ccccc1,0.045140176541360745,0.1117754163784813,1,swiss-prediction
+CN(C(=S)SSC(=S)N(C)C)C,0.04783039657471141,0.052029910797683425,1,swiss-prediction
+CN(C(=O)Oc1nc(nc(c1C)C)N(C)C)C,0.05161859628615915,0.04977891267988713,1,swiss-prediction
+C=CCOC(c1ccc(cc1Cl)Cl)Cn1cncc1,0.05350296944357954,0.019218559674619703,1,swiss-prediction
+COCN(c1c(CC)cccc1CC)C(=O)CCl,0.05375006811431045,0.21177533873169335,1,swiss-prediction
+CCOc1ccc2c(c1)C(=CC(N2)(C)C)C,0.05522147585284508,0.07326983970840215,1,swiss-prediction
+O=C(c1ccc(cc1S(=O)(=O)C)C(F)(F)F)c1cnoc1C1CC1,0.05566064749641608,0.027386062506459995,1,swiss-prediction
+Clc1ccccc1c1nnc(nn1)c1ccccc1Cl,0.05706818876652619,0.07184435965438675,1,swiss-prediction
+OC(=O)COc1ccc(cc1C)Cl,0.057322598023636456,0.0490521201449849,1,swiss-prediction
+CCOC(=O)NCCOc1ccc(cc1)Oc1ccccc1,0.057576722828150476,0.1566266684371439,1,swiss-prediction
+N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C(c1ccc(cc1)Cl)C(C)C,0.05953797389131243,0.018130739436787953,1,swiss-prediction
+N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)(C)C,0.06009909138187043,0.027036059762033778,1,swiss-prediction
+CC(OC(=O)C(c1ccc(cc1)Br)(c1ccc(cc1)Br)O)C,0.06061453423316249,0.1495196727139019,1,swiss-prediction
+Clc1cc(ccc1Oc1ccc(c(c1)C(=O)NS(=O)(=O)C)[N+](=O)[O-])C(F)(F)F,0.06267621846158328,0.04346838792923878,1,swiss-prediction
+COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1Cl,0.06288907725176857,0.2515974159735541,1,swiss-prediction
+CCCCN(SN(C(=O)Oc1cccc2c1OC(C2)(C)C)C)CCCC,0.06569530810416269,0.02176055906933658,1,swiss-prediction
+O=C(NC(=O)c1c(F)cccc1F)Nc1cc(Cl)c(c(c1F)Cl)F,0.06690994773808298,0.058379476811434815,1,swiss-prediction
+CSc1nnc(c(=O)n1N)C(C)(C)C,0.06719929397120725,0.013701160159437665,0.11538461538461539,swiss-prediction
+O=C(NC(=O)c1c(F)cccc1F)Nc1ccc(c(c1)Cl)OC(C(OC(F)(F)F)F)(F)F,0.06758613754894155,0.07121509909913679,1,swiss-prediction
+CCOc1ccc(cc1)C(COCc1cccc(c1)Oc1ccccc1)(C)C,0.06839527058523667,0.12674868328005046,1,swiss-prediction
+C#CCOS(=O)OC1CCCCC1Oc1ccc(cc1)C(C)(C)C,0.06904967382858089,0.05421753035009689,1,swiss-prediction
+OC(=O)COc1nc(Cl)c(cc1Cl)Cl,0.0721330354641874,0.04528624604587442,1,swiss-prediction
+O=C(N(C)C)Nc1cccc(c1)C(F)(F)F,0.07395704796137248,0.04503577382215038,1,swiss-prediction
+OC(C(C)(C)C)C(n1cncn1)Oc1ccc(cc1)c1ccccc1,0.07409262028018154,0.15889707035447642,1,swiss-prediction
+CCNc1nc(NC(C)C)nc(n1)Cl,0.07789199862212233,0.051943767855990995,1,swiss-prediction
+O=C(C1=C(C)OCCS1)Nc1ccccc1,0.08117223892684251,0.21864734359359156,1,swiss-prediction
+CNC(=O)Oc1cc(C)c(c(c1)C)SC,0.0827758354922366,0.020160859255457103,1,swiss-prediction
+OC(C(C)(C)C)C(n1ncnc1)Oc1ccc(cc1)Cl,0.08452667530010859,0.07342623822245768,1,swiss-prediction
+O=C(C(C)(C)C)C(n1ncnc1)Oc1ccc(cc1)Cl,0.08510674803234901,0.06552995555017291,1,swiss-prediction
+CC(NC(=O)N1CC(=O)N(C1=O)c1cc(Cl)cc(c1)Cl)C,0.08783443947180365,0.0706115065820919,1,swiss-prediction
+CCC(=O)Nc1ccc(c(c1)Cl)Cl,0.09170952329114665,0.07845200347176311,1,swiss-prediction
+N#CC(c1ccccc1)(Cn1cncn1)CCc1ccc(cc1)Cl,0.09203781459712614,0.048787632528698034,1,swiss-prediction
+ClC(=CC1C(C1(C)C)C(=O)OCc1cccc(c1)Oc1ccccc1)Cl,0.09583741068272783,0.058035645801953625,1,swiss-prediction
+OC(=O)C(Oc1ccc(cc1Cl)Cl)C,0.0967821447110451,0.017069971996617916,1,swiss-prediction
+COP(=O)(NC(=O)C)SC,0.10236623790044716,0.0023317481611294373,1,swiss-prediction
+O=C1N(c2cc(Cl)cc(c2)Cl)C(=O)C2(C1(C)C2)C,0.11086164698737522,0.08671261321187769,1,swiss-prediction
+COC(=O)c1c(nc(c(c1CC(C)C)C1=NCCS1)C(F)(F)F)C(F)F,0.11151045292283465,0.20342339615295477,1,swiss-prediction
+Clc1ccc(cc1)CCC(C(C)(C)C)(Cn1cncn1)O,0.11516531274058425,0.05500705393091043,1,swiss-prediction
+COC(=O)NC(=S)Nc1ccccc1NC(=S)NC(=O)OC,0.12412602138191925,0.2320365574545069,1,swiss-prediction
+CCOc1cc(ccc1[N+](=O)[O-])Oc1ccc(cc1Cl)C(F)(F)F,0.12855945536132327,0.029097658324682158,1,swiss-prediction
+CCc1ccc(cc1)C(=O)NN(C(C)(C)C)C(=O)c1cc(C)cc(c1)C,0.1367492600870436,0.5121170140168657,1,swiss-prediction
+c1scc(n1)c1nc2c([nH]1)cccc2,0.1490700414533971,0.0875690712634714,1,swiss-prediction
+CC(N1C(=O)c2ccccc2NS1(=O)=O)C,0.14982590230152565,0.18693327527622422,1,swiss-prediction
+N#CC(c1cccc(c1)Oc1ccccc1)OC(=O)C1C(C1(C)C)C=C(Cl)Cl,0.15013314047110002,0.015809693317509144,1,swiss-prediction
+Cc1cccc2c1n1cnnc1s2,0.1506048130761757,0.28403797861303787,1,swiss-prediction
+ClC(SN1C(=O)c2c(C1=O)cccc2)(Cl)Cl,0.15174119992085178,0.11359098888992508,1,swiss-prediction
+CCC(Nc1c(cc(cc1[N+](=O)[O-])C(C)(C)C)[N+](=O)[O-])C,0.15338553104123837,0.06059577630169314,1,swiss-prediction
+Cc1nc(Nc2ccccc2)nc(c1)C1CC1,0.15801925188118618,0.16887323351376893,1,swiss-prediction
+c1ccc(cc1)Nc1ccccc1,0.16546268922726798,0.12245503297742702,1,swiss-prediction
+Clc1cc(Cl)c(cc1n1nc(n(c1=O)C(F)F)C)NS(=O)(=O)C,0.1730416993562668,0.019936495077846474,1,swiss-prediction
+CCCC1COC(O1)(Cn1cncn1)c1ccc(cc1Cl)Cl,0.1767866659490005,0.042629025508833636,1,swiss-prediction
+C#CCC1=C(C)C(CC1=O)OC(=O)C1C(C1(C)C)C=C(C)C,0.18559079091504613,0.10390334684834952,1,swiss-prediction
+COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1CCC(F)(F)F,0.20017699986539617,0.13759637912715172,1,swiss-prediction
+CC(=CC1C(C1(C)C)C(=O)OCc1coc(c1)Cc1ccccc1)C,0.2068313193675311,0.13967461554721777,1,swiss-prediction
+CCOCN(c1c(C)cccc1CC)C(=O)CCl,0.21666838084755125,0.09353524776797524,1,swiss-prediction
+CC(Nc1nc(NC(C)C)nc(n1)Cl)C,0.2198425631222415,0.07003774391915464,1,swiss-prediction
+CNC(=O)Oc1ccccc1OC(C)C,0.23417894234275483,0.05950240879470488,1,swiss-prediction
+CCCCCCCCSC(=O)Oc1cc(Cl)nnc1c1ccccc1,0.2408184692696632,0.12117285527596833,1,swiss-prediction
+O=C1N(OCC1(C)C)Cc1ccccc1Cl,0.253443853488009,0.060568960163232294,1,swiss-prediction
+Cc1ccc(cc1)N(S(=O)(=O)N(C)C)SC(Cl)(Cl)F,0.25917417547047744,0.07855841127983834,1,swiss-prediction
+CC1N(C(=O)NC2CCCCC2)C(=O)SC1c1ccc(cc1)Cl,0.2594374890563992,0.041544797190431366,1,swiss-prediction
+CCN(C(=O)C(Oc1cccc2c1cccc2)C)CC,0.27189561466298434,0.1870996604409719,1,swiss-prediction
+OC(=O)COc1ccc(cc1Cl)Cl,0.28049546719325014,0.03674313560429932,1,swiss-prediction
+CCOC(=O)CC(C(=O)OCC)SP(=S)(OC)OC,0.2921073325236663,0.007932473192811151,1,swiss-prediction
+CNC(=O)Oc1cccc2c1cccc2,0.2981792578159244,0.13169394021834496,1,swiss-prediction
+Clc1ccc(cc1)CN(C(=O)Nc1ccccc1)C1CCCC1,0.31170568268957544,0.05844562040833024,1,swiss-prediction
+ClCCOc1ccccc1S(=O)(=O)NC(=O)Nc1nc(C)nc(n1)OC,0.31207588849423984,0.16698214361799124,1,swiss-prediction
+CCC(n1c(=O)[nH]c(c(c1=O)Br)C)C,0.3169092998307417,0.503640251987437,0.25,swiss-prediction
+ClC(SN1C(=O)C2C(C1=O)CC=CC2)(Cl)Cl,0.32935301892961466,0.23843829210981765,0.2,swiss-prediction
+CC(=CC1C(C1(C)C)C(=O)OCc1cccc(c1)Oc1ccccc1)C,0.42802021191337764,0.06621783334677835,1,swiss-prediction
+CC(Oc1ccccn1)COc1ccc(cc1)Oc1ccccc1,0.43361266621389954,0.11147220458900187,1,swiss-prediction
+N#Cc1c[nH]cc1c1cccc2c1OC(O2)(F)F,0.44926154899338216,0.16073556275027356,1,swiss-prediction
+Clc1c(Cl)c([N+](=O)[O-])c(c(c1Cl)Cl)Cl,0.47403843842257615,0.07983484928350948,1,swiss-prediction
+COc1nc(nc(n1)C)NC(=O)NS(=O)(=O)c1ccccc1C(=O)OC,0.4837900188743661,0.12938220260072822,1,swiss-prediction
+Nc1nc(NC2CC2)nc(n1)N,0.5144905821145022,0.09185975434766332,1,swiss-prediction
+CCCCOCCOCCOCc1cc2OCOc2cc1CCC,0.5170806512852409,0.0683920470541598,0.1,swiss-prediction
+COCC(N(c1c(C)cccc1CC)C(=O)CCl)C,0.5285529966699751,0.09081986063352195,1,swiss-prediction
+O=Cc1ccco1,0.624453213155231,0.12486833177320306,0.1111111111111111,swiss-prediction
+[O-][N+](=O)c1cc(Cl)c(c(c1)Cl)N,0.7245881151318344,0.19490539203138116,1,swiss-prediction
+COC(=O)c1c(Cl)c(Cl)c(c(c1Cl)Cl)C(=O)OC,0.7681550277825105,0.2828087742644706,1,swiss-prediction
+COC(=O)NS(=O)(=O)c1ccc(cc1)N,0.7817895162025876,0.30649366939843004,1,swiss-prediction
+OC(=O)c1nc(Cl)c(c(c1Cl)N)Cl,0.8282972172278201,0.3802071151378962,1,swiss-prediction
+CC(C1(C)N=C(NC1=O)c1nc2ccccc2cc1C(=O)O)C,0.8351128195663594,0.2428137170977884,1,swiss-prediction
+COC(=O)Nc1nc2c([nH]1)cccc2,0.8499546987221808,0.06899951625487367,1,swiss-prediction
+CC(Oc1cccc(c1)NC(=O)c1ccccc1C(F)(F)F)C,0.9077984526598573,0.09329533119250691,1,swiss-prediction
+O=C(C1(C)CCCCC1)Nc1ccc(c(c1Cl)Cl)O,0.9662594125910484,0.31288159348953604,1,swiss-prediction
+ClCCP(=O)(O)O,0.9723587138566308,4.1102099979069795,0.16666666666666666,swiss-prediction
+COc1cccc(c1C)C(=O)NN(C(C)(C)C)C(=O)c1cc(C)cc(c1)C,1.1154252951100516,0.19454287979711238,1,swiss-prediction
+CCC(c1noc(c1)NC(=O)c1c(OC)cccc1OC)(CC)C,1.5854670852219546,0.1002087398883717,1,swiss-prediction
+OC(=O)COc1nc(F)c(c(c1Cl)N)Cl,1.9605490478397496,0.14404836568635776,1,swiss-prediction
+CC(OC(=O)Nc1cccc(c1)Cl)C,2.340158076742021,0.08873389182433254,1,swiss-prediction
+Oc1ccccc1c1ccccc1,3.119727015073393,0.1337230195551853,1,swiss-prediction
+OC(=O)CNCP(=O)(O)O,5.559726007239,0.700841565636653,0.16666666666666666,swiss-prediction
diff --git a/paper/data/swiss-test-predictions.id b/paper/data/swiss-test-predictions.id
new file mode 100644
index 0000000..fc28534
--- /dev/null
+++ b/paper/data/swiss-test-predictions.id
@@ -0,0 +1 @@
+56c429cc2b72ed0d79000004
diff --git a/paper/data/swiss.csv~ b/paper/data/swiss.csv~
deleted file mode 100644
index 997d8a4..0000000
--- a/paper/data/swiss.csv~
+++ /dev/null
@@ -1,494 +0,0 @@
-SMILES,LOAEL parental as dose (mmol/kg bw per day)
-[O-][N+]2=NC(N3C=CN=C3)=NC1=CC=C(Cl)C=C12,0.00020190555530632425
-ClC(C=C(C=C1Cl)C(F)(F)F)=C1N2N=C(C(S(C(F)(F)F)=O)=C2N)C#N,0.00013496580117055152
-CC(C(O)=O)OC2=CC=C(C=C2)OC1=C(Cl)C=C(C(F)(F)F)C=N1,0.0002764719511333511
-S=P(OCC)(OCC)SCSC(C)(C)C,0.00034670385697674235
-CCSCSP(OCC)(OCC)=S,0.0006144925475253195
-CC(OC1=CC=C(OC2=CC=C(Cl)C=C2Cl)C=C1)C(OC)=O,0.0005861906011027885
-S=P(OC)(OC)SCC(NC)=O,0.000872381733741038
-S=P(OC)(OC)OC1=CC(C)=C(SC)C=C1,0.0007185764991867223
-O=P(OC)(OC)SCCS(CC)=O,0.0008932752807580748
-O=P(OCC)(SC(C)CC)SC(C)CC,0.0008210296720157477
-O=P(SC)(OC)N,0.0020548549325897737
-CCS(CCSP(OC)(OC)=O)(=O)=O,0.0011437981092748413
-O=P(OC)(OC)SCCSCC,0.001519854088965729
-O=P(OC)(OC)O/C(C)=C/C(OC)=O,0.0015614663384413926
-O=C1N(P(OCC)(SC(C)CC)=O)CCS1,0.001341107599716744
-CS(C(C=C2Cl)=CC=C2C(C(C(CCC1)=O)C1=O)=O)(=O)=O,0.0012166633663470796
-O=P(OCC)(OC1=CC=C(SC)C(C)=C1)NC(C)C,0.001516363034790411
-O=C(C2=CC=C(S(=O)(C)=O)C=C2[N+]([O-])=O)C1C(CCCC1=O)=O,0.001414591694222218
-O=P(SCCC)(OCC)SCCC,0.002063225311384027
-S=P(OC)(OC)OC1=CC=C([N+]([O-])=O)C=C1,0.0018996422061897484
-S=P(OC)(OC)OC1=CC=C([N+]([O-])=O)C=C1,0.002089606472099723
-S=P(OC)(OC)SCN1C(SC(OC)=N1)=O,0.0021168829879502555
-S=P(OC)(OC)OC1=CC(C)=C(SC)C=C1,0.0025868754613179463
-N4(N=CN=C4)[Sn](C2CCCCC2)(C3CCCCC3)C1CCCCC1,0.0018110419025972907
-CC2(C)OC1=CC=CC(OC(NC)=O)=C1O2,0.0035838244976124515
-O=[N+]([O-])C(Cl)(Cl)Cl,0.006083642787963147
-S=P(OC1=NC(Cl)=C(Cl)C=C1Cl)(OCC)OCC,0.002852364738724816
-ClC1=CC(Cl)=C(OP(OC)(OC)=S)N=C1Cl,0.003100456591840454
-O[Sn](C2CCCCC2)(C3CCCCC3)C1CCCCC1,0.002596303652874617
-CCC(C)C1C(C=CC2(O1)CC3CC(O2)CC=C(C(C(C=CC=C4COC5C4(C(C=C(C5O)C)C(=O)O3)O)C)OC6CC(C(C(O6)C)OC7CC(C(C(O7)C)NC)OC)OC)C)C.C1=CC=C(C=C1)C(=O)O,0.0009918273033473258
-FC(F)(F)C1=CC=C(N[C@@H]([C@H](C)C)C(OC(C#N)C2=CC(OC3=CC=CC=C3)=CC=C2)=O)C(Cl)=C1,0.001988416717024977
-O=C(O)COC1=NC(Cl)=C(Cl)C=C1Cl,0.0038990829980641837
-O=C(CCC1)C(C(C2=CC=C(S(=O)(C)=O)C(COCC(F)(F)F)=C2Cl)=O)C1=O,0.002381932321850521
-C(C(CCl)O)O,0.009951195933270719
-O=C(NC)OC1=C(OC(C)(C)C2)C2=CC=C1,0.005288078037050265
-FC(F)(F)C(S2)=NN=C2OCC(N(C(C)C)C1=CC=C(F)C=C1)=O,0.0033027779077186826
-S=P(OCC)(OCC)OC(C=C2)=CC1=C2C(C)=C(Cl)C(O1)=O,0.0033630532459809582
-ClC1=NC(NCC)=NC(NC(C)(C)C)=N1,0.005398114462735858
-CCOP(OC2=NN(C=N2)C1=CC=CC=C1)(OCC)=S,0.004149211896481245
-O[Sn](C2CCCCC2)(C3CCCCC3)C1CCCCC1,0.003608862040355308
-O=P(OCC)(OCC)O/C(C1=CC=C(Cl)C=C1Cl)=C/Cl,0.004171650398342553
-CCC1=NN(C(=C1Cl)C(=O)NCC2=CC=C(C=C2)OC3=CC=C(C=C3)C)C,0.003907559846623587
-ClC1=NC(NCC)=NC(NC(C)(C)C)=N1,0.006921775895097049
-CCOP(=S)(OCC)OC1=CC=C(C=C1)[N+](=O)[O-],0.005493362006308507
-O=C(N(C)C)NC1=CC(Cl)=C(Cl)C=C1,0.007293179580314936
-O=P(OCC)(OC1=CC=C(SC)C(C)=C1)NC(C)C,0.005603950401492444
-CC(CC2=CC=C(C(C)(C)C)C=C2)CN1CC(OC(C1)C)C,0.005601648122412352
-ClC1=NC(NCC)=NC(NC(C)(C)C)=N1,0.0074006409463509264
-S=P(OC)(OC)SCN1C(SC(OC)=N1)=O,0.005689123251910172
-S=P(OCC)(OCC)SCSP(OCC)(OCC)=S,0.004681695305160139
-S=P(OC)(OC)SCN1C(C(C=CC=C2)=C2C1=O)=O,0.005672488506643871
-CCC(C)C1C(C=CC2(O1)CC3CC(O2)CC=C(C(C(C=CC=C4COC5C4(C(C=C(C5O)C)C(=O)O3)O)C)OC6CC(C(C(O6)C)OC7CC(C(C(O7)C)O)OC)OC)C)C.CC1C=CC=C2COC3C2(C(C=C(C3O)C)C(=O)OC4CC(CC=C(C1OC5CC(C(C(O5)C)OC6CC(C(C(O6)C)O)OC)OC)C)OC7(C4)C=CC(C(O7)C(C)C)C)O,0.0011546496256700967
-OC(C2=C(Cl)C=CC=C2)(C3=CN=CN=C3)C1=CC=C(Cl)C=C1,0.006038720639060896
-O=C(N(OC)C)NC1=CC=C(Cl)C=C1,0.00931754394759366
-S=P(OC)(OC)OC1=NC(N(CC)CC)=NC(C)=C1,0.0068777238395693234
-OC(C1=CC=C(Cl)C=C1)(C2=CC=C(Cl)C=C2)C(Cl)(Cl)Cl,0.005938151689011985
-O=P(OC)(OC)O/C=C(Cl)\Cl,0.010408382170442241
-O=C(SCC1=CC=CC=C1)N(CCC)CCC,0.009149216533940492
-O=P(SCCC)(OCC)SCCC,0.010068539755671456
-O=P(SC)(OC)NC(C)=O,0.013648831720059621
-O=C(N(C1=C(CC)C=CC=C1CC)COC)CCl,0.009267253123156974
-NC1=NNC=N1,0.029733601205328832
-NC#N,0.059467202410657664
-CC1(C)C(/C=C(Br)/Br)C1C(OC(C#N)C2=CC(OC3=CC=CC=C3)=CC=C2)=O,0.004948543461552866
-CCC(C)C1C(C=CC2(O1)CC3CC(O2)CC=C(C(C(C=CC=C4COC5C4(C(C=C(C5O)C)C(=O)O3)O)C)OC6CC(C(C(O6)C)OC7CC(C(C(O7)C)NC)OC)OC)C)C.C1=CC=C(C=C1)C(=O)O,0.0024795682583683147
-OC(C2=C(Cl)C=CC=C2)(C3=CN=CN=C3)C1=CC=C(Cl)C=C1,0.007548400798826121
-O=C(OC(C#N)C3=CC(OC4=CC=CC=C4)=C(F)C=C3)C(C(C)2C)C2/C=C(Cl)/C1=CC=C(Cl)C=C1,0.004898276703964497
-CCOP(=S)(OCC)OC1=CC=C(C=C1)[N+](=O)[O-],0.008583378006954733
-S=P(OC)(OC)OC1=CC=C([N+]([O-])=O)C=C1,0.009498211030948741
-N#C/N=C1SCCN/1CC2=CC=C(Cl)N=C2,0.009892243396986886
-CN(C)C(S[Zn]SC(N(C)C)=S)=S,0.00817493363915869
-C[N+](C=C2)=CC=C2C1=CC=[N+](C)C=C1,0.013691057325028715
-S=P(OC)(OC)SCN1N=NC(C=CC=C2)=C2C1=O,0.008508644649457775
-O=C(OC(C)1C=C)N(C2=CC(Cl)=CC(Cl)=C2)C1=O,0.009436904951368202
-O=C2C(N)=C(Cl)C(C1=CC=CC=C12)=O,0.013920121360835688
-O=S3OCC1C(CO3)C2(Cl)C(Cl)=C(Cl)C(Cl)1C(Cl)2Cl,0.007126618167084564
-[N+]12=CC=CC=C1C3=[N+](C=CC=C3)CC2,0.015794866515636753
-O=C(OC(C)(C)C)C1=CC=C(CO/N=C/C2=C(OC3=CC=CC=C3)N(C)N=C2C)C=C1,0.0071176254993963305
-CCNC1=NC(NC(C)C)=NC(Cl)=N1,0.014372927711833409
-O=C(OCC)NC1=CC=CC(OC(NC2=CC=CC=C2)=O)=C1,0.010655682947629983
-N#CC(C(Cl)=CC=C1)=C1Cl,0.0186034162597095
-CCS(=O)C1=C(N(N=C1C#N)C2=C(C=C(C=C2Cl)C(F)(F)F)Cl)N,0.008056334643428573
-N#CC1=C(Cl)C(Cl)=C(Cl)C(C#N)=C1Cl,0.012410167132297197
-FC(C(F)(OCC(C2=CC=C(C=C2Cl)Cl)CN1C=NC=N1)F)F,0.00913621053742932
-N#CC1=CC(F)=C(OC2=CC=C(OC(C)C(OCCCC)=O)C=C2)C=C1,0.009625729959721526
-CN(CN1C)CSC1=S,0.022184384932566064
-OC(C2=C(Cl)C=CC=C2)(C3=CN=CN=C3)C1=CC=C(Cl)C=C1,0.01086969686236098
-ClC1=CC=C(C2=NN(C(N(C(OC)=O)C4=CC=C(OC(F)(F)F)C=C4)=O)COC(C(OC)=O)2C3)C3=C1,0.006820319575237628
-O=C(OC(C(C)(C)C)=N2)N2C1=CC(OC(C)C)=C(Cl)C=C1Cl,0.010428101697378017
-CC1=C(C2=NOCC2)C(S(C)(=O)=O)=CC=C1C(C3=C(O)N(C)N=C3)=O,0.009906758425540224
-BrC1=CC=C(NC(N(C)OC)=O)C=C1,0.014357399945172603
-C[N+](C=C2)=CC=C2C1=CC=[N+](C)C=C1,0.020133908207418557
-CC(C(OCCCC)=O)OC1=CC=C(OC2=CC=C(C(F)(F)F)C=N2)C=C1,0.009886227162529472
-ClC1=C([N+]([O-])=O)C(NC2=NC=C(C(F)(F)F)C=C2Cl)=C([N+]([O-])=O)C=C1C(F)(F)F,0.00821343424858256
-COC(C(NNC(OC(C)C)=O)=C2)=CC=C2C1=CC=CC=C1,0.01298475189092086
-[N+]12=CC=CC=C1C3=[N+](C=CC=C3)CC2,0.021168377697732887
-ClC1=C([N+]([O-])=O)C(NC2=NC=C(C(F)(F)F)C=C2Cl)=C([N+]([O-])=O)C=C1C(F)(F)F,0.008385443694386083
-S=P(OC)(OC)SCN1C(C(C=CC=C2)=C2C1=O)=O,0.012605530348696702
-O=C(C(C(C)=N2)=CN1C2=CC(OP(OCC)(OCC)=S)=N1)OCC,0.010713392485187262
-CNC(ON=C(SC)C(N(C)C)=O)=O,0.019109609238234706
-FC(F)(F)C1=CC=C(C(S(C)(=O)=NC#N)C)C=N1,0.015292167409562457
-S=C(S)NC,0.04011276528748593
-O=C(N(CC)CC)SCC1=CC=C(Cl)C=C1,0.016680921188449865
-CC(C)(C1=CC=C(CCOC2=NC=NC3=CC=CC=C23)C=C1)C,0.014686613132547533
-ClC1C(Cl)C(Cl)C(Cl)C(Cl)C1Cl,0.01616065190994549
-O=C2C1=CC(F)=CC=C1N=C(N4N=CN=C4)N2C3=C(Cl)C=C(Cl)C=C3,0.01268036889326992
-CC1(CNC(=NC1)NN=C(C=CC2=CC=C(C=C2)C(F)(F)F)C=CC3=CC=C(C=C3)C(F)(F)F)C,0.009909494556264633
-CNC(ON=C(SC)C(N(C)C)=O)=O,0.022347753176858155
-ClC1=CC(Cl)=C(OCC(O)=O)C=C1,0.022620602193004043
-NC1=NNC=N1,0.059467202410657664
-O=C(NC)OC1=C(OC(C)(C)C2)C2=CC=C1,0.022598624918870935
-S=P(OCC)(OCC)OC1=NC(C(C)C)=NC(C)=C1,0.01642869699075557
-O=C(N(C1=C(C)SC=C1C)C(C)COC)CCl,0.018129419544573026
-O=S3OCC1C(CO3)C2(Cl)C(Cl)=C(Cl)C(Cl)1C(Cl)2Cl,0.01228727229779905
-FC1=CC=C(C2(CN4C=NC=N4)C(C3=CC=CC=C3Cl)O2)C=C1,0.015162725459871818
-ClC(Cl)(Cl)C1=NSC(OCC)=N1,0.020199571769078495
-C[Si](C2=CC=C(C=C2)F)(C3=CC=C(F)C=C3)CN1C=NC=N1,0.01585325164934852
-ClC1=CC(C(F)(F)F)=CC=C1OC2=CC=C([N+]([O-])=O)C(C(NS(C)(=O)=O)=O)=C2,0.011395676083924233
-ClC1=CC=C2C(N=CC(OC3=CC=C(OC(C)C(OCCO/N=C(C)/C)=O)C=C3)=N2)=C1,0.011264301100355506
-[S]C(NC(C)CNC(S[Zn])=S)=S,0.017255039351497643
-[S]C(NC(C)CNC(S[Zn])=S)=S,0.017255039351497643
-C1CNC(=S)NC1,0.04303491887745652
-CC(C)(C)C1=CC=C(CSC2=C(Cl)C(N(C(C)(C)C)N=C2)=O)C=C1,0.013701160159437661
-CS/C(C)=N/OC(N(SN(C(O/N=C(SC)\C)=O)C)C)=O,0.014105593115928905
-CN(CN1C)CSC1=S,0.03266034652463028
-CCN(CCCC)C1=C([N+]([O-])=O)C=C(C(F)(F)F)C=C1[N+]([O-])=O,0.016105987222784814
-CC1(C)C(/C=C(Br)/Br)C1C(OC(C#N)C2=CC(OC3=CC=CC=C3)=CC=C2)=O,0.010688854065726137
-CC(C)C1C2CCC1C3=C2C(NC(C4=CN(C)N=C4C(F)F)=O)=CC=C3,0.015302732709143212
-O=C(O)C(C)OC1=C(C)C=C(Cl)C=C1,0.02562363979237584
-C1CNC(=S)NC1,0.04819910832192538
-FC(F)(F)/C(Cl)=C/C1C(C)(C)C1C(OCC2=C(F)C(F)=C(C)C(F)=C2F)=O,0.01409010160197152
-S=P(OC1=NC(Cl)=C(Cl)C=C1Cl)(OCC)OCC,0.0171141884323489
-O[Sn](C2CCCCC2)(C3CCCCC3)C1CCCCC1,0.015577821917247702
-O=C(C2=C1C=CC=C2)C(SC(C#N)=C(C#N)S3)=C3C1=O,0.020248123201460456
-FC1=CC=C(C2(CN4C=NC=N4)C(C3=CC=CC=C3Cl)O2)C=C1,0.018195270551846183
-O=C(NCCC2=NC=C(C(F)(F)F)C=C2Cl)C1=CC=CC=C1C(F)(F)F,0.015124216704213374
-O=C(NC1=CC(Cl)=C(Cl)C=C1)N(C)OC,0.025090939601491648
-O=C(NC1=CC(Cl)=C(Cl)C=C1)N(C)OC,0.025090939601491648
-O=C1C(/C(CCC)=N\OCC)=C(O)CC(C2CSCCC2)C1,0.019664101798126703
-BrC(C3)COC(CN2C=NC=N2)3C1=C(Cl)C=C(Cl)C=C1,0.017185417014945824
-CN1N=C(C(Cl)=C1C(NCC2=CC=C(C=C2)C(C)(C)C)=O)CC,0.019469491695902355
-CC1CCC2(CC3CC(O2)CC=C(CC(C=CC=C4COC5C4(C(C=C(C5O)C)C(=O)O3)O)C)C)OC1C,0.01286229964885329
-O=C1N(NC3=CC=CC=C3)C(SC)=NC(C)1C2=CC=CC=C2,0.022800155556897562
-S=C(SSC(N(C)C)=S)N(C)C,0.03036190470594063
-NC1=C(C(OC2=CC=CC=C2)=CC=C1[N+]([O-])=O)Cl,0.030226952270055448
-BrC1=CC(C#N)=CC(Br)=C1O,0.028889958940868102
-C1CC1(C(CC2=CC=CC=C2Cl)(CN3C=NC=N3)O)Cl,0.025625059257949535
-O=C(OC(C)1C=C)N(C2=CC(Cl)=CC(Cl)=C2)C1=O,0.027961199362093195
-FC(F)(F)C(C(F)(F)F)OC(/C=C\C1C(C)(C)C1C(OC(C#N)C2=CC=C(OC3=CC=CC=C3)C=C2)=O)=O,0.014960133059978587
-O=C(C(SCCO2)=C2C)NC1=CC=CC=C1,0.034848813981213346
-FC(F)(F)OC(C=C2)=CC=C2NC(NC(C1=C(Cl)C=CC=C1)=O)=O,0.023557308728421166
-O=C(SC/C(Cl)=C(Cl)\Cl)N(C(C)C)C(C)C,0.028227806467376604
-C[N+](C=C2)=CC=C2C1=CC=[N+](C)C=C1,0.04762340359884257
-O=C2C(CCCCCCCCCCCC)=C(OC(C)=O)C(C1=CC=CC=C12)=O,0.02340650588512378
-O=C(N1C(C)C)N(C2=CC=CC=C2)CS/C1=N\C(C)(C)C,0.029465850912223458
-O=C(CC)NC1=CC(Cl)=C(Cl)C=C1,0.041269285481015994
-CON(C(OC)=O)C(C=CC=C3)=C3COC2=NN(C=C2)C1=CC=C(Cl)C=C1,0.02320682656135787
-CC(C(O)=O)OC(C=C3)=CC=C3OC2=NC1=CC=C(Cl)C=C1O2,0.028167056356499628
-CCC1CCCC(C(C(=O)C2=CC3C4CC(CC4C(=CC3C2CC(=O)O1)C)OC5C(C(C(C(O5)C)OC)OC)OC)C)OC6CCC(C(O6)C)N(C)C,0.012734890360905185
-C1=C(SC(=N1)S(=O)(=O)CCC(=C(F)F)F)Cl,0.03291071649153446
-CC(C)C(C2=CN=CN=C2)(O)C1=CC=C(OC(F)(F)F)C=C1,0.03138138916099924
-CCCCC(C#N)(C2=CC=C(C=C2)Cl)CN1C=NC=N1,0.03428271152063386
-CN(/C=N/C(C=CC(C)=C2)=C2C)/C=N/C(C=CC(C)=C1)=C1C,0.03408246361134649
-CCNC1=NC(NC(C)C)=NC(Cl)=N1,0.04636428436773443
-S=P(OC1=NC(Cl)=C(Cl)C=C1Cl)(OCC)OCC,0.028523647387248163
-O=C(C(C(Cl)=C(Cl)C(C(OC)=O)=C1Cl)=C1Cl)OC,0.030123726579706293
-C[Si](C2=CC=C(C=C2)F)(C3=CC=C(F)C=C3)CN1C=NC=N1,0.03170650329869704
-CS(C1=CC(C(F)(F)F)=CC=C1C(C2=C(O)N(C)N=C2C)=O)(=O)=O,0.027599589461626675
-CS/C(C)=N/OC(N(SN(C(O/N=C(SC)\C)=O)C)C)=O,0.02821118623185781
-O=C(C2C(/C=C(Cl)/Cl)C(C)2C)OCC1=C(F)C(F)=CC(F)=C1F,0.026942980220700186
-CC(C(OCC#C)=O)OC1=CC=C(OC2=C(F)C=C(Cl)C=N2)C=C1,0.029164453292198207
-OC(C2=CC=C(F)C=C2)(C3=C(F)C=CC=C3)CN1C=NC=N1,0.03385434330908588
-FC(F)(F)C1=CC([N+]([O-])=O)=C(N(CC(C)=C)CC)C([N+]([O-])=O)=C1,0.03210675757919814
-CCOC(CCN(C(C)C)SN(C)C(OC1=CC=CC2=C1OC(C)(C)C2)=O)=O,0.02679478797527864
-O=S3OCC1C(CO3)C2(Cl)C(Cl)=C(Cl)C(Cl)1C(Cl)2Cl,0.02703199905515791
-BrC([H])([H])[H],0.1158644562818127
-CC(C)(C(C(N2C=NC=N2)CC1=CC=C(C=C1)Cl)O)C,0.03744148066760202
-O=C(C(C(C)=N2)=CN1C2=CC(OP(OCC)(OCC)=S)=N1)OCC,0.02946182933426497
-CNC(OC1=CC=CC(/N=C/N(C)C)=C1)=O,0.051976062085632144
-CC(CC2=CC=C(C(C)(C)C)C=C2)CN1CCCCC1,0.04326105065224025
-O=C(OCC)C(Cl)CC1=CC(N2N=C(C)N(C(F)F)C2=O)=C(F)C=C1Cl,0.029112705155716945
-CC(C(C1C(OC(C2=CC=CC(OC3=CC=CC=C3)=C2)C#N)=O)/C=C(C(F)(F)F)\Cl)1C,0.026675554368592185
-CC2(C)C=C(C)C1=CC(OCC)=CC=C1N2,0.05522147585284508
-CC(NC(N1CC(N(C2=CC(Cl)=CC(Cl)=C2)C1=O)=O)=O)C,0.03634528529867737
-IC1=CC=C(N=C(OCCC)N(CCC)C2=O)C2=C1,0.03224060518839999
-CS/C(C)=N/OC(N(SN(C(O/N=C(SC)\C)=O)C)C)=O,0.03385342347822937
-S=C(SSC(N(C)C)=S)N(C)C,0.04990997903448147
-O=C(OCC)CSC1=NC(C(C)(C)C)=NN1C(N(C)C)=O,0.03816748004747272
-ClC1=CC=C(C2=CC(F)=CC=C2NC(C3=CN(C)N=C3C(F)F)=O)C=C1Cl,0.02921233570136655
-FC(F)(F)C1=CC(OC2=CC=CC(C(NC3=CC=C(F)C=C3)=O)=N2)=CC=C1,0.032154821211279785
-C(CNC(=S)[S-])NC(=S)[S-].[Zn+2],0.04460661819584039
-CN(C)C1=NC(C)=C(C)C(OC(N(C)C)=O)=N1,0.05161859628615915
-ClC1=C(C3(OCCO3)CN2C=NC=N2)C=CC(Cl)=C1,0.042646674541424644
-ClC(N=CC=C3)=C3C(NC1=CC=CC=C1C2=CC=C(Cl)C=C2)=O,0.03787805062535496
-O=C1C(C(C)(C)C)=NN=C(SC)N1N,0.060666030886662975
-O=C(SC/C(Cl)=C(Cl)\Cl)N(C(C)C)C(C)C,0.04266993811611698
-CN(C)C(S[Zn]SC(N(C)C)=S)=S,0.04250965492362519
-OC1(CN3C=NC=N3)C(C)(C)CCC1CC2=CC=C(Cl)C=C2,0.04095937862019833
-O=S(C1=C(C(F)(F)F)C=CC=N1)(NC(NC2=NC(OC)=CC(OC)=N2)=O)=O,0.0326520524201809
-OC(C(Cl)(Cl)Cl)P(OC)(OC)=O,0.05166319030658296
-COC1=CC(=NC(=N1)OC2=C(C(=CC=C2)OC3=NC(=CC(=N3)OC)OC)C(=O)[O-])OC.[Na+],0.030507347552487064
-OC1(CN3C=NC=N3)C(C)(C)CCC1CC2=CC=C(Cl)C=C2,0.043148047046675374
-O=C(N(C1=C(CC)C=CC=C1CC)COC)CCl,0.05189661748967905
-NS(C1=CC([N+]([O-])=O)=C(N(CCC)CCC)C([N+]([O-])=O)=C1)(=O)=O,0.04042042788372036
-CCCOC/C(N2C=CN=C2)=N\C1=CC=C(C=C1C(F)(F)F)Cl,0.04049199977868229
-O=C1C(C(C)(C)C)=NN=C(SC)N1N,0.06719929397120725
-O=C1OC3(CCCCC3)C(OC(C(C)(C)CC)=O)=C1C2=C(Cl)C=C(Cl)C=C2,0.03578732146400678
-CC1=CC(C)=CC(C)=C1C2=C(OC(CC(C)(C)C)=O)C3(CCCC3)OC2=O,0.039947970982482275
-CC1(C)C(C(OCC2=COC(CC3=CC=CC=C3)=C2)=O)C1/C=C(C)\C,0.04432099700732809
-ClC(C=C2)=CC=C2C1=C(C#N)C(Br)=C(C(F)(F)F)N1COCC,0.036799624938222635
-CC(C1=CC=CC=C1)(C)C[Sn](CC(C)(C2=CC=CC=C2)C)(CC(C)(C3=CC=CC=C3)C)O[Sn](CC(C)(C4=CC=CC=C4)C)(CC(C)(C6=CC=CC=C6)C)CC(C)(C5=CC=CC=C5)C,0.014249578440471417
-ClC(C=C(Cl)C=C2)=C2C(OCC=C)CN1C=NC=C1,0.05047450068604942
-CC(C)(C2=CC=C(C=C2)OC1CCCCC1OS(OCC#C)=O)C,0.04279938325518071
-C#CCOC(C(NCCC2=CC=C(OCC#C)C(OC)=C2)=O)C1=CC=C(Cl)C=C1,0.0369041241749624
-CCOC(C(OC3=CC=C(C=C3)OC2=NC1=CC=C(Cl)C=C1N=C2)C)=O,0.04157699893895499
-CC(C(CN2C=NC=N2)(C3=CC=C(C=C3)Cl)O)C1CC1,0.05326004956767166
-FC(F)(C1=CC=CC(N2CC(C(C2=O)Cl)CCl)=C1)F,0.05030195369030707
-ClC(C=C(Cl)C=C2)=C2C(OCC=C)CN1C=NC=C1,0.05350296944357954
-CC1=CC(C)=CC(C)=C1C2=C(OC(CC(C)(C)C)=O)C3(CCCC3)OC2=O,0.042917075351131324
-CCC1=C(C(NC(C2=CC=CS2)C#N)=O)SC(NCC)=N1,0.05118073847356783
-O=C(C2C(/C=C(C)/C)C(C)2C)OC1CC(C(CC#C)=C1C)=O,0.05492821614526029
-O=C(C(O3)(C)C(C=C2)=CC=C2OC1=CC=CC=C1)N(NC4=CC=CC=C4)C3=O,0.045407278177700156
-BrC([H])([H])[H],0.1790632506173469
-C/C(C)=C(N(C(CCl)=O)CCOCC)/C1=CC=CC=C1,0.057470413386035736
-FC(F)(F)C1=CC=C(OCCCOC2=C(Cl)C=C(OC/C=C(Cl)\Cl)C=C2Cl)N=C1,0.034818667907167616
-FC(F)(F)C1=CC(NC(N(C)C)=O)=CC=C1,0.07395704796137248
-ClC1=C(C2=NN=C(C3=C(Cl)C=CC=C3)N=N2)C=CC=C1,0.05706818624978773
-ClC1=CC=C(C2=CC(F)=CC=C2NC(C3=CN(C)N=C3C(F)F)=O)C=C1Cl,0.04200781934177246
-ClC1=CC=C(CN(C)/C(C)=N/C#N)C=N1,0.07859017665904088
-OC(C2=C(Cl)C=CC=C2)(C3=CN=CN=C3)C1=CC=C(Cl)C=C1,0.05283880559178284
-NC(C1=C(Cl)C=CC=C1Cl)=O,0.09261856560930491
-O=C(N(C2=C(C)C=CC=C2C)CN1N=CC=C1)CCl,0.06336648858092589
-FC(F)(F)C1=C(F)C=CC(OC(CC)C(NCC2=CC=CC=C2)=O)=C1,0.049813316199071624
-O=C(C(CCCC4)=C4C3=O)N3C(C(F)=C2)=CC1=C2OCC(N1CC#C)=O,0.05079984353648191
-O=C1C(C3=CC=CC(C(F)(F)F)=C3)=C(NC)OC1C2=CC=CC=C2,0.055204779037407746
-NC(NCCCCCCCCNC(N)=N)=N,0.08102032708037427
-O=C(O)COC(C=CC(Cl)=C1)=C1C,0.0947069010825298
-ClC1=CC=C(OC(N2N=CN=C2)C(O)C(C)(C)C)C=C1,0.06424027322808253
-O=C(OC(C#N)C2=CC(OC3=CC=CC=C3)=C(F)C=C2)C1C(/C=C(Cl)/Cl)C(C)1C,0.044210334070631574
-CNC(O/N=C(C)\SC)=O,0.11836501403389492
-O=C1C(C2=CC=CC=C2)=NN=C(C)N1N,0.09643315995145703
-O=C(NS(C2=CC=CC=C2Cl)(=O)=O)NC1=NC(C)=NC(OC)=N1,0.05590140200157206
-FC(F)(F)C(C=C3S(C)(=O)=O)=CC=C3C(C1=C(C2CC2)ON=C1)=O,0.05566064749641608
-O=C(NC(NC2=CC(Cl)=C(OC(F)(F)C(F)C(F)(F)F)C=C2Cl)=O)C1=C(F)C=CC=C1F,0.03990998658130422
-CC(C(C(C)3C)C(OC(C1=CC=CC(OC2=CC=CC=C2)=C1)C#N)=O)3C,0.06009909138187043
-CCCN(C(N2C=CN=C2)=O)CCOC1=C(Cl)C=C(Cl)C=C1Cl,0.05707983190600125
-C[C@H]([C@@H](N(C)C)CC5)O[C@H]5O[C@@H]2[C@@H](C)C(C1=C[C@]3([H])[C@](CC[C@@]4([H])[C@]([H])3C[C@H](O[C@@H]6O[C@@H](C)[C@H](OC)[C@@H](OCC)[C@H]6OC)C4)([H])[C@@]([H])1CC(O[C@@H](CC)CCC2)=O)=O,0.028877084613265123
-O=C(CC3=CC=CC=C3)N/C(C2=C(F)C(F)=CC=C2C(F)(F)F)=N\OCC1CC1,0.053352320292409515
-O=C(NC1=CC(Cl)=C(N2N=C(C(C)(C)C)OC2=O)C=C1)N(C)C,0.06493710428214157
-O=C(OC(C#N)C2=CC(OC3=CC=CC=C3)=C(F)C=C2)C1C(/C=C(Cl)/Cl)C(C)1C,0.052499767865960584
-CC1=CC=C(C)C=C1OCC2=CC=CC=C2/C(C(NC)=O)=N/OC,0.07046793589427701
-C2(C3=CC=CO3)=NC1=CC=CC=C1N2,0.12486833177320307
-O=C1C(/C(CC)=N/OCC)=C(O)CC(C2=C(C)C=C(C)C=C2C)C1,0.06981686853252955
-S=P(OC)(OC)OC1=CC(C)=C([N+]([O-])=O)C=C1,0.08332310268057162
-CC1C(C3=CC=C(C=C3)Cl)SC(N1C(NC2CCCCC2)=O)=O,0.06546156290207059
-CCOP(OCC)(SCN1C(OC2=C1C=CC(Cl)=C2)=O)=S,0.0636200517424888
-O=C(NC(NCC)=O)/C(C#N)=N\OC,0.11857948837239812
-CCNC1=NC(NC(C)C)=NC(Cl)=N1,0.10941971287651023
-CCOC(CCN(C(C)C)SN(C)C(OC1=CC=CC2=C1OC(C)(C)C2)=O)=O,0.05846135558242613
-CC2COC(O2)(C4=CC=C(C=C4Cl)OC3=CC=C(Cl)C=C3)CN1C=NC=N1,0.05932124091140686
-CC(C)(C2=CC=C(C=C2)OC1CCCCC1OS(OCC#C)=O)C,0.06904967382858089
-O=C(C(C)(C)C)C(N2C=NC=N2)OC1=CC=C(Cl)C=C1,0.08272375649019124
-O=C(OCC)NCCOC1=CC=C(OC2=CC=CC=C2)C=C1,0.08196801536106943
-FC1=CC=CC(F)=C1C(NC(NC2=CC(Cl)=C(C(Cl)=C2F)F)=O)=O,0.06507320207279278
-ClC2=CC=C(C=N2)CN1CCN/C1=N\[N+]([O-])=O,0.0973945952590747
-CC(C)(C(C(N3C=NC=N3)OC1=CC=C(C2=CC=CC=C2)C=C1)O)C,0.07409262028018154
-CCCCN(SN(C(OC2=C1OC(C)(C)CC1=CC=C2)=O)C)CCCC,0.06569530810416269
-C1(NC2=CC=CC=C2)=CC=CC=C1,0.14773454395291782
-N#CC(C2=CC(OC3=CC=CC=C3)=CC=C2)OC(C(C(C)C)C1=CC=C(Cl)C=C1)=O,0.05953797389131243
-ClC1=CC=C(C(C(C)C)C(OC(C#N)C2=CC(OC3=CC=CC=C3)=CC=C2)=O)C=C1,0.05953797389131243
-ClC1=CC=C(C(C(C)C)C(OC(C#N)C2=CC(OC3=CC=CC=C3)=CC=C2)=O)C=C1,0.05953797389131243
-O=C2N(SC(Cl)(Cl)Cl)C(C1=CC=CC=C12)=O,0.08430066662269543
-O=C(C2C(/C=C(Cl)/Cl)C(C)2C)OCC1=CC(OC3=CC=CC=C3)=CC=C1,0.06389160712181856
-ClC1=CC=C2C(N=CC(OC3=CC=C(OC(C)C(OCCO/N=C(C)/C)=O)C=C3)=N2)=C1,0.05632150550177753
-CC(C)(C2=CC=C(C=C2)OC1CCCCC1OS(OCC#C)=O)C,0.07190296604559293
-CC(C)C1=NN(C(=O)N1N)C(=O)NC(C)(C)C,0.10485300866417636
-OC(C2(CC2)C(NC1=C(Cl)C=C(Cl)C=C1)=O)=O,0.09303171987631087
-CCOC1=CC=C(C(C)(C)COCC2=CC(OC3=CC=CC=C3)=CC=C2)C=C1,0.06773123883198195
-CC(OC(C(C1=CC=C(Br)C=C1)(C2=CC=C(Br)C=C2)O)=O)C,0.060497742776698574
-CC1=C(N2C(S3)=NN=C2)C3=CC=C1,0.1373938645607217
-CC1=CC=C(C2=CC=CC=C12)C,0.1728291127183792
-O=C1C(/C(CCC)=N\OCC)=C(O)CC(C2CSCCC2)C1,0.08603044408485085
-O=C(OC1=CC(C)=C(SC)C(C)=C1)NC,0.1242747128033579
-S=C1S[Mn+2]SC(NCCN1)=S,0.10855557507359069
-FC(F)(F)C1=CC([N+]([O-])=O)=C(N(CC(C)=C)CC)C([N+]([O-])=O)=C1,0.08701831648543702
-O=C(OCC)C(CC(OCC)=O)SP(OC)(OC)=S,0.08778355070659401
-O=C(OC(C)C)NC1=CC=CC=C1,0.16181616210899355
-IC1=CC=C(C(OC)=O)C(S([N-]C(NC2=NC(C)=NC(OC)=N2)=O)(=O)=O)=C1.[Na+],0.05611797964648073
-CO\N=C(C(OC)=O)/C1=C(CO/N=C(C)/C2=CC=CC(C(F)(F)F)=C2)C=CC=C1,0.07272797449373557
-C(C(CCl)O)O,0.27139624684320934
-ClC1=CC=CC(NC(OC(C)C)=O)=C1,0.14040948460452124
-FC(F)(F)C1=CC(NC(N(C)C)=O)=CC=C1,0.12919765885228982
-CC1=C(Cl)C(NS(C2=NN3C(N=C(OC)C=C3OC)=N2)(=O)=O)=C(Cl)C=C1,0.07172655770478076
-ClCCOC1=CC=CC=C1S(NC(NC2=NC(OC)=NC(C)=N2)=O)(=O)=O,0.07465930346752149
-O=[N+]([O-])C1=CC(C(F)(F)F)=CC([N+]([O-])=O)=C1N(CCC)CCC,0.08947770521301585
-CN1C=C(C(=N1)C(F)F)C(=O)NC2=CC=CC3=C2C4CCC3C4=C(Cl)Cl,0.07583481070072216
-O=C(NC(NCC)=O)/C(C#N)=N\OC,0.15289184711551862
-FC(C=CC=C2F)=C2C(NC(NC1=CC=C(OC(F)(F)C(F)OC(F)(F)F)C(Cl)=C1)=O)=O,0.062106180868884746
-CC1=CC=CC(=C1S(=O)(=O)NC(=O)NC2=NC(=NC(=N2)OCC(F)(F)F)N(C)C)C(=O)O,0.06396281173215432
-O=S(C1=C(C)C=CC=C1C(OC)=O)(NC(NC2=NC(OCC(F)(F)F)=NC(N(C)C)=N2)=O)=O,0.062140866929396014
-ClC(C=C2)=CC=C2CCC(C#N)(CN3C=NC=N3)C1=CC=CC=C1,0.09203781459712614
-O=C(NC1=CC(Cl)=C(Cl)C=C1)N(C)OC,0.12545469800745823
-O=C(NS(N(C(C)C)C)(=O)=O)C1=CC(N2C(C=C(C(F)(F)F)N(C)C2=O)=O)=C(F)C=C1Cl,0.06269313377509025
-O=C(C2=C(Cl)C=CC=C2Cl)NCC1=NC=C(C(F)(F)F)C=C1Cl,0.08212099927021806
-S=C(NC(OC)=O)NC(C=CC=C1)=C1NC(NC(OC)=O)=S,0.09345959256991566
-CN/C(NCC1=CN=C(Cl)S1)=N\[N+]([O-])=O,0.13016764551401042
-CC(C)(C)C1CCC2(OC(CN(CCC)CC)CO2)CC1,0.11029461661878942
-O=C(N2CCOCC2)/C=C(C3=CC=C(Cl)C=C3)/C1=CC=C(OC)C(OC)=C1,0.08766124641710438
-O=C(NC2=CC=C(C(C(F)(F)F)(F)C(F)(F)F)C=C2C)C1=CC=CC(I)=C1C(NC(C)(C)CS(C)(=O)=O)=O,0.04982487508940451
-CC1=CC(C3CC3)=NC(NC2=CC=CC=C2)=N1,0.15801924849469393
-CC(C)N2C(C1=CC=CC=C1NS(=O)2=O)=O,0.14982590230152565
-O=C(N2CCOCC2)/C=C(C3=CC=C(Cl)C=C3)/C1=CC=C(OC)C(OC)=C1,0.09281779032399287
-NS(C1=CC([N+]([O-])=O)=C(N(CCC)CCC)C([N+]([O-])=O)=C1)(=O)=O,0.10393824312956665
-CC1=CC(=C(C(=C1C(=O)C2=C(C(=CN=C2OC)Cl)C)OC)OC)OC,0.09950572862076837
-CC(C(O)=O)OC(C=CC(Cl)=C1)=C1Cl,0.15527684755838006
-COCC1=C(F)C(F)=C(COC(C2C(/C=C/C)C(C)2C)=O)C(F)=C1F,0.10573252781458294
-O=C1N(/N=C/C2=CC=CN=C2)CC(C)=NN1,0.18091653347462547
-CC(C(=O)OCC1CCCO1)OC2=CC=C(C=C2)OC3=CN=C4C=C(C=CC4=N3)Cl,0.09210345974638111
-N#CC1=C(Cl)C(Cl)=C(Cl)C(C#N)=C1Cl,0.15042627044387033
-C2(C3=CSC=N3)=NC1=CC=CC=C1N2,0.19876005527119617
-CCC(NC1=C([N+]([O-])=O)C=C(C(C)(C)C)C=C1[N+]([O-])=O)C,0.13747135609511818
-N=C(N)NCCCCCCCCCCCC.OC(C)=O,0.1461167287581588
-O=C(NC(C)(C)C#C)C1=CC(Cl)=CC(Cl)=C1,0.16593276232681306
-CC1=CC(=C(C(=C1C(=O)C2=C(C(=CN=C2OC)Cl)C)OC)OC)OC,0.11727460798675288
-CC1=C(C(=O)CC1OC(=O)C2C(C2(C)C)C=C(C)C)CC=CC=C.CC1=C(C(=O)CC1OC(=O)C2C(C2(C)C)C=C(C)C(=O)OC)CC=CC=C,0.06134969850332702
-CC(C)C(C2=CN=CN=C2)(O)C1=CC=C(OC(F)(F)F)C=C1,0.13801406108477293
-CC1OC(C)OC(C)OC(C)O1,0.249701719945447
-O=C(NCC#N)C1=CN=CC=C1C(F)(F)F,0.19244308898713228
-FC(F)C1=NC(C(F)(F)F)=C(C2=NCCS2)C(CC(C)C)=C1C(OC)=O,0.11151045388522976
-FC(F)(F)C1=CC=CC(OCC2=C(/C(C(OC)=O)=C\OC)C=CC=C2)=N1,0.1252316956521325
-O=C(N(COCC)C1=C(CC)C=CC=C1C)CCl,0.17607780933998252
-O=C(N(CC)CC)C(OC1=CC=CC2=C1C=CC=C2)C,0.17526912017369997
-CC12CC(C(N(C3=CC(Cl)=CC(Cl)=C3)C2=O)=O)1C,0.16893203350457175
-O=C(NC)OC(C=CC=C1)=C1OC(C)C,0.22939978025412716
-O=C(NN(C(C)(C)C)C(C2=CC(C)=CC(C)=C2)=O)C1=CC=C(C=C1)CC,0.13731668655832788
-O=C1C(Cl)=C(N)C=NN1C2=CC=CC=C2,0.2255879747532767
-CC1(C(C1C(=O)OC(C#N)C2=CC(=CC=C2)OC3=CC=CC=C3)C=C(Cl)Cl)C,0.12010651237688001
-O=S(C(C)=C(C)S(CC1)(=O)=O)1=O,0.23778815168220852
-O=S(NC1=NN2C(C(OC)=CN=C2OC)=N1)(C3=C(OCC(F)F)C=CC=C3C(F)(F)F)=O,0.1034404543369562
-O=C(C2C(/C=C(Cl)/Cl)C(C)2C)OCC1=CC(OC3=CC=CC=C3)=CC=C1,0.12778321424363712
-ClC1=C(CC(C3(CC3)Cl)(O)CN2N=CNC2=S)C=CC=C1,0.1452393166315865
-CC(C)NC1=NC(=NC(=N1)Cl)NC(C)C,0.22201922216305578
-FC(F)(F)C1=CC(Cl)=C(OC2=CC=C([N+]([O-])=O)C(OCC)=C2)C=C1,0.14653013191720715
-S=C(NC(OC)=O)NC(C=CC=C1)=C1NC(NC(OC)=O)=S,0.15479245019392282
-O=C(NCC)C(OC(NC1=CC=CC=C1)=O)C,0.23278744254805916
-CC(C)(C(CCC1=CC=C(Cl)C=C1)(CN2C=NC=N2)O)C,0.17867678986550448
-O=C(O)C1=CC=CC=C1S(=O)(NC(N(C2=NC(OC)=NC(C)=N2)C)=O)=O,0.14421924681891674
-O=C1C(Cl)=C(N)C=NN1C2=CC=CC=C2,0.270705569703932
-O=CC1=CC=CO1,0.624453213155231
-O=C(NC)OC1=C(C=CC=C2)C2=CC=C1,0.2991731924668564
-ClC1=CC(Cl)=C(OCC(O)=O)C=C1,0.28049546719325014
-NC1=C(C(OC2=CC=CC=C2)=CC=C1[N+]([O-])=O)Cl,0.23425888009292972
-O=C(N(C1=C(C)C=CC=C1CC)COC(C)C)CCl,0.22199225860138957
-CCCCCCCCCC[N+](C)(C)CCCCCCCCCC.[Cl-],0.1767583631976715
-C/C=C/C(OC1=C([N+]([O-])=O)C=C([N+]([O-])=O)C=C1C(C)CCCCCC)=O,0.17563456769307506
-O=C(N(COCC)C1=C(CC)C=CC=C1C)CCl,0.24799169923196304
-[S]C(NCCNC(S[Mn])=S)=S,0.2525424903682367
-O=C(C2=CN(C)N=C2C(F)F)NC1=C(C3CC3C4CC4)C=CC=C1,0.2021971466240455
-CS(NC1=C(Cl)C=C(Cl)C(N2C(N(C(F)F)C(C)=N2)=O)=C1)(=O)=O,0.1730416993562668
-O=C(N(COCC)C1=C(CC)C=CC=C1C)CCl,0.2557761861991325
-CC(CCCCCC)C1=C(OC(/C=C\C)=O)C([N+]([O-])=O)=CC([N+]([O-])=O)=C1,0.19484459853450517
-CS(C(C=C2Cl)=CC=C2C(C(C(CCC1)=O)C1=O)=O)(=O)=O,0.2189994026791292
-O=C1C(C2=CC=CC=C2)=NN=C(C)N1N,0.36891864539658303
-ClC1=CC(Cl)=C(OCC(O)=O)C=C1,0.33930903289506065
-C[N+](C)(C)CCCl.[Cl-],0.47447507557122687
-CC1(C(C1C(=O)OC(C#N)C2=CC(=CC=C2)OC3=CC=CC=C3)C=C(Cl)Cl)C,0.18015976856532
-O=C(NC2=CC=C(C(C(F)(F)F)(F)C(F)(F)F)C=C2C)C1=CC=CC(I)=C1C(NC(C)(C)CS(C)(=O)=O)=O,0.115769562707734
-FC(C=C3)=CC=C3OC1=CC=NC2=C1C(Cl)=CC(Cl)=C2,0.25962686686321285
-O=C(OC)/C(C1=CC=CC=C1OC2=CC(OC3=CC=CC=C3C#N)=NC=N2)=C/OC,0.20427010160523304
-O=C(C2=CN(C)N=C2C(F)(F)F)NC1=C(C(C)CC(C)C)SC=C1,0.23093421710838027
-BrC1=NN(C3=C(Cl)C=CC=N3)C(C(NC2=C(C)C=C(C#N)C=C2C(NC)=O)=O)=C1,0.17901230859828976
-O=C1C(/C(CC)=N\OC/C=C/Cl)=C(O)CC(CC(C)SCC)C1,0.2389478027971563
-O=C(NC2=CC(OC(C)C)=CC=C2)C1=C(C(F)(F)F)C=CC=C1,0.2690918752347788
-O=C(N(C2=C(C)C=CC=C2C)CN1N=CC=C1)CCl,0.31323206744613685
-O=S(C1=C(CCC(F)(F)F)C=CC=C1)(NC(NC2=NC(C)=NC(OC)=N2)=O)=O,0.2098341392275743
-CN(C)S(N(SC(F)(Cl)Cl)C1=CC=C(C)C=C1)(=O)=O,0.25917417547047744
-ClC1=CC(NC(NC2=CC=CC=C2)=O)=CC=N1,0.37548404132262436
-CCCC1COC(C2=CC=C(Cl)C=C2Cl)(CN3N=CN=C3)O1,0.2805209905967611
-CSC(C1=C2C(N=NS2)=CC=C1)=O,0.4608228380460223
-ClC(Cl)(Cl)SN(C1=O)C(C2C1CC=CC2)=O,0.3260262207586085
-OC1=NOC(C)=C1,0.9991119005328597
-ClC(Cl)(Cl)SN(C1=O)C(C2C1CC=CC2)=O,0.3326798171006209
-ClC1=CC=CC=C1CN2C(C(C)(C)CO2)=O,0.41719152837532353
-O=C(N(CC)CC)C(OC1=CC=CC2=C1C=CC=C2)C,0.36852210915226874
-O=C(C2=CN(C)N=C2C(F)(F)F)NC1=C(C(C)CC(C)C)SC=C1,0.2782339965161208
-CCC1=C(C2=C(OC(C(C)(C)C)=O)N(CCOCC3)N3C2=O)C(CC)=CC(C)=C1,0.24968092026794356
-CCCCOCCOCCOCC1=C(CCC)C=C(OCO2)C2=C1,0.29547465787728056
-CN(C)S(N(SC(F)(Cl)Cl)C1=CC=C(C)C=C1)(=O)=O,0.2879713060783083
-ClC(Cl)(Cl)C(N1CCN(C(NC=O)C(Cl)(Cl)Cl)CC1)NC=O,0.22990526799413355
-CCC(C)N1C(=O)C(=C(NC1=O)C)Br,0.39446112244793224
-CCCCC(C#N)(C2=CC=C(C=C2)Cl)CN1C=NC=N1,0.3670674304254852
-CN(C)S(N(C=N3)N=C3S(N2C(C)=C(Br)C1=CC=C(F)C=C12)(=O)=O)(=O)=O,0.24018572189384213
-FC(O3)(F)OC(C3=CC=C2)=C2C1=CNC=C1C#N,0.4553054263341003
-O=C(OCCC)NCCCN(C)C.[H]Cl,0.5072793699625824
-O=C(C(C)(C)C)C(N2C=NC=N2)OC1=CC=C(Cl)C=C1,0.3880867710275115
-CCCCCCCCSC(OC1=C(C2=CC=CC=C2)N=NC(Cl)=C1)=O,0.3034972489425892
-CC1C(CC(C(O1)OC2C(C(C(C(C2O)O)O)O)O)N)N=C(C(=O)O)N,0.3057757345866624
-[O-][N+](C1=CC(Cl)=C(N)C(Cl)=C1)=O,0.5651787298028309
-O=C2NC1=C(C(N2C3CCCCC3)=O)CCC1,0.503640251987437
-O=C(OC)C1=CC=CC=C1S(NC(NC2=NC(C)=NC(OC)=N2)=O)(=O)=O,0.31203800675365617
-O=C(C2=CC=CN=C2OC3=CC=CC(C(F)(F)F)=C3)NC1=CC=C(F)C=C1F,0.3033262936121485
-ClC1=CC=CC(C2=CNC=C2C#N)=C1Cl,0.5061481392686851
-CC(CCCCCC)C1=C(OC(/C=C\C)=O)C([N+]([O-])=O)=CC([N+]([O-])=O)=C1,0.3315102548955885
-O=S(C1=C(S(CC)(=O)=O)C=CC=N1)(NC(NC2=NC(OC)=CC(OC)=N2)=O)=O,0.2804534946915948
-FC1=CC=CC(F)=C1C(NC(NC2=CC(Cl)=C(C(Cl)=C2F)F)=O)=O,0.32143014109471235
-C[N+](C)(C)CCCl.[Cl-],0.7907917926187115
-O=C(NC(NC2=CC=C(Cl)C=C2)=O)C1=C(F)C=CC=C1F,0.4023390123323988
-CN(C)C(=S)[S-].CN(C)C(=S)[S-].CN(C)C(=S)[S-].[Fe+3],0.30012414094866885
-O=C(N(C1=C(CC)C=CC=C1CC)COC)CCl,0.4670695574071115
-ClC(C(SCC(O)=O)=C3)=CC(F)=C3\N=C2/SC(N1CCCCN12)=O,0.33345926123075403
-O=P(O)(O)CCCl,0.9066120392542251
-C[N+](C)(C)CCCl.[Cl-],0.860381470369158
-CC(C2)OC(C)CN2C1CCCCCCCCCCC1,0.48316627385722294
-CC(COC2=CC=C(OC3=CC=CC=C3)C=C2)OC1=CC=CC=N1,0.4315900691721648
-[O-][N+](C(C(Cl)=C(Cl)C(Cl)=C1Cl)=C1Cl)=O,0.4774244272684517
-O=C(OCC)C(CC(OCC)=O)SP(OC)(OC)=S,0.43286371555320496
-CC1=CC(=CC(=C1)C(=O)N(C(C)(C)C)NC(=O)C2=C(C3=C(C=C2)OCCC3)C)C,0.3678012132205545
-ClC1=CC=C(Cl)C(C(O)=O)=N1,0.7812519531298828
-ClC1=CC=C(Cl)C(C(O)=O)=N1,0.7812519531298828
-O=C(N(C1=C(C)C=CC=C1CC)C(C)COC)CCl,0.5285529966699751
-O=C(C(C(C)3C)C3/C=C(C)/C)OCC1=CC=CC(OC2=CC=CC=C2)=C1,0.42802021191337764
-O=C(OCCC)NCCCN(C)C.[H]Cl,0.6674728552139242
-ClC1=NC=C(CN2/C(N(C)COC2)=N/[N+]([O-])=O)S1,0.5313410671453993
-CCCCC1=C(OS(=O)(N(C)C)=O)N=C(NCC)N=C1C,0.4930161419173511
-NC1=NC(N)=NC(NC2CC2)=N1,0.9387196585948812
-COCCN(C1=C(C)C=CC=C1C)C(CCl)=O,0.6139034987494355
-ClC(Cl)(Cl)C(N1CCN(C(NC=O)C(Cl)(Cl)Cl)CC1)NC=O,0.36784842879061364
-FC(F)(F)C1=CC=CC(OCC2=C(/C(C(OC)=O)=C\OC)C=CC=C2)=N1,0.4410333629488144
-O=C(OC)NS(C1=CC=C(N)C=C1)(=O)=O,0.7817895162025876
-ClC1=CC=C(CN(C(NC3=CC=CC=C3)=O)C2CCCC2)C=C1,0.5473855891134007
-CO\N=C(C4=NOCCO4)/C(C=CC=C3)=C3OC1=C(F)C(OC2=CC=CC=C2Cl)=NC=N1,0.39448424715427566
-COC(C1=C([N+]([O-])=O)C=CC(OC2=C(Cl)C=C(Cl)C=C2)=C1)=O,0.5465743293153008
-FC1=CC=CC(F)=C1C(OC3)=NC3C2=CC=C(C(C)(C)C)C=C2OCC,0.5202976892967504
-CC1=CC=C(C)C=C1C(C(N3)=O)=C(OC(OCC)=O)C23CCC(OC)CC2,0.5061016308843888
-CC1=CC=C(C)C=C1C(C(N3)=O)=C(OC(OCC)=O)C23CCC(OC)CC2,0.5061016308843888
-O=C(NC3=CC=C(OC(F)(F)F)C=C3)N/N=C(C2=CC=C(C(F)(F)F)C=C2)/CC1=CC=C(C#N)C=C1,0.394944816927872
-OC1=C(C2=CC=CC=C2)C=CC=C1,1.1750384237564568
-O=C(O)C1=NC(Cl)=C(Cl)C(N)=C1Cl,0.8282972172278201
-O=C(N(C1=C(C)C=CC=C1CC)COC(C)C)CCl,0.7047373288933002
-OC1(CN3N=CN=C3)C(C)(C)CC/C1=C\C2=CC=C(Cl)C=C2,0.6406279100538178
-CCOC1=NC(=NC(=N1)NC)NC(=O)NS(=O)(=O)C2=CC=CC=C2C(=O)OC,0.5116896474609399
-FC1=C(NC(NC(C3=C(F)C=CC=C3F)=O)=O)C=CC(OC2=C(Cl)C=C(C(F)(F)F)C=C2)=C1,0.4460202371248177
-CCOC1=CC=C(C=C1OCC)NC(OC(C)C)=O,0.8241033622809132
-CC1=NC(NC2=CC=CC=C2)=NC(C)=C1,1.1091497729605546
-O=C(O)CC2=CC=CC1=CC=CC=C12,1.205650068257516
-CP(CCC(N)C(O)=O)(O)=O,1.2637552440957067
-FC1=C(NC(NC(C3=C(F)C=CC=C3F)=O)=O)C=CC(OC2=C(Cl)C=C(C(F)(F)F)C=C2)=C1,0.4705718098105875
-O=S(NC(N1N=C(OC)N(C)C1=O)=O)(C2=C(C)SC=C2C(OC)=O)=O,0.5993972829782238
-FC1=CC=C2C(SC(C(C)NC(C(C(C)C)NC(OC(C)C)=O)=O)=N2)=C1,0.6543197874203039
-O=C(OC)NC2=NC1=CC=CC=C1N2,1.3076226134187396
-CC(C)OC(/C=C(C)/C=C/CC(C)CCCC(C)(C)OC)=O,0.8052269925229198
-O=C(O)C1=NC(Cl)=C(Cl)C(N)=C1Cl,1.0353715215347752
-BrC1=C(C)C(C(C2=C(C)C=C(OC)C(OC)=C2OC)=O)=C(OC)C=C1,0.6352758309016929
-C[N+]1(C)CCCCC1.[Cl-],1.790706021930536
-CC1(C(NC2=CC=C(O)C(Cl)=C2Cl)=O)CCCCC1,0.9662594125910484
-OC(CNCP(O)(O)=O)=O,1.7743806406081915
-COC1=CC(OC)=NC(NC(NS(CC2=CC=CC=C2C(O)=O)(=O)=O)=O)=N1,0.7795645307119917
-CCOC1=NC(=CC2=NC(=NN21)S(=O)(=O)NC3=C(C=CC=C3Cl)C(=O)OC)F,0.7561469746838736
-COC(/C(C(C=CC=C2)=C2COC1=C(C)C=CC=C1)=N/OC)=O,1.1967534090558043
-O=C/1CC(C(O)=O)CC(C1=C(C2CC2)\O)=O,1.752821172367082
-CC1=C(OC)C=CC=C1C(NN(C(C)(C)C)C(C2=CC(C)=CC(C)=C2)=O)=O,1.1154252951100516
-O=C([O-])C(CC([O-])=C1C(CC)=O)CC1=O.[Ca+2],1.874040503249802
-ClC(C=C2)=CC=C2N1C(CC)=C(C(O)=O)C(C=N1)=O,1.6864553664875628
-C/C(C1=NC=CC=C1C(O)=O)=N\NC(NC2=CC(F)=CC(F)=C2)=O,1.4120001283962829
-O=[N+]([O-])C1=CC(C(F)(F)F)=CC([N+]([O-])=O)=C1N(CCC)CCC,1.4316432834082535
-CN(NC(CCC(O)=O)=O)C,3.0342556221759884
-O=S(N(C)S(C)(=O)=O)(NC(NC1=NC(OC)=CC(OC)=N1)=O)=O,1.3411855059279478
-FC1=CC=CC(F)=C1NS(C3=NN2C(OC)=NC=C(F)C2=N3)(=O)=O,1.391657397996453
-OC(COC1=C(C(N)=C(C(F)=N1)Cl)Cl)=O,1.9605490478397496
-O=C(O)C2=C(N=C(C=CC=C3)C3=C2)C(N1)=NC(C)(C(C)C)C1=O,1.605986191473768
-OC1=CC=C(O)N=N1,4.460830164062196
-CN(C)C(C(C=CC=C2)=C2NS(NC(NC1=NC(OC)=CC(OC)=N1)=O)(=O)=O)=O,1.1780461209768547
-O=S(C1=C(C(OC)=O)C=CC=C1)([N-]C(N2N=C(OCCC)N(C)C2=O)=O)=O.[Na+],1.1894202967675005
-COC1=CC=CC(OC)=C1C(NC2=CC(C(CC)(C)CC)=NO2)=O,1.5854670852219546
-OC1=C(C2=CC=CC=C2)C=CC=C1,3.119727015073393
-OC(CNCP(O)(O)=O)=O,3.3121771958019575
-COC1=CC=CC(OC)=C1C(NC2=CC(C(CC)(C)CC)=NO2)=O,1.8050858655278421
-O=C(OCCC)NCCCN(C)C.[H]Cl,3.0347765817059753
-C[N+]1(C)CCCCC1.[Cl-],4.570309399255547
-ClC1=CC=C(C=C(C)C=N2)C2=C1C(O)=O,3.3387517363764943
-ClC1=C(CC(C3(CC3)Cl)(O)CN2N=CNC2=S)C=CC=C1,2.178589749473798
-COC(/C(C(C=CC=C2)=C2COC1=C(C)C=CC=C1)=N/OC)=O,2.4002085592886893
-OC(CNCP(O)(O)=O)=O,5.559726007239
-[O][N](N=C(NC)NCC1COCC1)=O,4.900819965040488
-OC(C1=NC(Cl)=CC(N)=C1Cl)=O,4.830587434212229
-FC(F)(F)OC(C=CC=C2)=C2S(NC(N1C(N(C)C(OC)=N1)=O)=O)(=O)=O,2.5233463155295692
-OC1=C(C2=CC=CC=C2)C=CC=C1,5.875192118782284
-O=S(NC1=NN2C(N=C(OC)C=C2OC)=N1)(C3=C(OC)N=CC=C3C(F)(F)F)=O,2.302288500094267
-CC(C)OC(NC(C(C)C)C(NC(C1=CC=C(Cl)C=C1)CC(OC)=O)=O)=O,2.5070128670931195
-OC(CNCP(O)(O)=O)=O,7.180326992327815
-O=S(C1=C(S(CC)(=O)=O)N=C2N1C=CC=C2)(NC(NC3=NC(OC)=CC(OC)=N3)=O)=O,2.7556956072872962
-[O-]P(OCC)([H])=O.[O-]P(OCC)([H])=O.[O-]P(OCC)([H])=O.[Al+3],3.6853523538557287
-O=S(C1=C(C(N(C)C)=O)C=CC=N1)(NC(NC2=NC(OC)=CC(OC)=N2)=O)=O,5.08765706618306
diff --git a/paper/figure/corr-1.png b/paper/figure/corr-1.png
deleted file mode 100644
index 8c26052..0000000
--- a/paper/figure/corr-1.png
+++ /dev/null
Binary files differ
diff --git a/paper/figure/crossvalidation.pdf b/paper/figure/crossvalidation.pdf
new file mode 100644
index 0000000..19a9913
--- /dev/null
+++ b/paper/figure/crossvalidation.pdf
Binary files differ
diff --git a/paper/loael-dataset-correlation.pdf b/paper/figure/functional-groups.pdf
index 33dcfdf..65ca730 100644
--- a/paper/loael-dataset-correlation.pdf
+++ b/paper/figure/functional-groups.pdf
Binary files differ
diff --git a/paper/figure/predictions-1.png b/paper/figure/predictions-1.png
deleted file mode 100644
index 759c327..0000000
--- a/paper/figure/predictions-1.png
+++ /dev/null
Binary files differ
diff --git a/paper/figure/test-correlation.pdf b/paper/figure/test-correlation.pdf
new file mode 100644
index 0000000..835c927
--- /dev/null
+++ b/paper/figure/test-correlation.pdf
Binary files differ
diff --git a/paper/figure/test-prediction.pdf b/paper/figure/test-prediction.pdf
new file mode 100644
index 0000000..f02330e
--- /dev/null
+++ b/paper/figure/test-prediction.pdf
Binary files differ
diff --git a/paper/figure/unnamed-chunk-2-1.png b/paper/figure/unnamed-chunk-2-1.png
deleted file mode 100644
index 8c26052..0000000
--- a/paper/figure/unnamed-chunk-2-1.png
+++ /dev/null
Binary files differ
diff --git a/paper/figure/unnamed-chunk-5-1.png b/paper/figure/unnamed-chunk-5-1.png
deleted file mode 100644
index 50699fd..0000000
--- a/paper/figure/unnamed-chunk-5-1.png
+++ /dev/null
Binary files differ
diff --git a/paper/functional-groups.R b/paper/functional-groups.R
index 765817a..47cec0b 100755
--- a/paper/functional-groups.R
+++ b/paper/functional-groups.R
@@ -1,7 +1,8 @@
library("ggplot2")
-#functional_groups <- read.csv("functional-groups-reduced.csv",header=F,row.names = 1)
-functional_groups <- read.csv("functional-groups-reduced4R.csv",header=F)
-print(functional_groups)
-ggplot(functional_groups,aes(x=V1,y=V2,fill=V3),legendTitle="Dataset") + geom_bar(stat="identity", position=position_dodge()) + xlab("") + ylab("") + coord_flip()
-ggsave("functional-groups.pdf")
+functional_groups <- read.csv("data/functional-groups-reduced4R.csv",header=F)
+
+names(functional_groups) = c("V1","V2","Dataset")
+
+ggplot(functional_groups,aes(x=V1,y=V2,fill=Dataset)) + geom_bar(stat="identity", position=position_dodge()) + xlab("") + ylab("") + coord_flip()
+ggsave("figure/functional-groups.pdf")
diff --git a/paper/loael-dataset-comparison-all-compounds.pdf b/paper/loael-dataset-comparison-all-compounds.pdf
deleted file mode 100644
index ee34390..0000000
--- a/paper/loael-dataset-comparison-all-compounds.pdf
+++ /dev/null
Binary files differ
diff --git a/paper/loael-dataset-comparison-common-compounds.pdf b/paper/loael-dataset-comparison-common-compounds.pdf
deleted file mode 100644
index 4dba051..0000000
--- a/paper/loael-dataset-comparison-common-compounds.pdf
+++ /dev/null
Binary files differ
diff --git a/paper/loael-dataset-comparison.rb b/paper/loael-dataset-comparison.rb
deleted file mode 100644
index 5850236..0000000
--- a/paper/loael-dataset-comparison.rb
+++ /dev/null
@@ -1,75 +0,0 @@
-require_relative '../../lazar/lib/lazar'
-include OpenTox
-
-old = Dataset.from_csv_file File.join(File.dirname(__FILE__),"..","regression","LOAEL_mg_corrected_smiles_mmol.csv")
-new = Dataset.from_csv_file File.join(File.dirname(__FILE__),"..","regression","swissRat_chron_LOAEL_mmol.csv")
-
-combined_compounds = old.compound_ids & new.compound_ids
-
-compound_vector = []
-value_vector = []
-dataset_vector = []
-
-old_median = []
-new_median = []
-
-errors = []
-combined_compounds.each do |cid|
- c = Compound.find cid
- old_values = old.values(c,old.features.first)
- old_median << -Math.log(old_values.mean)
- old_values.each do |v|
- compound_vector << c.smiles
- value_vector << -Math.log(v.to_f)
- dataset_vector << old.name
- end
- new_values = new.values(c,new.features.first)
- new_median << -Math.log(new_values.mean)
- new_values.each do |v|
- compound_vector << c.smiles
- value_vector << -Math.log(v)
- dataset_vector << new.name
- end
-end
-old_median.each_index do |i|
- errors[i] = (old_median[i] - new_median[i]).abs unless old_median[i] == new_median[i]
-end
-rmse = 0
-mae = 0
-errors.compact.each do |e|
- rmse += e**2
- mae += e
-end
-rmse = Math.sqrt(rmse/errors.size)
-mae = mae/errors.size
-
-=begin
-R.assign "smi", compound_vector
-R.assign "values", value_vector
-R.assign "dataset", dataset_vector
-R.eval "df <- data.frame(factor(smi),values,factor(dataset))"
-R.eval "df$smi <- reorder(df$factor.smi,df$values)"
-R.eval "img <- ggplot(df, aes(smi,values,ymin = min(values), ymax=max(values),color=dataset))"
-R.eval "img <- img + ylab('-log(LOAEL mg/kg_bw/day)') + xlab('Compound') + theme(axis.text.x = element_blank())"
-R.eval "img <- img + geom_point()"
-
-R.eval "ggsave(file='/home/ch/opentox/lazar-nestec-data/loael-dataset-comparison-mmol_kg_day.svg', plot=img,width=12, height=8)"
-=end
-#img <- ggplot(data, aes(SMILES,LOAEL,ymin = min(LOAEL), ymax=max(LOAEL),color=Dataset))
-
-#img <- img + ylab('-log(LOAEL mg/kg_bw/day)') + xlab('Compound') + theme(axis.text.x = element_blank())
-
-#img <- img + geom_point()
-
-#print(img)
-
-R.assign "Mazzatorta", old_median
-R.assign "SwissFederalOffice", new_median
-R.eval "df <- data.frame(Mazzatorta,SwissFederalOffice)"
-R.eval "ggplot(df, aes(Mazzatorta,SwissFederalOffice)) + geom_point() + geom_abline(intercept=0.0) "
-R.eval "ggsave(file='/home/ch/src/lazar-nestec-data/paper/loael-dataset-correlation.pdf')"
-
-puts "Correlation Mazzatorta/SwissFederalOffice:"
-puts "\tr^2: #{R.eval("cor(Mazzatorta,SwissFederalOffice,use='complete')").to_f**2}"
-puts "\tRMSE: #{rmse}"
-puts "\tMAE: #{mae}"
diff --git a/paper/loael.Rmd b/paper/loael.Rmd
index a94e88a..29456a6 100644
--- a/paper/loael.Rmd
+++ b/paper/loael.Rmd
@@ -8,9 +8,8 @@ keywords: (Q)SAR, read-across, LOAEL
date: \today
abstract: " "
documentclass: achemso
-bibliography: references.bib
+bibliography: references.bibtex
bibliographystyle: achemso
-biblio-style: achemso
output:
pdf_document:
fig_caption: yes
@@ -21,14 +20,36 @@ Introduction
Christoph + Elena + Benoit
-The main objectives of this study are
+The quality and reproducibility of (Q)SAR and read-across predictions is a controversial topic in the toxicological risk-assessment community. Although model predictions can be validated with various procedures it is rarely possible to put the results into the context of experimental variability, because replicate experiments are rarely available.
-- to investigate the experimental variability of LOAEL data
+With missing information about the variability of experimental toxicity data it is hard to judge the performance of predictive models and it is tempting for model developers to use aggressive model optimisation methods that lead to impressive validation results, but also to overfitted models with little practical relevance.
-- develop predictive model for lowest observed effect levels
+In this study we intend to compare model predictions with experimental variability, using chronic oral rat lowest observed adverse effect levels (LOAEL) as toxicity endpoint.
+We are using two datasets, one from [@mazzatorta08] (*Mazzatorta* dataset) and one from the Swiss Federal Office of TODO (*Swiss Federal Office* dataset).
-- compare the performance of model predictions with experimental
- variability
+Elena: do you have a reference and the name of the department?
+
+```{r echo=F}
+t = read.csv("data/test.csv")
+```
+
+`r length(unique(t$SMILES))` compounds are common in both datasets and we use them as a test set in our investigation. For this test set we will
+
+- compare the structural diversity of both datasets
+- compare the LOAEL values in both datasets
+- build prediction models based on the Mazzatorta, Swiss Federal Office datasets and a combination of both
+- predict LOAELs of the test set
+- compare predictions with experimental variability
+
+With this investigation we also want to support the idea of reproducible research, by providing all datasets and programs that have been used to generate this manuscript under a TODO license.
+
+A self-contained docker image with all program dependencies required for the reproduction of these results is available from TODO.
+
+Source code and datasets for the reproduction of this manuscript can be downloaded from the GitHub repository TODO. The lazar framework [@Maunz2013] is also available under a GPL License from https://github.com/opentox/lazar.
+
+TODO: github tags
+
+Elena: please check if this publication strategy is ok for the Swiss Federal Office
Materials and Methods
=====================
@@ -36,69 +57,87 @@ Materials and Methods
Datasets
--------
+```{r echo=F}
+m = read.csv("data/mazzatorta.csv",header=T)
+s = read.csv("data/swiss.csv",header=T)
+t = read.csv("data/test.csv",header=T)
+c = read.csv("data/combined.csv",header=T)
+```
+
### Mazzatorta dataset
-Just referred to the paper 2008.
+The first dataset (*Mazzatorta* dataset for further reference) originates from
+the publication of [@mazzatorta08]. It contains chronic (> 180 days) lowest
+observed effect levels (LOAEL) for rats (*Rattus norvegicus*) after oral
+(gavage, diet, drinking water) administration. The Mazzatorta dataset consists
+of `r length(m$SMILES)` LOAEL values for `r length(unique(m$SMILES))` unique
+chemical structures.
### Swiss Federal Office dataset
Elena + Swiss Federal Office contribution (input)
-Only rat LOAEL values were used for the current investigation, because
-they correspond directly to the Mazzatorta dataset.
+The Swiss Federal Office dataset consists of `r length(s$SMILES)` LOAEL values
+for `r length(unique(s$SMILES))` unique chemical structures.
### Preprocessing
-Christoph
+Chemical structures in both datasets were initially represented as SMILES strings
+[@doi:10.1021/ci00057a005]. Syntactically incorrect and missing SMILES were
+generated from other identifiers (e.g. names, CAS numbers). Unique SMILES from the OpenBabel library [@OBoyle2011] were used for the identification of duplicated structures.
+
+Studies with undefined or empty LOAEL entries were removed from the datasets. LOAEL values were converted to mmol/kg_bw/day units. For prediction, validation and visualisation purposes -log10 transformations are used.
-Chemical structures in both datasets are represented as SMILES strings
-(Weininger 1988). Syntactically incorrect and missing SMILES were
-generated from other identifiers (e.g names, CAS numbers) when possible.
-Studies with undefined (“0”) or empty LOAEL entries were removed for
-this study.
+David: please check if we have missed something
+
+### Derived datasets
+
+Two derived datasets were obtained from the original datasets:
+
+The *test* dataset contains data of compounds that occur in both datasets. Exact duplications of LOAEL values were removed, because it is very likely that they originate from the same study.
+The test dataset has `r length(t$SMILES)` LOAEL values for `r length(unique(t$SMILES))` unique chemical structures.
+
+The *combined* dataset is the union of the Mazzatorta and the Swiss Federal Office dataset and it is used to build predictive models. Exact LOAEL duplications were removed, as for the test dataset.
+The combined dataset has `r length(c$SMILES)` LOAEL values for `r length(unique(c$SMILES))` unique chemical structures.
Algorithms
----------
-Christoph
-
-For this study we are using the modular lazar (*la*zy *s*tructure
-*a*ctivity *r*elationships) framework (Maunz et al. 2013) for model
+In this study we are using the modular lazar (*la*zy *s*tructure
+*a*ctivity *r*elationships) framework [@Maunz2013] for model
development and validation.
lazar follows the following basic workflow: For a given chemical
-structure it searches in a database for similar structures (neighbors)
-with experimental data, builds a local (Q)SAR model with these neighbors
-and uses this model to predict the unknown activity of the query
-compound. This procedure resembles an automated version of *read across*
+structure lazar
+
+- searches in a database for similar structures (*neighbors*)
+with experimental data,
+- builds a local QSAR model with these neighbors
+and
+- uses this model to predict the unknown activity of the query
+compound.
+
+This procedure resembles an automated version of *read across*
predictions in toxicology, in machine learning terms it would be
classified as a *k-nearest-neighbor* algorithm.
Apart from this basic workflow lazar is completely modular and allows
-the researcher to use any algorithm for neighbor identification and
-local (Q)SAR modelling. Within this study we are using the following
+the researcher to use any algorithm for similarity searches and
+local QSAR modelling. Within this study we are using the following
algorithms:
### Neighbor identification
-Christoph
-
-Similarity calculations are based on MolPrint2D fingerprints (Bender et
-al. 2004) from the OpenBabel chemoinformatics library (OBoyle et al.
-2011).
+Similarity calculations are based on MolPrint2D fingerprints [@doi:10.1021/ci034207y] from the OpenBabel chemoinformatics library [@OBoyle2011].
The MolPrint2D fingerprint uses atom environments as molecular
representation, which resemble basically the chemical concept of
functional groups. For each atom in a molecule it represents the
-chemical environment with the atom types of connected atoms.
+chemical environment using the atom types of connected atoms.
-The main advantage of MolPrint2D fingerprints over fingerprints with
-predefined substructures (such as OpenBabel FP3, FP4 or MACCs
-fingerprints) is that it may capture substructures of toxicological
-relevance that are not included in predefined substructure lists.
+MolPrint2D fingerprints are generated dynamically from chemical structures and do not rely on predefined lists of fragments (such as OpenBabel FP3, FP4 or MACCS fingerprints or lists of toxicophores/toxicophobes). This has the advantage that they may capture substructures of toxicological relevance that are not included in other fingerprints.
Preliminary experiments have shown that predictions with MolPrint2D
-fingerprints are indeed more accurate than fingerprints with predefined
-substructures.
+fingerprints are indeed more accurate than other OpenBabel fingerprints.
From MolPrint2D fingerprints we can construct a feature vector with all
atom environments of a compound, which can be used to calculate chemical
@@ -106,27 +145,38 @@ similarities.
[//]: # https://openbabel.org/docs/dev/FileFormats/MolPrint2D_format.html#molprint2d-format
-The chemical similarity between two compounds is expressed as the
-proportion between atom environments common in both structures and the
-total number of atom environments (Jaccard/Tanimoto index, [@eq:jaccard]).
+The chemical similarity between two compounds A and B is expressed as the
+proportion between atom environments common in both structures $A \cap B$ and the
+total number of atom environments $A \cup B$ (Jaccard/Tanimoto index, [@eq:jaccard]).
$$ sim = \frac{|A \cap B|}{|A \cup B|} $$ {#eq:jaccard}
-$A$ atom environments of compound A, $B$ atom environments of compound B.
+A threshold of $sim > 0.1$ is used for the identification of neighbors for local QSAR models.
+Compounds with the same structure as the query structure are eliminated from the neighbors to obtain an unbiased prediction.
-### Local (Q)SAR models
+### Local QSAR models and predictions
-Christoph
+Only similar compounds (*neighbors*) are used for local QSAR models.
+In this investigation we are using a weighted partial least squares regression (PLS) algorithm for the prediction of quantitative properties.
+First all fingerprint features with identical values across all neighbors are removed.
+The remaining set of features is used as descriptors for creating a local weighted PLS model with atom environments as descriptors and model similarities as weights. The `plsr` function of the `pls` R package [@pls] is used for this purpose.
+Finally the local PLS model is applied to predict the activity of the query compound.
-As soon as neighbors for a query compound have been identified, we can
-use their experimental LOAEL values to predict the activity of the
-untested compound. In this case we are using the weighted mean of the
+If PLS modelling or prediction fails, the program resorts to using the weighted mean of the
neighbors LOAEL values, where the contribution of each neighbor is
weighted by its similarity to the query compound.
### Validation
-Christoph
+Two types of validations are used within this study:
+
+For the comparison of experimental variability with predictive accuracies we are using a test set of compounds that occur in both datasets. The *Mazzatorta*, *Swiss Federal Office* and *combined* datasets are used as training data for read across predictions. In order to obtain unbiased predictions *all* information from the test compound is removed from the training set prior to predictions. This procedure is hardcoded into the prediction algorithm in order to prevent validation errors.
+
+TODO: treatment of duplicates
+
+In addition traditional 10-fold crossvalidation results are provided.
+
+Christoph: check if these specifications have changed at submission
Results
=======
@@ -147,7 +197,7 @@ baseline for evaluating prediction performance.
Martin
CheS-Mapper (Chemical Space Mapping and Visualization in 3D,
-http://ches-mapper.org/, (Gutlein, Karwath, and Kramer 2012)) can be
+http://ches-mapper.org/, @Gütlein2012) can be
used to analyze the relationship between the structure of chemical
compounds, their physico-chemical properties, and biological or toxic
effects. CheS-Mapper embeds a dataset into 3D space, such that compounds
@@ -164,7 +214,7 @@ Christoph
datasets. A complete table for 138 functional groups from OpenBabel FP4
fingerprints can be found in the appendix.
-![Frequency of functional groups.](functional-groups.pdf){#fig:fg}
+![Frequency of functional groups.](figure/functional-groups.pdf){#fig:fg}
### Experimental variability versus prediction uncertainty
@@ -179,6 +229,8 @@ substantial overlap of compounds, with LOAEL values in both datasets.
##### Intra dataset variability
+TODO: read data from files
+
The Mazzatorta dataset has 562 LOAEL values with 439 unique structures,
the Swiss Federal Office dataset has 493 rat LOAEL values with 381
unique structures. [@fig:intra] shows the intra-dataset variability, where
@@ -188,7 +240,7 @@ similar in both datasets (p-value: 0.48).
[//]: # p-value: 0.4750771581019402
-![Intra dataset variability: Each vertical line represents a compound, dots are individual LOAEL values.](loael-dataset-comparison-all-compounds.pdf){#fig:intra}
+[//]: # ![Intra dataset variability: Each vertical line represents a compound, dots are individual LOAEL values.](loael-dataset-comparison-all-compounds.pdf){#fig:intra}
##### Inter dataset variability
@@ -196,30 +248,29 @@ similar in both datasets (p-value: 0.48).
and Swiss Federal Office datasets. Obviously the experimental
variability is larger than for individual datasets.
-![Inter dataset variability](loael-dataset-comparison-common-compounds.pdf){#fig:inter}
+[//]: # ![Inter dataset variability](loael-dataset-comparison-common-compounds.pdf){#fig:inter}
##### LOAEL correlation between datasets
-[@fig:corr-1] depicts the correlation between LOAEL data from both datasets
+[@fig:corr] depicts the correlation between LOAEL data from both datasets
(using means for multiple measurements).
Identical values were removed from analysis.
[//]: # MAE: 0.801626064534318
[//]: # with identical values
-```{r fig.cap="Correlation of dataset medians (-log10(LOAEL [mmol/kg_bw])", fig.lp="fig:", echo=F}
-library(ggplot2)
+
+```{r echo=F}
data <- read.csv("data/common-median.csv",header=T)
-print(qplot(-log10(mazzatorta),-log10(swiss),data=data,xlab="Mazzatorta",ylab="Swiss Federal Office") + geom_point() + geom_abline(intercept=0.0) )
cor <- cor.test(-log(data$mazzatorta),-log(data$swiss))
-median.r.square <- cor(-log(data$mazzatorta),-log(data$swiss),use='complete')**2
-median.rmse <- sqrt(mean((-log(data$mazzatorta)+log(data$swiss))^2))
+median.r.square <- round(cor(-log(data$mazzatorta),-log(data$swiss),use='complete')^2,2)
+median.rmse <- round(sqrt(mean((-log(data$mazzatorta)+log(data$swiss))^2)),2)
```
Correlation analysis shows a
significant correlation (p-value < 2.2e-16) with r\^2: `r round(median.r.square,2)`, RMSE: `r round(median.rmse,2)`
-### Local (Q)SAR models
+### Local QSAR models
Christoph
@@ -227,39 +278,38 @@ In order to compare the perfomance of in silico models with experimental variabi
The Mazzatorta, the Swiss Federal Office dataset and a combined dataset were used as training data. Predictions for the test set compounds were made after eliminating all information from the test compound from the corresponding training dataset. [@tbl:common-pred] summarizes the results:
+![Comparison of experimental with predicted LOAEL values, each vertical line represents a compound.](figure/test-prediction.pdf){#fig:comp}
+
```{r echo=F}
-validation <- read.csv("test-set-validation.csv",header=T)
+source("test-correlation.R")
```
-Training data | Model prediction | Experimental variability
---------------|------------------|-------------------------
-Mazzatorta | `r round(validation$rmse[1],2)` | `r round(mazzatorta.rmse,2)`
-Swiss Federal Office |`r round(validation$rmse[2],2)` | `r round(swiss.rmse,2)`
-Commmon | `r round(validation$rmse[3],2)`| `r common.rmse`
-Combined | | `r combined.rmse`
+
+Training data | $r^2$ | RMSE
+--------------|---------------------------|-------------------------
+Experimental | `r median.r.square` | `r median.rmse`
+Mazzatorta | `r mazzatorta.r_square` | `r mazzatorta.rmse`
+Swiss Federal Office |`r swiss.r_square` | `r swiss.rmse`
+Combined | `r combined.r_square` | `r combined.rmse`
: Comparison of model predictions with experimental variability. {#tbl:common-pred}
```{r echo=F}
-source("crossvalidations.R")
+source("crossvalidation.R")
```
Traditional 10-fold cross-validation results are summarised in [@tbl:cv]:
-Training dataset | $r^2$ | RMSE | MAE
------------------|-------|------|----
-Mazzatorta | `r round(cv.mazzatorta.r.squared,2)` | `r round(cv.mazzatorta.rmse,2)`| `r round(cv.mazzatorta.mae,2)`
-Swiss Federal Office | `r round(cv.swiss.r.squared,2)` | `r round(cv.swiss.rmse,2)`| `r round(cv.swiss.mae,2)`
-Combined | `r round(cv.combined.r.squared,2)` | `r round(cv.combined.rmse,2)`| `r round(cv.combined.mae,2)`
+Training dataset | $r^2$ | RMSE
+-----------------|-------|------
+Mazzatorta | `r round(cv.mazzatorta.r_square,2)` | `r round(cv.mazzatorta.rmse,2)`
+Swiss Federal Office | `r round(cv.swiss.r_square,2)` | `r round(cv.swiss.rmse,2)`
+Combined | `r round(cv.combined.r_square,2)` | `r round(cv.combined.rmse,2)`
: 10-fold crossvalidation results {#tbl:cv}
-[//]: # ```{r fig.cap="Comparison of predictions with measured values (-log10(LOAEL [mmol/kg_bw])", fig.lp="fig:", echo=F}
+![Correlation of experimental with predicted LOAEL values (test set)](figure/test-correlation.pdf){}
+
+![Correlation of experimental with predicted LOAEL values (10-fold crossvalidation)](figure/crossvalidation.pdf){}
-```{r predictions, fig.cap='Comparison of predictions with measured values (-log10(LOAEL [mmol/kg_bw])', echo=F}
-library(ggplot2)
-data <- read.csv("data/common-test.csv",header=T)
-sorted = data[ order(-log10(data$LOAEL)), ]
-ggplot(sorted, aes(SMILES,-log10(LOAEL),ymin = min(-log10(LOAEL)), ymax=max(-log10(LOAEL)),color=Dataset)) + geom_point() + ylab('-log(LOAEL mg/kg_bw/day)') + xlab('Compound') + theme(axis.text.x = element_blank())
-```
Discussion
==========
@@ -273,31 +323,3 @@ Summary
References
==========
-
-Bender, Andreas, Hamse Y. Mussa, and Robert C. Glen, and Stephan
-Reiling. 2004. “Molecular Similarity Searching Using Atom Environments,
-Information-Based Feature Selection, and a Naïve Bayesian Classifier.”
-*Journal of Chemical Information and Computer Sciences* 44 (1): 170–78.
-doi:[10.1021/ci034207y](https://doi.org/10.1021/ci034207y).
-
-Gütlein, Martin, Andreas Karwath, and Stefan Kramer. 2012. “CheS-Mapper
-- Chemical Space Mapping and Visualization in 3D.” *Journal of
-Cheminformatics* 4 (1): 7.
-doi:[10.1186/1758-2946-4-7](https://doi.org/10.1186/1758-2946-4-7).
-
-Maunz, Andreas, Martin Gütlein, Micha Rautenberg, David Vorgrimmler,
-Denis Gebele, and Christoph Helma. 2013. “Lazar: A Modular Predictive
-Toxicology Framework.” *Frontiers in Pharmacology* 4. Frontiers Media
-SA.
-doi:[10.3389/fphar.2013.00038](https://doi.org/10.3389/fphar.2013.00038).
-
-OBoyle, Noel M, Michael Banck, Craig A James, Chris Morley, Tim
-Vandermeersch, and Geoffrey R Hutchison. 2011. “Open Babel: An Open
-Chemical Toolbox.” *Journal of Cheminformatics* 3 (1). Springer Science;
-Business Media: 33.
-doi:[10.1186/1758-2946-3-33](https://doi.org/10.1186/1758-2946-3-33).
-
-Weininger, David. 1988. “SMILES, a Chemical Language and Information
-System. 1. Introduction to Methodology and Encoding Rules.” *Journal of
-Chemical Information and Computer Sciences* 28 (1): 31–36.
-doi:[10.1021/ci00057a005](https://doi.org/10.1021/ci00057a005).
diff --git a/paper/loael.md b/paper/loael.md
index a2fc458..69ee1ff 100644
--- a/paper/loael.md
+++ b/paper/loael.md
@@ -8,9 +8,8 @@ keywords: (Q)SAR, read-across, LOAEL
date: \today
abstract: " "
documentclass: achemso
-bibliography: references.bib
+bibliography: references.bibtex
bibliographystyle: achemso
-biblio-style: achemso
output:
pdf_document:
fig_caption: yes
@@ -21,14 +20,34 @@ Introduction
Christoph + Elena + Benoit
-The main objectives of this study are
+The quality and reproducibility of (Q)SAR and read-across predictions is a controversial topic in the toxicological risk-assessment community. Although model predictions can be validated with various procedures it is rarely possible to put the results into the context of experimental variability, because replicate experiments are rarely available.
-- to investigate the experimental variability of LOAEL data
+With missing information about the variability of experimental toxicity data it is hard to judge the performance of predictive models and it is tempting for model developers to use aggressive model optimisation methods that lead to impressive validation results, but also to overfitted models with little practical relevance.
-- develop predictive model for lowest observed effect levels
+In this study we intend to compare model predictions with experimental variability, using chronic oral rat lowest observed adverse effect levels (LOAEL) as toxicity endpoint.
+We are using two datasets, one from [@mazzatorta08] (*Mazzatorta* dataset) and one from the Swiss Federal Office of TODO (*Swiss Federal Office* dataset).
-- compare the performance of model predictions with experimental
- variability
+Elena: do you have a reference and the name of the department?
+
+
+
+155 compounds are common in both datasets and we use them as a test set in our investigation. For this test set we will
+
+- compare the structural diversity of both datasets
+- compare the LOAEL values in both datasets
+- build prediction models based on the Mazzatorta, Swiss Federal Office datasets and a combination of both
+- predict LOAELs of the test set
+- compare predictions with experimental variability
+
+With this investigation we also want to support the idea of reproducible research, by providing all datasets and programs that have been used to generate this manuscript under a TODO license.
+
+A self-contained docker image with all program dependencies required for the reproduction of these results is available from TODO.
+
+Source code and datasets for the reproduction of this manuscript can be downloaded from the GitHub repository TODO. The lazar framework [@Maunz2013] is also available under a GPL License from https://github.com/opentox/lazar.
+
+TODO: github tags
+
+Elena: please check if this publication strategy is ok for the Swiss Federal Office
Materials and Methods
=====================
@@ -36,69 +55,82 @@ Materials and Methods
Datasets
--------
+
+
### Mazzatorta dataset
-Just referred to the paper 2008.
+The first dataset (*Mazzatorta* dataset for further reference) originates from
+the publication of [@mazzatorta08]. It contains chronic (> 180 days) lowest
+observed effect levels (LOAEL) for rats (*Rattus norvegicus*) after oral
+(gavage, diet, drinking water) administration. The Mazzatorta dataset consists
+of 567 LOAEL values for 445 unique
+chemical structures.
### Swiss Federal Office dataset
Elena + Swiss Federal Office contribution (input)
-Only rat LOAEL values were used for the current investigation, because
-they correspond directly to the Mazzatorta dataset.
+The Swiss Federal Office dataset consists of 493 LOAEL values
+for 381 unique chemical structures.
### Preprocessing
-Christoph
+Chemical structures in both datasets were initially represented as SMILES strings
+[@doi:10.1021/ci00057a005]. Syntactically incorrect and missing SMILES were
+generated from other identifiers (e.g. names, CAS numbers). Unique SMILES from the OpenBabel library [@OBoyle2011] were used for the identification of duplicated structures.
+
+Studies with undefined or empty LOAEL entries were removed from the datasets. LOAEL values were converted to mmol/kg_bw/day units. For prediction, validation and visualisation purposes -log10 transformations are used.
+
+David: please check if we have missed something
-Chemical structures in both datasets are represented as SMILES strings
-(Weininger 1988). Syntactically incorrect and missing SMILES were
-generated from other identifiers (e.g names, CAS numbers) when possible.
-Studies with undefined (“0”) or empty LOAEL entries were removed for
-this study.
+### Derived datasets
+
+Two derived datasets were obtained from the original datasets:
+
+The *test* dataset contains data of compounds that occur in both datasets. Exact duplications of LOAEL values were removed, because it is very likely that they originate from the same study.
+The test dataset has 391 LOAEL values for 155 unique chemical structures.
+
+The *combined* dataset is the union of the Mazzatorta and the Swiss Federal Office dataset and it is used to build predictive models. Exact LOAEL duplications were removed, as for the test dataset.
+The combined dataset has 1014 LOAEL values for 671 unique chemical structures.
Algorithms
----------
-Christoph
-
-For this study we are using the modular lazar (*la*zy *s*tructure
-*a*ctivity *r*elationships) framework (Maunz et al. 2013) for model
+In this study we are using the modular lazar (*la*zy *s*tructure
+*a*ctivity *r*elationships) framework [@Maunz2013] for model
development and validation.
lazar follows the following basic workflow: For a given chemical
-structure it searches in a database for similar structures (neighbors)
-with experimental data, builds a local (Q)SAR model with these neighbors
-and uses this model to predict the unknown activity of the query
-compound. This procedure resembles an automated version of *read across*
+structure lazar
+
+- searches in a database for similar structures (*neighbors*)
+with experimental data,
+- builds a local QSAR model with these neighbors
+and
+- uses this model to predict the unknown activity of the query
+compound.
+
+This procedure resembles an automated version of *read across*
predictions in toxicology, in machine learning terms it would be
classified as a *k-nearest-neighbor* algorithm.
Apart from this basic workflow lazar is completely modular and allows
-the researcher to use any algorithm for neighbor identification and
-local (Q)SAR modelling. Within this study we are using the following
+the researcher to use any algorithm for similarity searches and
+local QSAR modelling. Within this study we are using the following
algorithms:
### Neighbor identification
-Christoph
-
-Similarity calculations are based on MolPrint2D fingerprints (Bender et
-al. 2004) from the OpenBabel chemoinformatics library (OBoyle et al.
-2011).
+Similarity calculations are based on MolPrint2D fingerprints [@doi:10.1021/ci034207y] from the OpenBabel chemoinformatics library [@OBoyle2011].
The MolPrint2D fingerprint uses atom environments as molecular
representation, which resemble basically the chemical concept of
functional groups. For each atom in a molecule it represents the
-chemical environment with the atom types of connected atoms.
+chemical environment using the atom types of connected atoms.
-The main advantage of MolPrint2D fingerprints over fingerprints with
-predefined substructures (such as OpenBabel FP3, FP4 or MACCs
-fingerprints) is that it may capture substructures of toxicological
-relevance that are not included in predefined substructure lists.
+MolPrint2D fingerprints are generated dynamically from chemical structures and do not rely on predefined lists of fragments (such as OpenBabel FP3, FP4 or MACCS fingerprints or lists of toxicophores/toxicophobes). This has the advantage that they may capture substructures of toxicological relevance that are not included in other fingerprints.
Preliminary experiments have shown that predictions with MolPrint2D
-fingerprints are indeed more accurate than fingerprints with predefined
-substructures.
+fingerprints are indeed more accurate than other OpenBabel fingerprints.
From MolPrint2D fingerprints we can construct a feature vector with all
atom environments of a compound, which can be used to calculate chemical
@@ -106,27 +138,38 @@ similarities.
[//]: # https://openbabel.org/docs/dev/FileFormats/MolPrint2D_format.html#molprint2d-format
-The chemical similarity between two compounds is expressed as the
-proportion between atom environments common in both structures and the
-total number of atom environments (Jaccard/Tanimoto index, [@eq:jaccard]).
+The chemical similarity between two compounds A and B is expressed as the
+proportion between atom environments common in both structures $A \cap B$ and the
+total number of atom environments $A \cup B$ (Jaccard/Tanimoto index, [@eq:jaccard]).
$$ sim = \frac{|A \cap B|}{|A \cup B|} $$ {#eq:jaccard}
-$A$ atom environments of compound A, $B$ atom environments of compound B.
+A threshold of $sim > 0.1$ is used for the identification of neighbors for local QSAR models.
+Compounds with the same structure as the query structure are eliminated from the neighbors to obtain an unbiased prediction.
-### Local (Q)SAR models
+### Local QSAR models and predictions
-Christoph
+Only similar compounds (*neighbors*) are used for local QSAR models.
+In this investigation we are using a weighted partial least squares regression (PLS) algorithm for the prediction of quantitative properties.
+First all fingerprint features with identical values across all neighbors are removed.
+The remaining set of features is used as descriptors for creating a local weighted PLS model with atom environments as descriptors and model similarities as weights. The `plsr` function of the `pls` R package [@pls] is used for this purpose.
+Finally the local PLS model is applied to predict the activity of the query compound.
-As soon as neighbors for a query compound have been identified, we can
-use their experimental LOAEL values to predict the activity of the
-untested compound. In this case we are using the weighted mean of the
+If PLS modelling or prediction fails, the program resorts to using the weighted mean of the
neighbors LOAEL values, where the contribution of each neighbor is
weighted by its similarity to the query compound.
### Validation
-Christoph
+Two types of validations are used within this study:
+
+For the comparison of experimental variability with predictive accuracies we are using a test set of compounds that occur in both datasets. The *Mazzatorta*, *Swiss Federal Office* and *combined* datasets are used as training data for read across predictions. In order to obtain unbiased predictions *all* information from the test compound is removed from the training set prior to predictions. This is hardcoded into the prediction algorithm in order to prevent validation errors.
+
+TODO: treatment of duplicates
+
+In addition traditional 10-fold crossvalidation results are provided.
+
+Christoph: check if these specifications have changed at submission
Results
=======
@@ -147,7 +190,7 @@ baseline for evaluating prediction performance.
Martin
CheS-Mapper (Chemical Space Mapping and Visualization in 3D,
-http://ches-mapper.org/, (Gutlein, Karwath, and Kramer 2012)) can be
+http://ches-mapper.org/, @Gütlein2012) can be
used to analyze the relationship between the structure of chemical
compounds, their physico-chemical properties, and biological or toxic
effects. CheS-Mapper embeds a dataset into 3D space, such that compounds
@@ -164,7 +207,7 @@ Christoph
datasets. A complete table for 138 functional groups from OpenBabel FP4
fingerprints can be found in the appendix.
-![Frequency of functional groups.](functional-groups.pdf){#fig:fg}
+![Frequency of functional groups.](figure/functional-groups.pdf){#fig:fg}
### Experimental variability versus prediction uncertainty
@@ -177,6 +220,8 @@ substantial overlap of compounds, with LOAEL values in both datasets.
##### Intra dataset variability
+TODO: read data from files
+
The Mazzatorta dataset has 562 LOAEL values with 439 unique structures,
the Swiss Federal Office dataset has 493 rat LOAEL values with 381
unique structures. [@fig:intra] shows the intra-dataset variability, where
@@ -186,7 +231,7 @@ similar in both datasets (p-value: 0.48).
[//]: # p-value: 0.4750771581019402
-![Intra dataset variability: Each vertical line represents a compound, dots are individual LOAEL values.](loael-dataset-comparison-all-compounds.pdf){#fig:intra}
+[//]: # ![Intra dataset variability: Each vertical line represents a compound, dots are individual LOAEL values.](loael-dataset-comparison-all-compounds.pdf){#fig:intra}
##### Inter dataset variability
@@ -194,11 +239,11 @@ similar in both datasets (p-value: 0.48).
and Swiss Federal Office datasets. Obviously the experimental
variability is larger than for individual datasets.
-![Inter dataset variability](loael-dataset-comparison-common-compounds.pdf){#fig:inter}
+[//]: # ![Inter dataset variability](loael-dataset-comparison-common-compounds.pdf){#fig:inter}
##### LOAEL correlation between datasets
-[@fig:corr-1] depicts the correlation between LOAEL data from both datasets
+[@fig:corr] depicts the correlation between LOAEL data from both datasets
(using means for multiple measurements).
Identical values were removed from analysis.
@@ -206,16 +251,12 @@ Identical values were removed from analysis.
[//]: # with identical values
-```
-## Loading required package: methods
-```
-![Correlation of dataset medians (-log10(LOAEL [mmol/kg_bw])](figure/unnamed-chunk-2-1.png)
Correlation analysis shows a
-significant correlation (p-value < 2.2e-16) with r\^2: 0.55, RMSE: 1.34
+significant correlation (p-value < 2.2e-16) with r\^2: 0.58, RMSE: 1.3
-### Local (Q)SAR models
+### Local QSAR models
Christoph
@@ -223,46 +264,34 @@ In order to compare the perfomance of in silico models with experimental variabi
The Mazzatorta, the Swiss Federal Office dataset and a combined dataset were used as training data. Predictions for the test set compounds were made after eliminating all information from the test compound from the corresponding training dataset. [@tbl:common-pred] summarizes the results:
+![Comparison of experimental with predicted LOAEL values. Each vertical line represents a compound.](figure/test-prediction.pdf){#fig:comp}
+
+
-Training data | Model prediction | Experimental variability
---------------|------------------|-------------------------
-Mazzatorta | 0.88 | 0.87
-Swiss Federal Office |0.65 | 0.76
-Commmon | 1.28| 0.8314774
-Combined | | 0.8242536
+Training data | $r^2$ | RMSE
+--------------|---------------------------|-------------------------
+Experimental | 0.58 | 1.3
+Mazzatorta | 0.38 | 1.49
+Swiss Federal Office |0.38 | 1.47
+Combined | 0.38 | 1.47
: Comparison of model predictions with experimental variability. {#tbl:common-pred}
Traditional 10-fold cross-validation results are summarised in [@tbl:cv]:
-Training dataset | $r^2$ | RMSE | MAE
------------------|-------|------|----
-Mazzatorta | 0.37 | 0.84| 0.65
-Swiss Federal Office | 0.25 | 0.75| 0.61
-Combined | 0.12 | 1.45| 1.21
+Training dataset | $r^2$ | RMSE
+-----------------|-------|------
+Mazzatorta | 0.38 | 2.01
+Swiss Federal Office | 0.3 | 1.67
+Combined | 0.38 | 1.81
: 10-fold crossvalidation results {#tbl:cv}
-[//]: # ```{r fig.cap="Comparison of predictions with measured values (-log10(LOAEL [mmol/kg_bw])", fig.lp="fig:", echo=F}
+![Correlation of experimental with predicted LOAEL values (test set)](figure/test-correlation.pdf){}
+![Correlation of experimental with predicted LOAEL values (10-fold crossvalidation)](figure/crossvalidation.pdf){}
-```
-## Warning in file(file, "rt"): cannot open file 'data/common-test.csv': No
-## such file or directory
-```
-
-```
-## Error in file(file, "rt"): cannot open the connection
-```
-
-```
-## Error in log10(data$LOAEL): non-numeric argument to mathematical function
-```
-
-```
-## Error in ggplot(sorted, aes(SMILES, -log10(LOAEL), ymin = min(-log10(LOAEL)), : object 'sorted' not found
-```
Discussion
==========
@@ -276,31 +305,3 @@ Summary
References
==========
-
-Bender, Andreas, Hamse Y. Mussa, and Robert C. Glen, and Stephan
-Reiling. 2004. “Molecular Similarity Searching Using Atom Environments,
-Information-Based Feature Selection, and a Naïve Bayesian Classifier.”
-*Journal of Chemical Information and Computer Sciences* 44 (1): 170–78.
-doi:[10.1021/ci034207y](https://doi.org/10.1021/ci034207y).
-
-Gütlein, Martin, Andreas Karwath, and Stefan Kramer. 2012. “CheS-Mapper
-- Chemical Space Mapping and Visualization in 3D.” *Journal of
-Cheminformatics* 4 (1): 7.
-doi:[10.1186/1758-2946-4-7](https://doi.org/10.1186/1758-2946-4-7).
-
-Maunz, Andreas, Martin Gütlein, Micha Rautenberg, David Vorgrimmler,
-Denis Gebele, and Christoph Helma. 2013. “Lazar: A Modular Predictive
-Toxicology Framework.” *Frontiers in Pharmacology* 4. Frontiers Media
-SA.
-doi:[10.3389/fphar.2013.00038](https://doi.org/10.3389/fphar.2013.00038).
-
-OBoyle, Noel M, Michael Banck, Craig A James, Chris Morley, Tim
-Vandermeersch, and Geoffrey R Hutchison. 2011. “Open Babel: An Open
-Chemical Toolbox.” *Journal of Cheminformatics* 3 (1). Springer Science;
-Business Media: 33.
-doi:[10.1186/1758-2946-3-33](https://doi.org/10.1186/1758-2946-3-33).
-
-Weininger, David. 1988. “SMILES, a Chemical Language and Information
-System. 1. Introduction to Methodology and Encoding Rules.” *Journal of
-Chemical Information and Computer Sciences* 28 (1): 31–36.
-doi:[10.1021/ci00057a005](https://doi.org/10.1021/ci00057a005).
diff --git a/paper/loael.pdf b/paper/loael.pdf
index c937d62..cef90e5 100644
--- a/paper/loael.pdf
+++ b/paper/loael.pdf
Binary files differ
diff --git a/paper/references.bib b/paper/references.bibtex
index 6b40541..735a52f 100644
--- a/paper/references.bib
+++ b/paper/references.bibtex
@@ -1,5 +1,5 @@
@Article{Gütlein2012,
-AUTHOR = {Gutlein, Martin and Karwath, Andreas and Kramer, Stefan},
+AUTHOR = {Gütlein, Martin and Karwath, Andreas and Kramer, Stefan},
TITLE = {CheS-Mapper - Chemical Space Mapping and Visualization in 3D},
JOURNAL = {Journal of Cheminformatics},
VOLUME = {4},
@@ -83,3 +83,34 @@ eprint = {
title = {Open Babel: An open chemical toolbox},
journal = {Journal of Cheminformatics}
}
+
+@article{mazzatorta08,
+author = {Paolo Mazzatorta and Manuel Dominguez Estevez and Myriam Coulet and Benoit Schilter},
+title = {Modeling Oral Rat Chronic Toxicity},
+journal = {Journal of Chemical Information and Modeling},
+volume = {48},
+number = {10},
+pages = {1949-1954},
+year = {2008},
+doi = {10.1021/ci8001974},
+ note ={PMID: 18803370},
+
+URL = {
+ http://dx.doi.org/10.1021/ci8001974
+
+},
+eprint = {
+ http://dx.doi.org/10.1021/ci8001974
+
+}
+
+}
+
+@Manual{pls,
+ title = {pls: Partial Least Squares and Principal Component Regression},
+ author = {Bjørn-Helge Mevik and Ron Wehrens and Kristian Hovde Liland},
+ year = {2015},
+ note = {R package version 2.5-0},
+ url = {https://CRAN.R-project.org/package=pls},
+ }
+
diff --git a/paper/test-correlation-plot.R b/paper/test-correlation-plot.R
new file mode 100644
index 0000000..0626bd4
--- /dev/null
+++ b/paper/test-correlation-plot.R
@@ -0,0 +1,21 @@
+library(ggplot2)
+library(grid)
+library(gridExtra)
+
+experimental <- read.csv("data/median-correlation.csv",header=T)
+p1 = qplot(-log10(mazzatorta),-log10(swiss),data=experimental,xlab="-log10(LOAEL Mazzatorta median)",ylab="-log10(LOAEL Swiss Federal Office median)",main="Experimental data") + geom_point() + geom_abline(intercept=0.0) + xlim(-1,4) + ylim(-1,4)
+
+mazzatorta = read.csv("data/mazzatorta-test-predictions.csv",header=T)
+swiss = read.csv("data/swiss-test-predictions.csv",header=T)
+combined = read.csv("data/combined-test-predictions.csv",header=T)
+
+p2 = qplot(-log10(LOAEL_predicted),-log10(LOAEL_measured_median),data=mazzatorta,xlab="-log10(LOAEL predicted)",ylab="-log10(LOAEL measured median)",main="Mazzatorta") + geom_point() + geom_abline(intercept=0.0) + xlim(-1,4) + ylim(-1,4)
+
+p3 = qplot(-log10(LOAEL_predicted),-log10(LOAEL_measured_median),data=swiss,xlab="-log10(LOAEL predicted)",ylab="-log10(LOAEL measured median)",main="Swiss Federal Office") + geom_point() + geom_abline(intercept=0.0) + xlim(-1,4) + ylim(-1,4)
+
+p4 = qplot(-log10(LOAEL_predicted),-log10(LOAEL_measured_median),data=combined,xlab="-log10(LOAEL predicted)",ylab="-log10(LOAEL measured median)",main="Combined") + geom_point() + geom_abline(intercept=0.0) + xlim(-1,4) + ylim(-1,4)
+
+pdf('figure/test-correlation.pdf')
+grid.arrange(p1,p2,p3,p4,ncol=2)
+dev.off()
+
diff --git a/paper/test-correlation.R b/paper/test-correlation.R
new file mode 100644
index 0000000..99d113a
--- /dev/null
+++ b/paper/test-correlation.R
@@ -0,0 +1,15 @@
+mazzatorta = read.csv("data/mazzatorta-test-predictions.csv",header=T)
+swiss = read.csv("data/swiss-test-predictions.csv",header=T)
+combined = read.csv("data/combined-test-predictions.csv",header=T)
+
+mazzatorta.p = round(cor.test(-log(mazzatorta$LOAEL_measured_median),-log(mazzatorta$LOAEL_predicted))$p.value,2)
+mazzatorta.r_square = round(cor(-log(mazzatorta$LOAEL_measured_median),-log(mazzatorta$LOAEL_predicted))^2,2)
+mazzatorta.rmse = round(sqrt(mean((-log(mazzatorta$LOAEL_measured_median)+log(mazzatorta$LOAEL_predicted))^2)),2)
+
+swiss.p = round(cor.test(-log(swiss$LOAEL_measured_median),-log(swiss$LOAEL_predicted))$p.value,2)
+swiss.r_square = round(cor(-log(swiss$LOAEL_measured_median),-log(swiss$LOAEL_predicted))^2,2)
+swiss.rmse = round(sqrt(mean((-log(swiss$LOAEL_measured_median)+log(swiss$LOAEL_predicted))^2)),2)
+
+combined.p = round(cor.test(-log(combined$LOAEL_measured_median),-log(combined$LOAEL_predicted))$p.value,2)
+combined.r_square = round(cor(-log(combined$LOAEL_measured_median),-log(combined$LOAEL_predicted))^2,2)
+combined.rmse = round(sqrt(mean((-log(combined$LOAEL_measured_median)+log(combined$LOAEL_predicted))^2)),2)
diff --git a/paper/test-prediction-plot.R b/paper/test-prediction-plot.R
new file mode 100644
index 0000000..c43737f
--- /dev/null
+++ b/paper/test-prediction-plot.R
@@ -0,0 +1,32 @@
+library(ggplot2)
+library(grid)
+library(gridExtra)
+
+mazzatorta = read.csv("data/mazzatorta-test-predictions.csv",header=T)
+swiss = read.csv("data/swiss-test-predictions.csv",header=T)
+combined = read.csv("data/combined-test-predictions.csv",header=T)
+test <- read.csv("data/test.csv",header=T)
+n = c("SMILES","LOAEL","Source")
+
+data = data.frame(factor(test$SMILES),test$LOAEL,factor(test$Dataset))
+names(data) = n
+data$Type = "experimental"
+maz = data.frame(factor(mazzatorta$SMILES),mazzatorta$LOAEL_predicted,factor(mazzatorta$Dataset))
+names(maz) = n
+maz$Type = "predicted"
+data = rbind(data,maz)
+swi = data.frame(factor(swiss$SMILES),swiss$LOAEL_predicted,factor(swiss$Dataset))
+names(swi) = n
+swi$Type = "predicted"
+data = rbind(data,swi)
+comb = data.frame(factor(combined$SMILES),combined$LOAEL_predicted,factor(combined$Dataset))
+names(comb) = n
+comb$Type = "predicted"
+data = rbind(data,comb)
+data$LOAEL = -log(data$LOAEL)
+data$SMILES <- reorder(data$SMILES,data$LOAEL)
+img <- ggplot(data, aes(SMILES,LOAEL,ymin = min(LOAEL), ymax=max(LOAEL),shape=Source,color=Type))
+img <- img + ylab('-log(LOAEL mg/kg_bw/day)') + xlab('Compound') + theme(axis.text.x = element_blank())
+img <- img + geom_point()
+
+ggsave(file='figure/test-prediction.pdf', plot=img,width=12, height=8)
diff --git a/paper/test-set-validation.rb b/paper/test-validation.rb
index d842d47..b748a8d 100644
--- a/paper/test-set-validation.rb
+++ b/paper/test-validation.rb
@@ -5,18 +5,20 @@ test = Dataset.from_csv_file(File.join(DATA,"common-test.csv"))
file = File.join(DATA,ARGV[0])
dataset = Dataset.from_csv_file file
model = Model::LazarRegression.create dataset
-validation = Validation.create model, dataset, test
+validation = RegressionValidation.create model, dataset, test
csv_file = file.sub(".csv","-test-predictions.csv")
+id_file = file.sub(".csv","-test-predictions.id")
+File.open(id_file,"w+"){|f| f.puts validation.id}
name = File.basename(ARGV[0],".csv")
data = []
validation.predictions.each do |p|
- data << [Compound.find(p[0]).smiles, p[2], p[3],"#{name}-prediction"]
+ data << [Compound.find(p[0]).smiles, p[1].median, p[2], p[3],"#{name}-prediction"]
end
data.sort!{|a,b| a[1] <=> b[1]}
CSV.open(csv_file,"w+") do |csv|
- csv << ["SMILES","LOAEL","Confidence","Dataset"]
+ csv << ["SMILES","LOAEL_measured_median","LOAEL_predicted","Confidence","Dataset"]
data.each{|r| csv << r}
end
diff --git a/paper/unique-smiles.rb b/paper/unique-smiles.rb
new file mode 100644
index 0000000..d316c3f
--- /dev/null
+++ b/paper/unique-smiles.rb
@@ -0,0 +1,18 @@
+require_relative "include.rb"
+
+input = Dataset.from_csv_file File.join(ARGV[0])
+outname = File.join(File.dirname(ARGV[0]),"#{ARGV[1]}.csv")
+
+data = []
+input.compounds.each_with_index do |cid,i|
+ c = Compound.find cid
+ v = input.data_entries[i].first
+ data << [c.smiles,v,ARGV[1]]
+end
+
+data.sort!{|a,b| a[1] <=> b[1]}
+
+CSV.open(outname,"w+") do |csv|
+ csv << ["SMILES","LOAEL","Dataset"]
+ data.each{|r| csv << r}
+end