blob: 9a154aecccdfad2469ce42f7bc408886647dde86 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
|
# Variables
# File lists used as prerequisites of loael.md (the knitted manuscript).

# Data tables.
datasets := $(addprefix data/,$(addsuffix .csv, \
  median-correlation \
  test_log10 \
  training_log10 \
  mazzatorta_log10 \
  swiss_log10 \
  swiss_mg_dup \
  mazzatorta_mg_dup \
  all_mg_dup))

# One result table per crossvalidation run (runs 0, 1 and 2).
crossvalidations := $(foreach run,0 1 2,data/training_log10-cv-$(run).csv)

# Validation result tables, including the crossvalidation tables above.
validations := \
  data/training-test-predictions.csv \
  $(crossvalidations) \
  data/misclassifications.csv

# Plots.
figures := $(addprefix figures/,$(addsuffix .pdf, \
  functional-groups \
  test-prediction \
  prediction-test-correlation \
  dataset-variability \
  median-correlation \
  crossvalidation0 \
  crossvalidation1 \
  crossvalidation2))
# Paper
# Pipeline: loael.Rmd --knitr--> loael.md --pandoc--> loael.tex --pdflatex--> loael.pdf
# loael.docx and loael.txt are alternative pandoc renderings of loael.md.

# pdflatex is run twice; the second pass only starts if the first one
# succeeded, so a LaTeX error is not masked by the exit status of the rerun.
loael.pdf: loael.tex
	pdflatex $< && pdflatex $<

loael.tex: loael.md references.bibtex
	pandoc -s --bibliography=references.bibtex --filter pandoc-crossref --filter pandoc-citeproc -o $@ $<

# Knitting depends on every figure, dataset and validation table listed in
# the variables section.
loael.md: loael.Rmd $(figures) $(datasets) $(validations)
	export LANG=en_US.UTF-8; Rscript --vanilla -e "library(knitr); knit('loael.Rmd');"

# references.bibtex is read through --bibliography, so it is listed as a
# prerequisite: editing the bibliography must rebuild these outputs too.
loael.docx: loael.md references.bibtex
	pandoc -s --bibliography=references.bibtex --latex-engine=pdflatex --filter pandoc-crossref --filter pandoc-citeproc -o $@ $<

loael.txt: loael.md references.bibtex
	pandoc -s --bibliography=references.bibtex --latex-engine=pdflatex --filter pandoc-crossref --filter pandoc-citeproc -o $@ $<
# Figures
# Each recipe runs an R script from scripts/; the script is expected to write
# the target PDF itself (no output path is passed on the command line).

figures/functional-groups.pdf: data/functional-groups-reduced4R.csv
	scripts/functional-groups.R

figures/test-prediction.pdf: data/predictions-measurements.csv
	scripts/test-prediction-plot.R

figures/prediction-test-correlation.pdf: data/training-test-predictions.csv
	scripts/prediction-test-correlation-plot.R

figures/dataset-variability.pdf: data/test_log10_database_fix.csv
	scripts/dataset-variability.R

figures/median-correlation.pdf: data/median-correlation.csv
	scripts/median-correlation-plot.R

# One plot per crossvalidation run. The stem ($*) is the run index (0, 1 or 2)
# and is handed to the plotting script as its only argument.
figures/crossvalidation0.pdf figures/crossvalidation1.pdf figures/crossvalidation2.pdf: figures/crossvalidation%.pdf: data/training_log10-cv-%.csv
	scripts/crossvalidation-plots.R $*
# Validations

# Test set validation: the .id file is built first, then the prediction table
# is derived from it, then the tables that depend on the predictions.
data/training-test-predictions.id: data/test_log10.csv data/training_log10.csv
	scripts/testset-validation.rb

data/training-test-predictions.csv: data/training-test-predictions.id
	scripts/test-validation-results.rb

data/predictions-measurements.csv: data/training-test-predictions.csv data/test_log10.csv
	scripts/test-prediction.rb

data/misclassifications.csv: data/training-test-predictions.csv
	scripts/misclassifications.rb

# Crossvalidation runs: one .id file per run, then one table per .id file.
# NOTE(review): crossvalidation.rb receives "training_log10.csv" without the
# data/ prefix, exactly as before; presumably the script resolves it itself.
$(crossvalidations:.csv=.id): data/training_log10-cv-%.id: data/training_log10.csv
	scripts/crossvalidation.rb training_log10.csv $*

$(crossvalidations): data/training_log10-cv-%.csv: data/training_log10-cv-%.id
	scripts/crossvalidation-table.rb $<
# Datasets

# Functional groups
# Input table for figures/functional-groups.pdf.
data/functional-groups-reduced4R.csv: data/functional-groups-reduced.csv
	scripts/functional-groups4R.rb

# Medians for dataset correlation
# Rebuilt whenever either -log10 transformed source dataset changes.
data/median-correlation.csv: data/mazzatorta_log10.csv data/swiss_log10.csv
	scripts/create-median-correlation.rb
# Test set
data/test_log10.csv: data/mazzatorta_log10.csv data/swiss_log10.csv
	scripts/create-testset.rb

# Copy of the test set with database names rewritten for
# figures/dataset-variability.pdf:
#   "mazzatorta and swiss" -> "Both", "mazzatorta" -> "Nestle", "swiss" -> "FSVO".
# The combined name must be rewritten first. Rewriting "mazzatorta" first
# would turn it into "Nestle and swiss", so the "Both" substitution could
# never match. As before, each substitution replaces only the first match
# on a line.
# The result goes to a temporary file and is renamed into place, so a failed
# sed does not leave a truncated target that looks up to date.
data/test_log10_database_fix.csv: data/test_log10.csv
	sed -e 's/mazzatorta and swiss/Both/' -e 's/mazzatorta/Nestle/' -e 's/swiss/FSVO/' $< > $@.tmp && mv -f $@.tmp $@
# Training set
data/training_log10.csv: data/mazzatorta_log10.csv data/swiss_log10.csv
	scripts/create-trainingset.rb

# -log10 transformations
# Both source datasets are converted by the same script from the sibling
# ../lazar checkout. Afterwards the "-log10(LOAEL)" column header is renamed
# to "LOAEL" in place, because R cannot parse the original header.
data/mazzatorta_log10.csv data/swiss_log10.csv: data/%_log10.csv: data/%.csv
	../lazar/scripts/mmol2-log10.rb $<
	sed -i 's/-log10(LOAEL)/LOAEL/' $@
# Datasets with unique smiles
# Every script below receives its source table ($<) as its only argument,
# except all_mg_dup.rb, which is called without arguments.

# Derived from data/LOAEL_mg_corrected_smiles_mmol.csv
data/mazzatorta.csv: data/LOAEL_mg_corrected_smiles_mmol.csv
	scripts/mazzatorta-unique-smiles.rb $<

data/mazzatorta_mg_dup.csv: data/LOAEL_mg_corrected_smiles_mmol.csv
	scripts/mazzatorta_mg_dup.rb $<

# Derived from data/NOAEL-LOAEL_SMILES_rat_chron.csv
data/swiss.csv: data/NOAEL-LOAEL_SMILES_rat_chron.csv
	scripts/noael_loael2mmol.rb $<

data/swiss_mg_dup.csv: data/NOAEL-LOAEL_SMILES_rat_chron.csv
	scripts/noael_loael2swiss_mg_dup.rb $<

# Derived from both source tables
data/all_mg_dup.csv: data/NOAEL-LOAEL_SMILES_rat_chron.csv data/LOAEL_mg_corrected_smiles_mmol.csv
	scripts/all_mg_dup.rb
# Remove generated figures and data files, then drop the "production" MongoDB
# database. Entries in data/ whose names match *LOAEL*, *functional* or
# *SMARTS* are kept (presumably the original inputs - confirm before
# changing the patterns). `ls -I` is the GNU ls ignore option.
#
# clean is declared phony so that a file named "clean" cannot disable it.
# rm -f is used so that an already-clean tree (no PDFs, nothing left to
# delete in data/) does not abort the target before the database is dropped.
.PHONY: clean
clean:
	rm -f figures/*pdf
	cd data && rm -f `ls -I "*LOAEL*" -I "*functional*" -I "*SMARTS*"`
	mongo production --eval "db.dropDatabase()"
|