summaryrefslogtreecommitdiff
path: root/Makefile
blob: f0b74b44c132025bc815e403076d4c208be7b673 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
# Variables

# Generated CSV tables that the knitted paper (loael.md) depends on.
datasets := \
  data/median-correlation.csv \
  data/test_log10.csv \
  data/training_log10.csv \
  data/mazzatorta_log10.csv \
  data/swiss_log10.csv \
  data/swiss_mg_dup.csv \
  data/mazzatorta_mg_dup.csv \
  data/all_mg_dup.csv

# Validation result tables that the knitted paper depends on.
validations := \
  data/training-test-predictions.csv \
  data/50cv.csv \
  data/misclassifications.csv

# PDF figures that the knitted paper depends on.
figures := \
  figures/functional-groups.pdf \
  figures/test-prediction.pdf \
  figures/prediction-test-correlation.pdf \
  figures/dataset-variability.pdf \
  figures/median-correlation.pdf \
  figures/crossvalidation.pdf

# Paper
# Typeset the paper. pdflatex is run twice (the usual way to let the second
# pass pick up cross-references recorded by the first). Each pass is its own
# recipe line so that a failing first pass stops the build instead of being
# masked by the exit status of the second.
loael.pdf: loael.tex
	pdflatex $<
	pdflatex $<

# LaTeX source of the paper: pandoc turns the knitted Markdown into a
# standalone (-s) document, using references.bibtex as bibliography and the
# pandoc-crossref and pandoc-citeproc filters.
loael.tex: loael.md references.bibtex
	pandoc -s \
	  --bibliography=references.bibtex \
	  --filter pandoc-crossref \
	  --filter pandoc-citeproc \
	  -o $@ $<

# Knit the R Markdown manuscript. All figures, datasets and validation tables
# are prerequisites, so they are brought up to date before knitr runs.
# LANG is set for the Rscript process only (UTF-8 locale).
loael.md: loael.Rmd $(figures) $(datasets) $(validations)
	LANG=en_US.UTF-8 Rscript --vanilla -e "library(knitr); knit('loael.Rmd');"

# Word version of the paper.
# references.bibtex is a prerequisite because the recipe reads it through
# --bibliography; without it, editing the bibliography would not rebuild the
# document.
# --latex-engine is intentionally not passed: it only selects the engine used
# for PDF output (irrelevant for docx) and was renamed to --pdf-engine in
# pandoc 2.
loael.docx: loael.md references.bibtex
	pandoc -s --bibliography=references.bibtex --filter pandoc-crossref --filter pandoc-citeproc -o $@ $<

# Plain-text version of the paper.
# references.bibtex is a prerequisite because the recipe reads it through
# --bibliography; without it, editing the bibliography would not rebuild the
# document.
# --latex-engine is intentionally not passed: it only selects the engine used
# for PDF output (irrelevant for plain text) and was renamed to --pdf-engine
# in pandoc 2.
loael.txt: loael.md references.bibtex
	pandoc -s --bibliography=references.bibtex --filter pandoc-crossref --filter pandoc-citeproc -o $@ $<

# Figures

# Each figure is produced by running an R script from scripts/.
# The script is listed as a prerequisite next to its input data, so that
# editing a plotting script regenerates the figure it draws (a change to the
# script alone would otherwise leave a stale PDF in place).
# The scripts are invoked without arguments; presumably they hard-code their
# input and output paths -- TODO confirm against the scripts.

figures/functional-groups.pdf: data/functional-groups-reduced4R.csv scripts/functional-groups.R
	scripts/functional-groups.R

figures/dataset-variability.pdf: data/test_log10_database_fix.csv scripts/dataset-variability.R
	scripts/dataset-variability.R

figures/test-prediction.pdf: data/predictions-measurements.csv scripts/test-prediction-plot.R
	scripts/test-prediction-plot.R

figures/prediction-test-correlation.pdf: data/training-test-predictions.csv scripts/prediction-test-correlation-plot.R
	scripts/prediction-test-correlation-plot.R

figures/median-correlation.pdf: data/median-correlation.csv scripts/median-correlation-plot.R
	scripts/median-correlation-plot.R

figures/crossvalidation.pdf: data/training_log10-cv.csv scripts/crossvalidation-plots.R
	scripts/crossvalidation-plots.R

# Validations

# Test-set predictions paired with measurements; rebuilt when either the
# prediction table or the test set changes. The script takes no arguments.
data/predictions-measurements.csv: \
  data/training-test-predictions.csv \
  data/test_log10.csv
	scripts/test-prediction.rb

# Misclassification table derived from the test-set prediction table.
data/misclassifications.csv: data/training-test-predictions.csv
	scripts/misclassifications.rb

# Prediction table, rebuilt whenever the .id file changes.
data/training-test-predictions.csv: data/training-test-predictions.id
	scripts/test-validation-results.rb

# Runs the test-set validation after the test or training set changed.
# The .id target presumably records an identifier of the stored validation
# run -- TODO confirm against scripts/testset-validation.rb.
data/training-test-predictions.id: \
  data/test_log10.csv \
  data/training_log10.csv
	scripts/testset-validation.rb

# Both cross-validation tables are derived from data/50cv.ids, which is
# passed to the table scripts as their only argument.
data/training_log10-cv.csv: data/50cv.ids
	scripts/crossvalidation-table.rb $<

data/50cv.csv: data/50cv.ids
	scripts/50cv-table.rb $<

# Runs the cross-validation script on the training set.
data/50cv.ids: data/training_log10.csv
	scripts/50-crossvalidations.rb $<

# Datasets

# Functional groups
# Functional-group table in the form consumed by the R plotting script.
data/functional-groups-reduced4R.csv: data/functional-groups-reduced.csv
	scripts/functional-groups4R.rb

# Medians for dataset correlation, rebuilt when either -log10 dataset changes.
data/median-correlation.csv: \
  data/mazzatorta_log10.csv \
  data/swiss_log10.csv
	scripts/create-median-correlation.rb

# Test set, rebuilt when either -log10 dataset changes.
data/test_log10.csv: \
  data/mazzatorta_log10.csv \
  data/swiss_log10.csv
	scripts/create-testset.rb

# Copy of the test set with the database names relabelled:
#   "mazzatorta and swiss" -> "Both", "mazzatorta" -> "Nestle", "swiss" -> "FSVO".
# The combined label must be rewritten first: once "mazzatorta" has been
# turned into "Nestle", the text "mazzatorta and swiss" no longer occurs and
# could never become "Both".
# As before, each expression replaces only the first match on a line.
data/test_log10_database_fix.csv: data/test_log10.csv
	sed -e 's/mazzatorta and swiss/Both/' \
	    -e 's/mazzatorta/Nestle/' \
	    -e 's/swiss/FSVO/' $< > $@

# Training set
# Training set, rebuilt when either -log10 dataset changes.
data/training_log10.csv: \
  data/mazzatorta_log10.csv \
  data/swiss_log10.csv
	scripts/create-trainingset.rb

# -log10 transformations
# One static pattern rule covers both source datasets: data/X_log10.csv is
# made from data/X.csv by the mmol2-log10.rb script of the sibling lazar
# checkout. Afterwards the "-log10(LOAEL)" column header is rewritten in place
# to "LOAEL", because R cannot parse the -log10(LOAEL) header.
data/mazzatorta_log10.csv data/swiss_log10.csv: data/%_log10.csv: data/%.csv
	../lazar/scripts/mmol2-log10.rb $<
	sed -i 's/-log10(LOAEL)/LOAEL/' $@

# Datasets with unique smiles
# Tables derived from the mazzatorta source file; the source file is passed
# to each script as its only argument.
data/mazzatorta.csv: data/LOAEL_mg_corrected_smiles_mmol.csv
	scripts/mazzatorta-unique-smiles.rb $<

data/mazzatorta_mg_dup.csv: data/LOAEL_mg_corrected_smiles_mmol.csv
	scripts/mazzatorta_mg_dup.rb $<

# Tables derived from the swiss source file, passed the same way.
data/swiss.csv: data/NOAEL-LOAEL_SMILES_rat_chron.csv
	scripts/noael_loael2mmol.rb $<

data/swiss_mg_dup.csv: data/NOAEL-LOAEL_SMILES_rat_chron.csv
	scripts/noael_loael2swiss_mg_dup.rb $<

# Combined table; depends on both source files, but the script is run
# without arguments.
data/all_mg_dup.csv: \
  data/NOAEL-LOAEL_SMILES_rat_chron.csv \
  data/LOAEL_mg_corrected_smiles_mmol.csv
	scripts/all_mg_dup.rb

# Remove everything the build generated.
# Declared phony so that a file named "clean" cannot make the target look up
# to date.
#  1. Delete the generated figures; -f keeps clean from failing when there
#     are none.
#  2. Delete every non-hidden, non-directory entry directly inside data/
#     except those whose name contains LOAEL, functional or SMARTS. find is
#     used instead of parsing `ls` output, so an empty match or a file name
#     with spaces does not break the recipe.
#  3. WARNING: drops the entire MongoDB database named "production".
.PHONY: clean
clean:
	rm -f figures/*pdf
	cd data && find . -maxdepth 1 ! -type d ! -name '.*' ! -name '*LOAEL*' ! -name '*functional*' ! -name '*SMARTS*' -exec rm -f {} +
	mongo production --eval "db.dropDatabase()"