blob: 856d87c55ec75ae15e06dc4ef8bf54ebf539dc69 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
|
# Delete a target whose recipe failed. Most recipes in this file write with
# "> $@", which would otherwise leave an empty or truncated file behind that
# Make treats as up to date on the next run.
.DELETE_ON_ERROR:

# Manuscript
# Requirements:
# pandoc-scholar (https://github.com/pandoc-scholar/pandoc-scholar) in PANDOC_SCHOLAR_PATH
# pandoc-placetable (https://github.com/mb21/pandoc-placetable)
# The pandoc-crossref and pandoc-citeproc filters named in
# PANDOC_WRITER_OPTIONS have to be installed as well.
# NOTE(review): these variables are presumably consumed by the pandoc-scholar
# Makefile that is included further down - confirm against that file.
ARTICLE_FILE = mutagenicity.mustache.md
PANDOC_SCHOLAR_PATH = pandoc-scholar
OUTFILE_PREFIX = mutagenicity
# Output formats to build. Currently disabled: latex docx html odt epub
# (kept on its own line so the value carries no trailing blank).
DEFAULT_EXTENSIONS = pdf
PANDOC_WRITER_OPTIONS = --filter=pandoc-placetable --filter=pandoc-crossref --filter=pandoc-citeproc
TEMPLATE_FILE_LATEX = pandoc-scholar.latex
# Experiments
# crossvalidations
# All crossvalidation inputs and outputs live below one root directory.
cv_root = 10-fold-crossvalidations
LAZAR_CONFUSION_MATRIX_DIR = $(cv_root)/lazar/crossvalidation/confusion_matrices
LAZAR_PADEL_CONFUSION_MATRIX_DIR = $(cv_root)/lazar-padel/crossvalidation/confusion_matrices
R_CV_DIR = $(cv_root)/R
TENSORFLOW_CV_DIR = $(cv_root)/tensorflow
CONFUSION_MATRICES_DIR = $(cv_root)/confusion-matrices
# One confusion matrix per model. The order is kept as is because the list is
# handed to the summary script as its argument list.
cv_models = lazar-all lazar-high-confidence \
            lazar-padel-all lazar-padel-high-confidence \
            R-RF R-SVM R-DL \
            tensorflow-rf.v3 tensorflow-lr.v3 tensorflow-lr2.v3 tensorflow-nn.v3
CONFUSION_MATRICES = $(cv_models:%=$(CONFUSION_MATRICES_DIR)/%.csv)
CV_SUMMARY = $(cv_root)/summary.yaml
# PA predictions
PA_DIR = pyrrolizidine-alkaloids
PA_LAZAR_DIR = $(PA_DIR)/lazar
# manuscript
# Generated tables and figures that the manuscript build depends on.
TABLES = tables/lazar-summary.csv tables/r-summary.csv tables/tensorflow-summary.csv tables/pa-tab.tex
FIGURES = figures/roc.png figures/tsne-mp2d.png figures/tsne-padel.png

# Default goal: all tables and figures plus the PDF of the manuscript.
# "all" names a command, not a file, so it is declared phony.
.PHONY: all
all: $(TABLES) $(FIGURES) mutagenicity.pdf

# No rule for mutagenicity.pdf is defined in this file.
# NOTE(review): it is presumably provided by the pandoc-scholar Makefile,
# driven by ARTICLE_FILE and OUTFILE_PREFIX - confirm.
include $(PANDOC_SCHOLAR_PATH)/Makefile

# Render the template mutagenicity.md with the values of the crossvalidation
# summary. Tables and figures remain prerequisites so that the rendered
# manuscript is refreshed when they change, but they are filtered out of the
# command line: they are PNG/CSV/TeX files, not template input, and must not
# be passed to mustache.
mutagenicity.mustache.md: $(CV_SUMMARY) mutagenicity.md $(TABLES) $(FIGURES)
	mustache $(filter-out $(TABLES) $(FIGURES),$^) > $@
# figures
# NOTE(review): the .R scripts are called without arguments, so they
# presumably read the .csv and write the .png through paths hard-coded inside
# the scripts - confirm that those paths match the targets below.

# t-SNE plot and its input table built from the two descriptor files.
figures/tsne-padel.png: figures/tsne-padel.csv
	scripts/tsne-padel.R
figures/tsne-padel.csv: data/GenoTox-database.csv pyrrolizidine-alkaloids/PA-Padel-2D_m2.csv
	scripts/padel-descriptors.rb $^ > $@

# t-SNE plot and its input table for the second descriptor set.
# NOTE(review): the prerequisite is not passed to mp2d-distances.rb; the
# script presumably locates its input itself - confirm.
figures/tsne-mp2d.png: figures/tsne-mp2d.csv
	scripts/tsne-mp2d.R
figures/tsne-mp2d.csv: 10-fold-crossvalidations/lazar/independent_variables
	scripts/mp2d-distances.rb > $@

# ROC plot and its input table derived from the crossvalidation summary.
figures/roc.png: figures/roc.csv
	scripts/roc.R
figures/roc.csv: $(CV_SUMMARY)
	scripts/summary2roc.rb $< > $@
# tables
# The PA table is produced by running its generator script; the script is
# the only declared prerequisite.
tables/pa-tab.tex: scripts/pa-table.rb
	$< > $@

# One summary table per model family. All three share a single recipe; the
# argument given to summary2table.rb is selected per target.
summary_tables = tables/lazar-summary.csv tables/r-summary.csv tables/tensorflow-summary.csv
tables/lazar-summary.csv: summary_key = lazar
tables/r-summary.csv: summary_key = R
tables/tensorflow-summary.csv: summary_key = tensorflow
$(summary_tables): $(CV_SUMMARY)
	scripts/summary2table.rb $(summary_key) > $@
# crossvalidation summary
# Condense all confusion matrices into the summary file. The summary tables,
# the ROC data and the rendered manuscript all depend on this file, so it is
# written to a temporary name first and only moved into place once the script
# has succeeded; a failing run then never leaves a truncated summary that
# looks up to date.
$(CV_SUMMARY): $(CONFUSION_MATRICES)
	scripts/confusion-matrix-summary.rb $^ > $@.tmp
	mv -f $@.tmp $@
# PA predictions
# These targets are not among the prerequisites listed for "all" in this
# file; request them by name.

# The first script argument is the directory that holds the prerequisite,
# the second one the prerequisite itself.
$(PA_LAZAR_DIR)/pa-padel-predictions.csv: 10-fold-crossvalidations/lazar-pa-padel/pa_independent_variables
	../lazar/bin/batch_padel_classification.rb $(<D) $< > $@

# Predictions computed from the SMILES table with the model directory
# ../lazar/models/mutagenicity.
$(PA_LAZAR_DIR)/pa-mp2d-predictions.csv: $(PA_LAZAR_DIR)/pa-smiles.csv
	../lazar/bin/batch_fingerprint_classification.rb ../lazar/models/mutagenicity $< > $@

# Keep columns 1 and 4 of the semicolon-separated source file and turn the
# remaining separator into a comma.
$(PA_LAZAR_DIR)/pa-smiles.csv: $(PA_DIR)/180920_PA_complete_SMILES.csv
	cut -d ';' -f1,4 $< | sed 's/;/,/' > $@
# confusion matrices
## lazar
# Copy the lazar crossvalidation matrices into the common directory.
# Each rule depends on the matrix file itself rather than on its directory:
# a directory's timestamp does not change when a file inside it is rewritten
# in place, so a directory prerequisite would leave the copies stale.
$(CONFUSION_MATRICES_DIR)/lazar-all.csv: $(LAZAR_CONFUSION_MATRIX_DIR)/all
	cp $< $@
$(CONFUSION_MATRICES_DIR)/lazar-high-confidence.csv: $(LAZAR_CONFUSION_MATRIX_DIR)/high_confidence
	cp $< $@
$(CONFUSION_MATRICES_DIR)/lazar-padel-all.csv: $(LAZAR_PADEL_CONFUSION_MATRIX_DIR)/all
	cp $< $@
$(CONFUSION_MATRICES_DIR)/lazar-padel-high-confidence.csv: $(LAZAR_PADEL_CONFUSION_MATRIX_DIR)/high_confidence
	cp $< $@
## R
# Static pattern rule: R-<model>.csv is computed from
# Sgl-Observations-<model>.csv for each of the listed models.
r_cv_matrices = $(addprefix $(CONFUSION_MATRICES_DIR)/R-,SVM.csv RF.csv DL.csv)
$(r_cv_matrices): $(CONFUSION_MATRICES_DIR)/R-%.csv: $(R_CV_DIR)/Sgl-Observations-%.csv
	scripts/cv-r-confusion-matrix.rb $< > $@
## tensorflow
# Static pattern rule: tensorflow-<model>.csv is computed from
# pred.<model>.sorted.csv for each of the listed models.
tensorflow_cv_matrices = $(addprefix $(CONFUSION_MATRICES_DIR)/tensorflow-,lr.v3.csv lr2.v3.csv nn.v3.csv rf.v3.csv)
$(tensorflow_cv_matrices): $(CONFUSION_MATRICES_DIR)/tensorflow-%.csv: $(TENSORFLOW_CV_DIR)/pred.%.sorted.csv
	scripts/cv-tensorflow-confusion-matrix.rb $< > $@
|