# Makefile (repository path: root/Makefile, blob 856d87c55ec75ae15e06dc4ef8bf54ebf539dc69)
# Manuscript
# Requirements:
# pandoc-scholar (https://github.com/pandoc-scholar/pandoc-scholar) in PANDOC_SCHOLAR_PATH
# pandoc-placetable (https://github.com/mb21/pandoc-placetable)
# pandoc-crossref and pandoc-citeproc (both named as filters in PANDOC_WRITER_OPTIONS)
# mustache command-line tool (used to render ARTICLE_FILE from mutagenicity.md)
#
# These settings are assigned before the pandoc-scholar Makefile is included
# further down; presumably that Makefile reads them -- confirm against the
# pandoc-scholar documentation.

ARTICLE_FILE          = mutagenicity.mustache.md
PANDOC_SCHOLAR_PATH   = pandoc-scholar
OUTFILE_PREFIX        = mutagenicity
# Output formats to build. Currently disabled alternatives: latex docx html odt epub
# The note lives on its own line on purpose: with a trailing comment, the
# whitespace in front of the '#' becomes part of the variable's value.
DEFAULT_EXTENSIONS    = pdf
PANDOC_WRITER_OPTIONS = --filter=pandoc-placetable --filter=pandoc-crossref --filter=pandoc-citeproc
TEMPLATE_FILE_LATEX   = pandoc-scholar.latex

# Experiments

# Cross-validation result locations used as inputs by the confusion-matrix
# rules of this Makefile.
LAZAR_CONFUSION_MATRIX_DIR       = 10-fold-crossvalidations/lazar/crossvalidation/confusion_matrices
LAZAR_PADEL_CONFUSION_MATRIX_DIR = 10-fold-crossvalidations/lazar-padel/crossvalidation/confusion_matrices
R_CV_DIR                         = 10-fold-crossvalidations/R
TENSORFLOW_CV_DIR                = 10-fold-crossvalidations/tensorflow

# One confusion-matrix CSV per model, collected in a single directory.
# The order of the list is significant: it is the order in which the files
# are handed to the summary script through $^.
CONFUSION_MATRICES_DIR = 10-fold-crossvalidations/confusion-matrices
CONFUSION_MATRICES = $(addprefix $(CONFUSION_MATRICES_DIR)/, \
    lazar-all.csv \
    lazar-high-confidence.csv \
    lazar-padel-all.csv \
    lazar-padel-high-confidence.csv \
    R-RF.csv \
    R-SVM.csv \
    R-DL.csv \
    tensorflow-rf.v3.csv \
    tensorflow-lr.v3.csv \
    tensorflow-lr2.v3.csv \
    tensorflow-nn.v3.csv)

# YAML file that aggregates all confusion matrices.
CV_SUMMARY = 10-fold-crossvalidations/summary.yaml

# Pyrrolizidine alkaloid (PA) predictions
PA_DIR       = pyrrolizidine-alkaloids
PA_LAZAR_DIR = $(PA_DIR)/lazar

# Manuscript artefacts: prerequisites of both `all` and the rendered
# manuscript source.
TABLES  = $(addprefix tables/, lazar-summary.csv r-summary.csv tensorflow-summary.csv pa-tab.tex)
FIGURES = $(addprefix figures/, roc.png tsne-mp2d.png tsne-padel.png)

# Default goal: build the tables, the figures and the manuscript PDF.
# No rule for mutagenicity.pdf is defined in this file; it is expected to come
# from the pandoc-scholar Makefile included below -- confirm.
all: $(TABLES) $(FIGURES) mutagenicity.pdf

# `all` names a command, not a file; without this declaration a stray file
# called "all" would make the goal look up to date.
.PHONY: all

# Most recipes in this file create their target through a shell redirection,
# which leaves a (possibly empty) file behind even when the generating script
# fails. Deleting the target on error keeps such a file from being treated as
# up to date on the next run.
.DELETE_ON_ERROR:

include $(PANDOC_SCHOLAR_PATH)/Makefile

# Render the manuscript source by filling the template mutagenicity.md with the
# values from the cross-validation summary.
# The tables and figures are listed as prerequisites so that they are up to
# date before the manuscript is built, but they are not inputs to mustache:
# only the data file and the template are passed on the command line ($^ would
# also hand it every CSV, TeX and PNG file).
mutagenicity.mustache.md: $(CV_SUMMARY) mutagenicity.md $(TABLES) $(FIGURES)
	mustache $(CV_SUMMARY) mutagenicity.md > $@

# figures

# Descriptor table for the PaDEL t-SNE figure: the Ruby script receives both
# input CSV files as arguments and its standard output becomes the target.
figures/tsne-padel.csv: data/GenoTox-database.csv $(PA_DIR)/PA-Padel-2D_m2.csv
	scripts/padel-descriptors.rb $^ > $@

# The R script is called without arguments; it presumably reads the CSV above
# and writes the PNG using paths fixed inside the script -- confirm in
# scripts/tsne-padel.R.
figures/tsne-padel.png: figures/tsne-padel.csv
	scripts/tsne-padel.R
	
# The R script is called without arguments; it presumably reads
# figures/tsne-mp2d.csv and writes the PNG using paths fixed inside the
# script -- confirm in scripts/tsne-mp2d.R.
figures/tsne-mp2d.png: figures/tsne-mp2d.csv
	scripts/tsne-mp2d.R

# The script gets no arguments, so the prerequisite only acts as a rebuild
# trigger (the script presumably reads that path itself -- confirm).
# Output goes to $@ so the recipe cannot drift from the rule's target name.
figures/tsne-mp2d.csv: 10-fold-crossvalidations/lazar/independent_variables
	scripts/mp2d-distances.rb > $@

# ROC input data: the summary file is passed to the script and the script's
# standard output becomes the CSV.
figures/roc.csv: $(CV_SUMMARY)
	scripts/summary2roc.rb $< > $@

# The R script is called without arguments; it presumably reads figures/roc.csv
# and writes the PNG using paths fixed inside the script -- confirm in
# scripts/roc.R.
figures/roc.png: figures/roc.csv
	scripts/roc.R

# tables

# PA table: the only declared prerequisite is the generating script itself
# ($<), whose standard output becomes the table. Any data files the script
# reads are not tracked here -- NOTE(review): confirm whether the PA
# prediction files should be prerequisites.
tables/pa-tab.tex: scripts/pa-table.rb
	$< > $@

# Summary tables: one shared recipe, with the single argument of
# summary2table.rb chosen per target. The summary file is only a rebuild
# trigger; it is not passed to the script, which presumably locates it
# itself -- confirm.
tables/lazar-summary.csv:      SUMMARY2TABLE_ARG = lazar
tables/r-summary.csv:          SUMMARY2TABLE_ARG = R
tables/tensorflow-summary.csv: SUMMARY2TABLE_ARG = tensorflow

tables/lazar-summary.csv tables/r-summary.csv tables/tensorflow-summary.csv: $(CV_SUMMARY)
	scripts/summary2table.rb $(SUMMARY2TABLE_ARG) > $@

# crossvalidation summary

# All confusion matrices are passed to the script in the order given by
# CONFUSION_MATRICES; its standard output becomes the summary file.
$(CV_SUMMARY): $(CONFUSION_MATRICES)
	scripts/confusion-matrix-summary.rb $^ > $@

# PA predictions
#
# The two prediction files below are not prerequisites of any rule visible in
# this Makefile, so they are only built when requested explicitly.
# The lazar scripts and models are taken from ../lazar, i.e. a directory next
# to this one.

# Keep the semicolon-separated columns 1 and 4 of the SMILES list and replace
# the first semicolon of each resulting line with a comma.
$(PA_LAZAR_DIR)/pa-smiles.csv: $(PA_DIR)/180920_PA_complete_SMILES.csv
	cut -f1,4 -d ';' $< | sed 's/;/,/' > $@

# Fingerprint-based predictions for the SMILES file produced above.
$(PA_LAZAR_DIR)/pa-mp2d-predictions.csv: $(PA_LAZAR_DIR)/pa-smiles.csv
	../lazar/bin/batch_fingerprint_classification.rb ../lazar/models/mutagenicity $< > $@

# PaDEL-based predictions: the script receives the directory that contains the
# prerequisite ($(<D)) followed by the prerequisite itself.
$(PA_LAZAR_DIR)/pa-padel-predictions.csv: 10-fold-crossvalidations/lazar-pa-padel/pa_independent_variables
	../lazar/bin/batch_padel_classification.rb $(<D) $< > $@

# confusion matrices

## lazar
# Copy the `all` and `high_confidence` files from the lazar cross-validation
# directories into the common confusion-matrix directory.
# Each rule depends on the file it copies rather than on the containing
# directory: a directory's timestamp does not follow edits to the files inside
# it, and it changes whenever unrelated entries are added or removed.
$(CONFUSION_MATRICES_DIR)/lazar-all.csv: $(LAZAR_CONFUSION_MATRIX_DIR)/all
	cp $< $@

$(CONFUSION_MATRICES_DIR)/lazar-high-confidence.csv: $(LAZAR_CONFUSION_MATRIX_DIR)/high_confidence
	cp $< $@

$(CONFUSION_MATRICES_DIR)/lazar-padel-all.csv: $(LAZAR_PADEL_CONFUSION_MATRIX_DIR)/all
	cp $< $@

$(CONFUSION_MATRICES_DIR)/lazar-padel-high-confidence.csv: $(LAZAR_PADEL_CONFUSION_MATRIX_DIR)/high_confidence
	cp $< $@

## R
# Static pattern rule covering the SVM, RF and DL models: each
# R-<model>.csv is the standard output of the conversion script run on
# Sgl-Observations-<model>.csv from the R cross-validation directory.
$(addprefix $(CONFUSION_MATRICES_DIR)/R-,SVM.csv RF.csv DL.csv): \
  $(CONFUSION_MATRICES_DIR)/R-%.csv: $(R_CV_DIR)/Sgl-Observations-%.csv
	scripts/cv-r-confusion-matrix.rb $< > $@

## tensorflow
# Static pattern rule covering the lr, lr2, nn and rf models: each
# tensorflow-<model>.v3.csv is the standard output of the conversion script
# run on pred.<model>.v3.sorted.csv from the tensorflow cross-validation
# directory.
$(addprefix $(CONFUSION_MATRICES_DIR)/tensorflow-,lr.v3.csv lr2.v3.csv nn.v3.csv rf.v3.csv): \
  $(CONFUSION_MATRICES_DIR)/tensorflow-%.v3.csv: $(TENSORFLOW_CV_DIR)/pred.%.v3.sorted.csv
	scripts/cv-tensorflow-confusion-matrix.rb $< > $@