diff options
author | Christoph Helma <helma@in-silico.ch> | 2021-03-20 00:14:10 +0100 |
---|---|---|
committer | Christoph Helma <helma@in-silico.ch> | 2021-03-20 00:14:10 +0100 |
commit | 7bbe4c444523f281d07f79aa8d0a4719668c3c80 (patch) | |
tree | a79efa1decd4284d9454200fd987015826685b6d /scripts/pa-groups.R | |
parent | eca6889b784583bc0e9fb338d7b53d4c9b530dc4 (diff) |
manuscript update
Diffstat (limited to 'scripts/pa-groups.R')
-rwxr-xr-x | scripts/pa-groups.R | 60 |
1 files changed, 38 insertions, 22 deletions
```diff
diff --git a/scripts/pa-groups.R b/scripts/pa-groups.R
index 3c6ce2c..77b358b 100755
--- a/scripts/pa-groups.R
+++ b/scripts/pa-groups.R
@@ -1,33 +1,49 @@
 #!/usr/bin/env Rscript
 library(ggplot2)
 args = commandArgs(trailingOnly=TRUE)
-groups = read.csv(args[1],header=T)
-data = read.csv(args[2])
-for (i in c(2:10)) {
-  name = names(groups)[i]
-  cols = c(2:15)
-  group = data[groups[i] == 1,cols]
-  freq = 100*colSums(group,na.rm=TRUE)/colSums(!is.na(group))
-  algos = toupper(names(data)[cols])
-  algos = gsub("HIGH",'HC',algos)
-  algos = gsub(".CONFIDENCE",'',algos)
-  algos = gsub("\\.",'-',algos)
-  algos <- factor(algos,levels=rev(c(
-    "MP2D-LAZAR-ALL",
+freq = read.csv(args[1],header=T,quote="'",sep=",")
+
+models = factor(freq$Model,levels=rev(c(
     "MP2D-LAZAR-HC",
+    "MP2D-LAZAR-ALL",
     "MP2D-RF",
-    "MP2D-LR",
-    "MP2D-LR2",
+    "MP2D-LR-sgd",
+    "MP2D-LR-scikit",
     "MP2D-NN",
     "MP2D-SVM",
-    "CDK-LAZAR-ALL",
     "CDK-LAZAR-HC",
+    "CDK-LAZAR-ALL",
     "CDK-RF",
-    "CDK-LR",
-    "CDK-LR2",
+    "CDK-LR-sgd",
+    "CDK-LR-scikit",
     "CDK-NN",
     "CDK-SVM"
-  )))
-  plot = ggplot(data.frame(freq),aes(x=freq,y=algos)) + geom_bar(stat="identity") + xlab("% mutagenic") + ylab(element_blank()) + xlim(c(0,100))# + theme(axis.text.x = element_text(angle=90))
-  ggsave(paste("figures/",name,".png",sep=""))
-}
+)))
+
+colors <- c(
+"MP2D-LAZAR-HC" = "#0072B2",
+"MP2D-LAZAR-ALL" = "#56B4E9",
+"MP2D-RF" = "#009E73",
+"MP2D-LR-sgd" = "#F0E442",
+"MP2D-LR-scikit" = "#D55E00",
+"MP2D-NN" = "#CC79A7",
+"MP2D-SVM" = "#E69F00",
+"CDK-LAZAR-HC" = "#0072B2",
+"CDK-LAZAR-ALL" = "#56B4E9",
+"CDK-RF" = "#009E73",
+"CDK-LR-sgd" = "#F0E442",
+"CDK-LR-scikit" = "#D55E00",
+"CDK-NN" = "#CC79A7",
+"CDK-SVM" = "#E69F00"
+)
+
+ggplot(freq,aes(Frequency,models,fill=models)) +
+  geom_bar(stat="identity",show.legend=F) +
+  xlab("% mutagenic") +
+  ylab(element_blank()) +
+  xlim(c(0,100)) +
+  scale_fill_manual(values = colors) +
+  facet_wrap(~PA.Group) +
+  theme_minimal()
+
+ggsave(args[2])
```