diff options
author | Christoph Helma <helma@in-silico.ch> | 2020-09-30 14:44:23 +0200 |
---|---|---|
committer | Christoph Helma <helma@in-silico.ch> | 2020-09-30 14:44:23 +0200 |
commit | fc6710ba085990f204fbb0e5c2d686f73811dead (patch) | |
tree | 2fdf4d7a17eba2fa9326b663f4ed7b74a76bfc29 /scripts | |
parent | 70a221983448a818535239704fdd4771c151957d (diff) |
padel tsne diagram
Diffstat (limited to 'scripts')
-rwxr-xr-x | scripts/mp2d-tsne.R | 12 | ||||
-rwxr-xr-x | scripts/padel-descriptors.rb | 41 | ||||
-rwxr-xr-x | scripts/padel-tsne.R | 15 |
3 files changed, 63 insertions, 5 deletions
diff --git a/scripts/mp2d-tsne.R b/scripts/mp2d-tsne.R
index 853b408..0877622 100755
--- a/scripts/mp2d-tsne.R
+++ b/scripts/mp2d-tsne.R
@@ -7,8 +7,10 @@ data$Mutagenicity <- NULL
 m <- as.matrix(data)
 dist <- as.dist(m)
 tsne <- Rtsne(dist,is_distance=T)
-tsne_plot <- data.frame(x = tsne$Y[,1], y = tsne$Y[,2])
-colors <- c("PA" = "#00BFC4", "mutagen" = "#F8766D", "non-mutagen" = "#7CAE00")
-plot <- ggplot(tsne_plot)
-plot + geom_point(aes(x=x, y=y, color = labels)) + xlab(element_blank()) + ylab(element_blank()) + theme(axis.ticks = element_blank(), axis.text = element_blank(), legend.title=element_blank()) + scale_color_manual(values = colors)
-ggsave("figures/tsne-mp2d.png")
+#write.csv(tsne,"tsne.csv")
+write.csv(data.frame(x = tsne$Y[,1], y = tsne$Y[,2]),"tsne.csv")
+#tsne_plot <- data.frame(x = tsne$Y[,1], y = tsne$Y[,2])
+#colors <- c("PA" = "#00BFC4", "mutagen" = "#F8766D", "non-mutagen" = "#7CAE00")
+#plot <- ggplot(tsne_plot)
+#plot + geom_point(aes(x=x, y=y, color = labels)) + xlab(element_blank()) + ylab(element_blank()) + theme(axis.ticks = element_blank(), axis.text = element_blank(), legend.title=element_blank()) + scale_color_manual(values = colors)
+#ggsave("figures/tsne-mp2d.png")
diff --git a/scripts/padel-descriptors.rb b/scripts/padel-descriptors.rb
new file mode 100755
index 0000000..691137f
--- /dev/null
+++ b/scripts/padel-descriptors.rb
@@ -0,0 +1,41 @@
+#!/usr/bin/env ruby
+train = File.readlines(ARGV[0])
+pa = File.readlines(ARGV[1])
+train_header = train.shift.chomp.split(",").collect{|i| i.gsub('"','')}
+pa_header = pa.shift.chomp.split(";")
+train_header.shift
+train_header.pop
+pa_header.shift
+
+#train_only = train_header - pa_header
+#pa_only = pa_header - train_header
+#puts train_only.size.to_s+ " training set descriptors missing from PAs:"
+#puts train_only.join(",")
+#puts
+#puts pa_only.size.to_s+ " PA descriptors not in training set:"
+#puts pa_only.join(",")
+#exit
+
+common = train_header & pa_header
+
+puts (["Mutagenicity"]+common).join(",")
+train.each do |line|
+  items = line.chomp.split ","
+  id = items.shift
+  id = "TRAIN"+id.gsub('"','')
+  act = items.pop
+  act == '"1"' ? act = "mutagen" : act = "non-mutagen"
+  descriptors = {}
+  items.each_with_index {|item,i| descriptors[train_header[i]] = item.sub(',','.').to_f }
+  puts ([id,act]+common.collect{|h| descriptors[h]}).join(",")
+end
+
+
+pa.each do |line|
+  items = line.chomp.split ";"
+  id = "PA"+items.shift
+  act = "PA"
+  descriptors = {}
+  items.each_with_index {|item,i| descriptors[pa_header[i]] = item.sub(',','.').to_f }
+  puts ([id,act]+common.collect{|h| descriptors[h]}).join(",")
+end
diff --git a/scripts/padel-tsne.R b/scripts/padel-tsne.R
new file mode 100755
index 0000000..b8e9763
--- /dev/null
+++ b/scripts/padel-tsne.R
@@ -0,0 +1,15 @@
+#!/usr/bin/env Rscript
+library(Rtsne)
+library(ggplot2)
+data <- read.csv("figures/tsne-padel.csv")
+labels <- data$Mutagenicity
+data$Mutagenicity <- NULL
+m <- as.matrix(data)
+tsne <- Rtsne(m,verbose=T,check_duplicates=F)
+#write.csv(tsne,"tsne.csv")
+write.csv(data.frame(x = tsne$Y[,1], y = tsne$Y[,2]),"padel-tsne.csv")
+tsne_plot <- data.frame(x = tsne$Y[,1], y = tsne$Y[,2])
+colors <- c("PA" = "#00BFC4", "mutagen" = "#F8766D", "non-mutagen" = "#7CAE00")
+plot <- ggplot(tsne_plot)
+plot + geom_point(aes(x=x, y=y, color = labels)) + xlab(element_blank()) + ylab(element_blank()) + theme(axis.ticks = element_blank(), axis.text = element_blank(), legend.title=element_blank()) + scale_color_manual(values = colors)
+ggsave("figures/tsne-padel.png")