summary | refs | log | tree | commit | diff
path: root/scripts
diff options
context:
space:
mode:
author: Christoph Helma <helma@in-silico.ch>, 2020-09-30 14:44:23 +0200
committer: Christoph Helma <helma@in-silico.ch>, 2020-09-30 14:44:23 +0200
commit: fc6710ba085990f204fbb0e5c2d686f73811dead (patch)
tree: 2fdf4d7a17eba2fa9326b663f4ed7b74a76bfc29 /scripts
parent: 70a221983448a818535239704fdd4771c151957d (diff)
padel tsne diagram
Diffstat (limited to 'scripts')
-rwxr-xr-x scripts/mp2d-tsne.R 12
-rwxr-xr-x scripts/padel-descriptors.rb 41
-rwxr-xr-x scripts/padel-tsne.R 15
3 files changed, 63 insertions, 5 deletions
diff --git a/scripts/mp2d-tsne.R b/scripts/mp2d-tsne.R
index 853b408..0877622 100755
--- a/scripts/mp2d-tsne.R
+++ b/scripts/mp2d-tsne.R
@@ -7,8 +7,10 @@ data$Mutagenicity <- NULL
m <- as.matrix(data)
dist <- as.dist(m)
tsne <- Rtsne(dist,is_distance=T)
-tsne_plot <- data.frame(x = tsne$Y[,1], y = tsne$Y[,2])
-colors <- c("PA" = "#00BFC4", "mutagen" = "#F8766D", "non-mutagen" = "#7CAE00")
-plot <- ggplot(tsne_plot)
-plot + geom_point(aes(x=x, y=y, color = labels)) + xlab(element_blank()) + ylab(element_blank()) + theme(axis.ticks = element_blank(), axis.text = element_blank(), legend.title=element_blank()) + scale_color_manual(values = colors)
-ggsave("figures/tsne-mp2d.png")
+#write.csv(tsne,"tsne.csv")
+write.csv(data.frame(x = tsne$Y[,1], y = tsne$Y[,2]),"tsne.csv")
+#tsne_plot <- data.frame(x = tsne$Y[,1], y = tsne$Y[,2])
+#colors <- c("PA" = "#00BFC4", "mutagen" = "#F8766D", "non-mutagen" = "#7CAE00")
+#plot <- ggplot(tsne_plot)
+#plot + geom_point(aes(x=x, y=y, color = labels)) + xlab(element_blank()) + ylab(element_blank()) + theme(axis.ticks = element_blank(), axis.text = element_blank(), legend.title=element_blank()) + scale_color_manual(values = colors)
+#ggsave("figures/tsne-mp2d.png")
diff --git a/scripts/padel-descriptors.rb b/scripts/padel-descriptors.rb
new file mode 100755
index 0000000..691137f
--- /dev/null
+++ b/scripts/padel-descriptors.rb
@@ -0,0 +1,41 @@
+#!/usr/bin/env ruby
+# Merge the descriptor tables of a training set and a PA set into one CSV (stdout).
+# Usage: padel-descriptors.rb TRAIN.csv PA.csv
+#   TRAIN.csv: comma separated, quoted header; first column id, last column activity
+#   PA.csv:    semicolon separated; first column id, decimal commas allowed
+# Only descriptors named in both headers are written. The header line has no id
+# field, i.e. one field fewer than the data rows; presumably intentional so that
+# R's read.csv uses the ids as row names -- TODO confirm.
+abort "Usage: #{$0} TRAIN.csv PA.csv" if ARGV.size < 2
+
+train = File.readlines(ARGV[0])
+pa = File.readlines(ARGV[1])
+train_header = train.shift.chomp.split(",").collect { |i| i.delete('"') }
+pa_header = pa.shift.chomp.split(";")
+train_header.shift # id column
+train_header.pop   # activity column
+pa_header.shift    # id column
+
+common = train_header & pa_header
+
+# Values of one data row for the common descriptors, in output column order.
+# A decimal comma is converted to a point; non-numeric text becomes 0.0 (to_f).
+row_values = lambda do |header, items|
+  descriptors = {}
+  items.each_with_index { |item, i| descriptors[header[i]] = item.sub(',', '.').to_f }
+  common.collect { |h| descriptors[h] }
+end
+
+puts((["Mutagenicity"] + common).join(","))
+train.each do |line|
+  next if line.strip.empty? # tolerate blank/trailing lines
+  items = line.chomp.split(",")
+  id = "TRAIN" + items.shift.delete('"')
+  act = items.pop == '"1"' ? "mutagen" : "non-mutagen"
+  puts(([id, act] + row_values.call(train_header, items)).join(","))
+end
+pa.each do |line|
+  next if line.strip.empty? # tolerate blank/trailing lines
+  items = line.chomp.split(";")
+  puts((["PA" + items.shift, "PA"] + row_values.call(pa_header, items)).join(","))
+end
diff --git a/scripts/padel-tsne.R b/scripts/padel-tsne.R
new file mode 100755
index 0000000..b8e9763
--- /dev/null
+++ b/scripts/padel-tsne.R
@@ -0,0 +1,15 @@
+#!/usr/bin/env Rscript
+# t-SNE of figures/tsne-padel.csv: writes coordinates (padel-tsne.csv) and a scatter plot (PNG).
+library(Rtsne)
+library(ggplot2)
+data <- read.csv("figures/tsne-padel.csv")
+labels <- data$Mutagenicity # class label per row, used only for point colors
+data$Mutagenicity <- NULL
+tsne <- Rtsne(as.matrix(data), verbose = TRUE, check_duplicates = FALSE) # skip Rtsne's duplicate-row check
+tsne_plot <- data.frame(x = tsne$Y[, 1], y = tsne$Y[, 2])
+write.csv(tsne_plot, "padel-tsne.csv")
+colors <- c("PA" = "#00BFC4", "mutagen" = "#F8766D", "non-mutagen" = "#7CAE00")
+plot <- ggplot(tsne_plot) + geom_point(aes(x = x, y = y, color = labels)) +
+  xlab(element_blank()) + ylab(element_blank()) + scale_color_manual(values = colors) +
+  theme(axis.ticks = element_blank(), axis.text = element_blank(), legend.title = element_blank())
+ggsave("figures/tsne-padel.png", plot = plot) # explicit plot: no reliance on last_plot()/auto-print