summary refs log tree commit diff
path: root/scripts
diff options
context:
space:
mode:
author Christoph Helma <helma@in-silico.ch> 2020-10-19 22:01:36 +0200
committer Christoph Helma <helma@in-silico.ch> 2020-10-19 22:01:36 +0200
commit 73f236606451b52a483a073dc43f786bc9f63d9a (patch)
tree 6a3058c890d2a3e0e92d57727902a488858877c3 /scripts
parent f1a35fa30d0f416acd9e0d28255f1939823f4e34 (diff)
lazar-padel pa predictions fixed
Diffstat (limited to 'scripts')
-rwxr-xr-x scripts/pa-table.rb 88
-rwxr-xr-x scripts/pa-tex-table.rb 52
2 files changed, 78 insertions, 62 deletions
diff --git a/scripts/pa-table.rb b/scripts/pa-table.rb
index d808fa1..e9c8902 100755
--- a/scripts/pa-table.rb
+++ b/scripts/pa-table.rb
@@ -1,98 +1,62 @@
#!/usr/bin/env ruby
-puts '
-\definecolor{red}{rgb}{1,0,0}
-\definecolor{lightred}{rgb}{0.5,0,0}
-\definecolor{green}{rgb}{0,1,0}
-\definecolor{lightgreen}{rgb}{0,0.5,0}
-\definecolor{grey}{rgb}{0.5,0.5,0.5}
-\tiny
-\begin{longtable}{rcccccccccc}
-\caption{Summary of pyrrolizidine alkaloid predictions: red: mutagen, green: non-mutagen, grey: no prediction, dark red/green: low confidence} \\\\
-\label{tab:pa}
- & & \multicolumn{2}{c}{lazar} & \multicolumn{3}{c}{R} & \multicolumn{4}{c}{Tensorflow}\\\\
-'
-#header = ["ID","measured","lazar"]
-header = ["ID","Measured","MP2D","PaDEL"]
+header = ["ID","CID","Name","SMILES","Canonical SMILES","Measured","lazar-MP2D","lazar-MP2D-high-confidence","lazar-PaDEL","lazar-PaDEL-high-confidence"]
tab = []
-=begin
+i = 0
File.read("pyrrolizidine-alkaloids/180920_PA_complete_SMILES.csv").each_line do |l|
- unless l.match("SMILES")
- #tab << '\verb '+l.chomp.split(";")[2].sub('1: ','')#+'}'
- tab << l.chomp.split(";")[0]
+ if i > 0
+ id,cid,name,smi = l.chomp.split(";")
+ name.sub!('1: ','')
+ tab << [id,cid,'"'+name+'"','"'+smi+'"']
end
+ i += 1
end
i = 0
-=end
File.read("pyrrolizidine-alkaloids/lazar/pa-mp2d-predictions.csv").each_line do |l|
- unless l.match("SMILES")
- id,smi,exp,mut,p0,p1,max_sim,nn = l.chomp.split(",")
- row = id
- if exp == "1"
- row += ' & \cellcolor{red} '
- elsif exp == "0"
- row += ' & \cellcolor{green} '
- else
- row += ' & \cellcolor{grey} '
- end
- if mut == "1"
- max_sim.to_f < 0.5 ? row += '& \cellcolor{lightred} ' : row += '& \cellcolor{red} '
- elsif mut == "0"
- max_sim.to_f < 0.5 ? row += '& \cellcolor{lightgreen} ' : row += '& \cellcolor{green} '
- else #if mut.nil? or mut.empty?
- row += '& \cellcolor{grey} '
- end
- tab << row
- #i += 1
+ if i > 0
+ id,cansmi,exp,mut,p0,p1,max_sim,nn = l.chomp.split(",")
+ max_sim.to_f < 0.5? hc = "F" : hc = "T"
+ hc = "" if mut.empty?
+ tab[i-1] += ['"'+cansmi+'"',exp,mut,hc]
end
+ i += 1
end
i=0
File.read("pyrrolizidine-alkaloids/lazar/pa-padel-predictions.csv").each_line do |l|
- smi,exp,mut,p0,p1,max_sim,nn = l.chomp.split(",")
- #p mut
- if mut == "1"
- max_sim.to_f < 0.5 ? tab[i] += '& \cellcolor{lightred} ' : tab[i] += '& \cellcolor{red} '
- elsif mut == "0"
- max_sim.to_f < 0.5 ? tab[i] += '& \cellcolor{lightgreen} ' : tab[i] += '& \cellcolor{green} '
- else #if mut.nil? or mut.empty?
- tab[i] += '& \cellcolor{grey} '
- end
+ #if i > 0
+ cansmi,exp,mut,p0,p1,max_sim,nn = l.chomp.split(",")
+ max_sim.to_f < 0.5? hc = "F" : hc = "T"
+ hc = "" if mut.empty?
+ tab[i] += [mut,hc]
+ #end
i += 1
end
Dir["pyrrolizidine-alkaloids/R/PA.*.outcome.csv"].each do |r|
- header << r.sub('pyrrolizidine-alkaloids/R/PA.','').sub('.outcome.csv','')
+ header << "R-"+r.sub('pyrrolizidine-alkaloids/R/PA.','').sub('.outcome.csv','')
i = 0
File.read(r).each_line do |l|
if i > 0
items = l.chomp.split(";")
items.shift
if items.uniq.include? "1"
- tab[i-1] << ' & \cellcolor{red}'
+ tab[i-1] << 1
elsif items.uniq.include? "0"
- tab[i-1] << ' & \cellcolor{green}'
+ tab[i-1] << 0
end
end
i += 1
end
end
Dir["pyrrolizidine-alkaloids/tensorflow/pred.*.v3-ext-Padel-2D.csv"].each do |r|
- header << r.sub('pyrrolizidine-alkaloids/tensorflow/pred.','').sub('.v3-ext-Padel-2D.csv','').sub("lr2","LR-scikit").sub("lr","LR-sgd").sub("rf","RF").sub("nn","NN")
+ header << "TF-"+r.sub('pyrrolizidine-alkaloids/tensorflow/pred.','').sub('.v3-ext-Padel-2D.csv','').sub("lr2","LR-scikit").sub("lr","LR-sgd").sub("rf","RF").sub("nn","NN")
i = 0
File.read(r).each_line do |l|
if i > 0
id,pred = l.chomp.split(",")
- pred.to_f > 0.5 ? tab[i-1] << ' & \cellcolor{red}' : tab[i-1] << ' & \cellcolor{green}'
+ pred.to_f > 0.5 ? tab[i-1] << 1 : tab[i-1] << 0
end
i += 1
end
end
-#tab.collect!{|t| t + '\cellcolor{grey} \\\\'}
-puts header.join(" & ")+" \\\\"
-puts '\hline'
-puts '\renewcommand{\arraystretch}{0.075}'
-tab.collect!{|t| t + ''}
-print tab.join(" \\\\ \n")
-puts ' \\\\
-\end{longtable}
-\normalsize
-'
+puts header.join(",")
+puts tab.collect{|r| r.join(",")}.join("\n")
diff --git a/scripts/pa-tex-table.rb b/scripts/pa-tex-table.rb
new file mode 100755
index 0000000..06f7b26
--- /dev/null
+++ b/scripts/pa-tex-table.rb
@@ -0,0 +1,52 @@
+#!/usr/bin/env ruby
+
+puts '
+\definecolor{red}{rgb}{1,0,0}
+\definecolor{lightred}{rgb}{0.5,0,0}
+\definecolor{green}{rgb}{0,1,0}
+\definecolor{lightgreen}{rgb}{0,0.5,0}
+\definecolor{grey}{rgb}{0.5,0.5,0.5}
+\tiny
+\begin{longtable}{rcccccccccc}
+\caption{Summary of pyrrolizidine alkaloid predictions: red: mutagen, green: non-mutagen, grey: no prediction, dark red/green: low confidence} \\\\
+\label{tab:pa}
+ & & \multicolumn{2}{c}{lazar} & \multicolumn{3}{c}{R} & \multicolumn{4}{c}{Tensorflow}\\\\
+ID & Measured & MP2D & PaDEL & DL & RF & SVM & LR-sgd & LR-scikit & NN & RF \\\\
+\hline
+\renewcommand{\arraystretch}{0.075}
+'
+File.read(ARGV[0]).each_line do |l|
+ unless l.match("SMILES")
+ id,cid,name,smi,cansmi,exp,lazar_MP2D,lazar_MP2D_high_confidence,lazar_PaDEL,lazar_PaDEL_high_confidence,r_DL,r_RF,r_SVM,tf_lr_sgd,tf_lr_scikit,tf_NN,tf_RF = l.chomp.split(",")
+ row = id
+ if exp == "1"
+ row += ' & \cellcolor{red}'
+ elsif exp == "0"
+ row += ' & \cellcolor{green}'
+ else
+ row += ' & \cellcolor{grey}'
+ end
+ if lazar_MP2D == "1"
+ lazar_MP2D_high_confidence == "F" ? row += ' & \cellcolor{lightred}' : row += ' & \cellcolor{red}'
+ elsif lazar_MP2D == "0"
+ lazar_MP2D_high_confidence == "F" ? row += ' & \cellcolor{lightgreen}' : row += ' & \cellcolor{green}'
+ else
+ row += '& \cellcolor{grey} '
+ end
+ if lazar_PaDEL == "1"
+ lazar_PaDEL_high_confidence == "F" ? row += ' & \cellcolor{lightred}' : row += ' & \cellcolor{red}'
+ elsif lazar_PaDEL == "0"
+ lazar_PaDEL_high_confidence == "F" ? row += ' & \cellcolor{lightgreen}' : row += ' & \cellcolor{green}'
+ else
+ row += ' & \cellcolor{grey}'
+ end
+ [r_DL,r_RF,r_SVM,tf_lr_sgd,tf_lr_scikit,tf_NN,tf_RF].each do |mut|
+ mut == "1" ? row += ' & \cellcolor{red}' : row += ' & \cellcolor{green}'
+ end
+ puts row + ' \\\\'
+ end
+end
+puts '
+\end{longtable}
+\normalsize
+'