summary | refs | log | tree | commit | diff
path: root/scripts/pa-summary.rb
diff options
context:
space:
mode:
author Christoph Helma <helma@in-silico.ch> 2021-02-17 23:11:49 +0100
committer Christoph Helma <helma@in-silico.ch> 2021-02-17 23:11:49 +0100
commit 9d7b4aaff715e731ba81bf131dfaa9de5a9d0fdd (patch)
tree 7a84e5a702a1e0cb8d10c8512da822b2b8c9a5c1 /scripts/pa-summary.rb
parent 3bfc5bfb57e8f130b1b3d9a90fc34744278ef6b5 (diff)
cleanup, scripts adjusted, improved figures
Diffstat (limited to 'scripts/pa-summary.rb')
-rwxr-xr-x scripts/pa-summary.rb 208
1 files changed, 22 insertions, 186 deletions
diff --git a/scripts/pa-summary.rb b/scripts/pa-summary.rb
index a8c8f19..9fb3d0e 100755
--- a/scripts/pa-summary.rb
+++ b/scripts/pa-summary.rb
@@ -1,196 +1,32 @@
#!/usr/bin/env ruby
require 'yaml'
-summary = {
- :n => 0,
- :lazar => {
- :mp2d => {
- :all => {
- :n => 0,
- :mut => 0,
- :non_mut => 0
- },
- :high_confidence => {
- :n => 0,
- :mut => 0,
- :non_mut => 0
- }
- },
- :padel => {
- :all => {
- :n => 0,
- :mut => 0,
- :non_mut => 0
- },
- :high_confidence => {
- :n => 0,
- :mut => 0,
- :non_mut => 0
- }
- },
- },
- :r => {
- :rf => {
- :n => 0,
- :mut => 0,
- :non_mut => 0
- },
- :svm => {
- :n => 0,
- :mut => 0,
- :non_mut => 0
- },
- :dl => {
- :n => 0,
- :mut => 0,
- :non_mut => 0
- },
- },
- :tf => {
- :rf => {
- :n => 0,
- :mut => 0,
- :non_mut => 0
- },
- :lr_sgd => {
- :n => 0,
- :mut => 0,
- :non_mut => 0
- },
- :lr_scikit => {
- :n => 0,
- :mut => 0,
- :non_mut => 0
- },
- :nn => {
- :n => 0,
- :mut => 0,
- :non_mut => 0
- },
- },
-}
-
-n = 0
-File.read(ARGV[0]).each_line do |l|
- unless l.match("SMILES")
- id,r,o,p,n,de,t,ma,mo,di,cid,smi,cansmi,exp,lazar_MP2D,lazar_MP2D_high_confidence,lazar_PaDEL,lazar_PaDEL_high_confidence,r_DL,r_RF,r_SVM,tf_lr_sgd,tf_lr_scikit,tf_NN,tf_RF = l.chomp.split(",")
- #id,cid,smi,cansmi,exp,lazar_MP2D,lazar_MP2D_high_confidence,lazar_PaDEL,lazar_PaDEL_high_confidence,r_DL,r_RF,r_SVM,tf_lr_sgd,tf_lr_scikit,tf_NN,tf_RF = l.chomp.split(",")
-
- if lazar_MP2D == "1"
- summary[:lazar][:mp2d][:all][:n] += 1
- summary[:lazar][:mp2d][:all][:mut] += 1
- if lazar_MP2D_high_confidence == "T"
- summary[:lazar][:mp2d][:high_confidence][:n] += 1
- summary[:lazar][:mp2d][:high_confidence][:mut] += 1
- end
- elsif lazar_MP2D == "0"
- summary[:lazar][:mp2d][:all][:n] += 1
- summary[:lazar][:mp2d][:all][:non_mut] += 1
- if lazar_MP2D_high_confidence == "T"
- summary[:lazar][:mp2d][:high_confidence][:n] += 1
- summary[:lazar][:mp2d][:high_confidence][:non_mut] += 1
- end
- end
- if lazar_PaDEL == "1"
- summary[:lazar][:padel][:all][:n] += 1
- summary[:lazar][:padel][:all][:mut] += 1
- if lazar_PaDEL_high_confidence == "T"
- summary[:lazar][:padel][:high_confidence][:n] += 1
- summary[:lazar][:padel][:high_confidence][:mut] += 1
+lines = File.readlines(ARGV[0])
+header = lines.shift.chomp.split(",")
+summary = {:n => lines.size}
+lines.each do |line|
+ items = line.chomp.split(",")
+ items.each_with_index do |v,i|
+ if header[i].match (/MP2D|CDK/)
+ key = header[i].gsub("-","_").downcase.to_sym
+ summary[key] ||= { :n => 0, :mut => 0, :non_mut => 0 }
+ case v
+ when "1"
+ summary[key][:n] += 1
+ summary[key][:mut] += 1
+ when "0"
+ summary[key][:n] += 1
+ summary[key][:non_mut] += 1
end
- elsif lazar_PaDEL == "0"
- summary[:lazar][:padel][:all][:n] += 1
- summary[:lazar][:padel][:all][:non_mut] += 1
- if lazar_PaDEL_high_confidence == "T"
- summary[:lazar][:padel][:high_confidence][:n] += 1
- summary[:lazar][:padel][:high_confidence][:non_mut] += 1
- end
- end
- if r_DL == "1"
- summary[:r][:dl][:n] += 1
- summary[:r][:dl][:mut] += 1
- elsif r_DL == "0"
- summary[:r][:dl][:n] += 1
- summary[:r][:dl][:non_mut] += 1
- end
- if r_RF == "1"
- summary[:r][:rf][:n] += 1
- summary[:r][:rf][:mut] += 1
- elsif r_RF == "0"
- summary[:r][:rf][:n] += 1
- summary[:r][:rf][:non_mut] += 1
- end
- if r_SVM == "1"
- summary[:r][:svm][:n] += 1
- summary[:r][:svm][:mut] += 1
- elsif r_SVM == "0"
- summary[:r][:svm][:n] += 1
- summary[:r][:svm][:non_mut] += 1
- end
- if tf_lr_sgd == "1"
- summary[:tf][:lr_sgd][:n] += 1
- summary[:tf][:lr_sgd][:mut] += 1
- elsif tf_lr_sgd == "0"
- summary[:tf][:lr_sgd][:n] += 1
- summary[:tf][:lr_sgd][:non_mut] += 1
end
- if tf_lr_scikit == "1"
- summary[:tf][:lr_scikit][:n] += 1
- summary[:tf][:lr_scikit][:mut] += 1
- elsif tf_lr_scikit == "0"
- summary[:tf][:lr_scikit][:n] += 1
- summary[:tf][:lr_scikit][:non_mut] += 1
- end
- if tf_RF == "1"
- summary[:tf][:rf][:n] += 1
- summary[:tf][:rf][:mut] += 1
- elsif tf_RF == "0"
- summary[:tf][:rf][:n] += 1
- summary[:tf][:rf][:non_mut] += 1
- end
- if tf_NN == "1"
- summary[:tf][:nn][:n] += 1
- summary[:tf][:nn][:mut] += 1
- elsif tf_NN == "0"
- summary[:tf][:nn][:n] += 1
- summary[:tf][:nn][:non_mut] += 1
- end
- summary[:n] += 1
end
end
-summary[:lazar][:mp2d][:all][:n_perc] = (100.0*summary[:lazar][:mp2d][:all][:n]/summary[:n]).round
-summary[:lazar][:mp2d][:all][:mut_perc] = (100.0*summary[:lazar][:mp2d][:all][:mut]/summary[:lazar][:mp2d][:all][:n]).round
-summary[:lazar][:mp2d][:all][:non_mut_perc] = (100.0*summary[:lazar][:mp2d][:all][:non_mut]/summary[:lazar][:mp2d][:all][:n]).round
-summary[:lazar][:mp2d][:high_confidence][:n_perc] = (100.0*summary[:lazar][:mp2d][:high_confidence][:n]/summary[:n]).round
-if summary[:lazar][:mp2d][:high_confidence][:n] == 0
- summary[:lazar][:mp2d][:high_confidence][:mut_perc] = 0
- summary[:lazar][:mp2d][:high_confidence][:non_mut_perc] = 0
-else
- summary[:lazar][:mp2d][:high_confidence][:mut_perc] = (100.0*summary[:lazar][:mp2d][:high_confidence][:mut]/summary[:lazar][:mp2d][:high_confidence][:n]).round
- summary[:lazar][:mp2d][:high_confidence][:non_mut_perc] = (100.0*summary[:lazar][:mp2d][:high_confidence][:non_mut]/summary[:lazar][:mp2d][:high_confidence][:n]).round
-end
-summary[:lazar][:padel][:all][:n_perc] = (100.0*summary[:lazar][:padel][:all][:n]/summary[:n]).round
-summary[:lazar][:padel][:all][:mut_perc] = (100.0*summary[:lazar][:padel][:all][:mut]/summary[:lazar][:padel][:all][:n]).round
-summary[:lazar][:padel][:all][:non_mut_perc] = (100.0*summary[:lazar][:padel][:all][:non_mut]/summary[:lazar][:padel][:all][:n]).round
-summary[:lazar][:padel][:high_confidence][:n_perc] = (100.0*summary[:lazar][:padel][:high_confidence][:n]/summary[:n]).round
-if summary[:lazar][:padel][:high_confidence][:n] == 0
- summary[:lazar][:padel][:high_confidence][:mut_perc] = 0
- summary[:lazar][:padel][:high_confidence][:non_mut_perc] = 0
-else
- summary[:lazar][:padel][:high_confidence][:mut_perc] = (100.0*summary[:lazar][:padel][:high_confidence][:mut]/summary[:lazar][:padel][:high_confidence][:n]).round
- summary[:lazar][:padel][:high_confidence][:non_mut_perc] = (100.0*summary[:lazar][:padel][:high_confidence][:non_mut]/summary[:lazar][:padel][:high_confidence][:n]).round
-end
-
-[:rf,:svm,:dl].each do |a|
- summary[:r][a][:n_perc] = (100.0*summary[:r][a][:n]/summary[:n]).round
- summary[:r][a][:mut_perc] = (100.0*summary[:r][a][:mut]/summary[:r][a][:n]).round
- summary[:r][a][:non_mut_perc] = (100.0*summary[:r][a][:non_mut]/summary[:r][a][:n]).round
-end
-
-[:rf,:lr_sgd,:lr_scikit,:nn].each do |a|
- summary[:tf][a][:n_perc] = (100.0*summary[:tf][a][:n]/summary[:n]).round
- summary[:tf][a][:mut_perc] = (100.0*summary[:tf][a][:mut]/summary[:tf][a][:n]).round
- summary[:tf][a][:non_mut_perc] = (100.0*summary[:tf][a][:non_mut]/summary[:tf][a][:n]).round
+summary.each do |k,a|
+ unless k == :n
+ a[:n_perc] = (100.0*a[:n]/summary[:n]).round
+ a[:mut_perc] = (100.0*a[:mut]/a[:n]).round
+ a[:non_mut_perc] = (100.0*a[:non_mut]/a[:n]).round
+ end
end
summary = {:pa => summary}
puts summary.to_yaml