diff options
author | Christoph Helma <helma@in-silico.ch> | 2021-02-17 23:11:49 +0100 |
---|---|---|
committer | Christoph Helma <helma@in-silico.ch> | 2021-02-17 23:11:49 +0100 |
commit | 9d7b4aaff715e731ba81bf131dfaa9de5a9d0fdd (patch) | |
tree | 7a84e5a702a1e0cb8d10c8512da822b2b8c9a5c1 /scripts/pa-summary.rb | |
parent | 3bfc5bfb57e8f130b1b3d9a90fc34744278ef6b5 (diff) |
cleanup, scripts adjusted, improved figures
Diffstat (limited to 'scripts/pa-summary.rb')
-rwxr-xr-x | scripts/pa-summary.rb | 208 |
1 files changed, 22 insertions, 186 deletions
diff --git a/scripts/pa-summary.rb b/scripts/pa-summary.rb index a8c8f19..9fb3d0e 100755 --- a/scripts/pa-summary.rb +++ b/scripts/pa-summary.rb @@ -1,196 +1,32 @@ #!/usr/bin/env ruby require 'yaml' -summary = { - :n => 0, - :lazar => { - :mp2d => { - :all => { - :n => 0, - :mut => 0, - :non_mut => 0 - }, - :high_confidence => { - :n => 0, - :mut => 0, - :non_mut => 0 - } - }, - :padel => { - :all => { - :n => 0, - :mut => 0, - :non_mut => 0 - }, - :high_confidence => { - :n => 0, - :mut => 0, - :non_mut => 0 - } - }, - }, - :r => { - :rf => { - :n => 0, - :mut => 0, - :non_mut => 0 - }, - :svm => { - :n => 0, - :mut => 0, - :non_mut => 0 - }, - :dl => { - :n => 0, - :mut => 0, - :non_mut => 0 - }, - }, - :tf => { - :rf => { - :n => 0, - :mut => 0, - :non_mut => 0 - }, - :lr_sgd => { - :n => 0, - :mut => 0, - :non_mut => 0 - }, - :lr_scikit => { - :n => 0, - :mut => 0, - :non_mut => 0 - }, - :nn => { - :n => 0, - :mut => 0, - :non_mut => 0 - }, - }, -} - -n = 0 -File.read(ARGV[0]).each_line do |l| - unless l.match("SMILES") - id,r,o,p,n,de,t,ma,mo,di,cid,smi,cansmi,exp,lazar_MP2D,lazar_MP2D_high_confidence,lazar_PaDEL,lazar_PaDEL_high_confidence,r_DL,r_RF,r_SVM,tf_lr_sgd,tf_lr_scikit,tf_NN,tf_RF = l.chomp.split(",") - #id,cid,smi,cansmi,exp,lazar_MP2D,lazar_MP2D_high_confidence,lazar_PaDEL,lazar_PaDEL_high_confidence,r_DL,r_RF,r_SVM,tf_lr_sgd,tf_lr_scikit,tf_NN,tf_RF = l.chomp.split(",") - - if lazar_MP2D == "1" - summary[:lazar][:mp2d][:all][:n] += 1 - summary[:lazar][:mp2d][:all][:mut] += 1 - if lazar_MP2D_high_confidence == "T" - summary[:lazar][:mp2d][:high_confidence][:n] += 1 - summary[:lazar][:mp2d][:high_confidence][:mut] += 1 - end - elsif lazar_MP2D == "0" - summary[:lazar][:mp2d][:all][:n] += 1 - summary[:lazar][:mp2d][:all][:non_mut] += 1 - if lazar_MP2D_high_confidence == "T" - summary[:lazar][:mp2d][:high_confidence][:n] += 1 - summary[:lazar][:mp2d][:high_confidence][:non_mut] += 1 - end - end - if lazar_PaDEL == "1" - summary[:lazar][:padel][:all][:n] += 1 - summary[:lazar][:padel][:all][:mut] += 1 - if lazar_PaDEL_high_confidence == "T" - summary[:lazar][:padel][:high_confidence][:n] += 1 - summary[:lazar][:padel][:high_confidence][:mut] += 1 +lines = File.readlines(ARGV[0]) +header = lines.shift.chomp.split(",") +summary = {:n => lines.size} +lines.each do |line| + items = line.chomp.split(",") + items.each_with_index do |v,i| + if header[i].match (/MP2D|CDK/) + key = header[i].gsub("-","_").downcase.to_sym + summary[key] ||= { :n => 0, :mut => 0, :non_mut => 0 } + case v + when "1" + summary[key][:n] += 1 + summary[key][:mut] += 1 + when "0" + summary[key][:n] += 1 + summary[key][:non_mut] += 1 end - elsif lazar_PaDEL == "0" - summary[:lazar][:padel][:all][:n] += 1 - summary[:lazar][:padel][:all][:non_mut] += 1 - if lazar_PaDEL_high_confidence == "T" - summary[:lazar][:padel][:high_confidence][:n] += 1 - summary[:lazar][:padel][:high_confidence][:non_mut] += 1 - end - end - if r_DL == "1" - summary[:r][:dl][:n] += 1 - summary[:r][:dl][:mut] += 1 - elsif r_DL == "0" - summary[:r][:dl][:n] += 1 - summary[:r][:dl][:non_mut] += 1 - end - if r_RF == "1" - summary[:r][:rf][:n] += 1 - summary[:r][:rf][:mut] += 1 - elsif r_RF == "0" - summary[:r][:rf][:n] += 1 - summary[:r][:rf][:non_mut] += 1 - end - if r_SVM == "1" - summary[:r][:svm][:n] += 1 - summary[:r][:svm][:mut] += 1 - elsif r_SVM == "0" - summary[:r][:svm][:n] += 1 - summary[:r][:svm][:non_mut] += 1 - end - if tf_lr_sgd == "1" - summary[:tf][:lr_sgd][:n] += 1 - summary[:tf][:lr_sgd][:mut] += 1 - elsif tf_lr_sgd == "0" - summary[:tf][:lr_sgd][:n] += 1 - summary[:tf][:lr_sgd][:non_mut] += 1 end - if tf_lr_scikit == "1" - summary[:tf][:lr_scikit][:n] += 1 - summary[:tf][:lr_scikit][:mut] += 1 - elsif tf_lr_scikit == "0" - summary[:tf][:lr_scikit][:n] += 1 - summary[:tf][:lr_scikit][:non_mut] += 1 - end - if tf_RF == "1" - summary[:tf][:rf][:n] += 1 - summary[:tf][:rf][:mut] += 1 - elsif tf_RF == "0" - summary[:tf][:rf][:n] += 1 - summary[:tf][:rf][:non_mut] += 1 - end - if tf_NN == "1" - summary[:tf][:nn][:n] += 1 - summary[:tf][:nn][:mut] += 1 - elsif tf_NN == "0" - summary[:tf][:nn][:n] += 1 - summary[:tf][:nn][:non_mut] += 1 - end - summary[:n] += 1 end end -summary[:lazar][:mp2d][:all][:n_perc] = (100.0*summary[:lazar][:mp2d][:all][:n]/summary[:n]).round -summary[:lazar][:mp2d][:all][:mut_perc] = (100.0*summary[:lazar][:mp2d][:all][:mut]/summary[:lazar][:mp2d][:all][:n]).round -summary[:lazar][:mp2d][:all][:non_mut_perc] = (100.0*summary[:lazar][:mp2d][:all][:non_mut]/summary[:lazar][:mp2d][:all][:n]).round -summary[:lazar][:mp2d][:high_confidence][:n_perc] = (100.0*summary[:lazar][:mp2d][:high_confidence][:n]/summary[:n]).round -if summary[:lazar][:mp2d][:high_confidence][:n] == 0 - summary[:lazar][:mp2d][:high_confidence][:mut_perc] = 0 - summary[:lazar][:mp2d][:high_confidence][:non_mut_perc] = 0 -else - summary[:lazar][:mp2d][:high_confidence][:mut_perc] = (100.0*summary[:lazar][:mp2d][:high_confidence][:mut]/summary[:lazar][:mp2d][:high_confidence][:n]).round - summary[:lazar][:mp2d][:high_confidence][:non_mut_perc] = (100.0*summary[:lazar][:mp2d][:high_confidence][:non_mut]/summary[:lazar][:mp2d][:high_confidence][:n]).round -end -summary[:lazar][:padel][:all][:n_perc] = (100.0*summary[:lazar][:padel][:all][:n]/summary[:n]).round -summary[:lazar][:padel][:all][:mut_perc] = (100.0*summary[:lazar][:padel][:all][:mut]/summary[:lazar][:padel][:all][:n]).round -summary[:lazar][:padel][:all][:non_mut_perc] = (100.0*summary[:lazar][:padel][:all][:non_mut]/summary[:lazar][:padel][:all][:n]).round -summary[:lazar][:padel][:high_confidence][:n_perc] = (100.0*summary[:lazar][:padel][:high_confidence][:n]/summary[:n]).round -if summary[:lazar][:padel][:high_confidence][:n] == 0 - summary[:lazar][:padel][:high_confidence][:mut_perc] = 0 - summary[:lazar][:padel][:high_confidence][:non_mut_perc] = 0 -else - summary[:lazar][:padel][:high_confidence][:mut_perc] = (100.0*summary[:lazar][:padel][:high_confidence][:mut]/summary[:lazar][:padel][:high_confidence][:n]).round - summary[:lazar][:padel][:high_confidence][:non_mut_perc] = (100.0*summary[:lazar][:padel][:high_confidence][:non_mut]/summary[:lazar][:padel][:high_confidence][:n]).round -end - -[:rf,:svm,:dl].each do |a| - summary[:r][a][:n_perc] = (100.0*summary[:r][a][:n]/summary[:n]).round - summary[:r][a][:mut_perc] = (100.0*summary[:r][a][:mut]/summary[:r][a][:n]).round - summary[:r][a][:non_mut_perc] = (100.0*summary[:r][a][:non_mut]/summary[:r][a][:n]).round -end - -[:rf,:lr_sgd,:lr_scikit,:nn].each do |a| - summary[:tf][a][:n_perc] = (100.0*summary[:tf][a][:n]/summary[:n]).round - summary[:tf][a][:mut_perc] = (100.0*summary[:tf][a][:mut]/summary[:tf][a][:n]).round - summary[:tf][a][:non_mut_perc] = (100.0*summary[:tf][a][:non_mut]/summary[:tf][a][:n]).round +summary.each do |k,a| + unless k == :n + a[:n_perc] = (100.0*a[:n]/summary[:n]).round + a[:mut_perc] = (100.0*a[:mut]/a[:n]).round + a[:non_mut_perc] = (100.0*a[:non_mut]/a[:n]).round + end end summary = {:pa => summary} puts summary.to_yaml |