diff options
author | Christoph Helma <helma@in-silico.ch> | 2020-10-20 20:42:54 +0200 |
---|---|---|
committer | Christoph Helma <helma@in-silico.ch> | 2020-10-20 20:42:54 +0200 |
commit | 0b686f924a42105f2516aea44c27b6d3f75e1672 (patch) | |
tree | 2572b8f713200a59ea5993320fd133db6f712246 /scripts | |
parent | 2c3bc133700f7e1e1ea8d038d87da1f3095ed103 (diff) |
Summary table of PA predictions
Diffstat (limited to 'scripts')
-rwxr-xr-x | scripts/pa-summary-table.rb | 18 | ||||
-rwxr-xr-x | scripts/pa-summary.rb | 190 |
2 files changed, 208 insertions, 0 deletions
# ---- scripts/pa-summary-table.rb (new file, mode 100755) ----
#!/usr/bin/env ruby
# Prints a CSV table (model, number of predictions, mutagenic, non-mutagenic)
# from the YAML summary written by pa-summary.rb.
#
# Usage: pa-summary-table.rb SUMMARY.yaml
require 'yaml'

TABLE_HEADER = 'Model,Nr.predictions,mutagenic,non-mutagenic'

# Row label and the key path of its counts below data[:pa], in output order.
TABLE_ROWS = [
  ['lazar-MP2D (all)', %i[lazar mp2d all]],
  ['lazar-MP2D (high-confidence)', %i[lazar mp2d high_confidence]],
  ['lazar-PaDEL (all)', %i[lazar padel all]],
  ['lazar-PaDEL (high-confidence)', %i[lazar padel high_confidence]],
  ['R-RF', %i[r rf]],
  ['R-SVM', %i[r svm]],
  ['R-DL', %i[r dl]],
  ['Tensorflow-RF', %i[tf rf]],
  ['Tensorflow-LR-sgd', %i[tf lr_sgd]],
  ['Tensorflow-LR-scikit', %i[tf lr_scikit]],
  ['Tensorflow-NN', %i[tf nn]]
].freeze

# Formats one table line: the label followed by "count (percent %)" cells
# for the :n, :mut and :non_mut entries of +stats+.
def summary_table_row(label, stats)
  cells = %i[n mut non_mut].map { |key| "#{stats[key]} (#{stats[:"#{key}_perc"]} %)" }
  [label, *cells].join(',')
end

# Returns the table as an array of lines (header first).
# Raises KeyError when an expected model entry is missing from +data+.
def summary_table(data)
  models = data.fetch(:pa)
  rows = TABLE_ROWS.map do |label, path|
    stats = path.reduce(models) { |node, key| node.fetch(key) }
    summary_table_row(label, stats)
  end
  [TABLE_HEADER, *rows]
end

# Loads the symbol-keyed summary. A plain YAML.load_file rejects Symbol
# under Psych 4 (Ruby >= 3.1), so the class is permitted explicitly.
def load_summary(path)
  YAML.safe_load(File.read(path), permitted_classes: [Symbol])
end

if ARGV.empty?
  warn 'usage: pa-summary-table.rb SUMMARY.yaml'
else
  puts summary_table(load_summary(ARGV[0]))
end

# ---- scripts/pa-summary.rb (new file, mode 100755) ----
#!/usr/bin/env ruby
# Counts the predictions of each model in a comma-separated predictions file
# and prints the counts, together with rounded percentages, as YAML.
#
# Usage: pa-summary.rb PREDICTIONS.csv
#
# Zero-based columns read from every data line:
#   5/6  lazar MP2D prediction / high-confidence flag
#   7/8  lazar PaDEL prediction / high-confidence flag
#   9-11 R DL, RF, SVM     12-15 Tensorflow LR-sgd, LR-scikit, NN, RF
require 'yaml'

# lazar descriptor => [prediction column, high-confidence flag column]
LAZAR_COLUMNS = { mp2d: [5, 6], padel: [7, 8] }.freeze

# [group, model] => prediction column
MODEL_COLUMNS = {
  %i[r dl] => 9,
  %i[r rf] => 10,
  %i[r svm] => 11,
  %i[tf lr_sgd] => 12,
  %i[tf lr_scikit] => 13,
  %i[tf nn] => 14,
  %i[tf rf] => 15
}.freeze

# Prediction value in the file => counter it increments.
# Any other value (including a missing field) counts as "no prediction".
OUTCOME_KEYS = { '1' => :mut, '0' => :non_mut }.freeze

# Fresh counter hash for one model.
def empty_counts
  { n: 0, mut: 0, non_mut: 0 }
end

# Zeroed summary; the key order determines the order of the YAML output.
def empty_summary
  {
    n: 0,
    lazar: {
      mp2d: { all: empty_counts, high_confidence: empty_counts },
      padel: { all: empty_counts, high_confidence: empty_counts }
    },
    r: { rf: empty_counts, svm: empty_counts, dl: empty_counts },
    tf: { rf: empty_counts, lr_sgd: empty_counts, lr_scikit: empty_counts, nn: empty_counts }
  }
end

# Adds +prediction+ to +counts+. Returns true when it was "1" or "0",
# false (leaving +counts+ untouched) otherwise.
def count_prediction(counts, prediction)
  outcome = OUTCOME_KEYS[prediction]
  return false unless outcome

  counts[:n] += 1
  counts[outcome] += 1
  true
end

# Rounded percentage of +part+ in +total+; 0 when +total+ is zero.
# Without the guard 100.0 * 0 / 0 is NaN and NaN.round raises FloatDomainError.
def percentage(part, total)
  return 0 if total.zero?

  (100.0 * part / total).round
end

# Adds :n_perc (share of all compounds) and :mut_perc / :non_mut_perc
# (share of this model's predictions) to one counter hash.
def add_percentages(counts, total)
  predicted = counts[:n]
  counts[:n_perc] = percentage(predicted, total)
  counts[:mut_perc] = percentage(counts[:mut], predicted)
  counts[:non_mut_perc] = percentage(counts[:non_mut], predicted)
end

# Builds the summary from an enumerable of text lines.
# Lines containing "SMILES" (the header) and blank lines are skipped;
# every other line counts as one compound.
# Returns { pa: summary }.
def summarize_predictions(lines)
  summary = empty_summary
  lines.each do |line|
    next if line.include?('SMILES') || line.strip.empty?

    fields = line.chomp.split(',')
    LAZAR_COLUMNS.each do |descriptor, (prediction_col, flag_col)|
      counts = summary[:lazar][descriptor]
      next unless count_prediction(counts[:all], fields[prediction_col])

      # High-confidence counters only see predictions flagged "T".
      count_prediction(counts[:high_confidence], fields[prediction_col]) if fields[flag_col] == 'T'
    end
    MODEL_COLUMNS.each do |(group, model), column|
      count_prediction(summary[group][model], fields[column])
    end
    summary[:n] += 1
  end

  per_model = summary[:lazar].values.flat_map(&:values) + summary[:r].values + summary[:tf].values
  per_model.each { |counts| add_percentages(counts, summary[:n]) }
  { pa: summary }
end

if ARGV.empty?
  warn 'usage: pa-summary.rb PREDICTIONS.csv'
else
  puts summarize_predictions(File.foreach(ARGV[0])).to_yaml
end