summaryrefslogtreecommitdiff
path: root/scripts
diff options
context:
space:
mode:
author Christoph Helma <helma@in-silico.ch> 2020-10-20 20:42:54 +0200
committer Christoph Helma <helma@in-silico.ch> 2020-10-20 20:42:54 +0200
commit 0b686f924a42105f2516aea44c27b6d3f75e1672 (patch)
tree 2572b8f713200a59ea5993320fd133db6f712246 /scripts
parent 2c3bc133700f7e1e1ea8d038d87da1f3095ed103 (diff)
Summary table of PA predictions
Diffstat (limited to 'scripts')
-rwxr-xr-x scripts/pa-summary-table.rb 18
-rwxr-xr-x scripts/pa-summary.rb 190
2 files changed, 208 insertions, 0 deletions
diff --git a/scripts/pa-summary-table.rb b/scripts/pa-summary-table.rb
new file mode 100755
index 0000000..48546bd
--- /dev/null
+++ b/scripts/pa-summary-table.rb
@@ -0,0 +1,35 @@
+#!/usr/bin/env ruby
+# Print a CSV summary table of PA predictions from the YAML statistics file
+# given as first command line argument (the format written by pa-summary.rb).
+#
+# Usage: pa-summary-table.rb SUMMARY.yaml
+require 'yaml'
+
+# Row label => key path below data[:pa], in output order.
+ROWS = {
+  "lazar-MP2D (all)"              => [:lazar, :mp2d, :all],
+  "lazar-MP2D (high-confidence)"  => [:lazar, :mp2d, :high_confidence],
+  "lazar-PaDEL (all)"             => [:lazar, :padel, :all],
+  "lazar-PaDEL (high-confidence)" => [:lazar, :padel, :high_confidence],
+  "R-RF"                          => [:r, :rf],
+  "R-SVM"                         => [:r, :svm],
+  "R-DL"                          => [:r, :dl],
+  "Tensorflow-RF"                 => [:tf, :rf],
+  "Tensorflow-LR-sgd"             => [:tf, :lr_sgd],
+  "Tensorflow-LR-scikit"          => [:tf, :lr_scikit],
+  "Tensorflow-NN"                 => [:tf, :nn]
+}.freeze
+
+abort "Usage: #{$PROGRAM_NAME} SUMMARY.yaml" unless ARGV[0]
+
+# The statistics use Symbol keys. Psych 4 (Ruby >= 3.1) no longer loads Symbols
+# by default, so they are permitted explicitly (keyword needs Psych >= 3.1).
+data = YAML.safe_load(File.read(ARGV[0]), permitted_classes: [Symbol])
+
+puts "Model,Nr.predictions,mutagenic,non-mutagenic"
+ROWS.each do |label, path|
+  # fetch names the missing key if the file lacks a model.
+  stats = path.reduce(data.fetch(:pa)) { |node, key| node.fetch(key) }
+  cells = [:n, :mut, :non_mut].map { |k| "#{stats[k]} (#{stats[:"#{k}_perc"]} %)" }
+  puts "#{label},#{cells.join(',')}"
+end
diff --git a/scripts/pa-summary.rb b/scripts/pa-summary.rb
new file mode 100755
index 0000000..0715a6c
--- /dev/null
+++ b/scripts/pa-summary.rb
@@ -0,0 +1,91 @@
+#!/usr/bin/env ruby
+# Count the predictions of every model in a CSV file of PA predictions and
+# print the counts and percentages as YAML.
+#
+# Usage: pa-summary.rb PREDICTIONS.csv
+#
+# Lines containing "SMILES" are treated as header and skipped. A prediction is
+# counted when its cell is "1" (:mut) or "0" (:non_mut); any other value means
+# that the model made no prediction for the compound.
+require 'yaml'
+
+# Zero-based CSV columns; id, cid, smi, cansmi and exp occupy columns 0-4.
+# lazar models: [prediction column, high-confidence flag column].
+LAZAR_COLUMNS = {
+  :mp2d  => [5, 6],
+  :padel => [7, 8]
+}.freeze
+
+# Prediction columns of the R and Tensorflow models, listed in output order.
+MODEL_COLUMNS = {
+  :r  => { :rf => 10, :svm => 11, :dl => 9 },
+  :tf => { :rf => 15, :lr_sgd => 12, :lr_scikit => 13, :nn => 14 }
+}.freeze
+
+# Fresh counters for one model.
+def new_counts
+  { :n => 0, :mut => 0, :non_mut => 0 }
+end
+
+# Add one prediction ("1" or "0") to counts. Any other value leaves counts
+# untouched and returns false.
+def tally(counts, prediction)
+  outcome = { "1" => :mut, "0" => :non_mut }[prediction]
+  return false unless outcome
+
+  counts[:n] += 1
+  counts[outcome] += 1
+  true
+end
+
+# Rounded percentage of part in total. Returns 0 for an empty total instead of
+# raising FloatDomainError from rounding NaN.
+def percent(part, total)
+  total.zero? ? 0 : (100.0 * part / total).round
+end
+
+# Store the share of predicted compounds (relative to total) and the
+# mut/non_mut split (relative to the model's own predictions) in counts.
+def add_percentages(counts, total)
+  counts[:n_perc] = percent(counts[:n], total)
+  counts[:mut_perc] = percent(counts[:mut], counts[:n])
+  counts[:non_mut_perc] = percent(counts[:non_mut], counts[:n])
+end
+
+abort "Usage: #{$PROGRAM_NAME} PREDICTIONS.csv" unless ARGV[0]
+
+summary = { :n => 0, :lazar => {}, :r => {}, :tf => {} }
+LAZAR_COLUMNS.each_key do |model|
+  summary[:lazar][model] = { :all => new_counts, :high_confidence => new_counts }
+end
+MODEL_COLUMNS.each do |group, models|
+  models.each_key { |model| summary[group][model] = new_counts }
+end
+
+File.foreach(ARGV[0]) do |line|
+  # Blank lines are not compounds and must not inflate the total.
+  next if line.include?("SMILES") || line.strip.empty?
+
+  cells = line.chomp.split(",")
+  LAZAR_COLUMNS.each do |model, (prediction, confidence)|
+    counts = summary[:lazar][model]
+    next unless tally(counts[:all], cells[prediction])
+
+    # High-confidence predictions are a subset of all predictions.
+    tally(counts[:high_confidence], cells[prediction]) if cells[confidence] == "T"
+  end
+  MODEL_COLUMNS.each do |group, models|
+    models.each { |model, column| tally(summary[group][model], cells[column]) }
+  end
+  summary[:n] += 1
+end
+
+total = summary[:n]
+summary[:lazar].each_value do |variants|
+  variants.each_value { |counts| add_percentages(counts, total) }
+end
+MODEL_COLUMNS.each_key do |group|
+  summary[group].each_value { |counts| add_percentages(counts, total) }
+end
+
+puts({ :pa => summary }.to_yaml)