summary refs log tree commit diff
path: root/scripts
diff options
context:
space:
mode:
author Christoph Helma <helma@in-silico.ch> 2020-10-10 17:05:41 +0200
committer Christoph Helma <helma@in-silico.ch> 2020-10-10 17:05:41 +0200
commit e451d812f3b63d1987c8f1e7f5557156fdab984f (patch)
tree f5b4e1730f0b75593925b3287d3a37fa70fa507e /scripts
parent 23ce84a7da69104fa763d5a3911b7b0ad98fbdbc (diff)
Makefile and scripts cleanup; lazar, R and tensorflow tables
Diffstat (limited to 'scripts')
-rwxr-xr-x scripts/confusion-matrix2table.rb 10
-rwxr-xr-x scripts/crossvalidation-summary.rb 9
-rwxr-xr-x scripts/json2csv.rb 29
-rwxr-xr-x scripts/results.rb 36
-rwxr-xr-x scripts/roc.R 2
-rwxr-xr-x scripts/summaries2table.rb 32
-rwxr-xr-x scripts/tsne-mp2d.R (renamed from scripts/mp2d-tsne.R) 0
-rwxr-xr-x scripts/tsne-padel.R (renamed from scripts/padel-tsne.R) 0
8 files changed, 19 insertions, 99 deletions
diff --git a/scripts/confusion-matrix2table.rb b/scripts/confusion-matrix2table.rb
deleted file mode 100755
index ccb4817..0000000
--- a/scripts/confusion-matrix2table.rb
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/usr/bin/env ruby
-
-mat = []
-File.readlines(ARGV[0]).each do |l|
- mat << l.chomp.split(",")
-end
-puts ",,Predictions,"
-puts ",,mutagenic,non-mutagenic"
-puts "Measurements,mutagenic,#{mat[0][0]},#{mat[0][1]}"
-puts ",non-mutagenic,#{mat[1][0]},#{mat[1][1]}"
diff --git a/scripts/crossvalidation-summary.rb b/scripts/crossvalidation-summary.rb
deleted file mode 100755
index 13b0dfa..0000000
--- a/scripts/crossvalidation-summary.rb
+++ /dev/null
@@ -1,9 +0,0 @@
-#!/usr/bin/env ruby
-require_relative '../../lazar/lib/lazar'
-include OpenTox
-
-summary = []
-model = Model::Validation.find(File.read(ARGV[0]).chomp).crossvalidations.each do |cv|
- summary << cv.statistics
-end
-puts JSON.pretty_generate(summary)
diff --git a/scripts/json2csv.rb b/scripts/json2csv.rb
deleted file mode 100755
index 03191de..0000000
--- a/scripts/json2csv.rb
+++ /dev/null
@@ -1,29 +0,0 @@
-#!/usr/bin/env ruby
-require_relative '../../lazar/lib/lazar'
-include OpenTox
-
-results = JSON.parse File.read(ARGV[0])
-puts "SMILES,#{File.basename(ARGV[0],".json").sub("pa_","").capitalize},Probability(0),Probability(1),Nr Neighbors,Warnings"
-results.each do |id,r|
- s = Compound.find(id).smiles
- if r["value"]
- puts [
- s,
- r["value"],
- r["probabilities"]["0"],
- r["probabilities"]["1"],
- r["neighbors"].size,
- r["warnings"],
- ].join(",")
- else
- r["neighbors"] ? n = r["neighbors"].size : n = nil
- puts [
- s,
- r["value"],
- nil,
- nil,
- n,
- r["warnings"],
- ].join(",")
- end
-end
diff --git a/scripts/results.rb b/scripts/results.rb
deleted file mode 100755
index 1a36278..0000000
--- a/scripts/results.rb
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/usr/bin/env ruby
-require 'json'
-
-result = {}
-ARGV.each do |f|
- fname = File.basename(f,".json")
- program,algo = fname.split('-')
- case program
- when "tensorflow"
- algo == "all" ? algo = "without feature selection" : algo = "with feature selection"
- when "lazar"
- algo = "high-confidence" if algo == "high"
- if algo == "padel"
- algo = "PaDEL"
- fname.match("high") ? algo += " high-confidence" : algo += " all"
- end
- end
- result[program] ||= {}
- result[program][algo] = JSON.parse(File.read(f)).collect{|k,v| [k,v.round(2)]}.to_h
-end
-
-out = {:programs => []}
-result.keys.each do |prog|
- out[:programs] << {:name => prog, :algos => []}
- result[prog].keys.each do |algo|
- r = result[prog][algo].dup
- result[prog][algo].each do |k,v|
- r[k+"_perc"] = (v*100).round
- end
- r[:name] = algo
- r[:abbrev] = prog+"-"+algo
- out[:programs].last[:algos] << r
- end
-end
-
-puts out.to_json
diff --git a/scripts/roc.R b/scripts/roc.R
index cb219fc..afc8293 100755
--- a/scripts/roc.R
+++ b/scripts/roc.R
@@ -1,6 +1,6 @@
#!/usr/bin/env Rscript
library(ggplot2)
-data <- read.csv("figures/results.csv",header=T)
+data <- read.csv("figures/roc.csv",header=T)
p <- ggplot(data, aes(x=fpr, y=tpr)) + geom_abline()
p <- p + geom_label(label=rownames(data) )
p <- p + expand_limits(x=c(0,1),y=c(0,1))
diff --git a/scripts/summaries2table.rb b/scripts/summaries2table.rb
index 5470b26..f98ec54 100755
--- a/scripts/summaries2table.rb
+++ b/scripts/summaries2table.rb
@@ -1,19 +1,23 @@
#!/usr/bin/env ruby
-require 'json'
+require 'yaml'
-results = {}
+rows = {:acc => "Accuracy", :tpr => "True positive rate/Sensitivity", :tnr => "True negative rate/Specificity", :ppv => "Positive predictive value/Precision", :npv => "Negative predictive value", :n => "Nr. predictions"}
+data = YAML.load_file "10-fold-crossvalidations/summary.yaml"
-ARGV.each do |f|
- results[File.basename(f,".json")] = JSON.parse(File.read(f))
+case ARGV[0]
+when "R"
+ header = ["RF","SVM","DL"]
+ keys = header.collect{|h| "R-"+h}
+when "tensorflow"
+ header = ["RF","LR (SGD)","LR (SCIKIT)","NN"]
+ keys = ["lr","lr2","nn"].collect{|n| "tensorflow-"+n+".v3"}
+when "lazar"
+ header = ["lazar-mp2d (all)","lazar-mp2d (high confidence)", "lazar-padel (all)","lazar-padel (high confidence)"]
+ keys = ["lazar-all","lazar-high-confidence", "lazar-padel-all","lazar-padel-high-confidence"]
end
-
-print ","
-puts results.keys.collect{|k| k.sub("tensorflow","TF")}.join(",")
-["accuracy","true_positive_rate","true_negative_rate","positive_predictive_value","negative_predictive_value"].each do |m|
- line = [m.gsub("_"," ")]
- results.each do |k,v|
- line << v[m].round(2)
- end
- puts line.join(",")
+puts ","+header.join(",")
+rows.each do |short,long|
+ print long+","
+ puts keys.collect{|k| data[k][short]}.join(",")
end
-
+exit
diff --git a/scripts/mp2d-tsne.R b/scripts/tsne-mp2d.R
index 0877622..0877622 100755
--- a/scripts/mp2d-tsne.R
+++ b/scripts/tsne-mp2d.R
diff --git a/scripts/padel-tsne.R b/scripts/tsne-padel.R
index b8e9763..b8e9763 100755
--- a/scripts/padel-tsne.R
+++ b/scripts/tsne-padel.R