diff options
author | Christoph Helma <helma@in-silico.ch> | 2020-10-10 17:05:41 +0200 |
---|---|---|
committer | Christoph Helma <helma@in-silico.ch> | 2020-10-10 17:05:41 +0200 |
commit | e451d812f3b63d1987c8f1e7f5557156fdab984f (patch) | |
tree | f5b4e1730f0b75593925b3287d3a37fa70fa507e /scripts | |
parent | 23ce84a7da69104fa763d5a3911b7b0ad98fbdbc (diff) |
Makefile and scripts cleanup; lazar, R and tensorflow tables
Diffstat (limited to 'scripts')
-rwxr-xr-x | scripts/confusion-matrix2table.rb | 10 | ||||
-rwxr-xr-x | scripts/crossvalidation-summary.rb | 9 | ||||
-rwxr-xr-x | scripts/json2csv.rb | 29 | ||||
-rwxr-xr-x | scripts/results.rb | 36 | ||||
-rwxr-xr-x | scripts/roc.R | 2 | ||||
-rwxr-xr-x | scripts/summaries2table.rb | 32 | ||||
-rwxr-xr-x | scripts/tsne-mp2d.R (renamed from scripts/mp2d-tsne.R) | 0 | ||||
-rwxr-xr-x | scripts/tsne-padel.R (renamed from scripts/padel-tsne.R) | 0 |
8 files changed, 19 insertions, 99 deletions
diff --git a/scripts/confusion-matrix2table.rb b/scripts/confusion-matrix2table.rb deleted file mode 100755 index ccb4817..0000000 --- a/scripts/confusion-matrix2table.rb +++ /dev/null @@ -1,10 +0,0 @@ -#!/usr/bin/env ruby - -mat = [] -File.readlines(ARGV[0]).each do |l| - mat << l.chomp.split(",") -end -puts ",,Predictions," -puts ",,mutagenic,non-mutagenic" -puts "Measurements,mutagenic,#{mat[0][0]},#{mat[0][1]}" -puts ",non-mutagenic,#{mat[1][0]},#{mat[1][1]}" diff --git a/scripts/crossvalidation-summary.rb b/scripts/crossvalidation-summary.rb deleted file mode 100755 index 13b0dfa..0000000 --- a/scripts/crossvalidation-summary.rb +++ /dev/null @@ -1,9 +0,0 @@ -#!/usr/bin/env ruby -require_relative '../../lazar/lib/lazar' -include OpenTox - -summary = [] -model = Model::Validation.find(File.read(ARGV[0]).chomp).crossvalidations.each do |cv| - summary << cv.statistics -end -puts JSON.pretty_generate(summary) diff --git a/scripts/json2csv.rb b/scripts/json2csv.rb deleted file mode 100755 index 03191de..0000000 --- a/scripts/json2csv.rb +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env ruby -require_relative '../../lazar/lib/lazar' -include OpenTox - -results = JSON.parse File.read(ARGV[0]) -puts "SMILES,#{File.basename(ARGV[0],".json").sub("pa_","").capitalize},Probability(0),Probability(1),Nr Neighbors,Warnings" -results.each do |id,r| - s = Compound.find(id).smiles - if r["value"] - puts [ - s, - r["value"], - r["probabilities"]["0"], - r["probabilities"]["1"], - r["neighbors"].size, - r["warnings"], - ].join(",") - else - r["neighbors"] ? n = r["neighbors"].size : n = nil - puts [ - s, - r["value"], - nil, - nil, - n, - r["warnings"], - ].join(",") - end -end diff --git a/scripts/results.rb b/scripts/results.rb deleted file mode 100755 index 1a36278..0000000 --- a/scripts/results.rb +++ /dev/null @@ -1,36 +0,0 @@ -#!/usr/bin/env ruby -require 'json' - -result = {} -ARGV.each do |f| - fname = File.basename(f,".json") - program,algo = fname.split('-') - case program - when "tensorflow" - algo == "all" ? algo = "without feature selection" : algo = "with feature selection" - when "lazar" - algo = "high-confidence" if algo == "high" - if algo == "padel" - algo = "PaDEL" - fname.match("high") ? algo += " high-confidence" : algo += " all" - end - end - result[program] ||= {} - result[program][algo] = JSON.parse(File.read(f)).collect{|k,v| [k,v.round(2)]}.to_h -end - -out = {:programs => []} -result.keys.each do |prog| - out[:programs] << {:name => prog, :algos => []} - result[prog].keys.each do |algo| - r = result[prog][algo].dup - result[prog][algo].each do |k,v| - r[k+"_perc"] = (v*100).round - end - r[:name] = algo - r[:abbrev] = prog+"-"+algo - out[:programs].last[:algos] << r - end -end - -puts out.to_json diff --git a/scripts/roc.R b/scripts/roc.R index cb219fc..afc8293 100755 --- a/scripts/roc.R +++ b/scripts/roc.R @@ -1,6 +1,6 @@ #!/usr/bin/env Rscript library(ggplot2) -data <- read.csv("figures/results.csv",header=T) +data <- read.csv("figures/roc.csv",header=T) p <- ggplot(data, aes(x=fpr, y=tpr)) + geom_abline() p <- p + geom_label(label=rownames(data) ) p <- p + expand_limits(x=c(0,1),y=c(0,1)) diff --git a/scripts/summaries2table.rb b/scripts/summaries2table.rb index 5470b26..f98ec54 100755 --- a/scripts/summaries2table.rb +++ b/scripts/summaries2table.rb @@ -1,19 +1,23 @@ #!/usr/bin/env ruby -require 'json' +require 'yaml' -results = {} +rows = {:acc => "Accuracy", :tpr => "True positive rate/Sensitivity", :tnr => "True negative rate/Specificity", :ppv => "Positive predictive value/Precision", :npv => "Negative predictive value", :n => "Nr. predictions"} +data = YAML.load_file "10-fold-crossvalidations/summary.yaml" -ARGV.each do |f| - results[File.basename(f,".json")] = JSON.parse(File.read(f)) +case ARGV[0] +when "R" + header = ["RF","SVM","DL"] + keys = header.collect{|h| "R-"+h} +when "tensorflow" + header = ["RF","LR (SGD)","LR (SCIKIT)","NN"] + keys = ["lr","lr2","nn"].collect{|n| "tensorflow-"+n+".v3"} +when "lazar" + header = ["lazar-mp2d (all)","lazar-mp2d (high confidence)", "lazar-padel (all)","lazar-padel (high confidence)"] + keys = ["lazar-all","lazar-high-confidence", "lazar-padel-all","lazar-padel-high-confidence"] end - -print "," -puts results.keys.collect{|k| k.sub("tensorflow","TF")}.join(",") -["accuracy","true_positive_rate","true_negative_rate","positive_predictive_value","negative_predictive_value"].each do |m| - line = [m.gsub("_"," ")] - results.each do |k,v| - line << v[m].round(2) - end - puts line.join(",") +puts ","+header.join(",") +rows.each do |short,long| + print long+"," + puts keys.collect{|k| data[k][short]}.join(",") end - +exit diff --git a/scripts/mp2d-tsne.R b/scripts/tsne-mp2d.R index 0877622..0877622 100755 --- a/scripts/mp2d-tsne.R +++ b/scripts/tsne-mp2d.R diff --git a/scripts/padel-tsne.R b/scripts/tsne-padel.R index b8e9763..b8e9763 100755 --- a/scripts/padel-tsne.R +++ b/scripts/tsne-padel.R |