summary refs log tree commit diff
path: root/scripts
diff options
context:
space:
mode:
author Christoph Helma <helma@in-silico.ch> 2018-03-15 16:20:17 +0100
committer Christoph Helma <helma@in-silico.ch> 2018-03-15 16:20:17 +0100
commit 41190556d2c02d8ebf3ac01edda3f7f8e41bad9d (patch)
tree 47f1e5776fd7725c6985f5f0264606e3cc2765e8 /scripts
parent 1aa8093ea8f182ec7cc9aae626f494a1e14c8c84 (diff)
first revision
Diffstat (limited to 'scripts')
-rwxr-xr-x scripts/50-crossvalidations.rb 14
-rwxr-xr-x scripts/50cv-table.rb 51
-rwxr-xr-x scripts/crossvalidation-plots.R 5
-rwxr-xr-x scripts/crossvalidation-table.rb 7
-rwxr-xr-x scripts/crossvalidation.rb 2
-rwxr-xr-x scripts/test-prediction-plot.R 2
6 files changed, 73 insertions, 8 deletions
diff --git a/scripts/50-crossvalidations.rb b/scripts/50-crossvalidations.rb
new file mode 100755
index 0000000..fa928b2
--- /dev/null
+++ b/scripts/50-crossvalidations.rb
@@ -0,0 +1,14 @@
+#!/usr/bin/env ruby
+require_relative '../../lazar/lib/lazar'
+include OpenTox
+
+file = ARGV[0]
+dataset = Dataset.from_csv_file file
+model = Model::LazarRegression.create(training_dataset: dataset)
+
+File.open("data/50cv.ids","w+") do |cvids|
+ (0..49).each do |i|
+ cv = Validation::RegressionCrossValidation.create model
+ cvids.puts cv.id
+ end
+end
diff --git a/scripts/50cv-table.rb b/scripts/50cv-table.rb
new file mode 100755
index 0000000..686f16c
--- /dev/null
+++ b/scripts/50cv-table.rb
@@ -0,0 +1,51 @@
+#!/usr/bin/env ruby
+require_relative '../../lazar/lib/lazar'
+include OpenTox
+
+table = {}
+table["close"] = { "rmse" => [], "r_squared" => [], "nr_predicted" => [] }
+table["distant"] = { "rmse" => [], "r_squared" => [], "nr_predicted" => [] }
+table["all"] = { "rmse" => [], "r_squared" => [], "nr_predicted" => [] }
+
+File.open(ARGV[0]).each_line do |id|
+ cv = Validation::RegressionCrossValidation.find id.chomp
+ rmse = {"close" => 0, "distant" => 0, "all" => 0}
+ x = {"close" => [], "distant" => [], "all" => []}
+ y = {"close" => [], "distant" => [], "all" => []}
+ cv.predictions.each do |cid,pred|
+ warnings = false
+ warnings = true if pred["warnings"] and !pred["warnings"].empty?
+ if pred[:value] #and pred[:measurements]
+ if warnings
+ x["distant"] << pred[:measurements].median
+ y["distant"] << pred[:value]
+ else
+ x["close"] << pred[:measurements].median
+ y["close"] << pred[:value]
+ end
+ x["all"] << pred[:measurements].median
+ y["all"] << pred[:value]
+ end
+ end
+ ["close","distant","all"].each do |cat|
+ R.assign "measurement", x[cat]
+ R.assign "prediction", y[cat]
+ R.eval "r <- cor(measurement,prediction,use='pairwise')"
+ R.eval "rmse <- sqrt(mean((prediction - measurement)^2))"
+ table[cat]["r_squared"] << R.eval("r").to_ruby**2
+ table[cat]["rmse"] << R.eval("rmse").to_ruby
+ table[cat]["nr_predicted"] << y[cat].size
+ end
+end
+
+File.open("data/50cv.csv","w+") do |f|
+ f.puts("AD,Param,Mean,SD")
+ table.each do |dist,data|
+ data.each do |name,values|
+ R.assign "x", values
+ R.eval "sd <- sd(x)"
+ f.puts "#{dist},#{name},#{values.mean},#{R.eval("sd").to_ruby}"
+ end
+ end
+end
+
diff --git a/scripts/crossvalidation-plots.R b/scripts/crossvalidation-plots.R
index de713f1..2511fdf 100755
--- a/scripts/crossvalidation-plots.R
+++ b/scripts/crossvalidation-plots.R
@@ -2,7 +2,6 @@
library(ggplot2)
-nr = commandArgs(TRUE)[1]
-data = read.csv(paste("data/training_log10-cv-",nr,".csv",sep=""))
+data = read.csv(paste("data/training_log10-cv.csv",sep=""))
img = qplot(LOAEL_predicted,LOAEL_measured_median,data=data,xlab="-log10(LOAEL predicted)",ylab="-log10(LOAEL measured median)",colour=Warnings) + geom_point() + geom_abline(intercept=0.0) + xlim(-2,4.5) + ylim(-2,4.5) + scale_color_manual(name = "Applicability domain",values=c("#00BFC4", "#F8766D"), breaks=c(TRUE,FALSE), labels=c("distant","close"))
-ggsave(file=paste('figures/crossvalidation',nr,'.pdf',sep=""), plot=img,width=12, height=8)
+ggsave(file='figures/crossvalidation.pdf', plot=img,width=12, height=8)
diff --git a/scripts/crossvalidation-table.rb b/scripts/crossvalidation-table.rb
index 499b166..1ea5894 100755
--- a/scripts/crossvalidation-table.rb
+++ b/scripts/crossvalidation-table.rb
@@ -1,9 +1,10 @@
#!/usr/bin/env ruby
require_relative '../../lazar/lib/lazar'
include OpenTox
-require 'yaml'
-csv_file = ARGV[0].sub(/id$/,"csv")
-cv = Validation::RegressionCrossValidation.find File.read(ARGV[0]).chomp
+
+id = File.open(ARGV[0]).readlines.sample.chomp # random cv
+csv_file = "data/training_log10-cv.csv"
+cv = Validation::RegressionCrossValidation.find id
data = []
cv.predictions.each do |cid,p|
smi = Compound.find(cid).smiles
diff --git a/scripts/crossvalidation.rb b/scripts/crossvalidation.rb
index 6deca60..27f4203 100755
--- a/scripts/crossvalidation.rb
+++ b/scripts/crossvalidation.rb
@@ -5,7 +5,7 @@ require 'yaml'
name = File.basename ARGV[0], ".csv"
file = File.join "data",ARGV[0]
dataset = Dataset.from_csv_file file
-model = Model::LazarRegression.create(training_dataset: dataset)#, algorithms: { :prediction => {:method => "Algorithm::Caret.rf"}, :similarity => { :min => 0.5 }})
+model = Model::LazarRegression.create(training_dataset: dataset)
id_file = File.join("data",ARGV[0].sub(/.csv/,"-cv-#{ARGV[1]}.id"))
cv = Validation::RegressionCrossValidation.create model
File.open(id_file,"w+"){|f| f.puts cv.id}
diff --git a/scripts/test-prediction-plot.R b/scripts/test-prediction-plot.R
index dddf91a..ddd908b 100755
--- a/scripts/test-prediction-plot.R
+++ b/scripts/test-prediction-plot.R
@@ -6,6 +6,6 @@ data = read.csv("data/predictions-measurements.csv",header=T)
data$SMILES <- reorder(data$SMILES,data$LOAEL)
img <- ggplot(data, aes(SMILES,LOAEL,ymin = min(LOAEL), ymax=max(LOAEL),color=Origin))
img <- img + ylab('-log(LOAEL mg/kg_bw/day)') + xlab('Compound') + theme(axis.text.x = element_blank()) + theme(legend.title=element_blank())
-img <- img + geom_point() + scale_color_manual(values=c("#619CFF", "#00BFC4", "#F8766D"))
+img <- img + geom_point() + scale_color_manual(values=c("#000000", "#00BFC4", "#F8766D"))
ggsave(file='figures/test-prediction.pdf', plot=img,width=12, height=8)