diff options
author | Christoph Helma <helma@in-silico.ch> | 2018-03-15 16:20:17 +0100 |
---|---|---|
committer | Christoph Helma <helma@in-silico.ch> | 2018-03-15 16:20:17 +0100 |
commit | 41190556d2c02d8ebf3ac01edda3f7f8e41bad9d (patch) | |
tree | 47f1e5776fd7725c6985f5f0264606e3cc2765e8 /scripts | |
parent | 1aa8093ea8f182ec7cc9aae626f494a1e14c8c84 (diff) |
first revision
Diffstat (limited to 'scripts')
-rwxr-xr-x | scripts/50-crossvalidations.rb | 14 | ||||
-rwxr-xr-x | scripts/50cv-table.rb | 51 | ||||
-rwxr-xr-x | scripts/crossvalidation-plots.R | 5 | ||||
-rwxr-xr-x | scripts/crossvalidation-table.rb | 7 | ||||
-rwxr-xr-x | scripts/crossvalidation.rb | 2 | ||||
-rwxr-xr-x | scripts/test-prediction-plot.R | 2 |
6 files changed, 73 insertions, 8 deletions
diff --git a/scripts/50-crossvalidations.rb b/scripts/50-crossvalidations.rb new file mode 100755 index 0000000..fa928b2 --- /dev/null +++ b/scripts/50-crossvalidations.rb @@ -0,0 +1,14 @@ +#!/usr/bin/env ruby +require_relative '../../lazar/lib/lazar' +include OpenTox + +file = ARGV[0] +dataset = Dataset.from_csv_file file +model = Model::LazarRegression.create(training_dataset: dataset) + +File.open("data/50cv.ids","w+") do |cvids| + (0..49).each do |i| + cv = Validation::RegressionCrossValidation.create model + cvids.puts cv.id + end +end diff --git a/scripts/50cv-table.rb b/scripts/50cv-table.rb new file mode 100755 index 0000000..686f16c --- /dev/null +++ b/scripts/50cv-table.rb @@ -0,0 +1,51 @@ +#!/usr/bin/env ruby +require_relative '../../lazar/lib/lazar' +include OpenTox + +table = {} +table["close"] = { "rmse" => [], "r_squared" => [], "nr_predicted" => [] } +table["distant"] = { "rmse" => [], "r_squared" => [], "nr_predicted" => [] } +table["all"] = { "rmse" => [], "r_squared" => [], "nr_predicted" => [] } + +File.open(ARGV[0]).each_line do |id| + cv = Validation::RegressionCrossValidation.find id.chomp + rmse = {"close" => 0, "distant" => 0, "all" => 0} + x = {"close" => [], "distant" => [], "all" => []} + y = {"close" => [], "distant" => [], "all" => []} + cv.predictions.each do |cid,pred| + warnings = false + warnings = true if pred["warnings"] and !pred["warnings"].empty? + if pred[:value] #and pred[:measurements] + if warnings + x["distant"] << pred[:measurements].median + y["distant"] << pred[:value] + else + x["close"] << pred[:measurements].median + y["close"] << pred[:value] + end + x["all"] << pred[:measurements].median + y["all"] << pred[:value] + end + end + ["close","distant","all"].each do |cat| + R.assign "measurement", x[cat] + R.assign "prediction", y[cat] + R.eval "r <- cor(measurement,prediction,use='pairwise')" + R.eval "rmse <- sqrt(mean((prediction - measurement)^2))" + table[cat]["r_squared"] << R.eval("r").to_ruby**2 + table[cat]["rmse"] << R.eval("rmse").to_ruby + table[cat]["nr_predicted"] << y[cat].size + end +end + +File.open("data/50cv.csv","w+") do |f| + f.puts("AD,Param,Mean,SD") + table.each do |dist,data| + data.each do |name,values| + R.assign "x", values + R.eval "sd <- sd(x)" + f.puts "#{dist},#{name},#{values.mean},#{R.eval("sd").to_ruby}" + end + end +end + diff --git a/scripts/crossvalidation-plots.R b/scripts/crossvalidation-plots.R index de713f1..2511fdf 100755 --- a/scripts/crossvalidation-plots.R +++ b/scripts/crossvalidation-plots.R @@ -2,7 +2,6 @@ library(ggplot2) -nr = commandArgs(TRUE)[1] -data = read.csv(paste("data/training_log10-cv-",nr,".csv",sep="")) +data = read.csv(paste("data/training_log10-cv.csv",sep="")) img = qplot(LOAEL_predicted,LOAEL_measured_median,data=data,xlab="-log10(LOAEL predicted)",ylab="-log10(LOAEL measured median)",colour=Warnings) + geom_point() + geom_abline(intercept=0.0) + xlim(-2,4.5) + ylim(-2,4.5) + scale_color_manual(name = "Applicability domain",values=c("#00BFC4", "#F8766D"), breaks=c(TRUE,FALSE), labels=c("distant","close")) -ggsave(file=paste('figures/crossvalidation',nr,'.pdf',sep=""), plot=img,width=12, height=8) +ggsave(file='figures/crossvalidation.pdf', plot=img,width=12, height=8) diff --git a/scripts/crossvalidation-table.rb b/scripts/crossvalidation-table.rb index 499b166..1ea5894 100755 --- a/scripts/crossvalidation-table.rb +++ b/scripts/crossvalidation-table.rb @@ -1,9 +1,10 @@ #!/usr/bin/env ruby require_relative '../../lazar/lib/lazar' include OpenTox -require 'yaml' -csv_file = ARGV[0].sub(/id$/,"csv") -cv = Validation::RegressionCrossValidation.find File.read(ARGV[0]).chomp + +id = File.open(ARGV[0]).readlines.sample.chomp # random cv +csv_file = "data/training_log10-cv.csv" +cv = Validation::RegressionCrossValidation.find id data = [] cv.predictions.each do |cid,p| smi = Compound.find(cid).smiles diff --git a/scripts/crossvalidation.rb b/scripts/crossvalidation.rb index 6deca60..27f4203 100755 --- a/scripts/crossvalidation.rb +++ b/scripts/crossvalidation.rb @@ -5,7 +5,7 @@ require 'yaml' name = File.basename ARGV[0], ".csv" file = File.join "data",ARGV[0] dataset = Dataset.from_csv_file file -model = Model::LazarRegression.create(training_dataset: dataset)#, algorithms: { :prediction => {:method => "Algorithm::Caret.rf"}, :similarity => { :min => 0.5 }}) +model = Model::LazarRegression.create(training_dataset: dataset) id_file = File.join("data",ARGV[0].sub(/.csv/,"-cv-#{ARGV[1]}.id")) cv = Validation::RegressionCrossValidation.create model File.open(id_file,"w+"){|f| f.puts cv.id} diff --git a/scripts/test-prediction-plot.R b/scripts/test-prediction-plot.R index dddf91a..ddd908b 100755 --- a/scripts/test-prediction-plot.R +++ b/scripts/test-prediction-plot.R @@ -6,6 +6,6 @@ data = read.csv("data/predictions-measurements.csv",header=T) data$SMILES <- reorder(data$SMILES,data$LOAEL) img <- ggplot(data, aes(SMILES,LOAEL,ymin = min(LOAEL), ymax=max(LOAEL),color=Origin)) img <- img + ylab('-log(LOAEL mg/kg_bw/day)') + xlab('Compound') + theme(axis.text.x = element_blank()) + theme(legend.title=element_blank()) -img <- img + geom_point() + scale_color_manual(values=c("#619CFF", "#00BFC4", "#F8766D")) +img <- img + geom_point() + scale_color_manual(values=c("#000000", "#00BFC4", "#F8766D")) ggsave(file='figures/test-prediction.pdf', plot=img,width=12, height=8) |