summaryrefslogtreecommitdiff
path: root/scripts/50cv-table.rb
blob: 686f16c8cd4c8faecca83b56032136c6d17952b1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#!/usr/bin/env ruby
require_relative '../../lazar/lib/lazar'
include OpenTox

# Result accumulator: one entry per prediction category, each holding three
# independent arrays that collect one value per cross-validation run.
table = %w[close distant all].each_with_object({}) do |category, acc|
  acc[category] = { "rmse" => [], "r_squared" => [], "nr_predicted" => [] }
end

# Reads one cross-validation id per line from the file named by ARGV[0].
# For every cross-validation it splits the predictions into categories and
# appends r^2, RMSE and the number of predictions per category to `table`.
#
# Categories:
#   "distant" - predictions with a non-empty "warnings" entry
#   "close"   - predictions without warnings
#   "all"     - every prediction that has a value
#
# File.foreach closes the file when iteration finishes; the earlier
# File.open(...).each_line form never closed the handle explicitly.
File.foreach(ARGV[0]) do |id|
  cv = Validation::RegressionCrossValidation.find id.chomp
  x = {"close" => [], "distant" => [], "all" => []} # medians of the measurements
  y = {"close" => [], "distant" => [], "all" => []} # predicted values
  cv.predictions.each do |_cid, pred|
    # Predictions without a value are skipped entirely.
    next unless pred[:value] #and pred[:measurements]
    warnings = pred["warnings"] && !pred["warnings"].empty?
    # NOTE(review): Array#median is not core Ruby; presumably supplied by the
    # lazar library required at the top of the script - confirm.
    measured = pred[:measurements].median
    # Each usable prediction is counted in its own category and in "all".
    [warnings ? "distant" : "close", "all"].each do |cat|
      x[cat] << measured
      y[cat] << pred[:value]
    end
  end
  ["close","distant","all"].each do |cat|
    # Hand both vectors to R and let it compute the correlation and the RMSE.
    R.assign "measurement", x[cat]
    R.assign "prediction", y[cat]
    R.eval "r <- cor(measurement,prediction,use='pairwise')"
    R.eval "rmse <- sqrt(mean((prediction - measurement)^2))"
    table[cat]["r_squared"] << R.eval("r").to_ruby**2
    table[cat]["rmse"] << R.eval("rmse").to_ruby
    table[cat]["nr_predicted"] << y[cat].size
  end
end

# Writes the summary to data/50cv.csv: one row per (category, metric) pair
# with the mean of the collected values and their standard deviation, the
# latter computed by R's sd().
File.open("data/50cv.csv","w+") do |csv|
  csv.puts("AD,Param,Mean,SD")
  table.each_pair do |category, metrics|
    metrics.each_pair do |metric, samples|
      R.assign "x", samples
      R.eval "sd <- sd(x)"
      mean = samples.mean
      deviation = R.eval("sd").to_ruby
      csv.puts "#{category},#{metric},#{mean},#{deviation}"
    end
  end
end