1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
|
module OpenTox
class ValidationStatistics
include OpenTox
# Compute confusion-matrix based statistics for classification predictions.
#
# A prediction is counted only when all of its measured activities agree and
# it carries class probabilities. Matrix cells are addressed as
# [index of predicted class][index of measured class]. Only the first two
# entries of accept_values are ever matched, i.e. the bookkeeping is binary.
#
# NOTE(review): because rows index the *predicted* class, true_rate[v] is
# normalised by all predictions of v and predictivity[v] by all measurements
# of v - confirm that this naming is intended.
#
# @param predictions [Hash] id => prediction hash with :value, :measured and :probabilities
# @param accept_values [Array] class labels; positions 0 and 1 are used
# @return [Hash] :accept_values, :confusion_matrix, :weighted_confusion_matrix,
#   :accuracy, :weighted_accuracy, :true_rate, :predictivity, :finished_at
def self.classification(predictions, accept_values)
  class_count = accept_values.size
  confusion_matrix = Array.new(class_count) { Array.new(class_count, 0) }
  weighted_confusion_matrix = Array.new(class_count) { Array.new(class_count, 0) }
  binary_classes = [accept_values[0], accept_values[1]]
  nr_instances = 0

  predictions.each do |_cid, pred|
    # TODO
    # use predictions without probabilities (single neighbor)??
    # use measured majority class??
    next unless pred[:measured].uniq.size == 1 && pred[:probabilities]

    predicted = pred[:value]
    row = binary_classes.index { |candidate| predicted == candidate }
    next if row.nil?

    # A hit lands on the diagonal, a miss in the other column of the same row.
    column = predicted == pred[:measured].first ? row : 1 - row
    confusion_matrix[row][column] += 1
    weighted_confusion_matrix[row][column] += pred[:probabilities][predicted]
    nr_instances += 1
  end

  true_rate = {}
  predictivity = {}
  by_measured = confusion_matrix.transpose
  accept_values.each_with_index do |label, idx|
    correct = confusion_matrix[idx][idx]
    true_rate[label] = correct / confusion_matrix[idx].inject(:+).to_f
    predictivity[label] = correct / by_measured[idx].inject(:+).to_f
  end

  # Total probability mass over all cells, accumulated row by row.
  confidence_sum = weighted_confusion_matrix.inject(0) { |total, cells| cells.inject(total, :+) }

  accuracy = (confusion_matrix[0][0] + confusion_matrix[1][1]) / nr_instances.to_f
  weighted_hits = weighted_confusion_matrix[0][0] + weighted_confusion_matrix[1][1]
  weighted_accuracy = weighted_hits / confidence_sum.to_f
  $logger.debug "Accuracy #{accuracy}"

  {
    :accept_values => accept_values,
    :confusion_matrix => confusion_matrix,
    :weighted_confusion_matrix => weighted_confusion_matrix,
    :accuracy => accuracy,
    :weighted_accuracy => weighted_accuracy,
    :true_rate => true_rate,
    :predictivity => predictivity,
    :finished_at => Time.now
  }
end
# Compute regression statistics for a set of predictions.
#
# Every prediction that has both a predicted value and measured activities
# contributes the error between the predicted value and the median of the
# measured activities. Other predictions are logged and excluded from all
# statistics, including the denominators of MAE and RMSE.
#
# @param predictions [Hash] id => prediction hash with :value (numeric) and
#   :measured (collection responding to #median)
# @return [Hash] :mae (mean absolute error), :rmse (root mean squared error),
#   :r_squared (square of the correlation reported by R), :finished_at
def self.regression predictions
  # TODO: predictions within prediction_interval
  squared_error_sum = 0
  absolute_error_sum = 0
  x = [] # measured medians
  y = [] # predicted values
  predictions.each do |cid, pred|
    if pred[:value] && pred[:measured]
      measured = pred[:measured].median
      x << measured
      y << pred[:value]
      error = pred[:value] - measured
      squared_error_sum += error**2
      absolute_error_sum += error.abs
    else
      # Only the prediction id is logged: no model, compound id or warnings
      # collection exists in the scope of this class method.
      $logger.debug "No predicted value or no measured activities for #{cid}, excluded from regression statistics."
    end
  end
  R.assign "measurement", x
  R.assign "prediction", y
  R.eval "r <- cor(measurement,prediction,use='complete')"
  r = R.eval("r").to_ruby
  # Average over the predictions that were actually evaluated; to_f avoids
  # integer division when all errors are Integers.
  evaluated = x.size
  mae = absolute_error_sum.to_f / evaluated
  rmse = Math.sqrt(squared_error_sum.to_f / evaluated)
  $logger.debug "R^2 #{r**2}"
  $logger.debug "RMSE #{rmse}"
  $logger.debug "MAE #{mae}"
  {
    :mae => mae,
    :rmse => rmse,
    :r_squared => r**2,
    :finished_at => Time.now
  }
end
end
module Plot
# Render a correlation plot of predicted values against measured medians
# through the R bridge, store the resulting PNG in GridFS and return the
# value returned by $gridfs.insert_one.
#
# Reads `id` and `predictions` from the including object; each prediction is
# accessed with the string keys "value" and "measured". The image is written
# to /tmp/<id>_correlation.png before it is read back and uploaded.
# NOTE(review): qplot/geom_abline/ggsave are ggplot2 functions - presumably
# the library is loaded into the R session elsewhere; confirm.
def plot_id
  tmpfile = "/tmp/#{id.to_s}_correlation.png"
  measured = []
  predicted = []
  predictions.each do |_sid, p|
    measured << p["measured"].median
    predicted << p["value"]
  end
  # The R variable names become the axis labels, so each one must receive the
  # matching data: measured medians as "measurement", predicted values as
  # "prediction".
  R.assign "measurement", measured
  R.assign "prediction", predicted
  R.eval "all = c(measurement,prediction)"
  R.eval "range = c(min(all), max(all))"
  R.eval "image = qplot(prediction,measurement,main='',asp=1,xlim=range, ylim=range)"
  R.eval "image = image + geom_abline(intercept=0, slope=1)"
  R.eval "ggsave(file='#{tmpfile}', plot=image)"
  file = Mongo::Grid::File.new(File.read(tmpfile), :filename => "#{self.id.to_s}_correlation_plot.png")
  $gridfs.insert_one(file)
end
end
end
|