1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
|
require "./lib/prediction_data.rb"
require "./lib/predictions.rb"
module Lib
class OTPredictions < Predictions
attr_reader :training_values, :prediction_feature_title
# Creates a prediction set backed by +data+.
#
# data                     - expected to be a Hash; for anything else
#                            internal_server_error is called (defined outside
#                            this file, presumably aborting the request).
# compounds                - collection indexed by instance position, read by
#                            #compound.
# training_values          - stored as given and exposed through the
#                            training_values reader.
# prediction_feature_title - stored as given and exposed through the
#                            prediction_feature_title reader.
def initialize(data, compounds=nil, training_values=nil, prediction_feature_title=nil)
  data.is_a?(Hash) || internal_server_error
  super(data)
  @prediction_feature_title = prediction_feature_title
  @training_values = training_values
  @compounds = compounds
end
# Identifier of the instance at +instance_index+. For this class the
# identifier is whatever #compound returns for the same index.
def identifier(instance_index)
compound(instance_index)
end
# Returns the entry of the compounds collection handed to the constructor
# at position +instance_index+.
# NOTE(review): the constructor defaults compounds to nil, in which case this
# lookup raises NoMethodError - callers apparently always pass compounds.
def compound(instance_index)
@compounds[instance_index]
end
# Collects the validation statistics that apply to this prediction set.
#
# The list of statistic names is Validation::VAL_CLASS_PROPS for a
# "classification" feature type and Validation::VAL_REGR_PROPS for
# "regression"; each name is invoked on self via +send+.
#
# Returns a Hash mapping statistic name to its value (empty for any other
# feature type).
def compute_stats()
  type = feature_type
  names =
    if "classification" == type
      Validation::VAL_CLASS_PROPS
    elsif "regression" == type
      Validation::VAL_REGR_PROPS
    else
      []
    end
  stats = {}
  names.each { |name| stats[name] = send(name) }
  stats
end
# Table for this prediction set alone: passes a one-element list containing
# self to OTPredictions.to_array, leaving its format and validation_uris
# arguments at their defaults.
def to_array()
OTPredictions.to_array( [self] )
end
# Renders one or more prediction sets as a table: an Array of rows (Arrays)
# whose first row is the header.
#
# predictions     - Enumerable of prediction objects. The first one decides
#                   whether the table is laid out for classification
#                   ("Classification" column) or regression ("Error" and
#                   "Boxplot" columns) and supplies the feature title handed
#                   to PredictionTransformer.
# format          - when true the table is prepared for display: a "Compound"
#                   image-URL column is added, missing test/predicted values
#                   are replaced by placeholder strings, classification
#                   results use ICON_ERROR/ICON_OK instead of 1/0, and rows
#                   with the same compound and validation URI are merged.
# validation_uris - optional collection parallel to +predictions+; entry k is
#                   indexed by instance and fills the "Validation URI" column.
#
# Prediction sets whose training_values is nil are treated as having no
# training data.
#
# Raises RuntimeError when two rows that are to be merged disagree on the
# predicted value.
def self.to_array( predictions, format=false, validation_uris=nil )
  confidence_available = predictions.any? { |pred| pred.confidence_values_available? }
  training_data_available = predictions.any? do |pred|
    training = pred.training_values
    training && training.keys.flatten.size > 0
  end
  classification = predictions.first.feature_type == "classification"

  res = []
  join_map = {} # "<compound uri><validation uri>" => index of the row in res
  predictions.each_with_index do |pred, count|
    v_uris = validation_uris[count] if validation_uris
    pred.num_instances.times do |i|
      compound_uri = pred.identifier(i)
      actual = pred.actual_value(i)
      predicted = pred.predicted_value(i)
      both_known = !predicted.nil? && !actual.nil?

      a = {}
      a["Compound"] = compound_uri+"?media=image/png&size=150"
      a["Compound URI"] = compound_uri
      a["Training value"] = (pred.training_values || {})[compound_uri] if training_data_available
      a["Test value"] = actual
      a["Predicted value"] = predicted
      if pred.feature_type == "classification"
        if both_known
          a["Classification"] =
            if pred.classification_miss?(i)
              format ? ICON_ERROR : 1
            else
              format ? ICON_OK : 0
            end
        end
      elsif both_known
        a["Error"] = (actual-predicted).abs
      end
      a["Confidence value"] = pred.confidence_value(i) if confidence_available
      a["Validation URI"] = v_uris[i] if validation_uris

      key = "#{compound_uri}#{v_uris ? v_uris[i] : ''}"
      idx = join_map[key]
      if idx && format # join equal compounds unless formatting is disabled
        row = res[idx]
        raise "prediciton of same compound in same validation should be equal" unless row["Predicted value"]==a["Predicted value"]
        ["Error", "Test value"].each do |k|
          row[k] = [ row[k], a[k] ].flatten.compact if row.has_key?(k) || a.has_key?(k)
        end
        # conflicting outcomes for the merged row cannot be shown as one icon
        row["Classification"] = nil if a["Classification"] && row["Classification"]!=a["Classification"]
      else
        join_map[key] = res.size
        res << a
      end
    end
  end

  hline = nil
  unless classification
    # compute horizontal line step-width to make boxplots inter-comparable:
    # the largest per-row spread, rounded down to a power of 10
    delta = 0
    res.each do |r|
      vals = ["Training value","Predicted value","Test value"].collect{|k| r[k].nil? ? [] : r[k] }.flatten
      delta = [delta,(vals.max-vals.min)].max if vals.size>0
    end
    hline = boxplot_step_width(delta)
  end

  transformer = PredictionTransformer.new(res.collect{|r| r["Compound URI"]},predictions.first.prediction_feature_title)
  plain_columns = ["Training value","Test value","Predicted value"]
  res.each do |row|
    unless classification
      # boxplot over training, test and predicted values of this row
      parts = []
      parts << "training=#{[row["Training value"]].join(",")}" if training_data_available
      parts << "test=#{[row["Test value"]].join(",")}"
      parts << "predicted=#{[row["Predicted value"]].join(",")}"
      row["Boxplot"] = File.join($validation[:uri],"/boxplot/#{parts.join(";")}?hline=#{hline}&size=150")
    end

    # render missing values
    if format
      row["Test value"] = "'missing'" unless row["Test value"]
      row["Predicted value"] = (row["Training value"] ? "'in-training-data'" : "'outside-AD'") unless row["Predicted value"]
    end

    # turn every value cell into a string; merged cells hold arrays
    ["Training value","Test value","Predicted value","Error","Confidence value","Validation URI"].each do |col|
      next unless row.has_key?(col)
      vals = [row[col]].flatten
      do_transform = transformer.do_transform? && plain_columns.include?(col)
      if classification || vals.empty? || vals.any?{|x| !x.is_a?(Numeric)}
        # an empty cell (merged rows without any value) renders as "" rather
        # than as the mean of nothing
        row[col] = vals.join(", ")
      elsif vals.size==1
        row[col] = vals.first.to_nice_s
        row[col] += "\n#{transformer.transform(vals.first,row["Compound URI"])}" if do_transform
      else # vals.size > 1
        mean = vals.inject(0.0) { |sum, el| sum + el } / vals.size
        row[col] = "#{mean.to_nice_s} (mean)"
        row[col] += "\n#{transformer.transform(mean,row["Compound URI"])}" if do_transform
        row[col] += "\n("+vals.collect{|x| x.to_nice_s}.join(", ")
        row[col] += "\n#{transformer.transform(vals,row["Compound URI"])}" if do_transform
        row[col] += ")"
      end
    end
  end

  header = []
  header << "Compound" if format
  header << "Training value" if training_data_available
  header << "Test value"
  header << "Predicted value"
  if classification
    header << "Classification"
  else
    header << "Error"
    header << "Boxplot"
  end
  header << "Confidence value" if confidence_available
  header << "Validation URI" if validation_uris
  header << "Compound URI"

  rows = res.collect { |r| header.collect { |h| r[h] } }
  if transformer.do_transform?
    # append the unit to the header cells of the value columns
    with_unit = ["Training value","Test value","Predicted value","Error"]
    header = header.collect { |h| with_unit.include?(h) ? h + "\n[#{transformer.unit}]" : h }
  end
  [header] + rows
end

# Internal helper of to_array: rounds +delta+ down to a power of ten, e.g.
# 250 -> 100, 10 -> 10, 3.2 -> 1, 0.5 -> 0.1.
#
# Returns an Integer for results >= 1 and a Float below that (an Integer
# raised to a negative Integer power would be a Rational and print as
# "1/10"). Returns 1 when +delta+ is not positive, because the logarithm is
# undefined there and there is no spread to scale.
def self.boxplot_step_width(delta)
  return 1 unless delta > 0
  exponent = Math.log10(delta).floor
  # correct a possible off-by-one caused by floating-point rounding of the logarithm
  exponent += 1 if 10.0**(exponent + 1) <= delta
  exponent -= 1 if 10.0**exponent > delta
  exponent < 0 ? 10.0**exponent : 10**exponent
end
end
########## HACK FOR LOAEL MODELS ##############################
# Converts log-scaled LOAEL values back to a dose in mg/kg bw/day for display.
# Only features whose title matches MOLAR_TITLE or equals MASS_TITLE are
# converted; for every other title #do_transform? is false and the
# conversion methods yield nil.
class PredictionTransformer
  # Matches the titles whose unit is reported as "-log mol/kg bw/day"; these
  # need the molecular weight of each compound for the conversion.
  MOLAR_TITLE = /LOAEL_log_.mol_kg_bw_day/
  # Title whose unit is reported as "log mg/kg bw/day".
  MASS_TITLE = "LOAEL_log_mg_kg_bw_day".freeze

  # compounds                - compound URIs that will later be passed to
  #                            #transform / #transform_single.
  # prediction_feature_title - title of the predicted feature; selects the
  #                            conversion.
  #
  # For titles matching MOLAR_TITLE the "Openbabel.mw" descriptor of all
  # compounds is fetched through OpenTox::Algorithm::Descriptor.physchem and
  # kept per URI. This uses the same pattern as the conversion itself, so
  # every title that is converted also has its weights loaded.
  def initialize(compounds, prediction_feature_title)
    @prediction_feature_title = prediction_feature_title
    @mw = {}
    return unless molar?
    descriptors = OpenTox::Algorithm::Descriptor.physchem(compounds.collect{|c| OpenTox::Compound.new(c)},["Openbabel.mw"])
    descriptors.each do |uri,hash|
      @mw[uri] = hash["Openbabel.mw"].to_f
    end
  end

  # True when values of this feature can be converted, i.e. a unit is known.
  def do_transform?
    !unit.nil?
  end

  # Unit of the untransformed values, or nil for features that are not
  # converted.
  def unit
    case @prediction_feature_title
    when MOLAR_TITLE
      "-log mol/kg bw/day"
    when MASS_TITLE
      "log mg/kg bw/day"
    end
  end

  # Converts a single value for the compound +c_uri+ and rounds it to one
  # decimal place.
  #
  # Returns nil when the feature is not one that gets converted, or when no
  # molecular weight is known for +c_uri+.
  def transform_single(val, c_uri)
    converted =
      case @prediction_feature_title
      when MOLAR_TITLE
        mw = @mw && @mw[c_uri]
        # 10**(-val) multiplied by the molecular weight and by 1000
        mw ? (10**(-1*val)) * (mw*1000) : nil
      when MASS_TITLE
        10**val
      end
    converted ? (converted*10).round/10.0 : nil
  end

  # Converts one value or an Array of values (all belonging to +c_uri+) and
  # returns them as one bracketed string ending in " mg/kg bw/day]".
  def transform(val, c_uri)
    "["+[val].flatten.collect{|v| transform_single(v,c_uri)}.join(", ")+" mg/kg bw/day]"
  end

  private

  # Same test a +case/when+ on MOLAR_TITLE performs; false for nil titles.
  def molar?
    MOLAR_TITLE === @prediction_feature_title
  end
end
end
|