From 6d6be53a110e71d0d56ae5ea9a2675f76f7c84ec Mon Sep 17 00:00:00 2001 From: "helma@in-silico.ch" Date: Thu, 6 Sep 2018 17:24:25 +0200 Subject: adjusted classification similarities, dataset sdf export --- lib/validation-statistics.rb | 139 +++++++++++++++++++++++++++++-------------- 1 file changed, 95 insertions(+), 44 deletions(-) (limited to 'lib/validation-statistics.rb') diff --git a/lib/validation-statistics.rb b/lib/validation-statistics.rb index 69e7992..a69ede3 100644 --- a/lib/validation-statistics.rb +++ b/lib/validation-statistics.rb @@ -7,9 +7,10 @@ module OpenTox # @return [Hash] def statistics self.accept_values = model.prediction_feature.accept_values - self.confusion_matrix = Array.new(accept_values.size){Array.new(accept_values.size,0)} - self.weighted_confusion_matrix = Array.new(accept_values.size){Array.new(accept_values.size,0)} - nr_instances = 0 + self.confusion_matrix = {:all => Array.new(accept_values.size){Array.new(accept_values.size,0)}, :without_warnings => Array.new(accept_values.size){Array.new(accept_values.size,0)}} + self.weighted_confusion_matrix = {:all => Array.new(accept_values.size){Array.new(accept_values.size,0)}, :without_warnings => Array.new(accept_values.size){Array.new(accept_values.size,0)}} + #self.weighted_confusion_matrix = Array.new(accept_values.size){Array.new(accept_values.size,0)} + self.nr_predictions = {:all => 0,:without_warnings => 0} predictions.each do |cid,pred| # TODO # use predictions without probabilities (single neighbor)?? @@ -18,41 +19,69 @@ module OpenTox m = pred[:measurements].first if pred[:value] == m if pred[:value] == accept_values[0] - confusion_matrix[0][0] += 1 - weighted_confusion_matrix[0][0] += pred[:probabilities][pred[:value]] - nr_instances += 1 + confusion_matrix[:all][0][0] += 1 + weighted_confusion_matrix[:all][0][0] += pred[:probabilities][pred[:value]] + self.nr_predictions[:all] += 1 + if pred[:warnings].empty? + confusion_matrix[:without_warnings][0][0] += 1 + weighted_confusion_matrix[:without_warnings][0][0] += pred[:probabilities][pred[:value]] + self.nr_predictions[:without_warnings] += 1 + end elsif pred[:value] == accept_values[1] - confusion_matrix[1][1] += 1 - weighted_confusion_matrix[1][1] += pred[:probabilities][pred[:value]] - nr_instances += 1 + confusion_matrix[:all][1][1] += 1 + weighted_confusion_matrix[:all][1][1] += pred[:probabilities][pred[:value]] + self.nr_predictions[:all] += 1 + if pred[:warnings].empty? + confusion_matrix[:without_warnings][1][1] += 1 + weighted_confusion_matrix[:without_warnings][1][1] += pred[:probabilities][pred[:value]] + self.nr_predictions[:without_warnings] += 1 + end end elsif pred[:value] != m if pred[:value] == accept_values[0] - confusion_matrix[0][1] += 1 - weighted_confusion_matrix[0][1] += pred[:probabilities][pred[:value]] - nr_instances += 1 + confusion_matrix[:all][0][1] += 1 + weighted_confusion_matrix[:all][0][1] += pred[:probabilities][pred[:value]] + self.nr_predictions[:all] += 1 + if pred[:warnings].empty? + confusion_matrix[:without_warnings][0][1] += 1 + weighted_confusion_matrix[:without_warnings][0][1] += pred[:probabilities][pred[:value]] + self.nr_predictions[:without_warnings] += 1 + end elsif pred[:value] == accept_values[1] - confusion_matrix[1][0] += 1 - weighted_confusion_matrix[1][0] += pred[:probabilities][pred[:value]] - nr_instances += 1 + confusion_matrix[:all][1][0] += 1 + weighted_confusion_matrix[:all][1][0] += pred[:probabilities][pred[:value]] + self.nr_predictions[:all] += 1 + if pred[:warnings].empty? + confusion_matrix[:without_warnings][1][0] += 1 + weighted_confusion_matrix[:without_warnings][1][0] += pred[:probabilities][pred[:value]] + self.nr_predictions[:without_warnings] += 1 + end end end end end - self.true_rate = {} - self.predictivity = {} + self.true_rate = {:all => {}, :without_warnings => {}} + self.predictivity = {:all => {}, :without_warnings => {}} accept_values.each_with_index do |v,i| - self.true_rate[v] = confusion_matrix[i][i]/confusion_matrix[i].reduce(:+).to_f - self.predictivity[v] = confusion_matrix[i][i]/confusion_matrix.collect{|n| n[i]}.reduce(:+).to_f + [:all,:without_warnings].each do |a| + self.true_rate[a][v] = confusion_matrix[a][i][i]/confusion_matrix[a][i].reduce(:+).to_f + self.predictivity[a][v] = confusion_matrix[a][i][i]/confusion_matrix[a].collect{|n| n[i]}.reduce(:+).to_f + end end - confidence_sum = 0 - weighted_confusion_matrix.each do |r| - r.each do |c| - confidence_sum += c + confidence_sum = {:all => 0, :without_warnings => 0} + [:all,:without_warnings].each do |a| + weighted_confusion_matrix[a].each do |r| + r.each do |c| + confidence_sum[a] += c + end end end - self.accuracy = (confusion_matrix[0][0]+confusion_matrix[1][1])/nr_instances.to_f - self.weighted_accuracy = (weighted_confusion_matrix[0][0]+weighted_confusion_matrix[1][1])/confidence_sum.to_f + self.accuracy = {} + self.weighted_accuracy = {} + [:all,:without_warnings].each do |a| + self.accuracy[a] = (confusion_matrix[a][0][0]+confusion_matrix[a][1][1])/nr_predictions[a].to_f + self.weighted_accuracy[a] = (weighted_confusion_matrix[a][0][0]+weighted_confusion_matrix[a][1][1])/confidence_sum[a].to_f + end $logger.debug "Accuracy #{accuracy}" save { @@ -63,6 +92,7 @@ module OpenTox :weighted_accuracy => weighted_accuracy, :true_rate => self.true_rate, :predictivity => self.predictivity, + :nr_predictions => nr_predictions, } end @@ -112,26 +142,44 @@ module OpenTox # @return [Hash] def statistics self.warnings = [] - self.rmse = 0 - self.mae = 0 - self.within_prediction_interval = 0 - self.out_of_prediction_interval = 0 - x = [] - y = [] + self.rmse = {:all =>0,:without_warnings => 0} + self.mae = {:all =>0,:without_warnings => 0} + self.within_prediction_interval = {:all =>0,:without_warnings => 0} + self.out_of_prediction_interval = {:all =>0,:without_warnings => 0} + x = {:all => [],:without_warnings => []} + y = {:all => [],:without_warnings => []} + self.nr_predictions = {:all =>0,:without_warnings => 0} + error = {} predictions.each do |cid,pred| if pred[:value] and pred[:measurements] - x << pred[:measurements].median - y << pred[:value] - error = pred[:value]-pred[:measurements].median - self.rmse += error**2 - self.mae += error.abs + self.nr_predictions[:all] +=1 + x[:all] << pred[:measurements].median + y[:all] << pred[:value] + error[:all] = pred[:value]-pred[:measurements].median + self.rmse[:all] += error**2 + self.mae[:all] += error.abs if pred[:prediction_interval] if pred[:measurements].median >= pred[:prediction_interval][0] and pred[:measurements].median <= pred[:prediction_interval][1] - self.within_prediction_interval += 1 + self.within_prediction_interval[:all] += 1 else - self.out_of_prediction_interval += 1 + self.out_of_prediction_interval[:all] += 1 end end + if pred[:warnings].empty? + self.nr_predictions[:without_warnings] +=1 + x[:without_warnings] << pred[:measurements].median + y[:without_warnings] << pred[:value] + error[:without_warnings] = pred[:value]-pred[:measurements].median + self.rmse[:without_warnings] += error**2 + self.mae[:without_warnings] += error.abs + if pred[:prediction_interval] + if pred[:measurements].median >= pred[:prediction_interval][0] and pred[:measurements].median <= pred[:prediction_interval][1] + self.within_prediction_interval[:without_warnings] += 1 + else + self.out_of_prediction_interval[:without_warnings] += 1 + end + end + end else trd_id = model.training_dataset_id smiles = Compound.find(cid).smiles @@ -139,12 +187,14 @@ module OpenTox $logger.debug "No training activities for #{smiles} in training dataset #{trd_id}." end end - R.assign "measurement", x - R.assign "prediction", y - R.eval "r <- cor(measurement,prediction,use='pairwise')" - self.r_squared = R.eval("r").to_ruby**2 - self.mae = self.mae/predictions.size - self.rmse = Math.sqrt(self.rmse/predictions.size) + [:all,:without_warnings].each do |a| + R.assign "measurement", x[a] + R.assign "prediction", y[a] + R.eval "r <- cor(measurement,prediction,use='pairwise')" + self.r_squared[a] = R.eval("r").to_ruby**2 + self.mae[a] = self.mae[a]/self.nr_predictions[a] + self.rmse[a] = Math.sqrt(self.rmse[a]/self.nr_predictions[a]) + end $logger.debug "R^2 #{r_squared}" $logger.debug "RMSE #{rmse}" $logger.debug "MAE #{mae}" @@ -157,6 +207,7 @@ module OpenTox :r_squared => r_squared, :within_prediction_interval => within_prediction_interval, :out_of_prediction_interval => out_of_prediction_interval, + :nr_predictions => nr_predictions, } end -- cgit v1.2.3 From 9d17895ab9e8cd31e0f32e8e622e13612ea5ff77 Mon Sep 17 00:00:00 2001 From: "helma@in-silico.ch" Date: Fri, 12 Oct 2018 21:58:36 +0200 Subject: validation statistic fixes --- lib/validation-statistics.rb | 128 ++++++++++++++++++++++--------------------- 1 file changed, 66 insertions(+), 62 deletions(-) (limited to 'lib/validation-statistics.rb') diff --git a/lib/validation-statistics.rb b/lib/validation-statistics.rb index a69ede3..e440731 100644 --- a/lib/validation-statistics.rb +++ b/lib/validation-statistics.rb @@ -9,8 +9,7 @@ module OpenTox self.accept_values = model.prediction_feature.accept_values self.confusion_matrix = {:all => Array.new(accept_values.size){Array.new(accept_values.size,0)}, :without_warnings => Array.new(accept_values.size){Array.new(accept_values.size,0)}} self.weighted_confusion_matrix = {:all => Array.new(accept_values.size){Array.new(accept_values.size,0)}, :without_warnings => Array.new(accept_values.size){Array.new(accept_values.size,0)}} - #self.weighted_confusion_matrix = Array.new(accept_values.size){Array.new(accept_values.size,0)} - self.nr_predictions = {:all => 0,:without_warnings => 0} + self.nr_predictions = {:all => 0,:without_warnings => 0} predictions.each do |cid,pred| # TODO # use predictions without probabilities (single neighbor)?? @@ -21,41 +20,41 @@ module OpenTox if pred[:value] == accept_values[0] confusion_matrix[:all][0][0] += 1 weighted_confusion_matrix[:all][0][0] += pred[:probabilities][pred[:value]] - self.nr_predictions[:all] += 1 - if pred[:warnings].empty? + self.nr_predictions[:all] += 1 + if pred[:warnings].empty? confusion_matrix[:without_warnings][0][0] += 1 weighted_confusion_matrix[:without_warnings][0][0] += pred[:probabilities][pred[:value]] - self.nr_predictions[:without_warnings] += 1 - end + self.nr_predictions[:without_warnings] += 1 + end elsif pred[:value] == accept_values[1] confusion_matrix[:all][1][1] += 1 weighted_confusion_matrix[:all][1][1] += pred[:probabilities][pred[:value]] - self.nr_predictions[:all] += 1 - if pred[:warnings].empty? + self.nr_predictions[:all] += 1 + if pred[:warnings].empty? confusion_matrix[:without_warnings][1][1] += 1 weighted_confusion_matrix[:without_warnings][1][1] += pred[:probabilities][pred[:value]] - self.nr_predictions[:without_warnings] += 1 - end + self.nr_predictions[:without_warnings] += 1 + end end elsif pred[:value] != m if pred[:value] == accept_values[0] confusion_matrix[:all][0][1] += 1 weighted_confusion_matrix[:all][0][1] += pred[:probabilities][pred[:value]] - self.nr_predictions[:all] += 1 - if pred[:warnings].empty? + self.nr_predictions[:all] += 1 + if pred[:warnings].empty? confusion_matrix[:without_warnings][0][1] += 1 weighted_confusion_matrix[:without_warnings][0][1] += pred[:probabilities][pred[:value]] - self.nr_predictions[:without_warnings] += 1 - end + self.nr_predictions[:without_warnings] += 1 + end elsif pred[:value] == accept_values[1] confusion_matrix[:all][1][0] += 1 weighted_confusion_matrix[:all][1][0] += pred[:probabilities][pred[:value]] - self.nr_predictions[:all] += 1 - if pred[:warnings].empty? + self.nr_predictions[:all] += 1 + if pred[:warnings].empty? confusion_matrix[:without_warnings][1][0] += 1 weighted_confusion_matrix[:without_warnings][1][0] += pred[:probabilities][pred[:value]] - self.nr_predictions[:without_warnings] += 1 - end + self.nr_predictions[:without_warnings] += 1 + end end end end @@ -63,25 +62,25 @@ module OpenTox self.true_rate = {:all => {}, :without_warnings => {}} self.predictivity = {:all => {}, :without_warnings => {}} accept_values.each_with_index do |v,i| - [:all,:without_warnings].each do |a| - self.true_rate[a][v] = confusion_matrix[a][i][i]/confusion_matrix[a][i].reduce(:+).to_f - self.predictivity[a][v] = confusion_matrix[a][i][i]/confusion_matrix[a].collect{|n| n[i]}.reduce(:+).to_f - end + [:all,:without_warnings].each do |a| + self.true_rate[a][v] = confusion_matrix[a][i][i]/confusion_matrix[a][i].reduce(:+).to_f + self.predictivity[a][v] = confusion_matrix[a][i][i]/confusion_matrix[a].collect{|n| n[i]}.reduce(:+).to_f + end end confidence_sum = {:all => 0, :without_warnings => 0} [:all,:without_warnings].each do |a| weighted_confusion_matrix[a].each do |r| r.each do |c| confidence_sum[a] += c - end + end end end - self.accuracy = {} - self.weighted_accuracy = {} + self.accuracy = {} + self.weighted_accuracy = {} [:all,:without_warnings].each do |a| self.accuracy[a] = (confusion_matrix[a][0][0]+confusion_matrix[a][1][1])/nr_predictions[a].to_f self.weighted_accuracy[a] = (weighted_confusion_matrix[a][0][0]+weighted_confusion_matrix[a][1][1])/confidence_sum[a].to_f - end + end $logger.debug "Accuracy #{accuracy}" save { @@ -92,7 +91,7 @@ module OpenTox :weighted_accuracy => weighted_accuracy, :true_rate => self.true_rate, :predictivity => self.predictivity, - :nr_predictions => nr_predictions, + :nr_predictions => nr_predictions, } end @@ -143,19 +142,20 @@ module OpenTox def statistics self.warnings = [] self.rmse = {:all =>0,:without_warnings => 0} + self.r_squared = {:all =>0,:without_warnings => 0} self.mae = {:all =>0,:without_warnings => 0} self.within_prediction_interval = {:all =>0,:without_warnings => 0} self.out_of_prediction_interval = {:all =>0,:without_warnings => 0} x = {:all => [],:without_warnings => []} y = {:all => [],:without_warnings => []} self.nr_predictions = {:all =>0,:without_warnings => 0} - error = {} predictions.each do |cid,pred| + p pred if pred[:value] and pred[:measurements] - self.nr_predictions[:all] +=1 + self.nr_predictions[:all] +=1 x[:all] << pred[:measurements].median y[:all] << pred[:value] - error[:all] = pred[:value]-pred[:measurements].median + error = pred[:value]-pred[:measurements].median self.rmse[:all] += error**2 self.mae[:all] += error.abs if pred[:prediction_interval] @@ -165,21 +165,21 @@ module OpenTox self.out_of_prediction_interval[:all] += 1 end end - if pred[:warnings].empty? - self.nr_predictions[:without_warnings] +=1 - x[:without_warnings] << pred[:measurements].median - y[:without_warnings] << pred[:value] - error[:without_warnings] = pred[:value]-pred[:measurements].median - self.rmse[:without_warnings] += error**2 - self.mae[:without_warnings] += error.abs - if pred[:prediction_interval] - if pred[:measurements].median >= pred[:prediction_interval][0] and pred[:measurements].median <= pred[:prediction_interval][1] - self.within_prediction_interval[:without_warnings] += 1 - else - self.out_of_prediction_interval[:without_warnings] += 1 - end - end - end + if pred[:warnings].empty? + self.nr_predictions[:without_warnings] +=1 + x[:without_warnings] << pred[:measurements].median + y[:without_warnings] << pred[:value] + error = pred[:value]-pred[:measurements].median + self.rmse[:without_warnings] += error**2 + self.mae[:without_warnings] += error.abs + if pred[:prediction_interval] + if pred[:measurements].median >= pred[:prediction_interval][0] and pred[:measurements].median <= pred[:prediction_interval][1] + self.within_prediction_interval[:without_warnings] += 1 + else + self.out_of_prediction_interval[:without_warnings] += 1 + end + end + end else trd_id = model.training_dataset_id smiles = Compound.find(cid).smiles @@ -187,36 +187,40 @@ module OpenTox $logger.debug "No training activities for #{smiles} in training dataset #{trd_id}." end end - [:all,:without_warnings].each do |a| - R.assign "measurement", x[a] - R.assign "prediction", y[a] - R.eval "r <- cor(measurement,prediction,use='pairwise')" - self.r_squared[a] = R.eval("r").to_ruby**2 - self.mae[a] = self.mae[a]/self.nr_predictions[a] - self.rmse[a] = Math.sqrt(self.rmse[a]/self.nr_predictions[a]) - end + [:all,:without_warnings].each do |a| + if x[a].size > 2 + R.assign "measurement", x[a] + R.assign "prediction", y[a] + R.eval "r <- cor(measurement,prediction,use='pairwise')" + self.r_squared[a] = R.eval("r").to_ruby**2 + else + self.r_squared[a] = 0 + end + if self.nr_predictions[a] > 0 + self.mae[a] = self.mae[a]/self.nr_predictions[a] + self.rmse[a] = Math.sqrt(self.rmse[a]/self.nr_predictions[a]) + else + self.mae[a] = nil + self.rmse[a] = nil + end + end $logger.debug "R^2 #{r_squared}" $logger.debug "RMSE #{rmse}" $logger.debug "MAE #{mae}" - $logger.debug "#{percent_within_prediction_interval.round(2)}% of measurements within prediction interval" + $logger.debug "Nr predictions #{nr_predictions}" + $logger.debug "#{within_prediction_interval} measurements within prediction interval" $logger.debug "#{warnings}" save { :mae => mae, :rmse => rmse, :r_squared => r_squared, - :within_prediction_interval => within_prediction_interval, + :within_prediction_interval => self.within_prediction_interval, :out_of_prediction_interval => out_of_prediction_interval, - :nr_predictions => nr_predictions, + :nr_predictions => nr_predictions, } end - # Get percentage of measurements within the prediction interval - # @return [Float] - def percent_within_prediction_interval - 100*within_prediction_interval.to_f/(within_prediction_interval+out_of_prediction_interval) - end - # Plot predicted vs measured values # @param [String,nil] format # @return [Blob] -- cgit v1.2.3 From d9c9d78e49d886ea91386adbbd2b523347df226e Mon Sep 17 00:00:00 2001 From: "helma@in-silico.ch" Date: Mon, 29 Oct 2018 20:34:39 +0100 Subject: dataset predictions fixed --- lib/validation-statistics.rb | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'lib/validation-statistics.rb') diff --git a/lib/validation-statistics.rb b/lib/validation-statistics.rb index e440731..7bae891 100644 --- a/lib/validation-statistics.rb +++ b/lib/validation-statistics.rb @@ -150,8 +150,7 @@ module OpenTox y = {:all => [],:without_warnings => []} self.nr_predictions = {:all =>0,:without_warnings => 0} predictions.each do |cid,pred| - p pred - if pred[:value] and pred[:measurements] + !if pred[:value] and pred[:measurements] and !pred[:measurements].empty? self.nr_predictions[:all] +=1 x[:all] << pred[:measurements].median y[:all] << pred[:value] -- cgit v1.2.3 From 3a9c9332b660d35720ad4fa1f55ee0883e53aecd Mon Sep 17 00:00:00 2001 From: "helma@in-silico.ch" Date: Fri, 2 Nov 2018 20:34:44 +0100 Subject: warnings fixed, cleanup --- lib/validation-statistics.rb | 1 + 1 file changed, 1 insertion(+) (limited to 'lib/validation-statistics.rb') diff --git a/lib/validation-statistics.rb b/lib/validation-statistics.rb index 7bae891..ad4c14d 100644 --- a/lib/validation-statistics.rb +++ b/lib/validation-statistics.rb @@ -82,6 +82,7 @@ module OpenTox self.weighted_accuracy[a] = (weighted_confusion_matrix[a][0][0]+weighted_confusion_matrix[a][1][1])/confidence_sum[a].to_f end $logger.debug "Accuracy #{accuracy}" + $logger.debug "Nr Predictions #{nr_predictions}" save { :accept_values => accept_values, -- cgit v1.2.3 From 7e547fd4a296f497615a7805d565b378cb1bd7cd Mon Sep 17 00:00:00 2001 From: "helma@in-silico.ch" Date: Wed, 14 Nov 2018 17:33:44 +0100 Subject: bad_request_error substituted with ArgumentError --- lib/validation-statistics.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/validation-statistics.rb') diff --git a/lib/validation-statistics.rb b/lib/validation-statistics.rb index ad4c14d..f3e3af8 100644 --- a/lib/validation-statistics.rb +++ b/lib/validation-statistics.rb @@ -242,7 +242,7 @@ module OpenTox title = "log2(Net cell association [mL/ug(Mg)])" else title = feature.name - title += " [#{feature.unit}]" if feature.unit and !feature.unit.blank? + title += "-log10(#{feature.unit})" if feature.unit and !feature.unit.blank? end R.eval "image = qplot(prediction,measurement,main='#{title}',xlab='Prediction',ylab='Measurement',asp=1,xlim=range, ylim=range)" R.eval "image = image + geom_abline(intercept=0, slope=1)" -- cgit v1.2.3 From 0882c2cd0de934d7377fc9d08c306be98612c88a Mon Sep 17 00:00:00 2001 From: "helma@in-silico.ch" Date: Fri, 16 Nov 2018 18:42:42 +0100 Subject: real datasets for testing, test data cleanup, Daphnia import, upper and lower similarity thresholds --- lib/validation-statistics.rb | 163 ++++++++++++++++++------------------------- 1 file changed, 67 insertions(+), 96 deletions(-) (limited to 'lib/validation-statistics.rb') diff --git a/lib/validation-statistics.rb b/lib/validation-statistics.rb index f3e3af8..8a8970e 100644 --- a/lib/validation-statistics.rb +++ b/lib/validation-statistics.rb @@ -7,79 +7,55 @@ module OpenTox # @return [Hash] def statistics self.accept_values = model.prediction_feature.accept_values - self.confusion_matrix = {:all => Array.new(accept_values.size){Array.new(accept_values.size,0)}, :without_warnings => Array.new(accept_values.size){Array.new(accept_values.size,0)}} - self.weighted_confusion_matrix = {:all => Array.new(accept_values.size){Array.new(accept_values.size,0)}, :without_warnings => Array.new(accept_values.size){Array.new(accept_values.size,0)}} - self.nr_predictions = {:all => 0,:without_warnings => 0} + self.confusion_matrix = {:all => Array.new(accept_values.size){Array.new(accept_values.size,0)}, :confidence_high => Array.new(accept_values.size){Array.new(accept_values.size,0)}, :confidence_low => Array.new(accept_values.size){Array.new(accept_values.size,0)}} + self.nr_predictions = {:all => 0,:confidence_high => 0,:confidence_low => 0} predictions.each do |cid,pred| - # TODO - # use predictions without probabilities (single neighbor)?? - # use measured majority class?? + # TODO: use measured majority class or all measurements?? if pred[:measurements].uniq.size == 1 and pred[:probabilities] m = pred[:measurements].first if pred[:value] == m - if pred[:value] == accept_values[0] - confusion_matrix[:all][0][0] += 1 - weighted_confusion_matrix[:all][0][0] += pred[:probabilities][pred[:value]] - self.nr_predictions[:all] += 1 - if pred[:warnings].empty? - confusion_matrix[:without_warnings][0][0] += 1 - weighted_confusion_matrix[:without_warnings][0][0] += pred[:probabilities][pred[:value]] - self.nr_predictions[:without_warnings] += 1 - end - elsif pred[:value] == accept_values[1] - confusion_matrix[:all][1][1] += 1 - weighted_confusion_matrix[:all][1][1] += pred[:probabilities][pred[:value]] - self.nr_predictions[:all] += 1 - if pred[:warnings].empty? - confusion_matrix[:without_warnings][1][1] += 1 - weighted_confusion_matrix[:without_warnings][1][1] += pred[:probabilities][pred[:value]] - self.nr_predictions[:without_warnings] += 1 + accept_values.each_with_index do |v,i| + if pred[:value] == v + confusion_matrix[:all][i][i] += 1 + self.nr_predictions[:all] += 1 + if pred[:confidence].match(/High/i) + confusion_matrix[:confidence_high][i][i] += 1 + self.nr_predictions[:confidence_high] += 1 + elsif pred[:confidence].match(/Low/i) + confusion_matrix[:confidence_low][i][i] += 1 + self.nr_predictions[:confidence_low] += 1 + end end end elsif pred[:value] != m - if pred[:value] == accept_values[0] - confusion_matrix[:all][0][1] += 1 - weighted_confusion_matrix[:all][0][1] += pred[:probabilities][pred[:value]] - self.nr_predictions[:all] += 1 - if pred[:warnings].empty? - confusion_matrix[:without_warnings][0][1] += 1 - weighted_confusion_matrix[:without_warnings][0][1] += pred[:probabilities][pred[:value]] - self.nr_predictions[:without_warnings] += 1 - end - elsif pred[:value] == accept_values[1] - confusion_matrix[:all][1][0] += 1 - weighted_confusion_matrix[:all][1][0] += pred[:probabilities][pred[:value]] - self.nr_predictions[:all] += 1 - if pred[:warnings].empty? - confusion_matrix[:without_warnings][1][0] += 1 - weighted_confusion_matrix[:without_warnings][1][0] += pred[:probabilities][pred[:value]] - self.nr_predictions[:without_warnings] += 1 + accept_values.each_with_index do |v,i| + if pred[:value] == v + confusion_matrix[:all][i][(i+1)%2] += 1 + self.nr_predictions[:all] += 1 + if pred[:confidence].match(/High/i) + confusion_matrix[:confidence_high][i][(i+1)%2] += 1 + self.nr_predictions[:confidence_high] += 1 + elsif pred[:confidence].match(/Low/i) + confusion_matrix[:confidence_low][i][(i+1)%2] += 1 + self.nr_predictions[:confidence_low] += 1 + end end end end end end - self.true_rate = {:all => {}, :without_warnings => {}} - self.predictivity = {:all => {}, :without_warnings => {}} + + self.true_rate = {:all => {}, :confidence_high => {}, :confidence_low => {}} + self.predictivity = {:all => {}, :confidence_high => {}, :confidence_low => {}} accept_values.each_with_index do |v,i| - [:all,:without_warnings].each do |a| + [:all,:confidence_high,:confidence_low].each do |a| self.true_rate[a][v] = confusion_matrix[a][i][i]/confusion_matrix[a][i].reduce(:+).to_f self.predictivity[a][v] = confusion_matrix[a][i][i]/confusion_matrix[a].collect{|n| n[i]}.reduce(:+).to_f end end - confidence_sum = {:all => 0, :without_warnings => 0} - [:all,:without_warnings].each do |a| - weighted_confusion_matrix[a].each do |r| - r.each do |c| - confidence_sum[a] += c - end - end - end self.accuracy = {} - self.weighted_accuracy = {} - [:all,:without_warnings].each do |a| + [:all,:confidence_high,:confidence_low].each do |a| self.accuracy[a] = (confusion_matrix[a][0][0]+confusion_matrix[a][1][1])/nr_predictions[a].to_f - self.weighted_accuracy[a] = (weighted_confusion_matrix[a][0][0]+weighted_confusion_matrix[a][1][1])/confidence_sum[a].to_f end $logger.debug "Accuracy #{accuracy}" $logger.debug "Nr Predictions #{nr_predictions}" @@ -87,9 +63,7 @@ module OpenTox { :accept_values => accept_values, :confusion_matrix => confusion_matrix, - :weighted_confusion_matrix => weighted_confusion_matrix, :accuracy => accuracy, - :weighted_accuracy => weighted_accuracy, :true_rate => self.true_rate, :predictivity => self.predictivity, :nr_predictions => nr_predictions, @@ -138,47 +112,27 @@ module OpenTox # Statistical evaluation of regression validations module RegressionStatistics + attr_accessor :x, :y + # Get statistics # @return [Hash] def statistics self.warnings = [] - self.rmse = {:all =>0,:without_warnings => 0} - self.r_squared = {:all =>0,:without_warnings => 0} - self.mae = {:all =>0,:without_warnings => 0} - self.within_prediction_interval = {:all =>0,:without_warnings => 0} - self.out_of_prediction_interval = {:all =>0,:without_warnings => 0} - x = {:all => [],:without_warnings => []} - y = {:all => [],:without_warnings => []} - self.nr_predictions = {:all =>0,:without_warnings => 0} + self.rmse = {:all =>0,:confidence_high => 0,:confidence_low => 0} + self.r_squared = {:all =>0,:confidence_high => 0,:confidence_low => 0} + self.mae = {:all =>0,:confidence_high => 0,:confidence_low => 0} + self.within_prediction_interval = {:all =>0,:confidence_high => 0,:confidence_low => 0} + self.out_of_prediction_interval = {:all =>0,:confidence_high => 0,:confidence_low => 0} + @x = {:all => [],:confidence_high => [],:confidence_low => []} + @y = {:all => [],:confidence_high => [],:confidence_low => []} + self.nr_predictions = {:all =>0,:confidence_high => 0,:confidence_low => 0} predictions.each do |cid,pred| !if pred[:value] and pred[:measurements] and !pred[:measurements].empty? - self.nr_predictions[:all] +=1 - x[:all] << pred[:measurements].median - y[:all] << pred[:value] - error = pred[:value]-pred[:measurements].median - self.rmse[:all] += error**2 - self.mae[:all] += error.abs - if pred[:prediction_interval] - if pred[:measurements].median >= pred[:prediction_interval][0] and pred[:measurements].median <= pred[:prediction_interval][1] - self.within_prediction_interval[:all] += 1 - else - self.out_of_prediction_interval[:all] += 1 - end - end - if pred[:warnings].empty? - self.nr_predictions[:without_warnings] +=1 - x[:without_warnings] << pred[:measurements].median - y[:without_warnings] << pred[:value] - error = pred[:value]-pred[:measurements].median - self.rmse[:without_warnings] += error**2 - self.mae[:without_warnings] += error.abs - if pred[:prediction_interval] - if pred[:measurements].median >= pred[:prediction_interval][0] and pred[:measurements].median <= pred[:prediction_interval][1] - self.within_prediction_interval[:without_warnings] += 1 - else - self.out_of_prediction_interval[:without_warnings] += 1 - end - end + insert_prediction pred, :all + if pred[:confidence].match(/High/i) + insert_prediction pred, :confidence_high + elsif pred[:confidence].match(/Low/i) + insert_prediction pred, :confidence_low end else trd_id = model.training_dataset_id @@ -187,10 +141,10 @@ module OpenTox $logger.debug "No training activities for #{smiles} in training dataset #{trd_id}." end end - [:all,:without_warnings].each do |a| - if x[a].size > 2 - R.assign "measurement", x[a] - R.assign "prediction", y[a] + [:all,:confidence_high,:confidence_low].each do |a| + if @x[a].size > 2 + R.assign "measurement", @x[a] + R.assign "prediction", @y[a] R.eval "r <- cor(measurement,prediction,use='pairwise')" self.r_squared[a] = R.eval("r").to_ruby**2 else @@ -209,7 +163,6 @@ module OpenTox $logger.debug "MAE #{mae}" $logger.debug "Nr predictions #{nr_predictions}" $logger.debug "#{within_prediction_interval} measurements within prediction interval" - $logger.debug "#{warnings}" save { :mae => mae, @@ -270,6 +223,24 @@ module OpenTox end worst_predictions.sort_by{|sid,p| p["distance_prediction_interval"] }.to_h end + + private + + def insert_prediction prediction, type + self.nr_predictions[type] +=1 + @x[type] << prediction[:measurements].median + @y[type] << prediction[:value] + error = prediction[:value]-prediction[:measurements].median + self.rmse[type] += error**2 + self.mae[type] += error.abs + if prediction[:prediction_interval] + if prediction[:measurements].median >= prediction[:prediction_interval][0] and prediction[:measurements].median <= prediction[:prediction_interval][1] + self.within_prediction_interval[type] += 1 + else + self.out_of_prediction_interval[type] += 1 + end + end + end end end end -- cgit v1.2.3 From 455da06aa6459da0d25b286ca6cb866ff64c4c34 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Thu, 20 Jun 2019 22:01:50 +0200 Subject: separate csv serialisations for batch predictions and training data, repeated measurements in mutagenicity dataset fixed, daphnia import fixed, CENTRAL_MONGO_IP removed --- lib/validation-statistics.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/validation-statistics.rb') diff --git a/lib/validation-statistics.rb b/lib/validation-statistics.rb index 8a8970e..d603294 100644 --- a/lib/validation-statistics.rb +++ b/lib/validation-statistics.rb @@ -18,7 +18,7 @@ module OpenTox if pred[:value] == v confusion_matrix[:all][i][i] += 1 self.nr_predictions[:all] += 1 - if pred[:confidence].match(/High/i) + if pred[:confidence].match(/Similar/i) confusion_matrix[:confidence_high][i][i] += 1 self.nr_predictions[:confidence_high] += 1 elsif pred[:confidence].match(/Low/i) @@ -32,7 +32,7 @@ module OpenTox if pred[:value] == v confusion_matrix[:all][i][(i+1)%2] += 1 self.nr_predictions[:all] += 1 - if pred[:confidence].match(/High/i) + if pred[:confidence].match(/Similar/i) confusion_matrix[:confidence_high][i][(i+1)%2] += 1 self.nr_predictions[:confidence_high] += 1 elsif pred[:confidence].match(/Low/i) -- cgit v1.2.3 From b536a45cf18b070cec3f9cb8a44fdac0bfa3c58e Mon Sep 17 00:00:00 2001 From: gebele Date: Thu, 27 Jun 2019 14:08:57 +0000 Subject: fixed confidence value for cv stats; added tests --- lib/validation-statistics.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/validation-statistics.rb') diff --git a/lib/validation-statistics.rb b/lib/validation-statistics.rb index d603294..2dd9c7a 100644 --- a/lib/validation-statistics.rb +++ b/lib/validation-statistics.rb @@ -129,7 +129,7 @@ module OpenTox predictions.each do |cid,pred| !if pred[:value] and pred[:measurements] and !pred[:measurements].empty? insert_prediction pred, :all - if pred[:confidence].match(/High/i) + if pred[:confidence].match(/Similar/i) insert_prediction pred, :confidence_high elsif pred[:confidence].match(/Low/i) insert_prediction pred, :confidence_low -- cgit v1.2.3 From 29c3cb2e8a8bbfb12178785f81d1cb324dc328e7 Mon Sep 17 00:00:00 2001 From: gebele Date: Fri, 12 Jul 2019 12:20:20 +0000 Subject: fixed, probability plot format was not taken from params for filename --- lib/validation-statistics.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/validation-statistics.rb') diff --git a/lib/validation-statistics.rb b/lib/validation-statistics.rb index 2dd9c7a..4910573 100644 --- a/lib/validation-statistics.rb +++ b/lib/validation-statistics.rb @@ -101,7 +101,7 @@ module OpenTox R.assign "probability", probabilities R.eval "image = qplot(probability,accuracy)+ylab('Accumulated accuracy')+xlab('Prediction probability')+ylim(c(0,1))+scale_x_reverse()+geom_line()" R.eval "ggsave(file='#{tmpfile}', plot=image)" - file = Mongo::Grid::File.new(File.read(tmpfile), :filename => "#{self.id.to_s}_probability_plot.svg") + file = Mongo::Grid::File.new(File.read(tmpfile), :filename => "#{self.id.to_s}_probability_plot.#{format}") plot_id = $gridfs.insert_one(file) update(:probability_plot_id => plot_id) #end -- cgit v1.2.3 From 581707afa48711cfd2f929a91a96e4f5041b9ba2 Mon Sep 17 00:00:00 2001 From: gebele Date: Tue, 16 Jul 2019 14:03:03 +0000 Subject: always render new correlation plot; keep same handling as for probability plot --- lib/validation-statistics.rb | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'lib/validation-statistics.rb') diff --git a/lib/validation-statistics.rb b/lib/validation-statistics.rb index 4910573..5fd9985 100644 --- a/lib/validation-statistics.rb +++ b/lib/validation-statistics.rb @@ -178,8 +178,12 @@ module OpenTox # @param [String,nil] format # @return [Blob] def correlation_plot format: "png" - unless correlation_plot_id - tmpfile = "/tmp/#{id.to_s}_correlation.#{format}" + #unless correlation_plot_id + #tmpfile = "/tmp/#{id.to_s}_correlation.#{format}" + tmpdir = "/tmp" + #p tmpdir + FileUtils.mkdir_p tmpdir + tmpfile = File.join(tmpdir,"#{id.to_s}_correlation.#{format}") x = [] y = [] feature = Feature.find(predictions.first.last["prediction_feature_id"]) @@ -203,7 +207,7 @@ module OpenTox file = Mongo::Grid::File.new(File.read(tmpfile), :filename => "#{id.to_s}_correlation_plot.#{format}") plot_id = $gridfs.insert_one(file) update(:correlation_plot_id => plot_id) - end + #end $gridfs.find_one(_id: correlation_plot_id).data end -- cgit v1.2.3