summary | refs | log | tree | commit | diff
path: root/lib
diff options
context:
space:
mode:
author: Christoph Helma <helma@in-silico.ch> 2016-10-27 11:58:07 +0200
committer: Christoph Helma <helma@in-silico.ch> 2016-10-27 11:58:07 +0200
commit: 8519274487166d75b3b9ae28e61f7a7be9f7e83c (patch)
tree: 6815047dae4740e57c33bca9f65b5716f32e0b0f /lib
parent: aada2ff67eaba251d1eeedb7f3eb29282706f997 (diff)
probability plot for classification validations
Diffstat (limited to 'lib')
-rw-r--r-- lib/crossvalidation.rb 18
-rw-r--r-- lib/leave-one-out-validation.rb 3
-rw-r--r-- lib/train-test-validation.rb 14
-rw-r--r-- lib/validation-statistics.rb 64
4 files changed, 67 insertions, 32 deletions
diff --git a/lib/crossvalidation.rb b/lib/crossvalidation.rb
index 15d1031..4f779a2 100644
--- a/lib/crossvalidation.rb
+++ b/lib/crossvalidation.rb
@@ -64,14 +64,16 @@ module OpenTox
field :weighted_accuracy, type: Float
field :true_rate, type: Hash
field :predictivity, type: Hash
- field :confidence_plot_id, type: BSON::ObjectId
+ field :probability_plot_id, type: BSON::ObjectId
end
class RegressionCrossValidation < CrossValidation
include RegressionStatistics
- field :rmse, type: Float
- field :mae, type: Float
+ field :rmse, type: Float, default:0
+ field :mae, type: Float, default:0
field :r_squared, type: Float
+ field :within_prediction_interval, type: Integer, default:0
+ field :out_of_prediction_interval, type: Integer, default:0
field :correlation_plot_id, type: BSON::ObjectId
end
@@ -93,6 +95,7 @@ module OpenTox
crossvalidation_ids.collect{|id| CrossValidation.find(id)}
end
+=begin
def correlation_plot format: "png"
#unless correlation_plot_id
feature = Feature.find(crossvalidations.first.model.prediction_feature)
@@ -104,16 +107,18 @@ module OpenTox
x = []
y = []
cv.predictions.each do |sid,p|
- x << p["value"]
- y << p["measurements"].median
+ x << p["measurements"].median
+ y << p["value"]
end
R.assign "measurement", x
R.assign "prediction", y
R.eval "all = c(measurement,prediction)"
R.eval "range = c(min(all), max(all))"
- R.eval "image#{i} = qplot(prediction,measurement,main='#{title}',xlab='Prediction',ylab='Measurement',asp=1,xlim=range, ylim=range)"
+ R.eval "image#{i} = qplot(prediction,measurement,main='#{title} #{i}',xlab='Prediction',ylab='Measurement',asp=1,xlim=range, ylim=range)"
R.eval "image#{i} = image#{i} + geom_abline(intercept=0, slope=1)"
images << "image#{i}"
+
+ R.eval "ggsave(file='/home/ist/lazar/test/tmp#{i}.pdf', plot=image#{i})"
end
R.eval "pdf('#{tmpfile}')"
R.eval "grid.arrange(#{images.join ","},ncol=#{images.size})"
@@ -124,6 +129,7 @@ module OpenTox
#end
$gridfs.find_one(_id: correlation_plot_id).data
end
+=end
end
end
diff --git a/lib/leave-one-out-validation.rb b/lib/leave-one-out-validation.rb
index 59f43c5..538b7b3 100644
--- a/lib/leave-one-out-validation.rb
+++ b/lib/leave-one-out-validation.rb
@@ -5,6 +5,7 @@ module OpenTox
class LeaveOneOut < Validation
def self.create model
+ bad_request_error "Cannot create leave one out validation for models with supervised feature selection. Please use crossvalidation instead." if model.algorithms[:feature_selection]
$logger.debug "#{model.name}: LOO validation started"
t = Time.now
model.training_dataset.features.first.nominal? ? klass = ClassificationLeaveOneOut : klass = RegressionLeaveOneOut
@@ -48,6 +49,8 @@ module OpenTox
field :rmse, type: Float, default: 0
field :mae, type: Float, default: 0
field :r_squared, type: Float
+ field :within_prediction_interval, type: Integer, default:0
+ field :out_of_prediction_interval, type: Integer, default:0
field :correlation_plot_id, type: BSON::ObjectId
end
diff --git a/lib/train-test-validation.rb b/lib/train-test-validation.rb
index e3f5905..71abad2 100644
--- a/lib/train-test-validation.rb
+++ b/lib/train-test-validation.rb
@@ -44,10 +44,24 @@ module OpenTox
class ClassificationTrainTest < TrainTest
include ClassificationStatistics
+ field :accept_values, type: Array
+ field :confusion_matrix, type: Array
+ field :weighted_confusion_matrix, type: Array
+ field :accuracy, type: Float
+ field :weighted_accuracy, type: Float
+ field :true_rate, type: Hash
+ field :predictivity, type: Hash
+ field :probability_plot_id, type: BSON::ObjectId
end
class RegressionTrainTest < TrainTest
include RegressionStatistics
+ field :rmse, type: Float, default:0
+ field :mae, type: Float, default:0
+ field :r_squared, type: Float
+ field :within_prediction_interval, type: Integer, default:0
+ field :out_of_prediction_interval, type: Integer, default:0
+ field :correlation_plot_id, type: BSON::ObjectId
end
end
diff --git a/lib/validation-statistics.rb b/lib/validation-statistics.rb
index 3582c71..4ab4b13 100644
--- a/lib/validation-statistics.rb
+++ b/lib/validation-statistics.rb
@@ -65,43 +65,44 @@ module OpenTox
}
end
- def confidence_plot
- unless confidence_plot_id
- tmpfile = "/tmp/#{id.to_s}_confidence.svg"
+ def probability_plot format: "pdf"
+ #unless probability_plot_id
+ tmpfile = "/tmp/#{id.to_s}_probability.#{format}"
accuracies = []
- confidences = []
+ probabilities = []
correct_predictions = 0
incorrect_predictions = 0
- predictions.each do |p|
- p[:measurements].each do |db_act|
- if p[:value]
- p[:value] == db_act ? correct_predictions += 1 : incorrect_predictions += 1
- accuracies << correct_predictions/(correct_predictions+incorrect_predictions).to_f
- confidences << p[:confidence]
-
- end
+ pp = []
+ predictions.values.select{|p| p["probabilities"]}.compact.each do |p|
+ p["measurements"].each do |m|
+ pp << [ p["probabilities"][p["value"]], p["value"] == m ]
end
end
+ pp.sort_by!{|p| 1-p.first}
+ pp.each do |p|
+ p[1] ? correct_predictions += 1 : incorrect_predictions += 1
+ accuracies << correct_predictions/(correct_predictions+incorrect_predictions).to_f
+ probabilities << p[0]
+ end
R.assign "accuracy", accuracies
- R.assign "confidence", confidences
- R.eval "image = qplot(confidence,accuracy)+ylab('accumulated accuracy')+scale_x_reverse()"
+ R.assign "probability", probabilities
+ R.eval "image = qplot(probability,accuracy)+ylab('Accumulated accuracy')+xlab('Prediction probability')+ylim(c(0,1))+scale_x_reverse()+geom_line()"
R.eval "ggsave(file='#{tmpfile}', plot=image)"
- file = Mongo::Grid::File.new(File.read(tmpfile), :filename => "#{self.id.to_s}_confidence_plot.svg")
+ file = Mongo::Grid::File.new(File.read(tmpfile), :filename => "#{self.id.to_s}_probability_plot.svg")
plot_id = $gridfs.insert_one(file)
- update(:confidence_plot_id => plot_id)
- end
- $gridfs.find_one(_id: confidence_plot_id).data
+ update(:probability_plot_id => plot_id)
+ #end
+ $gridfs.find_one(_id: probability_plot_id).data
end
end
module RegressionStatistics
def statistics
- # TODO: predictions within prediction_interval
self.rmse = 0
self.mae = 0
- #self.within_prediction_interval = 0
- #self.outside_prediction_interval = 0
+ self.within_prediction_interval = 0
+ self.out_of_prediction_interval = 0
x = []
y = []
predictions.each do |cid,pred|
@@ -111,9 +112,13 @@ module OpenTox
error = pred[:value]-pred[:measurements].median
self.rmse += error**2
self.mae += error.abs
- #if pred[:prediction_interval]
- #if pred[:measurements]
- #end
+ if pred[:prediction_interval]
+ if pred[:measurements].median >= pred[:prediction_interval][0] and pred[:measurements].median <= pred[:prediction_interval][1]
+ self.within_prediction_interval += 1
+ else
+ self.out_of_prediction_interval += 1
+ end
+ end
else
warnings << "No training activities for #{Compound.find(compound_id).smiles} in training dataset #{model.training_dataset_id}."
$logger.debug "No training activities for #{Compound.find(compound_id).smiles} in training dataset #{model.training_dataset_id}."
@@ -128,16 +133,23 @@ module OpenTox
$logger.debug "R^2 #{r_squared}"
$logger.debug "RMSE #{rmse}"
$logger.debug "MAE #{mae}"
+ $logger.debug "#{percent_within_prediction_interval.round(2)}% measurements within prediction interval"
save
{
:mae => mae,
:rmse => rmse,
:r_squared => r_squared,
+ :within_prediction_interval => within_prediction_interval,
+ :out_of_prediction_interval => out_of_prediction_interval,
}
end
+ def percent_within_prediction_interval
+ 100*within_prediction_interval.to_f/(within_prediction_interval+out_of_prediction_interval)
+ end
+
def correlation_plot format: "png"
- unless correlation_plot_id
+ #unless correlation_plot_id
tmpfile = "/tmp/#{id.to_s}_correlation.#{format}"
x = []
y = []
@@ -158,7 +170,7 @@ module OpenTox
file = Mongo::Grid::File.new(File.read(tmpfile), :filename => "#{id.to_s}_correlation_plot.#{format}")
plot_id = $gridfs.insert_one(file)
update(:correlation_plot_id => plot_id)
- end
+ #end
$gridfs.find_one(_id: correlation_plot_id).data
end