summary | refs | log | tree | commit | diff
path: root/lib/validation.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/validation.rb')
-rw-r--r-- lib/validation.rb 62
1 files changed, 58 insertions, 4 deletions
diff --git a/lib/validation.rb b/lib/validation.rb
index c52ffc0..651860e 100644
--- a/lib/validation.rb
+++ b/lib/validation.rb
@@ -29,17 +29,22 @@ module OpenTox
atts[:training_dataset_id] = training_set.id
validation_model = model.class.create training_set, atts
validation_model.save
- test_set_without_activities = Dataset.new(:compound_ids => test_set.compound_ids) # just to be sure that activities cannot be used
+ cids = test_set.compound_ids
+
+ test_set_without_activities = Dataset.new(:compound_ids => cids.uniq) # remove duplicates and make sure that activities cannot be used
prediction_dataset = validation_model.predict test_set_without_activities
predictions = []
nr_unpredicted = 0
activities = test_set.data_entries.collect{|de| de.first}
prediction_dataset.data_entries.each_with_index do |de,i|
- if de[0] and de[1] and de[1].numeric?
- activity = activities[i]
+ if de[0] and de[1]
+ cid = prediction_dataset.compound_ids[i]
+ rows = cids.each_index.select{|r| cids[r] == cid }
+ activities = rows.collect{|r| test_set.data_entries[r][0]}
+ #activity = activities[i]
prediction = de.first
confidence = de[1]
- predictions << [prediction_dataset.compound_ids[i], activity, prediction, de[1]]
+ predictions << [prediction_dataset.compound_ids[i], activities, prediction, de[1]]
else
nr_unpredicted += 1
end
@@ -57,6 +62,55 @@ module OpenTox
validation
end
+    # Computes regression validation statistics from +predictions+, whose entries are
+    # [compound_id, activities, prediction, confidence]; each error is log10(prediction)
+    # minus log10(median of the measured activities). Entries lacking a measurement or
+    # a prediction are reported in +warnings+ and excluded from every statistic.
+    # @return [Hash] "R^2", "RMSE" and "MAE" (all nil when no entry could be evaluated)
+    def statistics
+      abs_sum = weighted_abs_sum = sq_sum = weighted_sq_sum = confidence_sum = 0.0
+      measurements = []
+      predicted = []
+      predictions.each do |compound_id, activity, prediction, confidence|
+        # activity is stored as an Array, which is truthy even when empty: test its content
+        measured = Array(activity).compact
+        if measured.empty? || prediction.nil?
+          message = "No training activities for #{Compound.find(compound_id).smiles} in training dataset #{model.training_dataset_id}."
+          warnings << message
+          $logger.debug message
+          next
+        end
+        median = measured.median
+        error = Math.log10(prediction)-Math.log10(median)
+        sq_sum += error**2
+        weighted_sq_sum += confidence*error**2
+        abs_sum += error.abs
+        weighted_abs_sum += confidence*error.abs
+        confidence_sum += confidence
+        measurements << median
+        predicted << prediction
+      end
+      # Without evaluable entries every mean would be 0/0 and cor() would get no data.
+      return { "R^2" => nil, "RMSE" => nil, "MAE" => nil } if measurements.empty?
+      R.assign "measurement", measurements
+      R.assign "prediction", predicted
+      R.eval "r <- cor(-log(measurement),-log(prediction),use='complete')"
+      r = R.eval("r").to_ruby
+
+      # Average over the evaluated entries only, not over predictions.size.
+      mae = abs_sum/measurements.size
+      rmse = Math.sqrt(sq_sum/measurements.size)
+      # The weighted variants are only needed by the persistence step, which is still disabled.
+      weighted_mae = weighted_abs_sum/confidence_sum
+      weighted_rmse = Math.sqrt(weighted_sq_sum/confidence_sum)
+      # TODO: update_attributes(mae: mae, rmse: rmse, weighted_mae: weighted_mae,
+      #   weighted_rmse: weighted_rmse, r_squared: r**2, finished_at: Time.now)
+      puts "R^2 #{r**2}"
+      puts "RMSE #{rmse}"
+      puts "MAE #{mae}"
+      { "R^2" => r**2, "RMSE" => rmse, "MAE" => mae }
+    end
+
end
class ClassificationValidation < Validation