summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristoph Helma <helma@in-silico.ch>2016-04-13 15:15:51 +0200
committerChristoph Helma <helma@in-silico.ch>2016-04-13 15:15:51 +0200
commita8368dda776c05331474adf7eaf9a6e413a3b1eb (patch)
treedaafac9a7453a8d453fc6992293afe6f6a937551
parent84222bae2bbb9fb3e0ce3e65de1be8e7f94d2147 (diff)
validation tests pass
-rw-r--r--lib/compound.rb2
-rw-r--r--lib/crossvalidation.rb109
-rw-r--r--lib/dataset.rb40
-rw-r--r--lib/lazar.rb3
-rw-r--r--lib/leave-one-out-validation.rb108
-rw-r--r--lib/model.rb23
-rw-r--r--lib/validation.rb62
-rw-r--r--test/classification.rb6
-rw-r--r--test/validation.rb6
9 files changed, 78 insertions, 281 deletions
diff --git a/lib/compound.rb b/lib/compound.rb
index 84d8891..757ba1a 100644
--- a/lib/compound.rb
+++ b/lib/compound.rb
@@ -341,7 +341,7 @@ module OpenTox
{'$sort' => {'tanimoto' => -1}}
]
- $mongo["compounds"].aggregate(aggregate).select{|r| r["dataset_ids"].include? params[:training_dataset_id]}
+ $mongo["substances"].aggregate(aggregate).select{|r| r["dataset_ids"].include? params[:training_dataset_id]}
end
diff --git a/lib/crossvalidation.rb b/lib/crossvalidation.rb
index b7cd7bf..f93a04c 100644
--- a/lib/crossvalidation.rb
+++ b/lib/crossvalidation.rb
@@ -6,7 +6,7 @@ module OpenTox
field :folds, type: Integer
field :nr_instances, type: Integer
field :nr_unpredicted, type: Integer
- field :predictions, type: Array, default: []
+ field :predictions, type: Hash, default: {}
field :finished_at, type: Time
def time
@@ -32,7 +32,7 @@ module OpenTox
cv.save # set created_at
nr_instances = 0
nr_unpredicted = 0
- predictions = []
+ predictions = {}
training_dataset = Dataset.find model.training_dataset_id
training_dataset.folds(n).each_with_index do |fold,fold_nr|
#fork do # parallel execution of validations
@@ -42,12 +42,12 @@ module OpenTox
$logger.debug "Dataset #{training_dataset.name}, Fold #{fold_nr}: #{Time.now-t} seconds"
#end
end
- #Process.waitall
+ Process.waitall
cv.validation_ids = Validation.where(:crossvalidation_id => cv.id).distinct(:_id)
cv.validations.each do |validation|
nr_instances += validation.nr_instances
nr_unpredicted += validation.nr_unpredicted
- predictions += validation.predictions
+ predictions.merge! validation.predictions
end
cv.update_attributes(
nr_instances: nr_instances,
@@ -73,61 +73,8 @@ module OpenTox
# TODO auc, f-measure (usability??)
def statistics
- accept_values = Feature.find(model.prediction_feature_id).accept_values
- confusion_matrix = Array.new(accept_values.size,0){Array.new(accept_values.size,0)}
- weighted_confusion_matrix = Array.new(accept_values.size,0){Array.new(accept_values.size,0)}
- true_rate = {}
- predictivity = {}
- predictions.each do |pred|
- compound_id,activities,prediction,confidence = pred
- if activities and prediction #and confidence.numeric?
- if activities.uniq.size == 1
- activity = activities.uniq.first
- if prediction == activity
- if prediction == accept_values[0]
- confusion_matrix[0][0] += 1
- #weighted_confusion_matrix[0][0] += confidence
- elsif prediction == accept_values[1]
- confusion_matrix[1][1] += 1
- #weighted_confusion_matrix[1][1] += confidence
- end
- elsif prediction != activity
- if prediction == accept_values[0]
- confusion_matrix[0][1] += 1
- #weighted_confusion_matrix[0][1] += confidence
- elsif prediction == accept_values[1]
- confusion_matrix[1][0] += 1
- #weighted_confusion_matrix[1][0] += confidence
- end
- end
- end
- else
- nr_unpredicted += 1 if prediction.nil?
- end
- end
- true_rate = {}
- predictivity = {}
- accept_values.each_with_index do |v,i|
- true_rate[v] = confusion_matrix[i][i]/confusion_matrix[i].reduce(:+).to_f
- predictivity[v] = confusion_matrix[i][i]/confusion_matrix.collect{|n| n[i]}.reduce(:+).to_f
- end
- confidence_sum = 0
- #weighted_confusion_matrix.each do |r|
- #r.each do |c|
- #confidence_sum += c
- #end
- #end
- update_attributes(
- accept_values: accept_values,
- confusion_matrix: confusion_matrix,
- #weighted_confusion_matrix: weighted_confusion_matrix,
- accuracy: (confusion_matrix[0][0]+confusion_matrix[1][1])/(nr_instances-nr_unpredicted).to_f,
- #weighted_accuracy: (weighted_confusion_matrix[0][0]+weighted_confusion_matrix[1][1])/confidence_sum.to_f,
- true_rate: true_rate,
- predictivity: predictivity,
- finished_at: Time.now
- )
- $logger.debug "Accuracy #{accuracy}"
+ stat = ValidationStatistics.classification(predictions, Feature.find(model.prediction_feature_id).accept_values)
+ update_attributes(stat)
end
def confidence_plot
@@ -169,48 +116,8 @@ module OpenTox
field :correlation_plot_id, type: BSON::ObjectId
def statistics
- rmse = 0
- mae = 0
- x = []
- y = []
- predictions.each do |pred|
- compound_id,activity,prediction,confidence = pred
- if activity and prediction
- unless activity == [nil]
- x << -Math.log10(activity.median)
- y << -Math.log10(prediction)
- error = Math.log10(prediction)-Math.log10(activity.median)
- rmse += error**2
- #weighted_rmse += confidence*error**2
- mae += error.abs
- #weighted_mae += confidence*error.abs
- #confidence_sum += confidence
- end
- else
- warnings << "No training activities for #{Compound.find(compound_id).smiles} in training dataset #{model.training_dataset_id}."
- $logger.debug "No training activities for #{Compound.find(compound_id).smiles} in training dataset #{model.training_dataset_id}."
- end
- end
- R.assign "measurement", x
- R.assign "prediction", y
- R.eval "r <- cor(measurement,prediction,use='complete')"
- r = R.eval("r").to_ruby
-
- mae = mae/predictions.size
- #weighted_mae = weighted_mae/confidence_sum
- rmse = Math.sqrt(rmse/predictions.size)
- #weighted_rmse = Math.sqrt(weighted_rmse/confidence_sum)
- update_attributes(
- mae: mae,
- rmse: rmse,
- #weighted_mae: weighted_mae,
- #weighted_rmse: weighted_rmse,
- r_squared: r**2,
- finished_at: Time.now
- )
- $logger.debug "R^2 #{r**2}"
- $logger.debug "RMSE #{rmse}"
- $logger.debug "MAE #{mae}"
+ stat = ValidationStatistics.regression predictions
+ update_attributes(stat)
end
def misclassifications n=nil
diff --git a/lib/dataset.rb b/lib/dataset.rb
index 5c04382..25307c9 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -5,9 +5,6 @@ module OpenTox
class Dataset
- # associations like has_many, belongs_to deteriorate performance
- #field :feature_ids, type: Array, default: []
- #field :substance_ids, type: Array, default: []
field :data_entries, type: Hash, default: {}
# Readers
@@ -24,7 +21,7 @@ module OpenTox
# Get all features
def features
- @features ||= data_entries.collect{|cid,f| f.keys}.flatten.uniq.collect{|id| OpenTox::Feature.find(id)}
+ @features ||= data_entries.collect{|cid,f| f.first}.flatten.uniq.collect{|id| OpenTox::Feature.find(id)}
@features
end
@@ -33,7 +30,7 @@ module OpenTox
# @param feature [OpenTox::Feature] OpenTox Feature object
# @return [Array] Data entry values
def values(compound, feature)
- data_entries[compound.id,feature.id]
+ data_entries[compound.id.to_s][feature.id.to_s]
end
# Writers
@@ -68,15 +65,14 @@ module OpenTox
training_idxs = indices-test_idxs
training_cids = training_idxs.collect{|i| substance_ids[i]}
chunk = [training_cids,test_cids].collect do |cids|
- new_cids = []
- new_data_entries = []
+ new_data_entries = {}
cids.each do |cid|
- data_entries[cid].each do |de|
- new_cids << cid
- new_data_entries << de
+ data_entries[cid].each do |f,v|
+ new_data_entries[cid] ||= {}
+ new_data_entries[cid][f] = v
end
end
- dataset = self.class.new(:data_entries => data_entries, :source => self.id )
+ dataset = self.class.new(:data_entries => new_data_entries, :source => self.id )
dataset.compounds.each do |compound|
compound.dataset_ids << dataset.id
compound.save
@@ -213,9 +209,6 @@ module OpenTox
next
end
- #substance_ids << compound.id
- #table.first.size == 0 ? self.data_entries[compound.id] = Array.new(0) : self.data_entries[compound.id] = Array.new(table.first.size-1)
-
vals.each_with_index do |v,j|
if v.blank?
warnings << "Empty value for compound '#{identifier}' (row #{r+2}) and feature '#{feature_names[j]}' (column #{j+2})."
@@ -228,10 +221,8 @@ module OpenTox
self.data_entries[compound.id.to_s] ||= {}
self.data_entries[compound.id.to_s][@features[j].id.to_s] ||= []
self.data_entries[compound.id.to_s][@features[j].id.to_s] << v
- #i = compound.feature_ids.index feature_ids[j]
- #TODO
- #compound.features[feature_ids[j].to_s] ||= []
- #compound.features[feature_ids[j].to_s] << v
+ compound.features[@features[j].id.to_s] ||= []
+ compound.features[@features[j].id.to_s] << v
compound.save
end
end
@@ -251,14 +242,23 @@ module OpenTox
end
# Dataset for lazar predictions
- class LazarPrediction < Dataset
+ class LazarPrediction #< Dataset
field :creator, type: String
- field :prediction_feature_id, type: String
+ field :prediction_feature_id, type: BSON::ObjectId
+ field :predictions, type: Hash, default: {}
def prediction_feature
Feature.find prediction_feature_id
end
+ def compounds
+ substances.select{|s| s.is_a? Compound}
+ end
+
+ def substances
+ predictions.keys.collect{|id| Substance.find id}
+ end
+
end
end
diff --git a/lib/lazar.rb b/lib/lazar.rb
index 2bcecc5..a1ad551 100644
--- a/lib/lazar.rb
+++ b/lib/lazar.rb
@@ -62,7 +62,7 @@ suppressPackageStartupMessages({
# OpenTox classes and includes
#CLASSES = ["Feature","Substance::Compound","Substance::Nanoparticle","Dataset","Validation","CrossValidation","LeaveOneOutValidation","RepeatedCrossValidation","Experiment"]# Algorithm and Models are modules
-CLASSES = ["Feature","Substance","Dataset","Validation","CrossValidation","LeaveOneOutValidation","RepeatedCrossValidation","Experiment"]# Algorithm and Models are modules
+CLASSES = ["Feature","Substance","Dataset","LazarPrediction","Validation","CrossValidation","LeaveOneOutValidation","RepeatedCrossValidation","Experiment"]# Algorithm and Models are modules
[ # be aware of the require sequence as it affects class/method overwrites
"overwrite.rb",
@@ -81,6 +81,7 @@ CLASSES = ["Feature","Substance","Dataset","Validation","CrossValidation","Leave
"validation.rb",
"crossvalidation.rb",
"leave-one-out-validation.rb",
+ "validation-statistics.rb",
"experiment.rb",
"import.rb",
].each{ |f| require_relative f }
diff --git a/lib/leave-one-out-validation.rb b/lib/leave-one-out-validation.rb
index 2cd13db..10fbe85 100644
--- a/lib/leave-one-out-validation.rb
+++ b/lib/leave-one-out-validation.rb
@@ -6,20 +6,26 @@ module OpenTox
field :dataset_id, type: BSON::ObjectId
field :nr_instances, type: Integer
field :nr_unpredicted, type: Integer
- field :predictions, type: Array
+ field :predictions, type: Hash
field :finished_at, type: Time
def self.create model
model.training_dataset.features.first.nominal? ? klass = ClassificationLeaveOneOutValidation : klass = RegressionLeaveOneOutValidation
loo = klass.new :model_id => model.id, :dataset_id => model.training_dataset_id
- compound_ids = model.training_dataset.compound_ids
predictions = model.predict model.training_dataset.compounds
- predictions = predictions.each_with_index {|p,i| p[:compound_id] = compound_ids[i]}
- predictions.select!{|p| p[:database_activities] and !p[:database_activities].empty?}
+ predictions.each{|cid,p| p.delete(:neighbors)}
+ nr_unpredicted = 0
+ predictions.each do |cid,prediction|
+ if prediction[:value]
+ prediction[:measured] = model.training_dataset.data_entries[cid][prediction[:prediction_feature_id].to_s]
+ else
+ nr_unpredicted += 1
+ end
+ predictions.delete(cid) unless prediction[:value] and prediction[:measured]
+ end
loo.nr_instances = predictions.size
- predictions.select!{|p| p[:value]} # remove unpredicted
- loo.predictions = predictions#.sort{|a,b| b[:confidence] <=> a[:confidence]}
- loo.nr_unpredicted = loo.nr_instances - loo.predictions.size
+ loo.nr_unpredicted = nr_unpredicted
+ loo.predictions = predictions
loo.statistics
loo.save
loo
@@ -42,53 +48,8 @@ module OpenTox
field :confidence_plot_id, type: BSON::ObjectId
def statistics
- accept_values = Feature.find(model.prediction_feature_id).accept_values
- confusion_matrix = Array.new(accept_values.size,0){Array.new(accept_values.size,0)}
- weighted_confusion_matrix = Array.new(accept_values.size,0){Array.new(accept_values.size,0)}
- predictions.each do |pred|
- pred[:database_activities].each do |db_act|
- if pred[:value]
- if pred[:value] == db_act
- if pred[:value] == accept_values[0]
- confusion_matrix[0][0] += 1
- weighted_confusion_matrix[0][0] += pred[:confidence]
- elsif pred[:value] == accept_values[1]
- confusion_matrix[1][1] += 1
- weighted_confusion_matrix[1][1] += pred[:confidence]
- end
- else
- if pred[:value] == accept_values[0]
- confusion_matrix[0][1] += 1
- weighted_confusion_matrix[0][1] += pred[:confidence]
- elsif pred[:value] == accept_values[1]
- confusion_matrix[1][0] += 1
- weighted_confusion_matrix[1][0] += pred[:confidence]
- end
- end
- end
- end
- end
- accept_values.each_with_index do |v,i|
- true_rate[v] = confusion_matrix[i][i]/confusion_matrix[i].reduce(:+).to_f
- predictivity[v] = confusion_matrix[i][i]/confusion_matrix.collect{|n| n[i]}.reduce(:+).to_f
- end
- confidence_sum = 0
- weighted_confusion_matrix.each do |r|
- r.each do |c|
- confidence_sum += c
- end
- end
- update_attributes(
- accept_values: accept_values,
- confusion_matrix: confusion_matrix,
- weighted_confusion_matrix: weighted_confusion_matrix,
- accuracy: (confusion_matrix[0][0]+confusion_matrix[1][1])/(nr_instances-nr_unpredicted).to_f,
- weighted_accuracy: (weighted_confusion_matrix[0][0]+weighted_confusion_matrix[1][1])/confidence_sum.to_f,
- true_rate: true_rate,
- predictivity: predictivity,
- finished_at: Time.now
- )
- $logger.debug "Accuracy #{accuracy}"
+ stat = ValidationStatistics.classification(predictions, Feature.find(model.prediction_feature_id).accept_values)
+ update_attributes(stat)
end
def confidence_plot
@@ -132,43 +93,10 @@ module OpenTox
field :correlation_plot_id, type: BSON::ObjectId
field :confidence_plot_id, type: BSON::ObjectId
+
def statistics
- confidence_sum = 0
- predicted_values = []
- measured_values = []
- predictions.each do |pred|
- pred[:database_activities].each do |activity|
- if pred[:value]
- predicted_values << pred[:value]
- measured_values << activity
- error = Math.log10(pred[:value])-Math.log10(activity)
- self.rmse += error**2
- #self.weighted_rmse += pred[:confidence]*error**2
- self.mae += error.abs
- #self.weighted_mae += pred[:confidence]*error.abs
- #confidence_sum += pred[:confidence]
- end
- end
- if pred[:database_activities].empty?
- warnings << "No training activities for #{Compound.find(compound_id).smiles} in training dataset #{model.training_dataset_id}."
- $logger.debug "No training activities for #{Compound.find(compound_id).smiles} in training dataset #{model.training_dataset_id}."
- end
- end
- R.assign "measurement", measured_values
- R.assign "prediction", predicted_values
- R.eval "r <- cor(-log(measurement),-log(prediction),use='complete')"
- r = R.eval("r").to_ruby
-
- self.mae = self.mae/predictions.size
- #self.weighted_mae = self.weighted_mae/confidence_sum
- self.rmse = Math.sqrt(self.rmse/predictions.size)
- #self.weighted_rmse = Math.sqrt(self.weighted_rmse/confidence_sum)
- self.r_squared = r**2
- self.finished_at = Time.now
- save
- $logger.debug "R^2 #{r**2}"
- $logger.debug "RMSE #{rmse}"
- $logger.debug "MAE #{mae}"
+ stat = ValidationStatistics.regression predictions
+ update_attributes(stat)
end
def correlation_plot
diff --git a/lib/model.rb b/lib/model.rb
index 1f9942b..5140d5a 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -90,33 +90,36 @@ module OpenTox
end
# make predictions
- predictions = []
- predictions = compounds.collect{|c| predict_compound c}
+ predictions = {}
+ compounds.each do |c|
+ predictions[c.id.to_s] = predict_compound c
+ predictions[c.id.to_s][:prediction_feature_id] = prediction_feature_id
+ end
# serialize result
case object.class.to_s
when "OpenTox::Compound"
- prediction = predictions.first
+ prediction = predictions[compounds.first.id.to_s]
prediction[:neighbors].sort!{|a,b| b[1] <=> a[1]} # sort according to similarity
- return prediction
+ return predictions
when "Array"
return predictions
when "OpenTox::Dataset"
+ predictions.each{|cid,p| p.delete(:neighbors)}
# prepare prediction dataset
measurement_feature = Feature.find prediction_feature_id
- prediction_feature = OpenTox::NumericFeature.find_or_create_by( "name" => measurement_feature.name + " (Prediction)" )
+ prediction_feature = NumericFeature.find_or_create_by( "name" => measurement_feature.name + " (Prediction)" )
prediction_dataset = LazarPrediction.new(
:name => "Lazar prediction for #{prediction_feature.name}",
:creator => __FILE__,
:prediction_feature_id => prediction_feature.id
)
- confidence_feature = OpenTox::NumericFeature.find_or_create_by( "name" => "Model RMSE" )
- warning_feature = OpenTox::NominalFeature.find_or_create_by("name" => "Warnings")
- prediction_dataset.features = [ prediction_feature, confidence_feature, measurement_feature, warning_feature ]
- prediction_dataset.compounds = compounds
- prediction_dataset.data_entries = predictions.collect{|p| [p[:value], p[:rmse] , p[:dataset_activities].to_s, p[:warning]]}
+
+ compounds.each_with_index do |c,i|
+ prediction_dataset.predictions[c.id.to_s] = predictions[i]
+ end
prediction_dataset.save
return prediction_dataset
end
diff --git a/lib/validation.rb b/lib/validation.rb
index b72d273..484e22e 100644
--- a/lib/validation.rb
+++ b/lib/validation.rb
@@ -8,7 +8,7 @@ module OpenTox
field :test_dataset_id, type: BSON::ObjectId
field :nr_instances, type: Integer
field :nr_unpredicted, type: Integer
- field :predictions, type: Array
+ field :predictions, type: Hash
def prediction_dataset
Dataset.find prediction_dataset_id
@@ -29,30 +29,22 @@ module OpenTox
atts[:training_dataset_id] = training_set.id
validation_model = model.class.create training_set, atts
validation_model.save
- cids = test_set.compound_ids
-
- test_set_without_activities = Dataset.new(:compound_ids => cids.uniq) # remove duplicates and make sure that activities cannot be used
- prediction_dataset = validation_model.predict test_set_without_activities
- predictions = []
+ predictions = validation_model.predict test_set.compounds
+ predictions.each{|cid,p| p.delete(:neighbors)}
nr_unpredicted = 0
- activities = test_set.data_entries.collect{|de| de.first}
- prediction_dataset.data_entries.each_with_index do |de,i|
- if de[0] #and de[1]
- cid = prediction_dataset.compound_ids[i]
- rows = cids.each_index.select{|r| cids[r] == cid }
- activities = rows.collect{|r| test_set.data_entries[r][0]}
- prediction = de.first
- confidence = de[1]
- predictions << [prediction_dataset.compound_ids[i], activities, prediction, de[1]]
+ predictions.each do |cid,prediction|
+ if prediction[:value]
+ prediction[:measured] = test_set.data_entries[cid][prediction[:prediction_feature_id].to_s]
else
nr_unpredicted += 1
end
+ predictions.delete(cid) unless prediction[:value] and prediction[:measured]
end
validation = self.new(
:model_id => validation_model.id,
- :prediction_dataset_id => prediction_dataset.id,
+ #:prediction_dataset_id => prediction_dataset.id,
:test_dataset_id => test_set.id,
- :nr_instances => test_set.compound_ids.size,
+ :nr_instances => test_set.compounds.size,
:nr_unpredicted => nr_unpredicted,
:predictions => predictions#.sort{|a,b| p a; b[3] <=> a[3]} # sort according to confidence
)
@@ -67,42 +59,6 @@ module OpenTox
end
class RegressionValidation < Validation
-
- def statistics
- rmse = 0
- weighted_rmse = 0
- rse = 0
- weighted_rse = 0
- mae = 0
- weighted_mae = 0
- confidence_sum = 0
- predictions.each do |pred|
- compound_id,activity,prediction,confidence = pred
- if activity and prediction
- error = Math.log10(prediction)-Math.log10(activity.median)
- rmse += error**2
- weighted_rmse += confidence*error**2
- mae += error.abs
- weighted_mae += confidence*error.abs
- confidence_sum += confidence
- else
- warnings << "No training activities for #{Compound.find(compound_id).smiles} in training dataset #{model.training_dataset_id}."
- $logger.debug "No training activities for #{Compound.find(compound_id).smiles} in training dataset #{model.training_dataset_id}."
- end
- end
- x = predictions.collect{|p| p[1].median}
- y = predictions.collect{|p| p[2]}
- R.assign "measurement", x
- R.assign "prediction", y
- R.eval "r <- cor(-log(measurement),-log(prediction),use='complete')"
- r = R.eval("r").to_ruby
-
- mae = mae/predictions.size
- weighted_mae = weighted_mae/confidence_sum
- rmse = Math.sqrt(rmse/predictions.size)
- weighted_rmse = Math.sqrt(weighted_rmse/confidence_sum)
- { "R^2" => r**2, "RMSE" => rmse, "MAE" => mae }
- end
end
end
diff --git a/test/classification.rb b/test/classification.rb
index bedbe14..af23db6 100644
--- a/test/classification.rb
+++ b/test/classification.rb
@@ -33,8 +33,10 @@ class LazarClassificationTest < MiniTest::Test
prediction = model.predict compound_dataset
assert_equal compound_dataset.compounds, prediction.compounds
- assert_equal "Could not find similar compounds with experimental data in the training dataset.", prediction.data_entries[7][3]
- assert_equal "1 compounds have been removed from neighbors, because they have the same structure as the query compound.", prediction.data_entries[14][3]
+ cid = prediction.compounds[7].id.to_s
+ assert_equal "Could not find similar compounds with experimental data in the training dataset.", prediction.predictions[cid][:warning]
+ cid = prediction.compounds[9].id.to_s
+ assert_equal "1 compounds have been removed from neighbors, because they have the same structure as the query compound.", prediction.predictions[cid][:warning]
# cleanup
[training_dataset,model,compound_dataset].each{|o| o.delete}
end
diff --git a/test/validation.rb b/test/validation.rb
index d8eea59..e702278 100644
--- a/test/validation.rb
+++ b/test/validation.rb
@@ -8,15 +8,15 @@ class ValidationTest < MiniTest::Test
dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv"
model = Model::LazarClassification.create dataset
cv = ClassificationCrossValidation.create model
- assert cv.accuracy > 0.7, "Accuracy (#{cv.accuracy}) should be larger than 0.7"
+ assert cv.accuracy > 0.7, "Accuracy (#{cv.accuracy}) should be larger than 0.7, this may occur due to an unfavorable training/test set split"
end
def test_default_regression_crossvalidation
dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi.csv"
model = Model::LazarRegression.create dataset
cv = RegressionCrossValidation.create model
- assert cv.rmse < 1.5, "RMSE > 1.5"
- assert cv.mae < 1
+ assert cv.rmse < 1.5, "RMSE #{cv.rmse} should be smaller than 1.5, this may occur due to an unfavorable training/test set split"
+ assert cv.mae < 1, "MAE #{cv.mae} should be smaller than 1, this may occur due to an unfavorable training/test set split"
end
# parameters