From f8faf510b4574df1a00fa61a9f0a1681fc2f4857 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Tue, 25 Aug 2015 17:20:55 +0200 Subject: Experiments added --- lib/classification.rb | 2 +- lib/compound.rb | 1 + lib/crossvalidation.rb | 109 ++++++++++++++++++++++++++++++++++++----------- lib/dataset.rb | 15 ++++--- lib/experiment.rb | 66 ++++++++++++++++++++++++++++ lib/feature.rb | 2 +- lib/lazar.rb | 6 +-- lib/model.rb | 27 +++++++----- lib/opentox.rb | 2 +- lib/overwrite.rb | 6 +++ lib/regression.rb | 29 ++++++++++++- test/dataset.rb | 2 +- test/experiment.rb | 31 ++++++++++++++ test/lazar-long.rb | 2 +- test/lazar-regression.rb | 7 +-- test/setup.rb | 4 +- test/validation.rb | 7 ++- 17 files changed, 261 insertions(+), 57 deletions(-) create mode 100644 lib/experiment.rb create mode 100644 test/experiment.rb diff --git a/lib/classification.rb b/lib/classification.rb index 723c66f..0d47983 100644 --- a/lib/classification.rb +++ b/lib/classification.rb @@ -3,7 +3,7 @@ module OpenTox class Classification - def self.weighted_majority_vote neighbors + def self.weighted_majority_vote compound, neighbors return {:value => nil,:confidence => nil,:warning => "Cound not find similar compounds."} if neighbors.empty? 
weighted_sum = {} sim_sum = 0.0 diff --git a/lib/compound.rb b/lib/compound.rb index fa57aff..a819f56 100644 --- a/lib/compound.rb +++ b/lib/compound.rb @@ -178,6 +178,7 @@ module OpenTox end def neighbors threshold=0.7 + # TODO restrict to dataset # from http://blog.matt-swain.com/post/87093745652/chemical-similarity-search-in-mongodb qn = fp4.size #qmin = qn * threshold diff --git a/lib/crossvalidation.rb b/lib/crossvalidation.rb index 5af75bf..4407aeb 100644 --- a/lib/crossvalidation.rb +++ b/lib/crossvalidation.rb @@ -102,6 +102,8 @@ module OpenTox field :mae, type: Float field :weighted_rmse, type: Float field :weighted_mae, type: Float + field :weighted_mae, type: Float + field :correlation_plot_id, type: BSON::ObjectId def self.create model, n=10 cv = self.new @@ -135,10 +137,11 @@ module OpenTox weighted_rae = 0 n = 0 confidence_sum = 0 + nil_activities = [] predictions.each do |pred| compound_id,activity,prediction,confidence = pred if activity and prediction - error = prediction-activity + error = Math.log(prediction)-Math.log(activity) rmse += error**2 weighted_rmse += confidence*error**2 mae += error.abs @@ -147,13 +150,36 @@ module OpenTox confidence_sum += confidence else # TODO: create warnings - p pred + $logger.debug "No training activities for #{Compound.find(compound_id).smiles} in training dataset #{training_dataset.id}." + nil_activities << pred end end + predictions -= nil_activities + x = predictions.collect{|p| p[1]} + y = predictions.collect{|p| p[2]} + R.assign "Measurement", x + R.assign "Prediction", y + R.eval "corr <- lm(-log(Measurement) ~ -log(Prediction))" + s = R.eval "summary <- summary(corr)" + p R.eval("summary$r.squared").to_ruby + #p s.to_ruby + #p s.to_ruby.first + s.to_ruby.each_with_index do |l,i| + #p i + #p l + end mae = mae/n weighted_mae = weighted_mae/confidence_sum rmse = Math.sqrt(rmse/n) weighted_rmse = Math.sqrt(weighted_rmse/confidence_sum) + # TODO check!! + predictions.sort! 
do |a,b| + relative_error_a = (a[1]-a[2]).abs/a[1].to_f + relative_error_a = 1/relative_error_a if relative_error_a < 1 + relative_error_b = (b[1]-b[2]).abs/b[1].to_f + relative_error_b = 1/relative_error_b if relative_error_b < 1 + [relative_error_b,b[3]] <=> [relative_error_a,a[3]] + end cv.update_attributes( name: model.name, model_id: model.id, @@ -161,7 +187,7 @@ module OpenTox validation_ids: validation_ids, nr_instances: nr_instances, nr_unpredicted: nr_unpredicted, - predictions: predictions.sort{|a,b| b[3] <=> a[3]}, + predictions: predictions,#.sort{|a,b| [(b[1]-b[2]).abs/b[1].to_f,b[3]] <=> [(a[1]-a[2]).abs/a[1].to_f,a[3]]}, mae: mae, rmse: rmse, weighted_mae: weighted_mae, @@ -171,27 +197,62 @@ module OpenTox cv end - def plot - # RMSE - x = predictions.collect{|p| p[1]} - y = predictions.collect{|p| p[2]} - R.assign "Measurement", x - R.assign "Prediction", y - R.eval "par(pty='s')" # sets the plot type to be square - #R.eval "fitline <- lm(log(Prediction) ~ log(Measurement))" - #R.eval "error <- log(Measurement)-log(Prediction)" - R.eval "error <- Measurement-Prediction" - R.eval "rmse <- sqrt(mean(error^2,na.rm=T))" - R.eval "mae <- mean( abs(error), na.rm = TRUE)" - R.eval "r <- cor(log(Prediction),log(Measurement))" - R.eval "svg(filename='/tmp/#{id.to_s}.svg')" - R.eval "plot(log(Prediction),log(Measurement),main='#{self.name}', sub=paste('RMSE: ',rmse, 'MAE :',mae, 'r^2: ',r^2),asp=1)" - #R.eval "plot(log(Prediction),log(Measurement),main='#{self.name}', sub=paste('RMSE: ',rmse, 'MAE :',mae, 'r^2: '),asp=1)" - #R.eval "plot(log(Prediction),log(Measurement),main='#{self.name}', ,asp=1)" - R.eval "abline(0,1,col='blue')" - #R.eval "abline(fitline,col='red')" - R.eval "dev.off()" - "/tmp/#{id.to_s}.svg" + def misclassifications n=nil + #n = predictions.size unless n + n = 20 unless n + model = Model::Lazar.find(self.model_id) + training_dataset = Dataset.find(model.training_dataset_id) + prediction_feature = training_dataset.features.first + 
predictions[0..n-1].collect do |p| + compound = Compound.find(p[0]) + neighbors = compound.neighbors.collect do |n| + neighbor = Compound.find(n[0]) + values = training_dataset.values(neighbor,prediction_feature) + { :smiles => neighbor.smiles, :fingerprint => neighbor.fp4.collect{|id| Smarts.find(id).name},:similarity => n[1], :measurements => values} + end + { + :smiles => compound.smiles, + :fingerprint => compound.fp4.collect{|id| Smarts.find(id).name}, + :measured => p[1], + :predicted => p[2], + :relative_error => (p[1]-p[2]).abs/p[1].to_f, + :confidence => p[3], + :neighbors => neighbors + } + end + end + + def correlation_plot + unless correlation_plot_id + tmpfile = "/tmp/#{id.to_s}.svg" + x = predictions.collect{|p| p[1]} + y = predictions.collect{|p| p[2]} + attributes = Model::Lazar.find(self.model_id).attributes + attributes.delete_if{|key,_| key.match(/_id|_at/) or ["_id","creator","name"].include? key} + attributes = attributes.values.collect{|v| v.is_a?(String) ? v.sub(/OpenTox::/,'') : v}.join("\n") + p "'"+attributes + R.eval "library(ggplot2)" + R.eval "library(grid)" + R.eval "library(gridExtra)" + R.assign "measurement", x + R.assign "prediction", y + #R.eval "error <- log(Measurement)-log(Prediction)" + #R.eval "rmse <- sqrt(mean(error^2, na.rm=T))" + #R.eval "mae <- mean(abs(error), na.rm=T)" + R.eval "r <- cor(-log(prediction),-log(measurement))" + R.eval "svg(filename='#{tmpfile}')" + R.eval "all = c(-log(measurement),-log(prediction))" + R.eval "range = c(min(all), max(all))" + R.eval "image = qplot(-log(prediction),-log(measurement),main='#{self.name}',asp=1,xlim=range, ylim=range)" + R.eval "image = image + geom_abline(intercept=0, slope=1) + stat_smooth(method='lm', se=FALSE)" + R.eval "text = textGrob(paste('RMSE: ', '#{rmse.round(2)},','MAE:','#{mae.round(2)},','r^2: ',round(r^2,2),'\n\n','#{attributes}'),just=c('left','top'),check.overlap = T)" + R.eval "grid.arrange(image, text, ncol=2)" + R.eval "dev.off()" + file = 
Mongo::Grid::File.new(File.read(tmpfile), :filename => "#{self.id.to_s}_correlation_plot.svg") + plot_id = $gridfs.insert_one(file) + update(:correlation_plot_id => plot_id) + end + $gridfs.find_one(_id: correlation_plot_id).data end end diff --git a/lib/dataset.rb b/lib/dataset.rb index 5850c3d..b3f5392 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -10,7 +10,7 @@ module OpenTox # associations like has_many, belongs_to deteriorate performance field :feature_ids, type: Array, default: [] field :compound_ids, type: Array, default: [] - field :data_entries_id, type: BSON::ObjectId, default: [] + field :data_entries_id, type: BSON::ObjectId#, default: [] field :source, type: String field :warnings, type: Array, default: [] @@ -19,9 +19,9 @@ module OpenTox def save_all dump = Marshal.dump(@data_entries) file = Mongo::Grid::File.new(dump, :filename => "#{self.id.to_s}.data_entries") - data_entries_id = $gridfs.insert_one(file) - update(:data_entries_id => data_entries_id) - save + entries_id = $gridfs.insert_one(file) + update(:data_entries_id => entries_id) + #save end # Readers @@ -125,11 +125,11 @@ module OpenTox # Serialisation - # converts dataset to csv format including compound smiles as first column, other column headers are feature titles + # converts dataset to csv format including compound smiles as first column, other column headers are feature names # @return [String] def to_csv(inchi=false) CSV.generate() do |csv| #{:force_quotes=>true} - csv << [inchi ? "InChI" : "SMILES"] + features.collect{|f| f.title} + csv << [inchi ? "InChI" : "SMILES"] + features.collect{|f| f.name} compounds.each_with_index do |c,i| csv << [inchi ? c.inchi : c.smiles] + data_entries[i] end @@ -149,9 +149,10 @@ module OpenTox # Create a dataset from CSV file # TODO: document structure def self.from_csv_file file, source=nil, bioassay=true + $logger.debug "Parsing #{file}." 
source ||= file table = CSV.read file, :skip_blanks => true - dataset = self.new(:source => source, :name => File.basename(file)) + dataset = self.new(:source => source, :name => File.basename(file,".*")) dataset.parse_table table, bioassay dataset end end diff --git a/lib/experiment.rb b/lib/experiment.rb new file mode 100644 index 0000000..b3ed174 --- /dev/null +++ b/lib/experiment.rb @@ -0,0 +1,66 @@ +module OpenTox + + class Experiment + field :dataset_ids, type: Array + field :model_algorithms, type: Array + field :model_ids, type: Array, default: [] + field :crossvalidation_ids, type: Array, default: [] + field :prediction_algorithms, type: Array + field :neighbor_algorithms, type: Array + field :neighbor_algorithm_parameters, type: Array + + # TODO more sophisticated experimental design + def run + dataset_ids.each do |dataset_id| + dataset = Dataset.find(dataset_id) + model_algorithms.each do |model_algorithm| + prediction_algorithms.each do |prediction_algorithm| + neighbor_algorithms.each do |neighbor_algorithm| + neighbor_algorithm_parameters.each do |neighbor_algorithm_parameter| + $logger.debug "Creating #{model_algorithm} model for dataset #{dataset.name}, with prediction_algorithm #{prediction_algorithm}, neighbor_algorithm #{neighbor_algorithm}, neighbor_algorithm_parameters #{neighbor_algorithm_parameter}." 
+ model = Object.const_get(model_algorithm).create dataset + model.prediction_algorithm = prediction_algorithm + model.neighbor_algorithm = neighbor_algorithm + model.neighbor_algorithm_parameters = neighbor_algorithm_parameter + model.save + model_ids << model.id + cv = nil + if dataset.features.first.nominal + cv = ClassificationCrossValidation + elsif dataset.features.first.numeric + cv = RegressionCrossValidation + end + if cv + $logger.debug "Creating #{cv} for #{model_algorithm}, dataset #{dataset.name}, with prediction_algorithm #{prediction_algorithm}, neighbor_algorithm #{neighbor_algorithm}, neighbor_algorithm_parameters #{neighbor_algorithm_parameter}." + crossvalidation = cv.create model + crossvalidation_ids << crossvalidation.id + else + $logger.warn "#{dataset.features.first} is neither nominal nor numeric." + end + end + end + end + end + save + end + + def self.create params + experiment = self.new params + $logger.debug "Experiment started ..." + experiment.run + experiment + end + + def report + crossvalidation_ids.each do |id| + cv = CrossValidation.find(id) + file = "/tmp/#{cv.name}.svg" + File.open(file,"w+"){|f| f.puts cv.correlation_plot} + `inkview '#{file}'` + #p Crossvalidation.find(id).correlation_plot + end + end + + end + +end diff --git a/lib/feature.rb b/lib/feature.rb index 22b2846..9521597 100644 --- a/lib/feature.rb +++ b/lib/feature.rb @@ -2,7 +2,7 @@ module OpenTox # Basic feature class class Feature - field :name, as: :title, type: String + field :name, type: String field :nominal, type: Boolean field :numeric, type: Boolean field :measured, type: Boolean diff --git a/lib/lazar.rb b/lib/lazar.rb index d0128b7..5903556 100644 --- a/lib/lazar.rb +++ b/lib/lazar.rb @@ -16,7 +16,6 @@ ENV["MONGOID_ENV"] ||= "development" # TODO remove config files, change default via ENV or directly in Mongoid class Mongoid.load!("#{File.expand_path(File.join(File.dirname(__FILE__),'..','mongoid.yml'))}") $mongo = Mongoid.default_client -#$mongo =
Mongo::Client.new('mongodb://127.0.0.1:27017/opentox') $gridfs = $mongo.database.fs # R setup @@ -26,8 +25,8 @@ R = Rserve::Connection.new STDOUT.sync = true # for redirection, etc see http://stackoverflow.com/questions/8549443/why-doesnt-logger-output-to-stdout-get-redirected-to-files $logger = Logger.new STDOUT # STDERR did not work on my development machine (CH) $logger.level = Logger::DEBUG -Mongo::Logger.logger = $logger Mongo::Logger.level = Logger::WARN +#Mongo::Logger.logger = $logger # Require sub-Repositories require_relative '../libfminer/libbbrc/bbrc' # include before openbabel @@ -43,7 +42,7 @@ ENV['FMINER_SILENT'] = 'true' ENV['FMINER_NR_HITS'] = 'true' # OpenTox classes and includes -CLASSES = ["Feature","Compound","Dataset","Validation","CrossValidation"]# Algorithm and Models are modules +CLASSES = ["Feature","Compound","Dataset","Validation","CrossValidation","Experiment"]# Algorithm and Models are modules [ # be aware of the require sequence as it affects class/method overwrites "overwrite.rb", @@ -64,5 +63,6 @@ CLASSES = ["Feature","Compound","Dataset","Validation","CrossValidation"]# Algor "regression.rb", "validation.rb", "crossvalidation.rb", + "experiment.rb", ].each{ |f| require_relative f } diff --git a/lib/model.rb b/lib/model.rb index 185d70f..418ec18 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -2,24 +2,27 @@ module OpenTox module Model - class Lazar + class Model include OpenTox include Mongoid::Document include Mongoid::Timestamps store_in collection: "models" - field :title, as: :name, type: String + field :name, type: String field :creator, type: String, default: __FILE__ # datasets field :training_dataset_id, type: BSON::ObjectId # algorithms field :prediction_algorithm, type: String - field :neighbor_algorithm, type: String - field :neighbor_algorithm_parameters, type: Hash # prediction feature field :prediction_feature_id, type: BSON::ObjectId + end - #belongs_to :prediction + class Lazar < Model + + # algorithms + field 
:neighbor_algorithm, type: String + field :neighbor_algorithm_parameters, type: Hash attr_accessor :prediction_dataset attr_accessor :training_dataset @@ -36,7 +39,7 @@ module OpenTox prediction_feature.nominal ? lazar = OpenTox::Model::LazarClassification.new : lazar = OpenTox::Model::LazarRegression.new lazar.training_dataset_id = training_dataset.id lazar.prediction_feature_id = prediction_feature.id - lazar.title = prediction_feature.title + lazar.name = "#{training_dataset.name} #{prediction_feature.name}" lazar.save lazar @@ -83,7 +86,7 @@ module OpenTox acts.empty? ? nil : n << acts end neighbors.compact! # remove neighbors without training activities - predictions << Algorithm.run(prediction_algorithm, neighbors) + predictions << Algorithm.run(prediction_algorithm, compound, neighbors) end # serialize result @@ -97,14 +100,14 @@ module OpenTox when "OpenTox::Dataset" # prepare prediction dataset prediction_dataset = LazarPrediction.new( - :title => "Lazar prediction for #{prediction_feature.title}", + :name => "Lazar prediction for #{prediction_feature.name}", :creator => __FILE__, :prediction_feature_id => prediction_feature.id ) - confidence_feature = OpenTox::NumericFeature.find_or_create_by( "title" => "Prediction confidence" ) + confidence_feature = OpenTox::NumericFeature.find_or_create_by( "name" => "Prediction confidence" ) # TODO move into warnings field - warning_feature = OpenTox::NominalFeature.find_or_create_by("title" => "Warnings") + warning_feature = OpenTox::NominalFeature.find_or_create_by("name" => "Warnings") prediction_dataset.features = [ prediction_feature, confidence_feature, warning_feature ] prediction_dataset.compounds = compounds prediction_dataset.data_entries = predictions.collect{|p| [p[:value], p[:confidence], p[:warning]]} @@ -112,6 +115,10 @@ module OpenTox return prediction_dataset end + def training_dataset + return Dataset.find(training_dataset_id) + end + end def training_activities diff --git a/lib/opentox.rb 
b/lib/opentox.rb index 33293ac..53b34e9 100644 --- a/lib/opentox.rb +++ b/lib/opentox.rb @@ -12,7 +12,7 @@ module OpenTox include Mongoid::Document include Mongoid::Timestamps store_in collection: klass.downcase.pluralize - field :title, as: :name, type: String + field :name, type: String end OpenTox.const_set klass,c diff --git a/lib/overwrite.rb b/lib/overwrite.rb index df515eb..cb47527 100644 --- a/lib/overwrite.rb +++ b/lib/overwrite.rb @@ -81,6 +81,12 @@ class Array return self.uniq.size == 1 end + def median + sorted = self.sort + len = sorted.length + (sorted[(len - 1) / 2] + sorted[len / 2]) / 2.0 + end + end module URI diff --git a/lib/regression.rb b/lib/regression.rb index 0bc6547..020bb3a 100644 --- a/lib/regression.rb +++ b/lib/regression.rb @@ -19,7 +19,7 @@ module OpenTox class Regression - def self.weighted_average neighbors + def self.weighted_average compound, neighbors weighted_sum = 0.0 sim_sum = 0.0 neighbors.each do |row| @@ -34,6 +34,33 @@ module OpenTox {:value => prediction,:confidence => confidence} end + def self.local_linear_regression compound, neighbors + p neighbors.size + return nil unless neighbors.size > 0 + features = neighbors.collect{|n| Compound.find(n.first).fp4}.flatten.uniq + p features + training_data = Array.new(neighbors.size){Array.new(features.size,0)} + neighbors.each_with_index do |n,i| + #p n.first + neighbor = Compound.find n.first + features.each_with_index do |f,j| + training_data[i][j] = 1 if neighbor.fp4.include? f + end + end + p training_data + + R.assign "activities", neighbors.collect{|n| n[2].median} + R.assign "features", training_data + R.eval "model <- lm(activities ~ features)" + R.eval "summary <- summary(model)" + p R.summary + compound_features = features.collect{|f| compound.fp4.include?(f) ?
1 : 0} + R.assign "compound_features", compound_features + R.eval "prediction <- predict(model,compound_features)" + p R.prediction + + end + def self.weighted_average_with_relevant_fingerprints neighbors + weighted_sum = 0.0 + sim_sum = 0.0 diff --git a/test/dataset.rb b/test/dataset.rb index 27dba61..b5275d4 100644 --- a/test/dataset.rb +++ b/test/dataset.rb @@ -78,7 +78,7 @@ class DatasetTest < MiniTest::Test new_dataset = Dataset.find d.id # get metadata assert_match "multicolumn.csv", new_dataset.source - assert_equal "multicolumn.csv", new_dataset.title + assert_equal "multicolumn.csv", new_dataset.name # get features assert_equal 6, new_dataset.features.size assert_equal 7, new_dataset.compounds.size diff --git a/test/experiment.rb b/test/experiment.rb new file mode 100644 index 0000000..eae7fa0 --- /dev/null +++ b/test/experiment.rb @@ -0,0 +1,31 @@ +require_relative "setup.rb" + +class ExperimentTest < MiniTest::Test + + def test_regression_experiment + datasets = [ + "EPAFHM.csv", + "FDA_v3b_Maximum_Recommended_Daily_Dose_mmol.csv", + "LOAEL_log_mmol_corrected_smiles.csv" + ] + model_algorithms = ["OpenTox::Model::LazarRegression"] + neighbor_algorithms = ["OpenTox::Algorithm::Neighbor.fingerprint_similarity"] + prediction_algorithms = ["OpenTox::Algorithm::Regression.weighted_average"] + neighbor_algorithm_parameters = [{:min_sim => 0.7}] + experiment = Experiment.create( + :name => "Regression for datasets #{datasets}.", + :dataset_ids => datasets.collect{|d| Dataset.from_csv_file(File.join(DATA_DIR, d)).id}, + :model_algorithms => model_algorithms, + :neighbor_algorithms => neighbor_algorithms, + :neighbor_algorithm_parameters => neighbor_algorithm_parameters, + :prediction_algorithms => prediction_algorithms, + ) + experiment.run +=begin + experiment = Experiment.find "55dc58b32b72ed14a8000008" +=end + p experiment.id + experiment.report + refute_empty experiment.crossvalidation_ids + end +end diff --git a/test/lazar-long.rb b/test/lazar-long.rb index 
c0deaa2..1b58319 100644 --- a/test/lazar-long.rb +++ b/test/lazar-long.rb @@ -29,7 +29,7 @@ class LazarExtendedTest < MiniTest::Test feature_dataset = OpenTox::CalculatedDataset.find model.feature_dataset_id assert_equal dataset.compounds.size, feature_dataset.compounds.size assert_equal 52, feature_dataset.features.size - assert_equal '[#17&A]-[#6&A]', feature_dataset.features.first.title + assert_equal '[#17&A]-[#6&A]', feature_dataset.features.first.name compound = OpenTox::Compound.from_inchi("InChI=1S/C10H9NO2S/c1-8-2-4-9(5-3-8)13-6-10(12)11-7-14/h2-5H,6H2,1H3") prediction_dataset = model.predict compound prediction = prediction_dataset.data_entries.first diff --git a/test/lazar-regression.rb b/test/lazar-regression.rb index c36f521..4062cfd 100644 --- a/test/lazar-regression.rb +++ b/test/lazar-regression.rb @@ -12,11 +12,12 @@ class LazarRegressionTest < MiniTest::Test assert_equal 1, prediction[:neighbors].size end - def test_weighted_average_with_relevant_fingerprints + def test_local_linear_regression + skip training_dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi.csv" model = Model::LazarRegression.create training_dataset - model.update(:prediction_algorithm => "OpenTox::Algorithm::Regression.weighted_average_with_relevant_fingerprints") - compound = Compound.from_smiles "CC(C)(C)CN" + model.update(:prediction_algorithm => "OpenTox::Algorithm::Regression.local_linear_regression") + compound = Compound.from_smiles "NC(=O)OCCC" prediction = model.predict compound p prediction #assert_equal 13.6, prediction[:value].round(1) diff --git a/test/setup.rb b/test/setup.rb index 538853d..3dad683 100644 --- a/test/setup.rb +++ b/test/setup.rb @@ -3,5 +3,5 @@ require_relative '../lib/lazar.rb' include OpenTox TEST_DIR ||= File.expand_path(File.dirname(__FILE__)) DATA_DIR ||= File.join(TEST_DIR,"data") -$mongo.database.drop -$gridfs = $mongo.database.fs # recreate GridFS indexes +#$mongo.database.drop +#$gridfs = $mongo.database.fs # recreate GridFS indexes 
diff --git a/test/validation.rb b/test/validation.rb index 485769c..009c337 100644 --- a/test/validation.rb +++ b/test/validation.rb @@ -24,8 +24,8 @@ class ValidationTest < MiniTest::Test end def test_regression_crossvalidation - dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi.csv" - #dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.csv" + #dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi.csv" + dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.csv" model = Model::LazarRegression.create dataset cv = RegressionCrossValidation.create model p cv.rmse @@ -33,6 +33,9 @@ class ValidationTest < MiniTest::Test p cv.mae p cv.weighted_mae #`inkview #{cv.plot}` + #puts JSON.pretty_generate(cv.misclassifications)#.collect{|l| l.join ", "}.join "\n" + p cv.misclassifications.collect{|l| l[:neighbors].size} + `inkview #{cv.plot}` assert cv.rmse < 30, "RMSE > 30" assert cv.weighted_rmse < cv.rmse, "Weighted RMSE (#{cv.weighted_rmse}) larger than unweighted RMSE(#{cv.rmse}) " assert cv.mae < 12 -- cgit v1.2.3