From d5bf97c2cb999539c56bf59aa1d7d3286745be84 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Wed, 23 Sep 2015 14:51:41 +0200 Subject: validations fixed (all models were executed with default parameters) --- test/compound.rb | 12 ++++++++ test/dataset.rb | 10 +++++++ test/experiment.rb | 63 ++++++++++++++++++++++++++++++++++++++---- test/lazar-physchem-short.rb | 34 ++++++++++++----------- test/lazar-regression.rb | 10 +++++++ test/validation.rb | 66 ++++++++++++++++++++++++++++++++++++++++---- 6 files changed, 167 insertions(+), 28 deletions(-) (limited to 'test') diff --git a/test/compound.rb b/test/compound.rb index 6a3c696..b33a643 100644 --- a/test/compound.rb +++ b/test/compound.rb @@ -134,4 +134,16 @@ print c.sdf end end end + + def test_mna + c = OpenTox::Compound.from_smiles "N#[N+]C1=CC=CC=C1.F[B-](F)(F)F" + p c.mna 4 + end + + def test_mpd + c = OpenTox::Compound.from_smiles "N#[N+]C1=CC=CC=C1.F[B-](F)(F)F" + assert 13, c.mpd.size + assert 7, c.mpd.uniq.size + assert_equal c.mpd, c.openbabel_fingerprint("mpd") + end end diff --git a/test/dataset.rb b/test/dataset.rb index 84be547..752073e 100644 --- a/test/dataset.rb +++ b/test/dataset.rb @@ -202,5 +202,15 @@ class DatasetTest < MiniTest::Test assert_equal 0.00323, d2.data_entries[5][0] end + def test_scaled_dataset + original_dataset = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.mini.csv") + scaled_dataset = original_dataset.scale + scaled_dataset.data_entries.each_with_index do |row,i| + row.each_with_index do |value,j| + assert_equal original_dataset.data_entries[i][j].round(4), scaled_dataset.original_value(value,j).round(4) if value # ignore nils + end + end + end + end diff --git a/test/experiment.rb b/test/experiment.rb index 4b54768..76a0498 100644 --- a/test/experiment.rb +++ b/test/experiment.rb @@ -63,21 +63,26 @@ class ExperimentTest < MiniTest::Test end def test_regression_fingerprints +=begin datasets = [ - "LOAEL_mmol_corrected_smiles.csv" + "EPAFHM.medi.csv", + #"LOAEL_mmol_corrected_smiles.csv" ] min_sims = [0.3,0.7] - types = ["FP2","FP3","FP4","MACCS"] + #min_sims = [0.7] + #types = ["FP2","FP3","FP4","MACCS","mpd"] + types = ["mpd","FP3"] experiment = Experiment.create( - :name => "Fminer vs fingerprint classification for datasets #{datasets}.", + :name => "Fingerprint regression with different types for datasets #{datasets}.", :dataset_ids => datasets.collect{|d| Dataset.from_csv_file(File.join(DATA_DIR, d)).id}, ) types.each do |type| min_sims.each do |min_sim| experiment.model_settings << { - :algorithm => "OpenTox::Model::LazarRegression", + :model_algorithm => "OpenTox::Model::LazarRegression", + :prediction_algorithm => "OpenTox::Algorithm::Regression.weighted_average", :neighbor_algorithm => "fingerprint_neighbors", - :neighbor_algorithm_parameter => { + :neighbor_algorithm_parameters => { :type => type, :min_sim => min_sim, } @@ -85,7 +90,53 @@ class ExperimentTest < MiniTest::Test end end experiment.run - p experiment.report +=end +#=begin + experiment = Experiment.find '56029cb92b72ed673d000000' +#=end + p experiment.id + experiment.results.each do |dataset,result| + result.each do |r| + params = Model::Lazar.find(r["model_id"])[:neighbor_algorithm_parameters] + RepeatedCrossValidation.find(r["repeated_crossvalidation_id"]).crossvalidations.each do |cv| + cv.validation_ids.each do |vid| + model_params = Model::Lazar.find(Validation.find(vid).model_id)[:neighbor_algorithm_parameters] + assert_equal params[:type], model_params[:type] + assert_equal params[:min_sim], model_params[:min_sim] + refute_equal params[:training_dataset_id], model_params[:training_dataset_id] + end + end + end + end + puts experiment.report.to_yaml + p experiment.summary + end + def test_mpd_fingerprints +=begin + datasets = [ + "EPAFHM.medi.csv", + ] + types = ["FP2","mpd"] + experiment = Experiment.create( + :name => "FP2 vs mpd fingerprint regression for datasets #{datasets}.", + :dataset_ids => datasets.collect{|d| Dataset.from_csv_file(File.join(DATA_DIR, d)).id}, + ) + types.each do |type| + experiment.model_settings << { + :algorithm => "OpenTox::Model::LazarRegression", + :neighbor_algorithm => "fingerprint_neighbors", + :neighbor_algorithm_parameter => { + :type => type, + :min_sim => 0.7, + } + } + end + experiment.run + p experiment.id +=end + experiment = Experiment.find '55ffd0c02b72ed123c000000' + p experiment + puts experiment.report.to_yaml end end diff --git a/test/lazar-physchem-short.rb b/test/lazar-physchem-short.rb index e74a4b9..59d8112 100644 --- a/test/lazar-physchem-short.rb +++ b/test/lazar-physchem-short.rb @@ -2,27 +2,29 @@ require_relative "setup.rb" class LazarPhyschemDescriptorTest < MiniTest::Test def test_epafhm - skip "Physchem Regression not yet implemented." - # check available descriptors - @descriptors = OpenTox::Algorithm::Descriptor::DESCRIPTORS.keys - assert_equal 111,@descriptors.size,"wrong number of physchem descriptors" - @descriptor_values = OpenTox::Algorithm::Descriptor::DESCRIPTOR_VALUES - # select descriptors for test - @num_features_offset = 0 - @descriptors.keep_if{|x| x=~/^Openbabel\./} - @descriptors.delete("Openbabel.L5") # TODO Openbabel.L5 does not work, investigate!!! - puts "Descriptors: #{@descriptors}" + @descriptors = OpenTox::Algorithm::Descriptor::OBDESCRIPTORS.keys + refute_empty @descriptors # UPLOAD DATA training_dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi.csv") - puts "Dataset: "+training_dataset.id -# feature_dataset = Algorithm::Descriptor.physchem training_dataset, @descriptors - model = Model::LazarRegression.create training_dataset#, feature_dataset - #p model + feature_dataset = Algorithm::Descriptor.physchem training_dataset, @descriptors + scaled_feature_dataset = feature_dataset.scale + model = Model::LazarRegression.create training_dataset + model.neighbor_algorithm = "physchem_neighbors" + model.neighbor_algorithm_parameters = { + :feature_calculation_algorithm => "OpenTox::Algorithm::Descriptor.physchem", + :descriptors => @descriptors, + :feature_dataset_id => scaled_feature_dataset.id, + :min_sim => 0.3 + } + model.save compound = Compound.from_smiles "CC(C)(C)CN" prediction = model.predict compound - p prediction - + refute_nil prediction[:value] + refute_nil prediction[:confidence] + prediction[:neighbors].each do |line| + assert_operator line[1], :>, 0.3 + end end end diff --git a/test/lazar-regression.rb b/test/lazar-regression.rb index cc7f356..8b2d473 100644 --- a/test/lazar-regression.rb +++ b/test/lazar-regression.rb @@ -13,6 +13,16 @@ class LazarRegressionTest < MiniTest::Test assert_equal 1, prediction[:neighbors].size end + def test_mpd_fingerprints + training_dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi.csv" + model = Model::LazarRegression.create training_dataset + model.neighbor_algorithm_parameters[:type] = "mpd" + compound = Compound.from_smiles "CCCSCCSCC" + prediction = model.predict compound + assert_equal 0.04, prediction[:value].round(2) + assert_equal 1, prediction[:neighbors].size + end + def test_local_linear_regression skip training_dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi.csv" diff --git a/test/validation.rb b/test/validation.rb index dfa2c81..9717ccc 100644 --- a/test/validation.rb +++ b/test/validation.rb @@ -20,10 +20,25 @@ class ValidationTest < MiniTest::Test end def test_regression_crossvalidation - #dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi.csv" - dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.csv" - model = Model::LazarRegression.create dataset + dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi.csv" + #dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.csv" + params = { + :prediction_algorithm => "OpenTox::Algorithm::Regression.weighted_average", + :neighbor_algorithm => "fingerprint_neighbors", + :neighbor_algorithm_parameters => { + :type => "MACCS", + :min_sim => 0.7, + } + } + model = Model::LazarRegression.create dataset, params cv = RegressionCrossValidation.create model + cv.validation_ids.each do |vid| + model = Model::Lazar.find(Validation.find(vid).model_id) + assert_equal params[:neighbor_algorithm_parameters][:type], model[:neighbor_algorithm_parameters][:type] + assert_equal params[:neighbor_algorithm_parameters][:min_sim], model[:neighbor_algorithm_parameters][:min_sim] + refute_equal params[:neighbor_algorithm_parameters][:training_dataset_id], model[:neighbor_algorithm_parameters][:training_dataset_id] + end + #`inkview #{cv.plot}` #puts JSON.pretty_generate(cv.misclassifications)#.collect{|l| l.join ", "}.join "\n" #`inkview #{cv.plot}` @@ -37,12 +52,51 @@ class ValidationTest < MiniTest::Test dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv" model = Model::LazarClassification.create dataset repeated_cv = RepeatedCrossValidation.create model - p repeated_cv repeated_cv.crossvalidations.each do |cv| - p cv - p cv.accuracy assert cv.accuracy > 0.7 end end + def test_crossvalidation_parameters + dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv" + params = { + :neighbor_algorithm_parameters => { + :min_sim => 0.3, + :type => "FP3" + } + } + model = Model::LazarClassification.create dataset, params + model.save + cv = ClassificationCrossValidation.create model + params = model.neighbor_algorithm_parameters + params = Hash[params.map{ |k, v| [k.to_s, v] }] # convert symbols to string + cv.validations.each do |validation| + assert_equal params, validation.model.neighbor_algorithm_parameters + end + end + + def test_physchem_regression_crossvalidation + + @descriptors = OpenTox::Algorithm::Descriptor::OBDESCRIPTORS.keys + refute_empty @descriptors + + # UPLOAD DATA + training_dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi.csv") + feature_dataset = Algorithm::Descriptor.physchem training_dataset, @descriptors + feature_dataset.save + scaled_feature_dataset = feature_dataset.scale + scaled_feature_dataset.save + model = Model::LazarRegression.create training_dataset + model.neighbor_algorithm = "physchem_neighbors" + model.neighbor_algorithm_parameters = { + :feature_calculation_algorithm => "OpenTox::Algorithm::Descriptor.physchem", + :descriptors => @descriptors, + :feature_dataset_id => scaled_feature_dataset.id, + :min_sim => 0.3 + } + model.save + cv = RegressionCrossValidation.create model + p cv + end + end -- cgit v1.2.3