From 8c6c59980bc82dc2177147f2fe34adf8bfbc1539 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Mon, 24 Aug 2015 12:17:07 +0200 Subject: Model::Prediction with tests, initial indexes --- .gitignore | 1 + VERSION | 2 +- lib/compound.rb | 2 ++ lib/crossvalidation.rb | 1 + lib/feature.rb | 1 + lib/lazar.rb | 7 +++---- lib/model.rb | 13 +++++++++++-- test/compound.rb | 4 ++-- test/data/hamster_carcinogenicity.json | 3 +++ test/prediction_models.rb | 23 +++++++++++++++++++++++ test/setup.rb | 3 ++- 11 files changed, 50 insertions(+), 10 deletions(-) create mode 100644 test/data/hamster_carcinogenicity.json create mode 100644 test/prediction_models.rb diff --git a/.gitignore b/.gitignore index 926a09d..901e399 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ last-utils libfminer +openbabel test/fminer_debug.txt Gemfile.lock *.gem diff --git a/VERSION b/VERSION index 5a5831a..d169b2f 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.0.7 +0.0.8 diff --git a/lib/compound.rb b/lib/compound.rb index c0d6536..fa57aff 100644 --- a/lib/compound.rb +++ b/lib/compound.rb @@ -22,6 +22,8 @@ module OpenTox field :fp4, type: Array field :fp4_size, type: Integer + index({smiles: 1}, {unique: true}) + # Overwrites standard Mongoid method to create fingerprints before database insertion def self.find_or_create_by params compound = self.find_or_initialize_by params diff --git a/lib/crossvalidation.rb b/lib/crossvalidation.rb index d0ad324..5af75bf 100644 --- a/lib/crossvalidation.rb +++ b/lib/crossvalidation.rb @@ -8,6 +8,7 @@ module OpenTox field :nr_unpredicted, type: Integer field :predictions, type: Array field :finished_at, type: Time + #belongs_to :prediction def time finished_at - created_at diff --git a/lib/feature.rb b/lib/feature.rb index b2bc1f5..22b2846 100644 --- a/lib/feature.rb +++ b/lib/feature.rb @@ -29,6 +29,7 @@ module OpenTox # Feature for SMARTS fragments class Smarts < NominalFeature field :smarts, type: String + index "smarts" => 1 def self.from_smarts smarts self.find_or_create_by :smarts => smarts end diff --git a/lib/lazar.rb b/lib/lazar.rb index 174fb2c..d0128b7 100644 --- a/lib/lazar.rb +++ b/lib/lazar.rb @@ -15,20 +15,19 @@ require "base64" ENV["MONGOID_ENV"] ||= "development" # TODO remove config files, change default via ENV or directly in Mongoid class Mongoid.load!("#{File.expand_path(File.join(File.dirname(__FILE__),'..','mongoid.yml'))}") -# TODO get Mongo::Client from Mongoid -$mongo = Mongo::Client.new('mongodb://127.0.0.1:27017/opentox') -# TODO same for GridFS +$mongo = Mongoid.default_client +#$mongo = Mongo::Client.new('mongodb://127.0.0.1:27017/opentox') $gridfs = $mongo.database.fs # R setup R = Rserve::Connection.new # Logger setup +STDOUT.sync = true # for redirection, etc see http://stackoverflow.com/questions/8549443/why-doesnt-logger-output-to-stdout-get-redirected-to-files $logger = Logger.new STDOUT # STDERR did not work on my development machine (CH) $logger.level = Logger::DEBUG Mongo::Logger.logger = $logger Mongo::Logger.level = Logger::WARN -#Mongoid.logger = $logger # Require sub-Repositories require_relative '../libfminer/libbbrc/bbrc' # include before openbabel diff --git a/lib/model.rb b/lib/model.rb index bf8c549..185d70f 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -19,6 +19,8 @@ module OpenTox # prediction feature field :prediction_feature_id, type: BSON::ObjectId + #belongs_to :prediction + attr_accessor :prediction_dataset attr_accessor :training_dataset @@ -156,11 +158,10 @@ module OpenTox end - class PredictionModel + class Prediction include OpenTox include Mongoid::Document include Mongoid::Timestamps - store_in collection: "models" # TODO field Validations field :endpoint, type: String @@ -169,6 +170,14 @@ module OpenTox field :unit, type: String field :model_id, type: BSON::ObjectId field :crossvalidation_id, type: BSON::ObjectId + + def predict object + Model::Lazar.find(model_id).predict object + end + + def crossvalidation + CrossValidation.find crossvalidation_id + end end end diff --git a/test/compound.rb b/test/compound.rb index 7265ef5..06c19a2 100644 --- a/test/compound.rb +++ b/test/compound.rb @@ -54,7 +54,6 @@ print c.sdf def test_inchikey c = OpenTox::Compound.from_inchi "InChI=1S/C6H6/c1-2-4-6-5-3-1/h1-6H" - p c assert_equal "UHOVQNZJYSORNB-UHFFFAOYSA-N", c.inchikey end @@ -88,7 +87,8 @@ print c.sdf refute_nil c.fp4 end c = d.compounds[371] - assert c.neighbors.size >= 19 + n = c.neighbors + assert n.size >= 18, "Neighbors size (#{n.size}) should be larger than 17" end def test_openbabel_segfault diff --git a/test/data/hamster_carcinogenicity.json b/test/data/hamster_carcinogenicity.json new file mode 100644 index 0000000..8c17d9d --- /dev/null +++ b/test/data/hamster_carcinogenicity.json @@ -0,0 +1,3 @@ +{ "species": "Hamster", +"endpoint": "Carcinogenicity", +"source": "http://www.epa.gov/ncct/dsstox/sdf_cpdbas.html" } diff --git a/test/prediction_models.rb b/test/prediction_models.rb new file mode 100644 index 0000000..b4ad415 --- /dev/null +++ b/test/prediction_models.rb @@ -0,0 +1,23 @@ +require_relative "setup.rb" + +class PredictionModelTest < MiniTest::Test + + def test_prediction_model + dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv" + model = Model::LazarFminerClassification.create dataset + cv = ClassificationCrossValidation.create model + metadata = JSON.parse(File.read("#{DATA_DIR}/hamster_carcinogenicity.json")) + + metadata[:model_id] = model.id + metadata[:crossvalidation_id] = cv.id + pm = Model::Prediction.new(metadata) + pm.save + [:endpoint,:species,:source].each do |p| + refute_empty pm[p] + end + assert pm.crossvalidation.accuracy > 0.8 + prediction = pm.predict Compound.from_smiles("CCCC(NN)C") + assert_equal "true", prediction[:value] + pm.delete + end +end diff --git a/test/setup.rb b/test/setup.rb index 379fa53..538853d 100644 --- a/test/setup.rb +++ b/test/setup.rb @@ -3,4 +3,5 @@ require_relative '../lib/lazar.rb' include OpenTox TEST_DIR ||= File.expand_path(File.dirname(__FILE__)) DATA_DIR ||= File.join(TEST_DIR,"data") -#$mongo.database.drop +$mongo.database.drop +$gridfs = $mongo.database.fs # recreate GridFS indexes -- cgit v1.2.3