summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--VERSION2
-rw-r--r--lib/crossvalidation.rb17
-rw-r--r--lib/dataset.rb2
-rw-r--r--lib/lazar.rb2
-rw-r--r--lib/model.rb (renamed from lib/lazar-model.rb)13
-rw-r--r--lib/regression.rb24
-rw-r--r--test/lazar-physchem-short.rb1
-rw-r--r--test/lazar-regression.rb27
-rw-r--r--test/validation.rb5
9 files changed, 82 insertions, 11 deletions
diff --git a/VERSION b/VERSION
index 1750564..5a5831a 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-0.0.6
+0.0.7
diff --git a/lib/crossvalidation.rb b/lib/crossvalidation.rb
index d926cc4..d0ad324 100644
--- a/lib/crossvalidation.rb
+++ b/lib/crossvalidation.rb
@@ -2,11 +2,16 @@ module OpenTox
class CrossValidation
field :validation_ids, type: Array, default: []
+ field :model_id, type: BSON::ObjectId
field :folds, type: Integer
field :nr_instances, type: Integer
field :nr_unpredicted, type: Integer
field :predictions, type: Array
field :finished_at, type: Time
+
+ def time
+ finished_at - created_at
+ end
end
class ClassificationCrossValidation < CrossValidation
@@ -22,6 +27,7 @@ module OpenTox
def self.create model, n=10
cv = self.new
+ cv.save # set created_at
validation_ids = []
nr_instances = 0
nr_unpredicted = 0
@@ -64,6 +70,10 @@ module OpenTox
end
end
cv.update_attributes(
+ name: model.name,
+ model_id: model.id,
+ folds: n,
+ validation_ids: validation_ids,
nr_instances: nr_instances,
nr_unpredicted: nr_unpredicted,
accept_values: accept_values,
@@ -85,10 +95,8 @@ module OpenTox
#F measure carcinogen: 0.769, noncarcinogen: 0.348
end
- class RegressionCrossValidation < Validation
+ class RegressionCrossValidation < CrossValidation
- field :validation_ids, type: Array, default: []
- field :folds, type: Integer
field :rmse, type: Float
field :mae, type: Float
field :weighted_rmse, type: Float
@@ -96,6 +104,7 @@ module OpenTox
def self.create model, n=10
cv = self.new
+ cv.save # set created_at
validation_ids = []
nr_instances = 0
nr_unpredicted = 0
@@ -145,6 +154,8 @@ module OpenTox
rmse = Math.sqrt(rmse/n)
weighted_rmse = Math.sqrt(weighted_rmse/confidence_sum)
cv.update_attributes(
+ name: model.name,
+ model_id: model.id,
folds: n,
validation_ids: validation_ids,
nr_instances: nr_instances,
diff --git a/lib/dataset.rb b/lib/dataset.rb
index 8c5ffc0..5850c3d 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -49,7 +49,7 @@ module OpenTox
@data_entries = Marshal.load(data_entry_file.data)
bad_request_error "Data entries (#{data_entries_id}) are not a 2D-Array" unless @data_entries.is_a? Array and @data_entries.first.is_a? Array
bad_request_error "Data entries (#{data_entries_id}) have #{@data_entries.size} rows, but dataset (#{id}) has #{compound_ids.size} compounds" unless @data_entries.size == compound_ids.size
- bad_request_error "Data entries (#{data_entries_id}) have #{@data_entries..first.size} columns, but dataset (#{id}) has #{feature_ids.size} features" unless @data_entries.first.size == feature_ids.size
+ bad_request_error "Data entries (#{data_entries_id}) have #{@data_entries.first.size} columns, but dataset (#{id}) has #{feature_ids.size} features" unless @data_entries.first.size == feature_ids.size
$logger.debug "Retrieving data: #{Time.now-t}"
end
end
diff --git a/lib/lazar.rb b/lib/lazar.rb
index 2ea8cba..174fb2c 100644
--- a/lib/lazar.rb
+++ b/lib/lazar.rb
@@ -58,7 +58,7 @@ CLASSES = ["Feature","Compound","Dataset","Validation","CrossValidation"]# Algor
"algorithm.rb",
"descriptor.rb",
"bbrc.rb",
- "lazar-model.rb",
+ "model.rb",
"similarity.rb",
"neighbor.rb",
"classification.rb",
diff --git a/lib/lazar-model.rb b/lib/model.rb
index 1970401..bf8c549 100644
--- a/lib/lazar-model.rb
+++ b/lib/model.rb
@@ -8,7 +8,7 @@ module OpenTox
include Mongoid::Timestamps
store_in collection: "models"
- field :title, type: String
+ field :title, as: :name, type: String
field :creator, type: String, default: __FILE__
# datasets
field :training_dataset_id, type: BSON::ObjectId
@@ -156,9 +156,16 @@ module OpenTox
end
- class PredictionModel < Lazar
- field :category, type: String
+ class PredictionModel
+ include OpenTox
+ include Mongoid::Document
+ include Mongoid::Timestamps
+ store_in collection: "models"
+
+ # TODO field Validations
field :endpoint, type: String
+ field :species, type: String
+ field :source, type: String
field :unit, type: String
field :model_id, type: BSON::ObjectId
field :crossvalidation_id, type: BSON::ObjectId
diff --git a/lib/regression.rb b/lib/regression.rb
index 8a52e7d..0bc6547 100644
--- a/lib/regression.rb
+++ b/lib/regression.rb
@@ -34,6 +34,30 @@ module OpenTox
{:value => prediction,:confidence => confidence}
end
+ def self.weighted_average_with_relevant_fingerprints neighbors
+ weighted_sum = 0.0
+ sim_sum = 0.0
+ fingerprint_features = []
+ neighbors.each do |row|
+ n,sim,acts = row
+ neighbor = Compound.find n
+ fingerprint_features += neighbor.fp4
+ end
+ fingerprint_features.uniq!
+ p fingerprint_features
+=begin
+ p n
+ acts.each do |act|
+ weighted_sum += sim*Math.log10(act)
+ sim_sum += sim
+ end
+ end
+=end
+ confidence = sim_sum/neighbors.size.to_f
+ sim_sum == 0 ? prediction = nil : prediction = 10**(weighted_sum/sim_sum)
+ {:value => prediction,:confidence => confidence}
+ end
+
# Local support vector regression from neighbors
# @param [Hash] params Keys `:props, :activities, :sims, :min_train_performance` are required
# @return [Numeric] A prediction value.
diff --git a/test/lazar-physchem-short.rb b/test/lazar-physchem-short.rb
index ecf8aff..e74a4b9 100644
--- a/test/lazar-physchem-short.rb
+++ b/test/lazar-physchem-short.rb
@@ -2,6 +2,7 @@ require_relative "setup.rb"
class LazarPhyschemDescriptorTest < MiniTest::Test
def test_epafhm
+ skip "Physchem Regression not yet implemented."
# check available descriptors
@descriptors = OpenTox::Algorithm::Descriptor::DESCRIPTORS.keys
assert_equal 111,@descriptors.size,"wrong number of physchem descriptors"
diff --git a/test/lazar-regression.rb b/test/lazar-regression.rb
new file mode 100644
index 0000000..c36f521
--- /dev/null
+++ b/test/lazar-regression.rb
@@ -0,0 +1,27 @@
+require_relative "setup.rb"
+
+class LazarRegressionTest < MiniTest::Test
+
+ def test_weighted_average
+ training_dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi.csv"
+ model = Model::LazarRegression.create training_dataset
+ compound = Compound.from_smiles "CC(C)(C)CN"
+ prediction = model.predict compound
+ assert_equal 13.6, prediction[:value].round(1)
+ assert_equal 0.83, prediction[:confidence].round(2)
+ assert_equal 1, prediction[:neighbors].size
+ end
+
+ def test_weighted_average_with_relevant_fingerprints
+ training_dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi.csv"
+ model = Model::LazarRegression.create training_dataset
+ model.update(:prediction_algorithm => "OpenTox::Algorithm::Regression.weighted_average_with_relevant_fingerprints")
+ compound = Compound.from_smiles "CC(C)(C)CN"
+ prediction = model.predict compound
+ p prediction
+ #assert_equal 13.6, prediction[:value].round(1)
+ #assert_equal 0.83, prediction[:confidence].round(2)
+ #assert_equal 1, prediction[:neighbors].size
+ end
+
+end
diff --git a/test/validation.rb b/test/validation.rb
index d98feb5..485769c 100644
--- a/test/validation.rb
+++ b/test/validation.rb
@@ -4,10 +4,11 @@ class ValidationTest < MiniTest::Test
def test_fminer_crossvalidation
dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv"
- model = Model::LazarFminerClassification.create dataset#, features
+ model = Model::LazarFminerClassification.create dataset
cv = ClassificationCrossValidation.create model
p cv.accuracy
p cv.weighted_accuracy
+ refute_empty cv.validation_ids
assert cv.accuracy > 0.8
assert cv.weighted_accuracy > cv.accuracy, "Weighted accuracy (#{cv.weighted_accuracy}) larger than unweighted accuracy(#{cv.accuracy}) "
end
@@ -31,7 +32,7 @@ class ValidationTest < MiniTest::Test
p cv.weighted_rmse
p cv.mae
p cv.weighted_mae
- `inkview #{cv.plot}`
+ #`inkview #{cv.plot}`
assert cv.rmse < 30, "RMSE > 30"
assert cv.weighted_rmse < cv.rmse, "Weighted RMSE (#{cv.weighted_rmse}) larger than unweighted RMSE(#{cv.rmse}) "
assert cv.mae < 12