summary | refs | log | tree | commit | diff
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r-- lib/crossvalidation.rb 17
-rw-r--r-- lib/dataset.rb 2
-rw-r--r-- lib/lazar.rb 2
-rw-r--r-- lib/model.rb (renamed from lib/lazar-model.rb) 13
-rw-r--r-- lib/regression.rb 24
5 files changed, 50 insertions, 8 deletions
diff --git a/lib/crossvalidation.rb b/lib/crossvalidation.rb
index d926cc4..d0ad324 100644
--- a/lib/crossvalidation.rb
+++ b/lib/crossvalidation.rb
@@ -2,11 +2,16 @@ module OpenTox
class CrossValidation # fields and helpers shared by the cross-validation subclasses
field :validation_ids, type: Array, default: []
+ field :model_id, type: BSON::ObjectId # filled from model.id by the create methods via update_attributes
field :folds, type: Integer
field :nr_instances, type: Integer
field :nr_unpredicted, type: Integer
field :predictions, type: Array
field :finished_at, type: Time
+
+ def time # difference between finished_at and created_at
+ finished_at - created_at # NOTE(review): created_at is not declared in this class; presumably a Mongoid timestamp set on save -- confirm
+ end
end
class ClassificationCrossValidation < CrossValidation
@@ -22,6 +27,7 @@ module OpenTox
def self.create model, n=10
cv = self.new
+ cv.save # set created_at
validation_ids = []
nr_instances = 0
nr_unpredicted = 0
@@ -64,6 +70,10 @@ module OpenTox
end
end
cv.update_attributes(
+ name: model.name,
+ model_id: model.id,
+ folds: n,
+ validation_ids: validation_ids,
nr_instances: nr_instances,
nr_unpredicted: nr_unpredicted,
accept_values: accept_values,
@@ -85,10 +95,8 @@ module OpenTox
#F measure carcinogen: 0.769, noncarcinogen: 0.348
end
- class RegressionCrossValidation < Validation
+ class RegressionCrossValidation < CrossValidation
- field :validation_ids, type: Array, default: []
- field :folds, type: Integer
field :rmse, type: Float
field :mae, type: Float
field :weighted_rmse, type: Float
@@ -96,6 +104,7 @@ module OpenTox
def self.create model, n=10
cv = self.new
+ cv.save # set created_at
validation_ids = []
nr_instances = 0
nr_unpredicted = 0
@@ -145,6 +154,8 @@ module OpenTox
rmse = Math.sqrt(rmse/n)
weighted_rmse = Math.sqrt(weighted_rmse/confidence_sum)
cv.update_attributes(
+ name: model.name,
+ model_id: model.id,
folds: n,
validation_ids: validation_ids,
nr_instances: nr_instances,
diff --git a/lib/dataset.rb b/lib/dataset.rb
index 8c5ffc0..5850c3d 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -49,7 +49,7 @@ module OpenTox
@data_entries = Marshal.load(data_entry_file.data)
bad_request_error "Data entries (#{data_entries_id}) are not a 2D-Array" unless @data_entries.is_a? Array and @data_entries.first.is_a? Array
bad_request_error "Data entries (#{data_entries_id}) have #{@data_entries.size} rows, but dataset (#{id}) has #{compound_ids.size} compounds" unless @data_entries.size == compound_ids.size
- bad_request_error "Data entries (#{data_entries_id}) have #{@data_entries..first.size} columns, but dataset (#{id}) has #{feature_ids.size} features" unless @data_entries.first.size == feature_ids.size
+ bad_request_error "Data entries (#{data_entries_id}) have #{@data_entries.first.size} columns, but dataset (#{id}) has #{feature_ids.size} features" unless @data_entries.first.size == feature_ids.size
$logger.debug "Retrieving data: #{Time.now-t}"
end
end
diff --git a/lib/lazar.rb b/lib/lazar.rb
index 2ea8cba..174fb2c 100644
--- a/lib/lazar.rb
+++ b/lib/lazar.rb
@@ -58,7 +58,7 @@ CLASSES = ["Feature","Compound","Dataset","Validation","CrossValidation"]# Algor
"algorithm.rb",
"descriptor.rb",
"bbrc.rb",
- "lazar-model.rb",
+ "model.rb",
"similarity.rb",
"neighbor.rb",
"classification.rb",
diff --git a/lib/lazar-model.rb b/lib/model.rb
index 1970401..bf8c549 100644
--- a/lib/lazar-model.rb
+++ b/lib/model.rb
@@ -8,7 +8,7 @@ module OpenTox
include Mongoid::Timestamps
store_in collection: "models"
- field :title, type: String
+ field :title, as: :name, type: String
field :creator, type: String, default: __FILE__
# datasets
field :training_dataset_id, type: BSON::ObjectId
@@ -156,9 +156,16 @@ module OpenTox
end
- class PredictionModel < Lazar
- field :category, type: String
+ class PredictionModel
+ include OpenTox
+ include Mongoid::Document
+ include Mongoid::Timestamps
+ store_in collection: "models"
+
+ # TODO field Validations
field :endpoint, type: String
+ field :species, type: String
+ field :source, type: String
field :unit, type: String
field :model_id, type: BSON::ObjectId
field :crossvalidation_id, type: BSON::ObjectId
diff --git a/lib/regression.rb b/lib/regression.rb
index 8a52e7d..0bc6547 100644
--- a/lib/regression.rb
+++ b/lib/regression.rb
@@ -34,6 +34,30 @@ module OpenTox
{:value => prediction,:confidence => confidence}
end
+ def self.weighted_average_with_relevant_fingerprints neighbors # NOTE(review): work in progress -- the accumulation step below is commented out
+ weighted_sum = 0.0
+ sim_sum = 0.0
+ fingerprint_features = []
+ neighbors.each do |row| # each row is destructured as [n, sim, acts]
+ n,sim,acts = row
+ neighbor = Compound.find n # n is handed to Compound.find, so presumably a compound id -- confirm
+ fingerprint_features += neighbor.fp4 # gather the fp4 entries of every neighbor
+ end
+ fingerprint_features.uniq!
+ p fingerprint_features # debug output; the collected features are not used afterwards
+=begin
+ p n
+ acts.each do |act|
+ weighted_sum += sim*Math.log10(act)
+ sim_sum += sim
+ end
+ end
+=end
+ confidence = sim_sum/neighbors.size.to_f # sim_sum is still 0.0 here because the block above is disabled
+ sim_sum == 0 ? prediction = nil : prediction = 10**(weighted_sum/sim_sum) # so prediction is always nil at present
+ {:value => prediction,:confidence => confidence}
+ end
+
# Local support vector regression from neighbors
# @param [Hash] params Keys `:props, :activities, :sims, :min_train_performance` are required
# @return [Numeric] A prediction value.