summary | refs | log | tree | commit | diff
path: root/lib/model.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/model.rb')
-rw-r--r-- lib/model.rb 94
1 files changed, 35 insertions, 59 deletions
diff --git a/lib/model.rb b/lib/model.rb
index adcbcc6..e8b30ca 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -81,7 +81,6 @@ module OpenTox
:method => "properties",
:categories => ["P-CHEM"],
},
- #:descriptors => ["P-CHEM","Proteomics"],
:similarity => {
:method => "Algorithm::Similarity.weighted_cosine",
:min => 0.5
@@ -103,11 +102,12 @@ module OpenTox
parameters.each do |p,v|
model.algorithms[type] ||= {}
model.algorithms[type][p] = v
+ model.algorithms[:descriptors].delete :categories if type == :descriptors and p == :type
end
else
model.algorithms[type] = parameters
end
- end
+ end if algorithms
# parse dependent_variables from training dataset
training_dataset.substances.each do |substance|
@@ -140,10 +140,11 @@ module OpenTox
model.algorithms[:descriptors].delete(:features)
model.algorithms[:descriptors].delete(:type)
model.substances.each_with_index do |s,i|
- s.calculate_properties(features).each_with_index do |v,j|
+ props = s.calculate_properties(features)
+ props.each_with_index do |v,j|
model.independent_variables[j] ||= []
model.independent_variables[j][i] = v
- end
+ end if props and !props.empty?
end
# parse independent_variables
when "properties"
@@ -152,7 +153,7 @@ module OpenTox
categories.each do |category|
Feature.where(category:category).each{|f| feature_ids << f.id.to_s}
end
- properties = model.substances.collect { |s| s.properties }
+ properties = model.substances.collect { |s| s.properties }
property_ids = properties.collect{|p| p.keys}.flatten.uniq
model.descriptor_ids = feature_ids & property_ids
model.independent_variables = model.descriptor_ids.collect{|i| properties.collect{|p| p[i] ? p[i].median : nil}}
@@ -220,10 +221,10 @@ module OpenTox
prediction[:measurements] << dependent_variables[i]
prediction[:warning] = "Substance '#{substance.name}, id:#{substance.id}' has been excluded from neighbors, because it is identical with the query substance."
else
- next if substance.is_a? Nanoparticle and substance.core != Nanoparticle.find(s).core
if fingerprints?
neighbor_descriptors = fingerprints[i]
else
+ next if substance.is_a? Nanoparticle and substance.core != Nanoparticle.find(s).core # necessary for nanoparticle properties predictions
neighbor_descriptors = scaled_variables.collect{|v| v[i]}
end
sim = Algorithm.run algorithms[:similarity][:method], [similarity_descriptors, neighbor_descriptors, descriptor_weights]
@@ -246,6 +247,7 @@ module OpenTox
elsif neighbor_similarities.size == 1
prediction.merge!({:value => dependent_variables.first, :probabilities => nil, :warning => "Only one similar compound in the training set. Predicting its experimental value.", :neighbors => [{:id => neighbor_ids.first, :similarity => neighbor_similarities.first}]})
else
+ query_descriptors.collect!{|d| d ? 1 : 0} if algorithms[:feature_selection] and algorithms[:descriptors][:method] == "fingerprint"
# call prediction algorithm
result = Algorithm.run algorithms[:prediction][:method], dependent_variables:neighbor_dependent_variables,independent_variables:neighbor_independent_variables ,weights:neighbor_similarities, query_variables:query_descriptors
prediction.merge! result
@@ -328,7 +330,7 @@ module OpenTox
class LazarRegression < Lazar
end
- class Prediction
+ class Validation
include OpenTox
include Mongoid::Document
@@ -340,7 +342,6 @@ module OpenTox
field :unit, type: String
field :model_id, type: BSON::ObjectId
field :repeated_crossvalidation_id, type: BSON::ObjectId
- field :leave_one_out_validation_id, type: BSON::ObjectId
def predict object
model.predict object
@@ -354,6 +355,10 @@ module OpenTox
Lazar.find model_id
end
+ def algorithms
+ model.algorithms
+ end
+
def prediction_feature
model.prediction_feature
end
@@ -366,10 +371,6 @@ module OpenTox
repeated_crossvalidation.crossvalidations
end
- def leave_one_out_validation
- Validation::LeaveOneOut.find leave_one_out_validation_id
- end
-
def regression?
model.is_a? LazarRegression
end
@@ -381,63 +382,38 @@ module OpenTox
def self.from_csv_file file
metadata_file = file.sub(/csv$/,"json")
bad_request_error "No metadata file #{metadata_file}" unless File.exist? metadata_file
- prediction_model = self.new JSON.parse(File.read(metadata_file))
+ model_validation = self.new JSON.parse(File.read(metadata_file))
training_dataset = Dataset.from_csv_file file
model = Lazar.create training_dataset: training_dataset
- prediction_model[:model_id] = model.id
- prediction_model[:repeated_crossvalidation_id] = Validation::RepeatedCrossValidation.create(model).id
- #prediction_model[:leave_one_out_validation_id] = Validation::LeaveOneOut.create(model).id
- prediction_model.save
- prediction_model
+ model_validation[:model_id] = model.id
+ model_validation[:repeated_crossvalidation_id] = Validation::RepeatedCrossValidation.create(model).id
+ model_validation.save
+ model_validation
end
- end
-
- class NanoPrediction < Prediction
-
- def self.from_json_dump dir, category
- Import::Enanomapper.import dir
- training_dataset = Dataset.where(:name => "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles").first
- unless training_dataset
- Import::Enanomapper.import File.join(File.dirname(__FILE__),"data","enm")
+ def self.from_enanomapper training_dataset: nil, prediction_feature:nil, algorithms: nil
+
+ # find/import training_dataset
+ training_dataset ||= Dataset.where(:name => "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles").first
+ unless training_dataset # try to import from json dump
+ Import::Enanomapper.import
training_dataset = Dataset.where(name: "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles").first
+ bad_request_error "Cannot import 'Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles' dataset" unless training_dataset
end
- prediction_model = self.new(
- :endpoint => "log2(Net cell association)",
- :source => "https://data.enanomapper.net/",
- :species => "A549 human lung epithelial carcinoma cells",
- :unit => "log2(ug/Mg)"
- )
- prediction_feature = Feature.where(name: "log2(Net cell association)", category: "TOX").first
- model = Model::LazarRegression.create(prediction_feature: prediction_feature, training_dataset: training_dataset)
- prediction_model[:model_id] = model.id
- repeated_cv = Validation::RepeatedCrossValidation.create model
- prediction_model[:repeated_crossvalidation_id] = Validation::RepeatedCrossValidation.create(model).id
- #prediction_model[:leave_one_out_validation_id] = Validation::LeaveOneOut.create(model).id
- prediction_model.save
- prediction_model
- end
+ prediction_feature ||= Feature.where(name: "log2(Net cell association)", category: "TOX").first
- def self.create dir: dir, algorithms: algorithms
- training_dataset = Dataset.where(:name => "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles").first
- unless training_dataset
- Import::Enanomapper.import dir
- training_dataset = Dataset.where(name: "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles").first
- end
- prediction_model = self.new(
- :endpoint => "log2(Net cell association)",
- :source => "https://data.enanomapper.net/",
+ model_validation = self.new(
+ :endpoint => prediction_feature.name,
+ :source => prediction_feature.source,
:species => "A549 human lung epithelial carcinoma cells",
- :unit => "log2(ug/Mg)"
+ :unit => prediction_feature.unit
)
- prediction_feature = Feature.where(name: "log2(Net cell association)", category: "TOX").first
- model = Model::LazarRegression.create(prediction_feature: prediction_feature, training_dataset: training_dataset, algorithms: algorithms)
- prediction_model[:model_id] = model.id
+ model = LazarRegression.create prediction_feature: prediction_feature, training_dataset: training_dataset, algorithms: algorithms
+ model_validation[:model_id] = model.id
repeated_cv = Validation::RepeatedCrossValidation.create model
- prediction_model[:repeated_crossvalidation_id] = Validation::RepeatedCrossValidation.create(model).id
- #prediction_model[:leave_one_out_validation_id] = Validation::LeaveOneOut.create(model).id
- prediction_model.save
- prediction_model
+ model_validation[:repeated_crossvalidation_id] = repeated_cv.id
+ model_validation.save
+ model_validation
end
end