diff options
Diffstat (limited to 'lib/model.rb')
-rw-r--r-- | lib/model.rb | 94 |
1 files changed, 35 insertions, 59 deletions
diff --git a/lib/model.rb b/lib/model.rb index adcbcc6..e8b30ca 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -81,7 +81,6 @@ module OpenTox :method => "properties", :categories => ["P-CHEM"], }, - #:descriptors => ["P-CHEM","Proteomics"], :similarity => { :method => "Algorithm::Similarity.weighted_cosine", :min => 0.5 @@ -103,11 +102,12 @@ module OpenTox parameters.each do |p,v| model.algorithms[type] ||= {} model.algorithms[type][p] = v + model.algorithms[:descriptors].delete :categories if type == :descriptors and p == :type end else model.algorithms[type] = parameters end - end + end if algorithms # parse dependent_variables from training dataset training_dataset.substances.each do |substance| @@ -140,10 +140,11 @@ module OpenTox model.algorithms[:descriptors].delete(:features) model.algorithms[:descriptors].delete(:type) model.substances.each_with_index do |s,i| - s.calculate_properties(features).each_with_index do |v,j| + props = s.calculate_properties(features) + props.each_with_index do |v,j| model.independent_variables[j] ||= [] model.independent_variables[j][i] = v - end + end if props and !props.empty? end # parse independent_variables when "properties" @@ -152,7 +153,7 @@ module OpenTox categories.each do |category| Feature.where(category:category).each{|f| feature_ids << f.id.to_s} end - properties = model.substances.collect { |s| s.properties } + properties = model.substances.collect { |s| s.properties } property_ids = properties.collect{|p| p.keys}.flatten.uniq model.descriptor_ids = feature_ids & property_ids model.independent_variables = model.descriptor_ids.collect{|i| properties.collect{|p| p[i] ? p[i].median : nil}} @@ -220,10 +221,10 @@ module OpenTox prediction[:measurements] << dependent_variables[i] prediction[:warning] = "Substance '#{substance.name}, id:#{substance.id}' has been excluded from neighbors, because it is identical with the query substance." else - next if substance.is_a? Nanoparticle and substance.core != Nanoparticle.find(s).core if fingerprints? neighbor_descriptors = fingerprints[i] else + next if substance.is_a? Nanoparticle and substance.core != Nanoparticle.find(s).core # necessary for nanoparticle properties predictions neighbor_descriptors = scaled_variables.collect{|v| v[i]} end sim = Algorithm.run algorithms[:similarity][:method], [similarity_descriptors, neighbor_descriptors, descriptor_weights] @@ -246,6 +247,7 @@ module OpenTox elsif neighbor_similarities.size == 1 prediction.merge!({:value => dependent_variables.first, :probabilities => nil, :warning => "Only one similar compound in the training set. Predicting its experimental value.", :neighbors => [{:id => neighbor_ids.first, :similarity => neighbor_similarities.first}]}) else + query_descriptors.collect!{|d| d ? 1 : 0} if algorithms[:feature_selection] and algorithms[:descriptors][:method] == "fingerprint" # call prediction algorithm result = Algorithm.run algorithms[:prediction][:method], dependent_variables:neighbor_dependent_variables,independent_variables:neighbor_independent_variables ,weights:neighbor_similarities, query_variables:query_descriptors prediction.merge! result @@ -328,7 +330,7 @@ module OpenTox class LazarRegression < Lazar end - class Prediction + class Validation include OpenTox include Mongoid::Document @@ -340,7 +342,6 @@ module OpenTox field :unit, type: String field :model_id, type: BSON::ObjectId field :repeated_crossvalidation_id, type: BSON::ObjectId - field :leave_one_out_validation_id, type: BSON::ObjectId def predict object model.predict object @@ -354,6 +355,10 @@ module OpenTox Lazar.find model_id end + def algorithms + model.algorithms + end + def prediction_feature model.prediction_feature end @@ -366,10 +371,6 @@ module OpenTox repeated_crossvalidation.crossvalidations end - def leave_one_out_validation - Validation::LeaveOneOut.find leave_one_out_validation_id - end - def regression? model.is_a? LazarRegression end @@ -381,63 +382,38 @@ module OpenTox def self.from_csv_file file metadata_file = file.sub(/csv$/,"json") bad_request_error "No metadata file #{metadata_file}" unless File.exist? metadata_file - prediction_model = self.new JSON.parse(File.read(metadata_file)) + model_validation = self.new JSON.parse(File.read(metadata_file)) training_dataset = Dataset.from_csv_file file model = Lazar.create training_dataset: training_dataset - prediction_model[:model_id] = model.id - prediction_model[:repeated_crossvalidation_id] = Validation::RepeatedCrossValidation.create(model).id - #prediction_model[:leave_one_out_validation_id] = Validation::LeaveOneOut.create(model).id - prediction_model.save - prediction_model + model_validation[:model_id] = model.id + model_validation[:repeated_crossvalidation_id] = Validation::RepeatedCrossValidation.create(model).id + model_validation.save + model_validation end - end - - class NanoPrediction < Prediction - - def self.from_json_dump dir, category - Import::Enanomapper.import dir - training_dataset = Dataset.where(:name => "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles").first - unless training_dataset - Import::Enanomapper.import File.join(File.dirname(__FILE__),"data","enm") + def self.from_enanomapper training_dataset: nil, prediction_feature:nil, algorithms: nil + + # find/import training_dataset + training_dataset ||= Dataset.where(:name => "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles").first + unless training_dataset # try to import from json dump + Import::Enanomapper.import training_dataset = Dataset.where(name: "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles").first + bad_request_error "Cannot import 'Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles' dataset" unless training_dataset end - prediction_model = self.new( - :endpoint => "log2(Net cell association)", - :source => "https://data.enanomapper.net/", - :species => "A549 human lung epithelial carcinoma cells", - :unit => "log2(ug/Mg)" - ) - prediction_feature = Feature.where(name: "log2(Net cell association)", category: "TOX").first - model = Model::LazarRegression.create(prediction_feature: prediction_feature, training_dataset: training_dataset) - prediction_model[:model_id] = model.id - repeated_cv = Validation::RepeatedCrossValidation.create model - prediction_model[:repeated_crossvalidation_id] = Validation::RepeatedCrossValidation.create(model).id - #prediction_model[:leave_one_out_validation_id] = Validation::LeaveOneOut.create(model).id - prediction_model.save - prediction_model - end + prediction_feature ||= Feature.where(name: "log2(Net cell association)", category: "TOX").first - def self.create dir: dir, algorithms: algorithms - training_dataset = Dataset.where(:name => "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles").first - unless training_dataset - Import::Enanomapper.import dir - training_dataset = Dataset.where(name: "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles").first - end - prediction_model = self.new( - :endpoint => "log2(Net cell association)", - :source => "https://data.enanomapper.net/", + model_validation = self.new( + :endpoint => prediction_feature.name, + :source => prediction_feature.source, :species => "A549 human lung epithelial carcinoma cells", - :unit => "log2(ug/Mg)" + :unit => prediction_feature.unit ) - prediction_feature = Feature.where(name: "log2(Net cell association)", category: "TOX").first - model = Model::LazarRegression.create(prediction_feature: prediction_feature, training_dataset: training_dataset, algorithms: algorithms) - prediction_model[:model_id] = model.id + model = LazarRegression.create prediction_feature: prediction_feature, training_dataset: training_dataset, algorithms: algorithms + model_validation[:model_id] = model.id repeated_cv = Validation::RepeatedCrossValidation.create model - prediction_model[:repeated_crossvalidation_id] = Validation::RepeatedCrossValidation.create(model).id - #prediction_model[:leave_one_out_validation_id] = Validation::LeaveOneOut.create(model).id - prediction_model.save - prediction_model + model_validation[:repeated_crossvalidation_id] = repeated_cv.id + model_validation.save + model_validation end end |