diff options
Diffstat (limited to 'lib/model.rb')
-rw-r--r-- | lib/model.rb | 127 |
1 file changed, 84 insertions, 43 deletions
diff --git a/lib/model.rb b/lib/model.rb index 8e657b8..b82f098 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -20,6 +20,10 @@ module OpenTox def training_dataset Dataset.find(training_dataset_id) end + + def prediction_feature + Feature.find(prediction_feature_id) + end end class Lazar < Model @@ -31,12 +35,10 @@ module OpenTox # Create a lazar model from a training_dataset and a feature_dataset # @param [OpenTox::Dataset] training_dataset # @return [OpenTox::Model::Lazar] Regression or classification model - def initialize training_dataset, params={} + def initialize prediction_feature, training_dataset, params={} super params - # TODO document convention - prediction_feature = training_dataset.features.first # set defaults for empty parameters self.prediction_feature_id ||= prediction_feature.id self.training_dataset_id ||= training_dataset.id @@ -48,7 +50,6 @@ module OpenTox end def predict_compound compound - prediction_feature = Feature.find prediction_feature_id neighbors = compound.send(neighbor_algorithm, neighbor_algorithm_parameters) # remove neighbors without prediction_feature # check for database activities (neighbors may include query compound) @@ -56,12 +57,13 @@ module OpenTox prediction = {} if neighbors.collect{|n| n["_id"]}.include? compound.id - database_activities = neighbors.select{|n| n["_id"] == compound.id}.first["features"][prediction_feature.id.to_s].uniq + #TODO restrict to dataset features + database_activities = neighbors.select{|n| n["_id"] == compound.id}.first["toxicities"][prediction_feature.id.to_s].uniq prediction[:database_activities] = database_activities prediction[:warning] = "#{database_activities.size} compounds have been removed from neighbors, because they have the same structure as the query compound." neighbors.delete_if{|n| n["_id"] == compound.id} end - neighbors.delete_if{|n| n['features'].empty? or n['features'][prediction_feature.id.to_s] == [nil] } + neighbors.delete_if{|n| n['toxicities'].empty? 
or n['toxicities'][prediction_feature.id.to_s] == [nil] } if neighbors.empty? prediction.merge!({:value => nil,:confidence => nil,:warning => "Could not find similar compounds with experimental data in the training dataset.",:neighbors => []}) else @@ -78,62 +80,55 @@ module OpenTox # parse data compounds = [] - case object.class.to_s - when "OpenTox::Compound" + if object.is_a? Substance compounds = [object] - when "Array" + elsif object.is_a? Array compounds = object - when "OpenTox::Dataset" + elsif object.is_a? Dataset compounds = object.compounds else bad_request_error "Please provide a OpenTox::Compound an Array of OpenTox::Compounds or an OpenTox::Dataset as parameter." end # make predictions - predictions = [] - predictions = compounds.collect{|c| predict_compound c} + predictions = {} + compounds.each do |c| + predictions[c.id.to_s] = predict_compound c + predictions[c.id.to_s][:prediction_feature_id] = prediction_feature_id + end # serialize result - case object.class.to_s - when "OpenTox::Compound" - prediction = predictions.first + if object.is_a? Substance + prediction = predictions[compounds.first.id.to_s] prediction[:neighbors].sort!{|a,b| b[1] <=> a[1]} # sort according to similarity return prediction - when "Array" + elsif object.is_a? Array return predictions - when "OpenTox::Dataset" + elsif object.is_a? 
Dataset + predictions.each{|cid,p| p.delete(:neighbors)} # prepare prediction dataset measurement_feature = Feature.find prediction_feature_id - prediction_feature = OpenTox::NumericFeature.find_or_create_by( "name" => measurement_feature.name + " (Prediction)" ) - prediction_dataset = LazarPrediction.new( + prediction_feature = NumericFeature.find_or_create_by( "name" => measurement_feature.name + " (Prediction)" ) + prediction_dataset = LazarPrediction.create( :name => "Lazar prediction for #{prediction_feature.name}", :creator => __FILE__, - :prediction_feature_id => prediction_feature.id - + :prediction_feature_id => prediction_feature.id, + :predictions => predictions ) - confidence_feature = OpenTox::NumericFeature.find_or_create_by( "name" => "Model RMSE" ) - warning_feature = OpenTox::NominalFeature.find_or_create_by("name" => "Warnings") - prediction_dataset.features = [ prediction_feature, confidence_feature, measurement_feature, warning_feature ] - prediction_dataset.compounds = compounds - prediction_dataset.data_entries = predictions.collect{|p| [p[:value], p[:rmse] , p[:dataset_activities].to_s, p[:warning]]} - prediction_dataset.save + + #prediction_dataset.save return prediction_dataset end end - - def training_activities - i = training_dataset.feature_ids.index prediction_feature_id - training_dataset.data_entries.collect{|de| de[i]} - end end class LazarClassification < Lazar - def self.create training_dataset, params={} - model = self.new training_dataset, params + def self.create prediction_feature, training_dataset, params={} + model = self.new prediction_feature, training_dataset, params model.prediction_algorithm = "OpenTox::Algorithm::Classification.weighted_majority_vote" unless model.prediction_algorithm model.neighbor_algorithm ||= "fingerprint_neighbors" model.neighbor_algorithm_parameters ||= {} @@ -151,8 +146,8 @@ module OpenTox class LazarRegression < Lazar - def self.create training_dataset, params={} - model = self.new 
training_dataset, params + def self.create prediction_feature, training_dataset, params={} + model = self.new prediction_feature, training_dataset, params model.neighbor_algorithm ||= "fingerprint_neighbors" model.prediction_algorithm ||= "OpenTox::Algorithm::Regression.local_fingerprint_regression" model.neighbor_algorithm_parameters ||= {} @@ -173,13 +168,13 @@ module OpenTox include Mongoid::Document include Mongoid::Timestamps - # TODO field Validations field :endpoint, type: String field :species, type: String field :source, type: String field :unit, type: String field :model_id, type: BSON::ObjectId field :repeated_crossvalidation_id, type: BSON::ObjectId + field :leave_one_out_validation_id, type: BSON::ObjectId def predict object Lazar.find(model_id).predict object @@ -201,12 +196,16 @@ module OpenTox repeated_crossvalidation.crossvalidations end + def leave_one_out_validation + LeaveOneOutValidation.find leave_one_out_validation_id + end + def regression? - training_dataset.features.first.numeric? + model.is_a? LazarRegression end def classification? - training_dataset.features.first.nominal? + model.is_a? LazarClassification end def self.from_csv_file file @@ -214,19 +213,61 @@ module OpenTox bad_request_error "No metadata file #{metadata_file}" unless File.exist? metadata_file prediction_model = self.new JSON.parse(File.read(metadata_file)) training_dataset = Dataset.from_csv_file file + prediction_feature = training_dataset.features.first model = nil - if training_dataset.features.first.nominal? - model = LazarClassification.create training_dataset - elsif training_dataset.features.first.numeric? - model = LazarRegression.create training_dataset + if prediction_feature.nominal? + model = LazarClassification.create prediction_feature, training_dataset + elsif prediction_feature.numeric? 
+ model = LazarRegression.create prediction_feature, training_dataset end prediction_model[:model_id] = model.id + prediction_model[:prediction_feature_id] = prediction_feature.id prediction_model[:repeated_crossvalidation_id] = RepeatedCrossValidation.create(model).id + prediction_model[:leave_one_out_validation_id] = LeaveOneOutValidation.create(model).id prediction_model.save prediction_model end end + class NanoLazar + include OpenTox + include Mongoid::Document + include Mongoid::Timestamps + store_in collection: "models" + + field :name, type: String + field :creator, type: String, default: __FILE__ + # datasets + field :training_dataset_id, type: BSON::ObjectId + # algorithms + field :prediction_algorithm, type: String + # prediction feature + field :prediction_feature_id, type: BSON::ObjectId + field :training_particle_ids, type: Array + + def self.create_all + nanoparticles = Nanoparticle.all + toxfeatures = Nanoparticle.all.collect{|np| np.toxicities.keys}.flatten.uniq.collect{|id| Feature.find id} + tox = {} + toxfeatures.each do |t| + tox[t] = nanoparticles.select{|np| np.toxicities.keys.include? t.id.to_s} + end + tox.select!{|t,nps| nps.size > 50} + tox.collect do |t,nps| + find_or_create_by(:prediction_feature_id => t.id, :training_particle_ids => nps.collect{|np| np.id}) + end + end + + def predict nanoparticle + training = training_particle_ids.collect{|id| Nanoparticle.find id} + training_features = training.collect{|t| t.physchem_descriptors.keys}.flatten.uniq + query_features = nanoparticle.physchem_descriptors.keys + common_features = (training_features & query_features) + #p common_features + end + + end + end end |