From 7c3bd90c26dfeea2db3cf74a1cefc23d8dece7c0 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Tue, 15 Mar 2016 17:40:40 +0100 Subject: validation tests pass --- lib/model.rb | 77 ++++++++++++++++++++---------------------------------------- 1 file changed, 26 insertions(+), 51 deletions(-) (limited to 'lib/model.rb') diff --git a/lib/model.rb b/lib/model.rb index ebc0db3..f21ea54 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -47,13 +47,32 @@ module OpenTox self end - def predict object + def predict_compound compound + prediction_feature = Feature.find prediction_feature_id + neighbors = compound.send(neighbor_algorithm, neighbor_algorithm_parameters) + # remove neighbors without prediction_feature + # check for database activities (neighbors may include query compound) + database_activities = nil + prediction = {} + if neighbors.collect{|n| n["_id"]}.include? compound.id + + database_activities = neighbors.select{|n| n["_id"] == compound.id}.first["features"][prediction_feature.id.to_s].uniq + prediction[:database_activities] = database_activities + prediction[:warning] = "#{database_activities.size} compounds have been removed from neighbors, because they have the same structure as the query compound." + neighbors.delete_if{|n| n["_id"] == compound.id} + end + neighbors.delete_if{|n| n['features'].empty? or n['features'][prediction_feature.id.to_s] == [nil] } + if neighbors.empty? + prediction.merge!({:value => nil,:confidence => nil,:warning => "Could not find similar compounds with experimental data in the training dataset."}) + else + prediction.merge!(Algorithm.run(prediction_algorithm, compound, {:neighbors => neighbors,:training_dataset_id=> training_dataset_id,:prediction_feature_id => prediction_feature.id})) + end + prediction + end - t = Time.now - at = Time.now + def predict object training_dataset = Dataset.find training_dataset_id - prediction_feature = Feature.find prediction_feature_id # parse data compounds = [] @@ -70,30 +89,7 @@ module OpenTox # make predictions predictions = [] - neighbors = [] - compounds.each_with_index do |compound,c| - t = Time.new - - neighbors = compound.send(neighbor_algorithm, neighbor_algorithm_parameters) - # remove neighbors without prediction_feature - # check for database activities (neighbors may include query compound) - database_activities = nil - prediction = {} - if neighbors.collect{|n| n["_id"]}.include? compound.id - - database_activities = neighbors.select{|n| n["_id"] == compound.id}.first["features"][prediction_feature.id.to_s].uniq - prediction[:database_activities] = database_activities - prediction[:warning] = "#{database_activities.size} compounds have been removed from neighbors, because they have the same structure as the query compound." - neighbors.delete_if{|n| n["_id"] == compound.id} - end - neighbors.delete_if{|n| n['features'].empty? or n['features'][prediction_feature.id.to_s] == [nil] } - if neighbors.empty? - prediction.merge!({:value => nil,:confidence => nil,:warning => "Could not find similar compounds with experimental data in the training dataset."}) - else - prediction.merge!(Algorithm.run(prediction_algorithm, compound, {:neighbors => neighbors,:training_dataset_id=> training_dataset_id,:prediction_feature_id => prediction_feature.id})) - end - predictions << prediction - end + predictions = compounds.collect{|c| predict_compound c} # serialize result case object.class.to_s @@ -105,7 +101,8 @@ module OpenTox return predictions when "OpenTox::Dataset" # prepare prediction dataset - measurement_feature = prediction_feature + measurement_feature = Feature.find prediction_feature_id + prediction_feature = OpenTox::NumericFeature.find_or_create_by( "name" => measurement_feature.name + " (Prediction)" ) prediction_dataset = LazarPrediction.new( :name => "Lazar prediction for #{prediction_feature.name}", @@ -114,11 +111,9 @@ module OpenTox ) confidence_feature = OpenTox::NumericFeature.find_or_create_by( "name" => "Model RMSE" ) - # TODO move into warnings field warning_feature = OpenTox::NominalFeature.find_or_create_by("name" => "Warnings") prediction_dataset.features = [ prediction_feature, confidence_feature, measurement_feature, warning_feature ] prediction_dataset.compounds = compounds - # TODO fix dataset measurements prediction_dataset.data_entries = predictions.collect{|p| [p[:value], p[:rmse] , p[:dataset_activities].to_s, p[:warning]]} prediction_dataset.save return prediction_dataset @@ -171,25 +166,6 @@ module OpenTox end end - class LazarFminerClassification < LazarClassification - field :feature_calculation_parameters, type: Hash - - def self.create training_dataset, fminer_params={} - model = super(training_dataset) - model.update "_type" => self.to_s # adjust class - model = self.find model.id # adjust class - model.neighbor_algorithm = "fminer_neighbors" - model.neighbor_algorithm_parameters = { - :feature_calculation_algorithm => "OpenTox::Algorithm::Descriptor.smarts_match", - :feature_dataset_id => Algorithm::Fminer.bbrc(training_dataset,fminer_params).id, - :min_sim => 0.3 - } - model.feature_calculation_parameters = fminer_params - model.save - model - end - end - class Prediction include OpenTox include Mongoid::Document @@ -238,7 +214,6 @@ module OpenTox training_dataset = Dataset.from_csv_file file model = nil if training_dataset.features.first.nominal? - #model = LazarFminerClassification.create training_dataset model = LazarClassification.create training_dataset elsif training_dataset.features.first.numeric? model = LazarRegression.create training_dataset -- cgit v1.2.3