From 354aaa649e9eeed5d81793e09d9714b45063c147 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Wed, 8 Feb 2012 13:14:11 +0100 Subject: toxbank-investigation compatible version --- lib/model.rb | 480 +---------------------------------------------------------- 1 file changed, 3 insertions(+), 477 deletions(-) (limited to 'lib/model.rb') diff --git a/lib/model.rb b/lib/model.rb index a806b74..95aa9ff 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -1,488 +1,14 @@ module OpenTox - module Model - - include OpenTox - - # Find a lazar model - # @param [String] uri Model URI - # @return [OpenTox::Model::Lazar] lazar model - def self.find(uri, subjectid=nil) - if CONFIG[:yaml_hosts].include?(URI.parse(uri).host) - YAML.load RestClientWrapper.get(uri,{:accept => 'application/x-yaml', :subjectid => subjectid}) - else - parser = Parser::Owl::Feature.new(uri, @subjectid) - @metadata = parser.load_uri.metadata - end - end - - # Get URIs of all models - # @return [Array] List of lazar model URIs - def self.all(subjectid=nil) - RestClientWrapper.get(CONFIG[:services]["opentox-model"], :subjectid => subjectid).to_s.split("\n") - end + class Model # Run a model with parameters # @param [Hash] params Parameters for OpenTox model # @param [optional,OpenTox::Task] waiting_task (can be a OpenTox::Subtask as well), progress is updated accordingly # @return [text/uri-list] Task or resource URI - def run( params, accept_header=nil, waiting_task=nil ) - unless accept_header - if CONFIG[:yaml_hosts].include?(URI.parse(@uri).host) - accept_header = 'application/x-yaml' - else - accept_header = 'application/rdf+xml' - end - end - LOGGER.info "running model "+@uri.to_s+", params: "+params.inspect+", accept: "+accept_header.to_s - RestClientWrapper.post(@uri,params,{:accept => accept_header},waiting_task).to_s - end - - # Generic OpenTox model class for all API compliant services - class Generic - include Model - - # Find Generic Opentox Model via URI, and loads metadata, could raise NotFound/NotAuthorized error - # @param [String] uri Model URI - # @return [OpenTox::Model::Generic] Model instance - def self.find(uri,subjectid=nil) - return nil unless uri - model = Generic.new(uri,subjectid) - model.load_metadata - raise "could not load model metadata '"+uri.to_s+"'" if model.metadata==nil or model.metadata.size==0 - model - end - - # provides feature type, possible types are "regression" or "classification" - # @return [String] feature type, "unknown" if type could not be estimated - def feature_type - unless @feature_type - load_predicted_variables unless @predicted_variable - @feature_type = OpenTox::Feature.find( @predicted_variable, @subjectid ).feature_type - end - @feature_type - end - - def predicted_variable - load_predicted_variables unless @predicted_variable - @predicted_variable - end - - def predicted_confidence - load_predicted_variables unless @predicted_confidence - @predicted_confidence - end - - private - def load_predicted_variables - load_metadata if @metadata==nil or @metadata.size==0 or (@metadata.size==1 && @metadata.values[0]==@uri) - if @metadata[OT.predictedVariables] - predictedVariables = @metadata[OT.predictedVariables] - if predictedVariables.is_a?(Array) - if (predictedVariables.size==1) - @predicted_variable = predictedVariables[0] - elsif (predictedVariables.size==2) - # PENDING identify confidence - conf_index = -1 - predictedVariables.size.times do |i| - f = OpenTox::Feature.find(predictedVariables[i]) - conf_index = i if f.metadata[DC.title]=~/(?i)confidence/ - end - raise "could not estimate predicted variable from model: '"+uri.to_s+ - "', number of predicted-variables==2, but no confidence found" if conf_index==-1 - @predicted_variable = predictedVariables[1-conf_index] - @predicted_confidence = predictedVariables[conf_index] - else - raise "could not estimate predicted variable from model: '"+uri.to_s+"', number of predicted-variables > 2" - end - else - raise "could not estimate predicted variable from model: '"+uri.to_s+"', predicted-variables is no array" - end - end - raise "could not estimate predicted variable from model: '"+uri.to_s+"'" unless @predicted_variable - end + def run params=nil + post params, {:accept => 'text/uri-list'} end - # Lazy Structure Activity Relationship class - class Lazar < Generic - - #include Model - include Algorithm - - attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim, :subjectid, :prop_kernel, :value_map - - def initialize(uri=nil,subjectid=nil) - - if uri - super uri - else - super CONFIG[:services]["opentox-model"] - end - - @metadata[OT.algorithm] = File.join(CONFIG[:services]["opentox-algorithm"],"lazar") - - @features = [] - @effects = {} - @activities = {} - @p_values = {} - @fingerprints = {} - @value_map = {} - - @feature_calculation_algorithm = "Substructure.match" - @similarity_algorithm = "Similarity.tanimoto" - @prediction_algorithm = "Neighbors.weighted_majority_vote" - - @min_sim = 0.3 - @prop_kernel = false - - end - - # Find a lazar model via URI, and loads metadata, could raise NotFound/NotAuthorized error - # @param [String] uri Model URI - # @return [OpenTox::Model::Generic] Model instance - def self.find(uri,subjectid=nil) - return nil unless uri - model = Lazar.new(uri,subjectid) - model.load_metadata - raise "could not load model metadata '"+uri.to_s+"'" if model.metadata==nil or model.metadata.size==0 - model - end - - # Create a new lazar model - # @param [optional,Hash] params Parameters for the lazar algorithm (OpenTox::Algorithm::Lazar) - # @return [OpenTox::Model::Lazar] lazar model - def self.create(params) - subjectid = params[:subjectid] - lazar_algorithm = OpenTox::Algorithm::Generic.new File.join( CONFIG[:services]["opentox-algorithm"],"lazar") - model_uri = lazar_algorithm.run(params) - OpenTox::Model::Lazar.find(model_uri, subjectid) - end - -=begin - # Get a parameter value - # @param [String] param Parameter name - # @return [String] Parameter value - def parameter(param) - @metadata[OT.parameters].collect{|p| p[OT.paramValue] if p[DC.title] == param}.compact.first - end - - # Predict a dataset - # @param [String] dataset_uri Dataset URI - # @param [optional,subjectid] - # @param [optional,OpenTox::Task] waiting_task (can be a OpenTox::Subtask as well), progress is updated accordingly - # @return [OpenTox::Dataset] Dataset with predictions - def predict_dataset(dataset_uri, subjectid=nil, waiting_task=nil) - @prediction_dataset = Dataset.create(CONFIG[:services]["opentox-dataset"], subjectid) - @prediction_dataset.add_metadata({ - OT.hasSource => @uri, - DC.creator => @uri, - DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )), - OT.parameters => [{DC.title => "dataset_uri", OT.paramValue => dataset_uri}] - }) - d = Dataset.new(dataset_uri,subjectid) - d.load_compounds(subjectid) - count = 0 - d.compounds.each do |compound_uri| - begin - predict(compound_uri,false,subjectid) - count += 1 - waiting_task.progress( count/d.compounds.size.to_f*100.0 ) if waiting_task - rescue => ex - LOGGER.warn "prediction for compound "+compound_uri.to_s+" failed: "+ex.message - end - end - @prediction_dataset.save(subjectid) - @prediction_dataset - end - - # Predict a compound - # @param [String] compound_uri Compound URI - # @param [optinal,Boolean] verbose Verbose prediction (output includes neighbors and features) - # @return [OpenTox::Dataset] Dataset with prediction - def predict(compound_uri,verbose=false,subjectid=nil) - - @compound = Compound.new compound_uri - features = {} - - unless @prediction_dataset - @prediction_dataset = Dataset.create(CONFIG[:services]["opentox-dataset"], subjectid) - @prediction_dataset.add_metadata( { - OT.hasSource => @uri, - DC.creator => @uri, - DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )), - OT.parameters => [{DC.title => "compound_uri", OT.paramValue => compound_uri}] - } ) - end - - return @prediction_dataset if database_activity(subjectid) - - load_metadata(subjectid) - case OpenTox::Feature.find(metadata[OT.dependentVariables]).feature_type - when "classification" - # AM: Balancing, see http://www.maunz.de/wordpress/opentox/2011/balanced-lazar - l = Array.new # larger - s = Array.new # smaller fraction - - raise "no fingerprints in model" if @fingerprints.size==0 - - @fingerprints.each do |training_compound,training_features| - @activities[training_compound].each do |act| - case act.to_s - when "false" - l << training_compound - when "true" - s << training_compound - else - LOGGER.warn "BLAZAR: Activity #{act.to_s} should not be reached." - end - end - end - if s.size > l.size then - l,s = s,l # happy swapping - LOGGER.info "BLAZAR: |s|=#{s.size}, |l|=#{l.size}." - end - # determine ratio - modulo = l.size.divmod(s.size)# modulo[0]=ratio, modulo[1]=rest - LOGGER.info "BLAZAR: Balance: #{modulo[0]}, rest #{modulo[1]}." - - # AM: Balanced predictions - addon = (modulo[1].to_f/modulo[0]).ceil # what will be added in each round - slack = (addon!=0 ? modulo[1].divmod(addon)[1] : 0) # what remains for the last round - position = 0 - predictions = Array.new - - prediction_best=nil - neighbors_best=nil - - begin - for i in 1..modulo[0] do - (i == modulo[0]) && (slack>0) ? lr_size = s.size + slack : lr_size = s.size + addon # determine fraction - LOGGER.info "BLAZAR: Neighbors round #{i}: #{position} + #{lr_size}." - neighbors_balanced(s, l, position, lr_size) # get ratio fraction of larger part - if @prop_kernel && @prediction_algorithm.include?("svm") - props = get_props - else - props = nil - end - prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values}, props)") - if prediction_best.nil? || prediction[:confidence].abs > prediction_best[:confidence].abs - prediction_best=prediction - neighbors_best=@neighbors - end - position = position + lr_size - end - rescue Exception => e - LOGGER.error "BLAZAR failed in prediction: "+e.class.to_s+": "+e.message - end - - prediction=prediction_best - @neighbors=neighbors_best - ### END AM balanced predictions - - else # AM: no balancing - LOGGER.info "LAZAR: Unbalanced." - neighbors - if @prop_kernel && @prediction_algorithm.include?("svm") - props = get_props - else - props = nil - end - prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values}, props)") - end - - value_feature_uri = File.join( @uri, "predicted", "value") - confidence_feature_uri = File.join( @uri, "predicted", "confidence") - - #prediction_feature_uris = {value_feature_uri => prediction[:prediction], confidence_feature_uri => prediction[:confidence]} - #prediction_feature_uris[value_feature_uri] = nil if @neighbors.size == 0 or prediction[:prediction].nil? - - @prediction_dataset.metadata[OT.dependentVariables] = @metadata[OT.dependentVariables] - @prediction_dataset.metadata[OT.predictedVariables] = [value_feature_uri, confidence_feature_uri] - - if OpenTox::Feature.find(metadata[OT.dependentVariables]).feature_type == "classification" - @prediction_dataset.add @compound.uri, value_feature_uri, @value_map[prediction[:prediction]] - else - @prediction_dataset.add @compound.uri, value_feature_uri, prediction[:prediction] - end - @prediction_dataset.add @compound.uri, confidence_feature_uri, prediction[:confidence] - #prediction_feature_uris.each do |prediction_feature_uri,value| - #@prediction_dataset.add @compound.uri, prediction_feature_uri, @value_map[value] - #end - - if verbose - if @feature_calculation_algorithm == "Substructure.match" - f = 0 - @compound_features.each do |feature| - feature_uri = File.join( @prediction_dataset.uri, "feature", "descriptor", f.to_s) - features[feature] = feature_uri - @prediction_dataset.add_feature(feature_uri, { - RDF.type => [OT.Substructure], - OT.smarts => feature, - OT.pValue => @p_values[feature], - OT.effect => @effects[feature] - }) - @prediction_dataset.add @compound.uri, feature_uri, true - f+=1 - end - else - @compound_features.each do |feature| - features[feature] = feature - @prediction_dataset.add @compound.uri, feature, true - end - end - n = 0 - @neighbors.each do |neighbor| - neighbor_uri = File.join( @prediction_dataset.uri, "feature", "neighbor", n.to_s ) - @prediction_dataset.add_feature(neighbor_uri, { - OT.compound => neighbor[:compound], - OT.similarity => neighbor[:similarity], - OT.measuredActivity => neighbor[:activity], - RDF.type => [OT.Neighbor] - }) - @prediction_dataset.add @compound.uri, neighbor_uri, true - f = 0 unless f - neighbor[:features].each do |feature| - if @feature_calculation_algorithm == "Substructure.match" - feature_uri = File.join( @prediction_dataset.uri, "feature", "descriptor", f.to_s) unless feature_uri = features[feature] - else - feature_uri = feature - end - @prediction_dataset.add neighbor[:compound], feature_uri, true - unless features.has_key? feature - features[feature] = feature_uri - @prediction_dataset.add_feature(feature_uri, { - RDF.type => [OT.Substructure], - OT.smarts => feature, - OT.pValue => @p_values[feature], - OT.effect => @effects[feature] - }) - f+=1 - end - end - n+=1 - end - end - #end - - @prediction_dataset.save(subjectid) - @prediction_dataset - end - - # Calculate the propositionalization matrix aka instantiation matrix (0/1 entries for features) - # Same for the vector describing the query compound - def get_props - matrix = Array.new - begin - @neighbors.each do |n| - n = n[:compound] - row = [] - @features.each do |f| - if ! @fingerprints[n].nil? - row << (@fingerprints[n].include?(f) ? 0.0 : @p_values[f]) - else - row << 0.0 - end - end - matrix << row - end - row = [] - @features.each do |f| - row << (@compound.match([f]).size == 0 ? 0.0 : @p_values[f]) - end - rescue Exception => e - LOGGER.debug "get_props failed with '" + $! + "'" - end - [ matrix, row ] - end - - # Find neighbors and store them as object variable, access only a subset of compounds for that. - def neighbors_balanced(s, l, start, offset) - @compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm - @neighbors = [] - [ l[start, offset ] , s ].flatten.each do |training_compound| # AM: access only a balanced subset - training_features = @fingerprints[training_compound] - add_neighbor training_features, training_compound - end - - end - - # Find neighbors and store them as object variable, access all compounds for that. - def neighbors - @compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm - @neighbors = [] - @fingerprints.each do |training_compound,training_features| # AM: access all compounds - add_neighbor training_features, training_compound - end - end - - # Adds a neighbor to @neighbors if it passes the similarity threshold. - def add_neighbor(training_features, training_compound) - sim = eval("#{@similarity_algorithm}(@compound_features,training_features,@p_values)") - if sim > @min_sim - @activities[training_compound].each do |act| - @neighbors << { - :compound => training_compound, - :similarity => sim, - :features => training_features, - :activity => act - } - end - end - end - - # Find database activities and store them in @prediction_dataset - # @return [Boolean] true if compound has databasse activities, false if not - def database_activity(subjectid) - if @activities[@compound.uri] - @activities[@compound.uri].each { |act| @prediction_dataset.add @compound.uri, @metadata[OT.dependentVariables], act } - @prediction_dataset.add_metadata(OT.hasSource => @metadata[OT.trainingDataset]) - @prediction_dataset.save(subjectid) - true - else - false - end - end - - def prediction_features - [prediction_value_feature,prediction_confidence_feature] - end - - def prediction_value_feature - dependent_uri = @metadata[OT.dependentVariables].first - feature = OpenTox::Feature.new File.join( @uri, "predicted", "value") - feature.add_metadata( { - RDF.type => [OT.ModelPrediction], - OT.hasSource => @uri, - DC.creator => @uri, - DC.title => URI.decode(File.basename( dependent_uri )), - OWL.sameAs => dependent_uri - }) - feature - end - - def prediction_confidence_feature - dependent_uri = @metadata[OT.dependentVariables].first - feature = OpenTox::Feature.new File.join( @uri, "predicted", "confidence") - feature.add_metadata( { - RDF.type => [OT.ModelPrediction], - OT.hasSource => @uri, - DC.creator => @uri, - DC.title => "#{URI.decode(File.basename( dependent_uri ))} confidence" - }) - feature - end - - # Save model at model service - def save(subjectid) - self.uri = RestClientWrapper.post(@uri,self.to_yaml,{:content_type => "application/x-yaml", :subjectid => subjectid}) - end - - # Delete model at model service - def delete(subjectid) - RestClientWrapper.delete(@uri, :subjectid => subjectid) unless @uri == CONFIG[:services]["opentox-model"] - end - -=end - end end end -- cgit v1.2.3