diff options
Diffstat (limited to 'lib/model.rb')
-rw-r--r-- | lib/model.rb | 391 |
1 files changed, 283 insertions, 108 deletions
diff --git a/lib/model.rb b/lib/model.rb index e36b538..74408d8 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -1,143 +1,318 @@ module OpenTox - module Model - class Generic + module Model - MODEL_ATTRIBS = [:uri, :title, :creator, :date, :format, :predictedVariables, :independentVariables, :dependentVariables, :trainingDataset, :algorithm] - MODEL_ATTRIBS.each{ |a| attr_accessor(a) } + include OpenTox - def self.find(uri) - owl = OpenTox::Owl.from_uri(uri, "Model") - return self.new(owl) - end - - def self.to_rdf(model) - owl = OpenTox::Owl.create 'Model', model.uri - (MODEL_ATTRIBS - [:uri]).each do |a| - owl.set(a.to_s,model.send(a.to_s)) + # Run a model with parameters + # @param [Hash] params Parameters for OpenTox model + # @param [optional,OpenTox::Task] waiting_task (can be a OpenTox::Subtask as well), progress is updated accordingly + # @return [text/uri-list] Task or resource URI + def run( params, accept_header=nil, waiting_task=nil ) + unless accept_header + if CONFIG[:yaml_hosts].include?(URI.parse(@uri).host) + accept_header = 'application/x-yaml' + else + accept_header = 'application/rdf+xml' end - owl.rdf end + LOGGER.info "running model "+@uri.to_s+", params: "+params.inspect+", accept: "+accept_header.to_s + RestClientWrapper.post(@uri,params,{:accept => accept_header},waiting_task).to_s + end + + # Generic OpenTox model class for all API compliant services + class Generic + include Model - protected - def initialize(owl) - MODEL_ATTRIBS.each do |a| - self.send("#{a.to_s}=".to_sym, owl.get(a.to_s)) unless a==:uri - end - @uri = owl.uri - if ENV['RACK_ENV'] =~ /test|debug/ - begin - raise "uri invalid" unless Utils.is_uri?(@uri) - raise "no predicted variables" unless @predictedVariables and @predictedVariables.size>0 - rescue => ex - RestClientWrapper.raise_uri_error "invalid model: '"+ex.message+"'\n"+self.to_yaml+"\n",@uri.to_s + # Find Generic Opentox Model via URI, and loads metadata, could raise NotFound/NotAuthorized error + # @param [String] uri Model URI + # @return [OpenTox::Model::Generic] Model instance + def self.find(uri,subjectid=nil) + return nil unless uri + model = Generic.new(uri) + model.load_metadata(subjectid) + raise "could not load model metadata '"+uri.to_s+"'" if model.metadata==nil or model.metadata.size==0 + model + end + + # provides feature type, possible types are "regression" or "classification" + # @return [String] feature type, "unknown" if type could not be estimated + def feature_type(subjectid=nil) + return @feature_type if @feature_type + + # dynamically perform restcalls if necessary + load_metadata(subjectid) if @metadata==nil or @metadata.size==0 or (@metadata.size==1 && @metadata.values[0]==@uri) + algorithm = OpenTox::Algorithm::Generic.find(@metadata[OT.algorithm], subjectid) + algorithm_title = algorithm ? algorithm.metadata[DC.title] : nil + algorithm_type = algorithm ? algorithm.metadata[OT.isA] : nil + dependent_variable = OpenTox::Feature.find( @metadata[OT.dependentVariables],subjectid ) + dependent_variable_type = dependent_variable ? dependent_variable.feature_type : nil + type_indicators = [dependent_variable_type, @metadata[OT.isA], @metadata[DC.title], + @uri, algorithm_type, algorithm_title] + type_indicators.each do |type| + case type + when /(?i)classification/ + @feature_type = "classification" + break + when /(?i)regression/ + @feature_type = "regression" end - LOGGER.warn "model has no dependent variable" unless @dependentVariables and @dependentVariables.size>0 - LOGGER.warn "model has no algorithm" unless @algorithm and @algorithm.size>0 - LOGGER.warn "model has no indenpendent variables" unless @independentVariables end + raise "unknown model "+type_indicators.inspect unless @feature_type + @feature_type end - end - - class PredictionModel < Generic - def self.build( algorithm_uri, algorithm_params ) - - LOGGER.debug "Build model, algorithm_uri:"+algorithm_uri.to_s+", algorithm_parms: "+algorithm_params.inspect.to_s - uri = OpenTox::RestClientWrapper.post(algorithm_uri,algorithm_params).to_s - LOGGER.debug "Build model done: "+uri.to_s - RestClientWrapper.raise_uri_error("Invalid build model result: '"+uri.to_s+"'", algorithm_uri, algorithm_params ) unless Utils.model_uri?(uri) - return PredictionModel.find(uri) - end - - def predict_dataset( dataset_uri ) - - LOGGER.debug "Predict dataset: "+dataset_uri.to_s+" with model "+@uri.to_s - uri = RestClientWrapper.post(@uri, {:accept => "text/uri-list", :dataset_uri=>dataset_uri}) - RestClientWrapper.raise_uri_error("Prediciton result no dataset uri: "+uri.to_s, @uri, {:dataset_uri=>dataset_uri} ) unless Utils.dataset_uri?(uri) - uri - end - - def classification? - #HACK replace with request to ontology server - if @title =~ /(?i)classification/ - return true - elsif @title =~ /(?i)regression/ - return false - elsif @uri =~/ntua/ and @title =~ /mlr/ - return false - elsif @uri =~/tu-muenchen/ and @title =~ /regression|M5P|GaussP/ - return false - elsif @uri =~/ambit2/ and @title =~ /pKa/ || @title =~ /Regression|Caco/ - return false - elsif @uri =~/majority/ - return (@uri =~ /class/) != nil + end + + # Lazy Structure Activity Relationship class + class Lazar + + include Model + include Algorithm + + attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim, :subjectid + + def initialize(uri=nil) + + if uri + super uri else - raise "unknown model, uri:'"+@uri.to_s+"' title:'"+@title.to_s+"'" + super CONFIG[:services]["opentox-model"] end - end - end - - class Lazar < Generic - - attr_accessor :feature_dataset_uri, :effects, :activities, :p_values, :fingerprints, :features - - def initialize - @source = "http://github.com/helma/opentox-model" - @algorithm = File.join(@@config[:services]["opentox-algorithm"],"lazar") - #@independent_variables = File.join(@@config[:services]["opentox-algorithm"],"fminer#BBRC_representative") + + @metadata[OT.algorithm] = File.join(CONFIG[:services]["opentox-algorithm"],"lazar") + @features = [] @effects = {} @activities = {} @p_values = {} @fingerprints = {} + + @feature_calculation_algorithm = "Substructure.match" + @similarity_algorithm = "Similarity.tanimoto" + @prediction_algorithm = "Neighbors.weighted_majority_vote" + + @min_sim = 0.3 + end - def save - @features.uniq! - resource = RestClient::Resource.new(@@config[:services]["opentox-model"], :user => @@users[:users].keys[0], :password => @@users[:users].values[0]) - resource.post(self.to_yaml, :content_type => "application/x-yaml").chomp.to_s + # Get URIs of all lazar models + # @return [Array] List of lazar model URIs + def self.all(subjectid=nil) + RestClientWrapper.get(CONFIG[:services]["opentox-model"], :subjectid => subjectid).to_s.split("\n") end - def self.find_all - RestClientWrapper.get(@@config[:services]["opentox-model"]).chomp.split("\n") + # Find a lazar model + # @param [String] uri Model URI + # @return [OpenTox::Model::Lazar] lazar model + def self.find(uri, subjectid=nil) + YAML.load RestClientWrapper.get(uri,{:accept => 'application/x-yaml', :subjectid => subjectid}) end - def self.predict(compound_uri,model_uri) - #RestClientWrapper.post(model_uri,{:compound_uri => compound_uri, :accept => 'application/x-yaml'}) - `curl -X POST -d 'compound_uri=#{compound_uri}' -H 'Accept:application/x-yaml' #{model_uri}` + # Create a new lazar model + # @param [optional,Hash] params Parameters for the lazar algorithm (OpenTox::Algorithm::Lazar) + # @return [OpenTox::Model::Lazar] lazar model + def self.create(params) + lazar_algorithm = OpenTox::Algorithm::Generic.new File.join( CONFIG[:services]["opentox-algorithm"],"lazar") + model_uri = lazar_algorithm.run(params) + OpenTox::Model::Lazar.find(model_uri, params[:subjectid]) end - end - - class PropertyLazar < Generic - - attr_accessor :feature_dataset_uri, :properties, :features, :activities#, :effects, :p_values - - def initialize - @source = "http://github.com/helma/opentox-model" - @algorithm = File.join(@@config[:services]["opentox-algorithm"],"property_lazar") - #@independent_variables = File.join(@@config[:services]["opentox-algorithm"],"fminer#BBRC_representative") - @features = [] - #@effects = {} - @activities = {} - #@p_values = {} - @properties = {} + + # Get a parameter value + # @param [String] param Parameter name + # @return [String] Parameter value + def parameter(param) + @metadata[OT.parameters].collect{|p| p[OT.paramValue] if p[DC.title] == param}.compact.first end - def save - @features.uniq! - resource = RestClient::Resource.new(@@config[:services]["opentox-model"], :user => @@users[:users].keys[0], :password => @@users[:users].values[0]) - resource.post(self.to_yaml, :content_type => "application/x-yaml").chomp.to_s + # Predict a dataset + # @param [String] dataset_uri Dataset URI + # @param [optional,subjectid] + # @param [optional,OpenTox::Task] waiting_task (can be a OpenTox::Subtask as well), progress is updated accordingly + # @return [OpenTox::Dataset] Dataset with predictions + def predict_dataset(dataset_uri, subjectid=nil, waiting_task=nil) + @prediction_dataset = Dataset.create(CONFIG[:services]["opentox-dataset"], subjectid) + @prediction_dataset.add_metadata({ + OT.hasSource => @uri, + DC.creator => @uri, + DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )), + OT.parameters => [{DC.title => "dataset_uri", OT.paramValue => dataset_uri}] + }) + d = Dataset.new(dataset_uri,subjectid) + d.load_compounds(subjectid) + count = 0 + d.compounds.each do |compound_uri| + begin + predict(compound_uri,false,subjectid) + count += 1 + waiting_task.progress( count/d.compounds.size.to_f*100.0 ) if waiting_task + rescue => ex + LOGGER.warn "prediction for compound "+compound_uri.to_s+" failed: "+ex.message + end + end + @prediction_dataset.save(subjectid) + @prediction_dataset end - def self.find_all - RestClientWrapper.get(@@config[:services]["opentox-model"]).chomp.split("\n") + # Predict a compound + # @param [String] compound_uri Compound URI + # @param [optinal,Boolean] verbose Verbose prediction (output includes neighbors and features) + # @return [OpenTox::Dataset] Dataset with prediction + def predict(compound_uri,verbose=false,subjectid=nil) + + @compound = Compound.new compound_uri + features = {} + + unless @prediction_dataset + #@prediction_dataset = cached_prediction + #return @prediction_dataset if cached_prediction + @prediction_dataset = Dataset.create(CONFIG[:services]["opentox-dataset"], subjectid) + @prediction_dataset.add_metadata( { + OT.hasSource => @uri, + DC.creator => @uri, + # TODO: fix dependentVariable + DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )), + OT.parameters => [{DC.title => "compound_uri", OT.paramValue => compound_uri}] + } ) + end + + return @prediction_dataset if database_activity(subjectid) + + neighbors + prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})") + + prediction_feature_uri = File.join( @prediction_dataset.uri, "feature", "prediction", File.basename(@metadata[OT.dependentVariables]),@prediction_dataset.compounds.size.to_s) + # TODO: fix dependentVariable + @prediction_dataset.metadata[OT.dependentVariables] = prediction_feature_uri + + if @neighbors.size == 0 + @prediction_dataset.add_feature(prediction_feature_uri, { + OT.isA => OT.MeasuredFeature, + OT.hasSource => @uri, + DC.creator => @uri, + DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )), + OT.error => "No similar compounds in training dataset.", + OT.parameters => [{DC.title => "compound_uri", OT.paramValue => compound_uri}] + }) + @prediction_dataset.add @compound.uri, prediction_feature_uri, prediction[:prediction] + + else + @prediction_dataset.add_feature(prediction_feature_uri, { + OT.isA => OT.ModelPrediction, + OT.hasSource => @uri, + DC.creator => @uri, + DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )), + OT.prediction => prediction[:prediction], + OT.confidence => prediction[:confidence], + OT.parameters => [{DC.title => "compound_uri", OT.paramValue => compound_uri}] + }) + @prediction_dataset.add @compound.uri, prediction_feature_uri, prediction[:prediction] + + if verbose + if @feature_calculation_algorithm == "Substructure.match" + f = 0 + @compound_features.each do |feature| + feature_uri = File.join( @prediction_dataset.uri, "feature", "descriptor", f.to_s) + features[feature] = feature_uri + @prediction_dataset.add_feature(feature_uri, { + OT.isA => OT.Substructure, + OT.smarts => feature, + OT.pValue => @p_values[feature], + OT.effect => @effects[feature] + }) + @prediction_dataset.add @compound.uri, feature_uri, true + f+=1 + end + else + @compound_features.each do |feature| + features[feature] = feature + @prediction_dataset.add @compound.uri, feature, true + end + end + n = 0 + @neighbors.each do |neighbor| + neighbor_uri = File.join( @prediction_dataset.uri, "feature", "neighbor", n.to_s ) + @prediction_dataset.add_feature(neighbor_uri, { + OT.compound => neighbor[:compound], + OT.similarity => neighbor[:similarity], + OT.measuredActivity => neighbor[:activity], + OT.isA => OT.Neighbor + }) + @prediction_dataset.add @compound.uri, neighbor_uri, true + f = 0 unless f + neighbor[:features].each do |feature| + if @feature_calculation_algorithm == "Substructure.match" + feature_uri = File.join( @prediction_dataset.uri, "feature", "descriptor", f.to_s) unless feature_uri = features[feature] + else + feature_uri = feature + end + @prediction_dataset.add neighbor[:compound], feature_uri, true + unless features.has_key? feature + features[feature] = feature_uri + @prediction_dataset.add_feature(feature_uri, { + OT.isA => OT.Substructure, + OT.smarts => feature, + OT.pValue => @p_values[feature], + OT.effect => @effects[feature] + }) + f+=1 + end + end + n+=1 + end + # what happens with dataset predictions? + end + end + + @prediction_dataset.save(subjectid) + @prediction_dataset + end + + # Find neighbors and store them as object variable + def neighbors + + @compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm + + @neighbors = [] + @fingerprints.each do |training_compound,training_features| + sim = eval("#{@similarity_algorithm}(@compound_features,training_features,@p_values)") + if sim > @min_sim + @activities[training_compound].each do |act| + @neighbors << { + :compound => training_compound, + :similarity => sim, + :features => training_features, + :activity => act + } + end + end + end + + end + + # Find database activities and store them in @prediction_dataset + # @return [Boolean] true if compound has databasse activities, false if not + def database_activity(subjectid) + if @activities[@compound.uri] + @activities[@compound.uri].each { |act| @prediction_dataset.add @compound.uri, @metadata[OT.dependentVariables], act } + @prediction_dataset.add_metadata(OT.hasSource => @metadata[OT.trainingDataset]) + @prediction_dataset.save(subjectid) + true + else + false + end + end + + # Save model at model service + def save(subjectid) + self.uri = RestClientWrapper.post(@uri,self.to_yaml,{:content_type => "application/x-yaml", :subjectid => subjectid}) end - def self.predict(compound_uri,model_uri) - #RestClientWrapper.post(model_uri,{:compound_uri => compound_uri, :accept => 'application/x-yaml'}) - `curl -X POST -d 'compound_uri=#{compound_uri}' -H 'Accept:application/x-yaml' #{model_uri}` + # Delete model at model service + def delete(subjectid) + RestClientWrapper.delete(@uri, :subjectid => subjectid) unless @uri == CONFIG[:services]["opentox-model"] end + end end end |