summaryrefslogtreecommitdiff
path: root/lib/model.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/model.rb')
-rw-r--r--lib/model.rb391
1 files changed, 283 insertions, 108 deletions
diff --git a/lib/model.rb b/lib/model.rb
index e36b538..74408d8 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -1,143 +1,318 @@
module OpenTox
- module Model
- class Generic
+ module Model
- MODEL_ATTRIBS = [:uri, :title, :creator, :date, :format, :predictedVariables, :independentVariables, :dependentVariables, :trainingDataset, :algorithm]
- MODEL_ATTRIBS.each{ |a| attr_accessor(a) }
+ include OpenTox
- def self.find(uri)
- owl = OpenTox::Owl.from_uri(uri, "Model")
- return self.new(owl)
- end
-
- def self.to_rdf(model)
- owl = OpenTox::Owl.create 'Model', model.uri
- (MODEL_ATTRIBS - [:uri]).each do |a|
- owl.set(a.to_s,model.send(a.to_s))
+ # Run a model with parameters (accept_header defaults to YAML for hosts listed in CONFIG[:yaml_hosts], RDF/XML otherwise)
+ # @param [Hash] params Parameters for OpenTox model
+ # @param [optional,OpenTox::Task] waiting_task (can be an OpenTox::Subtask as well), progress is updated accordingly
+ # @return [text/uri-list] Task or resource URI
+ def run( params, accept_header=nil, waiting_task=nil )
+ unless accept_header
+ if CONFIG[:yaml_hosts].include?(URI.parse(@uri).host)
+ accept_header = 'application/x-yaml'
+ else
+ accept_header = 'application/rdf+xml'
end
- owl.rdf
end
+ LOGGER.info "running model "+@uri.to_s+", params: "+params.inspect+", accept: "+accept_header.to_s
+ RestClientWrapper.post(@uri,params,{:accept => accept_header},waiting_task).to_s
+ end
+
+ # Generic OpenTox model class for all API compliant services
+ class Generic
+ include Model
- protected
- def initialize(owl)
- MODEL_ATTRIBS.each do |a|
- self.send("#{a.to_s}=".to_sym, owl.get(a.to_s)) unless a==:uri
- end
- @uri = owl.uri
- if ENV['RACK_ENV'] =~ /test|debug/
- begin
- raise "uri invalid" unless Utils.is_uri?(@uri)
- raise "no predicted variables" unless @predictedVariables and @predictedVariables.size>0
- rescue => ex
- RestClientWrapper.raise_uri_error "invalid model: '"+ex.message+"'\n"+self.to_yaml+"\n",@uri.to_s
+ # Find a generic OpenTox model via URI and load its metadata; could raise a NotFound/NotAuthorized error
+ # @param [String] uri Model URI
+ # @return [OpenTox::Model::Generic] Model instance
+ def self.find(uri,subjectid=nil)
+ return nil unless uri
+ model = Generic.new(uri)
+ model.load_metadata(subjectid)
+ raise "could not load model metadata '"+uri.to_s+"'" if model.metadata==nil or model.metadata.size==0
+ model
+ end
+
+ # provides feature type, possible types are "regression" or "classification"
+ # @return [String] feature type; raises an error if the type could not be determined
+ def feature_type(subjectid=nil)
+ return @feature_type if @feature_type
+
+ # dynamically perform restcalls if necessary
+ load_metadata(subjectid) if @metadata==nil or @metadata.size==0 or (@metadata.size==1 && @metadata.values[0]==@uri)
+ algorithm = OpenTox::Algorithm::Generic.find(@metadata[OT.algorithm], subjectid)
+ algorithm_title = algorithm ? algorithm.metadata[DC.title] : nil
+ algorithm_type = algorithm ? algorithm.metadata[OT.isA] : nil
+ dependent_variable = OpenTox::Feature.find( @metadata[OT.dependentVariables],subjectid )
+ dependent_variable_type = dependent_variable ? dependent_variable.feature_type : nil
+ type_indicators = [dependent_variable_type, @metadata[OT.isA], @metadata[DC.title],
+ @uri, algorithm_type, algorithm_title]
+ type_indicators.each do |type|
+ case type
+ when /(?i)classification/
+ @feature_type = "classification"
+ break
+ when /(?i)regression/
+ @feature_type = "regression"
end
- LOGGER.warn "model has no dependent variable" unless @dependentVariables and @dependentVariables.size>0
- LOGGER.warn "model has no algorithm" unless @algorithm and @algorithm.size>0
- LOGGER.warn "model has no indenpendent variables" unless @independentVariables
end
+ raise "unknown model "+type_indicators.inspect unless @feature_type
+ @feature_type
end
- end
-
- class PredictionModel < Generic
- def self.build( algorithm_uri, algorithm_params )
-
- LOGGER.debug "Build model, algorithm_uri:"+algorithm_uri.to_s+", algorithm_parms: "+algorithm_params.inspect.to_s
- uri = OpenTox::RestClientWrapper.post(algorithm_uri,algorithm_params).to_s
- LOGGER.debug "Build model done: "+uri.to_s
- RestClientWrapper.raise_uri_error("Invalid build model result: '"+uri.to_s+"'", algorithm_uri, algorithm_params ) unless Utils.model_uri?(uri)
- return PredictionModel.find(uri)
- end
-
- def predict_dataset( dataset_uri )
-
- LOGGER.debug "Predict dataset: "+dataset_uri.to_s+" with model "+@uri.to_s
- uri = RestClientWrapper.post(@uri, {:accept => "text/uri-list", :dataset_uri=>dataset_uri})
- RestClientWrapper.raise_uri_error("Prediciton result no dataset uri: "+uri.to_s, @uri, {:dataset_uri=>dataset_uri} ) unless Utils.dataset_uri?(uri)
- uri
- end
-
- def classification?
- #HACK replace with request to ontology server
- if @title =~ /(?i)classification/
- return true
- elsif @title =~ /(?i)regression/
- return false
- elsif @uri =~/ntua/ and @title =~ /mlr/
- return false
- elsif @uri =~/tu-muenchen/ and @title =~ /regression|M5P|GaussP/
- return false
- elsif @uri =~/ambit2/ and @title =~ /pKa/ || @title =~ /Regression|Caco/
- return false
- elsif @uri =~/majority/
- return (@uri =~ /class/) != nil
+ end
+
+ # Lazy Structure Activity Relationship class
+ class Lazar
+
+ include Model
+ include Algorithm
+
+ attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim, :subjectid
+
+ def initialize(uri=nil)
+
+ if uri
+ super uri
else
- raise "unknown model, uri:'"+@uri.to_s+"' title:'"+@title.to_s+"'"
+ super CONFIG[:services]["opentox-model"]
end
- end
- end
-
- class Lazar < Generic
-
- attr_accessor :feature_dataset_uri, :effects, :activities, :p_values, :fingerprints, :features
-
- def initialize
- @source = "http://github.com/helma/opentox-model"
- @algorithm = File.join(@@config[:services]["opentox-algorithm"],"lazar")
- #@independent_variables = File.join(@@config[:services]["opentox-algorithm"],"fminer#BBRC_representative")
+
+ @metadata[OT.algorithm] = File.join(CONFIG[:services]["opentox-algorithm"],"lazar")
+
@features = []
@effects = {}
@activities = {}
@p_values = {}
@fingerprints = {}
+
+ @feature_calculation_algorithm = "Substructure.match"
+ @similarity_algorithm = "Similarity.tanimoto"
+ @prediction_algorithm = "Neighbors.weighted_majority_vote"
+
+ @min_sim = 0.3
+
end
- def save
- @features.uniq!
- resource = RestClient::Resource.new(@@config[:services]["opentox-model"], :user => @@users[:users].keys[0], :password => @@users[:users].values[0])
- resource.post(self.to_yaml, :content_type => "application/x-yaml").chomp.to_s
+ # Get URIs of all lazar models
+ # @return [Array] List of lazar model URIs
+ def self.all(subjectid=nil)
+ RestClientWrapper.get(CONFIG[:services]["opentox-model"], :subjectid => subjectid).to_s.split("\n")
end
- def self.find_all
- RestClientWrapper.get(@@config[:services]["opentox-model"]).chomp.split("\n")
+ # Find a lazar model
+ # @param [String] uri Model URI
+ # @return [OpenTox::Model::Lazar] lazar model
+ def self.find(uri, subjectid=nil)
+ YAML.load RestClientWrapper.get(uri,{:accept => 'application/x-yaml', :subjectid => subjectid})
end
- def self.predict(compound_uri,model_uri)
- #RestClientWrapper.post(model_uri,{:compound_uri => compound_uri, :accept => 'application/x-yaml'})
- `curl -X POST -d 'compound_uri=#{compound_uri}' -H 'Accept:application/x-yaml' #{model_uri}`
+ # Create a new lazar model
+ # @param [Hash] params Parameters for the lazar algorithm (OpenTox::Algorithm::Lazar)
+ # @return [OpenTox::Model::Lazar] lazar model
+ def self.create(params)
+ lazar_algorithm = OpenTox::Algorithm::Generic.new File.join( CONFIG[:services]["opentox-algorithm"],"lazar")
+ model_uri = lazar_algorithm.run(params)
+ OpenTox::Model::Lazar.find(model_uri, params[:subjectid])
end
- end
-
- class PropertyLazar < Generic
-
- attr_accessor :feature_dataset_uri, :properties, :features, :activities#, :effects, :p_values
-
- def initialize
- @source = "http://github.com/helma/opentox-model"
- @algorithm = File.join(@@config[:services]["opentox-algorithm"],"property_lazar")
- #@independent_variables = File.join(@@config[:services]["opentox-algorithm"],"fminer#BBRC_representative")
- @features = []
- #@effects = {}
- @activities = {}
- #@p_values = {}
- @properties = {}
+
+ # Get a parameter value
+ # @param [String] param Parameter name
+ # @return [String] Parameter value
+ def parameter(param)
+ @metadata[OT.parameters].collect{|p| p[OT.paramValue] if p[DC.title] == param}.compact.first
end
- def save
- @features.uniq!
- resource = RestClient::Resource.new(@@config[:services]["opentox-model"], :user => @@users[:users].keys[0], :password => @@users[:users].values[0])
- resource.post(self.to_yaml, :content_type => "application/x-yaml").chomp.to_s
+ # Predict a dataset
+ # @param [String] dataset_uri Dataset URI
+ # @param [optional] subjectid passed through to the dataset and prediction calls
+ # @param [optional,OpenTox::Task] waiting_task (can be an OpenTox::Subtask as well), progress is updated accordingly
+ # @return [OpenTox::Dataset] Dataset with predictions
+ def predict_dataset(dataset_uri, subjectid=nil, waiting_task=nil)
+ @prediction_dataset = Dataset.create(CONFIG[:services]["opentox-dataset"], subjectid)
+ @prediction_dataset.add_metadata({
+ OT.hasSource => @uri,
+ DC.creator => @uri,
+ DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )),
+ OT.parameters => [{DC.title => "dataset_uri", OT.paramValue => dataset_uri}]
+ })
+ d = Dataset.new(dataset_uri,subjectid)
+ d.load_compounds(subjectid)
+ count = 0
+ d.compounds.each do |compound_uri|
+ begin
+ predict(compound_uri,false,subjectid)
+ count += 1
+ waiting_task.progress( count/d.compounds.size.to_f*100.0 ) if waiting_task
+ rescue => ex
+ LOGGER.warn "prediction for compound "+compound_uri.to_s+" failed: "+ex.message
+ end
+ end
+ @prediction_dataset.save(subjectid)
+ @prediction_dataset
end
- def self.find_all
- RestClientWrapper.get(@@config[:services]["opentox-model"]).chomp.split("\n")
+ # Predict a compound
+ # @param [String] compound_uri Compound URI
+ # @param [optional,Boolean] verbose Verbose prediction (output includes neighbors and features)
+ # @return [OpenTox::Dataset] Dataset with prediction
+ def predict(compound_uri,verbose=false,subjectid=nil)
+
+ @compound = Compound.new compound_uri
+ features = {}
+
+ unless @prediction_dataset
+ #@prediction_dataset = cached_prediction
+ #return @prediction_dataset if cached_prediction
+ @prediction_dataset = Dataset.create(CONFIG[:services]["opentox-dataset"], subjectid)
+ @prediction_dataset.add_metadata( {
+ OT.hasSource => @uri,
+ DC.creator => @uri,
+ # TODO: fix dependentVariable
+ DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )),
+ OT.parameters => [{DC.title => "compound_uri", OT.paramValue => compound_uri}]
+ } )
+ end
+
+ return @prediction_dataset if database_activity(subjectid)
+
+ neighbors
+ prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})")
+
+ prediction_feature_uri = File.join( @prediction_dataset.uri, "feature", "prediction", File.basename(@metadata[OT.dependentVariables]),@prediction_dataset.compounds.size.to_s)
+ # TODO: fix dependentVariable
+ @prediction_dataset.metadata[OT.dependentVariables] = prediction_feature_uri
+
+ if @neighbors.size == 0
+ @prediction_dataset.add_feature(prediction_feature_uri, {
+ OT.isA => OT.MeasuredFeature,
+ OT.hasSource => @uri,
+ DC.creator => @uri,
+ DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )),
+ OT.error => "No similar compounds in training dataset.",
+ OT.parameters => [{DC.title => "compound_uri", OT.paramValue => compound_uri}]
+ })
+ @prediction_dataset.add @compound.uri, prediction_feature_uri, prediction[:prediction]
+
+ else
+ @prediction_dataset.add_feature(prediction_feature_uri, {
+ OT.isA => OT.ModelPrediction,
+ OT.hasSource => @uri,
+ DC.creator => @uri,
+ DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )),
+ OT.prediction => prediction[:prediction],
+ OT.confidence => prediction[:confidence],
+ OT.parameters => [{DC.title => "compound_uri", OT.paramValue => compound_uri}]
+ })
+ @prediction_dataset.add @compound.uri, prediction_feature_uri, prediction[:prediction]
+
+ if verbose
+ if @feature_calculation_algorithm == "Substructure.match"
+ f = 0
+ @compound_features.each do |feature|
+ feature_uri = File.join( @prediction_dataset.uri, "feature", "descriptor", f.to_s)
+ features[feature] = feature_uri
+ @prediction_dataset.add_feature(feature_uri, {
+ OT.isA => OT.Substructure,
+ OT.smarts => feature,
+ OT.pValue => @p_values[feature],
+ OT.effect => @effects[feature]
+ })
+ @prediction_dataset.add @compound.uri, feature_uri, true
+ f+=1
+ end
+ else
+ @compound_features.each do |feature|
+ features[feature] = feature
+ @prediction_dataset.add @compound.uri, feature, true
+ end
+ end
+ n = 0
+ @neighbors.each do |neighbor|
+ neighbor_uri = File.join( @prediction_dataset.uri, "feature", "neighbor", n.to_s )
+ @prediction_dataset.add_feature(neighbor_uri, {
+ OT.compound => neighbor[:compound],
+ OT.similarity => neighbor[:similarity],
+ OT.measuredActivity => neighbor[:activity],
+ OT.isA => OT.Neighbor
+ })
+ @prediction_dataset.add @compound.uri, neighbor_uri, true
+ f = 0 unless f
+ neighbor[:features].each do |feature|
+ if @feature_calculation_algorithm == "Substructure.match"
+ feature_uri = File.join( @prediction_dataset.uri, "feature", "descriptor", f.to_s) unless feature_uri = features[feature]
+ else
+ feature_uri = feature
+ end
+ @prediction_dataset.add neighbor[:compound], feature_uri, true
+ unless features.has_key? feature
+ features[feature] = feature_uri
+ @prediction_dataset.add_feature(feature_uri, {
+ OT.isA => OT.Substructure,
+ OT.smarts => feature,
+ OT.pValue => @p_values[feature],
+ OT.effect => @effects[feature]
+ })
+ f+=1
+ end
+ end
+ n+=1
+ end
+ # what happens with dataset predictions?
+ end
+ end
+
+ @prediction_dataset.save(subjectid)
+ @prediction_dataset
+ end
+
+ # Find neighbors and store them as object variable
+ def neighbors
+
+ @compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm
+
+ @neighbors = []
+ @fingerprints.each do |training_compound,training_features|
+ sim = eval("#{@similarity_algorithm}(@compound_features,training_features,@p_values)")
+ if sim > @min_sim
+ @activities[training_compound].each do |act|
+ @neighbors << {
+ :compound => training_compound,
+ :similarity => sim,
+ :features => training_features,
+ :activity => act
+ }
+ end
+ end
+ end
+
+ end
+
+ # Find database activities and store them in @prediction_dataset
+ # @return [Boolean] true if compound has database activities, false if not
+ def database_activity(subjectid)
+ if @activities[@compound.uri]
+ @activities[@compound.uri].each { |act| @prediction_dataset.add @compound.uri, @metadata[OT.dependentVariables], act }
+ @prediction_dataset.add_metadata(OT.hasSource => @metadata[OT.trainingDataset])
+ @prediction_dataset.save(subjectid)
+ true
+ else
+ false
+ end
+ end
+
+ # Save model at model service
+ def save(subjectid)
+ self.uri = RestClientWrapper.post(@uri,self.to_yaml,{:content_type => "application/x-yaml", :subjectid => subjectid})
end
- def self.predict(compound_uri,model_uri)
- #RestClientWrapper.post(model_uri,{:compound_uri => compound_uri, :accept => 'application/x-yaml'})
- `curl -X POST -d 'compound_uri=#{compound_uri}' -H 'Accept:application/x-yaml' #{model_uri}`
+ # Delete model at model service
+ def delete(subjectid)
+ RestClientWrapper.delete(@uri, :subjectid => subjectid) unless @uri == CONFIG[:services]["opentox-model"]
end
+
end
end
end