summaryrefslogtreecommitdiff
path: root/lib/model.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/model.rb')
-rw-r--r--lib/model.rb480
1 files changed, 3 insertions, 477 deletions
diff --git a/lib/model.rb b/lib/model.rb
index a806b74..95aa9ff 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -1,488 +1,14 @@
module OpenTox
- module Model
-
- include OpenTox
-
- # Find a lazar model
- # @param [String] uri Model URI
- # @return [OpenTox::Model::Lazar] lazar model
- def self.find(uri, subjectid=nil)
- if CONFIG[:yaml_hosts].include?(URI.parse(uri).host)
- YAML.load RestClientWrapper.get(uri,{:accept => 'application/x-yaml', :subjectid => subjectid})
- else
- parser = Parser::Owl::Feature.new(uri, @subjectid)
- @metadata = parser.load_uri.metadata
- end
- end
-
- # Get URIs of all models
- # @return [Array] List of lazar model URIs
- def self.all(subjectid=nil)
- RestClientWrapper.get(CONFIG[:services]["opentox-model"], :subjectid => subjectid).to_s.split("\n")
- end
+ class Model
# Run a model with parameters
# @param [Hash] params Parameters for OpenTox model
# @param [optional,OpenTox::Task] waiting_task (can be a OpenTox::Subtask as well), progress is updated accordingly
# @return [text/uri-list] Task or resource URI
- def run( params, accept_header=nil, waiting_task=nil )
- unless accept_header
- if CONFIG[:yaml_hosts].include?(URI.parse(@uri).host)
- accept_header = 'application/x-yaml'
- else
- accept_header = 'application/rdf+xml'
- end
- end
- LOGGER.info "running model "+@uri.to_s+", params: "+params.inspect+", accept: "+accept_header.to_s
- RestClientWrapper.post(@uri,params,{:accept => accept_header},waiting_task).to_s
- end
-
- # Generic OpenTox model class for all API compliant services
- class Generic
- include Model
-
- # Find Generic Opentox Model via URI, and loads metadata, could raise NotFound/NotAuthorized error
- # @param [String] uri Model URI
- # @return [OpenTox::Model::Generic] Model instance
- def self.find(uri,subjectid=nil)
- return nil unless uri
- model = Generic.new(uri,subjectid)
- model.load_metadata
- raise "could not load model metadata '"+uri.to_s+"'" if model.metadata==nil or model.metadata.size==0
- model
- end
-
- # provides feature type, possible types are "regression" or "classification"
- # @return [String] feature type, "unknown" if type could not be estimated
- def feature_type
- unless @feature_type
- load_predicted_variables unless @predicted_variable
- @feature_type = OpenTox::Feature.find( @predicted_variable, @subjectid ).feature_type
- end
- @feature_type
- end
-
- def predicted_variable
- load_predicted_variables unless @predicted_variable
- @predicted_variable
- end
-
- def predicted_confidence
- load_predicted_variables unless @predicted_confidence
- @predicted_confidence
- end
-
- private
- def load_predicted_variables
- load_metadata if @metadata==nil or @metadata.size==0 or (@metadata.size==1 && @metadata.values[0]==@uri)
- if @metadata[OT.predictedVariables]
- predictedVariables = @metadata[OT.predictedVariables]
- if predictedVariables.is_a?(Array)
- if (predictedVariables.size==1)
- @predicted_variable = predictedVariables[0]
- elsif (predictedVariables.size==2)
- # PENDING identify confidence
- conf_index = -1
- predictedVariables.size.times do |i|
- f = OpenTox::Feature.find(predictedVariables[i])
- conf_index = i if f.metadata[DC.title]=~/(?i)confidence/
- end
- raise "could not estimate predicted variable from model: '"+uri.to_s+
- "', number of predicted-variables==2, but no confidence found" if conf_index==-1
- @predicted_variable = predictedVariables[1-conf_index]
- @predicted_confidence = predictedVariables[conf_index]
- else
- raise "could not estimate predicted variable from model: '"+uri.to_s+"', number of predicted-variables > 2"
- end
- else
- raise "could not estimate predicted variable from model: '"+uri.to_s+"', predicted-variables is no array"
- end
- end
- raise "could not estimate predicted variable from model: '"+uri.to_s+"'" unless @predicted_variable
- end
+ def run params=nil
+ post params, {:accept => 'text/uri-list'}
end
- # Lazy Structure Activity Relationship class
- class Lazar < Generic
-
- #include Model
- include Algorithm
-
- attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim, :subjectid, :prop_kernel, :value_map
-
- def initialize(uri=nil,subjectid=nil)
-
- if uri
- super uri
- else
- super CONFIG[:services]["opentox-model"]
- end
-
- @metadata[OT.algorithm] = File.join(CONFIG[:services]["opentox-algorithm"],"lazar")
-
- @features = []
- @effects = {}
- @activities = {}
- @p_values = {}
- @fingerprints = {}
- @value_map = {}
-
- @feature_calculation_algorithm = "Substructure.match"
- @similarity_algorithm = "Similarity.tanimoto"
- @prediction_algorithm = "Neighbors.weighted_majority_vote"
-
- @min_sim = 0.3
- @prop_kernel = false
-
- end
-
- # Find a lazar model via URI, and loads metadata, could raise NotFound/NotAuthorized error
- # @param [String] uri Model URI
- # @return [OpenTox::Model::Generic] Model instance
- def self.find(uri,subjectid=nil)
- return nil unless uri
- model = Lazar.new(uri,subjectid)
- model.load_metadata
- raise "could not load model metadata '"+uri.to_s+"'" if model.metadata==nil or model.metadata.size==0
- model
- end
-
- # Create a new lazar model
- # @param [optional,Hash] params Parameters for the lazar algorithm (OpenTox::Algorithm::Lazar)
- # @return [OpenTox::Model::Lazar] lazar model
- def self.create(params)
- subjectid = params[:subjectid]
- lazar_algorithm = OpenTox::Algorithm::Generic.new File.join( CONFIG[:services]["opentox-algorithm"],"lazar")
- model_uri = lazar_algorithm.run(params)
- OpenTox::Model::Lazar.find(model_uri, subjectid)
- end
-
-=begin
- # Get a parameter value
- # @param [String] param Parameter name
- # @return [String] Parameter value
- def parameter(param)
- @metadata[OT.parameters].collect{|p| p[OT.paramValue] if p[DC.title] == param}.compact.first
- end
-
- # Predict a dataset
- # @param [String] dataset_uri Dataset URI
- # @param [optional,subjectid]
- # @param [optional,OpenTox::Task] waiting_task (can be a OpenTox::Subtask as well), progress is updated accordingly
- # @return [OpenTox::Dataset] Dataset with predictions
- def predict_dataset(dataset_uri, subjectid=nil, waiting_task=nil)
- @prediction_dataset = Dataset.create(CONFIG[:services]["opentox-dataset"], subjectid)
- @prediction_dataset.add_metadata({
- OT.hasSource => @uri,
- DC.creator => @uri,
- DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )),
- OT.parameters => [{DC.title => "dataset_uri", OT.paramValue => dataset_uri}]
- })
- d = Dataset.new(dataset_uri,subjectid)
- d.load_compounds(subjectid)
- count = 0
- d.compounds.each do |compound_uri|
- begin
- predict(compound_uri,false,subjectid)
- count += 1
- waiting_task.progress( count/d.compounds.size.to_f*100.0 ) if waiting_task
- rescue => ex
- LOGGER.warn "prediction for compound "+compound_uri.to_s+" failed: "+ex.message
- end
- end
- @prediction_dataset.save(subjectid)
- @prediction_dataset
- end
-
- # Predict a compound
- # @param [String] compound_uri Compound URI
- # @param [optinal,Boolean] verbose Verbose prediction (output includes neighbors and features)
- # @return [OpenTox::Dataset] Dataset with prediction
- def predict(compound_uri,verbose=false,subjectid=nil)
-
- @compound = Compound.new compound_uri
- features = {}
-
- unless @prediction_dataset
- @prediction_dataset = Dataset.create(CONFIG[:services]["opentox-dataset"], subjectid)
- @prediction_dataset.add_metadata( {
- OT.hasSource => @uri,
- DC.creator => @uri,
- DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )),
- OT.parameters => [{DC.title => "compound_uri", OT.paramValue => compound_uri}]
- } )
- end
-
- return @prediction_dataset if database_activity(subjectid)
-
- load_metadata(subjectid)
- case OpenTox::Feature.find(metadata[OT.dependentVariables]).feature_type
- when "classification"
- # AM: Balancing, see http://www.maunz.de/wordpress/opentox/2011/balanced-lazar
- l = Array.new # larger
- s = Array.new # smaller fraction
-
- raise "no fingerprints in model" if @fingerprints.size==0
-
- @fingerprints.each do |training_compound,training_features|
- @activities[training_compound].each do |act|
- case act.to_s
- when "false"
- l << training_compound
- when "true"
- s << training_compound
- else
- LOGGER.warn "BLAZAR: Activity #{act.to_s} should not be reached."
- end
- end
- end
- if s.size > l.size then
- l,s = s,l # happy swapping
- LOGGER.info "BLAZAR: |s|=#{s.size}, |l|=#{l.size}."
- end
- # determine ratio
- modulo = l.size.divmod(s.size)# modulo[0]=ratio, modulo[1]=rest
- LOGGER.info "BLAZAR: Balance: #{modulo[0]}, rest #{modulo[1]}."
-
- # AM: Balanced predictions
- addon = (modulo[1].to_f/modulo[0]).ceil # what will be added in each round
- slack = (addon!=0 ? modulo[1].divmod(addon)[1] : 0) # what remains for the last round
- position = 0
- predictions = Array.new
-
- prediction_best=nil
- neighbors_best=nil
-
- begin
- for i in 1..modulo[0] do
- (i == modulo[0]) && (slack>0) ? lr_size = s.size + slack : lr_size = s.size + addon # determine fraction
- LOGGER.info "BLAZAR: Neighbors round #{i}: #{position} + #{lr_size}."
- neighbors_balanced(s, l, position, lr_size) # get ratio fraction of larger part
- if @prop_kernel && @prediction_algorithm.include?("svm")
- props = get_props
- else
- props = nil
- end
- prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values}, props)")
- if prediction_best.nil? || prediction[:confidence].abs > prediction_best[:confidence].abs
- prediction_best=prediction
- neighbors_best=@neighbors
- end
- position = position + lr_size
- end
- rescue Exception => e
- LOGGER.error "BLAZAR failed in prediction: "+e.class.to_s+": "+e.message
- end
-
- prediction=prediction_best
- @neighbors=neighbors_best
- ### END AM balanced predictions
-
- else # AM: no balancing
- LOGGER.info "LAZAR: Unbalanced."
- neighbors
- if @prop_kernel && @prediction_algorithm.include?("svm")
- props = get_props
- else
- props = nil
- end
- prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values}, props)")
- end
-
- value_feature_uri = File.join( @uri, "predicted", "value")
- confidence_feature_uri = File.join( @uri, "predicted", "confidence")
-
- #prediction_feature_uris = {value_feature_uri => prediction[:prediction], confidence_feature_uri => prediction[:confidence]}
- #prediction_feature_uris[value_feature_uri] = nil if @neighbors.size == 0 or prediction[:prediction].nil?
-
- @prediction_dataset.metadata[OT.dependentVariables] = @metadata[OT.dependentVariables]
- @prediction_dataset.metadata[OT.predictedVariables] = [value_feature_uri, confidence_feature_uri]
-
- if OpenTox::Feature.find(metadata[OT.dependentVariables]).feature_type == "classification"
- @prediction_dataset.add @compound.uri, value_feature_uri, @value_map[prediction[:prediction]]
- else
- @prediction_dataset.add @compound.uri, value_feature_uri, prediction[:prediction]
- end
- @prediction_dataset.add @compound.uri, confidence_feature_uri, prediction[:confidence]
- #prediction_feature_uris.each do |prediction_feature_uri,value|
- #@prediction_dataset.add @compound.uri, prediction_feature_uri, @value_map[value]
- #end
-
- if verbose
- if @feature_calculation_algorithm == "Substructure.match"
- f = 0
- @compound_features.each do |feature|
- feature_uri = File.join( @prediction_dataset.uri, "feature", "descriptor", f.to_s)
- features[feature] = feature_uri
- @prediction_dataset.add_feature(feature_uri, {
- RDF.type => [OT.Substructure],
- OT.smarts => feature,
- OT.pValue => @p_values[feature],
- OT.effect => @effects[feature]
- })
- @prediction_dataset.add @compound.uri, feature_uri, true
- f+=1
- end
- else
- @compound_features.each do |feature|
- features[feature] = feature
- @prediction_dataset.add @compound.uri, feature, true
- end
- end
- n = 0
- @neighbors.each do |neighbor|
- neighbor_uri = File.join( @prediction_dataset.uri, "feature", "neighbor", n.to_s )
- @prediction_dataset.add_feature(neighbor_uri, {
- OT.compound => neighbor[:compound],
- OT.similarity => neighbor[:similarity],
- OT.measuredActivity => neighbor[:activity],
- RDF.type => [OT.Neighbor]
- })
- @prediction_dataset.add @compound.uri, neighbor_uri, true
- f = 0 unless f
- neighbor[:features].each do |feature|
- if @feature_calculation_algorithm == "Substructure.match"
- feature_uri = File.join( @prediction_dataset.uri, "feature", "descriptor", f.to_s) unless feature_uri = features[feature]
- else
- feature_uri = feature
- end
- @prediction_dataset.add neighbor[:compound], feature_uri, true
- unless features.has_key? feature
- features[feature] = feature_uri
- @prediction_dataset.add_feature(feature_uri, {
- RDF.type => [OT.Substructure],
- OT.smarts => feature,
- OT.pValue => @p_values[feature],
- OT.effect => @effects[feature]
- })
- f+=1
- end
- end
- n+=1
- end
- end
- #end
-
- @prediction_dataset.save(subjectid)
- @prediction_dataset
- end
-
- # Calculate the propositionalization matrix aka instantiation matrix (0/1 entries for features)
- # Same for the vector describing the query compound
- def get_props
- matrix = Array.new
- begin
- @neighbors.each do |n|
- n = n[:compound]
- row = []
- @features.each do |f|
- if ! @fingerprints[n].nil?
- row << (@fingerprints[n].include?(f) ? 0.0 : @p_values[f])
- else
- row << 0.0
- end
- end
- matrix << row
- end
- row = []
- @features.each do |f|
- row << (@compound.match([f]).size == 0 ? 0.0 : @p_values[f])
- end
- rescue Exception => e
- LOGGER.debug "get_props failed with '" + $! + "'"
- end
- [ matrix, row ]
- end
-
- # Find neighbors and store them as object variable, access only a subset of compounds for that.
- def neighbors_balanced(s, l, start, offset)
- @compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm
- @neighbors = []
- [ l[start, offset ] , s ].flatten.each do |training_compound| # AM: access only a balanced subset
- training_features = @fingerprints[training_compound]
- add_neighbor training_features, training_compound
- end
-
- end
-
- # Find neighbors and store them as object variable, access all compounds for that.
- def neighbors
- @compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm
- @neighbors = []
- @fingerprints.each do |training_compound,training_features| # AM: access all compounds
- add_neighbor training_features, training_compound
- end
- end
-
- # Adds a neighbor to @neighbors if it passes the similarity threshold.
- def add_neighbor(training_features, training_compound)
- sim = eval("#{@similarity_algorithm}(@compound_features,training_features,@p_values)")
- if sim > @min_sim
- @activities[training_compound].each do |act|
- @neighbors << {
- :compound => training_compound,
- :similarity => sim,
- :features => training_features,
- :activity => act
- }
- end
- end
- end
-
- # Find database activities and store them in @prediction_dataset
- # @return [Boolean] true if compound has databasse activities, false if not
- def database_activity(subjectid)
- if @activities[@compound.uri]
- @activities[@compound.uri].each { |act| @prediction_dataset.add @compound.uri, @metadata[OT.dependentVariables], act }
- @prediction_dataset.add_metadata(OT.hasSource => @metadata[OT.trainingDataset])
- @prediction_dataset.save(subjectid)
- true
- else
- false
- end
- end
-
- def prediction_features
- [prediction_value_feature,prediction_confidence_feature]
- end
-
- def prediction_value_feature
- dependent_uri = @metadata[OT.dependentVariables].first
- feature = OpenTox::Feature.new File.join( @uri, "predicted", "value")
- feature.add_metadata( {
- RDF.type => [OT.ModelPrediction],
- OT.hasSource => @uri,
- DC.creator => @uri,
- DC.title => URI.decode(File.basename( dependent_uri )),
- OWL.sameAs => dependent_uri
- })
- feature
- end
-
- def prediction_confidence_feature
- dependent_uri = @metadata[OT.dependentVariables].first
- feature = OpenTox::Feature.new File.join( @uri, "predicted", "confidence")
- feature.add_metadata( {
- RDF.type => [OT.ModelPrediction],
- OT.hasSource => @uri,
- DC.creator => @uri,
- DC.title => "#{URI.decode(File.basename( dependent_uri ))} confidence"
- })
- feature
- end
-
- # Save model at model service
- def save(subjectid)
- self.uri = RestClientWrapper.post(@uri,self.to_yaml,{:content_type => "application/x-yaml", :subjectid => subjectid})
- end
-
- # Delete model at model service
- def delete(subjectid)
- RestClientWrapper.delete(@uri, :subjectid => subjectid) unless @uri == CONFIG[:services]["opentox-model"]
- end
-
-=end
- end
end
end