summaryrefslogtreecommitdiff
path: root/lib/model.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/model.rb')
-rw-r--r--lib/model.rb263
1 files changed, 181 insertions, 82 deletions
diff --git a/lib/model.rb b/lib/model.rb
index 048de85..ff0ce0e 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -23,7 +23,7 @@ module OpenTox
# Generic OpenTox model class for all API compliant services
class Generic
include Model
-
+
# Find Generic Opentox Model via URI, and loads metadata, could raise NotFound/NotAuthorized error
# @param [String] uri Model URI
# @return [OpenTox::Model::Generic] Model instance
@@ -34,42 +34,75 @@ module OpenTox
raise "could not load model metadata '"+uri.to_s+"'" if model.metadata==nil or model.metadata.size==0
model
end
-
- # provides feature type, possible types are "regression" or "classification"
- # @return [String] feature type, "unknown" if type could not be estimated
+
+ # provides feature type, possible types are "regression" or "classification"
+ # @return [String] feature type, "unknown" if type could not be estimated
def feature_type(subjectid=nil)
- return @feature_type if @feature_type
-
- # dynamically perform restcalls if necessary
- load_metadata(subjectid) if @metadata==nil or @metadata.size==0 or (@metadata.size==1 && @metadata.values[0]==@uri)
- algorithm = OpenTox::Algorithm::Generic.find(@metadata[OT.algorithm], subjectid)
- algorithm_title = algorithm ? algorithm.metadata[DC.title] : nil
- algorithm_type = algorithm ? algorithm.metadata[RDF.type] : nil
- dependent_variable = OpenTox::Feature.find( @metadata[OT.dependentVariables],subjectid )
- dependent_variable_type = dependent_variable ? dependent_variable.feature_type : nil
- type_indicators = [dependent_variable_type, @metadata[RDF.type], @metadata[DC.title], @uri, algorithm_type, algorithm_title].flatten
- type_indicators.each do |type|
- case type
- when /(?i)classification/
- @feature_type = "classification"
- break
- when /(?i)regression/
- @feature_type = "regression"
- end
+ unless @feature_type
+ load_predicted_variables( subjectid ) unless @predicted_variable
+ @feature_type = OpenTox::Feature.find( @predicted_variable, subjectid ).feature_type
end
- raise "unknown model "+type_indicators.inspect unless @feature_type
@feature_type
end
-
- end
+ def predicted_variable( subjectid )
+ load_predicted_variables( subjectid ) unless @predicted_variable
+ @predicted_variable
+ end
+
+ def predicted_variables( subjectid )
+ load_predicted_variables( subjectid, false ) unless @predicted_variables
+ @predicted_variables
+ end
+
+ def predicted_confidence( subjectid )
+ load_predicted_variables( subjectid ) unless @predicted_confidence
+ @predicted_confidence
+ end
+
+ private
+ def load_predicted_variables( subjectid=nil, use_confidence=true )
+ load_metadata(subjectid) if @metadata==nil or @metadata.size==0 or (@metadata.size==1 && @metadata.values[0]==@uri)
+ if @metadata[OT.predictedVariables]
+ predictedVariables = @metadata[OT.predictedVariables]
+ if predictedVariables.is_a?(Array)
+ if (predictedVariables.size==1)
+ @predicted_variable = predictedVariables[0]
+ elsif (predictedVariables.size>=2)
+ # PENDING identify confidence
+ if use_confidence
+ conf_index = -1
+ predictedVariables.size.times do |i|
+ f = OpenTox::Feature.find(predictedVariables[i], subjectid)
+ conf_index = i if f.metadata[DC.title]=~/(?i)confidence/
+ end
+ raise "could not estimate predicted variable from model: '"+uri.to_s+
+ "', number of predicted-variables==2, but no confidence found" if conf_index==-1
+ end
+ if (predictedVariables.size==2) && use_confidence
+ @predicted_variable = predictedVariables[1-conf_index]
+ @predicted_confidence = predictedVariables[conf_index]
+ else
+ @predicted_variables = predictedVariables
+ end
+ else
+ raise "could not estimate predicted variable from model: '"+uri.to_s+"', number of predicted-variables == 0"
+ end
+ else
+ raise "could not estimate predicted variable from model: '"+uri.to_s+"', predicted-variables is no array"
+ end
+ end
+ raise "could not estimate predicted variable from model: '"+uri.to_s+"'" unless (@predicted_variable || @predicted_variables)
+ end
+ end
+
# Lazy Structure Activity Relationship class
class Lazar
- include Model
include Algorithm
+ include Model
- attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim, :subjectid
+ attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim, :subjectid, :prop_kernel, :value_map, :nr_hits, :transform, :conf_stdev, :prediction_min_max
def initialize(uri=nil)
@@ -78,7 +111,7 @@ module OpenTox
else
super CONFIG[:services]["opentox-model"]
end
-
+
@metadata[OT.algorithm] = File.join(CONFIG[:services]["opentox-algorithm"],"lazar")
@features = []
@@ -86,12 +119,18 @@ module OpenTox
@activities = {}
@p_values = {}
@fingerprints = {}
+ @value_map = {}
+ @prediction_min_max = []
@feature_calculation_algorithm = "Substructure.match"
@similarity_algorithm = "Similarity.tanimoto"
@prediction_algorithm = "Neighbors.weighted_majority_vote"
-
+
+ @nr_hits = false
@min_sim = 0.3
+ @prop_kernel = false
+ @transform = { "class" => "NOP" }
+ @conf_stdev = false
end
@@ -111,13 +150,25 @@ module OpenTox
# Create a new lazar model
# @param [optional,Hash] params Parameters for the lazar algorithm (OpenTox::Algorithm::Lazar)
# @return [OpenTox::Model::Lazar] lazar model
- def self.create(params)
+ def self.create(params, waiting_task=nil )
subjectid = params[:subjectid]
lazar_algorithm = OpenTox::Algorithm::Generic.new File.join( CONFIG[:services]["opentox-algorithm"],"lazar")
- model_uri = lazar_algorithm.run(params)
+ model_uri = lazar_algorithm.run(params, waiting_task)
OpenTox::Model::Lazar.find(model_uri, subjectid)
end
+ def run( params, accept_header=nil, waiting_task=nil )
+ unless accept_header
+ if CONFIG[:yaml_hosts].include?(URI.parse(@uri).host)
+ accept_header = 'application/x-yaml'
+ else
+ accept_header = 'application/rdf+xml'
+ end
+ end
+ LOGGER.info "running model "+@uri.to_s+", params: "+params.inspect+", accept: "+accept_header.to_s
+ RestClientWrapper.post(@uri,params,{:accept => accept_header},waiting_task).to_s
+ end
+
# Get a parameter value
# @param [String] param Parameter name
# @return [String] Parameter value
@@ -131,6 +182,7 @@ module OpenTox
# @param [optional,OpenTox::Task] waiting_task (can be a OpenTox::Subtask as well), progress is updated accordingly
# @return [OpenTox::Dataset] Dataset with predictions
def predict_dataset(dataset_uri, subjectid=nil, waiting_task=nil)
+
@prediction_dataset = Dataset.create(CONFIG[:services]["opentox-dataset"], subjectid)
@prediction_dataset.add_metadata({
OT.hasSource => @uri,
@@ -150,7 +202,7 @@ module OpenTox
LOGGER.warn "prediction for compound "+compound_uri.to_s+" failed: "+ex.message
end
end
- @prediction_dataset.save(subjectid)
+ #@prediction_dataset.save(subjectid)
@prediction_dataset
end
@@ -164,49 +216,52 @@ module OpenTox
features = {}
unless @prediction_dataset
- #@prediction_dataset = cached_prediction
- #return @prediction_dataset if cached_prediction
@prediction_dataset = Dataset.create(CONFIG[:services]["opentox-dataset"], subjectid)
@prediction_dataset.add_metadata( {
OT.hasSource => @uri,
DC.creator => @uri,
- # TODO: fix dependentVariable
DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )),
OT.parameters => [{DC.title => "compound_uri", OT.paramValue => compound_uri}]
} )
end
- return @prediction_dataset if database_activity(subjectid)
-
- neighbors
- prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})")
-
- prediction_feature_uri = File.join( @prediction_dataset.uri, "feature", "prediction", File.basename(@metadata[OT.dependentVariables]),@prediction_dataset.compounds.size.to_s)
- # TODO: fix dependentVariable
- @prediction_dataset.metadata[OT.dependentVariables] = prediction_feature_uri
-
- if @neighbors.size == 0
- @prediction_dataset.add_feature(prediction_feature_uri, {
- RDF.type => [OT.MeasuredFeature],
- OT.hasSource => @uri,
- DC.creator => @uri,
- DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )),
- OT.error => "No similar compounds in training dataset.",
- OT.parameters => [{DC.title => "compound_uri", OT.paramValue => compound_uri}]
- })
- @prediction_dataset.add @compound.uri, prediction_feature_uri, prediction[:prediction]
+ if OpenTox::Feature.find(metadata[OT.dependentVariables]).feature_type == "regression"
+ all_activities = []
+ all_activities = @activities.values.flatten.collect! { |i| i.to_f }
+ @prediction_min_max[0] = (all_activities.to_scale.min/2)
+ @prediction_min_max[1] = (all_activities.to_scale.max*2)
+ end
- else
- @prediction_dataset.add_feature(prediction_feature_uri, {
- RDF.type => [OT.ModelPrediction],
- OT.hasSource => @uri,
- DC.creator => @uri,
- DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )),
- OT.prediction => prediction[:prediction],
- OT.confidence => prediction[:confidence],
- OT.parameters => [{DC.title => "compound_uri", OT.paramValue => compound_uri}]
- })
- @prediction_dataset.add @compound.uri, prediction_feature_uri, prediction[:prediction]
+ unless database_activity(subjectid) # adds database activity to @prediction_dataset
+
+ neighbors
+ prediction = eval("#{@prediction_algorithm} ( { :neighbors => @neighbors,
+ :compound => @compound,
+ :features => @features,
+ :p_values => @p_values,
+ :fingerprints => @fingerprints,
+ :similarity_algorithm => @similarity_algorithm,
+ :prop_kernel => @prop_kernel,
+ :value_map => @value_map,
+ :nr_hits => @nr_hits,
+ :conf_stdev => @conf_stdev,
+ :prediction_min_max => @prediction_min_max,
+ :transform => @transform } ) ")
+
+ value_feature_uri = File.join( @uri, "predicted", "value")
+ confidence_feature_uri = File.join( @uri, "predicted", "confidence")
+
+ @prediction_dataset.metadata[OT.dependentVariables] = @metadata[OT.dependentVariables] unless @prediction_dataset.metadata[OT.dependentVariables]
+ @prediction_dataset.metadata[OT.predictedVariables] = [value_feature_uri, confidence_feature_uri] unless @prediction_dataset.metadata[OT.predictedVariables]
+
+ if OpenTox::Feature.find(metadata[OT.dependentVariables], subjectid).feature_type == "classification"
+ @prediction_dataset.add @compound.uri, value_feature_uri, @value_map[prediction[:prediction]]
+ else
+ @prediction_dataset.add @compound.uri, value_feature_uri, prediction[:prediction]
+ end
+ @prediction_dataset.add @compound.uri, confidence_feature_uri, prediction[:confidence]
+ @prediction_dataset.features[value_feature_uri][DC.title] = @prediction_dataset.metadata[DC.title]
+ @prediction_dataset.features[confidence_feature_uri][DC.title] = "Confidence"
if verbose
if @feature_calculation_algorithm == "Substructure.match"
@@ -260,7 +315,6 @@ module OpenTox
end
n+=1
end
- # what happens with dataset predictions?
end
end
@@ -268,33 +322,49 @@ module OpenTox
@prediction_dataset
end
- # Find neighbors and store them as object variable
- def neighbors
+
+ # Find neighbors and store them as object variable, access all compounds for that.
+ def neighbors
@compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm
-
@neighbors = []
- @fingerprints.each do |training_compound,training_features|
- sim = eval("#{@similarity_algorithm}(@compound_features,training_features,@p_values)")
- if sim > @min_sim
- @activities[training_compound].each do |act|
- @neighbors << {
- :compound => training_compound,
- :similarity => sim,
- :features => training_features,
- :activity => act
- }
- end
- end
+ @fingerprints.keys.each do |training_compound| # AM: access all compounds
+ add_neighbor @fingerprints[training_compound].keys, training_compound
end
+ end
+ # Adds a neighbor to @neighbors if it passes the similarity threshold.
+ def add_neighbor(training_features, training_compound)
+ compound_features_hits = {}
+ training_compound_features_hits = {}
+ if @nr_hits
+ compound_features_hits = @compound.match_hits(@compound_features)
+ training_compound_features_hits = @fingerprints[training_compound]
+ #LOGGER.debug "dv ------------ training_compound_features_hits:#{training_compound_features_hits.class} #{training_compound_features_hits}"
+ end
+ params = {}
+ params[:nr_hits] = @nr_hits
+ params[:compound_features_hits] = compound_features_hits
+ params[:training_compound_features_hits] = training_compound_features_hits
+
+ sim = eval("#{@similarity_algorithm}(training_features, @compound_features, @p_values, params)")
+ if sim > @min_sim
+ @activities[training_compound].each do |act|
+ @neighbors << {
+ :compound => training_compound,
+ :similarity => sim,
+ :features => training_features,
+ :activity => act
+ }
+ end
+ end
end
# Find database activities and store them in @prediction_dataset
# @return [Boolean] true if compound has databasse activities, false if not
def database_activity(subjectid)
if @activities[@compound.uri]
- @activities[@compound.uri].each { |act| @prediction_dataset.add @compound.uri, @metadata[OT.dependentVariables], act }
+ @activities[@compound.uri].each { |act| @prediction_dataset.add @compound.uri, @metadata[OT.dependentVariables], @value_map[act] }
@prediction_dataset.add_metadata(OT.hasSource => @metadata[OT.trainingDataset])
@prediction_dataset.save(subjectid)
true
@@ -303,6 +373,35 @@ module OpenTox
end
end
+ def prediction_features
+ [prediction_value_feature,prediction_confidence_feature]
+ end
+
+ def prediction_value_feature
+ dependent_uri = @metadata[OT.dependentVariables].first
+ feature = OpenTox::Feature.new File.join( @uri, "predicted", "value")
+ feature.add_metadata( {
+ RDF.type => [OT.ModelPrediction],
+ OT.hasSource => @uri,
+ DC.creator => @uri,
+ DC.title => URI.decode(File.basename( dependent_uri )),
+ OWL.sameAs => dependent_uri
+ })
+ feature
+ end
+
+ def prediction_confidence_feature
+ dependent_uri = @metadata[OT.dependentVariables].first
+ feature = OpenTox::Feature.new File.join( @uri, "predicted", "confidence")
+ feature.add_metadata( {
+ RDF.type => [OT.ModelPrediction],
+ OT.hasSource => @uri,
+ DC.creator => @uri,
+ DC.title => "#{URI.decode(File.basename( dependent_uri ))} confidence"
+ })
+ feature
+ end
+
# Save model at model service
def save(subjectid)
self.uri = RestClientWrapper.post(@uri,self.to_yaml,{:content_type => "application/x-yaml", :subjectid => subjectid})