summary | refs | log | tree | commit | diff
path: root/lib/model.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/model.rb')
-rw-r--r-- lib/model.rb 146
1 files changed, 115 insertions, 31 deletions
diff --git a/lib/model.rb b/lib/model.rb
index 74408d8..998d2dc 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -23,7 +23,7 @@ module OpenTox
# Generic OpenTox model class for all API compliant services
class Generic
include Model
-
+
# Finds a generic OpenTox model via its URI and loads its metadata; may raise a NotFound/NotAuthorized error
# @param [String] uri Model URI
# @return [OpenTox::Model::Generic] Model instance
@@ -34,21 +34,20 @@ module OpenTox
raise "could not load model metadata '"+uri.to_s+"'" if model.metadata==nil or model.metadata.size==0
model
end
-
- # provides feature type, possible types are "regression" or "classification"
- # @return [String] feature type, "unknown" if type could not be estimated
+
+ # provides feature type, possible types are "regression" or "classification"
+ # @return [String] feature type, "unknown" if type could not be estimated
def feature_type(subjectid=nil)
return @feature_type if @feature_type
-
+
# dynamically perform restcalls if necessary
load_metadata(subjectid) if @metadata==nil or @metadata.size==0 or (@metadata.size==1 && @metadata.values[0]==@uri)
algorithm = OpenTox::Algorithm::Generic.find(@metadata[OT.algorithm], subjectid)
algorithm_title = algorithm ? algorithm.metadata[DC.title] : nil
- algorithm_type = algorithm ? algorithm.metadata[OT.isA] : nil
+ algorithm_type = algorithm ? algorithm.metadata[RDF.type] : nil
dependent_variable = OpenTox::Feature.find( @metadata[OT.dependentVariables],subjectid )
dependent_variable_type = dependent_variable ? dependent_variable.feature_type : nil
- type_indicators = [dependent_variable_type, @metadata[OT.isA], @metadata[DC.title],
- @uri, algorithm_type, algorithm_title]
+ type_indicators = [dependent_variable_type, @metadata[RDF.type], @metadata[DC.title], @uri, algorithm_type, algorithm_title].flatten
type_indicators.each do |type|
case type
when /(?i)classification/
@@ -61,9 +60,9 @@ module OpenTox
raise "unknown model "+type_indicators.inspect unless @feature_type
@feature_type
end
-
+
end
-
+
# Lazy Structure Activity Relationship class
class Lazar
@@ -79,7 +78,7 @@ module OpenTox
else
super CONFIG[:services]["opentox-model"]
end
-
+
@metadata[OT.algorithm] = File.join(CONFIG[:services]["opentox-algorithm"],"lazar")
@features = []
@@ -113,9 +112,10 @@ module OpenTox
# @param [optional,Hash] params Parameters for the lazar algorithm (OpenTox::Algorithm::Lazar)
# @return [OpenTox::Model::Lazar] lazar model
def self.create(params)
+ subjectid = params[:subjectid]
lazar_algorithm = OpenTox::Algorithm::Generic.new File.join( CONFIG[:services]["opentox-algorithm"],"lazar")
model_uri = lazar_algorithm.run(params)
- OpenTox::Model::Lazar.find(model_uri, params[:subjectid])
+ OpenTox::Model::Lazar.find(model_uri, subjectid)
end
# Get a parameter value
@@ -178,16 +178,72 @@ module OpenTox
return @prediction_dataset if database_activity(subjectid)
- neighbors
- prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})")
+ if metadata[RDF.type] == [OTA.ClassificationLazySingleTarget]
+ # AM: Balancing, see http://www.maunz.de/wordpress/opentox/2011/balanced-lazar
+ l = Array.new # larger
+ s = Array.new # smaller fraction
+ @fingerprints.each do |training_compound,training_features|
+ @activities[training_compound].each do |act|
+ case act.to_s
+ when "false"
+ l << training_compound
+ when "true"
+ s << training_compound
+ else
+ LOGGER.warn "BLAZAR: Activity #{act.to_s} should not be reached."
+ end
+ end
+ end
+ if s.size > l.size then
+ l,s = s,l # happy swapping
+ LOGGER.info "BLAZAR: |s|=#{s.size}, |l|=#{l.size}."
+ end
+ # determine ratio
+ modulo = l.size.divmod(s.size)# modulo[0]=ratio, modulo[1]=rest
+ LOGGER.info "BLAZAR: Balance: #{modulo[0]}, rest #{modulo[1]}."
+
+ # AM: Balanced predictions
+ addon = (modulo[1].to_f/modulo[0]).ceil # what will be added in each round
+ slack = modulo[1].divmod(addon)[1] # what remains for the last round
+ position = 0
+ predictions = Array.new
+
+ prediction_best=nil
+ neighbors_best=nil
+
+ begin
+ for i in 1..modulo[0] do
+ (i == modulo[0]) && (slack>0) ? lr_size = s.size + slack : lr_size = s.size + addon # determine fraction
+ LOGGER.info "BLAZAR: Neighbors round #{i}: #{position} + #{lr_size}."
+ neighbors_balanced(s, l, position, lr_size) # get ratio fraction of larger part
+ prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})")
+ if prediction_best.nil? || prediction[:confidence].abs > prediction_best[:confidence].abs
+ prediction_best=prediction
+ neighbors_best=@neighbors
+ end
+ position = position + lr_size
+ end
+ rescue Exception => e
+ LOGGER.error "BLAZAR failed in prediction: "+e.class.to_s+": "+e.message
+ end
+
+ prediction=prediction_best
+ @neighbors=neighbors_best
+ ### END AM balanced predictions
+
+ else # regression case: no balancing
+ neighbors
+ prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})")
+ end
+
prediction_feature_uri = File.join( @prediction_dataset.uri, "feature", "prediction", File.basename(@metadata[OT.dependentVariables]),@prediction_dataset.compounds.size.to_s)
# TODO: fix dependentVariable
@prediction_dataset.metadata[OT.dependentVariables] = prediction_feature_uri
if @neighbors.size == 0
@prediction_dataset.add_feature(prediction_feature_uri, {
- OT.isA => OT.MeasuredFeature,
+ RDF.type => [OT.MeasuredFeature],
OT.hasSource => @uri,
DC.creator => @uri,
DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )),
@@ -198,7 +254,7 @@ module OpenTox
else
@prediction_dataset.add_feature(prediction_feature_uri, {
- OT.isA => OT.ModelPrediction,
+ RDF.type => [OT.ModelPrediction],
OT.hasSource => @uri,
DC.creator => @uri,
DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )),
@@ -215,7 +271,7 @@ module OpenTox
feature_uri = File.join( @prediction_dataset.uri, "feature", "descriptor", f.to_s)
features[feature] = feature_uri
@prediction_dataset.add_feature(feature_uri, {
- OT.isA => OT.Substructure,
+ RDF.type => [OT.Substructure],
OT.smarts => feature,
OT.pValue => @p_values[feature],
OT.effect => @effects[feature]
@@ -236,7 +292,7 @@ module OpenTox
OT.compound => neighbor[:compound],
OT.similarity => neighbor[:similarity],
OT.measuredActivity => neighbor[:activity],
- OT.isA => OT.Neighbor
+ RDF.type => [OT.Neighbor]
})
@prediction_dataset.add @compound.uri, neighbor_uri, true
f = 0 unless f
@@ -250,7 +306,7 @@ module OpenTox
unless features.has_key? feature
features[feature] = feature_uri
@prediction_dataset.add_feature(feature_uri, {
- OT.isA => OT.Substructure,
+ RDF.type => [OT.Substructure],
OT.smarts => feature,
OT.pValue => @p_values[feature],
OT.effect => @effects[feature]
@@ -269,27 +325,55 @@ module OpenTox
end
# Find neighbors and store them as object variable
- def neighbors
-
+ def neighbors_balanced(s, l, start, offset)
@compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm
@neighbors = []
- @fingerprints.each do |training_compound,training_features|
- sim = eval("#{@similarity_algorithm}(@compound_features,training_features,@p_values)")
- if sim > @min_sim
- @activities[training_compound].each do |act|
- @neighbors << {
- :compound => training_compound,
- :similarity => sim,
- :features => training_features,
- :activity => act
- }
+ begin
+ #@fingerprints.each do |training_compound,training_features| # AM: this is original by CH
+ [ l[start, offset ] , s ].flatten.each do |training_compound| # AM: access only a balanced subset
+ training_features = @fingerprints[training_compound]
+ sim = eval("#{@similarity_algorithm}(@compound_features,training_features,@p_values)")
+ if sim > @min_sim
+ @activities[training_compound].each do |act|
+ this_neighbor = {
+ :compound => training_compound,
+ :similarity => sim,
+ :features => training_features,
+ :activity => act
+ }
+ @neighbors << this_neighbor
+ end
end
end
+ rescue Exception => e
+ LOGGER.error "BLAZAR failed in neighbors: "+e.class.to_s+": "+e.message
end
end
+
+ # Find neighbors and store them as object variable
+ def neighbors
+
+ @compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm
+
+ @neighbors = []
+ @fingerprints.each do |training_compound,training_features|
+ sim = eval("#{@similarity_algorithm}(@compound_features,training_features,@p_values)")
+ if sim > @min_sim
+ @activities[training_compound].each do |act|
+ @neighbors << {
+ :compound => training_compound,
+ :similarity => sim,
+ :features => training_features,
+ :activity => act
+ }
+ end
+ end
+ end
+ end
+
# Find database activities and store them in @prediction_dataset
# @return [Boolean] true if compound has database activities, false if not
def database_activity(subjectid)