summary refs log tree commit diff
diff options
context:
space:
mode:
author Andreas Maunz <andreas@maunz.de> 2011-06-24 10:27:39 +0200
committer Andreas Maunz <andreas@maunz.de> 2011-06-24 10:27:39 +0200
commit 1fad135b34741f75af1dc3bac95a6380459caab8 (patch)
tree 4d0c65e33ede2a44c1529c916e1fb7d7d335d08b
parent b92a57d48fc1a5a648ba4b68957357af95648391 (diff)
parent 1d3d27cb689db3091c4ac6e429f2b0f5a198dcdf (diff)
Merge branch 'development' into multi
Conflicts: lib/model.rb
-rw-r--r-- lib/dataset.rb 12
-rw-r--r-- lib/model.rb 260
-rw-r--r-- lib/parser.rb 3
3 files changed, 142 insertions, 133 deletions
diff --git a/lib/dataset.rb b/lib/dataset.rb
index 784bb2a..f13c0d3 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -376,11 +376,14 @@ module OpenTox
end
def value(compound)
- @data_entries[compound.uri].collect{|f,v| v.first if f.match(/value/)}.compact.first
+ v = nil
+ v = @data_entries[compound.uri].collect{|f,v| v.first if f.match(/value/)}.compact.first if @data_entries[compound.uri]
+ v = nil if v.is_a? Array and v.empty?
+ v
end
def confidence(compound)
- @data_entries[compound.uri].collect{|f,v| v.first if f.match(/confidence/)}.compact.first
+ @data_entries[compound.uri].collect{|f,v| v.first if f.match(/confidence/)}.compact.first if @data_entries[compound.uri]
end
def descriptors(compound)
@@ -388,12 +391,11 @@ module OpenTox
end
def measured_activities(compound)
- source = @metadata[OT.hasSource]
- @data_entries[compound.uri].collect{|f,v| v if f.match(/#{source}/)}.compact.flatten
+ @data_entries[compound.uri].collect{|f,v| v if f.match(/#{@metadata[OT.hasSource]}/)}.compact.flatten if @data_entries[compound.uri]
end
def neighbors(compound)
- @data_entries[compound.uri].collect{|f,v| @features[f] if f.match(/neighbor/)}.compact
+ @data_entries[compound.uri].collect{|f,v| @features[f] if f.match(/neighbor/)}.compact if @data_entries[compound.uri]
end
# def errors(compound)
diff --git a/lib/model.rb b/lib/model.rb
index 41d9335..7a44c40 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -88,8 +88,8 @@ module OpenTox
# Lazy Structure Activity Relationship class
class Lazar
- include Model
include Algorithm
+ include Model
attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim, :subjectid, :prop_kernel, :value_map, :balanced
@@ -143,6 +143,18 @@ module OpenTox
OpenTox::Model::Lazar.find(model_uri, subjectid)
end
+ def run( params, accept_header=nil, waiting_task=nil )
+ unless accept_header
+ if CONFIG[:yaml_hosts].include?(URI.parse(@uri).host)
+ accept_header = 'application/x-yaml'
+ else
+ accept_header = 'application/rdf+xml'
+ end
+ end
+ LOGGER.info "running model "+@uri.to_s+", params: "+params.inspect+", accept: "+accept_header.to_s
+ RestClientWrapper.post(@uri,params,{:accept => accept_header},waiting_task).to_s
+ end
+
# Get a parameter value
# @param [String] param Parameter name
# @return [String] Parameter value
@@ -175,7 +187,7 @@ module OpenTox
LOGGER.warn "prediction for compound "+compound_uri.to_s+" failed: "+ex.message
end
end
- @prediction_dataset.save(subjectid)
+ #@prediction_dataset.save(subjectid)
@prediction_dataset
end
@@ -198,141 +210,98 @@ module OpenTox
} )
end
- return @prediction_dataset if database_activity(subjectid)
-
- load_metadata(subjectid)
- if @balanced && OpenTox::Feature.find(metadata[OT.dependentVariables]).feature_type == "classification"
-
- # AM: Balancing, see http://www.maunz.de/wordpress/opentox/2011/balanced-lazar
- l = Array.new # larger
- s = Array.new # smaller fraction
-
- raise "no fingerprints in model" if @fingerprints.size==0
-
- @fingerprints.each do |training_compound,training_features|
- @activities[training_compound].each do |act|
- case act.to_s
- when "0"
- l << training_compound
- when "1"
- s << training_compound
- else
- LOGGER.warn "BLAZAR: Activity #{act.to_s} should not be reached (supports only two classes)."
+ unless database_activity(subjectid) # adds database activity to @prediction_dataset
+
+ if @balanced && OpenTox::Feature.find(metadata[OT.dependentVariables]).feature_type == "classification"
+ # AM: Balancing, see http://www.maunz.de/wordpress/opentox/2011/balanced-lazar
+ l = Array.new # larger
+ s = Array.new # smaller fraction
+
+ raise "no fingerprints in model" if @fingerprints.size==0
+
+ @fingerprints.each do |training_compound,training_features|
+ @activities[training_compound].each do |act|
+ case act.to_s
+ when "0"
+ l << training_compound
+ when "1"
+ s << training_compound
+ else
+ LOGGER.warn "BLAZAR: Activity #{act.to_s} should not be reached (supports only two classes)."
+ end
end
end
- end
- if s.size > l.size then
- l,s = s,l # happy swapping
- LOGGER.info "BLAZAR: |s|=#{s.size}, |l|=#{l.size}."
- end
- # determine ratio
- modulo = l.size.divmod(s.size)# modulo[0]=ratio, modulo[1]=rest
- LOGGER.info "BLAZAR: Balance: #{modulo[0]}, rest #{modulo[1]}."
-
- # AM: Balanced predictions
- addon = (modulo[1].to_f/modulo[0]).ceil # what will be added in each round
- slack = (addon!=0 ? modulo[1].divmod(addon)[1] : 0) # what remains for the last round
- position = 0
- predictions = Array.new
-
- prediction_best=nil
- neighbors_best=nil
-
- begin
- for i in 1..modulo[0] do
- (i == modulo[0]) && (slack>0) ? lr_size = s.size + slack : lr_size = s.size + addon # determine fraction
- LOGGER.info "BLAZAR: Neighbors round #{i}: #{position} + #{lr_size}."
- neighbors_balanced(s, l, position, lr_size) # get ratio fraction of larger part
- if @prop_kernel && @prediction_algorithm.include?("svm")
- props = get_props
- else
- props = nil
- end
- prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values}, props)")
- if prediction_best.nil? || prediction[:confidence].abs > prediction_best[:confidence].abs
- prediction_best=prediction
- neighbors_best=@neighbors
+ if s.size > l.size then
+ l,s = s,l # happy swapping
+ LOGGER.info "BLAZAR: |s|=#{s.size}, |l|=#{l.size}."
+ end
+ # determine ratio
+ modulo = l.size.divmod(s.size)# modulo[0]=ratio, modulo[1]=rest
+ LOGGER.info "BLAZAR: Balance: #{modulo[0]}, rest #{modulo[1]}."
+
+ # AM: Balanced predictions
+ addon = (modulo[1].to_f/modulo[0]).ceil # what will be added in each round
+ slack = (addon!=0 ? modulo[1].divmod(addon)[1] : 0) # what remains for the last round
+ position = 0
+ predictions = Array.new
+
+ prediction_best=nil
+ neighbors_best=nil
+
+ begin
+ for i in 1..modulo[0] do
+ (i == modulo[0]) && (slack>0) ? lr_size = s.size + slack : lr_size = s.size + addon # determine fraction
+ LOGGER.info "BLAZAR: Neighbors round #{i}: #{position} + #{lr_size}."
+ neighbors_balanced(s, l, position, lr_size) # get ratio fraction of larger part
+ if @prop_kernel && @prediction_algorithm.include?("svm")
+ props = get_props
+ else
+ props = nil
+ end
+ prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values}, props)")
+ if prediction_best.nil? || prediction[:confidence].abs > prediction_best[:confidence].abs
+ prediction_best=prediction
+ neighbors_best=@neighbors
+ end
+ position = position + lr_size
end
- position = position + lr_size
+ rescue Exception => e
+ LOGGER.error "BLAZAR failed in prediction: "+e.class.to_s+": "+e.message
end
- rescue Exception => e
- LOGGER.error "BLAZAR failed in prediction: "+e.class.to_s+": "+e.message
- end
- prediction=prediction_best
- @neighbors=neighbors_best
- ### END AM balanced predictions
+ prediction=prediction_best
+ @neighbors=neighbors_best
+ ### END AM balanced predictions
- else # AM: no balancing or regression
- LOGGER.info "LAZAR: Unbalanced."
- neighbors
- if @prop_kernel && @prediction_algorithm.include?("svm")
- props = get_props
- else
- props = nil
+ else # AM: no balancing or regression
+ LOGGER.info "LAZAR: Unbalanced."
+ neighbors
+ if @prop_kernel && @prediction_algorithm.include?("svm")
+ props = get_props
+ else
+ props = nil
+ end
+ prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values}, props)")
end
- prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values}, props)")
- end
-
- value_feature_uri = File.join( @uri, "predicted", "value")
- confidence_feature_uri = File.join( @uri, "predicted", "confidence")
+
+ value_feature_uri = File.join( @uri, "predicted", "value")
+ confidence_feature_uri = File.join( @uri, "predicted", "confidence")
- #prediction_feature_uris = {value_feature_uri => prediction[:prediction], confidence_feature_uri => prediction[:confidence]}
- #prediction_feature_uris[value_feature_uri] = nil if @neighbors.size == 0 or prediction[:prediction].nil?
+ @prediction_dataset.metadata[OT.dependentVariables] = @metadata[OT.dependentVariables] unless @prediction_dataset.metadata[OT.dependentVariables]
+ @prediction_dataset.metadata[OT.predictedVariables] = [value_feature_uri, confidence_feature_uri] unless @prediction_dataset.metadata[OT.predictedVariables]
- @prediction_dataset.metadata[OT.dependentVariables] = @metadata[OT.dependentVariables]
- @prediction_dataset.metadata[OT.predictedVariables] = [value_feature_uri, confidence_feature_uri]
-
- if OpenTox::Feature.find(metadata[OT.dependentVariables]).feature_type == "classification"
- @prediction_dataset.add @compound.uri, value_feature_uri, @value_map[prediction[:prediction]]
- else
- @prediction_dataset.add @compound.uri, value_feature_uri, prediction[:prediction]
- end
- @prediction_dataset.add @compound.uri, confidence_feature_uri, prediction[:confidence]
- #prediction_feature_uris.each do |prediction_feature_uri,value|
- #@prediction_dataset.add @compound.uri, prediction_feature_uri, @value_map[value]
- #end
-
- if verbose
- if @feature_calculation_algorithm == "Substructure.match"
- f = 0
- @compound_features.each do |feature|
- feature_uri = File.join( @prediction_dataset.uri, "feature", "descriptor", f.to_s)
- features[feature] = feature_uri
- @prediction_dataset.add_feature(feature_uri, {
- RDF.type => [OT.Substructure],
- OT.smarts => feature,
- OT.pValue => @p_values[feature],
- OT.effect => @effects[feature]
- })
- @prediction_dataset.add @compound.uri, feature_uri, true
- f+=1
- end
+ if OpenTox::Feature.find(metadata[OT.dependentVariables]).feature_type == "classification"
+ @prediction_dataset.add @compound.uri, value_feature_uri, @value_map[prediction[:prediction]]
else
- @compound_features.each do |feature|
- features[feature] = feature
- @prediction_dataset.add @compound.uri, feature, true
- end
+ @prediction_dataset.add @compound.uri, value_feature_uri, prediction[:prediction]
end
- n = 0
- @neighbors.each do |neighbor|
- neighbor_uri = File.join( @prediction_dataset.uri, "feature", "neighbor", n.to_s )
- @prediction_dataset.add_feature(neighbor_uri, {
- OT.compound => neighbor[:compound],
- OT.similarity => neighbor[:similarity],
- OT.measuredActivity => neighbor[:activity],
- RDF.type => [OT.Neighbor]
- })
- @prediction_dataset.add @compound.uri, neighbor_uri, true
- f = 0 unless f
- neighbor[:features].each do |feature|
- if @feature_calculation_algorithm == "Substructure.match"
- feature_uri = File.join( @prediction_dataset.uri, "feature", "descriptor", f.to_s) unless feature_uri = features[feature]
- else
- feature_uri = feature
- end
- @prediction_dataset.add neighbor[:compound], feature_uri, true
- unless features.has_key? feature
+ @prediction_dataset.add @compound.uri, confidence_feature_uri, prediction[:confidence]
+
+ if verbose
+ if @feature_calculation_algorithm == "Substructure.match"
+ f = 0
+ @compound_features.each do |feature|
+ feature_uri = File.join( @prediction_dataset.uri, "feature", "descriptor", f.to_s)
features[feature] = feature_uri
@prediction_dataset.add_feature(feature_uri, {
RDF.type => [OT.Substructure],
@@ -340,13 +309,48 @@ module OpenTox
OT.pValue => @p_values[feature],
OT.effect => @effects[feature]
})
+ @prediction_dataset.add @compound.uri, feature_uri, true
f+=1
end
+ else
+ @compound_features.each do |feature|
+ features[feature] = feature
+ @prediction_dataset.add @compound.uri, feature, true
+ end
+ end
+ n = 0
+ @neighbors.each do |neighbor|
+ neighbor_uri = File.join( @prediction_dataset.uri, "feature", "neighbor", n.to_s )
+ @prediction_dataset.add_feature(neighbor_uri, {
+ OT.compound => neighbor[:compound],
+ OT.similarity => neighbor[:similarity],
+ OT.measuredActivity => neighbor[:activity],
+ RDF.type => [OT.Neighbor]
+ })
+ @prediction_dataset.add @compound.uri, neighbor_uri, true
+ f = 0 unless f
+ neighbor[:features].each do |feature|
+ if @feature_calculation_algorithm == "Substructure.match"
+ feature_uri = File.join( @prediction_dataset.uri, "feature", "descriptor", f.to_s) unless feature_uri = features[feature]
+ else
+ feature_uri = feature
+ end
+ @prediction_dataset.add neighbor[:compound], feature_uri, true
+ unless features.has_key? feature
+ features[feature] = feature_uri
+ @prediction_dataset.add_feature(feature_uri, {
+ RDF.type => [OT.Substructure],
+ OT.smarts => feature,
+ OT.pValue => @p_values[feature],
+ OT.effect => @effects[feature]
+ })
+ f+=1
+ end
+ end
+ n+=1
end
- n+=1
end
end
- #end
@prediction_dataset.save(subjectid)
@prediction_dataset
diff --git a/lib/parser.rb b/lib/parser.rb
index 5625f60..89fcb71 100644
--- a/lib/parser.rb
+++ b/lib/parser.rb
@@ -76,6 +76,9 @@ module OpenTox
@metadata[OT.parameters] << parameter
end
end
+ #@metadata.each do |k,v|
+ #v = v.first if v and v.size == 1
+ #end
@metadata
end