diff options
author | Andreas Maunz <andreas@maunz.de> | 2011-06-24 10:27:39 +0200 |
---|---|---|
committer | Andreas Maunz <andreas@maunz.de> | 2011-06-24 10:27:39 +0200 |
commit | 1fad135b34741f75af1dc3bac95a6380459caab8 (patch) | |
tree | 4d0c65e33ede2a44c1529c916e1fb7d7d335d08b | |
parent | b92a57d48fc1a5a648ba4b68957357af95648391 (diff) | |
parent | 1d3d27cb689db3091c4ac6e429f2b0f5a198dcdf (diff) |
Merge branch 'development' into multi
Conflicts:
lib/model.rb
-rw-r--r-- | lib/dataset.rb | 12 | ||||
-rw-r--r-- | lib/model.rb | 260 | ||||
-rw-r--r-- | lib/parser.rb | 3 |
3 files changed, 142 insertions, 133 deletions
diff --git a/lib/dataset.rb b/lib/dataset.rb index 784bb2a..f13c0d3 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -376,11 +376,14 @@ module OpenTox end def value(compound) - @data_entries[compound.uri].collect{|f,v| v.first if f.match(/value/)}.compact.first + v = nil + v = @data_entries[compound.uri].collect{|f,v| v.first if f.match(/value/)}.compact.first if @data_entries[compound.uri] + v = nil if v.is_a? Array and v.empty? + v end def confidence(compound) - @data_entries[compound.uri].collect{|f,v| v.first if f.match(/confidence/)}.compact.first + @data_entries[compound.uri].collect{|f,v| v.first if f.match(/confidence/)}.compact.first if @data_entries[compound.uri] end def descriptors(compound) @@ -388,12 +391,11 @@ module OpenTox end def measured_activities(compound) - source = @metadata[OT.hasSource] - @data_entries[compound.uri].collect{|f,v| v if f.match(/#{source}/)}.compact.flatten + @data_entries[compound.uri].collect{|f,v| v if f.match(/#{@metadata[OT.hasSource]}/)}.compact.flatten if @data_entries[compound.uri] end def neighbors(compound) - @data_entries[compound.uri].collect{|f,v| @features[f] if f.match(/neighbor/)}.compact + @data_entries[compound.uri].collect{|f,v| @features[f] if f.match(/neighbor/)}.compact if @data_entries[compound.uri] end # def errors(compound) diff --git a/lib/model.rb b/lib/model.rb index 41d9335..7a44c40 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -88,8 +88,8 @@ module OpenTox # Lazy Structure Activity Relationship class class Lazar - include Model include Algorithm + include Model attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim, :subjectid, :prop_kernel, :value_map, :balanced @@ -143,6 +143,18 @@ module OpenTox OpenTox::Model::Lazar.find(model_uri, subjectid) end + def run( params, accept_header=nil, waiting_task=nil ) + unless accept_header + if CONFIG[:yaml_hosts].include?(URI.parse(@uri).host) + accept_header = 'application/x-yaml' + else + accept_header = 'application/rdf+xml' + end + end + LOGGER.info "running model "+@uri.to_s+", params: "+params.inspect+", accept: "+accept_header.to_s + RestClientWrapper.post(@uri,params,{:accept => accept_header},waiting_task).to_s + end + # Get a parameter value # @param [String] param Parameter name # @return [String] Parameter value @@ -175,7 +187,7 @@ module OpenTox LOGGER.warn "prediction for compound "+compound_uri.to_s+" failed: "+ex.message end end - @prediction_dataset.save(subjectid) + #@prediction_dataset.save(subjectid) @prediction_dataset end @@ -198,141 +210,98 @@ module OpenTox } ) end - return @prediction_dataset if database_activity(subjectid) - - load_metadata(subjectid) - if @balanced && OpenTox::Feature.find(metadata[OT.dependentVariables]).feature_type == "classification" - - # AM: Balancing, see http://www.maunz.de/wordpress/opentox/2011/balanced-lazar - l = Array.new # larger - s = Array.new # smaller fraction - - raise "no fingerprints in model" if @fingerprints.size==0 - - @fingerprints.each do |training_compound,training_features| - @activities[training_compound].each do |act| - case act.to_s - when "0" - l << training_compound - when "1" - s << training_compound - else - LOGGER.warn "BLAZAR: Activity #{act.to_s} should not be reached (supports only two classes)." + unless database_activity(subjectid) # adds database activity to @prediction_dataset + + if @balanced && OpenTox::Feature.find(metadata[OT.dependentVariables]).feature_type == "classification" + # AM: Balancing, see http://www.maunz.de/wordpress/opentox/2011/balanced-lazar + l = Array.new # larger + s = Array.new # smaller fraction + + raise "no fingerprints in model" if @fingerprints.size==0 + + @fingerprints.each do |training_compound,training_features| + @activities[training_compound].each do |act| + case act.to_s + when "0" + l << training_compound + when "1" + s << training_compound + else + LOGGER.warn "BLAZAR: Activity #{act.to_s} should not be reached (supports only two classes)." + end end end - end - if s.size > l.size then - l,s = s,l # happy swapping - LOGGER.info "BLAZAR: |s|=#{s.size}, |l|=#{l.size}." - end - # determine ratio - modulo = l.size.divmod(s.size)# modulo[0]=ratio, modulo[1]=rest - LOGGER.info "BLAZAR: Balance: #{modulo[0]}, rest #{modulo[1]}." - - # AM: Balanced predictions - addon = (modulo[1].to_f/modulo[0]).ceil # what will be added in each round - slack = (addon!=0 ? modulo[1].divmod(addon)[1] : 0) # what remains for the last round - position = 0 - predictions = Array.new - - prediction_best=nil - neighbors_best=nil - - begin - for i in 1..modulo[0] do - (i == modulo[0]) && (slack>0) ? lr_size = s.size + slack : lr_size = s.size + addon # determine fraction - LOGGER.info "BLAZAR: Neighbors round #{i}: #{position} + #{lr_size}." - neighbors_balanced(s, l, position, lr_size) # get ratio fraction of larger part - if @prop_kernel && @prediction_algorithm.include?("svm") - props = get_props - else - props = nil - end - prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values}, props)") - if prediction_best.nil? || prediction[:confidence].abs > prediction_best[:confidence].abs - prediction_best=prediction - neighbors_best=@neighbors + if s.size > l.size then + l,s = s,l # happy swapping + LOGGER.info "BLAZAR: |s|=#{s.size}, |l|=#{l.size}." + end + # determine ratio + modulo = l.size.divmod(s.size)# modulo[0]=ratio, modulo[1]=rest + LOGGER.info "BLAZAR: Balance: #{modulo[0]}, rest #{modulo[1]}." + + # AM: Balanced predictions + addon = (modulo[1].to_f/modulo[0]).ceil # what will be added in each round + slack = (addon!=0 ? modulo[1].divmod(addon)[1] : 0) # what remains for the last round + position = 0 + predictions = Array.new + + prediction_best=nil + neighbors_best=nil + + begin + for i in 1..modulo[0] do + (i == modulo[0]) && (slack>0) ? lr_size = s.size + slack : lr_size = s.size + addon # determine fraction + LOGGER.info "BLAZAR: Neighbors round #{i}: #{position} + #{lr_size}." + neighbors_balanced(s, l, position, lr_size) # get ratio fraction of larger part + if @prop_kernel && @prediction_algorithm.include?("svm") + props = get_props + else + props = nil + end + prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values}, props)") + if prediction_best.nil? || prediction[:confidence].abs > prediction_best[:confidence].abs + prediction_best=prediction + neighbors_best=@neighbors + end + position = position + lr_size end - position = position + lr_size + rescue Exception => e + LOGGER.error "BLAZAR failed in prediction: "+e.class.to_s+": "+e.message end - rescue Exception => e - LOGGER.error "BLAZAR failed in prediction: "+e.class.to_s+": "+e.message - end - prediction=prediction_best - @neighbors=neighbors_best - ### END AM balanced predictions + prediction=prediction_best + @neighbors=neighbors_best + ### END AM balanced predictions - else # AM: no balancing or regression - LOGGER.info "LAZAR: Unbalanced." - neighbors - if @prop_kernel && @prediction_algorithm.include?("svm") - props = get_props - else - props = nil + else # AM: no balancing or regression + LOGGER.info "LAZAR: Unbalanced." + neighbors + if @prop_kernel && @prediction_algorithm.include?("svm") + props = get_props + else + props = nil + end + prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values}, props)") end - prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values}, props)") - end - - value_feature_uri = File.join( @uri, "predicted", "value") - confidence_feature_uri = File.join( @uri, "predicted", "confidence") + + value_feature_uri = File.join( @uri, "predicted", "value") + confidence_feature_uri = File.join( @uri, "predicted", "confidence") - #prediction_feature_uris = {value_feature_uri => prediction[:prediction], confidence_feature_uri => prediction[:confidence]} - #prediction_feature_uris[value_feature_uri] = nil if @neighbors.size == 0 or prediction[:prediction].nil? + @prediction_dataset.metadata[OT.dependentVariables] = @metadata[OT.dependentVariables] unless @prediction_dataset.metadata[OT.dependentVariables] + @prediction_dataset.metadata[OT.predictedVariables] = [value_feature_uri, confidence_feature_uri] unless @prediction_dataset.metadata[OT.predictedVariables] - @prediction_dataset.metadata[OT.dependentVariables] = @metadata[OT.dependentVariables] - @prediction_dataset.metadata[OT.predictedVariables] = [value_feature_uri, confidence_feature_uri] - - if OpenTox::Feature.find(metadata[OT.dependentVariables]).feature_type == "classification" - @prediction_dataset.add @compound.uri, value_feature_uri, @value_map[prediction[:prediction]] - else - @prediction_dataset.add @compound.uri, value_feature_uri, prediction[:prediction] - end - @prediction_dataset.add @compound.uri, confidence_feature_uri, prediction[:confidence] - #prediction_feature_uris.each do |prediction_feature_uri,value| - #@prediction_dataset.add @compound.uri, prediction_feature_uri, @value_map[value] - #end - - if verbose - if @feature_calculation_algorithm == "Substructure.match" - f = 0 - @compound_features.each do |feature| - feature_uri = File.join( @prediction_dataset.uri, "feature", "descriptor", f.to_s) - features[feature] = feature_uri - @prediction_dataset.add_feature(feature_uri, { - RDF.type => [OT.Substructure], - OT.smarts => feature, - OT.pValue => @p_values[feature], - OT.effect => @effects[feature] - }) - @prediction_dataset.add @compound.uri, feature_uri, true - f+=1 - end + if OpenTox::Feature.find(metadata[OT.dependentVariables]).feature_type == "classification" + @prediction_dataset.add @compound.uri, value_feature_uri, @value_map[prediction[:prediction]] else - @compound_features.each do |feature| - features[feature] = feature - @prediction_dataset.add @compound.uri, feature, true - end + @prediction_dataset.add @compound.uri, value_feature_uri, prediction[:prediction] end - n = 0 - @neighbors.each do |neighbor| - neighbor_uri = File.join( @prediction_dataset.uri, "feature", "neighbor", n.to_s ) - @prediction_dataset.add_feature(neighbor_uri, { - OT.compound => neighbor[:compound], - OT.similarity => neighbor[:similarity], - OT.measuredActivity => neighbor[:activity], - RDF.type => [OT.Neighbor] - }) - @prediction_dataset.add @compound.uri, neighbor_uri, true - f = 0 unless f - neighbor[:features].each do |feature| - if @feature_calculation_algorithm == "Substructure.match" - feature_uri = File.join( @prediction_dataset.uri, "feature", "descriptor", f.to_s) unless feature_uri = features[feature] - else - feature_uri = feature - end - @prediction_dataset.add neighbor[:compound], feature_uri, true - unless features.has_key? feature + @prediction_dataset.add @compound.uri, confidence_feature_uri, prediction[:confidence] + + if verbose + if @feature_calculation_algorithm == "Substructure.match" + f = 0 + @compound_features.each do |feature| + feature_uri = File.join( @prediction_dataset.uri, "feature", "descriptor", f.to_s) features[feature] = feature_uri @prediction_dataset.add_feature(feature_uri, { RDF.type => [OT.Substructure], @@ -340,13 +309,48 @@ module OpenTox OT.pValue => @p_values[feature], OT.effect => @effects[feature] }) + @prediction_dataset.add @compound.uri, feature_uri, true f+=1 end + else + @compound_features.each do |feature| + features[feature] = feature + @prediction_dataset.add @compound.uri, feature, true + end + end + n = 0 + @neighbors.each do |neighbor| + neighbor_uri = File.join( @prediction_dataset.uri, "feature", "neighbor", n.to_s ) + @prediction_dataset.add_feature(neighbor_uri, { + OT.compound => neighbor[:compound], + OT.similarity => neighbor[:similarity], + OT.measuredActivity => neighbor[:activity], + RDF.type => [OT.Neighbor] + }) + @prediction_dataset.add @compound.uri, neighbor_uri, true + f = 0 unless f + neighbor[:features].each do |feature| + if @feature_calculation_algorithm == "Substructure.match" + feature_uri = File.join( @prediction_dataset.uri, "feature", "descriptor", f.to_s) unless feature_uri = features[feature] + else + feature_uri = feature + end + @prediction_dataset.add neighbor[:compound], feature_uri, true + unless features.has_key? feature + features[feature] = feature_uri + @prediction_dataset.add_feature(feature_uri, { + RDF.type => [OT.Substructure], + OT.smarts => feature, + OT.pValue => @p_values[feature], + OT.effect => @effects[feature] + }) + f+=1 + end + end + n+=1 end - n+=1 end end - #end @prediction_dataset.save(subjectid) @prediction_dataset diff --git a/lib/parser.rb b/lib/parser.rb index 5625f60..89fcb71 100644 --- a/lib/parser.rb +++ b/lib/parser.rb @@ -76,6 +76,9 @@ module OpenTox @metadata[OT.parameters] << parameter end end + #@metadata.each do |k,v| + #v = v.first if v and v.size == 1 + #end @metadata end |