From d012b9e8da641c342c455a1384ddf3b14f5b5c35 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Wed, 25 May 2011 12:38:04 +0200 Subject: 2nd v --- lib/model.rb | 87 ++++++++++++++++++++++++++++++++---------------------------- 1 file changed, 46 insertions(+), 41 deletions(-) diff --git a/lib/model.rb b/lib/model.rb index bebf5d3..f4df8ea 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -177,7 +177,7 @@ module OpenTox return @prediction_dataset if database_activity(subjectid) - if metadata[RDF.type] == [OTA.ClassificationLazySingleTarget] + if metadata[RDF.type] == [OTA.ClassificationLazySingleTarget] # AM: Balancing, see http://www.maunz.de/wordpress/opentox/2011/balanced-lazar l = Array.new # larger s = Array.new # smaller fraction @@ -211,33 +211,33 @@ module OpenTox neighbors_best=nil begin - for i in 1..modulo[0] do - (i == modulo[0]) && (slack>0) ? lr_size = s.size + slack : lr_size = s.size + addon # determine fraction - LOGGER.info "BLAZAR: Neighbors round #{i}: #{position} + #{lr_size}." - neighbors_balanced(s, l, position, lr_size) # get ratio fraction of larger part - prop_matrix = get_prop_matrix - prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})") - if prediction_best.nil? || prediction[:confidence].abs > prediction_best[:confidence].abs - prediction_best=prediction - neighbors_best=@neighbors + for i in 1..modulo[0] do + (i == modulo[0]) && (slack>0) ? lr_size = s.size + slack : lr_size = s.size + addon # determine fraction + LOGGER.info "BLAZAR: Neighbors round #{i}: #{position} + #{lr_size}." + neighbors_balanced(s, l, position, lr_size) # get ratio fraction of larger part + props = get_props + prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})") + if prediction_best.nil? || prediction[:confidence].abs > prediction_best[:confidence].abs + prediction_best=prediction + neighbors_best=@neighbors + end + position = position + lr_size end - position = position + lr_size - end rescue Exception => e LOGGER.error "BLAZAR failed in prediction: "+e.class.to_s+": "+e.message end prediction=prediction_best @neighbors=neighbors_best - ### END AM balanced predictions + ### END AM balanced predictions else # regression case: no balancing neighbors - prop_matrix = get_prop_matrix + props = get_props prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})") end - - # TODO: reasonable feature name + + # TODO: reasonable feature name #prediction_feature_uri = File.join( @prediction_dataset.uri, "feature", "prediction", File.basename(@metadata[OT.dependentVariables]),@prediction_dataset.compounds.size.to_s) value_feature_uri = File.join( @prediction_dataset.uri, "feature", "prediction", File.basename(@metadata[OT.dependentVariables]),"value") confidence_feature_uri = File.join( @prediction_dataset.uri, "feature", "prediction", File.basename(@metadata[OT.dependentVariables]),"confidence") @@ -245,7 +245,7 @@ module OpenTox prediction_feature_uris = {value_feature_uri => prediction[:prediction], confidence_feature_uri => prediction[:confidence]} #prediction_feature_uris[value_feature_uri] = "No similar compounds in training dataset." if @neighbors.size == 0 or prediction[:prediction].nil? prediction_feature_uris[value_feature_uri] = nil if @neighbors.size == 0 or prediction[:prediction].nil? - + #@prediction_dataset.metadata[OT.dependentVariables] = prediction_feature_uri @prediction_dataset.metadata[OT.dependentVariables] = @metadata[OT.dependentVariables] @@ -275,10 +275,10 @@ module OpenTox DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )), # TODO: factor information to value }) - #OT.prediction => prediction[:prediction], - #OT.confidence => prediction[:confidence], - #OT.parameters => [{DC.title => "compound_uri", OT.paramValue => compound_uri}] - @prediction_dataset.add @compound.uri, prediction_feature_uri, value + #OT.prediction => prediction[:prediction], + #OT.confidence => prediction[:confidence], + #OT.parameters => [{DC.title => "compound_uri", OT.paramValue => compound_uri}] + @prediction_dataset.add @compound.uri, prediction_feature_uri, value end if verbose @@ -341,34 +341,39 @@ module OpenTox end # Calculate the propositionalization matrix aka instantiation matrix (0/1 entries for features) - def get_prop_matrix + # Same for the vector describing the query compound + def get_props matrix = Array.new begin - @neighbors.each do |n| - n = n[:compound] + @neighbors.each do |n| + n = n[:compound] + row = [] + @features.each do |f| + if ! @fingerprints[n].nil? + row << (@fingerprints[n].include?(f) ? 0.0 : @p_values[f]) + else + row << 0.0 + end + end + matrix << row + end row = [] @features.each do |f| - if ! @fingerprints[n].nil? - row << (@fingerprints[n].include?(f) ? 0.0 : @p_values[f]) - else - row << 0.0 - end + row << (@compound.match([f]).size == 0 ? 0.0 : @p_values[f]) end - matrix << row - end rescue Exception => e - LOGGER.debug "get_prop_matrix failed with '" + $! + "'" + LOGGER.debug "get_props failed with '" + $! + "'" end - matrix + [ matrix, row ] end # Find neighbors and store them as object variable, access only a subset of compounds for that. def neighbors_balanced(s, l, start, offset) @compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm @neighbors = [] - [ l[start, offset ] , s ].flatten.each do |training_compound| # AM: access only a balanced subset - training_features = @fingerprints[training_compound] - add_neighbor training_features, training_compound + [ l[start, offset ] , s ].flatten.each do |training_compound| # AM: access only a balanced subset + training_features = @fingerprints[training_compound] + add_neighbor training_features, training_compound end end @@ -378,7 +383,7 @@ module OpenTox @compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm @neighbors = [] @fingerprints.each do |training_compound,training_features| # AM: access all compounds - add_neighbor training_features, training_compound + add_neighbor training_features, training_compound end end @@ -388,10 +393,10 @@ module OpenTox if sim > @min_sim @activities[training_compound].each do |act| @neighbors << { - :compound => training_compound, - :similarity => sim, - :features => training_features, - :activity => act + :compound => training_compound, + :similarity => sim, + :features => training_features, + :activity => act } end end -- cgit v1.2.3