From b7a03a18ce90d664d89d6a414512aa03a6dddcc4 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Wed, 25 May 2011 08:51:56 +0200 Subject: Add_neighbor function --- lib/model.rb | 59 ++++++++++++++++++++++------------------------------------- 1 file changed, 22 insertions(+), 37 deletions(-) (limited to 'lib/model.rb') diff --git a/lib/model.rb b/lib/model.rb index 139aed8..f5e0410 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -338,54 +338,39 @@ module OpenTox @prediction_dataset end - # Find neighbors and store them as object variable + # Find neighbors and store them as object variable, access only a subset of compounds for that. def neighbors_balanced(s, l, start, offset) @compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm - @neighbors = [] - begin - #@fingerprints.each do |training_compound,training_features| # AM: this is original by CH [ l[start, offset ] , s ].flatten.each do |training_compound| # AM: access only a balanced subset training_features = @fingerprints[training_compound] - sim = eval("#{@similarity_algorithm}(@compound_features,training_features,@p_values)") - if sim > @min_sim - @activities[training_compound].each do |act| - this_neighbor = { - :compound => training_compound, - :similarity => sim, - :features => training_features, - :activity => act - } - @neighbors << this_neighbor - end - end - end - rescue Exception => e - LOGGER.error "BLAZAR failed in neighbors: "+e.class.to_s+": "+e.message + add_neighbor training_features end end - - # Find neighbors and store them as object variable + # Find neighbors and store them as object variable. def neighbors - - @compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm - - @neighbors = [] - @fingerprints.each do |training_compound,training_features| - sim = eval("#{@similarity_algorithm}(@compound_features,training_features,@p_values)") - if sim > @min_sim - @activities[training_compound].each do |act| - @neighbors << { - :compound => training_compound, - :similarity => sim, - :features => training_features, - :activity => act - } - end + @compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm + @neighbors = [] + @fingerprints.each do |training_compound,training_features| # AM: access all compounds + add_neighbor training_features + end + end + + # Adds a neighbor to @neighbors if it passes the similarity threshold. + def add_neighbor(training_features) + sim = eval("#{@similarity_algorithm}(@compound_features,training_features,@p_values)") + if sim > @min_sim + @activities[training_compound].each do |act| + @neighbors << { + :compound => training_compound, + :similarity => sim, + :features => training_features, + :activity => act + } end - end + end end # Find database activities and store them in @prediction_dataset -- cgit v1.2.3 From baffedfc7543cfc8a90fc185fc91f2748ce94528 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Wed, 25 May 2011 10:24:13 +0200 Subject: Fixed add_neighbor --- lib/model.rb | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) (limited to 'lib/model.rb') diff --git a/lib/model.rb b/lib/model.rb index f5e0410..3d27706 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -338,28 +338,45 @@ module OpenTox @prediction_dataset end + # Calculate the propositionalization matrix aka instantiation matrix (0/1 entries for features) +# def get_prop_matrix +# matrix = Array.new +# begin +# @neighbors.each do |n| +# row = [] +# @features.each do |f| +# row << @fingerprints[n].include?(f) ? 0.0 : @p_values[f] +# end +# matrix << row +# end +# rescue Exception => e +# LOGGER.debug "get_prop_matrix failed with '" + $! + "'" +# end +# matrix +# end + # Find neighbors and store them as object variable, access only a subset of compounds for that. def neighbors_balanced(s, l, start, offset) @compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm @neighbors = [] [ l[start, offset ] , s ].flatten.each do |training_compound| # AM: access only a balanced subset training_features = @fingerprints[training_compound] - add_neighbor training_features + add_neighbor training_features, training_compound end end - # Find neighbors and store them as object variable. + # Find neighbors and store them as object variable, access all compounds for that. def neighbors @compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm @neighbors = [] @fingerprints.each do |training_compound,training_features| # AM: access all compounds - add_neighbor training_features + add_neighbor training_features, training_compound end end # Adds a neighbor to @neighbors if it passes the similarity threshold. - def add_neighbor(training_features) + def add_neighbor(training_features, training_compound) sim = eval("#{@similarity_algorithm}(@compound_features,training_features,@p_values)") if sim > @min_sim @activities[training_compound].each do |act| -- cgit v1.2.3 From b6ba84a077db9f6c708807f059e501333f7303b1 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Wed, 25 May 2011 12:18:08 +0200 Subject: 1st v --- lib/model.rb | 39 +++++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 16 deletions(-) (limited to 'lib/model.rb') diff --git a/lib/model.rb b/lib/model.rb index 3d27706..bebf5d3 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -215,6 +215,7 @@ module OpenTox (i == modulo[0]) && (slack>0) ? lr_size = s.size + slack : lr_size = s.size + addon # determine fraction LOGGER.info "BLAZAR: Neighbors round #{i}: #{position} + #{lr_size}." neighbors_balanced(s, l, position, lr_size) # get ratio fraction of larger part + prop_matrix = get_prop_matrix prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})") if prediction_best.nil? || prediction[:confidence].abs > prediction_best[:confidence].abs prediction_best=prediction @@ -228,10 +229,11 @@ module OpenTox prediction=prediction_best @neighbors=neighbors_best - ### END AM balanced predictions + ### END AM balanced predictions else # regression case: no balancing neighbors + prop_matrix = get_prop_matrix prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})") end @@ -339,21 +341,26 @@ module OpenTox end # Calculate the propositionalization matrix aka instantiation matrix (0/1 entries for features) -# def get_prop_matrix -# matrix = Array.new -# begin -# @neighbors.each do |n| -# row = [] -# @features.each do |f| -# row << @fingerprints[n].include?(f) ? 0.0 : @p_values[f] -# end -# matrix << row -# end -# rescue Exception => e -# LOGGER.debug "get_prop_matrix failed with '" + $! + "'" -# end -# matrix -# end + def get_prop_matrix + matrix = Array.new + begin + @neighbors.each do |n| + n = n[:compound] + row = [] + @features.each do |f| + if ! @fingerprints[n].nil? + row << (@fingerprints[n].include?(f) ? 0.0 : @p_values[f]) + else + row << 0.0 + end + end + matrix << row + end + rescue Exception => e + LOGGER.debug "get_prop_matrix failed with '" + $! + "'" + end + matrix + end # Find neighbors and store them as object variable, access only a subset of compounds for that. def neighbors_balanced(s, l, start, offset) -- cgit v1.2.3 From d012b9e8da641c342c455a1384ddf3b14f5b5c35 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Wed, 25 May 2011 12:38:04 +0200 Subject: 2nd v --- lib/model.rb | 87 ++++++++++++++++++++++++++++++++---------------------------- 1 file changed, 46 insertions(+), 41 deletions(-) (limited to 'lib/model.rb') diff --git a/lib/model.rb b/lib/model.rb index bebf5d3..f4df8ea 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -177,7 +177,7 @@ module OpenTox return @prediction_dataset if database_activity(subjectid) - if metadata[RDF.type] == [OTA.ClassificationLazySingleTarget] + if metadata[RDF.type] == [OTA.ClassificationLazySingleTarget] # AM: Balancing, see http://www.maunz.de/wordpress/opentox/2011/balanced-lazar l = Array.new # larger s = Array.new # smaller fraction @@ -211,33 +211,33 @@ module OpenTox neighbors_best=nil begin - for i in 1..modulo[0] do - (i == modulo[0]) && (slack>0) ? lr_size = s.size + slack : lr_size = s.size + addon # determine fraction - LOGGER.info "BLAZAR: Neighbors round #{i}: #{position} + #{lr_size}." - neighbors_balanced(s, l, position, lr_size) # get ratio fraction of larger part - prop_matrix = get_prop_matrix - prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})") - if prediction_best.nil? || prediction[:confidence].abs > prediction_best[:confidence].abs - prediction_best=prediction - neighbors_best=@neighbors + for i in 1..modulo[0] do + (i == modulo[0]) && (slack>0) ? lr_size = s.size + slack : lr_size = s.size + addon # determine fraction + LOGGER.info "BLAZAR: Neighbors round #{i}: #{position} + #{lr_size}." + neighbors_balanced(s, l, position, lr_size) # get ratio fraction of larger part + props = get_props + prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})") + if prediction_best.nil? || prediction[:confidence].abs > prediction_best[:confidence].abs + prediction_best=prediction + neighbors_best=@neighbors + end + position = position + lr_size end - position = position + lr_size - end rescue Exception => e LOGGER.error "BLAZAR failed in prediction: "+e.class.to_s+": "+e.message end prediction=prediction_best @neighbors=neighbors_best - ### END AM balanced predictions + ### END AM balanced predictions else # regression case: no balancing neighbors - prop_matrix = get_prop_matrix + props = get_props prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})") end - - # TODO: reasonable feature name + + # TODO: reasonable feature name #prediction_feature_uri = File.join( @prediction_dataset.uri, "feature", "prediction", File.basename(@metadata[OT.dependentVariables]),@prediction_dataset.compounds.size.to_s) value_feature_uri = File.join( @prediction_dataset.uri, "feature", "prediction", File.basename(@metadata[OT.dependentVariables]),"value") confidence_feature_uri = File.join( @prediction_dataset.uri, "feature", "prediction", File.basename(@metadata[OT.dependentVariables]),"confidence") @@ -245,7 +245,7 @@ module OpenTox prediction_feature_uris = {value_feature_uri => prediction[:prediction], confidence_feature_uri => prediction[:confidence]} #prediction_feature_uris[value_feature_uri] = "No similar compounds in training dataset." if @neighbors.size == 0 or prediction[:prediction].nil? prediction_feature_uris[value_feature_uri] = nil if @neighbors.size == 0 or prediction[:prediction].nil? - + #@prediction_dataset.metadata[OT.dependentVariables] = prediction_feature_uri @prediction_dataset.metadata[OT.dependentVariables] = @metadata[OT.dependentVariables] @@ -275,10 +275,10 @@ module OpenTox DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )), # TODO: factor information to value }) - #OT.prediction => prediction[:prediction], - #OT.confidence => prediction[:confidence], - #OT.parameters => [{DC.title => "compound_uri", OT.paramValue => compound_uri}] - @prediction_dataset.add @compound.uri, prediction_feature_uri, value + #OT.prediction => prediction[:prediction], + #OT.confidence => prediction[:confidence], + #OT.parameters => [{DC.title => "compound_uri", OT.paramValue => compound_uri}] + @prediction_dataset.add @compound.uri, prediction_feature_uri, value end if verbose @@ -341,34 +341,39 @@ module OpenTox end # Calculate the propositionalization matrix aka instantiation matrix (0/1 entries for features) - def get_prop_matrix + # Same for the vector describing the query compound + def get_props matrix = Array.new begin - @neighbors.each do |n| - n = n[:compound] + @neighbors.each do |n| + n = n[:compound] + row = [] + @features.each do |f| + if ! @fingerprints[n].nil? + row << (@fingerprints[n].include?(f) ? 0.0 : @p_values[f]) + else + row << 0.0 + end + end + matrix << row + end row = [] @features.each do |f| - if ! @fingerprints[n].nil? - row << (@fingerprints[n].include?(f) ? 0.0 : @p_values[f]) - else - row << 0.0 - end + row << (@compound.match([f]).size == 0 ? 0.0 : @p_values[f]) end - matrix << row - end rescue Exception => e - LOGGER.debug "get_prop_matrix failed with '" + $! + "'" + LOGGER.debug "get_props failed with '" + $! + "'" end - matrix + [ matrix, row ] end # Find neighbors and store them as object variable, access only a subset of compounds for that. def neighbors_balanced(s, l, start, offset) @compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm @neighbors = [] - [ l[start, offset ] , s ].flatten.each do |training_compound| # AM: access only a balanced subset - training_features = @fingerprints[training_compound] - add_neighbor training_features, training_compound + [ l[start, offset ] , s ].flatten.each do |training_compound| # AM: access only a balanced subset + training_features = @fingerprints[training_compound] + add_neighbor training_features, training_compound end end @@ -378,7 +383,7 @@ module OpenTox @compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm @neighbors = [] @fingerprints.each do |training_compound,training_features| # AM: access all compounds - add_neighbor training_features, training_compound + add_neighbor training_features, training_compound end end @@ -388,10 +393,10 @@ module OpenTox if sim > @min_sim @activities[training_compound].each do |act| @neighbors << { - :compound => training_compound, - :similarity => sim, - :features => training_features, - :activity => act + :compound => training_compound, + :similarity => sim, + :features => training_features, + :activity => act } end end -- cgit v1.2.3 From ca6dd87b7c80611c4f4e4716f68fe6633ce1066b Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Wed, 25 May 2011 15:04:50 +0200 Subject: 4th v --- lib/model.rb | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'lib/model.rb') diff --git a/lib/model.rb b/lib/model.rb index f4df8ea..6a4602f 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -176,8 +176,7 @@ module OpenTox return @prediction_dataset if database_activity(subjectid) - - if metadata[RDF.type] == [OTA.ClassificationLazySingleTarget] + if metadata[RDF.type].include?([OTA.ClassificationLazySingleTarget][0]) # AM: searching in metadata for classification # AM: Balancing, see http://www.maunz.de/wordpress/opentox/2011/balanced-lazar l = Array.new # larger s = Array.new # smaller fraction @@ -231,7 +230,7 @@ module OpenTox @neighbors=neighbors_best ### END AM balanced predictions - else # regression case: no balancing + else # no balancing as before neighbors props = get_props prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})") -- cgit v1.2.3 From 2b12d07bec101df8c10b7ab5aff1491b0997a6c7 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Wed, 25 May 2011 17:10:14 +0200 Subject: 6th v --- lib/model.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/model.rb') diff --git a/lib/model.rb b/lib/model.rb index 6a4602f..1a5aa37 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -215,7 +215,7 @@ module OpenTox LOGGER.info "BLAZAR: Neighbors round #{i}: #{position} + #{lr_size}." neighbors_balanced(s, l, position, lr_size) # get ratio fraction of larger part props = get_props - prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})") + prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values}, props)") if prediction_best.nil? || prediction[:confidence].abs > prediction_best[:confidence].abs prediction_best=prediction neighbors_best=@neighbors @@ -233,7 +233,7 @@ module OpenTox else # no balancing as before neighbors props = get_props - prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})") + prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values}, props)") end # TODO: reasonable feature name -- cgit v1.2.3 From 77c885b7394aa11ba5e59eb60884205332efa31a Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Thu, 26 May 2011 08:38:21 +0200 Subject: 7th v --- lib/model.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/model.rb') diff --git a/lib/model.rb b/lib/model.rb index 1a5aa37..921335c 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -214,7 +214,7 @@ module OpenTox (i == modulo[0]) && (slack>0) ? lr_size = s.size + slack : lr_size = s.size + addon # determine fraction LOGGER.info "BLAZAR: Neighbors round #{i}: #{position} + #{lr_size}." neighbors_balanced(s, l, position, lr_size) # get ratio fraction of larger part - props = get_props + (@prediction_algorithm.include? "svm" and params[:prop_kernel] == "true") ? props = get_props : props = nil prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values}, props)") if prediction_best.nil? || prediction[:confidence].abs > prediction_best[:confidence].abs prediction_best=prediction @@ -232,7 +232,7 @@ module OpenTox else # no balancing as before neighbors - props = get_props + (@prediction_algorithm.include? "svm" and params[:prop_kernel] == "true") ? props = get_props : props = nil prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values}, props)") end -- cgit v1.2.3 From 065fdeb351f68d0445b66516ccf8e7cfcc7e2a1f Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Thu, 26 May 2011 12:22:07 +0200 Subject: Fixed prediction type switching --- lib/model.rb | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'lib/model.rb') diff --git a/lib/model.rb b/lib/model.rb index 921335c..d63eef2 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -176,7 +176,10 @@ module OpenTox return @prediction_dataset if database_activity(subjectid) - if metadata[RDF.type].include?([OTA.ClassificationLazySingleTarget][0]) # AM: searching in metadata for classification + load_metadata(subjectid) + case OpenTox::Feature.find(metadata[OT.dependentVariables]).feature_type + when "classification" + # AM: Balancing, see http://www.maunz.de/wordpress/opentox/2011/balanced-lazar l = Array.new # larger s = Array.new # smaller fraction @@ -231,6 +234,7 @@ module OpenTox ### END AM balanced predictions else # no balancing as before + LOGGER.info "LAZAR: Unbalanced." neighbors (@prediction_algorithm.include? "svm" and params[:prop_kernel] == "true") ? props = get_props : props = nil prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values}, props)") -- cgit v1.2.3 From 3922c8e5fcb9fbe6ddedab9f70e114717ff33a60 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Thu, 26 May 2011 14:28:19 +0200 Subject: 8th v --- lib/model.rb | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) (limited to 'lib/model.rb') diff --git a/lib/model.rb b/lib/model.rb index 7c2ef58..28c05a9 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -69,7 +69,7 @@ module OpenTox include Model include Algorithm - attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim, :subjectid + attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim, :subjectid, :prop_kernel def initialize(uri=nil) @@ -92,6 +92,7 @@ module OpenTox @prediction_algorithm = "Neighbors.weighted_majority_vote" @min_sim = 0.3 + @prop_kernel = false end @@ -219,7 +220,11 @@ module OpenTox (i == modulo[0]) && (slack>0) ? lr_size = s.size + slack : lr_size = s.size + addon # determine fraction LOGGER.info "BLAZAR: Neighbors round #{i}: #{position} + #{lr_size}." neighbors_balanced(s, l, position, lr_size) # get ratio fraction of larger part - (@prediction_algorithm.include? "svm" and params[:prop_kernel] == "true") ? props = get_props : props = nil + if @prop_kernel && @prediction_algorithm.include?("svm") + props = get_props + else + props = nil + end prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values}, props)") if prediction_best.nil? || prediction[:confidence].abs > prediction_best[:confidence].abs prediction_best=prediction @@ -235,10 +240,14 @@ module OpenTox @neighbors=neighbors_best ### END AM balanced predictions - else # regression case: no balancing + else # AM: no balancing LOGGER.info "LAZAR: Unbalanced." neighbors - (@prediction_algorithm.include? "svm" and params[:prop_kernel] == "true") ? props = get_props : props = nil + if @prop_kernel && @prediction_algorithm.include?("svm") + props = get_props + else + props = nil + end prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values}, props)") end -- cgit v1.2.3