From f999b42afbb4387d99b2c91a79f84654408cbab1 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Tue, 10 May 2011 08:29:27 +0200 Subject: Added bal --- lib/model.rb | 102 ++++++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 80 insertions(+), 22 deletions(-) diff --git a/lib/model.rb b/lib/model.rb index 048de85..9442897 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -23,7 +23,7 @@ module OpenTox # Generic OpenTox model class for all API compliant services class Generic include Model - + # Find Generic Opentox Model via URI, and loads metadata, could raise NotFound/NotAuthorized error # @param [String] uri Model URI # @return [OpenTox::Model::Generic] Model instance @@ -34,12 +34,12 @@ module OpenTox raise "could not load model metadata '"+uri.to_s+"'" if model.metadata==nil or model.metadata.size==0 model end - - # provides feature type, possible types are "regression" or "classification" - # @return [String] feature type, "unknown" if type could not be estimated + + # provides feature type, possible types are "regression" or "classification" + # @return [String] feature type, "unknown" if type could not be estimated def feature_type(subjectid=nil) return @feature_type if @feature_type - + # dynamically perform restcalls if necessary load_metadata(subjectid) if @metadata==nil or @metadata.size==0 or (@metadata.size==1 && @metadata.values[0]==@uri) algorithm = OpenTox::Algorithm::Generic.find(@metadata[OT.algorithm], subjectid) @@ -60,9 +60,9 @@ module OpenTox raise "unknown model "+type_indicators.inspect unless @feature_type @feature_type end - + end - + # Lazy Structure Activity Relationship class class Lazar @@ -78,7 +78,7 @@ module OpenTox else super CONFIG[:services]["opentox-model"] end - + @metadata[OT.algorithm] = File.join(CONFIG[:services]["opentox-algorithm"],"lazar") @features = [] @@ -178,8 +178,59 @@ module OpenTox return @prediction_dataset if database_activity(subjectid) - neighbors - prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})") + + # AM: Balancing, see http://www.maunz.de/wordpress/opentox/2011/balanced-lazar + l = Array.new # larger + s = Array.new # smaller fraction + if metadata[RDF.type] == [OTA.ClassificationLazySingleTarget] + @fingerprints.each do |training_compound,training_features| + @activities[training_compound].each do |act| + case act.to_s + when "false" + l << training_compound + when "true" + s << training_compound + else + LOGGER.warn "BLAZAR: Activity #{act.to_s} should not be reached." + end + end + end + if s.size > l.size then + l,s = s,l # happy swapping + LOGGER.info "BLAZAR: |s|=#{s.size}, |l|=#{l.size}." + end + # determine ratio + modulo = l.size.divmod(s.size)# modulo[0]=ratio, modulo[1]=rest + LOGGER.info "BLAZAR: Balance: #{modulo[0]}, rest #{modulo[1]}." + end + + # AM: Balanced predictions + addon = (modulo[1].to_f/modulo[0]).ceil # what will be added in each round + slack = modulo[1].divmod(addon)[1] # what remains for the last round + position = 0 + predictions = Array.new + + @collect_neighbors = {} + predictions = [] + for i in 1..modulo[0] do + (i == modulo[0]) && (slack>0) ? lr_size = s.size + slack : lr_size = s.size + addon # determine fraction + LOGGER.info "BLAZAR: Neighbors round #{i}: #{position} + #{lr_size}." + neighbors(s, l, position, lr_size) # get ratio fraction of larger part + predictions << eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})") + position = position + lr_size + end + @neighbors = @collect_neighbors.values # AM: get all neighbors + + prediction={} + begin + p_sum=0.0 + predictions.each do |p| + p[:prediction] == false ? p_sum = p_sum - p[:confidence].to_f : p_sum = p_sum + p[:confidence].to_f + end + prediction = { :prediction => (p_sum<0.0 ? false : true), :confidence => p_sum.abs/predictions.size } # AM: get mean + rescue Exception => e + LOGGER.error "BLAZAR failed in prediction: "+e.class.to_s+": "+e.message + end prediction_feature_uri = File.join( @prediction_dataset.uri, "feature", "prediction", File.basename(@metadata[OT.dependentVariables]),@prediction_dataset.compounds.size.to_s) # TODO: fix dependentVariable @@ -269,23 +320,30 @@ module OpenTox end # Find neighbors and store them as object variable - def neighbors - + def neighbors(s=nil, l=nil, start=nil, offset=nil) @compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm @neighbors = [] - @fingerprints.each do |training_compound,training_features| - sim = eval("#{@similarity_algorithm}(@compound_features,training_features,@p_values)") - if sim > @min_sim - @activities[training_compound].each do |act| - @neighbors << { - :compound => training_compound, - :similarity => sim, - :features => training_features, - :activity => act - } + begin + #@fingerprints.each do |training_compound,training_features| # AM: this is original by CH + [ l[start, offset ] , s ].flatten.each do |training_compound| # AM: access only a balanced subset + training_features = @fingerprints[training_compound] + sim = eval("#{@similarity_algorithm}(@compound_features,training_features,@p_values)") + if sim > @min_sim + @activities[training_compound].each do |act| + this_neighbor = { + :compound => training_compound, + :similarity => sim, + :features => training_features, + :activity => act + } + @neighbors << this_neighbor + @collect_neighbors[training_compound] = this_neighbor + end end end + rescue Exception => e + LOGGER.error "BLAZAR failed in neighbors: "+e.class.to_s+": "+e.message end end -- cgit v1.2.3 From 1d8c7d6dfa513cd7c8ad642248db24e0d1e3a199 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Tue, 10 May 2011 09:02:38 +0200 Subject: Using Best prediction only --- lib/model.rb | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/lib/model.rb b/lib/model.rb index 9442897..a4d6d85 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -210,28 +210,28 @@ module OpenTox position = 0 predictions = Array.new - @collect_neighbors = {} - predictions = [] + prediction_best=nil + neighbors_best=nil + + begin for i in 1..modulo[0] do (i == modulo[0]) && (slack>0) ? lr_size = s.size + slack : lr_size = s.size + addon # determine fraction LOGGER.info "BLAZAR: Neighbors round #{i}: #{position} + #{lr_size}." neighbors(s, l, position, lr_size) # get ratio fraction of larger part - predictions << eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})") + prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})") + if prediction[:confidence].abs > prediction_best[:confidence].abs || prediction_best.nil? + prediction_best=prediction + neighbors_best=@neighbors + end position = position + lr_size end - @neighbors = @collect_neighbors.values # AM: get all neighbors - - prediction={} - begin - p_sum=0.0 - predictions.each do |p| - p[:prediction] == false ? p_sum = p_sum - p[:confidence].to_f : p_sum = p_sum + p[:confidence].to_f - end - prediction = { :prediction => (p_sum<0.0 ? false : true), :confidence => p_sum.abs/predictions.size } # AM: get mean rescue Exception => e LOGGER.error "BLAZAR failed in prediction: "+e.class.to_s+": "+e.message end + prediction=prediction_best + @neighbors=neighbors_best + prediction_feature_uri = File.join( @prediction_dataset.uri, "feature", "prediction", File.basename(@metadata[OT.dependentVariables]),@prediction_dataset.compounds.size.to_s) # TODO: fix dependentVariable @prediction_dataset.metadata[OT.dependentVariables] = prediction_feature_uri @@ -338,7 +338,6 @@ module OpenTox :activity => act } @neighbors << this_neighbor - @collect_neighbors[training_compound] = this_neighbor end end end -- cgit v1.2.3 From 2af934ddc033d7d8a737d88eb4ee175955ad4a0a Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Tue, 10 May 2011 11:11:34 +0200 Subject: Fixed first prediction case --- lib/model.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/model.rb b/lib/model.rb index a4d6d85..3d64f32 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -219,7 +219,7 @@ module OpenTox LOGGER.info "BLAZAR: Neighbors round #{i}: #{position} + #{lr_size}." neighbors(s, l, position, lr_size) # get ratio fraction of larger part prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})") - if prediction[:confidence].abs > prediction_best[:confidence].abs || prediction_best.nil? + if prediction_best.nil? || prediction[:confidence].abs > prediction_best[:confidence].abs prediction_best=prediction neighbors_best=@neighbors end -- cgit v1.2.3 From 305f3caa692dd977df07cbc5ec195521e2a135fa Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Tue, 10 May 2011 16:43:05 +0200 Subject: Added Gauss patch --- lib/algorithm.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 008e7fe..abf10d4 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -165,7 +165,7 @@ module OpenTox # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required # @return [Hash] Hash with keys `:prediction, :confidence` def self.local_svm_regression(neighbors,params ) - sims = neighbors.collect{ |n| n[:similarity] } # similarity values between query and neighbors + sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values between query and neighbors conf = sims.inject{|sum,x| sum + x } acts = neighbors.collect do |n| act = n[:activity] -- cgit v1.2.3 From 524a68d8429b8adc16bd8073774f9305cb7138a0 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Wed, 11 May 2011 12:13:37 +0200 Subject: Added balance patch --- lib/parser.rb | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/lib/parser.rb b/lib/parser.rb index db746c1..dc5f675 100644 --- a/lib/parser.rb +++ b/lib/parser.rb @@ -348,16 +348,27 @@ module OpenTox when OT.NominalFeature case value.to_s when TRUE_REGEXP - @dataset.add(compound.uri, feature, true ) + #@dataset.add(compound.uri, feature, true ) + val=true when FALSE_REGEXP - @dataset.add(compound.uri, feature, false ) + #@dataset.add(compound.uri, feature, false ) + val=false end when OT.NumericFeature - @dataset.add compound.uri, feature, value.to_f + #@dataset.add compound.uri, feature, value.to_f + val = value.to_f when OT.StringFeature - @dataset.add compound.uri, feature, value.to_s + #@dataset.add compound.uri, feature, value.to_s + val = value.to_s @activity_errors << smiles+", "+row.join(", ") end + if val!=nil + @dataset.add(compound.uri, feature, val) + if type!=OT.NumericFeature + @dataset.features[feature][OT.acceptValue] = [] unless @dataset.features[feature][OT.acceptValue] + @dataset.features[feature][OT.acceptValue] << val.to_s unless @dataset.features[feature][OT.acceptValue].include?(val.to_s) + end + end end end -- cgit v1.2.3 From 03a87a832162ccf17b6f0ebfda126e3622530ca3 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Wed, 11 May 2011 15:56:55 +0200 Subject: Further Martin patch --- lib/feature.rb | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/lib/feature.rb b/lib/feature.rb index f6e2dfd..eb0b869 100644 --- a/lib/feature.rb +++ b/lib/feature.rb @@ -19,14 +19,7 @@ module OpenTox # provides domain (possible target values) of classification feature # @return [Array] list with possible target values def domain - if metadata[OT.acceptValue] - raise "accept value found, remove hack and implement correctly" - else - if @uri=~/feature\/26221/ || @uri=~/feature\/221726/ - return ["mutagen" , "nonmutagen"] - end - return [true, false] - end + return [true, false] end # provides feature type, possible types are "regression" or "classification" -- cgit v1.2.3