diff options
author | Andreas Maunz <andreas@maunz.de> | 2011-06-24 15:53:08 +0200 |
---|---|---|
committer | Andreas Maunz <andreas@maunz.de> | 2011-06-24 15:53:08 +0200 |
commit | 96ef56a05efffdbfa8f78a58cdeb6f7a37e9452f (patch) | |
tree | 0cbc493757e058a66ae5894ec199b9f3fd00a18b | |
parent | 1d3d27cb689db3091c4ac6e429f2b0f5a198dcdf (diff) | |
parent | f9721059cb28c23c10e83dafe7aa58d9cf650746 (diff) |
Merge branch 'multi' into development
-rw-r--r-- | lib/algorithm.rb | 60 | ||||
-rw-r--r-- | lib/model.rb | 20 | ||||
-rw-r--r-- | lib/parser.rb | 106 |
3 files changed, 107 insertions, 79 deletions
diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 7c1c7a2..1f0ef2a 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -140,25 +140,46 @@ module OpenTox # @param [optional] params Ignored (only for compatibility with local_svm_regression) # @return [Hash] Hash with keys `:prediction, :confidence` def self.weighted_majority_vote(neighbors,params={}, props=nil) - conf = 0.0 + neighbor_contribution = 0.0 + confidence_sum = 0.0 confidence = 0.0 + prediction = nil + positive_map_value= nil + negative_map_value= nil + neighbors.each do |neighbor| - case neighbor[:activity].to_s - when 'true' - conf += Algorithm.gauss(neighbor[:similarity]) - when 'false' - conf -= Algorithm.gauss(neighbor[:similarity]) + neighbor_weight = Algorithm.gauss(neighbor[:similarity]).to_f + neighbor_contribution += neighbor[:activity].to_f * neighbor_weight + + if params[:value_map].size == 2 # provide compat to binary classification + map_entry = params[:value_map][neighbor[:activity].to_i].to_s # access original neighbor activity + case map_entry + when TRUE_REGEXP + confidence_sum += neighbor_weight + positive_map_value = neighbor[:activity] + when FALSE_REGEXP + confidence_sum -= neighbor_weight + negative_map_value = neighbor[:activity] + end + else + confidence_sum += neighbor_weight # AM: new multinomial confidence end end - if conf > 0.0 - prediction = true - elsif conf < 0.0 - prediction = false - else - prediction = nil - end - confidence = conf/neighbors.size if neighbors.size > 0 - {:prediction => prediction, :confidence => confidence.abs} + + if params[:value_map].size == 2 # provide compat to binary classification + if confidence_sum >= 0.0 + prediction = positive_map_value unless neighbors.size==0 + elsif confidence_sum < 0.0 + prediction = negative_map_value unless neighbors.size==0 + end + else + prediction = (neighbor_contribution/confidence_sum).round unless neighbors.size==0 # AM: new multinomial prediction + end + + confidence = confidence_sum/neighbors.size if neighbors.size > 0 + res = {:prediction => prediction, :confidence => confidence.abs} + puts res.to_yaml + res end # Local support vector regression from neighbors @@ -201,7 +222,8 @@ module OpenTox acts = neighbors.collect do |n| act = n[:activity] end # activities of neighbors for supervised learning - acts_f = acts.collect {|v| v == true ? 1.0 : 0.0} +# acts_f = acts.collect {|v| v == true ? 1.0 : 0.0} + acts_f = acts sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors begin prediction = (props.nil? ? local_svm(neighbors, acts_f, sims, "C-bsvc", params) : local_svm_prop(props, acts_f, "C-bsvc", params)) @@ -279,7 +301,8 @@ module OpenTox if type == "nu-svr" prediction = @r.p elsif type == "C-bsvc" - prediction = (@r.p.to_f == 1.0 ? true : false) + #prediction = (@r.p.to_f == 1.0 ? true : false) + prediction = @r.p end @r.quit # free R rescue Exception => e @@ -353,7 +376,8 @@ module OpenTox if type == "nu-svr" prediction = @r.p elsif type == "C-bsvc" - prediction = (@r.p.to_f == 1.0 ? true : false) + #prediction = (@r.p.to_f == 1.0 ? true : false) + prediction = @r.p end @r.quit # free R rescue Exception => e diff --git a/lib/model.rb b/lib/model.rb index e3dce09..5eec366 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -91,7 +91,7 @@ module OpenTox include Algorithm include Model - attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim, :subjectid, :prop_kernel, :value_map + attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim, :subjectid, :prop_kernel, :value_map, :balanced def initialize(uri=nil) @@ -116,6 +116,7 @@ module OpenTox @min_sim = 0.3 @prop_kernel = false + @balanced = false end @@ -211,8 +212,7 @@ module OpenTox unless database_activity(subjectid) # adds database activity to @prediction_dataset - case OpenTox::Feature.find(@metadata[OT.dependentVariables]).feature_type - when "classification" + if @balanced && OpenTox::Feature.find(metadata[OT.dependentVariables]).feature_type == "classification" # AM: Balancing, see http://www.maunz.de/wordpress/opentox/2011/balanced-lazar l = Array.new # larger s = Array.new # smaller fraction @@ -222,12 +222,12 @@ module OpenTox @fingerprints.each do |training_compound,training_features| @activities[training_compound].each do |act| case act.to_s - when "false" + when "0" l << training_compound - when "true" + when "1" s << training_compound else - LOGGER.warn "BLAZAR: Activity #{act.to_s} should not be reached." + LOGGER.warn "BLAZAR: Activity #{act.to_s} should not be reached (supports only two classes)." end end end @@ -258,7 +258,7 @@ module OpenTox else props = nil end - prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values}, props)") + prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values, :value_map => @value_map}, props)") if prediction_best.nil? || prediction[:confidence].abs > prediction_best[:confidence].abs prediction_best=prediction neighbors_best=@neighbors @@ -273,7 +273,7 @@ module OpenTox @neighbors=neighbors_best ### END AM balanced predictions - else # AM: no balancing + else # AM: no balancing or regression LOGGER.info "LAZAR: Unbalanced." neighbors if @prop_kernel && @prediction_algorithm.include?("svm") @@ -281,7 +281,7 @@ module OpenTox else props = nil end - prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values}, props)") + prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values, :value_map => @value_map}, props)") end value_feature_uri = File.join( @uri, "predicted", "value") @@ -422,7 +422,7 @@ module OpenTox # @return [Boolean] true if compound has databasse activities, false if not def database_activity(subjectid) if @activities[@compound.uri] - @activities[@compound.uri].each { |act| @prediction_dataset.add @compound.uri, @metadata[OT.dependentVariables], act } + @activities[@compound.uri].each { |act| @prediction_dataset.add @compound.uri, @metadata[OT.dependentVariables], @value_map[act] } @prediction_dataset.add_metadata(OT.hasSource => @metadata[OT.trainingDataset]) @prediction_dataset.save(subjectid) true diff --git a/lib/parser.rb b/lib/parser.rb index 2ce9467..07bee67 100644 --- a/lib/parser.rb +++ b/lib/parser.rb @@ -285,22 +285,39 @@ module OpenTox @duplicates = {} end + def detect_new_values(row, value_maps) + row.shift + row.each_index do |i| + value = row[i] + value_maps[i] = Hash.new if value_maps[i].nil? + value_maps[i][value].nil? ? value_maps[i][value]=0 : value_maps[i][value] += 1 + end + value_maps + end + # Load Spreadsheet book (created with roo gem http://roo.rubyforge.org/, excel format specification: http://toxcreate.org/help) # @param [Excel] book Excel workbook object (created with roo gem) # @return [OpenTox::Dataset] Dataset object with Excel data def load_spreadsheet(book) book.default_sheet = 0 add_features book.row(1) + value_maps = Array.new + regression_features=Array.new - # AM: fix mixed read in - regression_features=false 2.upto(book.last_row) { |i| row = book.row(i) - regression_features = detect_regression_features row - break if regression_features==true + value_maps = detect_new_values(row, value_maps) + value_maps.each_with_index { |vm,j| + if vm.size > 5 # 5 is the maximum nr of classes supported by Fminer. + regression_features[j]=true + else + regression_features[j]=false + end + } + } + 2.upto(book.last_row) { |i| + add_values book.row(i), regression_features } - - 2.upto(book.last_row) { |i| add_values book.row(i),regression_features } warnings @dataset end @@ -312,16 +329,23 @@ module OpenTox row = 0 input = csv.split("\n") add_features split_row(input.shift) + value_maps = Array.new + regression_features=Array.new - - # AM: fix mixed read in - regression_features=false input.each { |row| row = split_row(row) - regression_features = detect_regression_features row - break if regression_features==true + value_maps = detect_new_values(row, value_maps) + value_maps.each_with_index { |vm,j| + if vm.size > 5 # 5 is the maximum nr of classes supported by Fminer. + regression_features[j]=true + else + regression_features[j]=false + end + } + } + input.each { |row| + add_values split_row(row), regression_features } - input.each { |row| add_values split_row(row),regression_features } warnings @dataset end @@ -368,20 +392,10 @@ module OpenTox end end - def detect_regression_features row - row.shift - regression_features=false - row.each_index do |i| - value = row[i] - type = feature_type(value) - if type == OT.NumericFeature - regression_features=true - end - end - regression_features - end - - def add_values(row, regression_features=false) + # Adds a row to a dataset + # @param Array A row split up as an array + # @param Array Indicator for regression for each field + def add_values(row, regression_features) smiles = row.shift compound = Compound.from_smiles(smiles) @@ -395,27 +409,23 @@ module OpenTox row.each_index do |i| value = row[i] feature = @features[i] - type = feature_type(value) + type = nil + if (regression_features[i]) + type = feature_type(value) + if type != OT.NumericFeature + raise "Error! Expected numeric values." + end + else + type = OT.NominalFeature + end @feature_types[feature] << type - if (regression_features) + case type + when OT.NumericFeature val = value.to_f - else - case type - when OT.NominalFeature - case value.to_s - when TRUE_REGEXP - val = true - when FALSE_REGEXP - val = false - end - when OT.NumericFeature - val = value.to_f - when OT.StringFeature - val = value.to_s - @activity_errors << smiles+", "+row.join(", ") - end + when OT.NominalFeature + val = value.to_s end if val!=nil @dataset.add(compound.uri, feature, val) @@ -431,17 +441,11 @@ module OpenTox true if Float(value) rescue false end - def classification?(value) - !value.to_s.strip.match(TRUE_REGEXP).nil? or !value.to_s.strip.match(FALSE_REGEXP).nil? - end - def feature_type(value) - if classification? value - return OT.NominalFeature - elsif numeric? value + if numeric? value return OT.NumericFeature else - return OT.StringFeature + return OT.NominalFeature end end |