summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Andreas Maunz <andreas@maunz.de> 2011-06-24 15:53:08 +0200
committer Andreas Maunz <andreas@maunz.de> 2011-06-24 15:53:08 +0200
commit 96ef56a05efffdbfa8f78a58cdeb6f7a37e9452f (patch)
tree 0cbc493757e058a66ae5894ec199b9f3fd00a18b
parent 1d3d27cb689db3091c4ac6e429f2b0f5a198dcdf (diff)
parent f9721059cb28c23c10e83dafe7aa58d9cf650746 (diff)
Merge branch 'multi' into development
-rw-r--r-- lib/algorithm.rb 60
-rw-r--r-- lib/model.rb 20
-rw-r--r-- lib/parser.rb 106
3 files changed, 107 insertions, 79 deletions
diff --git a/lib/algorithm.rb b/lib/algorithm.rb
index 7c1c7a2..1f0ef2a 100644
--- a/lib/algorithm.rb
+++ b/lib/algorithm.rb
@@ -140,25 +140,46 @@ module OpenTox
# @param [optional] params Ignored (only for compatibility with local_svm_regression)
# @return [Hash] Hash with keys `:prediction, :confidence`
def self.weighted_majority_vote(neighbors,params={}, props=nil)
- conf = 0.0
+ neighbor_contribution = 0.0
+ confidence_sum = 0.0
confidence = 0.0
+ prediction = nil
+ positive_map_value= nil
+ negative_map_value= nil
+
neighbors.each do |neighbor|
- case neighbor[:activity].to_s
- when 'true'
- conf += Algorithm.gauss(neighbor[:similarity])
- when 'false'
- conf -= Algorithm.gauss(neighbor[:similarity])
+ neighbor_weight = Algorithm.gauss(neighbor[:similarity]).to_f
+ neighbor_contribution += neighbor[:activity].to_f * neighbor_weight
+
+ if params[:value_map].size == 2 # provide compat to binary classification
+ map_entry = params[:value_map][neighbor[:activity].to_i].to_s # access original neighbor activity
+ case map_entry
+ when TRUE_REGEXP
+ confidence_sum += neighbor_weight
+ positive_map_value = neighbor[:activity]
+ when FALSE_REGEXP
+ confidence_sum -= neighbor_weight
+ negative_map_value = neighbor[:activity]
+ end
+ else
+ confidence_sum += neighbor_weight # AM: new multinomial confidence
end
end
- if conf > 0.0
- prediction = true
- elsif conf < 0.0
- prediction = false
- else
- prediction = nil
- end
- confidence = conf/neighbors.size if neighbors.size > 0
- {:prediction => prediction, :confidence => confidence.abs}
+
+ if params[:value_map].size == 2 # provide compat to binary classification
+ if confidence_sum >= 0.0
+ prediction = positive_map_value unless neighbors.size==0
+ elsif confidence_sum < 0.0
+ prediction = negative_map_value unless neighbors.size==0
+ end
+ else
+ prediction = (neighbor_contribution/confidence_sum).round unless neighbors.size==0 # AM: new multinomial prediction
+ end
+
+ confidence = confidence_sum/neighbors.size if neighbors.size > 0
+ res = {:prediction => prediction, :confidence => confidence.abs}
+ puts res.to_yaml
+ res
end
# Local support vector regression from neighbors
@@ -201,7 +222,8 @@ module OpenTox
acts = neighbors.collect do |n|
act = n[:activity]
end # activities of neighbors for supervised learning
- acts_f = acts.collect {|v| v == true ? 1.0 : 0.0}
+# acts_f = acts.collect {|v| v == true ? 1.0 : 0.0}
+ acts_f = acts
sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors
begin
prediction = (props.nil? ? local_svm(neighbors, acts_f, sims, "C-bsvc", params) : local_svm_prop(props, acts_f, "C-bsvc", params))
@@ -279,7 +301,8 @@ module OpenTox
if type == "nu-svr"
prediction = @r.p
elsif type == "C-bsvc"
- prediction = (@r.p.to_f == 1.0 ? true : false)
+ #prediction = (@r.p.to_f == 1.0 ? true : false)
+ prediction = @r.p
end
@r.quit # free R
rescue Exception => e
@@ -353,7 +376,8 @@ module OpenTox
if type == "nu-svr"
prediction = @r.p
elsif type == "C-bsvc"
- prediction = (@r.p.to_f == 1.0 ? true : false)
+ #prediction = (@r.p.to_f == 1.0 ? true : false)
+ prediction = @r.p
end
@r.quit # free R
rescue Exception => e
diff --git a/lib/model.rb b/lib/model.rb
index e3dce09..5eec366 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -91,7 +91,7 @@ module OpenTox
include Algorithm
include Model
- attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim, :subjectid, :prop_kernel, :value_map
+ attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim, :subjectid, :prop_kernel, :value_map, :balanced
def initialize(uri=nil)
@@ -116,6 +116,7 @@ module OpenTox
@min_sim = 0.3
@prop_kernel = false
+ @balanced = false
end
@@ -211,8 +212,7 @@ module OpenTox
unless database_activity(subjectid) # adds database activity to @prediction_dataset
- case OpenTox::Feature.find(@metadata[OT.dependentVariables]).feature_type
- when "classification"
+ if @balanced && OpenTox::Feature.find(metadata[OT.dependentVariables]).feature_type == "classification"
# AM: Balancing, see http://www.maunz.de/wordpress/opentox/2011/balanced-lazar
l = Array.new # larger
s = Array.new # smaller fraction
@@ -222,12 +222,12 @@ module OpenTox
@fingerprints.each do |training_compound,training_features|
@activities[training_compound].each do |act|
case act.to_s
- when "false"
+ when "0"
l << training_compound
- when "true"
+ when "1"
s << training_compound
else
- LOGGER.warn "BLAZAR: Activity #{act.to_s} should not be reached."
+ LOGGER.warn "BLAZAR: Activity #{act.to_s} should not be reached (supports only two classes)."
end
end
end
@@ -258,7 +258,7 @@ module OpenTox
else
props = nil
end
- prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values}, props)")
+ prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values, :value_map => @value_map}, props)")
if prediction_best.nil? || prediction[:confidence].abs > prediction_best[:confidence].abs
prediction_best=prediction
neighbors_best=@neighbors
@@ -273,7 +273,7 @@ module OpenTox
@neighbors=neighbors_best
### END AM balanced predictions
- else # AM: no balancing
+ else # AM: no balancing or regression
LOGGER.info "LAZAR: Unbalanced."
neighbors
if @prop_kernel && @prediction_algorithm.include?("svm")
@@ -281,7 +281,7 @@ module OpenTox
else
props = nil
end
- prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values}, props)")
+ prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values, :value_map => @value_map}, props)")
end
value_feature_uri = File.join( @uri, "predicted", "value")
@@ -422,7 +422,7 @@ module OpenTox
# @return [Boolean] true if compound has databasse activities, false if not
def database_activity(subjectid)
if @activities[@compound.uri]
- @activities[@compound.uri].each { |act| @prediction_dataset.add @compound.uri, @metadata[OT.dependentVariables], act }
+ @activities[@compound.uri].each { |act| @prediction_dataset.add @compound.uri, @metadata[OT.dependentVariables], @value_map[act] }
@prediction_dataset.add_metadata(OT.hasSource => @metadata[OT.trainingDataset])
@prediction_dataset.save(subjectid)
true
diff --git a/lib/parser.rb b/lib/parser.rb
index 2ce9467..07bee67 100644
--- a/lib/parser.rb
+++ b/lib/parser.rb
@@ -285,22 +285,39 @@ module OpenTox
@duplicates = {}
end
+ def detect_new_values(row, value_maps)
+ row.shift
+ row.each_index do |i|
+ value = row[i]
+ value_maps[i] = Hash.new if value_maps[i].nil?
+ value_maps[i][value].nil? ? value_maps[i][value]=0 : value_maps[i][value] += 1
+ end
+ value_maps
+ end
+
# Load Spreadsheet book (created with roo gem http://roo.rubyforge.org/, excel format specification: http://toxcreate.org/help)
# @param [Excel] book Excel workbook object (created with roo gem)
# @return [OpenTox::Dataset] Dataset object with Excel data
def load_spreadsheet(book)
book.default_sheet = 0
add_features book.row(1)
+ value_maps = Array.new
+ regression_features=Array.new
- # AM: fix mixed read in
- regression_features=false
2.upto(book.last_row) { |i|
row = book.row(i)
- regression_features = detect_regression_features row
- break if regression_features==true
+ value_maps = detect_new_values(row, value_maps)
+ value_maps.each_with_index { |vm,j|
+ if vm.size > 5 # 5 is the maximum nr of classes supported by Fminer.
+ regression_features[j]=true
+ else
+ regression_features[j]=false
+ end
+ }
+ }
+ 2.upto(book.last_row) { |i|
+ add_values book.row(i), regression_features
}
-
- 2.upto(book.last_row) { |i| add_values book.row(i),regression_features }
warnings
@dataset
end
@@ -312,16 +329,23 @@ module OpenTox
row = 0
input = csv.split("\n")
add_features split_row(input.shift)
+ value_maps = Array.new
+ regression_features=Array.new
-
- # AM: fix mixed read in
- regression_features=false
input.each { |row|
row = split_row(row)
- regression_features = detect_regression_features row
- break if regression_features==true
+ value_maps = detect_new_values(row, value_maps)
+ value_maps.each_with_index { |vm,j|
+ if vm.size > 5 # 5 is the maximum nr of classes supported by Fminer.
+ regression_features[j]=true
+ else
+ regression_features[j]=false
+ end
+ }
+ }
+ input.each { |row|
+ add_values split_row(row), regression_features
}
- input.each { |row| add_values split_row(row),regression_features }
warnings
@dataset
end
@@ -368,20 +392,10 @@ module OpenTox
end
end
- def detect_regression_features row
- row.shift
- regression_features=false
- row.each_index do |i|
- value = row[i]
- type = feature_type(value)
- if type == OT.NumericFeature
- regression_features=true
- end
- end
- regression_features
- end
-
- def add_values(row, regression_features=false)
+ # Adds a row to a dataset
+ # @param Array A row split up as an array
+ # @param Array Indicator for regression for each field
+ def add_values(row, regression_features)
smiles = row.shift
compound = Compound.from_smiles(smiles)
@@ -395,27 +409,23 @@ module OpenTox
row.each_index do |i|
value = row[i]
feature = @features[i]
- type = feature_type(value)
+ type = nil
+ if (regression_features[i])
+ type = feature_type(value)
+ if type != OT.NumericFeature
+ raise "Error! Expected numeric values."
+ end
+ else
+ type = OT.NominalFeature
+ end
@feature_types[feature] << type
- if (regression_features)
+ case type
+ when OT.NumericFeature
val = value.to_f
- else
- case type
- when OT.NominalFeature
- case value.to_s
- when TRUE_REGEXP
- val = true
- when FALSE_REGEXP
- val = false
- end
- when OT.NumericFeature
- val = value.to_f
- when OT.StringFeature
- val = value.to_s
- @activity_errors << smiles+", "+row.join(", ")
- end
+ when OT.NominalFeature
+ val = value.to_s
end
if val!=nil
@dataset.add(compound.uri, feature, val)
@@ -431,17 +441,11 @@ module OpenTox
true if Float(value) rescue false
end
- def classification?(value)
- !value.to_s.strip.match(TRUE_REGEXP).nil? or !value.to_s.strip.match(FALSE_REGEXP).nil?
- end
-
def feature_type(value)
- if classification? value
- return OT.NominalFeature
- elsif numeric? value
+ if numeric? value
return OT.NumericFeature
else
- return OT.StringFeature
+ return OT.NominalFeature
end
end