summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Andreas Maunz <andreas@maunz.de> 2011-07-19 16:27:53 +0200
committer Andreas Maunz <andreas@maunz.de> 2011-07-19 16:27:53 +0200
commit 5d74d060687cb374269aa152c829ba28fbccf506 (patch)
tree e9c913a565878159e42a93df19bbba8b0c06a06e
parent b52a34f062fc4ad5cacf403e88861b24c3117f91 (diff)
parent c6261ec85b8cf8d7c80b4718a927adeade1a127b (diff)
Merge branch 'development' into reg_conf
Conflicts: Rakefile lib/algorithm.rb lib/model.rb
-rw-r--r-- Rakefile 6
-rw-r--r-- lib/algorithm.rb 49
-rw-r--r-- lib/model.rb 123
-rw-r--r-- lib/validation.rb 49
4 files changed, 67 insertions, 160 deletions
diff --git a/Rakefile b/Rakefile
index bd22c16..952affe 100644
--- a/Rakefile
+++ b/Rakefile
@@ -8,7 +8,7 @@ begin
gem.summary = %Q{Ruby wrapper for the OpenTox REST API}
gem.description = %Q{Ruby wrapper for the OpenTox REST API (http://www.opentox.org)}
gem.email = "helma@in-silico.ch"
- gem.homepage = "http://github.com/helma/opentox-ruby"
+ gem.homepage = "http://github.com/opentox/opentox-ruby"
gem.authors = ["Christoph Helma, Martin Guetlein, Andreas Maunz, Micha Rautenberg, David Vorgrimmler"]
# dependencies with versions
gem.add_dependency "sinatra", "=1.2.6"
@@ -16,7 +16,7 @@ begin
gem.add_dependency "sinatra-respond_to", "=0.7.0"
gem.add_dependency "sinatra-static-assets", "=0.5.0"
gem.add_dependency "rest-client", "=1.6.1"
- gem.add_dependency "rack", "=1.3.0"
+ gem.add_dependency "rack", "=1.3.1"
gem.add_dependency "rack-contrib", "=1.1.0"
gem.add_dependency "rack-flash", "=0.1.1"
gem.add_dependency "nokogiri", "=1.4.4"
@@ -44,6 +44,8 @@ begin
gem.add_dependency "dm-sqlite-adapter", "=1.1.0"
gem.add_dependency "ruby-plot", "=0.5.0"
gem.add_dependency "gsl", "=1.14.7"
+ gem.add_dependency "statsample", "=1.1.0"
+ #gem.add_dependency "statsample-optimization", "=2.1.0"
gem.add_development_dependency 'jeweler'
gem.files = FileList["[A-Z]*", "{bin,generators,lib,test}/**/*", 'lib/jeweler/templates/.gitignore']
diff --git a/lib/algorithm.rb b/lib/algorithm.rb
index 43845fb..cfca069 100644
--- a/lib/algorithm.rb
+++ b/lib/algorithm.rb
@@ -205,21 +205,17 @@ module OpenTox
raise "No neighbors found." unless params[:neighbors].size>0
begin
-
props = params[:prop_kernel] ? get_props(params) : nil
acts = params[:neighbors].collect { |n| act = n[:activity].to_f }
sims = params[:neighbors].collect { |n| Algorithm.gauss(n[:similarity]) }
-
LOGGER.debug "Local MLR (Propositionalization / GSL)."
prediction = mlr( {:n_prop => props[0], :q_prop => props[1], :sims => sims, :acts => acts} )
transformer = eval "OpenTox::Algorithm::Transform::#{params[:transform]["class"]}.new ([#{prediction}], #{params[:transform]["offset"]})"
prediction = transformer.values[0]
LOGGER.debug "Prediction is: '" + prediction.to_s + "'."
- sims = params[:neighbors].collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors
- conf = sims.inject{|sum,x| sum + x }
- confidence = conf/params[:neighbors].size if params[:neighbors].size > 0
+ params[:conf_stdev] = "false" if params[:conf_stdev].nil?
+ confidence = get_confidence({:sims => sims, :acts => acts, :neighbors => params[:neighbors], :conf_stdev => params[:conf_stdev]})
{:prediction => prediction, :confidence => confidence}
-
rescue Exception => e
LOGGER.debug "#{e.class}: #{e.message}"
end
@@ -326,19 +322,9 @@ module OpenTox
transformer = eval "OpenTox::Algorithm::Transform::#{params[:transform]["class"]}.new ([#{prediction}], #{params[:transform]["offset"]})"
prediction = transformer.values[0]
LOGGER.debug "Prediction is: '" + prediction.to_s + "'."
- sim_median = Algorithm.median(sims)
- if sim_median.nil?
- confidence = nil
- LOGGER.debug "dv ------------ sim_median is nil"
- else
- standard_deviation = acts.std_dev
- confidence = (sim_median*Math.exp(-1*standard_deviation)).abs
- if confidence.nan?
- confidence = nil
- end
- end
- LOGGER.debug "Confidence is: '" + confidence.to_s + "'."
- return {:prediction => prediction, :confidence => confidence}
+ params[:conf_stdev] = "false" if params[:conf_stdev].nil?
+ confidence = get_confidence({:sims => sims, :acts => acts, :neighbors => params[:neighbors], :conf_stdev => params[:conf_stdev]})
+ {:prediction => prediction, :confidence => confidence}
rescue Exception => e
LOGGER.debug "#{e.class}: #{e.message}"
LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
@@ -502,6 +488,29 @@ module OpenTox
prediction
end
+ # Get confidence for regression, with standard deviation of neighbor activity if conf_stdev is set.
+ # @param [Hash] params Required keys: :sims, :acts, :neighbors, :conf_stdev
+ # @return [Float, nil] Confidence; nil in conf_stdev mode when the similarity median is nil or the result is NaN
+ def self.get_confidence(params)
+ if params[:conf_stdev] == "true"
+ sim_median = Algorithm.median(params[:sims])
+ if sim_median.nil?
+ confidence = nil
+ else
+ standard_deviation = params[:acts].std_dev
+ confidence = (sim_median*Math.exp(-1*standard_deviation)).abs
+ if confidence.nan?
+ confidence = nil
+ end
+ end
+ else
+ conf = params[:sims].inject{|sum,x| sum + x }
+ confidence = conf/params[:neighbors].size
+ end
+ LOGGER.debug "Confidence is: '" + confidence.to_s + "'."
+ return confidence
+ end
+
# Get X and Y size of a nested Array (Matrix)
def self.get_sizes(matrix)
begin
@@ -840,7 +849,7 @@ module OpenTox
end
def variance
m = mean
- sum { |i| ( i - m )**2 } / size
+ sum { |i| ( i - m )**2 } / (size-1).to_f
end
def std_dev
Math.sqrt(variance)
diff --git a/lib/model.rb b/lib/model.rb
index 825f697..13212ee 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -91,7 +91,7 @@ module OpenTox
include Algorithm
include Model
- attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim, :subjectid, :prop_kernel, :value_map, :balanced
+ attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim, :subjectid, :prop_kernel, :value_map, :transform
def initialize(uri=nil)
@@ -116,7 +116,7 @@ module OpenTox
@min_sim = 0.3
@prop_kernel = false
- @balanced = false
+ @transform = { "class" => "NOP" }
end
@@ -212,78 +212,17 @@ module OpenTox
unless database_activity(subjectid) # adds database activity to @prediction_dataset
- if @balanced && OpenTox::Feature.find(metadata[OT.dependentVariables]).feature_type == "classification"
- # AM: Balancing, see http://www.maunz.de/wordpress/opentox/2011/balanced-lazar
- l = Array.new # larger
- s = Array.new # smaller fraction
+ neighbors
+ prediction = eval("#{@prediction_algorithm} ( { :neighbors => @neighbors,
+ :compound => @compound,
+ :features => @features,
+ :p_values => @p_values,
+ :fingerprints => @fingerprints,
+ :similarity_algorithm => @similarity_algorithm,
+ :prop_kernel => @prop_kernel,
+ :value_map => @value_map,
+ :transform => @transform } ) ")
- raise "no fingerprints in model" if @fingerprints.size==0
-
- @fingerprints.each do |training_compound,training_features|
- @activities[training_compound].each do |act|
- case act.to_s
- when "0"
- l << training_compound
- when "1"
- s << training_compound
- else
- LOGGER.warn "BLAZAR: Activity #{act.to_s} should not be reached (supports only two classes)."
- end
- end
- end
- if s.size > l.size then
- l,s = s,l # happy swapping
- LOGGER.info "BLAZAR: |s|=#{s.size}, |l|=#{l.size}."
- end
- # determine ratio
- modulo = l.size.divmod(s.size)# modulo[0]=ratio, modulo[1]=rest
- LOGGER.info "BLAZAR: Balance: #{modulo[0]}, rest #{modulo[1]}."
-
- # AM: Balanced predictions
- addon = (modulo[1].to_f/modulo[0]).ceil # what will be added in each round
- slack = (addon!=0 ? modulo[1].divmod(addon)[1] : 0) # what remains for the last round
- position = 0
- predictions = Array.new
-
- prediction_best=nil
- neighbors_best=nil
-
- begin
- for i in 1..modulo[0] do
- (i == modulo[0]) && (slack>0) ? lr_size = s.size + slack : lr_size = s.size + addon # determine fraction
- LOGGER.info "BLAZAR: Neighbors round #{i}: #{position} + #{lr_size}."
- neighbors_balanced(s, l, position, lr_size) # get ratio fraction of larger part
- if @prop_kernel && ( @prediction_algorithm.include?("svm") || @prediction_algorithm.include?("local_mlr_prop") )
- props = get_props
- else
- props = nil
- end
- prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values, :value_map => @value_map}, props)")
- if prediction_best.nil? || prediction[:confidence].abs > prediction_best[:confidence].abs
- prediction_best=prediction
- neighbors_best=@neighbors
- end
- position = position + lr_size
- end
- rescue Exception => e
- LOGGER.error "BLAZAR failed in prediction: "+e.class.to_s+": "+e.message
- end
-
- prediction=prediction_best
- @neighbors=neighbors_best
- ### END AM balanced predictions
-
- else # AM: no balancing or regression
- LOGGER.info "LAZAR: Unbalanced."
- neighbors
- if @prop_kernel && ( @prediction_algorithm.include?("svm") || @prediction_algorithm.include?("local_mlr_prop") )
- props = get_props
- else
- props = nil
- end
- prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values, :value_map => @value_map}, props)")
- end
-
value_feature_uri = File.join( @uri, "predicted", "value")
confidence_feature_uri = File.join( @uri, "predicted", "confidence")
@@ -356,43 +295,7 @@ module OpenTox
@prediction_dataset
end
- # Calculate the propositionalization matrix aka instantiation matrix (0/1 entries for features)
- # Same for the vector describing the query compound
- def get_props
- matrix = Array.new
- begin
- @neighbors.each do |n|
- n = n[:compound]
- row = []
- @features.each do |f|
- if ! @fingerprints[n].nil?
- row << (@fingerprints[n].include?(f) ? 0.0 : @p_values[f])
- else
- row << 0.0
- end
- end
- matrix << row
- end
- row = []
- @features.each do |f|
- row << (@compound.match([f]).size == 0 ? 0.0 : @p_values[f])
- end
- rescue Exception => e
- LOGGER.debug "get_props failed with '" + $! + "'"
- end
- [ matrix, row ]
- end
-
- # Find neighbors and store them as object variable, access only a subset of compounds for that.
- def neighbors_balanced(s, l, start, offset)
- @compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm
- @neighbors = []
- [ l[start, offset ] , s ].flatten.each do |training_compound| # AM: access only a balanced subset
- training_features = @fingerprints[training_compound]
- add_neighbor training_features, training_compound
- end
-
- end
+
# Find neighbors and store them as object variable, access all compounds for that.
def neighbors
diff --git a/lib/validation.rb b/lib/validation.rb
index 3e8367c..646b076 100644
--- a/lib/validation.rb
+++ b/lib/validation.rb
@@ -85,34 +85,27 @@ module OpenTox
@metadata = YAML.load(OpenTox::RestClientWrapper.get(uri,{:subjectid => subjectid, :accept => "application/x-yaml"}))
end
- # PENDING: creates summary as used for ToxCreate
- def summary
- if @metadata[OT.classificationStatistics]
- res = {
- :nr_predictions => @metadata[OT.numInstances].to_i - @metadata[OT.numUnpredicted].to_i,
- :correct_predictions => @metadata[OT.classificationStatistics][OT.percentCorrect],
- :weighted_area_under_roc => @metadata[OT.classificationStatistics][OT.weightedAreaUnderRoc],
- }
- @metadata[OT.classificationStatistics][OT.classValueStatistics].each do |s|
- if s[OT.classValue].to_s=="true"
- res[:true_positives] = s[OT.numTruePositives]
- res[:false_positives] = s[OT.numFalsePositives]
- res[:true_negatives] = s[OT.numTrueNegatives]
- res[:false_negatives] = s[OT.numFalseNegatives]
- res[:sensitivity] = s[OT.truePositiveRate]
- res[:specificity] = s[OT.trueNegativeRate]
- break
+ # returns confusion matrix as array, predicted values are in rows
+ # example:
+ # [[nil,"active","moderate","inactive"],["active",1,3,99],["moderate",4,2,8],["inactive",3,8,6]]
+ # -> 99 inactive compounds have been predicted as active
+ def confusion_matrix
+ raise "no classification statistics, probably a regression valdiation" unless @metadata[OT.classificationStatistics]
+ matrix = @metadata[OT.classificationStatistics][OT.confusionMatrix][OT.confusionMatrixCell]
+ values = matrix.collect{|cell| cell[OT.confusionMatrixPredicted]}.uniq
+ table = [[nil]+values]
+ values.each do |c|
+ table << [c]
+ values.each do |r|
+ matrix.each do |cell|
+ if cell[OT.confusionMatrixPredicted]==c and cell[OT.confusionMatrixActual]==r
+ table[-1] << cell[OT.confusionMatrixValue].to_f
+ break
+ end
end
end
- res
- elsif @metadata[OT.regressionStatistics]
- {
- :nr_predictions => @metadata[OT.numInstances].to_i - @metadata[OT.numUnpredicted].to_i,
- :r_square => @metadata[OT.regressionStatistics][OT.rSquare],
- :root_mean_squared_error => @metadata[OT.regressionStatistics][OT.rootMeanSquaredError],
- :mean_absolute_error => @metadata[OT.regressionStatistics][OT.meanAbsoluteError],
- }
end
+ table
end
end
@@ -171,9 +164,9 @@ module OpenTox
@metadata = YAML.load(OpenTox::RestClientWrapper.get(uri,{:subjectid => subjectid, :accept => "application/x-yaml"}))
end
- # PENDING: creates summary as used for ToxCreate
- def summary( subjectid=nil )
- Validation.from_cv_statistics( @uri, subjectid ).summary
+ # returns a Validation object containing the statistics of the crossvalidation
+ def statistics( subjectid=nil )
+ Validation.from_cv_statistics( @uri, subjectid )
end
end