summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author dv <dv@dv.de> 2011-07-20 09:38:15 +0200
committer dv <dv@dv.de> 2011-07-20 09:38:15 +0200
commit afaab75c94d4d87aa985b5ef6a21bf724727c21f (patch)
tree 803b045a0c35d34d681df4cfdf11e650a18c7bb6
parent a688cb99ace5cbfd8802951f57c46f1eb1926a0b (diff)
parent 5a86ca682633dc6865729a75be0046dff2460684 (diff)
Merge branch 'development' into support
Conflicts: lib/algorithm.rb lib/model.rb
-rw-r--r-- Rakefile 6
-rw-r--r-- lib/algorithm.rb 62
-rw-r--r-- lib/validation.rb 49
3 files changed, 77 insertions, 40 deletions
diff --git a/Rakefile b/Rakefile
index bd22c16..952affe 100644
--- a/Rakefile
+++ b/Rakefile
@@ -8,7 +8,7 @@ begin
gem.summary = %Q{Ruby wrapper for the OpenTox REST API}
gem.description = %Q{Ruby wrapper for the OpenTox REST API (http://www.opentox.org)}
gem.email = "helma@in-silico.ch"
- gem.homepage = "http://github.com/helma/opentox-ruby"
+ gem.homepage = "http://github.com/opentox/opentox-ruby"
gem.authors = ["Christoph Helma, Martin Guetlein, Andreas Maunz, Micha Rautenberg, David Vorgrimmler"]
# dependencies with versions
gem.add_dependency "sinatra", "=1.2.6"
@@ -16,7 +16,7 @@ begin
gem.add_dependency "sinatra-respond_to", "=0.7.0"
gem.add_dependency "sinatra-static-assets", "=0.5.0"
gem.add_dependency "rest-client", "=1.6.1"
- gem.add_dependency "rack", "=1.3.0"
+ gem.add_dependency "rack", "=1.3.1"
gem.add_dependency "rack-contrib", "=1.1.0"
gem.add_dependency "rack-flash", "=0.1.1"
gem.add_dependency "nokogiri", "=1.4.4"
@@ -44,6 +44,8 @@ begin
gem.add_dependency "dm-sqlite-adapter", "=1.1.0"
gem.add_dependency "ruby-plot", "=0.5.0"
gem.add_dependency "gsl", "=1.14.7"
+ gem.add_dependency "statsample", "=1.1.0"
+ #gem.add_dependency "statsample-optimization", "=2.1.0"
gem.add_development_dependency 'jeweler'
gem.files = FileList["[A-Z]*", "{bin,generators,lib,test}/**/*", 'lib/jeweler/templates/.gitignore']
diff --git a/lib/algorithm.rb b/lib/algorithm.rb
index 3170efb..528c426 100644
--- a/lib/algorithm.rb
+++ b/lib/algorithm.rb
@@ -230,21 +230,17 @@ module OpenTox
raise "No neighbors found." unless params[:neighbors].size>0
begin
-
props = params[:prop_kernel] ? get_props(params) : nil
acts = params[:neighbors].collect { |n| act = n[:activity].to_f }
sims = params[:neighbors].collect { |n| Algorithm.gauss(n[:similarity]) }
-
LOGGER.debug "Local MLR (Propositionalization / GSL)."
prediction = mlr( {:n_prop => props[0], :q_prop => props[1], :sims => sims, :acts => acts} )
transformer = eval "OpenTox::Algorithm::Transform::#{params[:transform]["class"]}.new ([#{prediction}], #{params[:transform]["offset"]})"
prediction = transformer.values[0]
LOGGER.debug "Prediction is: '" + prediction.to_s + "'."
- sims = params[:neighbors].collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors
- conf = sims.inject{|sum,x| sum + x }
- confidence = conf/params[:neighbors].size if params[:neighbors].size > 0
+ params[:conf_stdev] = "false" if params[:conf_stdev].nil?
+ confidence = get_confidence({:sims => sims, :acts => acts, :neighbors => params[:neighbors], :conf_stdev => params[:conf_stdev]})
{:prediction => prediction, :confidence => confidence}
-
rescue Exception => e
LOGGER.debug "#{e.class}: #{e.message}"
end
@@ -351,8 +347,8 @@ module OpenTox
transformer = eval "OpenTox::Algorithm::Transform::#{params[:transform]["class"]}.new ([#{prediction}], #{params[:transform]["offset"]})"
prediction = transformer.values[0]
LOGGER.debug "Prediction is: '" + prediction.to_s + "'."
- conf = sims.inject{|sum,x| sum + x }
- confidence = conf/params[:neighbors].size
+ params[:conf_stdev] = "false" if params[:conf_stdev].nil?
+ confidence = get_confidence({:sims => sims, :acts => acts, :neighbors => params[:neighbors], :conf_stdev => params[:conf_stdev]})
{:prediction => prediction, :confidence => confidence}
rescue Exception => e
LOGGER.debug "#{e.class}: #{e.message}"
@@ -517,6 +513,29 @@ module OpenTox
prediction
end
+ # Get confidence for regression, with standard deviation of neighbor activity if conf_stdev is set.
+ # @param[Hash] Required keys: :sims, :acts, :neighbors, :conf_stdev
+ # @return[Float] Confidence
+ def self.get_confidence(params)
+ if params[:conf_stdev] == "true"
+ sim_median = Algorithm.median(params[:sims])
+ if sim_median.nil?
+ confidence = nil
+ else
+ standard_deviation = params[:acts].std_dev
+ confidence = (sim_median*Math.exp(-1*standard_deviation)).abs
+ if confidence.nan?
+ confidence = nil
+ end
+ end
+ else
+ conf = params[:sims].inject{|sum,x| sum + x }
+ confidence = conf/params[:neighbors].size
+ end
+ LOGGER.debug "Confidence is: '" + confidence.to_s + "'."
+ return confidence
+ end
+
# Get X and Y size of a nested Array (Matrix)
def self.get_sizes(matrix)
begin
@@ -545,7 +564,7 @@ module OpenTox
row = []
params[:features].each do |f|
if ! params[:fingerprints][n].nil?
- row << (params[:fingerprints][n].include?(f) ? 0.0 : params[:p_values][f])
+ row << (params[:fingerprints][n].include?(f) ? params[:p_values][f] : 0.0)
else
row << 0.0
end
@@ -778,9 +797,14 @@ module OpenTox
Math.exp(-(d*d)/(2*sigma*sigma))
end
+ # For symbolic features
+ # @param [Array] Array to test, must indicate non-occurrence with 0.
+ # @return [Boolean] Whether the feature is singular or non-occurring or present everywhere.
def self.isnull_or_singular?(array)
nr_zeroes = array.count(0)
- return ((nr_zeroes == array.size) || (nr_zeroes == 0) || (nr_zeroes == 1) || (nr_zeroes == array.size-1) )
+ return (nr_zeroes == array.size) || # remove non-occurring feature
+ (nr_zeroes == array.size-1) || # remove singular feature
+ (nr_zeroes == 0) # also remove feature present everywhere
end
# Median of an array
@@ -865,6 +889,24 @@ module OpenTox
p_sum
end
+ # Adds variance, mean and standard deviation calculation to Array class
+ module Variance
+ def sum(&blk)
+ map(&blk).inject { |sum, element| sum + element }
+ end
+ def mean
+ (sum.to_f / size.to_f)
+ end
+ def variance
+ m = mean
+ sum { |i| ( i - m )**2 } / (size-1).to_f
+ end
+ def std_dev
+ Math.sqrt(variance)
+ end
+ end
+ Array.send :include, Variance
+
end
end
diff --git a/lib/validation.rb b/lib/validation.rb
index 3e8367c..646b076 100644
--- a/lib/validation.rb
+++ b/lib/validation.rb
@@ -85,34 +85,27 @@ module OpenTox
@metadata = YAML.load(OpenTox::RestClientWrapper.get(uri,{:subjectid => subjectid, :accept => "application/x-yaml"}))
end
- # PENDING: creates summary as used for ToxCreate
- def summary
- if @metadata[OT.classificationStatistics]
- res = {
- :nr_predictions => @metadata[OT.numInstances].to_i - @metadata[OT.numUnpredicted].to_i,
- :correct_predictions => @metadata[OT.classificationStatistics][OT.percentCorrect],
- :weighted_area_under_roc => @metadata[OT.classificationStatistics][OT.weightedAreaUnderRoc],
- }
- @metadata[OT.classificationStatistics][OT.classValueStatistics].each do |s|
- if s[OT.classValue].to_s=="true"
- res[:true_positives] = s[OT.numTruePositives]
- res[:false_positives] = s[OT.numFalsePositives]
- res[:true_negatives] = s[OT.numTrueNegatives]
- res[:false_negatives] = s[OT.numFalseNegatives]
- res[:sensitivity] = s[OT.truePositiveRate]
- res[:specificity] = s[OT.trueNegativeRate]
- break
+ # returns confusion matrix as array, predicted values are in rows
+ # example:
+ # [[nil,"active","moderate","inactive"],["active",1,3,99],["moderate",4,2,8],["inactive",3,8,6]]
+ # -> 99 inactive compounds have been predicted as active
+ def confusion_matrix
+ raise "no classification statistics, probably a regression valdiation" unless @metadata[OT.classificationStatistics]
+ matrix = @metadata[OT.classificationStatistics][OT.confusionMatrix][OT.confusionMatrixCell]
+ values = matrix.collect{|cell| cell[OT.confusionMatrixPredicted]}.uniq
+ table = [[nil]+values]
+ values.each do |c|
+ table << [c]
+ values.each do |r|
+ matrix.each do |cell|
+ if cell[OT.confusionMatrixPredicted]==c and cell[OT.confusionMatrixActual]==r
+ table[-1] << cell[OT.confusionMatrixValue].to_f
+ break
+ end
end
end
- res
- elsif @metadata[OT.regressionStatistics]
- {
- :nr_predictions => @metadata[OT.numInstances].to_i - @metadata[OT.numUnpredicted].to_i,
- :r_square => @metadata[OT.regressionStatistics][OT.rSquare],
- :root_mean_squared_error => @metadata[OT.regressionStatistics][OT.rootMeanSquaredError],
- :mean_absolute_error => @metadata[OT.regressionStatistics][OT.meanAbsoluteError],
- }
end
+ table
end
end
@@ -171,9 +164,9 @@ module OpenTox
@metadata = YAML.load(OpenTox::RestClientWrapper.get(uri,{:subjectid => subjectid, :accept => "application/x-yaml"}))
end
- # PENDING: creates summary as used for ToxCreate
- def summary( subjectid=nil )
- Validation.from_cv_statistics( @uri, subjectid ).summary
+ # returns a Validation object containing the statistics of the crossavlidation
+ def statistics( subjectid=nil )
+ Validation.from_cv_statistics( @uri, subjectid )
end
end