diff options
author | dv <dv@dv.de> | 2011-07-20 09:38:15 +0200 |
---|---|---|
committer | dv <dv@dv.de> | 2011-07-20 09:38:15 +0200 |
commit | afaab75c94d4d87aa985b5ef6a21bf724727c21f (patch) | |
tree | 803b045a0c35d34d681df4cfdf11e650a18c7bb6 | |
parent | a688cb99ace5cbfd8802951f57c46f1eb1926a0b (diff) | |
parent | 5a86ca682633dc6865729a75be0046dff2460684 (diff) |
Merge branch 'development' into support

Conflicts:
	lib/algorithm.rb
	lib/model.rb
-rw-r--r-- | Rakefile | 6 | ||||
-rw-r--r-- | lib/algorithm.rb | 62 | ||||
-rw-r--r-- | lib/validation.rb | 49 |
3 files changed, 77 insertions, 40 deletions
@@ -8,7 +8,7 @@ begin gem.summary = %Q{Ruby wrapper for the OpenTox REST API} gem.description = %Q{Ruby wrapper for the OpenTox REST API (http://www.opentox.org)} gem.email = "helma@in-silico.ch" - gem.homepage = "http://github.com/helma/opentox-ruby" + gem.homepage = "http://github.com/opentox/opentox-ruby" gem.authors = ["Christoph Helma, Martin Guetlein, Andreas Maunz, Micha Rautenberg, David Vorgrimmler"] # dependencies with versions gem.add_dependency "sinatra", "=1.2.6" @@ -16,7 +16,7 @@ begin gem.add_dependency "sinatra-respond_to", "=0.7.0" gem.add_dependency "sinatra-static-assets", "=0.5.0" gem.add_dependency "rest-client", "=1.6.1" - gem.add_dependency "rack", "=1.3.0" + gem.add_dependency "rack", "=1.3.1" gem.add_dependency "rack-contrib", "=1.1.0" gem.add_dependency "rack-flash", "=0.1.1" gem.add_dependency "nokogiri", "=1.4.4" @@ -44,6 +44,8 @@ begin gem.add_dependency "dm-sqlite-adapter", "=1.1.0" gem.add_dependency "ruby-plot", "=0.5.0" gem.add_dependency "gsl", "=1.14.7" + gem.add_dependency "statsample", "=1.1.0" + #gem.add_dependency "statsample-optimization", "=2.1.0" gem.add_development_dependency 'jeweler' gem.files = FileList["[A-Z]*", "{bin,generators,lib,test}/**/*", 'lib/jeweler/templates/.gitignore'] diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 3170efb..528c426 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -230,21 +230,17 @@ module OpenTox raise "No neighbors found." unless params[:neighbors].size>0 begin - props = params[:prop_kernel] ? get_props(params) : nil acts = params[:neighbors].collect { |n| act = n[:activity].to_f } sims = params[:neighbors].collect { |n| Algorithm.gauss(n[:similarity]) } - LOGGER.debug "Local MLR (Propositionalization / GSL)." 
prediction = mlr( {:n_prop => props[0], :q_prop => props[1], :sims => sims, :acts => acts} ) transformer = eval "OpenTox::Algorithm::Transform::#{params[:transform]["class"]}.new ([#{prediction}], #{params[:transform]["offset"]})" prediction = transformer.values[0] LOGGER.debug "Prediction is: '" + prediction.to_s + "'." - sims = params[:neighbors].collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors - conf = sims.inject{|sum,x| sum + x } - confidence = conf/params[:neighbors].size if params[:neighbors].size > 0 + params[:conf_stdev] = "false" if params[:conf_stdev].nil? + confidence = get_confidence({:sims => sims, :acts => acts, :neighbors => params[:neighbors], :conf_stdev => params[:conf_stdev]}) {:prediction => prediction, :confidence => confidence} - rescue Exception => e LOGGER.debug "#{e.class}: #{e.message}" end @@ -351,8 +347,8 @@ module OpenTox transformer = eval "OpenTox::Algorithm::Transform::#{params[:transform]["class"]}.new ([#{prediction}], #{params[:transform]["offset"]})" prediction = transformer.values[0] LOGGER.debug "Prediction is: '" + prediction.to_s + "'." - conf = sims.inject{|sum,x| sum + x } - confidence = conf/params[:neighbors].size + params[:conf_stdev] = "false" if params[:conf_stdev].nil? + confidence = get_confidence({:sims => sims, :acts => acts, :neighbors => params[:neighbors], :conf_stdev => params[:conf_stdev]}) {:prediction => prediction, :confidence => confidence} rescue Exception => e LOGGER.debug "#{e.class}: #{e.message}" @@ -517,6 +513,29 @@ module OpenTox prediction end + # Get confidence for regression, with standard deviation of neighbor activity if conf_stdev is set. + # @param[Hash] Required keys: :sims, :acts, :neighbors, :conf_stdev + # @return[Float] Confidence + def self.get_confidence(params) + if params[:conf_stdev] == "true" + sim_median = Algorithm.median(params[:sims]) + if sim_median.nil? 
+ confidence = nil + else + standard_deviation = params[:acts].std_dev + confidence = (sim_median*Math.exp(-1*standard_deviation)).abs + if confidence.nan? + confidence = nil + end + end + else + conf = params[:sims].inject{|sum,x| sum + x } + confidence = conf/params[:neighbors].size + end + LOGGER.debug "Confidence is: '" + confidence.to_s + "'." + return confidence + end + # Get X and Y size of a nested Array (Matrix) def self.get_sizes(matrix) begin @@ -545,7 +564,7 @@ module OpenTox row = [] params[:features].each do |f| if ! params[:fingerprints][n].nil? - row << (params[:fingerprints][n].include?(f) ? 0.0 : params[:p_values][f]) + row << (params[:fingerprints][n].include?(f) ? params[:p_values][f] : 0.0) else row << 0.0 end @@ -778,9 +797,14 @@ module OpenTox Math.exp(-(d*d)/(2*sigma*sigma)) end + # For symbolic features + # @param [Array] Array to test, must indicate non-occurrence with 0. + # @return [Boolean] Whether the feature is singular or non-occurring or present everywhere. 
def self.isnull_or_singular?(array) nr_zeroes = array.count(0) - return ((nr_zeroes == array.size) || (nr_zeroes == 0) || (nr_zeroes == 1) || (nr_zeroes == array.size-1) ) + return (nr_zeroes == array.size) || # remove non-occurring feature + (nr_zeroes == array.size-1) || # remove singular feature + (nr_zeroes == 0) # also remove feature present everywhere end # Median of an array @@ -865,6 +889,24 @@ module OpenTox p_sum end + # Adds variance, mean and standard deviation calculation to Array class + module Variance + def sum(&blk) + map(&blk).inject { |sum, element| sum + element } + end + def mean + (sum.to_f / size.to_f) + end + def variance + m = mean + sum { |i| ( i - m )**2 } / (size-1).to_f + end + def std_dev + Math.sqrt(variance) + end + end + Array.send :include, Variance + end end diff --git a/lib/validation.rb b/lib/validation.rb index 3e8367c..646b076 100644 --- a/lib/validation.rb +++ b/lib/validation.rb @@ -85,34 +85,27 @@ module OpenTox @metadata = YAML.load(OpenTox::RestClientWrapper.get(uri,{:subjectid => subjectid, :accept => "application/x-yaml"})) end - # PENDING: creates summary as used for ToxCreate - def summary - if @metadata[OT.classificationStatistics] - res = { - :nr_predictions => @metadata[OT.numInstances].to_i - @metadata[OT.numUnpredicted].to_i, - :correct_predictions => @metadata[OT.classificationStatistics][OT.percentCorrect], - :weighted_area_under_roc => @metadata[OT.classificationStatistics][OT.weightedAreaUnderRoc], - } - @metadata[OT.classificationStatistics][OT.classValueStatistics].each do |s| - if s[OT.classValue].to_s=="true" - res[:true_positives] = s[OT.numTruePositives] - res[:false_positives] = s[OT.numFalsePositives] - res[:true_negatives] = s[OT.numTrueNegatives] - res[:false_negatives] = s[OT.numFalseNegatives] - res[:sensitivity] = s[OT.truePositiveRate] - res[:specificity] = s[OT.trueNegativeRate] - break + # returns confusion matrix as array, predicted values are in rows + # example: + # 
[[nil,"active","moderate","inactive"],["active",1,3,99],["moderate",4,2,8],["inactive",3,8,6]] + # -> 99 inactive compounds have been predicted as active + def confusion_matrix + raise "no classification statistics, probably a regression valdiation" unless @metadata[OT.classificationStatistics] + matrix = @metadata[OT.classificationStatistics][OT.confusionMatrix][OT.confusionMatrixCell] + values = matrix.collect{|cell| cell[OT.confusionMatrixPredicted]}.uniq + table = [[nil]+values] + values.each do |c| + table << [c] + values.each do |r| + matrix.each do |cell| + if cell[OT.confusionMatrixPredicted]==c and cell[OT.confusionMatrixActual]==r + table[-1] << cell[OT.confusionMatrixValue].to_f + break + end end end - res - elsif @metadata[OT.regressionStatistics] - { - :nr_predictions => @metadata[OT.numInstances].to_i - @metadata[OT.numUnpredicted].to_i, - :r_square => @metadata[OT.regressionStatistics][OT.rSquare], - :root_mean_squared_error => @metadata[OT.regressionStatistics][OT.rootMeanSquaredError], - :mean_absolute_error => @metadata[OT.regressionStatistics][OT.meanAbsoluteError], - } end + table end end @@ -171,9 +164,9 @@ module OpenTox @metadata = YAML.load(OpenTox::RestClientWrapper.get(uri,{:subjectid => subjectid, :accept => "application/x-yaml"})) end - # PENDING: creates summary as used for ToxCreate - def summary( subjectid=nil ) - Validation.from_cv_statistics( @uri, subjectid ).summary + # returns a Validation object containing the statistics of the crossavlidation + def statistics( subjectid=nil ) + Validation.from_cv_statistics( @uri, subjectid ) end end |