diff options
author | Andreas Maunz <andreas@maunz.de> | 2011-07-19 16:27:53 +0200 |
---|---|---|
committer | Andreas Maunz <andreas@maunz.de> | 2011-07-19 16:27:53 +0200 |
commit | 5d74d060687cb374269aa152c829ba28fbccf506 (patch) | |
tree | e9c913a565878159e42a93df19bbba8b0c06a06e | |
parent | b52a34f062fc4ad5cacf403e88861b24c3117f91 (diff) | |
parent | c6261ec85b8cf8d7c80b4718a927adeade1a127b (diff) |
Merge branch 'development' into reg_conf
Conflicts:
Rakefile
lib/algorithm.rb
lib/model.rb
-rw-r--r-- | Rakefile | 6 | ||||
-rw-r--r-- | lib/algorithm.rb | 49 | ||||
-rw-r--r-- | lib/model.rb | 123 | ||||
-rw-r--r-- | lib/validation.rb | 49 |
4 files changed, 67 insertions, 160 deletions
@@ -8,7 +8,7 @@ begin gem.summary = %Q{Ruby wrapper for the OpenTox REST API} gem.description = %Q{Ruby wrapper for the OpenTox REST API (http://www.opentox.org)} gem.email = "helma@in-silico.ch" - gem.homepage = "http://github.com/helma/opentox-ruby" + gem.homepage = "http://github.com/opentox/opentox-ruby" gem.authors = ["Christoph Helma, Martin Guetlein, Andreas Maunz, Micha Rautenberg, David Vorgrimmler"] # dependencies with versions gem.add_dependency "sinatra", "=1.2.6" @@ -16,7 +16,7 @@ begin gem.add_dependency "sinatra-respond_to", "=0.7.0" gem.add_dependency "sinatra-static-assets", "=0.5.0" gem.add_dependency "rest-client", "=1.6.1" - gem.add_dependency "rack", "=1.3.0" + gem.add_dependency "rack", "=1.3.1" gem.add_dependency "rack-contrib", "=1.1.0" gem.add_dependency "rack-flash", "=0.1.1" gem.add_dependency "nokogiri", "=1.4.4" @@ -44,6 +44,8 @@ begin gem.add_dependency "dm-sqlite-adapter", "=1.1.0" gem.add_dependency "ruby-plot", "=0.5.0" gem.add_dependency "gsl", "=1.14.7" + gem.add_dependency "statsample", "=1.1.0" + #gem.add_dependency "statsample-optimization", "=2.1.0" gem.add_development_dependency 'jeweler' gem.files = FileList["[A-Z]*", "{bin,generators,lib,test}/**/*", 'lib/jeweler/templates/.gitignore'] diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 43845fb..cfca069 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -205,21 +205,17 @@ module OpenTox raise "No neighbors found." unless params[:neighbors].size>0 begin - props = params[:prop_kernel] ? get_props(params) : nil acts = params[:neighbors].collect { |n| act = n[:activity].to_f } sims = params[:neighbors].collect { |n| Algorithm.gauss(n[:similarity]) } - LOGGER.debug "Local MLR (Propositionalization / GSL)." prediction = mlr( {:n_prop => props[0], :q_prop => props[1], :sims => sims, :acts => acts} ) transformer = eval "OpenTox::Algorithm::Transform::#{params[:transform]["class"]}.new ([#{prediction}], #{params[:transform]["offset"]})" prediction = transformer.values[0] LOGGER.debug "Prediction is: '" + prediction.to_s + "'." - sims = params[:neighbors].collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors - conf = sims.inject{|sum,x| sum + x } - confidence = conf/params[:neighbors].size if params[:neighbors].size > 0 + params[:conf_stdev] = "false" if params[:conf_stdev].nil? + confidence = get_confidence({:sims => sims, :acts => acts, :neighbors => params[:neighbors], :conf_stdev => params[:conf_stdev]}) {:prediction => prediction, :confidence => confidence} - rescue Exception => e LOGGER.debug "#{e.class}: #{e.message}" end @@ -326,19 +322,9 @@ module OpenTox transformer = eval "OpenTox::Algorithm::Transform::#{params[:transform]["class"]}.new ([#{prediction}], #{params[:transform]["offset"]})" prediction = transformer.values[0] LOGGER.debug "Prediction is: '" + prediction.to_s + "'." - sim_median = Algorithm.median(sims) - if sim_median.nil? - confidence = nil - LOGGER.debug "dv ------------ sim_median is nil" - else - standard_deviation = acts.std_dev - confidence = (sim_median*Math.exp(-1*standard_deviation)).abs - if confidence.nan? - confidence = nil - end - end - LOGGER.debug "Confidence is: '" + confidence.to_s + "'." - return {:prediction => prediction, :confidence => confidence} + params[:conf_stdev] = "false" if params[:conf_stdev].nil? + confidence = get_confidence({:sims => sims, :acts => acts, :neighbors => params[:neighbors], :conf_stdev => params[:conf_stdev]}) + {:prediction => prediction, :confidence => confidence} rescue Exception => e LOGGER.debug "#{e.class}: #{e.message}" LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" @@ -502,6 +488,29 @@ module OpenTox prediction end + # Get confidence for regression, with standard deviation of neighbor activity if conf_stdev is set. + # @param[Hash] Required keys: :sims, :acts, :neighbors, :conf_stdev + # @return[Float] Confidence + def self.get_confidence(params) + if params[:conf_stdev] == "true" + sim_median = Algorithm.median(params[:sims]) + if sim_median.nil? + confidence = nil + else + standard_deviation = params[:acts].std_dev + confidence = (sim_median*Math.exp(-1*standard_deviation)).abs + if confidence.nan? + confidence = nil + end + end + else + conf = params[:sims].inject{|sum,x| sum + x } + confidence = conf/params[:neighbors].size + end + LOGGER.debug "Confidence is: '" + confidence.to_s + "'." + return confidence + end + # Get X and Y size of a nested Array (Matrix) def self.get_sizes(matrix) begin @@ -840,7 +849,7 @@ module OpenTox end def variance m = mean - sum { |i| ( i - m )**2 } / size + sum { |i| ( i - m )**2 } / (size-1).to_f end def std_dev Math.sqrt(variance) diff --git a/lib/model.rb b/lib/model.rb index 825f697..13212ee 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -91,7 +91,7 @@ module OpenTox include Algorithm include Model - attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim, :subjectid, :prop_kernel, :value_map, :balanced + attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim, :subjectid, :prop_kernel, :value_map, :transform def initialize(uri=nil) @@ -116,7 +116,7 @@ module OpenTox @min_sim = 0.3 @prop_kernel = false - @balanced = false + @transform = { "class" => "NOP" } end @@ -212,78 +212,17 @@ module OpenTox unless database_activity(subjectid) # adds database activity to @prediction_dataset - if @balanced && OpenTox::Feature.find(metadata[OT.dependentVariables]).feature_type == "classification" - # AM: Balancing, see http://www.maunz.de/wordpress/opentox/2011/balanced-lazar - l = Array.new # larger - s = Array.new # smaller fraction + neighbors + prediction = eval("#{@prediction_algorithm} ( { :neighbors => @neighbors, + :compound => @compound, + :features => @features, + :p_values => @p_values, + :fingerprints => @fingerprints, + :similarity_algorithm => @similarity_algorithm, + :prop_kernel => @prop_kernel, + :value_map => @value_map, + :transform => @transform } ) ") - raise "no fingerprints in model" if @fingerprints.size==0 - - @fingerprints.each do |training_compound,training_features| - @activities[training_compound].each do |act| - case act.to_s - when "0" - l << training_compound - when "1" - s << training_compound - else - LOGGER.warn "BLAZAR: Activity #{act.to_s} should not be reached (supports only two classes)." - end - end - end - if s.size > l.size then - l,s = s,l # happy swapping - LOGGER.info "BLAZAR: |s|=#{s.size}, |l|=#{l.size}." - end - # determine ratio - modulo = l.size.divmod(s.size)# modulo[0]=ratio, modulo[1]=rest - LOGGER.info "BLAZAR: Balance: #{modulo[0]}, rest #{modulo[1]}." - - # AM: Balanced predictions - addon = (modulo[1].to_f/modulo[0]).ceil # what will be added in each round - slack = (addon!=0 ? modulo[1].divmod(addon)[1] : 0) # what remains for the last round - position = 0 - predictions = Array.new - - prediction_best=nil - neighbors_best=nil - - begin - for i in 1..modulo[0] do - (i == modulo[0]) && (slack>0) ? lr_size = s.size + slack : lr_size = s.size + addon # determine fraction - LOGGER.info "BLAZAR: Neighbors round #{i}: #{position} + #{lr_size}." - neighbors_balanced(s, l, position, lr_size) # get ratio fraction of larger part - if @prop_kernel && ( @prediction_algorithm.include?("svm") || @prediction_algorithm.include?("local_mlr_prop") ) - props = get_props - else - props = nil - end - prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values, :value_map => @value_map}, props)") - if prediction_best.nil? || prediction[:confidence].abs > prediction_best[:confidence].abs - prediction_best=prediction - neighbors_best=@neighbors - end - position = position + lr_size - end - rescue Exception => e - LOGGER.error "BLAZAR failed in prediction: "+e.class.to_s+": "+e.message - end - - prediction=prediction_best - @neighbors=neighbors_best - ### END AM balanced predictions - - else # AM: no balancing or regression - LOGGER.info "LAZAR: Unbalanced." - neighbors - if @prop_kernel && ( @prediction_algorithm.include?("svm") || @prediction_algorithm.include?("local_mlr_prop") ) - props = get_props - else - props = nil - end - prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values, :value_map => @value_map}, props)") - end - value_feature_uri = File.join( @uri, "predicted", "value") confidence_feature_uri = File.join( @uri, "predicted", "confidence") @@ -356,43 +295,7 @@ module OpenTox @prediction_dataset end - # Calculate the propositionalization matrix aka instantiation matrix (0/1 entries for features) - # Same for the vector describing the query compound - def get_props - matrix = Array.new - begin - @neighbors.each do |n| - n = n[:compound] - row = [] - @features.each do |f| - if ! @fingerprints[n].nil? - row << (@fingerprints[n].include?(f) ? 0.0 : @p_values[f]) - else - row << 0.0 - end - end - matrix << row - end - row = [] - @features.each do |f| - row << (@compound.match([f]).size == 0 ? 0.0 : @p_values[f]) - end - rescue Exception => e - LOGGER.debug "get_props failed with '" + $! + "'" - end - [ matrix, row ] - end - - # Find neighbors and store them as object variable, access only a subset of compounds for that. - def neighbors_balanced(s, l, start, offset) - @compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm - @neighbors = [] - [ l[start, offset ] , s ].flatten.each do |training_compound| # AM: access only a balanced subset - training_features = @fingerprints[training_compound] - add_neighbor training_features, training_compound - end - - end + # Find neighbors and store them as object variable, access all compounds for that. def neighbors diff --git a/lib/validation.rb b/lib/validation.rb index 3e8367c..646b076 100644 --- a/lib/validation.rb +++ b/lib/validation.rb @@ -85,34 +85,27 @@ module OpenTox @metadata = YAML.load(OpenTox::RestClientWrapper.get(uri,{:subjectid => subjectid, :accept => "application/x-yaml"})) end - # PENDING: creates summary as used for ToxCreate - def summary - if @metadata[OT.classificationStatistics] - res = { - :nr_predictions => @metadata[OT.numInstances].to_i - @metadata[OT.numUnpredicted].to_i, - :correct_predictions => @metadata[OT.classificationStatistics][OT.percentCorrect], - :weighted_area_under_roc => @metadata[OT.classificationStatistics][OT.weightedAreaUnderRoc], - } - @metadata[OT.classificationStatistics][OT.classValueStatistics].each do |s| - if s[OT.classValue].to_s=="true" - res[:true_positives] = s[OT.numTruePositives] - res[:false_positives] = s[OT.numFalsePositives] - res[:true_negatives] = s[OT.numTrueNegatives] - res[:false_negatives] = s[OT.numFalseNegatives] - res[:sensitivity] = s[OT.truePositiveRate] - res[:specificity] = s[OT.trueNegativeRate] - break + # returns confusion matrix as array, predicted values are in rows + # example: + # [[nil,"active","moderate","inactive"],["active",1,3,99],["moderate",4,2,8],["inactive",3,8,6]] + # -> 99 inactive compounds have been predicted as active + def confusion_matrix + raise "no classification statistics, probably a regression valdiation" unless @metadata[OT.classificationStatistics] + matrix = @metadata[OT.classificationStatistics][OT.confusionMatrix][OT.confusionMatrixCell] + values = matrix.collect{|cell| cell[OT.confusionMatrixPredicted]}.uniq + table = [[nil]+values] + values.each do |c| + table << [c] + values.each do |r| + matrix.each do |cell| + if cell[OT.confusionMatrixPredicted]==c and cell[OT.confusionMatrixActual]==r + table[-1] << cell[OT.confusionMatrixValue].to_f + break + end end end - res - elsif @metadata[OT.regressionStatistics] - { - :nr_predictions => @metadata[OT.numInstances].to_i - @metadata[OT.numUnpredicted].to_i, - :r_square => @metadata[OT.regressionStatistics][OT.rSquare], - :root_mean_squared_error => @metadata[OT.regressionStatistics][OT.rootMeanSquaredError], - :mean_absolute_error => @metadata[OT.regressionStatistics][OT.meanAbsoluteError], - } end + table end end @@ -171,9 +164,9 @@ module OpenTox @metadata = YAML.load(OpenTox::RestClientWrapper.get(uri,{:subjectid => subjectid, :accept => "application/x-yaml"})) end - # PENDING: creates summary as used for ToxCreate - def summary( subjectid=nil ) - Validation.from_cv_statistics( @uri, subjectid ).summary + # returns a Validation object containing the statistics of the crossavlidation + def statistics( subjectid=nil ) + Validation.from_cv_statistics( @uri, subjectid ) end end |