summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/algorithm.rb148
-rw-r--r--lib/feature.rb1
-rw-r--r--lib/model.rb128
-rw-r--r--lib/to-html.rb2
-rw-r--r--lib/validation.rb53
5 files changed, 259 insertions, 73 deletions
diff --git a/lib/algorithm.rb b/lib/algorithm.rb
index 7fbe0dc..5b41cbf 100644
--- a/lib/algorithm.rb
+++ b/lib/algorithm.rb
@@ -164,11 +164,7 @@ module OpenTox
# @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features`
# @param [Hash] params Keys `:similarity_algorithm,:p_values` are required
# @return [Hash] Hash with keys `:prediction, :confidence`
- def self.local_svm_regression(neighbors,params )
- sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values between query and neighbors
- conf = sims.inject{|sum,x| sum + x }
-
- # AM: Control log taking
+ def self.local_svm_regression(neighbors, params)
take_logs=true
neighbors.each do |n|
if (! n[:activity].nil?) && (n[:activity].to_f < 0.0)
@@ -180,57 +176,115 @@ module OpenTox
take_logs ? Math.log10(act.to_f) : act.to_f
end # activities of neighbors for supervised learning
- neighbor_matches = neighbors.collect{ |n| n[:features] } # as in classification: URIs of matches
- gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel
- if neighbor_matches.size == 0
- raise "No neighbors found"
- else
- # gram matrix
- (0..(neighbor_matches.length-1)).each do |i|
- gram_matrix[i] = [] unless gram_matrix[i]
- # upper triangle
- ((i+1)..(neighbor_matches.length-1)).each do |j|
- sim = eval("#{params[:similarity_algorithm]}(neighbor_matches[i], neighbor_matches[j], params[:p_values])")
- gram_matrix[i][j] = Algorithm.gauss(sim)
- gram_matrix[j] = [] unless gram_matrix[j]
- gram_matrix[j][i] = gram_matrix[i][j] # lower triangle
- end
- gram_matrix[i][i] = 1.0
- end
+ sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors
+ begin
+ prediction = local_svm(neighbors, acts, sims, "nu-svr", params)
+ prediction = (take_logs ? 10**(prediction.to_f) : prediction.to_f)
+ LOGGER.debug "Prediction is: '" + prediction.to_s + "'."
+ rescue Exception => e
+ LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}"
+ end
- #LOGGER.debug gram_matrix.to_yaml
- @r = RinRuby.new(false,false) # global R instance leads to Socket errors after a large number of requests
- @r.eval "library('kernlab')" # this requires R package "kernlab" to be installed
- LOGGER.debug "Setting R data ..."
- # set data
- @r.gram_matrix = gram_matrix.flatten
- @r.n = neighbor_matches.size
- @r.y = acts
- @r.sims = sims
+ conf = sims.inject{|sum,x| sum + x }
+ confidence = conf/neighbors.size if neighbors.size > 0
+ {:prediction => prediction, :confidence => confidence}
+
+ end
- LOGGER.debug "Preparing R data ..."
- # prepare data
- @r.eval "y<-as.vector(y)"
- @r.eval "gram_matrix<-as.kernelMatrix(matrix(gram_matrix,n,n))"
- @r.eval "sims<-as.vector(sims)"
-
- # model + support vectors
- LOGGER.debug "Creating SVM model ..."
- @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"nu-svr\", nu=0.8)"
- @r.eval "sv<-as.vector(SVindex(model))"
- @r.eval "sims<-sims[sv]"
- @r.eval "sims<-as.kernelMatrix(matrix(sims,1))"
- LOGGER.debug "Predicting ..."
- @r.eval "p<-predict(model,sims)[1,1]"
- prediction = 10**(@r.p.to_f) if take_logs
+ # Local support vector classification from neighbors
+ # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features`
+ # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required
+ # @return [Hash] Hash with keys `:prediction, :confidence`
+ def self.local_svm_classification(neighbors, params)
+ acts = neighbors.collect do |n|
+ act = n[:activity]
+ end # activities of neighbors for supervised learning
+ acts_f = acts.collect {|v| v == true ? 1.0 : 0.0}
+ sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors
+ begin
+ prediction = local_svm (neighbors, acts_f, sims, "C-bsvc", params)
LOGGER.debug "Prediction is: '" + prediction.to_s + "'."
- @r.quit # free R
+ rescue Exception => e
+ LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}"
end
+
+ conf = sims.inject{|sum,x| sum + x }
confidence = conf/neighbors.size if neighbors.size > 0
{:prediction => prediction, :confidence => confidence}
end
+
+ # Local support vector prediction from neighbors.
+ # Not to be called directly (use local_svm_regression or local_svm_classification instead).
+ # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features`
+ # @param [Array] acts, activities for neighbors.
+ # @param [Array] sims, similarities for neighbors.
+ # @param [String] type, one of "nu-svr" (regression) or "C-bsvc" (classification).
+ # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required
+ # @return [Numeric] A prediction value.
+ def self.local_svm(neighbors, acts, sims, type, params)
+ neighbor_matches = neighbors.collect{ |n| n[:features] } # URIs of matches
+ gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel
+ if neighbor_matches.size == 0
+ raise "No neighbors found."
+ else
+ # gram matrix
+ (0..(neighbor_matches.length-1)).each do |i|
+ gram_matrix[i] = [] unless gram_matrix[i]
+ # upper triangle
+ ((i+1)..(neighbor_matches.length-1)).each do |j|
+ sim = eval("#{params[:similarity_algorithm]}(neighbor_matches[i], neighbor_matches[j], params[:p_values])")
+ gram_matrix[i][j] = Algorithm.gauss(sim)
+ gram_matrix[j] = [] unless gram_matrix[j]
+ gram_matrix[j][i] = gram_matrix[i][j] # lower triangle
+ end
+ gram_matrix[i][i] = 1.0
+ end
+
+ #LOGGER.debug gram_matrix.to_yaml
+ @r = RinRuby.new(false,false) # global R instance leads to Socket errors after a large number of requests
+ @r.eval "library('kernlab')" # this requires R package "kernlab" to be installed
+ LOGGER.debug "Setting R data ..."
+ # set data
+ @r.gram_matrix = gram_matrix.flatten
+ @r.n = neighbor_matches.size
+ @r.y = acts
+ @r.sims = sims
+
+ begin
+ LOGGER.debug "Preparing R data ..."
+ # prepare data
+ @r.eval "y<-as.vector(y)"
+ @r.eval "gram_matrix<-as.kernelMatrix(matrix(gram_matrix,n,n))"
+ @r.eval "sims<-as.vector(sims)"
+
+ # model + support vectors
+ LOGGER.debug "Creating SVM model ..."
+ @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"#{type}\", nu=0.5)"
+ @r.eval "sv<-as.vector(SVindex(model))"
+ @r.eval "sims<-sims[sv]"
+ @r.eval "sims<-as.kernelMatrix(matrix(sims,1))"
+ LOGGER.debug "Predicting ..."
+ if type == "nu-svr"
+ @r.eval "p<-predict(model,sims)[1,1]"
+ elsif type == "C-bsvc"
+ @r.eval "p<-predict(model,sims)"
+ end
+ if type == "nu-svr"
+ prediction = @r.p
+ elsif type == "C-bsvc"
+ prediction = (@r.p.to_f == 1.0 ? true : false)
+ end
+ @r.quit # free R
+ rescue Exception => e
+ LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}"
+ end
+
+ end
+ prediction
+ end
+
end
module Substructure
diff --git a/lib/feature.rb b/lib/feature.rb
index b631e46..2f1ab6c 100644
--- a/lib/feature.rb
+++ b/lib/feature.rb
@@ -16,6 +16,7 @@ module OpenTox
feature
end
+
# provides feature type, possible types are "regression" or "classification"
# @return [String] feature type, unknown if OT.isA property is unknown/ not set
def feature_type
diff --git a/lib/model.rb b/lib/model.rb
index 048de85..998d2dc 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -23,7 +23,7 @@ module OpenTox
# Generic OpenTox model class for all API compliant services
class Generic
include Model
-
+
# Find Generic Opentox Model via URI, and loads metadata, could raise NotFound/NotAuthorized error
# @param [String] uri Model URI
# @return [OpenTox::Model::Generic] Model instance
@@ -34,12 +34,12 @@ module OpenTox
raise "could not load model metadata '"+uri.to_s+"'" if model.metadata==nil or model.metadata.size==0
model
end
-
- # provides feature type, possible types are "regression" or "classification"
- # @return [String] feature type, "unknown" if type could not be estimated
+
+ # provides feature type, possible types are "regression" or "classification"
+ # @return [String] feature type, "unknown" if type could not be estimated
def feature_type(subjectid=nil)
return @feature_type if @feature_type
-
+
# dynamically perform restcalls if necessary
load_metadata(subjectid) if @metadata==nil or @metadata.size==0 or (@metadata.size==1 && @metadata.values[0]==@uri)
algorithm = OpenTox::Algorithm::Generic.find(@metadata[OT.algorithm], subjectid)
@@ -60,9 +60,9 @@ module OpenTox
raise "unknown model "+type_indicators.inspect unless @feature_type
@feature_type
end
-
+
end
-
+
# Lazy Structure Activity Relationship class
class Lazar
@@ -78,7 +78,7 @@ module OpenTox
else
super CONFIG[:services]["opentox-model"]
end
-
+
@metadata[OT.algorithm] = File.join(CONFIG[:services]["opentox-algorithm"],"lazar")
@features = []
@@ -178,9 +178,65 @@ module OpenTox
return @prediction_dataset if database_activity(subjectid)
- neighbors
- prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})")
+ if metadata[RDF.type] == [OTA.ClassificationLazySingleTarget]
+ # AM: Balancing, see http://www.maunz.de/wordpress/opentox/2011/balanced-lazar
+ l = Array.new # larger
+ s = Array.new # smaller fraction
+ @fingerprints.each do |training_compound,training_features|
+ @activities[training_compound].each do |act|
+ case act.to_s
+ when "false"
+ l << training_compound
+ when "true"
+ s << training_compound
+ else
+ LOGGER.warn "BLAZAR: Activity #{act.to_s} should not be reached."
+ end
+ end
+ end
+ if s.size > l.size then
+ l,s = s,l # happy swapping
+ LOGGER.info "BLAZAR: |s|=#{s.size}, |l|=#{l.size}."
+ end
+ # determine ratio
+ modulo = l.size.divmod(s.size)# modulo[0]=ratio, modulo[1]=rest
+ LOGGER.info "BLAZAR: Balance: #{modulo[0]}, rest #{modulo[1]}."
+
+ # AM: Balanced predictions
+ addon = (modulo[1].to_f/modulo[0]).ceil # what will be added in each round
+ slack = modulo[1].divmod(addon)[1] # what remains for the last round
+ position = 0
+ predictions = Array.new
+
+ prediction_best=nil
+ neighbors_best=nil
+
+ begin
+ for i in 1..modulo[0] do
+ (i == modulo[0]) && (slack>0) ? lr_size = s.size + slack : lr_size = s.size + addon # determine fraction
+ LOGGER.info "BLAZAR: Neighbors round #{i}: #{position} + #{lr_size}."
+ neighbors_balanced(s, l, position, lr_size) # get ratio fraction of larger part
+ prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})")
+ if prediction_best.nil? || prediction[:confidence].abs > prediction_best[:confidence].abs
+ prediction_best=prediction
+ neighbors_best=@neighbors
+ end
+ position = position + lr_size
+ end
+ rescue Exception => e
+ LOGGER.error "BLAZAR failed in prediction: "+e.class.to_s+": "+e.message
+ end
+
+ prediction=prediction_best
+ @neighbors=neighbors_best
+ ### END AM balanced predictions
+
+ else # regression case: no balancing
+ neighbors
+ prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})")
+ end
+
prediction_feature_uri = File.join( @prediction_dataset.uri, "feature", "prediction", File.basename(@metadata[OT.dependentVariables]),@prediction_dataset.compounds.size.to_s)
# TODO: fix dependentVariable
@prediction_dataset.metadata[OT.dependentVariables] = prediction_feature_uri
@@ -269,27 +325,55 @@ module OpenTox
end
# Find neighbors and store them as object variable
- def neighbors
-
+ def neighbors_balanced(s, l, start, offset)
@compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm
@neighbors = []
- @fingerprints.each do |training_compound,training_features|
- sim = eval("#{@similarity_algorithm}(@compound_features,training_features,@p_values)")
- if sim > @min_sim
- @activities[training_compound].each do |act|
- @neighbors << {
- :compound => training_compound,
- :similarity => sim,
- :features => training_features,
- :activity => act
- }
+ begin
+ #@fingerprints.each do |training_compound,training_features| # AM: this is original by CH
+ [ l[start, offset ] , s ].flatten.each do |training_compound| # AM: access only a balanced subset
+ training_features = @fingerprints[training_compound]
+ sim = eval("#{@similarity_algorithm}(@compound_features,training_features,@p_values)")
+ if sim > @min_sim
+ @activities[training_compound].each do |act|
+ this_neighbor = {
+ :compound => training_compound,
+ :similarity => sim,
+ :features => training_features,
+ :activity => act
+ }
+ @neighbors << this_neighbor
+ end
end
end
+ rescue Exception => e
+ LOGGER.error "BLAZAR failed in neighbors: "+e.class.to_s+": "+e.message
end
end
+
+ # Find neighbors and store them as object variable
+ def neighbors
+
+ @compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm
+
+ @neighbors = []
+ @fingerprints.each do |training_compound,training_features|
+ sim = eval("#{@similarity_algorithm}(@compound_features,training_features,@p_values)")
+ if sim > @min_sim
+ @activities[training_compound].each do |act|
+ @neighbors << {
+ :compound => training_compound,
+ :similarity => sim,
+ :features => training_features,
+ :activity => act
+ }
+ end
+ end
+ end
+ end
+
# Find database activities and store them in @prediction_dataset
# @return [Boolean] true if compound has databasse activities, false if not
def database_activity(subjectid)
diff --git a/lib/to-html.rb b/lib/to-html.rb
index 6785974..66a3e74 100644
--- a/lib/to-html.rb
+++ b/lib/to-html.rb
@@ -1,5 +1,5 @@
-OT_LOGO = "http://opentox.informatik.uni-freiburg.de/ot-logo.png"
+OT_LOGO = File.join(CONFIG[:services]["opentox-validation"],"resources/ot-logo.png")
class String
diff --git a/lib/validation.rb b/lib/validation.rb
index d58d36e..d7a337c 100644
--- a/lib/validation.rb
+++ b/lib/validation.rb
@@ -65,7 +65,7 @@ module OpenTox
def summary
if @metadata[OT.classificationStatistics]
res = {
- :nr_predictions => @metadata[OT.numInstances] - @metadata[OT.numUnpredicted],
+ :nr_predictions => @metadata[OT.numInstances].to_i - @metadata[OT.numUnpredicted].to_i,
:correct_predictions => @metadata[OT.classificationStatistics][OT.percentCorrect],
:weighted_area_under_roc => @metadata[OT.classificationStatistics][OT.weightedAreaUnderRoc],
}
@@ -83,7 +83,7 @@ module OpenTox
res
elsif @metadata[OT.regressionStatistics]
{
- :nr_predictions => @metadata[OT.numInstances] - @metadata[OT.numUnpredicted],
+ :nr_predictions => @metadata[OT.numInstances].to_i - @metadata[OT.numUnpredicted].to_i,
:r_square => @metadata[OT.regressionStatistics][OT.rSquare],
:root_mean_squared_error => @metadata[OT.regressionStatistics][OT.rootMeanSquaredError],
:mean_absolute_error => @metadata[OT.regressionStatistics][OT.meanAbsoluteError],
@@ -198,7 +198,6 @@ module OpenTox
# @param [String,optional] subjectid
# @return [OpenTox::CrossvalidationReport]
def self.find( uri, subjectid=nil )
- # PENDING load report data?
OpenTox::RestClientWrapper.get(uri,{:subjectid => subjectid})
rep = CrossvalidationReport.new(uri)
rep.load_metadata( subjectid )
@@ -227,6 +226,54 @@ module OpenTox
end
end
+
+ class AlgorithmComparisonReport
+ include OpenTox
+
+ # finds AlgorithmComparisonReport via uri, raises error if not found
+ # @param [String] uri
+ # @param [String,optional] subjectid
+ # @return [OpenTox::AlgorithmComparisonReport]
+ def self.find( uri, subjectid=nil )
+ OpenTox::RestClientWrapper.get(uri,{:subjectid => subjectid})
+ rep = AlgorithmComparisonReport.new(uri)
+ rep.load_metadata( subjectid )
+ rep
+ end
+
+ # finds AlgorithmComparisonReport for a particular crossvalidation
+ # @param [String] crossvalidation_uri
+ # @param [String,optional] subjectid
+ # @return [OpenTox::AlgorithmComparisonReport] nil if no report found
+ def self.find_for_crossvalidation( crossvalidation_uri, subjectid=nil )
+ uris = RestClientWrapper.get(File.join(CONFIG[:services]["opentox-validation"],
+ "/report/algorithm_comparison?crossvalidation="+crossvalidation_uri), {:subjectid => subjectid}).chomp.split("\n")
+ uris.size==0 ? nil : AlgorithmComparisonReport.new(uris[-1])
+ end
+
+ # creates an algorithm comparison report from a set of crossvalidations
+ # @param [Hash] crossvalidation_uri_hash maps an algorithm identifier to an array of crossvalidation URIs, see example
+ # @param [String,optional] subjectid
+ # @param [OpenTox::Task,optional] waiting_task (can be a OpenTox::Subtask as well), progress is updated accordingly
+ # @return [OpenTox::AlgorithmComparisonReport]
+ # example for hash:
+ # { :lazar-bbrc => [ http://host/validation/crossvalidation/x1, http://host/validation/crossvalidation/x2 ],
+ # :lazar-last => [ http://host/validation/crossvalidation/xy, http://host/validation/crossvalidation/xy ] }
+ def self.create( crossvalidation_uri_hash, subjectid=nil, waiting_task=nil )
+ identifier = []
+ validation_uris = []
+ crossvalidation_uri_hash.each do |id, uris|
+ uris.each do |uri|
+ identifier << id
+ validation_uris << uri
+ end
+ end
+ uri = RestClientWrapper.post(File.join(CONFIG[:services]["opentox-validation"],"/report/algorithm_comparison"),
+ { :validation_uris => validation_uris.join(","), :identifier => identifier.join(","), :subjectid => subjectid }, {}, waiting_task )
+ AlgorithmComparisonReport.new(uri)
+ end
+ end
+
class QMRFReport
include OpenTox