From f999b42afbb4387d99b2c91a79f84654408cbab1 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Tue, 10 May 2011 08:29:27 +0200 Subject: Added bal --- lib/model.rb | 102 ++++++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 80 insertions(+), 22 deletions(-) diff --git a/lib/model.rb b/lib/model.rb index 048de85..9442897 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -23,7 +23,7 @@ module OpenTox # Generic OpenTox model class for all API compliant services class Generic include Model - + # Find Generic Opentox Model via URI, and loads metadata, could raise NotFound/NotAuthorized error # @param [String] uri Model URI # @return [OpenTox::Model::Generic] Model instance @@ -34,12 +34,12 @@ module OpenTox raise "could not load model metadata '"+uri.to_s+"'" if model.metadata==nil or model.metadata.size==0 model end - - # provides feature type, possible types are "regression" or "classification" - # @return [String] feature type, "unknown" if type could not be estimated + + # provides feature type, possible types are "regression" or "classification" + # @return [String] feature type, "unknown" if type could not be estimated def feature_type(subjectid=nil) return @feature_type if @feature_type - + # dynamically perform restcalls if necessary load_metadata(subjectid) if @metadata==nil or @metadata.size==0 or (@metadata.size==1 && @metadata.values[0]==@uri) algorithm = OpenTox::Algorithm::Generic.find(@metadata[OT.algorithm], subjectid) @@ -60,9 +60,9 @@ module OpenTox raise "unknown model "+type_indicators.inspect unless @feature_type @feature_type end - + end - + # Lazy Structure Activity Relationship class class Lazar @@ -78,7 +78,7 @@ module OpenTox else super CONFIG[:services]["opentox-model"] end - + @metadata[OT.algorithm] = File.join(CONFIG[:services]["opentox-algorithm"],"lazar") @features = [] @@ -178,8 +178,59 @@ module OpenTox return @prediction_dataset if database_activity(subjectid) - neighbors - prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})") + + # AM: Balancing, see http://www.maunz.de/wordpress/opentox/2011/balanced-lazar + l = Array.new # larger + s = Array.new # smaller fraction + if metadata[RDF.type] == [OTA.ClassificationLazySingleTarget] + @fingerprints.each do |training_compound,training_features| + @activities[training_compound].each do |act| + case act.to_s + when "false" + l << training_compound + when "true" + s << training_compound + else + LOGGER.warn "BLAZAR: Activity #{act.to_s} should not be reached." + end + end + end + if s.size > l.size then + l,s = s,l # happy swapping + LOGGER.info "BLAZAR: |s|=#{s.size}, |l|=#{l.size}." + end + # determine ratio + modulo = l.size.divmod(s.size)# modulo[0]=ratio, modulo[1]=rest + LOGGER.info "BLAZAR: Balance: #{modulo[0]}, rest #{modulo[1]}." + end + + # AM: Balanced predictions + addon = (modulo[1].to_f/modulo[0]).ceil # what will be added in each round + slack = modulo[1].divmod(addon)[1] # what remains for the last round + position = 0 + predictions = Array.new + + @collect_neighbors = {} + predictions = [] + for i in 1..modulo[0] do + (i == modulo[0]) && (slack>0) ? lr_size = s.size + slack : lr_size = s.size + addon # determine fraction + LOGGER.info "BLAZAR: Neighbors round #{i}: #{position} + #{lr_size}." + neighbors(s, l, position, lr_size) # get ratio fraction of larger part + predictions << eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})") + position = position + lr_size + end + @neighbors = @collect_neighbors.values # AM: get all neighbors + + prediction={} + begin + p_sum=0.0 + predictions.each do |p| + p[:prediction] == false ? p_sum = p_sum - p[:confidence].to_f : p_sum = p_sum + p[:confidence].to_f + end + prediction = { :prediction => (p_sum<0.0 ? false : true), :confidence => p_sum.abs/predictions.size } # AM: get mean + rescue Exception => e + LOGGER.error "BLAZAR failed in prediction: "+e.class.to_s+": "+e.message + end prediction_feature_uri = File.join( @prediction_dataset.uri, "feature", "prediction", File.basename(@metadata[OT.dependentVariables]),@prediction_dataset.compounds.size.to_s) # TODO: fix dependentVariable @@ -269,23 +320,30 @@ module OpenTox end # Find neighbors and store them as object variable - def neighbors - + def neighbors(s=nil, l=nil, start=nil, offset=nil) @compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm @neighbors = [] - @fingerprints.each do |training_compound,training_features| - sim = eval("#{@similarity_algorithm}(@compound_features,training_features,@p_values)") - if sim > @min_sim - @activities[training_compound].each do |act| - @neighbors << { - :compound => training_compound, - :similarity => sim, - :features => training_features, - :activity => act - } + begin + #@fingerprints.each do |training_compound,training_features| # AM: this is original by CH + [ l[start, offset ] , s ].flatten.each do |training_compound| # AM: access only a balanced subset + training_features = @fingerprints[training_compound] + sim = eval("#{@similarity_algorithm}(@compound_features,training_features,@p_values)") + if sim > @min_sim + @activities[training_compound].each do |act| + this_neighbor = { + :compound => training_compound, + :similarity => sim, + :features => training_features, + :activity => act + } + @neighbors << this_neighbor + @collect_neighbors[training_compound] = this_neighbor + end end end + rescue Exception => e + LOGGER.error "BLAZAR failed in neighbors: "+e.class.to_s+": "+e.message end end -- cgit v1.2.3 From 1d8c7d6dfa513cd7c8ad642248db24e0d1e3a199 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Tue, 10 May 2011 09:02:38 +0200 Subject: Using Best prediction only --- lib/model.rb | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/lib/model.rb b/lib/model.rb index 9442897..a4d6d85 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -210,28 +210,28 @@ module OpenTox position = 0 predictions = Array.new - @collect_neighbors = {} - predictions = [] + prediction_best=nil + neighbors_best=nil + + begin for i in 1..modulo[0] do (i == modulo[0]) && (slack>0) ? lr_size = s.size + slack : lr_size = s.size + addon # determine fraction LOGGER.info "BLAZAR: Neighbors round #{i}: #{position} + #{lr_size}." neighbors(s, l, position, lr_size) # get ratio fraction of larger part - predictions << eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})") + prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})") + if prediction[:confidence].abs > prediction_best[:confidence].abs || prediction_best.nil? + prediction_best=prediction + neighbors_best=@neighbors + end position = position + lr_size end - @neighbors = @collect_neighbors.values # AM: get all neighbors - - prediction={} - begin - p_sum=0.0 - predictions.each do |p| - p[:prediction] == false ? p_sum = p_sum - p[:confidence].to_f : p_sum = p_sum + p[:confidence].to_f - end - prediction = { :prediction => (p_sum<0.0 ? false : true), :confidence => p_sum.abs/predictions.size } # AM: get mean rescue Exception => e LOGGER.error "BLAZAR failed in prediction: "+e.class.to_s+": "+e.message end + prediction=prediction_best + @neighbors=neighbors_best + prediction_feature_uri = File.join( @prediction_dataset.uri, "feature", "prediction", File.basename(@metadata[OT.dependentVariables]),@prediction_dataset.compounds.size.to_s) # TODO: fix dependentVariable @prediction_dataset.metadata[OT.dependentVariables] = prediction_feature_uri @@ -338,7 +338,6 @@ module OpenTox :activity => act } @neighbors << this_neighbor - @collect_neighbors[training_compound] = this_neighbor end end end -- cgit v1.2.3 From 2af934ddc033d7d8a737d88eb4ee175955ad4a0a Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Tue, 10 May 2011 11:11:34 +0200 Subject: Fixed first prediction case --- lib/model.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/model.rb b/lib/model.rb index a4d6d85..3d64f32 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -219,7 +219,7 @@ module OpenTox LOGGER.info "BLAZAR: Neighbors round #{i}: #{position} + #{lr_size}." neighbors(s, l, position, lr_size) # get ratio fraction of larger part prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})") - if prediction[:confidence].abs > prediction_best[:confidence].abs || prediction_best.nil? + if prediction_best.nil? || prediction[:confidence].abs > prediction_best[:confidence].abs prediction_best=prediction neighbors_best=@neighbors end -- cgit v1.2.3 From 305f3caa692dd977df07cbc5ec195521e2a135fa Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Tue, 10 May 2011 16:43:05 +0200 Subject: Added Gauss patch --- lib/algorithm.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 008e7fe..abf10d4 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -165,7 +165,7 @@ module OpenTox # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required # @return [Hash] Hash with keys `:prediction, :confidence` def self.local_svm_regression(neighbors,params ) - sims = neighbors.collect{ |n| n[:similarity] } # similarity values between query and neighbors + sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values between query and neighbors conf = sims.inject{|sum,x| sum + x } acts = neighbors.collect do |n| act = n[:activity] -- cgit v1.2.3 From 524a68d8429b8adc16bd8073774f9305cb7138a0 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Wed, 11 May 2011 12:13:37 +0200 Subject: Added balance patch --- lib/parser.rb | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/lib/parser.rb b/lib/parser.rb index db746c1..dc5f675 100644 --- a/lib/parser.rb +++ b/lib/parser.rb @@ -348,16 +348,27 @@ module OpenTox when OT.NominalFeature case value.to_s when TRUE_REGEXP - @dataset.add(compound.uri, feature, true ) + #@dataset.add(compound.uri, feature, true ) + val=true when FALSE_REGEXP - @dataset.add(compound.uri, feature, false ) + #@dataset.add(compound.uri, feature, false ) + val=false end when OT.NumericFeature - @dataset.add compound.uri, feature, value.to_f + #@dataset.add compound.uri, feature, value.to_f + val = value.to_f when OT.StringFeature - @dataset.add compound.uri, feature, value.to_s + #@dataset.add compound.uri, feature, value.to_s + val = value.to_s @activity_errors << smiles+", "+row.join(", ") end + if val!=nil + @dataset.add(compound.uri, feature, val) + if type!=OT.NumericFeature + @dataset.features[feature][OT.acceptValue] = [] unless @dataset.features[feature][OT.acceptValue] + @dataset.features[feature][OT.acceptValue] << val.to_s unless @dataset.features[feature][OT.acceptValue].include?(val.to_s) + end + end end end -- cgit v1.2.3 From 03a87a832162ccf17b6f0ebfda126e3622530ca3 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Wed, 11 May 2011 15:56:55 +0200 Subject: Further Martin patch --- lib/feature.rb | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/lib/feature.rb b/lib/feature.rb index f6e2dfd..eb0b869 100644 --- a/lib/feature.rb +++ b/lib/feature.rb @@ -19,14 +19,7 @@ module OpenTox # provides domain (possible target values) of classification feature # @return [Array] list with possible target values def domain - if metadata[OT.acceptValue] - raise "accept value found, remove hack and implement correctly" - else - if @uri=~/feature\/26221/ || @uri=~/feature\/221726/ - return ["mutagen" , "nonmutagen"] - end - return [true, false] - end + return [true, false] end # provides feature type, possible types are "regression" or "classification" -- cgit v1.2.3 From b944a21b557b9628b3b6f7be990534b2f86f0884 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Tue, 17 May 2011 10:47:58 +0200 Subject: fix validation statistics: convert num predictions to integer --- lib/validation.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/validation.rb b/lib/validation.rb index d58d36e..1a2497b 100644 --- a/lib/validation.rb +++ b/lib/validation.rb @@ -65,7 +65,7 @@ module OpenTox def summary if @metadata[OT.classificationStatistics] res = { - :nr_predictions => @metadata[OT.numInstances] - @metadata[OT.numUnpredicted], + :nr_predictions => @metadata[OT.numInstances].to_i - @metadata[OT.numUnpredicted].to_i, :correct_predictions => @metadata[OT.classificationStatistics][OT.percentCorrect], :weighted_area_under_roc => @metadata[OT.classificationStatistics][OT.weightedAreaUnderRoc], } @@ -83,7 +83,7 @@ module OpenTox res elsif @metadata[OT.regressionStatistics] { - :nr_predictions => @metadata[OT.numInstances] - @metadata[OT.numUnpredicted], + :nr_predictions => @metadata[OT.numInstances].to_i - @metadata[OT.numUnpredicted].to_i, :r_square => @metadata[OT.regressionStatistics][OT.rSquare], :root_mean_squared_error => @metadata[OT.regressionStatistics][OT.rootMeanSquaredError], :mean_absolute_error => @metadata[OT.regressionStatistics][OT.meanAbsoluteError], -- cgit v1.2.3 From 37a066e4cfe102d2e4edfaf3b4b9787bcbb3206f Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Tue, 17 May 2011 16:08:25 +0200 Subject: Initial version --- lib/algorithm.rb | 62 ++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 47 insertions(+), 15 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 7fbe0dc..16372ea 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -164,11 +164,7 @@ module OpenTox # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features` # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required # @return [Hash] Hash with keys `:prediction, :confidence` - def self.local_svm_regression(neighbors,params ) - sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values between query and neighbors - conf = sims.inject{|sum,x| sum + x } - - # AM: Control log taking + def self.local_svm_regression(neighbors, params) take_logs=true neighbors.each do |n| if (! n[:activity].nil?) && (n[:activity].to_f < 0.0) @@ -180,10 +176,51 @@ module OpenTox take_logs ? Math.log10(act.to_f) : act.to_f end # activities of neighbors for supervised learning - neighbor_matches = neighbors.collect{ |n| n[:features] } # as in classification: URIs of matches + sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors + prediction = local_sv_machine (neighbors, acts, sims, "svr", params) + prediction = take_logs ? 10**(prediction.to_f) : prediction.to_f + LOGGER.debug "Prediction is: '" + prediction.to_s + "'." + + conf = sims.inject{|sum,x| sum + x } + confidence = conf/neighbors.size if neighbors.size > 0 + {:prediction => prediction, :confidence => confidence} + + end + + # Local support vector classification from neighbors + # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features` + # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required + # @return [Hash] Hash with keys `:prediction, :confidence` + def self.local_svm_classification(neighbors, params) + acts = neighbors.collect do |n| + act = n[:activity] + end # activities of neighbors for supervised learning + + sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors + prediction = local_sv_machine (neighbors, acts, sims, "svc", params) + prediction = prediction.to_f + LOGGER.debug "Prediction is: '" + prediction.to_s + "'." + + conf = sims.inject{|sum,x| sum + x } + confidence = conf/neighbors.size if neighbors.size > 0 + {:prediction => prediction, :confidence => confidence} + + end + + end + + # Local support vector prediction. Not to be called directly (use local_svm_regression or local_svm_classification. + # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features` + # @param [Array] acts, activities for neighbors. + # @param [Array] sims, similarities for neighbors. + # @param [String] type, one of "svr" (regression) or "svc" (classification). + # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required + # @return [Numeric] A prediction value. + def self.local_sv_machine(neighbors, acts, sims, type, params) + neighbor_matches = neighbors.collect{ |n| n[:features] } # URIs of matches gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel if neighbor_matches.size == 0 - raise "No neighbors found" + raise "No neighbors found." else # gram matrix (0..(neighbor_matches.length-1)).each do |i| @@ -216,21 +253,16 @@ module OpenTox # model + support vectors LOGGER.debug "Creating SVM model ..." - @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"nu-svr\", nu=0.8)" + @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"nu-#{type}\", nu=0.5)" @r.eval "sv<-as.vector(SVindex(model))" @r.eval "sims<-sims[sv]" @r.eval "sims<-as.kernelMatrix(matrix(sims,1))" LOGGER.debug "Predicting ..." @r.eval "p<-predict(model,sims)[1,1]" - prediction = 10**(@r.p.to_f) if take_logs - LOGGER.debug "Prediction is: '" + prediction.to_s + "'." + prediction = @r.p @r.quit # free R end - confidence = conf/neighbors.size if neighbors.size > 0 - {:prediction => prediction, :confidence => confidence} - - end - + prediction end module Substructure -- cgit v1.2.3 From 0e49be4d0ed4752d5988ed651d813f001e42c05b Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Tue, 17 May 2011 16:35:20 +0200 Subject: Fixed method scope --- lib/algorithm.rb | 110 ++++++++++++++++++++++++++++--------------------------- 1 file changed, 56 insertions(+), 54 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 16372ea..0a5b09f 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -177,7 +177,7 @@ module OpenTox end # activities of neighbors for supervised learning sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors - prediction = local_sv_machine (neighbors, acts, sims, "svr", params) + prediction = local_svm(neighbors, acts, sims, "svr", params) prediction = take_logs ? 10**(prediction.to_f) : prediction.to_f LOGGER.debug "Prediction is: '" + prediction.to_s + "'." @@ -197,7 +197,7 @@ module OpenTox end # activities of neighbors for supervised learning sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors - prediction = local_sv_machine (neighbors, acts, sims, "svc", params) + prediction = local_svm (neighbors, acts, sims, "svc", params) prediction = prediction.to_f LOGGER.debug "Prediction is: '" + prediction.to_s + "'." @@ -207,62 +207,64 @@ module OpenTox end - end - # Local support vector prediction. Not to be called directly (use local_svm_regression or local_svm_classification. - # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features` - # @param [Array] acts, activities for neighbors. - # @param [Array] sims, similarities for neighbors. - # @param [String] type, one of "svr" (regression) or "svc" (classification). - # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required - # @return [Numeric] A prediction value. - def self.local_sv_machine(neighbors, acts, sims, type, params) - neighbor_matches = neighbors.collect{ |n| n[:features] } # URIs of matches - gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel - if neighbor_matches.size == 0 - raise "No neighbors found." - else - # gram matrix - (0..(neighbor_matches.length-1)).each do |i| - gram_matrix[i] = [] unless gram_matrix[i] - # upper triangle - ((i+1)..(neighbor_matches.length-1)).each do |j| - sim = eval("#{params[:similarity_algorithm]}(neighbor_matches[i], neighbor_matches[j], params[:p_values])") - gram_matrix[i][j] = Algorithm.gauss(sim) - gram_matrix[j] = [] unless gram_matrix[j] - gram_matrix[j][i] = gram_matrix[i][j] # lower triangle + # Local support vector prediction from neighbors. + # Not to be called directly (use local_svm_regression or local_svm_classification. + # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features` + # @param [Array] acts, activities for neighbors. + # @param [Array] sims, similarities for neighbors. + # @param [String] type, one of "svr" (regression) or "svc" (classification). + # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required + # @return [Numeric] A prediction value. + def self.local_svm(neighbors, acts, sims, type, params) + neighbor_matches = neighbors.collect{ |n| n[:features] } # URIs of matches + gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel + if neighbor_matches.size == 0 + raise "No neighbors found." + else + # gram matrix + (0..(neighbor_matches.length-1)).each do |i| + gram_matrix[i] = [] unless gram_matrix[i] + # upper triangle + ((i+1)..(neighbor_matches.length-1)).each do |j| + sim = eval("#{params[:similarity_algorithm]}(neighbor_matches[i], neighbor_matches[j], params[:p_values])") + gram_matrix[i][j] = Algorithm.gauss(sim) + gram_matrix[j] = [] unless gram_matrix[j] + gram_matrix[j][i] = gram_matrix[i][j] # lower triangle + end + gram_matrix[i][i] = 1.0 end - gram_matrix[i][i] = 1.0 - end - #LOGGER.debug gram_matrix.to_yaml - @r = RinRuby.new(false,false) # global R instance leads to Socket errors after a large number of requests - @r.eval "library('kernlab')" # this requires R package "kernlab" to be installed - LOGGER.debug "Setting R data ..." - # set data - @r.gram_matrix = gram_matrix.flatten - @r.n = neighbor_matches.size - @r.y = acts - @r.sims = sims + #LOGGER.debug gram_matrix.to_yaml + @r = RinRuby.new(false,false) # global R instance leads to Socket errors after a large number of requests + @r.eval "library('kernlab')" # this requires R package "kernlab" to be installed + LOGGER.debug "Setting R data ..." + # set data + @r.gram_matrix = gram_matrix.flatten + @r.n = neighbor_matches.size + @r.y = acts + @r.sims = sims + + LOGGER.debug "Preparing R data ..." + # prepare data + @r.eval "y<-as.vector(y)" + @r.eval "gram_matrix<-as.kernelMatrix(matrix(gram_matrix,n,n))" + @r.eval "sims<-as.vector(sims)" + + # model + support vectors + LOGGER.debug "Creating SVM model ..." + @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"nu-#{type}\", nu=0.5)" + @r.eval "sv<-as.vector(SVindex(model))" + @r.eval "sims<-sims[sv]" + @r.eval "sims<-as.kernelMatrix(matrix(sims,1))" + LOGGER.debug "Predicting ..." + @r.eval "p<-predict(model,sims)[1,1]" + prediction = @r.p + @r.quit # free R + end + prediction + end - LOGGER.debug "Preparing R data ..." - # prepare data - @r.eval "y<-as.vector(y)" - @r.eval "gram_matrix<-as.kernelMatrix(matrix(gram_matrix,n,n))" - @r.eval "sims<-as.vector(sims)" - - # model + support vectors - LOGGER.debug "Creating SVM model ..." - @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"nu-#{type}\", nu=0.5)" - @r.eval "sv<-as.vector(SVindex(model))" - @r.eval "sims<-sims[sv]" - @r.eval "sims<-as.kernelMatrix(matrix(sims,1))" - LOGGER.debug "Predicting ..." - @r.eval "p<-predict(model,sims)[1,1]" - prediction = @r.p - @r.quit # free R - end - prediction end module Substructure -- cgit v1.2.3 From 4372e80a38c5228f3b7d0372f92195e62500b743 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Tue, 17 May 2011 16:40:12 +0200 Subject: Add debug --- lib/algorithm.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 0a5b09f..ec5748d 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -178,7 +178,8 @@ module OpenTox sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors prediction = local_svm(neighbors, acts, sims, "svr", params) - prediction = take_logs ? 10**(prediction.to_f) : prediction.to_f + LOGGER.debug "Prediction is: '" + prediction.to_s + "'." + prediction = (take_logs ? 10**(prediction.to_f) : prediction.to_f) LOGGER.debug "Prediction is: '" + prediction.to_s + "'." conf = sims.inject{|sum,x| sum + x } -- cgit v1.2.3 From cf6d40be3f31d473f69216f1453e2ca0ddf82130 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Tue, 17 May 2011 16:46:26 +0200 Subject: nu 0.8 again to pass tests --- lib/algorithm.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index ec5748d..4cb80e3 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -254,7 +254,7 @@ module OpenTox # model + support vectors LOGGER.debug "Creating SVM model ..." - @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"nu-#{type}\", nu=0.5)" + @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"nu-#{type}\", nu=0.8)" @r.eval "sv<-as.vector(SVindex(model))" @r.eval "sims<-sims[sv]" @r.eval "sims<-as.kernelMatrix(matrix(sims,1))" -- cgit v1.2.3 From 4081ac06ddf8dafeebc93dfc28c4ef54f64a844d Mon Sep 17 00:00:00 2001 From: mguetlein Date: Wed, 18 May 2011 17:57:37 +0200 Subject: add opentox object for new algorithm comparison report --- lib/validation.rb | 49 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/lib/validation.rb b/lib/validation.rb index 1a2497b..d7a337c 100644 --- a/lib/validation.rb +++ b/lib/validation.rb @@ -198,7 +198,6 @@ module OpenTox # @param [String,optional] subjectid # @return [OpenTox::CrossvalidationReport] def self.find( uri, subjectid=nil ) - # PENDING load report data? OpenTox::RestClientWrapper.get(uri,{:subjectid => subjectid}) rep = CrossvalidationReport.new(uri) rep.load_metadata( subjectid ) @@ -227,6 +226,54 @@ module OpenTox end end + + class AlgorithmComparisonReport + include OpenTox + + # finds AlgorithmComparisonReport via uri, raises error if not found + # @param [String] uri + # @param [String,optional] subjectid + # @return [OpenTox::CrossvalidationReport] + def self.find( uri, subjectid=nil ) + OpenTox::RestClientWrapper.get(uri,{:subjectid => subjectid}) + rep = AlgorithmComparisonReport.new(uri) + rep.load_metadata( subjectid ) + rep + end + + # finds AlgorithmComparisonReport for a particular crossvalidation + # @param [String] crossvalidation uri + # @param [String,optional] subjectid + # @return [OpenTox::AlgorithmComparisonReport] nil if no report found + def self.find_for_crossvalidation( crossvalidation_uri, subjectid=nil ) + uris = RestClientWrapper.get(File.join(CONFIG[:services]["opentox-validation"], + "/report/algorithm_comparison?crossvalidation="+crossvalidation_uri), {:subjectid => subjectid}).chomp.split("\n") + uris.size==0 ? nil : AlgorithmComparisonReport.new(uris[-1]) + end + + # creates a crossvalidation report via crossvalidation + # @param [Hash] crossvalidation uri_hash, see example + # @param [String,optional] subjectid + # @param [OpenTox::Task,optional] waiting_task (can be a OpenTox::Subtask as well), progress is updated accordingly + # @return [OpenTox::AlgorithmComparisonReport] + # example for hash: + # { :lazar-bbrc => [ http://host/validation/crossvalidation/x1, http://host/validation/crossvalidation/x2 ], + # :lazar-last => [ http://host/validation/crossvalidation/xy, http://host/validation/crossvalidation/xy ] } + def self.create( crossvalidation_uri_hash, subjectid=nil, waiting_task=nil ) + identifier = [] + validation_uris = [] + crossvalidation_uri_hash.each do |id, uris| + uris.each do |uri| + identifier << id + validation_uris << uri + end + end + uri = RestClientWrapper.post(File.join(CONFIG[:services]["opentox-validation"],"/report/algorithm_comparison"), + { :validation_uris => validation_uris.join(","), :identifier => identifier.join(","), :subjectid => subjectid }, {}, waiting_task ) + AlgorithmComparisonReport.new(uri) + end + end + class QMRFReport include OpenTox -- cgit v1.2.3 From 96f00f67be05da4eed147928254af6e3f6f0f03d Mon Sep 17 00:00:00 2001 From: mguetlein Date: Tue, 17 May 2011 10:47:58 +0200 Subject: fix validation statistics: convert num predictions to integer --- lib/validation.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/validation.rb b/lib/validation.rb index d58d36e..1a2497b 100644 --- a/lib/validation.rb +++ b/lib/validation.rb @@ -65,7 +65,7 @@ module OpenTox def summary if @metadata[OT.classificationStatistics] res = { - :nr_predictions => @metadata[OT.numInstances] - @metadata[OT.numUnpredicted], + :nr_predictions => @metadata[OT.numInstances].to_i - @metadata[OT.numUnpredicted].to_i, :correct_predictions => @metadata[OT.classificationStatistics][OT.percentCorrect], :weighted_area_under_roc => @metadata[OT.classificationStatistics][OT.weightedAreaUnderRoc], } @@ -83,7 +83,7 @@ module OpenTox res elsif @metadata[OT.regressionStatistics] { - :nr_predictions => @metadata[OT.numInstances] - @metadata[OT.numUnpredicted], + :nr_predictions => @metadata[OT.numInstances].to_i - @metadata[OT.numUnpredicted].to_i, :r_square => @metadata[OT.regressionStatistics][OT.rSquare], :root_mean_squared_error => @metadata[OT.regressionStatistics][OT.rootMeanSquaredError], :mean_absolute_error => @metadata[OT.regressionStatistics][OT.meanAbsoluteError], -- cgit v1.2.3 From d755a131a5636f4fbe6077de5a276faf84c325b1 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Tue, 17 May 2011 16:08:25 +0200 Subject: Initial version --- lib/algorithm.rb | 62 ++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 47 insertions(+), 15 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 7fbe0dc..16372ea 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -164,11 +164,7 @@ module OpenTox # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features` # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required # @return [Hash] Hash with keys `:prediction, :confidence` - def self.local_svm_regression(neighbors,params ) - sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values between query and neighbors - conf = sims.inject{|sum,x| sum + x } - - # AM: Control log taking + def self.local_svm_regression(neighbors, params) take_logs=true neighbors.each do |n| if (! n[:activity].nil?) && (n[:activity].to_f < 0.0) @@ -180,10 +176,51 @@ module OpenTox take_logs ? Math.log10(act.to_f) : act.to_f end # activities of neighbors for supervised learning - neighbor_matches = neighbors.collect{ |n| n[:features] } # as in classification: URIs of matches + sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors + prediction = local_sv_machine (neighbors, acts, sims, "svr", params) + prediction = take_logs ? 10**(prediction.to_f) : prediction.to_f + LOGGER.debug "Prediction is: '" + prediction.to_s + "'." + + conf = sims.inject{|sum,x| sum + x } + confidence = conf/neighbors.size if neighbors.size > 0 + {:prediction => prediction, :confidence => confidence} + + end + + # Local support vector classification from neighbors + # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features` + # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required + # @return [Hash] Hash with keys `:prediction, :confidence` + def self.local_svm_classification(neighbors, params) + acts = neighbors.collect do |n| + act = n[:activity] + end # activities of neighbors for supervised learning + + sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors + prediction = local_sv_machine (neighbors, acts, sims, "svc", params) + prediction = prediction.to_f + LOGGER.debug "Prediction is: '" + prediction.to_s + "'." + + conf = sims.inject{|sum,x| sum + x } + confidence = conf/neighbors.size if neighbors.size > 0 + {:prediction => prediction, :confidence => confidence} + + end + + end + + # Local support vector prediction. Not to be called directly (use local_svm_regression or local_svm_classification. + # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features` + # @param [Array] acts, activities for neighbors. + # @param [Array] sims, similarities for neighbors. + # @param [String] type, one of "svr" (regression) or "svc" (classification). + # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required + # @return [Numeric] A prediction value. + def self.local_sv_machine(neighbors, acts, sims, type, params) + neighbor_matches = neighbors.collect{ |n| n[:features] } # URIs of matches gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel if neighbor_matches.size == 0 - raise "No neighbors found" + raise "No neighbors found." else # gram matrix (0..(neighbor_matches.length-1)).each do |i| @@ -216,21 +253,16 @@ module OpenTox # model + support vectors LOGGER.debug "Creating SVM model ..." - @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"nu-svr\", nu=0.8)" + @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"nu-#{type}\", nu=0.5)" @r.eval "sv<-as.vector(SVindex(model))" @r.eval "sims<-sims[sv]" @r.eval "sims<-as.kernelMatrix(matrix(sims,1))" LOGGER.debug "Predicting ..." @r.eval "p<-predict(model,sims)[1,1]" - prediction = 10**(@r.p.to_f) if take_logs - LOGGER.debug "Prediction is: '" + prediction.to_s + "'." + prediction = @r.p @r.quit # free R end - confidence = conf/neighbors.size if neighbors.size > 0 - {:prediction => prediction, :confidence => confidence} - - end - + prediction end module Substructure -- cgit v1.2.3 From cb0cc893c74016425b56424093a6de1b2f795c70 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Tue, 17 May 2011 16:35:20 +0200 Subject: Fixed method scope --- lib/algorithm.rb | 110 ++++++++++++++++++++++++++++--------------------------- 1 file changed, 56 insertions(+), 54 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 16372ea..0a5b09f 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -177,7 +177,7 @@ module OpenTox end # activities of neighbors for supervised learning sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors - prediction = local_sv_machine (neighbors, acts, sims, "svr", params) + prediction = local_svm(neighbors, acts, sims, "svr", params) prediction = take_logs ? 10**(prediction.to_f) : prediction.to_f LOGGER.debug "Prediction is: '" + prediction.to_s + "'." @@ -197,7 +197,7 @@ module OpenTox end # activities of neighbors for supervised learning sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors - prediction = local_sv_machine (neighbors, acts, sims, "svc", params) + prediction = local_svm (neighbors, acts, sims, "svc", params) prediction = prediction.to_f LOGGER.debug "Prediction is: '" + prediction.to_s + "'." @@ -207,62 +207,64 @@ module OpenTox end - end - # Local support vector prediction. Not to be called directly (use local_svm_regression or local_svm_classification. - # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features` - # @param [Array] acts, activities for neighbors. - # @param [Array] sims, similarities for neighbors. - # @param [String] type, one of "svr" (regression) or "svc" (classification). - # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required - # @return [Numeric] A prediction value. - def self.local_sv_machine(neighbors, acts, sims, type, params) - neighbor_matches = neighbors.collect{ |n| n[:features] } # URIs of matches - gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel - if neighbor_matches.size == 0 - raise "No neighbors found." - else - # gram matrix - (0..(neighbor_matches.length-1)).each do |i| - gram_matrix[i] = [] unless gram_matrix[i] - # upper triangle - ((i+1)..(neighbor_matches.length-1)).each do |j| - sim = eval("#{params[:similarity_algorithm]}(neighbor_matches[i], neighbor_matches[j], params[:p_values])") - gram_matrix[i][j] = Algorithm.gauss(sim) - gram_matrix[j] = [] unless gram_matrix[j] - gram_matrix[j][i] = gram_matrix[i][j] # lower triangle + # Local support vector prediction from neighbors. + # Not to be called directly (use local_svm_regression or local_svm_classification. + # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features` + # @param [Array] acts, activities for neighbors. + # @param [Array] sims, similarities for neighbors. + # @param [String] type, one of "svr" (regression) or "svc" (classification). + # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required + # @return [Numeric] A prediction value. + def self.local_svm(neighbors, acts, sims, type, params) + neighbor_matches = neighbors.collect{ |n| n[:features] } # URIs of matches + gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel + if neighbor_matches.size == 0 + raise "No neighbors found." + else + # gram matrix + (0..(neighbor_matches.length-1)).each do |i| + gram_matrix[i] = [] unless gram_matrix[i] + # upper triangle + ((i+1)..(neighbor_matches.length-1)).each do |j| + sim = eval("#{params[:similarity_algorithm]}(neighbor_matches[i], neighbor_matches[j], params[:p_values])") + gram_matrix[i][j] = Algorithm.gauss(sim) + gram_matrix[j] = [] unless gram_matrix[j] + gram_matrix[j][i] = gram_matrix[i][j] # lower triangle + end + gram_matrix[i][i] = 1.0 end - gram_matrix[i][i] = 1.0 - end - #LOGGER.debug gram_matrix.to_yaml - @r = RinRuby.new(false,false) # global R instance leads to Socket errors after a large number of requests - @r.eval "library('kernlab')" # this requires R package "kernlab" to be installed - LOGGER.debug "Setting R data ..." - # set data - @r.gram_matrix = gram_matrix.flatten - @r.n = neighbor_matches.size - @r.y = acts - @r.sims = sims + #LOGGER.debug gram_matrix.to_yaml + @r = RinRuby.new(false,false) # global R instance leads to Socket errors after a large number of requests + @r.eval "library('kernlab')" # this requires R package "kernlab" to be installed + LOGGER.debug "Setting R data ..." + # set data + @r.gram_matrix = gram_matrix.flatten + @r.n = neighbor_matches.size + @r.y = acts + @r.sims = sims + + LOGGER.debug "Preparing R data ..." + # prepare data + @r.eval "y<-as.vector(y)" + @r.eval "gram_matrix<-as.kernelMatrix(matrix(gram_matrix,n,n))" + @r.eval "sims<-as.vector(sims)" + + # model + support vectors + LOGGER.debug "Creating SVM model ..." + @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"nu-#{type}\", nu=0.5)" + @r.eval "sv<-as.vector(SVindex(model))" + @r.eval "sims<-sims[sv]" + @r.eval "sims<-as.kernelMatrix(matrix(sims,1))" + LOGGER.debug "Predicting ..." + @r.eval "p<-predict(model,sims)[1,1]" + prediction = @r.p + @r.quit # free R + end + prediction + end - LOGGER.debug "Preparing R data ..." - # prepare data - @r.eval "y<-as.vector(y)" - @r.eval "gram_matrix<-as.kernelMatrix(matrix(gram_matrix,n,n))" - @r.eval "sims<-as.vector(sims)" - - # model + support vectors - LOGGER.debug "Creating SVM model ..." - @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"nu-#{type}\", nu=0.5)" - @r.eval "sv<-as.vector(SVindex(model))" - @r.eval "sims<-sims[sv]" - @r.eval "sims<-as.kernelMatrix(matrix(sims,1))" - LOGGER.debug "Predicting ..." - @r.eval "p<-predict(model,sims)[1,1]" - prediction = @r.p - @r.quit # free R - end - prediction end module Substructure -- cgit v1.2.3 From eb2582799bb5a05e053e2709db47880430f80a78 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Tue, 17 May 2011 16:40:12 +0200 Subject: Add debug --- lib/algorithm.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 0a5b09f..ec5748d 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -178,7 +178,8 @@ module OpenTox sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors prediction = local_svm(neighbors, acts, sims, "svr", params) - prediction = take_logs ? 10**(prediction.to_f) : prediction.to_f + LOGGER.debug "Prediction is: '" + prediction.to_s + "'." + prediction = (take_logs ? 10**(prediction.to_f) : prediction.to_f) LOGGER.debug "Prediction is: '" + prediction.to_s + "'." conf = sims.inject{|sum,x| sum + x } -- cgit v1.2.3 From 251ce4cd57a161fa20f1400b5980b171bf2ff86c Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Tue, 17 May 2011 16:46:26 +0200 Subject: nu 0.8 again to pass tests --- lib/algorithm.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index ec5748d..4cb80e3 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -254,7 +254,7 @@ module OpenTox # model + support vectors LOGGER.debug "Creating SVM model ..." - @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"nu-#{type}\", nu=0.5)" + @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"nu-#{type}\", nu=0.8)" @r.eval "sv<-as.vector(SVindex(model))" @r.eval "sims<-sims[sv]" @r.eval "sims<-as.kernelMatrix(matrix(sims,1))" -- cgit v1.2.3 From cf0fd8003c373bd9216823ff2065231696ddfbcb Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Thu, 19 May 2011 10:08:17 +0200 Subject: Set nu to 0.5 --- lib/algorithm.rb | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 4cb80e3..fb5fd7f 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -178,7 +178,6 @@ module OpenTox sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors prediction = local_svm(neighbors, acts, sims, "svr", params) - LOGGER.debug "Prediction is: '" + prediction.to_s + "'." prediction = (take_logs ? 10**(prediction.to_f) : prediction.to_f) LOGGER.debug "Prediction is: '" + prediction.to_s + "'." @@ -254,7 +253,7 @@ module OpenTox # model + support vectors LOGGER.debug "Creating SVM model ..." - @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"nu-#{type}\", nu=0.8)" + @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"nu-#{type}\", nu=0.5)" @r.eval "sv<-as.vector(SVindex(model))" @r.eval "sims<-sims[sv]" @r.eval "sims<-as.kernelMatrix(matrix(sims,1))" -- cgit v1.2.3 From afefbdf05549c298387821c3a441d1de701291e0 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Thu, 19 May 2011 12:12:23 +0200 Subject: Added SVM classification --- lib/algorithm.rb | 61 +++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 40 insertions(+), 21 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index fb5fd7f..9402eab 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -177,7 +177,7 @@ module OpenTox end # activities of neighbors for supervised learning sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors - prediction = local_svm(neighbors, acts, sims, "svr", params) + prediction = local_svm(neighbors, acts, sims, "nu-svr", params) prediction = (take_logs ? 10**(prediction.to_f) : prediction.to_f) LOGGER.debug "Prediction is: '" + prediction.to_s + "'." @@ -197,9 +197,15 @@ module OpenTox end # activities of neighbors for supervised learning sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors - prediction = local_svm (neighbors, acts, sims, "svc", params) - prediction = prediction.to_f - LOGGER.debug "Prediction is: '" + prediction.to_s + "'." + + + acts_f = acts.collect {|v| v == true ? 1.0 : 0.0} + begin + prediction = local_svm (neighbors, acts_f, sims, "C-bsvc", params) + LOGGER.debug "Prediction is: '" + prediction.to_s + "'." + rescue Exception => e + LOGGER.debug "Prediction failed." + end conf = sims.inject{|sum,x| sum + x } confidence = conf/neighbors.size if neighbors.size > 0 @@ -213,7 +219,7 @@ module OpenTox # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features` # @param [Array] acts, activities for neighbors. # @param [Array] sims, similarities for neighbors. - # @param [String] type, one of "svr" (regression) or "svc" (classification). + # @param [String] type, one of "nu-svr" (regression) or "C-bsvc" (classification). # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required # @return [Numeric] A prediction value. def self.local_svm(neighbors, acts, sims, type, params) @@ -245,22 +251,35 @@ module OpenTox @r.y = acts @r.sims = sims - LOGGER.debug "Preparing R data ..." - # prepare data - @r.eval "y<-as.vector(y)" - @r.eval "gram_matrix<-as.kernelMatrix(matrix(gram_matrix,n,n))" - @r.eval "sims<-as.vector(sims)" - - # model + support vectors - LOGGER.debug "Creating SVM model ..." - @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"nu-#{type}\", nu=0.5)" - @r.eval "sv<-as.vector(SVindex(model))" - @r.eval "sims<-sims[sv]" - @r.eval "sims<-as.kernelMatrix(matrix(sims,1))" - LOGGER.debug "Predicting ..." - @r.eval "p<-predict(model,sims)[1,1]" - prediction = @r.p - @r.quit # free R + begin + LOGGER.debug "Preparing R data ..." + # prepare data + @r.eval "y<-as.vector(y)" + @r.eval "gram_matrix<-as.kernelMatrix(matrix(gram_matrix,n,n))" + @r.eval "sims<-as.vector(sims)" + + # model + support vectors + LOGGER.debug "Creating SVM model ..." + @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"#{type}\", nu=0.5)" + @r.eval "sv<-as.vector(SVindex(model))" + @r.eval "sims<-sims[sv]" + @r.eval "sims<-as.kernelMatrix(matrix(sims,1))" + LOGGER.debug "Predicting ..." + if type == "nu-svr" + @r.eval "p<-predict(model,sims)[1,1]" + elsif type == "C-bsvc" + @r.eval "p<-predict(model,sims)" + end + if type == "nu-svr" + prediction = @r.p + elsif type == "C-bsvc" + prediction = (@r.p.to_f == 1.0 ? true : false) + end + @r.quit # free R + rescue Exception => e + LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" + end + end prediction end -- cgit v1.2.3 From e34c80eadcd40482a765cda861b92ab5c1250049 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Thu, 19 May 2011 13:08:28 +0200 Subject: Added Exception handling --- lib/algorithm.rb | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 9402eab..5b41cbf 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -177,9 +177,13 @@ module OpenTox end # activities of neighbors for supervised learning sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors - prediction = local_svm(neighbors, acts, sims, "nu-svr", params) - prediction = (take_logs ? 10**(prediction.to_f) : prediction.to_f) - LOGGER.debug "Prediction is: '" + prediction.to_s + "'." + begin + prediction = local_svm(neighbors, acts, sims, "nu-svr", params) + prediction = (take_logs ? 10**(prediction.to_f) : prediction.to_f) + LOGGER.debug "Prediction is: '" + prediction.to_s + "'." + rescue Exception => e + LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" + end conf = sims.inject{|sum,x| sum + x } confidence = conf/neighbors.size if neighbors.size > 0 @@ -195,16 +199,13 @@ module OpenTox acts = neighbors.collect do |n| act = n[:activity] end # activities of neighbors for supervised learning - - sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors - - acts_f = acts.collect {|v| v == true ? 1.0 : 0.0} + sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors begin prediction = local_svm (neighbors, acts_f, sims, "C-bsvc", params) LOGGER.debug "Prediction is: '" + prediction.to_s + "'." rescue Exception => e - LOGGER.debug "Prediction failed." + LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" end conf = sims.inject{|sum,x| sum + x } -- cgit v1.2.3 From 30478c4dd18b56048b6e190027daef1fc6608230 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Thu, 19 May 2011 16:55:34 +0200 Subject: Fixed digression class / regr --- lib/model.rb | 83 ++++++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 55 insertions(+), 28 deletions(-) diff --git a/lib/model.rb b/lib/model.rb index 3d64f32..7acd8f2 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -179,10 +179,10 @@ module OpenTox return @prediction_dataset if database_activity(subjectid) - # AM: Balancing, see http://www.maunz.de/wordpress/opentox/2011/balanced-lazar - l = Array.new # larger - s = Array.new # smaller fraction - if metadata[RDF.type] == [OTA.ClassificationLazySingleTarget] + if metadata[RDF.type] == [OTA.ClassificationLazySingleTarget] + # AM: Balancing, see http://www.maunz.de/wordpress/opentox/2011/balanced-lazar + l = Array.new # larger + s = Array.new # smaller fraction @fingerprints.each do |training_compound,training_features| @activities[training_compound].each do |act| case act.to_s @@ -202,36 +202,41 @@ module OpenTox # determine ratio modulo = l.size.divmod(s.size)# modulo[0]=ratio, modulo[1]=rest LOGGER.info "BLAZAR: Balance: #{modulo[0]}, rest #{modulo[1]}." - end - # AM: Balanced predictions - addon = (modulo[1].to_f/modulo[0]).ceil # what will be added in each round - slack = modulo[1].divmod(addon)[1] # what remains for the last round - position = 0 - predictions = Array.new + # AM: Balanced predictions + addon = (modulo[1].to_f/modulo[0]).ceil # what will be added in each round + slack = modulo[1].divmod(addon)[1] # what remains for the last round + position = 0 + predictions = Array.new - prediction_best=nil - neighbors_best=nil + prediction_best=nil + neighbors_best=nil - begin - for i in 1..modulo[0] do - (i == modulo[0]) && (slack>0) ? lr_size = s.size + slack : lr_size = s.size + addon # determine fraction - LOGGER.info "BLAZAR: Neighbors round #{i}: #{position} + #{lr_size}." - neighbors(s, l, position, lr_size) # get ratio fraction of larger part - prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})") - if prediction_best.nil? || prediction[:confidence].abs > prediction_best[:confidence].abs - prediction_best=prediction - neighbors_best=@neighbors + begin + for i in 1..modulo[0] do + (i == modulo[0]) && (slack>0) ? lr_size = s.size + slack : lr_size = s.size + addon # determine fraction + LOGGER.info "BLAZAR: Neighbors round #{i}: #{position} + #{lr_size}." + neighbors(s, l, position, lr_size) # get ratio fraction of larger part + prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})") + if prediction_best.nil? || prediction[:confidence].abs > prediction_best[:confidence].abs + prediction_best=prediction + neighbors_best=@neighbors + end + position = position + lr_size + end + rescue Exception => e + LOGGER.error "BLAZAR failed in prediction: "+e.class.to_s+": "+e.message end - position = position + lr_size - end - rescue Exception => e - LOGGER.error "BLAZAR failed in prediction: "+e.class.to_s+": "+e.message - end - prediction=prediction_best - @neighbors=neighbors_best + prediction=prediction_best + @neighbors=neighbors_best + ### END AM balanced predictions + else # regression case: no balancing + neighbors + prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})") + end + prediction_feature_uri = File.join( @prediction_dataset.uri, "feature", "prediction", File.basename(@metadata[OT.dependentVariables]),@prediction_dataset.compounds.size.to_s) # TODO: fix dependentVariable @prediction_dataset.metadata[OT.dependentVariables] = prediction_feature_uri @@ -347,6 +352,28 @@ module OpenTox end + + # Find neighbors and store them as object variable + def neighbors + + @compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm + + @neighbors = [] + @fingerprints.each do |training_compound,training_features| + sim = eval("#{@similarity_algorithm}(@compound_features,training_features,@p_values)") + if sim > @min_sim + @activities[training_compound].each do |act| + @neighbors << { + :compound => training_compound, + :similarity => sim, + :features => training_features, + :activity => act + } + end + end + end + end + # Find database activities and store them in @prediction_dataset # @return [Boolean] true if compound has databasse activities, false if not def database_activity(subjectid) -- cgit v1.2.3 From 32b7faa44ef70194e0ae1c5e43948eea785f9d04 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Thu, 19 May 2011 17:03:50 +0200 Subject: Fixed neighbor selection --- lib/model.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/model.rb b/lib/model.rb index 7acd8f2..998d2dc 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -216,7 +216,7 @@ module OpenTox for i in 1..modulo[0] do (i == modulo[0]) && (slack>0) ? lr_size = s.size + slack : lr_size = s.size + addon # determine fraction LOGGER.info "BLAZAR: Neighbors round #{i}: #{position} + #{lr_size}." - neighbors(s, l, position, lr_size) # get ratio fraction of larger part + neighbors_balanced(s, l, position, lr_size) # get ratio fraction of larger part prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})") if prediction_best.nil? || prediction[:confidence].abs > prediction_best[:confidence].abs prediction_best=prediction @@ -325,7 +325,7 @@ module OpenTox end # Find neighbors and store them as object variable - def neighbors(s=nil, l=nil, start=nil, offset=nil) + def neighbors_balanced(s, l, start, offset) @compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm @neighbors = [] -- cgit v1.2.3 From 8c78bf2358338cf5f795a65c9b1c21a48474169f Mon Sep 17 00:00:00 2001 From: mguetlein Date: Fri, 20 May 2011 10:52:32 +0200 Subject: change location of to-html-opentox-image to local validation service --- lib/to-html.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/to-html.rb b/lib/to-html.rb index 6785974..66a3e74 100644 --- a/lib/to-html.rb +++ b/lib/to-html.rb @@ -1,5 +1,5 @@ -OT_LOGO = "http://opentox.informatik.uni-freiburg.de/ot-logo.png" +OT_LOGO = File.join(CONFIG[:services]["opentox-validation"],"resources/ot-logo.png") class String -- cgit v1.2.3 From 0b936c71d8a1d5effa6c29d5ee9c227fff18a070 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Mon, 23 May 2011 14:03:02 +0000 Subject: owl-dl fixed for model and prediction datasets --- lib/dataset.rb | 7 ++- lib/model.rb | 136 ++++++++++++++++++++++++++++++------------------------ lib/serializer.rb | 27 +++++++---- 3 files changed, 95 insertions(+), 75 deletions(-) diff --git a/lib/dataset.rb b/lib/dataset.rb index 4005c1c..4dc4296 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -179,7 +179,6 @@ module OpenTox end end -=begin # Detect feature type(s) in the dataset # @return [String] `classification", "regression", "mixed" or unknown` def feature_type(subjectid=nil) @@ -193,6 +192,7 @@ module OpenTox "unknown" end end +=begin =end # Get Spreadsheet representation @@ -369,12 +369,11 @@ module OpenTox end def value(compound) - @data_entries[compound.uri].collect{|f,v| v.first if f.match(/prediction/)}.compact.first + @data_entries[compound.uri].collect{|f,v| v.first if f.match(/value/)}.compact.first end def confidence(compound) - feature_uri = @data_entries[compound.uri].collect{|f,v| f if f.match(/prediction/)}.compact.first - @features[feature_uri][OT.confidence] + @data_entries[compound.uri].collect{|f,v| v.first if f.match(/confidence/)}.compact.first end def descriptors(compound) diff --git a/lib/model.rb b/lib/model.rb index 998d2dc..d46152d 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -164,8 +164,6 @@ module OpenTox features = {} unless @prediction_dataset - #@prediction_dataset = cached_prediction - #return @prediction_dataset if cached_prediction @prediction_dataset = Dataset.create(CONFIG[:services]["opentox-dataset"], subjectid) @prediction_dataset.add_metadata( { OT.hasSource => @uri, @@ -237,38 +235,90 @@ module OpenTox prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})") end - prediction_feature_uri = File.join( @prediction_dataset.uri, "feature", "prediction", File.basename(@metadata[OT.dependentVariables]),@prediction_dataset.compounds.size.to_s) - # TODO: fix dependentVariable - @prediction_dataset.metadata[OT.dependentVariables] = prediction_feature_uri + # TODO: reasonable feature name + #prediction_feature_uri = File.join( @prediction_dataset.uri, "feature", "prediction", File.basename(@metadata[OT.dependentVariables]),@prediction_dataset.compounds.size.to_s) + value_feature_uri = File.join( @prediction_dataset.uri, "feature", "prediction", File.basename(@metadata[OT.dependentVariables]),"value") + confidence_feature_uri = File.join( @prediction_dataset.uri, "feature", "prediction", File.basename(@metadata[OT.dependentVariables]),"confidence") + prediction_feature_uris = {value_feature_uri => prediction[:prediction], confidence_feature_uri => prediction[:confidence]} + prediction_feature_uris[value_feature_uri] = "No similar compounds in training dataset." if @neighbors.size == 0 or prediction[:prediction].nil? + + + #@prediction_dataset.metadata[OT.dependentVariables] = prediction_feature_uri + @prediction_dataset.metadata[OT.dependentVariables] = @metadata[OT.dependentVariables] + +=begin if @neighbors.size == 0 - @prediction_dataset.add_feature(prediction_feature_uri, { - RDF.type => [OT.MeasuredFeature], - OT.hasSource => @uri, - DC.creator => @uri, - DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )), - OT.error => "No similar compounds in training dataset.", - OT.parameters => [{DC.title => "compound_uri", OT.paramValue => compound_uri}] - }) - @prediction_dataset.add @compound.uri, prediction_feature_uri, prediction[:prediction] + prediction_feature_uris.each do |prediction_feature_uri,value| + @prediction_dataset.add_feature(prediction_feature_uri, { + RDF.type => [OT.MeasuredFeature], + OT.hasSource => @uri, + DC.creator => @uri, + DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )), + OT.error => "No similar compounds in training dataset.", + #OT.parameters => [{DC.title => "compound_uri", OT.paramValue => compound_uri}] + }) + @prediction_dataset.add @compound.uri, prediction_feature_uri, value + end else +=end + prediction_feature_uris.each do |prediction_feature_uri,value| + @prediction_dataset.metadata[OT.predictedVariables] = [] unless @prediction_dataset.metadata[OT.predictedVariables] + @prediction_dataset.metadata[OT.predictedVariables] << prediction_feature_uri @prediction_dataset.add_feature(prediction_feature_uri, { RDF.type => [OT.ModelPrediction], OT.hasSource => @uri, DC.creator => @uri, DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )), - OT.prediction => prediction[:prediction], - OT.confidence => prediction[:confidence], - OT.parameters => [{DC.title => "compound_uri", OT.paramValue => compound_uri}] + # TODO: factor information to value }) - @prediction_dataset.add @compound.uri, prediction_feature_uri, prediction[:prediction] + #OT.prediction => prediction[:prediction], + #OT.confidence => prediction[:confidence], + #OT.parameters => [{DC.title => "compound_uri", OT.paramValue => compound_uri}] + @prediction_dataset.add @compound.uri, prediction_feature_uri, value + end - if verbose - if @feature_calculation_algorithm == "Substructure.match" - f = 0 - @compound_features.each do |feature| - feature_uri = File.join( @prediction_dataset.uri, "feature", "descriptor", f.to_s) + if verbose + if @feature_calculation_algorithm == "Substructure.match" + f = 0 + @compound_features.each do |feature| + feature_uri = File.join( @prediction_dataset.uri, "feature", "descriptor", f.to_s) + features[feature] = feature_uri + @prediction_dataset.add_feature(feature_uri, { + RDF.type => [OT.Substructure], + OT.smarts => feature, + OT.pValue => @p_values[feature], + OT.effect => @effects[feature] + }) + @prediction_dataset.add @compound.uri, feature_uri, true + f+=1 + end + else + @compound_features.each do |feature| + features[feature] = feature + @prediction_dataset.add @compound.uri, feature, true + end + end + n = 0 + @neighbors.each do |neighbor| + neighbor_uri = File.join( @prediction_dataset.uri, "feature", "neighbor", n.to_s ) + @prediction_dataset.add_feature(neighbor_uri, { + OT.compound => neighbor[:compound], + OT.similarity => neighbor[:similarity], + OT.measuredActivity => neighbor[:activity], + RDF.type => [OT.Neighbor] + }) + @prediction_dataset.add @compound.uri, neighbor_uri, true + f = 0 unless f + neighbor[:features].each do |feature| + if @feature_calculation_algorithm == "Substructure.match" + feature_uri = File.join( @prediction_dataset.uri, "feature", "descriptor", f.to_s) unless feature_uri = features[feature] + else + feature_uri = feature + end + @prediction_dataset.add neighbor[:compound], feature_uri, true + unless features.has_key? feature features[feature] = feature_uri @prediction_dataset.add_feature(feature_uri, { RDF.type => [OT.Substructure], @@ -276,49 +326,13 @@ module OpenTox OT.pValue => @p_values[feature], OT.effect => @effects[feature] }) - @prediction_dataset.add @compound.uri, feature_uri, true f+=1 end - else - @compound_features.each do |feature| - features[feature] = feature - @prediction_dataset.add @compound.uri, feature, true - end - end - n = 0 - @neighbors.each do |neighbor| - neighbor_uri = File.join( @prediction_dataset.uri, "feature", "neighbor", n.to_s ) - @prediction_dataset.add_feature(neighbor_uri, { - OT.compound => neighbor[:compound], - OT.similarity => neighbor[:similarity], - OT.measuredActivity => neighbor[:activity], - RDF.type => [OT.Neighbor] - }) - @prediction_dataset.add @compound.uri, neighbor_uri, true - f = 0 unless f - neighbor[:features].each do |feature| - if @feature_calculation_algorithm == "Substructure.match" - feature_uri = File.join( @prediction_dataset.uri, "feature", "descriptor", f.to_s) unless feature_uri = features[feature] - else - feature_uri = feature - end - @prediction_dataset.add neighbor[:compound], feature_uri, true - unless features.has_key? feature - features[feature] = feature_uri - @prediction_dataset.add_feature(feature_uri, { - RDF.type => [OT.Substructure], - OT.smarts => feature, - OT.pValue => @p_values[feature], - OT.effect => @effects[feature] - }) - f+=1 - end - end - n+=1 end - # what happens with dataset predictions? + n+=1 end end + #end @prediction_dataset.save(subjectid) @prediction_dataset diff --git a/lib/serializer.rb b/lib/serializer.rb index e4cb541..78e7709 100644 --- a/lib/serializer.rb +++ b/lib/serializer.rb @@ -17,6 +17,7 @@ module OpenTox # this should come from opentox.owl OT.Compound => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } , OT.Feature => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } , + OT.Model => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } , OT.NominalFeature => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } , OT.NumericFeature => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } , OT.StringFeature => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } , @@ -27,6 +28,8 @@ module OpenTox OT.Parameter => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } , OT.Task => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } , OTA.PatternMiningSupervised => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } , + OTA.ClassificationLazySingleTarget => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } , + OTA.RegressionLazySingleTarget => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } , #classes for validation OT.Validation => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } , @@ -45,6 +48,9 @@ module OpenTox OT.values => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } , OT.algorithm => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } , OT.parameters => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } , + OT.featureDataset => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } , + OT.dependentVariables => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } , + OT.paramValue => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } , #object props for validation# OT.model => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } , @@ -126,7 +132,7 @@ module OpenTox OT.hasSource => { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } , OT.value => { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } , OT.paramScope => { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } , - OT.paramValue => { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } , + #OT.paramValue => { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } , } @data_entries = {} @@ -157,23 +163,16 @@ module OpenTox # Add a dataset # @param [String] uri Dataset URI def add_dataset(dataset) - @dataset = dataset.uri - @object[dataset.uri] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Dataset }] } - add_metadata dataset.uri, dataset.metadata - dataset.compounds.each { |compound| add_compound compound } - dataset.features.each { |feature,metadata| add_feature feature,metadata } - dataset.data_entries.each do |compound,entry| entry.each do |feature,values| values.each { |value| add_data_entry compound,feature,value } end end - end # Add a algorithm @@ -188,6 +187,13 @@ module OpenTox def add_model(uri,metadata) @object[uri] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Model }] } add_metadata uri, metadata + @object[metadata[OT.featureDataset]] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Dataset }] } + @object[metadata[OT.trainingDataset]] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Dataset }] } + @object[metadata[OT.dependentVariables]] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Feature }] } + # TODO: add algorithms from parameters + @object["http://ot-dev.in-silico.ch/algorithm/fminer/bbrc"] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Algorithm }] } + @object["http://ot-dev.in-silico.ch/algorithm/fminer/last"] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Algorithm }] } + @object["http://ot-dev.in-silico.ch/algorithm/lazar"] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Algorithm }] } end # Add a task @@ -272,7 +278,7 @@ module OpenTox @object[genid][name] = [{"type" => type(entry), "value" => entry }] end end - elsif v.is_a? Array and u == RDF.type + elsif v.is_a? Array #and u == RDF.type @object[uri] = {} unless @object[uri] v.each do |value| @object[uri][u] = [] unless @object[uri][u] @@ -354,7 +360,8 @@ module OpenTox # @return [text/plain] Object OWL-DL in RDF/XML format def to_rdfxml Tempfile.open("owl-serializer"){|f| f.write(self.to_ntriples); @path = f.path} - `rapper -i ntriples -f 'xmlns:ot="#{OT.uri}"' -f 'xmlns:dc="#{DC.uri}"' -f 'xmlns:rdf="#{RDF.uri}"' -f 'xmlns:owl="#{OWL.uri}"' -o rdfxml #{@path} 2>/dev/null` + # TODO: add base uri for ist services + `rapper -i ntriples -f 'xmlns:ot="#{OT.uri}"' -f 'xmlns:ota="#{OTA.uri}"' -f 'xmlns:dc="#{DC.uri}"' -f 'xmlns:rdf="#{RDF.uri}"' -f 'xmlns:owl="#{OWL.uri}"' -o rdfxml #{@path} 2>/dev/null` end # Convert to JSON as specified in http://n2.talis.com/wiki/RDF_JSON_Specification -- cgit v1.2.3 From 87eb7cc1e079821c2f7c5e101e7e392e9bd10f00 Mon Sep 17 00:00:00 2001 From: davor Date: Tue, 24 May 2011 09:35:11 +0200 Subject: Fixing regression detection --- lib/parser.rb | 66 +++++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 51 insertions(+), 15 deletions(-) diff --git a/lib/parser.rb b/lib/parser.rb index 7bdee95..8deaa91 100644 --- a/lib/parser.rb +++ b/lib/parser.rb @@ -277,7 +277,23 @@ module OpenTox def load_spreadsheet(book) book.default_sheet = 0 add_features book.row(1) - 2.upto(book.last_row) { |i| add_values book.row(i) } + + # AM: fix mixed read in + regression_features=false + 2.upto(book.last_row) { |i| + row = book.row(i) + smiles = row.shift + row.each_index do |i| + value = row[i] + type = feature_type(value) + if type == OT.NumericFeature + regression_features=true + break + end + end + } + + 2.upto(book.last_row) { |i| add_values book.row(i),regression_features } warnings @dataset end @@ -289,7 +305,23 @@ module OpenTox row = 0 input = csv.split("\n") add_features split_row(input.shift) - input.each { |row| add_values split_row(row) } + + + # AM: fix mixed read in + regression_features=false + input.each { |row| + row = split_row(row) + smiles = row.shift + row.each_index do |i| + value = row[i] + type = feature_type(value) + if type == OT.NumericFeature + regression_features=true + break + end + end + } + input.each { |row| add_values split_row(row),regression_features } warnings @dataset end @@ -335,7 +367,7 @@ module OpenTox end end - def add_values(row) + def add_values(row, regression_features=false) smiles = row.shift compound = Compound.from_smiles(smiles) @@ -353,19 +385,23 @@ module OpenTox @feature_types[feature] << type - case type - when OT.NominalFeature - case value.to_s - when TRUE_REGEXP - val = true - when FALSE_REGEXP - val = false - end - when OT.NumericFeature + if (regression_features) val = value.to_f - when OT.StringFeature - val = value.to_s - @activity_errors << smiles+", "+row.join(", ") + else + case type + when OT.NominalFeature + case value.to_s + when TRUE_REGEXP + val = true + when FALSE_REGEXP + val = false + end + when OT.NumericFeature + val = value.to_f + when OT.StringFeature + val = value.to_s + @activity_errors << smiles+", "+row.join(", ") + end end if val!=nil @dataset.add(compound.uri, feature, val) -- cgit v1.2.3 From 4a7ba2adb0743cd225ad5c2cf9f71c896d87b157 Mon Sep 17 00:00:00 2001 From: davor Date: Tue, 24 May 2011 10:45:53 +0200 Subject: Created dedicated function for value sweeping --- lib/parser.rb | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/lib/parser.rb b/lib/parser.rb index 8deaa91..4984292 100644 --- a/lib/parser.rb +++ b/lib/parser.rb @@ -282,15 +282,8 @@ module OpenTox regression_features=false 2.upto(book.last_row) { |i| row = book.row(i) - smiles = row.shift - row.each_index do |i| - value = row[i] - type = feature_type(value) - if type == OT.NumericFeature - regression_features=true - break - end - end + regression_features = detect_regression_features row + break if regression_features=true } 2.upto(book.last_row) { |i| add_values book.row(i),regression_features } @@ -311,21 +304,15 @@ module OpenTox regression_features=false input.each { |row| row = split_row(row) - smiles = row.shift - row.each_index do |i| - value = row[i] - type = feature_type(value) - if type == OT.NumericFeature - regression_features=true - break - end - end + regression_features = detect_regression_features row + break if regression_features=true } input.each { |row| add_values split_row(row),regression_features } warnings @dataset end + private def warnings @@ -367,6 +354,18 @@ module OpenTox end end + def detect_regression_features row + regression_features=false + row.each_index do |i| + value = row[i] + type = feature_type(value) + if type == OT.NumericFeature + regression_features=true + end + end + regression_features + end + def add_values(row, regression_features=false) smiles = row.shift -- cgit v1.2.3 From 8a20cf940c346fd04649d3c3c8f7ad4c1fcb20cb Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Tue, 24 May 2011 14:00:16 +0200 Subject: Fix: break was too early --- lib/parser.rb | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/parser.rb b/lib/parser.rb index 4984292..5f847c3 100644 --- a/lib/parser.rb +++ b/lib/parser.rb @@ -283,7 +283,7 @@ module OpenTox 2.upto(book.last_row) { |i| row = book.row(i) regression_features = detect_regression_features row - break if regression_features=true + break if regression_features==true } 2.upto(book.last_row) { |i| add_values book.row(i),regression_features } @@ -305,7 +305,7 @@ module OpenTox input.each { |row| row = split_row(row) regression_features = detect_regression_features row - break if regression_features=true + break if regression_features==true } input.each { |row| add_values split_row(row),regression_features } warnings @@ -355,6 +355,7 @@ module OpenTox end def detect_regression_features row + row.shift regression_features=false row.each_index do |i| value = row[i] -- cgit v1.2.3 From 0d87789eec37f7ae09d01937dbfc72af1ef17252 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Tue, 24 May 2011 16:06:05 +0200 Subject: fix small errors in to-html method --- lib/to-html.rb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/to-html.rb b/lib/to-html.rb index 66a3e74..51602d7 100644 --- a/lib/to-html.rb +++ b/lib/to-html.rb @@ -6,7 +6,7 @@ class String # encloses URI in text with with link tag # @return [String] new text with marked links def link_urls - self.gsub(/(?i)http(s?):\/\/[^\r\n\s']*/, '\0') + self.gsub(/(?i)http(s?):\/\/[^\r\n\s']*/, '\0') end end @@ -30,7 +30,7 @@ module OpenTox title = nil #$sinatra.url_for($sinatra.request.env['PATH_INFO'], :full) if $sinatra html = "" html += ""+title+"" if title - html += "" + html += "<\/img>" if AA_SERVER user = OpenTox::Authorization.get_user(subjectid) if subjectid @@ -63,7 +63,7 @@ module OpenTox html += "

Content

" if description || related_links html += "

" html += text.link_urls - html += "

" + html += "

" html end @@ -78,7 +78,7 @@ module OpenTox "password:"+ #""+ "" - html += "

" + html += "

" html end end -- cgit v1.2.3 From 524bda5ac60e07aa0805bfb215da718157849672 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Tue, 24 May 2011 16:08:22 +0200 Subject: fix lazar-non-predictions: replace "No similar compounds in training dataset." with nil --- lib/model.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/model.rb b/lib/model.rb index d46152d..139aed8 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -241,8 +241,8 @@ module OpenTox confidence_feature_uri = File.join( @prediction_dataset.uri, "feature", "prediction", File.basename(@metadata[OT.dependentVariables]),"confidence") prediction_feature_uris = {value_feature_uri => prediction[:prediction], confidence_feature_uri => prediction[:confidence]} - prediction_feature_uris[value_feature_uri] = "No similar compounds in training dataset." if @neighbors.size == 0 or prediction[:prediction].nil? - + #prediction_feature_uris[value_feature_uri] = "No similar compounds in training dataset." if @neighbors.size == 0 or prediction[:prediction].nil? + prediction_feature_uris[value_feature_uri] = nil if @neighbors.size == 0 or prediction[:prediction].nil? #@prediction_dataset.metadata[OT.dependentVariables] = prediction_feature_uri @prediction_dataset.metadata[OT.dependentVariables] = @metadata[OT.dependentVariables] -- cgit v1.2.3 From fe85fafc4b24cc8275ad67536d25d660249bb792 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Tue, 24 May 2011 16:10:10 +0200 Subject: adjust dataset-parser: predictedVariables may be array, do not request id/features from ambit services as not supported --- lib/parser.rb | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/lib/parser.rb b/lib/parser.rb index 5f847c3..a6878a2 100644 --- a/lib/parser.rb +++ b/lib/parser.rb @@ -56,7 +56,7 @@ module OpenTox `rapper -i rdfxml -o ntriples #{file.path} 2>/dev/null`.each_line do |line| triple = line.to_triple if triple[0] == @uri - if triple[1] == RDF.type # allow multiple types + if triple[1] == RDF.type || triple[1]==OT.predictedVariables # allow multiple types @metadata[triple[1]] = [] unless @metadata[triple[1]] @metadata[triple[1]] << triple[2].split('^^').first else @@ -228,7 +228,11 @@ module OpenTox file = Tempfile.new("ot-rdfxml") # do not concat /features to uri string, this would not work for dataset/R401577?max=3 uri = URI::parse(@uri) - uri.path = File.join(uri.path,"features") + # PENDING + # ambit models return http://host/dataset/id?feature_uris[]=sth but + # amibt dataset services does not support http://host/dataset/id/features?feature_uris[]=sth + # -> load features from complete dataset + uri.path = File.join(uri.path,"features") unless @uri=~/\?feature_uris\[\]/ uri = uri.to_s file.puts OpenTox::RestClientWrapper.get uri,{:subjectid => subjectid,:accept => "application/rdf+xml"},nil,false file.close -- cgit v1.2.3 From aaff8d61a7b3bb96e79fbf575718764a071ced9a Mon Sep 17 00:00:00 2001 From: mguetlein Date: Tue, 24 May 2011 18:04:59 +0200 Subject: added missing prop to serializer --- lib/serializer.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/serializer.rb b/lib/serializer.rb index 78e7709..62c1159 100644 --- a/lib/serializer.rb +++ b/lib/serializer.rb @@ -109,6 +109,7 @@ module OpenTox OT.precision => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } , OT.areaUnderRoc => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } , OT.weightedAreaUnderRoc => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } , + OT.weightedAccuracy => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } , OT.fMeasure => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } , OT.percentIncorrect => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } , OT.validationType => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } , -- cgit v1.2.3 From 43572f94815a5ec4ca5b922dad3a1c1a140b7348 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Tue, 24 May 2011 19:44:53 +0200 Subject: remove empty space that produced a warning --- lib/algorithm.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 5b41cbf..96b9df1 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -202,7 +202,7 @@ module OpenTox acts_f = acts.collect {|v| v == true ? 1.0 : 0.0} sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors begin - prediction = local_svm (neighbors, acts_f, sims, "C-bsvc", params) + prediction = local_svm(neighbors, acts_f, sims, "C-bsvc", params) LOGGER.debug "Prediction is: '" + prediction.to_s + "'." rescue Exception => e LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" -- cgit v1.2.3 From 2a52792b6143fba8e888fe454303a45f625cb0cf Mon Sep 17 00:00:00 2001 From: mguetlein Date: Tue, 24 May 2011 19:45:19 +0200 Subject: move to ruby-plot version 0.5 --- Rakefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Rakefile b/Rakefile index 08959b0..acf32d4 100644 --- a/Rakefile +++ b/Rakefile @@ -53,7 +53,7 @@ begin #valiation-gem gem.add_dependency "haml", ">=3" # validation-gems - gem.add_dependency "ruby-plot", "~>0.4.0" + gem.add_dependency "ruby-plot", "~>0.5.0" ['jeweler'].each { |dep| gem.add_development_dependency dep } gem.files = FileList["[A-Z]*", "{bin,generators,lib,test}/**/*", 'lib/jeweler/templates/.gitignore'] end -- cgit v1.2.3 From b7a03a18ce90d664d89d6a414512aa03a6dddcc4 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Wed, 25 May 2011 08:51:56 +0200 Subject: Add_neighbor function --- lib/model.rb | 59 ++++++++++++++++++++++------------------------------------- 1 file changed, 22 insertions(+), 37 deletions(-) diff --git a/lib/model.rb b/lib/model.rb index 139aed8..f5e0410 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -338,54 +338,39 @@ module OpenTox @prediction_dataset end - # Find neighbors and store them as object variable + # Find neighbors and store them as object variable, access only a subset of compounds for that. def neighbors_balanced(s, l, start, offset) @compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm - @neighbors = [] - begin - #@fingerprints.each do |training_compound,training_features| # AM: this is original by CH [ l[start, offset ] , s ].flatten.each do |training_compound| # AM: access only a balanced subset training_features = @fingerprints[training_compound] - sim = eval("#{@similarity_algorithm}(@compound_features,training_features,@p_values)") - if sim > @min_sim - @activities[training_compound].each do |act| - this_neighbor = { - :compound => training_compound, - :similarity => sim, - :features => training_features, - :activity => act - } - @neighbors << this_neighbor - end - end - end - rescue Exception => e - LOGGER.error "BLAZAR failed in neighbors: "+e.class.to_s+": "+e.message + add_neighbor training_features end end - - # Find neighbors and store them as object variable + # Find neighbors and store them as object variable. def neighbors - - @compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm - - @neighbors = [] - @fingerprints.each do |training_compound,training_features| - sim = eval("#{@similarity_algorithm}(@compound_features,training_features,@p_values)") - if sim > @min_sim - @activities[training_compound].each do |act| - @neighbors << { - :compound => training_compound, - :similarity => sim, - :features => training_features, - :activity => act - } - end + @compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm + @neighbors = [] + @fingerprints.each do |training_compound,training_features| # AM: access all compounds + add_neighbor training_features + end + end + + # Adds a neighbor to @neighbors if it passes the similarity threshold. + def add_neighbor(training_features) + sim = eval("#{@similarity_algorithm}(@compound_features,training_features,@p_values)") + if sim > @min_sim + @activities[training_compound].each do |act| + @neighbors << { + :compound => training_compound, + :similarity => sim, + :features => training_features, + :activity => act + } end - end + end end # Find database activities and store them in @prediction_dataset -- cgit v1.2.3 From baca9424a84b6a21363cac891cdef72f44c116b1 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Wed, 25 May 2011 10:08:04 +0200 Subject: remove duplicate debug msg --- lib/rest_client_wrapper.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/rest_client_wrapper.rb b/lib/rest_client_wrapper.rb index 747a353..53887a2 100644 --- a/lib/rest_client_wrapper.rb +++ b/lib/rest_client_wrapper.rb @@ -131,7 +131,7 @@ module OpenTox raise "unknown content-type for task : '"+res.content_type.to_s+"'"+" base-uri: "+base_uri.to_s+" content: "+res[0..200].to_s end - LOGGER.debug "result is a task '"+task.uri.to_s+"', wait for completion" + #LOGGER.debug "result is a task '"+task.uri.to_s+"', wait for completion" task.wait_for_completion waiting_task unless task.completed? # maybe task was cancelled / error if task.errorReport -- cgit v1.2.3 From baffedfc7543cfc8a90fc185fc91f2748ce94528 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Wed, 25 May 2011 10:24:13 +0200 Subject: Fixed add_neighbor --- lib/model.rb | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/lib/model.rb b/lib/model.rb index f5e0410..3d27706 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -338,28 +338,45 @@ module OpenTox @prediction_dataset end + # Calculate the propositionalization matrix aka instantiation matrix (0/1 entries for features) +# def get_prop_matrix +# matrix = Array.new +# begin +# @neighbors.each do |n| +# row = [] +# @features.each do |f| +# row << @fingerprints[n].include?(f) ? 0.0 : @p_values[f] +# end +# matrix << row +# end +# rescue Exception => e +# LOGGER.debug "get_prop_matrix failed with '" + $! + "'" +# end +# matrix +# end + # Find neighbors and store them as object variable, access only a subset of compounds for that. def neighbors_balanced(s, l, start, offset) @compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm @neighbors = [] [ l[start, offset ] , s ].flatten.each do |training_compound| # AM: access only a balanced subset training_features = @fingerprints[training_compound] - add_neighbor training_features + add_neighbor training_features, training_compound end end - # Find neighbors and store them as object variable. + # Find neighbors and store them as object variable, access all compounds for that. def neighbors @compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm @neighbors = [] @fingerprints.each do |training_compound,training_features| # AM: access all compounds - add_neighbor training_features + add_neighbor training_features, training_compound end end # Adds a neighbor to @neighbors if it passes the similarity threshold. - def add_neighbor(training_features) + def add_neighbor(training_features, training_compound) sim = eval("#{@similarity_algorithm}(@compound_features,training_features,@p_values)") if sim > @min_sim @activities[training_compound].each do |act| -- cgit v1.2.3 From 52e73a3da8e99da9a0a973b6ef9934297bc6511e Mon Sep 17 00:00:00 2001 From: mguetlein Date: Wed, 25 May 2011 11:55:41 +0200 Subject: remove check for task status when code is 201 (after discussion with nina and fabian) --- lib/task.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/task.rb b/lib/task.rb index 42d3d17..146a756 100644 --- a/lib/task.rb +++ b/lib/task.rb @@ -288,7 +288,8 @@ module OpenTox if @http_code == 202 raise "#{@uri}: illegal task state, code is 202, but hasStatus is not Running: '"+@metadata[OT.hasStatus]+"'" unless running? elsif @http_code == 201 - raise "#{@uri}: illegal task state, code is 201, but hasStatus is not Completed: '"+@metadata[OT.hasStatus]+"'" unless completed? + # ignore hasStatus + # raise "#{@uri}: illegal task state, code is 201, but hasStatus is not Completed: '"+@metadata[OT.hasStatus]+"'" unless completed? raise "#{@uri}: illegal task state, code is 201, resultURI is no task-URI: '"+@metadata[OT.resultURI].to_s+ "'" unless @metadata[OT.resultURI] and @metadata[OT.resultURI].to_s.uri? end -- cgit v1.2.3 From b6ba84a077db9f6c708807f059e501333f7303b1 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Wed, 25 May 2011 12:18:08 +0200 Subject: 1st v --- lib/model.rb | 39 +++++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/lib/model.rb b/lib/model.rb index 3d27706..bebf5d3 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -215,6 +215,7 @@ module OpenTox (i == modulo[0]) && (slack>0) ? lr_size = s.size + slack : lr_size = s.size + addon # determine fraction LOGGER.info "BLAZAR: Neighbors round #{i}: #{position} + #{lr_size}." neighbors_balanced(s, l, position, lr_size) # get ratio fraction of larger part + prop_matrix = get_prop_matrix prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})") if prediction_best.nil? || prediction[:confidence].abs > prediction_best[:confidence].abs prediction_best=prediction @@ -228,10 +229,11 @@ module OpenTox prediction=prediction_best @neighbors=neighbors_best - ### END AM balanced predictions + ### END AM balanced predictions else # regression case: no balancing neighbors + prop_matrix = get_prop_matrix prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})") end @@ -339,21 +341,26 @@ module OpenTox end # Calculate the propositionalization matrix aka instantiation matrix (0/1 entries for features) -# def get_prop_matrix -# matrix = Array.new -# begin -# @neighbors.each do |n| -# row = [] -# @features.each do |f| -# row << @fingerprints[n].include?(f) ? 0.0 : @p_values[f] -# end -# matrix << row -# end -# rescue Exception => e -# LOGGER.debug "get_prop_matrix failed with '" + $! + "'" -# end -# matrix -# end + def get_prop_matrix + matrix = Array.new + begin + @neighbors.each do |n| + n = n[:compound] + row = [] + @features.each do |f| + if ! @fingerprints[n].nil? + row << (@fingerprints[n].include?(f) ? 0.0 : @p_values[f]) + else + row << 0.0 + end + end + matrix << row + end + rescue Exception => e + LOGGER.debug "get_prop_matrix failed with '" + $! + "'" + end + matrix + end # Find neighbors and store them as object variable, access only a subset of compounds for that. def neighbors_balanced(s, l, start, offset) -- cgit v1.2.3 From d012b9e8da641c342c455a1384ddf3b14f5b5c35 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Wed, 25 May 2011 12:38:04 +0200 Subject: 2nd v --- lib/model.rb | 87 ++++++++++++++++++++++++++++++++---------------------------- 1 file changed, 46 insertions(+), 41 deletions(-) diff --git a/lib/model.rb b/lib/model.rb index bebf5d3..f4df8ea 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -177,7 +177,7 @@ module OpenTox return @prediction_dataset if database_activity(subjectid) - if metadata[RDF.type] == [OTA.ClassificationLazySingleTarget] + if metadata[RDF.type] == [OTA.ClassificationLazySingleTarget] # AM: Balancing, see http://www.maunz.de/wordpress/opentox/2011/balanced-lazar l = Array.new # larger s = Array.new # smaller fraction @@ -211,33 +211,33 @@ module OpenTox neighbors_best=nil begin - for i in 1..modulo[0] do - (i == modulo[0]) && (slack>0) ? lr_size = s.size + slack : lr_size = s.size + addon # determine fraction - LOGGER.info "BLAZAR: Neighbors round #{i}: #{position} + #{lr_size}." - neighbors_balanced(s, l, position, lr_size) # get ratio fraction of larger part - prop_matrix = get_prop_matrix - prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})") - if prediction_best.nil? || prediction[:confidence].abs > prediction_best[:confidence].abs - prediction_best=prediction - neighbors_best=@neighbors + for i in 1..modulo[0] do + (i == modulo[0]) && (slack>0) ? lr_size = s.size + slack : lr_size = s.size + addon # determine fraction + LOGGER.info "BLAZAR: Neighbors round #{i}: #{position} + #{lr_size}." + neighbors_balanced(s, l, position, lr_size) # get ratio fraction of larger part + props = get_props + prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})") + if prediction_best.nil? || prediction[:confidence].abs > prediction_best[:confidence].abs + prediction_best=prediction + neighbors_best=@neighbors + end + position = position + lr_size end - position = position + lr_size - end rescue Exception => e LOGGER.error "BLAZAR failed in prediction: "+e.class.to_s+": "+e.message end prediction=prediction_best @neighbors=neighbors_best - ### END AM balanced predictions + ### END AM balanced predictions else # regression case: no balancing neighbors - prop_matrix = get_prop_matrix + props = get_props prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})") end - - # TODO: reasonable feature name + + # TODO: reasonable feature name #prediction_feature_uri = File.join( @prediction_dataset.uri, "feature", "prediction", File.basename(@metadata[OT.dependentVariables]),@prediction_dataset.compounds.size.to_s) value_feature_uri = File.join( @prediction_dataset.uri, "feature", "prediction", File.basename(@metadata[OT.dependentVariables]),"value") confidence_feature_uri = File.join( @prediction_dataset.uri, "feature", "prediction", File.basename(@metadata[OT.dependentVariables]),"confidence") @@ -245,7 +245,7 @@ module OpenTox prediction_feature_uris = {value_feature_uri => prediction[:prediction], confidence_feature_uri => prediction[:confidence]} #prediction_feature_uris[value_feature_uri] = "No similar compounds in training dataset." if @neighbors.size == 0 or prediction[:prediction].nil? prediction_feature_uris[value_feature_uri] = nil if @neighbors.size == 0 or prediction[:prediction].nil? - + #@prediction_dataset.metadata[OT.dependentVariables] = prediction_feature_uri @prediction_dataset.metadata[OT.dependentVariables] = @metadata[OT.dependentVariables] @@ -275,10 +275,10 @@ module OpenTox DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )), # TODO: factor information to value }) - #OT.prediction => prediction[:prediction], - #OT.confidence => prediction[:confidence], - #OT.parameters => [{DC.title => "compound_uri", OT.paramValue => compound_uri}] - @prediction_dataset.add @compound.uri, prediction_feature_uri, value + #OT.prediction => prediction[:prediction], + #OT.confidence => prediction[:confidence], + #OT.parameters => [{DC.title => "compound_uri", OT.paramValue => compound_uri}] + @prediction_dataset.add @compound.uri, prediction_feature_uri, value end if verbose @@ -341,34 +341,39 @@ module OpenTox end # Calculate the propositionalization matrix aka instantiation matrix (0/1 entries for features) - def get_prop_matrix + # Same for the vector describing the query compound + def get_props matrix = Array.new begin - @neighbors.each do |n| - n = n[:compound] + @neighbors.each do |n| + n = n[:compound] + row = [] + @features.each do |f| + if ! @fingerprints[n].nil? + row << (@fingerprints[n].include?(f) ? 0.0 : @p_values[f]) + else + row << 0.0 + end + end + matrix << row + end row = [] @features.each do |f| - if ! @fingerprints[n].nil? - row << (@fingerprints[n].include?(f) ? 0.0 : @p_values[f]) - else - row << 0.0 - end + row << (@compound.match([f]).size == 0 ? 0.0 : @p_values[f]) end - matrix << row - end rescue Exception => e - LOGGER.debug "get_prop_matrix failed with '" + $! + "'" + LOGGER.debug "get_props failed with '" + $! + "'" end - matrix + [ matrix, row ] end # Find neighbors and store them as object variable, access only a subset of compounds for that. def neighbors_balanced(s, l, start, offset) @compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm @neighbors = [] - [ l[start, offset ] , s ].flatten.each do |training_compound| # AM: access only a balanced subset - training_features = @fingerprints[training_compound] - add_neighbor training_features, training_compound + [ l[start, offset ] , s ].flatten.each do |training_compound| # AM: access only a balanced subset + training_features = @fingerprints[training_compound] + add_neighbor training_features, training_compound end end @@ -378,7 +383,7 @@ module OpenTox @compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm @neighbors = [] @fingerprints.each do |training_compound,training_features| # AM: access all compounds - add_neighbor training_features, training_compound + add_neighbor training_features, training_compound end end @@ -388,10 +393,10 @@ module OpenTox if sim > @min_sim @activities[training_compound].each do |act| @neighbors << { - :compound => training_compound, - :similarity => sim, - :features => training_features, - :activity => act + :compound => training_compound, + :similarity => sim, + :features => training_features, + :activity => act } end end -- cgit v1.2.3 From a76a0c8d8ee259f1818a5fa2b5c4986fa460d888 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Wed, 25 May 2011 13:59:53 +0200 Subject: re-enabled cookie-authentication for html-access to webservices, replaced login/logout with sign in/out to avoid name clash with toxcreate --- lib/to-html.rb | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/lib/to-html.rb b/lib/to-html.rb index 51602d7..2c29f7d 100644 --- a/lib/to-html.rb +++ b/lib/to-html.rb @@ -36,11 +36,11 @@ module OpenTox user = OpenTox::Authorization.get_user(subjectid) if subjectid html += "

" unless user - html += "You are currently not logged in to "+$url_provider.url_for("",:full)+ - ", login" + html += "You are currently not signed in to "+$url_provider.url_for("",:full)+ + ", sign in" else - html += "You are logged in as '#{user}' to "+$url_provider.url_for("",:full)+ - ", logout" + html += "You are signed in as '#{user}' to "+$url_provider.url_for("",:full)+ + ", sign out" end html += "

" end @@ -67,46 +67,44 @@ module OpenTox html end - def self.login( msg=nil ) + def self.sign_in( msg=nil ) html = "Login" - html += "
" + html += "" html += "

" html += msg+"\n\n" if msg - html += "Please login to "+$url_provider.url_for("",:full)+"\n\n" + html += "Please sign in to "+$url_provider.url_for("",:full)+"\n\n" html += "" html += ""+ ""+ #""+ - "" + "" html += "
user:
password:

" html end end -=begin -get '/logout/?' do +get '/sign_out/?' do response.set_cookie("subjectid",{:value=>nil}) content_type "text/html" - content = "Sucessfully logged out from "+$url_provider.url_for("",:full) + content = "Sucessfully signed out from "+$url_provider.url_for("",:full) OpenTox.text_to_html(content) end -get '/login/?' do +get '/sign_in/?' do content_type "text/html" - OpenTox.login + OpenTox.sign_in end -post '/login/?' do +post '/sign_in/?' do subjectid = OpenTox::Authorization.authenticate(params[:user], params[:password]) if (subjectid) response.set_cookie("subjectid",{:value=>subjectid}) content_type "text/html" - content = "Sucessfully logged in as '"+params[:user]+"' to "+$url_provider.url_for("",:full) + content = "Sucessfully signed in as '"+params[:user]+"' to "+$url_provider.url_for("",:full) OpenTox.text_to_html(content,subjectid) else content_type "text/html" - OpenTox.login("Login failed, please try again") + OpenTox.sign_in("Login failed, please try again") end end -=end -- cgit v1.2.3 From 3f209f75a2abe2b8a89df3afcb3f54ec8329a5e1 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Wed, 25 May 2011 14:16:34 +0200 Subject: 3rd v --- lib/algorithm.rb | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 80 insertions(+), 5 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 96b9df1..280ed82 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -178,7 +178,7 @@ module OpenTox sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors begin - prediction = local_svm(neighbors, acts, sims, "nu-svr", params) + prediction = (props.nil? ? local_svm(neighbors, acts, sims, "nu-svr", params) : local_svm_prop(neighbors, acts, sims, "nu-svr", params, props)) prediction = (take_logs ? 10**(prediction.to_f) : prediction.to_f) LOGGER.debug "Prediction is: '" + prediction.to_s + "'." rescue Exception => e @@ -194,15 +194,16 @@ module OpenTox # Local support vector classification from neighbors # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features` # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required + # @param [Array] props, propositionalization of neighbors and query structure e.g. [ Array_for_q, two-nested-Arrays_for_n ] # @return [Hash] Hash with keys `:prediction, :confidence` - def self.local_svm_classification(neighbors, params) + def self.local_svm_classification(neighbors, params, props=nil) acts = neighbors.collect do |n| act = n[:activity] end # activities of neighbors for supervised learning acts_f = acts.collect {|v| v == true ? 1.0 : 0.0} sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors begin - prediction = local_svm(neighbors, acts_f, sims, "C-bsvc", params) + prediction = (props.nil? ? local_svm(neighbors, acts_f, sims, "C-bsvc", params) : local_svm_prop(neighbors, acts_f, sims, "C-bsvc", params, props)) LOGGER.debug "Prediction is: '" + prediction.to_s + "'." rescue Exception => e LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" @@ -216,14 +217,16 @@ module OpenTox # Local support vector prediction from neighbors. - # Not to be called directly (use local_svm_regression or local_svm_classification. + # Uses pre-defined Kernel Matrix. + # Not to be called directly (use local_svm_regression or local_svm_classification). # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features` # @param [Array] acts, activities for neighbors. # @param [Array] sims, similarities for neighbors. # @param [String] type, one of "nu-svr" (regression) or "C-bsvc" (classification). # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required + # @param [Array] props, propositionalization of neighbors and query structure e.g. [ Array_for_q, two-nested-Arrays_for_n ] # @return [Numeric] A prediction value. - def self.local_svm(neighbors, acts, sims, type, params) + def self.local_svm(neighbors, acts, sims, type, params, props=nil) neighbor_matches = neighbors.collect{ |n| n[:features] } # URIs of matches gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel if neighbor_matches.size == 0 @@ -285,6 +288,78 @@ module OpenTox prediction end + # Local support vector prediction from neighbors. + # Uses propositionalized setting. + # Not to be called directly (use local_svm_regression or local_svm_classification). + # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features` + # @param [Array] acts, activities for neighbors. + # @param [Array] props, propositionalization of neighbors and query structure e.g. [ Array_for_q, two-nested-Arrays_for_n ] + # @param [String] type, one of "nu-svr" (regression) or "C-bsvc" (classification). + # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required + # @return [Numeric] A prediction value. + def self.local_svm_prop(props, acts, type, params) + + n_prop = props[0] # is a matrix, i.e. two nested Arrays. + q_prop = props[1] # is an Array. + + #neighbor_matches = neighbors.collect{ |n| n[:features] } # URIs of matches + #gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel + if n_prop.size == 0 + raise "No neighbors found." + else + # gram matrix + #(0..(neighbor_matches.length-1)).each do |i| + # gram_matrix[i] = [] unless gram_matrix[i] + # # upper triangle + # ((i+1)..(neighbor_matches.length-1)).each do |j| + # sim = eval("#{params[:similarity_algorithm]}(neighbor_matches[i], neighbor_matches[j], params[:p_values])") + # gram_matrix[i][j] = Algorithm.gauss(sim) + # gram_matrix[j] = [] unless gram_matrix[j] + # gram_matrix[j][i] = gram_matrix[i][j] # lower triangle + # end + # gram_matrix[i][i] = 1.0 + #end + + #LOGGER.debug gram_matrix.to_yaml + @r = RinRuby.new(false,false) # global R instance leads to Socket errors after a large number of requests + @r.eval "library('kernlab')" # this requires R package "kernlab" to be installed + LOGGER.debug "Setting R data ..." + # set data + @r.n_prop = n_prop.flatten + @r.n = n_prop.size + @r.y = acts + @r.q_prop = q_prop + + begin + LOGGER.debug "Preparing R data ..." + # prepare data + @r.eval "y<-as.vector(y)" + @r.eval "prop_matrix<-matrix(n_prop,n,n)" + @r.eval "q_prop<-as.vector(q_prop)" + + # model + support vectors + LOGGER.debug "Creating SVM model ..." + @r.eval "model<-ksvm(prop_matrix, y, type=\"#{type}\", nu=0.5)" + LOGGER.debug "Predicting ..." + if type == "nu-svr" + @r.eval "p<-predict(model,q_prop)[1,1]" + elsif type == "C-bsvc" + @r.eval "p<-predict(model,q_prop)" + end + if type == "nu-svr" + prediction = @r.p + elsif type == "C-bsvc" + prediction = (@r.p.to_f == 1.0 ? true : false) + end + @r.quit # free R + rescue Exception => e + LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" + end + end + prediction + end + + end module Substructure -- cgit v1.2.3 From ca6dd87b7c80611c4f4e4716f68fe6633ce1066b Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Wed, 25 May 2011 15:04:50 +0200 Subject: 4th v --- lib/algorithm.rb | 4 ++-- lib/model.rb | 5 ++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 280ed82..2f722c1 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -178,7 +178,7 @@ module OpenTox sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors begin - prediction = (props.nil? ? local_svm(neighbors, acts, sims, "nu-svr", params) : local_svm_prop(neighbors, acts, sims, "nu-svr", params, props)) + prediction = (props.nil? ? local_svm(neighbors, acts, sims, "nu-svr", params) : local_svm_prop(props, acts, sims, "nu-svr", params)) prediction = (take_logs ? 10**(prediction.to_f) : prediction.to_f) LOGGER.debug "Prediction is: '" + prediction.to_s + "'." rescue Exception => e @@ -203,7 +203,7 @@ module OpenTox acts_f = acts.collect {|v| v == true ? 1.0 : 0.0} sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors begin - prediction = (props.nil? ? local_svm(neighbors, acts_f, sims, "C-bsvc", params) : local_svm_prop(neighbors, acts_f, sims, "C-bsvc", params, props)) + prediction = (props.nil? ? local_svm(neighbors, acts_f, sims, "C-bsvc", params) : local_svm_prop(props, acts_f, sims, "C-bsvc", params)) LOGGER.debug "Prediction is: '" + prediction.to_s + "'." rescue Exception => e LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" diff --git a/lib/model.rb b/lib/model.rb index f4df8ea..6a4602f 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -176,8 +176,7 @@ module OpenTox return @prediction_dataset if database_activity(subjectid) - - if metadata[RDF.type] == [OTA.ClassificationLazySingleTarget] + if metadata[RDF.type].include?([OTA.ClassificationLazySingleTarget][0]) # AM: searching in metadata for classification # AM: Balancing, see http://www.maunz.de/wordpress/opentox/2011/balanced-lazar l = Array.new # larger s = Array.new # smaller fraction @@ -231,7 +230,7 @@ module OpenTox @neighbors=neighbors_best ### END AM balanced predictions - else # regression case: no balancing + else # no balancing as before neighbors props = get_props prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})") -- cgit v1.2.3 From 2b12d07bec101df8c10b7ab5aff1491b0997a6c7 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Wed, 25 May 2011 17:10:14 +0200 Subject: 6th v --- lib/algorithm.rb | 17 +++++++++-------- lib/model.rb | 4 ++-- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 2f722c1..e089184 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -138,7 +138,7 @@ module OpenTox # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity` # @param [optional] params Ignored (only for compatibility with local_svm_regression) # @return [Hash] Hash with keys `:prediction, :confidence` - def self.weighted_majority_vote(neighbors,params={}) + def self.weighted_majority_vote(neighbors,params={}, props=nil) conf = 0.0 confidence = 0.0 neighbors.each do |neighbor| @@ -178,7 +178,7 @@ module OpenTox sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors begin - prediction = (props.nil? ? local_svm(neighbors, acts, sims, "nu-svr", params) : local_svm_prop(props, acts, sims, "nu-svr", params)) + prediction = (props.nil? ? local_svm(neighbors, acts, sims, "nu-svr", params) : local_svm_prop(props, acts, "nu-svr", params)) prediction = (take_logs ? 10**(prediction.to_f) : prediction.to_f) LOGGER.debug "Prediction is: '" + prediction.to_s + "'." rescue Exception => e @@ -203,7 +203,7 @@ module OpenTox acts_f = acts.collect {|v| v == true ? 1.0 : 0.0} sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors begin - prediction = (props.nil? ? local_svm(neighbors, acts_f, sims, "C-bsvc", params) : local_svm_prop(props, acts_f, sims, "C-bsvc", params)) + prediction = (props.nil? ? local_svm(neighbors, acts_f, sims, "C-bsvc", params) : local_svm_prop(props, acts_f, "C-bsvc", params)) LOGGER.debug "Prediction is: '" + prediction.to_s + "'." rescue Exception => e LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" @@ -226,7 +226,7 @@ module OpenTox # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required # @param [Array] props, propositionalization of neighbors and query structure e.g. [ Array_for_q, two-nested-Arrays_for_n ] # @return [Numeric] A prediction value. - def self.local_svm(neighbors, acts, sims, type, params, props=nil) + def self.local_svm(neighbors, acts, sims, type, params) neighbor_matches = neighbors.collect{ |n| n[:features] } # URIs of matches gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel if neighbor_matches.size == 0 @@ -326,16 +326,17 @@ module OpenTox LOGGER.debug "Setting R data ..." # set data @r.n_prop = n_prop.flatten - @r.n = n_prop.size + @r.n_prop_x_size = n_prop.size + @r.n_prop_y_size = n_prop[0].size @r.y = acts @r.q_prop = q_prop begin LOGGER.debug "Preparing R data ..." # prepare data - @r.eval "y<-as.vector(y)" - @r.eval "prop_matrix<-matrix(n_prop,n,n)" - @r.eval "q_prop<-as.vector(q_prop)" + @r.eval "y<-matrix(y)" + @r.eval "prop_matrix<-matrix(n_prop, n_prop_x_size, n_prop_y_size, byrow=TRUE)" + @r.eval "q_prop<-matrix(q_prop, 1, n_prop_y_size, byrow=TRUE)" # model + support vectors LOGGER.debug "Creating SVM model ..." diff --git a/lib/model.rb b/lib/model.rb index 6a4602f..1a5aa37 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -215,7 +215,7 @@ module OpenTox LOGGER.info "BLAZAR: Neighbors round #{i}: #{position} + #{lr_size}." neighbors_balanced(s, l, position, lr_size) # get ratio fraction of larger part props = get_props - prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})") + prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values}, props)") if prediction_best.nil? || prediction[:confidence].abs > prediction_best[:confidence].abs prediction_best=prediction neighbors_best=@neighbors @@ -233,7 +233,7 @@ module OpenTox else # no balancing as before neighbors props = get_props - prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})") + prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values}, props)") end # TODO: reasonable feature name -- cgit v1.2.3 From 77c885b7394aa11ba5e59eb60884205332efa31a Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Thu, 26 May 2011 08:38:21 +0200 Subject: 7th v --- lib/algorithm.rb | 2 ++ lib/model.rb | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index e089184..91e075a 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -227,6 +227,7 @@ module OpenTox # @param [Array] props, propositionalization of neighbors and query structure e.g. [ Array_for_q, two-nested-Arrays_for_n ] # @return [Numeric] A prediction value. def self.local_svm(neighbors, acts, sims, type, params) + LOGGER.debug "Local SVM (Weighted Tanimoto Kernel)." neighbor_matches = neighbors.collect{ |n| n[:features] } # URIs of matches gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel if neighbor_matches.size == 0 @@ -299,6 +300,7 @@ module OpenTox # @return [Numeric] A prediction value. def self.local_svm_prop(props, acts, type, params) + LOGGER.debug "Local SVM (Propositionalization / Kernlab Kernel)." n_prop = props[0] # is a matrix, i.e. two nested Arrays. q_prop = props[1] # is an Array. diff --git a/lib/model.rb b/lib/model.rb index 1a5aa37..921335c 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -214,7 +214,7 @@ module OpenTox (i == modulo[0]) && (slack>0) ? lr_size = s.size + slack : lr_size = s.size + addon # determine fraction LOGGER.info "BLAZAR: Neighbors round #{i}: #{position} + #{lr_size}." neighbors_balanced(s, l, position, lr_size) # get ratio fraction of larger part - props = get_props + (@prediction_algorithm.include? "svm" and params[:prop_kernel] == "true") ? props = get_props : props = nil prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values}, props)") if prediction_best.nil? || prediction[:confidence].abs > prediction_best[:confidence].abs prediction_best=prediction @@ -232,7 +232,7 @@ module OpenTox else # no balancing as before neighbors - props = get_props + (@prediction_algorithm.include? "svm" and params[:prop_kernel] == "true") ? props = get_props : props = nil prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values}, props)") end -- cgit v1.2.3 From f13763a8505ad997739b65d7cfcd804411ff9c77 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Thu, 26 May 2011 10:56:31 +0200 Subject: unify access to classification feature domain, replace methode feature_values with accept_values --- lib/dataset.rb | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/lib/dataset.rb b/lib/dataset.rb index 4dc4296..fc7c263 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -167,16 +167,13 @@ module OpenTox @features end - def feature_classes(feature, subjectid=nil) - if Feature.find(feature, subjectid).feature_type == "classification" - classes = [] - @data_entries.each do |c,e| - e[feature].each { |v| classes << v.to_s } - end - classes.uniq.sort - else - nil - end + # returns the accept_values of a feature, i.e. the classification domain / all possible feature values + # @param [String] feature the URI of the feature + # @return [Array] return array with strings, nil if value is not set (e.g. when feature is numeric) + def accept_values(feature) + accept_values = features[feature][OT.acceptValue] + accept_values.sort if accept_values + accept_values end # Detect feature type(s) in the dataset -- cgit v1.2.3 From 065fdeb351f68d0445b66516ccf8e7cfcc7e2a1f Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Thu, 26 May 2011 12:22:07 +0200 Subject: Fixed prediction type switching --- lib/model.rb | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/model.rb b/lib/model.rb index 921335c..d63eef2 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -176,7 +176,10 @@ module OpenTox return @prediction_dataset if database_activity(subjectid) - if metadata[RDF.type].include?([OTA.ClassificationLazySingleTarget][0]) # AM: searching in metadata for classification + load_metadata(subjectid) + case OpenTox::Feature.find(metadata[OT.dependentVariables]).feature_type + when "classification" + # AM: Balancing, see http://www.maunz.de/wordpress/opentox/2011/balanced-lazar l = Array.new # larger s = Array.new # smaller fraction @@ -231,6 +234,7 @@ module OpenTox ### END AM balanced predictions else # no balancing as before + LOGGER.info "LAZAR: Unbalanced." neighbors (@prediction_algorithm.include? "svm" and params[:prop_kernel] == "true") ? props = get_props : props = nil prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values}, props)") -- cgit v1.2.3 From f507227fd4efff3c8b32b2a8c8f2860af2546e3b Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Thu, 26 May 2011 13:11:32 +0200 Subject: Hotfix: Switch to balanced mode. --- lib/model.rb | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/lib/model.rb b/lib/model.rb index 139aed8..14471cc 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -176,11 +176,15 @@ module OpenTox return @prediction_dataset if database_activity(subjectid) - - if metadata[RDF.type] == [OTA.ClassificationLazySingleTarget] + load_metadata(subjectid) + case OpenTox::Feature.find(metadata[OT.dependentVariables]).feature_type + when "classification" # AM: Balancing, see http://www.maunz.de/wordpress/opentox/2011/balanced-lazar l = Array.new # larger s = Array.new # smaller fraction + + raise "no fingerprints in model" if @fingerprints.size==0 + @fingerprints.each do |training_compound,training_features| @activities[training_compound].each do |act| case act.to_s @@ -231,6 +235,7 @@ module OpenTox ### END AM balanced predictions else # regression case: no balancing + LOGGER.info "LAZAR: Unbalanced." neighbors prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})") end -- cgit v1.2.3 From 3922c8e5fcb9fbe6ddedab9f70e114717ff33a60 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Thu, 26 May 2011 14:28:19 +0200 Subject: 8th v --- lib/algorithm.rb | 2 +- lib/model.rb | 17 +++++++++++++---- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 91e075a..2652695 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -164,7 +164,7 @@ module OpenTox # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features` # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required # @return [Hash] Hash with keys `:prediction, :confidence` - def self.local_svm_regression(neighbors, params) + def self.local_svm_regression(neighbors, params, props=nil) take_logs=true neighbors.each do |n| if (! n[:activity].nil?) && (n[:activity].to_f < 0.0) diff --git a/lib/model.rb b/lib/model.rb index 7c2ef58..28c05a9 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -69,7 +69,7 @@ module OpenTox include Model include Algorithm - attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim, :subjectid + attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim, :subjectid, :prop_kernel def initialize(uri=nil) @@ -92,6 +92,7 @@ module OpenTox @prediction_algorithm = "Neighbors.weighted_majority_vote" @min_sim = 0.3 + @prop_kernel = false end @@ -219,7 +220,11 @@ module OpenTox (i == modulo[0]) && (slack>0) ? lr_size = s.size + slack : lr_size = s.size + addon # determine fraction LOGGER.info "BLAZAR: Neighbors round #{i}: #{position} + #{lr_size}." neighbors_balanced(s, l, position, lr_size) # get ratio fraction of larger part - (@prediction_algorithm.include? "svm" and params[:prop_kernel] == "true") ? props = get_props : props = nil + if @prop_kernel && @prediction_algorithm.include?("svm") + props = get_props + else + props = nil + end prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values}, props)") if prediction_best.nil? || prediction[:confidence].abs > prediction_best[:confidence].abs prediction_best=prediction @@ -235,10 +240,14 @@ module OpenTox @neighbors=neighbors_best ### END AM balanced predictions - else # regression case: no balancing + else # AM: no balancing LOGGER.info "LAZAR: Unbalanced." neighbors - (@prediction_algorithm.include? "svm" and params[:prop_kernel] == "true") ? props = get_props : props = nil + if @prop_kernel && @prediction_algorithm.include?("svm") + props = get_props + else + props = nil + end prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values}, props)") end -- cgit v1.2.3 From a1135de5d9911838f4c020d73be9c462cba709d1 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Thu, 26 May 2011 15:46:15 +0200 Subject: fix blazar nil error --- lib/model.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/model.rb b/lib/model.rb index 14471cc..edaa696 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -207,7 +207,7 @@ module OpenTox # AM: Balanced predictions addon = (modulo[1].to_f/modulo[0]).ceil # what will be added in each round - slack = modulo[1].divmod(addon)[1] # what remains for the last round + slack = (addon!=0 ? modulo[1].divmod(addon)[1] : 0) # what remains for the last round position = 0 predictions = Array.new -- cgit v1.2.3 From 1ddea6e712319b7f21a6acf24739a2ef54c41042 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Thu, 26 May 2011 20:52:10 +0000 Subject: predicted/value and predicted/confidence stored as separate features in model --- lib/model.rb | 67 ++++++++++++++++++++++++++----------------------------- lib/serializer.rb | 2 ++ 2 files changed, 34 insertions(+), 35 deletions(-) diff --git a/lib/model.rb b/lib/model.rb index 139aed8..31a513e 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -168,7 +168,6 @@ module OpenTox @prediction_dataset.add_metadata( { OT.hasSource => @uri, DC.creator => @uri, - # TODO: fix dependentVariable DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )), OT.parameters => [{DC.title => "compound_uri", OT.paramValue => compound_uri}] } ) @@ -235,47 +234,16 @@ module OpenTox prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})") end - # TODO: reasonable feature name - #prediction_feature_uri = File.join( @prediction_dataset.uri, "feature", "prediction", File.basename(@metadata[OT.dependentVariables]),@prediction_dataset.compounds.size.to_s) - value_feature_uri = File.join( @prediction_dataset.uri, "feature", "prediction", File.basename(@metadata[OT.dependentVariables]),"value") - confidence_feature_uri = File.join( @prediction_dataset.uri, "feature", "prediction", File.basename(@metadata[OT.dependentVariables]),"confidence") + value_feature_uri = File.join( @uri, "predicted", "value") + confidence_feature_uri = File.join( @uri, "predicted", "confidence") prediction_feature_uris = {value_feature_uri => prediction[:prediction], confidence_feature_uri => prediction[:confidence]} - #prediction_feature_uris[value_feature_uri] = "No similar compounds in training dataset." if @neighbors.size == 0 or prediction[:prediction].nil? prediction_feature_uris[value_feature_uri] = nil if @neighbors.size == 0 or prediction[:prediction].nil? - #@prediction_dataset.metadata[OT.dependentVariables] = prediction_feature_uri @prediction_dataset.metadata[OT.dependentVariables] = @metadata[OT.dependentVariables] + @prediction_dataset.metadata[OT.predictedVariables] = [value_feature_uri, confidence_feature_uri] -=begin - if @neighbors.size == 0 - prediction_feature_uris.each do |prediction_feature_uri,value| - @prediction_dataset.add_feature(prediction_feature_uri, { - RDF.type => [OT.MeasuredFeature], - OT.hasSource => @uri, - DC.creator => @uri, - DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )), - OT.error => "No similar compounds in training dataset.", - #OT.parameters => [{DC.title => "compound_uri", OT.paramValue => compound_uri}] - }) - @prediction_dataset.add @compound.uri, prediction_feature_uri, value - end - - else -=end prediction_feature_uris.each do |prediction_feature_uri,value| - @prediction_dataset.metadata[OT.predictedVariables] = [] unless @prediction_dataset.metadata[OT.predictedVariables] - @prediction_dataset.metadata[OT.predictedVariables] << prediction_feature_uri - @prediction_dataset.add_feature(prediction_feature_uri, { - RDF.type => [OT.ModelPrediction], - OT.hasSource => @uri, - DC.creator => @uri, - DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )), - # TODO: factor information to value - }) - #OT.prediction => prediction[:prediction], - #OT.confidence => prediction[:confidence], - #OT.parameters => [{DC.title => "compound_uri", OT.paramValue => compound_uri}] @prediction_dataset.add @compound.uri, prediction_feature_uri, value end @@ -401,6 +369,35 @@ module OpenTox end end + def prediction_features + [prediction_value_feature,prediction_confidence_feature] + end + + def prediction_value_feature + dependent_uri = @metadata[OT.dependentVariables].first + feature = OpenTox::Feature.new File.join( @uri, "predicted", "value") + feature.add_metadata( { + RDF.type => [OT.ModelPrediction], + OT.hasSource => @uri, + DC.creator => @uri, + DC.title => URI.decode(File.basename( dependent_uri )), + OWL.sameAs => dependent_uri + }) + feature + end + + def prediction_confidence_feature + dependent_uri = @metadata[OT.dependentVariables].first + feature = OpenTox::Feature.new File.join( @uri, "predicted", "confidence") + feature.add_metadata( { + RDF.type => [OT.ModelPrediction], + OT.hasSource => @uri, + DC.creator => @uri, + DC.title => "#{URI.decode(File.basename( dependent_uri ))} confidence" + }) + feature + end + # Save model at model service def save(subjectid) self.uri = RestClientWrapper.post(@uri,self.to_yaml,{:content_type => "application/x-yaml", :subjectid => subjectid}) diff --git a/lib/serializer.rb b/lib/serializer.rb index 62c1159..5a9fd0a 100644 --- a/lib/serializer.rb +++ b/lib/serializer.rb @@ -50,6 +50,7 @@ module OpenTox OT.parameters => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } , OT.featureDataset => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } , OT.dependentVariables => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } , + OT.predictedVariables => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } , OT.paramValue => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } , #object props for validation# @@ -191,6 +192,7 @@ module OpenTox @object[metadata[OT.featureDataset]] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Dataset }] } @object[metadata[OT.trainingDataset]] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Dataset }] } @object[metadata[OT.dependentVariables]] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Feature }] } + metadata[OT.predictedVariables].each{|feature| @object[feature] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Feature }] }} # TODO: add algorithms from parameters @object["http://ot-dev.in-silico.ch/algorithm/fminer/bbrc"] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Algorithm }] } @object["http://ot-dev.in-silico.ch/algorithm/fminer/last"] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Algorithm }] } -- cgit v1.2.3 From af426336f15e1f4b58c87bf09571721bb42a388f Mon Sep 17 00:00:00 2001 From: mguetlein Date: Fri, 27 May 2011 10:06:12 +0200 Subject: predicted variable and confidence can now be derieved from model, adjust feature_type accordingly --- lib/feature.rb | 26 ++++++++++++------------- lib/model.rb | 60 +++++++++++++++++++++++++++++++++++++++------------------- 2 files changed, 54 insertions(+), 32 deletions(-) diff --git a/lib/feature.rb b/lib/feature.rb index 2f1ab6c..f3bec5c 100644 --- a/lib/feature.rb +++ b/lib/feature.rb @@ -2,6 +2,8 @@ module OpenTox class Feature include OpenTox + attr_accessor :subjectid + # Find a feature # @param [String] uri Feature URI # @return [OpenTox::Task] Feature object @@ -13,9 +15,9 @@ module OpenTox else feature.add_metadata Parser::Owl::Dataset.new(uri).load_metadata end + feature.subjectid = subjectid feature end - # provides feature type, possible types are "regression" or "classification" # @return [String] feature type, unknown if OT.isA property is unknown/ not set @@ -24,21 +26,19 @@ module OpenTox "classification" elsif metadata[RDF.type].flatten.include?(OT.NumericFeature) "regression" - else - #"unknown" - metadata[RDF.type].inspect - end -=begin - case metadata[RDF.type] - when /NominalFeature/ - "classification" - when /NumericFeature/ - "regression" + elsif metadata[OWL.sameAs] + metadata[OWL.sameAs].each do |f| + begin + type = Feature.find(f, subjectid).feature_type + return type unless type=="unknown" + rescue => ex + LOGGER.warn "could not load same-as-feature '"+f.to_s+"' for feature '"+uri.to_s+"' : "+ex.message.to_s + end + end + "unknown" else "unknown" end -=end end - end end diff --git a/lib/model.rb b/lib/model.rb index baf01a6..f0fd46b 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -38,29 +38,51 @@ module OpenTox # provides feature type, possible types are "regression" or "classification" # @return [String] feature type, "unknown" if type could not be estimated def feature_type(subjectid=nil) - return @feature_type if @feature_type - - # dynamically perform restcalls if necessary - load_metadata(subjectid) if @metadata==nil or @metadata.size==0 or (@metadata.size==1 && @metadata.values[0]==@uri) - algorithm = OpenTox::Algorithm::Generic.find(@metadata[OT.algorithm], subjectid) - algorithm_title = algorithm ? algorithm.metadata[DC.title] : nil - algorithm_type = algorithm ? algorithm.metadata[RDF.type] : nil - dependent_variable = OpenTox::Feature.find( @metadata[OT.dependentVariables],subjectid ) - dependent_variable_type = dependent_variable ? dependent_variable.feature_type : nil - type_indicators = [dependent_variable_type, @metadata[RDF.type], @metadata[DC.title], @uri, algorithm_type, algorithm_title].flatten - type_indicators.each do |type| - case type - when /(?i)classification/ - @feature_type = "classification" - break - when /(?i)regression/ - @feature_type = "regression" - end + unless @feature_type + load_predicted_variables( subjectid ) unless @predicted_variable + @feature_type = OpenTox::Feature.find( @predicted_variable, subjectid ).feature_type end - raise "unknown model "+type_indicators.inspect unless @feature_type @feature_type end + + def predicted_variable( subjectid ) + load_predicted_variables( subjectid ) unless @predicted_variable + @predicted_variable + end + def predicted_confidence( subjectid ) + load_predicted_variables( subjectid ) unless @predicted_confidence + @predicted_confidence + end + + private + def load_predicted_variables( subjectid=nil ) + load_metadata(subjectid) if @metadata==nil or @metadata.size==0 or (@metadata.size==1 && @metadata.values[0]==@uri) + if @metadata[OT.predictedVariables] + predictedVariables = @metadata[OT.predictedVariables] + if predictedVariables.is_a?(Array) + if (predictedVariables.size==1) + @predicted_variable = predictedVariables[0] + elsif (predictedVariables.size==2) + # PENDING identify confidence + conf_index = -1 + predictedVariables.size.times do |i| + f = OpenTox::Feature.find(predictedVariables[i]) + conf_index = i if f.metadata[DC.title]=~/(?i)confidence/ + end + raise "could not estimate predicted variable from model: '"+uri.to_s+ + "', number of predicted-variables==2, but no confidence found" if conf_index==-1 + @predicted_variable = predictedVariables[1-conf_index] + @predicted_confidence = predictedVariables[conf_index] + else + raise "could not estimate predicted variable from model: '"+uri.to_s+"', number of predicted-variables > 2" + end + else + raise "could not estimate predicted variable from model: '"+uri.to_s+"', predicted-variables is no array" + end + end + raise "could not estimate predicted variable from model: '"+uri.to_s+"'" unless @predicted_variable + end end # Lazy Structure Activity Relationship class -- cgit v1.2.3 From 24df87ffb307af4dbff9a19af43734e0f02b81eb Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Fri, 27 May 2011 17:09:09 +0000 Subject: explicit gem versions --- Rakefile | 78 +++++++++++++++++++++++++++------------------------------------- 1 file changed, 33 insertions(+), 45 deletions(-) diff --git a/Rakefile b/Rakefile index acf32d4..f402e0d 100644 --- a/Rakefile +++ b/Rakefile @@ -10,51 +10,39 @@ begin gem.email = "helma@in-silico.ch" gem.homepage = "http://github.com/helma/opentox-ruby" gem.authors = ["Christoph Helma, Martin Guetlein, Andreas Maunz, Micha Rautenberg, David Vorgrimmler"] - # dependencies - [ "sinatra", - "emk-sinatra-url-for", - "sinatra-respond_to", - "sinatra-static-assets", - "rest-client", - "rack", - "rack-contrib", - "rack-flash", - "nokogiri", - "rubyzip", - "roo", - "spreadsheet", - "google-spreadsheet-ruby", - "yajl-ruby", - "tmail", - "rinruby", - "ohm", - "ohm-contrib", - "SystemTimer", - "rjb", - #valiation-gems - "dm-core", - "dm-serializer", - "dm-timestamps", - "dm-types", - "dm-migrations", - "dm-validations", - "dm-sqlite-adapter" - ].each { |dep| gem.add_dependency dep } -=begin - [ "dm-core", - 'dm-serializer', - 'dm-timestamps', - 'dm-types', - 'dm-migrations', - "dm-mysql-adapter", - "dm-validations", - ].each {|dep| gem.add_dependency dep, ">= 1" } -=end - #valiation-gem - gem.add_dependency "haml", ">=3" - # validation-gems - gem.add_dependency "ruby-plot", "~>0.5.0" - ['jeweler'].each { |dep| gem.add_development_dependency dep } + # dependencies with versions + gem.add_dependency "sinatra", "=1.2.6" + gem.add_dependency "emk-sinatra-url-for", "=0.2.1" + gem.add_dependency "sinatra-respond_to", "=0.7.0" + gem.add_dependency "sinatra-static-assets", "=0.5.0" + gem.add_dependency "rest-client", "=1.6.1" + gem.add_dependency "rack", "=1.3.0" + gem.add_dependency "rack-contrib", "=1.1.0" + gem.add_dependency "rack-flash", "=0.1.1" + gem.add_dependency "nokogiri", "=1.4.4" + gem.add_dependency "rubyzip", "=0.9.4" + gem.add_dependency "roo", "=1.9.3" + gem.add_dependency "spreadsheet", "=0.6.5.4" + gem.add_dependency "google-spreadsheet-ruby", "=0.1.5" + gem.add_dependency "yajl-ruby", "=0.8.2" + gem.add_dependency "tmail", "=1.2.7.1" + gem.add_dependency "rinruby", "=2.0.2" + gem.add_dependency "ohm", "=0.1.3" + gem.add_dependency "ohm-contrib", "=0.1.1" + gem.add_dependency "SystemTimer", "=1.2.3" + gem.add_dependency "rjb", "=1.3.4" + gem.add_dependency "haml", "=3.1.1" + #valiation-gems + gem.add_dependency "dm-core", "=1.1.0" + gem.add_dependency "dm-serializer", "=1.1.0" + gem.add_dependency "dm-timestamps", "=1.1.0" + gem.add_dependency "dm-types", "=1.1.0" + gem.add_dependency "dm-migrations", "=1.1.0" + gem.add_dependency "dm-validations", "=1.1.0" + gem.add_dependency "dm-sqlite-adapter", "=1.1.0" + gem.add_dependency "ruby-plot", "=0.5.0" + + gem.add_development_dependency 'jeweler' gem.files = FileList["[A-Z]*", "{bin,generators,lib,test}/**/*", 'lib/jeweler/templates/.gitignore'] end Jeweler::GemcutterTasks.new -- cgit v1.2.3 From 1dbf257583fb68658f4ba4ecde3602ebef07e540 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Mon, 30 May 2011 08:17:43 +0000 Subject: mail exceptions disabled --- Rakefile | 2 +- lib/config/config_ru.rb | 2 ++ lib/environment.rb | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Rakefile b/Rakefile index f402e0d..4fb851e 100644 --- a/Rakefile +++ b/Rakefile @@ -25,7 +25,7 @@ begin gem.add_dependency "spreadsheet", "=0.6.5.4" gem.add_dependency "google-spreadsheet-ruby", "=0.1.5" gem.add_dependency "yajl-ruby", "=0.8.2" - gem.add_dependency "tmail", "=1.2.7.1" + #gem.add_dependency "mail", "=2.3.0" gem.add_dependency "rinruby", "=2.0.2" gem.add_dependency "ohm", "=0.1.3" gem.add_dependency "ohm-contrib", "=0.1.1" diff --git a/lib/config/config_ru.rb b/lib/config/config_ru.rb index 93df867..dc04263 100644 --- a/lib/config/config_ru.rb +++ b/lib/config/config_ru.rb @@ -19,6 +19,7 @@ set :lock, true end use Rack::ShowExceptions +=begin if defined?(MAIL) # monkeypatch with the original method @@ -50,3 +51,4 @@ if defined?(MAIL) mail.smtp MAIL end end +=end diff --git a/lib/environment.rb b/lib/environment.rb index ffc4f60..28a9a66 100644 --- a/lib/environment.rb +++ b/lib/environment.rb @@ -27,7 +27,7 @@ end Ohm.connect :thread_safe => true # load mail settings for error messages -load File.join config_dir,"mail.rb" if File.exists?(File.join config_dir,"mail.rb") +#load File.join config_dir,"mail.rb" if File.exists?(File.join config_dir,"mail.rb") logfile = "#{LOG_DIR}/#{ENV["RACK_ENV"]}.log" #LOGGER = OTLogger.new(logfile,'daily') # daily rotation -- cgit v1.2.3 From a5945d10144e49ff983dd02b0860aeb4e4e8ab1c Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Tue, 31 May 2011 08:59:18 +0000 Subject: keep classes from external datasets --- lib/model.rb | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/lib/model.rb b/lib/model.rb index 4321646..02fabfa 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -91,7 +91,7 @@ module OpenTox include Model include Algorithm - attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim, :subjectid, :prop_kernel + attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim, :subjectid, :prop_kernel, :value_map def initialize(uri=nil) @@ -108,6 +108,7 @@ module OpenTox @activities = {} @p_values = {} @fingerprints = {} + @value_map = {} @feature_calculation_algorithm = "Substructure.match" @similarity_algorithm = "Similarity.tanimoto" @@ -275,15 +276,21 @@ module OpenTox value_feature_uri = File.join( @uri, "predicted", "value") confidence_feature_uri = File.join( @uri, "predicted", "confidence") - prediction_feature_uris = {value_feature_uri => prediction[:prediction], confidence_feature_uri => prediction[:confidence]} - prediction_feature_uris[value_feature_uri] = nil if @neighbors.size == 0 or prediction[:prediction].nil? + #prediction_feature_uris = {value_feature_uri => prediction[:prediction], confidence_feature_uri => prediction[:confidence]} + #prediction_feature_uris[value_feature_uri] = nil if @neighbors.size == 0 or prediction[:prediction].nil? @prediction_dataset.metadata[OT.dependentVariables] = @metadata[OT.dependentVariables] @prediction_dataset.metadata[OT.predictedVariables] = [value_feature_uri, confidence_feature_uri] - prediction_feature_uris.each do |prediction_feature_uri,value| - @prediction_dataset.add @compound.uri, prediction_feature_uri, value + if OpenTox::Feature.find(metadata[OT.dependentVariables]).feature_type == "classification" + @prediction_dataset.add @compound.uri, value_feature_uri, @value_map[prediction[:prediction]] + else + @prediction_dataset.add @compound.uri, value_feature_uri, prediction[:prediction] end + @prediction_dataset.add @compound.uri, confidence_feature_uri, prediction[:confidence] + #prediction_feature_uris.each do |prediction_feature_uri,value| + #@prediction_dataset.add @compound.uri, prediction_feature_uri, @value_map[value] + #end if verbose if @feature_calculation_algorithm == "Substructure.match" -- cgit v1.2.3 From b98bc374b119e5a21f43d4f45048376aeeeead09 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Tue, 31 May 2011 13:51:09 +0200 Subject: fix set acceptValues when splitting dataset for validation --- lib/dataset.rb | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lib/dataset.rb b/lib/dataset.rb index fc7c263..5e6a29b 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -304,6 +304,12 @@ module OpenTox end end end + # set feature metadata in new dataset accordingly (including accept values) + features.each do |f| + self.features[f].each do |k,v| + dataset.features[f][k] = v + end + end dataset.add_metadata(metadata) dataset.save(subjectid) dataset -- cgit v1.2.3 From b54900370f9cb13ba234b9fb3ff070f0ae960f07 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Tue, 31 May 2011 23:12:35 +0200 Subject: add compound method to directly request smiles without using inchi --- lib/compound.rb | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lib/compound.rb b/lib/compound.rb index a85507b..48ef3a6 100644 --- a/lib/compound.rb +++ b/lib/compound.rb @@ -22,6 +22,12 @@ module OpenTox @inchi = RestClientWrapper.get(@uri, :accept => 'chemical/x-inchi').to_s.chomp if @uri end end + + # request smiles from compound service via accept header + # @return smiles as string + def self.smiles(uri) + RestClientWrapper.get(uri, :accept => 'chemical/x-daylight-smiles').to_s.chomp + end # Create a compound from smiles string # @example -- cgit v1.2.3 From 7b28e192fdec6eaccd0e2c528df76c54ca1b1cdd Mon Sep 17 00:00:00 2001 From: mguetlein Date: Tue, 31 May 2011 23:14:22 +0200 Subject: fix: handle uri params on ambit datasets, like dataset//?(max=5|page=0) --- lib/algorithm.rb | 1 + lib/dataset.rb | 6 +++++- lib/parser.rb | 8 +++++--- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 2652695..7c1c7a2 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -16,6 +16,7 @@ module OpenTox # @param [optional,OpenTox::Task] waiting_task (can be a OpenTox::Subtask as well), progress is updated accordingly # @return [String] URI of new resource (dataset, model, ...) def run(params=nil, waiting_task=nil) + LOGGER.info "Running algorithm '"+@uri.to_s+"' with params: "+params.inspect RestClientWrapper.post(@uri, params, {:accept => 'text/uri-list'}, waiting_task).to_s end diff --git a/lib/dataset.rb b/lib/dataset.rb index 5e6a29b..784bb2a 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -149,7 +149,11 @@ module OpenTox # Load and return only compound URIs from the dataset service # @return [Array] Compound URIs in the dataset def load_compounds(subjectid=nil) - RestClientWrapper.get(File.join(uri,"compounds"),{:accept=> "text/uri-list", :subjectid => subjectid}).to_s.each_line do |compound_uri| + # fix for datasets like http://apps.ideaconsult.net:8080/ambit2/dataset/272?max=50 + u = URI::parse(uri) + u.path = File.join(u.path,"compounds") + u = u.to_s + RestClientWrapper.get(u,{:accept=> "text/uri-list", :subjectid => subjectid}).to_s.each_line do |compound_uri| @compounds << compound_uri.chomp end @compounds.uniq! diff --git a/lib/parser.rb b/lib/parser.rb index a6878a2..9eacf4b 100644 --- a/lib/parser.rb +++ b/lib/parser.rb @@ -40,8 +40,9 @@ module OpenTox else file = Tempfile.new("ot-rdfxml") if @dataset - # do not concat /metadata to uri string, this would not work for dataset/R401577?max=3 uri = URI::parse(@uri) + #remove params like dataset/?max=3 from uri, not needed for metadata + uri.query = nil uri.path = File.join(uri.path,"metadata") uri = uri.to_s else @@ -230,9 +231,10 @@ module OpenTox uri = URI::parse(@uri) # PENDING # ambit models return http://host/dataset/id?feature_uris[]=sth but - # amibt dataset services does not support http://host/dataset/id/features?feature_uris[]=sth + # amibt dataset services does not support http://host/dataset/id/features?feature_uris[]=sth + # and features are not inlcuded in http://host/dataset/id/features # -> load features from complete dataset - uri.path = File.join(uri.path,"features") unless @uri=~/\?feature_uris\[\]/ + uri.path = File.join(uri.path,"features") unless @uri=~/\?(feature_uris|page|pagesize)/ uri = uri.to_s file.puts OpenTox::RestClientWrapper.get uri,{:subjectid => subjectid,:accept => "application/rdf+xml"},nil,false file.close -- cgit v1.2.3 From 515520bb8c03a7211dc32c1d57d35c0acdf77210 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Tue, 31 May 2011 23:15:02 +0200 Subject: adding training test validation method for validation --- lib/validation.rb | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/lib/validation.rb b/lib/validation.rb index d7a337c..46251a5 100644 --- a/lib/validation.rb +++ b/lib/validation.rb @@ -36,6 +36,18 @@ module OpenTox Validation.new(uri) end + # creates a training test validation, waits until it finishes, may take some time + # @param [Hash] params (required:algorithm_uri,training_dataset_uri,prediction_feature,test_dataset_uri,optional:algorithm_params) + # @param [String,optional] subjectid + # @param [OpenTox::Task,optional] waiting_task (can be a OpenTox::Subtask as well), progress is updated accordingly + # @return [OpenTox::Validation] + def self.create_training_test_validation( params, subjectid=nil, waiting_task=nil ) + params[:subjectid] = subjectid if subjectid + uri = OpenTox::RestClientWrapper.post( File.join(CONFIG[:services]["opentox-validation"],"training_test_validation"), + params,{:content_type => "text/uri-list"},waiting_task ) + Validation.new(uri) + end + # looks for report for this validation, creates a report if no report is found # @param [String,optional] subjectid # @param [OpenTox::Task,optional] waiting_task (can be a OpenTox::Subtask as well), progress is updated accordingly -- cgit v1.2.3 From c9881eaaf4ea68b5327e4b617c05195530abddf7 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Mon, 6 Jun 2011 16:54:56 +0000 Subject: halts (partially) substituted by OpenTox errors --- lib/helper.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/helper.rb b/lib/helper.rb index 995f3e9..4e1c0a6 100644 --- a/lib/helper.rb +++ b/lib/helper.rb @@ -81,7 +81,7 @@ helpers do when "css" @accept = 'text/css' else - # halt 404, "File format #{extension} not supported." + # raise OpenTox::NotFoundError.new "File format #{extension} not supported." end end end -- cgit v1.2.3 From eb2b0d29d506f47ad793a3763768d306c760c632 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Mon, 6 Jun 2011 19:35:57 +0200 Subject: compound hack and feature type bugfix to work with ambit datasets --- lib/compound.rb | 5 +++++ lib/parser.rb | 9 +++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/lib/compound.rb b/lib/compound.rb index 48ef3a6..c178ba2 100644 --- a/lib/compound.rb +++ b/lib/compound.rb @@ -21,6 +21,11 @@ module OpenTox else @inchi = RestClientWrapper.get(@uri, :accept => 'chemical/x-inchi').to_s.chomp if @uri end + + if @uri and @inchi.to_s.size==0 + LOGGER.warn "REMOVE ABMIT HACK: no inchi for compound "+@uri.to_s+", load via smiles" + @inchi = Compound.smiles2inchi(Compound.smiles(@uri)) + end end # request smiles from compound service via accept header diff --git a/lib/parser.rb b/lib/parser.rb index 9eacf4b..90a997b 100644 --- a/lib/parser.rb +++ b/lib/parser.rb @@ -250,8 +250,13 @@ module OpenTox File.delete(to_delete) if to_delete statements.each do |triple| if features.include? triple[0] - @dataset.features[triple[0]] = {} unless @dataset.features[triple[0]] - @dataset.features[triple[0]][triple[1]] = triple[2].split('^^').first + @dataset.features[triple[0]] = {} unless @dataset.features[triple[0]] + if triple[1] == RDF.type + @dataset.features[triple[0]][triple[1]] = [] unless @dataset.features[triple[0]][triple[1]] + @dataset.features[triple[0]][triple[1]] << triple[2].split('^^').first + else + @dataset.features[triple[0]][triple[1]] = triple[2].split('^^').first + end end end @dataset.features -- cgit v1.2.3 From 6fa95c7d5fac48b3c740a112a53b300b8a7b4fdc Mon Sep 17 00:00:00 2001 From: mguetlein Date: Mon, 6 Jun 2011 19:36:44 +0200 Subject: add bootstrapping --- lib/validation.rb | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/lib/validation.rb b/lib/validation.rb index 46251a5..3e8367c 100644 --- a/lib/validation.rb +++ b/lib/validation.rb @@ -48,6 +48,18 @@ module OpenTox Validation.new(uri) end + # creates a bootstrapping validation, waits until it finishes, may take some time + # @param [Hash] params (required:algorithm_uri,dataset_uri,prediction_feature, optional:algorithm_params,random_seed(1)) + # @param [String,optional] subjectid + # @param [OpenTox::Task,optional] waiting_task (can be a OpenTox::Subtask as well), progress is updated accordingly + # @return [OpenTox::Validation] + def self.create_bootstrapping_validation( params, subjectid=nil, waiting_task=nil ) + params[:subjectid] = subjectid if subjectid + uri = OpenTox::RestClientWrapper.post( File.join(CONFIG[:services]["opentox-validation"],"bootstrapping"), + params,{:content_type => "text/uri-list"},waiting_task ) + Validation.new(uri) + end + # looks for report for this validation, creates a report if no report is found # @param [String,optional] subjectid # @param [OpenTox::Task,optional] waiting_task (can be a OpenTox::Subtask as well), progress is updated accordingly -- cgit v1.2.3 From 73ee94895a75901dc12300d2cc81b539c6c98c8d Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Tue, 7 Jun 2011 14:55:31 +0200 Subject: akephalos gem added for headless browser tests --- Rakefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Rakefile b/Rakefile index 4fb851e..834e0a3 100644 --- a/Rakefile +++ b/Rakefile @@ -32,6 +32,8 @@ begin gem.add_dependency "SystemTimer", "=1.2.3" gem.add_dependency "rjb", "=1.3.4" gem.add_dependency "haml", "=3.1.1" + # for headless browser tests + gem.add_dependency "akephalos", "=0.2.5" #valiation-gems gem.add_dependency "dm-core", "=1.1.0" gem.add_dependency "dm-serializer", "=1.1.0" -- cgit v1.2.3 From 6198b914562ea83f5851772be0de3597d90fc863 Mon Sep 17 00:00:00 2001 From: mr Date: Tue, 14 Jun 2011 17:32:45 +0200 Subject: fix helper if no subjectid --- lib/helper.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/helper.rb b/lib/helper.rb index 3a6126a..f59246e 100644 --- a/lib/helper.rb +++ b/lib/helper.rb @@ -44,8 +44,9 @@ helpers do def uri_available?(urlStr) url = URI.parse(urlStr) + subjectidstr = @subjectid ? "?subjectid=#{CGI.escape @subjectid}" : "" Net::HTTP.start(url.host, url.port) do |http| - return http.head("#{url.request_uri}?subjectid=#{CGI.escape @subjectid}").code == "200" + return http.head("#{url.request_uri}#{subjectidstr}").code == "200" end end @@ -93,4 +94,3 @@ before do protected!(@subjectid) end end - -- cgit v1.2.3 From f8ba43de19ff3bfa0a03dce4512944b4ad256ae6 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Thu, 16 Jun 2011 23:44:56 +0200 Subject: add autmatic post form creation to to-html --- lib/overwrite.rb | 3 ++- lib/to-html.rb | 65 +++++++++++++++++++++++++++++++++++++++----------------- 2 files changed, 48 insertions(+), 20 deletions(-) diff --git a/lib/overwrite.rb b/lib/overwrite.rb index df4e1b7..393e8e7 100644 --- a/lib/overwrite.rb +++ b/lib/overwrite.rb @@ -50,7 +50,8 @@ class Sinatra::Base halt task.http_code,task.to_yaml # PENDING differs from task-webservice when /html/ response['Content-Type'] = "text/html" - halt task.http_code,OpenTox.text_to_html(task.to_yaml, @subjectid) + # html -> task created with html form -> redirect to task uri + redirect task.uri else # default /uri-list/ response['Content-Type'] = "text/uri-list" if task.completed? diff --git a/lib/to-html.rb b/lib/to-html.rb index 2c29f7d..2979062 100644 --- a/lib/to-html.rb +++ b/lib/to-html.rb @@ -15,16 +15,13 @@ module OpenTox # produces a html page for making web services browser friendly # format of text (=string params) is preserved (e.g. line breaks) # urls are marked as links - # @example post params: - # [ [ [:mandatory_param_1], [:mandatory_param_2], [:optional_param,"default_value"] ], - # [ [:alteranative_mandatory_param_1], [:alteranative_mandatory_param_2] ] - # ] + # # @param [String] text this is the actual content, # @param [optional,String] related_links info on related resources # @param [optional,String] description general info - # @param [optional,Array] post_params, array of arrays containing info on POST operation, see example + # @param [optional,Array] post_command, infos for the post operation, object defined below # @return [String] html page - def self.text_to_html( text, subjectid=nil, related_links=nil, description=nil, post_params=nil ) + def self.text_to_html( text, subjectid=nil, related_links=nil, description=nil, post_command=nil ) # TODO add title as parameter title = nil #$sinatra.url_for($sinatra.request.env['PATH_INFO'], :full) if $sinatra @@ -47,20 +44,12 @@ module OpenTox html += "

Description

"+description.link_urls+"

" if description html += "

Related links

"+related_links.link_urls+"

" if related_links - if post_params - html += "

POST parameters

" - count = 0 - post_params.each do |p| - html += "

alternatively:

" if count > 0 - html += "

" - p.each do |k,v| - html += "" - end - html += "
paramdefault_value
"+k.to_s+""+(v!=nil ? v.to_s : "mandatory")+"

" - count += 1 - end + if post_command + raise "not a post command" unless post_command.is_a?(OpenTox::PostCommand) + html += "

POST command

" + html += post_command.to_html end - html += "

Content

" if description || related_links + html += "

Content

" if description || related_links || post_command html += "

" html += text.link_urls html += "

" @@ -81,6 +70,44 @@ module OpenTox html += "

" html end + + class PostAttribute + attr_accessor :name, :is_mandatory, :default, :description + + def initialize(name, is_mandatory=true, default=nil, description=nil) + @name = name + @is_mandatory = is_mandatory + @default = default + @description = description + end + end + + class PostCommand + attr_accessor :attributes, :uri, :name + + def initialize( uri, name="Send" ) + @uri = uri + @name = name + @attributes = [] + end + + def to_html + html = "
" + html << "

" + html << "" + #html << "" + attributes.each do |a| + mandatory_string = a.is_mandatory ? "*" : "" + html << "" + html << "" + html << "" + end + html << "" + html << "
Mandatory params are marked with *.
"+a.name.to_s+":"+mandatory_string+""+a.description.to_s+"

" + html + end + end end get '/sign_out/?' do -- cgit v1.2.3 From d384f80e5fd64dcb8aae803b247d07ca7f33b74e Mon Sep 17 00:00:00 2001 From: mguetlein Date: Fri, 17 Jun 2011 13:51:36 +0200 Subject: make task error msg more verbose --- lib/rest_client_wrapper.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/rest_client_wrapper.rb b/lib/rest_client_wrapper.rb index 53887a2..6d25bb3 100644 --- a/lib/rest_client_wrapper.rb +++ b/lib/rest_client_wrapper.rb @@ -137,7 +137,8 @@ module OpenTox if task.errorReport received_error task.errorReport, task.http_code, nil, {:rest_uri => task.uri, :rest_code => task.http_code} else - raise "task status: '"+task.status.to_s+"' but errorReport nil" + raise "status of task '"+task.uri.to_s+"' is no longer running (hasStatus is '"+task.status+ + "'), but it is neither completed nor has an errorReport" end end -- cgit v1.2.3 From 6ccd8878902fbb23cb06316441912be4b3cbbf66 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Sun, 19 Jun 2011 00:14:48 +0200 Subject: add error msg if feature-rdf type is not set (for maxtox feature) --- lib/feature.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/feature.rb b/lib/feature.rb index f3bec5c..4ba58ce 100644 --- a/lib/feature.rb +++ b/lib/feature.rb @@ -22,6 +22,7 @@ module OpenTox # provides feature type, possible types are "regression" or "classification" # @return [String] feature type, unknown if OT.isA property is unknown/ not set def feature_type + raise OpenTox::BadRequestError.new("rdf type of feature '"+uri.to_s+"' not set") unless metadata[RDF.type] if metadata[RDF.type].flatten.include?(OT.NominalFeature) "classification" elsif metadata[RDF.type].flatten.include?(OT.NumericFeature) -- cgit v1.2.3 From efd57ff4ca8445ac77435a2bdc18207fb0a94d8f Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Mon, 20 Jun 2011 16:26:18 +0200 Subject: Removed classification feature --- lib/parser.rb | 71 +++++++++++++++++++++++------------------------------------ 1 file changed, 28 insertions(+), 43 deletions(-) diff --git a/lib/parser.rb b/lib/parser.rb index 90a997b..ffa9ea5 100644 --- a/lib/parser.rb +++ b/lib/parser.rb @@ -293,11 +293,18 @@ module OpenTox regression_features=false 2.upto(book.last_row) { |i| row = book.row(i) - regression_features = detect_regression_features row - break if regression_features==true + row.shift + row.each_index do |i| + value = row[i] + value_maps[value].nil? ? value_maps[value]=0 : value_maps[value] += 1 + if value_maps.size > 5 + regression_features=true + break + end + end } - 2.upto(book.last_row) { |i| add_values book.row(i),regression_features } + 2.upto(book.last_row) { |i| add_values book.row(i) } warnings @dataset end @@ -313,12 +320,20 @@ module OpenTox # AM: fix mixed read in regression_features=false + value_maps= {0} input.each { |row| row = split_row(row) - regression_features = detect_regression_features row - break if regression_features==true + row.shift + row.each_index do |i| + value = row[i] + value_maps[value].nil? ? value_maps[value]=0 : value_maps[value] += 1 + if value_maps.size > 5 + regression_features=true + break + end + end } - input.each { |row| add_values split_row(row),regression_features } + input.each { |row| add_values split_row(row) } warnings @dataset end @@ -365,20 +380,7 @@ module OpenTox end end - def detect_regression_features row - row.shift - regression_features=false - row.each_index do |i| - value = row[i] - type = feature_type(value) - if type == OT.NumericFeature - regression_features=true - end - end - regression_features - end - - def add_values(row, regression_features=false) + def add_values(row) smiles = row.shift compound = Compound.from_smiles(smiles) @@ -396,23 +398,12 @@ module OpenTox @feature_types[feature] << type - if (regression_features) + case type + when OT.NumericFeature val = value.to_f - else - case type - when OT.NominalFeature - case value.to_s - when TRUE_REGEXP - val = true - when FALSE_REGEXP - val = false - end - when OT.NumericFeature - val = value.to_f - when OT.StringFeature - val = value.to_s - @activity_errors << smiles+", "+row.join(", ") - end + when OT.StringFeature + val = value.to_s + @activity_errors << smiles+", "+row.join(", ") end if val!=nil @dataset.add(compound.uri, feature, val) @@ -428,14 +419,8 @@ module OpenTox true if Float(value) rescue false end - def classification?(value) - !value.to_s.strip.match(TRUE_REGEXP).nil? or !value.to_s.strip.match(FALSE_REGEXP).nil? - end - def feature_type(value) - if classification? value - return OT.NominalFeature - elsif numeric? value + if numeric? value return OT.NumericFeature else return OT.StringFeature -- cgit v1.2.3 From 47973d0a325b699ca90407da99ebebc1c6928cb7 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Tue, 21 Jun 2011 09:05:42 +0200 Subject: Allowing TAB as separator --- lib/parser.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/parser.rb b/lib/parser.rb index 90a997b..5a3767a 100644 --- a/lib/parser.rb +++ b/lib/parser.rb @@ -443,7 +443,7 @@ module OpenTox end def split_row(row) - row.chomp.gsub(/["']/,'').split(/\s*[,;]\s*/) # remove quotes + row.chomp.gsub(/["']/,'').split(/\s*[,;\t]\s*/) # remove quotes end end -- cgit v1.2.3 From ff5aa4a57aa3fa0a77609933f5c23d8bdcaf6430 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Tue, 21 Jun 2011 11:19:35 +0200 Subject: Using Nominal Feature --- lib/parser.rb | 67 +++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 38 insertions(+), 29 deletions(-) diff --git a/lib/parser.rb b/lib/parser.rb index ffa9ea5..5625f60 100644 --- a/lib/parser.rb +++ b/lib/parser.rb @@ -282,6 +282,15 @@ module OpenTox @duplicates = {} end + def detect_new_values(row, value_maps) + row.shift + row.each_index do |i| + value = row[i] + value_maps[value].nil? ? value_maps[value]=0 : value_maps[value] += 1 + end + value_maps + end + # Load Spreadsheet book (created with roo gem http://roo.rubyforge.org/, excel format specification: http://toxcreate.org/help) # @param [Excel] book Excel workbook object (created with roo gem) # @return [OpenTox::Dataset] Dataset object with Excel data @@ -289,22 +298,19 @@ module OpenTox book.default_sheet = 0 add_features book.row(1) - # AM: fix mixed read in regression_features=false + value_maps= {} 2.upto(book.last_row) { |i| row = book.row(i) - row.shift - row.each_index do |i| - value = row[i] - value_maps[value].nil? ? value_maps[value]=0 : value_maps[value] += 1 - if value_maps.size > 5 - regression_features=true - break - end + value_maps=detect_new_values(row, value_maps) + if value_maps.size > 5 # 5 is the maximum nr of classes supported by Fminer. + regression_features=true + break end } - - 2.upto(book.last_row) { |i| add_values book.row(i) } + 2.upto(book.last_row) { |i| + add_values book.row(i), regression_features + } warnings @dataset end @@ -317,23 +323,19 @@ module OpenTox input = csv.split("\n") add_features split_row(input.shift) - - # AM: fix mixed read in regression_features=false - value_maps= {0} + value_maps= {} input.each { |row| row = split_row(row) - row.shift - row.each_index do |i| - value = row[i] - value_maps[value].nil? ? value_maps[value]=0 : value_maps[value] += 1 - if value_maps.size > 5 - regression_features=true - break - end + value_maps=detect_new_values(row, value_maps) + if value_maps.size > 5 # 5 is the maximum nr of classes supported by Fminer. + regression_features=true + break end } - input.each { |row| add_values split_row(row) } + input.each { |row| + add_values split_row(row), regression_features + } warnings @dataset end @@ -380,7 +382,7 @@ module OpenTox end end - def add_values(row) + def add_values(row, regression_features) smiles = row.shift compound = Compound.from_smiles(smiles) @@ -394,16 +396,23 @@ module OpenTox row.each_index do |i| value = row[i] feature = @features[i] - type = feature_type(value) + type = nil + if (regression_features) + type = feature_type(value) + if type != OT.NumericFeature + raise "Error! Expected numeric values." + end + else + type = OT.NominalFeature + end @feature_types[feature] << type case type when OT.NumericFeature val = value.to_f - when OT.StringFeature + when OT.NominalFeature val = value.to_s - @activity_errors << smiles+", "+row.join(", ") end if val!=nil @dataset.add(compound.uri, feature, val) @@ -423,12 +432,12 @@ module OpenTox if numeric? value return OT.NumericFeature else - return OT.StringFeature + return OT.NominalFeature end end def split_row(row) - row.chomp.gsub(/["']/,'').split(/\s*[,;]\s*/) # remove quotes + row.chomp.gsub(/["']/,'').split(/\s*[,;\t]\s*/) # remove quotes end end -- cgit v1.2.3 From 8f938837b503db179bcb03beb12421975501420c Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Tue, 21 Jun 2011 14:42:07 +0200 Subject: WMV turned multinomial --- lib/algorithm.rb | 30 +++++++++++++++++------------- lib/model.rb | 15 ++++++++------- 2 files changed, 25 insertions(+), 20 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 7c1c7a2..bc17087 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -143,21 +143,25 @@ module OpenTox conf = 0.0 confidence = 0.0 neighbors.each do |neighbor| - case neighbor[:activity].to_s - when 'true' - conf += Algorithm.gauss(neighbor[:similarity]) - when 'false' - conf -= Algorithm.gauss(neighbor[:similarity]) - end - end - if conf > 0.0 - prediction = true - elsif conf < 0.0 - prediction = false - else - prediction = nil + conf += neighbor[:activity].to_f * Algorithm.gauss(neighbor[:similarity]).to_f + #case neighbor[:activity].to_s + #when 'true' + # conf += Algorithm.gauss(neighbor[:similarity]) + #when 'false' + # conf -= Algorithm.gauss(neighbor[:similarity]) + #end end confidence = conf/neighbors.size if neighbors.size > 0 + prediction = confidence.round + {:prediction => prediction, :confidence => confidence} + #if conf > 0.0 + # prediction = true + #elsif conf < 0.0 + # prediction = false + #else + # prediction = nil + #end + #confidence = conf/neighbors.size if neighbors.size > 0 {:prediction => prediction, :confidence => confidence.abs} end diff --git a/lib/model.rb b/lib/model.rb index 02fabfa..41d9335 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -91,7 +91,7 @@ module OpenTox include Model include Algorithm - attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim, :subjectid, :prop_kernel, :value_map + attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim, :subjectid, :prop_kernel, :value_map, :balanced def initialize(uri=nil) @@ -116,6 +116,7 @@ module OpenTox @min_sim = 0.3 @prop_kernel = false + @balanced = false end @@ -200,8 +201,8 @@ module OpenTox return @prediction_dataset if database_activity(subjectid) load_metadata(subjectid) - case OpenTox::Feature.find(metadata[OT.dependentVariables]).feature_type - when "classification" + if @balanced && OpenTox::Feature.find(metadata[OT.dependentVariables]).feature_type == "classification" + # AM: Balancing, see http://www.maunz.de/wordpress/opentox/2011/balanced-lazar l = Array.new # larger s = Array.new # smaller fraction @@ -211,12 +212,12 @@ module OpenTox @fingerprints.each do |training_compound,training_features| @activities[training_compound].each do |act| case act.to_s - when "false" + when "0" l << training_compound - when "true" + when "1" s << training_compound else - LOGGER.warn "BLAZAR: Activity #{act.to_s} should not be reached." + LOGGER.warn "BLAZAR: Activity #{act.to_s} should not be reached (supports only two classes)." end end end @@ -262,7 +263,7 @@ module OpenTox @neighbors=neighbors_best ### END AM balanced predictions - else # AM: no balancing + else # AM: no balancing or regression LOGGER.info "LAZAR: Unbalanced." neighbors if @prop_kernel && @prediction_algorithm.include?("svm") -- cgit v1.2.3 From b92a57d48fc1a5a648ba4b68957357af95648391 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Tue, 21 Jun 2011 16:11:38 +0200 Subject: Fixed prediction --- lib/algorithm.rb | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index bc17087..263d83c 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -141,28 +141,16 @@ module OpenTox # @return [Hash] Hash with keys `:prediction, :confidence` def self.weighted_majority_vote(neighbors,params={}, props=nil) conf = 0.0 + conf_sum = 0.0 confidence = 0.0 neighbors.each do |neighbor| - conf += neighbor[:activity].to_f * Algorithm.gauss(neighbor[:similarity]).to_f - #case neighbor[:activity].to_s - #when 'true' - # conf += Algorithm.gauss(neighbor[:similarity]) - #when 'false' - # conf -= Algorithm.gauss(neighbor[:similarity]) - #end + weight = Algorithm.gauss(neighbor[:similarity]).to_f + conf += neighbor[:activity].to_f * weight + conf_sum += weight end - confidence = conf/neighbors.size if neighbors.size > 0 - prediction = confidence.round + prediction = (conf/conf_sum).round if conf_sum > 0 + confidence = conf_sum/neighbors.size if neighbors.size > 0 {:prediction => prediction, :confidence => confidence} - #if conf > 0.0 - # prediction = true - #elsif conf < 0.0 - # prediction = false - #else - # prediction = nil - #end - #confidence = conf/neighbors.size if neighbors.size > 0 - {:prediction => prediction, :confidence => confidence.abs} end # Local support vector regression from neighbors -- cgit v1.2.3 From c6754d38f7c2e653d523c323d50fa0b690fd6968 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Wed, 22 Jun 2011 13:01:23 +0000 Subject: load_metadata removed --- lib/model.rb | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/lib/model.rb b/lib/model.rb index 02fabfa..f959939 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -88,8 +88,8 @@ module OpenTox # Lazy Structure Activity Relationship class class Lazar - include Model include Algorithm + include Model attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim, :subjectid, :prop_kernel, :value_map @@ -142,6 +142,18 @@ module OpenTox OpenTox::Model::Lazar.find(model_uri, subjectid) end + def run( params, accept_header=nil, waiting_task=nil ) + unless accept_header + if CONFIG[:yaml_hosts].include?(URI.parse(@uri).host) + accept_header = 'application/x-yaml' + else + accept_header = 'application/rdf+xml' + end + end + LOGGER.info "running model "+@uri.to_s+", params: "+params.inspect+", accept: "+accept_header.to_s + RestClientWrapper.post(@uri,params,{:accept => accept_header},waiting_task).to_s + end + # Get a parameter value # @param [String] param Parameter name # @return [String] Parameter value @@ -199,7 +211,7 @@ module OpenTox return @prediction_dataset if database_activity(subjectid) - load_metadata(subjectid) + #load_metadata(subjectid) case OpenTox::Feature.find(metadata[OT.dependentVariables]).feature_type when "classification" # AM: Balancing, see http://www.maunz.de/wordpress/opentox/2011/balanced-lazar -- cgit v1.2.3 From 5d5db79f2b1833e77b9cb5ded5b74835bc99f9c7 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Wed, 22 Jun 2011 13:02:49 +0000 Subject: attempt fo fix load_metadata --- lib/parser.rb | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/lib/parser.rb b/lib/parser.rb index 90a997b..79c2017 100644 --- a/lib/parser.rb +++ b/lib/parser.rb @@ -57,12 +57,12 @@ module OpenTox `rapper -i rdfxml -o ntriples #{file.path} 2>/dev/null`.each_line do |line| triple = line.to_triple if triple[0] == @uri - if triple[1] == RDF.type || triple[1]==OT.predictedVariables # allow multiple types + #if triple[1] == RDF.type || triple[1]==OT.predictedVariables # allow multiple types @metadata[triple[1]] = [] unless @metadata[triple[1]] @metadata[triple[1]] << triple[2].split('^^').first - else - @metadata[triple[1]] = triple[2].split('^^').first - end + #else + #@metadata[triple[1]] = triple[2].split('^^').first + #end end statements << triple parameter_ids << triple[2] if triple[1] == OT.parameters @@ -76,6 +76,9 @@ module OpenTox @metadata[OT.parameters] << parameter end end + @metadata.each do |k,v| + v = v.first if v.size == 1 + end @metadata end -- cgit v1.2.3 From 1d3d27cb689db3091c4ac6e429f2b0f5a198dcdf Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Thu, 23 Jun 2011 13:16:21 +0000 Subject: lazar predictions fixed --- lib/dataset.rb | 12 +-- lib/model.rb | 247 ++++++++++++++++++++++++++++----------------------------- lib/parser.rb | 14 ++-- 3 files changed, 134 insertions(+), 139 deletions(-) diff --git a/lib/dataset.rb b/lib/dataset.rb index 784bb2a..f13c0d3 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -376,11 +376,14 @@ module OpenTox end def value(compound) - @data_entries[compound.uri].collect{|f,v| v.first if f.match(/value/)}.compact.first + v = nil + v = @data_entries[compound.uri].collect{|f,v| v.first if f.match(/value/)}.compact.first if @data_entries[compound.uri] + v = nil if v.is_a? Array and v.empty? + v end def confidence(compound) - @data_entries[compound.uri].collect{|f,v| v.first if f.match(/confidence/)}.compact.first + @data_entries[compound.uri].collect{|f,v| v.first if f.match(/confidence/)}.compact.first if @data_entries[compound.uri] end def descriptors(compound) @@ -388,12 +391,11 @@ module OpenTox end def measured_activities(compound) - source = @metadata[OT.hasSource] - @data_entries[compound.uri].collect{|f,v| v if f.match(/#{source}/)}.compact.flatten + @data_entries[compound.uri].collect{|f,v| v if f.match(/#{@metadata[OT.hasSource]}/)}.compact.flatten if @data_entries[compound.uri] end def neighbors(compound) - @data_entries[compound.uri].collect{|f,v| @features[f] if f.match(/neighbor/)}.compact + @data_entries[compound.uri].collect{|f,v| @features[f] if f.match(/neighbor/)}.compact if @data_entries[compound.uri] end # def errors(compound) diff --git a/lib/model.rb b/lib/model.rb index f959939..e3dce09 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -186,7 +186,7 @@ module OpenTox LOGGER.warn "prediction for compound "+compound_uri.to_s+" failed: "+ex.message end end - @prediction_dataset.save(subjectid) + #@prediction_dataset.save(subjectid) @prediction_dataset end @@ -209,141 +209,99 @@ module OpenTox } ) end - return @prediction_dataset if database_activity(subjectid) - - #load_metadata(subjectid) - case OpenTox::Feature.find(metadata[OT.dependentVariables]).feature_type - when "classification" - # AM: Balancing, see http://www.maunz.de/wordpress/opentox/2011/balanced-lazar - l = Array.new # larger - s = Array.new # smaller fraction - - raise "no fingerprints in model" if @fingerprints.size==0 - - @fingerprints.each do |training_compound,training_features| - @activities[training_compound].each do |act| - case act.to_s - when "false" - l << training_compound - when "true" - s << training_compound - else - LOGGER.warn "BLAZAR: Activity #{act.to_s} should not be reached." + unless database_activity(subjectid) # adds database activity to @prediction_dataset + + case OpenTox::Feature.find(@metadata[OT.dependentVariables]).feature_type + when "classification" + # AM: Balancing, see http://www.maunz.de/wordpress/opentox/2011/balanced-lazar + l = Array.new # larger + s = Array.new # smaller fraction + + raise "no fingerprints in model" if @fingerprints.size==0 + + @fingerprints.each do |training_compound,training_features| + @activities[training_compound].each do |act| + case act.to_s + when "false" + l << training_compound + when "true" + s << training_compound + else + LOGGER.warn "BLAZAR: Activity #{act.to_s} should not be reached." + end end end - end - if s.size > l.size then - l,s = s,l # happy swapping - LOGGER.info "BLAZAR: |s|=#{s.size}, |l|=#{l.size}." - end - # determine ratio - modulo = l.size.divmod(s.size)# modulo[0]=ratio, modulo[1]=rest - LOGGER.info "BLAZAR: Balance: #{modulo[0]}, rest #{modulo[1]}." - - # AM: Balanced predictions - addon = (modulo[1].to_f/modulo[0]).ceil # what will be added in each round - slack = (addon!=0 ? modulo[1].divmod(addon)[1] : 0) # what remains for the last round - position = 0 - predictions = Array.new - - prediction_best=nil - neighbors_best=nil - - begin - for i in 1..modulo[0] do - (i == modulo[0]) && (slack>0) ? lr_size = s.size + slack : lr_size = s.size + addon # determine fraction - LOGGER.info "BLAZAR: Neighbors round #{i}: #{position} + #{lr_size}." - neighbors_balanced(s, l, position, lr_size) # get ratio fraction of larger part - if @prop_kernel && @prediction_algorithm.include?("svm") - props = get_props - else - props = nil - end - prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values}, props)") - if prediction_best.nil? || prediction[:confidence].abs > prediction_best[:confidence].abs - prediction_best=prediction - neighbors_best=@neighbors + if s.size > l.size then + l,s = s,l # happy swapping + LOGGER.info "BLAZAR: |s|=#{s.size}, |l|=#{l.size}." + end + # determine ratio + modulo = l.size.divmod(s.size)# modulo[0]=ratio, modulo[1]=rest + LOGGER.info "BLAZAR: Balance: #{modulo[0]}, rest #{modulo[1]}." + + # AM: Balanced predictions + addon = (modulo[1].to_f/modulo[0]).ceil # what will be added in each round + slack = (addon!=0 ? modulo[1].divmod(addon)[1] : 0) # what remains for the last round + position = 0 + predictions = Array.new + + prediction_best=nil + neighbors_best=nil + + begin + for i in 1..modulo[0] do + (i == modulo[0]) && (slack>0) ? lr_size = s.size + slack : lr_size = s.size + addon # determine fraction + LOGGER.info "BLAZAR: Neighbors round #{i}: #{position} + #{lr_size}." + neighbors_balanced(s, l, position, lr_size) # get ratio fraction of larger part + if @prop_kernel && @prediction_algorithm.include?("svm") + props = get_props + else + props = nil + end + prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values}, props)") + if prediction_best.nil? || prediction[:confidence].abs > prediction_best[:confidence].abs + prediction_best=prediction + neighbors_best=@neighbors + end + position = position + lr_size end - position = position + lr_size + rescue Exception => e + LOGGER.error "BLAZAR failed in prediction: "+e.class.to_s+": "+e.message end - rescue Exception => e - LOGGER.error "BLAZAR failed in prediction: "+e.class.to_s+": "+e.message - end - prediction=prediction_best - @neighbors=neighbors_best - ### END AM balanced predictions + prediction=prediction_best + @neighbors=neighbors_best + ### END AM balanced predictions - else # AM: no balancing - LOGGER.info "LAZAR: Unbalanced." - neighbors - if @prop_kernel && @prediction_algorithm.include?("svm") - props = get_props - else - props = nil + else # AM: no balancing + LOGGER.info "LAZAR: Unbalanced." + neighbors + if @prop_kernel && @prediction_algorithm.include?("svm") + props = get_props + else + props = nil + end + prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values}, props)") end - prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values}, props)") - end - - value_feature_uri = File.join( @uri, "predicted", "value") - confidence_feature_uri = File.join( @uri, "predicted", "confidence") + + value_feature_uri = File.join( @uri, "predicted", "value") + confidence_feature_uri = File.join( @uri, "predicted", "confidence") - #prediction_feature_uris = {value_feature_uri => prediction[:prediction], confidence_feature_uri => prediction[:confidence]} - #prediction_feature_uris[value_feature_uri] = nil if @neighbors.size == 0 or prediction[:prediction].nil? + @prediction_dataset.metadata[OT.dependentVariables] = @metadata[OT.dependentVariables] unless @prediction_dataset.metadata[OT.dependentVariables] + @prediction_dataset.metadata[OT.predictedVariables] = [value_feature_uri, confidence_feature_uri] unless @prediction_dataset.metadata[OT.predictedVariables] - @prediction_dataset.metadata[OT.dependentVariables] = @metadata[OT.dependentVariables] - @prediction_dataset.metadata[OT.predictedVariables] = [value_feature_uri, confidence_feature_uri] - - if OpenTox::Feature.find(metadata[OT.dependentVariables]).feature_type == "classification" - @prediction_dataset.add @compound.uri, value_feature_uri, @value_map[prediction[:prediction]] - else - @prediction_dataset.add @compound.uri, value_feature_uri, prediction[:prediction] - end - @prediction_dataset.add @compound.uri, confidence_feature_uri, prediction[:confidence] - #prediction_feature_uris.each do |prediction_feature_uri,value| - #@prediction_dataset.add @compound.uri, prediction_feature_uri, @value_map[value] - #end - - if verbose - if @feature_calculation_algorithm == "Substructure.match" - f = 0 - @compound_features.each do |feature| - feature_uri = File.join( @prediction_dataset.uri, "feature", "descriptor", f.to_s) - features[feature] = feature_uri - @prediction_dataset.add_feature(feature_uri, { - RDF.type => [OT.Substructure], - OT.smarts => feature, - OT.pValue => @p_values[feature], - OT.effect => @effects[feature] - }) - @prediction_dataset.add @compound.uri, feature_uri, true - f+=1 - end + if OpenTox::Feature.find(metadata[OT.dependentVariables]).feature_type == "classification" + @prediction_dataset.add @compound.uri, value_feature_uri, @value_map[prediction[:prediction]] else - @compound_features.each do |feature| - features[feature] = feature - @prediction_dataset.add @compound.uri, feature, true - end + @prediction_dataset.add @compound.uri, value_feature_uri, prediction[:prediction] end - n = 0 - @neighbors.each do |neighbor| - neighbor_uri = File.join( @prediction_dataset.uri, "feature", "neighbor", n.to_s ) - @prediction_dataset.add_feature(neighbor_uri, { - OT.compound => neighbor[:compound], - OT.similarity => neighbor[:similarity], - OT.measuredActivity => neighbor[:activity], - RDF.type => [OT.Neighbor] - }) - @prediction_dataset.add @compound.uri, neighbor_uri, true - f = 0 unless f - neighbor[:features].each do |feature| - if @feature_calculation_algorithm == "Substructure.match" - feature_uri = File.join( @prediction_dataset.uri, "feature", "descriptor", f.to_s) unless feature_uri = features[feature] - else - feature_uri = feature - end - @prediction_dataset.add neighbor[:compound], feature_uri, true - unless features.has_key? feature + @prediction_dataset.add @compound.uri, confidence_feature_uri, prediction[:confidence] + + if verbose + if @feature_calculation_algorithm == "Substructure.match" + f = 0 + @compound_features.each do |feature| + feature_uri = File.join( @prediction_dataset.uri, "feature", "descriptor", f.to_s) features[feature] = feature_uri @prediction_dataset.add_feature(feature_uri, { RDF.type => [OT.Substructure], @@ -351,13 +309,48 @@ module OpenTox OT.pValue => @p_values[feature], OT.effect => @effects[feature] }) + @prediction_dataset.add @compound.uri, feature_uri, true f+=1 end + else + @compound_features.each do |feature| + features[feature] = feature + @prediction_dataset.add @compound.uri, feature, true + end + end + n = 0 + @neighbors.each do |neighbor| + neighbor_uri = File.join( @prediction_dataset.uri, "feature", "neighbor", n.to_s ) + @prediction_dataset.add_feature(neighbor_uri, { + OT.compound => neighbor[:compound], + OT.similarity => neighbor[:similarity], + OT.measuredActivity => neighbor[:activity], + RDF.type => [OT.Neighbor] + }) + @prediction_dataset.add @compound.uri, neighbor_uri, true + f = 0 unless f + neighbor[:features].each do |feature| + if @feature_calculation_algorithm == "Substructure.match" + feature_uri = File.join( @prediction_dataset.uri, "feature", "descriptor", f.to_s) unless feature_uri = features[feature] + else + feature_uri = feature + end + @prediction_dataset.add neighbor[:compound], feature_uri, true + unless features.has_key? feature + features[feature] = feature_uri + @prediction_dataset.add_feature(feature_uri, { + RDF.type => [OT.Substructure], + OT.smarts => feature, + OT.pValue => @p_values[feature], + OT.effect => @effects[feature] + }) + f+=1 + end + end + n+=1 end - n+=1 end end - #end @prediction_dataset.save(subjectid) @prediction_dataset diff --git a/lib/parser.rb b/lib/parser.rb index 12ab7f3..2ce9467 100644 --- a/lib/parser.rb +++ b/lib/parser.rb @@ -57,12 +57,12 @@ module OpenTox `rapper -i rdfxml -o ntriples #{file.path} 2>/dev/null`.each_line do |line| triple = line.to_triple if triple[0] == @uri - #if triple[1] == RDF.type || triple[1]==OT.predictedVariables # allow multiple types + if triple[1] == RDF.type || triple[1]==OT.predictedVariables # allow multiple types @metadata[triple[1]] = [] unless @metadata[triple[1]] @metadata[triple[1]] << triple[2].split('^^').first - #else - #@metadata[triple[1]] = triple[2].split('^^').first - #end + else + @metadata[triple[1]] = triple[2].split('^^').first + end end statements << triple parameter_ids << triple[2] if triple[1] == OT.parameters @@ -76,9 +76,9 @@ module OpenTox @metadata[OT.parameters] << parameter end end - @metadata.each do |k,v| - v = v.first if v.size == 1 - end + #@metadata.each do |k,v| + #v = v.first if v and v.size == 1 + #end @metadata end -- cgit v1.2.3 From 310500f4d61f92de713577e7a09e9536ff6e7c42 Mon Sep 17 00:00:00 2001 From: am Date: Fri, 24 Jun 2011 13:29:18 +0200 Subject: Restored compatibility behavior: guessing true/false in wmv, fixed regression detection for multicolumn CSVs. Discuss: what about other labels: - remove guessing? - then, how to guarantee bw compat if ordering is lost? exploit already existing alphanum ordering on REGEX patterns? --- lib/algorithm.rb | 45 +++++++++++++++++++++++++++++++++++++-------- lib/model.rb | 6 +++--- lib/parser.rb | 44 +++++++++++++++++++++++++++----------------- 3 files changed, 67 insertions(+), 28 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 263d83c..75252c2 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -140,17 +140,46 @@ module OpenTox # @param [optional] params Ignored (only for compatibility with local_svm_regression) # @return [Hash] Hash with keys `:prediction, :confidence` def self.weighted_majority_vote(neighbors,params={}, props=nil) - conf = 0.0 - conf_sum = 0.0 + neighbor_contribution = 0.0 + confidence_sum = 0.0 confidence = 0.0 + prediction = nil + positive_map_value= nil + negative_map_value= nil + neighbors.each do |neighbor| - weight = Algorithm.gauss(neighbor[:similarity]).to_f - conf += neighbor[:activity].to_f * weight - conf_sum += weight + neighbor_weight = Algorithm.gauss(neighbor[:similarity]).to_f + neighbor_contribution += neighbor[:activity].to_f * neighbor_weight + + if params[:value_map].size == 2 # provide compat to binary classification + map_entry = params[:value_map][neighbor[:activity].to_i].to_s # access original neighbor activity + case map_entry + when TRUE_REGEXP + confidence_sum += neighbor_weight + positive_map_value = neighbor[:activity] + when FALSE_REGEXP + confidence_sum -= neighbor_weight + negative_map_value = neighbor[:activity] + end + else + confidence_sum += neighbor_weight # AM: new multinomial confidence + end end - prediction = (conf/conf_sum).round if conf_sum > 0 - confidence = conf_sum/neighbors.size if neighbors.size > 0 - {:prediction => prediction, :confidence => confidence} + + if params[:value_map].size == 2 # provide compat to binary classification + if confidence_sum >= 0.0 + prediction = positive_map_value unless neighbors.size==0 + elsif confidence_sum < 0.0 + prediction = negative_map_value unless neighbors.size==0 + end + else + prediction = (neighbor_contribution/confidence_sum).round unless neighbors.size==0 # AM: new multinomial prediction + end + + confidence = confidence_sum/neighbors.size if neighbors.size > 0 + res = {:prediction => prediction, :confidence => confidence.abs} + puts res.to_yaml + res end # Local support vector regression from neighbors diff --git a/lib/model.rb b/lib/model.rb index 7a44c40..5eec366 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -258,7 +258,7 @@ module OpenTox else props = nil end - prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values}, props)") + prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values, :value_map => @value_map}, props)") if prediction_best.nil? || prediction[:confidence].abs > prediction_best[:confidence].abs prediction_best=prediction neighbors_best=@neighbors @@ -281,7 +281,7 @@ module OpenTox else props = nil end - prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values}, props)") + prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values, :value_map => @value_map}, props)") end value_feature_uri = File.join( @uri, "predicted", "value") @@ -422,7 +422,7 @@ module OpenTox # @return [Boolean] true if compound has databasse activities, false if not def database_activity(subjectid) if @activities[@compound.uri] - @activities[@compound.uri].each { |act| @prediction_dataset.add @compound.uri, @metadata[OT.dependentVariables], act } + @activities[@compound.uri].each { |act| @prediction_dataset.add @compound.uri, @metadata[OT.dependentVariables], @value_map[act] } @prediction_dataset.add_metadata(OT.hasSource => @metadata[OT.trainingDataset]) @prediction_dataset.save(subjectid) true diff --git a/lib/parser.rb b/lib/parser.rb index 89fcb71..07bee67 100644 --- a/lib/parser.rb +++ b/lib/parser.rb @@ -289,7 +289,8 @@ module OpenTox row.shift row.each_index do |i| value = row[i] - value_maps[value].nil? ? value_maps[value]=0 : value_maps[value] += 1 + value_maps[i] = Hash.new if value_maps[i].nil? + value_maps[i][value].nil? ? value_maps[i][value]=0 : value_maps[i][value] += 1 end value_maps end @@ -300,19 +301,22 @@ module OpenTox def load_spreadsheet(book) book.default_sheet = 0 add_features book.row(1) + value_maps = Array.new + regression_features=Array.new - regression_features=false - value_maps= {} 2.upto(book.last_row) { |i| row = book.row(i) - value_maps=detect_new_values(row, value_maps) - if value_maps.size > 5 # 5 is the maximum nr of classes supported by Fminer. - regression_features=true - break - end + value_maps = detect_new_values(row, value_maps) + value_maps.each_with_index { |vm,j| + if vm.size > 5 # 5 is the maximum nr of classes supported by Fminer. + regression_features[j]=true + else + regression_features[j]=false + end + } } 2.upto(book.last_row) { |i| - add_values book.row(i), regression_features + add_values book.row(i), regression_features } warnings @dataset @@ -325,16 +329,19 @@ module OpenTox row = 0 input = csv.split("\n") add_features split_row(input.shift) + value_maps = Array.new + regression_features=Array.new - regression_features=false - value_maps= {} input.each { |row| row = split_row(row) - value_maps=detect_new_values(row, value_maps) - if value_maps.size > 5 # 5 is the maximum nr of classes supported by Fminer. - regression_features=true - break - end + value_maps = detect_new_values(row, value_maps) + value_maps.each_with_index { |vm,j| + if vm.size > 5 # 5 is the maximum nr of classes supported by Fminer. + regression_features[j]=true + else + regression_features[j]=false + end + } } input.each { |row| add_values split_row(row), regression_features @@ -385,6 +392,9 @@ module OpenTox end end + # Adds a row to a dataset + # @param Array A row split up as an array + # @param Array Indicator for regression for each field def add_values(row, regression_features) smiles = row.shift @@ -401,7 +411,7 @@ module OpenTox feature = @features[i] type = nil - if (regression_features) + if (regression_features[i]) type = feature_type(value) if type != OT.NumericFeature raise "Error! Expected numeric values." -- cgit v1.2.3 From f9721059cb28c23c10e83dafe7aa58d9cf650746 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Fri, 24 Jun 2011 14:57:02 +0200 Subject: Fixed SVM predictions --- lib/algorithm.rb | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 75252c2..1f0ef2a 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -222,7 +222,8 @@ module OpenTox acts = neighbors.collect do |n| act = n[:activity] end # activities of neighbors for supervised learning - acts_f = acts.collect {|v| v == true ? 1.0 : 0.0} +# acts_f = acts.collect {|v| v == true ? 1.0 : 0.0} + acts_f = acts sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors begin prediction = (props.nil? ? local_svm(neighbors, acts_f, sims, "C-bsvc", params) : local_svm_prop(props, acts_f, "C-bsvc", params)) @@ -300,7 +301,8 @@ module OpenTox if type == "nu-svr" prediction = @r.p elsif type == "C-bsvc" - prediction = (@r.p.to_f == 1.0 ? true : false) + #prediction = (@r.p.to_f == 1.0 ? true : false) + prediction = @r.p end @r.quit # free R rescue Exception => e @@ -374,7 +376,8 @@ module OpenTox if type == "nu-svr" prediction = @r.p elsif type == "C-bsvc" - prediction = (@r.p.to_f == 1.0 ? true : false) + #prediction = (@r.p.to_f == 1.0 ? true : false) + prediction = @r.p end @r.quit # free R rescue Exception => e -- cgit v1.2.3 From 5ce30c81a486896aca129bf779b917dd93007adc Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Mon, 27 Jun 2011 08:54:53 +0200 Subject: Removed map lookup, using sort order for activity --- lib/algorithm.rb | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 1f0ef2a..41e08ab 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -151,26 +151,23 @@ module OpenTox neighbor_weight = Algorithm.gauss(neighbor[:similarity]).to_f neighbor_contribution += neighbor[:activity].to_f * neighbor_weight - if params[:value_map].size == 2 # provide compat to binary classification - map_entry = params[:value_map][neighbor[:activity].to_i].to_s # access original neighbor activity - case map_entry - when TRUE_REGEXP - confidence_sum += neighbor_weight - positive_map_value = neighbor[:activity] - when FALSE_REGEXP + if params[:value_map].size == 2 # AM: provide compat to binary classification: 1=>false 2=>true + case neighbor[:activity] + when 1 confidence_sum -= neighbor_weight - negative_map_value = neighbor[:activity] + when 2 + confidence_sum += neighbor_weight end else - confidence_sum += neighbor_weight # AM: new multinomial confidence + confidence_sum += neighbor_weight end end - if params[:value_map].size == 2 # provide compat to binary classification + if params[:value_map].size == 2 if confidence_sum >= 0.0 - prediction = positive_map_value unless neighbors.size==0 + prediction = 2 unless neighbors.size==0 elsif confidence_sum < 0.0 - prediction = negative_map_value unless neighbors.size==0 + prediction = 1 unless neighbors.size==0 end else prediction = (neighbor_contribution/confidence_sum).round unless neighbors.size==0 # AM: new multinomial prediction -- cgit v1.2.3 From 803b57557a5433cfd940b7e333f5fcfb08a17a25 Mon Sep 17 00:00:00 2001 From: dv Date: Mon, 27 Jun 2011 12:20:56 +0200 Subject: added new confidence calculation to local_svm_regression --- lib/algorithm.rb | 271 +++++++++++++++++++++++++++++-------------------------- 1 file changed, 145 insertions(+), 126 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 41e08ab..d37e49d 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -19,7 +19,7 @@ module OpenTox LOGGER.info "Running algorithm '"+@uri.to_s+"' with params: "+params.inspect RestClientWrapper.post(@uri, params, {:accept => 'text/uri-list'}, waiting_task).to_s end - + # Get OWL-DL representation in RDF/XML format # @return [application/rdf+xml] RDF/XML representation def to_rdfxml @@ -31,7 +31,7 @@ module OpenTox # Generic Algorithm class, should work with all OpenTox webservices class Generic include Algorithm - + # Find Generic Opentox Algorithm via URI, and loads metadata, could raise NotFound/NotAuthorized error # @param [String] uri Algorithm URI # @return [OpenTox::Algorithm::Generic] Algorithm instance @@ -42,7 +42,7 @@ module OpenTox raise "cannot load algorithm metadata" if alg.metadata==nil or alg.metadata.size==0 alg end - + end # Fminer algorithms (https://github.com/amaunz/fminer2) @@ -204,12 +204,31 @@ module OpenTox LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" end - conf = sims.inject{|sum,x| sum + x } - confidence = conf/neighbors.size if neighbors.size > 0 - {:prediction => prediction, :confidence => confidence} - + begin + sim_median = Algorithm.median(sims) + confidence = nil + if sim_median + @r_sd = RinRuby.new(false,false) + @r_sd.r_regression_acts = acts + standard_diviation = @r_sd.pull "as.numeric(sd(r_regression_acts))"#calculate standard deviation + @r_sd.quit #free R + confidence = sim_median*Math.exp(-standard_diviation) + if confidence.nan? + confidence = nil + end + else + LOGGER.debug "dv ------------ regression sim_median not valid" + end + LOGGER.debug "Confidence is: '" + confidence.to_s + "'." + rescue Exception => e + LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" + end + res = {:prediction => prediction, :confidence => confidence.abs} + puts res.to_yaml + res end + # Local support vector classification from neighbors # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features` # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required @@ -219,7 +238,7 @@ module OpenTox acts = neighbors.collect do |n| act = n[:activity] end # activities of neighbors for supervised learning -# acts_f = acts.collect {|v| v == true ? 1.0 : 0.0} + # acts_f = acts.collect {|v| v == true ? 1.0 : 0.0} acts_f = acts sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors begin @@ -232,7 +251,7 @@ module OpenTox conf = sims.inject{|sum,x| sum + x } confidence = conf/neighbors.size if neighbors.size > 0 {:prediction => prediction, :confidence => confidence} - + end @@ -247,67 +266,67 @@ module OpenTox # @param [Array] props, propositionalization of neighbors and query structure e.g. [ Array_for_q, two-nested-Arrays_for_n ] # @return [Numeric] A prediction value. def self.local_svm(neighbors, acts, sims, type, params) - LOGGER.debug "Local SVM (Weighted Tanimoto Kernel)." - neighbor_matches = neighbors.collect{ |n| n[:features] } # URIs of matches - gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel - if neighbor_matches.size == 0 - raise "No neighbors found." - else - # gram matrix - (0..(neighbor_matches.length-1)).each do |i| - gram_matrix[i] = [] unless gram_matrix[i] - # upper triangle - ((i+1)..(neighbor_matches.length-1)).each do |j| - sim = eval("#{params[:similarity_algorithm]}(neighbor_matches[i], neighbor_matches[j], params[:p_values])") - gram_matrix[i][j] = Algorithm.gauss(sim) - gram_matrix[j] = [] unless gram_matrix[j] - gram_matrix[j][i] = gram_matrix[i][j] # lower triangle - end - gram_matrix[i][i] = 1.0 + LOGGER.debug "Local SVM (Weighted Tanimoto Kernel)." + neighbor_matches = neighbors.collect{ |n| n[:features] } # URIs of matches + gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel + if neighbor_matches.size == 0 + raise "No neighbors found." + else + # gram matrix + (0..(neighbor_matches.length-1)).each do |i| + gram_matrix[i] = [] unless gram_matrix[i] + # upper triangle + ((i+1)..(neighbor_matches.length-1)).each do |j| + sim = eval("#{params[:similarity_algorithm]}(neighbor_matches[i], neighbor_matches[j], params[:p_values])") + gram_matrix[i][j] = Algorithm.gauss(sim) + gram_matrix[j] = [] unless gram_matrix[j] + gram_matrix[j][i] = gram_matrix[i][j] # lower triangle end + gram_matrix[i][i] = 1.0 + end - #LOGGER.debug gram_matrix.to_yaml - @r = RinRuby.new(false,false) # global R instance leads to Socket errors after a large number of requests - @r.eval "library('kernlab')" # this requires R package "kernlab" to be installed - LOGGER.debug "Setting R data ..." - # set data - @r.gram_matrix = gram_matrix.flatten - @r.n = neighbor_matches.size - @r.y = acts - @r.sims = sims - - begin - LOGGER.debug "Preparing R data ..." - # prepare data - @r.eval "y<-as.vector(y)" - @r.eval "gram_matrix<-as.kernelMatrix(matrix(gram_matrix,n,n))" - @r.eval "sims<-as.vector(sims)" - - # model + support vectors - LOGGER.debug "Creating SVM model ..." - @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"#{type}\", nu=0.5)" - @r.eval "sv<-as.vector(SVindex(model))" - @r.eval "sims<-sims[sv]" - @r.eval "sims<-as.kernelMatrix(matrix(sims,1))" - LOGGER.debug "Predicting ..." - if type == "nu-svr" - @r.eval "p<-predict(model,sims)[1,1]" - elsif type == "C-bsvc" - @r.eval "p<-predict(model,sims)" - end - if type == "nu-svr" - prediction = @r.p - elsif type == "C-bsvc" - #prediction = (@r.p.to_f == 1.0 ? true : false) - prediction = @r.p - end - @r.quit # free R - rescue Exception => e - LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" + #LOGGER.debug gram_matrix.to_yaml + @r = RinRuby.new(false,false) # global R instance leads to Socket errors after a large number of requests + @r.eval "library('kernlab')" # this requires R package "kernlab" to be installed + LOGGER.debug "Setting R data ..." + # set data + @r.gram_matrix = gram_matrix.flatten + @r.n = neighbor_matches.size + @r.y = acts + @r.sims = sims + + begin + LOGGER.debug "Preparing R data ..." + # prepare data + @r.eval "y<-as.vector(y)" + @r.eval "gram_matrix<-as.kernelMatrix(matrix(gram_matrix,n,n))" + @r.eval "sims<-as.vector(sims)" + + # model + support vectors + LOGGER.debug "Creating SVM model ..." + @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"#{type}\", nu=0.5)" + @r.eval "sv<-as.vector(SVindex(model))" + @r.eval "sims<-sims[sv]" + @r.eval "sims<-as.kernelMatrix(matrix(sims,1))" + LOGGER.debug "Predicting ..." + if type == "nu-svr" + @r.eval "p<-predict(model,sims)[1,1]" + elsif type == "C-bsvc" + @r.eval "p<-predict(model,sims)" end - + if type == "nu-svr" + prediction = @r.p + elsif type == "C-bsvc" + #prediction = (@r.p.to_f == 1.0 ? true : false) + prediction = @r.p + end + @r.quit # free R + rescue Exception => e + LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" end - prediction + + end + prediction end # Local support vector prediction from neighbors. @@ -321,67 +340,67 @@ module OpenTox # @return [Numeric] A prediction value. def self.local_svm_prop(props, acts, type, params) - LOGGER.debug "Local SVM (Propositionalization / Kernlab Kernel)." - n_prop = props[0] # is a matrix, i.e. two nested Arrays. - q_prop = props[1] # is an Array. + LOGGER.debug "Local SVM (Propositionalization / Kernlab Kernel)." + n_prop = props[0] # is a matrix, i.e. two nested Arrays. + q_prop = props[1] # is an Array. - #neighbor_matches = neighbors.collect{ |n| n[:features] } # URIs of matches - #gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel - if n_prop.size == 0 - raise "No neighbors found." - else - # gram matrix - #(0..(neighbor_matches.length-1)).each do |i| - # gram_matrix[i] = [] unless gram_matrix[i] - # # upper triangle - # ((i+1)..(neighbor_matches.length-1)).each do |j| - # sim = eval("#{params[:similarity_algorithm]}(neighbor_matches[i], neighbor_matches[j], params[:p_values])") - # gram_matrix[i][j] = Algorithm.gauss(sim) - # gram_matrix[j] = [] unless gram_matrix[j] - # gram_matrix[j][i] = gram_matrix[i][j] # lower triangle - # end - # gram_matrix[i][i] = 1.0 - #end - - #LOGGER.debug gram_matrix.to_yaml - @r = RinRuby.new(false,false) # global R instance leads to Socket errors after a large number of requests - @r.eval "library('kernlab')" # this requires R package "kernlab" to be installed - LOGGER.debug "Setting R data ..." - # set data - @r.n_prop = n_prop.flatten - @r.n_prop_x_size = n_prop.size - @r.n_prop_y_size = n_prop[0].size - @r.y = acts - @r.q_prop = q_prop - - begin - LOGGER.debug "Preparing R data ..." - # prepare data - @r.eval "y<-matrix(y)" - @r.eval "prop_matrix<-matrix(n_prop, n_prop_x_size, n_prop_y_size, byrow=TRUE)" - @r.eval "q_prop<-matrix(q_prop, 1, n_prop_y_size, byrow=TRUE)" - - # model + support vectors - LOGGER.debug "Creating SVM model ..." - @r.eval "model<-ksvm(prop_matrix, y, type=\"#{type}\", nu=0.5)" - LOGGER.debug "Predicting ..." - if type == "nu-svr" - @r.eval "p<-predict(model,q_prop)[1,1]" - elsif type == "C-bsvc" - @r.eval "p<-predict(model,q_prop)" - end - if type == "nu-svr" - prediction = @r.p - elsif type == "C-bsvc" - #prediction = (@r.p.to_f == 1.0 ? true : false) - prediction = @r.p - end - @r.quit # free R - rescue Exception => e - LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" + #neighbor_matches = neighbors.collect{ |n| n[:features] } # URIs of matches + #gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel + if n_prop.size == 0 + raise "No neighbors found." + else + # gram matrix + #(0..(neighbor_matches.length-1)).each do |i| + # gram_matrix[i] = [] unless gram_matrix[i] + # # upper triangle + # ((i+1)..(neighbor_matches.length-1)).each do |j| + # sim = eval("#{params[:similarity_algorithm]}(neighbor_matches[i], neighbor_matches[j], params[:p_values])") + # gram_matrix[i][j] = Algorithm.gauss(sim) + # gram_matrix[j] = [] unless gram_matrix[j] + # gram_matrix[j][i] = gram_matrix[i][j] # lower triangle + # end + # gram_matrix[i][i] = 1.0 + #end + + #LOGGER.debug gram_matrix.to_yaml + @r = RinRuby.new(false,false) # global R instance leads to Socket errors after a large number of requests + @r.eval "library('kernlab')" # this requires R package "kernlab" to be installed + LOGGER.debug "Setting R data ..." + # set data + @r.n_prop = n_prop.flatten + @r.n_prop_x_size = n_prop.size + @r.n_prop_y_size = n_prop[0].size + @r.y = acts + @r.q_prop = q_prop + + begin + LOGGER.debug "Preparing R data ..." + # prepare data + @r.eval "y<-matrix(y)" + @r.eval "prop_matrix<-matrix(n_prop, n_prop_x_size, n_prop_y_size, byrow=TRUE)" + @r.eval "q_prop<-matrix(q_prop, 1, n_prop_y_size, byrow=TRUE)" + + # model + support vectors + LOGGER.debug "Creating SVM model ..." + @r.eval "model<-ksvm(prop_matrix, y, type=\"#{type}\", nu=0.5)" + LOGGER.debug "Predicting ..." + if type == "nu-svr" + @r.eval "p<-predict(model,q_prop)[1,1]" + elsif type == "C-bsvc" + @r.eval "p<-predict(model,q_prop)" + end + if type == "nu-svr" + prediction = @r.p + elsif type == "C-bsvc" + #prediction = (@r.p.to_f == 1.0 ? true : false) + prediction = @r.p end + @r.quit # free R + rescue Exception => e + LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" end - prediction + end + prediction end @@ -404,14 +423,14 @@ module OpenTox def features(dataset_uri,compound_uri) end end - + # Gauss kernel # @return [Float] def self.gauss(x, sigma = 0.3) d = 1.0 - x.to_f Math.exp(-(d*d)/(2*sigma*sigma)) end - + # Median of an array # @param [Array] Array with values # @return [Float] Median -- cgit v1.2.3 From 72c15272a9d7be9959294e70f26f37279f1392fe Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Mon, 27 Jun 2011 13:23:16 +0200 Subject: Added support for integer features --- lib/serializer.rb | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/serializer.rb b/lib/serializer.rb index 5a9fd0a..03dcf1f 100644 --- a/lib/serializer.rb +++ b/lib/serializer.rb @@ -383,6 +383,8 @@ module OpenTox XSD.boolean elsif value.is_a? Float XSD.float + elsif value.is_a? Integer + XSD.integer else XSD.string end @@ -393,6 +395,8 @@ module OpenTox datatype = OT.NominalFeature elsif value.is_a? Float datatype = OT.NumericFeature + elsif value.is_a? Integer + datatype = OT.NumericFeature else datatype = OT.StringFeature end -- cgit v1.2.3 From 77978d431208a0aafc7d3c6373c448e8487d75ed Mon Sep 17 00:00:00 2001 From: dv Date: Mon, 27 Jun 2011 16:22:28 +0200 Subject: removed abs bug --- lib/algorithm.rb | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index d37e49d..b789a35 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -211,19 +211,20 @@ module OpenTox @r_sd = RinRuby.new(false,false) @r_sd.r_regression_acts = acts standard_diviation = @r_sd.pull "as.numeric(sd(r_regression_acts))"#calculate standard deviation + LOGGER.debug "dv ----------------- sd is: '" + standard_diviation.to_s + "'." @r_sd.quit #free R - confidence = sim_median*Math.exp(-standard_diviation) + confidence = (sim_median*Math.exp(-1*standard_diviation)).abs if confidence.nan? confidence = nil end else - LOGGER.debug "dv ------------ regression sim_median not valid" + LOGGER.debug "dv ------------ sim_median not valid" end LOGGER.debug "Confidence is: '" + confidence.to_s + "'." rescue Exception => e LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" end - res = {:prediction => prediction, :confidence => confidence.abs} + res = {:prediction => prediction, :confidence => confidence} puts res.to_yaml res end -- cgit v1.2.3 From 0233e13d9f850139ffbc22eea710da230027aaef Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Tue, 28 Jun 2011 08:36:50 +0200 Subject: Added routines --- lib/algorithm.rb | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 41e08ab..fc1d451 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -422,5 +422,46 @@ module OpenTox return array.size % 2 == 1 ? array[m_pos] : (array[m_pos-1] + array[m_pos])/2 end + # Sum of an array + # @param [Array] Array with values + # @return [Integer] Sum of values + def self.sum(array) + array.inject{|s,x| s + x } + end + + # Minimum Frequency + # @param [Integer] per-mil value + # return [Integer] min-frequency + def self.min_frequency(training_dataset,per_mil) + minfreq = per_mil*training_dataset.compounds.size/1000 # AM sugg. 8-10 per mil for BBRC, 50 per mil for LAST + minfreq = 2 unless minfreq > 2 + minfreq + end + + # Effect calculation for classification + # @param [Array] Array of occurrence counts of a feature. + # @param [Array] Array of database instance counts. + def self.effect(occurrences, db_instances) + max=nil + max_value=0 + nr_o = sum(occurrences) + nr_db = sum(db_instances) + + occurrences.each_with_index { |o,i| # fminer outputs occurrences sorted reverse by activity. + actual = o.to_f/nr_o + expected = db_instances[i].to_f/nr_db + if actual > expected + if ((actual - expected) / actual) > max_value + max_value = (actual - expected) / actual # 'Schleppzeiger' + max = i + end + end + } + max + end + + end end + + -- cgit v1.2.3 From d7628d7a7c83eef7572271ed19bae6277daa540c Mon Sep 17 00:00:00 2001 From: dv Date: Tue, 28 Jun 2011 11:25:26 +0200 Subject: undo auto indent --- lib/algorithm.rb | 245 +++++++++++++++++++++++++++---------------------------- 1 file changed, 122 insertions(+), 123 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index b789a35..1b9461e 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -19,7 +19,7 @@ module OpenTox LOGGER.info "Running algorithm '"+@uri.to_s+"' with params: "+params.inspect RestClientWrapper.post(@uri, params, {:accept => 'text/uri-list'}, waiting_task).to_s end - + # Get OWL-DL representation in RDF/XML format # @return [application/rdf+xml] RDF/XML representation def to_rdfxml @@ -31,7 +31,7 @@ module OpenTox # Generic Algorithm class, should work with all OpenTox webservices class Generic include Algorithm - + # Find Generic Opentox Algorithm via URI, and loads metadata, could raise NotFound/NotAuthorized error # @param [String] uri Algorithm URI # @return [OpenTox::Algorithm::Generic] Algorithm instance @@ -42,7 +42,7 @@ module OpenTox raise "cannot load algorithm metadata" if alg.metadata==nil or alg.metadata.size==0 alg end - + end # Fminer algorithms (https://github.com/amaunz/fminer2) @@ -229,7 +229,6 @@ module OpenTox res end - # Local support vector classification from neighbors # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features` # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required @@ -239,7 +238,7 @@ module OpenTox acts = neighbors.collect do |n| act = n[:activity] end # activities of neighbors for supervised learning - # acts_f = acts.collect {|v| v == true ? 1.0 : 0.0} +# acts_f = acts.collect {|v| v == true ? 1.0 : 0.0} acts_f = acts sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors begin @@ -252,7 +251,7 @@ module OpenTox conf = sims.inject{|sum,x| sum + x } confidence = conf/neighbors.size if neighbors.size > 0 {:prediction => prediction, :confidence => confidence} - + end @@ -267,67 +266,67 @@ module OpenTox # @param [Array] props, propositionalization of neighbors and query structure e.g. [ Array_for_q, two-nested-Arrays_for_n ] # @return [Numeric] A prediction value. def self.local_svm(neighbors, acts, sims, type, params) - LOGGER.debug "Local SVM (Weighted Tanimoto Kernel)." - neighbor_matches = neighbors.collect{ |n| n[:features] } # URIs of matches - gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel - if neighbor_matches.size == 0 - raise "No neighbors found." - else - # gram matrix - (0..(neighbor_matches.length-1)).each do |i| - gram_matrix[i] = [] unless gram_matrix[i] - # upper triangle - ((i+1)..(neighbor_matches.length-1)).each do |j| - sim = eval("#{params[:similarity_algorithm]}(neighbor_matches[i], neighbor_matches[j], params[:p_values])") - gram_matrix[i][j] = Algorithm.gauss(sim) - gram_matrix[j] = [] unless gram_matrix[j] - gram_matrix[j][i] = gram_matrix[i][j] # lower triangle + LOGGER.debug "Local SVM (Weighted Tanimoto Kernel)." + neighbor_matches = neighbors.collect{ |n| n[:features] } # URIs of matches + gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel + if neighbor_matches.size == 0 + raise "No neighbors found." + else + # gram matrix + (0..(neighbor_matches.length-1)).each do |i| + gram_matrix[i] = [] unless gram_matrix[i] + # upper triangle + ((i+1)..(neighbor_matches.length-1)).each do |j| + sim = eval("#{params[:similarity_algorithm]}(neighbor_matches[i], neighbor_matches[j], params[:p_values])") + gram_matrix[i][j] = Algorithm.gauss(sim) + gram_matrix[j] = [] unless gram_matrix[j] + gram_matrix[j][i] = gram_matrix[i][j] # lower triangle + end + gram_matrix[i][i] = 1.0 end - gram_matrix[i][i] = 1.0 - end - #LOGGER.debug gram_matrix.to_yaml - @r = RinRuby.new(false,false) # global R instance leads to Socket errors after a large number of requests - @r.eval "library('kernlab')" # this requires R package "kernlab" to be installed - LOGGER.debug "Setting R data ..." - # set data - @r.gram_matrix = gram_matrix.flatten - @r.n = neighbor_matches.size - @r.y = acts - @r.sims = sims - - begin - LOGGER.debug "Preparing R data ..." - # prepare data - @r.eval "y<-as.vector(y)" - @r.eval "gram_matrix<-as.kernelMatrix(matrix(gram_matrix,n,n))" - @r.eval "sims<-as.vector(sims)" - - # model + support vectors - LOGGER.debug "Creating SVM model ..." - @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"#{type}\", nu=0.5)" - @r.eval "sv<-as.vector(SVindex(model))" - @r.eval "sims<-sims[sv]" - @r.eval "sims<-as.kernelMatrix(matrix(sims,1))" - LOGGER.debug "Predicting ..." - if type == "nu-svr" - @r.eval "p<-predict(model,sims)[1,1]" - elsif type == "C-bsvc" - @r.eval "p<-predict(model,sims)" - end - if type == "nu-svr" - prediction = @r.p - elsif type == "C-bsvc" - #prediction = (@r.p.to_f == 1.0 ? true : false) - prediction = @r.p + #LOGGER.debug gram_matrix.to_yaml + @r = RinRuby.new(false,false) # global R instance leads to Socket errors after a large number of requests + @r.eval "library('kernlab')" # this requires R package "kernlab" to be installed + LOGGER.debug "Setting R data ..." + # set data + @r.gram_matrix = gram_matrix.flatten + @r.n = neighbor_matches.size + @r.y = acts + @r.sims = sims + + begin + LOGGER.debug "Preparing R data ..." + # prepare data + @r.eval "y<-as.vector(y)" + @r.eval "gram_matrix<-as.kernelMatrix(matrix(gram_matrix,n,n))" + @r.eval "sims<-as.vector(sims)" + + # model + support vectors + LOGGER.debug "Creating SVM model ..." + @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"#{type}\", nu=0.5)" + @r.eval "sv<-as.vector(SVindex(model))" + @r.eval "sims<-sims[sv]" + @r.eval "sims<-as.kernelMatrix(matrix(sims,1))" + LOGGER.debug "Predicting ..." + if type == "nu-svr" + @r.eval "p<-predict(model,sims)[1,1]" + elsif type == "C-bsvc" + @r.eval "p<-predict(model,sims)" + end + if type == "nu-svr" + prediction = @r.p + elsif type == "C-bsvc" + #prediction = (@r.p.to_f == 1.0 ? true : false) + prediction = @r.p + end + @r.quit # free R + rescue Exception => e + LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" end - @r.quit # free R - rescue Exception => e - LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" - end - end - prediction + end + prediction end # Local support vector prediction from neighbors. @@ -341,67 +340,67 @@ module OpenTox # @return [Numeric] A prediction value. def self.local_svm_prop(props, acts, type, params) - LOGGER.debug "Local SVM (Propositionalization / Kernlab Kernel)." - n_prop = props[0] # is a matrix, i.e. two nested Arrays. - q_prop = props[1] # is an Array. + LOGGER.debug "Local SVM (Propositionalization / Kernlab Kernel)." + n_prop = props[0] # is a matrix, i.e. two nested Arrays. + q_prop = props[1] # is an Array. - #neighbor_matches = neighbors.collect{ |n| n[:features] } # URIs of matches - #gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel - if n_prop.size == 0 - raise "No neighbors found." - else - # gram matrix - #(0..(neighbor_matches.length-1)).each do |i| - # gram_matrix[i] = [] unless gram_matrix[i] - # # upper triangle - # ((i+1)..(neighbor_matches.length-1)).each do |j| - # sim = eval("#{params[:similarity_algorithm]}(neighbor_matches[i], neighbor_matches[j], params[:p_values])") - # gram_matrix[i][j] = Algorithm.gauss(sim) - # gram_matrix[j] = [] unless gram_matrix[j] - # gram_matrix[j][i] = gram_matrix[i][j] # lower triangle - # end - # gram_matrix[i][i] = 1.0 - #end - - #LOGGER.debug gram_matrix.to_yaml - @r = RinRuby.new(false,false) # global R instance leads to Socket errors after a large number of requests - @r.eval "library('kernlab')" # this requires R package "kernlab" to be installed - LOGGER.debug "Setting R data ..." - # set data - @r.n_prop = n_prop.flatten - @r.n_prop_x_size = n_prop.size - @r.n_prop_y_size = n_prop[0].size - @r.y = acts - @r.q_prop = q_prop - - begin - LOGGER.debug "Preparing R data ..." - # prepare data - @r.eval "y<-matrix(y)" - @r.eval "prop_matrix<-matrix(n_prop, n_prop_x_size, n_prop_y_size, byrow=TRUE)" - @r.eval "q_prop<-matrix(q_prop, 1, n_prop_y_size, byrow=TRUE)" - - # model + support vectors - LOGGER.debug "Creating SVM model ..." - @r.eval "model<-ksvm(prop_matrix, y, type=\"#{type}\", nu=0.5)" - LOGGER.debug "Predicting ..." - if type == "nu-svr" - @r.eval "p<-predict(model,q_prop)[1,1]" - elsif type == "C-bsvc" - @r.eval "p<-predict(model,q_prop)" - end - if type == "nu-svr" - prediction = @r.p - elsif type == "C-bsvc" - #prediction = (@r.p.to_f == 1.0 ? true : false) - prediction = @r.p + #neighbor_matches = neighbors.collect{ |n| n[:features] } # URIs of matches + #gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel + if n_prop.size == 0 + raise "No neighbors found." + else + # gram matrix + #(0..(neighbor_matches.length-1)).each do |i| + # gram_matrix[i] = [] unless gram_matrix[i] + # # upper triangle + # ((i+1)..(neighbor_matches.length-1)).each do |j| + # sim = eval("#{params[:similarity_algorithm]}(neighbor_matches[i], neighbor_matches[j], params[:p_values])") + # gram_matrix[i][j] = Algorithm.gauss(sim) + # gram_matrix[j] = [] unless gram_matrix[j] + # gram_matrix[j][i] = gram_matrix[i][j] # lower triangle + # end + # gram_matrix[i][i] = 1.0 + #end + + #LOGGER.debug gram_matrix.to_yaml + @r = RinRuby.new(false,false) # global R instance leads to Socket errors after a large number of requests + @r.eval "library('kernlab')" # this requires R package "kernlab" to be installed + LOGGER.debug "Setting R data ..." + # set data + @r.n_prop = n_prop.flatten + @r.n_prop_x_size = n_prop.size + @r.n_prop_y_size = n_prop[0].size + @r.y = acts + @r.q_prop = q_prop + + begin + LOGGER.debug "Preparing R data ..." + # prepare data + @r.eval "y<-matrix(y)" + @r.eval "prop_matrix<-matrix(n_prop, n_prop_x_size, n_prop_y_size, byrow=TRUE)" + @r.eval "q_prop<-matrix(q_prop, 1, n_prop_y_size, byrow=TRUE)" + + # model + support vectors + LOGGER.debug "Creating SVM model ..." + @r.eval "model<-ksvm(prop_matrix, y, type=\"#{type}\", nu=0.5)" + LOGGER.debug "Predicting ..." + if type == "nu-svr" + @r.eval "p<-predict(model,q_prop)[1,1]" + elsif type == "C-bsvc" + @r.eval "p<-predict(model,q_prop)" + end + if type == "nu-svr" + prediction = @r.p + elsif type == "C-bsvc" + #prediction = (@r.p.to_f == 1.0 ? true : false) + prediction = @r.p + end + @r.quit # free R + rescue Exception => e + LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" end - @r.quit # free R - rescue Exception => e - LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" end - end - prediction + prediction end @@ -424,14 +423,14 @@ module OpenTox def features(dataset_uri,compound_uri) end end - + # Gauss kernel # @return [Float] def self.gauss(x, sigma = 0.3) d = 1.0 - x.to_f Math.exp(-(d*d)/(2*sigma*sigma)) end - + # Median of an array # @param [Array] Array with values # @return [Float] Median -- cgit v1.2.3 From 412d036b1694faacc9a6d6ab53fc989be3e625b4 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Tue, 28 Jun 2011 11:38:44 +0200 Subject: Utility functions for effect calculation --- lib/algorithm.rb | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index fc1d451..baed7bf 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -174,9 +174,7 @@ module OpenTox end confidence = confidence_sum/neighbors.size if neighbors.size > 0 - res = {:prediction => prediction, :confidence => confidence.abs} - puts res.to_yaml - res + return {:prediction => prediction, :confidence => confidence.abs} end # Local support vector regression from neighbors @@ -422,13 +420,23 @@ module OpenTox return array.size % 2 == 1 ? array[m_pos] : (array[m_pos-1] + array[m_pos])/2 end - # Sum of an array + # Sum of an array for Numeric values # @param [Array] Array with values # @return [Integer] Sum of values def self.sum(array) array.inject{|s,x| s + x } end + # Sum of an array for Arrays. + # @param [Array] Array with values + # @return [Integer] Sum of size of values + def self.sum_size(array) + sum=0 + array.each { |e| sum += e.size } + return sum + end + + # Minimum Frequency # @param [Integer] per-mil value # return [Integer] min-frequency @@ -439,20 +447,20 @@ module OpenTox end # Effect calculation for classification - # @param [Array] Array of occurrence counts of a feature. - # @param [Array] Array of database instance counts. + # @param [Array] Array of occurrences per class in the form of Enumerables. + # @param [Array] Array of database instance counts per class. def self.effect(occurrences, db_instances) max=nil max_value=0 - nr_o = sum(occurrences) - nr_db = sum(db_instances) + nr_o = self.sum_size(occurrences) + nr_db = self.sum(db_instances) occurrences.each_with_index { |o,i| # fminer outputs occurrences sorted reverse by activity. - actual = o.to_f/nr_o + actual = o.size.to_f/nr_o expected = db_instances[i].to_f/nr_db if actual > expected if ((actual - expected) / actual) > max_value - max_value = (actual - expected) / actual # 'Schleppzeiger' + max_value = (actual - expected) / actual # 'Schleppzeiger' max = i end end -- cgit v1.2.3 From 89d053e7ffa4aa234722316939ee494fe380e623 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Tue, 28 Jun 2011 15:24:11 +0200 Subject: Initializing effect to 0 --- lib/algorithm.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index baed7bf..45b45ee 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -450,7 +450,7 @@ module OpenTox # @param [Array] Array of occurrences per class in the form of Enumerables. # @param [Array] Array of database instance counts per class. def self.effect(occurrences, db_instances) - max=nil + max=0 max_value=0 nr_o = self.sum_size(occurrences) nr_db = self.sum(db_instances) -- cgit v1.2.3 From 8506bf2f365bba1ecf546683f001aece5ee3430f Mon Sep 17 00:00:00 2001 From: dv Date: Tue, 28 Jun 2011 17:20:27 +0200 Subject: some fixes --- lib/algorithm.rb | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 1b9461e..cba52e5 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -206,19 +206,18 @@ module OpenTox begin sim_median = Algorithm.median(sims) - confidence = nil - if sim_median + #confidence = nil + if sim_median.nil? + LOGGER.debug "dv ------------ sim_median is nil" + else @r_sd = RinRuby.new(false,false) @r_sd.r_regression_acts = acts - standard_diviation = @r_sd.pull "as.numeric(sd(r_regression_acts))"#calculate standard deviation - LOGGER.debug "dv ----------------- sd is: '" + standard_diviation.to_s + "'." + standard_deviation = @r_sd.pull "as.numeric(sd(r_regression_acts))"#calculate standard deviation @r_sd.quit #free R - confidence = (sim_median*Math.exp(-1*standard_diviation)).abs + confidence = (sim_median*Math.exp(-1*standard_deviation)).abs if confidence.nan? confidence = nil end - else - LOGGER.debug "dv ------------ sim_median not valid" end LOGGER.debug "Confidence is: '" + confidence.to_s + "'." rescue Exception => e -- cgit v1.2.3 From 1bd5be35590449413297f0b49ef51a437e1cf8d9 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Wed, 29 Jun 2011 10:34:44 +0200 Subject: Added unification routines for fminer --- lib/algorithm.rb | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 79 insertions(+), 6 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 45b45ee..6c4134b 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -46,12 +46,87 @@ module OpenTox end # Fminer algorithms (https://github.com/amaunz/fminer2) - module Fminer + class Fminer include Algorithm + attr_accessor :prediction_feature, :training_dataset, :minfreq, :compounds, :db_class_sizes, :all_activities, :smi + + def check_params(params,per_mil) + raise OpenTox::NotFoundError.new "Please submit a dataset_uri." unless params[:dataset_uri] and !params[:dataset_uri].nil? + raise OpenTox::NotFoundError.new "Please submit a prediction_feature." unless params[:prediction_feature] and !params[:prediction_feature].nil? + @prediction_feature = OpenTox::Feature.find params[:prediction_feature], @subjectid + @training_dataset = OpenTox::Dataset.find "#{params[:dataset_uri]}", @subjectid + raise OpenTox::NotFoundError.new "No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}" unless @training_dataset.features and @training_dataset.features.include?(params[:prediction_feature]) + + unless params[:min_frequency].nil? + @minfreq=params[:min_frequency].to_i + raise "Minimum frequency must be a number >0!" unless @minfreq>0 + else + @minfreq=OpenTox::Algorithm.min_frequency(@training_dataset,per_mil) # AM sugg. 8-10 per mil for BBRC, 50 per mil for LAST + end + end + + def add_fminer_data(fminer_instance, params, value_map) + + id = 1 # fminer start id is not 0 + @training_dataset.data_entries.each do |compound,entry| + begin + smiles = OpenTox::Compound.smiles(compound.to_s) + rescue + LOGGER.warn "No resource for #{compound.to_s}" + next + end + if smiles == '' or smiles.nil? + LOGGER.warn "Cannot find smiles for #{compound.to_s}." + next + end + + # AM: take log if appropriate + take_logs=true + entry.each do |feature,values| + values.each do |value| + if @prediction_feature.feature_type == "regression" + if (! value.nil?) && (value.to_f <= 0) + take_logs=false + end + end + end + end + + value_map=params[:value_map] unless params[:value_map].nil? + entry.each do |feature,values| + if feature == @prediction_feature.uri + values.each do |value| + if value.nil? + LOGGER.warn "No #{feature} activity for #{compound.to_s}." + else + if @prediction_feature.feature_type == "classification" + activity= value_map.invert[value].to_i # activities are mapped to 1..n + @db_class_sizes[activity-1].nil? ? @db_class_sizes[activity-1]=1 : @db_class_sizes[activity-1]+=1 # AM effect + elsif @prediction_feature.feature_type == "regression" + activity= take_logs ? Math.log10(value.to_f) : value.to_f + end + begin + fminer_instance.AddCompound(smiles,id) + fminer_instance.AddActivity(activity, id) + @all_activities[id]=activity # DV: insert global information + @compounds[id] = compound + @smi[id] = smiles + id += 1 + rescue Exception => e + LOGGER.warn "Could not add " + smiles + "\t" + value.to_s + " to fminer" + LOGGER.warn e.backtrace + end + end + end + end + end + end + end + + end # Backbone Refinement Class mining (http://bbrc.maunz.de/) - class BBRC - include Fminer + class BBRC < Fminer # Initialize bbrc algorithm def initialize(subjectid=nil) super File.join(CONFIG[:services]["opentox-algorithm"], "fminer/bbrc") @@ -60,8 +135,7 @@ module OpenTox end # LAtent STructure Pattern Mining (http://last-pm.maunz.de) - class LAST - include Fminer + class LAST < Fminer # Initialize last algorithm def initialize(subjectid=nil) super File.join(CONFIG[:services]["opentox-algorithm"], "fminer/last") @@ -69,7 +143,6 @@ module OpenTox end end - end # Create lazar prediction model class Lazar -- cgit v1.2.3 From 3e6b6940797a1b118027964796c1abc10d5c3afa Mon Sep 17 00:00:00 2001 From: dv Date: Wed, 29 Jun 2011 16:01:23 +0200 Subject: standard deviation in ruby (without r) --- lib/algorithm.rb | 41 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 37 insertions(+), 4 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index cba52e5..5a1da60 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -210,10 +210,12 @@ module OpenTox if sim_median.nil? LOGGER.debug "dv ------------ sim_median is nil" else - @r_sd = RinRuby.new(false,false) - @r_sd.r_regression_acts = acts - standard_deviation = @r_sd.pull "as.numeric(sd(r_regression_acts))"#calculate standard deviation - @r_sd.quit #free R + #@r_sd = RinRuby.new(false,false) + #@r_sd.r_regression_acts = acts + #standard_deviation = @r_sd.pull "as.numeric(sd(r_regression_acts))"#calculate standard deviation + #@r_sd.quit #free R + standard_deviation = acts.std_dev + LOGGER.debug "dv ------------ sd: #{standard_deviation}" confidence = (sim_median*Math.exp(-1*standard_deviation)).abs if confidence.nan? confidence = nil @@ -440,5 +442,36 @@ module OpenTox return array.size % 2 == 1 ? array[m_pos] : (array[m_pos-1] + array[m_pos])/2 end + # Adds mean calculation to Array class + #class Array; def mean; sum.to_f / size.to_f; end; end + + # Calculation of standard deviation + # @param [Array] Array with values + # @return [Float] variance + #def self.variance(array) + # return nil if array.empty? + # mean = array.mean + # return array.inject(0.0) {|s,x| s + (x - mean)**2} + #end + module Variance + def sum(&blk) + map(&blk).inject { |sum, element| sum + element } + end + + def mean + (sum.to_f / size.to_f) + end + + def variance + m = mean + sum { |i| ( i - m )**2 } / size + end + + def std_dev + Math.sqrt(variance) + end + end + Array.send :include, Variance + end end -- cgit v1.2.3 From 50d35c614cc0fb2cfb6f44f3c8711a1a0cd97d8d Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Mon, 4 Jul 2011 08:41:24 +0200 Subject: Added switch for MLR --- lib/algorithm.rb | 47 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 34 insertions(+), 13 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 6c4134b..af05376 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -401,19 +401,6 @@ module OpenTox if n_prop.size == 0 raise "No neighbors found." else - # gram matrix - #(0..(neighbor_matches.length-1)).each do |i| - # gram_matrix[i] = [] unless gram_matrix[i] - # # upper triangle - # ((i+1)..(neighbor_matches.length-1)).each do |j| - # sim = eval("#{params[:similarity_algorithm]}(neighbor_matches[i], neighbor_matches[j], params[:p_values])") - # gram_matrix[i][j] = Algorithm.gauss(sim) - # gram_matrix[j] = [] unless gram_matrix[j] - # gram_matrix[j][i] = gram_matrix[i][j] # lower triangle - # end - # gram_matrix[i][i] = 1.0 - #end - #LOGGER.debug gram_matrix.to_yaml @r = RinRuby.new(false,false) # global R instance leads to Socket errors after a large number of requests @r.eval "library('kernlab')" # this requires R package "kernlab" to be installed @@ -455,7 +442,41 @@ module OpenTox prediction end + # Local multi-linear regression (MLR) prediction from neighbors. + # Uses propositionalized setting. + # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features` + # @param [Array] acts, activities for neighbors. + # @param [Array] props, propositionalization of neighbors and query structure e.g. [ Array_for_q, two-nested-Arrays_for_n ] + # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required + # @return [Numeric] A prediction value. + def local_mlr_prop + LOGGER.debug "Local MLR (Propositionalization / GSL)." + n_prop = props[0] # is a matrix, i.e. two nested Arrays. + q_prop = props[1] # is an Array. + + if n_prop.size == 0 + raise "No neighbors found." + else + begin + LOGGER.debug "Setting GSL data ..." + # set data + prop_matrix = GSL::Matrix[n_prop] + n_prop_x_size = n_prop.size + n_prop_y_size = n_prop[0].size + y = GSL::Vector[acts] + q_prop = GSL::Vector[q_prop] + # model + support vectors + LOGGER.debug "Creating MLR model ..." + work = GSL::MultiFit::Workspace.alloc(n_prop_y_size,n_prop_x_size) + [c, cov, chisq, status] = GSL::MultiFit::linear(prop_matrix, y, work) + LOGGER.debug "Predicting ..." + prediction = GSL::Multifit::linear_est(q_prop, c, cov) + rescue Exception => e + LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" + end + end + prediction end module Substructure -- cgit v1.2.3 From 29ac11c8cf857a45bc5be33e465fdb6d14e47395 Mon Sep 17 00:00:00 2001 From: dv Date: Mon, 4 Jul 2011 10:44:42 +0200 Subject: Merged with resent dev branch --- lib/algorithm.rb | 156 ++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 131 insertions(+), 25 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 5a1da60..2b6086b 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -46,12 +46,87 @@ module OpenTox end # Fminer algorithms (https://github.com/amaunz/fminer2) - module Fminer + class Fminer include Algorithm + attr_accessor :prediction_feature, :training_dataset, :minfreq, :compounds, :db_class_sizes, :all_activities, :smi + + def check_params(params,per_mil) + raise OpenTox::NotFoundError.new "Please submit a dataset_uri." unless params[:dataset_uri] and !params[:dataset_uri].nil? + raise OpenTox::NotFoundError.new "Please submit a prediction_feature." unless params[:prediction_feature] and !params[:prediction_feature].nil? + @prediction_feature = OpenTox::Feature.find params[:prediction_feature], @subjectid + @training_dataset = OpenTox::Dataset.find "#{params[:dataset_uri]}", @subjectid + raise OpenTox::NotFoundError.new "No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}" unless @training_dataset.features and @training_dataset.features.include?(params[:prediction_feature]) + + unless params[:min_frequency].nil? + @minfreq=params[:min_frequency].to_i + raise "Minimum frequency must be a number >0!" unless @minfreq>0 + else + @minfreq=OpenTox::Algorithm.min_frequency(@training_dataset,per_mil) # AM sugg. 8-10 per mil for BBRC, 50 per mil for LAST + end + end + + def add_fminer_data(fminer_instance, params, value_map) + + id = 1 # fminer start id is not 0 + @training_dataset.data_entries.each do |compound,entry| + begin + smiles = OpenTox::Compound.smiles(compound.to_s) + rescue + LOGGER.warn "No resource for #{compound.to_s}" + next + end + if smiles == '' or smiles.nil? + LOGGER.warn "Cannot find smiles for #{compound.to_s}." + next + end + + # AM: take log if appropriate + take_logs=true + entry.each do |feature,values| + values.each do |value| + if @prediction_feature.feature_type == "regression" + if (! value.nil?) && (value.to_f <= 0) + take_logs=false + end + end + end + end + + value_map=params[:value_map] unless params[:value_map].nil? + entry.each do |feature,values| + if feature == @prediction_feature.uri + values.each do |value| + if value.nil? + LOGGER.warn "No #{feature} activity for #{compound.to_s}." + else + if @prediction_feature.feature_type == "classification" + activity= value_map.invert[value].to_i # activities are mapped to 1..n + @db_class_sizes[activity-1].nil? ? @db_class_sizes[activity-1]=1 : @db_class_sizes[activity-1]+=1 # AM effect + elsif @prediction_feature.feature_type == "regression" + activity= take_logs ? Math.log10(value.to_f) : value.to_f + end + begin + fminer_instance.AddCompound(smiles,id) + fminer_instance.AddActivity(activity, id) + @all_activities[id]=activity # DV: insert global information + @compounds[id] = compound + @smi[id] = smiles + id += 1 + rescue Exception => e + LOGGER.warn "Could not add " + smiles + "\t" + value.to_s + " to fminer" + LOGGER.warn e.backtrace + end + end + end + end + end + end + end + + end # Backbone Refinement Class mining (http://bbrc.maunz.de/) - class BBRC - include Fminer + class BBRC < Fminer # Initialize bbrc algorithm def initialize(subjectid=nil) super File.join(CONFIG[:services]["opentox-algorithm"], "fminer/bbrc") @@ -60,8 +135,7 @@ module OpenTox end # LAtent STructure Pattern Mining (http://last-pm.maunz.de) - class LAST - include Fminer + class LAST < Fminer # Initialize last algorithm def initialize(subjectid=nil) super File.join(CONFIG[:services]["opentox-algorithm"], "fminer/last") @@ -69,7 +143,6 @@ module OpenTox end end - end # Create lazar prediction model class Lazar @@ -174,9 +247,7 @@ module OpenTox end confidence = confidence_sum/neighbors.size if neighbors.size > 0 - res = {:prediction => prediction, :confidence => confidence.abs} - puts res.to_yaml - res + return {:prediction => prediction, :confidence => confidence.abs} end # Local support vector regression from neighbors @@ -225,9 +296,7 @@ module OpenTox rescue Exception => e LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" end - res = {:prediction => prediction, :confidence => confidence} - puts res.to_yaml - res + return {:prediction => prediction, :confidence => confidence} end # Local support vector classification from neighbors @@ -442,36 +511,73 @@ module OpenTox return array.size % 2 == 1 ? array[m_pos] : (array[m_pos-1] + array[m_pos])/2 end - # Adds mean calculation to Array class - #class Array; def mean; sum.to_f / size.to_f; end; end + # Sum of an array for Numeric values + # @param [Array] Array with values + # @return [Integer] Sum of values + def self.sum(array) + array.inject{|s,x| s + x } + end - # Calculation of standard deviation + # Sum of an array for Arrays. # @param [Array] Array with values - # @return [Float] variance - #def self.variance(array) - # return nil if array.empty? - # mean = array.mean - # return array.inject(0.0) {|s,x| s + (x - mean)**2} - #end + # @return [Integer] Sum of size of values + def self.sum_size(array) + sum=0 + array.each { |e| sum += e.size } + return sum + end + + + # Minimum Frequency + # @param [Integer] per-mil value + # return [Integer] min-frequency + def self.min_frequency(training_dataset,per_mil) + minfreq = per_mil*training_dataset.compounds.size/1000 # AM sugg. 8-10 per mil for BBRC, 50 per mil for LAST + minfreq = 2 unless minfreq > 2 + minfreq + end + + # Effect calculation for classification + # @param [Array] Array of occurrences per class in the form of Enumerables. + # @param [Array] Array of database instance counts per class. + def self.effect(occurrences, db_instances) + max=0 + max_value=0 + nr_o = self.sum_size(occurrences) + nr_db = self.sum(db_instances) + + occurrences.each_with_index { |o,i| # fminer outputs occurrences sorted reverse by activity. + actual = o.size.to_f/nr_o + expected = db_instances[i].to_f/nr_db + if actual > expected + if ((actual - expected) / actual) > max_value + max_value = (actual - expected) / actual # 'Schleppzeiger' + max = i + end + end + } + max + end + + # Adds variance, mean and standard deviation calculation to Array class module Variance def sum(&blk) map(&blk).inject { |sum, element| sum + element } end - def mean (sum.to_f / size.to_f) end - def variance m = mean sum { |i| ( i - m )**2 } / size end - def std_dev Math.sqrt(variance) end end Array.send :include, Variance - + end end + + -- cgit v1.2.3 From ebb9427120e8100d94435851a66ae76dc6d5a22c Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Mon, 4 Jul 2011 11:05:34 +0200 Subject: MLR integration finished --- Rakefile | 1 + lib/algorithm.rb | 215 ++++++++++++++++++++++++++++++---------------------- lib/model.rb | 4 +- lib/opentox-ruby.rb | 2 +- 4 files changed, 129 insertions(+), 93 deletions(-) diff --git a/Rakefile b/Rakefile index 834e0a3..bd22c16 100644 --- a/Rakefile +++ b/Rakefile @@ -43,6 +43,7 @@ begin gem.add_dependency "dm-validations", "=1.1.0" gem.add_dependency "dm-sqlite-adapter", "=1.1.0" gem.add_dependency "ruby-plot", "=0.5.0" + gem.add_dependency "gsl", "=1.14.7" gem.add_development_dependency 'jeweler' gem.files = FileList["[A-Z]*", "{bin,generators,lib,test}/**/*", 'lib/jeweler/templates/.gitignore'] diff --git a/lib/algorithm.rb b/lib/algorithm.rb index af05376..bfa79d3 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -208,6 +208,75 @@ module OpenTox module Neighbors + # Local multi-linear regression (MLR) prediction from neighbors. + # Uses propositionalized setting. + # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features` + # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required + # @param [Array] props, propositionalization of neighbors and query structure e.g. [ Array_for_q, two-nested-Arrays_for_n ] + # @return [Numeric] A prediction value. + def self.local_mlr_prop(neighbors, params, props) + + take_logs=true + + neighbors.each do |n| + if (! n[:activity].nil?) && (n[:activity].to_f < 0.0) + take_logs = false + end + end + + acts = neighbors.collect do |n| + act = n[:activity] + take_logs ? Math.log10(act.to_f) : act.to_f + end # activities of neighbors for supervised learning + + + begin + + LOGGER.debug "Local MLR (Propositionalization / GSL)." + n_prop = props[0] # is a matrix, i.e. two nested Arrays. + q_prop = props[1] # is an Array. + n_prop_x_size = n_prop[0].size + n_prop_y_size = n_prop.size + + n_prop.flatten! + y_x_rel = n_prop_y_size.to_f / n_prop_x_size + repeat_factor = (1/y_x_rel).ceil + n_prop_tmp = Array.new ; repeat_factor.times { n_prop_tmp.concat n_prop } ; n_prop = n_prop_tmp + acts_tmp = Array.new ; repeat_factor.times { acts_tmp.concat acts } ; acts = acts_tmp + + if n_prop.size == 0 + raise "No neighbors found." + else + begin + LOGGER.debug "Setting GSL data ..." + # set data + prop_matrix = GSL::Matrix[n_prop, n_prop_y_size * repeat_factor, n_prop_x_size] + y = GSL::Vector[acts] + q_prop = GSL::Vector[q_prop] + + # model + support vectors + LOGGER.debug "Creating MLR model ..." + work = GSL::MultiFit::Workspace.alloc(n_prop_y_size * repeat_factor, n_prop_x_size) + c, cov, chisq, status = GSL::MultiFit::linear(prop_matrix, y, work) + LOGGER.debug "Predicting ..." + prediction = GSL::MultiFit::linear_est(q_prop, c, cov)[0] + rescue Exception => e + LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" + end + end + + prediction = (take_logs ? 10**(prediction.to_f) : prediction.to_f) + LOGGER.debug "Prediction is: '" + prediction.to_s + "'." + rescue Exception => e + LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" + end + + sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors + conf = sims.inject{|sum,x| sum + x } + confidence = conf/neighbors.size if neighbors.size > 0 + {:prediction => prediction, :confidence => confidence} + end + # Classification with majority vote from neighbors weighted by similarity # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity` # @param [optional] params Ignored (only for compatibility with local_svm_regression) @@ -318,67 +387,67 @@ module OpenTox # @param [Array] props, propositionalization of neighbors and query structure e.g. [ Array_for_q, two-nested-Arrays_for_n ] # @return [Numeric] A prediction value. def self.local_svm(neighbors, acts, sims, type, params) - LOGGER.debug "Local SVM (Weighted Tanimoto Kernel)." - neighbor_matches = neighbors.collect{ |n| n[:features] } # URIs of matches - gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel - if neighbor_matches.size == 0 - raise "No neighbors found." - else - # gram matrix - (0..(neighbor_matches.length-1)).each do |i| - gram_matrix[i] = [] unless gram_matrix[i] - # upper triangle - ((i+1)..(neighbor_matches.length-1)).each do |j| - sim = eval("#{params[:similarity_algorithm]}(neighbor_matches[i], neighbor_matches[j], params[:p_values])") - gram_matrix[i][j] = Algorithm.gauss(sim) - gram_matrix[j] = [] unless gram_matrix[j] - gram_matrix[j][i] = gram_matrix[i][j] # lower triangle - end - gram_matrix[i][i] = 1.0 + LOGGER.debug "Local SVM (Weighted Tanimoto Kernel)." + neighbor_matches = neighbors.collect{ |n| n[:features] } # URIs of matches + gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel + if neighbor_matches.size == 0 + raise "No neighbors found." + else + # gram matrix + (0..(neighbor_matches.length-1)).each do |i| + gram_matrix[i] = [] unless gram_matrix[i] + # upper triangle + ((i+1)..(neighbor_matches.length-1)).each do |j| + sim = eval("#{params[:similarity_algorithm]}(neighbor_matches[i], neighbor_matches[j], params[:p_values])") + gram_matrix[i][j] = Algorithm.gauss(sim) + gram_matrix[j] = [] unless gram_matrix[j] + gram_matrix[j][i] = gram_matrix[i][j] # lower triangle end + gram_matrix[i][i] = 1.0 + end - #LOGGER.debug gram_matrix.to_yaml - @r = RinRuby.new(false,false) # global R instance leads to Socket errors after a large number of requests - @r.eval "library('kernlab')" # this requires R package "kernlab" to be installed - LOGGER.debug "Setting R data ..." - # set data - @r.gram_matrix = gram_matrix.flatten - @r.n = neighbor_matches.size - @r.y = acts - @r.sims = sims + #LOGGER.debug gram_matrix.to_yaml + @r = RinRuby.new(false,false) # global R instance leads to Socket errors after a large number of requests + @r.eval "library('kernlab')" # this requires R package "kernlab" to be installed + LOGGER.debug "Setting R data ..." + # set data + @r.gram_matrix = gram_matrix.flatten + @r.n = neighbor_matches.size + @r.y = acts + @r.sims = sims - begin - LOGGER.debug "Preparing R data ..." - # prepare data - @r.eval "y<-as.vector(y)" - @r.eval "gram_matrix<-as.kernelMatrix(matrix(gram_matrix,n,n))" - @r.eval "sims<-as.vector(sims)" - - # model + support vectors - LOGGER.debug "Creating SVM model ..." - @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"#{type}\", nu=0.5)" - @r.eval "sv<-as.vector(SVindex(model))" - @r.eval "sims<-sims[sv]" - @r.eval "sims<-as.kernelMatrix(matrix(sims,1))" - LOGGER.debug "Predicting ..." - if type == "nu-svr" - @r.eval "p<-predict(model,sims)[1,1]" - elsif type == "C-bsvc" - @r.eval "p<-predict(model,sims)" - end - if type == "nu-svr" - prediction = @r.p - elsif type == "C-bsvc" - #prediction = (@r.p.to_f == 1.0 ? true : false) - prediction = @r.p - end - @r.quit # free R - rescue Exception => e - LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" + begin + LOGGER.debug "Preparing R data ..." + # prepare data + @r.eval "y<-as.vector(y)" + @r.eval "gram_matrix<-as.kernelMatrix(matrix(gram_matrix,n,n))" + @r.eval "sims<-as.vector(sims)" + + # model + support vectors + LOGGER.debug "Creating SVM model ..." + @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"#{type}\", nu=0.5)" + @r.eval "sv<-as.vector(SVindex(model))" + @r.eval "sims<-sims[sv]" + @r.eval "sims<-as.kernelMatrix(matrix(sims,1))" + LOGGER.debug "Predicting ..." + if type == "nu-svr" + @r.eval "p<-predict(model,sims)[1,1]" + elsif type == "C-bsvc" + @r.eval "p<-predict(model,sims)" end - + if type == "nu-svr" + prediction = @r.p + elsif type == "C-bsvc" + #prediction = (@r.p.to_f == 1.0 ? true : false) + prediction = @r.p + end + @r.quit # free R + rescue Exception => e + LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" end - prediction + + end + prediction end # Local support vector prediction from neighbors. @@ -442,41 +511,7 @@ module OpenTox prediction end - # Local multi-linear regression (MLR) prediction from neighbors. - # Uses propositionalized setting. - # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features` - # @param [Array] acts, activities for neighbors. - # @param [Array] props, propositionalization of neighbors and query structure e.g. [ Array_for_q, two-nested-Arrays_for_n ] - # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required - # @return [Numeric] A prediction value. - def local_mlr_prop - LOGGER.debug "Local MLR (Propositionalization / GSL)." - n_prop = props[0] # is a matrix, i.e. two nested Arrays. - q_prop = props[1] # is an Array. - - if n_prop.size == 0 - raise "No neighbors found." - else - begin - LOGGER.debug "Setting GSL data ..." - # set data - prop_matrix = GSL::Matrix[n_prop] - n_prop_x_size = n_prop.size - n_prop_y_size = n_prop[0].size - y = GSL::Vector[acts] - q_prop = GSL::Vector[q_prop] - # model + support vectors - LOGGER.debug "Creating MLR model ..." - work = GSL::MultiFit::Workspace.alloc(n_prop_y_size,n_prop_x_size) - [c, cov, chisq, status] = GSL::MultiFit::linear(prop_matrix, y, work) - LOGGER.debug "Predicting ..." - prediction = GSL::Multifit::linear_est(q_prop, c, cov) - rescue Exception => e - LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" - end - end - prediction end module Substructure diff --git a/lib/model.rb b/lib/model.rb index 5eec366..ea6fd08 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -253,7 +253,7 @@ module OpenTox (i == modulo[0]) && (slack>0) ? lr_size = s.size + slack : lr_size = s.size + addon # determine fraction LOGGER.info "BLAZAR: Neighbors round #{i}: #{position} + #{lr_size}." neighbors_balanced(s, l, position, lr_size) # get ratio fraction of larger part - if @prop_kernel && @prediction_algorithm.include?("svm") + if @prop_kernel && ( @prediction_algorithm.include?("svm") || @prediction_algorithm.include?("local_mlr_prop") ) props = get_props else props = nil @@ -276,7 +276,7 @@ module OpenTox else # AM: no balancing or regression LOGGER.info "LAZAR: Unbalanced." neighbors - if @prop_kernel && @prediction_algorithm.include?("svm") + if @prop_kernel && ( @prediction_algorithm.include?("svm") || @prediction_algorithm.include?("local_mlr_prop") ) props = get_props else props = nil diff --git a/lib/opentox-ruby.rb b/lib/opentox-ruby.rb index ae05cb2..1fa2a86 100644 --- a/lib/opentox-ruby.rb +++ b/lib/opentox-ruby.rb @@ -1,4 +1,4 @@ -['rubygems', 'sinatra', 'sinatra/url_for', 'ohm', 'rest_client', 'yaml', 'cgi', 'spork', 'error', 'overwrite', 'environment'].each do |lib| +['rubygems', 'sinatra', 'sinatra/url_for', 'ohm', 'rest_client', 'yaml', 'cgi', 'spork', 'error', 'overwrite', 'environment', 'gsl'].each do |lib| require lib end -- cgit v1.2.3 From ba069b4091b95bea6db4acb500c181c4875b8368 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Tue, 5 Jul 2011 09:00:23 +0200 Subject: Fixed log taking in Lazar --- lib/algorithm.rb | 45 +++++++++++++++------------------------------ 1 file changed, 15 insertions(+), 30 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index bfa79d3..6a3dd1d 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -80,18 +80,6 @@ module OpenTox next end - # AM: take log if appropriate - take_logs=true - entry.each do |feature,values| - values.each do |value| - if @prediction_feature.feature_type == "regression" - if (! value.nil?) && (value.to_f <= 0) - take_logs=false - end - end - end - end - value_map=params[:value_map] unless params[:value_map].nil? entry.each do |feature,values| if feature == @prediction_feature.uri @@ -103,7 +91,7 @@ module OpenTox activity= value_map.invert[value].to_i # activities are mapped to 1..n @db_class_sizes[activity-1].nil? ? @db_class_sizes[activity-1]=1 : @db_class_sizes[activity-1]+=1 # AM effect elsif @prediction_feature.feature_type == "regression" - activity= take_logs ? Math.log10(value.to_f) : value.to_f + activity= value.to_f end begin fminer_instance.AddCompound(smiles,id) @@ -216,21 +204,19 @@ module OpenTox # @return [Numeric] A prediction value. def self.local_mlr_prop(neighbors, params, props) - take_logs=true - - neighbors.each do |n| - if (! n[:activity].nil?) && (n[:activity].to_f < 0.0) - take_logs = false - end - end acts = neighbors.collect do |n| act = n[:activity] - take_logs ? Math.log10(act.to_f) : act.to_f + act.to_f end # activities of neighbors for supervised learning begin + [min,max] = acts.minmax + neg_offset = 1.0 - min # negative offset to min element + acts = acts.collect do |a| + Math.log10(a-neg_offset) # everything >1, then take log10 + end LOGGER.debug "Local MLR (Propositionalization / GSL)." n_prop = props[0] # is a matrix, i.e. two nested Arrays. @@ -265,7 +251,7 @@ module OpenTox end end - prediction = (take_logs ? 10**(prediction.to_f) : prediction.to_f) + prediction = 10**(prediction.to_f) + neg_offset # reverse log10 LOGGER.debug "Prediction is: '" + prediction.to_s + "'." rescue Exception => e LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" @@ -324,21 +310,20 @@ module OpenTox # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required # @return [Hash] Hash with keys `:prediction, :confidence` def self.local_svm_regression(neighbors, params, props=nil) - take_logs=true - neighbors.each do |n| - if (! n[:activity].nil?) && (n[:activity].to_f < 0.0) - take_logs = false - end - end acts = neighbors.collect do |n| act = n[:activity] - take_logs ? Math.log10(act.to_f) : act.to_f + act.to_f end # activities of neighbors for supervised learning sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors begin + [min,max] = acts.minmax + neg_offset = 1.0 - min # negative offset to min element + acts = acts.collect do |a| + Math.log10(a-neg_offset) # everything >1, then take log10 + end prediction = (props.nil? ? local_svm(neighbors, acts, sims, "nu-svr", params) : local_svm_prop(props, acts, "nu-svr", params)) - prediction = (take_logs ? 10**(prediction.to_f) : prediction.to_f) + prediction = 10**(prediction.to_f) + neg_offset LOGGER.debug "Prediction is: '" + prediction.to_s + "'." rescue Exception => e LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" -- cgit v1.2.3 From f84a1577101e2eb9d8978947ae7d8311025c8130 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Tue, 5 Jul 2011 11:45:44 +0200 Subject: Fixed minmax --- lib/algorithm.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 6a3dd1d..326561b 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -212,7 +212,7 @@ module OpenTox begin - [min,max] = acts.minmax + min,max = acts.minmax neg_offset = 1.0 - min # negative offset to min element acts = acts.collect do |a| Math.log10(a-neg_offset) # everything >1, then take log10 @@ -317,7 +317,7 @@ module OpenTox sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors begin - [min,max] = acts.minmax + min,max = acts.minmax neg_offset = 1.0 - min # negative offset to min element acts = acts.collect do |a| Math.log10(a-neg_offset) # everything >1, then take log10 -- cgit v1.2.3 From 12951ac52d8dcf81aaa9fa7a882da912c91cce22 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Tue, 5 Jul 2011 15:58:49 +0200 Subject: Added scaling --- lib/algorithm.rb | 36 +++++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 326561b..d5e9caf 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -213,10 +213,13 @@ module OpenTox begin min,max = acts.minmax - neg_offset = 1.0 - min # negative offset to min element - acts = acts.collect do |a| - Math.log10(a-neg_offset) # everything >1, then take log10 - end + offset = 1.0 - min # offset to min element + offset = -1.0 * offset if offset>0.0 + div_offset = max - offset # dynamic range + + acts = acts.collect { |a| a - offset } # everything >1, starting at 1 + acts = acts.collect { |a| a / div_offset } # scale to unit length + acts = acts.collect { |a| Math.log10 a } # everything >1, then take log10 LOGGER.debug "Local MLR (Propositionalization / GSL)." n_prop = props[0] # is a matrix, i.e. two nested Arrays. @@ -251,7 +254,7 @@ module OpenTox end end - prediction = 10**(prediction.to_f) + neg_offset # reverse log10 + prediction = div_offset * (10**(prediction.to_f)) + offset # reverse transformation LOGGER.debug "Prediction is: '" + prediction.to_s + "'." rescue Exception => e LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" @@ -317,13 +320,24 @@ module OpenTox sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors begin - min,max = acts.minmax - neg_offset = 1.0 - min # negative offset to min element - acts = acts.collect do |a| - Math.log10(a-neg_offset) # everything >1, then take log10 - end + offset = 1.0 - acts.minmax[0] # offset to min element + offset = -1.0 * offset if offset>0.0 + + puts "OFFSET MV" + acts = acts.collect { |a| a - offset } # slide + puts acts.to_yaml + + puts "OFFSET LOG" + acts = acts.collect { |a| Math.log10 a } # everything >1, then take log10 + puts acts.to_yaml + + div_offset = acts.minmax[1] # dynamic range + puts "OFFSET DIV" + acts = acts.collect { |a| a / div_offset } # scale + puts acts.to_yaml + prediction = (props.nil? ? local_svm(neighbors, acts, sims, "nu-svr", params) : local_svm_prop(props, acts, "nu-svr", params)) - prediction = 10**(prediction.to_f) + neg_offset + prediction = (10**(div_offset*prediction.to_f))+offset LOGGER.debug "Prediction is: '" + prediction.to_s + "'." rescue Exception => e LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" -- cgit v1.2.3 From 324471e8455eb4a9256bd25aa3d33b6eb78e62ed Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Wed, 6 Jul 2011 11:38:53 +0200 Subject: Sigmoidal function for normality --- lib/algorithm.rb | 154 ++++++++++++++++++++++++++----------------------------- 1 file changed, 74 insertions(+), 80 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index d5e9caf..bdb10f7 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -204,22 +204,13 @@ module OpenTox # @return [Numeric] A prediction value. def self.local_mlr_prop(neighbors, params, props) - - acts = neighbors.collect do |n| - act = n[:activity] - act.to_f - end # activities of neighbors for supervised learning - - + raise "No neighbors found." unless neighbors.size>0 begin - min,max = acts.minmax - offset = 1.0 - min # offset to min element - offset = -1.0 * offset if offset>0.0 - div_offset = max - offset # dynamic range - acts = acts.collect { |a| a - offset } # everything >1, starting at 1 - acts = acts.collect { |a| a / div_offset } # scale to unit length - acts = acts.collect { |a| Math.log10 a } # everything >1, then take log10 + acts = neighbors.collect do |n| + act = n[:activity] + act.to_f + end # activities of neighbors for supervised learning LOGGER.debug "Local MLR (Propositionalization / GSL)." n_prop = props[0] # is a matrix, i.e. two nested Arrays. @@ -233,37 +224,31 @@ module OpenTox n_prop_tmp = Array.new ; repeat_factor.times { n_prop_tmp.concat n_prop } ; n_prop = n_prop_tmp acts_tmp = Array.new ; repeat_factor.times { acts_tmp.concat acts } ; acts = acts_tmp - if n_prop.size == 0 - raise "No neighbors found." - else - begin - LOGGER.debug "Setting GSL data ..." - # set data - prop_matrix = GSL::Matrix[n_prop, n_prop_y_size * repeat_factor, n_prop_x_size] - y = GSL::Vector[acts] - q_prop = GSL::Vector[q_prop] + LOGGER.debug "Setting GSL data ..." + # set data + prop_matrix = GSL::Matrix[n_prop, n_prop_y_size * repeat_factor, n_prop_x_size] + y = GSL::Vector[acts] + q_prop = GSL::Vector[q_prop] + + # model + support vectors + LOGGER.debug "Creating MLR model ..." + work = GSL::MultiFit::Workspace.alloc(n_prop_y_size * repeat_factor, n_prop_x_size) + c, cov, chisq, status = GSL::MultiFit::linear(prop_matrix, y, work) + LOGGER.debug "Predicting ..." + prediction = GSL::MultiFit::linear_est(q_prop, c, cov)[0] + LOGGER.debug "Prediction is: '" + prediction.to_s + "'." - # model + support vectors - LOGGER.debug "Creating MLR model ..." - work = GSL::MultiFit::Workspace.alloc(n_prop_y_size * repeat_factor, n_prop_x_size) - c, cov, chisq, status = GSL::MultiFit::linear(prop_matrix, y, work) - LOGGER.debug "Predicting ..." - prediction = GSL::MultiFit::linear_est(q_prop, c, cov)[0] - rescue Exception => e - LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" - end - end + sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors + conf = sims.inject{|sum,x| sum + x } + confidence = conf/neighbors.size if neighbors.size > 0 + {:prediction => prediction, :confidence => confidence} - prediction = div_offset * (10**(prediction.to_f)) + offset # reverse transformation - LOGGER.debug "Prediction is: '" + prediction.to_s + "'." rescue Exception => e - LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" + LOGGER.debug "#{e.class}: #{e.message}" + puts "Backtrace:\n\t#{e.backtrace.join("\n\t")}" end - sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors - conf = sims.inject{|sum,x| sum + x } - confidence = conf/neighbors.size if neighbors.size > 0 - {:prediction => prediction, :confidence => confidence} + end # Classification with majority vote from neighbors weighted by similarity @@ -313,39 +298,24 @@ module OpenTox # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required # @return [Hash] Hash with keys `:prediction, :confidence` def self.local_svm_regression(neighbors, params, props=nil) - acts = neighbors.collect do |n| - act = n[:activity] - act.to_f - end # activities of neighbors for supervised learning - sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors + raise "No neighbors found." unless neighbors.size>0 begin + acts = neighbors.collect{ |n| n[:activity].to_f } + sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } offset = 1.0 - acts.minmax[0] # offset to min element offset = -1.0 * offset if offset>0.0 - - puts "OFFSET MV" - acts = acts.collect { |a| a - offset } # slide - puts acts.to_yaml - - puts "OFFSET LOG" - acts = acts.collect { |a| Math.log10 a } # everything >1, then take log10 - puts acts.to_yaml - - div_offset = acts.minmax[1] # dynamic range - puts "OFFSET DIV" - acts = acts.collect { |a| a / div_offset } # scale - puts acts.to_yaml - - prediction = (props.nil? ? local_svm(neighbors, acts, sims, "nu-svr", params) : local_svm_prop(props, acts, "nu-svr", params)) - prediction = (10**(div_offset*prediction.to_f))+offset - LOGGER.debug "Prediction is: '" + prediction.to_s + "'." + inverter = OpenTox::Algorithm::Transform::Inverter.new(acts) + prediction = (props.nil? ? local_svm(neighbors, inverter.values, sims, "nu-svr", params) : local_svm_prop(props, inverter.values, "nu-svr", params)) + prediction = inverter.back_transform([prediction]) + LOGGER.debug "Prediction is: '" + prediction[0].to_s + "'." + conf = sims.inject{|sum,x| sum + x } + confidence = conf/neighbors.size + {:prediction => prediction, :confidence => confidence} rescue Exception => e - LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" + LOGGER.debug "#{e.class}: #{e.message}" + puts "Backtrace:\n\t#{e.backtrace.join("\n\t")}" end - - conf = sims.inject{|sum,x| sum + x } - confidence = conf/neighbors.size if neighbors.size > 0 - {:prediction => prediction, :confidence => confidence} end @@ -355,22 +325,21 @@ module OpenTox # @param [Array] props, propositionalization of neighbors and query structure e.g. [ Array_for_q, two-nested-Arrays_for_n ] # @return [Hash] Hash with keys `:prediction, :confidence` def self.local_svm_classification(neighbors, params, props=nil) - acts = neighbors.collect do |n| - act = n[:activity] - end # activities of neighbors for supervised learning -# acts_f = acts.collect {|v| v == true ? 1.0 : 0.0} - acts_f = acts - sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors + + raise "No neighbors found." unless neighbors.size>0 begin + acts = neighbors.collect { |n| act = n[:activity] } + acts_f = acts + sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors prediction = (props.nil? ? local_svm(neighbors, acts_f, sims, "C-bsvc", params) : local_svm_prop(props, acts_f, "C-bsvc", params)) LOGGER.debug "Prediction is: '" + prediction.to_s + "'." + conf = sims.inject{|sum,x| sum + x } + confidence = conf/neighbors.size if neighbors.size > 0 + {:prediction => prediction, :confidence => confidence} rescue Exception => e - LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" + LOGGER.debug "#{e.class}: #{e.message}" + puts "Backtrace:\n\t#{e.backtrace.join("\n\t")}" end - - conf = sims.inject{|sum,x| sum + x } - confidence = conf/neighbors.size if neighbors.size > 0 - {:prediction => prediction, :confidence => confidence} end @@ -442,7 +411,8 @@ module OpenTox end @r.quit # free R rescue Exception => e - LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" + LOGGER.debug "#{e.class}: #{e.message}" + puts "Backtrace:\n\t#{e.backtrace.join("\n\t")}" end end @@ -504,7 +474,8 @@ module OpenTox end @r.quit # free R rescue Exception => e - LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" + LOGGER.debug "#{e.class}: #{e.message}" + puts "Backtrace:\n\t#{e.backtrace.join("\n\t")}" end end prediction @@ -530,6 +501,29 @@ module OpenTox def features(dataset_uri,compound_uri) end end + + module Transform + include Algorithm + + class Inverter # to improve normality conditions on a vector + attr_accessor :values + + def initialize(values) + @values=values + raise "Cannot transform, values empty." if @values.size==0 + @offset = 1.0 - @values.minmax[0] + @offset = -1.0 * @offset if @offset>0.0 + @values = @values.collect { |v| v - @offset } # slide >1 + @values = @values.collect { |v| 1 / v } # invert using sigmoidal function + end + + def back_transform(values) + values = values.collect { |v| 1 / v } + values = values.collect { |v| v + @offset } + end + + end + end # Gauss kernel # @return [Float] -- cgit v1.2.3 From 835e12ba51c67b4d1ccb8d74cea987ea5ce7452b Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Wed, 6 Jul 2011 12:18:14 +0200 Subject: Fixed prediction value --- lib/algorithm.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index bdb10f7..66c1ed6 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -307,8 +307,8 @@ module OpenTox offset = -1.0 * offset if offset>0.0 inverter = OpenTox::Algorithm::Transform::Inverter.new(acts) prediction = (props.nil? ? local_svm(neighbors, inverter.values, sims, "nu-svr", params) : local_svm_prop(props, inverter.values, "nu-svr", params)) - prediction = inverter.back_transform([prediction]) - LOGGER.debug "Prediction is: '" + prediction[0].to_s + "'." + prediction = inverter.back_transform([prediction])[0] + LOGGER.debug "Prediction is: '" + prediction.to_s + "'." conf = sims.inject{|sum,x| sum + x } confidence = conf/neighbors.size {:prediction => prediction, :confidence => confidence} -- cgit v1.2.3 From 5e948e928235b6b5c4b8ad6da3d77186ffc4fba0 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Wed, 6 Jul 2011 15:14:48 +0200 Subject: Added reflection by -1 --- lib/algorithm.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 66c1ed6..4da5d56 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -511,6 +511,7 @@ module OpenTox def initialize(values) @values=values raise "Cannot transform, values empty." if @values.size==0 + @values = @values.collect { |v| -1.0 * v } # slide >1 @offset = 1.0 - @values.minmax[0] @offset = -1.0 * @offset if @offset>0.0 @values = @values.collect { |v| v - @offset } # slide >1 @@ -520,6 +521,7 @@ module OpenTox def back_transform(values) values = values.collect { |v| 1 / v } values = values.collect { |v| v + @offset } + values = values.collect { |v| -1.0 * v } end end -- cgit v1.2.3 From b9937c22f7f6faaf4b91f33e0f9bb106242b2fea Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Fri, 8 Jul 2011 14:54:03 +0200 Subject: Transform should have happened before --- lib/algorithm.rb | 7 ++----- lib/model.rb | 3 ++- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 4da5d56..f3f8ed1 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -303,11 +303,8 @@ module OpenTox begin acts = neighbors.collect{ |n| n[:activity].to_f } sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } - offset = 1.0 - acts.minmax[0] # offset to min element - offset = -1.0 * offset if offset>0.0 - inverter = OpenTox::Algorithm::Transform::Inverter.new(acts) - prediction = (props.nil? ? local_svm(neighbors, inverter.values, sims, "nu-svr", params) : local_svm_prop(props, inverter.values, "nu-svr", params)) - prediction = inverter.back_transform([prediction])[0] + prediction = (props.nil? ? local_svm(neighbors, acts, sims, "nu-svr", params) : local_svm_prop(props, acts, "nu-svr", params)) + prediction = lazar.inverter.back_transform([prediction])[0] LOGGER.debug "Prediction is: '" + prediction.to_s + "'." conf = sims.inject{|sum,x| sum + x } confidence = conf/neighbors.size diff --git a/lib/model.rb b/lib/model.rb index ea6fd08..6abead6 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -91,7 +91,7 @@ module OpenTox include Algorithm include Model - attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim, :subjectid, :prop_kernel, :value_map, :balanced + attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim, :subjectid, :prop_kernel, :value_map, :balanced :transform def initialize(uri=nil) @@ -117,6 +117,7 @@ module OpenTox @min_sim = 0.3 @prop_kernel = false @balanced = false + @transform = nil end -- cgit v1.2.3 From eefe5b8caaece32314a03e5bd9a06f3cb8d19021 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Fri, 8 Jul 2011 15:48:49 +0200 Subject: Added log10 Transform --- lib/algorithm.rb | 31 +++++++++++++++++++++++++------ lib/model.rb | 4 ++-- 2 files changed, 27 insertions(+), 8 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index f3f8ed1..6b9f8ec 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -202,7 +202,7 @@ module OpenTox # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required # @param [Array] props, propositionalization of neighbors and query structure e.g. [ Array_for_q, two-nested-Arrays_for_n ] # @return [Numeric] A prediction value. - def self.local_mlr_prop(neighbors, params, props) + def self.local_mlr_prop(neighbors, params, props, transform=nil) raise "No neighbors found." unless neighbors.size>0 begin @@ -255,7 +255,7 @@ module OpenTox # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity` # @param [optional] params Ignored (only for compatibility with local_svm_regression) # @return [Hash] Hash with keys `:prediction, :confidence` - def self.weighted_majority_vote(neighbors,params={}, props=nil) + def self.weighted_majority_vote(neighbors,params={}, props=nil, transform=nil) neighbor_contribution = 0.0 confidence_sum = 0.0 confidence = 0.0 @@ -297,14 +297,14 @@ module OpenTox # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features` # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required # @return [Hash] Hash with keys `:prediction, :confidence` - def self.local_svm_regression(neighbors, params, props=nil) + def self.local_svm_regression(neighbors, params, props=nil, transform=nil) raise "No neighbors found." unless neighbors.size>0 begin acts = neighbors.collect{ |n| n[:activity].to_f } sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } prediction = (props.nil? ? local_svm(neighbors, acts, sims, "nu-svr", params) : local_svm_prop(props, acts, "nu-svr", params)) - prediction = lazar.inverter.back_transform([prediction])[0] + prediction = transform.back_transform([prediction])[0] LOGGER.debug "Prediction is: '" + prediction.to_s + "'." conf = sims.inject{|sum,x| sum + x } confidence = conf/neighbors.size @@ -321,7 +321,7 @@ module OpenTox # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required # @param [Array] props, propositionalization of neighbors and query structure e.g. [ Array_for_q, two-nested-Arrays_for_n ] # @return [Hash] Hash with keys `:prediction, :confidence` - def self.local_svm_classification(neighbors, params, props=nil) + def self.local_svm_classification(neighbors, params, props=nil, transform=nil) raise "No neighbors found." unless neighbors.size>0 begin @@ -508,7 +508,7 @@ module OpenTox def initialize(values) @values=values raise "Cannot transform, values empty." if @values.size==0 - @values = @values.collect { |v| -1.0 * v } # slide >1 + @values = @values.collect { |v| -1.0 * v } @offset = 1.0 - @values.minmax[0] @offset = -1.0 * @offset if @offset>0.0 @values = @values.collect { |v| v - @offset } # slide >1 @@ -522,6 +522,25 @@ module OpenTox end end + + class Log10 # to improve normality conditions on a vector + attr_accessor :values + + def initialize(values) + @values=values + raise "Cannot transform, values empty." if @values.size==0 + @offset = 1.0 - @values.minmax[0] + @offset = -1.0 * @offset if @offset>0.0 + @values = @values.collect { |v| v - @offset } # slide >1 + @values = @values.collect { |v| Math::log10 v } # take log10 + end + + def back_transform(values) + values = values.collect { |v| 10**v } + values = values.collect { |v| v + @offset } + end + + end end # Gauss kernel diff --git a/lib/model.rb b/lib/model.rb index 6abead6..6a64c53 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -91,7 +91,7 @@ module OpenTox include Algorithm include Model - attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim, :subjectid, :prop_kernel, :value_map, :balanced :transform + attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim, :subjectid, :prop_kernel, :value_map, :balanced, :transform def initialize(uri=nil) @@ -282,7 +282,7 @@ module OpenTox else props = nil end - prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values, :value_map => @value_map}, props)") + prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values, :value_map => @value_map}, props, @transform)") end value_feature_uri = File.join( @uri, "predicted", "value") -- cgit v1.2.3 From c5be6291668d9e0187d6fd7a62a8c1efdcc0b7a4 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Fri, 8 Jul 2011 16:00:12 +0200 Subject: Added careful log taking --- lib/algorithm.rb | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 6b9f8ec..35a3437 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -512,7 +512,7 @@ module OpenTox @offset = 1.0 - @values.minmax[0] @offset = -1.0 * @offset if @offset>0.0 @values = @values.collect { |v| v - @offset } # slide >1 - @values = @values.collect { |v| 1 / v } # invert using sigmoidal function + @values = @values.collect { |v| 1 / v } # invert end def back_transform(values) @@ -529,10 +529,17 @@ module OpenTox def initialize(values) @values=values raise "Cannot transform, values empty." if @values.size==0 - @offset = 1.0 - @values.minmax[0] + has_negatives = false + @values.each { |v| + if v<0.0 + has_negatives = true + end + } + has_negatives ? @anchor_point = 1.0 : @anchor_point = 0.0 + @offset = @anchor_point - @values.minmax[0] @offset = -1.0 * @offset if @offset>0.0 - @values = @values.collect { |v| v - @offset } # slide >1 - @values = @values.collect { |v| Math::log10 v } # take log10 + @values = @values.collect { |v| v - @offset } # slide > anchor + @values = @values.collect { |v| Math::log10 v } # log10 end def back_transform(values) -- cgit v1.2.3 From 772e146a9d61a6d1df74a732dde8aab0188f523f Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Mon, 11 Jul 2011 08:47:54 +0200 Subject: Fixed representation of transform --- lib/algorithm.rb | 92 ++++++++++++++++++++++++++++++-------------------------- 1 file changed, 49 insertions(+), 43 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 35a3437..dddbad7 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -245,7 +245,7 @@ module OpenTox rescue Exception => e LOGGER.debug "#{e.class}: #{e.message}" - puts "Backtrace:\n\t#{e.backtrace.join("\n\t")}" + LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" end @@ -304,14 +304,15 @@ module OpenTox acts = neighbors.collect{ |n| n[:activity].to_f } sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } prediction = (props.nil? ? local_svm(neighbors, acts, sims, "nu-svr", params) : local_svm_prop(props, acts, "nu-svr", params)) - prediction = transform.back_transform([prediction])[0] + transformer = eval "#{transform[:class]}.new ([#{prediction}], #{transform[:offset]})" + prediction = transformer.values[0] LOGGER.debug "Prediction is: '" + prediction.to_s + "'." conf = sims.inject{|sum,x| sum + x } confidence = conf/neighbors.size {:prediction => prediction, :confidence => confidence} rescue Exception => e LOGGER.debug "#{e.class}: #{e.message}" - puts "Backtrace:\n\t#{e.backtrace.join("\n\t")}" + LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" end end @@ -335,7 +336,7 @@ module OpenTox {:prediction => prediction, :confidence => confidence} rescue Exception => e LOGGER.debug "#{e.class}: #{e.message}" - puts "Backtrace:\n\t#{e.backtrace.join("\n\t")}" + LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" end end @@ -409,7 +410,7 @@ module OpenTox @r.quit # free R rescue Exception => e LOGGER.debug "#{e.class}: #{e.message}" - puts "Backtrace:\n\t#{e.backtrace.join("\n\t")}" + LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" end end @@ -472,7 +473,7 @@ module OpenTox @r.quit # free R rescue Exception => e LOGGER.debug "#{e.class}: #{e.message}" - puts "Backtrace:\n\t#{e.backtrace.join("\n\t")}" + LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" end end prediction @@ -503,50 +504,55 @@ module OpenTox include Algorithm class Inverter # to improve normality conditions on a vector - attr_accessor :values - - def initialize(values) - @values=values - raise "Cannot transform, values empty." if @values.size==0 - @values = @values.collect { |v| -1.0 * v } - @offset = 1.0 - @values.minmax[0] - @offset = -1.0 * @offset if @offset>0.0 - @values = @values.collect { |v| v - @offset } # slide >1 - @values = @values.collect { |v| 1 / v } # invert - end + attr_accessor :offset, :values - def back_transform(values) - values = values.collect { |v| 1 / v } - values = values.collect { |v| v + @offset } - values = values.collect { |v| -1.0 * v } + def initialize *args + case args.size + when 1 + begin + @values=args[0] + raise "Cannot transform, values empty." if @values.size==0 + @values = @values.collect { |v| -1.0 * v } + @offset = 1.0 - @values.minmax[0] + @offset = -1.0 * @offset if @offset>0.0 + @values = @values.collect { |v| v - @offset } # slide >1 + @values = @values.collect { |v| 1 / v } # invert to [0,1] + rescue Exception => e + LOGGER.debug "#{e.class}: #{e.message}" + LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" + end + when 2 + @offset = args[1].to_f + @values = args[0].collect { |v| 1 / v } + @values = @values.collect { |v| v + @offset } + @values = @values.collect { |v| -1.0 * v } + end end - end class Log10 # to improve normality conditions on a vector - attr_accessor :values - - def initialize(values) - @values=values - raise "Cannot transform, values empty." if @values.size==0 - has_negatives = false - @values.each { |v| - if v<0.0 - has_negatives = true - end - } - has_negatives ? @anchor_point = 1.0 : @anchor_point = 0.0 - @offset = @anchor_point - @values.minmax[0] - @offset = -1.0 * @offset if @offset>0.0 - @values = @values.collect { |v| v - @offset } # slide > anchor - @values = @values.collect { |v| Math::log10 v } # log10 - end + attr_accessor :offset, :values - def back_transform(values) - values = values.collect { |v| 10**v } - values = values.collect { |v| v + @offset } + def initialize *args + case args.size + when 1 + begin + @values=args[0] + raise "Cannot transform, values empty." if @values.size==0 + @offset = @values.minmax[0] + @offset = -1.0 * @offset if @offset>0.0 + @values = @values.collect { |v| v - @offset } # slide > anchor + @values = @values.collect { |v| Math::log10 v } # log10 (can fail) + rescue Exception => e + LOGGER.debug "#{e.class}: #{e.message}" + LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" + end + when 2 + @offset = args[1].to_f + @values = args[0].collect { |v| 10**v } + @values = @values.collect { |v| v + @offset } + end end - end end -- cgit v1.2.3 From d21a6f669f248c85dfd649f26be6d1dd9aac6bbc Mon Sep 17 00:00:00 2001 From: dv Date: Mon, 11 Jul 2011 12:43:38 +0200 Subject: changes for support --- lib/algorithm.rb | 46 +++++++++++++++++++++++++++++++++++++++++----- lib/model.rb | 30 ++++++++++++++++++++++++++++-- 2 files changed, 69 insertions(+), 7 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index bfa79d3..5c00aa8 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -162,19 +162,39 @@ module OpenTox # Tanimoto similarity # @param [Array] features_a Features of first compound - # @param [Array] features_b Features of second compound + # @param [Array][Hash] features_b Features of second compound # @param [optional, Hash] weights Weights for all features # @return [Float] (Weighted) tanimoto similarity def self.tanimoto(features_a,features_b,weights=nil) + LOGGER.debug "dv ------------ class: #{features_b.class}" common_features = features_a & features_b all_features = (features_a + features_b).uniq common_p_sum = 0.0 if common_features.size > 0 if weights - common_features.each{|f| common_p_sum += Algorithm.gauss(weights[f])} - all_p_sum = 0.0 - all_features.each{|f| all_p_sum += Algorithm.gauss(weights[f])} + if @nr_hits == true + LOGGER.debug "dv --------------- NR_HITS TRUE" + else + common_features.each{|f| common_p_sum += Algorithm.gauss(weights[f])} + all_p_sum = 0.0 + all_features.each{|f| all_p_sum += Algorithm.gauss(weights[f])} + end common_p_sum/all_p_sum + #if frequencies + # #LOGGER.debug "dv --------------- all_features: #{all_features} \n common_features: #{common_features} " + # common_features.each do |f| + # #LOGGER.debug "dv --------------- weight: #{weights[f]} frequency: #{frequencies[f]}" + # common_p_sum += Algorithm.gauss(weights[f]*frequencies[f].to_f) + # end + # all_p_sum = 0.0 + # all_features.each{|f| all_p_sum += Algorithm.gauss(weights[f]*frequencies[f].to_f)} + #else + # common_features.each{|f| common_p_sum += Algorithm.gauss(weights[f])} + # all_p_sum = 0.0 + # all_features.each{|f| all_p_sum += Algorithm.gauss(weights[f])} + #end + # #LOGGER.debug "dv -------------- common_p_sum: #{common_p_sum} all_p_sum: #{all_p_sum}" + # common_p_sum/all_p_sum else common_features.to_f/all_features end @@ -596,7 +616,23 @@ module OpenTox } max end - + + # Frequency check befor Simularity calculation + # @param [Array] similarity_algorithm, + # @param [Array] features_a + # @param [Array] [Hash] (feature_b => frequency} + # @param [] p_values + # return sim + #def self.similarity(similarity_algorithm, features_a, features_b, p_values = nil) + # if @nr_hits == true + # + # features_b_f = + # eval("#{similarity_algorithm}(features_a,features_b_f,p_values,frequencies_b)") + # else + # eval("#{similarity_algorithm}(features_a,features_b,p_values)") + # end + #end + end end diff --git a/lib/model.rb b/lib/model.rb index ea6fd08..6d9b1cb 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -91,7 +91,8 @@ module OpenTox include Algorithm include Model - attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim, :subjectid, :prop_kernel, :value_map, :balanced + attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :frequencies, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim, :subjectid, :prop_kernel, :value_map, :balanced, :nr_hits + def initialize(uri=nil) @@ -107,6 +108,7 @@ module OpenTox @effects = {} @activities = {} @p_values = {} + @frequencies = {} @fingerprints = {} @value_map = {} @@ -114,6 +116,7 @@ module OpenTox @similarity_algorithm = "Similarity.tanimoto" @prediction_algorithm = "Neighbors.weighted_majority_vote" + @nr_hits = false @min_sim = 0.3 @prop_kernel = false @balanced = false @@ -398,14 +401,37 @@ module OpenTox def neighbors @compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm @neighbors = [] - @fingerprints.each do |training_compound,training_features| # AM: access all compounds + @fingerprints.each do |training_compound, training_features | # AM: access all compounds + #LOGGER.debug "dv ---------------- training_features: #{training_features.class}, #{training_features}, #{training_compound.class}, #{training_compound} " add_neighbor training_features, training_compound end end # Adds a neighbor to @neighbors if it passes the similarity threshold. def add_neighbor(training_features, training_compound) + #LOGGER.debug "dv ------ xyz ----- compound_features: '#{@compound_features}' \n training_features: '#{training_features}'\n training_compound: '#{training_compound}'" + sim = 0.0 + #if @frequencies.empty? + # LOGGER.debug "dv ----------------- frequencies is empty goto #{@similarity_algorithm}" + # sim = eval("#{@similarity_algorithm}(@compound_features,training_features,@p_values)") + #else + # LOGGER.debug "dv ----------------- with frequencies goto #{@similarity_algorithm}, training_compound #{training_compound}" + # t_compound_freq = {} + # training_features.each do |f| + # #LOGGER.debug "dv ----------------- with feature: #{f}, training_compound: #{training_compound}\n" + # @frequencies[f.to_s].each do |cf| + # if cf.keys.to_s == training_compound.to_s + # #LOGGER.debug "#{cf.keys} =? #{training_compound}----------------- #{f} #{cf[training_compound.to_s]}" + # t_compound_freq[f] = cf[training_compound.to_s] + # #LOGGER.debug "t_compound_freq: #{t_compound_freq}" + # end + # end + # end + # #LOGGER.debug "t_compound_freq: #{t_compound_freq}" + # sim = eval("#{@similarity_algorithm}(@compound_features,training_features,@p_values,t_compound_freq)") + #end sim = eval("#{@similarity_algorithm}(@compound_features,training_features,@p_values)") + LOGGER.debug "sim is: #{sim}" if sim > @min_sim @activities[training_compound].each do |act| @neighbors << { -- cgit v1.2.3 From 937090b89fb718ac9b1c9d7e802f9e326c9753cd Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Mon, 11 Jul 2011 14:57:07 +0200 Subject: First PCA version --- Rakefile | 2 ++ lib/algorithm.rb | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 77 insertions(+), 9 deletions(-) diff --git a/Rakefile b/Rakefile index bd22c16..602f0f4 100644 --- a/Rakefile +++ b/Rakefile @@ -44,6 +44,8 @@ begin gem.add_dependency "dm-sqlite-adapter", "=1.1.0" gem.add_dependency "ruby-plot", "=0.5.0" gem.add_dependency "gsl", "=1.14.7" + gem.add_dependency "statsample", "=2.1.0" + gem.add_dependency "statsample-optimization", "=1.1.0" gem.add_development_dependency 'jeweler' gem.files = FileList["[A-Z]*", "{bin,generators,lib,test}/**/*", 'lib/jeweler/templates/.gitignore'] diff --git a/lib/algorithm.rb b/lib/algorithm.rb index dddbad7..3357d7a 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -218,14 +218,44 @@ module OpenTox n_prop_x_size = n_prop[0].size n_prop_y_size = n_prop.size + data = n_prop << q_prop # attach q_prop + begin + nr_cases = data.size + nr_features = data[0].size + rescue Exception => e + LOGGER.debug "#{e.class}: #{e.message}" + LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" + end + + # Auto-Scaling + LOGGER.debug "Scaling ..." + data_matrix = GSL::Matrix.alloc(n_prop.flatten, nr_cases, nr_features) + (0..nr_features-1).each { |i| + column_view = data_matrix.col(i) + OpenTox::Algorithm::AutoScale.new(column_view) + } + + # PCA + LOGGER.debug "PCA ..." + data_matrix_hash = Hash.new + (0..nr_features-1).each { |i| + column_view = data_matrix.col(i) + data_matrix_hash[i] = column_view.to_scale + } + ds = data_matrix_hash.to_dataset + pca = OpenTox::Algorithm::PCA.new(dataset) + + # Mangle data + n_prop = pca.dataset_transformed_matrix.transpose.to_a + q_prop = n_prop.pop # Restore query n_prop.flatten! y_x_rel = n_prop_y_size.to_f / n_prop_x_size repeat_factor = (1/y_x_rel).ceil n_prop_tmp = Array.new ; repeat_factor.times { n_prop_tmp.concat n_prop } ; n_prop = n_prop_tmp acts_tmp = Array.new ; repeat_factor.times { acts_tmp.concat acts } ; acts = acts_tmp - LOGGER.debug "Setting GSL data ..." # set data + LOGGER.debug "Setting prop data ..." prop_matrix = GSL::Matrix[n_prop, n_prop_y_size * repeat_factor, n_prop_x_size] y = GSL::Vector[acts] q_prop = GSL::Vector[q_prop] @@ -512,11 +542,11 @@ module OpenTox begin @values=args[0] raise "Cannot transform, values empty." if @values.size==0 - @values = @values.collect { |v| -1.0 * v } + @values.collect! { |v| -1.0 * v } @offset = 1.0 - @values.minmax[0] @offset = -1.0 * @offset if @offset>0.0 - @values = @values.collect { |v| v - @offset } # slide >1 - @values = @values.collect { |v| 1 / v } # invert to [0,1] + @values.collect! { |v| v - @offset } # slide >1 + @values.collect! { |v| 1 / v } # invert to [0,1] rescue Exception => e LOGGER.debug "#{e.class}: #{e.message}" LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" @@ -524,8 +554,8 @@ module OpenTox when 2 @offset = args[1].to_f @values = args[0].collect { |v| 1 / v } - @values = @values.collect { |v| v + @offset } - @values = @values.collect { |v| -1.0 * v } + @values.collect! { |v| v + @offset } + @values.collect! { |v| -1.0 * v } end end end @@ -541,8 +571,8 @@ module OpenTox raise "Cannot transform, values empty." if @values.size==0 @offset = @values.minmax[0] @offset = -1.0 * @offset if @offset>0.0 - @values = @values.collect { |v| v - @offset } # slide > anchor - @values = @values.collect { |v| Math::log10 v } # log10 (can fail) + @values.collect! { |v| v - @offset } # slide > anchor + @values.collect! { |v| Math::log10 v } # log10 (can fail) rescue Exception => e LOGGER.debug "#{e.class}: #{e.message}" LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" @@ -550,10 +580,46 @@ module OpenTox when 2 @offset = args[1].to_f @values = args[0].collect { |v| 10**v } - @values = @values.collect { |v| v + @offset } + @values.collect! { |v| v + @offset } end end end + + class AutoScale # center on mean and divide by stdev + attr_accessor :values + + def initialize values + mean = values.to_scale.mean + stdev = values.to_scale.standard_deviation_sample + @values = values.collect{|vi| vi - mean } + @values.collect! {|vi| vi / stdev } + end + end + + class PCA + attr_accessor :dataset_transformed_matrix + + def initialize dataset + @cor_matrix=Statsample::Bivariate.correlation_matrix(dataset) + pca=Statsample::Factor::PCA.new(@cor_matrix) + + eigenvalue_sums = Array.new + (0..dataset.fields.size-1).each { |i| + eigenvalue_sums << pca.eigenvalues[0..i].inject{ |sum, ev| sum + ev } + } + # compression cutoff @0.9 + @eigenvectors_selected = Array.new + pca.eigenvectors.each_with_index { |ev, i| + if (eigenvalue_sums[i] <= 0.9) || (@eigenvectors_selected.size == 0) + @eigenvectors_selected << ev.to_a + end + } + eigenvector_matrix = GSL::Matrix.alloc(@eigenvectors_selected.flatten, dataset.fields.size, @eigenvectors_selected.size).transpose + dataset_matrix = dataset.to_gsl.transpose + @dataset_transformed_matrix = eigenvector_matrix * dataset_matrix # dataset_transformed_matrix is in row-wise notation now + end + end + end # Gauss kernel -- cgit v1.2.3 From 77efc9700a2fcba167ce8b68bafaefff1584329f Mon Sep 17 00:00:00 2001 From: mguetlein Date: Mon, 11 Jul 2011 14:59:59 +0200 Subject: add waiting task for lazar model building --- lib/model.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/model.rb b/lib/model.rb index ea6fd08..825f697 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -136,10 +136,10 @@ module OpenTox # Create a new lazar model # @param [optional,Hash] params Parameters for the lazar algorithm (OpenTox::Algorithm::Lazar) # @return [OpenTox::Model::Lazar] lazar model - def self.create(params) + def self.create(params, waiting_task=nil ) subjectid = params[:subjectid] lazar_algorithm = OpenTox::Algorithm::Generic.new File.join( CONFIG[:services]["opentox-algorithm"],"lazar") - model_uri = lazar_algorithm.run(params) + model_uri = lazar_algorithm.run(params, waiting_task) OpenTox::Model::Lazar.find(model_uri, subjectid) end -- cgit v1.2.3 From a7031978a0c4573a06d432b225ead6f758a15a08 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Mon, 11 Jul 2011 17:36:58 +0200 Subject: 2nd PCA version --- lib/algorithm.rb | 45 +++++++++++++++++++++++++++------------------ 1 file changed, 27 insertions(+), 18 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 3357d7a..697b7df 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -3,6 +3,7 @@ # avoids compiling R with X R = nil require "rinruby" +require "statsample" module OpenTox @@ -215,54 +216,61 @@ module OpenTox LOGGER.debug "Local MLR (Propositionalization / GSL)." n_prop = props[0] # is a matrix, i.e. two nested Arrays. q_prop = props[1] # is an Array. - n_prop_x_size = n_prop[0].size - n_prop_y_size = n_prop.size - - data = n_prop << q_prop # attach q_prop + + n_prop = n_prop << q_prop # attach q_prop begin - nr_cases = data.size - nr_features = data[0].size + nr_cases = n_prop.size + nr_features = n_prop[0].size rescue Exception => e LOGGER.debug "#{e.class}: #{e.message}" LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" end - + # Auto-Scaling LOGGER.debug "Scaling ..." data_matrix = GSL::Matrix.alloc(n_prop.flatten, nr_cases, nr_features) - (0..nr_features-1).each { |i| + (0..nr_cases-1).each { |i| column_view = data_matrix.col(i) - OpenTox::Algorithm::AutoScale.new(column_view) + OpenTox::Algorithm::Transform::AutoScale.new(column_view) } # PCA LOGGER.debug "PCA ..." data_matrix_hash = Hash.new - (0..nr_features-1).each { |i| + (0..nr_cases-1).each { |i| column_view = data_matrix.col(i) data_matrix_hash[i] = column_view.to_scale } - ds = data_matrix_hash.to_dataset - pca = OpenTox::Algorithm::PCA.new(dataset) + dataset_hash = data_matrix_hash.to_dataset + pca = OpenTox::Algorithm::Transform::PCA.new(dataset_hash) # Mangle data n_prop = pca.dataset_transformed_matrix.transpose.to_a q_prop = n_prop.pop # Restore query + + begin + nr_cases = n_prop.size + nr_features = n_prop[0].size + rescue Exception => e + LOGGER.debug "#{e.class}: #{e.message}" + LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" + end + n_prop.flatten! - y_x_rel = n_prop_y_size.to_f / n_prop_x_size + y_x_rel = nr_cases.to_f / nr_features repeat_factor = (1/y_x_rel).ceil n_prop_tmp = Array.new ; repeat_factor.times { n_prop_tmp.concat n_prop } ; n_prop = n_prop_tmp acts_tmp = Array.new ; repeat_factor.times { acts_tmp.concat acts } ; acts = acts_tmp # set data LOGGER.debug "Setting prop data ..." - prop_matrix = GSL::Matrix[n_prop, n_prop_y_size * repeat_factor, n_prop_x_size] + prop_matrix = GSL::Matrix[n_prop, nr_cases * repeat_factor, nr_features] y = GSL::Vector[acts] q_prop = GSL::Vector[q_prop] # model + support vectors LOGGER.debug "Creating MLR model ..." - work = GSL::MultiFit::Workspace.alloc(n_prop_y_size * repeat_factor, n_prop_x_size) + work = GSL::MultiFit::Workspace.alloc(nr_cases * repeat_factor, nr_features) c, cov, chisq, status = GSL::MultiFit::linear(prop_matrix, y, work) LOGGER.debug "Predicting ..." prediction = GSL::MultiFit::linear_est(q_prop, c, cov)[0] @@ -600,6 +608,7 @@ module OpenTox attr_accessor :dataset_transformed_matrix def initialize dataset + @cor_matrix=Statsample::Bivariate.correlation_matrix(dataset) pca=Statsample::Factor::PCA.new(@cor_matrix) @@ -610,7 +619,7 @@ module OpenTox # compression cutoff @0.9 @eigenvectors_selected = Array.new pca.eigenvectors.each_with_index { |ev, i| - if (eigenvalue_sums[i] <= 0.9) || (@eigenvectors_selected.size == 0) + if (eigenvalue_sums[i] <= (0.9*dataset.cases)) || (@eigenvectors_selected.size == 0) @eigenvectors_selected << ev.to_a end } @@ -660,9 +669,9 @@ module OpenTox # @param [Integer] per-mil value # return [Integer] min-frequency def self.min_frequency(training_dataset,per_mil) - minfreq = per_mil*training_dataset.compounds.size/1000 # AM sugg. 8-10 per mil for BBRC, 50 per mil for LAST + minfreq = per_mil * training_dataset.compounds.size.to_f / 1000.0 # AM sugg. 8-10 per mil for BBRC, 50 per mil for LAST minfreq = 2 unless minfreq > 2 - minfreq + Integer (minfreq) end # Effect calculation for classification -- cgit v1.2.3 From b6df468e2457c62598b084ef17b1c1df7ad4dbaf Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Tue, 12 Jul 2011 09:32:30 +0200 Subject: PCA 3rd version --- lib/algorithm.rb | 61 ++++++++++++++++++++++++++++++++------------------------ 1 file changed, 35 insertions(+), 26 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 697b7df..402a373 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -218,44 +218,41 @@ module OpenTox q_prop = props[1] # is an Array. n_prop = n_prop << q_prop # attach q_prop - begin - nr_cases = n_prop.size - nr_features = n_prop[0].size - rescue Exception => e - LOGGER.debug "#{e.class}: #{e.message}" - LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" - end + nr_cases, nr_features = get_sizes n_prop + - # Auto-Scaling + # Centering and Scaling LOGGER.debug "Scaling ..." data_matrix = GSL::Matrix.alloc(n_prop.flatten, nr_cases, nr_features) - (0..nr_cases-1).each { |i| + (0..nr_features-1).each { |i| column_view = data_matrix.col(i) OpenTox::Algorithm::Transform::AutoScale.new(column_view) } - # PCA + # Principal Components Analysis LOGGER.debug "PCA ..." data_matrix_hash = Hash.new - (0..nr_cases-1).each { |i| + (0..nr_features-1).each { |i| column_view = data_matrix.col(i) data_matrix_hash[i] = column_view.to_scale } dataset_hash = data_matrix_hash.to_dataset pca = OpenTox::Algorithm::Transform::PCA.new(dataset_hash) - - # Mangle data n_prop = pca.dataset_transformed_matrix.transpose.to_a - q_prop = n_prop.pop # Restore query + nr_cases, nr_features = get_sizes n_prop - begin - nr_cases = n_prop.size - nr_features = n_prop[0].size - rescue Exception => e - LOGGER.debug "#{e.class}: #{e.message}" - LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" - end + # Normalizing along each Principal Component + LOGGER.debug "Normalizing..." + data_matrix = GSL::Matrix.alloc(n_prop.flatten, nr_cases, nr_features) + (0..nr_features-1).each { |i| + column_view = data_matrix.col(i) + normalizer = OpenTox::Algorithm::Transform::Log10.new(column_view.to_a) + column_view = normalizer.values.to_gv + } + # Mangle data + q_prop = n_prop.pop + nr_cases, nr_features = get_sizes n_prop n_prop.flatten! y_x_rel = nr_cases.to_f / nr_features repeat_factor = (1/y_x_rel).ceil @@ -286,8 +283,7 @@ module OpenTox LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" end - - end + end # Classification with majority vote from neighbors weighted by similarity # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity` @@ -517,6 +513,18 @@ module OpenTox prediction end + # Get X and Y size of a nested Array + def self.get_sizes(matrix) + begin + nr_cases = matrix.size + nr_features = matrix[0].size + rescue Exception => e + LOGGER.debug "#{e.class}: #{e.message}" + LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" + end + puts "NRC: #{nr_cases}, NRF: #{nr_features}" + [ nr_cases, nr_features ] + end end @@ -572,6 +580,7 @@ module OpenTox attr_accessor :offset, :values def initialize *args + @distance_to_zero = 0.000001 case args.size when 1 begin @@ -580,6 +589,7 @@ module OpenTox @offset = @values.minmax[0] @offset = -1.0 * @offset if @offset>0.0 @values.collect! { |v| v - @offset } # slide > anchor + @values.collect! { |v| v + @distance_to_zero } # @values.collect! { |v| Math::log10 v } # log10 (can fail) rescue Exception => e LOGGER.debug "#{e.class}: #{e.message}" @@ -588,6 +598,7 @@ module OpenTox when 2 @offset = args[1].to_f @values = args[0].collect { |v| 10**v } + @values.collect! { |v| v - @distance_to_zero } @values.collect! { |v| v + @offset } end end @@ -608,10 +619,8 @@ module OpenTox attr_accessor :dataset_transformed_matrix def initialize dataset - @cor_matrix=Statsample::Bivariate.correlation_matrix(dataset) pca=Statsample::Factor::PCA.new(@cor_matrix) - eigenvalue_sums = Array.new (0..dataset.fields.size-1).each { |i| eigenvalue_sums << pca.eigenvalues[0..i].inject{ |sum, ev| sum + ev } @@ -619,7 +628,7 @@ module OpenTox # compression cutoff @0.9 @eigenvectors_selected = Array.new pca.eigenvectors.each_with_index { |ev, i| - if (eigenvalue_sums[i] <= (0.9*dataset.cases)) || (@eigenvectors_selected.size == 0) + if (eigenvalue_sums[i] <= (0.9*dataset.fields.size)) || (@eigenvectors_selected.size == 0) @eigenvectors_selected << ev.to_a end } -- cgit v1.2.3 From 73a5e7644bf282fee73f061ce44492ce6da95aed Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Tue, 12 Jul 2011 11:36:40 +0200 Subject: PCA 4th version --- lib/algorithm.rb | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 402a373..f61628d 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -209,6 +209,7 @@ module OpenTox begin acts = neighbors.collect do |n| + puts n.to_yaml act = n[:activity] act.to_f end # activities of neighbors for supervised learning @@ -221,16 +222,15 @@ module OpenTox nr_cases, nr_features = get_sizes n_prop + LOGGER.debug "PCA..." # Centering and Scaling - LOGGER.debug "Scaling ..." data_matrix = GSL::Matrix.alloc(n_prop.flatten, nr_cases, nr_features) (0..nr_features-1).each { |i| - column_view = data_matrix.col(i) - OpenTox::Algorithm::Transform::AutoScale.new(column_view) + autoscaler = OpenTox::Algorithm::Transform::AutoScale.new(data_matrix.col(i)) + data_matrix.col(i)[0..nr_cases-1] = autoscaler.values } # Principal Components Analysis - LOGGER.debug "PCA ..." data_matrix_hash = Hash.new (0..nr_features-1).each { |i| column_view = data_matrix.col(i) @@ -242,16 +242,14 @@ module OpenTox nr_cases, nr_features = get_sizes n_prop # Normalizing along each Principal Component - LOGGER.debug "Normalizing..." - data_matrix = GSL::Matrix.alloc(n_prop.flatten, nr_cases, nr_features) - (0..nr_features-1).each { |i| - column_view = data_matrix.col(i) - normalizer = OpenTox::Algorithm::Transform::Log10.new(column_view.to_a) - column_view = normalizer.values.to_gv - } + #data_matrix = GSL::Matrix.alloc(n_prop.flatten, nr_cases, nr_features) + #(0..nr_features-1).each { |i| + # normalizer = OpenTox::Algorithm::Transform::Log10.new(data_matrix.col(i).to_a) + # data_matrix.col(i)[0..nr_cases-1] = normalizer.values + #} # Mangle data - q_prop = n_prop.pop + q_prop = n_prop.pop # detach query instance nr_cases, nr_features = get_sizes n_prop n_prop.flatten! y_x_rel = nr_cases.to_f / nr_features @@ -271,6 +269,8 @@ module OpenTox c, cov, chisq, status = GSL::MultiFit::linear(prop_matrix, y, work) LOGGER.debug "Predicting ..." prediction = GSL::MultiFit::linear_est(q_prop, c, cov)[0] + transformer = eval "#{transform[:class]}.new ([#{prediction}], #{transform[:offset]})" + prediction = transformer.values[0] LOGGER.debug "Prediction is: '" + prediction.to_s + "'." sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors @@ -556,9 +556,9 @@ module OpenTox case args.size when 1 begin - @values=args[0] + values=args[0] raise "Cannot transform, values empty." if @values.size==0 - @values.collect! { |v| -1.0 * v } + @values = values.collect { |v| -1.0 * v } @offset = 1.0 - @values.minmax[0] @offset = -1.0 * @offset if @offset>0.0 @values.collect! { |v| v - @offset } # slide >1 @@ -584,11 +584,11 @@ module OpenTox case args.size when 1 begin - @values=args[0] - raise "Cannot transform, values empty." if @values.size==0 - @offset = @values.minmax[0] + values=args[0] + raise "Cannot transform, values empty." if values.size==0 + @offset = values.minmax[0] @offset = -1.0 * @offset if @offset>0.0 - @values.collect! { |v| v - @offset } # slide > anchor + @values = values.collect { |v| v - @offset } # slide > anchor @values.collect! { |v| v + @distance_to_zero } # @values.collect! { |v| Math::log10 v } # log10 (can fail) rescue Exception => e @@ -628,7 +628,7 @@ module OpenTox # compression cutoff @0.9 @eigenvectors_selected = Array.new pca.eigenvectors.each_with_index { |ev, i| - if (eigenvalue_sums[i] <= (0.9*dataset.fields.size)) || (@eigenvectors_selected.size == 0) + if (eigenvalue_sums[i] <= (0.95*dataset.fields.size)) || (@eigenvectors_selected.size == 0) @eigenvectors_selected << ev.to_a end } -- cgit v1.2.3 From 68178cc26e32d37a32cc0b67cabd93b7271c6c97 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Tue, 12 Jul 2011 12:31:47 +0200 Subject: PCA v5 --- lib/algorithm.rb | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index f61628d..e5e7d4d 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -223,12 +223,13 @@ module OpenTox LOGGER.debug "PCA..." - # Centering and Scaling data_matrix = GSL::Matrix.alloc(n_prop.flatten, nr_cases, nr_features) - (0..nr_features-1).each { |i| - autoscaler = OpenTox::Algorithm::Transform::AutoScale.new(data_matrix.col(i)) - data_matrix.col(i)[0..nr_cases-1] = autoscaler.values - } + + # Centering and Scaling + #(0..nr_features-1).each { |i| + # autoscaler = OpenTox::Algorithm::Transform::AutoScale.new(data_matrix.col(i)) + # data_matrix.col(i)[0..nr_cases-1] = autoscaler.values + #} # Principal Components Analysis data_matrix_hash = Hash.new @@ -239,9 +240,9 @@ module OpenTox dataset_hash = data_matrix_hash.to_dataset pca = OpenTox::Algorithm::Transform::PCA.new(dataset_hash) n_prop = pca.dataset_transformed_matrix.transpose.to_a - nr_cases, nr_features = get_sizes n_prop - # Normalizing along each Principal Component + ## Normalizing along each Principal Component + #nr_cases, nr_features = get_sizes n_prop #data_matrix = GSL::Matrix.alloc(n_prop.flatten, nr_cases, nr_features) #(0..nr_features-1).each { |i| # normalizer = OpenTox::Algorithm::Transform::Log10.new(data_matrix.col(i).to_a) @@ -587,6 +588,7 @@ module OpenTox values=args[0] raise "Cannot transform, values empty." if values.size==0 @offset = values.minmax[0] + puts @offset @offset = -1.0 * @offset if @offset>0.0 @values = values.collect { |v| v - @offset } # slide > anchor @values.collect! { |v| v + @distance_to_zero } # -- cgit v1.2.3 From bd0bd9a1f8cd6daaa4ec387c2dbdf40738cd69c6 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Tue, 12 Jul 2011 14:27:56 +0200 Subject: Removed Debug --- lib/algorithm.rb | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index e5e7d4d..a22798e 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -209,7 +209,6 @@ module OpenTox begin acts = neighbors.collect do |n| - puts n.to_yaml act = n[:activity] act.to_f end # activities of neighbors for supervised learning -- cgit v1.2.3 From 733fe6dddbd427589b91eccace7a13d75c8c761a Mon Sep 17 00:00:00 2001 From: dv Date: Tue, 12 Jul 2011 14:32:31 +0200 Subject: merged to recent dev branch --- Rakefile | 1 + lib/algorithm.rb | 196 +++++++++++++++++++++++++++++++++------------------- lib/model.rb | 8 +-- lib/opentox-ruby.rb | 2 +- lib/serializer.rb | 4 ++ 5 files changed, 136 insertions(+), 75 deletions(-) diff --git a/Rakefile b/Rakefile index 834e0a3..bd22c16 100644 --- a/Rakefile +++ b/Rakefile @@ -43,6 +43,7 @@ begin gem.add_dependency "dm-validations", "=1.1.0" gem.add_dependency "dm-sqlite-adapter", "=1.1.0" gem.add_dependency "ruby-plot", "=0.5.0" + gem.add_dependency "gsl", "=1.14.7" gem.add_development_dependency 'jeweler' gem.files = FileList["[A-Z]*", "{bin,generators,lib,test}/**/*", 'lib/jeweler/templates/.gitignore'] diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 2b6086b..a50d568 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -208,6 +208,75 @@ module OpenTox module Neighbors + # Local multi-linear regression (MLR) prediction from neighbors. + # Uses propositionalized setting. + # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features` + # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required + # @param [Array] props, propositionalization of neighbors and query structure e.g. [ Array_for_q, two-nested-Arrays_for_n ] + # @return [Numeric] A prediction value. + def self.local_mlr_prop(neighbors, params, props) + + take_logs=true + + neighbors.each do |n| + if (! n[:activity].nil?) && (n[:activity].to_f < 0.0) + take_logs = false + end + end + + acts = neighbors.collect do |n| + act = n[:activity] + take_logs ? Math.log10(act.to_f) : act.to_f + end # activities of neighbors for supervised learning + + + begin + + LOGGER.debug "Local MLR (Propositionalization / GSL)." + n_prop = props[0] # is a matrix, i.e. two nested Arrays. + q_prop = props[1] # is an Array. + n_prop_x_size = n_prop[0].size + n_prop_y_size = n_prop.size + + n_prop.flatten! + y_x_rel = n_prop_y_size.to_f / n_prop_x_size + repeat_factor = (1/y_x_rel).ceil + n_prop_tmp = Array.new ; repeat_factor.times { n_prop_tmp.concat n_prop } ; n_prop = n_prop_tmp + acts_tmp = Array.new ; repeat_factor.times { acts_tmp.concat acts } ; acts = acts_tmp + + if n_prop.size == 0 + raise "No neighbors found." + else + begin + LOGGER.debug "Setting GSL data ..." + # set data + prop_matrix = GSL::Matrix[n_prop, n_prop_y_size * repeat_factor, n_prop_x_size] + y = GSL::Vector[acts] + q_prop = GSL::Vector[q_prop] + + # model + support vectors + LOGGER.debug "Creating MLR model ..." + work = GSL::MultiFit::Workspace.alloc(n_prop_y_size * repeat_factor, n_prop_x_size) + c, cov, chisq, status = GSL::MultiFit::linear(prop_matrix, y, work) + LOGGER.debug "Predicting ..." + prediction = GSL::MultiFit::linear_est(q_prop, c, cov)[0] + rescue Exception => e + LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" + end + end + + prediction = (take_logs ? 10**(prediction.to_f) : prediction.to_f) + LOGGER.debug "Prediction is: '" + prediction.to_s + "'." + rescue Exception => e + LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" + end + + sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors + conf = sims.inject{|sum,x| sum + x } + confidence = conf/neighbors.size if neighbors.size > 0 + {:prediction => prediction, :confidence => confidence} + end + # Classification with majority vote from neighbors weighted by similarity # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity` # @param [optional] params Ignored (only for compatibility with local_svm_regression) @@ -336,67 +405,67 @@ module OpenTox # @param [Array] props, propositionalization of neighbors and query structure e.g. [ Array_for_q, two-nested-Arrays_for_n ] # @return [Numeric] A prediction value. def self.local_svm(neighbors, acts, sims, type, params) - LOGGER.debug "Local SVM (Weighted Tanimoto Kernel)." - neighbor_matches = neighbors.collect{ |n| n[:features] } # URIs of matches - gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel - if neighbor_matches.size == 0 - raise "No neighbors found." - else - # gram matrix - (0..(neighbor_matches.length-1)).each do |i| - gram_matrix[i] = [] unless gram_matrix[i] - # upper triangle - ((i+1)..(neighbor_matches.length-1)).each do |j| - sim = eval("#{params[:similarity_algorithm]}(neighbor_matches[i], neighbor_matches[j], params[:p_values])") - gram_matrix[i][j] = Algorithm.gauss(sim) - gram_matrix[j] = [] unless gram_matrix[j] - gram_matrix[j][i] = gram_matrix[i][j] # lower triangle - end - gram_matrix[i][i] = 1.0 + LOGGER.debug "Local SVM (Weighted Tanimoto Kernel)." + neighbor_matches = neighbors.collect{ |n| n[:features] } # URIs of matches + gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel + if neighbor_matches.size == 0 + raise "No neighbors found." + else + # gram matrix + (0..(neighbor_matches.length-1)).each do |i| + gram_matrix[i] = [] unless gram_matrix[i] + # upper triangle + ((i+1)..(neighbor_matches.length-1)).each do |j| + sim = eval("#{params[:similarity_algorithm]}(neighbor_matches[i], neighbor_matches[j], params[:p_values])") + gram_matrix[i][j] = Algorithm.gauss(sim) + gram_matrix[j] = [] unless gram_matrix[j] + gram_matrix[j][i] = gram_matrix[i][j] # lower triangle end + gram_matrix[i][i] = 1.0 + end - #LOGGER.debug gram_matrix.to_yaml - @r = RinRuby.new(false,false) # global R instance leads to Socket errors after a large number of requests - @r.eval "library('kernlab')" # this requires R package "kernlab" to be installed - LOGGER.debug "Setting R data ..." - # set data - @r.gram_matrix = gram_matrix.flatten - @r.n = neighbor_matches.size - @r.y = acts - @r.sims = sims + #LOGGER.debug gram_matrix.to_yaml + @r = RinRuby.new(false,false) # global R instance leads to Socket errors after a large number of requests + @r.eval "library('kernlab')" # this requires R package "kernlab" to be installed + LOGGER.debug "Setting R data ..." + # set data + @r.gram_matrix = gram_matrix.flatten + @r.n = neighbor_matches.size + @r.y = acts + @r.sims = sims - begin - LOGGER.debug "Preparing R data ..." - # prepare data - @r.eval "y<-as.vector(y)" - @r.eval "gram_matrix<-as.kernelMatrix(matrix(gram_matrix,n,n))" - @r.eval "sims<-as.vector(sims)" - - # model + support vectors - LOGGER.debug "Creating SVM model ..." - @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"#{type}\", nu=0.5)" - @r.eval "sv<-as.vector(SVindex(model))" - @r.eval "sims<-sims[sv]" - @r.eval "sims<-as.kernelMatrix(matrix(sims,1))" - LOGGER.debug "Predicting ..." - if type == "nu-svr" - @r.eval "p<-predict(model,sims)[1,1]" - elsif type == "C-bsvc" - @r.eval "p<-predict(model,sims)" - end - if type == "nu-svr" - prediction = @r.p - elsif type == "C-bsvc" - #prediction = (@r.p.to_f == 1.0 ? true : false) - prediction = @r.p - end - @r.quit # free R - rescue Exception => e - LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" + begin + LOGGER.debug "Preparing R data ..." + # prepare data + @r.eval "y<-as.vector(y)" + @r.eval "gram_matrix<-as.kernelMatrix(matrix(gram_matrix,n,n))" + @r.eval "sims<-as.vector(sims)" + + # model + support vectors + LOGGER.debug "Creating SVM model ..." + @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"#{type}\", nu=0.5)" + @r.eval "sv<-as.vector(SVindex(model))" + @r.eval "sims<-sims[sv]" + @r.eval "sims<-as.kernelMatrix(matrix(sims,1))" + LOGGER.debug "Predicting ..." + if type == "nu-svr" + @r.eval "p<-predict(model,sims)[1,1]" + elsif type == "C-bsvc" + @r.eval "p<-predict(model,sims)" end - + if type == "nu-svr" + prediction = @r.p + elsif type == "C-bsvc" + #prediction = (@r.p.to_f == 1.0 ? true : false) + prediction = @r.p + end + @r.quit # free R + rescue Exception => e + LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" end - prediction + + end + prediction end # Local support vector prediction from neighbors. @@ -419,19 +488,6 @@ module OpenTox if n_prop.size == 0 raise "No neighbors found." else - # gram matrix - #(0..(neighbor_matches.length-1)).each do |i| - # gram_matrix[i] = [] unless gram_matrix[i] - # # upper triangle - # ((i+1)..(neighbor_matches.length-1)).each do |j| - # sim = eval("#{params[:similarity_algorithm]}(neighbor_matches[i], neighbor_matches[j], params[:p_values])") - # gram_matrix[i][j] = Algorithm.gauss(sim) - # gram_matrix[j] = [] unless gram_matrix[j] - # gram_matrix[j][i] = gram_matrix[i][j] # lower triangle - # end - # gram_matrix[i][i] = 1.0 - #end - #LOGGER.debug gram_matrix.to_yaml @r = RinRuby.new(false,false) # global R instance leads to Socket errors after a large number of requests @r.eval "library('kernlab')" # this requires R package "kernlab" to be installed @@ -575,7 +631,7 @@ module OpenTox Math.sqrt(variance) end end - Array.send :include, Variance + Array.send :include, Variance end end diff --git a/lib/model.rb b/lib/model.rb index 5eec366..825f697 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -136,10 +136,10 @@ module OpenTox # Create a new lazar model # @param [optional,Hash] params Parameters for the lazar algorithm (OpenTox::Algorithm::Lazar) # @return [OpenTox::Model::Lazar] lazar model - def self.create(params) + def self.create(params, waiting_task=nil ) subjectid = params[:subjectid] lazar_algorithm = OpenTox::Algorithm::Generic.new File.join( CONFIG[:services]["opentox-algorithm"],"lazar") - model_uri = lazar_algorithm.run(params) + model_uri = lazar_algorithm.run(params, waiting_task) OpenTox::Model::Lazar.find(model_uri, subjectid) end @@ -253,7 +253,7 @@ module OpenTox (i == modulo[0]) && (slack>0) ? lr_size = s.size + slack : lr_size = s.size + addon # determine fraction LOGGER.info "BLAZAR: Neighbors round #{i}: #{position} + #{lr_size}." neighbors_balanced(s, l, position, lr_size) # get ratio fraction of larger part - if @prop_kernel && @prediction_algorithm.include?("svm") + if @prop_kernel && ( @prediction_algorithm.include?("svm") || @prediction_algorithm.include?("local_mlr_prop") ) props = get_props else props = nil @@ -276,7 +276,7 @@ module OpenTox else # AM: no balancing or regression LOGGER.info "LAZAR: Unbalanced." neighbors - if @prop_kernel && @prediction_algorithm.include?("svm") + if @prop_kernel && ( @prediction_algorithm.include?("svm") || @prediction_algorithm.include?("local_mlr_prop") ) props = get_props else props = nil diff --git a/lib/opentox-ruby.rb b/lib/opentox-ruby.rb index ae05cb2..1fa2a86 100644 --- a/lib/opentox-ruby.rb +++ b/lib/opentox-ruby.rb @@ -1,4 +1,4 @@ -['rubygems', 'sinatra', 'sinatra/url_for', 'ohm', 'rest_client', 'yaml', 'cgi', 'spork', 'error', 'overwrite', 'environment'].each do |lib| +['rubygems', 'sinatra', 'sinatra/url_for', 'ohm', 'rest_client', 'yaml', 'cgi', 'spork', 'error', 'overwrite', 'environment', 'gsl'].each do |lib| require lib end diff --git a/lib/serializer.rb b/lib/serializer.rb index 5a9fd0a..03dcf1f 100644 --- a/lib/serializer.rb +++ b/lib/serializer.rb @@ -383,6 +383,8 @@ module OpenTox XSD.boolean elsif value.is_a? Float XSD.float + elsif value.is_a? Integer + XSD.integer else XSD.string end @@ -393,6 +395,8 @@ module OpenTox datatype = OT.NominalFeature elsif value.is_a? Float datatype = OT.NumericFeature + elsif value.is_a? Integer + datatype = OT.NumericFeature else datatype = OT.StringFeature end -- cgit v1.2.3 From c1b7c18b899c9f3b2aa57979ed620a25141c5508 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Wed, 13 Jul 2011 08:49:11 +0200 Subject: Fixed statsample versions --- Rakefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Rakefile b/Rakefile index 602f0f4..00f013e 100644 --- a/Rakefile +++ b/Rakefile @@ -44,8 +44,8 @@ begin gem.add_dependency "dm-sqlite-adapter", "=1.1.0" gem.add_dependency "ruby-plot", "=0.5.0" gem.add_dependency "gsl", "=1.14.7" - gem.add_dependency "statsample", "=2.1.0" - gem.add_dependency "statsample-optimization", "=1.1.0" + gem.add_dependency "statsample", "=1.1.0" + gem.add_dependency "statsample-optimization", "=2.1.0" gem.add_development_dependency 'jeweler' gem.files = FileList["[A-Z]*", "{bin,generators,lib,test}/**/*", 'lib/jeweler/templates/.gitignore'] -- cgit v1.2.3 From 64114c2a4d29dd305bb26af5d616b29dd5681aaa Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Wed, 13 Jul 2011 12:25:46 +0200 Subject: gem homepage fixed --- Rakefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Rakefile b/Rakefile index bd22c16..f54e23e 100644 --- a/Rakefile +++ b/Rakefile @@ -8,7 +8,7 @@ begin gem.summary = %Q{Ruby wrapper for the OpenTox REST API} gem.description = %Q{Ruby wrapper for the OpenTox REST API (http://www.opentox.org)} gem.email = "helma@in-silico.ch" - gem.homepage = "http://github.com/helma/opentox-ruby" + gem.homepage = "http://github.com/opentox/opentox-ruby" gem.authors = ["Christoph Helma, Martin Guetlein, Andreas Maunz, Micha Rautenberg, David Vorgrimmler"] # dependencies with versions gem.add_dependency "sinatra", "=1.2.6" -- cgit v1.2.3 From 8c203f7d8936502dff646d160e03e64aee7246ec Mon Sep 17 00:00:00 2001 From: mguetlein Date: Wed, 13 Jul 2011 16:26:24 +0200 Subject: removing summary function --- lib/validation.rb | 36 +++--------------------------------- 1 file changed, 3 insertions(+), 33 deletions(-) diff --git a/lib/validation.rb b/lib/validation.rb index 3e8367c..b045264 100644 --- a/lib/validation.rb +++ b/lib/validation.rb @@ -84,36 +84,6 @@ module OpenTox def load_metadata( subjectid=nil ) @metadata = YAML.load(OpenTox::RestClientWrapper.get(uri,{:subjectid => subjectid, :accept => "application/x-yaml"})) end - - # PENDING: creates summary as used for ToxCreate - def summary - if @metadata[OT.classificationStatistics] - res = { - :nr_predictions => @metadata[OT.numInstances].to_i - @metadata[OT.numUnpredicted].to_i, - :correct_predictions => @metadata[OT.classificationStatistics][OT.percentCorrect], - :weighted_area_under_roc => @metadata[OT.classificationStatistics][OT.weightedAreaUnderRoc], - } - @metadata[OT.classificationStatistics][OT.classValueStatistics].each do |s| - if s[OT.classValue].to_s=="true" - res[:true_positives] = s[OT.numTruePositives] - res[:false_positives] = s[OT.numFalsePositives] - res[:true_negatives] = s[OT.numTrueNegatives] - res[:false_negatives] = s[OT.numFalseNegatives] - res[:sensitivity] = s[OT.truePositiveRate] - res[:specificity] = s[OT.trueNegativeRate] - break - end - end - res - elsif @metadata[OT.regressionStatistics] - { - :nr_predictions => @metadata[OT.numInstances].to_i - @metadata[OT.numUnpredicted].to_i, - :r_square => @metadata[OT.regressionStatistics][OT.rSquare], - :root_mean_squared_error => @metadata[OT.regressionStatistics][OT.rootMeanSquaredError], - :mean_absolute_error => @metadata[OT.regressionStatistics][OT.meanAbsoluteError], - } - end - end end class Crossvalidation @@ -171,9 +141,9 @@ module OpenTox @metadata = YAML.load(OpenTox::RestClientWrapper.get(uri,{:subjectid => subjectid, :accept => "application/x-yaml"})) end - # PENDING: creates summary as used for ToxCreate - def summary( subjectid=nil ) - Validation.from_cv_statistics( @uri, subjectid ).summary + # returns a Validation object containing the statistics of the crossavlidation + def statistics( subjectid=nil ) + Validation.from_cv_statistics( @uri, subjectid ) end end -- cgit v1.2.3 From 7cc52634b2ee84ca98be16ef548de8a6a8215f90 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Wed, 13 Jul 2011 18:07:01 +0200 Subject: providing confusion matrix table - stub --- lib/validation.rb | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/validation.rb b/lib/validation.rb index b045264..6060504 100644 --- a/lib/validation.rb +++ b/lib/validation.rb @@ -84,6 +84,10 @@ module OpenTox def load_metadata( subjectid=nil ) @metadata = YAML.load(OpenTox::RestClientWrapper.get(uri,{:subjectid => subjectid, :accept => "application/x-yaml"})) end + + def confusion_matrix + [[nil,"true","false","osterhase"],["true",1,2,3],["false",2,3,4],["osterhase",5,6,7]] + end end class Crossvalidation -- cgit v1.2.3 From 8087ce3c03cdfda26ab9215e64f655c124dc413b Mon Sep 17 00:00:00 2001 From: mguetlein Date: Wed, 13 Jul 2011 18:35:46 +0200 Subject: providing confusion matrix table --- lib/validation.rb | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/lib/validation.rb b/lib/validation.rb index 6060504..646b076 100644 --- a/lib/validation.rb +++ b/lib/validation.rb @@ -85,8 +85,27 @@ module OpenTox @metadata = YAML.load(OpenTox::RestClientWrapper.get(uri,{:subjectid => subjectid, :accept => "application/x-yaml"})) end + # returns confusion matrix as array, predicted values are in rows + # example: + # [[nil,"active","moderate","inactive"],["active",1,3,99],["moderate",4,2,8],["inactive",3,8,6]] + # -> 99 inactive compounds have been predicted as active def confusion_matrix - [[nil,"true","false","osterhase"],["true",1,2,3],["false",2,3,4],["osterhase",5,6,7]] + raise "no classification statistics, probably a regression valdiation" unless @metadata[OT.classificationStatistics] + matrix = @metadata[OT.classificationStatistics][OT.confusionMatrix][OT.confusionMatrixCell] + values = matrix.collect{|cell| cell[OT.confusionMatrixPredicted]}.uniq + table = [[nil]+values] + values.each do |c| + table << [c] + values.each do |r| + matrix.each do |cell| + if cell[OT.confusionMatrixPredicted]==c and cell[OT.confusionMatrixActual]==r + table[-1] << cell[OT.confusionMatrixValue].to_f + break + end + end + end + end + table end end -- cgit v1.2.3 From 47c28bbc74e40c39a9324f88f93f921bfc2b03fa Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Thu, 14 Jul 2011 09:45:50 +0200 Subject: PCA fixed and tested --- lib/algorithm.rb | 191 ++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 134 insertions(+), 57 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index a22798e..beadf84 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -208,6 +208,9 @@ module OpenTox raise "No neighbors found." unless neighbors.size>0 begin + weights = neighbors.collect do |n| + Algorithm.gauss(n[:similarity]) + end acts = neighbors.collect do |n| act = n[:activity] act.to_f @@ -216,57 +219,50 @@ module OpenTox LOGGER.debug "Local MLR (Propositionalization / GSL)." n_prop = props[0] # is a matrix, i.e. two nested Arrays. q_prop = props[1] # is an Array. - + n_prop = n_prop << q_prop # attach q_prop nr_cases, nr_features = get_sizes n_prop - LOGGER.debug "PCA..." data_matrix = GSL::Matrix.alloc(n_prop.flatten, nr_cases, nr_features) + # GSL matrix operations: + # to_a : row-wise conversion to nested array + # Statsample operations (build on GSL): + # to_scale: convert into Statsample format - # Centering and Scaling - #(0..nr_features-1).each { |i| - # autoscaler = OpenTox::Algorithm::Transform::AutoScale.new(data_matrix.col(i)) - # data_matrix.col(i)[0..nr_cases-1] = autoscaler.values - #} # Principal Components Analysis - data_matrix_hash = Hash.new - (0..nr_features-1).each { |i| - column_view = data_matrix.col(i) - data_matrix_hash[i] = column_view.to_scale - } - dataset_hash = data_matrix_hash.to_dataset - pca = OpenTox::Algorithm::Transform::PCA.new(dataset_hash) + pca = OpenTox::Algorithm::Transform::PCA.new(data_matrix) n_prop = pca.dataset_transformed_matrix.transpose.to_a ## Normalizing along each Principal Component - #nr_cases, nr_features = get_sizes n_prop #data_matrix = GSL::Matrix.alloc(n_prop.flatten, nr_cases, nr_features) #(0..nr_features-1).each { |i| # normalizer = OpenTox::Algorithm::Transform::Log10.new(data_matrix.col(i).to_a) # data_matrix.col(i)[0..nr_cases-1] = normalizer.values #} - # Mangle data + # attach intercept column + #nr_cases, nr_features = get_sizes n_prop + #data_matrix = GSL::Matrix.alloc(n_prop.flatten, nr_cases, nr_features) + #intercept = GSL::Matrix.alloc(Array.new(nr_cases,1.0),nr_cases,1) + #data_matrix = data_matrix.horzcat(intercept) + #n_prop = data_matrix.to_a + + + # set data q_prop = n_prop.pop # detach query instance nr_cases, nr_features = get_sizes n_prop n_prop.flatten! - y_x_rel = nr_cases.to_f / nr_features - repeat_factor = (1/y_x_rel).ceil - n_prop_tmp = Array.new ; repeat_factor.times { n_prop_tmp.concat n_prop } ; n_prop = n_prop_tmp - acts_tmp = Array.new ; repeat_factor.times { acts_tmp.concat acts } ; acts = acts_tmp - - # set data - LOGGER.debug "Setting prop data ..." - prop_matrix = GSL::Matrix[n_prop, nr_cases * repeat_factor, nr_features] - y = GSL::Vector[acts] - q_prop = GSL::Vector[q_prop] + prop_matrix = GSL::Matrix.alloc(n_prop, nr_cases, nr_features) + y = GSL::Vector.alloc(acts) + w = GSL::Vector.alloc(weights) + q_prop = GSL::Vector.alloc(q_prop) # model + support vectors LOGGER.debug "Creating MLR model ..." - work = GSL::MultiFit::Workspace.alloc(nr_cases * repeat_factor, nr_features) - c, cov, chisq, status = GSL::MultiFit::linear(prop_matrix, y, work) +# work = GSL::MultiFit::Workspace.alloc(nr_cases * repeat_factor, nr_features) + c, cov, chisq, status = GSL::MultiFit::wlinear(prop_matrix, w, y) LOGGER.debug "Predicting ..." prediction = GSL::MultiFit::linear_est(q_prop, c, cov)[0] transformer = eval "#{transform[:class]}.new ([#{prediction}], #{transform[:offset]})" @@ -280,7 +276,7 @@ module OpenTox rescue Exception => e LOGGER.debug "#{e.class}: #{e.message}" - LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" + #LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" end end @@ -513,7 +509,7 @@ module OpenTox prediction end - # Get X and Y size of a nested Array + # Get X and Y size of a nested Array (Matrix) def self.get_sizes(matrix) begin nr_cases = matrix.size @@ -580,14 +576,13 @@ module OpenTox attr_accessor :offset, :values def initialize *args - @distance_to_zero = 0.000001 + @distance_to_zero = 0.000000001 # 1 / 1 billion case args.size when 1 begin values=args[0] raise "Cannot transform, values empty." if values.size==0 @offset = values.minmax[0] - puts @offset @offset = -1.0 * @offset if @offset>0.0 @values = values.collect { |v| v - @offset } # slide > anchor @values.collect! { |v| v + @distance_to_zero } # @@ -605,38 +600,120 @@ module OpenTox end end - class AutoScale # center on mean and divide by stdev - attr_accessor :values + # The transformer that does nothing. + class NOP + attr_accessor :offset, :values + def initialize *args + @offset = 0.0 + @distance_to_zero = 0.0 + case args.size + when 1 + begin + values=args[0] + raise "Cannot transform, values empty." if values.size==0 + @offset = values.minmax[0] + @offset = -1.0 * @offset if @offset>0.0 + @values = values.collect { |v| v - @offset } # slide > anchor + @values.collect! { |v| v + @distance_to_zero } # + @values.collect! { |v| Math::log10 v } # log10 (can fail) + rescue Exception => e + LOGGER.debug "#{e.class}: #{e.message}" + LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" + end + when 2 + @offset = args[1].to_f + @values = args[0].collect { |v| 10**v } + @values.collect! { |v| v - @distance_to_zero } + @values.collect! { |v| v + @offset } + end + end + end + + + # Auto-Scaler for Arrays + # Center on mean and divide by standard deviation + class AutoScale + attr_accessor :scaled_values, :mean, :stdev def initialize values - mean = values.to_scale.mean - stdev = values.to_scale.standard_deviation_sample - @values = values.collect{|vi| vi - mean } - @values.collect! {|vi| vi / stdev } + @scaled_values = values + @mean = @scaled_values.to_scale.mean + @stdev = @scaled_values.to_scale.standard_deviation_sample + @scaled_values = @scaled_values.collect {|vi| vi - @mean } + @scaled_values.collect! {|vi| vi / @stdev } end end + # Principal Components Analysis + # Statsample Library (http://ruby-statsample.rubyforge.org/) by C. Bustos class PCA - attr_accessor :dataset_transformed_matrix - - def initialize dataset - @cor_matrix=Statsample::Bivariate.correlation_matrix(dataset) - pca=Statsample::Factor::PCA.new(@cor_matrix) - eigenvalue_sums = Array.new - (0..dataset.fields.size-1).each { |i| - eigenvalue_sums << pca.eigenvalues[0..i].inject{ |sum, ev| sum + ev } - } - # compression cutoff @0.9 - @eigenvectors_selected = Array.new - pca.eigenvectors.each_with_index { |ev, i| - if (eigenvalue_sums[i] <= (0.95*dataset.fields.size)) || (@eigenvectors_selected.size == 0) - @eigenvectors_selected << ev.to_a - end - } - eigenvector_matrix = GSL::Matrix.alloc(@eigenvectors_selected.flatten, dataset.fields.size, @eigenvectors_selected.size).transpose - dataset_matrix = dataset.to_gsl.transpose - @dataset_transformed_matrix = eigenvector_matrix * dataset_matrix # dataset_transformed_matrix is in row-wise notation now + attr_accessor :data_matrix, :data_transformed_matrix, :eigenvector_matrix, :eigenvalue_sums, :autoscaler + + # Creates a transformed dataset as GSL::Matrix. + # @param [GSL::Matrix] Data matrix. + # @param [Float] Compression ratio from [0,1]. + # @return [GSL::Matrix] Data transformed matrix. + def initialize data_matrix, compression=0.05 + begin + @data_matrix = data_matrix + @data_matrix_scaled = GSL::Matrix.alloc(@data_matrix.size1, @data_matrix.size2) + @compression = compression.to_f + @stdev = Array.new + @mean = Array.new + + # Scaling of Axes + (0..@data_matrix.size2-1).each { |i| + @autoscaler = OpenTox::Algorithm::Transform::AutoScale.new(@data_matrix.col(i)) + @data_matrix_scaled.col(i)[0..@data_matrix.size1-1] = @autoscaler.scaled_values + @stdev << @autoscaler.stdev + @mean << @autoscaler.mean + } + + data_matrix_hash = Hash.new + (0..@data_matrix.size2-1).each { |i| + column_view = @data_matrix_scaled.col(i) + data_matrix_hash[i] = column_view.to_scale + } + dataset_hash = data_matrix_hash.to_dataset # see http://goo.gl/7XcW9 + cor_matrix=Statsample::Bivariate.correlation_matrix(dataset_hash) + pca=Statsample::Factor::PCA.new(cor_matrix) + pca.eigenvalues.each { |ev| raise "PCA failed!" unless !ev.nan? } + @eigenvalue_sums = Array.new + (0..dataset_hash.fields.size-1).each { |i| + @eigenvalue_sums << pca.eigenvalues[0..i].inject{ |sum, ev| sum + ev } + } + eigenvectors_selected = Array.new + pca.eigenvectors.each_with_index { |ev, i| + if (@eigenvalue_sums[i] <= ((1.0-@compression)*dataset_hash.fields.size)) || (eigenvectors_selected.size == 0) + eigenvectors_selected << ev.to_a + end + } + @eigenvector_matrix = GSL::Matrix.alloc(eigenvectors_selected.flatten, eigenvectors_selected.size, dataset_hash.fields.size).transpose + dataset_matrix = dataset_hash.to_gsl.transpose + @data_transformed_matrix = (@eigenvector_matrix.transpose * dataset_matrix).transpose + rescue Exception => e + LOGGER.debug "#{e.class}: #{e.message}" + LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" + end end + + # Restores data in the original feature space (possibly with compression loss). + # @return [GSL::Matrix] Data matrix. + def restore + begin + data_matrix_restored = (@eigenvector_matrix * @data_transformed_matrix.transpose).transpose # reverse pca + # reverse scaling + (0..data_matrix_restored.size2-1).each { |i| + data_matrix_restored.col(i)[0..data_matrix_restored.size1-1] *= @stdev[i] + data_matrix_restored.col(i)[0..data_matrix_restored.size1-1] += @mean[i] + } + data_matrix_restored + rescue Exception => e + LOGGER.debug "#{e.class}: #{e.message}" + LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" + end + end + end end -- cgit v1.2.3 From 5517fb997ba94132449bc9ad0d2192737a9d193d Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Thu, 14 Jul 2011 10:43:56 +0200 Subject: Added Transform support in model --- lib/algorithm.rb | 73 ++++++++++++++++++++++++-------------------------------- lib/model.rb | 2 +- 2 files changed, 32 insertions(+), 43 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index beadf84..74b1bdd 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -204,6 +204,10 @@ module OpenTox # @param [Array] props, propositionalization of neighbors and query structure e.g. [ Array_for_q, two-nested-Arrays_for_n ] # @return [Numeric] A prediction value. def self.local_mlr_prop(neighbors, params, props, transform=nil) + # GSL matrix operations: + # to_a : row-wise conversion to nested array + # Statsample operations (build on GSL): + # to_scale: convert into Statsample format raise "No neighbors found." unless neighbors.size>0 begin @@ -222,18 +226,12 @@ module OpenTox n_prop = n_prop << q_prop # attach q_prop nr_cases, nr_features = get_sizes n_prop - - LOGGER.debug "PCA..." data_matrix = GSL::Matrix.alloc(n_prop.flatten, nr_cases, nr_features) - # GSL matrix operations: - # to_a : row-wise conversion to nested array - # Statsample operations (build on GSL): - # to_scale: convert into Statsample format - # Principal Components Analysis + LOGGER.debug "PCA..." pca = OpenTox::Algorithm::Transform::PCA.new(data_matrix) - n_prop = pca.dataset_transformed_matrix.transpose.to_a + data_matrix = pca.data_transformed_matrix ## Normalizing along each Principal Component #data_matrix = GSL::Matrix.alloc(n_prop.flatten, nr_cases, nr_features) @@ -242,28 +240,22 @@ module OpenTox # data_matrix.col(i)[0..nr_cases-1] = normalizer.values #} - # attach intercept column - #nr_cases, nr_features = get_sizes n_prop - #data_matrix = GSL::Matrix.alloc(n_prop.flatten, nr_cases, nr_features) - #intercept = GSL::Matrix.alloc(Array.new(nr_cases,1.0),nr_cases,1) - #data_matrix = data_matrix.horzcat(intercept) - #n_prop = data_matrix.to_a - + # Attach intercept column to data + intercept = GSL::Matrix.alloc(Array.new(nr_cases,1.0),nr_cases,1) + data_matrix = data_matrix.horzcat(intercept) - # set data - q_prop = n_prop.pop # detach query instance + # detach query instance + n_prop = data_matrix.to_a + q_prop = n_prop.pop nr_cases, nr_features = get_sizes n_prop - n_prop.flatten! - prop_matrix = GSL::Matrix.alloc(n_prop, nr_cases, nr_features) + data_matrix = GSL::Matrix.alloc(n_prop.flatten, nr_cases, nr_features) y = GSL::Vector.alloc(acts) w = GSL::Vector.alloc(weights) q_prop = GSL::Vector.alloc(q_prop) # model + support vectors LOGGER.debug "Creating MLR model ..." -# work = GSL::MultiFit::Workspace.alloc(nr_cases * repeat_factor, nr_features) - c, cov, chisq, status = GSL::MultiFit::wlinear(prop_matrix, w, y) - LOGGER.debug "Predicting ..." + c, cov, chisq, status = GSL::MultiFit::wlinear(data_matrix, w, y) prediction = GSL::MultiFit::linear_est(q_prop, c, cov)[0] transformer = eval "#{transform[:class]}.new ([#{prediction}], #{transform[:offset]})" prediction = transformer.values[0] @@ -276,7 +268,6 @@ module OpenTox rescue Exception => e LOGGER.debug "#{e.class}: #{e.message}" - #LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" end end @@ -545,9 +536,13 @@ module OpenTox module Transform include Algorithm - class Inverter # to improve normality conditions on a vector + # The transformer that inverts values. + # 1/x is used, after values have been moved >= 1. + class Inverter attr_accessor :offset, :values + # @params[Array] Values to transform. + # @params[Float] Offset for restore. def initialize *args case args.size when 1 @@ -572,9 +567,13 @@ module OpenTox end end - class Log10 # to improve normality conditions on a vector + # The transformer that takes logs. + # Log10 is used, after values have been moved > 0. + class Log10 attr_accessor :offset, :values + # @params[Array] Values to transform / restore. + # @params[Float] Offset for restore. def initialize *args @distance_to_zero = 0.000000001 # 1 / 1 billion case args.size @@ -600,32 +599,20 @@ module OpenTox end end - # The transformer that does nothing. + # The transformer that does nothing (No OPeration). class NOP attr_accessor :offset, :values + # @params[Array] Values to transform / restore. + # @params[Float] Offset for restore. def initialize *args @offset = 0.0 @distance_to_zero = 0.0 case args.size when 1 - begin - values=args[0] - raise "Cannot transform, values empty." if values.size==0 - @offset = values.minmax[0] - @offset = -1.0 * @offset if @offset>0.0 - @values = values.collect { |v| v - @offset } # slide > anchor - @values.collect! { |v| v + @distance_to_zero } # - @values.collect! { |v| Math::log10 v } # log10 (can fail) - rescue Exception => e - LOGGER.debug "#{e.class}: #{e.message}" - LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" - end + @values = args[0] when 2 - @offset = args[1].to_f - @values = args[0].collect { |v| 10**v } - @values.collect! { |v| v - @distance_to_zero } - @values.collect! { |v| v + @offset } + @values = args[0] end end end @@ -635,6 +622,8 @@ module OpenTox # Center on mean and divide by standard deviation class AutoScale attr_accessor :scaled_values, :mean, :stdev + + # @params[Array] Values to transform. def initialize values @scaled_values = values @mean = @scaled_values.to_scale.mean diff --git a/lib/model.rb b/lib/model.rb index 6a64c53..9a5dc60 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -117,7 +117,7 @@ module OpenTox @min_sim = 0.3 @prop_kernel = false @balanced = false - @transform = nil + @transform = { "class" => "NOP" } end -- cgit v1.2.3 From 145a7fb2b2f22796859c10ef58992bbbf2dcc2c1 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Thu, 14 Jul 2011 11:45:21 +0200 Subject: Minor fixe --- lib/algorithm.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 74b1bdd..e9d646c 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -257,7 +257,7 @@ module OpenTox LOGGER.debug "Creating MLR model ..." c, cov, chisq, status = GSL::MultiFit::wlinear(data_matrix, w, y) prediction = GSL::MultiFit::linear_est(q_prop, c, cov)[0] - transformer = eval "#{transform[:class]}.new ([#{prediction}], #{transform[:offset]})" + transformer = eval "OpenTox::Algorithm::Transform::#{transform["class"]}.new ([#{prediction}], #{transform["offset"]})" prediction = transformer.values[0] LOGGER.debug "Prediction is: '" + prediction.to_s + "'." @@ -325,7 +325,7 @@ module OpenTox acts = neighbors.collect{ |n| n[:activity].to_f } sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } prediction = (props.nil? ? local_svm(neighbors, acts, sims, "nu-svr", params) : local_svm_prop(props, acts, "nu-svr", params)) - transformer = eval "#{transform[:class]}.new ([#{prediction}], #{transform[:offset]})" + transformer = eval "OpenTox::Algorithm::Transform::#{transform["class"]}.new ([#{prediction}], #{transform["offset"]})" prediction = transformer.values[0] LOGGER.debug "Prediction is: '" + prediction.to_s + "'." conf = sims.inject{|sum,x| sum + x } -- cgit v1.2.3 From 3baf0d066cf984e3f80be7559e0a451bf4f59c40 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Thu, 14 Jul 2011 14:39:15 +0200 Subject: Removed balancing --- lib/model.rb | 91 ++++++------------------------------------------------------ 1 file changed, 8 insertions(+), 83 deletions(-) diff --git a/lib/model.rb b/lib/model.rb index 3dac63d..c25e55f 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -91,7 +91,7 @@ module OpenTox include Algorithm include Model - attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim, :subjectid, :prop_kernel, :value_map, :balanced, :transform + attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim, :subjectid, :prop_kernel, :value_map, :transform def initialize(uri=nil) @@ -116,7 +116,6 @@ module OpenTox @min_sim = 0.3 @prop_kernel = false - @balanced = false @transform = { "class" => "NOP" } end @@ -213,77 +212,14 @@ module OpenTox unless database_activity(subjectid) # adds database activity to @prediction_dataset - if @balanced && OpenTox::Feature.find(metadata[OT.dependentVariables]).feature_type == "classification" - # AM: Balancing, see http://www.maunz.de/wordpress/opentox/2011/balanced-lazar - l = Array.new # larger - s = Array.new # smaller fraction - - raise "no fingerprints in model" if @fingerprints.size==0 - - @fingerprints.each do |training_compound,training_features| - @activities[training_compound].each do |act| - case act.to_s - when "0" - l << training_compound - when "1" - s << training_compound - else - LOGGER.warn "BLAZAR: Activity #{act.to_s} should not be reached (supports only two classes)." - end - end - end - if s.size > l.size then - l,s = s,l # happy swapping - LOGGER.info "BLAZAR: |s|=#{s.size}, |l|=#{l.size}." - end - # determine ratio - modulo = l.size.divmod(s.size)# modulo[0]=ratio, modulo[1]=rest - LOGGER.info "BLAZAR: Balance: #{modulo[0]}, rest #{modulo[1]}." - - # AM: Balanced predictions - addon = (modulo[1].to_f/modulo[0]).ceil # what will be added in each round - slack = (addon!=0 ? modulo[1].divmod(addon)[1] : 0) # what remains for the last round - position = 0 - predictions = Array.new - - prediction_best=nil - neighbors_best=nil - - begin - for i in 1..modulo[0] do - (i == modulo[0]) && (slack>0) ? lr_size = s.size + slack : lr_size = s.size + addon # determine fraction - LOGGER.info "BLAZAR: Neighbors round #{i}: #{position} + #{lr_size}." - neighbors_balanced(s, l, position, lr_size) # get ratio fraction of larger part - if @prop_kernel && ( @prediction_algorithm.include?("svm") || @prediction_algorithm.include?("local_mlr_prop") ) - props = get_props - else - props = nil - end - prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values, :value_map => @value_map}, props)") - if prediction_best.nil? || prediction[:confidence].abs > prediction_best[:confidence].abs - prediction_best=prediction - neighbors_best=@neighbors - end - position = position + lr_size - end - rescue Exception => e - LOGGER.error "BLAZAR failed in prediction: "+e.class.to_s+": "+e.message - end - - prediction=prediction_best - @neighbors=neighbors_best - ### END AM balanced predictions - - else # AM: no balancing or regression - LOGGER.info "LAZAR: Unbalanced." - neighbors - if @prop_kernel && ( @prediction_algorithm.include?("svm") || @prediction_algorithm.include?("local_mlr_prop") ) - props = get_props - else - props = nil - end - prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values, :value_map => @value_map}, props, @transform)") + neighbors + if @prop_kernel && ( @prediction_algorithm.include?("svm") || @prediction_algorithm.include?("local_mlr_prop") ) + props = get_props + else + props = nil end + prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values, :value_map => @value_map}, props, @transform)") + value_feature_uri = File.join( @uri, "predicted", "value") confidence_feature_uri = File.join( @uri, "predicted", "confidence") @@ -384,17 +320,6 @@ module OpenTox [ matrix, row ] end - # Find neighbors and store them as object variable, access only a subset of compounds for that. - def neighbors_balanced(s, l, start, offset) - @compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm - @neighbors = [] - [ l[start, offset ] , s ].flatten.each do |training_compound| # AM: access only a balanced subset - training_features = @fingerprints[training_compound] - add_neighbor training_features, training_compound - end - - end - # Find neighbors and store them as object variable, access all compounds for that. def neighbors @compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm -- cgit v1.2.3 From 8c19e4c773ef31514aff46b057b24c8cec1498ed Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Thu, 14 Jul 2011 15:09:07 +0200 Subject: Cleanup --- lib/algorithm.rb | 28 ++++++---------------------- lib/model.rb | 8 ++------ 2 files changed, 8 insertions(+), 28 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index e9d646c..2986569 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -212,13 +212,8 @@ module OpenTox raise "No neighbors found." unless neighbors.size>0 begin - weights = neighbors.collect do |n| - Algorithm.gauss(n[:similarity]) - end - acts = neighbors.collect do |n| - act = n[:activity] - act.to_f - end # activities of neighbors for supervised learning + acts = neighbors.collect { |n| act = n[:activity].to_f } + sims = neighbors.collect { |n| Algorithm.gauss(n[:similarity]) } LOGGER.debug "Local MLR (Propositionalization / GSL)." n_prop = props[0] # is a matrix, i.e. two nested Arrays. @@ -233,30 +228,20 @@ module OpenTox pca = OpenTox::Algorithm::Transform::PCA.new(data_matrix) data_matrix = pca.data_transformed_matrix - ## Normalizing along each Principal Component - #data_matrix = GSL::Matrix.alloc(n_prop.flatten, nr_cases, nr_features) - #(0..nr_features-1).each { |i| - # normalizer = OpenTox::Algorithm::Transform::Log10.new(data_matrix.col(i).to_a) - # data_matrix.col(i)[0..nr_cases-1] = normalizer.values - #} - # Attach intercept column to data intercept = GSL::Matrix.alloc(Array.new(nr_cases,1.0),nr_cases,1) data_matrix = data_matrix.horzcat(intercept) - # detach query instance + # Detach query instance n_prop = data_matrix.to_a q_prop = n_prop.pop nr_cases, nr_features = get_sizes n_prop data_matrix = GSL::Matrix.alloc(n_prop.flatten, nr_cases, nr_features) - y = GSL::Vector.alloc(acts) - w = GSL::Vector.alloc(weights) - q_prop = GSL::Vector.alloc(q_prop) # model + support vectors LOGGER.debug "Creating MLR model ..." - c, cov, chisq, status = GSL::MultiFit::wlinear(data_matrix, w, y) - prediction = GSL::MultiFit::linear_est(q_prop, c, cov)[0] + c, cov, chisq, status = GSL::MultiFit::wlinear(data_matrix, sims.to_scale.to_gsl, acts.to_scale.to_gsl) + prediction = GSL::MultiFit::linear_est(q_prop.to_scale.to_gsl, c, cov)[0] transformer = eval "OpenTox::Algorithm::Transform::#{transform["class"]}.new ([#{prediction}], #{transform["offset"]})" prediction = transformer.values[0] LOGGER.debug "Prediction is: '" + prediction.to_s + "'." @@ -348,9 +333,8 @@ module OpenTox raise "No neighbors found." unless neighbors.size>0 begin acts = neighbors.collect { |n| act = n[:activity] } - acts_f = acts sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors - prediction = (props.nil? ? local_svm(neighbors, acts_f, sims, "C-bsvc", params) : local_svm_prop(props, acts_f, "C-bsvc", params)) + prediction = (props.nil? ? local_svm(neighbors, acts, sims, "C-bsvc", params) : local_svm_prop(props, acts, "C-bsvc", params)) LOGGER.debug "Prediction is: '" + prediction.to_s + "'." conf = sims.inject{|sum,x| sum + x } confidence = conf/neighbors.size if neighbors.size > 0 diff --git a/lib/model.rb b/lib/model.rb index c25e55f..c57ea65 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -213,14 +213,10 @@ module OpenTox unless database_activity(subjectid) # adds database activity to @prediction_dataset neighbors - if @prop_kernel && ( @prediction_algorithm.include?("svm") || @prediction_algorithm.include?("local_mlr_prop") ) - props = get_props - else - props = nil - end + props=nil + props = get_props if @prop_kernel && ( @prediction_algorithm.include?("svm") || @prediction_algorithm.include?("local_mlr_prop") ) prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values, :value_map => @value_map}, props, @transform)") - value_feature_uri = File.join( @uri, "predicted", "value") confidence_feature_uri = File.join( @uri, "predicted", "confidence") -- cgit v1.2.3 From 9c73b4e330ede897382ac4ae3fc9bf0fbd75b3be Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Fri, 15 Jul 2011 10:18:11 +0200 Subject: Unified interface to algorithms --- lib/algorithm.rb | 123 +++++++++++++++++++++++++++++++++---------------------- lib/model.rb | 39 +++++------------- 2 files changed, 85 insertions(+), 77 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 2986569..f6cbbc8 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -199,21 +199,22 @@ module OpenTox # Local multi-linear regression (MLR) prediction from neighbors. # Uses propositionalized setting. - # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features` - # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required - # @param [Array] props, propositionalization of neighbors and query structure e.g. [ Array_for_q, two-nested-Arrays_for_n ] + # @param [Hash] params Keys `:neighbors,:compound,:features,:p_values,:similarity_algorithm,:prop_kernel,:value_map,:transform` are required # @return [Numeric] A prediction value. - def self.local_mlr_prop(neighbors, params, props, transform=nil) + def self.local_mlr_prop(params) + # GSL matrix operations: # to_a : row-wise conversion to nested array + # # Statsample operations (build on GSL): # to_scale: convert into Statsample format raise "No neighbors found." unless neighbors.size>0 begin - acts = neighbors.collect { |n| act = n[:activity].to_f } - sims = neighbors.collect { |n| Algorithm.gauss(n[:similarity]) } + props = params[:prop_kernel] ? get_props(params) : nil + acts = params[:neighbors].collect { |n| act = n[:activity].to_f } + sims = params[:neighbors].collect { |n| Algorithm.gauss(n[:similarity]) } LOGGER.debug "Local MLR (Propositionalization / GSL)." n_prop = props[0] # is a matrix, i.e. two nested Arrays. @@ -242,13 +243,13 @@ module OpenTox LOGGER.debug "Creating MLR model ..." c, cov, chisq, status = GSL::MultiFit::wlinear(data_matrix, sims.to_scale.to_gsl, acts.to_scale.to_gsl) prediction = GSL::MultiFit::linear_est(q_prop.to_scale.to_gsl, c, cov)[0] - transformer = eval "OpenTox::Algorithm::Transform::#{transform["class"]}.new ([#{prediction}], #{transform["offset"]})" + transformer = eval "OpenTox::Algorithm::Transform::#{params[:transform]["class"]}.new ([#{prediction}], #{params[:transform]["offset"]})" prediction = transformer.values[0] LOGGER.debug "Prediction is: '" + prediction.to_s + "'." - sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors + sims = params[:neighbors].collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors conf = sims.inject{|sum,x| sum + x } - confidence = conf/neighbors.size if neighbors.size > 0 + confidence = conf/params[:neighbors].size if params[:neighbors].size > 0 {:prediction => prediction, :confidence => confidence} rescue Exception => e @@ -258,10 +259,10 @@ module OpenTox end # Classification with majority vote from neighbors weighted by similarity - # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity` - # @param [optional] params Ignored (only for compatibility with local_svm_regression) - # @return [Hash] Hash with keys `:prediction, :confidence` - def self.weighted_majority_vote(neighbors,params={}, props=nil, transform=nil) + # @param [Hash] params Keys `:neighbors,:compound,:features,:p_values,:similarity_algorithm,:prop_kernel,:value_map,:transform` are required + # @return [Numeric] A prediction value. + def self.weighted_majority_vote(params) + neighbor_contribution = 0.0 confidence_sum = 0.0 confidence = 0.0 @@ -269,7 +270,7 @@ module OpenTox positive_map_value= nil negative_map_value= nil - neighbors.each do |neighbor| + params[:neighbors].each do |neighbor| neighbor_weight = Algorithm.gauss(neighbor[:similarity]).to_f neighbor_contribution += neighbor[:activity].to_f * neighbor_weight @@ -287,34 +288,34 @@ module OpenTox if params[:value_map].size == 2 if confidence_sum >= 0.0 - prediction = 2 unless neighbors.size==0 + prediction = 2 unless params[:neighbors].size==0 elsif confidence_sum < 0.0 - prediction = 1 unless neighbors.size==0 + prediction = 1 unless params[:neighbors].size==0 end else - prediction = (neighbor_contribution/confidence_sum).round unless neighbors.size==0 # AM: new multinomial prediction + prediction = (neighbor_contribution/confidence_sum).round unless params[:neighbors].size==0 # AM: new multinomial prediction end - confidence = confidence_sum/neighbors.size if neighbors.size > 0 + confidence = confidence_sum/params[:neighbors].size if params[:neighbors].size > 0 return {:prediction => prediction, :confidence => confidence.abs} end # Local support vector regression from neighbors - # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features` - # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required - # @return [Hash] Hash with keys `:prediction, :confidence` - def self.local_svm_regression(neighbors, params, props=nil, transform=nil) + # @param [Hash] params Keys `:neighbors,:compound,:features,:p_values,:similarity_algorithm,:prop_kernel,:value_map,:transform` are required + # @return [Numeric] A prediction value. + def self.local_svm_regression(params) - raise "No neighbors found." unless neighbors.size>0 + raise "No neighbors found." unless params[:neighbors].size>0 begin - acts = neighbors.collect{ |n| n[:activity].to_f } - sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } - prediction = (props.nil? ? local_svm(neighbors, acts, sims, "nu-svr", params) : local_svm_prop(props, acts, "nu-svr", params)) - transformer = eval "OpenTox::Algorithm::Transform::#{transform["class"]}.new ([#{prediction}], #{transform["offset"]})" + props = params[:prop_kernel] ? get_props(params) : nil + acts = params[:neighbors].collect{ |n| n[:activity].to_f } + sims = params[:neighbors].collect{ |n| Algorithm.gauss(n[:similarity]) } + prediction = props.nil? ? local_svm(acts, sims, "nu-svr", params) : local_svm_prop(props, acts, "nu-svr") + transformer = eval "OpenTox::Algorithm::Transform::#{params[:transform]["class"]}.new ([#{prediction}], #{params[:transform]["offset"]})" prediction = transformer.values[0] LOGGER.debug "Prediction is: '" + prediction.to_s + "'." conf = sims.inject{|sum,x| sum + x } - confidence = conf/neighbors.size + confidence = conf/params[:neighbors].size {:prediction => prediction, :confidence => confidence} rescue Exception => e LOGGER.debug "#{e.class}: #{e.message}" @@ -324,20 +325,19 @@ module OpenTox end # Local support vector classification from neighbors - # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features` - # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required - # @param [Array] props, propositionalization of neighbors and query structure e.g. [ Array_for_q, two-nested-Arrays_for_n ] - # @return [Hash] Hash with keys `:prediction, :confidence` - def self.local_svm_classification(neighbors, params, props=nil, transform=nil) + # @param [Hash] params Keys `:neighbors,:compound,:features,:p_values,:similarity_algorithm,:prop_kernel,:value_map,:transform` are required + # @return [Numeric] A prediction value. + def self.local_svm_classification(params) - raise "No neighbors found." unless neighbors.size>0 + raise "No neighbors found." unless params[:neighbors].size>0 begin - acts = neighbors.collect { |n| act = n[:activity] } - sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors - prediction = (props.nil? ? local_svm(neighbors, acts, sims, "C-bsvc", params) : local_svm_prop(props, acts, "C-bsvc", params)) + props = params[:prop_kernel] ? get_props(params) : nil + acts = params[:neighbors].collect { |n| act = n[:activity] } + sims = params[:neighbors].collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors + prediction = props.nil? ? local_svm(acts, sims, "C-bsvc", params) : local_svm_prop(props, acts, "C-bsvc") LOGGER.debug "Prediction is: '" + prediction.to_s + "'." conf = sims.inject{|sum,x| sum + x } - confidence = conf/neighbors.size if neighbors.size > 0 + confidence = conf/params[:neighbors].size if params[:neighbors].size > 0 {:prediction => prediction, :confidence => confidence} rescue Exception => e LOGGER.debug "#{e.class}: #{e.message}" @@ -350,16 +350,14 @@ module OpenTox # Local support vector prediction from neighbors. # Uses pre-defined Kernel Matrix. # Not to be called directly (use local_svm_regression or local_svm_classification). - # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features` # @param [Array] acts, activities for neighbors. # @param [Array] sims, similarities for neighbors. # @param [String] type, one of "nu-svr" (regression) or "C-bsvc" (classification). - # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required - # @param [Array] props, propositionalization of neighbors and query structure e.g. [ Array_for_q, two-nested-Arrays_for_n ] + # @param [Hash] params Keys `:neighbors,:compound,:features,:p_values,:similarity_algorithm,:prop_kernel,:value_map,:transform` are required # @return [Numeric] A prediction value. - def self.local_svm(neighbors, acts, sims, type, params) + def self.local_svm(acts, sims, type, params) LOGGER.debug "Local SVM (Weighted Tanimoto Kernel)." - neighbor_matches = neighbors.collect{ |n| n[:features] } # URIs of matches + neighbor_matches = params[:neighbors].collect{ |n| n[:features] } # URIs of matches gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel if neighbor_matches.size == 0 raise "No neighbors found." @@ -425,13 +423,11 @@ module OpenTox # Local support vector prediction from neighbors. # Uses propositionalized setting. # Not to be called directly (use local_svm_regression or local_svm_classification). - # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features` - # @param [Array] acts, activities for neighbors. # @param [Array] props, propositionalization of neighbors and query structure e.g. [ Array_for_q, two-nested-Arrays_for_n ] + # @param [Array] acts, activities for neighbors. # @param [String] type, one of "nu-svr" (regression) or "C-bsvc" (classification). - # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required # @return [Numeric] A prediction value. - def self.local_svm_prop(props, acts, type, params) + def self.local_svm_prop(props, acts, type) LOGGER.debug "Local SVM (Propositionalization / Kernlab Kernel)." n_prop = props[0] # is a matrix, i.e. two nested Arrays. @@ -497,6 +493,38 @@ module OpenTox [ nr_cases, nr_features ] end + # Calculate the propositionalization matrix aka instantiation matrix (0/1 entries for features) + # Same for the vector describing the query compound + # @param[Array] neighbors. + # @param[OpenTox::Compound] query compound. + # @param[Array] Dataset Features. + # @param[Array] Fingerprints of neighbors. + # @param[Float] p-values of Features. + def self.get_props (params) + matrix = Array.new + begin + params[:neighbors].each do |n| + n = n[:compound] + row = [] + params[:features].each do |f| + if ! params[:fingerprints][n].nil? + row << (params[:fingerprints][n].include?(f) ? 0.0 : params[:p_values][f]) + else + row << 0.0 + end + end + matrix << row + end + row = [] + params[:features].each do |f| + row << (params[:compound].match([f]).size == 0 ? 0.0 : params[:p_values][f]) + end + rescue Exception => e + LOGGER.debug "get_props failed with '" + $! + "'" + end + [ matrix, row ] + end + end module Substructure @@ -724,7 +752,6 @@ module OpenTox return sum end - # Minimum Frequency # @param [Integer] per-mil value # return [Integer] min-frequency diff --git a/lib/model.rb b/lib/model.rb index c57ea65..13212ee 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -213,9 +213,15 @@ module OpenTox unless database_activity(subjectid) # adds database activity to @prediction_dataset neighbors - props=nil - props = get_props if @prop_kernel && ( @prediction_algorithm.include?("svm") || @prediction_algorithm.include?("local_mlr_prop") ) - prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values, :value_map => @value_map}, props, @transform)") + prediction = eval("#{@prediction_algorithm} ( { :neighbors => @neighbors, + :compound => @compound, + :features => @features, + :p_values => @p_values, + :fingerprints => @fingerprints, + :similarity_algorithm => @similarity_algorithm, + :prop_kernel => @prop_kernel, + :value_map => @value_map, + :transform => @transform } ) ") value_feature_uri = File.join( @uri, "predicted", "value") confidence_feature_uri = File.join( @uri, "predicted", "confidence") @@ -289,32 +295,7 @@ module OpenTox @prediction_dataset end - # Calculate the propositionalization matrix aka instantiation matrix (0/1 entries for features) - # Same for the vector describing the query compound - def get_props - matrix = Array.new - begin - @neighbors.each do |n| - n = n[:compound] - row = [] - @features.each do |f| - if ! @fingerprints[n].nil? - row << (@fingerprints[n].include?(f) ? 0.0 : @p_values[f]) - else - row << 0.0 - end - end - matrix << row - end - row = [] - @features.each do |f| - row << (@compound.match([f]).size == 0 ? 0.0 : @p_values[f]) - end - rescue Exception => e - LOGGER.debug "get_props failed with '" + $! + "'" - end - [ matrix, row ] - end + # Find neighbors and store them as object variable, access all compounds for that. def neighbors -- cgit v1.2.3 From ca852130c11b49d1a9e0a1c50a8739f3244ec7ff Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Fri, 15 Jul 2011 10:44:44 +0200 Subject: Removed statsample-optimization because it wants statsample 0.x --- Rakefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Rakefile b/Rakefile index 8cdf8c5..845b967 100644 --- a/Rakefile +++ b/Rakefile @@ -45,7 +45,7 @@ begin gem.add_dependency "ruby-plot", "=0.5.0" gem.add_dependency "gsl", "=1.14.7" gem.add_dependency "statsample", "=1.1.0" - gem.add_dependency "statsample-optimization", "=2.1.0" + #gem.add_dependency "statsample-optimization", "=2.1.0" gem.add_development_dependency 'jeweler' gem.files = FileList["[A-Z]*", "{bin,generators,lib,test}/**/*", 'lib/jeweler/templates/.gitignore'] -- cgit v1.2.3 From 6f5e97ea74afcd1c104bf3fc9571d8cddfa07021 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Fri, 15 Jul 2011 11:44:43 +0200 Subject: Rack version: 1.3.1 (was 1.3.0) --- Rakefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Rakefile b/Rakefile index 845b967..952affe 100644 --- a/Rakefile +++ b/Rakefile @@ -16,7 +16,7 @@ begin gem.add_dependency "sinatra-respond_to", "=0.7.0" gem.add_dependency "sinatra-static-assets", "=0.5.0" gem.add_dependency "rest-client", "=1.6.1" - gem.add_dependency "rack", "=1.3.0" + gem.add_dependency "rack", "=1.3.1" gem.add_dependency "rack-contrib", "=1.1.0" gem.add_dependency "rack-flash", "=0.1.1" gem.add_dependency "nokogiri", "=1.4.4" -- cgit v1.2.3 From 7164f9ccfb80db2634ba4e9f557fbcd117fcf570 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Fri, 15 Jul 2011 15:34:35 +0200 Subject: Added encapsulation of MLR --- lib/algorithm.rb | 52 ++++++++++++++++++++++++++++++---------------------- 1 file changed, 30 insertions(+), 22 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index f6cbbc8..c63eb44 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -203,13 +203,7 @@ module OpenTox # @return [Numeric] A prediction value. def self.local_mlr_prop(params) - # GSL matrix operations: - # to_a : row-wise conversion to nested array - # - # Statsample operations (build on GSL): - # to_scale: convert into Statsample format - - raise "No neighbors found." unless neighbors.size>0 + raise "No neighbors found." unless params[:neighbors].size>0 begin props = params[:prop_kernel] ? get_props(params) : nil @@ -217,10 +211,33 @@ module OpenTox sims = params[:neighbors].collect { |n| Algorithm.gauss(n[:similarity]) } LOGGER.debug "Local MLR (Propositionalization / GSL)." - n_prop = props[0] # is a matrix, i.e. two nested Arrays. - q_prop = props[1] # is an Array. + prediction = mlr ( {:n_prop => props[0], :q_prop => props[1], :sims => sims, :act => acts} ) + transformer = eval "OpenTox::Algorithm::Transform::#{params[:transform]["class"]}.new ([#{prediction}], #{params[:transform]["offset"]})" + prediction = transformer.values[0] + LOGGER.debug "Prediction is: '" + prediction.to_s + "'." + sims = params[:neighbors].collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors + conf = sims.inject{|sum,x| sum + x } + confidence = conf/params[:neighbors].size if params[:neighbors].size > 0 + {:prediction => prediction, :confidence => confidence} + + rescue Exception => e + LOGGER.debug "#{e.class}: #{e.message}" + end - n_prop = n_prop << q_prop # attach q_prop + end + + def self.mlr(params) + + # GSL matrix operations: + # to_a : row-wise conversion to nested array + # + # Statsample operations (build on GSL): + # to_scale: convert into Statsample format + + begin + n_prop = params[:n_prop].collect { |v| v } + q_prop = params[:q_prop].collect { |v| v } + n_prop << q_prop # attach q_prop nr_cases, nr_features = get_sizes n_prop data_matrix = GSL::Matrix.alloc(n_prop.flatten, nr_cases, nr_features) @@ -241,17 +258,8 @@ module OpenTox # model + support vectors LOGGER.debug "Creating MLR model ..." - c, cov, chisq, status = GSL::MultiFit::wlinear(data_matrix, sims.to_scale.to_gsl, acts.to_scale.to_gsl) - prediction = GSL::MultiFit::linear_est(q_prop.to_scale.to_gsl, c, cov)[0] - transformer = eval "OpenTox::Algorithm::Transform::#{params[:transform]["class"]}.new ([#{prediction}], #{params[:transform]["offset"]})" - prediction = transformer.values[0] - LOGGER.debug "Prediction is: '" + prediction.to_s + "'." - - sims = params[:neighbors].collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors - conf = sims.inject{|sum,x| sum + x } - confidence = conf/params[:neighbors].size if params[:neighbors].size > 0 - {:prediction => prediction, :confidence => confidence} - + c, cov, chisq, status = GSL::MultiFit::wlinear(data_matrix, params[:sims].to_scale.to_gsl, params[:acts].to_scale.to_gsl) + GSL::MultiFit::linear_est(q_prop.to_scale.to_gsl, c, cov)[0] rescue Exception => e LOGGER.debug "#{e.class}: #{e.message}" end @@ -489,7 +497,7 @@ module OpenTox LOGGER.debug "#{e.class}: #{e.message}" LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" end - puts "NRC: #{nr_cases}, NRF: #{nr_features}" + #puts "NRC: #{nr_cases}, NRF: #{nr_features}" [ nr_cases, nr_features ] end -- cgit v1.2.3 From e65db80a108b37d8fac4e223488010bef204be96 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Mon, 18 Jul 2011 10:32:10 +0200 Subject: Objective Feature Selection Added --- lib/algorithm.rb | 38 +++++++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index c63eb44..0d2301d 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -211,7 +211,7 @@ module OpenTox sims = params[:neighbors].collect { |n| Algorithm.gauss(n[:similarity]) } LOGGER.debug "Local MLR (Propositionalization / GSL)." - prediction = mlr ( {:n_prop => props[0], :q_prop => props[1], :sims => sims, :act => acts} ) + prediction = mlr( {:n_prop => props[0], :q_prop => props[1], :sims => sims, :acts => acts} ) transformer = eval "OpenTox::Algorithm::Transform::#{params[:transform]["class"]}.new ([#{prediction}], #{params[:transform]["offset"]})" prediction = transformer.values[0] LOGGER.debug "Prediction is: '" + prediction.to_s + "'." @@ -249,6 +249,10 @@ module OpenTox # Attach intercept column to data intercept = GSL::Matrix.alloc(Array.new(nr_cases,1.0),nr_cases,1) data_matrix = data_matrix.horzcat(intercept) + (0..data_matrix.size2-2).each { |i| + autoscaler = OpenTox::Algorithm::Transform::AutoScale.new(data_matrix.col(i)) + data_matrix.col(i)[0..data_matrix.size1-1] = autoscaler.scaled_values + } # Detach query instance n_prop = data_matrix.to_a @@ -649,7 +653,7 @@ module OpenTox @mean = @scaled_values.to_scale.mean @stdev = @scaled_values.to_scale.standard_deviation_sample @scaled_values = @scaled_values.collect {|vi| vi - @mean } - @scaled_values.collect! {|vi| vi / @stdev } + @scaled_values.collect! {|vi| vi / @stdev } unless @stdev == 0.0 end end @@ -665,21 +669,36 @@ module OpenTox def initialize data_matrix, compression=0.05 begin @data_matrix = data_matrix - @data_matrix_scaled = GSL::Matrix.alloc(@data_matrix.size1, @data_matrix.size2) @compression = compression.to_f @stdev = Array.new @mean = Array.new - # Scaling of Axes + # Objective Feature Selection + raise "Error! PCA needs at least two dimensions." if data_matrix.size2 < 2 + @data_matrix_selected = nil (0..@data_matrix.size2-1).each { |i| - @autoscaler = OpenTox::Algorithm::Transform::AutoScale.new(@data_matrix.col(i)) + if !Algorithm::isnull_or_singular?(@data_matrix.col(i).to_a) + if @data_matrix_selected.nil? + @data_matrix_selected = GSL::Matrix.alloc(@data_matrix.size1, 1) + @data_matrix_selected.col(0)[0..@data_matrix.size1-1] = @data_matrix.col(i) + else + @data_matrix_selected = @data_matrix_selected.horzcat(GSL::Matrix.alloc(@data_matrix.col(i).to_a,@data_matrix.size1, 1)) + end + end + } + raise "Error! PCA needs at least two dimensions." if (@data_matrix_selected.nil? || @data_matrix_selected.size2 < 2) + + # Scaling of Axes + @data_matrix_scaled = GSL::Matrix.alloc(@data_matrix_selected.size1, @data_matrix_selected.size2) + (0..@data_matrix_selected.size2-1).each { |i| + @autoscaler = OpenTox::Algorithm::Transform::AutoScale.new(@data_matrix_selected.col(i)) @data_matrix_scaled.col(i)[0..@data_matrix.size1-1] = @autoscaler.scaled_values @stdev << @autoscaler.stdev @mean << @autoscaler.mean } data_matrix_hash = Hash.new - (0..@data_matrix.size2-1).each { |i| + (0..@data_matrix_scaled.size2-1).each { |i| column_view = @data_matrix_scaled.col(i) data_matrix_hash[i] = column_view.to_scale } @@ -713,7 +732,7 @@ module OpenTox data_matrix_restored = (@eigenvector_matrix * @data_transformed_matrix.transpose).transpose # reverse pca # reverse scaling (0..data_matrix_restored.size2-1).each { |i| - data_matrix_restored.col(i)[0..data_matrix_restored.size1-1] *= @stdev[i] + data_matrix_restored.col(i)[0..data_matrix_restored.size1-1] *= @stdev[i] unless @stdev[i] == 0.0 data_matrix_restored.col(i)[0..data_matrix_restored.size1-1] += @mean[i] } data_matrix_restored @@ -733,6 +752,11 @@ module OpenTox d = 1.0 - x.to_f Math.exp(-(d*d)/(2*sigma*sigma)) end + + def self.isnull_or_singular?(array) + nr_zeroes = array.count(0) + return ((nr_zeroes == array.size) || (nr_zeroes == 0) || (nr_zeroes == 1) || (nr_zeroes == array.size-1) ) + end # Median of an array # @param [Array] Array with values -- cgit v1.2.3 From e5d71ed221d6ad1bcd49334498d6b0daba1784ea Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Mon, 18 Jul 2011 14:02:51 +0200 Subject: Fixed is_null_or_singlar --- lib/algorithm.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 0d2301d..467de1f 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -755,7 +755,7 @@ module OpenTox def self.isnull_or_singular?(array) nr_zeroes = array.count(0) - return ((nr_zeroes == array.size) || (nr_zeroes == 0) || (nr_zeroes == 1) || (nr_zeroes == array.size-1) ) + return ((nr_zeroes == array.size) || (nr_zeroes == array.size-1) ) end # Median of an array -- cgit v1.2.3 From a306d86a3d9fcae80fd2128867e3a36d965fc817 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Mon, 18 Jul 2011 16:40:30 +0200 Subject: Major bug in get_props fixed --- lib/algorithm.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 467de1f..897eb9d 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -520,7 +520,7 @@ module OpenTox row = [] params[:features].each do |f| if ! params[:fingerprints][n].nil? - row << (params[:fingerprints][n].include?(f) ? 0.0 : params[:p_values][f]) + row << (params[:fingerprints][n].include?(f) ? params[:p_values][f] : 0.0) else row << 0.0 end -- cgit v1.2.3 From c6261ec85b8cf8d7c80b4718a927adeade1a127b Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Mon, 18 Jul 2011 17:21:23 +0200 Subject: Fixed null or singular detection to use missing zeroes --- lib/algorithm.rb | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 897eb9d..34c80ab 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -753,9 +753,14 @@ module OpenTox Math.exp(-(d*d)/(2*sigma*sigma)) end + # For symbolic features + # @param [Array] Array to test, must indicate non-occurrence with 0. + # @return [Boolean] Whether the feature is singular or non-occurring or present everywhere. def self.isnull_or_singular?(array) nr_zeroes = array.count(0) - return ((nr_zeroes == array.size) || (nr_zeroes == array.size-1) ) + return (nr_zeroes == array.size) || # remove non-occurring feature + (nr_zeroes == array.size-1) || # remove singular feature + (nr_zeroes == 0) # also remove feature present everywhere end # Median of an array -- cgit v1.2.3 From d21dab9f1d1f01096830ff5b8d8254c2c7ca71cf Mon Sep 17 00:00:00 2001 From: dv Date: Tue, 19 Jul 2011 10:08:50 +0200 Subject: Merge with dev and some changes --- lib/algorithm.rb | 534 +++++++++++++++++++++++++++++++++++++------------------ lib/compound.rb | 28 +++ lib/model.rb | 168 +++-------------- 3 files changed, 419 insertions(+), 311 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 5c00aa8..2f4bea6 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -3,6 +3,7 @@ # avoids compiling R with X R = nil require "rinruby" +require "statsample" module OpenTox @@ -80,18 +81,6 @@ module OpenTox next end - # AM: take log if appropriate - take_logs=true - entry.each do |feature,values| - values.each do |value| - if @prediction_feature.feature_type == "regression" - if (! value.nil?) && (value.to_f <= 0) - take_logs=false - end - end - end - end - value_map=params[:value_map] unless params[:value_map].nil? entry.each do |feature,values| if feature == @prediction_feature.uri @@ -103,7 +92,7 @@ module OpenTox activity= value_map.invert[value].to_i # activities are mapped to 1..n @db_class_sizes[activity-1].nil? ? @db_class_sizes[activity-1]=1 : @db_class_sizes[activity-1]+=1 # AM effect elsif @prediction_feature.feature_type == "regression" - activity= take_logs ? Math.log10(value.to_f) : value.to_f + activity= value.to_f end begin fminer_instance.AddCompound(smiles,id) @@ -162,39 +151,21 @@ module OpenTox # Tanimoto similarity # @param [Array] features_a Features of first compound - # @param [Array][Hash] features_b Features of second compound + # @param [Array] features_b Features of second compound # @param [optional, Hash] weights Weights for all features # @return [Float] (Weighted) tanimoto similarity - def self.tanimoto(features_a,features_b,weights=nil) - LOGGER.debug "dv ------------ class: #{features_b.class}" + def self.tanimoto(features_a,features_b,weights=nil,params=nil) common_features = features_a & features_b all_features = (features_a + features_b).uniq common_p_sum = 0.0 if common_features.size > 0 if weights - if @nr_hits == true - LOGGER.debug "dv --------------- NR_HITS TRUE" - else - common_features.each{|f| common_p_sum += Algorithm.gauss(weights[f])} - all_p_sum = 0.0 - all_features.each{|f| all_p_sum += Algorithm.gauss(weights[f])} - end + LOGGER.debug "dv --------------- common_features: #{common_features}, params_hits: #{params[:compound_hits]}" + common_features.each{|f| common_p_sum += Algorithm.gauss(weights[f]*Algorithm.support(f,params))} + all_p_sum = 0.0 + LOGGER.debug "dv --------------- all_features: #{all_features}" + all_features.each{|f| all_p_sum += Algorithm.gauss(weights[f]*Algorithm.support(f,params))} common_p_sum/all_p_sum - #if frequencies - # #LOGGER.debug "dv --------------- all_features: #{all_features} \n common_features: #{common_features} " - # common_features.each do |f| - # #LOGGER.debug "dv --------------- weight: #{weights[f]} frequency: #{frequencies[f]}" - # common_p_sum += Algorithm.gauss(weights[f]*frequencies[f].to_f) - # end - # all_p_sum = 0.0 - # all_features.each{|f| all_p_sum += Algorithm.gauss(weights[f]*frequencies[f].to_f)} - #else - # common_features.each{|f| common_p_sum += Algorithm.gauss(weights[f])} - # all_p_sum = 0.0 - # all_features.each{|f| all_p_sum += Algorithm.gauss(weights[f])} - #end - # #LOGGER.debug "dv -------------- common_p_sum: #{common_p_sum} all_p_sum: #{all_p_sum}" - # common_p_sum/all_p_sum else common_features.to_f/all_features end @@ -230,78 +201,82 @@ module OpenTox # Local multi-linear regression (MLR) prediction from neighbors. # Uses propositionalized setting. - # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features` - # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required - # @param [Array] props, propositionalization of neighbors and query structure e.g. [ Array_for_q, two-nested-Arrays_for_n ] + # @param [Hash] params Keys `:neighbors,:compound,:features,:p_values,:similarity_algorithm,:prop_kernel,:value_map,:transform` are required # @return [Numeric] A prediction value. - def self.local_mlr_prop(neighbors, params, props) - - take_logs=true - - neighbors.each do |n| - if (! n[:activity].nil?) && (n[:activity].to_f < 0.0) - take_logs = false - end - end - - acts = neighbors.collect do |n| - act = n[:activity] - take_logs ? Math.log10(act.to_f) : act.to_f - end # activities of neighbors for supervised learning - + def self.local_mlr_prop(params) + raise "No neighbors found." unless params[:neighbors].size>0 begin + props = params[:prop_kernel] ? get_props(params) : nil + acts = params[:neighbors].collect { |n| act = n[:activity].to_f } + sims = params[:neighbors].collect { |n| Algorithm.gauss(n[:similarity]) } + LOGGER.debug "Local MLR (Propositionalization / GSL)." - n_prop = props[0] # is a matrix, i.e. two nested Arrays. - q_prop = props[1] # is an Array. - n_prop_x_size = n_prop[0].size - n_prop_y_size = n_prop.size + prediction = mlr( {:n_prop => props[0], :q_prop => props[1], :sims => sims, :acts => acts} ) + transformer = eval "OpenTox::Algorithm::Transform::#{params[:transform]["class"]}.new ([#{prediction}], #{params[:transform]["offset"]})" + prediction = transformer.values[0] + LOGGER.debug "Prediction is: '" + prediction.to_s + "'." + sims = params[:neighbors].collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors + conf = sims.inject{|sum,x| sum + x } + confidence = conf/params[:neighbors].size if params[:neighbors].size > 0 + {:prediction => prediction, :confidence => confidence} - n_prop.flatten! - y_x_rel = n_prop_y_size.to_f / n_prop_x_size - repeat_factor = (1/y_x_rel).ceil - n_prop_tmp = Array.new ; repeat_factor.times { n_prop_tmp.concat n_prop } ; n_prop = n_prop_tmp - acts_tmp = Array.new ; repeat_factor.times { acts_tmp.concat acts } ; acts = acts_tmp + rescue Exception => e + LOGGER.debug "#{e.class}: #{e.message}" + end - if n_prop.size == 0 - raise "No neighbors found." - else - begin - LOGGER.debug "Setting GSL data ..." - # set data - prop_matrix = GSL::Matrix[n_prop, n_prop_y_size * repeat_factor, n_prop_x_size] - y = GSL::Vector[acts] - q_prop = GSL::Vector[q_prop] + end - # model + support vectors - LOGGER.debug "Creating MLR model ..." - work = GSL::MultiFit::Workspace.alloc(n_prop_y_size * repeat_factor, n_prop_x_size) - c, cov, chisq, status = GSL::MultiFit::linear(prop_matrix, y, work) - LOGGER.debug "Predicting ..." - prediction = GSL::MultiFit::linear_est(q_prop, c, cov)[0] - rescue Exception => e - LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" - end - end + def self.mlr(params) - prediction = (take_logs ? 10**(prediction.to_f) : prediction.to_f) - LOGGER.debug "Prediction is: '" + prediction.to_s + "'." + # GSL matrix operations: + # to_a : row-wise conversion to nested array + # + # Statsample operations (build on GSL): + # to_scale: convert into Statsample format + + begin + n_prop = params[:n_prop].collect { |v| v } + q_prop = params[:q_prop].collect { |v| v } + n_prop << q_prop # attach q_prop + nr_cases, nr_features = get_sizes n_prop + data_matrix = GSL::Matrix.alloc(n_prop.flatten, nr_cases, nr_features) + + # Principal Components Analysis + LOGGER.debug "PCA..." + pca = OpenTox::Algorithm::Transform::PCA.new(data_matrix) + data_matrix = pca.data_transformed_matrix + + # Attach intercept column to data + intercept = GSL::Matrix.alloc(Array.new(nr_cases,1.0),nr_cases,1) + data_matrix = data_matrix.horzcat(intercept) + (0..data_matrix.size2-2).each { |i| + autoscaler = OpenTox::Algorithm::Transform::AutoScale.new(data_matrix.col(i)) + data_matrix.col(i)[0..data_matrix.size1-1] = autoscaler.scaled_values + } + + # Detach query instance + n_prop = data_matrix.to_a + q_prop = n_prop.pop + nr_cases, nr_features = get_sizes n_prop + data_matrix = GSL::Matrix.alloc(n_prop.flatten, nr_cases, nr_features) + + # model + support vectors + LOGGER.debug "Creating MLR model ..." + c, cov, chisq, status = GSL::MultiFit::wlinear(data_matrix, params[:sims].to_scale.to_gsl, params[:acts].to_scale.to_gsl) + GSL::MultiFit::linear_est(q_prop.to_scale.to_gsl, c, cov)[0] rescue Exception => e - LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" + LOGGER.debug "#{e.class}: #{e.message}" end - sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors - conf = sims.inject{|sum,x| sum + x } - confidence = conf/neighbors.size if neighbors.size > 0 - {:prediction => prediction, :confidence => confidence} - end + end # Classification with majority vote from neighbors weighted by similarity - # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity` - # @param [optional] params Ignored (only for compatibility with local_svm_regression) - # @return [Hash] Hash with keys `:prediction, :confidence` - def self.weighted_majority_vote(neighbors,params={}, props=nil) + # @param [Hash] params Keys `:neighbors,:compound,:features,:p_values,:similarity_algorithm,:prop_kernel,:value_map,:transform` are required + # @return [Numeric] A prediction value. + def self.weighted_majority_vote(params) + neighbor_contribution = 0.0 confidence_sum = 0.0 confidence = 0.0 @@ -309,7 +284,7 @@ module OpenTox positive_map_value= nil negative_map_value= nil - neighbors.each do |neighbor| + params[:neighbors].each do |neighbor| neighbor_weight = Algorithm.gauss(neighbor[:similarity]).to_f neighbor_contribution += neighbor[:activity].to_f * neighbor_weight @@ -327,71 +302,61 @@ module OpenTox if params[:value_map].size == 2 if confidence_sum >= 0.0 - prediction = 2 unless neighbors.size==0 + prediction = 2 unless params[:neighbors].size==0 elsif confidence_sum < 0.0 - prediction = 1 unless neighbors.size==0 + prediction = 1 unless params[:neighbors].size==0 end else - prediction = (neighbor_contribution/confidence_sum).round unless neighbors.size==0 # AM: new multinomial prediction + prediction = (neighbor_contribution/confidence_sum).round unless params[:neighbors].size==0 # AM: new multinomial prediction end - confidence = confidence_sum/neighbors.size if neighbors.size > 0 + confidence = confidence_sum/params[:neighbors].size if params[:neighbors].size > 0 return {:prediction => prediction, :confidence => confidence.abs} end # Local support vector regression from neighbors - # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features` - # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required - # @return [Hash] Hash with keys `:prediction, :confidence` - def self.local_svm_regression(neighbors, params, props=nil) - take_logs=true - neighbors.each do |n| - if (! n[:activity].nil?) && (n[:activity].to_f < 0.0) - take_logs = false - end - end - acts = neighbors.collect do |n| - act = n[:activity] - take_logs ? Math.log10(act.to_f) : act.to_f - end # activities of neighbors for supervised learning + # @param [Hash] params Keys `:neighbors,:compound,:features,:p_values,:similarity_algorithm,:prop_kernel,:value_map,:transform` are required + # @return [Numeric] A prediction value. + def self.local_svm_regression(params) - sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors + raise "No neighbors found." unless params[:neighbors].size>0 begin - prediction = (props.nil? ? local_svm(neighbors, acts, sims, "nu-svr", params) : local_svm_prop(props, acts, "nu-svr", params)) - prediction = (take_logs ? 10**(prediction.to_f) : prediction.to_f) + props = params[:prop_kernel] ? get_props(params) : nil + acts = params[:neighbors].collect{ |n| n[:activity].to_f } + sims = params[:neighbors].collect{ |n| Algorithm.gauss(n[:similarity]) } + prediction = props.nil? ? local_svm(acts, sims, "nu-svr", params) : local_svm_prop(props, acts, "nu-svr") + transformer = eval "OpenTox::Algorithm::Transform::#{params[:transform]["class"]}.new ([#{prediction}], #{params[:transform]["offset"]})" + prediction = transformer.values[0] LOGGER.debug "Prediction is: '" + prediction.to_s + "'." + conf = sims.inject{|sum,x| sum + x } + confidence = conf/params[:neighbors].size + {:prediction => prediction, :confidence => confidence} rescue Exception => e - LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" + LOGGER.debug "#{e.class}: #{e.message}" + LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" end - - conf = sims.inject{|sum,x| sum + x } - confidence = conf/neighbors.size if neighbors.size > 0 - {:prediction => prediction, :confidence => confidence} end # Local support vector classification from neighbors - # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features` - # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required - # @param [Array] props, propositionalization of neighbors and query structure e.g. [ Array_for_q, two-nested-Arrays_for_n ] - # @return [Hash] Hash with keys `:prediction, :confidence` - def self.local_svm_classification(neighbors, params, props=nil) - acts = neighbors.collect do |n| - act = n[:activity] - end # activities of neighbors for supervised learning -# acts_f = acts.collect {|v| v == true ? 1.0 : 0.0} - acts_f = acts - sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors + # @param [Hash] params Keys `:neighbors,:compound,:features,:p_values,:similarity_algorithm,:prop_kernel,:value_map,:transform` are required + # @return [Numeric] A prediction value. + def self.local_svm_classification(params) + + raise "No neighbors found." unless params[:neighbors].size>0 begin - prediction = (props.nil? ? local_svm(neighbors, acts_f, sims, "C-bsvc", params) : local_svm_prop(props, acts_f, "C-bsvc", params)) + props = params[:prop_kernel] ? get_props(params) : nil + acts = params[:neighbors].collect { |n| act = n[:activity] } + sims = params[:neighbors].collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors + prediction = props.nil? ? local_svm(acts, sims, "C-bsvc", params) : local_svm_prop(props, acts, "C-bsvc") LOGGER.debug "Prediction is: '" + prediction.to_s + "'." + conf = sims.inject{|sum,x| sum + x } + confidence = conf/params[:neighbors].size if params[:neighbors].size > 0 + {:prediction => prediction, :confidence => confidence} rescue Exception => e - LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" + LOGGER.debug "#{e.class}: #{e.message}" + LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" end - - conf = sims.inject{|sum,x| sum + x } - confidence = conf/neighbors.size if neighbors.size > 0 - {:prediction => prediction, :confidence => confidence} end @@ -399,16 +364,14 @@ module OpenTox # Local support vector prediction from neighbors. # Uses pre-defined Kernel Matrix. # Not to be called directly (use local_svm_regression or local_svm_classification). - # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features` # @param [Array] acts, activities for neighbors. # @param [Array] sims, similarities for neighbors. # @param [String] type, one of "nu-svr" (regression) or "C-bsvc" (classification). - # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required - # @param [Array] props, propositionalization of neighbors and query structure e.g. [ Array_for_q, two-nested-Arrays_for_n ] + # @param [Hash] params Keys `:neighbors,:compound,:features,:p_values,:similarity_algorithm,:prop_kernel,:value_map,:transform` are required # @return [Numeric] A prediction value. - def self.local_svm(neighbors, acts, sims, type, params) + def self.local_svm(acts, sims, type, params) LOGGER.debug "Local SVM (Weighted Tanimoto Kernel)." - neighbor_matches = neighbors.collect{ |n| n[:features] } # URIs of matches + neighbor_matches = params[:neighbors].collect{ |n| n[:features] } # URIs of matches gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel if neighbor_matches.size == 0 raise "No neighbors found." @@ -463,7 +426,8 @@ module OpenTox end @r.quit # free R rescue Exception => e - LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" + LOGGER.debug "#{e.class}: #{e.message}" + LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" end end @@ -473,13 +437,11 @@ module OpenTox # Local support vector prediction from neighbors. # Uses propositionalized setting. # Not to be called directly (use local_svm_regression or local_svm_classification). - # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features` - # @param [Array] acts, activities for neighbors. # @param [Array] props, propositionalization of neighbors and query structure e.g. [ Array_for_q, two-nested-Arrays_for_n ] + # @param [Array] acts, activities for neighbors. # @param [String] type, one of "nu-svr" (regression) or "C-bsvc" (classification). - # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required # @return [Numeric] A prediction value. - def self.local_svm_prop(props, acts, type, params) + def self.local_svm_prop(props, acts, type) LOGGER.debug "Local SVM (Propositionalization / Kernlab Kernel)." n_prop = props[0] # is a matrix, i.e. two nested Arrays. @@ -525,12 +487,57 @@ module OpenTox end @r.quit # free R rescue Exception => e - LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" + LOGGER.debug "#{e.class}: #{e.message}" + LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" end end prediction end + # Get X and Y size of a nested Array (Matrix) + def self.get_sizes(matrix) + begin + nr_cases = matrix.size + nr_features = matrix[0].size + rescue Exception => e + LOGGER.debug "#{e.class}: #{e.message}" + LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" + end + #puts "NRC: #{nr_cases}, NRF: #{nr_features}" + [ nr_cases, nr_features ] + end + + # Calculate the propositionalization matrix aka instantiation matrix (0/1 entries for features) + # Same for the vector describing the query compound + # @param[Array] neighbors. + # @param[OpenTox::Compound] query compound. + # @param[Array] Dataset Features. + # @param[Array] Fingerprints of neighbors. + # @param[Float] p-values of Features. + def self.get_props (params) + matrix = Array.new + begin + params[:neighbors].each do |n| + n = n[:compound] + row = [] + params[:features].each do |f| + if ! params[:fingerprints][n].nil? + row << (params[:fingerprints][n].include?(f) ? 0.0 : params[:p_values][f]) + else + row << 0.0 + end + end + matrix << row + end + row = [] + params[:features].each do |f| + row << (params[:compound].match([f]).size == 0 ? 0.0 : params[:p_values][f]) + end + rescue Exception => e + LOGGER.debug "get_props failed with '" + $! + "'" + end + [ matrix, row ] + end end @@ -551,6 +558,195 @@ module OpenTox def features(dataset_uri,compound_uri) end end + + module Transform + include Algorithm + + # The transformer that inverts values. + # 1/x is used, after values have been moved >= 1. + class Inverter + attr_accessor :offset, :values + + # @params[Array] Values to transform. + # @params[Float] Offset for restore. + def initialize *args + case args.size + when 1 + begin + values=args[0] + raise "Cannot transform, values empty." if @values.size==0 + @values = values.collect { |v| -1.0 * v } + @offset = 1.0 - @values.minmax[0] + @offset = -1.0 * @offset if @offset>0.0 + @values.collect! { |v| v - @offset } # slide >1 + @values.collect! { |v| 1 / v } # invert to [0,1] + rescue Exception => e + LOGGER.debug "#{e.class}: #{e.message}" + LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" + end + when 2 + @offset = args[1].to_f + @values = args[0].collect { |v| 1 / v } + @values.collect! { |v| v + @offset } + @values.collect! { |v| -1.0 * v } + end + end + end + + # The transformer that takes logs. + # Log10 is used, after values have been moved > 0. + class Log10 + attr_accessor :offset, :values + + # @params[Array] Values to transform / restore. + # @params[Float] Offset for restore. + def initialize *args + @distance_to_zero = 0.000000001 # 1 / 1 billion + case args.size + when 1 + begin + values=args[0] + raise "Cannot transform, values empty." if values.size==0 + @offset = values.minmax[0] + @offset = -1.0 * @offset if @offset>0.0 + @values = values.collect { |v| v - @offset } # slide > anchor + @values.collect! { |v| v + @distance_to_zero } # + @values.collect! { |v| Math::log10 v } # log10 (can fail) + rescue Exception => e + LOGGER.debug "#{e.class}: #{e.message}" + LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" + end + when 2 + @offset = args[1].to_f + @values = args[0].collect { |v| 10**v } + @values.collect! { |v| v - @distance_to_zero } + @values.collect! { |v| v + @offset } + end + end + end + + # The transformer that does nothing (No OPeration). + class NOP + attr_accessor :offset, :values + + # @params[Array] Values to transform / restore. + # @params[Float] Offset for restore. + def initialize *args + @offset = 0.0 + @distance_to_zero = 0.0 + case args.size + when 1 + @values = args[0] + when 2 + @values = args[0] + end + end + end + + + # Auto-Scaler for Arrays + # Center on mean and divide by standard deviation + class AutoScale + attr_accessor :scaled_values, :mean, :stdev + + # @params[Array] Values to transform. + def initialize values + @scaled_values = values + @mean = @scaled_values.to_scale.mean + @stdev = @scaled_values.to_scale.standard_deviation_sample + @scaled_values = @scaled_values.collect {|vi| vi - @mean } + @scaled_values.collect! {|vi| vi / @stdev } unless @stdev == 0.0 + end + end + + # Principal Components Analysis + # Statsample Library (http://ruby-statsample.rubyforge.org/) by C. Bustos + class PCA + attr_accessor :data_matrix, :data_transformed_matrix, :eigenvector_matrix, :eigenvalue_sums, :autoscaler + + # Creates a transformed dataset as GSL::Matrix. + # @param [GSL::Matrix] Data matrix. + # @param [Float] Compression ratio from [0,1]. + # @return [GSL::Matrix] Data transformed matrix. + def initialize data_matrix, compression=0.05 + begin + @data_matrix = data_matrix + @compression = compression.to_f + @stdev = Array.new + @mean = Array.new + + # Objective Feature Selection + raise "Error! PCA needs at least two dimensions." if data_matrix.size2 < 2 + @data_matrix_selected = nil + (0..@data_matrix.size2-1).each { |i| + if !Algorithm::isnull_or_singular?(@data_matrix.col(i).to_a) + if @data_matrix_selected.nil? + @data_matrix_selected = GSL::Matrix.alloc(@data_matrix.size1, 1) + @data_matrix_selected.col(0)[0..@data_matrix.size1-1] = @data_matrix.col(i) + else + @data_matrix_selected = @data_matrix_selected.horzcat(GSL::Matrix.alloc(@data_matrix.col(i).to_a,@data_matrix.size1, 1)) + end + end + } + raise "Error! PCA needs at least two dimensions." if (@data_matrix_selected.nil? || @data_matrix_selected.size2 < 2) + + # Scaling of Axes + @data_matrix_scaled = GSL::Matrix.alloc(@data_matrix_selected.size1, @data_matrix_selected.size2) + (0..@data_matrix_selected.size2-1).each { |i| + @autoscaler = OpenTox::Algorithm::Transform::AutoScale.new(@data_matrix_selected.col(i)) + @data_matrix_scaled.col(i)[0..@data_matrix.size1-1] = @autoscaler.scaled_values + @stdev << @autoscaler.stdev + @mean << @autoscaler.mean + } + + data_matrix_hash = Hash.new + (0..@data_matrix_scaled.size2-1).each { |i| + column_view = @data_matrix_scaled.col(i) + data_matrix_hash[i] = column_view.to_scale + } + dataset_hash = data_matrix_hash.to_dataset # see http://goo.gl/7XcW9 + cor_matrix=Statsample::Bivariate.correlation_matrix(dataset_hash) + pca=Statsample::Factor::PCA.new(cor_matrix) + pca.eigenvalues.each { |ev| raise "PCA failed!" unless !ev.nan? } + @eigenvalue_sums = Array.new + (0..dataset_hash.fields.size-1).each { |i| + @eigenvalue_sums << pca.eigenvalues[0..i].inject{ |sum, ev| sum + ev } + } + eigenvectors_selected = Array.new + pca.eigenvectors.each_with_index { |ev, i| + if (@eigenvalue_sums[i] <= ((1.0-@compression)*dataset_hash.fields.size)) || (eigenvectors_selected.size == 0) + eigenvectors_selected << ev.to_a + end + } + @eigenvector_matrix = GSL::Matrix.alloc(eigenvectors_selected.flatten, eigenvectors_selected.size, dataset_hash.fields.size).transpose + dataset_matrix = dataset_hash.to_gsl.transpose + @data_transformed_matrix = (@eigenvector_matrix.transpose * dataset_matrix).transpose + rescue Exception => e + LOGGER.debug "#{e.class}: #{e.message}" + LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" + end + end + + # Restores data in the original feature space (possibly with compression loss). + # @return [GSL::Matrix] Data matrix. + def restore + begin + data_matrix_restored = (@eigenvector_matrix * @data_transformed_matrix.transpose).transpose # reverse pca + # reverse scaling + (0..data_matrix_restored.size2-1).each { |i| + data_matrix_restored.col(i)[0..data_matrix_restored.size1-1] *= @stdev[i] unless @stdev[i] == 0.0 + data_matrix_restored.col(i)[0..data_matrix_restored.size1-1] += @mean[i] + } + data_matrix_restored + rescue Exception => e + LOGGER.debug "#{e.class}: #{e.message}" + LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" + end + end + + end + + end # Gauss kernel # @return [Float] @@ -558,6 +754,11 @@ module OpenTox d = 1.0 - x.to_f Math.exp(-(d*d)/(2*sigma*sigma)) end + + def self.isnull_or_singular?(array) + nr_zeroes = array.count(0) + return ((nr_zeroes == array.size) || (nr_zeroes == 0) || (nr_zeroes == 1) || (nr_zeroes == array.size-1) ) + end # Median of an array # @param [Array] Array with values @@ -585,14 +786,13 @@ module OpenTox return sum end - # Minimum Frequency # @param [Integer] per-mil value # return [Integer] min-frequency def self.min_frequency(training_dataset,per_mil) - minfreq = per_mil*training_dataset.compounds.size/1000 # AM sugg. 8-10 per mil for BBRC, 50 per mil for LAST + minfreq = per_mil * training_dataset.compounds.size.to_f / 1000.0 # AM sugg. 8-10 per mil for BBRC, 50 per mil for LAST minfreq = 2 unless minfreq > 2 - minfreq + Integer (minfreq) end # Effect calculation for classification @@ -617,22 +817,16 @@ module OpenTox max end - # Frequency check befor Simularity calculation - # @param [Array] similarity_algorithm, - # @param [Array] features_a - # @param [Array] [Hash] (feature_b => frequency} - # @param [] p_values - # return sim - #def self.similarity(similarity_algorithm, features_a, features_b, p_values = nil) - # if @nr_hits == true - # - # features_b_f = - # eval("#{similarity_algorithm}(features_a,features_b_f,p_values,frequencies_b)") - # else - # eval("#{similarity_algorithm}(features_a,features_b,p_values)") - # end - #end - + # Returns Support value of an fingerprint + # @param [String] smiles of feature + # @param [Hash] params Keys: `fingerprints:, compound:, nr_hits:` are required + # return [Numeric] Support value + def self.support(feature,params) + LOGGER.debug "dv ------------- feature: #{feature}" + LOGGER.debug "dv ------------- compound #{params[:compound]}" + LOGGER.debug "dv ------------- value #{params[:fingerprints][params[:compound]][feature]}" + params[:fingerprints][params[:compound]][feature] + end end end diff --git a/lib/compound.rb b/lib/compound.rb index d374b02..87467d9 100644 --- a/lib/compound.rb +++ b/lib/compound.rb @@ -164,6 +164,34 @@ module OpenTox #smarts_array.collect { |s| s if match?(s)}.compact end + # Match_hits an array of smarts strings, returns hash with matching smarts as key and number of non-unique hits as value + # @example + # compound = OpenTox::Compound.from_name("Benzene") + # compound.match(['cc','cN']) # returns ['cc'] + # @param [Array] smarts_array Array with Smarts strings + # @return [Hash] Hash with matching smarts as key and number of non-unique hits as value + def match_hits(smarts_array) + # avoid recreation of OpenBabel objects + obconversion = OpenBabel::OBConversion.new + obmol = OpenBabel::OBMol.new + obconversion.set_in_format('inchi') + obconversion.read_string(obmol,@inchi) + smarts_pattern = OpenBabel::OBSmartsPattern.new + smarts_hits = {} + smarts_array.collect do |smarts| + LOGGER.debug "dv ----------- all smarts #{smarts}" + smarts_pattern.init(smarts) + if smarts_pattern.match(obmol) + hits = smarts_pattern.get_map_list + smarts_hits[smarts] = hits.size + end + end + LOGGER.debug "dv ----------- smarts => hits #{smarts_hits}" + return smarts_hits + #smarts_array.collect { |s| s if match?(s)}.compact + end + + # Get URI of compound image with highlighted fragments # # @param [Array] activating Array with activating Smarts strings diff --git a/lib/model.rb b/lib/model.rb index 6d9b1cb..52be6d4 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -91,8 +91,7 @@ module OpenTox include Algorithm include Model - attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :frequencies, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim, :subjectid, :prop_kernel, :value_map, :balanced, :nr_hits - + attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim, :subjectid, :prop_kernel, :value_map, :nr_hits, :transform def initialize(uri=nil) @@ -108,18 +107,17 @@ module OpenTox @effects = {} @activities = {} @p_values = {} - @frequencies = {} @fingerprints = {} @value_map = {} @feature_calculation_algorithm = "Substructure.match" @similarity_algorithm = "Similarity.tanimoto" @prediction_algorithm = "Neighbors.weighted_majority_vote" - + @nr_hits = false @min_sim = 0.3 @prop_kernel = false - @balanced = false + @transform = { "class" => "NOP" } end @@ -139,10 +137,10 @@ module OpenTox # Create a new lazar model # @param [optional,Hash] params Parameters for the lazar algorithm (OpenTox::Algorithm::Lazar) # @return [OpenTox::Model::Lazar] lazar model - def self.create(params) + def self.create(params, waiting_task=nil ) subjectid = params[:subjectid] lazar_algorithm = OpenTox::Algorithm::Generic.new File.join( CONFIG[:services]["opentox-algorithm"],"lazar") - model_uri = lazar_algorithm.run(params) + model_uri = lazar_algorithm.run(params, waiting_task) OpenTox::Model::Lazar.find(model_uri, subjectid) end @@ -215,78 +213,18 @@ module OpenTox unless database_activity(subjectid) # adds database activity to @prediction_dataset - if @balanced && OpenTox::Feature.find(metadata[OT.dependentVariables]).feature_type == "classification" - # AM: Balancing, see http://www.maunz.de/wordpress/opentox/2011/balanced-lazar - l = Array.new # larger - s = Array.new # smaller fraction - - raise "no fingerprints in model" if @fingerprints.size==0 - - @fingerprints.each do |training_compound,training_features| - @activities[training_compound].each do |act| - case act.to_s - when "0" - l << training_compound - when "1" - s << training_compound - else - LOGGER.warn "BLAZAR: Activity #{act.to_s} should not be reached (supports only two classes)." - end - end - end - if s.size > l.size then - l,s = s,l # happy swapping - LOGGER.info "BLAZAR: |s|=#{s.size}, |l|=#{l.size}." - end - # determine ratio - modulo = l.size.divmod(s.size)# modulo[0]=ratio, modulo[1]=rest - LOGGER.info "BLAZAR: Balance: #{modulo[0]}, rest #{modulo[1]}." - - # AM: Balanced predictions - addon = (modulo[1].to_f/modulo[0]).ceil # what will be added in each round - slack = (addon!=0 ? modulo[1].divmod(addon)[1] : 0) # what remains for the last round - position = 0 - predictions = Array.new - - prediction_best=nil - neighbors_best=nil - - begin - for i in 1..modulo[0] do - (i == modulo[0]) && (slack>0) ? lr_size = s.size + slack : lr_size = s.size + addon # determine fraction - LOGGER.info "BLAZAR: Neighbors round #{i}: #{position} + #{lr_size}." - neighbors_balanced(s, l, position, lr_size) # get ratio fraction of larger part - if @prop_kernel && ( @prediction_algorithm.include?("svm") || @prediction_algorithm.include?("local_mlr_prop") ) - props = get_props - else - props = nil - end - prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values, :value_map => @value_map}, props)") - if prediction_best.nil? || prediction[:confidence].abs > prediction_best[:confidence].abs - prediction_best=prediction - neighbors_best=@neighbors - end - position = position + lr_size - end - rescue Exception => e - LOGGER.error "BLAZAR failed in prediction: "+e.class.to_s+": "+e.message - end - - prediction=prediction_best - @neighbors=neighbors_best - ### END AM balanced predictions + neighbors + prediction = eval("#{@prediction_algorithm} ( { :neighbors => @neighbors, + :compound => @compound, + :features => @features, + :p_values => @p_values, + :fingerprints => @fingerprints, + :similarity_algorithm => @similarity_algorithm, + :prop_kernel => @prop_kernel, + :value_map => @value_map, + :nr_hits => @nr_hits, + :transform => @transform } ) ") - else # AM: no balancing or regression - LOGGER.info "LAZAR: Unbalanced." - neighbors - if @prop_kernel && ( @prediction_algorithm.include?("svm") || @prediction_algorithm.include?("local_mlr_prop") ) - props = get_props - else - props = nil - end - prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values, :value_map => @value_map}, props)") - end - value_feature_uri = File.join( @uri, "predicted", "value") confidence_feature_uri = File.join( @uri, "predicted", "confidence") @@ -359,79 +297,27 @@ module OpenTox @prediction_dataset end - # Calculate the propositionalization matrix aka instantiation matrix (0/1 entries for features) - # Same for the vector describing the query compound - def get_props - matrix = Array.new - begin - @neighbors.each do |n| - n = n[:compound] - row = [] - @features.each do |f| - if ! @fingerprints[n].nil? - row << (@fingerprints[n].include?(f) ? 0.0 : @p_values[f]) - else - row << 0.0 - end - end - matrix << row - end - row = [] - @features.each do |f| - row << (@compound.match([f]).size == 0 ? 0.0 : @p_values[f]) - end - rescue Exception => e - LOGGER.debug "get_props failed with '" + $! + "'" - end - [ matrix, row ] - end - - # Find neighbors and store them as object variable, access only a subset of compounds for that. - def neighbors_balanced(s, l, start, offset) - @compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm - @neighbors = [] - [ l[start, offset ] , s ].flatten.each do |training_compound| # AM: access only a balanced subset - training_features = @fingerprints[training_compound] - add_neighbor training_features, training_compound - end - - end + # Find neighbors and store them as object variable, access all compounds for that. def neighbors @compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm @neighbors = [] - @fingerprints.each do |training_compound, training_features | # AM: access all compounds - #LOGGER.debug "dv ---------------- training_features: #{training_features.class}, #{training_features}, #{training_compound.class}, #{training_compound} " - add_neighbor training_features, training_compound + @fingerprints.keys.each do |training_compound| # AM: access all compounds + add_neighbor @fingerprints[training_compound].keys, training_compound end end # Adds a neighbor to @neighbors if it passes the similarity threshold. def add_neighbor(training_features, training_compound) - #LOGGER.debug "dv ------ xyz ----- compound_features: '#{@compound_features}' \n training_features: '#{training_features}'\n training_compound: '#{training_compound}'" - sim = 0.0 - #if @frequencies.empty? - # LOGGER.debug "dv ----------------- frequencies is empty goto #{@similarity_algorithm}" - # sim = eval("#{@similarity_algorithm}(@compound_features,training_features,@p_values)") - #else - # LOGGER.debug "dv ----------------- with frequencies goto #{@similarity_algorithm}, training_compound #{training_compound}" - # t_compound_freq = {} - # training_features.each do |f| - # #LOGGER.debug "dv ----------------- with feature: #{f}, training_compound: #{training_compound}\n" - # @frequencies[f.to_s].each do |cf| - # if cf.keys.to_s == training_compound.to_s - # #LOGGER.debug "#{cf.keys} =? #{training_compound}----------------- #{f} #{cf[training_compound.to_s]}" - # t_compound_freq[f] = cf[training_compound.to_s] - # #LOGGER.debug "t_compound_freq: #{t_compound_freq}" - # end - # end - # end - # #LOGGER.debug "t_compound_freq: #{t_compound_freq}" - # sim = eval("#{@similarity_algorithm}(@compound_features,training_features,@p_values,t_compound_freq)") - #end - sim = eval("#{@similarity_algorithm}(@compound_features,training_features,@p_values)") - LOGGER.debug "sim is: #{sim}" + compound_match_hits = {} + if @nr_hits == "true" + compound_match_hits = OpenTox::Compound.new(training_compound).match_hits(@compound_features) + LOGGER.debug "dv ------------ training_compound: #{training_compound}" + LOGGER.debug "dv ------------ training_features: #{training_features}" + LOGGER.debug "dv ------------ compound_features: #{@compound_features}" + end + sim = eval("#{@similarity_algorithm}(training_features, @compound_features, @p_values, ( { :compound => training_compound, :fingerprints => @fingerprints, :nr_hits => @nr_hits, :compound_hits => compound_match_hits } ) )") if sim > @min_sim @activities[training_compound].each do |act| @neighbors << { -- cgit v1.2.3 From 3bb4f9e651d959ec53d8a84cdd0f0e52e4eade9d Mon Sep 17 00:00:00 2001 From: dv Date: Tue, 19 Jul 2011 14:23:29 +0200 Subject: saving --- lib/algorithm.rb | 4 +++- lib/compound.rb | 1 + lib/model.rb | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 2f4bea6..53e58eb 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -153,6 +153,8 @@ module OpenTox # @param [Array] features_a Features of first compound # @param [Array] features_b Features of second compound # @param [optional, Hash] weights Weights for all features + # @param [optional, Hash] params Keys: `fingerprints:, compound:, nr_hits:` are required + # @return [Float] (Weighted) tanimoto similarity def self.tanimoto(features_a,features_b,weights=nil,params=nil) common_features = features_a & features_b @@ -824,7 +826,7 @@ module OpenTox def self.support(feature,params) LOGGER.debug "dv ------------- feature: #{feature}" LOGGER.debug "dv ------------- compound #{params[:compound]}" - LOGGER.debug "dv ------------- value #{params[:fingerprints][params[:compound]][feature]}" + LOGGER.debug "dv ------------- feature value #{params[:fingerprints][params[:compound]][feature]}" params[:fingerprints][params[:compound]][feature] end diff --git a/lib/compound.rb b/lib/compound.rb index 87467d9..3ec321a 100644 --- a/lib/compound.rb +++ b/lib/compound.rb @@ -178,6 +178,7 @@ module OpenTox obconversion.read_string(obmol,@inchi) smarts_pattern = OpenBabel::OBSmartsPattern.new smarts_hits = {} + LOGGER.debug "dv ----------- obmol #{Compound.new(@inchi).to_smiles}" smarts_array.collect do |smarts| LOGGER.debug "dv ----------- all smarts #{smarts}" smarts_pattern.init(smarts) diff --git a/lib/model.rb b/lib/model.rb index 52be6d4..4aefea3 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -312,7 +312,7 @@ module OpenTox def add_neighbor(training_features, training_compound) compound_match_hits = {} if @nr_hits == "true" - compound_match_hits = OpenTox::Compound.new(training_compound).match_hits(@compound_features) + compound_match_hits = @compound.match_hits(@compound_features) #OpenTox::Compound.new(training_compound).match_hits(@compound_features) LOGGER.debug "dv ------------ training_compound: #{training_compound}" LOGGER.debug "dv ------------ training_features: #{training_features}" LOGGER.debug "dv ------------ compound_features: #{@compound_features}" -- cgit v1.2.3 From b52a34f062fc4ad5cacf403e88861b24c3117f91 Mon Sep 17 00:00:00 2001 From: dv Date: Tue, 19 Jul 2011 14:49:34 +0200 Subject: merged with dev and removed comments --- lib/algorithm.rb | 491 +++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 353 insertions(+), 138 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index a50d568..43845fb 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -3,6 +3,7 @@ # avoids compiling R with X R = nil require "rinruby" +require "statsample" module OpenTox @@ -80,18 +81,6 @@ module OpenTox next end - # AM: take log if appropriate - take_logs=true - entry.each do |feature,values| - values.each do |value| - if @prediction_feature.feature_type == "regression" - if (! value.nil?) && (value.to_f <= 0) - take_logs=false - end - end - end - end - value_map=params[:value_map] unless params[:value_map].nil? entry.each do |feature,values| if feature == @prediction_feature.uri @@ -103,7 +92,7 @@ module OpenTox activity= value_map.invert[value].to_i # activities are mapped to 1..n @db_class_sizes[activity-1].nil? ? @db_class_sizes[activity-1]=1 : @db_class_sizes[activity-1]+=1 # AM effect elsif @prediction_feature.feature_type == "regression" - activity= take_logs ? Math.log10(value.to_f) : value.to_f + activity= value.to_f end begin fminer_instance.AddCompound(smiles,id) @@ -210,78 +199,82 @@ module OpenTox # Local multi-linear regression (MLR) prediction from neighbors. # Uses propositionalized setting. - # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features` - # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required - # @param [Array] props, propositionalization of neighbors and query structure e.g. [ Array_for_q, two-nested-Arrays_for_n ] + # @param [Hash] params Keys `:neighbors,:compound,:features,:p_values,:similarity_algorithm,:prop_kernel,:value_map,:transform` are required # @return [Numeric] A prediction value. - def self.local_mlr_prop(neighbors, params, props) - - take_logs=true - - neighbors.each do |n| - if (! n[:activity].nil?) && (n[:activity].to_f < 0.0) - take_logs = false - end - end - - acts = neighbors.collect do |n| - act = n[:activity] - take_logs ? Math.log10(act.to_f) : act.to_f - end # activities of neighbors for supervised learning - + def self.local_mlr_prop(params) + raise "No neighbors found." unless params[:neighbors].size>0 begin + props = params[:prop_kernel] ? get_props(params) : nil + acts = params[:neighbors].collect { |n| act = n[:activity].to_f } + sims = params[:neighbors].collect { |n| Algorithm.gauss(n[:similarity]) } + LOGGER.debug "Local MLR (Propositionalization / GSL)." - n_prop = props[0] # is a matrix, i.e. two nested Arrays. - q_prop = props[1] # is an Array. - n_prop_x_size = n_prop[0].size - n_prop_y_size = n_prop.size + prediction = mlr( {:n_prop => props[0], :q_prop => props[1], :sims => sims, :acts => acts} ) + transformer = eval "OpenTox::Algorithm::Transform::#{params[:transform]["class"]}.new ([#{prediction}], #{params[:transform]["offset"]})" + prediction = transformer.values[0] + LOGGER.debug "Prediction is: '" + prediction.to_s + "'." + sims = params[:neighbors].collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors + conf = sims.inject{|sum,x| sum + x } + confidence = conf/params[:neighbors].size if params[:neighbors].size > 0 + {:prediction => prediction, :confidence => confidence} - n_prop.flatten! - y_x_rel = n_prop_y_size.to_f / n_prop_x_size - repeat_factor = (1/y_x_rel).ceil - n_prop_tmp = Array.new ; repeat_factor.times { n_prop_tmp.concat n_prop } ; n_prop = n_prop_tmp - acts_tmp = Array.new ; repeat_factor.times { acts_tmp.concat acts } ; acts = acts_tmp + rescue Exception => e + LOGGER.debug "#{e.class}: #{e.message}" + end - if n_prop.size == 0 - raise "No neighbors found." - else - begin - LOGGER.debug "Setting GSL data ..." - # set data - prop_matrix = GSL::Matrix[n_prop, n_prop_y_size * repeat_factor, n_prop_x_size] - y = GSL::Vector[acts] - q_prop = GSL::Vector[q_prop] + end - # model + support vectors - LOGGER.debug "Creating MLR model ..." - work = GSL::MultiFit::Workspace.alloc(n_prop_y_size * repeat_factor, n_prop_x_size) - c, cov, chisq, status = GSL::MultiFit::linear(prop_matrix, y, work) - LOGGER.debug "Predicting ..." - prediction = GSL::MultiFit::linear_est(q_prop, c, cov)[0] - rescue Exception => e - LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" - end - end + def self.mlr(params) - prediction = (take_logs ? 10**(prediction.to_f) : prediction.to_f) - LOGGER.debug "Prediction is: '" + prediction.to_s + "'." + # GSL matrix operations: + # to_a : row-wise conversion to nested array + # + # Statsample operations (build on GSL): + # to_scale: convert into Statsample format + + begin + n_prop = params[:n_prop].collect { |v| v } + q_prop = params[:q_prop].collect { |v| v } + n_prop << q_prop # attach q_prop + nr_cases, nr_features = get_sizes n_prop + data_matrix = GSL::Matrix.alloc(n_prop.flatten, nr_cases, nr_features) + + # Principal Components Analysis + LOGGER.debug "PCA..." + pca = OpenTox::Algorithm::Transform::PCA.new(data_matrix) + data_matrix = pca.data_transformed_matrix + + # Attach intercept column to data + intercept = GSL::Matrix.alloc(Array.new(nr_cases,1.0),nr_cases,1) + data_matrix = data_matrix.horzcat(intercept) + (0..data_matrix.size2-2).each { |i| + autoscaler = OpenTox::Algorithm::Transform::AutoScale.new(data_matrix.col(i)) + data_matrix.col(i)[0..data_matrix.size1-1] = autoscaler.scaled_values + } + + # Detach query instance + n_prop = data_matrix.to_a + q_prop = n_prop.pop + nr_cases, nr_features = get_sizes n_prop + data_matrix = GSL::Matrix.alloc(n_prop.flatten, nr_cases, nr_features) + + # model + support vectors + LOGGER.debug "Creating MLR model ..." + c, cov, chisq, status = GSL::MultiFit::wlinear(data_matrix, params[:sims].to_scale.to_gsl, params[:acts].to_scale.to_gsl) + GSL::MultiFit::linear_est(q_prop.to_scale.to_gsl, c, cov)[0] rescue Exception => e - LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" + LOGGER.debug "#{e.class}: #{e.message}" end - sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors - conf = sims.inject{|sum,x| sum + x } - confidence = conf/neighbors.size if neighbors.size > 0 - {:prediction => prediction, :confidence => confidence} - end + end # Classification with majority vote from neighbors weighted by similarity - # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity` - # @param [optional] params Ignored (only for compatibility with local_svm_regression) - # @return [Hash] Hash with keys `:prediction, :confidence` - def self.weighted_majority_vote(neighbors,params={}, props=nil) + # @param [Hash] params Keys `:neighbors,:compound,:features,:p_values,:similarity_algorithm,:prop_kernel,:value_map,:transform` are required + # @return [Numeric] A prediction value. + def self.weighted_majority_vote(params) + neighbor_contribution = 0.0 confidence_sum = 0.0 confidence = 0.0 @@ -289,7 +282,7 @@ module OpenTox positive_map_value= nil negative_map_value= nil - neighbors.each do |neighbor| + params[:neighbors].each do |neighbor| neighbor_weight = Algorithm.gauss(neighbor[:similarity]).to_f neighbor_contribution += neighbor[:activity].to_f * neighbor_weight @@ -307,89 +300,71 @@ module OpenTox if params[:value_map].size == 2 if confidence_sum >= 0.0 - prediction = 2 unless neighbors.size==0 + prediction = 2 unless params[:neighbors].size==0 elsif confidence_sum < 0.0 - prediction = 1 unless neighbors.size==0 + prediction = 1 unless params[:neighbors].size==0 end else - prediction = (neighbor_contribution/confidence_sum).round unless neighbors.size==0 # AM: new multinomial prediction + prediction = (neighbor_contribution/confidence_sum).round unless params[:neighbors].size==0 # AM: new multinomial prediction end - confidence = confidence_sum/neighbors.size if neighbors.size > 0 + confidence = confidence_sum/params[:neighbors].size if params[:neighbors].size > 0 return {:prediction => prediction, :confidence => confidence.abs} end # Local support vector regression from neighbors - # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features` - # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required - # @return [Hash] Hash with keys `:prediction, :confidence` - def self.local_svm_regression(neighbors, params, props=nil) - take_logs=true - neighbors.each do |n| - if (! n[:activity].nil?) && (n[:activity].to_f < 0.0) - take_logs = false - end - end - acts = neighbors.collect do |n| - act = n[:activity] - take_logs ? Math.log10(act.to_f) : act.to_f - end # activities of neighbors for supervised learning + # @param [Hash] params Keys `:neighbors,:compound,:features,:p_values,:similarity_algorithm,:prop_kernel,:value_map,:transform` are required + # @return [Numeric] A prediction value. + def self.local_svm_regression(params) - sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors + raise "No neighbors found." unless params[:neighbors].size>0 begin - prediction = (props.nil? ? local_svm(neighbors, acts, sims, "nu-svr", params) : local_svm_prop(props, acts, "nu-svr", params)) - prediction = (take_logs ? 10**(prediction.to_f) : prediction.to_f) + props = params[:prop_kernel] ? get_props(params) : nil + acts = params[:neighbors].collect{ |n| n[:activity].to_f } + sims = params[:neighbors].collect{ |n| Algorithm.gauss(n[:similarity]) } + prediction = props.nil? ? local_svm(acts, sims, "nu-svr", params) : local_svm_prop(props, acts, "nu-svr") + transformer = eval "OpenTox::Algorithm::Transform::#{params[:transform]["class"]}.new ([#{prediction}], #{params[:transform]["offset"]})" + prediction = transformer.values[0] LOGGER.debug "Prediction is: '" + prediction.to_s + "'." - rescue Exception => e - LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" - end - - begin sim_median = Algorithm.median(sims) - #confidence = nil - if sim_median.nil? + if sim_median.nil? + confidence = nil LOGGER.debug "dv ------------ sim_median is nil" else - #@r_sd = RinRuby.new(false,false) - #@r_sd.r_regression_acts = acts - #standard_deviation = @r_sd.pull "as.numeric(sd(r_regression_acts))"#calculate standard deviation - #@r_sd.quit #free R standard_deviation = acts.std_dev - LOGGER.debug "dv ------------ sd: #{standard_deviation}" confidence = (sim_median*Math.exp(-1*standard_deviation)).abs if confidence.nan? confidence = nil end end LOGGER.debug "Confidence is: '" + confidence.to_s + "'." + return {:prediction => prediction, :confidence => confidence} rescue Exception => e - LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" + LOGGER.debug "#{e.class}: #{e.message}" + LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" end - return {:prediction => prediction, :confidence => confidence} + end # Local support vector classification from neighbors - # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features` - # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required - # @param [Array] props, propositionalization of neighbors and query structure e.g. [ Array_for_q, two-nested-Arrays_for_n ] - # @return [Hash] Hash with keys `:prediction, :confidence` - def self.local_svm_classification(neighbors, params, props=nil) - acts = neighbors.collect do |n| - act = n[:activity] - end # activities of neighbors for supervised learning -# acts_f = acts.collect {|v| v == true ? 1.0 : 0.0} - acts_f = acts - sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors + # @param [Hash] params Keys `:neighbors,:compound,:features,:p_values,:similarity_algorithm,:prop_kernel,:value_map,:transform` are required + # @return [Numeric] A prediction value. + def self.local_svm_classification(params) + + raise "No neighbors found." unless params[:neighbors].size>0 begin - prediction = (props.nil? ? local_svm(neighbors, acts_f, sims, "C-bsvc", params) : local_svm_prop(props, acts_f, "C-bsvc", params)) + props = params[:prop_kernel] ? get_props(params) : nil + acts = params[:neighbors].collect { |n| act = n[:activity] } + sims = params[:neighbors].collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors + prediction = props.nil? ? local_svm(acts, sims, "C-bsvc", params) : local_svm_prop(props, acts, "C-bsvc") LOGGER.debug "Prediction is: '" + prediction.to_s + "'." + conf = sims.inject{|sum,x| sum + x } + confidence = conf/params[:neighbors].size if params[:neighbors].size > 0 + {:prediction => prediction, :confidence => confidence} rescue Exception => e - LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" + LOGGER.debug "#{e.class}: #{e.message}" + LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" end - - conf = sims.inject{|sum,x| sum + x } - confidence = conf/neighbors.size if neighbors.size > 0 - {:prediction => prediction, :confidence => confidence} end @@ -397,16 +372,14 @@ module OpenTox # Local support vector prediction from neighbors. # Uses pre-defined Kernel Matrix. # Not to be called directly (use local_svm_regression or local_svm_classification). - # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features` # @param [Array] acts, activities for neighbors. # @param [Array] sims, similarities for neighbors. # @param [String] type, one of "nu-svr" (regression) or "C-bsvc" (classification). - # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required - # @param [Array] props, propositionalization of neighbors and query structure e.g. [ Array_for_q, two-nested-Arrays_for_n ] + # @param [Hash] params Keys `:neighbors,:compound,:features,:p_values,:similarity_algorithm,:prop_kernel,:value_map,:transform` are required # @return [Numeric] A prediction value. - def self.local_svm(neighbors, acts, sims, type, params) + def self.local_svm(acts, sims, type, params) LOGGER.debug "Local SVM (Weighted Tanimoto Kernel)." - neighbor_matches = neighbors.collect{ |n| n[:features] } # URIs of matches + neighbor_matches = params[:neighbors].collect{ |n| n[:features] } # URIs of matches gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel if neighbor_matches.size == 0 raise "No neighbors found." @@ -461,7 +434,8 @@ module OpenTox end @r.quit # free R rescue Exception => e - LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" + LOGGER.debug "#{e.class}: #{e.message}" + LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" end end @@ -471,13 +445,11 @@ module OpenTox # Local support vector prediction from neighbors. # Uses propositionalized setting. # Not to be called directly (use local_svm_regression or local_svm_classification). - # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features` - # @param [Array] acts, activities for neighbors. # @param [Array] props, propositionalization of neighbors and query structure e.g. [ Array_for_q, two-nested-Arrays_for_n ] + # @param [Array] acts, activities for neighbors. # @param [String] type, one of "nu-svr" (regression) or "C-bsvc" (classification). - # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required # @return [Numeric] A prediction value. - def self.local_svm_prop(props, acts, type, params) + def self.local_svm_prop(props, acts, type) LOGGER.debug "Local SVM (Propositionalization / Kernlab Kernel)." n_prop = props[0] # is a matrix, i.e. two nested Arrays. @@ -523,12 +495,57 @@ module OpenTox end @r.quit # free R rescue Exception => e - LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" + LOGGER.debug "#{e.class}: #{e.message}" + LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" end end prediction end + # Get X and Y size of a nested Array (Matrix) + def self.get_sizes(matrix) + begin + nr_cases = matrix.size + nr_features = matrix[0].size + rescue Exception => e + LOGGER.debug "#{e.class}: #{e.message}" + LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" + end + #puts "NRC: #{nr_cases}, NRF: #{nr_features}" + [ nr_cases, nr_features ] + end + + # Calculate the propositionalization matrix aka instantiation matrix (0/1 entries for features) + # Same for the vector describing the query compound + # @param[Array] neighbors. + # @param[OpenTox::Compound] query compound. + # @param[Array] Dataset Features. + # @param[Array] Fingerprints of neighbors. + # @param[Float] p-values of Features. + def self.get_props (params) + matrix = Array.new + begin + params[:neighbors].each do |n| + n = n[:compound] + row = [] + params[:features].each do |f| + if ! params[:fingerprints][n].nil? + row << (params[:fingerprints][n].include?(f) ? params[:p_values][f] : 0.0) + else + row << 0.0 + end + end + matrix << row + end + row = [] + params[:features].each do |f| + row << (params[:compound].match([f]).size == 0 ? 0.0 : params[:p_values][f]) + end + rescue Exception => e + LOGGER.debug "get_props failed with '" + $! + "'" + end + [ matrix, row ] + end end @@ -549,6 +566,195 @@ module OpenTox def features(dataset_uri,compound_uri) end end + + module Transform + include Algorithm + + # The transformer that inverts values. + # 1/x is used, after values have been moved >= 1. + class Inverter + attr_accessor :offset, :values + + # @params[Array] Values to transform. + # @params[Float] Offset for restore. + def initialize *args + case args.size + when 1 + begin + values=args[0] + raise "Cannot transform, values empty." if @values.size==0 + @values = values.collect { |v| -1.0 * v } + @offset = 1.0 - @values.minmax[0] + @offset = -1.0 * @offset if @offset>0.0 + @values.collect! { |v| v - @offset } # slide >1 + @values.collect! { |v| 1 / v } # invert to [0,1] + rescue Exception => e + LOGGER.debug "#{e.class}: #{e.message}" + LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" + end + when 2 + @offset = args[1].to_f + @values = args[0].collect { |v| 1 / v } + @values.collect! { |v| v + @offset } + @values.collect! { |v| -1.0 * v } + end + end + end + + # The transformer that takes logs. + # Log10 is used, after values have been moved > 0. + class Log10 + attr_accessor :offset, :values + + # @params[Array] Values to transform / restore. + # @params[Float] Offset for restore. + def initialize *args + @distance_to_zero = 0.000000001 # 1 / 1 billion + case args.size + when 1 + begin + values=args[0] + raise "Cannot transform, values empty." if values.size==0 + @offset = values.minmax[0] + @offset = -1.0 * @offset if @offset>0.0 + @values = values.collect { |v| v - @offset } # slide > anchor + @values.collect! { |v| v + @distance_to_zero } # + @values.collect! { |v| Math::log10 v } # log10 (can fail) + rescue Exception => e + LOGGER.debug "#{e.class}: #{e.message}" + LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" + end + when 2 + @offset = args[1].to_f + @values = args[0].collect { |v| 10**v } + @values.collect! { |v| v - @distance_to_zero } + @values.collect! { |v| v + @offset } + end + end + end + + # The transformer that does nothing (No OPeration). + class NOP + attr_accessor :offset, :values + + # @params[Array] Values to transform / restore. + # @params[Float] Offset for restore. + def initialize *args + @offset = 0.0 + @distance_to_zero = 0.0 + case args.size + when 1 + @values = args[0] + when 2 + @values = args[0] + end + end + end + + + # Auto-Scaler for Arrays + # Center on mean and divide by standard deviation + class AutoScale + attr_accessor :scaled_values, :mean, :stdev + + # @params[Array] Values to transform. + def initialize values + @scaled_values = values + @mean = @scaled_values.to_scale.mean + @stdev = @scaled_values.to_scale.standard_deviation_sample + @scaled_values = @scaled_values.collect {|vi| vi - @mean } + @scaled_values.collect! {|vi| vi / @stdev } unless @stdev == 0.0 + end + end + + # Principal Components Analysis + # Statsample Library (http://ruby-statsample.rubyforge.org/) by C. Bustos + class PCA + attr_accessor :data_matrix, :data_transformed_matrix, :eigenvector_matrix, :eigenvalue_sums, :autoscaler + + # Creates a transformed dataset as GSL::Matrix. + # @param [GSL::Matrix] Data matrix. + # @param [Float] Compression ratio from [0,1]. + # @return [GSL::Matrix] Data transformed matrix. + def initialize data_matrix, compression=0.05 + begin + @data_matrix = data_matrix + @compression = compression.to_f + @stdev = Array.new + @mean = Array.new + + # Objective Feature Selection + raise "Error! PCA needs at least two dimensions." if data_matrix.size2 < 2 + @data_matrix_selected = nil + (0..@data_matrix.size2-1).each { |i| + if !Algorithm::isnull_or_singular?(@data_matrix.col(i).to_a) + if @data_matrix_selected.nil? + @data_matrix_selected = GSL::Matrix.alloc(@data_matrix.size1, 1) + @data_matrix_selected.col(0)[0..@data_matrix.size1-1] = @data_matrix.col(i) + else + @data_matrix_selected = @data_matrix_selected.horzcat(GSL::Matrix.alloc(@data_matrix.col(i).to_a,@data_matrix.size1, 1)) + end + end + } + raise "Error! PCA needs at least two dimensions." if (@data_matrix_selected.nil? || @data_matrix_selected.size2 < 2) + + # Scaling of Axes + @data_matrix_scaled = GSL::Matrix.alloc(@data_matrix_selected.size1, @data_matrix_selected.size2) + (0..@data_matrix_selected.size2-1).each { |i| + @autoscaler = OpenTox::Algorithm::Transform::AutoScale.new(@data_matrix_selected.col(i)) + @data_matrix_scaled.col(i)[0..@data_matrix.size1-1] = @autoscaler.scaled_values + @stdev << @autoscaler.stdev + @mean << @autoscaler.mean + } + + data_matrix_hash = Hash.new + (0..@data_matrix_scaled.size2-1).each { |i| + column_view = @data_matrix_scaled.col(i) + data_matrix_hash[i] = column_view.to_scale + } + dataset_hash = data_matrix_hash.to_dataset # see http://goo.gl/7XcW9 + cor_matrix=Statsample::Bivariate.correlation_matrix(dataset_hash) + pca=Statsample::Factor::PCA.new(cor_matrix) + pca.eigenvalues.each { |ev| raise "PCA failed!" unless !ev.nan? } + @eigenvalue_sums = Array.new + (0..dataset_hash.fields.size-1).each { |i| + @eigenvalue_sums << pca.eigenvalues[0..i].inject{ |sum, ev| sum + ev } + } + eigenvectors_selected = Array.new + pca.eigenvectors.each_with_index { |ev, i| + if (@eigenvalue_sums[i] <= ((1.0-@compression)*dataset_hash.fields.size)) || (eigenvectors_selected.size == 0) + eigenvectors_selected << ev.to_a + end + } + @eigenvector_matrix = GSL::Matrix.alloc(eigenvectors_selected.flatten, eigenvectors_selected.size, dataset_hash.fields.size).transpose + dataset_matrix = dataset_hash.to_gsl.transpose + @data_transformed_matrix = (@eigenvector_matrix.transpose * dataset_matrix).transpose + rescue Exception => e + LOGGER.debug "#{e.class}: #{e.message}" + LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" + end + end + + # Restores data in the original feature space (possibly with compression loss). + # @return [GSL::Matrix] Data matrix. + def restore + begin + data_matrix_restored = (@eigenvector_matrix * @data_transformed_matrix.transpose).transpose # reverse pca + # reverse scaling + (0..data_matrix_restored.size2-1).each { |i| + data_matrix_restored.col(i)[0..data_matrix_restored.size1-1] *= @stdev[i] unless @stdev[i] == 0.0 + data_matrix_restored.col(i)[0..data_matrix_restored.size1-1] += @mean[i] + } + data_matrix_restored + rescue Exception => e + LOGGER.debug "#{e.class}: #{e.message}" + LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" + end + end + + end + + end # Gauss kernel # @return [Float] @@ -556,6 +762,16 @@ module OpenTox d = 1.0 - x.to_f Math.exp(-(d*d)/(2*sigma*sigma)) end + + # For symbolic features + # @param [Array] Array to test, must indicate non-occurrence with 0. + # @return [Boolean] Whether the feature is singular or non-occurring or present everywhere. + def self.isnull_or_singular?(array) + nr_zeroes = array.count(0) + return (nr_zeroes == array.size) || # remove non-occurring feature + (nr_zeroes == array.size-1) || # remove singular feature + (nr_zeroes == 0) # also remove feature present everywhere + end # Median of an array # @param [Array] Array with values @@ -583,14 +799,13 @@ module OpenTox return sum end - # Minimum Frequency # @param [Integer] per-mil value # return [Integer] min-frequency def self.min_frequency(training_dataset,per_mil) - minfreq = per_mil*training_dataset.compounds.size/1000 # AM sugg. 8-10 per mil for BBRC, 50 per mil for LAST + minfreq = per_mil * training_dataset.compounds.size.to_f / 1000.0 # AM sugg. 8-10 per mil for BBRC, 50 per mil for LAST minfreq = 2 unless minfreq > 2 - minfreq + Integer (minfreq) end # Effect calculation for classification -- cgit v1.2.3 From a688cb99ace5cbfd8802951f57c46f1eb1926a0b Mon Sep 17 00:00:00 2001 From: dv Date: Tue, 19 Jul 2011 17:55:13 +0200 Subject: added p_sum_support --- lib/algorithm.rb | 59 ++++++++++++++++++++++++++++++++++++++++++++------------ lib/compound.rb | 4 ++-- lib/model.rb | 10 ++++------ 3 files changed, 53 insertions(+), 20 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 53e58eb..3170efb 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -153,20 +153,41 @@ module OpenTox # @param [Array] features_a Features of first compound # @param [Array] features_b Features of second compound # @param [optional, Hash] weights Weights for all features - # @param [optional, Hash] params Keys: `fingerprints:, compound:, nr_hits:` are required - + # @param [optional, Hash] params Keys: `:training_compound, :compound, :fingerprints, :nr_hits, :compound_features_hits` are required # @return [Float] (Weighted) tanimoto similarity def self.tanimoto(features_a,features_b,weights=nil,params=nil) common_features = features_a & features_b all_features = (features_a + features_b).uniq - common_p_sum = 0.0 + #LOGGER.debug "dv --------------- common: #{common_features}, all: #{all_features}" if common_features.size > 0 if weights - LOGGER.debug "dv --------------- common_features: #{common_features}, params_hits: #{params[:compound_hits]}" - common_features.each{|f| common_p_sum += Algorithm.gauss(weights[f]*Algorithm.support(f,params))} - all_p_sum = 0.0 - LOGGER.debug "dv --------------- all_features: #{all_features}" - all_features.each{|f| all_p_sum += Algorithm.gauss(weights[f]*Algorithm.support(f,params))} + if params[:nr_hits] == "true" + params[:weights] = weights + params[:mode] = "min" + params[:features] = common_features + common_p_sum = Algorithm.p_sum_support(params) + params[:mode] = "max" + params[:features] = all_features + all_p_sum = Algorithm.p_sum_support(params) + #common_p_sum = 0.0 + #common_features.each{|f| + # compound_hits = params[:compound_features_hits][f] + # neighbor_hits = Algorithm.support(f,params) + # common_p = weights[f] * [compound_hits, neighbor_hits].min + # common_p_sum += Algorithm.gauss(common_p) + #} +# all_p_sum = 0.0 + #all_features.each{|f| + # compound_hits = params[:compound_features_hits][f] + # neighbor_hits = Algorithm.support(f,params) + # all_p = weights[f] * [compound_hits, neighbor_hits].max + # all_p_sum += Algorithm.gauss(all_p) + #} + else + common_features.each{|f| common_p_sum += Algorithm.gauss(weights[f])}#*Algorithm.support(f,params))} + all_p_sum = 0.0 + all_features.each{|f| all_p_sum += Algorithm.gauss(weights[f])}#*Algorithm.support(f,params))} + end common_p_sum/all_p_sum else common_features.to_f/all_features @@ -824,12 +845,26 @@ module OpenTox # @param [Hash] params Keys: `fingerprints:, compound:, nr_hits:` are required # return [Numeric] Support value def self.support(feature,params) - LOGGER.debug "dv ------------- feature: #{feature}" - LOGGER.debug "dv ------------- compound #{params[:compound]}" - LOGGER.debug "dv ------------- feature value #{params[:fingerprints][params[:compound]][feature]}" - params[:fingerprints][params[:compound]][feature] + params[:fingerprints][params[:training_compound]][feature] end + # Returns Support value of an fingerprint + # @param [Hash] params Keys: `:weights, :fingerprints, :features, :compound, :nr_hits:, :mode` are required + # return [Numeric] Support value + def self.p_sum_support(params) + p_sum = 0.0 + params[:features].each{|f| + #LOGGER.debug "compound_features_hits: #{params[:compound_features_hits][f]}" + compound_hits = params[:compound_features_hits][f] + #LOGGER.debug "compound_hits: #{compound_hits}" + neighbor_hits = Algorithm.support(f,params) + #LOGGER.debug "neighbor_hits: #{neighbor_hits}" + p_sum += eval "(Algorithm.gauss(params[:weights][f]) * ([compound_hits, neighbor_hits].compact.#{params[:mode]}))" + #LOGGER.debug "p_sum: #{p_sum}" + } + p_sum + end + end end diff --git a/lib/compound.rb b/lib/compound.rb index 3ec321a..616db2c 100644 --- a/lib/compound.rb +++ b/lib/compound.rb @@ -178,9 +178,9 @@ module OpenTox obconversion.read_string(obmol,@inchi) smarts_pattern = OpenBabel::OBSmartsPattern.new smarts_hits = {} - LOGGER.debug "dv ----------- obmol #{Compound.new(@inchi).to_smiles}" + #LOGGER.debug "dv ----------- obmol #{Compound.new(@inchi).to_smiles}" smarts_array.collect do |smarts| - LOGGER.debug "dv ----------- all smarts #{smarts}" + #LOGGER.debug "dv ----------- all smarts #{smarts}" smarts_pattern.init(smarts) if smarts_pattern.match(obmol) hits = smarts_pattern.get_map_list diff --git a/lib/model.rb b/lib/model.rb index 4aefea3..f9f2685 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -310,14 +310,12 @@ module OpenTox # Adds a neighbor to @neighbors if it passes the similarity threshold. def add_neighbor(training_features, training_compound) - compound_match_hits = {} + compound_features_hits = {} if @nr_hits == "true" - compound_match_hits = @compound.match_hits(@compound_features) #OpenTox::Compound.new(training_compound).match_hits(@compound_features) - LOGGER.debug "dv ------------ training_compound: #{training_compound}" - LOGGER.debug "dv ------------ training_features: #{training_features}" - LOGGER.debug "dv ------------ compound_features: #{@compound_features}" + compound_features_hits = @compound.match_hits(@compound_features) #OpenTox::Compound.new(training_compound).match_hits(@compound_features) + LOGGER.debug "dv ------------ compound_features_hits: #{@compound_features_hits}" end - sim = eval("#{@similarity_algorithm}(training_features, @compound_features, @p_values, ( { :compound => training_compound, :fingerprints => @fingerprints, :nr_hits => @nr_hits, :compound_hits => compound_match_hits } ) )") + sim = eval("#{@similarity_algorithm}(training_features, @compound_features, @p_values, ( { :training_compound => training_compound, :compound => @compound.uri, :fingerprints => @fingerprints, :nr_hits => @nr_hits, :compound_features_hits => compound_features_hits } ) )") if sim > @min_sim @activities[training_compound].each do |act| @neighbors << { -- cgit v1.2.3 From de4d04a7f30e1c1743c14b81ee977fe5e750c5c8 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Wed, 20 Jul 2011 10:05:42 +0200 Subject: Fixed common_p_sum --- lib/algorithm.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 528c426..352538b 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -184,6 +184,7 @@ module OpenTox # all_p_sum += Algorithm.gauss(all_p) #} else + common_p_sum = 0.0 common_features.each{|f| common_p_sum += Algorithm.gauss(weights[f])}#*Algorithm.support(f,params))} all_p_sum = 0.0 all_features.each{|f| all_p_sum += Algorithm.gauss(weights[f])}#*Algorithm.support(f,params))} @@ -873,7 +874,7 @@ module OpenTox end # Returns Support value of an fingerprint - # @param [Hash] params Keys: `:weights, :fingerprints, :features, :compound, :nr_hits:, :mode` are required + # @param [Hash] params Keys: `:compound_features_hits, :weights, :fingerprints, :features, :compound, :nr_hits:, :mode` are required # return [Numeric] Support value def self.p_sum_support(params) p_sum = 0.0 -- cgit v1.2.3 From f938a796945c3b5f9bd29c6878facfc30aa1f926 Mon Sep 17 00:00:00 2001 From: dv Date: Wed, 20 Jul 2011 11:03:24 +0200 Subject: modified arguments in add_neighbor,removed comments --- lib/algorithm.rb | 22 ++-------------------- lib/model.rb | 9 ++++++++- 2 files changed, 10 insertions(+), 21 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 352538b..5f3c328 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -169,25 +169,11 @@ module OpenTox params[:mode] = "max" params[:features] = all_features all_p_sum = Algorithm.p_sum_support(params) - #common_p_sum = 0.0 - #common_features.each{|f| - # compound_hits = params[:compound_features_hits][f] - # neighbor_hits = Algorithm.support(f,params) - # common_p = weights[f] * [compound_hits, neighbor_hits].min - # common_p_sum += Algorithm.gauss(common_p) - #} -# all_p_sum = 0.0 - #all_features.each{|f| - # compound_hits = params[:compound_features_hits][f] - # neighbor_hits = Algorithm.support(f,params) - # all_p = weights[f] * [compound_hits, neighbor_hits].max - # all_p_sum += Algorithm.gauss(all_p) - #} else common_p_sum = 0.0 - common_features.each{|f| common_p_sum += Algorithm.gauss(weights[f])}#*Algorithm.support(f,params))} + common_features.each{|f| common_p_sum += Algorithm.gauss(weights[f])} all_p_sum = 0.0 - all_features.each{|f| all_p_sum += Algorithm.gauss(weights[f])}#*Algorithm.support(f,params))} + all_features.each{|f| all_p_sum += Algorithm.gauss(weights[f])} end common_p_sum/all_p_sum else @@ -879,13 +865,9 @@ module OpenTox def self.p_sum_support(params) p_sum = 0.0 params[:features].each{|f| - #LOGGER.debug "compound_features_hits: #{params[:compound_features_hits][f]}" compound_hits = params[:compound_features_hits][f] - #LOGGER.debug "compound_hits: #{compound_hits}" neighbor_hits = Algorithm.support(f,params) - #LOGGER.debug "neighbor_hits: #{neighbor_hits}" p_sum += eval "(Algorithm.gauss(params[:weights][f]) * ([compound_hits, neighbor_hits].compact.#{params[:mode]}))" - #LOGGER.debug "p_sum: #{p_sum}" } p_sum end diff --git a/lib/model.rb b/lib/model.rb index f9f2685..4cbe95a 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -315,7 +315,14 @@ module OpenTox compound_features_hits = @compound.match_hits(@compound_features) #OpenTox::Compound.new(training_compound).match_hits(@compound_features) LOGGER.debug "dv ------------ compound_features_hits: #{@compound_features_hits}" end - sim = eval("#{@similarity_algorithm}(training_features, @compound_features, @p_values, ( { :training_compound => training_compound, :compound => @compound.uri, :fingerprints => @fingerprints, :nr_hits => @nr_hits, :compound_features_hits => compound_features_hits } ) )") + params = {} + params[:training_compound] = training_compound + params[:compound] = @compound.uri #query compound + params[:fingerprints] = @fingerprints + params[:nr_hits] = nr_hits + params[:compound_features_hits] = compound_features_hits + + sim = eval("#{@similarity_algorithm}(training_features, @compound_features, @p_values, params )") if sim > @min_sim @activities[training_compound].each do |act| @neighbors << { -- cgit v1.2.3 From 62930c5b40a1ed0e4f170d70c2284a004b3d0d55 Mon Sep 17 00:00:00 2001 From: dv Date: Wed, 20 Jul 2011 13:26:31 +0200 Subject: Fixed tanimoto without weights --- lib/algorithm.rb | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 5f3c328..56ab94c 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -161,6 +161,7 @@ module OpenTox #LOGGER.debug "dv --------------- common: #{common_features}, all: #{all_features}" if common_features.size > 0 if weights + LOGGER.debug "nr_hits: #{params[:nr_hits]}" if params[:nr_hits] == "true" params[:weights] = weights params[:mode] = "min" @@ -175,9 +176,11 @@ module OpenTox all_p_sum = 0.0 all_features.each{|f| all_p_sum += Algorithm.gauss(weights[f])} end + LOGGER.debug "common_p_sum: #{common_p_sum}, all_p_sum: #{all_p_sum}, c/a: #{common_p_sum/all_p_sum}" common_p_sum/all_p_sum else - common_features.to_f/all_features + LOGGER.debug "common_features : #{common_features}, all_features: #{all_features}, c/a: #{(common_features.size/all_features.size).to_f}" + (common_features.size/all_features.size).to_f end else 0.0 -- cgit v1.2.3 From 6b322f37618139af79680941c8da0293cc803e6a Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Wed, 20 Jul 2011 15:19:42 +0200 Subject: Fixed nr_hits to bool --- lib/algorithm.rb | 14 +++++++------- lib/model.rb | 4 ++-- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 56ab94c..9982995 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -161,8 +161,8 @@ module OpenTox #LOGGER.debug "dv --------------- common: #{common_features}, all: #{all_features}" if common_features.size > 0 if weights - LOGGER.debug "nr_hits: #{params[:nr_hits]}" - if params[:nr_hits] == "true" + #LOGGER.debug "nr_hits: #{params[:nr_hits]}" + if !params.nil? && params[:nr_hits] params[:weights] = weights params[:mode] = "min" params[:features] = common_features @@ -176,10 +176,10 @@ module OpenTox all_p_sum = 0.0 all_features.each{|f| all_p_sum += Algorithm.gauss(weights[f])} end - LOGGER.debug "common_p_sum: #{common_p_sum}, all_p_sum: #{all_p_sum}, c/a: #{common_p_sum/all_p_sum}" + #LOGGER.debug "common_p_sum: #{common_p_sum}, all_p_sum: #{all_p_sum}, c/a: #{common_p_sum/all_p_sum}" common_p_sum/all_p_sum else - LOGGER.debug "common_features : #{common_features}, all_features: #{all_features}, c/a: #{(common_features.size/all_features.size).to_f}" + #LOGGER.debug "common_features : #{common_features}, all_features: #{all_features}, c/a: #{(common_features.size/all_features.size).to_f}" (common_features.size/all_features.size).to_f end else @@ -225,7 +225,7 @@ module OpenTox sims = params[:neighbors].collect { |n| Algorithm.gauss(n[:similarity]) } LOGGER.debug "Local MLR (Propositionalization / GSL)." prediction = mlr( {:n_prop => props[0], :q_prop => props[1], :sims => sims, :acts => acts} ) - transformer = eval "OpenTox::Algorithm::Transform::#{params[:transform]["class"]}.new ([#{prediction}], #{params[:transform]["offset"]})" + transformer = eval("OpenTox::Algorithm::Transform::#{params[:transform]["class"]}.new ([#{prediction}], #{params[:transform]["offset"]})") prediction = transformer.values[0] LOGGER.debug "Prediction is: '" + prediction.to_s + "'." params[:conf_stdev] = "false" if params[:conf_stdev].nil? @@ -334,7 +334,7 @@ module OpenTox acts = params[:neighbors].collect{ |n| n[:activity].to_f } sims = params[:neighbors].collect{ |n| Algorithm.gauss(n[:similarity]) } prediction = props.nil? ? local_svm(acts, sims, "nu-svr", params) : local_svm_prop(props, acts, "nu-svr") - transformer = eval "OpenTox::Algorithm::Transform::#{params[:transform]["class"]}.new ([#{prediction}], #{params[:transform]["offset"]})" + transformer = eval("OpenTox::Algorithm::Transform::#{params[:transform]["class"]}.new ([#{prediction}], #{params[:transform]["offset"]})") prediction = transformer.values[0] LOGGER.debug "Prediction is: '" + prediction.to_s + "'." params[:conf_stdev] = "false" if params[:conf_stdev].nil? @@ -870,7 +870,7 @@ module OpenTox params[:features].each{|f| compound_hits = params[:compound_features_hits][f] neighbor_hits = Algorithm.support(f,params) - p_sum += eval "(Algorithm.gauss(params[:weights][f]) * ([compound_hits, neighbor_hits].compact.#{params[:mode]}))" + p_sum += eval("(Algorithm.gauss(params[:weights][f]) * ([compound_hits, neighbor_hits].compact.#{params[:mode]}))") } p_sum end diff --git a/lib/model.rb b/lib/model.rb index 4cbe95a..d920f19 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -311,7 +311,7 @@ module OpenTox # Adds a neighbor to @neighbors if it passes the similarity threshold. def add_neighbor(training_features, training_compound) compound_features_hits = {} - if @nr_hits == "true" + if @nr_hits compound_features_hits = @compound.match_hits(@compound_features) #OpenTox::Compound.new(training_compound).match_hits(@compound_features) LOGGER.debug "dv ------------ compound_features_hits: #{@compound_features_hits}" end @@ -319,7 +319,7 @@ module OpenTox params[:training_compound] = training_compound params[:compound] = @compound.uri #query compound params[:fingerprints] = @fingerprints - params[:nr_hits] = nr_hits + params[:nr_hits] = @nr_hits params[:compound_features_hits] = compound_features_hits sim = eval("#{@similarity_algorithm}(training_features, @compound_features, @p_values, params )") -- cgit v1.2.3 From b1e6b8aaeaed95797cd67e13567ac72344e89707 Mon Sep 17 00:00:00 2001 From: dv Date: Wed, 20 Jul 2011 17:19:33 +0200 Subject: Changed fingerprint as add_neightbor argument to training_compound_features_hits --- lib/algorithm.rb | 14 +++----------- lib/model.rb | 10 ++++++---- 2 files changed, 9 insertions(+), 15 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 9982995..df010e1 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -153,7 +153,7 @@ module OpenTox # @param [Array] features_a Features of first compound # @param [Array] features_b Features of second compound # @param [optional, Hash] weights Weights for all features - # @param [optional, Hash] params Keys: `:training_compound, :compound, :fingerprints, :nr_hits, :compound_features_hits` are required + # @param [optional, Hash] params Keys: `:training_compound, :compound, :training_compound_features_hits, :nr_hits, :compound_features_hits` are required # @return [Float] (Weighted) tanimoto similarity def self.tanimoto(features_a,features_b,weights=nil,params=nil) common_features = features_a & features_b @@ -855,21 +855,13 @@ module OpenTox end # Returns Support value of an fingerprint - # @param [String] smiles of feature - # @param [Hash] params Keys: `fingerprints:, compound:, nr_hits:` are required - # return [Numeric] Support value - def self.support(feature,params) - params[:fingerprints][params[:training_compound]][feature] - end - - # Returns Support value of an fingerprint - # @param [Hash] params Keys: `:compound_features_hits, :weights, :fingerprints, :features, :compound, :nr_hits:, :mode` are required + # @param [Hash] params Keys: `:compound_features_hits, :weights, :training_compound_features_hits, :features, :nr_hits:, :mode` are required # return [Numeric] Support value def self.p_sum_support(params) p_sum = 0.0 params[:features].each{|f| compound_hits = params[:compound_features_hits][f] - neighbor_hits = Algorithm.support(f,params) + neighbor_hits = params[:training_compound_features_hits][f] p_sum += eval("(Algorithm.gauss(params[:weights][f]) * ([compound_hits, neighbor_hits].compact.#{params[:mode]}))") } p_sum diff --git a/lib/model.rb b/lib/model.rb index d920f19..a8b33c6 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -311,18 +311,20 @@ module OpenTox # Adds a neighbor to @neighbors if it passes the similarity threshold. def add_neighbor(training_features, training_compound) compound_features_hits = {} + training_compound_features_hits = {} if @nr_hits - compound_features_hits = @compound.match_hits(@compound_features) #OpenTox::Compound.new(training_compound).match_hits(@compound_features) - LOGGER.debug "dv ------------ compound_features_hits: #{@compound_features_hits}" + compound_features_hits = @compound.match_hits(@compound_features) + training_compound_features_hits = @fingerprints[training_compound] + #LOGGER.debug "dv ------------ training_compound_features_hits:#{training_compound_features_hits.class} #{training_compound_features_hits}" end params = {} params[:training_compound] = training_compound params[:compound] = @compound.uri #query compound - params[:fingerprints] = @fingerprints params[:nr_hits] = @nr_hits params[:compound_features_hits] = compound_features_hits + params[:training_compound_features_hits] = training_compound_features_hits - sim = eval("#{@similarity_algorithm}(training_features, @compound_features, @p_values, params )") + sim = eval("#{@similarity_algorithm}(training_features, @compound_features, @p_values, params)") if sim > @min_sim @activities[training_compound].each do |act| @neighbors << { -- cgit v1.2.3 From ce30df7393b3a8d57faa21b441ac0c57dd766691 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Thu, 21 Jul 2011 11:13:44 +0200 Subject: Minor improvements --- lib/algorithm.rb | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index df010e1..73f99ae 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -180,7 +180,7 @@ module OpenTox common_p_sum/all_p_sum else #LOGGER.debug "common_features : #{common_features}, all_features: #{all_features}, c/a: #{(common_features.size/all_features.size).to_f}" - (common_features.size/all_features.size).to_f + common_features.size.to_f/all_features.size.to_f end else 0.0 @@ -237,6 +237,10 @@ module OpenTox end + # Multi-linear regression weighted by similarity. + # Objective Feature Selection, Principal Components Analysis, Scaling of Axes. + # @param [Hash] params Keys `:n_prop, :q_prop, :sims, :acts` are required + # @return [Numeric] A prediction value. def self.mlr(params) # GSL matrix operations: @@ -290,8 +294,6 @@ module OpenTox confidence_sum = 0.0 confidence = 0.0 prediction = nil - positive_map_value= nil - negative_map_value= nil params[:neighbors].each do |neighbor| neighbor_weight = Algorithm.gauss(neighbor[:similarity]).to_f @@ -387,9 +389,20 @@ module OpenTox else # gram matrix (0..(neighbor_matches.length-1)).each do |i| + #neighbor_i_hits = params[:fingerprints][params[:neighbors][i]] + puts + puts params[:fingerprints][params[:neighbors][i]] + puts gram_matrix[i] = [] unless gram_matrix[i] # upper triangle ((i+1)..(neighbor_matches.length-1)).each do |j| + #neighbor_j_hits= params[:fingerprints][params[:neighbors][j]] + puts + puts params[:fingerprints][params[:neighbors][j]] + sim_params = {} + sim_params[:compound_features_hits] = neighbor_i_hits + sim_params[:training_compound_features_hits] = neighbor_j_hits + #sim = eval("#{params[:similarity_algorithm]}(neighbor_matches[i], neighbor_matches[j], params[:p_values], sim_params)") sim = eval("#{params[:similarity_algorithm]}(neighbor_matches[i], neighbor_matches[j], params[:p_values])") gram_matrix[i][j] = Algorithm.gauss(sim) gram_matrix[j] = [] unless gram_matrix[j] -- cgit v1.2.3 From 3733dbfadf1872ee63ff689bedc1681366474612 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Thu, 21 Jul 2011 11:55:59 +0200 Subject: Added nr_hits for regression gram matrix --- lib/algorithm.rb | 19 ++++++++----------- lib/model.rb | 2 -- 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 73f99ae..c655c4b 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -389,21 +389,18 @@ module OpenTox else # gram matrix (0..(neighbor_matches.length-1)).each do |i| - #neighbor_i_hits = params[:fingerprints][params[:neighbors][i]] - puts - puts params[:fingerprints][params[:neighbors][i]] - puts + neighbor_i_hits = params[:fingerprints][params[:neighbors][i][:compound]] gram_matrix[i] = [] unless gram_matrix[i] # upper triangle ((i+1)..(neighbor_matches.length-1)).each do |j| - #neighbor_j_hits= params[:fingerprints][params[:neighbors][j]] - puts - puts params[:fingerprints][params[:neighbors][j]] + neighbor_j_hits= params[:fingerprints][params[:neighbors][j][:compound]] sim_params = {} - sim_params[:compound_features_hits] = neighbor_i_hits - sim_params[:training_compound_features_hits] = neighbor_j_hits - #sim = eval("#{params[:similarity_algorithm]}(neighbor_matches[i], neighbor_matches[j], params[:p_values], sim_params)") - sim = eval("#{params[:similarity_algorithm]}(neighbor_matches[i], neighbor_matches[j], params[:p_values])") + if params[:nr_hits] + sim_params[:nr_hits] = true + sim_params[:compound_features_hits] = neighbor_i_hits + sim_params[:training_compound_features_hits] = neighbor_j_hits + end + sim = eval("#{params[:similarity_algorithm]}(neighbor_matches[i], neighbor_matches[j], params[:p_values], sim_params)") gram_matrix[i][j] = Algorithm.gauss(sim) gram_matrix[j] = [] unless gram_matrix[j] gram_matrix[j][i] = gram_matrix[i][j] # lower triangle diff --git a/lib/model.rb b/lib/model.rb index a8b33c6..fe7f895 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -318,8 +318,6 @@ module OpenTox #LOGGER.debug "dv ------------ training_compound_features_hits:#{training_compound_features_hits.class} #{training_compound_features_hits}" end params = {} - params[:training_compound] = training_compound - params[:compound] = @compound.uri #query compound params[:nr_hits] = @nr_hits params[:compound_features_hits] = compound_features_hits params[:training_compound_features_hits] = training_compound_features_hits -- cgit v1.2.3 From aa3cde05ee8467a280dfc648a7b98e736704a770 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Thu, 21 Jul 2011 12:29:33 +0200 Subject: Enabled nr_hits for get_props --- lib/algorithm.rb | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index c655c4b..199c4d8 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -564,7 +564,7 @@ module OpenTox row = [] params[:features].each do |f| if ! params[:fingerprints][n].nil? - row << (params[:fingerprints][n].include?(f) ? params[:p_values][f] : 0.0) + row << (params[:fingerprints][n].include?(f) ? (params[:p_values][f] * params[:fingerprints][n][f]) : 0.0) else row << 0.0 end @@ -573,7 +573,12 @@ module OpenTox end row = [] params[:features].each do |f| - row << (params[:compound].match([f]).size == 0 ? 0.0 : params[:p_values][f]) + if params[:nr_hits] + compound_feature_hits = params[:compound].match_hits([f]) + row << (compound_feature_hits.size == 0 ? 0.0 : (params[:p_values][f] * compound_feature_hits[f])) + else + row << (params[:compound].match([f]).size == 0 ? 0.0 : params[:p_values][f]) + end end rescue Exception => e LOGGER.debug "get_props failed with '" + $! + "'" -- cgit v1.2.3 From ba3e92cb0c64b5051aa5790f125797bb00eab74a Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Thu, 21 Jul 2011 12:48:08 +0200 Subject: Added zero-variance test --- lib/algorithm.rb | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 199c4d8..22768cc 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -726,7 +726,7 @@ module OpenTox raise "Error! PCA needs at least two dimensions." if data_matrix.size2 < 2 @data_matrix_selected = nil (0..@data_matrix.size2-1).each { |i| - if !Algorithm::isnull_or_singular?(@data_matrix.col(i).to_a) + if !Algorithm::zero_variance?(@data_matrix.col(i).to_a) if @data_matrix_selected.nil? @data_matrix_selected = GSL::Matrix.alloc(@data_matrix.size1, 1) @data_matrix_selected.col(0)[0..@data_matrix.size1-1] = @data_matrix.col(i) @@ -811,6 +811,13 @@ module OpenTox (nr_zeroes == array.size-1) || # remove singular feature (nr_zeroes == 0) # also remove feature present everywhere end + + # For symbolic features + # @param [Array] Array to test, must indicate non-occurrence with 0. + # @return [Boolean] Whether the feature has variance zero. + def self.zero_variance?(array) + return (array.to_scale.variance_sample == 0.0) + end # Median of an array # @param [Array] Array with values -- cgit v1.2.3 From cd536b197b7668f7be37f8b7340aa3f9e8c417b9 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Thu, 21 Jul 2011 12:48:48 +0200 Subject: removed debug --- lib/compound.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/compound.rb b/lib/compound.rb index 616db2c..e7b4da0 100644 --- a/lib/compound.rb +++ b/lib/compound.rb @@ -187,7 +187,7 @@ module OpenTox smarts_hits[smarts] = hits.size end end - LOGGER.debug "dv ----------- smarts => hits #{smarts_hits}" + #LOGGER.debug "dv ----------- smarts => hits #{smarts_hits}" return smarts_hits #smarts_array.collect { |s| s if match?(s)}.compact end -- cgit v1.2.3 From d6d02b31f3785d39f0c01053d632eee217c9dcee Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Thu, 21 Jul 2011 14:10:59 +0200 Subject: Conversion to SDF --- lib/dataset.rb | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/lib/dataset.rb b/lib/dataset.rb index f13c0d3..3573633 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -230,6 +230,14 @@ module OpenTox s.to_rdfxml end + # Get SDF representation of compounds + # @return [String] SDF representation + def to_sdf + @compounds.inject { |sum, c| + sum + c.to_sdf + } + end + # Get name (DC.title) of a feature # @param [String] feature Feature URI # @return [String] Feture title -- cgit v1.2.3 From aa9e6cba5494f2771ade29850aaba7ea854caaf0 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Thu, 21 Jul 2011 14:22:02 +0200 Subject: Fixed conversion to Mol object --- lib/dataset.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/dataset.rb b/lib/dataset.rb index 3573633..1d4ad55 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -234,7 +234,7 @@ module OpenTox # @return [String] SDF representation def to_sdf @compounds.inject { |sum, c| - sum + c.to_sdf + sum + OpenTox::Compound.new(c).to_sdf } end -- cgit v1.2.3 From c9dc0a0be749773a65d2c7f05c0b024e06823a7c Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Thu, 21 Jul 2011 16:02:41 +0200 Subject: Added conf_stdev switch --- lib/algorithm.rb | 6 +++--- lib/model.rb | 4 +++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 22768cc..4a34337 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -228,7 +228,7 @@ module OpenTox transformer = eval("OpenTox::Algorithm::Transform::#{params[:transform]["class"]}.new ([#{prediction}], #{params[:transform]["offset"]})") prediction = transformer.values[0] LOGGER.debug "Prediction is: '" + prediction.to_s + "'." - params[:conf_stdev] = "false" if params[:conf_stdev].nil? + params[:conf_stdev] = false if params[:conf_stdev].nil? confidence = get_confidence({:sims => sims, :acts => acts, :neighbors => params[:neighbors], :conf_stdev => params[:conf_stdev]}) {:prediction => prediction, :confidence => confidence} rescue Exception => e @@ -339,7 +339,7 @@ module OpenTox transformer = eval("OpenTox::Algorithm::Transform::#{params[:transform]["class"]}.new ([#{prediction}], #{params[:transform]["offset"]})") prediction = transformer.values[0] LOGGER.debug "Prediction is: '" + prediction.to_s + "'." - params[:conf_stdev] = "false" if params[:conf_stdev].nil? + params[:conf_stdev] = false if params[:conf_stdev].nil? confidence = get_confidence({:sims => sims, :acts => acts, :neighbors => params[:neighbors], :conf_stdev => params[:conf_stdev]}) {:prediction => prediction, :confidence => confidence} rescue Exception => e @@ -517,7 +517,7 @@ module OpenTox # @param[Hash] Required keys: :sims, :acts, :neighbors, :conf_stdev # @return[Float] Confidence def self.get_confidence(params) - if params[:conf_stdev] == "true" + if params[:conf_stdev] sim_median = Algorithm.median(params[:sims]) if sim_median.nil? confidence = nil diff --git a/lib/model.rb b/lib/model.rb index fe7f895..0f1cc22 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -91,7 +91,7 @@ module OpenTox include Algorithm include Model - attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim, :subjectid, :prop_kernel, :value_map, :nr_hits, :transform + attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim, :subjectid, :prop_kernel, :value_map, :nr_hits, :transform, :conf_stdev def initialize(uri=nil) @@ -118,6 +118,7 @@ module OpenTox @min_sim = 0.3 @prop_kernel = false @transform = { "class" => "NOP" } + @conf_stdev = false end @@ -223,6 +224,7 @@ module OpenTox :prop_kernel => @prop_kernel, :value_map => @value_map, :nr_hits => @nr_hits, + :conf_stdev => @conf_stdev, :transform => @transform } ) ") value_feature_uri = File.join( @uri, "predicted", "value") -- cgit v1.2.3 From ade2d5104dc3e3b7e3bd4cae9c18bef49b78b4eb Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Fri, 22 Jul 2011 16:14:05 +0200 Subject: Fixed SVM algorithms. --- lib/algorithm.rb | 51 +++++++++++++++++++++++++-------------------------- 1 file changed, 25 insertions(+), 26 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 4a34337..5093c34 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -218,8 +218,10 @@ module OpenTox # @return [Numeric] A prediction value. def self.local_mlr_prop(params) - raise "No neighbors found." unless params[:neighbors].size>0 - begin + confidence=0.0 + prediction=nil + + if params[:neighbors].size>0 props = params[:prop_kernel] ? get_props(params) : nil acts = params[:neighbors].collect { |n| act = n[:activity].to_f } sims = params[:neighbors].collect { |n| Algorithm.gauss(n[:similarity]) } @@ -230,10 +232,8 @@ module OpenTox LOGGER.debug "Prediction is: '" + prediction.to_s + "'." params[:conf_stdev] = false if params[:conf_stdev].nil? confidence = get_confidence({:sims => sims, :acts => acts, :neighbors => params[:neighbors], :conf_stdev => params[:conf_stdev]}) - {:prediction => prediction, :confidence => confidence} - rescue Exception => e - LOGGER.debug "#{e.class}: #{e.message}" end + {:prediction => prediction, :confidence => confidence} end @@ -320,8 +320,9 @@ module OpenTox else prediction = (neighbor_contribution/confidence_sum).round unless params[:neighbors].size==0 # AM: new multinomial prediction end - + LOGGER.debug "Prediction is: '" + prediction.to_s + "'." unless prediction.nil? confidence = confidence_sum/params[:neighbors].size if params[:neighbors].size > 0 + LOGGER.debug "Confidence is: '" + confidence.to_s + "'." unless prediction.nil? return {:prediction => prediction, :confidence => confidence.abs} end @@ -330,8 +331,9 @@ module OpenTox # @return [Numeric] A prediction value. def self.local_svm_regression(params) - raise "No neighbors found." unless params[:neighbors].size>0 - begin + confidence = 0.0 + prediction = nil + if params[:neighbors].size>0 props = params[:prop_kernel] ? get_props(params) : nil acts = params[:neighbors].collect{ |n| n[:activity].to_f } sims = params[:neighbors].collect{ |n| Algorithm.gauss(n[:similarity]) } @@ -341,11 +343,8 @@ module OpenTox LOGGER.debug "Prediction is: '" + prediction.to_s + "'." params[:conf_stdev] = false if params[:conf_stdev].nil? confidence = get_confidence({:sims => sims, :acts => acts, :neighbors => params[:neighbors], :conf_stdev => params[:conf_stdev]}) - {:prediction => prediction, :confidence => confidence} - rescue Exception => e - LOGGER.debug "#{e.class}: #{e.message}" - LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" end + {:prediction => prediction, :confidence => confidence} end @@ -354,20 +353,18 @@ module OpenTox # @return [Numeric] A prediction value. def self.local_svm_classification(params) - raise "No neighbors found." unless params[:neighbors].size>0 - begin + confidence = 0.0 + prediction = nil + if params[:neighbors].size>0 props = params[:prop_kernel] ? get_props(params) : nil acts = params[:neighbors].collect { |n| act = n[:activity] } sims = params[:neighbors].collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors prediction = props.nil? ? local_svm(acts, sims, "C-bsvc", params) : local_svm_prop(props, acts, "C-bsvc") LOGGER.debug "Prediction is: '" + prediction.to_s + "'." - conf = sims.inject{|sum,x| sum + x } - confidence = conf/params[:neighbors].size if params[:neighbors].size > 0 - {:prediction => prediction, :confidence => confidence} - rescue Exception => e - LOGGER.debug "#{e.class}: #{e.message}" - LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}" + params[:conf_stdev] = false if params[:conf_stdev].nil? + confidence = get_confidence({:sims => sims, :acts => acts, :neighbors => params[:neighbors], :conf_stdev => params[:conf_stdev]}) end + {:prediction => prediction, :confidence => confidence} end @@ -384,8 +381,10 @@ module OpenTox LOGGER.debug "Local SVM (Weighted Tanimoto Kernel)." neighbor_matches = params[:neighbors].collect{ |n| n[:features] } # URIs of matches gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel - if neighbor_matches.size == 0 - raise "No neighbors found." + + prediction = nil + if acts.to_scale.variance_population == 0 + prediction = acts[0] else # gram matrix (0..(neighbor_matches.length-1)).each do |i| @@ -408,6 +407,7 @@ module OpenTox gram_matrix[i][i] = 1.0 end + #LOGGER.debug gram_matrix.to_yaml @r = RinRuby.new(false,false) # global R instance leads to Socket errors after a large number of requests @r.eval "library('kernlab')" # this requires R package "kernlab" to be installed @@ -466,10 +466,9 @@ module OpenTox n_prop = props[0] # is a matrix, i.e. two nested Arrays. q_prop = props[1] # is an Array. - #neighbor_matches = neighbors.collect{ |n| n[:features] } # URIs of matches - #gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel - if n_prop.size == 0 - raise "No neighbors found." + prediction = nil + if acts.to_scale.variance_population == 0 + prediction = acts[0] else #LOGGER.debug gram_matrix.to_yaml @r = RinRuby.new(false,false) # global R instance leads to Socket errors after a large number of requests -- cgit v1.2.3 From b1b51fcef2a0640f46cdfd5c4ca116bc659c86b9 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Wed, 27 Jul 2011 15:26:36 +0200 Subject: Fixed SDF --- lib/dataset.rb | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/lib/dataset.rb b/lib/dataset.rb index 1d4ad55..d7a8e47 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -233,8 +233,18 @@ module OpenTox # Get SDF representation of compounds # @return [String] SDF representation def to_sdf + sum="" + @compounds.each{ |c| + sum << OpenTox::Compound.new(c).to_inchi + sum << OpenTox::Compound.new(c).to_sdf + } + sum + end + + def to_urilist @compounds.inject { |sum, c| - sum + OpenTox::Compound.new(c).to_sdf + sum << OpenTox::Compound.new(c).uri + sum + "\n" } end -- cgit v1.2.3 From 545af9b1376ed56aa75be709f8d7a7f4c2a5fd5e Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Wed, 27 Jul 2011 17:02:51 +0200 Subject: Fixed statistics to use statsample --- lib/algorithm.rb | 45 +++++---------------------------------------- 1 file changed, 5 insertions(+), 40 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 5093c34..a0ad9a5 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -383,7 +383,7 @@ module OpenTox gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel prediction = nil - if acts.to_scale.variance_population == 0 + if Algorithm::zero_variance? acts prediction = acts[0] else # gram matrix @@ -467,7 +467,7 @@ module OpenTox q_prop = props[1] # is an Array. prediction = nil - if acts.to_scale.variance_population == 0 + if Algorithm::zero_variance? acts prediction = acts[0] else #LOGGER.debug gram_matrix.to_yaml @@ -517,11 +517,11 @@ module OpenTox # @return[Float] Confidence def self.get_confidence(params) if params[:conf_stdev] - sim_median = Algorithm.median(params[:sims]) + sim_median = params[:sims].to_scale.median if sim_median.nil? confidence = nil else - standard_deviation = params[:acts].std_dev + standard_deviation = params[:acts].to_scale.standard_deviation_sample confidence = (sim_median*Math.exp(-1*standard_deviation)).abs if confidence.nan? confidence = nil @@ -818,23 +818,6 @@ module OpenTox return (array.to_scale.variance_sample == 0.0) end - # Median of an array - # @param [Array] Array with values - # @return [Float] Median - def self.median(array) - return nil if array.empty? - array.sort! - m_pos = array.size / 2 - return array.size % 2 == 1 ? array[m_pos] : (array[m_pos-1] + array[m_pos])/2 - end - - # Sum of an array for Numeric values - # @param [Array] Array with values - # @return [Integer] Sum of values - def self.sum(array) - array.inject{|s,x| s + x } - end - # Sum of an array for Arrays. # @param [Array] Array with values # @return [Integer] Sum of size of values @@ -860,7 +843,7 @@ module OpenTox max=0 max_value=0 nr_o = self.sum_size(occurrences) - nr_db = self.sum(db_instances) + nr_db = db_instances.to_scale.sum occurrences.each_with_index { |o,i| # fminer outputs occurrences sorted reverse by activity. actual = o.size.to_f/nr_o @@ -888,24 +871,6 @@ module OpenTox p_sum end - # Adds variance, mean and standard deviation calculation to Array class - module Variance - def sum(&blk) - map(&blk).inject { |sum, element| sum + element } - end - def mean - (sum.to_f / size.to_f) - end - def variance - m = mean - sum { |i| ( i - m )**2 } / (size-1).to_f - end - def std_dev - Math.sqrt(variance) - end - end - Array.send :include, Variance - end end -- cgit v1.2.3 From 4cacbe3d45acaed3f7b4edf11b4115df55c84829 Mon Sep 17 00:00:00 2001 From: dv Date: Wed, 27 Jul 2011 18:47:00 +0200 Subject: Added feature title in result dataset (for ToxPredict) --- lib/model.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/model.rb b/lib/model.rb index 0f1cc22..1ff9df1 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -239,6 +239,8 @@ module OpenTox @prediction_dataset.add @compound.uri, value_feature_uri, prediction[:prediction] end @prediction_dataset.add @compound.uri, confidence_feature_uri, prediction[:confidence] + @prediction_dataset.features[value_feature_uri][DC.title] = @prediction_dataset.metadata[DC.title] + @prediction_dataset.features[confidence_feature_uri][DC.title] = "Confidence" if verbose if @feature_calculation_algorithm == "Substructure.match" -- cgit v1.2.3 From 9bc59a8715e5b12bb989ba3ed2856630f0436b2b Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Thu, 28 Jul 2011 12:12:51 +0000 Subject: initial sdf parser --- lib/dataset.rb | 7 +++ lib/parser.rb | 168 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 174 insertions(+), 1 deletion(-) diff --git a/lib/dataset.rb b/lib/dataset.rb index 1d4ad55..5d5d13d 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -102,6 +102,13 @@ module OpenTox copy parser.load_uri(subjectid) end + def load_sdf(sdf,subjectid=nil) + save(subjectid) unless @uri # get a uri for creating features + parser = Parser::Sdf.new + parser.dataset = self + parser.load_sdf(sdf) + end + # Load CSV string (format specification: http://toxcreate.org/help) # - loads data_entries, compounds, features # - sets metadata (warnings) for parser errors diff --git a/lib/parser.rb b/lib/parser.rb index 07bee67..c9de1ed 100644 --- a/lib/parser.rb +++ b/lib/parser.rb @@ -350,7 +350,6 @@ module OpenTox @dataset end - private def warnings @@ -454,5 +453,172 @@ module OpenTox end end + + class Table + + attr_accessor :data, :features, :compounds + + def initialize + @data = {} + @activity_errors = [] + end + + def feature_values(feature) + @data.collect{|c, row| row[feature]}.uniq.compact + end + + def feature_types(feature) + @data.collect{|c, row| feature_type(row[feature])}.uniq.compact + end + + def features + @data.collect{|c,row| row.keys}.flatten.uniq + end + + def clean_features + ignored_features = [] + features.each do |feature| + if feature_values(feature).size > 5 + if feature_types(feature).size == 1 and feature_types(feature).first == OT.NumericFeature + # REGRESSION + elsif feature_types(feature).include? OT.NumericFeature + @data.each{|c,row| row[feature] = nil unless numeric?(row[feature]) } # delete nominal features + @activity_errors << "Nominal feature values of #{feature} ignored (using numeric features for regression models)." + else + @activity_errors << "Feature #{feature} ignored (more than 5 nominal feature values and no numeric values)." + ignored_features << feature + next + end + elsif feature_values(feature).size <= 1 + @activity_errors << "Feature #{feature} ignored (less than 2 feature values)." + ignored_features << feature + else + # CLASSIFICATION + end + end + ignored_features.each do |feature| + @data.each{ |c,row| row.delete feature } + end + @activity_errors + end + + def add_to_dataset(dataset) + features.each do |feature_name| + feature_uri = File.join(dataset.uri,"feature",URI.encode(feature_name)) + dataset.add_feature(feature_uri,{DC.title => feature_name}) + end + + @data.each do |compound,row| + row.each do |feature,value| + if numeric?(value) + value = value.to_f + elsif value.nil? or value.empty? + value = nil + else + value = value.to_s + end + feature_uri = File.join(dataset.uri,"feature",URI.encode(feature)) + dataset.add(compound, feature_uri, value) + if feature_types(feature).include? OT.NumericFeature + dataset.features[feature_uri][RDF.type] = OT.NumericFeature + else + dataset.features[feature_uri][RDF.type] = OT.NominalFeature + dataset.features[feature_uri][OT.acceptValue] = feature_values(feature) + end + end + end + end + + private + def numeric?(value) + true if Float(value) rescue false + end + + def feature_type(value) + if numeric? value + return OT.NumericFeature + else + return OT.NominalFeature + end + end + end + + # quick hack to enable sdf import via csv + # should be refactored + class Sdf + + attr_accessor :dataset + + def initialize + @data = {} + + #@format_errors = "" + @compound_errors = [] + @activity_errors = [] + @duplicates = {} + end + + def load_sdf(sdf) + + obconversion = OpenBabel::OBConversion.new + obmol = OpenBabel::OBMol.new + obconversion.set_in_and_out_formats "sdf", "inchi" + + table = Table.new + + properties = [] + sdf.each_line { |l| properties << l.to_s if l.match(//,'').strip.chomp } + + LOGGER.debug "SDF import" + rec = 0 + sdf.split(/\$\$\$\$\r*\n/).each do |s| + rec += 1 + obconversion.read_string obmol, s + begin + inchi = obconversion.write_string(obmol).gsub(/\s/,'').chomp + @duplicates[inchi] = [] unless @duplicates[inchi] + @duplicates[inchi] << rec #inchi#+", "+row.join(", ") + compound = Compound.from_inchi inchi + rescue + @compound_errors << "Could not convert structure to InChI, all entries for this compound (record #{rec} have been ignored! \n#{s}" + next + end + row = {} + obmol.get_data.each { |d| row[d.get_attribute] = d.get_value if properties.include?(d.get_attribute) } + table.data[compound.uri] = row + end + + LOGGER.debug "Clean table" + #File.open("/home/ch/tmp_all.yaml","w+"){|f| f.puts table.to_yaml} + # REOVE ignored_features + @activity_errors = table.clean_features + #File.open("/home/ch/tmp.yaml","w+"){|f| f.puts table.to_yaml} + LOGGER.debug "Dataset insert" + table.add_to_dataset @dataset + + warnings + @dataset + + end + + private + + def warnings + + warnings = '' + warnings += "

Incorrect Smiles structures (ignored):

" + @compound_errors.join("
") unless @compound_errors.empty? + warnings += "

Irregular activities (ignored):

" + @activity_errors.join("
") unless @activity_errors.empty? + duplicate_warnings = '' + @duplicates.each {|inchi,lines| duplicate_warnings << "

#{lines.join('
')}

" if lines.size > 1 } + warnings += "

Duplicated structures (all structures/activities used for model building, please make sure, that the results were obtained from independent experiments):

" + duplicate_warnings unless duplicate_warnings.empty? + + @dataset.metadata[OT.Warnings] = warnings + + end + + end end end -- cgit v1.2.3 From 49943ba879a40f2039eae710cb9e0ad8c2ffb04a Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Thu, 28 Jul 2011 15:44:55 +0200 Subject: Generalized routines 'predicted_variables' and 'from_rdf' --- lib/model.rb | 35 +++++++++++++++++++++++------------ lib/parser.rb | 31 +++++++++++++++++++++---------- 2 files changed, 44 insertions(+), 22 deletions(-) diff --git a/lib/model.rb b/lib/model.rb index 0f1cc22..2c62f40 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -50,38 +50,49 @@ module OpenTox @predicted_variable end + def predicted_variables( subjectid ) + load_predicted_variables( subjectid, false ) unless @predicted_variables + @predicted_variables + end + def predicted_confidence( subjectid ) load_predicted_variables( subjectid ) unless @predicted_confidence @predicted_confidence end private - def load_predicted_variables( subjectid=nil ) + def load_predicted_variables( subjectid=nil, use_confidence=true ) load_metadata(subjectid) if @metadata==nil or @metadata.size==0 or (@metadata.size==1 && @metadata.values[0]==@uri) if @metadata[OT.predictedVariables] predictedVariables = @metadata[OT.predictedVariables] if predictedVariables.is_a?(Array) if (predictedVariables.size==1) @predicted_variable = predictedVariables[0] - elsif (predictedVariables.size==2) + elsif (predictedVariables.size>=2) # PENDING identify confidence - conf_index = -1 - predictedVariables.size.times do |i| - f = OpenTox::Feature.find(predictedVariables[i]) - conf_index = i if f.metadata[DC.title]=~/(?i)confidence/ + if use_confidence + conf_index = -1 + predictedVariables.size.times do |i| + f = OpenTox::Feature.find(predictedVariables[i]) + conf_index = i if f.metadata[DC.title]=~/(?i)confidence/ + end + raise "could not estimate predicted variable from model: '"+uri.to_s+ + "', number of predicted-variables==2, but no confidence found" if conf_index==-1 + end + if (predictedVariables.size==2) && use_confidence + @predicted_variable = predictedVariables[1-conf_index] + @predicted_confidence = predictedVariables[conf_index] + else + @predicted_variables = predictedVariables end - raise "could not estimate predicted variable from model: '"+uri.to_s+ - "', number of predicted-variables==2, but no confidence found" if conf_index==-1 - @predicted_variable = predictedVariables[1-conf_index] - @predicted_confidence = predictedVariables[conf_index] else - raise "could not estimate predicted variable from model: '"+uri.to_s+"', number of predicted-variables > 2" + raise "could not estimate predicted variable from model: '"+uri.to_s+"', number of predicted-variables == 0" end else raise "could not estimate predicted variable from model: '"+uri.to_s+"', predicted-variables is no array" end end - raise "could not estimate predicted variable from model: '"+uri.to_s+"'" unless @predicted_variable + raise "could not estimate predicted variable from model: '"+uri.to_s+"'" unless (@predicted_variable || @predicted_variables) end end diff --git a/lib/parser.rb b/lib/parser.rb index 07bee67..a1678ea 100644 --- a/lib/parser.rb +++ b/lib/parser.rb @@ -86,7 +86,11 @@ module OpenTox # @param [String] rdf # @param [String] type of the info (e.g. OT.Task, OT.ErrorReport) needed to get the subject-uri # @return [Owl] with uri and metadata set - def self.from_rdf( rdf, type ) + def self.from_rdf( rdf, type, allow_multiple = false ) + + uris = Array.new + owls = Array.new + # write to file and read convert with rapper into tripples file = Tempfile.new("ot-rdfxml") file.puts rdf @@ -99,20 +103,27 @@ module OpenTox triples.each_line do |line| triple = line.to_triple if triple[1] == RDF['type'] and triple[2]==type - raise "uri already set, two uris found with type: "+type.to_s if uri + if !allow_multiple + raise "uri already set, two uris found with type: "+type.to_s if uri + end uri = triple[0] + uris << uri end end File.delete(file.path) + # load metadata - metadata = {} - triples.each_line do |line| - triple = line.to_triple - metadata[triple[1]] = triple[2].split('^^').first if triple[0] == uri and triple[1] != RDF['type'] - end - owl = Owl::Generic.new(uri) - owl.metadata = metadata - owl + uris.each { |uri| + metadata = {} + triples.each_line do |line| + triple = line.to_triple + metadata[triple[1]] = triple[2].split('^^').first if triple[0] == uri and triple[1] != RDF['type'] + end + owl = Owl::Generic.new(uri) + owl.metadata = metadata + owls << owl + } + allow_multiple ? owls : owls[0] end # Generic parser for all OpenTox classes -- cgit v1.2.3 From 1148087a71ac023a6758c74325ad364d7cda7dbe Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Thu, 28 Jul 2011 15:46:39 +0000 Subject: sdf acceptValues fixed --- lib/parser.rb | 52 +++++++++++++++++++++------------------------------- 1 file changed, 21 insertions(+), 31 deletions(-) diff --git a/lib/parser.rb b/lib/parser.rb index c9de1ed..8fa5847 100644 --- a/lib/parser.rb +++ b/lib/parser.rb @@ -509,21 +509,25 @@ module OpenTox end @data.each do |compound,row| - row.each do |feature,value| - if numeric?(value) - value = value.to_f - elsif value.nil? or value.empty? - value = nil - else - value = value.to_s - end - feature_uri = File.join(dataset.uri,"feature",URI.encode(feature)) - dataset.add(compound, feature_uri, value) - if feature_types(feature).include? OT.NumericFeature - dataset.features[feature_uri][RDF.type] = OT.NumericFeature - else - dataset.features[feature_uri][RDF.type] = OT.NominalFeature - dataset.features[feature_uri][OT.acceptValue] = feature_values(feature) + unless row.empty? + row.each do |feature,value| + if numeric?(value) + value = value.to_f + elsif value.nil? or value.empty? + value = nil + else + value = value.to_s + end + feature_uri = File.join(dataset.uri,"feature",URI.encode(feature)) + dataset.add(compound, feature_uri, value) + #dataset.features[feature_uri][RDF.type] = feature_types(feature) + #dataset.features[feature_uri][OT.acceptValue] = feature_values(feature) + if feature_types(feature).include? OT.NumericFeature + dataset.features[feature_uri][RDF.type] = [OT.NumericFeature] + else + dataset.features[feature_uri][RDF.type] = [OT.NominalFeature] + dataset.features[feature_uri][OT.acceptValue] = feature_values(feature) + end end end end @@ -552,7 +556,6 @@ module OpenTox def initialize @data = {} - #@format_errors = "" @compound_errors = [] @activity_errors = [] @duplicates = {} @@ -572,7 +575,6 @@ module OpenTox properties.sort! properties.collect!{ |p| p.gsub(/<|>/,'').strip.chomp } - LOGGER.debug "SDF import" rec = 0 sdf.split(/\$\$\$\$\r*\n/).each do |s| rec += 1 @@ -591,23 +593,10 @@ module OpenTox table.data[compound.uri] = row end - LOGGER.debug "Clean table" - #File.open("/home/ch/tmp_all.yaml","w+"){|f| f.puts table.to_yaml} - # REOVE ignored_features + # finda and remove ignored_features @activity_errors = table.clean_features - #File.open("/home/ch/tmp.yaml","w+"){|f| f.puts table.to_yaml} - LOGGER.debug "Dataset insert" table.add_to_dataset @dataset - warnings - @dataset - - end - - private - - def warnings - warnings = '' warnings += "

Incorrect Smiles structures (ignored):

" + @compound_errors.join("
") unless @compound_errors.empty? warnings += "

Irregular activities (ignored):

" + @activity_errors.join("
") unless @activity_errors.empty? @@ -616,6 +605,7 @@ module OpenTox warnings += "

Duplicated structures (all structures/activities used for model building, please make sure, that the results were obtained from independent experiments):

" + duplicate_warnings unless duplicate_warnings.empty? @dataset.metadata[OT.Warnings] = warnings + @dataset end -- cgit v1.2.3 From fa37ab0876faaaa2acf37b147924f025a0d8cd9a Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Fri, 29 Jul 2011 11:47:17 +0200 Subject: Added TUM clustering --- lib/algorithm.rb | 108 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ lib/parser.rb | 15 +++----- 2 files changed, 112 insertions(+), 11 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index a0ad9a5..3cf4ecf 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -4,6 +4,7 @@ R = nil require "rinruby" require "statsample" +require 'uri' module OpenTox @@ -210,6 +211,106 @@ module OpenTox end end + # Structural Graph Clustering by TU Munich + # Finds clusters similar to a query structure in a given training dataset + # May be queried for cluster membership of an unknown compound + class StructuralClustering + attr_accessor :training_dataset_uri, :training_threshold, :query_dataset_uri, :query_threshold, :target_clusters_array + + # @params[String] Training dataset_uri + # @params[Float] Similarity threshold for training (optional) + # @params[String] Cluster service uri (no AA) + def initialize training_dataset_uri, training_threshold=0.8, cluster_service_uri = "http://opentox-dev.informatik.tu-muenchen.de:8080/OpenTox/algorithm/StructuralClustering" + + if (training_dataset_uri =~ URI::regexp).nil? || (cluster_service_uri =~ URI::regexp).nil? + raise "Invalid URI." + end + @training_dataset_uri = training_dataset_uri + if !OpenTox::Algorithm.numeric? training_threshold || training_threshold <0 || training_threshold >1 + raise "Training threshold out of bounds." + end + @training_threshold = training_threshold.to_f + + # Train a cluster model + params = {:dataset_uri => @training_dataset_uri, :threshold => @training_threshold } + @cluster_model_uri = OpenTox::RestClientWrapper.post cluster_service_uri, params + cluster_model_rdf = OpenTox::RestClientWrapper.get @cluster_model_uri + @datasets = OpenTox::Parser::Owl.from_rdf cluster_model_rdf, OT.Dataset, true # must extract OT.Datasets from model + + # Process parsed OWL objects + @clusterid_dataset_map = Hash.new + @datasets.each { |d| + begin + d.metadata[OT.hasSource]["Structural Clustering cluster "] = "" # must parse in metadata for string (not elegant) + @clusterid_dataset_map[d.metadata[OT.hasSource].to_i] = d.uri + rescue Exception => e + # ignore other entries! + end + } + end + + # Whether a model has been trained + def trained? + !@cluster_model_uri.nil? + end + + # Instance query: clusters for a compound + # @params[String] Query compound + # @params[Float] Similarity threshold for query to clusters (optional) + def get_clusters query_compound_uri, query_threshold = 0.5 + + if !OpenTox::Algorithm.numeric? query_threshold || query_threshold <0 || query_threshold >1 + raise "Query threshold out of bounds." + end + @query_threshold = query_threshold.to_f + + + # Preparing a query dataset + query_dataset = OpenTox::Dataset.new + @query_dataset_uri = query_dataset.save + query_dataset = OpenTox::Dataset.find @query_dataset_uri + query_dataset.add_compound query_compound_uri + @query_dataset_uri = query_dataset.save + + # Obtaining a clustering for query compound + params = { :dataset_uri => @query_dataset_uri, :threshold => @query_threshold } + cluster_query_dataset_uri = OpenTox::RestClientWrapper.post @cluster_model_uri, params + cluster_query_dataset = OpenTox::Dataset.new cluster_query_dataset_uri + cluster_query_dataset.load_all + + # Reading cluster ids for features from metadata + feature_clusterid_map = Hash.new + pattern="Prediction feature for cluster assignment " # must parse for string in metadata (not elegant) + cluster_query_dataset.features.each { |feature_uri,metadata| + metadata[DC.title][pattern]="" + feature_clusterid_map[feature_uri] = metadata[DC.title].to_i + } + + # Integrity check + unless cluster_query_dataset.compounds.size == 1 + raise "Number of predicted compounds is != 1." + end + + # Process data entry + query_compound_uri = cluster_query_dataset.compounds[0] + @target_clusters_array = Array.new + cluster_query_dataset.features.keys.each { |cluster_membership_feature| + + # Getting dataset URI for cluster + target_cluster = feature_clusterid_map[cluster_membership_feature] + dataset = @clusterid_dataset_map[target_cluster] + + # Finally look up presence + data_entry = cluster_query_dataset.data_entries[query_compound_uri] + present = data_entry[cluster_membership_feature][0] + + # Store result + @target_clusters_array << dataset if present > 0.5 # 0.0 for absence, 1.0 for presence + } + end + + end + module Neighbors # Local multi-linear regression (MLR) prediction from neighbors. @@ -811,6 +912,13 @@ module OpenTox (nr_zeroes == 0) # also remove feature present everywhere end + # Numeric value test + # @param[Object] value + # @return [Boolean] Whether value is a number + def self.numeric?(value) + true if Float(value) rescue false + end + # For symbolic features # @param [Array] Array to test, must indicate non-occurrence with 0. # @return [Boolean] Whether the feature has variance zero. diff --git a/lib/parser.rb b/lib/parser.rb index 4ee4a22..d0975af 100644 --- a/lib/parser.rb +++ b/lib/parser.rb @@ -447,12 +447,8 @@ module OpenTox end end - def numeric?(value) - true if Float(value) rescue false - end - def feature_type(value) - if numeric? value + if OpenTox::Algorithm::numeric? value return OT.NumericFeature else return OT.NominalFeature @@ -493,7 +489,7 @@ module OpenTox if feature_types(feature).size == 1 and feature_types(feature).first == OT.NumericFeature # REGRESSION elsif feature_types(feature).include? OT.NumericFeature - @data.each{|c,row| row[feature] = nil unless numeric?(row[feature]) } # delete nominal features + @data.each{|c,row| row[feature] = nil unless OpenTox::Algorithm::numeric?(row[feature]) } # delete nominal features @activity_errors << "Nominal feature values of #{feature} ignored (using numeric features for regression models)." else @activity_errors << "Feature #{feature} ignored (more than 5 nominal feature values and no numeric values)." @@ -522,7 +518,7 @@ module OpenTox @data.each do |compound,row| unless row.empty? row.each do |feature,value| - if numeric?(value) + if OpenTox::Algorithm::numeric?(value) value = value.to_f elsif value.nil? or value.empty? value = nil @@ -545,12 +541,9 @@ module OpenTox end private - def numeric?(value) - true if Float(value) rescue false - end def feature_type(value) - if numeric? value + if OpenTox::Algorithm::numeric? value return OT.NumericFeature else return OT.NominalFeature -- cgit v1.2.3 From c5c944894def5f96307c042b4048eba963e7b86a Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Fri, 29 Jul 2011 12:10:47 +0000 Subject: SDF export with data items --- lib/dataset.rb | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/lib/dataset.rb b/lib/dataset.rb index 05335dd..5ebad0f 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -243,7 +243,13 @@ module OpenTox sum="" @compounds.each{ |c| sum << OpenTox::Compound.new(c).to_inchi - sum << OpenTox::Compound.new(c).to_sdf + sum << OpenTox::Compound.new(c).to_sdf.sub(/\n\$\$\$\$/,'') + @data_entries[c].each{ |f,v| + sum << "> <\"#{f}\">\n" + sum << v.join(", ") + sum << "\n\n" + } + sum << "$$$$\n" } sum end -- cgit v1.2.3 From 84fde83d9fe568a2bb10ccf302ef833766e464f2 Mon Sep 17 00:00:00 2001 From: dv Date: Mon, 1 Aug 2011 14:03:37 +0200 Subject: added min-max --- lib/algorithm.rb | 4 ++++ lib/dataset.rb | 1 + lib/model.rb | 15 ++++++++++++++- lib/serializer.rb | 2 +- 4 files changed, 20 insertions(+), 2 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index a0ad9a5..e2397f0 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -229,6 +229,8 @@ module OpenTox prediction = mlr( {:n_prop => props[0], :q_prop => props[1], :sims => sims, :acts => acts} ) transformer = eval("OpenTox::Algorithm::Transform::#{params[:transform]["class"]}.new ([#{prediction}], #{params[:transform]["offset"]})") prediction = transformer.values[0] + prediction = nil if prediction.infinite? + #prediction = nil if params[:prediction_min_max][1] < prediction || params[:prediction_min_max][0] > prediction LOGGER.debug "Prediction is: '" + prediction.to_s + "'." params[:conf_stdev] = false if params[:conf_stdev].nil? confidence = get_confidence({:sims => sims, :acts => acts, :neighbors => params[:neighbors], :conf_stdev => params[:conf_stdev]}) @@ -340,6 +342,8 @@ module OpenTox prediction = props.nil? ? local_svm(acts, sims, "nu-svr", params) : local_svm_prop(props, acts, "nu-svr") transformer = eval("OpenTox::Algorithm::Transform::#{params[:transform]["class"]}.new ([#{prediction}], #{params[:transform]["offset"]})") prediction = transformer.values[0] + prediction = nil if prediction.infinite? + #prediction = nil if params[:prediction_min_max][1] < prediction || params[:prediction_min_max][0] > prediction LOGGER.debug "Prediction is: '" + prediction.to_s + "'." params[:conf_stdev] = false if params[:conf_stdev].nil? confidence = get_confidence({:sims => sims, :acts => acts, :neighbors => params[:neighbors], :conf_stdev => params[:conf_stdev]}) diff --git a/lib/dataset.rb b/lib/dataset.rb index d7a8e47..f701699 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -270,6 +270,7 @@ module OpenTox @features[feature] = {} unless @features[feature] @data_entries[compound] = {} unless @data_entries[compound] @data_entries[compound][feature] = [] unless @data_entries[compound][feature] + #LOGGER.debug "dv --------------- #{value.class}" @data_entries[compound][feature] << value if value!=nil end diff --git a/lib/model.rb b/lib/model.rb index 1ff9df1..d5d54b6 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -91,7 +91,7 @@ module OpenTox include Algorithm include Model - attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim, :subjectid, :prop_kernel, :value_map, :nr_hits, :transform, :conf_stdev + attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim, :subjectid, :prop_kernel, :value_map, :nr_hits, :transform, :conf_stdev, :prediction_min_max def initialize(uri=nil) @@ -109,6 +109,7 @@ module OpenTox @p_values = {} @fingerprints = {} @value_map = {} + @prediction_min_max = [] @feature_calculation_algorithm = "Substructure.match" @similarity_algorithm = "Similarity.tanimoto" @@ -212,6 +213,17 @@ module OpenTox } ) end + #if OpenTox::Feature.find(metadata[OT.dependentVariables]).feature_type == "regression" + # all_activities = [] + #all_activities = @activities.values.flatten.collect! { |i| i.to_f } + #LOGGER.debug "dv ------------------ min_toscale: #{all_activities.to_scale.min}" + #LOGGER.debug "dv ------------------ max_toscale: #{all_activities.to_scale.max}" + #LOGGER.debug "dv ------------------ min: #{@activities.values.flatten.to_scale.min}" + #LOGGER.debug "dv ------------------ max: #{@activities.values.max}" + #@prediction_min_max[0] = (all_activities.to_scale.min/2) + #@prediction_min_max[1] = (all_activities.to_scale.max*2) + #end + unless database_activity(subjectid) # adds database activity to @prediction_dataset neighbors @@ -225,6 +237,7 @@ module OpenTox :value_map => @value_map, :nr_hits => @nr_hits, :conf_stdev => @conf_stdev, + :prediction_min_max => @prediction_min_max, :transform => @transform } ) ") value_feature_uri = File.join( @uri, "predicted", "value") diff --git a/lib/serializer.rb b/lib/serializer.rb index 03dcf1f..d53f7fb 100644 --- a/lib/serializer.rb +++ b/lib/serializer.rb @@ -192,7 +192,7 @@ module OpenTox @object[metadata[OT.featureDataset]] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Dataset }] } @object[metadata[OT.trainingDataset]] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Dataset }] } @object[metadata[OT.dependentVariables]] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Feature }] } - metadata[OT.predictedVariables].each{|feature| @object[feature] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Feature }] }} + metadata[OT.predictedVariables].each{|feature| @object[feature] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Feature }]}} unless metadata[OT.predictedVariables].nil? # TODO: add algorithms from parameters @object["http://ot-dev.in-silico.ch/algorithm/fminer/bbrc"] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Algorithm }] } @object["http://ot-dev.in-silico.ch/algorithm/fminer/last"] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Algorithm }] } -- cgit v1.2.3 From e0dd975c5077fc84493a95fc244caad397f47f9b Mon Sep 17 00:00:00 2001 From: dv Date: Tue, 2 Aug 2011 10:25:22 +0200 Subject: changes --- lib/algorithm.rb | 6 ++---- lib/model.rb | 17 ++++++++++------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 50ce359..092205b 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -330,8 +330,7 @@ module OpenTox prediction = mlr( {:n_prop => props[0], :q_prop => props[1], :sims => sims, :acts => acts} ) transformer = eval("OpenTox::Algorithm::Transform::#{params[:transform]["class"]}.new ([#{prediction}], #{params[:transform]["offset"]})") prediction = transformer.values[0] - prediction = nil if prediction.infinite? - #prediction = nil if params[:prediction_min_max][1] < prediction || params[:prediction_min_max][0] > prediction + prediction = nil if prediction.infinite? || params[:prediction_min_max][1] < prediction || params[:prediction_min_max][0] > prediction LOGGER.debug "Prediction is: '" + prediction.to_s + "'." params[:conf_stdev] = false if params[:conf_stdev].nil? confidence = get_confidence({:sims => sims, :acts => acts, :neighbors => params[:neighbors], :conf_stdev => params[:conf_stdev]}) @@ -443,8 +442,7 @@ module OpenTox prediction = props.nil? ? local_svm(acts, sims, "nu-svr", params) : local_svm_prop(props, acts, "nu-svr") transformer = eval("OpenTox::Algorithm::Transform::#{params[:transform]["class"]}.new ([#{prediction}], #{params[:transform]["offset"]})") prediction = transformer.values[0] - prediction = nil if prediction.infinite? - #prediction = nil if params[:prediction_min_max][1] < prediction || params[:prediction_min_max][0] > prediction + prediction = nil if prediction.infinite? || params[:prediction_min_max][1] < prediction || params[:prediction_min_max][0] > prediction LOGGER.debug "Prediction is: '" + prediction.to_s + "'." params[:conf_stdev] = false if params[:conf_stdev].nil? confidence = get_confidence({:sims => sims, :acts => acts, :neighbors => params[:neighbors], :conf_stdev => params[:conf_stdev]}) diff --git a/lib/model.rb b/lib/model.rb index e6fbe2f..652d071 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -224,16 +224,19 @@ module OpenTox } ) end - #if OpenTox::Feature.find(metadata[OT.dependentVariables]).feature_type == "regression" - # all_activities = [] - #all_activities = @activities.values.flatten.collect! { |i| i.to_f } + if OpenTox::Feature.find(metadata[OT.dependentVariables]).feature_type == "regression" + #t1 = Time.now + #LOGGER.debug "dv ----------- start" + all_activities = [] + all_activities = @activities.values.flatten.collect! { |i| i.to_f } #LOGGER.debug "dv ------------------ min_toscale: #{all_activities.to_scale.min}" #LOGGER.debug "dv ------------------ max_toscale: #{all_activities.to_scale.max}" #LOGGER.debug "dv ------------------ min: #{@activities.values.flatten.to_scale.min}" #LOGGER.debug "dv ------------------ max: #{@activities.values.max}" - #@prediction_min_max[0] = (all_activities.to_scale.min/2) - #@prediction_min_max[1] = (all_activities.to_scale.max*2) - #end + @prediction_min_max[0] = (all_activities.to_scale.min/2) + @prediction_min_max[1] = (all_activities.to_scale.max*2) + #LOGGER.debug "dv ----------- end. Duration: '#{Time.now - t1}'" + end unless database_activity(subjectid) # adds database activity to @prediction_dataset @@ -261,7 +264,7 @@ module OpenTox @prediction_dataset.add @compound.uri, value_feature_uri, @value_map[prediction[:prediction]] else @prediction_dataset.add @compound.uri, value_feature_uri, prediction[:prediction] - end + end @prediction_dataset.add @compound.uri, confidence_feature_uri, prediction[:confidence] @prediction_dataset.features[value_feature_uri][DC.title] = @prediction_dataset.metadata[DC.title] @prediction_dataset.features[confidence_feature_uri][DC.title] = "Confidence" -- cgit v1.2.3 From 8f858ee951da1487dff34186dd4b740fde552546 Mon Sep 17 00:00:00 2001 From: dv Date: Tue, 2 Aug 2011 12:21:42 +0200 Subject: Indent --- lib/model.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/model.rb b/lib/model.rb index 652d071..d2f86b0 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -264,7 +264,7 @@ module OpenTox @prediction_dataset.add @compound.uri, value_feature_uri, @value_map[prediction[:prediction]] else @prediction_dataset.add @compound.uri, value_feature_uri, prediction[:prediction] - end + end @prediction_dataset.add @compound.uri, confidence_feature_uri, prediction[:confidence] @prediction_dataset.features[value_feature_uri][DC.title] = @prediction_dataset.metadata[DC.title] @prediction_dataset.features[confidence_feature_uri][DC.title] = "Confidence" -- cgit v1.2.3 From acfe33c4fd91efe5d5455892f20a3ffe20c3954c Mon Sep 17 00:00:00 2001 From: mr Date: Tue, 2 Aug 2011 16:32:01 +0200 Subject: add missing subjectid --- lib/algorithm.rb | 6 +++--- lib/model.rb | 5 +++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 3cf4ecf..9a5ff01 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -52,11 +52,11 @@ module OpenTox include Algorithm attr_accessor :prediction_feature, :training_dataset, :minfreq, :compounds, :db_class_sizes, :all_activities, :smi - def check_params(params,per_mil) + def check_params(params,per_mil,subjectid=nil) raise OpenTox::NotFoundError.new "Please submit a dataset_uri." unless params[:dataset_uri] and !params[:dataset_uri].nil? raise OpenTox::NotFoundError.new "Please submit a prediction_feature." unless params[:prediction_feature] and !params[:prediction_feature].nil? - @prediction_feature = OpenTox::Feature.find params[:prediction_feature], @subjectid - @training_dataset = OpenTox::Dataset.find "#{params[:dataset_uri]}", @subjectid + @prediction_feature = OpenTox::Feature.find params[:prediction_feature], subjectid + @training_dataset = OpenTox::Dataset.find "#{params[:dataset_uri]}", subjectid raise OpenTox::NotFoundError.new "No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}" unless @training_dataset.features and @training_dataset.features.include?(params[:prediction_feature]) unless params[:min_frequency].nil? diff --git a/lib/model.rb b/lib/model.rb index f1a8dc9..26c42a5 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -73,7 +73,7 @@ module OpenTox if use_confidence conf_index = -1 predictedVariables.size.times do |i| - f = OpenTox::Feature.find(predictedVariables[i]) + f = OpenTox::Feature.find(predictedVariables[i], subjectid) conf_index = i if f.metadata[DC.title]=~/(?i)confidence/ end raise "could not estimate predicted variable from model: '"+uri.to_s+ @@ -181,6 +181,7 @@ module OpenTox # @param [optional,OpenTox::Task] waiting_task (can be a OpenTox::Subtask as well), progress is updated accordingly # @return [OpenTox::Dataset] Dataset with predictions def predict_dataset(dataset_uri, subjectid=nil, waiting_task=nil) + @prediction_dataset = Dataset.create(CONFIG[:services]["opentox-dataset"], subjectid) @prediction_dataset.add_metadata({ OT.hasSource => @uri, @@ -244,7 +245,7 @@ module OpenTox @prediction_dataset.metadata[OT.dependentVariables] = @metadata[OT.dependentVariables] unless @prediction_dataset.metadata[OT.dependentVariables] @prediction_dataset.metadata[OT.predictedVariables] = [value_feature_uri, confidence_feature_uri] unless @prediction_dataset.metadata[OT.predictedVariables] - if OpenTox::Feature.find(metadata[OT.dependentVariables]).feature_type == "classification" + if OpenTox::Feature.find(metadata[OT.dependentVariables], subjectid).feature_type == "classification" @prediction_dataset.add @compound.uri, value_feature_uri, @value_map[prediction[:prediction]] else @prediction_dataset.add @compound.uri, value_feature_uri, prediction[:prediction] -- cgit v1.2.3 From 0c21b5c58977d16c74d7e976d37d5361ffcb63d1 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Wed, 3 Aug 2011 15:26:18 +0200 Subject: allow task state queued for 202 --- lib/task.rb | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/task.rb b/lib/task.rb index 146a756..00499fa 100644 --- a/lib/task.rb +++ b/lib/task.rb @@ -169,6 +169,10 @@ module OpenTox @metadata[OT.hasStatus] == 'Running' end + def queued? + @metadata[OT.hasStatus] == 'Queued' + end + def completed? @metadata[OT.hasStatus] == 'Completed' end @@ -286,7 +290,7 @@ module OpenTox raise "illegal task state, task is completed, resultURI is no URI: '"+@metadata[OT.resultURI].to_s+ "'" unless @metadata[OT.resultURI] and @metadata[OT.resultURI].to_s.uri? if completed? if @http_code == 202 - raise "#{@uri}: illegal task state, code is 202, but hasStatus is not Running: '"+@metadata[OT.hasStatus]+"'" unless running? + raise "#{@uri}: illegal task state, code is 202, but hasStatus is not Running or Queued: '"+@metadata[OT.hasStatus]+"'" unless running? or queued? elsif @http_code == 201 # ignore hasStatus # raise "#{@uri}: illegal task state, code is 201, but hasStatus is not Completed: '"+@metadata[OT.hasStatus]+"'" unless completed? -- cgit v1.2.3 From 8831f981cc246f00d39689843698f6c70475bf26 Mon Sep 17 00:00:00 2001 From: dv Date: Wed, 3 Aug 2011 16:42:27 +0200 Subject: removed comments --- lib/dataset.rb | 1 - lib/model.rb | 7 ------- 2 files changed, 8 deletions(-) diff --git a/lib/dataset.rb b/lib/dataset.rb index 2147a4d..5ebad0f 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -283,7 +283,6 @@ module OpenTox @features[feature] = {} unless @features[feature] @data_entries[compound] = {} unless @data_entries[compound] @data_entries[compound][feature] = [] unless @data_entries[compound][feature] - #LOGGER.debug "dv --------------- #{value.class}" @data_entries[compound][feature] << value if value!=nil end diff --git a/lib/model.rb b/lib/model.rb index d2f86b0..d69a827 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -225,17 +225,10 @@ module OpenTox end if OpenTox::Feature.find(metadata[OT.dependentVariables]).feature_type == "regression" - #t1 = Time.now - #LOGGER.debug "dv ----------- start" all_activities = [] all_activities = @activities.values.flatten.collect! { |i| i.to_f } - #LOGGER.debug "dv ------------------ min_toscale: #{all_activities.to_scale.min}" - #LOGGER.debug "dv ------------------ max_toscale: #{all_activities.to_scale.max}" - #LOGGER.debug "dv ------------------ min: #{@activities.values.flatten.to_scale.min}" - #LOGGER.debug "dv ------------------ max: #{@activities.values.max}" @prediction_min_max[0] = (all_activities.to_scale.min/2) @prediction_min_max[1] = (all_activities.to_scale.max*2) - #LOGGER.debug "dv ----------- end. Duration: '#{Time.now - t1}'" end unless database_activity(subjectid) # adds database activity to @prediction_dataset -- cgit v1.2.3 From 69cb4d3672c50f009b9edc154fe69f15bcafc0da Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Thu, 4 Aug 2011 13:56:04 +0000 Subject: feature highlighting and significant features fixed for generic classes and regression --- lib/environment.rb | 4 ++-- lib/serializer.rb | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/environment.rb b/lib/environment.rb index 28a9a66..6d1bb85 100644 --- a/lib/environment.rb +++ b/lib/environment.rb @@ -40,8 +40,8 @@ else end # Regular expressions for parsing classification data -TRUE_REGEXP = /^(true|active|1|1.0|tox)$/i -FALSE_REGEXP = /^(false|inactive|0|0.0|low tox)$/i +TRUE_REGEXP = /^(true|active|1|1.0|tox|activating)$/i +FALSE_REGEXP = /^(false|inactive|0|0.0|low tox|deactivating)$/i # Task durations DEFAULT_TASK_MAX_DURATION = 36000 diff --git a/lib/serializer.rb b/lib/serializer.rb index d53f7fb..3921784 100644 --- a/lib/serializer.rb +++ b/lib/serializer.rb @@ -192,7 +192,7 @@ module OpenTox @object[metadata[OT.featureDataset]] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Dataset }] } @object[metadata[OT.trainingDataset]] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Dataset }] } @object[metadata[OT.dependentVariables]] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Feature }] } - metadata[OT.predictedVariables].each{|feature| @object[feature] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Feature }]}} unless metadata[OT.predictedVariables].nil? + metadata[OT.predictedVariables].each{|feature| @object[feature] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Feature }]}} #unless metadata[OT.predictedVariables].nil? # TODO: add algorithms from parameters @object["http://ot-dev.in-silico.ch/algorithm/fminer/bbrc"] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Algorithm }] } @object["http://ot-dev.in-silico.ch/algorithm/fminer/last"] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Algorithm }] } -- cgit v1.2.3 From 87a2584661682d769858da486ac57fc292227c41 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Thu, 4 Aug 2011 14:05:45 +0000 Subject: confidence = nil if prediction.nil? --- lib/algorithm.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 0ba6a83..85b54ab 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -334,6 +334,7 @@ module OpenTox LOGGER.debug "Prediction is: '" + prediction.to_s + "'." params[:conf_stdev] = false if params[:conf_stdev].nil? confidence = get_confidence({:sims => sims, :acts => acts, :neighbors => params[:neighbors], :conf_stdev => params[:conf_stdev]}) + confidence = nil if prediction.nil? end {:prediction => prediction, :confidence => confidence} @@ -446,6 +447,7 @@ module OpenTox LOGGER.debug "Prediction is: '" + prediction.to_s + "'." params[:conf_stdev] = false if params[:conf_stdev].nil? confidence = get_confidence({:sims => sims, :acts => acts, :neighbors => params[:neighbors], :conf_stdev => params[:conf_stdev]}) + confidence = nil if prediction.nil? end {:prediction => prediction, :confidence => confidence} -- cgit v1.2.3 From 7a13c2da03220ad6716fe7da5bfa3403c873d7d1 Mon Sep 17 00:00:00 2001 From: mr Date: Thu, 4 Aug 2011 17:59:04 +0200 Subject: Version bump to 2.1.0 --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 10bf840..50aea0e 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.0.1 \ No newline at end of file +2.1.0 \ No newline at end of file -- cgit v1.2.3