From f999b42afbb4387d99b2c91a79f84654408cbab1 Mon Sep 17 00:00:00 2001
From: Andreas Maunz
Date: Tue, 10 May 2011 08:29:27 +0200
Subject: Added bal
---
lib/model.rb | 102 ++++++++++++++++++++++++++++++++++++++++++++++-------------
1 file changed, 80 insertions(+), 22 deletions(-)
diff --git a/lib/model.rb b/lib/model.rb
index 048de85..9442897 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -23,7 +23,7 @@ module OpenTox
# Generic OpenTox model class for all API compliant services
class Generic
include Model
-
+
# Find Generic Opentox Model via URI, and loads metadata, could raise NotFound/NotAuthorized error
# @param [String] uri Model URI
# @return [OpenTox::Model::Generic] Model instance
@@ -34,12 +34,12 @@ module OpenTox
raise "could not load model metadata '"+uri.to_s+"'" if model.metadata==nil or model.metadata.size==0
model
end
-
- # provides feature type, possible types are "regression" or "classification"
- # @return [String] feature type, "unknown" if type could not be estimated
+
+ # provides feature type, possible types are "regression" or "classification"
+ # @return [String] feature type, "unknown" if type could not be estimated
def feature_type(subjectid=nil)
return @feature_type if @feature_type
-
+
# dynamically perform restcalls if necessary
load_metadata(subjectid) if @metadata==nil or @metadata.size==0 or (@metadata.size==1 && @metadata.values[0]==@uri)
algorithm = OpenTox::Algorithm::Generic.find(@metadata[OT.algorithm], subjectid)
@@ -60,9 +60,9 @@ module OpenTox
raise "unknown model "+type_indicators.inspect unless @feature_type
@feature_type
end
-
+
end
-
+
# Lazy Structure Activity Relationship class
class Lazar
@@ -78,7 +78,7 @@ module OpenTox
else
super CONFIG[:services]["opentox-model"]
end
-
+
@metadata[OT.algorithm] = File.join(CONFIG[:services]["opentox-algorithm"],"lazar")
@features = []
@@ -178,8 +178,59 @@ module OpenTox
return @prediction_dataset if database_activity(subjectid)
- neighbors
- prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})")
+
+ # AM: Balancing, see http://www.maunz.de/wordpress/opentox/2011/balanced-lazar
+ l = Array.new # larger
+ s = Array.new # smaller fraction
+ if metadata[RDF.type] == [OTA.ClassificationLazySingleTarget]
+ @fingerprints.each do |training_compound,training_features|
+ @activities[training_compound].each do |act|
+ case act.to_s
+ when "false"
+ l << training_compound
+ when "true"
+ s << training_compound
+ else
+ LOGGER.warn "BLAZAR: Activity #{act.to_s} should not be reached."
+ end
+ end
+ end
+ if s.size > l.size then
+ l,s = s,l # happy swapping
+ LOGGER.info "BLAZAR: |s|=#{s.size}, |l|=#{l.size}."
+ end
+ # determine ratio
+ modulo = l.size.divmod(s.size)# modulo[0]=ratio, modulo[1]=rest
+ LOGGER.info "BLAZAR: Balance: #{modulo[0]}, rest #{modulo[1]}."
+ end
+
+ # AM: Balanced predictions
+ addon = (modulo[1].to_f/modulo[0]).ceil # what will be added in each round
+ slack = modulo[1].divmod(addon)[1] # what remains for the last round
+ position = 0
+ predictions = Array.new
+
+ @collect_neighbors = {}
+ predictions = []
+ for i in 1..modulo[0] do
+ (i == modulo[0]) && (slack>0) ? lr_size = s.size + slack : lr_size = s.size + addon # determine fraction
+ LOGGER.info "BLAZAR: Neighbors round #{i}: #{position} + #{lr_size}."
+ neighbors(s, l, position, lr_size) # get ratio fraction of larger part
+ predictions << eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})")
+ position = position + lr_size
+ end
+ @neighbors = @collect_neighbors.values # AM: get all neighbors
+
+ prediction={}
+ begin
+ p_sum=0.0
+ predictions.each do |p|
+ p[:prediction] == false ? p_sum = p_sum - p[:confidence].to_f : p_sum = p_sum + p[:confidence].to_f
+ end
+ prediction = { :prediction => (p_sum<0.0 ? false : true), :confidence => p_sum.abs/predictions.size } # AM: get mean
+ rescue Exception => e
+ LOGGER.error "BLAZAR failed in prediction: "+e.class.to_s+": "+e.message
+ end
prediction_feature_uri = File.join( @prediction_dataset.uri, "feature", "prediction", File.basename(@metadata[OT.dependentVariables]),@prediction_dataset.compounds.size.to_s)
# TODO: fix dependentVariable
@@ -269,23 +320,30 @@ module OpenTox
end
# Find neighbors and store them as object variable
- def neighbors
-
+ def neighbors(s=nil, l=nil, start=nil, offset=nil)
@compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm
@neighbors = []
- @fingerprints.each do |training_compound,training_features|
- sim = eval("#{@similarity_algorithm}(@compound_features,training_features,@p_values)")
- if sim > @min_sim
- @activities[training_compound].each do |act|
- @neighbors << {
- :compound => training_compound,
- :similarity => sim,
- :features => training_features,
- :activity => act
- }
+ begin
+ #@fingerprints.each do |training_compound,training_features| # AM: this is original by CH
+ [ l[start, offset ] , s ].flatten.each do |training_compound| # AM: access only a balanced subset
+ training_features = @fingerprints[training_compound]
+ sim = eval("#{@similarity_algorithm}(@compound_features,training_features,@p_values)")
+ if sim > @min_sim
+ @activities[training_compound].each do |act|
+ this_neighbor = {
+ :compound => training_compound,
+ :similarity => sim,
+ :features => training_features,
+ :activity => act
+ }
+ @neighbors << this_neighbor
+ @collect_neighbors[training_compound] = this_neighbor
+ end
end
end
+ rescue Exception => e
+ LOGGER.error "BLAZAR failed in neighbors: "+e.class.to_s+": "+e.message
end
end
--
cgit v1.2.3
From 1d8c7d6dfa513cd7c8ad642248db24e0d1e3a199 Mon Sep 17 00:00:00 2001
From: Andreas Maunz
Date: Tue, 10 May 2011 09:02:38 +0200
Subject: Using Best prediction only
---
lib/model.rb | 25 ++++++++++++-------------
1 file changed, 12 insertions(+), 13 deletions(-)
diff --git a/lib/model.rb b/lib/model.rb
index 9442897..a4d6d85 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -210,28 +210,28 @@ module OpenTox
position = 0
predictions = Array.new
- @collect_neighbors = {}
- predictions = []
+ prediction_best=nil
+ neighbors_best=nil
+
+ begin
for i in 1..modulo[0] do
(i == modulo[0]) && (slack>0) ? lr_size = s.size + slack : lr_size = s.size + addon # determine fraction
LOGGER.info "BLAZAR: Neighbors round #{i}: #{position} + #{lr_size}."
neighbors(s, l, position, lr_size) # get ratio fraction of larger part
- predictions << eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})")
+ prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})")
+ if prediction[:confidence].abs > prediction_best[:confidence].abs || prediction_best.nil?
+ prediction_best=prediction
+ neighbors_best=@neighbors
+ end
position = position + lr_size
end
- @neighbors = @collect_neighbors.values # AM: get all neighbors
-
- prediction={}
- begin
- p_sum=0.0
- predictions.each do |p|
- p[:prediction] == false ? p_sum = p_sum - p[:confidence].to_f : p_sum = p_sum + p[:confidence].to_f
- end
- prediction = { :prediction => (p_sum<0.0 ? false : true), :confidence => p_sum.abs/predictions.size } # AM: get mean
rescue Exception => e
LOGGER.error "BLAZAR failed in prediction: "+e.class.to_s+": "+e.message
end
+ prediction=prediction_best
+ @neighbors=neighbors_best
+
prediction_feature_uri = File.join( @prediction_dataset.uri, "feature", "prediction", File.basename(@metadata[OT.dependentVariables]),@prediction_dataset.compounds.size.to_s)
# TODO: fix dependentVariable
@prediction_dataset.metadata[OT.dependentVariables] = prediction_feature_uri
@@ -338,7 +338,6 @@ module OpenTox
:activity => act
}
@neighbors << this_neighbor
- @collect_neighbors[training_compound] = this_neighbor
end
end
end
--
cgit v1.2.3
From 2af934ddc033d7d8a737d88eb4ee175955ad4a0a Mon Sep 17 00:00:00 2001
From: Andreas Maunz
Date: Tue, 10 May 2011 11:11:34 +0200
Subject: Fixed first prediction case
---
lib/model.rb | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lib/model.rb b/lib/model.rb
index a4d6d85..3d64f32 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -219,7 +219,7 @@ module OpenTox
LOGGER.info "BLAZAR: Neighbors round #{i}: #{position} + #{lr_size}."
neighbors(s, l, position, lr_size) # get ratio fraction of larger part
prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})")
- if prediction[:confidence].abs > prediction_best[:confidence].abs || prediction_best.nil?
+ if prediction_best.nil? || prediction[:confidence].abs > prediction_best[:confidence].abs
prediction_best=prediction
neighbors_best=@neighbors
end
--
cgit v1.2.3
From 305f3caa692dd977df07cbc5ec195521e2a135fa Mon Sep 17 00:00:00 2001
From: Andreas Maunz
Date: Tue, 10 May 2011 16:43:05 +0200
Subject: Added Gauss patch
---
lib/algorithm.rb | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lib/algorithm.rb b/lib/algorithm.rb
index 008e7fe..abf10d4 100644
--- a/lib/algorithm.rb
+++ b/lib/algorithm.rb
@@ -165,7 +165,7 @@ module OpenTox
# @param [Hash] params Keys `:similarity_algorithm,:p_values` are required
# @return [Hash] Hash with keys `:prediction, :confidence`
def self.local_svm_regression(neighbors,params )
- sims = neighbors.collect{ |n| n[:similarity] } # similarity values between query and neighbors
+ sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values between query and neighbors
conf = sims.inject{|sum,x| sum + x }
acts = neighbors.collect do |n|
act = n[:activity]
--
cgit v1.2.3
From 524a68d8429b8adc16bd8073774f9305cb7138a0 Mon Sep 17 00:00:00 2001
From: Andreas Maunz
Date: Wed, 11 May 2011 12:13:37 +0200
Subject: Added balance patch
---
lib/parser.rb | 19 +++++++++++++++----
1 file changed, 15 insertions(+), 4 deletions(-)
diff --git a/lib/parser.rb b/lib/parser.rb
index db746c1..dc5f675 100644
--- a/lib/parser.rb
+++ b/lib/parser.rb
@@ -348,16 +348,27 @@ module OpenTox
when OT.NominalFeature
case value.to_s
when TRUE_REGEXP
- @dataset.add(compound.uri, feature, true )
+ #@dataset.add(compound.uri, feature, true )
+ val=true
when FALSE_REGEXP
- @dataset.add(compound.uri, feature, false )
+ #@dataset.add(compound.uri, feature, false )
+ val=false
end
when OT.NumericFeature
- @dataset.add compound.uri, feature, value.to_f
+ #@dataset.add compound.uri, feature, value.to_f
+ val = value.to_f
when OT.StringFeature
- @dataset.add compound.uri, feature, value.to_s
+ #@dataset.add compound.uri, feature, value.to_s
+ val = value.to_s
@activity_errors << smiles+", "+row.join(", ")
end
+ if val!=nil
+ @dataset.add(compound.uri, feature, val)
+ if type!=OT.NumericFeature
+ @dataset.features[feature][OT.acceptValue] = [] unless @dataset.features[feature][OT.acceptValue]
+ @dataset.features[feature][OT.acceptValue] << val.to_s unless @dataset.features[feature][OT.acceptValue].include?(val.to_s)
+ end
+ end
end
end
--
cgit v1.2.3
From 03a87a832162ccf17b6f0ebfda126e3622530ca3 Mon Sep 17 00:00:00 2001
From: Andreas Maunz
Date: Wed, 11 May 2011 15:56:55 +0200
Subject: Further Martin patch
---
lib/feature.rb | 9 +--------
1 file changed, 1 insertion(+), 8 deletions(-)
diff --git a/lib/feature.rb b/lib/feature.rb
index f6e2dfd..eb0b869 100644
--- a/lib/feature.rb
+++ b/lib/feature.rb
@@ -19,14 +19,7 @@ module OpenTox
# provides domain (possible target values) of classification feature
# @return [Array] list with possible target values
def domain
- if metadata[OT.acceptValue]
- raise "accept value found, remove hack and implement correctly"
- else
- if @uri=~/feature\/26221/ || @uri=~/feature\/221726/
- return ["mutagen" , "nonmutagen"]
- end
- return [true, false]
- end
+ return [true, false]
end
# provides feature type, possible types are "regression" or "classification"
--
cgit v1.2.3
From b944a21b557b9628b3b6f7be990534b2f86f0884 Mon Sep 17 00:00:00 2001
From: mguetlein
Date: Tue, 17 May 2011 10:47:58 +0200
Subject: fix validation statistics: convert num predictions to integer
---
lib/validation.rb | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/lib/validation.rb b/lib/validation.rb
index d58d36e..1a2497b 100644
--- a/lib/validation.rb
+++ b/lib/validation.rb
@@ -65,7 +65,7 @@ module OpenTox
def summary
if @metadata[OT.classificationStatistics]
res = {
- :nr_predictions => @metadata[OT.numInstances] - @metadata[OT.numUnpredicted],
+ :nr_predictions => @metadata[OT.numInstances].to_i - @metadata[OT.numUnpredicted].to_i,
:correct_predictions => @metadata[OT.classificationStatistics][OT.percentCorrect],
:weighted_area_under_roc => @metadata[OT.classificationStatistics][OT.weightedAreaUnderRoc],
}
@@ -83,7 +83,7 @@ module OpenTox
res
elsif @metadata[OT.regressionStatistics]
{
- :nr_predictions => @metadata[OT.numInstances] - @metadata[OT.numUnpredicted],
+ :nr_predictions => @metadata[OT.numInstances].to_i - @metadata[OT.numUnpredicted].to_i,
:r_square => @metadata[OT.regressionStatistics][OT.rSquare],
:root_mean_squared_error => @metadata[OT.regressionStatistics][OT.rootMeanSquaredError],
:mean_absolute_error => @metadata[OT.regressionStatistics][OT.meanAbsoluteError],
--
cgit v1.2.3
From 37a066e4cfe102d2e4edfaf3b4b9787bcbb3206f Mon Sep 17 00:00:00 2001
From: Andreas Maunz
Date: Tue, 17 May 2011 16:08:25 +0200
Subject: Initial version
---
lib/algorithm.rb | 62 ++++++++++++++++++++++++++++++++++++++++++--------------
1 file changed, 47 insertions(+), 15 deletions(-)
diff --git a/lib/algorithm.rb b/lib/algorithm.rb
index 7fbe0dc..16372ea 100644
--- a/lib/algorithm.rb
+++ b/lib/algorithm.rb
@@ -164,11 +164,7 @@ module OpenTox
# @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features`
# @param [Hash] params Keys `:similarity_algorithm,:p_values` are required
# @return [Hash] Hash with keys `:prediction, :confidence`
- def self.local_svm_regression(neighbors,params )
- sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values between query and neighbors
- conf = sims.inject{|sum,x| sum + x }
-
- # AM: Control log taking
+ def self.local_svm_regression(neighbors, params)
take_logs=true
neighbors.each do |n|
if (! n[:activity].nil?) && (n[:activity].to_f < 0.0)
@@ -180,10 +176,51 @@ module OpenTox
take_logs ? Math.log10(act.to_f) : act.to_f
end # activities of neighbors for supervised learning
- neighbor_matches = neighbors.collect{ |n| n[:features] } # as in classification: URIs of matches
+ sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors
+ prediction = local_sv_machine (neighbors, acts, sims, "svr", params)
+ prediction = take_logs ? 10**(prediction.to_f) : prediction.to_f
+ LOGGER.debug "Prediction is: '" + prediction.to_s + "'."
+
+ conf = sims.inject{|sum,x| sum + x }
+ confidence = conf/neighbors.size if neighbors.size > 0
+ {:prediction => prediction, :confidence => confidence}
+
+ end
+
+ # Local support vector classification from neighbors
+ # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features`
+ # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required
+ # @return [Hash] Hash with keys `:prediction, :confidence`
+ def self.local_svm_classification(neighbors, params)
+ acts = neighbors.collect do |n|
+ act = n[:activity]
+ end # activities of neighbors for supervised learning
+
+ sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors
+ prediction = local_sv_machine (neighbors, acts, sims, "svc", params)
+ prediction = prediction.to_f
+ LOGGER.debug "Prediction is: '" + prediction.to_s + "'."
+
+ conf = sims.inject{|sum,x| sum + x }
+ confidence = conf/neighbors.size if neighbors.size > 0
+ {:prediction => prediction, :confidence => confidence}
+
+ end
+
+ end
+
+ # Local support vector prediction. Not to be called directly (use local_svm_regression or local_svm_classification.
+ # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features`
+ # @param [Array] acts, activities for neighbors.
+ # @param [Array] sims, similarities for neighbors.
+ # @param [String] type, one of "svr" (regression) or "svc" (classification).
+ # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required
+ # @return [Numeric] A prediction value.
+ def self.local_sv_machine(neighbors, acts, sims, type, params)
+ neighbor_matches = neighbors.collect{ |n| n[:features] } # URIs of matches
gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel
if neighbor_matches.size == 0
- raise "No neighbors found"
+ raise "No neighbors found."
else
# gram matrix
(0..(neighbor_matches.length-1)).each do |i|
@@ -216,21 +253,16 @@ module OpenTox
# model + support vectors
LOGGER.debug "Creating SVM model ..."
- @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"nu-svr\", nu=0.8)"
+ @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"nu-#{type}\", nu=0.5)"
@r.eval "sv<-as.vector(SVindex(model))"
@r.eval "sims<-sims[sv]"
@r.eval "sims<-as.kernelMatrix(matrix(sims,1))"
LOGGER.debug "Predicting ..."
@r.eval "p<-predict(model,sims)[1,1]"
- prediction = 10**(@r.p.to_f) if take_logs
- LOGGER.debug "Prediction is: '" + prediction.to_s + "'."
+ prediction = @r.p
@r.quit # free R
end
- confidence = conf/neighbors.size if neighbors.size > 0
- {:prediction => prediction, :confidence => confidence}
-
- end
-
+ prediction
end
module Substructure
--
cgit v1.2.3
From 0e49be4d0ed4752d5988ed651d813f001e42c05b Mon Sep 17 00:00:00 2001
From: Andreas Maunz
Date: Tue, 17 May 2011 16:35:20 +0200
Subject: Fixed method scope
---
lib/algorithm.rb | 110 ++++++++++++++++++++++++++++---------------------------
1 file changed, 56 insertions(+), 54 deletions(-)
diff --git a/lib/algorithm.rb b/lib/algorithm.rb
index 16372ea..0a5b09f 100644
--- a/lib/algorithm.rb
+++ b/lib/algorithm.rb
@@ -177,7 +177,7 @@ module OpenTox
end # activities of neighbors for supervised learning
sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors
- prediction = local_sv_machine (neighbors, acts, sims, "svr", params)
+ prediction = local_svm(neighbors, acts, sims, "svr", params)
prediction = take_logs ? 10**(prediction.to_f) : prediction.to_f
LOGGER.debug "Prediction is: '" + prediction.to_s + "'."
@@ -197,7 +197,7 @@ module OpenTox
end # activities of neighbors for supervised learning
sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors
- prediction = local_sv_machine (neighbors, acts, sims, "svc", params)
+ prediction = local_svm (neighbors, acts, sims, "svc", params)
prediction = prediction.to_f
LOGGER.debug "Prediction is: '" + prediction.to_s + "'."
@@ -207,62 +207,64 @@ module OpenTox
end
- end
- # Local support vector prediction. Not to be called directly (use local_svm_regression or local_svm_classification.
- # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features`
- # @param [Array] acts, activities for neighbors.
- # @param [Array] sims, similarities for neighbors.
- # @param [String] type, one of "svr" (regression) or "svc" (classification).
- # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required
- # @return [Numeric] A prediction value.
- def self.local_sv_machine(neighbors, acts, sims, type, params)
- neighbor_matches = neighbors.collect{ |n| n[:features] } # URIs of matches
- gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel
- if neighbor_matches.size == 0
- raise "No neighbors found."
- else
- # gram matrix
- (0..(neighbor_matches.length-1)).each do |i|
- gram_matrix[i] = [] unless gram_matrix[i]
- # upper triangle
- ((i+1)..(neighbor_matches.length-1)).each do |j|
- sim = eval("#{params[:similarity_algorithm]}(neighbor_matches[i], neighbor_matches[j], params[:p_values])")
- gram_matrix[i][j] = Algorithm.gauss(sim)
- gram_matrix[j] = [] unless gram_matrix[j]
- gram_matrix[j][i] = gram_matrix[i][j] # lower triangle
+ # Local support vector prediction from neighbors.
+ # Not to be called directly (use local_svm_regression or local_svm_classification).
+ # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features`
+ # @param [Array] acts, activities for neighbors.
+ # @param [Array] sims, similarities for neighbors.
+ # @param [String] type, one of "svr" (regression) or "svc" (classification).
+ # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required
+ # @return [Numeric] A prediction value.
+ def self.local_svm(neighbors, acts, sims, type, params)
+ neighbor_matches = neighbors.collect{ |n| n[:features] } # URIs of matches
+ gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel
+ if neighbor_matches.size == 0
+ raise "No neighbors found."
+ else
+ # gram matrix
+ (0..(neighbor_matches.length-1)).each do |i|
+ gram_matrix[i] = [] unless gram_matrix[i]
+ # upper triangle
+ ((i+1)..(neighbor_matches.length-1)).each do |j|
+ sim = eval("#{params[:similarity_algorithm]}(neighbor_matches[i], neighbor_matches[j], params[:p_values])")
+ gram_matrix[i][j] = Algorithm.gauss(sim)
+ gram_matrix[j] = [] unless gram_matrix[j]
+ gram_matrix[j][i] = gram_matrix[i][j] # lower triangle
+ end
+ gram_matrix[i][i] = 1.0
end
- gram_matrix[i][i] = 1.0
- end
- #LOGGER.debug gram_matrix.to_yaml
- @r = RinRuby.new(false,false) # global R instance leads to Socket errors after a large number of requests
- @r.eval "library('kernlab')" # this requires R package "kernlab" to be installed
- LOGGER.debug "Setting R data ..."
- # set data
- @r.gram_matrix = gram_matrix.flatten
- @r.n = neighbor_matches.size
- @r.y = acts
- @r.sims = sims
+ #LOGGER.debug gram_matrix.to_yaml
+ @r = RinRuby.new(false,false) # global R instance leads to Socket errors after a large number of requests
+ @r.eval "library('kernlab')" # this requires R package "kernlab" to be installed
+ LOGGER.debug "Setting R data ..."
+ # set data
+ @r.gram_matrix = gram_matrix.flatten
+ @r.n = neighbor_matches.size
+ @r.y = acts
+ @r.sims = sims
+
+ LOGGER.debug "Preparing R data ..."
+ # prepare data
+ @r.eval "y<-as.vector(y)"
+ @r.eval "gram_matrix<-as.kernelMatrix(matrix(gram_matrix,n,n))"
+ @r.eval "sims<-as.vector(sims)"
+
+ # model + support vectors
+ LOGGER.debug "Creating SVM model ..."
+ @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"nu-#{type}\", nu=0.5)"
+ @r.eval "sv<-as.vector(SVindex(model))"
+ @r.eval "sims<-sims[sv]"
+ @r.eval "sims<-as.kernelMatrix(matrix(sims,1))"
+ LOGGER.debug "Predicting ..."
+ @r.eval "p<-predict(model,sims)[1,1]"
+ prediction = @r.p
+ @r.quit # free R
+ end
+ prediction
+ end
- LOGGER.debug "Preparing R data ..."
- # prepare data
- @r.eval "y<-as.vector(y)"
- @r.eval "gram_matrix<-as.kernelMatrix(matrix(gram_matrix,n,n))"
- @r.eval "sims<-as.vector(sims)"
-
- # model + support vectors
- LOGGER.debug "Creating SVM model ..."
- @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"nu-#{type}\", nu=0.5)"
- @r.eval "sv<-as.vector(SVindex(model))"
- @r.eval "sims<-sims[sv]"
- @r.eval "sims<-as.kernelMatrix(matrix(sims,1))"
- LOGGER.debug "Predicting ..."
- @r.eval "p<-predict(model,sims)[1,1]"
- prediction = @r.p
- @r.quit # free R
- end
- prediction
end
module Substructure
--
cgit v1.2.3
From 4372e80a38c5228f3b7d0372f92195e62500b743 Mon Sep 17 00:00:00 2001
From: Andreas Maunz
Date: Tue, 17 May 2011 16:40:12 +0200
Subject: Add debug
---
lib/algorithm.rb | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/lib/algorithm.rb b/lib/algorithm.rb
index 0a5b09f..ec5748d 100644
--- a/lib/algorithm.rb
+++ b/lib/algorithm.rb
@@ -178,7 +178,8 @@ module OpenTox
sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors
prediction = local_svm(neighbors, acts, sims, "svr", params)
- prediction = take_logs ? 10**(prediction.to_f) : prediction.to_f
+ LOGGER.debug "Prediction is: '" + prediction.to_s + "'."
+ prediction = (take_logs ? 10**(prediction.to_f) : prediction.to_f)
LOGGER.debug "Prediction is: '" + prediction.to_s + "'."
conf = sims.inject{|sum,x| sum + x }
--
cgit v1.2.3
From cf6d40be3f31d473f69216f1453e2ca0ddf82130 Mon Sep 17 00:00:00 2001
From: Andreas Maunz
Date: Tue, 17 May 2011 16:46:26 +0200
Subject: nu 0.8 again to pass tests
---
lib/algorithm.rb | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lib/algorithm.rb b/lib/algorithm.rb
index ec5748d..4cb80e3 100644
--- a/lib/algorithm.rb
+++ b/lib/algorithm.rb
@@ -254,7 +254,7 @@ module OpenTox
# model + support vectors
LOGGER.debug "Creating SVM model ..."
- @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"nu-#{type}\", nu=0.5)"
+ @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"nu-#{type}\", nu=0.8)"
@r.eval "sv<-as.vector(SVindex(model))"
@r.eval "sims<-sims[sv]"
@r.eval "sims<-as.kernelMatrix(matrix(sims,1))"
--
cgit v1.2.3
From 4081ac06ddf8dafeebc93dfc28c4ef54f64a844d Mon Sep 17 00:00:00 2001
From: mguetlein
Date: Wed, 18 May 2011 17:57:37 +0200
Subject: add opentox object for new algorithm comparison report
---
lib/validation.rb | 49 ++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 48 insertions(+), 1 deletion(-)
diff --git a/lib/validation.rb b/lib/validation.rb
index 1a2497b..d7a337c 100644
--- a/lib/validation.rb
+++ b/lib/validation.rb
@@ -198,7 +198,6 @@ module OpenTox
# @param [String,optional] subjectid
# @return [OpenTox::CrossvalidationReport]
def self.find( uri, subjectid=nil )
- # PENDING load report data?
OpenTox::RestClientWrapper.get(uri,{:subjectid => subjectid})
rep = CrossvalidationReport.new(uri)
rep.load_metadata( subjectid )
@@ -227,6 +226,54 @@ module OpenTox
end
end
+
+ class AlgorithmComparisonReport
+ include OpenTox
+
+ # finds AlgorithmComparisonReport via uri, raises error if not found
+ # @param [String] uri
+ # @param [String,optional] subjectid
+ # @return [OpenTox::AlgorithmComparisonReport]
+ def self.find( uri, subjectid=nil )
+ OpenTox::RestClientWrapper.get(uri,{:subjectid => subjectid})
+ rep = AlgorithmComparisonReport.new(uri)
+ rep.load_metadata( subjectid )
+ rep
+ end
+
+ # finds AlgorithmComparisonReport for a particular crossvalidation
+ # @param [String] crossvalidation_uri
+ # @param [String,optional] subjectid
+ # @return [OpenTox::AlgorithmComparisonReport] nil if no report found
+ def self.find_for_crossvalidation( crossvalidation_uri, subjectid=nil )
+ uris = RestClientWrapper.get(File.join(CONFIG[:services]["opentox-validation"],
+ "/report/algorithm_comparison?crossvalidation="+crossvalidation_uri), {:subjectid => subjectid}).chomp.split("\n")
+ uris.size==0 ? nil : AlgorithmComparisonReport.new(uris[-1])
+ end
+
+ # creates an algorithm comparison report from a set of crossvalidations
+ # @param [Hash] crossvalidation_uri_hash, see example
+ # @param [String,optional] subjectid
+ # @param [OpenTox::Task,optional] waiting_task (can be a OpenTox::Subtask as well), progress is updated accordingly
+ # @return [OpenTox::AlgorithmComparisonReport]
+ # example for hash:
+ # { :lazar-bbrc => [ http://host/validation/crossvalidation/x1, http://host/validation/crossvalidation/x2 ],
+ # :lazar-last => [ http://host/validation/crossvalidation/xy, http://host/validation/crossvalidation/xy ] }
+ def self.create( crossvalidation_uri_hash, subjectid=nil, waiting_task=nil )
+ identifier = []
+ validation_uris = []
+ crossvalidation_uri_hash.each do |id, uris|
+ uris.each do |uri|
+ identifier << id
+ validation_uris << uri
+ end
+ end
+ uri = RestClientWrapper.post(File.join(CONFIG[:services]["opentox-validation"],"/report/algorithm_comparison"),
+ { :validation_uris => validation_uris.join(","), :identifier => identifier.join(","), :subjectid => subjectid }, {}, waiting_task )
+ AlgorithmComparisonReport.new(uri)
+ end
+ end
+
class QMRFReport
include OpenTox
--
cgit v1.2.3
From 96f00f67be05da4eed147928254af6e3f6f0f03d Mon Sep 17 00:00:00 2001
From: mguetlein
Date: Tue, 17 May 2011 10:47:58 +0200
Subject: fix validation statistics: convert num predictions to integer
---
lib/validation.rb | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/lib/validation.rb b/lib/validation.rb
index d58d36e..1a2497b 100644
--- a/lib/validation.rb
+++ b/lib/validation.rb
@@ -65,7 +65,7 @@ module OpenTox
def summary
if @metadata[OT.classificationStatistics]
res = {
- :nr_predictions => @metadata[OT.numInstances] - @metadata[OT.numUnpredicted],
+ :nr_predictions => @metadata[OT.numInstances].to_i - @metadata[OT.numUnpredicted].to_i,
:correct_predictions => @metadata[OT.classificationStatistics][OT.percentCorrect],
:weighted_area_under_roc => @metadata[OT.classificationStatistics][OT.weightedAreaUnderRoc],
}
@@ -83,7 +83,7 @@ module OpenTox
res
elsif @metadata[OT.regressionStatistics]
{
- :nr_predictions => @metadata[OT.numInstances] - @metadata[OT.numUnpredicted],
+ :nr_predictions => @metadata[OT.numInstances].to_i - @metadata[OT.numUnpredicted].to_i,
:r_square => @metadata[OT.regressionStatistics][OT.rSquare],
:root_mean_squared_error => @metadata[OT.regressionStatistics][OT.rootMeanSquaredError],
:mean_absolute_error => @metadata[OT.regressionStatistics][OT.meanAbsoluteError],
--
cgit v1.2.3
From d755a131a5636f4fbe6077de5a276faf84c325b1 Mon Sep 17 00:00:00 2001
From: Andreas Maunz
Date: Tue, 17 May 2011 16:08:25 +0200
Subject: Initial version
---
lib/algorithm.rb | 62 ++++++++++++++++++++++++++++++++++++++++++--------------
1 file changed, 47 insertions(+), 15 deletions(-)
diff --git a/lib/algorithm.rb b/lib/algorithm.rb
index 7fbe0dc..16372ea 100644
--- a/lib/algorithm.rb
+++ b/lib/algorithm.rb
@@ -164,11 +164,7 @@ module OpenTox
# @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features`
# @param [Hash] params Keys `:similarity_algorithm,:p_values` are required
# @return [Hash] Hash with keys `:prediction, :confidence`
- def self.local_svm_regression(neighbors,params )
- sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values between query and neighbors
- conf = sims.inject{|sum,x| sum + x }
-
- # AM: Control log taking
+ def self.local_svm_regression(neighbors, params)
take_logs=true
neighbors.each do |n|
if (! n[:activity].nil?) && (n[:activity].to_f < 0.0)
@@ -180,10 +176,51 @@ module OpenTox
take_logs ? Math.log10(act.to_f) : act.to_f
end # activities of neighbors for supervised learning
- neighbor_matches = neighbors.collect{ |n| n[:features] } # as in classification: URIs of matches
+ sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors
+ prediction = local_sv_machine (neighbors, acts, sims, "svr", params)
+ prediction = take_logs ? 10**(prediction.to_f) : prediction.to_f
+ LOGGER.debug "Prediction is: '" + prediction.to_s + "'."
+
+ conf = sims.inject{|sum,x| sum + x }
+ confidence = conf/neighbors.size if neighbors.size > 0
+ {:prediction => prediction, :confidence => confidence}
+
+ end
+
+ # Local support vector classification from neighbors
+ # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features`
+ # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required
+ # @return [Hash] Hash with keys `:prediction, :confidence`
+ def self.local_svm_classification(neighbors, params)
+ acts = neighbors.collect do |n|
+ act = n[:activity]
+ end # activities of neighbors for supervised learning
+
+ sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors
+ prediction = local_sv_machine (neighbors, acts, sims, "svc", params)
+ prediction = prediction.to_f
+ LOGGER.debug "Prediction is: '" + prediction.to_s + "'."
+
+ conf = sims.inject{|sum,x| sum + x }
+ confidence = conf/neighbors.size if neighbors.size > 0
+ {:prediction => prediction, :confidence => confidence}
+
+ end
+
+ end
+
+ # Local support vector prediction. Not to be called directly (use local_svm_regression or local_svm_classification.
+ # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features`
+ # @param [Array] acts, activities for neighbors.
+ # @param [Array] sims, similarities for neighbors.
+ # @param [String] type, one of "svr" (regression) or "svc" (classification).
+ # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required
+ # @return [Numeric] A prediction value.
+ def self.local_sv_machine(neighbors, acts, sims, type, params)
+ neighbor_matches = neighbors.collect{ |n| n[:features] } # URIs of matches
gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel
if neighbor_matches.size == 0
- raise "No neighbors found"
+ raise "No neighbors found."
else
# gram matrix
(0..(neighbor_matches.length-1)).each do |i|
@@ -216,21 +253,16 @@ module OpenTox
# model + support vectors
LOGGER.debug "Creating SVM model ..."
- @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"nu-svr\", nu=0.8)"
+ @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"nu-#{type}\", nu=0.5)"
@r.eval "sv<-as.vector(SVindex(model))"
@r.eval "sims<-sims[sv]"
@r.eval "sims<-as.kernelMatrix(matrix(sims,1))"
LOGGER.debug "Predicting ..."
@r.eval "p<-predict(model,sims)[1,1]"
- prediction = 10**(@r.p.to_f) if take_logs
- LOGGER.debug "Prediction is: '" + prediction.to_s + "'."
+ prediction = @r.p
@r.quit # free R
end
- confidence = conf/neighbors.size if neighbors.size > 0
- {:prediction => prediction, :confidence => confidence}
-
- end
-
+ prediction
end
module Substructure
--
cgit v1.2.3
From cb0cc893c74016425b56424093a6de1b2f795c70 Mon Sep 17 00:00:00 2001
From: Andreas Maunz
Date: Tue, 17 May 2011 16:35:20 +0200
Subject: Fixed method scope
---
lib/algorithm.rb | 110 ++++++++++++++++++++++++++++---------------------------
1 file changed, 56 insertions(+), 54 deletions(-)
diff --git a/lib/algorithm.rb b/lib/algorithm.rb
index 16372ea..0a5b09f 100644
--- a/lib/algorithm.rb
+++ b/lib/algorithm.rb
@@ -177,7 +177,7 @@ module OpenTox
end # activities of neighbors for supervised learning
sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors
- prediction = local_sv_machine (neighbors, acts, sims, "svr", params)
+ prediction = local_svm(neighbors, acts, sims, "svr", params)
prediction = take_logs ? 10**(prediction.to_f) : prediction.to_f
LOGGER.debug "Prediction is: '" + prediction.to_s + "'."
@@ -197,7 +197,7 @@ module OpenTox
end # activities of neighbors for supervised learning
sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors
- prediction = local_sv_machine (neighbors, acts, sims, "svc", params)
+ prediction = local_svm (neighbors, acts, sims, "svc", params)
prediction = prediction.to_f
LOGGER.debug "Prediction is: '" + prediction.to_s + "'."
@@ -207,62 +207,64 @@ module OpenTox
end
- end
- # Local support vector prediction. Not to be called directly (use local_svm_regression or local_svm_classification.
- # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features`
- # @param [Array] acts, activities for neighbors.
- # @param [Array] sims, similarities for neighbors.
- # @param [String] type, one of "svr" (regression) or "svc" (classification).
- # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required
- # @return [Numeric] A prediction value.
- def self.local_sv_machine(neighbors, acts, sims, type, params)
- neighbor_matches = neighbors.collect{ |n| n[:features] } # URIs of matches
- gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel
- if neighbor_matches.size == 0
- raise "No neighbors found."
- else
- # gram matrix
- (0..(neighbor_matches.length-1)).each do |i|
- gram_matrix[i] = [] unless gram_matrix[i]
- # upper triangle
- ((i+1)..(neighbor_matches.length-1)).each do |j|
- sim = eval("#{params[:similarity_algorithm]}(neighbor_matches[i], neighbor_matches[j], params[:p_values])")
- gram_matrix[i][j] = Algorithm.gauss(sim)
- gram_matrix[j] = [] unless gram_matrix[j]
- gram_matrix[j][i] = gram_matrix[i][j] # lower triangle
+ # Local support vector prediction from neighbors.
+ # Not to be called directly (use local_svm_regression or local_svm_classification).
+ # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features`
+ # @param [Array] acts, activities for neighbors.
+ # @param [Array] sims, similarities for neighbors.
+ # @param [String] type, one of "svr" (regression) or "svc" (classification).
+ # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required
+ # @return [Numeric] A prediction value.
+ def self.local_svm(neighbors, acts, sims, type, params)
+ neighbor_matches = neighbors.collect{ |n| n[:features] } # URIs of matches
+ gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel
+ if neighbor_matches.size == 0
+ raise "No neighbors found."
+ else
+ # gram matrix
+ (0..(neighbor_matches.length-1)).each do |i|
+ gram_matrix[i] = [] unless gram_matrix[i]
+ # upper triangle
+ ((i+1)..(neighbor_matches.length-1)).each do |j|
+ sim = eval("#{params[:similarity_algorithm]}(neighbor_matches[i], neighbor_matches[j], params[:p_values])")
+ gram_matrix[i][j] = Algorithm.gauss(sim)
+ gram_matrix[j] = [] unless gram_matrix[j]
+ gram_matrix[j][i] = gram_matrix[i][j] # lower triangle
+ end
+ gram_matrix[i][i] = 1.0
end
- gram_matrix[i][i] = 1.0
- end
- #LOGGER.debug gram_matrix.to_yaml
- @r = RinRuby.new(false,false) # global R instance leads to Socket errors after a large number of requests
- @r.eval "library('kernlab')" # this requires R package "kernlab" to be installed
- LOGGER.debug "Setting R data ..."
- # set data
- @r.gram_matrix = gram_matrix.flatten
- @r.n = neighbor_matches.size
- @r.y = acts
- @r.sims = sims
+ #LOGGER.debug gram_matrix.to_yaml
+ @r = RinRuby.new(false,false) # global R instance leads to Socket errors after a large number of requests
+ @r.eval "library('kernlab')" # this requires R package "kernlab" to be installed
+ LOGGER.debug "Setting R data ..."
+ # set data
+ @r.gram_matrix = gram_matrix.flatten
+ @r.n = neighbor_matches.size
+ @r.y = acts
+ @r.sims = sims
+
+ LOGGER.debug "Preparing R data ..."
+ # prepare data
+ @r.eval "y<-as.vector(y)"
+ @r.eval "gram_matrix<-as.kernelMatrix(matrix(gram_matrix,n,n))"
+ @r.eval "sims<-as.vector(sims)"
+
+ # model + support vectors
+ LOGGER.debug "Creating SVM model ..."
+ @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"nu-#{type}\", nu=0.5)"
+ @r.eval "sv<-as.vector(SVindex(model))"
+ @r.eval "sims<-sims[sv]"
+ @r.eval "sims<-as.kernelMatrix(matrix(sims,1))"
+ LOGGER.debug "Predicting ..."
+ @r.eval "p<-predict(model,sims)[1,1]"
+ prediction = @r.p
+ @r.quit # free R
+ end
+ prediction
+ end
- LOGGER.debug "Preparing R data ..."
- # prepare data
- @r.eval "y<-as.vector(y)"
- @r.eval "gram_matrix<-as.kernelMatrix(matrix(gram_matrix,n,n))"
- @r.eval "sims<-as.vector(sims)"
-
- # model + support vectors
- LOGGER.debug "Creating SVM model ..."
- @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"nu-#{type}\", nu=0.5)"
- @r.eval "sv<-as.vector(SVindex(model))"
- @r.eval "sims<-sims[sv]"
- @r.eval "sims<-as.kernelMatrix(matrix(sims,1))"
- LOGGER.debug "Predicting ..."
- @r.eval "p<-predict(model,sims)[1,1]"
- prediction = @r.p
- @r.quit # free R
- end
- prediction
end
module Substructure
--
cgit v1.2.3
From eb2582799bb5a05e053e2709db47880430f80a78 Mon Sep 17 00:00:00 2001
From: Andreas Maunz
Date: Tue, 17 May 2011 16:40:12 +0200
Subject: Add debug
---
lib/algorithm.rb | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/lib/algorithm.rb b/lib/algorithm.rb
index 0a5b09f..ec5748d 100644
--- a/lib/algorithm.rb
+++ b/lib/algorithm.rb
@@ -178,7 +178,8 @@ module OpenTox
sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors
prediction = local_svm(neighbors, acts, sims, "svr", params)
- prediction = take_logs ? 10**(prediction.to_f) : prediction.to_f
+ LOGGER.debug "Prediction is: '" + prediction.to_s + "'."
+ prediction = (take_logs ? 10**(prediction.to_f) : prediction.to_f)
LOGGER.debug "Prediction is: '" + prediction.to_s + "'."
conf = sims.inject{|sum,x| sum + x }
--
cgit v1.2.3
From 251ce4cd57a161fa20f1400b5980b171bf2ff86c Mon Sep 17 00:00:00 2001
From: Andreas Maunz
Date: Tue, 17 May 2011 16:46:26 +0200
Subject: nu 0.8 again to pass tests
---
lib/algorithm.rb | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lib/algorithm.rb b/lib/algorithm.rb
index ec5748d..4cb80e3 100644
--- a/lib/algorithm.rb
+++ b/lib/algorithm.rb
@@ -254,7 +254,7 @@ module OpenTox
# model + support vectors
LOGGER.debug "Creating SVM model ..."
- @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"nu-#{type}\", nu=0.5)"
+ @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"nu-#{type}\", nu=0.8)"
@r.eval "sv<-as.vector(SVindex(model))"
@r.eval "sims<-sims[sv]"
@r.eval "sims<-as.kernelMatrix(matrix(sims,1))"
--
cgit v1.2.3
From cf0fd8003c373bd9216823ff2065231696ddfbcb Mon Sep 17 00:00:00 2001
From: Andreas Maunz
Date: Thu, 19 May 2011 10:08:17 +0200
Subject: Set nu to 0.5
---
lib/algorithm.rb | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/lib/algorithm.rb b/lib/algorithm.rb
index 4cb80e3..fb5fd7f 100644
--- a/lib/algorithm.rb
+++ b/lib/algorithm.rb
@@ -178,7 +178,6 @@ module OpenTox
sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors
prediction = local_svm(neighbors, acts, sims, "svr", params)
- LOGGER.debug "Prediction is: '" + prediction.to_s + "'."
prediction = (take_logs ? 10**(prediction.to_f) : prediction.to_f)
LOGGER.debug "Prediction is: '" + prediction.to_s + "'."
@@ -254,7 +253,7 @@ module OpenTox
# model + support vectors
LOGGER.debug "Creating SVM model ..."
- @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"nu-#{type}\", nu=0.8)"
+ @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"nu-#{type}\", nu=0.5)"
@r.eval "sv<-as.vector(SVindex(model))"
@r.eval "sims<-sims[sv]"
@r.eval "sims<-as.kernelMatrix(matrix(sims,1))"
--
cgit v1.2.3
From afefbdf05549c298387821c3a441d1de701291e0 Mon Sep 17 00:00:00 2001
From: Andreas Maunz
Date: Thu, 19 May 2011 12:12:23 +0200
Subject: Added SVM classification
---
lib/algorithm.rb | 61 +++++++++++++++++++++++++++++++++++++-------------------
1 file changed, 40 insertions(+), 21 deletions(-)
diff --git a/lib/algorithm.rb b/lib/algorithm.rb
index fb5fd7f..9402eab 100644
--- a/lib/algorithm.rb
+++ b/lib/algorithm.rb
@@ -177,7 +177,7 @@ module OpenTox
end # activities of neighbors for supervised learning
sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors
- prediction = local_svm(neighbors, acts, sims, "svr", params)
+ prediction = local_svm(neighbors, acts, sims, "nu-svr", params)
prediction = (take_logs ? 10**(prediction.to_f) : prediction.to_f)
LOGGER.debug "Prediction is: '" + prediction.to_s + "'."
@@ -197,9 +197,15 @@ module OpenTox
end # activities of neighbors for supervised learning
sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors
- prediction = local_svm (neighbors, acts, sims, "svc", params)
- prediction = prediction.to_f
- LOGGER.debug "Prediction is: '" + prediction.to_s + "'."
+
+
+ acts_f = acts.collect {|v| v == true ? 1.0 : 0.0}
+ begin
+ prediction = local_svm (neighbors, acts_f, sims, "C-bsvc", params)
+ LOGGER.debug "Prediction is: '" + prediction.to_s + "'."
+ rescue Exception => e
+ LOGGER.debug "Prediction failed."
+ end
conf = sims.inject{|sum,x| sum + x }
confidence = conf/neighbors.size if neighbors.size > 0
@@ -213,7 +219,7 @@ module OpenTox
# @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features`
# @param [Array] acts, activities for neighbors.
# @param [Array] sims, similarities for neighbors.
- # @param [String] type, one of "svr" (regression) or "svc" (classification).
+ # @param [String] type, one of "nu-svr" (regression) or "C-bsvc" (classification).
# @param [Hash] params Keys `:similarity_algorithm,:p_values` are required
# @return [Numeric] A prediction value.
def self.local_svm(neighbors, acts, sims, type, params)
@@ -245,22 +251,35 @@ module OpenTox
@r.y = acts
@r.sims = sims
- LOGGER.debug "Preparing R data ..."
- # prepare data
- @r.eval "y<-as.vector(y)"
- @r.eval "gram_matrix<-as.kernelMatrix(matrix(gram_matrix,n,n))"
- @r.eval "sims<-as.vector(sims)"
-
- # model + support vectors
- LOGGER.debug "Creating SVM model ..."
- @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"nu-#{type}\", nu=0.5)"
- @r.eval "sv<-as.vector(SVindex(model))"
- @r.eval "sims<-sims[sv]"
- @r.eval "sims<-as.kernelMatrix(matrix(sims,1))"
- LOGGER.debug "Predicting ..."
- @r.eval "p<-predict(model,sims)[1,1]"
- prediction = @r.p
- @r.quit # free R
+ begin
+ LOGGER.debug "Preparing R data ..."
+ # prepare data
+ @r.eval "y<-as.vector(y)"
+ @r.eval "gram_matrix<-as.kernelMatrix(matrix(gram_matrix,n,n))"
+ @r.eval "sims<-as.vector(sims)"
+
+ # model + support vectors
+ LOGGER.debug "Creating SVM model ..."
+ @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"#{type}\", nu=0.5)"
+ @r.eval "sv<-as.vector(SVindex(model))"
+ @r.eval "sims<-sims[sv]"
+ @r.eval "sims<-as.kernelMatrix(matrix(sims,1))"
+ LOGGER.debug "Predicting ..."
+ if type == "nu-svr"
+ @r.eval "p<-predict(model,sims)[1,1]"
+ elsif type == "C-bsvc"
+ @r.eval "p<-predict(model,sims)"
+ end
+ if type == "nu-svr"
+ prediction = @r.p
+ elsif type == "C-bsvc"
+ prediction = (@r.p.to_f == 1.0 ? true : false)
+ end
+ @r.quit # free R
+ rescue Exception => e
+ LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}"
+ end
+
end
prediction
end
--
cgit v1.2.3
From e34c80eadcd40482a765cda861b92ab5c1250049 Mon Sep 17 00:00:00 2001
From: Andreas Maunz
Date: Thu, 19 May 2011 13:08:28 +0200
Subject: Added Exception handling
---
lib/algorithm.rb | 17 +++++++++--------
1 file changed, 9 insertions(+), 8 deletions(-)
diff --git a/lib/algorithm.rb b/lib/algorithm.rb
index 9402eab..5b41cbf 100644
--- a/lib/algorithm.rb
+++ b/lib/algorithm.rb
@@ -177,9 +177,13 @@ module OpenTox
end # activities of neighbors for supervised learning
sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors
- prediction = local_svm(neighbors, acts, sims, "nu-svr", params)
- prediction = (take_logs ? 10**(prediction.to_f) : prediction.to_f)
- LOGGER.debug "Prediction is: '" + prediction.to_s + "'."
+ begin
+ prediction = local_svm(neighbors, acts, sims, "nu-svr", params)
+ prediction = (take_logs ? 10**(prediction.to_f) : prediction.to_f)
+ LOGGER.debug "Prediction is: '" + prediction.to_s + "'."
+ rescue Exception => e
+ LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}"
+ end
conf = sims.inject{|sum,x| sum + x }
confidence = conf/neighbors.size if neighbors.size > 0
@@ -195,16 +199,13 @@ module OpenTox
acts = neighbors.collect do |n|
act = n[:activity]
end # activities of neighbors for supervised learning
-
- sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors
-
-
acts_f = acts.collect {|v| v == true ? 1.0 : 0.0}
+ sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors
begin
prediction = local_svm (neighbors, acts_f, sims, "C-bsvc", params)
LOGGER.debug "Prediction is: '" + prediction.to_s + "'."
rescue Exception => e
- LOGGER.debug "Prediction failed."
+ LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}"
end
conf = sims.inject{|sum,x| sum + x }
--
cgit v1.2.3
From 30478c4dd18b56048b6e190027daef1fc6608230 Mon Sep 17 00:00:00 2001
From: Andreas Maunz
Date: Thu, 19 May 2011 16:55:34 +0200
Subject: Fixed digression class / regr
---
lib/model.rb | 83 ++++++++++++++++++++++++++++++++++++++++--------------------
1 file changed, 55 insertions(+), 28 deletions(-)
diff --git a/lib/model.rb b/lib/model.rb
index 3d64f32..7acd8f2 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -179,10 +179,10 @@ module OpenTox
return @prediction_dataset if database_activity(subjectid)
- # AM: Balancing, see http://www.maunz.de/wordpress/opentox/2011/balanced-lazar
- l = Array.new # larger
- s = Array.new # smaller fraction
- if metadata[RDF.type] == [OTA.ClassificationLazySingleTarget]
+ if metadata[RDF.type] == [OTA.ClassificationLazySingleTarget]
+ # AM: Balancing, see http://www.maunz.de/wordpress/opentox/2011/balanced-lazar
+ l = Array.new # larger
+ s = Array.new # smaller fraction
@fingerprints.each do |training_compound,training_features|
@activities[training_compound].each do |act|
case act.to_s
@@ -202,36 +202,41 @@ module OpenTox
# determine ratio
modulo = l.size.divmod(s.size)# modulo[0]=ratio, modulo[1]=rest
LOGGER.info "BLAZAR: Balance: #{modulo[0]}, rest #{modulo[1]}."
- end
- # AM: Balanced predictions
- addon = (modulo[1].to_f/modulo[0]).ceil # what will be added in each round
- slack = modulo[1].divmod(addon)[1] # what remains for the last round
- position = 0
- predictions = Array.new
+ # AM: Balanced predictions
+ addon = (modulo[1].to_f/modulo[0]).ceil # what will be added in each round
+ slack = modulo[1].divmod(addon)[1] # what remains for the last round
+ position = 0
+ predictions = Array.new
- prediction_best=nil
- neighbors_best=nil
+ prediction_best=nil
+ neighbors_best=nil
- begin
- for i in 1..modulo[0] do
- (i == modulo[0]) && (slack>0) ? lr_size = s.size + slack : lr_size = s.size + addon # determine fraction
- LOGGER.info "BLAZAR: Neighbors round #{i}: #{position} + #{lr_size}."
- neighbors(s, l, position, lr_size) # get ratio fraction of larger part
- prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})")
- if prediction_best.nil? || prediction[:confidence].abs > prediction_best[:confidence].abs
- prediction_best=prediction
- neighbors_best=@neighbors
+ begin
+ for i in 1..modulo[0] do
+ (i == modulo[0]) && (slack>0) ? lr_size = s.size + slack : lr_size = s.size + addon # determine fraction
+ LOGGER.info "BLAZAR: Neighbors round #{i}: #{position} + #{lr_size}."
+ neighbors(s, l, position, lr_size) # get ratio fraction of larger part
+ prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})")
+ if prediction_best.nil? || prediction[:confidence].abs > prediction_best[:confidence].abs
+ prediction_best=prediction
+ neighbors_best=@neighbors
+ end
+ position = position + lr_size
+ end
+ rescue Exception => e
+ LOGGER.error "BLAZAR failed in prediction: "+e.class.to_s+": "+e.message
end
- position = position + lr_size
- end
- rescue Exception => e
- LOGGER.error "BLAZAR failed in prediction: "+e.class.to_s+": "+e.message
- end
- prediction=prediction_best
- @neighbors=neighbors_best
+ prediction=prediction_best
+ @neighbors=neighbors_best
+ ### END AM balanced predictions
+ else # regression case: no balancing
+ neighbors
+ prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})")
+ end
+
prediction_feature_uri = File.join( @prediction_dataset.uri, "feature", "prediction", File.basename(@metadata[OT.dependentVariables]),@prediction_dataset.compounds.size.to_s)
# TODO: fix dependentVariable
@prediction_dataset.metadata[OT.dependentVariables] = prediction_feature_uri
@@ -347,6 +352,28 @@ module OpenTox
end
+
+ # Find neighbors and store them as object variable
+ def neighbors
+
+ @compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm
+
+ @neighbors = []
+ @fingerprints.each do |training_compound,training_features|
+ sim = eval("#{@similarity_algorithm}(@compound_features,training_features,@p_values)")
+ if sim > @min_sim
+ @activities[training_compound].each do |act|
+ @neighbors << {
+ :compound => training_compound,
+ :similarity => sim,
+ :features => training_features,
+ :activity => act
+ }
+ end
+ end
+ end
+ end
+
# Find database activities and store them in @prediction_dataset
# @return [Boolean] true if compound has databasse activities, false if not
def database_activity(subjectid)
--
cgit v1.2.3
From 32b7faa44ef70194e0ae1c5e43948eea785f9d04 Mon Sep 17 00:00:00 2001
From: Andreas Maunz
Date: Thu, 19 May 2011 17:03:50 +0200
Subject: Fixed neighbor selection
---
lib/model.rb | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/lib/model.rb b/lib/model.rb
index 7acd8f2..998d2dc 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -216,7 +216,7 @@ module OpenTox
for i in 1..modulo[0] do
(i == modulo[0]) && (slack>0) ? lr_size = s.size + slack : lr_size = s.size + addon # determine fraction
LOGGER.info "BLAZAR: Neighbors round #{i}: #{position} + #{lr_size}."
- neighbors(s, l, position, lr_size) # get ratio fraction of larger part
+ neighbors_balanced(s, l, position, lr_size) # get ratio fraction of larger part
prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})")
if prediction_best.nil? || prediction[:confidence].abs > prediction_best[:confidence].abs
prediction_best=prediction
@@ -325,7 +325,7 @@ module OpenTox
end
# Find neighbors and store them as object variable
- def neighbors(s=nil, l=nil, start=nil, offset=nil)
+ def neighbors_balanced(s, l, start, offset)
@compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm
@neighbors = []
--
cgit v1.2.3
From 8c78bf2358338cf5f795a65c9b1c21a48474169f Mon Sep 17 00:00:00 2001
From: mguetlein
Date: Fri, 20 May 2011 10:52:32 +0200
Subject: change location of to-html-opentox-image to local validation service
---
lib/to-html.rb | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lib/to-html.rb b/lib/to-html.rb
index 6785974..66a3e74 100644
--- a/lib/to-html.rb
+++ b/lib/to-html.rb
@@ -1,5 +1,5 @@
-OT_LOGO = "http://opentox.informatik.uni-freiburg.de/ot-logo.png"
+OT_LOGO = File.join(CONFIG[:services]["opentox-validation"],"resources/ot-logo.png")
class String
--
cgit v1.2.3
From 0b936c71d8a1d5effa6c29d5ee9c227fff18a070 Mon Sep 17 00:00:00 2001
From: Christoph Helma
Date: Mon, 23 May 2011 14:03:02 +0000
Subject: owl-dl fixed for model and prediction datasets
---
lib/dataset.rb | 7 ++-
lib/model.rb | 136 ++++++++++++++++++++++++++++++------------------------
lib/serializer.rb | 27 +++++++----
3 files changed, 95 insertions(+), 75 deletions(-)
diff --git a/lib/dataset.rb b/lib/dataset.rb
index 4005c1c..4dc4296 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -179,7 +179,6 @@ module OpenTox
end
end
-=begin
# Detect feature type(s) in the dataset
# @return [String] `classification", "regression", "mixed" or unknown`
def feature_type(subjectid=nil)
@@ -193,6 +192,7 @@ module OpenTox
"unknown"
end
end
+=begin
=end
# Get Spreadsheet representation
@@ -369,12 +369,11 @@ module OpenTox
end
def value(compound)
- @data_entries[compound.uri].collect{|f,v| v.first if f.match(/prediction/)}.compact.first
+ @data_entries[compound.uri].collect{|f,v| v.first if f.match(/value/)}.compact.first
end
def confidence(compound)
- feature_uri = @data_entries[compound.uri].collect{|f,v| f if f.match(/prediction/)}.compact.first
- @features[feature_uri][OT.confidence]
+ @data_entries[compound.uri].collect{|f,v| v.first if f.match(/confidence/)}.compact.first
end
def descriptors(compound)
diff --git a/lib/model.rb b/lib/model.rb
index 998d2dc..d46152d 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -164,8 +164,6 @@ module OpenTox
features = {}
unless @prediction_dataset
- #@prediction_dataset = cached_prediction
- #return @prediction_dataset if cached_prediction
@prediction_dataset = Dataset.create(CONFIG[:services]["opentox-dataset"], subjectid)
@prediction_dataset.add_metadata( {
OT.hasSource => @uri,
@@ -237,38 +235,90 @@ module OpenTox
prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})")
end
- prediction_feature_uri = File.join( @prediction_dataset.uri, "feature", "prediction", File.basename(@metadata[OT.dependentVariables]),@prediction_dataset.compounds.size.to_s)
- # TODO: fix dependentVariable
- @prediction_dataset.metadata[OT.dependentVariables] = prediction_feature_uri
+ # TODO: reasonable feature name
+ #prediction_feature_uri = File.join( @prediction_dataset.uri, "feature", "prediction", File.basename(@metadata[OT.dependentVariables]),@prediction_dataset.compounds.size.to_s)
+ value_feature_uri = File.join( @prediction_dataset.uri, "feature", "prediction", File.basename(@metadata[OT.dependentVariables]),"value")
+ confidence_feature_uri = File.join( @prediction_dataset.uri, "feature", "prediction", File.basename(@metadata[OT.dependentVariables]),"confidence")
+ prediction_feature_uris = {value_feature_uri => prediction[:prediction], confidence_feature_uri => prediction[:confidence]}
+ prediction_feature_uris[value_feature_uri] = "No similar compounds in training dataset." if @neighbors.size == 0 or prediction[:prediction].nil?
+
+
+ #@prediction_dataset.metadata[OT.dependentVariables] = prediction_feature_uri
+ @prediction_dataset.metadata[OT.dependentVariables] = @metadata[OT.dependentVariables]
+
+=begin
if @neighbors.size == 0
- @prediction_dataset.add_feature(prediction_feature_uri, {
- RDF.type => [OT.MeasuredFeature],
- OT.hasSource => @uri,
- DC.creator => @uri,
- DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )),
- OT.error => "No similar compounds in training dataset.",
- OT.parameters => [{DC.title => "compound_uri", OT.paramValue => compound_uri}]
- })
- @prediction_dataset.add @compound.uri, prediction_feature_uri, prediction[:prediction]
+ prediction_feature_uris.each do |prediction_feature_uri,value|
+ @prediction_dataset.add_feature(prediction_feature_uri, {
+ RDF.type => [OT.MeasuredFeature],
+ OT.hasSource => @uri,
+ DC.creator => @uri,
+ DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )),
+ OT.error => "No similar compounds in training dataset.",
+ #OT.parameters => [{DC.title => "compound_uri", OT.paramValue => compound_uri}]
+ })
+ @prediction_dataset.add @compound.uri, prediction_feature_uri, value
+ end
else
+=end
+ prediction_feature_uris.each do |prediction_feature_uri,value|
+ @prediction_dataset.metadata[OT.predictedVariables] = [] unless @prediction_dataset.metadata[OT.predictedVariables]
+ @prediction_dataset.metadata[OT.predictedVariables] << prediction_feature_uri
@prediction_dataset.add_feature(prediction_feature_uri, {
RDF.type => [OT.ModelPrediction],
OT.hasSource => @uri,
DC.creator => @uri,
DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )),
- OT.prediction => prediction[:prediction],
- OT.confidence => prediction[:confidence],
- OT.parameters => [{DC.title => "compound_uri", OT.paramValue => compound_uri}]
+ # TODO: factor information to value
})
- @prediction_dataset.add @compound.uri, prediction_feature_uri, prediction[:prediction]
+ #OT.prediction => prediction[:prediction],
+ #OT.confidence => prediction[:confidence],
+ #OT.parameters => [{DC.title => "compound_uri", OT.paramValue => compound_uri}]
+ @prediction_dataset.add @compound.uri, prediction_feature_uri, value
+ end
- if verbose
- if @feature_calculation_algorithm == "Substructure.match"
- f = 0
- @compound_features.each do |feature|
- feature_uri = File.join( @prediction_dataset.uri, "feature", "descriptor", f.to_s)
+ if verbose
+ if @feature_calculation_algorithm == "Substructure.match"
+ f = 0
+ @compound_features.each do |feature|
+ feature_uri = File.join( @prediction_dataset.uri, "feature", "descriptor", f.to_s)
+ features[feature] = feature_uri
+ @prediction_dataset.add_feature(feature_uri, {
+ RDF.type => [OT.Substructure],
+ OT.smarts => feature,
+ OT.pValue => @p_values[feature],
+ OT.effect => @effects[feature]
+ })
+ @prediction_dataset.add @compound.uri, feature_uri, true
+ f+=1
+ end
+ else
+ @compound_features.each do |feature|
+ features[feature] = feature
+ @prediction_dataset.add @compound.uri, feature, true
+ end
+ end
+ n = 0
+ @neighbors.each do |neighbor|
+ neighbor_uri = File.join( @prediction_dataset.uri, "feature", "neighbor", n.to_s )
+ @prediction_dataset.add_feature(neighbor_uri, {
+ OT.compound => neighbor[:compound],
+ OT.similarity => neighbor[:similarity],
+ OT.measuredActivity => neighbor[:activity],
+ RDF.type => [OT.Neighbor]
+ })
+ @prediction_dataset.add @compound.uri, neighbor_uri, true
+ f = 0 unless f
+ neighbor[:features].each do |feature|
+ if @feature_calculation_algorithm == "Substructure.match"
+ feature_uri = File.join( @prediction_dataset.uri, "feature", "descriptor", f.to_s) unless feature_uri = features[feature]
+ else
+ feature_uri = feature
+ end
+ @prediction_dataset.add neighbor[:compound], feature_uri, true
+ unless features.has_key? feature
features[feature] = feature_uri
@prediction_dataset.add_feature(feature_uri, {
RDF.type => [OT.Substructure],
@@ -276,49 +326,13 @@ module OpenTox
OT.pValue => @p_values[feature],
OT.effect => @effects[feature]
})
- @prediction_dataset.add @compound.uri, feature_uri, true
f+=1
end
- else
- @compound_features.each do |feature|
- features[feature] = feature
- @prediction_dataset.add @compound.uri, feature, true
- end
- end
- n = 0
- @neighbors.each do |neighbor|
- neighbor_uri = File.join( @prediction_dataset.uri, "feature", "neighbor", n.to_s )
- @prediction_dataset.add_feature(neighbor_uri, {
- OT.compound => neighbor[:compound],
- OT.similarity => neighbor[:similarity],
- OT.measuredActivity => neighbor[:activity],
- RDF.type => [OT.Neighbor]
- })
- @prediction_dataset.add @compound.uri, neighbor_uri, true
- f = 0 unless f
- neighbor[:features].each do |feature|
- if @feature_calculation_algorithm == "Substructure.match"
- feature_uri = File.join( @prediction_dataset.uri, "feature", "descriptor", f.to_s) unless feature_uri = features[feature]
- else
- feature_uri = feature
- end
- @prediction_dataset.add neighbor[:compound], feature_uri, true
- unless features.has_key? feature
- features[feature] = feature_uri
- @prediction_dataset.add_feature(feature_uri, {
- RDF.type => [OT.Substructure],
- OT.smarts => feature,
- OT.pValue => @p_values[feature],
- OT.effect => @effects[feature]
- })
- f+=1
- end
- end
- n+=1
end
- # what happens with dataset predictions?
+ n+=1
end
end
+ #end
@prediction_dataset.save(subjectid)
@prediction_dataset
diff --git a/lib/serializer.rb b/lib/serializer.rb
index e4cb541..78e7709 100644
--- a/lib/serializer.rb
+++ b/lib/serializer.rb
@@ -17,6 +17,7 @@ module OpenTox
# this should come from opentox.owl
OT.Compound => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } ,
OT.Feature => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } ,
+ OT.Model => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } ,
OT.NominalFeature => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } ,
OT.NumericFeature => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } ,
OT.StringFeature => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } ,
@@ -27,6 +28,8 @@ module OpenTox
OT.Parameter => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } ,
OT.Task => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } ,
OTA.PatternMiningSupervised => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } ,
+ OTA.ClassificationLazySingleTarget => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } ,
+ OTA.RegressionLazySingleTarget => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } ,
#classes for validation
OT.Validation => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } ,
@@ -45,6 +48,9 @@ module OpenTox
OT.values => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } ,
OT.algorithm => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } ,
OT.parameters => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } ,
+ OT.featureDataset => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } ,
+ OT.dependentVariables => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } ,
+ OT.paramValue => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } ,
#object props for validation#
OT.model => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } ,
@@ -126,7 +132,7 @@ module OpenTox
OT.hasSource => { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } ,
OT.value => { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } ,
OT.paramScope => { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } ,
- OT.paramValue => { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } ,
+ #OT.paramValue => { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } ,
}
@data_entries = {}
@@ -157,23 +163,16 @@ module OpenTox
# Add a dataset
# @param [String] uri Dataset URI
def add_dataset(dataset)
-
@dataset = dataset.uri
-
@object[dataset.uri] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Dataset }] }
-
add_metadata dataset.uri, dataset.metadata
-
dataset.compounds.each { |compound| add_compound compound }
-
dataset.features.each { |feature,metadata| add_feature feature,metadata }
-
dataset.data_entries.each do |compound,entry|
entry.each do |feature,values|
values.each { |value| add_data_entry compound,feature,value }
end
end
-
end
# Add a algorithm
@@ -188,6 +187,13 @@ module OpenTox
def add_model(uri,metadata)
@object[uri] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Model }] }
add_metadata uri, metadata
+ @object[metadata[OT.featureDataset]] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Dataset }] }
+ @object[metadata[OT.trainingDataset]] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Dataset }] }
+ @object[metadata[OT.dependentVariables]] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Feature }] }
+ # TODO: add algorithms from parameters
+ @object["http://ot-dev.in-silico.ch/algorithm/fminer/bbrc"] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Algorithm }] }
+ @object["http://ot-dev.in-silico.ch/algorithm/fminer/last"] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Algorithm }] }
+ @object["http://ot-dev.in-silico.ch/algorithm/lazar"] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Algorithm }] }
end
# Add a task
@@ -272,7 +278,7 @@ module OpenTox
@object[genid][name] = [{"type" => type(entry), "value" => entry }]
end
end
- elsif v.is_a? Array and u == RDF.type
+ elsif v.is_a? Array #and u == RDF.type
@object[uri] = {} unless @object[uri]
v.each do |value|
@object[uri][u] = [] unless @object[uri][u]
@@ -354,7 +360,8 @@ module OpenTox
# @return [text/plain] Object OWL-DL in RDF/XML format
def to_rdfxml
Tempfile.open("owl-serializer"){|f| f.write(self.to_ntriples); @path = f.path}
- `rapper -i ntriples -f 'xmlns:ot="#{OT.uri}"' -f 'xmlns:dc="#{DC.uri}"' -f 'xmlns:rdf="#{RDF.uri}"' -f 'xmlns:owl="#{OWL.uri}"' -o rdfxml #{@path} 2>/dev/null`
+ # TODO: add base uri for ist services
+ `rapper -i ntriples -f 'xmlns:ot="#{OT.uri}"' -f 'xmlns:ota="#{OTA.uri}"' -f 'xmlns:dc="#{DC.uri}"' -f 'xmlns:rdf="#{RDF.uri}"' -f 'xmlns:owl="#{OWL.uri}"' -o rdfxml #{@path} 2>/dev/null`
end
# Convert to JSON as specified in http://n2.talis.com/wiki/RDF_JSON_Specification
--
cgit v1.2.3
From 87eb7cc1e079821c2f7c5e101e7e392e9bd10f00 Mon Sep 17 00:00:00 2001
From: davor
Date: Tue, 24 May 2011 09:35:11 +0200
Subject: Fixing regression detection
---
lib/parser.rb | 66 +++++++++++++++++++++++++++++++++++++++++++++--------------
1 file changed, 51 insertions(+), 15 deletions(-)
diff --git a/lib/parser.rb b/lib/parser.rb
index 7bdee95..8deaa91 100644
--- a/lib/parser.rb
+++ b/lib/parser.rb
@@ -277,7 +277,23 @@ module OpenTox
def load_spreadsheet(book)
book.default_sheet = 0
add_features book.row(1)
- 2.upto(book.last_row) { |i| add_values book.row(i) }
+
+ # AM: fix mixed read in
+ regression_features=false
+ 2.upto(book.last_row) { |i|
+ row = book.row(i)
+ smiles = row.shift
+ row.each_index do |i|
+ value = row[i]
+ type = feature_type(value)
+ if type == OT.NumericFeature
+ regression_features=true
+ break
+ end
+ end
+ }
+
+ 2.upto(book.last_row) { |i| add_values book.row(i),regression_features }
warnings
@dataset
end
@@ -289,7 +305,23 @@ module OpenTox
row = 0
input = csv.split("\n")
add_features split_row(input.shift)
- input.each { |row| add_values split_row(row) }
+
+
+ # AM: fix mixed read in
+ regression_features=false
+ input.each { |row|
+ row = split_row(row)
+ smiles = row.shift
+ row.each_index do |i|
+ value = row[i]
+ type = feature_type(value)
+ if type == OT.NumericFeature
+ regression_features=true
+ break
+ end
+ end
+ }
+ input.each { |row| add_values split_row(row),regression_features }
warnings
@dataset
end
@@ -335,7 +367,7 @@ module OpenTox
end
end
- def add_values(row)
+ def add_values(row, regression_features=false)
smiles = row.shift
compound = Compound.from_smiles(smiles)
@@ -353,19 +385,23 @@ module OpenTox
@feature_types[feature] << type
- case type
- when OT.NominalFeature
- case value.to_s
- when TRUE_REGEXP
- val = true
- when FALSE_REGEXP
- val = false
- end
- when OT.NumericFeature
+ if (regression_features)
val = value.to_f
- when OT.StringFeature
- val = value.to_s
- @activity_errors << smiles+", "+row.join(", ")
+ else
+ case type
+ when OT.NominalFeature
+ case value.to_s
+ when TRUE_REGEXP
+ val = true
+ when FALSE_REGEXP
+ val = false
+ end
+ when OT.NumericFeature
+ val = value.to_f
+ when OT.StringFeature
+ val = value.to_s
+ @activity_errors << smiles+", "+row.join(", ")
+ end
end
if val!=nil
@dataset.add(compound.uri, feature, val)
--
cgit v1.2.3
From 4a7ba2adb0743cd225ad5c2cf9f71c896d87b157 Mon Sep 17 00:00:00 2001
From: davor
Date: Tue, 24 May 2011 10:45:53 +0200
Subject: Created dedicated function for value sweeping
---
lib/parser.rb | 35 +++++++++++++++++------------------
1 file changed, 17 insertions(+), 18 deletions(-)
diff --git a/lib/parser.rb b/lib/parser.rb
index 8deaa91..4984292 100644
--- a/lib/parser.rb
+++ b/lib/parser.rb
@@ -282,15 +282,8 @@ module OpenTox
regression_features=false
2.upto(book.last_row) { |i|
row = book.row(i)
- smiles = row.shift
- row.each_index do |i|
- value = row[i]
- type = feature_type(value)
- if type == OT.NumericFeature
- regression_features=true
- break
- end
- end
+ regression_features = detect_regression_features row
+ break if regression_features=true
}
2.upto(book.last_row) { |i| add_values book.row(i),regression_features }
@@ -311,21 +304,15 @@ module OpenTox
regression_features=false
input.each { |row|
row = split_row(row)
- smiles = row.shift
- row.each_index do |i|
- value = row[i]
- type = feature_type(value)
- if type == OT.NumericFeature
- regression_features=true
- break
- end
- end
+ regression_features = detect_regression_features row
+ break if regression_features=true
}
input.each { |row| add_values split_row(row),regression_features }
warnings
@dataset
end
+
private
def warnings
@@ -367,6 +354,18 @@ module OpenTox
end
end
+ def detect_regression_features row
+ regression_features=false
+ row.each_index do |i|
+ value = row[i]
+ type = feature_type(value)
+ if type == OT.NumericFeature
+ regression_features=true
+ end
+ end
+ regression_features
+ end
+
def add_values(row, regression_features=false)
smiles = row.shift
--
cgit v1.2.3
From 8a20cf940c346fd04649d3c3c8f7ad4c1fcb20cb Mon Sep 17 00:00:00 2001
From: Andreas Maunz
Date: Tue, 24 May 2011 14:00:16 +0200
Subject: Fix: break was too early
---
lib/parser.rb | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/lib/parser.rb b/lib/parser.rb
index 4984292..5f847c3 100644
--- a/lib/parser.rb
+++ b/lib/parser.rb
@@ -283,7 +283,7 @@ module OpenTox
2.upto(book.last_row) { |i|
row = book.row(i)
regression_features = detect_regression_features row
- break if regression_features=true
+ break if regression_features==true
}
2.upto(book.last_row) { |i| add_values book.row(i),regression_features }
@@ -305,7 +305,7 @@ module OpenTox
input.each { |row|
row = split_row(row)
regression_features = detect_regression_features row
- break if regression_features=true
+ break if regression_features==true
}
input.each { |row| add_values split_row(row),regression_features }
warnings
@@ -355,6 +355,7 @@ module OpenTox
end
def detect_regression_features row
+ row.shift
regression_features=false
row.each_index do |i|
value = row[i]
--
cgit v1.2.3
From 0d87789eec37f7ae09d01937dbfc72af1ef17252 Mon Sep 17 00:00:00 2001
From: mguetlein
Date: Tue, 24 May 2011 16:06:05 +0200
Subject: fix small errors in to-html method
---
lib/to-html.rb | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/lib/to-html.rb b/lib/to-html.rb
index 66a3e74..51602d7 100644
--- a/lib/to-html.rb
+++ b/lib/to-html.rb
@@ -6,7 +6,7 @@ class String
# encloses URI in text with with link tag
# @return [String] new text with marked links
def link_urls
- self.gsub(/(?i)http(s?):\/\/[^\r\n\s']*/, '\0')
+ self.gsub(/(?i)http(s?):\/\/[^\r\n\s']*/, '\0')
end
end
@@ -30,7 +30,7 @@ module OpenTox
title = nil #$sinatra.url_for($sinatra.request.env['PATH_INFO'], :full) if $sinatra
html = ""
html += ""+title+"" if title
- html += ""
+ html += "<\/img>"
if AA_SERVER
user = OpenTox::Authorization.get_user(subjectid) if subjectid
@@ -63,7 +63,7 @@ module OpenTox
html += "Content
" if description || related_links
html += ""
html += text.link_urls
- html += "
"
+ html += "
"
html
end
@@ -78,7 +78,7 @@ module OpenTox
"password: | |
"+
#""+
" |
"
- html += ""
+ html += "