author    mguetlein <martin.guetlein@gmail.com>  2012-02-28 10:34:58 +0100
committer mguetlein <martin.guetlein@gmail.com>  2012-02-28 10:34:58 +0100
commit    32cd79cd055ccd0aa9de29d68ca8c3e63663b126 (patch)
tree      5058c65d249e39983eac1d2ae0641d069f68937e
parent    04e8d118ec8e4d09b4e3ad6538aa7e8974613076 (diff)
parent    754fd31f83be3ac7557a2f7832ff44fb262a98a5 (diff)
Merge branch 'development' of github.com:opentox/opentox-ruby into development
-rw-r--r--  Rakefile                      1
-rw-r--r--  lib/algorithm.rb            908
-rw-r--r--  lib/compound.rb              61
-rw-r--r--  lib/dataset.rb               12
-rw-r--r--  lib/environment.rb            2
-rw-r--r--  lib/model.rb                107
-rw-r--r--  lib/parser.rb               170
-rw-r--r--  lib/rest_client_wrapper.rb    2
-rw-r--r--  lib/serializer.rb            73
-rw-r--r--  lib/transform.rb            520
-rw-r--r--  lib/utils.rb                372
-rw-r--r--  opentox-ruby.gemspec          5
12 files changed, 1382 insertions, 851 deletions
diff --git a/Rakefile b/Rakefile
index 2f8da26..4d1d0c3 100644
--- a/Rakefile
+++ b/Rakefile
@@ -45,7 +45,6 @@ begin
gem.add_dependency "ruby-plot", "=0.6.1"
gem.add_dependency "gsl", "=1.14.7"
gem.add_dependency "statsample", "=1.1.0"
- #gem.add_dependency "statsample-optimization", "=2.1.0"
gem.add_development_dependency 'jeweler'
gem.files = FileList["[A-Z]*", "{bin,generators,lib,test}/**/*", 'lib/jeweler/templates/.gitignore']
diff --git a/lib/algorithm.rb b/lib/algorithm.rb
index 3bfe789..db21c46 100644
--- a/lib/algorithm.rb
+++ b/lib/algorithm.rb
@@ -5,6 +5,8 @@ R = nil
require "rinruby"
require "statsample"
require 'uri'
+require 'transform.rb'
+require 'utils.rb'
module OpenTox
@@ -13,7 +15,7 @@ module OpenTox
include OpenTox
- # Execute algorithm with parameters, please consult the OpenTox API and the webservice documentation for acceptable parameters
+ # Execute algorithm with parameters, consult OpenTox API and webservice documentation for acceptable parameters
# @param [optional,Hash] params Algorithm parameters
# @param [optional,OpenTox::Task] waiting_task (can be an OpenTox::Subtask as well), progress is updated accordingly
# @return [String] URI of new resource (dataset, model, ...)
@@ -21,7 +23,7 @@ module OpenTox
LOGGER.info "Running algorithm '"+@uri.to_s+"' with params: "+params.inspect
RestClientWrapper.post(@uri, params, {:accept => 'text/uri-list'}, waiting_task).to_s
end
-
+
# Get OWL-DL representation in RDF/XML format
# @return [application/rdf+xml] RDF/XML representation
def to_rdfxml
@@ -33,7 +35,7 @@ module OpenTox
# Generic Algorithm class, should work with all OpenTox webservices
class Generic
include Algorithm
-
+
# Find a Generic OpenTox algorithm via URI and load its metadata; may raise NotFound/NotAuthorized errors
# @param [String] uri Algorithm URI
# @return [OpenTox::Algorithm::Generic] Algorithm instance
@@ -44,14 +46,14 @@ module OpenTox
raise "cannot load algorithm metadata" if alg.metadata==nil or alg.metadata.size==0
alg
end
-
+
end
# Fminer algorithms (https://github.com/amaunz/fminer2)
class Fminer
include Algorithm
attr_accessor :prediction_feature, :training_dataset, :minfreq, :compounds, :db_class_sizes, :all_activities, :smi
-
+
def check_params(params,per_mil,subjectid=nil)
raise OpenTox::NotFoundError.new "Please submit a dataset_uri." unless params[:dataset_uri] and !params[:dataset_uri].nil?
raise OpenTox::NotFoundError.new "Please submit a prediction_feature." unless params[:prediction_feature] and !params[:prediction_feature].nil?
@@ -81,7 +83,7 @@ module OpenTox
LOGGER.warn "Cannot find smiles for #{compound.to_s}."
next
end
-
+
value_map=params[:value_map] unless params[:value_map].nil?
entry.each do |feature,values|
if feature == @prediction_feature.uri
@@ -115,23 +117,23 @@ module OpenTox
end
- # Backbone Refinement Class mining (http://bbrc.maunz.de/)
- class BBRC < Fminer
- # Initialize bbrc algorithm
- def initialize(subjectid=nil)
- super File.join(CONFIG[:services]["opentox-algorithm"], "fminer/bbrc")
- load_metadata(subjectid)
- end
+ # Backbone Refinement Class mining (http://bbrc.maunz.de/)
+ class BBRC < Fminer
+ # Initialize bbrc algorithm
+ def initialize(subjectid=nil)
+ super File.join(CONFIG[:services]["opentox-algorithm"], "fminer/bbrc")
+ load_metadata(subjectid)
end
+ end
- # LAtent STructure Pattern Mining (http://last-pm.maunz.de)
- class LAST < Fminer
- # Initialize last algorithm
- def initialize(subjectid=nil)
- super File.join(CONFIG[:services]["opentox-algorithm"], "fminer/last")
- load_metadata(subjectid)
- end
+ # LAtent STructure Pattern Mining (http://last-pm.maunz.de)
+ class LAST < Fminer
+ # Initialize last algorithm
+ def initialize(subjectid=nil)
+ super File.join(CONFIG[:services]["opentox-algorithm"], "fminer/last")
+ load_metadata(subjectid)
end
+ end
# Create lazar prediction model
@@ -144,72 +146,6 @@ module OpenTox
end
end
- # Utility methods without dedicated webservices
-
- # Similarity calculations
- module Similarity
- include Algorithm
-
- # Tanimoto similarity
- # @param [Array] features_a Features of first compound
- # @param [Array] features_b Features of second compound
- # @param [optional, Hash] weights Weights for all features
- # @param [optional, Hash] params Keys: `:training_compound, :compound, :training_compound_features_hits, :nr_hits, :compound_features_hits` are required
- # @return [Float] (Weighted) tanimoto similarity
- def self.tanimoto(features_a,features_b,weights=nil,params=nil)
- common_features = features_a & features_b
- all_features = (features_a + features_b).uniq
- #LOGGER.debug "dv --------------- common: #{common_features}, all: #{all_features}"
- if common_features.size > 0
- if weights
- #LOGGER.debug "nr_hits: #{params[:nr_hits]}"
- if !params.nil? && params[:nr_hits]
- params[:weights] = weights
- params[:mode] = "min"
- params[:features] = common_features
- common_p_sum = Algorithm.p_sum_support(params)
- params[:mode] = "max"
- params[:features] = all_features
- all_p_sum = Algorithm.p_sum_support(params)
- else
- common_p_sum = 0.0
- common_features.each{|f| common_p_sum += weights[f]}
- all_p_sum = 0.0
- all_features.each{|f| all_p_sum += weights[f]}
- end
- #LOGGER.debug "common_p_sum: #{common_p_sum}, all_p_sum: #{all_p_sum}, c/a: #{common_p_sum/all_p_sum}"
- common_p_sum/all_p_sum
- else
- #LOGGER.debug "common_features : #{common_features}, all_features: #{all_features}, c/a: #{(common_features.size/all_features.size).to_f}"
- common_features.size.to_f/all_features.size.to_f
- end
- else
- 0.0
- end
- end
-
- # Euclidean similarity
- # @param [Hash] properties_a Properties of first compound
- # @param [Hash] properties_b Properties of second compound
- # @param [optional, Hash] weights Weights for all properties
- # @return [Float] (Weighted) euclidean similarity
- def self.euclidean(properties_a,properties_b,weights=nil)
- common_properties = properties_a.keys & properties_b.keys
- if common_properties.size > 1
- dist_sum = 0
- common_properties.each do |p|
- if weights
- dist_sum += ( (properties_a[p] - properties_b[p]) * weights[p] )**2
- else
- dist_sum += (properties_a[p] - properties_b[p])**2
- end
- end
- 1/(1+Math.sqrt(dist_sum))
- else
- 0.0
- end
- end
- end
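
For reference, the removed Similarity module (presumably moved to the new lib/utils.rb, per the diffstat) computed (weighted) Tanimoto and Euclidean similarity. A minimal Ruby sketch of the unweighted variants, written as standalone helpers rather than the library API:

    # Tanimoto: |A & B| / |A u B| over two feature arrays.
    def tanimoto(features_a, features_b)
      common = features_a & features_b
      all    = (features_a + features_b).uniq
      common.empty? ? 0.0 : common.size.to_f / all.size
    end

    # Euclidean similarity: 1 / (1 + distance) over shared property keys.
    def euclidean(properties_a, properties_b)
      common = properties_a.keys & properties_b.keys
      return 0.0 if common.size <= 1
      dist = common.inject(0.0) { |s, p| s + (properties_a[p] - properties_b[p])**2 }
      1.0 / (1.0 + Math.sqrt(dist))
    end

    tanimoto(%w[cc cN], %w[cc cO])                                 # => 0.333...
    euclidean({"logP"=>1.0, "mw"=>2.0}, {"logP"=>2.0, "mw"=>2.0})  # => 0.5
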
# Structural Graph Clustering by TU Munich
# Finds clusters similar to a query structure in a given training dataset
@@ -226,7 +162,7 @@ module OpenTox
raise "Invalid URI."
end
@training_dataset_uri = training_dataset_uri
- if !OpenTox::Algorithm.numeric? training_threshold || training_threshold <0 || training_threshold >1
+ if !self.numeric? training_threshold || training_threshold <0 || training_threshold >1
raise "Training threshold out of bounds."
end
@training_threshold = training_threshold.to_f
@@ -259,7 +195,7 @@ module OpenTox
# @param [Float] Similarity threshold for query to clusters (optional)
def get_clusters query_compound_uri, query_threshold = 0.5
- if !OpenTox::Algorithm.numeric? query_threshold || query_threshold <0 || query_threshold >1
+ if !self.numeric? query_threshold || query_threshold <0 || query_threshold >1
raise "Query threshold out of bounds."
end
@query_threshold = query_threshold.to_f
@@ -285,7 +221,7 @@ module OpenTox
metadata[DC.title][pattern]=""
feature_clusterid_map[feature_uri] = metadata[DC.title].to_i
}
-
+
# Integrity check
unless cluster_query_dataset.compounds.size == 1
raise "Number of predicted compounds is != 1."
@@ -295,11 +231,11 @@ module OpenTox
query_compound_uri = cluster_query_dataset.compounds[0]
@target_clusters_array = Array.new
cluster_query_dataset.features.keys.each { |cluster_membership_feature|
-
+
# Getting dataset URI for cluster
target_cluster = feature_clusterid_map[cluster_membership_feature]
dataset = @clusterid_dataset_map[target_cluster]
-
+
# Finally look up presence
data_entry = cluster_query_dataset.data_entries[query_compound_uri]
present = data_entry[cluster_membership_feature][0]
@@ -311,89 +247,13 @@ module OpenTox
end
- module Neighbors
-
- # Local multi-linear regression (MLR) prediction from neighbors.
- # Uses propositionalized setting.
- # @param [Hash] params Keys `:neighbors,:compound,:features,:p_values,:similarity_algorithm,:prop_kernel,:value_map,:transform` are required
- # @return [Numeric] A prediction value.
- def self.local_mlr_prop(params)
-
- confidence=0.0
- prediction=nil
-
- if params[:neighbors].size>0
- props = params[:prop_kernel] ? get_props(params) : nil
- acts = params[:neighbors].collect { |n| act = n[:activity].to_f }
- sims = params[:neighbors].collect { |n| n[:similarity] }
- LOGGER.debug "Local MLR (Propositionalization / GSL)."
- prediction = mlr( {:n_prop => props[0], :q_prop => props[1], :sims => sims, :acts => acts} )
- if params[:transform].to_s == "classNOP"
- transformer = eval("OpenTox::Algorithm::Transform::#{params[:transform]["class"]}.new ([#{prediction}])")
- else
- transformer = eval("OpenTox::Algorithm::Transform::#{params[:transform]["class"]}.new ([#{prediction}]), #{params[:transform]["offset"]})")
- end
- prediction = transformer.values[0]
- prediction = nil if prediction.infinite? || params[:prediction_min_max][1] < prediction || params[:prediction_min_max][0] > prediction
- LOGGER.debug "Prediction is: '" + prediction.to_s + "'."
- params[:conf_stdev] = false if params[:conf_stdev].nil?
- confidence = get_confidence({:sims => sims, :acts => acts, :neighbors => params[:neighbors], :conf_stdev => params[:conf_stdev]})
- confidence = nil if prediction.nil?
- end
- {:prediction => prediction, :confidence => confidence}
-
- end
-
- # Multi-linear regression weighted by similarity.
- # Objective Feature Selection, Principal Components Analysis, Scaling of Axes.
- # @param [Hash] params Keys `:n_prop, :q_prop, :sims, :acts` are required
- # @return [Numeric] A prediction value.
- def self.mlr(params)
-
- # GSL matrix operations:
- # to_a : row-wise conversion to nested array
- #
- # Statsample operations (build on GSL):
- # to_scale: convert into Statsample format
-
- begin
- n_prop = params[:n_prop].collect { |v| v }
- q_prop = params[:q_prop].collect { |v| v }
- n_prop << q_prop # attach q_prop
- nr_cases, nr_features = get_sizes n_prop
- data_matrix = GSL::Matrix.alloc(n_prop.flatten, nr_cases, nr_features)
-
- # Principal Components Analysis
- LOGGER.debug "PCA..."
- pca = OpenTox::Algorithm::Transform::PCA.new(data_matrix)
- data_matrix = pca.data_transformed_matrix
-
- # Attach intercept column to data
- intercept = GSL::Matrix.alloc(Array.new(nr_cases,1.0),nr_cases,1)
- data_matrix = data_matrix.horzcat(intercept)
- (0..data_matrix.size2-2).each { |i|
- autoscaler = OpenTox::Algorithm::Transform::AutoScale.new(data_matrix.col(i))
- data_matrix.col(i)[0..data_matrix.size1-1] = autoscaler.scaled_values
- }
- # Detach query instance
- n_prop = data_matrix.to_a
- q_prop = n_prop.pop
- nr_cases, nr_features = get_sizes n_prop
- data_matrix = GSL::Matrix.alloc(n_prop.flatten, nr_cases, nr_features)
- # model + support vectors
- LOGGER.debug "Creating MLR model ..."
- c, cov, chisq, status = GSL::MultiFit::wlinear(data_matrix, params[:sims].to_scale.to_gsl, params[:acts].to_scale.to_gsl)
- GSL::MultiFit::linear_est(q_prop.to_scale.to_gsl, c, cov)[0]
- rescue Exception => e
- LOGGER.debug "#{e.class}: #{e.message}"
- end
+ module Neighbors
- end
# Classification with majority vote from neighbors weighted by similarity
- # @param [Hash] params Keys `:neighbors,:compound,:features,:p_values,:similarity_algorithm,:prop_kernel,:value_map,:transform` are required
+ # @param [Hash] params Keys `:acts, :sims, :value_map` are required
# @return [Numeric] A prediction value.
def self.weighted_majority_vote(params)
@@ -402,12 +262,13 @@ module OpenTox
confidence = 0.0
prediction = nil
- params[:neighbors].each do |neighbor|
- neighbor_weight = neighbor[:similarity].to_f
- neighbor_contribution += neighbor[:activity].to_f * neighbor_weight
+ LOGGER.debug "Weighted Majority Vote Classification."
+ params[:acts].each_index do |idx|
+ neighbor_weight = params[:sims][1][idx]
+ neighbor_contribution += params[:acts][idx] * neighbor_weight
if params[:value_map].size == 2 # AM: provide compat to binary classification: 1=>false 2=>true
- case neighbor[:activity]
+ case params[:acts][idx]
when 1
confidence_sum -= neighbor_weight
when 2
@@ -417,298 +278,257 @@ module OpenTox
confidence_sum += neighbor_weight
end
end
-
if params[:value_map].size == 2
if confidence_sum >= 0.0
- prediction = 2 unless params[:neighbors].size==0
+ prediction = 2 unless params[:acts].size==0
elsif confidence_sum < 0.0
- prediction = 1 unless params[:neighbors].size==0
+ prediction = 1 unless params[:acts].size==0
end
else
- prediction = (neighbor_contribution/confidence_sum).round unless params[:neighbors].size==0 # AM: new multinomial prediction
+ prediction = (neighbor_contribution/confidence_sum).round unless params[:acts].size==0 # AM: new multinomial prediction
end
+
LOGGER.debug "Prediction is: '" + prediction.to_s + "'." unless prediction.nil?
- confidence = confidence_sum/params[:neighbors].size if params[:neighbors].size > 0
+ confidence = (confidence_sum/params[:acts].size).abs if params[:acts].size > 0
LOGGER.debug "Confidence is: '" + confidence.to_s + "'." unless prediction.nil?
return {:prediction => prediction, :confidence => confidence.abs}
end
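
A standalone sketch of the reworked vote, assuming acts holds value_map codes (e.g. 1/2 for binary) and the similarity floats sit at params[:sims][1]; this mirrors the multinomial branch above (the binary branch uses a signed sum instead):

    def weighted_majority_vote(acts, sims)
      return { :prediction => nil, :confidence => 0.0 } if acts.empty?
      contribution = 0.0
      weight_sum   = 0.0
      acts.each_index do |i|
        contribution += acts[i] * sims[i]   # activity code weighted by similarity
        weight_sum   += sims[i]
      end
      prediction = (contribution / weight_sum).round
      confidence = (weight_sum / acts.size).abs
      { :prediction => prediction, :confidence => confidence }
    end

    weighted_majority_vote([2, 2, 1], [0.9, 0.8, 0.4])
    # => {:prediction=>2, :confidence=>0.7}
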
+
+
# Local support vector regression from neighbors
- # @param [Hash] params Keys `:neighbors,:compound,:features,:p_values,:similarity_algorithm,:prop_kernel,:value_map,:transform` are required
+ # @param [Hash] params Keys `:props, :acts, :sims, :min_train_performance` are required
# @return [Numeric] A prediction value.
def self.local_svm_regression(params)
- confidence = 0.0
- prediction = nil
- if params[:neighbors].size>0
- props = params[:prop_kernel] ? get_props(params) : nil
- acts = params[:neighbors].collect{ |n| n[:activity].to_f }
- sims = params[:neighbors].collect{ |n| n[:similarity] }
- prediction = props.nil? ? local_svm(acts, sims, "nu-svr", params) : local_svm_prop(props, acts, "nu-svr")
- if params[:transform].to_s == "classNOP"
- transformer = eval("OpenTox::Algorithm::Transform::#{params[:transform]["class"]}.new ([#{prediction}])")
- else
- transformer = eval("OpenTox::Algorithm::Transform::#{params[:transform]["class"]}.new ([#{prediction}]), #{params[:transform]["offset"]})")
+ begin
+ confidence = 0.0
+ prediction = nil
+
+ LOGGER.debug "Local SVM."
+ if params[:acts].size>0
+ if params[:props]
+ n_prop = params[:props][0].collect
+ q_prop = params[:props][1].collect
+ props = [ n_prop, q_prop ]
+ end
+ acts = params[:acts].collect
+ prediction = local_svm_prop( props, acts, params[:min_train_performance]) # params[:props].nil? signals non-prop setting
+ prediction = nil if (!prediction.nil? && prediction.infinite?)
+ LOGGER.debug "Prediction is: '" + prediction.to_s + "'."
+ confidence = get_confidence({:sims => params[:sims][1], :acts => params[:acts]})
+ confidence = 0.0 if prediction.nil?
end
- prediction = transformer.values[0]
- prediction = nil if prediction.infinite? || params[:prediction_min_max][1] < prediction || params[:prediction_min_max][0] > prediction
- LOGGER.debug "Prediction is: '" + prediction.to_s + "'."
- params[:conf_stdev] = false if params[:conf_stdev].nil?
- confidence = get_confidence({:sims => sims, :acts => acts, :neighbors => params[:neighbors], :conf_stdev => params[:conf_stdev]})
- confidence = nil if prediction.nil?
+ {:prediction => prediction, :confidence => confidence}
+ rescue Exception => e
+ LOGGER.debug "#{e.class}: #{e.message}"
+ LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
end
- {:prediction => prediction, :confidence => confidence}
-
+
end
- # Local support vector classification from neighbors
- # @param [Hash] params Keys `:neighbors,:compound,:features,:p_values,:similarity_algorithm,:prop_kernel,:value_map,:transform` are required
+
+ # Local support vector classification from neighbors
+ # @param [Hash] params Keys `:props, :acts, :sims, :min_train_performance` are required
# @return [Numeric] A prediction value.
def self.local_svm_classification(params)
- confidence = 0.0
- prediction = nil
- if params[:neighbors].size>0
- props = params[:prop_kernel] ? get_props(params) : nil
- acts = params[:neighbors].collect { |n| act = n[:activity] }
- sims = params[:neighbors].collect{ |n| n[:similarity] } # similarity values btwn q and nbors
- prediction = props.nil? ? local_svm(acts, sims, "C-bsvc", params) : local_svm_prop(props, acts, "C-bsvc")
- LOGGER.debug "Prediction is: '" + prediction.to_s + "'."
- params[:conf_stdev] = false if params[:conf_stdev].nil?
- confidence = get_confidence({:sims => sims, :acts => acts, :neighbors => params[:neighbors], :conf_stdev => params[:conf_stdev]})
+ begin
+ confidence = 0.0
+ prediction = nil
+
+ LOGGER.debug "Local SVM."
+ if params[:acts].size>0
+ if params[:props]
+ n_prop = params[:props][0].collect
+ q_prop = params[:props][1].collect
+ props = [ n_prop, q_prop ]
+ end
+ acts = params[:acts].collect
+ acts = acts.collect{|v| "Val" + v.to_s} # Convert to string for R to recognize classification
+ prediction = local_svm_prop( props, acts, params[:min_train_performance]) # params[:props].nil? signals non-prop setting
+ prediction = prediction.sub(/Val/,"") if prediction # Strip the "Val" prefix again (value_map code as String)
+ confidence = 0.0 if prediction.nil?
+ LOGGER.debug "Prediction is: '" + prediction.to_s + "'."
+ confidence = get_confidence({:sims => params[:sims][1], :acts => params[:acts]})
+ end
+ {:prediction => prediction, :confidence => confidence}
+ rescue Exception => e
+ LOGGER.debug "#{e.class}: #{e.message}"
+ LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
end
- {:prediction => prediction, :confidence => confidence}
-
+
end
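
Both SVM entry points now take the same propositionalized parameter shape. A hypothetical call with illustrative values (note that the code reads params[:sims][1] for the similarity floats; the content of index 0 is assumed unused here):

    params = {
      :props => [ [[0.1, 0.0], [0.3, 0.2]],  # neighbor rows (n_prop)
                  [0.2, 0.1] ],              # query vector (q_prop)
      :acts  => [4.2, 5.1],                  # neighbor activities
      :sims  => [nil, [0.8, 0.6]],           # index 1 holds the similarities
      :min_train_performance => 0.1
    }
    OpenTox::Algorithm::Neighbors.local_svm_regression(params)
    # => {:prediction=>..., :confidence=>...}
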
+
# Local support vector prediction from neighbors.
- # Uses pre-defined Kernel Matrix.
+ # Uses propositionalized setting.
# Not to be called directly (use local_svm_regression or local_svm_classification).
+ # @param [Array] props, propositionalization of neighbors and query structure, e.g. [ two-nested-Arrays_for_n, Array_for_q ] (neighbor matrix first, query vector second)
# @param [Array] acts, activities for neighbors.
- # @param [Array] sims, similarities for neighbors.
- # @param [String] type, one of "nu-svr" (regression) or "C-bsvc" (classification).
- # @param [Hash] params Keys `:neighbors,:compound,:features,:p_values,:similarity_algorithm,:prop_kernel,:value_map,:transform` are required
+ # @param [Float] min_train_performance, parameter to control censoring
# @return [Numeric] A prediction value.
- def self.local_svm(acts, sims, type, params)
- LOGGER.debug "Local SVM (Weighted Tanimoto Kernel)."
- neighbor_matches = params[:neighbors].collect{ |n| n[:features] } # URIs of matches
- gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel
+ def self.local_svm_prop(props, acts, min_train_performance)
+
+ LOGGER.debug "Local SVM (Propositionalization / Kernlab Kernel)."
+ n_prop = props[0] # is a matrix, i.e. two nested Arrays.
+ q_prop = props[1] # is an Array.
prediction = nil
if Algorithm::zero_variance? acts
prediction = acts[0]
else
- # gram matrix
- (0..(neighbor_matches.length-1)).each do |i|
- neighbor_i_hits = params[:fingerprints][params[:neighbors][i][:compound]]
- gram_matrix[i] = [] unless gram_matrix[i]
- # upper triangle
- ((i+1)..(neighbor_matches.length-1)).each do |j|
- neighbor_j_hits= params[:fingerprints][params[:neighbors][j][:compound]]
- sim_params = {}
- if params[:nr_hits]
- sim_params[:nr_hits] = true
- sim_params[:compound_features_hits] = neighbor_i_hits
- sim_params[:training_compound_features_hits] = neighbor_j_hits
- end
- sim = eval("#{params[:similarity_algorithm]}(neighbor_matches[i], neighbor_matches[j], params[:p_values], sim_params)")
- gram_matrix[i][j] = sim
- gram_matrix[j] = [] unless gram_matrix[j]
- gram_matrix[j][i] = gram_matrix[i][j] # lower triangle
- end
- gram_matrix[i][i] = 1.0
- end
-
-
#LOGGER.debug gram_matrix.to_yaml
@r = RinRuby.new(false,false) # global R instance leads to Socket errors after a large number of requests
- @r.eval "library('kernlab')" # this requires R package "kernlab" to be installed
- LOGGER.debug "Setting R data ..."
- # set data
- @r.gram_matrix = gram_matrix.flatten
- @r.n = neighbor_matches.size
- @r.y = acts
- @r.sims = sims
-
+ @r.eval "set.seed(1)"
+ @r.eval "suppressPackageStartupMessages(library('caret'))" # requires R packages "caret" and "kernlab"
+ @r.eval "suppressPackageStartupMessages(library('doMC'))" # requires R packages "multicore"
+ @r.eval "registerDoMC()" # switch on parallel processing
begin
- LOGGER.debug "Preparing R data ..."
- # prepare data
- @r.eval "y<-as.vector(y)"
- @r.eval "gram_matrix<-as.kernelMatrix(matrix(gram_matrix,n,n))"
- @r.eval "sims<-as.vector(sims)"
-
- # model + support vectors
- LOGGER.debug "Creating SVM model ..."
- @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"#{type}\", nu=0.5)"
- @r.eval "sv<-as.vector(SVindex(model))"
- @r.eval "sims<-sims[sv]"
- @r.eval "sims<-as.kernelMatrix(matrix(sims,1))"
- LOGGER.debug "Predicting ..."
- if type == "nu-svr"
- @r.eval "p<-predict(model,sims)[1,1]"
- elsif type == "C-bsvc"
- @r.eval "p<-predict(model,sims)"
- end
- if type == "nu-svr"
- prediction = @r.p
- elsif type == "C-bsvc"
- #prediction = (@r.p.to_f == 1.0 ? true : false)
- prediction = @r.p
- end
- @r.quit # free R
- rescue Exception => e
- LOGGER.debug "#{e.class}: #{e.message}"
- LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
- end
- end
- prediction
- end
-
- # Local support vector prediction from neighbors.
- # Uses propositionalized setting.
- # Not to be called directly (use local_svm_regression or local_svm_classification).
- # @param [Array] props, propositionalization of neighbors and query structure e.g. [ Array_for_q, two-nested-Arrays_for_n ]
- # @param [Array] acts, activities for neighbors.
- # @param [String] type, one of "nu-svr" (regression) or "C-bsvc" (classification).
- # @return [Numeric] A prediction value.
- def self.local_svm_prop(props, acts, type)
-
- LOGGER.debug "Local SVM (Propositionalization / Kernlab Kernel)."
- n_prop = props[0] # is a matrix, i.e. two nested Arrays.
- q_prop = props[1] # is an Array.
-
- prediction = nil
- if Algorithm::zero_variance? acts
- prediction = acts[0]
- else
- #LOGGER.debug gram_matrix.to_yaml
- @r = RinRuby.new(false,false) # global R instance leads to Socket errors after a large number of requests
- @r.eval "library('kernlab')" # this requires R package "kernlab" to be installed
- LOGGER.debug "Setting R data ..."
# set data
+ LOGGER.debug "Setting R data ..."
@r.n_prop = n_prop.flatten
@r.n_prop_x_size = n_prop.size
@r.n_prop_y_size = n_prop[0].size
@r.y = acts
@r.q_prop = q_prop
+ #@r.eval "y = matrix(y)"
+ @r.eval "prop_matrix = matrix(n_prop, n_prop_x_size, n_prop_y_size, byrow=T)"
+ @r.eval "q_prop = matrix(q_prop, 1, n_prop_y_size, byrow=T)"
- begin
- LOGGER.debug "Preparing R data ..."
- # prepare data
- @r.eval "y<-matrix(y)"
- @r.eval "prop_matrix<-matrix(n_prop, n_prop_x_size, n_prop_y_size, byrow=TRUE)"
- @r.eval "q_prop<-matrix(q_prop, 1, n_prop_y_size, byrow=TRUE)"
-
- # model + support vectors
- LOGGER.debug "Creating SVM model ..."
- @r.eval "model<-ksvm(prop_matrix, y, type=\"#{type}\", nu=0.5)"
- LOGGER.debug "Predicting ..."
- if type == "nu-svr"
- @r.eval "p<-predict(model,q_prop)[1,1]"
- elsif type == "C-bsvc"
- @r.eval "p<-predict(model,q_prop)"
- end
- if type == "nu-svr"
- prediction = @r.p
- elsif type == "C-bsvc"
- #prediction = (@r.p.to_f == 1.0 ? true : false)
- prediction = @r.p
- end
- @r.quit # free R
- rescue Exception => e
- LOGGER.debug "#{e.class}: #{e.message}"
- LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
- end
- end
- prediction
- end
+ # prepare data
+ LOGGER.debug "Preparing R data ..."
+ @r.eval "if (class(y) == 'character') { y = factor(y); suppressPackageStartupMessages(library('class')) }" # For classification
+
+ @r.eval <<-EOR
+ rem = nearZeroVar(prop_matrix)
+ if (length(rem) > 0) {
+ prop_matrix = prop_matrix[,-rem,drop=F]
+ q_prop = q_prop[,-rem,drop=F]
+ }
+ rem = findCorrelation(cor(prop_matrix))
+ if (length(rem) > 0) {
+ prop_matrix = prop_matrix[,-rem,drop=F]
+ q_prop = q_prop[,-rem,drop=F]
+ }
+ EOR
- # Get confidence for regression, with standard deviation of neighbor activity if conf_stdev is set.
- # @param[Hash] Required keys: :sims, :acts, :neighbors, :conf_stdev
- # @return[Float] Confidence
- def self.get_confidence(params)
- if params[:conf_stdev]
- sim_median = params[:sims].to_scale.median
- if sim_median.nil?
- confidence = nil
- else
- standard_deviation = params[:acts].to_scale.standard_deviation_sample
- confidence = (sim_median*Math.exp(-1*standard_deviation)).abs
- if confidence.nan?
- confidence = nil
- end
- end
- else
- conf = params[:sims].inject{|sum,x| sum + x }
- confidence = conf/params[:neighbors].size
- end
- LOGGER.debug "Confidence is: '" + confidence.to_s + "'."
- return confidence
- end
+ # model + support vectors
+ LOGGER.debug "Creating R SVM model ..."
+ @r.eval <<-EOR
+ model = train(prop_matrix,y,method="svmradial",tuneLength=8,trControl=trainControl(method="LGOCV",number=10),preProcess=c("center", "scale"))
+ perf = ifelse ( class(y)!='numeric', max(model$results$Accuracy), model$results[which.min(model$results$RMSE),]$Rsquared )
+ EOR
- # Get X and Y size of a nested Array (Matrix)
- def self.get_sizes(matrix)
- begin
- nr_cases = matrix.size
- nr_features = matrix[0].size
- rescue Exception => e
- LOGGER.debug "#{e.class}: #{e.message}"
- LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
- end
- #puts "NRC: #{nr_cases}, NRF: #{nr_features}"
- [ nr_cases, nr_features ]
- end
- # Calculate the propositionalization matrix aka instantiation matrix (0/1 entries for features)
- # Same for the vector describing the query compound
- # @param[Array] neighbors.
- # @param[OpenTox::Compound] query compound.
- # @param[Array] Dataset Features.
- # @param[Array] Fingerprints of neighbors.
- # @param[Float] p-values of Features.
- def self.get_props (params)
- matrix = Array.new
- begin
- params[:neighbors].each do |n|
- n = n[:compound]
- row = []
- params[:features].each do |f|
- if ! params[:fingerprints][n].nil?
- row << (params[:fingerprints][n].include?(f) ? (params[:p_values][f] * params[:fingerprints][n][f]) : 0.0)
- else
- row << 0.0
- end
- end
- matrix << row
- end
- row = []
- params[:features].each do |f|
- if params[:nr_hits]
- compound_feature_hits = params[:compound].match_hits([f])
- row << (compound_feature_hits.size == 0 ? 0.0 : (params[:p_values][f] * compound_feature_hits[f]))
- else
- row << (params[:compound].match([f]).size == 0 ? 0.0 : params[:p_values][f])
- end
+ # prediction
+ LOGGER.debug "Predicting ..."
+ @r.eval "p = predict(model,q_prop)"
+ @r.eval "if (class(y)!='numeric') p = as.character(p)"
+ prediction = @r.p
+
+ # censoring
+ prediction = nil if ( @r.perf.nan? || @r.perf < min_train_performance )
+ LOGGER.debug "Performance: #{sprintf("%.2f", @r.perf)}"
+ rescue Exception => e
+ LOGGER.debug "#{e.class}: #{e.message}"
+ LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
end
- rescue Exception => e
- LOGGER.debug "get_props failed with '" + $! + "'"
+ @r.quit # free R
end
- [ matrix, row ]
+ prediction
end
end
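
The last step above censors predictions by resampled training performance (Accuracy for classification, R-squared for regression). The rule in plain Ruby, as a sketch:

    # Drop a prediction when training performance is NaN or below the threshold.
    def censor(prediction, perf, min_train_performance)
      (perf.nan? || perf < min_train_performance) ? nil : prediction
    end

    censor(4.7, 0.35, 0.1)        # => 4.7 (kept)
    censor(4.7, 0.05, 0.1)        # => nil (censored)
    censor(4.7, Float::NAN, 0.1)  # => nil (censored)
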
+ module FeatureSelection
+ include Algorithm
+ # Recursive Feature Elimination using caret
+ # @param [Hash] required keys: ds_csv_file, prediction_feature, fds_csv_file (dataset CSV file, prediction feature column name, and feature dataset CSV file), optional: del_missing (delete rows with missing values).
+ # @return [String] feature dataset CSV file composed of selected features.
+ def self.rfe(params)
+ @r=RinRuby.new(false,false)
+ @r.ds_csv_file = params[:ds_csv_file].to_s
+ @r.prediction_feature = params[:prediction_feature].to_s
+ @r.fds_csv_file = params[:fds_csv_file].to_s
+ @r.del_missing = params[:del_missing] == true ? 1 : 0
+ r_result_file = params[:fds_csv_file].sub("rfe_", "rfe_R_")
+ @r.f_fds_r = r_result_file.to_s
+
+ # need packs 'randomForest', 'RANN'
+ @r.eval <<-EOR
+ set.seed(1)
+ suppressPackageStartupMessages(library('caret'))
+ suppressPackageStartupMessages(library('randomForest'))
+ suppressPackageStartupMessages(library('RANN'))
+ suppressPackageStartupMessages(library('doMC'))
+ registerDoMC()
+
+ acts = read.csv(ds_csv_file, check.names=F)
+ feats = read.csv(fds_csv_file, check.names=F)
+ ds = merge(acts, feats, by="SMILES") # duplicates features for duplicate SMILES :-)
+
+ features = ds[,(dim(acts)[2]+1):(dim(ds)[2])]
+ y = ds[,which(names(ds) == prediction_feature)]
+
+ # assumes a data matrix 'features' and a vector 'y' of target values
+ row.names(features)=NULL
+
+ pp = NULL
+ if (del_missing) {
+ # needed if rows should be removed
+ na_ids = apply(features,1,function(x)any(is.na(x)))
+ features = features[!na_ids,]
+ y = y[!na_ids]
+ pp = preProcess(features, method=c("scale", "center"))
+ } else {
+ # Use imputation if NA's random (only then!)
+ pp = preProcess(features, method=c("scale", "center", "knnImpute"))
+ }
+ features = predict(pp, features)
+
+ # determine subsets
+ subsets = dim(features)[2]*c(0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7)
+ subsets = c(2,3,4,5,7,10,subsets)
+ subsets = unique(sort(round(subsets)))
+ subsets = subsets[subsets<=dim(features)[2]]
+ subsets = subsets[subsets>1]
+
+ # Recursive feature elimination
+ rfProfile = rfe( x=features, y=y, rfeControl=rfeControl(functions=rfFuncs, number=50), sizes=subsets)
+
+ # read existing dataset and select most useful features
+ csv=feats[,c("SMILES", rfProfile$optVariables)]
+ write.csv(x=csv,file=f_fds_r, row.names=F, quote=F, na='')
+ EOR
+ r_result_file
+ end
+ end
+
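
A hypothetical invocation of the new RFE wrapper; the file paths and prediction feature name are made up for illustration:

    selected = OpenTox::Algorithm::FeatureSelection.rfe(
      :ds_csv_file        => "/tmp/rfe_acts.csv",     # SMILES + activity column
      :prediction_feature => "Hamster Carcinogenicity",
      :fds_csv_file       => "/tmp/rfe_features.csv", # SMILES + descriptor columns
      :del_missing        => false                    # impute NAs instead of dropping rows
    )
    # => "/tmp/rfe_R_features.csv", a CSV restricted to the selected features
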
module Substructure
include Algorithm
# Substructure matching
- # @param [OpenTox::Compound] compound Compound
- # @param [Array] features Array with Smarts strings
+ # @param [Hash] required keys: compound, features
# @return [Array] Array with matching Smarts
- def self.match(compound,features)
- compound.match(features)
+ def self.match(params)
+ params[:compound].match(params[:features])
end
+
+ # Substructure matching with number of non-unique hits
+ # @param [Hash] required keys: compound, features
+ # @return [Hash] Hash with matching Smarts and number of hits
+ def self.match_hits(params)
+ params[:compound].match_hits(params[:features])
+ end
+
+ # Lookup of precalculated feature values
+ # @param [Hash] required keys: compound, features, feature_dataset_uri, pc_type
+ # @return [Hash] Hash with feature name as key and value as value
+ def self.lookup(params)
+ params[:compound].lookup(params[:features], params[:feature_dataset_uri],params[:pc_type])
+ end
end
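
Usage sketch for the params-hash API introduced above; the SMARTS values follow the examples in lib/compound.rb, the hit count is a placeholder:

    compound = OpenTox::Compound.from_name("Benzene")
    OpenTox::Algorithm::Substructure.match(:compound => compound, :features => ["cc", "cN"])
    # => ["cc"]
    OpenTox::Algorithm::Substructure.match_hits(:compound => compound, :features => ["cc", "cN"])
    # => {"cc" => nr_hits}
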
module Dataset
@@ -717,283 +537,5 @@ module OpenTox
def features(dataset_uri,compound_uri)
end
end
-
- module Transform
- include Algorithm
-
- # The transformer that inverts values.
- # 1/x is used, after values have been moved >= 1.
- class Inverter
- attr_accessor :offset, :values
-
- # @params[Array] Values to transform.
- # @params[Float] Offset for restore.
- def initialize *args
- case args.size
- when 1
- begin
- values=args[0]
- raise "Cannot transform, values empty." if @values.size==0
- @values = values.collect { |v| -1.0 * v }
- @offset = 1.0 - @values.minmax[0]
- @offset = -1.0 * @offset if @offset>0.0
- @values.collect! { |v| v - @offset } # slide >1
- @values.collect! { |v| 1 / v } # invert to [0,1]
- rescue Exception => e
- LOGGER.debug "#{e.class}: #{e.message}"
- LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
- end
- when 2
- @offset = args[1].to_f
- @values = args[0].collect { |v| 1 / v }
- @values.collect! { |v| v + @offset }
- @values.collect! { |v| -1.0 * v }
- end
- end
- end
-
- # The transformer that takes logs.
- # Log10 is used, after values have been moved > 0.
- class Log10
- attr_accessor :offset, :values
-
- # @params[Array] Values to transform / restore.
- # @params[Float] Offset for restore.
- def initialize *args
- @distance_to_zero = 0.000000001 # 1 / 1 billion
- case args.size
- when 1
- begin
- values=args[0]
- raise "Cannot transform, values empty." if values.size==0
- @offset = values.minmax[0]
- @offset = -1.0 * @offset if @offset>0.0
- @values = values.collect { |v| v - @offset } # slide > anchor
- @values.collect! { |v| v + @distance_to_zero } #
- @values.collect! { |v| Math::log10 v } # log10 (can fail)
- rescue Exception => e
- LOGGER.debug "#{e.class}: #{e.message}"
- LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
- end
- when 2
- @offset = args[1].to_f
- @values = args[0].collect { |v| 10**v }
- @values.collect! { |v| v - @distance_to_zero }
- @values.collect! { |v| v + @offset }
- end
- end
- end
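
The deleted transformers live on in lib/transform.rb (per the diffstat). Their two constructor arities form a round trip, sketched here for Log10 under that assumption:

    t    = OpenTox::Algorithm::Transform::Log10.new([0.001, 0.1, 10.0]) # transform
    logs = t.values                                                     # log10 of slid values
    back = OpenTox::Algorithm::Transform::Log10.new(logs, t.offset).values
    # back ~ [0.001, 0.1, 10.0] up to floating-point error
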
-
- # The transformer that does nothing (No OPeration).
- class NOP
- attr_accessor :offset, :values
-
- # @params[Array] Values to transform / restore.
- # @params[Float] Offset for restore.
- def initialize *args
- @offset = 0.0
- @distance_to_zero = 0.0
- case args.size
- when 1
- @values = args[0]
- when 2
- @values = args[0]
- end
- end
- end
-
-
- # Auto-Scaler for Arrays
- # Center on mean and divide by standard deviation
- class AutoScale
- attr_accessor :scaled_values, :mean, :stdev
-
- # @params[Array] Values to transform.
- def initialize values
- @scaled_values = values
- @mean = @scaled_values.to_scale.mean
- @stdev = @scaled_values.to_scale.standard_deviation_sample
- @scaled_values = @scaled_values.collect {|vi| vi - @mean }
- @scaled_values.collect! {|vi| vi / @stdev } unless @stdev == 0.0
- end
- end
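
AutoScale centers on the mean and divides by the sample standard deviation; a quick check of the deleted class's contract:

    scaler = OpenTox::Algorithm::Transform::AutoScale.new([1.0, 2.0, 3.0])
    scaler.mean           # => 2.0
    scaler.stdev          # => 1.0 (sample standard deviation)
    scaler.scaled_values  # => [-1.0, 0.0, 1.0]
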
-
- # Principal Components Analysis
- # Statsample Library (http://ruby-statsample.rubyforge.org/) by C. Bustos
- class PCA
- attr_accessor :data_matrix, :data_transformed_matrix, :eigenvector_matrix, :eigenvalue_sums, :autoscaler
-
- # Creates a transformed dataset as GSL::Matrix.
- # @param [GSL::Matrix] Data matrix.
- # @param [Float] Compression ratio from [0,1].
- # @return [GSL::Matrix] Data transformed matrix.
- def initialize data_matrix, compression=0.05
- begin
- @data_matrix = data_matrix
- @compression = compression.to_f
- @stdev = Array.new
- @mean = Array.new
-
- # Objective Feature Selection
- raise "Error! PCA needs at least two dimensions." if data_matrix.size2 < 2
- @data_matrix_selected = nil
- (0..@data_matrix.size2-1).each { |i|
- if !Algorithm::zero_variance?(@data_matrix.col(i).to_a)
- if @data_matrix_selected.nil?
- @data_matrix_selected = GSL::Matrix.alloc(@data_matrix.size1, 1)
- @data_matrix_selected.col(0)[0..@data_matrix.size1-1] = @data_matrix.col(i)
- else
- @data_matrix_selected = @data_matrix_selected.horzcat(GSL::Matrix.alloc(@data_matrix.col(i).to_a,@data_matrix.size1, 1))
- end
- end
- }
- raise "Error! PCA needs at least two dimensions." if (@data_matrix_selected.nil? || @data_matrix_selected.size2 < 2)
-
- # Scaling of Axes
- @data_matrix_scaled = GSL::Matrix.alloc(@data_matrix_selected.size1, @data_matrix_selected.size2)
- (0..@data_matrix_selected.size2-1).each { |i|
- @autoscaler = OpenTox::Algorithm::Transform::AutoScale.new(@data_matrix_selected.col(i))
- @data_matrix_scaled.col(i)[0..@data_matrix.size1-1] = @autoscaler.scaled_values
- @stdev << @autoscaler.stdev
- @mean << @autoscaler.mean
- }
-
- data_matrix_hash = Hash.new
- (0..@data_matrix_scaled.size2-1).each { |i|
- column_view = @data_matrix_scaled.col(i)
- data_matrix_hash[i] = column_view.to_scale
- }
- dataset_hash = data_matrix_hash.to_dataset # see http://goo.gl/7XcW9
- cor_matrix=Statsample::Bivariate.correlation_matrix(dataset_hash)
- pca=Statsample::Factor::PCA.new(cor_matrix)
- pca.eigenvalues.each { |ev| raise "PCA failed!" unless !ev.nan? }
- @eigenvalue_sums = Array.new
- (0..dataset_hash.fields.size-1).each { |i|
- @eigenvalue_sums << pca.eigenvalues[0..i].inject{ |sum, ev| sum + ev }
- }
- eigenvectors_selected = Array.new
- pca.eigenvectors.each_with_index { |ev, i|
- if (@eigenvalue_sums[i] <= ((1.0-@compression)*dataset_hash.fields.size)) || (eigenvectors_selected.size == 0)
- eigenvectors_selected << ev.to_a
- end
- }
- @eigenvector_matrix = GSL::Matrix.alloc(eigenvectors_selected.flatten, eigenvectors_selected.size, dataset_hash.fields.size).transpose
- dataset_matrix = dataset_hash.to_gsl.transpose
- @data_transformed_matrix = (@eigenvector_matrix.transpose * dataset_matrix).transpose
- rescue Exception => e
- LOGGER.debug "#{e.class}: #{e.message}"
- LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
- end
- end
-
- # Restores data in the original feature space (possibly with compression loss).
- # @return [GSL::Matrix] Data matrix.
- def restore
- begin
- data_matrix_restored = (@eigenvector_matrix * @data_transformed_matrix.transpose).transpose # reverse pca
- # reverse scaling
- (0..data_matrix_restored.size2-1).each { |i|
- data_matrix_restored.col(i)[0..data_matrix_restored.size1-1] *= @stdev[i] unless @stdev[i] == 0.0
- data_matrix_restored.col(i)[0..data_matrix_restored.size1-1] += @mean[i]
- }
- data_matrix_restored
- rescue Exception => e
- LOGGER.debug "#{e.class}: #{e.message}"
- LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
- end
- end
-
- end
-
- end
-
- # DV: Removed Gauss kernel in this file since the additional calculations has no benefit for the result.
- # See experiment summary: http://goo.gl/uwiKt
- # Gauss kernel
- # @return [Float]
- #def self.gauss(x, sigma = 0.3)
- # d = 1.0 - x.to_f
- # Math.exp(-(d*d)/(2*sigma*sigma))
- #end
-
- # For symbolic features
- # @param [Array] Array to test, must indicate non-occurrence with 0.
- # @return [Boolean] Whether the feature is singular or non-occurring or present everywhere.
- def self.isnull_or_singular?(array)
- nr_zeroes = array.count(0)
- return (nr_zeroes == array.size) || # remove non-occurring feature
- (nr_zeroes == array.size-1) || # remove singular feature
- (nr_zeroes == 0) # also remove feature present everywhere
- end
-
- # Numeric value test
- # @param[Object] value
- # @return [Boolean] Whether value is a number
- def self.numeric?(value)
- true if Float(value) rescue false
- end
-
- # For symbolic features
- # @param [Array] Array to test, must indicate non-occurrence with 0.
- # @return [Boolean] Whether the feature has variance zero.
- def self.zero_variance?(array)
- return (array.to_scale.variance_population == 0.0)
- end
-
- # Sum of an array for Arrays.
- # @param [Array] Array with values
- # @return [Integer] Sum of size of values
- def self.sum_size(array)
- sum=0
- array.each { |e| sum += e.size }
- return sum
- end
-
- # Minimum Frequency
- # @param [Integer] per-mil value
- # return [Integer] min-frequency
- def self.min_frequency(training_dataset,per_mil)
- minfreq = per_mil * training_dataset.compounds.size.to_f / 1000.0 # AM sugg. 8-10 per mil for BBRC, 50 per mil for LAST
- minfreq = 2 unless minfreq > 2
- Integer (minfreq)
- end
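
Worked example for min_frequency: with 500 training compounds and the suggested 8 per mil for BBRC,

    minfreq = 8 * 500.0 / 1000.0   # => 4.0
    minfreq = 2 unless minfreq > 2 # floor of 2
    Integer(minfreq)               # => 4
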
-
- # Effect calculation for classification
- # @param [Array] Array of occurrences per class in the form of Enumerables.
- # @param [Array] Array of database instance counts per class.
- def self.effect(occurrences, db_instances)
- max=0
- max_value=0
- nr_o = self.sum_size(occurrences)
- nr_db = db_instances.to_scale.sum
-
- occurrences.each_with_index { |o,i| # fminer outputs occurrences sorted reverse by activity.
- actual = o.size.to_f/nr_o
- expected = db_instances[i].to_f/nr_db
- if actual > expected
- if ((actual - expected) / actual) > max_value
- max_value = (actual - expected) / actual # 'Schleppzeiger'
- max = i
- end
- end
- }
- max
- end
-
- # Returns Support value of an fingerprint
- # @param [Hash] params Keys: `:compound_features_hits, :weights, :training_compound_features_hits, :features, :nr_hits:, :mode` are required
- # return [Numeric] Support value
- def self.p_sum_support(params)
- p_sum = 0.0
- params[:features].each{|f|
- compound_hits = params[:compound_features_hits][f]
- neighbor_hits = params[:training_compound_features_hits][f]
- p_sum += eval("(params[:weights][f] * ([compound_hits, neighbor_hits].compact.#{params[:mode]}))")
- }
- p_sum
- end
-
end
end
-
-
diff --git a/lib/compound.rb b/lib/compound.rb
index e7b4da0..a2b7e48 100644
--- a/lib/compound.rb
+++ b/lib/compound.rb
@@ -6,13 +6,15 @@ module OpenTox
# Ruby wrapper for OpenTox Compound Webservices (http://opentox.org/dev/apis/api-1.2/structure).
class Compound
+ include OpenTox
+
attr_accessor :inchi, :uri
# Create compound with optional uri
# @example
- # compound = OpenTox::Compound.new("http://webservices.in-silico.ch/compound/InChI=1S/C6H6/c1-2-4-6-5-3-1/h1-6H"")
+ # compound = Compound.new("http://webservices.in-silico.ch/compound/InChI=1S/C6H6/c1-2-4-6-5-3-1/h1-6H")
# @param [optional, String] uri Compound URI
- # @return [OpenTox::Compound] Compound
+ # @return [Compound] Compound
def initialize(uri=nil)
@uri = uri
case @uri
@@ -36,9 +38,9 @@ module OpenTox
# Create a compound from smiles string
# @example
- # compound = OpenTox::Compound.from_smiles("c1ccccc1")
+ # compound = Compound.from_smiles("c1ccccc1")
# @param [String] smiles Smiles string
- # @return [OpenTox::Compound] Compound
+ # @return [Compound] Compound
def self.from_smiles(smiles)
c = Compound.new
c.inchi = Compound.smiles2inchi(smiles)
@@ -48,7 +50,7 @@ module OpenTox
# Create a compound from inchi string
# @param [String] inchi InChI string
- # @return [OpenTox::Compound] Compound
+ # @return [Compound] Compound
def self.from_inchi(inchi)
c = Compound.new
c.inchi = inchi
@@ -58,7 +60,7 @@ module OpenTox
# Create a compound from sdf string
# @param [String] sdf SDF string
- # @return [OpenTox::Compound] Compound
+ # @return [Compound] Compound
def self.from_sdf(sdf)
c = Compound.new
c.inchi = Compound.sdf2inchi(sdf)
@@ -68,9 +70,9 @@ module OpenTox
# Create a compound from name. Relies on an external service for name lookups.
# @example
- # compound = OpenTox::Compound.from_name("Benzene")
+ # compound = Compound.from_name("Benzene")
# @param [String] name name can be also an InChI/InChiKey, CAS number, etc
- # @return [OpenTox::Compound] Compound
+ # @return [Compound] Compound
def self.from_name(name)
c = Compound.new
# paranoid URI encoding to keep SMILES charges and brackets
@@ -131,7 +133,7 @@ module OpenTox
# Match a smarts string
# @example
- # compound = OpenTox::Compound.from_name("Benzene")
+ # compound = Compound.from_name("Benzene")
# compound.match?("cN") # returns false
# @param [String] smarts Smarts string
def match?(smarts)
@@ -146,7 +148,7 @@ module OpenTox
# Match an array of smarts strings, returns array with matching smarts
# @example
- # compound = OpenTox::Compound.from_name("Benzene")
+ # compound = Compound.from_name("Benzene")
# compound.match(['cc','cN']) # returns ['cc']
# @param [Array] smarts_array Array with Smarts strings
# @return [Array] Array with matching Smarts strings
@@ -166,7 +168,7 @@ module OpenTox
# Match_hits an array of smarts strings, returns hash with matching smarts as key and number of non-unique hits as value
# @example
- # compound = OpenTox::Compound.from_name("Benzene")
+ # compound = Compound.from_name("Benzene")
# compound.match_hits(['cc','cN']) # returns a hash, e.g. {'cc' => nr_hits}
# @param [Array] smarts_array Array with Smarts strings
# @return [Hash] Hash with matching smarts as key and number of non-unique hits as value
@@ -191,6 +193,43 @@ module OpenTox
return smarts_hits
#smarts_array.collect { |s| s if match?(s)}.compact
end
+
+ # Lookup numerical values, returns hash with feature name as key and value as value
+ # @param [Array] Array of feature names
+ # @param [String] Feature dataset uri
+ # @return [Hash] Hash with feature name as key and value as value
+ def lookup(feature_array,feature_dataset_uri,pc_type)
+ ds = OpenTox::Dataset.find(feature_dataset_uri)
+
+ #entry = ds.data_entries[self.uri]
+ entry = nil
+ ds.data_entries.each { |c_uri, values|
+ if c_uri.split('/compound/').last == self.to_inchi
+ entry = ds.data_entries[self.uri]
+ break
+ end
+ }
+
+ LOGGER.debug "#{entry.size} entries in feature ds for query." unless entry.nil?
+
+ if entry.nil?
+ uri, smiles_to_inchi = OpenTox::Algorithm.get_pc_descriptors({:compounds => [self.uri], :pc_type => pc_type})
+ uri = OpenTox::Algorithm.load_ds_csv(uri, smiles_to_inchi)
+ ds = OpenTox::Dataset.find(uri)
+ entry = ds.data_entries[self.uri]
+ ds.delete
+ end
+
+ features = entry.keys
+ features.each { |feature|
+ new_feature = File.join(feature_dataset_uri, "feature", feature.split("/").last)
+ entry[new_feature] = entry[feature].flatten.first.to_f # see algorithm/lazar.rb:182, to_f because feature type detection doesn't work with 1 entry
+ entry.delete(feature) unless feature == new_feature # e.g. when loading from ambit
+ }
+ #res = feature_array.collect {|v| entry[v]}
+ #LOGGER.debug "----- am #{entry.to_yaml}"
+ entry
+ end
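
A hypothetical call of the new lookup method; the dataset URI and pc_type value are illustrative, not taken from the code:

    compound = OpenTox::Compound.from_name("Benzene")
    entry = compound.lookup(
      [],                                            # feature name filter (unused by the current body)
      "http://webservices.in-silico.ch/dataset/42",  # feature dataset URI (made up)
      "constitutional"                               # pc_type (assumed value)
    )
    # => { "<feature_dataset_uri>/feature/<id>" => <Float>, ... }
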
# Get URI of compound image with highlighted fragments
diff --git a/lib/dataset.rb b/lib/dataset.rb
index 3662527..95c1918 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -288,7 +288,7 @@ module OpenTox
# Insert a statement (compound_uri,feature_uri,value)
# @example Insert a statement (compound_uri,feature_uri,value)
- # dataset.add "http://webservices.in-silico.ch/compound/InChI=1S/C6Cl6/c7-1-2(8)4(10)6(12)5(11)3(1)9", "http://webservices.in-silico.ch/dataset/1/feature/hamster_carcinogenicity", true
+ # dataset.add "http://webservices.in-silico.ch/compound/InChI=1S/C6Cl6/c7-1-2(8)4(10)6(12)5(11)3(1)9", "http://webservices.in-silico.ch/dataset/1/feature/hamster_carcinogenicity", 1
# @param [String] compound Compound URI
# @param [String] feature Compound URI
# @param [Boolean,Float] value Feature value
@@ -315,6 +315,16 @@ module OpenTox
@features[feature] = metadata
end
+ # Complete feature values by adding zeroes
+ def complete_data_entries
+ all_features = @features.keys
+ @data_entries.each { |c, e|
+ (Set.new(all_features.collect)).subtract(Set.new e.keys).to_a.each { |f|
+ self.add(c,f,0)
+ }
+ }
+ end
+
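
Effect of the new helper, sketched: every compound entry gains an explicit zero for each feature it lacks.

    # before: dataset.data_entries == { c1 => { f1 => [1] } } with features f1, f2
    dataset.complete_data_entries
    # after:  dataset.data_entries == { c1 => { f1 => [1], f2 => [0] } }
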
# Add/modify metadata for a feature
# @param [String] feature Feature URI
# @param [Hash] metadata Hash with feature metadata
diff --git a/lib/environment.rb b/lib/environment.rb
index 6a72ba5..c1b8312 100644
--- a/lib/environment.rb
+++ b/lib/environment.rb
@@ -91,5 +91,5 @@ DC = OwlNamespace.new 'http://purl.org/dc/elements/1.1/'
OT = OwlNamespace.new 'http://www.opentox.org/api/1.1#'
OTA = OwlNamespace.new 'http://www.opentox.org/algorithmTypes.owl#'
XSD = OwlNamespace.new 'http://www.w3.org/2001/XMLSchema#'
-BO = OwlNamespace.new 'http://www.blueobelisk.org/ontologies/chemoinformatics-algorithms/#'
+#BO = OwlNamespace.new 'http://www.blueobelisk.org/ontologies/chemoinformatics-algorithms/#'
diff --git a/lib/model.rb b/lib/model.rb
index 0b116c2..b3de1a3 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -102,8 +102,8 @@ module OpenTox
include Algorithm
include Model
- attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim, :subjectid, :prop_kernel, :value_map, :nr_hits, :transform, :conf_stdev, :prediction_min_max
+ attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :subjectid, :value_map, :compound_fingerprints, :neighbors
def initialize(uri=nil)
if uri
@@ -120,18 +120,11 @@ module OpenTox
@p_values = {}
@fingerprints = {}
@value_map = {}
- @prediction_min_max = []
@feature_calculation_algorithm = "Substructure.match"
@similarity_algorithm = "Similarity.tanimoto"
@prediction_algorithm = "Neighbors.weighted_majority_vote"
- @nr_hits = false
- @min_sim = 0.3
- @prop_kernel = false
- @transform = { "class" => "NOP" }
- @conf_stdev = false
-
end
# Get URIs of all lazar models
@@ -174,19 +167,14 @@ module OpenTox
lazar.feature_calculation_algorithm = hash["feature_calculation_algorithm"] if hash["feature_calculation_algorithm"]
lazar.similarity_algorithm = hash["similarity_algorithm"] if hash["similarity_algorithm"]
lazar.prediction_algorithm = hash["prediction_algorithm"] if hash["prediction_algorithm"]
- lazar.min_sim = hash["min_sim"] if hash["min_sim"]
lazar.subjectid = hash["subjectid"] if hash["subjectid"]
- lazar.prop_kernel = hash["prop_kernel"] if hash["prop_kernel"]
lazar.value_map = hash["value_map"] if hash["value_map"]
- lazar.nr_hits = hash["nr_hits"] if hash["nr_hits"]
- lazar.transform = hash["transform"] if hash["transform"]
- lazar.conf_stdev = hash["conf_stdev"] if hash["conf_stdev"]
- lazar.prediction_min_max = hash["prediction_min_max"] if hash["prediction_min_max"]
+
lazar
end
def to_json
- Yajl::Encoder.encode({:uri => @uri,:metadata => @metadata, :compound => @compound, :prediction_dataset => @prediction_dataset, :features => @features, :effects => @effects, :activities => @activities, :p_values => @p_values, :fingerprints => @fingerprints, :feature_calculation_algorithm => @feature_calculation_algorithm, :similarity_algorithm => @similarity_algorithm, :prediction_algorithm => @prediction_algorithm, :min_sim => @min_sim, :subjectid => @subjectid, :prop_kernel => @prop_kernel, :value_map => @value_map, :nr_hits => @nr_hits, :transform => @transform, :conf_stdev => @conf_stdev, :prediction_min_max => @prediction_min_max})
+ Yajl::Encoder.encode({:uri => @uri,:metadata => @metadata, :compound => @compound, :prediction_dataset => @prediction_dataset, :features => @features, :effects => @effects, :activities => @activities, :p_values => @p_values, :fingerprints => @fingerprints, :feature_calculation_algorithm => @feature_calculation_algorithm, :similarity_algorithm => @similarity_algorithm, :prediction_algorithm => @prediction_algorithm, :subjectid => @subjectid, :value_map => @value_map})
end
def run( params, accept_header=nil, waiting_task=nil )
@@ -230,8 +218,11 @@ module OpenTox
predict(compound_uri,false,subjectid)
count += 1
waiting_task.progress( count/d.compounds.size.to_f*100.0 ) if waiting_task
- rescue => ex
- LOGGER.warn "prediction for compound "+compound_uri.to_s+" failed: "+ex.message+" subjectid: #{subjectid}"
+ rescue => e
+ LOGGER.warn "prediction for compound "+compound_uri.to_s+" failed: "+e.message+" subjectid: #{subjectid}"
+ #LOGGER.debug "#{e.class}: #{e.message}"
+ #LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
+
end
end
#@prediction_dataset.save(subjectid)
@@ -261,25 +252,41 @@ module OpenTox
if OpenTox::Feature.find(metadata[OT.dependentVariables], subjectid).feature_type == "regression"
all_activities = []
all_activities = @activities.values.flatten.collect! { |i| i.to_f }
- @prediction_min_max[0] = (all_activities.to_scale.min/2)
- @prediction_min_max[1] = (all_activities.to_scale.max*2)
end
unless database_activity(subjectid) # adds database activity to @prediction_dataset
- neighbors
- prediction = eval("#{@prediction_algorithm} ( { :neighbors => @neighbors,
- :compound => @compound,
- :features => @features,
- :p_values => @p_values,
- :fingerprints => @fingerprints,
- :similarity_algorithm => @similarity_algorithm,
- :prop_kernel => @prop_kernel,
+ # Calculation of needed values for query compound
+ @compound_features = eval("#{@feature_calculation_algorithm}({
+ :compound => @compound,
+ :features => @features,
+ :feature_dataset_uri => @metadata[OT.featureDataset],
+ :pc_type => self.parameter(\"pc_type\")
+ })")
+
+ # Adding fingerprint of query compound with features and values(p_value*nr_hits)
+ @compound_fingerprints = {}
+ @compound_features.each do |feature, value| # value is nil if "Substructure.match"
+ if @feature_calculation_algorithm == "Substructure.match_hits"
+ @compound_fingerprints[feature] = @p_values[feature] * value
+ elsif @feature_calculation_algorithm == "Substructure.match"
+ @compound_fingerprints[feature] = @p_values[feature]
+ elsif @feature_calculation_algorithm == "Substructure.lookup"
+ @compound_fingerprints[feature] = value
+ end
+ end
+
+ # Transform model data to machine learning scheme (tables of data)
+ mtf = OpenTox::Algorithm::Transform::ModelTransformer.new(self)
+ mtf.transform
+
+ # Make a prediction
+ prediction = eval("#{@prediction_algorithm}( { :props => mtf.props,
+ :acts => mtf.acts,
+ :sims => mtf.sims,
:value_map => @value_map,
- :nr_hits => @nr_hits,
- :conf_stdev => @conf_stdev,
- :prediction_min_max => @prediction_min_max,
- :transform => @transform } ) ")
+ :min_train_performance => self.parameter(\"min_train_performance\")
+ } ) ")
value_feature_uri = File.join( @uri, "predicted", "value")
confidence_feature_uri = File.join( @uri, "predicted", "confidence")
@@ -355,44 +362,6 @@ module OpenTox
@prediction_dataset
end
-
-
- # Find neighbors and store them as object variable, access all compounds for that.
- def neighbors
- @compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm
- @neighbors = []
- @fingerprints.keys.each do |training_compound| # AM: access all compounds
- add_neighbor @fingerprints[training_compound].keys, training_compound
- end
- end
-
- # Adds a neighbor to @neighbors if it passes the similarity threshold.
- def add_neighbor(training_features, training_compound)
- compound_features_hits = {}
- training_compound_features_hits = {}
- if @nr_hits
- compound_features_hits = @compound.match_hits(@compound_features)
- training_compound_features_hits = @fingerprints[training_compound]
- #LOGGER.debug "dv ------------ training_compound_features_hits:#{training_compound_features_hits.class} #{training_compound_features_hits}"
- end
- params = {}
- params[:nr_hits] = @nr_hits
- params[:compound_features_hits] = compound_features_hits
- params[:training_compound_features_hits] = training_compound_features_hits
-
- sim = eval("#{@similarity_algorithm}(training_features, @compound_features, @p_values, params)")
- if sim > @min_sim
- @activities[training_compound].each do |act|
- @neighbors << {
- :compound => training_compound,
- :similarity => sim,
- :features => training_features,
- :activity => act
- }
- end
- end
- end
-
# Find database activities and store them in @prediction_dataset
# @return [Boolean] true if compound has database activities, false if not
def database_activity(subjectid)
diff --git a/lib/parser.rb b/lib/parser.rb
index 749a41d..56e4fed 100644
--- a/lib/parser.rb
+++ b/lib/parser.rb
@@ -290,10 +290,11 @@ module OpenTox
@features = []
@feature_types = {}
- @format_errors = ""
- @smiles_errors = []
+ @format_errors = []
+ @id_errors = []
@activity_errors = []
@duplicates = {}
+ @max_class_values = 3
end
def detect_new_values(row, value_maps)
@@ -309,9 +310,10 @@ module OpenTox
# Load Spreadsheet book (created with roo gem http://roo.rubyforge.org/, excel format specification: http://toxcreate.org/help)
# @param [Excel] book Excel workbook object (created with roo gem)
# @return [OpenTox::Dataset] Dataset object with Excel data
- def load_spreadsheet(book)
+ def load_spreadsheet(book, drop_missing=false)
book.default_sheet = 0
- add_features book.row(1)
+ headers = book.row(1)
+ add_features headers
value_maps = Array.new
regression_features=Array.new
@@ -319,15 +321,27 @@ module OpenTox
row = book.row(i)
value_maps = detect_new_values(row, value_maps)
value_maps.each_with_index { |vm,j|
- if vm.size > 5 # 5 is the maximum nr of classes supported by Fminer.
+ if vm.size > @max_class_values # max @max_class_values classes (5 is the maximum supported by Fminer).
regression_features[j]=true
else
regression_features[j]=false
end
}
}
+
2.upto(book.last_row) { |i|
- add_values book.row(i), regression_features
+ drop=false
+ row = book.row(i)
+ raise "Entry has size #{row.size}, different from headers (#{headers.size})" if row.size != headers.size
+ if row.include?("")
+ @format_errors << "Row #{i} has #{row.count("")} missing values"
+ drop=true
+ drop_missing=true if (row.count("") == row.size-1)
+ end
+ add_values(row, regression_features) unless (drop_missing && drop)
+ if (drop_missing && drop)
+ @format_errors << "Row #{i} not added"
+ end
}
warnings
@dataset
@@ -336,10 +350,11 @@ module OpenTox
# Load CSV string (format specification: http://toxcreate.org/help)
# @param [String] csv CSV representation of the dataset
# @return [OpenTox::Dataset] Dataset object with CSV data
- def load_csv(csv)
+ def load_csv(csv, drop_missing=false)
row = 0
input = csv.split("\n")
- add_features split_row(input.shift)
+ headers = split_row(input.shift)
+ add_features(headers)
value_maps = Array.new
regression_features=Array.new
@@ -347,15 +362,27 @@ module OpenTox
row = split_row(row)
value_maps = detect_new_values(row, value_maps)
value_maps.each_with_index { |vm,j|
- if vm.size > 5 # 5 is the maximum nr of classes supported by Fminer.
+ if vm.size > @max_class_values # max @max_class_values classes.
regression_features[j]=true
else
regression_features[j]=false
end
}
}
- input.each { |row|
- add_values split_row(row), regression_features
+
+ input.each_with_index { |row, i|
+ drop=false
+ row = split_row(row)
+ raise "Entry has size #{row.size}, different from headers (#{headers.size})" if row.size != headers.size
+ if row.include?("")
+ @format_errors << "Row #{i} has #{row.count("")} missing values"
+ drop=true
+ drop_missing=true if (row.count("") == row.size-1)
+ end
+ add_values(row, regression_features) unless (drop_missing && drop)
+ if (drop_missing && drop)
+ @format_errors << "Row #{i} not added"
+ end
}
warnings
@dataset
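For orientation, a minimal usage sketch of the extended parser (CSV content is made up; `load_csv` and its new `drop_missing` flag are as introduced above, and the save/load sequence mirrors the one used in lib/utils.rb below):

require 'opentox-ruby'

csv = "SMILES,LC50\nCCO,1.2\nCCN,\n"    # second data row has an empty cell
parser = OpenTox::Parser::Spreadsheets.new
ds = OpenTox::Dataset.new
ds.save
parser.dataset = ds
ds = parser.load_csv(csv, true)         # drop_missing=true: incomplete rows are skipped
ds.metadata[OT.Warnings]                # the dropped row is reported under format errors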
@@ -367,88 +394,115 @@ module OpenTox
info = ''
@feature_types.each do |feature,types|
- if types.uniq.size > 1
+ if types.uniq.size == 0
+ type = "helper#MissingFeature"
+ elsif types.uniq.size > 1
type = OT.NumericFeature
else
type = types.first
end
@dataset.add_feature_metadata(feature,{RDF.type => [type]})
- info += "\"#{@dataset.feature_name(feature)}\" detected as #{type.split('#').last}."
+ info += "\"#{@dataset.feature_name(feature)}\" detected as #{type.split('#').last}." if type
# TODO: rewrite feature values
- # TODO if value.to_f == 0 @activity_errors << "#{smiles} Zero values not allowed for regression datasets - entry ignored."
+ # TODO if value.to_f == 0 @activity_errors << "#{id} Zero values not allowed for regression datasets - entry ignored."
end
@dataset.metadata[OT.Info] = info
warnings = ''
- warnings += "<p>Incorrect Smiles structures (ignored):</p>" + @smiles_errors.join("<br/>") unless @smiles_errors.empty?
+ warnings += "<p>Incorrect structures (ignored):</p>" + @id_errors.join("<br/>") unless @id_errors.empty?
warnings += "<p>Irregular activities (ignored):</p>" + @activity_errors.join("<br/>") unless @activity_errors.empty?
+ warnings += "<p>Format errors:</p>" + @format_errors.join("<br/>") unless @format_errors.empty?
duplicate_warnings = ''
@duplicates.each {|inchi,lines| duplicate_warnings << "<p>#{lines.join('<br/>')}</p>" if lines.size > 1 }
- warnings += "<p>Duplicated structures (all structures/activities used for model building, please make sure, that the results were obtained from <em>independent</em> experiments):</p>" + duplicate_warnings unless duplicate_warnings.empty?
+ warnings += "<p>Duplicate structures (all structures/activities used for model building, please make sure that the results were obtained from <em>independent</em> experiments):</p>" + duplicate_warnings unless duplicate_warnings.empty?
@dataset.metadata[OT.Warnings] = warnings
end
+ # Adds a header row of features to a dataset
+ # @param [Array] row Header row split up as an array
+ # Duplicate features are recorded in @duplicate_feature_indices
def add_features(row)
- row.shift # get rid of smiles entry
- row.each do |feature_name|
+ row=row.collect
+ row.shift # get rid of id entry
+ @duplicate_feature_indices = [] # indices start at 0 with the first feature after the id column
+ row.each_with_index do |feature_name, idx|
feature_uri = File.join(@dataset.uri,"feature",URI.encode(feature_name))
- @feature_types[feature_uri] = []
- @features << feature_uri
- @dataset.add_feature(feature_uri,{DC.title => feature_name})
+ unless @features.include? feature_uri
+ @feature_types[feature_uri] = []
+ @features << feature_uri
+ @dataset.add_feature(feature_uri,{DC.title => feature_name})
+ else
+ @duplicate_feature_indices << idx
+ @format_errors << "Duplicate Feature '#{feature_name}' at pos #{idx}"
+ end
end
end
# Adds a row to a dataset
# @param Array A row split up as an array
# @param Array Indicator for regression for each field
+ # Fields at @duplicate_feature_indices are skipped
def add_values(row, regression_features)
- smiles = row.shift
- compound = Compound.from_smiles(smiles)
+ id = row.shift
+ case id
+ when /InChI/
+ compound = Compound.from_inchi(URI.decode_www_form_component(id))
+ else
+ compound = Compound.from_smiles(id)
+ end
+
if compound.nil? or compound.inchi.nil? or compound.inchi == ""
- @smiles_errors << smiles+", "+row.join(", ")
+ @id_errors << id+", "+row.join(", ")
return false
end
@duplicates[compound.inchi] = [] unless @duplicates[compound.inchi]
- @duplicates[compound.inchi] << smiles+", "+row.join(", ")
+ @duplicates[compound.inchi] << id+", "+row.join(", ")
+ feature_idx = 0
row.each_index do |i|
- value = row[i]
- feature = @features[i]
- type = nil
- if (regression_features[i])
- type = feature_type(value)
- if type != OT.NumericFeature
- raise "Error! Expected numeric values."
+ unless @duplicate_feature_indices.include? i
+
+ value = row[i]
+ #LOGGER.warn "Missing values for #{id}" if value.size == 0 # String is empty
+ feature = @features[feature_idx]
+
+ type = feature_type(value) # may be nil for empty values
+ type = OT.NominalFeature unless (type.nil? || regression_features[i])
+ @feature_types[feature] << type if type
+
+ val = nil
+ case type
+ when OT.NumericFeature
+ val = value.to_f
+ when OT.NominalFeature
+ val = value.to_s
end
- else
- type = OT.NominalFeature
- end
- @feature_types[feature] << type
- case type
- when OT.NumericFeature
- val = value.to_f
- when OT.NominalFeature
- val = value.to_s
- end
- if val!=nil
- @dataset.add(compound.uri, feature, val)
- if type!=OT.NumericFeature
- @dataset.features[feature][OT.acceptValue] = [] unless @dataset.features[feature][OT.acceptValue]
- @dataset.features[feature][OT.acceptValue] << val.to_s unless @dataset.features[feature][OT.acceptValue].include?(val.to_s)
+ feature_idx += 1
+
+ if val != nil
+ @dataset.add(compound.uri, feature, val)
+ if type != OT.NumericFeature
+ @dataset.features[feature][OT.acceptValue] = [] unless @dataset.features[feature][OT.acceptValue]
+ @dataset.features[feature][OT.acceptValue] << val.to_s unless @dataset.features[feature][OT.acceptValue].include?(val.to_s)
+ end
end
+
end
+
end
end
def feature_type(value)
- if OpenTox::Algorithm::numeric? value
+ if value == ""
+ return nil
+ elsif OpenTox::Algorithm::numeric? value
return OT.NumericFeature
else
return OT.NominalFeature
@@ -456,7 +510,7 @@ module OpenTox
end
def split_row(row)
- row.chomp.gsub(/["']/,'').split(/\s*[,;\t]\s*/) # remove quotes
+ row.chomp.gsub(/["']/,'').split(/\s*[,;\t]\s*/,-1) # -1: do not skip empty cells
end
end
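The `-1` split limit matters because Ruby's String#split drops trailing empty fields by default; a quick illustration:

"a,b,,".split(/\s*[,;\t]\s*/)       # => ["a", "b"]
"a,b,,".split(/\s*[,;\t]\s*/, -1)   # => ["a", "b", "", ""]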
@@ -468,6 +522,7 @@ module OpenTox
def initialize
@data = {}
@activity_errors = []
+ @max_class_values = 3
end
def feature_values(feature)
@@ -485,14 +540,14 @@ module OpenTox
def clean_features
ignored_features = []
features.each do |feature|
- if feature_values(feature).size > 5
+ if feature_values(feature).size > @max_class_values
if feature_types(feature).size == 1 and feature_types(feature).first == OT.NumericFeature
# REGRESSION
elsif feature_types(feature).include? OT.NumericFeature
@data.each{|c,row| row[feature] = nil unless OpenTox::Algorithm::numeric?(row[feature]) } # delete nominal features
@activity_errors << "Nominal feature values of #{feature} ignored (using numeric features for regression models)."
else
- @activity_errors << "Feature #{feature} ignored (more than 5 nominal feature values and no numeric values)."
+ @activity_errors << "Feature #{feature} ignored (more than #{@max_class_values} nominal feature values and no numeric values)."
ignored_features << feature
next
end
@@ -543,12 +598,15 @@ module OpenTox
private
def feature_type(value)
- if OpenTox::Algorithm::numeric? value
+ if value.nil?
+ return nil
+ elsif OpenTox::Algorithm::numeric? value
return OT.NumericFeature
else
return OT.NominalFeature
end
end
+
end
# quick hack to enable sdf import via csv
@@ -589,20 +647,20 @@ module OpenTox
@duplicates[inchi] << rec #inchi#+", "+row.join(", ")
compound = Compound.from_inchi inchi
rescue
- @compound_errors << "Could not convert structure to InChI, all entries for this compound (record #{rec} have been ignored! \n#{s}"
+ @compound_errors << "Could not convert structure to InChI, all entries for this compound (record #{rec}) have been ignored! \n#{s}"
next
end
row = {}
obmol.get_data.each { |d| row[d.get_attribute] = d.get_value if properties.include?(d.get_attribute) }
table.data[compound.uri] = row
end
-
- # finda and remove ignored_features
+
+ # find and remove ignored_features
@activity_errors = table.clean_features
table.add_to_dataset @dataset
warnings = ''
- warnings += "<p>Incorrect Smiles structures (ignored):</p>" + @compound_errors.join("<br/>") unless @compound_errors.empty?
+ warnings += "<p>Incorrect structures (ignored):</p>" + @compound_errors.join("<br/>") unless @compound_errors.empty?
warnings += "<p>Irregular activities (ignored):</p>" + @activity_errors.join("<br/>") unless @activity_errors.empty?
duplicate_warnings = ''
@duplicates.each {|inchi,lines| duplicate_warnings << "<p>#{lines.join('<br/>')}</p>" if lines.size > 1 }
diff --git a/lib/rest_client_wrapper.rb b/lib/rest_client_wrapper.rb
index 6d25bb3..fcadebb 100644
--- a/lib/rest_client_wrapper.rb
+++ b/lib/rest_client_wrapper.rb
@@ -70,7 +70,7 @@ module OpenTox
begin
#LOGGER.debug "RestCall: "+rest_call.to_s+" "+uri.to_s+" "+headers.inspect+" "+payload.inspect
- resource = RestClient::Resource.new(uri,{:timeout => 60})
+ resource = RestClient::Resource.new(uri,{:timeout => 600})
if rest_call=="post" || rest_call=="put"
result = resource.send(rest_call, payload, headers)
else
diff --git a/lib/serializer.rb b/lib/serializer.rb
index a010ce5..30cb2ba 100644
--- a/lib/serializer.rb
+++ b/lib/serializer.rb
@@ -55,7 +55,7 @@ module OpenTox
OT.predictedVariables => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } ,
OT.paramValue => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } ,
- #object props for validation#
+ #object props for validation#
OT.model => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } ,
OT.trainingDataset => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } ,
OT.predictionFeature => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } ,
@@ -87,7 +87,7 @@ module OpenTox
OT.percentageCompleted => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } ,
OT.acceptValue => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } ,
- # annotation props for validation
+ # annotation props for validation
OT.numUnpredicted => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } ,
OT.crossvalidationFold => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } ,
OT.numInstances => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } ,
@@ -143,8 +143,8 @@ module OpenTox
@data_entries = {}
@values_id = 0
@parameter_id = 0
-
- @classes = Set.new
+
+ @classes = Set.new
@object_properties = Set.new
@annotation_properties = Set.new
@datatype_properties = Set.new
@@ -208,7 +208,7 @@ module OpenTox
@object[uri] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Task }] }
add_metadata uri, metadata
end
-
+
# Add a resource defined by resource_class and content
# (see documentation of add_content for example)
# @param [String] uri of resource
@@ -223,10 +223,10 @@ module OpenTox
def add_uri(uri,type)
@object[uri] = { RDF["type"] => [{ "type" => "uri", "value" => type }] }
end
-
+
private
@@content_id = 1
-
+
#Recursive function to add content
#@example
# { DC.description => "bla",
@@ -244,7 +244,7 @@ module OpenTox
hash.each do |u,v|
if v.is_a? Hash
# value is again a hash, i.e. a new owl class is added
- # first make sure type (==class) is set
+ # first make sure type (==class) is set
type = v[RDF.type]
raise "type missing for "+u.to_s+" content:\n"+v.inspect unless type
raise "class unknown "+type.to_s+" (for "+u.to_s+")" unless @object.has_key?(type)
@@ -256,7 +256,7 @@ module OpenTox
# add content to new class
add_content(genid,v)
elsif v.is_a? Array
- # value is an array, i.e. a list of values with property is added
+ # value is an array, i.e. a list of values with property is added
v.each{ |vv| add_content( uri, { u => vv } ) }
else # v.is_a? String
# simple string value
@@ -268,7 +268,7 @@ module OpenTox
end
end
end
-
+
public
# Add metadata
@@ -329,7 +329,7 @@ module OpenTox
v = [{ "type" => "uri", "value" => value}]
when "literal"
v = [{ "type" => "literal", "value" => value, "datatype" => datatype(value) }]
- else
+ else
raise "Illegal type #{type(value)} for #{value}."
end
@object[values] = {
@@ -342,7 +342,7 @@ module OpenTox
end
# Serializers
-
+
# Convert to N-Triples
# @return [text/plain] Object OWL-DL in N-Triples format
def to_ntriples
@@ -353,7 +353,7 @@ module OpenTox
entry.each do |p,objects|
p = url(p)
objects.each do |o|
- case o["type"]
+ case o["type"]
when "uri"
o = url(o["value"])
when "literal"
@@ -371,9 +371,15 @@ module OpenTox
# Convert to RDF/XML
# @return [text/plain] Object OWL-DL in RDF/XML format
def to_rdfxml
- Tempfile.open("owl-serializer"){|f| f.write(self.to_ntriples); @path = f.path}
+ tmpf = Tempfile.open("owl-serializer")
+ tmpf.write(self.to_ntriples)
+ tmpf.flush
+ @path = tmpf.path
# TODO: add base uri for ist services
- `rapper -i ntriples -f 'xmlns:ot="#{OT.uri}"' -f 'xmlns:ota="#{OTA.uri}"' -f 'xmlns:dc="#{DC.uri}"' -f 'xmlns:rdf="#{RDF.uri}"' -f 'xmlns:owl="#{OWL.uri}"' -o rdfxml #{@path} 2>/dev/null`
+ res=`rapper -i ntriples -f 'xmlns:ot="#{OT.uri}"' -f 'xmlns:ota="#{OTA.uri}"' -f 'xmlns:dc="#{DC.uri}"' -f 'xmlns:rdf="#{RDF.uri}"' -f 'xmlns:owl="#{OWL.uri}"' -o rdfxml #{@path} 2>/dev/null`
+ tmpf.close
+ tmpf.delete
+ res
end
# Convert to JSON as specified in http://n2.talis.com/wiki/RDF_JSON_Specification
@@ -427,20 +433,20 @@ module OpenTox
end
def literal(value,type)
- # concat and << are faster string concatination operators than +
+ # concat and << are faster string concatination operators than +
'"'.concat(value.to_s).concat('"^^<').concat(type).concat('>')
end
def url(uri)
- # concat and << are faster string concatination operators than +
+ # concat and << are faster string concatination operators than +
'<'.concat(uri).concat('>')
end
def rdf_types
- @classes.each { |c| @object[c] = { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } }
- @object_properties.each { |p| @object[p] = { RDF["type"] => [{ "type" => "uri", "value" => OWL['ObjectProperty'] }] } }
- @annotation_properties.each { |a| @object[a] = { RDF["type"] => [{ "type" => "uri", "value" => OWL['AnnotationProperty'] }] } }
- @datatype_properties.each { |d| @object[d] = { RDF["type"] => [{ "type" => "uri", "value" => OWL['DatatypeProperty'] }] } }
+ @classes.each { |c| @object[c] = { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } }
+ @object_properties.each { |p| @object[p] = { RDF["type"] => [{ "type" => "uri", "value" => OWL['ObjectProperty'] }] } }
+ @annotation_properties.each { |a| @object[a] = { RDF["type"] => [{ "type" => "uri", "value" => OWL['AnnotationProperty'] }] } }
+ @datatype_properties.each { |d| @object[d] = { RDF["type"] => [{ "type" => "uri", "value" => OWL['DatatypeProperty'] }] } }
end
end
@@ -457,27 +463,38 @@ module OpenTox
@rows.first << features
@rows.first.flatten!
dataset.data_entries.each do |compound,entries|
- smiles = Compound.new(compound).to_smiles
+ cmpd = Compound.new(compound)
+ smiles = cmpd.to_smiles
+ inchi = URI.encode_www_form_component(cmpd.to_inchi)
+ row_container = Array.new
row = Array.new(@rows.first.size)
- row[0] = smiles
+ row_container << row
+ #row[0] = smiles
+ row[0] = inchi
entries.each do |feature, values|
i = features.index(feature)+1
values.each do |value|
- if row[i]
- row[i] = "#{row[i]} #{value}" # multiple values
+ if row_container[0][i]
+ #LOGGER.debug "Feature '#{feature}' (nr '#{i}'): '#{value}'"
+ row_container << row_container.last.collect
+ row_container.last[i] = value
+ #LOGGER.debug "RC: #{row_container.to_yaml}"
else
- row[i] = value
+ row_container.each { |r| r[i] = value }
end
end
end
- @rows << row
+ row_container.each { |r| @rows << r }
end
end
# Convert to CSV string
# @return [String] CSV string
def to_csv
- @rows.collect{|r| r.join(", ")}.join("\n")
+ rows = @rows.collect
+ result = ""
+ result << rows.shift.collect { |f| f.split('/').last }.join(",") << "\n" # only feature name
+ result << rows.collect{ |r| r.join(",") }.join("\n")
end
# Convert to spreadsheet workbook
diff --git a/lib/transform.rb b/lib/transform.rb
new file mode 100644
index 0000000..8fe1093
--- /dev/null
+++ b/lib/transform.rb
@@ -0,0 +1,520 @@
+module OpenTox
+ module Transform
+ # Uses Statsample Library (http://ruby-statsample.rubyforge.org/) by C. Bustos
+
+ # LogAutoScaler for GSL vectors.
+ # Takes log10, then autoscales.
+ class LogAutoScale
+ attr_accessor :vs, :offset, :autoscaler
+
+ # @param [GSL::Vector] Values to transform using LogAutoScaling.
+ def initialize values
+ @distance_to_zero = 1.0
+ begin
+ raise "Cannot transform, values empty." if values.size==0
+ vs = values.clone
+ @offset = vs.min - @distance_to_zero
+ @autoscaler = OpenTox::Transform::AutoScale.new mvlog(vs)
+ @vs = @autoscaler.vs
+ rescue Exception => e
+ LOGGER.debug "#{e.class}: #{e.message}"
+ LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
+ end
+ end
+
+ # @param [GSL::Vector] values to restore.
+ # @return [GSL::Vector] restored values.
+ def restore values
+ begin
+ raise "Cannot transform, values empty." if values.size==0
+ vs = values.clone
+ rv = @autoscaler.restore(vs)
+ rv.to_a.collect { |v| (10**v) + @offset }.to_gv
+ rescue Exception => e
+ LOGGER.debug "#{e.class}: #{e.message}"
+ LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
+ end
+ end
+
+ # @param [GSL::Vector] values to transform.
+ # @return [GSL::Vector] transformed values.
+ def mvlog values
+ values.to_a.collect { |v| Math::log10(v - @offset) }.to_gv
+ end
+
+ end
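A round-trip sketch for the class above (assumes GSL is loaded and Array#to_gv is available, as used elsewhere in this library):

values = [1.0, 10.0, 100.0].to_gv
las = OpenTox::Transform::LogAutoScale.new(values)
las.vs                  # log10-transformed, then autoscaled
las.restore(las.vs)     # ~ [1.0, 10.0, 100.0], up to floating point error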
+
+
+ # Auto-Scaler for GSL vectors.
+ # Center on mean and divide by standard deviation.
+ class AutoScale
+ attr_accessor :vs, :mean, :stdev
+
+ # @param [GSL::Vector] values to transform using AutoScaling.
+ def initialize values
+ begin
+ raise "Cannot transform, values empty." if values.size==0
+ vs = values.clone
+ @mean = vs.to_scale.mean
+ @stdev = vs.to_scale.standard_deviation_population
+ @stdev = 0.0 if @stdev.nan?
+ @vs = transform vs
+ rescue Exception => e
+ LOGGER.debug "#{e.class}: #{e.message}"
+ LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
+ end
+ end
+
+ # @param [GSL::Vector] values to transform.
+ # @return [GSL::Vector] transformed values.
+ def transform values
+ begin
+ raise "Cannot transform, values empty." if values.size==0
+ autoscale values.clone
+ rescue Exception => e
+ LOGGER.debug "#{e.class}: #{e.message}"
+ LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
+ end
+ end
+
+ # @param [GSL::Vector] Values to restore.
+ # @return [GSL::Vector] restored values.
+ def restore values
+ begin
+ raise "Cannot transform, values empty." if values.size==0
+ rv_ss = values.clone.to_scale * @stdev unless @stdev == 0.0
+ (rv_ss + @mean).to_gsl
+ rescue Exception => e
+ LOGGER.debug "#{e.class}: #{e.message}"
+ LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
+ end
+ end
+
+ # @param [GSL::Vector] values to transform.
+ # @return [GSL::Vector] transformed values.
+ def autoscale values
+ vs_ss = values.clone.to_scale - @mean
+ @stdev == 0.0 ? vs_ss.to_gsl : ( vs_ss * ( 1 / @stdev) ).to_gsl
+ end
+
+ end
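The same centering and scaling written out in plain Ruby for clarity; a sketch of the math only, not the GSL/Statsample code path above:

def autoscale_plain(values)
  mean  = values.inject(0.0) { |s, v| s + v } / values.size
  var   = values.inject(0.0) { |s, v| s + (v - mean)**2 } / values.size
  stdev = Math.sqrt(var)   # population standard deviation, as above
  stdev == 0.0 ? values.collect { |v| v - mean } :
                 values.collect { |v| (v - mean) / stdev }
end

autoscale_plain([2.0, 4.0, 6.0])   # => [-1.2247..., 0.0, 1.2247...]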
+
+
+ # Principal Components Analysis.
+ class PCA
+ attr_accessor :data_matrix, :data_transformed_matrix, :eigenvector_matrix, :eigenvalue_sums, :autoscaler
+
+ # Creates a transformed dataset as GSL::Matrix.
+ #
+ # @param [GSL::Matrix] Data matrix.
+ # @param [Float] Compression ratio from [0,1], default 0.05.
+ # @return [GSL::Matrix] Data transformed matrix.
+ def initialize data_matrix, compression=0.05, maxcols=(1.0/0.0)
+ begin
+ @data_matrix = data_matrix.clone
+ @compression = compression.to_f
+ @mean = Array.new
+ @autoscaler = Array.new
+ @cols = Array.new
+ @maxcols = maxcols
+
+ # Objective Feature Selection
+ raise "Error! PCA needs at least two dimensions." if data_matrix.size2 < 2
+ @data_matrix_selected = nil
+ (0..@data_matrix.size2-1).each { |i|
+ if !Algorithm::zero_variance?(@data_matrix.col(i).to_a)
+ if @data_matrix_selected.nil?
+ @data_matrix_selected = GSL::Matrix.alloc(@data_matrix.size1, 1)
+ @data_matrix_selected.col(0)[0..@data_matrix.size1-1] = @data_matrix.col(i)
+ else
+ @data_matrix_selected = @data_matrix_selected.horzcat(GSL::Matrix.alloc(@data_matrix.col(i).to_a,@data_matrix.size1, 1))
+ end
+ @cols << i
+ end
+ }
+ raise "Error! PCA needs at least two dimensions." if (@data_matrix_selected.nil? || @data_matrix_selected.size2 < 2)
+
+ # PCA uses internal centering on 0
+ @data_matrix_scaled = GSL::Matrix.alloc(@data_matrix_selected.size1, @cols.size)
+ (0..@cols.size-1).each { |i|
+ as = OpenTox::Transform::AutoScale.new(@data_matrix_selected.col(i))
+ @data_matrix_scaled.col(i)[0..@data_matrix.size1-1] = as.vs * as.stdev # re-adjust by stdev
+ @mean << as.mean
+ @autoscaler << as
+ }
+
+ # PCA
+ data_matrix_hash = Hash.new
+ (0..@cols.size-1).each { |i|
+ column_view = @data_matrix_scaled.col(i)
+ data_matrix_hash[i] = column_view.to_scale
+ }
+ dataset_hash = data_matrix_hash.to_dataset # see http://goo.gl/7XcW9
+ cor_matrix=Statsample::Bivariate.correlation_matrix(dataset_hash)
+ pca=Statsample::Factor::PCA.new(cor_matrix)
+
+ # Select best eigenvectors
+ pca.eigenvalues.each { |ev| raise "PCA failed!" unless !ev.nan? }
+ @eigenvalue_sums = Array.new
+ (0..@cols.size-1).each { |i|
+ @eigenvalue_sums << pca.eigenvalues[0..i].inject{ |sum, ev| sum + ev }
+ }
+ eigenvectors_selected = Array.new
+ pca.eigenvectors.each_with_index { |ev, i|
+ if (@eigenvalue_sums[i] <= ((1.0-@compression)*@cols.size)) || (eigenvectors_selected.size == 0)
+ eigenvectors_selected << ev.to_a unless @maxcols <= eigenvectors_selected.size
+ end
+ }
+ @eigenvector_matrix = GSL::Matrix.alloc(eigenvectors_selected.flatten, eigenvectors_selected.size, @cols.size).transpose
+ @data_transformed_matrix = (@eigenvector_matrix.transpose * @data_matrix_scaled.transpose).transpose
+
+ rescue Exception => e
+ LOGGER.debug "#{e.class}: #{e.message}"
+ LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
+ end
+ end
+
+ # Transforms data to feature space found by PCA.
+ #
+ # @param [GSL::Matrix] Data matrix.
+ # @return [GSL::Matrix] Transformed data matrix.
+ def transform values
+ begin
+ vs = values.clone
+ raise "Error! Too few columns for transformation." if vs.size2 < @cols.max
+ data_matrix_scaled = GSL::Matrix.alloc(vs.size1, @cols.size)
+ @cols.each_with_index { |i,j|
+ data_matrix_scaled.col(j)[0..data_matrix_scaled.size1-1] = @autoscaler[j].transform(vs.col(i).to_a) * @autoscaler[j].stdev
+ }
+ (@eigenvector_matrix.transpose * data_matrix_scaled.transpose).transpose
+ rescue Exception => e
+ LOGGER.debug "#{e.class}: #{e.message}"
+ LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
+ end
+ end
+
+ # Restores data in the original feature space (possibly with compression loss).
+ #
+ # @param [GSL::Matrix] Transformed data matrix.
+ # @return [GSL::Matrix] Data matrix.
+ def restore
+ begin
+ data_matrix_restored = (@eigenvector_matrix * @data_transformed_matrix.transpose).transpose # reverse pca
+ # reverse scaling
+ (0..@cols.size-1).each { |i|
+ data_matrix_restored.col(i)[0..data_matrix_restored.size1-1] += @mean[i]
+ }
+ data_matrix_restored
+ rescue Exception => e
+ LOGGER.debug "#{e.class}: #{e.message}"
+ LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
+ end
+ end
+
+ end
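The eigenvector selection rule above, isolated with made-up eigenvalues: components are kept while the running eigenvalue sum stays below (1 - compression) times the number of columns (the total variance of a correlation matrix), and at least one component is always kept:

eigenvalues = [2.0, 1.0, 0.7, 0.3]   # hypothetical, sorted descending
compression = 0.05
ncols = eigenvalues.size
sums = []
(0..ncols-1).each { |i| sums << eigenvalues[0..i].inject { |s, ev| s + ev } }
selected = []
eigenvalues.each_with_index { |ev, i|
  selected << i if (sums[i] <= (1.0 - compression) * ncols) || selected.empty?
}
selected   # => [0, 1, 2]; the last component (running sum 4.0 > 3.8) is dropped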
+
+
+ # Singular Value Decomposition
+ class SVD
+ attr_accessor :data_matrix, :compression, :data_transformed_matrix, :uk, :vk, :eigk, :eigk_inv
+
+ # Creates a transformed dataset as GSL::Matrix.
+ #
+ # @param [GSL::Matrix] Data matrix
+ # @param [Float] Compression ratio from [0,1], default 0.05
+ # @return [GSL::Matrix] Data transformed matrix
+
+ def initialize data_matrix, compression=0.05
+ begin
+ @data_matrix = data_matrix.clone
+ @compression = compression
+
+ # Compute the SV Decomposition X=USV
+ # vt is *not* the transpose of V here, but V itself (see http://goo.gl/mm2xz)!
+ u, vt, s = data_matrix.SV_decomp
+
+ # Determine cutoff index
+ s2 = s.mul(s) ; s2_sum = s2.sum
+ s2_run = 0
+ k = s2.size - 1
+ s2.to_a.reverse.each { |v|
+ s2_run += v
+ frac = s2_run / s2_sum
+ break if frac > compression
+ k -= 1
+ }
+ k += 1 if k == 0 # avoid uni-dimensional (always cos sim of 1)
+
+ # Take the k-rank approximation of the Matrix
+ # - Take first k columns of u
+ # - Take first k columns of vt
+ # - Take the first k eigenvalues
+ @uk = u.submatrix(nil, (0..k)) # used to transform column format data
+ @vk = vt.submatrix(nil, (0..k)) # used to transform row format data
+ s = GSL::Matrix.diagonal(s)
+ @eigk = s.submatrix((0..k), (0..k))
+ @eigk_inv = @eigk.inv
+
+ # Transform data
+ @data_transformed_matrix = @uk # = u for all SVs
+ # NOTE: @data_transformed_matrix is also equal to
+ # @data_matrix * @vk * @eigk_inv
+
+ rescue Exception => e
+ LOGGER.debug "#{e.class}: #{e.message}"
+ LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
+ end
+ end
+
+
+ # Transforms data instance (1 row) to feature space found by SVD.
+ #
+ # @param [GSL::Matrix] Data matrix (1 x m).
+ # @return [GSL::Matrix] Transformed data matrix.
+ def transform_instance values
+ begin
+ values * @vk * @eigk_inv
+ rescue Exception => e
+ LOGGER.debug "#{e.class}: #{e.message}"
+ LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
+ end
+ end
+ alias :transform :transform_instance # make this the default (see PCA interface)
+
+ # Transforms data feature (1 column) to feature space found by SVD.
+ #
+ # @param [GSL::Matrix] Data matrix (1 x n).
+ # @return [GSL::Matrix] Transformed data matrix.
+ def transform_feature values
+ begin
+ values * @uk * @eigk_inv
+ rescue Exception => e
+ LOGGER.debug "#{e.class}: #{e.message}"
+ LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
+ end
+ end
+
+
+ # Restores data in the original feature space (possibly with compression loss).
+ #
+ # @param [GSL::Matrix] Transformed data matrix.
+ # @return [GSL::Matrix] Data matrix.
+ def restore
+ begin
+ @data_transformed_matrix * @eigk * @vk.transpose # reverse svd
+ rescue Exception => e
+ LOGGER.debug "#{e.class}: #{e.message}"
+ LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
+ end
+ end
+
+
+ end
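The cutoff search in `initialize`, in isolation: k is the smallest index such that the discarded squared singular values account for at most `compression` of the total energy. With hypothetical singular values:

s = [4.0, 2.0, 1.0, 0.5]              # hypothetical singular values
compression = 0.05
s2 = s.collect { |v| v * v }           # [16.0, 4.0, 1.0, 0.25]
s2_sum = s2.inject { |a, b| a + b }    # 21.25
s2_run = 0.0
k = s2.size - 1
s2.reverse.each { |v|
  s2_run += v
  break if s2_run / s2_sum > compression
  k -= 1
}
k += 1 if k == 0
k   # => 2, i.e. the first three columns (0..k) are kept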
+
+
+
+ # Attaches transformations to an OpenTox::Model
+ # Stores props, sims, performs similarity calculations
+ class ModelTransformer
+ attr_accessor :model, :similarity_algorithm, :acts, :sims
+
+ # @param [OpenTox::Model] model to transform
+ def initialize model
+ @model = model
+ @similarity_algorithm = @model.similarity_algorithm
+ end
+
+ def transform
+ get_matrices # creates @n_prop, @q_prop, @acts from ordered fps
+ @ids = (0..((@n_prop.length)-1)).to_a # surviving compounds; become neighbors
+
+ # Preprocessing
+ if (@model.similarity_algorithm == "Similarity.cosine")
+ # truncate nil-columns and -rows
+ LOGGER.debug "O: #{@n_prop.size}x#{@n_prop[0].size}; R: #{@q_prop.size}"
+ while @q_prop.size>0
+ idx = @q_prop.index(nil)
+ break if idx.nil?
+ @q_prop.slice!(idx)
+ @n_prop.each { |r| r.slice!(idx) }
+ end
+ LOGGER.debug "Q: #{@n_prop.size}x#{@n_prop[0].size}; R: #{@q_prop.size}"
+ remove_nils # removes nil cells (for cosine); alters @n_prop, @q_prop, cuts down @ids to survivors
+ LOGGER.debug "M: #{@n_prop.size}x#{@n_prop[0].size}; R: #{@q_prop.size}"
+
+ # adjust rest
+ fps_tmp = []; @ids.each { |idx| fps_tmp << @fps[idx] }; @fps = fps_tmp
+ cmpds_tmp = []; @ids.each { |idx| cmpds_tmp << @cmpds[idx] }; @cmpds = cmpds_tmp
+ acts_tmp = []; @ids.each { |idx| acts_tmp << @acts[idx] }; @acts = acts_tmp
+
+ # scale and svd
+ nr_cases, nr_features = @n_prop.size, @n_prop[0].size
+ gsl_n_prop = GSL::Matrix.alloc(@n_prop.flatten, nr_cases, nr_features); gsl_n_prop_orig = gsl_n_prop.clone # make backup
+ gsl_q_prop = GSL::Matrix.alloc(@q_prop.flatten, 1, nr_features); gsl_q_prop_orig = gsl_q_prop.clone # make backup
+ (0...nr_features).each { |i|
+ autoscaler = OpenTox::Transform::AutoScale.new(gsl_n_prop.col(i))
+ gsl_n_prop.col(i)[0..nr_cases-1] = autoscaler.vs
+ gsl_q_prop.col(i)[0..0] = autoscaler.transform gsl_q_prop.col(i)
+ }
+ svd = OpenTox::Algorithm::Transform::SVD.new(gsl_n_prop, 0.0)
+ @n_prop = svd.data_transformed_matrix.to_a
+ @q_prop = svd.transform(gsl_q_prop).row(0).to_a
+ LOGGER.debug "S: #{@n_prop.size}x#{@n_prop[0].size}; R: #{@q_prop.size}"
+ else
+ convert_nils # replaces nil cells with 0 (for tanimoto); leaves @n_prop, @q_prop dimensions and @ids untouched
+ end
+
+ # neighbor calculation
+ @ids = [] # surviving compounds become neighbors
+ @sims = [] # calculated by neighbor routine
+ neighbors
+ n_prop_tmp = []; @ids.each { |idx| n_prop_tmp << @n_prop[idx] }; @n_prop = n_prop_tmp # select neighbors from matrix
+ acts_tmp = []; @ids.each { |idx| acts_tmp << @acts[idx] }; @acts = acts_tmp
+
+
+ # Sims between neighbors, if necessary
+ gram_matrix = []
+ if !@model.parameter("propositionalized") # need gram matrix for standard setting (n. prop.)
+ @n_prop.each_index do |i|
+ gram_matrix[i] = [] unless gram_matrix[i]
+ @n_prop.each_index do |j|
+ if (j>i)
+ sim = eval("OpenTox::Algorithm::#{@similarity_algorithm}(@n_prop[i], @n_prop[j])")
+ gram_matrix[i][j] = sim
+ gram_matrix[j] = [] unless gram_matrix[j]
+ gram_matrix[j][i] = gram_matrix[i][j]
+ end
+ end
+ gram_matrix[i][i] = 1.0
+ end
+ end
+
+ # reclaim original data (if svd was performed)
+ if svd
+ @n_prop = gsl_n_prop_orig.to_a
+ n_prop_tmp = []; @ids.each { |idx| n_prop_tmp << @n_prop[idx] }; @n_prop = n_prop_tmp
+ @q_prop = gsl_q_prop_orig.row(0).to_a
+ end
+
+ LOGGER.debug "F: #{@n_prop.size}x#{@n_prop[0].size}; R: #{@q_prop.size}"
+ LOGGER.debug "Sims: #{@sims.size}, Acts: #{@acts.size}"
+
+ @sims = [ gram_matrix, @sims ]
+
+ end
+
+
+
+
+ # Find neighbors and store them as object variable, access all compounds for that.
+ def neighbors
+ @model.neighbors = []
+ @n_prop.each_with_index do |fp, idx| # AM: access all compounds
+ add_neighbor fp, idx
+ end
+ end
+
+
+ # Adds a neighbor to @model.neighbors if it passes the similarity threshold;
+ # records its index in @ids and its similarity in @sims
+ def add_neighbor(training_props, idx)
+
+ sim = similarity(training_props)
+ if sim > @model.parameter("min_sim")
+ if @model.activities[@cmpds[idx]]
+ @model.activities[@cmpds[idx]].each do |act|
+ @model.neighbors << {
+ :compound => @cmpds[idx],
+ :similarity => sim,
+ :features => @fps[idx].keys,
+ :activity => act
+ }
+ @sims << sim
+ @ids << idx
+ end
+ end
+ end
+ end
+
+
+ # Removes nil entries from n_prop and q_prop.
+ # Matrix is a nested two-dimensional array.
+ # Removes iteratively rows or columns with the highest fraction of nil entries, until all nil entries are removed.
+ # Tie break: columns take precedence.
+ # Deficient input such as [[nil],[nil]] will not be completely reduced, as the algorithm terminates if any matrix dimension (x or y) is zero.
+ # Enables the use of cosine similarity / SVD
+ def remove_nils
+ return @n_prop if (@n_prop.length == 0 || @n_prop[0].length == 0)
+ col_nr_nils = (Matrix.rows(@n_prop)).column_vectors.collect{ |cv| (cv.to_a.count(nil) / cv.size.to_f) }
+ row_nr_nils = (Matrix.rows(@n_prop)).row_vectors.collect{ |rv| (rv.to_a.count(nil) / rv.size.to_f) }
+ m_cols = col_nr_nils.max
+ m_rows = row_nr_nils.max
+ idx_cols = col_nr_nils.index(m_cols)
+ idx_rows = row_nr_nils.index(m_rows)
+ while ((m_cols > 0) || (m_rows > 0)) do
+ if m_cols >= m_rows
+ @n_prop.each { |row| row.slice!(idx_cols) }
+ @q_prop.slice!(idx_cols)
+ else
+ @n_prop.slice!(idx_rows)
+ @ids.slice!(idx_rows)
+ end
+ break if (@n_prop.length == 0) || (@n_prop[0].length == 0)
+ col_nr_nils = Matrix.rows(@n_prop).column_vectors.collect{ |cv| (cv.to_a.count(nil) / cv.size.to_f) }
+ row_nr_nils = Matrix.rows(@n_prop).row_vectors.collect{ |rv| (rv.to_a.count(nil) / rv.size.to_f) }
+ m_cols = col_nr_nils.max
+ m_rows = row_nr_nils.max
+ idx_cols= col_nr_nils.index(m_cols)
+ idx_rows = row_nr_nils.index(m_rows)
+ end
+ end
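A worked reduction on a hypothetical 3x3 matrix (comment walkthrough only, since the method operates on instance state):

# @n_prop = [[1,   nil, 3],     @q_prop = [7, 8, 9]
#            [4,   5,   6],
#            [nil, nil, 9]]
# Column nil fractions: [1/3, 2/3, 0]; row nil fractions: [1/3, 0, 2/3].
# Tie at 2/3 -> the column wins: column 1 is cut from @n_prop and @q_prop.
# Recompute: row 2 now has the highest fraction (1/2) -> row 2 and its @ids
# entry are cut. Result: @n_prop = [[1, 3], [4, 6]], @q_prop = [7, 9].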
+
+
+ # Replaces nils by zeroes in n_prop and q_prop
+ # Enables the use of Tanimoto similarities with arrays (rows of n_prop and q_prop)
+ def convert_nils
+ @n_prop.each { |row| row.collect! { |v| v.nil? ? 0 : v } }
+ @q_prop.collect! { |v| v.nil? ? 0 : v }
+ end
+
+
+ # Executes model similarity_algorithm
+ def similarity(training_props)
+ eval("OpenTox::Algorithm::#{@model.similarity_algorithm}(training_props, @q_prop)")
+ end
+
+
+ # Converts fingerprints to matrix, order of rows by fingerprints. nil values allowed.
+ # Same for compound fingerprints.
+ def get_matrices
+
+ @cmpds = []; @fps = []; @acts = []; @n_prop = []; @q_prop = []
+
+ @model.fingerprints.each { |fp|
+ cmpd = fp[0]; fp = fp[1]
+ if @model.activities[cmpd] # row good
+ acts = @model.activities[cmpd]; @acts += acts
+ LOGGER.debug "#{acts.size} activities for '#{cmpd}'" if acts.size > 1
+ row = []; @model.features.each { |f| row << fp[f] } # nils for non-existent f's
+ acts.size.times { # multiple additions for multiple activities
+ @n_prop << row.collect
+ @cmpds << cmpd
+ @fps << Marshal.load(Marshal.dump(fp))
+ }
+ else
+ LOGGER.warn "No activity found for compound '#{cmpd}' in model '#{@model.uri}'"
+ end
+ }
+
+ @model.features.each { |f| @q_prop << @model.compound_fingerprints[f] } # query structure
+
+ end
+
+ def props
+ @model.parameter("propositionalized") ? [ @n_prop, @q_prop ] : nil
+ end
+
+ end
+
+ end
+end
diff --git a/lib/utils.rb b/lib/utils.rb
new file mode 100644
index 0000000..c9e9eca
--- /dev/null
+++ b/lib/utils.rb
@@ -0,0 +1,372 @@
+require 'csv'
+
+
+module OpenTox
+
+ module Algorithm
+
+ include OpenTox
+
+ # Calculate physico-chemical descriptors.
+ # @param[Hash] Required keys: :dataset_uri, :pc_type
+ # @return[String] dataset uri
+
+ def self.pc_descriptors(params)
+
+ begin
+ ds = OpenTox::Dataset.find(params[:dataset_uri])
+ compounds = ds.compounds.collect
+ ambit_result_uri, smiles_to_inchi = get_pc_descriptors( { :compounds => compounds, :pc_type => params[:pc_type] } )
+ #ambit_result_uri = ["http://apps.ideaconsult.net:8080/ambit2/dataset/987103?" ,"feature_uris[]=http%3A%2F%2Fapps.ideaconsult.net%3A8080%2Fambit2%2Ffeature%2F4276789&", "feature_uris[]=http%3A%2F%2Fapps.ideaconsult.net%3A8080%2Fambit2%2Fmodel%2F16%2Fpredicted"] # for testing
+ LOGGER.debug "Ambit result uri for #{params.inspect}: '#{ambit_result_uri.to_yaml}'"
+ load_ds_csv(ambit_result_uri, smiles_to_inchi)
+ rescue Exception => e
+ LOGGER.debug "#{e.class}: #{e.message}"
+ LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
+ end
+
+ end
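A hypothetical invocation (the dataset URI is made up; :pc_type must name categories present in ambit_descriptors.yaml):

ds_uri = OpenTox::Algorithm.pc_descriptors(
  :dataset_uri => "http://localhost/dataset/1",   # hypothetical URI
  :pc_type     => "electronic,cpsa"
)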
+
+ # Calculates PC descriptors via Ambit -- DO NOT OVERLOAD Ambit.
+ # @param[Hash] Required keys: :compounds, :pc_type
+ # @return[Array] Ambit result uri, piecewise (1st: base, 2nd: SMILES, 3rd+: features)
+ def self.get_pc_descriptors(params)
+
+ begin
+
+ ambit_ds_service_uri = "http://apps.ideaconsult.net:8080/ambit2/dataset/"
+ ambit_mopac_model_uri = "http://apps.ideaconsult.net:8080/ambit2/model/69632"
+ descs = YAML::load_file( File.join(ENV['HOME'], ".opentox", "config", "ambit_descriptors.yaml") )
+ descs_uris = []
+ params[:pc_type] = "electronic,cpsa" if params[:pc_type].nil? # rescue missing pc_type
+ types = params[:pc_type].split(",")
+ descs.each { |uri, cat_name|
+ if types.include? cat_name[:category]
+ descs_uris << uri
+ end
+ }
+ if descs_uris.size == 0
+ raise "Error! Empty set of descriptors. Did you supply one of [geometrical, topological, electronic, constitutional, hybrid, cpsa] ?"
+ end
+ #LOGGER.debug "Ambit descriptor URIs: #{descs_uris.join(", ")}"
+
+ begin
+ # Create SMI
+ smiles_array = []; smiles_to_inchi = {}
+ params[:compounds].each do |n|
+ cmpd = OpenTox::Compound.new(n)
+ smiles_string = cmpd.to_smiles
+ smiles_to_inchi[smiles_string] = URI.encode_www_form_component(cmpd.to_inchi)
+ smiles_array << smiles_string
+ end
+ smi_file = Tempfile.open(['pc_ambit', '.csv'])
+ pc_descriptors = nil
+
+ # Create Ambit dataset
+ smi_file.puts( "SMILES\n" )
+ smi_file.puts( smiles_array.join("\n") )
+ smi_file.flush
+ ambit_ds_uri = OpenTox::RestClientWrapper.post(ambit_ds_service_uri, {:file => File.new(smi_file.path)}, {:content_type => "multipart/form-data", :accept => "text/uri-list"} )
+ rescue Exception => e
+ LOGGER.debug "#{e.class}: #{e.message}"
+ LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
+ ensure
+ smi_file.close! if smi_file
+ end
+ ambit_smiles_uri = OpenTox::RestClientWrapper.get(ambit_ds_uri + "/features", {:accept=> "text/uri-list"} ).chomp
+
+ # Calculate 3D for CPSA
+ if types.include? "cpsa"
+ ambit_ds_mopac_uri = OpenTox::RestClientWrapper.post(ambit_mopac_model_uri, {:dataset_uri => ambit_ds_uri}, {:accept => "text/uri-list"} )
+ LOGGER.debug "MOPAC dataset: #{ambit_ds_mopac_uri }"
+ end
+
+ # Get Ambit results
+ ambit_result_uri = [] # 1st pos: base uri, then features
+ ambit_result_uri << ambit_ds_uri + "?"
+ ambit_result_uri << ("feature_uris[]=" + URI.encode_www_form_component(ambit_smiles_uri) + "&")
+ descs_uris.each_with_index do |uri, i|
+ algorithm = Algorithm::Generic.new(uri)
+ result_uri = algorithm.run({:dataset_uri => ambit_ds_uri})
+ ambit_result_uri << result_uri.split("?")[1] + "&"
+ LOGGER.debug "Ambit (#{descs_uris.size}): #{i+1}"
+ end
+ #LOGGER.debug "Ambit result: #{ambit_result_uri.join('')}"
+ [ ambit_result_uri, smiles_to_inchi ]
+
+ rescue Exception => e
+ LOGGER.debug "#{e.class}: #{e.message}"
+ LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
+ end
+ end
+
+
+ # Load dataset via CSV
+ # @param[Array] Ambit result uri, piecewise (1st: base, 2nd: SMILES, 3rd+: features)
+ # @return[String] dataset uri
+ def self.load_ds_csv(ambit_result_uri, smiles_to_inchi)
+
+ master=nil
+ (1...ambit_result_uri.size).collect { |idx|
+ curr_uri = ambit_result_uri[0] + ambit_result_uri[idx]
+ LOGGER.debug "Requesting #{curr_uri}"
+ csv_data = CSV.parse( OpenTox::RestClientWrapper.get(curr_uri, {:accept => "text/csv"}) )
+ if csv_data[0] && csv_data[0].size>1
+ if master.nil? # This is the smiles entry
+ (1...csv_data.size).each{ |idx| csv_data[idx][1] = smiles_to_inchi[csv_data[idx][1]] }
+ master = csv_data
+ next
+ else
+ index_uri = csv_data[0].index("SMILES")
+ csv_data.map {|i| i.delete_at(index_uri)} if index_uri #Removes additional SMILES information
+
+ nr_cols = (csv_data[0].size)-1
+ LOGGER.debug "Merging #{nr_cols} new columns"
+ master.each {|row| nr_cols.times { row.push(nil) } } # Adds empty columns to all rows
+ csv_data.each do |row|
+ temp = master.assoc(row[0]) # Finds the appropriate line in master
+ ((-1*nr_cols)..-1).each { |idx|
+ temp[idx] = row[nr_cols+idx+1] if temp # Updates columns if line is found
+ }
+ end
+ end
+ end
+ }
+
+ index_uri = master[0].index("Compound")
+ master.map {|i| i.delete_at(index_uri)}
+ master[0].each {|cell| cell.chomp!(" ")}
+ master[0][0] = "Compound" #"SMILES"
+ index_smi = master[0].index("SMILES")
+ master.map {|i| i.delete_at(index_smi)} if index_smi
+ #master[0][0] = "SMILES"
+
+ #LOGGER.debug "-------- AM: Writing to dumpfile"
+ #File.open("/tmp/test.csv", 'w') {|f| f.write( master.collect {|r| r.join(",")}.join("\n") ) }
+
+ parser = OpenTox::Parser::Spreadsheets.new
+ ds = OpenTox::Dataset.new
+ ds.save
+ parser.dataset = ds
+ ds = parser.load_csv(master.collect{|r| r.join(",")}.join("\n"))
+ ds.save
+ end
+
+
+ # Gauss kernel
+ # @return [Float]
+ def self.gauss(x, sigma = 0.3)
+ d = 1.0 - x.to_f
+ Math.exp(-(d*d)/(2*sigma*sigma))
+ end
+
+
+ # For symbolic features
+ # @param [Array] Array to test, must indicate non-occurrence with 0.
+ # @return [Boolean] Whether the feature is singular or non-occurring or present everywhere.
+ def self.isnull_or_singular?(array)
+ nr_zeroes = array.count(0)
+ return (nr_zeroes == array.size) || # remove non-occurring feature
+ (nr_zeroes == array.size-1) || # remove singular feature
+ (nr_zeroes == 0) # also remove feature present everywhere
+ end
+
+
+ # Numeric value test
+ # @param[Object] value
+ # @return [Boolean] Whether value is a number
+ def self.numeric?(value)
+ true if Float(value) rescue false
+ end
+
+
+ # For symbolic features
+ # @param [Array] Array to test, must indicate non-occurrence with 0.
+ # @return [Boolean] Whether the feature has variance zero.
+ def self.zero_variance?(array)
+ return array.uniq.size == 1
+ end
+
+
+ # Sum of the sizes of an array's elements.
+ # @param [Array] Array of elements responding to #size
+ # @return [Integer] Sum of element sizes
+ def self.sum_size(array)
+ sum=0
+ array.each { |e| sum += e.size }
+ return sum
+ end
+
+
+ # Minimum Frequency
+ # @param [Integer] per-mil value
+ # @return [Integer] min-frequency
+ def self.min_frequency(training_dataset,per_mil)
+ minfreq = per_mil * training_dataset.compounds.size.to_f / 1000.0 # AM sugg. 8-10 per mil for BBRC, 50 per mil for LAST
+ minfreq = 2 unless minfreq > 2
+ Integer(minfreq)
+ end
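Worked numbers for the suggested 8 per mil (BBRC): the per-mil rate scales with the dataset size and is clamped from below at 2:

per_mil = 8
[500, 100].each { |n_compounds|
  minfreq = per_mil * n_compounds.to_f / 1000.0
  minfreq = 2 unless minfreq > 2
  puts Integer(minfreq)   # => 4 for 500 compounds, 2 for 100 compounds
}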
+
+
+ # Effect calculation for classification
+ # @param [Array] Array of occurrences per class in the form of Enumerables.
+ # @param [Array] Array of database instance counts per class.
+ def self.effect(occurrences, db_instances)
+ max=0
+ max_value=0
+ nr_o = self.sum_size(occurrences)
+ nr_db = db_instances.to_scale.sum
+
+ occurrences.each_with_index { |o,i| # fminer outputs occurrences sorted reverse by activity.
+ actual = o.size.to_f/nr_o
+ expected = db_instances[i].to_f/nr_db
+ if actual > expected
+ if ((actual - expected) / actual) > max_value
+ max_value = (actual - expected) / actual # 'Schleppzeiger' (lagging maximum)
+ max = i
+ end
+ end
+ }
+ max
+ end
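A worked example with two classes (occurrence lists as fminer emits them, most active class first):

occurrences  = [[1, 2, 3, 4], [1]]   # 4 hits in class 0, 1 hit in class 1
db_instances = [100, 100]
# nr_o = 5, nr_db = 200
# class 0: actual = 4/5 = 0.8 > expected = 0.5 -> (0.8 - 0.5) / 0.8 = 0.375
# class 1: actual = 0.2 < expected = 0.5 -> skipped
OpenTox::Algorithm.effect(occurrences, db_instances)   # => 0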
+
+
+ # neighbors
+
+ module Neighbors
+
+ # Get confidence.
+ # @param[Hash] Required keys: :sims, :acts
+ # @return[Float] Confidence
+ def self.get_confidence(params)
+ conf = params[:sims].inject{|sum,x| sum + x }
+ confidence = conf/params[:sims].size
+ LOGGER.debug "Confidence is: '" + confidence.to_s + "'."
+ return confidence
+ end
+
+ end
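Confidence is simply the mean neighbor similarity, e.g.:

OpenTox::Algorithm::Neighbors.get_confidence(:sims => [0.9, 0.7, 0.5], :acts => [1, 1, 0])
# => 0.7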
+
+
+ # Similarity calculations
+ module Similarity
+
+ # Tanimoto similarity
+ # @param [Hash, Array] fingerprints of first compound
+ # @param [Hash, Array] fingerprints of second compound
+ # @return [Float] (Weighted) tanimoto similarity
+ def self.tanimoto(fingerprints_a,fingerprints_b,weights=nil,params=nil)
+
+ common_p_sum = 0.0
+ all_p_sum = 0.0
+
+ # fingerprints are hashes
+ if fingerprints_a.class == Hash && fingerprints_b.class == Hash
+ common_features = fingerprints_a.keys & fingerprints_b.keys
+ all_features = (fingerprints_a.keys + fingerprints_b.keys).uniq
+ if common_features.size > 0
+ common_features.each{ |f| common_p_sum += [ fingerprints_a[f], fingerprints_b[f] ].min }
+ all_features.each{ |f| all_p_sum += [ fingerprints_a[f],fingerprints_b[f] ].compact.max } # compact, since one fp may be empty at that pos
+ end
+
+ # fingerprints are arrays
+ elsif fingerprints_a.class == Array && fingerprints_b.class == Array
+ size = [ fingerprints_a.size, fingerprints_b.size ].min
+ LOGGER.warn "fingerprints don't have equal size" if fingerprints_a.size != fingerprints_b.size
+ (0...size).each { |idx|
+ common_p_sum += [ fingerprints_a[idx], fingerprints_b[idx] ].min
+ all_p_sum += [ fingerprints_a[idx], fingerprints_b[idx] ].max
+ }
+ end
+
+ (all_p_sum > 0.0) ? (common_p_sum/all_p_sum) : 0.0
+
+ end
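A small example with hit-count fingerprints (feature keys are made up):

fp_a = { "C-C" => 2, "C-O" => 1 }
fp_b = { "C-C" => 1, "C-N" => 1 }
# common: min(2, 1) = 1; all: max(2, 1) + 1 + 1 = 4
OpenTox::Algorithm::Similarity.tanimoto(fp_a, fp_b)   # => 0.25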
+
+
+ # Cosine similarity
+ # @param [Hash] properties_a key-value properties of first compound
+ # @param [Hash] properties_b key-value properties of second compound
+ # @return [Float] cosine of angle enclosed between vectors induced by keys present in both a and b
+ def self.cosine(fingerprints_a,fingerprints_b,weights=nil)
+
+ # fingerprints are hashes
+ if fingerprints_a.class == Hash && fingerprints_b.class == Hash
+ a = []; b = []
+ common_features = fingerprints_a.keys & fingerprints_b.keys
+ if common_features.size > 1
+ common_features.each do |p|
+ a << fingerprints_a[p]
+ b << fingerprints_b[p]
+ end
+ end
+
+ # fingerprints are arrays
+ elsif fingerprints_a.class == Array && fingerprints_b.class == Array
+ a = fingerprints_a
+ b = fingerprints_b
+ end
+
+ (a.size > 0 && b.size > 0) ? self.cosine_num(a.to_gv, b.to_gv) : 0.0
+
+ end
+
+
+ # Cosine similarity
+ # @param [GSL::Vector] a
+ # @param [GSL::Vector] b
+ # @return [Float] cosine of angle enclosed between a and b
+ def self.cosine_num(a, b)
+ if a.size>12 && b.size>12
+ a = a[0..11]
+ b = b[0..11]
+ end
+ a.dot(b) / (a.norm * b.norm)
+ end
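For example, two vectors at 45 degrees:

a = [1.0, 0.0].to_gv
b = [1.0, 1.0].to_gv
OpenTox::Algorithm::Similarity.cosine_num(a, b)   # => ~0.7071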
+
+
+ # Outlier detection based on Mahalanobis distances
+ # Multivariate detection on X, univariate detection on y
+ # Uses an existing Rinruby instance, if possible
+ # @param[Hash] Keys query_matrix, data_matrix, acts are required; r, p_outlier optional
+ # @return[Array] indices identifying outliers (may occur several times, this is intended)
+ def self.outliers(params)
+ outlier_array = []
+ data_matrix = params[:data_matrix]
+ query_matrix = params[:query_matrix]
+ acts = params[:acts]
+ begin
+ LOGGER.debug "Outliers (p=#{params[:p_outlier] || 0.9999})..."
+ r = ( params[:r] || RinRuby.new(false,false) )
+ r.eval "suppressPackageStartupMessages(library(\"robustbase\"))"
+ r.eval "outlier_threshold = #{params[:p_outlier] || 0.999}"
+ nr_cases, nr_features = data_matrix.to_a.size, data_matrix.to_a[0].size
+ r.odx = data_matrix.to_a.flatten
+ r.q = query_matrix.to_a.flatten
+ r.y = acts.to_a.flatten
+ r.eval "odx = matrix(odx, #{nr_cases}, #{nr_features}, byrow=T)"
+ r.eval 'odx = rbind(q,odx)' # query is nr 0 (1) in ruby (R)
+ r.eval 'mah = covMcd(odx)$mah' # run MCD alg
+ r.eval "mah = pchisq(mah,#{nr_features})"
+ r.eval 'outlier_array = which(mah>outlier_threshold)' # multivariate outliers using robust mahalanobis
+ outlier_array = r.outlier_array.to_a.collect{|v| v-2 } # translate to ruby index (-1 for q, -1 due to ruby)
+ r.eval 'fqu = matrix(summary(y))[2]'
+ r.eval 'tqu = matrix(summary(y))[5]'
+ r.eval 'outlier_array = which(y>(tqu+1.5*IQR(y)))' # univariate outliers due to Tukey (http://goo.gl/mwzNH)
+ outlier_array += r.outlier_array.to_a.collect{|v| v-1 } # translate to ruby index (-1 due to ruby)
+ r.eval 'outlier_array = which(y<(fqu-1.5*IQR(y)))'
+ outlier_array += r.outlier_array.to_a.collect{|v| v-1 }
+ rescue Exception => e
+ LOGGER.debug "#{e.class}: #{e.message}"
+ #LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
+ end
+ outlier_array
+ end
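A sketch of a call (hypothetical data; requires GSL and an R installation with the robustbase package, as loaded above):

data  = GSL::Matrix.alloc([0.1, 0.2, 0.2, 0.1, 5.0, 5.1, 0.15, 0.22], 4, 2)
query = GSL::Matrix.alloc([0.12, 0.18], 1, 2)
OpenTox::Algorithm::Similarity.outliers(
  :data_matrix  => data,
  :query_matrix => query,
  :acts         => [1.0, 1.1, 9.9, 1.0]
)   # => array of training row indices flagged as outliers (possibly with repeats)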
+
+ end
+
+
+ end
+
+end
+
diff --git a/opentox-ruby.gemspec b/opentox-ruby.gemspec
index d316689..d3ae2d7 100644
--- a/opentox-ruby.gemspec
+++ b/opentox-ruby.gemspec
@@ -48,6 +48,8 @@ Gem::Specification.new do |s|
"lib/templates/default_guest_policy.xml",
"lib/templates/default_policy.xml",
"lib/to-html.rb",
+ "lib/transform.rb",
+ "lib/utils.rb"
"lib/validation.rb"
]
s.homepage = %q{http://github.com/helma/opentox-ruby}
@@ -88,6 +90,7 @@ Gem::Specification.new do |s|
s.add_runtime_dependency(%q<dm-sqlite-adapter>, [">= 0"])
s.add_runtime_dependency(%q<haml>, [">= 3"])
s.add_runtime_dependency(%q<ruby-plot>, ["~> 0.4.0"])
+ s.add_runtime_dependency(%q<statsample>, [">= 0"])
s.add_development_dependency(%q<jeweler>, [">= 0"])
else
s.add_dependency(%q<sinatra>, [">= 0"])
@@ -119,6 +122,7 @@ Gem::Specification.new do |s|
s.add_dependency(%q<dm-sqlite-adapter>, [">= 0"])
s.add_dependency(%q<haml>, [">= 3"])
s.add_dependency(%q<ruby-plot>, ["~> 0.4.0"])
+ s.add_dependency(%q<statsample>, [">= 0"])
s.add_dependency(%q<jeweler>, [">= 0"])
end
else
@@ -151,6 +155,7 @@ Gem::Specification.new do |s|
s.add_dependency(%q<dm-sqlite-adapter>, [">= 0"])
s.add_dependency(%q<haml>, [">= 3"])
s.add_dependency(%q<ruby-plot>, ["~> 0.4.0"])
+ s.add_dependency(%q<statsample>, [">= 0"])
s.add_dependency(%q<jeweler>, [">= 0"])
end
end