summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authormr <mr@mrautenberg.de>2011-08-04 18:37:33 +0200
committermr <mr@mrautenberg.de>2011-08-04 18:37:33 +0200
commit6f26ea70b05b69fb69a102fb4cec688338c1f7ff (patch)
treece05acc8adb8c64ae8cc1ea997d35744b062e35e
parent6b9e012576857fbc6c51cd86581cca792f367cdf (diff)
parent7a13c2da03220ad6716fe7da5bfa3403c873d7d1 (diff)
Merge branch 'release/v2.1.0'v2.1.0
-rw-r--r--Rakefile85
-rw-r--r--VERSION2
-rw-r--r--lib/algorithm.rb877
-rw-r--r--lib/compound.rb40
-rw-r--r--lib/config/config_ru.rb2
-rw-r--r--lib/dataset.rb75
-rw-r--r--lib/environment.rb6
-rw-r--r--lib/feature.rb28
-rw-r--r--lib/helper.rb3
-rw-r--r--lib/model.rb263
-rw-r--r--lib/opentox-ruby.rb2
-rw-r--r--lib/overwrite.rb3
-rw-r--r--lib/parser.rb316
-rw-r--r--lib/rest_client_wrapper.rb5
-rw-r--r--lib/serializer.rb34
-rw-r--r--lib/task.rb11
-rw-r--r--lib/to-html.rb107
-rw-r--r--lib/validation.rb122
18 files changed, 1581 insertions, 400 deletions
diff --git a/Rakefile b/Rakefile
index 08959b0..952affe 100644
--- a/Rakefile
+++ b/Rakefile
@@ -8,53 +8,46 @@ begin
gem.summary = %Q{Ruby wrapper for the OpenTox REST API}
gem.description = %Q{Ruby wrapper for the OpenTox REST API (http://www.opentox.org)}
gem.email = "helma@in-silico.ch"
- gem.homepage = "http://github.com/helma/opentox-ruby"
+ gem.homepage = "http://github.com/opentox/opentox-ruby"
gem.authors = ["Christoph Helma, Martin Guetlein, Andreas Maunz, Micha Rautenberg, David Vorgrimmler"]
- # dependencies
- [ "sinatra",
- "emk-sinatra-url-for",
- "sinatra-respond_to",
- "sinatra-static-assets",
- "rest-client",
- "rack",
- "rack-contrib",
- "rack-flash",
- "nokogiri",
- "rubyzip",
- "roo",
- "spreadsheet",
- "google-spreadsheet-ruby",
- "yajl-ruby",
- "tmail",
- "rinruby",
- "ohm",
- "ohm-contrib",
- "SystemTimer",
- "rjb",
- #valiation-gems
- "dm-core",
- "dm-serializer",
- "dm-timestamps",
- "dm-types",
- "dm-migrations",
- "dm-validations",
- "dm-sqlite-adapter"
- ].each { |dep| gem.add_dependency dep }
-=begin
- [ "dm-core",
- 'dm-serializer',
- 'dm-timestamps',
- 'dm-types',
- 'dm-migrations',
- "dm-mysql-adapter",
- "dm-validations",
- ].each {|dep| gem.add_dependency dep, ">= 1" }
-=end
- #valiation-gem
- gem.add_dependency "haml", ">=3"
- # validation-gems
- gem.add_dependency "ruby-plot", "~>0.4.0"
- ['jeweler'].each { |dep| gem.add_development_dependency dep }
+ # dependencies with versions
+ gem.add_dependency "sinatra", "=1.2.6"
+ gem.add_dependency "emk-sinatra-url-for", "=0.2.1"
+ gem.add_dependency "sinatra-respond_to", "=0.7.0"
+ gem.add_dependency "sinatra-static-assets", "=0.5.0"
+ gem.add_dependency "rest-client", "=1.6.1"
+ gem.add_dependency "rack", "=1.3.1"
+ gem.add_dependency "rack-contrib", "=1.1.0"
+ gem.add_dependency "rack-flash", "=0.1.1"
+ gem.add_dependency "nokogiri", "=1.4.4"
+ gem.add_dependency "rubyzip", "=0.9.4"
+ gem.add_dependency "roo", "=1.9.3"
+ gem.add_dependency "spreadsheet", "=0.6.5.4"
+ gem.add_dependency "google-spreadsheet-ruby", "=0.1.5"
+ gem.add_dependency "yajl-ruby", "=0.8.2"
+ #gem.add_dependency "mail", "=2.3.0"
+ gem.add_dependency "rinruby", "=2.0.2"
+ gem.add_dependency "ohm", "=0.1.3"
+ gem.add_dependency "ohm-contrib", "=0.1.1"
+ gem.add_dependency "SystemTimer", "=1.2.3"
+ gem.add_dependency "rjb", "=1.3.4"
+ gem.add_dependency "haml", "=3.1.1"
+ # for headless browser tests
+ gem.add_dependency "akephalos", "=0.2.5"
+      #validation-gems
+ gem.add_dependency "dm-core", "=1.1.0"
+ gem.add_dependency "dm-serializer", "=1.1.0"
+ gem.add_dependency "dm-timestamps", "=1.1.0"
+ gem.add_dependency "dm-types", "=1.1.0"
+ gem.add_dependency "dm-migrations", "=1.1.0"
+ gem.add_dependency "dm-validations", "=1.1.0"
+ gem.add_dependency "dm-sqlite-adapter", "=1.1.0"
+ gem.add_dependency "ruby-plot", "=0.5.0"
+ gem.add_dependency "gsl", "=1.14.7"
+ gem.add_dependency "statsample", "=1.1.0"
+ #gem.add_dependency "statsample-optimization", "=2.1.0"
+
+ gem.add_development_dependency 'jeweler'
gem.files = FileList["[A-Z]*", "{bin,generators,lib,test}/**/*", 'lib/jeweler/templates/.gitignore']
end
Jeweler::GemcutterTasks.new
diff --git a/VERSION b/VERSION
index 10bf840..50aea0e 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2.0.1 \ No newline at end of file
+2.1.0 \ No newline at end of file
diff --git a/lib/algorithm.rb b/lib/algorithm.rb
index 7fbe0dc..85b54ab 100644
--- a/lib/algorithm.rb
+++ b/lib/algorithm.rb
@@ -3,6 +3,8 @@
# avoids compiling R with X
R = nil
require "rinruby"
+require "statsample"
+require 'uri'
module OpenTox
@@ -16,6 +18,7 @@ module OpenTox
# @param [optional,OpenTox::Task] waiting_task (can be a OpenTox::Subtask as well), progress is updated accordingly
# @return [String] URI of new resource (dataset, model, ...)
def run(params=nil, waiting_task=nil)
+ LOGGER.info "Running algorithm '"+@uri.to_s+"' with params: "+params.inspect
RestClientWrapper.post(@uri, params, {:accept => 'text/uri-list'}, waiting_task).to_s
end
@@ -45,12 +48,75 @@ module OpenTox
end
# Fminer algorithms (https://github.com/amaunz/fminer2)
- module Fminer
+ class Fminer
include Algorithm
+ attr_accessor :prediction_feature, :training_dataset, :minfreq, :compounds, :db_class_sizes, :all_activities, :smi
+
+ def check_params(params,per_mil,subjectid=nil)
+ raise OpenTox::NotFoundError.new "Please submit a dataset_uri." unless params[:dataset_uri] and !params[:dataset_uri].nil?
+ raise OpenTox::NotFoundError.new "Please submit a prediction_feature." unless params[:prediction_feature] and !params[:prediction_feature].nil?
+ @prediction_feature = OpenTox::Feature.find params[:prediction_feature], subjectid
+ @training_dataset = OpenTox::Dataset.find "#{params[:dataset_uri]}", subjectid
+ raise OpenTox::NotFoundError.new "No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}" unless @training_dataset.features and @training_dataset.features.include?(params[:prediction_feature])
+
+ unless params[:min_frequency].nil?
+ @minfreq=params[:min_frequency].to_i
+ raise "Minimum frequency must be a number >0!" unless @minfreq>0
+ else
+ @minfreq=OpenTox::Algorithm.min_frequency(@training_dataset,per_mil) # AM sugg. 8-10 per mil for BBRC, 50 per mil for LAST
+ end
+ end
+
+ def add_fminer_data(fminer_instance, params, value_map)
+
+ id = 1 # fminer start id is not 0
+ @training_dataset.data_entries.each do |compound,entry|
+ begin
+ smiles = OpenTox::Compound.smiles(compound.to_s)
+ rescue
+ LOGGER.warn "No resource for #{compound.to_s}"
+ next
+ end
+ if smiles == '' or smiles.nil?
+ LOGGER.warn "Cannot find smiles for #{compound.to_s}."
+ next
+ end
+
+ value_map=params[:value_map] unless params[:value_map].nil?
+ entry.each do |feature,values|
+ if feature == @prediction_feature.uri
+ values.each do |value|
+ if value.nil?
+ LOGGER.warn "No #{feature} activity for #{compound.to_s}."
+ else
+ if @prediction_feature.feature_type == "classification"
+ activity= value_map.invert[value].to_i # activities are mapped to 1..n
+ @db_class_sizes[activity-1].nil? ? @db_class_sizes[activity-1]=1 : @db_class_sizes[activity-1]+=1 # AM effect
+ elsif @prediction_feature.feature_type == "regression"
+ activity= value.to_f
+ end
+ begin
+ fminer_instance.AddCompound(smiles,id)
+ fminer_instance.AddActivity(activity, id)
+ @all_activities[id]=activity # DV: insert global information
+ @compounds[id] = compound
+ @smi[id] = smiles
+ id += 1
+ rescue Exception => e
+ LOGGER.warn "Could not add " + smiles + "\t" + value.to_s + " to fminer"
+ LOGGER.warn e.backtrace
+ end
+ end
+ end
+ end
+ end
+ end
+ end
+
+ end
# Backbone Refinement Class mining (http://bbrc.maunz.de/)
- class BBRC
- include Fminer
+ class BBRC < Fminer
# Initialize bbrc algorithm
def initialize(subjectid=nil)
super File.join(CONFIG[:services]["opentox-algorithm"], "fminer/bbrc")
@@ -59,8 +125,7 @@ module OpenTox
end
# LAtent STructure Pattern Mining (http://last-pm.maunz.de)
- class LAST
- include Fminer
+ class LAST < Fminer
# Initialize last algorithm
def initialize(subjectid=nil)
super File.join(CONFIG[:services]["opentox-algorithm"], "fminer/last")
@@ -68,7 +133,6 @@ module OpenTox
end
end
- end
# Create lazar prediction model
class Lazar
@@ -90,19 +154,34 @@ module OpenTox
# @param [Array] features_a Features of first compound
# @param [Array] features_b Features of second compound
# @param [optional, Hash] weights Weights for all features
+ # @param [optional, Hash] params Keys: `:training_compound, :compound, :training_compound_features_hits, :nr_hits, :compound_features_hits` are required
# @return [Float] (Weighted) tanimoto similarity
- def self.tanimoto(features_a,features_b,weights=nil)
+ def self.tanimoto(features_a,features_b,weights=nil,params=nil)
common_features = features_a & features_b
all_features = (features_a + features_b).uniq
- common_p_sum = 0.0
+ #LOGGER.debug "dv --------------- common: #{common_features}, all: #{all_features}"
if common_features.size > 0
if weights
- common_features.each{|f| common_p_sum += Algorithm.gauss(weights[f])}
- all_p_sum = 0.0
- all_features.each{|f| all_p_sum += Algorithm.gauss(weights[f])}
+ #LOGGER.debug "nr_hits: #{params[:nr_hits]}"
+ if !params.nil? && params[:nr_hits]
+ params[:weights] = weights
+ params[:mode] = "min"
+ params[:features] = common_features
+ common_p_sum = Algorithm.p_sum_support(params)
+ params[:mode] = "max"
+ params[:features] = all_features
+ all_p_sum = Algorithm.p_sum_support(params)
+ else
+ common_p_sum = 0.0
+ common_features.each{|f| common_p_sum += Algorithm.gauss(weights[f])}
+ all_p_sum = 0.0
+ all_features.each{|f| all_p_sum += Algorithm.gauss(weights[f])}
+ end
+ #LOGGER.debug "common_p_sum: #{common_p_sum}, all_p_sum: #{all_p_sum}, c/a: #{common_p_sum/all_p_sum}"
common_p_sum/all_p_sum
else
- common_features.to_f/all_features
+ #LOGGER.debug "common_features : #{common_features}, all_features: #{all_features}, c/a: #{(common_features.size/all_features.size).to_f}"
+ common_features.size.to_f/all_features.size.to_f
end
else
0.0
@@ -132,65 +211,300 @@ module OpenTox
end
end
+ # Structural Graph Clustering by TU Munich
+ # Finds clusters similar to a query structure in a given training dataset
+ # May be queried for cluster membership of an unknown compound
+ class StructuralClustering
+ attr_accessor :training_dataset_uri, :training_threshold, :query_dataset_uri, :query_threshold, :target_clusters_array
+
+    # @param [String] Training dataset_uri
+    # @param [Float] Similarity threshold for training (optional)
+    # @param [String] Cluster service uri (no AA)
+ def initialize training_dataset_uri, training_threshold=0.8, cluster_service_uri = "http://opentox-dev.informatik.tu-muenchen.de:8080/OpenTox/algorithm/StructuralClustering"
+
+ if (training_dataset_uri =~ URI::regexp).nil? || (cluster_service_uri =~ URI::regexp).nil?
+ raise "Invalid URI."
+ end
+ @training_dataset_uri = training_dataset_uri
+ if !OpenTox::Algorithm.numeric? training_threshold || training_threshold <0 || training_threshold >1
+ raise "Training threshold out of bounds."
+ end
+ @training_threshold = training_threshold.to_f
+
+ # Train a cluster model
+ params = {:dataset_uri => @training_dataset_uri, :threshold => @training_threshold }
+ @cluster_model_uri = OpenTox::RestClientWrapper.post cluster_service_uri, params
+ cluster_model_rdf = OpenTox::RestClientWrapper.get @cluster_model_uri
+ @datasets = OpenTox::Parser::Owl.from_rdf cluster_model_rdf, OT.Dataset, true # must extract OT.Datasets from model
+
+ # Process parsed OWL objects
+ @clusterid_dataset_map = Hash.new
+ @datasets.each { |d|
+ begin
+ d.metadata[OT.hasSource]["Structural Clustering cluster "] = "" # must parse in metadata for string (not elegant)
+ @clusterid_dataset_map[d.metadata[OT.hasSource].to_i] = d.uri
+ rescue Exception => e
+ # ignore other entries!
+ end
+ }
+ end
+
+ # Whether a model has been trained
+ def trained?
+ !@cluster_model_uri.nil?
+ end
+
+ # Instance query: clusters for a compound
+ # @params[String] Query compound
+ # @params[Float] Similarity threshold for query to clusters (optional)
+ def get_clusters query_compound_uri, query_threshold = 0.5
+
+ if !OpenTox::Algorithm.numeric? query_threshold || query_threshold <0 || query_threshold >1
+ raise "Query threshold out of bounds."
+ end
+ @query_threshold = query_threshold.to_f
+
+
+ # Preparing a query dataset
+ query_dataset = OpenTox::Dataset.new
+ @query_dataset_uri = query_dataset.save
+ query_dataset = OpenTox::Dataset.find @query_dataset_uri
+ query_dataset.add_compound query_compound_uri
+ @query_dataset_uri = query_dataset.save
+
+ # Obtaining a clustering for query compound
+ params = { :dataset_uri => @query_dataset_uri, :threshold => @query_threshold }
+ cluster_query_dataset_uri = OpenTox::RestClientWrapper.post @cluster_model_uri, params
+ cluster_query_dataset = OpenTox::Dataset.new cluster_query_dataset_uri
+ cluster_query_dataset.load_all
+
+ # Reading cluster ids for features from metadata
+ feature_clusterid_map = Hash.new
+ pattern="Prediction feature for cluster assignment " # must parse for string in metadata (not elegant)
+ cluster_query_dataset.features.each { |feature_uri,metadata|
+ metadata[DC.title][pattern]=""
+ feature_clusterid_map[feature_uri] = metadata[DC.title].to_i
+ }
+
+ # Integrity check
+ unless cluster_query_dataset.compounds.size == 1
+ raise "Number of predicted compounds is != 1."
+ end
+
+ # Process data entry
+ query_compound_uri = cluster_query_dataset.compounds[0]
+ @target_clusters_array = Array.new
+ cluster_query_dataset.features.keys.each { |cluster_membership_feature|
+
+ # Getting dataset URI for cluster
+ target_cluster = feature_clusterid_map[cluster_membership_feature]
+ dataset = @clusterid_dataset_map[target_cluster]
+
+ # Finally look up presence
+ data_entry = cluster_query_dataset.data_entries[query_compound_uri]
+ present = data_entry[cluster_membership_feature][0]
+
+ # Store result
+ @target_clusters_array << dataset if present > 0.5 # 0.0 for absence, 1.0 for presence
+ }
+ end
+
+ end
+
module Neighbors
+ # Local multi-linear regression (MLR) prediction from neighbors.
+ # Uses propositionalized setting.
+ # @param [Hash] params Keys `:neighbors,:compound,:features,:p_values,:similarity_algorithm,:prop_kernel,:value_map,:transform` are required
+ # @return [Numeric] A prediction value.
+ def self.local_mlr_prop(params)
+
+ confidence=0.0
+ prediction=nil
+
+ if params[:neighbors].size>0
+ props = params[:prop_kernel] ? get_props(params) : nil
+ acts = params[:neighbors].collect { |n| act = n[:activity].to_f }
+ sims = params[:neighbors].collect { |n| Algorithm.gauss(n[:similarity]) }
+ LOGGER.debug "Local MLR (Propositionalization / GSL)."
+ prediction = mlr( {:n_prop => props[0], :q_prop => props[1], :sims => sims, :acts => acts} )
+ transformer = eval("OpenTox::Algorithm::Transform::#{params[:transform]["class"]}.new ([#{prediction}], #{params[:transform]["offset"]})")
+ prediction = transformer.values[0]
+ prediction = nil if prediction.infinite? || params[:prediction_min_max][1] < prediction || params[:prediction_min_max][0] > prediction
+ LOGGER.debug "Prediction is: '" + prediction.to_s + "'."
+ params[:conf_stdev] = false if params[:conf_stdev].nil?
+ confidence = get_confidence({:sims => sims, :acts => acts, :neighbors => params[:neighbors], :conf_stdev => params[:conf_stdev]})
+ confidence = nil if prediction.nil?
+ end
+ {:prediction => prediction, :confidence => confidence}
+
+ end
+
+ # Multi-linear regression weighted by similarity.
+ # Objective Feature Selection, Principal Components Analysis, Scaling of Axes.
+ # @param [Hash] params Keys `:n_prop, :q_prop, :sims, :acts` are required
+ # @return [Numeric] A prediction value.
+ def self.mlr(params)
+
+ # GSL matrix operations:
+ # to_a : row-wise conversion to nested array
+ #
+ # Statsample operations (build on GSL):
+ # to_scale: convert into Statsample format
+
+ begin
+ n_prop = params[:n_prop].collect { |v| v }
+ q_prop = params[:q_prop].collect { |v| v }
+ n_prop << q_prop # attach q_prop
+ nr_cases, nr_features = get_sizes n_prop
+ data_matrix = GSL::Matrix.alloc(n_prop.flatten, nr_cases, nr_features)
+
+ # Principal Components Analysis
+ LOGGER.debug "PCA..."
+ pca = OpenTox::Algorithm::Transform::PCA.new(data_matrix)
+ data_matrix = pca.data_transformed_matrix
+
+ # Attach intercept column to data
+ intercept = GSL::Matrix.alloc(Array.new(nr_cases,1.0),nr_cases,1)
+ data_matrix = data_matrix.horzcat(intercept)
+ (0..data_matrix.size2-2).each { |i|
+ autoscaler = OpenTox::Algorithm::Transform::AutoScale.new(data_matrix.col(i))
+ data_matrix.col(i)[0..data_matrix.size1-1] = autoscaler.scaled_values
+ }
+
+ # Detach query instance
+ n_prop = data_matrix.to_a
+ q_prop = n_prop.pop
+ nr_cases, nr_features = get_sizes n_prop
+ data_matrix = GSL::Matrix.alloc(n_prop.flatten, nr_cases, nr_features)
+
+ # model + support vectors
+ LOGGER.debug "Creating MLR model ..."
+ c, cov, chisq, status = GSL::MultiFit::wlinear(data_matrix, params[:sims].to_scale.to_gsl, params[:acts].to_scale.to_gsl)
+ GSL::MultiFit::linear_est(q_prop.to_scale.to_gsl, c, cov)[0]
+ rescue Exception => e
+ LOGGER.debug "#{e.class}: #{e.message}"
+ end
+
+ end
+
# Classification with majority vote from neighbors weighted by similarity
- # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity`
- # @param [optional] params Ignored (only for compatibility with local_svm_regression)
- # @return [Hash] Hash with keys `:prediction, :confidence`
- def self.weighted_majority_vote(neighbors,params={})
- conf = 0.0
+ # @param [Hash] params Keys `:neighbors,:compound,:features,:p_values,:similarity_algorithm,:prop_kernel,:value_map,:transform` are required
+ # @return [Numeric] A prediction value.
+ def self.weighted_majority_vote(params)
+
+ neighbor_contribution = 0.0
+ confidence_sum = 0.0
confidence = 0.0
- neighbors.each do |neighbor|
- case neighbor[:activity].to_s
- when 'true'
- conf += Algorithm.gauss(neighbor[:similarity])
- when 'false'
- conf -= Algorithm.gauss(neighbor[:similarity])
+ prediction = nil
+
+ params[:neighbors].each do |neighbor|
+ neighbor_weight = Algorithm.gauss(neighbor[:similarity]).to_f
+ neighbor_contribution += neighbor[:activity].to_f * neighbor_weight
+
+ if params[:value_map].size == 2 # AM: provide compat to binary classification: 1=>false 2=>true
+ case neighbor[:activity]
+ when 1
+ confidence_sum -= neighbor_weight
+ when 2
+ confidence_sum += neighbor_weight
+ end
+ else
+ confidence_sum += neighbor_weight
end
end
- if conf > 0.0
- prediction = true
- elsif conf < 0.0
- prediction = false
- else
- prediction = nil
- end
- confidence = conf/neighbors.size if neighbors.size > 0
- {:prediction => prediction, :confidence => confidence.abs}
+
+ if params[:value_map].size == 2
+ if confidence_sum >= 0.0
+ prediction = 2 unless params[:neighbors].size==0
+ elsif confidence_sum < 0.0
+ prediction = 1 unless params[:neighbors].size==0
+ end
+ else
+ prediction = (neighbor_contribution/confidence_sum).round unless params[:neighbors].size==0 # AM: new multinomial prediction
+ end
+ LOGGER.debug "Prediction is: '" + prediction.to_s + "'." unless prediction.nil?
+ confidence = confidence_sum/params[:neighbors].size if params[:neighbors].size > 0
+ LOGGER.debug "Confidence is: '" + confidence.to_s + "'." unless prediction.nil?
+ return {:prediction => prediction, :confidence => confidence.abs}
end
# Local support vector regression from neighbors
- # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features`
- # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required
- # @return [Hash] Hash with keys `:prediction, :confidence`
- def self.local_svm_regression(neighbors,params )
- sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values between query and neighbors
- conf = sims.inject{|sum,x| sum + x }
-
- # AM: Control log taking
- take_logs=true
- neighbors.each do |n|
- if (! n[:activity].nil?) && (n[:activity].to_f < 0.0)
- take_logs = false
- end
+ # @param [Hash] params Keys `:neighbors,:compound,:features,:p_values,:similarity_algorithm,:prop_kernel,:value_map,:transform` are required
+ # @return [Numeric] A prediction value.
+ def self.local_svm_regression(params)
+
+ confidence = 0.0
+ prediction = nil
+ if params[:neighbors].size>0
+ props = params[:prop_kernel] ? get_props(params) : nil
+ acts = params[:neighbors].collect{ |n| n[:activity].to_f }
+ sims = params[:neighbors].collect{ |n| Algorithm.gauss(n[:similarity]) }
+ prediction = props.nil? ? local_svm(acts, sims, "nu-svr", params) : local_svm_prop(props, acts, "nu-svr")
+ transformer = eval("OpenTox::Algorithm::Transform::#{params[:transform]["class"]}.new ([#{prediction}], #{params[:transform]["offset"]})")
+ prediction = transformer.values[0]
+ prediction = nil if prediction.infinite? || params[:prediction_min_max][1] < prediction || params[:prediction_min_max][0] > prediction
+ LOGGER.debug "Prediction is: '" + prediction.to_s + "'."
+ params[:conf_stdev] = false if params[:conf_stdev].nil?
+ confidence = get_confidence({:sims => sims, :acts => acts, :neighbors => params[:neighbors], :conf_stdev => params[:conf_stdev]})
+ confidence = nil if prediction.nil?
end
- acts = neighbors.collect do |n|
- act = n[:activity]
- take_logs ? Math.log10(act.to_f) : act.to_f
- end # activities of neighbors for supervised learning
+ {:prediction => prediction, :confidence => confidence}
+
+ end
+
+ # Local support vector classification from neighbors
+ # @param [Hash] params Keys `:neighbors,:compound,:features,:p_values,:similarity_algorithm,:prop_kernel,:value_map,:transform` are required
+ # @return [Numeric] A prediction value.
+ def self.local_svm_classification(params)
- neighbor_matches = neighbors.collect{ |n| n[:features] } # as in classification: URIs of matches
+ confidence = 0.0
+ prediction = nil
+ if params[:neighbors].size>0
+ props = params[:prop_kernel] ? get_props(params) : nil
+ acts = params[:neighbors].collect { |n| act = n[:activity] }
+ sims = params[:neighbors].collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors
+ prediction = props.nil? ? local_svm(acts, sims, "C-bsvc", params) : local_svm_prop(props, acts, "C-bsvc")
+ LOGGER.debug "Prediction is: '" + prediction.to_s + "'."
+ params[:conf_stdev] = false if params[:conf_stdev].nil?
+ confidence = get_confidence({:sims => sims, :acts => acts, :neighbors => params[:neighbors], :conf_stdev => params[:conf_stdev]})
+ end
+ {:prediction => prediction, :confidence => confidence}
+
+ end
+
+
+ # Local support vector prediction from neighbors.
+ # Uses pre-defined Kernel Matrix.
+ # Not to be called directly (use local_svm_regression or local_svm_classification).
+ # @param [Array] acts, activities for neighbors.
+ # @param [Array] sims, similarities for neighbors.
+ # @param [String] type, one of "nu-svr" (regression) or "C-bsvc" (classification).
+ # @param [Hash] params Keys `:neighbors,:compound,:features,:p_values,:similarity_algorithm,:prop_kernel,:value_map,:transform` are required
+ # @return [Numeric] A prediction value.
+ def self.local_svm(acts, sims, type, params)
+ LOGGER.debug "Local SVM (Weighted Tanimoto Kernel)."
+ neighbor_matches = params[:neighbors].collect{ |n| n[:features] } # URIs of matches
gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel
- if neighbor_matches.size == 0
- raise "No neighbors found"
+
+ prediction = nil
+ if Algorithm::zero_variance? acts
+ prediction = acts[0]
else
# gram matrix
(0..(neighbor_matches.length-1)).each do |i|
+ neighbor_i_hits = params[:fingerprints][params[:neighbors][i][:compound]]
gram_matrix[i] = [] unless gram_matrix[i]
# upper triangle
((i+1)..(neighbor_matches.length-1)).each do |j|
- sim = eval("#{params[:similarity_algorithm]}(neighbor_matches[i], neighbor_matches[j], params[:p_values])")
+ neighbor_j_hits= params[:fingerprints][params[:neighbors][j][:compound]]
+ sim_params = {}
+ if params[:nr_hits]
+ sim_params[:nr_hits] = true
+ sim_params[:compound_features_hits] = neighbor_i_hits
+ sim_params[:training_compound_features_hits] = neighbor_j_hits
+ end
+ sim = eval("#{params[:similarity_algorithm]}(neighbor_matches[i], neighbor_matches[j], params[:p_values], sim_params)")
gram_matrix[i][j] = Algorithm.gauss(sim)
gram_matrix[j] = [] unless gram_matrix[j]
gram_matrix[j][i] = gram_matrix[i][j] # lower triangle
@@ -198,6 +512,7 @@ module OpenTox
gram_matrix[i][i] = 1.0
end
+
#LOGGER.debug gram_matrix.to_yaml
@r = RinRuby.new(false,false) # global R instance leads to Socket errors after a large number of requests
@r.eval "library('kernlab')" # this requires R package "kernlab" to be installed
@@ -208,27 +523,171 @@ module OpenTox
@r.y = acts
@r.sims = sims
- LOGGER.debug "Preparing R data ..."
- # prepare data
- @r.eval "y<-as.vector(y)"
- @r.eval "gram_matrix<-as.kernelMatrix(matrix(gram_matrix,n,n))"
- @r.eval "sims<-as.vector(sims)"
-
- # model + support vectors
- LOGGER.debug "Creating SVM model ..."
- @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"nu-svr\", nu=0.8)"
- @r.eval "sv<-as.vector(SVindex(model))"
- @r.eval "sims<-sims[sv]"
- @r.eval "sims<-as.kernelMatrix(matrix(sims,1))"
- LOGGER.debug "Predicting ..."
- @r.eval "p<-predict(model,sims)[1,1]"
- prediction = 10**(@r.p.to_f) if take_logs
- LOGGER.debug "Prediction is: '" + prediction.to_s + "'."
- @r.quit # free R
+ begin
+ LOGGER.debug "Preparing R data ..."
+ # prepare data
+ @r.eval "y<-as.vector(y)"
+ @r.eval "gram_matrix<-as.kernelMatrix(matrix(gram_matrix,n,n))"
+ @r.eval "sims<-as.vector(sims)"
+
+ # model + support vectors
+ LOGGER.debug "Creating SVM model ..."
+ @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"#{type}\", nu=0.5)"
+ @r.eval "sv<-as.vector(SVindex(model))"
+ @r.eval "sims<-sims[sv]"
+ @r.eval "sims<-as.kernelMatrix(matrix(sims,1))"
+ LOGGER.debug "Predicting ..."
+ if type == "nu-svr"
+ @r.eval "p<-predict(model,sims)[1,1]"
+ elsif type == "C-bsvc"
+ @r.eval "p<-predict(model,sims)"
+ end
+ if type == "nu-svr"
+ prediction = @r.p
+ elsif type == "C-bsvc"
+ #prediction = (@r.p.to_f == 1.0 ? true : false)
+ prediction = @r.p
+ end
+ @r.quit # free R
+ rescue Exception => e
+ LOGGER.debug "#{e.class}: #{e.message}"
+ LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
+ end
+
end
- confidence = conf/neighbors.size if neighbors.size > 0
- {:prediction => prediction, :confidence => confidence}
-
+ prediction
+ end
+
+ # Local support vector prediction from neighbors.
+ # Uses propositionalized setting.
+ # Not to be called directly (use local_svm_regression or local_svm_classification).
+ # @param [Array] props, propositionalization of neighbors and query structure e.g. [ Array_for_q, two-nested-Arrays_for_n ]
+ # @param [Array] acts, activities for neighbors.
+ # @param [String] type, one of "nu-svr" (regression) or "C-bsvc" (classification).
+ # @return [Numeric] A prediction value.
+ def self.local_svm_prop(props, acts, type)
+
+ LOGGER.debug "Local SVM (Propositionalization / Kernlab Kernel)."
+ n_prop = props[0] # is a matrix, i.e. two nested Arrays.
+ q_prop = props[1] # is an Array.
+
+ prediction = nil
+ if Algorithm::zero_variance? acts
+ prediction = acts[0]
+ else
+ #LOGGER.debug gram_matrix.to_yaml
+ @r = RinRuby.new(false,false) # global R instance leads to Socket errors after a large number of requests
+ @r.eval "library('kernlab')" # this requires R package "kernlab" to be installed
+ LOGGER.debug "Setting R data ..."
+ # set data
+ @r.n_prop = n_prop.flatten
+ @r.n_prop_x_size = n_prop.size
+ @r.n_prop_y_size = n_prop[0].size
+ @r.y = acts
+ @r.q_prop = q_prop
+
+ begin
+ LOGGER.debug "Preparing R data ..."
+ # prepare data
+ @r.eval "y<-matrix(y)"
+ @r.eval "prop_matrix<-matrix(n_prop, n_prop_x_size, n_prop_y_size, byrow=TRUE)"
+ @r.eval "q_prop<-matrix(q_prop, 1, n_prop_y_size, byrow=TRUE)"
+
+ # model + support vectors
+ LOGGER.debug "Creating SVM model ..."
+ @r.eval "model<-ksvm(prop_matrix, y, type=\"#{type}\", nu=0.5)"
+ LOGGER.debug "Predicting ..."
+ if type == "nu-svr"
+ @r.eval "p<-predict(model,q_prop)[1,1]"
+ elsif type == "C-bsvc"
+ @r.eval "p<-predict(model,q_prop)"
+ end
+ if type == "nu-svr"
+ prediction = @r.p
+ elsif type == "C-bsvc"
+ #prediction = (@r.p.to_f == 1.0 ? true : false)
+ prediction = @r.p
+ end
+ @r.quit # free R
+ rescue Exception => e
+ LOGGER.debug "#{e.class}: #{e.message}"
+ LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
+ end
+ end
+ prediction
+ end
+
+ # Get confidence for regression, with standard deviation of neighbor activity if conf_stdev is set.
+ # @param[Hash] Required keys: :sims, :acts, :neighbors, :conf_stdev
+ # @return[Float] Confidence
+ def self.get_confidence(params)
+ if params[:conf_stdev]
+ sim_median = params[:sims].to_scale.median
+ if sim_median.nil?
+ confidence = nil
+ else
+ standard_deviation = params[:acts].to_scale.standard_deviation_sample
+ confidence = (sim_median*Math.exp(-1*standard_deviation)).abs
+ if confidence.nan?
+ confidence = nil
+ end
+ end
+ else
+ conf = params[:sims].inject{|sum,x| sum + x }
+ confidence = conf/params[:neighbors].size
+ end
+ LOGGER.debug "Confidence is: '" + confidence.to_s + "'."
+ return confidence
+ end
+
+ # Get X and Y size of a nested Array (Matrix)
+ def self.get_sizes(matrix)
+ begin
+ nr_cases = matrix.size
+ nr_features = matrix[0].size
+ rescue Exception => e
+ LOGGER.debug "#{e.class}: #{e.message}"
+ LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
+ end
+ #puts "NRC: #{nr_cases}, NRF: #{nr_features}"
+ [ nr_cases, nr_features ]
+ end
+
+ # Calculate the propositionalization matrix aka instantiation matrix (0/1 entries for features)
+ # Same for the vector describing the query compound
+ # @param[Array] neighbors.
+ # @param[OpenTox::Compound] query compound.
+ # @param[Array] Dataset Features.
+ # @param[Array] Fingerprints of neighbors.
+ # @param[Float] p-values of Features.
+ def self.get_props (params)
+ matrix = Array.new
+ begin
+ params[:neighbors].each do |n|
+ n = n[:compound]
+ row = []
+ params[:features].each do |f|
+ if ! params[:fingerprints][n].nil?
+ row << (params[:fingerprints][n].include?(f) ? (params[:p_values][f] * params[:fingerprints][n][f]) : 0.0)
+ else
+ row << 0.0
+ end
+ end
+ matrix << row
+ end
+ row = []
+ params[:features].each do |f|
+ if params[:nr_hits]
+ compound_feature_hits = params[:compound].match_hits([f])
+ row << (compound_feature_hits.size == 0 ? 0.0 : (params[:p_values][f] * compound_feature_hits[f]))
+ else
+ row << (params[:compound].match([f]).size == 0 ? 0.0 : params[:p_values][f])
+ end
+ end
+ rescue Exception => e
+ LOGGER.debug "get_props failed with '" + $! + "'"
+ end
+ [ matrix, row ]
end
end
@@ -250,6 +709,195 @@ module OpenTox
def features(dataset_uri,compound_uri)
end
end
+
+ module Transform
+ include Algorithm
+
+ # The transformer that inverts values.
+ # 1/x is used, after values have been moved >= 1.
+ class Inverter
+ attr_accessor :offset, :values
+
+      # @param [Array] Values to transform.
+      # @param [Float] Offset for restore.
+ def initialize *args
+ case args.size
+ when 1
+ begin
+ values=args[0]
+            raise "Cannot transform, values empty." if values.size==0
+ @values = values.collect { |v| -1.0 * v }
+ @offset = 1.0 - @values.minmax[0]
+ @offset = -1.0 * @offset if @offset>0.0
+ @values.collect! { |v| v - @offset } # slide >1
+ @values.collect! { |v| 1 / v } # invert to [0,1]
+ rescue Exception => e
+ LOGGER.debug "#{e.class}: #{e.message}"
+ LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
+ end
+ when 2
+ @offset = args[1].to_f
+ @values = args[0].collect { |v| 1 / v }
+ @values.collect! { |v| v + @offset }
+ @values.collect! { |v| -1.0 * v }
+ end
+ end
+ end
+
+ # The transformer that takes logs.
+ # Log10 is used, after values have been moved > 0.
+ class Log10
+ attr_accessor :offset, :values
+
+      # @param [Array] Values to transform / restore.
+      # @param [Float] Offset for restore.
+ def initialize *args
+ @distance_to_zero = 0.000000001 # 1 / 1 billion
+ case args.size
+ when 1
+ begin
+ values=args[0]
+ raise "Cannot transform, values empty." if values.size==0
+ @offset = values.minmax[0]
+ @offset = -1.0 * @offset if @offset>0.0
+ @values = values.collect { |v| v - @offset } # slide > anchor
+ @values.collect! { |v| v + @distance_to_zero } #
+ @values.collect! { |v| Math::log10 v } # log10 (can fail)
+ rescue Exception => e
+ LOGGER.debug "#{e.class}: #{e.message}"
+ LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
+ end
+ when 2
+ @offset = args[1].to_f
+ @values = args[0].collect { |v| 10**v }
+ @values.collect! { |v| v - @distance_to_zero }
+ @values.collect! { |v| v + @offset }
+ end
+ end
+ end
+
+ # The transformer that does nothing (No OPeration).
+ class NOP
+ attr_accessor :offset, :values
+
+      # @param [Array] Values to transform / restore.
+      # @param [Float] Offset for restore.
+ def initialize *args
+ @offset = 0.0
+ @distance_to_zero = 0.0
+ case args.size
+ when 1
+ @values = args[0]
+ when 2
+ @values = args[0]
+ end
+ end
+ end
+
+
+ # Auto-Scaler for Arrays
+ # Center on mean and divide by standard deviation
+ class AutoScale
+ attr_accessor :scaled_values, :mean, :stdev
+
+      # @param [Array] Values to transform.
+ def initialize values
+ @scaled_values = values
+ @mean = @scaled_values.to_scale.mean
+ @stdev = @scaled_values.to_scale.standard_deviation_sample
+ @scaled_values = @scaled_values.collect {|vi| vi - @mean }
+ @scaled_values.collect! {|vi| vi / @stdev } unless @stdev == 0.0
+ end
+ end
+
+ # Principal Components Analysis
+ # Statsample Library (http://ruby-statsample.rubyforge.org/) by C. Bustos
+ class PCA
+ attr_accessor :data_matrix, :data_transformed_matrix, :eigenvector_matrix, :eigenvalue_sums, :autoscaler
+
+ # Creates a transformed dataset as GSL::Matrix.
+ # @param [GSL::Matrix] Data matrix.
+ # @param [Float] Compression ratio from [0,1].
+ # @return [GSL::Matrix] Data transformed matrix.
+ def initialize data_matrix, compression=0.05
+ begin
+ @data_matrix = data_matrix
+ @compression = compression.to_f
+ @stdev = Array.new
+ @mean = Array.new
+
+ # Objective Feature Selection
+ raise "Error! PCA needs at least two dimensions." if data_matrix.size2 < 2
+ @data_matrix_selected = nil
+ (0..@data_matrix.size2-1).each { |i|
+ if !Algorithm::zero_variance?(@data_matrix.col(i).to_a)
+ if @data_matrix_selected.nil?
+ @data_matrix_selected = GSL::Matrix.alloc(@data_matrix.size1, 1)
+ @data_matrix_selected.col(0)[0..@data_matrix.size1-1] = @data_matrix.col(i)
+ else
+ @data_matrix_selected = @data_matrix_selected.horzcat(GSL::Matrix.alloc(@data_matrix.col(i).to_a,@data_matrix.size1, 1))
+ end
+ end
+ }
+ raise "Error! PCA needs at least two dimensions." if (@data_matrix_selected.nil? || @data_matrix_selected.size2 < 2)
+
+ # Scaling of Axes
+ @data_matrix_scaled = GSL::Matrix.alloc(@data_matrix_selected.size1, @data_matrix_selected.size2)
+ (0..@data_matrix_selected.size2-1).each { |i|
+ @autoscaler = OpenTox::Algorithm::Transform::AutoScale.new(@data_matrix_selected.col(i))
+ @data_matrix_scaled.col(i)[0..@data_matrix.size1-1] = @autoscaler.scaled_values
+ @stdev << @autoscaler.stdev
+ @mean << @autoscaler.mean
+ }
+
+ data_matrix_hash = Hash.new
+ (0..@data_matrix_scaled.size2-1).each { |i|
+ column_view = @data_matrix_scaled.col(i)
+ data_matrix_hash[i] = column_view.to_scale
+ }
+ dataset_hash = data_matrix_hash.to_dataset # see http://goo.gl/7XcW9
+ cor_matrix=Statsample::Bivariate.correlation_matrix(dataset_hash)
+ pca=Statsample::Factor::PCA.new(cor_matrix)
+ pca.eigenvalues.each { |ev| raise "PCA failed!" unless !ev.nan? }
+ @eigenvalue_sums = Array.new
+ (0..dataset_hash.fields.size-1).each { |i|
+ @eigenvalue_sums << pca.eigenvalues[0..i].inject{ |sum, ev| sum + ev }
+ }
+ eigenvectors_selected = Array.new
+ pca.eigenvectors.each_with_index { |ev, i|
+ if (@eigenvalue_sums[i] <= ((1.0-@compression)*dataset_hash.fields.size)) || (eigenvectors_selected.size == 0)
+ eigenvectors_selected << ev.to_a
+ end
+ }
+ @eigenvector_matrix = GSL::Matrix.alloc(eigenvectors_selected.flatten, eigenvectors_selected.size, dataset_hash.fields.size).transpose
+ dataset_matrix = dataset_hash.to_gsl.transpose
+ @data_transformed_matrix = (@eigenvector_matrix.transpose * dataset_matrix).transpose
+ rescue Exception => e
+ LOGGER.debug "#{e.class}: #{e.message}"
+ LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
+ end
+ end
+
+ # Restores data in the original feature space (possibly with compression loss).
+ # @return [GSL::Matrix] Data matrix.
+ def restore
+ begin
+ data_matrix_restored = (@eigenvector_matrix * @data_transformed_matrix.transpose).transpose # reverse pca
+ # reverse scaling
+ (0..data_matrix_restored.size2-1).each { |i|
+ data_matrix_restored.col(i)[0..data_matrix_restored.size1-1] *= @stdev[i] unless @stdev[i] == 0.0
+ data_matrix_restored.col(i)[0..data_matrix_restored.size1-1] += @mean[i]
+ }
+ data_matrix_restored
+ rescue Exception => e
+ LOGGER.debug "#{e.class}: #{e.message}"
+ LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
+ end
+ end
+
+ end
+
+ end
# Gauss kernel
# @return [Float]
@@ -257,16 +905,85 @@ module OpenTox
d = 1.0 - x.to_f
Math.exp(-(d*d)/(2*sigma*sigma))
end
+
+ # For symbolic features
+ # @param [Array] Array to test, must indicate non-occurrence with 0.
+ # @return [Boolean] Whether the feature is singular or non-occurring or present everywhere.
+ def self.isnull_or_singular?(array)
+ nr_zeroes = array.count(0)
+ return (nr_zeroes == array.size) || # remove non-occurring feature
+ (nr_zeroes == array.size-1) || # remove singular feature
+ (nr_zeroes == 0) # also remove feature present everywhere
+ end
+
+ # Numeric value test
+ # @param[Object] value
+ # @return [Boolean] Whether value is a number
+ def self.numeric?(value)
+ true if Float(value) rescue false
+ end
+
+ # For symbolic features
+ # @param [Array] Array to test, must indicate non-occurrence with 0.
+ # @return [Boolean] Whether the feature has variance zero.
+ def self.zero_variance?(array)
+ return (array.to_scale.variance_sample == 0.0)
+ end
- # Median of an array
+ # Sum of an array for Arrays.
# @param [Array] Array with values
- # @return [Float] Median
- def self.median(array)
- return nil if array.empty?
- array.sort!
- m_pos = array.size / 2
- return array.size % 2 == 1 ? array[m_pos] : (array[m_pos-1] + array[m_pos])/2
+ # @return [Integer] Sum of size of values
+ def self.sum_size(array)
+ sum=0
+ array.each { |e| sum += e.size }
+ return sum
+ end
+
+ # Minimum Frequency
+ # @param [Integer] per-mil value
+ # return [Integer] min-frequency
+ def self.min_frequency(training_dataset,per_mil)
+ minfreq = per_mil * training_dataset.compounds.size.to_f / 1000.0 # AM sugg. 8-10 per mil for BBRC, 50 per mil for LAST
+ minfreq = 2 unless minfreq > 2
+      Integer(minfreq)
end
+ # Effect calculation for classification
+ # @param [Array] Array of occurrences per class in the form of Enumerables.
+ # @param [Array] Array of database instance counts per class.
+ def self.effect(occurrences, db_instances)
+ max=0
+ max_value=0
+ nr_o = self.sum_size(occurrences)
+ nr_db = db_instances.to_scale.sum
+
+ occurrences.each_with_index { |o,i| # fminer outputs occurrences sorted reverse by activity.
+ actual = o.size.to_f/nr_o
+ expected = db_instances[i].to_f/nr_db
+ if actual > expected
+ if ((actual - expected) / actual) > max_value
+ max_value = (actual - expected) / actual # 'Schleppzeiger'
+ max = i
+ end
+ end
+ }
+ max
+ end
+
+ # Returns Support value of an fingerprint
+    # @param [Hash] params Keys: `:compound_features_hits, :weights, :training_compound_features_hits, :features, :nr_hits, :mode` are required
+ # return [Numeric] Support value
+ def self.p_sum_support(params)
+ p_sum = 0.0
+ params[:features].each{|f|
+ compound_hits = params[:compound_features_hits][f]
+ neighbor_hits = params[:training_compound_features_hits][f]
+ p_sum += eval("(Algorithm.gauss(params[:weights][f]) * ([compound_hits, neighbor_hits].compact.#{params[:mode]}))")
+ }
+ p_sum
+ end
+
end
end
+
+
diff --git a/lib/compound.rb b/lib/compound.rb
index f631ca9..e7b4da0 100644
--- a/lib/compound.rb
+++ b/lib/compound.rb
@@ -21,6 +21,17 @@ module OpenTox
else
@inchi = RestClientWrapper.get(@uri, :accept => 'chemical/x-inchi').to_s.chomp if @uri
end
+
+ if @uri and @inchi.to_s.size==0
+        LOGGER.warn "REMOVE AMBIT HACK: no inchi for compound "+@uri.to_s+", load via smiles"
+ @inchi = Compound.smiles2inchi(Compound.smiles(@uri))
+ end
+ end
+
+ # request smiles from compound service via accept header
+ # @return smiles as string
+ def self.smiles(uri)
+ RestClientWrapper.get(uri, :accept => 'chemical/x-daylight-smiles').to_s.chomp
end
# Create a compound from smiles string
@@ -153,6 +164,35 @@ module OpenTox
#smarts_array.collect { |s| s if match?(s)}.compact
end
+ # Match_hits an array of smarts strings, returns hash with matching smarts as key and number of non-unique hits as value
+ # @example
+ # compound = OpenTox::Compound.from_name("Benzene")
+  #   compound.match_hits(['cc','cN']) # returns {'cc' => 12}
+ # @param [Array] smarts_array Array with Smarts strings
+ # @return [Hash] Hash with matching smarts as key and number of non-unique hits as value
+ def match_hits(smarts_array)
+ # avoid recreation of OpenBabel objects
+ obconversion = OpenBabel::OBConversion.new
+ obmol = OpenBabel::OBMol.new
+ obconversion.set_in_format('inchi')
+ obconversion.read_string(obmol,@inchi)
+ smarts_pattern = OpenBabel::OBSmartsPattern.new
+ smarts_hits = {}
+ #LOGGER.debug "dv ----------- obmol #{Compound.new(@inchi).to_smiles}"
+ smarts_array.collect do |smarts|
+ #LOGGER.debug "dv ----------- all smarts #{smarts}"
+ smarts_pattern.init(smarts)
+ if smarts_pattern.match(obmol)
+ hits = smarts_pattern.get_map_list
+ smarts_hits[smarts] = hits.size
+ end
+ end
+ #LOGGER.debug "dv ----------- smarts => hits #{smarts_hits}"
+ return smarts_hits
+ #smarts_array.collect { |s| s if match?(s)}.compact
+ end
+
+
# Get URI of compound image with highlighted fragments
#
# @param [Array] activating Array with activating Smarts strings
diff --git a/lib/config/config_ru.rb b/lib/config/config_ru.rb
index 93df867..dc04263 100644
--- a/lib/config/config_ru.rb
+++ b/lib/config/config_ru.rb
@@ -19,6 +19,7 @@ set :lock, true
end
use Rack::ShowExceptions
+=begin
if defined?(MAIL)
# monkeypatch with the original method
@@ -50,3 +51,4 @@ if defined?(MAIL)
mail.smtp MAIL
end
end
+=end
diff --git a/lib/dataset.rb b/lib/dataset.rb
index 4005c1c..5ebad0f 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -102,6 +102,13 @@ module OpenTox
copy parser.load_uri(subjectid)
end
+ def load_sdf(sdf,subjectid=nil)
+ save(subjectid) unless @uri # get a uri for creating features
+ parser = Parser::Sdf.new
+ parser.dataset = self
+ parser.load_sdf(sdf)
+ end
+
# Load CSV string (format specification: http://toxcreate.org/help)
# - loads data_entries, compounds, features
# - sets metadata (warnings) for parser errors
@@ -149,7 +156,11 @@ module OpenTox
# Load and return only compound URIs from the dataset service
# @return [Array] Compound URIs in the dataset
def load_compounds(subjectid=nil)
- RestClientWrapper.get(File.join(uri,"compounds"),{:accept=> "text/uri-list", :subjectid => subjectid}).to_s.each_line do |compound_uri|
+ # fix for datasets like http://apps.ideaconsult.net:8080/ambit2/dataset/272?max=50
+ u = URI::parse(uri)
+ u.path = File.join(u.path,"compounds")
+ u = u.to_s
+ RestClientWrapper.get(u,{:accept=> "text/uri-list", :subjectid => subjectid}).to_s.each_line do |compound_uri|
@compounds << compound_uri.chomp
end
@compounds.uniq!
@@ -167,19 +178,15 @@ module OpenTox
@features
end
- def feature_classes(feature, subjectid=nil)
- if Feature.find(feature, subjectid).feature_type == "classification"
- classes = []
- @data_entries.each do |c,e|
- e[feature].each { |v| classes << v.to_s }
- end
- classes.uniq.sort
- else
- nil
- end
+ # returns the accept_values of a feature, i.e. the classification domain / all possible feature values
+ # @param [String] feature the URI of the feature
+ # @return [Array] return array with strings, nil if value is not set (e.g. when feature is numeric)
+ def accept_values(feature)
+ accept_values = features[feature][OT.acceptValue]
+      accept_values.sort! if accept_values
+ accept_values
end
-=begin
# Detect feature type(s) in the dataset
# @return [String] `classification", "regression", "mixed" or unknown`
def feature_type(subjectid=nil)
@@ -193,6 +200,7 @@ module OpenTox
"unknown"
end
end
+=begin
=end
# Get Spreadsheet representation
@@ -229,6 +237,30 @@ module OpenTox
s.to_rdfxml
end
+ # Get SDF representation of compounds
+ # @return [String] SDF representation
+ def to_sdf
+ sum=""
+ @compounds.each{ |c|
+ sum << OpenTox::Compound.new(c).to_inchi
+ sum << OpenTox::Compound.new(c).to_sdf.sub(/\n\$\$\$\$/,'')
+ @data_entries[c].each{ |f,v|
+ sum << "> <\"#{f}\">\n"
+ sum << v.join(", ")
+ sum << "\n\n"
+ }
+ sum << "$$$$\n"
+ }
+ sum
+ end
+
+ def to_urilist
+ @compounds.inject { |sum, c|
+ sum << OpenTox::Compound.new(c).uri
+ sum + "\n"
+ }
+ end
+
# Get name (DC.title) of a feature
# @param [String] feature Feature URI
# @return [String] Feture title
@@ -307,6 +339,12 @@ module OpenTox
end
end
end
+ # set feature metadata in new dataset accordingly (including accept values)
+ features.each do |f|
+ self.features[f].each do |k,v|
+ dataset.features[f][k] = v
+ end
+ end
dataset.add_metadata(metadata)
dataset.save(subjectid)
dataset
@@ -369,12 +407,14 @@ module OpenTox
end
def value(compound)
- @data_entries[compound.uri].collect{|f,v| v.first if f.match(/prediction/)}.compact.first
+ v = nil
+ v = @data_entries[compound.uri].collect{|f,v| v.first if f.match(/value/)}.compact.first if @data_entries[compound.uri]
+ v = nil if v.is_a? Array and v.empty?
+ v
end
def confidence(compound)
- feature_uri = @data_entries[compound.uri].collect{|f,v| f if f.match(/prediction/)}.compact.first
- @features[feature_uri][OT.confidence]
+ @data_entries[compound.uri].collect{|f,v| v.first if f.match(/confidence/)}.compact.first if @data_entries[compound.uri]
end
def descriptors(compound)
@@ -382,12 +422,11 @@ module OpenTox
end
def measured_activities(compound)
- source = @metadata[OT.hasSource]
- @data_entries[compound.uri].collect{|f,v| v if f.match(/#{source}/)}.compact.flatten
+ @data_entries[compound.uri].collect{|f,v| v if f.match(/#{@metadata[OT.hasSource]}/)}.compact.flatten if @data_entries[compound.uri]
end
def neighbors(compound)
- @data_entries[compound.uri].collect{|f,v| @features[f] if f.match(/neighbor/)}.compact
+ @data_entries[compound.uri].collect{|f,v| @features[f] if f.match(/neighbor/)}.compact if @data_entries[compound.uri]
end
# def errors(compound)
diff --git a/lib/environment.rb b/lib/environment.rb
index ffc4f60..6d1bb85 100644
--- a/lib/environment.rb
+++ b/lib/environment.rb
@@ -27,7 +27,7 @@ end
Ohm.connect :thread_safe => true
# load mail settings for error messages
-load File.join config_dir,"mail.rb" if File.exists?(File.join config_dir,"mail.rb")
+#load File.join config_dir,"mail.rb" if File.exists?(File.join config_dir,"mail.rb")
logfile = "#{LOG_DIR}/#{ENV["RACK_ENV"]}.log"
#LOGGER = OTLogger.new(logfile,'daily') # daily rotation
@@ -40,8 +40,8 @@ else
end
# Regular expressions for parsing classification data
-TRUE_REGEXP = /^(true|active|1|1.0|tox)$/i
-FALSE_REGEXP = /^(false|inactive|0|0.0|low tox)$/i
+TRUE_REGEXP = /^(true|active|1|1.0|tox|activating)$/i
+FALSE_REGEXP = /^(false|inactive|0|0.0|low tox|deactivating)$/i
# Task durations
DEFAULT_TASK_MAX_DURATION = 36000
diff --git a/lib/feature.rb b/lib/feature.rb
index b631e46..4ba58ce 100644
--- a/lib/feature.rb
+++ b/lib/feature.rb
@@ -2,6 +2,8 @@ module OpenTox
class Feature
include OpenTox
+ attr_accessor :subjectid
+
# Find a feature
# @param [String] uri Feature URI
# @return [OpenTox::Task] Feature object
@@ -13,31 +15,31 @@ module OpenTox
else
feature.add_metadata Parser::Owl::Dataset.new(uri).load_metadata
end
+ feature.subjectid = subjectid
feature
end
-
+
# provides feature type, possible types are "regression" or "classification"
# @return [String] feature type, unknown if OT.isA property is unknown/ not set
def feature_type
+ raise OpenTox::BadRequestError.new("rdf type of feature '"+uri.to_s+"' not set") unless metadata[RDF.type]
if metadata[RDF.type].flatten.include?(OT.NominalFeature)
"classification"
elsif metadata[RDF.type].flatten.include?(OT.NumericFeature)
"regression"
- else
- #"unknown"
- metadata[RDF.type].inspect
- end
-=begin
- case metadata[RDF.type]
- when /NominalFeature/
- "classification"
- when /NumericFeature/
- "regression"
+ elsif metadata[OWL.sameAs]
+ metadata[OWL.sameAs].each do |f|
+ begin
+ type = Feature.find(f, subjectid).feature_type
+ return type unless type=="unknown"
+ rescue => ex
+ LOGGER.warn "could not load same-as-feature '"+f.to_s+"' for feature '"+uri.to_s+"' : "+ex.message.to_s
+ end
+ end
+ "unknown"
else
"unknown"
end
-=end
end
-
end
end
diff --git a/lib/helper.rb b/lib/helper.rb
index 995f3e9..33774b4 100644
--- a/lib/helper.rb
+++ b/lib/helper.rb
@@ -81,7 +81,7 @@ helpers do
when "css"
@accept = 'text/css'
else
- # halt 404, "File format #{extension} not supported."
+ # raise OpenTox::NotFoundError.new "File format #{extension} not supported."
end
end
end
@@ -94,4 +94,3 @@ before do
protected!(@subjectid)
end
end
-
diff --git a/lib/model.rb b/lib/model.rb
index 048de85..ff0ce0e 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -23,7 +23,7 @@ module OpenTox
# Generic OpenTox model class for all API compliant services
class Generic
include Model
-
+
# Find Generic Opentox Model via URI, and loads metadata, could raise NotFound/NotAuthorized error
# @param [String] uri Model URI
# @return [OpenTox::Model::Generic] Model instance
@@ -34,42 +34,75 @@ module OpenTox
raise "could not load model metadata '"+uri.to_s+"'" if model.metadata==nil or model.metadata.size==0
model
end
-
- # provides feature type, possible types are "regression" or "classification"
- # @return [String] feature type, "unknown" if type could not be estimated
+
+ # provides feature type, possible types are "regression" or "classification"
+ # @return [String] feature type, "unknown" if type could not be estimated
def feature_type(subjectid=nil)
- return @feature_type if @feature_type
-
- # dynamically perform restcalls if necessary
- load_metadata(subjectid) if @metadata==nil or @metadata.size==0 or (@metadata.size==1 && @metadata.values[0]==@uri)
- algorithm = OpenTox::Algorithm::Generic.find(@metadata[OT.algorithm], subjectid)
- algorithm_title = algorithm ? algorithm.metadata[DC.title] : nil
- algorithm_type = algorithm ? algorithm.metadata[RDF.type] : nil
- dependent_variable = OpenTox::Feature.find( @metadata[OT.dependentVariables],subjectid )
- dependent_variable_type = dependent_variable ? dependent_variable.feature_type : nil
- type_indicators = [dependent_variable_type, @metadata[RDF.type], @metadata[DC.title], @uri, algorithm_type, algorithm_title].flatten
- type_indicators.each do |type|
- case type
- when /(?i)classification/
- @feature_type = "classification"
- break
- when /(?i)regression/
- @feature_type = "regression"
- end
+ unless @feature_type
+ load_predicted_variables( subjectid ) unless @predicted_variable
+ @feature_type = OpenTox::Feature.find( @predicted_variable, subjectid ).feature_type
end
- raise "unknown model "+type_indicators.inspect unless @feature_type
@feature_type
end
-
- end
+ def predicted_variable( subjectid )
+ load_predicted_variables( subjectid ) unless @predicted_variable
+ @predicted_variable
+ end
+
+ def predicted_variables( subjectid )
+ load_predicted_variables( subjectid, false ) unless @predicted_variables
+ @predicted_variables
+ end
+
+ def predicted_confidence( subjectid )
+ load_predicted_variables( subjectid ) unless @predicted_confidence
+ @predicted_confidence
+ end
+
+ private
+ def load_predicted_variables( subjectid=nil, use_confidence=true )
+ load_metadata(subjectid) if @metadata==nil or @metadata.size==0 or (@metadata.size==1 && @metadata.values[0]==@uri)
+ if @metadata[OT.predictedVariables]
+ predictedVariables = @metadata[OT.predictedVariables]
+ if predictedVariables.is_a?(Array)
+ if (predictedVariables.size==1)
+ @predicted_variable = predictedVariables[0]
+ elsif (predictedVariables.size>=2)
+ # PENDING identify confidence
+ if use_confidence
+ conf_index = -1
+ predictedVariables.size.times do |i|
+ f = OpenTox::Feature.find(predictedVariables[i], subjectid)
+ conf_index = i if f.metadata[DC.title]=~/(?i)confidence/
+ end
+ raise "could not estimate predicted variable from model: '"+uri.to_s+
+ "', number of predicted-variables==2, but no confidence found" if conf_index==-1
+ end
+ if (predictedVariables.size==2) && use_confidence
+ @predicted_variable = predictedVariables[1-conf_index]
+ @predicted_confidence = predictedVariables[conf_index]
+ else
+ @predicted_variables = predictedVariables
+ end
+ else
+ raise "could not estimate predicted variable from model: '"+uri.to_s+"', number of predicted-variables == 0"
+ end
+ else
+ raise "could not estimate predicted variable from model: '"+uri.to_s+"', predicted-variables is no array"
+ end
+ end
+ raise "could not estimate predicted variable from model: '"+uri.to_s+"'" unless (@predicted_variable || @predicted_variables)
+ end
+ end
+
# Lazy Structure Activity Relationship class
class Lazar
- include Model
include Algorithm
+ include Model
- attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim, :subjectid
+ attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim, :subjectid, :prop_kernel, :value_map, :nr_hits, :transform, :conf_stdev, :prediction_min_max
def initialize(uri=nil)
@@ -78,7 +111,7 @@ module OpenTox
else
super CONFIG[:services]["opentox-model"]
end
-
+
@metadata[OT.algorithm] = File.join(CONFIG[:services]["opentox-algorithm"],"lazar")
@features = []
@@ -86,12 +119,18 @@ module OpenTox
@activities = {}
@p_values = {}
@fingerprints = {}
+ @value_map = {}
+ @prediction_min_max = []
@feature_calculation_algorithm = "Substructure.match"
@similarity_algorithm = "Similarity.tanimoto"
@prediction_algorithm = "Neighbors.weighted_majority_vote"
-
+
+ @nr_hits = false
@min_sim = 0.3
+ @prop_kernel = false
+ @transform = { "class" => "NOP" }
+ @conf_stdev = false
end
@@ -111,13 +150,25 @@ module OpenTox
# Create a new lazar model
# @param [optional,Hash] params Parameters for the lazar algorithm (OpenTox::Algorithm::Lazar)
# @return [OpenTox::Model::Lazar] lazar model
- def self.create(params)
+ def self.create(params, waiting_task=nil )
subjectid = params[:subjectid]
lazar_algorithm = OpenTox::Algorithm::Generic.new File.join( CONFIG[:services]["opentox-algorithm"],"lazar")
- model_uri = lazar_algorithm.run(params)
+ model_uri = lazar_algorithm.run(params, waiting_task)
OpenTox::Model::Lazar.find(model_uri, subjectid)
end
+ def run( params, accept_header=nil, waiting_task=nil )
+ unless accept_header
+ if CONFIG[:yaml_hosts].include?(URI.parse(@uri).host)
+ accept_header = 'application/x-yaml'
+ else
+ accept_header = 'application/rdf+xml'
+ end
+ end
+ LOGGER.info "running model "+@uri.to_s+", params: "+params.inspect+", accept: "+accept_header.to_s
+ RestClientWrapper.post(@uri,params,{:accept => accept_header},waiting_task).to_s
+ end
+
# Get a parameter value
# @param [String] param Parameter name
# @return [String] Parameter value
@@ -131,6 +182,7 @@ module OpenTox
# @param [optional,OpenTox::Task] waiting_task (can be a OpenTox::Subtask as well), progress is updated accordingly
# @return [OpenTox::Dataset] Dataset with predictions
def predict_dataset(dataset_uri, subjectid=nil, waiting_task=nil)
+
@prediction_dataset = Dataset.create(CONFIG[:services]["opentox-dataset"], subjectid)
@prediction_dataset.add_metadata({
OT.hasSource => @uri,
@@ -150,7 +202,7 @@ module OpenTox
LOGGER.warn "prediction for compound "+compound_uri.to_s+" failed: "+ex.message
end
end
- @prediction_dataset.save(subjectid)
+ #@prediction_dataset.save(subjectid)
@prediction_dataset
end
@@ -164,49 +216,52 @@ module OpenTox
features = {}
unless @prediction_dataset
- #@prediction_dataset = cached_prediction
- #return @prediction_dataset if cached_prediction
@prediction_dataset = Dataset.create(CONFIG[:services]["opentox-dataset"], subjectid)
@prediction_dataset.add_metadata( {
OT.hasSource => @uri,
DC.creator => @uri,
- # TODO: fix dependentVariable
DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )),
OT.parameters => [{DC.title => "compound_uri", OT.paramValue => compound_uri}]
} )
end
- return @prediction_dataset if database_activity(subjectid)
-
- neighbors
- prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})")
-
- prediction_feature_uri = File.join( @prediction_dataset.uri, "feature", "prediction", File.basename(@metadata[OT.dependentVariables]),@prediction_dataset.compounds.size.to_s)
- # TODO: fix dependentVariable
- @prediction_dataset.metadata[OT.dependentVariables] = prediction_feature_uri
-
- if @neighbors.size == 0
- @prediction_dataset.add_feature(prediction_feature_uri, {
- RDF.type => [OT.MeasuredFeature],
- OT.hasSource => @uri,
- DC.creator => @uri,
- DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )),
- OT.error => "No similar compounds in training dataset.",
- OT.parameters => [{DC.title => "compound_uri", OT.paramValue => compound_uri}]
- })
- @prediction_dataset.add @compound.uri, prediction_feature_uri, prediction[:prediction]
+ if OpenTox::Feature.find(metadata[OT.dependentVariables]).feature_type == "regression"
+ all_activities = []
+ all_activities = @activities.values.flatten.collect! { |i| i.to_f }
+ @prediction_min_max[0] = (all_activities.to_scale.min/2)
+ @prediction_min_max[1] = (all_activities.to_scale.max*2)
+ end
- else
- @prediction_dataset.add_feature(prediction_feature_uri, {
- RDF.type => [OT.ModelPrediction],
- OT.hasSource => @uri,
- DC.creator => @uri,
- DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )),
- OT.prediction => prediction[:prediction],
- OT.confidence => prediction[:confidence],
- OT.parameters => [{DC.title => "compound_uri", OT.paramValue => compound_uri}]
- })
- @prediction_dataset.add @compound.uri, prediction_feature_uri, prediction[:prediction]
+ unless database_activity(subjectid) # adds database activity to @prediction_dataset
+
+ neighbors
+ prediction = eval("#{@prediction_algorithm} ( { :neighbors => @neighbors,
+ :compound => @compound,
+ :features => @features,
+ :p_values => @p_values,
+ :fingerprints => @fingerprints,
+ :similarity_algorithm => @similarity_algorithm,
+ :prop_kernel => @prop_kernel,
+ :value_map => @value_map,
+ :nr_hits => @nr_hits,
+ :conf_stdev => @conf_stdev,
+ :prediction_min_max => @prediction_min_max,
+ :transform => @transform } ) ")
+
+ value_feature_uri = File.join( @uri, "predicted", "value")
+ confidence_feature_uri = File.join( @uri, "predicted", "confidence")
+
+ @prediction_dataset.metadata[OT.dependentVariables] = @metadata[OT.dependentVariables] unless @prediction_dataset.metadata[OT.dependentVariables]
+ @prediction_dataset.metadata[OT.predictedVariables] = [value_feature_uri, confidence_feature_uri] unless @prediction_dataset.metadata[OT.predictedVariables]
+
+ if OpenTox::Feature.find(metadata[OT.dependentVariables], subjectid).feature_type == "classification"
+ @prediction_dataset.add @compound.uri, value_feature_uri, @value_map[prediction[:prediction]]
+ else
+ @prediction_dataset.add @compound.uri, value_feature_uri, prediction[:prediction]
+ end
+ @prediction_dataset.add @compound.uri, confidence_feature_uri, prediction[:confidence]
+ @prediction_dataset.features[value_feature_uri][DC.title] = @prediction_dataset.metadata[DC.title]
+ @prediction_dataset.features[confidence_feature_uri][DC.title] = "Confidence"
if verbose
if @feature_calculation_algorithm == "Substructure.match"
@@ -260,7 +315,6 @@ module OpenTox
end
n+=1
end
- # what happens with dataset predictions?
end
end
@@ -268,33 +322,49 @@ module OpenTox
@prediction_dataset
end
- # Find neighbors and store them as object variable
- def neighbors
+
+ # Find neighbors and store them as object variable, access all compounds for that.
+ def neighbors
@compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm
-
@neighbors = []
- @fingerprints.each do |training_compound,training_features|
- sim = eval("#{@similarity_algorithm}(@compound_features,training_features,@p_values)")
- if sim > @min_sim
- @activities[training_compound].each do |act|
- @neighbors << {
- :compound => training_compound,
- :similarity => sim,
- :features => training_features,
- :activity => act
- }
- end
- end
+ @fingerprints.keys.each do |training_compound| # AM: access all compounds
+ add_neighbor @fingerprints[training_compound].keys, training_compound
end
+ end
+ # Adds a neighbor to @neighbors if it passes the similarity threshold.
+ def add_neighbor(training_features, training_compound)
+ compound_features_hits = {}
+ training_compound_features_hits = {}
+ if @nr_hits
+ compound_features_hits = @compound.match_hits(@compound_features)
+ training_compound_features_hits = @fingerprints[training_compound]
+ #LOGGER.debug "dv ------------ training_compound_features_hits:#{training_compound_features_hits.class} #{training_compound_features_hits}"
+ end
+ params = {}
+ params[:nr_hits] = @nr_hits
+ params[:compound_features_hits] = compound_features_hits
+ params[:training_compound_features_hits] = training_compound_features_hits
+
+ sim = eval("#{@similarity_algorithm}(training_features, @compound_features, @p_values, params)")
+ if sim > @min_sim
+ @activities[training_compound].each do |act|
+ @neighbors << {
+ :compound => training_compound,
+ :similarity => sim,
+ :features => training_features,
+ :activity => act
+ }
+ end
+ end
end
# Find database activities and store them in @prediction_dataset
# @return [Boolean] true if compound has databasse activities, false if not
def database_activity(subjectid)
if @activities[@compound.uri]
- @activities[@compound.uri].each { |act| @prediction_dataset.add @compound.uri, @metadata[OT.dependentVariables], act }
+ @activities[@compound.uri].each { |act| @prediction_dataset.add @compound.uri, @metadata[OT.dependentVariables], @value_map[act] }
@prediction_dataset.add_metadata(OT.hasSource => @metadata[OT.trainingDataset])
@prediction_dataset.save(subjectid)
true
@@ -303,6 +373,35 @@ module OpenTox
end
end
+ def prediction_features
+ [prediction_value_feature,prediction_confidence_feature]
+ end
+
+ def prediction_value_feature
+ dependent_uri = @metadata[OT.dependentVariables].first
+ feature = OpenTox::Feature.new File.join( @uri, "predicted", "value")
+ feature.add_metadata( {
+ RDF.type => [OT.ModelPrediction],
+ OT.hasSource => @uri,
+ DC.creator => @uri,
+ DC.title => URI.decode(File.basename( dependent_uri )),
+ OWL.sameAs => dependent_uri
+ })
+ feature
+ end
+
+ def prediction_confidence_feature
+ dependent_uri = @metadata[OT.dependentVariables].first
+ feature = OpenTox::Feature.new File.join( @uri, "predicted", "confidence")
+ feature.add_metadata( {
+ RDF.type => [OT.ModelPrediction],
+ OT.hasSource => @uri,
+ DC.creator => @uri,
+ DC.title => "#{URI.decode(File.basename( dependent_uri ))} confidence"
+ })
+ feature
+ end
+
# Save model at model service
def save(subjectid)
self.uri = RestClientWrapper.post(@uri,self.to_yaml,{:content_type => "application/x-yaml", :subjectid => subjectid})
diff --git a/lib/opentox-ruby.rb b/lib/opentox-ruby.rb
index ae05cb2..1fa2a86 100644
--- a/lib/opentox-ruby.rb
+++ b/lib/opentox-ruby.rb
@@ -1,4 +1,4 @@
-['rubygems', 'sinatra', 'sinatra/url_for', 'ohm', 'rest_client', 'yaml', 'cgi', 'spork', 'error', 'overwrite', 'environment'].each do |lib|
+['rubygems', 'sinatra', 'sinatra/url_for', 'ohm', 'rest_client', 'yaml', 'cgi', 'spork', 'error', 'overwrite', 'environment', 'gsl'].each do |lib|
require lib
end
diff --git a/lib/overwrite.rb b/lib/overwrite.rb
index df4e1b7..393e8e7 100644
--- a/lib/overwrite.rb
+++ b/lib/overwrite.rb
@@ -50,7 +50,8 @@ class Sinatra::Base
halt task.http_code,task.to_yaml # PENDING differs from task-webservice
when /html/
response['Content-Type'] = "text/html"
- halt task.http_code,OpenTox.text_to_html(task.to_yaml, @subjectid)
+ # html -> task created with html form -> redirect to task uri
+ redirect task.uri
else # default /uri-list/
response['Content-Type'] = "text/uri-list"
if task.completed?
diff --git a/lib/parser.rb b/lib/parser.rb
index 5f847c3..d0975af 100644
--- a/lib/parser.rb
+++ b/lib/parser.rb
@@ -40,8 +40,9 @@ module OpenTox
else
file = Tempfile.new("ot-rdfxml")
if @dataset
- # do not concat /metadata to uri string, this would not work for dataset/R401577?max=3
uri = URI::parse(@uri)
+ #remove params like dataset/<id>?max=3 from uri, not needed for metadata
+ uri.query = nil
uri.path = File.join(uri.path,"metadata")
uri = uri.to_s
else
@@ -56,7 +57,7 @@ module OpenTox
`rapper -i rdfxml -o ntriples #{file.path} 2>/dev/null`.each_line do |line|
triple = line.to_triple
if triple[0] == @uri
- if triple[1] == RDF.type # allow multiple types
+ if triple[1] == RDF.type || triple[1]==OT.predictedVariables # allow multiple types
@metadata[triple[1]] = [] unless @metadata[triple[1]]
@metadata[triple[1]] << triple[2].split('^^').first
else
@@ -75,6 +76,9 @@ module OpenTox
@metadata[OT.parameters] << parameter
end
end
+ #@metadata.each do |k,v|
+ #v = v.first if v and v.size == 1
+ #end
@metadata
end
@@ -82,7 +86,11 @@ module OpenTox
# @param [String] rdf
# @param [String] type of the info (e.g. OT.Task, OT.ErrorReport) needed to get the subject-uri
# @return [Owl] with uri and metadata set
- def self.from_rdf( rdf, type )
+ def self.from_rdf( rdf, type, allow_multiple = false )
+
+ uris = Array.new
+ owls = Array.new
+
# write to file and read convert with rapper into tripples
file = Tempfile.new("ot-rdfxml")
file.puts rdf
@@ -95,20 +103,27 @@ module OpenTox
triples.each_line do |line|
triple = line.to_triple
if triple[1] == RDF['type'] and triple[2]==type
- raise "uri already set, two uris found with type: "+type.to_s if uri
+ if !allow_multiple
+ raise "uri already set, two uris found with type: "+type.to_s if uri
+ end
uri = triple[0]
+ uris << uri
end
end
File.delete(file.path)
+
# load metadata
- metadata = {}
- triples.each_line do |line|
- triple = line.to_triple
- metadata[triple[1]] = triple[2].split('^^').first if triple[0] == uri and triple[1] != RDF['type']
- end
- owl = Owl::Generic.new(uri)
- owl.metadata = metadata
- owl
+ uris.each { |uri|
+ metadata = {}
+ triples.each_line do |line|
+ triple = line.to_triple
+ metadata[triple[1]] = triple[2].split('^^').first if triple[0] == uri and triple[1] != RDF['type']
+ end
+ owl = Owl::Generic.new(uri)
+ owl.metadata = metadata
+ owls << owl
+ }
+ allow_multiple ? owls : owls[0]
end
# Generic parser for all OpenTox classes
@@ -228,7 +243,12 @@ module OpenTox
file = Tempfile.new("ot-rdfxml")
# do not concat /features to uri string, this would not work for dataset/R401577?max=3
uri = URI::parse(@uri)
- uri.path = File.join(uri.path,"features")
+ # PENDING
+ # ambit models return http://host/dataset/id?feature_uris[]=sth but
+ # ambit dataset services do not support http://host/dataset/id/features?feature_uris[]=sth
+ # and features are not included in http://host/dataset/id/features
+ # -> load features from complete dataset
+ uri.path = File.join(uri.path,"features") unless @uri=~/\?(feature_uris|page|pagesize)/
uri = uri.to_s
file.puts OpenTox::RestClientWrapper.get uri,{:subjectid => subjectid,:accept => "application/rdf+xml"},nil,false
file.close
@@ -244,8 +264,13 @@ module OpenTox
File.delete(to_delete) if to_delete
statements.each do |triple|
if features.include? triple[0]
- @dataset.features[triple[0]] = {} unless @dataset.features[triple[0]]
- @dataset.features[triple[0]][triple[1]] = triple[2].split('^^').first
+ @dataset.features[triple[0]] = {} unless @dataset.features[triple[0]]
+ if triple[1] == RDF.type
+ @dataset.features[triple[0]][triple[1]] = [] unless @dataset.features[triple[0]][triple[1]]
+ @dataset.features[triple[0]][triple[1]] << triple[2].split('^^').first
+ else
+ @dataset.features[triple[0]][triple[1]] = triple[2].split('^^').first
+ end
end
end
@dataset.features
@@ -271,22 +296,39 @@ module OpenTox
@duplicates = {}
end
+ def detect_new_values(row, value_maps)
+ row.shift
+ row.each_index do |i|
+ value = row[i]
+ value_maps[i] = Hash.new if value_maps[i].nil?
+ value_maps[i][value].nil? ? value_maps[i][value]=0 : value_maps[i][value] += 1
+ end
+ value_maps
+ end
+
# Load Spreadsheet book (created with roo gem http://roo.rubyforge.org/, excel format specification: http://toxcreate.org/help)
# @param [Excel] book Excel workbook object (created with roo gem)
# @return [OpenTox::Dataset] Dataset object with Excel data
def load_spreadsheet(book)
book.default_sheet = 0
add_features book.row(1)
+ value_maps = Array.new
+ regression_features=Array.new
- # AM: fix mixed read in
- regression_features=false
2.upto(book.last_row) { |i|
row = book.row(i)
- regression_features = detect_regression_features row
- break if regression_features==true
+ value_maps = detect_new_values(row, value_maps)
+ value_maps.each_with_index { |vm,j|
+ if vm.size > 5 # 5 is the maximum nr of classes supported by Fminer.
+ regression_features[j]=true
+ else
+ regression_features[j]=false
+ end
+ }
+ }
+ 2.upto(book.last_row) { |i|
+ add_values book.row(i), regression_features
}
-
- 2.upto(book.last_row) { |i| add_values book.row(i),regression_features }
warnings
@dataset
end
@@ -298,21 +340,27 @@ module OpenTox
row = 0
input = csv.split("\n")
add_features split_row(input.shift)
+ value_maps = Array.new
+ regression_features=Array.new
-
- # AM: fix mixed read in
- regression_features=false
input.each { |row|
row = split_row(row)
- regression_features = detect_regression_features row
- break if regression_features==true
+ value_maps = detect_new_values(row, value_maps)
+ value_maps.each_with_index { |vm,j|
+ if vm.size > 5 # 5 is the maximum nr of classes supported by Fminer.
+ regression_features[j]=true
+ else
+ regression_features[j]=false
+ end
+ }
+ }
+ input.each { |row|
+ add_values split_row(row), regression_features
}
- input.each { |row| add_values split_row(row),regression_features }
warnings
@dataset
end
-
private
def warnings
@@ -354,20 +402,10 @@ module OpenTox
end
end
- def detect_regression_features row
- row.shift
- regression_features=false
- row.each_index do |i|
- value = row[i]
- type = feature_type(value)
- if type == OT.NumericFeature
- regression_features=true
- end
- end
- regression_features
- end
-
- def add_values(row, regression_features=false)
+ # Adds a row to a dataset
+ # @param Array A row split up as an array
+ # @param Array Indicator for regression for each field
+ def add_values(row, regression_features)
smiles = row.shift
compound = Compound.from_smiles(smiles)
@@ -381,27 +419,23 @@ module OpenTox
row.each_index do |i|
value = row[i]
feature = @features[i]
- type = feature_type(value)
+ type = nil
+ if (regression_features[i])
+ type = feature_type(value)
+ if type != OT.NumericFeature
+ raise "Error! Expected numeric values."
+ end
+ else
+ type = OT.NominalFeature
+ end
@feature_types[feature] << type
- if (regression_features)
+ case type
+ when OT.NumericFeature
val = value.to_f
- else
- case type
- when OT.NominalFeature
- case value.to_s
- when TRUE_REGEXP
- val = true
- when FALSE_REGEXP
- val = false
- end
- when OT.NumericFeature
- val = value.to_f
- when OT.StringFeature
- val = value.to_s
- @activity_errors << smiles+", "+row.join(", ")
- end
+ when OT.NominalFeature
+ val = value.to_s
end
if val!=nil
@dataset.add(compound.uri, feature, val)
@@ -413,26 +447,170 @@ module OpenTox
end
end
- def numeric?(value)
- true if Float(value) rescue false
+ def feature_type(value)
+ if OpenTox::Algorithm::numeric? value
+ return OT.NumericFeature
+ else
+ return OT.NominalFeature
+ end
+ end
+
+ def split_row(row)
+ row.chomp.gsub(/["']/,'').split(/\s*[,;\t]\s*/) # remove quotes
+ end
+
+ end
+
+ class Table
+
+ attr_accessor :data, :features, :compounds
+
+ def initialize
+ @data = {}
+ @activity_errors = []
+ end
+
+ def feature_values(feature)
+ @data.collect{|c, row| row[feature]}.uniq.compact
+ end
+
+ def feature_types(feature)
+ @data.collect{|c, row| feature_type(row[feature])}.uniq.compact
+ end
+
+ def features
+ @data.collect{|c,row| row.keys}.flatten.uniq
+ end
+
+ def clean_features
+ ignored_features = []
+ features.each do |feature|
+ if feature_values(feature).size > 5
+ if feature_types(feature).size == 1 and feature_types(feature).first == OT.NumericFeature
+ # REGRESSION
+ elsif feature_types(feature).include? OT.NumericFeature
+ @data.each{|c,row| row[feature] = nil unless OpenTox::Algorithm::numeric?(row[feature]) } # delete nominal features
+ @activity_errors << "Nominal feature values of #{feature} ignored (using numeric features for regression models)."
+ else
+ @activity_errors << "Feature #{feature} ignored (more than 5 nominal feature values and no numeric values)."
+ ignored_features << feature
+ next
+ end
+ elsif feature_values(feature).size <= 1
+ @activity_errors << "Feature #{feature} ignored (less than 2 feature values)."
+ ignored_features << feature
+ else
+ # CLASSIFICATION
+ end
+ end
+ ignored_features.each do |feature|
+ @data.each{ |c,row| row.delete feature }
+ end
+ @activity_errors
end
- def classification?(value)
- !value.to_s.strip.match(TRUE_REGEXP).nil? or !value.to_s.strip.match(FALSE_REGEXP).nil?
+ def add_to_dataset(dataset)
+ features.each do |feature_name|
+ feature_uri = File.join(dataset.uri,"feature",URI.encode(feature_name))
+ dataset.add_feature(feature_uri,{DC.title => feature_name})
+ end
+
+ @data.each do |compound,row|
+ unless row.empty?
+ row.each do |feature,value|
+ if OpenTox::Algorithm::numeric?(value)
+ value = value.to_f
+ elsif value.nil? or value.empty?
+ value = nil
+ else
+ value = value.to_s
+ end
+ feature_uri = File.join(dataset.uri,"feature",URI.encode(feature))
+ dataset.add(compound, feature_uri, value)
+ #dataset.features[feature_uri][RDF.type] = feature_types(feature)
+ #dataset.features[feature_uri][OT.acceptValue] = feature_values(feature)
+ if feature_types(feature).include? OT.NumericFeature
+ dataset.features[feature_uri][RDF.type] = [OT.NumericFeature]
+ else
+ dataset.features[feature_uri][RDF.type] = [OT.NominalFeature]
+ dataset.features[feature_uri][OT.acceptValue] = feature_values(feature)
+ end
+ end
+ end
+ end
end
+ private
+
def feature_type(value)
- if classification? value
- return OT.NominalFeature
- elsif numeric? value
+ if OpenTox::Algorithm::numeric? value
return OT.NumericFeature
else
- return OT.StringFeature
+ return OT.NominalFeature
end
end
+ end
+
+ # quick hack to enable sdf import via csv
+ # should be refactored
+ class Sdf
+
+ attr_accessor :dataset
+
+ def initialize
+ @data = {}
+
+ @compound_errors = []
+ @activity_errors = []
+ @duplicates = {}
+ end
+
+ def load_sdf(sdf)
+
+ obconversion = OpenBabel::OBConversion.new
+ obmol = OpenBabel::OBMol.new
+ obconversion.set_in_and_out_formats "sdf", "inchi"
+
+ table = Table.new
+
+ properties = []
+ sdf.each_line { |l| properties << l.to_s if l.match(/</) }
+ properties.uniq!
+ properties.sort!
+ properties.collect!{ |p| p.gsub(/<|>/,'').strip.chomp }
+
+ rec = 0
+ sdf.split(/\$\$\$\$\r*\n/).each do |s|
+ rec += 1
+ obconversion.read_string obmol, s
+ begin
+ inchi = obconversion.write_string(obmol).gsub(/\s/,'').chomp
+ @duplicates[inchi] = [] unless @duplicates[inchi]
+ @duplicates[inchi] << rec #inchi#+", "+row.join(", ")
+ compound = Compound.from_inchi inchi
+ rescue
+ @compound_errors << "Could not convert structure to InChI, all entries for this compound (record #{rec} have been ignored! \n#{s}"
+ next
+ end
+ row = {}
+ obmol.get_data.each { |d| row[d.get_attribute] = d.get_value if properties.include?(d.get_attribute) }
+ table.data[compound.uri] = row
+ end
+
+ # find and remove ignored_features
+ @activity_errors = table.clean_features
+ table.add_to_dataset @dataset
+
+ warnings = ''
+ warnings += "<p>Incorrect Smiles structures (ignored):</p>" + @compound_errors.join("<br/>") unless @compound_errors.empty?
+ warnings += "<p>Irregular activities (ignored):</p>" + @activity_errors.join("<br/>") unless @activity_errors.empty?
+ duplicate_warnings = ''
+ @duplicates.each {|inchi,lines| duplicate_warnings << "<p>#{lines.join('<br/>')}</p>" if lines.size > 1 }
+ warnings += "<p>Duplicated structures (all structures/activities used for model building, please make sure, that the results were obtained from <em>independent</em> experiments):</p>" + duplicate_warnings unless duplicate_warnings.empty?
+
+ @dataset.metadata[OT.Warnings] = warnings
+ @dataset
- def split_row(row)
- row.chomp.gsub(/["']/,'').split(/\s*[,;]\s*/) # remove quotes
end
end
diff --git a/lib/rest_client_wrapper.rb b/lib/rest_client_wrapper.rb
index 747a353..6d25bb3 100644
--- a/lib/rest_client_wrapper.rb
+++ b/lib/rest_client_wrapper.rb
@@ -131,13 +131,14 @@ module OpenTox
raise "unknown content-type for task : '"+res.content_type.to_s+"'"+" base-uri: "+base_uri.to_s+" content: "+res[0..200].to_s
end
- LOGGER.debug "result is a task '"+task.uri.to_s+"', wait for completion"
+ #LOGGER.debug "result is a task '"+task.uri.to_s+"', wait for completion"
task.wait_for_completion waiting_task
unless task.completed? # maybe task was cancelled / error
if task.errorReport
received_error task.errorReport, task.http_code, nil, {:rest_uri => task.uri, :rest_code => task.http_code}
else
- raise "task status: '"+task.status.to_s+"' but errorReport nil"
+ raise "status of task '"+task.uri.to_s+"' is no longer running (hasStatus is '"+task.status+
+ "'), but it is neither completed nor has an errorReport"
end
end
diff --git a/lib/serializer.rb b/lib/serializer.rb
index e4cb541..3921784 100644
--- a/lib/serializer.rb
+++ b/lib/serializer.rb
@@ -17,6 +17,7 @@ module OpenTox
# this should come from opentox.owl
OT.Compound => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } ,
OT.Feature => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } ,
+ OT.Model => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } ,
OT.NominalFeature => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } ,
OT.NumericFeature => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } ,
OT.StringFeature => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } ,
@@ -27,6 +28,8 @@ module OpenTox
OT.Parameter => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } ,
OT.Task => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } ,
OTA.PatternMiningSupervised => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } ,
+ OTA.ClassificationLazySingleTarget => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } ,
+ OTA.RegressionLazySingleTarget => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } ,
#classes for validation
OT.Validation => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } ,
@@ -45,6 +48,10 @@ module OpenTox
OT.values => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } ,
OT.algorithm => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } ,
OT.parameters => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } ,
+ OT.featureDataset => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } ,
+ OT.dependentVariables => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } ,
+ OT.predictedVariables => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } ,
+ OT.paramValue => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } ,
#object props for validation#
OT.model => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } ,
@@ -103,6 +110,7 @@ module OpenTox
OT.precision => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } ,
OT.areaUnderRoc => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } ,
OT.weightedAreaUnderRoc => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } ,
+ OT.weightedAccuracy => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } ,
OT.fMeasure => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } ,
OT.percentIncorrect => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } ,
OT.validationType => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } ,
@@ -126,7 +134,7 @@ module OpenTox
OT.hasSource => { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } ,
OT.value => { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } ,
OT.paramScope => { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } ,
- OT.paramValue => { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } ,
+ #OT.paramValue => { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } ,
}
@data_entries = {}
@@ -157,23 +165,16 @@ module OpenTox
# Add a dataset
# @param [String] uri Dataset URI
def add_dataset(dataset)
-
@dataset = dataset.uri
-
@object[dataset.uri] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Dataset }] }
-
add_metadata dataset.uri, dataset.metadata
-
dataset.compounds.each { |compound| add_compound compound }
-
dataset.features.each { |feature,metadata| add_feature feature,metadata }
-
dataset.data_entries.each do |compound,entry|
entry.each do |feature,values|
values.each { |value| add_data_entry compound,feature,value }
end
end
-
end
# Add a algorithm
@@ -188,6 +189,14 @@ module OpenTox
def add_model(uri,metadata)
@object[uri] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Model }] }
add_metadata uri, metadata
+ @object[metadata[OT.featureDataset]] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Dataset }] }
+ @object[metadata[OT.trainingDataset]] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Dataset }] }
+ @object[metadata[OT.dependentVariables]] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Feature }] }
+ metadata[OT.predictedVariables].each{|feature| @object[feature] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Feature }]}} #unless metadata[OT.predictedVariables].nil?
+ # TODO: add algorithms from parameters
+ @object["http://ot-dev.in-silico.ch/algorithm/fminer/bbrc"] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Algorithm }] }
+ @object["http://ot-dev.in-silico.ch/algorithm/fminer/last"] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Algorithm }] }
+ @object["http://ot-dev.in-silico.ch/algorithm/lazar"] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Algorithm }] }
end
# Add a task
@@ -272,7 +281,7 @@ module OpenTox
@object[genid][name] = [{"type" => type(entry), "value" => entry }]
end
end
- elsif v.is_a? Array and u == RDF.type
+ elsif v.is_a? Array #and u == RDF.type
@object[uri] = {} unless @object[uri]
v.each do |value|
@object[uri][u] = [] unless @object[uri][u]
@@ -354,7 +363,8 @@ module OpenTox
# @return [text/plain] Object OWL-DL in RDF/XML format
def to_rdfxml
Tempfile.open("owl-serializer"){|f| f.write(self.to_ntriples); @path = f.path}
- `rapper -i ntriples -f 'xmlns:ot="#{OT.uri}"' -f 'xmlns:dc="#{DC.uri}"' -f 'xmlns:rdf="#{RDF.uri}"' -f 'xmlns:owl="#{OWL.uri}"' -o rdfxml #{@path} 2>/dev/null`
+ # TODO: add base uri for ist services
+ `rapper -i ntriples -f 'xmlns:ot="#{OT.uri}"' -f 'xmlns:ota="#{OTA.uri}"' -f 'xmlns:dc="#{DC.uri}"' -f 'xmlns:rdf="#{RDF.uri}"' -f 'xmlns:owl="#{OWL.uri}"' -o rdfxml #{@path} 2>/dev/null`
end
# Convert to JSON as specified in http://n2.talis.com/wiki/RDF_JSON_Specification
@@ -373,6 +383,8 @@ module OpenTox
XSD.boolean
elsif value.is_a? Float
XSD.float
+ elsif value.is_a? Integer
+ XSD.integer
else
XSD.string
end
@@ -383,6 +395,8 @@ module OpenTox
datatype = OT.NominalFeature
elsif value.is_a? Float
datatype = OT.NumericFeature
+ elsif value.is_a? Integer
+ datatype = OT.NumericFeature
else
datatype = OT.StringFeature
end
diff --git a/lib/task.rb b/lib/task.rb
index 19f42d6..00499fa 100644
--- a/lib/task.rb
+++ b/lib/task.rb
@@ -38,6 +38,7 @@ module OpenTox
task = Task.new(task_uri.chomp)
# measure current memory consumption
+=begin
memory = `free -m|sed -n '2p'`.split
free_memory = memory[3].to_i + memory[6].to_i # include cache
if free_memory < 20 # require at least 200 M free memory
@@ -56,6 +57,7 @@ module OpenTox
# return task
# #raise "Server too busy to start a new task"
#end
+=end
task_pid = Spork.spork(:logger => LOGGER) do
LOGGER.debug "Task #{task.uri} started #{Time.now}"
@@ -167,6 +169,10 @@ module OpenTox
@metadata[OT.hasStatus] == 'Running'
end
+ def queued?
+ @metadata[OT.hasStatus] == 'Queued'
+ end
+
def completed?
@metadata[OT.hasStatus] == 'Completed'
end
@@ -284,9 +290,10 @@ module OpenTox
raise "illegal task state, task is completed, resultURI is no URI: '"+@metadata[OT.resultURI].to_s+
"'" unless @metadata[OT.resultURI] and @metadata[OT.resultURI].to_s.uri? if completed?
if @http_code == 202
- raise "#{@uri}: illegal task state, code is 202, but hasStatus is not Running: '"+@metadata[OT.hasStatus]+"'" unless running?
+ raise "#{@uri}: illegal task state, code is 202, but hasStatus is not Running or Queued: '"+@metadata[OT.hasStatus]+"'" unless running? or queued?
elsif @http_code == 201
- raise "#{@uri}: illegal task state, code is 201, but hasStatus is not Completed: '"+@metadata[OT.hasStatus]+"'" unless completed?
+ # ignore hasStatus
+ # raise "#{@uri}: illegal task state, code is 201, but hasStatus is not Completed: '"+@metadata[OT.hasStatus]+"'" unless completed?
raise "#{@uri}: illegal task state, code is 201, resultURI is no task-URI: '"+@metadata[OT.resultURI].to_s+
"'" unless @metadata[OT.resultURI] and @metadata[OT.resultURI].to_s.uri?
end
diff --git a/lib/to-html.rb b/lib/to-html.rb
index 6785974..2979062 100644
--- a/lib/to-html.rb
+++ b/lib/to-html.rb
@@ -1,12 +1,12 @@
-OT_LOGO = "http://opentox.informatik.uni-freiburg.de/ot-logo.png"
+OT_LOGO = File.join(CONFIG[:services]["opentox-validation"],"resources/ot-logo.png")
class String
# encloses URI in text with with link tag
# @return [String] new text with marked links
def link_urls
- self.gsub(/(?i)http(s?):\/\/[^\r\n\s']*/, '<a href=\0>\0</a>')
+ self.gsub(/(?i)http(s?):\/\/[^\r\n\s']*/, '<a href="\0">\0</a>')
end
end
@@ -15,98 +15,123 @@ module OpenTox
# produces a html page for making web services browser friendly
# format of text (=string params) is preserved (e.g. line breaks)
# urls are marked as links
- # @example post params:
- # [ [ [:mandatory_param_1], [:mandatory_param_2], [:optional_param,"default_value"] ],
- # [ [:alteranative_mandatory_param_1], [:alteranative_mandatory_param_2] ]
- # ]
+ #
# @param [String] text this is the actual content,
# @param [optional,String] related_links info on related resources
# @param [optional,String] description general info
- # @param [optional,Array] post_params, array of arrays containing info on POST operation, see example
+ # @param [optional,Array] post_command, infos for the post operation, object defined below
# @return [String] html page
- def self.text_to_html( text, subjectid=nil, related_links=nil, description=nil, post_params=nil )
+ def self.text_to_html( text, subjectid=nil, related_links=nil, description=nil, post_command=nil )
# TODO add title as parameter
title = nil #$sinatra.url_for($sinatra.request.env['PATH_INFO'], :full) if $sinatra
html = "<html>"
html += "<title>"+title+"</title>" if title
- html += "<img src="+OT_LOGO+"><body>"
+ html += "<img src=\""+OT_LOGO+"\"><\/img><body>"
if AA_SERVER
user = OpenTox::Authorization.get_user(subjectid) if subjectid
html += "<pre><p align=\"right\">"
unless user
- html += "You are currently not logged in to "+$url_provider.url_for("",:full)+
- ", <a href="+$url_provider.url_for("/login",:full)+">login</a>"
+ html += "You are currently not signed in to "+$url_provider.url_for("",:full)+
+ ", <a href="+$url_provider.url_for("/sign_in",:full)+">sign in</a>"
else
- html += "You are logged in as '#{user}' to "+$url_provider.url_for("",:full)+
- ", <a href="+$url_provider.url_for("/logout",:full)+">logout</a>"
+ html += "You are signed in as '#{user}' to "+$url_provider.url_for("",:full)+
+ ", <a href="+$url_provider.url_for("/sign_out",:full)+">sign out</a>"
end
html += " </p></pre>"
end
html += "<h3>Description</h3><pre><p>"+description.link_urls+"</p></pre>" if description
html += "<h3>Related links</h3><pre><p>"+related_links.link_urls+"</p></pre>" if related_links
- if post_params
- html += "<h3>POST parameters</h3>"
- count = 0
- post_params.each do |p|
- html += "<pre><p>alternatively:</p></pre>" if count > 0
- html += "<pre><p><table><thead><tr><th>param</th><th>default_value</th></tr></thead>"
- p.each do |k,v|
- html += "<tr><th>"+k.to_s+"</th><th>"+(v!=nil ? v.to_s : "<i>mandatory</i>")+"</th></tr>"
- end
- html += "</table></p></pre>"
- count += 1
- end
+ if post_command
+ raise "not a post command" unless post_command.is_a?(OpenTox::PostCommand)
+ html += "<h3>POST command</h3>"
+ html += post_command.to_html
end
- html += "<h3>Content</h3>" if description || related_links
+ html += "<h3>Content</h3>" if description || related_links || post_command
html += "<pre><p style=\"padding:15px; border:10px solid \#5D308A\">"
html += text.link_urls
- html += "</p></pre></body><html>"
+ html += "</p></pre></body></html>"
html
end
- def self.login( msg=nil )
+ def self.sign_in( msg=nil )
html = "<html><title>Login</title><img src="+OT_LOGO+"><body>"
- html += "<form method='POST' action='"+$url_provider.url_for("/login",:full)+"'>"
+ html += "<form method='POST' action='"+$url_provider.url_for("/sign_in",:full)+"'>"
html += "<pre><p style=\"padding:15px; border:10px solid \#5D308A\">"
html += msg+"\n\n" if msg
- html += "Please login to "+$url_provider.url_for("",:full)+"\n\n"
+ html += "Please sign in to "+$url_provider.url_for("",:full)+"\n\n"
html += "<table border=0>"
html += "<tr><td>user:</td><td><input type='text' name='user' size='15' /></td></tr>"+
"<tr><td>password:</td><td><input type='password' name='password' size='15' /></td></tr>"+
#"<input type=hidden name=back_to value="+back_to.to_s+">"+
- "<tr><td><input type='submit' value='Login' /></td></tr>"
- html += "</table></p></pre></form></body><html>"
+ "<tr><td><input type='submit' value='Sign in' /></td></tr>"
+ html += "</table></p></pre></form></body></html>"
html
end
+
+ class PostAttribute
+ attr_accessor :name, :is_mandatory, :default, :description
+
+ def initialize(name, is_mandatory=true, default=nil, description=nil)
+ @name = name
+ @is_mandatory = is_mandatory
+ @default = default
+ @description = description
+ end
+ end
+
+ class PostCommand
+ attr_accessor :attributes, :uri, :name
+
+ def initialize( uri, name="Send" )
+ @uri = uri
+ @name = name
+ @attributes = []
+ end
+
+ def to_html
+ html = "<form method='POST' action='"+@uri.to_s+"'>"
+ html << "<pre><p>"
+ html << "<table border=0>"
+ #html << "<tr><td colspan='3'><i><sup>Mandatory params are marked with *.</sup></i></td></tr>"
+ attributes.each do |a|
+ mandatory_string = a.is_mandatory ? "*" : ""
+ html << "<tr><td>"+a.name.to_s+":"+mandatory_string+"</td>"
+ html << "<td><input type='text' name='"+a.name.to_s+
+ "' size='50' value='"+a.default.to_s+"'/></td>"
+ html << "<td><i><sup>"+a.description.to_s+"</sup></i></td></tr>"
+ end
+ html << "<tr><td colspan='3'><input type='submit' value='"+@name.to_s+"' /></td></tr>"
+ html << "</table></p></pre></form>"
+ html
+ end
+ end
end
-=begin
-get '/logout/?' do
+get '/sign_out/?' do
response.set_cookie("subjectid",{:value=>nil})
content_type "text/html"
- content = "Sucessfully logged out from "+$url_provider.url_for("",:full)
+ content = "Sucessfully signed out from "+$url_provider.url_for("",:full)
OpenTox.text_to_html(content)
end
-get '/login/?' do
+get '/sign_in/?' do
content_type "text/html"
- OpenTox.login
+ OpenTox.sign_in
end
-post '/login/?' do
+post '/sign_in/?' do
subjectid = OpenTox::Authorization.authenticate(params[:user], params[:password])
if (subjectid)
response.set_cookie("subjectid",{:value=>subjectid})
content_type "text/html"
- content = "Sucessfully logged in as '"+params[:user]+"' to "+$url_provider.url_for("",:full)
+ content = "Sucessfully signed in as '"+params[:user]+"' to "+$url_provider.url_for("",:full)
OpenTox.text_to_html(content,subjectid)
else
content_type "text/html"
- OpenTox.login("Login failed, please try again")
+ OpenTox.sign_in("Login failed, please try again")
end
end
-=end
diff --git a/lib/validation.rb b/lib/validation.rb
index d58d36e..646b076 100644
--- a/lib/validation.rb
+++ b/lib/validation.rb
@@ -36,6 +36,30 @@ module OpenTox
Validation.new(uri)
end
+ # creates a training test validation, waits until it finishes, may take some time
+ # @param [Hash] params (required:algorithm_uri,training_dataset_uri,prediction_feature,test_dataset_uri,optional:algorithm_params)
+ # @param [String,optional] subjectid
+ # @param [OpenTox::Task,optional] waiting_task (can be a OpenTox::Subtask as well), progress is updated accordingly
+ # @return [OpenTox::Validation]
+ def self.create_training_test_validation( params, subjectid=nil, waiting_task=nil )
+ params[:subjectid] = subjectid if subjectid
+ uri = OpenTox::RestClientWrapper.post( File.join(CONFIG[:services]["opentox-validation"],"training_test_validation"),
+ params,{:content_type => "text/uri-list"},waiting_task )
+ Validation.new(uri)
+ end
+
+ # creates a bootstrapping validation, waits until it finishes, may take some time
+ # @param [Hash] params (required:algorithm_uri,dataset_uri,prediction_feature, optional:algorithm_params,random_seed(1))
+ # @param [String,optional] subjectid
+ # @param [OpenTox::Task,optional] waiting_task (can be a OpenTox::Subtask as well), progress is updated accordingly
+ # @return [OpenTox::Validation]
+ def self.create_bootstrapping_validation( params, subjectid=nil, waiting_task=nil )
+ params[:subjectid] = subjectid if subjectid
+ uri = OpenTox::RestClientWrapper.post( File.join(CONFIG[:services]["opentox-validation"],"bootstrapping"),
+ params,{:content_type => "text/uri-list"},waiting_task )
+ Validation.new(uri)
+ end
+
# looks for report for this validation, creates a report if no report is found
# @param [String,optional] subjectid
# @param [OpenTox::Task,optional] waiting_task (can be a OpenTox::Subtask as well), progress is updated accordingly
@@ -61,34 +85,27 @@ module OpenTox
@metadata = YAML.load(OpenTox::RestClientWrapper.get(uri,{:subjectid => subjectid, :accept => "application/x-yaml"}))
end
- # PENDING: creates summary as used for ToxCreate
- def summary
- if @metadata[OT.classificationStatistics]
- res = {
- :nr_predictions => @metadata[OT.numInstances] - @metadata[OT.numUnpredicted],
- :correct_predictions => @metadata[OT.classificationStatistics][OT.percentCorrect],
- :weighted_area_under_roc => @metadata[OT.classificationStatistics][OT.weightedAreaUnderRoc],
- }
- @metadata[OT.classificationStatistics][OT.classValueStatistics].each do |s|
- if s[OT.classValue].to_s=="true"
- res[:true_positives] = s[OT.numTruePositives]
- res[:false_positives] = s[OT.numFalsePositives]
- res[:true_negatives] = s[OT.numTrueNegatives]
- res[:false_negatives] = s[OT.numFalseNegatives]
- res[:sensitivity] = s[OT.truePositiveRate]
- res[:specificity] = s[OT.trueNegativeRate]
- break
+ # returns confusion matrix as array, predicted values are in rows
+ # example:
+ # [[nil,"active","moderate","inactive"],["active",1,3,99],["moderate",4,2,8],["inactive",3,8,6]]
+ # -> 99 inactive compounds have been predicted as active
+ def confusion_matrix
+ raise "no classification statistics, probably a regression valdiation" unless @metadata[OT.classificationStatistics]
+ matrix = @metadata[OT.classificationStatistics][OT.confusionMatrix][OT.confusionMatrixCell]
+ values = matrix.collect{|cell| cell[OT.confusionMatrixPredicted]}.uniq
+ table = [[nil]+values]
+ values.each do |c|
+ table << [c]
+ values.each do |r|
+ matrix.each do |cell|
+ if cell[OT.confusionMatrixPredicted]==c and cell[OT.confusionMatrixActual]==r
+ table[-1] << cell[OT.confusionMatrixValue].to_f
+ break
+ end
end
end
- res
- elsif @metadata[OT.regressionStatistics]
- {
- :nr_predictions => @metadata[OT.numInstances] - @metadata[OT.numUnpredicted],
- :r_square => @metadata[OT.regressionStatistics][OT.rSquare],
- :root_mean_squared_error => @metadata[OT.regressionStatistics][OT.rootMeanSquaredError],
- :mean_absolute_error => @metadata[OT.regressionStatistics][OT.meanAbsoluteError],
- }
end
+ table
end
end
@@ -147,9 +164,9 @@ module OpenTox
@metadata = YAML.load(OpenTox::RestClientWrapper.get(uri,{:subjectid => subjectid, :accept => "application/x-yaml"}))
end
- # PENDING: creates summary as used for ToxCreate
- def summary( subjectid=nil )
- Validation.from_cv_statistics( @uri, subjectid ).summary
+ # returns a Validation object containing the statistics of the crossvalidation
+ def statistics( subjectid=nil )
+ Validation.from_cv_statistics( @uri, subjectid )
end
end
@@ -198,7 +215,6 @@ module OpenTox
# @param [String,optional] subjectid
# @return [OpenTox::CrossvalidationReport]
def self.find( uri, subjectid=nil )
- # PENDING load report data?
OpenTox::RestClientWrapper.get(uri,{:subjectid => subjectid})
rep = CrossvalidationReport.new(uri)
rep.load_metadata( subjectid )
@@ -227,6 +243,54 @@ module OpenTox
end
end
+
+ class AlgorithmComparisonReport
+ include OpenTox
+
+ # finds AlgorithmComparisonReport via uri, raises error if not found
+ # @param [String] uri
+ # @param [String,optional] subjectid
+ # @return [OpenTox::AlgorithmComparisonReport]
+ def self.find( uri, subjectid=nil )
+ OpenTox::RestClientWrapper.get(uri,{:subjectid => subjectid})
+ rep = AlgorithmComparisonReport.new(uri)
+ rep.load_metadata( subjectid )
+ rep
+ end
+
+ # finds AlgorithmComparisonReport for a particular crossvalidation
+ # @param [String] crossvalidation uri
+ # @param [String,optional] subjectid
+ # @return [OpenTox::AlgorithmComparisonReport] nil if no report found
+ def self.find_for_crossvalidation( crossvalidation_uri, subjectid=nil )
+ uris = RestClientWrapper.get(File.join(CONFIG[:services]["opentox-validation"],
+ "/report/algorithm_comparison?crossvalidation="+crossvalidation_uri), {:subjectid => subjectid}).chomp.split("\n")
+ uris.size==0 ? nil : AlgorithmComparisonReport.new(uris[-1])
+ end
+
+ # creates an algorithm comparison report from crossvalidation uris
+ # @param [Hash] crossvalidation uri_hash, see example
+ # @param [String,optional] subjectid
+ # @param [OpenTox::Task,optional] waiting_task (can be a OpenTox::Subtask as well), progress is updated accordingly
+ # @return [OpenTox::AlgorithmComparisonReport]
+ # example for hash:
+ # { :lazar-bbrc => [ http://host/validation/crossvalidation/x1, http://host/validation/crossvalidation/x2 ],
+ # :lazar-last => [ http://host/validation/crossvalidation/xy, http://host/validation/crossvalidation/xy ] }
+ def self.create( crossvalidation_uri_hash, subjectid=nil, waiting_task=nil )
+ identifier = []
+ validation_uris = []
+ crossvalidation_uri_hash.each do |id, uris|
+ uris.each do |uri|
+ identifier << id
+ validation_uris << uri
+ end
+ end
+ uri = RestClientWrapper.post(File.join(CONFIG[:services]["opentox-validation"],"/report/algorithm_comparison"),
+ { :validation_uris => validation_uris.join(","), :identifier => identifier.join(","), :subjectid => subjectid }, {}, waiting_task )
+ AlgorithmComparisonReport.new(uri)
+ end
+ end
+
class QMRFReport
include OpenTox