summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristoph Helma <helma@in-silico.ch>2010-11-19 16:53:21 +0100
committerChristoph Helma <helma@in-silico.ch>2010-11-19 16:53:21 +0100
commitf8552611c2dbe25d76474f51e4e895bf9c2b5c5e (patch)
treeda145cd1d69adc4cdb8d299f0cea2e0810b88eaf
parent91c95f8dc8f60a8f0029b970ef881eecee28401b (diff)
lazar predictions for toxcreate working
-rw-r--r--Rakefile3
-rw-r--r--lib/algorithm.rb154
-rw-r--r--lib/dataset.rb72
-rw-r--r--lib/environment.rb8
-rw-r--r--lib/feature.rb10
-rw-r--r--lib/model.rb466
-rw-r--r--lib/opentox-ruby-api-wrapper.rb4
-rw-r--r--lib/opentox.rb10
-rw-r--r--lib/ot-logger.rb48
-rw-r--r--lib/overwrite.rb50
-rw-r--r--lib/parser.rb4
-rw-r--r--lib/rest_client_wrapper.rb16
-rw-r--r--lib/serializer.rb23
-rw-r--r--lib/task.rb278
-rw-r--r--lib/validation.rb64
15 files changed, 720 insertions, 490 deletions
diff --git a/Rakefile b/Rakefile
index 18f24bd..6838e75 100644
--- a/Rakefile
+++ b/Rakefile
@@ -21,14 +21,11 @@ begin
"rack-flash",
"nokogiri",
"rubyzip",
- #"builder",
"roo",
"spreadsheet",
"google-spreadsheet-ruby",
"tmail",
"rinruby",
- #"rdf",
- #"rdf-raptor",
"rjb"
].each { |dep| gem.add_dependency dep }
[ "dm-core",
diff --git a/lib/algorithm.rb b/lib/algorithm.rb
index 711f63b..a6fa4a7 100644
--- a/lib/algorithm.rb
+++ b/lib/algorithm.rb
@@ -1,3 +1,9 @@
+# R integration
+# workaround to initialize R non-interactively (former rinruby versions did this by default)
+# avoids compiling R with X
+R = nil
+require "rinruby"
+
module OpenTox
# Wrapper for OpenTox Algorithms
@@ -6,8 +12,10 @@ module OpenTox
include OpenTox
# Execute algorithm with parameters, please consult the OpenTox API and the webservice documentation for acceptable parameters
+ # @param [optional,Hash] params Algorithm parameters
+ # @return [String] URI of new resource (dataset, model, ...)
def run(params=nil)
- RestClientWrapper.post(@uri, params)
+ RestClientWrapper.post(@uri, params).to_s
end
# Get OWL-DL representation in RDF/XML format
@@ -23,9 +31,11 @@ module OpenTox
include Algorithm
end
+ # Fminer algorithms (https://github.com/amaunz/fminer2)
module Fminer
include Algorithm
+ # Backbone Refinement Class mining (http://bbrc.maunz.de/)
class BBRC
include Fminer
# Initialize bbrc algorithm
@@ -35,6 +45,7 @@ module OpenTox
end
end
+ # LAtent STructure Pattern Mining (http://last-pm.maunz.de)
class LAST
include Fminer
# Initialize last algorithm
@@ -58,15 +69,15 @@ module OpenTox
# Utility methods without dedicated webservices
+ # Similarity calculations
module Similarity
include Algorithm
# Tanimoto similarity
- #
# @param [Array] features_a Features of first compound
# @param [Array] features_b Features of second compound
# @param [optional, Hash] weights Weights for all features
- # @return [Float] (Wighted) tanimoto similarity
+ # @return [Float] (Weighted) tanimoto similarity
def self.tanimoto(features_a,features_b,weights=nil)
common_features = features_a & features_b
all_features = (features_a + features_b).uniq
@@ -86,15 +97,19 @@ module OpenTox
end
# Euclidean similarity
- def self.euclidean(prop_a,prop_b,weights=nil)
- common_properties = prop_a.keys & prop_b.keys
+ # @param [Hash] properties_a Properties of first compound
+ # @param [Hash] properties_b Properties of second compound
+ # @param [optional, Hash] weights Weights for all properties
+ # @return [Float] (Weighted) euclidean similarity
+ def self.euclidean(properties_a,properties_b,weights=nil)
+ common_properties = properties_a.keys & properties_b.keys
if common_properties.size > 1
dist_sum = 0
common_properties.each do |p|
if weights
- dist_sum += ( (prop_a[p] - prop_b[p]) * Algorithm.gauss(weights[p]) )**2
+ dist_sum += ( (properties_a[p] - properties_b[p]) * Algorithm.gauss(weights[p]) )**2
else
- dist_sum += (prop_a[p] - prop_b[p])**2
+ dist_sum += (properties_a[p] - properties_b[p])**2
end
end
1/(1+Math.sqrt(dist_sum))
@@ -103,14 +118,129 @@ module OpenTox
end
end
end
+
+ module Neighbors
+
+ # Classification with majority vote from neighbors weighted by similarity
+ # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity`
+ # @param [optional] params Ignored (only for compatibility with local_svm_regression)
+ # @return [Hash] Hash with keys `:prediction, :confidence`
+ def self.weighted_majority_vote(neighbors,params={})
+ conf = 0.0
+ confidence = 0.0
+ neighbors.each do |neighbor|
+ case neighbor[:activity].to_s
+ when 'true'
+ conf += Algorithm.gauss(neighbor[:similarity])
+ when 'false'
+ conf -= Algorithm.gauss(neighbor[:similarity])
+ end
+ end
+ if conf > 0.0
+ prediction = true
+ elsif conf < 0.0
+ prediction = false
+ else
+ prediction = nil
+ end
+ confidence = conf/neighbors.size if neighbors.size > 0
+ {:prediction => prediction, :confidence => confidence.abs}
+ end
+
+ # Local support vector regression from neighbors
+ # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features`
+ # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required
+ # @return [Hash] Hash with keys `:prediction, :confidence`
+ def self.local_svm_regression(neighbors,params )
+ sims = neighbors.collect{ |n| n[:similarity] } # similarity values between query and neighbors
+ conf = sims.inject{|sum,x| sum + x }
+ acts = neighbors.collect do |n|
+ act = n[:activity]
+ Math.log10(act.to_f)
+ end # activities of neighbors for supervised learning
+
+ neighbor_matches = neighbors.collect{ |n| n[:features] } # as in classification: URIs of matches
+ gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel
+ if neighbor_matches.size == 0
+ raise "No neighbors found"
+ else
+ # gram matrix
+ (0..(neighbor_matches.length-1)).each do |i|
+ gram_matrix[i] = [] unless gram_matrix[i]
+ # upper triangle
+ ((i+1)..(neighbor_matches.length-1)).each do |j|
+ sim = eval("#{params[:similarity_algorithm]}(neighbor_matches[i], neighbor_matches[j], params[:p_values])")
+ gram_matrix[i][j] = Algorithm.gauss(sim)
+ gram_matrix[j] = [] unless gram_matrix[j]
+ gram_matrix[j][i] = gram_matrix[i][j] # lower triangle
+ end
+ gram_matrix[i][i] = 1.0
+ end
+
+ LOGGER.debug gram_matrix.to_yaml
+
+ @r = RinRuby.new(false,false) # global R instance leads to Socket errors after a large number of requests
+ @r.eval "library('kernlab')" # this requires R package "kernlab" to be installed
+ LOGGER.debug "Setting R data ..."
+ # set data
+ @r.gram_matrix = gram_matrix.flatten
+ @r.n = neighbor_matches.size
+ @r.y = acts
+ @r.sims = sims
+
+ LOGGER.debug "Preparing R data ..."
+ # prepare data
+ @r.eval "y<-as.vector(y)"
+ @r.eval "gram_matrix<-as.kernelMatrix(matrix(gram_matrix,n,n))"
+ @r.eval "sims<-as.vector(sims)"
+
+ # model + support vectors
+ LOGGER.debug "Creating SVM model ..."
+ @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"nu-svr\", nu=0.8)"
+ @r.eval "sv<-as.vector(SVindex(model))"
+ @r.eval "sims<-sims[sv]"
+ @r.eval "sims<-as.kernelMatrix(matrix(sims,1))"
+ LOGGER.debug "Predicting ..."
+ @r.eval "p<-predict(model,sims)[1,1]"
+ prediction = 10**(@r.p.to_f)
+ LOGGER.debug "Prediction is: '" + @prediction.to_s + "'."
+ @r.quit # free R
+ end
+ confidence = conf/neighbors.size if neighbors.size > 0
+ {:prediction => prediction, :confidence => confidence}
+
+ end
+
+ end
+
+ module Substructure
+ include Algorithm
+ # Substructure matching
+ # @param [OpenTox::Compound] compound Compound
+ # @param [Array] features Array with Smarts strings
+ # @return [Array] Array with matching Smarts
+ def self.match(compound,features)
+ compound.match(features)
+ end
+ end
+
+ module Dataset
+ include Algorithm
+ # API should match Substructure.match
+ def features(dataset_uri,compound_uri)
+ end
+ end
- # Gauss kernel
- def self.gauss(sim, sigma = 0.3)
- x = 1.0 - sim
- Math.exp(-(x*x)/(2*sigma*sigma))
- end
+ # Gauss kernel
+ # @return [Float]
+ def self.gauss(x, sigma = 0.3)
+ d = 1.0 - x
+ Math.exp(-(d*d)/(2*sigma*sigma))
+ end
# Median of an array
+ # @param [Array] Array with values
+ # @return [Float] Median
def self.median(array)
return nil if array.empty?
array.sort!
diff --git a/lib/dataset.rb b/lib/dataset.rb
index 6e270e9..4737ea1 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -43,7 +43,7 @@ module OpenTox
# Get all datasets from a service
# @param [optional,String] uri URI of the dataset service, defaults to service specified in configuration
- # @return [Array] Array of dataset object with all data
+ # @return [Array] Array of dataset object without data (use one of the load_* methods to pull data from the server)
def self.all(uri=CONFIG[:services]["opentox-dataset"])
RestClientWrapper.get(uri,:accept => "text/uri-list").to_s.each_line.collect{|u| Dataset.new(u)}
end
@@ -55,6 +55,10 @@ module OpenTox
copy YAML.load(yaml)
end
+ def load_rdfxml(rdfxml)
+ load_rdfxml_file Tempfile.open("ot-rdfxml"){|f| f.write(rdfxml)}.path
+ end
+
# Load RDF/XML representation from a file
# @param [String] file File with RDF/XML representation of the dataset
# @return [OpenTox::Dataset] Dataset object with RDF/XML data
@@ -129,8 +133,6 @@ module OpenTox
# @return [String] "classification", "regression", "mixed" or "unknown"
def feature_type
feature_types = @features.collect{|f,metadata| metadata[OT.isA]}.uniq
- LOGGER.debug "FEATURES"
- LOGGER.debug feature_types.inspect
if feature_types.size > 1
"mixed"
else
@@ -145,12 +147,18 @@ module OpenTox
end
end
- # Get Excel representation
+ # Get Spreadsheet representation
# @return [Spreadsheet::Workbook] Workbook which can be written with the spreadsheet gem (data_entries only, metadata will be discarded)
def to_spreadsheet
Serializer::Spreadsheets.new(self).to_spreadsheet
end
+ # Get Excel representation (alias for to_spreadsheet)
+ # @return [Spreadsheet::Workbook] Workbook which can be written with the spreadsheet gem (data_entries only, metadata will be discarded)
+ def to_xls
+ to_spreadsheet
+ end
+
# Get CSV string representation (data_entries only, metadata will be discarded)
# @return [String] CSV representation
def to_csv
@@ -180,6 +188,10 @@ module OpenTox
@features[feature][DC.title]
end
+ def title
+ @metadata[DC.title]
+ end
+
# Insert a statement (compound_uri,feature_uri,value)
# @example Insert a statement (compound_uri,feature_uri,value)
# dataset.add "http://webservices.in-silico.ch/compound/InChI=1S/C6Cl6/c7-1-2(8)4(10)6(12)5(11)3(1)9", "http://webservices.in-silico.ch/dataset/1/feature/hamster_carcinogenicity", true
@@ -224,11 +236,18 @@ module OpenTox
# TODO: rewrite feature URI's ??
@compounds.uniq!
if @uri
- RestClientWrapper.post(@uri,{:content_type => "application/x-yaml"},self.to_yaml)
+ if (CONFIG[:yaml_hosts].include?(URI.parse(@uri).host))
+ RestClientWrapper.post(@uri,{:content_type => "application/x-yaml"},self.to_yaml)
+ else
+ File.open("ot-post-file.rdf","w+") { |f| f.write(self.to_rdfxml); @path = f.path }
+ task_uri = RestClient.post(@uri, {:file => File.new(@path)},{:accept => "text/uri-list"}).to_s.chomp
+ #task_uri = `curl -X POST -H "Accept:text/uri-list" -F "file=@#{@path};type=application/rdf+xml" http://apps.ideaconsult.net:8080/ambit2/dataset`
+ Task.find(task_uri).wait_for_completion
+ self.uri = RestClientWrapper.get(task_uri,:accept => 'text/uri-list')
+ end
else
# create dataset if uri is empty
self.uri = RestClientWrapper.post(CONFIG[:services]["opentox-dataset"],{}).to_s.chomp
- RestClientWrapper.post(@uri,{:content_type => "application/x-yaml"},self.to_yaml)
end
@uri
end
@@ -252,4 +271,45 @@ module OpenTox
end
end
end
+
+ # Class with special methods for lazar prediction datasets
+ class LazarPrediction < Dataset
+
+ # Find a prediction dataset and load all data.
+ # @param [String] uri Prediction dataset URI
+ # @return [OpenTox::Dataset] Prediction dataset object with all data
+ def self.find(uri)
+ prediction = LazarPrediction.new(uri)
+ prediction.load_all
+ prediction
+ end
+
+ def value(compound)
+ @data_entries[compound.uri].collect{|f,v| v.first if f.match(/prediction/)}.compact.first
+ end
+
+ def confidence(compound)
+ feature_uri = @data_entries[compound.uri].collect{|f,v| f if f.match(/prediction/)}.compact.first
+ @features[feature_uri][OT.confidence]
+ end
+
+ def descriptors(compound)
+ @data_entries[compound.uri].collect{|f,v| @features[f] if f.match(/descriptor/)}.compact if @data_entries[compound.uri]
+ end
+
+ def measured_activities(compound)
+ source = @metadata[OT.hasSource]
+ @data_entries[compound.uri].collect{|f,v| v if f.match(/#{source}/)}.compact
+ end
+
+ def neighbors(compound)
+ @data_entries[compound.uri].collect{|f,v| @features[f] if f.match(/neighbor/)}.compact
+ end
+
+# def errors(compound)
+# features = @data_entries[compound.uri].keys
+# features.collect{|f| @features[f][OT.error]}.join(" ") if features
+# end
+
+ end
end
diff --git a/lib/environment.rb b/lib/environment.rb
index d66b062..4f1cc80 100644
--- a/lib/environment.rb
+++ b/lib/environment.rb
@@ -1,4 +1,3 @@
-require "ot-logger"
# set default environment
ENV['RACK_ENV'] = 'production' unless ENV['RACK_ENV']
@@ -45,8 +44,8 @@ end
load File.join config_dir,"mail.rb" if File.exists?(File.join config_dir,"mail.rb")
logfile = "#{LOG_DIR}/#{ENV["RACK_ENV"]}.log"
-#LOGGER = MyLogger.new(logfile,'daily') # daily rotation
-LOGGER = MyLogger.new(logfile) # no rotation
+#LOGGER = OTLogger.new(logfile,'daily') # daily rotation
+LOGGER = OTLogger.new(logfile) # no rotation
LOGGER.formatter = Logger::Formatter.new #this is neccessary to restore the formating in case active-record is loaded
if CONFIG[:logger] and CONFIG[:logger] == "debug"
LOGGER.level = Logger::DEBUG
@@ -60,11 +59,12 @@ FALSE_REGEXP = /^(false|inactive|0|0.0)$/i
# Task durations
DEFAULT_TASK_MAX_DURATION = 36000
-EXTERNAL_TASK_MAX_DURATION = 36000
+#EXTERNAL_TASK_MAX_DURATION = 36000
# OWL Namespaces
class OwlNamespace
+ attr_accessor :uri
def initialize(uri)
@uri = uri
end
diff --git a/lib/feature.rb b/lib/feature.rb
index 13d97a2..9e28077 100644
--- a/lib/feature.rb
+++ b/lib/feature.rb
@@ -1,5 +1,15 @@
module OpenTox
class Feature
include OpenTox
+
+ def self.find(uri)
+ feature = Feature.new uri
+ if (CONFIG[:yaml_hosts].include?(URI.parse(uri).host))
+ feature.add_metadata YAML.load(RestClientWrapper.get(uri,:accept => "application/x-yaml"))
+ else
+ feature.add_metadata Parser::Owl::Dataset.new(uri).load_metadata
+ end
+ feature
+ end
end
end
diff --git a/lib/model.rb b/lib/model.rb
index 63013cb..c6a2cf4 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -4,6 +4,9 @@ module OpenTox
include OpenTox
+ # Run a model with parameters
+ # @param [Hash] params Parameters for OpenTox model
+ # @return [text/uri-list] Task or resource URI
def run(params)
if CONFIG[:yaml_hosts].include?(URI.parse(@uri).host)
accept = 'application/x-yaml'
@@ -11,47 +14,25 @@ module OpenTox
accept = 'application/rdf+xml'
end
begin
- params[:acccept] = accept
- #TODO fix: REstClientWrapper does not accept accept header
- #RestClientWrapper.post(@uri,params)#,{:accept => accept})
- `curl -X POST -H "Accept:#{accept}" #{params.collect{|k,v| "-d #{k}=#{v}"}.join(" ")} #{@uri}`.to_s.chomp
+ RestClientWrapper.post(@uri,{:accept => accept},params).to_s
rescue => e
LOGGER.error "Failed to run #{@uri} with #{params.inspect} (#{e.inspect})"
raise "Failed to run #{@uri} with #{params.inspect}"
end
end
-
-=begin
- def classification?
- #TODO replace with request to ontology server
- if @metadata[DC.title] =~ /(?i)classification/
- return true
- elsif @metadata[DC.title] =~ /(?i)regression/
- return false
- elsif @uri =~/ntua/ and @metadata[DC.title] =~ /mlr/
- return false
- elsif @uri =~/tu-muenchen/ and @metadata[DC.title] =~ /regression|M5P|GaussP/
- return false
- elsif @uri =~/ambit2/ and @metadata[DC.title] =~ /pKa/ || @metadata[DC.title] =~ /Regression|Caco/
- return false
- elsif @uri =~/majority/
- return (@uri =~ /class/) != nil
- else
- raise "unknown model, uri:'"+@uri+"' title:'"+@metadata[DC.title]+"'"
- end
- end
-=end
+ # Generic OpenTox model class for all API compliant services
class Generic
include Model
end
+ # Lazy Structure Activity Relationship class
class Lazar
include Model
+ include Algorithm
- #attr_accessor :prediction_type, :feature_type, :features, :effects, :activities, :p_values, :fingerprints, :parameters
- attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :parameters, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm
+ attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim
def initialize(uri=nil)
@@ -61,7 +42,6 @@ module OpenTox
super CONFIG[:services]["opentox-model"]
end
- # TODO: fix metadata, add parameters
@metadata[OT.algorithm] = File.join(CONFIG[:services]["opentox-algorithm"],"lazar")
@features = []
@@ -70,284 +50,192 @@ module OpenTox
@p_values = {}
@fingerprints = {}
- @feature_calculation_algorithm = "substructure_match"
- @similarity_algorithm = "weighted_tanimoto"
- @prediction_algorithm = "weighted_majority_vote"
+ @feature_calculation_algorithm = "Substructure.match"
+ @similarity_algorithm = "Similarity.tanimoto"
+ @prediction_algorithm = "Neighbors.weighted_majority_vote"
@min_sim = 0.3
end
- def self.find(uri)
- YAML.load RestClientWrapper.get(uri,:content_type => 'application/x-yaml')
+ # Get URIs of all lazar models
+ # @return [Array] List of lazar model URIs
+ def self.all
+ RestClientWrapper.get(CONFIG[:services]["opentox-model"]).to_s.split("\n")
end
- def self.create_from_dataset(dataset_uri,feature_dataset_uri,prediction_feature=nil)
- training_activities = OpenTox::Dataset.find(dataset_uri)
- training_features = OpenTox::Dataset.find(feature_dataset_uri)
- unless prediction_feature # try to read prediction_feature from dataset
- raise "#{training_activities.features.size} features in dataset #{dataset_uri}. Please provide a prediction_feature parameter." unless training_activities.features.size == 1
- prediction_feature = training_activities.features.keys.first
- params[:prediction_feature] = prediction_feature
- end
- lazar = Lazar.new
- training_features = OpenTox::Dataset.new(feature_dataset_uri)
- case training_features.feature_type
- when "classification"
- lazar.similarity_algorithm = "weighted_tanimoto"
- when "regression"
- lazar.similarity_algorithm = "weighted_euclid"
- end
+ # Find a lazar model
+ # @param [String] uri Model URI
+ # @return [OpenTox::Model::Lazar] lazar model
+ def self.find(uri)
+ YAML.load RestClientWrapper.get(uri,:accept => 'application/x-yaml')
end
- def self.create(dataset_uri,prediction_feature=nil,feature_generation_uri=File.join(CONFIG[:services]["opentox-algorithm"],"fminer/bbrc"),params=nil)
-
- training_activities = OpenTox::Dataset.find(dataset_uri)
-
- unless prediction_feature # try to read prediction_feature from dataset
- raise "#{training_activities.features.size} features in dataset #{dataset_uri}. Please provide a prediction_feature parameter." unless training_activities.features.size == 1
- prediction_feature = training_activities.features.keys.first
- params[:prediction_feature] = prediction_feature
- end
-
- lazar = Lazar.new
- params[:feature_generation_uri] = feature_generation_uri
- feature_dataset_uri = OpenTox::Algorithm::Generic.new(feature_generation_uri).run(params).to_s
- training_features = OpenTox::Dataset.find(feature_dataset_uri)
- raise "Dataset #{feature_dataset_uri} not found or empty." if training_features.nil?
-
- # sorted features for index lookups
- lazar.features = training_features.features.sort if training_features.feature_type == "regression"
-
- training_features.data_entries.each do |compound,entry|
- lazar.fingerprints[compound] = [] unless lazar.fingerprints[compound]
- entry.keys.each do |feature|
- case training_features.feature_type
- when "fminer"
- # fingerprints are sets
- smarts = training_features.features[feature][OT.smarts]
- lazar.fingerprints[compound] << smarts
- unless lazar.features.include? smarts
- lazar.features << smarts
- lazar.p_values[smarts] = training_features.features[feature][OT.p_value]
- lazar.effects[smarts] = training_features.features[feature][OT.effect]
- end
- when "classification"
- # fingerprints are sets
- if entry[feature].flatten.size == 1
- lazar.fingerprints[compound] << feature if entry[feature].flatten.first.match(TRUE_REGEXP)
- lazar.features << feature unless lazar.features.include? feature
- else
- LOGGER.warn "More than one entry (#{entry[feature].inspect}) for compound #{compound}, feature #{feature}"
- end
- when "regression"
- # fingerprints are arrays
- if entry[feature].flatten.size == 1
- lazar.fingerprints[compound][lazar.features.index(feature)] = entry[feature].flatten.first
- else
- LOGGER.warn "More than one entry (#{entry[feature].inspect}) for compound #{compound}, feature #{feature}"
- end
- end
- end
-
- lazar.activities[compound] = [] unless lazar.activities[compound]
- training_activities.data_entries[compound][params[:prediction_feature]].each do |value|
- case value.to_s
- when "true"
- lazar.activities[compound] << true
- when "false"
- lazar.activities[compound] << false
- else
- lazar.activities[compound] << value.to_f
- lazar.prediction_type = "regression"
- end
- end
- end
-
- if feature_generation_uri.match(/fminer/)
- lazar.feature_calculation_algorithm = "substructure_match"
- else
- halt 404, "External feature generation services not yet supported"
- end
-
- lazar.metadata[OT.dependentVariables] = params[:prediction_feature]
- lazar.metadata[OT.trainingDataset] = dataset_uri
- lazar.metadata[OT.featureDataset] = feature_dataset_uri
+ # Create a new lazar model
+ # @param [optional,Hash] params Parameters for the lazar algorithm (OpenTox::Algorithm::Lazar)
+ # @return [OpenTox::Model::Lazar] lazar model
+ def self.create(params)
+ lazar_algorithm = OpenTox::Algorithm::Generic.new File.join( CONFIG[:services]["opentox-algorithm"],"lazar")
+ model_uri = lazar_algorithm.run(params)
+ OpenTox::Model::Lazar.find(model_uri)
+ end
- lazar.parameters = {
- "dataset_uri" => dataset_uri,
- "prediction_feature" => prediction_feature,
- "feature_generation_uri" => feature_generation_uri
- }
-
- model_uri = lazar.save
- LOGGER.info model_uri + " created #{Time.now}"
- model_uri
+=begin
+ # Create a new lazar model and return task
+ # @param [optional,Hash] params Parameters for the lazar algorithm (OpenTox::Algorithm::Lazar)
+ # @return [OpenTox::Task] Task for lazar model creation
+ def self.create_task(params)
+ task_uri = RestClientWrapper.post(File.join(CONFIG[:services]["opentox-algorithm"],"lazar"), {}, params, false)
+ Task.find(task_uri)
+ #model_uri = lazar_algorithm.run(params)
+ #OpenTox::Model::Lazar.new(model_uri)
+ end
+=end
+ def parameter(param)
+ @metadata[OT.parameters].collect{|p| p[OT.paramValue] if p[DC.title] == param}.compact.first
end
def predict_dataset(dataset_uri)
@prediction_dataset = Dataset.create
@prediction_dataset.add_metadata({
- OT.hasSource => @lazar.uri,
- DC.creator => @lazar.uri,
- DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] ))
+ OT.hasSource => @uri,
+ DC.creator => @uri,
+ DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )),
+ OT.parameters => [{DC.title => "dataset_uri", OT.paramValue => dataset_uri}]
})
- @prediction_dataset.add_parameters({"dataset_uri" => dataset_uri})
- Dataset.new(dataset_uri).load_compounds.each do |compound_uri|
+ d = Dataset.new(dataset_uri)
+ d.load_compounds
+ d.compounds.each do |compound_uri|
predict(compound_uri,false)
end
@prediction_dataset.save
- @prediction_dataset.uri
+ @prediction_dataset
end
+ # Predict a compound
+ # @param [String] compound_uri Compound URI
+ # @param [optional,Boolean] verbose Verbose prediction (output includes neighbors and features)
+ # @return [OpenTox::Dataset] Dataset with prediction
def predict(compound_uri,verbose=false)
@compound = Compound.new compound_uri
+ features = {}
unless @prediction_dataset
+ #@prediction_dataset = cached_prediction
+ #return @prediction_dataset if cached_prediction
@prediction_dataset = Dataset.create
@prediction_dataset.add_metadata( {
- OT.hasSource => @lazar.uri,
- DC.creator => @lazar.uri,
- DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] ))
+ OT.hasSource => @uri,
+ DC.creator => @uri,
+ # TODO: fix dependentVariable
+ DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )),
+ OT.parameters => [{DC.title => "compound_uri", OT.paramValue => compound_uri}]
} )
- @prediction_dataset.add_parameters( {"compound_uri" => compound_uri} )
end
- neighbors
- eval @prediction_algorithm
-
- if @prediction
+ return @prediction_dataset if database_activity
- feature_uri = File.join( @prediction_dataset.uri, "feature", @prediction_dataset.compounds.size)
- @prediction_dataset.add @compound.uri, feature_uri, @prediction
+ neighbors
+ prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})")
+
+ prediction_feature_uri = File.join( @prediction_dataset.uri, "feature", "prediction", File.basename(@metadata[OT.dependentVariables]),@prediction_dataset.compounds.size.to_s)
+ # TODO: fix dependentVariable
+ @prediction_dataset.metadata[OT.dependentVariables] = prediction_feature_uri
+
+ if @neighbors.size == 0
+ @prediction_dataset.add_feature(prediction_feature_uri, {
+ OT.hasSource => @uri,
+ DC.creator => @uri,
+ DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )),
+ OT.error => "No similar compounds in training dataset.",
+ OT.parameters => [{DC.title => "compound_uri", OT.paramValue => compound_uri}]
+ })
+ @prediction_dataset.add @compound.uri, prediction_feature_uri, prediction[:prediction]
- feature_metadata = @prediction_dataset.metadata
- feature_metadata[DC.title] = File.basename(@metadata[OT.dependentVariables])
- feature_metadata[OT.prediction] = @prediction
- feature_metadata[OT.confidence] = @confidence
- @prediction_dataset.add_feature(feature_uri, feature_metadata)
+ else
+ @prediction_dataset.add_feature(prediction_feature_uri, {
+ OT.hasSource => @uri,
+ DC.creator => @uri,
+ DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )),
+ OT.prediction => prediction[:prediction],
+ OT.confidence => prediction[:confidence],
+ OT.parameters => [{DC.title => "compound_uri", OT.paramValue => compound_uri}]
+ })
+ @prediction_dataset.add @compound.uri, prediction_feature_uri, prediction[:prediction]
if verbose
- if @compound_features
+ if @feature_calculation_algorithm == "Substructure.match"
+ f = 0
+ @compound_features.each do |feature|
+ feature_uri = File.join( @prediction_dataset.uri, "feature", "descriptor", f.to_s)
+ features[feature] = feature_uri
+ @prediction_dataset.add_feature(feature_uri, {
+ OT.smarts => feature,
+ OT.p_value => @p_values[feature],
+ OT.effect => @effects[feature]
+ })
+ @prediction_dataset.add @compound.uri, feature_uri, true
+ f+=1
+ end
+ else
@compound_features.each do |feature|
+ features[feature] = feature
@prediction_dataset.add @compound.uri, feature, true
end
end
n = 0
- @neighbors.sort{|a,b| a[:similarity] <=> b[:similarity]}.each do |neighbor|
- neighbor_uri = File.join( @prediction_dataset.uri, "feature/neighbor", n )
- @prediction_dataset.add @compound.uri, neighbor_uri, true
- @prediction_dataset.add_feature(neighbor, {
+ @neighbors.each do |neighbor|
+ neighbor_uri = File.join( @prediction_dataset.uri, "feature", "neighbor", n.to_s )
+ @prediction_dataset.add_feature(neighbor_uri, {
OT.compound => neighbor[:compound],
OT.similarity => neighbor[:similarity],
OT.activity => neighbor[:activity]
})
+ @prediction_dataset.add @compound.uri, neighbor_uri, true
+ f = 0 unless f
+ neighbor[:features].each do |feature|
+ if @feature_calculation_algorithm == "Substructure.match"
+ feature_uri = File.join( @prediction_dataset.uri, "feature", "descriptor", f.to_s) unless feature_uri = features[feature]
+ else
+ feature_uri = feature
+ end
+ @prediction_dataset.add neighbor[:compound], feature_uri, true
+ unless features.has_key? feature
+ features[feature] = feature_uri
+ @prediction_dataset.add_feature(feature_uri, {
+ OT.smarts => feature,
+ OT.p_value => @p_values[feature],
+ OT.effect => @effects[feature]
+ })
+ f+=1
+ end
+ end
n+=1
end
+ # what happens with dataset predictions?
end
end
- @prediction_dataset.save
- @prediction_dataset.uri
- end
-
- def weighted_majority_vote
- conf = 0.0
- @neighbors.each do |neighbor|
- case neighbor[:activity].to_s
- when 'true'
- conf += OpenTox::Algorithm.gauss(neighbor[:similarity])
- when 'false'
- conf -= OpenTox::Algorithm.gauss(neighbor[:similarity])
- end
- end
- if conf > 0.0
- @prediction = true
- elsif conf < 0.0
- @prediction = false
- else
- @prediction = nil
- end
- @confidence = conf/@neighbors.size if @neighbors.size > 0
- end
-
- def local_svm_regression
- sims = @neighbors.collect{ |n| n[:similarity] } # similarity values between query and neighbors
- conf = sims.inject{|sum,x| sum + x }
- acts = @neighbors.collect do |n|
- act = n[:activity]
- # TODO: check this in model creation
- raise "0 values not allowed in training dataset. log10 is calculated internally." if act.to_f == 0
- Math.log10(act.to_f)
- end # activities of neighbors for supervised learning
-
- neighbor_matches = @neighbors.collect{ |n| n[:features] } # as in classification: URIs of matches
- gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel
- if neighbor_matches.size == 0
- raise "No neighbors found"
- else
- # gram matrix
- (0..(neighbor_matches.length-1)).each do |i|
- gram_matrix[i] = []
- # lower triangle
- (0..(i-1)).each do |j|
- sim = OpenTox::Algorithm.weighted_tanimoto(neighbor_matches[i], neighbor_matches[j], @lazar.p_values)
- gram_matrix[i] << OpenTox::Algorithm.gauss(sim)
- end
- # diagonal element
- gram_matrix[i][i] = 1.0
- # upper triangle
- ((i+1)..(neighbor_matches.length-1)).each do |j|
- sim = OpenTox::Algorithm.weighted_tanimoto(neighbor_matches[i], neighbor_matches[j], @lazar.p_values) # double calculation?
- gram_matrix[i] << OpenTox::Algorithm.gauss(sim)
- end
- end
- @r = RinRuby.new(false,false) # global R instance leads to Socket errors after a large number of requests
- @r.eval "library('kernlab')" # this requires R package "kernlab" to be installed
- LOGGER.debug "Setting R data ..."
- # set data
- @r.gram_matrix = gram_matrix.flatten
- @r.n = neighbor_matches.size
- @r.y = acts
- @r.sims = sims
-
- LOGGER.debug "Preparing R data ..."
- # prepare data
- @r.eval "y<-as.vector(y)"
- @r.eval "gram_matrix<-as.kernelMatrix(matrix(gram_matrix,n,n))"
- @r.eval "sims<-as.vector(sims)"
-
- # model + support vectors
- LOGGER.debug "Creating SVM model ..."
- @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"nu-svr\", nu=0.8)"
- @r.eval "sv<-as.vector(SVindex(model))"
- @r.eval "sims<-sims[sv]"
- @r.eval "sims<-as.kernelMatrix(matrix(sims,1))"
- LOGGER.debug "Predicting ..."
- @r.eval "p<-predict(model,sims)[1,1]"
- @prediction = 10**(@r.p.to_f)
- LOGGER.debug "Prediction is: '" + prediction.to_s + "'."
- @r.quit # free R
- end
- @confidence = conf/@neighbors.size if @neighbors.size > 0
-
+ @prediction_dataset.save
+ @prediction_dataset
end
+ # Find neighbors and store them as object variable
def neighbors
- @compound_features = eval(@feature_calculation_algorithm) if @feature_calculation_algorithm
+ @compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm
- @neighbors = {}
- @activities.each do |training_compound,activities|
- @training_compound = training_compound
- sim = eval(@similarity_algorithm)
+ @neighbors = []
+ @fingerprints.each do |training_compound,training_features|
+ #@activities.each do |training_compound,activities|
+ sim = eval("#{@similarity_algorithm}(@compound_features,training_features,@p_values)")
if sim > @min_sim
- activities.each do |act|
+ @activities[training_compound].each do |act|
@neighbors << {
- :compound => @training_compound,
+ :compound => training_compound,
:similarity => sim,
- :features => @fingerprints[@training_compound],
+ :features => training_features,
:activity => act
}
end
@@ -356,55 +244,63 @@ module OpenTox
end
- def tanimoto
- OpenTox::Algorithm.tanimoto(@compound_features,@fingerprints[@training_compound])
- end
-
- def weighted_tanimoto
- OpenTox::Algorithm.tanimoto(@compound_features,@fingerprints[@training_compound],@p_values)
- end
-
- def euclid
- OpenTox::Algorithm.tanimoto(@compound_features,@fingerprints[@training_compound])
- end
-
- def weighted_euclid
- OpenTox::Algorithm.tanimoto(@compound_features,@fingerprints[@training_compound],@p_values)
- end
-
- def substructure_match
- @compound.match(@features)
- end
-
- def database_search
- #TODO add features method to dataset
- Dataset.new(@metadata[OT.featureDataset]).features(@compound.uri)
+=begin
+ def cached_prediction
+      dataset_uri = PredictionCache.find(:model_uri => @uri, :compound_uri => @compound.uri).dataset_uri
+ return false unless dataset_uri
+ @prediction_dataset = Dataset.find(dataset_uri)
+ return false unless @prediction_dataset
+ LOGGER.debug "Serving cached prediction"
+ true
end
+=end
- def database_activity(compound_uri)
- prediction = OpenTox::Dataset.new
- # find database activities
- if @activities[compound_uri]
- @activities[compound_uri].each { |act| prediction.add compound_uri, @metadata[OT.dependentVariables], act }
- prediction.add_metadata(OT.hasSource => @metadata[OT.trainingDataset])
- prediction
+ # Find database activities and store them in @prediction_dataset
+    # @return [Boolean] true if compound has database activities, false if not
+ def database_activity
+ if @activities[@compound.uri]
+ @activities[@compound.uri].each { |act| @prediction_dataset.add @compound.uri, @metadata[OT.dependentVariables], act }
+ @prediction_dataset.add_metadata(OT.hasSource => @metadata[OT.trainingDataset])
+ @prediction_dataset.save
+ true
else
- nil
+ false
end
end
+ # Save model at model service
def save
- RestClientWrapper.post(@uri,{:content_type => "application/x-yaml"},self.to_yaml)
- end
-
- def self.all
- RestClientWrapper.get(CONFIG[:services]["opentox-model"]).to_s.split("\n")
+ self.uri = RestClientWrapper.post(@uri,{:content_type => "application/x-yaml"},self.to_yaml)
end
+ # Delete model at model service
def delete
RestClientWrapper.delete @uri unless @uri == CONFIG[:services]["opentox-model"]
end
+=begin
+=end
+
+=begin
+ def self.create_from_dataset(dataset_uri,feature_dataset_uri,prediction_feature=nil)
+ training_activities = OpenTox::Dataset.find(dataset_uri)
+ training_features = OpenTox::Dataset.find(feature_dataset_uri)
+ unless prediction_feature # try to read prediction_feature from dataset
+ raise "#{training_activities.features.size} features in dataset #{dataset_uri}. Please provide a prediction_feature parameter." unless training_activities.features.size == 1
+ prediction_feature = training_activities.features.keys.first
+ params[:prediction_feature] = prediction_feature
+ end
+ lazar = Lazar.new
+ training_features = OpenTox::Dataset.new(feature_dataset_uri)
+ case training_features.feature_type
+ when "classification"
+ lazar.similarity_algorithm = "weighted_tanimoto"
+ when "regression"
+ lazar.similarity_algorithm = "weighted_euclid"
+ end
+ end
+=end
+
end
end
end
diff --git a/lib/opentox-ruby-api-wrapper.rb b/lib/opentox-ruby-api-wrapper.rb
index 9dc1372..9f9ff26 100644
--- a/lib/opentox-ruby-api-wrapper.rb
+++ b/lib/opentox-ruby-api-wrapper.rb
@@ -1,4 +1,4 @@
-['rubygems', 'sinatra', 'sinatra/url_for', 'rest_client', 'yaml', 'cgi', 'spork', 'environment'].each do |lib|
+['rubygems', 'sinatra', 'sinatra/url_for', 'rest_client', 'yaml', 'cgi', 'spork', 'overwrite', 'environment'].each do |lib|
require lib
end
@@ -8,6 +8,6 @@ rescue LoadError
puts "Please install Openbabel with 'rake openbabel:install' in the compound component"
end
-['opentox', 'compound','dataset', 'parser','serializer', 'algorithm','model','task','validation','feature', 'ot-logger', 'overwrite', 'rest_client_wrapper'].each do |lib|
+['opentox', 'compound','dataset', 'parser','serializer', 'algorithm','model','task','validation','feature', 'rest_client_wrapper'].each do |lib|
require lib
end
diff --git a/lib/opentox.rb b/lib/opentox.rb
index 3b7fa65..90683e5 100644
--- a/lib/opentox.rb
+++ b/lib/opentox.rb
@@ -1,7 +1,7 @@
module OpenTox
attr_reader :uri
- attr_accessor :metadata, :parameters
+ attr_accessor :metadata
# Initialize OpenTox object with optional uri
# @param [optional, String] URI
@@ -30,11 +30,9 @@ module OpenTox
@metadata
end
- # Load parameters from URI
- #def load_parameters
- #@parameters = Parser::Owl::Generic.new(@uri).parameters
- #@parameters
- #end
+ def add_metadata(metadata)
+ metadata.each { |k,v| @metadata[k] = v }
+ end
# Get OWL-DL representation in RDF/XML format
# @return [application/rdf+xml] RDF/XML representation
diff --git a/lib/ot-logger.rb b/lib/ot-logger.rb
deleted file mode 100644
index df38d77..0000000
--- a/lib/ot-logger.rb
+++ /dev/null
@@ -1,48 +0,0 @@
-require 'logger'
-# logging
-class MyLogger < Logger
-
- def pwd
- path = Dir.pwd.to_s
- index = path.rindex(/\//)
- return path if index==nil
- path[(index+1)..-1]
- end
-
- def trace()
- lines = caller(0)
- n = 2
- line = lines[n]
-
- while (line =~ /spork.rb/ or line =~ /as_task/ or line =~ /ot-logger.rb/)
- n += 1
- line = lines[n]
- end
-
- index = line.rindex(/\/.*\.rb/)
- return line if index==nil
- line[index..-1]
- end
-
- def format(msg)
- pwd.ljust(18)+" :: "+msg.to_s+" :: "+trace+" :: "+($sinatra ? $sinatra.request.env['REMOTE_ADDR'] : nil).to_s
- end
-
- def debug(msg)
- super format(msg)
- end
-
- def info(msg)
- super format(msg)
- end
-
- def warn(msg)
- super format(msg)
- end
-
- def error(msg)
- super format(msg)
- end
-
-end
-
diff --git a/lib/overwrite.rb b/lib/overwrite.rb
index 2e4c396..f39fec3 100644
--- a/lib/overwrite.rb
+++ b/lib/overwrite.rb
@@ -34,3 +34,53 @@ class String
end
end
end
+
+require 'logger'
+# logging
+#class Logger
+class OTLogger < Logger
+
+ def pwd
+ path = Dir.pwd.to_s
+ index = path.rindex(/\//)
+ return path if index==nil
+ path[(index+1)..-1]
+ end
+
+ def trace()
+ lines = caller(0)
+ n = 2
+ line = lines[n]
+
+ while (line =~ /spork.rb/ or line =~ /create/ or line =~ /ot-logger.rb/)
+ n += 1
+ line = lines[n]
+ end
+
+ index = line.rindex(/\/.*\.rb/)
+ return line if index==nil
+ line[index..-1]
+ end
+
+ def format(msg)
+ pwd.ljust(18)+" :: "+msg.to_s+" :: "+trace+" :: "+($sinatra ? $sinatra.request.env['REMOTE_ADDR'] : nil).to_s
+ end
+
+ def debug(msg)
+ super format(msg)
+ end
+
+ def info(msg)
+ super format(msg)
+ end
+
+ def warn(msg)
+ super format(msg)
+ end
+
+ def error(msg)
+ super format(msg)
+ end
+
+end
+
diff --git a/lib/parser.rb b/lib/parser.rb
index 4d8e729..b727412 100644
--- a/lib/parser.rb
+++ b/lib/parser.rb
@@ -116,7 +116,7 @@ module OpenTox
end
end
load_features
- @dataset.metadata = metadata
+ @dataset.metadata = load_metadata
@dataset
end
@@ -253,10 +253,8 @@ module OpenTox
when OT.NumericFeature
@dataset.add compound.uri, feature, value.to_f
when OT.StringFeature
- # TODO: insert ??
@dataset.add compound.uri, feature, value.to_s
@activity_errors << smiles+", "+row.join(", ")
- #return false
end
end
end
diff --git a/lib/rest_client_wrapper.rb b/lib/rest_client_wrapper.rb
index 49549b5..5f5273b 100644
--- a/lib/rest_client_wrapper.rb
+++ b/lib/rest_client_wrapper.rb
@@ -80,6 +80,7 @@ module OpenTox
raise "content-type not set" unless res.content_type
res.code = result.code
+ # TODO: Ambit returns task representation with 200 instead of result URI
return res if res.code==200 || !wait
while (res.code==201 || res.code==202)
@@ -108,11 +109,12 @@ module OpenTox
task = nil
case res.content_type
- when /application\/rdf\+xml|application\/x-yaml/
- task = OpenTox::Task.from_data(res, res.content_type, res.code, base_uri)
+ when /application\/rdf\+xml/
+ task = OpenTox::Task.from_rdfxml(res)
+ when /yaml/
+ task = OpenTox::Task.from_yaml(res)
when /text\//
- raise "uri list has more than one entry, should be a task" if res.content_type=~/text\/uri-list/ and
- res.split("\n").size > 1 #if uri list contains more then one uri, its not a task
+        raise "uri list has more than one entry, should be a task" if res.content_type=~/text\/uri-list/ and res.split("\n").size > 1 # if uri list contains more than one uri, it's not a task
task = OpenTox::Task.find(res.to_s) if res.to_s.uri?
else
raise "unknown content-type for task: '"+res.content_type.to_s+"'" #+"' content: "+res[0..200].to_s
@@ -122,7 +124,7 @@ module OpenTox
task.wait_for_completion
raise task.description unless task.completed? # maybe task was cancelled / error
- res = WrapperResult.new task.resultURI
+ res = WrapperResult.new task.result_uri
res.code = task.http_code
res.content_type = "text/uri-list"
return res
@@ -152,8 +154,8 @@ module OpenTox
# we are either in a task, or in sinatra
# PENDING: always return yaml for now
- if $self_task #this global var in Task.as_task to mark that the current process is running in a task
- raise error.to_yaml # the error is caught, logged, and task state is set to error in Task.as_task
+ if $self_task #this global var in Task.create to mark that the current process is running in a task
+ raise error.to_yaml # the error is caught, logged, and task state is set to error in Task.create
#elsif $sinatra #else halt sinatra
#$sinatra.halt(502,error.to_yaml)
elsif defined?(halt)
diff --git a/lib/serializer.rb b/lib/serializer.rb
index 31aa0d1..9b3af39 100644
--- a/lib/serializer.rb
+++ b/lib/serializer.rb
@@ -25,6 +25,7 @@ module OpenTox
OT.FeatureValue => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } ,
OT.Algorithm => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } ,
OT.Parameter => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } ,
+ OT.Task => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } ,
OT.compound => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } ,
OT.feature => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } ,
@@ -42,6 +43,8 @@ module OpenTox
OT.isA => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } ,
OT.Warnings => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } ,
XSD.anyURI => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } ,
+ OT.hasStatus => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } ,
+ OT.resultURI => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } ,
OT.hasSource => { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } ,
OT.value => { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } ,
@@ -100,17 +103,21 @@ module OpenTox
# @param [String] uri Algorithm URI
def add_algorithm(uri,metadata)
@object[uri] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Algorithm }] }
- LOGGER.debug @object[uri]
add_metadata uri, metadata
- LOGGER.debug @object[uri]
end
# Add a model
# @param [String] uri Model URI
- def add_model(uri,metadata,parameters)
+ def add_model(uri,metadata)
@object[uri] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Model }] }
add_metadata uri, metadata
- add_parameters uri, parameters
+ end
+
+ # Add a task
+    # @param [String] uri Task URI
+ def add_task(uri,metadata)
+ @object[uri] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Task }] }
+ add_metadata uri, metadata
end
# Add metadata
@@ -204,7 +211,7 @@ module OpenTox
# @return [text/plain] Object OWL-DL in RDF/XML format
def to_rdfxml
Tempfile.open("owl-serializer"){|f| f.write(self.to_ntriples); @path = f.path}
- `rapper -i ntriples -o rdfxml #{@path} 2>/dev/null`
+ `rapper -i ntriples -f 'xmlns:ot="#{OT.uri}"' -f 'xmlns:dc="#{DC.uri}"' -f 'xmlns:rdf="#{RDF.uri}"' -f 'xmlns:owl="#{OWL.uri}"' -o rdfxml #{@path} 2>/dev/null`
end
# Convert to JSON as specified in http://n2.talis.com/wiki/RDF_JSON_Specification
@@ -290,7 +297,11 @@ module OpenTox
entries.each do |feature, values|
i = features.index(feature)+1
values.each do |value|
- row[i] = value #TODO overwrites duplicated values
+ if row[i]
+ row[i] = "#{row[i]} #{value}" # multiple values
+ else
+ row[i] = value
+ end
end
end
@rows << row
diff --git a/lib/task.rb b/lib/task.rb
index 96ee719..5b2b5d9 100644
--- a/lib/task.rb
+++ b/lib/task.rb
@@ -2,39 +2,183 @@ $self_task=nil
module OpenTox
+ # Class for handling asynchronous tasks
class Task
- attr_accessor :uri, :date, :title, :creator, :description, :hasStatus, :percentageCompleted, :resultURI, :due_to_time, :http_code
-
- # due_to_time is only set in local tasks
- TASK_ATTRIBS = [ :uri, :date, :title, :creator, :description, :hasStatus, :percentageCompleted, :resultURI, :due_to_time ]
- TASK_ATTRIBS.each{ |a| attr_accessor(a) }
- attr_accessor :http_code
+ include OpenTox
+ attr_accessor :http_code, :due_to_time
- private
- def initialize(uri)
- @uri = uri.to_s.strip
+ def initialize(uri=nil)
+ super uri
+ @metadata = {
+ DC.title => "",
+ DC.date => "",
+ OT.hasStatus => "Running",
+ OT.percentageCompleted => "0",
+ OT.resultURI => "",
+ DC.creator => "", # not mandatory according to API
+ DC.description => "", # not mandatory according to API
+ }
end
-
- # create is private now, use OpenTox::Task.as_task
- def self.create( params )
+
+ # Create a new task for the code in the block. Catches halts and exceptions and sets task state to error if necessary. The block has to return the URI of the created resource.
+ # @example
+ # task = OpenTox::Task.create do
+ # # this code will be executed as a task
+ # model = OpenTox::Algorithm.run(params) # this can be time consuming
+ # model.uri # Important: return URI of the created resource
+ # end
+ # task.status # returns "Running", because tasks are forked
+ # @param [String] title Task title
+ # @param [String] creator Task creator
+ # @return [OPenTox::Task] Task
+ def self.create( title=nil, creator=nil, max_duration=DEFAULT_TASK_MAX_DURATION, description=nil )
+
+ # measure current memory consumption
+ memory = `free -m|sed -n '2p'`.split
+ free_memory = memory[3].to_i + memory[6].to_i # include cache
+      if free_memory < 20 # require at least 20 M free memory (TODO: confirm threshold — comment previously said 200 M)
+ LOGGER.warn "Cannot start task - not enough memory left (#{free_memory} M free)"
+ raise "Insufficient memory to start a new task"
+ end
+
+ cpu_load = `cat /proc/loadavg`.split(/\s+/)[0..2].collect{|c| c.to_f}
+ nr_cpu_cores = `cat /proc/cpuinfo |grep "cpu cores"|cut -d ":" -f2|tr -d " "`.split("\n").collect{|c| c.to_i}.inject{|sum,n| sum+n}
+ if cpu_load[0] > nr_cpu_cores and cpu_load[0] > cpu_load[1] and cpu_load[1] > cpu_load[2] # average CPU load of the last minute is high and CPU load is increasing
+ LOGGER.warn "Cannot start task - CPU load too high (#{cpu_load.join(", ")})"
+ raise "Server too busy to start a new task"
+ end
+
+ params = {:title=>title, :creator=>creator, :max_duration=>max_duration, :description=>description }
task_uri = RestClientWrapper.post(CONFIG[:services]["opentox-task"], params, nil, false).to_s
- Task.find(task_uri.chomp)
- end
+ task = Task.new(task_uri.chomp)
+
+ task_pid = Spork.spork(:logger => LOGGER) do
+ LOGGER.debug "Task #{task.uri} started #{Time.now}"
+ $self_task = task
+
+ begin
+ result = catch(:halt) do
+ yield task
+ end
+ # catching halt, set task state to error
+ if result && result.is_a?(Array) && result.size==2 && result[0]>202
+ LOGGER.error "task was halted: "+result.inspect
+ task.error(result[1])
+ return
+ end
+ LOGGER.debug "Task #{task.uri} done #{Time.now} -> "+result.to_s
+ task.completed(result)
+ rescue => ex
+ LOGGER.error "task failed: "+ex.message
+ LOGGER.error ": "+ex.backtrace.join("\n")
+ task.error(ex.message)
+ end
+ end
+ task.pid = task_pid
+ LOGGER.debug "Started task: "+task.uri.to_s
+ task
+ end
- public
- def self.find( uri, accept_header=nil )
+ # Find a task for querying, status changes
+ # @param [String] uri Task URI
+ # @return [OpenTox::Task] Task object
+ def self.find(uri)
task = Task.new(uri)
- task.reload( accept_header )
- return task
+ task.load_metadata
+ task
+ end
+
+ # Get a list of all tasks
+ # @param [optional, String] uri URI of task service
+ # @return [text/uri-list] Task URIs
+ def self.all(uri=CONFIG[:services]["opentox-task"])
+ OpenTox.all uri
+ end
+
+ def self.from_yaml(yaml)
+ @metadata = YAML.load(yaml)
+ end
+
+ def self.from_rdfxml(rdfxml)
+ file = Tempfile.open("ot-rdfxml"){|f| f.write(rdfxml)}.path
+ parser = Parser::Owl::Generic.new file
+ @metadata = parser.load_metadata
+ end
+
+ def to_rdfxml
+ s = Serializer::Owl.new
+ s.add_task(@uri,@metadata)
+ s.to_rdfxml
+ end
+
+ def status
+ @metadata[OT.hasStatus]
+ end
+
+ def result_uri
+ @metadata[OT.resultURI]
+ end
+
+ def description
+ @metadata[DC.description]
+ end
+
+ def cancel
+ RestClientWrapper.put(File.join(@uri,'Cancelled'))
+ load_metadata
+ end
+
+ def completed(uri)
+ RestClientWrapper.put(File.join(@uri,'Completed'),{:resultURI => uri})
+ load_metadata
+ end
+
+ def error(description)
+ RestClientWrapper.put(File.join(@uri,'Error'),{:description => description.to_s[0..2000]})
+ load_metadata
+ end
+
+ def pid=(pid)
+ RestClientWrapper.put(File.join(@uri,'pid'), {:pid => pid})
+ end
+
+ def running?
+ @metadata[OT.hasStatus] == 'Running'
+ end
+
+ def completed?
+ @metadata[OT.hasStatus] == 'Completed'
+ end
+
+ def error?
+ @metadata[OT.hasStatus] == 'Error'
+ end
+
+ def load_metadata
+ if (CONFIG[:yaml_hosts].include?(URI.parse(uri).host))
+ result = RestClientWrapper.get(@uri, {:accept => 'application/x-yaml'}, false)
+ @metadata = YAML.load result.to_s
+ @http_code = result.code
+ else
+ @metadata = Parser::Owl::Generic.new(@uri).load_metadata
+ @http_code = RestClientWrapper.get(uri, {:accept => 'application/rdf+xml'}, false).code
+ end
end
+ # create is private now, use OpenTox::Task.as_task
+ #def self.create( params )
+ #task_uri = RestClientWrapper.post(CONFIG[:services]["opentox-task"], params, nil, false).to_s
+ #Task.find(task_uri.chomp)
+ #end
+
+=begin
def self.from_data(data, content_type, code, base_uri)
task = Task.new(nil)
task.http_code = code
task.reload_from_data(data, content_type, base_uri)
return task
end
-
+
def reload( accept_header=nil )
unless accept_header
if (CONFIG[:yaml_hosts].include?(URI.parse(uri).host))
@@ -65,113 +209,45 @@ module OpenTox
end
raise "uri is null after loading" unless @uri and @uri.to_s.strip.size>0
end
-
- def cancel
- RestClientWrapper.put(File.join(@uri,'Cancelled'))
- reload
- end
-
- def completed(uri)
- RestClientWrapper.put(File.join(@uri,'Completed'),{:resultURI => uri})
- reload
- end
-
- def error(description)
- RestClientWrapper.put(File.join(@uri,'Error'),{:description => description.to_s[0..2000]})
- reload
- end
-
- def pid=(pid)
- RestClientWrapper.put(File.join(@uri,'pid'), {:pid => pid})
- end
-
- def running?
- @hasStatus.to_s == 'Running'
- end
-
- def completed?
- @hasStatus.to_s == 'Completed'
- end
-
- def error?
- @hasStatus.to_s == 'Error'
- end
+=end
# waits for a task, unless time exceeds or state is no longer running
def wait_for_completion(dur=0.3)
- if (@uri.match(CONFIG[:services]["opentox-task"]))
- due_to_time = (@due_to_time.is_a?(Time) ? @due_to_time : Time.parse(@due_to_time))
- running_time = due_to_time - (@date.is_a?(Time) ? @date : Time.parse(@date))
- else
- # the date of the external task cannot be trusted, offest to local time might be to big
- due_to_time = Time.new + EXTERNAL_TASK_MAX_DURATION
- running_time = EXTERNAL_TASK_MAX_DURATION
- end
+ due_to_time = Time.new + DEFAULT_TASK_MAX_DURATION
LOGGER.debug "start waiting for task "+@uri.to_s+" at: "+Time.new.to_s+", waiting at least until "+due_to_time.to_s
+ load_metadata # for extremely fast tasks
+ check_state
while self.running?
sleep dur
- reload
+ load_metadata
check_state
if (Time.new > due_to_time)
- raise "max wait time exceeded ("+running_time.to_s+"sec), task: '"+@uri.to_s+"'"
+ raise "max wait time exceeded ("+DEFAULT_TASK_MAX_DURATION.to_s+"sec), task: '"+@uri.to_s+"'"
end
end
- LOGGER.debug "Task '"+@hasStatus+"': "+@uri.to_s+", Result: "+@resultURI.to_s
+ LOGGER.debug "Task '"+@metadata[OT.hasStatus]+"': "+@uri.to_s+", Result: "+@metadata[OT.resultURI].to_s
end
+ private
def check_state
begin
- raise "illegal task state, task is completed, resultURI is no URI: '"+@resultURI.to_s+
- "'" unless @resultURI and @resultURI.to_s.uri? if completed?
+ raise "illegal task state, task is completed, resultURI is no URI: '"+@metadata[OT.resultURI].to_s+
+ "'" unless @metadata[OT.resultURI] and @metadata[OT.resultURI].to_s.uri? if completed?
if @http_code == 202
- raise "illegal task state, code is 202, but hasStatus is not Running: '"+@hasStatus+"'" unless running?
+ raise "illegal task state, code is 202, but hasStatus is not Running: '"+@metadata[OT.hasStatus]+"'" unless running?
elsif @http_code == 201
- raise "illegal task state, code is 201, but hasStatus is not Completed: '"+@hasStatus+"'" unless completed?
- raise "illegal task state, code is 201, resultURI is no task-URI: '"+@resultURI.to_s+
- "'" unless @resultURI and @resultURI.to_s.uri?
+ raise "illegal task state, code is 201, but hasStatus is not Completed: '"+@metadata[OT.hasStatus]+"'" unless completed?
+ raise "illegal task state, code is 201, resultURI is no task-URI: '"+@metadata[OT.resultURI].to_s+
+ "'" unless @metadata[OT.resultURI] and @metadata[OT.resultURI].to_s.uri?
end
rescue => ex
RestClientWrapper.raise_uri_error(ex.message, @uri)
end
end
-
- # returns the task uri
- # catches halts and exceptions, task state is set to error then
- def self.as_task( title, creator, max_duration=DEFAULT_TASK_MAX_DURATION, description=nil )
- #return yield nil
-
- params = {:title=>title, :creator=>creator, :max_duration=>max_duration, :description=>description }
- task = ::OpenTox::Task.create(params)
- task_pid = Spork.spork(:logger => LOGGER) do
- LOGGER.debug "Task #{task.uri} started #{Time.now}"
- $self_task = task
-
- begin
- result = catch(:halt) do
- yield task
- end
- # catching halt, set task state to error
- if result && result.is_a?(Array) && result.size==2 && result[0]>202
- LOGGER.error "task was halted: "+result.inspect
- task.error(result[1])
- return
- end
- LOGGER.debug "Task #{task.uri} done #{Time.now} -> "+result.to_s
- task.completed(result)
- rescue => ex
- LOGGER.error "task failed: "+ex.message
- LOGGER.error ": "+ex.backtrace.join("\n")
- task.error(ex.message)
- end
- end
- task.pid = task_pid
- LOGGER.debug "Started task: "+task.uri.to_s
- task.uri
- end
end
diff --git a/lib/validation.rb b/lib/validation.rb
index 340332a..76c4529 100644
--- a/lib/validation.rb
+++ b/lib/validation.rb
@@ -1,20 +1,70 @@
module OpenTox
class Validation
+ include OpenTox
- attr_accessor :uri
-
- def initialize(params)
- @uri = OpenTox::RestClientWrapper.post(File.join(CONFIG[:services]["opentox-validation"],"/crossvalidation"),params,nil,false)
- end
+ attr_accessor :report_uri, :qmrf_report_uri
- def self.crossvalidation(params)
+ def self.create_crossvalidation(params)
params[:uri] = File.join(CONFIG[:services]['opentox-validation'], "crossvalidation")
params[:num_folds] = 10 unless params[:num_folds]
params[:random_seed] = 2 unless params[:random_seed]
params[:stratified] = false unless params[:stratified]
- OpenTox::Validation.new(params)
+ uri = OpenTox::RestClientWrapper.post(File.join(CONFIG[:services]["opentox-validation"],"/crossvalidation"),params,nil,false)
+ OpenTox::Validation.new(uri)
end
+ def create_report
+ @report_uri = RestClientWrapper.post(File.join(CONFIG[:services]["opentox-validation"],"/report/crossvalidation"), :validation_uris => @uri).to_s
+ @report_uri
+ end
+
+ def create_qmrf_report
+ @qmrf_report_uri = RestClientWrapper.post(File.join(CONFIG[:services]["opentox-validation"],"/reach_report/qmrf"), :model_uri => @uri).to_s
+ @qmrf_report_uri
+ end
+
+ def summary(type)
+      v = YAML.load RestClientWrapper.get(File.join(@uri, 'statistics'),:accept => "application/x-yaml").to_s
+
+ case type
+ when "classification"
+ tp=0; tn=0; fp=0; fn=0; n=0
+ v[:classification_statistics][:confusion_matrix][:confusion_matrix_cell].each do |cell|
+ if cell[:confusion_matrix_predicted] == "true" and cell[:confusion_matrix_actual] == "true"
+ tp = cell[:confusion_matrix_value]
+ n += tp
+ elsif cell[:confusion_matrix_predicted] == "false" and cell[:confusion_matrix_actual] == "false"
+ tn = cell[:confusion_matrix_value]
+ n += tn
+ elsif cell[:confusion_matrix_predicted] == "false" and cell[:confusion_matrix_actual] == "true"
+ fn = cell[:confusion_matrix_value]
+ n += fn
+ elsif cell[:confusion_matrix_predicted] == "true" and cell[:confusion_matrix_actual] == "false"
+ fp = cell[:confusion_matrix_value]
+ n += fp
+ end
+ end
+ {
+ :nr_predictions => n,
+ :true_positives => tp,
+ :false_positives => fp,
+ :true_negatives => tn,
+ :false_negatives => fn,
+ :correct_predictions => 100*(tp+tn).to_f/n,
+ :weighted_area_under_roc => v[:classification_statistics][:weighted_area_under_roc].to_f,
+ :sensitivity => tp.to_f/(tp+fn),
+ :specificity => tn.to_f/(tn+fp),
+ }
+ when "regression"
+ {
+ :nr_predictions => v[:num_instances] - v[:num_unpredicted],
+ :r_square => v[:regression_statistics][:r_square],
+ :root_mean_squared_error => v[:regression_statistics][:root_mean_squared_error],
+ :mean_absolute_error => v[:regression_statistics][:mean_absolute_error],
+ }
+ end
+ end
+
end
end