From f8552611c2dbe25d76474f51e4e895bf9c2b5c5e Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Fri, 19 Nov 2010 16:53:21 +0100 Subject: lazar predictions for toxcreate working --- lib/algorithm.rb | 154 +++++++++++-- lib/dataset.rb | 72 ++++++- lib/environment.rb | 8 +- lib/feature.rb | 10 + lib/model.rb | 466 ++++++++++++++++------------------------ lib/opentox-ruby-api-wrapper.rb | 4 +- lib/opentox.rb | 10 +- lib/ot-logger.rb | 48 ----- lib/overwrite.rb | 50 +++++ lib/parser.rb | 4 +- lib/rest_client_wrapper.rb | 16 +- lib/serializer.rb | 23 +- lib/task.rb | 278 +++++++++++++++--------- lib/validation.rb | 64 +++++- 14 files changed, 720 insertions(+), 487 deletions(-) delete mode 100644 lib/ot-logger.rb (limited to 'lib') diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 711f63b..a6fa4a7 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -1,3 +1,9 @@ +# R integration +# workaround to initialize R non-interactively (former rinruby versions did this by default) +# avoids compiling R with X +R = nil +require "rinruby" + module OpenTox # Wrapper for OpenTox Algorithms @@ -6,8 +12,10 @@ module OpenTox include OpenTox # Execute algorithm with parameters, please consult the OpenTox API and the webservice documentation for acceptable parameters + # @param [optional,Hash] params Algorithm parameters + # @return [String] URI of new resource (dataset, model, ...) 
def run(params=nil) - RestClientWrapper.post(@uri, params) + RestClientWrapper.post(@uri, params).to_s end # Get OWL-DL representation in RDF/XML format @@ -23,9 +31,11 @@ module OpenTox include Algorithm end + # Fminer algorithms (https://github.com/amaunz/fminer2) module Fminer include Algorithm + # Backbone Refinement Class mining (http://bbrc.maunz.de/) class BBRC include Fminer # Initialize bbrc algorithm @@ -35,6 +45,7 @@ module OpenTox end end + # LAtent STructure Pattern Mining (http://last-pm.maunz.de) class LAST include Fminer # Initialize last algorithm @@ -58,15 +69,15 @@ module OpenTox # Utility methods without dedicated webservices + # Similarity calculations module Similarity include Algorithm # Tanimoto similarity - # # @param [Array] features_a Features of first compound # @param [Array] features_b Features of second compound # @param [optional, Hash] weights Weights for all features - # @return [Float] (Wighted) tanimoto similarity + # @return [Float] (Weighted) tanimoto similarity def self.tanimoto(features_a,features_b,weights=nil) common_features = features_a & features_b all_features = (features_a + features_b).uniq @@ -86,15 +97,19 @@ module OpenTox end # Euclidean similarity - def self.euclidean(prop_a,prop_b,weights=nil) - common_properties = prop_a.keys & prop_b.keys + # @param [Hash] properties_a Properties of first compound + # @param [Hash] properties_b Properties of second compound + # @param [optional, Hash] weights Weights for all properties + # @return [Float] (Weighted) euclidean similarity + def self.euclidean(properties_a,properties_b,weights=nil) + common_properties = properties_a.keys & properties_b.keys if common_properties.size > 1 dist_sum = 0 common_properties.each do |p| if weights - dist_sum += ( (prop_a[p] - prop_b[p]) * Algorithm.gauss(weights[p]) )**2 + dist_sum += ( (properties_a[p] - properties_b[p]) * Algorithm.gauss(weights[p]) )**2 else - dist_sum += (prop_a[p] - prop_b[p])**2 + dist_sum += (properties_a[p] - 
properties_b[p])**2 end end 1/(1+Math.sqrt(dist_sum)) @@ -103,14 +118,129 @@ module OpenTox end end end + + module Neighbors + + # Classification with majority vote from neighbors weighted by similarity + # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity` + # @param [optional] params Ignored (only for compatibility with local_svm_regression) + # @return [Hash] Hash with keys `:prediction, :confidence` + def self.weighted_majority_vote(neighbors,params={}) + conf = 0.0 + confidence = 0.0 + neighbors.each do |neighbor| + case neighbor[:activity].to_s + when 'true' + conf += Algorithm.gauss(neighbor[:similarity]) + when 'false' + conf -= Algorithm.gauss(neighbor[:similarity]) + end + end + if conf > 0.0 + prediction = true + elsif conf < 0.0 + prediction = false + else + prediction = nil + end + confidence = conf/neighbors.size if neighbors.size > 0 + {:prediction => prediction, :confidence => confidence.abs} + end + + # Local support vector regression from neighbors + # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features` + # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required + # @return [Hash] Hash with keys `:prediction, :confidence` + def self.local_svm_regression(neighbors,params ) + sims = neighbors.collect{ |n| n[:similarity] } # similarity values between query and neighbors + conf = sims.inject{|sum,x| sum + x } + acts = neighbors.collect do |n| + act = n[:activity] + Math.log10(act.to_f) + end # activities of neighbors for supervised learning + + neighbor_matches = neighbors.collect{ |n| n[:features] } # as in classification: URIs of matches + gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel + if neighbor_matches.size == 0 + raise "No neighbors found" + else + # gram matrix + (0..(neighbor_matches.length-1)).each do |i| + gram_matrix[i] = [] unless gram_matrix[i] + # upper triangle + 
((i+1)..(neighbor_matches.length-1)).each do |j| + sim = eval("#{params[:similarity_algorithm]}(neighbor_matches[i], neighbor_matches[j], params[:p_values])") + gram_matrix[i][j] = Algorithm.gauss(sim) + gram_matrix[j] = [] unless gram_matrix[j] + gram_matrix[j][i] = gram_matrix[i][j] # lower triangle + end + gram_matrix[i][i] = 1.0 + end + + LOGGER.debug gram_matrix.to_yaml + + @r = RinRuby.new(false,false) # global R instance leads to Socket errors after a large number of requests + @r.eval "library('kernlab')" # this requires R package "kernlab" to be installed + LOGGER.debug "Setting R data ..." + # set data + @r.gram_matrix = gram_matrix.flatten + @r.n = neighbor_matches.size + @r.y = acts + @r.sims = sims + + LOGGER.debug "Preparing R data ..." + # prepare data + @r.eval "y<-as.vector(y)" + @r.eval "gram_matrix<-as.kernelMatrix(matrix(gram_matrix,n,n))" + @r.eval "sims<-as.vector(sims)" + + # model + support vectors + LOGGER.debug "Creating SVM model ..." + @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"nu-svr\", nu=0.8)" + @r.eval "sv<-as.vector(SVindex(model))" + @r.eval "sims<-sims[sv]" + @r.eval "sims<-as.kernelMatrix(matrix(sims,1))" + LOGGER.debug "Predicting ..." + @r.eval "p<-predict(model,sims)[1,1]" + prediction = 10**(@r.p.to_f) + LOGGER.debug "Prediction is: '" + @prediction.to_s + "'." 
+ @r.quit # free R + end + confidence = conf/neighbors.size if neighbors.size > 0 + {:prediction => prediction, :confidence => confidence} + + end + + end + + module Substructure + include Algorithm + # Substructure matching + # @param [OpenTox::Compound] compound Compound + # @param [Array] features Array with Smarts strings + # @return [Array] Array with matching Smarts + def self.match(compound,features) + compound.match(features) + end + end + + module Dataset + include Algorithm + # API should match Substructure.match + def features(dataset_uri,compound_uri) + end + end - # Gauss kernel - def self.gauss(sim, sigma = 0.3) - x = 1.0 - sim - Math.exp(-(x*x)/(2*sigma*sigma)) - end + # Gauss kernel + # @return [Float] + def self.gauss(x, sigma = 0.3) + d = 1.0 - x + Math.exp(-(d*d)/(2*sigma*sigma)) + end # Median of an array + # @param [Array] Array with values + # @return [Float] Median def self.median(array) return nil if array.empty? array.sort! diff --git a/lib/dataset.rb b/lib/dataset.rb index 6e270e9..4737ea1 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -43,7 +43,7 @@ module OpenTox # Get all datasets from a service # @param [optional,String] uri URI of the dataset service, defaults to service specified in configuration - # @return [Array] Array of dataset object with all data + # @return [Array] Array of dataset object without data (use one of the load_* methods to pull data from the server) def self.all(uri=CONFIG[:services]["opentox-dataset"]) RestClientWrapper.get(uri,:accept => "text/uri-list").to_s.each_line.collect{|u| Dataset.new(u)} end @@ -55,6 +55,10 @@ module OpenTox copy YAML.load(yaml) end + def load_rdfxml(rdfxml) + load_rdfxml_file Tempfile.open("ot-rdfxml"){|f| f.write(rdfxml)}.path + end + # Load RDF/XML representation from a file # @param [String] file File with RDF/XML representation of the dataset # @return [OpenTox::Dataset] Dataset object with RDF/XML data @@ -129,8 +133,6 @@ module OpenTox # @return [String] `classification", 
"regression", "mixed" or unknown` def feature_type feature_types = @features.collect{|f,metadata| metadata[OT.isA]}.uniq - LOGGER.debug "FEATURES" - LOGGER.debug feature_types.inspect if feature_types.size > 1 "mixed" else @@ -145,12 +147,18 @@ module OpenTox end end - # Get Excel representation + # Get Spreadsheet representation # @return [Spreadsheet::Workbook] Workbook which can be written with the spreadsheet gem (data_entries only, metadata will will be discarded)) def to_spreadsheet Serializer::Spreadsheets.new(self).to_spreadsheet end + # Get Excel representation (alias for to_spreadsheet) + # @return [Spreadsheet::Workbook] Workbook which can be written with the spreadsheet gem (data_entries only, metadata will will be discarded)) + def to_xls + to_spreadsheet + end + # Get CSV string representation (data_entries only, metadata will be discarded) # @return [String] CSV representation def to_csv @@ -180,6 +188,10 @@ module OpenTox @features[feature][DC.title] end + def title + @metadata[DC.title] + end + # Insert a statement (compound_uri,feature_uri,value) # @example Insert a statement (compound_uri,feature_uri,value) # dataset.add "http://webservices.in-silico.ch/compound/InChI=1S/C6Cl6/c7-1-2(8)4(10)6(12)5(11)3(1)9", "http://webservices.in-silico.ch/dataset/1/feature/hamster_carcinogenicity", true @@ -224,11 +236,18 @@ module OpenTox # TODO: rewrite feature URI's ?? @compounds.uniq! 
if @uri - RestClientWrapper.post(@uri,{:content_type => "application/x-yaml"},self.to_yaml) + if (CONFIG[:yaml_hosts].include?(URI.parse(@uri).host)) + RestClientWrapper.post(@uri,{:content_type => "application/x-yaml"},self.to_yaml) + else + File.open("ot-post-file.rdf","w+") { |f| f.write(self.to_rdfxml); @path = f.path } + task_uri = RestClient.post(@uri, {:file => File.new(@path)},{:accept => "text/uri-list"}).to_s.chomp + #task_uri = `curl -X POST -H "Accept:text/uri-list" -F "file=@#{@path};type=application/rdf+xml" http://apps.ideaconsult.net:8080/ambit2/dataset` + Task.find(task_uri).wait_for_completion + self.uri = RestClientWrapper.get(task_uri,:accept => 'text/uri-list') + end else # create dataset if uri is empty self.uri = RestClientWrapper.post(CONFIG[:services]["opentox-dataset"],{}).to_s.chomp - RestClientWrapper.post(@uri,{:content_type => "application/x-yaml"},self.to_yaml) end @uri end @@ -252,4 +271,45 @@ module OpenTox end end end + + # Class with special methods for lazar prediction datasets + class LazarPrediction < Dataset + + # Find a prediction dataset and load all data. 
+ # @param [String] uri Prediction dataset URI + # @return [OpenTox::Dataset] Prediction dataset object with all data + def self.find(uri) + prediction = LazarPrediction.new(uri) + prediction.load_all + prediction + end + + def value(compound) + @data_entries[compound.uri].collect{|f,v| v.first if f.match(/prediction/)}.compact.first + end + + def confidence(compound) + feature_uri = @data_entries[compound.uri].collect{|f,v| f if f.match(/prediction/)}.compact.first + @features[feature_uri][OT.confidence] + end + + def descriptors(compound) + @data_entries[compound.uri].collect{|f,v| @features[f] if f.match(/descriptor/)}.compact if @data_entries[compound.uri] + end + + def measured_activities(compound) + source = @metadata[OT.hasSource] + @data_entries[compound.uri].collect{|f,v| v if f.match(/#{source}/)}.compact + end + + def neighbors(compound) + @data_entries[compound.uri].collect{|f,v| @features[f] if f.match(/neighbor/)}.compact + end + +# def errors(compound) +# features = @data_entries[compound.uri].keys +# features.collect{|f| @features[f][OT.error]}.join(" ") if features +# end + + end end diff --git a/lib/environment.rb b/lib/environment.rb index d66b062..4f1cc80 100644 --- a/lib/environment.rb +++ b/lib/environment.rb @@ -1,4 +1,3 @@ -require "ot-logger" # set default environment ENV['RACK_ENV'] = 'production' unless ENV['RACK_ENV'] @@ -45,8 +44,8 @@ end load File.join config_dir,"mail.rb" if File.exists?(File.join config_dir,"mail.rb") logfile = "#{LOG_DIR}/#{ENV["RACK_ENV"]}.log" -#LOGGER = MyLogger.new(logfile,'daily') # daily rotation -LOGGER = MyLogger.new(logfile) # no rotation +#LOGGER = OTLogger.new(logfile,'daily') # daily rotation +LOGGER = OTLogger.new(logfile) # no rotation LOGGER.formatter = Logger::Formatter.new #this is neccessary to restore the formating in case active-record is loaded if CONFIG[:logger] and CONFIG[:logger] == "debug" LOGGER.level = Logger::DEBUG @@ -60,11 +59,12 @@ FALSE_REGEXP = /^(false|inactive|0|0.0)$/i # Task 
durations DEFAULT_TASK_MAX_DURATION = 36000 -EXTERNAL_TASK_MAX_DURATION = 36000 +#EXTERNAL_TASK_MAX_DURATION = 36000 # OWL Namespaces class OwlNamespace + attr_accessor :uri def initialize(uri) @uri = uri end diff --git a/lib/feature.rb b/lib/feature.rb index 13d97a2..9e28077 100644 --- a/lib/feature.rb +++ b/lib/feature.rb @@ -1,5 +1,15 @@ module OpenTox class Feature include OpenTox + + def self.find(uri) + feature = Feature.new uri + if (CONFIG[:yaml_hosts].include?(URI.parse(uri).host)) + feature.add_metadata YAML.load(RestClientWrapper.get(uri,:accept => "application/x-yaml")) + else + feature.add_metadata Parser::Owl::Dataset.new(uri).load_metadata + end + feature + end end end diff --git a/lib/model.rb b/lib/model.rb index 63013cb..c6a2cf4 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -4,6 +4,9 @@ module OpenTox include OpenTox + # Run a model with parameters + # @param [Hash] params Parameters for OpenTox model + # @return [text/uri-list] Task or resource URI def run(params) if CONFIG[:yaml_hosts].include?(URI.parse(@uri).host) accept = 'application/x-yaml' @@ -11,47 +14,25 @@ module OpenTox accept = 'application/rdf+xml' end begin - params[:acccept] = accept - #TODO fix: REstClientWrapper does not accept accept header - #RestClientWrapper.post(@uri,params)#,{:accept => accept}) - `curl -X POST -H "Accept:#{accept}" #{params.collect{|k,v| "-d #{k}=#{v}"}.join(" ")} #{@uri}`.to_s.chomp + RestClientWrapper.post(@uri,{:accept => accept},params).to_s rescue => e LOGGER.error "Failed to run #{@uri} with #{params.inspect} (#{e.inspect})" raise "Failed to run #{@uri} with #{params.inspect}" end end - -=begin - def classification? 
- #TODO replace with request to ontology server - if @metadata[DC.title] =~ /(?i)classification/ - return true - elsif @metadata[DC.title] =~ /(?i)regression/ - return false - elsif @uri =~/ntua/ and @metadata[DC.title] =~ /mlr/ - return false - elsif @uri =~/tu-muenchen/ and @metadata[DC.title] =~ /regression|M5P|GaussP/ - return false - elsif @uri =~/ambit2/ and @metadata[DC.title] =~ /pKa/ || @metadata[DC.title] =~ /Regression|Caco/ - return false - elsif @uri =~/majority/ - return (@uri =~ /class/) != nil - else - raise "unknown model, uri:'"+@uri+"' title:'"+@metadata[DC.title]+"'" - end - end -=end + # Generic OpenTox model class for all API compliant services class Generic include Model end + # Lazy Structure Activity Relationship class class Lazar include Model + include Algorithm - #attr_accessor :prediction_type, :feature_type, :features, :effects, :activities, :p_values, :fingerprints, :parameters - attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :parameters, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm + attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim def initialize(uri=nil) @@ -61,7 +42,6 @@ module OpenTox super CONFIG[:services]["opentox-model"] end - # TODO: fix metadata, add parameters @metadata[OT.algorithm] = File.join(CONFIG[:services]["opentox-algorithm"],"lazar") @features = [] @@ -70,284 +50,192 @@ module OpenTox @p_values = {} @fingerprints = {} - @feature_calculation_algorithm = "substructure_match" - @similarity_algorithm = "weighted_tanimoto" - @prediction_algorithm = "weighted_majority_vote" + @feature_calculation_algorithm = "Substructure.match" + @similarity_algorithm = "Similarity.tanimoto" + @prediction_algorithm = "Neighbors.weighted_majority_vote" @min_sim = 0.3 end - def self.find(uri) - YAML.load 
RestClientWrapper.get(uri,:content_type => 'application/x-yaml') + # Get URIs of all lazar models + # @return [Array] List of lazar model URIs + def self.all + RestClientWrapper.get(CONFIG[:services]["opentox-model"]).to_s.split("\n") end - def self.create_from_dataset(dataset_uri,feature_dataset_uri,prediction_feature=nil) - training_activities = OpenTox::Dataset.find(dataset_uri) - training_features = OpenTox::Dataset.find(feature_dataset_uri) - unless prediction_feature # try to read prediction_feature from dataset - raise "#{training_activities.features.size} features in dataset #{dataset_uri}. Please provide a prediction_feature parameter." unless training_activities.features.size == 1 - prediction_feature = training_activities.features.keys.first - params[:prediction_feature] = prediction_feature - end - lazar = Lazar.new - training_features = OpenTox::Dataset.new(feature_dataset_uri) - case training_features.feature_type - when "classification" - lazar.similarity_algorithm = "weighted_tanimoto" - when "regression" - lazar.similarity_algorithm = "weighted_euclid" - end + # Find a lazar model + # @param [String] uri Model URI + # @return [OpenTox::Model::Lazar] lazar model + def self.find(uri) + YAML.load RestClientWrapper.get(uri,:accept => 'application/x-yaml') end - def self.create(dataset_uri,prediction_feature=nil,feature_generation_uri=File.join(CONFIG[:services]["opentox-algorithm"],"fminer/bbrc"),params=nil) - - training_activities = OpenTox::Dataset.find(dataset_uri) - - unless prediction_feature # try to read prediction_feature from dataset - raise "#{training_activities.features.size} features in dataset #{dataset_uri}. Please provide a prediction_feature parameter." 
unless training_activities.features.size == 1 - prediction_feature = training_activities.features.keys.first - params[:prediction_feature] = prediction_feature - end - - lazar = Lazar.new - params[:feature_generation_uri] = feature_generation_uri - feature_dataset_uri = OpenTox::Algorithm::Generic.new(feature_generation_uri).run(params).to_s - training_features = OpenTox::Dataset.find(feature_dataset_uri) - raise "Dataset #{feature_dataset_uri} not found or empty." if training_features.nil? - - # sorted features for index lookups - lazar.features = training_features.features.sort if training_features.feature_type == "regression" - - training_features.data_entries.each do |compound,entry| - lazar.fingerprints[compound] = [] unless lazar.fingerprints[compound] - entry.keys.each do |feature| - case training_features.feature_type - when "fminer" - # fingerprints are sets - smarts = training_features.features[feature][OT.smarts] - lazar.fingerprints[compound] << smarts - unless lazar.features.include? smarts - lazar.features << smarts - lazar.p_values[smarts] = training_features.features[feature][OT.p_value] - lazar.effects[smarts] = training_features.features[feature][OT.effect] - end - when "classification" - # fingerprints are sets - if entry[feature].flatten.size == 1 - lazar.fingerprints[compound] << feature if entry[feature].flatten.first.match(TRUE_REGEXP) - lazar.features << feature unless lazar.features.include? 
feature - else - LOGGER.warn "More than one entry (#{entry[feature].inspect}) for compound #{compound}, feature #{feature}" - end - when "regression" - # fingerprints are arrays - if entry[feature].flatten.size == 1 - lazar.fingerprints[compound][lazar.features.index(feature)] = entry[feature].flatten.first - else - LOGGER.warn "More than one entry (#{entry[feature].inspect}) for compound #{compound}, feature #{feature}" - end - end - end - - lazar.activities[compound] = [] unless lazar.activities[compound] - training_activities.data_entries[compound][params[:prediction_feature]].each do |value| - case value.to_s - when "true" - lazar.activities[compound] << true - when "false" - lazar.activities[compound] << false - else - lazar.activities[compound] << value.to_f - lazar.prediction_type = "regression" - end - end - end - - if feature_generation_uri.match(/fminer/) - lazar.feature_calculation_algorithm = "substructure_match" - else - halt 404, "External feature generation services not yet supported" - end - - lazar.metadata[OT.dependentVariables] = params[:prediction_feature] - lazar.metadata[OT.trainingDataset] = dataset_uri - lazar.metadata[OT.featureDataset] = feature_dataset_uri + # Create a new lazar model + # @param [optional,Hash] params Parameters for the lazar algorithm (OpenTox::Algorithm::Lazar) + # @return [OpenTox::Model::Lazar] lazar model + def self.create(params) + lazar_algorithm = OpenTox::Algorithm::Generic.new File.join( CONFIG[:services]["opentox-algorithm"],"lazar") + model_uri = lazar_algorithm.run(params) + OpenTox::Model::Lazar.find(model_uri) + end - lazar.parameters = { - "dataset_uri" => dataset_uri, - "prediction_feature" => prediction_feature, - "feature_generation_uri" => feature_generation_uri - } - - model_uri = lazar.save - LOGGER.info model_uri + " created #{Time.now}" - model_uri +=begin + # Create a new lazar model and return task + # @param [optional,Hash] params Parameters for the lazar algorithm (OpenTox::Algorithm::Lazar) + 
# @return [OpenTox::Task] Task for lazar model creation + def self.create_task(params) + task_uri = RestClientWrapper.post(File.join(CONFIG[:services]["opentox-algorithm"],"lazar"), {}, params, false) + Task.find(task_uri) + #model_uri = lazar_algorithm.run(params) + #OpenTox::Model::Lazar.new(model_uri) + end +=end + def parameter(param) + @metadata[OT.parameters].collect{|p| p[OT.paramValue] if p[DC.title] == param}.compact.first end def predict_dataset(dataset_uri) @prediction_dataset = Dataset.create @prediction_dataset.add_metadata({ - OT.hasSource => @lazar.uri, - DC.creator => @lazar.uri, - DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )) + OT.hasSource => @uri, + DC.creator => @uri, + DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )), + OT.parameters => [{DC.title => "dataset_uri", OT.paramValue => dataset_uri}] }) - @prediction_dataset.add_parameters({"dataset_uri" => dataset_uri}) - Dataset.new(dataset_uri).load_compounds.each do |compound_uri| + d = Dataset.new(dataset_uri) + d.load_compounds + d.compounds.each do |compound_uri| predict(compound_uri,false) end @prediction_dataset.save - @prediction_dataset.uri + @prediction_dataset end + # Predict a compound + # @param [String] compound_uri Compound URI + # @param [optinal,Boolean] verbose Verbose prediction (output includes neighbors and features) + # @return [OpenTox::Dataset] Dataset with prediction def predict(compound_uri,verbose=false) @compound = Compound.new compound_uri + features = {} unless @prediction_dataset + #@prediction_dataset = cached_prediction + #return @prediction_dataset if cached_prediction @prediction_dataset = Dataset.create @prediction_dataset.add_metadata( { - OT.hasSource => @lazar.uri, - DC.creator => @lazar.uri, - DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )) + OT.hasSource => @uri, + DC.creator => @uri, + # TODO: fix dependentVariable + DC.title => URI.decode(File.basename( 
@metadata[OT.dependentVariables] )), + OT.parameters => [{DC.title => "compound_uri", OT.paramValue => compound_uri}] } ) - @prediction_dataset.add_parameters( {"compound_uri" => compound_uri} ) end - neighbors - eval @prediction_algorithm - - if @prediction + return @prediction_dataset if database_activity - feature_uri = File.join( @prediction_dataset.uri, "feature", @prediction_dataset.compounds.size) - @prediction_dataset.add @compound.uri, feature_uri, @prediction + neighbors + prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})") + + prediction_feature_uri = File.join( @prediction_dataset.uri, "feature", "prediction", File.basename(@metadata[OT.dependentVariables]),@prediction_dataset.compounds.size.to_s) + # TODO: fix dependentVariable + @prediction_dataset.metadata[OT.dependentVariables] = prediction_feature_uri + + if @neighbors.size == 0 + @prediction_dataset.add_feature(prediction_feature_uri, { + OT.hasSource => @uri, + DC.creator => @uri, + DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )), + OT.error => "No similar compounds in training dataset.", + OT.parameters => [{DC.title => "compound_uri", OT.paramValue => compound_uri}] + }) + @prediction_dataset.add @compound.uri, prediction_feature_uri, prediction[:prediction] - feature_metadata = @prediction_dataset.metadata - feature_metadata[DC.title] = File.basename(@metadata[OT.dependentVariables]) - feature_metadata[OT.prediction] = @prediction - feature_metadata[OT.confidence] = @confidence - @prediction_dataset.add_feature(feature_uri, feature_metadata) + else + @prediction_dataset.add_feature(prediction_feature_uri, { + OT.hasSource => @uri, + DC.creator => @uri, + DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )), + OT.prediction => prediction[:prediction], + OT.confidence => prediction[:confidence], + OT.parameters => [{DC.title => "compound_uri", OT.paramValue => 
compound_uri}] + }) + @prediction_dataset.add @compound.uri, prediction_feature_uri, prediction[:prediction] if verbose - if @compound_features + if @feature_calculation_algorithm == "Substructure.match" + f = 0 + @compound_features.each do |feature| + feature_uri = File.join( @prediction_dataset.uri, "feature", "descriptor", f.to_s) + features[feature] = feature_uri + @prediction_dataset.add_feature(feature_uri, { + OT.smarts => feature, + OT.p_value => @p_values[feature], + OT.effect => @effects[feature] + }) + @prediction_dataset.add @compound.uri, feature_uri, true + f+=1 + end + else @compound_features.each do |feature| + features[feature] = feature @prediction_dataset.add @compound.uri, feature, true end end n = 0 - @neighbors.sort{|a,b| a[:similarity] <=> b[:similarity]}.each do |neighbor| - neighbor_uri = File.join( @prediction_dataset.uri, "feature/neighbor", n ) - @prediction_dataset.add @compound.uri, neighbor_uri, true - @prediction_dataset.add_feature(neighbor, { + @neighbors.each do |neighbor| + neighbor_uri = File.join( @prediction_dataset.uri, "feature", "neighbor", n.to_s ) + @prediction_dataset.add_feature(neighbor_uri, { OT.compound => neighbor[:compound], OT.similarity => neighbor[:similarity], OT.activity => neighbor[:activity] }) + @prediction_dataset.add @compound.uri, neighbor_uri, true + f = 0 unless f + neighbor[:features].each do |feature| + if @feature_calculation_algorithm == "Substructure.match" + feature_uri = File.join( @prediction_dataset.uri, "feature", "descriptor", f.to_s) unless feature_uri = features[feature] + else + feature_uri = feature + end + @prediction_dataset.add neighbor[:compound], feature_uri, true + unless features.has_key? feature + features[feature] = feature_uri + @prediction_dataset.add_feature(feature_uri, { + OT.smarts => feature, + OT.p_value => @p_values[feature], + OT.effect => @effects[feature] + }) + f+=1 + end + end n+=1 end + # what happens with dataset predictions? 
end end - @prediction_dataset.save - @prediction_dataset.uri - end - - def weighted_majority_vote - conf = 0.0 - @neighbors.each do |neighbor| - case neighbor[:activity].to_s - when 'true' - conf += OpenTox::Algorithm.gauss(neighbor[:similarity]) - when 'false' - conf -= OpenTox::Algorithm.gauss(neighbor[:similarity]) - end - end - if conf > 0.0 - @prediction = true - elsif conf < 0.0 - @prediction = false - else - @prediction = nil - end - @confidence = conf/@neighbors.size if @neighbors.size > 0 - end - - def local_svm_regression - sims = @neighbors.collect{ |n| n[:similarity] } # similarity values between query and neighbors - conf = sims.inject{|sum,x| sum + x } - acts = @neighbors.collect do |n| - act = n[:activity] - # TODO: check this in model creation - raise "0 values not allowed in training dataset. log10 is calculated internally." if act.to_f == 0 - Math.log10(act.to_f) - end # activities of neighbors for supervised learning - - neighbor_matches = @neighbors.collect{ |n| n[:features] } # as in classification: URIs of matches - gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel - if neighbor_matches.size == 0 - raise "No neighbors found" - else - # gram matrix - (0..(neighbor_matches.length-1)).each do |i| - gram_matrix[i] = [] - # lower triangle - (0..(i-1)).each do |j| - sim = OpenTox::Algorithm.weighted_tanimoto(neighbor_matches[i], neighbor_matches[j], @lazar.p_values) - gram_matrix[i] << OpenTox::Algorithm.gauss(sim) - end - # diagonal element - gram_matrix[i][i] = 1.0 - # upper triangle - ((i+1)..(neighbor_matches.length-1)).each do |j| - sim = OpenTox::Algorithm.weighted_tanimoto(neighbor_matches[i], neighbor_matches[j], @lazar.p_values) # double calculation? 
- gram_matrix[i] << OpenTox::Algorithm.gauss(sim) - end - end - @r = RinRuby.new(false,false) # global R instance leads to Socket errors after a large number of requests - @r.eval "library('kernlab')" # this requires R package "kernlab" to be installed - LOGGER.debug "Setting R data ..." - # set data - @r.gram_matrix = gram_matrix.flatten - @r.n = neighbor_matches.size - @r.y = acts - @r.sims = sims - - LOGGER.debug "Preparing R data ..." - # prepare data - @r.eval "y<-as.vector(y)" - @r.eval "gram_matrix<-as.kernelMatrix(matrix(gram_matrix,n,n))" - @r.eval "sims<-as.vector(sims)" - - # model + support vectors - LOGGER.debug "Creating SVM model ..." - @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"nu-svr\", nu=0.8)" - @r.eval "sv<-as.vector(SVindex(model))" - @r.eval "sims<-sims[sv]" - @r.eval "sims<-as.kernelMatrix(matrix(sims,1))" - LOGGER.debug "Predicting ..." - @r.eval "p<-predict(model,sims)[1,1]" - @prediction = 10**(@r.p.to_f) - LOGGER.debug "Prediction is: '" + prediction.to_s + "'." 
- @r.quit # free R - end - @confidence = conf/@neighbors.size if @neighbors.size > 0 - + @prediction_dataset.save + @prediction_dataset end + # Find neighbors and store them as object variable def neighbors - @compound_features = eval(@feature_calculation_algorithm) if @feature_calculation_algorithm + @compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm - @neighbors = {} - @activities.each do |training_compound,activities| - @training_compound = training_compound - sim = eval(@similarity_algorithm) + @neighbors = [] + @fingerprints.each do |training_compound,training_features| + #@activities.each do |training_compound,activities| + sim = eval("#{@similarity_algorithm}(@compound_features,training_features,@p_values)") if sim > @min_sim - activities.each do |act| + @activities[training_compound].each do |act| @neighbors << { - :compound => @training_compound, + :compound => training_compound, :similarity => sim, - :features => @fingerprints[@training_compound], + :features => training_features, :activity => act } end @@ -356,55 +244,63 @@ module OpenTox end - def tanimoto - OpenTox::Algorithm.tanimoto(@compound_features,@fingerprints[@training_compound]) - end - - def weighted_tanimoto - OpenTox::Algorithm.tanimoto(@compound_features,@fingerprints[@training_compound],@p_values) - end - - def euclid - OpenTox::Algorithm.tanimoto(@compound_features,@fingerprints[@training_compound]) - end - - def weighted_euclid - OpenTox::Algorithm.tanimoto(@compound_features,@fingerprints[@training_compound],@p_values) - end - - def substructure_match - @compound.match(@features) - end - - def database_search - #TODO add features method to dataset - Dataset.new(@metadata[OT.featureDataset]).features(@compound.uri) +=begin + def cached_prediction + dataset_uri = PredictionCache.find(:model_uri => @uri, :compound_uri => @compound.uri).dataset_uri) + return false unless dataset_uri + @prediction_dataset = 
Dataset.find(dataset_uri) + return false unless @prediction_dataset + LOGGER.debug "Serving cached prediction" + true end +=end - def database_activity(compound_uri) - prediction = OpenTox::Dataset.new - # find database activities - if @activities[compound_uri] - @activities[compound_uri].each { |act| prediction.add compound_uri, @metadata[OT.dependentVariables], act } - prediction.add_metadata(OT.hasSource => @metadata[OT.trainingDataset]) - prediction + # Find database activities and store them in @prediction_dataset + # @return [Boolean] true if compound has database activities, false if not + def database_activity + if @activities[@compound.uri] + @activities[@compound.uri].each { |act| @prediction_dataset.add @compound.uri, @metadata[OT.dependentVariables], act } + @prediction_dataset.add_metadata(OT.hasSource => @metadata[OT.trainingDataset]) + @prediction_dataset.save + true else - nil + false end end + # Save model at model service def save - RestClientWrapper.post(@uri,{:content_type => "application/x-yaml"},self.to_yaml) - end - - def self.all - RestClientWrapper.get(CONFIG[:services]["opentox-model"]).to_s.split("\n") + self.uri = RestClientWrapper.post(@uri,{:content_type => "application/x-yaml"},self.to_yaml) end + # Delete model at model service def delete RestClientWrapper.delete @uri unless @uri == CONFIG[:services]["opentox-model"] end +=begin +=end + +=begin + def self.create_from_dataset(dataset_uri,feature_dataset_uri,prediction_feature=nil) + training_activities = OpenTox::Dataset.find(dataset_uri) + training_features = OpenTox::Dataset.find(feature_dataset_uri) + unless prediction_feature # try to read prediction_feature from dataset + raise "#{training_activities.features.size} features in dataset #{dataset_uri}. Please provide a prediction_feature parameter." 
unless training_activities.features.size == 1 + prediction_feature = training_activities.features.keys.first + params[:prediction_feature] = prediction_feature + end + lazar = Lazar.new + training_features = OpenTox::Dataset.new(feature_dataset_uri) + case training_features.feature_type + when "classification" + lazar.similarity_algorithm = "weighted_tanimoto" + when "regression" + lazar.similarity_algorithm = "weighted_euclid" + end + end +=end + end end end diff --git a/lib/opentox-ruby-api-wrapper.rb b/lib/opentox-ruby-api-wrapper.rb index 9dc1372..9f9ff26 100644 --- a/lib/opentox-ruby-api-wrapper.rb +++ b/lib/opentox-ruby-api-wrapper.rb @@ -1,4 +1,4 @@ -['rubygems', 'sinatra', 'sinatra/url_for', 'rest_client', 'yaml', 'cgi', 'spork', 'environment'].each do |lib| +['rubygems', 'sinatra', 'sinatra/url_for', 'rest_client', 'yaml', 'cgi', 'spork', 'overwrite', 'environment'].each do |lib| require lib end @@ -8,6 +8,6 @@ rescue LoadError puts "Please install Openbabel with 'rake openbabel:install' in the compound component" end -['opentox', 'compound','dataset', 'parser','serializer', 'algorithm','model','task','validation','feature', 'ot-logger', 'overwrite', 'rest_client_wrapper'].each do |lib| +['opentox', 'compound','dataset', 'parser','serializer', 'algorithm','model','task','validation','feature', 'rest_client_wrapper'].each do |lib| require lib end diff --git a/lib/opentox.rb b/lib/opentox.rb index 3b7fa65..90683e5 100644 --- a/lib/opentox.rb +++ b/lib/opentox.rb @@ -1,7 +1,7 @@ module OpenTox attr_reader :uri - attr_accessor :metadata, :parameters + attr_accessor :metadata # Initialize OpenTox object with optional uri # @param [optional, String] URI @@ -30,11 +30,9 @@ module OpenTox @metadata end - # Load parameters from URI - #def load_parameters - #@parameters = Parser::Owl::Generic.new(@uri).parameters - #@parameters - #end + def add_metadata(metadata) + metadata.each { |k,v| @metadata[k] = v } + end # Get OWL-DL representation in RDF/XML format # @return 
[application/rdf+xml] RDF/XML representation diff --git a/lib/ot-logger.rb b/lib/ot-logger.rb deleted file mode 100644 index df38d77..0000000 --- a/lib/ot-logger.rb +++ /dev/null @@ -1,48 +0,0 @@ -require 'logger' -# logging -class MyLogger < Logger - - def pwd - path = Dir.pwd.to_s - index = path.rindex(/\//) - return path if index==nil - path[(index+1)..-1] - end - - def trace() - lines = caller(0) - n = 2 - line = lines[n] - - while (line =~ /spork.rb/ or line =~ /as_task/ or line =~ /ot-logger.rb/) - n += 1 - line = lines[n] - end - - index = line.rindex(/\/.*\.rb/) - return line if index==nil - line[index..-1] - end - - def format(msg) - pwd.ljust(18)+" :: "+msg.to_s+" :: "+trace+" :: "+($sinatra ? $sinatra.request.env['REMOTE_ADDR'] : nil).to_s - end - - def debug(msg) - super format(msg) - end - - def info(msg) - super format(msg) - end - - def warn(msg) - super format(msg) - end - - def error(msg) - super format(msg) - end - -end - diff --git a/lib/overwrite.rb b/lib/overwrite.rb index 2e4c396..f39fec3 100644 --- a/lib/overwrite.rb +++ b/lib/overwrite.rb @@ -34,3 +34,53 @@ class String end end end + +require 'logger' +# logging +#class Logger +class OTLogger < Logger + + def pwd + path = Dir.pwd.to_s + index = path.rindex(/\//) + return path if index==nil + path[(index+1)..-1] + end + + def trace() + lines = caller(0) + n = 2 + line = lines[n] + + while (line =~ /spork.rb/ or line =~ /create/ or line =~ /ot-logger.rb/) + n += 1 + line = lines[n] + end + + index = line.rindex(/\/.*\.rb/) + return line if index==nil + line[index..-1] + end + + def format(msg) + pwd.ljust(18)+" :: "+msg.to_s+" :: "+trace+" :: "+($sinatra ? 
$sinatra.request.env['REMOTE_ADDR'] : nil).to_s + end + + def debug(msg) + super format(msg) + end + + def info(msg) + super format(msg) + end + + def warn(msg) + super format(msg) + end + + def error(msg) + super format(msg) + end + +end + diff --git a/lib/parser.rb b/lib/parser.rb index 4d8e729..b727412 100644 --- a/lib/parser.rb +++ b/lib/parser.rb @@ -116,7 +116,7 @@ module OpenTox end end load_features - @dataset.metadata = metadata + @dataset.metadata = load_metadata @dataset end @@ -253,10 +253,8 @@ module OpenTox when OT.NumericFeature @dataset.add compound.uri, feature, value.to_f when OT.StringFeature - # TODO: insert ?? @dataset.add compound.uri, feature, value.to_s @activity_errors << smiles+", "+row.join(", ") - #return false end end end diff --git a/lib/rest_client_wrapper.rb b/lib/rest_client_wrapper.rb index 49549b5..5f5273b 100644 --- a/lib/rest_client_wrapper.rb +++ b/lib/rest_client_wrapper.rb @@ -80,6 +80,7 @@ module OpenTox raise "content-type not set" unless res.content_type res.code = result.code + # TODO: Ambit returns task representation with 200 instead of result URI return res if res.code==200 || !wait while (res.code==201 || res.code==202) @@ -108,11 +109,12 @@ module OpenTox task = nil case res.content_type - when /application\/rdf\+xml|application\/x-yaml/ - task = OpenTox::Task.from_data(res, res.content_type, res.code, base_uri) + when /application\/rdf\+xml/ + task = OpenTox::Task.from_rdfxml(res) + when /yaml/ + task = OpenTox::Task.from_yaml(res) when /text\// - raise "uri list has more than one entry, should be a task" if res.content_type=~/text\/uri-list/ and - res.split("\n").size > 1 #if uri list contains more then one uri, its not a task + raise "uri list has more than one entry, should be a task" if res.content_type=~/text\/uri-list/ and res.split("\n").size > 1 #if uri list contains more then one uri, its not a task task = OpenTox::Task.find(res.to_s) if res.to_s.uri? 
else raise "unknown content-type for task: '"+res.content_type.to_s+"'" #+"' content: "+res[0..200].to_s @@ -122,7 +124,7 @@ module OpenTox task.wait_for_completion raise task.description unless task.completed? # maybe task was cancelled / error - res = WrapperResult.new task.resultURI + res = WrapperResult.new task.result_uri res.code = task.http_code res.content_type = "text/uri-list" return res @@ -152,8 +154,8 @@ module OpenTox # we are either in a task, or in sinatra # PENDING: always return yaml for now - if $self_task #this global var in Task.as_task to mark that the current process is running in a task - raise error.to_yaml # the error is caught, logged, and task state is set to error in Task.as_task + if $self_task #this global var in Task.create to mark that the current process is running in a task + raise error.to_yaml # the error is caught, logged, and task state is set to error in Task.create #elsif $sinatra #else halt sinatra #$sinatra.halt(502,error.to_yaml) elsif defined?(halt) diff --git a/lib/serializer.rb b/lib/serializer.rb index 31aa0d1..9b3af39 100644 --- a/lib/serializer.rb +++ b/lib/serializer.rb @@ -25,6 +25,7 @@ module OpenTox OT.FeatureValue => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } , OT.Algorithm => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } , OT.Parameter => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } , + OT.Task => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } , OT.compound => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } , OT.feature => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } , @@ -42,6 +43,8 @@ module OpenTox OT.isA => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } , OT.Warnings => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } , XSD.anyURI => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } , + 
OT.hasStatus => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } , + OT.resultURI => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } , OT.hasSource => { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } , OT.value => { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } , @@ -100,17 +103,21 @@ module OpenTox # @param [String] uri Algorithm URI def add_algorithm(uri,metadata) @object[uri] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Algorithm }] } - LOGGER.debug @object[uri] add_metadata uri, metadata - LOGGER.debug @object[uri] end # Add a model # @param [String] uri Model URI - def add_model(uri,metadata,parameters) + def add_model(uri,metadata) @object[uri] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Model }] } add_metadata uri, metadata - add_parameters uri, parameters + end + + # Add a task + # @param [String] uri Model URI + def add_task(uri,metadata) + @object[uri] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Task }] } + add_metadata uri, metadata end # Add metadata @@ -204,7 +211,7 @@ module OpenTox # @return [text/plain] Object OWL-DL in RDF/XML format def to_rdfxml Tempfile.open("owl-serializer"){|f| f.write(self.to_ntriples); @path = f.path} - `rapper -i ntriples -o rdfxml #{@path} 2>/dev/null` + `rapper -i ntriples -f 'xmlns:ot="#{OT.uri}"' -f 'xmlns:dc="#{DC.uri}"' -f 'xmlns:rdf="#{RDF.uri}"' -f 'xmlns:owl="#{OWL.uri}"' -o rdfxml #{@path} 2>/dev/null` end # Convert to JSON as specified in http://n2.talis.com/wiki/RDF_JSON_Specification @@ -290,7 +297,11 @@ module OpenTox entries.each do |feature, values| i = features.index(feature)+1 values.each do |value| - row[i] = value #TODO overwrites duplicated values + if row[i] + row[i] = "#{row[i]} #{value}" # multiple values + else + row[i] = value + end end end @rows << row diff --git a/lib/task.rb b/lib/task.rb index 96ee719..5b2b5d9 100644 --- a/lib/task.rb +++ b/lib/task.rb @@ -2,39 
+2,183 @@ $self_task=nil module OpenTox + # Class for handling asynchronous tasks class Task - attr_accessor :uri, :date, :title, :creator, :description, :hasStatus, :percentageCompleted, :resultURI, :due_to_time, :http_code - - # due_to_time is only set in local tasks - TASK_ATTRIBS = [ :uri, :date, :title, :creator, :description, :hasStatus, :percentageCompleted, :resultURI, :due_to_time ] - TASK_ATTRIBS.each{ |a| attr_accessor(a) } - attr_accessor :http_code + include OpenTox + attr_accessor :http_code, :due_to_time - private - def initialize(uri) - @uri = uri.to_s.strip + def initialize(uri=nil) + super uri + @metadata = { + DC.title => "", + DC.date => "", + OT.hasStatus => "Running", + OT.percentageCompleted => "0", + OT.resultURI => "", + DC.creator => "", # not mandatory according to API + DC.description => "", # not mandatory according to API + } end - - # create is private now, use OpenTox::Task.as_task - def self.create( params ) + + # Create a new task for the code in the block. Catches halts and exceptions and sets task state to error if necessary. The block has to return the URI of the created resource. 
+ # @example + # task = OpenTox::Task.create do + # # this code will be executed as a task + # model = OpenTox::Algorithm.run(params) # this can be time consuming + # model.uri # Important: return URI of the created resource + # end + # task.status # returns "Running", because tasks are forked + # @param [String] title Task title + # @param [String] creator Task creator + # @return [OpenTox::Task] Task + def self.create( title=nil, creator=nil, max_duration=DEFAULT_TASK_MAX_DURATION, description=nil ) + + # measure current memory consumption + memory = `free -m|sed -n '2p'`.split + free_memory = memory[3].to_i + memory[6].to_i # include cache + if free_memory < 20 # require at least 20 M free memory + LOGGER.warn "Cannot start task - not enough memory left (#{free_memory} M free)" + raise "Insufficient memory to start a new task" + end + + cpu_load = `cat /proc/loadavg`.split(/\s+/)[0..2].collect{|c| c.to_f} + nr_cpu_cores = `cat /proc/cpuinfo |grep "cpu cores"|cut -d ":" -f2|tr -d " "`.split("\n").collect{|c| c.to_i}.inject{|sum,n| sum+n} + if cpu_load[0] > nr_cpu_cores and cpu_load[0] > cpu_load[1] and cpu_load[1] > cpu_load[2] # average CPU load of the last minute is high and CPU load is increasing + LOGGER.warn "Cannot start task - CPU load too high (#{cpu_load.join(", ")})" + raise "Server too busy to start a new task" + end + + params = {:title=>title, :creator=>creator, :max_duration=>max_duration, :description=>description } task_uri = RestClientWrapper.post(CONFIG[:services]["opentox-task"], params, nil, false).to_s - Task.find(task_uri.chomp) - end + task = Task.new(task_uri.chomp) + + task_pid = Spork.spork(:logger => LOGGER) do + LOGGER.debug "Task #{task.uri} started #{Time.now}" + $self_task = task + + begin + result = catch(:halt) do + yield task + end + # catching halt, set task state to error + if result && result.is_a?(Array) && result.size==2 && result[0]>202 + LOGGER.error "task was halted: "+result.inspect + task.error(result[1]) + return + 
end + LOGGER.debug "Task #{task.uri} done #{Time.now} -> "+result.to_s + task.completed(result) + rescue => ex + LOGGER.error "task failed: "+ex.message + LOGGER.error ": "+ex.backtrace.join("\n") + task.error(ex.message) + end + end + task.pid = task_pid + LOGGER.debug "Started task: "+task.uri.to_s + task + end - public - def self.find( uri, accept_header=nil ) + # Find a task for querying, status changes + # @param [String] uri Task URI + # @return [OpenTox::Task] Task object + def self.find(uri) task = Task.new(uri) - task.reload( accept_header ) - return task + task.load_metadata + task + end + + # Get a list of all tasks + # @param [optional, String] uri URI of task service + # @return [text/uri-list] Task URIs + def self.all(uri=CONFIG[:services]["opentox-task"]) + OpenTox.all uri + end + + def self.from_yaml(yaml) + @metadata = YAML.load(yaml) + end + + def self.from_rdfxml(rdfxml) + file = Tempfile.open("ot-rdfxml"){|f| f.write(rdfxml)}.path + parser = Parser::Owl::Generic.new file + @metadata = parser.load_metadata + end + + def to_rdfxml + s = Serializer::Owl.new + s.add_task(@uri,@metadata) + s.to_rdfxml + end + + def status + @metadata[OT.hasStatus] + end + + def result_uri + @metadata[OT.resultURI] + end + + def description + @metadata[DC.description] + end + + def cancel + RestClientWrapper.put(File.join(@uri,'Cancelled')) + load_metadata + end + + def completed(uri) + RestClientWrapper.put(File.join(@uri,'Completed'),{:resultURI => uri}) + load_metadata + end + + def error(description) + RestClientWrapper.put(File.join(@uri,'Error'),{:description => description.to_s[0..2000]}) + load_metadata + end + + def pid=(pid) + RestClientWrapper.put(File.join(@uri,'pid'), {:pid => pid}) + end + + def running? + @metadata[OT.hasStatus] == 'Running' + end + + def completed? + @metadata[OT.hasStatus] == 'Completed' + end + + def error? 
+ @metadata[OT.hasStatus] == 'Error' + end + + def load_metadata + if (CONFIG[:yaml_hosts].include?(URI.parse(uri).host)) + result = RestClientWrapper.get(@uri, {:accept => 'application/x-yaml'}, false) + @metadata = YAML.load result.to_s + @http_code = result.code + else + @metadata = Parser::Owl::Generic.new(@uri).load_metadata + @http_code = RestClientWrapper.get(uri, {:accept => 'application/rdf+xml'}, false).code + end end + # create is private now, use OpenTox::Task.as_task + #def self.create( params ) + #task_uri = RestClientWrapper.post(CONFIG[:services]["opentox-task"], params, nil, false).to_s + #Task.find(task_uri.chomp) + #end + +=begin def self.from_data(data, content_type, code, base_uri) task = Task.new(nil) task.http_code = code task.reload_from_data(data, content_type, base_uri) return task end - + def reload( accept_header=nil ) unless accept_header if (CONFIG[:yaml_hosts].include?(URI.parse(uri).host)) @@ -65,113 +209,45 @@ module OpenTox end raise "uri is null after loading" unless @uri and @uri.to_s.strip.size>0 end - - def cancel - RestClientWrapper.put(File.join(@uri,'Cancelled')) - reload - end - - def completed(uri) - RestClientWrapper.put(File.join(@uri,'Completed'),{:resultURI => uri}) - reload - end - - def error(description) - RestClientWrapper.put(File.join(@uri,'Error'),{:description => description.to_s[0..2000]}) - reload - end - - def pid=(pid) - RestClientWrapper.put(File.join(@uri,'pid'), {:pid => pid}) - end - - def running? - @hasStatus.to_s == 'Running' - end - - def completed? - @hasStatus.to_s == 'Completed' - end - - def error? - @hasStatus.to_s == 'Error' - end +=end # waits for a task, unless time exceeds or state is no longer running def wait_for_completion(dur=0.3) - if (@uri.match(CONFIG[:services]["opentox-task"])) - due_to_time = (@due_to_time.is_a?(Time) ? @due_to_time : Time.parse(@due_to_time)) - running_time = due_to_time - (@date.is_a?(Time) ? 
@date : Time.parse(@date)) - else - # the date of the external task cannot be trusted, offest to local time might be to big - due_to_time = Time.new + EXTERNAL_TASK_MAX_DURATION - running_time = EXTERNAL_TASK_MAX_DURATION - end + due_to_time = Time.new + DEFAULT_TASK_MAX_DURATION LOGGER.debug "start waiting for task "+@uri.to_s+" at: "+Time.new.to_s+", waiting at least until "+due_to_time.to_s + load_metadata # for extremely fast tasks + check_state while self.running? sleep dur - reload + load_metadata check_state if (Time.new > due_to_time) - raise "max wait time exceeded ("+running_time.to_s+"sec), task: '"+@uri.to_s+"'" + raise "max wait time exceeded ("+DEFAULT_TASK_MAX_DURATION.to_s+"sec), task: '"+@uri.to_s+"'" end end - LOGGER.debug "Task '"+@hasStatus+"': "+@uri.to_s+", Result: "+@resultURI.to_s + LOGGER.debug "Task '"+@metadata[OT.hasStatus]+"': "+@uri.to_s+", Result: "+@metadata[OT.resultURI].to_s end + private def check_state begin - raise "illegal task state, task is completed, resultURI is no URI: '"+@resultURI.to_s+ - "'" unless @resultURI and @resultURI.to_s.uri? if completed? + raise "illegal task state, task is completed, resultURI is no URI: '"+@metadata[OT.resultURI].to_s+ + "'" unless @metadata[OT.resultURI] and @metadata[OT.resultURI].to_s.uri? if completed? if @http_code == 202 - raise "illegal task state, code is 202, but hasStatus is not Running: '"+@hasStatus+"'" unless running? + raise "illegal task state, code is 202, but hasStatus is not Running: '"+@metadata[OT.hasStatus]+"'" unless running? elsif @http_code == 201 - raise "illegal task state, code is 201, but hasStatus is not Completed: '"+@hasStatus+"'" unless completed? - raise "illegal task state, code is 201, resultURI is no task-URI: '"+@resultURI.to_s+ - "'" unless @resultURI and @resultURI.to_s.uri? + raise "illegal task state, code is 201, but hasStatus is not Completed: '"+@metadata[OT.hasStatus]+"'" unless completed? 
+ raise "illegal task state, code is 201, resultURI is no task-URI: '"+@metadata[OT.resultURI].to_s+ + "'" unless @metadata[OT.resultURI] and @metadata[OT.resultURI].to_s.uri? end rescue => ex RestClientWrapper.raise_uri_error(ex.message, @uri) end end - - # returns the task uri - # catches halts and exceptions, task state is set to error then - def self.as_task( title, creator, max_duration=DEFAULT_TASK_MAX_DURATION, description=nil ) - #return yield nil - - params = {:title=>title, :creator=>creator, :max_duration=>max_duration, :description=>description } - task = ::OpenTox::Task.create(params) - task_pid = Spork.spork(:logger => LOGGER) do - LOGGER.debug "Task #{task.uri} started #{Time.now}" - $self_task = task - - begin - result = catch(:halt) do - yield task - end - # catching halt, set task state to error - if result && result.is_a?(Array) && result.size==2 && result[0]>202 - LOGGER.error "task was halted: "+result.inspect - task.error(result[1]) - return - end - LOGGER.debug "Task #{task.uri} done #{Time.now} -> "+result.to_s - task.completed(result) - rescue => ex - LOGGER.error "task failed: "+ex.message - LOGGER.error ": "+ex.backtrace.join("\n") - task.error(ex.message) - end - end - task.pid = task_pid - LOGGER.debug "Started task: "+task.uri.to_s - task.uri - end end diff --git a/lib/validation.rb b/lib/validation.rb index 340332a..76c4529 100644 --- a/lib/validation.rb +++ b/lib/validation.rb @@ -1,20 +1,70 @@ module OpenTox class Validation + include OpenTox - attr_accessor :uri - - def initialize(params) - @uri = OpenTox::RestClientWrapper.post(File.join(CONFIG[:services]["opentox-validation"],"/crossvalidation"),params,nil,false) - end + attr_accessor :report_uri, :qmrf_report_uri - def self.crossvalidation(params) + def self.create_crossvalidation(params) params[:uri] = File.join(CONFIG[:services]['opentox-validation'], "crossvalidation") params[:num_folds] = 10 unless params[:num_folds] params[:random_seed] = 2 unless params[:random_seed] 
params[:stratified] = false unless params[:stratified] - OpenTox::Validation.new(params) + uri = OpenTox::RestClientWrapper.post(File.join(CONFIG[:services]["opentox-validation"],"/crossvalidation"),params,nil,false) + OpenTox::Validation.new(uri) end + def create_report + @report_uri = RestClientWrapper.post(File.join(CONFIG[:services]["opentox-validation"],"/report/crossvalidation"), :validation_uris => @uri).to_s + @report_uri + end + + def create_qmrf_report + @qmrf_report_uri = RestClientWrapper.post(File.join(CONFIG[:services]["opentox-validation"],"/reach_report/qmrf"), :model_uri => @uri).to_s + @qmrf_report_uri + end + + def summary(type) + v = YAML.load RestClientWrapper.get(File.join(@uri, 'statistics'),:accept => "application/x-yaml").to_s + + case type + when "classification" + tp=0; tn=0; fp=0; fn=0; n=0 + v[:classification_statistics][:confusion_matrix][:confusion_matrix_cell].each do |cell| + if cell[:confusion_matrix_predicted] == "true" and cell[:confusion_matrix_actual] == "true" + tp = cell[:confusion_matrix_value] + n += tp + elsif cell[:confusion_matrix_predicted] == "false" and cell[:confusion_matrix_actual] == "false" + tn = cell[:confusion_matrix_value] + n += tn + elsif cell[:confusion_matrix_predicted] == "false" and cell[:confusion_matrix_actual] == "true" + fn = cell[:confusion_matrix_value] + n += fn + elsif cell[:confusion_matrix_predicted] == "true" and cell[:confusion_matrix_actual] == "false" + fp = cell[:confusion_matrix_value] + n += fp + end + end + { + :nr_predictions => n, + :true_positives => tp, + :false_positives => fp, + :true_negatives => tn, + :false_negatives => fn, + :correct_predictions => 100*(tp+tn).to_f/n, + :weighted_area_under_roc => v[:classification_statistics][:weighted_area_under_roc].to_f, + :sensitivity => tp.to_f/(tp+fn), + :specificity => tn.to_f/(tn+fp), + } + when "regression" + { + :nr_predictions => v[:num_instances] - v[:num_unpredicted], + :r_square => v[:regression_statistics][:r_square], + 
:root_mean_squared_error => v[:regression_statistics][:root_mean_squared_error], + :mean_absolute_error => v[:regression_statistics][:mean_absolute_error], + } + end + end + end end -- cgit v1.2.3