From 7067bd44d5c97618ec6a968bbdfe6d6bda12a1cd Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Wed, 24 Nov 2010 13:13:40 +0100 Subject: opentox-ruby-api-wrapper renamed to opentox-ruby --- Rakefile | 64 ++++++++++++++++++++--------------------- bin/yaml2owl.rb | 18 ------------ lib/dataset.rb | 17 ++++++++++- lib/model.rb | 63 +++++++++------------------------------- lib/opentox-ruby-api-wrapper.rb | 13 --------- lib/opentox-ruby.rb | 13 +++++++++ lib/serializer.rb | 10 ++++--- lib/task.rb | 15 ++++++---- 8 files changed, 91 insertions(+), 122 deletions(-) delete mode 100755 bin/yaml2owl.rb delete mode 100644 lib/opentox-ruby-api-wrapper.rb create mode 100644 lib/opentox-ruby.rb diff --git a/Rakefile b/Rakefile index 6838e75..53c6ae9 100644 --- a/Rakefile +++ b/Rakefile @@ -4,45 +4,45 @@ require 'rake' begin require 'jeweler' Jeweler::Tasks.new do |gem| - gem.name = "opentox-ruby-api-wrapper" + gem.name = "opentox-ruby" gem.summary = %Q{Ruby wrapper for the OpenTox REST API} gem.description = %Q{Ruby wrapper for the OpenTox REST API (http://www.opentox.org)} gem.email = "helma@in-silico.ch" gem.homepage = "http://github.com/helma/opentox-ruby-api-wrapper" - gem.authors = ["Christoph Helma, Martin Guetlein"] - # dependencies - [ "sinatra", - "emk-sinatra-url-for", - "sinatra-respond_to", - "sinatra-static-assets", - "rest-client", - "rack", - "rack-contrib", - "rack-flash", - "nokogiri", - "rubyzip", - "roo", - "spreadsheet", - "google-spreadsheet-ruby", - "tmail", - "rinruby", - "rjb" - ].each { |dep| gem.add_dependency dep } - [ "dm-core", - 'dm-serializer', - 'dm-timestamps', - 'dm-types', - 'dm-migrations', - "dm-mysql-adapter", + gem.authors = ["Christoph Helma, Martin Guetlein, Andreas Maunz, Micha Rautenberg, David Vorgrimmler"] + # dependencies + [ "sinatra", + "emk-sinatra-url-for", + "sinatra-respond_to", + "sinatra-static-assets", + "rest-client", + "rack", + "rack-contrib", + "rack-flash", + "nokogiri", + "rubyzip", + "roo", + "spreadsheet", + "google-spreadsheet-ruby", + "tmail", + "rinruby", + "rjb" + ].each { |dep| gem.add_dependency dep } + [ "dm-core", + 'dm-serializer', + 'dm-timestamps', + 'dm-types', + 'dm-migrations', + "dm-mysql-adapter", "dm-validations", - ].each {|dep| gem.add_dependency dep, ">= 1" } - gem.add_dependency "haml", ">=3" - ['cucumber','jeweler'].each { |dep| gem.add_development_dependency dep } - gem.files = FileList["[A-Z]*", "{bin,generators,lib,test}/**/*", 'lib/jeweler/templates/.gitignore'] - gem.files.include %w(lib/tasks/owl.rb, lib/environment.rb, lib/algorithm.rb, lib/compound.rb, lib/dataset.rb, lib/model.rb, lib/validation.rb, lib/templates/*) + ].each {|dep| gem.add_dependency dep, ">= 1" } + gem.add_dependency "haml", ">=3" + ['jeweler'].each { |dep| gem.add_development_dependency dep } + gem.files = FileList["[A-Z]*", "{bin,generators,lib,test}/**/*", 'lib/jeweler/templates/.gitignore'] + #gem.files.include %w(lib/environment.rb, lib/algorithm.rb, lib/compound.rb, lib/dataset.rb, lib/model.rb, lib/validation.rb, lib/templates/*) # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings end - Jeweler::GemcutterTasks.new + Jeweler::GemcutterTasks.new rescue LoadError puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler" end diff --git a/bin/yaml2owl.rb b/bin/yaml2owl.rb deleted file mode 100755 index 1002912..0000000 --- a/bin/yaml2owl.rb +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/env ruby -require 'rubygems' -require 'opentox-ruby-api-wrapper' - -input = YAML.load_file(ARGV[0]) -dataset = OpenTox::Dataset.new -dataset.title = input[:title] -dataset.creator = input[:source] -input[:data].each do |c,f| - f.each do |k,v| - v.each do |value| - dataset.add c,k,value - end - end -end -outfile = File.expand_path(File.join(File.dirname(__FILE__),ARGV[0].sub(/yaml/,'owl'))) -dataset.uri = outfile -File.open(outfile,'w+'){|f| f.puts dataset.rdf} diff --git a/lib/dataset.rb b/lib/dataset.rb index 4737ea1..c5704ae 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -32,6 +32,21 @@ module OpenTox dataset end + # Create dataset from CSV file (format specification: http://toxcreate.org/help) + # - loads data_entries, compounds, features + # - sets metadata (warnings) for parser errors + # - you will have to set remaining metadata manually + # @param [String] file CSV file path + # @return [OpenTox::Dataset] Dataset object with CSV data + def self.create_from_csv_file(file) + dataset = Dataset.create + parser = Parser::Spreadsheets.new + parser.dataset = dataset + parser.load_csv(File.open(file).read) + dataset.save + dataset + end + # Find a dataset and load all data. This can be time consuming, use Dataset.new together with one of the load_* methods for a fine grained control over data loading. # @param [String] uri Dataset URI # @return [OpenTox::Dataset] Dataset object with all data @@ -299,7 +314,7 @@ module OpenTox def measured_activities(compound) source = @metadata[OT.hasSource] - @data_entries[compound.uri].collect{|f,v| v if f.match(/#{source}/)}.compact + @data_entries[compound.uri].collect{|f,v| v if f.match(/#{source}/)}.compact.flatten end def neighbors(compound) diff --git a/lib/model.rb b/lib/model.rb index c6a2cf4..5654bcc 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -80,21 +80,16 @@ module OpenTox OpenTox::Model::Lazar.find(model_uri) end -=begin - # Create a new lazar model and return task - # @param [optional,Hash] params Parameters for the lazar algorithm (OpenTox::Algorithm::Lazar) - # @return [OpenTox::Task] Task for lazar model creation - def self.create_task(params) - task_uri = RestClientWrapper.post(File.join(CONFIG[:services]["opentox-algorithm"],"lazar"), {}, params, false) - Task.find(task_uri) - #model_uri = lazar_algorithm.run(params) - #OpenTox::Model::Lazar.new(model_uri) - end -=end + # Get a parameter value + # @param [String] param Parameter name + # @return [String] Parameter value def parameter(param) @metadata[OT.parameters].collect{|p| p[OT.paramValue] if p[DC.title] == param}.compact.first end + # Predict a dataset + # @param [String] dataset_uri Dataset URI + # @return [OpenTox::Dataset] Dataset with predictions def predict_dataset(dataset_uri) @prediction_dataset = Dataset.create @prediction_dataset.add_metadata({ @@ -145,6 +140,7 @@ module OpenTox if @neighbors.size == 0 @prediction_dataset.add_feature(prediction_feature_uri, { + OT.isA => OT.MeasuredFeature, OT.hasSource => @uri, DC.creator => @uri, DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )), @@ -155,6 +151,7 @@ module OpenTox else @prediction_dataset.add_feature(prediction_feature_uri, { + OT.isA => OT.ModelPrediction, OT.hasSource => @uri, DC.creator => @uri, DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )), @@ -171,8 +168,9 @@ module OpenTox feature_uri = File.join( @prediction_dataset.uri, "feature", "descriptor", f.to_s) features[feature] = feature_uri @prediction_dataset.add_feature(feature_uri, { + OT.isA => OT.Substructure, OT.smarts => feature, - OT.p_value => @p_values[feature], + OT.pValue => @p_values[feature], OT.effect => @effects[feature] }) @prediction_dataset.add @compound.uri, feature_uri, true @@ -190,7 +188,8 @@ module OpenTox @prediction_dataset.add_feature(neighbor_uri, { OT.compound => neighbor[:compound], OT.similarity => neighbor[:similarity], - OT.activity => neighbor[:activity] + OT.measuredActivity => neighbor[:activity], + OT.isA => OT.Neighbor }) @prediction_dataset.add @compound.uri, neighbor_uri, true f = 0 unless f @@ -204,8 +203,9 @@ module OpenTox unless features.has_key? feature features[feature] = feature_uri @prediction_dataset.add_feature(feature_uri, { + OT.isA => OT.Substructure, OT.smarts => feature, - OT.p_value => @p_values[feature], + OT.pValue => @p_values[feature], OT.effect => @effects[feature] }) f+=1 @@ -228,7 +228,6 @@ module OpenTox @neighbors = [] @fingerprints.each do |training_compound,training_features| - #@activities.each do |training_compound,activities| sim = eval("#{@similarity_algorithm}(@compound_features,training_features,@p_values)") if sim > @min_sim @activities[training_compound].each do |act| @@ -244,17 +243,6 @@ module OpenTox end -=begin - def cached_prediction - dataset_uri = PredictionCache.find(:model_uri => @uri, :compound_uri => @compound.uri).dataset_uri) - return false unless dataset_uri - @prediction_dataset = Dataset.find(dataset_uri) - return false unless @prediction_dataset - LOGGER.debug "Serving cached prediction" - true - end -=end - # Find database activities and store them in @prediction_dataset # @return [Boolean] true if compound has databasse activities, false if not def database_activity @@ -278,29 +266,6 @@ module OpenTox RestClientWrapper.delete @uri unless @uri == CONFIG[:services]["opentox-model"] end -=begin -=end - -=begin - def self.create_from_dataset(dataset_uri,feature_dataset_uri,prediction_feature=nil) - training_activities = OpenTox::Dataset.find(dataset_uri) - training_features = OpenTox::Dataset.find(feature_dataset_uri) - unless prediction_feature # try to read prediction_feature from dataset - raise "#{training_activities.features.size} features in dataset #{dataset_uri}. Please provide a prediction_feature parameter." unless training_activities.features.size == 1 - prediction_feature = training_activities.features.keys.first - params[:prediction_feature] = prediction_feature - end - lazar = Lazar.new - training_features = OpenTox::Dataset.new(feature_dataset_uri) - case training_features.feature_type - when "classification" - lazar.similarity_algorithm = "weighted_tanimoto" - when "regression" - lazar.similarity_algorithm = "weighted_euclid" - end - end -=end - end end end diff --git a/lib/opentox-ruby-api-wrapper.rb b/lib/opentox-ruby-api-wrapper.rb deleted file mode 100644 index 9f9ff26..0000000 --- a/lib/opentox-ruby-api-wrapper.rb +++ /dev/null @@ -1,13 +0,0 @@ -['rubygems', 'sinatra', 'sinatra/url_for', 'rest_client', 'yaml', 'cgi', 'spork', 'overwrite', 'environment'].each do |lib| - require lib -end - -begin - require 'openbabel' -rescue LoadError - puts "Please install Openbabel with 'rake openbabel:install' in the compound component" -end - -['opentox', 'compound','dataset', 'parser','serializer', 'algorithm','model','task','validation','feature', 'rest_client_wrapper'].each do |lib| - require lib -end diff --git a/lib/opentox-ruby.rb b/lib/opentox-ruby.rb new file mode 100644 index 0000000..9f9ff26 --- /dev/null +++ b/lib/opentox-ruby.rb @@ -0,0 +1,13 @@ +['rubygems', 'sinatra', 'sinatra/url_for', 'rest_client', 'yaml', 'cgi', 'spork', 'overwrite', 'environment'].each do |lib| + require lib +end + +begin + require 'openbabel' +rescue LoadError + puts "Please install Openbabel with 'rake openbabel:install' in the compound component" +end + +['opentox', 'compound','dataset', 'parser','serializer', 'algorithm','model','task','validation','feature', 'rest_client_wrapper'].each do |lib| + require lib +end diff --git a/lib/serializer.rb b/lib/serializer.rb index 9b3af39..495702a 100644 --- a/lib/serializer.rb +++ b/lib/serializer.rb @@ -40,11 +40,13 @@ module OpenTox DC.contributor => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } , DC.creator => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } , DC.description => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } , + DC.date => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } , OT.isA => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } , OT.Warnings => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } , XSD.anyURI => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } , OT.hasStatus => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } , OT.resultURI => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } , + OT.percentageCompleted => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } , OT.hasSource => { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } , OT.value => { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } , @@ -125,10 +127,7 @@ module OpenTox def add_metadata(uri,metadata) id = 0 metadata.each do |u,v| - if v.is_a? String - @object[uri] = {} unless @object[uri] - @object[uri][u] = [{"type" => type(v), "value" => v }] - elsif v.is_a? Array and u == OT.parameters + if v.is_a? Array and u == OT.parameters @object[uri][u] = [] unless @object[uri][u] v.each do |value| id+=1 @@ -139,6 +138,9 @@ module OpenTox @object[genid][name] = [{"type" => type(entry), "value" => entry }] end end + else # v.is_a? String + @object[uri] = {} unless @object[uri] + @object[uri][u] = [{"type" => type(v), "value" => v }] end end end diff --git a/lib/task.rb b/lib/task.rb index 5b2b5d9..5b59395 100644 --- a/lib/task.rb +++ b/lib/task.rb @@ -33,24 +33,29 @@ module OpenTox # @return [OPenTox::Task] Task def self.create( title=nil, creator=nil, max_duration=DEFAULT_TASK_MAX_DURATION, description=nil ) + params = {:title=>title, :creator=>creator, :max_duration=>max_duration, :description=>description } + task_uri = RestClientWrapper.post(CONFIG[:services]["opentox-task"], params, nil, false).to_s + task = Task.new(task_uri.chomp) + # measure current memory consumption memory = `free -m|sed -n '2p'`.split free_memory = memory[3].to_i + memory[6].to_i # include cache if free_memory < 20 # require at least 200 M free memory LOGGER.warn "Cannot start task - not enough memory left (#{free_memory} M free)" - raise "Insufficient memory to start a new task" + task.cancel + return task + #raise "Insufficient memory to start a new task" end cpu_load = `cat /proc/loadavg`.split(/\s+/)[0..2].collect{|c| c.to_f} nr_cpu_cores = `cat /proc/cpuinfo |grep "cpu cores"|cut -d ":" -f2|tr -d " "`.split("\n").collect{|c| c.to_i}.inject{|sum,n| sum+n} if cpu_load[0] > nr_cpu_cores and cpu_load[0] > cpu_load[1] and cpu_load[1] > cpu_load[2] # average CPU load of the last minute is high and CPU load is increasing LOGGER.warn "Cannot start task - CPU load too high (#{cpu_load.join(", ")})" - raise "Server too busy to start a new task" + task.cancel + return task + #raise "Server too busy to start a new task" end - params = {:title=>title, :creator=>creator, :max_duration=>max_duration, :description=>description } - task_uri = RestClientWrapper.post(CONFIG[:services]["opentox-task"], params, nil, false).to_s - task = Task.new(task_uri.chomp) task_pid = Spork.spork(:logger => LOGGER) do LOGGER.debug "Task #{task.uri} started #{Time.now}" -- cgit v1.2.3