-rw-r--r--   Rakefile                                                              64
-rwxr-xr-x   bin/yaml2owl.rb                                                       18
-rw-r--r--   lib/dataset.rb                                                        17
-rw-r--r--   lib/model.rb                                                          63
-rw-r--r--   lib/opentox-ruby.rb (renamed from lib/opentox-ruby-api-wrapper.rb)     0
-rw-r--r--   lib/serializer.rb                                                     10
-rw-r--r--   lib/task.rb                                                           15
7 files changed, 78 insertions(+), 109 deletions(-)
diff --git a/Rakefile b/Rakefile
index 6838e75..53c6ae9 100644
--- a/Rakefile
+++ b/Rakefile
@@ -4,45 +4,45 @@ require 'rake'
begin
require 'jeweler'
Jeweler::Tasks.new do |gem|
- gem.name = "opentox-ruby-api-wrapper"
+ gem.name = "opentox-ruby"
gem.summary = %Q{Ruby wrapper for the OpenTox REST API}
gem.description = %Q{Ruby wrapper for the OpenTox REST API (http://www.opentox.org)}
gem.email = "helma@in-silico.ch"
gem.homepage = "http://github.com/helma/opentox-ruby-api-wrapper"
- gem.authors = ["Christoph Helma, Martin Guetlein"]
- # dependencies
- [ "sinatra",
- "emk-sinatra-url-for",
- "sinatra-respond_to",
- "sinatra-static-assets",
- "rest-client",
- "rack",
- "rack-contrib",
- "rack-flash",
- "nokogiri",
- "rubyzip",
- "roo",
- "spreadsheet",
- "google-spreadsheet-ruby",
- "tmail",
- "rinruby",
- "rjb"
- ].each { |dep| gem.add_dependency dep }
- [ "dm-core",
- 'dm-serializer',
- 'dm-timestamps',
- 'dm-types',
- 'dm-migrations',
- "dm-mysql-adapter",
+ gem.authors = ["Christoph Helma, Martin Guetlein, Andreas Maunz, Micha Rautenberg, David Vorgrimmler"]
+ # dependencies
+ [ "sinatra",
+ "emk-sinatra-url-for",
+ "sinatra-respond_to",
+ "sinatra-static-assets",
+ "rest-client",
+ "rack",
+ "rack-contrib",
+ "rack-flash",
+ "nokogiri",
+ "rubyzip",
+ "roo",
+ "spreadsheet",
+ "google-spreadsheet-ruby",
+ "tmail",
+ "rinruby",
+ "rjb"
+ ].each { |dep| gem.add_dependency dep }
+ [ "dm-core",
+ 'dm-serializer',
+ 'dm-timestamps',
+ 'dm-types',
+ 'dm-migrations',
+ "dm-mysql-adapter",
"dm-validations",
- ].each {|dep| gem.add_dependency dep, ">= 1" }
- gem.add_dependency "haml", ">=3"
- ['cucumber','jeweler'].each { |dep| gem.add_development_dependency dep }
- gem.files = FileList["[A-Z]*", "{bin,generators,lib,test}/**/*", 'lib/jeweler/templates/.gitignore']
- gem.files.include %w(lib/tasks/owl.rb, lib/environment.rb, lib/algorithm.rb, lib/compound.rb, lib/dataset.rb, lib/model.rb, lib/validation.rb, lib/templates/*)
+ ].each {|dep| gem.add_dependency dep, ">= 1" }
+ gem.add_dependency "haml", ">=3"
+ ['jeweler'].each { |dep| gem.add_development_dependency dep }
+ gem.files = FileList["[A-Z]*", "{bin,generators,lib,test}/**/*", 'lib/jeweler/templates/.gitignore']
+ #gem.files.include %w(lib/environment.rb, lib/algorithm.rb, lib/compound.rb, lib/dataset.rb, lib/model.rb, lib/validation.rb, lib/templates/*)
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
end
- Jeweler::GemcutterTasks.new
+ Jeweler::GemcutterTasks.new
rescue LoadError
puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
end
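
With the gem and the main library file renamed (see the lib/opentox-ruby-api-wrapper.rb rename further below), callers switch to the new require name. A minimal sketch, assuming the renamed gem is installed:

  require 'rubygems'
  require 'opentox-ruby'   # formerly: require 'opentox-ruby-api-wrapper'
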
diff --git a/bin/yaml2owl.rb b/bin/yaml2owl.rb
deleted file mode 100755
index 1002912..0000000
--- a/bin/yaml2owl.rb
+++ /dev/null
@@ -1,18 +0,0 @@
-#!/usr/bin/env ruby
-require 'rubygems'
-require 'opentox-ruby-api-wrapper'
-
-input = YAML.load_file(ARGV[0])
-dataset = OpenTox::Dataset.new
-dataset.title = input[:title]
-dataset.creator = input[:source]
-input[:data].each do |c,f|
- f.each do |k,v|
- v.each do |value|
- dataset.add c,k,value
- end
- end
-end
-outfile = File.expand_path(File.join(File.dirname(__FILE__),ARGV[0].sub(/yaml/,'owl')))
-dataset.uri = outfile
-File.open(outfile,'w+'){|f| f.puts dataset.rdf}
diff --git a/lib/dataset.rb b/lib/dataset.rb
index 4737ea1..c5704ae 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -32,6 +32,21 @@ module OpenTox
dataset
end
+ # Create dataset from CSV file (format specification: http://toxcreate.org/help)
+ # - loads data_entries, compounds, features
+ # - sets metadata (warnings) for parser errors
+ # - you will have to set remaining metadata manually
+ # @param [String] file CSV file path
+ # @return [OpenTox::Dataset] Dataset object with CSV data
+ def self.create_from_csv_file(file)
+ dataset = Dataset.create
+ parser = Parser::Spreadsheets.new
+ parser.dataset = dataset
+ parser.load_csv(File.open(file).read)
+ dataset.save
+ dataset
+ end
+
# Find a dataset and load all data. This can be time consuming, use Dataset.new together with one of the load_* methods for a fine grained control over data loading.
# @param [String] uri Dataset URI
# @return [OpenTox::Dataset] Dataset object with all data
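
Usage sketch for the new CSV constructor; the file name and metadata value are illustrative, and add_metadata/save follow the method names used elsewhere in this diff:

  require 'opentox-ruby'
  dataset = OpenTox::Dataset.create_from_csv_file("hamster_carcinogenicity.csv")
  puts dataset.uri                                              # URI of the stored dataset
  dataset.add_metadata(DC.title => "Hamster carcinogenicity")   # remaining metadata is set manually
  dataset.save
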
@@ -299,7 +314,7 @@ module OpenTox
def measured_activities(compound)
source = @metadata[OT.hasSource]
- @data_entries[compound.uri].collect{|f,v| v if f.match(/#{source}/)}.compact
+ @data_entries[compound.uri].collect{|f,v| v if f.match(/#{source}/)}.compact.flatten
end
def neighbors(compound)
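
The added .flatten matters because each data entry value is itself an array of measurements, so collect would otherwise return nested arrays. A minimal illustration with made-up entries:

  entries = { "http://example.org/feature/1" => [1.0, 2.0], "http://example.org/feature/2" => [0.5] }
  entries.collect{|f,v| v}.compact          # => [[1.0, 2.0], [0.5]]
  entries.collect{|f,v| v}.compact.flatten  # => [1.0, 2.0, 0.5]
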
diff --git a/lib/model.rb b/lib/model.rb
index c6a2cf4..5654bcc 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -80,21 +80,16 @@ module OpenTox
OpenTox::Model::Lazar.find(model_uri)
end
-=begin
- # Create a new lazar model and return task
- # @param [optional,Hash] params Parameters for the lazar algorithm (OpenTox::Algorithm::Lazar)
- # @return [OpenTox::Task] Task for lazar model creation
- def self.create_task(params)
- task_uri = RestClientWrapper.post(File.join(CONFIG[:services]["opentox-algorithm"],"lazar"), {}, params, false)
- Task.find(task_uri)
- #model_uri = lazar_algorithm.run(params)
- #OpenTox::Model::Lazar.new(model_uri)
- end
-=end
+ # Get a parameter value
+ # @param [String] param Parameter name
+ # @return [String] Parameter value
def parameter(param)
@metadata[OT.parameters].collect{|p| p[OT.paramValue] if p[DC.title] == param}.compact.first
end
+ # Predict a dataset
+ # @param [String] dataset_uri Dataset URI
+ # @return [OpenTox::Dataset] Dataset with predictions
def predict_dataset(dataset_uri)
@prediction_dataset = Dataset.create
@prediction_dataset.add_metadata({
@@ -145,6 +140,7 @@ module OpenTox
if @neighbors.size == 0
@prediction_dataset.add_feature(prediction_feature_uri, {
+ OT.isA => OT.MeasuredFeature,
OT.hasSource => @uri,
DC.creator => @uri,
DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )),
@@ -155,6 +151,7 @@ module OpenTox
else
@prediction_dataset.add_feature(prediction_feature_uri, {
+ OT.isA => OT.ModelPrediction,
OT.hasSource => @uri,
DC.creator => @uri,
DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )),
@@ -171,8 +168,9 @@ module OpenTox
feature_uri = File.join( @prediction_dataset.uri, "feature", "descriptor", f.to_s)
features[feature] = feature_uri
@prediction_dataset.add_feature(feature_uri, {
+ OT.isA => OT.Substructure,
OT.smarts => feature,
- OT.p_value => @p_values[feature],
+ OT.pValue => @p_values[feature],
OT.effect => @effects[feature]
})
@prediction_dataset.add @compound.uri, feature_uri, true
@@ -190,7 +188,8 @@ module OpenTox
@prediction_dataset.add_feature(neighbor_uri, {
OT.compound => neighbor[:compound],
OT.similarity => neighbor[:similarity],
- OT.activity => neighbor[:activity]
+ OT.measuredActivity => neighbor[:activity],
+ OT.isA => OT.Neighbor
})
@prediction_dataset.add @compound.uri, neighbor_uri, true
f = 0 unless f
@@ -204,8 +203,9 @@ module OpenTox
unless features.has_key? feature
features[feature] = feature_uri
@prediction_dataset.add_feature(feature_uri, {
+ OT.isA => OT.Substructure,
OT.smarts => feature,
- OT.p_value => @p_values[feature],
+ OT.pValue => @p_values[feature],
OT.effect => @effects[feature]
})
f+=1
@@ -228,7 +228,6 @@ module OpenTox
@neighbors = []
@fingerprints.each do |training_compound,training_features|
- #@activities.each do |training_compound,activities|
sim = eval("#{@similarity_algorithm}(@compound_features,training_features,@p_values)")
if sim > @min_sim
@activities[training_compound].each do |act|
@@ -244,17 +243,6 @@ module OpenTox
end
-=begin
- def cached_prediction
- dataset_uri = PredictionCache.find(:model_uri => @uri, :compound_uri => @compound.uri).dataset_uri)
- return false unless dataset_uri
- @prediction_dataset = Dataset.find(dataset_uri)
- return false unless @prediction_dataset
- LOGGER.debug "Serving cached prediction"
- true
- end
-=end
-
# Find database activities and store them in @prediction_dataset
# @return [Boolean] true if compound has databasse activities, false if not
def database_activity
@@ -278,29 +266,6 @@ module OpenTox
RestClientWrapper.delete @uri unless @uri == CONFIG[:services]["opentox-model"]
end
-=begin
-=end
-
-=begin
- def self.create_from_dataset(dataset_uri,feature_dataset_uri,prediction_feature=nil)
- training_activities = OpenTox::Dataset.find(dataset_uri)
- training_features = OpenTox::Dataset.find(feature_dataset_uri)
- unless prediction_feature # try to read prediction_feature from dataset
- raise "#{training_activities.features.size} features in dataset #{dataset_uri}. Please provide a prediction_feature parameter." unless training_activities.features.size == 1
- prediction_feature = training_activities.features.keys.first
- params[:prediction_feature] = prediction_feature
- end
- lazar = Lazar.new
- training_features = OpenTox::Dataset.new(feature_dataset_uri)
- case training_features.feature_type
- when "classification"
- lazar.similarity_algorithm = "weighted_tanimoto"
- when "regression"
- lazar.similarity_algorithm = "weighted_euclid"
- end
- end
-=end
-
end
end
end
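
Putting the documented accessors together, a usage sketch; the model and dataset URIs are placeholders, and the method names are those appearing in this file:

  model = OpenTox::Model::Lazar.find("http://example.org/model/1")
  model.parameter("prediction_feature")                               # => value of the named parameter
  prediction = model.predict_dataset("http://example.org/dataset/2")
  # predicted features now carry OT.isA annotations (OT.ModelPrediction, OT.Substructure,
  # OT.Neighbor) plus OT.pValue and OT.measuredActivity, as added in the hunks above
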
diff --git a/lib/opentox-ruby-api-wrapper.rb b/lib/opentox-ruby.rb
index 9f9ff26..9f9ff26 100644
--- a/lib/opentox-ruby-api-wrapper.rb
+++ b/lib/opentox-ruby.rb
diff --git a/lib/serializer.rb b/lib/serializer.rb
index 9b3af39..495702a 100644
--- a/lib/serializer.rb
+++ b/lib/serializer.rb
@@ -40,11 +40,13 @@ module OpenTox
DC.contributor => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } ,
DC.creator => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } ,
DC.description => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } ,
+ DC.date => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } ,
OT.isA => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } ,
OT.Warnings => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } ,
XSD.anyURI => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } ,
OT.hasStatus => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } ,
OT.resultURI => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } ,
+ OT.percentageCompleted => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } ,
OT.hasSource => { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } ,
OT.value => { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } ,
@@ -125,10 +127,7 @@ module OpenTox
def add_metadata(uri,metadata)
id = 0
metadata.each do |u,v|
- if v.is_a? String
- @object[uri] = {} unless @object[uri]
- @object[uri][u] = [{"type" => type(v), "value" => v }]
- elsif v.is_a? Array and u == OT.parameters
+ if v.is_a? Array and u == OT.parameters
@object[uri][u] = [] unless @object[uri][u]
v.each do |value|
id+=1
@@ -139,6 +138,9 @@ module OpenTox
@object[genid][name] = [{"type" => type(entry), "value" => entry }]
end
end
+ else # v.is_a? String
+ @object[uri] = {} unless @object[uri]
+ @object[uri][u] = [{"type" => type(v), "value" => v }]
end
end
end
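
A minimal sketch of what the reordered add_metadata branches accept, assuming the serializer class OpenTox::Serializer::Owl; the URI and values are illustrative:

  s = OpenTox::Serializer::Owl.new
  s.add_metadata "http://example.org/dataset/1",
    DC.title => "Example dataset",                                          # plain string -> else branch
    OT.parameters => [ { DC.title => "min_sim", OT.paramValue => "0.3" } ]  # array of hashes -> first branch
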
diff --git a/lib/task.rb b/lib/task.rb
index 5b2b5d9..5b59395 100644
--- a/lib/task.rb
+++ b/lib/task.rb
@@ -33,24 +33,29 @@ module OpenTox
# @return [OPenTox::Task] Task
def self.create( title=nil, creator=nil, max_duration=DEFAULT_TASK_MAX_DURATION, description=nil )
+ params = {:title=>title, :creator=>creator, :max_duration=>max_duration, :description=>description }
+ task_uri = RestClientWrapper.post(CONFIG[:services]["opentox-task"], params, nil, false).to_s
+ task = Task.new(task_uri.chomp)
+
# measure current memory consumption
memory = `free -m|sed -n '2p'`.split
free_memory = memory[3].to_i + memory[6].to_i # include cache
if free_memory < 20 # require at least 200 M free memory
LOGGER.warn "Cannot start task - not enough memory left (#{free_memory} M free)"
- raise "Insufficient memory to start a new task"
+ task.cancel
+ return task
+ #raise "Insufficient memory to start a new task"
end
cpu_load = `cat /proc/loadavg`.split(/\s+/)[0..2].collect{|c| c.to_f}
nr_cpu_cores = `cat /proc/cpuinfo |grep "cpu cores"|cut -d ":" -f2|tr -d " "`.split("\n").collect{|c| c.to_i}.inject{|sum,n| sum+n}
if cpu_load[0] > nr_cpu_cores and cpu_load[0] > cpu_load[1] and cpu_load[1] > cpu_load[2] # average CPU load of the last minute is high and CPU load is increasing
LOGGER.warn "Cannot start task - CPU load too high (#{cpu_load.join(", ")})"
- raise "Server too busy to start a new task"
+ task.cancel
+ return task
+ #raise "Server too busy to start a new task"
end
- params = {:title=>title, :creator=>creator, :max_duration=>max_duration, :description=>description }
- task_uri = RestClientWrapper.post(CONFIG[:services]["opentox-task"], params, nil, false).to_s
- task = Task.new(task_uri.chomp)
task_pid = Spork.spork(:logger => LOGGER) do
LOGGER.debug "Task #{task.uri} started #{Time.now}"