summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristoph Helma <helma@in-silico.ch>2011-03-09 13:54:34 +0100
committerChristoph Helma <helma@in-silico.ch>2011-03-09 13:54:34 +0100
commit4e30a6feca55e9de10c5013632593455b93f6e23 (patch)
treee0d8cc5cf7a349fcc0d17deb3d32a73beb9b35c9
parent76e58af42960fbe9357f899ffcc2588bfc756ed9 (diff)
parentd1983f442a9014d66173c7ff8ab8ae0ac35a23e8 (diff)
Merge branch 'release/v1.0.0'
-rw-r--r--.gitignore1
-rw-r--r--.gitmodules3
-rw-r--r--Rakefile101
-rw-r--r--application.rb12
-rw-r--r--balancer.rb98
-rw-r--r--config.ru5
-rw-r--r--fminer.rb487
m---------last-utils0
-rw-r--r--lazar.rb282
m---------libfminer0
-rw-r--r--openbabel.rb170
-rw-r--r--smarts.rb2
12 files changed, 792 insertions, 369 deletions
diff --git a/.gitignore b/.gitignore
index a3ee470..e3debba 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
tmp/*
log/*
public/*.owl
+*.*~
diff --git a/.gitmodules b/.gitmodules
index 3330d61..75218e9 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +1,6 @@
[submodule "libfminer"]
path = libfminer
url = http://github.com/amaunz/fminer2.git
+[submodule "last-utils"]
+ path = last-utils
+ url = git://github.com/amaunz/last-utils.git
diff --git a/Rakefile b/Rakefile
index 70139f4..e60ffc7 100644
--- a/Rakefile
+++ b/Rakefile
@@ -1,44 +1,77 @@
require 'rubygems'
require 'rake'
-require 'opentox-ruby-api-wrapper'
-#require 'tasks/opentox'
+require 'opentox-ruby'
namespace "fminer" do
- desc "Install required gems and fminer"
- task :install do
- puts `git submodule init`
- puts `git submodule update`
- Dir.chdir('libfminer/libbbrc')
- puts `git checkout master`
- puts `git pull`
- puts `./configure`
- if $? == 0
- puts `echo "Fminer successfully configured."`
- else
- puts `echo "Fminer configuration failed!"`
- exit
- end
- puts `make ruby`
- end
+ desc "Install required gems and fminer"
+ task :install do
+ puts `git submodule init`
+ puts `git submodule update`
+ Dir.chdir('libfminer/libbbrc')
+ puts `git checkout master`
+ puts `git pull`
+ puts `./configure`
+ if $? == 0
+ puts `echo "Fminer/LibBbrc successfully configured."`
+ else
+ puts `echo "Fminer/LibBbrc configuration failed!"`
+ exit
+ end
+ puts `make ruby`
+ Dir.chdir('../liblast')
+ puts `git checkout master`
+ puts `git pull`
+ puts `./configure`
+ if $? == 0
+ puts `echo "Fminer/LibLast successfully configured."`
+ else
+ puts `echo "Fminer/LibLast configuration failed!"`
+ exit
+ end
+ puts `make ruby`
+ Dir.chdir('../../last-utils')
+ #puts `git fetch`
+ # AM LAST: need branch 'experimental' until merged to master in last-utils
+ #puts `git checkout -f -b experimental origin/experimental`
+ puts `git checkout master`
+ puts `git pull`
+ end
- desc "Update gems and fminer"
- task :update do
- puts `git submodule update`
- Dir.chdir('libfminer/libbbrc')
- puts `git checkout master`
- puts `git pull`
- puts `./configure`
- if $? == 0
- puts `echo "Fminer successfully configured."`
- else
- puts `echo "Fminer configuration failed!"`
- exit
- end
- puts `make ruby`
- end
+ desc "Update gems and fminer"
+ task :update do
+ puts `git submodule update --init`
+ Dir.chdir('libfminer/libbbrc')
+ puts `git checkout Makefile`
+ puts `git pull`
+ puts `./configure`
+ if $? == 0
+ puts `echo "Fminer/LibBbrc successfully configured."`
+ else
+ puts `echo "Fminer/LibBbrc configuration failed!"`
+ exit
+ end
+ puts `make ruby`
+ Dir.chdir('../liblast')
+ puts `git checkout Makefile`
+ puts `git pull`
+ puts `./configure`
+ if $? == 0
+ puts `echo "Fminer/LibLast successfully configured."`
+ else
+ puts `echo "Fminer/LibLast configuration failed!"`
+ exit
+ end
+ puts `make ruby`
+ Dir.chdir('../../last-utils')
+ #puts `git fetch`
+ # AM LAST: need branch 'experimental' until merged to master in last-utils
+ #puts `git checkout -f -b experimental origin/experimental`
+ puts `git checkout master`
+ puts `git pull`
+ end
end
desc "Run tests"
task :test do
- load 'test/test.rb'
+ load 'test/test.rb'
end
diff --git a/application.rb b/application.rb
index c0363d5..e36643b 100644
--- a/application.rb
+++ b/application.rb
@@ -1,7 +1,10 @@
require 'rubygems'
+# AM LAST: can include both libs, no problems
require File.join(File.expand_path(File.dirname(__FILE__)), 'libfminer/libbbrc/bbrc') # has to be included before openbabel, otherwise we have strange SWIG overloading problems
-gem "opentox-ruby-api-wrapper", "= 1.6.5"
-require 'opentox-ruby-api-wrapper'
+require File.join(File.expand_path(File.dirname(__FILE__)), 'libfminer/liblast/last') # has to be included before openbabel, otherwise we have strange SWIG overloading problems
+require File.join(File.expand_path(File.dirname(__FILE__)), 'last-utils/lu.rb') # AM LAST
+gem "opentox-ruby", "~> 1"
+require 'opentox-ruby'
#require 'smarts.rb'
#require 'similarity.rb'
@@ -15,7 +18,10 @@ before do
LOGGER.debug "Request: " + request.path
end
+# Get a list of available algorithms
+#
+# @return [text/uri-list] algorithm URIs
get '/?' do
response['Content-Type'] = 'text/uri-list'
- [ url_for('/lazar', :full), url_for('/fminer', :full) ].join("\n") + "\n"
+ [ url_for('/lazar', :full), url_for('/fminer/bbrc', :full), url_for('/fminer/last', :full) ].join("\n") + "\n"
end
diff --git a/balancer.rb b/balancer.rb
new file mode 100644
index 0000000..4ed2fd7
--- /dev/null
+++ b/balancer.rb
@@ -0,0 +1,98 @@
+# cuts a classification dataset into balanced pieces
+# let inact_act_ratio := majority_class.size/minority_class.size
+# then: nr pieces = ceil(inact_act_ratio) if inact_act_ratio > 1.5
+# each piece contains the complete minority class and ceil(inact_act_ratio) majority class compounds.
+
+class Balancer
+
+ attr_accessor :inact_act_ratio, :act_hash, :inact_hash, :majority_splits, :nr_majority_splits, :errors, :datasets
+
+ # Supply a OpenTox::Dataset here
+ # Calculates inact_act_ratio, iff inact_act_ratio != +/-Infinity and no regression dataset is given
+ def initialize(dataset, feature_uri, creator_url)
+ @act_arr = []
+ @inact_arr = []
+ @inact_act_ratio = 1.0/0 # trick to define +infinity
+ @nr_majority_splits = 1 # +/-1 means: no split
+ @split = [] # splitted arrays with ids
+ @datasets = [] # result datasets
+ @errors = []
+
+ classification = true
+ if dataset.features.include?(feature_uri)
+ dataset.data.each do |i,a|
+ inchi = i
+ acts = a
+ acts.each do |act|
+ value = act[feature_uri]
+ if OpenTox::Utils.is_true?(value)
+ @act_arr << inchi
+ elsif OpenTox::Utils.classification?(value)
+ @inact_arr << inchi
+ else
+ classification = false
+ break;
+ end
+ end
+ end
+ @inact_act_ratio = @inact_arr.size.to_f / @act_arr.size.to_f unless (@act_arr.size == 0 or !classification) # leave alone for regression
+ set_nr_majority_splits
+ # perform majority split
+ @split = @nr_majority_splits > 0 ? shuffle_split(@inact_arr) : shuffle_split(@act_arr) unless @nr_majority_splits.abs == 1
+ @split.each do |s|
+      new_c = @nr_majority_splits > 0 ? s.concat(@act_arr) : s.concat(@inact_arr)
+ @datasets << dataset.create_new_dataset(new_c, [feature_uri], dataset.title, creator_url)
+ end
+
+ else
+ errors << "Feature not present in dataset."
+ end
+ errors << "Can not split regression dataset." unless classification
+ end
+
+
+
+ # sets nr of splits for majority class ('+', if inact_cnt > act_cnt, or '-' else), or leaves unchanged for illegal values.
+ def set_nr_majority_splits
+ @nr_majority_splits = @inact_act_ratio >= 1.5 ? @inact_act_ratio.ceil : ( @inact_act_ratio <= (2.0/3.0) ? -(1.0/@inact_act_ratio).ceil : ( @inact_act_ratio>1.0 ? 1 : -1) ) unless OpenTox::Utils.infinity?(@inact_act_ratio) # leave alone for regression
+ end
+
+ # does the actual shuffle and split
+ def shuffle_split (arr)
+ arr = arr.shuffle
+ arr.chunk(@nr_majority_splits.abs)
+ end
+
+ # turns a hash into a 2 col csv
+ def hsh2csv (hsh)
+ res=""
+ hsh.each do |k,v|
+ arr = [v,(@nr_majority_splits > 0 ? 0 : 1)]
+ res += arr.join(", ") + "\n"
+ end
+ res
+ end
+
+end
+
+class Array
+
+ # cuts an array into <num-pieces> chunks - returns a two-dimensional array
+ def chunk(pieces)
+ q, r = length.divmod(pieces)
+ (0..pieces).map { |i| i * q + [r, i].min }.enum_cons(2) \
+ .map { |a, b| slice(a...b) }
+ end
+
+ # shuffles the elements of an array
+ def shuffle( seed=nil )
+ srand seed.to_i if seed
+ sort_by { Kernel.rand }
+ end
+
+  # shuffles self
+ def shuffle!( seed=nil )
+ self.replace shuffle( seed )
+ end
+
+end
diff --git a/config.ru b/config.ru
index 489932f..a1aab0d 100644
--- a/config.ru
+++ b/config.ru
@@ -1,5 +1,6 @@
require 'rubygems'
-require 'opentox-ruby-api-wrapper'
+require 'opentox-ruby'
require 'config/config_ru'
run Sinatra::Application
-
+set :raise_errors, false
+set :show_exceptions, false \ No newline at end of file
diff --git a/fminer.rb b/fminer.rb
index 15379fd..5cc83ed 100644
--- a/fminer.rb
+++ b/fminer.rb
@@ -1,153 +1,354 @@
ENV['FMINER_SMARTS'] = 'true'
ENV['FMINER_NO_AROMATIC'] = 'true'
ENV['FMINER_PVALUES'] = 'true'
-@@fminer = Bbrc::Bbrc.new
+@@bbrc = Bbrc::Bbrc.new
+@@last = Last::Last.new
+
+# Get list of fminer algorithms
+#
+# @return [text/uri-list] URIs of fminer algorithms
get '/fminer/?' do
- owl = OpenTox::Owl.create 'Algorithm', url_for('/fminer',:full)
- owl.set 'title',"fminer"
- owl.set 'creator',"http://github.com/amaunz/fminer2"
- owl.parameters = {
- "Dataset URI" => { :scope => "mandatory", :value => "dataset_uri" },
- "Feature URI for dependent variable" => { :scope => "mandatory", :value => "feature_uri" }
- }
- rdf = owl.rdf
- File.open('public/fminer.owl', 'w') {|f| f.print rdf}
+ response['Content-Type'] = 'text/uri-list'
+ [ url_for('/fminer/bbrc', :full), url_for('/fminer/last', :full) ].join("\n") + "\n"
+end
+
+# Get RDF/XML representation of fminer bbrc algorithm
+# @return [application/rdf+xml] OWL-DL representation of fminer bbrc algorithm
+get "/fminer/bbrc/?" do
response['Content-Type'] = 'application/rdf+xml'
- rdf
+ algorithm = OpenTox::Algorithm::Generic.new(url_for('/fminer/bbrc',:full))
+ algorithm.metadata = {
+ DC.title => 'fminer backbone refinement class representatives',
+ DC.creator => "andreas@maunz.de, helma@in-silico.ch",
+ DC.contributor => "vorgrimmlerdavid@gmx.de",
+ OT.isA => OTA.PatternMiningSupervised,
+ OT.parameters => [
+ { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" },
+ { DC.description => "Feature URI for dependent variable", OT.paramScope => "mandatory", DC.title => "prediction_feature" },
+ { DC.description => "Minimum frequency", OT.paramScope => "optional", DC.title => "minfreq" },
+ { DC.description => "Feature type, can be 'paths' or 'trees'", OT.paramScope => "optional", DC.title => "feature_type" },
+ { DC.description => "BBRC classes, pass 'false' to switch off mining for BBRC representatives.", OT.paramScope => "optional", DC.title => "backbone" },
+ { DC.description => "Significance threshold (between 0 and 1)", OT.paramScope => "optional", DC.title => "min_chisq_significance" },
+ ]
+ }
+ algorithm.to_rdfxml
end
-post '/fminer/?' do
+# Get RDF/XML representation of fminer last algorithm
+# @return [application/rdf+xml] OWL-DL representation of fminer last algorithm
+get "/fminer/last/?" do
+ algorithm = OpenTox::Algorithm::Generic.new(url_for('/fminer/last',:full))
+ algorithm.metadata = {
+ DC.title => 'fminer latent structure class representatives',
+ DC.creator => "andreas@maunz.de, helma@in-silico.ch",
+ DC.contributor => "vorgrimmlerdavid@gmx.de",
+ OT.isA => OTA.PatternMiningSupervised,
+ OT.parameters => [
+ { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" },
+ { DC.description => "Feature URI for dependent variable", OT.paramScope => "mandatory", DC.title => "prediction_feature" },
+ { DC.description => "Minimum frequency", OT.paramScope => "optional", DC.title => "minfreq" },
+ { DC.description => "Feature type, can be 'paths' or 'trees'", OT.paramScope => "optional", DC.title => "feature_type" },
+ { DC.description => "Maximum number of hops", OT.paramScope => "optional", DC.title => "hops" },
+ ]
+ }
+ algorithm.to_rdfxml
+end
+
+# Run bbrc algorithm on dataset
+#
+# @param [String] dataset_uri URI of the training dataset
+# @param [String] prediction_feature URI of the prediction feature (i.e. dependent variable)
+# @param [optional] parameters BBRC parameters, accepted parameters are
+# - minfreq Minimum frequency (default 5)
+# - feature_type Feature type, can be 'paths' or 'trees' (default "trees")
+# - backbone BBRC classes, pass 'false' to switch off mining for BBRC representatives. (default "true")
+# - min_chisq_significance Significance threshold (between 0 and 1)
+# @return [text/uri-list] Task URI
+post '/fminer/bbrc/?' do
+
+ # TODO: is this thread safe??
+ #@@bbrc = Bbrc::Bbrc.new
+ minfreq = 5 unless minfreq = params[:min_frequency]
+ @@bbrc.SetMinfreq(minfreq)
+ @@bbrc.SetType(1) if params[:feature_type] == "paths"
+ @@bbrc.SetBackbone(params[:backbone]) if params[:backbone]
+ @@bbrc.SetChisqSig(params[:min_chisq_significance]) if params[:min_chisq_significance]
+ @@bbrc.SetConsoleOut(false)
+
+ halt 404, "Please submit a dataset_uri." unless params[:dataset_uri] and !params[:dataset_uri].nil?
+ halt 404, "Please submit a prediction_feature." unless params[:prediction_feature] and !params[:prediction_feature].nil?
+ prediction_feature = params[:prediction_feature]
+
+ training_dataset = OpenTox::Dataset.find "#{params[:dataset_uri]}", @subjectid
+ halt 404, "No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}" unless training_dataset.features and training_dataset.features.include?(params[:prediction_feature])
+
+ task = OpenTox::Task.create("Mining BBRC features", url_for('/fminer',:full)) do
+
+ feature_dataset = OpenTox::Dataset.new(nil, @subjectid)
+ feature_dataset.add_metadata({
+ DC.title => "BBRC representatives for " + training_dataset.metadata[DC.title].to_s,
+ DC.creator => url_for('/fminer/bbrc',:full),
+ OT.hasSource => url_for('/fminer/bbrc', :full),
+ OT.parameters => [
+ { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] },
+ { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] }
+ ]
+ })
+ feature_dataset.save(@subjectid)
+
+ id = 1 # fminer start id is not 0
+ compounds = []
+ nr_active=0
+ nr_inactive=0
+ all_activities = Hash.new# DV: for effect calculation in regression part
+
+ @@bbrc.Reset
+ training_dataset.data_entries.each do |compound,entry|
+ begin
+ smiles = OpenTox::Compound.new(compound.to_s).to_smiles
+ rescue
+ LOGGER.warn "No resource for #{compound.to_s}"
+ next
+ end
+ if smiles == '' or smiles.nil?
+ LOGGER.warn "Cannot find smiles for #{compound.to_s}."
+ next
+ end
+ entry.each do |feature,values|
+ values.each do |value|
+ if value.nil?
+            LOGGER.warn "No #{feature} activity for #{compound.to_s}."
+ else
+ case value.to_s
+ when "true"
+ nr_active += 1
+ activity = 1
+ when "false"
+ nr_inactive += 1
+ activity = 0
+ else
+ activity = value.to_f
+ @@bbrc.SetRegression(true)
+ end
+ begin
+ @@bbrc.AddCompound(smiles,id)
+ @@bbrc.AddActivity(activity, id)
+ all_activities[id]=activity # DV: insert global information
+ compounds[id] = compound
+ id += 1
+ rescue
+ LOGGER.warn "Could not add " + smiles + "\t" + value.to_s + " to fminer"
+ end
+ end
+ end
+ end
+ end
+
+ g_array=all_activities.values # DV: calculation of global median for effect calculation
+ g_median=OpenTox::Algorithm.median(g_array)
+
+ raise "No compounds in dataset #{training_dataset.uri}" if compounds.size==0
+
+ features = Set.new
+ # run @@bbrc
+ (0 .. @@bbrc.GetNoRootNodes()-1).each do |j|
+
+ results = @@bbrc.MineRoot(j)
+ results.each do |result|
+ f = YAML.load(result)[0]
+ smarts = f[0]
+ p_value = f[1]
+
+ if (!@@bbrc.GetRegression)
+ ids = f[2] + f[3]
+ if f[2].size.to_f/ids.size > nr_active.to_f/(nr_active+nr_inactive)
+ effect = 'activating'
+ else
+ effect = 'deactivating'
+ end
+ else #regression part
+ ids = f[2]
+ # DV: effect calculation
+ f_arr=Array.new
+ f[2].each do |id|
+ f_arr.push(all_activities[id])
+ end
+ f_median=OpenTox::Algorithm.median(f_arr)
+ if g_median >= f_median
+ effect = 'activating'
+ else
+ effect = 'deactivating'
+ end
+ end
+
+ feature_uri = File.join feature_dataset.uri,"feature","bbrc", features.size.to_s
+ unless features.include? smarts
+ features << smarts
+ metadata = {
+ OT.hasSource => url_for('/fminer/bbrc', :full),
+ OT.isA => OT.Substructure,
+ OT.smarts => smarts,
+ OT.pValue => p_value.to_f,
+ OT.effect => effect,
+ OT.parameters => [
+ { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] },
+ { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] }
+ ]
+ }
+ feature_dataset.add_feature feature_uri, metadata
+ #feature_dataset.add_feature_parameters feature_uri, feature_dataset.parameters
+ end
+ ids.each { |id| feature_dataset.add(compounds[id], feature_uri, true)}
+ end
+ end
+ feature_dataset.save(@subjectid)
+ feature_dataset.uri
+ end
+ response['Content-Type'] = 'text/uri-list'
+ halt 503,task.uri+"\n" if task.status == "Cancelled"
+ halt 202,task.uri.to_s+"\n"
+ end
+#end
+
+# Run last algorithm on a dataset
+#
+# @param [String] dataset_uri URI of the training dataset
+# @param [String] prediction_feature URI of the prediction feature (i.e. dependent variable)
+# @param [optional] parameters LAST parameters, accepted parameters are
+# - minfreq Minimum frequency (default 5)
+# - feature_type Feature type, can be 'paths' or 'trees' (default "trees")
+# - hops Maximum number of hops
+# @return [text/uri-list] Task URI
+post '/fminer/last/?' do
+ #@@last = Last::Last.new
+ minfreq = 5 unless minfreq = params[:min_frequency]
+ @@last.SetMinfreq(minfreq)
+ @@last.SetType(1) if params[:feature_type] == "paths"
+ @@last.SetMaxHops(params[:hops]) if params[:hops]
+ @@last.SetConsoleOut(false)
+
+ halt 404, "Please submit a dataset_uri." unless params[:dataset_uri] and !params[:dataset_uri].nil?
+ halt 404, "Please submit a prediction_feature." unless params[:prediction_feature] and !params[:prediction_feature].nil?
+ prediction_feature = params[:prediction_feature]
+
+ training_dataset = OpenTox::Dataset.new "#{params[:dataset_uri]}", @subjectid
+
+ training_dataset.load_all(@subjectid)
+ halt 404, "No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}" unless training_dataset.features and training_dataset.features.include?(params[:prediction_feature])
+
+ task = OpenTox::Task.create("Mining LAST features", url_for('/fminer',:full)) do
+
+ feature_dataset = OpenTox::Dataset.new
+ feature_dataset.add_metadata({
+ DC.title => "LAST representatives for " + training_dataset.metadata[DC.title].to_s,
+ DC.creator => url_for('/fminer/last',:full),
+ OT.hasSource => url_for('/fminer/last', :full),
+ OT.parameters => [
+ { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] },
+ { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] }
+ ]
+ })
+ feature_dataset.save(@subjectid)
+
+ id = 1 # fminer start id is not 0
+ compounds = []
+ smi = [] # AM LAST: needed for matching the patterns back
+ nr_active=0
+ nr_inactive=0
+ all_activities = Hash.new# DV: for effect calculation in regression part
+
+ @@last.Reset
+ training_dataset.data_entries.each do |compound,entry|
+ begin
+ smiles = OpenTox::Compound.new(compound.to_s).to_smiles
+ rescue
+ LOGGER.warn "No resource for #{compound.to_s}"
+ next
+ end
+ if smiles == '' or smiles.nil?
+ LOGGER.warn "Cannot find smiles for #{compound.to_s}."
+ next
+ end
+ entry.each do |feature,values|
+ values.each do |value|
+ if value.nil?
+            LOGGER.warn "No #{feature} activity for #{compound.to_s}."
+ else
+ case value.to_s
+ when "true"
+ nr_active += 1
+ activity = 1
+ when "false"
+ nr_inactive += 1
+ activity = 0
+ else
+ activity = value.to_f
+ @@last.SetRegression(true)
+ end
+ begin
+ @@last.AddCompound(smiles,id)
+ @@last.AddActivity(activity, id)
+ all_activities[id]=activity # DV: insert global information
+ compounds[id] = compound
+ smi[id] = smiles # AM LAST: changed this to store SMILES.
+ id += 1
+ rescue
+ LOGGER.warn "Could not add " + smiles + "\t" + value.to_s + " to fminer"
+ end
+ end
+ end
+ end
+ end
+
+ g_array=all_activities.values # DV: calculation of global median for effect calculation
+ g_median=OpenTox::Algorithm.median(g_array)
- halt 404, "Please submit a dataset_uri." unless params[:dataset_uri] and !params[:dataset_uri].nil?
- halt 404, "Please submit a feature_uri." unless params[:feature_uri] and !params[:feature_uri].nil?
- LOGGER.debug "Dataset: " + params[:dataset_uri]
- LOGGER.debug "Endpoint: " + params[:feature_uri]
- feature_uri = params[:feature_uri]
- begin
- LOGGER.debug "Retrieving #{params[:dataset_uri]}"
- training_dataset = OpenTox::Dataset.find "#{params[:dataset_uri]}"
- rescue
- LOGGER.error "Dataset #{params[:dataset_uri]} not found"
- halt 404, "Dataset #{params[:dataset_uri]} not found." if training_dataset.nil?
- end
- halt 404, "No feature #{params[:feature_uri]} in dataset #{params[:dataset_uri]}" unless training_dataset.features and training_dataset.features.include?(params[:feature_uri])
-
- task_uri = OpenTox::Task.as_task("Mine features", url_for('/fminer',:full)) do
-
- feature_dataset = OpenTox::Dataset.new
- title = "BBRC representatives for " + training_dataset.title
- feature_dataset.title = title
- feature_dataset.creator = url_for('/fminer',:full)
- bbrc_uri = url_for("/fminer#BBRC_representative",:full)
- feature_dataset.features << bbrc_uri
-
- id = 1 # fminer start id is not 0
- compounds = []
-
- g_hash = Hash.new# DV: for effect calculation in regression part
- @@fminer.Reset
- #@@fminer.SetChisqSig(0.99)
- LOGGER.debug "Fminer: initialising ..."
- training_dataset.data.each do |c,features|
- begin
- smiles = OpenTox::Compound.new(:uri => c.to_s).smiles
- rescue
- LOGGER.warn "No resource for #{c.to_s}"
- next
- end
- if smiles == '' or smiles.nil?
- LOGGER.warn "Cannot find smiles for #{c.to_s}."
- else
- feature_dataset.compounds << c.to_s
- features.each do |feature|
- act = feature[feature_uri]
- if act.nil?
- LOGGER.warn "No #{feature_uri} activiity for #{c.to_s}."
- else
- case act.to_s
- when "true"
- #LOGGER.debug id.to_s + ' "' + smiles +'"' + "\t" + true.to_s
- activity = 1
- when "false"
- #LOGGER.debug id.to_s + ' "' + smiles +'"' + "\t" + false.to_s
- activity = 0
- else
- # AM: add quantitative activity
- activity = act.to_f
- @@fminer.SetRegression(true)
- end
- compounds[id] = c.to_s
- begin
- @@fminer.AddCompound(smiles,id)
- @@fminer.AddActivity(activity, id)
- g_hash[id]=activity # DV: insert global information
- rescue
- LOGGER.warn "Could not add " + smiles + "\t" + act.to_s + " to fminer"
- end
- end
- end
- id += 1
- end
- end
- g_array=g_hash.values # DV: calculation of global median for effect calculation
- g_median=OpenTox::Utils.median(g_array)
- minfreq = (0.02*id).round
- @@fminer.SetMinfreq(minfreq)
- LOGGER.debug "Fminer: initialised with #{id} compounds, minimum frequency #{minfreq}"
-
- raise "no compounds" if compounds.size==0
-
- values = {}
- # run @@fminer
- LOGGER.debug "Fminer: mining ..."
- (0 .. @@fminer.GetNoRootNodes()-1).each do |j|
- results = @@fminer.MineRoot(j)
- results.each do |result|
- f = YAML.load(result)[0]
- smarts = f[0]
- p_value = f[1]
- # AM: f[3] missing on regression
- if (!@@fminer.GetRegression)
- ids = f[2] + f[3]
- if f[2].size > f[3].size
- effect = 'activating'
- else
- effect = 'deactivating'
- end
- else #regression part
- ids = f[2]
- # DV: effect calculation
- f_arr=Array.new
- f[2].each do |id|
- f_arr.push(g_hash[id])
- end
- f_median=OpenTox::Utils.median(f_arr)
- if g_median >= f_median
- effect = 'activating'
- else
- effect = 'deactivating'
- end
- end
-
- tuple = {
- url_for('/fminer#smarts',:full) => smarts,
- url_for('/fminer#p_value',:full) => p_value.to_f,
- url_for('/fminer#effect',:full) => effect
- }
- #LOGGER.debug "#{f[0]}\t#{f[1]}\t#{effect}"
- ids.each do |id|
- feature_dataset.data[compounds[id]] = [] unless feature_dataset.data[compounds[id]]
- feature_dataset.data[compounds[id]] << {bbrc_uri => tuple}
- end
- end
- end
-
- uri = feature_dataset.save
- LOGGER.debug "Fminer finished, dataset #{uri} created."
- uri
- end
- LOGGER.debug "Fimer task started: "+task_uri.to_s
- response['Content-Type'] = 'text/uri-list'
- halt 202,task_uri.to_s+"\n"
+ raise "No compounds in dataset #{training_dataset.uri}" if compounds.size==0
+
+ # run @@last
+ features = Set.new
+ xml = ""
+
+ (0 .. @@last.GetNoRootNodes()-1).each do |j|
+ results = @@last.MineRoot(j)
+ results.each do |result|
+ xml << result
+ end
+ end
+
+ lu = LU.new # AM LAST: uses last-utils here
+ dom=lu.read(xml) # AM LAST: parse GraphML (needs hpricot, @ch: to be included in wrapper!)
+ smarts=lu.smarts_rb(dom,'msa') # AM LAST: converts patterns to LAST-SMARTS using msa variant (see last-pm.maunz.de)
+ instances=lu.match_rb(smi,smarts) # AM LAST: creates instantiations
+ instances.each do |smarts, ids|
+ feat_hash = Hash[*(all_activities.select { |k,v| ids.include?(k) }.flatten)] # AM LAST: get activities of feature occurrences; see http://www.softiesonrails.com/2007/9/18/ruby-201-weird-hash-syntax
+ @@last.GetRegression() ? p_value = @@last.KSTest(all_activities.values, feat_hash.values).to_f : p_value = @@last.ChisqTest(all_activities.values, feat_hash.values).to_f # AM LAST: use internal function for test
+
+
+ effect = (p_value > 0) ? "activating" : "deactivating"
+ feature_uri = File.join feature_dataset.uri,"feature","last", features.size.to_s
+ unless features.include? smarts
+ features << smarts
+ metadata = {
+ OT.isA => OT.Substructure,
+ OT.hasSource => feature_dataset.uri,
+ OT.smarts => smarts,
+ OT.pValue => p_value.to_f,
+ OT.effect => effect,
+ OT.parameters => [
+ { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] },
+ { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] }
+ ]
+ }
+ feature_dataset.add_feature feature_uri, metadata
+ end
+ ids.each { |id| feature_dataset.add(compounds[id], feature_uri, true)}
+ end
+ feature_dataset.save(@subjectid)
+ feature_dataset.uri
+ end
+ response['Content-Type'] = 'text/uri-list'
+ halt 503,task.uri+"\n" if task.status == "Cancelled"
+ halt 202,task.uri.to_s+"\n"
end
diff --git a/last-utils b/last-utils
new file mode 160000
+Subproject 324a179b992c7b8b6f52963d4912ce5f92fe81c
diff --git a/lazar.rb b/lazar.rb
index d037fbc..b80235e 100644
--- a/lazar.rb
+++ b/lazar.rb
@@ -1,191 +1,151 @@
+@@feature_generation_default = File.join(CONFIG[:services]["opentox-algorithm"],"fminer","bbrc")
+
+# Get RDF/XML representation of the lazar algorithm
+# @return [application/rdf+xml] OWL-DL representation of the lazar algorithm
get '/lazar/?' do
- owl = OpenTox::Owl.create 'Algorithm', url_for('/lazar',:full)
- owl.set 'title',"lazar"
- owl.set 'creator',"http://github.com/helma/opentox-algorithm"
- owl.parameters = {
- "Dataset URI" =>
- { :scope => "mandatory", :value => "dataset_uri" },
- "Feature URI for dependent variable" =>
- { :scope => "mandatory", :value => "prediction_feature" },
- "Feature generation URI" =>
- { :scope => "mandatory", :value => "feature_generation_uri" }
- }
- rdf = owl.rdf
- File.open('public/lazar.owl', 'w') {|f| f.print rdf}
response['Content-Type'] = 'application/rdf+xml'
- rdf
+ algorithm = OpenTox::Algorithm::Generic.new(url_for('/lazar',:full))
+ algorithm.metadata = {
+ DC.title => 'lazar',
+ DC.creator => "helma@in-silico.ch, andreas@maunz.de",
+ DC.contributor => "vorgrimmlerdavid@gmx.de",
+ OT.isA => OTA.ClassificationLazySingleTarget,
+ OT.parameters => [
+ { DC.description => "Dataset URI with the dependent variable", OT.paramScope => "mandatory", DC.title => "dataset_uri" },
+ { DC.description => "Feature URI for dependent variable. Optional for datasets with only a single feature.", OT.paramScope => "optional", DC.title => "prediction_feature" },
+ { DC.description => "URI of feature genration service. Default: #{@@feature_generation_default}", OT.paramScope => "optional", DC.title => "feature_generation_uri" },
+ { DC.description => "URI of feature dataset. If this parameter is set no feature generation algorithm will be called", OT.paramScope => "optional", DC.title => "feature_dataset_uri" },
+ { DC.description => "Further parameters for the feaature generation service", OT.paramScope => "optional" }
+ ]
+ }
+ algorithm.to_rdfxml
end
-post '/lazar/?' do # create a model
+# Create a lazar prediction model
+# @param [String] dataset_uri Training dataset URI
+# @param [optional,String] prediction_feature URI of the feature to be predicted
+# @param [optional,String] feature_generation_uri URI of the feature generation algorithm
+# @param [optional,String] - further parameters for the feature generation service
+# @return [text/uri-list] Task URI
+post '/lazar/?' do
- LOGGER.debug "Dataset: '" + params[:dataset_uri].to_s + "'"
- LOGGER.debug "Endpoint: '" + params[:prediction_feature].to_s + "'"
- LOGGER.debug "Feature generation: '" + params[:feature_generation_uri].to_s + "'"
- dataset_uri = "#{params[:dataset_uri]}"
+ params[:subjectid] = @subjectid
+ halt 404, "No dataset_uri parameter." unless params[:dataset_uri]
+ dataset_uri = params[:dataset_uri]
- begin
- training_activities = OpenTox::Dataset.find(dataset_uri)
- rescue
- halt 404, "Dataset #{dataset_uri} not found"
+ halt 404, "Dataset #{dataset_uri} not found." unless training_activities = OpenTox::Dataset.new(dataset_uri)
+ training_activities.load_all(@subjectid)
+
+ prediction_feature = params[:prediction_feature]
+ unless prediction_feature # try to read prediction_feature from dataset
+ halt 404, "#{training_activities.features.size} features in dataset #{dataset_uri}. Please provide a prediction_feature parameter." unless training_activities.features.size == 1
+ prediction_feature = training_activities.features.keys.first
+ params[:prediction_feature] = prediction_feature
end
- halt 404, "No prediction_feature parameter." unless params[:prediction_feature]
- halt 404, "No feature_generation_uri parameter." unless params[:feature_generation_uri]
- halt 404, "No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}. (features: "+
- training_activities.features.inspect+")" unless training_activities.features and training_activities.features.include?(params[:prediction_feature])
+ feature_generation_uri = @@feature_generation_default unless feature_generation_uri = params[:feature_generation_uri]
- response['Content-Type'] = 'text/uri-list'
- task_uri = OpenTox::Task.as_task("Create lazar model",url_for('/lazar',:full)) do |task|
-
- # create features
- LOGGER.debug "Starting fminer"
- params[:feature_uri] = params[:prediction_feature]
- fminer_task_uri = OpenTox::Algorithm::Fminer.create_feature_dataset(params)
- fminer_task = OpenTox::Task.find(fminer_task_uri)
- fminer_task.wait_for_completion
- raise "fminer failed" unless fminer_task.completed?
-
- LOGGER.debug "Fminer finished #{Time.now}"
- feature_dataset_uri = fminer_task.resultURI.to_s
- training_features = OpenTox::Dataset.find(feature_dataset_uri)
- halt 404, "Dataset #{feature_dataset_uri} not found." if training_features.nil?
- lazar = OpenTox::Model::Lazar.new
- lazar.trainingDataset = dataset_uri
- lazar.feature_dataset_uri = feature_dataset_uri
- halt 404, "More than one descriptor type" unless training_features.features.size == 1
- bbrc = training_features.features.first
- training_features.data.each do |compound,features|
- lazar.fingerprints[compound] = [] unless lazar.fingerprints[compound]
- features.each do |feature|
- tuple = feature[bbrc]
- if tuple
- smarts =nil; p_value = nil; effect = nil
- tuple.each do |k,v|
- case k
- when /fminer#smarts/
- smarts = v
- lazar.features << smarts
- lazar.fingerprints[compound] << smarts
- when /fminer#p_value/
- p_value = v
- when /fminer#effect/
- effect = v
- end
- end
- lazar.p_values[smarts] = p_value
- lazar.effects[smarts] = effect
- end
- end
- end
+ halt 404, "No feature #{prediction_feature} in dataset #{params[:dataset_uri]}. (features: "+
+ training_activities.features.inspect+")" unless training_activities.features and training_activities.features.include?(prediction_feature)
- activities = {}
- classification = true
- training_activities.data.each do |compound,features|
- lazar.activities[compound] = [] unless lazar.activities[compound]
- features.each do |feature|
- case feature[params[:prediction_feature]].to_s
- when "true"
- lazar.activities[compound] << true
- when "false"
- lazar.activities[compound] << false
- # AM: handle quantitative activity values of features
- else
- lazar.activities[compound] << feature[params[:prediction_feature]].to_f
- classification = false
- end
- end
- end
- # TODO: insert regression
- if classification
- lazar.dependentVariables = params[:prediction_feature]+"_lazar_classification"
- else
- lazar.dependentVariables = params[:prediction_feature]+"_lazar_regression"
- end
-
- model_uri = lazar.save
- LOGGER.info model_uri + " created #{Time.now}"
- model_uri
- end
- halt 202,task_uri
-end
+ task = OpenTox::Task.create("Create lazar model",url_for('/lazar',:full)) do |task|
-post '/property_lazar/?' do # create a model
+ lazar = OpenTox::Model::Lazar.new
+ lazar.min_sim = params[:min_sim] if params[:min_sim]
- LOGGER.debug "Dataset: '" + params[:dataset_uri].to_s + "'"
- LOGGER.debug "Endpoint: '" + params[:prediction_feature].to_s + "'"
- LOGGER.debug "Feature dataset: '" + params[:feature_dataset_uri].to_s + "'"
- dataset_uri = "#{params[:dataset_uri]}"
+ if params[:feature_dataset_uri]
+ feature_dataset_uri = params[:feature_dataset_uri]
+ training_features = OpenTox::Dataset.new(feature_dataset_uri)
+ case training_features.feature_type
+ when "classification"
+ lazar.similarity_algorithm = "Similarity.tanimoto"
+ when "regression"
+ lazar.similarity_algorithm = "Similarity.euclid"
+ end
+ else # create features
+ params[:feature_generation_uri] = feature_generation_uri
+ if feature_generation_uri.match(/fminer/)
+ lazar.feature_calculation_algorithm = "Substructure.match"
+ else
+ halt 404, "External feature generation services not yet supported"
+ end
+ params[:subjectid] = @subjectid
+ feature_dataset_uri = OpenTox::Algorithm::Generic.new(feature_generation_uri).run(params).to_s
+ training_features = OpenTox::Dataset.new(feature_dataset_uri)
+ end
- begin
- training_activities = OpenTox::Dataset.find(dataset_uri)
- rescue
- halt 404, "Dataset #{dataset_uri} not found"
- end
+ training_features.load_all(@subjectid)
+ halt 404, "Dataset #{feature_dataset_uri} not found." if training_features.nil?
- halt 404, "No prediction_feature parameter." unless params[:prediction_feature]
- halt 404, "No feature_dataset_uri parameter." unless params[:feature_dataset_uri]
- halt 404, "No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}. (features: "+
- training_activities.features.inspect+")" unless training_activities.features and training_activities.features.include?(params[:prediction_feature])
+ # sorted features for index lookups
+ lazar.features = training_features.features.sort if training_features.feature_type == "regression"
- response['Content-Type'] = 'text/uri-list'
- task_uri = OpenTox::Task.as_task("Create lazar model",url_for('/property_lazar',:full)) do |task|
-
- # create features
- #LOGGER.debug "Starting fminer"
- #params[:feature_uri] = params[:prediction_feature]
- #fminer_task_uri = OpenTox::Algorithm::Fminer.create_feature_dataset(params)
- #fminer_task = OpenTox::Task.find(fminer_task_uri)
- #fminer_task.wait_for_completion
- #raise "fminer failed" unless fminer_task.completed?
-
- #LOGGER.debug "Fminer finished #{Time.now}"
- feature_dataset_uri = params[:feature_dataset_uri]
- training_features = OpenTox::Dataset.find(feature_dataset_uri)
- halt 404, "Dataset #{feature_dataset_uri} not found." if training_features.nil?
- lazar = OpenTox::Model::PropertyLazar.new
- lazar.trainingDataset = dataset_uri
- lazar.feature_dataset_uri = feature_dataset_uri
- #halt 404, "More than one descriptor type" unless training_features.features.size == 1
- lazar.features = training_features.features
- training_features.data.each do |compound,features|
- lazar.properties[compound] = {} unless lazar.properties[compound]
- LOGGER.debug features.inspect
- if features
- features.each do |f|
- f.each do |name,value|
- #lazar.features.each do |feature|
- lazar.properties[compound][name] = value
- #lazar.properties[compound] = features
+ training_features.data_entries.each do |compound,entry|
+ lazar.fingerprints[compound] = [] unless lazar.fingerprints[compound]
+ entry.keys.each do |feature|
+ if feature_generation_uri.match(/fminer/)
+ smarts = training_features.features[feature][OT.smarts]
+ lazar.fingerprints[compound] << smarts
+ unless lazar.features.include? smarts
+ lazar.features << smarts
+ lazar.p_values[smarts] = training_features.features[feature][OT.pValue]
+ lazar.effects[smarts] = training_features.features[feature][OT.effect]
+ end
+ else
+ case training_features.feature_type
+ when "classification"
+ # fingerprints are sets
+ if entry[feature].flatten.size == 1
+ lazar.fingerprints[compound] << feature if entry[feature].flatten.first.to_s.match(TRUE_REGEXP)
+ lazar.features << feature unless lazar.features.include? feature
+ else
+ LOGGER.warn "More than one entry (#{entry[feature].inspect}) for compound #{compound}, feature #{feature}"
+ end
+ when "regression"
+ # fingerprints are arrays
+ if entry[feature].flatten.size == 1
+ lazar.fingerprints[compound][lazar.features.index(feature)] = entry[feature].flatten.first
+ else
+ LOGGER.warn "More than one entry (#{entry[feature].inspect}) for compound #{compound}, feature #{feature}"
+ end
+ end
end
end
- end
- end
-
- activities = {}
- classification = true
- training_activities.data.each do |compound,features|
+
lazar.activities[compound] = [] unless lazar.activities[compound]
- features.each do |feature|
- case feature[params[:prediction_feature]].to_s
+ training_activities.data_entries[compound][params[:prediction_feature]].each do |value|
+ case value.to_s
when "true"
lazar.activities[compound] << true
when "false"
lazar.activities[compound] << false
else
- lazar.activities[compound] << feature[params[:prediction_feature]].to_f
- classification = false
+ halt 404, "0 values not allowed in training dataset. log10 is calculated internally." if value.to_f == 0
+ lazar.activities[compound] << value.to_f
+ lazar.prediction_algorithm = "Neighbors.local_svm_regression"
end
end
- end
- if classification
- lazar.dependentVariables = params[:prediction_feature]+"_lazar_classification"
- else
- lazar.dependentVariables = params[:prediction_feature]+"_lazar_regression"
- end
+ end
+
+ lazar.metadata[DC.title] = "lazar model for #{URI.decode(File.basename(prediction_feature))}"
+ # TODO: fix dependentVariable
+ lazar.metadata[OT.dependentVariables] = params[:prediction_feature]
+ lazar.metadata[OT.trainingDataset] = dataset_uri
+ lazar.metadata[OT.featureDataset] = feature_dataset_uri
+ lazar.metadata[OT.isA] = OTA.ClassificationLazySingleTarget
+
+ lazar.metadata[OT.parameters] = [
+ {DC.title => "dataset_uri", OT.paramValue => dataset_uri},
+ {DC.title => "prediction_feature", OT.paramValue => prediction_feature},
+ {DC.title => "feature_generation_uri", OT.paramValue => feature_generation_uri}
+ ]
- model_uri = lazar.save
+ model_uri = lazar.save(@subjectid)
LOGGER.info model_uri + " created #{Time.now}"
model_uri
end
- halt 202,task_uri
+ response['Content-Type'] = 'text/uri-list'
+ halt 503,task.uri+"\n" if task.status == "Cancelled"
+ halt 202,task.uri
end
+
diff --git a/libfminer b/libfminer
-Subproject 5a97d006e0ccfc48e53d5f24842a898ec9e912e
+Subproject 6e35cacd8da45f21c4039591d339a74af9a44a6
diff --git a/openbabel.rb b/openbabel.rb
index a261866..3a873c0 100644
--- a/openbabel.rb
+++ b/openbabel.rb
@@ -1,28 +1,148 @@
-get '/openbabel/:smiles/:property/?' do
+OBMOL_METHODS = {
+ "NumAtoms" => "Number of atoms",
+ "NumBonds" => "Number of bonds",
+ "NumHvyAtoms" => "Number of heavy atoms",
+ "NumResidues" => "Number of residues",
+ "NumRotors" => "Number of rotatable bonds",
+ "GetEnergy" => "Heat of formation for this molecule (in kcal/mol)",
+ "GetMolWt" => "Standard molar mass given by IUPAC atomic masses (amu)",
+ "GetExactMass" => "Mass given by isotopes (or most abundant isotope, if not specified)",
+ "GetTotalCharge" => "Total charge",
+}
+
+OBDESCRIPTOR_METHODS = {
+ "HBA1" => "Number of hydrogen bond acceptors 1 (JoelLib)",
+ "HBA2" => "Number of hydrogen bond acceptors 2 (JoelLib)",
+ "HBD" => "Number of hydrogen bond donors (JoelLib)",
+ "L5" => "Lipinski rule of five",
+ "logP" => "Octanol/water partition coefficient",
+ "MR" => "Molar refractivity",
+ "MW" => "Molecular weight",
+ "nF" => "Number of fluorine atoms",
+ "nHal" => "Number of halogen atoms",
+ "spinMult" => "Total spin multiplicity",
+ "TPSA" => "Topological polar surface area",
+}
+
+# Get a list of OpenBabel algorithms
+# @return [text/uri-list] URIs of OpenBabel algorithms
+get '/openbabel' do
+ algorithms = OBMOL_METHODS.collect{|name,description| url_for("/openbabel/#{name}",:full)}
+ algorithms << OBDESCRIPTOR_METHODS.collect{|name,description| url_for("/openbabel/#{name}",:full)}
+ response['Content-Type'] = 'text/uri-list'
+ algorithms.join("\n")
+end
+
+# Get RDF/XML representation of OpenBabel algorithm
+# @return [application/rdf+xml] OWL-DL representation of OpenBabel algorithm
+get '/openbabel/:property' do
+ description = OBMOL_METHODS[params[:property]] if OBMOL_METHODS.include? params[:property]
+ description = OBDESCRIPTOR_METHODS[params[:property]] if OBDESCRIPTOR_METHODS.include? params[:property]
+ if description
+ algorithm = OpenTox::Algorithm::Generic.new(url_for("/openbabel/#{params[:property]}",:full))
+ algorithm.metadata = {
+ DC.title => params[:property],
+ DC.creator => "helma@in-silico.ch",
+ DC.description => description,
+ OT.isA => OTA.DescriptorCalculation,
+ }
+ response['Content-Type'] = 'application/rdf+xml'
+ algorithm.to_rdfxml
+ else
+ halt 404, "Unknown OpenBabel descriptor #{params[:property]}."
+ end
+end
+
+# Calculate OpenBabel descriptors
+# Supports the following OpenBabel methods (see OpenBabel API http://openbabel.org/api/2.2.0/)
+# - NumAtoms Number of atoms
+# - NumBonds Number of bonds
+# - NumHvyAtoms Number of heavy atoms
+# - NumResidues Number of residues
+# - NumRotors Number of rotatable bonds
+# - GetEnergy Heat of formation for this molecule (in kcal/mol)
+# - GetMolWt Standard molar mass given by IUPAC atomic masses (amu)
+# - GetExactMass Mass given by isotopes (or most abundant isotope, if not specified)
+# - GetTotalCharge Total charge
+# - HBA1 Number of hydrogen bond acceptors 1 (JoelLib)
+# - HBA2 Number of hydrogen bond acceptors 2 (JoelLib)
+# - HBD Number of hydrogen bond donors (JoelLib)
+# - L5 Lipinski rule of five
+# - logP Octanol/water partition coefficient
+# - MR Molar refractivity
+# - MW Molecular weight
+# - nF Number of fluorine atoms
+# - nHal Number of halogen atoms
+# - spinMult Total spin multiplicity
+# - TPSA Topological polar surface area
+# @param [String] compound_uri Compound URI
+# @return [String] descriptor value
+post '/openbabel/:property' do
obconversion = OpenBabel::OBConversion.new
obmol = OpenBabel::OBMol.new
- obconversion.set_in_and_out_formats 'smi', 'can'
- case params[:property]
- when 'logP'
- #logP = OpenBabel::OBLogP.new
- #logP.predict(obmol)
- "not yet implemented"
- when 'psa'
- #psa = OpenBabel::OBPSA.new
- "not yet implemented"
- when 'mr'
- #mr = OpenBabel::OBMR.new
- "not yet implemented"
- else
- begin
- obconversion.read_string obmol, params[:smiles]
- rescue
- halt 404, "Incorrect Smiles string #{params[:smiles]}"
- end
- begin
- eval("obmol.#{params[:property]}").to_s
- rescue
- halt 404, "Could not calculate property #{params[:property]}"
- end
- end
+ compound = OpenTox::Compound.new params[:compound_uri]
+ obconversion.set_in_and_out_formats 'inchi', 'can'
+ obconversion.read_string obmol, compound.to_inchi
+ if OBMOL_METHODS.keys.include? params[:property]
+ eval("obmol.#{params[:property].underscore}").to_s
+ elsif OBDESCRIPTOR_METHODS.keys.include? params[:property]
+ descriptor = OpenBabel::OBDescriptor.find_type(params[:property])
+ descriptor.predict(obmol).to_s
+ else
+ halt 404, "Cannot calculate property #{params[:property]} with OpenBabel"
+ end
+end
+
+# Calculate all OpenBabel descriptors for a dataset
+# @param [String] dataset_uri Dataset URI
+# @return [text/uri-list] Task URI
+post '/openbabel' do
+ task = OpenTox::Task.create("Calculating OpenBabel descriptors for #{params[:dataset_uri]}", url_for('/openbabel',:full)) do
+
+ dataset = OpenTox::Dataset.find(params[:dataset_uri])
+ result_dataset = OpenTox::Dataset.create
+ result_dataset.add_metadata({
+ DC.title => "OpenBabel descriptors for " + dataset.metadata[DC.title].to_s,
+ DC.creator => url_for('/openbabel',:full),
+ OT.hasSource => url_for('/openbabel', :full),
+ OT.parameters => [
+ { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] },
+ ]
+ })
+
+ obconversion = OpenBabel::OBConversion.new
+ obmol = OpenBabel::OBMol.new
+ obconversion.set_in_and_out_formats 'inchi', 'can'
+
+ OBMOL_METHODS.merge(OBDESCRIPTOR_METHODS).each do |name,description|
+ feature_uri = File.join result_dataset.uri, "feature", "openbabel", name
+ metadata = {
+ OT.hasSource => url_for("/openbabel/#{name}", :full),
+ DC.description => description,
+ DC.title => name,
+ }
+ result_dataset.add_feature feature_uri, metadata
+ end
+
+ dataset.compounds.each do |compound_uri|
+ compound = OpenTox::Compound.new(compound_uri)
+ obconversion.read_string obmol, compound.to_inchi
+ #result_dataset.add_compound compound_uri
+ OBMOL_METHODS.keys.each do |name|
+ feature_uri = File.join result_dataset.uri, "feature", "openbabel", name
+ value = eval("obmol.#{name.underscore}").to_f
+ result_dataset.add compound_uri, feature_uri, value
+ end
+ OBDESCRIPTOR_METHODS.keys.each do |name|
+ feature_uri = File.join result_dataset.uri, "feature", "openbabel", name
+ value = OpenBabel::OBDescriptor.find_type(params[:property]).predict(obmol).to_f
+ result_dataset.add compound_uri, feature_uri, value
+ end
+ end
+ result_dataset.save
+ result_dataset.uri
+ end
+ response['Content-Type'] = 'text/uri-list'
+ halt 503,task.uri+"\n" if task.status == "Cancelled"
+ halt 202,task.uri.to_s+"\n"
end
diff --git a/smarts.rb b/smarts.rb
index 2ea54d2..4ae6949 100644
--- a/smarts.rb
+++ b/smarts.rb
@@ -1,3 +1,3 @@
get '/match/compound/*/smarts/*/?' do
- "#{OpenTox::Compound.new(:inchi => params[:splat][0]).match?(params[:splat][1])}"
+ "#{OpenTox::Compound.from_inchi(params[:splat][0]).match?(params[:splat][1])}"
end