summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristoph Helma <helma@in-silico.ch>2011-03-21 15:49:59 +0100
committerChristoph Helma <helma@in-silico.ch>2011-03-21 15:49:59 +0100
commit6bb6b86179825450e37459a6c463b593db734dc4 (patch)
treee3587d8e9ca3d3cd71ccd3645d5d26f5b3f82fa2
parent89c4a761283484afefd14c3094ba2bd7ab828619 (diff)
parentaa2ef7d05c4016785012af20408352e0f1e944c1 (diff)
Merge branch 'hotfix/v1.0.3'v1.0.3
-rw-r--r--README.md53
-rw-r--r--fminer.rb282
m---------last-utils0
m---------libfminer0
4 files changed, 176 insertions, 159 deletions
diff --git a/README.md b/README.md
index cf1f519..0eb641c 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
OpenTox Algorithm
=================
-- An [OpenTox](http://www.opentox.org) REST Webservice
+- An [OpenTox](http://www.opentox.org) REST Webservice
- Implements the OpenTox algorithm API for
- fminer
- lazar
@@ -9,18 +9,26 @@ OpenTox Algorithm
REST operations
---------------
- Get a list of all algorithms GET / - URIs of algorithms 200
- Get a representation of the GET /fminer - fminer representation 200,404
- fminer algorithm
- Get a representation of the GET /lazar - lazar representation 200,404
- lazar algorithm
- Create fminer features POST /fminer dataset_uri, URI for feature dataset 200,400,404,500
- feature_uri
- Create lazar model POST /lazar dataset_uri, URI for lazar model 200,400,404,500
- prediction_feature,
- feature_generation_uri
-
-Supported MIME formats
+ Get a list of all algorithms GET / - URIs of algorithms 200
+ Get a representation of the GET /fminer/ - fminer representation 200,404
+ fminer algorithms
+ Get a representation of the GET /fminer/bbrc - bbrc representation 200,404
+ bbrc algorithm
+ Get a representation of the GET /fminer/last - last representation 200,404
+ last algorithm
+ Get a representation of the GET /lazar - lazar representation 200,404
+ lazar algorithm
+ Create bbrc features POST /fminer/bbrc dataset_uri, URI for feature dataset 200,400,404,500
+ feature_uri,
+ min_frequency
+ Create last features POST /fminer/last dataset_uri, URI for feature dataset 200,400,404,500
+ feature_uri,
+ min_frequency
+ Create lazar model POST /lazar dataset_uri, URI for lazar model 200,400,404,500
+ prediction_feature,
+ feature_generation_uri
+
+Supported MIME formats
----------------------
- application/rdf+xml (default): read/write OWL-DL
@@ -29,19 +37,25 @@ Supported MIME formats
Examples
--------
-### Get the OWL-DL representation of fminer
+### Get the OWL-DL representation of fminer
curl http://webservices.in-silico.ch/algorithm/fminer
-### Get the OWL-DL representation of lazar
+### Get the OWL-DL representation of lazar
curl http://webservices.in-silico.ch/algorithm/lazar
-### Create fminer features
+### Create [BBRC](http://bbrc.maunz.de) features
- curl -X POST -d dataset_uri={datset_uri} -d prediction_feature={feature_uri} http://webservices.in-silico.ch/algorithm/fminer
+ curl -X POST -d dataset_uri={dataset_uri} -d prediction_feature={feature_uri} -d min_frequency={min_frequency} http://webservices.in-silico.ch/algorithm/fminer/bbrc
-(feature_uri specifies the dependent variable, e.g. http://www.epa.gov/NCCT/dsstox/CentralFieldDef.html#ActivityOutcome_CPDBAS_Hamster)
+feature_uri specifies the dependent variable from the dataset.
+
+### Create [LAST-PM](http://last-pm.maunz.de) features
+
+ curl -X POST -d dataset_uri={dataset_uri} -d prediction_feature={feature_uri} -d min_frequency={min_frequency} http://webservices.in-silico.ch/algorithm/fminer/last
+
+feature_uri specifies the dependent variable from the dataset.
Creates a dataset with fminer features (backbone refinement class representatives from supervised graph mining, see http://www.maunz.de/libfminer-doc/). These features can be used e.g. as structural alerts, as descriptors (fingerprints) for prediction models or for similarity calculations.
@@ -49,10 +63,9 @@ Creates a dataset with fminer features (backbone refinement class representative
curl -X POST -d dataset_uri={dataset_uri} -d prediction_feature={feature_uri} -d feature_generation_uri=http://webservices.in-silico.ch/algorithm/fminer http://webservices.in-silico.ch/test/algorithm/lazar
-(feaure_uri specifies the dependent variable, e.g. http://www.epa.gov/NCCT/dsstox/CentralFieldDef.html#ActivityOutcome_CPDBAS_Hamster)
+feature_uri specifies the dependent variable from the dataset.
[API documentation](http://rdoc.info/github/opentox/algorithm)
--------------------------------------------------------------
Copyright (c) 2009-2011 Christoph Helma, Martin Guetlein, Micha Rautenberg, Andreas Maunz, David Vorgrimmler, Denis Gebele. See LICENSE for details.
-
diff --git a/fminer.rb b/fminer.rb
index 5cc83ed..91e73cd 100644
--- a/fminer.rb
+++ b/fminer.rb
@@ -67,148 +67,150 @@ end
# @return [text/uri-list] Task URI
post '/fminer/bbrc/?' do
- # TODO: is this thread safe??
- #@@bbrc = Bbrc::Bbrc.new
- minfreq = 5 unless minfreq = params[:min_frequency]
- @@bbrc.SetMinfreq(minfreq)
- @@bbrc.SetType(1) if params[:feature_type] == "paths"
- @@bbrc.SetBackbone(params[:backbone]) if params[:backbone]
- @@bbrc.SetChisqSig(params[:min_chisq_significance]) if params[:min_chisq_significance]
- @@bbrc.SetConsoleOut(false)
-
- halt 404, "Please submit a dataset_uri." unless params[:dataset_uri] and !params[:dataset_uri].nil?
- halt 404, "Please submit a prediction_feature." unless params[:prediction_feature] and !params[:prediction_feature].nil?
- prediction_feature = params[:prediction_feature]
-
- training_dataset = OpenTox::Dataset.find "#{params[:dataset_uri]}", @subjectid
- halt 404, "No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}" unless training_dataset.features and training_dataset.features.include?(params[:prediction_feature])
-
- task = OpenTox::Task.create("Mining BBRC features", url_for('/fminer',:full)) do
-
- feature_dataset = OpenTox::Dataset.new(nil, @subjectid)
- feature_dataset.add_metadata({
- DC.title => "BBRC representatives for " + training_dataset.metadata[DC.title].to_s,
- DC.creator => url_for('/fminer/bbrc',:full),
- OT.hasSource => url_for('/fminer/bbrc', :full),
- OT.parameters => [
- { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] },
- { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] }
- ]
- })
- feature_dataset.save(@subjectid)
-
- id = 1 # fminer start id is not 0
- compounds = []
- nr_active=0
- nr_inactive=0
- all_activities = Hash.new# DV: for effect calculation in regression part
-
- @@bbrc.Reset
- training_dataset.data_entries.each do |compound,entry|
- begin
- smiles = OpenTox::Compound.new(compound.to_s).to_smiles
- rescue
- LOGGER.warn "No resource for #{compound.to_s}"
- next
- end
- if smiles == '' or smiles.nil?
- LOGGER.warn "Cannot find smiles for #{compound.to_s}."
- next
- end
- entry.each do |feature,values|
- values.each do |value|
- if value.nil?
- LOGGER.warn "No #{feature} activiity for #{compound.to_s}."
+ halt 404, "Please submit a dataset_uri." unless params[:dataset_uri] and !params[:dataset_uri].nil?
+ halt 404, "Please submit a prediction_feature." unless params[:prediction_feature] and !params[:prediction_feature].nil?
+ prediction_feature = params[:prediction_feature]
+
+ training_dataset = OpenTox::Dataset.find "#{params[:dataset_uri]}", @subjectid
+ halt 404, "No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}" unless training_dataset.features and training_dataset.features.include?(params[:prediction_feature])
+
+ unless minfreq = params[:min_frequency]
+ minfreq = 5*training_dataset.compounds.size/1000 # 5 per mille (0.5%) of the compounds, following Andreas' suggestion
+ minfreq = 2 unless minfreq > 2
+ end
+
+ @@bbrc.SetMinfreq(minfreq)
+ @@bbrc.SetType(1) if params[:feature_type] == "paths"
+ @@bbrc.SetBackbone(eval params[:backbone]) if params[:backbone] and ( params[:backbone] == "true" or params[:backbone] == "false" ) # convert string to boolean
+ @@bbrc.SetChisqSig(params[:min_chisq_significance]) if params[:min_chisq_significance]
+ @@bbrc.SetConsoleOut(false)
+
+ task = OpenTox::Task.create("Mining BBRC features", url_for('/fminer',:full)) do
+
+ feature_dataset = OpenTox::Dataset.new(nil, @subjectid)
+ feature_dataset.add_metadata({
+ DC.title => "BBRC representatives for " + training_dataset.metadata[DC.title].to_s,
+ DC.creator => url_for('/fminer/bbrc',:full),
+ OT.hasSource => url_for('/fminer/bbrc', :full),
+ OT.parameters => [
+ { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] },
+ { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] }
+ ]
+ })
+ feature_dataset.save(@subjectid)
+
+ id = 1 # fminer start id is not 0
+ compounds = []
+ nr_active=0
+ nr_inactive=0
+ all_activities = Hash.new# DV: for effect calculation in regression part
+
+ @@bbrc.Reset
+ training_dataset.data_entries.each do |compound,entry|
+ begin
+ smiles = OpenTox::Compound.new(compound.to_s).to_smiles
+ rescue
+ LOGGER.warn "No resource for #{compound.to_s}"
+ next
+ end
+ if smiles == '' or smiles.nil?
+ LOGGER.warn "Cannot find smiles for #{compound.to_s}."
+ next
+ end
+ entry.each do |feature,values|
+ values.each do |value|
+ if value.nil?
+ LOGGER.warn "No #{feature} activiity for #{compound.to_s}."
+ else
+ case value.to_s
+ when "true"
+ nr_active += 1
+ activity = 1
+ when "false"
+ nr_inactive += 1
+ activity = 0
else
- case value.to_s
- when "true"
- nr_active += 1
- activity = 1
- when "false"
- nr_inactive += 1
- activity = 0
- else
- activity = value.to_f
- @@bbrc.SetRegression(true)
- end
- begin
- @@bbrc.AddCompound(smiles,id)
- @@bbrc.AddActivity(activity, id)
- all_activities[id]=activity # DV: insert global information
- compounds[id] = compound
- id += 1
- rescue
- LOGGER.warn "Could not add " + smiles + "\t" + value.to_s + " to fminer"
- end
+ activity = value.to_f
+ @@bbrc.SetRegression(true)
+ end
+ begin
+ @@bbrc.AddCompound(smiles,id)
+ @@bbrc.AddActivity(activity, id)
+ all_activities[id]=activity # DV: insert global information
+ compounds[id] = compound
+ id += 1
+ rescue
+ LOGGER.warn "Could not add " + smiles + "\t" + value.to_s + " to fminer"
end
end
end
end
+ end
- g_array=all_activities.values # DV: calculation of global median for effect calculation
- g_median=OpenTox::Algorithm.median(g_array)
-
- raise "No compounds in dataset #{training_dataset.uri}" if compounds.size==0
-
- features = Set.new
- # run @@bbrc
- (0 .. @@bbrc.GetNoRootNodes()-1).each do |j|
-
- results = @@bbrc.MineRoot(j)
- results.each do |result|
- f = YAML.load(result)[0]
- smarts = f[0]
- p_value = f[1]
-
- if (!@@bbrc.GetRegression)
- ids = f[2] + f[3]
- if f[2].size.to_f/ids.size > nr_active.to_f/(nr_active+nr_inactive)
- effect = 'activating'
- else
- effect = 'deactivating'
- end
- else #regression part
- ids = f[2]
- # DV: effect calculation
- f_arr=Array.new
- f[2].each do |id|
- f_arr.push(all_activities[id])
- end
- f_median=OpenTox::Algorithm.median(f_arr)
- if g_median >= f_median
- effect = 'activating'
- else
- effect = 'deactivating'
- end
- end
+ g_array=all_activities.values # DV: calculation of global median for effect calculation
+ g_median=OpenTox::Algorithm.median(g_array)
+
+ raise "No compounds in dataset #{training_dataset.uri}" if compounds.size==0
+
+ features = Set.new
+ # run @@bbrc
+ (0 .. @@bbrc.GetNoRootNodes()-1).each do |j|
+
+ results = @@bbrc.MineRoot(j)
+ results.each do |result|
+ f = YAML.load(result)[0]
+ smarts = f[0]
+ p_value = f[1]
- feature_uri = File.join feature_dataset.uri,"feature","bbrc", features.size.to_s
- unless features.include? smarts
- features << smarts
- metadata = {
- OT.hasSource => url_for('/fminer/bbrc', :full),
- OT.isA => OT.Substructure,
- OT.smarts => smarts,
- OT.pValue => p_value.to_f,
- OT.effect => effect,
- OT.parameters => [
- { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] },
- { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] }
- ]
- }
- feature_dataset.add_feature feature_uri, metadata
- #feature_dataset.add_feature_parameters feature_uri, feature_dataset.parameters
+ if (!@@bbrc.GetRegression)
+ ids = f[2] + f[3]
+ if f[2].size.to_f/ids.size > nr_active.to_f/(nr_active+nr_inactive)
+ effect = 'activating'
+ else
+ effect = 'deactivating'
+ end
+ else #regression part
+ ids = f[2]
+ # DV: effect calculation
+ f_arr=Array.new
+ f[2].each do |id|
+ f_arr.push(all_activities[id])
+ end
+ f_median=OpenTox::Algorithm.median(f_arr)
+ if g_median >= f_median
+ effect = 'activating'
+ else
+ effect = 'deactivating'
end
- ids.each { |id| feature_dataset.add(compounds[id], feature_uri, true)}
end
+
+ feature_uri = File.join feature_dataset.uri,"feature","bbrc", features.size.to_s
+ unless features.include? smarts
+ features << smarts
+ metadata = {
+ OT.hasSource => url_for('/fminer/bbrc', :full),
+ OT.isA => OT.Substructure,
+ OT.smarts => smarts,
+ OT.pValue => p_value.to_f,
+ OT.effect => effect,
+ OT.parameters => [
+ { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] },
+ { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] }
+ ]
+ }
+ feature_dataset.add_feature feature_uri, metadata
+ #feature_dataset.add_feature_parameters feature_uri, feature_dataset.parameters
+ end
+ ids.each { |id| feature_dataset.add(compounds[id], feature_uri, true)}
end
- feature_dataset.save(@subjectid)
- feature_dataset.uri
end
- response['Content-Type'] = 'text/uri-list'
- halt 503,task.uri+"\n" if task.status == "Cancelled"
- halt 202,task.uri.to_s+"\n"
+ feature_dataset.save(@subjectid)
+ feature_dataset.uri
end
+ response['Content-Type'] = 'text/uri-list'
+ halt 503,task.uri+"\n" if task.status == "Cancelled"
+ halt 202,task.uri.to_s+"\n"
+end
#end
# Run last algorithm on a dataset
@@ -221,22 +223,24 @@ post '/fminer/bbrc/?' do
# - hops Maximum number of hops
# @return [text/uri-list] Task URI
post '/fminer/last/?' do
- #@@last = Last::Last.new
- minfreq = 5 unless minfreq = params[:min_frequency]
- @@last.SetMinfreq(minfreq)
- @@last.SetType(1) if params[:feature_type] == "paths"
- @@last.SetMaxHops(params[:hops]) if params[:hops]
- @@last.SetConsoleOut(false)
halt 404, "Please submit a dataset_uri." unless params[:dataset_uri] and !params[:dataset_uri].nil?
halt 404, "Please submit a prediction_feature." unless params[:prediction_feature] and !params[:prediction_feature].nil?
prediction_feature = params[:prediction_feature]
-
training_dataset = OpenTox::Dataset.new "#{params[:dataset_uri]}", @subjectid
-
training_dataset.load_all(@subjectid)
halt 404, "No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}" unless training_dataset.features and training_dataset.features.include?(params[:prediction_feature])
+ unless minfreq = params[:min_frequency]
+ minfreq = 8*training_dataset.compounds.size/100 # 8% according to Andreas suggestions
+ minfreq = 2 unless minfreq > 2
+ end
+
+ @@last.SetMinfreq(minfreq)
+ @@last.SetType(1) if params[:feature_type] == "paths"
+ @@last.SetMaxHops(params[:hops]) if params[:hops]
+ @@last.SetConsoleOut(false)
+
task = OpenTox::Task.create("Mining LAST features", url_for('/fminer',:full)) do
feature_dataset = OpenTox::Dataset.new
@@ -319,7 +323,7 @@ post '/fminer/last/?' do
lu = LU.new # AM LAST: uses last-utils here
dom=lu.read(xml) # AM LAST: parse GraphML (needs hpricot, @ch: to be included in wrapper!)
- smarts=lu.smarts_rb(dom,'msa') # AM LAST: converts patterns to LAST-SMARTS using msa variant (see last-pm.maunz.de)
+ smarts=lu.smarts_rb(dom,'nls') # AM LAST: converts patterns to LAST-SMARTS using nls variant (see last-pm.maunz.de)
instances=lu.match_rb(smi,smarts) # AM LAST: creates instantiations
instances.each do |smarts, ids|
feat_hash = Hash[*(all_activities.select { |k,v| ids.include?(k) }.flatten)] # AM LAST: get activities of feature occurrences; see http://www.softiesonrails.com/2007/9/18/ruby-201-weird-hash-syntax
diff --git a/last-utils b/last-utils
-Subproject f8a4631495309d86cb4bec5493af4110d25e4ad
+Subproject daafa32e330b27111df6dc7193a6ed72fae2be4
diff --git a/libfminer b/libfminer
-Subproject 03a3588cbea1c175d8573c2a8aad867f7a27e3e
+Subproject 01b8e50e8e6fb3ce29fc8bf0a65a8c6f6af94b3