summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authormr <mr@mrautenberg.de>2011-08-04 18:18:40 +0200
committermr <mr@mrautenberg.de>2011-08-04 18:18:40 +0200
commitfb072193eebcd9dc1d64e66456846c9a08297163 (patch)
treed2a07dd5d051549f527778264bca6a5e677b60fe
parenta9c32e08d4e01da1d63e8212fd0870d39b4f112f (diff)
parentbce27bd8e66deebb6f438e56957f575399946a50 (diff)
solve merge conflicts (take new development version of fminer.rb)v2.1.0
-rw-r--r--README.md30
-rw-r--r--application.rb11
-rw-r--r--fminer.rb332
m---------last-utils0
-rw-r--r--lazar.rb150
m---------libfminer0
-rw-r--r--openbabel.rb6
-rw-r--r--similarity.rb20
8 files changed, 271 insertions, 278 deletions
diff --git a/README.md b/README.md
index 640f962..dacf1ec 100644
--- a/README.md
+++ b/README.md
@@ -23,15 +23,35 @@ REST operations
[min_frequency=5 per-mil],
[feature_type=trees],
[backbone=true],
- [min_chisq_significance=0.95]
+ [min_chisq_significance=0.95],
+ [nr_hits=false]
Create last features POST /fminer/last dataset_uri, URI for feature dataset 200,400,404,500
feature_uri,
[min_frequency=8 %],
[feature_type=trees],
- [max_hops=25],
+ [nr_hits=false]
Create lazar model POST /lazar dataset_uri, URI for lazar model 200,400,404,500
prediction_feature,
feature_generation_uri
+ prediction_algorithm
+ [local_svm_kernel=weighted_tanimoto]
+ [min_sim=0.3]
+ [nr_hits=false]
+ [activity_transform=<Log10 (regression),NOP (classification)>]
+ [conf_stdev=false]
+
+Synopsis
+--------
+
+- prediction\_algorithm: One of "weighted\_majority\_vote" (default for classification), "local\_svm\_classification", "local\_svm\_regression" (default for regression), "local\_mlr\_prop". "weighted\_majority\_vote" is not applicable for regression. "local\_mlr\_prop" is not applicable for classification.
+- local\_svm\_kernel: One of "weighted\_tanimoto", "propositionalized". local\_svm\_kernel is not applicable when prediction\_algorithm="weighted\_majority\_vote".
+- min_sim: The minimum similarity threshold for neighbors. Numeric value in [0,1].
+- nr_hits: Whether for instantiated models (local\_svm\_kernel = "propositionalized" for prediction_algorithm="local\_svm\_classification" or "local\_svm\_regression", or for prediction_algorithm="local\_mlr\_prop") nominal features should be instantiated with their occurrence counts in the instances. For non-instantiated models (local\_svm\_kernel = "weighted\_tanimoto" for prediction_algorithm="local\_svm\_classification" or "local\_svm\_regression", or for prediction_algorithm="weighted\_majority\_vote") the neighbor-to-neighbor and neighbor-to-query similarity also integrates these counts, when the parameter is set. One of "true", "false".
+- activity_transform: Normalizing transformations of the y-values (activities), applicable only to regression problems. One of "Log10", "Inverter", "NOP". "Log10" moves all values above zero and takes the log to base 10. "Inverter" moves all values above 1.0 and takes the inverted value. "NOP" is the identity transformation, which does nothing. Model predictions are output with reverse transformation applied.
+- conf_stdev: Whether confidence integrates distribution of neighbor activity values. When "true", the exp(-1.0*(standard deviation of neighbor activities)) is multiplied on the similarity. One of "true", "false".
+
+See http://www.maunz.de/wordpress/opentox/2011/lazar-models-and-how-to-trigger-them for a graphical overview.
+
Supported MIME formats
----------------------
@@ -68,6 +88,7 @@ backbone=false reduces BBRC mining to frequent and correlated subtree mining (mu
curl -X POST -d dataset_uri={datset_uri} -d prediction_feature={feature_uri} -d min_frequency={min_frequency} http://webservices.in-silico.ch/algorithm/fminer/bbrc
feature_uri specifies the dependent variable from the dataset.
+Adding -d nr_hits=true produces frequency counts per pattern and molecule.
Please click [here](http://bbrc.maunz.de#usage) for more guidance on usage.
### Create [LAST-PM](http://last-pm.maunz.de) descriptors, recommended for small to medium-sized datasets.
@@ -75,15 +96,16 @@ Please click [here](http://bbrc.maunz.de#usage) for more guidance on usage.
curl -X POST -d dataset_uri={datset_uri} -d prediction_feature={feature_uri} -d min_frequency={min_frequency} http://webservices.in-silico.ch/algorithm/fminer/last
feature_uri specifies the dependent variable from the dataset.
+Adding -d nr_hits=true produces frequency counts per pattern and molecule.
Please click [here](http://last-pm.maunz.de#usage) for more guidance on usage.
* * *
### Create lazar model
- curl -X POST -d dataset_uri={datset_uri} -d prediction_feature={feature_uri} -d feature_generation_uri=http://webservices.in-silico.ch/algorithm/fminer http://webservices.in-silico.ch/test/algorithm/lazar
+Creates a standard Lazar model.
-feature_uri specifies the dependent variable from the dataset
+ curl -X POST -d dataset_uri={datset_uri} -d prediction_feature={feature_uri} -d feature_generation_uri=http://webservices.in-silico.ch/algorithm/fminer/bbrc http://webservices.in-silico.ch/test/algorithm/lazar
[API documentation](http://rdoc.info/github/opentox/algorithm)
--------------------------------------------------------------
diff --git a/application.rb b/application.rb
index 55a8ea4..32fea95 100644
--- a/application.rb
+++ b/application.rb
@@ -22,6 +22,13 @@ end
#
# @return [text/uri-list] algorithm URIs
get '/?' do
- response['Content-Type'] = 'text/uri-list'
- [ url_for('/lazar', :full), url_for('/fminer/bbrc', :full), url_for('/fminer/last', :full) ].join("\n") + "\n"
+ list = [ url_for('/lazar', :full), url_for('/fminer/bbrc', :full), url_for('/fminer/last', :full) ].join("\n") + "\n"
+ case request.env['HTTP_ACCEPT']
+ when /text\/html/
+ content_type "text/html"
+ OpenTox.text_to_html list
+ else
+ content_type 'text/uri-list'
+ list
+ end
end
diff --git a/fminer.rb b/fminer.rb
index e5db91c..da04431 100644
--- a/fminer.rb
+++ b/fminer.rb
@@ -2,6 +2,7 @@ ENV['FMINER_SMARTS'] = 'true'
ENV['FMINER_NO_AROMATIC'] = 'true'
ENV['FMINER_PVALUES'] = 'true'
ENV['FMINER_SILENT'] = 'true'
+ENV['FMINER_NR_HITS'] = 'true'
@@bbrc = Bbrc::Bbrc.new
@@last = Last::Last.new
@@ -10,20 +11,26 @@ ENV['FMINER_SILENT'] = 'true'
#
# @return [text/uri-list] URIs of fminer algorithms
get '/fminer/?' do
- response['Content-Type'] = 'text/uri-list'
- [ url_for('/fminer/bbrc', :full), url_for('/fminer/last', :full) ].join("\n") + "\n"
+ list = [ url_for('/fminer/bbrc', :full), url_for('/fminer/last', :full) ].join("\n") + "\n"
+ case request.env['HTTP_ACCEPT']
+ when /text\/html/
+ content_type "text/html"
+ OpenTox.text_to_html list
+ else
+ content_type 'text/uri-list'
+ list
+ end
end
# Get RDF/XML representation of fminer bbrc algorithm
# @return [application/rdf+xml] OWL-DL representation of fminer bbrc algorithm
get "/fminer/bbrc/?" do
- response['Content-Type'] = 'application/rdf+xml'
algorithm = OpenTox::Algorithm::Generic.new(url_for('/fminer/bbrc',:full))
algorithm.metadata = {
DC.title => 'fminer backbone refinement class representatives',
DC.creator => "andreas@maunz.de, helma@in-silico.ch",
DC.contributor => "vorgrimmlerdavid@gmx.de",
- RDF.type => [OTA.PatternMiningSupervised],
+ RDF.type => [OT.Algorithm,OTA.PatternMiningSupervised],
OT.parameters => [
{ DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" },
{ DC.description => "Feature URI for dependent variable", OT.paramScope => "mandatory", DC.title => "prediction_feature" },
@@ -33,7 +40,17 @@ get "/fminer/bbrc/?" do
{ DC.description => "Significance threshold (between 0 and 1)", OT.paramScope => "optional", DC.title => "min_chisq_significance" },
]
}
- algorithm.to_rdfxml
+ case request.env['HTTP_ACCEPT']
+ when /text\/html/
+ content_type "text/html"
+ OpenTox.text_to_html algorithm.to_yaml
+ when /application\/x-yaml/
+ content_type "application/x-yaml"
+ algorithm.to_yaml
+ else
+ response['Content-Type'] = 'application/rdf+xml'
+ algorithm.to_rdfxml
+ end
end
# Get RDF/XML representation of fminer last algorithm
@@ -44,7 +61,7 @@ get "/fminer/last/?" do
DC.title => 'fminer latent structure class representatives',
DC.creator => "andreas@maunz.de, helma@in-silico.ch",
DC.contributor => "vorgrimmlerdavid@gmx.de",
- RDF.type => [OTA.PatternMiningSupervised],
+ RDF.type => [OT.Algorithm,OTA.PatternMiningSupervised],
OT.parameters => [
{ DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" },
{ DC.description => "Feature URI for dependent variable", OT.paramScope => "mandatory", DC.title => "prediction_feature" },
@@ -53,7 +70,17 @@ get "/fminer/last/?" do
{ DC.description => "Maximum number of hops", OT.paramScope => "optional", DC.title => "hops" },
]
}
- algorithm.to_rdfxml
+ case request.env['HTTP_ACCEPT']
+ when /text\/html/
+ content_type "text/html"
+ OpenTox.text_to_html algorithm.to_yaml
+ when /application\/x-yaml/
+ content_type "application/x-yaml"
+ algorithm.to_yaml
+ else
+ response['Content-Type'] = 'application/rdf+xml'
+ algorithm.to_rdfxml
+ end
end
# Run bbrc algorithm on dataset
@@ -61,36 +88,29 @@ end
# @param [String] dataset_uri URI of the training dataset
# @param [String] prediction_feature URI of the prediction feature (i.e. dependent variable)
# @param [optional] parameters BBRC parameters, accepted parameters are
-# - minfreq Minimum frequency (default 5)
+# - min_frequency Minimum frequency (default 5)
# - feature_type Feature type, can be 'paths' or 'trees' (default "trees")
# - backbone BBRC classes, pass 'false' to switch off mining for BBRC representatives. (default "true")
# - min_chisq_significance Significance threshold (between 0 and 1)
+# - nr_hits Set to "true" to get hit count instead of presence
# @return [text/uri-list] Task URI
post '/fminer/bbrc/?' do
- halt 404, "Please submit a dataset_uri." unless params[:dataset_uri] and !params[:dataset_uri].nil?
- halt 404, "Please submit a prediction_feature." unless params[:prediction_feature] and !params[:prediction_feature].nil?
- prediction_feature = OpenTox::Feature.find params[:prediction_feature], @subjectid
- training_dataset = OpenTox::Dataset.find "#{params[:dataset_uri]}", @subjectid
- halt 404, "No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}" unless training_dataset.features and training_dataset.features.include?(params[:prediction_feature])
-
- unless params[:min_frequency].nil?
- minfreq=params[:min_frequency].to_i
- raise "Minimum frequency must be a number >0!" unless minfreq>0
- else
- minfreq = 5*training_dataset.compounds.size/1000 # AM sugg. 8-10 per mil
- minfreq = 2 unless minfreq > 2
- end
-
- task = OpenTox::Task.create("Mining BBRC features", url_for('/fminer',:full)) do
+ fminer=OpenTox::Algorithm::Fminer.new
+ fminer.check_params(params,5,@subjectid)
+ task = OpenTox::Task.create("Mining BBRC features", url_for('/fminer',:full)) do |task|
@@bbrc.Reset
- if prediction_feature.feature_type == "regression"
+ if fminer.prediction_feature.feature_type == "regression"
@@bbrc.SetRegression(true) # AM: DO NOT MOVE DOWN! Must happen before the other Set... operations!
else
- @training_classes = training_dataset.feature_classes(prediction_feature.uri, @subjectid)
+ raise "no accept values for dataset '"+fminer.training_dataset.uri.to_s+"' and feature '"+fminer.prediction_feature.uri.to_s+
+ "'" unless fminer.training_dataset.accept_values(fminer.prediction_feature.uri)
+ @training_classes = fminer.training_dataset.accept_values(fminer.prediction_feature.uri).sort
+ @value_map=Hash.new
+ @training_classes.each_with_index { |c,i| @value_map[i+1] = c }
end
- @@bbrc.SetMinfreq(minfreq)
+ @@bbrc.SetMinfreq(fminer.minfreq)
@@bbrc.SetType(1) if params[:feature_type] == "paths"
@@bbrc.SetBackbone(eval params[:backbone]) if params[:backbone] and ( params[:backbone] == "true" or params[:backbone] == "false" ) # convert string to boolean
@@bbrc.SetChisqSig(params[:min_chisq_significance].to_f) if params[:min_chisq_significance]
@@ -98,7 +118,7 @@ post '/fminer/bbrc/?' do
feature_dataset = OpenTox::Dataset.new(nil, @subjectid)
feature_dataset.add_metadata({
- DC.title => "BBRC representatives for " + training_dataset.metadata[DC.title].to_s,
+ DC.title => "BBRC representatives for " + fminer.training_dataset.metadata[DC.title].to_s,
DC.creator => url_for('/fminer/bbrc',:full),
OT.hasSource => url_for('/fminer/bbrc', :full),
OT.parameters => [
@@ -108,106 +128,44 @@ post '/fminer/bbrc/?' do
})
feature_dataset.save(@subjectid)
- id = 1 # fminer start id is not 0
- compounds = []
- nr_active=0
- nr_inactive=0
- all_activities = Hash.new# DV: for effect calculation in regression part
+ fminer.compounds = []
+ fminer.db_class_sizes = Array.new # AM: effect
+ fminer.all_activities = Hash.new # DV: for effect calculation in regression part
+ fminer.smi = [] # AM LAST: needed for matching the patterns back
- training_dataset.data_entries.each do |compound,entry|
- begin
- smiles = OpenTox::Compound.new(compound.to_s).to_smiles
- rescue
- LOGGER.warn "No resource for #{compound.to_s}"
- next
- end
- if smiles == '' or smiles.nil?
- LOGGER.warn "Cannot find smiles for #{compound.to_s}."
- next
- end
+ # Add data to fminer
+ fminer.add_fminer_data(@@bbrc, params, @value_map)
- # AM: take log if appropriate
- take_logs=true
- entry.each do |feature,values|
- values.each do |value|
- if prediction_feature.feature_type == "regression"
- if (! value.nil?) && (value.to_f < 1)
- take_logs=false
- end
- end
- end
- end
- entry.each do |feature,values|
- if feature == prediction_feature.uri
- values.each do |value|
- if value.nil?
- LOGGER.warn "No #{feature} activiity for #{compound.to_s}."
- else
- if prediction_feature.feature_type == "classification"
- case value.to_s
- when "true"
- nr_active += 1
- activity = 1
- when "false"
- nr_inactive += 1
- activity = 0
- when /#{@training_classes.last}/
- nr_active += 1
- activity = 1
- when /#{@training_classes.first}/
- nr_inactive += 1
- activity = 0
- else
- LOGGER.warn "Unknown class \"#{value.to_s}\"."
- end
- elsif prediction_feature.feature_type == "regression"
- activity= take_logs ? Math.log10(value.to_f) : value.to_f
- end
- begin
- @@bbrc.AddCompound(smiles,id)
- @@bbrc.AddActivity(activity, id)
- all_activities[id]=activity # DV: insert global information
- compounds[id] = compound
- id += 1
- rescue
- LOGGER.warn "Could not add " + smiles + "\t" + value.to_s + " to fminer"
- end
- end
- end
- end
- end
- end
-
- g_array=all_activities.values # DV: calculation of global median for effect calculation
- g_median=OpenTox::Algorithm.median(g_array)
+ g_array=fminer.all_activities.values # DV: calculation of global median for effect calculation
+ g_median=g_array.to_scale.median
- raise "No compounds in dataset #{training_dataset.uri}" if compounds.size==0
-
+ raise "No compounds in dataset #{fminer.training_dataset.uri}" if fminer.compounds.size==0
+ task.progress 10
+ step_width = 80 / @@bbrc.GetNoRootNodes().to_f
features = Set.new
+
# run @@bbrc
(0 .. @@bbrc.GetNoRootNodes()-1).each do |j|
-
results = @@bbrc.MineRoot(j)
+ task.progress 10+step_width*(j+1)
results.each do |result|
f = YAML.load(result)[0]
smarts = f[0]
p_value = f[1]
if (!@@bbrc.GetRegression)
- ids = f[2] + f[3]
- if f[2].size.to_f/ids.size > nr_active.to_f/(nr_active+nr_inactive)
- effect = 'activating'
- else
- effect = 'deactivating'
- end
+ id_arrs = f[2..-1].flatten
+ max = OpenTox::Algorithm.effect(f[2..-1], fminer.db_class_sizes)
+ effect = f[2..-1].size-max
else #regression part
- ids = f[2]
+ id_arrs = f[2]
# DV: effect calculation
f_arr=Array.new
f[2].each do |id|
- f_arr.push(all_activities[id])
+ id=id.keys[0] # extract id from hit count hash
+ f_arr.push(fminer.all_activities[id])
end
- f_median=OpenTox::Algorithm.median(f_arr)
+ f_median=f_arr.to_scale.median
if g_median >= f_median
effect = 'activating'
else
@@ -220,7 +178,7 @@ post '/fminer/bbrc/?' do
features << smarts
metadata = {
OT.hasSource => url_for('/fminer/bbrc', :full),
- RDF.type => [OT.Substructure],
+ RDF.type => [OT.Feature, OT.Substructure],
OT.smarts => smarts,
OT.pValue => p_value.to_f,
OT.effect => effect,
@@ -232,14 +190,22 @@ post '/fminer/bbrc/?' do
feature_dataset.add_feature feature_uri, metadata
#feature_dataset.add_feature_parameters feature_uri, feature_dataset.parameters
end
- ids.each { |id| feature_dataset.add(compounds[id], feature_uri, true)}
+ id_arrs.each { |id_count_hash|
+ id=id_count_hash.keys[0].to_i
+ count=id_count_hash.values[0].to_i
+ if params[:nr_hits] == "true"
+ feature_dataset.add(fminer.compounds[id], feature_uri, count)
+ else
+ feature_dataset.add(fminer.compounds[id], feature_uri, 1)
+ end
+ }
end
end
feature_dataset.save(@subjectid)
feature_dataset.uri
end
response['Content-Type'] = 'text/uri-list'
- halt 503,task.uri+"\n" if task.status == "Cancelled"
+ raise OpenTox::ServiceUnavailableError.newtask.uri+"\n" if task.status == "Cancelled"
halt 202,task.uri.to_s+"\n"
end
#end
@@ -249,36 +215,28 @@ end
# @param [String] dataset_uri URI of the training dataset
# @param [String] prediction_feature URI of the prediction feature (i.e. dependent variable)
# @param [optional] parameters LAST parameters, accepted parameters are
-# - minfreq Minimum frequency (default 5)
+# - min_frequency Minimum frequency (default 5)
# - feature_type Feature type, can be 'paths' or 'trees' (default "trees")
# - hops Maximum number of hops
+# - nr_hits Set to "true" to get hit count instead of presence
# @return [text/uri-list] Task URI
post '/fminer/last/?' do
- halt 404, "Please submit a dataset_uri." unless params[:dataset_uri] and !params[:dataset_uri].nil?
- halt 404, "Please submit a prediction_feature." unless params[:prediction_feature] and !params[:prediction_feature].nil?
- prediction_feature = OpenTox::Feature.find params[:prediction_feature], @subjectid
- training_dataset = OpenTox::Dataset.new "#{params[:dataset_uri]}", @subjectid
- training_dataset.load_all(@subjectid)
- halt 404, "No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}" unless training_dataset.features and training_dataset.features.include?(params[:prediction_feature])
-
- unless params[:min_frequency].nil?
- minfreq=params[:min_frequency].to_i
- raise "Minimum frequency must be a number >0!" unless minfreq>0
- else
- minfreq = 8*training_dataset.compounds.size/100 # AM sugg. 5-10%
- minfreq = 2 unless minfreq > 2
- end
-
- task = OpenTox::Task.create("Mining LAST features", url_for('/fminer',:full)) do
+ fminer=OpenTox::Algorithm::Fminer.new
+ fminer.check_params(params,80,@subjectid)
+ task = OpenTox::Task.create("Mining LAST features", url_for('/fminer',:full)) do |task|
@@last.Reset
- if prediction_feature.feature_type == "regression"
+ if fminer.prediction_feature.feature_type == "regression"
@@last.SetRegression(true) # AM: DO NOT MOVE DOWN! Must happen before the other Set... operations!
else
- @training_classes = training_dataset.feature_classes(prediction_feature.uri)
+ raise "no accept values for dataset '"+fminer.training_dataset.uri.to_s+"' and feature '"+fminer.prediction_feature.uri.to_s+
+ "'" unless fminer.training_dataset.accept_values(fminer.prediction_feature.uri)
+ @training_classes = fminer.training_dataset.accept_values(fminer.prediction_feature.uri).sort
+ @value_map=Hash.new
+ @training_classes.each_with_index { |c,i| @value_map[i+1] = c }
end
- @@last.SetMinfreq(minfreq)
+ @@last.SetMinfreq(fminer.minfreq)
@@last.SetType(1) if params[:feature_type] == "paths"
@@last.SetMaxHops(params[:hops]) if params[:hops]
@@last.SetConsoleOut(false)
@@ -286,7 +244,7 @@ post '/fminer/last/?' do
feature_dataset = OpenTox::Dataset.new(nil, @subjectid)
feature_dataset.add_metadata({
- DC.title => "LAST representatives for " + training_dataset.metadata[DC.title].to_s,
+ DC.title => "LAST representatives for " + fminer.training_dataset.metadata[DC.title].to_s,
DC.creator => url_for('/fminer/last',:full),
OT.hasSource => url_for('/fminer/last', :full),
OT.parameters => [
@@ -296,74 +254,25 @@ post '/fminer/last/?' do
})
feature_dataset.save(@subjectid)
- id = 1 # fminer start id is not 0
- compounds = []
- smi = [] # AM LAST: needed for matching the patterns back
- nr_active=0
- nr_inactive=0
- all_activities = Hash.new #DV: for effect calculation (class and regr)
-
- training_dataset.data_entries.each do |compound,entry|
- begin
- smiles = OpenTox::Compound.new(compound.to_s).to_smiles
- rescue
- LOGGER.warn "No resource for #{compound.to_s}"
- next
- end
- if smiles == '' or smiles.nil?
- LOGGER.warn "Cannot find smiles for #{compound.to_s}."
- next
- end
- entry.each do |feature,values|
- if feature == prediction_feature.uri
- values.each do |value|
- if value.nil?
- LOGGER.warn "No #{feature} activiity for #{compound.to_s}."
- else
- if prediction_feature.feature_type == "classification"
- case value.to_s
- when "true"
- nr_active += 1
- activity = 1
- when "false"
- nr_inactive += 1
- activity = 0
- when /#{@training_classes.last}/
- nr_active += 1
- activity = 1
- when /#{@training_classes.first}/
- nr_inactive += 1
- activity = 0
- else
- LOGGER.warn "Unknown class \"#{value.to_s}."
- end
- elsif prediction_feature.feature_type == "regression"
- activity = value.to_f
- end
- begin
- @@last.AddCompound(smiles,id)
- @@last.AddActivity(activity, id)
- all_activities[id]=activity # DV: insert global information
- compounds[id] = compound
- smi[id] = smiles # AM LAST: changed this to store SMILES.
- id += 1
- rescue
- LOGGER.warn "Could not add " + smiles + "\t" + value.to_s + " to fminer"
- end
- end
- end
- end
- end
- end
+ fminer.compounds = []
+ fminer.db_class_sizes = Array.new # AM: effect
+ fminer.all_activities = Hash.new # DV: for effect calculation (class and regr)
+ fminer.smi = [] # AM LAST: needed for matching the patterns back
- raise "No compounds in dataset #{training_dataset.uri}" if compounds.size==0
+ # Add data to fminer
+ fminer.add_fminer_data(@@last, params, @value_map)
+
+ raise "No compounds in dataset #{fminer.training_dataset.uri}" if fminer.compounds.size==0
# run @@last
features = Set.new
xml = ""
+ task.progress 10
+ step_width = 80 / @@last.GetNoRootNodes().to_f
(0 .. @@last.GetNoRootNodes()-1).each do |j|
results = @@last.MineRoot(j)
+ task.progress 10+step_width*(j+1)
results.each do |result|
xml << result
end
@@ -372,21 +281,30 @@ post '/fminer/last/?' do
lu = LU.new # AM LAST: uses last-utils here
dom=lu.read(xml) # AM LAST: parse GraphML
smarts=lu.smarts_rb(dom,'nls') # AM LAST: converts patterns to LAST-SMARTS using msa variant (see last-pm.maunz.de)
- instances=lu.match_rb(smi,smarts) # AM LAST: creates instantiations
- instances.each do |smarts, ids|
- feat_hash = Hash[*(all_activities.select { |k,v| ids.include?(k) }.flatten)] # AM LAST: get activities of feature occurrences; see http://www.softiesonrails.com/2007/9/18/ruby-201-weird-hash-syntax
- @@last.GetRegression() ? p_value = @@last.KSTest(all_activities.values, feat_hash.values).to_f : p_value = @@last.ChisqTest(all_activities.values, feat_hash.values).to_f # AM LAST: use internal function for test
-
-
- effect = (p_value > 0) ? "activating" : "deactivating"
+ params[:nr_hits] != "true" ? hit_count=false: hit_count=true
+ matches, counts = lu.match_rb(fminer.smi,smarts,hit_count) # AM LAST: creates instantiations
+
+ matches.each do |smarts, ids|
+ feat_hash = Hash[*(fminer.all_activities.select { |k,v| ids.include?(k) }.flatten)] # AM LAST: get activities of feature occurrences; see http://www.softiesonrails.com/2007/9/18/ruby-201-weird-hash-syntax
+ if @@last.GetRegression()
+ p_value = @@last.KSTest(fminer.all_activities.values, feat_hash.values).to_f # AM LAST: use internal function for test
+ effect = (p_value > 0) ? "activating" : "deactivating"
+ else
+ p_value = @@last.ChisqTest(fminer.all_activities.values, feat_hash.values).to_f
+ g=Array.new
+ @value_map.each { |y,act| g[y-1]=Array.new }
+ feat_hash.each { |x,y| g[y-1].push(x) }
+ max = OpenTox::Algorithm.effect(g, fminer.db_class_sizes)
+ effect = g.size-max
+ end
feature_uri = File.join feature_dataset.uri,"feature","last", features.size.to_s
unless features.include? smarts
features << smarts
metadata = {
- RDF.type => [OT.Substructure],
+ RDF.type => [OT.Feature, OT.Substructure],
OT.hasSource => feature_dataset.uri,
OT.smarts => smarts,
- OT.pValue => p_value.to_f.abs,
+ OT.pValue => p_value.abs,
OT.effect => effect,
OT.parameters => [
{ DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] },
@@ -395,12 +313,16 @@ post '/fminer/last/?' do
}
feature_dataset.add_feature feature_uri, metadata
end
- ids.each { |id| feature_dataset.add(compounds[id], feature_uri, true)}
+ if !hit_count
+ ids.each { |id| feature_dataset.add(fminer.compounds[id], feature_uri, true)}
+ else
+ ids.each_with_index { |id,i| feature_dataset.add(fminer.compounds[id], feature_uri, counts[smarts][i])}
+ end
end
feature_dataset.save(@subjectid)
feature_dataset.uri
end
response['Content-Type'] = 'text/uri-list'
- halt 503,task.uri+"\n" if task.status == "Cancelled"
+ raise OpenTox::ServiceUnavailableError.newtask.uri+"\n" if task.status == "Cancelled"
halt 202,task.uri.to_s+"\n"
-end
+end \ No newline at end of file
diff --git a/last-utils b/last-utils
-Subproject 75bea7645601fd296aa68c6678ee9b0a49a7b91
+Subproject 04bd1b73f54bb7422d3c08bb5a81bc02af04f6f
diff --git a/lazar.rb b/lazar.rb
index 45123f0..5de3790 100644
--- a/lazar.rb
+++ b/lazar.rb
@@ -3,7 +3,6 @@
# Get RDF/XML representation of the lazar algorithm
# @return [application/rdf+xml] OWL-DL representation of the lazar algorithm
get '/lazar/?' do
- response['Content-Type'] = 'application/rdf+xml'
algorithm = OpenTox::Algorithm::Generic.new(url_for('/lazar',:full))
algorithm.metadata = {
DC.title => 'lazar',
@@ -17,7 +16,17 @@ get '/lazar/?' do
{ DC.description => "Further parameters for the feaature generation service", OT.paramScope => "optional" }
]
}
- algorithm.to_rdfxml
+ case request.env['HTTP_ACCEPT']
+ when /text\/html/
+ content_type "text/html"
+ OpenTox.text_to_html algorithm.to_yaml
+ when /application\/x-yaml/
+ content_type "application/x-yaml"
+ algorithm.to_yaml
+ else
+ response['Content-Type'] = 'application/rdf+xml'
+ algorithm.to_rdfxml
+ end
end
# Create a lazar prediction model
@@ -28,29 +37,42 @@ end
# @return [text/uri-list] Task URI
post '/lazar/?' do
+ LOGGER.debug "building lazar model with params: "+params.inspect
params[:subjectid] = @subjectid
- halt 404, "No dataset_uri parameter." unless params[:dataset_uri]
+ raise OpenTox::NotFoundError.new "No dataset_uri parameter." unless params[:dataset_uri]
dataset_uri = params[:dataset_uri]
- halt 404, "Dataset #{dataset_uri} not found." unless training_activities = OpenTox::Dataset.new(dataset_uri)
- training_activities.load_all(@subjectid)
-
- prediction_feature = OpenTox::Feature.find(params[:prediction_feature],@subjectid)
- unless params[:prediction_feature] # try to read prediction_feature from dataset
- halt 404, "#{training_activities.features.size} features in dataset #{dataset_uri}. Please provide a prediction_feature parameter." unless training_activities.features.size == 1
- prediction_feature = OpenTox::Feature.find(training_activities.features.keys.first,@subjectid)
- params[:prediction_feature] = prediction_feature.uri # pass to feature mining service
- end
+ task = OpenTox::Task.create("Create lazar model",url_for('/lazar',:full)) do |task|
- feature_generation_uri = @@feature_generation_default unless feature_generation_uri = params[:feature_generation_uri]
+ raise OpenTox::NotFoundError.new "Dataset #{dataset_uri} not found." unless training_activities = OpenTox::Dataset.new(dataset_uri)
+ training_activities.load_all(@subjectid)
- halt 404, "No feature #{prediction_feature.uri} in dataset #{params[:dataset_uri]}. (features: "+
- training_activities.features.inspect+")" unless training_activities.features and training_activities.features.include?(prediction_feature.uri)
+ prediction_feature = OpenTox::Feature.find(params[:prediction_feature],@subjectid)
+ unless params[:prediction_feature] # try to read prediction_feature from dataset
+ raise OpenTox::NotFoundError.new "#{training_activities.features.size} features in dataset #{dataset_uri}. Please provide a prediction_feature parameter." unless training_activities.features.size == 1
+ prediction_feature = OpenTox::Feature.find(training_activities.features.keys.first,@subjectid)
+ params[:prediction_feature] = prediction_feature.uri # pass to feature mining service
+ end
+
+ feature_generation_uri = @@feature_generation_default unless feature_generation_uri = params[:feature_generation_uri]
- task = OpenTox::Task.create("Create lazar model",url_for('/lazar',:full)) do |task|
+ raise OpenTox::NotFoundError.new "No feature #{prediction_feature.uri} in dataset #{params[:dataset_uri]}. (features: "+
+ training_activities.features.inspect+")" unless training_activities.features and training_activities.features.include?(prediction_feature.uri)
lazar = OpenTox::Model::Lazar.new
- lazar.min_sim = params[:min_sim] if params[:min_sim]
+ lazar.min_sim = params[:min_sim].to_f if params[:min_sim]
+ lazar.nr_hits = true if params[:nr_hits] == "true"
+
+ if prediction_feature.feature_type == "classification"
+ @training_classes = training_activities.accept_values(prediction_feature.uri).sort
+ @training_classes.each_with_index { |c,i|
+ lazar.value_map[i+1] = c # don't use '0': we must take the weighted mean later.
+ params[:value_map] = lazar.value_map
+ }
+ elsif prediction_feature.feature_type == "regression"
+ lazar.prediction_algorithm = "Neighbors.local_svm_regression"
+ end
+ task.progress 10
if params[:feature_dataset_uri]
feature_dataset_uri = params[:feature_dataset_uri]
@@ -66,31 +88,36 @@ post '/lazar/?' do
if feature_generation_uri.match(/fminer/)
lazar.feature_calculation_algorithm = "Substructure.match"
else
- halt 404, "External feature generation services not yet supported"
+ raise OpenTox::NotFoundError.new "External feature generation services not yet supported"
end
params[:subjectid] = @subjectid
prediction_feature = OpenTox::Feature.find params[:prediction_feature], @subjectid
if prediction_feature.feature_type == "regression" && feature_generation_uri.match(/fminer/)
params[:feature_type] = "paths"
end
- feature_dataset_uri = OpenTox::Algorithm::Generic.new(feature_generation_uri).run(params).to_s
+ feature_dataset_uri = OpenTox::Algorithm::Generic.new(feature_generation_uri).run(params, OpenTox::SubTask.new(task,10,70)).to_s
training_features = OpenTox::Dataset.new(feature_dataset_uri)
end
training_features.load_all(@subjectid)
- halt 404, "Dataset #{feature_dataset_uri} not found." if training_features.nil?
+ raise OpenTox::NotFoundError.new "Dataset #{feature_dataset_uri} not found." if training_features.nil?
# sorted features for index lookups
lazar.features = training_features.features.sort if prediction_feature.feature_type == "regression" and lazar.feature_calculation_algorithm != "Substructure.match"
training_features.data_entries.each do |compound,entry|
- lazar.fingerprints[compound] = [] unless lazar.fingerprints[compound]
+ lazar.fingerprints[compound] = {} unless lazar.fingerprints[compound]
entry.keys.each do |feature|
if lazar.feature_calculation_algorithm == "Substructure.match"
if training_features.features[feature]
smarts = training_features.features[feature][OT.smarts]
- lazar.fingerprints[compound] << smarts
+ #lazar.fingerprints[compound] << smarts
+ if params[:nr_hits]
+ lazar.fingerprints[compound][smarts] = entry[feature].flatten.first
+ else
+ lazar.fingerprints[compound][smarts] = 1
+ end
unless lazar.features.include? smarts
lazar.features << smarts
lazar.p_values[smarts] = training_features.features[feature][OT.pValue]
@@ -102,7 +129,8 @@ post '/lazar/?' do
when "classification"
# fingerprints are sets
if entry[feature].flatten.size == 1
- lazar.fingerprints[compound] << feature if entry[feature].flatten.first.to_s.match(TRUE_REGEXP)
+ #lazar.fingerprints[compound] << feature if entry[feature].flatten.first.to_s.match(TRUE_REGEXP)
+ lazar.fingerprints[compound][feature] = entry[feature].flatten.first if entry[feature].flatten.first.to_s.match(TRUE_REGEXP)
lazar.features << feature unless lazar.features.include? feature
else
LOGGER.warn "More than one entry (#{entry[feature].inspect}) for compound #{compound}, feature #{feature}"
@@ -111,6 +139,7 @@ post '/lazar/?' do
# fingerprints are arrays
if entry[feature].flatten.size == 1
lazar.fingerprints[compound][lazar.features.index(feature)] = entry[feature].flatten.first
+ #lazar.fingerprints[compound][feature] = entry[feature].flatten.first
else
LOGGER.warn "More than one entry (#{entry[feature].inspect}) for compound #{compound}, feature #{feature}"
end
@@ -118,45 +147,58 @@ post '/lazar/?' do
end
end
end
-
- @training_classes = training_activities.feature_classes(prediction_feature.uri, @subjectid) if prediction_feature.feature_type == "classification"
- lazar.prediction_algorithm = "Neighbors.local_svm_regression" if prediction_feature.feature_type == "regression"
-
- training_activities.data_entries.each do |compound,entry|
- lazar.activities[compound] = [] unless lazar.activities[compound]
- unless entry[prediction_feature.uri].empty?
- entry[prediction_feature.uri].each do |value|
- if prediction_feature.feature_type == "classification"
- case value.to_s
- when "true"
- lazar.activities[compound] << true
- when "false"
- lazar.activities[compound] << false
- when /#{@training_classes.last}/
- lazar.activities[compound] << true
- when /#{@training_classes.first}/
- lazar.activities[compound] << false
- else
- LOGGER.warn "Unknown class \"#{value.to_s}\"."
- end
- elsif prediction_feature.feature_type == "regression"
- halt 404, "0 values not allowed in training dataset. log10 is calculated internally." if value.to_f == 0
- lazar.activities[compound] << value.to_f
+ task.progress 80
+
+ # AM: allow settings override by user
+ lazar.prediction_algorithm = "Neighbors.#{params[:prediction_algorithm]}" unless params[:prediction_algorithm].nil?
+ if prediction_feature.feature_type == "regression"
+ lazar.transform["class"] = "Log10" if lazar.transform["class"] == "NOP"
+ end
+ lazar.transform["class"] = params[:activity_transform] unless params[:activity_transform].nil?
+ lazar.prop_kernel = true if (params[:local_svm_kernel] == "propositionalized" || params[:prediction_algorithm] == "local_mlr_prop")
+ lazar.conf_stdev = false
+ lazar.conf_stdev = true if params[:conf_stdev] == "true"
+
+ # AM: Feed Data using Transformations
+ if prediction_feature.feature_type == "regression"
+ transformed_acts = []
+ training_activities.data_entries.each do |compound,entry|
+ transformed_acts.concat entry[prediction_feature.uri] unless entry[prediction_feature.uri].empty?
+ end
+ transformer = eval "OpenTox::Algorithm::Transform::#{lazar.transform["class"]}.new(transformed_acts)"
+ transformed_acts = transformer.values
+ lazar.transform["offset"] = transformer.offset
+ t_count=0
+ training_activities.data_entries.each do |compound,entry|
+ lazar.activities[compound] = [] unless lazar.activities[compound]
+ unless entry[prediction_feature.uri].empty?
+ entry[prediction_feature.uri].each do |value|
+ lazar.activities[compound] << transformed_acts[t_count].to_s
+ t_count+=1
+ end
+ end
+ end
+ elsif prediction_feature.feature_type == "classification"
+ training_activities.data_entries.each do |compound,entry|
+ lazar.activities[compound] = [] unless lazar.activities[compound]
+ unless entry[prediction_feature.uri].empty?
+ entry[prediction_feature.uri].each do |value|
+ lazar.activities[compound] << lazar.value_map.invert[value] # insert mapped values, not originals
end
end
end
end
+ task.progress 90
lazar.metadata[DC.title] = "lazar model for #{URI.decode(File.basename(prediction_feature.uri))}"
- # TODO: fix dependentVariable
lazar.metadata[OT.dependentVariables] = prediction_feature.uri
lazar.metadata[OT.trainingDataset] = dataset_uri
lazar.metadata[OT.featureDataset] = feature_dataset_uri
-
- if prediction_feature.feature_type == "classification"
- lazar.metadata[RDF.type] = [OTA.ClassificationLazySingleTarget]
- elsif prediction_feature.feature_type == "regression"
- lazar.metadata[RDF.type] = [OTA.RegressionLazySingleTarget]
+ case training_activities.feature_type(@subjectid)
+ when "classification"
+ lazar.metadata[RDF.type] = [OT.Model, OTA.ClassificationLazySingleTarget]
+ when "regression"
+ lazar.metadata[RDF.type] = [OT.Model, OTA.RegressionLazySingleTarget]
end
lazar.metadata[OT.parameters] = [
@@ -170,7 +212,7 @@ post '/lazar/?' do
model_uri
end
response['Content-Type'] = 'text/uri-list'
- halt 503,task.uri+"\n" if task.status == "Cancelled"
+ raise OpenTox::ServiceUnavailableError.new task.uri+"\n" if task.status == "Cancelled"
halt 202,task.uri
end
diff --git a/libfminer b/libfminer
-Subproject d51f5e784ce0f5b7ef1c47c52ea55d1c874ec2e
+Subproject 07679a625a7acad864fd3abd80654a1a0a61e69
diff --git a/openbabel.rb b/openbabel.rb
index 1644455..463663e 100644
--- a/openbabel.rb
+++ b/openbabel.rb
@@ -49,7 +49,7 @@ get '/openbabel/:property' do
response['Content-Type'] = 'application/rdf+xml'
algorithm.to_rdfxml
else
- halt 404, "Unknown OpenBabel descriptor #{params[:property]}."
+ raise OpenTox::NotFoundError.new "Unknown OpenBabel descriptor #{params[:property]}."
end
end
@@ -89,7 +89,7 @@ post '/openbabel/:property' do
descriptor = OpenBabel::OBDescriptor.find_type(params[:property])
descriptor.predict(obmol).to_s
else
- halt 404, "Cannot calculate property #{params[:property]} with OpenBabel"
+ raise OpenTox::NotFoundError.new "Cannot calculate property #{params[:property]} with OpenBabel"
end
end
@@ -143,6 +143,6 @@ post '/openbabel' do
result_dataset.uri
end
response['Content-Type'] = 'text/uri-list'
- halt 503,task.uri+"\n" if task.status == "Cancelled"
+ raise OpenTox::ServiceUnavailableError.new task.uri+"\n" if task.status == "Cancelled"
halt 202,task.uri.to_s+"\n"
end
diff --git a/similarity.rb b/similarity.rb
index 060bd2b..faf43f9 100644
--- a/similarity.rb
+++ b/similarity.rb
@@ -2,25 +2,25 @@ require File.join(File.dirname(__FILE__),'dataset.rb')
helpers do
def find
-# + charges are dropped
-uri = uri(params[:splat].first.gsub(/(InChI.*) (.*)/,'\1+\2')) # reinsert dropped '+' signs in InChIs
-halt 404, "Dataset \"#{uri}\" not found." unless @set = Dataset.find(uri)
+ # + charges are dropped
+ uri = uri(params[:splat].first.gsub(/(InChI.*) (.*)/,'\1+\2')) # reinsert dropped '+' signs in InChIs
+ raise OpenTox::NotFoundError.new "Dataset \"#{uri}\" not found." unless @set = Dataset.find(uri)
end
def uri(name)
-name = URI.encode(name)
-uri = File.join Dataset.base_uri, name
-end
+ name = URI.encode(name)
+ uri = File.join Dataset.base_uri, name
+ end
end
get '/tanimoto/dataset/*/dataset/*/?' do
-find
-@set.tanimoto(uri(params[:splat][1]))
+ find
+ @set.tanimoto(uri(params[:splat][1]))
end
get '/weighted_tanimoto/dataset/*/dataset/*/?' do
-find
-@set.weighted_tanimoto(uri(params[:splat][1]))
+ find
+ @set.weighted_tanimoto(uri(params[:splat][1]))
end