summaryrefslogtreecommitdiff
path: root/webapp/fminer.rb
diff options
context:
space:
mode:
Diffstat (limited to 'webapp/fminer.rb')
-rw-r--r--webapp/fminer.rb241
1 files changed, 129 insertions, 112 deletions
diff --git a/webapp/fminer.rb b/webapp/fminer.rb
index 379a863..2d11dcb 100644
--- a/webapp/fminer.rb
+++ b/webapp/fminer.rb
@@ -14,7 +14,6 @@ ENV['FMINER_NR_HITS'] = 'true'
@@bbrc = Bbrc::Bbrc.new
@@last = Last::Last.new
-
module OpenTox
class Application < Service
@@ -22,11 +21,11 @@ module OpenTox
# Get list of fminer algorithms
# @return [text/uri-list] URIs
get '/fminer/?' do
- list = [ to('/fminer/bbrc', :full),
- to('/fminer/bbrc/sample', :full),
- to('/fminer/last', :full),
- to('/fminer/bbrc/match', :full),
- to('/fminer/last/match', :full)
+ list = [ uri('/fminer/bbrc'),
+ uri('/fminer/bbrc/sample'),
+ uri('/fminer/last'),
+ uri('/fminer/bbrc/match'),
+ uri('/fminer/last/match')
].join("\n") + "\n"
format_output(list)
end
@@ -36,19 +35,19 @@ module OpenTox
get "/fminer/bbrc/?" do
algorithm = OpenTox::Algorithm.new(to('/fminer/bbrc',:full))
algorithm.metadata = {
- DC.title => 'Backbone Refinement Class Representatives',
- DC.creator => "andreas@maunz.de",
- RDF.type => [OT.Algorithm,OTA.PatternMiningSupervised]
+ RDF::DC.title => 'Backbone Refinement Class Representatives',
+ RDF::DC.creator => "andreas@maunz.de",
+ RDF.type => [RDF::OT.Algorithm,RDF::OTA.PatternMiningSupervised]
}
algorithm.parameters = [
- { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" },
- { DC.description => "Feature URI for dependent variable", OT.paramScope => "optional", DC.title => "prediction_feature" },
- { DC.description => "Minimum frequency", OT.paramScope => "optional", DC.title => "min_frequency" },
- { DC.description => "Feature type, can be 'paths' or 'trees'", OT.paramScope => "optional", DC.title => "feature_type" },
- { DC.description => "BBRC classes, pass 'false' to switch off mining for BBRC representatives.", OT.paramScope => "optional", DC.title => "backbone" },
- { DC.description => "Significance threshold (between 0 and 1)", OT.paramScope => "optional", DC.title => "min_chisq_significance" },
- { DC.description => "Whether subgraphs should be weighted with their occurrence counts in the instances (frequency)", OT.paramScope => "optional", DC.title => "nr_hits" },
- { DC.description => "Set to 'true' to obtain target variables as a feature", OT.paramScope => "optional", DC.title => "get_target" }
+ { RDF::DC.description => "Dataset URI", RDF::OT.paramScope => "mandatory", RDF::DC.title => "dataset_uri" },
+ { RDF::DC.description => "Feature URI for dependent variable", RDF::OT.paramScope => "optional", RDF::DC.title => "prediction_feature" },
+ { RDF::DC.description => "Minimum frequency", RDF::OT.paramScope => "optional", RDF::DC.title => "min_frequency" },
+ { RDF::DC.description => "Feature type, can be 'paths' or 'trees'", RDF::OT.paramScope => "optional", RDF::DC.title => "feature_type" },
+ { RDF::DC.description => "BBRC classes, pass 'false' to switch off mining for BBRC representatives.", RDF::OT.paramScope => "optional", RDF::DC.title => "backbone" },
+ { RDF::DC.description => "Significance threshold (between 0 and 1)", RDF::OT.paramScope => "optional", RDF::DC.title => "min_chisq_significance" },
+ { RDF::DC.description => "Whether subgraphs should be weighted with their occurrence counts in the instances (frequency)", RDF::OT.paramScope => "optional", RDF::DC.title => "nr_hits" },
+ { RDF::DC.description => "Set to 'true' to obtain target variables as a feature", RDF::OT.paramScope => "optional", RDF::DC.title => "get_target" }
]
format_output(algorithm)
end
@@ -58,19 +57,19 @@ module OpenTox
get "/fminer/bbrc/sample/?" do
algorithm = OpenTox::Algorithm.new(to('/fminer/bbrc/sample',:full))
algorithm.metadata = {
- DC.title => 'Backbone Refinement Class Representatives, obtained from samples of a dataset',
- DC.creator => "andreas@maunz.de",
- RDF.type => [OT.Algorithm,OTA.PatternMiningSupervised]
+ RDF::DC.title => 'Backbone Refinement Class Representatives, obtained from samples of a dataset',
+ RDF::DC.creator => "andreas@maunz.de",
+ RDF.type => [RDF::OT.Algorithm,RDF::OTA.PatternMiningSupervised]
}
algorithm.parameters = [
- { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" },
- { DC.description => "Feature URI for dependent variable", OT.paramScope => "optional", DC.title => "prediction_feature" },
- { DC.description => "Number of bootstrap samples", OT.paramScope => "optional", DC.title => "num_boots" },
- { DC.description => "Minimum sampling support", OT.paramScope => "optional", DC.title => "min_sampling_support" },
- { DC.description => "Minimum frequency", OT.paramScope => "optional", DC.title => "min_frequency" },
- { DC.description => "Whether subgraphs should be weighted with their occurrence counts in the instances (frequency)", OT.paramScope => "optional", DC.title => "nr_hits" },
- { DC.description => "BBRC classes, pass 'false' to switch off mining for BBRC representatives.", OT.paramScope => "optional", DC.title => "backbone" },
- { DC.description => "Chisq estimation method, pass 'mean' to use simple mean estimate for chisq test.", OT.paramScope => "optional", DC.title => "method" }
+ { RDF::DC.description => "Dataset URI", RDF::OT.paramScope => "mandatory", RDF::DC.title => "dataset_uri" },
+ { RDF::DC.description => "Feature URI for dependent variable", RDF::OT.paramScope => "optional", RDF::DC.title => "prediction_feature" },
+ { RDF::DC.description => "Number of bootstrap samples", RDF::OT.paramScope => "optional", RDF::DC.title => "num_boots" },
+ { RDF::DC.description => "Minimum sampling support", RDF::OT.paramScope => "optional", RDF::DC.title => "min_sampling_support" },
+ { RDF::DC.description => "Minimum frequency", RDF::OT.paramScope => "optional", RDF::DC.title => "min_frequency" },
+ { RDF::DC.description => "Whether subgraphs should be weighted with their occurrence counts in the instances (frequency)", RDF::OT.paramScope => "optional", RDF::DC.title => "nr_hits" },
+ { RDF::DC.description => "BBRC classes, pass 'false' to switch off mining for BBRC representatives.", RDF::OT.paramScope => "optional", RDF::DC.title => "backbone" },
+ { RDF::DC.description => "Chisq estimation method, pass 'mean' to use simple mean estimate for chisq test.", RDF::OT.paramScope => "optional", RDF::DC.title => "method" }
]
format_output(algorithm)
end
@@ -80,17 +79,17 @@ module OpenTox
get "/fminer/last/?" do
algorithm = OpenTox::Algorithm.new(to('/fminer/last',:full))
algorithm.metadata = {
- DC.title => 'Latent Structure Pattern Mining descriptors',
- DC.creator => "andreas@maunz.de",
- RDF.type => [OT.Algorithm,OTA.PatternMiningSupervised]
+ RDF::DC.title => 'Latent Structure Pattern Mining descriptors',
+ RDF::DC.creator => "andreas@maunz.de",
+ RDF.type => [RDF::OT.Algorithm,RDF::OTA.PatternMiningSupervised]
}
algorithm.parameters = [
- { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" },
- { DC.description => "Feature URI for dependent variable", OT.paramScope => "optional", DC.title => "prediction_feature" },
- { DC.description => "Minimum frequency", OT.paramScope => "optional", DC.title => "min_frequency" },
- { DC.description => "Feature type, can be 'paths' or 'trees'", OT.paramScope => "optional", DC.title => "feature_type" },
- { DC.description => "Whether subgraphs should be weighted with their occurrence counts in the instances (frequency)", OT.paramScope => "optional", DC.title => "nr_hits" },
- { DC.description => "Set to 'true' to obtain target variables as a feature", OT.paramScope => "optional", DC.title => "get_target" }
+ { RDF::DC.description => "Dataset URI", RDF::OT.paramScope => "mandatory", RDF::DC.title => "dataset_uri" },
+ { RDF::DC.description => "Feature URI for dependent variable", RDF::OT.paramScope => "optional", RDF::DC.title => "prediction_feature" },
+ { RDF::DC.description => "Minimum frequency", RDF::OT.paramScope => "optional", RDF::DC.title => "min_frequency" },
+ { RDF::DC.description => "Feature type, can be 'paths' or 'trees'", RDF::OT.paramScope => "optional", RDF::DC.title => "feature_type" },
+ { RDF::DC.description => "Whether subgraphs should be weighted with their occurrence counts in the instances (frequency)", RDF::OT.paramScope => "optional", RDF::DC.title => "nr_hits" },
+ { RDF::DC.description => "Set to 'true' to obtain target variables as a feature", RDF::OT.paramScope => "optional", RDF::DC.title => "get_target" }
]
format_output(algorithm)
end
@@ -101,14 +100,14 @@ module OpenTox
get "/fminer/:method/match?" do
algorithm = OpenTox::Algorithm.new(to("/fminer/#{params[:method]}/match",:full))
algorithm.metadata = {
- DC.title => 'fminer feature matching',
- DC.creator => "mguetlein@gmail.com, andreas@maunz.de",
- RDF.type => [OT.Algorithm,OTA.PatternMiningSupervised]
+ RDF::DC.title => 'fminer feature matching',
+ RDF::DC.creator => "mguetlein@gmail.com, andreas@maunz.de",
+ RDF.type => [RDF::OT.Algorithm,RDF::OTA.PatternMiningSupervised]
}
algorithm.parameters = [
- { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" },
- { DC.description => "Feature Dataset URI", OT.paramScope => "mandatory", DC.title => "feature_dataset_uri" },
- { DC.description => "Feature URI for dependent variable", OT.paramScope => "optional", DC.title => "prediction_feature" }
+ { RDF::DC.description => "Dataset URI", RDF::OT.paramScope => "mandatory", RDF::DC.title => "dataset_uri" },
+ { RDF::DC.description => "Feature Dataset URI", RDF::OT.paramScope => "mandatory", RDF::DC.title => "feature_dataset_uri" },
+ { RDF::DC.description => "Feature URI for dependent variable", RDF::OT.paramScope => "optional", RDF::DC.title => "prediction_feature" }
]
format_output(algorithm)
end
@@ -128,18 +127,10 @@ module OpenTox
# @return [text/uri-list] Task URI
post '/fminer/bbrc/?' do
-
@@fminer=OpenTox::Algorithm::Fminer.new(to('/fminer/bbrc',:full))
- @@fminer.check_params(params,5,@subjectid)
+ @@fminer.check_params(params,5)
- task = OpenTox::Task.create(
- $task[:uri],
- @subjectid,
- { RDF::DC.description => "Mining BBRC features",
- RDF::DC.creator => to('/fminer/bbrc',:full)
- }
- ) do |task|
-
+ task = OpenTox::Task.run("Mining BBRC features", uri('/fminer/bbrc'), @subjectid) do |task|
@@bbrc.Reset
if @@fminer.prediction_feature.feature_type == "regression"
@@ -149,7 +140,7 @@ module OpenTox
"dataset '#{@@fminer.training_dataset.uri}' and "\
"feature '#{@@fminer.prediction_feature.uri}'" unless
@@fminer.prediction_feature.accept_values
- value_map=@@fminer.training_dataset.value_map(@@fminer.prediction_feature)
+ value_map=@@fminer.prediction_feature.value_map
end
@@bbrc.SetMinfreq(@@fminer.minfreq)
@@bbrc.SetType(1) if params[:feature_type] == "paths"
@@ -157,21 +148,20 @@ module OpenTox
@@bbrc.SetChisqSig(params[:min_chisq_significance].to_f) if params[:min_chisq_significance]
@@bbrc.SetConsoleOut(false)
-
feature_dataset = OpenTox::Dataset.new(nil, @subjectid)
feature_dataset.metadata = {
- DC.title => "BBRC representatives",
- DC.creator => to('/fminer/bbrc',:full),
- OT.hasSource => to('/fminer/bbrc', :full),
+ RDF::DC.title => "BBRC representatives",
+ RDF::DC.creator => to('/fminer/bbrc',:full),
+ RDF::OT.hasSource => to('/fminer/bbrc', :full),
}
feature_dataset.parameters = [
- { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] },
- { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] },
- { DC.title => "min_frequency", OT.paramValue => @@fminer.minfreq },
- { DC.title => "nr_hits", OT.paramValue => (params[:nr_hits] == "true" ? "true" : "false") },
- { DC.title => "backbone", OT.paramValue => (params[:backbone] == "false" ? "false" : "true") }
+ { RDF::DC.title => "dataset_uri", RDF::OT.paramValue => params[:dataset_uri] },
+ { RDF::DC.title => "prediction_feature", RDF::OT.paramValue => params[:prediction_feature] },
+ { RDF::DC.title => "min_frequency", RDF::OT.paramValue => @@fminer.minfreq },
+ { RDF::DC.title => "nr_hits", RDF::OT.paramValue => (params[:nr_hits] == "true" ? "true" : "false") },
+ { RDF::DC.title => "backbone", RDF::OT.paramValue => (params[:backbone] == "false" ? "false" : "true") }
]
-
+
@@fminer.compounds = []
@@fminer.db_class_sizes = Array.new # AM: effect
@@fminer.all_activities = Hash.new # DV: for effect calculation in regression part
@@ -219,17 +209,16 @@ module OpenTox
end
end
- #feature_uri = File.join feature_dataset.uri,"feature","bbrc", features.size.to_s
unless features_smarts.include? smarts
features_smarts << smarts
- metadata = {
- OT.hasSource => to('/fminer/bbrc', :full),
- RDF.type => [OT.Feature, OT.Substructure, OT.NumericFeature],
- OT.smarts => smarts.dup,
- OT.pValue => p_value.to_f.abs.round(5),
- OT.effect => effect
- }
- feature = OpenTox::Feature.find_by_title(smarts.dup,metadata)
+ feature = OpenTox::Feature.find_or_create({
+ RDF::DC.title => smarts.dup,
+ RDF::OT.hasSource => to('/fminer/bbrc', :full),
+ RDF.type => [RDF::OT.Feature, RDF::OT.Substructure, RDF::OT.NumericFeature],
+ RDF::OT.smarts => smarts.dup,
+ RDF::OT.pValue => p_value.to_f.abs.round(5),
+ RDF::OT.effect => effect
+ }, @subjectid)
features << feature
end
@@ -247,9 +236,8 @@ module OpenTox
end # end of
end # feature parsing
- fminer_compounds = @@fminer.training_dataset.compounds.collect.to_a
- @@fminer.training_dataset.build_feature_positions
- prediction_feature_idx = @@fminer.training_dataset.feature_positions[@@fminer.prediction_feature.uri]
+ fminer_compounds = @@fminer.training_dataset.compounds
+ prediction_feature_idx = @@fminer.training_dataset.features.collect{|f| f.uri}.index @@fminer.prediction_feature.uri
prediction_feature_all_acts = fminer_compounds.each_with_index.collect { |c,idx|
@@fminer.training_dataset.data_entries[idx][prediction_feature_idx]
}
@@ -257,29 +245,65 @@ module OpenTox
feature_dataset.features = features
if (params[:get_target] == "true")
+ puts "get_target TRUE"
feature_dataset.features = [ @@fminer.prediction_feature ] + feature_dataset.features
+ else
+ puts "get_target FALSE"
end
+ feature_dataset.compounds = fminer_compounds
fminer_compounds.each_with_index { |c,idx|
- row = [ c ]
- if (params[:get_target] == "true")
- row = row + [ prediction_feature_all_acts[idx] ]
- end
+ #puts c.smiles
+ # TODO: fix here, insert with add_data_entry
+ #row = [ c ]
+ # TODO: reenable option
+ #if (params[:get_target] == "true")
+ #row = row + [ prediction_feature_all_acts[idx] ]
+ #end
features.each { |f|
- row << (fminer_results[c] ? fminer_results[c][f.uri] : nil)
+ #m = c.match([f.title])[f.title]
+ #m = 0 unless m
+ v = fminer_results[c][f.uri] if fminer_results[c]
+ unless fminer_noact_compounds.include? c
+ v = 0 if v.nil?
+ end
+ #unless m == v
+ #puts f.title
+ #puts m
+ #puts v
+ #end
+ feature_dataset.add_data_entry c, f, v.to_i
+ #row << (fminer_results[c] ? fminer_results[c][f.uri] : nil)
}
- row.collect! { |v| v ? v : 0 } unless fminer_noact_compounds.include? c
- feature_dataset << row
+ #row.collect! { |v| v ? v : 0 } unless fminer_noact_compounds.include? c
+ #feature_dataset << row
}
+
+=begin
+CH: Ordering seems to be ok here
+ feature_dataset.compounds.each_with_index do |c,i|
+ feature_dataset.features.each_with_index do |f,j|
+ m = c.match([f.title])[f.title]
+ #puts c.smiles
+ #puts f.title
+ #puts m.inspect
+ v = feature_dataset.data_entries[i][j]
+ #puts v.inspect
+ unless m.to_i == v.to_i
+ puts f.title
+ puts m.to_i
+ puts v.to_i
+ end
+ end
+ end
+=end
- $logger.debug "fminer found #{feature_dataset.features.size} features for #{feature_dataset.compounds.size} compounds"
- feature_dataset.put @subjectid
- $logger.debug feature_dataset.uri
+ #puts feature_dataset.to_csv
+ feature_dataset.put
feature_dataset.uri
end
response['Content-Type'] = 'text/uri-list'
- service_unavailable_error "Service unavailable" if task.cancelled?
- halt 202,task.uri.to_s+"\n"
+ halt 202,task.uri
end
@@ -298,15 +322,9 @@ module OpenTox
post '/fminer/last/?' do
@@fminer=OpenTox::Algorithm::Fminer.new(to('/fminer/last',:full))
- @@fminer.check_params(params,80,@subjectid)
+ @@fminer.check_params(params,80)
- task = OpenTox::Task.create(
- $task[:uri],
- @subjectid,
- { RDF::DC.description => "Mining LAST features",
- RDF::DC.creator => to('/fminer/last',:full)
- }
- ) do |task|
+ task = OpenTox::Task.run("Mining LAST features", uri('/fminer/last'), @subjectid) do |task|
@@last.Reset
if @@fminer.prediction_feature.feature_type == "regression"
@@ -316,7 +334,7 @@ module OpenTox
"dataset '#{fminer.training_dataset.uri}' and "\
"feature '#{fminer.prediction_feature.uri}'" unless
@@fminer.prediction_feature.accept_values
- value_map=@@fminer.training_dataset.value_map(@@fminer.prediction_feature)
+ value_map=@@fminer.prediction_feature.value_map
end
@@last.SetMinfreq(@@fminer.minfreq)
@@last.SetType(1) if params[:feature_type] == "paths"
@@ -325,15 +343,15 @@ module OpenTox
feature_dataset = OpenTox::Dataset.new(nil, @subjectid)
feature_dataset.metadata = {
- DC.title => "LAST representatives for " + @@fminer.training_dataset.metadata[DC.title].to_s,
- DC.creator => to('/fminer/last',:full),
- OT.hasSource => to('/fminer/last', :full)
+ RDF::DC.title => "LAST representatives for " + @@fminer.training_dataset.metadata[RDF::DC.title].to_s,
+ RDF::DC.creator => to('/fminer/last'),
+ RDF::OT.hasSource => to('/fminer/last')
}
feature_dataset.parameters = [
- { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] },
- { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] },
- { DC.title => "min_frequency", OT.paramValue => @@fminer.minfreq },
- { DC.title => "nr_hits", OT.paramValue => (params[:nr_hits] == "true" ? "true" : "false") }
+ { RDF::DC.title => "dataset_uri", RDF::OT.paramValue => params[:dataset_uri] },
+ { RDF::DC.title => "prediction_feature", RDF::OT.paramValue => params[:prediction_feature] },
+ { RDF::DC.title => "min_frequency", RDF::OT.paramValue => @@fminer.minfreq },
+ { RDF::DC.title => "nr_hits", RDF::OT.paramValue => (params[:nr_hits] == "true" ? "true" : "false") }
]
@@fminer.compounds = []
@@ -366,7 +384,8 @@ module OpenTox
fminer_results = {}
matches.each do |smarts, ids|
metadata, parameters = @@fminer.calc_metadata(smarts, ids, counts[smarts], @@last, nil, value_map, params)
- feature = OpenTox::Feature.find_by_title(smarts.dup,metadata)
+ metadata[RDF::DC.title] = smarts.dup
+ feature = OpenTox::Feature.find_or_create(metadata, @subjectid)
features << feature
ids.each_with_index { |id,idx|
fminer_results[@@fminer.compounds[id]] || fminer_results[@@fminer.compounds[id]] = {}
@@ -374,9 +393,8 @@ module OpenTox
}
end
- fminer_compounds = @@fminer.training_dataset.compounds.collect.to_a
- @@fminer.training_dataset.build_feature_positions
- prediction_feature_idx = @@fminer.training_dataset.feature_positions[@@fminer.prediction_feature.uri]
+ fminer_compounds = @@fminer.training_dataset.compounds
+ prediction_feature_idx = @@fminer.training_dataset.features.collect{|f| f.uri}.index @@fminer.prediction_feature.uri
prediction_feature_all_acts = fminer_compounds.each_with_index.collect { |c,idx|
@@fminer.training_dataset.data_entries[idx][prediction_feature_idx]
}
@@ -387,6 +405,7 @@ module OpenTox
feature_dataset.features = [ @@fminer.prediction_feature ] + feature_dataset.features
end
fminer_compounds.each_with_index { |c,idx|
+ # TODO: fix value insertion
row = [ c ]
if (params[:get_target] == "true")
row = row + [ prediction_feature_all_acts[idx] ]
@@ -397,14 +416,12 @@ module OpenTox
row.collect! { |v| v ? v : 0 } unless fminer_noact_compounds.include? c
feature_dataset << row
}
- feature_dataset.put @subjectid
- $logger.debug feature_dataset.uri
+ feature_dataset.put
feature_dataset.uri
end
response['Content-Type'] = 'text/uri-list'
- service_unavailable_error "Service unavailable" if task.cancelled?
- halt 202,task.uri.to_s+"\n"
+ halt 202,task.uri
end
end