1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
|
=begin
* Name: fminer.rb
* Description: Subgraph descriptor calculation
* Author: Andreas Maunz <andreas@maunz.de>
* Date: 10/2012
=end
# Set every FMINER_* environment flag below to the string 'true'. This runs at
# load time, before the miner objects further down are instantiated.
# NOTE(review): presumably these flags are read by the native fminer
# libraries -- confirm against the Bbrc/Last bindings.
%w[
  FMINER_SMARTS
  FMINER_NO_AROMATIC
  FMINER_PVALUES
  FMINER_SILENT
  FMINER_NR_HITS
].each { |flag| ENV[flag] = 'true' }
# Miner instances created once when this file is loaded and kept in class
# variables so the route handlers can reach them. @@last is driven by the
# POST /fminer/last handler in this file (Reset, SetMinfreq, MineRoot, ...).
# NOTE(review): @@bbrc is presumably used by the BBRC mining routes, which are
# not visible in this part of the file -- confirm.
# NOTE(review): these are class variables assigned at the top level of the
# file; Ruby 3.0+ is documented to raise RuntimeError for class variable
# access from toplevel -- confirm the targeted Ruby version.
@@bbrc = Bbrc::Bbrc.new
@@last = Last::Last.new
module OpenTox
class Application < Service
# List the fminer algorithms offered by this service (BBRC and LAST).
# @return [text/uri-list] URIs
get '/fminer/?' do
  algorithm_uris = ['/fminer/bbrc', '/fminer/last'].map { |path| uri(path) }
  render algorithm_uris
end
# Describe the BBRC algorithm: its metadata plus the list of parameters it
# accepts.
# @return [String] Representation
get "/fminer/bbrc/?" do
  bbrc_algorithm = OpenTox::Algorithm::Generic.new(to('/fminer/bbrc', :full))

  bbrc_algorithm.metadata = {
    RDF::DC.title => 'Backbone Refinement Class Representatives',
    RDF::DC.creator => "andreas@maunz.de",
    RDF.type => [RDF::OT.Algorithm, RDF::OTA.PatternMiningSupervised]
  }

  # One row per parameter: [title, scope, description]
  parameter_table = [
    ["dataset_uri", "mandatory", "Dataset URI"],
    ["prediction_feature", "optional", "Feature URI for dependent variable"],
    ["min_frequency", "optional", "Minimum frequency"],
    ["feature_type", "optional", "Feature type, can be 'paths' or 'trees'"],
    ["backbone", "optional", "BBRC classes, pass 'false' to switch off mining for BBRC representatives."],
    ["min_chisq_significance", "optional", "Significance threshold (between 0 and 1)"],
    ["nr_hits", "optional", "Whether subgraphs should be weighted with their occurrence counts in the instances (frequency)"],
    ["get_target", "optional", "Set to 'true' to obtain target variables as a feature"]
  ]
  bbrc_algorithm.parameters = parameter_table.map do |title, scope, description|
    { RDF::DC.description => description, RDF::OT.paramScope => scope, RDF::DC.title => title }
  end

  render(bbrc_algorithm)
end
# Describe the BBRC-sample algorithm: its metadata plus the list of parameters
# it accepts.
# @return [String] Representation
get "/fminer/bbrc/sample/?" do
  sample_algorithm = OpenTox::Algorithm::Generic.new(to('/fminer/bbrc/sample', :full))

  sample_algorithm.metadata = {
    RDF::DC.title => 'Backbone Refinement Class Representatives, obtained from samples of a dataset',
    RDF::DC.creator => "andreas@maunz.de",
    RDF.type => [RDF::OT.Algorithm, RDF::OTA.PatternMiningSupervised]
  }

  # One row per parameter: [description, scope, title]
  rows = [
    ["Dataset URI", "mandatory", "dataset_uri"],
    ["Feature URI for dependent variable", "optional", "prediction_feature"],
    ["Number of bootstrap samples", "optional", "num_boots"],
    ["Minimum sampling support", "optional", "min_sampling_support"],
    ["Minimum frequency", "optional", "min_frequency"],
    ["Whether subgraphs should be weighted with their occurrence counts in the instances (frequency)", "optional", "nr_hits"],
    ["BBRC classes, pass 'false' to switch off mining for BBRC representatives.", "optional", "backbone"],
    ["Chisq estimation method, pass 'mean' to use simple mean estimate for chisq test.", "optional", "method"]
  ]
  sample_algorithm.parameters = rows.each_with_object([]) do |(description, scope, title), list|
    list << {
      RDF::DC.description => description,
      RDF::OT.paramScope => scope,
      RDF::DC.title => title
    }
  end

  render(sample_algorithm)
end
# Describe the fminer LAST-PM algorithm: its metadata plus the list of
# parameters it accepts.
# @return [String] Representation
get "/fminer/last/?" do
  # Builds one parameter description hash.
  describe = lambda do |title, scope, description|
    { RDF::DC.description => description, RDF::OT.paramScope => scope, RDF::DC.title => title }
  end

  last_algorithm = OpenTox::Algorithm::Generic.new(to('/fminer/last', :full))
  last_algorithm.metadata = {
    RDF::DC.title => 'Latent Structure Pattern Mining descriptors',
    RDF::DC.creator => "andreas@maunz.de",
    RDF.type => [RDF::OT.Algorithm, RDF::OTA.PatternMiningSupervised]
  }
  last_algorithm.parameters = [
    describe.call("dataset_uri", "mandatory", "Dataset URI"),
    describe.call("prediction_feature", "optional", "Feature URI for dependent variable"),
    describe.call("min_frequency", "optional", "Minimum frequency"),
    describe.call("feature_type", "optional", "Feature type, can be 'paths' or 'trees'"),
    describe.call("nr_hits", "optional", "Whether subgraphs should be weighted with their occurrence counts in the instances (frequency)"),
    describe.call("get_target", "optional", "Set to 'true' to obtain target variables as a feature")
  ]
  render(last_algorithm)
end
# Get representation of the matching algorithm for the given fminer method.
#
# The route ends in "/?" (optional trailing slash) like the other fminer
# routes. The previous pattern ended in "match?", where the "?" made the
# final "h" optional: it matched ".../matc" and rejected ".../match/".
#
# @param [String] method fminer method name taken from the URL; it is only
#   used to build the URI of the returned algorithm representation
# @return [String] Representation
get "/fminer/:method/match/?" do
  algorithm = OpenTox::Algorithm::Generic.new(to("/fminer/#{params[:method]}/match", :full))
  algorithm.metadata = {
    RDF::DC.title => 'fminer feature matching',
    RDF::DC.creator => "mguetlein@gmail.com, andreas@maunz.de",
    RDF.type => [RDF::OT.Algorithm, RDF::OTA.PatternMiningSupervised]
  }
  algorithm.parameters = [
    { RDF::DC.description => "Dataset URI", RDF::OT.paramScope => "mandatory", RDF::DC.title => "dataset_uri" },
    { RDF::DC.description => "Feature Dataset URI", RDF::OT.paramScope => "mandatory", RDF::DC.title => "feature_dataset_uri" },
    { RDF::DC.description => "Feature URI for dependent variable", RDF::OT.paramScope => "optional", RDF::DC.title => "prediction_feature" }
  ]
  render(algorithm)
end
# Run last algorithm on a dataset
#
# Validates the request parameters, then starts a task that mines LAST
# patterns from the training dataset, matches them back onto the compounds and
# stores the result as a new feature dataset. The request itself answers
# immediately with status 202 and the task URI.
#
# @param [String] dataset_uri URI of the training dataset
# @param [String] prediction_feature URI of the prediction feature (i.e. dependent variable)
# @param [optional] parameters LAST parameters, accepted parameters are
#   - min_frequency freq  Minimum frequency (default 5)
#   - feature_type Feature type, can be 'paths' or 'trees' (default "trees")
#   - nr_hits Set to "true" to get hit count instead of presence
#   - get_target Set to "true" to obtain target variable as feature
# @return [text/uri-list] Task URI
post '/fminer/last/?' do
  # NOTE(review): @@fminer is a class variable that is reassigned on every
  # request and read inside the task block below; if tasks run concurrently,
  # overlapping requests would share it -- confirm this is safe.
  @@fminer = OpenTox::Algorithm::Fminer.new(to('/fminer/last', :full))
  @@fminer.check_params(params, 80)

  task = OpenTox::Task.run("Mining LAST features", uri('/fminer/last')) do |task|
    hit_count = params[:nr_hits] == "true"
    include_target = params[:get_target] == "true"

    @@last.Reset
    value_map = nil # only set for non-regression features
    if @@fminer.prediction_feature.feature_type == "regression"
      @@last.SetRegression(true) # AM: DO NOT MOVE DOWN! Must happen before the other Set... operations!
    else
      unless @@fminer.prediction_feature.accept_values
        bad_request_error "No accept values for "\
          "dataset '#{@@fminer.training_dataset.uri}' and "\
          "feature '#{@@fminer.prediction_feature.uri}'"
      end
      value_map = @@fminer.prediction_feature.value_map
    end
    @@last.SetMinfreq(@@fminer.minfreq)
    @@last.SetType(1) if params[:feature_type] == "paths"
    @@last.SetConsoleOut(false)

    feature_dataset = OpenTox::Dataset.new
    feature_dataset.metadata = {
      RDF::DC.title => "LAST representatives for #{@@fminer.training_dataset.title}",
      RDF::DC.creator => to('/fminer/last'),
      RDF::OT.hasSource => to('/fminer/last')
    }
    feature_dataset.parameters = [
      { RDF::DC.title => "dataset_uri", RDF::OT.paramValue => params[:dataset_uri] },
      { RDF::DC.title => "prediction_feature", RDF::OT.paramValue => params[:prediction_feature] },
      { RDF::DC.title => "min_frequency", RDF::OT.paramValue => @@fminer.minfreq },
      { RDF::DC.title => "nr_hits", RDF::OT.paramValue => hit_count.to_s }
    ]

    @@fminer.compounds = []
    @@fminer.db_class_sizes = Array.new # AM: effect
    @@fminer.all_activities = Hash.new # DV: for effect calculation (class and regr)
    @@fminer.smi = [] # needed for matching the patterns back

    # Add data to fminer
    @@fminer.add_fminer_data(@@last, value_map)
    #task.progress 10
    # Progress increment per root node; only needed by the (currently
    # disabled) task.progress calls. Uses @@last, the miner this route runs.
    step_width = 80 / @@last.GetNoRootNodes().to_f

    # run @@last
    xml = ""
    (0 .. @@last.GetNoRootNodes()-1).each do |j|
      results = @@last.MineRoot(j)
      #task.progress 10+step_width*(j+1)
      results.each { |result| xml << result }
    end

    lu = LU.new # uses last-utils here
    dom = lu.read(xml) # parse GraphML
    smarts = lu.smarts_rb(dom, 'nls') # converts patterns to LAST-SMARTS using msa variant (see last-pm.maunz.de)
    matches, counts = lu.match_rb(@@fminer.smi, smarts, hit_count, true) # creates instantiations

    features = []
    # prepare to receive results as hash { c => { feature_uri => v, ... } }
    fminer_results = {}
    matches.each do |pattern, ids|
      metadata, _parameters = @@fminer.calc_metadata(pattern, ids, counts[pattern], @@last, nil, value_map, params)
      metadata[RDF::DC.title] = pattern.dup
      feature = OpenTox::Feature.find_or_create(metadata)
      features << feature
      ids.each_with_index do |id, idx|
        compound = @@fminer.compounds[id]
        fminer_results[compound] ||= {}
        fminer_results[compound][feature.uri] = counts[pattern][idx]
      end
    end

    fminer_compounds = @@fminer.training_dataset.compounds
    prediction_feature_idx = @@fminer.training_dataset.features.collect { |f| f.uri }.index @@fminer.prediction_feature.uri
    prediction_feature_all_acts = fminer_compounds.each_with_index.collect do |_c, idx|
      @@fminer.training_dataset.data_entries[idx][prediction_feature_idx]
    end
    # Training compounds that were not handed to the miner.
    fminer_noact_compounds = fminer_compounds - @@fminer.compounds

    feature_dataset.features = features
    if include_target
      feature_dataset.features = [ @@fminer.prediction_feature ] + feature_dataset.features
    end

    fminer_compounds.each_with_index do |c, idx|
      # TODO: fix value insertion
      row = [ c ]
      row << prediction_feature_all_acts[idx] if include_target
      hits = fminer_results[c]
      features.each { |f| row << (hits ? hits[f.uri] : nil) }
      # Missing values become 0, except for compounds the miner never saw:
      # their entries stay nil.
      row.collect! { |v| v || 0 } unless fminer_noact_compounds.include? c
      feature_dataset << row
    end

    feature_dataset.put
    feature_dataset.uri
  end

  response['Content-Type'] = 'text/uri-list'
  halt 202, task.uri
end
end
end
|