1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
|
module OpenTox
module Algorithm
class Fminer
# Run last algorithm on a dataset
#
# @param [String] dataset_uri URI of the training dataset
# @param [String] prediction_feature URI of the prediction feature (i.e. dependent variable)
# @param [optional] parameters LAST parameters, accepted parameters are
# - min_frequency freq Minimum frequency (default 5)
# - feature_type Feature type, can be 'paths' or 'trees' (default "trees")
# - nr_hits Set to "true" to get hit count instead of presence
# - get_target Set to "true" to obtain target variable as feature
# @return [text/uri-list] Task URI
def self.last params
@fminer=OpenTox::Algorithm::Fminer.new
@fminer.check_params(params,80)
# TODO introduce task again
#task = OpenTox::Task.run("Mining LAST features", uri('/fminer/last')) do |task|
@last = Last::Last.new
@last.Reset
if @fminer.prediction_feature.feature_type == "regression"
@last.SetRegression(true) # AM: DO NOT MOVE DOWN! Must happen before the other Set... operations!
else
bad_request_error "No accept values for "\
"dataset '#{fminer.training_dataset.id}' and "\
"feature '#{fminer.prediction_feature.id}'" unless
@fminer.prediction_feature.accept_values
value_map=@fminer.prediction_feature.value_map
end
@last.SetMinfreq(@fminer.minfreq)
@last.SetType(1) if params[:feature_type] == "paths"
@last.SetConsoleOut(false)
feature_dataset = OpenTox::Dataset.new
feature_dataset["title"] = "LAST representatives for #{@fminer.training_dataset.title}",
feature_dataset.creator = __FILE__
feature_dataset.parameters = [
{ "title" => "dataset_id", "paramValue" => params[:dataset].id },
{ "title" => "prediction_feature_id", "paramValue" => params[:prediction_feature].id },
{ "title" => "min_frequency", "paramValue" => @fminer.minfreq },
{ "title" => "nr_hits", "paramValue" => (params[:nr_hits] == "true" ? "true" : "false") }
]
@fminer.compounds = []
@fminer.db_class_sizes = Array.new # AM: effect
@fminer.all_activities = Hash.new # DV: for effect calculation (class and regr)
@fminer.smi = [] # needed for matching the patterns back
# Add data to fminer
@fminer.add_fminer_data(@last, value_map)
#task.progress 10
#step_width = 80 / @bbrc.GetNoRootNodes().to_f
# run @last
xml = ""
(0 .. @last.GetNoRootNodes()-1).each do |j|
results = @last.MineRoot(j)
#task.progress 10+step_width*(j+1)
results.each do |result|
xml << result
end
end
lu = LU.new # uses last-utils here
dom=lu.read(xml) # parse GraphML
smarts=lu.smarts_rb(dom,'nls') # converts patterns to LAST-SMARTS using msa variant (see last-pm.maunz.de)
params[:nr_hits] == "true" ? hit_count=true : hit_count=false
matches, counts = lu.match_rb(@fminer.smi,smarts,hit_count,true) # creates instantiations
features = []
# create table with correct size
data_entries = Array.new(params[:dataset].compounds.size) {Array.new(matches.size,0)}
matches.each do |smarts, ids|
metadata = @fminer.calc_metadata(smarts, ids, counts[smarts], @last, nil, value_map, params)
feature = OpenTox::Feature.find_or_create_by(metadata)
features << feature
ids.each_with_index do |id,idx|
compound_idx = params[:dataset].compounds.index @fminer.compounds[id]
feature_idx = features.index feature
data_entries[compound_idx] ||= []
data_entries[compound_idx][feature_idx] = counts[smarts][idx]
end
end
feature_dataset.compounds = @fminer.training_dataset.compounds
feature_dataset.features = features
feature_dataset.data_entries = data_entries
=begin
# TODO check if this code is necessary, I dont understand what it does
fminer_compounds = @fminer.training_dataset.compounds
prediction_feature_idx = @fminer.training_dataset.features.index @fminer.prediction_feature
prediction_feature_all_acts = fminer_compounds.each_with_index.collect { |c,idx|
@fminer.training_dataset.data_entries[idx][prediction_feature_idx]
}
fminer_noact_compounds = fminer_compounds - @fminer.compounds
if (params[:get_target] == "true")
feature_dataset.features = [ @fminer.prediction_feature ] + feature_dataset.features
end
fminer_compounds.each_with_index { |c,idx|
# TODO: fix value insertion
row = [ c ]
if (params[:get_target] == "true")
row = row + [ prediction_feature_all_acts[idx] ]
end
features.each { |f|
row << (fminer_results[c] ? fminer_results[c][f] : nil)
}
row.collect! { |v| v ? v : 0 } unless fminer_noact_compounds.include? c
feature_dataset << row
}
=end
feature_dataset.save
feature_dataset
# end
end
end
end
end
|