diff options
Diffstat (limited to 'lib/last.rb')
-rw-r--r-- | lib/last.rb | 127 |
1 files changed, 127 insertions, 0 deletions
diff --git a/lib/last.rb b/lib/last.rb new file mode 100644 index 0000000..944d95e --- /dev/null +++ b/lib/last.rb @@ -0,0 +1,127 @@ +module OpenTox + module Algorithm + class Fminer + + # Run last algorithm on a dataset + # + # @param [String] dataset_uri URI of the training dataset + # @param [String] prediction_feature URI of the prediction feature (i.e. dependent variable) + # @param [optional] parameters LAST parameters, accepted parameters are + # - min_frequency freq Minimum frequency (default 5) + # - feature_type Feature type, can be 'paths' or 'trees' (default "trees") + # - nr_hits Set to "true" to get hit count instead of presence + # - get_target Set to "true" to obtain target variable as feature + # @return [text/uri-list] Task URI + def self.last params + + @fminer=OpenTox::Algorithm::Fminer.new + @fminer.check_params(params,80) + + # TODO introduce task again + #task = OpenTox::Task.run("Mining LAST features", uri('/fminer/last')) do |task| + + @last = Last::Last.new + @last.Reset + if @fminer.prediction_feature.feature_type == "regression" + @last.SetRegression(true) # AM: DO NOT MOVE DOWN! Must happen before the other Set... operations! + else + bad_request_error "No accept values for "\ + "dataset '#{fminer.training_dataset.id}' and "\ + "feature '#{fminer.prediction_feature.id}'" unless + @fminer.prediction_feature.accept_values + value_map=@fminer.prediction_feature.value_map + end + @last.SetMinfreq(@fminer.minfreq) + @last.SetType(1) if params[:feature_type] == "paths" + @last.SetConsoleOut(false) + + + feature_dataset = OpenTox::Dataset.new + feature_dataset["title"] = "LAST representatives for #{@fminer.training_dataset.title}", + feature_dataset.creator = __FILE__ + feature_dataset.parameters = [ + { "title" => "dataset_id", "paramValue" => params[:dataset].id }, + { "title" => "prediction_feature_id", "paramValue" => params[:prediction_feature].id }, + { "title" => "min_frequency", "paramValue" => @fminer.minfreq }, + { "title" => "nr_hits", "paramValue" => (params[:nr_hits] == "true" ? "true" : "false") } + ] + + @fminer.compounds = [] + @fminer.db_class_sizes = Array.new # AM: effect + @fminer.all_activities = Hash.new # DV: for effect calculation (class and regr) + @fminer.smi = [] # needed for matching the patterns back + + # Add data to fminer + @fminer.add_fminer_data(@last, value_map) + #task.progress 10 + #step_width = 80 / @bbrc.GetNoRootNodes().to_f + # run @last + xml = "" + (0 .. @last.GetNoRootNodes()-1).each do |j| + results = @last.MineRoot(j) + #task.progress 10+step_width*(j+1) + results.each do |result| + xml << result + end + end + + lu = LU.new # uses last-utils here + dom=lu.read(xml) # parse GraphML + smarts=lu.smarts_rb(dom,'nls') # converts patterns to LAST-SMARTS using msa variant (see last-pm.maunz.de) + params[:nr_hits] == "true" ? hit_count=true : hit_count=false + matches, counts = lu.match_rb(@fminer.smi,smarts,hit_count,true) # creates instantiations + + features = [] + # create table with correct size + data_entries = Array.new(params[:dataset].compounds.size) {Array.new(matches.size,0)} + matches.each do |smarts, ids| + metadata = @fminer.calc_metadata(smarts, ids, counts[smarts], @last, nil, value_map, params) + feature = OpenTox::Feature.find_or_create_by(metadata) + features << feature + ids.each_with_index do |id,idx| + compound_idx = params[:dataset].compounds.index @fminer.compounds[id] + feature_idx = features.index feature + data_entries[compound_idx] ||= [] + data_entries[compound_idx][feature_idx] = counts[smarts][idx] + end + end + feature_dataset.compounds = @fminer.training_dataset.compounds + feature_dataset.features = features + feature_dataset.data_entries = data_entries + +=begin + # TODO check if this code is necessary, I dont understand what it does + fminer_compounds = @fminer.training_dataset.compounds + prediction_feature_idx = @fminer.training_dataset.features.index @fminer.prediction_feature + prediction_feature_all_acts = fminer_compounds.each_with_index.collect { |c,idx| + @fminer.training_dataset.data_entries[idx][prediction_feature_idx] + } + fminer_noact_compounds = fminer_compounds - @fminer.compounds + + if (params[:get_target] == "true") + feature_dataset.features = [ @fminer.prediction_feature ] + feature_dataset.features + end + fminer_compounds.each_with_index { |c,idx| + # TODO: fix value insertion + row = [ c ] + if (params[:get_target] == "true") + row = row + [ prediction_feature_all_acts[idx] ] + end + features.each { |f| + row << (fminer_results[c] ? fminer_results[c][f] : nil) + } + row.collect! { |v| v ? v : 0 } unless fminer_noact_compounds.include? c + feature_dataset << row + } +=end + + feature_dataset.save + feature_dataset + + # end + end + + end + end +end + |