summaryrefslogtreecommitdiff
path: root/lib/last.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/last.rb')
-rw-r--r--lib/last.rb127
1 files changed, 127 insertions, 0 deletions
diff --git a/lib/last.rb b/lib/last.rb
new file mode 100644
index 0000000..944d95e
--- /dev/null
+++ b/lib/last.rb
@@ -0,0 +1,127 @@
+module OpenTox
+ module Algorithm
+ class Fminer
+
+ # Run last algorithm on a dataset
+ #
+ # @param [String] dataset_uri URI of the training dataset
+ # @param [String] prediction_feature URI of the prediction feature (i.e. dependent variable)
+ # @param [optional] parameters LAST parameters, accepted parameters are
+ # - min_frequency freq Minimum frequency (default 5)
+ # - feature_type Feature type, can be 'paths' or 'trees' (default "trees")
+ # - nr_hits Set to "true" to get hit count instead of presence
+ # - get_target Set to "true" to obtain target variable as feature
+ # @return [text/uri-list] Task URI
+ def self.last params
+
+ @fminer=OpenTox::Algorithm::Fminer.new
+ @fminer.check_params(params,80)
+
+ # TODO introduce task again
+ #task = OpenTox::Task.run("Mining LAST features", uri('/fminer/last')) do |task|
+
+ @last = Last::Last.new
+ @last.Reset
+ if @fminer.prediction_feature.feature_type == "regression"
+ @last.SetRegression(true) # AM: DO NOT MOVE DOWN! Must happen before the other Set... operations!
+ else
+ bad_request_error "No accept values for "\
+ "dataset '#{fminer.training_dataset.id}' and "\
+ "feature '#{fminer.prediction_feature.id}'" unless
+ @fminer.prediction_feature.accept_values
+ value_map=@fminer.prediction_feature.value_map
+ end
+ @last.SetMinfreq(@fminer.minfreq)
+ @last.SetType(1) if params[:feature_type] == "paths"
+ @last.SetConsoleOut(false)
+
+
+ feature_dataset = OpenTox::Dataset.new
+ feature_dataset["title"] = "LAST representatives for #{@fminer.training_dataset.title}",
+ feature_dataset.creator = __FILE__
+ feature_dataset.parameters = [
+ { "title" => "dataset_id", "paramValue" => params[:dataset].id },
+ { "title" => "prediction_feature_id", "paramValue" => params[:prediction_feature].id },
+ { "title" => "min_frequency", "paramValue" => @fminer.minfreq },
+ { "title" => "nr_hits", "paramValue" => (params[:nr_hits] == "true" ? "true" : "false") }
+ ]
+
+ @fminer.compounds = []
+ @fminer.db_class_sizes = Array.new # AM: effect
+ @fminer.all_activities = Hash.new # DV: for effect calculation (class and regr)
+ @fminer.smi = [] # needed for matching the patterns back
+
+ # Add data to fminer
+ @fminer.add_fminer_data(@last, value_map)
+ #task.progress 10
+ #step_width = 80 / @bbrc.GetNoRootNodes().to_f
+ # run @last
+ xml = ""
+ (0 .. @last.GetNoRootNodes()-1).each do |j|
+ results = @last.MineRoot(j)
+ #task.progress 10+step_width*(j+1)
+ results.each do |result|
+ xml << result
+ end
+ end
+
+ lu = LU.new # uses last-utils here
+ dom=lu.read(xml) # parse GraphML
+ smarts=lu.smarts_rb(dom,'nls') # converts patterns to LAST-SMARTS using msa variant (see last-pm.maunz.de)
+ params[:nr_hits] == "true" ? hit_count=true : hit_count=false
+ matches, counts = lu.match_rb(@fminer.smi,smarts,hit_count,true) # creates instantiations
+
+ features = []
+ # create table with correct size
+ data_entries = Array.new(params[:dataset].compounds.size) {Array.new(matches.size,0)}
+ matches.each do |smarts, ids|
+ metadata = @fminer.calc_metadata(smarts, ids, counts[smarts], @last, nil, value_map, params)
+ feature = OpenTox::Feature.find_or_create_by(metadata)
+ features << feature
+ ids.each_with_index do |id,idx|
+ compound_idx = params[:dataset].compounds.index @fminer.compounds[id]
+ feature_idx = features.index feature
+ data_entries[compound_idx] ||= []
+ data_entries[compound_idx][feature_idx] = counts[smarts][idx]
+ end
+ end
+ feature_dataset.compounds = @fminer.training_dataset.compounds
+ feature_dataset.features = features
+ feature_dataset.data_entries = data_entries
+
+=begin
+ # TODO check if this code is necessary, I dont understand what it does
+ fminer_compounds = @fminer.training_dataset.compounds
+ prediction_feature_idx = @fminer.training_dataset.features.index @fminer.prediction_feature
+ prediction_feature_all_acts = fminer_compounds.each_with_index.collect { |c,idx|
+ @fminer.training_dataset.data_entries[idx][prediction_feature_idx]
+ }
+ fminer_noact_compounds = fminer_compounds - @fminer.compounds
+
+ if (params[:get_target] == "true")
+ feature_dataset.features = [ @fminer.prediction_feature ] + feature_dataset.features
+ end
+ fminer_compounds.each_with_index { |c,idx|
+ # TODO: fix value insertion
+ row = [ c ]
+ if (params[:get_target] == "true")
+ row = row + [ prediction_feature_all_acts[idx] ]
+ end
+ features.each { |f|
+ row << (fminer_results[c] ? fminer_results[c][f] : nil)
+ }
+ row.collect! { |v| v ? v : 0 } unless fminer_noact_compounds.include? c
+ feature_dataset << row
+ }
+=end
+
+ feature_dataset.save
+ feature_dataset
+
+ # end
+ end
+
+ end
+ end
+end
+