From 2b422ad18b0e94cd4bd19b83d6ab692987ae0ec7 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Tue, 12 Jun 2012 11:34:56 +0200 Subject: add max_num_fragments param to fminer bbrc --- algorithm_test.rb | 15 ++++++++++----- fminer.rb | 31 ++++++++++++++++++++++++++++++- 2 files changed, 40 insertions(+), 6 deletions(-) diff --git a/algorithm_test.rb b/algorithm_test.rb index 08c7d4c..4b6da4e 100644 --- a/algorithm_test.rb +++ b/algorithm_test.rb @@ -47,14 +47,19 @@ class AlgorithmTest < Test::Unit::TestCase #feature_dataset_uri = "http://apps.ideaconsult.net:8080/ambit2/dataset/603204?pagesize=200&page=0" #prediction_feature = "http://apps.ideaconsult.net:8080/ambit2/feature/528321" - dataset_uri = "http://apps.ideaconsult.net:8080/ambit2/dataset/425254" - prediction_feature = "http://apps.ideaconsult.net:8080/ambit2/feature/528321" + # dataset_uri = "http://apps.ideaconsult.net:8080/ambit2/dataset/425254" + # prediction_feature = "http://apps.ideaconsult.net:8080/ambit2/feature/528321" + + ##dataset_uri = "http://local-ot/dataset/1488" + #prediction_feature = "http://local-ot/dataset/1315/feature/Rodent%20carcinogenicity" + + #kazius 250 no features + dataset_uri = "http://local-ot/dataset/9264" + prediction_feature = dataset_uri+"/feature/endpoint" -# dataset_uri = "http://local-ot/dataset/1488" -# prediction_feature = "http://local-ot/dataset/1315/feature/Rodent%20carcinogenicity" params = {:dataset_uri=>dataset_uri, :prediction_feature=>prediction_feature, - :min_frequency=>7} #multi: 10=>4, 5=>>3000 + :min_frequency=>7, :max_num_features=>300} #multi: 10=>4, 5=>>3000 post "/fminer/bbrc",params # params = {:dataset_uri=>dataset_uri, diff --git a/fminer.rb b/fminer.rb index 36e1d73..4d72827 100644 --- a/fminer.rb +++ b/fminer.rb @@ -200,6 +200,8 @@ post '/fminer/bbrc/?' do step_width = 80 / @@bbrc.GetNoRootNodes().to_f features = Set.new + feature_count = {} + # run @@bbrc (0 .. @@bbrc.GetNoRootNodes()-1).each do |j| results = @@bbrc.MineRoot(j) @@ -245,6 +247,9 @@ post '/fminer/bbrc/?' do } feature_dataset.add_feature feature_uri, metadata #feature_dataset.add_feature_parameters feature_uri, feature_dataset.parameters + + feature_count[feature_uri] = 0 + end id_arrs.each { |id_count_hash| id=id_count_hash.keys[0].to_i @@ -253,6 +258,7 @@ post '/fminer/bbrc/?' do feature_dataset.add(fminer.compounds[id], feature_uri, count) else feature_dataset.add(fminer.compounds[id], feature_uri, 1) + feature_count[feature_uri] = feature_count[feature_uri]+1 end } @@ -261,7 +267,30 @@ post '/fminer/bbrc/?' do # AM: add feature values for non-present features # feature_dataset.complete_data_entries - + + if (params[:max_num_features] && feature_dataset.features.size>params[:max_num_features].to_i) + LOGGER.debug "removing features, found: #{feature_dataset.features.size}, max-num: #{params[:max_num_features]}" + + feature_p_count = [] + feature_dataset.features.each do |f_uri,m| + feature_p_count << [f_uri, m[OT.pValue], feature_count[f_uri]] + end + # sort by p-value, tie breaking by number of compounds that match this feature + sorted = feature_p_count.sort do |a,b| + if b[1] == a[1] + b[2] <=> a[2] + else + b[1] <=> a[1] + end + end + (params[:max_num_features].to_i..(sorted.size-1)).each do |i| + feature_dataset.features.delete(sorted[i][0]) + feature_dataset.compounds.each do |c| + feature_dataset.data_entries[c].delete(sorted[i][0]) + end + end + end + feature_dataset.save(@subjectid) feature_dataset.uri end -- cgit v1.2.3