summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: mguetlein <martin.guetlein@gmail.com> 2012-06-12 11:34:56 +0200
committer: mguetlein <martin.guetlein@gmail.com> 2012-06-12 11:34:56 +0200
commit: 2b422ad18b0e94cd4bd19b83d6ab692987ae0ec7 (patch)
tree: f3128e3663a60345d423cb9fa6de2df38d08cd88
parent: b45be18db5701e781943aafe4078a7ad67904975 (diff)
add max_num_features param to fminer bbrc
-rw-r--r-- algorithm_test.rb 15
-rw-r--r-- fminer.rb 31
2 files changed, 40 insertions, 6 deletions
diff --git a/algorithm_test.rb b/algorithm_test.rb
index 08c7d4c..4b6da4e 100644
--- a/algorithm_test.rb
+++ b/algorithm_test.rb
@@ -47,14 +47,19 @@ class AlgorithmTest < Test::Unit::TestCase
#feature_dataset_uri = "http://apps.ideaconsult.net:8080/ambit2/dataset/603204?pagesize=200&page=0"
#prediction_feature = "http://apps.ideaconsult.net:8080/ambit2/feature/528321"
- dataset_uri = "http://apps.ideaconsult.net:8080/ambit2/dataset/425254"
- prediction_feature = "http://apps.ideaconsult.net:8080/ambit2/feature/528321"
+ # dataset_uri = "http://apps.ideaconsult.net:8080/ambit2/dataset/425254"
+ # prediction_feature = "http://apps.ideaconsult.net:8080/ambit2/feature/528321"
+
+ ##dataset_uri = "http://local-ot/dataset/1488"
+ #prediction_feature = "http://local-ot/dataset/1315/feature/Rodent%20carcinogenicity"
+
+ #kazius 250 no features
+ dataset_uri = "http://local-ot/dataset/9264"
+ prediction_feature = dataset_uri+"/feature/endpoint"
-# dataset_uri = "http://local-ot/dataset/1488"
-# prediction_feature = "http://local-ot/dataset/1315/feature/Rodent%20carcinogenicity"
params = {:dataset_uri=>dataset_uri,
:prediction_feature=>prediction_feature,
- :min_frequency=>7} #multi: 10=>4, 5=>>3000
+ :min_frequency=>7, :max_num_features=>300} #multi: 10=>4, 5=>>3000
post "/fminer/bbrc",params
# params = {:dataset_uri=>dataset_uri,
diff --git a/fminer.rb b/fminer.rb
index 36e1d73..4d72827 100644
--- a/fminer.rb
+++ b/fminer.rb
@@ -200,6 +200,8 @@ post '/fminer/bbrc/?' do
step_width = 80 / @@bbrc.GetNoRootNodes().to_f
features = Set.new
+ feature_count = {}
+
# run @@bbrc
(0 .. @@bbrc.GetNoRootNodes()-1).each do |j|
results = @@bbrc.MineRoot(j)
@@ -245,6 +247,9 @@ post '/fminer/bbrc/?' do
}
feature_dataset.add_feature feature_uri, metadata
#feature_dataset.add_feature_parameters feature_uri, feature_dataset.parameters
+
+ feature_count[feature_uri] = 0
+
end
id_arrs.each { |id_count_hash|
id=id_count_hash.keys[0].to_i
@@ -253,6 +258,7 @@ post '/fminer/bbrc/?' do
feature_dataset.add(fminer.compounds[id], feature_uri, count)
else
feature_dataset.add(fminer.compounds[id], feature_uri, 1)
+ feature_count[feature_uri] = feature_count[feature_uri]+1
end
}
@@ -261,7 +267,30 @@ post '/fminer/bbrc/?' do
# AM: add feature values for non-present features
# feature_dataset.complete_data_entries
-
+
+ if (params[:max_num_features] && feature_dataset.features.size>params[:max_num_features].to_i)
+ LOGGER.debug "removing features, found: #{feature_dataset.features.size}, max-num: #{params[:max_num_features]}"
+
+ feature_p_count = []
+ feature_dataset.features.each do |f_uri,m|
+ feature_p_count << [f_uri, m[OT.pValue], feature_count[f_uri]]
+ end
+ # sort descending by p-value, breaking ties by descending feature_count (number of compounds that match this feature); entries beyond max_num_features are removed below
+ sorted = feature_p_count.sort do |a,b|
+ if b[1] == a[1]
+ b[2] <=> a[2]
+ else
+ b[1] <=> a[1]
+ end
+ end
+ (params[:max_num_features].to_i..(sorted.size-1)).each do |i|
+ feature_dataset.features.delete(sorted[i][0])
+ feature_dataset.compounds.each do |c|
+ feature_dataset.data_entries[c].delete(sorted[i][0])
+ end
+ end
+ end
+
feature_dataset.save(@subjectid)
feature_dataset.uri
end