From 3539215bdbf35a6a7a78456ac409bfd66d581f3e Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Fri, 16 Nov 2012 12:58:54 +0100 Subject: Added get_target parameter --- fminer.rb | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/fminer.rb b/fminer.rb index 3dc2bf2..735df7d 100644 --- a/fminer.rb +++ b/fminer.rb @@ -77,6 +77,7 @@ get "/fminer/bbrc/?" do { DC.description => "BBRC classes, pass 'false' to switch off mining for BBRC representatives.", OT.paramScope => "optional", DC.title => "backbone" }, { DC.description => "Significance threshold (between 0 and 1)", OT.paramScope => "optional", DC.title => "min_chisq_significance" }, { DC.description => "Whether subgraphs should be weighted with their occurrence counts in the instances (frequency)", OT.paramScope => "optional", DC.title => "nr_hits" }, + { DC.description => "Set to 'true' to obtain target variable as a feature", OT.paramScope => "optional", DC.title => "nr_hits" }, ] } case request.env['HTTP_ACCEPT'] @@ -141,6 +142,7 @@ get "/fminer/last/?" do { DC.description => "Minimum frequency", OT.paramScope => "optional", DC.title => "min_frequency" }, { DC.description => "Feature type, can be 'paths' or 'trees'", OT.paramScope => "optional", DC.title => "feature_type" }, { DC.description => "Whether subgraphs should be weighted with their occurrence counts in the instances (frequency)", OT.paramScope => "optional", DC.title => "nr_hits" }, + { DC.description => "Set to 'true' to obtain target variable as a feature", OT.paramScope => "optional", DC.title => "nr_hits" }, ] } case request.env['HTTP_ACCEPT'] @@ -201,6 +203,7 @@ end # - min_chisq_significance Significance threshold (between 0 and 1) # - nr_hits Set to "true" to get hit count instead of presence # - complete_entries Set to "true" to obtain data entries for each compound +# - get_target Set to "true" to obtain target variable as feature # @return [text/uri-list] Task URI post '/fminer/bbrc/?' do @@ -245,6 +248,9 @@ post '/fminer/bbrc/?' do # Add data to fminer @@fminer.add_fminer_data(@@bbrc, value_map) + if (params[:get_target] == "true") + feature_dataset.add_feature @@fminer.prediction_feature.uri, @@fminer.prediction_feature.metadata + end g_array=@@fminer.all_activities.values # DV: calculation of global median for effect calculation g_median=g_array.to_scale.median @@ -322,6 +328,9 @@ post '/fminer/bbrc/?' do @@fminer.training_dataset.compounds.each { |cmpd| feature_dataset.add_compound(cmpd) # *unconditionally* add compounds *in order* i = which_row[cmpd] + if (params[:get_target] == "true") + feature_dataset.add_data_entry ( cmpd, @@fminer.prediction_feature.uri, @@fminer.training_dataset.data_entries[cmpd][@@fminer.prediction_feature.uri][i] ) + end fminer_results[cmpd] && fminer_results[cmpd].each { |feature, values| feature_dataset.add_data_entry( cmpd, feature, values[i] ) } @@ -532,6 +541,7 @@ end # - feature_type Feature type, can be 'paths' or 'trees' (default "trees") # - nr_hits Set to "true" to get hit count instead of presence # - complete_entries Set to "true" to obtain data entries for each compound +# - get_target Set to "true" to obtain target variable as feature # @return [text/uri-list] Task URI post '/fminer/last/?' do @@ -573,6 +583,9 @@ post '/fminer/last/?' do # Add data to fminer @@fminer.add_fminer_data(@@last, value_map) + if (params[:get_target] == "true") + feature_dataset.add_feature @@fminer.prediction_feature.uri, @@fminer.prediction_feature.metadata + end raise "No compounds in dataset #{@@fminer.training_dataset.uri}" if @@fminer.compounds.size==0 @@ -596,7 +609,23 @@ post '/fminer/last/?' do params[:complete_entries] == "true" ? complete_entries=true : complete_entries=false matches, counts = lu.match_rb(@@fminer.smi,smarts,hit_count,complete_entries) # creates instantiations - @@fminer.training_dataset.compounds.each { |cmpd| feature_dataset.add_compound(cmpd) } + + which_row = @@fminer.training_dataset.compounds.inject({}) { |h,id| h[id]=0; h } + + all_target_vals = [] + if (params[:get_target] == "true") + @@fminer.training_dataset.compounds.each { |cmpd| + all_target_vals << @@fminer.training_dataset.data_entries[cmpd][@@fminer.prediction_feature.uri][which_row[cmpd]] + which_row[cmpd] += 1 + } + end + + @@fminer.training_dataset.compounds.each_with_index { |cmpd,idx| + feature_dataset.add_compound(cmpd) + if (params[:get_target] == "true") + feature_dataset.add_data_entry ( cmpd, @@fminer.prediction_feature.uri, all_target_vals[idx] ) + end + } matches.each do |smarts, ids| metadata = calc_metadata (smarts, ids, counts[smarts], @@last, nil, value_map, params) feature_uri = File.join feature_dataset.uri,"feature","last", feature_dataset.features.size.to_s -- cgit v1.2.3 From 9b00442a2edd1c0f57398462e4444029cd418e84 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Tue, 4 Dec 2012 11:52:26 +0100 Subject: Added weight_feature parameter --- fminer.rb | 18 ++++++++++++------ libfminer | 2 +- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/fminer.rb b/fminer.rb index 735df7d..7fc7db4 100644 --- a/fminer.rb +++ b/fminer.rb @@ -78,6 +78,7 @@ get "/fminer/bbrc/?" do { DC.description => "Significance threshold (between 0 and 1)", OT.paramScope => "optional", DC.title => "min_chisq_significance" }, { DC.description => "Whether subgraphs should be weighted with their occurrence counts in the instances (frequency)", OT.paramScope => "optional", DC.title => "nr_hits" }, { DC.description => "Set to 'true' to obtain target variable as a feature", OT.paramScope => "optional", DC.title => "nr_hits" }, + { DC.description => "Feature URI for weight feature", OT.paramScope => "optional", DC.title => "weight_feature" } ] } case request.env['HTTP_ACCEPT'] @@ -204,6 +205,7 @@ end # - nr_hits Set to "true" to get hit count instead of presence # - complete_entries Set to "true" to obtain data entries for each compound # - get_target Set to "true" to obtain target variable as feature +# - weight_feature weight_feature URI of the weight feature # @return [text/uri-list] Task URI post '/fminer/bbrc/?' do @@ -226,7 +228,7 @@ post '/fminer/bbrc/?' do @@bbrc.SetConsoleOut(false) feature_dataset = OpenTox::Dataset.new(nil, @subjectid) - feature_dataset.add_metadata({ + metadata={ DC.title => "BBRC representatives for " + @@fminer.training_dataset.metadata[DC.title].to_s, DC.creator => url_for('/fminer/bbrc',:full), OT.hasSource => url_for('/fminer/bbrc', :full), @@ -236,9 +238,12 @@ post '/fminer/bbrc/?' do { DC.title => "min_frequency", OT.paramValue => @@fminer.minfreq }, { DC.title => "nr_hits", OT.paramValue => (params[:nr_hits] == "true" ? "true" : "false") }, { DC.title => "backbone", OT.paramValue => (params[:backbone] == "false" ? "false" : "true") } - - ] - }) + ] + } + if @@fminer.weight_feature + metadata[OT.parameters] << {DC.title => "weight_feature", OT.paramValue => params[:weight_feature]} + end + feature_dataset.add_metadata(metadata) feature_dataset.save(@subjectid) @@fminer.compounds = [] @@ -305,8 +310,9 @@ post '/fminer/bbrc/?' do OT.parameters => [ { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] }, { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] } - ] + ] } + feature_dataset.add_feature feature_uri, metadata end @@ -647,7 +653,7 @@ post '/fminer/last/?' do raise OpenTox::ServiceUnavailableError.newtask.uri+"\n" if task.status == "Cancelled" halt 202,task.uri.to_s+"\n" end - + # Matches features of a a feature dataset onto instances of another dataset. # The latter is referred to as 'training dataset', since p-values are computed, # if user passes a prediction feature, or if the training dataset has only one feature. diff --git a/libfminer b/libfminer index 4327230..6808320 160000 --- a/libfminer +++ b/libfminer @@ -1 +1 @@ -Subproject commit 4327230e9f517a9e6624e6b8e018aa3cbcbb8022 +Subproject commit 6808320dd0c822dd544ba2915e874d1453f0176b -- cgit v1.2.3 From 6fd8ab32a6a6a1a43948c2e9cb02af9e1fe764bc Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Tue, 4 Dec 2012 13:50:23 +0100 Subject: Fixed which_row to training compounds --- lazar.rb | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/lazar.rb b/lazar.rb index 11643b3..313c7d2 100644 --- a/lazar.rb +++ b/lazar.rb @@ -183,13 +183,17 @@ post '/lazar/?' do # Creating InChi/URI Hash from trainig_feature for comparison with training_dataset to avoid missmatches caused by different URI authorities feature_compounds = {} - which_row={} training_features.compounds.each {|f_c_uri| f_compound = OpenTox::Compound.new(f_c_uri) feature_compounds[f_compound.to_inchi] = f_c_uri - which_row[f_compound.to_inchi] = 0 } - + + which_row=training_dataset.compounds.inject({}) { |h,c| + t_compound = OpenTox::Compound.new(c) + h[t_compound.to_inchi]=0 + h + } + training_dataset.compounds.each do |t_c_uri| t_compound = OpenTox::Compound.new(t_c_uri) @@ -201,7 +205,6 @@ post '/lazar/?' do else lazar.fingerprints[t_c_uri] = {} unless lazar.fingerprints[t_c_uri] entry.keys.each do |feature| - # CASE 1: Substructure if (lazar.feature_calculation_algorithm == "Substructure.match") || (lazar.feature_calculation_algorithm == "Substructure.match_hits") if training_features.features[feature] @@ -214,16 +217,15 @@ post '/lazar/?' do lazar.effects[smarts] = training_features.features[feature][OT.effect] end end - # CASE 2: Others elsif entry[feature].flatten.size == 1 lazar.fingerprints[t_c_uri][feature] = [] unless lazar.fingerprints[t_c_uri][feature] lazar.fingerprints[t_c_uri][feature] << entry[feature][row_idx] lazar.features << feature unless lazar.features.include? feature end - end end + which_row[t_compound.to_inchi] += 1 end -- cgit v1.2.3 From 3354bb1fa500df3420d659553db0da9e49f02357 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Tue, 4 Dec 2012 15:13:34 +0100 Subject: Added doc --- README.md | 7 +++++-- fminer.rb | 4 ++-- libfminer | 2 +- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 043f001..1abf4f7 100644 --- a/README.md +++ b/README.md @@ -48,12 +48,15 @@ REST operations [feature_type=trees], [backbone=true], [min_chisq_significance=0.95], - [nr_hits=false] + [nr_hits=false], + [get_target=false] + [weight_feature] Create last features POST /fminer/last dataset_uri, URI for feature dataset 200,400,404,500 prediction_feature, [min_frequency=8 %], [feature_type=trees], - [nr_hits=false] + [nr_hits=false], + [get_target=false] Create features POST /pc/AllDescriptors dataset_uri, URI for dataset 200,400,404,500 [pc_type=constitutional, topological,geometrical, diff --git a/fminer.rb b/fminer.rb index 7fc7db4..1d009e5 100644 --- a/fminer.rb +++ b/fminer.rb @@ -77,7 +77,7 @@ get "/fminer/bbrc/?" do { DC.description => "BBRC classes, pass 'false' to switch off mining for BBRC representatives.", OT.paramScope => "optional", DC.title => "backbone" }, { DC.description => "Significance threshold (between 0 and 1)", OT.paramScope => "optional", DC.title => "min_chisq_significance" }, { DC.description => "Whether subgraphs should be weighted with their occurrence counts in the instances (frequency)", OT.paramScope => "optional", DC.title => "nr_hits" }, - { DC.description => "Set to 'true' to obtain target variable as a feature", OT.paramScope => "optional", DC.title => "nr_hits" }, + { DC.description => "Set to 'true' to obtain target variable as a feature", OT.paramScope => "optional", DC.title => "get_target" }, { DC.description => "Feature URI for weight feature", OT.paramScope => "optional", DC.title => "weight_feature" } ] } @@ -143,7 +143,7 @@ get "/fminer/last/?" do { DC.description => "Minimum frequency", OT.paramScope => "optional", DC.title => "min_frequency" }, { DC.description => "Feature type, can be 'paths' or 'trees'", OT.paramScope => "optional", DC.title => "feature_type" }, { DC.description => "Whether subgraphs should be weighted with their occurrence counts in the instances (frequency)", OT.paramScope => "optional", DC.title => "nr_hits" }, - { DC.description => "Set to 'true' to obtain target variable as a feature", OT.paramScope => "optional", DC.title => "nr_hits" }, + { DC.description => "Set to 'true' to obtain target variable as a feature", OT.paramScope => "optional", DC.title => "get_target" }, ] } case request.env['HTTP_ACCEPT'] diff --git a/libfminer b/libfminer index 6808320..9c7ca17 160000 --- a/libfminer +++ b/libfminer @@ -1 +1 @@ -Subproject commit 6808320dd0c822dd544ba2915e874d1453f0176b +Subproject commit 9c7ca17efe84fa722ac7a9ba40fee92f51d04271 -- cgit v1.2.3