From 778139bd41d65ae9dab4302115ba37c02e48bb0f Mon Sep 17 00:00:00 2001 From: davor Date: Thu, 22 Dec 2011 16:08:24 +0100 Subject: Simplified nr_hits and add Substructure.match_hits --- lazar.rb | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/lazar.rb b/lazar.rb index 9aac0d8..0d6b704 100644 --- a/lazar.rb +++ b/lazar.rb @@ -75,13 +75,16 @@ post '/lazar/?' do } elsif prediction_feature.feature_type == "regression" lazar.nr_hits = true + lazar.feature_calculation_algorithm = "Substructure.match_hits" lazar.prediction_algorithm = "Neighbors.local_svm_regression" end if params[:nr_hits] == "false" # if nr_hits is set optional to true/false it will return as String (but should be True/FalseClass) lazar.nr_hits = false + lazar.feature_calculation_algorithm = "Substructure.match" elsif params[:nr_hits] == "true" lazar.nr_hits = true + lazar.feature_calculation_algorithm = "Substructure.match_hits" end params[:nr_hits] = "true" if lazar.nr_hits @@ -114,11 +117,11 @@ post '/lazar/?' do end else # create features params[:feature_generation_uri] = feature_generation_uri - if feature_generation_uri.match(/fminer/) - lazar.feature_calculation_algorithm = "Substructure.match" - else - raise OpenTox::NotFoundError.new "External feature generation services not yet supported" - end + #if feature_generation_uri.match(/fminer/) + # lazar.feature_calculation_algorithm = "Substructure.match" + #else + # raise OpenTox::NotFoundError.new "External feature generation services not yet supported" + #end params[:subjectid] = @subjectid prediction_feature = OpenTox::Feature.find params[:prediction_feature], @subjectid if prediction_feature.feature_type == "regression" && feature_generation_uri.match(/fminer/) @@ -136,21 +139,21 @@ post '/lazar/?' do # sorted features for index lookups - lazar.features = training_features.features.sort if prediction_feature.feature_type == "regression" and lazar.feature_calculation_algorithm != "Substructure.match" + lazar.features = training_features.features.sort if prediction_feature.feature_type == "regression" and lazar.feature_calculation_algorithm != "Substructure.match" || "Substructure.match_hits" training_features.data_entries.each do |compound,entry| lazar.fingerprints[compound] = {} unless lazar.fingerprints[compound] entry.keys.each do |feature| # CASE 1: Substructure - if lazar.feature_calculation_algorithm == "Substructure.match" + if lazar.feature_calculation_algorithm == "Substructure.match" || "Substructure.match_hits" if training_features.features[feature] smarts = training_features.features[feature][OT.smarts] #lazar.fingerprints[compound] << smarts if params[:nr_hits] - lazar.fingerprints[compound][smarts] = entry[feature].flatten.first + lazar.fingerprints[compound][smarts] = entry[feature].flatten.first * training_features.features[feature][OT.pValue] else - lazar.fingerprints[compound][smarts] = 1 + lazar.fingerprints[compound][smarts] = 1 * training_features.features[feature][OT.pValue] end unless lazar.features.include? smarts lazar.features << smarts -- cgit v1.2.3 From 3b74791450ecc1bd89f6ddf84fd1976403188231 Mon Sep 17 00:00:00 2001 From: davor Date: Fri, 23 Dec 2011 11:06:58 +0100 Subject: Fixed Bug and improving clarity --- lazar.rb | 129 +++++++++++++++++++++++++-------------------------------------- 1 file changed, 52 insertions(+), 77 deletions(-) diff --git a/lazar.rb b/lazar.rb index 0d6b704..89e66c6 100644 --- a/lazar.rb +++ b/lazar.rb @@ -45,40 +45,55 @@ post '/lazar/?' do task = OpenTox::Task.create("Create lazar model",url_for('/lazar',:full)) do |task| + + # # # Dataset present, prediction feature present? raise OpenTox::NotFoundError.new "Dataset #{dataset_uri} not found." unless training_activities = OpenTox::Dataset.new(dataset_uri) training_activities.load_all(@subjectid) + # Prediction Feature prediction_feature = OpenTox::Feature.find(params[:prediction_feature],@subjectid) unless params[:prediction_feature] # try to read prediction_feature from dataset raise OpenTox::NotFoundError.new "#{training_activities.features.size} features in dataset #{dataset_uri}. Please provide a prediction_feature parameter." unless training_activities.features.size == 1 prediction_feature = OpenTox::Feature.find(training_activities.features.keys.first,@subjectid) params[:prediction_feature] = prediction_feature.uri # pass to feature mining service end + raise OpenTox::NotFoundError.new "No feature #{prediction_feature.uri} in dataset #{params[:dataset_uri]}. (features: "+ training_activities.features.inspect+")" unless training_activities.features and training_activities.features.include?(prediction_feature.uri) - feature_generation_uri = @@feature_generation_default unless feature_generation_uri = params[:feature_generation_uri] - - raise OpenTox::NotFoundError.new "No feature #{prediction_feature.uri} in dataset #{params[:dataset_uri]}. (features: "+ - training_activities.features.inspect+")" unless training_activities.features and training_activities.features.include?(prediction_feature.uri) + # Feature Generation URI + feature_generation_uri = @@feature_generation_default unless ( (feature_generation_uri = params[:feature_generation_uri]) || (params[:feature_dataset_uri]) ) + # Create instance lazar = OpenTox::Model::Lazar.new - lazar.min_sim = params[:min_sim].to_f if params[:min_sim] - # AM: Manage endpoint related variables. + # # # ENDPOINT RELATED + + # Default Values + # Classification: Weighted Majority, Substructure.match if prediction_feature.feature_type == "classification" @training_classes = training_activities.accept_values(prediction_feature.uri).sort @training_classes.each_with_index { |c,i| lazar.value_map[i+1] = c # don't use '0': we must take the weighted mean later. params[:value_map] = lazar.value_map } + # Regression: SVM, Substructure.match_hits elsif prediction_feature.feature_type == "regression" - lazar.nr_hits = true + lazar.nr_hits = true # AM: Brauchen wir die Variable noch? Kann man an feature_calculation_algorithm auch ablesen (nĂchste Zeile) lazar.feature_calculation_algorithm = "Substructure.match_hits" lazar.prediction_algorithm = "Neighbors.local_svm_regression" end + + + + # # # USER VALUES + + # Min Sim + lazar.min_sim = params[:min_sim].to_f if params[:min_sim] + + # Nr Hits if params[:nr_hits] == "false" # if nr_hits is set optional to true/false it will return as String (but should be True/FalseClass) lazar.nr_hits = false lazar.feature_calculation_algorithm = "Substructure.match" @@ -86,7 +101,18 @@ post '/lazar/?' do lazar.nr_hits = true lazar.feature_calculation_algorithm = "Substructure.match_hits" end - params[:nr_hits] = "true" if lazar.nr_hits + params[:nr_hits] = "true" if lazar.nr_hits + + # Algorithm + lazar.prediction_algorithm = "Neighbors.#{params[:prediction_algorithm]}" unless params[:prediction_algorithm].nil? + + # Propositionalization + lazar.prop_kernel = true if (params[:local_svm_kernel] == "propositionalized" || params[:prediction_algorithm] == "local_mlr_prop") + + # Conf_stdev + lazar.conf_stdev = ( (params[:conf_stdev] == "true") ? true : false ) + + @@ -99,29 +125,20 @@ post '/lazar/?' do - # - # AM: features - # - # - # + # # # Features - # READ OR CREATE + # Read Features if params[:feature_dataset_uri] + lazar.feature_calculation_algorithm = "" # TODO: Implement lookup in feature dataset feature_dataset_uri = params[:feature_dataset_uri] training_features = OpenTox::Dataset.new(feature_dataset_uri) - case training_features.feature_type(@subjectid) - when "classification" - lazar.similarity_algorithm = "Similarity.tanimoto" - when "regression" - lazar.similarity_algorithm = "Similarity.euclid" + if training_features.feature_type(@subjectid) == "regression" + lazar.similarity_algorithm = "Similarity.cosine" end - else # create features + + # Create Features + else params[:feature_generation_uri] = feature_generation_uri - #if feature_generation_uri.match(/fminer/) - # lazar.feature_calculation_algorithm = "Substructure.match" - #else - # raise OpenTox::NotFoundError.new "External feature generation services not yet supported" - #end params[:subjectid] = @subjectid prediction_feature = OpenTox::Feature.find params[:prediction_feature], @subjectid if prediction_feature.feature_type == "regression" && feature_generation_uri.match(/fminer/) @@ -133,20 +150,16 @@ post '/lazar/?' do - # WRITE IN MODEL + # # # Write fingerprints training_features.load_all(@subjectid) raise OpenTox::NotFoundError.new "Dataset #{feature_dataset_uri} not found." if training_features.nil? - # sorted features for index lookups - - lazar.features = training_features.features.sort if prediction_feature.feature_type == "regression" and lazar.feature_calculation_algorithm != "Substructure.match" || "Substructure.match_hits" - training_features.data_entries.each do |compound,entry| lazar.fingerprints[compound] = {} unless lazar.fingerprints[compound] entry.keys.each do |feature| # CASE 1: Substructure - if lazar.feature_calculation_algorithm == "Substructure.match" || "Substructure.match_hits" + if lazar.feature_calculation_algorithm == "Substructure.match" || lazar.feature_calculation_algorithm == "Substructure.match_hits" if training_features.features[feature] smarts = training_features.features[feature][OT.smarts] #lazar.fingerprints[compound] << smarts @@ -163,26 +176,11 @@ post '/lazar/?' do end # CASE 2: Others + elsif entry[feature].flatten.size == 1 + lazar.fingerprints[compound][feature] = entry[feature].flatten.first + lazar.features << feature unless lazar.features.include? feature else - case training_features.feature_type(@subjectid) - when "classification" - # fingerprints are sets - if entry[feature].flatten.size == 1 - #lazar.fingerprints[compound] << feature if entry[feature].flatten.first.to_s.match(TRUE_REGEXP) - lazar.fingerprints[compound][feature] = entry[feature].flatten.first if entry[feature].flatten.first.to_s.match(TRUE_REGEXP) - lazar.features << feature unless lazar.features.include? feature - else - LOGGER.warn "More than one entry (#{entry[feature].inspect}) for compound #{compound}, feature #{feature}" - end - when "regression" - # fingerprints are arrays - if entry[feature].flatten.size == 1 - lazar.fingerprints[compound][lazar.features.index(feature)] = entry[feature].flatten.first - #lazar.fingerprints[compound][feature] = entry[feature].flatten.first - else - LOGGER.warn "More than one entry (#{entry[feature].inspect}) for compound #{compound}, feature #{feature}" - end - end + LOGGER.warn "More than one entry (#{entry[feature].inspect}) for compound #{compound}, feature #{feature}" end end end @@ -191,28 +189,8 @@ post '/lazar/?' do - - # - # AM: SETTINGS - # - # - # - - # AM: allow settings override by user - lazar.prediction_algorithm = "Neighbors.#{params[:prediction_algorithm]}" unless params[:prediction_algorithm].nil? - lazar.prop_kernel = true if (params[:local_svm_kernel] == "propositionalized" || params[:prediction_algorithm] == "local_mlr_prop") - lazar.conf_stdev = false - lazar.conf_stdev = true if params[:conf_stdev] == "true" - - - - - - # - # AM: Feed data - # - # - # + + # # # Activities if prediction_feature.feature_type == "regression" training_activities.data_entries.each do |compound,entry| @@ -238,11 +216,7 @@ post '/lazar/?' do - # - # AM: Metadata - # - # - # + # Metadata lazar.metadata[DC.title] = "lazar model for #{URI.decode(File.basename(prediction_feature.uri))}" lazar.metadata[OT.dependentVariables] = prediction_feature.uri @@ -264,6 +238,7 @@ post '/lazar/?' do model_uri = lazar.save(@subjectid) LOGGER.info model_uri + " created #{Time.now}" model_uri + end response['Content-Type'] = 'text/uri-list' raise OpenTox::ServiceUnavailableError.newtask.uri+"\n" if task.status == "Cancelled" -- cgit v1.2.3 From a74fc0029e810d699b94e21c9fe922d66a4d5c4f Mon Sep 17 00:00:00 2001 From: davor Date: Fri, 23 Dec 2011 14:10:53 +0100 Subject: Removed nr_hits --- lazar.rb | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lazar.rb b/lazar.rb index 89e66c6..f61be83 100644 --- a/lazar.rb +++ b/lazar.rb @@ -80,7 +80,7 @@ post '/lazar/?' do } # Regression: SVM, Substructure.match_hits elsif prediction_feature.feature_type == "regression" - lazar.nr_hits = true # AM: Brauchen wir die Variable noch? Kann man an feature_calculation_algorithm auch ablesen (nĂchste Zeile) + #lazar.nr_hits = true # AM: Brauchen wir die Variable noch? Kann man an feature_calculation_algorithm auch ablesen (nĂchste Zeile) lazar.feature_calculation_algorithm = "Substructure.match_hits" lazar.prediction_algorithm = "Neighbors.local_svm_regression" end @@ -95,13 +95,13 @@ post '/lazar/?' do # Nr Hits if params[:nr_hits] == "false" # if nr_hits is set optional to true/false it will return as String (but should be True/FalseClass) - lazar.nr_hits = false + #lazar.nr_hits = false lazar.feature_calculation_algorithm = "Substructure.match" elsif params[:nr_hits] == "true" - lazar.nr_hits = true + #lazar.nr_hits = true lazar.feature_calculation_algorithm = "Substructure.match_hits" end - params[:nr_hits] = "true" if lazar.nr_hits + params[:nr_hits] = "true" if lazar.feature_calculation_algorithm = "Substructure.match_hits" #not sure if this line in needed # Algorithm lazar.prediction_algorithm = "Neighbors.#{params[:prediction_algorithm]}" unless params[:prediction_algorithm].nil? @@ -163,7 +163,7 @@ post '/lazar/?' do if training_features.features[feature] smarts = training_features.features[feature][OT.smarts] #lazar.fingerprints[compound] << smarts - if params[:nr_hits] + if lazar.feature_calculation_algorithm == "Substructure.match_hits" lazar.fingerprints[compound][smarts] = entry[feature].flatten.first * training_features.features[feature][OT.pValue] else lazar.fingerprints[compound][smarts] = 1 * training_features.features[feature][OT.pValue] -- cgit v1.2.3 From 80bc28fccde34d9dcfe1621e466cf0930158591f Mon Sep 17 00:00:00 2001 From: davor Date: Fri, 23 Dec 2011 15:24:56 +0100 Subject: Fixed bug. --- lazar.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lazar.rb b/lazar.rb index f61be83..d66312e 100644 --- a/lazar.rb +++ b/lazar.rb @@ -101,7 +101,7 @@ post '/lazar/?' do #lazar.nr_hits = true lazar.feature_calculation_algorithm = "Substructure.match_hits" end - params[:nr_hits] = "true" if lazar.feature_calculation_algorithm = "Substructure.match_hits" #not sure if this line in needed + params[:nr_hits] = "true" if lazar.feature_calculation_algorithm == "Substructure.match_hits" #not sure if this line in needed # Algorithm lazar.prediction_algorithm = "Neighbors.#{params[:prediction_algorithm]}" unless params[:prediction_algorithm].nil? -- cgit v1.2.3 From 2e7ff3936adfea4ad4bc456a13b2c2fed0ad581b Mon Sep 17 00:00:00 2001 From: davor Date: Fri, 23 Dec 2011 16:54:35 +0100 Subject: Added pc_type to model --- lazar.rb | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/lazar.rb b/lazar.rb index d66312e..f69495c 100644 --- a/lazar.rb +++ b/lazar.rb @@ -108,11 +108,13 @@ post '/lazar/?' do # Propositionalization lazar.prop_kernel = true if (params[:local_svm_kernel] == "propositionalized" || params[:prediction_algorithm] == "local_mlr_prop") + + # PC type + lazar.pc_type = params[:pc_type] unless params[:pc_type].nil? # Conf_stdev lazar.conf_stdev = ( (params[:conf_stdev] == "true") ? true : false ) - - + @@ -129,7 +131,7 @@ post '/lazar/?' do # Read Features if params[:feature_dataset_uri] - lazar.feature_calculation_algorithm = "" # TODO: Implement lookup in feature dataset + lazar.feature_calculation_algorithm = "Substructure.lookup" feature_dataset_uri = params[:feature_dataset_uri] training_features = OpenTox::Dataset.new(feature_dataset_uri) if training_features.feature_type(@subjectid) == "regression" -- cgit v1.2.3 From 6374df2b69c039ffbc269a70198b94360b68e8de Mon Sep 17 00:00:00 2001 From: davor Date: Tue, 10 Jan 2012 08:31:20 +0100 Subject: Parameter clean-up propositionalized pc_type nr_hits min_sim now regular parameters (stored in metadata) Updated flowchart at http://www.maunz.de/wordpress/opentox/2011/lazar-models-and-how-to-trigger-them --- README.md | 21 +++++++++++---------- lazar.rb | 33 +++++++++++++++++++-------------- 2 files changed, 30 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index 8383cb6..e854ac3 100644 --- a/README.md +++ b/README.md @@ -31,22 +31,23 @@ REST operations [feature_type=trees], [nr_hits=false] Create lazar model POST /lazar dataset_uri, URI for lazar model 200,400,404,500 - prediction_feature, - feature_generation_uri - prediction_algorithm - [local_svm_kernel=weighted_tanimoto] - [min_sim=0.3] - [nr_hits=false] - [conf_stdev=false] + [prediction_feature], + [feature_generation_uri], + [prediction_algorithm], + [feature_dataset_uri], + [propositionalized=false], + [pc_type=null], + [nr_hits=false (class.), true (regr.)], + [min_sim=0.3 (nominal), 0.6 (numeric features)] Synopsis -------- - prediction\_algorithm: One of "weighted\_majority\_vote" (default for classification), "local\_svm\_classification", "local\_svm\_regression (default for regression)", "local\_mlr\_prop". "weighted\_majority\_vote" is not applicable for regression. "local\_mlr\_prop" is not applicable for classification. -- local\_svm\_kernel: One of "weighted\_tanimoto", "propositionalized". local\_svm\_kernel is not appplicable when prediction\_algorithm="weighted\_majority\_vote". -- min_sim: The minimum similarity threshold for neighbors. Numeric value in [0,1]. +- propositionalized: One of "true", "false". Not appplicable when prediction\_algorithm="weighted\_majority\_vote". +- pc_type: Mandatory for feature dataset, one of [geometrical, topological, electronic, constitutional, hybrid, cpsa]. - nr_hits: Whether for instantiated models (local\_svm\_kernel = "propositionalized" for prediction_algorithm="local\_svm\_classification" or "local\_svm\_regression", or for prediction_algorithm="local\_mlr\_prop") nominal features should be instantiated with their occurrence counts in the instances. For non-instantiated models (local\_svm\_kernel = "weighted\_tanimoto" for prediction_algorithm="local\_svm\_classification" or "local\_svm\_regression", or for prediction_algorithm="weighted\_majority\_vote") the neighbor-to-neighbor and neighbor-to-query similarity also integrates these counts, when the parameter is set. One of "true", "false". -- conf_stdev: Whether confidence integrates distribution of neighbor activity values. When "true", the exp(-1.0*(standard deviation of neighbor activities)) is multiplied on the similarity. One of "true", "false". +- min_sim: The minimum similarity threshold for neighbors. Numeric value in [0,1]. See http://www.maunz.de/wordpress/opentox/2011/lazar-models-and-how-to-trigger-them for a graphical overview. diff --git a/lazar.rb b/lazar.rb index f69495c..6c2d8ed 100644 --- a/lazar.rb +++ b/lazar.rb @@ -12,9 +12,9 @@ get '/lazar/?' do OT.parameters => [ { DC.description => "Dataset URI with the dependent variable", OT.paramScope => "mandatory", DC.title => "dataset_uri" }, { DC.description => "Feature URI for dependent variable. Optional for datasets with only a single feature.", OT.paramScope => "optional", DC.title => "prediction_feature" }, - { DC.description => "URI of feature genration service. Default: #{@@feature_generation_default}", OT.paramScope => "optional", DC.title => "feature_generation_uri" }, + { DC.description => "URI of feature generation service. Default: #{@@feature_generation_default}", OT.paramScope => "optional", DC.title => "feature_generation_uri" }, { DC.description => "URI of feature dataset. If this parameter is set no feature generation algorithm will be called", OT.paramScope => "optional", DC.title => "feature_dataset_uri" }, - { DC.description => "Further parameters for the feaature generation service", OT.paramScope => "optional" } + { DC.description => "Further parameters for the feature generation service", OT.paramScope => "optional" } ] } case request.env['HTTP_ACCEPT'] @@ -80,7 +80,6 @@ post '/lazar/?' do } # Regression: SVM, Substructure.match_hits elsif prediction_feature.feature_type == "regression" - #lazar.nr_hits = true # AM: Brauchen wir die Variable noch? Kann man an feature_calculation_algorithm auch ablesen (nĂchste Zeile) lazar.feature_calculation_algorithm = "Substructure.match_hits" lazar.prediction_algorithm = "Neighbors.local_svm_regression" end @@ -91,28 +90,28 @@ post '/lazar/?' do # # # USER VALUES # Min Sim - lazar.min_sim = params[:min_sim].to_f if params[:min_sim] + min_sim = params[:min_sim].to_f if params[:min_sim] + min_sim = 0.3 unless params[:min_sim] # Nr Hits - if params[:nr_hits] == "false" # if nr_hits is set optional to true/false it will return as String (but should be True/FalseClass) - #lazar.nr_hits = false - lazar.feature_calculation_algorithm = "Substructure.match" - elsif params[:nr_hits] == "true" - #lazar.nr_hits = true + nr_hits = false + if params[:nr_hits] == "true" lazar.feature_calculation_algorithm = "Substructure.match_hits" + nr_hits = true end params[:nr_hits] = "true" if lazar.feature_calculation_algorithm == "Substructure.match_hits" #not sure if this line in needed # Algorithm - lazar.prediction_algorithm = "Neighbors.#{params[:prediction_algorithm]}" unless params[:prediction_algorithm].nil? + lazar.prediction_algorithm = "Neighbors.#{params[:prediction_algorithm]}" if params[:prediction_algorithm] # Propositionalization - lazar.prop_kernel = true if (params[:local_svm_kernel] == "propositionalized" || params[:prediction_algorithm] == "local_mlr_prop") + propositionalized = false + propositionalized = true if (params[:propositionalized] == "true" || params[:prediction_algorithm] == "local_mlr_prop") # PC type - lazar.pc_type = params[:pc_type] unless params[:pc_type].nil? + pc_type = params[:pc_type] unless params[:pc_type].nil? - # Conf_stdev + # Conf_stdev --- To be removed?? lazar.conf_stdev = ( (params[:conf_stdev] == "true") ? true : false ) @@ -136,6 +135,8 @@ post '/lazar/?' do training_features = OpenTox::Dataset.new(feature_dataset_uri) if training_features.feature_type(@subjectid) == "regression" lazar.similarity_algorithm = "Similarity.cosine" + min_sim = 0.6 unless params[:min_sim] + raise OpenTox::NotFoundError.new "No pc_type parameter." unless params[:pc_type] end # Create Features @@ -234,7 +235,11 @@ post '/lazar/?' do lazar.metadata[OT.parameters] = [ {DC.title => "dataset_uri", OT.paramValue => dataset_uri}, {DC.title => "prediction_feature", OT.paramValue => prediction_feature.uri}, - {DC.title => "feature_generation_uri", OT.paramValue => feature_generation_uri} + {DC.title => "feature_generation_uri", OT.paramValue => feature_generation_uri}, + {DC.title => "propositionalized", OT.paramValue => propositionalized}, + {DC.title => "pc_type", OT.paramValue => pc_type}, + {DC.title => "nr_hits", OT.paramValue => nr_hits}, + {DC.title => "min_sim", OT.paramValue => min_sim} ] model_uri = lazar.save(@subjectid) -- cgit v1.2.3 From 33d9217423a9b724bd707ffa03edc5b1482a4bc3 Mon Sep 17 00:00:00 2001 From: ot7 Date: Tue, 10 Jan 2012 10:54:07 +0100 Subject: Fixed bug --- lazar.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lazar.rb b/lazar.rb index 6c2d8ed..5182200 100644 --- a/lazar.rb +++ b/lazar.rb @@ -162,7 +162,7 @@ post '/lazar/?' do entry.keys.each do |feature| # CASE 1: Substructure - if lazar.feature_calculation_algorithm == "Substructure.match" || lazar.feature_calculation_algorithm == "Substructure.match_hits" + if ((lazar.feature_calculation_algorithm == "Substructure.match") || (lazar.feature_calculation_algorithm == "Substructure.match_hits")) if training_features.features[feature] smarts = training_features.features[feature][OT.smarts] #lazar.fingerprints[compound] << smarts -- cgit v1.2.3 From b462f330bfacff53efe21d0d803d0102110c65df Mon Sep 17 00:00:00 2001 From: davor Date: Fri, 13 Jan 2012 12:17:51 +0100 Subject: Fix: Adding fingerprint *only when activity exists* --- lazar.rb | 53 +++++++++++++++++++++++++++++------------------------ 1 file changed, 29 insertions(+), 24 deletions(-) diff --git a/lazar.rb b/lazar.rb index 6c2d8ed..242c20f 100644 --- a/lazar.rb +++ b/lazar.rb @@ -158,33 +158,38 @@ post '/lazar/?' do raise OpenTox::NotFoundError.new "Dataset #{feature_dataset_uri} not found." if training_features.nil? training_features.data_entries.each do |compound,entry| - lazar.fingerprints[compound] = {} unless lazar.fingerprints[compound] - entry.keys.each do |feature| - - # CASE 1: Substructure - if lazar.feature_calculation_algorithm == "Substructure.match" || lazar.feature_calculation_algorithm == "Substructure.match_hits" - if training_features.features[feature] - smarts = training_features.features[feature][OT.smarts] - #lazar.fingerprints[compound] << smarts - if lazar.feature_calculation_algorithm == "Substructure.match_hits" - lazar.fingerprints[compound][smarts] = entry[feature].flatten.first * training_features.features[feature][OT.pValue] - else - lazar.fingerprints[compound][smarts] = 1 * training_features.features[feature][OT.pValue] - end - unless lazar.features.include? smarts - lazar.features << smarts - lazar.p_values[smarts] = training_features.features[feature][OT.pValue] - lazar.effects[smarts] = training_features.features[feature][OT.effect] + + if training_activities.data_entries.has_key? compound + + lazar.fingerprints[compound] = {} unless lazar.fingerprints[compound] + entry.keys.each do |feature| + + # CASE 1: Substructure + if lazar.feature_calculation_algorithm == "Substructure.match" || lazar.feature_calculation_algorithm == "Substructure.match_hits" + if training_features.features[feature] + smarts = training_features.features[feature][OT.smarts] + #lazar.fingerprints[compound] << smarts + if lazar.feature_calculation_algorithm == "Substructure.match_hits" + lazar.fingerprints[compound][smarts] = entry[feature].flatten.first * training_features.features[feature][OT.pValue] + else + lazar.fingerprints[compound][smarts] = 1 * training_features.features[feature][OT.pValue] + end + unless lazar.features.include? smarts + lazar.features << smarts + lazar.p_values[smarts] = training_features.features[feature][OT.pValue] + lazar.effects[smarts] = training_features.features[feature][OT.effect] + end end - end - # CASE 2: Others - elsif entry[feature].flatten.size == 1 - lazar.fingerprints[compound][feature] = entry[feature].flatten.first - lazar.features << feature unless lazar.features.include? feature - else - LOGGER.warn "More than one entry (#{entry[feature].inspect}) for compound #{compound}, feature #{feature}" + # CASE 2: Others + elsif entry[feature].flatten.size == 1 + lazar.fingerprints[compound][feature] = entry[feature].flatten.first + lazar.features << feature unless lazar.features.include? feature + else + LOGGER.warn "More than one entry (#{entry[feature].inspect}) for compound #{compound}, feature #{feature}" + end end + end end task.progress 80 -- cgit v1.2.3 From c19b6a19c8ef0207824b038fade096af84649ea9 Mon Sep 17 00:00:00 2001 From: davor Date: Sun, 15 Jan 2012 15:52:18 +0100 Subject: Lowered sim threshold for cosine similarity to 0.4 --- lazar.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lazar.rb b/lazar.rb index 145565a..bd3c934 100644 --- a/lazar.rb +++ b/lazar.rb @@ -135,7 +135,7 @@ post '/lazar/?' do training_features = OpenTox::Dataset.new(feature_dataset_uri) if training_features.feature_type(@subjectid) == "regression" lazar.similarity_algorithm = "Similarity.cosine" - min_sim = 0.6 unless params[:min_sim] + min_sim = 0.4 unless params[:min_sim] raise OpenTox::NotFoundError.new "No pc_type parameter." unless params[:pc_type] end @@ -164,7 +164,7 @@ post '/lazar/?' do entry.keys.each do |feature| # CASE 1: Substructure - if lazar.feature_calculation_algorithm == "Substructure.match" || lazar.feature_calculation_algorithm == "Substructure.match_hits" + if (lazar.feature_calculation_algorithm == "Substructure.match") || (lazar.feature_calculation_algorithm == "Substructure.match_hits") if training_features.features[feature] smarts = training_features.features[feature][OT.smarts] #lazar.fingerprints[compound] << smarts -- cgit v1.2.3 From 05a39eba3d8f683f2f16884531f83e3a3f8c3938 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Thu, 19 Jan 2012 15:47:43 +0100 Subject: Making Prop default for SVM --- lazar.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lazar.rb b/lazar.rb index bd3c934..f2634ab 100644 --- a/lazar.rb +++ b/lazar.rb @@ -106,7 +106,7 @@ post '/lazar/?' do # Propositionalization propositionalized = false - propositionalized = true if (params[:propositionalized] == "true" || params[:prediction_algorithm] == "local_mlr_prop") + propositionalized = true if (params[:propositionalized] != "false" && ( params[:prediction_algorithm] == "local_mlr_prop" || params[:prediction_algorithm].include? "local_svm" ) ) # PC type pc_type = params[:pc_type] unless params[:pc_type].nil? -- cgit v1.2.3 From 10d5ece8b46abf72ab9c475d08c1d238f4079e39 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Thu, 19 Jan 2012 16:19:28 +0100 Subject: Fix to previous commit --- lazar.rb | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/lazar.rb b/lazar.rb index f2634ab..7edbb97 100644 --- a/lazar.rb +++ b/lazar.rb @@ -106,7 +106,13 @@ post '/lazar/?' do # Propositionalization propositionalized = false - propositionalized = true if (params[:propositionalized] != "false" && ( params[:prediction_algorithm] == "local_mlr_prop" || params[:prediction_algorithm].include? "local_svm" ) ) + propositionalized = true if ( params[:propositionalized] != "false" && + ( params[:prediction_algorithm] == "local_mlr_prop" || + ( params[:prediction_algorithm] && + params[:prediction_algorithm].include?("local_svm") + ) + ) + ) # PC type pc_type = params[:pc_type] unless params[:pc_type].nil? -- cgit v1.2.3 From ff279abf4d148b2708a7e2e1b7ee3df5b4387763 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Fri, 20 Jan 2012 17:19:18 +0100 Subject: Add new parameter, min_train_performance, which controls censoring in local_svm and local_svm_prop --- lazar.rb | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/lazar.rb b/lazar.rb index 7edbb97..8900f3a 100644 --- a/lazar.rb +++ b/lazar.rb @@ -117,6 +117,10 @@ post '/lazar/?' do # PC type pc_type = params[:pc_type] unless params[:pc_type].nil? + # Min train performance + min_train_performance = params[:min_train_performance].to_f if params[:min_train_performance] + min_train_performance = 0.1 unless params[:min_train_performance] + # Conf_stdev --- To be removed?? lazar.conf_stdev = ( (params[:conf_stdev] == "true") ? true : false ) @@ -249,7 +253,9 @@ post '/lazar/?' do {DC.title => "propositionalized", OT.paramValue => propositionalized}, {DC.title => "pc_type", OT.paramValue => pc_type}, {DC.title => "nr_hits", OT.paramValue => nr_hits}, - {DC.title => "min_sim", OT.paramValue => min_sim} + {DC.title => "min_sim", OT.paramValue => min_sim}, + {DC.title => "min_train_performance", OT.paramValue => min_train_performance}, + ] model_uri = lazar.save(@subjectid) -- cgit v1.2.3 From 6b9482101ff26b3e31cf145c4786ada56923d5f7 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Mon, 23 Jan 2012 08:08:31 +0100 Subject: Fixed bug in lazar.rb: propositionalized not set correctly --- lazar.rb | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/lazar.rb b/lazar.rb index 8900f3a..65752ec 100644 --- a/lazar.rb +++ b/lazar.rb @@ -106,13 +106,7 @@ post '/lazar/?' do # Propositionalization propositionalized = false - propositionalized = true if ( params[:propositionalized] != "false" && - ( params[:prediction_algorithm] == "local_mlr_prop" || - ( params[:prediction_algorithm] && - params[:prediction_algorithm].include?("local_svm") - ) - ) - ) + propositionalized = true if ( params[:propositionalized] != "false" && ( lazar.prediction_algorithm == "local_mlr_prop" || lazar.prediction_algorithm.include?("local_svm") ) ) # PC type pc_type = params[:pc_type] unless params[:pc_type].nil? -- cgit v1.2.3 From 8035d64e9f1c9d49d1ec947204f78534ecc21e64 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Mon, 30 Jan 2012 13:18:13 +0100 Subject: Prop always on, removed conf_stdev --- lazar.rb | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/lazar.rb b/lazar.rb index 65752ec..6fd12ef 100644 --- a/lazar.rb +++ b/lazar.rb @@ -105,8 +105,7 @@ post '/lazar/?' do lazar.prediction_algorithm = "Neighbors.#{params[:prediction_algorithm]}" if params[:prediction_algorithm] # Propositionalization - propositionalized = false - propositionalized = true if ( params[:propositionalized] != "false" && ( lazar.prediction_algorithm == "local_mlr_prop" || lazar.prediction_algorithm.include?("local_svm") ) ) + propositionalized = true # PC type pc_type = params[:pc_type] unless params[:pc_type].nil? @@ -115,10 +114,6 @@ post '/lazar/?' do min_train_performance = params[:min_train_performance].to_f if params[:min_train_performance] min_train_performance = 0.1 unless params[:min_train_performance] - # Conf_stdev --- To be removed?? - lazar.conf_stdev = ( (params[:conf_stdev] == "true") ? true : false ) - - -- cgit v1.2.3 From 165a82a504bf06136619af15ccc6e3be23c642fb Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Mon, 30 Jan 2012 16:17:00 +0100 Subject: Prop on not for wmv --- last-utils | 2 +- lazar.rb | 2 +- libfminer | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/last-utils b/last-utils index 8c02f7e..cf02384 160000 --- a/last-utils +++ b/last-utils @@ -1 +1 @@ -Subproject commit 8c02f7e71450cac6d8c5d7d34ecb620046b4ea46 +Subproject commit cf0238477127e54509b6ab8b5c38f50dd6ffce08 diff --git a/lazar.rb b/lazar.rb index 6fd12ef..7fa0f96 100644 --- a/lazar.rb +++ b/lazar.rb @@ -105,7 +105,7 @@ post '/lazar/?' do lazar.prediction_algorithm = "Neighbors.#{params[:prediction_algorithm]}" if params[:prediction_algorithm] # Propositionalization - propositionalized = true + propositionalized = (lazar.prediction_algorithm=="Neighbors.weighted_majority_vote" ? false : true) # PC type pc_type = params[:pc_type] unless params[:pc_type].nil? diff --git a/libfminer b/libfminer index 17932e8..f9e560d 160000 --- a/libfminer +++ b/libfminer @@ -1 +1 @@ -Subproject commit 17932e809c35c93374ed3d5fd19a313325c35b41 +Subproject commit f9e560dc0a7a5d5af439814ab5fa9ce027a025b7 -- cgit v1.2.3 From 0164d17d0fbb90a9dfbe755eb7a2e9b2e778d623 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Tue, 31 Jan 2012 08:12:03 +0100 Subject: nr_hits for all SVM formulations --- lazar.rb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lazar.rb b/lazar.rb index 7fa0f96..81929c6 100644 --- a/lazar.rb +++ b/lazar.rb @@ -93,17 +93,17 @@ post '/lazar/?' do min_sim = params[:min_sim].to_f if params[:min_sim] min_sim = 0.3 unless params[:min_sim] + # Algorithm + lazar.prediction_algorithm = "Neighbors.#{params[:prediction_algorithm]}" if params[:prediction_algorithm] + # Nr Hits nr_hits = false - if params[:nr_hits] == "true" + if params[:nr_hits] == "true" || lazar.prediction_algorithm.include?("local_svm") lazar.feature_calculation_algorithm = "Substructure.match_hits" nr_hits = true end params[:nr_hits] = "true" if lazar.feature_calculation_algorithm == "Substructure.match_hits" #not sure if this line in needed - # Algorithm - lazar.prediction_algorithm = "Neighbors.#{params[:prediction_algorithm]}" if params[:prediction_algorithm] - # Propositionalization propositionalized = (lazar.prediction_algorithm=="Neighbors.weighted_majority_vote" ? false : true) -- cgit v1.2.3 From 985a7a0a18f763ceae020cef2fbf0db3da17776d Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Tue, 31 Jan 2012 08:48:08 +0100 Subject: Adjusted tests to new parameters (see http://goo.gl/lXJBS) --- README.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index e854ac3..e979ff6 100644 --- a/README.md +++ b/README.md @@ -35,19 +35,19 @@ REST operations [feature_generation_uri], [prediction_algorithm], [feature_dataset_uri], - [propositionalized=false], [pc_type=null], - [nr_hits=false (class.), true (regr.)], - [min_sim=0.3 (nominal), 0.6 (numeric features)] + [nr_hits=false (class. using wt. maj. vote), true (else)], + [min_sim=0.3 (nominal), 0.4 (numeric features)] + [min_train_performance=0.1] Synopsis -------- -- prediction\_algorithm: One of "weighted\_majority\_vote" (default for classification), "local\_svm\_classification", "local\_svm\_regression (default for regression)", "local\_mlr\_prop". "weighted\_majority\_vote" is not applicable for regression. "local\_mlr\_prop" is not applicable for classification. -- propositionalized: One of "true", "false". Not appplicable when prediction\_algorithm="weighted\_majority\_vote". +- prediction\_algorithm: One of "weighted\_majority\_vote" (default for classification), "local\_svm\_classification", "local\_svm\_regression" (default for regression). "weighted\_majority\_vote" is not applicable for regression. - pc_type: Mandatory for feature dataset, one of [geometrical, topological, electronic, constitutional, hybrid, cpsa]. -- nr_hits: Whether for instantiated models (local\_svm\_kernel = "propositionalized" for prediction_algorithm="local\_svm\_classification" or "local\_svm\_regression", or for prediction_algorithm="local\_mlr\_prop") nominal features should be instantiated with their occurrence counts in the instances. For non-instantiated models (local\_svm\_kernel = "weighted\_tanimoto" for prediction_algorithm="local\_svm\_classification" or "local\_svm\_regression", or for prediction_algorithm="weighted\_majority\_vote") the neighbor-to-neighbor and neighbor-to-query similarity also integrates these counts, when the parameter is set. One of "true", "false". -- min_sim: The minimum similarity threshold for neighbors. Numeric value in [0,1]. +- nr_hits: Whether nominal features should be instantiated with their occurrence counts in the instances. One of "true", "false". +- min_sim: The minimum similarity threshold for neighbors. Numeric value in [0,1]. +- min_train_performance. The minimum training performance for "local\_svm\_classification" (Accuracy) and "local\_svm\_regression" (R-squared). Numeric value in [0,1]. See http://www.maunz.de/wordpress/opentox/2011/lazar-models-and-how-to-trigger-them for a graphical overview. -- cgit v1.2.3 From 48a8728794bb8d42bab5d4f62e19d11b36bef48a Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Tue, 7 Feb 2012 17:14:45 +0100 Subject: Added rfe --- feature_selection.rb | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 feature_selection.rb diff --git a/feature_selection.rb b/feature_selection.rb new file mode 100644 index 0000000..c69a954 --- /dev/null +++ b/feature_selection.rb @@ -0,0 +1,74 @@ +# Get list of feature_selection algorithms +# +# @return [text/uri-list] URIs of feature_selection algorithms +get '/feature_selection/?' do + list = [ url_for('/feature_selection/rfe', :full) ].join("\n") + "\n" + case request.env['HTTP_ACCEPT'] + when /text\/html/ + content_type "text/html" + OpenTox.text_to_html list + else + content_type 'text/uri-list' + list + end +end + +# Get RDF/XML representation of feature_selection rfe algorithm +# @return [application/rdf+xml] OWL-DL representation of feature_selection rfe algorithm +get "/feature_selection/rfe/?" do + algorithm = OpenTox::Algorithm::Generic.new(url_for('/feature_selection/rfe',:full)) + algorithm.metadata = { + DC.title => 'recursive feature elimination', + DC.creator => "andreas@maunz.de, helma@in-silico.ch", + DC.contributor => "vorgrimmlerdavid@gmx.de", + BO.instanceOf => "http://opentox.org/ontology/ist-algorithms.owl#feature_selection_rfe", + RDF.type => [OT.Algorithm,OTA.PatternMiningSupervised], + OT.parameters => [ + { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" }, + { DC.description => "Feature Dataset URI", OT.paramScope => "mandatory", DC.title => "feature_dataset_uri" } + ] + } + case request.env['HTTP_ACCEPT'] + when /text\/html/ + content_type "text/html" + OpenTox.text_to_html algorithm.to_yaml + when /application\/x-yaml/ + content_type "application/x-yaml" + algorithm.to_yaml + else + response['Content-Type'] = 'application/rdf+xml' + algorithm.to_rdfxml + end +end + +# Run rfe algorithm on dataset +# +# @param [String] dataset_uri URI of the training dataset +# @param [String] feature_dataset_uri URI of the feature dataset +# @return [text/uri-list] Task URI +post '/feature_selection/rfe/?' do + + raise OpenTox::NotFoundError.new "Please submit a dataset_uri." unless params[:dataset_uri] + raise OpenTox::NotFoundError.new "Please submit a feature_dataset_uri." unless params[:feature_dataset_uri] + + ds_csv=OpenTox::RestClientWrapper.get( params[:dataset_uri], {:accept => "text/csv"} ) + tf_ds=Tempfile.open(['rfe_', '.csv']) + tf_ds.puts(ds_csv) + + fds_csv=OpenTox::RestClientWrapper.get( params[:feature_dataset_uri], {:accept => "text/csv"}) + tf_fds=Tempfile.open(['rfe_', '.csv']) + tf_fds.puts(fds_csv) + + task = OpenTox::Task.create("Recursive Feature Elimination", url_for('/feature_selection',:full)) do |task| + rfe=OpenTox::Algorithm::FeatureSelection.rfe( { :ds_csv_file => tf_ds, :fds_csv_file => tf_fds } ) + r_result_uri=OpenTox::Dataset.create_from_csv(r_result_file).uri + File.unlink(r_result_file) + tf_ds.close! + tf_fds.close! + r_result_uri + end + response['Content-Type'] = 'text/uri-list' + raise OpenTox::ServiceUnavailableError.newtask.uri+"\n" if task.status == "Cancelled" + halt 202,task.uri.to_s+"\n" +end + -- cgit v1.2.3 From c2d9390594a2008b6fec2ce724462badd458ae74 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Wed, 8 Feb 2012 14:15:30 +0100 Subject: Adjusted rfe --- feature_selection.rb | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/feature_selection.rb b/feature_selection.rb index c69a954..c5bc4fa 100644 --- a/feature_selection.rb +++ b/feature_selection.rb @@ -25,7 +25,9 @@ get "/feature_selection/rfe/?" do RDF.type => [OT.Algorithm,OTA.PatternMiningSupervised], OT.parameters => [ { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" }, - { DC.description => "Feature Dataset URI", OT.paramScope => "mandatory", DC.title => "feature_dataset_uri" } + { DC.description => "Prediction Feature URI", OT.paramScope => "mandatory", DC.title => "prediction_feature_uri" }, + { DC.description => "Feature Dataset URI", OT.paramScope => "mandatory", DC.title => "feature_dataset_uri" }, + { DC.description => "Delete Instances with missing values", OT.paramScope => "optional", DC.title => "del_missing" } ] } case request.env['HTTP_ACCEPT'] @@ -49,22 +51,31 @@ end post '/feature_selection/rfe/?' do raise OpenTox::NotFoundError.new "Please submit a dataset_uri." unless params[:dataset_uri] + raise OpenTox::NotFoundError.new "Please submit a prediction_feature_uri." unless params[:prediction_feature_uri] raise OpenTox::NotFoundError.new "Please submit a feature_dataset_uri." unless params[:feature_dataset_uri] ds_csv=OpenTox::RestClientWrapper.get( params[:dataset_uri], {:accept => "text/csv"} ) tf_ds=Tempfile.open(['rfe_', '.csv']) tf_ds.puts(ds_csv) + tf_ds.flush() + + prediction_feature = params[:prediction_feature_uri].split('/').last # get col name fds_csv=OpenTox::RestClientWrapper.get( params[:feature_dataset_uri], {:accept => "text/csv"}) tf_fds=Tempfile.open(['rfe_', '.csv']) tf_fds.puts(fds_csv) + tf_fds.flush() + + del_missing = params[:del_missing] == "true" ? true : false task = OpenTox::Task.create("Recursive Feature Elimination", url_for('/feature_selection',:full)) do |task| - rfe=OpenTox::Algorithm::FeatureSelection.rfe( { :ds_csv_file => tf_ds, :fds_csv_file => tf_fds } ) - r_result_uri=OpenTox::Dataset.create_from_csv(r_result_file).uri - File.unlink(r_result_file) + r_result_file = OpenTox::Algorithm::FeatureSelection.rfe( { :ds_csv_file => tf_ds.path, :prediction_feature => prediction_feature, :fds_csv_file => tf_fds.path, :del_missing => del_missing } ) + r_result_uri = OpenTox::Dataset.create_from_csv_file(r_result_file).uri tf_ds.close! tf_fds.close! + File.unlink(r_result_file) + File.unlink(tf_ds.path) + File.unlink(tf_fds.path) r_result_uri end response['Content-Type'] = 'text/uri-list' -- cgit v1.2.3 From 3e160e2e0537e2bfc0f8c89570349aa2718e34c2 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Wed, 8 Feb 2012 14:40:09 +0100 Subject: Fixed minor bugs --- application.rb | 3 ++- feature_selection.rb | 6 ++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/application.rb b/application.rb index b62f6f5..f5b331f 100644 --- a/application.rb +++ b/application.rb @@ -11,6 +11,7 @@ require 'opentox-ruby' require 'openbabel.rb' require 'fminer.rb' require 'lazar.rb' +require 'feature_selection.rb' set :lock, true @@ -22,7 +23,7 @@ end # # @return [text/uri-list] algorithm URIs get '/?' do - list = [ url_for('/lazar', :full), url_for('/fminer/bbrc', :full), url_for('/fminer/last', :full) ].join("\n") + "\n" + list = [ url_for('/lazar', :full), url_for('/fminer/bbrc', :full), url_for('/fminer/last', :full), url_for('/feature_selection/rfe', :full) ].join("\n") + "\n" case request.env['HTTP_ACCEPT'] when /text\/html/ content_type "text/html" diff --git a/feature_selection.rb b/feature_selection.rb index c5bc4fa..efda812 100644 --- a/feature_selection.rb +++ b/feature_selection.rb @@ -71,11 +71,9 @@ post '/feature_selection/rfe/?' do task = OpenTox::Task.create("Recursive Feature Elimination", url_for('/feature_selection',:full)) do |task| r_result_file = OpenTox::Algorithm::FeatureSelection.rfe( { :ds_csv_file => tf_ds.path, :prediction_feature => prediction_feature, :fds_csv_file => tf_fds.path, :del_missing => del_missing } ) r_result_uri = OpenTox::Dataset.create_from_csv_file(r_result_file).uri - tf_ds.close! - tf_fds.close! + tf_ds.close!; tf_fds.close! + tf_ds.delete; tf_fds.delete File.unlink(r_result_file) - File.unlink(tf_ds.path) - File.unlink(tf_fds.path) r_result_uri end response['Content-Type'] = 'text/uri-list' -- cgit v1.2.3 From 41a5a6a85fa97d5d9a6495c36df3762b8f5f36c0 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Wed, 8 Feb 2012 14:50:04 +0100 Subject: Updated README --- README.md | 70 +++++++++++++++++++++++++++++++++++++++------------------------ 1 file changed, 44 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index e979ff6..344f747 100644 --- a/README.md +++ b/README.md @@ -9,36 +9,48 @@ OpenTox Algorithm REST operations --------------- - Get a list of all algorithms GET / - URIs of algorithms 200 - Get a representation of the GET /fminer/ - fminer representation 200,404 + Get a list of all algorithms GET / - URIs of algorithms 200 + Get a representation of the GET /fminer/ - fminer representation 200,404 fminer algorithms - Get a representation of the GET /fminer/bbrc - bbrc representation 200,404 + Get a representation of the GET /fminer/bbrc - bbrc representation 200,404 bbrc algorithm - Get a representation of the GET /fminer/last - last representation 200,404 + Get a representation of the GET /fminer/last - last representation 200,404 last algorithm - Get a representation of the GET /lazar - lazar representation 200,404 + Get a representation of the GET /lazar - lazar representation 200,404 lazar algorithm - Create bbrc features POST /fminer/bbrc dataset_uri, URI for feature dataset 200,400,404,500 - feature_uri, - [min_frequency=5 per-mil], - [feature_type=trees], - [backbone=true], - [min_chisq_significance=0.95], - [nr_hits=false] - Create last features POST /fminer/last dataset_uri, URI for feature dataset 200,400,404,500 - feature_uri, - [min_frequency=8 %], - [feature_type=trees], - [nr_hits=false] - Create lazar model POST /lazar dataset_uri, URI for lazar model 200,400,404,500 - [prediction_feature], - [feature_generation_uri], - [prediction_algorithm], - [feature_dataset_uri], - [pc_type=null], - [nr_hits=false (class. using wt. maj. vote), true (else)], - [min_sim=0.3 (nominal), 0.4 (numeric features)] - [min_train_performance=0.1] + Get a representation of the GET /feature_selection - feature selection representation 200,404 + feature selection algorithms + Get a representation of the GET /feature_selection/rfe - rfe representation 200,404 + rfe algorithm + + + Create bbrc features POST /fminer/bbrc dataset_uri, URI for feature dataset 200,400,404,500 + feature_uri, + [min_frequency=5 per-mil], + [feature_type=trees], + [backbone=true], + [min_chisq_significance=0.95], + [nr_hits=false] + Create last features POST /fminer/last dataset_uri, URI for feature dataset 200,400,404,500 + feature_uri, + [min_frequency=8 %], + [feature_type=trees], + [nr_hits=false] + Create lazar model POST /lazar dataset_uri, URI for lazar model 200,400,404,500 + [prediction_feature], + [feature_generation_uri], + [prediction_algorithm], + [feature_dataset_uri], + [pc_type=null], + [nr_hits=false (class. using wt. maj. vote), true (else)], + [min_sim=0.3 (nominal), 0.4 (numeric features)] + [min_train_performance=0.1] + + Create selected features POST /feature_selection/rfe dataset_uri, URI for dataset 200,400,404,500 + prediction_feature, + feature_dataset_uri, + [del_missing=false] + Synopsis -------- @@ -48,6 +60,7 @@ Synopsis - nr_hits: Whether nominal features should be instantiated with their occurrence counts in the instances. One of "true", "false". - min_sim: The minimum similarity threshold for neighbors. Numeric value in [0,1]. - min_train_performance. The minimum training performance for "local\_svm\_classification" (Accuracy) and "local\_svm\_regression" (R-squared). Numeric value in [0,1]. +- del_missing: one of true, false See http://www.maunz.de/wordpress/opentox/2011/lazar-models-and-how-to-trigger-them for a graphical overview. @@ -109,4 +122,9 @@ Creates a standard Lazar model. [API documentation](http://rdoc.info/github/opentox/algorithm) -------------------------------------------------------------- +* * * + +### Create a feature dataset of selected features + curl -X POST -d dataset_uri={dataset_uri} -d prediction_feature_uri={prediction_feature_uri} -d feature_dataset_uri={feature_dataset_uri} -d del_missing=true http://webservices.in-silico.ch/test/algorithm/feature_selection/rfe + Copyright (c) 2009-2011 Christoph Helma, Martin Guetlein, Micha Rautenberg, Andreas Maunz, David Vorgrimmler, Denis Gebele. See LICENSE for details. -- cgit v1.2.3 From c80b7ac28dce2df193fb61c042b0563f35b48012 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Thu, 9 Feb 2012 10:26:16 +0100 Subject: Fixed tempfile deletion --- feature_selection.rb | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/feature_selection.rb b/feature_selection.rb index efda812..d375a0e 100644 --- a/feature_selection.rb +++ b/feature_selection.rb @@ -71,9 +71,11 @@ post '/feature_selection/rfe/?' do task = OpenTox::Task.create("Recursive Feature Elimination", url_for('/feature_selection',:full)) do |task| r_result_file = OpenTox::Algorithm::FeatureSelection.rfe( { :ds_csv_file => tf_ds.path, :prediction_feature => prediction_feature, :fds_csv_file => tf_fds.path, :del_missing => del_missing } ) r_result_uri = OpenTox::Dataset.create_from_csv_file(r_result_file).uri - tf_ds.close!; tf_fds.close! - tf_ds.delete; tf_fds.delete - File.unlink(r_result_file) + begin + tf_ds.close!; tf_fds.close! + File.unlink(r_result_file) + rescue + end r_result_uri end response['Content-Type'] = 'text/uri-list' -- cgit v1.2.3