From 6374df2b69c039ffbc269a70198b94360b68e8de Mon Sep 17 00:00:00 2001 From: davor Date: Tue, 10 Jan 2012 08:31:20 +0100 Subject: Parameter clean-up propositionalized pc_type nr_hits min_sim now regular parameters (stored in metadata) Updated flowchart at http://www.maunz.de/wordpress/opentox/2011/lazar-models-and-how-to-trigger-them --- README.md | 21 +++++++++++---------- lazar.rb | 33 +++++++++++++++++++-------------- 2 files changed, 30 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index 8383cb6..e854ac3 100644 --- a/README.md +++ b/README.md @@ -31,22 +31,23 @@ REST operations [feature_type=trees], [nr_hits=false] Create lazar model POST /lazar dataset_uri, URI for lazar model 200,400,404,500 - prediction_feature, - feature_generation_uri - prediction_algorithm - [local_svm_kernel=weighted_tanimoto] - [min_sim=0.3] - [nr_hits=false] - [conf_stdev=false] + [prediction_feature], + [feature_generation_uri], + [prediction_algorithm], + [feature_dataset_uri], + [propositionalized=false], + [pc_type=null], + [nr_hits=false (class.), true (regr.)], + [min_sim=0.3 (nominal), 0.6 (numeric features)] Synopsis -------- - prediction\_algorithm: One of "weighted\_majority\_vote" (default for classification), "local\_svm\_classification", "local\_svm\_regression (default for regression)", "local\_mlr\_prop". "weighted\_majority\_vote" is not applicable for regression. "local\_mlr\_prop" is not applicable for classification. -- local\_svm\_kernel: One of "weighted\_tanimoto", "propositionalized". local\_svm\_kernel is not appplicable when prediction\_algorithm="weighted\_majority\_vote". -- min_sim: The minimum similarity threshold for neighbors. Numeric value in [0,1]. +- propositionalized: One of "true", "false". Not applicable when prediction\_algorithm="weighted\_majority\_vote". +- pc_type: Mandatory for feature dataset, one of [geometrical, topological, electronic, constitutional, hybrid, cpsa]. 
- nr_hits: Whether for instantiated models (local\_svm\_kernel = "propositionalized" for prediction_algorithm="local\_svm\_classification" or "local\_svm\_regression", or for prediction_algorithm="local\_mlr\_prop") nominal features should be instantiated with their occurrence counts in the instances. For non-instantiated models (local\_svm\_kernel = "weighted\_tanimoto" for prediction_algorithm="local\_svm\_classification" or "local\_svm\_regression", or for prediction_algorithm="weighted\_majority\_vote") the neighbor-to-neighbor and neighbor-to-query similarity also integrates these counts, when the parameter is set. One of "true", "false". -- conf_stdev: Whether confidence integrates distribution of neighbor activity values. When "true", the exp(-1.0*(standard deviation of neighbor activities)) is multiplied on the similarity. One of "true", "false". +- min_sim: The minimum similarity threshold for neighbors. Numeric value in [0,1]. See http://www.maunz.de/wordpress/opentox/2011/lazar-models-and-how-to-trigger-them for a graphical overview. diff --git a/lazar.rb b/lazar.rb index f69495c..6c2d8ed 100644 --- a/lazar.rb +++ b/lazar.rb @@ -12,9 +12,9 @@ get '/lazar/?' do OT.parameters => [ { DC.description => "Dataset URI with the dependent variable", OT.paramScope => "mandatory", DC.title => "dataset_uri" }, { DC.description => "Feature URI for dependent variable. Optional for datasets with only a single feature.", OT.paramScope => "optional", DC.title => "prediction_feature" }, - { DC.description => "URI of feature genration service. Default: #{@@feature_generation_default}", OT.paramScope => "optional", DC.title => "feature_generation_uri" }, + { DC.description => "URI of feature generation service. Default: #{@@feature_generation_default}", OT.paramScope => "optional", DC.title => "feature_generation_uri" }, { DC.description => "URI of feature dataset. 
If this parameter is set no feature generation algorithm will be called", OT.paramScope => "optional", DC.title => "feature_dataset_uri" }, - { DC.description => "Further parameters for the feaature generation service", OT.paramScope => "optional" } + { DC.description => "Further parameters for the feature generation service", OT.paramScope => "optional" } ] } case request.env['HTTP_ACCEPT'] @@ -80,7 +80,6 @@ post '/lazar/?' do } # Regression: SVM, Substructure.match_hits elsif prediction_feature.feature_type == "regression" - #lazar.nr_hits = true # AM: Brauchen wir die Variable noch? Kann man an feature_calculation_algorithm auch ablesen (nächste Zeile) lazar.feature_calculation_algorithm = "Substructure.match_hits" lazar.prediction_algorithm = "Neighbors.local_svm_regression" end @@ -91,28 +90,28 @@ post '/lazar/?' do # # # USER VALUES # Min Sim - lazar.min_sim = params[:min_sim].to_f if params[:min_sim] + min_sim = params[:min_sim].to_f if params[:min_sim] + min_sim = 0.3 unless params[:min_sim] # Nr Hits - if params[:nr_hits] == "false" # if nr_hits is set optional to true/false it will return as String (but should be True/FalseClass) - #lazar.nr_hits = false - lazar.feature_calculation_algorithm = "Substructure.match" - elsif params[:nr_hits] == "true" - #lazar.nr_hits = true + nr_hits = false + if params[:nr_hits] == "true" lazar.feature_calculation_algorithm = "Substructure.match_hits" + nr_hits = true end params[:nr_hits] = "true" if lazar.feature_calculation_algorithm == "Substructure.match_hits" #not sure if this line in needed # Algorithm - lazar.prediction_algorithm = "Neighbors.#{params[:prediction_algorithm]}" unless params[:prediction_algorithm].nil? 
+ lazar.prediction_algorithm = "Neighbors.#{params[:prediction_algorithm]}" if params[:prediction_algorithm] # Propositionalization - lazar.prop_kernel = true if (params[:local_svm_kernel] == "propositionalized" || params[:prediction_algorithm] == "local_mlr_prop") + propositionalized = false + propositionalized = true if (params[:propositionalized] == "true" || params[:prediction_algorithm] == "local_mlr_prop") # PC type - lazar.pc_type = params[:pc_type] unless params[:pc_type].nil? + pc_type = params[:pc_type] unless params[:pc_type].nil? - # Conf_stdev + # Conf_stdev --- To be removed?? lazar.conf_stdev = ( (params[:conf_stdev] == "true") ? true : false ) @@ -136,6 +135,8 @@ post '/lazar/?' do training_features = OpenTox::Dataset.new(feature_dataset_uri) if training_features.feature_type(@subjectid) == "regression" lazar.similarity_algorithm = "Similarity.cosine" + min_sim = 0.6 unless params[:min_sim] + raise OpenTox::NotFoundError.new "No pc_type parameter." unless params[:pc_type] end # Create Features @@ -234,7 +235,11 @@ post '/lazar/?' do lazar.metadata[OT.parameters] = [ {DC.title => "dataset_uri", OT.paramValue => dataset_uri}, {DC.title => "prediction_feature", OT.paramValue => prediction_feature.uri}, - {DC.title => "feature_generation_uri", OT.paramValue => feature_generation_uri} + {DC.title => "feature_generation_uri", OT.paramValue => feature_generation_uri}, + {DC.title => "propositionalized", OT.paramValue => propositionalized}, + {DC.title => "pc_type", OT.paramValue => pc_type}, + {DC.title => "nr_hits", OT.paramValue => nr_hits}, + {DC.title => "min_sim", OT.paramValue => min_sim} ] model_uri = lazar.save(@subjectid) -- cgit v1.2.3