From 4cca88212c23460d70bc37e851f62fc2961fde73 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Tue, 25 Oct 2011 12:37:47 +0000 Subject: bo:instanceOf added to algorithms --- fminer.rb | 4 +++- lazar.rb | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/fminer.rb b/fminer.rb index 94284db..50660fa 100644 --- a/fminer.rb +++ b/fminer.rb @@ -30,6 +30,7 @@ get "/fminer/bbrc/?" do DC.title => 'fminer backbone refinement class representatives', DC.creator => "andreas@maunz.de, helma@in-silico.ch", DC.contributor => "vorgrimmlerdavid@gmx.de", + BO.instanceOf => "http://opentox.org/ontology/ist-algorithms.owl#fminer_bbrc", RDF.type => [OT.Algorithm,OTA.PatternMiningSupervised], OT.parameters => [ { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" }, @@ -61,6 +62,7 @@ get "/fminer/last/?" do DC.title => 'fminer latent structure class representatives', DC.creator => "andreas@maunz.de, helma@in-silico.ch", DC.contributor => "vorgrimmlerdavid@gmx.de", + BO.instanceOf => "http://opentox.org/ontology/ist-algorithms.owl#fminer_last", RDF.type => [OT.Algorithm,OTA.PatternMiningSupervised], OT.parameters => [ { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" }, @@ -325,4 +327,4 @@ post '/fminer/last/?' do response['Content-Type'] = 'text/uri-list' raise OpenTox::ServiceUnavailableError.newtask.uri+"\n" if task.status == "Cancelled" halt 202,task.uri.to_s+"\n" -end \ No newline at end of file +end diff --git a/lazar.rb b/lazar.rb index f4915a7..9750a56 100644 --- a/lazar.rb +++ b/lazar.rb @@ -8,6 +8,7 @@ get '/lazar/?' do DC.title => 'lazar', DC.creator => "helma@in-silico.ch, andreas@maunz.de", DC.contributor => "vorgrimmlerdavid@gmx.de", + BO.instanceOf => "http://opentox.org/ontology/ist-algorithms.owl#lazar", OT.parameters => [ { DC.description => "Dataset URI with the dependent variable", OT.paramScope => "mandatory", DC.title => "dataset_uri" }, { DC.description => "Feature URI for dependent variable. Optional for datasets with only a single feature.", OT.paramScope => "optional", DC.title => "prediction_feature" }, -- cgit v1.2.3 From 3e57cfd6a551076089462e903823d215a6615e9c Mon Sep 17 00:00:00 2001 From: mguetlein Date: Fri, 28 Oct 2011 11:22:11 +0200 Subject: add subjectid param to to-html function call --- application.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/application.rb b/application.rb index 18a6c82..b62f6f5 100644 --- a/application.rb +++ b/application.rb @@ -26,7 +26,7 @@ get '/?' do case request.env['HTTP_ACCEPT'] when /text\/html/ content_type "text/html" - OpenTox.text_to_html list + OpenTox.text_to_html list,@subjectid else content_type 'text/uri-list' list -- cgit v1.2.3 From dce530c27024faf332f60c1ae0bc67f797336add Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Mon, 7 Nov 2011 12:57:22 +0100 Subject: Init to numeric branch --- last-utils | 2 +- lazar.rb | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++- libfminer | 2 +- 3 files changed, 58 insertions(+), 3 deletions(-) diff --git a/last-utils b/last-utils index 04bd1b7..8c02f7e 160000 --- a/last-utils +++ b/last-utils @@ -1 +1 @@ -Subproject commit 04bd1b73f54bb7422d3c08bb5a81bc02af04f6ff +Subproject commit 8c02f7e71450cac6d8c5d7d34ecb620046b4ea46 diff --git a/lazar.rb b/lazar.rb index 9750a56..98734e4 100644 --- a/lazar.rb +++ b/lazar.rb @@ -63,6 +63,10 @@ post '/lazar/?' do lazar = OpenTox::Model::Lazar.new lazar.min_sim = params[:min_sim].to_f if params[:min_sim] + + + + # AM: Manage endpoint related variables. if prediction_feature.feature_type == "classification" @training_classes = training_activities.accept_values(prediction_feature.uri).sort @training_classes.each_with_index { |c,i| @@ -82,9 +86,23 @@ post '/lazar/?' do end params[:nr_hits] = "true" if lazar.nr_hits + + + + + task.progress 10 - if params[:feature_dataset_uri] + + + + # + # AM: features + # + # + + # READ OR CREATE + if params[:feature_dataset_uri] feature_dataset_uri = params[:feature_dataset_uri] training_features = OpenTox::Dataset.new(feature_dataset_uri) case training_features.feature_type(@subjectid) @@ -109,6 +127,9 @@ post '/lazar/?' do training_features = OpenTox::Dataset.new(feature_dataset_uri) end + + + # WRITE IN MODEL training_features.load_all(@subjectid) raise OpenTox::NotFoundError.new "Dataset #{feature_dataset_uri} not found." if training_features.nil? @@ -119,6 +140,8 @@ post '/lazar/?' do training_features.data_entries.each do |compound,entry| lazar.fingerprints[compound] = {} unless lazar.fingerprints[compound] entry.keys.each do |feature| + + # CASE 1: Substructure if lazar.feature_calculation_algorithm == "Substructure.match" if training_features.features[feature] smarts = training_features.features[feature][OT.smarts] @@ -134,6 +157,8 @@ post '/lazar/?' do lazar.effects[smarts] = training_features.features[feature][OT.effect] end end + + # CASE 2: Others else case training_features.feature_type(@subjectid) when "classification" @@ -159,6 +184,15 @@ post '/lazar/?' do end task.progress 80 + + + + # + # AM: SETTINGS + # + # + # + # AM: allow settings override by user lazar.prediction_algorithm = "Neighbors.#{params[:prediction_algorithm]}" unless params[:prediction_algorithm].nil? if prediction_feature.feature_type == "regression" @@ -169,6 +203,19 @@ post '/lazar/?' do lazar.conf_stdev = false lazar.conf_stdev = true if params[:conf_stdev] == "true" + + + + + + + + # + # AM TRANSFORMATIONS + # + # + # + # AM: Feed Data using Transformations if prediction_feature.feature_type == "regression" transformed_acts = [] @@ -200,6 +247,14 @@ post '/lazar/?' do end task.progress 90 + + + # + # AM: Metadata + # + # + # + lazar.metadata[DC.title] = "lazar model for #{URI.decode(File.basename(prediction_feature.uri))}" lazar.metadata[OT.dependentVariables] = prediction_feature.uri lazar.metadata[OT.trainingDataset] = dataset_uri diff --git a/libfminer b/libfminer index 07679a6..17932e8 160000 --- a/libfminer +++ b/libfminer @@ -1 +1 @@ -Subproject commit 07679a625a7acad864fd3abd80654a1a0a61e690 +Subproject commit 17932e809c35c93374ed3d5fd19a313325c35b41 -- cgit v1.2.3 From 0cf433a84a050c4c2ab782d6e91603dc910c5484 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Mon, 7 Nov 2011 16:08:46 +0100 Subject: Nicer comments --- lazar.rb | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lazar.rb b/lazar.rb index 98734e4..5ae6c9c 100644 --- a/lazar.rb +++ b/lazar.rb @@ -80,7 +80,6 @@ post '/lazar/?' do if params[:nr_hits] == "false" # if nr_hits is set optional to true/false it will return as String (but should be True/FalseClass) lazar.nr_hits = false - #params[:nr_hits] = false elsif params[:nr_hits] == "true" lazar.nr_hits = true end @@ -96,10 +95,12 @@ post '/lazar/?' do + # # AM: features # - # + # + # # READ OR CREATE if params[:feature_dataset_uri] @@ -187,6 +188,7 @@ post '/lazar/?' do + # # AM: SETTINGS # @@ -207,9 +209,6 @@ post '/lazar/?' do - - - # # AM TRANSFORMATIONS # @@ -249,6 +248,7 @@ post '/lazar/?' do + # # AM: Metadata # -- cgit v1.2.3 From 7c4e683f6a7d2996456dc09bad446933f072c07a Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Tue, 8 Nov 2011 13:43:10 +0100 Subject: Removed transformer --- README.md | 2 -- lazar.rb | 20 +++----------------- 2 files changed, 3 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index dacf1ec..8383cb6 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,6 @@ REST operations [local_svm_kernel=weighted_tanimoto] [min_sim=0.3] [nr_hits=false] - [activity_transform=] [conf_stdev=false] Synopsis @@ -47,7 +46,6 @@ Synopsis - local\_svm\_kernel: One of "weighted\_tanimoto", "propositionalized". local\_svm\_kernel is not appplicable when prediction\_algorithm="weighted\_majority\_vote". - min_sim: The minimum similarity threshold for neighbors. Numeric value in [0,1]. - nr_hits: Whether for instantiated models (local\_svm\_kernel = "propositionalized" for prediction_algorithm="local\_svm\_classification" or "local\_svm\_regression", or for prediction_algorithm="local\_mlr\_prop") nominal features should be instantiated with their occurrence counts in the instances. For non-instantiated models (local\_svm\_kernel = "weighted\_tanimoto" for prediction_algorithm="local\_svm\_classification" or "local\_svm\_regression", or for prediction_algorithm="weighted\_majority\_vote") the neighbor-to-neighbor and neighbor-to-query similarity also integrates these counts, when the parameter is set. One of "true", "false". -- activity_transform: Normalizing transformations of the y-values (activities), applicable only to regression problems. One of "Log10", "Inverter", "NOP". "Log10" moves all values above zero and takes the log to base 10. "Inverter" moves all values above 1.0 and takes the inverted value. "NOP" is the identity transformation, which does nothing. Model predictions are output with reverse transformation applied. - conf_stdev: Whether confidence integrates distribution of neighbor activity values. When "true", the exp(-1.0*(standard deviation of neighbor activities)) is multiplied on the similarity. One of "true", "false". See http://www.maunz.de/wordpress/opentox/2011/lazar-models-and-how-to-trigger-them for a graphical overview. diff --git a/lazar.rb b/lazar.rb index 5ae6c9c..9aac0d8 100644 --- a/lazar.rb +++ b/lazar.rb @@ -197,10 +197,6 @@ post '/lazar/?' do # AM: allow settings override by user lazar.prediction_algorithm = "Neighbors.#{params[:prediction_algorithm]}" unless params[:prediction_algorithm].nil? - if prediction_feature.feature_type == "regression" - lazar.transform["class"] = "Log10" if lazar.transform["class"] == "NOP" - end - lazar.transform["class"] = params[:activity_transform] unless params[:activity_transform].nil? lazar.prop_kernel = true if (params[:local_svm_kernel] == "propositionalized" || params[:prediction_algorithm] == "local_mlr_prop") lazar.conf_stdev = false lazar.conf_stdev = true if params[:conf_stdev] == "true" @@ -210,27 +206,17 @@ post '/lazar/?' do # - # AM TRANSFORMATIONS + # AM: Feed data # # # - - # AM: Feed Data using Transformations + if prediction_feature.feature_type == "regression" - transformed_acts = [] - training_activities.data_entries.each do |compound,entry| - transformed_acts.concat entry[prediction_feature.uri] unless entry[prediction_feature.uri].empty? - end - transformer = eval "OpenTox::Algorithm::Transform::#{lazar.transform["class"]}.new(transformed_acts)" - transformed_acts = transformer.values - lazar.transform["offset"] = transformer.offset - t_count=0 training_activities.data_entries.each do |compound,entry| lazar.activities[compound] = [] unless lazar.activities[compound] unless entry[prediction_feature.uri].empty? entry[prediction_feature.uri].each do |value| - lazar.activities[compound] << transformed_acts[t_count].to_s - t_count+=1 + lazar.activities[compound] << value end end end -- cgit v1.2.3 From d9b9c012daf50f02ec756b7677a1b3313e8ef9a8 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Tue, 15 Nov 2011 10:31:43 +0100 Subject: Added completion of features --- fminer.rb | 53 +++++++++++++++++++++++++++++++---------------------- 1 file changed, 31 insertions(+), 22 deletions(-) diff --git a/fminer.rb b/fminer.rb index 50660fa..0637cb2 100644 --- a/fminer.rb +++ b/fminer.rb @@ -33,13 +33,13 @@ get "/fminer/bbrc/?" do BO.instanceOf => "http://opentox.org/ontology/ist-algorithms.owl#fminer_bbrc", RDF.type => [OT.Algorithm,OTA.PatternMiningSupervised], OT.parameters => [ - { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" }, - { DC.description => "Feature URI for dependent variable", OT.paramScope => "mandatory", DC.title => "prediction_feature" }, - { DC.description => "Minimum frequency", OT.paramScope => "optional", DC.title => "minfreq" }, - { DC.description => "Feature type, can be 'paths' or 'trees'", OT.paramScope => "optional", DC.title => "feature_type" }, - { DC.description => "BBRC classes, pass 'false' to switch off mining for BBRC representatives.", OT.paramScope => "optional", DC.title => "backbone" }, - { DC.description => "Significance threshold (between 0 and 1)", OT.paramScope => "optional", DC.title => "min_chisq_significance" }, - ] + { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" }, + { DC.description => "Feature URI for dependent variable", OT.paramScope => "mandatory", DC.title => "prediction_feature" }, + { DC.description => "Minimum frequency", OT.paramScope => "optional", DC.title => "minfreq" }, + { DC.description => "Feature type, can be 'paths' or 'trees'", OT.paramScope => "optional", DC.title => "feature_type" }, + { DC.description => "BBRC classes, pass 'false' to switch off mining for BBRC representatives.", OT.paramScope => "optional", DC.title => "backbone" }, + { DC.description => "Significance threshold (between 0 and 1)", OT.paramScope => "optional", DC.title => "min_chisq_significance" }, + ] } case request.env['HTTP_ACCEPT'] when /text\/html/ @@ -65,12 +65,12 @@ get "/fminer/last/?" do BO.instanceOf => "http://opentox.org/ontology/ist-algorithms.owl#fminer_last", RDF.type => [OT.Algorithm,OTA.PatternMiningSupervised], OT.parameters => [ - { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" }, - { DC.description => "Feature URI for dependent variable", OT.paramScope => "mandatory", DC.title => "prediction_feature" }, - { DC.description => "Minimum frequency", OT.paramScope => "optional", DC.title => "minfreq" }, - { DC.description => "Feature type, can be 'paths' or 'trees'", OT.paramScope => "optional", DC.title => "feature_type" }, - { DC.description => "Maximum number of hops", OT.paramScope => "optional", DC.title => "hops" }, - ] + { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" }, + { DC.description => "Feature URI for dependent variable", OT.paramScope => "mandatory", DC.title => "prediction_feature" }, + { DC.description => "Minimum frequency", OT.paramScope => "optional", DC.title => "minfreq" }, + { DC.description => "Feature type, can be 'paths' or 'trees'", OT.paramScope => "optional", DC.title => "feature_type" }, + { DC.description => "Maximum number of hops", OT.paramScope => "optional", DC.title => "hops" }, + ] } case request.env['HTTP_ACCEPT'] when /text\/html/ @@ -126,7 +126,7 @@ post '/fminer/bbrc/?' do OT.parameters => [ { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] }, { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] } - ] + ] }) feature_dataset.save(@subjectid) @@ -140,12 +140,12 @@ post '/fminer/bbrc/?' do g_array=fminer.all_activities.values # DV: calculation of global median for effect calculation g_median=g_array.to_scale.median - + raise "No compounds in dataset #{fminer.training_dataset.uri}" if fminer.compounds.size==0 task.progress 10 step_width = 80 / @@bbrc.GetNoRootNodes().to_f features = Set.new - + # run @@bbrc (0 .. @@bbrc.GetNoRootNodes()-1).each do |j| results = @@bbrc.MineRoot(j) @@ -187,7 +187,7 @@ post '/fminer/bbrc/?' do OT.parameters => [ { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] }, { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] } - ] + ] } feature_dataset.add_feature feature_uri, metadata #feature_dataset.add_feature_parameters feature_uri, feature_dataset.parameters @@ -201,8 +201,13 @@ post '/fminer/bbrc/?' do feature_dataset.add(fminer.compounds[id], feature_uri, 1) end } - end - end + + end # end of + end # feature parsing + + # add feature values for non-present features + feature_dataset.complete_data_entries + feature_dataset.save(@subjectid) feature_dataset.uri end @@ -252,7 +257,7 @@ post '/fminer/last/?' do OT.parameters => [ { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] }, { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] } - ] + ] }) feature_dataset.save(@subjectid) @@ -263,7 +268,7 @@ post '/fminer/last/?' do # Add data to fminer fminer.add_fminer_data(@@last, params, @value_map) - + raise "No compounds in dataset #{fminer.training_dataset.uri}" if fminer.compounds.size==0 # run @@last @@ -311,7 +316,7 @@ post '/fminer/last/?' do OT.parameters => [ { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] }, { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] } - ] + ] } feature_dataset.add_feature feature_uri, metadata end @@ -321,6 +326,10 @@ post '/fminer/last/?' do ids.each_with_index { |id,i| feature_dataset.add(fminer.compounds[id], feature_uri, counts[smarts][i])} end end + + # add feature values for non-present features + feature_dataset.complete_data_entries + feature_dataset.save(@subjectid) feature_dataset.uri end -- cgit v1.2.3 From 0553eddba202ae481a1cdc3b7cc59002c4777ad4 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Tue, 15 Nov 2011 12:24:41 +0100 Subject: Commented out previous commit --- fminer.rb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fminer.rb b/fminer.rb index 0637cb2..0739b62 100644 --- a/fminer.rb +++ b/fminer.rb @@ -205,8 +205,8 @@ post '/fminer/bbrc/?' do end # end of end # feature parsing - # add feature values for non-present features - feature_dataset.complete_data_entries + # AM: add feature values for non-present features + # feature_dataset.complete_data_entries feature_dataset.save(@subjectid) feature_dataset.uri @@ -327,8 +327,8 @@ post '/fminer/last/?' do end end - # add feature values for non-present features - feature_dataset.complete_data_entries + # AM: add feature values for non-present features + # feature_dataset.complete_data_entries feature_dataset.save(@subjectid) feature_dataset.uri -- cgit v1.2.3 From 778139bd41d65ae9dab4302115ba37c02e48bb0f Mon Sep 17 00:00:00 2001 From: davor Date: Thu, 22 Dec 2011 16:08:24 +0100 Subject: Simplified nr_hits and add Substructure.match_hits --- lazar.rb | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/lazar.rb b/lazar.rb index 9aac0d8..0d6b704 100644 --- a/lazar.rb +++ b/lazar.rb @@ -75,13 +75,16 @@ post '/lazar/?' do } elsif prediction_feature.feature_type == "regression" lazar.nr_hits = true + lazar.feature_calculation_algorithm = "Substructure.match_hits" lazar.prediction_algorithm = "Neighbors.local_svm_regression" end if params[:nr_hits] == "false" # if nr_hits is set optional to true/false it will return as String (but should be True/FalseClass) lazar.nr_hits = false + lazar.feature_calculation_algorithm = "Substructure.match" elsif params[:nr_hits] == "true" lazar.nr_hits = true + lazar.feature_calculation_algorithm = "Substructure.match_hits" end params[:nr_hits] = "true" if lazar.nr_hits @@ -114,11 +117,11 @@ post '/lazar/?' do end else # create features params[:feature_generation_uri] = feature_generation_uri - if feature_generation_uri.match(/fminer/) - lazar.feature_calculation_algorithm = "Substructure.match" - else - raise OpenTox::NotFoundError.new "External feature generation services not yet supported" - end + #if feature_generation_uri.match(/fminer/) + # lazar.feature_calculation_algorithm = "Substructure.match" + #else + # raise OpenTox::NotFoundError.new "External feature generation services not yet supported" + #end params[:subjectid] = @subjectid prediction_feature = OpenTox::Feature.find params[:prediction_feature], @subjectid if prediction_feature.feature_type == "regression" && feature_generation_uri.match(/fminer/) @@ -136,21 +139,21 @@ post '/lazar/?' do # sorted features for index lookups - lazar.features = training_features.features.sort if prediction_feature.feature_type == "regression" and lazar.feature_calculation_algorithm != "Substructure.match" + lazar.features = training_features.features.sort if prediction_feature.feature_type == "regression" and lazar.feature_calculation_algorithm != "Substructure.match" || "Substructure.match_hits" training_features.data_entries.each do |compound,entry| lazar.fingerprints[compound] = {} unless lazar.fingerprints[compound] entry.keys.each do |feature| # CASE 1: Substructure - if lazar.feature_calculation_algorithm == "Substructure.match" + if lazar.feature_calculation_algorithm == "Substructure.match" || "Substructure.match_hits" if training_features.features[feature] smarts = training_features.features[feature][OT.smarts] #lazar.fingerprints[compound] << smarts if params[:nr_hits] - lazar.fingerprints[compound][smarts] = entry[feature].flatten.first + lazar.fingerprints[compound][smarts] = entry[feature].flatten.first * training_features.features[feature][OT.pValue] else - lazar.fingerprints[compound][smarts] = 1 + lazar.fingerprints[compound][smarts] = 1 * training_features.features[feature][OT.pValue] end unless lazar.features.include? smarts lazar.features << smarts -- cgit v1.2.3 From 3b74791450ecc1bd89f6ddf84fd1976403188231 Mon Sep 17 00:00:00 2001 From: davor Date: Fri, 23 Dec 2011 11:06:58 +0100 Subject: Fixed Bug and improving clarity --- lazar.rb | 129 +++++++++++++++++++++++++-------------------------------------- 1 file changed, 52 insertions(+), 77 deletions(-) diff --git a/lazar.rb b/lazar.rb index 0d6b704..89e66c6 100644 --- a/lazar.rb +++ b/lazar.rb @@ -45,40 +45,55 @@ post '/lazar/?' do task = OpenTox::Task.create("Create lazar model",url_for('/lazar',:full)) do |task| + + # # # Dataset present, prediction feature present? raise OpenTox::NotFoundError.new "Dataset #{dataset_uri} not found." unless training_activities = OpenTox::Dataset.new(dataset_uri) training_activities.load_all(@subjectid) + # Prediction Feature prediction_feature = OpenTox::Feature.find(params[:prediction_feature],@subjectid) unless params[:prediction_feature] # try to read prediction_feature from dataset raise OpenTox::NotFoundError.new "#{training_activities.features.size} features in dataset #{dataset_uri}. Please provide a prediction_feature parameter." unless training_activities.features.size == 1 prediction_feature = OpenTox::Feature.find(training_activities.features.keys.first,@subjectid) params[:prediction_feature] = prediction_feature.uri # pass to feature mining service end + raise OpenTox::NotFoundError.new "No feature #{prediction_feature.uri} in dataset #{params[:dataset_uri]}. (features: "+ training_activities.features.inspect+")" unless training_activities.features and training_activities.features.include?(prediction_feature.uri) - feature_generation_uri = @@feature_generation_default unless feature_generation_uri = params[:feature_generation_uri] - - raise OpenTox::NotFoundError.new "No feature #{prediction_feature.uri} in dataset #{params[:dataset_uri]}. (features: "+ - training_activities.features.inspect+")" unless training_activities.features and training_activities.features.include?(prediction_feature.uri) + # Feature Generation URI + feature_generation_uri = @@feature_generation_default unless ( (feature_generation_uri = params[:feature_generation_uri]) || (params[:feature_dataset_uri]) ) + # Create instance lazar = OpenTox::Model::Lazar.new - lazar.min_sim = params[:min_sim].to_f if params[:min_sim] - # AM: Manage endpoint related variables. + # # # ENDPOINT RELATED + + # Default Values + # Classification: Weighted Majority, Substructure.match if prediction_feature.feature_type == "classification" @training_classes = training_activities.accept_values(prediction_feature.uri).sort @training_classes.each_with_index { |c,i| lazar.value_map[i+1] = c # don't use '0': we must take the weighted mean later. params[:value_map] = lazar.value_map } + # Regression: SVM, Substructure.match_hits elsif prediction_feature.feature_type == "regression" - lazar.nr_hits = true + lazar.nr_hits = true # AM: Brauchen wir die Variable noch? Kann man an feature_calculation_algorithm auch ablesen (nĂchste Zeile) lazar.feature_calculation_algorithm = "Substructure.match_hits" lazar.prediction_algorithm = "Neighbors.local_svm_regression" end + + + + # # # USER VALUES + + # Min Sim + lazar.min_sim = params[:min_sim].to_f if params[:min_sim] + + # Nr Hits if params[:nr_hits] == "false" # if nr_hits is set optional to true/false it will return as String (but should be True/FalseClass) lazar.nr_hits = false lazar.feature_calculation_algorithm = "Substructure.match" @@ -86,7 +101,18 @@ post '/lazar/?' do lazar.nr_hits = true lazar.feature_calculation_algorithm = "Substructure.match_hits" end - params[:nr_hits] = "true" if lazar.nr_hits + params[:nr_hits] = "true" if lazar.nr_hits + + # Algorithm + lazar.prediction_algorithm = "Neighbors.#{params[:prediction_algorithm]}" unless params[:prediction_algorithm].nil? + + # Propositionalization + lazar.prop_kernel = true if (params[:local_svm_kernel] == "propositionalized" || params[:prediction_algorithm] == "local_mlr_prop") + + # Conf_stdev + lazar.conf_stdev = ( (params[:conf_stdev] == "true") ? true : false ) + + @@ -99,29 +125,20 @@ post '/lazar/?' do - # - # AM: features - # - # - # + # # # Features - # READ OR CREATE + # Read Features if params[:feature_dataset_uri] + lazar.feature_calculation_algorithm = "" # TODO: Implement lookup in feature dataset feature_dataset_uri = params[:feature_dataset_uri] training_features = OpenTox::Dataset.new(feature_dataset_uri) - case training_features.feature_type(@subjectid) - when "classification" - lazar.similarity_algorithm = "Similarity.tanimoto" - when "regression" - lazar.similarity_algorithm = "Similarity.euclid" + if training_features.feature_type(@subjectid) == "regression" + lazar.similarity_algorithm = "Similarity.cosine" end - else # create features + + # Create Features + else params[:feature_generation_uri] = feature_generation_uri - #if feature_generation_uri.match(/fminer/) - # lazar.feature_calculation_algorithm = "Substructure.match" - #else - # raise OpenTox::NotFoundError.new "External feature generation services not yet supported" - #end params[:subjectid] = @subjectid prediction_feature = OpenTox::Feature.find params[:prediction_feature], @subjectid if prediction_feature.feature_type == "regression" && feature_generation_uri.match(/fminer/) @@ -133,20 +150,16 @@ post '/lazar/?' do - # WRITE IN MODEL + # # # Write fingerprints training_features.load_all(@subjectid) raise OpenTox::NotFoundError.new "Dataset #{feature_dataset_uri} not found." if training_features.nil? - # sorted features for index lookups - - lazar.features = training_features.features.sort if prediction_feature.feature_type == "regression" and lazar.feature_calculation_algorithm != "Substructure.match" || "Substructure.match_hits" - training_features.data_entries.each do |compound,entry| lazar.fingerprints[compound] = {} unless lazar.fingerprints[compound] entry.keys.each do |feature| # CASE 1: Substructure - if lazar.feature_calculation_algorithm == "Substructure.match" || "Substructure.match_hits" + if lazar.feature_calculation_algorithm == "Substructure.match" || lazar.feature_calculation_algorithm == "Substructure.match_hits" if training_features.features[feature] smarts = training_features.features[feature][OT.smarts] #lazar.fingerprints[compound] << smarts @@ -163,26 +176,11 @@ post '/lazar/?' do end # CASE 2: Others + elsif entry[feature].flatten.size == 1 + lazar.fingerprints[compound][feature] = entry[feature].flatten.first + lazar.features << feature unless lazar.features.include? feature else - case training_features.feature_type(@subjectid) - when "classification" - # fingerprints are sets - if entry[feature].flatten.size == 1 - #lazar.fingerprints[compound] << feature if entry[feature].flatten.first.to_s.match(TRUE_REGEXP) - lazar.fingerprints[compound][feature] = entry[feature].flatten.first if entry[feature].flatten.first.to_s.match(TRUE_REGEXP) - lazar.features << feature unless lazar.features.include? feature - else - LOGGER.warn "More than one entry (#{entry[feature].inspect}) for compound #{compound}, feature #{feature}" - end - when "regression" - # fingerprints are arrays - if entry[feature].flatten.size == 1 - lazar.fingerprints[compound][lazar.features.index(feature)] = entry[feature].flatten.first - #lazar.fingerprints[compound][feature] = entry[feature].flatten.first - else - LOGGER.warn "More than one entry (#{entry[feature].inspect}) for compound #{compound}, feature #{feature}" - end - end + LOGGER.warn "More than one entry (#{entry[feature].inspect}) for compound #{compound}, feature #{feature}" end end end @@ -191,28 +189,8 @@ post '/lazar/?' do - - # - # AM: SETTINGS - # - # - # - - # AM: allow settings override by user - lazar.prediction_algorithm = "Neighbors.#{params[:prediction_algorithm]}" unless params[:prediction_algorithm].nil? - lazar.prop_kernel = true if (params[:local_svm_kernel] == "propositionalized" || params[:prediction_algorithm] == "local_mlr_prop") - lazar.conf_stdev = false - lazar.conf_stdev = true if params[:conf_stdev] == "true" - - - - - - # - # AM: Feed data - # - # - # + + # # # Activities if prediction_feature.feature_type == "regression" training_activities.data_entries.each do |compound,entry| @@ -238,11 +216,7 @@ post '/lazar/?' do - # - # AM: Metadata - # - # - # + # Metadata lazar.metadata[DC.title] = "lazar model for #{URI.decode(File.basename(prediction_feature.uri))}" lazar.metadata[OT.dependentVariables] = prediction_feature.uri @@ -264,6 +238,7 @@ post '/lazar/?' do model_uri = lazar.save(@subjectid) LOGGER.info model_uri + " created #{Time.now}" model_uri + end response['Content-Type'] = 'text/uri-list' raise OpenTox::ServiceUnavailableError.newtask.uri+"\n" if task.status == "Cancelled" -- cgit v1.2.3 From a74fc0029e810d699b94e21c9fe922d66a4d5c4f Mon Sep 17 00:00:00 2001 From: davor Date: Fri, 23 Dec 2011 14:10:53 +0100 Subject: Removed nr_hits --- lazar.rb | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lazar.rb b/lazar.rb index 89e66c6..f61be83 100644 --- a/lazar.rb +++ b/lazar.rb @@ -80,7 +80,7 @@ post '/lazar/?' do } # Regression: SVM, Substructure.match_hits elsif prediction_feature.feature_type == "regression" - lazar.nr_hits = true # AM: Brauchen wir die Variable noch? Kann man an feature_calculation_algorithm auch ablesen (nĂchste Zeile) + #lazar.nr_hits = true # AM: Brauchen wir die Variable noch? Kann man an feature_calculation_algorithm auch ablesen (nĂchste Zeile) lazar.feature_calculation_algorithm = "Substructure.match_hits" lazar.prediction_algorithm = "Neighbors.local_svm_regression" end @@ -95,13 +95,13 @@ post '/lazar/?' do # Nr Hits if params[:nr_hits] == "false" # if nr_hits is set optional to true/false it will return as String (but should be True/FalseClass) - lazar.nr_hits = false + #lazar.nr_hits = false lazar.feature_calculation_algorithm = "Substructure.match" elsif params[:nr_hits] == "true" - lazar.nr_hits = true + #lazar.nr_hits = true lazar.feature_calculation_algorithm = "Substructure.match_hits" end - params[:nr_hits] = "true" if lazar.nr_hits + params[:nr_hits] = "true" if lazar.feature_calculation_algorithm = "Substructure.match_hits" #not sure if this line in needed # Algorithm lazar.prediction_algorithm = "Neighbors.#{params[:prediction_algorithm]}" unless params[:prediction_algorithm].nil? @@ -163,7 +163,7 @@ post '/lazar/?' do if training_features.features[feature] smarts = training_features.features[feature][OT.smarts] #lazar.fingerprints[compound] << smarts - if params[:nr_hits] + if lazar.feature_calculation_algorithm == "Substructure.match_hits" lazar.fingerprints[compound][smarts] = entry[feature].flatten.first * training_features.features[feature][OT.pValue] else lazar.fingerprints[compound][smarts] = 1 * training_features.features[feature][OT.pValue] -- cgit v1.2.3 From 80bc28fccde34d9dcfe1621e466cf0930158591f Mon Sep 17 00:00:00 2001 From: davor Date: Fri, 23 Dec 2011 15:24:56 +0100 Subject: Fixed bug. --- lazar.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lazar.rb b/lazar.rb index f61be83..d66312e 100644 --- a/lazar.rb +++ b/lazar.rb @@ -101,7 +101,7 @@ post '/lazar/?' do #lazar.nr_hits = true lazar.feature_calculation_algorithm = "Substructure.match_hits" end - params[:nr_hits] = "true" if lazar.feature_calculation_algorithm = "Substructure.match_hits" #not sure if this line in needed + params[:nr_hits] = "true" if lazar.feature_calculation_algorithm == "Substructure.match_hits" #not sure if this line in needed # Algorithm lazar.prediction_algorithm = "Neighbors.#{params[:prediction_algorithm]}" unless params[:prediction_algorithm].nil? -- cgit v1.2.3 From 2e7ff3936adfea4ad4bc456a13b2c2fed0ad581b Mon Sep 17 00:00:00 2001 From: davor Date: Fri, 23 Dec 2011 16:54:35 +0100 Subject: Added pc_type to model --- lazar.rb | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/lazar.rb b/lazar.rb index d66312e..f69495c 100644 --- a/lazar.rb +++ b/lazar.rb @@ -108,11 +108,13 @@ post '/lazar/?' do # Propositionalization lazar.prop_kernel = true if (params[:local_svm_kernel] == "propositionalized" || params[:prediction_algorithm] == "local_mlr_prop") + + # PC type + lazar.pc_type = params[:pc_type] unless params[:pc_type].nil? # Conf_stdev lazar.conf_stdev = ( (params[:conf_stdev] == "true") ? true : false ) - - + @@ -129,7 +131,7 @@ post '/lazar/?' do # Read Features if params[:feature_dataset_uri] - lazar.feature_calculation_algorithm = "" # TODO: Implement lookup in feature dataset + lazar.feature_calculation_algorithm = "Substructure.lookup" feature_dataset_uri = params[:feature_dataset_uri] training_features = OpenTox::Dataset.new(feature_dataset_uri) if training_features.feature_type(@subjectid) == "regression" -- cgit v1.2.3 From 6374df2b69c039ffbc269a70198b94360b68e8de Mon Sep 17 00:00:00 2001 From: davor Date: Tue, 10 Jan 2012 08:31:20 +0100 Subject: Parameter clean-up propositionalized pc_type nr_hits min_sim now regular parameters (stored in metadata) Updated flowchart at http://www.maunz.de/wordpress/opentox/2011/lazar-models-and-how-to-trigger-them --- README.md | 21 +++++++++++---------- lazar.rb | 33 +++++++++++++++++++-------------- 2 files changed, 30 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index 8383cb6..e854ac3 100644 --- a/README.md +++ b/README.md @@ -31,22 +31,23 @@ REST operations [feature_type=trees], [nr_hits=false] Create lazar model POST /lazar dataset_uri, URI for lazar model 200,400,404,500 - prediction_feature, - feature_generation_uri - prediction_algorithm - [local_svm_kernel=weighted_tanimoto] - [min_sim=0.3] - [nr_hits=false] - [conf_stdev=false] + [prediction_feature], + [feature_generation_uri], + [prediction_algorithm], + [feature_dataset_uri], + [propositionalized=false], + [pc_type=null], + [nr_hits=false (class.), true (regr.)], + [min_sim=0.3 (nominal), 0.6 (numeric features)] Synopsis -------- - prediction\_algorithm: One of "weighted\_majority\_vote" (default for classification), "local\_svm\_classification", "local\_svm\_regression (default for regression)", "local\_mlr\_prop". "weighted\_majority\_vote" is not applicable for regression. "local\_mlr\_prop" is not applicable for classification. -- local\_svm\_kernel: One of "weighted\_tanimoto", "propositionalized". local\_svm\_kernel is not appplicable when prediction\_algorithm="weighted\_majority\_vote". -- min_sim: The minimum similarity threshold for neighbors. Numeric value in [0,1]. +- propositionalized: One of "true", "false". Not appplicable when prediction\_algorithm="weighted\_majority\_vote". +- pc_type: Mandatory for feature dataset, one of [geometrical, topological, electronic, constitutional, hybrid, cpsa]. - nr_hits: Whether for instantiated models (local\_svm\_kernel = "propositionalized" for prediction_algorithm="local\_svm\_classification" or "local\_svm\_regression", or for prediction_algorithm="local\_mlr\_prop") nominal features should be instantiated with their occurrence counts in the instances. For non-instantiated models (local\_svm\_kernel = "weighted\_tanimoto" for prediction_algorithm="local\_svm\_classification" or "local\_svm\_regression", or for prediction_algorithm="weighted\_majority\_vote") the neighbor-to-neighbor and neighbor-to-query similarity also integrates these counts, when the parameter is set. One of "true", "false". -- conf_stdev: Whether confidence integrates distribution of neighbor activity values. When "true", the exp(-1.0*(standard deviation of neighbor activities)) is multiplied on the similarity. One of "true", "false". +- min_sim: The minimum similarity threshold for neighbors. Numeric value in [0,1]. See http://www.maunz.de/wordpress/opentox/2011/lazar-models-and-how-to-trigger-them for a graphical overview. diff --git a/lazar.rb b/lazar.rb index f69495c..6c2d8ed 100644 --- a/lazar.rb +++ b/lazar.rb @@ -12,9 +12,9 @@ get '/lazar/?' do OT.parameters => [ { DC.description => "Dataset URI with the dependent variable", OT.paramScope => "mandatory", DC.title => "dataset_uri" }, { DC.description => "Feature URI for dependent variable. Optional for datasets with only a single feature.", OT.paramScope => "optional", DC.title => "prediction_feature" }, - { DC.description => "URI of feature genration service. Default: #{@@feature_generation_default}", OT.paramScope => "optional", DC.title => "feature_generation_uri" }, + { DC.description => "URI of feature generation service. Default: #{@@feature_generation_default}", OT.paramScope => "optional", DC.title => "feature_generation_uri" }, { DC.description => "URI of feature dataset. If this parameter is set no feature generation algorithm will be called", OT.paramScope => "optional", DC.title => "feature_dataset_uri" }, - { DC.description => "Further parameters for the feaature generation service", OT.paramScope => "optional" } + { DC.description => "Further parameters for the feature generation service", OT.paramScope => "optional" } ] } case request.env['HTTP_ACCEPT'] @@ -80,7 +80,6 @@ post '/lazar/?' do } # Regression: SVM, Substructure.match_hits elsif prediction_feature.feature_type == "regression" - #lazar.nr_hits = true # AM: Brauchen wir die Variable noch? Kann man an feature_calculation_algorithm auch ablesen (nĂchste Zeile) lazar.feature_calculation_algorithm = "Substructure.match_hits" lazar.prediction_algorithm = "Neighbors.local_svm_regression" end @@ -91,28 +90,28 @@ post '/lazar/?' do # # # USER VALUES # Min Sim - lazar.min_sim = params[:min_sim].to_f if params[:min_sim] + min_sim = params[:min_sim].to_f if params[:min_sim] + min_sim = 0.3 unless params[:min_sim] # Nr Hits - if params[:nr_hits] == "false" # if nr_hits is set optional to true/false it will return as String (but should be True/FalseClass) - #lazar.nr_hits = false - lazar.feature_calculation_algorithm = "Substructure.match" - elsif params[:nr_hits] == "true" - #lazar.nr_hits = true + nr_hits = false + if params[:nr_hits] == "true" lazar.feature_calculation_algorithm = "Substructure.match_hits" + nr_hits = true end params[:nr_hits] = "true" if lazar.feature_calculation_algorithm == "Substructure.match_hits" #not sure if this line in needed # Algorithm - lazar.prediction_algorithm = "Neighbors.#{params[:prediction_algorithm]}" unless params[:prediction_algorithm].nil? + lazar.prediction_algorithm = "Neighbors.#{params[:prediction_algorithm]}" if params[:prediction_algorithm] # Propositionalization - lazar.prop_kernel = true if (params[:local_svm_kernel] == "propositionalized" || params[:prediction_algorithm] == "local_mlr_prop") + propositionalized = false + propositionalized = true if (params[:propositionalized] == "true" || params[:prediction_algorithm] == "local_mlr_prop") # PC type - lazar.pc_type = params[:pc_type] unless params[:pc_type].nil? + pc_type = params[:pc_type] unless params[:pc_type].nil? - # Conf_stdev + # Conf_stdev --- To be removed?? lazar.conf_stdev = ( (params[:conf_stdev] == "true") ? true : false ) @@ -136,6 +135,8 @@ post '/lazar/?' do training_features = OpenTox::Dataset.new(feature_dataset_uri) if training_features.feature_type(@subjectid) == "regression" lazar.similarity_algorithm = "Similarity.cosine" + min_sim = 0.6 unless params[:min_sim] + raise OpenTox::NotFoundError.new "No pc_type parameter." unless params[:pc_type] end # Create Features @@ -234,7 +235,11 @@ post '/lazar/?' do lazar.metadata[OT.parameters] = [ {DC.title => "dataset_uri", OT.paramValue => dataset_uri}, {DC.title => "prediction_feature", OT.paramValue => prediction_feature.uri}, - {DC.title => "feature_generation_uri", OT.paramValue => feature_generation_uri} + {DC.title => "feature_generation_uri", OT.paramValue => feature_generation_uri}, + {DC.title => "propositionalized", OT.paramValue => propositionalized}, + {DC.title => "pc_type", OT.paramValue => pc_type}, + {DC.title => "nr_hits", OT.paramValue => nr_hits}, + {DC.title => "min_sim", OT.paramValue => min_sim} ] model_uri = lazar.save(@subjectid) -- cgit v1.2.3 From 33d9217423a9b724bd707ffa03edc5b1482a4bc3 Mon Sep 17 00:00:00 2001 From: ot7 Date: Tue, 10 Jan 2012 10:54:07 +0100 Subject: Fixed bug --- lazar.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lazar.rb b/lazar.rb index 6c2d8ed..5182200 100644 --- a/lazar.rb +++ b/lazar.rb @@ -162,7 +162,7 @@ post '/lazar/?' do entry.keys.each do |feature| # CASE 1: Substructure - if lazar.feature_calculation_algorithm == "Substructure.match" || lazar.feature_calculation_algorithm == "Substructure.match_hits" + if ((lazar.feature_calculation_algorithm == "Substructure.match") || (lazar.feature_calculation_algorithm == "Substructure.match_hits")) if training_features.features[feature] smarts = training_features.features[feature][OT.smarts] #lazar.fingerprints[compound] << smarts -- cgit v1.2.3 From b462f330bfacff53efe21d0d803d0102110c65df Mon Sep 17 00:00:00 2001 From: davor Date: Fri, 13 Jan 2012 12:17:51 +0100 Subject: Fix: Adding fingerprint *only when activity exists* --- lazar.rb | 53 +++++++++++++++++++++++++++++------------------------ 1 file changed, 29 insertions(+), 24 deletions(-) diff --git a/lazar.rb b/lazar.rb index 6c2d8ed..242c20f 100644 --- a/lazar.rb +++ b/lazar.rb @@ -158,33 +158,38 @@ post '/lazar/?' do raise OpenTox::NotFoundError.new "Dataset #{feature_dataset_uri} not found." if training_features.nil? training_features.data_entries.each do |compound,entry| - lazar.fingerprints[compound] = {} unless lazar.fingerprints[compound] - entry.keys.each do |feature| - - # CASE 1: Substructure - if lazar.feature_calculation_algorithm == "Substructure.match" || lazar.feature_calculation_algorithm == "Substructure.match_hits" - if training_features.features[feature] - smarts = training_features.features[feature][OT.smarts] - #lazar.fingerprints[compound] << smarts - if lazar.feature_calculation_algorithm == "Substructure.match_hits" - lazar.fingerprints[compound][smarts] = entry[feature].flatten.first * training_features.features[feature][OT.pValue] - else - lazar.fingerprints[compound][smarts] = 1 * training_features.features[feature][OT.pValue] - end - unless lazar.features.include? smarts - lazar.features << smarts - lazar.p_values[smarts] = training_features.features[feature][OT.pValue] - lazar.effects[smarts] = training_features.features[feature][OT.effect] + + if training_activities.data_entries.has_key? compound + + lazar.fingerprints[compound] = {} unless lazar.fingerprints[compound] + entry.keys.each do |feature| + + # CASE 1: Substructure + if lazar.feature_calculation_algorithm == "Substructure.match" || lazar.feature_calculation_algorithm == "Substructure.match_hits" + if training_features.features[feature] + smarts = training_features.features[feature][OT.smarts] + #lazar.fingerprints[compound] << smarts + if lazar.feature_calculation_algorithm == "Substructure.match_hits" + lazar.fingerprints[compound][smarts] = entry[feature].flatten.first * training_features.features[feature][OT.pValue] + else + lazar.fingerprints[compound][smarts] = 1 * training_features.features[feature][OT.pValue] + end + unless lazar.features.include? smarts + lazar.features << smarts + lazar.p_values[smarts] = training_features.features[feature][OT.pValue] + lazar.effects[smarts] = training_features.features[feature][OT.effect] + end end - end - # CASE 2: Others - elsif entry[feature].flatten.size == 1 - lazar.fingerprints[compound][feature] = entry[feature].flatten.first - lazar.features << feature unless lazar.features.include? feature - else - LOGGER.warn "More than one entry (#{entry[feature].inspect}) for compound #{compound}, feature #{feature}" + # CASE 2: Others + elsif entry[feature].flatten.size == 1 + lazar.fingerprints[compound][feature] = entry[feature].flatten.first + lazar.features << feature unless lazar.features.include? feature + else + LOGGER.warn "More than one entry (#{entry[feature].inspect}) for compound #{compound}, feature #{feature}" + end end + end end task.progress 80 -- cgit v1.2.3 From c19b6a19c8ef0207824b038fade096af84649ea9 Mon Sep 17 00:00:00 2001 From: davor Date: Sun, 15 Jan 2012 15:52:18 +0100 Subject: Lowered sim threshold for cosine similarity to 0.4 --- lazar.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lazar.rb b/lazar.rb index 145565a..bd3c934 100644 --- a/lazar.rb +++ b/lazar.rb @@ -135,7 +135,7 @@ post '/lazar/?' do training_features = OpenTox::Dataset.new(feature_dataset_uri) if training_features.feature_type(@subjectid) == "regression" lazar.similarity_algorithm = "Similarity.cosine" - min_sim = 0.6 unless params[:min_sim] + min_sim = 0.4 unless params[:min_sim] raise OpenTox::NotFoundError.new "No pc_type parameter." unless params[:pc_type] end @@ -164,7 +164,7 @@ post '/lazar/?' do entry.keys.each do |feature| # CASE 1: Substructure - if lazar.feature_calculation_algorithm == "Substructure.match" || lazar.feature_calculation_algorithm == "Substructure.match_hits" + if (lazar.feature_calculation_algorithm == "Substructure.match") || (lazar.feature_calculation_algorithm == "Substructure.match_hits") if training_features.features[feature] smarts = training_features.features[feature][OT.smarts] #lazar.fingerprints[compound] << smarts -- cgit v1.2.3 From 05a39eba3d8f683f2f16884531f83e3a3f8c3938 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Thu, 19 Jan 2012 15:47:43 +0100 Subject: Making Prop default for SVM --- lazar.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lazar.rb b/lazar.rb index bd3c934..f2634ab 100644 --- a/lazar.rb +++ b/lazar.rb @@ -106,7 +106,7 @@ post '/lazar/?' do # Propositionalization propositionalized = false - propositionalized = true if (params[:propositionalized] == "true" || params[:prediction_algorithm] == "local_mlr_prop") + propositionalized = true if (params[:propositionalized] != "false" && ( params[:prediction_algorithm] == "local_mlr_prop" || params[:prediction_algorithm].include? "local_svm" ) ) # PC type pc_type = params[:pc_type] unless params[:pc_type].nil? -- cgit v1.2.3 From 10d5ece8b46abf72ab9c475d08c1d238f4079e39 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Thu, 19 Jan 2012 16:19:28 +0100 Subject: Fix to previous commit --- lazar.rb | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/lazar.rb b/lazar.rb index f2634ab..7edbb97 100644 --- a/lazar.rb +++ b/lazar.rb @@ -106,7 +106,13 @@ post '/lazar/?' do # Propositionalization propositionalized = false - propositionalized = true if (params[:propositionalized] != "false" && ( params[:prediction_algorithm] == "local_mlr_prop" || params[:prediction_algorithm].include? "local_svm" ) ) + propositionalized = true if ( params[:propositionalized] != "false" && + ( params[:prediction_algorithm] == "local_mlr_prop" || + ( params[:prediction_algorithm] && + params[:prediction_algorithm].include?("local_svm") + ) + ) + ) # PC type pc_type = params[:pc_type] unless params[:pc_type].nil? -- cgit v1.2.3 From ff279abf4d148b2708a7e2e1b7ee3df5b4387763 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Fri, 20 Jan 2012 17:19:18 +0100 Subject: Add new parameter, min_train_performance, which controls censoring in local_svm and local_svm_prop --- lazar.rb | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/lazar.rb b/lazar.rb index 7edbb97..8900f3a 100644 --- a/lazar.rb +++ b/lazar.rb @@ -117,6 +117,10 @@ post '/lazar/?' do # PC type pc_type = params[:pc_type] unless params[:pc_type].nil? + # Min train performance + min_train_performance = params[:min_train_performance].to_f if params[:min_train_performance] + min_train_performance = 0.1 unless params[:min_train_performance] + # Conf_stdev --- To be removed?? lazar.conf_stdev = ( (params[:conf_stdev] == "true") ? true : false ) @@ -249,7 +253,9 @@ post '/lazar/?' do {DC.title => "propositionalized", OT.paramValue => propositionalized}, {DC.title => "pc_type", OT.paramValue => pc_type}, {DC.title => "nr_hits", OT.paramValue => nr_hits}, - {DC.title => "min_sim", OT.paramValue => min_sim} + {DC.title => "min_sim", OT.paramValue => min_sim}, + {DC.title => "min_train_performance", OT.paramValue => min_train_performance}, + ] model_uri = lazar.save(@subjectid) -- cgit v1.2.3 From 6b9482101ff26b3e31cf145c4786ada56923d5f7 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Mon, 23 Jan 2012 08:08:31 +0100 Subject: Fixed bug in lazar.rb: propositionalized not set correctly --- lazar.rb | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/lazar.rb b/lazar.rb index 8900f3a..65752ec 100644 --- a/lazar.rb +++ b/lazar.rb @@ -106,13 +106,7 @@ post '/lazar/?' do # Propositionalization propositionalized = false - propositionalized = true if ( params[:propositionalized] != "false" && - ( params[:prediction_algorithm] == "local_mlr_prop" || - ( params[:prediction_algorithm] && - params[:prediction_algorithm].include?("local_svm") - ) - ) - ) + propositionalized = true if ( params[:propositionalized] != "false" && ( lazar.prediction_algorithm == "local_mlr_prop" || lazar.prediction_algorithm.include?("local_svm") ) ) # PC type pc_type = params[:pc_type] unless params[:pc_type].nil? -- cgit v1.2.3 From 8035d64e9f1c9d49d1ec947204f78534ecc21e64 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Mon, 30 Jan 2012 13:18:13 +0100 Subject: Prop always on, removed conf_stdev --- lazar.rb | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/lazar.rb b/lazar.rb index 65752ec..6fd12ef 100644 --- a/lazar.rb +++ b/lazar.rb @@ -105,8 +105,7 @@ post '/lazar/?' do lazar.prediction_algorithm = "Neighbors.#{params[:prediction_algorithm]}" if params[:prediction_algorithm] # Propositionalization - propositionalized = false - propositionalized = true if ( params[:propositionalized] != "false" && ( lazar.prediction_algorithm == "local_mlr_prop" || lazar.prediction_algorithm.include?("local_svm") ) ) + propositionalized = true # PC type pc_type = params[:pc_type] unless params[:pc_type].nil? @@ -115,10 +114,6 @@ post '/lazar/?' do min_train_performance = params[:min_train_performance].to_f if params[:min_train_performance] min_train_performance = 0.1 unless params[:min_train_performance] - # Conf_stdev --- To be removed?? - lazar.conf_stdev = ( (params[:conf_stdev] == "true") ? true : false ) - - -- cgit v1.2.3 From 165a82a504bf06136619af15ccc6e3be23c642fb Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Mon, 30 Jan 2012 16:17:00 +0100 Subject: Prop on not for wmv --- last-utils | 2 +- lazar.rb | 2 +- libfminer | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/last-utils b/last-utils index 8c02f7e..cf02384 160000 --- a/last-utils +++ b/last-utils @@ -1 +1 @@ -Subproject commit 8c02f7e71450cac6d8c5d7d34ecb620046b4ea46 +Subproject commit cf0238477127e54509b6ab8b5c38f50dd6ffce08 diff --git a/lazar.rb b/lazar.rb index 6fd12ef..7fa0f96 100644 --- a/lazar.rb +++ b/lazar.rb @@ -105,7 +105,7 @@ post '/lazar/?' do lazar.prediction_algorithm = "Neighbors.#{params[:prediction_algorithm]}" if params[:prediction_algorithm] # Propositionalization - propositionalized = true + propositionalized = (lazar.prediction_algorithm=="Neighbors.weighted_majority_vote" ? false : true) # PC type pc_type = params[:pc_type] unless params[:pc_type].nil? diff --git a/libfminer b/libfminer index 17932e8..f9e560d 160000 --- a/libfminer +++ b/libfminer @@ -1 +1 @@ -Subproject commit 17932e809c35c93374ed3d5fd19a313325c35b41 +Subproject commit f9e560dc0a7a5d5af439814ab5fa9ce027a025b7 -- cgit v1.2.3 From 0164d17d0fbb90a9dfbe755eb7a2e9b2e778d623 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Tue, 31 Jan 2012 08:12:03 +0100 Subject: nr_hits for all SVM formulations --- lazar.rb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lazar.rb b/lazar.rb index 7fa0f96..81929c6 100644 --- a/lazar.rb +++ b/lazar.rb @@ -93,17 +93,17 @@ post '/lazar/?' do min_sim = params[:min_sim].to_f if params[:min_sim] min_sim = 0.3 unless params[:min_sim] + # Algorithm + lazar.prediction_algorithm = "Neighbors.#{params[:prediction_algorithm]}" if params[:prediction_algorithm] + # Nr Hits nr_hits = false - if params[:nr_hits] == "true" + if params[:nr_hits] == "true" || lazar.prediction_algorithm.include?("local_svm") lazar.feature_calculation_algorithm = "Substructure.match_hits" nr_hits = true end params[:nr_hits] = "true" if lazar.feature_calculation_algorithm == "Substructure.match_hits" #not sure if this line in needed - # Algorithm - lazar.prediction_algorithm = "Neighbors.#{params[:prediction_algorithm]}" if params[:prediction_algorithm] - # Propositionalization propositionalized = (lazar.prediction_algorithm=="Neighbors.weighted_majority_vote" ? false : true) -- cgit v1.2.3 From 985a7a0a18f763ceae020cef2fbf0db3da17776d Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Tue, 31 Jan 2012 08:48:08 +0100 Subject: Adjusted tests to new parameters (see http://goo.gl/lXJBS) --- README.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index e854ac3..e979ff6 100644 --- a/README.md +++ b/README.md @@ -35,19 +35,19 @@ REST operations [feature_generation_uri], [prediction_algorithm], [feature_dataset_uri], - [propositionalized=false], [pc_type=null], - [nr_hits=false (class.), true (regr.)], - [min_sim=0.3 (nominal), 0.6 (numeric features)] + [nr_hits=false (class. using wt. maj. vote), true (else)], + [min_sim=0.3 (nominal), 0.4 (numeric features)] + [min_train_performance=0.1] Synopsis -------- -- prediction\_algorithm: One of "weighted\_majority\_vote" (default for classification), "local\_svm\_classification", "local\_svm\_regression (default for regression)", "local\_mlr\_prop". "weighted\_majority\_vote" is not applicable for regression. "local\_mlr\_prop" is not applicable for classification. -- propositionalized: One of "true", "false". Not appplicable when prediction\_algorithm="weighted\_majority\_vote". +- prediction\_algorithm: One of "weighted\_majority\_vote" (default for classification), "local\_svm\_classification", "local\_svm\_regression" (default for regression). "weighted\_majority\_vote" is not applicable for regression. - pc_type: Mandatory for feature dataset, one of [geometrical, topological, electronic, constitutional, hybrid, cpsa]. -- nr_hits: Whether for instantiated models (local\_svm\_kernel = "propositionalized" for prediction_algorithm="local\_svm\_classification" or "local\_svm\_regression", or for prediction_algorithm="local\_mlr\_prop") nominal features should be instantiated with their occurrence counts in the instances. For non-instantiated models (local\_svm\_kernel = "weighted\_tanimoto" for prediction_algorithm="local\_svm\_classification" or "local\_svm\_regression", or for prediction_algorithm="weighted\_majority\_vote") the neighbor-to-neighbor and neighbor-to-query similarity also integrates these counts, when the parameter is set. One of "true", "false". -- min_sim: The minimum similarity threshold for neighbors. Numeric value in [0,1]. +- nr_hits: Whether nominal features should be instantiated with their occurrence counts in the instances. One of "true", "false". +- min_sim: The minimum similarity threshold for neighbors. Numeric value in [0,1]. +- min_train_performance. The minimum training performance for "local\_svm\_classification" (Accuracy) and "local\_svm\_regression" (R-squared). Numeric value in [0,1]. See http://www.maunz.de/wordpress/opentox/2011/lazar-models-and-how-to-trigger-them for a graphical overview. -- cgit v1.2.3 From 5fdf86d55f47fa3bdca0bb8f5482a7fd33f60987 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Tue, 31 Jan 2012 11:27:31 +0100 Subject: feature match functionallity for fminer --- fminer.rb | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/fminer.rb b/fminer.rb index 0739b62..36e242e 100644 --- a/fminer.rb +++ b/fminer.rb @@ -85,6 +85,31 @@ get "/fminer/last/?" do end end +# Creates same features for dataset that have been created +# with fminer in dataset +post '/fminer/:method/match?' do + raise OpenTox::BadRequestError.new "feature_dataset_uri not given" unless params[:feature_dataset_uri] + raise OpenTox::BadRequestError.new "dataset_uri not given" unless params[:dataset_uri] + task = OpenTox::Task.create("Matching features", url_for('/fminer/match',:full)) do |task| + f_dataset = OpenTox::Dataset.find params[:feature_dataset_uri],@subjectid + c_dataset = OpenTox::Dataset.find params[:dataset_uri],@subjectid + res_dataset = OpenTox::Dataset.create @subjectid + f_dataset.features.each do |f,m| + res_dataset.add_feature(f,m) + end + c_dataset.compounds.each do |c| + res_dataset.add_compound(c) + comp = OpenTox::Compound.new(c) + f_dataset.features.each do |f,m| + res_dataset.add(c,f,1) if comp.match?(m[OT.smarts]) + end + end + res_dataset.save + res_dataset.uri + end + return_task(task) +end + # Run bbrc algorithm on dataset # # @param [String] dataset_uri URI of the training dataset -- cgit v1.2.3 From 4cca6ea7b4c88ebd58974cd998db205600e34232 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Tue, 31 Jan 2012 11:54:14 +0100 Subject: extend matching to 'nr_hits' --- fminer.rb | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fminer.rb b/fminer.rb index 36e242e..620e147 100644 --- a/fminer.rb +++ b/fminer.rb @@ -87,6 +87,7 @@ end # Creates same features for dataset that have been created # with fminer in dataset +# accept params[:nr_hits] as used in other fminer methods post '/fminer/:method/match?' do raise OpenTox::BadRequestError.new "feature_dataset_uri not given" unless params[:feature_dataset_uri] raise OpenTox::BadRequestError.new "dataset_uri not given" unless params[:dataset_uri] @@ -101,7 +102,12 @@ post '/fminer/:method/match?' do res_dataset.add_compound(c) comp = OpenTox::Compound.new(c) f_dataset.features.each do |f,m| - res_dataset.add(c,f,1) if comp.match?(m[OT.smarts]) + if params[:nr_hits] == "true" + hits = comp.match_hits([m[OT.smarts]]) + res_dataset.add(c,f,hits[m[OT.smarts]]) if hits[m[OT.smarts]] + else + res_dataset.add(c,f,1) if comp.match?(m[OT.smarts]) + end end end res_dataset.save -- cgit v1.2.3 From 0fa509eeab52c336552a38db1a3f7195f840a1f2 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Tue, 31 Jan 2012 11:58:46 +0100 Subject: fixing a&a in fminer match --- fminer.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fminer.rb b/fminer.rb index 620e147..a5ea61d 100644 --- a/fminer.rb +++ b/fminer.rb @@ -94,7 +94,7 @@ post '/fminer/:method/match?' do task = OpenTox::Task.create("Matching features", url_for('/fminer/match',:full)) do |task| f_dataset = OpenTox::Dataset.find params[:feature_dataset_uri],@subjectid c_dataset = OpenTox::Dataset.find params[:dataset_uri],@subjectid - res_dataset = OpenTox::Dataset.create @subjectid + res_dataset = OpenTox::Dataset.create CONFIG[:services]["dataset"],@subjectid f_dataset.features.each do |f,m| res_dataset.add_feature(f,m) end @@ -110,7 +110,7 @@ post '/fminer/:method/match?' do end end end - res_dataset.save + res_dataset.save @subjectid res_dataset.uri end return_task(task) -- cgit v1.2.3 From 48a8728794bb8d42bab5d4f62e19d11b36bef48a Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Tue, 7 Feb 2012 17:14:45 +0100 Subject: Added rfe --- feature_selection.rb | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 feature_selection.rb diff --git a/feature_selection.rb b/feature_selection.rb new file mode 100644 index 0000000..c69a954 --- /dev/null +++ b/feature_selection.rb @@ -0,0 +1,74 @@ +# Get list of feature_selection algorithms +# +# @return [text/uri-list] URIs of feature_selection algorithms +get '/feature_selection/?' do + list = [ url_for('/feature_selection/rfe', :full) ].join("\n") + "\n" + case request.env['HTTP_ACCEPT'] + when /text\/html/ + content_type "text/html" + OpenTox.text_to_html list + else + content_type 'text/uri-list' + list + end +end + +# Get RDF/XML representation of feature_selection rfe algorithm +# @return [application/rdf+xml] OWL-DL representation of feature_selection rfe algorithm +get "/feature_selection/rfe/?" do + algorithm = OpenTox::Algorithm::Generic.new(url_for('/feature_selection/rfe',:full)) + algorithm.metadata = { + DC.title => 'recursive feature elimination', + DC.creator => "andreas@maunz.de, helma@in-silico.ch", + DC.contributor => "vorgrimmlerdavid@gmx.de", + BO.instanceOf => "http://opentox.org/ontology/ist-algorithms.owl#feature_selection_rfe", + RDF.type => [OT.Algorithm,OTA.PatternMiningSupervised], + OT.parameters => [ + { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" }, + { DC.description => "Feature Dataset URI", OT.paramScope => "mandatory", DC.title => "feature_dataset_uri" } + ] + } + case request.env['HTTP_ACCEPT'] + when /text\/html/ + content_type "text/html" + OpenTox.text_to_html algorithm.to_yaml + when /application\/x-yaml/ + content_type "application/x-yaml" + algorithm.to_yaml + else + response['Content-Type'] = 'application/rdf+xml' + algorithm.to_rdfxml + end +end + +# Run rfe algorithm on dataset +# +# @param [String] dataset_uri URI of the training dataset +# @param [String] feature_dataset_uri URI of the feature dataset +# @return [text/uri-list] Task URI +post '/feature_selection/rfe/?' do + + raise OpenTox::NotFoundError.new "Please submit a dataset_uri." unless params[:dataset_uri] + raise OpenTox::NotFoundError.new "Please submit a feature_dataset_uri." unless params[:feature_dataset_uri] + + ds_csv=OpenTox::RestClientWrapper.get( params[:dataset_uri], {:accept => "text/csv"} ) + tf_ds=Tempfile.open(['rfe_', '.csv']) + tf_ds.puts(ds_csv) + + fds_csv=OpenTox::RestClientWrapper.get( params[:feature_dataset_uri], {:accept => "text/csv"}) + tf_fds=Tempfile.open(['rfe_', '.csv']) + tf_fds.puts(fds_csv) + + task = OpenTox::Task.create("Recursive Feature Elimination", url_for('/feature_selection',:full)) do |task| + rfe=OpenTox::Algorithm::FeatureSelection.rfe( { :ds_csv_file => tf_ds, :fds_csv_file => tf_fds } ) + r_result_uri=OpenTox::Dataset.create_from_csv(r_result_file).uri + File.unlink(r_result_file) + tf_ds.close! + tf_fds.close! + r_result_uri + end + response['Content-Type'] = 'text/uri-list' + raise OpenTox::ServiceUnavailableError.newtask.uri+"\n" if task.status == "Cancelled" + halt 202,task.uri.to_s+"\n" +end + -- cgit v1.2.3 From c2d9390594a2008b6fec2ce724462badd458ae74 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Wed, 8 Feb 2012 14:15:30 +0100 Subject: Adjusted rfe --- feature_selection.rb | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/feature_selection.rb b/feature_selection.rb index c69a954..c5bc4fa 100644 --- a/feature_selection.rb +++ b/feature_selection.rb @@ -25,7 +25,9 @@ get "/feature_selection/rfe/?" do RDF.type => [OT.Algorithm,OTA.PatternMiningSupervised], OT.parameters => [ { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" }, - { DC.description => "Feature Dataset URI", OT.paramScope => "mandatory", DC.title => "feature_dataset_uri" } + { DC.description => "Prediction Feature URI", OT.paramScope => "mandatory", DC.title => "prediction_feature_uri" }, + { DC.description => "Feature Dataset URI", OT.paramScope => "mandatory", DC.title => "feature_dataset_uri" }, + { DC.description => "Delete Instances with missing values", OT.paramScope => "optional", DC.title => "del_missing" } ] } case request.env['HTTP_ACCEPT'] @@ -49,22 +51,31 @@ end post '/feature_selection/rfe/?' do raise OpenTox::NotFoundError.new "Please submit a dataset_uri." unless params[:dataset_uri] + raise OpenTox::NotFoundError.new "Please submit a prediction_feature_uri." unless params[:prediction_feature_uri] raise OpenTox::NotFoundError.new "Please submit a feature_dataset_uri." unless params[:feature_dataset_uri] ds_csv=OpenTox::RestClientWrapper.get( params[:dataset_uri], {:accept => "text/csv"} ) tf_ds=Tempfile.open(['rfe_', '.csv']) tf_ds.puts(ds_csv) + tf_ds.flush() + + prediction_feature = params[:prediction_feature_uri].split('/').last # get col name fds_csv=OpenTox::RestClientWrapper.get( params[:feature_dataset_uri], {:accept => "text/csv"}) tf_fds=Tempfile.open(['rfe_', '.csv']) tf_fds.puts(fds_csv) + tf_fds.flush() + + del_missing = params[:del_missing] == "true" ? true : false task = OpenTox::Task.create("Recursive Feature Elimination", url_for('/feature_selection',:full)) do |task| - rfe=OpenTox::Algorithm::FeatureSelection.rfe( { :ds_csv_file => tf_ds, :fds_csv_file => tf_fds } ) - r_result_uri=OpenTox::Dataset.create_from_csv(r_result_file).uri - File.unlink(r_result_file) + r_result_file = OpenTox::Algorithm::FeatureSelection.rfe( { :ds_csv_file => tf_ds.path, :prediction_feature => prediction_feature, :fds_csv_file => tf_fds.path, :del_missing => del_missing } ) + r_result_uri = OpenTox::Dataset.create_from_csv_file(r_result_file).uri tf_ds.close! tf_fds.close! + File.unlink(r_result_file) + File.unlink(tf_ds.path) + File.unlink(tf_fds.path) r_result_uri end response['Content-Type'] = 'text/uri-list' -- cgit v1.2.3 From 3e160e2e0537e2bfc0f8c89570349aa2718e34c2 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Wed, 8 Feb 2012 14:40:09 +0100 Subject: Fixed minor bugs --- application.rb | 3 ++- feature_selection.rb | 6 ++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/application.rb b/application.rb index b62f6f5..f5b331f 100644 --- a/application.rb +++ b/application.rb @@ -11,6 +11,7 @@ require 'opentox-ruby' require 'openbabel.rb' require 'fminer.rb' require 'lazar.rb' +require 'feature_selection.rb' set :lock, true @@ -22,7 +23,7 @@ end # # @return [text/uri-list] algorithm URIs get '/?' do - list = [ url_for('/lazar', :full), url_for('/fminer/bbrc', :full), url_for('/fminer/last', :full) ].join("\n") + "\n" + list = [ url_for('/lazar', :full), url_for('/fminer/bbrc', :full), url_for('/fminer/last', :full), url_for('/feature_selection/rfe', :full) ].join("\n") + "\n" case request.env['HTTP_ACCEPT'] when /text\/html/ content_type "text/html" diff --git a/feature_selection.rb b/feature_selection.rb index c5bc4fa..efda812 100644 --- a/feature_selection.rb +++ b/feature_selection.rb @@ -71,11 +71,9 @@ post '/feature_selection/rfe/?' do task = OpenTox::Task.create("Recursive Feature Elimination", url_for('/feature_selection',:full)) do |task| r_result_file = OpenTox::Algorithm::FeatureSelection.rfe( { :ds_csv_file => tf_ds.path, :prediction_feature => prediction_feature, :fds_csv_file => tf_fds.path, :del_missing => del_missing } ) r_result_uri = OpenTox::Dataset.create_from_csv_file(r_result_file).uri - tf_ds.close! - tf_fds.close! + tf_ds.close!; tf_fds.close! + tf_ds.delete; tf_fds.delete File.unlink(r_result_file) - File.unlink(tf_ds.path) - File.unlink(tf_fds.path) r_result_uri end response['Content-Type'] = 'text/uri-list' -- cgit v1.2.3 From 41a5a6a85fa97d5d9a6495c36df3762b8f5f36c0 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Wed, 8 Feb 2012 14:50:04 +0100 Subject: Updated README --- README.md | 70 +++++++++++++++++++++++++++++++++++++++------------------------ 1 file changed, 44 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index e979ff6..344f747 100644 --- a/README.md +++ b/README.md @@ -9,36 +9,48 @@ OpenTox Algorithm REST operations --------------- - Get a list of all algorithms GET / - URIs of algorithms 200 - Get a representation of the GET /fminer/ - fminer representation 200,404 + Get a list of all algorithms GET / - URIs of algorithms 200 + Get a representation of the GET /fminer/ - fminer representation 200,404 fminer algorithms - Get a representation of the GET /fminer/bbrc - bbrc representation 200,404 + Get a representation of the GET /fminer/bbrc - bbrc representation 200,404 bbrc algorithm - Get a representation of the GET /fminer/last - last representation 200,404 + Get a representation of the GET /fminer/last - last representation 200,404 last algorithm - Get a representation of the GET /lazar - lazar representation 200,404 + Get a representation of the GET /lazar - lazar representation 200,404 lazar algorithm - Create bbrc features POST /fminer/bbrc dataset_uri, URI for feature dataset 200,400,404,500 - feature_uri, - [min_frequency=5 per-mil], - [feature_type=trees], - [backbone=true], - [min_chisq_significance=0.95], - [nr_hits=false] - Create last features POST /fminer/last dataset_uri, URI for feature dataset 200,400,404,500 - feature_uri, - [min_frequency=8 %], - [feature_type=trees], - [nr_hits=false] - Create lazar model POST /lazar dataset_uri, URI for lazar model 200,400,404,500 - [prediction_feature], - [feature_generation_uri], - [prediction_algorithm], - [feature_dataset_uri], - [pc_type=null], - [nr_hits=false (class. using wt. maj. vote), true (else)], - [min_sim=0.3 (nominal), 0.4 (numeric features)] - [min_train_performance=0.1] + Get a representation of the GET /feature_selection - feature selection representation 200,404 + feature selection algorithms + Get a representation of the GET /feature_selection/rfe - rfe representation 200,404 + rfe algorithm + + + Create bbrc features POST /fminer/bbrc dataset_uri, URI for feature dataset 200,400,404,500 + feature_uri, + [min_frequency=5 per-mil], + [feature_type=trees], + [backbone=true], + [min_chisq_significance=0.95], + [nr_hits=false] + Create last features POST /fminer/last dataset_uri, URI for feature dataset 200,400,404,500 + feature_uri, + [min_frequency=8 %], + [feature_type=trees], + [nr_hits=false] + Create lazar model POST /lazar dataset_uri, URI for lazar model 200,400,404,500 + [prediction_feature], + [feature_generation_uri], + [prediction_algorithm], + [feature_dataset_uri], + [pc_type=null], + [nr_hits=false (class. using wt. maj. vote), true (else)], + [min_sim=0.3 (nominal), 0.4 (numeric features)] + [min_train_performance=0.1] + + Create selected features POST /feature_selection/rfe dataset_uri, URI for dataset 200,400,404,500 + prediction_feature, + feature_dataset_uri, + [del_missing=false] + Synopsis -------- @@ -48,6 +60,7 @@ Synopsis - nr_hits: Whether nominal features should be instantiated with their occurrence counts in the instances. One of "true", "false". - min_sim: The minimum similarity threshold for neighbors. Numeric value in [0,1]. - min_train_performance. The minimum training performance for "local\_svm\_classification" (Accuracy) and "local\_svm\_regression" (R-squared). Numeric value in [0,1]. +- del_missing: one of true, false See http://www.maunz.de/wordpress/opentox/2011/lazar-models-and-how-to-trigger-them for a graphical overview. @@ -109,4 +122,9 @@ Creates a standard Lazar model. [API documentation](http://rdoc.info/github/opentox/algorithm) -------------------------------------------------------------- +* * * + +### Create a feature dataset of selected features + curl -X POST -d dataset_uri={dataset_uri} -d prediction_feature_uri={prediction_feature_uri} -d feature_dataset_uri={feature_dataset_uri} -d del_missing=true http://webservices.in-silico.ch/test/algorithm/feature_selection/rfe + Copyright (c) 2009-2011 Christoph Helma, Martin Guetlein, Micha Rautenberg, Andreas Maunz, David Vorgrimmler, Denis Gebele. See LICENSE for details. -- cgit v1.2.3 From c80b7ac28dce2df193fb61c042b0563f35b48012 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Thu, 9 Feb 2012 10:26:16 +0100 Subject: Fixed tempfile deletion --- feature_selection.rb | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/feature_selection.rb b/feature_selection.rb index efda812..d375a0e 100644 --- a/feature_selection.rb +++ b/feature_selection.rb @@ -71,9 +71,11 @@ post '/feature_selection/rfe/?' do task = OpenTox::Task.create("Recursive Feature Elimination", url_for('/feature_selection',:full)) do |task| r_result_file = OpenTox::Algorithm::FeatureSelection.rfe( { :ds_csv_file => tf_ds.path, :prediction_feature => prediction_feature, :fds_csv_file => tf_fds.path, :del_missing => del_missing } ) r_result_uri = OpenTox::Dataset.create_from_csv_file(r_result_file).uri - tf_ds.close!; tf_fds.close! - tf_ds.delete; tf_fds.delete - File.unlink(r_result_file) + begin + tf_ds.close!; tf_fds.close! + File.unlink(r_result_file) + rescue + end r_result_uri end response['Content-Type'] = 'text/uri-list' -- cgit v1.2.3 From 94f4526bcb73ec8586559ae8b476b5a73a176882 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Wed, 15 Feb 2012 15:04:40 +0100 Subject: Fixed parameter name in metadata (minfreq) --- fminer.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fminer.rb b/fminer.rb index a5ea61d..10823fd 100644 --- a/fminer.rb +++ b/fminer.rb @@ -35,7 +35,7 @@ get "/fminer/bbrc/?" do OT.parameters => [ { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" }, { DC.description => "Feature URI for dependent variable", OT.paramScope => "mandatory", DC.title => "prediction_feature" }, - { DC.description => "Minimum frequency", OT.paramScope => "optional", DC.title => "minfreq" }, + { DC.description => "Minimum frequency", OT.paramScope => "optional", DC.title => "min_frequency" }, { DC.description => "Feature type, can be 'paths' or 'trees'", OT.paramScope => "optional", DC.title => "feature_type" }, { DC.description => "BBRC classes, pass 'false' to switch off mining for BBRC representatives.", OT.paramScope => "optional", DC.title => "backbone" }, { DC.description => "Significance threshold (between 0 and 1)", OT.paramScope => "optional", DC.title => "min_chisq_significance" }, @@ -67,7 +67,7 @@ get "/fminer/last/?" do OT.parameters => [ { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" }, { DC.description => "Feature URI for dependent variable", OT.paramScope => "mandatory", DC.title => "prediction_feature" }, - { DC.description => "Minimum frequency", OT.paramScope => "optional", DC.title => "minfreq" }, + { DC.description => "Minimum frequency", OT.paramScope => "optional", DC.title => "min_frequency" }, { DC.description => "Feature type, can be 'paths' or 'trees'", OT.paramScope => "optional", DC.title => "feature_type" }, { DC.description => "Maximum number of hops", OT.paramScope => "optional", DC.title => "hops" }, ] -- cgit v1.2.3 From bd8c5fedc9f13e3ad95547d8a437376ed37deac5 Mon Sep 17 00:00:00 2001 From: davor Date: Wed, 15 Feb 2012 16:06:47 +0100 Subject: Commented out metadata information BO.instanceOf because BO ontology is not available --- fminer.rb | 4 ++-- lazar.rb | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fminer.rb b/fminer.rb index a5ea61d..a43a505 100644 --- a/fminer.rb +++ b/fminer.rb @@ -30,7 +30,7 @@ get "/fminer/bbrc/?" do DC.title => 'fminer backbone refinement class representatives', DC.creator => "andreas@maunz.de, helma@in-silico.ch", DC.contributor => "vorgrimmlerdavid@gmx.de", - BO.instanceOf => "http://opentox.org/ontology/ist-algorithms.owl#fminer_bbrc", +# BO.instanceOf => "http://opentox.org/ontology/ist-algorithms.owl#fminer_bbrc", RDF.type => [OT.Algorithm,OTA.PatternMiningSupervised], OT.parameters => [ { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" }, @@ -62,7 +62,7 @@ get "/fminer/last/?" do DC.title => 'fminer latent structure class representatives', DC.creator => "andreas@maunz.de, helma@in-silico.ch", DC.contributor => "vorgrimmlerdavid@gmx.de", - BO.instanceOf => "http://opentox.org/ontology/ist-algorithms.owl#fminer_last", +# BO.instanceOf => "http://opentox.org/ontology/ist-algorithms.owl#fminer_last", RDF.type => [OT.Algorithm,OTA.PatternMiningSupervised], OT.parameters => [ { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" }, diff --git a/lazar.rb b/lazar.rb index 81929c6..2a08778 100644 --- a/lazar.rb +++ b/lazar.rb @@ -8,7 +8,7 @@ get '/lazar/?' do DC.title => 'lazar', DC.creator => "helma@in-silico.ch, andreas@maunz.de", DC.contributor => "vorgrimmlerdavid@gmx.de", - BO.instanceOf => "http://opentox.org/ontology/ist-algorithms.owl#lazar", +# BO.instanceOf => "http://opentox.org/ontology/ist-algorithms.owl#lazar", OT.parameters => [ { DC.description => "Dataset URI with the dependent variable", OT.paramScope => "mandatory", DC.title => "dataset_uri" }, { DC.description => "Feature URI for dependent variable. Optional for datasets with only a single feature.", OT.paramScope => "optional", DC.title => "prediction_feature" }, -- cgit v1.2.3 From 5175e0555846b877cf72fd5f9881bae1d16a3b2b Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Fri, 24 Feb 2012 08:57:35 +0100 Subject: Added Changelog --- ChangeLog | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 ChangeLog diff --git a/ChangeLog b/ChangeLog new file mode 100644 index 0000000..795a878 --- /dev/null +++ b/ChangeLog @@ -0,0 +1,5 @@ +v3.1.0 2012-02-24 + * lazar.rb: pc type parameter in model, cleaned all parameters, + propositionalized learning only for SVM, switch for minimal training + performance, removed conf_stdev + * fminer.rb: feature match service for datasets, also with number of hits -- cgit v1.2.3