From e5cc33df0c3b75b655879181cc3391edb13a0711 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Tue, 17 May 2011 15:17:51 +0200 Subject: Added override flag prediction_algorithm support in params --- lazar.rb | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lazar.rb b/lazar.rb index 45123f0..ec055ee 100644 --- a/lazar.rb +++ b/lazar.rb @@ -122,6 +122,9 @@ post '/lazar/?' do @training_classes = training_activities.feature_classes(prediction_feature.uri, @subjectid) if prediction_feature.feature_type == "classification" lazar.prediction_algorithm = "Neighbors.local_svm_regression" if prediction_feature.feature_type == "regression" + # AM: allow prediction_algorithm override by user for classification AND regression + lazar.prediction_algorithm = "Neighbors.#{params[:prediction_algorithm]}" unless params[:prediction_algorithm].nil? + training_activities.data_entries.each do |compound,entry| lazar.activities[compound] = [] unless lazar.activities[compound] unless entry[prediction_feature.uri].empty? -- cgit v1.2.3 From f48e41548ebb693c1abba8ad1b2a671fae0c118a Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Mon, 23 May 2011 14:06:00 +0000 Subject: owl-dl fixed for model and prediction datasets --- fminer.rb | 8 ++++---- last-utils | 2 +- lazar.rb | 4 ++-- libfminer | 2 +- openbabel.rb | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/fminer.rb b/fminer.rb index d0ea4f4..79b0d06 100644 --- a/fminer.rb +++ b/fminer.rb @@ -23,7 +23,7 @@ get "/fminer/bbrc/?" do DC.title => 'fminer backbone refinement class representatives', DC.creator => "andreas@maunz.de, helma@in-silico.ch", DC.contributor => "vorgrimmlerdavid@gmx.de", - OT.isA => OTA.PatternMiningSupervised, + RDF.type => [OT.Algorithm,OTA.PatternMiningSupervised], OT.parameters => [ { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" }, { DC.description => "Feature URI for dependent variable", OT.paramScope => "mandatory", DC.title => "prediction_feature" }, @@ -44,7 +44,7 @@ get "/fminer/last/?" do DC.title => 'fminer latent structure class representatives', DC.creator => "andreas@maunz.de, helma@in-silico.ch", DC.contributor => "vorgrimmlerdavid@gmx.de", - OT.isA => OTA.PatternMiningSupervised, + RDF.type => [OT.Algorithm,OTA.PatternMiningSupervised], OT.parameters => [ { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" }, { DC.description => "Feature URI for dependent variable", OT.paramScope => "mandatory", DC.title => "prediction_feature" }, @@ -195,7 +195,7 @@ post '/fminer/bbrc/?' do features << smarts metadata = { OT.hasSource => url_for('/fminer/bbrc', :full), - OT.isA => OT.Substructure, + RDF.type => [OT.Feature, OT.Substructure], OT.smarts => smarts, OT.pValue => p_value.to_f, OT.effect => effect, @@ -343,7 +343,7 @@ post '/fminer/last/?' do unless features.include? smarts features << smarts metadata = { - OT.isA => OT.Substructure, + RDF.type => [OT.Feature, OT.Substructure], OT.hasSource => feature_dataset.uri, OT.smarts => smarts, OT.pValue => p_value.to_f.abs, diff --git a/last-utils b/last-utils index daafa32..04bd1b7 160000 --- a/last-utils +++ b/last-utils @@ -1 +1 @@ -Subproject commit daafa32e330b27111df6dc7193a6ed72fae2be45 +Subproject commit 04bd1b73f54bb7422d3c08bb5a81bc02af04f6ff diff --git a/lazar.rb b/lazar.rb index 67d9f74..a4e006b 100644 --- a/lazar.rb +++ b/lazar.rb @@ -141,9 +141,9 @@ post '/lazar/?' do lazar.metadata[OT.trainingDataset] = dataset_uri lazar.metadata[OT.featureDataset] = feature_dataset_uri if training_activities.feature_type.to_s == "classification" - lazar.metadata[OT.isA] = OTA.ClassificationLazySingleTarget + lazar.metadata[RDF.type] = [OT.Model, OTA.ClassificationLazySingleTarget] elsif training_activities.feature_type.to_s == "regression" - lazar.metadata[OT.isA] = OTA.RegressionLazySingleTarget + lazar.metadata[RDF.type] = [OT.Model, OTA.RegressionLazySingleTarget] end lazar.metadata[OT.parameters] = [ diff --git a/libfminer b/libfminer index 01b8e50..6514520 160000 --- a/libfminer +++ b/libfminer @@ -1 +1 @@ -Subproject commit 01b8e50e8e6fb3ce29fc8bf0a65a8c6f6af94b3f +Subproject commit 6514520638b8309ec9bf379bce54f45d3a534195 diff --git a/openbabel.rb b/openbabel.rb index 3a873c0..1644455 100644 --- a/openbabel.rb +++ b/openbabel.rb @@ -44,7 +44,7 @@ get '/openbabel/:property' do DC.title => params[:property], DC.creator => "helma@in-silico.ch", DC.description => description, - OT.isA => OTA.DescriptorCalculation, + RDF.type => [OTA.DescriptorCalculation], } response['Content-Type'] = 'application/rdf+xml' algorithm.to_rdfxml -- cgit v1.2.3 From 86b192fb3ad146337b94b52165b1f43a550b2f2d Mon Sep 17 00:00:00 2001 From: davor Date: Tue, 24 May 2011 10:48:15 +0200 Subject: Moving SetRegression up again... --- fminer.rb | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/fminer.rb b/fminer.rb index 155c555..63f6694 100644 --- a/fminer.rb +++ b/fminer.rb @@ -84,17 +84,18 @@ post '/fminer/bbrc/?' do task = OpenTox::Task.create("Mining BBRC features", url_for('/fminer',:full)) do + puts params.to_yaml @@bbrc.Reset + if prediction_feature.feature_type == "regression" + @@bbrc.SetRegression(true) # AM: DO NOT MOVE DOWN! Must happen before the other Set... operations! + else + @training_classes = training_dataset.feature_classes(prediction_feature.uri, @subjectid) + end @@bbrc.SetMinfreq(minfreq) @@bbrc.SetType(1) if params[:feature_type] == "paths" @@bbrc.SetBackbone(eval params[:backbone]) if params[:backbone] and ( params[:backbone] == "true" or params[:backbone] == "false" ) # convert string to boolean @@bbrc.SetChisqSig(params[:min_chisq_significance].to_f) if params[:min_chisq_significance] @@bbrc.SetConsoleOut(false) - if prediction_feature.feature_type == "regression" - @@bbrc.SetRegression(true) - else - @training_classes = training_dataset.feature_classes(prediction_feature.uri, @subjectid) - end feature_dataset = OpenTox::Dataset.new(nil, @subjectid) feature_dataset.add_metadata({ @@ -131,7 +132,7 @@ post '/fminer/bbrc/?' do entry.each do |feature,values| values.each do |value| if prediction_feature.feature_type == "regression" - if (! value.nil?) && (value.to_f < 0) + if (! value.nil?) && (value.to_f < 1) take_logs=false end end @@ -273,15 +274,16 @@ post '/fminer/last/?' do task = OpenTox::Task.create("Mining LAST features", url_for('/fminer',:full)) do @@last.Reset - @@last.SetMinfreq(minfreq) - @@last.SetType(1) if params[:feature_type] == "paths" - @@last.SetMaxHops(params[:hops]) if params[:hops] - @@last.SetConsoleOut(false) if prediction_feature.feature_type == "regression" - @@last.SetRegression(true) + @@last.SetRegression(true) # AM: DO NOT MOVE DOWN! Must happen before the other Set... operations! else @training_classes = training_dataset.feature_classes(prediction_feature.uri) end + @@last.SetMinfreq(minfreq) + @@last.SetType(1) if params[:feature_type] == "paths" + @@last.SetMaxHops(params[:hops]) if params[:hops] + @@last.SetConsoleOut(false) + feature_dataset = OpenTox::Dataset.new(nil, @subjectid) feature_dataset.add_metadata({ -- cgit v1.2.3 From 730a35a2ca22d704524b3207c1b95766b5bcd326 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Tue, 24 May 2011 14:37:00 +0200 Subject: Removed debug --- fminer.rb | 1 - 1 file changed, 1 deletion(-) diff --git a/fminer.rb b/fminer.rb index 63f6694..aab6246 100644 --- a/fminer.rb +++ b/fminer.rb @@ -84,7 +84,6 @@ post '/fminer/bbrc/?' do task = OpenTox::Task.create("Mining BBRC features", url_for('/fminer',:full)) do - puts params.to_yaml @@bbrc.Reset if prediction_feature.feature_type == "regression" @@bbrc.SetRegression(true) # AM: DO NOT MOVE DOWN! Must happen before the other Set... operations! -- cgit v1.2.3 From 9e70919c3963d70cc6c9c39413ac386b4c607b85 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Wed, 25 May 2011 13:40:05 +0200 Subject: fix: add missing subject id when loading feature type --- lazar.rb | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lazar.rb b/lazar.rb index 0c91179..a436569 100644 --- a/lazar.rb +++ b/lazar.rb @@ -155,9 +155,10 @@ post '/lazar/?' do lazar.metadata[OT.dependentVariables] = prediction_feature.uri lazar.metadata[OT.trainingDataset] = dataset_uri lazar.metadata[OT.featureDataset] = feature_dataset_uri - if training_activities.feature_type.to_s == "classification" + case training_activities.feature_type(@subjectid) + when "classification" lazar.metadata[RDF.type] = [OT.Model, OTA.ClassificationLazySingleTarget] - elsif training_activities.feature_type.to_s == "regression" + when "regression" lazar.metadata[RDF.type] = [OT.Model, OTA.RegressionLazySingleTarget] end -- cgit v1.2.3 From 4dac8a5e47a4cc85c4bafc805cd3e52be2a53669 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Thu, 26 May 2011 10:57:04 +0200 Subject: adjust to change in wrapper: feature_values -> accept_values --- fminer.rb | 4 ++-- lazar.rb | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fminer.rb b/fminer.rb index 63f6694..890a15e 100644 --- a/fminer.rb +++ b/fminer.rb @@ -89,7 +89,7 @@ post '/fminer/bbrc/?' do if prediction_feature.feature_type == "regression" @@bbrc.SetRegression(true) # AM: DO NOT MOVE DOWN! Must happen before the other Set... operations! else - @training_classes = training_dataset.feature_classes(prediction_feature.uri, @subjectid) + @training_classes = training_dataset.accept_values(prediction_feature.uri) end @@bbrc.SetMinfreq(minfreq) @@bbrc.SetType(1) if params[:feature_type] == "paths" @@ -277,7 +277,7 @@ post '/fminer/last/?' do if prediction_feature.feature_type == "regression" @@last.SetRegression(true) # AM: DO NOT MOVE DOWN! Must happen before the other Set... operations! else - @training_classes = training_dataset.feature_classes(prediction_feature.uri) + @training_classes = training_dataset.accept_values(prediction_feature.uri) end @@last.SetMinfreq(minfreq) @@last.SetType(1) if params[:feature_type] == "paths" diff --git a/lazar.rb b/lazar.rb index a436569..0ed8f3f 100644 --- a/lazar.rb +++ b/lazar.rb @@ -119,7 +119,7 @@ post '/lazar/?' do end end - @training_classes = training_activities.feature_classes(prediction_feature.uri, @subjectid) if prediction_feature.feature_type == "classification" + @training_classes = training_activities.accept_values(prediction_feature.uri) if prediction_feature.feature_type == "classification" lazar.prediction_algorithm = "Neighbors.local_svm_regression" if prediction_feature.feature_type == "regression" # AM: allow prediction_algorithm override by user for classification AND regression -- cgit v1.2.3 From 645045dc2afe3323d5bf9968ffe842665398bd67 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Thu, 26 May 2011 12:19:50 +0200 Subject: fix html for algorithms --- application.rb | 11 +++++++++-- fminer.rb | 36 +++++++++++++++++++++++++++++++----- lazar.rb | 13 +++++++++++-- 3 files changed, 51 insertions(+), 9 deletions(-) diff --git a/application.rb b/application.rb index 55a8ea4..32fea95 100644 --- a/application.rb +++ b/application.rb @@ -22,6 +22,13 @@ end # # @return [text/uri-list] algorithm URIs get '/?' do - response['Content-Type'] = 'text/uri-list' - [ url_for('/lazar', :full), url_for('/fminer/bbrc', :full), url_for('/fminer/last', :full) ].join("\n") + "\n" + list = [ url_for('/lazar', :full), url_for('/fminer/bbrc', :full), url_for('/fminer/last', :full) ].join("\n") + "\n" + case request.env['HTTP_ACCEPT'] + when /text\/html/ + content_type "text/html" + OpenTox.text_to_html list + else + content_type 'text/uri-list' + list + end end diff --git a/fminer.rb b/fminer.rb index 890a15e..45136ea 100644 --- a/fminer.rb +++ b/fminer.rb @@ -10,14 +10,20 @@ ENV['FMINER_SILENT'] = 'true' # # @return [text/uri-list] URIs of fminer algorithms get '/fminer/?' do - response['Content-Type'] = 'text/uri-list' - [ url_for('/fminer/bbrc', :full), url_for('/fminer/last', :full) ].join("\n") + "\n" + list = [ url_for('/fminer/bbrc', :full), url_for('/fminer/last', :full) ].join("\n") + "\n" + case request.env['HTTP_ACCEPT'] + when /text\/html/ + content_type "text/html" + OpenTox.text_to_html list + else + content_type 'text/uri-list' + list + end end # Get RDF/XML representation of fminer bbrc algorithm # @return [application/rdf+xml] OWL-DL representation of fminer bbrc algorithm get "/fminer/bbrc/?" do - response['Content-Type'] = 'application/rdf+xml' algorithm = OpenTox::Algorithm::Generic.new(url_for('/fminer/bbrc',:full)) algorithm.metadata = { DC.title => 'fminer backbone refinement class representatives', @@ -33,7 +39,17 @@ get "/fminer/bbrc/?" do { DC.description => "Significance threshold (between 0 and 1)", OT.paramScope => "optional", DC.title => "min_chisq_significance" }, ] } - algorithm.to_rdfxml + case request.env['HTTP_ACCEPT'] + when /text\/html/ + content_type "text/html" + OpenTox.text_to_html algorithm.to_yaml + when /application\/x-yaml/ + content_type "application/x-yaml" + algorithm.to_yaml + else + response['Content-Type'] = 'application/rdf+xml' + algorithm.to_rdfxml + end end # Get RDF/XML representation of fminer last algorithm @@ -53,7 +69,17 @@ get "/fminer/last/?" do { DC.description => "Maximum number of hops", OT.paramScope => "optional", DC.title => "hops" }, ] } - algorithm.to_rdfxml + case request.env['HTTP_ACCEPT'] + when /text\/html/ + content_type "text/html" + OpenTox.text_to_html algorithm.to_yaml + when /application\/x-yaml/ + content_type "application/x-yaml" + algorithm.to_yaml + else + response['Content-Type'] = 'application/rdf+xml' + algorithm.to_rdfxml + end end # Run bbrc algorithm on dataset diff --git a/lazar.rb b/lazar.rb index 0ed8f3f..403d99c 100644 --- a/lazar.rb +++ b/lazar.rb @@ -3,7 +3,6 @@ # Get RDF/XML representation of the lazar algorithm # @return [application/rdf+xml] OWL-DL representation of the lazar algorithm get '/lazar/?' do - response['Content-Type'] = 'application/rdf+xml' algorithm = OpenTox::Algorithm::Generic.new(url_for('/lazar',:full)) algorithm.metadata = { DC.title => 'lazar', @@ -17,7 +16,17 @@ get '/lazar/?' do { DC.description => "Further parameters for the feaature generation service", OT.paramScope => "optional" } ] } - algorithm.to_rdfxml + case request.env['HTTP_ACCEPT'] + when /text\/html/ + content_type "text/html" + OpenTox.text_to_html algorithm.to_yaml + when /application\/x-yaml/ + content_type "application/x-yaml" + algorithm.to_yaml + else + response['Content-Type'] = 'application/rdf+xml' + algorithm.to_rdfxml + end end # Create a lazar prediction model -- cgit v1.2.3 From 017efa8e3ada72dfeb3256f36784e99518a5ee07 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Thu, 26 May 2011 14:27:13 +0200 Subject: Prop kernel --- lazar.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/lazar.rb b/lazar.rb index 403d99c..9262f2f 100644 --- a/lazar.rb +++ b/lazar.rb @@ -133,6 +133,7 @@ post '/lazar/?' do # AM: allow prediction_algorithm override by user for classification AND regression lazar.prediction_algorithm = "Neighbors.#{params[:prediction_algorithm]}" unless params[:prediction_algorithm].nil? + lazar.prop_kernel = true if params[:local_svm_kernel] == "propositionalized" training_activities.data_entries.each do |compound,entry| lazar.activities[compound] = [] unless lazar.activities[compound] -- cgit v1.2.3 From 86ef657cccd25e70f7630d84102c63c467a1be4a Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Fri, 27 May 2011 10:27:02 +0200 Subject: Updated README --- README.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/README.md b/README.md index 640f962..ad27e75 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,16 @@ REST operations Create lazar model POST /lazar dataset_uri, URI for lazar model 200,400,404,500 prediction_feature, feature_generation_uri + prediction_algorithm + local_svm_kernel + +Synopsis +-------- + +- prediction\_algorithm: one of weighted\_majority\_vote, local\_svm\_classification, local\_svm\_regression. +- local\_svm\_kernel: one of weighted\_tanimoto, propositionalized + +Note: prediction\_algorithm and local\_svm\_kernel are only evaluated where applicable (e.g. the latter only when the former is set to loca\_svm\_\*). No error message is returned if switches are not applicable. Supported MIME formats ---------------------- @@ -83,6 +93,12 @@ Please click [here](http://last-pm.maunz.de#usage) for guidance for more guidanc curl -X POST -d dataset_uri={datset_uri} -d prediction_feature={feature_uri} -d feature_generation_uri=http://webservices.in-silico.ch/algorithm/fminer http://webservices.in-silico.ch/test/algorithm/lazar +Creates a standard Lazar model. + + curl -X POST -d dataset_uri={datset_uri} -d prediction_feature={feature_uri} -d feature_generation_uri=http://webservices.in-silico.ch/algorithm/fminer http://webservices.in-silico.ch/test/algorithm/lazar -d prediction_algorithm=local_svm_classification -d local_svm_kernel=propositionalized + +Creates an SVM model for classification with a propositionalized RBF kernel. + feature_uri specifies the dependent variable from the dataset [API documentation](http://rdoc.info/github/opentox/algorithm) -- cgit v1.2.3 From f2674c33982ece909bc5a3333befd602ba6abfa9 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Fri, 27 May 2011 10:34:14 +0200 Subject: Updated README --- README.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index ad27e75..198b782 100644 --- a/README.md +++ b/README.md @@ -38,10 +38,11 @@ REST operations Synopsis -------- -- prediction\_algorithm: one of weighted\_majority\_vote, local\_svm\_classification, local\_svm\_regression. -- local\_svm\_kernel: one of weighted\_tanimoto, propositionalized +- prediction\_algorithm: one of "weighted\_majority\_vote" (default for classification), "local\_svm\_classification", "local\_svm\_regression (default for regression)". "weighted\_majority\_vote" is not applicable for regression. +- local\_svm\_kernel: one of "weighted\_tanimoto", "propositionalized". local\_svm\_kernel is not appplicable when prediction\_algorithm="weighted\_majority\_vote". + +See http://www.maunz.de/wordpress/opentox/2011/lazar-models-and-how-to-trigger-them for a graphical overview. -Note: prediction\_algorithm and local\_svm\_kernel are only evaluated where applicable (e.g. the latter only when the former is set to loca\_svm\_\*). No error message is returned if switches are not applicable. Supported MIME formats ---------------------- -- cgit v1.2.3 From 5aaefadb8d5f21195fafe4a39de254f964eaf61f Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Tue, 31 May 2011 08:59:02 +0000 Subject: keep classes from external datasets --- fminer.rb | 2 +- lazar.rb | 37 ++++++++++++++++++++----------------- libfminer | 2 +- 3 files changed, 22 insertions(+), 19 deletions(-) diff --git a/fminer.rb b/fminer.rb index 59e4052..a125b5b 100644 --- a/fminer.rb +++ b/fminer.rb @@ -114,7 +114,7 @@ post '/fminer/bbrc/?' do if prediction_feature.feature_type == "regression" @@bbrc.SetRegression(true) # AM: DO NOT MOVE DOWN! Must happen before the other Set... operations! else - @training_classes = training_dataset.accept_values(prediction_feature.uri) + @training_classes = training_dataset.accept_values(prediction_feature.uri).sort end @@bbrc.SetMinfreq(minfreq) @@bbrc.SetType(1) if params[:feature_type] == "paths" diff --git a/lazar.rb b/lazar.rb index 9262f2f..a8de64a 100644 --- a/lazar.rb +++ b/lazar.rb @@ -41,22 +41,22 @@ post '/lazar/?' do halt 404, "No dataset_uri parameter." unless params[:dataset_uri] dataset_uri = params[:dataset_uri] - halt 404, "Dataset #{dataset_uri} not found." unless training_activities = OpenTox::Dataset.new(dataset_uri) - training_activities.load_all(@subjectid) - - prediction_feature = OpenTox::Feature.find(params[:prediction_feature],@subjectid) - unless params[:prediction_feature] # try to read prediction_feature from dataset - halt 404, "#{training_activities.features.size} features in dataset #{dataset_uri}. Please provide a prediction_feature parameter." unless training_activities.features.size == 1 - prediction_feature = OpenTox::Feature.find(training_activities.features.keys.first,@subjectid) - params[:prediction_feature] = prediction_feature.uri # pass to feature mining service - end + task = OpenTox::Task.create("Create lazar model",url_for('/lazar',:full)) do |task| - feature_generation_uri = @@feature_generation_default unless feature_generation_uri = params[:feature_generation_uri] + raise OpenTox::NotFoundError.new "Dataset #{dataset_uri} not found." unless training_activities = OpenTox::Dataset.new(dataset_uri) + training_activities.load_all(@subjectid) - halt 404, "No feature #{prediction_feature.uri} in dataset #{params[:dataset_uri]}. (features: "+ - training_activities.features.inspect+")" unless training_activities.features and training_activities.features.include?(prediction_feature.uri) + prediction_feature = OpenTox::Feature.find(params[:prediction_feature],@subjectid) + unless params[:prediction_feature] # try to read prediction_feature from dataset + raise OpenTox::NotFoundError.new "#{training_activities.features.size} features in dataset #{dataset_uri}. Please provide a prediction_feature parameter." unless training_activities.features.size == 1 + prediction_feature = OpenTox::Feature.find(training_activities.features.keys.first,@subjectid) + params[:prediction_feature] = prediction_feature.uri # pass to feature mining service + end - task = OpenTox::Task.create("Create lazar model",url_for('/lazar',:full)) do |task| + feature_generation_uri = @@feature_generation_default unless feature_generation_uri = params[:feature_generation_uri] + + raise OpenTox::NotFoundError.new "No feature #{prediction_feature.uri} in dataset #{params[:dataset_uri]}. (features: "+ + training_activities.features.inspect+")" unless training_activities.features and training_activities.features.include?(prediction_feature.uri) lazar = OpenTox::Model::Lazar.new lazar.min_sim = params[:min_sim] if params[:min_sim] @@ -127,9 +127,13 @@ post '/lazar/?' do end end end - - @training_classes = training_activities.accept_values(prediction_feature.uri) if prediction_feature.feature_type == "classification" - lazar.prediction_algorithm = "Neighbors.local_svm_regression" if prediction_feature.feature_type == "regression" + + if prediction_feature.feature_type == "classification" + @training_classes = training_activities.accept_values(prediction_feature.uri).sort + lazar.value_map = { true => @training_classes.last, false => @training_classes.first } + elsif prediction_feature.feature_type == "regression" + lazar.prediction_algorithm = "Neighbors.local_svm_regression" + end # AM: allow prediction_algorithm override by user for classification AND regression lazar.prediction_algorithm = "Neighbors.#{params[:prediction_algorithm]}" unless params[:prediction_algorithm].nil? @@ -161,7 +165,6 @@ post '/lazar/?' do end lazar.metadata[DC.title] = "lazar model for #{URI.decode(File.basename(prediction_feature.uri))}" - # TODO: fix dependentVariable lazar.metadata[OT.dependentVariables] = prediction_feature.uri lazar.metadata[OT.trainingDataset] = dataset_uri lazar.metadata[OT.featureDataset] = feature_dataset_uri diff --git a/libfminer b/libfminer index 2af90d8..07679a6 160000 --- a/libfminer +++ b/libfminer @@ -1 +1 @@ -Subproject commit 2af90d80f66649901002f223fac371fc86ca03b6 +Subproject commit 07679a625a7acad864fd3abd80654a1a0a61e690 -- cgit v1.2.3 From d69214b494706fb53f74bb17ddde28ad831cd460 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Tue, 31 May 2011 23:16:41 +0200 Subject: fix get compound from smiles (directly instead of via uri) --- fminer.rb | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fminer.rb b/fminer.rb index a125b5b..81725cd 100644 --- a/fminer.rb +++ b/fminer.rb @@ -114,6 +114,8 @@ post '/fminer/bbrc/?' do if prediction_feature.feature_type == "regression" @@bbrc.SetRegression(true) # AM: DO NOT MOVE DOWN! Must happen before the other Set... operations! else + raise "no accept values for dataset '"+training_dataset.uri.to_s+"' and feature '"+prediction_feature.uri.to_s+ + "'" unless training_dataset.accept_values(prediction_feature.uri) @training_classes = training_dataset.accept_values(prediction_feature.uri).sort end @@bbrc.SetMinfreq(minfreq) @@ -142,16 +144,17 @@ post '/fminer/bbrc/?' do training_dataset.data_entries.each do |compound,entry| begin - smiles = OpenTox::Compound.new(compound.to_s).to_smiles + # fix: ambit does not support inchi, directly request smiles + smiles = OpenTox::Compound.smiles(compound.to_s) rescue LOGGER.warn "No resource for #{compound.to_s}" next end if smiles == '' or smiles.nil? - LOGGER.warn "Cannot find smiles for #{compound.to_s}." + LOGGER.warn "Cannot find smiles for #{compound.to_s}" next end - + # AM: take log if appropriate take_logs=true entry.each do |feature,values| -- cgit v1.2.3 From b435b145aea744ba951c12b4f6bf90ce62a5b0a3 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Tue, 31 May 2011 23:28:02 +0200 Subject: skip 0 regression value instead of stopping model buliding --- lazar.rb | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/lazar.rb b/lazar.rb index a8de64a..4e5d41d 100644 --- a/lazar.rb +++ b/lazar.rb @@ -157,8 +157,12 @@ post '/lazar/?' do LOGGER.warn "Unknown class \"#{value.to_s}\"." end elsif prediction_feature.feature_type == "regression" - halt 404, "0 values not allowed in training dataset. log10 is calculated internally." if value.to_f == 0 - lazar.activities[compound] << value.to_f + #never use halt in tasks, do not raise exception when, print warning instead + if value.to_f==0 + LOGGER.warn "0 values not allowed in training dataset. log10 is calculated internally. skipping compound" + else + lazar.activities[compound] << value.to_f + end end end end -- cgit v1.2.3 From f6a55a7121a61865d85ad7195371bd8fd784fb29 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Mon, 6 Jun 2011 12:39:37 +0200 Subject: removing several halt, adding debug message, getting smiles directly --- fminer.rb | 2 +- lazar.rb | 7 ++++--- openbabel.rb | 4 ++-- similarity.rb | 20 ++++++++++---------- 4 files changed, 17 insertions(+), 16 deletions(-) diff --git a/fminer.rb b/fminer.rb index 81725cd..00d11a0 100644 --- a/fminer.rb +++ b/fminer.rb @@ -334,7 +334,7 @@ post '/fminer/last/?' do training_dataset.data_entries.each do |compound,entry| begin - smiles = OpenTox::Compound.new(compound.to_s).to_smiles + smiles = OpenTox::Compound.smiles(compound.to_s) rescue LOGGER.warn "No resource for #{compound.to_s}" next diff --git a/lazar.rb b/lazar.rb index 4e5d41d..00f3216 100644 --- a/lazar.rb +++ b/lazar.rb @@ -37,8 +37,9 @@ end # @return [text/uri-list] Task URI post '/lazar/?' do + LOGGER.debug "building lazar model with params: "+params.inspect params[:subjectid] = @subjectid - halt 404, "No dataset_uri parameter." unless params[:dataset_uri] + raise OpenTox::NotFoundError.new "No dataset_uri parameter." unless params[:dataset_uri] dataset_uri = params[:dataset_uri] task = OpenTox::Task.create("Create lazar model",url_for('/lazar',:full)) do |task| @@ -75,7 +76,7 @@ post '/lazar/?' do if feature_generation_uri.match(/fminer/) lazar.feature_calculation_algorithm = "Substructure.match" else - halt 404, "External feature generation services not yet supported" + raise OpenTox::NotFoundError.new "External feature generation services not yet supported" end params[:subjectid] = @subjectid prediction_feature = OpenTox::Feature.find params[:prediction_feature], @subjectid @@ -87,7 +88,7 @@ post '/lazar/?' do end training_features.load_all(@subjectid) - halt 404, "Dataset #{feature_dataset_uri} not found." if training_features.nil? + raise OpenTox::NotFoundError.new "Dataset #{feature_dataset_uri} not found." if training_features.nil? # sorted features for index lookups diff --git a/openbabel.rb b/openbabel.rb index 1644455..fedcb8a 100644 --- a/openbabel.rb +++ b/openbabel.rb @@ -49,7 +49,7 @@ get '/openbabel/:property' do response['Content-Type'] = 'application/rdf+xml' algorithm.to_rdfxml else - halt 404, "Unknown OpenBabel descriptor #{params[:property]}." + raise OpenTox::NotFoundError.new "Unknown OpenBabel descriptor #{params[:property]}." end end @@ -89,7 +89,7 @@ post '/openbabel/:property' do descriptor = OpenBabel::OBDescriptor.find_type(params[:property]) descriptor.predict(obmol).to_s else - halt 404, "Cannot calculate property #{params[:property]} with OpenBabel" + raise OpenTox::NotFoundError.new "Cannot calculate property #{params[:property]} with OpenBabel" end end diff --git a/similarity.rb b/similarity.rb index 060bd2b..faf43f9 100644 --- a/similarity.rb +++ b/similarity.rb @@ -2,25 +2,25 @@ require File.join(File.dirname(__FILE__),'dataset.rb') helpers do def find -# + charges are dropped -uri = uri(params[:splat].first.gsub(/(InChI.*) (.*)/,'\1+\2')) # reinsert dropped '+' signs in InChIs -halt 404, "Dataset \"#{uri}\" not found." unless @set = Dataset.find(uri) + # + charges are dropped + uri = uri(params[:splat].first.gsub(/(InChI.*) (.*)/,'\1+\2')) # reinsert dropped '+' signs in InChIs + raise OpenTox::NotFoundError.new "Dataset \"#{uri}\" not found." unless @set = Dataset.find(uri) end def uri(name) -name = URI.encode(name) -uri = File.join Dataset.base_uri, name -end + name = URI.encode(name) + uri = File.join Dataset.base_uri, name + end end get '/tanimoto/dataset/*/dataset/*/?' do -find -@set.tanimoto(uri(params[:splat][1])) + find + @set.tanimoto(uri(params[:splat][1])) end get '/weighted_tanimoto/dataset/*/dataset/*/?' do -find -@set.weighted_tanimoto(uri(params[:splat][1])) + find + @set.weighted_tanimoto(uri(params[:splat][1])) end -- cgit v1.2.3 From 8d8880685ddaeeb968e1f1f0addf6cbf66eef67d Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Mon, 6 Jun 2011 16:54:56 +0000 Subject: halts (partially) substituted by OpenTox errors --- fminer.rb | 16 ++++++++-------- last-utils | 2 +- lazar.rb | 2 +- openbabel.rb | 2 +- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/fminer.rb b/fminer.rb index 00d11a0..ee031ff 100644 --- a/fminer.rb +++ b/fminer.rb @@ -94,11 +94,11 @@ end # @return [text/uri-list] Task URI post '/fminer/bbrc/?' do - halt 404, "Please submit a dataset_uri." unless params[:dataset_uri] and !params[:dataset_uri].nil? - halt 404, "Please submit a prediction_feature." unless params[:prediction_feature] and !params[:prediction_feature].nil? + raise OpenTox::NotFoundError.new "Please submit a dataset_uri." unless params[:dataset_uri] and !params[:dataset_uri].nil? + raise OpenTox::NotFoundError.new "Please submit a prediction_feature." unless params[:prediction_feature] and !params[:prediction_feature].nil? prediction_feature = OpenTox::Feature.find params[:prediction_feature], @subjectid training_dataset = OpenTox::Dataset.find "#{params[:dataset_uri]}", @subjectid - halt 404, "No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}" unless training_dataset.features and training_dataset.features.include?(params[:prediction_feature]) + raise OpenTox::NotFoundError.new "No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}" unless training_dataset.features and training_dataset.features.include?(params[:prediction_feature]) unless params[:min_frequency].nil? minfreq=params[:min_frequency].to_i @@ -268,7 +268,7 @@ post '/fminer/bbrc/?' do feature_dataset.uri end response['Content-Type'] = 'text/uri-list' - halt 503,task.uri+"\n" if task.status == "Cancelled" + raise OpenTox::ServiceUnavailableError.newtask.uri+"\n" if task.status == "Cancelled" halt 202,task.uri.to_s+"\n" end #end @@ -284,12 +284,12 @@ end # @return [text/uri-list] Task URI post '/fminer/last/?' do - halt 404, "Please submit a dataset_uri." unless params[:dataset_uri] and !params[:dataset_uri].nil? - halt 404, "Please submit a prediction_feature." unless params[:prediction_feature] and !params[:prediction_feature].nil? + raise OpenTox::NotFoundError.new "Please submit a dataset_uri." unless params[:dataset_uri] and !params[:dataset_uri].nil? + raise OpenTox::NotFoundError.new "Please submit a prediction_feature." unless params[:prediction_feature] and !params[:prediction_feature].nil? prediction_feature = OpenTox::Feature.find params[:prediction_feature], @subjectid training_dataset = OpenTox::Dataset.new "#{params[:dataset_uri]}", @subjectid training_dataset.load_all(@subjectid) - halt 404, "No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}" unless training_dataset.features and training_dataset.features.include?(params[:prediction_feature]) + raise OpenTox::NotFoundError.new "No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}" unless training_dataset.features and training_dataset.features.include?(params[:prediction_feature]) unless params[:min_frequency].nil? minfreq=params[:min_frequency].to_i @@ -430,6 +430,6 @@ post '/fminer/last/?' do feature_dataset.uri end response['Content-Type'] = 'text/uri-list' - halt 503,task.uri+"\n" if task.status == "Cancelled" + raise OpenTox::ServiceUnavailableError.newtask.uri+"\n" if task.status == "Cancelled" halt 202,task.uri.to_s+"\n" end diff --git a/last-utils b/last-utils index 75bea76..04bd1b7 160000 --- a/last-utils +++ b/last-utils @@ -1 +1 @@ -Subproject commit 75bea7645601fd296aa68c6678ee9b0a49a7b918 +Subproject commit 04bd1b73f54bb7422d3c08bb5a81bc02af04f6ff diff --git a/lazar.rb b/lazar.rb index 00f3216..4441726 100644 --- a/lazar.rb +++ b/lazar.rb @@ -191,7 +191,7 @@ post '/lazar/?' do model_uri end response['Content-Type'] = 'text/uri-list' - halt 503,task.uri+"\n" if task.status == "Cancelled" + raise OpenTox::ServiceUnavailableError.newtask.uri+"\n" if task.status == "Cancelled" halt 202,task.uri end diff --git a/openbabel.rb b/openbabel.rb index fedcb8a..463663e 100644 --- a/openbabel.rb +++ b/openbabel.rb @@ -143,6 +143,6 @@ post '/openbabel' do result_dataset.uri end response['Content-Type'] = 'text/uri-list' - halt 503,task.uri+"\n" if task.status == "Cancelled" + raise OpenTox::ServiceUnavailableError.newtask.uri+"\n" if task.status == "Cancelled" halt 202,task.uri.to_s+"\n" end -- cgit v1.2.3 From c1ee06638871ee7a88b07ebea05f4ecf3fab6392 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Tue, 21 Jun 2011 13:43:05 +0200 Subject: Multinomial capability for Fminer and Lazar --- fminer.rb | 106 ++++++++++++++++++++++++++++++-------------------------------- lazar.rb | 17 +++------- 2 files changed, 56 insertions(+), 67 deletions(-) diff --git a/fminer.rb b/fminer.rb index ee031ff..8c6451c 100644 --- a/fminer.rb +++ b/fminer.rb @@ -138,8 +138,8 @@ post '/fminer/bbrc/?' do id = 1 # fminer start id is not 0 compounds = [] - nr_active=0 - nr_inactive=0 + nr_classes={} + nr_total=0 all_activities = Hash.new# DV: for effect calculation in regression part training_dataset.data_entries.each do |compound,entry| @@ -160,7 +160,7 @@ post '/fminer/bbrc/?' do entry.each do |feature,values| values.each do |value| if prediction_feature.feature_type == "regression" - if (! value.nil?) && (value.to_f < 1) + if (! value.nil?) && (value.to_f <= 0) take_logs=false end end @@ -170,26 +170,13 @@ post '/fminer/bbrc/?' do if feature == prediction_feature.uri values.each do |value| if value.nil? - LOGGER.warn "No #{feature} activiity for #{compound.to_s}." + LOGGER.warn "No #{feature} activity for #{compound.to_s}." else if prediction_feature.feature_type == "classification" - case value.to_s - when "true" - nr_active += 1 - activity = 1 - when "false" - nr_inactive += 1 - activity = 0 - when /#{@training_classes.last}/ - nr_active += 1 - activity = 1 - when /#{@training_classes.first}/ - nr_inactive += 1 - activity = 0 - else - LOGGER.warn "Unknown class \"#{value.to_s}\"." - end - elsif prediction_feature.feature_type == "regression" + activity= value.to_f + nr_classes[activity].nil? ? nr_classes[activity]=0 : nr_classes[activity]+=1 + nr_total+=1 + elsif prediction_feature.feature_type == "regression" activity= take_logs ? Math.log10(value.to_f) : value.to_f end begin @@ -223,14 +210,22 @@ post '/fminer/bbrc/?' do p_value = f[1] if (!@@bbrc.GetRegression) - ids = f[2] + f[3] - if f[2].size.to_f/ids.size > nr_active.to_f/(nr_active+nr_inactive) - effect = 'activating' - else - effect = 'deactivating' - end + id_arrs = f[2..-1].flatten + max=nil + max_value=0 + f[2..-1].reverse.each_with_index { |id,i| # fminer outputs occurrences sorted reverse by activity. + actual = id.size.to_f/id_arrs.size + expected = nr_classes[i].to_f/nr_total + if actual > expected + if ((actual - expected) / actual) > max_value + max_value = (actual - expected) / actual # 'Schleppzeiger' + max = i + end + end + } + effect = max.to_s else #regression part - ids = f[2] + id_arrs = f[2] # DV: effect calculation f_arr=Array.new f[2].each do |id| @@ -261,7 +256,7 @@ post '/fminer/bbrc/?' do feature_dataset.add_feature feature_uri, metadata #feature_dataset.add_feature_parameters feature_uri, feature_dataset.parameters end - ids.each { |id| feature_dataset.add(compounds[id], feature_uri, true)} + id_arrs.each { |id| feature_dataset.add(compounds[id], feature_uri, true)} end end feature_dataset.save(@subjectid) @@ -328,8 +323,8 @@ post '/fminer/last/?' do id = 1 # fminer start id is not 0 compounds = [] smi = [] # AM LAST: needed for matching the patterns back - nr_active=0 - nr_inactive=0 + nr_classes = [] + nr_total=0 all_activities = Hash.new #DV: for effect calculation (class and regr) training_dataset.data_entries.each do |compound,entry| @@ -343,38 +338,38 @@ post '/fminer/last/?' do LOGGER.warn "Cannot find smiles for #{compound.to_s}." next end + + # AM: take log if appropriate + take_logs=true + entry.each do |feature,values| + values.each do |value| + if prediction_feature.feature_type == "regression" + if (! value.nil?) && (value.to_f <= 0) + take_logs=false + end + end + end + end + entry.each do |feature,values| if feature == prediction_feature.uri values.each do |value| if value.nil? - LOGGER.warn "No #{feature} activiity for #{compound.to_s}." + LOGGER.warn "No #{feature} activity for #{compound.to_s}." else if prediction_feature.feature_type == "classification" - case value.to_s - when "true" - nr_active += 1 - activity = 1 - when "false" - nr_inactive += 1 - activity = 0 - when /#{@training_classes.last}/ - nr_active += 1 - activity = 1 - when /#{@training_classes.first}/ - nr_inactive += 1 - activity = 0 - else - LOGGER.warn "Unknown class \"#{value.to_s}." - end + activity= value.to_f + nr_classes[activity].nil? ? nr_classes[activity]=0 : nr_classes[activity]+=1 + nr_total+=1 elsif prediction_feature.feature_type == "regression" - activity = value.to_f + activity= take_logs ? Math.log10(value.to_f) : value.to_f end begin @@last.AddCompound(smiles,id) @@last.AddActivity(activity, id) all_activities[id]=activity # DV: insert global information compounds[id] = compound - smi[id] = smiles # AM LAST: changed this to store SMILES. + smi[id] = smiles # AM LAST: changed this to store SMILES. id += 1 rescue LOGGER.warn "Could not add " + smiles + "\t" + value.to_s + " to fminer" @@ -404,10 +399,13 @@ post '/fminer/last/?' do instances=lu.match_rb(smi,smarts) # AM LAST: creates instantiations instances.each do |smarts, ids| feat_hash = Hash[*(all_activities.select { |k,v| ids.include?(k) }.flatten)] # AM LAST: get activities of feature occurrences; see http://www.softiesonrails.com/2007/9/18/ruby-201-weird-hash-syntax - @@last.GetRegression() ? p_value = @@last.KSTest(all_activities.values, feat_hash.values).to_f : p_value = @@last.ChisqTest(all_activities.values, feat_hash.values).to_f # AM LAST: use internal function for test - - - effect = (p_value > 0) ? "activating" : "deactivating" + if @@last.GetRegression() + p_value = @@last.KSTest(all_activities.values, feat_hash.values).to_f # AM LAST: use internal function for test + effect = (p_value > 0) ? "activating" : "deactivating" + else + p_value = @@last.ChisqTest(all_activities.values, feat_hash.values).to_f + effect = "unknown" + end feature_uri = File.join feature_dataset.uri,"feature","last", features.size.to_s unless features.include? smarts features << smarts diff --git a/lazar.rb b/lazar.rb index 4441726..31cdd4d 100644 --- a/lazar.rb +++ b/lazar.rb @@ -131,7 +131,9 @@ post '/lazar/?' do if prediction_feature.feature_type == "classification" @training_classes = training_activities.accept_values(prediction_feature.uri).sort - lazar.value_map = { true => @training_classes.last, false => @training_classes.first } + @training_classes.each_with_index { |c,i| + lazar.value_map[i] = c + } elsif prediction_feature.feature_type == "regression" lazar.prediction_algorithm = "Neighbors.local_svm_regression" end @@ -145,18 +147,7 @@ post '/lazar/?' do unless entry[prediction_feature.uri].empty? entry[prediction_feature.uri].each do |value| if prediction_feature.feature_type == "classification" - case value.to_s - when "true" - lazar.activities[compound] << true - when "false" - lazar.activities[compound] << false - when /#{@training_classes.last}/ - lazar.activities[compound] << true - when /#{@training_classes.first}/ - lazar.activities[compound] << false - else - LOGGER.warn "Unknown class \"#{value.to_s}\"." - end + lazar.activities[compound] << lazar.value_map.invert[value] # insert mapped values, not originals elsif prediction_feature.feature_type == "regression" #never use halt in tasks, do not raise exception when, print warning instead if value.to_f==0 -- cgit v1.2.3 From d9b997f0a0e3a45a72ff9e4a48035dacd75faacf Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Tue, 21 Jun 2011 14:44:41 +0200 Subject: Introduced switch 'balanced' --- lazar.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/lazar.rb b/lazar.rb index 31cdd4d..eb14075 100644 --- a/lazar.rb +++ b/lazar.rb @@ -141,6 +141,7 @@ post '/lazar/?' do # AM: allow prediction_algorithm override by user for classification AND regression lazar.prediction_algorithm = "Neighbors.#{params[:prediction_algorithm]}" unless params[:prediction_algorithm].nil? lazar.prop_kernel = true if params[:local_svm_kernel] == "propositionalized" + lazar.balanced = true if params[:balanced] == "true" training_activities.data_entries.each do |compound,entry| lazar.activities[compound] = [] unless lazar.activities[compound] -- cgit v1.2.3 From cec978fb5cf0defcb79dd8e0da604c4f6c978674 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Tue, 21 Jun 2011 15:56:08 +0200 Subject: Enabling map for Fminer --- fminer.rb | 2 +- lazar.rb | 23 +++++++++++++---------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/fminer.rb b/fminer.rb index 8c6451c..2aee7bd 100644 --- a/fminer.rb +++ b/fminer.rb @@ -173,7 +173,7 @@ post '/fminer/bbrc/?' do LOGGER.warn "No #{feature} activity for #{compound.to_s}." else if prediction_feature.feature_type == "classification" - activity= value.to_f + activity= params[:value_map].invert[value].to_f nr_classes[activity].nil? ? nr_classes[activity]=0 : nr_classes[activity]+=1 nr_total+=1 elsif prediction_feature.feature_type == "regression" diff --git a/lazar.rb b/lazar.rb index eb14075..6f8f439 100644 --- a/lazar.rb +++ b/lazar.rb @@ -53,7 +53,7 @@ post '/lazar/?' do prediction_feature = OpenTox::Feature.find(training_activities.features.keys.first,@subjectid) params[:prediction_feature] = prediction_feature.uri # pass to feature mining service end - + feature_generation_uri = @@feature_generation_default unless feature_generation_uri = params[:feature_generation_uri] raise OpenTox::NotFoundError.new "No feature #{prediction_feature.uri} in dataset #{params[:dataset_uri]}. (features: "+ @@ -62,6 +62,18 @@ post '/lazar/?' do lazar = OpenTox::Model::Lazar.new lazar.min_sim = params[:min_sim] if params[:min_sim] + + if prediction_feature.feature_type == "classification" + @training_classes = training_activities.accept_values(prediction_feature.uri).sort + @training_classes.each_with_index { |c,i| + lazar.value_map[i] = c + params[:value_map] = lazar.value_map + } + elsif prediction_feature.feature_type == "regression" + lazar.prediction_algorithm = "Neighbors.local_svm_regression" + end + + if params[:feature_dataset_uri] feature_dataset_uri = params[:feature_dataset_uri] training_features = OpenTox::Dataset.new(feature_dataset_uri) @@ -129,15 +141,6 @@ post '/lazar/?' do end end - if prediction_feature.feature_type == "classification" - @training_classes = training_activities.accept_values(prediction_feature.uri).sort - @training_classes.each_with_index { |c,i| - lazar.value_map[i] = c - } - elsif prediction_feature.feature_type == "regression" - lazar.prediction_algorithm = "Neighbors.local_svm_regression" - end - # AM: allow prediction_algorithm override by user for classification AND regression lazar.prediction_algorithm = "Neighbors.#{params[:prediction_algorithm]}" unless params[:prediction_algorithm].nil? lazar.prop_kernel = true if params[:local_svm_kernel] == "propositionalized" -- cgit v1.2.3 From 1222ff3e76051cd4903bfe89a3b3d8c0a7222799 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Tue, 21 Jun 2011 16:12:18 +0200 Subject: Fixing map values to not start from 0 --- lazar.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lazar.rb b/lazar.rb index 6f8f439..d692422 100644 --- a/lazar.rb +++ b/lazar.rb @@ -66,7 +66,7 @@ post '/lazar/?' do if prediction_feature.feature_type == "classification" @training_classes = training_activities.accept_values(prediction_feature.uri).sort @training_classes.each_with_index { |c,i| - lazar.value_map[i] = c + lazar.value_map[i+1] = c # don't use '0': we must take the weighted mean later. params[:value_map] = lazar.value_map } elsif prediction_feature.feature_type == "regression" -- cgit v1.2.3 From 6e3a1649f8649026a627e20d72c238beb18de3c4 Mon Sep 17 00:00:00 2001 From: am Date: Fri, 24 Jun 2011 14:26:44 +0200 Subject: Restored log10 behaviour to pass tests --- fminer.rb | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/fminer.rb b/fminer.rb index 2aee7bd..2babbcf 100644 --- a/fminer.rb +++ b/fminer.rb @@ -160,7 +160,7 @@ post '/fminer/bbrc/?' do entry.each do |feature,values| values.each do |value| if prediction_feature.feature_type == "regression" - if (! value.nil?) && (value.to_f <= 0) + if (! value.nil?) && (value.to_f < 1) take_logs=false end end @@ -340,16 +340,16 @@ post '/fminer/last/?' do end # AM: take log if appropriate - take_logs=true - entry.each do |feature,values| - values.each do |value| - if prediction_feature.feature_type == "regression" - if (! value.nil?) && (value.to_f <= 0) - take_logs=false - end - end - end - end + #take_logs=true + #entry.each do |feature,values| + # values.each do |value| + # if prediction_feature.feature_type == "regression" + # if (! value.nil?) && (value.to_f <= 0) + # take_logs=false + # end + # end + # end + #end entry.each do |feature,values| if feature == prediction_feature.uri @@ -362,7 +362,8 @@ post '/fminer/last/?' do nr_classes[activity].nil? ? nr_classes[activity]=0 : nr_classes[activity]+=1 nr_total+=1 elsif prediction_feature.feature_type == "regression" - activity= take_logs ? Math.log10(value.to_f) : value.to_f + #activity= take_logs ? Math.log10(value.to_f) : value.to_f + activity = value.to_f end begin @@last.AddCompound(smiles,id) -- cgit v1.2.3 From cc24e7d94bf8759a39600cd5a748365f729102b8 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Fri, 24 Jun 2011 15:52:09 +0200 Subject: Fixed Fminer value maps --- fminer.rb | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/fminer.rb b/fminer.rb index 2babbcf..7ccb5c4 100644 --- a/fminer.rb +++ b/fminer.rb @@ -117,6 +117,9 @@ post '/fminer/bbrc/?' do raise "no accept values for dataset '"+training_dataset.uri.to_s+"' and feature '"+prediction_feature.uri.to_s+ "'" unless training_dataset.accept_values(prediction_feature.uri) @training_classes = training_dataset.accept_values(prediction_feature.uri).sort + puts @training_classes.to_yaml + @value_map=Hash.new + @training_classes.each_with_index { |c,i| @value_map[i+1] = c } end @@bbrc.SetMinfreq(minfreq) @@bbrc.SetType(1) if params[:feature_type] == "paths" @@ -166,6 +169,8 @@ post '/fminer/bbrc/?' do end end end + + @value_map=params[:value_map] unless params[:value_map].nil? entry.each do |feature,values| if feature == prediction_feature.uri values.each do |value| @@ -173,7 +178,7 @@ post '/fminer/bbrc/?' do LOGGER.warn "No #{feature} activity for #{compound.to_s}." else if prediction_feature.feature_type == "classification" - activity= params[:value_map].invert[value].to_f + activity= @value_map.invert[value].to_f nr_classes[activity].nil? ? nr_classes[activity]=0 : nr_classes[activity]+=1 nr_total+=1 elsif prediction_feature.feature_type == "regression" @@ -223,7 +228,7 @@ post '/fminer/bbrc/?' do end end } - effect = max.to_s + effect = @value_map[f[2..-1].size-max].to_s else #regression part id_arrs = f[2] # DV: effect calculation @@ -301,6 +306,8 @@ post '/fminer/last/?' do @@last.SetRegression(true) # AM: DO NOT MOVE DOWN! Must happen before the other Set... operations! else @training_classes = training_dataset.accept_values(prediction_feature.uri) + @value_map=Hash.new + @training_classes.each_with_index { |c,i| @value_map[i+1] = c } end @@last.SetMinfreq(minfreq) @@last.SetType(1) if params[:feature_type] == "paths" @@ -351,6 +358,7 @@ post '/fminer/last/?' do # end #end + @value_map=params[:value_map] unless params[:value_map].nil? entry.each do |feature,values| if feature == prediction_feature.uri values.each do |value| @@ -358,7 +366,7 @@ post '/fminer/last/?' do LOGGER.warn "No #{feature} activity for #{compound.to_s}." else if prediction_feature.feature_type == "classification" - activity= value.to_f + activity= @value_map.invert[value].to_f nr_classes[activity].nil? ? nr_classes[activity]=0 : nr_classes[activity]+=1 nr_total+=1 elsif prediction_feature.feature_type == "regression" -- cgit v1.2.3 From 336ca26acc9045afecc1d2ec0879f5dc0669de1c Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Mon, 27 Jun 2011 09:38:48 +0200 Subject: Fixed effect --- fminer.rb | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fminer.rb b/fminer.rb index 7ccb5c4..6da3013 100644 --- a/fminer.rb +++ b/fminer.rb @@ -117,7 +117,6 @@ post '/fminer/bbrc/?' do raise "no accept values for dataset '"+training_dataset.uri.to_s+"' and feature '"+prediction_feature.uri.to_s+ "'" unless training_dataset.accept_values(prediction_feature.uri) @training_classes = training_dataset.accept_values(prediction_feature.uri).sort - puts @training_classes.to_yaml @value_map=Hash.new @training_classes.each_with_index { |c,i| @value_map[i+1] = c } end @@ -228,7 +227,7 @@ post '/fminer/bbrc/?' do end end } - effect = @value_map[f[2..-1].size-max].to_s + effect = @value_map[(f[2..-1].size-max).to_s].to_s else #regression part id_arrs = f[2] # DV: effect calculation -- cgit v1.2.3 From 5c3bc078eae55ed3e55fac5328e296348c10b646 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Mon, 27 Jun 2011 11:52:28 +0200 Subject: Using Hit counts --- fminer.rb | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fminer.rb b/fminer.rb index 6da3013..0d0ddfa 100644 --- a/fminer.rb +++ b/fminer.rb @@ -2,6 +2,7 @@ ENV['FMINER_SMARTS'] = 'true' ENV['FMINER_NO_AROMATIC'] = 'true' ENV['FMINER_PVALUES'] = 'true' ENV['FMINER_SILENT'] = 'true' +ENV['FMINER_NR_HITS'] = 'true' @@bbrc = Bbrc::Bbrc.new @@last = Last::Last.new @@ -260,7 +261,11 @@ post '/fminer/bbrc/?' do feature_dataset.add_feature feature_uri, metadata #feature_dataset.add_feature_parameters feature_uri, feature_dataset.parameters end - id_arrs.each { |id| feature_dataset.add(compounds[id], feature_uri, true)} + id_arrs.each { |id_count_hash| + id=id_count_hash.keys[0].to_i + count=id_count_hash.values[0].to_i + feature_dataset.add(compounds[id], feature_uri, true) + } end end feature_dataset.save(@subjectid) @@ -405,6 +410,7 @@ post '/fminer/last/?' do dom=lu.read(xml) # AM LAST: parse GraphML smarts=lu.smarts_rb(dom,'nls') # AM LAST: converts patterns to LAST-SMARTS using msa variant (see last-pm.maunz.de) instances=lu.match_rb(smi,smarts) # AM LAST: creates instantiations + instances.each do |smarts, ids| feat_hash = Hash[*(all_activities.select { |k,v| ids.include?(k) }.flatten)] # AM LAST: get activities of feature occurrences; see http://www.softiesonrails.com/2007/9/18/ruby-201-weird-hash-syntax if @@last.GetRegression() -- cgit v1.2.3 From 18d44225d0f5258348db3278a87628f74bd36744 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Mon, 27 Jun 2011 14:16:41 +0200 Subject: Using Hit counts (LAST-PM) --- fminer.rb | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/fminer.rb b/fminer.rb index 0d0ddfa..907bbfd 100644 --- a/fminer.rb +++ b/fminer.rb @@ -264,7 +264,11 @@ post '/fminer/bbrc/?' do id_arrs.each { |id_count_hash| id=id_count_hash.keys[0].to_i count=id_count_hash.values[0].to_i - feature_dataset.add(compounds[id], feature_uri, true) + if params[:nr_hits] + feature_dataset.add(compounds[id], feature_uri, count) + else + feature_dataset.add(compounds[id], feature_uri, true) + end } end end @@ -409,9 +413,10 @@ post '/fminer/last/?' do lu = LU.new # AM LAST: uses last-utils here dom=lu.read(xml) # AM LAST: parse GraphML smarts=lu.smarts_rb(dom,'nls') # AM LAST: converts patterns to LAST-SMARTS using msa variant (see last-pm.maunz.de) - instances=lu.match_rb(smi,smarts) # AM LAST: creates instantiations + params[:nr_hits].nil? ? nr_hits=true : nr_hits=false + matches,counts=lu.match_rb(smi,smarts,nr_hits) # AM LAST: creates instantiations - instances.each do |smarts, ids| + matches.each do |smarts, ids| feat_hash = Hash[*(all_activities.select { |k,v| ids.include?(k) }.flatten)] # AM LAST: get activities of feature occurrences; see http://www.softiesonrails.com/2007/9/18/ruby-201-weird-hash-syntax if @@last.GetRegression() p_value = @@last.KSTest(all_activities.values, feat_hash.values).to_f # AM LAST: use internal function for test @@ -436,7 +441,11 @@ post '/fminer/last/?' do } feature_dataset.add_feature feature_uri, metadata end - ids.each { |id| feature_dataset.add(compounds[id], feature_uri, true)} + if params[:nr_hits] + ids.each { |id| feature_dataset.add(compounds[id], feature_uri, true)} + else + ids.each_with_index { |id,i| feature_dataset.add(compounds[id], feature_uri, counts[id][i])} + end end feature_dataset.save(@subjectid) feature_dataset.uri -- cgit v1.2.3 From 68e670a006ec995212b21a22bc7936e43f16f205 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Mon, 27 Jun 2011 15:07:12 +0200 Subject: Minor fixed --- fminer.rb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fminer.rb b/fminer.rb index 907bbfd..37a8278 100644 --- a/fminer.rb +++ b/fminer.rb @@ -413,8 +413,8 @@ post '/fminer/last/?' do lu = LU.new # AM LAST: uses last-utils here dom=lu.read(xml) # AM LAST: parse GraphML smarts=lu.smarts_rb(dom,'nls') # AM LAST: converts patterns to LAST-SMARTS using msa variant (see last-pm.maunz.de) - params[:nr_hits].nil? ? nr_hits=true : nr_hits=false - matches,counts=lu.match_rb(smi,smarts,nr_hits) # AM LAST: creates instantiations + params[:nr_hits].nil? ? hit_count=false: hit_count=true + matches, counts = lu.match_rb(smi,smarts,hit_count) # AM LAST: creates instantiations matches.each do |smarts, ids| feat_hash = Hash[*(all_activities.select { |k,v| ids.include?(k) }.flatten)] # AM LAST: get activities of feature occurrences; see http://www.softiesonrails.com/2007/9/18/ruby-201-weird-hash-syntax @@ -441,10 +441,10 @@ post '/fminer/last/?' do } feature_dataset.add_feature feature_uri, metadata end - if params[:nr_hits] + if !hit_count ids.each { |id| feature_dataset.add(compounds[id], feature_uri, true)} else - ids.each_with_index { |id,i| feature_dataset.add(compounds[id], feature_uri, counts[id][i])} + ids.each_with_index { |id,i| feature_dataset.add(compounds[id], feature_uri, counts[smarts][i])} end end feature_dataset.save(@subjectid) -- cgit v1.2.3 From 2ff30e6703343110baf4eaba4f1c25ae522b8ece Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Mon, 27 Jun 2011 15:34:03 +0200 Subject: Fixed effect calculation for regression --- fminer.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/fminer.rb b/fminer.rb index 37a8278..1881f39 100644 --- a/fminer.rb +++ b/fminer.rb @@ -234,6 +234,7 @@ post '/fminer/bbrc/?' do # DV: effect calculation f_arr=Array.new f[2].each do |id| + id=id.keys[0] # extract id from hit count hash f_arr.push(all_activities[id]) end f_median=OpenTox::Algorithm.median(f_arr) -- cgit v1.2.3 From d5a2ad2725c5739b125124cd93f46d0a488d2d04 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Tue, 28 Jun 2011 08:29:43 +0200 Subject: Min Frequency: own routine --- fminer.rb | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/fminer.rb b/fminer.rb index 1881f39..2f75052 100644 --- a/fminer.rb +++ b/fminer.rb @@ -105,8 +105,7 @@ post '/fminer/bbrc/?' do minfreq=params[:min_frequency].to_i raise "Minimum frequency must be a number >0!" unless minfreq>0 else - minfreq = 5*training_dataset.compounds.size/1000 # AM sugg. 8-10 per mil - minfreq = 2 unless minfreq > 2 + minfreq=OpenTox::Algorithm.min_frequency(5) # AM sugg. 8-10 per mil for BBRC, 50 per mil for LAST end task = OpenTox::Task.create("Mining BBRC features", url_for('/fminer',:full)) do @@ -296,16 +295,14 @@ post '/fminer/last/?' do raise OpenTox::NotFoundError.new "Please submit a dataset_uri." unless params[:dataset_uri] and !params[:dataset_uri].nil? raise OpenTox::NotFoundError.new "Please submit a prediction_feature." unless params[:prediction_feature] and !params[:prediction_feature].nil? prediction_feature = OpenTox::Feature.find params[:prediction_feature], @subjectid - training_dataset = OpenTox::Dataset.new "#{params[:dataset_uri]}", @subjectid - training_dataset.load_all(@subjectid) + training_dataset = OpenTox::Dataset.find "#{params[:dataset_uri]}", @subjectid raise OpenTox::NotFoundError.new "No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}" unless training_dataset.features and training_dataset.features.include?(params[:prediction_feature]) unless params[:min_frequency].nil? minfreq=params[:min_frequency].to_i raise "Minimum frequency must be a number >0!" unless minfreq>0 else - minfreq = 8*training_dataset.compounds.size/100 # AM sugg. 5-10% - minfreq = 2 unless minfreq > 2 + minfreq=OpenTox::Algorithm.min_frequency(80) # AM sugg. 8-10 per mil for BBRC, 50 per mil for LAST end task = OpenTox::Task.create("Mining LAST features", url_for('/fminer',:full)) do @@ -314,7 +311,9 @@ post '/fminer/last/?' do if prediction_feature.feature_type == "regression" @@last.SetRegression(true) # AM: DO NOT MOVE DOWN! Must happen before the other Set... operations! else - @training_classes = training_dataset.accept_values(prediction_feature.uri) + raise "no accept values for dataset '"+training_dataset.uri.to_s+"' and feature '"+prediction_feature.uri.to_s+ + "'" unless training_dataset.accept_values(prediction_feature.uri) + @training_classes = training_dataset.accept_values(prediction_feature.uri).sort @value_map=Hash.new @training_classes.each_with_index { |c,i| @value_map[i+1] = c } end -- cgit v1.2.3 From dad603ff86924f554c4f8d668ea3cbe13fe93d9c Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Tue, 28 Jun 2011 08:35:56 +0200 Subject: Min Frequency: own routine --- fminer.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fminer.rb b/fminer.rb index 2f75052..731edd1 100644 --- a/fminer.rb +++ b/fminer.rb @@ -105,7 +105,7 @@ post '/fminer/bbrc/?' do minfreq=params[:min_frequency].to_i raise "Minimum frequency must be a number >0!" unless minfreq>0 else - minfreq=OpenTox::Algorithm.min_frequency(5) # AM sugg. 8-10 per mil for BBRC, 50 per mil for LAST + minfreq=OpenTox::Algorithm.min_frequency(training_dataset,5) # AM sugg. 8-10 per mil for BBRC, 50 per mil for LAST end task = OpenTox::Task.create("Mining BBRC features", url_for('/fminer',:full)) do @@ -302,7 +302,7 @@ post '/fminer/last/?' do minfreq=params[:min_frequency].to_i raise "Minimum frequency must be a number >0!" unless minfreq>0 else - minfreq=OpenTox::Algorithm.min_frequency(80) # AM sugg. 8-10 per mil for BBRC, 50 per mil for LAST + minfreq=OpenTox::Algorithm.min_frequency(training_dataset,80) # AM sugg. 8-10 per mil for BBRC, 50 per mil for LAST end task = OpenTox::Task.create("Mining LAST features", url_for('/fminer',:full)) do -- cgit v1.2.3 From cdd23a853f29a189bc4de49f1913a1a25cfe1b8b Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Tue, 28 Jun 2011 10:16:27 +0200 Subject: Finished BBRC effect --- fminer.rb | 29 ++++++++++------------------- 1 file changed, 10 insertions(+), 19 deletions(-) diff --git a/fminer.rb b/fminer.rb index 731edd1..6ce4c42 100644 --- a/fminer.rb +++ b/fminer.rb @@ -140,8 +140,9 @@ post '/fminer/bbrc/?' do id = 1 # fminer start id is not 0 compounds = [] - nr_classes={} - nr_total=0 + db_class_sizes=Array.new# AM effect calc + #nr_classes={} + #nr_total=0 all_activities = Hash.new# DV: for effect calculation in regression part training_dataset.data_entries.each do |compound,entry| @@ -177,9 +178,10 @@ post '/fminer/bbrc/?' do LOGGER.warn "No #{feature} activity for #{compound.to_s}." else if prediction_feature.feature_type == "classification" - activity= @value_map.invert[value].to_f - nr_classes[activity].nil? ? nr_classes[activity]=0 : nr_classes[activity]+=1 - nr_total+=1 + activity= @value_map.invert[value].to_i - 1 # activities are mapped to 1..n + db_class_sizes[activity].nil? ? db_class_sizes[activity]=1 : db_class_sizes[activity]+=1 # AM effect calc + #nr_classes[activity].nil? ? nr_classes[activity]=0 : nr_classes[activity]+=1 + #nr_total+=1 elsif prediction_feature.feature_type == "regression" activity= take_logs ? Math.log10(value.to_f) : value.to_f end @@ -215,19 +217,8 @@ post '/fminer/bbrc/?' do if (!@@bbrc.GetRegression) id_arrs = f[2..-1].flatten - max=nil - max_value=0 - f[2..-1].reverse.each_with_index { |id,i| # fminer outputs occurrences sorted reverse by activity. - actual = id.size.to_f/id_arrs.size - expected = nr_classes[i].to_f/nr_total - if actual > expected - if ((actual - expected) / actual) > max_value - max_value = (actual - expected) / actual # 'Schleppzeiger' - max = i - end - end - } - effect = @value_map[(f[2..-1].size-max).to_s].to_s + max = OpenTox::Algorithm.effect(f[2..-1].reverse, db_class_sizes) + effect = @value_map[(f[2..-1].size-max)].to_s else #regression part id_arrs = f[2] # DV: effect calculation @@ -432,7 +423,7 @@ post '/fminer/last/?' do RDF.type => [OT.Feature, OT.Substructure], OT.hasSource => feature_dataset.uri, OT.smarts => smarts, - OT.pValue => p_value.to_f.abs, + OT.pValue => p_value.abs, OT.effect => effect, OT.parameters => [ { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] }, -- cgit v1.2.3 From 94d88944b7d2953960e03c86eb78e697822fbc9f Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Tue, 28 Jun 2011 11:37:29 +0200 Subject: Effect calculation unified for LAST and BBRC --- fminer.rb | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/fminer.rb b/fminer.rb index 6ce4c42..fe7f256 100644 --- a/fminer.rb +++ b/fminer.rb @@ -140,10 +140,8 @@ post '/fminer/bbrc/?' do id = 1 # fminer start id is not 0 compounds = [] - db_class_sizes=Array.new# AM effect calc - #nr_classes={} - #nr_total=0 - all_activities = Hash.new# DV: for effect calculation in regression part + db_class_sizes = Array.new # AM: effect + all_activities = Hash.new # DV: for effect calculation in regression part training_dataset.data_entries.each do |compound,entry| begin @@ -179,9 +177,7 @@ post '/fminer/bbrc/?' do else if prediction_feature.feature_type == "classification" activity= @value_map.invert[value].to_i - 1 # activities are mapped to 1..n - db_class_sizes[activity].nil? ? db_class_sizes[activity]=1 : db_class_sizes[activity]+=1 # AM effect calc - #nr_classes[activity].nil? ? nr_classes[activity]=0 : nr_classes[activity]+=1 - #nr_total+=1 + db_class_sizes[activity].nil? ? db_class_sizes[activity]=1 : db_class_sizes[activity]+=1 # AM effect elsif prediction_feature.feature_type == "regression" activity= take_logs ? Math.log10(value.to_f) : value.to_f end @@ -329,9 +325,11 @@ post '/fminer/last/?' do id = 1 # fminer start id is not 0 compounds = [] smi = [] # AM LAST: needed for matching the patterns back - nr_classes = [] - nr_total=0 - all_activities = Hash.new #DV: for effect calculation (class and regr) + + #nr_classes = [] + #nr_total=0 + db_class_sizes = Array.new # AM: effect + all_activities = Hash.new # DV: for effect calculation (class and regr) training_dataset.data_entries.each do |compound,entry| begin @@ -366,8 +364,9 @@ post '/fminer/last/?' do else if prediction_feature.feature_type == "classification" activity= @value_map.invert[value].to_f - nr_classes[activity].nil? ? nr_classes[activity]=0 : nr_classes[activity]+=1 - nr_total+=1 + db_class_sizes[activity.to_i-1].nil? ? db_class_sizes[activity.to_i-1]=1 : db_class_sizes[activity.to_i-1]+=1 + #nr_classes[activity].nil? ? nr_classes[activity]=0 : nr_classes[activity]+=1 + #nr_total+=1 elsif prediction_feature.feature_type == "regression" #activity= take_logs ? Math.log10(value.to_f) : value.to_f activity = value.to_f @@ -414,7 +413,11 @@ post '/fminer/last/?' do effect = (p_value > 0) ? "activating" : "deactivating" else p_value = @@last.ChisqTest(all_activities.values, feat_hash.values).to_f - effect = "unknown" + g=Array.new + @value_map.each { |y,act| g[y-1]=Array.new } + feat_hash.each { |x,y| g[y-1].push(x) } + max = OpenTox::Algorithm.effect(g, db_class_sizes) + effect = @value_map[(g.size-max)].to_s end feature_uri = File.join feature_dataset.uri,"feature","last", features.size.to_s unless features.include? smarts -- cgit v1.2.3 From a1a285b9cabbfd05057dfa456f41e903f4337b59 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Tue, 28 Jun 2011 13:14:36 +0200 Subject: Fixed comments --- fminer.rb | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fminer.rb b/fminer.rb index fe7f256..28602b2 100644 --- a/fminer.rb +++ b/fminer.rb @@ -88,10 +88,11 @@ end # @param [String] dataset_uri URI of the training dataset # @param [String] prediction_feature URI of the prediction feature (i.e. dependent variable) # @param [optional] parameters BBRC parameters, accepted parameters are -# - minfreq Minimum frequency (default 5) +# - min_frequency Minimum frequency (default 5) # - feature_type Feature type, can be 'paths' or 'trees' (default "trees") # - backbone BBRC classes, pass 'false' to switch off mining for BBRC representatives. (default "true") # - min_chisq_significance Significance threshold (between 0 and 1) +# - nr_hits Set to "true" to get hit count instead of presence # @return [text/uri-list] Task URI post '/fminer/bbrc/?' do @@ -273,9 +274,10 @@ end # @param [String] dataset_uri URI of the training dataset # @param [String] prediction_feature URI of the prediction feature (i.e. dependent variable) # @param [optional] parameters LAST parameters, accepted parameters are -# - minfreq Minimum frequency (default 5) +# - min_frequency freq Minimum frequency (default 5) # - feature_type Feature type, can be 'paths' or 'trees' (default "trees") # - hops Maximum number of hops +# - nr_hits Set to "true" to get hit count instead of presence # @return [text/uri-list] Task URI post '/fminer/last/?' do -- cgit v1.2.3 From 293b82545a76d8e50c534eac2dae7df5bc49082b Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Wed, 29 Jun 2011 08:15:53 +0200 Subject: Fixed activity. --- fminer.rb | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fminer.rb b/fminer.rb index 28602b2..84f6240 100644 --- a/fminer.rb +++ b/fminer.rb @@ -177,8 +177,8 @@ post '/fminer/bbrc/?' do LOGGER.warn "No #{feature} activity for #{compound.to_s}." else if prediction_feature.feature_type == "classification" - activity= @value_map.invert[value].to_i - 1 # activities are mapped to 1..n - db_class_sizes[activity].nil? ? db_class_sizes[activity]=1 : db_class_sizes[activity]+=1 # AM effect + activity= @value_map.invert[value].to_i # activities are mapped to 1..n + db_class_sizes[activity-1].nil? ? db_class_sizes[activity-1]=1 : db_class_sizes[activity-1]+=1 # AM effect elsif prediction_feature.feature_type == "regression" activity= take_logs ? Math.log10(value.to_f) : value.to_f end @@ -367,11 +367,9 @@ post '/fminer/last/?' do if prediction_feature.feature_type == "classification" activity= @value_map.invert[value].to_f db_class_sizes[activity.to_i-1].nil? ? db_class_sizes[activity.to_i-1]=1 : db_class_sizes[activity.to_i-1]+=1 - #nr_classes[activity].nil? ? nr_classes[activity]=0 : nr_classes[activity]+=1 - #nr_total+=1 elsif prediction_feature.feature_type == "regression" #activity= take_logs ? Math.log10(value.to_f) : value.to_f - activity = value.to_f + activity = value.to_f end begin @@last.AddCompound(smiles,id) -- cgit v1.2.3 From b6a74aa1c26655f9e80c0c6b12282336541f839c Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Wed, 29 Jun 2011 09:11:55 +0200 Subject: Started Fminer refactoring --- fminer.rb | 87 +++++++++++++++++++++++---------------------------------------- 1 file changed, 32 insertions(+), 55 deletions(-) diff --git a/fminer.rb b/fminer.rb index 84f6240..7b49dc6 100644 --- a/fminer.rb +++ b/fminer.rb @@ -96,32 +96,21 @@ end # @return [text/uri-list] Task URI post '/fminer/bbrc/?' do - raise OpenTox::NotFoundError.new "Please submit a dataset_uri." unless params[:dataset_uri] and !params[:dataset_uri].nil? - raise OpenTox::NotFoundError.new "Please submit a prediction_feature." unless params[:prediction_feature] and !params[:prediction_feature].nil? - prediction_feature = OpenTox::Feature.find params[:prediction_feature], @subjectid - training_dataset = OpenTox::Dataset.find "#{params[:dataset_uri]}", @subjectid - raise OpenTox::NotFoundError.new "No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}" unless training_dataset.features and training_dataset.features.include?(params[:prediction_feature]) - - unless params[:min_frequency].nil? - minfreq=params[:min_frequency].to_i - raise "Minimum frequency must be a number >0!" unless minfreq>0 - else - minfreq=OpenTox::Algorithm.min_frequency(training_dataset,5) # AM sugg. 8-10 per mil for BBRC, 50 per mil for LAST - end + fminer=OpenTox::Algorithm::Fminer.new + fminer.check_params(params,5) task = OpenTox::Task.create("Mining BBRC features", url_for('/fminer',:full)) do - @@bbrc.Reset - if prediction_feature.feature_type == "regression" + if fminer.prediction_feature.feature_type == "regression" @@bbrc.SetRegression(true) # AM: DO NOT MOVE DOWN! Must happen before the other Set... operations! else - raise "no accept values for dataset '"+training_dataset.uri.to_s+"' and feature '"+prediction_feature.uri.to_s+ - "'" unless training_dataset.accept_values(prediction_feature.uri) - @training_classes = training_dataset.accept_values(prediction_feature.uri).sort + raise "no accept values for dataset '"+fminer.training_dataset.uri.to_s+"' and feature '"+fminer.prediction_feature.uri.to_s+ + "'" unless fminer.training_dataset.accept_values(fminer.prediction_feature.uri) + @training_classes = fminer.training_dataset.accept_values(fminer.prediction_feature.uri).sort @value_map=Hash.new @training_classes.each_with_index { |c,i| @value_map[i+1] = c } end - @@bbrc.SetMinfreq(minfreq) + @@bbrc.SetMinfreq(fminer.minfreq) @@bbrc.SetType(1) if params[:feature_type] == "paths" @@bbrc.SetBackbone(eval params[:backbone]) if params[:backbone] and ( params[:backbone] == "true" or params[:backbone] == "false" ) # convert string to boolean @@bbrc.SetChisqSig(params[:min_chisq_significance].to_f) if params[:min_chisq_significance] @@ -129,7 +118,7 @@ post '/fminer/bbrc/?' do feature_dataset = OpenTox::Dataset.new(nil, @subjectid) feature_dataset.add_metadata({ - DC.title => "BBRC representatives for " + training_dataset.metadata[DC.title].to_s, + DC.title => "BBRC representatives for " + fminer.training_dataset.metadata[DC.title].to_s, DC.creator => url_for('/fminer/bbrc',:full), OT.hasSource => url_for('/fminer/bbrc', :full), OT.parameters => [ @@ -144,9 +133,9 @@ post '/fminer/bbrc/?' do db_class_sizes = Array.new # AM: effect all_activities = Hash.new # DV: for effect calculation in regression part - training_dataset.data_entries.each do |compound,entry| + + fminer.training_dataset.data_entries.each do |compound,entry| begin - # fix: ambit does not support inchi, directly request smiles smiles = OpenTox::Compound.smiles(compound.to_s) rescue LOGGER.warn "No resource for #{compound.to_s}" @@ -161,7 +150,7 @@ post '/fminer/bbrc/?' do take_logs=true entry.each do |feature,values| values.each do |value| - if prediction_feature.feature_type == "regression" + if fminer.prediction_feature.feature_type == "regression" if (! value.nil?) && (value.to_f < 1) take_logs=false end @@ -171,15 +160,16 @@ post '/fminer/bbrc/?' do @value_map=params[:value_map] unless params[:value_map].nil? entry.each do |feature,values| - if feature == prediction_feature.uri + if feature == fminer.prediction_feature.uri values.each do |value| if value.nil? LOGGER.warn "No #{feature} activity for #{compound.to_s}." else - if prediction_feature.feature_type == "classification" + if fminer.prediction_feature.feature_type == "classification" activity= @value_map.invert[value].to_i # activities are mapped to 1..n db_class_sizes[activity-1].nil? ? db_class_sizes[activity-1]=1 : db_class_sizes[activity-1]+=1 # AM effect - elsif prediction_feature.feature_type == "regression" + elsif fminer.prediction_feature.feature_type == "regression" + activity= take_logs ? Math.log10(value.to_f) : value.to_f end begin @@ -187,6 +177,7 @@ post '/fminer/bbrc/?' do @@bbrc.AddActivity(activity, id) all_activities[id]=activity # DV: insert global information compounds[id] = compound + id += 1 rescue LOGGER.warn "Could not add " + smiles + "\t" + value.to_s + " to fminer" @@ -200,7 +191,7 @@ post '/fminer/bbrc/?' do g_array=all_activities.values # DV: calculation of global median for effect calculation g_median=OpenTox::Algorithm.median(g_array) - raise "No compounds in dataset #{training_dataset.uri}" if compounds.size==0 + raise "No compounds in dataset #{fminer.training_dataset.uri}" if compounds.size==0 features = Set.new # run @@bbrc @@ -281,32 +272,21 @@ end # @return [text/uri-list] Task URI post '/fminer/last/?' do - raise OpenTox::NotFoundError.new "Please submit a dataset_uri." unless params[:dataset_uri] and !params[:dataset_uri].nil? - raise OpenTox::NotFoundError.new "Please submit a prediction_feature." unless params[:prediction_feature] and !params[:prediction_feature].nil? - prediction_feature = OpenTox::Feature.find params[:prediction_feature], @subjectid - training_dataset = OpenTox::Dataset.find "#{params[:dataset_uri]}", @subjectid - raise OpenTox::NotFoundError.new "No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}" unless training_dataset.features and training_dataset.features.include?(params[:prediction_feature]) - - unless params[:min_frequency].nil? - minfreq=params[:min_frequency].to_i - raise "Minimum frequency must be a number >0!" unless minfreq>0 - else - minfreq=OpenTox::Algorithm.min_frequency(training_dataset,80) # AM sugg. 8-10 per mil for BBRC, 50 per mil for LAST - end + fminer=OpenTox::Algorithm::Fminer.new + fminer.check_params(params,80) task = OpenTox::Task.create("Mining LAST features", url_for('/fminer',:full)) do - @@last.Reset - if prediction_feature.feature_type == "regression" + if fminer.prediction_feature.feature_type == "regression" @@last.SetRegression(true) # AM: DO NOT MOVE DOWN! Must happen before the other Set... operations! else - raise "no accept values for dataset '"+training_dataset.uri.to_s+"' and feature '"+prediction_feature.uri.to_s+ - "'" unless training_dataset.accept_values(prediction_feature.uri) - @training_classes = training_dataset.accept_values(prediction_feature.uri).sort + raise "no accept values for dataset '"+fminer.training_dataset.uri.to_s+"' and feature '"+fminer.prediction_feature.uri.to_s+ + "'" unless fminer.training_dataset.accept_values(fminer.prediction_feature.uri) + @training_classes = fminer.training_dataset.accept_values(fminer.prediction_feature.uri).sort @value_map=Hash.new @training_classes.each_with_index { |c,i| @value_map[i+1] = c } end - @@last.SetMinfreq(minfreq) + @@last.SetMinfreq(fminer.minfreq) @@last.SetType(1) if params[:feature_type] == "paths" @@last.SetMaxHops(params[:hops]) if params[:hops] @@last.SetConsoleOut(false) @@ -314,7 +294,7 @@ post '/fminer/last/?' do feature_dataset = OpenTox::Dataset.new(nil, @subjectid) feature_dataset.add_metadata({ - DC.title => "LAST representatives for " + training_dataset.metadata[DC.title].to_s, + DC.title => "LAST representatives for " + fminer.training_dataset.metadata[DC.title].to_s, DC.creator => url_for('/fminer/last',:full), OT.hasSource => url_for('/fminer/last', :full), OT.parameters => [ @@ -326,14 +306,11 @@ post '/fminer/last/?' do id = 1 # fminer start id is not 0 compounds = [] - smi = [] # AM LAST: needed for matching the patterns back - - #nr_classes = [] - #nr_total=0 db_class_sizes = Array.new # AM: effect all_activities = Hash.new # DV: for effect calculation (class and regr) + smi = [] # AM LAST: needed for matching the patterns back - training_dataset.data_entries.each do |compound,entry| + fminer.training_dataset.data_entries.each do |compound,entry| begin smiles = OpenTox::Compound.smiles(compound.to_s) rescue @@ -349,7 +326,7 @@ post '/fminer/last/?' do #take_logs=true #entry.each do |feature,values| # values.each do |value| - # if prediction_feature.feature_type == "regression" + # if fminer.prediction_feature.feature_type == "regression" # if (! value.nil?) && (value.to_f <= 0) # take_logs=false # end @@ -359,15 +336,15 @@ post '/fminer/last/?' do @value_map=params[:value_map] unless params[:value_map].nil? entry.each do |feature,values| - if feature == prediction_feature.uri + if feature == fminer.prediction_feature.uri values.each do |value| if value.nil? LOGGER.warn "No #{feature} activity for #{compound.to_s}." else - if prediction_feature.feature_type == "classification" + if fminer.prediction_feature.feature_type == "classification" activity= @value_map.invert[value].to_f db_class_sizes[activity.to_i-1].nil? ? db_class_sizes[activity.to_i-1]=1 : db_class_sizes[activity.to_i-1]+=1 - elsif prediction_feature.feature_type == "regression" + elsif fminer.prediction_feature.feature_type == "regression" #activity= take_logs ? Math.log10(value.to_f) : value.to_f activity = value.to_f end @@ -387,7 +364,7 @@ post '/fminer/last/?' do end end - raise "No compounds in dataset #{training_dataset.uri}" if compounds.size==0 + raise "No compounds in dataset #{fminer.training_dataset.uri}" if compounds.size==0 # run @@last features = Set.new -- cgit v1.2.3 From b9bf7885711055bc83479c7558854402ddb4b21f Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Wed, 29 Jun 2011 10:03:38 +0200 Subject: Unified adding of data --- fminer.rb | 91 +++++++++++++++++++++++++++++++-------------------------------- 1 file changed, 45 insertions(+), 46 deletions(-) diff --git a/fminer.rb b/fminer.rb index 7b49dc6..51f0964 100644 --- a/fminer.rb +++ b/fminer.rb @@ -129,9 +129,10 @@ post '/fminer/bbrc/?' do feature_dataset.save(@subjectid) id = 1 # fminer start id is not 0 - compounds = [] - db_class_sizes = Array.new # AM: effect - all_activities = Hash.new # DV: for effect calculation in regression part + fminer.compounds = [] + fminer.db_class_sizes = Array.new # AM: effect + fminer.all_activities = Hash.new # DV: for effect calculation in regression part + fminer.smi = [] # AM LAST: needed for matching the patterns back fminer.training_dataset.data_entries.each do |compound,entry| @@ -142,7 +143,7 @@ post '/fminer/bbrc/?' do next end if smiles == '' or smiles.nil? - LOGGER.warn "Cannot find smiles for #{compound.to_s}" + LOGGER.warn "Cannot find smiles for #{compound.to_s}." next end @@ -151,7 +152,7 @@ post '/fminer/bbrc/?' do entry.each do |feature,values| values.each do |value| if fminer.prediction_feature.feature_type == "regression" - if (! value.nil?) && (value.to_f < 1) + if (! value.nil?) && (value.to_f <= 0) take_logs=false end end @@ -167,17 +168,16 @@ post '/fminer/bbrc/?' do else if fminer.prediction_feature.feature_type == "classification" activity= @value_map.invert[value].to_i # activities are mapped to 1..n - db_class_sizes[activity-1].nil? ? db_class_sizes[activity-1]=1 : db_class_sizes[activity-1]+=1 # AM effect - elsif fminer.prediction_feature.feature_type == "regression" - + fminer.db_class_sizes[activity-1].nil? ? fminer.db_class_sizes[activity-1]=1 : fminer.db_class_sizes[activity-1]+=1 # AM effect + elsif fminer.prediction_feature.feature_type == "regression" activity= take_logs ? Math.log10(value.to_f) : value.to_f end begin @@bbrc.AddCompound(smiles,id) @@bbrc.AddActivity(activity, id) - all_activities[id]=activity # DV: insert global information - compounds[id] = compound - + fminer.all_activities[id]=activity # DV: insert global information + fminer.compounds[id] = compound + fminer.smi[id] = smiles id += 1 rescue LOGGER.warn "Could not add " + smiles + "\t" + value.to_s + " to fminer" @@ -188,10 +188,10 @@ post '/fminer/bbrc/?' do end end - g_array=all_activities.values # DV: calculation of global median for effect calculation + g_array=fminer.all_activities.values # DV: calculation of global median for effect calculation g_median=OpenTox::Algorithm.median(g_array) - raise "No compounds in dataset #{fminer.training_dataset.uri}" if compounds.size==0 + raise "No compounds in dataset #{fminer.training_dataset.uri}" if fminer.compounds.size==0 features = Set.new # run @@bbrc @@ -205,7 +205,7 @@ post '/fminer/bbrc/?' do if (!@@bbrc.GetRegression) id_arrs = f[2..-1].flatten - max = OpenTox::Algorithm.effect(f[2..-1].reverse, db_class_sizes) + max = OpenTox::Algorithm.effect(f[2..-1].reverse, fminer.db_class_sizes) effect = @value_map[(f[2..-1].size-max)].to_s else #regression part id_arrs = f[2] @@ -213,7 +213,7 @@ post '/fminer/bbrc/?' do f_arr=Array.new f[2].each do |id| id=id.keys[0] # extract id from hit count hash - f_arr.push(all_activities[id]) + f_arr.push(fminer.all_activities[id]) end f_median=OpenTox::Algorithm.median(f_arr) if g_median >= f_median @@ -244,9 +244,9 @@ post '/fminer/bbrc/?' do id=id_count_hash.keys[0].to_i count=id_count_hash.values[0].to_i if params[:nr_hits] - feature_dataset.add(compounds[id], feature_uri, count) + feature_dataset.add(fminer.compounds[id], feature_uri, count) else - feature_dataset.add(compounds[id], feature_uri, true) + feature_dataset.add(fminer.compounds[id], feature_uri, true) end } end @@ -305,10 +305,10 @@ post '/fminer/last/?' do feature_dataset.save(@subjectid) id = 1 # fminer start id is not 0 - compounds = [] - db_class_sizes = Array.new # AM: effect - all_activities = Hash.new # DV: for effect calculation (class and regr) - smi = [] # AM LAST: needed for matching the patterns back + fminer.compounds = [] + fminer.db_class_sizes = Array.new # AM: effect + fminer.all_activities = Hash.new # DV: for effect calculation (class and regr) + fminer.smi = [] # AM LAST: needed for matching the patterns back fminer.training_dataset.data_entries.each do |compound,entry| begin @@ -323,16 +323,16 @@ post '/fminer/last/?' do end # AM: take log if appropriate - #take_logs=true - #entry.each do |feature,values| - # values.each do |value| - # if fminer.prediction_feature.feature_type == "regression" - # if (! value.nil?) && (value.to_f <= 0) - # take_logs=false - # end - # end - # end - #end + take_logs=true + entry.each do |feature,values| + values.each do |value| + if fminer.prediction_feature.feature_type == "regression" + if (! value.nil?) && (value.to_f <= 0) + take_logs=false + end + end + end + end @value_map=params[:value_map] unless params[:value_map].nil? entry.each do |feature,values| @@ -342,18 +342,17 @@ post '/fminer/last/?' do LOGGER.warn "No #{feature} activity for #{compound.to_s}." else if fminer.prediction_feature.feature_type == "classification" - activity= @value_map.invert[value].to_f - db_class_sizes[activity.to_i-1].nil? ? db_class_sizes[activity.to_i-1]=1 : db_class_sizes[activity.to_i-1]+=1 + activity= @value_map.invert[value].to_i + fminer.db_class_sizes[activity-1].nil? ? fminer.db_class_sizes[activity-1]=1 : fminer.db_class_sizes[activity-1]+=1 elsif fminer.prediction_feature.feature_type == "regression" - #activity= take_logs ? Math.log10(value.to_f) : value.to_f - activity = value.to_f + activity= take_logs ? Math.log10(value.to_f) : value.to_f end begin @@last.AddCompound(smiles,id) @@last.AddActivity(activity, id) - all_activities[id]=activity # DV: insert global information - compounds[id] = compound - smi[id] = smiles # AM LAST: changed this to store SMILES. + fminer.all_activities[id]=activity # DV: insert global information + fminer.compounds[id] = compound + fminer.smi[id] = smiles # AM LAST: changed this to store SMILES. id += 1 rescue LOGGER.warn "Could not add " + smiles + "\t" + value.to_s + " to fminer" @@ -364,7 +363,7 @@ post '/fminer/last/?' do end end - raise "No compounds in dataset #{fminer.training_dataset.uri}" if compounds.size==0 + raise "No compounds in dataset #{fminer.training_dataset.uri}" if fminer.compounds.size==0 # run @@last features = Set.new @@ -381,19 +380,19 @@ post '/fminer/last/?' do dom=lu.read(xml) # AM LAST: parse GraphML smarts=lu.smarts_rb(dom,'nls') # AM LAST: converts patterns to LAST-SMARTS using msa variant (see last-pm.maunz.de) params[:nr_hits].nil? ? hit_count=false: hit_count=true - matches, counts = lu.match_rb(smi,smarts,hit_count) # AM LAST: creates instantiations + matches, counts = lu.match_rb(fminer.smi,smarts,hit_count) # AM LAST: creates instantiations matches.each do |smarts, ids| - feat_hash = Hash[*(all_activities.select { |k,v| ids.include?(k) }.flatten)] # AM LAST: get activities of feature occurrences; see http://www.softiesonrails.com/2007/9/18/ruby-201-weird-hash-syntax + feat_hash = Hash[*(fminer.all_activities.select { |k,v| ids.include?(k) }.flatten)] # AM LAST: get activities of feature occurrences; see http://www.softiesonrails.com/2007/9/18/ruby-201-weird-hash-syntax if @@last.GetRegression() - p_value = @@last.KSTest(all_activities.values, feat_hash.values).to_f # AM LAST: use internal function for test + p_value = @@last.KSTest(fminer.all_activities.values, feat_hash.values).to_f # AM LAST: use internal function for test effect = (p_value > 0) ? "activating" : "deactivating" else - p_value = @@last.ChisqTest(all_activities.values, feat_hash.values).to_f + p_value = @@last.ChisqTest(fminer.all_activities.values, feat_hash.values).to_f g=Array.new @value_map.each { |y,act| g[y-1]=Array.new } feat_hash.each { |x,y| g[y-1].push(x) } - max = OpenTox::Algorithm.effect(g, db_class_sizes) + max = OpenTox::Algorithm.effect(g, fminer.db_class_sizes) effect = @value_map[(g.size-max)].to_s end feature_uri = File.join feature_dataset.uri,"feature","last", features.size.to_s @@ -413,9 +412,9 @@ post '/fminer/last/?' do feature_dataset.add_feature feature_uri, metadata end if !hit_count - ids.each { |id| feature_dataset.add(compounds[id], feature_uri, true)} + ids.each { |id| feature_dataset.add(fminer.compounds[id], feature_uri, true)} else - ids.each_with_index { |id,i| feature_dataset.add(compounds[id], feature_uri, counts[smarts][i])} + ids.each_with_index { |id,i| feature_dataset.add(fminer.compounds[id], feature_uri, counts[smarts][i])} end end feature_dataset.save(@subjectid) -- cgit v1.2.3 From ef74b831e229bf8a399e2e62818776e924c583b1 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Wed, 29 Jun 2011 10:34:07 +0200 Subject: Adding of data delegated --- fminer.rb | 111 +++----------------------------------------------------------- 1 file changed, 4 insertions(+), 107 deletions(-) diff --git a/fminer.rb b/fminer.rb index 51f0964..ad3c7f9 100644 --- a/fminer.rb +++ b/fminer.rb @@ -128,65 +128,13 @@ post '/fminer/bbrc/?' do }) feature_dataset.save(@subjectid) - id = 1 # fminer start id is not 0 fminer.compounds = [] fminer.db_class_sizes = Array.new # AM: effect fminer.all_activities = Hash.new # DV: for effect calculation in regression part fminer.smi = [] # AM LAST: needed for matching the patterns back - - fminer.training_dataset.data_entries.each do |compound,entry| - begin - smiles = OpenTox::Compound.smiles(compound.to_s) - rescue - LOGGER.warn "No resource for #{compound.to_s}" - next - end - if smiles == '' or smiles.nil? - LOGGER.warn "Cannot find smiles for #{compound.to_s}." - next - end - - # AM: take log if appropriate - take_logs=true - entry.each do |feature,values| - values.each do |value| - if fminer.prediction_feature.feature_type == "regression" - if (! value.nil?) && (value.to_f <= 0) - take_logs=false - end - end - end - end - - @value_map=params[:value_map] unless params[:value_map].nil? - entry.each do |feature,values| - if feature == fminer.prediction_feature.uri - values.each do |value| - if value.nil? - LOGGER.warn "No #{feature} activity for #{compound.to_s}." - else - if fminer.prediction_feature.feature_type == "classification" - activity= @value_map.invert[value].to_i # activities are mapped to 1..n - fminer.db_class_sizes[activity-1].nil? ? fminer.db_class_sizes[activity-1]=1 : fminer.db_class_sizes[activity-1]+=1 # AM effect - elsif fminer.prediction_feature.feature_type == "regression" - activity= take_logs ? Math.log10(value.to_f) : value.to_f - end - begin - @@bbrc.AddCompound(smiles,id) - @@bbrc.AddActivity(activity, id) - fminer.all_activities[id]=activity # DV: insert global information - fminer.compounds[id] = compound - fminer.smi[id] = smiles - id += 1 - rescue - LOGGER.warn "Could not add " + smiles + "\t" + value.to_s + " to fminer" - end - end - end - end - end - end + # Add data to fminer + fminer.add_fminer_data(@@bbrc, params, @value_map) g_array=fminer.all_activities.values # DV: calculation of global median for effect calculation g_median=OpenTox::Algorithm.median(g_array) @@ -304,64 +252,13 @@ post '/fminer/last/?' do }) feature_dataset.save(@subjectid) - id = 1 # fminer start id is not 0 fminer.compounds = [] fminer.db_class_sizes = Array.new # AM: effect fminer.all_activities = Hash.new # DV: for effect calculation (class and regr) fminer.smi = [] # AM LAST: needed for matching the patterns back - fminer.training_dataset.data_entries.each do |compound,entry| - begin - smiles = OpenTox::Compound.smiles(compound.to_s) - rescue - LOGGER.warn "No resource for #{compound.to_s}" - next - end - if smiles == '' or smiles.nil? - LOGGER.warn "Cannot find smiles for #{compound.to_s}." - next - end - - # AM: take log if appropriate - take_logs=true - entry.each do |feature,values| - values.each do |value| - if fminer.prediction_feature.feature_type == "regression" - if (! value.nil?) && (value.to_f <= 0) - take_logs=false - end - end - end - end - - @value_map=params[:value_map] unless params[:value_map].nil? - entry.each do |feature,values| - if feature == fminer.prediction_feature.uri - values.each do |value| - if value.nil? - LOGGER.warn "No #{feature} activity for #{compound.to_s}." - else - if fminer.prediction_feature.feature_type == "classification" - activity= @value_map.invert[value].to_i - fminer.db_class_sizes[activity-1].nil? ? fminer.db_class_sizes[activity-1]=1 : fminer.db_class_sizes[activity-1]+=1 - elsif fminer.prediction_feature.feature_type == "regression" - activity= take_logs ? Math.log10(value.to_f) : value.to_f - end - begin - @@last.AddCompound(smiles,id) - @@last.AddActivity(activity, id) - fminer.all_activities[id]=activity # DV: insert global information - fminer.compounds[id] = compound - fminer.smi[id] = smiles # AM LAST: changed this to store SMILES. - id += 1 - rescue - LOGGER.warn "Could not add " + smiles + "\t" + value.to_s + " to fminer" - end - end - end - end - end - end + # Add data to fminer + fminer.add_fminer_data(@@last, params, @value_map) raise "No compounds in dataset #{fminer.training_dataset.uri}" if fminer.compounds.size==0 -- cgit v1.2.3 From 83b67b9c2cb04a76dcfd95bb65ece5c50472e535 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Sat, 2 Jul 2011 14:41:07 +0200 Subject: Hint to nr_hits --- README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 198b782..61604c1 100644 --- a/README.md +++ b/README.md @@ -23,12 +23,14 @@ REST operations [min_frequency=5 per-mil], [feature_type=trees], [backbone=true], - [min_chisq_significance=0.95] + [min_chisq_significance=0.95], + [nr_hits=false] Create last features POST /fminer/last dataset_uri, URI for feature dataset 200,400,404,500 feature_uri, [min_frequency=8 %], [feature_type=trees], [max_hops=25], + [nr_hits=false] Create lazar model POST /lazar dataset_uri, URI for lazar model 200,400,404,500 prediction_feature, feature_generation_uri @@ -79,6 +81,7 @@ backbone=false reduces BBRC mining to frequent and correlated subtree mining (mu curl -X POST -d dataset_uri={datset_uri} -d prediction_feature={feature_uri} -d min_frequency={min_frequency} http://webservices.in-silico.ch/algorithm/fminer/bbrc feature_uri specifies the dependent variable from the dataset. +Adding -d nr_hits=true produces frequency counts per pattern and molecule. Please click [here](http://bbrc.maunz.de#usage) for more guidance on usage. ### Create [LAST-PM](http://last-pm.maunz.de) descriptors, recommended for small to medium-sized datasets. @@ -86,6 +89,7 @@ Please click [here](http://bbrc.maunz.de#usage) for more guidance on usage. curl -X POST -d dataset_uri={datset_uri} -d prediction_feature={feature_uri} -d min_frequency={min_frequency} http://webservices.in-silico.ch/algorithm/fminer/last feature_uri specifies the dependent variable from the dataset. +Adding -d nr_hits=true produces frequency counts per pattern and molecule. Please click [here](http://last-pm.maunz.de#usage) for guidance for more guidance on usage. * * * -- cgit v1.2.3 From 8fa3dbd1013449a5600e005a8a0de9526335774d Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Sat, 2 Jul 2011 15:48:32 +0200 Subject: Removed hint to max_hops --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index 61604c1..57fba05 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,6 @@ REST operations feature_uri, [min_frequency=8 %], [feature_type=trees], - [max_hops=25], [nr_hits=false] Create lazar model POST /lazar dataset_uri, URI for lazar model 200,400,404,500 prediction_feature, -- cgit v1.2.3 From aea16f80892464cc84def3735e50daa424cf0b28 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Mon, 4 Jul 2011 08:40:54 +0200 Subject: Added switch for MLR --- lazar.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lazar.rb b/lazar.rb index d692422..e81c1ba 100644 --- a/lazar.rb +++ b/lazar.rb @@ -143,7 +143,7 @@ post '/lazar/?' do # AM: allow prediction_algorithm override by user for classification AND regression lazar.prediction_algorithm = "Neighbors.#{params[:prediction_algorithm]}" unless params[:prediction_algorithm].nil? - lazar.prop_kernel = true if params[:local_svm_kernel] == "propositionalized" + lazar.prop_kernel = true if (params[:local_svm_kernel] == "propositionalized" || params[:prediction_algorithm] == "local_mlr_prop") lazar.balanced = true if params[:balanced] == "true" training_activities.data_entries.each do |compound,entry| -- cgit v1.2.3 From 624a0eaa13d0c9b14e61b7478cb7f139f08d5ecf Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Tue, 5 Jul 2011 09:01:13 +0200 Subject: Fixed log taking --- lazar.rb | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/lazar.rb b/lazar.rb index e81c1ba..708fa56 100644 --- a/lazar.rb +++ b/lazar.rb @@ -154,11 +154,7 @@ post '/lazar/?' do lazar.activities[compound] << lazar.value_map.invert[value] # insert mapped values, not originals elsif prediction_feature.feature_type == "regression" #never use halt in tasks, do not raise exception when, print warning instead - if value.to_f==0 - LOGGER.warn "0 values not allowed in training dataset. log10 is calculated internally. skipping compound" - else - lazar.activities[compound] << value.to_f - end + lazar.activities[compound] << value.to_f end end end -- cgit v1.2.3 From 731e65f87d1ae3c4c8a676202f3fefcb7224159d Mon Sep 17 00:00:00 2001 From: mr Date: Tue, 5 Jul 2011 12:35:30 +0200 Subject: fix gitmodules from http to git protocol --- .gitmodules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitmodules b/.gitmodules index 75218e9..61a4b92 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,6 @@ [submodule "libfminer"] path = libfminer - url = http://github.com/amaunz/fminer2.git + url = git://github.com/amaunz/fminer2.git [submodule "last-utils"] path = last-utils url = git://github.com/amaunz/last-utils.git -- cgit v1.2.3 From 048a738b1e0a51d2bc5d08d87611580fb2996cb5 Mon Sep 17 00:00:00 2001 From: mr Date: Tue, 5 Jul 2011 12:36:05 +0200 Subject: fix gitmodules from http to git protocol --- .gitmodules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitmodules b/.gitmodules index 75218e9..61a4b92 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,6 @@ [submodule "libfminer"] path = libfminer - url = http://github.com/amaunz/fminer2.git + url = git://github.com/amaunz/fminer2.git [submodule "last-utils"] path = last-utils url = git://github.com/amaunz/last-utils.git -- cgit v1.2.3 From 469605e600f52eb409704a2e8195588a2f99591d Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Fri, 8 Jul 2011 14:55:16 +0200 Subject: Added transformation on model creation --- lazar.rb | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/lazar.rb b/lazar.rb index 708fa56..832735b 100644 --- a/lazar.rb +++ b/lazar.rb @@ -146,6 +146,22 @@ post '/lazar/?' do lazar.prop_kernel = true if (params[:local_svm_kernel] == "propositionalized" || params[:prediction_algorithm] == "local_mlr_prop") lazar.balanced = true if params[:balanced] == "true" + # AM: Transformation of Data + transform_acts = [] + if prediction_feature.feature_type == "regression" + training_activities.data_entries.each do |compound,entry| + unless entry[prediction_feature.uri].empty? + entry[prediction_feature.uri].each do |value| + transform_acts << value.to_f + end + end + end + end + inverter = OpenTox::Algorithm::Transform::Inverter.new(transform_acts) + transform_acts = inverter.value + lazar.transform = inverter + + transform_counts=0 training_activities.data_entries.each do |compound,entry| lazar.activities[compound] = [] unless lazar.activities[compound] unless entry[prediction_feature.uri].empty? @@ -154,8 +170,9 @@ post '/lazar/?' do lazar.activities[compound] << lazar.value_map.invert[value] # insert mapped values, not originals elsif prediction_feature.feature_type == "regression" #never use halt in tasks, do not raise exception when, print warning instead - lazar.activities[compound] << value.to_f + lazar.activities[compound] << transform_acts[transform_counts].to_s end + transform_counts+=1 end end end -- cgit v1.2.3 From f90b0908922cf80e6ca0fb11c2bb0e4b1186d6cf Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Fri, 8 Jul 2011 15:51:08 +0200 Subject: Changed inverter to log10 --- lazar.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lazar.rb b/lazar.rb index 832735b..cea6bdc 100644 --- a/lazar.rb +++ b/lazar.rb @@ -157,8 +157,8 @@ post '/lazar/?' do end end end - inverter = OpenTox::Algorithm::Transform::Inverter.new(transform_acts) - transform_acts = inverter.value + inverter = OpenTox::Algorithm::Transform::Log10.new(transform_acts) + transform_acts = inverter.values lazar.transform = inverter transform_counts=0 -- cgit v1.2.3 From 2a4221980e1f19b3bee3b619bd7b8778212d488d Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Mon, 11 Jul 2011 08:48:40 +0200 Subject: Fixed representation of transform --- lazar.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lazar.rb b/lazar.rb index cea6bdc..8d87e24 100644 --- a/lazar.rb +++ b/lazar.rb @@ -157,9 +157,9 @@ post '/lazar/?' do end end end - inverter = OpenTox::Algorithm::Transform::Log10.new(transform_acts) - transform_acts = inverter.values - lazar.transform = inverter + transformer = OpenTox::Algorithm::Transform::Log10.new(transform_acts) + transform_acts = transformer.values + lazar.transform = { :class => transformer.class, :offset => transformer.offset } transform_counts=0 training_activities.data_entries.each do |compound,entry| -- cgit v1.2.3 From 632e4f7504dbcadc952432bab141ef873f2e1875 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Mon, 11 Jul 2011 08:56:47 +0200 Subject: Fixed class.to_s --- lazar.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lazar.rb b/lazar.rb index 8d87e24..da243ea 100644 --- a/lazar.rb +++ b/lazar.rb @@ -159,7 +159,7 @@ post '/lazar/?' do end transformer = OpenTox::Algorithm::Transform::Log10.new(transform_acts) transform_acts = transformer.values - lazar.transform = { :class => transformer.class, :offset => transformer.offset } + lazar.transform = { :class => transformer.class.to_s, :offset => transformer.offset } transform_counts=0 training_activities.data_entries.each do |compound,entry| -- cgit v1.2.3 From f9d650f9d033463295ec116188c46a12230e9652 Mon Sep 17 00:00:00 2001 From: dv Date: Mon, 11 Jul 2011 12:42:28 +0200 Subject: changes for support --- lazar.rb | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/lazar.rb b/lazar.rb index e81c1ba..7c1dc40 100644 --- a/lazar.rb +++ b/lazar.rb @@ -60,7 +60,8 @@ post '/lazar/?' do training_activities.features.inspect+")" unless training_activities.features and training_activities.features.include?(prediction_feature.uri) lazar = OpenTox::Model::Lazar.new - lazar.min_sim = params[:min_sim] if params[:min_sim] + lazar.min_sim = params[:min_sim] if params[:min_sim] + lazar.nr_hits = true if params[:nr_hits] if prediction_feature.feature_type == "classification" @@ -107,12 +108,21 @@ post '/lazar/?' do lazar.features = training_features.features.sort if prediction_feature.feature_type == "regression" and lazar.feature_calculation_algorithm != "Substructure.match" training_features.data_entries.each do |compound,entry| - lazar.fingerprints[compound] = [] unless lazar.fingerprints[compound] + lazar.fingerprints[compound] = {} unless lazar.fingerprints[compound] entry.keys.each do |feature| if lazar.feature_calculation_algorithm == "Substructure.match" if training_features.features[feature] smarts = training_features.features[feature][OT.smarts] - lazar.fingerprints[compound] << smarts + if nr_hits = true + lazar.fingerprints[compound][smarts] = entry[feature].flatten.first + else + lazar.fingerprints[compound][smarts] = 1 + end + #LOGGER.debug "dv ------------ frequencies --------- feature: '#{feature}'; compound: '#{compound}' smarts: '#{smarts}'; entry.first:'#{entry[feature].flatten.first}" + #unless entry[feature].flatten.first == true + # lazar.frequencies[smarts] = [] unless lazar.frequencies[smarts] + # lazar.frequencies[smarts] << {compound => entry[feature].flatten.first} + #end unless lazar.features.include? smarts lazar.features << smarts lazar.p_values[smarts] = training_features.features[feature][OT.pValue] -- cgit v1.2.3 From 686011c826bcef8c26b8c5fd30eafc8c8630e701 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Mon, 11 Jul 2011 16:10:23 +0200 Subject: setting task percentage completed for fminer and lazar model creation --- fminer.rb | 13 +++++++++---- lazar.rb | 6 ++++-- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/fminer.rb b/fminer.rb index ad3c7f9..45751fc 100644 --- a/fminer.rb +++ b/fminer.rb @@ -99,7 +99,7 @@ post '/fminer/bbrc/?' do fminer=OpenTox::Algorithm::Fminer.new fminer.check_params(params,5) - task = OpenTox::Task.create("Mining BBRC features", url_for('/fminer',:full)) do + task = OpenTox::Task.create("Mining BBRC features", url_for('/fminer',:full)) do |task| @@bbrc.Reset if fminer.prediction_feature.feature_type == "regression" @@bbrc.SetRegression(true) # AM: DO NOT MOVE DOWN! Must happen before the other Set... operations! @@ -140,12 +140,14 @@ post '/fminer/bbrc/?' do g_median=OpenTox::Algorithm.median(g_array) raise "No compounds in dataset #{fminer.training_dataset.uri}" if fminer.compounds.size==0 - + task.progress 10 + step_width = 80 / @@bbrc.GetNoRootNodes().to_f features = Set.new + # run @@bbrc (0 .. @@bbrc.GetNoRootNodes()-1).each do |j| - results = @@bbrc.MineRoot(j) + task.progress 10+step_width*(j+1) results.each do |result| f = YAML.load(result)[0] smarts = f[0] @@ -223,7 +225,7 @@ post '/fminer/last/?' do fminer=OpenTox::Algorithm::Fminer.new fminer.check_params(params,80) - task = OpenTox::Task.create("Mining LAST features", url_for('/fminer',:full)) do + task = OpenTox::Task.create("Mining LAST features", url_for('/fminer',:full)) do |task| @@last.Reset if fminer.prediction_feature.feature_type == "regression" @@last.SetRegression(true) # AM: DO NOT MOVE DOWN! Must happen before the other Set... operations! @@ -265,9 +267,12 @@ post '/fminer/last/?' do # run @@last features = Set.new xml = "" + task.progress 10 + step_width = 80 / @@last.GetNoRootNodes().to_f (0 .. @@last.GetNoRootNodes()-1).each do |j| results = @@last.MineRoot(j) + task.progress 10+step_width*(j+1) results.each do |result| xml << result end diff --git a/lazar.rb b/lazar.rb index e81c1ba..e2dde9a 100644 --- a/lazar.rb +++ b/lazar.rb @@ -72,7 +72,7 @@ post '/lazar/?' do elsif prediction_feature.feature_type == "regression" lazar.prediction_algorithm = "Neighbors.local_svm_regression" end - + task.progress 10 if params[:feature_dataset_uri] feature_dataset_uri = params[:feature_dataset_uri] @@ -95,7 +95,7 @@ post '/lazar/?' do if prediction_feature.feature_type == "regression" && feature_generation_uri.match(/fminer/) params[:feature_type] = "paths" end - feature_dataset_uri = OpenTox::Algorithm::Generic.new(feature_generation_uri).run(params).to_s + feature_dataset_uri = OpenTox::Algorithm::Generic.new(feature_generation_uri).run(params, OpenTox::SubTask.new(task,10,70)).to_s training_features = OpenTox::Dataset.new(feature_dataset_uri) end @@ -140,6 +140,7 @@ post '/lazar/?' do end end end + task.progress 80 # AM: allow prediction_algorithm override by user for classification AND regression lazar.prediction_algorithm = "Neighbors.#{params[:prediction_algorithm]}" unless params[:prediction_algorithm].nil? @@ -163,6 +164,7 @@ post '/lazar/?' do end end end + task.progress 90 lazar.metadata[DC.title] = "lazar model for #{URI.decode(File.basename(prediction_feature.uri))}" lazar.metadata[OT.dependentVariables] = prediction_feature.uri -- cgit v1.2.3 From 309f3d5e1ce60c96128b5e13ae22656c6d6eb4be Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Tue, 12 Jul 2011 13:02:08 +0200 Subject: Fixed effects calculation: Reversed and using internal values for classification --- fminer.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fminer.rb b/fminer.rb index ad3c7f9..0126fe2 100644 --- a/fminer.rb +++ b/fminer.rb @@ -153,8 +153,8 @@ post '/fminer/bbrc/?' do if (!@@bbrc.GetRegression) id_arrs = f[2..-1].flatten - max = OpenTox::Algorithm.effect(f[2..-1].reverse, fminer.db_class_sizes) - effect = @value_map[(f[2..-1].size-max)].to_s + max = OpenTox::Algorithm.effect(f[2..-1], fminer.db_class_sizes) + effect = f[2..-1].size-max else #regression part id_arrs = f[2] # DV: effect calculation @@ -290,7 +290,7 @@ post '/fminer/last/?' do @value_map.each { |y,act| g[y-1]=Array.new } feat_hash.each { |x,y| g[y-1].push(x) } max = OpenTox::Algorithm.effect(g, fminer.db_class_sizes) - effect = @value_map[(g.size-max)].to_s + effect = g.size-max end feature_uri = File.join feature_dataset.uri,"feature","last", features.size.to_s unless features.include? smarts -- cgit v1.2.3 From e2137202d522e9a167b1418228dbe24dd0f181b7 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Thu, 14 Jul 2011 10:42:40 +0200 Subject: Added switch for Transform --- lazar.rb | 42 +++++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/lazar.rb b/lazar.rb index da243ea..1f2fec8 100644 --- a/lazar.rb +++ b/lazar.rb @@ -141,38 +141,42 @@ post '/lazar/?' do end end - # AM: allow prediction_algorithm override by user for classification AND regression + # AM: allow settings override by user lazar.prediction_algorithm = "Neighbors.#{params[:prediction_algorithm]}" unless params[:prediction_algorithm].nil? + lazar.transform["class"] = params[:activity_transform] unless params[:activity_transform].nil? lazar.prop_kernel = true if (params[:local_svm_kernel] == "propositionalized" || params[:prediction_algorithm] == "local_mlr_prop") lazar.balanced = true if params[:balanced] == "true" - # AM: Transformation of Data - transform_acts = [] + # AM: Feed Data using Transformations if prediction_feature.feature_type == "regression" + transformed_acts = [] training_activities.data_entries.each do |compound,entry| unless entry[prediction_feature.uri].empty? entry[prediction_feature.uri].each do |value| - transform_acts << value.to_f + transformed_acts << value.to_f end end end - end - transformer = OpenTox::Algorithm::Transform::Log10.new(transform_acts) - transform_acts = transformer.values - lazar.transform = { :class => transformer.class.to_s, :offset => transformer.offset } - - transform_counts=0 - training_activities.data_entries.each do |compound,entry| - lazar.activities[compound] = [] unless lazar.activities[compound] - unless entry[prediction_feature.uri].empty? - entry[prediction_feature.uri].each do |value| - if prediction_feature.feature_type == "classification" + transfomer = eval "OpenTox::Algorithm::Transform::#{lazar.transform["class"]}.new(transform_acts)" + transformed_acts = transformer.values + lazar.transform["offset"] = transformer.offset + t_count=0 + training_activities.data_entries.each do |compound,entry| + lazar.activities[compound] = [] unless lazar.activities[compound] + unless entry[prediction_feature.uri].empty? + entry[prediction_feature.uri].each do |value| + lazar.activities[compound] << transformed_acts[t_count].to_s + t_count+=1 + end + end + end + elsif prediction_feature.feature_type == "classification" + training_activities.data_entries.each do |compound,entry| + lazar.activities[compound] = [] unless lazar.activities[compound] + unless entry[prediction_feature.uri].empty? + entry[prediction_feature.uri].each do |value| lazar.activities[compound] << lazar.value_map.invert[value] # insert mapped values, not originals - elsif prediction_feature.feature_type == "regression" - #never use halt in tasks, do not raise exception when, print warning instead - lazar.activities[compound] << transform_acts[transform_counts].to_s end - transform_counts+=1 end end end -- cgit v1.2.3 From 6b8976063a21a3233210625a463e6a989a41e397 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Thu, 14 Jul 2011 11:43:27 +0200 Subject: Minor fix --- lazar.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lazar.rb b/lazar.rb index b78b12a..e93749b 100644 --- a/lazar.rb +++ b/lazar.rb @@ -158,7 +158,7 @@ post '/lazar/?' do end end end - transfomer = eval "OpenTox::Algorithm::Transform::#{lazar.transform["class"]}.new(transform_acts)" + transformer = eval "OpenTox::Algorithm::Transform::#{lazar.transform["class"]}.new(transformed_acts)" transformed_acts = transformer.values lazar.transform["offset"] = transformer.offset t_count=0 -- cgit v1.2.3 From bb2da70cc308724d980dbcb4caa8a46842050317 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Thu, 14 Jul 2011 13:21:10 +0200 Subject: Added standard LOG --- lazar.rb | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lazar.rb b/lazar.rb index e93749b..c241ad3 100644 --- a/lazar.rb +++ b/lazar.rb @@ -144,6 +144,9 @@ post '/lazar/?' do # AM: allow settings override by user lazar.prediction_algorithm = "Neighbors.#{params[:prediction_algorithm]}" unless params[:prediction_algorithm].nil? + if prediction_feature.feature_type == "regression" + lazar.transform["class"] = "Log10" if lazar.transform["class"] == "NOP" + end lazar.transform["class"] = params[:activity_transform] unless params[:activity_transform].nil? lazar.prop_kernel = true if (params[:local_svm_kernel] == "propositionalized" || params[:prediction_algorithm] == "local_mlr_prop") lazar.balanced = true if params[:balanced] == "true" -- cgit v1.2.3 From 8c0488f17154d26811d1c3408a00ba834e6f1cc1 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Thu, 14 Jul 2011 14:22:43 +0200 Subject: Shortened Transform procedure --- lazar.rb | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/lazar.rb b/lazar.rb index c241ad3..1074e58 100644 --- a/lazar.rb +++ b/lazar.rb @@ -155,11 +155,7 @@ post '/lazar/?' do if prediction_feature.feature_type == "regression" transformed_acts = [] training_activities.data_entries.each do |compound,entry| - unless entry[prediction_feature.uri].empty? - entry[prediction_feature.uri].each do |value| - transformed_acts << value.to_f - end - end + transformed_acts.concat entry[prediction_feature.uri] unless entry[prediction_feature.uri].empty? end transformer = eval "OpenTox::Algorithm::Transform::#{lazar.transform["class"]}.new(transformed_acts)" transformed_acts = transformer.values -- cgit v1.2.3 From 1c8604e473b56caefe4b1f1ddb0af89691107c9d Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Thu, 14 Jul 2011 14:38:54 +0200 Subject: Removed balancing --- lazar.rb | 1 - 1 file changed, 1 deletion(-) diff --git a/lazar.rb b/lazar.rb index 1074e58..539d973 100644 --- a/lazar.rb +++ b/lazar.rb @@ -149,7 +149,6 @@ post '/lazar/?' do end lazar.transform["class"] = params[:activity_transform] unless params[:activity_transform].nil? lazar.prop_kernel = true if (params[:local_svm_kernel] == "propositionalized" || params[:prediction_algorithm] == "local_mlr_prop") - lazar.balanced = true if params[:balanced] == "true" # AM: Feed Data using Transformations if prediction_feature.feature_type == "regression" -- cgit v1.2.3 From fa52855bc713ff7feafbe63bb8daac941f1868ae Mon Sep 17 00:00:00 2001 From: dv Date: Tue, 19 Jul 2011 10:11:00 +0200 Subject: merge with dev and some changes --- lazar.rb | 64 +++++++++++++++++++++++++++++++++++++++------------------------- 1 file changed, 39 insertions(+), 25 deletions(-) diff --git a/lazar.rb b/lazar.rb index 7c1dc40..c25a227 100644 --- a/lazar.rb +++ b/lazar.rb @@ -61,8 +61,7 @@ post '/lazar/?' do lazar = OpenTox::Model::Lazar.new lazar.min_sim = params[:min_sim] if params[:min_sim] - lazar.nr_hits = true if params[:nr_hits] - + lazar.nr_hits = params[:nr_hits] if params[:nr_hits] if prediction_feature.feature_type == "classification" @training_classes = training_activities.accept_values(prediction_feature.uri).sort @@ -73,7 +72,7 @@ post '/lazar/?' do elsif prediction_feature.feature_type == "regression" lazar.prediction_algorithm = "Neighbors.local_svm_regression" end - + task.progress 10 if params[:feature_dataset_uri] feature_dataset_uri = params[:feature_dataset_uri] @@ -96,7 +95,7 @@ post '/lazar/?' do if prediction_feature.feature_type == "regression" && feature_generation_uri.match(/fminer/) params[:feature_type] = "paths" end - feature_dataset_uri = OpenTox::Algorithm::Generic.new(feature_generation_uri).run(params).to_s + feature_dataset_uri = OpenTox::Algorithm::Generic.new(feature_generation_uri).run(params, OpenTox::SubTask.new(task,10,70)).to_s training_features = OpenTox::Dataset.new(feature_dataset_uri) end @@ -113,16 +112,12 @@ post '/lazar/?' do if lazar.feature_calculation_algorithm == "Substructure.match" if training_features.features[feature] smarts = training_features.features[feature][OT.smarts] - if nr_hits = true + #lazar.fingerprints[compound] << smarts + if params[:nr_hits] == "true" lazar.fingerprints[compound][smarts] = entry[feature].flatten.first else lazar.fingerprints[compound][smarts] = 1 end - #LOGGER.debug "dv ------------ frequencies --------- feature: '#{feature}'; compound: '#{compound}' smarts: '#{smarts}'; entry.first:'#{entry[feature].flatten.first}" - #unless entry[feature].flatten.first == true - # lazar.frequencies[smarts] = [] unless lazar.frequencies[smarts] - # lazar.frequencies[smarts] << {compound => entry[feature].flatten.first} - #end unless lazar.features.include? smarts lazar.features << smarts lazar.p_values[smarts] = training_features.features[feature][OT.pValue] @@ -134,7 +129,8 @@ post '/lazar/?' do when "classification" # fingerprints are sets if entry[feature].flatten.size == 1 - lazar.fingerprints[compound] << feature if entry[feature].flatten.first.to_s.match(TRUE_REGEXP) + #lazar.fingerprints[compound] << feature if entry[feature].flatten.first.to_s.match(TRUE_REGEXP) + lazar.fingerprints[compound][feature] = entry[feature].flatten.first if entry[feature].flatten.first.to_s.match(TRUE_REGEXP) lazar.features << feature unless lazar.features.include? feature else LOGGER.warn "More than one entry (#{entry[feature].inspect}) for compound #{compound}, feature #{feature}" @@ -143,6 +139,7 @@ post '/lazar/?' do # fingerprints are arrays if entry[feature].flatten.size == 1 lazar.fingerprints[compound][lazar.features.index(feature)] = entry[feature].flatten.first + #lazar.fingerprints[compound][feature] = entry[feature].flatten.first else LOGGER.warn "More than one entry (#{entry[feature].inspect}) for compound #{compound}, feature #{feature}" end @@ -150,29 +147,46 @@ post '/lazar/?' do end end end + task.progress 80 - # AM: allow prediction_algorithm override by user for classification AND regression + # AM: allow settings override by user lazar.prediction_algorithm = "Neighbors.#{params[:prediction_algorithm]}" unless params[:prediction_algorithm].nil? + if prediction_feature.feature_type == "regression" + lazar.transform["class"] = "Log10" if lazar.transform["class"] == "NOP" + end + lazar.transform["class"] = params[:activity_transform] unless params[:activity_transform].nil? lazar.prop_kernel = true if (params[:local_svm_kernel] == "propositionalized" || params[:prediction_algorithm] == "local_mlr_prop") - lazar.balanced = true if params[:balanced] == "true" - training_activities.data_entries.each do |compound,entry| - lazar.activities[compound] = [] unless lazar.activities[compound] - unless entry[prediction_feature.uri].empty? - entry[prediction_feature.uri].each do |value| - if prediction_feature.feature_type == "classification" + # AM: Feed Data using Transformations + if prediction_feature.feature_type == "regression" + transformed_acts = [] + training_activities.data_entries.each do |compound,entry| + transformed_acts.concat entry[prediction_feature.uri] unless entry[prediction_feature.uri].empty? + end + transformer = eval "OpenTox::Algorithm::Transform::#{lazar.transform["class"]}.new(transformed_acts)" + transformed_acts = transformer.values + lazar.transform["offset"] = transformer.offset + t_count=0 + training_activities.data_entries.each do |compound,entry| + lazar.activities[compound] = [] unless lazar.activities[compound] + unless entry[prediction_feature.uri].empty? + entry[prediction_feature.uri].each do |value| + lazar.activities[compound] << transformed_acts[t_count].to_s + t_count+=1 + end + end + end + elsif prediction_feature.feature_type == "classification" + training_activities.data_entries.each do |compound,entry| + lazar.activities[compound] = [] unless lazar.activities[compound] + unless entry[prediction_feature.uri].empty? + entry[prediction_feature.uri].each do |value| lazar.activities[compound] << lazar.value_map.invert[value] # insert mapped values, not originals - elsif prediction_feature.feature_type == "regression" - #never use halt in tasks, do not raise exception when, print warning instead - if value.to_f==0 - LOGGER.warn "0 values not allowed in training dataset. log10 is calculated internally. skipping compound" - else - lazar.activities[compound] << value.to_f - end end end end end + task.progress 90 lazar.metadata[DC.title] = "lazar model for #{URI.decode(File.basename(prediction_feature.uri))}" lazar.metadata[OT.dependentVariables] = prediction_feature.uri -- cgit v1.2.3 From 6bdf348306752602b7aa16b2b5529397ee0b6087 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Wed, 20 Jul 2011 15:19:33 +0200 Subject: Fixed nr_hits to bool --- lazar.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lazar.rb b/lazar.rb index c25a227..7febdeb 100644 --- a/lazar.rb +++ b/lazar.rb @@ -61,7 +61,7 @@ post '/lazar/?' do lazar = OpenTox::Model::Lazar.new lazar.min_sim = params[:min_sim] if params[:min_sim] - lazar.nr_hits = params[:nr_hits] if params[:nr_hits] + lazar.nr_hits = true if params[:nr_hits] if prediction_feature.feature_type == "classification" @training_classes = training_activities.accept_values(prediction_feature.uri).sort @@ -113,7 +113,7 @@ post '/lazar/?' do if training_features.features[feature] smarts = training_features.features[feature][OT.smarts] #lazar.fingerprints[compound] << smarts - if params[:nr_hits] == "true" + if params[:nr_hits] lazar.fingerprints[compound][smarts] = entry[feature].flatten.first else lazar.fingerprints[compound][smarts] = 1 -- cgit v1.2.3 From a88df48d4c4f32640d20d21ed436089dbc967f1a Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Fri, 22 Jul 2011 09:08:42 +0200 Subject: Updated README for new switches. --- README.md | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 57fba05..c18b353 100644 --- a/README.md +++ b/README.md @@ -34,13 +34,21 @@ REST operations prediction_feature, feature_generation_uri prediction_algorithm - local_svm_kernel + [local_svm_kernel=weighted_tanimoto] + [min_sim=0.3] + [nr_hits=false] + [activity_transform=Log10(regression)] + [conf_stdev=false] Synopsis -------- -- prediction\_algorithm: one of "weighted\_majority\_vote" (default for classification), "local\_svm\_classification", "local\_svm\_regression (default for regression)". "weighted\_majority\_vote" is not applicable for regression. +- prediction\_algorithm: one of "weighted\_majority\_vote" (default for classification), "local\_svm\_classification", "local\_svm\_regression (default for regression)", "local\_mlr\_prop". "weighted\_majority\_vote" is not applicable for regression. "local\_mlr\_prop" is not applicable for classification. - local\_svm\_kernel: one of "weighted\_tanimoto", "propositionalized". local\_svm\_kernel is not appplicable when prediction\_algorithm="weighted\_majority\_vote". +- min_sim: The minimum similarity threshold for neighbors. Numeric value in [0,1]. +- nr_hits: Whether for instantiated models (local\_svm\_kernel = "propositionalized" for prediction_algorithm="local\_svm\_classification" or "local\_svm\_regression", or for prediction_algorithm="local\_mlr\_prop") nominal features should be instantiated with their occurrence counts in the instances. For non-instantiated models (local\_svm\_kernel = "weighted\_tanimoto" for prediction_algorithm="local\_svm\_classification" or "local\_svm\_regression", or for prediction_algorithm="weighted\_majority\_vote") the neighbor-to-neighbor and neighbor-to-query similarity also integrates these counts, when the parameter is set. One of "true", "false". +- activity_transform: normalizing transformations of the y-values (activities), applicable only to regression problems. One of "Log10", "Inverter", "NOP". "Log10" moves all values above zero and takes the log to base 10. "Inverter" moves all values above 1.0 and takes the inverted value. "NOP" is the identity transformation, which does nothing. Model predictions are output with reverse transformation applied. +- conf_stdev: whether Tanimoto similarity integrates distribution of neighbor activity values. When "true", the exp(-1.0*(standard deviation of neighbor activities)) is multiplied on the similarity. One of "true", "false". See http://www.maunz.de/wordpress/opentox/2011/lazar-models-and-how-to-trigger-them for a graphical overview. -- cgit v1.2.3 From 9cff1940cfcec8711cf3905e8fc1e29286b9a73b Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Fri, 22 Jul 2011 09:16:21 +0200 Subject: Updated README. --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index c18b353..e3ab812 100644 --- a/README.md +++ b/README.md @@ -37,18 +37,18 @@ REST operations [local_svm_kernel=weighted_tanimoto] [min_sim=0.3] [nr_hits=false] - [activity_transform=Log10(regression)] + [activity_transform=] [conf_stdev=false] Synopsis -------- -- prediction\_algorithm: one of "weighted\_majority\_vote" (default for classification), "local\_svm\_classification", "local\_svm\_regression (default for regression)", "local\_mlr\_prop". "weighted\_majority\_vote" is not applicable for regression. "local\_mlr\_prop" is not applicable for classification. -- local\_svm\_kernel: one of "weighted\_tanimoto", "propositionalized". local\_svm\_kernel is not appplicable when prediction\_algorithm="weighted\_majority\_vote". +- prediction\_algorithm: One of "weighted\_majority\_vote" (default for classification), "local\_svm\_classification", "local\_svm\_regression (default for regression)", "local\_mlr\_prop". "weighted\_majority\_vote" is not applicable for regression. "local\_mlr\_prop" is not applicable for classification. +- local\_svm\_kernel: One of "weighted\_tanimoto", "propositionalized". local\_svm\_kernel is not appplicable when prediction\_algorithm="weighted\_majority\_vote". - min_sim: The minimum similarity threshold for neighbors. Numeric value in [0,1]. - nr_hits: Whether for instantiated models (local\_svm\_kernel = "propositionalized" for prediction_algorithm="local\_svm\_classification" or "local\_svm\_regression", or for prediction_algorithm="local\_mlr\_prop") nominal features should be instantiated with their occurrence counts in the instances. For non-instantiated models (local\_svm\_kernel = "weighted\_tanimoto" for prediction_algorithm="local\_svm\_classification" or "local\_svm\_regression", or for prediction_algorithm="weighted\_majority\_vote") the neighbor-to-neighbor and neighbor-to-query similarity also integrates these counts, when the parameter is set. One of "true", "false". -- activity_transform: normalizing transformations of the y-values (activities), applicable only to regression problems. One of "Log10", "Inverter", "NOP". "Log10" moves all values above zero and takes the log to base 10. "Inverter" moves all values above 1.0 and takes the inverted value. "NOP" is the identity transformation, which does nothing. Model predictions are output with reverse transformation applied. -- conf_stdev: whether Tanimoto similarity integrates distribution of neighbor activity values. When "true", the exp(-1.0*(standard deviation of neighbor activities)) is multiplied on the similarity. One of "true", "false". +- activity_transform: Normalizing transformations of the y-values (activities), applicable only to regression problems. One of "Log10", "Inverter", "NOP". "Log10" moves all values above zero and takes the log to base 10. "Inverter" moves all values above 1.0 and takes the inverted value. "NOP" is the identity transformation, which does nothing. Model predictions are output with reverse transformation applied. +- conf_stdev: Whether confidence integrates distribution of neighbor activity values. When "true", the exp(-1.0*(standard deviation of neighbor activities)) is multiplied on the similarity. One of "true", "false". See http://www.maunz.de/wordpress/opentox/2011/lazar-models-and-how-to-trigger-them for a graphical overview. -- cgit v1.2.3 From 86349a7ad23cb051a57949edf07392b1d215558f Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Fri, 22 Jul 2011 09:22:37 +0200 Subject: Fixed switches to listen to "true" only. --- lazar.rb | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lazar.rb b/lazar.rb index 7febdeb..5de3790 100644 --- a/lazar.rb +++ b/lazar.rb @@ -60,8 +60,8 @@ post '/lazar/?' do training_activities.features.inspect+")" unless training_activities.features and training_activities.features.include?(prediction_feature.uri) lazar = OpenTox::Model::Lazar.new - lazar.min_sim = params[:min_sim] if params[:min_sim] - lazar.nr_hits = true if params[:nr_hits] + lazar.min_sim = params[:min_sim].to_f if params[:min_sim] + lazar.nr_hits = true if params[:nr_hits] == "true" if prediction_feature.feature_type == "classification" @training_classes = training_activities.accept_values(prediction_feature.uri).sort @@ -156,6 +156,8 @@ post '/lazar/?' do end lazar.transform["class"] = params[:activity_transform] unless params[:activity_transform].nil? lazar.prop_kernel = true if (params[:local_svm_kernel] == "propositionalized" || params[:prediction_algorithm] == "local_mlr_prop") + lazar.conf_stdev = false + lazar.conf_stdev = true if params[:conf_stdev] == "true" # AM: Feed Data using Transformations if prediction_feature.feature_type == "regression" -- cgit v1.2.3 From 00751240e4cb74774dc27fc39f3a4bfd6d307a42 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Fri, 22 Jul 2011 10:08:55 +0200 Subject: Updated README --- README.md | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/README.md b/README.md index e3ab812..dacf1ec 100644 --- a/README.md +++ b/README.md @@ -103,15 +103,9 @@ Please click [here](http://last-pm.maunz.de#usage) for guidance for more guidanc ### Create lazar model - curl -X POST -d dataset_uri={datset_uri} -d prediction_feature={feature_uri} -d feature_generation_uri=http://webservices.in-silico.ch/algorithm/fminer http://webservices.in-silico.ch/test/algorithm/lazar - Creates a standard Lazar model. - curl -X POST -d dataset_uri={datset_uri} -d prediction_feature={feature_uri} -d feature_generation_uri=http://webservices.in-silico.ch/algorithm/fminer http://webservices.in-silico.ch/test/algorithm/lazar -d prediction_algorithm=local_svm_classification -d local_svm_kernel=propositionalized - -Creates an SVM model for classification with a propositionalized RBF kernel. - -feature_uri specifies the dependent variable from the dataset + curl -X POST -d dataset_uri={datset_uri} -d prediction_feature={feature_uri} -d feature_generation_uri=http://webservices.in-silico.ch/algorithm/fminer/bbrc http://webservices.in-silico.ch/test/algorithm/lazar [API documentation](http://rdoc.info/github/opentox/algorithm) -------------------------------------------------------------- -- cgit v1.2.3 From 341d0912e351082b589acc89ea2fcf1fd162d459 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Fri, 22 Jul 2011 11:26:11 +0200 Subject: Fixed nr_hits readout in fminer.rb --- fminer.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fminer.rb b/fminer.rb index 8a2c63e..3280776 100644 --- a/fminer.rb +++ b/fminer.rb @@ -193,7 +193,7 @@ post '/fminer/bbrc/?' do id_arrs.each { |id_count_hash| id=id_count_hash.keys[0].to_i count=id_count_hash.values[0].to_i - if params[:nr_hits] + if params[:nr_hits] == "true" feature_dataset.add(fminer.compounds[id], feature_uri, count) else feature_dataset.add(fminer.compounds[id], feature_uri, true) @@ -281,7 +281,7 @@ post '/fminer/last/?' do lu = LU.new # AM LAST: uses last-utils here dom=lu.read(xml) # AM LAST: parse GraphML smarts=lu.smarts_rb(dom,'nls') # AM LAST: converts patterns to LAST-SMARTS using msa variant (see last-pm.maunz.de) - params[:nr_hits].nil? ? hit_count=false: hit_count=true + params[:nr_hits] != "true" ? hit_count=false: hit_count=true matches, counts = lu.match_rb(fminer.smi,smarts,hit_count) # AM LAST: creates instantiations matches.each do |smarts, ids| -- cgit v1.2.3 From 792c43ea83d617db8b816db05c25f1d8e0d93339 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Fri, 22 Jul 2011 11:34:52 +0200 Subject: Fixed fminer.rb count --- fminer.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fminer.rb b/fminer.rb index 3280776..e7a6d4f 100644 --- a/fminer.rb +++ b/fminer.rb @@ -196,7 +196,7 @@ post '/fminer/bbrc/?' do if params[:nr_hits] == "true" feature_dataset.add(fminer.compounds[id], feature_uri, count) else - feature_dataset.add(fminer.compounds[id], feature_uri, true) + feature_dataset.add(fminer.compounds[id], feature_uri, 1) end } end -- cgit v1.2.3 From b9c4a076550433827f6c0719240332c9902318b2 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Wed, 27 Jul 2011 17:02:18 +0200 Subject: Fixed statistics to use statsample --- fminer.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fminer.rb b/fminer.rb index e7a6d4f..0160121 100644 --- a/fminer.rb +++ b/fminer.rb @@ -137,7 +137,7 @@ post '/fminer/bbrc/?' do fminer.add_fminer_data(@@bbrc, params, @value_map) g_array=fminer.all_activities.values # DV: calculation of global median for effect calculation - g_median=OpenTox::Algorithm.median(g_array) + g_median=g_array.to_scale.median raise "No compounds in dataset #{fminer.training_dataset.uri}" if fminer.compounds.size==0 task.progress 10 @@ -165,7 +165,7 @@ post '/fminer/bbrc/?' do id=id.keys[0] # extract id from hit count hash f_arr.push(fminer.all_activities[id]) end - f_median=OpenTox::Algorithm.median(f_arr) + f_median=f_arr.to_scale.median if g_median >= f_median effect = 'activating' else -- cgit v1.2.3 From bce27bd8e66deebb6f438e56957f575399946a50 Mon Sep 17 00:00:00 2001 From: mr Date: Mon, 1 Aug 2011 22:28:30 +0200 Subject: add subjectid to check_params calls --- fminer.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fminer.rb b/fminer.rb index 0160121..e220b09 100644 --- a/fminer.rb +++ b/fminer.rb @@ -97,7 +97,7 @@ end post '/fminer/bbrc/?' do fminer=OpenTox::Algorithm::Fminer.new - fminer.check_params(params,5) + fminer.check_params(params,5,@subjectid) task = OpenTox::Task.create("Mining BBRC features", url_for('/fminer',:full)) do |task| @@bbrc.Reset @@ -223,7 +223,7 @@ end post '/fminer/last/?' do fminer=OpenTox::Algorithm::Fminer.new - fminer.check_params(params,80) + fminer.check_params(params,80,@subjectid) task = OpenTox::Task.create("Mining LAST features", url_for('/fminer',:full)) do |task| @@last.Reset -- cgit v1.2.3