diff options
author | Christoph Helma <helma@in-silico.ch> | 2010-11-19 14:42:29 +0100 |
---|---|---|
committer | Christoph Helma <helma@in-silico.ch> | 2010-11-19 14:42:29 +0100 |
commit | 578f57d48df70dc677ceb65720831885c3af99f2 (patch) | |
tree | 1fe2f7d76acbcca7782535c33a05910b5a9625f4 | |
parent | 397e5a2ba1913f1a66cbf85185a82d45ee942e47 (diff) |
lazar predictions and toxcreate are working
-rw-r--r-- | fminer.rb | 49 | ||||
-rw-r--r-- | lazar.rb | 79 |
2 files changed, 74 insertions, 54 deletions
@@ -11,7 +11,6 @@ get '/fminer/?' do end # Get RDF/XML representation of fminer bbrc algorithm -# # @return [application/rdf+xml] OWL-DL representation of fminer bbrc algorithm get "/fminer/bbrc/?" do response['Content-Type'] = 'application/rdf+xml' @@ -23,14 +22,17 @@ get "/fminer/bbrc/?" do OT.isA => OTA.PatternMiningSupervised, OT.parameters => [ { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" }, - { DC.description => "Feature URI for dependent variable", OT.paramScope => "mandatory", DC.title => "prediction_feature" } + { DC.description => "Feature URI for dependent variable", OT.paramScope => "mandatory", DC.title => "prediction_feature" }, + { DC.description => "Minimum frequency", OT.paramScope => "optional", DC.title => "minfreq" }, + { DC.description => "Feature type, can be 'paths' or 'trees'", OT.paramScope => "optional", DC.title => "feature_type" }, + { DC.description => "BBRC classes, pass 'false' to switch off mining for BBRC representatives.", OT.paramScope => "optional", DC.title => "backbone" }, + { DC.description => "Significance threshold (between 0 and 1)", OT.paramScope => "optional", DC.title => "min_chisq_significance" }, ] } algorithm.to_rdfxml end # Get RDF/XML representation of fminer last algorithm -# # @return [application/rdf+xml] OWL-DL representation of fminer last algorithm get "/fminer/last/?" do algorithm = OpenTox::Algorithm::Generic.new(url_for('/fminer/last',:full)) @@ -41,7 +43,10 @@ get "/fminer/last/?" do OT.isA => OTA.PatternMiningSupervised, OT.parameters => [ { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" }, - { DC.description => "Feature URI for dependent variable", OT.paramScope => "mandatory", DC.title => "prediction_feature" } + { DC.description => "Feature URI for dependent variable", OT.paramScope => "mandatory", DC.title => "prediction_feature" }, + { DC.description => "Minimum frequency", OT.paramScope => "optional", DC.title => "minfreq" }, + { DC.description => "Feature type, can be 'paths' or 'trees'", OT.paramScope => "optional", DC.title => "feature_type" }, + { DC.description => "Maximum number of hops", OT.paramScope => "optional", DC.title => "hops" }, ] } algorithm.to_rdfxml @@ -49,18 +54,23 @@ end # Run bbrc algorithm on dataset # -# @param [URI] dataset_uri URI of the training dataset -# @param [URI] prediction_feature URI of the prediction feature (i.e. dependent variable) -# @param [optional, Integer] min_frequency minimum frequency (defaults to 5) +# @param [String] dataset_uri URI of the training dataset +# @param [String] prediction_feature URI of the prediction feature (i.e. dependent variable) +# @param [optional] parameters BBRC parameters, accepted parameters are +# - minfreq Minimum frequency (default 5) +# - feature_type Feature type, can be 'paths' or 'trees' (default "trees") +# - backbone BBRC classes, pass 'false' to switch off mining for BBRC representatives. (default "true") +# - min_chisq_significance Significance threshold (between 0 and 1) # @return [text/uri-list] Task URI post '/fminer/bbrc/?' do -#['/fminer/bbrc/?','/fminer/?'].each do |path| # AM LAST: set bbrc as default - #post path do # TODO: is this thread safe?? @@fminer = Bbrc::Bbrc.new minfreq = 5 unless minfreq = params[:min_frequency] @@fminer.SetMinfreq(minfreq) + @@fminer.SetType(1) if params[:feature_type] == "paths" + @@fminer.SetBackbone(params[:backbone]) if params[:backbone] + @@fminer.SetChisqSig(params[:min_chisq_significance]) if params[:min_chisq_significance] @@fminer.SetConsoleOut(false) halt 404, "Please submit a dataset_uri." unless params[:dataset_uri] and !params[:dataset_uri].nil? @@ -70,7 +80,7 @@ post '/fminer/bbrc/?' do training_dataset = OpenTox::Dataset.find "#{params[:dataset_uri]}" halt 404, "No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}" unless training_dataset.features and training_dataset.features.include?(params[:prediction_feature]) - task_uri = OpenTox::Task.as_task("Mining BBRC features", url_for('/fminer',:full)) do + task = OpenTox::Task.create("Mining BBRC features", url_for('/fminer',:full)) do feature_dataset = OpenTox::Dataset.new feature_dataset.add_metadata({ @@ -193,19 +203,26 @@ post '/fminer/bbrc/?' do feature_dataset.uri end response['Content-Type'] = 'text/uri-list' - halt 202,task_uri.to_s+"\n" + halt 202,task.uri.to_s+"\n" end #end # Run last algorithm on a dataset # -# @param [URI] dataset_uri URI of the training dataset -# @param [URI] prediction_feature URI of the prediction feature (i.e. dependent variable) +# @param [String] dataset_uri URI of the training dataset +# @param [String] prediction_feature URI of the prediction feature (i.e. dependent variable) +# @param [optional] parameters LAST parameters, accepted parameters are +# - minfreq Minimum frequency (default 5) +# - feature_type Feature type, can be 'paths' or 'trees' (default "trees") +# - hops Maximum number of hops # @return [text/uri-list] Task URI post '/fminer/last/?' do @@fminer = Last::Last.new - @@fminer.SetMinfreq(5) + minfreq = 5 unless minfreq = params[:min_frequency] + @@fminer.SetMinfreq(minfreq) + @@fminer.SetType(1) if params[:feature_type] == "paths" + @@fminer.SetMaxHops(params[:hops]) if params[:hops] @@fminer.SetConsoleOut(false) halt 404, "Please submit a dataset_uri." unless params[:dataset_uri] and !params[:dataset_uri].nil? @@ -216,7 +233,7 @@ post '/fminer/last/?' do training_dataset.load_all halt 404, "No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}" unless training_dataset.features and training_dataset.features.include?(params[:prediction_feature]) - task_uri = OpenTox::Task.as_task("Mining LAST features", url_for('/fminer',:full)) do + task = OpenTox::Task.create("Mining LAST features", url_for('/fminer',:full)) do feature_dataset = OpenTox::Dataset.new feature_dataset.add_metadata({ @@ -327,5 +344,5 @@ post '/fminer/last/?' do feature_dataset.uri end response['Content-Type'] = 'text/uri-list' - halt 202,task_uri.to_s+"\n" + halt 202,task.uri.to_s+"\n" end @@ -1,7 +1,6 @@ @@feature_generation_default = File.join(CONFIG[:services]["opentox-algorithm"],"fminer","bbrc") # Get RDF/XML representation of the lazar algorithm -# # @return [application/rdf+xml] OWL-DL representation of the lazar algorithm get '/lazar/?' do response['Content-Type'] = 'application/rdf+xml' @@ -23,20 +22,18 @@ get '/lazar/?' do end # Create a lazar prediction model -# +# @param [String] dataset_uri Training dataset URI +# @param [optional,String] prediction_feature URI of the feature to be predicted +# @param [optional,String] feature_generation_uri URI of the feature generation algorithm +# @param [optional] - further parameters for the feature generation service # @ return [text/uri-list] Task URI post '/lazar/?' do - LOGGER.debug params.inspect halt 404, "No dataset_uri parameter." unless params[:dataset_uri] dataset_uri = params[:dataset_uri] - begin - training_activities = OpenTox::Dataset.new(dataset_uri) - training_activities.load_all - rescue => e - halt 404, "Dataset #{dataset_uri} not found (#{e.inspect})." - end + halt 404, "Dataset #{dataset_uri} not found." unless training_activities = OpenTox::Dataset.new(dataset_uri) + training_activities.load_all prediction_feature = params[:prediction_feature] unless prediction_feature # try to read prediction_feature from dataset @@ -51,23 +48,24 @@ post '/lazar/?' do training_activities.features.inspect+")" unless training_activities.features and training_activities.features.include?(prediction_feature) response['Content-Type'] = 'text/uri-list' - task_uri = OpenTox::Task.as_task("Create lazar model",url_for('/lazar',:full)) do |task| + task = OpenTox::Task.create("Create lazar model",url_for('/lazar',:full)) do |task| lazar = OpenTox::Model::Lazar.new + lazar.min_sim = params[:min_sim] if params[:min_sim] if params[:feature_dataset_uri] feature_dataset_uri = params[:feature_dataset_uri] training_features = OpenTox::Dataset.new(feature_dataset_uri) case training_features.feature_type when "classification" - lazar.similarity_algorithm = "weighted_tanimoto" + lazar.similarity_algorithm = "Similarity.tanimoto" when "regression" - lazar.similarity_algorithm = "weighted_euclid" + lazar.similarity_algorithm = "Similarity.euclid" end else # create features params[:feature_generation_uri] = feature_generation_uri if feature_generation_uri.match(/fminer/) - lazar.feature_calculation_algorithm = "substructure_match" + lazar.feature_calculation_algorithm = "Substructure.match" else halt 404, "External feature generation services not yet supported" end @@ -82,11 +80,9 @@ post '/lazar/?' do lazar.features = training_features.features.sort if training_features.feature_type == "regression" training_features.data_entries.each do |compound,entry| - lazar.fingerprints[compound] = [] unless lazar.fingerprints[compound] + lazar.fingerprints[compound] = [] unless lazar.fingerprints[compound] entry.keys.each do |feature| - case training_features.feature_type - when "fminer" - # fingerprints are sets + if feature_generation_uri.match(/fminer/) smarts = training_features.features[feature][OT.smarts] lazar.fingerprints[compound] << smarts unless lazar.features.include? smarts @@ -94,20 +90,23 @@ post '/lazar/?' do lazar.p_values[smarts] = training_features.features[feature][OT.p_value] lazar.effects[smarts] = training_features.features[feature][OT.effect] end - when "classification" - # fingerprints are sets - if entry[feature].flatten.size == 1 - lazar.fingerprints[compound] << feature if entry[feature].flatten.first.match(TRUE_REGEXP) - lazar.features << feature unless lazar.features.include? feature - else - LOGGER.warn "More than one entry (#{entry[feature].inspect}) for compound #{compound}, feature #{feature}" - end - when "regression" - # fingerprints are arrays - if entry[feature].flatten.size == 1 - lazar.fingerprints[compound][lazar.features.index(feature)] = entry[feature].flatten.first - else - LOGGER.warn "More than one entry (#{entry[feature].inspect}) for compound #{compound}, feature #{feature}" + else + case training_features.feature_type + when "classification" + # fingerprints are sets + if entry[feature].flatten.size == 1 + lazar.fingerprints[compound] << feature if entry[feature].flatten.first.to_s.match(TRUE_REGEXP) + lazar.features << feature unless lazar.features.include? feature + else + LOGGER.warn "More than one entry (#{entry[feature].inspect}) for compound #{compound}, feature #{feature}" + end + when "regression" + # fingerprints are arrays + if entry[feature].flatten.size == 1 + lazar.fingerprints[compound][lazar.features.index(feature)] = entry[feature].flatten.first + else + LOGGER.warn "More than one entry (#{entry[feature].inspect}) for compound #{compound}, feature #{feature}" + end end end end @@ -120,26 +119,30 @@ post '/lazar/?' do when "false" lazar.activities[compound] << false else + halt 404, "0 values not allowed in training dataset. log10 is calculated internally." if value.to_f == 0 lazar.activities[compound] << value.to_f - lazar.prediction_type = "regression" + lazar.prediction_algorithm = "Neighbors.local_svm_regression" end end end + lazar.metadata[DC.title] = "lazar model for #{URI.decode(File.basename(prediction_feature))}" + # TODO: fix dependentVariable lazar.metadata[OT.dependentVariables] = params[:prediction_feature] lazar.metadata[OT.trainingDataset] = dataset_uri lazar.metadata[OT.featureDataset] = feature_dataset_uri + lazar.metadata[OT.isA] = OTA.ClassificationLazySingleTarget - lazar.parameters = { - "dataset_uri" => dataset_uri, - "prediction_feature" => prediction_feature, - "feature_generation_uri" => feature_generation_uri - } + lazar.metadata[OT.parameters] = [ + {DC.title => "dataset_uri", OT.paramValue => dataset_uri}, + {DC.title => "prediction_feature", OT.paramValue => prediction_feature}, + {DC.title => "feature_generation_uri", OT.paramValue => feature_generation_uri} + ] model_uri = lazar.save LOGGER.info model_uri + " created #{Time.now}" model_uri end - halt 202,task_uri + halt 202,task.uri end |