diff options
author | Christoph Helma <helma@in-silico.ch> | 2010-11-10 18:39:12 +0100 |
---|---|---|
committer | Christoph Helma <helma@in-silico.ch> | 2010-11-10 18:39:12 +0100 |
commit | 397e5a2ba1913f1a66cbf85185a82d45ee942e47 (patch) | |
tree | d18317b9308469c88254cdb3d3e6fe16a97d1f90 | |
parent | 28aac60d38678340128a54ffe99bc56401561419 (diff) |
valid algorithm OWL-DL, documentation started
-rw-r--r-- | application.rb | 5 | ||||
-rw-r--r-- | fminer.rb | 119 | ||||
-rw-r--r-- | lazar.rb | 211 |
3 files changed, 178 insertions, 157 deletions
diff --git a/application.rb b/application.rb index 8e0a573..867cf24 100644 --- a/application.rb +++ b/application.rb @@ -18,7 +18,10 @@ before do LOGGER.debug "Request: " + request.path end +# Get a list of available algorithms +# +# @return [text/uri-list] algorithm URIs get '/?' do response['Content-Type'] = 'text/uri-list' - [ url_for('/lazar', :full), url_for('/fminer', :full) ].join("\n") + "\n" + [ url_for('/lazar', :full), url_for('/fminer/bbrc', :full), url_for('/fminer/last', :full) ].join("\n") + "\n" end @@ -2,55 +2,85 @@ ENV['FMINER_SMARTS'] = 'true' ENV['FMINER_NO_AROMATIC'] = 'true' ENV['FMINER_PVALUES'] = 'true' +# Get list of fminer algorithms +# +# @return [text/uri-list] URIs of fminer algorithms get '/fminer/?' do + response['Content-Type'] = 'text/uri-list' + [ url_for('/fminer/bbrc', :full), url_for('/fminer/last', :full) ].join("\n") + "\n" +end - metadata = { - DC.title => 'fminer', - DC.identifier => url_for("",:full), +# Get RDF/XML representation of fminer bbrc algorithm +# +# @return [application/rdf+xml] OWL-DL representation of fminer bbrc algorithm +get "/fminer/bbrc/?" 
do + response['Content-Type'] = 'application/rdf+xml' + algorithm = OpenTox::Algorithm::Generic.new(url_for('/fminer/bbrc',:full)) + algorithm.metadata = { + DC.title => 'fminer backbone refinement class representatives', DC.creator => "andreas@maunz.de, helma@in-silico.ch", DC.contributor => "vorgrimmlerdavid@gmx.de", - OT.isA => OTA.PatternMiningSupervised + OT.isA => OTA.PatternMiningSupervised, + OT.parameters => [ + { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" }, + { DC.description => "Feature URI for dependent variable", OT.paramScope => "mandatory", DC.title => "prediction_feature" } + ] } + algorithm.to_rdfxml +end - parameters = [ - { DC.description => "Dataset URI", OT.paramScope => "mandatory", OT.title => "dataset_uri" }, - { DC.description => "Feature URI for dependent variable", OT.paramScope => "mandatory", OT.title => "prediction_feature" } - ] - - s = OpenTox::Serializer::Owl.new - s.add_algorithm(url_for('/fminer',:full),metadata,parameters) - response['Content-Type'] = 'application/rdf+xml' - s.to_rdfxml - +# Get RDF/XML representation of fminer last algorithm +# +# @return [application/rdf+xml] OWL-DL representation of fminer last algorithm +get "/fminer/last/?" do + algorithm = OpenTox::Algorithm::Generic.new(url_for('/fminer/last',:full)) + algorithm.metadata = { + DC.title => 'fminer latent structure class representatives', + DC.creator => "andreas@maunz.de, helma@in-silico.ch", + DC.contributor => "vorgrimmlerdavid@gmx.de", + OT.isA => OTA.PatternMiningSupervised, + OT.parameters => [ + { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" }, + { DC.description => "Feature URI for dependent variable", OT.paramScope => "mandatory", DC.title => "prediction_feature" } + ] + } + algorithm.to_rdfxml end -#post '/fminer/?' 
do -['/fminer/bbrc/?','/fminer/?'].each do |path| # AM LAST: set bbrc as default - post path do +# Run bbrc algorithm on dataset +# +# @param [URI] dataset_uri URI of the training dataset +# @param [URI] prediction_feature URI of the prediction feature (i.e. dependent variable) +# @param [optional, Integer] min_frequency minimum frequency (defaults to 5) +# @return [text/uri-list] Task URI +post '/fminer/bbrc/?' do +#['/fminer/bbrc/?','/fminer/?'].each do |path| # AM LAST: set bbrc as default + #post path do + # TODO: is this thread safe?? @@fminer = Bbrc::Bbrc.new - @@fminer.SetMinfreq(5) + minfreq = 5 unless minfreq = params[:min_frequency] + @@fminer.SetMinfreq(minfreq) @@fminer.SetConsoleOut(false) halt 404, "Please submit a dataset_uri." unless params[:dataset_uri] and !params[:dataset_uri].nil? halt 404, "Please submit a prediction_feature." unless params[:prediction_feature] and !params[:prediction_feature].nil? prediction_feature = params[:prediction_feature] - training_dataset = OpenTox::Dataset.new "#{params[:dataset_uri]}" - training_dataset.load_all + training_dataset = OpenTox::Dataset.find "#{params[:dataset_uri]}" halt 404, "No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}" unless training_dataset.features and training_dataset.features.include?(params[:prediction_feature]) task_uri = OpenTox::Task.as_task("Mining BBRC features", url_for('/fminer',:full)) do feature_dataset = OpenTox::Dataset.new feature_dataset.add_metadata({ - DC.title => "BBRC representatives for " + training_dataset.metadata[DC.title], + DC.title => "BBRC representatives for " + training_dataset.metadata[DC.title].to_s, DC.creator => url_for('/fminer/bbrc',:full), OT.hasSource => url_for('/fminer/bbrc', :full), - }) - feature_dataset.add_parameters({ - "dataset_uri" => params[:dataset_uri], - "prediction_feature" => params[:prediction_feature] + OT.parameters => [ + { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] }, + { DC.title => 
"prediction_feature", OT.paramValue => params[:prediction_feature] } + ] }) feature_dataset.save @@ -63,7 +93,7 @@ end @@fminer.Reset training_dataset.data_entries.each do |compound,entry| begin - smiles = OpenTox::Compound.new(compound.to_s).smiles + smiles = OpenTox::Compound.new(compound.to_s).to_smiles rescue LOGGER.warn "No resource for #{compound.to_s}" next @@ -142,13 +172,19 @@ end feature_uri = File.join feature_dataset.uri,"feature","bbrc", features.size.to_s unless features.include? smarts features << smarts - # TODO insert correct ontology entries metadata = { - OT.hasSource => feature_dataset.uri, + OT.hasSource => url_for('/fminer/bbrc', :full), + OT.isA => OT.NominalFeature, OT.smarts => smarts, OT.p_value => p_value.to_f, - OT.effect => effect } + OT.effect => effect, + OT.parameters => [ + { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] }, + { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] } + ] + } feature_dataset.add_feature feature_uri, metadata + #feature_dataset.add_feature_parameters feature_uri, feature_dataset.parameters end ids.each { |id| feature_dataset.add(compounds[id], feature_uri, true)} end @@ -159,8 +195,13 @@ end response['Content-Type'] = 'text/uri-list' halt 202,task_uri.to_s+"\n" end -end +#end +# Run last algorithm on a dataset +# +# @param [URI] dataset_uri URI of the training dataset +# @param [URI] prediction_feature URI of the prediction feature (i.e. dependent variable) +# @return [text/uri-list] Task URI post '/fminer/last/?' do @@fminer = Last::Last.new @@ -179,13 +220,13 @@ post '/fminer/last/?' 
do feature_dataset = OpenTox::Dataset.new feature_dataset.add_metadata({ - DC.title => "LAST representatives for " + training_dataset.metadata[DC.title], + DC.title => "LAST representatives for " + training_dataset.metadata[DC.title].to_s, DC.creator => url_for('/fminer/last',:full), OT.hasSource => url_for('/fminer/last', :full), - }) - feature_dataset.add_parameters({ - "dataset_uri" => params[:dataset_uri], - "prediction_feature" => params[:prediction_feature] + OT.parameters => [ + { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] }, + { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] } + ] }) feature_dataset.save @@ -199,7 +240,7 @@ post '/fminer/last/?' do @@fminer.Reset training_dataset.data_entries.each do |compound,entry| begin - smiles = OpenTox::Compound.new(compound.to_s).smiles + smiles = OpenTox::Compound.new(compound.to_s).to_smiles rescue LOGGER.warn "No resource for #{compound.to_s}" next @@ -272,7 +313,11 @@ post '/fminer/last/?' do OT.hasSource => feature_dataset.uri, OT.smarts => smarts, OT.p_value => p_value.to_f, - OT.effect => effect + OT.effect => effect, + OT.parameters => [ + { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] }, + { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] } + ] } feature_dataset.add_feature feature_uri, metadata end @@ -1,29 +1,35 @@ -get '/lazar/?' do +@@feature_generation_default = File.join(CONFIG[:services]["opentox-algorithm"],"fminer","bbrc") - metadata = { +# Get RDF/XML representation of the lazar algorithm +# +# @return [application/rdf+xml] OWL-DL representation of the lazar algorithm +get '/lazar/?' 
do + response['Content-Type'] = 'application/rdf+xml' + algorithm = OpenTox::Algorithm::Generic.new(url_for('/lazar',:full)) + algorithm.metadata = { DC.title => 'lazar', - DC.identifier => url_for("",:full), DC.creator => "helma@in-silico.ch, andreas@maunz.de", DC.contributor => "vorgrimmlerdavid@gmx.de", - OT.isA => OTA.ClassificationLazySingleTarget + OT.isA => OTA.ClassificationLazySingleTarget, + OT.parameters => [ + { DC.description => "Dataset URI with the dependent variable", OT.paramScope => "mandatory", DC.title => "dataset_uri" }, + { DC.description => "Feature URI for dependent variable. Optional for datasets with only a single feature.", OT.paramScope => "optional", DC.title => "prediction_feature" }, + { DC.description => "URI of feature genration service. Default: #{@@feature_generation_default}", OT.paramScope => "optional", DC.title => "feature_generation_uri" }, + { DC.description => "URI of feature dataset. If this parameter is set no feature generation algorithm will be called", OT.paramScope => "optional", DC.title => "feature_dataset_uri" }, + { DC.description => "Further parameters for the feaature generation service", OT.paramScope => "optional" } + ] } - - parameters = [ - { DC.description => "Dataset URI", OT.paramScope => "mandatory", OT.title => "dataset_uri" }, - { DC.description => "Feature URI for dependent variable", OT.paramScope => "mandatory", OT.title => "prediction_feature" }, - { DC.description => "URI of feature genration service", OT.paramScope => "mandatory", OT.title => "feature_generation_uri" } - ] - - s = OpenTox::Serializer::Owl.new - s.add_algorithm(url_for('/lazar',:full),metadata,parameters) - response['Content-Type'] = 'application/rdf+xml' - s.to_rdfxml - + algorithm.to_rdfxml end -post '/lazar/?' do # create a model +# Create a lazar prediction model +# +# @return [text/uri-list] Task URI +post '/lazar/?' 
do - dataset_uri = "#{params[:dataset_uri]}" + LOGGER.debug params.inspect + halt 404, "No dataset_uri parameter." unless params[:dataset_uri] + dataset_uri = params[:dataset_uri] begin training_activities = OpenTox::Dataset.new(dataset_uri) @@ -32,35 +38,77 @@ post '/lazar/?' do # create a model halt 404, "Dataset #{dataset_uri} not found (#{e.inspect})." end - halt 404, "No prediction_feature parameter." unless params[:prediction_feature] - halt 404, "No feature_generation_uri parameter." unless params[:feature_generation_uri] - halt 404, "No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}. (features: "+ - training_activities.features.inspect+")" unless training_activities.features and training_activities.features.include?(params[:prediction_feature]) + prediction_feature = params[:prediction_feature] + unless prediction_feature # try to read prediction_feature from dataset + halt 404, "#{training_activities.features.size} features in dataset #{dataset_uri}. Please provide a prediction_feature parameter." unless training_activities.features.size == 1 + prediction_feature = training_activities.features.keys.first + params[:prediction_feature] = prediction_feature + end + + feature_generation_uri = @@feature_generation_default unless feature_generation_uri = params[:feature_generation_uri] + + halt 404, "No feature #{prediction_feature} in dataset #{params[:dataset_uri]}. 
(features: "+ + training_activities.features.inspect+")" unless training_activities.features and training_activities.features.include?(prediction_feature) response['Content-Type'] = 'text/uri-list' task_uri = OpenTox::Task.as_task("Create lazar model",url_for('/lazar',:full)) do |task| - # create features - feature_dataset_uri = OpenTox::Algorithm::Fminer.new.run(params).to_s - - training_features = OpenTox::Dataset.new(feature_dataset_uri) + lazar = OpenTox::Model::Lazar.new + + if params[:feature_dataset_uri] + feature_dataset_uri = params[:feature_dataset_uri] + training_features = OpenTox::Dataset.new(feature_dataset_uri) + case training_features.feature_type + when "classification" + lazar.similarity_algorithm = "weighted_tanimoto" + when "regression" + lazar.similarity_algorithm = "weighted_euclid" + end + else # create features + params[:feature_generation_uri] = feature_generation_uri + if feature_generation_uri.match(/fminer/) + lazar.feature_calculation_algorithm = "substructure_match" + else + halt 404, "External feature generation services not yet supported" + end + feature_dataset_uri = OpenTox::Algorithm::Generic.new(feature_generation_uri).run(params).to_s + training_features = OpenTox::Dataset.new(feature_dataset_uri) + end + training_features.load_all halt 404, "Dataset #{feature_dataset_uri} not found." if training_features.nil? - - lazar = OpenTox::Model::Lazar.new - # TODO: dataset method for iterating over data entries + # sorted features for index lookups + lazar.features = training_features.features.sort if training_features.feature_type == "regression" + training_features.data_entries.each do |compound,entry| lazar.fingerprints[compound] = [] unless lazar.fingerprints[compound] entry.keys.each do |feature| - # TODO fix URI - fminer_uri = File.join CONFIG[:services]["opentox-algorithm"], "fminer" - smarts = training_features.features[feature]["#{fminer_uri}#smarts"] - lazar.fingerprints[compound] << smarts - unless lazar.features.include? 
smarts - lazar.features << smarts - lazar.p_values[smarts] = training_features.features[feature]["#{fminer_uri}#p_value"] - lazar.effects[smarts] = training_features.features[feature]["#{fminer_uri}#effect"] + case training_features.feature_type + when "fminer" + # fingerprints are sets + smarts = training_features.features[feature][OT.smarts] + lazar.fingerprints[compound] << smarts + unless lazar.features.include? smarts + lazar.features << smarts + lazar.p_values[smarts] = training_features.features[feature][OT.p_value] + lazar.effects[smarts] = training_features.features[feature][OT.effect] + end + when "classification" + # fingerprints are sets + if entry[feature].flatten.size == 1 + lazar.fingerprints[compound] << feature if entry[feature].flatten.first.match(TRUE_REGEXP) + lazar.features << feature unless lazar.features.include? feature + else + LOGGER.warn "More than one entry (#{entry[feature].inspect}) for compound #{compound}, feature #{feature}" + end + when "regression" + # fingerprints are arrays + if entry[feature].flatten.size == 1 + lazar.fingerprints[compound][lazar.features.index(feature)] = entry[feature].flatten.first + else + LOGGER.warn "More than one entry (#{entry[feature].inspect}) for compound #{compound}, feature #{feature}" + end end end @@ -73,7 +121,7 @@ post '/lazar/?' do # create a model lazar.activities[compound] << false else lazar.activities[compound] << value.to_f - lazar.type = "regression" + lazar.prediction_type = "regression" end end end @@ -81,88 +129,12 @@ post '/lazar/?' do # create a model lazar.metadata[OT.dependentVariables] = params[:prediction_feature] lazar.metadata[OT.trainingDataset] = dataset_uri lazar.metadata[OT.featureDataset] = feature_dataset_uri - - model_uri = lazar.save - LOGGER.info model_uri + " created #{Time.now}" - model_uri - end - halt 202,task_uri -end - - -post '/property_lazar/?' 
do # create a model - - LOGGER.debug "Dataset: '" + params[:dataset_uri].to_s + "'" - LOGGER.debug "Endpoint: '" + params[:prediction_feature].to_s + "'" - LOGGER.debug "Feature dataset: '" + params[:feature_dataset_uri].to_s + "'" - dataset_uri = "#{params[:dataset_uri]}" - - begin - training_activities = OpenTox::Dataset.find(dataset_uri) - rescue - halt 404, "Dataset #{dataset_uri} not found" - end - - halt 404, "No prediction_feature parameter." unless params[:prediction_feature] - halt 404, "No feature_dataset_uri parameter." unless params[:feature_dataset_uri] - halt 404, "No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}. (features: "+ - training_activities.features.inspect+")" unless training_activities.features and training_activities.features.include?(params[:prediction_feature]) - - response['Content-Type'] = 'text/uri-list' - task_uri = OpenTox::Task.as_task("Create lazar model",url_for('/property_lazar',:full)) do |task| - - # create features - #LOGGER.debug "Starting fminer" - #params[:feature_uri] = params[:prediction_feature] - #fminer_task_uri = OpenTox::Algorithm::Fminer.create_feature_dataset(params) - #fminer_task = OpenTox::Task.find(fminer_task_uri) - #fminer_task.wait_for_completion - #raise "fminer failed" unless fminer_task.completed? - - #LOGGER.debug "Fminer finished #{Time.now}" - feature_dataset_uri = params[:feature_dataset_uri] - training_features = OpenTox::Dataset.find(feature_dataset_uri) - halt 404, "Dataset #{feature_dataset_uri} not found." if training_features.nil? 
- lazar = OpenTox::Model::PropertyLazar.new - lazar.trainingDataset = dataset_uri - lazar.feature_dataset_uri = feature_dataset_uri - #halt 404, "More than one descriptor type" unless training_features.features.size == 1 - lazar.features = training_features.features - training_features.data.each do |compound,features| - lazar.properties[compound] = {} unless lazar.properties[compound] - LOGGER.debug features.inspect - if features - features.each do |f| - f.each do |name,value| - #lazar.features.each do |feature| - lazar.properties[compound][name] = value - #lazar.properties[compound] = features - end - end - end - end - activities = {} - classification = true - training_activities.data.each do |compound,features| - lazar.activities[compound] = [] unless lazar.activities[compound] - features.each do |feature| - case feature[params[:prediction_feature]].to_s - when "true" - lazar.activities[compound] << true - when "false" - lazar.activities[compound] << false - else - lazar.activities[compound] << feature[params[:prediction_feature]].to_f - classification = false - end - end - end - if classification - lazar.dependentVariables = params[:prediction_feature]+"_lazar_classification" - else - lazar.dependentVariables = params[:prediction_feature]+"_lazar_regression" - end + lazar.parameters = { + "dataset_uri" => dataset_uri, + "prediction_feature" => prediction_feature, + "feature_generation_uri" => feature_generation_uri + } model_uri = lazar.save LOGGER.info model_uri + " created #{Time.now}" @@ -170,3 +142,4 @@ post '/property_lazar/?' do # create a model end halt 202,task_uri end + |