summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristoph Helma <helma@in-silico.ch>2010-11-10 18:39:12 +0100
committerChristoph Helma <helma@in-silico.ch>2010-11-10 18:39:12 +0100
commit397e5a2ba1913f1a66cbf85185a82d45ee942e47 (patch)
treed18317b9308469c88254cdb3d3e6fe16a97d1f90
parent28aac60d38678340128a54ffe99bc56401561419 (diff)
valid algorithm OWL-DL, documentation started
-rw-r--r--application.rb5
-rw-r--r--fminer.rb119
-rw-r--r--lazar.rb211
3 files changed, 178 insertions, 157 deletions
diff --git a/application.rb b/application.rb
index 8e0a573..867cf24 100644
--- a/application.rb
+++ b/application.rb
@@ -18,7 +18,10 @@ before do
LOGGER.debug "Request: " + request.path
end
+# Get a list of available algorithms
+#
+# @return [text/uri-list] algorithm URIs
get '/?' do
response['Content-Type'] = 'text/uri-list'
- [ url_for('/lazar', :full), url_for('/fminer', :full) ].join("\n") + "\n"
+ [ url_for('/lazar', :full), url_for('/fminer/bbrc', :full), url_for('/fminer/last', :full) ].join("\n") + "\n"
end
diff --git a/fminer.rb b/fminer.rb
index 3ba3057..06b4e67 100644
--- a/fminer.rb
+++ b/fminer.rb
@@ -2,55 +2,85 @@ ENV['FMINER_SMARTS'] = 'true'
ENV['FMINER_NO_AROMATIC'] = 'true'
ENV['FMINER_PVALUES'] = 'true'
+# Get list of fminer algorithms
+#
+# @return [text/uri-list] URIs of fminer algorithms
get '/fminer/?' do
+ response['Content-Type'] = 'text/uri-list'
+ [ url_for('/fminer/bbrc', :full), url_for('/fminer/last', :full) ].join("\n") + "\n"
+end
- metadata = {
- DC.title => 'fminer',
- DC.identifier => url_for("",:full),
+# Get RDF/XML representation of fminer bbrc algorithm
+#
+# @return [application/rdf+xml] OWL-DL representation of fminer bbrc algorithm
+get "/fminer/bbrc/?" do
+ response['Content-Type'] = 'application/rdf+xml'
+ algorithm = OpenTox::Algorithm::Generic.new(url_for('/fminer/bbrc',:full))
+ algorithm.metadata = {
+ DC.title => 'fminer backbone refinement class representatives',
DC.creator => "andreas@maunz.de, helma@in-silico.ch",
DC.contributor => "vorgrimmlerdavid@gmx.de",
- OT.isA => OTA.PatternMiningSupervised
+ OT.isA => OTA.PatternMiningSupervised,
+ OT.parameters => [
+ { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" },
+ { DC.description => "Feature URI for dependent variable", OT.paramScope => "mandatory", DC.title => "prediction_feature" }
+ ]
}
+ algorithm.to_rdfxml
+end
- parameters = [
- { DC.description => "Dataset URI", OT.paramScope => "mandatory", OT.title => "dataset_uri" },
- { DC.description => "Feature URI for dependent variable", OT.paramScope => "mandatory", OT.title => "prediction_feature" }
- ]
-
- s = OpenTox::Serializer::Owl.new
- s.add_algorithm(url_for('/fminer',:full),metadata,parameters)
- response['Content-Type'] = 'application/rdf+xml'
- s.to_rdfxml
-
+# Get RDF/XML representation of fminer last algorithm
+#
+# @return [application/rdf+xml] OWL-DL representation of fminer last algorithm
+get "/fminer/last/?" do
+ algorithm = OpenTox::Algorithm::Generic.new(url_for('/fminer/last',:full))
+ algorithm.metadata = {
+ DC.title => 'fminer latent structure class representatives',
+ DC.creator => "andreas@maunz.de, helma@in-silico.ch",
+ DC.contributor => "vorgrimmlerdavid@gmx.de",
+ OT.isA => OTA.PatternMiningSupervised,
+ OT.parameters => [
+ { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" },
+ { DC.description => "Feature URI for dependent variable", OT.paramScope => "mandatory", DC.title => "prediction_feature" }
+ ]
+ }
+ algorithm.to_rdfxml
end
-#post '/fminer/?' do
-['/fminer/bbrc/?','/fminer/?'].each do |path| # AM LAST: set bbrc as default
- post path do
+# Run bbrc algorithm on dataset
+#
+# @param [URI] dataset_uri URI of the training dataset
+# @param [URI] prediction_feature URI of the prediction feature (i.e. dependent variable)
+# @param [optional, Integer] min_frequency minimum frequency (defaults to 5)
+# @return [text/uri-list] Task URI
+post '/fminer/bbrc/?' do
+#['/fminer/bbrc/?','/fminer/?'].each do |path| # AM LAST: set bbrc as default
+ #post path do
+ # TODO: is this thread safe??
@@fminer = Bbrc::Bbrc.new
- @@fminer.SetMinfreq(5)
+ minfreq = 5 unless minfreq = params[:min_frequency]
+ @@fminer.SetMinfreq(minfreq)
@@fminer.SetConsoleOut(false)
halt 404, "Please submit a dataset_uri." unless params[:dataset_uri] and !params[:dataset_uri].nil?
halt 404, "Please submit a prediction_feature." unless params[:prediction_feature] and !params[:prediction_feature].nil?
prediction_feature = params[:prediction_feature]
- training_dataset = OpenTox::Dataset.new "#{params[:dataset_uri]}"
- training_dataset.load_all
+ training_dataset = OpenTox::Dataset.find "#{params[:dataset_uri]}"
halt 404, "No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}" unless training_dataset.features and training_dataset.features.include?(params[:prediction_feature])
task_uri = OpenTox::Task.as_task("Mining BBRC features", url_for('/fminer',:full)) do
feature_dataset = OpenTox::Dataset.new
feature_dataset.add_metadata({
- DC.title => "BBRC representatives for " + training_dataset.metadata[DC.title],
+ DC.title => "BBRC representatives for " + training_dataset.metadata[DC.title].to_s,
DC.creator => url_for('/fminer/bbrc',:full),
OT.hasSource => url_for('/fminer/bbrc', :full),
- })
- feature_dataset.add_parameters({
- "dataset_uri" => params[:dataset_uri],
- "prediction_feature" => params[:prediction_feature]
+ OT.parameters => [
+ { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] },
+ { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] }
+ ]
})
feature_dataset.save
@@ -63,7 +93,7 @@ end
@@fminer.Reset
training_dataset.data_entries.each do |compound,entry|
begin
- smiles = OpenTox::Compound.new(compound.to_s).smiles
+ smiles = OpenTox::Compound.new(compound.to_s).to_smiles
rescue
LOGGER.warn "No resource for #{compound.to_s}"
next
@@ -142,13 +172,19 @@ end
feature_uri = File.join feature_dataset.uri,"feature","bbrc", features.size.to_s
unless features.include? smarts
features << smarts
- # TODO insert correct ontology entries
metadata = {
- OT.hasSource => feature_dataset.uri,
+ OT.hasSource => url_for('/fminer/bbrc', :full),
+ OT.isA => OT.NominalFeature,
OT.smarts => smarts,
OT.p_value => p_value.to_f,
- OT.effect => effect }
+ OT.effect => effect,
+ OT.parameters => [
+ { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] },
+ { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] }
+ ]
+ }
feature_dataset.add_feature feature_uri, metadata
+ #feature_dataset.add_feature_parameters feature_uri, feature_dataset.parameters
end
ids.each { |id| feature_dataset.add(compounds[id], feature_uri, true)}
end
@@ -159,8 +195,13 @@ end
response['Content-Type'] = 'text/uri-list'
halt 202,task_uri.to_s+"\n"
end
-end
+#end
+# Run last algorithm on a dataset
+#
+# @param [URI] dataset_uri URI of the training dataset
+# @param [URI] prediction_feature URI of the prediction feature (i.e. dependent variable)
+# @return [text/uri-list] Task URI
post '/fminer/last/?' do
@@fminer = Last::Last.new
@@ -179,13 +220,13 @@ post '/fminer/last/?' do
feature_dataset = OpenTox::Dataset.new
feature_dataset.add_metadata({
- DC.title => "LAST representatives for " + training_dataset.metadata[DC.title],
+ DC.title => "LAST representatives for " + training_dataset.metadata[DC.title].to_s,
DC.creator => url_for('/fminer/last',:full),
OT.hasSource => url_for('/fminer/last', :full),
- })
- feature_dataset.add_parameters({
- "dataset_uri" => params[:dataset_uri],
- "prediction_feature" => params[:prediction_feature]
+ OT.parameters => [
+ { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] },
+ { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] }
+ ]
})
feature_dataset.save
@@ -199,7 +240,7 @@ post '/fminer/last/?' do
@@fminer.Reset
training_dataset.data_entries.each do |compound,entry|
begin
- smiles = OpenTox::Compound.new(compound.to_s).smiles
+ smiles = OpenTox::Compound.new(compound.to_s).to_smiles
rescue
LOGGER.warn "No resource for #{compound.to_s}"
next
@@ -272,7 +313,11 @@ post '/fminer/last/?' do
OT.hasSource => feature_dataset.uri,
OT.smarts => smarts,
OT.p_value => p_value.to_f,
- OT.effect => effect
+ OT.effect => effect,
+ OT.parameters => [
+ { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] },
+ { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] }
+ ]
}
feature_dataset.add_feature feature_uri, metadata
end
diff --git a/lazar.rb b/lazar.rb
index 9fbc679..e1b9846 100644
--- a/lazar.rb
+++ b/lazar.rb
@@ -1,29 +1,35 @@
-get '/lazar/?' do
+@@feature_generation_default = File.join(CONFIG[:services]["opentox-algorithm"],"fminer","bbrc")
- metadata = {
+# Get RDF/XML representation of the lazar algorithm
+#
+# @return [application/rdf+xml] OWL-DL representation of the lazar algorithm
+get '/lazar/?' do
+ response['Content-Type'] = 'application/rdf+xml'
+ algorithm = OpenTox::Algorithm::Generic.new(url_for('/lazar',:full))
+ algorithm.metadata = {
DC.title => 'lazar',
- DC.identifier => url_for("",:full),
DC.creator => "helma@in-silico.ch, andreas@maunz.de",
DC.contributor => "vorgrimmlerdavid@gmx.de",
- OT.isA => OTA.ClassificationLazySingleTarget
+ OT.isA => OTA.ClassificationLazySingleTarget,
+ OT.parameters => [
+ { DC.description => "Dataset URI with the dependent variable", OT.paramScope => "mandatory", DC.title => "dataset_uri" },
+ { DC.description => "Feature URI for dependent variable. Optional for datasets with only a single feature.", OT.paramScope => "optional", DC.title => "prediction_feature" },
+ { DC.description => "URI of feature genration service. Default: #{@@feature_generation_default}", OT.paramScope => "optional", DC.title => "feature_generation_uri" },
+ { DC.description => "URI of feature dataset. If this parameter is set no feature generation algorithm will be called", OT.paramScope => "optional", DC.title => "feature_dataset_uri" },
+ { DC.description => "Further parameters for the feaature generation service", OT.paramScope => "optional" }
+ ]
}
-
- parameters = [
- { DC.description => "Dataset URI", OT.paramScope => "mandatory", OT.title => "dataset_uri" },
- { DC.description => "Feature URI for dependent variable", OT.paramScope => "mandatory", OT.title => "prediction_feature" },
- { DC.description => "URI of feature genration service", OT.paramScope => "mandatory", OT.title => "feature_generation_uri" }
- ]
-
- s = OpenTox::Serializer::Owl.new
- s.add_algorithm(url_for('/lazar',:full),metadata,parameters)
- response['Content-Type'] = 'application/rdf+xml'
- s.to_rdfxml
-
+ algorithm.to_rdfxml
end
-post '/lazar/?' do # create a model
+# Create a lazar prediction model
+#
+# @return [text/uri-list] Task URI
+post '/lazar/?' do
- dataset_uri = "#{params[:dataset_uri]}"
+ LOGGER.debug params.inspect
+ halt 404, "No dataset_uri parameter." unless params[:dataset_uri]
+ dataset_uri = params[:dataset_uri]
begin
training_activities = OpenTox::Dataset.new(dataset_uri)
@@ -32,35 +38,77 @@ post '/lazar/?' do # create a model
halt 404, "Dataset #{dataset_uri} not found (#{e.inspect})."
end
- halt 404, "No prediction_feature parameter." unless params[:prediction_feature]
- halt 404, "No feature_generation_uri parameter." unless params[:feature_generation_uri]
- halt 404, "No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}. (features: "+
- training_activities.features.inspect+")" unless training_activities.features and training_activities.features.include?(params[:prediction_feature])
+ prediction_feature = params[:prediction_feature]
+ unless prediction_feature # try to read prediction_feature from dataset
+ halt 404, "#{training_activities.features.size} features in dataset #{dataset_uri}. Please provide a prediction_feature parameter." unless training_activities.features.size == 1
+ prediction_feature = training_activities.features.keys.first
+ params[:prediction_feature] = prediction_feature
+ end
+
+ feature_generation_uri = @@feature_generation_default unless feature_generation_uri = params[:feature_generation_uri]
+
+ halt 404, "No feature #{prediction_feature} in dataset #{params[:dataset_uri]}. (features: "+
+ training_activities.features.inspect+")" unless training_activities.features and training_activities.features.include?(prediction_feature)
response['Content-Type'] = 'text/uri-list'
task_uri = OpenTox::Task.as_task("Create lazar model",url_for('/lazar',:full)) do |task|
- # create features
- feature_dataset_uri = OpenTox::Algorithm::Fminer.new.run(params).to_s
-
- training_features = OpenTox::Dataset.new(feature_dataset_uri)
+ lazar = OpenTox::Model::Lazar.new
+
+ if params[:feature_dataset_uri]
+ feature_dataset_uri = params[:feature_dataset_uri]
+ training_features = OpenTox::Dataset.new(feature_dataset_uri)
+ case training_features.feature_type
+ when "classification"
+ lazar.similarity_algorithm = "weighted_tanimoto"
+ when "regression"
+ lazar.similarity_algorithm = "weighted_euclid"
+ end
+ else # create features
+ params[:feature_generation_uri] = feature_generation_uri
+ if feature_generation_uri.match(/fminer/)
+ lazar.feature_calculation_algorithm = "substructure_match"
+ else
+ halt 404, "External feature generation services not yet supported"
+ end
+ feature_dataset_uri = OpenTox::Algorithm::Generic.new(feature_generation_uri).run(params).to_s
+ training_features = OpenTox::Dataset.new(feature_dataset_uri)
+ end
+
training_features.load_all
halt 404, "Dataset #{feature_dataset_uri} not found." if training_features.nil?
-
- lazar = OpenTox::Model::Lazar.new
- # TODO: dataset method for iterating over data entries
+ # sorted features for index lookups
+ lazar.features = training_features.features.sort if training_features.feature_type == "regression"
+
training_features.data_entries.each do |compound,entry|
lazar.fingerprints[compound] = [] unless lazar.fingerprints[compound]
entry.keys.each do |feature|
- # TODO fix URI
- fminer_uri = File.join CONFIG[:services]["opentox-algorithm"], "fminer"
- smarts = training_features.features[feature]["#{fminer_uri}#smarts"]
- lazar.fingerprints[compound] << smarts
- unless lazar.features.include? smarts
- lazar.features << smarts
- lazar.p_values[smarts] = training_features.features[feature]["#{fminer_uri}#p_value"]
- lazar.effects[smarts] = training_features.features[feature]["#{fminer_uri}#effect"]
+ case training_features.feature_type
+ when "fminer"
+ # fingerprints are sets
+ smarts = training_features.features[feature][OT.smarts]
+ lazar.fingerprints[compound] << smarts
+ unless lazar.features.include? smarts
+ lazar.features << smarts
+ lazar.p_values[smarts] = training_features.features[feature][OT.p_value]
+ lazar.effects[smarts] = training_features.features[feature][OT.effect]
+ end
+ when "classification"
+ # fingerprints are sets
+ if entry[feature].flatten.size == 1
+ lazar.fingerprints[compound] << feature if entry[feature].flatten.first.match(TRUE_REGEXP)
+ lazar.features << feature unless lazar.features.include? feature
+ else
+ LOGGER.warn "More than one entry (#{entry[feature].inspect}) for compound #{compound}, feature #{feature}"
+ end
+ when "regression"
+ # fingerprints are arrays
+ if entry[feature].flatten.size == 1
+ lazar.fingerprints[compound][lazar.features.index(feature)] = entry[feature].flatten.first
+ else
+ LOGGER.warn "More than one entry (#{entry[feature].inspect}) for compound #{compound}, feature #{feature}"
+ end
end
end
@@ -73,7 +121,7 @@ post '/lazar/?' do # create a model
lazar.activities[compound] << false
else
lazar.activities[compound] << value.to_f
- lazar.type = "regression"
+ lazar.prediction_type = "regression"
end
end
end
@@ -81,88 +129,12 @@ post '/lazar/?' do # create a model
lazar.metadata[OT.dependentVariables] = params[:prediction_feature]
lazar.metadata[OT.trainingDataset] = dataset_uri
lazar.metadata[OT.featureDataset] = feature_dataset_uri
-
- model_uri = lazar.save
- LOGGER.info model_uri + " created #{Time.now}"
- model_uri
- end
- halt 202,task_uri
-end
-
-
-post '/property_lazar/?' do # create a model
-
- LOGGER.debug "Dataset: '" + params[:dataset_uri].to_s + "'"
- LOGGER.debug "Endpoint: '" + params[:prediction_feature].to_s + "'"
- LOGGER.debug "Feature dataset: '" + params[:feature_dataset_uri].to_s + "'"
- dataset_uri = "#{params[:dataset_uri]}"
-
- begin
- training_activities = OpenTox::Dataset.find(dataset_uri)
- rescue
- halt 404, "Dataset #{dataset_uri} not found"
- end
-
- halt 404, "No prediction_feature parameter." unless params[:prediction_feature]
- halt 404, "No feature_dataset_uri parameter." unless params[:feature_dataset_uri]
- halt 404, "No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}. (features: "+
- training_activities.features.inspect+")" unless training_activities.features and training_activities.features.include?(params[:prediction_feature])
-
- response['Content-Type'] = 'text/uri-list'
- task_uri = OpenTox::Task.as_task("Create lazar model",url_for('/property_lazar',:full)) do |task|
-
- # create features
- #LOGGER.debug "Starting fminer"
- #params[:feature_uri] = params[:prediction_feature]
- #fminer_task_uri = OpenTox::Algorithm::Fminer.create_feature_dataset(params)
- #fminer_task = OpenTox::Task.find(fminer_task_uri)
- #fminer_task.wait_for_completion
- #raise "fminer failed" unless fminer_task.completed?
-
- #LOGGER.debug "Fminer finished #{Time.now}"
- feature_dataset_uri = params[:feature_dataset_uri]
- training_features = OpenTox::Dataset.find(feature_dataset_uri)
- halt 404, "Dataset #{feature_dataset_uri} not found." if training_features.nil?
- lazar = OpenTox::Model::PropertyLazar.new
- lazar.trainingDataset = dataset_uri
- lazar.feature_dataset_uri = feature_dataset_uri
- #halt 404, "More than one descriptor type" unless training_features.features.size == 1
- lazar.features = training_features.features
- training_features.data.each do |compound,features|
- lazar.properties[compound] = {} unless lazar.properties[compound]
- LOGGER.debug features.inspect
- if features
- features.each do |f|
- f.each do |name,value|
- #lazar.features.each do |feature|
- lazar.properties[compound][name] = value
- #lazar.properties[compound] = features
- end
- end
- end
- end
- activities = {}
- classification = true
- training_activities.data.each do |compound,features|
- lazar.activities[compound] = [] unless lazar.activities[compound]
- features.each do |feature|
- case feature[params[:prediction_feature]].to_s
- when "true"
- lazar.activities[compound] << true
- when "false"
- lazar.activities[compound] << false
- else
- lazar.activities[compound] << feature[params[:prediction_feature]].to_f
- classification = false
- end
- end
- end
- if classification
- lazar.dependentVariables = params[:prediction_feature]+"_lazar_classification"
- else
- lazar.dependentVariables = params[:prediction_feature]+"_lazar_regression"
- end
+ lazar.parameters = {
+ "dataset_uri" => dataset_uri,
+ "prediction_feature" => prediction_feature,
+ "feature_generation_uri" => feature_generation_uri
+ }
model_uri = lazar.save
LOGGER.info model_uri + " created #{Time.now}"
@@ -170,3 +142,4 @@ post '/property_lazar/?' do # create a model
end
halt 202,task_uri
end
+