summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristoph Helma <helma@in-silico.ch>2010-11-19 14:42:29 +0100
committerChristoph Helma <helma@in-silico.ch>2010-11-19 14:42:29 +0100
commit578f57d48df70dc677ceb65720831885c3af99f2 (patch)
tree1fe2f7d76acbcca7782535c33a05910b5a9625f4
parent397e5a2ba1913f1a66cbf85185a82d45ee942e47 (diff)
lazar predictions and toxcreate are working
-rw-r--r--fminer.rb49
-rw-r--r--lazar.rb79
2 files changed, 74 insertions, 54 deletions
diff --git a/fminer.rb b/fminer.rb
index 06b4e67..3e39eda 100644
--- a/fminer.rb
+++ b/fminer.rb
@@ -11,7 +11,6 @@ get '/fminer/?' do
end
# Get RDF/XML representation of fminer bbrc algorithm
-#
# @return [application/rdf+xml] OWL-DL representation of fminer bbrc algorithm
get "/fminer/bbrc/?" do
response['Content-Type'] = 'application/rdf+xml'
@@ -23,14 +22,17 @@ get "/fminer/bbrc/?" do
OT.isA => OTA.PatternMiningSupervised,
OT.parameters => [
{ DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" },
- { DC.description => "Feature URI for dependent variable", OT.paramScope => "mandatory", DC.title => "prediction_feature" }
+ { DC.description => "Feature URI for dependent variable", OT.paramScope => "mandatory", DC.title => "prediction_feature" },
+ { DC.description => "Minimum frequency", OT.paramScope => "optional", DC.title => "minfreq" },
+ { DC.description => "Feature type, can be 'paths' or 'trees'", OT.paramScope => "optional", DC.title => "feature_type" },
+ { DC.description => "BBRC classes, pass 'false' to switch off mining for BBRC representatives.", OT.paramScope => "optional", DC.title => "backbone" },
+ { DC.description => "Significance threshold (between 0 and 1)", OT.paramScope => "optional", DC.title => "min_chisq_significance" },
]
}
algorithm.to_rdfxml
end
# Get RDF/XML representation of fminer last algorithm
-#
# @return [application/rdf+xml] OWL-DL representation of fminer last algorithm
get "/fminer/last/?" do
algorithm = OpenTox::Algorithm::Generic.new(url_for('/fminer/last',:full))
@@ -41,7 +43,10 @@ get "/fminer/last/?" do
OT.isA => OTA.PatternMiningSupervised,
OT.parameters => [
{ DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" },
- { DC.description => "Feature URI for dependent variable", OT.paramScope => "mandatory", DC.title => "prediction_feature" }
+ { DC.description => "Feature URI for dependent variable", OT.paramScope => "mandatory", DC.title => "prediction_feature" },
+ { DC.description => "Minimum frequency", OT.paramScope => "optional", DC.title => "minfreq" },
+ { DC.description => "Feature type, can be 'paths' or 'trees'", OT.paramScope => "optional", DC.title => "feature_type" },
+ { DC.description => "Maximum number of hops", OT.paramScope => "optional", DC.title => "hops" },
]
}
algorithm.to_rdfxml
@@ -49,18 +54,23 @@ end
# Run bbrc algorithm on dataset
#
-# @param [URI] dataset_uri URI of the training dataset
-# @param [URI] prediction_feature URI of the prediction feature (i.e. dependent variable)
-# @param [optional, Integer] min_frequency minimum frequency (defaults to 5)
+# @param [String] dataset_uri URI of the training dataset
+# @param [String] prediction_feature URI of the prediction feature (i.e. dependent variable)
+# @param [optional] parameters BBRC parameters, accepted parameters are
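+# - minfreq Minimum frequency (default 5). NOTE(review): the handler reads params[:min_frequency], not params[:minfreq] -- confirm the intended name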
+# - feature_type Feature type, can be 'paths' or 'trees' (default "trees")
+# - backbone BBRC classes, pass 'false' to switch off mining for BBRC representatives. (default "true")
+# - min_chisq_significance Significance threshold (between 0 and 1)
# @return [text/uri-list] Task URI
post '/fminer/bbrc/?' do
-#['/fminer/bbrc/?','/fminer/?'].each do |path| # AM LAST: set bbrc as default
- #post path do
# TODO: is this thread safe??
@@fminer = Bbrc::Bbrc.new
minfreq = 5 unless minfreq = params[:min_frequency]
@@fminer.SetMinfreq(minfreq)
+ @@fminer.SetType(1) if params[:feature_type] == "paths"
+ @@fminer.SetBackbone(params[:backbone]) if params[:backbone]
+ @@fminer.SetChisqSig(params[:min_chisq_significance]) if params[:min_chisq_significance]
@@fminer.SetConsoleOut(false)
halt 404, "Please submit a dataset_uri." unless params[:dataset_uri] and !params[:dataset_uri].nil?
@@ -70,7 +80,7 @@ post '/fminer/bbrc/?' do
training_dataset = OpenTox::Dataset.find "#{params[:dataset_uri]}"
halt 404, "No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}" unless training_dataset.features and training_dataset.features.include?(params[:prediction_feature])
- task_uri = OpenTox::Task.as_task("Mining BBRC features", url_for('/fminer',:full)) do
+ task = OpenTox::Task.create("Mining BBRC features", url_for('/fminer',:full)) do
feature_dataset = OpenTox::Dataset.new
feature_dataset.add_metadata({
@@ -193,19 +203,26 @@ post '/fminer/bbrc/?' do
feature_dataset.uri
end
response['Content-Type'] = 'text/uri-list'
- halt 202,task_uri.to_s+"\n"
+ halt 202,task.uri.to_s+"\n"
end
#end
# Run last algorithm on a dataset
#
-# @param [URI] dataset_uri URI of the training dataset
-# @param [URI] prediction_feature URI of the prediction feature (i.e. dependent variable)
+# @param [String] dataset_uri URI of the training dataset
+# @param [String] prediction_feature URI of the prediction feature (i.e. dependent variable)
+# @param [optional] parameters LAST parameters, accepted parameters are
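+# - minfreq Minimum frequency (default 5). NOTE(review): the handler reads params[:min_frequency], not params[:minfreq] -- confirm the intended name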
+# - feature_type Feature type, can be 'paths' or 'trees' (default "trees")
+# - hops Maximum number of hops
# @return [text/uri-list] Task URI
post '/fminer/last/?' do
@@fminer = Last::Last.new
- @@fminer.SetMinfreq(5)
+ minfreq = 5 unless minfreq = params[:min_frequency]
+ @@fminer.SetMinfreq(minfreq)
+ @@fminer.SetType(1) if params[:feature_type] == "paths"
+ @@fminer.SetMaxHops(params[:hops]) if params[:hops]
@@fminer.SetConsoleOut(false)
halt 404, "Please submit a dataset_uri." unless params[:dataset_uri] and !params[:dataset_uri].nil?
@@ -216,7 +233,7 @@ post '/fminer/last/?' do
training_dataset.load_all
halt 404, "No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}" unless training_dataset.features and training_dataset.features.include?(params[:prediction_feature])
- task_uri = OpenTox::Task.as_task("Mining LAST features", url_for('/fminer',:full)) do
+ task = OpenTox::Task.create("Mining LAST features", url_for('/fminer',:full)) do
feature_dataset = OpenTox::Dataset.new
feature_dataset.add_metadata({
@@ -327,5 +344,5 @@ post '/fminer/last/?' do
feature_dataset.uri
end
response['Content-Type'] = 'text/uri-list'
- halt 202,task_uri.to_s+"\n"
+ halt 202,task.uri.to_s+"\n"
end
diff --git a/lazar.rb b/lazar.rb
index e1b9846..c5a9259 100644
--- a/lazar.rb
+++ b/lazar.rb
@@ -1,7 +1,6 @@
@@feature_generation_default = File.join(CONFIG[:services]["opentox-algorithm"],"fminer","bbrc")
# Get RDF/XML representation of the lazar algorithm
-#
# @return [application/rdf+xml] OWL-DL representation of the lazar algorithm
get '/lazar/?' do
response['Content-Type'] = 'application/rdf+xml'
@@ -23,20 +22,18 @@ get '/lazar/?' do
end
# Create a lazar prediction model
-#
+# @param [String] dataset_uri Training dataset URI
+# @param [optional,String] prediction_feature URI of the feature to be predicted
+# @param [optional,String] feature_generation_uri URI of the feature generation algorithm
+# @param [optional] - further parameters for the feature generation service
# @return [text/uri-list] Task URI
post '/lazar/?' do
- LOGGER.debug params.inspect
halt 404, "No dataset_uri parameter." unless params[:dataset_uri]
dataset_uri = params[:dataset_uri]
- begin
- training_activities = OpenTox::Dataset.new(dataset_uri)
- training_activities.load_all
- rescue => e
- halt 404, "Dataset #{dataset_uri} not found (#{e.inspect})."
- end
+ halt 404, "Dataset #{dataset_uri} not found." unless training_activities = OpenTox::Dataset.new(dataset_uri)
+ training_activities.load_all
prediction_feature = params[:prediction_feature]
unless prediction_feature # try to read prediction_feature from dataset
@@ -51,23 +48,24 @@ post '/lazar/?' do
training_activities.features.inspect+")" unless training_activities.features and training_activities.features.include?(prediction_feature)
response['Content-Type'] = 'text/uri-list'
- task_uri = OpenTox::Task.as_task("Create lazar model",url_for('/lazar',:full)) do |task|
+ task = OpenTox::Task.create("Create lazar model",url_for('/lazar',:full)) do |task|
lazar = OpenTox::Model::Lazar.new
+ lazar.min_sim = params[:min_sim] if params[:min_sim]
if params[:feature_dataset_uri]
feature_dataset_uri = params[:feature_dataset_uri]
training_features = OpenTox::Dataset.new(feature_dataset_uri)
case training_features.feature_type
when "classification"
- lazar.similarity_algorithm = "weighted_tanimoto"
+ lazar.similarity_algorithm = "Similarity.tanimoto"
when "regression"
- lazar.similarity_algorithm = "weighted_euclid"
+ lazar.similarity_algorithm = "Similarity.euclid"
end
else # create features
params[:feature_generation_uri] = feature_generation_uri
if feature_generation_uri.match(/fminer/)
- lazar.feature_calculation_algorithm = "substructure_match"
+ lazar.feature_calculation_algorithm = "Substructure.match"
else
halt 404, "External feature generation services not yet supported"
end
@@ -82,11 +80,9 @@ post '/lazar/?' do
lazar.features = training_features.features.sort if training_features.feature_type == "regression"
training_features.data_entries.each do |compound,entry|
- lazar.fingerprints[compound] = [] unless lazar.fingerprints[compound]
+ lazar.fingerprints[compound] = [] unless lazar.fingerprints[compound]
entry.keys.each do |feature|
- case training_features.feature_type
- when "fminer"
- # fingerprints are sets
+ if feature_generation_uri.match(/fminer/)
smarts = training_features.features[feature][OT.smarts]
lazar.fingerprints[compound] << smarts
unless lazar.features.include? smarts
@@ -94,20 +90,23 @@ post '/lazar/?' do
lazar.p_values[smarts] = training_features.features[feature][OT.p_value]
lazar.effects[smarts] = training_features.features[feature][OT.effect]
end
- when "classification"
- # fingerprints are sets
- if entry[feature].flatten.size == 1
- lazar.fingerprints[compound] << feature if entry[feature].flatten.first.match(TRUE_REGEXP)
- lazar.features << feature unless lazar.features.include? feature
- else
- LOGGER.warn "More than one entry (#{entry[feature].inspect}) for compound #{compound}, feature #{feature}"
- end
- when "regression"
- # fingerprints are arrays
- if entry[feature].flatten.size == 1
- lazar.fingerprints[compound][lazar.features.index(feature)] = entry[feature].flatten.first
- else
- LOGGER.warn "More than one entry (#{entry[feature].inspect}) for compound #{compound}, feature #{feature}"
+ else
+ case training_features.feature_type
+ when "classification"
+ # fingerprints are sets
+ if entry[feature].flatten.size == 1
+ lazar.fingerprints[compound] << feature if entry[feature].flatten.first.to_s.match(TRUE_REGEXP)
+ lazar.features << feature unless lazar.features.include? feature
+ else
+ LOGGER.warn "More than one entry (#{entry[feature].inspect}) for compound #{compound}, feature #{feature}"
+ end
+ when "regression"
+ # fingerprints are arrays
+ if entry[feature].flatten.size == 1
+ lazar.fingerprints[compound][lazar.features.index(feature)] = entry[feature].flatten.first
+ else
+ LOGGER.warn "More than one entry (#{entry[feature].inspect}) for compound #{compound}, feature #{feature}"
+ end
end
end
end
@@ -120,26 +119,30 @@ post '/lazar/?' do
when "false"
lazar.activities[compound] << false
else
+ halt 404, "0 values not allowed in training dataset. log10 is calculated internally." if value.to_f == 0
lazar.activities[compound] << value.to_f
- lazar.prediction_type = "regression"
+ lazar.prediction_algorithm = "Neighbors.local_svm_regression"
end
end
end
+ lazar.metadata[DC.title] = "lazar model for #{URI.decode(File.basename(prediction_feature))}"
+ # TODO: fix dependentVariable
lazar.metadata[OT.dependentVariables] = params[:prediction_feature]
lazar.metadata[OT.trainingDataset] = dataset_uri
lazar.metadata[OT.featureDataset] = feature_dataset_uri
+ lazar.metadata[OT.isA] = OTA.ClassificationLazySingleTarget
- lazar.parameters = {
- "dataset_uri" => dataset_uri,
- "prediction_feature" => prediction_feature,
- "feature_generation_uri" => feature_generation_uri
- }
+ lazar.metadata[OT.parameters] = [
+ {DC.title => "dataset_uri", OT.paramValue => dataset_uri},
+ {DC.title => "prediction_feature", OT.paramValue => prediction_feature},
+ {DC.title => "feature_generation_uri", OT.paramValue => feature_generation_uri}
+ ]
model_uri = lazar.save
LOGGER.info model_uri + " created #{Time.now}"
model_uri
end
- halt 202,task_uri
+ halt 202,task.uri
end