summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorrautenberg <rautenberg@in-silico.ch>2012-03-13 12:11:11 +0100
committerrautenberg <rautenberg@in-silico.ch>2012-03-13 12:11:11 +0100
commitc2a5477758e7a5ce747019c0d784fdf566c3c52a (patch)
tree634e51dda375c4d2a652c4481ee63834e6aee416
parent29fd67926f37e533f80f62f09c7dd71f2fab9130 (diff)
parent5175e0555846b877cf72fd5f9881bae1d16a3b2b (diff)
Merge branch 'release/v3.1.0'v3.1.0
-rw-r--r--ChangeLog5
-rw-r--r--README.md81
-rw-r--r--application.rb5
-rw-r--r--feature_selection.rb85
-rw-r--r--fminer.rb88
m---------last-utils0
-rw-r--r--lazar.rb202
m---------libfminer0
8 files changed, 324 insertions, 142 deletions
diff --git a/ChangeLog b/ChangeLog
new file mode 100644
index 0000000..795a878
--- /dev/null
+++ b/ChangeLog
@@ -0,0 +1,5 @@
+v3.1.0 2012-02-24
+ * lazar.rb: pc type parameter in model, cleaned all parameters,
+ propositionalized learning only for SVM, switch for minimal training
+ performance, removed conf_stdev
+ * fminer.rb: feature match service for datasets, also with number of hits
diff --git a/README.md b/README.md
index dacf1ec..344f747 100644
--- a/README.md
+++ b/README.md
@@ -9,46 +9,58 @@ OpenTox Algorithm
REST operations
---------------
- Get a list of all algorithms GET / - URIs of algorithms 200
- Get a representation of the GET /fminer/ - fminer representation 200,404
+ Get a list of all algorithms GET / - URIs of algorithms 200
+ Get a representation of the GET /fminer/ - fminer representation 200,404
fminer algorithms
- Get a representation of the GET /fminer/bbrc - bbrc representation 200,404
+ Get a representation of the GET /fminer/bbrc - bbrc representation 200,404
bbrc algorithm
- Get a representation of the GET /fminer/last - last representation 200,404
+ Get a representation of the GET /fminer/last - last representation 200,404
last algorithm
- Get a representation of the GET /lazar - lazar representation 200,404
+ Get a representation of the GET /lazar - lazar representation 200,404
lazar algorithm
- Create bbrc features POST /fminer/bbrc dataset_uri, URI for feature dataset 200,400,404,500
- feature_uri,
- [min_frequency=5 per-mil],
- [feature_type=trees],
- [backbone=true],
- [min_chisq_significance=0.95],
- [nr_hits=false]
- Create last features POST /fminer/last dataset_uri, URI for feature dataset 200,400,404,500
- feature_uri,
- [min_frequency=8 %],
- [feature_type=trees],
- [nr_hits=false]
- Create lazar model POST /lazar dataset_uri, URI for lazar model 200,400,404,500
- prediction_feature,
- feature_generation_uri
- prediction_algorithm
- [local_svm_kernel=weighted_tanimoto]
- [min_sim=0.3]
- [nr_hits=false]
- [activity_transform=<Log10 (regression),NOP (classification)>]
- [conf_stdev=false]
+ Get a representation of the GET /feature_selection - feature selection representation 200,404
+ feature selection algorithms
+ Get a representation of the GET /feature_selection/rfe - rfe representation 200,404
+ rfe algorithm
+
+
+ Create bbrc features POST /fminer/bbrc dataset_uri, URI for feature dataset 200,400,404,500
+ feature_uri,
+ [min_frequency=5 per-mil],
+ [feature_type=trees],
+ [backbone=true],
+ [min_chisq_significance=0.95],
+ [nr_hits=false]
+ Create last features POST /fminer/last dataset_uri, URI for feature dataset 200,400,404,500
+ feature_uri,
+ [min_frequency=8 %],
+ [feature_type=trees],
+ [nr_hits=false]
+ Create lazar model POST /lazar dataset_uri, URI for lazar model 200,400,404,500
+ [prediction_feature],
+ [feature_generation_uri],
+ [prediction_algorithm],
+ [feature_dataset_uri],
+ [pc_type=null],
+ [nr_hits=false (class. using wt. maj. vote), true (else)],
+ [min_sim=0.3 (nominal), 0.4 (numeric features)],
+ [min_train_performance=0.1]
+
+ Create selected features POST /feature_selection/rfe dataset_uri, URI for dataset 200,400,404,500
+ prediction_feature_uri,
+ feature_dataset_uri,
+ [del_missing=false]
+
Synopsis
--------
-- prediction\_algorithm: One of "weighted\_majority\_vote" (default for classification), "local\_svm\_classification", "local\_svm\_regression (default for regression)", "local\_mlr\_prop". "weighted\_majority\_vote" is not applicable for regression. "local\_mlr\_prop" is not applicable for classification.
-- local\_svm\_kernel: One of "weighted\_tanimoto", "propositionalized". local\_svm\_kernel is not appplicable when prediction\_algorithm="weighted\_majority\_vote".
-- min_sim: The minimum similarity threshold for neighbors. Numeric value in [0,1].
-- nr_hits: Whether for instantiated models (local\_svm\_kernel = "propositionalized" for prediction_algorithm="local\_svm\_classification" or "local\_svm\_regression", or for prediction_algorithm="local\_mlr\_prop") nominal features should be instantiated with their occurrence counts in the instances. For non-instantiated models (local\_svm\_kernel = "weighted\_tanimoto" for prediction_algorithm="local\_svm\_classification" or "local\_svm\_regression", or for prediction_algorithm="weighted\_majority\_vote") the neighbor-to-neighbor and neighbor-to-query similarity also integrates these counts, when the parameter is set. One of "true", "false".
-- activity_transform: Normalizing transformations of the y-values (activities), applicable only to regression problems. One of "Log10", "Inverter", "NOP". "Log10" moves all values above zero and takes the log to base 10. "Inverter" moves all values above 1.0 and takes the inverted value. "NOP" is the identity transformation, which does nothing. Model predictions are output with reverse transformation applied.
-- conf_stdev: Whether confidence integrates distribution of neighbor activity values. When "true", the exp(-1.0*(standard deviation of neighbor activities)) is multiplied on the similarity. One of "true", "false".
+- prediction\_algorithm: One of "weighted\_majority\_vote" (default for classification), "local\_svm\_classification", "local\_svm\_regression" (default for regression). "weighted\_majority\_vote" is not applicable for regression.
+- pc_type: Mandatory when a feature dataset with numeric features is supplied via feature\_dataset\_uri. One of [geometrical, topological, electronic, constitutional, hybrid, cpsa].
+- nr_hits: Whether nominal features should be instantiated with their occurrence counts in the instances. One of "true", "false".
+- min_sim: The minimum similarity threshold for neighbors. Numeric value in [0,1].
+- min_train_performance: The minimum training performance for "local\_svm\_classification" (Accuracy) and "local\_svm\_regression" (R-squared). Numeric value in [0,1].
+- del_missing: Whether instances with missing values are deleted. One of "true", "false" (default).
See http://www.maunz.de/wordpress/opentox/2011/lazar-models-and-how-to-trigger-them for a graphical overview.
@@ -110,4 +122,9 @@ Creates a standard Lazar model.
[API documentation](http://rdoc.info/github/opentox/algorithm)
--------------------------------------------------------------
+* * *
+
+### Create a feature dataset of selected features
+ curl -X POST -d dataset_uri={dataset_uri} -d prediction_feature_uri={prediction_feature_uri} -d feature_dataset_uri={feature_dataset_uri} -d del_missing=true http://webservices.in-silico.ch/test/algorithm/feature_selection/rfe
+
Copyright (c) 2009-2011 Christoph Helma, Martin Guetlein, Micha Rautenberg, Andreas Maunz, David Vorgrimmler, Denis Gebele. See LICENSE for details.
diff --git a/application.rb b/application.rb
index 18a6c82..f5b331f 100644
--- a/application.rb
+++ b/application.rb
@@ -11,6 +11,7 @@ require 'opentox-ruby'
require 'openbabel.rb'
require 'fminer.rb'
require 'lazar.rb'
+require 'feature_selection.rb'
set :lock, true
@@ -22,11 +23,11 @@ end
#
# @return [text/uri-list] algorithm URIs
get '/?' do
- list = [ url_for('/lazar', :full), url_for('/fminer/bbrc', :full), url_for('/fminer/last', :full) ].join("\n") + "\n"
+ list = [ url_for('/lazar', :full), url_for('/fminer/bbrc', :full), url_for('/fminer/last', :full), url_for('/feature_selection/rfe', :full) ].join("\n") + "\n"
case request.env['HTTP_ACCEPT']
when /text\/html/
content_type "text/html"
- OpenTox.text_to_html list
+ OpenTox.text_to_html list,@subjectid
else
content_type 'text/uri-list'
list
diff --git a/feature_selection.rb b/feature_selection.rb
new file mode 100644
index 0000000..d375a0e
--- /dev/null
+++ b/feature_selection.rb
@@ -0,0 +1,85 @@
+# Get list of feature_selection algorithms
+#
+# @return [text/uri-list] URIs of feature_selection algorithms
+get '/feature_selection/?' do
+ list = [ url_for('/feature_selection/rfe', :full) ].join("\n") + "\n"
+ case request.env['HTTP_ACCEPT']
+ when /text\/html/
+ content_type "text/html"
+ OpenTox.text_to_html list
+ else
+ content_type 'text/uri-list'
+ list
+ end
+end
+
+# Get RDF/XML representation of feature_selection rfe algorithm
+# @return [application/rdf+xml] OWL-DL representation of feature_selection rfe algorithm
+get "/feature_selection/rfe/?" do
+ algorithm = OpenTox::Algorithm::Generic.new(url_for('/feature_selection/rfe',:full))
+ algorithm.metadata = {
+ DC.title => 'recursive feature elimination',
+ DC.creator => "andreas@maunz.de, helma@in-silico.ch",
+ DC.contributor => "vorgrimmlerdavid@gmx.de",
+ BO.instanceOf => "http://opentox.org/ontology/ist-algorithms.owl#feature_selection_rfe",
+ RDF.type => [OT.Algorithm,OTA.PatternMiningSupervised],
+ OT.parameters => [
+ { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" },
+ { DC.description => "Prediction Feature URI", OT.paramScope => "mandatory", DC.title => "prediction_feature_uri" },
+ { DC.description => "Feature Dataset URI", OT.paramScope => "mandatory", DC.title => "feature_dataset_uri" },
+ { DC.description => "Delete Instances with missing values", OT.paramScope => "optional", DC.title => "del_missing" }
+ ]
+ }
+ case request.env['HTTP_ACCEPT']
+ when /text\/html/
+ content_type "text/html"
+ OpenTox.text_to_html algorithm.to_yaml
+ when /application\/x-yaml/
+ content_type "application/x-yaml"
+ algorithm.to_yaml
+ else
+ response['Content-Type'] = 'application/rdf+xml'
+ algorithm.to_rdfxml
+ end
+end
+
+# Run rfe algorithm on dataset
+#
+# @param [String] dataset_uri URI of the training dataset
+# @param [String] feature_dataset_uri URI of the feature dataset
+# @return [text/uri-list] Task URI
+post '/feature_selection/rfe/?' do
+
+ raise OpenTox::NotFoundError.new "Please submit a dataset_uri." unless params[:dataset_uri]
+ raise OpenTox::NotFoundError.new "Please submit a prediction_feature_uri." unless params[:prediction_feature_uri]
+ raise OpenTox::NotFoundError.new "Please submit a feature_dataset_uri." unless params[:feature_dataset_uri]
+
+ ds_csv=OpenTox::RestClientWrapper.get( params[:dataset_uri], {:accept => "text/csv"} )
+ tf_ds=Tempfile.open(['rfe_', '.csv'])
+ tf_ds.puts(ds_csv)
+ tf_ds.flush()
+
+ prediction_feature = params[:prediction_feature_uri].split('/').last # get col name
+
+ fds_csv=OpenTox::RestClientWrapper.get( params[:feature_dataset_uri], {:accept => "text/csv"})
+ tf_fds=Tempfile.open(['rfe_', '.csv'])
+ tf_fds.puts(fds_csv)
+ tf_fds.flush()
+
+ del_missing = params[:del_missing] == "true" ? true : false
+
+ task = OpenTox::Task.create("Recursive Feature Elimination", url_for('/feature_selection',:full)) do |task|
+ r_result_file = OpenTox::Algorithm::FeatureSelection.rfe( { :ds_csv_file => tf_ds.path, :prediction_feature => prediction_feature, :fds_csv_file => tf_fds.path, :del_missing => del_missing } )
+ r_result_uri = OpenTox::Dataset.create_from_csv_file(r_result_file).uri
+ begin
+ tf_ds.close!; tf_fds.close!
+ File.unlink(r_result_file)
+ rescue
+ end
+ r_result_uri
+ end
+ response['Content-Type'] = 'text/uri-list'
+ raise OpenTox::ServiceUnavailableError.newtask.uri+"\n" if task.status == "Cancelled"
+ halt 202,task.uri.to_s+"\n"
+end
+
diff --git a/fminer.rb b/fminer.rb
index 94284db..95ce469 100644
--- a/fminer.rb
+++ b/fminer.rb
@@ -30,15 +30,16 @@ get "/fminer/bbrc/?" do
DC.title => 'fminer backbone refinement class representatives',
DC.creator => "andreas@maunz.de, helma@in-silico.ch",
DC.contributor => "vorgrimmlerdavid@gmx.de",
+# BO.instanceOf => "http://opentox.org/ontology/ist-algorithms.owl#fminer_bbrc",
RDF.type => [OT.Algorithm,OTA.PatternMiningSupervised],
OT.parameters => [
- { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" },
- { DC.description => "Feature URI for dependent variable", OT.paramScope => "mandatory", DC.title => "prediction_feature" },
- { DC.description => "Minimum frequency", OT.paramScope => "optional", DC.title => "minfreq" },
- { DC.description => "Feature type, can be 'paths' or 'trees'", OT.paramScope => "optional", DC.title => "feature_type" },
- { DC.description => "BBRC classes, pass 'false' to switch off mining for BBRC representatives.", OT.paramScope => "optional", DC.title => "backbone" },
- { DC.description => "Significance threshold (between 0 and 1)", OT.paramScope => "optional", DC.title => "min_chisq_significance" },
- ]
+ { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" },
+ { DC.description => "Feature URI for dependent variable", OT.paramScope => "mandatory", DC.title => "prediction_feature" },
+ { DC.description => "Minimum frequency", OT.paramScope => "optional", DC.title => "min_frequency" },
+ { DC.description => "Feature type, can be 'paths' or 'trees'", OT.paramScope => "optional", DC.title => "feature_type" },
+ { DC.description => "BBRC classes, pass 'false' to switch off mining for BBRC representatives.", OT.paramScope => "optional", DC.title => "backbone" },
+ { DC.description => "Significance threshold (between 0 and 1)", OT.paramScope => "optional", DC.title => "min_chisq_significance" },
+ ]
}
case request.env['HTTP_ACCEPT']
when /text\/html/
@@ -61,14 +62,15 @@ get "/fminer/last/?" do
DC.title => 'fminer latent structure class representatives',
DC.creator => "andreas@maunz.de, helma@in-silico.ch",
DC.contributor => "vorgrimmlerdavid@gmx.de",
+# BO.instanceOf => "http://opentox.org/ontology/ist-algorithms.owl#fminer_last",
RDF.type => [OT.Algorithm,OTA.PatternMiningSupervised],
OT.parameters => [
- { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" },
- { DC.description => "Feature URI for dependent variable", OT.paramScope => "mandatory", DC.title => "prediction_feature" },
- { DC.description => "Minimum frequency", OT.paramScope => "optional", DC.title => "minfreq" },
- { DC.description => "Feature type, can be 'paths' or 'trees'", OT.paramScope => "optional", DC.title => "feature_type" },
- { DC.description => "Maximum number of hops", OT.paramScope => "optional", DC.title => "hops" },
- ]
+ { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" },
+ { DC.description => "Feature URI for dependent variable", OT.paramScope => "mandatory", DC.title => "prediction_feature" },
+ { DC.description => "Minimum frequency", OT.paramScope => "optional", DC.title => "min_frequency" },
+ { DC.description => "Feature type, can be 'paths' or 'trees'", OT.paramScope => "optional", DC.title => "feature_type" },
+ { DC.description => "Maximum number of hops", OT.paramScope => "optional", DC.title => "hops" },
+ ]
}
case request.env['HTTP_ACCEPT']
when /text\/html/
@@ -83,6 +85,37 @@ get "/fminer/last/?" do
end
end
+# Creates same features for dataset <dataset_uri> that have been created
+# with fminer in dataset <feature_dataset_uri>
+# accept params[:nr_hits] as used in other fminer methods
+post '/fminer/:method/match?' do
+ raise OpenTox::BadRequestError.new "feature_dataset_uri not given" unless params[:feature_dataset_uri]
+ raise OpenTox::BadRequestError.new "dataset_uri not given" unless params[:dataset_uri]
+ task = OpenTox::Task.create("Matching features", url_for('/fminer/match',:full)) do |task|
+ f_dataset = OpenTox::Dataset.find params[:feature_dataset_uri],@subjectid
+ c_dataset = OpenTox::Dataset.find params[:dataset_uri],@subjectid
+ res_dataset = OpenTox::Dataset.create CONFIG[:services]["dataset"],@subjectid
+ f_dataset.features.each do |f,m|
+ res_dataset.add_feature(f,m)
+ end
+ c_dataset.compounds.each do |c|
+ res_dataset.add_compound(c)
+ comp = OpenTox::Compound.new(c)
+ f_dataset.features.each do |f,m|
+ if params[:nr_hits] == "true"
+ hits = comp.match_hits([m[OT.smarts]])
+ res_dataset.add(c,f,hits[m[OT.smarts]]) if hits[m[OT.smarts]]
+ else
+ res_dataset.add(c,f,1) if comp.match?(m[OT.smarts])
+ end
+ end
+ end
+ res_dataset.save @subjectid
+ res_dataset.uri
+ end
+ return_task(task)
+end
+
# Run bbrc algorithm on dataset
#
# @param [String] dataset_uri URI of the training dataset
@@ -124,7 +157,7 @@ post '/fminer/bbrc/?' do
OT.parameters => [
{ DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] },
{ DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] }
- ]
+ ]
})
feature_dataset.save(@subjectid)
@@ -138,12 +171,12 @@ post '/fminer/bbrc/?' do
g_array=fminer.all_activities.values # DV: calculation of global median for effect calculation
g_median=g_array.to_scale.median
-
+
raise "No compounds in dataset #{fminer.training_dataset.uri}" if fminer.compounds.size==0
task.progress 10
step_width = 80 / @@bbrc.GetNoRootNodes().to_f
features = Set.new
-
+
# run @@bbrc
(0 .. @@bbrc.GetNoRootNodes()-1).each do |j|
results = @@bbrc.MineRoot(j)
@@ -185,7 +218,7 @@ post '/fminer/bbrc/?' do
OT.parameters => [
{ DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] },
{ DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] }
- ]
+ ]
}
feature_dataset.add_feature feature_uri, metadata
#feature_dataset.add_feature_parameters feature_uri, feature_dataset.parameters
@@ -199,8 +232,13 @@ post '/fminer/bbrc/?' do
feature_dataset.add(fminer.compounds[id], feature_uri, 1)
end
}
- end
- end
+
+ end # end of
+ end # feature parsing
+
+ # AM: add feature values for non-present features
+ # feature_dataset.complete_data_entries
+
feature_dataset.save(@subjectid)
feature_dataset.uri
end
@@ -250,7 +288,7 @@ post '/fminer/last/?' do
OT.parameters => [
{ DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] },
{ DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] }
- ]
+ ]
})
feature_dataset.save(@subjectid)
@@ -261,7 +299,7 @@ post '/fminer/last/?' do
# Add data to fminer
fminer.add_fminer_data(@@last, params, @value_map)
-
+
raise "No compounds in dataset #{fminer.training_dataset.uri}" if fminer.compounds.size==0
# run @@last
@@ -309,7 +347,7 @@ post '/fminer/last/?' do
OT.parameters => [
{ DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] },
{ DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] }
- ]
+ ]
}
feature_dataset.add_feature feature_uri, metadata
end
@@ -319,10 +357,14 @@ post '/fminer/last/?' do
ids.each_with_index { |id,i| feature_dataset.add(fminer.compounds[id], feature_uri, counts[smarts][i])}
end
end
+
+ # AM: add feature values for non-present features
+ # feature_dataset.complete_data_entries
+
feature_dataset.save(@subjectid)
feature_dataset.uri
end
response['Content-Type'] = 'text/uri-list'
raise OpenTox::ServiceUnavailableError.newtask.uri+"\n" if task.status == "Cancelled"
halt 202,task.uri.to_s+"\n"
-end \ No newline at end of file
+end
diff --git a/last-utils b/last-utils
-Subproject 04bd1b73f54bb7422d3c08bb5a81bc02af04f6f
+Subproject cf0238477127e54509b6ab8b5c38f50dd6ffce0
diff --git a/lazar.rb b/lazar.rb
index f4915a7..2a08778 100644
--- a/lazar.rb
+++ b/lazar.rb
@@ -8,12 +8,13 @@ get '/lazar/?' do
DC.title => 'lazar',
DC.creator => "helma@in-silico.ch, andreas@maunz.de",
DC.contributor => "vorgrimmlerdavid@gmx.de",
+# BO.instanceOf => "http://opentox.org/ontology/ist-algorithms.owl#lazar",
OT.parameters => [
{ DC.description => "Dataset URI with the dependent variable", OT.paramScope => "mandatory", DC.title => "dataset_uri" },
{ DC.description => "Feature URI for dependent variable. Optional for datasets with only a single feature.", OT.paramScope => "optional", DC.title => "prediction_feature" },
- { DC.description => "URI of feature genration service. Default: #{@@feature_generation_default}", OT.paramScope => "optional", DC.title => "feature_generation_uri" },
+ { DC.description => "URI of feature generation service. Default: #{@@feature_generation_default}", OT.paramScope => "optional", DC.title => "feature_generation_uri" },
{ DC.description => "URI of feature dataset. If this parameter is set no feature generation algorithm will be called", OT.paramScope => "optional", DC.title => "feature_dataset_uri" },
- { DC.description => "Further parameters for the feaature generation service", OT.paramScope => "optional" }
+ { DC.description => "Further parameters for the feature generation service", OT.paramScope => "optional" }
]
}
case request.env['HTTP_ACCEPT']
@@ -44,61 +45,102 @@ post '/lazar/?' do
task = OpenTox::Task.create("Create lazar model",url_for('/lazar',:full)) do |task|
+
+ # # # Dataset present, prediction feature present?
raise OpenTox::NotFoundError.new "Dataset #{dataset_uri} not found." unless training_activities = OpenTox::Dataset.new(dataset_uri)
training_activities.load_all(@subjectid)
+ # Prediction Feature
prediction_feature = OpenTox::Feature.find(params[:prediction_feature],@subjectid)
unless params[:prediction_feature] # try to read prediction_feature from dataset
raise OpenTox::NotFoundError.new "#{training_activities.features.size} features in dataset #{dataset_uri}. Please provide a prediction_feature parameter." unless training_activities.features.size == 1
prediction_feature = OpenTox::Feature.find(training_activities.features.keys.first,@subjectid)
params[:prediction_feature] = prediction_feature.uri # pass to feature mining service
end
+ raise OpenTox::NotFoundError.new "No feature #{prediction_feature.uri} in dataset #{params[:dataset_uri]}. (features: "+ training_activities.features.inspect+")" unless training_activities.features and training_activities.features.include?(prediction_feature.uri)
- feature_generation_uri = @@feature_generation_default unless feature_generation_uri = params[:feature_generation_uri]
-
- raise OpenTox::NotFoundError.new "No feature #{prediction_feature.uri} in dataset #{params[:dataset_uri]}. (features: "+
- training_activities.features.inspect+")" unless training_activities.features and training_activities.features.include?(prediction_feature.uri)
+ # Feature Generation URI
+ feature_generation_uri = @@feature_generation_default unless ( (feature_generation_uri = params[:feature_generation_uri]) || (params[:feature_dataset_uri]) )
+ # Create instance
lazar = OpenTox::Model::Lazar.new
- lazar.min_sim = params[:min_sim].to_f if params[:min_sim]
+
+
+
+ # # # ENDPOINT RELATED
+
+ # Default Values
+ # Classification: Weighted Majority, Substructure.match
if prediction_feature.feature_type == "classification"
@training_classes = training_activities.accept_values(prediction_feature.uri).sort
@training_classes.each_with_index { |c,i|
lazar.value_map[i+1] = c # don't use '0': we must take the weighted mean later.
params[:value_map] = lazar.value_map
}
+ # Regression: SVM, Substructure.match_hits
elsif prediction_feature.feature_type == "regression"
- lazar.nr_hits = true
+ lazar.feature_calculation_algorithm = "Substructure.match_hits"
lazar.prediction_algorithm = "Neighbors.local_svm_regression"
end
- if params[:nr_hits] == "false" # if nr_hits is set optional to true/false it will return as String (but should be True/FalseClass)
- lazar.nr_hits = false
- #params[:nr_hits] = false
- elsif params[:nr_hits] == "true"
- lazar.nr_hits = true
+
+
+
+ # # # USER VALUES
+
+ # Min Sim
+ min_sim = params[:min_sim].to_f if params[:min_sim]
+ min_sim = 0.3 unless params[:min_sim]
+
+ # Algorithm
+ lazar.prediction_algorithm = "Neighbors.#{params[:prediction_algorithm]}" if params[:prediction_algorithm]
+
+ # Nr Hits
+ nr_hits = false
+ if params[:nr_hits] == "true" || lazar.prediction_algorithm.include?("local_svm")
+ lazar.feature_calculation_algorithm = "Substructure.match_hits"
+ nr_hits = true
end
- params[:nr_hits] = "true" if lazar.nr_hits
+ params[:nr_hits] = "true" if lazar.feature_calculation_algorithm == "Substructure.match_hits" #not sure if this line in needed
+
+ # Propositionalization
+ propositionalized = (lazar.prediction_algorithm=="Neighbors.weighted_majority_vote" ? false : true)
+
+ # PC type
+ pc_type = params[:pc_type] unless params[:pc_type].nil?
+
+ # Min train performance
+ min_train_performance = params[:min_train_performance].to_f if params[:min_train_performance]
+ min_train_performance = 0.1 unless params[:min_train_performance]
+
+
+
+
+
task.progress 10
- if params[:feature_dataset_uri]
+
+
+
+
+ # # # Features
+
+ # Read Features
+ if params[:feature_dataset_uri]
+ lazar.feature_calculation_algorithm = "Substructure.lookup"
feature_dataset_uri = params[:feature_dataset_uri]
training_features = OpenTox::Dataset.new(feature_dataset_uri)
- case training_features.feature_type(@subjectid)
- when "classification"
- lazar.similarity_algorithm = "Similarity.tanimoto"
- when "regression"
- lazar.similarity_algorithm = "Similarity.euclid"
+ if training_features.feature_type(@subjectid) == "regression"
+ lazar.similarity_algorithm = "Similarity.cosine"
+ min_sim = 0.4 unless params[:min_sim]
+ raise OpenTox::NotFoundError.new "No pc_type parameter." unless params[:pc_type]
end
- else # create features
+
+ # Create Features
+ else
params[:feature_generation_uri] = feature_generation_uri
- if feature_generation_uri.match(/fminer/)
- lazar.feature_calculation_algorithm = "Substructure.match"
- else
- raise OpenTox::NotFoundError.new "External feature generation services not yet supported"
- end
params[:subjectid] = @subjectid
prediction_feature = OpenTox::Feature.find params[:prediction_feature], @subjectid
if prediction_feature.feature_type == "regression" && feature_generation_uri.match(/fminer/)
@@ -108,82 +150,60 @@ post '/lazar/?' do
training_features = OpenTox::Dataset.new(feature_dataset_uri)
end
+
+
+ # # # Write fingerprints
training_features.load_all(@subjectid)
raise OpenTox::NotFoundError.new "Dataset #{feature_dataset_uri} not found." if training_features.nil?
- # sorted features for index lookups
+ training_features.data_entries.each do |compound,entry|
+ if training_activities.data_entries.has_key? compound
- lazar.features = training_features.features.sort if prediction_feature.feature_type == "regression" and lazar.feature_calculation_algorithm != "Substructure.match"
+ lazar.fingerprints[compound] = {} unless lazar.fingerprints[compound]
+ entry.keys.each do |feature|
- training_features.data_entries.each do |compound,entry|
- lazar.fingerprints[compound] = {} unless lazar.fingerprints[compound]
- entry.keys.each do |feature|
- if lazar.feature_calculation_algorithm == "Substructure.match"
- if training_features.features[feature]
- smarts = training_features.features[feature][OT.smarts]
- #lazar.fingerprints[compound] << smarts
- if params[:nr_hits]
- lazar.fingerprints[compound][smarts] = entry[feature].flatten.first
- else
- lazar.fingerprints[compound][smarts] = 1
- end
- unless lazar.features.include? smarts
- lazar.features << smarts
- lazar.p_values[smarts] = training_features.features[feature][OT.pValue]
- lazar.effects[smarts] = training_features.features[feature][OT.effect]
- end
- end
- else
- case training_features.feature_type(@subjectid)
- when "classification"
- # fingerprints are sets
- if entry[feature].flatten.size == 1
- #lazar.fingerprints[compound] << feature if entry[feature].flatten.first.to_s.match(TRUE_REGEXP)
- lazar.fingerprints[compound][feature] = entry[feature].flatten.first if entry[feature].flatten.first.to_s.match(TRUE_REGEXP)
- lazar.features << feature unless lazar.features.include? feature
- else
- LOGGER.warn "More than one entry (#{entry[feature].inspect}) for compound #{compound}, feature #{feature}"
- end
- when "regression"
- # fingerprints are arrays
- if entry[feature].flatten.size == 1
- lazar.fingerprints[compound][lazar.features.index(feature)] = entry[feature].flatten.first
- #lazar.fingerprints[compound][feature] = entry[feature].flatten.first
- else
- LOGGER.warn "More than one entry (#{entry[feature].inspect}) for compound #{compound}, feature #{feature}"
+ # CASE 1: Substructure
+ if (lazar.feature_calculation_algorithm == "Substructure.match") || (lazar.feature_calculation_algorithm == "Substructure.match_hits")
+ if training_features.features[feature]
+ smarts = training_features.features[feature][OT.smarts]
+ #lazar.fingerprints[compound] << smarts
+ if lazar.feature_calculation_algorithm == "Substructure.match_hits"
+ lazar.fingerprints[compound][smarts] = entry[feature].flatten.first * training_features.features[feature][OT.pValue]
+ else
+ lazar.fingerprints[compound][smarts] = 1 * training_features.features[feature][OT.pValue]
+ end
+ unless lazar.features.include? smarts
+ lazar.features << smarts
+ lazar.p_values[smarts] = training_features.features[feature][OT.pValue]
+ lazar.effects[smarts] = training_features.features[feature][OT.effect]
+ end
end
+
+ # CASE 2: Others
+ elsif entry[feature].flatten.size == 1
+ lazar.fingerprints[compound][feature] = entry[feature].flatten.first
+ lazar.features << feature unless lazar.features.include? feature
+ else
+ LOGGER.warn "More than one entry (#{entry[feature].inspect}) for compound #{compound}, feature #{feature}"
end
end
+
end
end
task.progress 80
- # AM: allow settings override by user
- lazar.prediction_algorithm = "Neighbors.#{params[:prediction_algorithm]}" unless params[:prediction_algorithm].nil?
- if prediction_feature.feature_type == "regression"
- lazar.transform["class"] = "Log10" if lazar.transform["class"] == "NOP"
- end
- lazar.transform["class"] = params[:activity_transform] unless params[:activity_transform].nil?
- lazar.prop_kernel = true if (params[:local_svm_kernel] == "propositionalized" || params[:prediction_algorithm] == "local_mlr_prop")
- lazar.conf_stdev = false
- lazar.conf_stdev = true if params[:conf_stdev] == "true"
- # AM: Feed Data using Transformations
+
+
+
+ # # # Activities
+
if prediction_feature.feature_type == "regression"
- transformed_acts = []
- training_activities.data_entries.each do |compound,entry|
- transformed_acts.concat entry[prediction_feature.uri] unless entry[prediction_feature.uri].empty?
- end
- transformer = eval "OpenTox::Algorithm::Transform::#{lazar.transform["class"]}.new(transformed_acts)"
- transformed_acts = transformer.values
- lazar.transform["offset"] = transformer.offset
- t_count=0
training_activities.data_entries.each do |compound,entry|
lazar.activities[compound] = [] unless lazar.activities[compound]
unless entry[prediction_feature.uri].empty?
entry[prediction_feature.uri].each do |value|
- lazar.activities[compound] << transformed_acts[t_count].to_s
- t_count+=1
+ lazar.activities[compound] << value
end
end
end
@@ -199,6 +219,11 @@ post '/lazar/?' do
end
task.progress 90
+
+
+
+ # Metadata
+
lazar.metadata[DC.title] = "lazar model for #{URI.decode(File.basename(prediction_feature.uri))}"
lazar.metadata[OT.dependentVariables] = prediction_feature.uri
lazar.metadata[OT.trainingDataset] = dataset_uri
@@ -213,12 +238,19 @@ post '/lazar/?' do
lazar.metadata[OT.parameters] = [
{DC.title => "dataset_uri", OT.paramValue => dataset_uri},
{DC.title => "prediction_feature", OT.paramValue => prediction_feature.uri},
- {DC.title => "feature_generation_uri", OT.paramValue => feature_generation_uri}
+ {DC.title => "feature_generation_uri", OT.paramValue => feature_generation_uri},
+ {DC.title => "propositionalized", OT.paramValue => propositionalized},
+ {DC.title => "pc_type", OT.paramValue => pc_type},
+ {DC.title => "nr_hits", OT.paramValue => nr_hits},
+ {DC.title => "min_sim", OT.paramValue => min_sim},
+ {DC.title => "min_train_performance", OT.paramValue => min_train_performance},
+
]
model_uri = lazar.save(@subjectid)
LOGGER.info model_uri + " created #{Time.now}"
model_uri
+
end
response['Content-Type'] = 'text/uri-list'
raise OpenTox::ServiceUnavailableError.newtask.uri+"\n" if task.status == "Cancelled"
diff --git a/libfminer b/libfminer
-Subproject 07679a625a7acad864fd3abd80654a1a0a61e69
+Subproject f9e560dc0a7a5d5af439814ab5fa9ce027a025b