summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: mr <mr@mrautenberg.de> 2011-05-23 13:25:19 +0200
committer: mr <mr@mrautenberg.de> 2011-05-23 13:25:19 +0200
commit: 0d0d76b5417a9658e402e671fc02f8ef04d64fe0 (patch)
tree: 5cd6e8b9052eab85032e34fb964398cd1be22af4
parent: d434ab913b637c6334b159f603596e5ea95cbdc5 (diff)
parent: aec620e1e1df60dc4fa75470916fb7d624366aaa (diff)
new release 2.0.0
-rw-r--r-- application.rb 2
-rw-r--r-- fminer.rb 105
m--------- last-utils 0
-rw-r--r-- lazar.rb 79
m--------- libfminer 0
-rw-r--r-- openbabel.rb 2
6 files changed, 120 insertions, 68 deletions
diff --git a/application.rb b/application.rb
index e36643b..55a8ea4 100644
--- a/application.rb
+++ b/application.rb
@@ -3,7 +3,7 @@ require 'rubygems'
require File.join(File.expand_path(File.dirname(__FILE__)), 'libfminer/libbbrc/bbrc') # has to be included before openbabel, otherwise we have strange SWIG overloading problems
require File.join(File.expand_path(File.dirname(__FILE__)), 'libfminer/liblast/last') # has to be included before openbabel, otherwise we have strange SWIG overloading problems
require File.join(File.expand_path(File.dirname(__FILE__)), 'last-utils/lu.rb') # AM LAST
-gem "opentox-ruby", "~> 1"
+gem "opentox-ruby", "~> 2"
require 'opentox-ruby'
#require 'smarts.rb'
diff --git a/fminer.rb b/fminer.rb
index d0ea4f4..ce7cc3c 100644
--- a/fminer.rb
+++ b/fminer.rb
@@ -23,7 +23,7 @@ get "/fminer/bbrc/?" do
DC.title => 'fminer backbone refinement class representatives',
DC.creator => "andreas@maunz.de, helma@in-silico.ch",
DC.contributor => "vorgrimmlerdavid@gmx.de",
- OT.isA => OTA.PatternMiningSupervised,
+ RDF.type => [OTA.PatternMiningSupervised],
OT.parameters => [
{ DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" },
{ DC.description => "Feature URI for dependent variable", OT.paramScope => "mandatory", DC.title => "prediction_feature" },
@@ -44,7 +44,7 @@ get "/fminer/last/?" do
DC.title => 'fminer latent structure class representatives',
DC.creator => "andreas@maunz.de, helma@in-silico.ch",
DC.contributor => "vorgrimmlerdavid@gmx.de",
- OT.isA => OTA.PatternMiningSupervised,
+ RDF.type => [OTA.PatternMiningSupervised],
OT.parameters => [
{ DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" },
{ DC.description => "Feature URI for dependent variable", OT.paramScope => "mandatory", DC.title => "prediction_feature" },
@@ -70,8 +70,7 @@ post '/fminer/bbrc/?' do
halt 404, "Please submit a dataset_uri." unless params[:dataset_uri] and !params[:dataset_uri].nil?
halt 404, "Please submit a prediction_feature." unless params[:prediction_feature] and !params[:prediction_feature].nil?
- prediction_feature = params[:prediction_feature]
-
+ prediction_feature = OpenTox::Feature.find params[:prediction_feature], @subjectid
training_dataset = OpenTox::Dataset.find "#{params[:dataset_uri]}", @subjectid
halt 404, "No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}" unless training_dataset.features and training_dataset.features.include?(params[:prediction_feature])
@@ -89,8 +88,13 @@ post '/fminer/bbrc/?' do
@@bbrc.SetMinfreq(minfreq)
@@bbrc.SetType(1) if params[:feature_type] == "paths"
@@bbrc.SetBackbone(eval params[:backbone]) if params[:backbone] and ( params[:backbone] == "true" or params[:backbone] == "false" ) # convert string to boolean
- @@bbrc.SetChisqSig(params[:min_chisq_significance]) if params[:min_chisq_significance]
+ @@bbrc.SetChisqSig(params[:min_chisq_significance].to_f) if params[:min_chisq_significance]
@@bbrc.SetConsoleOut(false)
+ if prediction_feature.feature_type == "regression"
+ @@bbrc.SetRegression(true)
+ else
+ @training_classes = training_dataset.feature_classes(prediction_feature.uri, @subjectid)
+ end
feature_dataset = OpenTox::Dataset.new(nil, @subjectid)
feature_dataset.add_metadata({
@@ -121,22 +125,43 @@ post '/fminer/bbrc/?' do
LOGGER.warn "Cannot find smiles for #{compound.to_s}."
next
end
+
+ # AM: take log if appropriate
+ take_logs=true
+ entry.each do |feature,values|
+ values.each do |value|
+ if prediction_feature.feature_type == "regression"
+ if (! value.nil?) && (value.to_f < 0)
+ take_logs=false
+ end
+ end
+ end
+ end
entry.each do |feature,values|
- if feature == prediction_feature
+ if feature == prediction_feature.uri
values.each do |value|
if value.nil?
LOGGER.warn "No #{feature} activiity for #{compound.to_s}."
else
- case value.to_s
- when "true"
- nr_active += 1
- activity = 1
- when "false"
- nr_inactive += 1
- activity = 0
- else
- activity = value.to_f
- @@bbrc.SetRegression(true)
+ if prediction_feature.feature_type == "classification"
+ case value.to_s
+ when "true"
+ nr_active += 1
+ activity = 1
+ when "false"
+ nr_inactive += 1
+ activity = 0
+ when /#{@training_classes.last}/
+ nr_active += 1
+ activity = 1
+ when /#{@training_classes.first}/
+ nr_inactive += 1
+ activity = 0
+ else
+ LOGGER.warn "Unknown class \"#{value.to_s}\"."
+ end
+ elsif prediction_feature.feature_type == "regression"
+ activity= take_logs ? Math.log10(value.to_f) : value.to_f
end
begin
@@bbrc.AddCompound(smiles,id)
@@ -195,7 +220,7 @@ post '/fminer/bbrc/?' do
features << smarts
metadata = {
OT.hasSource => url_for('/fminer/bbrc', :full),
- OT.isA => OT.Substructure,
+ RDF.type => [OT.Substructure],
OT.smarts => smarts,
OT.pValue => p_value.to_f,
OT.effect => effect,
@@ -232,7 +257,7 @@ post '/fminer/last/?' do
halt 404, "Please submit a dataset_uri." unless params[:dataset_uri] and !params[:dataset_uri].nil?
halt 404, "Please submit a prediction_feature." unless params[:prediction_feature] and !params[:prediction_feature].nil?
- prediction_feature = params[:prediction_feature]
+ prediction_feature = OpenTox::Feature.find params[:prediction_feature], @subjectid
training_dataset = OpenTox::Dataset.new "#{params[:dataset_uri]}", @subjectid
training_dataset.load_all(@subjectid)
halt 404, "No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}" unless training_dataset.features and training_dataset.features.include?(params[:prediction_feature])
@@ -252,8 +277,13 @@ post '/fminer/last/?' do
@@last.SetType(1) if params[:feature_type] == "paths"
@@last.SetMaxHops(params[:hops]) if params[:hops]
@@last.SetConsoleOut(false)
+ if prediction_feature.feature_type == "regression"
+ @@last.SetRegression(true)
+ else
+ @training_classes = training_dataset.feature_classes(prediction_feature.uri)
+ end
- feature_dataset = OpenTox::Dataset.new
+ feature_dataset = OpenTox::Dataset.new(nil, @subjectid)
feature_dataset.add_metadata({
DC.title => "LAST representatives for " + training_dataset.metadata[DC.title].to_s,
DC.creator => url_for('/fminer/last',:full),
@@ -284,28 +314,37 @@ post '/fminer/last/?' do
next
end
entry.each do |feature,values|
- if feature == prediction_feature
+ if feature == prediction_feature.uri
values.each do |value|
if value.nil?
LOGGER.warn "No #{feature} activiity for #{compound.to_s}."
else
- case value.to_s
- when "true"
- nr_active += 1
- activity = 1
- when "false"
- nr_inactive += 1
- activity = 0
- else
+ if prediction_feature.feature_type == "classification"
+ case value.to_s
+ when "true"
+ nr_active += 1
+ activity = 1
+ when "false"
+ nr_inactive += 1
+ activity = 0
+ when /#{@training_classes.last}/
+ nr_active += 1
+ activity = 1
+ when /#{@training_classes.first}/
+ nr_inactive += 1
+ activity = 0
+ else
+ LOGGER.warn "Unknown class \"#{value.to_s}."
+ end
+ elsif prediction_feature.feature_type == "regression"
activity = value.to_f
- @@last.SetRegression(true)
end
begin
@@last.AddCompound(smiles,id)
@@last.AddActivity(activity, id)
all_activities[id]=activity # DV: insert global information
compounds[id] = compound
- smi[id] = smiles # AM LAST: changed this to store SMILES.
+ smi[id] = smiles # AM LAST: changed this to store SMILES.
id += 1
rescue
LOGGER.warn "Could not add " + smiles + "\t" + value.to_s + " to fminer"
@@ -315,7 +354,7 @@ post '/fminer/last/?' do
end
end
end
-
+
raise "No compounds in dataset #{training_dataset.uri}" if compounds.size==0
# run @@last
@@ -330,7 +369,7 @@ post '/fminer/last/?' do
end
lu = LU.new # AM LAST: uses last-utils here
- dom=lu.read(xml) # AM LAST: parse GraphML (needs hpricot, @ch: to be included in wrapper!)
+ dom=lu.read(xml) # AM LAST: parse GraphML
smarts=lu.smarts_rb(dom,'nls') # AM LAST: converts patterns to LAST-SMARTS using msa variant (see last-pm.maunz.de)
instances=lu.match_rb(smi,smarts) # AM LAST: creates instantiations
instances.each do |smarts, ids|
@@ -343,7 +382,7 @@ post '/fminer/last/?' do
unless features.include? smarts
features << smarts
metadata = {
- OT.isA => OT.Substructure,
+ RDF.type => [OT.Substructure],
OT.hasSource => feature_dataset.uri,
OT.smarts => smarts,
OT.pValue => p_value.to_f.abs,
diff --git a/last-utils b/last-utils
-Subproject daafa32e330b27111df6dc7193a6ed72fae2be4
+Subproject 75bea7645601fd296aa68c6678ee9b0a49a7b91
diff --git a/lazar.rb b/lazar.rb
index 67d9f74..45123f0 100644
--- a/lazar.rb
+++ b/lazar.rb
@@ -35,17 +35,17 @@ post '/lazar/?' do
halt 404, "Dataset #{dataset_uri} not found." unless training_activities = OpenTox::Dataset.new(dataset_uri)
training_activities.load_all(@subjectid)
- prediction_feature = params[:prediction_feature]
- unless prediction_feature # try to read prediction_feature from dataset
+ prediction_feature = OpenTox::Feature.find(params[:prediction_feature],@subjectid)
+ unless params[:prediction_feature] # try to read prediction_feature from dataset
halt 404, "#{training_activities.features.size} features in dataset #{dataset_uri}. Please provide a prediction_feature parameter." unless training_activities.features.size == 1
- prediction_feature = training_activities.features.keys.first
- params[:prediction_feature] = prediction_feature
+ prediction_feature = OpenTox::Feature.find(training_activities.features.keys.first,@subjectid)
+ params[:prediction_feature] = prediction_feature.uri # pass to feature mining service
end
feature_generation_uri = @@feature_generation_default unless feature_generation_uri = params[:feature_generation_uri]
- halt 404, "No feature #{prediction_feature} in dataset #{params[:dataset_uri]}. (features: "+
- training_activities.features.inspect+")" unless training_activities.features and training_activities.features.include?(prediction_feature)
+ halt 404, "No feature #{prediction_feature.uri} in dataset #{params[:dataset_uri]}. (features: "+
+ training_activities.features.inspect+")" unless training_activities.features and training_activities.features.include?(prediction_feature.uri)
task = OpenTox::Task.create("Create lazar model",url_for('/lazar',:full)) do |task|
@@ -55,7 +55,7 @@ post '/lazar/?' do
if params[:feature_dataset_uri]
feature_dataset_uri = params[:feature_dataset_uri]
training_features = OpenTox::Dataset.new(feature_dataset_uri)
- case training_features.feature_type
+ case training_features.feature_type(@subjectid)
when "classification"
lazar.similarity_algorithm = "Similarity.tanimoto"
when "regression"
@@ -73,7 +73,6 @@ post '/lazar/?' do
if prediction_feature.feature_type == "regression" && feature_generation_uri.match(/fminer/)
params[:feature_type] = "paths"
end
- prediction_feature = prediction_feature.uri #hotfix this will change in future version see development branch 2011/04/06 mr
feature_dataset_uri = OpenTox::Algorithm::Generic.new(feature_generation_uri).run(params).to_s
training_features = OpenTox::Dataset.new(feature_dataset_uri)
end
@@ -82,21 +81,24 @@ post '/lazar/?' do
halt 404, "Dataset #{feature_dataset_uri} not found." if training_features.nil?
# sorted features for index lookups
- lazar.features = training_features.features.sort if training_features.feature_type == "regression"
+
+ lazar.features = training_features.features.sort if prediction_feature.feature_type == "regression" and lazar.feature_calculation_algorithm != "Substructure.match"
training_features.data_entries.each do |compound,entry|
lazar.fingerprints[compound] = [] unless lazar.fingerprints[compound]
entry.keys.each do |feature|
- if feature_generation_uri.match(/fminer/)
- smarts = training_features.features[feature][OT.smarts]
- lazar.fingerprints[compound] << smarts
- unless lazar.features.include? smarts
- lazar.features << smarts
- lazar.p_values[smarts] = training_features.features[feature][OT.pValue]
- lazar.effects[smarts] = training_features.features[feature][OT.effect]
+ if lazar.feature_calculation_algorithm == "Substructure.match"
+ if training_features.features[feature]
+ smarts = training_features.features[feature][OT.smarts]
+ lazar.fingerprints[compound] << smarts
+ unless lazar.features.include? smarts
+ lazar.features << smarts
+ lazar.p_values[smarts] = training_features.features[feature][OT.pValue]
+ lazar.effects[smarts] = training_features.features[feature][OT.effect]
+ end
end
else
- case training_features.feature_type
+ case training_features.feature_type(@subjectid)
when "classification"
# fingerprints are sets
if entry[feature].flatten.size == 1
@@ -117,38 +119,49 @@ post '/lazar/?' do
end
end
+ @training_classes = training_activities.feature_classes(prediction_feature.uri, @subjectid) if prediction_feature.feature_type == "classification"
+ lazar.prediction_algorithm = "Neighbors.local_svm_regression" if prediction_feature.feature_type == "regression"
+
training_activities.data_entries.each do |compound,entry|
lazar.activities[compound] = [] unless lazar.activities[compound]
- unless entry[params[:prediction_feature]].empty?
- entry[params[:prediction_feature]].each do |value|
- case value.to_s
- when "true"
- lazar.activities[compound] << true
- when "false"
- lazar.activities[compound] << false
- else
+ unless entry[prediction_feature.uri].empty?
+ entry[prediction_feature.uri].each do |value|
+ if prediction_feature.feature_type == "classification"
+ case value.to_s
+ when "true"
+ lazar.activities[compound] << true
+ when "false"
+ lazar.activities[compound] << false
+ when /#{@training_classes.last}/
+ lazar.activities[compound] << true
+ when /#{@training_classes.first}/
+ lazar.activities[compound] << false
+ else
+ LOGGER.warn "Unknown class \"#{value.to_s}\"."
+ end
+ elsif prediction_feature.feature_type == "regression"
halt 404, "0 values not allowed in training dataset. log10 is calculated internally." if value.to_f == 0
lazar.activities[compound] << value.to_f
- lazar.prediction_algorithm = "Neighbors.local_svm_regression"
end
end
end
end
- lazar.metadata[DC.title] = "lazar model for #{URI.decode(File.basename(prediction_feature))}"
+ lazar.metadata[DC.title] = "lazar model for #{URI.decode(File.basename(prediction_feature.uri))}"
# TODO: fix dependentVariable
- lazar.metadata[OT.dependentVariables] = params[:prediction_feature]
+ lazar.metadata[OT.dependentVariables] = prediction_feature.uri
lazar.metadata[OT.trainingDataset] = dataset_uri
lazar.metadata[OT.featureDataset] = feature_dataset_uri
- if training_activities.feature_type.to_s == "classification"
- lazar.metadata[OT.isA] = OTA.ClassificationLazySingleTarget
- elsif training_activities.feature_type.to_s == "regression"
- lazar.metadata[OT.isA] = OTA.RegressionLazySingleTarget
+
+ if prediction_feature.feature_type == "classification"
+ lazar.metadata[RDF.type] = [OTA.ClassificationLazySingleTarget]
+ elsif prediction_feature.feature_type == "regression"
+ lazar.metadata[RDF.type] = [OTA.RegressionLazySingleTarget]
end
lazar.metadata[OT.parameters] = [
{DC.title => "dataset_uri", OT.paramValue => dataset_uri},
- {DC.title => "prediction_feature", OT.paramValue => prediction_feature},
+ {DC.title => "prediction_feature", OT.paramValue => prediction_feature.uri},
{DC.title => "feature_generation_uri", OT.paramValue => feature_generation_uri}
]
diff --git a/libfminer b/libfminer
-Subproject 01b8e50e8e6fb3ce29fc8bf0a65a8c6f6af94b3
+Subproject d51f5e784ce0f5b7ef1c47c52ea55d1c874ec2e
diff --git a/openbabel.rb b/openbabel.rb
index 3a873c0..1644455 100644
--- a/openbabel.rb
+++ b/openbabel.rb
@@ -44,7 +44,7 @@ get '/openbabel/:property' do
DC.title => params[:property],
DC.creator => "helma@in-silico.ch",
DC.description => description,
- OT.isA => OTA.DescriptorCalculation,
+ RDF.type => [OTA.DescriptorCalculation],
}
response['Content-Type'] = 'application/rdf+xml'
algorithm.to_rdfxml