summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristoph Helma <helma@in-silico.ch>2013-03-26 10:43:43 +0100
committerChristoph Helma <helma@in-silico.ch>2013-03-26 10:43:43 +0100
commit84d44cc32d31b6cf1030c2ef271a902ca9800129 (patch)
treee7ca04ec9a90cff87661a7da0f7fc04ab738aab8
parentb586920b9baf38d6d178f345cebac93771bcf04f (diff)
descriptor calculation moved to algorithm service. code cleanup and refactoring.
-rw-r--r--Gemfile8
-rw-r--r--algorithm.gemspec3
-rw-r--r--application.rb18
-rw-r--r--descriptor.rb285
-rw-r--r--lib/algorithm.rb3
-rw-r--r--lib/algorithm/feature_values.rb9
-rw-r--r--lib/algorithm/fminer.rb25
-rw-r--r--lib/algorithm/similarity.rb2
-rw-r--r--lib/algorithm/transform.rb6
-rw-r--r--lib/dataset.rb6
-rw-r--r--lib/lazar.rb222
-rw-r--r--lib/model.rb151
-rw-r--r--webapp/fminer.rb241
-rw-r--r--webapp/fs.rb14
-rw-r--r--webapp/lazar.rb233
-rw-r--r--webapp/sinatra.rb13
-rw-r--r--webapp/test.rb2
17 files changed, 709 insertions, 532 deletions
diff --git a/Gemfile b/Gemfile
index aca69b0..fb18710 100644
--- a/Gemfile
+++ b/Gemfile
@@ -1,10 +1,4 @@
source :gemcutter
-
+gemspec
gem 'opentox-server', :path => "../opentox-server"
gem "opentox-client", :path => "../opentox-client"
-gem "openbabel", "~>2.3.1.5"
-gem "rinruby", "~>2.0.2"
-gem "nokogiri", "~>1.4.4"
-gem "emk-sinatra-url-for", "~>0.2.1"
-gem "statsample", "~>1.1"
-gem "gsl", "~>1.14"
diff --git a/algorithm.gemspec b/algorithm.gemspec
index 925c397..dc334b9 100644
--- a/algorithm.gemspec
+++ b/algorithm.gemspec
@@ -23,8 +23,7 @@ Gem::Specification.new do |s|
s.add_runtime_dependency 'nokogiri', "~>1.4.4"
s.add_runtime_dependency 'statsample', "~>1.1"
s.add_runtime_dependency 'gsl', "~>1.14"
- s.add_runtime_dependency 'emk-sinatra-url-for', "~>0.2.1"
-# s.add_runtime_dependency '', "~>"
s.add_runtime_dependency "openbabel", "~>2.3.1.5"
+ s.add_runtime_dependency "rjb" ,"1.4.3" # error in 1.4.5
s.post_install_message = "Please configure your service in ~/.opentox/config/algorithm.rb"
end
diff --git a/application.rb b/application.rb
index 1b9c776..f15aa48 100644
--- a/application.rb
+++ b/application.rb
@@ -1,17 +1,21 @@
# application.rb
# Loads sub-repositories, library code, and webapps.
# Author: Andreas Maunz
+require 'statsample'
# Require sub-Repositories
-require File.join(File.expand_path(File.dirname(__FILE__)), 'libfminer/libbbrc/bbrc') # include before openbabel
-require File.join(File.expand_path(File.dirname(__FILE__)), 'libfminer/liblast/last') #
-require File.join(File.expand_path(File.dirname(__FILE__)), 'last-utils/lu.rb')
+require_relative 'libfminer/libbbrc/bbrc' # include before openbabel
+require_relative 'libfminer/liblast/last' #
+require_relative 'last-utils/lu.rb'
# Library Code
$logger.debug "Algorithm booting: #{$algorithm.collect{ |k,v| "#{k}: '#{v}'"} }"
-Dir['./lib/algorithm/*.rb'].each { |f| require f } # Libs
-Dir['./lib/*.rb'].each { |f| require f } # Libs
-Dir['./webapp/*.rb'].each { |f| require f } # Webapps
+Dir['./lib/algorithm/*.rb'].each { |f| require f; also_reload f } # Libs
+Dir['./lib/*.rb'].each { |f| require f; also_reload f } # Libs
+Dir['./webapp/*.rb'].each { |f| require f; also_reload f } # Webapps
+require_relative "descriptor.rb"
+also_reload "descriptor.rb"
+#Dir['./webapp/pc-descriptors.rb'].each { |f| require f; also_reload f } # Webapps
# Entry point
module OpenTox
@@ -24,7 +28,7 @@ module OpenTox
to('/fminer/bbrc/match', :full),
to('/fminer/last/match', :full),
to('/fs/rfe', :full),
- to('/pc', :full) ].join("\n") + "\n"
+ to('/descriptor') ].join("\n") + "\n"
format_output (list)
end
end
diff --git a/descriptor.rb b/descriptor.rb
new file mode 100644
index 0000000..18b25a5
--- /dev/null
+++ b/descriptor.rb
@@ -0,0 +1,285 @@
+# descriptor.rb
+# Calculation of physico-chemical descriptors
+# Author: Andreas Maunz, Christoph Helma
+require 'rjb'
+require 'openbabel'
+
+module OpenTox
+
+ class Application < Service
+
+ ENV["JAVA_HOME"] ||= "/usr/lib/jvm/java-7-openjdk"
+ java_dir = File.join(File.dirname(__FILE__),"java")
+ jars = Dir[File.join(ENV["JAVA_HOME"],"lib","*.jar")]
+ jars += Dir[File.join(java_dir,"*jar")]
+ ENV["CLASSPATH"] = ([java_dir]+jars).join(":")
+ jars.each { |jar| Rjb::load jar }
+
+ StringReader ||= Rjb::import "java.io.StringReader"
+ CDKMdlReader ||= Rjb::import "org.openscience.cdk.io.MDLReader"
+ CDKMolecule ||= Rjb::import "org.openscience.cdk.Molecule"
+ CDKDescriptorEngine ||= Rjb::import "org.openscience.cdk.qsar.DescriptorEngine"
+ #AromaticityDetector = Rjb::import 'org.openscience.cdk.aromaticity.CDKHueckelAromaticityDetector'
+ JOELIBHelper ||= Rjb::import 'joelib2.feature.FeatureHelper'
+ JOELIBFactory ||= Rjb::import 'joelib2.feature.FeatureFactory'
+ JOELIBSmilesParser ||= Rjb::import "joelib2.smiles.SMILESParser"
+ JOELIBTypeHolder ||= Rjb::import "joelib2.io.BasicIOTypeHolder"
+ JOELIBMolecule ||= Rjb::import "joelib2.molecule.BasicConformerMolecule"
+
+ unless defined? DESCRIPTORS
+
+ # initialize descriptors and features at startup to avoid duplication
+ descriptors = { :cdk => [], :openbabel => [], :joelib => [] } # use arrays to keep the sequence intact
+
+ @@obmol = OpenBabel::OBMol.new
+ @@obconversion = OpenBabel::OBConversion.new
+ @@obconversion.set_in_format 'inchi'
+ @@cdk_engine = CDKDescriptorEngine.new(CDKDescriptorEngine.MOLECULAR)
+
+ # OpenBabel
+ OpenBabel::OBDescriptor.list_as_string("descriptors").split("\n").each do |d|
+ title,description = d.split(/\s+/,2)
+ unless title =~ /cansmi|formula|InChI|smarts|title/ or title == "s"
+ title = "OpenBabel "+title
+ feature = OpenTox::Feature.find_or_create({
+ RDF::DC.title => title,
+ RDF.type => [RDF::OT.Feature, RDF::OT.NumericFeature],
+ RDF::DC.description => description,
+ }, @subjectid)
+ descriptors[:openbabel] << {
+ :title => title,
+ :description => description,
+ :calculator => OpenBabel::OBDescriptor.find_type(title.split(" ").last),
+ :feature => feature
+ }
+ end
+ end
+
+ # CDK
+ @@cdk_engine.getDescriptorClassNames.toArray.each do |d|
+ cdk_class = d.toString
+ title = "CDK "+cdk_class.split('.').last
+ description = @@cdk_engine.getDictionaryDefinition(cdk_class).gsub(/\s+/,' ').strip + " (Class: " + @@cdk_engine.getDictionaryClass(cdk_class).join(", ") + ")"
+ descriptor = {
+ :title => title,
+ :description => description,
+ :calculator => Rjb::import(cdk_class).new,
+ :features => []
+ }
+ # CDK Descriptors may return more than one value
+ descriptor[:features] = descriptor[:calculator].getDescriptorNames.collect do |name|
+ feature = OpenTox::Feature.find_or_create({
+ RDF::DC.title => "#{title} #{name}",
+ RDF.type => [RDF::OT.Feature, RDF::OT.NumericFeature],
+ RDF::DC.description => description
+ }, @subjectid)
+ end
+ descriptors[:cdk] << descriptor
+ end
+
+ # JOELIB
+ factory = JOELIBFactory.instance
+ JOELIBHelper.instance.getNativeFeatures.toArray.each do |f|
+ joelib_class = f.toString
+ unless joelib_class == "joelib2.feature.types.GlobalTopologicalChargeIndex"
+ # CH: returns "joelib2.feature.types.atomlabel.AtomValence\n#{numeric value}"
+ # unsure if numeric_value is GlobalTopologicalChargeIndex or AtomValence
+ # excluded from descriptor list
+ title = "JOELib "+joelib_class.split('.').last
+ description = title # feature.getDescription.hasText returns false, feature.getDescription.getHtml returns unparsable content
+ feature = OpenTox::Feature.find_or_create({
+ RDF::DC.title => title,
+ RDF.type => [RDF::OT.Feature, RDF::OT.NumericFeature],
+ RDF::DC.description => description,
+ }, @subjectid)
+ descriptors[:joelib] << {
+ :title => title,
+ :description => description,
+ :calculator => Rjb::import(joelib_class).new,
+ :feature => feature
+ }
+ end
+ end
+ DESCRIPTORS = descriptors
+
+ end
+
+ helpers do
+
+ def openbabel compounds, descriptors
+ compounds.each do |compound|
+ @@obconversion.read_string @@obmol, compound.inchi
+ descriptors.each do |descriptor|
+ puts descriptor[:title]
+ @feature_dataset.add_data_entry compound, descriptor[:feature], fix_value(descriptor[:calculator].predict(@@obmol))
+ end
+ end
+ end
+
+ def cdk compounds, descriptors
+ @@obconversion.set_out_format 'sdf'
+ compounds.each do |compound|
+ @@obconversion.read_string @@obmol, compound.inchi
+ sdf = @@obconversion.write_string(@@obmol)
+ OpenBabel::OBOp.find_type("Gen3D").do(@@obmol)
+ sdf_3D = @@obconversion.write_string(@@obmol)
+ if sdf_3D.match(/.nan/)
+ warning = "3D generation failed for compound #{compound.uri} (using 2D structure)."
+ $logger.warn warning
+ @feature_dataset[RDF::OT.Warnings] ? @feature_dataset[RDF::OT.Warnings] << warning : @feature_dataset[RDF::OT.Warnings] = warning
+ else
+ sdf = sdf_3D
+ end
+ reader = CDKMdlReader.new(StringReader.new(sdf))
+ cdk_compound = reader.read(CDKMolecule.new)
+ #AromaticityDetector.detectAromaticity(cdk_compound)
+ values = []
+ descriptors.each do |descriptor|
+ puts descriptor[:title]
+ begin
+ result = descriptor[:calculator].calculate cdk_compound
+ result.getValue.toString.split(",").each_with_index do |value,i|
+ @feature_dataset.add_data_entry compound, descriptor[:features][i], fix_value(value)
+ end
+ rescue
+ $logger.error "#{descriptor[:title]} calculation failed with #{$!.message} for compound #{compound.uri}."
+ end
+ end
+ end
+ end
+
+ def joelib compounds, descriptors
+ @@obconversion.set_out_format 'smi'
+ compounds.each do |compound|
+ mol = JOELIBMolecule.new(JOELIBTypeHolder.instance.getIOType("SMILES"), JOELIBTypeHolder.instance.getIOType("SMILES"))
+ @@obconversion.read_string @@obmol, compound.inchi
+ JOELIBSmilesParser.smiles2molecule mol, @@obconversion.write_string(@@obmol).strip, "Smiles: #{@@obconversion.write_string(@@obmol).strip}"
+ mol.addHydrogens
+ descriptors.each do |descriptor|
+ puts descriptor[:title]
+ puts descriptor[:calculator].toString#java_methods.inspect
+ puts descriptor[:calculator].calculate(mol).toString
+ @feature_dataset.add_data_entry compound, descriptor[:feature], fix_value(descriptor[:calculator].calculate(mol).toString)
+ end
+ end
+ end
+
+ def fix_value val
+ #unless val.numeric?
+ if val.numeric?
+ val = Float(val)
+ val = nil if val.nan? or val.infinite?
+ end
+ val
+ end
+ end
+
+ before '/descriptor/?*' do
+ if request.get?
+ @algorithm = OpenTox::Algorithm.new @uri
+ @algorithm.parameters = [
+ { RDF::DC.description => "Dataset URI",
+ RDF::OT.paramScope => "optional",
+ RDF::DC.title => "dataset_uri" } ,
+ { RDF::DC.description => "Compound URI",
+ RDF::OT.paramScope => "optional",
+ RDF::DC.title => "compound_uri" }
+ ]
+ @algorithm.metadata = {
+ RDF.type => [RDF::OTA.DescriptorCalculation],
+ }
+ elsif request.post?
+ @feature_dataset = Dataset.new nil, @subjectid
+ @feature_dataset.metadata = {
+ RDF::DC.title => "Physico-chemical descriptors",
+ RDF::DC.creator => @uri,
+ RDF::OT.hasSource => @uri,
+ }
+ if params[:compound_uri]
+ @feature_dataset.parameters = [ { RDF::DC.title => "compound_uri", RDF::OT.paramValue => params[:compound_uri] }]
+ elsif params[:dataset_uri]
+ @feature_dataset.parameters = [ { RDF::DC.title => "dataset_uri", RDF::OT.paramValue => params[:dataset_uri] }]
+ else
+      bad_request_error "Please provide a dataset_uri or compound_uri parameter", @uri
+ end
+ end
+ end
+
+ before '/descriptor/:lib/:descriptor/?' do
+ @descriptors = DESCRIPTORS[params[:lib].to_sym].select{|d| d[:title].split(" ").last == params[:descriptor]}
+ bad_request_error "Unknown descriptor #{@uri}. See #{uri('descriptors')} for a complete list of supported descriptors.", @uri if @descriptors.empty?
+ @descriptor = @descriptors.first
+ end
+
+ # Get a list of descriptor calculation
+ # @return [text/uri-list] URIs
+ get '/descriptor/?' do
+ DESCRIPTORS.collect{|lib,d| d.collect{|n| uri("/descriptors/#{lib}/#{n[:title].split(" ").last}")}}.flatten.sort.join("\n")
+ end
+
+ get '/descriptor/:lib/?' do
+ DESCRIPTORS[params[:lib].to_sym].collect{|n| uri("/descriptors/#{params[:lib].to_sym}/#{n[:title].split(" ").last}")}.sort.join("\n")
+ end
+
+ # Get representation of descriptor calculation
+ # @return [String] Representation
+ get '/descriptor/:lib/:descriptor/?' do
+ @algorithm[RDF::DC.title] = @descriptor[:title]
+ @algorithm[RDF::DC.description] = @descriptor[:description]
+ format_output(@algorithm)
+ end
+
+ post '/descriptor/?' do
+ #task = OpenTox::Task.run "Calculating PC descriptors", @uri, @subjectid do |task|
+ puts "Task created"
+ if params[:descriptors]
+ descriptors = {}
+ params[:descriptors].each do |descriptor|
+ #lib, title = descriptor.split('/')
+ descriptors[lib.to_sym] ||= []
+ descriptors[lib.to_sym] << DESCRIPTORS[lib.to_sym].select{|d| d[:title] == descriptor}
+ end
+ else
+ descriptors = DESCRIPTORS
+ end
+ if params[:compound_uri]
+ compounds = [ Compound.new(params[:compound_uri], @subjectid) ]
+ elsif params[:dataset_uri]
+ compounds = Dataset.new(params[:dataset_uri]).compounds
+ end
+ puts "Calculating"
+ [:openbabel, :cdk, :joelib].each{ |lib| puts lib; send lib, compounds, descriptors[lib]; puts lib.to_s+" finished" }
+ #[:joelib].each{ |lib| send lib, compounds, descriptors[lib]; puts lib.to_s+" finished" }
+ puts "saving file"
+ File.open("/home/ch/tmp.nt","w+"){|f| f.puts @feature_dataset.to_ntriples}
+ puts "saving "+@feature_dataset.uri
+ @feature_dataset.put
+ puts "finished"
+ @feature_dataset.uri
+ #end
+ #response['Content-Type'] = 'text/uri-list'
+ #halt 202, task.uri
+ end
+
+ post '/descriptor/:lib/:descriptor/?' do
+ if params[:compound_uri]
+ compounds = [ Compound.new(params[:compound_uri], @subjectid) ]
+ send params[:lib].to_sym, compounds, @descriptors
+ @feature_dataset.put
+ @feature_dataset.uri
+ elsif params[:dataset_uri]
+ task = OpenTox::Task.run "Calculating PC descriptors", @uri, @subjectid do |task|
+ compounds = Dataset.new(params[:dataset_uri]).compounds
+ send params[:lib].to_sym, compounds, @descriptors
+ @feature_dataset.put
+ @feature_dataset.uri
+ end
+ response['Content-Type'] = 'text/uri-list'
+ halt 202, task.uri
+ end
+ end
+
+ end
+
+end
+
diff --git a/lib/algorithm.rb b/lib/algorithm.rb
index 1a26322..fb47385 100644
--- a/lib/algorithm.rb
+++ b/lib/algorithm.rb
@@ -13,8 +13,7 @@ module OpenTox
# return [Integer] min-frequency
def self.min_frequency(training_dataset,prediction_feature,per_mil)
nr_labeled_cmpds=0
- training_dataset.build_feature_positions
- f_idx=training_dataset.feature_positions[prediction_feature.uri]
+ f_idx=training_dataset.features.collect{|f| f.uri}.index prediction_feature.uri
training_dataset.compounds.each_with_index { |cmpd, c_idx|
if ( training_dataset.data_entries[c_idx] )
unless training_dataset.data_entries[c_idx][f_idx].nil?
diff --git a/lib/algorithm/feature_values.rb b/lib/algorithm/feature_values.rb
index f08dea8..62373e7 100644
--- a/lib/algorithm/feature_values.rb
+++ b/lib/algorithm/feature_values.rb
@@ -13,7 +13,8 @@ module OpenTox
# @param [Hash] keys: compound, feature_dataset, values: OpenTox::Compound, Array of SMARTS strings
# @return [Array] Array with matching Smarts
def self.match(params, subjectid)
- features = params[:feature_dataset].features.collect{ |f| f[DC.title] }
+ features = params[:feature_dataset].features.collect{ |f| f[RDF::DC.title] }
+ puts features.inspect
params[:compound].match(features)
end
@@ -21,7 +22,7 @@ module OpenTox
# @param [Hash] keys: compound, feature_dataset, values: OpenTox::Compound, Array of SMARTS strings
# @return [Hash] Hash with matching Smarts and number of hits
def self.match_hits(params, subjectid)
- features = params[:feature_dataset].features.collect{ |f| f[DC.title] },
+ features = params[:feature_dataset].features.collect{ |f| f[RDF::DC.title] },
params[:compound].match_hits(features)
end
@@ -37,7 +38,7 @@ module OpenTox
}.compact
if cmpd_idxs.size > 0 # We have entries
cmpd_numeric_f = ds.features.collect { |f|
- f if f[RDF.type].include? OT.NumericFeature
+ f if f[RDF.type].include? RDF::OT.NumericFeature
}.compact
cmpd_data_entries = cmpd_idxs.collect { |idx|
ds.data_entries[idx]
@@ -61,7 +62,7 @@ module OpenTox
params[:subjectid] = subjectid
[:compound, :feature_dataset].each { |p| params.delete(p) }; [:pc_type, :lib].each { |p| params.delete(p) if params[p] == "" }
single_cmpd_ds = OpenTox::Dataset.new(nil,subjectid)
- single_cmpd_ds.parse_rdfxml(RestClient.post("#{$compound[:uri]}/#{cmpd_inchi}/pc", params, {:accept => "application/rdf+xml"}))
+ single_cmpd_ds.parse_rdfxml(RestClientWrapper.post(File.join($compound[:uri],cmpd_inchi,"pc"), params, {:accept => "application/rdf+xml"}))
single_cmpd_ds.get(true)
single_cmpd_ds.build_feature_positions
cmpd_fingerprints = single_cmpd_ds.features.inject({}) { |h,f|
diff --git a/lib/algorithm/fminer.rb b/lib/algorithm/fminer.rb
index d969bef..5b0527a 100644
--- a/lib/algorithm/fminer.rb
+++ b/lib/algorithm/fminer.rb
@@ -22,16 +22,16 @@ module OpenTox
# @param[Hash] parameters of the REST call
# @param[Integer] per-mil value for min frequency
- def check_params(params,per_mil,subjectid=nil)
+ def check_params(params,per_mil)
bad_request_error "Please submit a dataset_uri." unless params[:dataset_uri] and !params[:dataset_uri].nil?
- @training_dataset = OpenTox::Dataset.find "#{params[:dataset_uri]}", subjectid # AM: find is a shim
+ @training_dataset = OpenTox::Dataset.new "#{params[:dataset_uri]}", @subjectid
unless params[:prediction_feature] # try to read prediction_feature from dataset
resource_not_found_error "Please provide a prediction_feature parameter" unless @training_dataset.features.size == 1
params[:prediction_feature] = @training_dataset.features.first.uri
end
- @prediction_feature = OpenTox::Feature.find params[:prediction_feature], subjectid # AM: find is a shim
+ @prediction_feature = OpenTox::Feature.find params[:prediction_feature], @subjectid
resource_not_found_error "No feature '#{params[:prediction_feature]}' in dataset '#{params[:dataset_uri]}'" unless
- @training_dataset.find_feature( params[:prediction_feature] ) # AM: find_feature is a shim
+ @training_dataset.find_feature_uri( params[:prediction_feature] )
unless params[:min_frequency].nil?
# check for percentage
if params[:min_frequency].include? "pc"
@@ -101,7 +101,7 @@ module OpenTox
def add_fminer_data(fminer_instance, value_map)
id=1
@training_dataset.compounds.each do |compound|
- compound_activities = @training_dataset.find_data_entry(compound.uri, @prediction_feature.uri)
+ compound_activities = @training_dataset.values(compound, @prediction_feature)
begin
if @prediction_feature.feature_type == "classification"
compound_activities = compound_activities.to_scale.mode
@@ -164,21 +164,20 @@ module OpenTox
end
metadata = {
- RDF.type => [OT.Feature, OT.Substructure, OT.NumericFeature],
- OT.smarts => smarts.dup,
- OT.pValue => p_value.abs.round(5),
- OT.effect => effect
+ RDF.type => [RDF::OT.Feature, RDF::OT.Substructure, RDF::OT.NumericFeature],
+ RDF::OT.smarts => smarts.dup,
+ RDF::OT.pValue => p_value.abs.round(5),
+ RDF::OT.effect => effect
}
parameters = [
- { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] },
- { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] }
+ { RDF::DC.title => "dataset_uri", RDF::OT.paramValue => params[:dataset_uri] },
+ { RDF::DC.title => "prediction_feature", RDF::OT.paramValue => params[:prediction_feature] }
]
- metadata[OT.hasSource]=feature_dataset_uri if feature_dataset_uri
+ metadata[RDF::OT.hasSource]=feature_dataset_uri if feature_dataset_uri
[ metadata, parameters ]
end
end
-
end
end
diff --git a/lib/algorithm/similarity.rb b/lib/algorithm/similarity.rb
index 22b4c28..88eed35 100644
--- a/lib/algorithm/similarity.rb
+++ b/lib/algorithm/similarity.rb
@@ -15,6 +15,8 @@ module OpenTox
# @param [Array] b fingerprints of second compound
# @return [Float] Tanimoto similarity
def self.tanimoto(a,b)
+ puts a.inspect
+ puts b.inspect
common_p_sum = 0.0
all_p_sum = 0.0
size = [ a.size, b.size ].min
diff --git a/lib/algorithm/transform.rb b/lib/algorithm/transform.rb
index ec25526..c851f73 100644
--- a/lib/algorithm/transform.rb
+++ b/lib/algorithm/transform.rb
@@ -334,8 +334,13 @@ module OpenTox
# @param[Array] training_props Propositionalized data for this neighbor
# @param[Integer] Index of neighbor
def add_neighbor(training_props, idx)
+ puts idx
+ #puts training_props.inspect
+ #puts @q_prop.inspect
+ #puts @model.acts[idx].to_s
unless @model.acts[idx].nil?
sim = similarity(training_props)
+ puts sim
if sim > @model.min_sim.to_f
@model.neighbors << {
:compound => @cmpds[idx],
@@ -381,7 +386,6 @@ module OpenTox
end
end
-
# Replaces nils by zeroes in n_prop and q_prop
# Enables the use of Tanimoto similarities with arrays (rows of n_prop and q_prop)
def convert_nils
diff --git a/lib/dataset.rb b/lib/dataset.rb
index e7588ae..a42356b 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -12,9 +12,9 @@ module OpenTox
# @param [Hash] uri Compound URI, Feature URI
# @return [Object] activity Database activity, or nil
def database_activity(params)
- f=Feature.find(params[:prediction_feature_uri],params[:subjectid])
- db_act = find_data_entry(params[:compound_uri], params[:prediction_feature_uri])
- if db_act
+ f=Feature.new params[:prediction_feature_uri], @subjectid
+ db_act = values(Compound.new(params[:compound_uri]), f)
+ if !db_act.empty?
if f.feature_type == "classification"
db_act = db_act.to_scale.mode.dup
else
diff --git a/lib/lazar.rb b/lib/lazar.rb
new file mode 100644
index 0000000..f731544
--- /dev/null
+++ b/lib/lazar.rb
@@ -0,0 +1,222 @@
+=begin
+* Name: lazar.rb
+* Description: Lazar model representation
+* Author: Andreas Maunz <andreas@maunz.de>, Christoph Helma
+* Date: 10/2012
+=end
+
+module OpenTox
+
+ class LazarPrediction < Model
+
+ attr_accessor :prediction_dataset
+
+ def initialize(params)
+ @prediction_dataset = OpenTox::Dataset.new(nil, @subjectid)
+ # set instance variables and prediction dataset parameters from parameters
+ params.each {|k,v|
+ self.class.class_eval { attr_accessor k.to_sym }
+ instance_variable_set "@#{k}", v
+ @prediction_dataset.parameters << {RDF::DC.title => k, RDF::OT.paramValue => v}
+ }
+ ["cmpds", "fps", "acts", "n_prop", "q_prop", "neighbors"].each {|k|
+ self.class.class_eval { attr_accessor k.to_sym }
+ instance_variable_set("@#{k}", [])
+ }
+
+ @prediction_feature = OpenTox::Feature.new(@prediction_feature_uri,@subjectid)
+ # TODO: set feature type
+ @predicted_variable = OpenTox::Feature.find_or_create({RDF::DC.title => "#{@prediction_feature.title} prediction", RDF.type => @prediction_feature[RDF.type]}, @subjectid)
+ @predicted_confidence = OpenTox::Feature.find_or_create({RDF::DC.title => "#{@prediction_feature.title} confidence", RDF.type => [RDF::OT.Feature, RDF::OT.NumericFeature]}, @subjectid)
+
+ @prediction_dataset.metadata = {
+ RDF::DC.title => "Lazar prediction for #{@prediction_feature.title}",
+ RDF::DC.creator => @model_uri,
+ RDF::OT.hasSource => @model_uri,
+ RDF::OT.dependentVariables => @prediction_feature_uri,
+ RDF::OT.predictedVariables => [@predicted_variable.uri,@predicted_confidence.uri]
+ }
+
+ @training_dataset = OpenTox::Dataset.new(@training_dataset_uri,@subjectid)
+
+ @feature_dataset = OpenTox::Dataset.new(@feature_dataset_uri, @subjectid)
+ bad_request_error "No features found in feature dataset #{@feature_dataset.uri}." if @feature_dataset.features.empty?
+
+ @similarity_feature = OpenTox::Feature.find_or_create({RDF::DC.title => "#{@similarity_algorithm.capitalize} similarity", RDF.type => [RDF::OT.Feature, RDF::OT.NumericFeature]}, @subjectid)
+
+ @prediction_dataset.features = [ @predicted_variable, @predicted_confidence, @prediction_feature, @similarity_feature ]
+
+ prediction_feature_pos = @training_dataset.features.collect{|f| f.uri}.index @prediction_feature.uri
+
+ if @dataset_uri
+ compounds = OpenTox::Dataset.find(@dataset_uri).compounds
+ else
+ compounds = [ OpenTox::Compound.new(@compound_uri) ]
+ end
+ compounds.each do |compound|
+
+ puts compound.smiles
+ database_activity = @training_dataset.database_activity(params)
+ if database_activity
+ @prediction_dataset.add_data_entry compound, @prediction_feature, database_activity
+ next
+ else
+ #pc_type = @feature_dataset.parameters["pc_type"]
+ #@model.pc_type = pc_type unless pc_type.nil?
+ #lib = @feature_dataset.parameters["lib"]
+ #@model.lib = lib unless lib.nil?
+
+ # AM: transform to cosine space
+ @min_sim = (@min_sim.to_f*2.0-1.0).to_s if @similarity_algorithm =~ /cosine/
+
+ compound_params = {
+ :compound => compound,
+ :feature_dataset => @feature_dataset,
+ # TODO: fix in algorithm/lib/algorithm/feature_values.rb
+ #:pc_type => @model.pc_type,
+ #:lib => @model.lib
+ }
+ compound_fingerprints = OpenTox::Algorithm::FeatureValues.send( @feature_calculation_algorithm, compound_params, @subjectid )
+ puts compound_fingerprints.inspect
+ @training_dataset.compounds.each_with_index { |cmpd, idx|
+ act = @training_dataset.data_entries[idx][prediction_feature_pos]
+ @acts << (@prediction_feature.feature_type=="classification" ? @prediction_feature.value_map.invert[act] : nil)
+ @n_prop << @feature_dataset.data_entries[idx]#.collect.to_a
+ @cmpds << cmpd.uri
+ }
+ #puts "COMPOUNDS"
+ #puts @n_prop.inspect
+ puts @cmpds.inspect
+ puts @fps.inspect
+ puts @acts.inspect
+ puts @n_prop.inspect
+ puts @q_prop.inspect
+
+ @q_prop = @feature_dataset.features.collect { |f|
+ val = compound_fingerprints[f.title]
+ bad_request_error "Can not parse value '#{val}' to numeric" if val and !val.numeric?
+ val ? val.to_f : 0.0
+ } # query structure
+
+ mtf = OpenTox::Algorithm::Transform::ModelTransformer.new(self)
+ mtf.transform
+
+ prediction = OpenTox::Algorithm::Neighbors.send(@prediction_algorithm,
+ { :props => mtf.props,
+ :acts => mtf.acts,
+ :sims => mtf.sims,
+ :value_map => @prediction_feature.feature_type=="classification" ? @prediction_feature.value_map : nil,
+ :min_train_performance => @min_train_performance
+ } )
+
+ puts prediction.inspect
+ predicted_value = prediction[:prediction].to_f
+ confidence_value = prediction[:confidence].to_f
+
+ # AM: transform to original space
+ confidence_value = ((confidence_value+1.0)/2.0).abs if @similarity_algorithm =~ /cosine/
+ predicted_value = @prediction_feature.value_map[prediction[:prediction].to_i] if @prediction_feature.feature_type == "classification"
+
+ end
+
+ @prediction_dataset.add_data_entry compound, @predicted_variable, predicted_value
+ @prediction_dataset.add_data_entry compound, @predicted_confidence, confidence_value
+
+ if @compound_uri # add neighbors only for compound predictions
+ @neighbors.each do |neighbor|
+ n = OpenTox::Compound.new(neighbor[:compound])
+ @prediction_dataset.add_data_entry n, @prediction_feature, @prediction_feature.value_map[neighbor[:activity]]
+ @prediction_dataset.add_data_entry n, @similarity_feature, neighbor[:similarity]
+ #@prediction_dataset << [ n, @prediction_feature.value_map[neighbor[:activity]], nil, nil, neighbor[:similarity] ]
+ end
+ end
+
+ end # iteration over compounds
+ @prediction_dataset.put
+
+ end
+
+ end
+
+ class Model
+
+ # Check parameters for plausibility
+ # Prepare lazar object (includes graph mining)
+ # @param[Array] lazar parameters as strings
+ # @param[Hash] REST parameters, as input by user
+ def create_model(params)
+
+ training_dataset = OpenTox::Dataset.new(params[:dataset_uri], @subjectid)
+ @parameters << {RDF::DC.title => "training_dataset_uri", RDF::OT.paramValue => training_dataset.uri}
+
+ # TODO: This is inconsistent, it would be better to have prediction_feature_uri in the API
+ if params[:prediction_feature]
+ resource_not_found_error "No feature '#{params[:prediction_feature]}' in dataset '#{params[:dataset_uri]}'" unless training_dataset.find_feature_uri( params[:prediction_feature] )
+ else # try to read prediction_feature from dataset
+ resource_not_found_error "Please provide a prediction_feature parameter" unless training_dataset.features.size == 1
+ params[:prediction_feature] = training_dataset.features.first.uri
+ end
+ self[RDF::OT.trainingDataset] = training_dataset.uri
+ prediction_feature = OpenTox::Feature.new(params[:prediction_feature], @subjectid)
+ self[RDF::DC.title] = prediction_feature.title
+ @parameters << {RDF::DC.title => "prediction_feature_uri", RDF::OT.paramValue => prediction_feature.uri}
+ self[RDF::OT.dependentVariables] = prediction_feature.uri
+
+ bad_request_error "Unknown prediction_algorithm #{params[:prediction_algorithm]}" if params[:prediction_algorithm] and !OpenTox::Algorithm::Neighbors.respond_to?(params[:prediction_algorithm])
+ @parameters << {RDF::DC.title => "prediction_algorithm", RDF::OT.paramValue => params[:prediction_algorithm]} if params[:prediction_algorithm]
+
+ confidence_feature = OpenTox::Feature.find_or_create({RDF::DC.title => "predicted_confidence", RDF.type => [RDF::OT.Feature, RDF::OT.NumericFeature]}, @subjectid)
+ self[RDF::OT.predictedVariables] = [ prediction_feature.uri, confidence_feature.uri ]
+ case prediction_feature.feature_type
+ when "classification"
+ @parameters << {RDF::DC.title => "prediction_algorithm", RDF::OT.paramValue => "weighted_majority_vote"} unless parameter_value "prediction_algorithm"
+ self[RDF.type] = [RDF::OT.Model, RDF::OTA.ClassificationLazySingleTarget]
+ when "regression"
+ @parameters << {RDF::DC.title => "prediction_algorithm", RDF::OT.paramValue => "local_svm_regression"} unless parameter_value "prediction_algorithm"
+ self[RDF.type] = [RDF::OT.Model, RDF::OTA.RegressionLazySingleTarget]
+ end
+ parameter_value("prediction_algorithm") =~ /majority_vote/ ? @parameters << {RDF::DC.title => "propositionalized", RDF::OT.paramValue => false} : @parameters << {RDF::DC.title => "propositionalized", RDF::OT.paramValue => true}
+
+ @parameters << {RDF::DC.title => "min_sim", RDF::OT.paramValue => params[:min_sim].to_f} if params[:min_sim] and params[:min_sim].numeric?
+ @parameters << {RDF::DC.title => "feature_generation_uri", RDF::OT.paramValue => params[:feature_generation_uri]}
+ #@parameters["nr_hits"] = params[:nr_hits]
+ case params["feature_generation_uri"]
+ when /fminer/
+ if (params[:nr_hits] == "true")
+ @parameters << {RDF::DC.title => "feature_calculation_algorithm", RDF::OT.paramValue => "match_hits"}
+ else
+ @parameters << {RDF::DC.title => "feature_calculation_algorithm", RDF::OT.paramValue => "match"}
+ end
+ @parameters << {RDF::DC.title => "similarity_algorithm", RDF::OT.paramValue => "tanimoto"}
+ @parameters << {RDF::DC.title => "min_sim", RDF::OT.paramValue => 0.3} unless parameter_value("min_sim")
+ when /descriptors/
+ @parameters << {RDF::DC.title => "feature_calculation_algorithm", RDF::OT.paramValue => "lookup"}
+ @parameters << {RDF::DC.title => "similarity_algorithm", RDF::OT.paramValue => "cosine"}
+ @parameters << {RDF::DC.title => "min_sim", RDF::OT.paramValue => 0.7} unless parameter_value("min_sim")
+ end
+
+ #TODO: check if these parameters are necessary with new version
+ #set_parameter("pc_type", params[:pc_type] if params[:pc_type]
+ #set_parameter("lib", params[:lib] if params[:lib]
+
+ bad_request_error "Parameter min_train_performance is not numeric." if params[:min_train_performance] and !params[:min_train_performance].numeric?
+ @parameters << {RDF::DC.title => "min_train_performance", RDF::OT.paramValue => params[:min_train_performance].to_f} if params[:min_train_performance] and params[:min_train_performance].numeric?
+ @parameters << {RDF::DC.title => "min_train_performance", RDF::OT.paramValue => 0.1} unless parameter_value("min_train_performance")
+
+ if params[:feature_dataset_uri]
+ bad_request_error "Feature dataset #{params[:feature_dataset_uri]} does not exist." unless URI.accessible? params[:feature_dataset_uri]
+ @parameters << {RDF::DC.title => "feature_dataset_uri", RDF::OT.paramValue => params[:feature_dataset_uri]}
+ self[RDF::OT.featureDataset] = params["feature_dataset_uri"]
+ else
+ # run feature generation algorithm
+ feature_dataset_uri = OpenTox::Algorithm.new(params[:feature_generation_uri]).run(params)
+ @parameters << {RDF::DC.title => "feature_dataset_uri", RDF::OT.paramValue => feature_dataset_uri}
+ self[RDF::OT.featureDataset] = feature_dataset_uri
+ end
+
+ end
+
+ end
+
+end
+
diff --git a/lib/model.rb b/lib/model.rb
deleted file mode 100644
index a808aa7..0000000
--- a/lib/model.rb
+++ /dev/null
@@ -1,151 +0,0 @@
-=begin
-* Name: lazar.rb
-* Description: Lazar model representation
-* Author: Andreas Maunz <andreas@maunz.de>
-* Date: 10/2012
-=end
-
-module OpenTox
-
- class Model
-
- def initialize(*args)
- if args.size == 2
- super(*args)# We have uri and subjectid
- end
- if args.size == 1
- prepare_prediction_model(args[0]) # We have a hash (prediction time)
- end
- end
-
- # Internal use only
- def prepare_prediction_model(params)
- puts params.inspect
- params.each {|k,v|
- self.class.class_eval { attr_accessor k.to_sym }
- instance_variable_set(eval(":@"+k), v)
- }
- ["cmpds", "fps", "acts", "n_prop", "q_prop", "neighbors"].each {|k|
- self.class.class_eval { attr_accessor k.to_sym }
- instance_variable_set(eval(":@"+k), [])
- }
- end
- private :prepare_prediction_model
-
- # Fills model in with data for prediction
- # Avoids associative lookups, since canonization to InChI takes time
- # @param [OpenTox::Dataset] training dataset
- # @param [OpenTox::Dataset] feature dataset
- # @param [OpenTox::Feature] prediction feature
- # @param [Hash] compound fingerprints
- # @param [String] subjectid
- def add_data(training_dataset, feature_dataset, prediction_feature, compound_fingerprints, subjectid)
- training_dataset.build_feature_positions
- prediction_feature_pos = training_dataset.feature_positions[prediction_feature.uri]
- training_dataset.compounds.each_with_index { |cmpd, idx|
- act = training_dataset.data_entries[idx][prediction_feature_pos]
- @acts << (prediction_feature.feature_type=="classification" ?
- training_dataset.value_map(prediction_feature).invert[act] : nil)
- @n_prop << feature_dataset.data_entries[idx].collect.to_a
- @cmpds << cmpd.uri
- }
- @q_prop = feature_dataset.features.collect { |f|
- val = compound_fingerprints[f.title]
- bad_request_error "Can not parse value '#{val}' to numeric" if val and !val.numeric?
- val ? val.to_f : 0.0
- } # query structure
- end
-
-
- # Check parameters for plausibility
- # Prepare lazar object (includes graph mining)
- # @param[Array] lazar parameters as strings
- # @param[Hash] REST parameters, as input by user
- def check_params(lazar_params, params)
-
- unless params[:feature_generation_uri]
- bad_request_error "Please provide a feature generation uri"
- end
- feature_generation_uri = params[:feature_generation_uri]
-
- unless training_dataset = OpenTox::Dataset.find(params[:dataset_uri], @subjectid) # AM: find is a shim
- resource_not_found_error "Dataset '#{params[:dataset_uri]}' not found."
- end
- training_dataset_uri = training_dataset.uri
-
- unless params[:prediction_feature] # try to read prediction_feature from dataset
- resource_not_found_error "Please provide a prediction_feature parameter" unless training_dataset.features.size == 1
- params[:prediction_feature] = training_dataset.features.first.uri
- end
-
- unless training_dataset.find_feature( params[:prediction_feature] ) # AM: find_feature is a shim
- resource_not_found_error "No feature '#{params[:prediction_feature]}' in dataset '#{params[:dataset_uri]}'"
- end
- prediction_feature = OpenTox::Feature.find(params[:prediction_feature], @subjectid) # AM: find is a shim
- prediction_feature_uri = prediction_feature.uri
-
- if params[:feature_dataset_uri]
- feature_dataset_uri = params[:feature_dataset_uri]
- else
- feature_dataset_uri = OpenTox::Algorithm.new(feature_generation_uri).run(params)
- end
-
- if (feature_generation_uri =~ /fminer/)
- feature_calculation_algorithm = "match"
- if (params[:nr_hits] == "true")
- feature_calculation_algorithm = "match_hits"
- end
- elsif feature_generation_uri =~ /dataset.*\/pc/
- feature_calculation_algorithm = "lookup"
- end
-
- if feature_calculation_algorithm == "lookup"
- similarity_algorithm = "cosine"
- min_sim = 0.7
- elsif feature_calculation_algorithm =~ /match/
- similarity_algorithm = "tanimoto"
- min_sim = 0.3
- end
- if params[:min_sim] and params[:min_sim].numeric?
- min_sim = params[:min_sim].to_f # AM: frequent manual option
- end
-
- if prediction_feature.feature_type == "classification"
- prediction_algorithm = "weighted_majority_vote"
- elsif prediction_feature.feature_type == "regression"
- prediction_algorithm = "local_svm_regression"
- end
- if params[:prediction_algorithm] and OpenTox::Algorithm::Neighbors.respond_to? params[:prediction_algorithm]
- prediction_algorithm = params[:prediction_algorithm] # AM: frequent manual option
- end
-
- propositionalized = true
- if prediction_algorithm =~ /majority_vote/
- propositionalized = false
- end
-
- if params[:pc_type]
- pc_type = params[:pc_type]
- end
-
- if params[:lib]
- lib = params[:lib]
- end
-
- min_train_performance = $lazar_min_train_performance_default
- if params[:min_train_performance] and params[:min_train_performance].numeric?
- min_train_performance = params[:min_train_performance].to_f # AM: frequent manual option
- end
-
-
- lazar_params.collect { |p|
- val = eval(p)
- { DC.title => p, OT.paramValue => (val.nil? ? "" : val) }
- }.compact
- end
-
-
- end
-
-end
-
diff --git a/webapp/fminer.rb b/webapp/fminer.rb
index 379a863..2d11dcb 100644
--- a/webapp/fminer.rb
+++ b/webapp/fminer.rb
@@ -14,7 +14,6 @@ ENV['FMINER_NR_HITS'] = 'true'
@@bbrc = Bbrc::Bbrc.new
@@last = Last::Last.new
-
module OpenTox
class Application < Service
@@ -22,11 +21,11 @@ module OpenTox
# Get list of fminer algorithms
# @return [text/uri-list] URIs
get '/fminer/?' do
- list = [ to('/fminer/bbrc', :full),
- to('/fminer/bbrc/sample', :full),
- to('/fminer/last', :full),
- to('/fminer/bbrc/match', :full),
- to('/fminer/last/match', :full)
+ list = [ uri('/fminer/bbrc'),
+ uri('/fminer/bbrc/sample'),
+ uri('/fminer/last'),
+ uri('/fminer/bbrc/match'),
+ uri('/fminer/last/match')
].join("\n") + "\n"
format_output(list)
end
@@ -36,19 +35,19 @@ module OpenTox
get "/fminer/bbrc/?" do
algorithm = OpenTox::Algorithm.new(to('/fminer/bbrc',:full))
algorithm.metadata = {
- DC.title => 'Backbone Refinement Class Representatives',
- DC.creator => "andreas@maunz.de",
- RDF.type => [OT.Algorithm,OTA.PatternMiningSupervised]
+ RDF::DC.title => 'Backbone Refinement Class Representatives',
+ RDF::DC.creator => "andreas@maunz.de",
+ RDF.type => [RDF::OT.Algorithm,RDF::OTA.PatternMiningSupervised]
}
algorithm.parameters = [
- { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" },
- { DC.description => "Feature URI for dependent variable", OT.paramScope => "optional", DC.title => "prediction_feature" },
- { DC.description => "Minimum frequency", OT.paramScope => "optional", DC.title => "min_frequency" },
- { DC.description => "Feature type, can be 'paths' or 'trees'", OT.paramScope => "optional", DC.title => "feature_type" },
- { DC.description => "BBRC classes, pass 'false' to switch off mining for BBRC representatives.", OT.paramScope => "optional", DC.title => "backbone" },
- { DC.description => "Significance threshold (between 0 and 1)", OT.paramScope => "optional", DC.title => "min_chisq_significance" },
- { DC.description => "Whether subgraphs should be weighted with their occurrence counts in the instances (frequency)", OT.paramScope => "optional", DC.title => "nr_hits" },
- { DC.description => "Set to 'true' to obtain target variables as a feature", OT.paramScope => "optional", DC.title => "get_target" }
+ { RDF::DC.description => "Dataset URI", RDF::OT.paramScope => "mandatory", RDF::DC.title => "dataset_uri" },
+ { RDF::DC.description => "Feature URI for dependent variable", RDF::OT.paramScope => "optional", RDF::DC.title => "prediction_feature" },
+ { RDF::DC.description => "Minimum frequency", RDF::OT.paramScope => "optional", RDF::DC.title => "min_frequency" },
+ { RDF::DC.description => "Feature type, can be 'paths' or 'trees'", RDF::OT.paramScope => "optional", RDF::DC.title => "feature_type" },
+ { RDF::DC.description => "BBRC classes, pass 'false' to switch off mining for BBRC representatives.", RDF::OT.paramScope => "optional", RDF::DC.title => "backbone" },
+ { RDF::DC.description => "Significance threshold (between 0 and 1)", RDF::OT.paramScope => "optional", RDF::DC.title => "min_chisq_significance" },
+ { RDF::DC.description => "Whether subgraphs should be weighted with their occurrence counts in the instances (frequency)", RDF::OT.paramScope => "optional", RDF::DC.title => "nr_hits" },
+ { RDF::DC.description => "Set to 'true' to obtain target variables as a feature", RDF::OT.paramScope => "optional", RDF::DC.title => "get_target" }
]
format_output(algorithm)
end
@@ -58,19 +57,19 @@ module OpenTox
get "/fminer/bbrc/sample/?" do
algorithm = OpenTox::Algorithm.new(to('/fminer/bbrc/sample',:full))
algorithm.metadata = {
- DC.title => 'Backbone Refinement Class Representatives, obtained from samples of a dataset',
- DC.creator => "andreas@maunz.de",
- RDF.type => [OT.Algorithm,OTA.PatternMiningSupervised]
+ RDF::DC.title => 'Backbone Refinement Class Representatives, obtained from samples of a dataset',
+ RDF::DC.creator => "andreas@maunz.de",
+ RDF.type => [RDF::OT.Algorithm,RDF::OTA.PatternMiningSupervised]
}
algorithm.parameters = [
- { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" },
- { DC.description => "Feature URI for dependent variable", OT.paramScope => "optional", DC.title => "prediction_feature" },
- { DC.description => "Number of bootstrap samples", OT.paramScope => "optional", DC.title => "num_boots" },
- { DC.description => "Minimum sampling support", OT.paramScope => "optional", DC.title => "min_sampling_support" },
- { DC.description => "Minimum frequency", OT.paramScope => "optional", DC.title => "min_frequency" },
- { DC.description => "Whether subgraphs should be weighted with their occurrence counts in the instances (frequency)", OT.paramScope => "optional", DC.title => "nr_hits" },
- { DC.description => "BBRC classes, pass 'false' to switch off mining for BBRC representatives.", OT.paramScope => "optional", DC.title => "backbone" },
- { DC.description => "Chisq estimation method, pass 'mean' to use simple mean estimate for chisq test.", OT.paramScope => "optional", DC.title => "method" }
+ { RDF::DC.description => "Dataset URI", RDF::OT.paramScope => "mandatory", RDF::DC.title => "dataset_uri" },
+ { RDF::DC.description => "Feature URI for dependent variable", RDF::OT.paramScope => "optional", RDF::DC.title => "prediction_feature" },
+ { RDF::DC.description => "Number of bootstrap samples", RDF::OT.paramScope => "optional", RDF::DC.title => "num_boots" },
+ { RDF::DC.description => "Minimum sampling support", RDF::OT.paramScope => "optional", RDF::DC.title => "min_sampling_support" },
+ { RDF::DC.description => "Minimum frequency", RDF::OT.paramScope => "optional", RDF::DC.title => "min_frequency" },
+ { RDF::DC.description => "Whether subgraphs should be weighted with their occurrence counts in the instances (frequency)", RDF::OT.paramScope => "optional", RDF::DC.title => "nr_hits" },
+ { RDF::DC.description => "BBRC classes, pass 'false' to switch off mining for BBRC representatives.", RDF::OT.paramScope => "optional", RDF::DC.title => "backbone" },
+ { RDF::DC.description => "Chisq estimation method, pass 'mean' to use simple mean estimate for chisq test.", RDF::OT.paramScope => "optional", RDF::DC.title => "method" }
]
format_output(algorithm)
end
@@ -80,17 +79,17 @@ module OpenTox
get "/fminer/last/?" do
algorithm = OpenTox::Algorithm.new(to('/fminer/last',:full))
algorithm.metadata = {
- DC.title => 'Latent Structure Pattern Mining descriptors',
- DC.creator => "andreas@maunz.de",
- RDF.type => [OT.Algorithm,OTA.PatternMiningSupervised]
+ RDF::DC.title => 'Latent Structure Pattern Mining descriptors',
+ RDF::DC.creator => "andreas@maunz.de",
+ RDF.type => [RDF::OT.Algorithm,RDF::OTA.PatternMiningSupervised]
}
algorithm.parameters = [
- { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" },
- { DC.description => "Feature URI for dependent variable", OT.paramScope => "optional", DC.title => "prediction_feature" },
- { DC.description => "Minimum frequency", OT.paramScope => "optional", DC.title => "min_frequency" },
- { DC.description => "Feature type, can be 'paths' or 'trees'", OT.paramScope => "optional", DC.title => "feature_type" },
- { DC.description => "Whether subgraphs should be weighted with their occurrence counts in the instances (frequency)", OT.paramScope => "optional", DC.title => "nr_hits" },
- { DC.description => "Set to 'true' to obtain target variables as a feature", OT.paramScope => "optional", DC.title => "get_target" }
+ { RDF::DC.description => "Dataset URI", RDF::OT.paramScope => "mandatory", RDF::DC.title => "dataset_uri" },
+ { RDF::DC.description => "Feature URI for dependent variable", RDF::OT.paramScope => "optional", RDF::DC.title => "prediction_feature" },
+ { RDF::DC.description => "Minimum frequency", RDF::OT.paramScope => "optional", RDF::DC.title => "min_frequency" },
+ { RDF::DC.description => "Feature type, can be 'paths' or 'trees'", RDF::OT.paramScope => "optional", RDF::DC.title => "feature_type" },
+ { RDF::DC.description => "Whether subgraphs should be weighted with their occurrence counts in the instances (frequency)", RDF::OT.paramScope => "optional", RDF::DC.title => "nr_hits" },
+ { RDF::DC.description => "Set to 'true' to obtain target variables as a feature", RDF::OT.paramScope => "optional", RDF::DC.title => "get_target" }
]
format_output(algorithm)
end
@@ -101,14 +100,14 @@ module OpenTox
get "/fminer/:method/match/?" do
algorithm = OpenTox::Algorithm.new(to("/fminer/#{params[:method]}/match",:full))
algorithm.metadata = {
- DC.title => 'fminer feature matching',
- DC.creator => "mguetlein@gmail.com, andreas@maunz.de",
- RDF.type => [OT.Algorithm,OTA.PatternMiningSupervised]
+ RDF::DC.title => 'fminer feature matching',
+ RDF::DC.creator => "mguetlein@gmail.com, andreas@maunz.de",
+ RDF.type => [RDF::OT.Algorithm,RDF::OTA.PatternMiningSupervised]
}
algorithm.parameters = [
- { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" },
- { DC.description => "Feature Dataset URI", OT.paramScope => "mandatory", DC.title => "feature_dataset_uri" },
- { DC.description => "Feature URI for dependent variable", OT.paramScope => "optional", DC.title => "prediction_feature" }
+ { RDF::DC.description => "Dataset URI", RDF::OT.paramScope => "mandatory", RDF::DC.title => "dataset_uri" },
+ { RDF::DC.description => "Feature Dataset URI", RDF::OT.paramScope => "mandatory", RDF::DC.title => "feature_dataset_uri" },
+ { RDF::DC.description => "Feature URI for dependent variable", RDF::OT.paramScope => "optional", RDF::DC.title => "prediction_feature" }
]
format_output(algorithm)
end
@@ -128,18 +127,10 @@ module OpenTox
# @return [text/uri-list] Task URI
post '/fminer/bbrc/?' do
-
@@fminer=OpenTox::Algorithm::Fminer.new(to('/fminer/bbrc',:full))
- @@fminer.check_params(params,5,@subjectid)
+ @@fminer.check_params(params,5)
- task = OpenTox::Task.create(
- $task[:uri],
- @subjectid,
- { RDF::DC.description => "Mining BBRC features",
- RDF::DC.creator => to('/fminer/bbrc',:full)
- }
- ) do |task|
-
+ task = OpenTox::Task.run("Mining BBRC features", uri('/fminer/bbrc'), @subjectid) do |task|
@@bbrc.Reset
if @@fminer.prediction_feature.feature_type == "regression"
@@ -149,7 +140,7 @@ module OpenTox
"dataset '#{@@fminer.training_dataset.uri}' and "\
"feature '#{@@fminer.prediction_feature.uri}'" unless
@@fminer.prediction_feature.accept_values
- value_map=@@fminer.training_dataset.value_map(@@fminer.prediction_feature)
+ value_map=@@fminer.prediction_feature.value_map
end
@@bbrc.SetMinfreq(@@fminer.minfreq)
@@bbrc.SetType(1) if params[:feature_type] == "paths"
@@ -157,21 +148,20 @@ module OpenTox
@@bbrc.SetChisqSig(params[:min_chisq_significance].to_f) if params[:min_chisq_significance]
@@bbrc.SetConsoleOut(false)
-
feature_dataset = OpenTox::Dataset.new(nil, @subjectid)
feature_dataset.metadata = {
- DC.title => "BBRC representatives",
- DC.creator => to('/fminer/bbrc',:full),
- OT.hasSource => to('/fminer/bbrc', :full),
+ RDF::DC.title => "BBRC representatives",
+ RDF::DC.creator => to('/fminer/bbrc',:full),
+ RDF::OT.hasSource => to('/fminer/bbrc', :full),
}
feature_dataset.parameters = [
- { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] },
- { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] },
- { DC.title => "min_frequency", OT.paramValue => @@fminer.minfreq },
- { DC.title => "nr_hits", OT.paramValue => (params[:nr_hits] == "true" ? "true" : "false") },
- { DC.title => "backbone", OT.paramValue => (params[:backbone] == "false" ? "false" : "true") }
+ { RDF::DC.title => "dataset_uri", RDF::OT.paramValue => params[:dataset_uri] },
+ { RDF::DC.title => "prediction_feature", RDF::OT.paramValue => params[:prediction_feature] },
+ { RDF::DC.title => "min_frequency", RDF::OT.paramValue => @@fminer.minfreq },
+ { RDF::DC.title => "nr_hits", RDF::OT.paramValue => (params[:nr_hits] == "true" ? "true" : "false") },
+ { RDF::DC.title => "backbone", RDF::OT.paramValue => (params[:backbone] == "false" ? "false" : "true") }
]
-
+
@@fminer.compounds = []
@@fminer.db_class_sizes = Array.new # AM: effect
@@fminer.all_activities = Hash.new # DV: for effect calculation in regression part
@@ -219,17 +209,16 @@ module OpenTox
end
end
- #feature_uri = File.join feature_dataset.uri,"feature","bbrc", features.size.to_s
unless features_smarts.include? smarts
features_smarts << smarts
- metadata = {
- OT.hasSource => to('/fminer/bbrc', :full),
- RDF.type => [OT.Feature, OT.Substructure, OT.NumericFeature],
- OT.smarts => smarts.dup,
- OT.pValue => p_value.to_f.abs.round(5),
- OT.effect => effect
- }
- feature = OpenTox::Feature.find_by_title(smarts.dup,metadata)
+ feature = OpenTox::Feature.find_or_create({
+ RDF::DC.title => smarts.dup,
+ RDF::OT.hasSource => to('/fminer/bbrc', :full),
+ RDF.type => [RDF::OT.Feature, RDF::OT.Substructure, RDF::OT.NumericFeature],
+ RDF::OT.smarts => smarts.dup,
+ RDF::OT.pValue => p_value.to_f.abs.round(5),
+ RDF::OT.effect => effect
+ }, @subjectid)
features << feature
end
@@ -247,9 +236,8 @@ module OpenTox
end # end of
end # feature parsing
- fminer_compounds = @@fminer.training_dataset.compounds.collect.to_a
- @@fminer.training_dataset.build_feature_positions
- prediction_feature_idx = @@fminer.training_dataset.feature_positions[@@fminer.prediction_feature.uri]
+ fminer_compounds = @@fminer.training_dataset.compounds
+ prediction_feature_idx = @@fminer.training_dataset.features.collect{|f| f.uri}.index @@fminer.prediction_feature.uri
prediction_feature_all_acts = fminer_compounds.each_with_index.collect { |c,idx|
@@fminer.training_dataset.data_entries[idx][prediction_feature_idx]
}
@@ -257,29 +245,65 @@ module OpenTox
feature_dataset.features = features
if (params[:get_target] == "true")
+ puts "get_target TRUE"
feature_dataset.features = [ @@fminer.prediction_feature ] + feature_dataset.features
+ else
+ puts "get_target FALSE"
end
+ feature_dataset.compounds = fminer_compounds
fminer_compounds.each_with_index { |c,idx|
- row = [ c ]
- if (params[:get_target] == "true")
- row = row + [ prediction_feature_all_acts[idx] ]
- end
+ #puts c.smiles
+ # TODO: fix here, insert with add_data_entry
+ #row = [ c ]
+ # TODO: reenable option
+ #if (params[:get_target] == "true")
+ #row = row + [ prediction_feature_all_acts[idx] ]
+ #end
features.each { |f|
- row << (fminer_results[c] ? fminer_results[c][f.uri] : nil)
+ #m = c.match([f.title])[f.title]
+ #m = 0 unless m
+ v = fminer_results[c][f.uri] if fminer_results[c]
+ unless fminer_noact_compounds.include? c
+ v = 0 if v.nil?
+ end
+ #unless m == v
+ #puts f.title
+ #puts m
+ #puts v
+ #end
+ feature_dataset.add_data_entry c, f, v.to_i
+ #row << (fminer_results[c] ? fminer_results[c][f.uri] : nil)
}
- row.collect! { |v| v ? v : 0 } unless fminer_noact_compounds.include? c
- feature_dataset << row
+ #row.collect! { |v| v ? v : 0 } unless fminer_noact_compounds.include? c
+ #feature_dataset << row
}
+
+=begin
+CH: Ordering seems to be ok here
+ feature_dataset.compounds.each_with_index do |c,i|
+ feature_dataset.features.each_with_index do |f,j|
+ m = c.match([f.title])[f.title]
+ #puts c.smiles
+ #puts f.title
+ #puts m.inspect
+ v = feature_dataset.data_entries[i][j]
+ #puts v.inspect
+ unless m.to_i == v.to_i
+ puts f.title
+ puts m.to_i
+ puts v.to_i
+ end
+ end
+ end
+=end
- $logger.debug "fminer found #{feature_dataset.features.size} features for #{feature_dataset.compounds.size} compounds"
- feature_dataset.put @subjectid
- $logger.debug feature_dataset.uri
+ #puts feature_dataset.to_csv
+ feature_dataset.put
feature_dataset.uri
end
response['Content-Type'] = 'text/uri-list'
- service_unavailable_error "Service unavailable" if task.cancelled?
- halt 202,task.uri.to_s+"\n"
+ halt 202,task.uri
end
@@ -298,15 +322,9 @@ module OpenTox
post '/fminer/last/?' do
@@fminer=OpenTox::Algorithm::Fminer.new(to('/fminer/last',:full))
- @@fminer.check_params(params,80,@subjectid)
+ @@fminer.check_params(params,80)
- task = OpenTox::Task.create(
- $task[:uri],
- @subjectid,
- { RDF::DC.description => "Mining LAST features",
- RDF::DC.creator => to('/fminer/last',:full)
- }
- ) do |task|
+ task = OpenTox::Task.run("Mining LAST features", uri('/fminer/last'), @subjectid) do |task|
@@last.Reset
if @@fminer.prediction_feature.feature_type == "regression"
@@ -316,7 +334,7 @@ module OpenTox
"dataset '#{fminer.training_dataset.uri}' and "\
"feature '#{fminer.prediction_feature.uri}'" unless
@@fminer.prediction_feature.accept_values
- value_map=@@fminer.training_dataset.value_map(@@fminer.prediction_feature)
+ value_map=@@fminer.prediction_feature.value_map
end
@@last.SetMinfreq(@@fminer.minfreq)
@@last.SetType(1) if params[:feature_type] == "paths"
@@ -325,15 +343,15 @@ module OpenTox
feature_dataset = OpenTox::Dataset.new(nil, @subjectid)
feature_dataset.metadata = {
- DC.title => "LAST representatives for " + @@fminer.training_dataset.metadata[DC.title].to_s,
- DC.creator => to('/fminer/last',:full),
- OT.hasSource => to('/fminer/last', :full)
+ RDF::DC.title => "LAST representatives for " + @@fminer.training_dataset.metadata[RDF::DC.title].to_s,
+ RDF::DC.creator => to('/fminer/last'),
+ RDF::OT.hasSource => to('/fminer/last')
}
feature_dataset.parameters = [
- { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] },
- { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] },
- { DC.title => "min_frequency", OT.paramValue => @@fminer.minfreq },
- { DC.title => "nr_hits", OT.paramValue => (params[:nr_hits] == "true" ? "true" : "false") }
+ { RDF::DC.title => "dataset_uri", RDF::OT.paramValue => params[:dataset_uri] },
+ { RDF::DC.title => "prediction_feature", RDF::OT.paramValue => params[:prediction_feature] },
+ { RDF::DC.title => "min_frequency", RDF::OT.paramValue => @@fminer.minfreq },
+ { RDF::DC.title => "nr_hits", RDF::OT.paramValue => (params[:nr_hits] == "true" ? "true" : "false") }
]
@@fminer.compounds = []
@@ -366,7 +384,8 @@ module OpenTox
fminer_results = {}
matches.each do |smarts, ids|
metadata, parameters = @@fminer.calc_metadata(smarts, ids, counts[smarts], @@last, nil, value_map, params)
- feature = OpenTox::Feature.find_by_title(smarts.dup,metadata)
+ metadata[RDF::DC.title] = smarts.dup
+ feature = OpenTox::Feature.find_or_create(metadata, @subjectid)
features << feature
ids.each_with_index { |id,idx|
fminer_results[@@fminer.compounds[id]] || fminer_results[@@fminer.compounds[id]] = {}
@@ -374,9 +393,8 @@ module OpenTox
}
end
- fminer_compounds = @@fminer.training_dataset.compounds.collect.to_a
- @@fminer.training_dataset.build_feature_positions
- prediction_feature_idx = @@fminer.training_dataset.feature_positions[@@fminer.prediction_feature.uri]
+ fminer_compounds = @@fminer.training_dataset.compounds
+ prediction_feature_idx = @@fminer.training_dataset.features.collect{|f| f.uri}.index @@fminer.prediction_feature.uri
prediction_feature_all_acts = fminer_compounds.each_with_index.collect { |c,idx|
@@fminer.training_dataset.data_entries[idx][prediction_feature_idx]
}
@@ -387,6 +405,7 @@ module OpenTox
feature_dataset.features = [ @@fminer.prediction_feature ] + feature_dataset.features
end
fminer_compounds.each_with_index { |c,idx|
+ # TODO: fix value insertion
row = [ c ]
if (params[:get_target] == "true")
row = row + [ prediction_feature_all_acts[idx] ]
@@ -397,14 +416,12 @@ module OpenTox
row.collect! { |v| v ? v : 0 } unless fminer_noact_compounds.include? c
feature_dataset << row
}
- feature_dataset.put @subjectid
- $logger.debug feature_dataset.uri
+ feature_dataset.put
feature_dataset.uri
end
response['Content-Type'] = 'text/uri-list'
- service_unavailable_error "Service unavailable" if task.cancelled?
- halt 202,task.uri.to_s+"\n"
+ halt 202,task.uri
end
end
diff --git a/webapp/fs.rb b/webapp/fs.rb
index 6286629..e570860 100644
--- a/webapp/fs.rb
+++ b/webapp/fs.rb
@@ -20,15 +20,15 @@ module OpenTox
get "/fs/rfe/?" do
algorithm = OpenTox::Algorithm.new(to('/fs/rfe',:full))
algorithm.metadata = {
- DC.title => 'Recursive Feature Elimination',
- DC.creator => "andreas@maunz.de",
- RDF.type => [OT.Algorithm,OTA.PatternMiningSupervised]
+ RDF::DC.title => 'Recursive Feature Elimination',
+ RDF::DC.creator => "andreas@maunz.de",
+ RDF.type => [RDF::OT.Algorithm,RDF::OTA.PatternMiningSupervised]
}
algorithm.parameters = [
- { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" },
- { DC.description => "Prediction Feature URI", OT.paramScope => "mandatory", DC.title => "prediction_feature" },
- { DC.description => "Feature Dataset URI", OT.paramScope => "mandatory", DC.title => "feature_dataset_uri" },
- { DC.description => "Delete Instances with missing values", OT.paramScope => "optional", DC.title => "del_missing" }
+ { RDF::DC.description => "Dataset URI", RDF::OT.paramScope => "mandatory", RDF::DC.title => "dataset_uri" },
+ { RDF::DC.description => "Prediction Feature URI", RDF::OT.paramScope => "mandatory", RDF::DC.title => "prediction_feature" },
+ { RDF::DC.description => "Feature Dataset URI", RDF::OT.paramScope => "mandatory", RDF::DC.title => "feature_dataset_uri" },
+ { RDF::DC.description => "Delete Instances with missing values", RDF::OT.paramScope => "optional", RDF::DC.title => "del_missing" }
]
format_output(algorithm)
end
diff --git a/webapp/lazar.rb b/webapp/lazar.rb
index 9eec397..c40fc98 100644
--- a/webapp/lazar.rb
+++ b/webapp/lazar.rb
@@ -1,58 +1,25 @@
-=begin
-* Name: lazar.rb
-* Description: Lazar
-* Author: Andreas Maunz <andreas@maunz.de>
-* Date: 10/2012
-=end
-
-$lazar_params = [
- "training_dataset_uri",
- "prediction_feature_uri",
- "feature_dataset_uri",
- "feature_generation_uri",
- "feature_calculation_algorithm",
- "similarity_algorithm",
- "min_sim",
- "prediction_algorithm",
- "propositionalized",
- "pc_type",
- "lib",
- "min_train_performance"
-]
-$lazar_min_train_performance_default = 0.1
-
-
module OpenTox
class Application < Service
-
# Get representation of lazar algorithm
# @return [String] Representation
get '/lazar/?' do
algorithm = OpenTox::Algorithm.new(to('/lazar',:full))
algorithm.metadata = {
- DC.title => 'lazar',
- DC.creator => 'helma@in-silico.ch, andreas@maunz.de',
- RDF.Type => [OT.Algorithm]
+ RDF::DC.title => 'lazar',
+ RDF::DC.creator => 'helma@in-silico.ch, andreas@maunz.de',
+ RDF.type => [RDF::OT.Algorithm]
}
algorithm.parameters = [
- { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" },
- { DC.description => "Feature URI for dependent variable", OT.paramScope => "optional", DC.title => "prediction_feature" },
- { DC.description => "Feature generation service URI", OT.paramScope => "optional", DC.title => "feature_generation_uri" },
- { DC.description => "Feature dataset URI", OT.paramScope => "optional", DC.title => "feature_dataset_uri" },
- { DC.description => "Further parameters for the feature generation service", OT.paramScope => "optional" }
+ { RDF::DC.description => "Dataset URI", RDF::OT.paramScope => "mandatory", RDF::DC.title => "dataset_uri" },
+ { RDF::DC.description => "Feature URI for dependent variable", RDF::OT.paramScope => "optional", RDF::DC.title => "prediction_feature" },
+ { RDF::DC.description => "Feature generation service URI", RDF::OT.paramScope => "optional", RDF::DC.title => "feature_generation_uri" },
+ { RDF::DC.description => "Feature dataset URI", RDF::OT.paramScope => "optional", RDF::DC.title => "feature_dataset_uri" },
+ { RDF::DC.description => "Further parameters for the feature generation service", RDF::OT.paramScope => "optional" }
]
format_output(algorithm)
end
- def predicted_variable(prediction_feature)
- OpenTox::Feature.find_by_title("predicted_variable", {RDF.type => prediction_feature[RDF.type]})
- end
-
- def predicted_confidence
- OpenTox::Feature.find_by_title("predicted_confidence", {RDF.type => [RDF::OT.NumericFeature]})
- end
-
# Create a lazar prediction model
# @param [String] dataset_uri Training dataset URI
@@ -61,37 +28,17 @@ module OpenTox
# @param [optional,String] - further parameters for the feature generation service
# @return [text/uri-list] Task URI
post '/lazar/?' do
- params[:subjectid] = @subjectid
- resource_not_found_error "No dataset_uri parameter." unless params[:dataset_uri]
- task = OpenTox::Task.create(
- $task[:uri],
- @subjectid,
- { RDF::DC.description => "Create lazar model",
- RDF::DC.creator => to('/lazar',:full)
- }
- ) do |task|
-
+ resource_not_found_error "Please provide a dataset_uri parameter." unless params[:dataset_uri]
+ resource_not_found_error "Dataset '#{params[:dataset_uri]}' not found." unless URI.accessible? params[:dataset_uri]
+ bad_request_error "Please provide a feature_generation_uri parameter." unless params[:feature_generation_uri]
+ task = OpenTox::Task.run("Create lazar model", uri('/lazar'), @subjectid) do |task|
lazar = OpenTox::Model.new(nil, @subjectid)
- lazar.parameters = lazar.check_params($lazar_params, params)
- prediction_feature = OpenTox::Feature.find(lazar.find_parameter_value("prediction_feature_uri"))
- lazar.metadata = {
- DC.title => "lazar model",
- OT.dependentVariables => lazar.find_parameter_value("prediction_feature_uri"),
- OT.predictedVariables => [ predicted_variable(prediction_feature).uri, predicted_confidence.uri ],
- OT.trainingDataset => lazar.find_parameter_value("training_dataset_uri"),
- OT.featureDataset => lazar.find_parameter_value("feature_dataset_uri"),
- RDF.type => ( prediction_feature.feature_type == "classification" ?
- [OT.Model, OTA.ClassificationLazySingleTarget] :
- [OT.Model, OTA.RegressionLazySingleTarget]
- )
- }
- # task.progress 10
- lazar.put @subjectid
+ lazar.create_model(params)
+ lazar.put
lazar.uri
end
response['Content-Type'] = 'text/uri-list'
- #service_unavailable_error "Service unavailable" if task.cancelled?
- halt 202,task.uri.to_s+"\n"
+ halt 202,task.uri
end
@@ -109,154 +56,18 @@ module OpenTox
# @param [optional,String] Further parameters for the feature generation service
# @return [text/uri-list] Task URI
post '/lazar/predict/?' do
+ # pass parameters instead of model_uri, because model service is blocked by incoming call
- params[:subjectid] = @subjectid
- if ( (params[:compound_uri] and params[:dataset_uri]) or
- (!params[:compound_uri] and !params[:dataset_uri])
- )
- bad_request_error "Submit either compound uri or dataset uri"
- end
-
- task = OpenTox::Task.create(
- $task[:uri],
- @subjectid,
- {
- RDF::DC.description => "Apply lazar model",
- RDF::DC.creator => to('/lazar/predict',:full)
- }
- ) do |task|
-
-
- if params[:dataset_uri]
- compounds = OpenTox::Dataset.find(params[:dataset_uri]).compounds
- else
- compounds = [ OpenTox::Compound.new(params[:compound_uri]) ]
- end
-
- compounds.each { |query_compound|
- params[:compound_uri] = query_compound.uri # AM: store compound in params hash
- unless @prediction_dataset # AM: only once for dataset predictions
- @prediction_dataset = OpenTox::Dataset.new(nil, @subjectid)
-
- @model_params_hash = $lazar_params.inject({}){ |h,p|
- h[p] = params[p].to_s unless params[p].nil?
- h
- }
- @model = OpenTox::Model.new(@model_params_hash)
-
- $logger.debug "Loading training dataset"
- @training_dataset = OpenTox::Dataset.find(params[:training_dataset_uri], @subjectid)
- @prediction_feature = OpenTox::Feature.find(params[:prediction_feature_uri],@subjectid)
- @predicted_variable = predicted_variable(@prediction_feature)
- @predicted_confidence = predicted_confidence
- @similarity_feature = OpenTox::Feature.find_by_title("similarity", {RDF.type => [RDF::OT.NumericFeature]})
- @prediction_dataset.features = [ @prediction_feature, @predicted_variable, @predicted_confidence, @similarity_feature ]
-
- @prediction_dataset.metadata = {
- DC.title => "Lazar prediction",
- DC.creator => @uri.to_s,
- OT.hasSource => @uri.to_s,
- OT.dependentVariables => @model_params_hash["prediction_feature_uri"],
- OT.predictedVariables => [@predicted_variable.uri,@predicted_confidence.uri]
- }
- end
-
- database_activity = @training_dataset.database_activity(params)
- if database_activity
-
- orig_value = database_activity.to_f
- predicted_value = orig_value
- confidence_value = 1.0
-
- else
- @model = OpenTox::Model.new(@model_params_hash)
-
- unless @feature_dataset
- $logger.debug "Loading f dataset"
- @feature_dataset = OpenTox::Dataset.find(params[:feature_dataset_uri], @subjectid)
- end
-
- case @feature_dataset.find_parameter_value("nr_hits")
- when "true" then @model.feature_calculation_algorithm = "match_hits"
- when "false" then @model.feature_calculation_algorithm = "match"
- end
- pc_type = @feature_dataset.find_parameter_value("pc_type")
- @model.pc_type = pc_type unless pc_type.nil?
- lib = @feature_dataset.find_parameter_value("lib")
- @model.lib = lib unless lib.nil?
-
- # AM: transform to cosine space
- @model.min_sim = (@model.min_sim.to_f*2.0-1.0).to_s if @model.similarity_algorithm =~ /cosine/
-
- if @feature_dataset.features.size > 0
- compound_params = {
- :compound => query_compound,
- :feature_dataset => @feature_dataset,
- :pc_type => @model.pc_type,
- :lib => @model.lib
- }
- # use send, not eval, for calling the method (good backtrace)
- $logger.debug "Calculating q fps"
- compound_fingerprints = OpenTox::Algorithm::FeatureValues.send( @model.feature_calculation_algorithm, compound_params, @subjectid )
- else
- bad_request_error "No features found"
- end
-
- @model.add_data(@training_dataset, @feature_dataset, @prediction_feature, compound_fingerprints, @subjectid)
- mtf = OpenTox::Algorithm::Transform::ModelTransformer.new(@model)
- mtf.transform
- $logger.debug "Predicting q"
- prediction = OpenTox::Algorithm::Neighbors.send(@model.prediction_algorithm,
- { :props => mtf.props,
- :acts => mtf.acts,
- :sims => mtf.sims,
- :value_map => @prediction_feature.feature_type=="classification" ?
- @training_dataset.value_map(@prediction_feature) : nil,
- :min_train_performance => @model.min_train_performance
- } )
- orig_value = nil
- predicted_value = prediction[:prediction].to_f
- confidence_value = prediction[:confidence].to_f
-
- # AM: transform to original space
- confidence_value = ((confidence_value+1.0)/2.0).abs if @model.similarity_algorithm =~ /cosine/
- predicted_value = @training_dataset.value_map(@prediction_feature)[prediction[:prediction].to_i] if @prediction_feature.feature_type == "classification"
-
- $logger.debug "Prediction: '#{predicted_value}'"
- $logger.debug "Confidence: '#{confidence_value}'"
- end
-
- @prediction_dataset << [
- query_compound,
- orig_value,
- predicted_value,
- confidence_value,
- nil
- ]
- @model.neighbors.each { |neighbor|
- @prediction_dataset << [
- OpenTox::Compound.new(neighbor[:compound]),
- @training_dataset.value_map(@prediction_feature)[neighbor[:activity]],
- nil,
- nil,
- neighbor[:similarity]
- ]
- }
-
- }
-
- @prediction_dataset.parameters = $lazar_params.collect { |p|
- {DC.title => p, OT.paramValue => @model.instance_variable_get("@#{p}")} unless @model.instance_variable_get("@#{p}").nil?
- }
+ task = OpenTox::Task.run("Apply lazar model",uri('/lazar/predict'), @subjectid) do |task|
- @prediction_dataset.put
- $logger.debug @prediction_dataset.uri
- @prediction_dataset.uri
+ lazar = OpenTox::LazarPrediction.new params
+ puts "RESULT"
+ puts lazar.prediction_dataset.uri
+ lazar.prediction_dataset.uri
end
response['Content-Type'] = 'text/uri-list'
- #service_unavailable_error "Service unavailable" if task.cancelled?
- halt 202,task.uri.to_s+"\n"
+ halt 202,task.uri
end
diff --git a/webapp/sinatra.rb b/webapp/sinatra.rb
index cf0e7c5..b61b0a3 100644
--- a/webapp/sinatra.rb
+++ b/webapp/sinatra.rb
@@ -8,15 +8,6 @@
module OpenTox
class Application < Service
- # Put any code here that should be executed immediately before
- # request is processed
- before {
- $logger.debug "Request: " + request.path
- # fix IE
- request.env['HTTP_ACCEPT'] += ";text/html" if request.env["HTTP_USER_AGENT"]=~/MSIE/
- request.env['HTTP_ACCEPT']=request.params["media"] if request.params["media"]
- }
-
# Conveniently accessible from anywhere within the Application class,
# it negotiates the appropriate output format based on object class
# and requested MIME type.
@@ -29,7 +20,7 @@ module OpenTox
case @accept
when /text\/html/
content_type "text/html"
- OpenTox.text_to_html obj
+ obj.to_html
else
content_type 'text/uri-list'
obj
@@ -43,7 +34,7 @@ module OpenTox
obj.to_rdfxml
when /text\/html/
content_type "text/html"
- OpenTox.text_to_html obj.to_turtle
+ obj.to_html
else
content_type "text/turtle"
obj.to_turtle
diff --git a/webapp/test.rb b/webapp/test.rb
index 0cb85e5..75044da 100644
--- a/webapp/test.rb
+++ b/webapp/test.rb
@@ -4,7 +4,7 @@ module OpenTox
class Application < Service
post '/test/wait_for_error_in_task/?' do
- task = OpenTox::Task.create($task[:uri],@subjectid,{ RDF::DC.description => "wait_for_error_in_task"}) do |task|
+ task = OpenTox::Task.run("wait_for_error_in_task",@uri,@subjectid) do |task|
sleep 1
uri = OpenTox::Dataset.new(File.join($dataset[:uri],'test/error_in_task')).post
end