From e1f52a8f81f59b8ef81aed87f53ce755fb25ace6 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Mon, 2 Apr 2012 16:51:21 +0200 Subject: Unified interface to PC descriptors --- application.rb | 9 +++-- balancer.rb | 98 --------------------------------------------------- feature_generation.rb | 2 +- pc.rb | 67 +++++++++++++++++++++++++++++++++++ 4 files changed, 72 insertions(+), 104 deletions(-) delete mode 100644 balancer.rb create mode 100644 pc.rb diff --git a/application.rb b/application.rb index 53478a1..539ac2b 100644 --- a/application.rb +++ b/application.rb @@ -16,13 +16,12 @@ require File.join(File.expand_path(File.dirname(__FILE__)), 'libfminer/liblast/l require File.join(File.expand_path(File.dirname(__FILE__)), 'last-utils/lu.rb') # AM LAST gem "opentox-ruby", "~> 3" require 'opentox-ruby' +require 'rjb' -#require 'smarts.rb' -#require 'similarity.rb' -require 'openbabel.rb' require 'fminer.rb' require 'lazar.rb' -require 'feature_selection.rb' +require 'fs.rb' +require 'pc.rb' set :lock, true @@ -34,7 +33,7 @@ end # # @return [text/uri-list] algorithm URIs get '/?' do - list = [ url_for('/lazar', :full), url_for('/fminer/bbrc', :full), url_for('/fminer/last', :full), url_for('/feature_selection/rfe', :full) ].join("\n") + "\n" + list = [ url_for('/lazar', :full), url_for('/fminer/bbrc', :full), url_for('/fminer/last', :full), url_for('/feature_selection/rfe', :full), url_for('/pc', :full) ].join("\n") + "\n" case request.env['HTTP_ACCEPT'] when /text\/html/ content_type "text/html" diff --git a/balancer.rb b/balancer.rb deleted file mode 100644 index 4ed2fd7..0000000 --- a/balancer.rb +++ /dev/null @@ -1,98 +0,0 @@ -# cuts a classification dataset into balanced pieces -# let inact_act_ratio := majority_class.size/minority_class.size -# then: nr pieces = ceil(inact_act_ratio) if inact_act_ratio > 1.5 -# each piece contains the complete minority class and ceil(inact_act_ratio) majority class compounds. - -class Balancer - - attr_accessor :inact_act_ratio, :act_hash, :inact_hash, :majority_splits, :nr_majority_splits, :errors, :datasets - - # Supply a OpenTox::Dataset here - # Calculates inact_act_ratio, iff inact_act_ratio != +/-Infinity and no regression dataset is given - def initialize(dataset, feature_uri, creator_url) - @act_arr = [] - @inact_arr = [] - @inact_act_ratio = 1.0/0 # trick to define +infinity - @nr_majority_splits = 1 # +/-1 means: no split - @split = [] # splitted arrays with ids - @datasets = [] # result datasets - @errors = [] - - classification = true - if dataset.features.include?(feature_uri) - dataset.data.each do |i,a| - inchi = i - acts = a - acts.each do |act| - value = act[feature_uri] - if OpenTox::Utils.is_true?(value) - @act_arr << inchi - elsif OpenTox::Utils.classification?(value) - @inact_arr << inchi - else - classification = false - break; - end - end - end - @inact_act_ratio = @inact_arr.size.to_f / @act_arr.size.to_f unless (@act_arr.size == 0 or !classification) # leave alone for regression - set_nr_majority_splits - # perform majority split - @split = @nr_majority_splits > 0 ? shuffle_split(@inact_arr) : shuffle_split(@act_arr) unless @nr_majority_splits.abs == 1 - @split.each do |s| - new_c = @nr_majority_splits > 0 ? s.concat(@act_arr) : s.concat(@inac_arr) - @datasets << dataset.create_new_dataset(new_c, [feature_uri], dataset.title, creator_url) - end - - else - errors << "Feature not present in dataset." - end - errors << "Can not split regression dataset." unless classification - end - - - - # sets nr of splits for majority class ('+', if inact_cnt > act_cnt, or '-' else), or leaves unchanged for illegal values. - def set_nr_majority_splits - @nr_majority_splits = @inact_act_ratio >= 1.5 ? @inact_act_ratio.ceil : ( @inact_act_ratio <= (2.0/3.0) ? -(1.0/@inact_act_ratio).ceil : ( @inact_act_ratio>1.0 ? 1 : -1) ) unless OpenTox::Utils.infinity?(@inact_act_ratio) # leave alone for regression - end - - # does the actual shuffle and split - def shuffle_split (arr) - arr = arr.shuffle - arr.chunk(@nr_majority_splits.abs) - end - - # turns a hash into a 2 col csv - def hsh2csv (hsh) - res="" - hsh.each do |k,v| - arr = [v,(@nr_majority_splits > 0 ? 0 : 1)] - res += arr.join(", ") + "\n" - end - res - end - -end - -class Array - - # cuts an array into chunks - returns a two-dimensional array - def chunk(pieces) - q, r = length.divmod(pieces) - (0..pieces).map { |i| i * q + [r, i].min }.enum_cons(2) \ - .map { |a, b| slice(a...b) } - end - - # shuffles the elements of an array - def shuffle( seed=nil ) - srand seed.to_i if seed - sort_by { Kernel.rand } - end - - # shuffels self - def shuffle!( seed=nil ) - self.replace shuffle( seed ) - end - -end diff --git a/feature_generation.rb b/feature_generation.rb index 1bea0f3..e822404 100644 --- a/feature_generation.rb +++ b/feature_generation.rb @@ -6,7 +6,7 @@ algorithm = OpenTox::Algorithm::Generic.new(url_for('/pcdesc',:full)) algorithm.metadata = { DC.title => 'Physico-chemical (PC) descriptor calculation', DC.creator => "andreas@maunz.de, vorgrimmlerdavid@gmx.de", - RDF.type => [OT.Algorithm,OTA.PatternMiningSupervised], + RDF.type => [OT.Algorithm,OTA.DescriptorCalculation], OT.parameters => [ { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" }, { DC.description => "PC type", OT.paramScope => "mandatory", DC.title => "pc_type" }, diff --git a/pc.rb b/pc.rb new file mode 100644 index 0000000..f297014 --- /dev/null +++ b/pc.rb @@ -0,0 +1,67 @@ +# pc.rb +# (P)hysico (C)hemical descriptor calculation +# Author: Andreas Maunz + + +# Get a list of OpenBabel algorithms +# @return [text/uri-list] URIs of OpenBabel algorithms +get '/pc' do + algorithms = YAML::load_file File.join(ENV['HOME'], ".opentox", "config", "pc_descriptors.yaml") + response['Content-Type'] = 'text/uri-list' + list = (algorithms.keys << "AllDescriptors").join("\n") + "\n" + case request.env['HTTP_ACCEPT'] + when /text\/html/ + content_type "text/html" + OpenTox.text_to_html list + else + content_type 'text/uri-list' + list + end +end + +# Get RDF/XML representation of OpenBabel algorithm +# @return [application/rdf+xml] OWL-DL representation of OpenBabel algorithm +get '/pc/:descriptor' do + descriptors = YAML::load_file File.join(ENV['HOME'], ".opentox", "config", "pc_descriptors.yaml") + alg_params = [ { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" } ] + + if params[:descriptor] != "AllDescriptors" + descriptors = descriptors[params[:descriptor]] + else + alg_params << { DC.description => "Descriptor Category, one or more of '#{descriptors.collect { |id, info| info[:category] }.uniq.sort.join(",")}'", OT.paramScope => "optional", DC.title => "category" } + alg_params << { DC.description => "Software Library, one or more of '#{descriptors.collect { |id, info| info[:lib] }.uniq.sort.join(",")}'", OT.paramScope => "optional", DC.title => "lib" } + descriptors = {:id => "AllDescriptors", :name => "All PC descriptors" } + end + + if descriptors + + # Contents + algorithm = OpenTox::Algorithm::Generic.new(url_for("/pc/#{params[:descriptor]}",:full)) + algorithm.metadata = { + DC.title => params[:descriptor], + DC.creator => "andreas@maunz.de", + DC.description => descriptors[:name], + RDF.type => [OTA.DescriptorCalculation], + } + algorithm.metadata[OT.parameters] = alg_params + algorithm.metadata[DC.description] << (", category: " + descriptors[:category]) unless descriptors[:id] == "AllDescriptors" + algorithm.metadata[DC.description] << (", lib: " + descriptors[:lib]) unless descriptors[:id] == "AllDescriptors" + + # Deliver + case request.env['HTTP_ACCEPT'] + when /text\/html/ + content_type "text/html" + OpenTox.text_to_html algorithm.to_yaml + when /yaml/ + content_type "application/x-yaml" + algorithm.to_yaml + else + response['Content-Type'] = 'application/rdf+xml' + algorithm.to_rdfxml + end + + else + raise OpenTox::NotFoundError.new "Unknown descriptor #{params[:descriptor]}." + end +end + -- cgit v1.2.3