From e1f52a8f81f59b8ef81aed87f53ce755fb25ace6 Mon Sep 17 00:00:00 2001
From: Andreas Maunz <andreas@maunz.de>
Date: Mon, 2 Apr 2012 16:51:21 +0200
Subject: Unified interface to PC descriptors

---
 application.rb        |  9 +++--
 balancer.rb           | 98 ---------------------------------------------------
 feature_generation.rb |  2 +-
 pc.rb                 | 67 +++++++++++++++++++++++++++++++++++
 4 files changed, 72 insertions(+), 104 deletions(-)
 delete mode 100644 balancer.rb
 create mode 100644 pc.rb

diff --git a/application.rb b/application.rb
index 53478a1..539ac2b 100644
--- a/application.rb
+++ b/application.rb
@@ -16,13 +16,12 @@ require File.join(File.expand_path(File.dirname(__FILE__)), 'libfminer/liblast/l
 require File.join(File.expand_path(File.dirname(__FILE__)), 'last-utils/lu.rb') # AM LAST
 gem "opentox-ruby", "~> 3"
 require 'opentox-ruby'
+require 'rjb'
 
-#require 'smarts.rb'
-#require 'similarity.rb'
-require 'openbabel.rb'
 require 'fminer.rb'
 require 'lazar.rb'
-require 'feature_selection.rb'
+require 'fs.rb'
+require 'pc.rb'
 
 set :lock, true
 
@@ -34,7 +33,7 @@ end
 #
 # @return [text/uri-list] algorithm URIs
 get '/?' do
-	list = [ url_for('/lazar', :full), url_for('/fminer/bbrc', :full), url_for('/fminer/last', :full), url_for('/feature_selection/rfe', :full) ].join("\n") + "\n"
+	list = [ url_for('/lazar', :full), url_for('/fminer/bbrc', :full), url_for('/fminer/last', :full), url_for('/feature_selection/rfe', :full), url_for('/pc', :full) ].join("\n") + "\n"
   case request.env['HTTP_ACCEPT']
   when /text\/html/
     content_type "text/html"
diff --git a/balancer.rb b/balancer.rb
deleted file mode 100644
index 4ed2fd7..0000000
--- a/balancer.rb
+++ /dev/null
@@ -1,98 +0,0 @@
-# cuts a classification dataset into balanced pieces
-# let inact_act_ratio := majority_class.size/minority_class.size 
-# then: nr pieces = ceil(inact_act_ratio) if inact_act_ratio > 1.5
-# each piece contains the complete minority class and ceil(inact_act_ratio) majority class compounds.
-
-class Balancer
-
-  attr_accessor :inact_act_ratio, :act_hash, :inact_hash, :majority_splits, :nr_majority_splits, :errors, :datasets
-
-  # Supply a OpenTox::Dataset here
-  # Calculates inact_act_ratio, iff inact_act_ratio != +/-Infinity and no regression dataset is given
-  def initialize(dataset, feature_uri, creator_url)
-    @act_arr = []
-    @inact_arr = []
-    @inact_act_ratio = 1.0/0  # trick to define +infinity
-    @nr_majority_splits = 1   # +/-1 means: no split
-    @split = []               # splitted arrays with ids
-    @datasets = []            # result datasets
-    @errors = []
-
-    classification = true
-    if dataset.features.include?(feature_uri)
-      dataset.data.each do |i,a|
-        inchi = i
-        acts = a
-        acts.each do |act|
-          value = act[feature_uri]
-          if OpenTox::Utils.is_true?(value)
-            @act_arr << inchi
-          elsif OpenTox::Utils.classification?(value)
-            @inact_arr << inchi
-          else
-            classification = false
-            break;
-          end
-        end
-      end
-      @inact_act_ratio = @inact_arr.size.to_f / @act_arr.size.to_f unless (@act_arr.size == 0 or !classification) # leave alone for regression
-      set_nr_majority_splits
-      # perform majority split
-      @split = @nr_majority_splits > 0 ? shuffle_split(@inact_arr) : shuffle_split(@act_arr) unless @nr_majority_splits.abs == 1
-      @split.each do |s|
-        new_c = @nr_majority_splits > 0 ? s.concat(@act_arr) : s.concat(@inac_arr)
-        @datasets << dataset.create_new_dataset(new_c, [feature_uri], dataset.title, creator_url)
-      end
-
-    else
-      errors << "Feature not present in dataset."
-    end
-    errors << "Can not split regression dataset." unless classification
-  end
-
-
-
-  # sets nr of splits for majority class ('+', if inact_cnt > act_cnt, or '-' else), or leaves unchanged for illegal values.
-  def set_nr_majority_splits
-    @nr_majority_splits = @inact_act_ratio >= 1.5 ? @inact_act_ratio.ceil : ( @inact_act_ratio <= (2.0/3.0) ? -(1.0/@inact_act_ratio).ceil : ( @inact_act_ratio>1.0 ? 1 : -1) ) unless OpenTox::Utils.infinity?(@inact_act_ratio) # leave alone for regression
-  end
-
-  # does the actual shuffle and split
-  def shuffle_split (arr)
-    arr = arr.shuffle
-    arr.chunk(@nr_majority_splits.abs)
-  end
-
-  # turns a hash into a 2 col csv
-  def hsh2csv (hsh)
-    res=""
-    hsh.each do |k,v|
-      arr = [v,(@nr_majority_splits > 0 ? 0 : 1)]
-      res += arr.join(", ") + "\n"
-    end
-    res
-  end
-
-end
-
-class Array
-
-  # cuts an array into <num-pieces> chunks - returns a two-dimensional array
-  def chunk(pieces)
-    q, r = length.divmod(pieces)
-    (0..pieces).map { |i| i * q + [r, i].min }.enum_cons(2) \
-      .map { |a, b| slice(a...b) }
-  end
-
-  # shuffles the elements of an array
-  def shuffle( seed=nil )
-    srand seed.to_i if seed
-    sort_by { Kernel.rand }
-  end
-
-  # shuffels self
-  def shuffle!( seed=nil )
-    self.replace shuffle( seed )
-  end
-
-end
diff --git a/feature_generation.rb b/feature_generation.rb
index 1bea0f3..e822404 100644
--- a/feature_generation.rb
+++ b/feature_generation.rb
@@ -6,7 +6,7 @@ algorithm = OpenTox::Algorithm::Generic.new(url_for('/pcdesc',:full))
   algorithm.metadata = {
     DC.title => 'Physico-chemical (PC) descriptor calculation',
     DC.creator => "andreas@maunz.de, vorgrimmlerdavid@gmx.de",
-    RDF.type => [OT.Algorithm,OTA.PatternMiningSupervised],
+    RDF.type => [OT.Algorithm,OTA.DescriptorCalculation],
     OT.parameters => [
       { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" },
       { DC.description => "PC type", OT.paramScope => "mandatory", DC.title => "pc_type" },
diff --git a/pc.rb b/pc.rb
new file mode 100644
index 0000000..f297014
--- /dev/null
+++ b/pc.rb
@@ -0,0 +1,67 @@
+# pc.rb
+# (P)hysico (C)hemical descriptor calculation
+# Author: Andreas Maunz
+
+
+# Get a list of PC descriptor algorithms: one per entry in pc_descriptors.yaml, plus "AllDescriptors"
+# @return [text/uri-list] full URIs of the PC descriptor algorithms (text/html if requested via Accept)
+get '/pc' do
+  algorithms = YAML::load_file File.join(ENV['HOME'], ".opentox", "config", "pc_descriptors.yaml")
+  # Emit full URIs like the '/?' index does; bare descriptor ids are not valid text/uri-list content
+  list = (algorithms.keys << "AllDescriptors").collect { |id| url_for("/pc/#{id}", :full) }.join("\n") + "\n"
+  case request.env['HTTP_ACCEPT']
+  when /text\/html/
+    content_type "text/html"
+    OpenTox.text_to_html list
+  else
+    content_type 'text/uri-list'
+    list
+  end
+end
+
+# Get the representation of one PC descriptor algorithm, or of the "AllDescriptors" meta-entry
+# @return [application/rdf+xml] RDF/XML by default; YAML or HTML if requested via the Accept header
+get '/pc/:descriptor' do
+  descriptors = YAML::load_file File.join(ENV['HOME'], ".opentox", "config", "pc_descriptors.yaml")
+  alg_params = [ { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" } ]
+  # Below, descriptors is narrowed to a single entry: the requested one, or a synthetic "AllDescriptors" entry
+  if params[:descriptor] != "AllDescriptors"
+    descriptors = descriptors[params[:descriptor]]
+  else
+    alg_params << { DC.description => "Descriptor Category, one or more of '#{descriptors.collect { |id, info| info[:category] }.uniq.sort.join(",")}'", OT.paramScope => "optional", DC.title => "category" }
+    alg_params << { DC.description => "Software Library, one or more of '#{descriptors.collect { |id, info| info[:lib] }.uniq.sort.join(",")}'", OT.paramScope => "optional", DC.title => "lib" }
+    descriptors = {:id => "AllDescriptors", :name => "All PC descriptors" }
+  end
+  # descriptors is nil (falsy) when the config has no entry under the requested id; see the else branch
+  if descriptors 
+
+    # Build the algorithm metadata; category and lib are appended to the description for single descriptors
+    algorithm = OpenTox::Algorithm::Generic.new(url_for("/pc/#{params[:descriptor]}",:full))
+    algorithm.metadata = {
+      DC.title => params[:descriptor],
+      DC.creator => "andreas@maunz.de",
+      DC.description => descriptors[:name],
+      RDF.type => [OTA.DescriptorCalculation],
+    }
+    algorithm.metadata[OT.parameters] = alg_params
+    algorithm.metadata[DC.description] << (", category: " + descriptors[:category]) unless descriptors[:id] == "AllDescriptors"
+    algorithm.metadata[DC.description] << (", lib: " + descriptors[:lib]) unless descriptors[:id] == "AllDescriptors"
+
+    # Deliver in the format requested via the Accept header (HTML, YAML, otherwise RDF/XML)
+    case request.env['HTTP_ACCEPT']
+    when /text\/html/
+      content_type "text/html"
+      OpenTox.text_to_html algorithm.to_yaml
+    when /yaml/
+      content_type "application/x-yaml"
+      algorithm.to_yaml
+    else
+      response['Content-Type'] = 'application/rdf+xml'  
+      algorithm.to_rdfxml
+    end
+
+  else
+    raise OpenTox::NotFoundError.new "Unknown descriptor #{params[:descriptor]}."
+  end
+end
+
-- 
cgit v1.2.3