author     rautenberg <rautenberg@in-silico.ch>  2012-07-13 09:45:46 +0200
committer  rautenberg <rautenberg@in-silico.ch>  2012-07-13 09:45:46 +0200
commit     ca2903692658ca7badcda425153ed12eb19a2ced (patch)
tree       c921872907dd6ce0edaea7c6251c804bc11373f0
parent     6e016d24bf6d0272d235c466e0dab3f196f0c0d4 (diff)
parent     20ea5f9b62966eecb283033b04f6aea98b23d1f8 (diff)
Merge branch 'release/v3.2.0' (tag: v4.0.0)
-rw-r--r--  .yardopts               1
-rw-r--r--  ChangeLog              10
-rw-r--r--  README.markdown         2
-rw-r--r--  Rakefile                3
-rw-r--r--  VERSION                 2
-rw-r--r--  lib/algorithm.rb      180
-rw-r--r--  lib/compound.rb        60
-rw-r--r--  lib/dataset.rb         41
-rw-r--r--  lib/model.rb           49
-rw-r--r--  lib/parser.rb          53
-rw-r--r--  lib/r-util.rb         127
-rw-r--r--  lib/serializer.rb      92
-rw-r--r--  lib/stratification.R   78
-rw-r--r--  lib/transform.rb        8
-rw-r--r--  lib/utils.rb          453
-rw-r--r--  lib/validation.rb      10
-rw-r--r--  opentox-ruby.gemspec  200
17 files changed, 1017 insertions, 352 deletions
diff --git a/.yardopts b/.yardopts
new file mode 100644
index 0000000..1217a60
--- /dev/null
+++ b/.yardopts
@@ -0,0 +1 @@
+yardoc - README.markdown ChangeLog LICENSE
diff --git a/ChangeLog b/ChangeLog
index 343cf37..e15e64d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+v4.0.0 2012-07-12
+ * fminer addition of compounds fixed
+ * improved performance for CSV download
+ * switch to opentox-ruby version 4.0.0
+
+2012-04-20
+ * Support for joelib and openbabel descriptors in a completely unified interface with CDK (Ambit)
+ * Features can have multiple types (nominal and numeric), PC descriptors have detailed meta data
+ * Myriads of bugfixes to CSV download code (e.g. missing descriptors, handling of duplicates)
+
v3.1.0 2012-02-24
* utils.rb: added for special routines (e.g. descriptor calculation)
* task.rb: Polling with increasing interval
diff --git a/README.markdown b/README.markdown
index 79bdab2..d69b28f 100644
--- a/README.markdown
+++ b/README.markdown
@@ -38,4 +38,4 @@ This example shows how to create a lazar model and predict a compound, it assume
Copyright
---------
-Copyright (c) 2009-2011 Christoph Helma, Martin Guetlein, Micha Rautenberg, Andreas Maunz, David Vorgrimmler, Denis Gebele. See LICENSE for details.
+Copyright (c) 2009-2012 Christoph Helma, Martin Guetlein, Micha Rautenberg, Andreas Maunz, David Vorgrimmler, Denis Gebele. See LICENSE for details.
diff --git a/Rakefile b/Rakefile
index dddea1b..41dfcd8 100644
--- a/Rakefile
+++ b/Rakefile
@@ -42,9 +42,10 @@ begin
gem.add_dependency "dm-migrations", "=1.1.0"
gem.add_dependency "dm-validations", "=1.1.0"
gem.add_dependency "dm-sqlite-adapter", "=1.1.0"
- gem.add_dependency "ruby-plot", "=0.6.0"
+ gem.add_dependency "ruby-plot", "=0.6.1"
gem.add_dependency "gsl", "=1.14.7"
gem.add_dependency "statsample", "=1.1.0"
+ gem.add_dependency "redis", "=2.2.2"
gem.add_development_dependency 'jeweler'
gem.files = FileList["[A-Z]*", "{bin,generators,lib,test}/**/*", 'lib/jeweler/templates/.gitignore']
diff --git a/VERSION b/VERSION
index a0cd9f0..fcdb2e1 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-3.1.0 \ No newline at end of file
+4.0.0
diff --git a/lib/algorithm.rb b/lib/algorithm.rb
index c026c56..78fc447 100644
--- a/lib/algorithm.rb
+++ b/lib/algorithm.rb
@@ -56,25 +56,73 @@ module OpenTox
def check_params(params,per_mil,subjectid=nil)
raise OpenTox::NotFoundError.new "Please submit a dataset_uri." unless params[:dataset_uri] and !params[:dataset_uri].nil?
- raise OpenTox::NotFoundError.new "Please submit a prediction_feature." unless params[:prediction_feature] and !params[:prediction_feature].nil?
- @prediction_feature = OpenTox::Feature.find params[:prediction_feature], subjectid
@training_dataset = OpenTox::Dataset.find "#{params[:dataset_uri]}", subjectid
+
+ unless params[:prediction_feature] # try to read prediction_feature from dataset
+ raise OpenTox::NotFoundError.new "Please provide a prediction_feature parameter" unless @training_dataset.features.size == 1
+ prediction_feature = OpenTox::Feature.find(@training_dataset.features.keys.first,subjectid)
+ params[:prediction_feature] = prediction_feature.uri
+ end
+ @prediction_feature = OpenTox::Feature.find params[:prediction_feature], subjectid
+
raise OpenTox::NotFoundError.new "No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}" unless @training_dataset.features and @training_dataset.features.include?(params[:prediction_feature])
unless params[:min_frequency].nil?
- @minfreq=params[:min_frequency].to_i
- raise "Minimum frequency must be a number >0!" unless @minfreq>0
- else
- @minfreq=OpenTox::Algorithm.min_frequency(@training_dataset,per_mil) # AM sugg. 8-10 per mil for BBRC, 50 per mil for LAST
+ # check for percentage
+ if params[:min_frequency].include? "pc"
+ per_mil=params[:min_frequency].gsub(/pc/,"")
+ if OpenTox::Algorithm.numeric? per_mil
+ per_mil = per_mil.to_i * 10
+ else
+ bad_request=true
+ end
+ # check for per-mil
+ elsif params[:min_frequency].include? "pm"
+ per_mil=params[:min_frequency].gsub(/pm/,"")
+ if OpenTox::Algorithm.numeric? per_mil
+ per_mil = per_mil.to_i
+ else
+ bad_request=true
+ end
+ # set minfreq directly
+ else
+ if OpenTox::Algorithm.numeric? params[:min_frequency]
+ @minfreq=params[:min_frequency].to_i
+ LOGGER.debug "min_frequency #{@minfreq}"
+ else
+ bad_request=true
+ end
+ end
+ raise OpenTox::BadRequestError.new "Minimum frequency must be integer [n], or a percentage [n]pc, or a per-mil [n]pm, with n greater than 0" if bad_request
+ end
+ if @minfreq.nil?
+ @minfreq=OpenTox::Algorithm.min_frequency(@training_dataset,per_mil)
+ LOGGER.debug "min_frequency #{@minfreq} (input was #{per_mil} per-mil)"
end
end
- def add_fminer_data(fminer_instance, params, value_map)
+ def add_fminer_data(fminer_instance, value_map)
+
+
+ # detect nr duplicates per compound
+ compound_sizes = {}
+ @training_dataset.compounds.each do |compound|
+ entries=@training_dataset.data_entries[compound]
+ entries.each do |feature, values|
+ compound_sizes[compound] || compound_sizes[compound] = []
+ compound_sizes[compound] << values.size unless values.size == 0
+ end
+ compound_sizes[compound].uniq!
+ raise "Inappropriate data for fminer" if compound_sizes[compound].size > 1
+ compound_sizes[compound] = compound_sizes[compound][0] # integer instead of array
+ end
id = 1 # fminer start id is not 0
- @training_dataset.data_entries.each do |compound,entry|
+
+ @training_dataset.compounds.each do |compound|
+ entry=@training_dataset.data_entries[compound]
begin
- smiles = OpenTox::Compound.smiles(compound.to_s)
+ smiles = OpenTox::Compound.new(compound).to_smiles
rescue
LOGGER.warn "No resource for #{compound.to_s}"
next
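The min_frequency convention introduced above accepts three spellings. A minimal standalone sketch of the parsing rule (hypothetical helper, not part of this commit; assumes purely numeric strings):

    # "6pc" -> 6 percent -> 60 per-mil; "8pm" -> 8 per-mil; "10" -> absolute count
    def parse_min_frequency(raw)
      case raw.to_s
      when /\A(\d+)pc\z/ then { :per_mil => $1.to_i * 10 } # percentage to per-mil
      when /\A(\d+)pm\z/ then { :per_mil => $1.to_i }      # per-mil as given
      when /\A(\d+)\z/   then { :minfreq => $1.to_i }      # absolute minimum frequency
      else raise "Minimum frequency must be integer [n], [n]pc or [n]pm"
      end
    end

    parse_min_frequency("6pc") # => {:per_mil=>60}
    parse_min_frequency("10")  # => {:minfreq=>10}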
@@ -84,32 +132,31 @@ module OpenTox
next
end
- value_map=params[:value_map] unless params[:value_map].nil?
entry.each do |feature,values|
if feature == @prediction_feature.uri
- values.each do |value|
- if value.nil?
+ (0...compound_sizes[compound]).each { |i|
+ if values[i].nil?
LOGGER.warn "No #{feature} activity for #{compound.to_s}."
else
if @prediction_feature.feature_type == "classification"
- activity= value_map.invert[value.to_s].to_i # activities are mapped to 1..n
+ activity= value_map.invert[values[i]].to_i # activities are mapped to 1..n
@db_class_sizes[activity-1].nil? ? @db_class_sizes[activity-1]=1 : @db_class_sizes[activity-1]+=1 # AM effect
elsif @prediction_feature.feature_type == "regression"
- activity= value.to_f
+ activity= values[i].to_f
end
begin
- fminer_instance.AddCompound(smiles,id)
- fminer_instance.AddActivity(activity, id)
+ fminer_instance.AddCompound(smiles,id) if fminer_instance
+ fminer_instance.AddActivity(activity, id) if fminer_instance
@all_activities[id]=activity # DV: insert global information
@compounds[id] = compound
@smi[id] = smiles
id += 1
rescue Exception => e
- LOGGER.warn "Could not add " + smiles + "\t" + value.to_s + " to fminer"
+ LOGGER.warn "Could not add " + smiles + "\t" + values[i].to_s + " to fminer"
LOGGER.warn e.backtrace
end
end
- end
+ }
end
end
end
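The duplicate handling in add_fminer_data above reduces to one invariant: every feature of a compound must carry the same number of values. A minimal sketch with fabricated entries:

    # entries shaped like @training_dataset.data_entries[compound]
    entries = { "featA" => [1.0, 2.0], "featB" => [0.0, 1.0] } # two duplicates
    sizes = entries.values.collect { |v| v.size }.reject { |s| s == 0 }.uniq
    raise "Inappropriate data for fminer" if sizes.size > 1
    nr_duplicates = sizes.first # => 2, one fminer row per duplicate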
@@ -380,11 +427,11 @@ module OpenTox
prediction = acts[0]
else
#LOGGER.debug gram_matrix.to_yaml
- @r = RinRuby.new(false,false) # global R instance leads to Socket errors after a large number of requests
- @r.eval "set.seed(1)"
+ @r = RinRuby.new(true,false) # global R instance leads to Socket errors after a large number of requests
@r.eval "suppressPackageStartupMessages(library('caret'))" # requires R packages "caret" and "kernlab"
@r.eval "suppressPackageStartupMessages(library('doMC'))" # requires R packages "multicore"
@r.eval "registerDoMC()" # switch on parallel processing
+ @r.eval "set.seed(1)"
begin
# set data
@@ -400,7 +447,14 @@ module OpenTox
# prepare data
LOGGER.debug "Preparing R data ..."
- @r.eval "if (class(y) == 'character') { y = factor(y); suppressPackageStartupMessages(library('class')) }" # For classification
+ @r.eval <<-EOR
+ weights=NULL
+ if (class(y) == 'character') {
+ y = factor(y)
+ suppressPackageStartupMessages(library('class'))
+ #weights=unlist(as.list(prop.table(table(y))))
+ }
+ EOR
@r.eval <<-EOR
rem = nearZeroVar(prop_matrix)
@@ -417,8 +471,18 @@ module OpenTox
# model + support vectors
LOGGER.debug "Creating R SVM model ..."
- @r.eval <<-EOR
- model = train(prop_matrix,y,method="svmradial",tuneLength=8,trControl=trainControl(method="LGOCV",number=10),preProcess=c("center", "scale"))
+ train_success = @r.eval <<-EOR
+ # AM: TODO: evaluate class weight effect by altering:
+ # AM: comment in 'weights' above run and class.weights=weights vs. class.weights=1-weights
+ # AM: vs
+ # AM: comment out 'weights' above (status quo), thereby disabling weights
+ model = train(prop_matrix,y,
+ method="svmradial",
+ preProcess=c("center", "scale"),
+ class.weights=weights,
+ trControl=trainControl(method="LGOCV",number=10),
+ tuneLength=8
+ )
perf = ifelse ( class(y)!='numeric', max(model$results$Accuracy), model$results[which.min(model$results$RMSE),]$Rsquared )
EOR
@@ -431,6 +495,7 @@ module OpenTox
# censoring
prediction = nil if ( @r.perf.nan? || @r.perf < min_train_performance )
+ prediction = nil unless train_success
LOGGER.debug "Performance: #{sprintf("%.2f", @r.perf)}"
rescue Exception => e
LOGGER.debug "#{e.class}: #{e.message}"
@@ -456,30 +521,42 @@ module OpenTox
@r.del_missing = params[:del_missing] == true ? 1 : 0
r_result_file = params[:fds_csv_file].sub("rfe_", "rfe_R_")
@r.f_fds_r = r_result_file.to_s
-
+
# need packs 'randomForest', 'RANN'
@r.eval <<-EOR
- set.seed(1)
suppressPackageStartupMessages(library('caret'))
suppressPackageStartupMessages(library('randomForest'))
suppressPackageStartupMessages(library('RANN'))
suppressPackageStartupMessages(library('doMC'))
registerDoMC()
-
+ set.seed(1)
+
acts = read.csv(ds_csv_file, check.names=F)
feats = read.csv(fds_csv_file, check.names=F)
ds = merge(acts, feats, by="SMILES") # duplicates features for duplicate SMILES :-)
-
+
features = ds[,(dim(acts)[2]+1):(dim(ds)[2])]
y = ds[,which(names(ds) == prediction_feature)]
-
+
# assumes a data matrix 'features' and a vector 'y' of target values
row.names(features)=NULL
-
+
+ # features with all values missing removed
+ na_col = names ( which ( apply ( features, 2, function(x) all ( is.na ( x ) ) ) ) )
+ features = features[,!names(features) %in% na_col]
+
+ # features with infinite values removed
+ inf_col = names ( which ( apply ( features, 2, function(x) any ( is.infinite ( x ) ) ) ) )
+ features = features[,!names(features) %in% inf_col]
+
+ # features with zero variance removed
+ zero_var = names ( which ( apply ( features, 2, function(x) var(x, na.rm=T) ) == 0 ) )
+ features = features[,!names(features) %in% zero_var]
+
pp = NULL
if (del_missing) {
# needed if rows should be removed
- na_ids = apply(features,1,function(x)any(is.na(x)))
+ na_ids = apply ( features,1,function(x) any ( is.na ( x ) ) )
features = features[!na_ids,]
y = y[!na_ids]
pp = preProcess(features, method=c("scale", "center"))
@@ -488,17 +565,23 @@ module OpenTox
pp = preProcess(features, method=c("scale", "center", "knnImpute"))
}
features = predict(pp, features)
-
+
+ # features with nan values removed (sometimes preProcess return NaN values)
+ nan_col = names ( which ( apply ( features, 2, function(x) any ( is.nan ( x ) ) ) ) )
+ features = features[,!names(features) %in% nan_col]
+
# determine subsets
- subsets = dim(features)[2]*c(0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7)
- subsets = c(2,3,4,5,7,10,subsets)
+ subsets = dim(features)[2]*c(0.3, 0.32, 0.34, 0.36, 0.38, 0.4, 0.42, 0.44, 0.46, 0.48, 0.5, 0.52, 0.54, 0.56, 0.58, 0.6, 0.62, 0.64, 0.66, 0.68, 0.7)
+ #subsets = dim(features)[2]*c(0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7)
+ #subsets = c(2,3,4,5,7,10,subsets)
+ #subsets = c(2,3,4,5,7,10,13,16,19,22,25,28,30)
subsets = unique(sort(round(subsets)))
subsets = subsets[subsets<=dim(features)[2]]
subsets = subsets[subsets>1]
-
+
# Recursive feature elimination
- rfProfile = rfe( x=features, y=y, rfeControl=rfeControl(functions=rfFuncs, number=50), sizes=subsets)
-
+ rfProfile = rfe( x=features, y=y, rfeControl=rfeControl(functions=rfFuncs, number=150), sizes=subsets)
+
# read existing dataset and select most useful features
csv=feats[,c("SMILES", rfProfile$optVariables)]
write.csv(x=csv,file=f_fds_r, row.names=F, quote=F, na='')
@@ -527,7 +610,7 @@ module OpenTox
# @param [Hash] required keys: compound, features, feature_dataset_uri, pc_type
# @return [Hash] Hash with matching Smarts and number of hits
def self.lookup(params)
- params[:compound].lookup(params[:features], params[:feature_dataset_uri],params[:pc_type],params[:subjectid])
+ params[:compound].lookup(params[:features], params[:feature_dataset_uri], params[:pc_type], params[:lib], params[:subjectid])
end
end
@@ -539,3 +622,26 @@ module OpenTox
end
end
end
+
+class Array
+ # collect method extended for parallel processing.
+ # Note: assign return value as: ans = arr.pcollect(n) { |obj| ... }
+ # @param n the number of processes to spawn (default: unlimited)
+ def pcollect(n = nil)
+ nproc = 0
+ result = collect do |*a|
+ r, w = IO.pipe
+ fork do
+ r.close
+ w.write( Marshal.dump( yield(*a) ) )
+ end
+ if n and (nproc+=1) >= n
+ Process.wait ; nproc -= 1
+ end
+ [ w.close, r ].last
+ end
+ Process.waitall
+ result.collect{|r| Marshal.load [ r.read, r.close ].first}
+ end
+end
+
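A usage sketch for the Array#pcollect extension added above. Results come back in input order because each child writes to its own pipe and the parent reads them in sequence; block return values must be Marshal-able:

    squares = [1, 2, 3, 4].pcollect(2) { |x| x * x } # at most 2 children at a time
    # => [1, 4, 9, 16]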
diff --git a/lib/compound.rb b/lib/compound.rb
index 16d266c..e493278 100644
--- a/lib/compound.rb
+++ b/lib/compound.rb
@@ -3,6 +3,7 @@
module OpenTox
+ require "rexml/document"
# Ruby wrapper for OpenTox Compound Webservices (http://opentox.org/dev/apis/api-1.2/structure).
class Compound
@@ -134,6 +135,47 @@ module OpenTox
"not available"
end
end
+
+
+ # Get all known compound names sorted by classification. Relies on an external service for name lookups.
+ # @example
+ # names = compound.to_names_hash
+ # @return [Hash] Classification => Name Array
+ def to_names_hash
+ begin
+ xml = RestClientWrapper.get("#{@@cactus_uri}#{@inchi}/names/xml")
+ xmldoc = REXML::Document.new(xml)
+ data = {}
+
+ xmldoc.root.elements[1].elements.each{|e|
+ if data.has_key?(e.attribute("classification").value) == false
+ data[e.attribute("classification").value] = [e.text]
+ else
+ data[e.attribute("classification").value].push(e.text)
+ end
+ }
+ data
+ rescue
+ "not available"
+ end
+ end
+
+ # Get all known compound names via the Ambit service. Relies on an external service for name lookups.
+ # @example
+ # dataset_uri = compound.to_ambit_names_hash
+ # @return [String] URI of a dataset holding the name information, or "not available"
+ def to_ambit_names_hash
+ begin
+ ds = OpenTox::Dataset.new
+ ds.save
+ ds.load_rdfxml(RestClientWrapper.get("http://apps.ideaconsult.net:8080/ambit2/query/compound/search/names?type=smiles&property=&search=#{@inchi}"))
+ ds.save
+ ds.uri
+ rescue
+ "not available"
+ end
+ end
+
# Match a smarts string
# @example
@@ -201,25 +243,28 @@ module OpenTox
# Lookup numerical values, returns hash with feature name as key and value as value
# @param [Array] Array of feature names
# @param [String] Feature dataset uri
+ # @param [String] Comma separated pc types
+ # @param [String] Comma separated lib
# @return [Hash] Hash with feature name as key and value as value
- def lookup(feature_array,feature_dataset_uri,pc_type,subjectid=nil)
+ def lookup(feature_array,feature_dataset_uri,pc_type,lib,subjectid=nil)
ds = OpenTox::Dataset.find(feature_dataset_uri,subjectid)
#entry = ds.data_entries[self.uri]
entry = nil
- ds.data_entries.each { |c_uri, values|
- if c_uri.split('/compound/').last == self.to_inchi
+ ds.data_entries.each { |c_uri, values|
+ compound = OpenTox::Compound.new(c_uri)
+ if compound.to_inchi == self.to_inchi # Compare compounds by InChI
entry = ds.data_entries[c_uri]
break
end
}
LOGGER.debug "#{entry.size} entries in feature ds for query." unless entry.nil?
-
if entry.nil?
- uri, smiles_to_inchi = OpenTox::Algorithm.get_pc_descriptors({:compounds => [self.uri], :pc_type => pc_type})
- uri = OpenTox::Algorithm.load_ds_csv(uri, smiles_to_inchi, subjectid)
- ds = OpenTox::Dataset.find(uri,subjectid)
+ temp_ds = OpenTox::Dataset.create(CONFIG[:services]["opentox-dataset"],subjectid); temp_ds.add_compound(self.uri); temp_uri = temp_ds.save(subjectid)
+ uri = RestClientWrapper.post(File.join(CONFIG[:services]["opentox-algorithm"], "/pc/AllDescriptors"), {:dataset_uri => temp_uri, :pc_type => pc_type, :lib => lib, :subjectid => subjectid})
+ ds = OpenTox::Dataset.find(uri, subjectid)
entry = ds.data_entries[self.uri]
ds.delete(subjectid)
+ temp_ds.delete(subjectid)
end
features = entry.keys
features.each { |feature|
@@ -228,7 +273,6 @@ module OpenTox
entry.delete(feature) unless feature == new_feature # e.g. when loading from ambit
}
#res = feature_array.collect {|v| entry[v]}
- #LOGGER.debug "----- am #{entry.to_yaml}"
entry
end
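A hedged usage sketch for the extended lookup signature (the lib argument is new in this commit). All URIs and feature names below are placeholders, and from_smiles is assumed to be the usual opentox-ruby constructor:

    compound = OpenTox::Compound.from_smiles("c1ccccc1")
    entry = compound.lookup(
      ["nHal", "TPSA"],                 # feature names
      "http://example.org/dataset/42",  # feature dataset uri
      "constitutional,electronic",      # comma separated pc types
      "cdk,openbabel",                  # comma separated libs
      nil                               # subjectid
    )
    # => hash of feature => value; if the compound is missing from the feature
    #    dataset, descriptors are computed on the fly via /pc/AllDescriptors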
diff --git a/lib/dataset.rb b/lib/dataset.rb
index 95c1918..c916722 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -197,7 +197,12 @@ module OpenTox
accept_values
end
- # Detect feature type(s) in the dataset
+ # Detect feature type (reduced to one across all features)
+ # Classification takes precedence over regression
+ # DEPRECATED --
+ # MAKES NO SENSE FOR DATASETS WITH MORE THAN 1 FEATURE
+ # FEATURES CAN HAVE MULTIPLE TYPES
+ # Replacement: see feature_types()
# @return [String] `classification", "regression", "mixed" or unknown`
def feature_type(subjectid=nil)
load_features(subjectid)
@@ -210,6 +215,24 @@ module OpenTox
"unknown"
end
end
+
+
+ # Detect feature types. A feature can have multiple types.
+ # Returns types hashed by feature URI, with missing features omitted.
+ # Example (YAML):
+ # http://toxcreate3.in-silico.ch:8082/dataset/152/feature/nHal:
+ # - http://www.opentox.org/api/1.1#NumericFeature
+ # - http://www.opentox.org/api/1.1#NominalFeature
+ # ...
+ #
+ # @return [Hash] Keys: feature URIs, Values: Array of types
+ def feature_types(subjectid=nil)
+ load_features(subjectid)
+ @features.inject({}){ |h,(f,metadata)|
+ h[f]=metadata[RDF.type] unless metadata[RDF.type][0].include? "MissingFeature"
+ h
+ }
+ end
=begin
=end
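A usage sketch for the new feature_types accessor (dataset_uri and subjectid are placeholders):

    ds = OpenTox::Dataset.find(dataset_uri, subjectid)
    ds.feature_types.each do |feature_uri, types|
      # types is an array, e.g. [OT.NominalFeature, OT.NumericFeature];
      # features typed as MissingFeature are omitted entirely
      puts "#{feature_uri}: #{types.join(', ')}"
    end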
@@ -316,11 +339,14 @@ module OpenTox
end
# Complete feature values by adding zeroes
- def complete_data_entries
+ # @param [Hash] key: compound, value: duplicate sizes
+ def complete_data_entries(compound_sizes)
all_features = @features.keys
@data_entries.each { |c, e|
(Set.new(all_features.collect)).subtract(Set.new e.keys).to_a.each { |f|
- self.add(c,f,0)
+ compound_sizes[c].times {
+ self.add(c,f,0)
+ }
}
}
end
@@ -454,6 +480,14 @@ module OpenTox
end
end
+ def value_map(prediction_feature_uri)
+ training_classes = accept_values(prediction_feature_uri).sort
+ value_map=Hash.new
+ training_classes.each_with_index { |c,i| value_map[i+1] = c }
+ value_map
+ end
+
+
private
# Copy a dataset (rewrites URI)
def copy(dataset)
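The value_map helper added above encodes the sorted class labels as 1..n, the representation fminer expects; for example:

    # accept values of the prediction feature: ["false", "true"]
    ds.value_map(prediction_feature_uri)
    # => { 1 => "false", 2 => "true" }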
@@ -504,6 +538,7 @@ module OpenTox
@data_entries[compound.uri].collect{|f,v| @features[f] if f.match(/neighbor/)}.compact if @data_entries[compound.uri]
end
+
# def errors(compound)
# features = @data_entries[compound.uri].keys
# features.collect{|f| @features[f][OT.error]}.join(" ") if features
diff --git a/lib/model.rb b/lib/model.rb
index a858a0f..c9d367e 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -103,7 +103,7 @@ module OpenTox
include Model
- attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :subjectid, :value_map, :compound_fingerprints, :feature_calculation_algorithm, :neighbors
+ attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :subjectid, :value_map, :compound_fingerprints, :feature_calculation_algorithm, :neighbors, :compounds
def initialize(uri=nil)
if uri
@@ -169,12 +169,13 @@ module OpenTox
lazar.prediction_algorithm = hash["prediction_algorithm"] if hash["prediction_algorithm"]
lazar.subjectid = hash["subjectid"] if hash["subjectid"]
lazar.value_map = hash["value_map"] if hash["value_map"]
+ lazar.compounds = hash["compounds"] if hash["compounds"]
lazar
end
def to_json
- Yajl::Encoder.encode({:uri => @uri,:metadata => @metadata, :compound => @compound, :prediction_dataset => @prediction_dataset, :features => @features, :effects => @effects, :activities => @activities, :p_values => @p_values, :fingerprints => @fingerprints, :feature_calculation_algorithm => @feature_calculation_algorithm, :similarity_algorithm => @similarity_algorithm, :prediction_algorithm => @prediction_algorithm, :subjectid => @subjectid, :value_map => @value_map})
+ Yajl::Encoder.encode({:uri => @uri,:metadata => @metadata, :compound => @compound, :prediction_dataset => @prediction_dataset, :features => @features, :effects => @effects, :activities => @activities, :p_values => @p_values, :fingerprints => @fingerprints, :feature_calculation_algorithm => @feature_calculation_algorithm, :similarity_algorithm => @similarity_algorithm, :prediction_algorithm => @prediction_algorithm, :subjectid => @subjectid, :value_map => @value_map, :compounds => @compounds})
end
def run( params, accept_header=nil, waiting_task=nil )
@@ -237,6 +238,7 @@ module OpenTox
@compound = Compound.new compound_uri
features = {}
+
#LOGGER.debug self.to_yaml
unless @prediction_dataset
@prediction_dataset = Dataset.create(CONFIG[:services]["opentox-dataset"], subjectid)
@@ -247,19 +249,19 @@ module OpenTox
OT.parameters => [{DC.title => "compound_uri", OT.paramValue => compound_uri}]
} )
end
- if OpenTox::Feature.find(metadata[OT.dependentVariables], subjectid).feature_type == "regression"
- all_activities = []
- all_activities = @activities.values.flatten.collect! { |i| i.to_f }
- end
+
unless database_activity(subjectid) # adds database activity to @prediction_dataset
+
# Calculation of needed values for query compound
@compound_features = eval("#{@feature_calculation_algorithm}({
:compound => @compound,
:features => @features,
:feature_dataset_uri => @metadata[OT.featureDataset],
:pc_type => self.parameter(\"pc_type\"),
+ :lib => self.parameter(\"lib\"),
:subjectid => subjectid
})")
+
# Adding fingerprint of query compound with features and values(p_value*nr_hits)
@compound_fingerprints = {}
@compound_features.each do |feature, value| # value is nil if "Substructure.match"
@@ -314,6 +316,16 @@ module OpenTox
@prediction_dataset.add @compound.uri, feature_uri, true
f+=1
end
+ elsif @feature_calculation_algorithm == "Substructure.lookup"
+ f = 0
+ @compound_features.each do |feature, value|
+ features[feature] = feature
+ @prediction_dataset.add_feature(feature, {
+ RDF.type => [OT.NumericFeature]
+ })
+ @prediction_dataset.add @compound.uri, feature, value
+ f+=1
+ end
else
@compound_features.each do |feature|
features[feature] = feature
@@ -337,15 +349,26 @@ module OpenTox
else
feature_uri = feature
end
- @prediction_dataset.add neighbor[:compound], feature_uri, true
+ if @feature_calculation_algorithm == "Substructure.lookup"
+ @prediction_dataset.add neighbor[:compound], feature_uri, @fingerprints[neighbor[:compound]][feature_uri]
+ else
+ @prediction_dataset.add neighbor[:compound], feature_uri, true
+ end
+
unless features.has_key? feature
features[feature] = feature_uri
- @prediction_dataset.add_feature(feature_uri, {
- RDF.type => [OT.Substructure],
- OT.smarts => feature,
- OT.pValue => @p_values[feature],
- OT.effect => @effects[feature]
- })
+ if @feature_calculation_algorithm == "Substructure.lookup"
+ @prediction_dataset.add_feature(feature_uri, {
+ RDF.type => [OT.NumericFeature]
+ })
+ else
+ @prediction_dataset.add_feature(feature_uri, {
+ RDF.type => [OT.Substructure],
+ OT.smarts => feature,
+ OT.pValue => @p_values[feature],
+ OT.effect => @effects[feature]
+ })
+ end
f+=1
end
end
diff --git a/lib/parser.rb b/lib/parser.rb
index 56e4fed..257d250 100644
--- a/lib/parser.rb
+++ b/lib/parser.rb
@@ -349,11 +349,15 @@ module OpenTox
# Load CSV string (format specification: http://toxcreate.org/help)
# @param [String] csv CSV representation of the dataset
+ # @param [Boolean] drop_missing Whether completely missing rows should be dropped
+ # @param [Boolean] all_numeric Whether all features should be treated as numeric
# @return [OpenTox::Dataset] Dataset object with CSV data
- def load_csv(csv, drop_missing=false)
+ def load_csv(csv, drop_missing=false, all_numeric=false)
row = 0
input = csv.split("\n")
headers = split_row(input.shift)
+ headers.collect! {|header| header.to_s.gsub(/[\/.\\\(\)\{\}\[\]]/,"_")}
add_features(headers)
value_maps = Array.new
regression_features=Array.new
@@ -362,7 +366,7 @@ module OpenTox
row = split_row(row)
value_maps = detect_new_values(row, value_maps)
value_maps.each_with_index { |vm,j|
- if vm.size > @max_class_values # max @max_class_values classes.
+ if (vm.size > @max_class_values) || all_numeric # max @max_class_values classes.
regression_features[j]=true
else
regression_features[j]=false
@@ -392,22 +396,30 @@ module OpenTox
def warnings
- info = ''
+ info = '<br>'
@feature_types.each do |feature,types|
+ @dataset.add_feature_metadata(feature,{RDF.type => []})
if types.uniq.size == 0
- type = "helper#MissingFeature"
- elsif types.uniq.size > 1
- type = OT.NumericFeature
+ @dataset.add_feature_metadata(
+ feature, {RDF.type => ( @dataset.features[feature][RDF.type] << "helper#MissingFeature" ) } # TODO: Fit to OT ontology!
+ )
+ info += "'#{@dataset.feature_name(feature)}' detected as 'MissingFeature'<br>"
else
- type = types.first
+ info += "'#{@dataset.feature_name(feature)}' detected as "
+ types_arr = []
+ types.uniq.each { |t|
+ types_arr << t
+ info += "'#{t.split('#').last}', "
+ }
+
+ @dataset.add_feature_metadata(
+ feature, {RDF.type => types_arr.sort} # nominal should be first for downward compatibility
+ )
+
+ info.chop!.chop!
+ info += "<br>"
end
- @dataset.add_feature_metadata(feature,{RDF.type => [type]})
- info += "\"#{@dataset.feature_name(feature)}\" detected as #{type.split('#').last}." if type
-
- # TODO: rewrite feature values
- # TODO if value.to_f == 0 @activity_errors << "#{id} Zero values not allowed for regression datasets - entry ignored."
end
-
@dataset.metadata[OT.Info] = info
warnings = ''
@@ -469,28 +481,31 @@ module OpenTox
unless @duplicate_feature_indices.include? i
value = row[i]
- #LOGGER.warn "Missing values for #{id}" if value.size == 0 # String is empty
feature = @features[feature_idx]
type = feature_type(value) # May be NIL
- type = OT.NominalFeature unless (type.nil? || regression_features[i])
@feature_types[feature] << type if type
+ # Add nominal type if number of distinct values <= @max_class_values
+ if type == OT.NumericFeature
+ @feature_types[feature] << OT.NominalFeature unless regression_features[i]
+ end
val = nil
case type
when OT.NumericFeature
val = value.to_f
+ val = nil if val.infinite?
when OT.NominalFeature
val = value.to_s
end
feature_idx += 1
- if val != nil
+ if val != nil
@dataset.add(compound.uri, feature, val)
- if type != OT.NumericFeature
+ if @feature_types[feature].include? OT.NominalFeature
@dataset.features[feature][OT.acceptValue] = [] unless @dataset.features[feature][OT.acceptValue]
- @dataset.features[feature][OT.acceptValue] << val.to_s unless @dataset.features[feature][OT.acceptValue].include?(val.to_s)
+ @dataset.features[feature][OT.acceptValue] << val unless @dataset.features[feature][OT.acceptValue].include?(val)
end
end
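The revised typing rule above: every parsed value contributes its detected type, and a numeric column additionally receives the nominal type when its number of distinct values stays within @max_class_values. A condensed sketch with fabricated values:

    max_class_values = 5
    values = ["1", "0", "1", "1", "0"]  # numeric-looking column
    types = ["NumericFeature"]
    types << "NominalFeature" if values.uniq.size <= max_class_values
    # => the feature carries both types; acceptValue collects "1" and "0"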
@@ -654,7 +669,7 @@ module OpenTox
obmol.get_data.each { |d| row[d.get_attribute] = d.get_value if properties.include?(d.get_attribute) }
table.data[compound.uri] = row
end
-
+
# find and remove ignored_features
@activity_errors = table.clean_features
table.add_to_dataset @dataset
diff --git a/lib/r-util.rb b/lib/r-util.rb
index 7163c46..cc70696 100644
--- a/lib/r-util.rb
+++ b/lib/r-util.rb
@@ -8,6 +8,18 @@ PACKAGE_DIR = package_dir
require "tempfile"
+class Array
+
+ def check_uniq
+ hash = {}
+ self.each do |x|
+ raise "duplicate #{x}" if hash[x]
+ hash[x] = true
+ end
+ end
+
+end
+
module OpenTox
class RUtil
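The Array#check_uniq helper added above guards the dataframe round trip against silently colliding row or column names:

    ["a", "b", "c"].check_uniq # passes
    ["a", "b", "a"].check_uniq # raises "duplicate a"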
@@ -75,12 +87,10 @@ module OpenTox
end
# embedds feature values of two datasets into 2D and plots it
- # fast_plot = true -> PCA, fast_plot = false -> SMACOF (iterative optimisation method)
#
def feature_value_plot(files, dataset_uri1, dataset_uri2, dataset_name1, dataset_name2,
- features=nil, fast_plot=true, subjectid=nil, waiting_task=nil)
+ features=nil, subjectid=nil, waiting_task=nil)
- raise "r-package smacof missing" if fast_plot==false and !package_installed?("smacof")
LOGGER.debug("r-util> create feature value plot")
d1 = OpenTox::Dataset.find(dataset_uri1,subjectid)
d2 = OpenTox::Dataset.find(dataset_uri2,subjectid)
@@ -102,17 +112,13 @@ module OpenTox
@r.eval "split <- c(rep(0,nrow(#{df1})),rep(1,nrow(#{df2})))"
@r.names = [dataset_name1, dataset_name2]
LOGGER.debug("r-util> - convert data to 2d")
- @r.eval "df.2d <- plot_pre_process(df, method='#{(fast_plot ? "pca" : "smacof")}')"
+ #@r.eval "save.image(\"/tmp/image.R\")"
+ @r.eval "df.2d <- plot_pre_process(df, method='sammon')"
waiting_task.progress(75) if waiting_task
- if fast_plot
- info = "main='PCA-Embedding of #{features.size} features',xlab='PC1',ylab='PC2'"
- else
- info = "main='SMACOF-Embedding of #{features.size} features',xlab='x',ylab='y'"
- end
LOGGER.debug("r-util> - plot data")
plot_to_files(files) do |file|
- @r.eval "plot_split( df.2d, split, names, #{info})"
+ @r.eval "plot_split( df.2d, split, names, main='Sammon embedding of #{features.size} features',xlab='x',ylab='y')"
end
end
@@ -170,19 +176,68 @@ module OpenTox
end
end
- # stratified splits a dataset into two dataset the feature values
+ # stratified split of a dataset into two datasets according to the feature values
+ # all features are taken into account unless <split_features> is given
+ # returns two datasets
+ def stratified_split( dataset, metadata={}, missing_values="NA", pct=0.3, subjectid=nil, seed=42, split_features=nil )
+ stratified_split_internal( dataset, metadata, missing_values, nil, pct, subjectid, seed, split_features )
+ end
+
+ # stratified split of a dataset into k datasets according to the feature values
# all features are taken into account unless <split_features> is given
- def stratified_split( dataset, missing_values="NA", pct=0.3, subjectid=nil, seed=42, split_features=nil )
+ # returns two arrays of datasets
+ def stratified_k_fold_split( dataset, metadata={}, missing_values="NA", num_folds=10, subjectid=nil, seed=42, split_features=nil )
+ stratified_split_internal( dataset, metadata, missing_values, num_folds, nil, subjectid, seed, split_features )
+ end
+
+ private
+ def stratified_split_internal( dataset, metadata={}, missing_values="NA", num_folds=nil, pct=nil, subjectid=nil, seed=42, split_features=nil )
+ raise "internal error" if num_folds!=nil and pct!=nil
+ k_fold_split = num_folds!=nil
+ if k_fold_split
+ raise "num_folds not a fixnum: #{num_folds}" unless num_folds.is_a?(Fixnum)
+ else
+ raise "pct is not a numeric: #{pct}" unless pct.is_a?(Numeric)
+ end
raise "not a loaded ot-dataset" unless dataset.is_a?(OpenTox::Dataset) and dataset.compounds.size>0 and dataset.features.size>0
+ raise "missing_values=#{missing_values}" unless missing_values.is_a?(String) or missing_values==0
+ raise "subjectid=#{subjectid}" unless subjectid==nil or subjectid.is_a?(String)
LOGGER.debug("r-util> apply stratified split to #{dataset.uri}")
- df = dataset_to_dataframe( dataset, missing_values, subjectid, split_features )
+ df = dataset_to_dataframe( dataset, missing_values, subjectid)
@r.eval "set.seed(#{seed})"
- @r.eval "split <- stratified_split(#{df}, ratio=#{pct})"
- split = @r.pull 'split'
- split = split.collect{|s| 1-s.to_i} # reverse 1s and 0s, as 1 means selected, but 0 will be first set
- split_to_datasets( df, split, subjectid )
+ str_split_features = ""
+ if split_features
+ @r.split_features = split_features if split_features
+ str_split_features = "colnames=split_features"
+ end
+ #@r.eval "save.image(\"/tmp/image.R\")"
+
+ if k_fold_split
+ @r.eval "split <- stratified_k_fold_split(#{df}, num_folds=#{num_folds}, #{str_split_features})"
+ split = @r.pull 'split'
+ train = []
+ test = []
+ num_folds.times do |f|
+ datasetname = 'dataset fold '+(f+1).to_s+' of '+num_folds.to_s
+ metadata[DC.title] = "training "+datasetname
+ train << split_to_dataset( df, split, metadata, subjectid ){ |i| i!=(f+1) }
+ metadata[DC.title] = "test "+datasetname
+ test << split_to_dataset( df, split, metadata, subjectid ){ |i| i==(f+1) }
+ end
+ return train, test
+ else
+ puts "split <- stratified_split(#{df}, ratio=#{pct}, #{str_split_features})"
+ @r.eval "split <- stratified_split(#{df}, ratio=#{pct}, #{str_split_features})"
+ split = @r.pull 'split'
+ metadata[DC.title] = "Training dataset split of "+dataset.uri
+ train = split_to_dataset( df, split, metadata, subjectid ){ |i| i==1 }
+ metadata[DC.title] = "Test dataset split of "+dataset.uri
+ test = split_to_dataset( df, split, metadata, subjectid ){ |i| i==0 }
+ return train, test
+ end
end
+ public
# dataset should be loaded completely (use Dataset.find)
# takes duplicates into account
@@ -212,9 +267,13 @@ module OpenTox
features = dataset.features.keys.sort
end
compounds = []
+ compound_names = []
dataset.compounds.each do |c|
+ count = 0
num_compounds[c].times do |i|
compounds << c
+ compound_names << "#{c}$#{count}"
+ count+=1
end
end
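Both public entry points funnel into stratified_split_internal. A usage sketch, assuming a fully loaded dataset and an RUtil instance constructed as elsewhere in the library:

    rutil = OpenTox::RUtil.new

    # single stratified split, 30% test
    train, test = rutil.stratified_split(dataset, {}, "NA", 0.3, subjectid)

    # stratified 10-fold cross-validation: two parallel arrays of datasets
    train_sets, test_sets = rutil.stratified_k_fold_split(dataset, {}, "NA", 10, subjectid)
    train_sets.zip(test_sets).each_with_index do |(tr, te), f|
      puts "fold #{f+1}: #{tr.compounds.size} train / #{te.compounds.size} test"
    end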
@@ -238,7 +297,7 @@ module OpenTox
end
end
df_name = "df_#{dataset.uri.split("/")[-1].split("?")[0]}"
- assign_dataframe(df_name,d_values,compounds,features)
+ assign_dataframe(df_name,d_values,compound_names,features)
# set dataframe column types accordingly
f_count = 1 #R starts at 1
@@ -264,25 +323,27 @@ module OpenTox
# converts a dataframe into a dataset (a new dataset is created at the dataset webservice)
# this is only possible if a superset of the dataframe was created by dataset_to_dataframe (metadata and URIs!)
- def dataframe_to_dataset( df, subjectid=nil )
- dataframe_to_dataset_indices( df, subjectid, nil)
+ def dataframe_to_dataset( df, metadata={}, subjectid=nil )
+ dataframe_to_dataset_indices( df, metadata, subjectid, nil)
end
private
- def dataframe_to_dataset_indices( df, subjectid=nil, compound_indices=nil )
+ def dataframe_to_dataset_indices( df, metadata={}, subjectid=nil, compound_indices=nil )
raise unless @@feats[df].size>0
- values, compounds, features = pull_dataframe(df)
+ values, compound_names, features = pull_dataframe(df)
+ compounds = compound_names.collect{|c| c.split("$")[0]}
features.each{|f| raise unless @@feats[df][f]}
dataset = OpenTox::Dataset.create(CONFIG[:services]["opentox-dataset"],subjectid)
+ dataset.add_metadata(metadata)
LOGGER.debug "r-util> convert dataframe to dataset #{dataset.uri}"
compounds.size.times{|i| dataset.add_compound(compounds[i]) if compound_indices==nil or compound_indices.include?(i)}
features.each{|f| dataset.add_feature(f,@@feats[df][f])}
features.size.times do |c|
feat = OpenTox::Feature.find(features[c],subjectid)
- nominal = feat.metadata[RDF.type].to_a.flatten.include?(OT.NominalFeature)
+ numeric = feat.metadata[RDF.type].to_a.flatten.include?(OT.NumericFeature)
compounds.size.times do |r|
if compound_indices==nil or compound_indices.include?(r)
- dataset.add(compounds[r],features[c],nominal ? values[r][c] : values[r][c].to_f) if values[r][c]!="NA"
+ dataset.add(compounds[r],features[c],numeric ? values[r][c].to_f : values[r][c]) if values[r][c]!="NA"
end
end
end
@@ -290,16 +351,12 @@ module OpenTox
dataset
end
- def split_to_datasets( df, split, subjectid=nil )
- sets = []
- (split.min.to_i .. split.max.to_i).each do |i|
- indices = []
- split.size.times{|j| indices<<j if split[j]==i}
- dataset = dataframe_to_dataset_indices( df, subjectid, indices )
- LOGGER.debug("r-util> split into #{dataset.uri}, c:#{dataset.compounds.size}, f:#{dataset.features.size}")
- sets << dataset
- end
- sets
+ def split_to_dataset( df, split, metadata={}, subjectid=nil )
+ indices = []
+ split.size.times{|i| indices<<i if yield(split[i]) }
+ dataset = dataframe_to_dataset_indices( df, metadata, subjectid, indices )
+ LOGGER.debug("r-util> split into #{dataset.uri}, c:#{dataset.compounds.size}, f:#{dataset.features.size}")
+ dataset
end
def pull_dataframe(df)
@@ -323,6 +380,8 @@ module OpenTox
end
def assign_dataframe(df,input,rownames,colnames)
+ rownames.check_uniq if rownames
+ colnames.check_uniq if colnames
tmp = File.join(Dir.tmpdir,Time.new.to_f.to_s+"_"+rand(10000).to_s+".csv")
file = File.new(tmp, 'w')
input.each{|i| file.puts(i.collect{|e| "\"#{e}\""}.join("#")+"\n")}
diff --git a/lib/serializer.rb b/lib/serializer.rb
index 30cb2ba..03ca285 100644
--- a/lib/serializer.rb
+++ b/lib/serializer.rb
@@ -459,32 +459,80 @@ module OpenTox
def initialize(dataset)
@rows = []
@rows << ["SMILES"]
+
features = dataset.features.keys
- @rows.first << features
+
+ # prepare for subgraphs
+ have_substructures = features.collect{ |id| dataset.features[id][RDF.type].include? OT.Substructure}.compact.uniq
+ if have_substructures.size == 1 && have_substructures[0]
+ features_smarts = features.collect{ |id| "'" + dataset.features[id][OT.smarts] + "'" }
+ end
+
+ # gather missing features
+ delete_features = []
+ features.each{ |id|
+ dataset.features[id][RDF.type].each { |typestr|
+ if typestr.include? "MissingFeature"
+ delete_features << id
+ end
+ }
+ }
+ features = features - delete_features
+
+ # detect nr duplicates per compound
+ compound_sizes = {}
+ dataset.compounds.each do |compound|
+ entries=dataset.data_entries[compound]
+ if entries
+ entries.each do |feature, values|
+ compound_sizes[compound] || compound_sizes[compound] = []
+ compound_sizes[compound] << values.size
+ end
+ compound_sizes[compound].uniq!
+ raise "Inappropriate data for CSV export for compound #{compound}" if compound_sizes[compound].size > 1
+ compound_sizes[compound] = compound_sizes[compound][0] # integer instead of array
+ end
+ end
+
+ # get headers
+ features_smarts && @rows.first << features_smarts || @rows.first << features
@rows.first.flatten!
- dataset.data_entries.each do |compound,entries|
- cmpd = Compound.new(compound)
- smiles = cmpd.to_smiles
- inchi = URI.encode_www_form_component(cmpd.to_inchi)
- row_container = Array.new
- row = Array.new(@rows.first.size)
- row_container << row
- #row[0] = smiles
- row[0] = inchi
- entries.each do |feature, values|
- i = features.index(feature)+1
- values.each do |value|
- if row_container[0][i]
- #LOGGER.debug "Feature '#{feature}' (nr '#{i}'): '#{value}'"
- row_container << row_container.last.collect
- row_container.last[i] = value
- #LOGGER.debug "RC: #{row_container.to_yaml}"
- else
- row_container.each { |r| r[i] = value }
- end
+
+ # feature positions pre-calculated
+ feature_positions = features.inject({}) { |h,f|
+ h.merge!({f => features.index(f)+1}) # +1 due to ID
+ h
+ }
+
+ # serialize to csv
+ dataset.compounds.each do |compound|
+ entries=dataset.data_entries[compound]
+ if entries
+ inchi = URI.encode_www_form_component(Compound.new(compound).to_inchi)
+
+ # allocate container
+ row_container = Array.new(compound_sizes[compound])
+ (0...row_container.size).each do |i|
+ row_container[i] = Array.new(@rows.first.size)
+ row_container[i][0] = inchi
+ end
+
+ # fill entries
+ entries.each { |feature, values|
+ (0...compound_sizes[compound]).each { |i|
+ row_container[i][feature_positions[feature]] = values[i]
+ }
+ }
+
+ # fill zeroes for subgraphs
+ if (features_smarts)
+ row_container.collect! { |row|
+ row.collect! { |x| x ? x : 0 }
+ }
end
+ row_container.each { |row| @rows << row }
+
end
- row_container.each { |r| @rows << r }
end
end
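The rewritten CSV serializer pre-allocates one row per duplicate and fills cells via column positions computed once up front, instead of calling features.index for every cell. The index arithmetic in isolation:

    features = ["f1", "f2", "f3"]
    feature_positions = features.inject({}) { |h, f| h.merge!(f => features.index(f) + 1) }
    # => {"f1"=>1, "f2"=>2, "f3"=>3}; +1 because column 0 holds the compound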
diff --git a/lib/stratification.R b/lib/stratification.R
index 76ff2d8..3f8698c 100644
--- a/lib/stratification.R
+++ b/lib/stratification.R
@@ -1,4 +1,13 @@
+round_it <- function( x )
+{
+ if(isTRUE((x - floor(x))>=0.5))
+ ceiling(x)
+ else
+ floor(x)
+}
+
+
nominal_to_binary <- function( data )
{
result = NULL
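round_it is presumably needed because base R's round() follows the IEC 60559 round-half-to-even rule (round(2.5) yields 2), while the samplecube ratio adjustment below wants conventional round-half-up behaviour (round_it(2.5) yields 3).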
@@ -41,9 +50,13 @@ nominal_to_binary <- function( data )
result
}
-process_data <- function( data )
+process_data <- function( data, colnames=NULL )
{
data.num <- as.data.frame(data)
+ if (!is.null(colnames))
+ {
+ data.num = subset(data.num, select = colnames)
+ }
if (!is.numeric(data.num))
{
data.num = nominal_to_binary(data.num)
@@ -72,14 +85,15 @@ cluster <- function( data, min=10, max=15 )
cbind(s$partition[,m])
}
-stratified_split <- function( data, ratio=0.3, method="cluster" )
+stratified_split <- function( data, ratio=0.3, method="cluster", colnames=NULL )
{
- data.processed = as.matrix(process_data( data ))
+ data.processed = as.matrix(process_data( data, colnames ))
+ print(paste("split using #features: ",ncol(data.processed)))
if (method == "samplecube")
{
require("sampling")
# adjust ratio to make samplecube return exact number of samples
- ratio = round(nrow(data.processed)*ratio)/nrow(data.processed)
+ ratio = round_it(nrow(data.processed)*ratio)/nrow(data.processed)
pik = rep(ratio,times=nrow(data.processed))
data.strat = cbind(pik,data.processed)
samplecube(data.strat,pik,order=2,comment=F)
@@ -101,10 +115,11 @@ stratified_split <- function( data, ratio=0.3, method="cluster" )
stop("unknown method")
}
-stratified_k_fold_split <- function( data, num_folds=10, method="cluster" )
+stratified_k_fold_split <- function( data, num_folds=10, method="cluster", colnames=NULL )
{
print(paste(num_folds,"-fold-split, data-size",nrow(data)))
- data.processed = as.matrix(process_data( data ))
+ data.processed = as.matrix(process_data( data, colnames ))
+ print(paste("split using #features: ",ncol(data.processed)))
if (method == "samplecube")
{
folds = rep(0, times=nrow(data))
@@ -133,7 +148,7 @@ stratified_k_fold_split <- function( data, num_folds=10, method="cluster" )
{
require("TunePareto")
cl = cluster(data.processed)
- res = generateCVRuns(cl,ntimes=1,nfold=3)
+ res = generateCVRuns(cl,ntimes=1,nfold=num_folds)
folds = rep(0, times=nrow(data))
for (i in 1:num_folds)
for(j in 1:length(res[[1]][[i]]))
@@ -144,6 +159,50 @@ stratified_k_fold_split <- function( data, num_folds=10, method="cluster" )
stop("unknown method")
}
+duplicate_indices <- function( data ) {
+ indices = 1:nrow(data)
+ z = data
+ duplicate_index = anyDuplicated(z)
+ while(duplicate_index) {
+ duplicate_to_index = anyDuplicated(z[1:duplicate_index,],fromLast=T)
+ #print(paste(duplicate_index,'is dupl to',duplicate_to_index))
+ indices[duplicate_index] <- duplicate_to_index
+ z[duplicate_index,] <- paste('123$§%',duplicate_index)
+ duplicate_index = anyDuplicated(z)
+ }
+ indices
+}
+
+add_duplicates <- function( data, dup_indices ) {
+ result = data[1,]
+ for(i in 2:length(dup_indices)) {
+ row = data[rownames(data)==dup_indices[i],]
+ if(length(row)==0)
+ stop(paste('index ',i,' dup-index ',dup_indices[i],'not found in data'))
+ result = rbind(result, row)
+ }
+ rownames(result)<-NULL
+ result
+}
+
+sammon_duplicates <- function( data, ... ) {
+ di <- duplicate_indices(data)
+ print(di)
+ u <- unique(data)
+ print(paste('unique data points',nrow(u),'of',nrow(data)))
+ if(nrow(u) <= 4) stop("number of unique datapoints <= 4")
+ points_unique <- sammon(dist(u), ...)$points
+ if (nrow(u)<nrow(data))
+ {
+ points <- add_duplicates(points_unique, di)
+ points
+ }
+ else
+ {
+ points_unique
+ }
+}
+
plot_pre_process <- function( data, method="pca" )
{
data.processed = process_data( data )
@@ -158,6 +217,11 @@ plot_pre_process <- function( data, method="pca" )
data.emb <- smacofSym(dist(data.processed, method = "euclidean"), ndim=2, verbose=T)
data.emb$conf
}
+ else if (method == "sammon")
+ {
+ require("MASS")
+ sammon_duplicates(data.processed, k=2)
+ }
else
stop("unknown method")
}
diff --git a/lib/transform.rb b/lib/transform.rb
index 8fe1093..8632f6c 100644
--- a/lib/transform.rb
+++ b/lib/transform.rb
@@ -396,7 +396,7 @@ module OpenTox
@q_prop = gsl_q_prop_orig.row(0).to_a
end
- LOGGER.debug "F: #{@n_prop.size}x#{@n_prop[0].size}; R: #{@q_prop.size}"
+ LOGGER.debug "F: #{@n_prop.size}x#{@n_prop[0].size}; R: #{@q_prop.size}" if (@n_prop && @n_prop[0] && @q_prop)
LOGGER.debug "Sims: #{@sims.size}, Acts: #{@acts.size}"
@sims = [ gram_matrix, @sims ]
@@ -490,8 +490,10 @@ module OpenTox
@cmpds = []; @fps = []; @acts = []; @n_prop = []; @q_prop = []
- @model.fingerprints.each { |fp|
- cmpd = fp[0]; fp = fp[1]
+ # Major BUG! Must loop over @model.compounds, hash is unordered!
+ # @model.fingerprints.each
+ @model.compounds.each { |cmpd|
+ fp = @model.fingerprints[cmpd]
if @model.activities[cmpd] # row good
acts = @model.activities[cmpd]; @acts += acts
LOGGER.debug "#{acts.size} activities for '#{cmpd}'" if acts.size > 1
diff --git a/lib/utils.rb b/lib/utils.rb
index d9d7b4b..149208b 100644
--- a/lib/utils.rb
+++ b/lib/utils.rb
@@ -1,155 +1,414 @@
require 'csv'
+require 'tempfile'
module OpenTox
module Algorithm
+ @ambit_descriptor_algorithm_uri = "http://apps.ideaconsult.net:8080/ambit2/algorithm/org.openscience.cdk.qsar.descriptors.molecular."
+ @ambit_ds_service_uri = "http://apps.ideaconsult.net:8080/ambit2/dataset/"
+ @ambit_mopac_model_uri = "http://apps.ideaconsult.net:8080/ambit2/model/69632"
+ @keysfile = File.join(ENV['HOME'], ".opentox", "config", "pc_descriptors.yaml")
+
include OpenTox
# Calculate physico-chemical descriptors.
- # @param[Hash] Required keys: :dataset_uri, :pc_type
+ # @param[Hash] required: :dataset_uri, :pc_type, :rjb, :task, :add_uri, optional: :descriptor, :lib, :subjectid
# @return[String] dataset uri
-
def self.pc_descriptors(params)
+ ds = OpenTox::Dataset.find(params[:dataset_uri],params[:subjectid])
+ compounds = ds.compounds.collect
+ task_weights = {"joelib"=> 20, "openbabel"=> 1, "cdk"=> 50 }
+ task_weights.keys.each { |step| task_weights.delete(step) if (params[:lib] && (!params[:lib].split(",").include?(step)))}
+ task_weights["load"] = 10
+ task_sum = Float task_weights.values.sum
+ task_weights.keys.each { |step| task_weights[step] /= task_sum }
+ task_weights.keys.each { |step| task_weights[step] = (task_weights[step]*100).floor }
+
+ jl_master=nil
+ cdk_master=nil
+ ob_master=nil
+
+
+ # # # openbabel (via ruby bindings)
+ if !params[:lib] || params[:lib].split(",").include?("openbabel")
+ ob_master, ob_ids = get_ob_descriptors( { :compounds => compounds, :pc_type => params[:pc_type], :descriptor => params[:descriptor] } )
+ params[:task].progress(params[:task].metadata[OT.percentageCompleted] + task_weights["openbabel"]) if params[:task]
+ end
+
+
+ # # # joelib (via rjb)
+ if !params[:lib] || params[:lib].split(",").include?("joelib")
+ jl_master, jl_ids = get_jl_descriptors( { :compounds => compounds, :rjb => params[:rjb], :pc_type => params[:pc_type], :descriptor => params[:descriptor] } )
+ params[:task].progress(params[:task].metadata[OT.percentageCompleted] + task_weights["joelib"]) if params[:task]
+ end
+
+
+ # # # cdk (via REST)
+ if !params[:lib] || params[:lib].split(",").include?("cdk")
+ ambit_result_uri, smiles_to_inchi, cdk_ids = get_cdk_descriptors( { :compounds => compounds, :pc_type => params[:pc_type], :task => params[:task], :step => task_weights["cdk"], :descriptor => params[:descriptor] } )
+ #LOGGER.debug "Ambit result uri for #{params.inspect}: '#{ambit_result_uri.to_yaml}'"
+ cdk_master, cdk_ids, ambit_ids = load_ds_csv(ambit_result_uri, smiles_to_inchi, cdk_ids )
+ params[:task].progress(params[:task].metadata[OT.percentageCompleted] + task_weights["load"]) if params[:task]
+ end
+
+ # # # fuse CSVs ("master" structures)
+ if jl_master && cdk_master
+ nr_cols = (jl_master[0].size)-1
+ LOGGER.debug "Merging #{nr_cols} new columns"
+ cdk_master.each {|row| nr_cols.times { row.push(nil) } }
+ jl_master.each do |row|
+ temp = cdk_master.assoc(row[0]) # Finds the appropriate line in master
+ ((-1*nr_cols)..-1).collect.each { |idx|
+ temp[idx] = row[nr_cols+idx+1] if temp # Updates columns if line is found
+ }
+ end
+ master = cdk_master
+ else # either jl_master or cdk_master nil
+ master = jl_master || cdk_master
+ end
+
+ if ob_master && master
+ nr_cols = (ob_master[0].size)-1
+ LOGGER.debug "Merging #{nr_cols} new columns"
+ master.each {|row| nr_cols.times { row.push(nil) } } # Adds empty columns to all rows
+ ob_master.each do |row|
+ temp = master.assoc(row[0]) # Finds the appropriate line in master
+ ((-1*nr_cols)..-1).collect.each { |idx|
+ temp[idx] = row[nr_cols+idx+1] if temp # Updates columns if line is found
+ }
+ end
+ else # either ob_master or master nil
+ master = ob_master || master
+ end
+
+ if master
+
+ ds = OpenTox::Dataset.find(
+ OpenTox::RestClientWrapper.post(
+ File.join(CONFIG[:services]["opentox-dataset"]), master.collect { |row| row.join(",") }.join("\n"), {:content_type => "text/csv", :subjectid => params[:subjectid]}
+ ),params[:subjectid]
+ )
+
+ # # # add feature metadata
+ pc_descriptors = YAML::load_file(@keysfile)
+ ambit_ids && ambit_ids.each_with_index { |id,idx|
+ raise "Feature not found" if ! ds.features[File.join(ds.uri, "feature", id.to_s)]
+ ds.add_feature_metadata(File.join(ds.uri, "feature", id.to_s),{DC.description => "#{pc_descriptors[cdk_ids[idx]][:name]} [#{pc_descriptors[cdk_ids[idx]][:pc_type]}, #{pc_descriptors[cdk_ids[idx]][:lib]}]"})
+ ds.add_feature_metadata(File.join(ds.uri, "feature", id.to_s),{DC.creator => @ambit_descriptor_algorithm_uri + cdk_ids[idx]})
+ ds.add_feature_metadata(File.join(ds.uri, "feature", id.to_s),{OT.hasSource => params[:dataset_uri]})
+ }
+ ob_ids && ob_ids.each { |id|
+ raise "Feature not found" if ! ds.features[File.join(ds.uri, "feature", id.to_s)]
+ ds.add_feature_metadata(File.join(ds.uri, "feature", id.to_s),{DC.description => "#{pc_descriptors[id][:name]} [#{pc_descriptors[id][:pc_type]}, #{pc_descriptors[id][:lib]}]"})
+ creator_uri = ds.uri.gsub(/\/dataset\/.*/, "/algorithm/pc")
+ creator_uri += "/#{id}" if params[:add_uri]
+ ds.add_feature_metadata(File.join(ds.uri, "feature", id.to_s),{DC.creator => creator_uri})
+ ds.add_feature_metadata(File.join(ds.uri, "feature", id.to_s),{OT.hasSource => params[:dataset_uri]})
+ }
+ jl_ids && jl_ids.each { |id|
+ raise "Feature not found" if ! ds.features[File.join(ds.uri, "feature", id.to_s)]
+ ds.add_feature_metadata(File.join(ds.uri, "feature", id.to_s),{DC.description => "#{pc_descriptors[id][:name]} [#{pc_descriptors[id][:pc_type]}, #{pc_descriptors[id][:lib]}]"})
+ creator_uri = ds.uri.gsub(/\/dataset\/.*/, "/algorithm/pc")
+ creator_uri += "/#{id}" if params[:add_uri]
+ ds.add_feature_metadata(File.join(ds.uri, "feature", id.to_s),{DC.creator => creator_uri})
+ ds.add_feature_metadata(File.join(ds.uri, "feature", id.to_s),{OT.hasSource => params[:dataset_uri]})
+ }
+
+ ds.save(params[:subjectid])
+ else
+ raise OpenTox::BadRequestError.new "No descriptors matching your criteria found."
+ end
+
+ end
+
+
+ # Calculate OpenBabel physico-chemical descriptors.
+ # @param[Hash] required: :compounds, :pc_type, :task, optional: :descriptor
+ # @return[Array] CSV master array and array of field ids
+ def self.get_ob_descriptors(params)
+
+ master = nil
+
begin
- ds = OpenTox::Dataset.find(params[:dataset_uri])
- compounds = ds.compounds.collect
- ambit_result_uri, smiles_to_inchi = get_pc_descriptors( { :compounds => compounds, :pc_type => params[:pc_type] } )
- #ambit_result_uri = ["http://apps.ideaconsult.net:8080/ambit2/dataset/987103?" ,"feature_uris[]=http%3A%2F%2Fapps.ideaconsult.net%3A8080%2Fambit2%2Ffeature%2F4276789&", "feature_uris[]=http%3A%2F%2Fapps.ideaconsult.net%3A8080%2Fambit2%2Fmodel%2F16%2Fpredicted"] # for testing
- LOGGER.debug "Ambit result uri for #{params.inspect}: '#{ambit_result_uri.to_yaml}'"
- load_ds_csv(ambit_result_uri, smiles_to_inchi)
+ csvfile = Tempfile.open(['ob_descriptors-','.csv'])
+
+ pc_descriptors = YAML::load_file(@keysfile)
+ ids = pc_descriptors.collect{ |id, info|
+ id if info[:lib] == "openbabel" && params[:pc_type].split(",").include?(info[:pc_type]) && (!params[:descriptor] || id == params[:descriptor])
+ }.compact
+
+ if ids.length > 0
+ csvfile.puts((["SMILES"] + ids).join(","))
+
+ # remember inchis
+ inchis = params[:compounds].collect { |c_uri|
+ URI.encode_www_form_component(OpenTox::Compound.new(c_uri).to_inchi)
+ }
+
+ # Process compounds
+ obmol = OpenBabel::OBMol.new
+ obconversion = OpenBabel::OBConversion.new
+ obconversion.set_in_and_out_formats 'inchi', 'can'
+
+ inchis.each_with_index { |inchi, c_idx|
+ row = [inchis[c_idx]]
+ obconversion.read_string(obmol, URI.decode_www_form_component(inchi))
+ ids.each { |name|
+ if obmol.respond_to?(name.underscore)
+ val = eval("obmol.#{name.underscore}") if obmol.respond_to?(name.underscore)
+ else
+ if name != "nF" && name != "spinMult" && name != "nHal" && name != "logP"
+ val = OpenBabel::OBDescriptor.find_type(name.underscore).predict(obmol)
+ elsif name == "nF"
+ val = OpenBabel::OBDescriptor.find_type("nf").predict(obmol)
+ elsif name == "spinMult" || name == "nHal" || name == "logP"
+ val = OpenBabel::OBDescriptor.find_type(name).predict(obmol)
+ end
+ end
+ if OpenTox::Algorithm.numeric?(val)
+ val = Float(val)
+ val = nil if val.nan?
+ val = nil if (val && val.infinite?)
+ end
+ row << val
+ }
+ LOGGER.debug "Compound #{c_idx+1} (#{inchis.size}), #{row.size} entries"
+ csvfile.puts(row.join(","))
+ csvfile.flush
+ }
+ master = CSV::parse(File.open(csvfile.path, "rb").read)
+ end
+
rescue Exception => e
LOGGER.debug "#{e.class}: #{e.message}"
LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
+ ensure
+ csvfile.close!
end
+ [ master, ids ]
+
end
-
- # Calculates PC descriptors via Ambit -- DO NOT OVERLOAD Ambit.
- # @param[Hash] Required keys: :compounds, :pc_type
- # @return[Array] Ambit result uri, piecewise (1st: base, 2nd: SMILES, 3rd+: features
- def self.get_pc_descriptors(params)
+
+
+ # Calculate Joelib2 physico-chemical descriptors.
+ # @param[Hash] required: :compounds, :pc_type, :task, optional: :descriptor
+ # @return[Array] CSV master array and array of field ids
+ def self.get_jl_descriptors(params)
+
+ master = nil
+ s = params[:rjb]; raise "No Java environment" unless s
+
+ # Load keys, enter CSV headers
begin
+ csvfile = Tempfile.open(['jl_descriptors-','.csv'])
- ambit_ds_service_uri = "http://apps.ideaconsult.net:8080/ambit2/dataset/"
- ambit_mopac_model_uri = "http://apps.ideaconsult.net:8080/ambit2/model/69632"
- descs = YAML::load_file( File.join(ENV['HOME'], ".opentox", "config", "ambit_descriptors.yaml") )
- descs_uris = []
- params[:pc_type] = "electronic,cpsa" if params[:pc_type].nil? # rescue missing pc_type
- types = params[:pc_type].split(",")
- descs.each { |uri, cat_name|
- if types.include? cat_name[:category]
- descs_uris << uri
- end
- }
- if descs_uris.size == 0
- raise "Error! Empty set of descriptors. Did you supply one of [geometrical, topological, electronic, constitutional, hybrid, cpsa] ?"
+ pc_descriptors = YAML::load_file(@keysfile)
+ ids = pc_descriptors.collect{ |id, info|
+ id if info[:lib] == "joelib" && params[:pc_type].split(",").include?(info[:pc_type]) && (!params[:descriptor] || id == params[:descriptor])
+ }.compact
+
+
+ if ids.length > 0
+ csvfile.puts((["SMILES"] + ids).join(","))
+
+ # remember inchis
+ inchis = params[:compounds].collect { |c_uri|
+ cmpd = OpenTox::Compound.new(c_uri)
+ URI.encode_www_form_component(cmpd.to_inchi)
+ }
+
+ # Process compounds
+ params[:compounds].each_with_index { |c_uri, c_idx|
+ cmpd = OpenTox::Compound.new(c_uri)
+ inchi = cmpd.to_inchi
+ sdf_data = cmpd.to_sdf
+
+ infile = Tempfile.open(['jl_descriptors-in-','.sdf'])
+ outfile_path = infile.path.gsub(/jl_descriptors-in/,"jl_descriptors-out")
+
+ begin
+ infile.puts sdf_data
+ infile.flush
+ s.new(infile.path, outfile_path) # runs joelib
+
+ row = [inchis[c_idx]]
+ ids.each_with_index do |k,i| # Fill row
+ re = Regexp.new(k)
+ open(outfile_path) do |f|
+ f.each do |line|
+ if @prev == k
+ entry = line.chomp
+ val = nil
+ if OpenTox::Algorithm.numeric?(entry)
+ val = Float(entry)
+ val = nil if val.nan?
+ val = nil if (val && val.infinite?)
+ end
+ row << val
+ break
+ end
+ @prev = line.gsub(/^.*types./,"").gsub(/count./,"").gsub(/>/,"").chomp if line =~ re
+ end
+ end
+ end
+ LOGGER.debug "Compound #{c_idx+1} (#{inchis.size}), #{row.size} entries"
+ csvfile.puts(row.join(","))
+ csvfile.flush
+
+ rescue Exception => e
+ LOGGER.debug "#{e.class}: #{e.message}"
+ LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
+ ensure
+ File.delete(infile.path.gsub(/\.sdf/,".numeric.sdf"))
+ File.delete(outfile_path)
+ infile.close!
+ end
+ }
+ master = CSV::parse(File.open(csvfile.path, "rb").read)
end
- #LOGGER.debug "Ambit descriptor URIs: #{descs_uris.join(", ")}"
+ rescue Exception => e
+ LOGGER.debug "#{e.class}: #{e.message}"
+ LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
+ ensure
+ csvfile.close! if csvfile
+ end
+
+ [ master, ids ]
+
+ end
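The JOELib output scan above pairs each SDF tag line with the value line that follows it, tracking the last seen tag. A minimal standalone sketch of that pairing (tag names invented for illustration):

    # sketch: pair SDF tag lines with their value lines (tags are hypothetical)
    lines = [
      ">  <joelib2.feature.types.atom_count>", "7", "",
      ">  <joelib2.feature.types.logP>", "1.39"
    ]
    values = {}
    prev = nil
    lines.each do |line|
      if prev                       # previous line was a tag: this is its value
        values[prev] = line
        prev = nil
      elsif line =~ /^>\s*<(.*)>/   # tag line announces the next value
        prev = $1
      end
    end
    p values  # => {"joelib2.feature.types.atom_count"=>"7", "joelib2.feature.types.logP"=>"1.39"}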
+
+ # Calculate CDK physico-chemical descriptors via Ambit -- DO NOT OVERLOAD Ambit.
+ # @param[Hash] required: :compounds, :pc_type, :task, :step; optional: :descriptor
+ # @return[Array] Ambit result URIs, piecewise (1st: base, 2nd: SMILES, 3rd+: features), hash mapping SMILES to InChI, array of field ids
+ def self.get_cdk_descriptors(params)
+
+ ambit_result_uri = [] # 1st pos: base uri, then features
+ smiles_to_inchi = {}
+ task_weights = {"electronic"=> 4, "topological"=> 19, "constitutional"=> 12, "geometrical"=> 3, "hybrid"=> 2, "cpsa"=> 1 }
+ task_weights.keys.each { |pc_type| task_weights.delete(pc_type) if (params[:pc_type] && (!params[:pc_type].split(",").include?(pc_type)))}
+ task_sum = Float task_weights.values.sum
+ task_weights.keys.each { |pc_type| task_weights[pc_type] /= task_sum }
+ task_weights.keys.each { |pc_type| task_weights[pc_type] *= params[:step] }
+
+
+ # extract wanted descriptors from config file and parameters
+ pc_descriptors = YAML::load_file(@keysfile)
+
+ ids = pc_descriptors.collect { |id, info|
+ "#{info[:pc_type]}:::#{id}" if info[:lib] == "cdk" && params[:pc_type].split(",").include?(info[:pc_type]) && (!params[:descriptor] || id == params[:descriptor])
+ }.compact
+
+ if ids.size > 0
+ ids.sort!
+ ids.collect! { |id| id.split(":::").last }
+
+ # create dataset at Ambit
begin
- # Create SMI
- smiles_array = []; smiles_to_inchi = {}
params[:compounds].each do |n|
cmpd = OpenTox::Compound.new(n)
smiles_string = cmpd.to_smiles
smiles_to_inchi[smiles_string] = URI.encode_www_form_component(cmpd.to_inchi)
- smiles_array << smiles_string
end
- smi_file = Tempfile.open(['pc_ambit', '.csv'])
- pc_descriptors = nil
-
- # Create Ambit dataset
- smi_file.puts( "SMILES\n" )
- smi_file.puts( smiles_array.join("\n") )
- smi_file.flush
- ambit_ds_uri = OpenTox::RestClientWrapper.post(ambit_ds_service_uri, {:file => File.new(smi_file.path)}, {:content_type => "multipart/form-data", :accept => "text/uri-list"} )
+ smi_file = Tempfile.open(['pc_ambit', '.csv'])
+ smi_file.puts( "SMILES\n" + smiles_to_inchi.keys.join("\n") )
+ smi_file.flush
+ ambit_ds_uri = OpenTox::RestClientWrapper.post(@ambit_ds_service_uri, {:file => File.new(smi_file.path)}, {:content_type => "multipart/form-data", :accept => "text/uri-list"} )
+ ambit_result_uri = [ ambit_ds_uri + "?" ] # 1st pos: base uri, then features
rescue Exception => e
LOGGER.debug "#{e.class}: #{e.message}"
LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
ensure
smi_file.close! if smi_file
end
- ambit_smiles_uri = OpenTox::RestClientWrapper.get(ambit_ds_uri + "/features", {:accept=> "text/uri-list"} ).chomp
-
- # Calculate 3D for CPSA
- if types.include? "cpsa"
- ambit_ds_mopac_uri = OpenTox::RestClientWrapper.post(ambit_mopac_model_uri, {:dataset_uri => ambit_ds_uri}, {:accept => "text/uri-list"} )
- LOGGER.debug "MOPAC dataset: #{ambit_ds_mopac_uri }"
- end
-
- # Get Ambit results
- ambit_result_uri = [] # 1st pos: base uri, then features
- ambit_result_uri << ambit_ds_uri + "?"
+ # get SMILES feature URI
+ ambit_smiles_uri = OpenTox::RestClientWrapper.get(
+ ambit_ds_uri + "/features",
+ {:accept=> "text/uri-list"}
+ ).chomp
ambit_result_uri << ("feature_uris[]=" + URI.encode_www_form_component(ambit_smiles_uri) + "&")
- descs_uris.each_with_index do |uri, i|
- algorithm = Algorithm::Generic.new(uri)
+ # always calculate 3D (http://goo.gl/Tk81j), then get results
+ OpenTox::RestClientWrapper.post(
+ @ambit_mopac_model_uri,
+ {:dataset_uri => ambit_ds_uri},
+ {:accept => "text/uri-list"}
+ )
+ current_cat = ""
+ ids.each_with_index do |id, i|
+ old_cat = current_cat; current_cat = pc_descriptors[id][:pc_type]
+ params[:task].progress(params[:task].metadata[OT.percentageCompleted] + task_weights[old_cat]) if params[:task] && old_cat != current_cat && old_cat != ""
+ algorithm = Algorithm::Generic.new(@ambit_descriptor_algorithm_uri+id)
result_uri = algorithm.run({:dataset_uri => ambit_ds_uri})
ambit_result_uri << result_uri.split("?")[1] + "&"
- LOGGER.debug "Ambit (#{descs_uris.size}): #{i+1}"
+ LOGGER.debug "Ambit (#{ids.size}): #{i+1}"
end
+ params[:task].progress(params[:task].metadata[OT.percentageCompleted] + task_weights[current_cat]) if params[:task]
#LOGGER.debug "Ambit result: #{ambit_result_uri.join('')}"
- [ ambit_result_uri, smiles_to_inchi ]
-
- rescue Exception => e
- LOGGER.debug "#{e.class}: #{e.message}"
- LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
end
+
+ [ ambit_result_uri, smiles_to_inchi, ids ]
+
end
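The progress weighting above normalizes the per-category weights to fractions and scales them by the stage's :step share, so each Ambit category advances the task in proportion to its expected cost. A worked example with assumed values:

    # worked example of the task-weight arithmetic (weights and step assumed)
    task_weights = { "electronic" => 4, "cpsa" => 1 }
    step = 50.0   # percentage share allotted to this stage
    task_sum = task_weights.values.inject(0.0) { |s, w| s + w }   # 5.0
    task_weights.each_key { |k| task_weights[k] = task_weights[k] / task_sum * step }
    p task_weights   # => {"electronic"=>40.0, "cpsa"=>10.0}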
# Load Ambit result datasets via CSV and merge them into one table
# @param[Array] Ambit result uri, piecewise (1st: base, 2nd: SMILES, 3rd+: features)
- # @return[String] dataset uri
- def self.load_ds_csv(ambit_result_uri, smiles_to_inchi, subjectid=nil)
+ # @param[Hash] keys: SMILES, values: InChIs
+ # @param[Array] field ids, one for each feature
+ # @return[Array] parsed CSV data, array of field ids, array of sanitized Ambit header ids
+ def self.load_ds_csv(ambit_result_uri, smiles_to_inchi, single_ids, subjectid=nil)
master=nil
- (1...ambit_result_uri.size).collect { |idx|
- curr_uri = ambit_result_uri[0] + ambit_result_uri[idx]
- LOGGER.debug "Requesting #{curr_uri}"
- csv_data = CSV.parse( OpenTox::RestClientWrapper.get(curr_uri, {:accept => "text/csv", :subjectid => subjectid}) )
- if csv_data[0] && csv_data[0].size>1
- if master.nil? # This is the smiles entry
- (1...csv_data.size).each{ |idx| csv_data[idx][1] = smiles_to_inchi[csv_data[idx][1]] }
- master = csv_data
- next
- else
- index_uri = csv_data[0].index("SMILES")
- csv_data.map {|i| i.delete_at(index_uri)} if index_uri #Removes additional SMILES information
-
- nr_cols = (csv_data[0].size)-1
- LOGGER.debug "Merging #{nr_cols} new columns"
- master.each {|row| nr_cols.times { row.push(nil) } } # Adds empty columns to all rows
- csv_data.each do |row|
- temp = master.assoc(row[0]) # Finds the appropriate line in master
- ((-1*nr_cols)..-1).collect.each { |idx|
- temp[idx] = row[nr_cols+idx+1] if temp # Updates columns if line is found
- }
+ ids=[]
+ ambit_ids=[]
+
+ if ambit_result_uri.size > 0
+ (1...ambit_result_uri.size).collect { |idx|
+ curr_uri = ambit_result_uri[0] + ambit_result_uri[idx]
+ #LOGGER.debug "Requesting #{curr_uri}"
+ csv_data = CSV.parse( OpenTox::RestClientWrapper.get(curr_uri, {:accept => "text/csv", :subjectid => subjectid}) )
+ if csv_data[0] && csv_data[0].size>1
+ if master.nil? # This is the smiles entry
+ (1...csv_data.size).each{ |idx| csv_data[idx][1] = smiles_to_inchi[csv_data[idx][1]] }
+ master = csv_data
+ next
+ else
+ index_uri = csv_data[0].index("SMILES")
+ csv_data.map {|i| i.delete_at(index_uri)} if index_uri #Removes additional SMILES information
+
+ nr_cols = (csv_data[0].size)-1
+ LOGGER.debug "Merging #{nr_cols} new columns"
+ ids += Array.new(nr_cols, single_ids[idx-2])
+ master.each {|row| nr_cols.times { row.push(nil) } } # Adds empty columns to all rows
+ csv_data.each do |row|
+ temp = master.assoc(row[0]) # Finds the appropriate line in master
+ ((-1*nr_cols)..-1).collect.each { |idx|
+ temp[idx] = row[nr_cols+idx+1] if temp # Updates columns if line is found
+ }
+ end
end
end
- end
- }
+ }
- index_uri = master[0].index("Compound")
- master.map {|i| i.delete_at(index_uri)}
- master[0].each {|cell| cell.chomp!(" ")}
- master[0][0] = "Compound" #"SMILES"
- index_smi = master[0].index("SMILES")
- master.map {|i| i.delete_at(index_smi)} if index_smi
- #master[0][0] = "SMILES"
+ index_uri = master[0].index("Compound")
+ master.map {|i| i.delete_at(index_uri)}
+ master[0].each {|cell| cell.chomp!(" ")}
+ master[0][0] = "Compound" #"SMILES"
+ index_smi = master[0].index("SMILES")
+ master.map {|i| i.delete_at(index_smi)} if index_smi
+ master[0][0] = "SMILES"
+ ambit_ids=master[0].collect {|header| header.to_s.gsub(/[\/.\\\(\)\{\}\[\]]/,"_")}
+ ambit_ids.shift
+ end
#LOGGER.debug "-------- AM: Writing to dumpfile"
#File.open("/tmp/test.csv", 'w') {|f| f.write( master.collect {|r| r.join(",")}.join("\n") ) }
- parser = OpenTox::Parser::Spreadsheets.new
- ds = OpenTox::Dataset.new(nil,subjectid)
- ds.save(subjectid)
- parser.dataset = ds
- ds = parser.load_csv(master.collect{|r| r.join(",")}.join("\n"))
- ds.save(subjectid)
+ [ master, ids, ambit_ids ]
+
end
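load_ds_csv joins each per-feature CSV onto the master table by its first column (the compound identifier), widening master with empty columns and filling them via Array#assoc. A minimal sketch of that merge with made-up rows:

    # sketch of the assoc-based merge (rows are made up)
    master   = [["Compound", "SMILES"], ["c1", "CCO"], ["c2", "c1ccccc1"]]
    csv_data = [["Compound", "XLogP"],  ["c2", "1.69"], ["c1", "-0.14"]]

    nr_cols = csv_data[0].size - 1
    master.each { |row| nr_cols.times { row.push(nil) } }    # add empty columns
    csv_data.each do |row|
      temp = master.assoc(row[0])                            # find matching row
      ((-1 * nr_cols)..-1).each { |idx| temp[idx] = row[nr_cols + idx + 1] if temp }
    end
    p master
    # => [["Compound","SMILES","XLogP"], ["c1","CCO","-0.14"], ["c2","c1ccccc1","1.69"]]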
@@ -208,8 +467,8 @@ module OpenTox
end
- # Effect calculation for classification
- # @param [Array] Array of occurrences per class in the form of Enumerables.
+ # Effect calculation for classification. The elements of the two arrays are assumed to correspond pairwise.
+ # @param [Array] Array of occurrences per class (in the form of Enumerables).
# @param [Array] Array of database instance counts per class.
def self.effect(occurrences, db_instances)
max=0
diff --git a/lib/validation.rb b/lib/validation.rb
index 85004c7..a373e56 100644
--- a/lib/validation.rb
+++ b/lib/validation.rb
@@ -19,8 +19,9 @@ module OpenTox
def self.list( params={} )
filter_string = ""
params.each do |k,v|
- filter_string = "?" if filter_string.length==0
- filter_string += k.to_s+"="+v
+ filter_string += (filter_string.length==0 ? "?" : "&")
+ v = v.to_s.gsub(/;/, "%3b") if v.to_s =~ /;/
+ filter_string += k.to_s+"="+v.to_s
end
(OpenTox::RestClientWrapper.get(CONFIG[:services]["opentox-validation"]+filter_string).split("\n"))
end
@@ -156,8 +157,9 @@ module OpenTox
def self.list( params={} )
filter_string = ""
params.each do |k,v|
- filter_string = "?" if filter_string.length==0
- filter_string += k.to_s+"="+v
+ filter_string += (filter_string.length==0 ? "?" : "&")
+ v = v.to_s.gsub(/;/, "%3b") if v.to_s =~ /;/
+ filter_string += k.to_s+"="+v.to_s
end
(OpenTox::RestClientWrapper.get(File.join(CONFIG[:services]["opentox-validation"],"crossvalidation")+filter_string).split("\n"))
end
diff --git a/opentox-ruby.gemspec b/opentox-ruby.gemspec
index ca2d397..d3ae2d7 100644
--- a/opentox-ruby.gemspec
+++ b/opentox-ruby.gemspec
@@ -5,24 +5,25 @@
Gem::Specification.new do |s|
s.name = %q{opentox-ruby}
- s.version = "3.1.0"
+ s.version = "2.0.1"
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
s.authors = ["Christoph Helma, Martin Guetlein, Andreas Maunz, Micha Rautenberg, David Vorgrimmler"]
- s.date = %q{2012-03-26}
+ s.date = %q{2011-06-15}
s.description = %q{Ruby wrapper for the OpenTox REST API (http://www.opentox.org)}
s.email = %q{helma@in-silico.ch}
+ s.executables = ["opentox-install-debian.sh", "opentox-install-ubuntu.sh"]
s.extra_rdoc_files = [
- "ChangeLog",
"LICENSE",
"README.markdown"
]
s.files = [
- "ChangeLog",
"LICENSE",
"README.markdown",
"Rakefile",
"VERSION",
+ "bin/opentox-install-debian.sh",
+ "bin/opentox-install-ubuntu.sh",
"lib/algorithm.rb",
"lib/authorization.rb",
"lib/compound.rb",
@@ -40,126 +41,121 @@ Gem::Specification.new do |s|
"lib/overwrite.rb",
"lib/parser.rb",
"lib/policy.rb",
- "lib/r-util.rb",
"lib/rest_client_wrapper.rb",
"lib/serializer.rb",
"lib/spork.rb",
- "lib/stratification.R",
"lib/task.rb",
"lib/templates/default_guest_policy.xml",
"lib/templates/default_policy.xml",
"lib/to-html.rb",
"lib/transform.rb",
- "lib/utils.rb",
+ "lib/utils.rb"
"lib/validation.rb"
]
- s.homepage = %q{http://github.com/opentox/opentox-ruby}
+ s.homepage = %q{http://github.com/helma/opentox-ruby}
s.require_paths = ["lib"]
- s.rubygems_version = %q{1.5.3}
+ s.rubygems_version = %q{1.5.2}
s.summary = %q{Ruby wrapper for the OpenTox REST API}
if s.respond_to? :specification_version then
s.specification_version = 3
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
- s.add_runtime_dependency(%q<sinatra>, ["= 1.2.6"])
- s.add_runtime_dependency(%q<emk-sinatra-url-for>, ["= 0.2.1"])
- s.add_runtime_dependency(%q<sinatra-respond_to>, ["= 0.7.0"])
- s.add_runtime_dependency(%q<sinatra-static-assets>, ["= 0.5.0"])
- s.add_runtime_dependency(%q<rest-client>, ["= 1.6.1"])
- s.add_runtime_dependency(%q<rack>, ["= 1.3.5"])
- s.add_runtime_dependency(%q<rack-contrib>, ["= 1.1.0"])
- s.add_runtime_dependency(%q<rack-flash>, ["= 0.1.1"])
- s.add_runtime_dependency(%q<nokogiri>, ["= 1.4.4"])
- s.add_runtime_dependency(%q<rubyzip>, ["= 0.9.4"])
- s.add_runtime_dependency(%q<roo>, ["= 1.9.3"])
- s.add_runtime_dependency(%q<spreadsheet>, ["= 0.6.5.4"])
- s.add_runtime_dependency(%q<google-spreadsheet-ruby>, ["= 0.1.5"])
- s.add_runtime_dependency(%q<yajl-ruby>, ["= 0.8.2"])
- s.add_runtime_dependency(%q<rinruby>, ["= 2.0.2"])
- s.add_runtime_dependency(%q<ohm>, ["= 0.1.3"])
- s.add_runtime_dependency(%q<ohm-contrib>, ["= 0.1.1"])
- s.add_runtime_dependency(%q<SystemTimer>, ["= 1.2.3"])
- s.add_runtime_dependency(%q<rjb>, ["= 1.3.4"])
- s.add_runtime_dependency(%q<haml>, ["= 3.1.1"])
- s.add_runtime_dependency(%q<akephalos>, ["= 0.2.5"])
- s.add_runtime_dependency(%q<dm-core>, ["= 1.1.0"])
- s.add_runtime_dependency(%q<dm-serializer>, ["= 1.1.0"])
- s.add_runtime_dependency(%q<dm-timestamps>, ["= 1.1.0"])
- s.add_runtime_dependency(%q<dm-types>, ["= 1.1.0"])
- s.add_runtime_dependency(%q<dm-migrations>, ["= 1.1.0"])
- s.add_runtime_dependency(%q<dm-validations>, ["= 1.1.0"])
- s.add_runtime_dependency(%q<dm-sqlite-adapter>, ["= 1.1.0"])
- s.add_runtime_dependency(%q<ruby-plot>, ["= 0.6.0"])
- s.add_runtime_dependency(%q<gsl>, ["= 1.14.7"])
- s.add_runtime_dependency(%q<statsample>, ["= 1.1.0"])
+ s.add_runtime_dependency(%q<sinatra>, [">= 0"])
+ s.add_runtime_dependency(%q<emk-sinatra-url-for>, [">= 0"])
+ s.add_runtime_dependency(%q<sinatra-respond_to>, [">= 0"])
+ s.add_runtime_dependency(%q<sinatra-static-assets>, [">= 0"])
+ s.add_runtime_dependency(%q<rest-client>, [">= 0"])
+ s.add_runtime_dependency(%q<rack>, [">= 0"])
+ s.add_runtime_dependency(%q<rack-contrib>, [">= 0"])
+ s.add_runtime_dependency(%q<rack-flash>, [">= 0"])
+ s.add_runtime_dependency(%q<nokogiri>, [">= 0"])
+ s.add_runtime_dependency(%q<rubyzip>, [">= 0"])
+ s.add_runtime_dependency(%q<roo>, [">= 0"])
+ s.add_runtime_dependency(%q<spreadsheet>, [">= 0"])
+ s.add_runtime_dependency(%q<google-spreadsheet-ruby>, [">= 0"])
+ s.add_runtime_dependency(%q<yajl-ruby>, [">= 0"])
+ s.add_runtime_dependency(%q<tmail>, [">= 0"])
+ s.add_runtime_dependency(%q<rinruby>, [">= 0"])
+ s.add_runtime_dependency(%q<ohm>, [">= 0"])
+ s.add_runtime_dependency(%q<ohm-contrib>, [">= 0"])
+ s.add_runtime_dependency(%q<SystemTimer>, [">= 0"])
+ s.add_runtime_dependency(%q<rjb>, [">= 0"])
+ s.add_runtime_dependency(%q<dm-core>, [">= 0"])
+ s.add_runtime_dependency(%q<dm-serializer>, [">= 0"])
+ s.add_runtime_dependency(%q<dm-timestamps>, [">= 0"])
+ s.add_runtime_dependency(%q<dm-types>, [">= 0"])
+ s.add_runtime_dependency(%q<dm-migrations>, [">= 0"])
+ s.add_runtime_dependency(%q<dm-validations>, [">= 0"])
+ s.add_runtime_dependency(%q<dm-sqlite-adapter>, [">= 0"])
+ s.add_runtime_dependency(%q<haml>, [">= 3"])
+ s.add_runtime_dependency(%q<ruby-plot>, ["~> 0.4.0"])
+ s.add_runtime_dependency(%q<statsample>, [">= 0"])
s.add_development_dependency(%q<jeweler>, [">= 0"])
else
- s.add_dependency(%q<sinatra>, ["= 1.2.6"])
- s.add_dependency(%q<emk-sinatra-url-for>, ["= 0.2.1"])
- s.add_dependency(%q<sinatra-respond_to>, ["= 0.7.0"])
- s.add_dependency(%q<sinatra-static-assets>, ["= 0.5.0"])
- s.add_dependency(%q<rest-client>, ["= 1.6.1"])
- s.add_dependency(%q<rack>, ["= 1.3.5"])
- s.add_dependency(%q<rack-contrib>, ["= 1.1.0"])
- s.add_dependency(%q<rack-flash>, ["= 0.1.1"])
- s.add_dependency(%q<nokogiri>, ["= 1.4.4"])
- s.add_dependency(%q<rubyzip>, ["= 0.9.4"])
- s.add_dependency(%q<roo>, ["= 1.9.3"])
- s.add_dependency(%q<spreadsheet>, ["= 0.6.5.4"])
- s.add_dependency(%q<google-spreadsheet-ruby>, ["= 0.1.5"])
- s.add_dependency(%q<yajl-ruby>, ["= 0.8.2"])
- s.add_dependency(%q<rinruby>, ["= 2.0.2"])
- s.add_dependency(%q<ohm>, ["= 0.1.3"])
- s.add_dependency(%q<ohm-contrib>, ["= 0.1.1"])
- s.add_dependency(%q<SystemTimer>, ["= 1.2.3"])
- s.add_dependency(%q<rjb>, ["= 1.3.4"])
- s.add_dependency(%q<haml>, ["= 3.1.1"])
- s.add_dependency(%q<akephalos>, ["= 0.2.5"])
- s.add_dependency(%q<dm-core>, ["= 1.1.0"])
- s.add_dependency(%q<dm-serializer>, ["= 1.1.0"])
- s.add_dependency(%q<dm-timestamps>, ["= 1.1.0"])
- s.add_dependency(%q<dm-types>, ["= 1.1.0"])
- s.add_dependency(%q<dm-migrations>, ["= 1.1.0"])
- s.add_dependency(%q<dm-validations>, ["= 1.1.0"])
- s.add_dependency(%q<dm-sqlite-adapter>, ["= 1.1.0"])
- s.add_dependency(%q<ruby-plot>, ["= 0.6.0"])
- s.add_dependency(%q<gsl>, ["= 1.14.7"])
- s.add_dependency(%q<statsample>, ["= 1.1.0"])
+ s.add_dependency(%q<sinatra>, [">= 0"])
+ s.add_dependency(%q<emk-sinatra-url-for>, [">= 0"])
+ s.add_dependency(%q<sinatra-respond_to>, [">= 0"])
+ s.add_dependency(%q<sinatra-static-assets>, [">= 0"])
+ s.add_dependency(%q<rest-client>, [">= 0"])
+ s.add_dependency(%q<rack>, [">= 0"])
+ s.add_dependency(%q<rack-contrib>, [">= 0"])
+ s.add_dependency(%q<rack-flash>, [">= 0"])
+ s.add_dependency(%q<nokogiri>, [">= 0"])
+ s.add_dependency(%q<rubyzip>, [">= 0"])
+ s.add_dependency(%q<roo>, [">= 0"])
+ s.add_dependency(%q<spreadsheet>, [">= 0"])
+ s.add_dependency(%q<google-spreadsheet-ruby>, [">= 0"])
+ s.add_dependency(%q<yajl-ruby>, [">= 0"])
+ s.add_dependency(%q<tmail>, [">= 0"])
+ s.add_dependency(%q<rinruby>, [">= 0"])
+ s.add_dependency(%q<ohm>, [">= 0"])
+ s.add_dependency(%q<ohm-contrib>, [">= 0"])
+ s.add_dependency(%q<SystemTimer>, [">= 0"])
+ s.add_dependency(%q<rjb>, [">= 0"])
+ s.add_dependency(%q<dm-core>, [">= 0"])
+ s.add_dependency(%q<dm-serializer>, [">= 0"])
+ s.add_dependency(%q<dm-timestamps>, [">= 0"])
+ s.add_dependency(%q<dm-types>, [">= 0"])
+ s.add_dependency(%q<dm-migrations>, [">= 0"])
+ s.add_dependency(%q<dm-validations>, [">= 0"])
+ s.add_dependency(%q<dm-sqlite-adapter>, [">= 0"])
+ s.add_dependency(%q<haml>, [">= 3"])
+ s.add_dependency(%q<ruby-plot>, ["~> 0.4.0"])
+ s.add_dependency(%q<statsample>, [">= 0"])
s.add_dependency(%q<jeweler>, [">= 0"])
end
else
- s.add_dependency(%q<sinatra>, ["= 1.2.6"])
- s.add_dependency(%q<emk-sinatra-url-for>, ["= 0.2.1"])
- s.add_dependency(%q<sinatra-respond_to>, ["= 0.7.0"])
- s.add_dependency(%q<sinatra-static-assets>, ["= 0.5.0"])
- s.add_dependency(%q<rest-client>, ["= 1.6.1"])
- s.add_dependency(%q<rack>, ["= 1.3.5"])
- s.add_dependency(%q<rack-contrib>, ["= 1.1.0"])
- s.add_dependency(%q<rack-flash>, ["= 0.1.1"])
- s.add_dependency(%q<nokogiri>, ["= 1.4.4"])
- s.add_dependency(%q<rubyzip>, ["= 0.9.4"])
- s.add_dependency(%q<roo>, ["= 1.9.3"])
- s.add_dependency(%q<spreadsheet>, ["= 0.6.5.4"])
- s.add_dependency(%q<google-spreadsheet-ruby>, ["= 0.1.5"])
- s.add_dependency(%q<yajl-ruby>, ["= 0.8.2"])
- s.add_dependency(%q<rinruby>, ["= 2.0.2"])
- s.add_dependency(%q<ohm>, ["= 0.1.3"])
- s.add_dependency(%q<ohm-contrib>, ["= 0.1.1"])
- s.add_dependency(%q<SystemTimer>, ["= 1.2.3"])
- s.add_dependency(%q<rjb>, ["= 1.3.4"])
- s.add_dependency(%q<haml>, ["= 3.1.1"])
- s.add_dependency(%q<akephalos>, ["= 0.2.5"])
- s.add_dependency(%q<dm-core>, ["= 1.1.0"])
- s.add_dependency(%q<dm-serializer>, ["= 1.1.0"])
- s.add_dependency(%q<dm-timestamps>, ["= 1.1.0"])
- s.add_dependency(%q<dm-types>, ["= 1.1.0"])
- s.add_dependency(%q<dm-migrations>, ["= 1.1.0"])
- s.add_dependency(%q<dm-validations>, ["= 1.1.0"])
- s.add_dependency(%q<dm-sqlite-adapter>, ["= 1.1.0"])
- s.add_dependency(%q<ruby-plot>, ["= 0.6.0"])
- s.add_dependency(%q<gsl>, ["= 1.14.7"])
- s.add_dependency(%q<statsample>, ["= 1.1.0"])
+ s.add_dependency(%q<sinatra>, [">= 0"])
+ s.add_dependency(%q<emk-sinatra-url-for>, [">= 0"])
+ s.add_dependency(%q<sinatra-respond_to>, [">= 0"])
+ s.add_dependency(%q<sinatra-static-assets>, [">= 0"])
+ s.add_dependency(%q<rest-client>, [">= 0"])
+ s.add_dependency(%q<rack>, [">= 0"])
+ s.add_dependency(%q<rack-contrib>, [">= 0"])
+ s.add_dependency(%q<rack-flash>, [">= 0"])
+ s.add_dependency(%q<nokogiri>, [">= 0"])
+ s.add_dependency(%q<rubyzip>, [">= 0"])
+ s.add_dependency(%q<roo>, [">= 0"])
+ s.add_dependency(%q<spreadsheet>, [">= 0"])
+ s.add_dependency(%q<google-spreadsheet-ruby>, [">= 0"])
+ s.add_dependency(%q<yajl-ruby>, [">= 0"])
+ s.add_dependency(%q<tmail>, [">= 0"])
+ s.add_dependency(%q<rinruby>, [">= 0"])
+ s.add_dependency(%q<ohm>, [">= 0"])
+ s.add_dependency(%q<ohm-contrib>, [">= 0"])
+ s.add_dependency(%q<SystemTimer>, [">= 0"])
+ s.add_dependency(%q<rjb>, [">= 0"])
+ s.add_dependency(%q<dm-core>, [">= 0"])
+ s.add_dependency(%q<dm-serializer>, [">= 0"])
+ s.add_dependency(%q<dm-timestamps>, [">= 0"])
+ s.add_dependency(%q<dm-types>, [">= 0"])
+ s.add_dependency(%q<dm-migrations>, [">= 0"])
+ s.add_dependency(%q<dm-validations>, [">= 0"])
+ s.add_dependency(%q<dm-sqlite-adapter>, [">= 0"])
+ s.add_dependency(%q<haml>, [">= 3"])
+ s.add_dependency(%q<ruby-plot>, ["~> 0.4.0"])
+ s.add_dependency(%q<statsample>, [">= 0"])
s.add_dependency(%q<jeweler>, [">= 0"])
end
end