From e68c69f90036bb7c47c57acb1ee3652b73c835c1 Mon Sep 17 00:00:00 2001
From: Christoph Helma
Date: Fri, 31 Jul 2015 10:59:53 +0200
Subject: descriptor tests working

---
 lib/compound.rb | 13 +++++++------
 lib/dataset.rb  | 38 +++++++++++++++-----------------------
 lib/feature.rb  |  3 ++-
 3 files changed, 24 insertions(+), 30 deletions(-)

(limited to 'lib')

diff --git a/lib/compound.rb b/lib/compound.rb
index 93f609f..4d36915 100644
--- a/lib/compound.rb
+++ b/lib/compound.rb
@@ -73,12 +73,13 @@ module OpenTox
     # Get sdf
     # @return [String] SDF string
     def sdf
-      if sdf_id.nil?
+      if self.sdf_id.nil?
         sdf = obconversion(inchi,"inchi","sdf")
         file = Mongo::Grid::File.new(sdf, :filename => "#{id}.sdf",:content_type => "chemical/x-mdl-sdfile")
         sdf_id = $gridfs.insert_one file
+        update :sdf_id => sdf_id
       end
-      $gridfs.find_one(_id: sdf_id).data
+      $gridfs.find_one(_id: self.sdf_id).data
     end
 
     # Get png image
@@ -86,12 +87,12 @@ module OpenTox
     #   image = compound.png
     # @return [image/png] Image data
     def png
-      if image_id.nil?
+      if self.image_id.nil?
         png = obconversion(inchi,"inchi","_png2")
         file = Mongo::Grid::File.new(Base64.encode64(png), :filename => "#{id}.png", :content_type => "image/png")
         update(:image_id => $gridfs.insert_one(file))
       end
-      Base64.decode64($gridfs.find_one(_id: image_id).data)
+      Base64.decode64($gridfs.find_one(_id: self.image_id).data)
     end
 
 
@@ -134,11 +135,11 @@ module OpenTox
       OpenBabel::OBOp.find_type("Gen3D").do(obmol)
       sdf = obconversion.write_string(obmol)
       if sdf.match(/.nan/)
-        $logger.warn "3D generation failed for compound #{compound.inchi}, trying to calculate 2D structure"
+        $logger.warn "3D generation failed for compound #{identifier}, trying to calculate 2D structure"
        OpenBabel::OBOp.find_type("Gen2D").do(obmol)
        sdf = obconversion.write_string(obmol)
        if sdf.match(/.nan/)
-          $logger.warn "2D generation failed for compound #{compound.inchi}"
+          $logger.warn "2D generation failed for compound #{identifier}"
          sdf = nil
        end
      end
diff --git a/lib/dataset.rb b/lib/dataset.rb
index 1392de5..92ef7b5 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -18,7 +18,7 @@ module OpenTox
     include Mongoid::Document
 
     attr_accessor :bulk
-    #attr_writer :data_entries
+    attr_writer :data_entries
 
     # associations like has_many, belongs_to deteriorate performance
     field :feature_ids, type: Array, default: []
@@ -62,19 +62,29 @@ module OpenTox
     end
 
     def [](row,col)
-      #bad_request_error "Incorrect parameter type. The first argument is a OpenTox::Compound the second a OpenTox::Feature." unless compound.is_a? Compound and feature.is_a? Feature
-      #DataEntry.where(dataset_id: self.id, compound_id: compound.id, feature_id: feature.id).distinct(:value).first
-      #data_entries[compound_ids.index(compound.id)][feature_ids.index(feature.id)]
       @data_entries[row,col]
     end
 
     def []=(row,col,v)
       @data_entries ||= []
       @data_entries[row] ||= []
-      #@data_entries ||= Array.new(compound_ids.size){Array.new(feature_ids.size)}
       @data_entries[row][col] = v
     end
 
+    # merge dataset (i.e. append features)
+    def +(dataset)
+      bad_request_error "Dataset merge failed because the argument is not a OpenTox::Dataset but a #{dataset.class}" unless dataset.is_a? Dataset
+      bad_request_error "Dataset merge failed because compounds are unequal in datasets #{self.id} and #{dataset.id}" unless compound_ids == dataset.compound_ids
+      self.feature_ids ||= []
+      self.feature_ids = self.feature_ids + dataset.feature_ids
+      @data_entries ||= Array.new(compound_ids.size){[]}
+      @data_entries.each_with_index do |row,i|
+        @data_entries[i] = row + dataset.fingerprint(compounds[i])
+      end
+      self
+
+    end
+
     def fingerprint(compound)
       data_entries[compound_ids.index(compound.id)]
     end
@@ -232,24 +242,6 @@ module OpenTox
     #def self.from_sdf_file
     #end
 
-    def bulk_write
-      time = Time.now
-      # Workaround for mongo bulk insertions (insertion of single data_entries is far too slow)
-      # Skip ruby JSON serialisation:
-      # - to_json is too slow to write to file
-      # - json (or bson) serialisation is probably causing very long parse times of Mongo::BulkWrite, or any other ruby insert operation
-      # this method causes a noticeable overhead compared to direct string serialisation (e.g. total processing time 16" instead of 12" for rat fminer dataset), but it can be reused at different places
-      dataset_id = self.id.to_s
-      f = Tempfile.new("#{dataset_id}.json","/tmp")
-      f.puts @bulk.collect{|row| "{'dataset_id': {'$oid': '#{dataset_id}'},'compound_id': {'$oid': '#{row[0]}'}, 'feature_id': {'$oid': '#{row[1]}'}, 'value': #{row[2]}}"}.join("\n")
-      f.close
-      $logger.debug "Write JSON file: #{Time.now-time}"
-      # TODO DB name from config
-      puts `mongoimport --db opentox --collection data_entries --type json --file #{f.path} 2>&1`
-      $logger.debug "Bulk import: #{Time.now-time}"
-      @bulk = []
-    end
-
     def self.from_csv_file file, source=nil, bioassay=true
       source ||= file
       table = CSV.read file, :skip_blanks => true
diff --git a/lib/feature.rb b/lib/feature.rb
index de8e4c9..e565875 100644
--- a/lib/feature.rb
+++ b/lib/feature.rb
@@ -56,8 +56,9 @@ module OpenTox
   end
 
   class PhysChemDescriptor < NumericFeature
-    field :algorithm, type: String
+    field :algorithm, type: String, default: "OpenTox::Algorithm::Descriptor.physchem"
     field :parameters, type: Hash
+    field :creator, type: String
   end
 
 end
-- 
cgit v1.2.3
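
Editor's note: below is a minimal usage sketch (not part of the commit) for the Dataset#+ merge operator and the [](row,col) / []=(row,col,v) accessors introduced above. The require path and CSV file names are hypothetical; #+ requires both datasets to contain identical compound_ids, e.g. two tables derived from the same compound list.

    # Minimal sketch, assuming lib/lazar.rb is the library's top-level require
    # and that both (hypothetical) CSV files describe the same compounds in the
    # same order, as the compound_ids check in Dataset#+ demands.
    require_relative "lib/lazar"

    activities  = OpenTox::Dataset.from_csv_file "activities.csv"   # hypothetical file
    descriptors = OpenTox::Dataset.from_csv_file "descriptors.csv"  # hypothetical file

    # Appends the descriptor feature columns to the activity dataset;
    # raises bad_request_error if the compound lists differ.
    merged = activities + descriptors

    merged[0,0] = 0.5                           # []= fills a single cell of @data_entries
    merged.fingerprint(merged.compounds.first)  # data-entry row for the first compound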