author    Christoph Helma <helma@in-silico.ch>  2015-07-31 10:59:53 +0200
committer Christoph Helma <helma@in-silico.ch>  2015-07-31 10:59:53 +0200
commit    e68c69f90036bb7c47c57acb1ee3652b73c835c1 (patch)
tree      7ddbf29b2a1e48d55e2a851940f5d5949bab3d92
parent    364139c560748bf266197e07f303af9028f713ec (diff)
descriptor tests working
-rw-r--r--  lib/compound.rb | 13
-rw-r--r--  lib/dataset.rb  | 38
-rw-r--r--  lib/feature.rb  |  3
3 files changed, 24 insertions(+), 30 deletions(-)
diff --git a/lib/compound.rb b/lib/compound.rb
index 93f609f..4d36915 100644
--- a/lib/compound.rb
+++ b/lib/compound.rb
@@ -73,12 +73,13 @@ module OpenTox
     # Get sdf
     # @return [String] SDF string
     def sdf
-      if sdf_id.nil?
+      if self.sdf_id.nil?
         sdf = obconversion(inchi,"inchi","sdf")
         file = Mongo::Grid::File.new(sdf, :filename => "#{id}.sdf",:content_type => "chemical/x-mdl-sdfile")
         sdf_id = $gridfs.insert_one file
+        update :sdf_id => sdf_id
       end
-      $gridfs.find_one(_id: sdf_id).data
+      $gridfs.find_one(_id: self.sdf_id).data
     end

     # Get png image
@@ -86,12 +87,12 @@ module OpenTox
     #   image = compound.png
     # @return [image/png] Image data
     def png
-      if image_id.nil?
+      if self.image_id.nil?
         png = obconversion(inchi,"inchi","_png2")
         file = Mongo::Grid::File.new(Base64.encode64(png), :filename => "#{id}.png", :content_type => "image/png")
         update(:image_id => $gridfs.insert_one(file))
       end
-      Base64.decode64($gridfs.find_one(_id: image_id).data)
+      Base64.decode64($gridfs.find_one(_id: self.image_id).data)
     end
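
Both hunks above fix the same Ruby pitfall: a bare assignment such as sdf_id = ... inside a method creates a local variable instead of calling the Mongoid field writer, and once the parser has seen that assignment, every later bare reference in the method resolves to the local, even when the assigning branch never ran. A minimal sketch of the pitfall, using a hypothetical Item class rather than the actual OpenTox models:

    class Item
      attr_accessor :cache_id

      def broken
        if cache_id.nil?   # reader method call: no assignment parsed yet
          cache_id = 42    # creates a local variable; the writer is never called
        end
        cache_id           # local variable from here on: nil if the branch was skipped
      end

      def fixed
        if self.cache_id.nil?
          self.cache_id = 42   # explicit receiver invokes the writer
        end
        self.cache_id          # always reads the attribute
      end
    end

    item = Item.new
    item.cache_id = 7
    item.broken  # => nil, the bare cache_id reads the never-assigned local
    item.fixed   # => 7

The added update :sdf_id => sdf_id call also persists the new GridFS id, so the SDF is generated only once per compound.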
@@ -134,11 +135,11 @@ module OpenTox
       OpenBabel::OBOp.find_type("Gen3D").do(obmol)
       sdf = obconversion.write_string(obmol)
       if sdf.match(/.nan/)
-        $logger.warn "3D generation failed for compound #{compound.inchi}, trying to calculate 2D structure"
+        $logger.warn "3D generation failed for compound #{identifier}, trying to calculate 2D structure"
         OpenBabel::OBOp.find_type("Gen2D").do(obmol)
         sdf = obconversion.write_string(obmol)
         if sdf.match(/.nan/)
-          $logger.warn "2D generation failed for compound #{compound.inchi}"
+          $logger.warn "2D generation failed for compound #{identifier}"
           sdf = nil
         end
       end
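
The warning strings in the last hunk referenced compound.inchi, but no compound variable exists in that method's scope, so the fallback path raised a NameError the moment 3D generation actually failed; identifier is an accessor that is in scope. A stripped-down reproduction (the class internals are stand-ins, not the real Compound):

    class Compound
      def identifier
        "InChI=1S/CH4/h1H4"
      end

      def warn_broken
        # no local or method named `compound` here: NameError at runtime
        "3D generation failed for compound #{compound.inchi}"
      end

      def warn_fixed
        "3D generation failed for compound #{identifier}"
      end
    end

    c = Compound.new
    c.warn_fixed                     # => "3D generation failed for compound InChI=1S/CH4/h1H4"
    c.warn_broken rescue p $!.class  # => NameError

Because the broken interpolation sat on an error-handling path, it presumably only surfaced once the descriptor tests exercised a compound whose 3D generation failed.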
diff --git a/lib/dataset.rb b/lib/dataset.rb
index 1392de5..92ef7b5 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -18,7 +18,7 @@ module OpenTox
     include Mongoid::Document

     attr_accessor :bulk
-    #attr_writer :data_entries
+    attr_writer :data_entries

     # associations like has_many, belongs_to deteriorate performance
     field :feature_ids, type: Array, default: []
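
Re-enabling attr_writer :data_entries lets callers assign a whole data matrix in one step, and the comment retained above explains the surrounding design choice: feature and compound ids live in plain Array fields that are resolved by hand instead of through Mongoid relations. A sketch of the pattern (the features accessor body is an assumption, not part of this diff):

    class Dataset
      include Mongoid::Document
      # plain id arrays avoid the proxy objects and extra queries
      # that has_many / belongs_to add on every access
      field :feature_ids, type: Array, default: []

      # resolve ids to documents explicitly
      def features
        feature_ids.collect{|id| Feature.find id}
      end
    end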
@@ -62,19 +62,29 @@ module OpenTox
     end

     def [](row,col)
-      #bad_request_error "Incorrect parameter type. The first argument is a OpenTox::Compound the second a OpenTox::Feature." unless compound.is_a? Compound and feature.is_a? Feature
-      #DataEntry.where(dataset_id: self.id, compound_id: compound.id, feature_id: feature.id).distinct(:value).first
-      #data_entries[compound_ids.index(compound.id)][feature_ids.index(feature.id)]
       @data_entries[row,col]
     end

     def []=(row,col,v)
       @data_entries ||= []
       @data_entries[row] ||= []
-      #@data_entries ||= Array.new(compound_ids.size){Array.new(feature_ids.size)}
       @data_entries[row][col] = v
     end

+    # merge dataset (i.e. append features)
+    def +(dataset)
+      bad_request_error "Dataset merge failed because the argument is not a OpenTox::Dataset but a #{dataset.class}" unless dataset.is_a? Dataset
+      bad_request_error "Dataset merge failed because compounds are unequal in datasets #{self.id} and #{dataset.id}" unless compound_ids == dataset.compound_ids
+      self.feature_ids ||= []
+      self.feature_ids = self.feature_ids + dataset.feature_ids
+      @data_entries ||= Array.new(compound_ids.size){[]}
+      @data_entries.each_with_index do |row,i|
+        @data_entries[i] = row + dataset.fingerprint(compounds[i])
+      end
+      self
+
+    end
+
     def fingerprint(compound)
       data_entries[compound_ids.index(compound.id)]
     end
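
The new + operator merges column-wise: it requires both datasets to contain the same compounds in the same order, concatenates the feature id lists, and appends each compound's fingerprint row from the argument. Note that it mutates and returns the receiver rather than building a fresh dataset. Hypothetical usage (the id variables are placeholders):

    training    = Dataset.find training_id
    descriptors = Dataset.find descriptor_id

    merged = training + descriptors
    merged.equal?(training)   # => true, + modifies training in place
    merged.feature_ids.size   # sum of both datasets' feature counts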
@@ -232,24 +242,6 @@ module OpenTox
     #def self.from_sdf_file
     #end

-    def bulk_write
-      time = Time.now
-      # Workaround for mongo bulk insertions (insertion of single data_entries is far too slow)
-      # Skip ruby JSON serialisation:
-      # - to_json is too slow to write to file
-      # - json (or bson) serialisation is probably causing very long parse times of Mongo::BulkWrite, or any other ruby insert operation
-      # this method causes a noticeable overhead compared to direct string serialisation (e.g. total processing time 16" instead of 12" for rat fminer dataset), but it can be reused at different places
-      dataset_id = self.id.to_s
-      f = Tempfile.new("#{dataset_id}.json","/tmp")
-      f.puts @bulk.collect{|row| "{'dataset_id': {'$oid': '#{dataset_id}'},'compound_id': {'$oid': '#{row[0]}'}, 'feature_id': {'$oid': '#{row[1]}'}, 'value': #{row[2]}}"}.join("\n")
-      f.close
-      $logger.debug "Write JSON file: #{Time.now-time}"
-      # TODO DB name from config
-      puts `mongoimport --db opentox --collection data_entries --type json --file #{f.path} 2>&1`
-      $logger.debug "Bulk import: #{Time.now-time}"
-      @bulk = []
-    end
-
     def self.from_csv_file file, source=nil, bioassay=true
       source ||= file
       table = CSV.read file, :skip_blanks => true
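
The deleted bulk_write method documents a workaround worth noting even as it leaves this class: when ruby-side JSON/BSON serialisation dominates insert time, writing pre-formatted lines to a file and shelling out to mongoimport is much faster than Mongo::BulkWrite. The core of that technique, stripped of the OpenTox specifics (database and collection names are placeholders):

    require 'tempfile'

    # one pre-serialised JSON document per line
    rows = ['{"value": 1.0}', '{"value": 2.0}']

    f = Tempfile.new(["bulk", ".json"])
    f.puts rows.join("\n")
    f.close

    # bulk load without ruby-side serialisation overhead
    puts `mongoimport --db mydb --collection mycoll --type json --file #{f.path} 2>&1`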
diff --git a/lib/feature.rb b/lib/feature.rb
index de8e4c9..e565875 100644
--- a/lib/feature.rb
+++ b/lib/feature.rb
@@ -56,8 +56,9 @@ module OpenTox
     end

     class PhysChemDescriptor < NumericFeature
-      field :algorithm, type: String
+      field :algorithm, type: String, default: "OpenTox::Algorithm::Descriptor.physchem"
       field :parameters, type: Hash
+      field :creator, type: String
     end

   end
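
With the default in place, a physico-chemical descriptor feature no longer has to name its algorithm explicitly, and the new creator field records provenance. A hypothetical creation call (the attribute values are made up):

    descriptor = PhysChemDescriptor.create(
      :name => "LogP",
      :creator => "helma@in-silico.ch",
      :parameters => {:descriptor => "logP"}
    )
    descriptor.algorithm  # => "OpenTox::Algorithm::Descriptor.physchem" (from the field default)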