From 791398c12af4f8290095425dac87e3c852905ab6 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Sat, 7 Sep 2019 18:20:10 +0200 Subject: obsolete data and java directories deleted --- lib/dataset.rb | 55 +------------------------------------------------------ 1 file changed, 1 insertion(+), 54 deletions(-) (limited to 'lib/dataset.rb') diff --git a/lib/dataset.rb b/lib/dataset.rb index 8cb343f..87e7fef 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -68,63 +68,10 @@ class Dataset @independent_variable_names = ["Canonical Smiles"] + fingerprints.flatten.sort.uniq print_variables end + end =begin - # Create a dataset from SDF file - # files with a single data field are read as BioActivities (i.e. dependent variable) - # files with multiple data fields are read as SubstanceProperties (i.e. independent variable) - # @param [File] - # @return [OpenTox::Dataset] - def self.from_sdf_file file - md5 = Digest::MD5.hexdigest(File.read(file)) # use hash to identify identical files - dataset = self.find_by(:md5 => md5) - if dataset - $logger.debug "Found #{file} in the database (id: #{dataset.id}, md5: #{dataset.md5}), skipping import." - else - $logger.debug "Parsing #{file}." - - dataset = self.new(:source => file, :name => File.basename(file,".*"), :md5 => md5) - original_id = OriginalId.find_or_create_by(:dataset_id => dataset.id,:name => dataset.name+".ID") - - read_result = false - sdf = "" - feature_name = "" - compound = nil - features = {} - table = [["ID","SMILES"]] - - File.readlines(file).each do |line| - if line.match %r{\$\$\$\$} - sdf << line - id = sdf.split("\n").first.chomp - compound = Compound.from_sdf sdf - row = [id,compound.smiles] - features.each do |f,v| - table[0] << f unless table[0].include? f - row[table[0].index(f)] = v - end - table << row - sdf = "" - features = {} - elsif line.match /^>\s+\s+<(.*)>/)[1] - read_result = true - else - if read_result - value = line.chomp - features[feature_name] = value - read_result = false - else - sdf << line - end - end - end - dataset.parse_table table - end - dataset - end - # Create a dataset from PubChem Assay # @param [Integer] PubChem AssayID (AID) # @return [OpenTox::Dataset] -- cgit v1.2.3