summaryrefslogtreecommitdiff
path: root/lib/dataset.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/dataset.rb')
-rw-r--r--lib/dataset.rb55
1 files changed, 1 insertions, 54 deletions
diff --git a/lib/dataset.rb b/lib/dataset.rb
index 8cb343f..87e7fef 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -68,63 +68,10 @@ class Dataset
@independent_variable_names = ["Canonical Smiles"] + fingerprints.flatten.sort.uniq
print_variables
end
+
end
=begin
- # Create a dataset from SDF file
- # files with a single data field are read as BioActivities (i.e. dependent variable)
- # files with multiple data fields are read as SubstanceProperties (i.e. independent variable)
- # @param [File]
- # @return [OpenTox::Dataset]
- def self.from_sdf_file file
- md5 = Digest::MD5.hexdigest(File.read(file)) # use hash to identify identical files
- dataset = self.find_by(:md5 => md5)
- if dataset
- $logger.debug "Found #{file} in the database (id: #{dataset.id}, md5: #{dataset.md5}), skipping import."
- else
- $logger.debug "Parsing #{file}."
-
- dataset = self.new(:source => file, :name => File.basename(file,".*"), :md5 => md5)
- original_id = OriginalId.find_or_create_by(:dataset_id => dataset.id,:name => dataset.name+".ID")
-
- read_result = false
- sdf = ""
- feature_name = ""
- compound = nil
- features = {}
- table = [["ID","SMILES"]]
-
- File.readlines(file).each do |line|
- if line.match %r{\$\$\$\$}
- sdf << line
- id = sdf.split("\n").first.chomp
- compound = Compound.from_sdf sdf
- row = [id,compound.smiles]
- features.each do |f,v|
- table[0] << f unless table[0].include? f
- row[table[0].index(f)] = v
- end
- table << row
- sdf = ""
- features = {}
- elsif line.match /^>\s+</
- feature_name = line.match(/^>\s+<(.*)>/)[1]
- read_result = true
- else
- if read_result
- value = line.chomp
- features[feature_name] = value
- read_result = false
- else
- sdf << line
- end
- end
- end
- dataset.parse_table table
- end
- dataset
- end
-
# Create a dataset from PubChem Assay
# @param [Integer] PubChem AssayID (AID)
# @return [OpenTox::Dataset]