From 64f1f32ced77afb278bdb7c27397c5299a73675c Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Wed, 13 Apr 2016 18:18:36 +0200 Subject: improved enm import --- lib/import.rb | 105 ++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 58 insertions(+), 47 deletions(-) (limited to 'lib/import.rb') diff --git a/lib/import.rb b/lib/import.rb index 86c633a..cf0855e 100644 --- a/lib/import.rb +++ b/lib/import.rb @@ -8,64 +8,75 @@ module OpenTox def self.import #get list of bundle URIs bundles = JSON.parse(RestClientWrapper.get('https://data.enanomapper.net/bundle?media=application%2Fjson'))["dataset"] + datasets = [] bundles.each do |bundle| uri = bundle["URI"] + dataset = Dataset.find_or_create_by(:source => bundle["URI"],:name => bundle["title"]) nanoparticles = JSON.parse(RestClientWrapper.get(bundle["dataset"]+"?media=application%2Fjson"))["dataEntry"] features = JSON.parse(RestClientWrapper.get(bundle["property"]+"?media=application%2Fjson"))["feature"] nanoparticles.each do |np| - nanoparticle = Nanoparticle.find_or_create_by( - :name => np["values"]["https://data.enanomapper.net/identifier/name"], - :source => np["compound"]["URI"], - ) - nanoparticle.bundles << uri - np["composition"].each do |comp| - case comp["relation"] - when "HAS_CORE" - nanoparticle.core = comp["component"]["compound"]["URI"] - when "HAS_COATING" - nanoparticle.coating << comp["component"]["compound"]["URI"] - end - end if np["composition"] - np["values"].each do |u,v| - if u.match(/property/) - name, unit, source = nil - features.each do |uri,feat| - if u.match(/#{uri}/) - name = feat["title"] - unit = feat["units"] - source = uri - end + nanoparticle = Nanoparticle.find_or_create_by( + :name => np["values"]["https://data.enanomapper.net/identifier/name"], + :source => np["compound"]["URI"], + ) + dataset.data_entries[nanoparticle.id.to_s] ||= {} + nanoparticle.bundles << uri + nanoparticle.dataset_ids << dataset.id + np["composition"].each do |comp| + case comp["relation"] + when "HAS_CORE" + nanoparticle.core = comp["component"]["compound"]["URI"] + when "HAS_COATING" + nanoparticle.coating << comp["component"]["compound"]["URI"] + end + end if np["composition"] + np["values"].each do |u,v| + if u.match(/property/) + name, unit, source = nil + features.each do |uri,feat| + if u.match(/#{uri}/) + name = feat["title"] + unit = feat["units"] + source = uri end - feature = Feature.find_or_create_by( - :name => name, - :unit => unit, - :source => source - ) end - v.each{|value| nanoparticle.parse_ambit_value feature, value} if v.is_a? Array + feature = Feature.find_or_create_by( + :name => name, + :unit => unit, + :source => source + ) end - nanoparticle.bundles.uniq! - nanoparticle.physchem_descriptors.each{|f,v| v.uniq!} - nanoparticle.toxicities.each{|f,v| v.uniq!} - nanoparticle.save! + v.each{|value| nanoparticle.parse_ambit_value feature, value} if v.is_a? Array + end + nanoparticle.bundles.uniq! + nanoparticle.physchem_descriptors.each{|f,v| v.uniq!} + #nanoparticle.toxicities.each{|f,v| v.uniq!} + nanoparticle.toxicities.each do |f,v| + dataset.data_entries[nanoparticle.id.to_s][f.to_s] ||= [] + dataset.data_entries[nanoparticle.id.to_s][f.to_s] += v + end + nanoparticle.save end + dataset.save + datasets << dataset end + datasets.collect{|d| d.id} + end - def self.dump - #get list of bundle URIs - `wget 'https://data.enanomapper.net/bundle?media=application%2Fjson' -O bundles.json` - json = JSON.parse File.read('./bundles.json') - json["dataset"].each do |dataset| - uri = dataset["URI"] - id = uri.split("/").last - `wget --header='accept:application/json' '#{uri}' -O 'bundle#{id}'` - `wget --header='accept:application/json' '#{dataset["summary"]}' -O 'summary#{id}.json'` - `wget --header='accept:application/json' '#{dataset["compound"]}' -O 'compound#{id}.json'` - `wget --header='accept:application/json' '#{dataset["substance"]}' -O 'substance#{id}.json'` - `wget --header='accept:application/json' '#{dataset["property"]}' -O 'property#{id}.json'` - `wget --header='accept:application/json' '#{dataset["dataset"]}' -O 'dataset#{id}.json'` - `wget --header='accept:application/json' '#{dataset["matrix"]}' -O 'matrix#{id}.json'` - end + def self.dump + #get list of bundle URIs + `wget 'https://data.enanomapper.net/bundle?media=application%2Fjson' -O bundles.json` + json = JSON.parse File.read('./bundles.json') + json["dataset"].each do |dataset| + uri = dataset["URI"] + id = uri.split("/").last + `wget --header='accept:application/json' '#{uri}' -O 'bundle#{id}'` + `wget --header='accept:application/json' '#{dataset["summary"]}' -O 'summary#{id}.json'` + `wget --header='accept:application/json' '#{dataset["compound"]}' -O 'compound#{id}.json'` + `wget --header='accept:application/json' '#{dataset["substance"]}' -O 'substance#{id}.json'` + `wget --header='accept:application/json' '#{dataset["property"]}' -O 'property#{id}.json'` + `wget --header='accept:application/json' '#{dataset["dataset"]}' -O 'dataset#{id}.json'` + `wget --header='accept:application/json' '#{dataset["matrix"]}' -O 'matrix#{id}.json'` end end -- cgit v1.2.3