summary | refs | log | tree | commit | diff
path: root/lib/import.rb
diff options
context:
space:
mode:
author: Christoph Helma <helma@in-silico.ch> 2016-04-13 18:18:36 +0200
committer: Christoph Helma <helma@in-silico.ch> 2016-04-13 18:18:36 +0200
commit: 64f1f32ced77afb278bdb7c27397c5299a73675c (patch)
tree: b44cdc6c9533be8e33815fb16e83a341c35ea3d1 /lib/import.rb
parent: 815cf6ba1543fc323eb7cbd1202fadbf03bcfbca (diff)
improved enm import
Diffstat (limited to 'lib/import.rb')
-rw-r--r-- lib/import.rb 105
1 files changed, 58 insertions, 47 deletions
diff --git a/lib/import.rb b/lib/import.rb
index 86c633a..cf0855e 100644
--- a/lib/import.rb
+++ b/lib/import.rb
@@ -8,64 +8,75 @@ module OpenTox
def self.import
#get list of bundle URIs
bundles = JSON.parse(RestClientWrapper.get('https://data.enanomapper.net/bundle?media=application%2Fjson'))["dataset"]
+ datasets = []
bundles.each do |bundle|
uri = bundle["URI"]
+ dataset = Dataset.find_or_create_by(:source => bundle["URI"],:name => bundle["title"])
nanoparticles = JSON.parse(RestClientWrapper.get(bundle["dataset"]+"?media=application%2Fjson"))["dataEntry"]
features = JSON.parse(RestClientWrapper.get(bundle["property"]+"?media=application%2Fjson"))["feature"]
nanoparticles.each do |np|
- nanoparticle = Nanoparticle.find_or_create_by(
- :name => np["values"]["https://data.enanomapper.net/identifier/name"],
- :source => np["compound"]["URI"],
- )
- nanoparticle.bundles << uri
- np["composition"].each do |comp|
- case comp["relation"]
- when "HAS_CORE"
- nanoparticle.core = comp["component"]["compound"]["URI"]
- when "HAS_COATING"
- nanoparticle.coating << comp["component"]["compound"]["URI"]
- end
- end if np["composition"]
- np["values"].each do |u,v|
- if u.match(/property/)
- name, unit, source = nil
- features.each do |uri,feat|
- if u.match(/#{uri}/)
- name = feat["title"]
- unit = feat["units"]
- source = uri
- end
+ nanoparticle = Nanoparticle.find_or_create_by(
+ :name => np["values"]["https://data.enanomapper.net/identifier/name"],
+ :source => np["compound"]["URI"],
+ )
+ dataset.data_entries[nanoparticle.id.to_s] ||= {}
+ nanoparticle.bundles << uri
+ nanoparticle.dataset_ids << dataset.id
+ np["composition"].each do |comp|
+ case comp["relation"]
+ when "HAS_CORE"
+ nanoparticle.core = comp["component"]["compound"]["URI"]
+ when "HAS_COATING"
+ nanoparticle.coating << comp["component"]["compound"]["URI"]
+ end
+ end if np["composition"]
+ np["values"].each do |u,v|
+ if u.match(/property/)
+ name, unit, source = nil
+ features.each do |uri,feat|
+ if u.match(/#{uri}/)
+ name = feat["title"]
+ unit = feat["units"]
+ source = uri
end
- feature = Feature.find_or_create_by(
- :name => name,
- :unit => unit,
- :source => source
- )
end
- v.each{|value| nanoparticle.parse_ambit_value feature, value} if v.is_a? Array
+ feature = Feature.find_or_create_by(
+ :name => name,
+ :unit => unit,
+ :source => source
+ )
end
- nanoparticle.bundles.uniq!
- nanoparticle.physchem_descriptors.each{|f,v| v.uniq!}
- nanoparticle.toxicities.each{|f,v| v.uniq!}
- nanoparticle.save!
+ v.each{|value| nanoparticle.parse_ambit_value feature, value} if v.is_a? Array
+ end
+ nanoparticle.bundles.uniq!
+ nanoparticle.physchem_descriptors.each{|f,v| v.uniq!}
+ #nanoparticle.toxicities.each{|f,v| v.uniq!}
+ nanoparticle.toxicities.each do |f,v|
+ dataset.data_entries[nanoparticle.id.to_s][f.to_s] ||= []
+ dataset.data_entries[nanoparticle.id.to_s][f.to_s] += v
+ end
+ nanoparticle.save
end
+ dataset.save
+ datasets << dataset
end
+ datasets.collect{|d| d.id}
+ end
- def self.dump
- #get list of bundle URIs
- `wget 'https://data.enanomapper.net/bundle?media=application%2Fjson' -O bundles.json`
- json = JSON.parse File.read('./bundles.json')
- json["dataset"].each do |dataset|
- uri = dataset["URI"]
- id = uri.split("/").last
- `wget --header='accept:application/json' '#{uri}' -O 'bundle#{id}'`
- `wget --header='accept:application/json' '#{dataset["summary"]}' -O 'summary#{id}.json'`
- `wget --header='accept:application/json' '#{dataset["compound"]}' -O 'compound#{id}.json'`
- `wget --header='accept:application/json' '#{dataset["substance"]}' -O 'substance#{id}.json'`
- `wget --header='accept:application/json' '#{dataset["property"]}' -O 'property#{id}.json'`
- `wget --header='accept:application/json' '#{dataset["dataset"]}' -O 'dataset#{id}.json'`
- `wget --header='accept:application/json' '#{dataset["matrix"]}' -O 'matrix#{id}.json'`
- end
+ def self.dump
+ #get list of bundle URIs
+ `wget 'https://data.enanomapper.net/bundle?media=application%2Fjson' -O bundles.json`
+ json = JSON.parse File.read('./bundles.json')
+ json["dataset"].each do |dataset|
+ uri = dataset["URI"]
+ id = uri.split("/").last
+ `wget --header='accept:application/json' '#{uri}' -O 'bundle#{id}'`
+ `wget --header='accept:application/json' '#{dataset["summary"]}' -O 'summary#{id}.json'`
+ `wget --header='accept:application/json' '#{dataset["compound"]}' -O 'compound#{id}.json'`
+ `wget --header='accept:application/json' '#{dataset["substance"]}' -O 'substance#{id}.json'`
+ `wget --header='accept:application/json' '#{dataset["property"]}' -O 'property#{id}.json'`
+ `wget --header='accept:application/json' '#{dataset["dataset"]}' -O 'dataset#{id}.json'`
+ `wget --header='accept:application/json' '#{dataset["matrix"]}' -O 'matrix#{id}.json'`
end
end