summary refs log tree commit diff
diff options
context:
space:
mode:
author Christoph Helma <helma@in-silico.ch> 2016-04-13 18:18:36 +0200
committer Christoph Helma <helma@in-silico.ch> 2016-04-13 18:18:36 +0200
commit 64f1f32ced77afb278bdb7c27397c5299a73675c (patch)
tree b44cdc6c9533be8e33815fb16e83a341c35ea3d1
parent 815cf6ba1543fc323eb7cbd1202fadbf03bcfbca (diff)
improved enm import
-rw-r--r-- .gitignore 5
-rw-r--r-- lib/compound.rb 2
-rw-r--r-- lib/import.rb 105
-rw-r--r-- lib/lazar.rb 1
-rw-r--r-- lib/nanoparticle.rb 1
-rw-r--r-- lib/substance.rb 5
-rw-r--r-- test/nanoparticles.rb 6
7 files changed, 67 insertions, 58 deletions
diff --git a/.gitignore b/.gitignore
index 791dc27..fb51df7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,8 +1,4 @@
-last-utils
-libfminer
openbabel
-fminer_debug.txt
-test/fminer_debug.txt
Gemfile.lock
*.gem
.bundle
@@ -11,3 +7,4 @@ pkg/*
.yardoc/
doc/
lazar.log
+data
diff --git a/lib/compound.rb b/lib/compound.rb
index 757ba1a..7895619 100644
--- a/lib/compound.rb
+++ b/lib/compound.rb
@@ -17,8 +17,6 @@ module OpenTox
field :sdf_id, type: BSON::ObjectId
field :fingerprints, type: Hash, default: {}
field :default_fingerprint_size, type: Integer
- field :physchem_descriptors, type: Hash, default: {}
- field :dataset_ids, type: Array, default: []
# TODO separate between physchem, bio and tox
field :features, type: Hash, default: {}
diff --git a/lib/import.rb b/lib/import.rb
index 86c633a..cf0855e 100644
--- a/lib/import.rb
+++ b/lib/import.rb
@@ -8,64 +8,75 @@ module OpenTox
def self.import
#get list of bundle URIs
bundles = JSON.parse(RestClientWrapper.get('https://data.enanomapper.net/bundle?media=application%2Fjson'))["dataset"]
+ datasets = []
bundles.each do |bundle|
uri = bundle["URI"]
+ dataset = Dataset.find_or_create_by(:source => bundle["URI"],:name => bundle["title"])
nanoparticles = JSON.parse(RestClientWrapper.get(bundle["dataset"]+"?media=application%2Fjson"))["dataEntry"]
features = JSON.parse(RestClientWrapper.get(bundle["property"]+"?media=application%2Fjson"))["feature"]
nanoparticles.each do |np|
- nanoparticle = Nanoparticle.find_or_create_by(
- :name => np["values"]["https://data.enanomapper.net/identifier/name"],
- :source => np["compound"]["URI"],
- )
- nanoparticle.bundles << uri
- np["composition"].each do |comp|
- case comp["relation"]
- when "HAS_CORE"
- nanoparticle.core = comp["component"]["compound"]["URI"]
- when "HAS_COATING"
- nanoparticle.coating << comp["component"]["compound"]["URI"]
- end
- end if np["composition"]
- np["values"].each do |u,v|
- if u.match(/property/)
- name, unit, source = nil
- features.each do |uri,feat|
- if u.match(/#{uri}/)
- name = feat["title"]
- unit = feat["units"]
- source = uri
- end
+ nanoparticle = Nanoparticle.find_or_create_by(
+ :name => np["values"]["https://data.enanomapper.net/identifier/name"],
+ :source => np["compound"]["URI"],
+ )
+ dataset.data_entries[nanoparticle.id.to_s] ||= {}
+ nanoparticle.bundles << uri
+ nanoparticle.dataset_ids << dataset.id
+ np["composition"].each do |comp|
+ case comp["relation"]
+ when "HAS_CORE"
+ nanoparticle.core = comp["component"]["compound"]["URI"]
+ when "HAS_COATING"
+ nanoparticle.coating << comp["component"]["compound"]["URI"]
+ end
+ end if np["composition"]
+ np["values"].each do |u,v|
+ if u.match(/property/)
+ name, unit, source = nil
+ features.each do |uri,feat|
+ if u.match(/#{uri}/)
+ name = feat["title"]
+ unit = feat["units"]
+ source = uri
end
- feature = Feature.find_or_create_by(
- :name => name,
- :unit => unit,
- :source => source
- )
end
- v.each{|value| nanoparticle.parse_ambit_value feature, value} if v.is_a? Array
+ feature = Feature.find_or_create_by(
+ :name => name,
+ :unit => unit,
+ :source => source
+ )
end
- nanoparticle.bundles.uniq!
- nanoparticle.physchem_descriptors.each{|f,v| v.uniq!}
- nanoparticle.toxicities.each{|f,v| v.uniq!}
- nanoparticle.save!
+ v.each{|value| nanoparticle.parse_ambit_value feature, value} if v.is_a? Array
+ end
+ nanoparticle.bundles.uniq!
+ nanoparticle.physchem_descriptors.each{|f,v| v.uniq!}
+ #nanoparticle.toxicities.each{|f,v| v.uniq!}
+ nanoparticle.toxicities.each do |f,v|
+ dataset.data_entries[nanoparticle.id.to_s][f.to_s] ||= []
+ dataset.data_entries[nanoparticle.id.to_s][f.to_s] += v
+ end
+ nanoparticle.save
end
+ dataset.save
+ datasets << dataset
end
+ datasets.collect{|d| d.id}
+ end
- def self.dump
- #get list of bundle URIs
- `wget 'https://data.enanomapper.net/bundle?media=application%2Fjson' -O bundles.json`
- json = JSON.parse File.read('./bundles.json')
- json["dataset"].each do |dataset|
- uri = dataset["URI"]
- id = uri.split("/").last
- `wget --header='accept:application/json' '#{uri}' -O 'bundle#{id}'`
- `wget --header='accept:application/json' '#{dataset["summary"]}' -O 'summary#{id}.json'`
- `wget --header='accept:application/json' '#{dataset["compound"]}' -O 'compound#{id}.json'`
- `wget --header='accept:application/json' '#{dataset["substance"]}' -O 'substance#{id}.json'`
- `wget --header='accept:application/json' '#{dataset["property"]}' -O 'property#{id}.json'`
- `wget --header='accept:application/json' '#{dataset["dataset"]}' -O 'dataset#{id}.json'`
- `wget --header='accept:application/json' '#{dataset["matrix"]}' -O 'matrix#{id}.json'`
- end
+ def self.dump
+ #get list of bundle URIs
+ `wget 'https://data.enanomapper.net/bundle?media=application%2Fjson' -O bundles.json`
+ json = JSON.parse File.read('./bundles.json')
+ json["dataset"].each do |dataset|
+ uri = dataset["URI"]
+ id = uri.split("/").last
+ `wget --header='accept:application/json' '#{uri}' -O 'bundle#{id}'`
+ `wget --header='accept:application/json' '#{dataset["summary"]}' -O 'summary#{id}.json'`
+ `wget --header='accept:application/json' '#{dataset["compound"]}' -O 'compound#{id}.json'`
+ `wget --header='accept:application/json' '#{dataset["substance"]}' -O 'substance#{id}.json'`
+ `wget --header='accept:application/json' '#{dataset["property"]}' -O 'property#{id}.json'`
+ `wget --header='accept:application/json' '#{dataset["dataset"]}' -O 'dataset#{id}.json'`
+ `wget --header='accept:application/json' '#{dataset["matrix"]}' -O 'matrix#{id}.json'`
end
end
diff --git a/lib/lazar.rb b/lib/lazar.rb
index a1ad551..8eb46e0 100644
--- a/lib/lazar.rb
+++ b/lib/lazar.rb
@@ -71,6 +71,7 @@ CLASSES = ["Feature","Substance","Dataset","LazarPrediction","Validation","Cross
"opentox.rb",
"feature.rb",
"physchem.rb",
+ "substance.rb",
"compound.rb",
"nanoparticle.rb",
"dataset.rb",
diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb
index c58dc8c..6e9b0ea 100644
--- a/lib/nanoparticle.rb
+++ b/lib/nanoparticle.rb
@@ -6,7 +6,6 @@ module OpenTox
field :core, type: String
field :coating, type: Array, default: []
- field :physchem_descriptors, type: Hash, default: {}
field :toxicities, type: Hash, default: {}
#field :features, type: Hash, default: {}
field :bundles, type: Array, default: []
diff --git a/lib/substance.rb b/lib/substance.rb
index a5b9825..6768ce7 100644
--- a/lib/substance.rb
+++ b/lib/substance.rb
@@ -1,9 +1,8 @@
module OpenTox
class Substance
- include OpenTox
- include Mongoid::Document
- include Mongoid::Timestamps
+ field :physchem_descriptors, type: Hash, default: {}
+ field :dataset_ids, type: Array, default: []
end
end
diff --git a/test/nanoparticles.rb b/test/nanoparticles.rb
index 8a6836c..6f241ec 100644
--- a/test/nanoparticles.rb
+++ b/test/nanoparticles.rb
@@ -3,8 +3,12 @@ require_relative "setup.rb"
class NanoparticleTest < MiniTest::Test
def test_import
- Import::Enanomapper.import
+ dataset_ids = Import::Enanomapper.import
assert_operator Nanoparticle.count , :>, 570, "Only #{Nanoparticle.count} nanoparticles imported"
+ assert_operator dataset_ids.size, :>, 8, "Only #{dataset_ids.size} bundles imported"
+ p dataset_ids.collect{|d| Dataset.find(d).name}
+ assert dataset_ids.collect{|d| Dataset.find(d).name}.include? ("NanoWiki")
+ assert dataset_ids.collect{|d| Dataset.find(d).name}.include? ("Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles")
end
def test_create_model