From 063acd4dc63e9287287cc1ff78fff2064ff74e4f Mon Sep 17 00:00:00 2001
From: Christoph Helma
Date: Thu, 7 Apr 2016 17:39:14 +0200
Subject: initial ambit import

Add data/enm-dump.rb, which downloads the JSON resources of every bundle
listed by data.enanomapper.net, and data/enm-import.rb, which creates
Nanoparticle and Feature records from the bundle datasets and properties.
Add the Nanoparticle class, add a `unit` field to Feature, and move the
`source` field from Dataset to the fields shared by all generated OpenTox
classes.

---
Review notes:
- enm-import.rb stores coatings under :coatings, the field declared in
  lib/nanoparticle.rb (it used :coating).
- enm-import.rb matches feature URIs as literal substrings instead of
  interpolating them unescaped into a regular expression, and no longer
  shadows the outer `uri` variable.
- enm-dump.rb passes the URLs read from the downloaded JSON to wget as
  separate arguments instead of interpolating them into a shell command.
- All changes are line-for-line, so hunk sizes and the diffstat are unchanged;
  the blob ids on the index lines are those of the original commit.

 data/enm-dump.rb    | 16 ++++++++++++++++
 data/enm-import.rb  | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
 lib/dataset.rb      |  1 -
 lib/feature.rb      |  1 +
 lib/lazar.rb        |  3 ++-
 lib/nanoparticle.rb | 17 +++++++++++++++++
 lib/opentox.rb      |  1 +
 7 files changed, 87 insertions(+), 2 deletions(-)
 create mode 100644 data/enm-dump.rb
 create mode 100644 data/enm-import.rb
 create mode 100644 lib/nanoparticle.rb

diff --git a/data/enm-dump.rb b/data/enm-dump.rb
new file mode 100644
index 0000000..c1c25e7
--- /dev/null
+++ b/data/enm-dump.rb
@@ -0,0 +1,16 @@
+require 'json'
+
+#get list of bundle URIs
+`wget 'https://data.enanomapper.net/bundle?media=application%2Fjson' -O bundles.json`
+json = JSON.parse File.read('./bundles.json')
+json["dataset"].each do |dataset|
+  uri = dataset["URI"]
+  id = uri.split("/").last
+  system('wget', '--header=accept:application/json', uri, '-O', "bundle#{id}")
+  system('wget', '--header=accept:application/json', dataset["summary"].to_s, '-O', "summary#{id}.json")
+  system('wget', '--header=accept:application/json', dataset["compound"].to_s, '-O', "compound#{id}.json")
+  system('wget', '--header=accept:application/json', dataset["substance"].to_s, '-O', "substance#{id}.json")
+  system('wget', '--header=accept:application/json', dataset["property"].to_s, '-O', "property#{id}.json")
+  system('wget', '--header=accept:application/json', dataset["dataset"].to_s, '-O', "dataset#{id}.json")
+  system('wget', '--header=accept:application/json', dataset["matrix"].to_s, '-O', "matrix#{id}.json")
+end
diff --git a/data/enm-import.rb b/data/enm-import.rb
new file mode 100644
index 0000000..65fd3c7
--- /dev/null
+++ b/data/enm-import.rb
@@ -0,0 +1,50 @@
+require_relative '../lib/lazar.rb'
+include OpenTox
+
+
+#get list of bundle URIs
+bundles = JSON.parse(RestClientWrapper.get('https://data.enanomapper.net/bundle?media=application%2Fjson'))["dataset"]
+bundles.each do |bundle|
+  uri = bundle["URI"]
+  nanoparticles = JSON.parse(RestClientWrapper.get(bundle["dataset"]+"?media=application%2Fjson"))["dataEntry"]
+  features = JSON.parse(RestClientWrapper.get(bundle["property"]+"?media=application%2Fjson"))["feature"]
+  nanoparticles.each do |np|
+    nanoparticle = Nanoparticle.find_or_create_by(
+      :name => np["values"]["https://data.enanomapper.net/identifier/name"],
+      :source => np["compound"]["URI"],
+    )
+    np["composition"].each do |comp|
+      case comp["relation"]
+      when "HAS_CORE"
+        nanoparticle[:core] = comp["component"]["compound"]["URI"]
+      when "HAS_COATING"
+        nanoparticle[:coatings] ||= []
+        nanoparticle[:coatings] << comp["component"]["compound"]["URI"]
+      end
+    end if np["composition"]
+    np["values"].each do |u,v|
+      if u.match(/property/)
+        name, unit = nil
+        features.each do |feature_uri,feat|
+          if u.include?(feature_uri)
+            name = feat["title"]
+            unit = feat["units"]
+          end
+        end
+        feature = Feature.find_or_create_by(
+          :name => name,
+          :unit => unit,
+          #:source => uri
+        )
+        nanoparticle[:features] ||= {}
+        if v.size == 1 && v.first.keys == ["loValue"]
+          nanoparticle[:features][feature.id] = v.first["loValue"]
+        else
+          #TODO: values other than a single loValue entry are not imported yet
+        end
+      end
+    end
+    p nanoparticle
+    nanoparticle.save
+  end
+end
diff --git a/lib/dataset.rb b/lib/dataset.rb
index 5d8aeaf..2e48626 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -9,7 +9,6 @@ module OpenTox
     field :feature_ids, type: Array, default: []
     field :compound_ids, type: Array, default: []
     field :data_entries, type: Array, default: []
-    field :source, type: String
 
     # Readers
 
diff --git a/lib/feature.rb b/lib/feature.rb
index b58946b..f13a3fb 100644
--- a/lib/feature.rb
+++ b/lib/feature.rb
@@ -6,6 +6,7 @@ module OpenTox
     field :numeric, type: Boolean
     field :measured, type: Boolean
     field :calculated, type: Boolean
+    field :unit, type: String
   end
 
   # Feature for categorical variables
diff --git a/lib/lazar.rb b/lib/lazar.rb
index a28ba3a..39dd8fa 100644
--- a/lib/lazar.rb
+++ b/lib/lazar.rb
@@ -61,7 +61,7 @@ suppressPackageStartupMessages({
 "
 
 # OpenTox classes and includes
-CLASSES = ["Feature","Compound","Dataset","Validation","CrossValidation","LeaveOneOutValidation","RepeatedCrossValidation","Experiment"]# Algorithm and Models are modules
+CLASSES = ["Feature","Compound","Dataset","Validation","CrossValidation","LeaveOneOutValidation","RepeatedCrossValidation","Experiment","Nanoparticle"]# Algorithm and Models are modules
 
 [ # be aware of the require sequence as it affects class/method overwrites
   "overwrite.rb",
@@ -71,6 +71,7 @@ CLASSES = ["Feature","Compound","Dataset","Validation","CrossValidation","LeaveO
   "feature.rb",
   "physchem.rb",
   "compound.rb",
+  "nanoparticle.rb",
   "dataset.rb",
   "algorithm.rb",
   "model.rb",
diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb
new file mode 100644
index 0000000..3783ece
--- /dev/null
+++ b/lib/nanoparticle.rb
@@ -0,0 +1,17 @@
+module OpenTox
+
+  class Nanoparticle
+    include OpenTox
+
+    field :particle_id, type: String
+    field :core, type: String
+    field :coatings, type: Array
+
+    #field :physchem_descriptors, type: Hash, default: {}
+    #field :toxicities, type: Hash, default: {}
+    field :features, type: Hash, default: {}
+
+  end
+end
+
+
diff --git a/lib/opentox.rb b/lib/opentox.rb
index 186c87a..cc18cc6 100644
--- a/lib/opentox.rb
+++ b/lib/opentox.rb
@@ -13,6 +13,7 @@ module OpenTox
       include Mongoid::Timestamps
       store_in collection: klass.downcase.pluralize
       field :name, type: String
+      field :source, type: String
       field :warnings, type: Array, default: []
     end
     OpenTox.const_set klass,c
-- 
cgit v1.2.3