From 515e644423998a94f07be06bf6460bcf4f96f968 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Fri, 8 Apr 2016 13:05:52 +0200 Subject: enm import test --- lib/import.rb | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 lib/import.rb (limited to 'lib/import.rb') diff --git a/lib/import.rb b/lib/import.rb new file mode 100644 index 0000000..86c633a --- /dev/null +++ b/lib/import.rb @@ -0,0 +1,77 @@ +module OpenTox + + module Import + + class Enanomapper + include OpenTox + + def self.import + #get list of bundle URIs + bundles = JSON.parse(RestClientWrapper.get('https://data.enanomapper.net/bundle?media=application%2Fjson'))["dataset"] + bundles.each do |bundle| + uri = bundle["URI"] + nanoparticles = JSON.parse(RestClientWrapper.get(bundle["dataset"]+"?media=application%2Fjson"))["dataEntry"] + features = JSON.parse(RestClientWrapper.get(bundle["property"]+"?media=application%2Fjson"))["feature"] + nanoparticles.each do |np| + nanoparticle = Nanoparticle.find_or_create_by( + :name => np["values"]["https://data.enanomapper.net/identifier/name"], + :source => np["compound"]["URI"], + ) + nanoparticle.bundles << uri + np["composition"].each do |comp| + case comp["relation"] + when "HAS_CORE" + nanoparticle.core = comp["component"]["compound"]["URI"] + when "HAS_COATING" + nanoparticle.coating << comp["component"]["compound"]["URI"] + end + end if np["composition"] + np["values"].each do |u,v| + if u.match(/property/) + name, unit, source = nil + features.each do |uri,feat| + if u.match(/#{uri}/) + name = feat["title"] + unit = feat["units"] + source = uri + end + end + feature = Feature.find_or_create_by( + :name => name, + :unit => unit, + :source => source + ) + end + v.each{|value| nanoparticle.parse_ambit_value feature, value} if v.is_a? Array + end + nanoparticle.bundles.uniq! + nanoparticle.physchem_descriptors.each{|f,v| v.uniq!} + nanoparticle.toxicities.each{|f,v| v.uniq!} + nanoparticle.save! + end + end + + def self.dump + #get list of bundle URIs + `wget 'https://data.enanomapper.net/bundle?media=application%2Fjson' -O bundles.json` + json = JSON.parse File.read('./bundles.json') + json["dataset"].each do |dataset| + uri = dataset["URI"] + id = uri.split("/").last + `wget --header='accept:application/json' '#{uri}' -O 'bundle#{id}'` + `wget --header='accept:application/json' '#{dataset["summary"]}' -O 'summary#{id}.json'` + `wget --header='accept:application/json' '#{dataset["compound"]}' -O 'compound#{id}.json'` + `wget --header='accept:application/json' '#{dataset["substance"]}' -O 'substance#{id}.json'` + `wget --header='accept:application/json' '#{dataset["property"]}' -O 'property#{id}.json'` + `wget --header='accept:application/json' '#{dataset["dataset"]}' -O 'dataset#{id}.json'` + `wget --header='accept:application/json' '#{dataset["matrix"]}' -O 'matrix#{id}.json'` + end + end + end + + end + + end + +end + -- cgit v1.2.3 From 64f1f32ced77afb278bdb7c27397c5299a73675c Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Wed, 13 Apr 2016 18:18:36 +0200 Subject: improved enm import --- lib/import.rb | 105 ++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 58 insertions(+), 47 deletions(-) (limited to 'lib/import.rb') diff --git a/lib/import.rb b/lib/import.rb index 86c633a..cf0855e 100644 --- a/lib/import.rb +++ b/lib/import.rb @@ -8,64 +8,75 @@ module OpenTox def self.import #get list of bundle URIs bundles = JSON.parse(RestClientWrapper.get('https://data.enanomapper.net/bundle?media=application%2Fjson'))["dataset"] + datasets = [] bundles.each do |bundle| uri = bundle["URI"] + dataset = Dataset.find_or_create_by(:source => bundle["URI"],:name => bundle["title"]) nanoparticles = JSON.parse(RestClientWrapper.get(bundle["dataset"]+"?media=application%2Fjson"))["dataEntry"] features = JSON.parse(RestClientWrapper.get(bundle["property"]+"?media=application%2Fjson"))["feature"] nanoparticles.each do |np| - nanoparticle = Nanoparticle.find_or_create_by( - :name => np["values"]["https://data.enanomapper.net/identifier/name"], - :source => np["compound"]["URI"], - ) - nanoparticle.bundles << uri - np["composition"].each do |comp| - case comp["relation"] - when "HAS_CORE" - nanoparticle.core = comp["component"]["compound"]["URI"] - when "HAS_COATING" - nanoparticle.coating << comp["component"]["compound"]["URI"] - end - end if np["composition"] - np["values"].each do |u,v| - if u.match(/property/) - name, unit, source = nil - features.each do |uri,feat| - if u.match(/#{uri}/) - name = feat["title"] - unit = feat["units"] - source = uri - end + nanoparticle = Nanoparticle.find_or_create_by( + :name => np["values"]["https://data.enanomapper.net/identifier/name"], + :source => np["compound"]["URI"], + ) + dataset.data_entries[nanoparticle.id.to_s] ||= {} + nanoparticle.bundles << uri + nanoparticle.dataset_ids << dataset.id + np["composition"].each do |comp| + case comp["relation"] + when "HAS_CORE" + nanoparticle.core = comp["component"]["compound"]["URI"] + when "HAS_COATING" + nanoparticle.coating << comp["component"]["compound"]["URI"] + end + end if np["composition"] + np["values"].each do |u,v| + if u.match(/property/) + name, unit, source = nil + features.each do |uri,feat| + if u.match(/#{uri}/) + name = feat["title"] + unit = feat["units"] + source = uri end - feature = Feature.find_or_create_by( - :name => name, - :unit => unit, - :source => source - ) end - v.each{|value| nanoparticle.parse_ambit_value feature, value} if v.is_a? Array + feature = Feature.find_or_create_by( + :name => name, + :unit => unit, + :source => source + ) end - nanoparticle.bundles.uniq! - nanoparticle.physchem_descriptors.each{|f,v| v.uniq!} - nanoparticle.toxicities.each{|f,v| v.uniq!} - nanoparticle.save! + v.each{|value| nanoparticle.parse_ambit_value feature, value} if v.is_a? Array + end + nanoparticle.bundles.uniq! + nanoparticle.physchem_descriptors.each{|f,v| v.uniq!} + #nanoparticle.toxicities.each{|f,v| v.uniq!} + nanoparticle.toxicities.each do |f,v| + dataset.data_entries[nanoparticle.id.to_s][f.to_s] ||= [] + dataset.data_entries[nanoparticle.id.to_s][f.to_s] += v + end + nanoparticle.save end + dataset.save + datasets << dataset end + datasets.collect{|d| d.id} + end - def self.dump - #get list of bundle URIs - `wget 'https://data.enanomapper.net/bundle?media=application%2Fjson' -O bundles.json` - json = JSON.parse File.read('./bundles.json') - json["dataset"].each do |dataset| - uri = dataset["URI"] - id = uri.split("/").last - `wget --header='accept:application/json' '#{uri}' -O 'bundle#{id}'` - `wget --header='accept:application/json' '#{dataset["summary"]}' -O 'summary#{id}.json'` - `wget --header='accept:application/json' '#{dataset["compound"]}' -O 'compound#{id}.json'` - `wget --header='accept:application/json' '#{dataset["substance"]}' -O 'substance#{id}.json'` - `wget --header='accept:application/json' '#{dataset["property"]}' -O 'property#{id}.json'` - `wget --header='accept:application/json' '#{dataset["dataset"]}' -O 'dataset#{id}.json'` - `wget --header='accept:application/json' '#{dataset["matrix"]}' -O 'matrix#{id}.json'` - end + def self.dump + #get list of bundle URIs + `wget 'https://data.enanomapper.net/bundle?media=application%2Fjson' -O bundles.json` + json = JSON.parse File.read('./bundles.json') + json["dataset"].each do |dataset| + uri = dataset["URI"] + id = uri.split("/").last + `wget --header='accept:application/json' '#{uri}' -O 'bundle#{id}'` + `wget --header='accept:application/json' '#{dataset["summary"]}' -O 'summary#{id}.json'` + `wget --header='accept:application/json' '#{dataset["compound"]}' -O 'compound#{id}.json'` + `wget --header='accept:application/json' '#{dataset["substance"]}' -O 'substance#{id}.json'` + `wget --header='accept:application/json' '#{dataset["property"]}' -O 'property#{id}.json'` + `wget --header='accept:application/json' '#{dataset["dataset"]}' -O 'dataset#{id}.json'` + `wget --header='accept:application/json' '#{dataset["matrix"]}' -O 'matrix#{id}.json'` end end -- cgit v1.2.3 From 4662e845c12e3e623ec9bec208c42cd4b1886047 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Fri, 15 Apr 2016 14:58:17 +0200 Subject: enm study import --- lib/import.rb | 53 +++++++++++++++++++---------------------------------- 1 file changed, 19 insertions(+), 34 deletions(-) (limited to 'lib/import.rb') diff --git a/lib/import.rb b/lib/import.rb index cf0855e..9091207 100644 --- a/lib/import.rb +++ b/lib/import.rb @@ -19,43 +19,28 @@ module OpenTox :name => np["values"]["https://data.enanomapper.net/identifier/name"], :source => np["compound"]["URI"], ) - dataset.data_entries[nanoparticle.id.to_s] ||= {} - nanoparticle.bundles << uri - nanoparticle.dataset_ids << dataset.id - np["composition"].each do |comp| - case comp["relation"] - when "HAS_CORE" - nanoparticle.core = comp["component"]["compound"]["URI"] - when "HAS_COATING" - nanoparticle.coating << comp["component"]["compound"]["URI"] - end - end if np["composition"] - np["values"].each do |u,v| - if u.match(/property/) - name, unit, source = nil - features.each do |uri,feat| - if u.match(/#{uri}/) - name = feat["title"] - unit = feat["units"] - source = uri - end - end - feature = Feature.find_or_create_by( - :name => name, - :unit => unit, - :source => source + dataset.substance_ids << nanoparticle.id + dataset.substance_ids.uniq! + studies = JSON.parse(RestClientWrapper.get(File.join(np["compound"]["URI"],"study")))["study"] + studies.each do |study| + study["effects"].each do |effect| + effect["result"]["textValue"] ? klass = NominalFeature : klass = NumericFeature + # TODO parse core/coating + # TODO parse proteomics, they come as a large textValue + $logger.debug File.join(np["compound"]["URI"],"study") + effect["conditions"].delete_if { |k, v| v.nil? } + feature = klass.find_or_create_by( + :source => File.join(np["compound"]["URI"],"study"), + :name => "#{study["protocol"]["category"]["title"]} #{study["protocol"]["endpoint"]}", + :unit => effect["result"]["unit"], + :category => study["protocol"]["topcategory"], + :conditions => effect["conditions"] ) + nanoparticle.parse_ambit_value feature, effect["result"] + dataset.feature_ids << feature.id + dataset.feature_ids.uniq! end - v.each{|value| nanoparticle.parse_ambit_value feature, value} if v.is_a? Array - end - nanoparticle.bundles.uniq! - nanoparticle.physchem_descriptors.each{|f,v| v.uniq!} - #nanoparticle.toxicities.each{|f,v| v.uniq!} - nanoparticle.toxicities.each do |f,v| - dataset.data_entries[nanoparticle.id.to_s][f.to_s] ||= [] - dataset.data_entries[nanoparticle.id.to_s][f.to_s] += v end - nanoparticle.save end dataset.save datasets << dataset -- cgit v1.2.3 From cfc64a2966ab38698e499f0b44f41208ee77a07f Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Tue, 26 Apr 2016 17:38:15 +0200 Subject: first nanomaterial prediction --- lib/import.rb | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'lib/import.rb') diff --git a/lib/import.rb b/lib/import.rb index 9091207..3c1edfe 100644 --- a/lib/import.rb +++ b/lib/import.rb @@ -30,7 +30,7 @@ module OpenTox $logger.debug File.join(np["compound"]["URI"],"study") effect["conditions"].delete_if { |k, v| v.nil? } feature = klass.find_or_create_by( - :source => File.join(np["compound"]["URI"],"study"), + #:source => File.join(np["compound"]["URI"],"study"), :name => "#{study["protocol"]["category"]["title"]} #{study["protocol"]["endpoint"]}", :unit => effect["result"]["unit"], :category => study["protocol"]["topcategory"], @@ -48,6 +48,22 @@ module OpenTox datasets.collect{|d| d.id} end +=begin + def self.import_ld # defunct, AMBIT JSON_LD does not have substance entries + #get list of bundle URIs + bundles = JSON.parse(RestClientWrapper.get('https://data.enanomapper.net/bundle?media=application%2Fjson'))["dataset"] + datasets = [] + bundles.each do |bundle| + uri = bundle["URI"] + study = JSON.parse(`curl -H 'Accept:application/ld+json' '#{uri}/substance'`) + study["@graph"].each do |i| + puts i.to_yaml if i.keys.include? "sio:has-value" + end + end + datasets.collect{|d| d.id} + end +=end + def self.dump #get list of bundle URIs `wget 'https://data.enanomapper.net/bundle?media=application%2Fjson' -O bundles.json` -- cgit v1.2.3 From ab7b37541b4f8a762be737009631d3eefd898b4a Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Thu, 5 May 2016 16:14:02 +0200 Subject: ambit mirror, import from mirrored json, proteomics import --- lib/import.rb | 101 ++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 55 insertions(+), 46 deletions(-) (limited to 'lib/import.rb') diff --git a/lib/import.rb b/lib/import.rb index 3c1edfe..11cb367 100644 --- a/lib/import.rb +++ b/lib/import.rb @@ -5,47 +5,73 @@ module OpenTox class Enanomapper include OpenTox - def self.import + def self.mirror dir="." #get list of bundle URIs bundles = JSON.parse(RestClientWrapper.get('https://data.enanomapper.net/bundle?media=application%2Fjson'))["dataset"] + File.open(File.join(dir,"bundles.json"),"w+"){|f| f.puts JSON.pretty_generate(bundles)} datasets = [] bundles.each do |bundle| - uri = bundle["URI"] - dataset = Dataset.find_or_create_by(:source => bundle["URI"],:name => bundle["title"]) nanoparticles = JSON.parse(RestClientWrapper.get(bundle["dataset"]+"?media=application%2Fjson"))["dataEntry"] - features = JSON.parse(RestClientWrapper.get(bundle["property"]+"?media=application%2Fjson"))["feature"] - nanoparticles.each do |np| - nanoparticle = Nanoparticle.find_or_create_by( - :name => np["values"]["https://data.enanomapper.net/identifier/name"], - :source => np["compound"]["URI"], - ) - dataset.substance_ids << nanoparticle.id - dataset.substance_ids.uniq! - studies = JSON.parse(RestClientWrapper.get(File.join(np["compound"]["URI"],"study")))["study"] + nanoparticles.each do |nanoparticle| + uuid = nanoparticle["values"]["https://data.enanomapper.net/identifier/uuid"] + $logger.debug uuid + File.open(File.join(dir,"nanoparticle-#{uuid}.json"),"w+"){|f| f.puts JSON.pretty_generate(nanoparticle)} + studies = JSON.parse(RestClientWrapper.get(File.join(nanoparticle["compound"]["URI"],"study")))["study"] studies.each do |study| - study["effects"].each do |effect| - effect["result"]["textValue"] ? klass = NominalFeature : klass = NumericFeature - # TODO parse core/coating - # TODO parse proteomics, they come as a large textValue - $logger.debug File.join(np["compound"]["URI"],"study") - effect["conditions"].delete_if { |k, v| v.nil? } + File.open(File.join(dir,"study-#{uuid}.json"),"w+"){|f| f.puts JSON.pretty_generate(study)} + end + end + end + end + + def self.import dir="." + datasets = {} + JSON.parse(File.read(File.join(dir,"bundles.json"))).each do |bundle| + datasets[bundle["URI"]] = Dataset.find_or_create_by(:source => bundle["URI"],:name => bundle["title"]) + end + Dir[File.join(dir,"study*.json")].each do |s| + study = JSON.parse(File.read(s)) + np = JSON.parse(File.read(File.join(dir,"nanoparticle-#{study['owner']['substance']['uuid']}.json"))) + nanoparticle = Nanoparticle.find_or_create_by( + :name => np["values"]["https://data.enanomapper.net/identifier/name"], + :source => np["compound"]["URI"], + ) + np["bundles"].keys.each do |bundle_uri| + datasets[bundle_uri].substance_ids << nanoparticle.id + nanoparticle["dataset_ids"] << datasets[bundle_uri].id + end + study["effects"].each do |effect| + effect["result"]["textValue"] ? klass = NominalFeature : klass = NumericFeature + # TODO parse core/coating + # TODO parse proteomics, they come as a large textValue + #$logger.debug File.join(np["compound"]["URI"],"study") + effect["conditions"].delete_if { |k, v| v.nil? } + # parse proteomics data + if study["protocol"]["category"]["title"].match(/Proteomics/) and effect["result"]["textValue"] and effect["result"]["textValue"].length > 50 + JSON.parse(effect["result"]["textValue"]).each do |identifier, value| feature = klass.find_or_create_by( - #:source => File.join(np["compound"]["URI"],"study"), - :name => "#{study["protocol"]["category"]["title"]} #{study["protocol"]["endpoint"]}", - :unit => effect["result"]["unit"], - :category => study["protocol"]["topcategory"], - :conditions => effect["conditions"] + :name => identifier, + :category => "Proteomics", ) - nanoparticle.parse_ambit_value feature, effect["result"] - dataset.feature_ids << feature.id - dataset.feature_ids.uniq! + nanoparticle.parse_ambit_value feature, value end + else + feature = klass.find_or_create_by( + :name => "#{study["protocol"]["category"]["title"]} #{study["protocol"]["endpoint"]}", + :unit => effect["result"]["unit"], + :category => study["protocol"]["topcategory"], + :conditions => effect["conditions"] + ) + nanoparticle.parse_ambit_value feature, effect["result"] end end - dataset.save - datasets << dataset + nanoparticle.save + end + datasets.each do |u,d| + d.feature_ids.uniq! + d.substance_ids.uniq! + d.save end - datasets.collect{|d| d.id} end =begin @@ -64,23 +90,6 @@ module OpenTox end =end - def self.dump - #get list of bundle URIs - `wget 'https://data.enanomapper.net/bundle?media=application%2Fjson' -O bundles.json` - json = JSON.parse File.read('./bundles.json') - json["dataset"].each do |dataset| - uri = dataset["URI"] - id = uri.split("/").last - `wget --header='accept:application/json' '#{uri}' -O 'bundle#{id}'` - `wget --header='accept:application/json' '#{dataset["summary"]}' -O 'summary#{id}.json'` - `wget --header='accept:application/json' '#{dataset["compound"]}' -O 'compound#{id}.json'` - `wget --header='accept:application/json' '#{dataset["substance"]}' -O 'substance#{id}.json'` - `wget --header='accept:application/json' '#{dataset["property"]}' -O 'property#{id}.json'` - `wget --header='accept:application/json' '#{dataset["dataset"]}' -O 'dataset#{id}.json'` - `wget --header='accept:application/json' '#{dataset["matrix"]}' -O 'matrix#{id}.json'` - end - end - end end -- cgit v1.2.3 From 611bac891177f8d9185d45486dd574b6ef4d1912 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Mon, 9 May 2016 15:11:46 +0200 Subject: nanoparticle models fixed --- lib/import.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib/import.rb') diff --git a/lib/import.rb b/lib/import.rb index 11cb367..dfe5e2d 100644 --- a/lib/import.rb +++ b/lib/import.rb @@ -40,10 +40,10 @@ module OpenTox datasets[bundle_uri].substance_ids << nanoparticle.id nanoparticle["dataset_ids"] << datasets[bundle_uri].id end + bundle = datasets[np["bundles"].keys.first].id if np["bundles"].size == 1 study["effects"].each do |effect| effect["result"]["textValue"] ? klass = NominalFeature : klass = NumericFeature # TODO parse core/coating - # TODO parse proteomics, they come as a large textValue #$logger.debug File.join(np["compound"]["URI"],"study") effect["conditions"].delete_if { |k, v| v.nil? } # parse proteomics data @@ -53,7 +53,7 @@ module OpenTox :name => identifier, :category => "Proteomics", ) - nanoparticle.parse_ambit_value feature, value + nanoparticle.parse_ambit_value feature, value, bundle end else feature = klass.find_or_create_by( @@ -62,7 +62,7 @@ module OpenTox :category => study["protocol"]["topcategory"], :conditions => effect["conditions"] ) - nanoparticle.parse_ambit_value feature, effect["result"] + nanoparticle.parse_ambit_value feature, effect["result"], bundle end end nanoparticle.save -- cgit v1.2.3 From b8bb12c8a163c238d7d4387c1914e2100bb660df Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Thu, 12 May 2016 15:23:01 +0200 Subject: enm study import fixed --- lib/import.rb | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'lib/import.rb') diff --git a/lib/import.rb b/lib/import.rb index dfe5e2d..3c6966e 100644 --- a/lib/import.rb +++ b/lib/import.rb @@ -9,16 +9,18 @@ module OpenTox #get list of bundle URIs bundles = JSON.parse(RestClientWrapper.get('https://data.enanomapper.net/bundle?media=application%2Fjson'))["dataset"] File.open(File.join(dir,"bundles.json"),"w+"){|f| f.puts JSON.pretty_generate(bundles)} - datasets = [] bundles.each do |bundle| + p bundle["title"] nanoparticles = JSON.parse(RestClientWrapper.get(bundle["dataset"]+"?media=application%2Fjson"))["dataEntry"] + p nanoparticles.size nanoparticles.each do |nanoparticle| uuid = nanoparticle["values"]["https://data.enanomapper.net/identifier/uuid"] $logger.debug uuid File.open(File.join(dir,"nanoparticle-#{uuid}.json"),"w+"){|f| f.puts JSON.pretty_generate(nanoparticle)} studies = JSON.parse(RestClientWrapper.get(File.join(nanoparticle["compound"]["URI"],"study")))["study"] + p uuid if studies.size < 1 studies.each do |study| - File.open(File.join(dir,"study-#{uuid}.json"),"w+"){|f| f.puts JSON.pretty_generate(study)} + File.open(File.join(dir,"study-#{study["uuid"]}.json"),"w+"){|f| f.puts JSON.pretty_generate(study)} end end end @@ -37,7 +39,7 @@ module OpenTox :source => np["compound"]["URI"], ) np["bundles"].keys.each do |bundle_uri| - datasets[bundle_uri].substance_ids << nanoparticle.id + #datasets[bundle_uri].substance_ids << nanoparticle.id nanoparticle["dataset_ids"] << datasets[bundle_uri].id end bundle = datasets[np["bundles"].keys.first].id if np["bundles"].size == 1 -- cgit v1.2.3 From c90644211e214a50f6fdb3a936bf247f45f1f4be Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Fri, 13 May 2016 13:38:24 +0200 Subject: compound tests fixed --- lib/import.rb | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'lib/import.rb') diff --git a/lib/import.rb b/lib/import.rb index 3c6966e..2dcc361 100644 --- a/lib/import.rb +++ b/lib/import.rb @@ -39,7 +39,6 @@ module OpenTox :source => np["compound"]["URI"], ) np["bundles"].keys.each do |bundle_uri| - #datasets[bundle_uri].substance_ids << nanoparticle.id nanoparticle["dataset_ids"] << datasets[bundle_uri].id end bundle = datasets[np["bundles"].keys.first].id if np["bundles"].size == 1 @@ -59,7 +58,7 @@ module OpenTox end else feature = klass.find_or_create_by( - :name => "#{study["protocol"]["category"]["title"]} #{study["protocol"]["endpoint"]}", + :name => effect["endpoint"], :unit => effect["result"]["unit"], :category => study["protocol"]["topcategory"], :conditions => effect["conditions"] @@ -69,11 +68,7 @@ module OpenTox end nanoparticle.save end - datasets.each do |u,d| - d.feature_ids.uniq! - d.substance_ids.uniq! - d.save - end + datasets.each { |u,d| d.save } end =begin -- cgit v1.2.3 From f46ba3b7262f5b551c81fc9396c5b7f0cac7f030 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Fri, 27 May 2016 19:16:16 +0200 Subject: first correlation of nanoparticle predictions --- lib/import.rb | 57 ++++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 42 insertions(+), 15 deletions(-) (limited to 'lib/import.rb') diff --git a/lib/import.rb b/lib/import.rb index 2dcc361..80d4579 100644 --- a/lib/import.rb +++ b/lib/import.rb @@ -10,15 +10,15 @@ module OpenTox bundles = JSON.parse(RestClientWrapper.get('https://data.enanomapper.net/bundle?media=application%2Fjson'))["dataset"] File.open(File.join(dir,"bundles.json"),"w+"){|f| f.puts JSON.pretty_generate(bundles)} bundles.each do |bundle| - p bundle["title"] + $logger.debug bundle["title"] nanoparticles = JSON.parse(RestClientWrapper.get(bundle["dataset"]+"?media=application%2Fjson"))["dataEntry"] - p nanoparticles.size + $logger.debug nanoparticles.size nanoparticles.each do |nanoparticle| uuid = nanoparticle["values"]["https://data.enanomapper.net/identifier/uuid"] $logger.debug uuid File.open(File.join(dir,"nanoparticle-#{uuid}.json"),"w+"){|f| f.puts JSON.pretty_generate(nanoparticle)} studies = JSON.parse(RestClientWrapper.get(File.join(nanoparticle["compound"]["URI"],"study")))["study"] - p uuid if studies.size < 1 + $logger.debug uuid if studies.size < 1 studies.each do |study| File.open(File.join(dir,"study-#{study["uuid"]}.json"),"w+"){|f| f.puts JSON.pretty_generate(study)} end @@ -27,35 +27,58 @@ module OpenTox end def self.import dir="." + start_time = Time.now + t1 = 0 + t2 = 0 datasets = {} JSON.parse(File.read(File.join(dir,"bundles.json"))).each do |bundle| datasets[bundle["URI"]] = Dataset.find_or_create_by(:source => bundle["URI"],:name => bundle["title"]) end Dir[File.join(dir,"study*.json")].each do |s| + t = Time.now study = JSON.parse(File.read(s)) np = JSON.parse(File.read(File.join(dir,"nanoparticle-#{study['owner']['substance']['uuid']}.json"))) + core = {} + coating = [] + np["composition"].each do |c| + if c["relation"] == "HAS_CORE" + core = { + :uri => c["component"]["compound"]["URI"], + :name => c["component"]["values"]["https://data.enanomapper.net/feature/http%3A%2F%2Fwww.opentox.org%2Fapi%2F1.1%23ChemicalNameDefault"] + } + elsif c["relation"] == "HAS_COATING" + coating << { + :uri => c["component"]["compound"]["URI"], + :name => c["component"]["values"]["https://data.enanomapper.net/feature/http%3A%2F%2Fwww.opentox.org%2Fapi%2F1.1%23ChemicalNameDefault"] + } + end + end if np["composition"] nanoparticle = Nanoparticle.find_or_create_by( :name => np["values"]["https://data.enanomapper.net/identifier/name"], :source => np["compound"]["URI"], + :core => core, + :coating => coating ) np["bundles"].keys.each do |bundle_uri| - nanoparticle["dataset_ids"] << datasets[bundle_uri].id + nanoparticle.dataset_ids << datasets[bundle_uri].id end - bundle = datasets[np["bundles"].keys.first].id if np["bundles"].size == 1 + dataset = datasets[np["bundles"].keys.first] + proteomics_features = {} study["effects"].each do |effect| effect["result"]["textValue"] ? klass = NominalFeature : klass = NumericFeature - # TODO parse core/coating - #$logger.debug File.join(np["compound"]["URI"],"study") effect["conditions"].delete_if { |k, v| v.nil? } - # parse proteomics data - if study["protocol"]["category"]["title"].match(/Proteomics/) and effect["result"]["textValue"] and effect["result"]["textValue"].length > 50 + if study["protocol"]["category"]["title"].match(/Proteomics/) and effect["result"]["textValue"] and effect["result"]["textValue"].length > 50 # parse proteomics data +=begin JSON.parse(effect["result"]["textValue"]).each do |identifier, value| - feature = klass.find_or_create_by( - :name => identifier, - :category => "Proteomics", - ) - nanoparticle.parse_ambit_value feature, value, bundle + # time critical step + t = Time.now + proteomics_features[identifier] ||= klass.find_or_create_by(:name => identifier, :category => "Proteomics") + t1 += Time.now - t + t = Time.now + nanoparticle.parse_ambit_value proteomics_features[identifier], value, dataset + t2 += Time.now - t end +=end else feature = klass.find_or_create_by( :name => effect["endpoint"], @@ -63,10 +86,14 @@ module OpenTox :category => study["protocol"]["topcategory"], :conditions => effect["conditions"] ) - nanoparticle.parse_ambit_value feature, effect["result"], bundle + nanoparticle.parse_ambit_value feature, effect["result"], dataset end end nanoparticle.save + #p "Total time: #{Time.now - start_time}" + #p "Proteomics features: #{t1}" + #p "Proteomics values: #{t2}" + #p "Time2: #{t2}" end datasets.each { |u,d| d.save } end -- cgit v1.2.3 From eec5bddbd35c9ecee8021128508d8718bccb4fe3 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Thu, 2 Jun 2016 17:54:48 +0200 Subject: local pls regression for nanoparticle proteomics --- lib/import.rb | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) (limited to 'lib/import.rb') diff --git a/lib/import.rb b/lib/import.rb index 80d4579..4c49e5e 100644 --- a/lib/import.rb +++ b/lib/import.rb @@ -68,17 +68,10 @@ module OpenTox effect["result"]["textValue"] ? klass = NominalFeature : klass = NumericFeature effect["conditions"].delete_if { |k, v| v.nil? } if study["protocol"]["category"]["title"].match(/Proteomics/) and effect["result"]["textValue"] and effect["result"]["textValue"].length > 50 # parse proteomics data -=begin - JSON.parse(effect["result"]["textValue"]).each do |identifier, value| - # time critical step - t = Time.now - proteomics_features[identifier] ||= klass.find_or_create_by(:name => identifier, :category => "Proteomics") - t1 += Time.now - t - t = Time.now + JSON.parse(effect["result"]["textValue"]).each do |identifier, value| # time critical step + proteomics_features[identifier] ||= NumericFeature.find_or_create_by(:name => identifier, :category => "Proteomics") nanoparticle.parse_ambit_value proteomics_features[identifier], value, dataset - t2 += Time.now - t end -=end else feature = klass.find_or_create_by( :name => effect["endpoint"], @@ -90,10 +83,6 @@ module OpenTox end end nanoparticle.save - #p "Total time: #{Time.now - start_time}" - #p "Proteomics features: #{t1}" - #p "Proteomics values: #{t2}" - #p "Time2: #{t2}" end datasets.each { |u,d| d.save } end -- cgit v1.2.3 From f93aad7227c7bb3702fd28aab2d289f1ca9ce7e9 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Thu, 21 Jul 2016 17:35:20 +0200 Subject: correlation plot fixed --- lib/import.rb | 2 ++ 1 file changed, 2 insertions(+) (limited to 'lib/import.rb') diff --git a/lib/import.rb b/lib/import.rb index 4c49e5e..e187e3c 100644 --- a/lib/import.rb +++ b/lib/import.rb @@ -73,6 +73,8 @@ module OpenTox nanoparticle.parse_ambit_value proteomics_features[identifier], value, dataset end else + name = effect["endpoint"] + name = "log2(Net cell association)" if name == "Log2 transformed" # use a sensible name feature = klass.find_or_create_by( :name => effect["endpoint"], :unit => effect["result"]["unit"], -- cgit v1.2.3 From 9e8537997d84e78e6545a66a0d09c33e76c8b7cf Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Fri, 30 Sep 2016 17:11:30 +0200 Subject: npo uri as source, spectral count unit f proteomics features --- lib/import.rb | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) (limited to 'lib/import.rb') diff --git a/lib/import.rb b/lib/import.rb index e187e3c..17894a9 100644 --- a/lib/import.rb +++ b/lib/import.rb @@ -62,24 +62,43 @@ module OpenTox np["bundles"].keys.each do |bundle_uri| nanoparticle.dataset_ids << datasets[bundle_uri].id end + dataset = datasets[np["bundles"].keys.first] proteomics_features = {} + category = study["protocol"]["topcategory"] + source = study["protocol"]["category"]["term"] + study["effects"].each do |effect| + effect["result"]["textValue"] ? klass = NominalFeature : klass = NumericFeature effect["conditions"].delete_if { |k, v| v.nil? } + if study["protocol"]["category"]["title"].match(/Proteomics/) and effect["result"]["textValue"] and effect["result"]["textValue"].length > 50 # parse proteomics data + JSON.parse(effect["result"]["textValue"]).each do |identifier, value| # time critical step - proteomics_features[identifier] ||= NumericFeature.find_or_create_by(:name => identifier, :category => "Proteomics") + proteomics_features[identifier] ||= NumericFeature.find_or_create_by(:name => identifier, :category => "Proteomics", :unit => "Spectral counts", :source => source) nanoparticle.parse_ambit_value proteomics_features[identifier], value, dataset end else name = effect["endpoint"] - name = "log2(Net cell association)" if name == "Log2 transformed" # use a sensible name + unit = effect["result"]["unit"] + warnings = [] + case name + when "Log2 transformed" # use a sensible name + name = "log2(Net cell association)" + warnings = ["Original name was 'Log2 transformed'"] + unit = "log2(mL/ug(Mg))" + when "Total protein (BCA assay)" + category = "P-CHEM" + warnings = ["Category changed from TOX to P-CHEM"] + end feature = klass.find_or_create_by( - :name => effect["endpoint"], - :unit => effect["result"]["unit"], - :category => study["protocol"]["topcategory"], - :conditions => effect["conditions"] + :name => name, + :unit => unit, + :category => category, + :conditions => effect["conditions"], + :source => study["protocol"]["category"]["term"], + :warnings => warnings ) nanoparticle.parse_ambit_value feature, effect["result"], dataset end -- cgit v1.2.3 From 91787edb3682900bc5a2feeca66e5142f387fcc6 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Fri, 7 Oct 2016 10:25:58 +0200 Subject: unified interface for prediction algorithms --- lib/import.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib/import.rb') diff --git a/lib/import.rb b/lib/import.rb index 17894a9..8e57401 100644 --- a/lib/import.rb +++ b/lib/import.rb @@ -76,7 +76,7 @@ module OpenTox if study["protocol"]["category"]["title"].match(/Proteomics/) and effect["result"]["textValue"] and effect["result"]["textValue"].length > 50 # parse proteomics data JSON.parse(effect["result"]["textValue"]).each do |identifier, value| # time critical step - proteomics_features[identifier] ||= NumericFeature.find_or_create_by(:name => identifier, :category => "Proteomics", :unit => "Spectral counts", :source => source) + proteomics_features[identifier] ||= NumericFeature.find_or_create_by(:name => identifier, :category => "Proteomics", :unit => "Spectral counts", :source => source,:measured => true) nanoparticle.parse_ambit_value proteomics_features[identifier], value, dataset end else @@ -98,6 +98,7 @@ module OpenTox :category => category, :conditions => effect["conditions"], :source => study["protocol"]["category"]["term"], + :measured => true, :warnings => warnings ) nanoparticle.parse_ambit_value feature, effect["result"], dataset -- cgit v1.2.3 From 9e7b36613e98601de7b2ceb2d4442e11f1ae868a Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Thu, 10 Nov 2016 12:23:46 +0100 Subject: intermediate commit, may be defunct --- lib/import.rb | 44 +++++++++++++++++++++++++++++++------------- 1 file changed, 31 insertions(+), 13 deletions(-) (limited to 'lib/import.rb') diff --git a/lib/import.rb b/lib/import.rb index 8e57401..541c9b5 100644 --- a/lib/import.rb +++ b/lib/import.rb @@ -9,6 +9,12 @@ module OpenTox #get list of bundle URIs bundles = JSON.parse(RestClientWrapper.get('https://data.enanomapper.net/bundle?media=application%2Fjson'))["dataset"] File.open(File.join(dir,"bundles.json"),"w+"){|f| f.puts JSON.pretty_generate(bundles)} + # bundles + # id/summary + # id/compound + # id/substance + # id/property + bundles.each do |bundle| $logger.debug bundle["title"] nanoparticles = JSON.parse(RestClientWrapper.get(bundle["dataset"]+"?media=application%2Fjson"))["dataEntry"] @@ -32,32 +38,43 @@ module OpenTox t2 = 0 datasets = {} JSON.parse(File.read(File.join(dir,"bundles.json"))).each do |bundle| + if bundle["id"] == 3 datasets[bundle["URI"]] = Dataset.find_or_create_by(:source => bundle["URI"],:name => bundle["title"]) + end end - Dir[File.join(dir,"study*.json")].each do |s| + # TODO this is only for protein corona + Dir[File.join(dir,"study-F*.json")].each do |s| t = Time.now study = JSON.parse(File.read(s)) np = JSON.parse(File.read(File.join(dir,"nanoparticle-#{study['owner']['substance']['uuid']}.json"))) - core = {} - coating = [] + core_id = nil + coating_ids = [] np["composition"].each do |c| + uri = c["component"]["compound"]["URI"] + uri = CGI.escape File.join(uri,"&media=application/json") + data = JSON.parse(RestClientWrapper.get "https://data.enanomapper.net/query/compound/url/all?media=application/json&search=#{uri}") + smiles = data["dataEntry"][0]["values"]["https://data.enanomapper.net/feature/http%3A%2F%2Fwww.opentox.org%2Fapi%2F1.1%23SMILESDefault"] + names = [] + names << data["dataEntry"][0]["values"]["https://data.enanomapper.net/feature/http%3A%2F%2Fwww.opentox.org%2Fapi%2F1.1%23ChemicalNameDefault"] + names << data["dataEntry"][0]["values"]["https://data.enanomapper.net/feature/http%3A%2F%2Fwww.opentox.org%2Fapi%2F1.1%23IUPACNameDefault"] + if smiles + compound = Compound.find_or_create_by(:smiles => smiles) + compound.names = names.compact + else + compound = Compound.find_or_create_by(:names => names) + end + compound.save if c["relation"] == "HAS_CORE" - core = { - :uri => c["component"]["compound"]["URI"], - :name => c["component"]["values"]["https://data.enanomapper.net/feature/http%3A%2F%2Fwww.opentox.org%2Fapi%2F1.1%23ChemicalNameDefault"] - } + core_id = compound.id.to_s elsif c["relation"] == "HAS_COATING" - coating << { - :uri => c["component"]["compound"]["URI"], - :name => c["component"]["values"]["https://data.enanomapper.net/feature/http%3A%2F%2Fwww.opentox.org%2Fapi%2F1.1%23ChemicalNameDefault"] - } + coating_ids << compound.id.to_s end end if np["composition"] nanoparticle = Nanoparticle.find_or_create_by( :name => np["values"]["https://data.enanomapper.net/identifier/name"], :source => np["compound"]["URI"], - :core => core, - :coating => coating + :core_id => core_id, + :coating_ids => coating_ids ) np["bundles"].keys.each do |bundle_uri| nanoparticle.dataset_ids << datasets[bundle_uri].id @@ -104,6 +121,7 @@ module OpenTox nanoparticle.parse_ambit_value feature, effect["result"], dataset end end + p nanoparticle nanoparticle.save end datasets.each { |u,d| d.save } -- cgit v1.2.3 From 9a06f2ff5ae6bdbe7dc90555599e186f1585e0d2 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Thu, 10 Nov 2016 15:27:26 +0100 Subject: Model::NanoPrediction parameters --- lib/import.rb | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'lib/import.rb') diff --git a/lib/import.rb b/lib/import.rb index 541c9b5..8f640b1 100644 --- a/lib/import.rb +++ b/lib/import.rb @@ -5,7 +5,12 @@ module OpenTox class Enanomapper include OpenTox - def self.mirror dir="." + def self.mirror dir=nil + # clean download dir + dir ||= File.join(File.dirname(__FILE__),"..","data","enm") + FileUtils.rm_rf dir + FileUtils.mkdir_p dir + #get list of bundle URIs bundles = JSON.parse(RestClientWrapper.get('https://data.enanomapper.net/bundle?media=application%2Fjson'))["dataset"] File.open(File.join(dir,"bundles.json"),"w+"){|f| f.puts JSON.pretty_generate(bundles)} -- cgit v1.2.3 From b6116bc4705066da30668ff3370f3b1c307e44e7 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Fri, 11 Nov 2016 13:07:53 +0100 Subject: enm import fixed --- lib/import.rb | 194 ++++++++++++++++++++++++---------------------------------- 1 file changed, 80 insertions(+), 114 deletions(-) (limited to 'lib/import.rb') diff --git a/lib/import.rb b/lib/import.rb index 8f640b1..aa2ee75 100644 --- a/lib/import.rb +++ b/lib/import.rb @@ -5,129 +5,95 @@ module OpenTox class Enanomapper include OpenTox - def self.mirror dir=nil - # clean download dir - dir ||= File.join(File.dirname(__FILE__),"..","data","enm") - FileUtils.rm_rf dir - FileUtils.mkdir_p dir - - #get list of bundle URIs + # time critical step: JSON parsing (>99%), Oj brings only minor speed gains (~1%) + def self.import dir="." + datasets = {} bundles = JSON.parse(RestClientWrapper.get('https://data.enanomapper.net/bundle?media=application%2Fjson'))["dataset"] - File.open(File.join(dir,"bundles.json"),"w+"){|f| f.puts JSON.pretty_generate(bundles)} - # bundles - # id/summary - # id/compound - # id/substance - # id/property - bundles.each do |bundle| + datasets[bundle["URI"]] = Dataset.find_or_create_by(:source => bundle["URI"],:name => bundle["title"]) $logger.debug bundle["title"] nanoparticles = JSON.parse(RestClientWrapper.get(bundle["dataset"]+"?media=application%2Fjson"))["dataEntry"] - $logger.debug nanoparticles.size - nanoparticles.each do |nanoparticle| - uuid = nanoparticle["values"]["https://data.enanomapper.net/identifier/uuid"] - $logger.debug uuid - File.open(File.join(dir,"nanoparticle-#{uuid}.json"),"w+"){|f| f.puts JSON.pretty_generate(nanoparticle)} - studies = JSON.parse(RestClientWrapper.get(File.join(nanoparticle["compound"]["URI"],"study")))["study"] - $logger.debug uuid if studies.size < 1 - studies.each do |study| - File.open(File.join(dir,"study-#{study["uuid"]}.json"),"w+"){|f| f.puts JSON.pretty_generate(study)} - end - end - end - end - - def self.import dir="." - start_time = Time.now - t1 = 0 - t2 = 0 - datasets = {} - JSON.parse(File.read(File.join(dir,"bundles.json"))).each do |bundle| - if bundle["id"] == 3 - datasets[bundle["URI"]] = Dataset.find_or_create_by(:source => bundle["URI"],:name => bundle["title"]) - end - end - # TODO this is only for protein corona - Dir[File.join(dir,"study-F*.json")].each do |s| - t = Time.now - study = JSON.parse(File.read(s)) - np = JSON.parse(File.read(File.join(dir,"nanoparticle-#{study['owner']['substance']['uuid']}.json"))) - core_id = nil - coating_ids = [] - np["composition"].each do |c| - uri = c["component"]["compound"]["URI"] - uri = CGI.escape File.join(uri,"&media=application/json") - data = JSON.parse(RestClientWrapper.get "https://data.enanomapper.net/query/compound/url/all?media=application/json&search=#{uri}") - smiles = data["dataEntry"][0]["values"]["https://data.enanomapper.net/feature/http%3A%2F%2Fwww.opentox.org%2Fapi%2F1.1%23SMILESDefault"] - names = [] - names << data["dataEntry"][0]["values"]["https://data.enanomapper.net/feature/http%3A%2F%2Fwww.opentox.org%2Fapi%2F1.1%23ChemicalNameDefault"] - names << data["dataEntry"][0]["values"]["https://data.enanomapper.net/feature/http%3A%2F%2Fwww.opentox.org%2Fapi%2F1.1%23IUPACNameDefault"] - if smiles - compound = Compound.find_or_create_by(:smiles => smiles) - compound.names = names.compact - else - compound = Compound.find_or_create_by(:names => names) - end - compound.save - if c["relation"] == "HAS_CORE" - core_id = compound.id.to_s - elsif c["relation"] == "HAS_COATING" - coating_ids << compound.id.to_s + nanoparticles.each_with_index do |np,n| + core_id = nil + coating_ids = [] + np["composition"].each do |c| + uri = c["component"]["compound"]["URI"] + uri = CGI.escape File.join(uri,"&media=application/json") + data = JSON.parse(RestClientWrapper.get "https://data.enanomapper.net/query/compound/url/all?media=application/json&search=#{uri}") + smiles = data["dataEntry"][0]["values"]["https://data.enanomapper.net/feature/http%3A%2F%2Fwww.opentox.org%2Fapi%2F1.1%23SMILESDefault"] + names = [] + names << data["dataEntry"][0]["values"]["https://data.enanomapper.net/feature/http%3A%2F%2Fwww.opentox.org%2Fapi%2F1.1%23ChemicalNameDefault"] + names << data["dataEntry"][0]["values"]["https://data.enanomapper.net/feature/http%3A%2F%2Fwww.opentox.org%2Fapi%2F1.1%23IUPACNameDefault"] + if smiles + compound = Compound.find_or_create_by(:smiles => smiles) + compound.name = names.first + compound.names = names.compact + else + compound = Compound.find_or_create_by(:name => names.first,:names => names) + end + compound.save + if c["relation"] == "HAS_CORE" + core_id = compound.id.to_s + elsif c["relation"] == "HAS_COATING" + coating_ids << compound.id.to_s + end + end if np["composition"] + nanoparticle = Nanoparticle.find_or_create_by( + :name => np["values"]["https://data.enanomapper.net/identifier/name"], + :source => np["compound"]["URI"], + :core_id => core_id, + :coating_ids => coating_ids + ) + np["bundles"].keys.each do |bundle_uri| + nanoparticle.dataset_ids << datasets[bundle_uri].id end - end if np["composition"] - nanoparticle = Nanoparticle.find_or_create_by( - :name => np["values"]["https://data.enanomapper.net/identifier/name"], - :source => np["compound"]["URI"], - :core_id => core_id, - :coating_ids => coating_ids - ) - np["bundles"].keys.each do |bundle_uri| - nanoparticle.dataset_ids << datasets[bundle_uri].id - end - dataset = datasets[np["bundles"].keys.first] - proteomics_features = {} - category = study["protocol"]["topcategory"] - source = study["protocol"]["category"]["term"] - - study["effects"].each do |effect| - - effect["result"]["textValue"] ? klass = NominalFeature : klass = NumericFeature - effect["conditions"].delete_if { |k, v| v.nil? } - - if study["protocol"]["category"]["title"].match(/Proteomics/) and effect["result"]["textValue"] and effect["result"]["textValue"].length > 50 # parse proteomics data - - JSON.parse(effect["result"]["textValue"]).each do |identifier, value| # time critical step - proteomics_features[identifier] ||= NumericFeature.find_or_create_by(:name => identifier, :category => "Proteomics", :unit => "Spectral counts", :source => source,:measured => true) - nanoparticle.parse_ambit_value proteomics_features[identifier], value, dataset - end - else - name = effect["endpoint"] - unit = effect["result"]["unit"] - warnings = [] - case name - when "Log2 transformed" # use a sensible name - name = "log2(Net cell association)" - warnings = ["Original name was 'Log2 transformed'"] - unit = "log2(mL/ug(Mg))" - when "Total protein (BCA assay)" - category = "P-CHEM" - warnings = ["Category changed from TOX to P-CHEM"] + studies = JSON.parse(RestClientWrapper.get(File.join(np["compound"]["URI"],"study")))["study"] + studies.each do |study| + dataset = datasets[np["bundles"].keys.first] + proteomics_features = {} + category = study["protocol"]["topcategory"] + source = study["protocol"]["category"]["term"] + study["effects"].each do |effect| + + effect["result"]["textValue"] ? klass = NominalFeature : klass = NumericFeature + effect["conditions"].delete_if { |k, v| v.nil? } + + if study["protocol"]["category"]["title"].match(/Proteomics/) and effect["result"]["textValue"] and effect["result"]["textValue"].length > 50 # parse proteomics data + + JSON.parse(effect["result"]["textValue"]).each do |identifier, value| # time critical step + proteomics_features[identifier] ||= NumericFeature.find_or_create_by(:name => identifier, :category => "Proteomics", :unit => "Spectral counts", :source => source,:measured => true) + nanoparticle.parse_ambit_value proteomics_features[identifier], value, dataset + end + else + name = effect["endpoint"] + unit = effect["result"]["unit"] + warnings = [] + case name + when "Log2 transformed" # use a sensible name + name = "log2(Net cell association)" + warnings = ["Original name was 'Log2 transformed'"] + unit = "log2(mL/ug(Mg))" + when "Total protein (BCA assay)" + category = "P-CHEM" + warnings = ["Category changed from TOX to P-CHEM"] + end + feature = klass.find_or_create_by( + :name => name, + :unit => unit, + :category => category, + :conditions => effect["conditions"], + :source => study["protocol"]["category"]["term"], + :measured => true, + :warnings => warnings + ) + nanoparticle.parse_ambit_value feature, effect["result"], dataset + end end - feature = klass.find_or_create_by( - :name => name, - :unit => unit, - :category => category, - :conditions => effect["conditions"], - :source => study["protocol"]["category"]["term"], - :measured => true, - :warnings => warnings - ) - nanoparticle.parse_ambit_value feature, effect["result"], dataset end + nanoparticle.save + print "#{n}, " end - p nanoparticle - nanoparticle.save end datasets.each { |u,d| d.save } end -- cgit v1.2.3