From f46ba3b7262f5b551c81fc9396c5b7f0cac7f030 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Fri, 27 May 2016 19:16:16 +0200 Subject: first correlation of nanoparticle predictions --- lib/import.rb | 57 ++++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 42 insertions(+), 15 deletions(-) (limited to 'lib/import.rb') diff --git a/lib/import.rb b/lib/import.rb index 2dcc361..80d4579 100644 --- a/lib/import.rb +++ b/lib/import.rb @@ -10,15 +10,15 @@ module OpenTox bundles = JSON.parse(RestClientWrapper.get('https://data.enanomapper.net/bundle?media=application%2Fjson'))["dataset"] File.open(File.join(dir,"bundles.json"),"w+"){|f| f.puts JSON.pretty_generate(bundles)} bundles.each do |bundle| - p bundle["title"] + $logger.debug bundle["title"] nanoparticles = JSON.parse(RestClientWrapper.get(bundle["dataset"]+"?media=application%2Fjson"))["dataEntry"] - p nanoparticles.size + $logger.debug nanoparticles.size nanoparticles.each do |nanoparticle| uuid = nanoparticle["values"]["https://data.enanomapper.net/identifier/uuid"] $logger.debug uuid File.open(File.join(dir,"nanoparticle-#{uuid}.json"),"w+"){|f| f.puts JSON.pretty_generate(nanoparticle)} studies = JSON.parse(RestClientWrapper.get(File.join(nanoparticle["compound"]["URI"],"study")))["study"] - p uuid if studies.size < 1 + $logger.debug uuid if studies.size < 1 studies.each do |study| File.open(File.join(dir,"study-#{study["uuid"]}.json"),"w+"){|f| f.puts JSON.pretty_generate(study)} end @@ -27,35 +27,58 @@ module OpenTox end def self.import dir="." + start_time = Time.now + t1 = 0 + t2 = 0 datasets = {} JSON.parse(File.read(File.join(dir,"bundles.json"))).each do |bundle| datasets[bundle["URI"]] = Dataset.find_or_create_by(:source => bundle["URI"],:name => bundle["title"]) end Dir[File.join(dir,"study*.json")].each do |s| + t = Time.now study = JSON.parse(File.read(s)) np = JSON.parse(File.read(File.join(dir,"nanoparticle-#{study['owner']['substance']['uuid']}.json"))) + core = {} + coating = [] + np["composition"].each do |c| + if c["relation"] == "HAS_CORE" + core = { + :uri => c["component"]["compound"]["URI"], + :name => c["component"]["values"]["https://data.enanomapper.net/feature/http%3A%2F%2Fwww.opentox.org%2Fapi%2F1.1%23ChemicalNameDefault"] + } + elsif c["relation"] == "HAS_COATING" + coating << { + :uri => c["component"]["compound"]["URI"], + :name => c["component"]["values"]["https://data.enanomapper.net/feature/http%3A%2F%2Fwww.opentox.org%2Fapi%2F1.1%23ChemicalNameDefault"] + } + end + end if np["composition"] nanoparticle = Nanoparticle.find_or_create_by( :name => np["values"]["https://data.enanomapper.net/identifier/name"], :source => np["compound"]["URI"], + :core => core, + :coating => coating ) np["bundles"].keys.each do |bundle_uri| - nanoparticle["dataset_ids"] << datasets[bundle_uri].id + nanoparticle.dataset_ids << datasets[bundle_uri].id end - bundle = datasets[np["bundles"].keys.first].id if np["bundles"].size == 1 + dataset = datasets[np["bundles"].keys.first] + proteomics_features = {} study["effects"].each do |effect| effect["result"]["textValue"] ? klass = NominalFeature : klass = NumericFeature - # TODO parse core/coating - #$logger.debug File.join(np["compound"]["URI"],"study") effect["conditions"].delete_if { |k, v| v.nil? } - # parse proteomics data - if study["protocol"]["category"]["title"].match(/Proteomics/) and effect["result"]["textValue"] and effect["result"]["textValue"].length > 50 + if study["protocol"]["category"]["title"].match(/Proteomics/) and effect["result"]["textValue"] and effect["result"]["textValue"].length > 50 # parse proteomics data +=begin JSON.parse(effect["result"]["textValue"]).each do |identifier, value| - feature = klass.find_or_create_by( - :name => identifier, - :category => "Proteomics", - ) - nanoparticle.parse_ambit_value feature, value, bundle + # time critical step + t = Time.now + proteomics_features[identifier] ||= klass.find_or_create_by(:name => identifier, :category => "Proteomics") + t1 += Time.now - t + t = Time.now + nanoparticle.parse_ambit_value proteomics_features[identifier], value, dataset + t2 += Time.now - t end +=end else feature = klass.find_or_create_by( :name => effect["endpoint"], @@ -63,10 +86,14 @@ module OpenTox :category => study["protocol"]["topcategory"], :conditions => effect["conditions"] ) - nanoparticle.parse_ambit_value feature, effect["result"], bundle + nanoparticle.parse_ambit_value feature, effect["result"], dataset end end nanoparticle.save + #p "Total time: #{Time.now - start_time}" + #p "Proteomics features: #{t1}" + #p "Proteomics values: #{t2}" + #p "Time2: #{t2}" end datasets.each { |u,d| d.save } end -- cgit v1.2.3