From ab7b37541b4f8a762be737009631d3eefd898b4a Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Thu, 5 May 2016 16:14:02 +0200
Subject: ambit mirror, import from mirrored json, proteomics import

---
 lib/import.rb | 101 ++++++++++++++++++++++++++++++++--------------------------
 1 file changed, 55 insertions(+), 46 deletions(-)

(limited to 'lib/import.rb')

diff --git a/lib/import.rb b/lib/import.rb
index 3c1edfe..11cb367 100644
--- a/lib/import.rb
+++ b/lib/import.rb
@@ -5,47 +5,73 @@ module OpenTox
     class Enanomapper
       include OpenTox
 
-      def self.import
+      def self.mirror dir="." # download the bundle list, every nanoparticle and every study from data.enanomapper.net as JSON files into dir
         #get list of bundle URIs
         bundles = JSON.parse(RestClientWrapper.get('https://data.enanomapper.net/bundle?media=application%2Fjson'))["dataset"]
+        File.open(File.join(dir,"bundles.json"),"w+"){|f| f.puts JSON.pretty_generate(bundles)}
         datasets = []
         bundles.each do |bundle|
-          uri = bundle["URI"]
-          dataset = Dataset.find_or_create_by(:source => bundle["URI"],:name => bundle["title"])
           nanoparticles = JSON.parse(RestClientWrapper.get(bundle["dataset"]+"?media=application%2Fjson"))["dataEntry"]
-          features = JSON.parse(RestClientWrapper.get(bundle["property"]+"?media=application%2Fjson"))["feature"]
-          nanoparticles.each do |np|
-            nanoparticle = Nanoparticle.find_or_create_by(
-              :name => np["values"]["https://data.enanomapper.net/identifier/name"],
-              :source => np["compound"]["URI"],
-            )
-            dataset.substance_ids << nanoparticle.id
-            dataset.substance_ids.uniq!
-            studies = JSON.parse(RestClientWrapper.get(File.join(np["compound"]["URI"],"study")))["study"]
+          nanoparticles.each do |nanoparticle|
+            uuid = nanoparticle["values"]["https://data.enanomapper.net/identifier/uuid"]
+            $logger.debug uuid
+            File.open(File.join(dir,"nanoparticle-#{uuid}.json"),"w+"){|f| f.puts JSON.pretty_generate(nanoparticle)}
+            studies = JSON.parse(RestClientWrapper.get(File.join(nanoparticle["compound"]["URI"],"study")))["study"]
             studies.each do |study|
-              study["effects"].each do |effect|
-                effect["result"]["textValue"] ?  klass = NominalFeature : klass = NumericFeature
-                # TODO parse core/coating
-                # TODO parse proteomics, they come as a large textValue
-                $logger.debug File.join(np["compound"]["URI"],"study")
-                effect["conditions"].delete_if { |k, v| v.nil? }
+              File.open(File.join(dir,"study-#{uuid}-#{study["uuid"] || studies.index(study)}.json"),"w+"){|f| f.puts JSON.pretty_generate(study)} # one file per study: naming by nanoparticle uuid alone made each study overwrite the previous one; study["uuid"] is assumed to be the study's own id (TODO confirm), list position is the fallback
+            end
+          end
+        end
+      end
+
+      def self.import dir="." # load the JSON files found in dir (bundles.json, study*.json, nanoparticle-<uuid>.json) into Dataset, Nanoparticle and feature records
+        datasets = {} # bundle URI => Dataset
+        JSON.parse(File.read(File.join(dir,"bundles.json"))).each do |bundle|
+          datasets[bundle["URI"]] = Dataset.find_or_create_by(:source => bundle["URI"],:name => bundle["title"])
+        end
+        Dir[File.join(dir,"study*.json")].each do |s|
+          study = JSON.parse(File.read(s))
+          np = JSON.parse(File.read(File.join(dir,"nanoparticle-#{study['owner']['substance']['uuid']}.json"))) # the study names its nanoparticle file via owner.substance.uuid
+          nanoparticle = Nanoparticle.find_or_create_by(
+            :name => np["values"]["https://data.enanomapper.net/identifier/name"],
+            :source => np["compound"]["URI"],
+          )
+          np["bundles"].keys.each do |bundle_uri|
+            datasets[bundle_uri].substance_ids << nanoparticle.id # NOTE(review): datasets[bundle_uri] is nil for a URI that is not in bundles.json, which would raise NoMethodError here
+            nanoparticle["dataset_ids"] << datasets[bundle_uri].id
+          end
+          study["effects"].each do |effect|
+            effect["result"]["textValue"] ?  klass = NominalFeature : klass = NumericFeature # results with a textValue get a NominalFeature, all others a NumericFeature
+            # TODO parse core/coating
+            # proteomics results come as one large JSON textValue; they are handled in the Proteomics branch below
+            #$logger.debug File.join(np["compound"]["URI"],"study")
+            effect["conditions"].delete_if { |k, v| v.nil? }
+            # parse proteomics data: every identifier/value pair of the JSON textValue becomes its own feature
+            if study["protocol"]["category"]["title"].match(/Proteomics/) and effect["result"]["textValue"] and effect["result"]["textValue"].length > 50
+              JSON.parse(effect["result"]["textValue"]).each do |identifier, value|
                 feature = klass.find_or_create_by(
-                  #:source => File.join(np["compound"]["URI"],"study"),
-                  :name => "#{study["protocol"]["category"]["title"]} #{study["protocol"]["endpoint"]}",
-                  :unit => effect["result"]["unit"],
-                  :category => study["protocol"]["topcategory"],
-                  :conditions => effect["conditions"]
+                  :name => identifier,
+                  :category => "Proteomics",
                 )
-                nanoparticle.parse_ambit_value feature, effect["result"]
-                dataset.feature_ids << feature.id 
-                dataset.feature_ids.uniq!
+                nanoparticle.parse_ambit_value feature, value
               end
+            else
+              feature = klass.find_or_create_by(
+                :name => "#{study["protocol"]["category"]["title"]} #{study["protocol"]["endpoint"]}",
+                :unit => effect["result"]["unit"],
+                :category => study["protocol"]["topcategory"],
+                :conditions => effect["conditions"]
+              )
+              nanoparticle.parse_ambit_value feature, effect["result"]
             end
           end
-          dataset.save
-          datasets << dataset
+          nanoparticle.save
+        end
+        datasets.each do |u,d| # last expression of the method, so import now returns the datasets hash rather than an array of ids
+          d.feature_ids.uniq! # NOTE(review): nothing in this method appends to feature_ids any more; confirm that is intended
+          d.substance_ids.uniq!
+          d.save
         end
-        datasets.collect{|d| d.id}
       end
 
 =begin
@@ -64,23 +90,6 @@ module OpenTox
       end
 =end
 
-      def self.dump
-        #get list of bundle URIs
-        `wget 'https://data.enanomapper.net/bundle?media=application%2Fjson' -O bundles.json`
-        json = JSON.parse File.read('./bundles.json')
-        json["dataset"].each do |dataset|
-          uri = dataset["URI"]
-          id = uri.split("/").last
-          `wget --header='accept:application/json' '#{uri}' -O 'bundle#{id}'`
-          `wget --header='accept:application/json' '#{dataset["summary"]}' -O 'summary#{id}.json'`
-          `wget --header='accept:application/json' '#{dataset["compound"]}' -O 'compound#{id}.json'`
-          `wget --header='accept:application/json' '#{dataset["substance"]}' -O 'substance#{id}.json'`
-          `wget --header='accept:application/json' '#{dataset["property"]}' -O 'property#{id}.json'`
-          `wget --header='accept:application/json' '#{dataset["dataset"]}' -O 'dataset#{id}.json'`
-          `wget --header='accept:application/json' '#{dataset["matrix"]}' -O 'matrix#{id}.json'`
-        end
-      end
-
     end
 
   end
-- 
cgit v1.2.3