From 515e644423998a94f07be06bf6460bcf4f96f968 Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Fri, 8 Apr 2016 13:05:52 +0200
Subject: enm import test

---
 lib/import.rb | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 77 insertions(+)
 create mode 100644 lib/import.rb

(limited to 'lib/import.rb')

diff --git a/lib/import.rb b/lib/import.rb
new file mode 100644
index 0000000..86c633a
--- /dev/null
+++ b/lib/import.rb
@@ -0,0 +1,77 @@
+module OpenTox
+
+  module Import
+
+    class Enanomapper
+      include OpenTox
+
+      def self.import
+        #get list of bundle URIs
+        bundles = JSON.parse(RestClientWrapper.get('https://data.enanomapper.net/bundle?media=application%2Fjson'))["dataset"]
+        bundles.each do |bundle|
+          uri = bundle["URI"]
+          nanoparticles = JSON.parse(RestClientWrapper.get(bundle["dataset"]+"?media=application%2Fjson"))["dataEntry"]
+          features = JSON.parse(RestClientWrapper.get(bundle["property"]+"?media=application%2Fjson"))["feature"]
+          nanoparticles.each do |np|
+              nanoparticle = Nanoparticle.find_or_create_by(
+                :name => np["values"]["https://data.enanomapper.net/identifier/name"],
+                :source => np["compound"]["URI"],
+              )
+              nanoparticle.bundles << uri
+              np["composition"].each do |comp|
+                case comp["relation"]
+                when "HAS_CORE"
+                  nanoparticle.core = comp["component"]["compound"]["URI"]
+                when "HAS_COATING"
+                  nanoparticle.coating << comp["component"]["compound"]["URI"]
+                end
+              end if np["composition"]
+              np["values"].each do |u,v|
+                if u.match(/property/)
+                  name, unit, source = nil
+                  features.each do |uri,feat|
+                    if u.match(/#{uri}/)
+                      name = feat["title"]
+                      unit = feat["units"]
+                      source = uri
+                    end
+                  end
+                  feature = Feature.find_or_create_by(
+                    :name => name,
+                    :unit => unit,
+                    :source => source
+                  )
+                end
+                v.each{|value| nanoparticle.parse_ambit_value feature, value} if v.is_a? Array
+              end
+              nanoparticle.bundles.uniq!
+              nanoparticle.physchem_descriptors.each{|f,v| v.uniq!}
+              nanoparticle.toxicities.each{|f,v| v.uniq!}
+              nanoparticle.save!
+          end
+        end
+
+        def self.dump
+          #get list of bundle URIs
+          `wget 'https://data.enanomapper.net/bundle?media=application%2Fjson' -O bundles.json`
+          json = JSON.parse File.read('./bundles.json')
+          json["dataset"].each do |dataset|
+            uri = dataset["URI"]
+            id = uri.split("/").last
+            `wget --header='accept:application/json' '#{uri}' -O 'bundle#{id}'`
+            `wget --header='accept:application/json' '#{dataset["summary"]}' -O 'summary#{id}.json'`
+            `wget --header='accept:application/json' '#{dataset["compound"]}' -O 'compound#{id}.json'`
+            `wget --header='accept:application/json' '#{dataset["substance"]}' -O 'substance#{id}.json'`
+            `wget --header='accept:application/json' '#{dataset["property"]}' -O 'property#{id}.json'`
+            `wget --header='accept:application/json' '#{dataset["dataset"]}' -O 'dataset#{id}.json'`
+            `wget --header='accept:application/json' '#{dataset["matrix"]}' -O 'matrix#{id}.json'`
+          end
+        end
+      end
+
+    end
+
+  end
+
+end
+
-- 
cgit v1.2.3


From 64f1f32ced77afb278bdb7c27397c5299a73675c Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Wed, 13 Apr 2016 18:18:36 +0200
Subject: improved enm import

---
 lib/import.rb | 105 ++++++++++++++++++++++++++++++++--------------------------
 1 file changed, 58 insertions(+), 47 deletions(-)

(limited to 'lib/import.rb')

diff --git a/lib/import.rb b/lib/import.rb
index 86c633a..cf0855e 100644
--- a/lib/import.rb
+++ b/lib/import.rb
@@ -8,64 +8,75 @@ module OpenTox
       def self.import
         #get list of bundle URIs
         bundles = JSON.parse(RestClientWrapper.get('https://data.enanomapper.net/bundle?media=application%2Fjson'))["dataset"]
+        datasets = []
         bundles.each do |bundle|
           uri = bundle["URI"]
+          dataset = Dataset.find_or_create_by(:source => bundle["URI"],:name => bundle["title"])
           nanoparticles = JSON.parse(RestClientWrapper.get(bundle["dataset"]+"?media=application%2Fjson"))["dataEntry"]
           features = JSON.parse(RestClientWrapper.get(bundle["property"]+"?media=application%2Fjson"))["feature"]
           nanoparticles.each do |np|
-              nanoparticle = Nanoparticle.find_or_create_by(
-                :name => np["values"]["https://data.enanomapper.net/identifier/name"],
-                :source => np["compound"]["URI"],
-              )
-              nanoparticle.bundles << uri
-              np["composition"].each do |comp|
-                case comp["relation"]
-                when "HAS_CORE"
-                  nanoparticle.core = comp["component"]["compound"]["URI"]
-                when "HAS_COATING"
-                  nanoparticle.coating << comp["component"]["compound"]["URI"]
-                end
-              end if np["composition"]
-              np["values"].each do |u,v|
-                if u.match(/property/)
-                  name, unit, source = nil
-                  features.each do |uri,feat|
-                    if u.match(/#{uri}/)
-                      name = feat["title"]
-                      unit = feat["units"]
-                      source = uri
-                    end
+            nanoparticle = Nanoparticle.find_or_create_by(
+              :name => np["values"]["https://data.enanomapper.net/identifier/name"],
+              :source => np["compound"]["URI"],
+            )
+            dataset.data_entries[nanoparticle.id.to_s] ||= {}
+            nanoparticle.bundles << uri
+            nanoparticle.dataset_ids << dataset.id
+            np["composition"].each do |comp|
+              case comp["relation"]
+              when "HAS_CORE"
+                nanoparticle.core = comp["component"]["compound"]["URI"]
+              when "HAS_COATING"
+                nanoparticle.coating << comp["component"]["compound"]["URI"]
+              end
+            end if np["composition"]
+            np["values"].each do |u,v|
+              if u.match(/property/)
+                name, unit, source = nil
+                features.each do |uri,feat|
+                  if u.match(/#{uri}/)
+                    name = feat["title"]
+                    unit = feat["units"]
+                    source = uri
                   end
-                  feature = Feature.find_or_create_by(
-                    :name => name,
-                    :unit => unit,
-                    :source => source
-                  )
                 end
-                v.each{|value| nanoparticle.parse_ambit_value feature, value} if v.is_a? Array
+                feature = Feature.find_or_create_by(
+                  :name => name,
+                  :unit => unit,
+                  :source => source
+                )
               end
-              nanoparticle.bundles.uniq!
-              nanoparticle.physchem_descriptors.each{|f,v| v.uniq!}
-              nanoparticle.toxicities.each{|f,v| v.uniq!}
-              nanoparticle.save!
+              v.each{|value| nanoparticle.parse_ambit_value feature, value} if v.is_a? Array
+            end
+            nanoparticle.bundles.uniq!
+            nanoparticle.physchem_descriptors.each{|f,v| v.uniq!}
+            #nanoparticle.toxicities.each{|f,v| v.uniq!}
+            nanoparticle.toxicities.each do |f,v|
+              dataset.data_entries[nanoparticle.id.to_s][f.to_s] ||= []
+              dataset.data_entries[nanoparticle.id.to_s][f.to_s] += v
+            end
+            nanoparticle.save
           end
+          dataset.save
+          datasets << dataset
         end
+        datasets.collect{|d| d.id}
+      end
 
-        def self.dump
-          #get list of bundle URIs
-          `wget 'https://data.enanomapper.net/bundle?media=application%2Fjson' -O bundles.json`
-          json = JSON.parse File.read('./bundles.json')
-          json["dataset"].each do |dataset|
-            uri = dataset["URI"]
-            id = uri.split("/").last
-            `wget --header='accept:application/json' '#{uri}' -O 'bundle#{id}'`
-            `wget --header='accept:application/json' '#{dataset["summary"]}' -O 'summary#{id}.json'`
-            `wget --header='accept:application/json' '#{dataset["compound"]}' -O 'compound#{id}.json'`
-            `wget --header='accept:application/json' '#{dataset["substance"]}' -O 'substance#{id}.json'`
-            `wget --header='accept:application/json' '#{dataset["property"]}' -O 'property#{id}.json'`
-            `wget --header='accept:application/json' '#{dataset["dataset"]}' -O 'dataset#{id}.json'`
-            `wget --header='accept:application/json' '#{dataset["matrix"]}' -O 'matrix#{id}.json'`
-          end
+      def self.dump
+        #get list of bundle URIs
+        `wget 'https://data.enanomapper.net/bundle?media=application%2Fjson' -O bundles.json`
+        json = JSON.parse File.read('./bundles.json')
+        json["dataset"].each do |dataset|
+          uri = dataset["URI"]
+          id = uri.split("/").last
+          `wget --header='accept:application/json' '#{uri}' -O 'bundle#{id}'`
+          `wget --header='accept:application/json' '#{dataset["summary"]}' -O 'summary#{id}.json'`
+          `wget --header='accept:application/json' '#{dataset["compound"]}' -O 'compound#{id}.json'`
+          `wget --header='accept:application/json' '#{dataset["substance"]}' -O 'substance#{id}.json'`
+          `wget --header='accept:application/json' '#{dataset["property"]}' -O 'property#{id}.json'`
+          `wget --header='accept:application/json' '#{dataset["dataset"]}' -O 'dataset#{id}.json'`
+          `wget --header='accept:application/json' '#{dataset["matrix"]}' -O 'matrix#{id}.json'`
         end
       end
 
-- 
cgit v1.2.3


From 4662e845c12e3e623ec9bec208c42cd4b1886047 Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Fri, 15 Apr 2016 14:58:17 +0200
Subject: enm study import

---
 lib/import.rb | 53 +++++++++++++++++++----------------------------------
 1 file changed, 19 insertions(+), 34 deletions(-)

(limited to 'lib/import.rb')

diff --git a/lib/import.rb b/lib/import.rb
index cf0855e..9091207 100644
--- a/lib/import.rb
+++ b/lib/import.rb
@@ -19,43 +19,28 @@ module OpenTox
               :name => np["values"]["https://data.enanomapper.net/identifier/name"],
               :source => np["compound"]["URI"],
             )
-            dataset.data_entries[nanoparticle.id.to_s] ||= {}
-            nanoparticle.bundles << uri
-            nanoparticle.dataset_ids << dataset.id
-            np["composition"].each do |comp|
-              case comp["relation"]
-              when "HAS_CORE"
-                nanoparticle.core = comp["component"]["compound"]["URI"]
-              when "HAS_COATING"
-                nanoparticle.coating << comp["component"]["compound"]["URI"]
-              end
-            end if np["composition"]
-            np["values"].each do |u,v|
-              if u.match(/property/)
-                name, unit, source = nil
-                features.each do |uri,feat|
-                  if u.match(/#{uri}/)
-                    name = feat["title"]
-                    unit = feat["units"]
-                    source = uri
-                  end
-                end
-                feature = Feature.find_or_create_by(
-                  :name => name,
-                  :unit => unit,
-                  :source => source
+            dataset.substance_ids << nanoparticle.id
+            dataset.substance_ids.uniq!
+            studies = JSON.parse(RestClientWrapper.get(File.join(np["compound"]["URI"],"study")))["study"]
+            studies.each do |study|
+              study["effects"].each do |effect|
+                effect["result"]["textValue"] ?  klass = NominalFeature : klass = NumericFeature
+                # TODO parse core/coating
+                # TODO parse proteomics, they come as a large textValue
+                $logger.debug File.join(np["compound"]["URI"],"study")
+                effect["conditions"].delete_if { |k, v| v.nil? }
+                feature = klass.find_or_create_by(
+                  :source => File.join(np["compound"]["URI"],"study"),
+                  :name => "#{study["protocol"]["category"]["title"]} #{study["protocol"]["endpoint"]}",
+                  :unit => effect["result"]["unit"],
+                  :category => study["protocol"]["topcategory"],
+                  :conditions => effect["conditions"]
                 )
+                nanoparticle.parse_ambit_value feature, effect["result"]
+                dataset.feature_ids << feature.id 
+                dataset.feature_ids.uniq!
               end
-              v.each{|value| nanoparticle.parse_ambit_value feature, value} if v.is_a? Array
-            end
-            nanoparticle.bundles.uniq!
-            nanoparticle.physchem_descriptors.each{|f,v| v.uniq!}
-            #nanoparticle.toxicities.each{|f,v| v.uniq!}
-            nanoparticle.toxicities.each do |f,v|
-              dataset.data_entries[nanoparticle.id.to_s][f.to_s] ||= []
-              dataset.data_entries[nanoparticle.id.to_s][f.to_s] += v
             end
-            nanoparticle.save
           end
           dataset.save
           datasets << dataset
-- 
cgit v1.2.3


From cfc64a2966ab38698e499f0b44f41208ee77a07f Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Tue, 26 Apr 2016 17:38:15 +0200
Subject: first nanomaterial prediction

---
 lib/import.rb | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

(limited to 'lib/import.rb')

diff --git a/lib/import.rb b/lib/import.rb
index 9091207..3c1edfe 100644
--- a/lib/import.rb
+++ b/lib/import.rb
@@ -30,7 +30,7 @@ module OpenTox
                 $logger.debug File.join(np["compound"]["URI"],"study")
                 effect["conditions"].delete_if { |k, v| v.nil? }
                 feature = klass.find_or_create_by(
-                  :source => File.join(np["compound"]["URI"],"study"),
+                  #:source => File.join(np["compound"]["URI"],"study"),
                   :name => "#{study["protocol"]["category"]["title"]} #{study["protocol"]["endpoint"]}",
                   :unit => effect["result"]["unit"],
                   :category => study["protocol"]["topcategory"],
@@ -48,6 +48,22 @@ module OpenTox
         datasets.collect{|d| d.id}
       end
 
+=begin
+      def self.import_ld # defunct, AMBIT JSON_LD does not have substance entries
+        #get list of bundle URIs
+        bundles = JSON.parse(RestClientWrapper.get('https://data.enanomapper.net/bundle?media=application%2Fjson'))["dataset"]
+        datasets = []
+        bundles.each do |bundle|
+          uri = bundle["URI"]
+          study = JSON.parse(`curl -H 'Accept:application/ld+json' '#{uri}/substance'`)
+          study["@graph"].each do |i|
+            puts i.to_yaml if i.keys.include? "sio:has-value"
+          end
+        end
+        datasets.collect{|d| d.id}
+      end
+=end
+
       def self.dump
         #get list of bundle URIs
         `wget 'https://data.enanomapper.net/bundle?media=application%2Fjson' -O bundles.json`
-- 
cgit v1.2.3


From ab7b37541b4f8a762be737009631d3eefd898b4a Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Thu, 5 May 2016 16:14:02 +0200
Subject: ambit mirror, import from mirrored json, proteomics import

---
 lib/import.rb | 101 ++++++++++++++++++++++++++++++++--------------------------
 1 file changed, 55 insertions(+), 46 deletions(-)

(limited to 'lib/import.rb')

diff --git a/lib/import.rb b/lib/import.rb
index 3c1edfe..11cb367 100644
--- a/lib/import.rb
+++ b/lib/import.rb
@@ -5,47 +5,73 @@ module OpenTox
     class Enanomapper
       include OpenTox
 
-      def self.import
+      def self.mirror dir="."
         #get list of bundle URIs
         bundles = JSON.parse(RestClientWrapper.get('https://data.enanomapper.net/bundle?media=application%2Fjson'))["dataset"]
+        File.open(File.join(dir,"bundles.json"),"w+"){|f| f.puts JSON.pretty_generate(bundles)}
         datasets = []
         bundles.each do |bundle|
-          uri = bundle["URI"]
-          dataset = Dataset.find_or_create_by(:source => bundle["URI"],:name => bundle["title"])
           nanoparticles = JSON.parse(RestClientWrapper.get(bundle["dataset"]+"?media=application%2Fjson"))["dataEntry"]
-          features = JSON.parse(RestClientWrapper.get(bundle["property"]+"?media=application%2Fjson"))["feature"]
-          nanoparticles.each do |np|
-            nanoparticle = Nanoparticle.find_or_create_by(
-              :name => np["values"]["https://data.enanomapper.net/identifier/name"],
-              :source => np["compound"]["URI"],
-            )
-            dataset.substance_ids << nanoparticle.id
-            dataset.substance_ids.uniq!
-            studies = JSON.parse(RestClientWrapper.get(File.join(np["compound"]["URI"],"study")))["study"]
+          nanoparticles.each do |nanoparticle|
+            uuid = nanoparticle["values"]["https://data.enanomapper.net/identifier/uuid"]
+            $logger.debug uuid
+            File.open(File.join(dir,"nanoparticle-#{uuid}.json"),"w+"){|f| f.puts JSON.pretty_generate(nanoparticle)}
+            studies = JSON.parse(RestClientWrapper.get(File.join(nanoparticle["compound"]["URI"],"study")))["study"]
             studies.each do |study|
-              study["effects"].each do |effect|
-                effect["result"]["textValue"] ?  klass = NominalFeature : klass = NumericFeature
-                # TODO parse core/coating
-                # TODO parse proteomics, they come as a large textValue
-                $logger.debug File.join(np["compound"]["URI"],"study")
-                effect["conditions"].delete_if { |k, v| v.nil? }
+              File.open(File.join(dir,"study-#{uuid}.json"),"w+"){|f| f.puts JSON.pretty_generate(study)}
+            end
+          end
+        end
+      end
+
+      def self.import dir="."
+        datasets = {}
+        JSON.parse(File.read(File.join(dir,"bundles.json"))).each do |bundle|
+          datasets[bundle["URI"]] = Dataset.find_or_create_by(:source => bundle["URI"],:name => bundle["title"])
+        end
+        Dir[File.join(dir,"study*.json")].each do |s|
+          study = JSON.parse(File.read(s))
+          np = JSON.parse(File.read(File.join(dir,"nanoparticle-#{study['owner']['substance']['uuid']}.json")))
+          nanoparticle = Nanoparticle.find_or_create_by(
+            :name => np["values"]["https://data.enanomapper.net/identifier/name"],
+            :source => np["compound"]["URI"],
+          )
+          np["bundles"].keys.each do |bundle_uri|
+            datasets[bundle_uri].substance_ids << nanoparticle.id
+            nanoparticle["dataset_ids"] << datasets[bundle_uri].id
+          end
+          study["effects"].each do |effect|
+            effect["result"]["textValue"] ?  klass = NominalFeature : klass = NumericFeature
+            # TODO parse core/coating
+            # TODO parse proteomics, they come as a large textValue
+            #$logger.debug File.join(np["compound"]["URI"],"study")
+            effect["conditions"].delete_if { |k, v| v.nil? }
+            # parse proteomics data
+            if study["protocol"]["category"]["title"].match(/Proteomics/) and effect["result"]["textValue"] and effect["result"]["textValue"].length > 50
+              JSON.parse(effect["result"]["textValue"]).each do |identifier, value|
                 feature = klass.find_or_create_by(
-                  #:source => File.join(np["compound"]["URI"],"study"),
-                  :name => "#{study["protocol"]["category"]["title"]} #{study["protocol"]["endpoint"]}",
-                  :unit => effect["result"]["unit"],
-                  :category => study["protocol"]["topcategory"],
-                  :conditions => effect["conditions"]
+                  :name => identifier,
+                  :category => "Proteomics",
                 )
-                nanoparticle.parse_ambit_value feature, effect["result"]
-                dataset.feature_ids << feature.id 
-                dataset.feature_ids.uniq!
+                nanoparticle.parse_ambit_value feature, value
               end
+            else
+              feature = klass.find_or_create_by(
+                :name => "#{study["protocol"]["category"]["title"]} #{study["protocol"]["endpoint"]}",
+                :unit => effect["result"]["unit"],
+                :category => study["protocol"]["topcategory"],
+                :conditions => effect["conditions"]
+              )
+              nanoparticle.parse_ambit_value feature, effect["result"]
             end
           end
-          dataset.save
-          datasets << dataset
+          nanoparticle.save
+        end
+        datasets.each do |u,d|
+          d.feature_ids.uniq!
+          d.substance_ids.uniq!
+          d.save
         end
-        datasets.collect{|d| d.id}
       end
 
 =begin
@@ -64,23 +90,6 @@ module OpenTox
       end
 =end
 
-      def self.dump
-        #get list of bundle URIs
-        `wget 'https://data.enanomapper.net/bundle?media=application%2Fjson' -O bundles.json`
-        json = JSON.parse File.read('./bundles.json')
-        json["dataset"].each do |dataset|
-          uri = dataset["URI"]
-          id = uri.split("/").last
-          `wget --header='accept:application/json' '#{uri}' -O 'bundle#{id}'`
-          `wget --header='accept:application/json' '#{dataset["summary"]}' -O 'summary#{id}.json'`
-          `wget --header='accept:application/json' '#{dataset["compound"]}' -O 'compound#{id}.json'`
-          `wget --header='accept:application/json' '#{dataset["substance"]}' -O 'substance#{id}.json'`
-          `wget --header='accept:application/json' '#{dataset["property"]}' -O 'property#{id}.json'`
-          `wget --header='accept:application/json' '#{dataset["dataset"]}' -O 'dataset#{id}.json'`
-          `wget --header='accept:application/json' '#{dataset["matrix"]}' -O 'matrix#{id}.json'`
-        end
-      end
-
     end
 
   end
-- 
cgit v1.2.3


From 611bac891177f8d9185d45486dd574b6ef4d1912 Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Mon, 9 May 2016 15:11:46 +0200
Subject: nanoparticle models fixed

---
 lib/import.rb | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'lib/import.rb')

diff --git a/lib/import.rb b/lib/import.rb
index 11cb367..dfe5e2d 100644
--- a/lib/import.rb
+++ b/lib/import.rb
@@ -40,10 +40,10 @@ module OpenTox
             datasets[bundle_uri].substance_ids << nanoparticle.id
             nanoparticle["dataset_ids"] << datasets[bundle_uri].id
           end
+          bundle = datasets[np["bundles"].keys.first].id if np["bundles"].size == 1
           study["effects"].each do |effect|
             effect["result"]["textValue"] ?  klass = NominalFeature : klass = NumericFeature
             # TODO parse core/coating
-            # TODO parse proteomics, they come as a large textValue
             #$logger.debug File.join(np["compound"]["URI"],"study")
             effect["conditions"].delete_if { |k, v| v.nil? }
             # parse proteomics data
@@ -53,7 +53,7 @@ module OpenTox
                   :name => identifier,
                   :category => "Proteomics",
                 )
-                nanoparticle.parse_ambit_value feature, value
+                nanoparticle.parse_ambit_value feature, value, bundle
               end
             else
               feature = klass.find_or_create_by(
@@ -62,7 +62,7 @@ module OpenTox
                 :category => study["protocol"]["topcategory"],
                 :conditions => effect["conditions"]
               )
-              nanoparticle.parse_ambit_value feature, effect["result"]
+              nanoparticle.parse_ambit_value feature, effect["result"], bundle
             end
           end
           nanoparticle.save
-- 
cgit v1.2.3


From b8bb12c8a163c238d7d4387c1914e2100bb660df Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Thu, 12 May 2016 15:23:01 +0200
Subject: enm study import fixed

---
 lib/import.rb | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'lib/import.rb')

diff --git a/lib/import.rb b/lib/import.rb
index dfe5e2d..3c6966e 100644
--- a/lib/import.rb
+++ b/lib/import.rb
@@ -9,16 +9,18 @@ module OpenTox
         #get list of bundle URIs
         bundles = JSON.parse(RestClientWrapper.get('https://data.enanomapper.net/bundle?media=application%2Fjson'))["dataset"]
         File.open(File.join(dir,"bundles.json"),"w+"){|f| f.puts JSON.pretty_generate(bundles)}
-        datasets = []
         bundles.each do |bundle|
+          p bundle["title"]
           nanoparticles = JSON.parse(RestClientWrapper.get(bundle["dataset"]+"?media=application%2Fjson"))["dataEntry"]
+          p nanoparticles.size
           nanoparticles.each do |nanoparticle|
             uuid = nanoparticle["values"]["https://data.enanomapper.net/identifier/uuid"]
             $logger.debug uuid
             File.open(File.join(dir,"nanoparticle-#{uuid}.json"),"w+"){|f| f.puts JSON.pretty_generate(nanoparticle)}
             studies = JSON.parse(RestClientWrapper.get(File.join(nanoparticle["compound"]["URI"],"study")))["study"]
+            p uuid if studies.size < 1 
             studies.each do |study|
-              File.open(File.join(dir,"study-#{uuid}.json"),"w+"){|f| f.puts JSON.pretty_generate(study)}
+              File.open(File.join(dir,"study-#{study["uuid"]}.json"),"w+"){|f| f.puts JSON.pretty_generate(study)}
             end
           end
         end
@@ -37,7 +39,7 @@ module OpenTox
             :source => np["compound"]["URI"],
           )
           np["bundles"].keys.each do |bundle_uri|
-            datasets[bundle_uri].substance_ids << nanoparticle.id
+            #datasets[bundle_uri].substance_ids << nanoparticle.id
             nanoparticle["dataset_ids"] << datasets[bundle_uri].id
           end
           bundle = datasets[np["bundles"].keys.first].id if np["bundles"].size == 1
-- 
cgit v1.2.3


From c90644211e214a50f6fdb3a936bf247f45f1f4be Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Fri, 13 May 2016 13:38:24 +0200
Subject: compound tests fixed

---
 lib/import.rb | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

(limited to 'lib/import.rb')

diff --git a/lib/import.rb b/lib/import.rb
index 3c6966e..2dcc361 100644
--- a/lib/import.rb
+++ b/lib/import.rb
@@ -39,7 +39,6 @@ module OpenTox
             :source => np["compound"]["URI"],
           )
           np["bundles"].keys.each do |bundle_uri|
-            #datasets[bundle_uri].substance_ids << nanoparticle.id
             nanoparticle["dataset_ids"] << datasets[bundle_uri].id
           end
           bundle = datasets[np["bundles"].keys.first].id if np["bundles"].size == 1
@@ -59,7 +58,7 @@ module OpenTox
               end
             else
               feature = klass.find_or_create_by(
-                :name => "#{study["protocol"]["category"]["title"]} #{study["protocol"]["endpoint"]}",
+                :name => effect["endpoint"],
                 :unit => effect["result"]["unit"],
                 :category => study["protocol"]["topcategory"],
                 :conditions => effect["conditions"]
@@ -69,11 +68,7 @@ module OpenTox
           end
           nanoparticle.save
         end
-        datasets.each do |u,d|
-          d.feature_ids.uniq!
-          d.substance_ids.uniq!
-          d.save
-        end
+        datasets.each { |u,d| d.save }
       end
 
 =begin
-- 
cgit v1.2.3


From f46ba3b7262f5b551c81fc9396c5b7f0cac7f030 Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Fri, 27 May 2016 19:16:16 +0200
Subject: first correlation of nanoparticle predictions

---
 lib/import.rb | 57 ++++++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 42 insertions(+), 15 deletions(-)

(limited to 'lib/import.rb')

diff --git a/lib/import.rb b/lib/import.rb
index 2dcc361..80d4579 100644
--- a/lib/import.rb
+++ b/lib/import.rb
@@ -10,15 +10,15 @@ module OpenTox
         bundles = JSON.parse(RestClientWrapper.get('https://data.enanomapper.net/bundle?media=application%2Fjson'))["dataset"]
         File.open(File.join(dir,"bundles.json"),"w+"){|f| f.puts JSON.pretty_generate(bundles)}
         bundles.each do |bundle|
-          p bundle["title"]
+          $logger.debug bundle["title"]
           nanoparticles = JSON.parse(RestClientWrapper.get(bundle["dataset"]+"?media=application%2Fjson"))["dataEntry"]
-          p nanoparticles.size
+          $logger.debug nanoparticles.size
           nanoparticles.each do |nanoparticle|
             uuid = nanoparticle["values"]["https://data.enanomapper.net/identifier/uuid"]
             $logger.debug uuid
             File.open(File.join(dir,"nanoparticle-#{uuid}.json"),"w+"){|f| f.puts JSON.pretty_generate(nanoparticle)}
             studies = JSON.parse(RestClientWrapper.get(File.join(nanoparticle["compound"]["URI"],"study")))["study"]
-            p uuid if studies.size < 1 
+            $logger.debug uuid if studies.size < 1 
             studies.each do |study|
               File.open(File.join(dir,"study-#{study["uuid"]}.json"),"w+"){|f| f.puts JSON.pretty_generate(study)}
             end
@@ -27,35 +27,58 @@ module OpenTox
       end
 
       def self.import dir="."
+        start_time = Time.now
+        t1 = 0
+        t2 = 0
         datasets = {}
         JSON.parse(File.read(File.join(dir,"bundles.json"))).each do |bundle|
           datasets[bundle["URI"]] = Dataset.find_or_create_by(:source => bundle["URI"],:name => bundle["title"])
         end
         Dir[File.join(dir,"study*.json")].each do |s|
+          t = Time.now
           study = JSON.parse(File.read(s))
           np = JSON.parse(File.read(File.join(dir,"nanoparticle-#{study['owner']['substance']['uuid']}.json")))
+          core = {}
+          coating = []
+          np["composition"].each do |c|
+            if c["relation"] == "HAS_CORE"
+              core = {
+                :uri => c["component"]["compound"]["URI"],
+                :name => c["component"]["values"]["https://data.enanomapper.net/feature/http%3A%2F%2Fwww.opentox.org%2Fapi%2F1.1%23ChemicalNameDefault"]
+              }
+            elsif c["relation"] == "HAS_COATING"
+              coating << {
+                :uri => c["component"]["compound"]["URI"],
+                :name => c["component"]["values"]["https://data.enanomapper.net/feature/http%3A%2F%2Fwww.opentox.org%2Fapi%2F1.1%23ChemicalNameDefault"]
+              }
+            end
+          end if np["composition"]
           nanoparticle = Nanoparticle.find_or_create_by(
             :name => np["values"]["https://data.enanomapper.net/identifier/name"],
             :source => np["compound"]["URI"],
+            :core => core,
+            :coating => coating
           )
           np["bundles"].keys.each do |bundle_uri|
-            nanoparticle["dataset_ids"] << datasets[bundle_uri].id
+            nanoparticle.dataset_ids << datasets[bundle_uri].id
           end
-          bundle = datasets[np["bundles"].keys.first].id if np["bundles"].size == 1
+          dataset = datasets[np["bundles"].keys.first]
+          proteomics_features = {}
           study["effects"].each do |effect|
             effect["result"]["textValue"] ?  klass = NominalFeature : klass = NumericFeature
-            # TODO parse core/coating
-            #$logger.debug File.join(np["compound"]["URI"],"study")
             effect["conditions"].delete_if { |k, v| v.nil? }
-            # parse proteomics data
-            if study["protocol"]["category"]["title"].match(/Proteomics/) and effect["result"]["textValue"] and effect["result"]["textValue"].length > 50
+            if study["protocol"]["category"]["title"].match(/Proteomics/) and effect["result"]["textValue"] and effect["result"]["textValue"].length > 50 # parse proteomics data
+=begin
               JSON.parse(effect["result"]["textValue"]).each do |identifier, value|
-                feature = klass.find_or_create_by(
-                  :name => identifier,
-                  :category => "Proteomics",
-                )
-                nanoparticle.parse_ambit_value feature, value, bundle
+                # time critical step
+              t = Time.now
+                proteomics_features[identifier] ||= klass.find_or_create_by(:name => identifier, :category => "Proteomics")
+              t1 += Time.now - t
+              t = Time.now
+                nanoparticle.parse_ambit_value proteomics_features[identifier], value, dataset
+              t2 += Time.now - t
               end
+=end
             else
               feature = klass.find_or_create_by(
                 :name => effect["endpoint"],
@@ -63,10 +86,14 @@ module OpenTox
                 :category => study["protocol"]["topcategory"],
                 :conditions => effect["conditions"]
               )
-              nanoparticle.parse_ambit_value feature, effect["result"], bundle
+              nanoparticle.parse_ambit_value feature, effect["result"], dataset
             end
           end
           nanoparticle.save
+          #p "Total time: #{Time.now - start_time}"
+          #p "Proteomics features: #{t1}"
+          #p "Proteomics values: #{t2}"
+          #p "Time2: #{t2}"
         end
         datasets.each { |u,d| d.save }
       end
-- 
cgit v1.2.3


From eec5bddbd35c9ecee8021128508d8718bccb4fe3 Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Thu, 2 Jun 2016 17:54:48 +0200
Subject: local pls regression for nanoparticle proteomics

---
 lib/import.rb | 15 ++-------------
 1 file changed, 2 insertions(+), 13 deletions(-)

(limited to 'lib/import.rb')

diff --git a/lib/import.rb b/lib/import.rb
index 80d4579..4c49e5e 100644
--- a/lib/import.rb
+++ b/lib/import.rb
@@ -68,17 +68,10 @@ module OpenTox
             effect["result"]["textValue"] ?  klass = NominalFeature : klass = NumericFeature
             effect["conditions"].delete_if { |k, v| v.nil? }
             if study["protocol"]["category"]["title"].match(/Proteomics/) and effect["result"]["textValue"] and effect["result"]["textValue"].length > 50 # parse proteomics data
-=begin
-              JSON.parse(effect["result"]["textValue"]).each do |identifier, value|
-                # time critical step
-              t = Time.now
-                proteomics_features[identifier] ||= klass.find_or_create_by(:name => identifier, :category => "Proteomics")
-              t1 += Time.now - t
-              t = Time.now
+              JSON.parse(effect["result"]["textValue"]).each do |identifier, value| # time critical step
+                proteomics_features[identifier] ||= NumericFeature.find_or_create_by(:name => identifier, :category => "Proteomics")
                 nanoparticle.parse_ambit_value proteomics_features[identifier], value, dataset
-              t2 += Time.now - t
               end
-=end
             else
               feature = klass.find_or_create_by(
                 :name => effect["endpoint"],
@@ -90,10 +83,6 @@ module OpenTox
             end
           end
           nanoparticle.save
-          #p "Total time: #{Time.now - start_time}"
-          #p "Proteomics features: #{t1}"
-          #p "Proteomics values: #{t2}"
-          #p "Time2: #{t2}"
         end
         datasets.each { |u,d| d.save }
       end
-- 
cgit v1.2.3


From f93aad7227c7bb3702fd28aab2d289f1ca9ce7e9 Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Thu, 21 Jul 2016 17:35:20 +0200
Subject: correlation plot fixed

---
 lib/import.rb | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'lib/import.rb')

diff --git a/lib/import.rb b/lib/import.rb
index 4c49e5e..e187e3c 100644
--- a/lib/import.rb
+++ b/lib/import.rb
@@ -73,6 +73,8 @@ module OpenTox
                 nanoparticle.parse_ambit_value proteomics_features[identifier], value, dataset
               end
             else
+              name = effect["endpoint"]
+              name = "log2(Net cell association)" if name == "Log2 transformed" # use a sensible name
               feature = klass.find_or_create_by(
                 :name => effect["endpoint"],
                 :unit => effect["result"]["unit"],
-- 
cgit v1.2.3


From 9e8537997d84e78e6545a66a0d09c33e76c8b7cf Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Fri, 30 Sep 2016 17:11:30 +0200
Subject: npo uri as source, spectral count unit for proteomics features

---
 lib/import.rb | 31 +++++++++++++++++++++++++------
 1 file changed, 25 insertions(+), 6 deletions(-)

(limited to 'lib/import.rb')

diff --git a/lib/import.rb b/lib/import.rb
index e187e3c..17894a9 100644
--- a/lib/import.rb
+++ b/lib/import.rb
@@ -62,24 +62,43 @@ module OpenTox
           np["bundles"].keys.each do |bundle_uri|
             nanoparticle.dataset_ids << datasets[bundle_uri].id
           end
+
           dataset = datasets[np["bundles"].keys.first]
           proteomics_features = {}
+          category = study["protocol"]["topcategory"]
+          source = study["protocol"]["category"]["term"]
+
           study["effects"].each do |effect|
+
             effect["result"]["textValue"] ?  klass = NominalFeature : klass = NumericFeature
             effect["conditions"].delete_if { |k, v| v.nil? }
+
             if study["protocol"]["category"]["title"].match(/Proteomics/) and effect["result"]["textValue"] and effect["result"]["textValue"].length > 50 # parse proteomics data
+
               JSON.parse(effect["result"]["textValue"]).each do |identifier, value| # time critical step
-                proteomics_features[identifier] ||= NumericFeature.find_or_create_by(:name => identifier, :category => "Proteomics")
+                proteomics_features[identifier] ||= NumericFeature.find_or_create_by(:name => identifier, :category => "Proteomics", :unit => "Spectral counts", :source => source)
                 nanoparticle.parse_ambit_value proteomics_features[identifier], value, dataset
               end
             else
               name = effect["endpoint"]
-              name = "log2(Net cell association)" if name == "Log2 transformed" # use a sensible name
+              unit = effect["result"]["unit"]
+              warnings = []
+              case name
+              when "Log2 transformed" # use a sensible name
+                name = "log2(Net cell association)"
+                warnings = ["Original name was 'Log2 transformed'"]
+                unit = "log2(mL/ug(Mg))"
+              when "Total protein (BCA assay)"
+                category = "P-CHEM"
+                warnings = ["Category changed from TOX to P-CHEM"]
+              end
               feature = klass.find_or_create_by(
-                :name => effect["endpoint"],
-                :unit => effect["result"]["unit"],
-                :category => study["protocol"]["topcategory"],
-                :conditions => effect["conditions"]
+                :name => name,
+                :unit => unit,
+                :category => category,
+                :conditions => effect["conditions"],
+                :source => study["protocol"]["category"]["term"],
+                :warnings => warnings
               )
               nanoparticle.parse_ambit_value feature, effect["result"], dataset
             end
-- 
cgit v1.2.3


From 91787edb3682900bc5a2feeca66e5142f387fcc6 Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Fri, 7 Oct 2016 10:25:58 +0200
Subject: unified interface for prediction algorithms

---
 lib/import.rb | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'lib/import.rb')

diff --git a/lib/import.rb b/lib/import.rb
index 17894a9..8e57401 100644
--- a/lib/import.rb
+++ b/lib/import.rb
@@ -76,7 +76,7 @@ module OpenTox
             if study["protocol"]["category"]["title"].match(/Proteomics/) and effect["result"]["textValue"] and effect["result"]["textValue"].length > 50 # parse proteomics data
 
               JSON.parse(effect["result"]["textValue"]).each do |identifier, value| # time critical step
-                proteomics_features[identifier] ||= NumericFeature.find_or_create_by(:name => identifier, :category => "Proteomics", :unit => "Spectral counts", :source => source)
+                proteomics_features[identifier] ||= NumericFeature.find_or_create_by(:name => identifier, :category => "Proteomics", :unit => "Spectral counts", :source => source,:measured => true)
                 nanoparticle.parse_ambit_value proteomics_features[identifier], value, dataset
               end
             else
@@ -98,6 +98,7 @@ module OpenTox
                 :category => category,
                 :conditions => effect["conditions"],
                 :source => study["protocol"]["category"]["term"],
+                :measured => true,
                 :warnings => warnings
               )
               nanoparticle.parse_ambit_value feature, effect["result"], dataset
-- 
cgit v1.2.3


From 9e7b36613e98601de7b2ceb2d4442e11f1ae868a Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Thu, 10 Nov 2016 12:23:46 +0100
Subject: intermediate commit, may be defunct

---
 lib/import.rb | 44 +++++++++++++++++++++++++++++++-------------
 1 file changed, 31 insertions(+), 13 deletions(-)

(limited to 'lib/import.rb')

diff --git a/lib/import.rb b/lib/import.rb
index 8e57401..541c9b5 100644
--- a/lib/import.rb
+++ b/lib/import.rb
@@ -9,6 +9,12 @@ module OpenTox
         #get list of bundle URIs
         bundles = JSON.parse(RestClientWrapper.get('https://data.enanomapper.net/bundle?media=application%2Fjson'))["dataset"]
         File.open(File.join(dir,"bundles.json"),"w+"){|f| f.puts JSON.pretty_generate(bundles)}
+        # bundles
+          # id/summary
+          # id/compound
+          # id/substance
+          # id/property
+
         bundles.each do |bundle|
           $logger.debug bundle["title"]
           nanoparticles = JSON.parse(RestClientWrapper.get(bundle["dataset"]+"?media=application%2Fjson"))["dataEntry"]
@@ -32,32 +38,43 @@ module OpenTox
         t2 = 0
         datasets = {}
         JSON.parse(File.read(File.join(dir,"bundles.json"))).each do |bundle|
+          if bundle["id"] == 3
           datasets[bundle["URI"]] = Dataset.find_or_create_by(:source => bundle["URI"],:name => bundle["title"])
+          end
         end
-        Dir[File.join(dir,"study*.json")].each do |s|
+        # TODO this is only for protein corona
+        Dir[File.join(dir,"study-F*.json")].each do |s|
           t = Time.now
           study = JSON.parse(File.read(s))
           np = JSON.parse(File.read(File.join(dir,"nanoparticle-#{study['owner']['substance']['uuid']}.json")))
-          core = {}
-          coating = []
+          core_id = nil
+          coating_ids = []
           np["composition"].each do |c|
+            uri = c["component"]["compound"]["URI"]
+            uri = CGI.escape File.join(uri,"&media=application/json")
+            data = JSON.parse(RestClientWrapper.get "https://data.enanomapper.net/query/compound/url/all?media=application/json&search=#{uri}")
+            smiles = data["dataEntry"][0]["values"]["https://data.enanomapper.net/feature/http%3A%2F%2Fwww.opentox.org%2Fapi%2F1.1%23SMILESDefault"]
+            names = []
+            names << data["dataEntry"][0]["values"]["https://data.enanomapper.net/feature/http%3A%2F%2Fwww.opentox.org%2Fapi%2F1.1%23ChemicalNameDefault"]
+            names << data["dataEntry"][0]["values"]["https://data.enanomapper.net/feature/http%3A%2F%2Fwww.opentox.org%2Fapi%2F1.1%23IUPACNameDefault"]
+            if smiles
+              compound = Compound.find_or_create_by(:smiles => smiles)
+              compound.names = names.compact
+            else
+              compound = Compound.find_or_create_by(:names => names)
+            end
+            compound.save
             if c["relation"] == "HAS_CORE"
-              core = {
-                :uri => c["component"]["compound"]["URI"],
-                :name => c["component"]["values"]["https://data.enanomapper.net/feature/http%3A%2F%2Fwww.opentox.org%2Fapi%2F1.1%23ChemicalNameDefault"]
-              }
+              core_id = compound.id.to_s
             elsif c["relation"] == "HAS_COATING"
-              coating << {
-                :uri => c["component"]["compound"]["URI"],
-                :name => c["component"]["values"]["https://data.enanomapper.net/feature/http%3A%2F%2Fwww.opentox.org%2Fapi%2F1.1%23ChemicalNameDefault"]
-              }
+              coating_ids << compound.id.to_s
             end
           end if np["composition"]
           nanoparticle = Nanoparticle.find_or_create_by(
             :name => np["values"]["https://data.enanomapper.net/identifier/name"],
             :source => np["compound"]["URI"],
-            :core => core,
-            :coating => coating
+            :core_id => core_id,
+            :coating_ids => coating_ids
           )
           np["bundles"].keys.each do |bundle_uri|
             nanoparticle.dataset_ids << datasets[bundle_uri].id
@@ -104,6 +121,7 @@ module OpenTox
               nanoparticle.parse_ambit_value feature, effect["result"], dataset
             end
           end
+    p nanoparticle
           nanoparticle.save
         end
         datasets.each { |u,d| d.save }
-- 
cgit v1.2.3


From 9a06f2ff5ae6bdbe7dc90555599e186f1585e0d2 Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Thu, 10 Nov 2016 15:27:26 +0100
Subject: Model::NanoPrediction parameters

---
 lib/import.rb | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'lib/import.rb')

diff --git a/lib/import.rb b/lib/import.rb
index 541c9b5..8f640b1 100644
--- a/lib/import.rb
+++ b/lib/import.rb
@@ -5,7 +5,12 @@ module OpenTox
     class Enanomapper
       include OpenTox
 
-      def self.mirror dir="."
+      def self.mirror dir=nil
+        # clean download dir
+        dir ||= File.join(File.dirname(__FILE__),"..","data","enm")
+        FileUtils.rm_rf dir
+        FileUtils.mkdir_p dir
+
         #get list of bundle URIs
         bundles = JSON.parse(RestClientWrapper.get('https://data.enanomapper.net/bundle?media=application%2Fjson'))["dataset"]
         File.open(File.join(dir,"bundles.json"),"w+"){|f| f.puts JSON.pretty_generate(bundles)}
-- 
cgit v1.2.3


From b6116bc4705066da30668ff3370f3b1c307e44e7 Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Fri, 11 Nov 2016 13:07:53 +0100
Subject: enm import fixed

---
 lib/import.rb | 194 ++++++++++++++++++++++++----------------------------------
 1 file changed, 80 insertions(+), 114 deletions(-)

(limited to 'lib/import.rb')

diff --git a/lib/import.rb b/lib/import.rb
index 8f640b1..aa2ee75 100644
--- a/lib/import.rb
+++ b/lib/import.rb
@@ -5,129 +5,95 @@ module OpenTox
     class Enanomapper
       include OpenTox
 
-      def self.mirror dir=nil
-        # clean download dir
-        dir ||= File.join(File.dirname(__FILE__),"..","data","enm")
-        FileUtils.rm_rf dir
-        FileUtils.mkdir_p dir
-
-        #get list of bundle URIs
+      # time critical step: JSON parsing (>99%), Oj brings only minor speed gains (~1%)
+      def self.import dir="."
+        datasets = {}
         bundles = JSON.parse(RestClientWrapper.get('https://data.enanomapper.net/bundle?media=application%2Fjson'))["dataset"]
-        File.open(File.join(dir,"bundles.json"),"w+"){|f| f.puts JSON.pretty_generate(bundles)}
-        # bundles
-          # id/summary
-          # id/compound
-          # id/substance
-          # id/property
-
         bundles.each do |bundle|
+          datasets[bundle["URI"]] = Dataset.find_or_create_by(:source => bundle["URI"],:name => bundle["title"])
           $logger.debug bundle["title"]
           nanoparticles = JSON.parse(RestClientWrapper.get(bundle["dataset"]+"?media=application%2Fjson"))["dataEntry"]
-          $logger.debug nanoparticles.size
-          nanoparticles.each do |nanoparticle|
-            uuid = nanoparticle["values"]["https://data.enanomapper.net/identifier/uuid"]
-            $logger.debug uuid
-            File.open(File.join(dir,"nanoparticle-#{uuid}.json"),"w+"){|f| f.puts JSON.pretty_generate(nanoparticle)}
-            studies = JSON.parse(RestClientWrapper.get(File.join(nanoparticle["compound"]["URI"],"study")))["study"]
-            $logger.debug uuid if studies.size < 1 
-            studies.each do |study|
-              File.open(File.join(dir,"study-#{study["uuid"]}.json"),"w+"){|f| f.puts JSON.pretty_generate(study)}
-            end
-          end
-        end
-      end
-
-      def self.import dir="."
-        start_time = Time.now
-        t1 = 0
-        t2 = 0
-        datasets = {}
-        JSON.parse(File.read(File.join(dir,"bundles.json"))).each do |bundle|
-          if bundle["id"] == 3
-          datasets[bundle["URI"]] = Dataset.find_or_create_by(:source => bundle["URI"],:name => bundle["title"])
-          end
-        end
-        # TODO this is only for protein corona
-        Dir[File.join(dir,"study-F*.json")].each do |s|
-          t = Time.now
-          study = JSON.parse(File.read(s))
-          np = JSON.parse(File.read(File.join(dir,"nanoparticle-#{study['owner']['substance']['uuid']}.json")))
-          core_id = nil
-          coating_ids = []
-          np["composition"].each do |c|
-            uri = c["component"]["compound"]["URI"]
-            uri = CGI.escape File.join(uri,"&media=application/json")
-            data = JSON.parse(RestClientWrapper.get "https://data.enanomapper.net/query/compound/url/all?media=application/json&search=#{uri}")
-            smiles = data["dataEntry"][0]["values"]["https://data.enanomapper.net/feature/http%3A%2F%2Fwww.opentox.org%2Fapi%2F1.1%23SMILESDefault"]
-            names = []
-            names << data["dataEntry"][0]["values"]["https://data.enanomapper.net/feature/http%3A%2F%2Fwww.opentox.org%2Fapi%2F1.1%23ChemicalNameDefault"]
-            names << data["dataEntry"][0]["values"]["https://data.enanomapper.net/feature/http%3A%2F%2Fwww.opentox.org%2Fapi%2F1.1%23IUPACNameDefault"]
-            if smiles
-              compound = Compound.find_or_create_by(:smiles => smiles)
-              compound.names = names.compact
-            else
-              compound = Compound.find_or_create_by(:names => names)
-            end
-            compound.save
-            if c["relation"] == "HAS_CORE"
-              core_id = compound.id.to_s
-            elsif c["relation"] == "HAS_COATING"
-              coating_ids << compound.id.to_s
+          nanoparticles.each_with_index do |np,n|
+            core_id = nil
+            coating_ids = []
+            np["composition"].each do |c|
+              uri = c["component"]["compound"]["URI"]
+              uri = CGI.escape File.join(uri,"&media=application/json")
+              data = JSON.parse(RestClientWrapper.get "https://data.enanomapper.net/query/compound/url/all?media=application/json&search=#{uri}")
+              smiles = data["dataEntry"][0]["values"]["https://data.enanomapper.net/feature/http%3A%2F%2Fwww.opentox.org%2Fapi%2F1.1%23SMILESDefault"]
+              names = []
+              names << data["dataEntry"][0]["values"]["https://data.enanomapper.net/feature/http%3A%2F%2Fwww.opentox.org%2Fapi%2F1.1%23ChemicalNameDefault"]
+              names << data["dataEntry"][0]["values"]["https://data.enanomapper.net/feature/http%3A%2F%2Fwww.opentox.org%2Fapi%2F1.1%23IUPACNameDefault"]
+              if smiles
+                compound = Compound.find_or_create_by(:smiles => smiles)
+                compound.name = names.first
+                compound.names = names.compact
+              else
+                compound = Compound.find_or_create_by(:name => names.first,:names => names)
+              end
+              compound.save
+              if c["relation"] == "HAS_CORE"
+                core_id = compound.id.to_s
+              elsif c["relation"] == "HAS_COATING"
+                coating_ids << compound.id.to_s
+              end
+            end if np["composition"]
+            nanoparticle = Nanoparticle.find_or_create_by(
+              :name => np["values"]["https://data.enanomapper.net/identifier/name"],
+              :source => np["compound"]["URI"],
+              :core_id => core_id,
+              :coating_ids => coating_ids
+            )
+            np["bundles"].keys.each do |bundle_uri|
+              nanoparticle.dataset_ids << datasets[bundle_uri].id
             end
-          end if np["composition"]
-          nanoparticle = Nanoparticle.find_or_create_by(
-            :name => np["values"]["https://data.enanomapper.net/identifier/name"],
-            :source => np["compound"]["URI"],
-            :core_id => core_id,
-            :coating_ids => coating_ids
-          )
-          np["bundles"].keys.each do |bundle_uri|
-            nanoparticle.dataset_ids << datasets[bundle_uri].id
-          end
 
-          dataset = datasets[np["bundles"].keys.first]
-          proteomics_features = {}
-          category = study["protocol"]["topcategory"]
-          source = study["protocol"]["category"]["term"]
-
-          study["effects"].each do |effect|
-
-            effect["result"]["textValue"] ?  klass = NominalFeature : klass = NumericFeature
-            effect["conditions"].delete_if { |k, v| v.nil? }
-
-            if study["protocol"]["category"]["title"].match(/Proteomics/) and effect["result"]["textValue"] and effect["result"]["textValue"].length > 50 # parse proteomics data
-
-              JSON.parse(effect["result"]["textValue"]).each do |identifier, value| # time critical step
-                proteomics_features[identifier] ||= NumericFeature.find_or_create_by(:name => identifier, :category => "Proteomics", :unit => "Spectral counts", :source => source,:measured => true)
-                nanoparticle.parse_ambit_value proteomics_features[identifier], value, dataset
-              end
-            else
-              name = effect["endpoint"]
-              unit = effect["result"]["unit"]
-              warnings = []
-              case name
-              when "Log2 transformed" # use a sensible name
-                name = "log2(Net cell association)"
-                warnings = ["Original name was 'Log2 transformed'"]
-                unit = "log2(mL/ug(Mg))"
-              when "Total protein (BCA assay)"
-                category = "P-CHEM"
-                warnings = ["Category changed from TOX to P-CHEM"]
+            studies = JSON.parse(RestClientWrapper.get(File.join(np["compound"]["URI"],"study")))["study"]
+            studies.each do |study|
+              dataset = datasets[np["bundles"].keys.first]
+              proteomics_features = {}
+              category = study["protocol"]["topcategory"]
+              source = study["protocol"]["category"]["term"]
+              study["effects"].each do |effect|
+
+                effect["result"]["textValue"] ?  klass = NominalFeature : klass = NumericFeature
+                effect["conditions"].delete_if { |k, v| v.nil? }
+
+                if study["protocol"]["category"]["title"].match(/Proteomics/) and effect["result"]["textValue"] and effect["result"]["textValue"].length > 50 # parse proteomics data
+
+                  JSON.parse(effect["result"]["textValue"]).each do |identifier, value| # time critical step
+                    proteomics_features[identifier] ||= NumericFeature.find_or_create_by(:name => identifier, :category => "Proteomics", :unit => "Spectral counts", :source => source,:measured => true)
+                    nanoparticle.parse_ambit_value proteomics_features[identifier], value, dataset
+                  end
+                else
+                  name = effect["endpoint"]
+                  unit = effect["result"]["unit"]
+                  warnings = []
+                  case name
+                  when "Log2 transformed" # use a sensible name
+                    name = "log2(Net cell association)"
+                    warnings = ["Original name was 'Log2 transformed'"]
+                    unit = "log2(mL/ug(Mg))"
+                  when "Total protein (BCA assay)"
+                    category = "P-CHEM"
+                    warnings = ["Category changed from TOX to P-CHEM"]
+                  end
+                  feature = klass.find_or_create_by(
+                    :name => name,
+                    :unit => unit,
+                    :category => category,
+                    :conditions => effect["conditions"],
+                    :source => study["protocol"]["category"]["term"],
+                    :measured => true,
+                    :warnings => warnings
+                  )
+                  nanoparticle.parse_ambit_value feature, effect["result"], dataset
+                end
               end
-              feature = klass.find_or_create_by(
-                :name => name,
-                :unit => unit,
-                :category => category,
-                :conditions => effect["conditions"],
-                :source => study["protocol"]["category"]["term"],
-                :measured => true,
-                :warnings => warnings
-              )
-              nanoparticle.parse_ambit_value feature, effect["result"], dataset
             end
+            nanoparticle.save
+            print "#{n}, "
           end
-    p nanoparticle
-          nanoparticle.save
         end
         datasets.each { |u,d| d.save }
       end
-- 
cgit v1.2.3