From 9e7b36613e98601de7b2ceb2d4442e11f1ae868a Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Thu, 10 Nov 2016 12:23:46 +0100
Subject: intermediate commit, may be defunct

---
 lib/compound.rb     |  3 ---
 lib/import.rb       | 44 +++++++++++++++++++++++++++++++-------------
 lib/model.rb        | 11 +++++++----
 lib/nanoparticle.rb | 46 ++++++++++++++++++++++++++++++++++------------
 4 files changed, 72 insertions(+), 32 deletions(-)

(limited to 'lib')

diff --git a/lib/compound.rb b/lib/compound.rb
index a399169..8a1143b 100644
--- a/lib/compound.rb
+++ b/lib/compound.rb
@@ -136,9 +136,6 @@ module OpenTox
     # @param inchi [String] smiles InChI string
     # @return [OpenTox::Compound] Compound
     def self.from_inchi inchi
-      # Temporary workaround for OpenBabels Inchi bug
-      # http://sourceforge.net/p/openbabel/bugs/957/
-      # bug has not been fixed in latest git/development version
       #smiles = `echo "#{inchi}" | "#{File.join(File.dirname(__FILE__),"..","openbabel","bin","babel")}" -iinchi - -ocan`.chomp.strip
       smiles = obconversion(inchi,"inchi","can")
       if smiles.empty?
diff --git a/lib/import.rb b/lib/import.rb
index 8e57401..541c9b5 100644
--- a/lib/import.rb
+++ b/lib/import.rb
@@ -9,6 +9,12 @@ module OpenTox
         #get list of bundle URIs
         bundles = JSON.parse(RestClientWrapper.get('https://data.enanomapper.net/bundle?media=application%2Fjson'))["dataset"]
         File.open(File.join(dir,"bundles.json"),"w+"){|f| f.puts JSON.pretty_generate(bundles)}
+        # bundles
+          # id/summary
+          # id/compound
+          # id/substance
+          # id/property
+
         bundles.each do |bundle|
           $logger.debug bundle["title"]
           nanoparticles = JSON.parse(RestClientWrapper.get(bundle["dataset"]+"?media=application%2Fjson"))["dataEntry"]
@@ -32,32 +38,43 @@ module OpenTox
         t2 = 0
         datasets = {}
         JSON.parse(File.read(File.join(dir,"bundles.json"))).each do |bundle|
+          if bundle["id"] == 3
           datasets[bundle["URI"]] = Dataset.find_or_create_by(:source => bundle["URI"],:name => bundle["title"])
+          end
         end
-        Dir[File.join(dir,"study*.json")].each do |s|
+        # TODO this is only for protein corona
+        Dir[File.join(dir,"study-F*.json")].each do |s|
           t = Time.now
           study = JSON.parse(File.read(s))
           np = JSON.parse(File.read(File.join(dir,"nanoparticle-#{study['owner']['substance']['uuid']}.json")))
-          core = {}
-          coating = []
+          core_id = nil
+          coating_ids = []
           np["composition"].each do |c|
+            uri = c["component"]["compound"]["URI"]
+            uri = CGI.escape File.join(uri,"&media=application/json")
+            data = JSON.parse(RestClientWrapper.get "https://data.enanomapper.net/query/compound/url/all?media=application/json&search=#{uri}")
+            smiles = data["dataEntry"][0]["values"]["https://data.enanomapper.net/feature/http%3A%2F%2Fwww.opentox.org%2Fapi%2F1.1%23SMILESDefault"]
+            names = []
+            names << data["dataEntry"][0]["values"]["https://data.enanomapper.net/feature/http%3A%2F%2Fwww.opentox.org%2Fapi%2F1.1%23ChemicalNameDefault"]
+            names << data["dataEntry"][0]["values"]["https://data.enanomapper.net/feature/http%3A%2F%2Fwww.opentox.org%2Fapi%2F1.1%23IUPACNameDefault"]
+            if smiles
+              compound = Compound.find_or_create_by(:smiles => smiles)
+              compound.names = names.compact
+            else
+              compound = Compound.find_or_create_by(:names => names)
+            end
+            compound.save
             if c["relation"] == "HAS_CORE"
-              core = {
-                :uri => c["component"]["compound"]["URI"],
-                :name => c["component"]["values"]["https://data.enanomapper.net/feature/http%3A%2F%2Fwww.opentox.org%2Fapi%2F1.1%23ChemicalNameDefault"]
-              }
+              core_id = compound.id.to_s
             elsif c["relation"] == "HAS_COATING"
-              coating << {
-                :uri => c["component"]["compound"]["URI"],
-                :name => c["component"]["values"]["https://data.enanomapper.net/feature/http%3A%2F%2Fwww.opentox.org%2Fapi%2F1.1%23ChemicalNameDefault"]
-              }
+              coating_ids << compound.id.to_s
             end
           end if np["composition"]
           nanoparticle = Nanoparticle.find_or_create_by(
             :name => np["values"]["https://data.enanomapper.net/identifier/name"],
             :source => np["compound"]["URI"],
-            :core => core,
-            :coating => coating
+            :core_id => core_id,
+            :coating_ids => coating_ids
           )
           np["bundles"].keys.each do |bundle_uri|
             nanoparticle.dataset_ids << datasets[bundle_uri].id
@@ -104,6 +121,7 @@ module OpenTox
               nanoparticle.parse_ambit_value feature, effect["result"], dataset
             end
           end
+    p nanoparticle
           nanoparticle.save
         end
         datasets.each { |u,d| d.save }
diff --git a/lib/model.rb b/lib/model.rb
index 7503215..6a5e614 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -81,7 +81,6 @@ module OpenTox
               :method => "properties",
               :categories => ["P-CHEM"],
             },
-            #:descriptors => ["P-CHEM","Proteomics"],
             :similarity => {
               :method => "Algorithm::Similarity.weighted_cosine",
               :min => 0.5
@@ -140,10 +139,11 @@ module OpenTox
           model.algorithms[:descriptors].delete(:features)
           model.algorithms[:descriptors].delete(:type)
           model.substances.each_with_index do |s,i|
-            s.calculate_properties(features).each_with_index do |v,j|
+            props = s.calculate_properties(features)
+            props.each_with_index do |v,j|
               model.independent_variables[j] ||= []
               model.independent_variables[j][i] = v
-            end
+            end if props and !props.empty?
           end
         # parse independent_variables
         when "properties"
@@ -152,7 +152,10 @@ module OpenTox
           categories.each do |category|
             Feature.where(category:category).each{|f| feature_ids << f.id.to_s}
           end
-          properties = model.substances.collect { |s| s.properties }
+          #p feature_ids
+          #properties = Nanoparticle.all.collect { |s| p s.name; p s.id; p s.properties }
+          properties = model.substances.collect { |s| s.properties  }
+          #p properties
           property_ids = properties.collect{|p| p.keys}.flatten.uniq
           model.descriptor_ids = feature_ids & property_ids
           model.independent_variables = model.descriptor_ids.collect{|i| properties.collect{|p| p[i] ? p[i].median : nil}}
diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb
index 23e155c..02d9a89 100644
--- a/lib/nanoparticle.rb
+++ b/lib/nanoparticle.rb
@@ -3,8 +3,30 @@ module OpenTox
   class Nanoparticle < Substance
     include OpenTox
 
-    field :core, type: Hash, default: {}
-    field :coating, type: Array, default: []
+    field :core_id, type: String, default: nil
+    field :coating_ids, type: Array, default: []
+
+    def core # Returns the Compound looked up by the stored core_id
+      Compound.find core_id # NOTE(review): core_id defaults to nil; how find handles nil is not visible here — confirm
+    end
+
+    def coating # Returns an array with one Compound per entry in coating_ids
+      coating_ids.collect{|i| Compound.find i } # one separate lookup per coating id
+    end
+
+    def fingerprint type=DEFAULT_FINGERPRINT # Combined fingerprint built from the core and all coating compounds
+      core_fp = core.fingerprint type
+      coating_fp = coating.collect{|c| c.fingerprint type}.flatten.uniq.compact # coating fingerprints merged, de-duplicated, nils removed
+      (core_fp.empty? or coating_fp.empty?) ? [] : (core_fp+coating_fp).uniq.compact # [] if either part is empty, otherwise the de-duplicated union
+    end
+
+    def calculate_properties descriptors=PhysChem::OPENBABEL # Descriptor values for the core and coating compounds; nil when SMILES are missing
+      if core.smiles and !coating.collect{|c| c.smiles}.compact.empty? # requires a truthy core SMILES and at least one non-nil coating SMILES
+        core_prop = core.calculate_properties descriptors
+        coating_prop = coating.collect{|c| c.calculate_properties descriptors if c.smiles} # entry is nil for a coating without SMILES
+        descriptors.collect_with_index{|d,i| [core_prop[i],coating_prop.collect{|c| c[i] if c}]} # presumably one [core value, coating values] pair per descriptor; collect_with_index is defined elsewhere — confirm
+      end # no else branch: the method evaluates to nil when the condition is false
+    end
 
     def add_feature feature, value, dataset
       unless feature.name == "ATOMIC COMPOSITION" or feature.name == "FUNCTIONAL GROUP" # redundand
@@ -37,28 +59,28 @@ module OpenTox
         add_feature feature, v["loValue"], dataset
       elsif v.keys.size == 2 and v["errorValue"]
         add_feature feature, v["loValue"], dataset
-        warn "Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'."
+        #warn "Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'."
       elsif v.keys.size == 2 and v["loQualifier"] == "mean"
         add_feature feature, v["loValue"], dataset
-        warn "'#{feature.name}' is a mean value. Original data is not available."
+        #warn "'#{feature.name}' is a mean value. Original data is not available."
       elsif v.keys.size == 2 and v["loQualifier"] #== ">="
-        warn "Only min value available for '#{feature.name}', entry ignored"
+        #warn "Only min value available for '#{feature.name}', entry ignored"
       elsif v.keys.size == 2 and v["upQualifier"] #== ">="
-        warn "Only max value available for '#{feature.name}', entry ignored"
+        #warn "Only max value available for '#{feature.name}', entry ignored"
       elsif v.keys.size == 3 and v["loValue"] and v["loQualifier"].nil? and v["upQualifier"].nil?
         add_feature feature, v["loValue"], dataset
-        warn "loQualifier and upQualifier are empty."
+        #warn "loQualifier and upQualifier are empty."
       elsif v.keys.size == 3 and v["loValue"] and v["loQualifier"] == "" and v["upQualifier"] == ""
         add_feature feature, v["loValue"], dataset
-        warn "loQualifier and upQualifier are empty."
+        #warn "loQualifier and upQualifier are empty."
       elsif v.keys.size == 4 and v["loValue"] and v["loQualifier"].nil? and v["upQualifier"].nil?
         add_feature feature, v["loValue"], dataset
-        warn "loQualifier and upQualifier are empty."
+        #warn "loQualifier and upQualifier are empty."
       elsif v.size == 4 and v["loQualifier"] and v["upQualifier"] and v["loValue"] and v["upValue"]
-        add_feature feature, [v["loValue"],v["upValue"]].mean, dataset
-        warn "Using mean value of range #{v["loValue"]} - #{v["upValue"]} for '#{feature.name}'. Original data is not available."
+        #add_feature feature, [v["loValue"],v["upValue"]].mean, dataset
+        #warn "Using mean value of range #{v["loValue"]} - #{v["upValue"]} for '#{feature.name}'. Original data is not available."
       elsif v.size == 4 and v["loQualifier"] == "mean" and v["errorValue"]
-        warn "'#{feature.name}' is a mean value. Original data is not available. Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'."
+        #warn "'#{feature.name}' is a mean value. Original data is not available. Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'."
         add_feature feature, v["loValue"], dataset
       elsif v == {} # do nothing
       else
-- 
cgit v1.2.3