enm study import

author: Christoph Helma <helma@in-silico.ch> 2016-04-15 14:58:17 +0200
committer: Christoph Helma <helma@in-silico.ch> 2016-04-15 14:58:17 +0200
commit: 4662e845c12e3e623ec9bec208c42cd4b1886047 (patch)
tree: 8c51d1f2098441ba3d0fc2118d774eae59724ad1
parent: 8aab046eb1ad39aaf10c5a8596102c35c7b2ee0b (diff)
4 files changed, 51 insertions, 65 deletions
diff --git a/lib/dataset.rb b/lib/dataset.rb
index fdf1bfc..b51d74b 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -88,22 +88,21 @@ module OpenTox
     # @return [String]
     def to_csv(inchi=false)
       CSV.generate() do |csv| 
-        compound = Substance.find(data_entries.first.first).is_a? Compound
+        compound = Substance.find(substance_ids.first).is_a? Compound
         if compound
           csv << [inchi ? "InChI" : "SMILES"] + features.collect{|f| f.name}
         else
           csv << ["Name"] + features.collect{|f| f.name}
         end
-        data_entries.each do |sid,f|
-          substance = Substance.find sid
-          features.each do |feature|
-            f[feature.id.to_s].each do |v|
+        substances.each do |substance|
+          features.each do |f|
+            substance.toxicities[f.id.to_s].each do |v|
               if compound
                 csv << [inchi ? substance.inchi : substance.smiles , v]
               else
                 csv << [substance.name , v]
               end
-            end if f[feature.id.to_s]
+            end if substance.toxicities[f.id.to_s]
           end
         end
       end
diff --git a/lib/feature.rb b/lib/feature.rb
index f13a3fb..c6fb68a 100644
--- a/lib/feature.rb
+++ b/lib/feature.rb
@@ -6,7 +6,9 @@ module OpenTox
     field :numeric, type: Boolean
     field :measured, type: Boolean
     field :calculated, type: Boolean
+    field :category, type: String
     field :unit, type: String
+    field :conditions, type: Hash
   end
 
   # Feature for categorical variables
@@ -35,12 +37,4 @@ module OpenTox
     end
   end
 
-  # Feature for categorical bioassay results
-  class NominalBioAssay < NominalFeature
-  end
-
-  # Feature for quantitative bioassay results
-  class NumericBioAssay < NumericFeature
-  end
-
 end
diff --git a/lib/import.rb b/lib/import.rb
index cf0855e..9091207 100644
--- a/lib/import.rb
+++ b/lib/import.rb
@@ -19,43 +19,28 @@ module OpenTox
               :name => np["values"]["https://data.enanomapper.net/identifier/name"],
               :source => np["compound"]["URI"],
             )
-            dataset.data_entries[nanoparticle.id.to_s] ||= {}
-            nanoparticle.bundles << uri
-            nanoparticle.dataset_ids << dataset.id
-            np["composition"].each do |comp|
-              case comp["relation"]
-              when "HAS_CORE"
-                nanoparticle.core = comp["component"]["compound"]["URI"]
-              when "HAS_COATING"
-                nanoparticle.coating << comp["component"]["compound"]["URI"]
-              end
-            end if np["composition"]
-            np["values"].each do |u,v|
-              if u.match(/property/)
-                name, unit, source = nil
-                features.each do |uri,feat|
-                  if u.match(/#{uri}/)
-                    name = feat["title"]
-                    unit = feat["units"]
-                    source = uri
-                  end
-                end
-                feature = Feature.find_or_create_by(
-                  :name => name,
-                  :unit => unit,
-                  :source => source
+            dataset.substance_ids << nanoparticle.id
+            dataset.substance_ids.uniq!
+            studies = JSON.parse(RestClientWrapper.get(File.join(np["compound"]["URI"],"study")))["study"]
+            studies.each do |study|
+              study["effects"].each do |effect|
+                effect["result"]["textValue"] ?  klass = NominalFeature : klass = NumericFeature
+                # TODO parse core/coating
+                # TODO parse proteomics, they come as a large textValue
+                $logger.debug File.join(np["compound"]["URI"],"study")
+                effect["conditions"].delete_if { |k, v| v.nil? }
+                feature = klass.find_or_create_by(
+                  :source => File.join(np["compound"]["URI"],"study"),
+                  :name => "#{study["protocol"]["category"]["title"]} #{study["protocol"]["endpoint"]}",
+                  :unit => effect["result"]["unit"],
+                  :category => study["protocol"]["topcategory"],
+                  :conditions => effect["conditions"]
                 )
+                nanoparticle.parse_ambit_value feature, effect["result"]
+                dataset.feature_ids << feature.id 
+                dataset.feature_ids.uniq!
               end
-              v.each{|value| nanoparticle.parse_ambit_value feature, value} if v.is_a? Array
-            end
-            nanoparticle.bundles.uniq!
-            nanoparticle.physchem_descriptors.each{|f,v| v.uniq!}
-            #nanoparticle.toxicities.each{|f,v| v.uniq!}
-            nanoparticle.toxicities.each do |f,v|
-              dataset.data_entries[nanoparticle.id.to_s][f.to_s] ||= []
-              dataset.data_entries[nanoparticle.id.to_s][f.to_s] += v
             end
-            nanoparticle.save
           end
           dataset.save
           datasets << dataset
diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb
index 0350363..295b6c0 100644
--- a/lib/nanoparticle.rb
+++ b/lib/nanoparticle.rb
@@ -12,43 +12,51 @@ module OpenTox
     end
 
     def add_feature feature, value
-      if feature.source.match /property\/P-CHEM/
+      case feature.category
+      when "P-CHEM"
         physchem_descriptors[feature.id.to_s] ||= []
         physchem_descriptors[feature.id.to_s] << value
-      elsif feature.source.match /property\/TOX/
+      when "TOX"
         toxicities[feature.id.to_s] ||= []
         toxicities[feature.id.to_s] << value
       else
-        warn "Unknown feature type '#{feature.source}'. Value '#{value}' not inserted."
+        warn "Unknown feature type '#{feature.category}'. Value '#{value}' not inserted."
       end
+      save
     end
 
     def parse_ambit_value feature, v
-      # TODO: units, mmol/log10 conversion
-      if v.keys == ["loValue"]
-        #if v["loValue"].numeric?
-          add_feature feature, v["loValue"]
-        #else
-          #warn "'#{v["loValue"]}' is not a numeric value, entry ignored."
-        #end
+      v.delete "unit"
+      # TODO: mmol/log10 conversion
+      if v.keys == ["textValue"]
+        add_feature feature, v["textValue"]
+      elsif v.keys == ["loValue"]
+        add_feature feature, v["loValue"]
+      elsif v.keys.size == 2 and v["errorValue"]
+        add_feature feature, v["loValue"]
+        warn "Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'."
       elsif v.keys.size == 2 and v["loQualifier"] == "mean"
-        #add_feature feature, {:mean => v["loValue"]}
         add_feature feature, v["loValue"]
         warn "'#{feature.name}' is a mean value. Original data is not available."
       elsif v.keys.size == 2 and v["loQualifier"] #== ">="
-        #add_feature feature, {:min => v["loValue"],:max => Float::INFINITY}
         warn "Only min value available for '#{feature.name}', entry ignored"
       elsif v.keys.size == 2 and v["upQualifier"] #== ">="
-        #add_feature feature, {:max => v["upValue"],:min => -Float::INFINITY}
         warn "Only max value available for '#{feature.name}', entry ignored"
-      elsif v.size == 4 and v["loQualifier"] and v["upQualifier"] 
-        #add_feature feature, {:min => v["loValue"],:max => v["upValue"]}
+      elsif v.keys.size == 3 and v["loValue"] and v["loQualifier"].nil? and v["upQualifier"].nil?
+        add_feature feature, v["loValue"]
+        warn "loQualifier and upQualifier are empty."
+      elsif v.keys.size == 3 and v["loValue"] and v["loQualifier"] == "" and v["upQualifier"] == ""
+        add_feature feature, v["loValue"]
+        warn "loQualifier and upQualifier are empty."
+      elsif v.size == 4 and v["loQualifier"] and v["upQualifier"] and v["loValue"] and v["upValue"]
         add_feature feature, [v["loValue"],v["upValue"]].mean
         warn "Using mean value of range #{v["loValue"]} - #{v["upValue"]} for '#{feature.name}'. Original data is not available."
+      elsif v.size == 4 and v["loQualifier"] == "mean" and v["errorValue"]
+        warn "'#{feature.name}' is a mean value. Original data is not available. Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'."
+        add_feature feature, v["loValue"]
       elsif v == {} # do nothing
       else
-        $logger.warn "Cannot parse Ambit eNanoMapper value '#{v}' for feature '#{feature.name}'."
-        warnings << "Cannot parse Ambit eNanoMapper value '#{v}' for feature '#{feature.name}'."
+        warn "Cannot parse Ambit eNanoMapper value '#{v}' for feature '#{feature.name}'."
       end
     end
author	Christoph Helma <helma@in-silico.ch>	2016-04-15 14:58:17 +0200
committer	Christoph Helma <helma@in-silico.ch>	2016-04-15 14:58:17 +0200
commit	4662e845c12e3e623ec9bec208c42cd4b1886047 (patch)
tree	8c51d1f2098441ba3d0fc2118d774eae59724ad1
parent	8aab046eb1ad39aaf10c5a8596102c35c7b2ee0b (diff)