From 063acd4dc63e9287287cc1ff78fff2064ff74e4f Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Thu, 7 Apr 2016 17:39:14 +0200
Subject: initial ambit import

---
 lib/nanoparticle.rb | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)
 create mode 100644 lib/nanoparticle.rb

(limited to 'lib/nanoparticle.rb')

diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb
new file mode 100644
index 0000000..3783ece
--- /dev/null
+++ b/lib/nanoparticle.rb
@@ -0,0 +1,17 @@
+module OpenTox
+
+  class Nanoparticle
+    include OpenTox
+
+    field :particle_id, type: String
+    field :core, type: String
+    field :coatings, type: Array
+
+    #field :physchem_descriptors, type: Hash, default: {}
+    #field :toxicities, type: Hash, default: {}
+    field :features, type: Hash, default: {}
+
+  end
+end
+
+
-- 
cgit v1.2.3


From f3780d7507092b643216054fa3ca1e6146281e43 Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Fri, 8 Apr 2016 13:04:56 +0200
Subject: enm import test

---
 lib/nanoparticle.rb | 45 ++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 40 insertions(+), 5 deletions(-)

(limited to 'lib/nanoparticle.rb')

diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb
index 3783ece..cda431a 100644
--- a/lib/nanoparticle.rb
+++ b/lib/nanoparticle.rb
@@ -3,13 +3,48 @@ module OpenTox
   class Nanoparticle
     include OpenTox
 
-    field :particle_id, type: String
+    #field :particle_id, type: String
     field :core, type: String
-    field :coatings, type: Array
+    field :coating, type: Array, default: []
 
-    #field :physchem_descriptors, type: Hash, default: {}
-    #field :toxicities, type: Hash, default: {}
-    field :features, type: Hash, default: {}
+    field :physchem_descriptors, type: Hash, default: {}
+    field :toxicities, type: Hash, default: {}
+    #field :features, type: Hash, default: {}
+    field :bundles, type: Array, default: []
+
+    def predict
+    end
+
+    def add_feature feature, value
+      if feature.source.match /property\/P-CHEM/
+        physchem_descriptors[feature.id.to_s] ||= []
+        physchem_descriptors[feature.id.to_s] << value
+      elsif feature.source.match /property\/TOX/
+        toxicities[feature.id.to_s] ||= []
+        toxicities[feature.id.to_s] << value
+      else
+        $logger.warn "Unknown feature type '#{feature.source}'. Value '#{value}' not inserted."
+        warnings << "Unknown feature type '#{feature.source}'. Value '#{value}' not inserted."
+      end
+    end
+
+    def parse_ambit_value feature, v
+      if v.keys == ["loValue"]
+        add_feature feature, v["loValue"]
+      elsif v.keys.size == 2 and v["loQualifier"] == "mean"
+        add_feature feature, {:mean => v["loValue"]}
+      elsif v.keys.size == 2 and v["loQualifier"] #== ">="
+        add_feature feature, {:min => v["loValue"],:max => Float::INFINITY}
+      elsif v.keys.size == 2 and v["upQualifier"] #== ">="
+        add_feature feature, {:max => v["upValue"],:min => -Float::INFINITY}
+      elsif v.size == 4 and v["loQualifier"] and v["upQualifier"] 
+        add_feature feature, {:min => v["loValue"],:max => v["upValue"]}
+      elsif v == {} # do nothing
+      else
+        $logger.warn "Cannot parse Ambit eNanoMapper value '#{v}' for feature '#{feature.name}'."
+        warnings << "Cannot parse Ambit eNanoMapper value '#{v}' for feature '#{feature.name}'."
+      end
+    end
 
   end
 end
-- 
cgit v1.2.3


From 84222bae2bbb9fb3e0ce3e65de1be8e7f94d2147 Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Tue, 12 Apr 2016 12:37:37 +0200
Subject: new dataset structure

---
 lib/nanoparticle.rb | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'lib/nanoparticle.rb')

diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb
index cda431a..c58dc8c 100644
--- a/lib/nanoparticle.rb
+++ b/lib/nanoparticle.rb
@@ -1,9 +1,8 @@
 module OpenTox
 
-  class Nanoparticle
+  class Nanoparticle < Substance
     include OpenTox
 
-    #field :particle_id, type: String
     field :core, type: String
     field :coating, type: Array, default: []
 
-- 
cgit v1.2.3


From 64f1f32ced77afb278bdb7c27397c5299a73675c Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Wed, 13 Apr 2016 18:18:36 +0200
Subject: improved enm import

---
 lib/nanoparticle.rb | 1 -
 1 file changed, 1 deletion(-)

(limited to 'lib/nanoparticle.rb')

diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb
index c58dc8c..6e9b0ea 100644
--- a/lib/nanoparticle.rb
+++ b/lib/nanoparticle.rb
@@ -6,7 +6,6 @@ module OpenTox
     field :core, type: String
     field :coating, type: Array, default: []
 
-    field :physchem_descriptors, type: Hash, default: {}
     field :toxicities, type: Hash, default: {}
     #field :features, type: Hash, default: {}
     field :bundles, type: Array, default: []
-- 
cgit v1.2.3


From 753fcc204d93d86c76860bee6e2f7d0468c3c940 Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Thu, 14 Apr 2016 19:43:24 +0200
Subject: features/toxicities fixed

---
 lib/nanoparticle.rb | 30 +++++++++++++++++++-----------
 1 file changed, 19 insertions(+), 11 deletions(-)

(limited to 'lib/nanoparticle.rb')

diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb
index 6e9b0ea..0350363 100644
--- a/lib/nanoparticle.rb
+++ b/lib/nanoparticle.rb
@@ -5,12 +5,10 @@ module OpenTox
 
     field :core, type: String
     field :coating, type: Array, default: []
-
-    field :toxicities, type: Hash, default: {}
-    #field :features, type: Hash, default: {}
     field :bundles, type: Array, default: []
 
-    def predict
+    def nanoparticle_neighbors params
+      Dataset.find(params[:training_dataset_id]).nanoparticles
     end
 
     def add_feature feature, value
@@ -21,22 +19,32 @@ module OpenTox
         toxicities[feature.id.to_s] ||= []
         toxicities[feature.id.to_s] << value
       else
-        $logger.warn "Unknown feature type '#{feature.source}'. Value '#{value}' not inserted."
-        warnings << "Unknown feature type '#{feature.source}'. Value '#{value}' not inserted."
+        warn "Unknown feature type '#{feature.source}'. Value '#{value}' not inserted."
       end
     end
 
     def parse_ambit_value feature, v
+      # TODO: units, mmol/log10 conversion
       if v.keys == ["loValue"]
-        add_feature feature, v["loValue"]
+        #if v["loValue"].numeric?
+          add_feature feature, v["loValue"]
+        #else
+          #warn "'#{v["loValue"]}' is not a numeric value, entry ignored."
+        #end
       elsif v.keys.size == 2 and v["loQualifier"] == "mean"
-        add_feature feature, {:mean => v["loValue"]}
+        #add_feature feature, {:mean => v["loValue"]}
+        add_feature feature, v["loValue"]
+        warn "'#{feature.name}' is a mean value. Original data is not available."
       elsif v.keys.size == 2 and v["loQualifier"] #== ">="
-        add_feature feature, {:min => v["loValue"],:max => Float::INFINITY}
+        #add_feature feature, {:min => v["loValue"],:max => Float::INFINITY}
+        warn "Only min value available for '#{feature.name}', entry ignored"
       elsif v.keys.size == 2 and v["upQualifier"] #== ">="
-        add_feature feature, {:max => v["upValue"],:min => -Float::INFINITY}
+        #add_feature feature, {:max => v["upValue"],:min => -Float::INFINITY}
+        warn "Only max value available for '#{feature.name}', entry ignored"
       elsif v.size == 4 and v["loQualifier"] and v["upQualifier"] 
-        add_feature feature, {:min => v["loValue"],:max => v["upValue"]}
+        #add_feature feature, {:min => v["loValue"],:max => v["upValue"]}
+        add_feature feature, [v["loValue"],v["upValue"]].mean
+        warn "Using mean value of range #{v["loValue"]} - #{v["upValue"]} for '#{feature.name}'. Original data is not available."
       elsif v == {} # do nothing
       else
         $logger.warn "Cannot parse Ambit eNanoMapper value '#{v}' for feature '#{feature.name}'."
-- 
cgit v1.2.3


From 4662e845c12e3e623ec9bec208c42cd4b1886047 Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Fri, 15 Apr 2016 14:58:17 +0200
Subject: enm study import

---
 lib/nanoparticle.rb | 42 +++++++++++++++++++++++++-----------------
 1 file changed, 25 insertions(+), 17 deletions(-)

(limited to 'lib/nanoparticle.rb')

diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb
index 0350363..295b6c0 100644
--- a/lib/nanoparticle.rb
+++ b/lib/nanoparticle.rb
@@ -12,43 +12,51 @@ module OpenTox
     end
 
     def add_feature feature, value
-      if feature.source.match /property\/P-CHEM/
+      case feature.category
+      when "P-CHEM"
         physchem_descriptors[feature.id.to_s] ||= []
         physchem_descriptors[feature.id.to_s] << value
-      elsif feature.source.match /property\/TOX/
+      when "TOX"
         toxicities[feature.id.to_s] ||= []
         toxicities[feature.id.to_s] << value
       else
-        warn "Unknown feature type '#{feature.source}'. Value '#{value}' not inserted."
+        warn "Unknown feature type '#{feature.category}'. Value '#{value}' not inserted."
       end
+      save
     end
 
     def parse_ambit_value feature, v
-      # TODO: units, mmol/log10 conversion
-      if v.keys == ["loValue"]
-        #if v["loValue"].numeric?
-          add_feature feature, v["loValue"]
-        #else
-          #warn "'#{v["loValue"]}' is not a numeric value, entry ignored."
-        #end
+      v.delete "unit"
+      # TODO: mmol/log10 conversion
+      if v.keys == ["textValue"]
+        add_feature feature, v["textValue"]
+      elsif v.keys == ["loValue"]
+        add_feature feature, v["loValue"]
+      elsif v.keys.size == 2 and v["errorValue"]
+        add_feature feature, v["loValue"]
+        warn "Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'."
       elsif v.keys.size == 2 and v["loQualifier"] == "mean"
-        #add_feature feature, {:mean => v["loValue"]}
         add_feature feature, v["loValue"]
         warn "'#{feature.name}' is a mean value. Original data is not available."
       elsif v.keys.size == 2 and v["loQualifier"] #== ">="
-        #add_feature feature, {:min => v["loValue"],:max => Float::INFINITY}
         warn "Only min value available for '#{feature.name}', entry ignored"
       elsif v.keys.size == 2 and v["upQualifier"] #== ">="
-        #add_feature feature, {:max => v["upValue"],:min => -Float::INFINITY}
         warn "Only max value available for '#{feature.name}', entry ignored"
-      elsif v.size == 4 and v["loQualifier"] and v["upQualifier"] 
-        #add_feature feature, {:min => v["loValue"],:max => v["upValue"]}
+      elsif v.keys.size == 3 and v["loValue"] and v["loQualifier"].nil? and v["upQualifier"].nil?
+        add_feature feature, v["loValue"]
+        warn "loQualifier and upQualifier are empty."
+      elsif v.keys.size == 3 and v["loValue"] and v["loQualifier"] == "" and v["upQualifier"] == ""
+        add_feature feature, v["loValue"]
+        warn "loQualifier and upQualifier are empty."
+      elsif v.size == 4 and v["loQualifier"] and v["upQualifier"] and v["loValue"] and v["upValue"]
         add_feature feature, [v["loValue"],v["upValue"]].mean
         warn "Using mean value of range #{v["loValue"]} - #{v["upValue"]} for '#{feature.name}'. Original data is not available."
+      elsif v.size == 4 and v["loQualifier"] == "mean" and v["errorValue"]
+        warn "'#{feature.name}' is a mean value. Original data is not available. Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'."
+        add_feature feature, v["loValue"]
       elsif v == {} # do nothing
       else
-        $logger.warn "Cannot parse Ambit eNanoMapper value '#{v}' for feature '#{feature.name}'."
-        warnings << "Cannot parse Ambit eNanoMapper value '#{v}' for feature '#{feature.name}'."
+        warn "Cannot parse Ambit eNanoMapper value '#{v}' for feature '#{feature.name}'."
       end
     end
 
-- 
cgit v1.2.3


From 75b70425ae8699464a18529eb7bf35a216c06243 Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Thu, 21 Apr 2016 09:56:12 +0200
Subject: AMBIT import expanded

---
 lib/nanoparticle.rb | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'lib/nanoparticle.rb')

diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb
index 295b6c0..b934bb3 100644
--- a/lib/nanoparticle.rb
+++ b/lib/nanoparticle.rb
@@ -48,6 +48,9 @@ module OpenTox
       elsif v.keys.size == 3 and v["loValue"] and v["loQualifier"] == "" and v["upQualifier"] == ""
         add_feature feature, v["loValue"]
         warn "loQualifier and upQualifier are empty."
+      elsif v.keys.size == 4 and v["loValue"] and v["loQualifier"].nil? and v["upQualifier"].nil?
+        add_feature feature, v["loValue"]
+        warn "loQualifier and upQualifier are empty."
       elsif v.size == 4 and v["loQualifier"] and v["upQualifier"] and v["loValue"] and v["upValue"]
         add_feature feature, [v["loValue"],v["upValue"]].mean
         warn "Using mean value of range #{v["loValue"]} - #{v["upValue"]} for '#{feature.name}'. Original data is not available."
-- 
cgit v1.2.3


From cfc64a2966ab38698e499f0b44f41208ee77a07f Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Tue, 26 Apr 2016 17:38:15 +0200
Subject: first nanomaterial prediction

---
 lib/nanoparticle.rb | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'lib/nanoparticle.rb')

diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb
index b934bb3..b5de5b9 100644
--- a/lib/nanoparticle.rb
+++ b/lib/nanoparticle.rb
@@ -16,9 +16,11 @@ module OpenTox
       when "P-CHEM"
         physchem_descriptors[feature.id.to_s] ||= []
         physchem_descriptors[feature.id.to_s] << value
+        physchem_descriptors[feature.id.to_s].uniq!
       when "TOX"
         toxicities[feature.id.to_s] ||= []
         toxicities[feature.id.to_s] << value
+        toxicities[feature.id.to_s].uniq!
       else
         warn "Unknown feature type '#{feature.category}'. Value '#{value}' not inserted."
       end
-- 
cgit v1.2.3


From 79238bddb59607aa9f759caa9e3c8db176709703 Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Thu, 28 Apr 2016 12:19:48 +0200
Subject: compound validations fixed

---
 lib/nanoparticle.rb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib/nanoparticle.rb')

diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb
index b5de5b9..83b97a9 100644
--- a/lib/nanoparticle.rb
+++ b/lib/nanoparticle.rb
@@ -8,7 +8,7 @@ module OpenTox
     field :bundles, type: Array, default: []
 
     def nanoparticle_neighbors params
-      Dataset.find(params[:training_dataset_id]).nanoparticles
+      Dataset.find(params[:training_dataset_id]).nanoparticles.collect{|np| {"_id" => np.id, "tanimoto" => 1}}
     end
 
     def add_feature feature, value
-- 
cgit v1.2.3


From 05386e748270c337c66f6f379317ea4b25905236 Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Wed, 4 May 2016 19:24:42 +0200
Subject: first reasonable results for nanoparticle crossvalidation

---
 lib/nanoparticle.rb | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

(limited to 'lib/nanoparticle.rb')

diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb
index 83b97a9..dda4a9f 100644
--- a/lib/nanoparticle.rb
+++ b/lib/nanoparticle.rb
@@ -8,7 +8,7 @@ module OpenTox
     field :bundles, type: Array, default: []
 
     def nanoparticle_neighbors params
-      Dataset.find(params[:training_dataset_id]).nanoparticles.collect{|np| {"_id" => np.id, "tanimoto" => 1}}
+      Dataset.find(params[:training_dataset_id]).nanoparticles.collect{|np| np["tanimoto"] = 1; np}
     end
 
     def add_feature feature, value
@@ -19,7 +19,19 @@ module OpenTox
         physchem_descriptors[feature.id.to_s].uniq!
       when "TOX"
         toxicities[feature.id.to_s] ||= []
-        toxicities[feature.id.to_s] << value
+        # TODO generic way of parsing TOX values
+        if feature.name == "7.99 Toxicity (other) ICP-AES" and feature.unit == "mL/ug(Mg)" 
+          toxicities[feature.id.to_s] << -Math.log10(value)
+        #if value.numeric?
+          #begin
+          #rescue
+            #p feature
+            #p value
+            #exit
+          #end
+        else
+          toxicities[feature.id.to_s] << value
+        end
         toxicities[feature.id.to_s].uniq!
       else
         warn "Unknown feature type '#{feature.category}'. Value '#{value}' not inserted."
@@ -29,7 +41,7 @@ module OpenTox
 
     def parse_ambit_value feature, v
       v.delete "unit"
-      # TODO: mmol/log10 conversion
+      # TODO: ppm instead of weights
       if v.keys == ["textValue"]
         add_feature feature, v["textValue"]
       elsif v.keys == ["loValue"]
-- 
cgit v1.2.3


From ab7b37541b4f8a762be737009631d3eefd898b4a Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Thu, 5 May 2016 16:14:02 +0200
Subject: ambit mirror, import from mirrored json, proteomics import

---
 lib/nanoparticle.rb | 21 ++++++++-------------
 1 file changed, 8 insertions(+), 13 deletions(-)

(limited to 'lib/nanoparticle.rb')

diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb
index dda4a9f..c9fbb77 100644
--- a/lib/nanoparticle.rb
+++ b/lib/nanoparticle.rb
@@ -6,6 +6,7 @@ module OpenTox
     field :core, type: String
     field :coating, type: Array, default: []
     field :bundles, type: Array, default: []
+    field :proteomics, type: Hash, default: {}
 
     def nanoparticle_neighbors params
       Dataset.find(params[:training_dataset_id]).nanoparticles.collect{|np| np["tanimoto"] = 1; np}
@@ -14,21 +15,18 @@ module OpenTox
     def add_feature feature, value
       case feature.category
       when "P-CHEM"
-        physchem_descriptors[feature.id.to_s] ||= []
-        physchem_descriptors[feature.id.to_s] << value
-        physchem_descriptors[feature.id.to_s].uniq!
+        physchem[feature.id.to_s] ||= []
+        physchem[feature.id.to_s] << value
+        physchem[feature.id.to_s].uniq!
+      when "Proteomics"
+        proteomics[feature.id.to_s] ||= []
+        proteomics[feature.id.to_s] << value
+        proteomics[feature.id.to_s].uniq!
       when "TOX"
         toxicities[feature.id.to_s] ||= []
         # TODO generic way of parsing TOX values
         if feature.name == "7.99 Toxicity (other) ICP-AES" and feature.unit == "mL/ug(Mg)" 
           toxicities[feature.id.to_s] << -Math.log10(value)
-        #if value.numeric?
-          #begin
-          #rescue
-            #p feature
-            #p value
-            #exit
-          #end
         else
           toxicities[feature.id.to_s] << value
         end
@@ -36,7 +34,6 @@ module OpenTox
       else
         warn "Unknown feature type '#{feature.category}'. Value '#{value}' not inserted."
       end
-      save
     end
 
     def parse_ambit_value feature, v
@@ -79,5 +76,3 @@ module OpenTox
 
   end
 end
-
-
-- 
cgit v1.2.3


From 51f57e2858b60bed74ebcc97189b2188c900c283 Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Fri, 6 May 2016 12:49:28 +0200
Subject: dataset tests cleanup

---
 lib/nanoparticle.rb | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'lib/nanoparticle.rb')

diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb
index c9fbb77..9bf419d 100644
--- a/lib/nanoparticle.rb
+++ b/lib/nanoparticle.rb
@@ -15,9 +15,9 @@ module OpenTox
     def add_feature feature, value
       case feature.category
       when "P-CHEM"
-        physchem[feature.id.to_s] ||= []
-        physchem[feature.id.to_s] << value
-        physchem[feature.id.to_s].uniq!
+        physchem_descriptors[feature.id.to_s] ||= []
+        physchem_descriptors[feature.id.to_s] << value
+        physchem_descriptors[feature.id.to_s].uniq!
       when "Proteomics"
         proteomics[feature.id.to_s] ||= []
         proteomics[feature.id.to_s] << value
-- 
cgit v1.2.3


From 611bac891177f8d9185d45486dd574b6ef4d1912 Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Mon, 9 May 2016 15:11:46 +0200
Subject: nanoparticle models fixed

---
 lib/nanoparticle.rb | 37 +++++++++++++++++++++----------------
 1 file changed, 21 insertions(+), 16 deletions(-)

(limited to 'lib/nanoparticle.rb')

diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb
index 9bf419d..b79981d 100644
--- a/lib/nanoparticle.rb
+++ b/lib/nanoparticle.rb
@@ -9,10 +9,14 @@ module OpenTox
     field :proteomics, type: Hash, default: {}
 
     def nanoparticle_neighbors params
-      Dataset.find(params[:training_dataset_id]).nanoparticles.collect{|np| np["tanimoto"] = 1; np}
+      dataset = Dataset.find(params[:training_dataset_id])
+      Dataset.find(params[:training_dataset_id]).nanoparticles.collect do |np|
+        np["tanimoto"] = 1
+        np unless np.toxicities.empty?
+      end.compact
     end
 
-    def add_feature feature, value
+    def add_feature feature, value, dataset_id
       case feature.category
       when "P-CHEM"
         physchem_descriptors[feature.id.to_s] ||= []
@@ -23,51 +27,52 @@ module OpenTox
         proteomics[feature.id.to_s] << value
         proteomics[feature.id.to_s].uniq!
       when "TOX"
-        toxicities[feature.id.to_s] ||= []
+        toxicities[feature.id.to_s] ||= {}
+        toxicities[feature.id.to_s][dataset_id.to_s] ||= []
         # TODO generic way of parsing TOX values
         if feature.name == "7.99 Toxicity (other) ICP-AES" and feature.unit == "mL/ug(Mg)" 
-          toxicities[feature.id.to_s] << -Math.log10(value)
+          toxicities[feature.id.to_s][dataset_id.to_s] << -Math.log10(value)
         else
-          toxicities[feature.id.to_s] << value
+          toxicities[feature.id.to_s][dataset_id.to_s] << value
         end
-        toxicities[feature.id.to_s].uniq!
+        toxicities[feature.id.to_s][dataset_id.to_s].uniq!
       else
         warn "Unknown feature type '#{feature.category}'. Value '#{value}' not inserted."
       end
     end
 
-    def parse_ambit_value feature, v
+    def parse_ambit_value feature, v, dataset_id
       v.delete "unit"
       # TODO: ppm instead of weights
       if v.keys == ["textValue"]
-        add_feature feature, v["textValue"]
+        add_feature feature, v["textValue"], dataset_id
       elsif v.keys == ["loValue"]
-        add_feature feature, v["loValue"]
+        add_feature feature, v["loValue"], dataset_id
       elsif v.keys.size == 2 and v["errorValue"]
-        add_feature feature, v["loValue"]
+        add_feature feature, v["loValue"], dataset_id
         warn "Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'."
       elsif v.keys.size == 2 and v["loQualifier"] == "mean"
-        add_feature feature, v["loValue"]
+        add_feature feature, v["loValue"], dataset_id
         warn "'#{feature.name}' is a mean value. Original data is not available."
       elsif v.keys.size == 2 and v["loQualifier"] #== ">="
         warn "Only min value available for '#{feature.name}', entry ignored"
       elsif v.keys.size == 2 and v["upQualifier"] #== ">="
         warn "Only max value available for '#{feature.name}', entry ignored"
       elsif v.keys.size == 3 and v["loValue"] and v["loQualifier"].nil? and v["upQualifier"].nil?
-        add_feature feature, v["loValue"]
+        add_feature feature, v["loValue"], dataset_id
         warn "loQualifier and upQualifier are empty."
       elsif v.keys.size == 3 and v["loValue"] and v["loQualifier"] == "" and v["upQualifier"] == ""
-        add_feature feature, v["loValue"]
+        add_feature feature, v["loValue"], dataset_id
         warn "loQualifier and upQualifier are empty."
       elsif v.keys.size == 4 and v["loValue"] and v["loQualifier"].nil? and v["upQualifier"].nil?
-        add_feature feature, v["loValue"]
+        add_feature feature, v["loValue"], dataset_id
         warn "loQualifier and upQualifier are empty."
       elsif v.size == 4 and v["loQualifier"] and v["upQualifier"] and v["loValue"] and v["upValue"]
-        add_feature feature, [v["loValue"],v["upValue"]].mean
+        add_feature feature, [v["loValue"],v["upValue"]].mean, dataset_id
         warn "Using mean value of range #{v["loValue"]} - #{v["upValue"]} for '#{feature.name}'. Original data is not available."
       elsif v.size == 4 and v["loQualifier"] == "mean" and v["errorValue"]
         warn "'#{feature.name}' is a mean value. Original data is not available. Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'."
-        add_feature feature, v["loValue"]
+        add_feature feature, v["loValue"], dataset_id
       elsif v == {} # do nothing
       else
         warn "Cannot parse Ambit eNanoMapper value '#{v}' for feature '#{feature.name}'."
-- 
cgit v1.2.3


From b8bb12c8a163c238d7d4387c1914e2100bb660df Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Thu, 12 May 2016 15:23:01 +0200
Subject: enm study import fixed

---
 lib/nanoparticle.rb | 80 +++++++++++++++++++++++++++++++++--------------------
 1 file changed, 50 insertions(+), 30 deletions(-)

(limited to 'lib/nanoparticle.rb')

diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb
index b79981d..6527fa3 100644
--- a/lib/nanoparticle.rb
+++ b/lib/nanoparticle.rb
@@ -8,15 +8,31 @@ module OpenTox
     field :bundles, type: Array, default: []
     field :proteomics, type: Hash, default: {}
 
-    def nanoparticle_neighbors params
-      dataset = Dataset.find(params[:training_dataset_id])
-      Dataset.find(params[:training_dataset_id]).nanoparticles.collect do |np|
-        np["tanimoto"] = 1
-        np unless np.toxicities.empty?
-      end.compact
+    def nanoparticle_neighbors min_sim: 0.1, type:, dataset_id:, prediction_feature_id:
+      dataset = Dataset.find(dataset_id)
+      neighbors = []
+      p dataset.data_entries.size
+      p dataset.substance_ids.size
+      p dataset.substance_ids.collect{|i| i.to_s} == dataset.data_entries.keys
+      p dataset.substance_ids.collect{|i| i.to_s} 
+      p dataset.data_entries.keys
+      dataset.nanoparticles.each do |np|
+        prediction_feature_id
+        p dataset.data_entries[np.id.to_s]
+        values = dataset.values(np,prediction_feature_id)
+        p values
+        if values
+          common_descriptors = physchem_descriptors.keys & np.physchem_descriptors.keys
+          sim = Algorithm::Similarity.cosine(common_descriptors.collect{|d| physchem_descriptors[d]}, common_descriptors.collect{|d| np.physchem_descriptors[d]})
+          neighbors << {"_id" => np.id, "toxicities" => values, "similarity" => sim} if sim >= min_sim
+        end
+      end
+      neighbors.sort!{|a,b| b["similarity"] <=> a["similarity"]}
+      neighbors
     end
 
     def add_feature feature, value, dataset_id
+      dataset = Dataset.find(dataset_id)
       case feature.category
       when "P-CHEM"
         physchem_descriptors[feature.id.to_s] ||= []
@@ -27,55 +43,59 @@ module OpenTox
         proteomics[feature.id.to_s] << value
         proteomics[feature.id.to_s].uniq!
       when "TOX"
-        toxicities[feature.id.to_s] ||= {}
-        toxicities[feature.id.to_s][dataset_id.to_s] ||= []
         # TODO generic way of parsing TOX values
+        p dataset.name
+        p self.name
+        p feature.name
+        p feature.unit
+        p value
         if feature.name == "7.99 Toxicity (other) ICP-AES" and feature.unit == "mL/ug(Mg)" 
-          toxicities[feature.id.to_s][dataset_id.to_s] << -Math.log10(value)
+          dataset.add self, feature, -Math.log10(value)
         else
-          toxicities[feature.id.to_s][dataset_id.to_s] << value
+          dataset.add self, feature, value
         end
-        toxicities[feature.id.to_s][dataset_id.to_s].uniq!
+        dataset.save
       else
         warn "Unknown feature type '#{feature.category}'. Value '#{value}' not inserted."
       end
     end
 
     def parse_ambit_value feature, v, dataset_id
+      dataset = Dataset.find(dataset_id)
       v.delete "unit"
       # TODO: ppm instead of weights
       if v.keys == ["textValue"]
-        add_feature feature, v["textValue"], dataset_id
+        add_feature feature, v["textValue"], dataset
       elsif v.keys == ["loValue"]
-        add_feature feature, v["loValue"], dataset_id
+        add_feature feature, v["loValue"], dataset
       elsif v.keys.size == 2 and v["errorValue"]
-        add_feature feature, v["loValue"], dataset_id
-        warn "Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'."
+        add_feature feature, v["loValue"], dataset
+        #warn "Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'."
       elsif v.keys.size == 2 and v["loQualifier"] == "mean"
-        add_feature feature, v["loValue"], dataset_id
-        warn "'#{feature.name}' is a mean value. Original data is not available."
+        add_feature feature, v["loValue"], dataset
+        #warn "'#{feature.name}' is a mean value. Original data is not available."
       elsif v.keys.size == 2 and v["loQualifier"] #== ">="
-        warn "Only min value available for '#{feature.name}', entry ignored"
+        #warn "Only min value available for '#{feature.name}', entry ignored"
       elsif v.keys.size == 2 and v["upQualifier"] #== ">="
-        warn "Only max value available for '#{feature.name}', entry ignored"
+        #warn "Only max value available for '#{feature.name}', entry ignored"
       elsif v.keys.size == 3 and v["loValue"] and v["loQualifier"].nil? and v["upQualifier"].nil?
-        add_feature feature, v["loValue"], dataset_id
-        warn "loQualifier and upQualifier are empty."
+        add_feature feature, v["loValue"], dataset
+        #warn "loQualifier and upQualifier are empty."
       elsif v.keys.size == 3 and v["loValue"] and v["loQualifier"] == "" and v["upQualifier"] == ""
-        add_feature feature, v["loValue"], dataset_id
-        warn "loQualifier and upQualifier are empty."
+        add_feature feature, v["loValue"], dataset
+        #warn "loQualifier and upQualifier are empty."
       elsif v.keys.size == 4 and v["loValue"] and v["loQualifier"].nil? and v["upQualifier"].nil?
-        add_feature feature, v["loValue"], dataset_id
-        warn "loQualifier and upQualifier are empty."
+        add_feature feature, v["loValue"], dataset
+        #warn "loQualifier and upQualifier are empty."
       elsif v.size == 4 and v["loQualifier"] and v["upQualifier"] and v["loValue"] and v["upValue"]
-        add_feature feature, [v["loValue"],v["upValue"]].mean, dataset_id
-        warn "Using mean value of range #{v["loValue"]} - #{v["upValue"]} for '#{feature.name}'. Original data is not available."
+        add_feature feature, [v["loValue"],v["upValue"]].mean, dataset
+        #warn "Using mean value of range #{v["loValue"]} - #{v["upValue"]} for '#{feature.name}'. Original data is not available."
       elsif v.size == 4 and v["loQualifier"] == "mean" and v["errorValue"]
-        warn "'#{feature.name}' is a mean value. Original data is not available. Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'."
-        add_feature feature, v["loValue"], dataset_id
+        #warn "'#{feature.name}' is a mean value. Original data is not available. Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'."
+        add_feature feature, v["loValue"], dataset
       elsif v == {} # do nothing
       else
-        warn "Cannot parse Ambit eNanoMapper value '#{v}' for feature '#{feature.name}'."
+        #warn "Cannot parse Ambit eNanoMapper value '#{v}' for feature '#{feature.name}'."
       end
     end
 
-- 
cgit v1.2.3


From c90644211e214a50f6fdb3a936bf247f45f1f4be Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Fri, 13 May 2016 13:38:24 +0200
Subject: compound tests fixed

---
 lib/nanoparticle.rb | 40 +++++++++++++++-------------------------
 1 file changed, 15 insertions(+), 25 deletions(-)

(limited to 'lib/nanoparticle.rb')

diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb
index 6527fa3..7890a19 100644
--- a/lib/nanoparticle.rb
+++ b/lib/nanoparticle.rb
@@ -11,19 +11,14 @@ module OpenTox
     def nanoparticle_neighbors min_sim: 0.1, type:, dataset_id:, prediction_feature_id:
       dataset = Dataset.find(dataset_id)
       neighbors = []
-      p dataset.data_entries.size
-      p dataset.substance_ids.size
-      p dataset.substance_ids.collect{|i| i.to_s} == dataset.data_entries.keys
-      p dataset.substance_ids.collect{|i| i.to_s} 
-      p dataset.data_entries.keys
       dataset.nanoparticles.each do |np|
-        prediction_feature_id
-        p dataset.data_entries[np.id.to_s]
         values = dataset.values(np,prediction_feature_id)
-        p values
         if values
           common_descriptors = physchem_descriptors.keys & np.physchem_descriptors.keys
-          sim = Algorithm::Similarity.cosine(common_descriptors.collect{|d| physchem_descriptors[d]}, common_descriptors.collect{|d| np.physchem_descriptors[d]})
+          common_descriptors.select!{|id| NumericFeature.find(id) }
+          query_descriptors = common_descriptors.collect{|d| physchem_descriptors[d].first}
+          neighbor_descriptors = common_descriptors.collect{|d| np.physchem_descriptors[d].first}
+          sim = Algorithm::Similarity.cosine(query_descriptors,neighbor_descriptors)
           neighbors << {"_id" => np.id, "toxicities" => values, "similarity" => sim} if sim >= min_sim
         end
       end
@@ -44,12 +39,7 @@ module OpenTox
         proteomics[feature.id.to_s].uniq!
       when "TOX"
         # TODO generic way of parsing TOX values
-        p dataset.name
-        p self.name
-        p feature.name
-        p feature.unit
-        p value
-        if feature.name == "7.99 Toxicity (other) ICP-AES" and feature.unit == "mL/ug(Mg)" 
+        if feature.name == "Net cell association" and feature.unit == "mL/ug(Mg)" 
           dataset.add self, feature, -Math.log10(value)
         else
           dataset.add self, feature, value
@@ -70,32 +60,32 @@ module OpenTox
         add_feature feature, v["loValue"], dataset
       elsif v.keys.size == 2 and v["errorValue"]
         add_feature feature, v["loValue"], dataset
-        #warn "Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'."
+        warn "Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'."
       elsif v.keys.size == 2 and v["loQualifier"] == "mean"
         add_feature feature, v["loValue"], dataset
-        #warn "'#{feature.name}' is a mean value. Original data is not available."
+        warn "'#{feature.name}' is a mean value. Original data is not available."
       elsif v.keys.size == 2 and v["loQualifier"] #== ">="
-        #warn "Only min value available for '#{feature.name}', entry ignored"
+        warn "Only min value available for '#{feature.name}', entry ignored"
       elsif v.keys.size == 2 and v["upQualifier"] #== ">="
-        #warn "Only max value available for '#{feature.name}', entry ignored"
+        warn "Only max value available for '#{feature.name}', entry ignored"
       elsif v.keys.size == 3 and v["loValue"] and v["loQualifier"].nil? and v["upQualifier"].nil?
         add_feature feature, v["loValue"], dataset
-        #warn "loQualifier and upQualifier are empty."
+        warn "loQualifier and upQualifier are empty."
       elsif v.keys.size == 3 and v["loValue"] and v["loQualifier"] == "" and v["upQualifier"] == ""
         add_feature feature, v["loValue"], dataset
-        #warn "loQualifier and upQualifier are empty."
+        warn "loQualifier and upQualifier are empty."
       elsif v.keys.size == 4 and v["loValue"] and v["loQualifier"].nil? and v["upQualifier"].nil?
         add_feature feature, v["loValue"], dataset
-        #warn "loQualifier and upQualifier are empty."
+        warn "loQualifier and upQualifier are empty."
       elsif v.size == 4 and v["loQualifier"] and v["upQualifier"] and v["loValue"] and v["upValue"]
         add_feature feature, [v["loValue"],v["upValue"]].mean, dataset
-        #warn "Using mean value of range #{v["loValue"]} - #{v["upValue"]} for '#{feature.name}'. Original data is not available."
+        warn "Using mean value of range #{v["loValue"]} - #{v["upValue"]} for '#{feature.name}'. Original data is not available."
       elsif v.size == 4 and v["loQualifier"] == "mean" and v["errorValue"]
-        #warn "'#{feature.name}' is a mean value. Original data is not available. Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'."
+        warn "'#{feature.name}' is a mean value. Original data is not available. Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'."
         add_feature feature, v["loValue"], dataset
       elsif v == {} # do nothing
       else
-        #warn "Cannot parse Ambit eNanoMapper value '#{v}' for feature '#{feature.name}'."
+        warn "Cannot parse Ambit eNanoMapper value '#{v}' for feature '#{feature.name}'."
       end
     end
 
-- 
cgit v1.2.3


From f46ba3b7262f5b551c81fc9396c5b7f0cac7f030 Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Fri, 27 May 2016 19:16:16 +0200
Subject: first correlation of nanoparticle predictions

---
 lib/nanoparticle.rb | 110 ++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 86 insertions(+), 24 deletions(-)

(limited to 'lib/nanoparticle.rb')

diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb
index 7890a19..5c6d944 100644
--- a/lib/nanoparticle.rb
+++ b/lib/nanoparticle.rb
@@ -3,12 +3,11 @@ module OpenTox
   class Nanoparticle < Substance
     include OpenTox
 
-    field :core, type: String
+    field :core, type: Hash, default: {}
     field :coating, type: Array, default: []
-    field :bundles, type: Array, default: []
     field :proteomics, type: Hash, default: {}
 
-    def nanoparticle_neighbors min_sim: 0.1, type:, dataset_id:, prediction_feature_id:
+    def nanoparticle_neighbors_old min_sim: 0.9, type:, dataset_id:, prediction_feature_id:
       dataset = Dataset.find(dataset_id)
       neighbors = []
       dataset.nanoparticles.each do |np|
@@ -25,33 +24,96 @@ module OpenTox
       neighbors.sort!{|a,b| b["similarity"] <=> a["similarity"]}
       neighbors
     end
-
-    def add_feature feature, value, dataset_id
+ 
+    def nanoparticle_neighbors min_sim: 0.9, type:, dataset_id:, prediction_feature_id:
+      p self.name
+      #p self.physchem_descriptors.keys.size
       dataset = Dataset.find(dataset_id)
-      case feature.category
-      when "P-CHEM"
-        physchem_descriptors[feature.id.to_s] ||= []
-        physchem_descriptors[feature.id.to_s] << value
-        physchem_descriptors[feature.id.to_s].uniq!
-      when "Proteomics"
-        proteomics[feature.id.to_s] ||= []
-        proteomics[feature.id.to_s] << value
-        proteomics[feature.id.to_s].uniq!
-      when "TOX"
-        # TODO generic way of parsing TOX values
-        if feature.name == "Net cell association" and feature.unit == "mL/ug(Mg)" 
-          dataset.add self, feature, -Math.log10(value)
+      relevant_features = {}
+      toxicities = []
+      substances = []
+      # TODO: exclude query activities!!!
+      dataset.substances.each do |s|
+        dataset.values(s,prediction_feature_id).each do |act|
+          toxicities << act
+          substances << s
+        end
+      end
+      R.assign "tox", toxicities
+      feature_ids = physchem_descriptors.keys.select{|fid| Feature.find(fid).is_a? NumericFeature}
+      # identify relevant features
+      feature_ids.each do |feature_id|
+        feature_values = substances.collect{|s| s["physchem_descriptors"][feature_id].first if s["physchem_descriptors"][feature_id]}
+        R.assign "feature", feature_values
+        begin
+          R.eval "cor <- cor.test(tox,feature,method = 'pearson',use='pairwise')"
+          pvalue = R.eval("cor$p.value").to_ruby
+          if pvalue <= 0.05
+            r = R.eval("cor$estimate").to_ruby
+            relevant_features[feature_id] = {}
+            relevant_features[feature_id]["pvalue"] = pvalue
+            relevant_features[feature_id]["r"] = r
+            relevant_features[feature_id]["mean"] = R.eval("mean(feature, na.rm=TRUE)").to_ruby
+            relevant_features[feature_id]["sd"] = R.eval("sd(feature, na.rm=TRUE)").to_ruby
+          end
+        rescue
+          warn "Correlation of '#{Feature.find(feature_id).name}' (#{feature_values}) with '#{Feature.find(prediction_feature_id).name}' (#{toxicities}) failed."
+        end
+      end
+      neighbors = []
+      substances.each do |substance|
+        values = dataset.values(substance,prediction_feature_id)
+        if values
+          common_descriptors = relevant_features.keys & substance.physchem_descriptors.keys
+          # scale values
+          query_descriptors = common_descriptors.collect{|d| (physchem_descriptors[d].median-relevant_features[d]["mean"])/relevant_features[d]["sd"]}
+          neighbor_descriptors = common_descriptors.collect{|d| (substance.physchem_descriptors[d].median-relevant_features[d]["mean"])/relevant_features[d]["sd"]}
+          #weights = common_descriptors.collect{|d| 1-relevant_features[d]["pvalue"]}
+          weights = common_descriptors.collect{|d| relevant_features[d]["r"]**2}
+          #p weights
+          sim = Algorithm::Similarity.weighted_cosine(query_descriptors,neighbor_descriptors,weights)
+          ##p "SIM"
+          #p [sim, Algorithm::Similarity.cosine(query_descriptors,neighbor_descriptors)]
+          neighbors << {"_id" => substance.id, "toxicities" => values, "similarity" => sim} if sim >= min_sim
+        end
+      end
+      p neighbors.size
+      neighbors.sort!{|a,b| b["similarity"] <=> a["similarity"]}
+      neighbors
+    end
+
+    def add_feature feature, value, dataset
+      unless feature.name == "ATOMIC COMPOSITION" or feature.name == "FUNCTIONAL GROUP" # redundand
+        case feature.category
+        when "P-CHEM"
+          physchem_descriptors[feature.id.to_s] ||= []
+          physchem_descriptors[feature.id.to_s] << value
+          physchem_descriptors[feature.id.to_s].uniq!
+        when "Proteomics"
+          proteomics[feature.id.to_s] ||= []
+          proteomics[feature.id.to_s] << value
+          proteomics[feature.id.to_s].uniq!
+        when "TOX"
+          # TODO generic way of parsing TOX values
+          if feature.name == "Net cell association" and feature.unit == "mL/ug(Mg)" 
+            dataset.add self, feature, Math.log2(value)
+          elsif feature.name == "Total protein (BCA assay)"
+            physchem_descriptors[feature.id.to_s] ||= []
+            physchem_descriptors[feature.id.to_s] << value
+            physchem_descriptors[feature.id.to_s].uniq!
+          else
+            dataset.add self, feature, value
+          end
+          dataset.save
+          dataset_ids << dataset.id
+          dataset_ids.uniq!
         else
-          dataset.add self, feature, value
+          warn "Unknown feature type '#{feature.category}'. Value '#{value}' not inserted."
         end
-        dataset.save
-      else
-        warn "Unknown feature type '#{feature.category}'. Value '#{value}' not inserted."
       end
     end
 
-    def parse_ambit_value feature, v, dataset_id
-      dataset = Dataset.find(dataset_id)
+    def parse_ambit_value feature, v, dataset
       v.delete "unit"
       # TODO: ppm instead of weights
       if v.keys == ["textValue"]
-- 
cgit v1.2.3


From b515a0cfedb887a2af753db6e4a08ae1af430cad Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Tue, 31 May 2016 18:08:08 +0200
Subject: cleanup of validation modules/classes

---
 lib/nanoparticle.rb | 80 ++++++++++++++++++++++-------------------------------
 1 file changed, 33 insertions(+), 47 deletions(-)

(limited to 'lib/nanoparticle.rb')

diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb
index 5c6d944..d0f8f51 100644
--- a/lib/nanoparticle.rb
+++ b/lib/nanoparticle.rb
@@ -6,58 +6,43 @@ module OpenTox
     field :core, type: Hash, default: {}
     field :coating, type: Array, default: []
     field :proteomics, type: Hash, default: {}
-
-    def nanoparticle_neighbors_old min_sim: 0.9, type:, dataset_id:, prediction_feature_id:
-      dataset = Dataset.find(dataset_id)
-      neighbors = []
-      dataset.nanoparticles.each do |np|
-        values = dataset.values(np,prediction_feature_id)
-        if values
-          common_descriptors = physchem_descriptors.keys & np.physchem_descriptors.keys
-          common_descriptors.select!{|id| NumericFeature.find(id) }
-          query_descriptors = common_descriptors.collect{|d| physchem_descriptors[d].first}
-          neighbor_descriptors = common_descriptors.collect{|d| np.physchem_descriptors[d].first}
-          sim = Algorithm::Similarity.cosine(query_descriptors,neighbor_descriptors)
-          neighbors << {"_id" => np.id, "toxicities" => values, "similarity" => sim} if sim >= min_sim
-        end
-      end
-      neighbors.sort!{|a,b| b["similarity"] <=> a["similarity"]}
-      neighbors
-    end
  
-    def nanoparticle_neighbors min_sim: 0.9, type:, dataset_id:, prediction_feature_id:
+    def physchem_neighbors min_sim: 0.9, dataset_id:, prediction_feature_id:
       p self.name
-      #p self.physchem_descriptors.keys.size
       dataset = Dataset.find(dataset_id)
       relevant_features = {}
-      toxicities = []
+      measurements = []
       substances = []
       # TODO: exclude query activities!!!
       dataset.substances.each do |s|
-        dataset.values(s,prediction_feature_id).each do |act|
-          toxicities << act
-          substances << s
+        if s.core == self.core # exclude nanoparticles with different core
+          dataset.values(s,prediction_feature_id).each do |act|
+            measurements << act
+            substances << s
+          end
         end
       end
-      R.assign "tox", toxicities
+      R.assign "tox", measurements
       feature_ids = physchem_descriptors.keys.select{|fid| Feature.find(fid).is_a? NumericFeature}
       # identify relevant features
       feature_ids.each do |feature_id|
         feature_values = substances.collect{|s| s["physchem_descriptors"][feature_id].first if s["physchem_descriptors"][feature_id]}
-        R.assign "feature", feature_values
-        begin
-          R.eval "cor <- cor.test(tox,feature,method = 'pearson',use='pairwise')"
-          pvalue = R.eval("cor$p.value").to_ruby
-          if pvalue <= 0.05
-            r = R.eval("cor$estimate").to_ruby
-            relevant_features[feature_id] = {}
-            relevant_features[feature_id]["pvalue"] = pvalue
-            relevant_features[feature_id]["r"] = r
-            relevant_features[feature_id]["mean"] = R.eval("mean(feature, na.rm=TRUE)").to_ruby
-            relevant_features[feature_id]["sd"] = R.eval("sd(feature, na.rm=TRUE)").to_ruby
+        unless feature_values.uniq.size == 1
+          R.assign "feature", feature_values
+          begin
+            R.eval "cor <- cor.test(tox,feature,method = 'pearson',use='pairwise')"
+            p_value = R.eval("cor$p.value").to_ruby
+            if p_value <= 0.05
+              r = R.eval("cor$estimate").to_ruby
+              relevant_features[feature_id] = {}
+              relevant_features[feature_id]["p_value"] = p_value
+              relevant_features[feature_id]["r"] = r
+              relevant_features[feature_id]["mean"] = R.eval("mean(feature, na.rm=TRUE)").to_ruby
+              relevant_features[feature_id]["sd"] = R.eval("sd(feature, na.rm=TRUE)").to_ruby
+            end
+          rescue
+            warn "Correlation of '#{Feature.find(feature_id).name}' (#{feature_values}) with '#{Feature.find(prediction_feature_id).name}' (#{measurements}) failed."
           end
-        rescue
-          warn "Correlation of '#{Feature.find(feature_id).name}' (#{feature_values}) with '#{Feature.find(prediction_feature_id).name}' (#{toxicities}) failed."
         end
       end
       neighbors = []
@@ -68,13 +53,17 @@ module OpenTox
           # scale values
           query_descriptors = common_descriptors.collect{|d| (physchem_descriptors[d].median-relevant_features[d]["mean"])/relevant_features[d]["sd"]}
           neighbor_descriptors = common_descriptors.collect{|d| (substance.physchem_descriptors[d].median-relevant_features[d]["mean"])/relevant_features[d]["sd"]}
-          #weights = common_descriptors.collect{|d| 1-relevant_features[d]["pvalue"]}
+          #weights = common_descriptors.collect{|d| 1-relevant_features[d]["p_value"]}
           weights = common_descriptors.collect{|d| relevant_features[d]["r"]**2}
-          #p weights
           sim = Algorithm::Similarity.weighted_cosine(query_descriptors,neighbor_descriptors,weights)
-          ##p "SIM"
-          #p [sim, Algorithm::Similarity.cosine(query_descriptors,neighbor_descriptors)]
-          neighbors << {"_id" => substance.id, "toxicities" => values, "similarity" => sim} if sim >= min_sim
+          neighbors << {
+            "_id" => substance.id,
+            "measurements" => values,
+            "similarity" => sim,
+            "common_descriptors" => common_descriptors.collect do |id|
+              {:id => id, :p_value => relevant_features[id]["p_value"], :r_squared => relevant_features[id]["r"]**2}
+            end
+          } if sim >= min_sim
         end
       end
       p neighbors.size
@@ -94,10 +83,7 @@ module OpenTox
           proteomics[feature.id.to_s] << value
           proteomics[feature.id.to_s].uniq!
         when "TOX"
-          # TODO generic way of parsing TOX values
-          if feature.name == "Net cell association" and feature.unit == "mL/ug(Mg)" 
-            dataset.add self, feature, Math.log2(value)
-          elsif feature.name == "Total protein (BCA assay)"
+          if feature.name == "Total protein (BCA assay)"
             physchem_descriptors[feature.id.to_s] ||= []
             physchem_descriptors[feature.id.to_s] << value
             physchem_descriptors[feature.id.to_s].uniq!
-- 
cgit v1.2.3


From 458a2d753551ea607f2ed5efdd0ac0a02d55d673 Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Wed, 1 Jun 2016 12:46:03 +0200
Subject: all tests fixed

---
 lib/nanoparticle.rb | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'lib/nanoparticle.rb')

diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb
index d0f8f51..ca79a3d 100644
--- a/lib/nanoparticle.rb
+++ b/lib/nanoparticle.rb
@@ -100,6 +100,8 @@ module OpenTox
     end
 
     def parse_ambit_value feature, v, dataset
+      #p dataset
+      #p feature
       v.delete "unit"
       # TODO: ppm instead of weights
       if v.keys == ["textValue"]
-- 
cgit v1.2.3


From 85f2308c101b4778508c2d767e08af4cfd671b7b Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Thu, 2 Jun 2016 12:22:39 +0200
Subject: local pls regression for nanoparticles

---
 lib/nanoparticle.rb | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

(limited to 'lib/nanoparticle.rb')

diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb
index ca79a3d..65aab23 100644
--- a/lib/nanoparticle.rb
+++ b/lib/nanoparticle.rb
@@ -6,9 +6,10 @@ module OpenTox
     field :core, type: Hash, default: {}
     field :coating, type: Array, default: []
     field :proteomics, type: Hash, default: {}
+
+    attr_accessor :scaled_values
  
     def physchem_neighbors min_sim: 0.9, dataset_id:, prediction_feature_id:
-      p self.name
       dataset = Dataset.find(dataset_id)
       relevant_features = {}
       measurements = []
@@ -52,7 +53,9 @@ module OpenTox
           common_descriptors = relevant_features.keys & substance.physchem_descriptors.keys
           # scale values
           query_descriptors = common_descriptors.collect{|d| (physchem_descriptors[d].median-relevant_features[d]["mean"])/relevant_features[d]["sd"]}
+          @scaled_values = common_descriptors.collect{|d| [d,(physchem_descriptors[d].median-relevant_features[d]["mean"])/relevant_features[d]["sd"]]}.to_h
           neighbor_descriptors = common_descriptors.collect{|d| (substance.physchem_descriptors[d].median-relevant_features[d]["mean"])/relevant_features[d]["sd"]}
+          neighbor_scaled_values = common_descriptors.collect{|d| [d,(substance.physchem_descriptors[d].median-relevant_features[d]["mean"])/relevant_features[d]["sd"]]}.to_h
           #weights = common_descriptors.collect{|d| 1-relevant_features[d]["p_value"]}
           weights = common_descriptors.collect{|d| relevant_features[d]["r"]**2}
           sim = Algorithm::Similarity.weighted_cosine(query_descriptors,neighbor_descriptors,weights)
@@ -61,12 +64,16 @@ module OpenTox
             "measurements" => values,
             "similarity" => sim,
             "common_descriptors" => common_descriptors.collect do |id|
-              {:id => id, :p_value => relevant_features[id]["p_value"], :r_squared => relevant_features[id]["r"]**2}
+              {
+                :id => id,
+                :scaled_value => neighbor_scaled_values[id],
+                :p_value => relevant_features[id]["p_value"],
+                :r_squared => relevant_features[id]["r"]**2}
             end
           } if sim >= min_sim
         end
       end
-      p neighbors.size
+      $logger.debug "#{self.name}: #{neighbors.size} neighbors"
       neighbors.sort!{|a,b| b["similarity"] <=> a["similarity"]}
       neighbors
     end
-- 
cgit v1.2.3


From eec5bddbd35c9ecee8021128508d8718bccb4fe3 Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Thu, 2 Jun 2016 17:54:48 +0200
Subject: local pls regression for nanoparticle proteomics

---
 lib/nanoparticle.rb | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

(limited to 'lib/nanoparticle.rb')

diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb
index 65aab23..3e29ae1 100644
--- a/lib/nanoparticle.rb
+++ b/lib/nanoparticle.rb
@@ -10,6 +10,7 @@ module OpenTox
     attr_accessor :scaled_values
  
     def physchem_neighbors min_sim: 0.9, dataset_id:, prediction_feature_id:
+      p name
       dataset = Dataset.find(dataset_id)
       relevant_features = {}
       measurements = []
@@ -46,6 +47,7 @@ module OpenTox
           end
         end
       end
+      #p relevant_features.keys.collect{|i| Feature.find(i).name}
       neighbors = []
       substances.each do |substance|
         values = dataset.values(substance,prediction_feature_id)
@@ -86,9 +88,12 @@ module OpenTox
           physchem_descriptors[feature.id.to_s] << value
           physchem_descriptors[feature.id.to_s].uniq!
         when "Proteomics"
-          proteomics[feature.id.to_s] ||= []
-          proteomics[feature.id.to_s] << value
-          proteomics[feature.id.to_s].uniq!
+          #proteomics[feature.id.to_s] ||= []
+          #proteomics[feature.id.to_s] << value
+          #proteomics[feature.id.to_s].uniq!
+          physchem_descriptors[feature.id.to_s] ||= []
+          physchem_descriptors[feature.id.to_s] << value
+          physchem_descriptors[feature.id.to_s].uniq!
         when "TOX"
           if feature.name == "Total protein (BCA assay)"
             physchem_descriptors[feature.id.to_s] ||= []
@@ -109,6 +114,7 @@ module OpenTox
     def parse_ambit_value feature, v, dataset
       #p dataset
       #p feature
+      # TODO add study id to warnings
       v.delete "unit"
       # TODO: ppm instead of weights
       if v.keys == ["textValue"]
-- 
cgit v1.2.3


From 128fd36b2531756c15a93776871e80eb44e524f1 Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Thu, 2 Jun 2016 19:01:18 +0200
Subject: proteomics regression validation

---
 lib/nanoparticle.rb | 28 ++--------------------------
 1 file changed, 2 insertions(+), 26 deletions(-)

(limited to 'lib/nanoparticle.rb')

diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb
index 3e29ae1..c1bf1b5 100644
--- a/lib/nanoparticle.rb
+++ b/lib/nanoparticle.rb
@@ -9,10 +9,10 @@ module OpenTox
 
     attr_accessor :scaled_values
  
-    def physchem_neighbors min_sim: 0.9, dataset_id:, prediction_feature_id:
+    def physchem_neighbors min_sim: 0.9, dataset_id:, prediction_feature_id:, relevant_features:
       p name
       dataset = Dataset.find(dataset_id)
-      relevant_features = {}
+      #relevant_features = {}
       measurements = []
       substances = []
       # TODO: exclude query activities!!!
@@ -24,30 +24,6 @@ module OpenTox
           end
         end
       end
-      R.assign "tox", measurements
-      feature_ids = physchem_descriptors.keys.select{|fid| Feature.find(fid).is_a? NumericFeature}
-      # identify relevant features
-      feature_ids.each do |feature_id|
-        feature_values = substances.collect{|s| s["physchem_descriptors"][feature_id].first if s["physchem_descriptors"][feature_id]}
-        unless feature_values.uniq.size == 1
-          R.assign "feature", feature_values
-          begin
-            R.eval "cor <- cor.test(tox,feature,method = 'pearson',use='pairwise')"
-            p_value = R.eval("cor$p.value").to_ruby
-            if p_value <= 0.05
-              r = R.eval("cor$estimate").to_ruby
-              relevant_features[feature_id] = {}
-              relevant_features[feature_id]["p_value"] = p_value
-              relevant_features[feature_id]["r"] = r
-              relevant_features[feature_id]["mean"] = R.eval("mean(feature, na.rm=TRUE)").to_ruby
-              relevant_features[feature_id]["sd"] = R.eval("sd(feature, na.rm=TRUE)").to_ruby
-            end
-          rescue
-            warn "Correlation of '#{Feature.find(feature_id).name}' (#{feature_values}) with '#{Feature.find(prediction_feature_id).name}' (#{measurements}) failed."
-          end
-        end
-      end
-      #p relevant_features.keys.collect{|i| Feature.find(i).name}
       neighbors = []
       substances.each do |substance|
         values = dataset.values(substance,prediction_feature_id)
-- 
cgit v1.2.3


From f7e87b45f15083e5fcdea64821f06ed93ece4c4e Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Tue, 7 Jun 2016 18:07:28 +0200
Subject: (repeated)crossvalidation plots

---
 lib/nanoparticle.rb | 1 -
 1 file changed, 1 deletion(-)

(limited to 'lib/nanoparticle.rb')

diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb
index c1bf1b5..d6261ee 100644
--- a/lib/nanoparticle.rb
+++ b/lib/nanoparticle.rb
@@ -10,7 +10,6 @@ module OpenTox
     attr_accessor :scaled_values
  
     def physchem_neighbors min_sim: 0.9, dataset_id:, prediction_feature_id:, relevant_features:
-      p name
       dataset = Dataset.find(dataset_id)
       #relevant_features = {}
       measurements = []
-- 
cgit v1.2.3


From 9e8537997d84e78e6545a66a0d09c33e76c8b7cf Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Fri, 30 Sep 2016 17:11:30 +0200
Subject: npo uri as source, spectral count unit f proteomics features

---
 lib/nanoparticle.rb | 18 ++++--------------
 1 file changed, 4 insertions(+), 14 deletions(-)

(limited to 'lib/nanoparticle.rb')

diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb
index d6261ee..b1a3835 100644
--- a/lib/nanoparticle.rb
+++ b/lib/nanoparticle.rb
@@ -5,7 +5,7 @@ module OpenTox
 
     field :core, type: Hash, default: {}
     field :coating, type: Array, default: []
-    field :proteomics, type: Hash, default: {}
+    #field :proteomics, type: Hash, default: {}
 
     attr_accessor :scaled_values
  
@@ -63,26 +63,16 @@ module OpenTox
           physchem_descriptors[feature.id.to_s] << value
           physchem_descriptors[feature.id.to_s].uniq!
         when "Proteomics"
-          #proteomics[feature.id.to_s] ||= []
-          #proteomics[feature.id.to_s] << value
-          #proteomics[feature.id.to_s].uniq!
           physchem_descriptors[feature.id.to_s] ||= []
           physchem_descriptors[feature.id.to_s] << value
           physchem_descriptors[feature.id.to_s].uniq!
         when "TOX"
-          if feature.name == "Total protein (BCA assay)"
-            physchem_descriptors[feature.id.to_s] ||= []
-            physchem_descriptors[feature.id.to_s] << value
-            physchem_descriptors[feature.id.to_s].uniq!
-          else
-            dataset.add self, feature, value
-          end
-          dataset.save
-          dataset_ids << dataset.id
-          dataset_ids.uniq!
+          dataset.add self, feature, value
         else
           warn "Unknown feature type '#{feature.category}'. Value '#{value}' not inserted."
         end
+        dataset_ids << dataset.id
+        dataset_ids.uniq!
       end
     end
 
-- 
cgit v1.2.3


From 5d4e5e463c2b87241bbb56e4658e1e26c0ed084f Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Wed, 5 Oct 2016 13:22:12 +0200
Subject: substance and nanoparticle model creation and predictions

---
 lib/nanoparticle.rb | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

(limited to 'lib/nanoparticle.rb')

diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb
index b1a3835..6905f6f 100644
--- a/lib/nanoparticle.rb
+++ b/lib/nanoparticle.rb
@@ -5,10 +5,10 @@ module OpenTox
 
     field :core, type: Hash, default: {}
     field :coating, type: Array, default: []
-    #field :proteomics, type: Hash, default: {}
 
     attr_accessor :scaled_values
  
+=begin
     def physchem_neighbors min_sim: 0.9, dataset_id:, prediction_feature_id:, relevant_features:
       dataset = Dataset.find(dataset_id)
       #relevant_features = {}
@@ -27,12 +27,12 @@ module OpenTox
       substances.each do |substance|
         values = dataset.values(substance,prediction_feature_id)
         if values
-          common_descriptors = relevant_features.keys & substance.physchem_descriptors.keys
+          common_descriptors = relevant_features.keys & substance.descriptors.keys
           # scale values
-          query_descriptors = common_descriptors.collect{|d| (physchem_descriptors[d].median-relevant_features[d]["mean"])/relevant_features[d]["sd"]}
-          @scaled_values = common_descriptors.collect{|d| [d,(physchem_descriptors[d].median-relevant_features[d]["mean"])/relevant_features[d]["sd"]]}.to_h
-          neighbor_descriptors = common_descriptors.collect{|d| (substance.physchem_descriptors[d].median-relevant_features[d]["mean"])/relevant_features[d]["sd"]}
-          neighbor_scaled_values = common_descriptors.collect{|d| [d,(substance.physchem_descriptors[d].median-relevant_features[d]["mean"])/relevant_features[d]["sd"]]}.to_h
+          query_descriptors = common_descriptors.collect{|d| (descriptors[d].median-relevant_features[d]["mean"])/relevant_features[d]["sd"]}
+          @scaled_values = common_descriptors.collect{|d| [d,(descriptors[d].median-relevant_features[d]["mean"])/relevant_features[d]["sd"]]}.to_h
+          neighbor_descriptors = common_descriptors.collect{|d| (substance.descriptors[d].median-relevant_features[d]["mean"])/relevant_features[d]["sd"]}
+          neighbor_scaled_values = common_descriptors.collect{|d| [d,(substance.descriptors[d].median-relevant_features[d]["mean"])/relevant_features[d]["sd"]]}.to_h
           #weights = common_descriptors.collect{|d| 1-relevant_features[d]["p_value"]}
           weights = common_descriptors.collect{|d| relevant_features[d]["r"]**2}
           sim = Algorithm::Similarity.weighted_cosine(query_descriptors,neighbor_descriptors,weights)
@@ -54,18 +54,19 @@ module OpenTox
       neighbors.sort!{|a,b| b["similarity"] <=> a["similarity"]}
       neighbors
     end
+=end
 
     def add_feature feature, value, dataset
       unless feature.name == "ATOMIC COMPOSITION" or feature.name == "FUNCTIONAL GROUP" # redundand
         case feature.category
         when "P-CHEM"
-          physchem_descriptors[feature.id.to_s] ||= []
-          physchem_descriptors[feature.id.to_s] << value
-          physchem_descriptors[feature.id.to_s].uniq!
+          properties[feature.id.to_s] ||= []
+          properties[feature.id.to_s] << value
+          properties[feature.id.to_s].uniq!
         when "Proteomics"
-          physchem_descriptors[feature.id.to_s] ||= []
-          physchem_descriptors[feature.id.to_s] << value
-          physchem_descriptors[feature.id.to_s].uniq!
+          properties[feature.id.to_s] ||= []
+          properties[feature.id.to_s] << value
+          properties[feature.id.to_s].uniq!
         when "TOX"
           dataset.add self, feature, value
         else
-- 
cgit v1.2.3


From 91787edb3682900bc5a2feeca66e5142f387fcc6 Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Fri, 7 Oct 2016 10:25:58 +0200
Subject: unified interface for prediction algorithms

---
 lib/nanoparticle.rb | 50 --------------------------------------------------
 1 file changed, 50 deletions(-)

(limited to 'lib/nanoparticle.rb')

diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb
index 6905f6f..f74f263 100644
--- a/lib/nanoparticle.rb
+++ b/lib/nanoparticle.rb
@@ -8,54 +8,6 @@ module OpenTox
 
     attr_accessor :scaled_values
  
-=begin
-    def physchem_neighbors min_sim: 0.9, dataset_id:, prediction_feature_id:, relevant_features:
-      dataset = Dataset.find(dataset_id)
-      #relevant_features = {}
-      measurements = []
-      substances = []
-      # TODO: exclude query activities!!!
-      dataset.substances.each do |s|
-        if s.core == self.core # exclude nanoparticles with different core
-          dataset.values(s,prediction_feature_id).each do |act|
-            measurements << act
-            substances << s
-          end
-        end
-      end
-      neighbors = []
-      substances.each do |substance|
-        values = dataset.values(substance,prediction_feature_id)
-        if values
-          common_descriptors = relevant_features.keys & substance.descriptors.keys
-          # scale values
-          query_descriptors = common_descriptors.collect{|d| (descriptors[d].median-relevant_features[d]["mean"])/relevant_features[d]["sd"]}
-          @scaled_values = common_descriptors.collect{|d| [d,(descriptors[d].median-relevant_features[d]["mean"])/relevant_features[d]["sd"]]}.to_h
-          neighbor_descriptors = common_descriptors.collect{|d| (substance.descriptors[d].median-relevant_features[d]["mean"])/relevant_features[d]["sd"]}
-          neighbor_scaled_values = common_descriptors.collect{|d| [d,(substance.descriptors[d].median-relevant_features[d]["mean"])/relevant_features[d]["sd"]]}.to_h
-          #weights = common_descriptors.collect{|d| 1-relevant_features[d]["p_value"]}
-          weights = common_descriptors.collect{|d| relevant_features[d]["r"]**2}
-          sim = Algorithm::Similarity.weighted_cosine(query_descriptors,neighbor_descriptors,weights)
-          neighbors << {
-            "_id" => substance.id,
-            "measurements" => values,
-            "similarity" => sim,
-            "common_descriptors" => common_descriptors.collect do |id|
-              {
-                :id => id,
-                :scaled_value => neighbor_scaled_values[id],
-                :p_value => relevant_features[id]["p_value"],
-                :r_squared => relevant_features[id]["r"]**2}
-            end
-          } if sim >= min_sim
-        end
-      end
-      $logger.debug "#{self.name}: #{neighbors.size} neighbors"
-      neighbors.sort!{|a,b| b["similarity"] <=> a["similarity"]}
-      neighbors
-    end
-=end
-
     def add_feature feature, value, dataset
       unless feature.name == "ATOMIC COMPOSITION" or feature.name == "FUNCTIONAL GROUP" # redundand
         case feature.category
@@ -78,8 +30,6 @@ module OpenTox
     end
 
     def parse_ambit_value feature, v, dataset
-      #p dataset
-      #p feature
       # TODO add study id to warnings
       v.delete "unit"
       # TODO: ppm instead of weights
-- 
cgit v1.2.3


From 9e99495ecbff147218023c136bade9e56a502fed Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Thu, 13 Oct 2016 14:39:04 +0200
Subject: descriptor tests fixed

---
 lib/nanoparticle.rb | 2 --
 1 file changed, 2 deletions(-)

(limited to 'lib/nanoparticle.rb')

diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb
index f74f263..23e155c 100644
--- a/lib/nanoparticle.rb
+++ b/lib/nanoparticle.rb
@@ -6,8 +6,6 @@ module OpenTox
     field :core, type: Hash, default: {}
     field :coating, type: Array, default: []
 
-    attr_accessor :scaled_values
- 
     def add_feature feature, value, dataset
       unless feature.name == "ATOMIC COMPOSITION" or feature.name == "FUNCTIONAL GROUP" # redundand
         case feature.category
-- 
cgit v1.2.3


From 9e7b36613e98601de7b2ceb2d4442e11f1ae868a Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Thu, 10 Nov 2016 12:23:46 +0100
Subject: intermediate commit, may be defunct

---
 lib/nanoparticle.rb | 46 ++++++++++++++++++++++++++++++++++------------
 1 file changed, 34 insertions(+), 12 deletions(-)

(limited to 'lib/nanoparticle.rb')

diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb
index 23e155c..02d9a89 100644
--- a/lib/nanoparticle.rb
+++ b/lib/nanoparticle.rb
@@ -3,8 +3,30 @@ module OpenTox
   class Nanoparticle < Substance
     include OpenTox
 
-    field :core, type: Hash, default: {}
-    field :coating, type: Array, default: []
+    field :core_id, type: String, default: nil
+    field :coating_ids, type: Array, default: []
+
+    def core
+      Compound.find core_id
+    end
+
+    def coating
+      coating_ids.collect{|i| Compound.find i }
+    end
+
+    def fingerprint type=DEFAULT_FINGERPRINT
+      core_fp = core.fingerprint type
+      coating_fp = coating.collect{|c| c.fingerprint type}.flatten.uniq.compact
+      (core_fp.empty? or coating_fp.empty?) ? [] : (core_fp+coating_fp).uniq.compact
+    end
+
+    def calculate_properties descriptors=PhysChem::OPENBABEL
+      if core.smiles and !coating.collect{|c| c.smiles}.compact.empty?
+        core_prop = core.calculate_properties descriptors
+        coating_prop = coating.collect{|c| c.calculate_properties descriptors if c.smiles}
+        descriptors.collect_with_index{|d,i| [core_prop[i],coating_prop.collect{|c| c[i] if c}]}
+      end
+    end
 
     def add_feature feature, value, dataset
       unless feature.name == "ATOMIC COMPOSITION" or feature.name == "FUNCTIONAL GROUP" # redundand
@@ -37,28 +59,28 @@ module OpenTox
         add_feature feature, v["loValue"], dataset
       elsif v.keys.size == 2 and v["errorValue"]
         add_feature feature, v["loValue"], dataset
-        warn "Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'."
+        #warn "Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'."
       elsif v.keys.size == 2 and v["loQualifier"] == "mean"
         add_feature feature, v["loValue"], dataset
-        warn "'#{feature.name}' is a mean value. Original data is not available."
+        #warn "'#{feature.name}' is a mean value. Original data is not available."
       elsif v.keys.size == 2 and v["loQualifier"] #== ">="
-        warn "Only min value available for '#{feature.name}', entry ignored"
+        #warn "Only min value available for '#{feature.name}', entry ignored"
       elsif v.keys.size == 2 and v["upQualifier"] #== ">="
-        warn "Only max value available for '#{feature.name}', entry ignored"
+        #warn "Only max value available for '#{feature.name}', entry ignored"
       elsif v.keys.size == 3 and v["loValue"] and v["loQualifier"].nil? and v["upQualifier"].nil?
         add_feature feature, v["loValue"], dataset
-        warn "loQualifier and upQualifier are empty."
+        #warn "loQualifier and upQualifier are empty."
       elsif v.keys.size == 3 and v["loValue"] and v["loQualifier"] == "" and v["upQualifier"] == ""
         add_feature feature, v["loValue"], dataset
-        warn "loQualifier and upQualifier are empty."
+        #warn "loQualifier and upQualifier are empty."
       elsif v.keys.size == 4 and v["loValue"] and v["loQualifier"].nil? and v["upQualifier"].nil?
         add_feature feature, v["loValue"], dataset
-        warn "loQualifier and upQualifier are empty."
+        #warn "loQualifier and upQualifier are empty."
       elsif v.size == 4 and v["loQualifier"] and v["upQualifier"] and v["loValue"] and v["upValue"]
-        add_feature feature, [v["loValue"],v["upValue"]].mean, dataset
-        warn "Using mean value of range #{v["loValue"]} - #{v["upValue"]} for '#{feature.name}'. Original data is not available."
+        #add_feature feature, [v["loValue"],v["upValue"]].mean, dataset
+        #warn "Using mean value of range #{v["loValue"]} - #{v["upValue"]} for '#{feature.name}'. Original data is not available."
       elsif v.size == 4 and v["loQualifier"] == "mean" and v["errorValue"]
-        warn "'#{feature.name}' is a mean value. Original data is not available. Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'."
+        #warn "'#{feature.name}' is a mean value. Original data is not available. Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'."
         add_feature feature, v["loValue"], dataset
       elsif v == {} # do nothing
       else
-- 
cgit v1.2.3