From 063acd4dc63e9287287cc1ff78fff2064ff74e4f Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Thu, 7 Apr 2016 17:39:14 +0200 Subject: initial ambit import --- lib/nanoparticle.rb | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 lib/nanoparticle.rb (limited to 'lib/nanoparticle.rb') diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb new file mode 100644 index 0000000..3783ece --- /dev/null +++ b/lib/nanoparticle.rb @@ -0,0 +1,17 @@ +module OpenTox + + class Nanoparticle + include OpenTox + + field :particle_id, type: String + field :core, type: String + field :coatings, type: Array + + #field :physchem_descriptors, type: Hash, default: {} + #field :toxicities, type: Hash, default: {} + field :features, type: Hash, default: {} + + end +end + + -- cgit v1.2.3 From f3780d7507092b643216054fa3ca1e6146281e43 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Fri, 8 Apr 2016 13:04:56 +0200 Subject: enm import test --- lib/nanoparticle.rb | 45 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 40 insertions(+), 5 deletions(-) (limited to 'lib/nanoparticle.rb') diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb index 3783ece..cda431a 100644 --- a/lib/nanoparticle.rb +++ b/lib/nanoparticle.rb @@ -3,13 +3,48 @@ module OpenTox class Nanoparticle include OpenTox - field :particle_id, type: String + #field :particle_id, type: String field :core, type: String - field :coatings, type: Array + field :coating, type: Array, default: [] - #field :physchem_descriptors, type: Hash, default: {} - #field :toxicities, type: Hash, default: {} - field :features, type: Hash, default: {} + field :physchem_descriptors, type: Hash, default: {} + field :toxicities, type: Hash, default: {} + #field :features, type: Hash, default: {} + field :bundles, type: Array, default: [] + + def predict + end + + def add_feature feature, value + if feature.source.match /property\/P-CHEM/ + physchem_descriptors[feature.id.to_s] ||= [] + physchem_descriptors[feature.id.to_s] << value + elsif feature.source.match /property\/TOX/ + toxicities[feature.id.to_s] ||= [] + toxicities[feature.id.to_s] << value + else + $logger.warn "Unknown feature type '#{feature.source}'. Value '#{value}' not inserted." + warnings << "Unknown feature type '#{feature.source}'. Value '#{value}' not inserted." + end + end + + def parse_ambit_value feature, v + if v.keys == ["loValue"] + add_feature feature, v["loValue"] + elsif v.keys.size == 2 and v["loQualifier"] == "mean" + add_feature feature, {:mean => v["loValue"]} + elsif v.keys.size == 2 and v["loQualifier"] #== ">=" + add_feature feature, {:min => v["loValue"],:max => Float::INFINITY} + elsif v.keys.size == 2 and v["upQualifier"] #== ">=" + add_feature feature, {:max => v["upValue"],:min => -Float::INFINITY} + elsif v.size == 4 and v["loQualifier"] and v["upQualifier"] + add_feature feature, {:min => v["loValue"],:max => v["upValue"]} + elsif v == {} # do nothing + else + $logger.warn "Cannot parse Ambit eNanoMapper value '#{v}' for feature '#{feature.name}'." + warnings << "Cannot parse Ambit eNanoMapper value '#{v}' for feature '#{feature.name}'." + end + end end end -- cgit v1.2.3 From 84222bae2bbb9fb3e0ce3e65de1be8e7f94d2147 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Tue, 12 Apr 2016 12:37:37 +0200 Subject: new dataset structure --- lib/nanoparticle.rb | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'lib/nanoparticle.rb') diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb index cda431a..c58dc8c 100644 --- a/lib/nanoparticle.rb +++ b/lib/nanoparticle.rb @@ -1,9 +1,8 @@ module OpenTox - class Nanoparticle + class Nanoparticle < Substance include OpenTox - #field :particle_id, type: String field :core, type: String field :coating, type: Array, default: [] -- cgit v1.2.3 From 64f1f32ced77afb278bdb7c27397c5299a73675c Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Wed, 13 Apr 2016 18:18:36 +0200 Subject: improved enm import --- lib/nanoparticle.rb | 1 - 1 file changed, 1 deletion(-) (limited to 'lib/nanoparticle.rb') diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb index c58dc8c..6e9b0ea 100644 --- a/lib/nanoparticle.rb +++ b/lib/nanoparticle.rb @@ -6,7 +6,6 @@ module OpenTox field :core, type: String field :coating, type: Array, default: [] - field :physchem_descriptors, type: Hash, default: {} field :toxicities, type: Hash, default: {} #field :features, type: Hash, default: {} field :bundles, type: Array, default: [] -- cgit v1.2.3 From 753fcc204d93d86c76860bee6e2f7d0468c3c940 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Thu, 14 Apr 2016 19:43:24 +0200 Subject: features/toxicities fixed --- lib/nanoparticle.rb | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) (limited to 'lib/nanoparticle.rb') diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb index 6e9b0ea..0350363 100644 --- a/lib/nanoparticle.rb +++ b/lib/nanoparticle.rb @@ -5,12 +5,10 @@ module OpenTox field :core, type: String field :coating, type: Array, default: [] - - field :toxicities, type: Hash, default: {} - #field :features, type: Hash, default: {} field :bundles, type: Array, default: [] - def predict + def nanoparticle_neighbors params + Dataset.find(params[:training_dataset_id]).nanoparticles end def add_feature feature, value @@ -21,22 +19,32 @@ module OpenTox toxicities[feature.id.to_s] ||= [] toxicities[feature.id.to_s] << value else - $logger.warn "Unknown feature type '#{feature.source}'. Value '#{value}' not inserted." - warnings << "Unknown feature type '#{feature.source}'. Value '#{value}' not inserted." + warn "Unknown feature type '#{feature.source}'. Value '#{value}' not inserted." end end def parse_ambit_value feature, v + # TODO: units, mmol/log10 conversion if v.keys == ["loValue"] - add_feature feature, v["loValue"] + #if v["loValue"].numeric? + add_feature feature, v["loValue"] + #else + #warn "'#{v["loValue"]}' is not a numeric value, entry ignored." + #end elsif v.keys.size == 2 and v["loQualifier"] == "mean" - add_feature feature, {:mean => v["loValue"]} + #add_feature feature, {:mean => v["loValue"]} + add_feature feature, v["loValue"] + warn "'#{feature.name}' is a mean value. Original data is not available." elsif v.keys.size == 2 and v["loQualifier"] #== ">=" - add_feature feature, {:min => v["loValue"],:max => Float::INFINITY} + #add_feature feature, {:min => v["loValue"],:max => Float::INFINITY} + warn "Only min value available for '#{feature.name}', entry ignored" elsif v.keys.size == 2 and v["upQualifier"] #== ">=" - add_feature feature, {:max => v["upValue"],:min => -Float::INFINITY} + #add_feature feature, {:max => v["upValue"],:min => -Float::INFINITY} + warn "Only max value available for '#{feature.name}', entry ignored" elsif v.size == 4 and v["loQualifier"] and v["upQualifier"] - add_feature feature, {:min => v["loValue"],:max => v["upValue"]} + #add_feature feature, {:min => v["loValue"],:max => v["upValue"]} + add_feature feature, [v["loValue"],v["upValue"]].mean + warn "Using mean value of range #{v["loValue"]} - #{v["upValue"]} for '#{feature.name}'. Original data is not available." elsif v == {} # do nothing else $logger.warn "Cannot parse Ambit eNanoMapper value '#{v}' for feature '#{feature.name}'." -- cgit v1.2.3 From 4662e845c12e3e623ec9bec208c42cd4b1886047 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Fri, 15 Apr 2016 14:58:17 +0200 Subject: enm study import --- lib/nanoparticle.rb | 42 +++++++++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 17 deletions(-) (limited to 'lib/nanoparticle.rb') diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb index 0350363..295b6c0 100644 --- a/lib/nanoparticle.rb +++ b/lib/nanoparticle.rb @@ -12,43 +12,51 @@ module OpenTox end def add_feature feature, value - if feature.source.match /property\/P-CHEM/ + case feature.category + when "P-CHEM" physchem_descriptors[feature.id.to_s] ||= [] physchem_descriptors[feature.id.to_s] << value - elsif feature.source.match /property\/TOX/ + when "TOX" toxicities[feature.id.to_s] ||= [] toxicities[feature.id.to_s] << value else - warn "Unknown feature type '#{feature.source}'. Value '#{value}' not inserted." + warn "Unknown feature type '#{feature.category}'. Value '#{value}' not inserted." end + save end def parse_ambit_value feature, v - # TODO: units, mmol/log10 conversion - if v.keys == ["loValue"] - #if v["loValue"].numeric? - add_feature feature, v["loValue"] - #else - #warn "'#{v["loValue"]}' is not a numeric value, entry ignored." - #end + v.delete "unit" + # TODO: mmol/log10 conversion + if v.keys == ["textValue"] + add_feature feature, v["textValue"] + elsif v.keys == ["loValue"] + add_feature feature, v["loValue"] + elsif v.keys.size == 2 and v["errorValue"] + add_feature feature, v["loValue"] + warn "Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'." elsif v.keys.size == 2 and v["loQualifier"] == "mean" - #add_feature feature, {:mean => v["loValue"]} add_feature feature, v["loValue"] warn "'#{feature.name}' is a mean value. Original data is not available." elsif v.keys.size == 2 and v["loQualifier"] #== ">=" - #add_feature feature, {:min => v["loValue"],:max => Float::INFINITY} warn "Only min value available for '#{feature.name}', entry ignored" elsif v.keys.size == 2 and v["upQualifier"] #== ">=" - #add_feature feature, {:max => v["upValue"],:min => -Float::INFINITY} warn "Only max value available for '#{feature.name}', entry ignored" - elsif v.size == 4 and v["loQualifier"] and v["upQualifier"] - #add_feature feature, {:min => v["loValue"],:max => v["upValue"]} + elsif v.keys.size == 3 and v["loValue"] and v["loQualifier"].nil? and v["upQualifier"].nil? + add_feature feature, v["loValue"] + warn "loQualifier and upQualifier are empty." + elsif v.keys.size == 3 and v["loValue"] and v["loQualifier"] == "" and v["upQualifier"] == "" + add_feature feature, v["loValue"] + warn "loQualifier and upQualifier are empty." + elsif v.size == 4 and v["loQualifier"] and v["upQualifier"] and v["loValue"] and v["upValue"] add_feature feature, [v["loValue"],v["upValue"]].mean warn "Using mean value of range #{v["loValue"]} - #{v["upValue"]} for '#{feature.name}'. Original data is not available." + elsif v.size == 4 and v["loQualifier"] == "mean" and v["errorValue"] + warn "'#{feature.name}' is a mean value. Original data is not available. Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'." + add_feature feature, v["loValue"] elsif v == {} # do nothing else - $logger.warn "Cannot parse Ambit eNanoMapper value '#{v}' for feature '#{feature.name}'." - warnings << "Cannot parse Ambit eNanoMapper value '#{v}' for feature '#{feature.name}'." + warn "Cannot parse Ambit eNanoMapper value '#{v}' for feature '#{feature.name}'." end end -- cgit v1.2.3 From 75b70425ae8699464a18529eb7bf35a216c06243 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Thu, 21 Apr 2016 09:56:12 +0200 Subject: AMBIT import expanded --- lib/nanoparticle.rb | 3 +++ 1 file changed, 3 insertions(+) (limited to 'lib/nanoparticle.rb') diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb index 295b6c0..b934bb3 100644 --- a/lib/nanoparticle.rb +++ b/lib/nanoparticle.rb @@ -48,6 +48,9 @@ module OpenTox elsif v.keys.size == 3 and v["loValue"] and v["loQualifier"] == "" and v["upQualifier"] == "" add_feature feature, v["loValue"] warn "loQualifier and upQualifier are empty." + elsif v.keys.size == 4 and v["loValue"] and v["loQualifier"].nil? and v["upQualifier"].nil? + add_feature feature, v["loValue"] + warn "loQualifier and upQualifier are empty." elsif v.size == 4 and v["loQualifier"] and v["upQualifier"] and v["loValue"] and v["upValue"] add_feature feature, [v["loValue"],v["upValue"]].mean warn "Using mean value of range #{v["loValue"]} - #{v["upValue"]} for '#{feature.name}'. Original data is not available." -- cgit v1.2.3 From cfc64a2966ab38698e499f0b44f41208ee77a07f Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Tue, 26 Apr 2016 17:38:15 +0200 Subject: first nanomaterial prediction --- lib/nanoparticle.rb | 2 ++ 1 file changed, 2 insertions(+) (limited to 'lib/nanoparticle.rb') diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb index b934bb3..b5de5b9 100644 --- a/lib/nanoparticle.rb +++ b/lib/nanoparticle.rb @@ -16,9 +16,11 @@ module OpenTox when "P-CHEM" physchem_descriptors[feature.id.to_s] ||= [] physchem_descriptors[feature.id.to_s] << value + physchem_descriptors[feature.id.to_s].uniq! when "TOX" toxicities[feature.id.to_s] ||= [] toxicities[feature.id.to_s] << value + toxicities[feature.id.to_s].uniq! else warn "Unknown feature type '#{feature.category}'. Value '#{value}' not inserted." end -- cgit v1.2.3 From 79238bddb59607aa9f759caa9e3c8db176709703 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Thu, 28 Apr 2016 12:19:48 +0200 Subject: compound validations fixed --- lib/nanoparticle.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/nanoparticle.rb') diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb index b5de5b9..83b97a9 100644 --- a/lib/nanoparticle.rb +++ b/lib/nanoparticle.rb @@ -8,7 +8,7 @@ module OpenTox field :bundles, type: Array, default: [] def nanoparticle_neighbors params - Dataset.find(params[:training_dataset_id]).nanoparticles + Dataset.find(params[:training_dataset_id]).nanoparticles.collect{|np| {"_id" => np.id, "tanimoto" => 1}} end def add_feature feature, value -- cgit v1.2.3 From 05386e748270c337c66f6f379317ea4b25905236 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Wed, 4 May 2016 19:24:42 +0200 Subject: first reasonable results for nanoparticle crossvalidation --- lib/nanoparticle.rb | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'lib/nanoparticle.rb') diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb index 83b97a9..dda4a9f 100644 --- a/lib/nanoparticle.rb +++ b/lib/nanoparticle.rb @@ -8,7 +8,7 @@ module OpenTox field :bundles, type: Array, default: [] def nanoparticle_neighbors params - Dataset.find(params[:training_dataset_id]).nanoparticles.collect{|np| {"_id" => np.id, "tanimoto" => 1}} + Dataset.find(params[:training_dataset_id]).nanoparticles.collect{|np| np["tanimoto"] = 1; np} end def add_feature feature, value @@ -19,7 +19,19 @@ module OpenTox physchem_descriptors[feature.id.to_s].uniq! when "TOX" toxicities[feature.id.to_s] ||= [] - toxicities[feature.id.to_s] << value + # TODO generic way of parsing TOX values + if feature.name == "7.99 Toxicity (other) ICP-AES" and feature.unit == "mL/ug(Mg)" + toxicities[feature.id.to_s] << -Math.log10(value) + #if value.numeric? + #begin + #rescue + #p feature + #p value + #exit + #end + else + toxicities[feature.id.to_s] << value + end toxicities[feature.id.to_s].uniq! else warn "Unknown feature type '#{feature.category}'. Value '#{value}' not inserted." @@ -29,7 +41,7 @@ module OpenTox def parse_ambit_value feature, v v.delete "unit" - # TODO: mmol/log10 conversion + # TODO: ppm instead of weights if v.keys == ["textValue"] add_feature feature, v["textValue"] elsif v.keys == ["loValue"] -- cgit v1.2.3 From ab7b37541b4f8a762be737009631d3eefd898b4a Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Thu, 5 May 2016 16:14:02 +0200 Subject: ambit mirror, import from mirrored json, proteomics import --- lib/nanoparticle.rb | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) (limited to 'lib/nanoparticle.rb') diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb index dda4a9f..c9fbb77 100644 --- a/lib/nanoparticle.rb +++ b/lib/nanoparticle.rb @@ -6,6 +6,7 @@ module OpenTox field :core, type: String field :coating, type: Array, default: [] field :bundles, type: Array, default: [] + field :proteomics, type: Hash, default: {} def nanoparticle_neighbors params Dataset.find(params[:training_dataset_id]).nanoparticles.collect{|np| np["tanimoto"] = 1; np} @@ -14,21 +15,18 @@ module OpenTox def add_feature feature, value case feature.category when "P-CHEM" - physchem_descriptors[feature.id.to_s] ||= [] - physchem_descriptors[feature.id.to_s] << value - physchem_descriptors[feature.id.to_s].uniq! + physchem[feature.id.to_s] ||= [] + physchem[feature.id.to_s] << value + physchem[feature.id.to_s].uniq! + when "Proteomics" + proteomics[feature.id.to_s] ||= [] + proteomics[feature.id.to_s] << value + proteomics[feature.id.to_s].uniq! when "TOX" toxicities[feature.id.to_s] ||= [] # TODO generic way of parsing TOX values if feature.name == "7.99 Toxicity (other) ICP-AES" and feature.unit == "mL/ug(Mg)" toxicities[feature.id.to_s] << -Math.log10(value) - #if value.numeric? - #begin - #rescue - #p feature - #p value - #exit - #end else toxicities[feature.id.to_s] << value end @@ -36,7 +34,6 @@ module OpenTox else warn "Unknown feature type '#{feature.category}'. Value '#{value}' not inserted." end - save end def parse_ambit_value feature, v @@ -79,5 +76,3 @@ module OpenTox end end - - -- cgit v1.2.3 From 51f57e2858b60bed74ebcc97189b2188c900c283 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Fri, 6 May 2016 12:49:28 +0200 Subject: dataset tests cleanup --- lib/nanoparticle.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib/nanoparticle.rb') diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb index c9fbb77..9bf419d 100644 --- a/lib/nanoparticle.rb +++ b/lib/nanoparticle.rb @@ -15,9 +15,9 @@ module OpenTox def add_feature feature, value case feature.category when "P-CHEM" - physchem[feature.id.to_s] ||= [] - physchem[feature.id.to_s] << value - physchem[feature.id.to_s].uniq! + physchem_descriptors[feature.id.to_s] ||= [] + physchem_descriptors[feature.id.to_s] << value + physchem_descriptors[feature.id.to_s].uniq! when "Proteomics" proteomics[feature.id.to_s] ||= [] proteomics[feature.id.to_s] << value -- cgit v1.2.3 From 611bac891177f8d9185d45486dd574b6ef4d1912 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Mon, 9 May 2016 15:11:46 +0200 Subject: nanoparticle models fixed --- lib/nanoparticle.rb | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) (limited to 'lib/nanoparticle.rb') diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb index 9bf419d..b79981d 100644 --- a/lib/nanoparticle.rb +++ b/lib/nanoparticle.rb @@ -9,10 +9,14 @@ module OpenTox field :proteomics, type: Hash, default: {} def nanoparticle_neighbors params - Dataset.find(params[:training_dataset_id]).nanoparticles.collect{|np| np["tanimoto"] = 1; np} + dataset = Dataset.find(params[:training_dataset_id]) + Dataset.find(params[:training_dataset_id]).nanoparticles.collect do |np| + np["tanimoto"] = 1 + np unless np.toxicities.empty? + end.compact end - def add_feature feature, value + def add_feature feature, value, dataset_id case feature.category when "P-CHEM" physchem_descriptors[feature.id.to_s] ||= [] @@ -23,51 +27,52 @@ module OpenTox proteomics[feature.id.to_s] << value proteomics[feature.id.to_s].uniq! when "TOX" - toxicities[feature.id.to_s] ||= [] + toxicities[feature.id.to_s] ||= {} + toxicities[feature.id.to_s][dataset_id.to_s] ||= [] # TODO generic way of parsing TOX values if feature.name == "7.99 Toxicity (other) ICP-AES" and feature.unit == "mL/ug(Mg)" - toxicities[feature.id.to_s] << -Math.log10(value) + toxicities[feature.id.to_s][dataset_id.to_s] << -Math.log10(value) else - toxicities[feature.id.to_s] << value + toxicities[feature.id.to_s][dataset_id.to_s] << value end - toxicities[feature.id.to_s].uniq! + toxicities[feature.id.to_s][dataset_id.to_s].uniq! else warn "Unknown feature type '#{feature.category}'. Value '#{value}' not inserted." end end - def parse_ambit_value feature, v + def parse_ambit_value feature, v, dataset_id v.delete "unit" # TODO: ppm instead of weights if v.keys == ["textValue"] - add_feature feature, v["textValue"] + add_feature feature, v["textValue"], dataset_id elsif v.keys == ["loValue"] - add_feature feature, v["loValue"] + add_feature feature, v["loValue"], dataset_id elsif v.keys.size == 2 and v["errorValue"] - add_feature feature, v["loValue"] + add_feature feature, v["loValue"], dataset_id warn "Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'." elsif v.keys.size == 2 and v["loQualifier"] == "mean" - add_feature feature, v["loValue"] + add_feature feature, v["loValue"], dataset_id warn "'#{feature.name}' is a mean value. Original data is not available." elsif v.keys.size == 2 and v["loQualifier"] #== ">=" warn "Only min value available for '#{feature.name}', entry ignored" elsif v.keys.size == 2 and v["upQualifier"] #== ">=" warn "Only max value available for '#{feature.name}', entry ignored" elsif v.keys.size == 3 and v["loValue"] and v["loQualifier"].nil? and v["upQualifier"].nil? - add_feature feature, v["loValue"] + add_feature feature, v["loValue"], dataset_id warn "loQualifier and upQualifier are empty." elsif v.keys.size == 3 and v["loValue"] and v["loQualifier"] == "" and v["upQualifier"] == "" - add_feature feature, v["loValue"] + add_feature feature, v["loValue"], dataset_id warn "loQualifier and upQualifier are empty." elsif v.keys.size == 4 and v["loValue"] and v["loQualifier"].nil? and v["upQualifier"].nil? - add_feature feature, v["loValue"] + add_feature feature, v["loValue"], dataset_id warn "loQualifier and upQualifier are empty." elsif v.size == 4 and v["loQualifier"] and v["upQualifier"] and v["loValue"] and v["upValue"] - add_feature feature, [v["loValue"],v["upValue"]].mean + add_feature feature, [v["loValue"],v["upValue"]].mean, dataset_id warn "Using mean value of range #{v["loValue"]} - #{v["upValue"]} for '#{feature.name}'. Original data is not available." elsif v.size == 4 and v["loQualifier"] == "mean" and v["errorValue"] warn "'#{feature.name}' is a mean value. Original data is not available. Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'." - add_feature feature, v["loValue"] + add_feature feature, v["loValue"], dataset_id elsif v == {} # do nothing else warn "Cannot parse Ambit eNanoMapper value '#{v}' for feature '#{feature.name}'." -- cgit v1.2.3 From b8bb12c8a163c238d7d4387c1914e2100bb660df Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Thu, 12 May 2016 15:23:01 +0200 Subject: enm study import fixed --- lib/nanoparticle.rb | 80 +++++++++++++++++++++++++++++++++-------------------- 1 file changed, 50 insertions(+), 30 deletions(-) (limited to 'lib/nanoparticle.rb') diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb index b79981d..6527fa3 100644 --- a/lib/nanoparticle.rb +++ b/lib/nanoparticle.rb @@ -8,15 +8,31 @@ module OpenTox field :bundles, type: Array, default: [] field :proteomics, type: Hash, default: {} - def nanoparticle_neighbors params - dataset = Dataset.find(params[:training_dataset_id]) - Dataset.find(params[:training_dataset_id]).nanoparticles.collect do |np| - np["tanimoto"] = 1 - np unless np.toxicities.empty? - end.compact + def nanoparticle_neighbors min_sim: 0.1, type:, dataset_id:, prediction_feature_id: + dataset = Dataset.find(dataset_id) + neighbors = [] + p dataset.data_entries.size + p dataset.substance_ids.size + p dataset.substance_ids.collect{|i| i.to_s} == dataset.data_entries.keys + p dataset.substance_ids.collect{|i| i.to_s} + p dataset.data_entries.keys + dataset.nanoparticles.each do |np| + prediction_feature_id + p dataset.data_entries[np.id.to_s] + values = dataset.values(np,prediction_feature_id) + p values + if values + common_descriptors = physchem_descriptors.keys & np.physchem_descriptors.keys + sim = Algorithm::Similarity.cosine(common_descriptors.collect{|d| physchem_descriptors[d]}, common_descriptors.collect{|d| np.physchem_descriptors[d]}) + neighbors << {"_id" => np.id, "toxicities" => values, "similarity" => sim} if sim >= min_sim + end + end + neighbors.sort!{|a,b| b["similarity"] <=> a["similarity"]} + neighbors end def add_feature feature, value, dataset_id + dataset = Dataset.find(dataset_id) case feature.category when "P-CHEM" physchem_descriptors[feature.id.to_s] ||= [] @@ -27,55 +43,59 @@ module OpenTox proteomics[feature.id.to_s] << value proteomics[feature.id.to_s].uniq! when "TOX" - toxicities[feature.id.to_s] ||= {} - toxicities[feature.id.to_s][dataset_id.to_s] ||= [] # TODO generic way of parsing TOX values + p dataset.name + p self.name + p feature.name + p feature.unit + p value if feature.name == "7.99 Toxicity (other) ICP-AES" and feature.unit == "mL/ug(Mg)" - toxicities[feature.id.to_s][dataset_id.to_s] << -Math.log10(value) + dataset.add self, feature, -Math.log10(value) else - toxicities[feature.id.to_s][dataset_id.to_s] << value + dataset.add self, feature, value end - toxicities[feature.id.to_s][dataset_id.to_s].uniq! + dataset.save else warn "Unknown feature type '#{feature.category}'. Value '#{value}' not inserted." end end def parse_ambit_value feature, v, dataset_id + dataset = Dataset.find(dataset_id) v.delete "unit" # TODO: ppm instead of weights if v.keys == ["textValue"] - add_feature feature, v["textValue"], dataset_id + add_feature feature, v["textValue"], dataset elsif v.keys == ["loValue"] - add_feature feature, v["loValue"], dataset_id + add_feature feature, v["loValue"], dataset elsif v.keys.size == 2 and v["errorValue"] - add_feature feature, v["loValue"], dataset_id - warn "Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'." + add_feature feature, v["loValue"], dataset + #warn "Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'." elsif v.keys.size == 2 and v["loQualifier"] == "mean" - add_feature feature, v["loValue"], dataset_id - warn "'#{feature.name}' is a mean value. Original data is not available." + add_feature feature, v["loValue"], dataset + #warn "'#{feature.name}' is a mean value. Original data is not available." elsif v.keys.size == 2 and v["loQualifier"] #== ">=" - warn "Only min value available for '#{feature.name}', entry ignored" + #warn "Only min value available for '#{feature.name}', entry ignored" elsif v.keys.size == 2 and v["upQualifier"] #== ">=" - warn "Only max value available for '#{feature.name}', entry ignored" + #warn "Only max value available for '#{feature.name}', entry ignored" elsif v.keys.size == 3 and v["loValue"] and v["loQualifier"].nil? and v["upQualifier"].nil? - add_feature feature, v["loValue"], dataset_id - warn "loQualifier and upQualifier are empty." + add_feature feature, v["loValue"], dataset + #warn "loQualifier and upQualifier are empty." elsif v.keys.size == 3 and v["loValue"] and v["loQualifier"] == "" and v["upQualifier"] == "" - add_feature feature, v["loValue"], dataset_id - warn "loQualifier and upQualifier are empty." + add_feature feature, v["loValue"], dataset + #warn "loQualifier and upQualifier are empty." elsif v.keys.size == 4 and v["loValue"] and v["loQualifier"].nil? and v["upQualifier"].nil? - add_feature feature, v["loValue"], dataset_id - warn "loQualifier and upQualifier are empty." + add_feature feature, v["loValue"], dataset + #warn "loQualifier and upQualifier are empty." elsif v.size == 4 and v["loQualifier"] and v["upQualifier"] and v["loValue"] and v["upValue"] - add_feature feature, [v["loValue"],v["upValue"]].mean, dataset_id - warn "Using mean value of range #{v["loValue"]} - #{v["upValue"]} for '#{feature.name}'. Original data is not available." + add_feature feature, [v["loValue"],v["upValue"]].mean, dataset + #warn "Using mean value of range #{v["loValue"]} - #{v["upValue"]} for '#{feature.name}'. Original data is not available." elsif v.size == 4 and v["loQualifier"] == "mean" and v["errorValue"] - warn "'#{feature.name}' is a mean value. Original data is not available. Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'." - add_feature feature, v["loValue"], dataset_id + #warn "'#{feature.name}' is a mean value. Original data is not available. Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'." + add_feature feature, v["loValue"], dataset elsif v == {} # do nothing else - warn "Cannot parse Ambit eNanoMapper value '#{v}' for feature '#{feature.name}'." + #warn "Cannot parse Ambit eNanoMapper value '#{v}' for feature '#{feature.name}'." end end -- cgit v1.2.3 From c90644211e214a50f6fdb3a936bf247f45f1f4be Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Fri, 13 May 2016 13:38:24 +0200 Subject: compound tests fixed --- lib/nanoparticle.rb | 40 +++++++++++++++------------------------- 1 file changed, 15 insertions(+), 25 deletions(-) (limited to 'lib/nanoparticle.rb') diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb index 6527fa3..7890a19 100644 --- a/lib/nanoparticle.rb +++ b/lib/nanoparticle.rb @@ -11,19 +11,14 @@ module OpenTox def nanoparticle_neighbors min_sim: 0.1, type:, dataset_id:, prediction_feature_id: dataset = Dataset.find(dataset_id) neighbors = [] - p dataset.data_entries.size - p dataset.substance_ids.size - p dataset.substance_ids.collect{|i| i.to_s} == dataset.data_entries.keys - p dataset.substance_ids.collect{|i| i.to_s} - p dataset.data_entries.keys dataset.nanoparticles.each do |np| - prediction_feature_id - p dataset.data_entries[np.id.to_s] values = dataset.values(np,prediction_feature_id) - p values if values common_descriptors = physchem_descriptors.keys & np.physchem_descriptors.keys - sim = Algorithm::Similarity.cosine(common_descriptors.collect{|d| physchem_descriptors[d]}, common_descriptors.collect{|d| np.physchem_descriptors[d]}) + common_descriptors.select!{|id| NumericFeature.find(id) } + query_descriptors = common_descriptors.collect{|d| physchem_descriptors[d].first} + neighbor_descriptors = common_descriptors.collect{|d| np.physchem_descriptors[d].first} + sim = Algorithm::Similarity.cosine(query_descriptors,neighbor_descriptors) neighbors << {"_id" => np.id, "toxicities" => values, "similarity" => sim} if sim >= min_sim end end @@ -44,12 +39,7 @@ module OpenTox proteomics[feature.id.to_s].uniq! when "TOX" # TODO generic way of parsing TOX values - p dataset.name - p self.name - p feature.name - p feature.unit - p value - if feature.name == "7.99 Toxicity (other) ICP-AES" and feature.unit == "mL/ug(Mg)" + if feature.name == "Net cell association" and feature.unit == "mL/ug(Mg)" dataset.add self, feature, -Math.log10(value) else dataset.add self, feature, value @@ -70,32 +60,32 @@ module OpenTox add_feature feature, v["loValue"], dataset elsif v.keys.size == 2 and v["errorValue"] add_feature feature, v["loValue"], dataset - #warn "Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'." + warn "Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'." elsif v.keys.size == 2 and v["loQualifier"] == "mean" add_feature feature, v["loValue"], dataset - #warn "'#{feature.name}' is a mean value. Original data is not available." + warn "'#{feature.name}' is a mean value. Original data is not available." elsif v.keys.size == 2 and v["loQualifier"] #== ">=" - #warn "Only min value available for '#{feature.name}', entry ignored" + warn "Only min value available for '#{feature.name}', entry ignored" elsif v.keys.size == 2 and v["upQualifier"] #== ">=" - #warn "Only max value available for '#{feature.name}', entry ignored" + warn "Only max value available for '#{feature.name}', entry ignored" elsif v.keys.size == 3 and v["loValue"] and v["loQualifier"].nil? and v["upQualifier"].nil? add_feature feature, v["loValue"], dataset - #warn "loQualifier and upQualifier are empty." + warn "loQualifier and upQualifier are empty." elsif v.keys.size == 3 and v["loValue"] and v["loQualifier"] == "" and v["upQualifier"] == "" add_feature feature, v["loValue"], dataset - #warn "loQualifier and upQualifier are empty." + warn "loQualifier and upQualifier are empty." elsif v.keys.size == 4 and v["loValue"] and v["loQualifier"].nil? and v["upQualifier"].nil? add_feature feature, v["loValue"], dataset - #warn "loQualifier and upQualifier are empty." + warn "loQualifier and upQualifier are empty." elsif v.size == 4 and v["loQualifier"] and v["upQualifier"] and v["loValue"] and v["upValue"] add_feature feature, [v["loValue"],v["upValue"]].mean, dataset - #warn "Using mean value of range #{v["loValue"]} - #{v["upValue"]} for '#{feature.name}'. Original data is not available." + warn "Using mean value of range #{v["loValue"]} - #{v["upValue"]} for '#{feature.name}'. Original data is not available." elsif v.size == 4 and v["loQualifier"] == "mean" and v["errorValue"] - #warn "'#{feature.name}' is a mean value. Original data is not available. Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'." + warn "'#{feature.name}' is a mean value. Original data is not available. Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'." add_feature feature, v["loValue"], dataset elsif v == {} # do nothing else - #warn "Cannot parse Ambit eNanoMapper value '#{v}' for feature '#{feature.name}'." + warn "Cannot parse Ambit eNanoMapper value '#{v}' for feature '#{feature.name}'." end end -- cgit v1.2.3 From f46ba3b7262f5b551c81fc9396c5b7f0cac7f030 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Fri, 27 May 2016 19:16:16 +0200 Subject: first correlation of nanoparticle predictions --- lib/nanoparticle.rb | 110 ++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 86 insertions(+), 24 deletions(-) (limited to 'lib/nanoparticle.rb') diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb index 7890a19..5c6d944 100644 --- a/lib/nanoparticle.rb +++ b/lib/nanoparticle.rb @@ -3,12 +3,11 @@ module OpenTox class Nanoparticle < Substance include OpenTox - field :core, type: String + field :core, type: Hash, default: {} field :coating, type: Array, default: [] - field :bundles, type: Array, default: [] field :proteomics, type: Hash, default: {} - def nanoparticle_neighbors min_sim: 0.1, type:, dataset_id:, prediction_feature_id: + def nanoparticle_neighbors_old min_sim: 0.9, type:, dataset_id:, prediction_feature_id: dataset = Dataset.find(dataset_id) neighbors = [] dataset.nanoparticles.each do |np| @@ -25,33 +24,96 @@ module OpenTox neighbors.sort!{|a,b| b["similarity"] <=> a["similarity"]} neighbors end - - def add_feature feature, value, dataset_id + + def nanoparticle_neighbors min_sim: 0.9, type:, dataset_id:, prediction_feature_id: + p self.name + #p self.physchem_descriptors.keys.size dataset = Dataset.find(dataset_id) - case feature.category - when "P-CHEM" - physchem_descriptors[feature.id.to_s] ||= [] - physchem_descriptors[feature.id.to_s] << value - physchem_descriptors[feature.id.to_s].uniq! - when "Proteomics" - proteomics[feature.id.to_s] ||= [] - proteomics[feature.id.to_s] << value - proteomics[feature.id.to_s].uniq! - when "TOX" - # TODO generic way of parsing TOX values - if feature.name == "Net cell association" and feature.unit == "mL/ug(Mg)" - dataset.add self, feature, -Math.log10(value) + relevant_features = {} + toxicities = [] + substances = [] + # TODO: exclude query activities!!! + dataset.substances.each do |s| + dataset.values(s,prediction_feature_id).each do |act| + toxicities << act + substances << s + end + end + R.assign "tox", toxicities + feature_ids = physchem_descriptors.keys.select{|fid| Feature.find(fid).is_a? NumericFeature} + # identify relevant features + feature_ids.each do |feature_id| + feature_values = substances.collect{|s| s["physchem_descriptors"][feature_id].first if s["physchem_descriptors"][feature_id]} + R.assign "feature", feature_values + begin + R.eval "cor <- cor.test(tox,feature,method = 'pearson',use='pairwise')" + pvalue = R.eval("cor$p.value").to_ruby + if pvalue <= 0.05 + r = R.eval("cor$estimate").to_ruby + relevant_features[feature_id] = {} + relevant_features[feature_id]["pvalue"] = pvalue + relevant_features[feature_id]["r"] = r + relevant_features[feature_id]["mean"] = R.eval("mean(feature, na.rm=TRUE)").to_ruby + relevant_features[feature_id]["sd"] = R.eval("sd(feature, na.rm=TRUE)").to_ruby + end + rescue + warn "Correlation of '#{Feature.find(feature_id).name}' (#{feature_values}) with '#{Feature.find(prediction_feature_id).name}' (#{toxicities}) failed." + end + end + neighbors = [] + substances.each do |substance| + values = dataset.values(substance,prediction_feature_id) + if values + common_descriptors = relevant_features.keys & substance.physchem_descriptors.keys + # scale values + query_descriptors = common_descriptors.collect{|d| (physchem_descriptors[d].median-relevant_features[d]["mean"])/relevant_features[d]["sd"]} + neighbor_descriptors = common_descriptors.collect{|d| (substance.physchem_descriptors[d].median-relevant_features[d]["mean"])/relevant_features[d]["sd"]} + #weights = common_descriptors.collect{|d| 1-relevant_features[d]["pvalue"]} + weights = common_descriptors.collect{|d| relevant_features[d]["r"]**2} + #p weights + sim = Algorithm::Similarity.weighted_cosine(query_descriptors,neighbor_descriptors,weights) + ##p "SIM" + #p [sim, Algorithm::Similarity.cosine(query_descriptors,neighbor_descriptors)] + neighbors << {"_id" => substance.id, "toxicities" => values, "similarity" => sim} if sim >= min_sim + end + end + p neighbors.size + neighbors.sort!{|a,b| b["similarity"] <=> a["similarity"]} + neighbors + end + + def add_feature feature, value, dataset + unless feature.name == "ATOMIC COMPOSITION" or feature.name == "FUNCTIONAL GROUP" # redundand + case feature.category + when "P-CHEM" + physchem_descriptors[feature.id.to_s] ||= [] + physchem_descriptors[feature.id.to_s] << value + physchem_descriptors[feature.id.to_s].uniq! + when "Proteomics" + proteomics[feature.id.to_s] ||= [] + proteomics[feature.id.to_s] << value + proteomics[feature.id.to_s].uniq! + when "TOX" + # TODO generic way of parsing TOX values + if feature.name == "Net cell association" and feature.unit == "mL/ug(Mg)" + dataset.add self, feature, Math.log2(value) + elsif feature.name == "Total protein (BCA assay)" + physchem_descriptors[feature.id.to_s] ||= [] + physchem_descriptors[feature.id.to_s] << value + physchem_descriptors[feature.id.to_s].uniq! + else + dataset.add self, feature, value + end + dataset.save + dataset_ids << dataset.id + dataset_ids.uniq! else - dataset.add self, feature, value + warn "Unknown feature type '#{feature.category}'. Value '#{value}' not inserted." end - dataset.save - else - warn "Unknown feature type '#{feature.category}'. Value '#{value}' not inserted." end end - def parse_ambit_value feature, v, dataset_id - dataset = Dataset.find(dataset_id) + def parse_ambit_value feature, v, dataset v.delete "unit" # TODO: ppm instead of weights if v.keys == ["textValue"] -- cgit v1.2.3 From b515a0cfedb887a2af753db6e4a08ae1af430cad Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Tue, 31 May 2016 18:08:08 +0200 Subject: cleanup of validation modules/classes --- lib/nanoparticle.rb | 80 ++++++++++++++++++++++------------------------------- 1 file changed, 33 insertions(+), 47 deletions(-) (limited to 'lib/nanoparticle.rb') diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb index 5c6d944..d0f8f51 100644 --- a/lib/nanoparticle.rb +++ b/lib/nanoparticle.rb @@ -6,58 +6,43 @@ module OpenTox field :core, type: Hash, default: {} field :coating, type: Array, default: [] field :proteomics, type: Hash, default: {} - - def nanoparticle_neighbors_old min_sim: 0.9, type:, dataset_id:, prediction_feature_id: - dataset = Dataset.find(dataset_id) - neighbors = [] - dataset.nanoparticles.each do |np| - values = dataset.values(np,prediction_feature_id) - if values - common_descriptors = physchem_descriptors.keys & np.physchem_descriptors.keys - common_descriptors.select!{|id| NumericFeature.find(id) } - query_descriptors = common_descriptors.collect{|d| physchem_descriptors[d].first} - neighbor_descriptors = common_descriptors.collect{|d| np.physchem_descriptors[d].first} - sim = Algorithm::Similarity.cosine(query_descriptors,neighbor_descriptors) - neighbors << {"_id" => np.id, "toxicities" => values, "similarity" => sim} if sim >= min_sim - end - end - neighbors.sort!{|a,b| b["similarity"] <=> a["similarity"]} - neighbors - end - def nanoparticle_neighbors min_sim: 0.9, type:, dataset_id:, prediction_feature_id: + def physchem_neighbors min_sim: 0.9, dataset_id:, prediction_feature_id: p self.name - #p self.physchem_descriptors.keys.size dataset = Dataset.find(dataset_id) relevant_features = {} - toxicities = [] + measurements = [] substances = [] # TODO: exclude query activities!!! dataset.substances.each do |s| - dataset.values(s,prediction_feature_id).each do |act| - toxicities << act - substances << s + if s.core == self.core # exclude nanoparticles with different core + dataset.values(s,prediction_feature_id).each do |act| + measurements << act + substances << s + end end end - R.assign "tox", toxicities + R.assign "tox", measurements feature_ids = physchem_descriptors.keys.select{|fid| Feature.find(fid).is_a? NumericFeature} # identify relevant features feature_ids.each do |feature_id| feature_values = substances.collect{|s| s["physchem_descriptors"][feature_id].first if s["physchem_descriptors"][feature_id]} - R.assign "feature", feature_values - begin - R.eval "cor <- cor.test(tox,feature,method = 'pearson',use='pairwise')" - pvalue = R.eval("cor$p.value").to_ruby - if pvalue <= 0.05 - r = R.eval("cor$estimate").to_ruby - relevant_features[feature_id] = {} - relevant_features[feature_id]["pvalue"] = pvalue - relevant_features[feature_id]["r"] = r - relevant_features[feature_id]["mean"] = R.eval("mean(feature, na.rm=TRUE)").to_ruby - relevant_features[feature_id]["sd"] = R.eval("sd(feature, na.rm=TRUE)").to_ruby + unless feature_values.uniq.size == 1 + R.assign "feature", feature_values + begin + R.eval "cor <- cor.test(tox,feature,method = 'pearson',use='pairwise')" + p_value = R.eval("cor$p.value").to_ruby + if p_value <= 0.05 + r = R.eval("cor$estimate").to_ruby + relevant_features[feature_id] = {} + relevant_features[feature_id]["p_value"] = p_value + relevant_features[feature_id]["r"] = r + relevant_features[feature_id]["mean"] = R.eval("mean(feature, na.rm=TRUE)").to_ruby + relevant_features[feature_id]["sd"] = R.eval("sd(feature, na.rm=TRUE)").to_ruby + end + rescue + warn "Correlation of '#{Feature.find(feature_id).name}' (#{feature_values}) with '#{Feature.find(prediction_feature_id).name}' (#{measurements}) failed." end - rescue - warn "Correlation of '#{Feature.find(feature_id).name}' (#{feature_values}) with '#{Feature.find(prediction_feature_id).name}' (#{toxicities}) failed." end end neighbors = [] @@ -68,13 +53,17 @@ module OpenTox # scale values query_descriptors = common_descriptors.collect{|d| (physchem_descriptors[d].median-relevant_features[d]["mean"])/relevant_features[d]["sd"]} neighbor_descriptors = common_descriptors.collect{|d| (substance.physchem_descriptors[d].median-relevant_features[d]["mean"])/relevant_features[d]["sd"]} - #weights = common_descriptors.collect{|d| 1-relevant_features[d]["pvalue"]} + #weights = common_descriptors.collect{|d| 1-relevant_features[d]["p_value"]} weights = common_descriptors.collect{|d| relevant_features[d]["r"]**2} - #p weights sim = Algorithm::Similarity.weighted_cosine(query_descriptors,neighbor_descriptors,weights) - ##p "SIM" - #p [sim, Algorithm::Similarity.cosine(query_descriptors,neighbor_descriptors)] - neighbors << {"_id" => substance.id, "toxicities" => values, "similarity" => sim} if sim >= min_sim + neighbors << { + "_id" => substance.id, + "measurements" => values, + "similarity" => sim, + "common_descriptors" => common_descriptors.collect do |id| + {:id => id, :p_value => relevant_features[id]["p_value"], :r_squared => relevant_features[id]["r"]**2} + end + } if sim >= min_sim end end p neighbors.size @@ -94,10 +83,7 @@ module OpenTox proteomics[feature.id.to_s] << value proteomics[feature.id.to_s].uniq! when "TOX" - # TODO generic way of parsing TOX values - if feature.name == "Net cell association" and feature.unit == "mL/ug(Mg)" - dataset.add self, feature, Math.log2(value) - elsif feature.name == "Total protein (BCA assay)" + if feature.name == "Total protein (BCA assay)" physchem_descriptors[feature.id.to_s] ||= [] physchem_descriptors[feature.id.to_s] << value physchem_descriptors[feature.id.to_s].uniq! -- cgit v1.2.3 From 458a2d753551ea607f2ed5efdd0ac0a02d55d673 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Wed, 1 Jun 2016 12:46:03 +0200 Subject: all tests fixed --- lib/nanoparticle.rb | 2 ++ 1 file changed, 2 insertions(+) (limited to 'lib/nanoparticle.rb') diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb index d0f8f51..ca79a3d 100644 --- a/lib/nanoparticle.rb +++ b/lib/nanoparticle.rb @@ -100,6 +100,8 @@ module OpenTox end def parse_ambit_value feature, v, dataset + #p dataset + #p feature v.delete "unit" # TODO: ppm instead of weights if v.keys == ["textValue"] -- cgit v1.2.3 From 85f2308c101b4778508c2d767e08af4cfd671b7b Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Thu, 2 Jun 2016 12:22:39 +0200 Subject: local pls regression for nanoparticles --- lib/nanoparticle.rb | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'lib/nanoparticle.rb') diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb index ca79a3d..65aab23 100644 --- a/lib/nanoparticle.rb +++ b/lib/nanoparticle.rb @@ -6,9 +6,10 @@ module OpenTox field :core, type: Hash, default: {} field :coating, type: Array, default: [] field :proteomics, type: Hash, default: {} + + attr_accessor :scaled_values def physchem_neighbors min_sim: 0.9, dataset_id:, prediction_feature_id: - p self.name dataset = Dataset.find(dataset_id) relevant_features = {} measurements = [] @@ -52,7 +53,9 @@ module OpenTox common_descriptors = relevant_features.keys & substance.physchem_descriptors.keys # scale values query_descriptors = common_descriptors.collect{|d| (physchem_descriptors[d].median-relevant_features[d]["mean"])/relevant_features[d]["sd"]} + @scaled_values = common_descriptors.collect{|d| [d,(physchem_descriptors[d].median-relevant_features[d]["mean"])/relevant_features[d]["sd"]]}.to_h neighbor_descriptors = common_descriptors.collect{|d| (substance.physchem_descriptors[d].median-relevant_features[d]["mean"])/relevant_features[d]["sd"]} + neighbor_scaled_values = common_descriptors.collect{|d| [d,(substance.physchem_descriptors[d].median-relevant_features[d]["mean"])/relevant_features[d]["sd"]]}.to_h #weights = common_descriptors.collect{|d| 1-relevant_features[d]["p_value"]} weights = common_descriptors.collect{|d| relevant_features[d]["r"]**2} sim = Algorithm::Similarity.weighted_cosine(query_descriptors,neighbor_descriptors,weights) @@ -61,12 +64,16 @@ module OpenTox "measurements" => values, "similarity" => sim, "common_descriptors" => common_descriptors.collect do |id| - {:id => id, :p_value => relevant_features[id]["p_value"], :r_squared => relevant_features[id]["r"]**2} + { + :id => id, + :scaled_value => neighbor_scaled_values[id], + :p_value => relevant_features[id]["p_value"], + :r_squared => relevant_features[id]["r"]**2} end } if sim >= min_sim end end - p neighbors.size + $logger.debug "#{self.name}: #{neighbors.size} neighbors" neighbors.sort!{|a,b| b["similarity"] <=> a["similarity"]} neighbors end -- cgit v1.2.3 From eec5bddbd35c9ecee8021128508d8718bccb4fe3 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Thu, 2 Jun 2016 17:54:48 +0200 Subject: local pls regression for nanoparticle proteomics --- lib/nanoparticle.rb | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'lib/nanoparticle.rb') diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb index 65aab23..3e29ae1 100644 --- a/lib/nanoparticle.rb +++ b/lib/nanoparticle.rb @@ -10,6 +10,7 @@ module OpenTox attr_accessor :scaled_values def physchem_neighbors min_sim: 0.9, dataset_id:, prediction_feature_id: + p name dataset = Dataset.find(dataset_id) relevant_features = {} measurements = [] @@ -46,6 +47,7 @@ module OpenTox end end end + #p relevant_features.keys.collect{|i| Feature.find(i).name} neighbors = [] substances.each do |substance| values = dataset.values(substance,prediction_feature_id) @@ -86,9 +88,12 @@ module OpenTox physchem_descriptors[feature.id.to_s] << value physchem_descriptors[feature.id.to_s].uniq! when "Proteomics" - proteomics[feature.id.to_s] ||= [] - proteomics[feature.id.to_s] << value - proteomics[feature.id.to_s].uniq! + #proteomics[feature.id.to_s] ||= [] + #proteomics[feature.id.to_s] << value + #proteomics[feature.id.to_s].uniq! + physchem_descriptors[feature.id.to_s] ||= [] + physchem_descriptors[feature.id.to_s] << value + physchem_descriptors[feature.id.to_s].uniq! when "TOX" if feature.name == "Total protein (BCA assay)" physchem_descriptors[feature.id.to_s] ||= [] @@ -109,6 +114,7 @@ module OpenTox def parse_ambit_value feature, v, dataset #p dataset #p feature + # TODO add study id to warnings v.delete "unit" # TODO: ppm instead of weights if v.keys == ["textValue"] -- cgit v1.2.3 From 128fd36b2531756c15a93776871e80eb44e524f1 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Thu, 2 Jun 2016 19:01:18 +0200 Subject: proteomics regression validation --- lib/nanoparticle.rb | 28 ++-------------------------- 1 file changed, 2 insertions(+), 26 deletions(-) (limited to 'lib/nanoparticle.rb') diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb index 3e29ae1..c1bf1b5 100644 --- a/lib/nanoparticle.rb +++ b/lib/nanoparticle.rb @@ -9,10 +9,10 @@ module OpenTox attr_accessor :scaled_values - def physchem_neighbors min_sim: 0.9, dataset_id:, prediction_feature_id: + def physchem_neighbors min_sim: 0.9, dataset_id:, prediction_feature_id:, relevant_features: p name dataset = Dataset.find(dataset_id) - relevant_features = {} + #relevant_features = {} measurements = [] substances = [] # TODO: exclude query activities!!! @@ -24,30 +24,6 @@ module OpenTox end end end - R.assign "tox", measurements - feature_ids = physchem_descriptors.keys.select{|fid| Feature.find(fid).is_a? NumericFeature} - # identify relevant features - feature_ids.each do |feature_id| - feature_values = substances.collect{|s| s["physchem_descriptors"][feature_id].first if s["physchem_descriptors"][feature_id]} - unless feature_values.uniq.size == 1 - R.assign "feature", feature_values - begin - R.eval "cor <- cor.test(tox,feature,method = 'pearson',use='pairwise')" - p_value = R.eval("cor$p.value").to_ruby - if p_value <= 0.05 - r = R.eval("cor$estimate").to_ruby - relevant_features[feature_id] = {} - relevant_features[feature_id]["p_value"] = p_value - relevant_features[feature_id]["r"] = r - relevant_features[feature_id]["mean"] = R.eval("mean(feature, na.rm=TRUE)").to_ruby - relevant_features[feature_id]["sd"] = R.eval("sd(feature, na.rm=TRUE)").to_ruby - end - rescue - warn "Correlation of '#{Feature.find(feature_id).name}' (#{feature_values}) with '#{Feature.find(prediction_feature_id).name}' (#{measurements}) failed." - end - end - end - #p relevant_features.keys.collect{|i| Feature.find(i).name} neighbors = [] substances.each do |substance| values = dataset.values(substance,prediction_feature_id) -- cgit v1.2.3 From f7e87b45f15083e5fcdea64821f06ed93ece4c4e Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Tue, 7 Jun 2016 18:07:28 +0200 Subject: (repeated)crossvalidation plots --- lib/nanoparticle.rb | 1 - 1 file changed, 1 deletion(-) (limited to 'lib/nanoparticle.rb') diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb index c1bf1b5..d6261ee 100644 --- a/lib/nanoparticle.rb +++ b/lib/nanoparticle.rb @@ -10,7 +10,6 @@ module OpenTox attr_accessor :scaled_values def physchem_neighbors min_sim: 0.9, dataset_id:, prediction_feature_id:, relevant_features: - p name dataset = Dataset.find(dataset_id) #relevant_features = {} measurements = [] -- cgit v1.2.3 From 9e8537997d84e78e6545a66a0d09c33e76c8b7cf Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Fri, 30 Sep 2016 17:11:30 +0200 Subject: npo uri as source, spectral count unit f proteomics features --- lib/nanoparticle.rb | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) (limited to 'lib/nanoparticle.rb') diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb index d6261ee..b1a3835 100644 --- a/lib/nanoparticle.rb +++ b/lib/nanoparticle.rb @@ -5,7 +5,7 @@ module OpenTox field :core, type: Hash, default: {} field :coating, type: Array, default: [] - field :proteomics, type: Hash, default: {} + #field :proteomics, type: Hash, default: {} attr_accessor :scaled_values @@ -63,26 +63,16 @@ module OpenTox physchem_descriptors[feature.id.to_s] << value physchem_descriptors[feature.id.to_s].uniq! when "Proteomics" - #proteomics[feature.id.to_s] ||= [] - #proteomics[feature.id.to_s] << value - #proteomics[feature.id.to_s].uniq! physchem_descriptors[feature.id.to_s] ||= [] physchem_descriptors[feature.id.to_s] << value physchem_descriptors[feature.id.to_s].uniq! when "TOX" - if feature.name == "Total protein (BCA assay)" - physchem_descriptors[feature.id.to_s] ||= [] - physchem_descriptors[feature.id.to_s] << value - physchem_descriptors[feature.id.to_s].uniq! - else - dataset.add self, feature, value - end - dataset.save - dataset_ids << dataset.id - dataset_ids.uniq! + dataset.add self, feature, value else warn "Unknown feature type '#{feature.category}'. Value '#{value}' not inserted." end + dataset_ids << dataset.id + dataset_ids.uniq! end end -- cgit v1.2.3 From 5d4e5e463c2b87241bbb56e4658e1e26c0ed084f Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Wed, 5 Oct 2016 13:22:12 +0200 Subject: substance and nanoparticle model creation and predictions --- lib/nanoparticle.rb | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) (limited to 'lib/nanoparticle.rb') diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb index b1a3835..6905f6f 100644 --- a/lib/nanoparticle.rb +++ b/lib/nanoparticle.rb @@ -5,10 +5,10 @@ module OpenTox field :core, type: Hash, default: {} field :coating, type: Array, default: [] - #field :proteomics, type: Hash, default: {} attr_accessor :scaled_values +=begin def physchem_neighbors min_sim: 0.9, dataset_id:, prediction_feature_id:, relevant_features: dataset = Dataset.find(dataset_id) #relevant_features = {} @@ -27,12 +27,12 @@ module OpenTox substances.each do |substance| values = dataset.values(substance,prediction_feature_id) if values - common_descriptors = relevant_features.keys & substance.physchem_descriptors.keys + common_descriptors = relevant_features.keys & substance.descriptors.keys # scale values - query_descriptors = common_descriptors.collect{|d| (physchem_descriptors[d].median-relevant_features[d]["mean"])/relevant_features[d]["sd"]} - @scaled_values = common_descriptors.collect{|d| [d,(physchem_descriptors[d].median-relevant_features[d]["mean"])/relevant_features[d]["sd"]]}.to_h - neighbor_descriptors = common_descriptors.collect{|d| (substance.physchem_descriptors[d].median-relevant_features[d]["mean"])/relevant_features[d]["sd"]} - neighbor_scaled_values = common_descriptors.collect{|d| [d,(substance.physchem_descriptors[d].median-relevant_features[d]["mean"])/relevant_features[d]["sd"]]}.to_h + query_descriptors = common_descriptors.collect{|d| (descriptors[d].median-relevant_features[d]["mean"])/relevant_features[d]["sd"]} + @scaled_values = common_descriptors.collect{|d| [d,(descriptors[d].median-relevant_features[d]["mean"])/relevant_features[d]["sd"]]}.to_h + neighbor_descriptors = common_descriptors.collect{|d| (substance.descriptors[d].median-relevant_features[d]["mean"])/relevant_features[d]["sd"]} + neighbor_scaled_values = common_descriptors.collect{|d| [d,(substance.descriptors[d].median-relevant_features[d]["mean"])/relevant_features[d]["sd"]]}.to_h #weights = common_descriptors.collect{|d| 1-relevant_features[d]["p_value"]} weights = common_descriptors.collect{|d| relevant_features[d]["r"]**2} sim = Algorithm::Similarity.weighted_cosine(query_descriptors,neighbor_descriptors,weights) @@ -54,18 +54,19 @@ module OpenTox neighbors.sort!{|a,b| b["similarity"] <=> a["similarity"]} neighbors end +=end def add_feature feature, value, dataset unless feature.name == "ATOMIC COMPOSITION" or feature.name == "FUNCTIONAL GROUP" # redundand case feature.category when "P-CHEM" - physchem_descriptors[feature.id.to_s] ||= [] - physchem_descriptors[feature.id.to_s] << value - physchem_descriptors[feature.id.to_s].uniq! + properties[feature.id.to_s] ||= [] + properties[feature.id.to_s] << value + properties[feature.id.to_s].uniq! when "Proteomics" - physchem_descriptors[feature.id.to_s] ||= [] - physchem_descriptors[feature.id.to_s] << value - physchem_descriptors[feature.id.to_s].uniq! + properties[feature.id.to_s] ||= [] + properties[feature.id.to_s] << value + properties[feature.id.to_s].uniq! when "TOX" dataset.add self, feature, value else -- cgit v1.2.3 From 91787edb3682900bc5a2feeca66e5142f387fcc6 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Fri, 7 Oct 2016 10:25:58 +0200 Subject: unified interface for prediction algorithms --- lib/nanoparticle.rb | 50 -------------------------------------------------- 1 file changed, 50 deletions(-) (limited to 'lib/nanoparticle.rb') diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb index 6905f6f..f74f263 100644 --- a/lib/nanoparticle.rb +++ b/lib/nanoparticle.rb @@ -8,54 +8,6 @@ module OpenTox attr_accessor :scaled_values -=begin - def physchem_neighbors min_sim: 0.9, dataset_id:, prediction_feature_id:, relevant_features: - dataset = Dataset.find(dataset_id) - #relevant_features = {} - measurements = [] - substances = [] - # TODO: exclude query activities!!! - dataset.substances.each do |s| - if s.core == self.core # exclude nanoparticles with different core - dataset.values(s,prediction_feature_id).each do |act| - measurements << act - substances << s - end - end - end - neighbors = [] - substances.each do |substance| - values = dataset.values(substance,prediction_feature_id) - if values - common_descriptors = relevant_features.keys & substance.descriptors.keys - # scale values - query_descriptors = common_descriptors.collect{|d| (descriptors[d].median-relevant_features[d]["mean"])/relevant_features[d]["sd"]} - @scaled_values = common_descriptors.collect{|d| [d,(descriptors[d].median-relevant_features[d]["mean"])/relevant_features[d]["sd"]]}.to_h - neighbor_descriptors = common_descriptors.collect{|d| (substance.descriptors[d].median-relevant_features[d]["mean"])/relevant_features[d]["sd"]} - neighbor_scaled_values = common_descriptors.collect{|d| [d,(substance.descriptors[d].median-relevant_features[d]["mean"])/relevant_features[d]["sd"]]}.to_h - #weights = common_descriptors.collect{|d| 1-relevant_features[d]["p_value"]} - weights = common_descriptors.collect{|d| relevant_features[d]["r"]**2} - sim = Algorithm::Similarity.weighted_cosine(query_descriptors,neighbor_descriptors,weights) - neighbors << { - "_id" => substance.id, - "measurements" => values, - "similarity" => sim, - "common_descriptors" => common_descriptors.collect do |id| - { - :id => id, - :scaled_value => neighbor_scaled_values[id], - :p_value => relevant_features[id]["p_value"], - :r_squared => relevant_features[id]["r"]**2} - end - } if sim >= min_sim - end - end - $logger.debug "#{self.name}: #{neighbors.size} neighbors" - neighbors.sort!{|a,b| b["similarity"] <=> a["similarity"]} - neighbors - end -=end - def add_feature feature, value, dataset unless feature.name == "ATOMIC COMPOSITION" or feature.name == "FUNCTIONAL GROUP" # redundand case feature.category @@ -78,8 +30,6 @@ module OpenTox end def parse_ambit_value feature, v, dataset - #p dataset - #p feature # TODO add study id to warnings v.delete "unit" # TODO: ppm instead of weights -- cgit v1.2.3 From 9e99495ecbff147218023c136bade9e56a502fed Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Thu, 13 Oct 2016 14:39:04 +0200 Subject: descriptor tests fixed --- lib/nanoparticle.rb | 2 -- 1 file changed, 2 deletions(-) (limited to 'lib/nanoparticle.rb') diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb index f74f263..23e155c 100644 --- a/lib/nanoparticle.rb +++ b/lib/nanoparticle.rb @@ -6,8 +6,6 @@ module OpenTox field :core, type: Hash, default: {} field :coating, type: Array, default: [] - attr_accessor :scaled_values - def add_feature feature, value, dataset unless feature.name == "ATOMIC COMPOSITION" or feature.name == "FUNCTIONAL GROUP" # redundand case feature.category -- cgit v1.2.3 From 9e7b36613e98601de7b2ceb2d4442e11f1ae868a Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Thu, 10 Nov 2016 12:23:46 +0100 Subject: intermediate commit, may be defunct --- lib/nanoparticle.rb | 46 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 34 insertions(+), 12 deletions(-) (limited to 'lib/nanoparticle.rb') diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb index 23e155c..02d9a89 100644 --- a/lib/nanoparticle.rb +++ b/lib/nanoparticle.rb @@ -3,8 +3,30 @@ module OpenTox class Nanoparticle < Substance include OpenTox - field :core, type: Hash, default: {} - field :coating, type: Array, default: [] + field :core_id, type: String, default: nil + field :coating_ids, type: Array, default: [] + + def core + Compound.find core_id + end + + def coating + coating_ids.collect{|i| Compound.find i } + end + + def fingerprint type=DEFAULT_FINGERPRINT + core_fp = core.fingerprint type + coating_fp = coating.collect{|c| c.fingerprint type}.flatten.uniq.compact + (core_fp.empty? or coating_fp.empty?) ? [] : (core_fp+coating_fp).uniq.compact + end + + def calculate_properties descriptors=PhysChem::OPENBABEL + if core.smiles and !coating.collect{|c| c.smiles}.compact.empty? + core_prop = core.calculate_properties descriptors + coating_prop = coating.collect{|c| c.calculate_properties descriptors if c.smiles} + descriptors.collect_with_index{|d,i| [core_prop[i],coating_prop.collect{|c| c[i] if c}]} + end + end def add_feature feature, value, dataset unless feature.name == "ATOMIC COMPOSITION" or feature.name == "FUNCTIONAL GROUP" # redundand @@ -37,28 +59,28 @@ module OpenTox add_feature feature, v["loValue"], dataset elsif v.keys.size == 2 and v["errorValue"] add_feature feature, v["loValue"], dataset - warn "Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'." + #warn "Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'." elsif v.keys.size == 2 and v["loQualifier"] == "mean" add_feature feature, v["loValue"], dataset - warn "'#{feature.name}' is a mean value. Original data is not available." + #warn "'#{feature.name}' is a mean value. Original data is not available." elsif v.keys.size == 2 and v["loQualifier"] #== ">=" - warn "Only min value available for '#{feature.name}', entry ignored" + #warn "Only min value available for '#{feature.name}', entry ignored" elsif v.keys.size == 2 and v["upQualifier"] #== ">=" - warn "Only max value available for '#{feature.name}', entry ignored" + #warn "Only max value available for '#{feature.name}', entry ignored" elsif v.keys.size == 3 and v["loValue"] and v["loQualifier"].nil? and v["upQualifier"].nil? add_feature feature, v["loValue"], dataset - warn "loQualifier and upQualifier are empty." + #warn "loQualifier and upQualifier are empty." elsif v.keys.size == 3 and v["loValue"] and v["loQualifier"] == "" and v["upQualifier"] == "" add_feature feature, v["loValue"], dataset - warn "loQualifier and upQualifier are empty." + #warn "loQualifier and upQualifier are empty." elsif v.keys.size == 4 and v["loValue"] and v["loQualifier"].nil? and v["upQualifier"].nil? add_feature feature, v["loValue"], dataset - warn "loQualifier and upQualifier are empty." + #warn "loQualifier and upQualifier are empty." elsif v.size == 4 and v["loQualifier"] and v["upQualifier"] and v["loValue"] and v["upValue"] - add_feature feature, [v["loValue"],v["upValue"]].mean, dataset - warn "Using mean value of range #{v["loValue"]} - #{v["upValue"]} for '#{feature.name}'. Original data is not available." + #add_feature feature, [v["loValue"],v["upValue"]].mean, dataset + #warn "Using mean value of range #{v["loValue"]} - #{v["upValue"]} for '#{feature.name}'. Original data is not available." elsif v.size == 4 and v["loQualifier"] == "mean" and v["errorValue"] - warn "'#{feature.name}' is a mean value. Original data is not available. Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'." + #warn "'#{feature.name}' is a mean value. Original data is not available. Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'." add_feature feature, v["loValue"], dataset elsif v == {} # do nothing else -- cgit v1.2.3