From 815cf6ba1543fc323eb7cbd1202fadbf03bcfbca Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Wed, 13 Apr 2016 15:35:01 +0200 Subject: new files added --- lib/substance.rb | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 lib/substance.rb (limited to 'lib/substance.rb') diff --git a/lib/substance.rb b/lib/substance.rb new file mode 100644 index 0000000..a5b9825 --- /dev/null +++ b/lib/substance.rb @@ -0,0 +1,10 @@ +module OpenTox + + class Substance + include OpenTox + include Mongoid::Document + include Mongoid::Timestamps + end + +end + -- cgit v1.2.3 From 64f1f32ced77afb278bdb7c27397c5299a73675c Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Wed, 13 Apr 2016 18:18:36 +0200 Subject: improved enm import --- lib/substance.rb | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'lib/substance.rb') diff --git a/lib/substance.rb b/lib/substance.rb index a5b9825..6768ce7 100644 --- a/lib/substance.rb +++ b/lib/substance.rb @@ -1,9 +1,8 @@ module OpenTox class Substance - include OpenTox - include Mongoid::Document - include Mongoid::Timestamps + field :physchem_descriptors, type: Hash, default: {} + field :dataset_ids, type: Array, default: [] end end -- cgit v1.2.3 From 753fcc204d93d86c76860bee6e2f7d0468c3c940 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Thu, 14 Apr 2016 19:43:24 +0200 Subject: features/toxicities fixed --- lib/substance.rb | 1 + 1 file changed, 1 insertion(+) (limited to 'lib/substance.rb') diff --git a/lib/substance.rb b/lib/substance.rb index 6768ce7..82ca65d 100644 --- a/lib/substance.rb +++ b/lib/substance.rb @@ -2,6 +2,7 @@ module OpenTox class Substance field :physchem_descriptors, type: Hash, default: {} + field :toxicities, type: Hash, default: {} field :dataset_ids, type: Array, default: [] end -- cgit v1.2.3 From ab7b37541b4f8a762be737009631d3eefd898b4a Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Thu, 5 May 2016 16:14:02 +0200 Subject: ambit mirror, import from mirrored json, proteomics import --- lib/substance.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/substance.rb') diff --git a/lib/substance.rb b/lib/substance.rb index 82ca65d..34bc94a 100644 --- a/lib/substance.rb +++ b/lib/substance.rb @@ -1,7 +1,7 @@ module OpenTox class Substance - field :physchem_descriptors, type: Hash, default: {} + field :physchem, type: Hash, default: {} field :toxicities, type: Hash, default: {} field :dataset_ids, type: Array, default: [] end -- cgit v1.2.3 From 51f57e2858b60bed74ebcc97189b2188c900c283 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Fri, 6 May 2016 12:49:28 +0200 Subject: dataset tests cleanup --- lib/substance.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/substance.rb') diff --git a/lib/substance.rb b/lib/substance.rb index 34bc94a..82ca65d 100644 --- a/lib/substance.rb +++ b/lib/substance.rb @@ -1,7 +1,7 @@ module OpenTox class Substance - field :physchem, type: Hash, default: {} + field :physchem_descriptors, type: Hash, default: {} field :toxicities, type: Hash, default: {} field :dataset_ids, type: Array, default: [] end -- cgit v1.2.3 From b8bb12c8a163c238d7d4387c1914e2100bb660df Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Thu, 12 May 2016 15:23:01 +0200 Subject: enm study import fixed --- lib/substance.rb | 1 - 1 file changed, 1 deletion(-) (limited to 'lib/substance.rb') diff --git a/lib/substance.rb b/lib/substance.rb index 82ca65d..6768ce7 100644 --- a/lib/substance.rb +++ b/lib/substance.rb @@ -2,7 +2,6 @@ module OpenTox class Substance field :physchem_descriptors, type: Hash, default: {} - field :toxicities, type: Hash, default: {} field :dataset_ids, type: Array, default: [] end -- cgit v1.2.3 From 5d4e5e463c2b87241bbb56e4658e1e26c0ed084f Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Wed, 5 Oct 2016 13:22:12 +0200 Subject: substance and nanoparticle model creation and predictions --- lib/substance.rb | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 61 insertions(+), 2 deletions(-) (limited to 'lib/substance.rb') diff --git a/lib/substance.rb b/lib/substance.rb index 6768ce7..d271327 100644 --- a/lib/substance.rb +++ b/lib/substance.rb @@ -1,9 +1,68 @@ module OpenTox class Substance - field :physchem_descriptors, type: Hash, default: {} + field :properties, type: Hash, default: {} field :dataset_ids, type: Array, default: [] end -end + def neighbors dataset_id:,prediction_feature_id:,descriptors:,similarity:,relevant_features:nil + # TODO enable empty dataset_id -> use complete db + case descriptors[:method] + when "fingerprint" + fingerprint_neighbors dataset_id:dataset_id, prediction_feature_id:prediction_feature_id, descriptors:descriptors, similarity:similarity + when "properties" + properties_neighbors dataset_id:dataset_id, prediction_feature_id:prediction_feature_id, descriptors:descriptors, similarity:similarity, relevant_features: relevant_features + else + bad_request_error "Descriptor method '#{descriptors[:method]}' not implemented." + end + end + + def fingerprint_neighbors dataset_id:,prediction_feature_id:,descriptors:,similarity: + neighbors = [] + dataset = Dataset.find(dataset_id) + dataset.substances.each do |substance| + values = dataset.values(substance,prediction_feature_id) + if values + query_descriptors = self.send(descriptors[:method].to_sym, descriptors[:type]) + candidate_descriptors = substance.send(descriptors[:method].to_sym, descriptors[:type]) + sim = Algorithm.run similarity[:method], [query_descriptors, candidate_descriptors] + neighbors << {"_id" => substance.id, "measurements" => values, "descriptors" => candidate_descriptors, "similarity" => sim} if sim >= similarity[:min] + end + end + neighbors.sort{|a,b| b["similarity"] <=> a["similarity"]} + end + def properties_neighbors dataset_id:,prediction_feature_id:,descriptors:,similarity:,relevant_features: + neighbors = [] + dataset = Dataset.find(dataset_id) + weights = relevant_features.collect{|k,v| v["r"]**2} + means = relevant_features.collect{|k,v| v["mean"]} + standard_deviations = relevant_features.collect{|k,v| v["sd"]} + query_descriptors = relevant_features.keys.collect{|i| properties[i].is_a?(Array) ? properties[i].median : nil } + dataset.substances.each do |substance| + values = dataset.values(substance,prediction_feature_id) + # exclude nanoparticles with different core + # TODO validate exclusion + next if substance.is_a? Nanoparticle and substance.core != self.core + if values + candidate_descriptors = relevant_features.keys.collect{|i| substance.properties[i].is_a?(Array) ? substance.properties[i].median : nil } + q = [] + c = [] + w = [] + (0..relevant_features.size-1).each do |i| + # add only complete pairs + if query_descriptors[i] and candidate_descriptors[i] + w << weights[i] + # scale values + q << (query_descriptors[i] - means[i])/standard_deviations[i] + c << (candidate_descriptors[i] - means[i])/standard_deviations[i] + end + end + sim = Algorithm.run similarity[:method], [q, c, w] + neighbors << {"_id" => substance.id, "measurements" => values, "descriptors" => candidate_descriptors, "similarity" => sim} if sim >= similarity[:min] + end + end + neighbors.sort{|a,b| b["similarity"] <=> a["similarity"]} + end + +end -- cgit v1.2.3 From dc4ab1f4e64d738d6c0b70f0b690a2359685080f Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Wed, 12 Oct 2016 21:32:27 +0200 Subject: physchem regression, correlation_filter for fingerprints --- lib/substance.rb | 60 -------------------------------------------------------- 1 file changed, 60 deletions(-) (limited to 'lib/substance.rb') diff --git a/lib/substance.rb b/lib/substance.rb index d271327..31c465e 100644 --- a/lib/substance.rb +++ b/lib/substance.rb @@ -5,64 +5,4 @@ module OpenTox field :dataset_ids, type: Array, default: [] end - def neighbors dataset_id:,prediction_feature_id:,descriptors:,similarity:,relevant_features:nil - # TODO enable empty dataset_id -> use complete db - case descriptors[:method] - when "fingerprint" - fingerprint_neighbors dataset_id:dataset_id, prediction_feature_id:prediction_feature_id, descriptors:descriptors, similarity:similarity - when "properties" - properties_neighbors dataset_id:dataset_id, prediction_feature_id:prediction_feature_id, descriptors:descriptors, similarity:similarity, relevant_features: relevant_features - else - bad_request_error "Descriptor method '#{descriptors[:method]}' not implemented." - end - end - - def fingerprint_neighbors dataset_id:,prediction_feature_id:,descriptors:,similarity: - neighbors = [] - dataset = Dataset.find(dataset_id) - dataset.substances.each do |substance| - values = dataset.values(substance,prediction_feature_id) - if values - query_descriptors = self.send(descriptors[:method].to_sym, descriptors[:type]) - candidate_descriptors = substance.send(descriptors[:method].to_sym, descriptors[:type]) - sim = Algorithm.run similarity[:method], [query_descriptors, candidate_descriptors] - neighbors << {"_id" => substance.id, "measurements" => values, "descriptors" => candidate_descriptors, "similarity" => sim} if sim >= similarity[:min] - end - end - neighbors.sort{|a,b| b["similarity"] <=> a["similarity"]} - end - - def properties_neighbors dataset_id:,prediction_feature_id:,descriptors:,similarity:,relevant_features: - neighbors = [] - dataset = Dataset.find(dataset_id) - weights = relevant_features.collect{|k,v| v["r"]**2} - means = relevant_features.collect{|k,v| v["mean"]} - standard_deviations = relevant_features.collect{|k,v| v["sd"]} - query_descriptors = relevant_features.keys.collect{|i| properties[i].is_a?(Array) ? properties[i].median : nil } - dataset.substances.each do |substance| - values = dataset.values(substance,prediction_feature_id) - # exclude nanoparticles with different core - # TODO validate exclusion - next if substance.is_a? Nanoparticle and substance.core != self.core - if values - candidate_descriptors = relevant_features.keys.collect{|i| substance.properties[i].is_a?(Array) ? substance.properties[i].median : nil } - q = [] - c = [] - w = [] - (0..relevant_features.size-1).each do |i| - # add only complete pairs - if query_descriptors[i] and candidate_descriptors[i] - w << weights[i] - # scale values - q << (query_descriptors[i] - means[i])/standard_deviations[i] - c << (candidate_descriptors[i] - means[i])/standard_deviations[i] - end - end - sim = Algorithm.run similarity[:method], [q, c, w] - neighbors << {"_id" => substance.id, "measurements" => values, "descriptors" => candidate_descriptors, "similarity" => sim} if sim >= similarity[:min] - end - end - neighbors.sort{|a,b| b["similarity"] <=> a["similarity"]} - end - end -- cgit v1.2.3