diff options
author | Christoph Helma <helma@in-silico.ch> | 2016-10-05 13:22:12 +0200 |
---|---|---|
committer | Christoph Helma <helma@in-silico.ch> | 2016-10-05 13:22:12 +0200 |
commit | 5d4e5e463c2b87241bbb56e4658e1e26c0ed084f (patch) | |
tree | bbae8f77dbb2ac85053f1253ab518c3076e0d176 /lib/substance.rb | |
parent | adefea0e78a4f05a2c9537e643873ad61fc22a0a (diff) |
substance and nanoparticle model creation and predictions
Diffstat (limited to 'lib/substance.rb')
-rw-r--r-- | lib/substance.rb | 63 |
1 files changed, 61 insertions, 2 deletions
```diff
diff --git a/lib/substance.rb b/lib/substance.rb
index 6768ce7..d271327 100644
--- a/lib/substance.rb
+++ b/lib/substance.rb
@@ -1,9 +1,68 @@
 module OpenTox
 
   class Substance
-    field :physchem_descriptors, type: Hash, default: {}
+    field :properties, type: Hash, default: {}
     field :dataset_ids, type: Array, default: []
   end
 
-end
 
+  def neighbors dataset_id:,prediction_feature_id:,descriptors:,similarity:,relevant_features:nil
+    # TODO enable empty dataset_id -> use complete db
+    case descriptors[:method]
+    when "fingerprint"
+      fingerprint_neighbors dataset_id:dataset_id, prediction_feature_id:prediction_feature_id, descriptors:descriptors, similarity:similarity
+    when "properties"
+      properties_neighbors dataset_id:dataset_id, prediction_feature_id:prediction_feature_id, descriptors:descriptors, similarity:similarity, relevant_features: relevant_features
+    else
+      bad_request_error "Descriptor method '#{descriptors[:method]}' not implemented."
+    end
+  end
+
+  def fingerprint_neighbors dataset_id:,prediction_feature_id:,descriptors:,similarity:
+    neighbors = []
+    dataset = Dataset.find(dataset_id)
+    dataset.substances.each do |substance|
+      values = dataset.values(substance,prediction_feature_id)
+      if values
+        query_descriptors = self.send(descriptors[:method].to_sym, descriptors[:type])
+        candidate_descriptors = substance.send(descriptors[:method].to_sym, descriptors[:type])
+        sim = Algorithm.run similarity[:method], [query_descriptors, candidate_descriptors]
+        neighbors << {"_id" => substance.id, "measurements" => values, "descriptors" => candidate_descriptors, "similarity" => sim} if sim >= similarity[:min]
+      end
+    end
+    neighbors.sort{|a,b| b["similarity"] <=> a["similarity"]}
+  end
+  def properties_neighbors dataset_id:,prediction_feature_id:,descriptors:,similarity:,relevant_features:
+    neighbors = []
+    dataset = Dataset.find(dataset_id)
+    weights = relevant_features.collect{|k,v| v["r"]**2}
+    means = relevant_features.collect{|k,v| v["mean"]}
+    standard_deviations = relevant_features.collect{|k,v| v["sd"]}
+    query_descriptors = relevant_features.keys.collect{|i| properties[i].is_a?(Array) ? properties[i].median : nil }
+    dataset.substances.each do |substance|
+      values = dataset.values(substance,prediction_feature_id)
+      # exclude nanoparticles with different core
+      # TODO validate exclusion
+      next if substance.is_a? Nanoparticle and substance.core != self.core
+      if values
+        candidate_descriptors = relevant_features.keys.collect{|i| substance.properties[i].is_a?(Array) ? substance.properties[i].median : nil }
+        q = []
+        c = []
+        w = []
+        (0..relevant_features.size-1).each do |i|
+          # add only complete pairs
+          if query_descriptors[i] and candidate_descriptors[i]
+            w << weights[i]
+            # scale values
+            q << (query_descriptors[i] - means[i])/standard_deviations[i]
+            c << (candidate_descriptors[i] - means[i])/standard_deviations[i]
+          end
+        end
+        sim = Algorithm.run similarity[:method], [q, c, w]
+        neighbors << {"_id" => substance.id, "measurements" => values, "descriptors" => candidate_descriptors, "similarity" => sim} if sim >= similarity[:min]
+      end
+    end
+    neighbors.sort{|a,b| b["similarity"] <=> a["similarity"]}
+  end
+
+end
```