summary refs log tree commit diff
path: root/lib/substance.rb
diff options
context:
space:
mode:
author Christoph Helma <helma@in-silico.ch> 2016-10-05 13:22:12 +0200
committer Christoph Helma <helma@in-silico.ch> 2016-10-05 13:22:12 +0200
commit 5d4e5e463c2b87241bbb56e4658e1e26c0ed084f (patch)
tree bbae8f77dbb2ac85053f1253ab518c3076e0d176 /lib/substance.rb
parent adefea0e78a4f05a2c9537e643873ad61fc22a0a (diff)
substance and nanoparticle model creation and predictions
Diffstat (limited to 'lib/substance.rb')
-rw-r--r-- lib/substance.rb 63
1 files changed, 61 insertions, 2 deletions
diff --git a/lib/substance.rb b/lib/substance.rb
index 6768ce7..d271327 100644
--- a/lib/substance.rb
+++ b/lib/substance.rb
@@ -1,9 +1,68 @@
module OpenTox
class Substance
- field :physchem_descriptors, type: Hash, default: {}
+ field :properties, type: Hash, default: {} # read by properties_neighbors: entries that are Arrays are reduced with #median, any other entry is treated as missing
field :dataset_ids, type: Array, default: []
end
-end
+ def neighbors dataset_id:,prediction_feature_id:,descriptors:,similarity:,relevant_features:nil
+ # TODO enable empty dataset_id -> use complete db
+ case descriptors[:method]
+ when "fingerprint"
+ fingerprint_neighbors dataset_id:dataset_id, prediction_feature_id:prediction_feature_id, descriptors:descriptors, similarity:similarity
+ when "properties"
+ properties_neighbors dataset_id:dataset_id, prediction_feature_id:prediction_feature_id, descriptors:descriptors, similarity:similarity, relevant_features: relevant_features
+ else
+ bad_request_error "Descriptor method '#{descriptors[:method]}' not implemented."
+ end
+ end
+
+ def fingerprint_neighbors dataset_id:,prediction_feature_id:,descriptors:,similarity:
+ neighbors = []
+ dataset = Dataset.find(dataset_id)
+ dataset.substances.each do |substance|
+ values = dataset.values(substance,prediction_feature_id)
+ if values
+ query_descriptors = self.send(descriptors[:method].to_sym, descriptors[:type])
+ candidate_descriptors = substance.send(descriptors[:method].to_sym, descriptors[:type])
+ sim = Algorithm.run similarity[:method], [query_descriptors, candidate_descriptors]
+ neighbors << {"_id" => substance.id, "measurements" => values, "descriptors" => candidate_descriptors, "similarity" => sim} if sim >= similarity[:min]
+ end
+ end
+ neighbors.sort{|a,b| b["similarity"] <=> a["similarity"]}
+ end
+ def properties_neighbors dataset_id:,prediction_feature_id:,descriptors:,similarity:,relevant_features:
+ neighbors = []
+ dataset = Dataset.find(dataset_id)
+ weights = relevant_features.collect{|k,v| v["r"]**2}
+ means = relevant_features.collect{|k,v| v["mean"]}
+ standard_deviations = relevant_features.collect{|k,v| v["sd"]}
+ query_descriptors = relevant_features.keys.collect{|i| properties[i].is_a?(Array) ? properties[i].median : nil }
+ dataset.substances.each do |substance|
+ values = dataset.values(substance,prediction_feature_id)
+ # exclude nanoparticles with different core
+ # TODO validate exclusion
+ next if substance.is_a? Nanoparticle and substance.core != self.core
+ if values
+ candidate_descriptors = relevant_features.keys.collect{|i| substance.properties[i].is_a?(Array) ? substance.properties[i].median : nil }
+ q = []
+ c = []
+ w = []
+ (0..relevant_features.size-1).each do |i|
+ # add only complete pairs
+ if query_descriptors[i] and candidate_descriptors[i]
+ w << weights[i]
+ # scale values
+ q << (query_descriptors[i] - means[i])/standard_deviations[i]
+ c << (candidate_descriptors[i] - means[i])/standard_deviations[i]
+ end
+ end
+ sim = Algorithm.run similarity[:method], [q, c, w]
+ neighbors << {"_id" => substance.id, "measurements" => values, "descriptors" => candidate_descriptors, "similarity" => sim} if sim >= similarity[:min]
+ end
+ end
+ neighbors.sort{|a,b| b["similarity"] <=> a["similarity"]}
+ end
+
+end