1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
|
module OpenTox
# Declares two fields on Substance through the `field` class macro
# (presumably provided by the project's ODM -- confirm).
class Substance
  # Hash-typed field, empty by default. The property-based neighbor search in
  # this file reads it as property key => Array of values.
  field :properties, type: Hash, default: {}
  # Array-typed field, empty by default.
  # NOTE(review): presumably the ids of the datasets containing this substance -- confirm.
  field :dataset_ids, type: Array, default: []
end
# Looks up neighbors of the receiver in a dataset, choosing the search
# strategy from descriptors[:method]:
#
# * "fingerprint" -> fingerprint_neighbors
# * "properties"  -> properties_neighbors (additionally receives relevant_features)
# * anything else -> bad_request_error with a "not implemented" message
#
# @param dataset_id id forwarded unchanged to the selected search
# @param prediction_feature_id id forwarded unchanged to the selected search
# @param descriptors [Hash] must provide :method; forwarded unchanged
# @param similarity [Hash] forwarded unchanged
# @param relevant_features forwarded only to the "properties" search (default nil)
# @return whatever the selected search (or bad_request_error) returns
def neighbors(dataset_id:, prediction_feature_id:, descriptors:, similarity:, relevant_features: nil)
  # TODO enable empty dataset_id -> use complete db
  descriptor_method = descriptors[:method]
  shared_arguments = {
    dataset_id: dataset_id,
    prediction_feature_id: prediction_feature_id,
    descriptors: descriptors,
    similarity: similarity
  }

  case descriptor_method
  when "fingerprint"
    fingerprint_neighbors(**shared_arguments)
  when "properties"
    properties_neighbors(**shared_arguments, relevant_features: relevant_features)
  else
    bad_request_error "Descriptor method '#{descriptor_method}' not implemented."
  end
end
# Collects the substances of a dataset whose descriptors are at least
# similarity[:min] similar to the receiver's descriptors.
#
# Substances for which dataset.values returns nil/false are skipped.
#
# @param dataset_id id passed to Dataset.find
# @param prediction_feature_id feature id passed to dataset.values
# @param descriptors [Hash] :method names the method invoked (via send) on the
#   receiver and on every candidate, with descriptors[:type] as its only argument
# @param similarity [Hash] :method is handed to Algorithm.run, :min is the
#   inclusive similarity threshold
# @return [Array<Hash>] one hash per neighbor with the keys "_id",
#   "measurements", "descriptors" and "similarity", ordered by descending similarity
def fingerprint_neighbors(dataset_id:, prediction_feature_id:, descriptors:, similarity:)
  dataset = Dataset.find(dataset_id)
  # The query descriptors do not depend on the candidate, so they are computed
  # at most once -- lazily, so that a dataset without measured substances still
  # triggers no descriptor calculation at all.
  query_descriptors = nil
  neighbors = []

  dataset.substances.each do |substance|
    values = dataset.values(substance, prediction_feature_id)
    next unless values

    # NOTE(review): the method name is taken from the caller-supplied hash and
    # dispatched with send; make sure it never comes from untrusted input.
    query_descriptors ||= send(descriptors[:method].to_sym, descriptors[:type])
    candidate_descriptors = substance.send(descriptors[:method].to_sym, descriptors[:type])
    sim = Algorithm.run(similarity[:method], [query_descriptors, candidate_descriptors])
    next unless sim >= similarity[:min]

    neighbors << {
      "_id" => substance.id,
      "measurements" => values,
      "descriptors" => candidate_descriptors,
      "similarity" => sim
    }
  end

  neighbors.sort { |a, b| b["similarity"] <=> a["similarity"] }
end
# Collects the substances of a dataset whose scaled property values are at
# least similarity[:min] similar to those of the receiver.
#
# Every relevant property is reduced to the median of its value array (a value
# that is not an Array counts as missing). Only properties present for both the
# receiver and the candidate enter the comparison; each is scaled as
# (value - mean) / sd and weighted with r**2.
#
# Nanoparticle candidates whose core differs from the receiver's core are
# excluded before their measurements are looked up. Candidates for which
# dataset.values returns nil/false are skipped.
#
# NOTE(review): Array#median is not core Ruby -- presumably a project extension.
# NOTE(review): an "sd" of zero is not guarded against.
#
# @param dataset_id id passed to Dataset.find
# @param prediction_feature_id feature id passed to dataset.values
# @param descriptors unused here; accepted so the signature matches the other searches
# @param similarity [Hash] :method is handed to Algorithm.run, :min is the
#   inclusive similarity threshold
# @param relevant_features [Hash] property key => hash with the entries "r",
#   "mean" and "sd"; must not be nil
# @return [Array<Hash>] one hash per neighbor with the keys "_id",
#   "measurements", "descriptors" (unscaled medians, nil where missing) and
#   "similarity", ordered by descending similarity
def properties_neighbors(dataset_id:, prediction_feature_id:, descriptors:, similarity:, relevant_features:)
  dataset = Dataset.find(dataset_id)
  feature_keys = relevant_features.keys
  weights = relevant_features.map { |_key, stats| stats["r"]**2 }
  means = relevant_features.map { |_key, stats| stats["mean"] }
  standard_deviations = relevant_features.map { |_key, stats| stats["sd"] }
  median_or_nil = ->(value) { value.is_a?(Array) ? value.median : nil }
  query_descriptors = feature_keys.map { |key| median_or_nil.call(properties[key]) }

  neighbors = []
  dataset.substances.each do |substance|
    # exclude nanoparticles with different core
    # TODO validate exclusion
    next if substance.is_a?(Nanoparticle) && substance.core != core

    # Looked up only after the exclusion above, so excluded substances cost no lookup.
    values = dataset.values(substance, prediction_feature_id)
    next unless values

    candidate_descriptors = feature_keys.map { |key| median_or_nil.call(substance.properties[key]) }
    q = []
    c = []
    w = []
    query_descriptors.zip(candidate_descriptors, weights, means, standard_deviations) do |query, candidate, weight, mean, sd|
      # add only complete pairs
      next unless query && candidate

      w << weight
      # scale values
      q << (query - mean) / sd
      c << (candidate - mean) / sd
    end

    sim = Algorithm.run(similarity[:method], [q, c, w])
    next unless sim >= similarity[:min]

    neighbors << {
      "_id" => substance.id,
      "measurements" => values,
      "descriptors" => candidate_descriptors,
      "similarity" => sim
    }
  end

  neighbors.sort { |a, b| b["similarity"] <=> a["similarity"] }
end
end
|