1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
|
module OpenTox

# A nanoparticle substance, identified by its core material and surface
# coating. Collects measured property values parsed from Ambit/eNanoMapper
# study entries (see parse_ambit_value) into Mongoid-backed fields.
class Nanoparticle < Substance
include OpenTox
field :core, type: Hash, default: {}     # chemical identity of the particle core
field :coating, type: Array, default: [] # surface-coating components
attr_accessor :scaled_values             # per-descriptor scaled values; only set by the (disabled) neighbor search below
=begin
def physchem_neighbors min_sim: 0.9, dataset_id:, prediction_feature_id:, relevant_features:
dataset = Dataset.find(dataset_id)
#relevant_features = {}
measurements = []
substances = []
# TODO: exclude query activities!!!
dataset.substances.each do |s|
if s.core == self.core # exclude nanoparticles with different core
dataset.values(s,prediction_feature_id).each do |act|
measurements << act
substances << s
end
end
end
neighbors = []
substances.each do |substance|
values = dataset.values(substance,prediction_feature_id)
if values
common_descriptors = relevant_features.keys & substance.descriptors.keys
# scale values
query_descriptors = common_descriptors.collect{|d| (descriptors[d].median-relevant_features[d]["mean"])/relevant_features[d]["sd"]}
@scaled_values = common_descriptors.collect{|d| [d,(descriptors[d].median-relevant_features[d]["mean"])/relevant_features[d]["sd"]]}.to_h
neighbor_descriptors = common_descriptors.collect{|d| (substance.descriptors[d].median-relevant_features[d]["mean"])/relevant_features[d]["sd"]}
neighbor_scaled_values = common_descriptors.collect{|d| [d,(substance.descriptors[d].median-relevant_features[d]["mean"])/relevant_features[d]["sd"]]}.to_h
#weights = common_descriptors.collect{|d| 1-relevant_features[d]["p_value"]}
weights = common_descriptors.collect{|d| relevant_features[d]["r"]**2}
sim = Algorithm::Similarity.weighted_cosine(query_descriptors,neighbor_descriptors,weights)
neighbors << {
"_id" => substance.id,
"measurements" => values,
"similarity" => sim,
"common_descriptors" => common_descriptors.collect do |id|
{
:id => id,
:scaled_value => neighbor_scaled_values[id],
:p_value => relevant_features[id]["p_value"],
:r_squared => relevant_features[id]["r"]**2}
end
} if sim >= min_sim
end
end
$logger.debug "#{self.name}: #{neighbors.size} neighbors"
neighbors.sort!{|a,b| b["similarity"] <=> a["similarity"]}
neighbors
end
=end

# Store a single feature value on this nanoparticle and record the dataset
# it came from.
#   feature - an OpenTox Feature; its category selects the storage target
#   value   - the measured value
#   dataset - the Dataset the value belongs to
# P-CHEM and Proteomics values are accumulated (deduplicated) in the
# substance's properties hash; TOX values are added to the dataset itself.
# ATOMIC COMPOSITION and FUNCTIONAL GROUP features are skipped because they
# are redundant with core/coating information.
def add_feature feature, value, dataset
unless feature.name == "ATOMIC COMPOSITION" or feature.name == "FUNCTIONAL GROUP" # redundant
case feature.category
when "P-CHEM", "Proteomics" # both are stored identically as substance properties
properties[feature.id.to_s] ||= []
properties[feature.id.to_s] << value
properties[feature.id.to_s].uniq!
when "TOX"
dataset.add self, feature, value
else
warn "Unknown feature type '#{feature.category}'. Value '#{value}' not inserted."
end
# NOTE(review): dataset.id is recorded even when the category was unknown
# and the value was NOT inserted above — confirm this is intended.
dataset_ids << dataset.id
dataset_ids.uniq!
end
end

# Parse one Ambit/eNanoMapper value hash +v+ for +feature+ and forward the
# usable part to add_feature. The branch order matters: each elsif matches a
# progressively larger key set, so do not reorder.
#   v - a Hash with some subset of "textValue", "loValue", "upValue",
#       "loQualifier", "upQualifier", "errorValue", "unit"
# Entries with only a lower/upper bound are skipped with a warning;
# lo/up ranges are collapsed to their mean; error values are ignored.
def parse_ambit_value feature, v, dataset
#p dataset
#p feature
# TODO add study id to warnings
v.delete "unit"
# TODO: ppm instead of weights
if v.keys == ["textValue"]
add_feature feature, v["textValue"], dataset
elsif v.keys == ["loValue"]
add_feature feature, v["loValue"], dataset
elsif v.keys.size == 2 and v["errorValue"]
add_feature feature, v["loValue"], dataset
warn "Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'."
elsif v.keys.size == 2 and v["loQualifier"] == "mean"
add_feature feature, v["loValue"], dataset
warn "'#{feature.name}' is a mean value. Original data is not available."
elsif v.keys.size == 2 and v["loQualifier"] #== ">="
warn "Only min value available for '#{feature.name}', entry ignored"
elsif v.keys.size == 2 and v["upQualifier"] #== ">="
warn "Only max value available for '#{feature.name}', entry ignored"
elsif v.keys.size == 3 and v["loValue"] and v["loQualifier"].nil? and v["upQualifier"].nil?
add_feature feature, v["loValue"], dataset
warn "loQualifier and upQualifier are empty."
elsif v.keys.size == 3 and v["loValue"] and v["loQualifier"] == "" and v["upQualifier"] == ""
add_feature feature, v["loValue"], dataset
warn "loQualifier and upQualifier are empty."
elsif v.keys.size == 4 and v["loValue"] and v["loQualifier"].nil? and v["upQualifier"].nil?
add_feature feature, v["loValue"], dataset
warn "loQualifier and upQualifier are empty."
elsif v.size == 4 and v["loQualifier"] and v["upQualifier"] and v["loValue"] and v["upValue"]
# proper lo-up range: use the midpoint (Array#mean is a project extension)
add_feature feature, [v["loValue"],v["upValue"]].mean, dataset
warn "Using mean value of range #{v["loValue"]} - #{v["upValue"]} for '#{feature.name}'. Original data is not available."
elsif v.size == 4 and v["loQualifier"] == "mean" and v["errorValue"]
warn "'#{feature.name}' is a mean value. Original data is not available. Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'."
add_feature feature, v["loValue"], dataset
elsif v == {} # do nothing
else
warn "Cannot parse Ambit eNanoMapper value '#{v}' for feature '#{feature.name}'."
end
end
end
end
|