1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
|
module OpenTox

# A nanoparticle substance, identified by its core material and surface
# coating. Collects measured property values parsed from Ambit/eNanoMapper
# study entries (see parse_ambit_value) into Mongoid-backed fields.
class Nanoparticle < Substance
include OpenTox
field :core, type: Hash, default: {}     # chemical identity of the particle core
field :coating, type: Array, default: [] # surface-coating components
attr_accessor :scaled_values             # per-descriptor scaled values; only set by the (disabled) neighbor search below
=begin
def physchem_neighbors min_sim: 0.9, dataset_id:, prediction_feature_id:, relevant_features:
dataset = Dataset.find(dataset_id)
#relevant_features = {}
measurements = []
substances = []
# TODO: exclude query activities!!!
dataset.substances.each do |s|
if s.core == self.core # exclude nanoparticles with different core
dataset.values(s,prediction_feature_id).each do |act|
measurements << act
substances << s
end
end
end
neighbors = []
substances.each do |substance|
values = dataset.values(substance,prediction_feature_id)
if values
common_descriptors = relevant_features.keys & substance.descriptors.keys
# scale values
query_descriptors = common_descriptors.collect{|d| (descriptors[d].median-relevant_features[d]["mean"])/relevant_features[d]["sd"]}
@scaled_values = common_descriptors.collect{|d| [d,(descriptors[d].median-relevant_features[d]["mean"])/relevant_features[d]["sd"]]}.to_h
neighbor_descriptors = common_descriptors.collect{|d| (substance.descriptors[d].median-relevant_features[d]["mean"])/relevant_features[d]["sd"]}
neighbor_scaled_values = common_descriptors.collect{|d| [d,(substance.descriptors[d].median-relevant_features[d]["mean"])/relevant_features[d]["sd"]]}.to_h
#weights = common_descriptors.collect{|d| 1-relevant_features[d]["p_value"]}
weights = common_descriptors.collect{|d| relevant_features[d]["r"]**2}
sim = Algorithm::Similarity.weighted_cosine(query_descriptors,neighbor_descriptors,weights)
neighbors << {
"_id" => substance.id,
"measurements" => values,
"similarity" => sim,
"common_descriptors" => common_descriptors.collect do |id|
{
:id => id,
:scaled_value => neighbor_scaled_values[id],
:p_value => relevant_features[id]["p_value"],
:r_squared => relevant_features[id]["r"]**2}
end
} if sim >= min_sim
end
end
$logger.debug "#{self.name}: #{neighbors.size} neighbors"
neighbors.sort!{|a,b| b["similarity"] <=> a["similarity"]}
neighbors
end
=end

# Store a single feature value on this nanoparticle and record the dataset
# it came from.
#   feature - an OpenTox Feature; its category selects the storage target
#   value   - the measured value
#   dataset - the Dataset the value belongs to
# P-CHEM and Proteomics values are accumulated (deduplicated) in the
# substance's properties hash; TOX values are added to the dataset itself.
# ATOMIC COMPOSITION and FUNCTIONAL GROUP features are skipped because they
# are redundant with core/coating information.
def add_feature feature, value, dataset
unless feature.name == "ATOMIC COMPOSITION" or feature.name == "FUNCTIONAL GROUP" # redundant
case feature.category
when "P-CHEM", "Proteomics" # both are stored identically as substance properties
properties[feature.id.to_s] ||= []
properties[feature.id.to_s] << value
properties[feature.id.to_s].uniq!
when "TOX"
dataset.add self, feature, value
else
warn "Unknown feature type '#{feature.category}'. Value '#{value}' not inserted."
end
# NOTE(review): dataset.id is recorded even when the category was unknown
# and the value was NOT inserted above — confirm this is intended.
dataset_ids << dataset.id
dataset_ids.uniq!
end
end

# Parse one Ambit/eNanoMapper value hash +v+ for +feature+ and forward the
# usable part to add_feature. The branch order matters: each elsif matches a
# progressively larger key set, so do not reorder.
#   v - a Hash with some subset of "textValue", "loValue", "upValue",
#       "loQualifier", "upQualifier", "errorValue", "unit"
# Entries with only a lower/upper bound are skipped with a warning;
# lo/up ranges are collapsed to their mean; error values are ignored.
def parse_ambit_value feature, v, dataset
#p dataset
#p feature
# TODO add study id to warnings
v.delete "unit"
# TODO: ppm instead of weights
if v.keys == ["textValue"]
add_feature feature, v["textValue"], dataset
elsif v.keys == ["loValue"]
add_feature feature, v["loValue"], dataset
elsif v.keys.size == 2 and v["errorValue"]
add_feature feature, v["loValue"], dataset
warn "Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'."
elsif v.keys.size == 2 and v["loQualifier"] == "mean"
add_feature feature, v["loValue"], dataset
warn "'#{feature.name}' is a mean value. Original data is not available."
elsif v.keys.size == 2 and v["loQualifier"] #== ">="
warn "Only min value available for '#{feature.name}', entry ignored"
elsif v.keys.size == 2 and v["upQualifier"] #== ">="
warn "Only max value available for '#{feature.name}', entry ignored"
elsif v.keys.size == 3 and v["loValue"] and v["loQualifier"].nil? and v["upQualifier"].nil?
add_feature feature, v["loValue"], dataset
warn "loQualifier and upQualifier are empty."
elsif v.keys.size == 3 and v["loValue"] and v["loQualifier"] == "" and v["upQualifier"] == ""
add_feature feature, v["loValue"], dataset
warn "loQualifier and upQualifier are empty."
elsif v.keys.size == 4 and v["loValue"] and v["loQualifier"].nil? and v["upQualifier"].nil?
add_feature feature, v["loValue"], dataset
warn "loQualifier and upQualifier are empty."
elsif v.size == 4 and v["loQualifier"] and v["upQualifier"] and v["loValue"] and v["upValue"]
# proper lo-up range: use the midpoint (Array#mean is a project extension)
add_feature feature, [v["loValue"],v["upValue"]].mean, dataset
warn "Using mean value of range #{v["loValue"]} - #{v["upValue"]} for '#{feature.name}'. Original data is not available."
elsif v.size == 4 and v["loQualifier"] == "mean" and v["errorValue"]
warn "'#{feature.name}' is a mean value. Original data is not available. Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'."
add_feature feature, v["loValue"], dataset
elsif v == {} # do nothing
else
warn "Cannot parse Ambit eNanoMapper value '#{v}' for feature '#{feature.name}'."
end
end
end
end
|