1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
|
# TODO: missing data for protein corona silver particles
require 'json'
require 'yaml'
require 'csv'
require_relative "lib/nano-lazar.rb"
include OpenTox
# Build a human-readable display name for a feature.
#
# Looks the feature up in @features and combines its title with its
# de-duplicated "p: o" annotations (in parentheses) and, when present and
# non-empty, its units (in square brackets).
#
# @param uri [String] key into @features
# @return [String] a newly built display name; @features is left unmodified
def feature_name(uri)
  f = @features[uri]
  # Work on a copy: appending to f['title'] itself would change the stored
  # title, so each further call for the same URI would add the suffixes again.
  name = f['title'].dup
  annotations = (f['annotation'] || []).collect { |a| "#{a['p']}: #{a['o']}" }.uniq.join ", "
  name << " (#{annotations})" unless annotations.empty?
  name << " [#{f['units']}]" if f['units'] && !f['units'].empty?
  name
end
# Import every substance from the JSON bundles in data/ into Nanoparticle
# objects: URI, name, core/coating component names, and the measurement
# values whose property key contains "TOX" or "P-CHEM".
nanomaterials = []
feature_names = {} # cache of display names, populated later in the script
@features = {}     # merged "feature" sections of all bundles; read by feature_name
["nanowiki.json", "protein-corona.json", "marina.json"].each do |f|
  bundle = JSON.parse(File.read(File.join("data",f)))
  @features.merge! bundle["feature"]
  bundle["dataEntry"].each do |substance|
    nm = Nanoparticle.new
    nm.uri = substance["compound"]["URI"]
    nm.name = substance["values"]["https://apps.ideaconsult.net/enanomapper/identifier/name"] if substance["values"]

    if substance["composition"]
      nr_cores = substance["composition"].select{|c| c["relation"] == "HAS_CORE"}.size
      # Report substances that do not have exactly one core component.
      puts "#{substance["compound"]["URI"]} has #{nr_cores} cores" if nr_cores !=1
      substance["composition"].each do |composition|
        component = composition["component"]
        next unless component
        name = component["values"]["https://apps.ideaconsult.net/enanomapper/feature/http%3A%2F%2Fwww.opentox.org%2Fapi%2F1.1%23ChemicalNameDefault"]
        case composition["relation"]
        when "HAS_CORE"
          nm.core = name
        when "HAS_COATING"
          nm.coating ||= []
          nm.coating << name
        end
      end
    end

    # A substance without a "values" section is still imported, just without
    # measurements (the name assignment above already allows for that case).
    (substance["values"] || {}).each do |k,v|
      property = if k.match(/TOX/)
        "tox"
      elsif k.match(/P-CHEM/)
        "p_chem"
      end
      next unless property
      # The list is created as soon as a matching key is seen, even when none
      # of its entries turns out to be usable.
      if property == "tox"
        nm.tox ||= []
      else
        nm.p_chem ||= []
      end

      v.each do |val|
        keys = val.keys
        usable = true
        value = nil
        if keys == ["loValue"] || (keys == ["loQualifier", "loValue"] && val["loQualifier"] == "mean")
          value = val["loValue"]
        elsif keys == ["loQualifier", "loValue", "upQualifier", "upValue" ]
          # Midpoint of the range; dividing by 2.0 keeps the fractional part
          # when both bounds were parsed as Integers.
          value = (val["loValue"]+val["upValue"])/2.0
        elsif keys == ["loQualifier", "loValue"] && val["loQualifier"] == ">="
          usable = false # lower bounds are deliberately not imported
        else
          p val # unrecognised entry shape: print it for inspection
          usable = false
        end
        next unless usable
        if property == "tox"
          nm.tox << {k => value}
        else
          nm.p_chem << {k => value}
        end
      end
    end

    nm.tox.uniq! if nm.tox
    nm.p_chem.uniq! if nm.p_chem
    nanomaterials << nm
  end
end
# Print import statistics and list the toxicity endpoints that have more than
# one distinct value and more than 10 measurements.
puts "Total imported: #{nanomaterials.count{|n| n.p_chem || true}}"
puts "With nanoparticle characterisation: #{nanomaterials.count{|n| n.p_chem}}"
# Only materials with both toxicity and characterisation data are counted below.
modelling_data = nanomaterials.select{|n| n.tox and n.p_chem}
puts "With TOX data: #{nanomaterials.count{|n| n.tox}}"
puts "With TOX data and particle characterisation: #{modelling_data.size}"
endpoints = modelling_data.collect{|n| n.tox.collect{|t| t.keys}}.flatten.compact.uniq
puts
puts "Endpoints: #{endpoints.size}"
single_value_endpoints = []
endpoint_values = {} # endpoint URI => number of measurements
endpoints.each do |e|
  values = []
  modelling_data.each do |n|
    n.tox.each do |t|
      values << t[e] if t[e]
    end
  end
  # Endpoints whose measurements are all identical are set aside.
  if values.uniq.size == 1
    single_value_endpoints << e
  else
    endpoint_values[e] = values.size
  end
end
endpoints -= single_value_endpoints
puts "Endpoints with more than one measurement value: #{endpoints.size}"
endpoint_values.select!{|k,v| v > 10}
puts "Endpoints with more than 10 measurements: #{endpoint_values.size}"
endpoints = endpoint_values.keys
puts
# feature_names is still empty at this point, so resolve (and cache) each
# display name here; fall back to the raw URI when the feature is unknown.
report_lines = endpoint_values.sort{|a,b| b[1] <=> a[1]}.collect do |e,v|
  label = @features[e] ? (feature_names[e] ||= feature_name(e)) : e
  "#{label}: #{v}"
end
puts report_lines.join("\n")
# Restrict the data set to materials measured for one fixed toxicity endpoint,
# then gather, per property URI, all non-String values and a display name.
endpoint = "https://apps.ideaconsult.net/enanomapper/property/TOX/UNKNOWN_TOXICITY_SECTION/Log2+transformed/94D664CFE4929A0F400A5AD8CA733B52E049A688/E/3ed642f9-1b42-387a-9966-dea5b91e5f8a"
nanomaterials.select!{|nm| nm.tox and nm.tox.collect{|t| t.keys}.flatten.include? endpoint}
p nanomaterials.size
feature_values = {} # property URI => list of non-String values
nanomaterials.each do |nm|
  # p_chem is nil for materials without characterisation data.
  ((nm.p_chem || []) + nm.tox).each do |f|
    p f unless f.size == 1 # each entry is expected to be a one-pair hash
    k = f.keys.first
    # feature_name takes the property URI, not the {uri => value} entry.
    # Resolve each name only once and key the cache by URI, which is how the
    # names are looked up when the CSV header is built.
    feature_names[k] ||= @features[k] ? feature_name(k) : k
    next if f[k].is_a? String
    feature_values[k] ||= []
    feature_values[k] << f[k]
  end
end
# Keep only properties that have more than two distinct values.
feature_values.select!{|f,vals| vals.uniq.size > 2}
tox_descriptors = feature_values.select{|f,vals| f.match 'TOX'}.keys
p_chem_descriptors = feature_values.select{|f,vals| f.match 'P-CHEM'}.keys
#puts @features.to_yaml
# Header: material name, then the P-CHEM columns, then the TOX columns.
column_names = ["Nanoparticle"] + p_chem_descriptors.collect{|d| feature_names[d]} + tox_descriptors.collect{|d| feature_names[d]}
table = []
# Write one row per material that has a value for the selected endpoint.
CSV.open(File.join(File.dirname(__FILE__),"data","protein_corona_extract.csv"),"w+") do |csv|
  csv << column_names
  nanomaterials.each do |nm|
    next unless nm.tox && nm.tox.collect{|t| t.keys}.flatten.include?(endpoint)
    #table << []
    p_chem = nm.p_chem || [] # nil when the material has no characterisation data
    row = [nm.name]
    # For each column take the first recorded value of that property, if any.
    row += p_chem_descriptors.collect{|d| p_chem.collect{|entry| entry[d]}.compact.first}
    # TOX columns must be read from nm.tox; reading them from p_chem leaves them empty.
    row += tox_descriptors.collect{|d| nm.tox.collect{|entry| entry[d]}.compact.first}
    csv << row
  end
end
|