summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Christoph Helma <helma@in-silico.ch> 2016-01-14 17:41:43 +0100
committer Christoph Helma <helma@in-silico.ch> 2016-01-14 17:41:43 +0100
commit e5b2f59ab602b2fb850a5338f5645ef331e0e66c (patch)
tree f26ef1ba8042f3dffc59a289c1630672c26d9f30
parent d132e8441b8878861d0b2d0a4563b35387f0e3e9 (diff)
endpoint counts
-rw-r--r-- import.rb 54
1 files changed, 51 insertions, 3 deletions
diff --git a/import.rb b/import.rb
index b086a8c..2a83b94 100644
--- a/import.rb
+++ b/import.rb
@@ -1,5 +1,6 @@
require 'json'
require 'yaml'
+#require_relative "../lazar/lib/lazar.rb"
require_relative "lib/nano-lazar.rb"
include OpenTox
@@ -13,7 +14,6 @@ names = []
if substance["composition"]
nr_cores = substance["composition"].select{|c| c["relation"] == "HAS_CORE"}.size
puts "#{substance["compound"]["URI"]} has #{nr_cores} cores" if nr_cores !=1
- #component = substance["composition"]#["component"]
substance["composition"].each do |composition|
component = composition["component"]
if component
@@ -61,12 +61,60 @@ names = []
#p k,v
end
end
+ nm.tox.uniq! if nm.tox
+ nm.p_chem.uniq! if nm.p_chem
nanomaterials << nm
end
end
+
puts "Total imported: #{nanomaterials.size}"
-puts "With TOX data: #{nanomaterials.select{|n| n.tox}.size}"
puts "With nanoparticle characterisation: #{nanomaterials.select{|n| n.p_chem}.size}"
-puts "With TOX data and particle characterisation: #{nanomaterials.select{|n| n.tox and n.p_chem}.size}"
+modelling_data = nanomaterials.select{|n| n.tox and n.p_chem} # nanomaterials for which both tox and p_chem are set
+puts "With TOX data: #{nanomaterials.select{|n| n.tox}.size}"
+puts "With TOX data and particle characterisation: #{modelling_data.size}"
+endpoints = modelling_data.collect{|n| n.tox.collect{|t| t.keys}}.flatten.compact.uniq # distinct keys over all tox entries of modelling_data
+puts "Endpoints: #{endpoints.size}"
+
+single_value_endpoints = [] # endpoints whose collected values are all identical
+endpoint_values = {} # endpoint => number of collected values, for all other endpoints
+
+endpoints.each do |e| # for each endpoint, gather the values recorded in modelling_data
+ #json = `curl -H "Accept:application/json" "#{e}" 2>/dev/null`
+ #f = JSON.parse(json)["feature"]
+ #p k unless f.keys.size == 1
+ #k = f.keys.first
+ #p e
+ #p modelling_data.select{|n| n.tox.select{|t| t[e]}}.size
+ i = 0 # match counter; only read by the commented-out puts at the end of this loop
+ values = [] # every truthy t[e] found for this endpoint
+ modelling_data.each do |n|
+ n.tox.each do |t|
+ if t[e] # entries where t[e] is nil or false are skipped
+ i += 1
+ values << t[e]
+ end
+ end
+ end
+ single_value_endpoints << e if values.uniq.size == 1 # exactly one distinct value was collected
+ endpoint_values[e] = values.size unless values.uniq.size == 1 # also runs when values is empty (stores 0)
+ #puts "#{f[k]['title']} [#{f[k]['units']}]: #{i} #{values}"
+end
+
+endpoints -= single_value_endpoints # drop endpoints whose collected values were all identical
+puts "Endpoints with more than one measurement value: #{endpoints.size}"
+#endpoint_values.sort{|a,b| b[1] <=> a[1]}
+endpoint_values.select!{|k,v| v > 10} # keep, in place, only endpoints with more than 10 collected values
+puts "Endpoints with more than 10 measurements: #{endpoint_values.size}"
+endpoints = endpoint_values.keys # endpoints now holds only the keys that survived the filter above
+#puts endpoints.to_yaml
+endpoint_values.sort{|a,b| b[1] <=> a[1]}.each do |e,v|
+ json = `curl -H "Accept:application/json" "#{e}" 2>/dev/null`
+ f = JSON.parse(json)["feature"]
+ p k unless f.keys.size == 1
+ k = f.keys.first
+ p e
+ puts "#{f[k]['title']} [#{f[k]['units']}]: #{v} "
+end
+#puts "Endpoints with more than one value single_value_endpoints.size
#puts names.sort.uniq.to_yaml
#p nanomaterials.collect{|n| n.uri}.uniq.size