From e5b2f59ab602b2fb850a5338f5645ef331e0e66c Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Thu, 14 Jan 2016 17:41:43 +0100 Subject: endpoint counts --- import.rb | 54 +++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 51 insertions(+), 3 deletions(-) diff --git a/import.rb b/import.rb index b086a8c..2a83b94 100644 --- a/import.rb +++ b/import.rb @@ -1,5 +1,6 @@ require 'json' require 'yaml' +#require_relative "../lazar/lib/lazar.rb" require_relative "lib/nano-lazar.rb" include OpenTox @@ -13,7 +14,6 @@ names = [] if substance["composition"] nr_cores = substance["composition"].select{|c| c["relation"] == "HAS_CORE"}.size puts "#{substance["compound"]["URI"]} has #{nr_cores} cores" if nr_cores !=1 - #component = substance["composition"]#["component"] substance["composition"].each do |composition| component = composition["component"] if component @@ -61,12 +61,60 @@ names = [] #p k,v end end + nm.tox.uniq! if nm.tox + nm.p_chem.uniq! if nm.p_chem nanomaterials << nm end end + puts "Total imported: #{nanomaterials.size}" -puts "With TOX data: #{nanomaterials.select{|n| n.tox}.size}" puts "With nanoparticle characterisation: #{nanomaterials.select{|n| n.p_chem}.size}" -puts "With TOX data and particle characterisation: #{nanomaterials.select{|n| n.tox and n.p_chem}.size}" +modelling_data = nanomaterials.select{|n| n.tox and n.p_chem} +puts "With TOX data: #{nanomaterials.select{|n| n.tox}.size}" +puts "With TOX data and particle characterisation: #{modelling_data.size}" +endpoints = modelling_data.collect{|n| n.tox.collect{|t| t.keys}}.flatten.compact.uniq +puts "Endpoints: #{endpoints.size}" + +single_value_endpoints = [] +endpoint_values = {} + +endpoints.each do |e| + #json = `curl -H "Accept:application/json" "#{e}" 2>/dev/null` + #f = JSON.parse(json)["feature"] + #p k unless f.keys.size == 1 + #k = f.keys.first + #p e + #p modelling_data.select{|n| n.tox.select{|t| t[e]}}.size + i = 0 + values = [] + modelling_data.each do |n| + n.tox.each do |t| + if t[e] + i += 1 + values << t[e] + end + end + end + single_value_endpoints << e if values.uniq.size == 1 + endpoint_values[e] = values.size unless values.uniq.size == 1 + #puts "#{f[k]['title']} [#{f[k]['units']}]: #{i} #{values}" +end + +endpoints -= single_value_endpoints +puts "Endpoints with more than one measurement value: #{endpoints.size}" +#endpoint_values.sort{|a,b| b[1] <=> a[1]} +endpoint_values.select!{|k,v| v > 10} +puts "Endpoints with more than 10 measurements: #{endpoint_values.size}" +endpoints = endpoint_values.keys +#puts endpoints.to_yaml +endpoint_values.sort{|a,b| b[1] <=> a[1]}.each do |e,v| + json = `curl -H "Accept:application/json" "#{e}" 2>/dev/null` + f = JSON.parse(json)["feature"] + p k unless f.keys.size == 1 + k = f.keys.first + p e + puts "#{f[k]['title']} [#{f[k]['units']}]: #{v} " +end +#puts "Endpoints with more than one value single_value_endpoints.size #puts names.sort.uniq.to_yaml #p nanomaterials.collect{|n| n.uri}.uniq.size -- cgit v1.2.3