summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Christoph Helma <helma@in-silico.ch> 2016-01-20 13:53:22 +0100
committer Christoph Helma <helma@in-silico.ch> 2016-01-20 13:53:22 +0100
commit 9546c589f6852942ed85f8da1e12c351fb92e0f0 (patch)
tree b8ebc48eded5f47d54431a2b49bb0993244bfdaa
parent 6bc457aa0a10898a53480f8031609ac2a9b96db8 (diff)
enm import removed
-rw-r--r-- .gitignore 1
-rw-r--r-- application.rb 4
-rw-r--r-- import.rb 153
-rw-r--r-- nanoparticles.rb (renamed from protein_corona.rb) 0
4 files changed, 3 insertions, 155 deletions
diff --git a/.gitignore b/.gitignore
index 43ce16a..e73632d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,4 @@ data
papers
*tmp
*swp
+enm-import.rb
diff --git a/application.rb b/application.rb
index 1cb0e13..703932f 100644
--- a/application.rb
+++ b/application.rb
@@ -1,7 +1,7 @@
require 'sinatra'
require "sinatra/reloader" if development?
-require_relative 'protein_corona.rb'
-also_reload './protein_corona.rb'
+require_relative 'nanoparticles.rb'
+also_reload './nanoparticles.rb'
get '/?' do
@data = JSON.parse(File.read("./data.json"))
diff --git a/import.rb b/import.rb
deleted file mode 100644
index 63d8a08..0000000
--- a/import.rb
+++ /dev/null
@@ -1,153 +0,0 @@
-# TODO: missing data for protein corona silver particles
-require 'json'
-require 'yaml'
-require 'csv'
-require_relative "lib/nano-lazar.rb"
-include OpenTox
-
-def feature_name uri
- f = @features[uri]
- name = f['title']
- annotations = f['annotation'].collect{|a| "#{a['p']}: #{a['o']}"}.uniq.join ", "
- name << " (#{annotations})" unless annotations.empty?
- name << " [#{f['units']}]" if f['units'] and !f['units'].empty?
- name
-end
-
-nanomaterials = []
-feature_names = {}
-@features = {}
-
-["nanowiki.json", "protein-corona.json", "marina.json"].each do |f|
- bundle = JSON.parse(File.read(File.join("data",f)))
- @features.merge! bundle["feature"]
- bundle["dataEntry"].each do |substance|
- nm = Nanoparticle.new
- nm.uri = substance["compound"]["URI"]
- nm.name = substance["values"]["https://apps.ideaconsult.net/enanomapper/identifier/name"] if substance["values"]
- if substance["composition"]
- nr_cores = substance["composition"].select{|c| c["relation"] == "HAS_CORE"}.size
- puts "#{substance["compound"]["URI"]} has #{nr_cores} cores" if nr_cores !=1
- substance["composition"].each do |composition|
- component = composition["component"]
- if component
- name = component["values"]["https://apps.ideaconsult.net/enanomapper/feature/http%3A%2F%2Fwww.opentox.org%2Fapi%2F1.1%23ChemicalNameDefault"]
- #names << name
- if composition["relation"] == "HAS_CORE"
- nm.core = name
- elsif composition["relation"] == "HAS_COATING"
- nm.coating ||= []
- nm.coating << name
- end
- else
- #puts substance.to_yaml
- end
- end
- else
- #puts substance.to_yaml
- end
- substance["values"].each do |k,v|
- property = nil
- if k.match(/TOX/)
- nm.tox ||= []
- property = "tox"
- elsif k.match(/P-CHEM/)
- nm.p_chem ||= []
- property = "p_chem"
- end
- if property
- v.each do |val|
- if val.keys == ["loValue"]
- nm.tox << {k => val["loValue"]} if property == "tox"
- nm.p_chem << {k => val["loValue"]} if property == "p_chem"
- elsif val.keys == ["loQualifier", "loValue"] and val["loQualifier"] == "mean"
- nm.tox << {k => val["loValue"]} if property == "tox"
- nm.p_chem << {k => val["loValue"]} if property == "p_chem"
- elsif val.keys == ["loQualifier", "loValue", "upQualifier", "upValue" ]
- nm.tox << {k => (val["loValue"]+val["upValue"])/2} if property == "tox"
- nm.p_chem << {k => (val["loValue"]+val["upValue"])/2} if property == "p_chem"
- elsif val.keys == ["loQualifier", "loValue"] and val["loQualifier"] == ">="
- else
- p val
- end
- end
- else
- #p k,v
- end
- end
- nm.tox.uniq! if nm.tox
- nm.p_chem.uniq! if nm.p_chem
- nanomaterials << nm
- end
-end
-
-puts "Total imported: #{nanomaterials.size}"
-puts "With nanoparticle characterisation: #{nanomaterials.select{|n| n.p_chem}.size}"
-modelling_data = nanomaterials.select{|n| n.tox and n.p_chem}
-puts "With TOX data: #{nanomaterials.select{|n| n.tox}.size}"
-puts "With TOX data and particle characterisation: #{modelling_data.size}"
-endpoints = modelling_data.collect{|n| n.tox.collect{|t| t.keys}}.flatten.compact.uniq
-puts
-puts "Endpoints: #{endpoints.size}"
-
-single_value_endpoints = []
-endpoint_values = {}
-
-endpoints.each do |e|
- i = 0
- values = []
- modelling_data.each do |n|
- n.tox.each do |t|
- if t[e]
- i += 1
- values << t[e]
- end
- end
- end
- single_value_endpoints << e if values.uniq.size == 1
- endpoint_values[e] = values.size unless values.uniq.size == 1
-end
-
-endpoints -= single_value_endpoints
-puts "Endpoints with more than one measurement value: #{endpoints.size}"
-endpoint_values.select!{|k,v| v > 10}
-puts "Endpoints with more than 10 measurements: #{endpoint_values.size}"
-endpoints = endpoint_values.keys
-puts
-puts endpoint_values.sort{|a,b| b[1] <=> a[1]}.collect{|e,v| "#{feature_names[e]}: #{v}"}.join("\n")
-
-endpoint = "https://apps.ideaconsult.net/enanomapper/property/TOX/UNKNOWN_TOXICITY_SECTION/Log2+transformed/94D664CFE4929A0F400A5AD8CA733B52E049A688/E/3ed642f9-1b42-387a-9966-dea5b91e5f8a"
-nanomaterials.select!{|nm| nm.tox and nm.tox.collect{|t| t.keys}.flatten.include? endpoint}
-p nanomaterials.size
-
-feature_values = {}
-nanomaterials.each do |nm|
- (nm.p_chem + nm.tox).each do |f|
- feature_names[f] = feature_name f # avoid appending annotations/units with each function call, unclear why it happens
- p f unless f.size == 1
- k = f.keys.first
- unless f[k].is_a? String
- feature_values[k] ||= []
- feature_values[k] << f[k]
- end
- end
-end
-
-# remove empty values
-feature_values.select!{|f,vals| vals.uniq.size > 2}
-tox_descriptors = feature_values.select{|f,vals| f.match 'TOX'}.keys
-p_chem_descriptors = feature_values.select{|f,vals| f.match 'P-CHEM'}.keys
-
-#puts @features.to_yaml
-
-column_names = ["Nanoparticle"] + p_chem_descriptors.collect{|d| feature_names[d]} + tox_descriptors.collect{|d| feature_names[d]}
-table = []
-CSV.open(File.join(File.dirname(__FILE__),"data","protein_corona_extract.csv"),"w+") do |csv|
- csv << column_names
- nanomaterials.each do |nm|
- if nm.tox and nm.tox.collect{|t| t.keys}.flatten.include? endpoint
- #table << []
- csv << [nm.name] + p_chem_descriptors.collect{|p| nm.p_chem.collect{|pchem| pchem[p]}.compact.first} + tox_descriptors.collect{|p| nm.p_chem.collect{|pchem| pchem[p]}.compact.first}
- end
- end
-end
diff --git a/protein_corona.rb b/nanoparticles.rb
index 890b3ca..890b3ca 100644
--- a/protein_corona.rb
+++ b/nanoparticles.rb