diff options
author | Christoph Helma <helma@in-silico.ch> | 2016-01-20 13:53:22 +0100 |
---|---|---|
committer | Christoph Helma <helma@in-silico.ch> | 2016-01-20 13:53:22 +0100 |
commit | 9546c589f6852942ed85f8da1e12c351fb92e0f0 (patch) | |
tree | b8ebc48eded5f47d54431a2b49bb0993244bfdaa | |
parent | 6bc457aa0a10898a53480f8031609ac2a9b96db8 (diff) |
enm import removed
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | application.rb | 4 | ||||
-rw-r--r-- | import.rb | 153 | ||||
-rw-r--r-- | nanoparticles.rb (renamed from protein_corona.rb) | 0 |
4 files changed, 3 insertions, 155 deletions
@@ -3,3 +3,4 @@ data papers *tmp *swp +enm-import.rb diff --git a/application.rb b/application.rb index 1cb0e13..703932f 100644 --- a/application.rb +++ b/application.rb @@ -1,7 +1,7 @@ require 'sinatra' require "sinatra/reloader" if development? -require_relative 'protein_corona.rb' -also_reload './protein_corona.rb' +require_relative 'nanoparticles.rb' +also_reload './nanoparticles.rb' get '/?' do @data = JSON.parse(File.read("./data.json")) diff --git a/import.rb b/import.rb deleted file mode 100644 index 63d8a08..0000000 --- a/import.rb +++ /dev/null @@ -1,153 +0,0 @@ -# TODO: missing data for protein corona silver particles -require 'json' -require 'yaml' -require 'csv' -require_relative "lib/nano-lazar.rb" -include OpenTox - -def feature_name uri - f = @features[uri] - name = f['title'] - annotations = f['annotation'].collect{|a| "#{a['p']}: #{a['o']}"}.uniq.join ", " - name << " (#{annotations})" unless annotations.empty? - name << " [#{f['units']}]" if f['units'] and !f['units'].empty? - name -end - -nanomaterials = [] -feature_names = {} -@features = {} - -["nanowiki.json", "protein-corona.json", "marina.json"].each do |f| - bundle = JSON.parse(File.read(File.join("data",f))) - @features.merge! bundle["feature"] - bundle["dataEntry"].each do |substance| - nm = Nanoparticle.new - nm.uri = substance["compound"]["URI"] - nm.name = substance["values"]["https://apps.ideaconsult.net/enanomapper/identifier/name"] if substance["values"] - if substance["composition"] - nr_cores = substance["composition"].select{|c| c["relation"] == "HAS_CORE"}.size - puts "#{substance["compound"]["URI"]} has #{nr_cores} cores" if nr_cores !=1 - substance["composition"].each do |composition| - component = composition["component"] - if component - name = component["values"]["https://apps.ideaconsult.net/enanomapper/feature/http%3A%2F%2Fwww.opentox.org%2Fapi%2F1.1%23ChemicalNameDefault"] - #names << name - if composition["relation"] == "HAS_CORE" - nm.core = name - elsif composition["relation"] == "HAS_COATING" - nm.coating ||= [] - nm.coating << name - end - else - #puts substance.to_yaml - end - end - else - #puts substance.to_yaml - end - substance["values"].each do |k,v| - property = nil - if k.match(/TOX/) - nm.tox ||= [] - property = "tox" - elsif k.match(/P-CHEM/) - nm.p_chem ||= [] - property = "p_chem" - end - if property - v.each do |val| - if val.keys == ["loValue"] - nm.tox << {k => val["loValue"]} if property == "tox" - nm.p_chem << {k => val["loValue"]} if property == "p_chem" - elsif val.keys == ["loQualifier", "loValue"] and val["loQualifier"] == "mean" - nm.tox << {k => val["loValue"]} if property == "tox" - nm.p_chem << {k => val["loValue"]} if property == "p_chem" - elsif val.keys == ["loQualifier", "loValue", "upQualifier", "upValue" ] - nm.tox << {k => (val["loValue"]+val["upValue"])/2} if property == "tox" - nm.p_chem << {k => (val["loValue"]+val["upValue"])/2} if property == "p_chem" - elsif val.keys == ["loQualifier", "loValue"] and val["loQualifier"] == ">=" - else - p val - end - end - else - #p k,v - end - end - nm.tox.uniq! if nm.tox - nm.p_chem.uniq! if nm.p_chem - nanomaterials << nm - end -end - -puts "Total imported: #{nanomaterials.size}" -puts "With nanoparticle characterisation: #{nanomaterials.select{|n| n.p_chem}.size}" -modelling_data = nanomaterials.select{|n| n.tox and n.p_chem} -puts "With TOX data: #{nanomaterials.select{|n| n.tox}.size}" -puts "With TOX data and particle characterisation: #{modelling_data.size}" -endpoints = modelling_data.collect{|n| n.tox.collect{|t| t.keys}}.flatten.compact.uniq -puts -puts "Endpoints: #{endpoints.size}" - -single_value_endpoints = [] -endpoint_values = {} - -endpoints.each do |e| - i = 0 - values = [] - modelling_data.each do |n| - n.tox.each do |t| - if t[e] - i += 1 - values << t[e] - end - end - end - single_value_endpoints << e if values.uniq.size == 1 - endpoint_values[e] = values.size unless values.uniq.size == 1 -end - -endpoints -= single_value_endpoints -puts "Endpoints with more than one measurement value: #{endpoints.size}" -endpoint_values.select!{|k,v| v > 10} -puts "Endpoints with more than 10 measurements: #{endpoint_values.size}" -endpoints = endpoint_values.keys -puts -puts endpoint_values.sort{|a,b| b[1] <=> a[1]}.collect{|e,v| "#{feature_names[e]}: #{v}"}.join("\n") - -endpoint = "https://apps.ideaconsult.net/enanomapper/property/TOX/UNKNOWN_TOXICITY_SECTION/Log2+transformed/94D664CFE4929A0F400A5AD8CA733B52E049A688/E/3ed642f9-1b42-387a-9966-dea5b91e5f8a" -nanomaterials.select!{|nm| nm.tox and nm.tox.collect{|t| t.keys}.flatten.include? endpoint} -p nanomaterials.size - -feature_values = {} -nanomaterials.each do |nm| - (nm.p_chem + nm.tox).each do |f| - feature_names[f] = feature_name f # avoid appending annotations/units with each function call, unclear why it happens - p f unless f.size == 1 - k = f.keys.first - unless f[k].is_a? String - feature_values[k] ||= [] - feature_values[k] << f[k] - end - end -end - -# remove empty values -feature_values.select!{|f,vals| vals.uniq.size > 2} -tox_descriptors = feature_values.select{|f,vals| f.match 'TOX'}.keys -p_chem_descriptors = feature_values.select{|f,vals| f.match 'P-CHEM'}.keys - -#puts @features.to_yaml - -column_names = ["Nanoparticle"] + p_chem_descriptors.collect{|d| feature_names[d]} + tox_descriptors.collect{|d| feature_names[d]} -table = [] -CSV.open(File.join(File.dirname(__FILE__),"data","protein_corona_extract.csv"),"w+") do |csv| - csv << column_names - nanomaterials.each do |nm| - if nm.tox and nm.tox.collect{|t| t.keys}.flatten.include? endpoint - #table << [] - csv << [nm.name] + p_chem_descriptors.collect{|p| nm.p_chem.collect{|pchem| pchem[p]}.compact.first} + tox_descriptors.collect{|p| nm.p_chem.collect{|pchem| pchem[p]}.compact.first} - end - end -end diff --git a/protein_corona.rb b/nanoparticles.rb index 890b3ca..890b3ca 100644 --- a/protein_corona.rb +++ b/nanoparticles.rb |