From 9e7b36613e98601de7b2ceb2d4442e11f1ae868a Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Thu, 10 Nov 2016 12:23:46 +0100 Subject: intermediate commit, may be defunct --- lib/compound.rb | 3 --- lib/import.rb | 44 +++++++++++++++++++++++++++++++------------- lib/model.rb | 11 +++++++---- lib/nanoparticle.rb | 46 ++++++++++++++++++++++++++++++++++------------ 4 files changed, 72 insertions(+), 32 deletions(-) (limited to 'lib') diff --git a/lib/compound.rb b/lib/compound.rb index a399169..8a1143b 100644 --- a/lib/compound.rb +++ b/lib/compound.rb @@ -136,9 +136,6 @@ module OpenTox # @param inchi [String] smiles InChI string # @return [OpenTox::Compound] Compound def self.from_inchi inchi - # Temporary workaround for OpenBabels Inchi bug - # http://sourceforge.net/p/openbabel/bugs/957/ - # bug has not been fixed in latest git/development version #smiles = `echo "#{inchi}" | "#{File.join(File.dirname(__FILE__),"..","openbabel","bin","babel")}" -iinchi - -ocan`.chomp.strip smiles = obconversion(inchi,"inchi","can") if smiles.empty? diff --git a/lib/import.rb b/lib/import.rb index 8e57401..541c9b5 100644 --- a/lib/import.rb +++ b/lib/import.rb @@ -9,6 +9,12 @@ module OpenTox #get list of bundle URIs bundles = JSON.parse(RestClientWrapper.get('https://data.enanomapper.net/bundle?media=application%2Fjson'))["dataset"] File.open(File.join(dir,"bundles.json"),"w+"){|f| f.puts JSON.pretty_generate(bundles)} + # bundles + # id/summary + # id/compound + # id/substance + # id/property + bundles.each do |bundle| $logger.debug bundle["title"] nanoparticles = JSON.parse(RestClientWrapper.get(bundle["dataset"]+"?media=application%2Fjson"))["dataEntry"] @@ -32,32 +38,43 @@ module OpenTox t2 = 0 datasets = {} JSON.parse(File.read(File.join(dir,"bundles.json"))).each do |bundle| + if bundle["id"] == 3 datasets[bundle["URI"]] = Dataset.find_or_create_by(:source => bundle["URI"],:name => bundle["title"]) + end end - Dir[File.join(dir,"study*.json")].each do |s| + # TODO this is only for protein corona + Dir[File.join(dir,"study-F*.json")].each do |s| t = Time.now study = JSON.parse(File.read(s)) np = JSON.parse(File.read(File.join(dir,"nanoparticle-#{study['owner']['substance']['uuid']}.json"))) - core = {} - coating = [] + core_id = nil + coating_ids = [] np["composition"].each do |c| + uri = c["component"]["compound"]["URI"] + uri = CGI.escape File.join(uri,"&media=application/json") + data = JSON.parse(RestClientWrapper.get "https://data.enanomapper.net/query/compound/url/all?media=application/json&search=#{uri}") + smiles = data["dataEntry"][0]["values"]["https://data.enanomapper.net/feature/http%3A%2F%2Fwww.opentox.org%2Fapi%2F1.1%23SMILESDefault"] + names = [] + names << data["dataEntry"][0]["values"]["https://data.enanomapper.net/feature/http%3A%2F%2Fwww.opentox.org%2Fapi%2F1.1%23ChemicalNameDefault"] + names << data["dataEntry"][0]["values"]["https://data.enanomapper.net/feature/http%3A%2F%2Fwww.opentox.org%2Fapi%2F1.1%23IUPACNameDefault"] + if smiles + compound = Compound.find_or_create_by(:smiles => smiles) + compound.names = names.compact + else + compound = Compound.find_or_create_by(:names => names) + end + compound.save if c["relation"] == "HAS_CORE" - core = { - :uri => c["component"]["compound"]["URI"], - :name => c["component"]["values"]["https://data.enanomapper.net/feature/http%3A%2F%2Fwww.opentox.org%2Fapi%2F1.1%23ChemicalNameDefault"] - } + core_id = compound.id.to_s elsif c["relation"] == "HAS_COATING" - coating << { - :uri => c["component"]["compound"]["URI"], - :name => c["component"]["values"]["https://data.enanomapper.net/feature/http%3A%2F%2Fwww.opentox.org%2Fapi%2F1.1%23ChemicalNameDefault"] - } + coating_ids << compound.id.to_s end end if np["composition"] nanoparticle = Nanoparticle.find_or_create_by( :name => np["values"]["https://data.enanomapper.net/identifier/name"], :source => np["compound"]["URI"], - :core => core, - :coating => coating + :core_id => core_id, + :coating_ids => coating_ids ) np["bundles"].keys.each do |bundle_uri| nanoparticle.dataset_ids << datasets[bundle_uri].id @@ -104,6 +121,7 @@ module OpenTox nanoparticle.parse_ambit_value feature, effect["result"], dataset end end + p nanoparticle nanoparticle.save end datasets.each { |u,d| d.save } diff --git a/lib/model.rb b/lib/model.rb index 7503215..6a5e614 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -81,7 +81,6 @@ module OpenTox :method => "properties", :categories => ["P-CHEM"], }, - #:descriptors => ["P-CHEM","Proteomics"], :similarity => { :method => "Algorithm::Similarity.weighted_cosine", :min => 0.5 @@ -140,10 +139,11 @@ module OpenTox model.algorithms[:descriptors].delete(:features) model.algorithms[:descriptors].delete(:type) model.substances.each_with_index do |s,i| - s.calculate_properties(features).each_with_index do |v,j| + props = s.calculate_properties(features) + props.each_with_index do |v,j| model.independent_variables[j] ||= [] model.independent_variables[j][i] = v - end + end if props and !props.empty? end # parse independent_variables when "properties" @@ -152,7 +152,10 @@ module OpenTox categories.each do |category| Feature.where(category:category).each{|f| feature_ids << f.id.to_s} end - properties = model.substances.collect { |s| s.properties } + #p feature_ids + #properties = Nanoparticle.all.collect { |s| p s.name; p s.id; p s.properties } + properties = model.substances.collect { |s| s.properties } + #p properties property_ids = properties.collect{|p| p.keys}.flatten.uniq model.descriptor_ids = feature_ids & property_ids model.independent_variables = model.descriptor_ids.collect{|i| properties.collect{|p| p[i] ? p[i].median : nil}} diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb index 23e155c..02d9a89 100644 --- a/lib/nanoparticle.rb +++ b/lib/nanoparticle.rb @@ -3,8 +3,30 @@ module OpenTox class Nanoparticle < Substance include OpenTox - field :core, type: Hash, default: {} - field :coating, type: Array, default: [] + field :core_id, type: String, default: nil + field :coating_ids, type: Array, default: [] + + def core + Compound.find core_id + end + + def coating + coating_ids.collect{|i| Compound.find i } + end + + def fingerprint type=DEFAULT_FINGERPRINT + core_fp = core.fingerprint type + coating_fp = coating.collect{|c| c.fingerprint type}.flatten.uniq.compact + (core_fp.empty? or coating_fp.empty?) ? [] : (core_fp+coating_fp).uniq.compact + end + + def calculate_properties descriptors=PhysChem::OPENBABEL + if core.smiles and !coating.collect{|c| c.smiles}.compact.empty? + core_prop = core.calculate_properties descriptors + coating_prop = coating.collect{|c| c.calculate_properties descriptors if c.smiles} + descriptors.collect_with_index{|d,i| [core_prop[i],coating_prop.collect{|c| c[i] if c}]} + end + end def add_feature feature, value, dataset unless feature.name == "ATOMIC COMPOSITION" or feature.name == "FUNCTIONAL GROUP" # redundand @@ -37,28 +59,28 @@ module OpenTox add_feature feature, v["loValue"], dataset elsif v.keys.size == 2 and v["errorValue"] add_feature feature, v["loValue"], dataset - warn "Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'." + #warn "Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'." elsif v.keys.size == 2 and v["loQualifier"] == "mean" add_feature feature, v["loValue"], dataset - warn "'#{feature.name}' is a mean value. Original data is not available." + #warn "'#{feature.name}' is a mean value. Original data is not available." elsif v.keys.size == 2 and v["loQualifier"] #== ">=" - warn "Only min value available for '#{feature.name}', entry ignored" + #warn "Only min value available for '#{feature.name}', entry ignored" elsif v.keys.size == 2 and v["upQualifier"] #== ">=" - warn "Only max value available for '#{feature.name}', entry ignored" + #warn "Only max value available for '#{feature.name}', entry ignored" elsif v.keys.size == 3 and v["loValue"] and v["loQualifier"].nil? and v["upQualifier"].nil? add_feature feature, v["loValue"], dataset - warn "loQualifier and upQualifier are empty." + #warn "loQualifier and upQualifier are empty." elsif v.keys.size == 3 and v["loValue"] and v["loQualifier"] == "" and v["upQualifier"] == "" add_feature feature, v["loValue"], dataset - warn "loQualifier and upQualifier are empty." + #warn "loQualifier and upQualifier are empty." elsif v.keys.size == 4 and v["loValue"] and v["loQualifier"].nil? and v["upQualifier"].nil? add_feature feature, v["loValue"], dataset - warn "loQualifier and upQualifier are empty." + #warn "loQualifier and upQualifier are empty." elsif v.size == 4 and v["loQualifier"] and v["upQualifier"] and v["loValue"] and v["upValue"] - add_feature feature, [v["loValue"],v["upValue"]].mean, dataset - warn "Using mean value of range #{v["loValue"]} - #{v["upValue"]} for '#{feature.name}'. Original data is not available." + #add_feature feature, [v["loValue"],v["upValue"]].mean, dataset + #warn "Using mean value of range #{v["loValue"]} - #{v["upValue"]} for '#{feature.name}'. Original data is not available." elsif v.size == 4 and v["loQualifier"] == "mean" and v["errorValue"] - warn "'#{feature.name}' is a mean value. Original data is not available. Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'." + #warn "'#{feature.name}' is a mean value. Original data is not available. Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'." add_feature feature, v["loValue"], dataset elsif v == {} # do nothing else -- cgit v1.2.3