From f3780d7507092b643216054fa3ca1e6146281e43 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Fri, 8 Apr 2016 13:04:56 +0200 Subject: enm import test --- data/enm-import.rb | 25 +++++++++++-------------- lib/compound.rb | 1 + lib/lazar.rb | 1 + lib/nanoparticle.rb | 45 ++++++++++++++++++++++++++++++++++++++++----- test/setup.rb | 4 ++-- 5 files changed, 55 insertions(+), 21 deletions(-) diff --git a/data/enm-import.rb b/data/enm-import.rb index 65fd3c7..37bc22b 100644 --- a/data/enm-import.rb +++ b/data/enm-import.rb @@ -1,6 +1,7 @@ require_relative '../lib/lazar.rb' include OpenTox - +$mongo.database.drop +$gridfs = $mongo.database.fs #get list of bundle URIs bundles = JSON.parse(RestClientWrapper.get('https://data.enanomapper.net/bundle?media=application%2Fjson'))["dataset"] @@ -13,38 +14,34 @@ bundles.each do |bundle| :name => np["values"]["https://data.enanomapper.net/identifier/name"], :source => np["compound"]["URI"], ) + nanoparticle.bundles << uri + nanoparticle.bundles.uniq! np["composition"].each do |comp| case comp["relation"] when "HAS_CORE" - nanoparticle[:core] = comp["component"]["compound"]["URI"] + nanoparticle.core = comp["component"]["compound"]["URI"] when "HAS_COATING" - nanoparticle[:coating] ||= [] - nanoparticle[:coating] << comp["component"]["compound"]["URI"] + nanoparticle.coating << comp["component"]["compound"]["URI"] end end if np["composition"] np["values"].each do |u,v| if u.match(/property/) - name, unit = nil + name, unit, source = nil features.each do |uri,feat| if u.match(/#{uri}/) name = feat["title"] unit = feat["units"] + source = uri end end feature = Feature.find_or_create_by( :name => name, :unit => unit, - #:source => uri + :source => source ) - nanoparticle[:features] ||= {} - if v.size == 1 and v.first.keys == ["loValue"] - nanoparticle[:features][feature.id] = v.first["loValue"] - else - #TODO - end end + v.each{|value| nanoparticle.parse_ambit_value feature, value} if v.is_a? Array end - p nanoparticle - nanoparticle.save + nanoparticle.save! end end diff --git a/lib/compound.rb b/lib/compound.rb index 2a79fd6..a7518ed 100644 --- a/lib/compound.rb +++ b/lib/compound.rb @@ -21,6 +21,7 @@ module OpenTox field :default_fingerprint_size, type: Integer field :physchem_descriptors, type: Hash, default: {} field :dataset_ids, type: Array, default: [] + # TODO separate between physchem, bio and tox field :features, type: Hash, default: {} index({smiles: 1}, {unique: true}) diff --git a/lib/lazar.rb b/lib/lazar.rb index 39dd8fa..0e2cec2 100644 --- a/lib/lazar.rb +++ b/lib/lazar.rb @@ -81,5 +81,6 @@ CLASSES = ["Feature","Compound","Dataset","Validation","CrossValidation","LeaveO "crossvalidation.rb", "leave-one-out-validation.rb", "experiment.rb", + "import.rb", ].each{ |f| require_relative f } OpenTox::PhysChem.descriptors # load descriptor features diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb index 3783ece..cda431a 100644 --- a/lib/nanoparticle.rb +++ b/lib/nanoparticle.rb @@ -3,13 +3,48 @@ module OpenTox class Nanoparticle include OpenTox - field :particle_id, type: String + #field :particle_id, type: String field :core, type: String - field :coatings, type: Array + field :coating, type: Array, default: [] - #field :physchem_descriptors, type: Hash, default: {} - #field :toxicities, type: Hash, default: {} - field :features, type: Hash, default: {} + field :physchem_descriptors, type: Hash, default: {} + field :toxicities, type: Hash, default: {} + #field :features, type: Hash, default: {} + field :bundles, type: Array, default: [] + + def predict + end + + def add_feature feature, value + if feature.source.match /property\/P-CHEM/ + physchem_descriptors[feature.id.to_s] ||= [] + physchem_descriptors[feature.id.to_s] << value + elsif feature.source.match /property\/TOX/ + toxicities[feature.id.to_s] ||= [] + toxicities[feature.id.to_s] << value + else + $logger.warn "Unknown feature type '#{feature.source}'. Value '#{value}' not inserted." + warnings << "Unknown feature type '#{feature.source}'. Value '#{value}' not inserted." + end + end + + def parse_ambit_value feature, v + if v.keys == ["loValue"] + add_feature feature, v["loValue"] + elsif v.keys.size == 2 and v["loQualifier"] == "mean" + add_feature feature, {:mean => v["loValue"]} + elsif v.keys.size == 2 and v["loQualifier"] #== ">=" + add_feature feature, {:min => v["loValue"],:max => Float::INFINITY} + elsif v.keys.size == 2 and v["upQualifier"] #== ">=" + add_feature feature, {:max => v["upValue"],:min => -Float::INFINITY} + elsif v.size == 4 and v["loQualifier"] and v["upQualifier"] + add_feature feature, {:min => v["loValue"],:max => v["upValue"]} + elsif v == {} # do nothing + else + $logger.warn "Cannot parse Ambit eNanoMapper value '#{v}' for feature '#{feature.name}'." + warnings << "Cannot parse Ambit eNanoMapper value '#{v}' for feature '#{feature.name}'." + end + end end end diff --git a/test/setup.rb b/test/setup.rb index be3140a..e7c32b4 100644 --- a/test/setup.rb +++ b/test/setup.rb @@ -1,7 +1,7 @@ ENV["LAZAR_ENV"] = "development" require 'minitest/autorun' -#require_relative '../lib/lazar.rb' -require 'lazar' +require_relative '../lib/lazar.rb' +#require 'lazar' include OpenTox TEST_DIR ||= File.expand_path(File.dirname(__FILE__)) DATA_DIR ||= File.join(TEST_DIR,"data") -- cgit v1.2.3