From 4662e845c12e3e623ec9bec208c42cd4b1886047 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Fri, 15 Apr 2016 14:58:17 +0200 Subject: enm study import --- lib/dataset.rb | 11 +++++------ lib/feature.rb | 10 ++-------- lib/import.rb | 53 +++++++++++++++++++---------------------------------- lib/nanoparticle.rb | 42 +++++++++++++++++++++++++----------------- 4 files changed, 51 insertions(+), 65 deletions(-) diff --git a/lib/dataset.rb b/lib/dataset.rb index fdf1bfc..b51d74b 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -88,22 +88,21 @@ module OpenTox # @return [String] def to_csv(inchi=false) CSV.generate() do |csv| - compound = Substance.find(data_entries.first.first).is_a? Compound + compound = Substance.find(substance_ids.first).is_a? Compound if compound csv << [inchi ? "InChI" : "SMILES"] + features.collect{|f| f.name} else csv << ["Name"] + features.collect{|f| f.name} end - data_entries.each do |sid,f| - substance = Substance.find sid - features.each do |feature| - f[feature.id.to_s].each do |v| + substances.each do |substance| + features.each do |f| + substance.toxicities[f.id.to_s].each do |v| if compound csv << [inchi ? substance.inchi : substance.smiles , v] else csv << [substance.name , v] end - end if f[feature.id.to_s] + end if substance.toxicities[f.id.to_s] end end end diff --git a/lib/feature.rb b/lib/feature.rb index f13a3fb..c6fb68a 100644 --- a/lib/feature.rb +++ b/lib/feature.rb @@ -6,7 +6,9 @@ module OpenTox field :numeric, type: Boolean field :measured, type: Boolean field :calculated, type: Boolean + field :category, type: String field :unit, type: String + field :conditions, type: Hash end # Feature for categorical variables @@ -35,12 +37,4 @@ module OpenTox end end - # Feature for categorical bioassay results - class NominalBioAssay < NominalFeature - end - - # Feature for quantitative bioassay results - class NumericBioAssay < NumericFeature - end - end diff --git a/lib/import.rb b/lib/import.rb index cf0855e..9091207 100644 --- a/lib/import.rb +++ b/lib/import.rb @@ -19,43 +19,28 @@ module OpenTox :name => np["values"]["https://data.enanomapper.net/identifier/name"], :source => np["compound"]["URI"], ) - dataset.data_entries[nanoparticle.id.to_s] ||= {} - nanoparticle.bundles << uri - nanoparticle.dataset_ids << dataset.id - np["composition"].each do |comp| - case comp["relation"] - when "HAS_CORE" - nanoparticle.core = comp["component"]["compound"]["URI"] - when "HAS_COATING" - nanoparticle.coating << comp["component"]["compound"]["URI"] - end - end if np["composition"] - np["values"].each do |u,v| - if u.match(/property/) - name, unit, source = nil - features.each do |uri,feat| - if u.match(/#{uri}/) - name = feat["title"] - unit = feat["units"] - source = uri - end - end - feature = Feature.find_or_create_by( - :name => name, - :unit => unit, - :source => source + dataset.substance_ids << nanoparticle.id + dataset.substance_ids.uniq! + studies = JSON.parse(RestClientWrapper.get(File.join(np["compound"]["URI"],"study")))["study"] + studies.each do |study| + study["effects"].each do |effect| + effect["result"]["textValue"] ? klass = NominalFeature : klass = NumericFeature + # TODO parse core/coating + # TODO parse proteomics, they come as a large textValue + $logger.debug File.join(np["compound"]["URI"],"study") + effect["conditions"].delete_if { |k, v| v.nil? } + feature = klass.find_or_create_by( + :source => File.join(np["compound"]["URI"],"study"), + :name => "#{study["protocol"]["category"]["title"]} #{study["protocol"]["endpoint"]}", + :unit => effect["result"]["unit"], + :category => study["protocol"]["topcategory"], + :conditions => effect["conditions"] ) + nanoparticle.parse_ambit_value feature, effect["result"] + dataset.feature_ids << feature.id + dataset.feature_ids.uniq! end - v.each{|value| nanoparticle.parse_ambit_value feature, value} if v.is_a? Array - end - nanoparticle.bundles.uniq! - nanoparticle.physchem_descriptors.each{|f,v| v.uniq!} - #nanoparticle.toxicities.each{|f,v| v.uniq!} - nanoparticle.toxicities.each do |f,v| - dataset.data_entries[nanoparticle.id.to_s][f.to_s] ||= [] - dataset.data_entries[nanoparticle.id.to_s][f.to_s] += v end - nanoparticle.save end dataset.save datasets << dataset diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb index 0350363..295b6c0 100644 --- a/lib/nanoparticle.rb +++ b/lib/nanoparticle.rb @@ -12,43 +12,51 @@ module OpenTox end def add_feature feature, value - if feature.source.match /property\/P-CHEM/ + case feature.category + when "P-CHEM" physchem_descriptors[feature.id.to_s] ||= [] physchem_descriptors[feature.id.to_s] << value - elsif feature.source.match /property\/TOX/ + when "TOX" toxicities[feature.id.to_s] ||= [] toxicities[feature.id.to_s] << value else - warn "Unknown feature type '#{feature.source}'. Value '#{value}' not inserted." + warn "Unknown feature type '#{feature.category}'. Value '#{value}' not inserted." end + save end def parse_ambit_value feature, v - # TODO: units, mmol/log10 conversion - if v.keys == ["loValue"] - #if v["loValue"].numeric? - add_feature feature, v["loValue"] - #else - #warn "'#{v["loValue"]}' is not a numeric value, entry ignored." - #end + v.delete "unit" + # TODO: mmol/log10 conversion + if v.keys == ["textValue"] + add_feature feature, v["textValue"] + elsif v.keys == ["loValue"] + add_feature feature, v["loValue"] + elsif v.keys.size == 2 and v["errorValue"] + add_feature feature, v["loValue"] + warn "Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'." elsif v.keys.size == 2 and v["loQualifier"] == "mean" - #add_feature feature, {:mean => v["loValue"]} add_feature feature, v["loValue"] warn "'#{feature.name}' is a mean value. Original data is not available." elsif v.keys.size == 2 and v["loQualifier"] #== ">=" - #add_feature feature, {:min => v["loValue"],:max => Float::INFINITY} warn "Only min value available for '#{feature.name}', entry ignored" elsif v.keys.size == 2 and v["upQualifier"] #== ">=" - #add_feature feature, {:max => v["upValue"],:min => -Float::INFINITY} warn "Only max value available for '#{feature.name}', entry ignored" - elsif v.size == 4 and v["loQualifier"] and v["upQualifier"] - #add_feature feature, {:min => v["loValue"],:max => v["upValue"]} + elsif v.keys.size == 3 and v["loValue"] and v["loQualifier"].nil? and v["upQualifier"].nil? + add_feature feature, v["loValue"] + warn "loQualifier and upQualifier are empty." + elsif v.keys.size == 3 and v["loValue"] and v["loQualifier"] == "" and v["upQualifier"] == "" + add_feature feature, v["loValue"] + warn "loQualifier and upQualifier are empty." + elsif v.size == 4 and v["loQualifier"] and v["upQualifier"] and v["loValue"] and v["upValue"] add_feature feature, [v["loValue"],v["upValue"]].mean warn "Using mean value of range #{v["loValue"]} - #{v["upValue"]} for '#{feature.name}'. Original data is not available." + elsif v.size == 4 and v["loQualifier"] == "mean" and v["errorValue"] + warn "'#{feature.name}' is a mean value. Original data is not available. Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'." + add_feature feature, v["loValue"] elsif v == {} # do nothing else - $logger.warn "Cannot parse Ambit eNanoMapper value '#{v}' for feature '#{feature.name}'." - warnings << "Cannot parse Ambit eNanoMapper value '#{v}' for feature '#{feature.name}'." + warn "Cannot parse Ambit eNanoMapper value '#{v}' for feature '#{feature.name}'." end end -- cgit v1.2.3