summaryrefslogtreecommitdiff
path: root/lib/import.rb
blob: 8e574012d32a36bec34041ece21d0631b5dcbeab (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
module OpenTox

  module Import

    class Enanomapper
      include OpenTox

      # Mirror the eNanoMapper bundle, nanoparticle and study JSON documents into +dir+.
      #
      # Writes bundles.json, one nanoparticle-<uuid>.json per data entry of every
      # bundle and one study-<uuid>.json per study of every data entry. All files
      # are pretty-printed JSON.
      #
      # @param dir [String] directory the JSON files are written to (defaults to the current directory)
      # @return [Array] the parsed list of bundles
      def self.mirror dir="."
        # fetch a URI and parse the response body as JSON
        fetch_json = lambda { |uri| JSON.parse(RestClientWrapper.get(uri)) }
        # write a document as pretty-printed JSON below dir
        write_json = lambda do |basename, document|
          File.open(File.join(dir, basename), "w+") { |io| io.puts JSON.pretty_generate(document) }
        end

        # get list of bundle URIs
        bundle_list = fetch_json.call('https://data.enanomapper.net/bundle?media=application%2Fjson')["dataset"]
        write_json.call("bundles.json", bundle_list)

        bundle_list.each do |bundle|
          $logger.debug bundle["title"]
          entries = fetch_json.call(bundle["dataset"]+"?media=application%2Fjson")["dataEntry"]
          $logger.debug entries.size
          entries.each do |entry|
            uuid = entry["values"]["https://data.enanomapper.net/identifier/uuid"]
            $logger.debug uuid
            write_json.call("nanoparticle-#{uuid}.json", entry)
            study_list = fetch_json.call(File.join(entry["compound"]["URI"], "study"))["study"]
            # log the uuid a second time for entries without any study
            $logger.debug uuid if study_list.empty?
            study_list.each { |study| write_json.call("study-#{study["uuid"]}.json", study) }
          end
        end
      end

      # Import mirrored eNanoMapper JSON files from +dir+ into the database.
      #
      # Reads bundles.json to find or create one Dataset per bundle, then
      # processes every study*.json file together with the
      # nanoparticle-<uuid>.json file of the substance that owns the study:
      # the nanoparticle is found or created (core and coating are taken from
      # its composition), one feature is found or created per effect (or per
      # identifier for proteomics results) and every value is handed to
      # Nanoparticle#parse_ambit_value.
      #
      # @param dir [String] directory containing the mirrored JSON files (defaults to the current directory)
      # @return [Hash] the datasets, keyed by bundle URI
      def self.import dir="."
        chemical_name = "https://data.enanomapper.net/feature/http%3A%2F%2Fwww.opentox.org%2Fapi%2F1.1%23ChemicalNameDefault"
        # extract compound URI and default chemical name from a composition entry
        component = lambda do |entry|
          {
            :uri => entry["component"]["compound"]["URI"],
            :name => entry["component"]["values"][chemical_name]
          }
        end

        datasets = {}
        JSON.parse(File.read(File.join(dir,"bundles.json"))).each do |bundle|
          datasets[bundle["URI"]] = Dataset.find_or_create_by(:source => bundle["URI"],:name => bundle["title"])
        end

        Dir[File.join(dir,"study*.json")].each do |s|
          study = JSON.parse(File.read(s))
          np = JSON.parse(File.read(File.join(dir,"nanoparticle-#{study['owner']['substance']['uuid']}.json")))

          core = {}
          coating = []
          (np["composition"] || []).each do |c|
            case c["relation"]
            when "HAS_CORE" then core = component.call(c)
            when "HAS_COATING" then coating << component.call(c)
            end
          end
          nanoparticle = Nanoparticle.find_or_create_by(
            :name => np["values"]["https://data.enanomapper.net/identifier/name"],
            :source => np["compound"]["URI"],
            :core => core,
            :coating => coating
          )
          # the same nanoparticle is visited once per study file, so only add
          # dataset ids that are not present yet
          np["bundles"].each_key do |bundle_uri|
            dataset_id = datasets[bundle_uri].id
            nanoparticle.dataset_ids << dataset_id unless nanoparticle.dataset_ids.include?(dataset_id)
          end

          dataset = datasets[np["bundles"].keys.first]
          protocol = study["protocol"]
          top_category = protocol["topcategory"]
          source = protocol["category"]["term"]
          proteomics_study = protocol["category"]["title"] =~ /Proteomics/
          proteomics_features = {} # per-study cache, avoids repeated database lookups

          study["effects"].each do |effect|
            result = effect["result"]
            text_value = result["textValue"]
            klass = text_value ? NominalFeature : NumericFeature
            conditions = effect["conditions"].delete_if { |_k, v| v.nil? }

            if proteomics_study && text_value && text_value.length > 50 # parse proteomics data
              JSON.parse(text_value).each do |identifier, value| # time critical step
                proteomics_features[identifier] ||= NumericFeature.find_or_create_by(:name => identifier, :category => "Proteomics", :unit => "Spectral counts", :source => source,:measured => true)
                nanoparticle.parse_ambit_value proteomics_features[identifier], value, dataset
              end
            else
              name = effect["endpoint"]
              unit = result["unit"]
              # start from the study's category for every effect, so that an
              # override below does not leak into the following effects
              category = top_category
              warnings = []
              case name
              when "Log2 transformed" # use a sensible name
                name = "log2(Net cell association)"
                warnings = ["Original name was 'Log2 transformed'"]
                unit = "log2(mL/ug(Mg))"
              when "Total protein (BCA assay)"
                category = "P-CHEM"
                warnings = ["Category changed from TOX to P-CHEM"]
              end
              feature = klass.find_or_create_by(
                :name => name,
                :unit => unit,
                :category => category,
                :conditions => conditions,
                :source => source,
                :measured => true,
                :warnings => warnings
              )
              nanoparticle.parse_ambit_value feature, result, dataset
            end
          end
          nanoparticle.save
        end
        datasets.each_value(&:save)
      end

=begin
      def self.import_ld # defunct, AMBIT JSON_LD does not have substance entries
        #get list of bundle URIs
        bundles = JSON.parse(RestClientWrapper.get('https://data.enanomapper.net/bundle?media=application%2Fjson'))["dataset"]
        datasets = []
        bundles.each do |bundle|
          uri = bundle["URI"]
          study = JSON.parse(`curl -H 'Accept:application/ld+json' '#{uri}/substance'`)
          study["@graph"].each do |i|
            puts i.to_yaml if i.keys.include? "sio:has-value"
          end
        end
        datasets.collect{|d| d.id}
      end
=end

    end

  end

end