summaryrefslogtreecommitdiff
path: root/descriptor.rb
blob: 96599d671665db71e9f0c42b5d48aae2482646c2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
# descriptors.rb
# Calculation of physico-chemical descriptors
# Author: Andreas Maunz, Christoph Helma
require 'openbabel'

module OpenTox

  class Application < Service

    ENV["JAVA_HOME"] ||= "/usr/lib/jvm/java-7-openjdk" 
    JAVA_DIR = File.join(File.dirname(__FILE__),"java")
    CDK_JAR = Dir[File.join(JAVA_DIR,"cdk-*jar")].last
    JOELIB_JAR = File.join(JAVA_DIR,"joelib2.jar")
    LOG4J_JAR = File.join(JAVA_DIR,"log4j.jar")
    JMOL_JAR = File.join(JAVA_DIR,"Jmol.jar")

    unless defined? DESCRIPTORS 

      # initialize descriptors and features at startup to avoid duplication
      descriptors = { :cdk => [], :openbabel => [], :joelib => [] } # use arrays to keep the sequence intact

      @@obmol = OpenBabel::OBMol.new
      @@obconversion = OpenBabel::OBConversion.new
      @@obconversion.set_in_format 'inchi'

      # OpenBabel
      OpenBabel::OBDescriptor.list_as_string("descriptors").split("\n").each do |d|
        title,description = d.split(/\s+/,2)
        unless title =~ /cansmi|formula|InChI|smarts|title/ or title == "s"
          uri = File.join $algorithm[:uri], "descriptor/openbabel" ,title
          title = "OpenBabel "+title
          feature = OpenTox::Feature.find_or_create({
              RDF::DC.title => title,
              RDF.type => [RDF::OT.Feature, RDF::OT.NumericFeature],
              RDF::DC.description => description,
            }, @subjectid)
          descriptors[:openbabel] << {
            :title => title,
            :uri => uri,
            :description => description,
            :calculator => OpenBabel::OBDescriptor.find_type(title.split(" ").last),
            :feature => feature
          }
        end
      end

      # CDK
      cdk_descriptors = YAML.load(`java -classpath #{CDK_JAR}:#{JAVA_DIR}  CdkDescriptorInfo`)
      cdk_descriptors.each do |descriptor|
        title = descriptor[:java_class].split('.').last.sub(/Descriptor/,'')
        descriptor[:title] = "Cdk " + title
        descriptor[:uri] = File.join $algorithm[:uri], "descriptor/cdk" ,title
        descriptor[:features] = []
        descriptor[:names].each do |name|
          descriptor[:features] << OpenTox::Feature.find_or_create({
            RDF::DC.title => "#{descriptor[:title]} #{name}",
            RDF.type => [RDF::OT.Feature, RDF::OT.NumericFeature],
            RDF::DC.description => descriptor[:description]
          }, @subjectid)
        end
      end
      descriptors[:cdk] = cdk_descriptors
      
      # Joelib
      joelib_descriptors = YAML.load(`java -classpath #{JOELIB_JAR}:#{LOG4J_JAR}:#{JAVA_DIR}  JoelibDescriptorInfo | sed '0,/---/d'`) # strip Joelib messages at stdout
      joelib_descriptors.each do |descriptor|
        # exclude Hashcode (not a physchem property) and GlobalTopologicalChargeIndex (Joelib bug)
        next if descriptor[:java_class] == "joelib2.feature.types.MoleculeHashcode" or descriptor[:java_class] == "joelib2.feature.types.GlobalTopologicalChargeIndex"
        title = descriptor[:java_class].split('.').last
        descriptor[:uri] = File.join $algorithm[:uri], "descriptor/joelib",title
        descriptor[:title] = "Joelib " + title
        descriptor[:feature] = OpenTox::Feature.find_or_create({
            RDF::DC.title => descriptor[:title],
            RDF.type => [RDF::OT.Feature, RDF::OT.NumericFeature],
            #RDF::DC.description => descriptor[:title], # impossible to obtain meaningful descriptions from JOELIb, see java/JoelibDescriptors.java
          }, @subjectid)
      end
      descriptors[:joelib] = joelib_descriptors.select{|d| d[:title]}

      DESCRIPTORS = descriptors

    end

    helpers do

      def openbabel compounds, descriptors
        compounds.each do |compound|
          @@obconversion.read_string @@obmol, compound.inchi
          descriptors.each do |descriptor|
            @feature_dataset.add_data_entry compound, descriptor[:feature], fix_value(descriptor[:calculator].predict(@@obmol))
          end
        end
      end

      def cdk compounds, descriptors
        sdf_3d compounds
        # use java system call (rjb blocks within tasks)
        # use Tempfiles to avoid "Argument list too long" error 
        puts `java -classpath #{CDK_JAR}:#{JAVA_DIR}  CdkDescriptors #{@sdf_file.path} #{descriptors.collect{|d| d[:title].split("\s").last}.join(" ")}`
        YAML.load_file(@sdf_file.path+"cdk.yaml").each_with_index do |calculation,i|
          $logger.error "Descriptor calculation failed for compound #{compounds[i].uri}." if calculation.empty?
          calculation.each do |name,value|
            feature = DESCRIPTORS[:cdk].collect{|d| d[:features]}.flatten.select{|f| f[RDF::DC.title].split("\s").last == name.to_s}.first
            @feature_dataset.add_data_entry compounds[i], feature, fix_value(value)
          end
        end
      end

      def joelib compounds, descriptors
        # use java system call (rjb blocks within tasks)
        # use Tempfiles to avoid "Argument list too long" error 
        sdf_3d compounds
        puts `java -classpath #{JOELIB_JAR}:#{JMOL_JAR}:#{LOG4J_JAR}:#{JAVA_DIR}  JoelibDescriptors  #{@sdf_file.path} #{descriptors.collect{|d| d[:java_class]}.join(" ")}`
        YAML.load_file(@sdf_file.path+"joelib.yaml").each_with_index do |calculation,i|
          $logger.error "Descriptor calculation failed for compound #{compounds[i].uri}." if calculation.empty?
          calculation.each do |java_class,value|
            feature = DESCRIPTORS[:joelib].select{|d| d[:java_class] == java_class}.first[:feature]
            @feature_dataset.add_data_entry compounds[i], feature, fix_value(value)
          end
        end
      end

      def sdf_3d compounds
        unless @sdf_file and File.exists? @sdf_file.path
          @sdf_file = Tempfile.open("sdf")
          @@obconversion.set_out_format 'sdf'
          # create 3d sdf file (faster in Openbabel than in CDK)
          compounds.each do |compound|
            @@obconversion.read_string @@obmol, compound.inchi
            sdf_2d = @@obconversion.write_string(@@obmol)  
            OpenBabel::OBOp.find_type("Gen3D").do(@@obmol) 
            sdf_3d = @@obconversion.write_string(@@obmol)  
            if sdf_3d.match(/.nan/)
              warning = "3D generation failed for compound #{compound.uri}, trying to calculate descriptors from 2D structure."
              $logger.warn warning
              @feature_dataset[RDF::OT.Warnings] ? @feature_dataset[RDF::OT.Warnings] << warning : @feature_dataset[RDF::OT.Warnings] = warning
              @sdf_file.puts sdf_2d
            else
              @sdf_file.puts sdf_3d
            end
          end
          @sdf_file.close
        end
      end

      def fix_value val
        if val.numeric?
          val = Float(val)
          val = nil if val.nan? or val.infinite?
        else
          val = nil if val == "NaN"
        end
        val
      end
    end

    before '/descriptor/?*' do
      if request.get?
        @algorithm = OpenTox::Algorithm.new @uri
        @algorithm.parameters = [ 
          { RDF::DC.description => "Dataset URI", 
            RDF::OT.paramScope => "optional", 
            RDF::DC.title => "dataset_uri" } ,
          { RDF::DC.description => "Compound URI", 
            RDF::OT.paramScope => "optional", 
            RDF::DC.title => "compound_uri" } 
        ]
        @algorithm.metadata = {
          RDF.type => [RDF::OTA.DescriptorCalculation],
        }
      elsif request.post?
        @feature_dataset = Dataset.new nil, @subjectid
        @feature_dataset.metadata = {
          RDF::DC.title => "Physico-chemical descriptors",
          RDF::DC.creator => @uri,
          RDF::OT.hasSource => @uri,
        }
        if params[:compound_uri]
          @feature_dataset.parameters = [ { RDF::DC.title => "compound_uri", RDF::OT.paramValue => params[:compound_uri] }]
        elsif params[:dataset_uri]
          @feature_dataset.parameters = [ { RDF::DC.title => "dataset_uri", RDF::OT.paramValue => params[:dataset_uri] }]
        else
          bad_request_error "Please provide a dataset_uri or compound_uri parameter", @uri
        end
      end
    end

    before '/descriptor/:lib/:descriptor/?' do
      @descriptors = DESCRIPTORS[params[:lib].to_sym].select{|d| d[:title].split(" ").last == params[:descriptor]}
      bad_request_error "Unknown descriptor #{@uri}. See #{uri('descriptor')} for a complete list of supported descriptors.", @uri if @descriptors.empty?
      @descriptor = @descriptors.first
    end

    after do # Tempfile cleanup
      if @sdf_file and File.exists? @sdf_file.path
        FileUtils.rm Dir["#{@sdf_file.path}*.yaml"]
        @sdf_file.unlink
      end
      @sdf_file = nil
    end

    # Get a list of descriptor calculation 
    # @return [text/uri-list] URIs
    get '/descriptor/?' do
      DESCRIPTORS.collect{|lib,d| d.collect{|n| uri("/descriptor/#{lib}/#{n[:title].split(" ").last}")}}.flatten.sort.join("\n")
    end

    get '/descriptor/:lib/?' do
      DESCRIPTORS[params[:lib].to_sym].collect{|n| uri("/descriptor/#{params[:lib].to_sym}/#{n[:title].split(" ").last}")}.sort.join("\n")
    end

    # Get representation of descriptor calculation
    # @return [String] Representation
    get '/descriptor/:lib/:descriptor/?' do
      @algorithm[RDF::DC.title] = @descriptor[:title]
      @algorithm[RDF::DC.description] = @descriptor[:description] if @descriptor[:description]
      format_output(@algorithm)
    end

    post '/descriptor/?' do
      task = OpenTox::Task.run "Calculating PC descriptors", @uri, @subjectid do |task|
        if params[:descriptor_uris]
          descriptors = {}
          params[:descriptor_uris].each do |descriptor_uri|
            #lib, title = descriptor.split('/')
            lib = descriptor_uri.split('/')[-2]
            descriptors[lib.to_sym] ||= []
            descriptors[lib.to_sym] += DESCRIPTORS[lib.to_sym].select{|d| d[:uri] == descriptor_uri}
          end
        else
          descriptors = DESCRIPTORS
        end
        if params[:compound_uri]
          compounds = [ Compound.new(params[:compound_uri], @subjectid) ]
        elsif params[:dataset_uri]
          compounds = Dataset.new(params[:dataset_uri]).compounds
        end
        [:openbabel, :cdk, :joelib].each{ |lib| send lib, compounds, descriptors[lib] }
        @feature_dataset.put
        @feature_dataset.uri
      end
      response['Content-Type'] = 'text/uri-list'
      halt 202, task.uri
    end

    # use /descriptor with dataset_uri and descriptor_uri parameters for efficient calculation of multiple compounds/descriptors
    post '/descriptor/:lib/:descriptor/?' do
      bad_request_error "Please provide a compound_uri parameter", @uri unless params[:compound_uri]
      compounds = [ Compound.new(params[:compound_uri], @subjectid) ]
      send params[:lib].to_sym, compounds, @descriptors
      @feature_dataset.put
      @feature_dataset.uri
    end

  end

end