summaryrefslogtreecommitdiff
path: root/lib/physchem.rb
blob: 07df867f907de9975f76fa307e778e37fa32bd50 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
module OpenTox

  # Feature for physico-chemical descriptors
  class PhysChem < NumericFeature

    field :library, type: String
    field :descriptor, type: String
    field :description, type: String

    JAVA_DIR = File.join(File.dirname(__FILE__),"..","java")
    CDK_JAR = Dir[File.join(JAVA_DIR,"cdk-*jar")].last
    JOELIB_JAR = File.join(JAVA_DIR,"joelib2.jar")
    LOG4J_JAR = File.join(JAVA_DIR,"log4j.jar")
    JMOL_JAR = File.join(JAVA_DIR,"Jmol.jar")

    obexclude = ["cansmi","cansmiNS","formula","InChI","InChIKey","s","smarts","title","L5"]
    OPENBABEL = Hash[OpenBabel::OBDescriptor.list_as_string("descriptors").split("\n").collect do |d|
      name,description = d.split(/\s+/,2)
      ["Openbabel."+name,description] unless obexclude.include? name
    end.compact.sort{|a,b| a[0] <=> b[0]}]

    cdkdescriptors = {}
    CDK_DESCRIPTIONS = YAML.load(`java -classpath #{CDK_JAR}:#{JAVA_DIR}  CdkDescriptorInfo`)
    CDK_DESCRIPTIONS.each do |d|
      prefix="Cdk."+d[:java_class].split('.').last.sub(/Descriptor/,'')
      d[:names].each { |name| cdkdescriptors[prefix+"."+name] = d[:description] }
    end
    CDK = cdkdescriptors

    # exclude Hashcode (not a physchem property) and GlobalTopologicalChargeIndex (Joelib bug)
    joelibexclude = ["MoleculeHashcode","GlobalTopologicalChargeIndex"]
    # strip Joelib messages from stdout
    JOELIB = Hash[YAML.load(`java -classpath #{JOELIB_JAR}:#{LOG4J_JAR}:#{JAVA_DIR}  JoelibDescriptorInfo | sed '0,/---/d'`).collect do |d|
      name = d[:java_class].sub(/^joelib2.feature.types./,'')
      ["Joelib."+name, "JOELIb does not provide meaningful descriptions, see java/JoelibDescriptors.java for details."] unless joelibexclude.include? name
    end.compact.sort{|a,b| a[0] <=> b[0]}] 

    DESCRIPTORS = OPENBABEL.merge(CDK.merge(JOELIB))

    require_relative "unique_descriptors.rb"

    # Get descriptor features
    # @param [Hash]
    # @return [Array<OpenTox::PhysChem>]
    def self.descriptors desc=DESCRIPTORS
      desc.collect do |name,description|
        lib,desc = name.split('.',2)
        self.find_or_create_by(:name => name, :library => lib, :descriptor => desc, :description => description, :measured => false, :calculated => true)
      end
    end

    # Get unique descriptor features
    # @return [Array<OpenTox::PhysChem>]
    def self.unique_descriptors
      udesc = []
      UNIQUEDESCRIPTORS.each do |name|
        lib,desc = name.split('.',2)
        if lib == "Cdk"
          CDK_DESCRIPTIONS.select{|d| desc == d[:java_class].split('.').last.sub('Descriptor','') }.first[:names].each do |n|
            dname = "#{name}.#{n}"
            description = DESCRIPTORS[dname]
            udesc << self.find_or_create_by(:name => dname, :library => lib, :descriptor => desc, :description => description, :measured => false, :calculated => true)
          end
        else
          description = DESCRIPTORS[name]
          udesc << self.find_or_create_by(:name => name, :library => lib, :descriptor => desc, :description => description, :measured => false, :calculated => true)
        end
      end
      udesc
    end

    # Get OpenBabel descriptor features
    # @return [Array<OpenTox::PhysChem>]
    def self.openbabel_descriptors
      descriptors OPENBABEL
    end

    # Get CDK descriptor features
    # @return [Array<OpenTox::PhysChem>]
    def self.cdk_descriptors
      descriptors CDK
    end

    # Get JOELIB descriptor features
    # @return [Array<OpenTox::PhysChem>]
    def self.joelib_descriptors
      descriptors JOELIB
    end

    # Calculate OpenBabel descriptors
    # @param [String] descriptor type
    # @param [OpenTox::Compound]
    # @return [Hash]
    def openbabel descriptor, compound
      obdescriptor = OpenBabel::OBDescriptor.find_type descriptor
      obmol = OpenBabel::OBMol.new
      obconversion = OpenBabel::OBConversion.new
      obconversion.set_in_format 'smi'
      obconversion.read_string obmol, compound.smiles
      {"#{library.capitalize}.#{descriptor}" => fix_value(obdescriptor.predict(obmol))}
    end

    # Calculate CDK descriptors
    # @param [String] descriptor type
    # @param [OpenTox::Compound]
    # @return [Hash]
    def cdk descriptor, compound
      java_descriptor "cdk", descriptor, compound
    end

    # Calculate JOELIB descriptors
    # @param [String] descriptor type
    # @param [OpenTox::Compound]
    # @return [Hash]
    def joelib descriptor, compound
      java_descriptor "joelib", descriptor, compound
    end

    private

    def java_descriptor lib, descriptor, compound

      sdf_3d = "/tmp/#{SecureRandom.uuid}.sdf"
      File.open(sdf_3d,"w+"){|f| f.print compound.sdf}
      
      # use java system call (rjb blocks within tasks)
      # use Tempfiles to avoid "Argument list too long" error 
      case lib
      when "cdk"
        `java -classpath #{CDK_JAR}:#{JAVA_DIR}  CdkDescriptors #{sdf_3d} #{descriptor}`
      when "joelib"
        `java -classpath #{JOELIB_JAR}:#{JMOL_JAR}:#{LOG4J_JAR}:#{JAVA_DIR}  JoelibDescriptors  #{sdf_3d} #{descriptor}`
      end
      result = YAML.load_file("#{sdf_3d}#{lib}.yaml").first
      result.keys.each{|k| result[k] = result.delete(k)}
      result
    end

    def fix_value val
      val = val.first if val.is_a? Array and val.size == 1
      val = nil if val == "NaN"
      if val.numeric?
        val = Float(val)
        val = nil if val.nan? or val.infinite?
      end
      val
    end

  end

end
OpenTox::PhysChem.descriptors # load descriptor features