summaryrefslogtreecommitdiff
path: root/lib/physchem.rb
blob: 327acd85b791373cd9a7bd8910ceb17d2eaa96e3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
module OpenTox

  # Feature for physico-chemical descriptors
  class PhysChem < NumericFeature

    field :library, type: String
    field :descriptor, type: String
    field :description, type: String

    JAVA_DIR = File.join(File.dirname(__FILE__),"..","java")
    CDK_JAR = Dir[File.join(JAVA_DIR,"cdk-*jar")].last
    JOELIB_JAR = File.join(JAVA_DIR,"joelib2.jar")
    LOG4J_JAR = File.join(JAVA_DIR,"log4j.jar")
    JMOL_JAR = File.join(JAVA_DIR,"Jmol.jar")

    obexclude = ["cansmi","cansmiNS","formula","InChI","InChIKey","s","smarts","title","L5"]
    OPENBABEL = Hash[OpenBabel::OBDescriptor.list_as_string("descriptors").split("\n").collect do |d|
      name,description = d.split(/\s+/,2)
      ["Openbabel."+name,description] unless obexclude.include? name
    end.compact.sort{|a,b| a[0] <=> b[0]}]

    cdkdescriptors = {}
    CDK_DESCRIPTIONS = YAML.load(`java -classpath #{CDK_JAR}:#{JAVA_DIR}  CdkDescriptorInfo`)
    CDK_DESCRIPTIONS.each do |d|
      prefix="Cdk."+d[:java_class].split('.').last.sub(/Descriptor/,'')
      d[:names].each { |name| cdkdescriptors[prefix+"."+name] = d[:description] }
    end
    CDK = cdkdescriptors

    # exclude Hashcode (not a physchem property) and GlobalTopologicalChargeIndex (Joelib bug)
    joelibexclude = ["MoleculeHashcode","GlobalTopologicalChargeIndex"]
    # strip Joelib messages from stdout
    JOELIB = Hash[YAML.load(`java -classpath #{JOELIB_JAR}:#{LOG4J_JAR}:#{JAVA_DIR}  JoelibDescriptorInfo | sed '0,/---/d'`).collect do |d|
      name = d[:java_class].sub(/^joelib2.feature.types./,'')
      ["Joelib."+name, "JOELIb does not provide meaningful descriptions, see java/JoelibDescriptors.java for details."] unless joelibexclude.include? name
    end.compact.sort{|a,b| a[0] <=> b[0]}] 

    DESCRIPTORS = OPENBABEL.merge(CDK.merge(JOELIB))

    require_relative "unique_descriptors.rb"

    def self.descriptors desc=DESCRIPTORS
      desc.collect do |name,description|
        lib,desc = name.split('.',2)
        self.find_or_create_by(:name => name, :library => lib, :descriptor => desc, :description => description, :measured => false, :calculated => true)
      end
    end

    def self.unique_descriptors
      udesc = []
      UNIQUEDESCRIPTORS.each do |name|
        lib,desc = name.split('.',2)
        if lib == "Cdk"
          CDK_DESCRIPTIONS.select{|d| desc == d[:java_class].split('.').last.sub('Descriptor','') }.first[:names].each do |n|
            dname = "#{name}.#{n}"
            description = DESCRIPTORS[dname]
            udesc << self.find_or_create_by(:name => dname, :library => lib, :descriptor => desc, :description => description, :measured => false, :calculated => true)
          end
        else
          description = DESCRIPTORS[name]
          udesc << self.find_or_create_by(:name => name, :library => lib, :descriptor => desc, :description => description, :measured => false, :calculated => true)
        end
      end
      udesc
    end

    def self.openbabel_descriptors
      descriptors OPENBABEL
    end

    def self.cdk_descriptors
      descriptors CDK
    end

    def self.joelib_descriptors
      descriptors JOELIB
    end

    def calculate compound
      result = send library.downcase,descriptor,compound
      result[self.name]
    end

    def openbabel descriptor, compound
      obdescriptor = OpenBabel::OBDescriptor.find_type descriptor
      obmol = OpenBabel::OBMol.new
      obconversion = OpenBabel::OBConversion.new
      obconversion.set_in_format 'smi'
      obconversion.read_string obmol, compound.smiles
      {"#{library.capitalize}.#{descriptor}" => fix_value(obdescriptor.predict(obmol))}
    end

    def cdk descriptor, compound
      java_descriptor "cdk", descriptor, compound
    end

    def joelib descriptor, compound
      java_descriptor "joelib", descriptor, compound
    end

    private

    def java_descriptor lib, descriptor, compound

      sdf_3d = "/tmp/#{SecureRandom.uuid}.sdf"
      File.open(sdf_3d,"w+"){|f| f.print compound.sdf}
      
      # use java system call (rjb blocks within tasks)
      # use Tempfiles to avoid "Argument list too long" error 
      case lib
      when "cdk"
        `java -classpath #{CDK_JAR}:#{JAVA_DIR}  CdkDescriptors #{sdf_3d} #{descriptor}`
      when "joelib"
        `java -classpath #{JOELIB_JAR}:#{JMOL_JAR}:#{LOG4J_JAR}:#{JAVA_DIR}  JoelibDescriptors  #{sdf_3d} #{descriptor}`
      end
      result = YAML.load_file("#{sdf_3d}#{lib}.yaml").first
      result.keys.each{|k| result[k] = result.delete(k)}
      result
    end

    def fix_value val
      val = val.first if val.is_a? Array and val.size == 1
      val = nil if val == "NaN"
      if val.numeric?
        val = Float(val)
        val = nil if val.nan? or val.infinite?
      end
      val
    end

  end

end
OpenTox::PhysChem.descriptors # load descriptor features