summary refs log tree commit diff
path: root/java/CdkDescriptors.java
blob: b5f8672abdc302774bf79c9225f0e366682e1c8b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
import java.util.*;
import java.io.*;
import org.openscience.cdk.DefaultChemObjectBuilder;
import org.openscience.cdk.IImplementationSpecification;
import org.openscience.cdk.interfaces.IAtomContainer;
import org.openscience.cdk.io.iterator.IteratingSDFReader;
import org.openscience.cdk.qsar.*;
import org.openscience.cdk.aromaticity.CDKHueckelAromaticityDetector;
import org.openscience.cdk.tools.manipulator.AtomContainerManipulator;
import org.openscience.cdk.exception.NoSuchAtomTypeException;

/**
 * Command-line tool that computes CDK molecular descriptors for every compound
 * of an SD file and writes the selected values to a YAML file.
 *
 * <p>Usage: {@code CdkDescriptors <sd-file> <descriptor1> [<descriptor2> ...]}
 *
 * <p>Terminology used throughout this class: a descriptor can calculate several
 * values, e.g. ALOGP produces ALOGP.ALogP, ALOGP.ALogp2 and ALOGP.AMR.
 * <ul>
 *   <li>"descriptorName": {@code ALOGP}</li>
 *   <li>"valueName": {@code AMR}</li>
 *   <li>"descriptorValueName": {@code ALOGP.AMR}</li>
 * </ul>
 *
 * <p>The output file name is the input file name with {@code cdk.yaml} appended
 * directly (no separator). It contains one YAML list entry per compound; a
 * compound that failed, or for which no selected value could be written, is
 * represented by {@code - {}}.
 */
class CdkDescriptors {

  /** Package prefix under which descriptor classes are looked up by name. */
  private static final String DESCRIPTOR_PACKAGE = "org.openscience.cdk.qsar.descriptors.molecular.";

  /** Zero-width regex that matches the boundaries between camel-case words. */
  private static final String CAMEL_CASE_BOUNDARY =
      "(?<=[A-Z])(?=[A-Z][a-z])"
      + "|" + "(?<=[^A-Z])(?=[A-Z])"
      + "|" + "(?<=[A-Za-z])(?=[^A-Za-z])";

  /**
   * Entry point.
   *
   * <p>Exits with status 1 if fewer than two arguments are given, if the SD file
   * does not exist, or if a requested descriptor class cannot be found.
   *
   * @param args {@code args[0]} is the SD file; every further argument is either a
   *             "descriptorName" (all values of that descriptor are printed) or a
   *             "descriptorValueName" (only that value is printed)
   */
  public static void main(String[] args) {

    if (args == null || args.length < 2) {
      System.err.println("required params: <sd-file> <descriptor1> <descriptor2(optional)> <descriptor3(optional)> ...");
      System.exit(1);
    }
    if (!new File(args[0]).exists()) {
      System.err.println("file not found "+args[0]);
      System.exit(1);
    }

    Set<String> classNames = new LinkedHashSet<String>(); // descriptors to be computed
    Set<String> descriptorNames = new LinkedHashSet<String>(); // all values of this descriptor will be printed
    Set<String> descriptorValueNames = new LinkedHashSet<String>(); // only these values of a descriptor will be printed
    for (int i = 1; i < args.length; i++) {
      String descriptorName;
      int dot = args[i].indexOf('.');
      if (dot != -1) {
        descriptorValueNames.add(args[i]);
        descriptorName = args[i].substring(0, dot);
      }
      else {
        descriptorNames.add(args[i]);
        descriptorName = args[i];
      }
      classNames.add(getDescriptorClassName(descriptorName));
    }

    DescriptorEngine engine = new DescriptorEngine(new ArrayList<String>(classNames), null);
    List<IDescriptor> instances = engine.instantiateDescriptors(new ArrayList<String>(classNames));
    List<IImplementationSpecification> specs = engine.initializeSpecifications(instances);
    engine.setDescriptorInstances(instances);
    engine.setDescriptorSpecifications(specs);

    // Declared outside the try so that both streams can be released in the finally
    // block, whatever happens while reading or computing.
    BufferedReader br = null;
    PrintWriter yaml = null;
    try {
      br = new BufferedReader(new FileReader(args[0]));
      yaml = new PrintWriter(new FileWriter(args[0]+"cdk.yaml"));
      // parse 3d sdf from file and calculate descriptors
      IteratingSDFReader reader = new IteratingSDFReader(br, DefaultChemObjectBuilder.getInstance());
      int c = 0;
      while (reader.hasNext()) {
        try {
          System.out.println("computing "+(args.length-1)+" descriptors for compound "+(++c));
          IAtomContainer molecule = (IAtomContainer) reader.next();
          molecule = (IAtomContainer) AtomContainerManipulator.removeHydrogens(molecule);
          try {
            AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(molecule);
          }
          catch (NoSuchAtomTypeException e) {
            e.printStackTrace();
          }
          CDKHueckelAromaticityDetector.detectAromaticity(molecule);

          engine.process(molecule);
          boolean printed = printSelectedValues(yaml, molecule.getProperties(), descriptorNames, descriptorValueNames);
          if (!printed) {
            // keep exactly one list entry per compound so entries stay aligned with the SD file
            yaml.println("- {}");
          }
        }
        catch (Exception e) {
          // compound could not be processed: emit an empty entry and go on with the next one
          yaml.println("- {}");
          e.printStackTrace();
        }
      }
    }
    catch (Exception e) { e.printStackTrace(); }
    finally {
      if (yaml != null) {
        yaml.close();
      }
      if (br != null) {
        try {
          br.close();
        }
        catch (IOException e) {
          e.printStackTrace();
        }
      }
    }
  }

  /**
   * Writes the selected descriptor values of one compound as a single YAML list entry.
   *
   * <p>Only properties whose key is a {@code DescriptorSpecification} and whose value is a
   * {@code DescriptorValue} are considered; other properties (e.g. plain SDF properties
   * stored as strings) are skipped. A failure on one property is reported on stderr and
   * does not prevent the remaining properties from being written.
   *
   * @param yaml                 destination writer
   * @param properties           properties of the processed molecule
   * @param descriptorNames      descriptors of which every value is printed
   * @param descriptorValueNames individual "descriptorValueName"s to print
   * @return {@code true} if at least one line was written for this compound
   */
  private static boolean printSelectedValues(PrintWriter yaml, Map<Object,Object> properties,
      Set<String> descriptorNames, Set<String> descriptorValueNames) {
    boolean first = true;
    for (Map.Entry<Object, Object> entry : properties.entrySet()) {
      try {
        if (!(entry.getKey() instanceof DescriptorSpecification) || !(entry.getValue() instanceof DescriptorValue)) {
          continue;
        }
        DescriptorSpecification property = (DescriptorSpecification) entry.getKey();
        DescriptorValue value = (DescriptorValue) entry.getValue();

        // e.g. "...molecular.ALOGPDescriptor" -> "ALOGP"
        String cdkClass = property.getImplementationTitle();
        String descriptorName = cdkClass.substring(cdkClass.lastIndexOf(".")+1).replace("Descriptor","");

        String[] names = value.getNames();
        String[] values = value.getValue().toString().split(",");
        for (int i = 0; i < values.length; i++) {
          String descriptorValueName = descriptorName + "." + names[i];
          if (descriptorNames.contains(descriptorName) || descriptorValueNames.contains(descriptorValueName)) {
            // the first line of a compound opens the list entry, the others continue it
            if (first) { yaml.print("- "); first = false; }
            else { yaml.print("  "); }
            yaml.println("Cdk." + descriptorValueName  + ": " + values[i]);
          }
        }
      }
      catch (ClassCastException ignored) { } // sdf properties are stored as molecules properties (strings), ignore them
      catch (Exception e) { e.printStackTrace(); } // output nothing to yaml
    }
    return !first;
  }

  /**
   * HACK to find the class for a descriptor.
   *
   * <p>Problem: "Descriptor" is not always at the end of the class name (APolDescriptor),
   * but may be in the middle (AutocorrelationDescriptorPolarizability). This method
   * therefore makes a class lookup using trial and error: the name is split into
   * camel-case words and "Descriptor" is inserted at each word boundary in turn,
   * starting with the end of the name.
   *
   * <p>If no candidate class can be loaded, an error is printed and the JVM exits with
   * status 1.
   *
   * @param descriptorName descriptor name as given on the command line, e.g. {@code ALOGP}
   * @return fully qualified name of the first candidate class that could be loaded
   */
  static String getDescriptorClassName(String descriptorName) {
    String split = splitCamelCase(descriptorName)+" "; // spaces mark possible positions for 'Descriptor'
    for (int i = split.length()-1; i > 0; i--) {
      if (split.charAt(i) != ' ') {
        continue;
      }
      // iterate over all spaces, starting with the trailing one:
      // replace the current space with 'Descriptor' and remove the other spaces
      String test = split.substring(0,i)+"Descriptor"+split.substring(i+1,split.length());
      test = test.replaceAll("\\s","");
      String className = DESCRIPTOR_PACKAGE + test;
      try {
        Class.forName(className);
        return className;
      }
      catch (ClassNotFoundException ignored) {
        // not this position, try the next candidate
      }
    }
    System.err.println("Descriptor not found: "+descriptorName);
    System.exit(1);
    return null;
  }

  /**
   * Inserts a space in between camel words.
   *
   * @param s camel-case string
   * @return {@code s} with a single space inserted at every word boundary
   */
  static String splitCamelCase(String s) {
    return s.replaceAll(CAMEL_CASE_BOUNDARY, " ");
  }
}