summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authormguetlein <martin.guetlein@gmail.com>2014-10-08 11:22:53 +0200
committermguetlein <martin.guetlein@gmail.com>2014-10-08 11:22:53 +0200
commit7173d52d7c6e476c476b1eebbb46ada3ade4454e (patch)
tree781840dc9d7799ec5f2148a10b7eef532096c09a
parent355720b394ab66938c381c2cac4db821ab664e27 (diff)
cdk descriptor class can now compute single feature values (e.g. Cdk.ALOGP.ALogP out of the 3 values provided by Cdk.ALOGP)
-rw-r--r--java/CdkDescriptors.java55
-rw-r--r--lib/lazar.rb11
2 files changed, 45 insertions, 21 deletions
diff --git a/java/CdkDescriptors.java b/java/CdkDescriptors.java
index 092e986..a42853e 100644
--- a/java/CdkDescriptors.java
+++ b/java/CdkDescriptors.java
@@ -9,14 +9,47 @@ import org.openscience.cdk.qsar.DescriptorValue;
class CdkDescriptors {
public static void main(String[] args) {
- // parse command line arguments > 1 (descriptors)
+ if (args==null || args.length<2) {
+ System.err.println("required params: <sd-file> <descriptor1> <descriptor2(optional)> <descriptor3(optional)> ...");
+ System.exit(1);
+ }
+ if (! new File(args[0]).exists()){
+ System.err.println("file not found "+args[0]);
+ System.exit(1);
+ }
+
+ // command line descriptor params can be either "descriptorName" or "descriptorValueName"
+ // terminology:
+ // A descriptor can calculate several values, e.g., ALOGP produces ALOGP.ALogP, ALOGP.ALogp2, ALOGP.AMR
+ // "descriptorName" ALOGP
+ // "valueName" AMR
+ // "descriptorValueName" ALOGP.AMR
DescriptorEngine engine;
- List<String> classNames = new ArrayList<String>();
+ Set<String> classNames = new LinkedHashSet<String>(); // descriptors to be computed
+ Set<String> descriptorNames = new LinkedHashSet<String>(); // all values of this descriptor will be printed
+ Set<String> descriptorValueNames = new LinkedHashSet<String>(); // only these values of a descriptor will be printed
for (int i =1; i < args.length; i++) {
- classNames.add("org.openscience.cdk.qsar.descriptors.molecular." + args[i] + "Descriptor");
+ String descriptorName;
+ if (args[i].indexOf(".")!=-1) {
+ descriptorValueNames.add(args[i]);
+ descriptorName = args[i].substring(0,args[i].indexOf("."));
+ }
+ else {
+ descriptorNames.add(args[i]);
+ descriptorName = args[i];
+ }
+ String className = "org.openscience.cdk.qsar.descriptors.molecular." + descriptorName + "Descriptor";
+ try {
+ Class.forName(className);
+ } catch (ClassNotFoundException e) {
+ System.err.println("Descriptor not found: "+args[i]);
+ System.exit(1);
+ }
+ classNames.add(className);
}
- engine = new DescriptorEngine(classNames);
- List<IDescriptor> instances = engine.instantiateDescriptors(classNames);
+
+ engine = new DescriptorEngine(new ArrayList<String>(classNames));
+ List<IDescriptor> instances = engine.instantiateDescriptors(new ArrayList<String>(classNames));
List<DescriptorSpecification> specs = engine.initializeSpecifications(instances);
engine.setDescriptorInstances(instances);
engine.setDescriptorSpecifications(specs);
@@ -41,13 +74,15 @@ class CdkDescriptors {
DescriptorValue value = (DescriptorValue)entry.getValue();
String[] values = value.getValue().toString().split(",");
for (int i = 0; i < values.length; i++) {
- if (first) { yaml.print("- "); first = false; }
- else { yaml.print(" "); }
String cdk_class = property.getImplementationTitle();
- String name = cdk_class.substring(cdk_class.lastIndexOf(".")+1).replace("Descriptor","");
- yaml.println("Cdk." + name + "." + value.getNames()[i] + ": " + values[i]);
+ String descriptorName = cdk_class.substring(cdk_class.lastIndexOf(".")+1).replace("Descriptor","");
+ String descriptorValueName = descriptorName + "." + value.getNames()[i];
+ if (descriptorNames.contains(descriptorName) || descriptorValueNames.contains(descriptorValueName)) {
+ if (first) { yaml.print("- "); first = false; }
+ else { yaml.print(" "); }
+ yaml.println("Cdk." + descriptorValueName + ": " + values[i]);
+ }
}
-
}
}
catch (ClassCastException e) { } // sdf properties are stored as molecules properties (strings), ignore them
diff --git a/lib/lazar.rb b/lib/lazar.rb
index 2416569..bd1c7f2 100644
--- a/lib/lazar.rb
+++ b/lib/lazar.rb
@@ -141,17 +141,6 @@ module OpenTox
@training_compounds = @training_dataset.compounds
feature_names = @feature_dataset.features.collect{ |f| f[RDF::DC.title] }
- # one Cdk descriptor may produce several features, e.g., Cdk.WienerNumbers produces Cdk.WienerNumbers.WPATH and Cdk.WienerNumbers.WPOL
- # -> strip suffix and use the feature only once
- feature_names = feature_names.collect do |f|
- if f=~/Cdk/ and f.count(".")==2
- f[0..(f.rindex(".")-1)]
- else
- f
- end
- end
- feature_names.uniq!
-
query_fingerprints = OpenTox::Algorithm::Descriptor.send( @feature_calculation_algorithm, compounds, feature_names )#.collect{|row| row.collect{|val| val ? val.to_f : 0.0 } }
compounds.each do |compound|