From 7173d52d7c6e476c476b1eebbb46ada3ade4454e Mon Sep 17 00:00:00 2001 From: mguetlein Date: Wed, 8 Oct 2014 11:22:53 +0200 Subject: cdk descriptor class can now compute single feature values (e.g. Cdk.ALOGP.ALogP out of 3 values provided by Cdk.ALOGP) --- java/CdkDescriptors.java | 55 +++++++++++++++++++++++++++++++++++++++--------- lib/lazar.rb | 11 ---------- 2 files changed, 45 insertions(+), 21 deletions(-) diff --git a/java/CdkDescriptors.java b/java/CdkDescriptors.java index 092e986..a42853e 100644 --- a/java/CdkDescriptors.java +++ b/java/CdkDescriptors.java @@ -9,14 +9,47 @@ import org.openscience.cdk.qsar.DescriptorValue; class CdkDescriptors { public static void main(String[] args) { - // parse command line arguments > 1 (descriptors) + if (args==null || args.length<2) { + System.err.println("required params: ..."); + System.exit(1); + } + if (! new File(args[0]).exists()){ + System.err.println("file not found "+args[0]); + System.exit(1); + } + + // command line descriptor params can be either "descriptorName" or "descriptorValueName" + // terminology: + // A descriptor can calculate several values, e.g., ALOGP produces ALOGP.ALogP, ALOGP.ALogp2, ALOGP.AMR + // "descriptorName" ALOGP + // "valueName" AMR + // "descriptorValueName" ALOGP.AMR DescriptorEngine engine; - List classNames = new ArrayList(); + Set classNames = new LinkedHashSet(); // descriptors to be computed + Set descriptorNames = new LinkedHashSet(); // all values of this descriptor will be printed + Set descriptorValueNames = new LinkedHashSet(); // only these values of a descriptor will be printed for (int i =1; i < args.length; i++) { - classNames.add("org.openscience.cdk.qsar.descriptors.molecular." 
+ args[i] + "Descriptor"); + String descriptorName; + if (args[i].indexOf(".")!=-1) { + descriptorValueNames.add(args[i]); + descriptorName = args[i].substring(0,args[i].indexOf(".")); + } + else { + descriptorNames.add(args[i]); + descriptorName = args[i]; + } + String className = "org.openscience.cdk.qsar.descriptors.molecular." + descriptorName + "Descriptor"; + try { + Class.forName(className); + } catch (ClassNotFoundException e) { + System.err.println("Descriptor not found: "+args[i]); + System.exit(1); + } + classNames.add(className); } - engine = new DescriptorEngine(classNames); - List instances = engine.instantiateDescriptors(classNames); + + engine = new DescriptorEngine(new ArrayList(classNames)); + List instances = engine.instantiateDescriptors(new ArrayList(classNames)); List specs = engine.initializeSpecifications(instances); engine.setDescriptorInstances(instances); engine.setDescriptorSpecifications(specs); @@ -41,13 +74,15 @@ class CdkDescriptors { DescriptorValue value = (DescriptorValue)entry.getValue(); String[] values = value.getValue().toString().split(","); for (int i = 0; i < values.length; i++) { - if (first) { yaml.print("- "); first = false; } - else { yaml.print(" "); } String cdk_class = property.getImplementationTitle(); - String name = cdk_class.substring(cdk_class.lastIndexOf(".")+1).replace("Descriptor",""); - yaml.println("Cdk." + name + "." + value.getNames()[i] + ": " + values[i]); + String descriptorName = cdk_class.substring(cdk_class.lastIndexOf(".")+1).replace("Descriptor",""); + String descriptorValueName = descriptorName + "." + value.getNames()[i]; + if (descriptorNames.contains(descriptorName) || descriptorValueNames.contains(descriptorValueName)) { + if (first) { yaml.print("- "); first = false; } + else { yaml.print(" "); } + yaml.println("Cdk." 
+ descriptorValueName + ": " + values[i]); + } } - } } catch (ClassCastException e) { } // sdf properties are stored as molecules properties (strings), ignore them diff --git a/lib/lazar.rb b/lib/lazar.rb index 2416569..bd1c7f2 100644 --- a/lib/lazar.rb +++ b/lib/lazar.rb @@ -141,17 +141,6 @@ module OpenTox @training_compounds = @training_dataset.compounds feature_names = @feature_dataset.features.collect{ |f| f[RDF::DC.title] } - # one Cdk descriptor may produce several features, e.g., Cdk.WienerNumbers produces Cdk.WienerNumbers.WPATH and Cdk.WienerNumbers.WPOL - # -> strip suffix and use the feature only once - feature_names = feature_names.collect do |f| - if f=~/Cdk/ and f.count(".")==2 - f[0..(f.rindex(".")-1)] - else - f - end - end - feature_names.uniq! - query_fingerprints = OpenTox::Algorithm::Descriptor.send( @feature_calculation_algorithm, compounds, feature_names )#.collect{|row| row.collect{|val| val ? val.to_f : 0.0 } } compounds.each do |compound| -- cgit v1.2.3