diff options
author | Christoph Helma <helma@in-silico.ch> | 2013-04-24 15:58:32 +0200 |
---|---|---|
committer | Christoph Helma <helma@in-silico.ch> | 2013-04-24 15:58:32 +0200 |
commit | 95eecdd3583b33fa8aceab857468296443decd0c (patch) | |
tree | 9f45eae1255632ebbf9b6be8eab8e4c76f2081cf | |
parent | 8668ad02ec1bb80e8b4af0f4b3633c84c8464e03 (diff) |
descriptor calculation for datasets working. Rakefile for compilation of Java classes.
-rw-r--r-- | descriptor.rb | 31 | ||||
-rw-r--r-- | java/CdkDescriptors.class | bin | 3307 -> 3576 bytes | |||
-rw-r--r-- | java/CdkDescriptors.java | 51 | ||||
-rw-r--r-- | java/JoelibDescriptorInfo.class | bin | 1039 -> 1039 bytes | |||
-rw-r--r-- | java/JoelibDescriptors.class | bin | 2833 -> 2774 bytes | |||
-rw-r--r-- | java/JoelibDescriptors.java | 43 | ||||
-rw-r--r-- | java/Rakefile | 15 |
7 files changed, 88 insertions, 52 deletions
diff --git a/descriptor.rb b/descriptor.rb index 2d0844f..18a44d6 100644 --- a/descriptor.rb +++ b/descriptor.rb @@ -1,7 +1,6 @@ # descriptors.rb # Calculation of physico-chemical descriptors # Author: Andreas Maunz, Christoph Helma -#require 'rjb' require 'openbabel' module OpenTox @@ -91,9 +90,12 @@ module OpenTox sdf_3d compounds # rjb blocks within tasks # Avoid "Argument list too long" error by sending only short descriptor names - yaml = `echo "#{@sdf}" |java -classpath #{CDK_JAR}:#{JAVA_DIR} CdkDescriptors #{descriptors.collect{|d| d[:title].split("\s").last}.join(" ")}` - YAML.load(yaml).each_with_index do |calculation,i| - $logger.error "Descriptor calculation failed with #{$!.message} for compound #{compounds[i].uri}." if calculation.empty? + #yaml = `export CDKDescriptors= ;echo "#{@sdf}" |java -classpath #{CDK_JAR}:#{JAVA_DIR} CdkDescriptors #{descriptors.collect{|d| d[:title].split("\s").last}.join(" ")}` + #yaml = `export CDKDescriptors='#{descriptors.collect{|d| d[:title].split("\s").last}.join(" ")}';echo "#{@sdf}" |java -classpath #{CDK_JAR}:#{JAVA_DIR} CdkDescriptors ` + puts `java -classpath #{CDK_JAR}:#{JAVA_DIR} CdkDescriptors #{@sdf_file.path} #{descriptors.collect{|d| d[:title].split("\s").last}.join(" ")}` + #puts yaml + YAML.load_file(@sdf_file.path+"cdk.yaml").each_with_index do |calculation,i| + $logger.error "Descriptor calculation failed for compound #{compounds[i].uri}." if calculation.empty? 
calculation.each do |name,value| feature = DESCRIPTORS[:cdk].collect{|d| d[:features]}.flatten.select{|f| f[RDF::DC.title].split("\s").last == name.to_s}.first @feature_dataset.add_data_entry compounds[i], feature, fix_value(value) @@ -104,9 +106,12 @@ module OpenTox def joelib compounds, descriptors sdf_3d compounds # rjb blocks within tasks - yaml = `echo "#{@sdf}" |java -classpath #{JOELIB_JAR}:#{JMOL_JAR}:#{LOG4J_JAR}:#{JAVA_DIR} JoelibDescriptors #{descriptors.collect{|d| d[:java_class]}.join(" ")}|grep "^[- ]"` - YAML.load(yaml).each_with_index do |calculation,i| - $logger.error "Descriptor calculation failed with #{$!.message} for compound #{compounds[i].uri}." if calculation.empty? + #yaml = `echo "#{@sdf}" |java -classpath #{JOELIB_JAR}:#{JMOL_JAR}:#{LOG4J_JAR}:#{JAVA_DIR} JoelibDescriptors #{descriptors.collect{|d| d[:java_class]}.join(" ")}|grep "^[- ]"` + #puts "java -classpath #{JOELIB_JAR}:#{JMOL_JAR}:#{LOG4J_JAR}:#{JAVA_DIR} JoelibDescriptors #{@sdf_file.path} #{descriptors.collect{|d| d[:java_class]}.join(" ")}" + puts `java -classpath #{JOELIB_JAR}:#{JMOL_JAR}:#{LOG4J_JAR}:#{JAVA_DIR} JoelibDescriptors #{@sdf_file.path} #{descriptors.collect{|d| d[:java_class]}.join(" ")}` + #YAML.load(yaml).each_with_index do |calculation,i| + YAML.load_file(@sdf_file.path+"joelib.yaml").each_with_index do |calculation,i| + $logger.error "Descriptor calculation failed for compound #{compounds[i].uri}." if calculation.empty? calculation.each do |java_class,value| feature = DESCRIPTORS[:joelib].select{|d| d[:java_class] == java_class}.first[:feature] @feature_dataset.add_data_entry compounds[i], feature, fix_value(value) @@ -115,6 +120,7 @@ module OpenTox end def sdf_3d compounds + #unless @sdf_file and File.exists? 
@sdf_file.path unless @sdf @sdf = "" @@obconversion.set_out_format 'sdf' @@ -125,7 +131,7 @@ module OpenTox OpenBabel::OBOp.find_type("Gen3D").do(@@obmol) sdf_3d = @@obconversion.write_string(@@obmol) if sdf_3d.match(/.nan/) - warning = "3D generation failed for compound #{compound.uri}, using 2D structure." + warning = "3D generation failed for compound #{compound.uri}, trying to calculate descriptors from 2D structure." $logger.warn warning @feature_dataset[RDF::OT.Warnings] ? @feature_dataset[RDF::OT.Warnings] << warning : @feature_dataset[RDF::OT.Warnings] = warning @sdf << sdf_2d @@ -133,6 +139,9 @@ module OpenTox @sdf << sdf_3d end end + @sdf_file = Tempfile.open("sdf") + @sdf_file.puts @sdf + @sdf_file.close end end @@ -184,6 +193,12 @@ module OpenTox @descriptor = @descriptors.first end + after do + #@sdf_file.unlink if @sdf_file and File.exists @sdf_file.path + #TODO cleanup yamls + @sdf_file = nil + end + # Get a list of descriptor calculation # @return [text/uri-list] URIs get '/descriptor/?' do diff --git a/java/CdkDescriptors.class b/java/CdkDescriptors.class Binary files differindex 21f82c4..9373bc7 100644 --- a/java/CdkDescriptors.class +++ b/java/CdkDescriptors.class diff --git a/java/CdkDescriptors.java b/java/CdkDescriptors.java index 5635507..69c27a8 100644 --- a/java/CdkDescriptors.java +++ b/java/CdkDescriptors.java @@ -9,13 +9,13 @@ import org.openscience.cdk.qsar.DescriptorValue; class CdkDescriptors { public static void main(String[] args) { - // parse command line arguments (descriptors) + // parse command line arguments > 1 (descriptors) DescriptorEngine engine; - if (args.length > 0) { - for (int i =0; i < args.length; i++) { - args[i] = "org.openscience.cdk.qsar.descriptors.molecular." + args[i] + "Descriptor"; + if (args.length > 1) { + List<String> classNames = new ArrayList<String>(); + for (int i =1; i < args.length; i++) { + classNames.add("org.openscience.cdk.qsar.descriptors.molecular." 
+ args[i] + "Descriptor"); } - List<String> classNames = Arrays.asList(args); engine = new DescriptorEngine(classNames); List<IDescriptor> instances = engine.instantiateDescriptors(classNames); List<DescriptorSpecification> specs = engine.initializeSpecifications(instances); @@ -25,13 +25,21 @@ class CdkDescriptors { engine = new DescriptorEngine(DescriptorEngine.MOLECULAR); } - // parse 3d sdf from stdin and calculate descriptors - BufferedReader br = new BufferedReader(new InputStreamReader(System.in)); - IteratingMDLReader reader = new IteratingMDLReader( br, DefaultChemObjectBuilder.getInstance()); - while (reader.hasNext()) { - IMolecule molecule = (IMolecule)reader.next(); - try { - engine.process(molecule); + try { + BufferedReader br = new BufferedReader(new FileReader(args[0])); + PrintWriter yaml = new PrintWriter(new FileWriter(args[0]+"cdk.yaml")); + // parse 3d sdf from file and calculate descriptors + IteratingMDLReader reader = new IteratingMDLReader( br, DefaultChemObjectBuilder.getInstance()); + while (reader.hasNext()) { + try { + IMolecule molecule = (IMolecule)reader.next(); + engine.process(molecule); + } + catch (Exception e) { + yaml.println("- {}"); + e.printStackTrace(); + continue; + } Iterator it = molecule.getProperties().values().iterator(); Boolean first = true; while (it.hasNext()) { @@ -39,28 +47,27 @@ class CdkDescriptors { DescriptorValue value = (DescriptorValue)it.next(); int size = value.getValue().length(); if (size == 1) { - if (first) { System.out.print("- "); } - else { System.out.print(" "); } - System.out.println(":"+value.getNames()[0].toString() + ": " + value.getValue()); + if (first) { yaml.print("- "); } + else { yaml.print(" "); } + yaml.println(":"+value.getNames()[0].toString() + ": " + value.getValue()); first = false; } else { String[] values = value.getValue().toString().split(","); for (int i = 0; i < size; i++) { - if (first) { System.out.print("- "); } - else { System.out.print(" "); } - 
System.out.println(":"+value.getNames()[i].toString() + ": " + values[i]); + if (first) { yaml.print("- "); } + else { yaml.print(" "); } + yaml.println(":"+value.getNames()[i].toString() + ": " + values[i]); first = false; } } } catch (ClassCastException e) { } // sdf properties are stored as molecules properties (strings), ignore them + catch (Exception e) { e.printStackTrace(); } // output nothing to yaml } } - catch (Exception e) { - System.out.println("- {}"); - System.err.println(e.toString()); - } + yaml.close(); } + catch (Exception e) { e.printStackTrace(); } } } diff --git a/java/JoelibDescriptorInfo.class b/java/JoelibDescriptorInfo.class Binary files differindex 293cb72..0ee67bf 100644 --- a/java/JoelibDescriptorInfo.class +++ b/java/JoelibDescriptorInfo.class diff --git a/java/JoelibDescriptors.class b/java/JoelibDescriptors.class Binary files differindex 4a23d26..1426e7d 100644 --- a/java/JoelibDescriptors.class +++ b/java/JoelibDescriptors.class diff --git a/java/JoelibDescriptors.java b/java/JoelibDescriptors.java index fceb2a9..ecd1b3f 100644 --- a/java/JoelibDescriptors.java +++ b/java/JoelibDescriptors.java @@ -15,52 +15,51 @@ import joelib2.molecule.BasicConformerMolecule; class JoelibDescriptors { public static void main(String[] args) { - // set args to all descriptors - if (args.length == 0) { + String[] features = null; + // set features to all descriptors + if (args.length == 1) { FeatureHelper helper = FeatureHelper.instance(); - args = (String[]) helper.getNativeFeatures().toArray(new String[0]); + features = (String[]) helper.getNativeFeatures().toArray(new String[0]); + } else { + features = new String[args.length-1]; + System.arraycopy(args,1,features,0,args.length-1); } FeatureFactory factory = FeatureFactory.instance(); MoleculeFileIO loader = null; - BufferedReader br = new BufferedReader(new InputStreamReader(System.in)); String line = new String(); String sdf = new String(); try { - while ((line = br.readLine()) != null) { sdf += 
line + "\n"; } - br.close(); - InputStream is = null; - is = new ByteArrayInputStream(sdf.getBytes("UTF-8")); + // parse 3d sdf from file and calculate descriptors + InputStream is = new FileInputStream(args[0]); + PrintWriter yaml = new PrintWriter(new FileWriter(args[0]+"joelib.yaml")); BasicIOType inType = BasicIOTypeHolder.instance().getIOType("SDF"); loader = MoleculeFileHelper.getMolReader(is, inType); - //BasicIOType outType = BasicIOTypeHolder.instance().getIOType("SMILES"); - //JOEMol mol = new JOEMol(inType, inType); BasicConformerMolecule mol = new BasicConformerMolecule(inType, inType); while (true) { try { Boolean success = loader.read(mol); - if (!success) { break; } - //System.err.println( mol ); - for (int i =0; i < args.length; i++) { - Feature feature = factory.getFeature(args[i]); + if (!success) { break; } // last molecule + for (int i =0; i < features.length; i++) { + Feature feature = factory.getFeature(features[i]); FeatureResult result = feature.calculate(mol); - if (i == 0) { System.out.print("- "); } - else { System.out.print(" "); } - System.out.print( args[i]+": " ); - System.out.println( result.toString() ); + if (i == 0) { yaml.print("- "); } + else { yaml.print(" "); } + yaml.print( features[i]+": " ); + yaml.println( result.toString() ); } } catch (Exception e) { - System.err.println(e.toString()); - e.printStackTrace(); - //next; + System.err.println(e.toString()); + e.printStackTrace(); } } + yaml.close(); } catch (Exception e) { + System.err.println(e.toString()); e.printStackTrace(); - //System.err.println(e.toString()); } } } diff --git a/java/Rakefile b/java/Rakefile new file mode 100644 index 0000000..4348b49 --- /dev/null +++ b/java/Rakefile @@ -0,0 +1,15 @@ +# Java class, classpath +java_classes = [ + ["CdkDescriptors", "cdk-1.4.16.jar"], + ["CdkDescriptorInfo", "cdk-1.4.16.jar"], + ["JoelibDescriptors", "joelib2.jar:."], + ["JoelibDescriptorInfo", "joelib2.jar:."], +] + +task :default => java_classes.collect{|c| 
"#{c.first}.class"} + +java_classes.each do |c| + file "#{c.first}.class" => "#{c.first}.java" do + puts `javac -classpath #{c.last} #{c.first}.java` + end +end |