summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristoph Helma <helma@in-silico.ch>2013-04-24 15:58:32 +0200
committerChristoph Helma <helma@in-silico.ch>2013-04-24 15:58:32 +0200
commit95eecdd3583b33fa8aceab857468296443decd0c (patch)
tree9f45eae1255632ebbf9b6be8eab8e4c76f2081cf
parent8668ad02ec1bb80e8b4af0f4b3633c84c8464e03 (diff)
descriptor calculation for datasets working. Rakefile for compilation of Java classes.
-rw-r--r--descriptor.rb31
-rw-r--r--java/CdkDescriptors.classbin3307 -> 3576 bytes
-rw-r--r--java/CdkDescriptors.java51
-rw-r--r--java/JoelibDescriptorInfo.classbin1039 -> 1039 bytes
-rw-r--r--java/JoelibDescriptors.classbin2833 -> 2774 bytes
-rw-r--r--java/JoelibDescriptors.java43
-rw-r--r--java/Rakefile15
7 files changed, 88 insertions, 52 deletions
diff --git a/descriptor.rb b/descriptor.rb
index 2d0844f..18a44d6 100644
--- a/descriptor.rb
+++ b/descriptor.rb
@@ -1,7 +1,6 @@
# descriptors.rb
# Calculation of physico-chemical descriptors
# Author: Andreas Maunz, Christoph Helma
-#require 'rjb'
require 'openbabel'
module OpenTox
@@ -91,9 +90,12 @@ module OpenTox
sdf_3d compounds
# rjb blocks within tasks
# Avoid "Argument list too long" error by sending only short descriptor names
- yaml = `echo "#{@sdf}" |java -classpath #{CDK_JAR}:#{JAVA_DIR} CdkDescriptors #{descriptors.collect{|d| d[:title].split("\s").last}.join(" ")}`
- YAML.load(yaml).each_with_index do |calculation,i|
- $logger.error "Descriptor calculation failed with #{$!.message} for compound #{compounds[i].uri}." if calculation.empty?
+ #yaml = `export CDKDescriptors= ;echo "#{@sdf}" |java -classpath #{CDK_JAR}:#{JAVA_DIR} CdkDescriptors #{descriptors.collect{|d| d[:title].split("\s").last}.join(" ")}`
+ #yaml = `export CDKDescriptors='#{descriptors.collect{|d| d[:title].split("\s").last}.join(" ")}';echo "#{@sdf}" |java -classpath #{CDK_JAR}:#{JAVA_DIR} CdkDescriptors `
+ puts `java -classpath #{CDK_JAR}:#{JAVA_DIR} CdkDescriptors #{@sdf_file.path} #{descriptors.collect{|d| d[:title].split("\s").last}.join(" ")}`
+ #puts yaml
+ YAML.load_file(@sdf_file.path+"cdk.yaml").each_with_index do |calculation,i|
+ $logger.error "Descriptor calculation failed for compound #{compounds[i].uri}." if calculation.empty?
calculation.each do |name,value|
feature = DESCRIPTORS[:cdk].collect{|d| d[:features]}.flatten.select{|f| f[RDF::DC.title].split("\s").last == name.to_s}.first
@feature_dataset.add_data_entry compounds[i], feature, fix_value(value)
@@ -104,9 +106,12 @@ module OpenTox
def joelib compounds, descriptors
sdf_3d compounds
# rjb blocks within tasks
- yaml = `echo "#{@sdf}" |java -classpath #{JOELIB_JAR}:#{JMOL_JAR}:#{LOG4J_JAR}:#{JAVA_DIR} JoelibDescriptors #{descriptors.collect{|d| d[:java_class]}.join(" ")}|grep "^[- ]"`
- YAML.load(yaml).each_with_index do |calculation,i|
- $logger.error "Descriptor calculation failed with #{$!.message} for compound #{compounds[i].uri}." if calculation.empty?
+ #yaml = `echo "#{@sdf}" |java -classpath #{JOELIB_JAR}:#{JMOL_JAR}:#{LOG4J_JAR}:#{JAVA_DIR} JoelibDescriptors #{descriptors.collect{|d| d[:java_class]}.join(" ")}|grep "^[- ]"`
+ #puts "java -classpath #{JOELIB_JAR}:#{JMOL_JAR}:#{LOG4J_JAR}:#{JAVA_DIR} JoelibDescriptors #{@sdf_file.path} #{descriptors.collect{|d| d[:java_class]}.join(" ")}"
+ puts `java -classpath #{JOELIB_JAR}:#{JMOL_JAR}:#{LOG4J_JAR}:#{JAVA_DIR} JoelibDescriptors #{@sdf_file.path} #{descriptors.collect{|d| d[:java_class]}.join(" ")}`
+ #YAML.load(yaml).each_with_index do |calculation,i|
+ YAML.load_file(@sdf_file.path+"joelib.yaml").each_with_index do |calculation,i|
+ $logger.error "Descriptor calculation failed for compound #{compounds[i].uri}." if calculation.empty?
calculation.each do |java_class,value|
feature = DESCRIPTORS[:joelib].select{|d| d[:java_class] == java_class}.first[:feature]
@feature_dataset.add_data_entry compounds[i], feature, fix_value(value)
@@ -115,6 +120,7 @@ module OpenTox
end
def sdf_3d compounds
+ #unless @sdf_file and File.exists? @sdf_file.path
unless @sdf
@sdf = ""
@@obconversion.set_out_format 'sdf'
@@ -125,7 +131,7 @@ module OpenTox
OpenBabel::OBOp.find_type("Gen3D").do(@@obmol)
sdf_3d = @@obconversion.write_string(@@obmol)
if sdf_3d.match(/.nan/)
- warning = "3D generation failed for compound #{compound.uri}, using 2D structure."
+ warning = "3D generation failed for compound #{compound.uri}, trying to calculate descriptors from 2D structure."
$logger.warn warning
@feature_dataset[RDF::OT.Warnings] ? @feature_dataset[RDF::OT.Warnings] << warning : @feature_dataset[RDF::OT.Warnings] = warning
@sdf << sdf_2d
@@ -133,6 +139,9 @@ module OpenTox
@sdf << sdf_3d
end
end
+ @sdf_file = Tempfile.open("sdf")
+ @sdf_file.puts @sdf
+ @sdf_file.close
end
end
@@ -184,6 +193,12 @@ module OpenTox
@descriptor = @descriptors.first
end
+ after do
+ #@sdf_file.unlink if @sdf_file and File.exists @sdf_file.path
+ #TODO cleanup yamls
+ @sdf_file = nil
+ end
+
# Get a list of descriptor calculation
# @return [text/uri-list] URIs
get '/descriptor/?' do
diff --git a/java/CdkDescriptors.class b/java/CdkDescriptors.class
index 21f82c4..9373bc7 100644
--- a/java/CdkDescriptors.class
+++ b/java/CdkDescriptors.class
Binary files differ
diff --git a/java/CdkDescriptors.java b/java/CdkDescriptors.java
index 5635507..69c27a8 100644
--- a/java/CdkDescriptors.java
+++ b/java/CdkDescriptors.java
@@ -9,13 +9,13 @@ import org.openscience.cdk.qsar.DescriptorValue;
class CdkDescriptors {
public static void main(String[] args) {
- // parse command line arguments (descriptors)
+ // parse command line arguments > 1 (descriptors)
DescriptorEngine engine;
- if (args.length > 0) {
- for (int i =0; i < args.length; i++) {
- args[i] = "org.openscience.cdk.qsar.descriptors.molecular." + args[i] + "Descriptor";
+ if (args.length > 1) {
+ List<String> classNames = new ArrayList<String>();
+ for (int i =1; i < args.length; i++) {
+ classNames.add("org.openscience.cdk.qsar.descriptors.molecular." + args[i] + "Descriptor");
}
- List<String> classNames = Arrays.asList(args);
engine = new DescriptorEngine(classNames);
List<IDescriptor> instances = engine.instantiateDescriptors(classNames);
List<DescriptorSpecification> specs = engine.initializeSpecifications(instances);
@@ -25,13 +25,21 @@ class CdkDescriptors {
engine = new DescriptorEngine(DescriptorEngine.MOLECULAR);
}
- // parse 3d sdf from stdin and calculate descriptors
- BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
- IteratingMDLReader reader = new IteratingMDLReader( br, DefaultChemObjectBuilder.getInstance());
- while (reader.hasNext()) {
- IMolecule molecule = (IMolecule)reader.next();
- try {
- engine.process(molecule);
+ try {
+ BufferedReader br = new BufferedReader(new FileReader(args[0]));
+ PrintWriter yaml = new PrintWriter(new FileWriter(args[0]+"cdk.yaml"));
+ // parse 3d sdf from file and calculate descriptors
+ IteratingMDLReader reader = new IteratingMDLReader( br, DefaultChemObjectBuilder.getInstance());
+ while (reader.hasNext()) {
+ try {
+ IMolecule molecule = (IMolecule)reader.next();
+ engine.process(molecule);
+ }
+ catch (Exception e) {
+ yaml.println("- {}");
+ e.printStackTrace();
+ continue;
+ }
Iterator it = molecule.getProperties().values().iterator();
Boolean first = true;
while (it.hasNext()) {
@@ -39,28 +47,27 @@ class CdkDescriptors {
DescriptorValue value = (DescriptorValue)it.next();
int size = value.getValue().length();
if (size == 1) {
- if (first) { System.out.print("- "); }
- else { System.out.print(" "); }
- System.out.println(":"+value.getNames()[0].toString() + ": " + value.getValue());
+ if (first) { yaml.print("- "); }
+ else { yaml.print(" "); }
+ yaml.println(":"+value.getNames()[0].toString() + ": " + value.getValue());
first = false;
}
else {
String[] values = value.getValue().toString().split(",");
for (int i = 0; i < size; i++) {
- if (first) { System.out.print("- "); }
- else { System.out.print(" "); }
- System.out.println(":"+value.getNames()[i].toString() + ": " + values[i]);
+ if (first) { yaml.print("- "); }
+ else { yaml.print(" "); }
+ yaml.println(":"+value.getNames()[i].toString() + ": " + values[i]);
first = false;
}
}
}
catch (ClassCastException e) { } // sdf properties are stored as molecules properties (strings), ignore them
+ catch (Exception e) { e.printStackTrace(); } // output nothing to yaml
}
}
- catch (Exception e) {
- System.out.println("- {}");
- System.err.println(e.toString());
- }
+ yaml.close();
}
+ catch (Exception e) { e.printStackTrace(); }
}
}
diff --git a/java/JoelibDescriptorInfo.class b/java/JoelibDescriptorInfo.class
index 293cb72..0ee67bf 100644
--- a/java/JoelibDescriptorInfo.class
+++ b/java/JoelibDescriptorInfo.class
Binary files differ
diff --git a/java/JoelibDescriptors.class b/java/JoelibDescriptors.class
index 4a23d26..1426e7d 100644
--- a/java/JoelibDescriptors.class
+++ b/java/JoelibDescriptors.class
Binary files differ
diff --git a/java/JoelibDescriptors.java b/java/JoelibDescriptors.java
index fceb2a9..ecd1b3f 100644
--- a/java/JoelibDescriptors.java
+++ b/java/JoelibDescriptors.java
@@ -15,52 +15,51 @@ import joelib2.molecule.BasicConformerMolecule;
class JoelibDescriptors {
public static void main(String[] args) {
- // set args to all descriptors
- if (args.length == 0) {
+ String[] features = null;
+ // set features to all descriptors
+ if (args.length == 1) {
FeatureHelper helper = FeatureHelper.instance();
- args = (String[]) helper.getNativeFeatures().toArray(new String[0]);
+ features = (String[]) helper.getNativeFeatures().toArray(new String[0]);
+ } else {
+ features = new String[args.length-1];
+ System.arraycopy(args,1,features,0,args.length-1);
}
FeatureFactory factory = FeatureFactory.instance();
MoleculeFileIO loader = null;
- BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
String line = new String();
String sdf = new String();
try {
- while ((line = br.readLine()) != null) { sdf += line + "\n"; }
- br.close();
- InputStream is = null;
- is = new ByteArrayInputStream(sdf.getBytes("UTF-8"));
+ // parse 3d sdf from file and calculate descriptors
+ InputStream is = new FileInputStream(args[0]);
+ PrintWriter yaml = new PrintWriter(new FileWriter(args[0]+"joelib.yaml"));
BasicIOType inType = BasicIOTypeHolder.instance().getIOType("SDF");
loader = MoleculeFileHelper.getMolReader(is, inType);
- //BasicIOType outType = BasicIOTypeHolder.instance().getIOType("SMILES");
- //JOEMol mol = new JOEMol(inType, inType);
BasicConformerMolecule mol = new BasicConformerMolecule(inType, inType);
while (true) {
try {
Boolean success = loader.read(mol);
- if (!success) { break; }
- //System.err.println( mol );
- for (int i =0; i < args.length; i++) {
- Feature feature = factory.getFeature(args[i]);
+ if (!success) { break; } // last molecule
+ for (int i =0; i < features.length; i++) {
+ Feature feature = factory.getFeature(features[i]);
FeatureResult result = feature.calculate(mol);
- if (i == 0) { System.out.print("- "); }
- else { System.out.print(" "); }
- System.out.print( args[i]+": " );
- System.out.println( result.toString() );
+ if (i == 0) { yaml.print("- "); }
+ else { yaml.print(" "); }
+ yaml.print( features[i]+": " );
+ yaml.println( result.toString() );
}
}
catch (Exception e) {
- System.err.println(e.toString());
- e.printStackTrace();
- //next;
+ System.err.println(e.toString());
+ e.printStackTrace();
}
}
+ yaml.close();
}
catch (Exception e) {
+ System.err.println(e.toString());
e.printStackTrace();
- //System.err.println(e.toString());
}
}
}
diff --git a/java/Rakefile b/java/Rakefile
new file mode 100644
index 0000000..4348b49
--- /dev/null
+++ b/java/Rakefile
@@ -0,0 +1,15 @@
+# Java class, classpath
+java_classes = [
+ ["CdkDescriptors", "cdk-1.4.16.jar"],
+ ["CdkDescriptorInfo", "cdk-1.4.16.jar"],
+ ["JoelibDescriptors", "joelib2.jar:."],
+ ["JoelibDescriptorInfo", "joelib2.jar:."],
+]
+
+task :default => java_classes.collect{|c| "#{c.first}.class"}
+
+java_classes.each do |c|
+ file "#{c.first}.class" => "#{c.first}.java" do
+ puts `javac -classpath #{c.last} #{c.first}.java`
+ end
+end