From 59509099257225b068a13626d3a42eac2f4244ab Mon Sep 17 00:00:00 2001
From: Christoph Helma
Date: Tue, 13 Oct 2020 11:25:51 +0200
Subject: mutagenicity downloads moved, PaDEL metadata

---
 bin/export-fingerprints.rb | 45 +++++++++++++++++++++++++++++++++++++++++++++
 bin/export-sdf.rb          | 39 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 84 insertions(+)
 create mode 100755 bin/export-fingerprints.rb
 create mode 100755 bin/export-sdf.rb

(limited to 'bin')

diff --git a/bin/export-fingerprints.rb b/bin/export-fingerprints.rb
new file mode 100755
index 0000000..0e1e934
--- /dev/null
+++ b/bin/export-fingerprints.rb
@@ -0,0 +1,45 @@
+#!/usr/bin/env ruby
+# Export a dataset directory as a CSV fingerprint matrix on stdout.
+#
+# Usage: export-fingerprints.rb DIR
+#
+# Files read from DIR:
+#   dependent_variable_name   - chomped content is the header of the last column
+#   dependent_variables       - one integer per line; line i is the index into
+#                               dependent_variable_values for compound i
+#   dependent_variable_values - one value per line
+#   independent_variables     - comma separated; the first field goes into the
+#                               "Canonical SMILES" column, the remaining fields
+#                               are the fingerprint names of that compound
+#
+# Output: a header line, then one line per compound with a 1/0 column for every
+# fingerprint name that occurs anywhere in the input, then the endpoint value.
+require "set"
+
+dir = ARGV[0]
+abort "Usage: #{File.basename($PROGRAM_NAME)} DIR" unless dir
+
+# File.read, File.readlines and File.foreach close the file when they are done.
+endpoint_name = File.read(File.join(dir, "dependent_variable_name")).chomp
+endpoints = File.readlines(File.join(dir, "dependent_variables")).map(&:to_i)
+# Chomp the values so that the line terminator of every row comes from puts.
+endpoint_values = File.readlines(File.join(dir, "dependent_variable_values")).map(&:chomp)
+
+smiles = []
+fingerprints = [] # one Set of fingerprint names per compound
+File.foreach(File.join(dir, "independent_variables")) do |line|
+  smi, *features = line.chomp.split(",")
+  smiles << smi
+  fingerprints << features.to_set # constant-time membership test per column
+end
+
+# Column order: all fingerprint names of all compounds, sorted and unique.
+fp_names = fingerprints.reduce(Set.new, :merge).sort
+header = ["Canonical SMILES", *fp_names, endpoint_name]
+puts header.join(",")
+
+smiles.each_with_index do |smi, i|
+  bits = fp_names.map { |name| fingerprints[i].include?(name) ? 1 : 0 }
+  row = [smi, *bits, endpoint_values[endpoints[i]]]
+  puts row.join(",")
+end
diff --git a/bin/export-sdf.rb b/bin/export-sdf.rb
new file mode 100755
index 0000000..4f4a9ff
--- /dev/null
+++ b/bin/export-sdf.rb
@@ -0,0 +1,39 @@
+#!/usr/bin/env ruby
+# Export a dataset directory as SDF records on stdout.
+#
+# Usage: export-sdf.rb DIR
+#
+# Files read from DIR: dependent_variable_name, dependent_variables,
+# dependent_variable_values and independent_variables. Only the first comma
+# separated field of each independent_variables line is used; it is handed to
+# Compound.new. Line i of dependent_variables is the integer index into the
+# lines of dependent_variable_values for compound i.
+#
+# Each record is compound.sdf with its first line replaced by compound.smiles,
+# followed by a "> <endpoint name>" data item with the endpoint value and the
+# "$$$$" record terminator.
+require_relative "../lib/lazar.rb"
+
+dir = ARGV[0]
+abort "Usage: #{File.basename($PROGRAM_NAME)} DIR" unless dir
+
+# File.read, File.readlines and File.foreach close the file when they are done.
+endpoint_name = File.read(File.join(dir, "dependent_variable_name")).chomp
+endpoints = File.readlines(File.join(dir, "dependent_variables")).map(&:to_i)
+endpoint_values = File.readlines(File.join(dir, "dependent_variable_values")).map(&:chomp)
+
+# All compounds are created before the first record is printed.
+compounds = File.foreach(File.join(dir, "independent_variables")).map do |line|
+  Compound.new(line.chomp.split(",").first)
+end
+
+compounds.each_with_index do |compound, i|
+  # Strip the terminator from compound.sdf; it is appended again after the data item.
+  sdf_lines = compound.sdf.sub(/\$\$\$\$\n/, "").split("\n")
+  sdf_lines[0] = compound.smiles
+  sdf = sdf_lines.join("\n")
+  sdf << "\n> <#{endpoint_name}>\n"
+  sdf << endpoint_values[endpoints[i]]
+  sdf << "\n\n$$$$\n"
+  print sdf
+end
-- 
cgit v1.2.3