From 158e9a7ecbc467c3db77c354f203b1176b0fc3f2 Mon Sep 17 00:00:00 2001
From: Christoph Helma
Date: Mon, 28 Dec 2020 18:08:58 +0100
Subject: batch processing scripts added

---
 bin/batch_fingerprint_classification.rb | 22 ++++++++++++++++++++++
 bin/batch_fingerprints.rb               | 20 ++++++++++++++++++++
 bin/batch_padel_classification.rb       | 28 ++++++++++++++++++++++++++++
 3 files changed, 70 insertions(+)
 create mode 100755 bin/batch_fingerprint_classification.rb
 create mode 100755 bin/batch_fingerprints.rb
 create mode 100755 bin/batch_padel_classification.rb

(limited to 'bin')

diff --git a/bin/batch_fingerprint_classification.rb b/bin/batch_fingerprint_classification.rb
new file mode 100755
index 0000000..318fae6
--- /dev/null
+++ b/bin/batch_fingerprint_classification.rb
@@ -0,0 +1,22 @@
+#!/usr/bin/env ruby
+# Batch-classify compounds with a lazar classification model.
+# Usage: batch_fingerprint_classification.rb MODEL INPUT_FILE
+# INPUT_FILE holds comma-separated "ID,SMILES" lines; results are printed as CSV.
+require_relative "../lib/lazar"
+
+abort "Usage: #{File.basename($PROGRAM_NAME)} MODEL INPUT_FILE" unless ARGV.size >= 2
+
+model = ClassificationModel.new ARGV[0]
+
+# File.foreach streams the input instead of reading the whole file into memory.
+File.foreach(ARGV[1]) do |line|
+  next if line.strip.empty? # a blank line would otherwise be predicted with a nil SMILES
+
+  if line.match?(/SMILES/i)
+    # Any line mentioning SMILES is treated as the input header.
+    puts "ID,SMILES,experimental,classification,probability(0),probability(1),max_similarity,nr_neighbors"
+  else
+    id, smi = line.chomp.split(",")
+    puts(([id] + model.predict_smiles(smi)).join(","))
+  end
+end
diff --git a/bin/batch_fingerprints.rb b/bin/batch_fingerprints.rb
new file mode 100755
index 0000000..cd46ee2
--- /dev/null
+++ b/bin/batch_fingerprints.rb
@@ -0,0 +1,20 @@
+#!/usr/bin/env ruby
+# Print the fingerprint of every compound listed in an input file.
+# Usage: batch_fingerprints.rb INPUT_FILE
+# INPUT_FILE holds comma-separated "ID,SMILES" lines; lines mentioning SMILES are skipped.
+require_relative "../lib/lazar"
+
+abort "Usage: #{File.basename($PROGRAM_NAME)} INPUT_FILE" if ARGV.empty?
+
+# File.foreach streams the input instead of reading the whole file into memory.
+File.foreach(ARGV[0]) do |line|
+  # Skip the header as well as blank lines (which would yield a nil SMILES).
+  next if line.match?(/SMILES/i) || line.strip.empty?
+
+  _id, smi = line.chomp.split(",")
+  # NOTE(review): echoing the input SMILES before each CSV row looks like leftover
+  # debug output -- confirm before treating stdout as pure CSV.
+  puts smi
+  c = Compound.from_smiles(smi)
+  puts(([c.smiles] + c.fingerprint).join(","))
+end
diff --git a/bin/batch_padel_classification.rb b/bin/batch_padel_classification.rb
new file mode 100755
index 0000000..6d05907
--- /dev/null
+++ b/bin/batch_padel_classification.rb
@@ -0,0 +1,28 @@
+#!/usr/bin/env ruby
+# Classify an input file with a lazar classification model via predict_file.
+# Usage: batch_padel_classification.rb MODEL INPUT_FILE
+require_relative "../lib/lazar"
+
+abort "Usage: #{File.basename($PROGRAM_NAME)} MODEL INPUT_FILE" unless ARGV.size >= 2
+
+model = ClassificationModel.new ARGV[0]
+model.predict_file ARGV[1]
+
+# NOTE(review): the =begin/=end section below is disabled draft code. It is
+# incomplete (id and smi are never assigned) and is kept only for reference.
+=begin
+File.read(ARGV[1]).each_line do |line|
+  if line.match(/Name/i)
+    file_descriptors = line.chomp.split(";")
+    model_descriptors = File.read(File.join(ARGV[0],"independent_variable_names")).chomp.split(",").collect{|d| d.gsub('"','')}
+    common_descriptors = model_descriptors & file_descriptors
+    puts "ID,SMILES,experimental,classification,probability(0),probability(1),max_similarity,nr_neighbors"
+  else
+    descriptor_values = []
+    line.chomp.split(",")
+    descriptor_values.shift
+    puts ([id] + model.predict_smiles(smi)).join(",")
+  end
+end
+
+=end
-- 
cgit v1.2.3