summary | refs | log | tree | commit | diff
path: root/scripts
diff options
context:
space:
mode:
Diffstat (limited to 'scripts')
-rwxr-xr-x scripts/carcinogenicity2csv.rb 25
-rwxr-xr-x scripts/convert_pa.rb 11
-rwxr-xr-x scripts/pa_neighbor_sets.rb 16
-rwxr-xr-x scripts/pa_summary.rb 27
-rwxr-xr-x scripts/predict.rb 24
-rwxr-xr-x scripts/repeated_crossvalidation_summary.rb 9
6 files changed, 112 insertions, 0 deletions
diff --git a/scripts/carcinogenicity2csv.rb b/scripts/carcinogenicity2csv.rb
new file mode 100755
index 0000000..9501bdc
--- /dev/null
+++ b/scripts/carcinogenicity2csv.rb
@@ -0,0 +1,25 @@
+#!/usr/bin/env ruby
+require_relative '../../lazar/lib/lazar.rb'
+
+i = 0
+activities = []
+File.readlines(ARGV[0]).each do |line|
+ if i > 2
+ tokens = line.split ","
+ p line if tokens[1].empty?
+ activities << [tokens[1],tokens[3]]
+ end
+ i += 1
+end
+
+puts "SMILES,Activity"
+activities.each_slice(100) do |slice| # get SMILES in chunks
+ sids = slice.collect{|e| e[0]}
+ smiles = `curl https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/#{sids.join(",")}/property/CanonicalSMILES/TXT`.split("\n")
+ abort("Could not get SMILES for all SIDs from PubChem") unless sids.size == smiles.size
+ smiles.each_with_index do |smi,i|
+ act = slice[i]
+ puts [smi.chomp,act[1]].join(",")
+ end
+end
+
diff --git a/scripts/convert_pa.rb b/scripts/convert_pa.rb
new file mode 100755
index 0000000..00b4831
--- /dev/null
+++ b/scripts/convert_pa.rb
@@ -0,0 +1,11 @@
+#!/usr/bin/env ruby
+
+while STDIN.gets
+ tokens = $_.chomp.encode("UTF-8", {:invalid => :replace, :undef => :replace}).split(";")
+ begin
+ #puts "\"#{tokens[0]}\",\"#{tokens.last}\"" if tokens[3]
+ puts "#{tokens[0]},#{tokens.last}" if tokens[3]
+ #puts tokens.last if tokens[3]
+ rescue
+ end
+end
diff --git a/scripts/pa_neighbor_sets.rb b/scripts/pa_neighbor_sets.rb
new file mode 100755
index 0000000..1975ffd
--- /dev/null
+++ b/scripts/pa_neighbor_sets.rb
@@ -0,0 +1,16 @@
+#!/usr/bin/env ruby
+require_relative '../../lazar/lib/lazar'
+include OpenTox
+
+results = JSON.parse File.read(ARGV[0])
+neighbors = []
+results.each do |s,r|
+ neighbors << r["neighbors"] if r["value"]
+end
+
+o = {
+ :nr_predictions => neighbors.size,
+ :unique_neighbor_sets => neighbors.uniq.size,
+ :unique_neighbor_sets_without_similarity => neighbors.collect{|s| s.collect{|n| n["id"]}.sort}.uniq.size
+}
+puts o.to_json
diff --git a/scripts/pa_summary.rb b/scripts/pa_summary.rb
new file mode 100755
index 0000000..7487e3e
--- /dev/null
+++ b/scripts/pa_summary.rb
@@ -0,0 +1,27 @@
+#!/usr/bin/env ruby
+require_relative '../../lazar/lib/lazar'
+include OpenTox
+
+results = JSON.parse File.read(ARGV[0])
+results.each do |s,r|
+ if r["value"]
+ puts [
+ s,
+ r["warnings"],
+ r["value"],
+ r["probabilities"]["0"],
+ r["probabilities"]["1"],
+ r["neighbors"].size,
+ ].join(",")
+ else
+ r["neighbors"] ? n = r["neighbors"].size : n = nil
+ puts [
+ s,
+ r["warnings"],
+ r["value"],
+ nil,
+ nil,
+ n
+ ].join(",")
+ end
+end
diff --git a/scripts/predict.rb b/scripts/predict.rb
new file mode 100755
index 0000000..9161f45
--- /dev/null
+++ b/scripts/predict.rb
@@ -0,0 +1,24 @@
+#!/usr/bin/env ruby
+require_relative '../../lazar/lib/lazar'
+include OpenTox
+
+train = Dataset.from_csv_file ARGV[0]
+
+model = Model::LazarClassification.create(training_dataset: train)
+
+n = 0
+results = {}
+File.open(ARGV[1]).each_line do |l|
+ unless n == 0
+ id,smi = l.chomp.split ","
+ c = Compound.from_smiles smi
+ result = model.predict c
+ results[id] = result
+ end
+ n += 1
+end
+
+puts results.to_json
+ #puts result.inspect
+ #rcv = Validation::RepeatedCrossValidation.create(model)
+#puts "#{rcv.id}"
diff --git a/scripts/repeated_crossvalidation_summary.rb b/scripts/repeated_crossvalidation_summary.rb
new file mode 100755
index 0000000..add66a2
--- /dev/null
+++ b/scripts/repeated_crossvalidation_summary.rb
@@ -0,0 +1,9 @@
+#!/usr/bin/env ruby
+require_relative '../../lazar/lib/lazar'
+include OpenTox
+
+summary = []
+Validation::RepeatedCrossValidation.find(File.read(ARGV[0]).chomp).crossvalidations.each do |cv|
+ summary << cv.statistics
+end
+puts JSON.pretty_generate(summary)