diff options
Diffstat (limited to 'scripts')
-rwxr-xr-x | scripts/carcinogenicity2csv.rb | 25 | ||||
-rwxr-xr-x | scripts/convert_pa.rb | 11 | ||||
-rwxr-xr-x | scripts/pa_neighbor_sets.rb | 16 | ||||
-rwxr-xr-x | scripts/pa_summary.rb | 27 | ||||
-rwxr-xr-x | scripts/predict.rb | 24 | ||||
-rwxr-xr-x | scripts/repeated_crossvalidation_summary.rb | 9 |
6 files changed, 112 insertions, 0 deletions
# review: The hunks below were restored to one diff line per physical line so
# review: that each hunk matches its "@@ -0,0 +1,N @@" count. Script content is
# review: unchanged; indentation widths were lost in extraction and are
# review: reconstructed as two spaces. Lines starting with "# review:" sit
# review: outside the hunks and are commentary only.
#
# review: carcinogenicity2csv.rb - skips the first three lines of the file named
# review: by ARGV[0], keeps fields 2 and 4 of every remaining comma-separated
# review: line, fetches CanonicalSMILES from PubChem PUG REST in batches of 100
# review: and prints "SMILES,Activity" rows.
# review: NOTE: the local is named `sids` and the abort message says SIDs, but
# review: the URL path is compound/cid/ - confirm which identifier the input holds.
# review: NOTE: `tokens[1].empty?` raises NoMethodError for a line without a comma.
# review: NOTE: the identifiers are interpolated into a shell command unvalidated.
diff --git a/scripts/carcinogenicity2csv.rb b/scripts/carcinogenicity2csv.rb
new file mode 100755
index 0000000..9501bdc
--- /dev/null
+++ b/scripts/carcinogenicity2csv.rb
@@ -0,0 +1,25 @@
+#!/usr/bin/env ruby
+require_relative '../../lazar/lib/lazar.rb'
+
+i = 0
+activities = []
+File.readlines(ARGV[0]).each do |line|
+  if i > 2
+    tokens = line.split ","
+    p line if tokens[1].empty?
+    activities << [tokens[1],tokens[3]]
+  end
+  i += 1
+end
+
+puts "SMILES,Activity"
+activities.each_slice(100) do |slice| # get SMILES in chunks
+  sids = slice.collect{|e| e[0]}
+  smiles = `curl https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/#{sids.join(",")}/property/CanonicalSMILES/TXT`.split("\n")
+  abort("Could not get SMILES for all SIDs from PubChem") unless sids.size == smiles.size
+  smiles.each_with_index do |smi,i|
+    act = slice[i]
+    puts [smi.chomp,act[1]].join(",")
+  end
+end
+
# review: convert_pa.rb - reads STDIN line by line, splits each line on ";" and
# review: prints the first and last field joined by "," when a fourth field exists.
# review: NOTE: the bare `rescue` discards any error raised by the `puts` line.
# review: NOTE: the options are passed to `encode` as a positional hash and the
# review: target is UTF-8; verify this still replaces invalid bytes on the Ruby
# review: version in use (String#scrub may be what is intended).
diff --git a/scripts/convert_pa.rb b/scripts/convert_pa.rb
new file mode 100755
index 0000000..00b4831
--- /dev/null
+++ b/scripts/convert_pa.rb
@@ -0,0 +1,11 @@
+#!/usr/bin/env ruby
+
+while STDIN.gets
+  tokens = $_.chomp.encode("UTF-8", {:invalid => :replace, :undef => :replace}).split(";")
+  begin
+    #puts "\"#{tokens[0]}\",\"#{tokens.last}\"" if tokens[3]
+    puts "#{tokens[0]},#{tokens.last}" if tokens[3]
+    #puts tokens.last if tokens[3]
+  rescue
+  end
+end
# review: pa_neighbor_sets.rb - parses the JSON file named by ARGV[0], collects
# review: the "neighbors" entry of every result whose "value" is truthy, and
# review: prints three counts as JSON: predictions, unique neighbor sets, and
# review: unique sets compared by sorted neighbor "id" only.
diff --git a/scripts/pa_neighbor_sets.rb b/scripts/pa_neighbor_sets.rb
new file mode 100755
index 0000000..1975ffd
--- /dev/null
+++ b/scripts/pa_neighbor_sets.rb
@@ -0,0 +1,16 @@
+#!/usr/bin/env ruby
+require_relative '../../lazar/lib/lazar'
+include OpenTox
+
+results = JSON.parse File.read(ARGV[0])
+neighbors = []
+results.each do |s,r|
+  neighbors << r["neighbors"] if r["value"]
+end
+
+o = {
+  :nr_predictions => neighbors.size,
+  :unique_neighbor_sets => neighbors.uniq.size,
+  :unique_neighbor_sets_without_similarity => neighbors.collect{|s| s.collect{|n| n["id"]}.sort}.uniq.size
+}
+puts o.to_json
# review: pa_summary.rb - parses the JSON file named by ARGV[0] and prints one
# review: comma-joined row per result: key, warnings, value, probabilities "0"
# review: and "1", and the neighbor count. Results without a value print empty
# review: probability columns and an empty count when "neighbors" is absent.
# review: NOTE: rows are built with Array#join, not a CSV writer; if "warnings"
# review: is an array or contains commas the columns will shift - confirm.
diff --git a/scripts/pa_summary.rb b/scripts/pa_summary.rb
new file mode 100755
index 0000000..7487e3e
--- /dev/null
+++ b/scripts/pa_summary.rb
@@ -0,0 +1,27 @@
+#!/usr/bin/env ruby
+require_relative '../../lazar/lib/lazar'
+include OpenTox
+
+results = JSON.parse File.read(ARGV[0])
+results.each do |s,r|
+  if r["value"]
+    puts [
+      s,
+      r["warnings"],
+      r["value"],
+      r["probabilities"]["0"],
+      r["probabilities"]["1"],
+      r["neighbors"].size,
+    ].join(",")
+  else
+    r["neighbors"] ? n = r["neighbors"].size : n = nil
+    puts [
+      s,
+      r["warnings"],
+      r["value"],
+      nil,
+      nil,
+      n
+    ].join(",")
+  end
+end
# review: predict.rb - builds a LazarClassification model from the CSV named by
# review: ARGV[0], then for every line of ARGV[1] except the first splits on ","
# review: into id and SMILES, predicts the compound, and prints all results as
# review: JSON keyed by id.
# review: NOTE: `File.open(ARGV[1])` is used without a block, so the handle is
# review: never closed explicitly.
diff --git a/scripts/predict.rb b/scripts/predict.rb
new file mode 100755
index 0000000..9161f45
--- /dev/null
+++ b/scripts/predict.rb
@@ -0,0 +1,24 @@
+#!/usr/bin/env ruby
+require_relative '../../lazar/lib/lazar'
+include OpenTox
+
+train = Dataset.from_csv_file ARGV[0]
+
+model = Model::LazarClassification.create(training_dataset: train)
+
+n = 0
+results = {}
+File.open(ARGV[1]).each_line do |l|
+  unless n == 0
+    id,smi = l.chomp.split ","
+    c = Compound.from_smiles smi
+    result = model.predict c
+    results[id] = result
+  end
+  n += 1
+end
+
+puts results.to_json
+  #puts result.inspect
+  #rcv = Validation::RepeatedCrossValidation.create(model)
+#puts "#{rcv.id}"
# review: repeated_crossvalidation_summary.rb - reads an id from the file named
# review: by ARGV[0], looks up that RepeatedCrossValidation, and pretty-prints
# review: the `statistics` of each of its crossvalidations as a JSON array.
diff --git a/scripts/repeated_crossvalidation_summary.rb b/scripts/repeated_crossvalidation_summary.rb
new file mode 100755
index 0000000..add66a2
--- /dev/null
+++ b/scripts/repeated_crossvalidation_summary.rb
@@ -0,0 +1,9 @@
+#!/usr/bin/env ruby
+require_relative '../../lazar/lib/lazar'
+include OpenTox
+
+summary = []
+Validation::RepeatedCrossValidation.find(File.read(ARGV[0]).chomp).crossvalidations.each do |cv|
+  summary << cv.statistics
+end
+puts JSON.pretty_generate(summary)