summary | refs | log | tree | commit | diff
path: root/scripts
diff options
context:
space:
mode:
Diffstat (limited to 'scripts')
-rwxr-xr-x scripts/efsa2csv.rb 5
-rwxr-xr-x scripts/kazius2csv.rb 42
-rw-r--r-- scripts/repeated_crossvalidation.rb 10
3 files changed, 55 insertions, 2 deletions
diff --git a/scripts/efsa2csv.rb b/scripts/efsa2csv.rb
index 56c69df..247faac 100755
--- a/scripts/efsa2csv.rb
+++ b/scripts/efsa2csv.rb
@@ -4,10 +4,11 @@ require_relative '../../lazar/lib/lazar.rb'
i = 0
db = {}
-CSV.foreach(ARGV[0]) do |row|
+CSV.foreach(ARGV[0], :encoding => 'ISO-8859-1', :col_sep => "\t") do |row|
begin
if i > 0 and row[11]
c = OpenTox::Compound.from_smiles(row[11]).smiles
+ #c = row[11]
if row[24].match(/Salmonella/i)
if row[25].match("TA 98") or row[25].match("TA 100")
if row[33].match(/Positiv/i)
@@ -23,6 +24,6 @@ CSV.foreach(ARGV[0]) do |row|
i += 1
end
-dm.each do |s,v|
+db.each do |s,v|
puts [s,v].join ","
end
diff --git a/scripts/kazius2csv.rb b/scripts/kazius2csv.rb
new file mode 100755
index 0000000..ed335b2
--- /dev/null
+++ b/scripts/kazius2csv.rb
@@ -0,0 +1,42 @@
+#!/usr/bin/env ruby
+require_relative '../../lazar/lib/lazar.rb'
+
+sdfs = []
+results = []
+read_result = false
+@sdf = ""
+File.readlines(ARGV[0]).each do |line|
+ if line.match %r{\$\$\$\$}
+ @sdf << line
+ sdfs << @sdf
+ @sdf = ""
+ elsif line.match "> <Ames test categorisation>"
+ read_result = true
+ else
+ #p line
+ if read_result
+ if line.chomp == "mutagen"
+ results << 1
+ elsif line.chomp == "nonmutagen"
+ results << 0
+ else
+ results << line.chomp
+ end
+ read_result = false
+ else
+ @sdf << line
+ end
+ end
+end
+
+obconversion = OpenBabel::OBConversion.new
+obconversion.set_in_and_out_formats "sdf","can"
+obmol = OpenBabel::OBMol.new
+
+puts "SMILES,Activity"
+sdfs.each_with_index do |sdf,i|
+ obconversion.read_string obmol,sdf
+ s = obconversion.write_string(obmol).split.first
+ puts [s,results[i]].join ","
+end
+
diff --git a/scripts/repeated_crossvalidation.rb b/scripts/repeated_crossvalidation.rb
new file mode 100644
index 0000000..9783c03
--- /dev/null
+++ b/scripts/repeated_crossvalidation.rb
@@ -0,0 +1,10 @@
+#!/usr/bin/env ruby
+require_relative '../../lazar/lib/lazar'
+include OpenTox
+
+file = ARGV[0]
+dataset = Dataset.from_csv_file file
+model = Model::LazarRegression.create(training_dataset: dataset)
+
+rcv = Validation::RepeatedCrossValidation.create model, :repeats => 5
+puts "#{rcv.id}"