diff options
| | | |
|---|---|---|
| author | Christoph Helma <helma@in-silico.ch> | 2018-08-08 18:46:55 +0200 |
| committer | Christoph Helma <helma@in-silico.ch> | 2018-08-08 18:46:55 +0200 |
| commit | 925c47003d55a8ad5b0e5a86a5d698954da88f31 (patch) | |
| tree | 309815e8ccecc7cbf593402324e9645b95026dc8 /scripts | |
| parent | 97e65d06feba57b45f55c8b83549b8b5350d3e7a (diff) | |
merged dataset
Diffstat (limited to 'scripts')
-rwxr-xr-x | scripts/efsa2csv.rb | 5 | ||||
-rwxr-xr-x | scripts/kazius2csv.rb | 42 | ||||
-rw-r--r-- | scripts/repeated_crossvalidation.rb | 10 |
3 files changed, 55 insertions, 2 deletions
```diff
diff --git a/scripts/efsa2csv.rb b/scripts/efsa2csv.rb
index 56c69df..247faac 100755
--- a/scripts/efsa2csv.rb
+++ b/scripts/efsa2csv.rb
@@ -4,10 +4,11 @@ require_relative '../../lazar/lib/lazar.rb'
 i = 0
 db = {}
 
-CSV.foreach(ARGV[0]) do |row|
+CSV.foreach(ARGV[0], :encoding => 'ISO-8859-1', :col_sep => "\t") do |row|
 begin
 if i > 0 and row[11]
 c = OpenTox::Compound.from_smiles(row[11]).smiles
+ #c = row[11]
 if row[24].match(/Salmonella/i)
 if row[25].match("TA 98") or row[25].match("TA 100")
 if row[33].match(/Positiv/i)
@@ -23,6 +24,6 @@ CSV.foreach(ARGV[0]) do |row|
 i += 1
 end
 
-dm.each do |s,v|
+db.each do |s,v|
 puts [s,v].join ","
 end
diff --git a/scripts/kazius2csv.rb b/scripts/kazius2csv.rb
new file mode 100755
index 0000000..ed335b2
--- /dev/null
+++ b/scripts/kazius2csv.rb
@@ -0,0 +1,42 @@
+#!/usr/bin/env ruby
+require_relative '../../lazar/lib/lazar.rb'
+
+sdfs = []
+results = []
+read_result = false
+@sdf = ""
+File.readlines(ARGV[0]).each do |line|
+ if line.match %r{\$\$\$\$}
+ @sdf << line
+ sdfs << @sdf
+ @sdf = ""
+ elsif line.match "> <Ames test categorisation>"
+ read_result = true
+ else
+ #p line
+ if read_result
+ if line.chomp == "mutagen"
+ results << 1
+ elsif line.chomp == "nonmutagen"
+ results << 0
+ else
+ results << line.chomp
+ end
+ read_result = false
+ else
+ @sdf << line
+ end
+ end
+end
+
+obconversion = OpenBabel::OBConversion.new
+obconversion.set_in_and_out_formats "sdf","can"
+obmol = OpenBabel::OBMol.new
+
+puts "SMILES,Activity"
+sdfs.each_with_index do |sdf,i|
+ obconversion.read_string obmol,sdf
+ s = obconversion.write_string(obmol).split.first
+ puts [s,results[i]].join ","
+end
+
diff --git a/scripts/repeated_crossvalidation.rb b/scripts/repeated_crossvalidation.rb
new file mode 100644
index 0000000..9783c03
--- /dev/null
+++ b/scripts/repeated_crossvalidation.rb
@@ -0,0 +1,10 @@
+#!/usr/bin/env ruby
+require_relative '../../lazar/lib/lazar'
+include OpenTox
+
+file = ARGV[0]
+dataset = Dataset.from_csv_file file
+model = Model::LazarRegression.create(training_dataset: dataset)
+
+rcv = Validation::RepeatedCrossValidation.create model, :repeats => 5
+puts "#{rcv.id}"
```