diff options
author | Christoph Helma <helma@in-silico.ch> | 2018-11-05 17:37:29 +0100 |
---|---|---|
committer | Christoph Helma <helma@in-silico.ch> | 2018-11-05 17:37:29 +0100 |
commit | 2361c6d9259e7ec8eea97f9bff3b24d105b3662a (patch) | |
tree | c8c6a869cf0af40b03ae4bd33ae48cbe4dfd5532 /scripts | |
parent | ba5372ce84bd89a937478d32eb067995214f1331 (diff) |
adjustments for latest lazar version
Diffstat (limited to 'scripts')
-rwxr-xr-x | scripts/crossvalidation-summary.rb | 9 | ||||
-rwxr-xr-x | scripts/drop-database.rb | 5 | ||||
-rwxr-xr-x | scripts/efsa2csv.rb | 29 | ||||
-rwxr-xr-x | scripts/export.rb | 7 | ||||
-rwxr-xr-x | scripts/hansen2csv.rb | 10 | ||||
-rwxr-xr-x | scripts/import-pubchem.rb | 8 | ||||
-rwxr-xr-x | scripts/import.rb | 7 | ||||
-rwxr-xr-x | scripts/json2csv.rb | 28 | ||||
-rwxr-xr-x | scripts/merge.rb | 11 | ||||
-rwxr-xr-x | scripts/model.rb | 7 | ||||
-rwxr-xr-x | scripts/predict.rb | 24 | ||||
-rwxr-xr-x | scripts/repeated_crossvalidation_summary.rb | 12 |
12 files changed, 134 insertions, 23 deletions
diff --git a/scripts/crossvalidation-summary.rb b/scripts/crossvalidation-summary.rb new file mode 100755 index 0000000..13b0dfa --- /dev/null +++ b/scripts/crossvalidation-summary.rb @@ -0,0 +1,9 @@ +#!/usr/bin/env ruby +require_relative '../../lazar/lib/lazar' +include OpenTox + +summary = [] +model = Model::Validation.find(File.read(ARGV[0]).chomp).crossvalidations.each do |cv| + summary << cv.statistics +end +puts JSON.pretty_generate(summary) diff --git a/scripts/drop-database.rb b/scripts/drop-database.rb new file mode 100755 index 0000000..e263312 --- /dev/null +++ b/scripts/drop-database.rb @@ -0,0 +1,5 @@ +#!/usr/bin/env ruby +require_relative '../../lazar/lib/lazar' +$mongo.database.drop +$gridfs = $mongo.database.fs # recreate GridFS indexes + diff --git a/scripts/efsa2csv.rb b/scripts/efsa2csv.rb new file mode 100755 index 0000000..5251cae --- /dev/null +++ b/scripts/efsa2csv.rb @@ -0,0 +1,29 @@ +#!/usr/bin/env ruby +require 'csv' +require_relative '../../lazar/lib/lazar.rb' + +i = 0 +db = {} +CSV.foreach(ARGV[0], :encoding => "UTF-8", :col_sep => "\t", :liberal_parsing => true) do |row| + #STDERR.puts i if i%100 == 0 + if i > 0 and row[11] and !row[11].empty? and row[24].match(/Salmonella/i) and ( row[25].match("TA 98") or row[25].match("TA 100") ) and row[33] + begin + c = OpenTox::Compound.from_smiles(row[11].gsub('"','')).smiles + rescue + c = OpenTox::Compound.from_inchi(row[12]).smiles # some smiles (row[11]) contain non-parseable characters + end + db[c] ||= {} + db[c][:id] ||= row[2] + if row[33].match(/Positiv/i) + db[c][:value] = 1 # at least one positive result in TA 98 or TA 100 + elsif row[33].match(/Negativ/i) + db[c][:value] ||= 0 + end + end + i += 1 +end + +puts "ID,SMILES,Mutagenicity" +db.each do |s,v| + puts [v[:id],s,v[:value]].join "," +end diff --git a/scripts/export.rb b/scripts/export.rb new file mode 100755 index 0000000..afcb51d --- /dev/null +++ b/scripts/export.rb @@ -0,0 +1,7 @@ +#!/usr/bin/env ruby +require_relative '../../lazar/lib/lazar' +include OpenTox + +ext = File.extname(ARGV[0]).sub('.','') +dataset = Dataset.find File.read(ARGV[0]).chomp +dataset.send("to_#{ext}") diff --git a/scripts/hansen2csv.rb b/scripts/hansen2csv.rb new file mode 100755 index 0000000..3ae22ee --- /dev/null +++ b/scripts/hansen2csv.rb @@ -0,0 +1,10 @@ +#!/usr/bin/env ruby +require 'csv' +require_relative '../../lazar/lib/lazar.rb' + +puts "ID,SMILES,Mutagenicity" +i = 0 +CSV.foreach(ARGV[0]) do |row| + puts [row[0],row[5],row[2]].join "," if i > 0 + i += 1 +end diff --git a/scripts/import-pubchem.rb b/scripts/import-pubchem.rb new file mode 100755 index 0000000..c18bc81 --- /dev/null +++ b/scripts/import-pubchem.rb @@ -0,0 +1,8 @@ +#!/usr/bin/env ruby +require_relative '../../lazar/lib/lazar' +include OpenTox + +dataset = Dataset.send("from_pubchem_aid", ARGV[0]) +puts dataset.id.to_s + + diff --git a/scripts/import.rb b/scripts/import.rb new file mode 100755 index 0000000..f166265 --- /dev/null +++ b/scripts/import.rb @@ -0,0 +1,7 @@ +#!/usr/bin/env ruby +require_relative '../../lazar/lib/lazar' +include OpenTox + +ext = File.extname(ARGV[0]).sub('.','') +dataset = Dataset.send("from_#{ext}_file", ARGV[0]) +puts dataset.id.to_s diff --git a/scripts/json2csv.rb b/scripts/json2csv.rb new file mode 100755 index 0000000..7c61c5a --- /dev/null +++ b/scripts/json2csv.rb @@ -0,0 +1,28 @@ +#!/usr/bin/env ruby +require_relative '../../lazar/lib/lazar' +include OpenTox + +results = JSON.parse File.read(ARGV[0]) +puts "SMILES,#{File.basename(ARGV[0]).sub("pa_","").capitalize},Probability(0),Probability(1),Nr Neighbors,Warnings" +results.each do |s,r| + if r["value"] + puts [ + s, + r["value"], + r["probabilities"]["0"], + r["probabilities"]["1"], + r["neighbors"].size, + r["warnings"], + ].join(",") + else + r["neighbors"] ? n = r["neighbors"].size : n = nil + puts [ + s, + r["value"], + nil, + nil, + n, + r["warnings"], + ].join(",") + end +end diff --git a/scripts/merge.rb b/scripts/merge.rb new file mode 100755 index 0000000..5e9dac4 --- /dev/null +++ b/scripts/merge.rb @@ -0,0 +1,11 @@ +#!/usr/bin/env ruby +require_relative '../../lazar/lib/lazar.rb' +include OpenTox + +hansen = Dataset.find File.read(ARGV[0]).chomp +efsa = Dataset.find File.read(ARGV[1]).chomp +kazius = Dataset.find File.read(ARGV[2]).chomp +datasets = [hansen,efsa,kazius] +map = {"mutagen" => "1", "nonmutagen" => "0"} +dataset = Dataset.merge datasets: datasets, features: datasets.collect{|d| d.bioactivity_features.first}, value_maps: [nil,nil,map], keep_original_features: true, remove_duplicates: true +puts dataset.id diff --git a/scripts/model.rb b/scripts/model.rb new file mode 100755 index 0000000..b86d33d --- /dev/null +++ b/scripts/model.rb @@ -0,0 +1,7 @@ +#!/usr/bin/env ruby +require_relative '../../lazar/lib/lazar' +include OpenTox + +dataset = Dataset.find File.read(ARGV[0]).chomp +model_validation = Model::Validation.from_dataset training_dataset: dataset, prediction_feature: dataset.merged_features.first, species: "Salmonella typhimurium", endpoint: "Mutagenicity", repeats: 3 +puts model_validation.id.to_s diff --git a/scripts/predict.rb b/scripts/predict.rb index 9161f45..57d5a8b 100755 --- a/scripts/predict.rb +++ b/scripts/predict.rb @@ -2,23 +2,7 @@ require_relative '../../lazar/lib/lazar' include OpenTox -train = Dataset.from_csv_file ARGV[0] - -model = Model::LazarClassification.create(training_dataset: train) - -n = 0 -results = {} -File.open(ARGV[1]).each_line do |l| - unless n == 0 - id,smi = l.chomp.split "," - c = Compound.from_smiles smi - result = model.predict c - results[id] = result - end - n += 1 -end - -puts results.to_json - #puts result.inspect - #rcv = Validation::RepeatedCrossValidation.create(model) -#puts "#{rcv.id}" +dataset = Dataset.find(File.read(ARGV[0]).chomp) +model = Model::Validation.find(File.read(ARGV[1]).chomp) +predictions = model.predict dataset.compounds # avoid dataset prediction to keep neighbors +puts predictions.to_json diff --git a/scripts/repeated_crossvalidation_summary.rb b/scripts/repeated_crossvalidation_summary.rb index add66a2..067fea8 100755 --- a/scripts/repeated_crossvalidation_summary.rb +++ b/scripts/repeated_crossvalidation_summary.rb @@ -3,7 +3,13 @@ require_relative '../../lazar/lib/lazar' include OpenTox summary = [] -Validation::RepeatedCrossValidation.find(File.read(ARGV[0]).chomp).crossvalidations.each do |cv| - summary << cv.statistics +#Validation::RepeatedCrossValidation.find(File.read(ARGV[0]).chomp).crossvalidations.each do |cv| +# summary << cv.statistics +#end +cv = Validation::RepeatedCrossValidation.all.last.crossvalidations.each do |cv| +#cv = Validation::RepeatedCrossValidation.find("5bbb9151ca626916248b328b").crossvalidations.first +p cv.id +p cv.statistics end -puts JSON.pretty_generate(summary) + +#puts JSON.pretty_generate(summary) |