diff options
author | Christoph Helma <helma@in-silico.ch> | 2019-08-19 15:20:28 +0200 |
---|---|---|
committer | Christoph Helma <helma@in-silico.ch> | 2019-08-19 15:20:28 +0200 |
commit | b82b4e640a11f3821b2bcf78fbbeead5d80b9066 (patch) | |
tree | c0020cb804f3439db3af5248eed4b99cbbb91a79 /scripts | |
parent | b6ad21e340bce9ba2a2ad09fe48c656f0c2e3905 (diff) |
obsolete files removed, adjusted export scripts
Diffstat (limited to 'scripts')
-rwxr-xr-x | scripts/cv-tensorflow-confusion-matrix.rb | 24 | ||||
-rwxr-xr-x | scripts/efsa2csv.rb | 29 | ||||
-rwxr-xr-x | scripts/export-fingerprints.rb | 5 | ||||
-rwxr-xr-x | scripts/export.rb | 3 | ||||
-rwxr-xr-x | scripts/hansen2csv.rb | 10 | ||||
-rwxr-xr-x | scripts/import.rb | 7 | ||||
-rwxr-xr-x | scripts/lazar-crossvalidation.rb | 8 | ||||
-rwxr-xr-x | scripts/lazar-model.rb (renamed from scripts/import-pubchem.rb) | 6 | ||||
-rwxr-xr-x | scripts/merge.rb | 11 | ||||
-rwxr-xr-x | scripts/model.rb | 7 | ||||
-rwxr-xr-x | scripts/pa_neighbor_sets.rb | 16 | ||||
-rwxr-xr-x | scripts/pa_summary.rb | 27 | ||||
-rwxr-xr-x | scripts/predict.rb | 8 |
13 files changed, 39 insertions, 122 deletions
diff --git a/scripts/cv-tensorflow-confusion-matrix.rb b/scripts/cv-tensorflow-confusion-matrix.rb new file mode 100755 index 0000000..067519b --- /dev/null +++ b/scripts/cv-tensorflow-confusion-matrix.rb @@ -0,0 +1,24 @@ +#!/usr/bin/env ruby +require 'csv' + +tp = 0 +fp = 0 +tn = 0 +fn = 0 + +pred = CSV.read(ARGV[0],headers: true,:col_sep => ",") +act = CSV.read(File.join(File.dirname(ARGV[0]),"GenoTox-database.csv"),headers: true,:col_sep => ",") + +pred.each_with_index do |row,i| + + row[1].to_f < 0.5 ? p = 0 : p = 1 + a = act[i].to_h["GENO.Outcome"].to_i + + tp += 1 if a == 1 and p == 1 + tn += 1 if a == 0 and p == 0 + fp += 1 if a == 0 and p == 1 + fn += 1 if a == 1 and p == 0 + +end + +puts "#{tp},#{fp}\n#{fn},#{tn}" diff --git a/scripts/efsa2csv.rb b/scripts/efsa2csv.rb deleted file mode 100755 index 5251cae..0000000 --- a/scripts/efsa2csv.rb +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env ruby -require 'csv' -require_relative '../../lazar/lib/lazar.rb' - -i = 0 -db = {} -CSV.foreach(ARGV[0], :encoding => "UTF-8", :col_sep => "\t", :liberal_parsing => true) do |row| - #STDERR.puts i if i%100 == 0 - if i > 0 and row[11] and !row[11].empty? and row[24].match(/Salmonella/i) and ( row[25].match("TA 98") or row[25].match("TA 100") ) and row[33] - begin - c = OpenTox::Compound.from_smiles(row[11].gsub('"','')).smiles - rescue - c = OpenTox::Compound.from_inchi(row[12]).smiles # some smiles (row[11]) contain non-parseable characters - end - db[c] ||= {} - db[c][:id] ||= row[2] - if row[33].match(/Positiv/i) - db[c][:value] = 1 # at least one positive result in TA 98 or TA 100 - elsif row[33].match(/Negativ/i) - db[c][:value] ||= 0 - end - end - i += 1 -end - -puts "ID,SMILES,Mutagenicity" -db.each do |s,v| - puts [v[:id],s,v[:value]].join "," -end diff --git a/scripts/export-fingerprints.rb b/scripts/export-fingerprints.rb index 5c54b96..6b9c3db 100755 --- a/scripts/export-fingerprints.rb +++ b/scripts/export-fingerprints.rb @@ -2,5 +2,6 @@ require_relative '../../lazar/lib/lazar' include OpenTox -dataset = Dataset.find File.read(ARGV[0]).chomp -puts dataset.to_csv(default_fingerprints:true) +cv = Validation::CrossValidation.find File.read(ARGV[0]).chomp +dataset = cv.model.training_dataset +puts dataset.to_fingerprint_csv diff --git a/scripts/export.rb b/scripts/export.rb index 87e2921..76d201b 100755 --- a/scripts/export.rb +++ b/scripts/export.rb @@ -2,5 +2,6 @@ require_relative '../../lazar/lib/lazar' include OpenTox -dataset = Dataset.find File.read(ARGV[0]).chomp +cv = Validation::CrossValidation.find File.read(ARGV[0]).chomp +dataset = cv.model.training_dataset puts dataset.send("to_#{ARGV[1]}") diff --git a/scripts/hansen2csv.rb b/scripts/hansen2csv.rb deleted file mode 100755 index 3ae22ee..0000000 --- a/scripts/hansen2csv.rb +++ /dev/null @@ -1,10 +0,0 @@ -#!/usr/bin/env ruby -require 'csv' -require_relative '../../lazar/lib/lazar.rb' - -puts "ID,SMILES,Mutagenicity" -i = 0 -CSV.foreach(ARGV[0]) do |row| - puts [row[0],row[5],row[2]].join "," if i > 0 - i += 1 -end diff --git a/scripts/import.rb b/scripts/import.rb deleted file mode 100755 index f166265..0000000 --- a/scripts/import.rb +++ /dev/null @@ -1,7 +0,0 @@ -#!/usr/bin/env ruby -require_relative '../../lazar/lib/lazar' -include OpenTox - -ext = File.extname(ARGV[0]).sub('.','') -dataset = Dataset.send("from_#{ext}_file", ARGV[0]) -puts dataset.id.to_s diff --git a/scripts/lazar-crossvalidation.rb b/scripts/lazar-crossvalidation.rb new file mode 100755 index 0000000..a6792b7 --- /dev/null +++ b/scripts/lazar-crossvalidation.rb @@ -0,0 +1,8 @@ +#!/usr/bin/env ruby +require_relative '../../lazar/lib/lazar' +include OpenTox + +training_dataset = Dataset.from_csv_file ARGV[0] +model = Model::Lazar.create training_dataset: training_dataset +cv = Validation::CrossValidation.create model +puts cv.id.to_s diff --git a/scripts/import-pubchem.rb b/scripts/lazar-model.rb index c18bc81..879b539 100755 --- a/scripts/import-pubchem.rb +++ b/scripts/lazar-model.rb @@ -2,7 +2,5 @@ require_relative '../../lazar/lib/lazar' include OpenTox -dataset = Dataset.send("from_pubchem_aid", ARGV[0]) -puts dataset.id.to_s - - +model = Model::Lazar.from_csv_file ARGV[0] +puts model.id.to_s diff --git a/scripts/merge.rb b/scripts/merge.rb deleted file mode 100755 index 5e9dac4..0000000 --- a/scripts/merge.rb +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env ruby -require_relative '../../lazar/lib/lazar.rb' -include OpenTox - -hansen = Dataset.find File.read(ARGV[0]).chomp -efsa = Dataset.find File.read(ARGV[1]).chomp -kazius = Dataset.find File.read(ARGV[2]).chomp -datasets = [hansen,efsa,kazius] -map = {"mutagen" => "1", "nonmutagen" => "0"} -dataset = Dataset.merge datasets: datasets, features: datasets.collect{|d| d.bioactivity_features.first}, value_maps: [nil,nil,map], keep_original_features: true, remove_duplicates: true -puts dataset.id diff --git a/scripts/model.rb b/scripts/model.rb deleted file mode 100755 index b86d33d..0000000 --- a/scripts/model.rb +++ /dev/null @@ -1,7 +0,0 @@ -#!/usr/bin/env ruby -require_relative '../../lazar/lib/lazar' -include OpenTox - -dataset = Dataset.find File.read(ARGV[0]).chomp -model_validation = Model::Validation.from_dataset training_dataset: dataset, prediction_feature: dataset.merged_features.first, species: "Salmonella typhimurium", endpoint: "Mutagenicity", repeats: 3 -puts model_validation.id.to_s diff --git a/scripts/pa_neighbor_sets.rb b/scripts/pa_neighbor_sets.rb deleted file mode 100755 index 1975ffd..0000000 --- a/scripts/pa_neighbor_sets.rb +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/env ruby -require_relative '../../lazar/lib/lazar' -include OpenTox - -results = JSON.parse File.read(ARGV[0]) -neighbors = [] -results.each do |s,r| - neighbors << r["neighbors"] if r["value"] -end - -o = { - :nr_predictions => neighbors.size, - :unique_neighbor_sets => neighbors.uniq.size, - :unique_neighbor_sets_without_similarity => neighbors.collect{|s| s.collect{|n| n["id"]}.sort}.uniq.size -} -puts o.to_json diff --git a/scripts/pa_summary.rb b/scripts/pa_summary.rb deleted file mode 100755 index 7487e3e..0000000 --- a/scripts/pa_summary.rb +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env ruby -require_relative '../../lazar/lib/lazar' -include OpenTox - -results = JSON.parse File.read(ARGV[0]) -results.each do |s,r| - if r["value"] - puts [ - s, - r["warnings"], - r["value"], - r["probabilities"]["0"], - r["probabilities"]["1"], - r["neighbors"].size, - ].join(",") - else - r["neighbors"] ? n = r["neighbors"].size : n = nil - puts [ - s, - r["warnings"], - r["value"], - nil, - nil, - n - ].join(",") - end -end diff --git a/scripts/predict.rb b/scripts/predict.rb deleted file mode 100755 index 57d5a8b..0000000 --- a/scripts/predict.rb +++ /dev/null @@ -1,8 +0,0 @@ -#!/usr/bin/env ruby -require_relative '../../lazar/lib/lazar' -include OpenTox - -dataset = Dataset.find(File.read(ARGV[0]).chomp) -model = Model::Validation.find(File.read(ARGV[1]).chomp) -predictions = model.predict dataset.compounds # avoid dataset prediction to keep neighbors -puts predictions.to_json |