summaryrefslogtreecommitdiff
path: root/scripts
diff options
context:
space:
mode:
authorChristoph Helma <helma@in-silico.ch>2019-08-19 15:20:28 +0200
committerChristoph Helma <helma@in-silico.ch>2019-08-19 15:20:28 +0200
commitb82b4e640a11f3821b2bcf78fbbeead5d80b9066 (patch)
treec0020cb804f3439db3af5248eed4b99cbbb91a79 /scripts
parentb6ad21e340bce9ba2a2ad09fe48c656f0c2e3905 (diff)
obsolete files removed, adjusted export scripts
Diffstat (limited to 'scripts')
-rwxr-xr-xscripts/cv-tensorflow-confusion-matrix.rb24
-rwxr-xr-xscripts/efsa2csv.rb29
-rwxr-xr-xscripts/export-fingerprints.rb5
-rwxr-xr-xscripts/export.rb3
-rwxr-xr-xscripts/hansen2csv.rb10
-rwxr-xr-xscripts/import.rb7
-rwxr-xr-xscripts/lazar-crossvalidation.rb8
-rwxr-xr-xscripts/lazar-model.rb (renamed from scripts/import-pubchem.rb)6
-rwxr-xr-xscripts/merge.rb11
-rwxr-xr-xscripts/model.rb7
-rwxr-xr-xscripts/pa_neighbor_sets.rb16
-rwxr-xr-xscripts/pa_summary.rb27
-rwxr-xr-xscripts/predict.rb8
13 files changed, 39 insertions, 122 deletions
diff --git a/scripts/cv-tensorflow-confusion-matrix.rb b/scripts/cv-tensorflow-confusion-matrix.rb
new file mode 100755
index 0000000..067519b
--- /dev/null
+++ b/scripts/cv-tensorflow-confusion-matrix.rb
@@ -0,0 +1,24 @@
+#!/usr/bin/env ruby
+require 'csv'
+
+tp = 0
+fp = 0
+tn = 0
+fn = 0
+
+pred = CSV.read(ARGV[0],headers: true,:col_sep => ",")
+act = CSV.read(File.join(File.dirname(ARGV[0]),"GenoTox-database.csv"),headers: true,:col_sep => ",")
+
+pred.each_with_index do |row,i|
+
+ row[1].to_f < 0.5 ? p = 0 : p = 1
+ a = act[i].to_h["GENO.Outcome"].to_i
+
+ tp += 1 if a == 1 and p == 1
+ tn += 1 if a == 0 and p == 0
+ fp += 1 if a == 0 and p == 1
+ fn += 1 if a == 1 and p == 0
+
+end
+
+puts "#{tp},#{fp}\n#{fn},#{tn}"
diff --git a/scripts/efsa2csv.rb b/scripts/efsa2csv.rb
deleted file mode 100755
index 5251cae..0000000
--- a/scripts/efsa2csv.rb
+++ /dev/null
@@ -1,29 +0,0 @@
-#!/usr/bin/env ruby
-require 'csv'
-require_relative '../../lazar/lib/lazar.rb'
-
-i = 0
-db = {}
-CSV.foreach(ARGV[0], :encoding => "UTF-8", :col_sep => "\t", :liberal_parsing => true) do |row|
- #STDERR.puts i if i%100 == 0
- if i > 0 and row[11] and !row[11].empty? and row[24].match(/Salmonella/i) and ( row[25].match("TA 98") or row[25].match("TA 100") ) and row[33]
- begin
- c = OpenTox::Compound.from_smiles(row[11].gsub('"','')).smiles
- rescue
- c = OpenTox::Compound.from_inchi(row[12]).smiles # some smiles (row[11]) contain non-parseable characters
- end
- db[c] ||= {}
- db[c][:id] ||= row[2]
- if row[33].match(/Positiv/i)
- db[c][:value] = 1 # at least one positive result in TA 98 or TA 100
- elsif row[33].match(/Negativ/i)
- db[c][:value] ||= 0
- end
- end
- i += 1
-end
-
-puts "ID,SMILES,Mutagenicity"
-db.each do |s,v|
- puts [v[:id],s,v[:value]].join ","
-end
diff --git a/scripts/export-fingerprints.rb b/scripts/export-fingerprints.rb
index 5c54b96..6b9c3db 100755
--- a/scripts/export-fingerprints.rb
+++ b/scripts/export-fingerprints.rb
@@ -2,5 +2,6 @@
require_relative '../../lazar/lib/lazar'
include OpenTox
-dataset = Dataset.find File.read(ARGV[0]).chomp
-puts dataset.to_csv(default_fingerprints:true)
+cv = Validation::CrossValidation.find File.read(ARGV[0]).chomp
+dataset = cv.model.training_dataset
+puts dataset.to_fingerprint_csv
diff --git a/scripts/export.rb b/scripts/export.rb
index 87e2921..76d201b 100755
--- a/scripts/export.rb
+++ b/scripts/export.rb
@@ -2,5 +2,6 @@
require_relative '../../lazar/lib/lazar'
include OpenTox
-dataset = Dataset.find File.read(ARGV[0]).chomp
+cv = Validation::CrossValidation.find File.read(ARGV[0]).chomp
+dataset = cv.model.training_dataset
puts dataset.send("to_#{ARGV[1]}")
diff --git a/scripts/hansen2csv.rb b/scripts/hansen2csv.rb
deleted file mode 100755
index 3ae22ee..0000000
--- a/scripts/hansen2csv.rb
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/usr/bin/env ruby
-require 'csv'
-require_relative '../../lazar/lib/lazar.rb'
-
-puts "ID,SMILES,Mutagenicity"
-i = 0
-CSV.foreach(ARGV[0]) do |row|
- puts [row[0],row[5],row[2]].join "," if i > 0
- i += 1
-end
diff --git a/scripts/import.rb b/scripts/import.rb
deleted file mode 100755
index f166265..0000000
--- a/scripts/import.rb
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/usr/bin/env ruby
-require_relative '../../lazar/lib/lazar'
-include OpenTox
-
-ext = File.extname(ARGV[0]).sub('.','')
-dataset = Dataset.send("from_#{ext}_file", ARGV[0])
-puts dataset.id.to_s
diff --git a/scripts/lazar-crossvalidation.rb b/scripts/lazar-crossvalidation.rb
new file mode 100755
index 0000000..a6792b7
--- /dev/null
+++ b/scripts/lazar-crossvalidation.rb
@@ -0,0 +1,8 @@
+#!/usr/bin/env ruby
+require_relative '../../lazar/lib/lazar'
+include OpenTox
+
+training_dataset = Dataset.from_csv_file ARGV[0]
+model = Model::Lazar.create training_dataset: training_dataset
+cv = Validation::CrossValidation.create model
+puts cv.id.to_s
diff --git a/scripts/import-pubchem.rb b/scripts/lazar-model.rb
index c18bc81..879b539 100755
--- a/scripts/import-pubchem.rb
+++ b/scripts/lazar-model.rb
@@ -2,7 +2,5 @@
require_relative '../../lazar/lib/lazar'
include OpenTox
-dataset = Dataset.send("from_pubchem_aid", ARGV[0])
-puts dataset.id.to_s
-
-
+model = Model::Lazar.from_csv_file ARGV[0]
+puts model.id.to_s
diff --git a/scripts/merge.rb b/scripts/merge.rb
deleted file mode 100755
index 5e9dac4..0000000
--- a/scripts/merge.rb
+++ /dev/null
@@ -1,11 +0,0 @@
-#!/usr/bin/env ruby
-require_relative '../../lazar/lib/lazar.rb'
-include OpenTox
-
-hansen = Dataset.find File.read(ARGV[0]).chomp
-efsa = Dataset.find File.read(ARGV[1]).chomp
-kazius = Dataset.find File.read(ARGV[2]).chomp
-datasets = [hansen,efsa,kazius]
-map = {"mutagen" => "1", "nonmutagen" => "0"}
-dataset = Dataset.merge datasets: datasets, features: datasets.collect{|d| d.bioactivity_features.first}, value_maps: [nil,nil,map], keep_original_features: true, remove_duplicates: true
-puts dataset.id
diff --git a/scripts/model.rb b/scripts/model.rb
deleted file mode 100755
index b86d33d..0000000
--- a/scripts/model.rb
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/usr/bin/env ruby
-require_relative '../../lazar/lib/lazar'
-include OpenTox
-
-dataset = Dataset.find File.read(ARGV[0]).chomp
-model_validation = Model::Validation.from_dataset training_dataset: dataset, prediction_feature: dataset.merged_features.first, species: "Salmonella typhimurium", endpoint: "Mutagenicity", repeats: 3
-puts model_validation.id.to_s
diff --git a/scripts/pa_neighbor_sets.rb b/scripts/pa_neighbor_sets.rb
deleted file mode 100755
index 1975ffd..0000000
--- a/scripts/pa_neighbor_sets.rb
+++ /dev/null
@@ -1,16 +0,0 @@
-#!/usr/bin/env ruby
-require_relative '../../lazar/lib/lazar'
-include OpenTox
-
-results = JSON.parse File.read(ARGV[0])
-neighbors = []
-results.each do |s,r|
- neighbors << r["neighbors"] if r["value"]
-end
-
-o = {
- :nr_predictions => neighbors.size,
- :unique_neighbor_sets => neighbors.uniq.size,
- :unique_neighbor_sets_without_similarity => neighbors.collect{|s| s.collect{|n| n["id"]}.sort}.uniq.size
-}
-puts o.to_json
diff --git a/scripts/pa_summary.rb b/scripts/pa_summary.rb
deleted file mode 100755
index 7487e3e..0000000
--- a/scripts/pa_summary.rb
+++ /dev/null
@@ -1,27 +0,0 @@
-#!/usr/bin/env ruby
-require_relative '../../lazar/lib/lazar'
-include OpenTox
-
-results = JSON.parse File.read(ARGV[0])
-results.each do |s,r|
- if r["value"]
- puts [
- s,
- r["warnings"],
- r["value"],
- r["probabilities"]["0"],
- r["probabilities"]["1"],
- r["neighbors"].size,
- ].join(",")
- else
- r["neighbors"] ? n = r["neighbors"].size : n = nil
- puts [
- s,
- r["warnings"],
- r["value"],
- nil,
- nil,
- n
- ].join(",")
- end
-end
diff --git a/scripts/predict.rb b/scripts/predict.rb
deleted file mode 100755
index 57d5a8b..0000000
--- a/scripts/predict.rb
+++ /dev/null
@@ -1,8 +0,0 @@
-#!/usr/bin/env ruby
-require_relative '../../lazar/lib/lazar'
-include OpenTox
-
-dataset = Dataset.find(File.read(ARGV[0]).chomp)
-model = Model::Validation.find(File.read(ARGV[1]).chomp)
-predictions = model.predict dataset.compounds # avoid dataset prediction to keep neighbors
-puts predictions.to_json