summary refs log tree commit diff
path: root/scripts
diff options
context:
space:
mode:
author Christoph Helma <helma@in-silico.ch> 2018-11-05 17:37:29 +0100
committer Christoph Helma <helma@in-silico.ch> 2018-11-05 17:37:29 +0100
commit 2361c6d9259e7ec8eea97f9bff3b24d105b3662a (patch)
tree c8c6a869cf0af40b03ae4bd33ae48cbe4dfd5532 /scripts
parent ba5372ce84bd89a937478d32eb067995214f1331 (diff)
adjustments for latest lazar version
Diffstat (limited to 'scripts')
-rwxr-xr-x scripts/crossvalidation-summary.rb 9
-rwxr-xr-x scripts/drop-database.rb 5
-rwxr-xr-x scripts/efsa2csv.rb 29
-rwxr-xr-x scripts/export.rb 7
-rwxr-xr-x scripts/hansen2csv.rb 10
-rwxr-xr-x scripts/import-pubchem.rb 8
-rwxr-xr-x scripts/import.rb 7
-rwxr-xr-x scripts/json2csv.rb 28
-rwxr-xr-x scripts/merge.rb 11
-rwxr-xr-x scripts/model.rb 7
-rwxr-xr-x scripts/predict.rb 24
-rwxr-xr-x scripts/repeated_crossvalidation_summary.rb 12
12 files changed, 134 insertions, 23 deletions
diff --git a/scripts/crossvalidation-summary.rb b/scripts/crossvalidation-summary.rb
new file mode 100755
index 0000000..13b0dfa
--- /dev/null
+++ b/scripts/crossvalidation-summary.rb
@@ -0,0 +1,9 @@
+#!/usr/bin/env ruby
+require_relative '../../lazar/lib/lazar'
+include OpenTox
+
+# Print, as a pretty-printed JSON array, the statistics of every crossvalidation
+# of the Model::Validation whose id is stored in the file named by ARGV[0].
+validation = Model::Validation.find(File.read(ARGV[0]).chomp)
+summary = validation.crossvalidations.map(&:statistics)
+puts JSON.pretty_generate(summary)
diff --git a/scripts/drop-database.rb b/scripts/drop-database.rb
new file mode 100755
index 0000000..e263312
--- /dev/null
+++ b/scripts/drop-database.rb
@@ -0,0 +1,5 @@
+#!/usr/bin/env ruby
+require_relative '../../lazar/lib/lazar'
+# Destructive: drops the whole database behind the global $mongo client ($mongo is not set in this script — presumably by the lazar require above; confirm).
+$mongo.database.drop
+$gridfs = $mongo.database.fs # recreate GridFS indexes
diff --git a/scripts/efsa2csv.rb b/scripts/efsa2csv.rb
new file mode 100755
index 0000000..5251cae
--- /dev/null
+++ b/scripts/efsa2csv.rb
@@ -0,0 +1,29 @@
+#!/usr/bin/env ruby
+require 'csv'
+require_relative '../../lazar/lib/lazar.rb'
+
+# Reduce the tab-separated file ARGV[0] to one row per compound (keyed by SMILES):
+# value 1 once any kept row is positive, 0 if only negatives were seen, else empty.
+db = {}
+CSV.foreach(ARGV[0], encoding: "UTF-8", col_sep: "\t", liberal_parsing: true).with_index do |row, i|
+  next if i.zero? # header row
+  # Empty cells are parsed as nil, so coerce with to_s before matching.
+  smiles, strain, result = row[11].to_s, row[25].to_s, row[33].to_s
+  next if smiles.empty? || row[33].nil?
+  next unless row[24].to_s.match?(/Salmonella/i) && strain.match?(/TA 98|TA 100/)
+  begin
+    c = OpenTox::Compound.from_smiles(smiles.gsub('"', '')).smiles
+  rescue
+    c = OpenTox::Compound.from_inchi(row[12]).smiles # some smiles (row[11]) contain non-parseable characters
+  end
+  db[c] ||= {}
+  db[c][:id] ||= row[2]
+  if result.match?(/Positiv/i)
+    db[c][:value] = 1 # at least one positive result in TA 98 or TA 100
+  elsif result.match?(/Negativ/i)
+    db[c][:value] ||= 0
+  end
+end
+
+puts "ID,SMILES,Mutagenicity"
+db.each { |s, v| puts [v[:id], s, v[:value]].join(",") }
diff --git a/scripts/export.rb b/scripts/export.rb
new file mode 100755
index 0000000..afcb51d
--- /dev/null
+++ b/scripts/export.rb
@@ -0,0 +1,7 @@
+#!/usr/bin/env ruby
+require_relative '../../lazar/lib/lazar'
+include OpenTox
+
+# ARGV[0] is a file holding a dataset id; its extension selects the Dataset#to_<ext> method to call.
+exporter = "to_" + File.extname(ARGV[0]).sub('.','')
+Dataset.find(File.read(ARGV[0]).chomp).send(exporter) # NOTE(review): the result is discarded here — confirm the exporter emits its own output
diff --git a/scripts/hansen2csv.rb b/scripts/hansen2csv.rb
new file mode 100755
index 0000000..3ae22ee
--- /dev/null
+++ b/scripts/hansen2csv.rb
@@ -0,0 +1,10 @@
+#!/usr/bin/env ruby
+require 'csv'
+require_relative '../../lazar/lib/lazar.rb'
+
+# Re-emit columns 0, 5 and 2 of the CSV file ARGV[0] under a fixed header,
+# dropping the input's own first row.
+puts "ID,SMILES,Mutagenicity"
+CSV.foreach(ARGV[0]).with_index do |fields, line_no|
+  puts fields.values_at(0, 5, 2).join(",") unless line_no.zero?
+end
diff --git a/scripts/import-pubchem.rb b/scripts/import-pubchem.rb
new file mode 100755
index 0000000..c18bc81
--- /dev/null
+++ b/scripts/import-pubchem.rb
@@ -0,0 +1,8 @@
+#!/usr/bin/env ruby
+require_relative '../../lazar/lib/lazar'
+include OpenTox
+
+# Call Dataset.from_pubchem_aid with ARGV[0] (presumably a PubChem assay id,
+# going by the method name) and print the id of the dataset it returns.
+imported = Dataset.send(:from_pubchem_aid, ARGV[0])
+puts imported.id.to_s
diff --git a/scripts/import.rb b/scripts/import.rb
new file mode 100755
index 0000000..f166265
--- /dev/null
+++ b/scripts/import.rb
@@ -0,0 +1,7 @@
+#!/usr/bin/env ruby
+require_relative '../../lazar/lib/lazar'
+include OpenTox
+
+# Pick the Dataset.from_<ext>_file importer from ARGV[0]'s extension and print the id of the result.
+importer = "from_" + File.extname(ARGV[0]).sub('.','') + "_file"
+puts Dataset.send(importer, ARGV[0]).id.to_s
diff --git a/scripts/json2csv.rb b/scripts/json2csv.rb
new file mode 100755
index 0000000..7c61c5a
--- /dev/null
+++ b/scripts/json2csv.rb
@@ -0,0 +1,28 @@
+#!/usr/bin/env ruby
+require_relative '../../lazar/lib/lazar'
+include OpenTox
+
+# Flatten the prediction JSON in ARGV[0] into CSV rows on stdout, one row per
+# top-level key (emitted in the SMILES column).
+# NOTE(review): join(",") neither quotes fields nor keeps nested arrays intact,
+# so an Array-valued "warnings" entry would spill into extra columns — confirm.
+results = JSON.parse(File.read(ARGV[0]))
+endpoint = File.basename(ARGV[0]).sub("pa_","").capitalize
+puts "SMILES,#{endpoint},Probability(0),Probability(1),Nr Neighbors,Warnings"
+results.each do |smiles, r|
+  if r["value"]
+    probabilities = r["probabilities"].values_at("0", "1")
+    neighbor_count = r["neighbors"].size
+  else
+    # No prediction: leave both probability columns empty; neighbors may be absent.
+    probabilities = [nil, nil]
+    neighbor_count = r["neighbors"] ? r["neighbors"].size : nil
+  end
+  puts [
+    smiles,
+    r["value"],
+    *probabilities,
+    neighbor_count,
+    r["warnings"],
+  ].join(",")
+end
diff --git a/scripts/merge.rb b/scripts/merge.rb
new file mode 100755
index 0000000..5e9dac4
--- /dev/null
+++ b/scripts/merge.rb
@@ -0,0 +1,11 @@
+#!/usr/bin/env ruby
+require_relative '../../lazar/lib/lazar.rb'
+include OpenTox
+
+# Merge the three datasets whose ids are stored in the files ARGV[0], ARGV[1]
+# and ARGV[2] (hansen, efsa, kazius, in that order) and print the merged id.
+# Only the third dataset gets a value map ("mutagen"/"nonmutagen" to "1"/"0").
+datasets = [0, 1, 2].map { |n| Dataset.find(File.read(ARGV[n]).chomp) }
+kazius_labels = {"mutagen" => "1", "nonmutagen" => "0"}
+merged = Dataset.merge(
+  datasets: datasets,
+  features: datasets.map { |d| d.bioactivity_features.first },
+  value_maps: [nil, nil, kazius_labels],
+  keep_original_features: true,
+  remove_duplicates: true
+)
+puts merged.id
diff --git a/scripts/model.rb b/scripts/model.rb
new file mode 100755
index 0000000..b86d33d
--- /dev/null
+++ b/scripts/model.rb
@@ -0,0 +1,7 @@
+#!/usr/bin/env ruby
+require_relative '../../lazar/lib/lazar'
+include OpenTox
+
+# Call Model::Validation.from_dataset (3 repeats) on the dataset whose id is
+# stored in the file ARGV[0], using its first merged feature as the prediction
+# feature, and print the id of the returned object.
+training_data = Dataset.find(File.read(ARGV[0]).chomp)
+validation = Model::Validation.from_dataset(
+  training_dataset: training_data,
+  prediction_feature: training_data.merged_features.first,
+  species: "Salmonella typhimurium",
+  endpoint: "Mutagenicity",
+  repeats: 3
+)
+puts validation.id.to_s
diff --git a/scripts/predict.rb b/scripts/predict.rb
index 9161f45..57d5a8b 100755
--- a/scripts/predict.rb
+++ b/scripts/predict.rb
@@ -2,23 +2,7 @@
require_relative '../../lazar/lib/lazar'
include OpenTox
-train = Dataset.from_csv_file ARGV[0]
-
-model = Model::LazarClassification.create(training_dataset: train)
-
-n = 0
-results = {}
-File.open(ARGV[1]).each_line do |l|
- unless n == 0
- id,smi = l.chomp.split ","
- c = Compound.from_smiles smi
- result = model.predict c
- results[id] = result
- end
- n += 1
-end
-
-puts results.to_json
- #puts result.inspect
- #rcv = Validation::RepeatedCrossValidation.create(model)
-#puts "#{rcv.id}"
+# Predict every compound of the dataset (id in file ARGV[0]) with the model validation (id in file ARGV[1]) and print the result as JSON.
+dataset = Dataset.find(File.read(ARGV[0]).chomp)
+validated_model = Model::Validation.find(File.read(ARGV[1]).chomp)
+puts validated_model.predict(dataset.compounds).to_json # avoid dataset prediction to keep neighbors
diff --git a/scripts/repeated_crossvalidation_summary.rb b/scripts/repeated_crossvalidation_summary.rb
index add66a2..067fea8 100755
--- a/scripts/repeated_crossvalidation_summary.rb
+++ b/scripts/repeated_crossvalidation_summary.rb
@@ -3,7 +3,13 @@ require_relative '../../lazar/lib/lazar'
include OpenTox
summary = []
-Validation::RepeatedCrossValidation.find(File.read(ARGV[0]).chomp).crossvalidations.each do |cv|
- summary << cv.statistics
+#Validation::RepeatedCrossValidation.find(File.read(ARGV[0]).chomp).crossvalidations.each do |cv|
+# summary << cv.statistics
+#end
+# Print the id and statistics of each crossvalidation of the last record returned by
+# Validation::RepeatedCrossValidation.all.
+# NOTE(review): ARGV[0] is ignored here, unlike the commented-out lookup above; this looks
+# like debugging code — confirm before relying on it.
+Validation::RepeatedCrossValidation.all.last.crossvalidations.each do |cv|
+#cv = Validation::RepeatedCrossValidation.find("5bbb9151ca626916248b328b").crossvalidations.first
+  p cv.id
+  p cv.statistics
end
-puts JSON.pretty_generate(summary)
+
+#puts JSON.pretty_generate(summary)