summaryrefslogtreecommitdiff
path: root/scripts/data.rb
diff options
context:
space:
mode:
authorChristoph Helma <helma@in-silico.ch>2021-02-18 21:59:37 +0100
committerChristoph Helma <helma@in-silico.ch>2021-02-18 21:59:37 +0100
commit3af0c3d5c5b7f7d506a4582bbe3dca7d22bbefcc (patch)
tree66a0f989c01fdac9085e9d22961fae2de0b568f7 /scripts/data.rb
parent9901f99e546619121a5dc9f31e82865198e7b912 (diff)
further cleanup, detailed pa-predictions separated, text modified until results
Diffstat (limited to 'scripts/data.rb')
-rwxr-xr-xscripts/data.rb29
1 files changed, 29 insertions, 0 deletions
diff --git a/scripts/data.rb b/scripts/data.rb
new file mode 100755
index 0000000..e834677
--- /dev/null
+++ b/scripts/data.rb
@@ -0,0 +1,29 @@
+#!/usr/bin/env ruby
+require 'yaml'
+
+data = {}
+["crossvalidations","pyrrolizidine-alkaloids"].each do |dir|
+ data.merge!(YAML.load_file(File.join(dir,"summary.yaml")))
+end
+
+data[:cv][:n] = `cut -f1 -d ',' mutagenicity/mutagenicity.csv | wc -l`.chomp.to_i - 1
+data[:cv][:n_uniq] = `cut -f1 -d ',' mutagenicity/mutagenicity.csv | sort -u | wc -l`.chomp.to_i - 1
+
+data[:cv][:cdk] = {}
+cdk = File.readlines("mutagenicity/cdk/mutagenicity-mod-2.new.csv")
+data[:cv][:cdk][:n_descriptors] = cdk.shift.split(",").size-2
+data[:cv][:cdk][:n_compounds] = cdk.size
+
+
+
+
+data[:pa][:groups] = {}
+lines = File.readlines("pyrrolizidine-alkaloids/pa-predictions.csv")
+pa_groups = lines.shift.chomp.split(",")[1..9].collect{|g| g.sub(/[ -]/,"_").to_sym}
+pa_groups.each {|g| data[:pa][:groups][g] = {}; data[:pa][:groups][g][:n] = 0}
+lines.each do |l|
+ l.chomp.split(",")[1..9].each_with_index do |v,i|
+ data[:pa][:groups][pa_groups[i]][:n] += v.to_i
+ end
+end
+puts data.to_yaml