diff options
author | Christoph Helma <helma@in-silico.ch> | 2021-02-18 21:59:37 +0100 |
---|---|---|
committer | Christoph Helma <helma@in-silico.ch> | 2021-02-18 21:59:37 +0100 |
commit | 3af0c3d5c5b7f7d506a4582bbe3dca7d22bbefcc (patch) | |
tree | 66a0f989c01fdac9085e9d22961fae2de0b568f7 /scripts/data.rb | |
parent | 9901f99e546619121a5dc9f31e82865198e7b912 (diff) |
further cleanup, detailed pa-predictions separated, text modified until results
Diffstat (limited to 'scripts/data.rb')
-rwxr-xr-x | scripts/data.rb | 29 |
1 files changed, 29 insertions, 0 deletions
#!/usr/bin/env ruby
# frozen_string_literal: true

# scripts/data.rb (added as a new file, mode 100755, blob e834677).
#
# Merges the summary.yaml files of the cross-validation and pyrrolizidine
# alkaloid directories, adds a few counts derived from CSV files, and prints
# the combined structure as YAML on stdout.
#
# All paths are relative to the current working directory.

require 'yaml'

# Zero-based column range of pa-predictions.csv that holds the PA group
# columns (column 0 is skipped).
PA_GROUP_COLUMNS = (1..9).freeze

# Loads a YAML document without class restrictions.
#
# The summaries must yield Symbol keys (the code below reads data[:cv] and
# data[:pa]). Since Psych 4, YAML.load_file is a restricted load that rejects
# Symbols, so the unrestricted loader is used where it exists; older Psych
# versions fall back to YAML.load_file, which is unrestricted there.
#
# NOTE(review): this assumes the summary.yaml files are trusted, locally
# generated artefacts - confirm. For untrusted input use
# YAML.safe_load(..., permitted_classes: [Symbol]) instead.
#
# @param path [String] path of the YAML file
# @return [Object] the deserialised document
def load_summary(path)
  if YAML.respond_to?(:unsafe_load_file)
    YAML.unsafe_load_file(path)
  else
    YAML.load_file(path)
  end
end

data = {}
%w[crossvalidations pyrrolizidine-alkaloids].each do |dir|
  # Shallow merge: a top-level key present in both files is taken from the later one.
  data.merge!(load_summary(File.join(dir, 'summary.yaml')))
end

# Line counts of the first CSV column (total and distinct); 1 is subtracted
# from each, presumably to discount a header row - confirm against the CSV.
data[:cv][:n] = `cut -f1 -d ',' mutagenicity/mutagenicity.csv | wc -l`.chomp.to_i - 1
data[:cv][:n_uniq] = `cut -f1 -d ',' mutagenicity/mutagenicity.csv | sort -u | wc -l`.chomp.to_i - 1

# The first line of the CDK file is taken off as the column header; every
# remaining line counts as one compound. Two header columns are not counted
# as descriptors (presumably identifier and activity - confirm).
cdk_lines = File.readlines('mutagenicity/cdk/mutagenicity-mod-2.new.csv')
cdk_header = cdk_lines.shift
data[:cv][:cdk] = {
  n_descriptors: cdk_header.split(',').size - 2,
  n_compounds: cdk_lines.size
}

# Sum the values of each PA group column over all rows of pa-predictions.csv.
pa_lines = File.readlines('pyrrolizidine-alkaloids/pa-predictions.csv')
# NOTE(review): `sub` replaces only the FIRST space or hyphen of a group name;
# kept as is so that the emitted keys do not change.
pa_groups = pa_lines.shift.chomp.split(',')[PA_GROUP_COLUMNS].map { |g| g.sub(/[ -]/, '_').to_sym }
data[:pa][:groups] = pa_groups.each_with_object({}) { |g, groups| groups[g] = { n: 0 } }

pa_lines.each do |line|
  # A blank line yields nil for the slice; Array() turns that into "no values"
  # instead of raising NoMethodError.
  values = Array(line.chomp.split(',')[PA_GROUP_COLUMNS])
  # Pair by header position: missing values add 0 (nil.to_i), surplus values
  # without a matching header column are ignored.
  pa_groups.zip(values).each do |group, value|
    data[:pa][:groups][group][:n] += value.to_i
  end
end

puts data.to_yaml