#!/usr/bin/env ruby
# Merges the summary.yaml files from the "crossvalidations" and
# "pyrrolizidine-alkaloids" directories, adds counts derived from the CSV
# files listed below, and prints the combined structure as YAML on stdout.
#
# All paths are relative to the current working directory.
# Raises Errno::ENOENT if an input file is missing and KeyError if the merged
# summaries lack the :cv or :pa top-level key.
require 'yaml'

SUMMARY_DIRS       = ["crossvalidations", "pyrrolizidine-alkaloids"].freeze
MUTAGENICITY_CSV   = "mutagenicity/mutagenicity.csv"
MUTAGENICITY_CDK   = "mutagenicity/mutagenicity-cdk.csv"
PA_GROUPS_CSV      = "pyrrolizidine-alkaloids/pa-groups.csv"
PA_PREDICTIONS_CSV = "pyrrolizidine-alkaloids/pa-predictions.csv"

# Converts a CSV column header into the symbol used as a hash key.
# NOTE(review): `sub` replaces only the FIRST space or hyphen. This is kept
# as-is so that already-emitted key names do not change; confirm whether
# `gsub` was intended.
def header_key(name)
  name.sub(/[ -]/, "_").to_sym
end

# Counts the rows of a comma-separated file by their first field.
# Returns { n: <rows minus header>, n_uniq: <distinct first fields minus header> }.
# Fields are split on every comma; quoted fields are not interpreted.
def first_column_counts(path)
  first_fields = File.foreach(path).map { |line| line.chomp.split(",", 2).first }
  { n: first_fields.size - 1, n_uniq: first_fields.uniq.size - 1 }
end

# Returns { n_descriptors:, n_compounds: } for a file with one header line:
# header columns minus the first one, and the number of remaining lines.
def cdk_stats(path)
  header, *rows = File.readlines(path)
  { n_descriptors: header.split(",").size - 1, n_compounds: rows.size }
end

# Reads the group-membership table (first column: compound identifier,
# presumably a SMILES string; remaining columns: one flag per group).
#
# Returns [stats, members]:
#   stats   - { group => { n:, mut:, non_mut: } }, where :n is the sum of the
#             group's column (via to_i) and the prediction counters start at 0
#   members - { identifier => [groups whose flag is exactly "1"] }
def read_pa_groups(path)
  header, *rows = File.readlines(path)
  names = header.chomp.split(",").drop(1).map { |name| header_key(name) }
  stats = names.each_with_object({}) do |name, acc|
    acc[name] = { n: 0, mut: 0, non_mut: 0 }
  end

  members = Hash.new { |hash, key| hash[key] = [] }
  rows.each do |row|
    identifier, *flags = row.chomp.split(",")
    flags.each_with_index do |flag, idx|
      group = names[idx]
      stats[group][:n] += flag.to_i
      members[identifier] |= [group] if flag == "1"
    end
  end
  [stats, members]
end

# Adds prediction counts to +stats+. Every value after the first column of a
# row counts once for each group the row's compound belongs to: "1" increments
# :mut, "0" increments :non_mut, anything else is ignored. The header line is
# skipped.
def tally_predictions(path, stats, members)
  _header, *rows = File.readlines(path)
  rows.each do |row|
    identifier, *calls = row.chomp.split(",")
    mutagenic = calls.count("1")
    non_mutagenic = calls.count("0")
    members.fetch(identifier, []).each do |group|
      stats[group][:mut] += mutagenic
      stats[group][:non_mut] += non_mutagenic
    end
  end
end

# Percentage of +part+ in +total+, rounded to the nearest integer.
# Float arithmetic is required: with integer division the quotient is already
# truncated before `round` is applied. Returns 0 when +total+ is zero so that
# groups without predictions do not abort the whole report.
def percentage(part, total)
  return 0 if total.zero?

  (100.0 * part / total).round
end

# Builds the complete summary hash that is printed at the end of the script.
def build_summary
  data = {}
  SUMMARY_DIRS.each do |dir|
    data.merge!(YAML.load_file(File.join(dir, "summary.yaml")))
  end

  cv = data.fetch(:cv)
  cv.merge!(first_column_counts(MUTAGENICITY_CSV))
  cv[:cdk] = cdk_stats(MUTAGENICITY_CDK)

  stats, members = read_pa_groups(PA_GROUPS_CSV)
  tally_predictions(PA_PREDICTIONS_CSV, stats, members)
  stats.each_value do |values|
    values[:n_pred] = values[:mut] + values[:non_mut]
    values[:mut_perc] = percentage(values[:mut], values[:n_pred])
    values[:non_mut_perc] = percentage(values[:non_mut], values[:n_pred])
  end
  data.fetch(:pa)[:groups] = stats

  data
end

puts build_summary.to_yaml