#!/usr/bin/env ruby
# frozen_string_literal: true

# For every group and every prediction algorithm, prints the percentage of the
# group's compounds for which the algorithm's prediction column is "1".
#
# Usage: SCRIPT GROUP_CSV PREDICTION_CSV
#
# Both inputs are comma-separated files with a header row. Column 1 of every
# row is the compound identifier (presumably a SMILES string -- the key is
# only ever used for hash lookup). In GROUP_CSV the remaining header cells are
# group names and a data cell equal to "1" marks membership. In PREDICTION_CSV
# the remaining header cells are algorithm names and a data cell equal to "1"
# counts as a positive prediction.
#
# Output (stdout) is CSV with single-quoted text fields:
#   'PA Group','Model','Frequency'

# Reads +path+ and returns its lines as arrays of comma-separated fields.
def read_table(path)
  File.readlines(path).map { |line| line.chomp.split(",") }
end

# Splits parsed rows into [column names without the id column, data rows].
# An empty table yields [[], []] instead of failing on a missing header.
def split_header(rows)
  header, *body = rows
  [Array(header).drop(1), body]
end

# Builds { compound id => [names of the groups whose flag is "1"] }.
# Rows are not modified.
def group_membership(group_rows, group_names)
  group_rows.each_with_object({}) do |(id, *flags), membership|
    members = (membership[id] ||= [])
    flags.each_with_index { |flag, i| members << group_names[i] if flag == "1" }
  end
end

# Counts predictions per group and algorithm.
#
# Returns [positives, totals], both shaped { group => { algo => Integer } }:
# totals counts the group's compounds seen for that algorithm, positives
# counts those whose prediction is "1". Every group/algorithm pair that
# occurs is initialised to 0 so it still shows up in the report.
#
# A compound that has predictions but no entry in +membership+ is reported on
# stderr and contributes nothing (previously this raised NoMethodError).
def tally_predictions(prediction_rows, algo_names, group_names, membership)
  positives = {}
  totals = {}
  prediction_rows.each do |id, *values|
    next if values.empty? # blank line or id-only row: nothing to count

    members = membership[id]
    if members.nil?
      warn "warning: no group data for #{id.inspect}; ignoring its predictions"
      members = []
    end

    values.each_with_index do |value, i|
      algo = algo_names[i]
      group_names.each do |group|
        group_hits = (positives[group] ||= {})
        group_totals = (totals[group] ||= {})
        group_hits[algo] ||= 0
        group_totals[algo] ||= 0
        next unless members.include?(group)

        group_totals[algo] += 1
        group_hits[algo] += 1 if value == "1"
      end
    end
  end
  [positives, totals]
end

# Turns an algorithm column name into the label printed in the report.
# The "high-confidence" replacement runs before upcasing, the LR renames after.
def model_label(algo)
  algo.sub("high-confidence", "HC").upcase.sub(/-LR$/, "-LR-sgd").sub("LR2", "LR-scikit")
end

# Formats one CSV line per group/algorithm pair, in insertion order.
# The frequency is 100.0 * positives / total; a pair with a total of 0
# yields NaN (float division), which is printed as-is.
def frequency_rows(positives, totals)
  positives.flat_map do |group, by_algo|
    by_algo.map do |algo, hits|
      ["'#{group}'", "'#{model_label(algo)}'", 100.0 * hits / totals[group][algo]].join(",")
    end
  end
end

# Reads both CSV files and prints the report to stdout.
def main(group_path, prediction_path)
  group_names, group_rows = split_header(read_table(group_path))
  algo_names, prediction_rows = split_header(read_table(prediction_path))

  membership = group_membership(group_rows, group_names)
  positives, totals = tally_predictions(prediction_rows, algo_names, group_names, membership)

  puts "'PA Group','Model','Frequency'"
  # Print row by row: `puts []` would emit a stray blank line.
  frequency_rows(positives, totals).each { |row| puts row }
end

if __FILE__ == $PROGRAM_NAME
  if ARGV.length >= 2
    main(ARGV[0], ARGV[1])
  else
    warn "Usage: #{File.basename($PROGRAM_NAME)} GROUP_CSV PREDICTION_CSV"
  end
end