#!/usr/bin/env ruby
# frozen_string_literal: true

# Builds one comma-separated summary table of the pyrrolizidine alkaloid
# compounds and prints it to stdout: the compound list is read first, then
# one or more columns are appended per prediction source (lazar MP2D, lazar
# PaDEL, every R outcome file and every TensorFlow prediction file found).
#
# All inputs are matched to compounds purely by row position, so every file
# must list the compounds in the same order as the compound list.

BASE_DIR = "pyrrolizidine-alkaloids"

# lazar predictions whose max_sim value is below this are flagged "F".
# NOTE(review): max_sim is presumably the similarity of the closest
# neighbor -- confirm against the lazar output format.
SIMILARITY_THRESHOLD = 0.5

# TensorFlow values strictly above this are written as class 1, else 0.
PROBABILITY_THRESHOLD = 0.5

# Yields [fields, row_index] for each row of the delimited file at +path+.
# With skip_header (the default) the first line is ignored and row_index
# counts data rows from 0, so it can index the table directly.
def each_data_row(path, separator, skip_header: true)
  File.foreach(path).with_index do |line, line_no|
    next if skip_header && line_no.zero?

    yield line.chomp.split(separator), (skip_header ? line_no - 1 : line_no)
  end
end

# Wraps +value+ in double quotes. Raises TypeError for nil, i.e. for a row
# that is too short to contain the field.
def quote(value)
  '"' + value + '"'
end

# Returns the high-confidence cell for a lazar prediction: "" when there is
# no prediction, "F" when max_similarity is below the threshold, else "T".
# A nil prediction counts as missing because String#split drops trailing
# empty fields, which turns an empty prediction at the end of a row into nil.
def confidence_flag(prediction, max_similarity)
  return "" if prediction.to_s.empty?

  max_similarity.to_f < SIMILARITY_THRESHOLD ? "F" : "T"
end

# Collapses the outcome fields of one R row into a single cell: 1 if any
# field is "1", otherwise 0 if any field is "0", otherwise nil.
def consensus_outcome(votes)
  return 1 if votes.include?("1")
  return 0 if votes.include?("0")

  nil
end

header = ["ID","CID","SMILES","Canonical SMILES","Measured","lazar-MP2D","lazar-MP2D-high-confidence","lazar-PaDEL","lazar-PaDEL-high-confidence"]
table = []

# Compound list (semicolon separated, header line): id, cid, name, SMILES.
each_data_row("#{BASE_DIR}/180920_PA_complete_SMILES.csv", ";") do |fields, _row|
  id, cid, _name, smiles = fields
  table << [id, cid, quote(smiles)]
end

# lazar MP2D predictions (comma separated, header line).
each_data_row("#{BASE_DIR}/lazar/pa-mp2d-predictions.csv", ",") do |fields, row|
  _id, canonical_smiles, measured, prediction, _p0, _p1, max_similarity = fields
  table[row].concat([quote(canonical_smiles), measured, prediction,
                     confidence_flag(prediction, max_similarity)])
end

# lazar PaDEL predictions. Unlike the MP2D file, every line is treated as
# data and there is no leading id field, so the first line maps to the
# first compound.
each_data_row("#{BASE_DIR}/lazar/pa-padel-predictions.csv", ",", skip_header: false) do |fields, row|
  _canonical_smiles, _measured, prediction, _p0, _p1, max_similarity = fields
  table[row].concat([prediction, confidence_flag(prediction, max_similarity)])
end

# One column per R outcome file. The paths are sorted so that the column
# order does not depend on the order in which the file system lists them.
Dir["#{BASE_DIR}/R/PA.*.outcome.csv"].sort.each do |path|
  header << "R-" + path.sub('pyrrolizidine-alkaloids/R/PA.', '').sub('.outcome.csv', '')
  each_data_row(path, ";") do |fields, row|
    votes = fields.drop(1) # the first field is not an outcome (presumably a row id)
    # Always append a cell, even when the row has neither a "1" nor a "0":
    # nil is printed as an empty field and keeps the later columns aligned
    # with their headers.
    table[row] << consensus_outcome(votes)
  end
end

# One column per TensorFlow prediction file, again in sorted path order.
Dir["#{BASE_DIR}/tensorflow/pred.*.v3-ext-Padel-2D.csv"].sort.each do |path|
  # "lr2" has to be replaced before "lr", otherwise it would become "LR-sgd2".
  header << "TF-" + path.sub('pyrrolizidine-alkaloids/tensorflow/pred.', '').sub('.v3-ext-Padel-2D.csv', '').sub("lr2", "LR-scikit").sub("lr", "LR-sgd").sub("rf", "RF").sub("nn", "NN")
  each_data_row(path, ",") do |fields, row|
    _id, probability = fields
    table[row] << (probability.to_f > PROBABILITY_THRESHOLD ? 1 : 0)
  end
end

puts header.join(",")
puts table.map { |row| row.join(",") }.join("\n")