Skip to content

Commit

Permalink
Merge pull request #58 from AntonJMLarsson/master
Browse files Browse the repository at this point in the history
Speed up bam splitting for bcall
  • Loading branch information
caleblareau authored Jun 1, 2022
2 parents f0a27c1 b0c1650 commit 9606660
Showing 1 changed file with 12 additions and 11 deletions.
23 changes: 12 additions & 11 deletions mgatk/bin/python/split_barcoded_bam.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,35 +16,35 @@
# File name of the input BAM without its directory part
base = os.path.basename(bamfile)
# Same name with the final extension removed
basename, _ext = os.path.splitext(base)

def getBarcode(read):
    '''
    Parse out the barcode per-read

    Returns the value stored under the ``barcodeTag`` tag of the read,
    or the string "NA" when the read does not carry that tag.
    '''
    if read.has_tag(barcodeTag):
        return read.get_tag(barcodeTag)
    return "NA"


def writePassingReads(bc_dict, mtchr):
    '''
    Write out reads to their corresponding files based on a barcode index

    bc_dict maps each whitelisted barcode to the position of its output
    handle in ``fopen``; mtchr is the contig whose reads are fetched from
    ``bamfile`` (it is converted with str() before the fetch).
    '''
    # Open the input inside a with-block so the handle is closed even if a
    # write fails part-way through
    with pysam.AlignmentFile(bamfile, 'rb') as bam:
        for read in bam.fetch(str(mtchr), multiple_iterators=False):
            # If read barcode is in whitelist, then write it out.
            # Position 0 is valid, so test against None rather than truthiness.
            idx = bc_dict.get(getBarcode(read))
            if idx is not None:
                fopen[idx].write(read)

# Read in the barcodes: one per line, surrounding whitespace removed
with open(bcfile) as barcode_file_handle:
    content = barcode_file_handle.readlines()
bc = [x.strip() for x in content]

# Open up a bunch of files and write out reads for valid barcodes
@contextmanager
Expand All @@ -61,8 +61,9 @@ def multi_file_manager(files, mode='rt'):

# Final loop to write out passing reads
# One output BAM path per barcode, in the same order as bc
bambcfiles = [outfolder + "/" + bc1 + ".bam" for bc1 in bc]
# Barcode -> position in bambcfiles, so each read needs a dict lookup
# rather than a scan of the barcode list
bc_dict = {bc1: i for i, bc1 in enumerate(bc)}
with multi_file_manager(bambcfiles) as fopen:
    writePassingReads(bc_dict, mtchr)



0 comments on commit 9606660

Please sign in to comment.