-
Notifications
You must be signed in to change notification settings - Fork 0
/
27_fasta_to_dict.py
executable file
·46 lines (35 loc) · 1.35 KB
/
27_fasta_to_dict.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
#! /usr/bin/python
# 27_fasta_to_dict.py
# Read a FASTA-formatted file with the Biopython module and store its records in a dict.
import argparse
import os.path
import sys
from Bio import SeqIO
# Command-line interface: a mandatory input path (-i/--input) and an optional
# verbosity switch (-v/--verbose).
parser = argparse.ArgumentParser(
    description=('read fasta formatted file using biopython module '
                 'and store them in dict'),
)
parser.add_argument(
    '-i', '--input',
    required=True,
    help='input fasta data',
)
parser.add_argument(
    '-v', '--verbose',
    action='store_true',
    help='increase verbosity',
)
args = parser.parse_args()
# Check that the input file exists before it is handed to the FASTA parser.
inputFile = args.input
filesL = [inputFile]
if args.verbose:
    # Verbose mode: test the paths one at a time so the message can name the
    # missing file.  A dedicated loop variable is used so that ``inputFile``
    # is not rebound by the loop.
    for path in filesL:
        if not os.path.isfile(path):
            # sys.exit(<str>) writes the message to stderr and exits with
            # status 1, so calling shells/pipelines can detect the failure
            # (a bare sys.exit() would report success, status 0).
            sys.exit('Input file {} does not exist!'.format(path))
else:
    # Quiet mode: a single bulk check that only reports that something is
    # missing, not which path.
    if not all(map(os.path.isfile, filesL)):
        sys.exit('Missing input file(s)!')
# Everything above is set-up (argument parsing and input validation).
# Parse the FASTA file, collect the records into a dict, then print each
# key together with the length of the record stored under it.
fastaRecords = SeqIO.parse(inputFile, 'fasta')
faSeqDict = SeqIO.to_dict(fastaRecords)
for seqId, record in faSeqDict.items():
    print('Sequence ID: {}'.format(seqId))
    print('Sequence length: {}'.format(len(record)))