-
Notifications
You must be signed in to change notification settings - Fork 0
/
23_find_NP_regex.py
executable file
·86 lines (65 loc) · 2.19 KB
/
23_find_NP_regex.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#! /usr/bin/python
# 23_find_NP_regex.py
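# Find the FASTA records whose accession ID (the fourth '|'-separated field
# of the header) starts with NP. Write the accession ID and the sequence of
# each such record to a new FASTA file, using regex to split the header.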
# Jie Wang
# September 7, 2016
import argparse
import os.path
import sys
import re
# Header fields are separated by '|'. The pattern is a raw string (the
# non-raw '\|' is an invalid escape sequence) and is compiled once here
# instead of being looked up again for every header line.
HEADER_FIELD_SEP = re.compile(r'\|')

# Zero-based position of the accession ID among the '|'-separated fields.
ACCESSION_FIELD = 3


def parse_args(argv=None):
    """Parse the command-line options.

    argv: list of argument strings; None makes argparse read sys.argv[1:].

    Returns the argparse namespace with the attributes `input`, `output`
    and `verbose`.
    """
    parser = argparse.ArgumentParser(
        description='find the fasta header contains NP in the text. '
                    'Write the sequence and accession ID to the new fasta file')
    parser.add_argument('-i', '--input', help='input fasta data',
                        required=True)
    parser.add_argument('-o', '--output', help='output file in text format',
                        required=True)
    parser.add_argument('-v', '--verbose', help='increase verbosity',
                        action='store_true')
    return parser.parse_args(argv)


def check_paths(input_files, output_file, verbose=False):
    """Exit unless every input file exists and the output file does not.

    input_files: list of paths that must be existing regular files.
    output_file: path that must not already be a file (never overwritten).
    verbose: when true, name the first missing input file; otherwise
        report missing inputs with a single generic message.

    Raises SystemExit carrying the message. Passing the message to
    sys.exit() prints it to stderr and yields a non-zero exit status, so
    callers of the script can detect the failure.
    """
    if verbose:
        # check the paths one at a time to tell which one is missing
        for path in input_files:
            if not os.path.isfile(path):
                sys.exit('Input file {} does not exist!'.format(path))
    elif not all(map(os.path.isfile, input_files)):
        # check the paths in bulk; does not say which one is missing
        sys.exit('Missing input file(s)!')

    if os.path.isfile(output_file):
        sys.exit('output file {0} already exists'.format(output_file))


def write_np_records(fasta_in, fasta_out):
    """Copy the records whose accession ID starts with 'NP'.

    fasta_in: iterable of text lines in FASTA format.
    fasta_out: object with a write() method that receives the result.

    For every selected record the header is reduced to '>' plus the
    accession ID, and the sequence lines are joined into one line.
    Lines before the first header and records whose header has no
    accession field are skipped. The output always ends with a newline,
    even when no record was selected.
    """
    keep = False          # nothing is selected until an NP header is seen
    wrote_record = False  # tells whether a previous sequence must be ended

    for line in fasta_in:
        if line.startswith('>'):
            # Strip the line end first so that an accession in the last
            # field does not carry the newline into the output.
            fields = HEADER_FIELD_SEP.split(line.rstrip())
            if len(fields) > ACCESSION_FIELD:
                accession = fields[ACCESSION_FIELD]
            else:
                accession = ''
            keep = accession.startswith('NP')
            if keep:
                if wrote_record:
                    # terminate the sequence line of the previous record
                    fasta_out.write('\n')
                fasta_out.write('>{0}\n'.format(accession))
                wrote_record = True
            # a header carries no sequence data; read the next line
            continue
        if keep:
            fasta_out.write(line.rstrip())

    fasta_out.write('\n')


def main(argv=None):
    """Run the script: parse options, check the paths, filter the file.

    argv: list of argument strings; None makes argparse read sys.argv[1:].

    Raises SystemExit when the arguments are invalid, an input file is
    missing, or the output file already exists.
    """
    args = parse_args(argv)
    check_paths([args.input], args.output, args.verbose)
    # `with` closes both files even if reading or writing fails
    with open(args.input, 'r') as fasta_in, \
            open(args.output, 'w') as fasta_out:
        write_np_records(fasta_in, fasta_out)


if __name__ == '__main__':
    main()