#!/usr/bin/python
# We finally decided to import anthy's zipcode.t into ibus-anthy as UTF-8
# because if the engine greps for digits without a hyphen, it can cause
# a timeout. If digits without a hyphen are sent to anthy, digits with
# a hyphen also need to be sent to anthy, so the lookup could include
# too many unnecessary candidates.
# We also want to install the file under the name 'zipcode.t' to simplify
# the engine.
# for python2
from __future__ import print_function
import codecs
import sys
# Code-point distance from an ASCII character to its fullwidth form
# (e.g. ord('0') + 0xfee0 == 0xff10, FULLWIDTH DIGIT ZERO).
_WIDE_OFFSET = 0xfee0
# Code point emitted as the "wide hyphen" inside a converted zip code.
_WIDE_HYPHEN = 0x30fc
# Zero-based index of the character after which the hyphen is inserted.
_HYPHEN_AFTER_INDEX = 2
# Fixed second field written for every converted entry.
_ENTRY_FIELD = '#T35*500'


def _wide_chr(code_point):
    """Return the text character for code_point on Python 2 and Python 3."""
    if sys.version_info[0] < 3:
        # Python 2: chr() only covers 0..255, so unichr() is required.
        return unichr(code_point)
    return chr(code_point)


def _widen_zipcode(ascii_code):
    """Convert an ASCII zip code to wide characters with a wide hyphen.

    Every character is shifted by _WIDE_OFFSET, and the wide hyphen is
    inserted right after the third character.
    """
    pieces = []
    for index, char in enumerate(ascii_code):
        pieces.append(_wide_chr(_WIDE_OFFSET + ord(char)))
        if index == _HYPHEN_AFTER_INDEX:
            pieces.append(_wide_chr(_WIDE_HYPHEN))
    return ''.join(pieces)


def _read_contents(path):
    """Return the whole text of path, preferring UTF-8 decoding.

    If the file does not decode as UTF-8, a warning is printed to stderr
    and the file is read again with the built-in open() and its defaults.
    """
    try:
        with codecs.open(path, 'r', 'utf-8') as source:
            return source.read()
    except UnicodeDecodeError:
        print('Your file is not of UTF-8? %s' % path, file=sys.stderr)
        with open(path) as source:
            return source.read()


def _convert_line(line):
    """Return the output text for one input line, or None to drop it.

    Empty lines and '#' comment lines are passed through unchanged.
    Lines with fewer than three whitespace-separated fields are dropped.
    Otherwise the first field is widened unless it already starts with a
    character above 0xff, the second field is replaced by _ENTRY_FIELD
    and the third field is kept; any further fields are discarded.
    """
    if len(line) == 0 or line[0] == '#':
        return line

    words = line.split()
    if len(words) < 3:
        return None

    key = words[0]
    if len(key) < 1 or ord(key[0]) > 0xff:
        # Already a wide (non Latin-1) key: keep it as it is.
        wide_key = key
    else:
        wide_key = _widen_zipcode(key)
    return '%s %s %s' % (wide_key, _ENTRY_FIELD, words[2])


def main(argv=None, output_path='zipcode.t'):
    """Convert an anthy zipcode dictionary into a UTF-8 'zipcode.t'.

    argv is the argument vector (defaults to sys.argv); argv[1] is the
    path of the source dictionary. output_path is the file to write and
    defaults to 'zipcode.t' in the current directory.

    Returns 0 on success, or -1 after printing a usage message when no
    source path was given.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        print('usage: %s /usr/share/anthy/zipcode.t' % argv[0],
              file=sys.stderr)
        return -1

    anthy_zipfile = argv[1]
    contents = _read_contents(anthy_zipfile)

    # The context manager flushes and closes the output even on errors.
    with codecs.open(output_path, 'w', 'utf-8') as output_zipfile:
        output_zipfile.write('# copied %s with UTF-8.\n#\n' % anthy_zipfile)
        for line in contents.split('\n'):
            converted = _convert_line(line)
            if converted is not None:
                output_zipfile.write('%s\n' % converted)
    return 0


if __name__ == '__main__':
    sys.exit(main())