-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpinyin.py
52 lines (42 loc) · 1.49 KB
/
pinyin.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# -*- coding: utf-8 -*-
# Copyright 2010 Yefe<[email protected]>
from os import path
import codecs
class Hanzi2Pinyin(object):
    """Translate text into a list of pinyin / ASCII alphanumeric tokens.

    The lookup table is read from a UTF-8 text file in which the first
    character of every line is the key and the remainder of the line is the
    value stored for that key.
    """

    def __init__(self, data_file=None):
        """Load the character table.

        Args:
            data_file: Path of the table file.  Defaults to ``pinyin.txt``
                located next to this module.

        Raises:
            Exception: If the file cannot be opened/read, or if its content
                is not valid UTF-8.
        """
        self.table = {}
        if data_file is None:
            source_name = 'pinyin.txt'
            data_file = path.join(path.dirname(__file__), 'pinyin.txt')
        else:
            source_name = data_file
        try:
            # codecs.open decodes lazily, so the read must sit inside the
            # ``try`` for the UnicodeDecodeError handler to be reachable;
            # ``with`` closes the file even when the read fails.
            with codecs.open(data_file, 'r', 'utf-8') as fp:
                lines = fp.read().splitlines()
        except IOError:
            raise Exception("Can't load data from %s" % source_name)
        except UnicodeDecodeError:
            raise Exception("Can't decode data from %s" % source_name)
        for line in lines:
            # splitlines() already removed the line terminator, so nothing
            # is chopped off a final line that lacks a trailing newline.
            if not line:
                continue  # blank line: no key character to index
            self.table[line[0]] = line[1:]

    def convert(self, value):
        """Return the tokens found in ``value`` as a list.

        ``value`` is lower-cased first.  Consecutive characters in ``0-9`` /
        ``a-z`` are collected into a single token.  Any other character ends
        the current run; if that character is a key of ``self.table`` its
        table value is appended, otherwise it is dropped.
        """
        pinyin = []
        ascii_run = []
        table = self.table
        for c in value.lower():
            if '0' <= c <= '9' or 'a' <= c <= 'z':
                ascii_run.append(c)
                continue
            if ascii_run:
                pinyin.append(''.join(ascii_run))
                ascii_run = []
            mapped = table.get(c)
            if mapped is not None:
                pinyin.append(mapped)
        # Flush a run that reaches the end of the input (the original code
        # appended a sentinel space to the input to achieve this).
        if ascii_run:
            pinyin.append(''.join(ascii_run))
        return pinyin
if __name__ == '__main__':
    # Small demo / timing run: build the converter once, print one
    # conversion, then time 10000 conversions of the same sample string.
    import time

    t = u'Prep 你好 中 国!I Love China! 2010年8月 !@ # $%^ &* ()_+ Append'

    s = time.time()
    p = Hanzi2Pinyin()  # your class
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print('init: %s' % (time.time() - s))

    print('-'.join(p.convert(t)))  # your convert

    s = time.time()
    for i in range(10000):
        p.convert(t)  # your convert
    print('convert: %s' % (time.time() - s))