Skip to content
This repository was archived by the owner on Mar 10, 2018. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 20 additions & 29 deletions NLP/tool.py
Original file line number Diff line number Diff line change
@@ -1,36 +1,27 @@
from .cut import Scissor
from .mark import Mark

import json
import lzma
class Tool:
    """Bundle a word segmenter and a part-of-speech tagger behind one call."""

    @staticmethod
    def _load_dict(path):
        """Read an xz-compressed, UTF-8 encoded JSON file and return the parsed data.

        The file is opened in a ``with`` block so the handle is closed as soon
        as its content has been read.

        :param path: path of the ``.json.xz`` file to load
        :return: the object decoded from the JSON document
        """
        with lzma.open(path) as fh:
            return json.loads(fh.read().decode('utf-8'))

    def __init__(self):
        """Initialise the segmenter (``self.S``) and the tagger (``self.M``).

        The dictionary paths are relative, so they are resolved against the
        current working directory.
        """
        load = self._load_dict
        self.M = Mark(load('NLP/pos.json.xz'),
                      load('NLP/bposf.json.xz'),
                      load('NLP/iposf.json.xz'))
        self.S = Scissor(load('NLP/wf.json.xz'),
                         load('NLP/iw.json.xz'))

    def Cut_mark(self, s):
        """Segment a string into words and tag each word with its part of speech.

        :param s: the string to process
        :return: a list of ``'word/tag'`` strings, one per segmented word
        :raises IndexError: if the tagger returns fewer tags than there are words
        """
        cut = self.S.Cut(s)
        mark = self.M.Sentemark(cut)
        # Index into ``mark`` (rather than zip) so a short tag list still fails loudly.
        return [word + '/' + mark[i] for i, word in enumerate(cut)]

if __name__ == '__main__':
    # Interactive check: read one line from stdin, then segment and tag it.
    t = Tool()
    print('Enter: ')
    s = input()
    # Print the tagged tokens so the run produces visible output.
    print(t.Cut_mark(s))
import pkg_resources as pkgres

def rc(n):
    """Return the filesystem path of resource *n*, looked up relative to this module."""
    return pkgres.resource_filename(__name__, n)


def _load_dict(name):
    """Load the xz-compressed, UTF-8 encoded JSON resource *name* and return the parsed data.

    The file is opened in a ``with`` block so the handle is closed as soon as
    its content has been read.
    """
    with lzma.open(rc(name)) as fh:
        return json.loads(fh.read().decode('utf-8'))


# Shared tagger and segmenter, built once when the module is imported.
M = Mark(
    _load_dict('pos.json.xz'),
    _load_dict('bposf.json.xz'),
    _load_dict('iposf.json.xz'),
)
S = Scissor(
    _load_dict('wf.json.xz'),
    _load_dict('iw.json.xz'),
)

def CutMark(s):
    """Segment a string into words and tag each word with its part of speech.

    :param s: the string to process
    :return: a ``(cut, mark)`` tuple, where ``cut`` is the result of
        ``S.Cut(s)`` and ``mark`` is the result of ``M.Sentemark(cut)``
    """
    cut = S.Cut(s)
    mark = M.Sentemark(cut)
    return cut, mark

if __name__ == '__main__':
    # Interactive check: read one line from stdin, then segment and tag it.
    print('Enter: ')
    s = input()
    # Print the (words, tags) pair so the run produces visible output.
    print(CutMark(s))
9 changes: 4 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

* 其他源文件用来产生词典

* *.json.xz 为压缩后的词典
* \*.json.xz 为压缩后的词典

## 使用方法

Expand All @@ -41,8 +41,7 @@
```

```
>>> from NLP import tool
>>> t = tool.Tool()
>>> t.Cut_mark('自然语言通常是指一种自然地随文化演化的语言。')
['自然/n', '语言/n', '通常/d', '是/vshi', '指/v', '一种/mq', '自然/n', '地/ude2', '随/p', '文化/n', '演化/vn', '的/ude1', '语言/n', '。/w']
>>> from NLP.tool import CutMark
>>> CutMark('自然语言通常是指一种自然地随文化演化的语言。')
(['自然', '语言', '通常', '是', '指', '一种', '自然', '地', '随', '文化', '演化', '的', '语言', '。'], ['n', 'n', 'd', 'vshi', 'v', 'mq', 'n', 'ude2', 'p', 'n', 'vn', 'ude1', 'n', 'w'])
```