Skip to content

Commit 6df8aab

Browse files
author
燕睿涛
committed
完善逻辑,方法
1 parent d6d1395 commit 6df8aab

File tree

3 files changed

+46
-7
lines changed

3 files changed

+46
-7
lines changed

php5_pinyin.h

+1
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@
77

88
/* PHP 5 variant: forwards arr, val and dup unchanged to add_next_index_string(). */
#define py_add_next_index_string(arr, val, dup) add_next_index_string(arr, val, dup)
99
/* PHP 5 variant: forwards arr, index, str, len and dup unchanged to add_index_stringl(). */
#define py_add_index_stringl(arr, index, str, len, dup) add_index_stringl(arr, index, str, len, dup)
10+
/* PHP 5 variant: forwards arg, index and b unchanged to add_index_bool(). */
#define py_add_index_bool(arg, index, b) add_index_bool(arg, index, b)
1011

1112
#define PY_RETURN_ARR(arr) \
1213
Z_ARRVAL_P(return_value) = arr; \

php7_pinyin.h

+1
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@
77

88
/* PHP 7 variant: keeps the three-argument shape used by callers, but the
 * `dup` argument is accepted and not forwarded; expands to
 * add_next_index_string(arr, val). */
#define py_add_next_index_string(arr, val, dup) add_next_index_string(arr, val)
99
/* PHP 7 variant: keeps the five-argument shape used by callers, but the
 * `dup` argument is accepted and not forwarded; expands to
 * add_index_stringl(arr, index, str, len). */
#define py_add_index_stringl(arr, index, str, len, dup) add_index_stringl(arr, index, str, len)
10+
/* PHP 7 variant: forwards arg, index and b unchanged to add_index_bool(). */
#define py_add_index_bool(arg, index, b) add_index_bool(arg, index, b)
1011

1112
/* PHP 7 variant: expands directly to RETVAL_ARR(arr). */
#define PY_RETURN_ARR(arr) RETVAL_ARR(arr)
1213

pinyin.c

+44 -7
Original file line number | Diff line number | Diff line change
@@ -195,22 +195,28 @@ py_row_data_list *py_split_sentence(const char *sentence, size_t flag)
195195
*splitItemPtr = NULL,
196196
tmpStr[100] = {0};
197197
size_t splitLen = 0,
198-
i = 0,
199-
j = 0,
200-
k = 0,
201-
m = 0;
198+
isPinyin = 0,
199+
isChangeTone = 0,
200+
i = 0,
201+
j = 0,
202+
k = 0,
203+
m = 0;
202204
zend_ulong numKey;
203205
#if PHP_MAJOR_VERSION < 7
204-
zval **entry;
206+
zval **entry,
207+
**splitIsPinyinEntry;
205208
#else
206-
zval *entry;
209+
zval *entry,
210+
*splitIsPinyinEntry;
207211
#endif
208-
zval *pinyinPieces = (zval *)py_malloc(sizeof(zval), 0);
212+
zval *pinyinPieces = (zval *)py_malloc(sizeof(zval), 0),
213+
*splitIsPinyin = (zval *)py_malloc(sizeof(zval), 0);
209214
py_row_data_list *rowDataList = (py_row_data_list *)py_malloc(sizeof(py_row_data_list), 0),
210215
*rowDataListPtr = rowDataList,
211216
*rowDataListTmpPtr = NULL;
212217

213218
array_init(pinyinPieces);
219+
array_init(splitIsPinyin);
214220

215221
/* 替换姓名优先 */
216222
if (flag & PINYIN_ISNAME) {
@@ -219,6 +225,7 @@ py_row_data_list *py_split_sentence(const char *sentence, size_t flag)
219225
{
220226
while (NULL != (wordPtr = py_strstr(chinese, wordListPtr->key))) {
221227
py_add_index_stringl(pinyinPieces, wordPtr-chinese, wordListPtr->val, py_strlen(wordListPtr->val), 1);
228+
py_add_index_bool(splitIsPinyin, wordPtr-chinese, 1);
222229
memset(wordPtr, CHINESE_SUB_CHAR, py_strlen(wordListPtr->key));
223230
}
224231
wordListPtr = wordListPtr->next;
@@ -230,6 +237,7 @@ py_row_data_list *py_split_sentence(const char *sentence, size_t flag)
230237
{
231238
while (NULL != (wordPtr = py_strstr(chinese, wordListPtr->key))) {
232239
py_add_index_stringl(pinyinPieces, wordPtr-chinese, wordListPtr->val, py_strlen(wordListPtr->val), 1);
240+
py_add_index_bool(splitIsPinyin, wordPtr-chinese, 1);
233241
memset(wordPtr, CHINESE_SUB_CHAR, py_strlen(wordListPtr->key));
234242
}
235243
wordListPtr = wordListPtr->next;
@@ -240,10 +248,12 @@ py_row_data_list *py_split_sentence(const char *sentence, size_t flag)
240248
for (; i<PY_CHAR_TRANS_MAP_NUM; i++) {
241249
while (NULL != (wordPtr = py_strstr(chinese, charTransMap[i][0]))) {
242250
py_add_index_stringl(pinyinPieces, wordPtr-chinese, charTransMap[i][0], py_strlen(charTransMap[i][0]), 1);
251+
py_add_index_bool(splitIsPinyin, wordPtr-chinese, 0);
243252
memset(wordPtr, CHINESE_SUB_CHAR, py_strlen(charTransMap[i][0]));
244253
}
245254
while (NULL != (wordPtr = py_strstr(chinese, charTransMap[i][1]))) {
246255
py_add_index_stringl(pinyinPieces, wordPtr-chinese, charTransMap[i][1], py_strlen(charTransMap[i][1]), 1);
256+
py_add_index_bool(splitIsPinyin, wordPtr-chinese, 0);
247257
memset(wordPtr, CHINESE_SUB_CHAR, py_strlen(charTransMap[i][1]));
248258
}
249259
}
@@ -255,6 +265,7 @@ py_row_data_list *py_split_sentence(const char *sentence, size_t flag)
255265
if (splitLen > 0) {
256266
*wordPtr = 0;
257267
py_add_index_stringl(pinyinPieces, wordPtr-chinese-splitLen,wordPtr - splitLen, py_strlen(wordPtr - splitLen), 1);
268+
py_add_index_bool(splitIsPinyin, wordPtr-chinese-splitLen, 0);
258269
}
259270
splitLen = 0;
260271
} else {
@@ -266,34 +277,52 @@ py_row_data_list *py_split_sentence(const char *sentence, size_t flag)
266277
/* 特殊情况:最后一个为非汉字的时候 */
267278
if (splitLen > 0) {
268279
py_add_index_stringl(pinyinPieces, wordPtr-chinese-splitLen,wordPtr - splitLen, py_strlen(wordPtr - splitLen), 1);
280+
py_add_index_bool(splitIsPinyin, wordPtr-chinese-splitLen, 0);
269281
}
270282

271283
/* 格式化数组,将汉字切分为单个的一个,去掉制表符 */
272284
for (i=0; i<=strlen(sentence); i++) {
285+
isPinyin = 0;
273286
#if PHP_MAJOR_VERSION < 7
274287
if (zend_hash_index_find(Z_ARRVAL_P(pinyinPieces), i, (void**)&entry) == FAILURE || py_strlen(Z_STRVAL_PP(entry)) <= 0)
275288
continue;
276289
splitItem = strtok(Z_STRVAL_PP(entry), "\t");
290+
if (zend_hash_index_find(Z_ARRVAL_P(splitIsPinyin), i, (void**)&splitIsPinyinEntry) == SUCCESS) {
291+
if (Z_BVAL_PP(splitIsPinyinEntry)) {
292+
isPinyin = 1;
293+
}
294+
}
277295
#else
278296
entry = zend_hash_index_find(Z_ARRVAL_P(pinyinPieces), i);
279297
if (NULL == entry)
280298
continue;
281299
splitItem = strtok(Z_STRVAL_P(entry), "\t");
300+
splitIsPinyinEntry = zend_hash_index_find(Z_ARRVAL_P(splitIsPinyin), i);
301+
if (NULL != splitIsPinyinEntry) {
302+
if(Z_TYPE_INFO_P(splitIsPinyinEntry) == IS_TRUE) {
303+
isPinyin = 1;
304+
}
305+
}
282306
#endif
283307
/* 不需要拼音声调 */
284308
CREATE_ROW_DATA_ITEM(rowDataListTmpPtr);
285309
rowDataListTmpPtr->ori = py_strdup(splitItem, 0);
286310
rowDataListPtr->next = rowDataListTmpPtr;
287311
rowDataListPtr = rowDataListTmpPtr;
288312
if (flag & (PINYIN_NONE|PINYIN_ASCII|PINYIN_LCFIRST|PINYIN_UCFIRST)) {
313+
isChangeTone = 0;
289314
for(m=0 ; m<PY_TONE_INFO_NUM; m++) {
290315
if (NULL != (wordPtr=py_strstr(splitItem, toneInfos[m].complete))){
291316
CHANGE_STR(tmpStr, splitItem, wordPtr, toneInfos[m].complete, toneInfos[m].simple, j, k);
292317
rowDataListTmpPtr->none = py_strdup(tmpStr, 0);
293318
rowDataListTmpPtr->tone = toneInfos[m].tone;
319+
isChangeTone = 1;
294320
break;
295321
}
296322
}
323+
if (!isChangeTone && isPinyin) {
324+
rowDataListTmpPtr->none = py_strdup(rowDataListTmpPtr->ori, 0);
325+
}
297326
}
298327
if (flag & (PINYIN_LCFIRST|PINYIN_UCFIRST)){
299328
if (NULL != rowDataListTmpPtr->none) {
@@ -314,14 +343,19 @@ py_row_data_list *py_split_sentence(const char *sentence, size_t flag)
314343
rowDataListPtr->next = rowDataListTmpPtr;
315344
rowDataListPtr = rowDataListTmpPtr;
316345
if (flag & (PINYIN_NONE|PINYIN_ASCII|PINYIN_LCFIRST|PINYIN_UCFIRST)) {
346+
isChangeTone = 0;
317347
for(m=0 ; m<PY_TONE_INFO_NUM; m++) {
318348
if (NULL != (wordPtr=py_strstr(splitItem, toneInfos[m].complete))){
319349
CHANGE_STR(tmpStr, splitItem, wordPtr, toneInfos[m].complete, toneInfos[m].simple, j, k);
320350
rowDataListTmpPtr->none = py_strdup(tmpStr, 0);
321351
rowDataListTmpPtr->tone = toneInfos[m].tone;
352+
isChangeTone = 1;
322353
break;
323354
}
324355
}
356+
if (!isChangeTone && isPinyin) {
357+
rowDataListTmpPtr->none = py_strdup(rowDataListTmpPtr->ori, 0);
358+
}
325359
}
326360
if (flag & (PINYIN_LCFIRST|PINYIN_UCFIRST)){
327361
if (NULL != rowDataListTmpPtr->none) {
@@ -341,6 +375,9 @@ py_row_data_list *py_split_sentence(const char *sentence, size_t flag)
341375
zend_hash_destroy(Z_ARRVAL_P(pinyinPieces));
342376
efree(Z_ARRVAL_P(pinyinPieces));
343377
efree(pinyinPieces);
378+
zend_hash_destroy(Z_ARRVAL_P(splitIsPinyin));
379+
efree(Z_ARRVAL_P(splitIsPinyin));
380+
efree(splitIsPinyin);
344381

345382
return rowDataList;
346383
}

0 commit comments

Comments
 (0)