@@ -195,22 +195,28 @@ py_row_data_list *py_split_sentence(const char *sentence, size_t flag)
195
195
* splitItemPtr = NULL ,
196
196
tmpStr [100 ] = {0 };
197
197
size_t splitLen = 0 ,
198
- i = 0 ,
199
- j = 0 ,
200
- k = 0 ,
201
- m = 0 ;
198
+ isPinyin = 0 ,
199
+ isChangeTone = 0 ,
200
+ i = 0 ,
201
+ j = 0 ,
202
+ k = 0 ,
203
+ m = 0 ;
202
204
zend_ulong numKey ;
203
205
#if PHP_MAJOR_VERSION < 7
204
- zval * * entry ;
206
+ zval * * entry ,
207
+ * * splitIsPinyinEntry ;
205
208
#else
206
- zval * entry ;
209
+ zval * entry ,
210
+ * splitIsPinyinEntry ;
207
211
#endif
208
- zval * pinyinPieces = (zval * )py_malloc (sizeof (zval ), 0 );
212
+ zval * pinyinPieces = (zval * )py_malloc (sizeof (zval ), 0 ),
213
+ * splitIsPinyin = (zval * )py_malloc (sizeof (zval ), 0 );
209
214
py_row_data_list * rowDataList = (py_row_data_list * )py_malloc (sizeof (py_row_data_list ), 0 ),
210
215
* rowDataListPtr = rowDataList ,
211
216
* rowDataListTmpPtr = NULL ;
212
217
213
218
array_init (pinyinPieces );
219
+ array_init (splitIsPinyin );
214
220
215
221
/* 替换姓名优先 */
216
222
if (flag & PINYIN_ISNAME ) {
@@ -219,6 +225,7 @@ py_row_data_list *py_split_sentence(const char *sentence, size_t flag)
219
225
{
220
226
while (NULL != (wordPtr = py_strstr (chinese , wordListPtr -> key ))) {
221
227
py_add_index_stringl (pinyinPieces , wordPtr - chinese , wordListPtr -> val , py_strlen (wordListPtr -> val ), 1 );
228
+ py_add_index_bool (splitIsPinyin , wordPtr - chinese , 1 );
222
229
memset (wordPtr , CHINESE_SUB_CHAR , py_strlen (wordListPtr -> key ));
223
230
}
224
231
wordListPtr = wordListPtr -> next ;
@@ -230,6 +237,7 @@ py_row_data_list *py_split_sentence(const char *sentence, size_t flag)
230
237
{
231
238
while (NULL != (wordPtr = py_strstr (chinese , wordListPtr -> key ))) {
232
239
py_add_index_stringl (pinyinPieces , wordPtr - chinese , wordListPtr -> val , py_strlen (wordListPtr -> val ), 1 );
240
+ py_add_index_bool (splitIsPinyin , wordPtr - chinese , 1 );
233
241
memset (wordPtr , CHINESE_SUB_CHAR , py_strlen (wordListPtr -> key ));
234
242
}
235
243
wordListPtr = wordListPtr -> next ;
@@ -240,10 +248,12 @@ py_row_data_list *py_split_sentence(const char *sentence, size_t flag)
240
248
for (; i < PY_CHAR_TRANS_MAP_NUM ; i ++ ) {
241
249
while (NULL != (wordPtr = py_strstr (chinese , charTransMap [i ][0 ]))) {
242
250
py_add_index_stringl (pinyinPieces , wordPtr - chinese , charTransMap [i ][0 ], py_strlen (charTransMap [i ][0 ]), 1 );
251
+ py_add_index_bool (splitIsPinyin , wordPtr - chinese , 0 );
243
252
memset (wordPtr , CHINESE_SUB_CHAR , py_strlen (charTransMap [i ][0 ]));
244
253
}
245
254
while (NULL != (wordPtr = py_strstr (chinese , charTransMap [i ][1 ]))) {
246
255
py_add_index_stringl (pinyinPieces , wordPtr - chinese , charTransMap [i ][1 ], py_strlen (charTransMap [i ][1 ]), 1 );
256
+ py_add_index_bool (splitIsPinyin , wordPtr - chinese , 0 );
247
257
memset (wordPtr , CHINESE_SUB_CHAR , py_strlen (charTransMap [i ][1 ]));
248
258
}
249
259
}
@@ -255,6 +265,7 @@ py_row_data_list *py_split_sentence(const char *sentence, size_t flag)
255
265
if (splitLen > 0 ) {
256
266
* wordPtr = 0 ;
257
267
py_add_index_stringl (pinyinPieces , wordPtr - chinese - splitLen ,wordPtr - splitLen , py_strlen (wordPtr - splitLen ), 1 );
268
+ py_add_index_bool (splitIsPinyin , wordPtr - chinese - splitLen , 0 );
258
269
}
259
270
splitLen = 0 ;
260
271
} else {
@@ -266,34 +277,52 @@ py_row_data_list *py_split_sentence(const char *sentence, size_t flag)
266
277
/* 特殊情况:最后一个为非汉字的时候 */
267
278
if (splitLen > 0 ) {
268
279
py_add_index_stringl (pinyinPieces , wordPtr - chinese - splitLen ,wordPtr - splitLen , py_strlen (wordPtr - splitLen ), 1 );
280
+ py_add_index_bool (splitIsPinyin , wordPtr - chinese - splitLen , 0 );
269
281
}
270
282
271
283
/* 格式化数组,将汉字切分为单个的一个,去掉制表符 */
272
284
for (i = 0 ; i <=strlen (sentence ); i ++ ) {
285
+ isPinyin = 0 ;
273
286
#if PHP_MAJOR_VERSION < 7
274
287
if (zend_hash_index_find (Z_ARRVAL_P (pinyinPieces ), i , (void * * )& entry ) == FAILURE || py_strlen (Z_STRVAL_PP (entry )) <= 0 )
275
288
continue ;
276
289
splitItem = strtok (Z_STRVAL_PP (entry ), "\t" );
290
+ if (zend_hash_index_find (Z_ARRVAL_P (splitIsPinyin ), i , (void * * )& splitIsPinyinEntry ) == SUCCESS ) {
291
+ if (Z_BVAL_PP (splitIsPinyinEntry )) {
292
+ isPinyin = 1 ;
293
+ }
294
+ }
277
295
#else
278
296
entry = zend_hash_index_find (Z_ARRVAL_P (pinyinPieces ), i );
279
297
if (NULL == entry )
280
298
continue ;
281
299
splitItem = strtok (Z_STRVAL_P (entry ), "\t" );
300
+ splitIsPinyinEntry = zend_hash_index_find (Z_ARRVAL_P (splitIsPinyin ), i );
301
+ if (NULL != splitIsPinyinEntry ) {
302
+ if (Z_TYPE_INFO_P (splitIsPinyinEntry ) == IS_TRUE ) {
303
+ isPinyin = 1 ;
304
+ }
305
+ }
282
306
#endif
283
307
/* 不需要拼音声调 */
284
308
CREATE_ROW_DATA_ITEM (rowDataListTmpPtr );
285
309
rowDataListTmpPtr -> ori = py_strdup (splitItem , 0 );
286
310
rowDataListPtr -> next = rowDataListTmpPtr ;
287
311
rowDataListPtr = rowDataListTmpPtr ;
288
312
if (flag & (PINYIN_NONE |PINYIN_ASCII |PINYIN_LCFIRST |PINYIN_UCFIRST )) {
313
+ isChangeTone = 0 ;
289
314
for (m = 0 ; m < PY_TONE_INFO_NUM ; m ++ ) {
290
315
if (NULL != (wordPtr = py_strstr (splitItem , toneInfos [m ].complete ))){
291
316
CHANGE_STR (tmpStr , splitItem , wordPtr , toneInfos [m ].complete , toneInfos [m ].simple , j , k );
292
317
rowDataListTmpPtr -> none = py_strdup (tmpStr , 0 );
293
318
rowDataListTmpPtr -> tone = toneInfos [m ].tone ;
319
+ isChangeTone = 1 ;
294
320
break ;
295
321
}
296
322
}
323
+ if (!isChangeTone && isPinyin ) {
324
+ rowDataListTmpPtr -> none = py_strdup (rowDataListTmpPtr -> ori , 0 );
325
+ }
297
326
}
298
327
if (flag & (PINYIN_LCFIRST |PINYIN_UCFIRST )){
299
328
if (NULL != rowDataListTmpPtr -> none ) {
@@ -314,14 +343,19 @@ py_row_data_list *py_split_sentence(const char *sentence, size_t flag)
314
343
rowDataListPtr -> next = rowDataListTmpPtr ;
315
344
rowDataListPtr = rowDataListTmpPtr ;
316
345
if (flag & (PINYIN_NONE |PINYIN_ASCII |PINYIN_LCFIRST |PINYIN_UCFIRST )) {
346
+ isChangeTone = 0 ;
317
347
for (m = 0 ; m < PY_TONE_INFO_NUM ; m ++ ) {
318
348
if (NULL != (wordPtr = py_strstr (splitItem , toneInfos [m ].complete ))){
319
349
CHANGE_STR (tmpStr , splitItem , wordPtr , toneInfos [m ].complete , toneInfos [m ].simple , j , k );
320
350
rowDataListTmpPtr -> none = py_strdup (tmpStr , 0 );
321
351
rowDataListTmpPtr -> tone = toneInfos [m ].tone ;
352
+ isChangeTone = 1 ;
322
353
break ;
323
354
}
324
355
}
356
+ if (!isChangeTone && isPinyin ) {
357
+ rowDataListTmpPtr -> none = py_strdup (rowDataListTmpPtr -> ori , 0 );
358
+ }
325
359
}
326
360
if (flag & (PINYIN_LCFIRST |PINYIN_UCFIRST )){
327
361
if (NULL != rowDataListTmpPtr -> none ) {
@@ -341,6 +375,9 @@ py_row_data_list *py_split_sentence(const char *sentence, size_t flag)
341
375
zend_hash_destroy (Z_ARRVAL_P (pinyinPieces ));
342
376
efree (Z_ARRVAL_P (pinyinPieces ));
343
377
efree (pinyinPieces );
378
+ zend_hash_destroy (Z_ARRVAL_P (splitIsPinyin ));
379
+ efree (Z_ARRVAL_P (splitIsPinyin ));
380
+ efree (splitIsPinyin );
344
381
345
382
return rowDataList ;
346
383
}
0 commit comments