@@ -175,9 +175,10 @@ void py_fill_data_list(const char *dir, unsigned int num)
175
175
/**
176
176
*
177
177
* @param chinese
178
+ * @param flag
178
179
* @return
179
180
*/
180
- zval * py_split_sentence (const char * sentence )
181
+ py_row_data_list * py_split_sentence (const char * sentence , size_t flag )
181
182
{
182
183
if (PY_GLOBAL (can_access ) == false)
183
184
{
@@ -188,21 +189,43 @@ zval *py_split_sentence(const char *sentence)
188
189
char * chinese = estrdup (sentence );
189
190
190
191
//正常的拼音化
191
- py_data_list * wordListPtr = PY_GLOBAL ( wordList ) -> next ;
192
+ py_data_list * wordListPtr ;
192
193
char * wordPtr = NULL ,
193
- * splitItem = NULL ;
194
+ * splitItem = NULL ,
195
+ * splitItemPtr = NULL ,
196
+ tmpStr [100 ] = {0 };
194
197
size_t splitLen = 0 ,
195
- i = 0 ;
198
+ i = 0 ,
199
+ j = 0 ,
200
+ k = 0 ,
201
+ m = 0 ;
196
202
zend_ulong numKey ;
197
203
#if PHP_MAJOR_VERSION < 7
198
204
zval * * entry ;
199
205
#else
200
206
zval * entry ;
201
207
#endif
202
208
zval * pinyinPieces = (zval * )py_malloc (sizeof (zval ), 0 );
203
- zval * pinyinSplit = (zval * )py_malloc (sizeof (zval ), 0 );
209
+ py_row_data_list * rowDataList = (py_row_data_list * )py_malloc (sizeof (py_row_data_list ), 0 ),
210
+ * rowDataListPtr = rowDataList ,
211
+ * rowDataListTmpPtr = NULL ;
204
212
205
213
array_init (pinyinPieces );
214
+
215
+ /* 替换姓名优先 */
216
+ if (flag & PINYIN_ISNAME ) {
217
+ wordListPtr = PY_GLOBAL (surnameList )-> next ;
218
+ while (wordListPtr != NULL )
219
+ {
220
+ while (NULL != (wordPtr = py_strstr (chinese , wordListPtr -> key ))) {
221
+ py_add_index_stringl (pinyinPieces , wordPtr - chinese , wordListPtr -> val , py_strlen (wordListPtr -> val ), 1 );
222
+ memset (wordPtr , CHINESE_SUB_CHAR , py_strlen (wordListPtr -> key ));
223
+ }
224
+ wordListPtr = wordListPtr -> next ;
225
+ }
226
+ }
227
+
228
+ wordListPtr = PY_GLOBAL (wordList )-> next ;
206
229
while (wordListPtr != NULL )
207
230
{
208
231
while (NULL != (wordPtr = py_strstr (chinese , wordListPtr -> key ))) {
@@ -246,7 +269,6 @@ zval *py_split_sentence(const char *sentence)
246
269
}
247
270
248
271
/* 格式化数组,将汉字切分为单个的一个,去掉制表符 */
249
- array_init (pinyinSplit );
250
272
for (i = 0 ; i <=strlen (sentence ); i ++ ) {
251
273
#if PHP_MAJOR_VERSION < 7
252
274
if (zend_hash_index_find (Z_ARRVAL_P (pinyinPieces ), i , (void * * )& entry ) == FAILURE || py_strlen (Z_STRVAL_PP (entry )) <= 0 )
@@ -258,10 +280,56 @@ zval *py_split_sentence(const char *sentence)
258
280
continue ;
259
281
splitItem = strtok (Z_STRVAL_P (entry ), "\t" );
260
282
#endif
261
- py_add_next_index_string (pinyinSplit , splitItem , 1 );
283
+ /* 不需要拼音声调 */
284
+ CREATE_ROW_DATA_ITEM (rowDataListTmpPtr );
285
+ rowDataListTmpPtr -> ori = py_strdup (splitItem , 0 );
286
+ rowDataListPtr -> next = rowDataListTmpPtr ;
287
+ rowDataListPtr = rowDataListTmpPtr ;
288
+ if (flag & (PINYIN_NONE |PINYIN_ASCII |PINYIN_LCFIRST |PINYIN_UCFIRST )) {
289
+ for (m = 0 ; m < PY_TONE_INFO_NUM ; m ++ ) {
290
+ if (NULL != (wordPtr = py_strstr (splitItem , toneInfos [m ].complete ))){
291
+ CHANGE_STR (tmpStr , splitItem , wordPtr , toneInfos [m ].complete , toneInfos [m ].simple , j , k );
292
+ rowDataListTmpPtr -> none = py_strdup (tmpStr , 0 );
293
+ rowDataListTmpPtr -> tone = toneInfos [m ].tone ;
294
+ break ;
295
+ }
296
+ }
297
+ }
298
+ if (flag & (PINYIN_LCFIRST |PINYIN_UCFIRST )){
299
+ if (NULL != rowDataListTmpPtr -> none ) {
300
+ rowDataListTmpPtr -> lcfirst = * rowDataListTmpPtr -> none ;
301
+ if (!(rowDataListTmpPtr -> lcfirst >= 65 && rowDataListTmpPtr -> lcfirst <= 90 )
302
+ && !(rowDataListTmpPtr -> lcfirst >= 97 && rowDataListTmpPtr -> lcfirst <= 122 )){
303
+ rowDataListTmpPtr -> lcfirst = 0 ;
304
+ }
305
+ }
306
+ }
307
+
262
308
while ((splitItem = strtok (NULL , "\t" )))
263
309
{
264
- py_add_next_index_string (pinyinSplit , splitItem , 1 );
310
+ CREATE_ROW_DATA_ITEM (rowDataListTmpPtr );
311
+ rowDataListTmpPtr -> ori = py_strdup (splitItem , 0 );
312
+ rowDataListPtr -> next = rowDataListTmpPtr ;
313
+ rowDataListPtr = rowDataListTmpPtr ;
314
+ if (flag & (PINYIN_NONE |PINYIN_ASCII |PINYIN_LCFIRST |PINYIN_UCFIRST )) {
315
+ for (m = 0 ; m < PY_TONE_INFO_NUM ; m ++ ) {
316
+ if (NULL != (wordPtr = py_strstr (splitItem , toneInfos [m ].complete ))){
317
+ CHANGE_STR (tmpStr , splitItem , wordPtr , toneInfos [m ].complete , toneInfos [m ].simple , j , k );
318
+ rowDataListTmpPtr -> none = py_strdup (tmpStr , 0 );
319
+ rowDataListTmpPtr -> tone = toneInfos [m ].tone ;
320
+ break ;
321
+ }
322
+ }
323
+ }
324
+ if (flag & (PINYIN_LCFIRST |PINYIN_UCFIRST )){
325
+ if (NULL != rowDataListTmpPtr -> none ) {
326
+ rowDataListTmpPtr -> lcfirst = * rowDataListTmpPtr -> none ;
327
+ if (!(rowDataListTmpPtr -> lcfirst >= 65 && rowDataListTmpPtr -> lcfirst <= 90 )
328
+ && !(rowDataListTmpPtr -> lcfirst >= 97 && rowDataListTmpPtr -> lcfirst <= 122 )){
329
+ rowDataListTmpPtr -> lcfirst = 0 ;
330
+ }
331
+ }
332
+ }
265
333
}
266
334
}
267
335
@@ -270,7 +338,23 @@ zval *py_split_sentence(const char *sentence)
270
338
efree (Z_ARRVAL_P (pinyinPieces ));
271
339
efree (pinyinPieces );
272
340
273
- return pinyinSplit ;
341
+ return rowDataList ;
342
+ }
343
+
344
+ void py_destory_row_list (py_row_data_list * list )
345
+ {
346
+ py_row_data_list * ptr = list -> next ,
347
+ * tmp = NULL ;
348
+ while (ptr != NULL ) {
349
+ if (NULL != ptr -> ori )
350
+ efree (ptr -> ori );
351
+ if (NULL != ptr -> none )
352
+ efree (ptr -> none );
353
+ tmp = ptr -> next ;
354
+ efree (ptr );
355
+ ptr = tmp ;
356
+ }
357
+ efree (list );
274
358
}
275
359
276
360
PHP_INI_BEGIN ()
@@ -281,14 +365,24 @@ PHP_FUNCTION(pinyin)
281
365
{
282
366
char * chinese = NULL ;
283
367
size_t len ;
368
+ size_t l = PINYIN_UNICODE ;
284
369
285
- if (zend_parse_parameters (ZEND_NUM_ARGS () TSRMLS_CC , "s" , & chinese , & len ) == FAILURE ) {
370
+ if (zend_parse_parameters (ZEND_NUM_ARGS () TSRMLS_CC , "s|l " , & chinese , & len , & l ) == FAILURE ) {
286
371
return ;
287
372
}
288
373
289
- zval * pinyinSplit = py_split_sentence (chinese );
290
- PY_RETURN_ARR (Z_ARRVAL_P (pinyinSplit ));
291
- efree (pinyinSplit );
374
+ py_row_data_list * list = py_split_sentence (chinese , l ),
375
+ * rowDataListPtr ;
376
+
377
+ array_init (return_value );
378
+ rowDataListPtr = list -> next ;
379
+ while (rowDataListPtr != NULL ) {
380
+ if (l & PINYIN_UNICODE ) {
381
+ py_add_next_index_string (return_value , rowDataListPtr -> ori , 1 );
382
+ }
383
+ rowDataListPtr = rowDataListPtr -> next ;
384
+ }
385
+ py_destory_row_list (list );
292
386
}
293
387
294
388
PHP_MINIT_FUNCTION (pinyin )
@@ -318,9 +412,9 @@ PHP_MINIT_FUNCTION(pinyin)
318
412
REGISTER_LONG_CONSTANT ("PINYIN_NONE" , PINYIN_NONE , CONST_PERSISTENT | CONST_CS );
319
413
REGISTER_LONG_CONSTANT ("PINYIN_UNICODE" , PINYIN_UNICODE , CONST_PERSISTENT | CONST_CS );
320
414
REGISTER_LONG_CONSTANT ("PINYIN_ISNAME" , PINYIN_ISNAME , CONST_PERSISTENT | CONST_CS );
321
- REGISTER_LONG_CONSTANT ("PINYIN_TRIM " , PINYIN_TRIM , CONST_PERSISTENT | CONST_CS );
322
- REGISTER_LONG_CONSTANT ("PINYIN_FORMAT_EN " , PINYIN_FORMAT_EN , CONST_PERSISTENT | CONST_CS );
323
- REGISTER_LONG_CONSTANT ("PINYIN_FORMAT_CH " , PINYIN_FORMAT_CH , CONST_PERSISTENT | CONST_CS );
415
+ REGISTER_LONG_CONSTANT ("PINYIN_ASCII " , PINYIN_ASCII , CONST_PERSISTENT | CONST_CS );
416
+ REGISTER_LONG_CONSTANT ("PINYIN_UCFIRST " , PINYIN_UCFIRST , CONST_PERSISTENT | CONST_CS );
417
+ REGISTER_LONG_CONSTANT ("PINYIN_LCFIRST " , PINYIN_LCFIRST , CONST_PERSISTENT | CONST_CS );
324
418
325
419
return SUCCESS ;
326
420
}
0 commit comments