@@ -434,6 +434,12 @@ def _preprocess_regular_expression(self):
434
434
'' .join ([bracket (LIMIT_YEAR_STRING ), '第' , bracket (WEEK_NUM_STRING ),
435
435
'(个)?' , WEEK_STRING ]))
436
436
437
+ # 1月1 此类不全的日期,缺少日
438
+ # 注意,此种情况只针对 日 是 阿拉伯数字的情况,若是汉字 日,如 “五月二十”,则按农历进行解析,
439
+ # 此时,则不存在日期的 “日” 的缺失。
440
+ self .num_month_num_pattern = re .compile (
441
+ '' .join (['^' , MONTH_NUM_STRING , '月' , '([12]\d|3[01]|[0]?[1-9])' , '$' ]))
442
+
437
443
# 公历固定节日
438
444
self .year_fixed_solar_festival_pattern = re .compile (
439
445
'' .join ([bracket_absence (YEAR_STRING ), FIXED_SOLAR_FESTIVAL ]))
@@ -967,6 +973,21 @@ def _adjust_underlying_future_time(self, time_string):
967
973
968
974
return time_string
969
975
976
+ def _compensate_num_month_num (self , time_string ):
977
+ """ 一种特定的日期类型,“1月1”,没指明 “日”。因此需要进行补全,然后再进行处理。
978
+
979
+ Args:
980
+ time_string:
981
+
982
+ Returns:
983
+
984
+ """
985
+ matched_res = self .num_month_num_pattern .search (time_string )
986
+ if matched_res is not None :
987
+ return time_string + '日'
988
+ else :
989
+ return time_string
990
+
970
991
def parse_time_span_point (self , time_string ):
971
992
# 按照 “从 …… 至 ……” 进行解析
972
993
first_time_string , second_time_string = self .parse_span_2_2_point (time_string )
@@ -976,6 +997,7 @@ def parse_time_span_point(self, time_string):
976
997
old_time_base_handler = self .time_base_handler
977
998
try :
978
999
if first_time_string is not None and second_time_string is None :
1000
+ first_time_string = self ._compensate_num_month_num (first_time_string )
979
1001
980
1002
first_full_time_handler , _ , _ , blur_time = self .parse_time_point (
981
1003
first_time_string , self .time_base_handler )
@@ -990,6 +1012,8 @@ def parse_time_span_point(self, time_string):
990
1012
second_full_time_handler = self .time_base_handler
991
1013
elif first_time_string is not None and second_time_string is not None :
992
1014
1015
+ first_time_string = self ._compensate_num_month_num (first_time_string )
1016
+ second_time_string = self ._compensate_num_month_num (second_time_string )
993
1017
first_time_string , second_time_string = self ._compensate_string (
994
1018
time_string , first_time_string , second_time_string )
995
1019
@@ -1013,6 +1037,8 @@ def parse_time_span_point(self, time_string):
1013
1037
second_full_time_handler [4 :] = [0 , 0 ]
1014
1038
1015
1039
elif first_time_string is None and second_time_string is not None :
1040
+ second_time_string = self ._compensate_num_month_num (second_time_string )
1041
+
1016
1042
_ , second_full_time_handler , _ , blur_time = self .parse_time_point (
1017
1043
second_time_string , self .time_base_handler )
1018
1044
@@ -1037,6 +1063,8 @@ def parse_time_span_point(self, time_string):
1037
1063
time_string , self .time_base_handler )
1038
1064
else :
1039
1065
# 非 time span,按 time_point 解析
1066
+ time_string = self ._compensate_num_month_num (time_string )
1067
+
1040
1068
first_full_time_handler , second_full_time_handler , time_type , \
1041
1069
blur_time = self .parse_time_point (
1042
1070
time_string , self .time_base_handler )
0 commit comments