staran 1.0.8__py3-none-any.whl → 1.0.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- staran/__init__.py +0 -62
- staran/date/__init__.py +72 -87
- staran/date/core/__init__.py +24 -0
- staran/date/{core.py → core/core.py} +1094 -9
- staran/date/examples/v1010_features_demo.py +376 -0
- staran/date/examples/v109_features_demo.py +302 -0
- staran/date/extensions/__init__.py +48 -0
- staran/date/extensions/expressions.py +554 -0
- staran/date/extensions/solar_terms.py +417 -0
- staran/date/extensions/timezone.py +263 -0
- staran/date/integrations/__init__.py +38 -0
- staran/date/integrations/api_server.py +754 -0
- staran/date/integrations/visualization.py +689 -0
- staran/date/tests/run_tests.py +77 -6
- staran/date/tests/test_v1010_features.py +495 -0
- staran/date/tests/test_v109_features.py +316 -0
- staran-1.0.10.dist-info/METADATA +240 -0
- staran-1.0.10.dist-info/RECORD +34 -0
- staran-1.0.10.dist-info/entry_points.txt +2 -0
- staran-1.0.8.dist-info/METADATA +0 -371
- staran-1.0.8.dist-info/RECORD +0 -21
- /staran/date/{i18n.py → core/i18n.py} +0 -0
- /staran/date/{lunar.py → core/lunar.py} +0 -0
- {staran-1.0.8.dist-info → staran-1.0.10.dist-info}/WHEEL +0 -0
- {staran-1.0.8.dist-info → staran-1.0.10.dist-info}/licenses/LICENSE +0 -0
- {staran-1.0.8.dist-info → staran-1.0.10.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,48 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
|
4
|
+
"""
|
5
|
+
Staran 扩展功能模块
|
6
|
+
==================
|
7
|
+
|
8
|
+
包含v1.0.10新增的扩展功能:
|
9
|
+
- 时区支持
|
10
|
+
- 日期表达式解析
|
11
|
+
- 二十四节气计算
|
12
|
+
"""
|
13
|
+
|
14
|
+
# Every extension is optional.  When a submodule cannot be imported its public
# names are bound to None and the matching *_AVAILABLE flag is set to False, so
# callers can feature-test instead of handling ImportError themselves.

# Timezone support
try:
    from .timezone import Timezone, TimezoneInfo
except ImportError:
    Timezone = TimezoneInfo = None
    TIMEZONE_AVAILABLE = False
else:
    TIMEZONE_AVAILABLE = True

# Date expression parsing
try:
    from .expressions import DateExpressionParser, ParseResult
except ImportError:
    DateExpressionParser = ParseResult = None
    EXPRESSIONS_AVAILABLE = False
else:
    EXPRESSIONS_AVAILABLE = True

# 24 solar terms
try:
    from .solar_terms import SolarTerms, SolarTerm
except ImportError:
    SolarTerms = SolarTerm = None
    SOLAR_TERMS_AVAILABLE = False
else:
    SOLAR_TERMS_AVAILABLE = True

# Public API: the extension classes followed by their availability flags.
__all__ = [
    'Timezone',
    'TimezoneInfo',
    'DateExpressionParser',
    'ParseResult',
    'SolarTerms',
    'SolarTerm',
    'TIMEZONE_AVAILABLE',
    'EXPRESSIONS_AVAILABLE',
    'SOLAR_TERMS_AVAILABLE'
]
|
@@ -0,0 +1,554 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
|
4
|
+
"""
|
5
|
+
Staran 日期表达式解析模块 v1.0.10
|
6
|
+
==============================
|
7
|
+
|
8
|
+
提供自然语言日期表达式的解析功能,支持中文和英文表达。
|
9
|
+
|
10
|
+
主要功能:
|
11
|
+
- 自然语言日期解析
|
12
|
+
- 相对日期表达式
|
13
|
+
- 日期计算表达式
|
14
|
+
- 智能日期推断
|
15
|
+
"""
|
16
|
+
|
17
|
+
import calendar
import datetime
import re
from dataclasses import dataclass
from typing import Any, Dict, List, Optional, Tuple, Union
|
21
|
+
|
22
|
+
@dataclass
class ParseResult:
    """Outcome of one attempt to parse a date expression.

    Attributes:
        success: True when a date was resolved.
        date: The resolved date, or None on failure.
        expression: The expression text that was examined.
        confidence: Heuristic score between 0 and 1.
        matched_pattern: The regex that was tried; '' when ``parse`` ran out
            of patterns.
        extracted_components: Pieces pulled out of the text (year, unit, ...).
    """

    success: bool
    date: Optional[datetime.date]
    expression: str
    confidence: float  # heuristic score in the range 0-1
    matched_pattern: str
    extracted_components: Dict[str, Any]


class DateExpressionParser:
    """Parse Chinese and English date expressions into ``datetime.date`` values.

    Patterns are grouped by category in ``self.patterns``.  ``parse`` tries the
    categories in insertion order and returns the first successful result.
    Relative expressions are resolved against ``datetime.date.today()``.
    """

    # Digits used when a Chinese number is written positionally (二零二四).
    _CHINESE_DIGITS = {
        '零': 0, '〇': 0, '一': 1, '二': 2, '三': 3, '四': 4,
        '五': 5, '六': 6, '七': 7, '八': 8, '九': 9,
    }

    # Day offsets for the fixed relative words.
    _DAY_WORD_OFFSETS = {
        '今天': 0, 'today': 0,
        '明天': 1, 'tomorrow': 1,
        '后天': 2,
        '昨天': -1, 'yesterday': -1,
        '前天': -2,
        '大后天': 3,
    }

    _WEEK_UNITS = ('周', '星期', '礼拜', 'week')
    _PAST_MODIFIERS = ('上', 'last')
    _FUTURE_MODIFIERS = ('下', 'next')
    _CURRENT_MODIFIERS = ('这', '本', 'this')

    # Holidays resolved to a (month, day) in the current year.  春节, 清明节,
    # 端午节 and 中秋节 are fixed approximations; their real dates need a
    # lunar-calendar computation.
    _FIXED_HOLIDAYS = {
        '元旦': (1, 1),
        '春节': (2, 10),
        '妇女节': (3, 8),
        '清明节': (4, 5),
        '劳动节': (5, 1),
        '儿童节': (6, 1),
        '端午节': (6, 14),
        '教师节': (9, 10),
        '中秋节': (9, 17),
        '国庆节': (10, 1),
        '圣诞节': (12, 25),
        'new year': (1, 1),
        'newyear': (1, 1),  # the pattern's ``\s*`` also admits no space
        'christmas': (12, 25),
        'valentine': (2, 14),
        'halloween': (10, 31),
    }

    # Pattern category -> name of the method that interprets a match.
    _HANDLERS = {
        'absolute_dates': '_parse_absolute_date',
        'relative_dates': '_parse_relative_date',
        'weekday_expressions': '_parse_weekday_expression',
        'special_dates': '_parse_special_date',
        'quarter_month': '_parse_quarter_month',
    }

    def __init__(self):
        """Build the lookup tables and regex patterns used by ``parse``."""
        self._init_patterns()

    def _init_patterns(self):
        """Populate the numeral, month and weekday tables and the pattern lists."""
        # Chinese numerals 0-31 (enough for days of a month).
        self.chinese_numbers = {
            '零': 0, '一': 1, '二': 2, '三': 3, '四': 4, '五': 5,
            '六': 6, '七': 7, '八': 8, '九': 9, '十': 10,
            '十一': 11, '十二': 12, '十三': 13, '十四': 14, '十五': 15,
            '十六': 16, '十七': 17, '十八': 18, '十九': 19, '二十': 20,
            '二十一': 21, '二十二': 22, '二十三': 23, '二十四': 24,
            '二十五': 25, '二十六': 26, '二十七': 27, '二十八': 28,
            '二十九': 29, '三十': 30, '三十一': 31
        }

        # Month names (Chinese, English, English abbreviations).
        self.month_names = {
            '一月': 1, '二月': 2, '三月': 3, '四月': 4, '五月': 5, '六月': 6,
            '七月': 7, '八月': 8, '九月': 9, '十月': 10, '十一月': 11, '十二月': 12,
            '正月': 1, '腊月': 12,
            'january': 1, 'february': 2, 'march': 3, 'april': 4, 'may': 5, 'june': 6,
            'july': 7, 'august': 8, 'september': 9, 'october': 10, 'november': 11,
            'december': 12,
            'jan': 1, 'feb': 2, 'mar': 3, 'apr': 4, 'jun': 6, 'jul': 7,
            'aug': 8, 'sep': 9, 'oct': 10, 'nov': 11, 'dec': 12
        }

        # Weekday names mapped to ``date.weekday()`` numbers (Monday == 0).
        self.weekday_names = {
            '周一': 0, '周二': 1, '周三': 2, '周四': 3, '周五': 4, '周六': 5, '周日': 6,
            '星期一': 0, '星期二': 1, '星期三': 2, '星期四': 3, '星期五': 4,
            '星期六': 5, '星期日': 6,
            '星期天': 6, '礼拜一': 0, '礼拜二': 1, '礼拜三': 2, '礼拜四': 3,
            '礼拜五': 4, '礼拜六': 5, '礼拜天': 6,
            'monday': 0, 'tuesday': 1, 'wednesday': 2, 'thursday': 3, 'friday': 4,
            'saturday': 5, 'sunday': 6,
            'mon': 0, 'tue': 1, 'wed': 2, 'thu': 3, 'fri': 4, 'sat': 5, 'sun': 6
        }

        # Regex patterns by category; dict order is the order parse() tries them.
        self.patterns = {
            # Absolute dates
            'absolute_dates': [
                # YYYY年MM月DD日
                r'(\d{4})年(\d{1,2})月(\d{1,2})日?',
                # YYYY-MM-DD, YYYY/MM/DD
                r'(\d{4})[-/](\d{1,2})[-/](\d{1,2})',
                # MM月DD日 (current year)
                r'(\d{1,2})月(\d{1,2})日?',
                # Chinese numerals; the year may be positional (二零二四 / 二〇二四)
                r'([零〇一二三四五六七八九十]+)年([一二三四五六七八九十]+)月([一二三四五六七八九十]+)日?',
            ],

            # Relative dates
            'relative_dates': [
                # today / tomorrow / yesterday and friends
                r'(今天|明天|后天|昨天|前天|大后天)',
                r'(today|tomorrow|yesterday)',

                # N days before/after
                r'(\d+)天(前|后|之前|之后)',
                r'(\d+)\s*(days?)\s*(ago|later|before|after)',

                # N weeks before/after
                r'(\d+)(周|星期|礼拜)(前|后|之前|之后)',
                r'(\d+)\s*(weeks?)\s*(ago|later|before|after)',

                # N months before/after
                r'(\d+)个?月(前|后|之前|之后)',
                r'(\d+)\s*(months?)\s*(ago|later|before|after)',

                # N years before/after
                r'(\d+)年(前|后|之前|之后)',
                r'(\d+)\s*(years?)\s*(ago|later|before|after)',

                # last/next + unit
                r'(上|下)(周|星期|礼拜|月|年)',
                r'(last|next)\s*(week|month|year)',

                # this + unit
                r'(这|本)(周|星期|礼拜|月|年)',
                r'(this)\s*(week|month|year)',
            ],

            # Weekday expressions
            'weekday_expressions': [
                # this/next/last week + weekday
                r'(这|本|下|上)(周|星期|礼拜)([一二三四五六日天])',
                r'(this|next|last)\s*(week)?\s*(monday|tuesday|wednesday|thursday|friday|saturday|sunday)',

                # bare weekday
                r'(周|星期|礼拜)([一二三四五六日天])',
                r'(monday|tuesday|wednesday|thursday|friday|saturday|sunday)',
            ],

            # Holidays and other special dates
            'special_dates': [
                r'(春节|除夕|元宵节|清明节|劳动节|端午节|中秋节|国庆节|圣诞节|元旦)',
                r'(new\s*year|christmas|valentine|halloween|thanksgiving)',
                r'(母亲节|父亲节|儿童节|教师节|妇女节)',
                r'(生日|结婚纪念日|工作日|周末)',
            ],

            # Quarters, months and seasons
            'quarter_month': [
                r'(第?[一二三四1234])季度',
                r'([一二三四五六七八九十\d]+)月',
                r'(Q[1234])',
                r'(spring|summer|autumn|fall|winter)',
                r'(春天|夏天|秋天|冬天)',
            ]
        }

    @staticmethod
    def _failure(expression: str, pattern: str = '') -> ParseResult:
        """Return the canonical unsuccessful result for *expression*."""
        return ParseResult(False, None, expression, 0.0, pattern, {})

    def _numeral_to_int(self, token: Optional[str]) -> int:
        """Convert an Arabic or Chinese numeral token to int (0 if unreadable)."""
        if not token:
            return 0
        if token.isdigit():
            return int(token)
        return self._chinese_to_number(token)

    def parse(self, expression: str) -> ParseResult:
        """Parse *expression* and return the first successful interpretation.

        Args:
            expression: Free-form text such as ``"2024-01-15"``, ``"3天后"`` or
                ``"next friday"``.  Surrounding whitespace is ignored.

        Returns:
            A ``ParseResult``.  When no pattern yields a date, ``success`` is
            False and ``matched_pattern`` is ``''``.
        """
        expression = expression.strip()

        for pattern_type, patterns in self.patterns.items():
            for pattern in patterns:
                result = self._try_parse_pattern(expression, pattern, pattern_type)
                if result.success:
                    return result

        return self._failure(expression)

    def _try_parse_pattern(self, expression: str, pattern: str, pattern_type: str) -> ParseResult:
        """Search *expression* for *pattern* and hand a match to its category handler.

        Failures inside a handler (invalid or out-of-range dates, patterns
        with too few groups) are reported as an unsuccessful result rather
        than raised, so ``parse`` can move on to the next pattern.
        """
        match = re.search(pattern, expression, re.IGNORECASE)
        handler_name = self._HANDLERS.get(pattern_type)
        if match is None or handler_name is None:
            return self._failure(expression, pattern)

        try:
            return getattr(self, handler_name)(match, pattern, expression)
        except (ValueError, OverflowError, IndexError):
            return self._failure(expression, pattern)

    def _parse_absolute_date(self, match, pattern: str, expression: str) -> ParseResult:
        """Interpret a year/month/day match, or a month/day match in the current year."""
        groups = match.groups()

        if len(groups) == 3:
            year, month, day = (self._numeral_to_int(token) for token in groups)
            confidence = 0.95
        elif len(groups) == 2:
            # Month and day only: assume the current year.
            year = datetime.date.today().year
            month, day = (self._numeral_to_int(token) for token in groups)
            confidence = 0.85
        else:
            return self._failure(expression, pattern)

        try:
            resolved = datetime.date(year, month, day)
        except (ValueError, OverflowError):
            # e.g. month 13, Feb 30, or an unreadable numeral (converted to 0)
            return self._failure(expression, pattern)

        return ParseResult(
            success=True,
            date=resolved,
            expression=expression,
            confidence=confidence,
            matched_pattern=pattern,
            extracted_components={'year': year, 'month': month, 'day': day}
        )

    def _parse_relative_date(self, match, pattern: str, expression: str) -> ParseResult:
        """Interpret a match from the ``relative_dates`` category.

        Three shapes are recognised:

        * one group: a fixed word such as 明天 / ``tomorrow``;
        * a leading number: "N <unit> before/after";
        * a leading modifier: "last/next/this <unit>".
        """
        groups = match.groups()
        today = datetime.date.today()

        try:
            if len(groups) == 1:
                return self._resolve_day_word(groups[0], today, pattern, expression)
            if len(groups) >= 2 and groups[0]:
                if groups[0].isdigit():
                    return self._resolve_counted_offset(match, today, pattern, expression)
                return self._resolve_period_shift(match, today, pattern, expression)
        except (ValueError, OverflowError):
            # The offset leaves the range datetime.date can represent.
            return self._failure(expression, pattern)

        return self._failure(expression, pattern)

    def _resolve_day_word(self, word, today, pattern, expression) -> ParseResult:
        """Resolve a fixed relative word (今天, tomorrow, ...) against *today*."""
        offset = self._DAY_WORD_OFFSETS.get(word.lower())
        if offset is None:
            return self._failure(expression, pattern)
        return ParseResult(
            success=True,
            date=today + datetime.timedelta(days=offset),
            expression=expression,
            confidence=0.9,
            matched_pattern=pattern,
            extracted_components={'offset_days': offset}
        )

    def _resolve_counted_offset(self, match, today, pattern, expression) -> ParseResult:
        """Resolve "N <unit> before/after" (3天后, 2 weeks ago, ...)."""
        last = match.re.groups
        count = int(match.group(1))
        direction = match.group(last) or ''

        # Some patterns spell the unit outside any capture group (``(\d+)天(前|后)``),
        # so read it from the text between the number and the direction.
        unit = match.string[match.end(1):match.start(last)].strip().lower()

        is_past = any(word in direction.lower() for word in ('前', 'ago', 'before'))
        signed = -count if is_past else count

        if '天' in unit or 'day' in unit:
            target = today + datetime.timedelta(days=signed)
        elif any(word in unit for word in self._WEEK_UNITS):
            target = today + datetime.timedelta(weeks=signed)
        elif '月' in unit or 'month' in unit:
            target = self._add_months(today, signed)
        elif '年' in unit or 'year' in unit:
            target = self._add_years(today, signed)
        else:
            return self._failure(expression, pattern)

        return ParseResult(
            success=True,
            date=target,
            expression=expression,
            confidence=0.85,
            matched_pattern=pattern,
            extracted_components={'number': count, 'unit': unit, 'direction': direction}
        )

    def _resolve_period_shift(self, match, today, pattern, expression) -> ParseResult:
        """Resolve a bare "last/next/this <unit>" phrase by shifting *today* one unit.

        Only accepted when the phrase is the whole expression, so longer
        inputs such as 下周三 or "christmas this year" still reach the later,
        more specific handlers.
        """
        if match.group(0) != match.string.strip():
            return self._failure(expression, pattern)

        modifier = match.group(1).lower()
        unit = match.group(2).lower()

        if modifier in self._PAST_MODIFIERS:
            step = -1
        elif modifier in self._FUTURE_MODIFIERS:
            step = 1
        elif modifier in self._CURRENT_MODIFIERS:
            step = 0
        else:
            return self._failure(expression, pattern)

        if unit in self._WEEK_UNITS:
            target = today + datetime.timedelta(weeks=step)
        elif unit in ('月', 'month'):
            target = self._add_months(today, step)
        elif unit in ('年', 'year'):
            target = self._add_years(today, step)
        else:
            return self._failure(expression, pattern)

        return ParseResult(
            success=True,
            date=target,
            expression=expression,
            confidence=0.8,
            matched_pattern=pattern,
            extracted_components={'number': abs(step), 'unit': unit, 'direction': modifier}
        )

    def _parse_weekday_expression(self, match, pattern: str, expression: str) -> ParseResult:
        """Interpret a weekday, optionally qualified by this/next/last (week).

        With an explicit week word (下周三, "next week monday") the result is
        that weekday in the adjacent calendar week (weeks start on Monday).
        Without one ("next monday", 周三) it is the nearest occurrence strictly
        after, or for "last" strictly before, today.
        """
        today = datetime.date.today()
        weekday_name = ''
        week_modifier = ''
        explicit_week = False

        for group in match.groups():
            if not group:
                continue  # optional group that did not participate
            token = group.lower()
            if token in self.weekday_names:
                weekday_name = token
            elif token in self._PAST_MODIFIERS + self._FUTURE_MODIFIERS + self._CURRENT_MODIFIERS:
                week_modifier = token
            elif token in self._WEEK_UNITS:
                explicit_week = True
            elif len(token) == 1 and token in '一二三四五六日天':
                weekday_name = f'星期{token}'

        target_weekday = self.weekday_names.get(weekday_name)
        if target_weekday is None:
            return self._failure(expression, pattern)

        # Offset to that weekday within the current Monday-based week.
        days_ahead = target_weekday - today.weekday()

        if week_modifier in self._FUTURE_MODIFIERS:
            if explicit_week or days_ahead <= 0:
                days_ahead += 7
        elif week_modifier in self._PAST_MODIFIERS:
            if explicit_week or days_ahead >= 0:
                days_ahead -= 7
        elif week_modifier in self._CURRENT_MODIFIERS:
            pass  # the weekday of the current week, even if already past
        elif days_ahead <= 0:
            # No modifier: the next occurrence of that weekday.
            days_ahead += 7

        return ParseResult(
            success=True,
            date=today + datetime.timedelta(days=days_ahead),
            expression=expression,
            confidence=0.8,
            matched_pattern=pattern,
            extracted_components={'weekday': target_weekday, 'modifier': week_modifier}
        )

    def _parse_special_date(self, match, pattern: str, expression: str) -> ParseResult:
        """Resolve a holiday name to its date in the current year.

        Names that are recognised by the patterns but have no entry in
        ``_FIXED_HOLIDAYS`` produce an unsuccessful result.
        """
        # Collapse internal whitespace so "new   year" matches the table key.
        holiday_name = ' '.join(match.group(1).lower().split())
        month_day = self._FIXED_HOLIDAYS.get(holiday_name)
        if month_day is None:
            return self._failure(expression, pattern)

        month, day = month_day
        return ParseResult(
            success=True,
            date=datetime.date(datetime.date.today().year, month, day),
            expression=expression,
            confidence=0.7,
            matched_pattern=pattern,
            extracted_components={'holiday': holiday_name}
        )

    def _parse_quarter_month(self, match, pattern: str, expression: str) -> ParseResult:
        """Resolve a quarter, month or season to the first day of that period.

        Seasons are mapped onto quarters (spring -> Q1 ... winter -> Q4).
        """
        current_year = datetime.date.today().year
        token = match.group(1).lower()
        # The 季度 / 月 suffix sits outside the capture group, so classify the
        # match by its full text.
        text = match.group(0).lower()

        if text.endswith('月'):
            month = self._numeral_to_int(text[:-1])
            if not 1 <= month <= 12:
                return self._failure(expression, pattern)
            return ParseResult(
                success=True,
                date=datetime.date(current_year, month, 1),
                expression=expression,
                confidence=0.8,
                matched_pattern=pattern,
                extracted_components={'month': month}
            )

        if text.endswith('季度'):
            quarter = self._numeral_to_int(text[:-2].lstrip('第'))
        else:
            quarter_map = {'q1': 1, 'q2': 2, 'q3': 3, 'q4': 4}
            season_map = {
                'spring': 1, '春天': 1,
                'summer': 2, '夏天': 2,
                'autumn': 3, 'fall': 3, '秋天': 3,
                'winter': 4, '冬天': 4,
            }
            quarter = quarter_map.get(token) or season_map.get(token) or 0

        if not 1 <= quarter <= 4:
            return self._failure(expression, pattern)

        quarter_start_month = (quarter - 1) * 3 + 1
        return ParseResult(
            success=True,
            date=datetime.date(current_year, quarter_start_month, 1),
            expression=expression,
            confidence=0.75,
            matched_pattern=pattern,
            extracted_components={'quarter': quarter}
        )

    def _chinese_to_number(self, chinese_str: str) -> int:
        """Convert a Chinese numeral to int, returning 0 when it cannot be read.

        Handles table entries (零 to 三十一), tens compounds (三十二, 十五) and
        positional digit strings (二零二四 -> 2024).
        """
        known = self.chinese_numbers.get(chinese_str)
        if known is not None:
            return known

        if '十' in chinese_str:
            if chinese_str.startswith('十'):
                # 十X -> 10 + X
                return 10 + self.chinese_numbers.get(chinese_str[1:2], 0)
            # X十Y -> X*10 + Y
            parts = chinese_str.split('十')
            tens = self.chinese_numbers.get(parts[0], 0) * 10
            ones = self.chinese_numbers.get(parts[1], 0) if parts[1] else 0
            return tens + ones

        # Positional digits, as used for years: 二零二四 / 二〇二四
        if len(chinese_str) > 1 and all(ch in self._CHINESE_DIGITS for ch in chinese_str):
            value = 0
            for ch in chinese_str:
                value = value * 10 + self._CHINESE_DIGITS[ch]
            return value

        try:
            return int(chinese_str)
        except ValueError:
            return 0

    def _add_months(self, date: datetime.date, months: int) -> datetime.date:
        """Return *date* shifted by *months*, clamping the day to the month's end.

        Raises:
            ValueError: If the resulting year is outside ``datetime.date``'s range.
        """
        # Work in "months since year 0" so divmod handles both directions in
        # constant time, however large the offset.
        year, month_index = divmod(date.year * 12 + (date.month - 1) + months, 12)
        month = month_index + 1
        if not datetime.MINYEAR <= year <= datetime.MAXYEAR:
            raise ValueError(f'year {year} is out of range')

        # e.g. Jan 31 + 1 month -> last day of February
        last_day = calendar.monthrange(year, month)[1]
        return datetime.date(year, month, min(date.day, last_day))

    def _add_years(self, date: datetime.date, years: int) -> datetime.date:
        """Return *date* shifted by *years*; Feb 29 becomes Feb 28 in non-leap years.

        Raises:
            ValueError / OverflowError: If the resulting year is out of range.
        """
        target_year = date.year + years
        try:
            return datetime.date(target_year, date.month, date.day)
        except ValueError:
            if date.month == 2 and date.day == 29:
                return datetime.date(target_year, 2, 28)
            raise
|
544
|
+
|
545
|
+
# 便捷函数
|
546
|
+
def parse_date_expression(expression: str) -> ParseResult:
    """Parse *expression* with a freshly constructed ``DateExpressionParser``.

    Convenience wrapper for one-off calls; returns the parser's ``ParseResult``.
    """
    return DateExpressionParser().parse(expression)
|
550
|
+
|
551
|
+
def smart_parse_date(expression: str) -> Optional[datetime.date]:
    """Return the date parsed from *expression*, or None when parsing fails."""
    outcome = parse_date_expression(expression)
    if not outcome.success:
        return None
    return outcome.date
|