staran 1.0.9__py3-none-any.whl → 1.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,554 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+
4
+ """
5
+ Staran 日期表达式解析模块 v1.0.10
6
+ ==============================
7
+
8
+ 提供自然语言日期表达式的解析功能,支持中文和英文表达。
9
+
10
+ 主要功能:
11
+ - 自然语言日期解析
12
+ - 相对日期表达式
13
+ - 日期计算表达式
14
+ - 智能日期推断
15
+ """
16
+
17
import calendar
import datetime
import re
from dataclasses import dataclass
from typing import Any, Dict, List, Optional, Tuple, Union
21
+
22
@dataclass
class ParseResult:
    """Outcome of one attempt to parse a date expression.

    Attributes:
        success: True when a date could be derived from the expression.
        date: The resolved date, or None when parsing failed.
        expression: The expression text that was parsed.
        confidence: Heuristic confidence of the match, between 0 and 1.
        matched_pattern: The regular expression that produced this result
            (empty when no pattern was tried).
        extracted_components: Pieces pulled out of the expression, such as
            year/month/day or number/unit/direction; empty on failure.
    """

    success: bool
    date: Optional[datetime.date]
    expression: str
    confidence: float  # confidence in the range 0-1
    matched_pattern: str
    # ``Any`` (the typing construct), not the builtin function ``any``.
    extracted_components: Dict[str, Any]
31
+
32
class DateExpressionParser:
    """Rule-based parser for Chinese and English date expressions.

    ``parse`` walks the regular expressions stored in ``self.patterns``
    (category by category, in insertion order) and returns the first
    successful ``ParseResult``. Relative expressions are resolved against
    ``datetime.date.today()``.
    """

    # Single Chinese digits, read positionally for numbers such as years
    # (二零二四 -> 2024).
    _CHINESE_DIGITS = {
        '零': 0, '〇': 0, '一': 1, '二': 2, '三': 3, '四': 4,
        '五': 5, '六': 6, '七': 7, '八': 8, '九': 9,
    }

    # Vocabulary shared by the relative-date and weekday handlers.
    _PAST_WORDS = ('前', '之前', 'ago', 'before')
    _NEXT_WORDS = ('下', 'next')
    _LAST_WORDS = ('上', 'last')
    _THIS_WORDS = ('这', '本', 'this')
    _WEEK_WORDS = ('周', '星期', '礼拜', 'week')
    _WEEKDAY_CHARS = frozenset('一二三四五六日天')

    # Holidays mapped to a fixed (month, day). Keys are lower-case with all
    # whitespace removed.
    _FIXED_HOLIDAYS = {
        '元旦': (1, 1),
        '春节': (2, 10),    # simplified: the real date follows the lunar calendar
        '妇女节': (3, 8),
        '清明节': (4, 5),   # simplified
        '劳动节': (5, 1),
        '儿童节': (6, 1),
        '端午节': (6, 14),  # simplified: the real date follows the lunar calendar
        '教师节': (9, 10),
        '中秋节': (9, 17),  # simplified: the real date follows the lunar calendar
        '国庆节': (10, 1),
        '圣诞节': (12, 25),
        'newyear': (1, 1),
        'valentine': (2, 14),
        'halloween': (10, 31),
        'christmas': (12, 25),
    }

    # Holidays defined as (month, weekday, n): the n-th given weekday of the
    # month, with Monday == 0.
    _FLOATING_HOLIDAYS = {
        '母亲节': (5, 6, 2),         # second Sunday of May
        '父亲节': (6, 6, 3),         # third Sunday of June
        'thanksgiving': (11, 3, 4),  # fourth Thursday of November
    }

    def __init__(self):
        self._init_patterns()

    def _init_patterns(self):
        """Build the lookup tables and the regular expressions used by ``parse``."""

        # Chinese numerals 0-31 (enough for months and days).
        self.chinese_numbers = {
            '零': 0, '〇': 0, '一': 1, '二': 2, '三': 3, '四': 4, '五': 5,
            '六': 6, '七': 7, '八': 8, '九': 9, '十': 10,
            '十一': 11, '十二': 12, '十三': 13, '十四': 14, '十五': 15,
            '十六': 16, '十七': 17, '十八': 18, '十九': 19, '二十': 20,
            '二十一': 21, '二十二': 22, '二十三': 23, '二十四': 24,
            '二十五': 25, '二十六': 26, '二十七': 27, '二十八': 28,
            '二十九': 29, '三十': 30, '三十一': 31,
        }

        # Month names -> month number.
        self.month_names = {
            # Chinese
            '一月': 1, '二月': 2, '三月': 3, '四月': 4, '五月': 5, '六月': 6,
            '七月': 7, '八月': 8, '九月': 9, '十月': 10, '十一月': 11, '十二月': 12,
            '正月': 1, '腊月': 12,
            # English
            'january': 1, 'february': 2, 'march': 3, 'april': 4, 'may': 5, 'june': 6,
            'july': 7, 'august': 8, 'september': 9, 'october': 10, 'november': 11, 'december': 12,
            'jan': 1, 'feb': 2, 'mar': 3, 'apr': 4, 'jun': 6, 'jul': 7,
            'aug': 8, 'sep': 9, 'oct': 10, 'nov': 11, 'dec': 12,
        }

        # Weekday names -> ``date.weekday()`` value (Monday == 0).
        self.weekday_names = {
            # Chinese
            '周一': 0, '周二': 1, '周三': 2, '周四': 3, '周五': 4, '周六': 5, '周日': 6,
            '星期一': 0, '星期二': 1, '星期三': 2, '星期四': 3, '星期五': 4, '星期六': 5, '星期日': 6,
            '星期天': 6, '礼拜一': 0, '礼拜二': 1, '礼拜三': 2, '礼拜四': 3, '礼拜五': 4, '礼拜六': 5, '礼拜天': 6,
            # English
            'monday': 0, 'tuesday': 1, 'wednesday': 2, 'thursday': 3, 'friday': 4, 'saturday': 5, 'sunday': 6,
            'mon': 0, 'tue': 1, 'wed': 2, 'thu': 3, 'fri': 4, 'sat': 5, 'sun': 6,
        }

        cn_num = '一二三四五六七八九十'
        cn_weekday = '一二三四五六日天'
        en_weekday = 'monday|tuesday|wednesday|thursday|friday|saturday|sunday'
        # A week unit that is NOT followed by a weekday (or 末): "下周三" and
        # "next week monday" must be left for the weekday patterns.
        cn_week_unit = '(?:周|星期|礼拜)(?![' + cn_weekday + '末])'
        en_week_unit = r'week(?!\s*(?:' + en_weekday + '))'

        self.patterns = {
            # Absolute dates
            'absolute_dates': [
                # YYYY年MM月DD日
                r'(\d{4})年(\d{1,2})月(\d{1,2})日?',
                # YYYY-MM-DD, YYYY/MM/DD
                r'(\d{4})[-/](\d{1,2})[-/](\d{1,2})',
                # MM月DD日 (current year). The look-behind keeps a rejected
                # year-qualified date from being re-read in the current year.
                r'(?<![\d年])(\d{1,2})月(\d{1,2})日?',
                # Chinese numerals with a four-digit positional year
                '([零〇一二三四五六七八九]{4})年([' + cn_num + ']+)月([' + cn_num + ']+)日?',
                # Chinese numerals, month and day only (current year)
                '(?<![' + cn_num + '年])([' + cn_num + ']+)月([' + cn_num + ']+)日?',
            ],

            # Relative dates
            'relative_dates': [
                # today / tomorrow / ...
                r'(今天|明天|后天|昨天|前天|大后天)',
                r'(today|tomorrow|yesterday)',

                # N days before/after (the unit is captured so the handler can see it)
                r'(\d+)(天)(前|后|之前|之后)',
                r'(\d+)\s*(days?)\s*(ago|later|before|after)',

                # N weeks before/after
                r'(\d+)个?(周|星期|礼拜)(前|后|之前|之后)',
                r'(\d+)\s*(weeks?)\s*(ago|later|before|after)',

                # N months before/after
                r'(\d+)(个?月)(前|后|之前|之后)',
                r'(\d+)\s*(months?)\s*(ago|later|before|after)',

                # N years before/after
                r'(\d+)(年)(前|后|之前|之后)',
                r'(\d+)\s*(years?)\s*(ago|later|before|after)',

                # last/next + unit
                '(上|下)个?(' + cn_week_unit + '|月|年)',
                r'(last|next)\s*(' + en_week_unit + '|month|year)',

                # this + unit
                '(这|本)个?(' + cn_week_unit + '|月|年)',
                r'(this)\s*(' + en_week_unit + '|month|year)',
            ],

            # Weekday expressions
            'weekday_expressions': [
                # this/next/last week + weekday
                '(这|本|下|上)个?(周|星期|礼拜)([' + cn_weekday + '])',
                r'(this|next|last)\s*(week)?\s*(' + en_weekday + ')',

                # bare weekday
                '(周|星期|礼拜)([' + cn_weekday + '])',
                '(' + en_weekday + ')',
            ],

            # Holidays and special dates
            'special_dates': [
                r'(春节|除夕|元宵节|清明节|劳动节|端午节|中秋节|国庆节|圣诞节|元旦)',
                r'(new\s*year|christmas|valentine|halloween|thanksgiving)',
                r'(母亲节|父亲节|儿童节|教师节|妇女节)',
                r'(生日|结婚纪念日|工作日|周末)',
            ],

            # Quarters, months and seasons
            'quarter_month': [
                r'(第?[一二三四1234])季度',
                # A bare month. The look-behind rejects "2024年12月" instead of
                # silently dropping the year.
                '(?<![年\\d' + cn_num + '])([' + cn_num + '\\d]+)月',
                r'(Q[1234])',
                r'(spring|summer|autumn|fall|winter)',
                r'(春天|夏天|秋天|冬天)',
            ],
        }

    def parse(self, expression: str) -> "ParseResult":
        """Parse a date expression.

        Args:
            expression: Free text that contains a date expression.

        Returns:
            The first successful ``ParseResult`` in pattern order, or a
            failed result (``success=False``, ``date=None``) when no pattern
            yields a valid date.
        """
        expression = expression.strip()

        for pattern_type, patterns in self.patterns.items():
            for pattern in patterns:
                result = self._try_parse_pattern(expression, pattern, pattern_type)
                if result.success:
                    return result

        return ParseResult(
            success=False,
            date=None,
            expression=expression,
            confidence=0.0,
            matched_pattern='',
            extracted_components={},
        )

    @staticmethod
    def _failure(expression: str, pattern: str) -> "ParseResult":
        """Return a fresh failed result for ``pattern``."""
        return ParseResult(False, None, expression, 0.0, pattern, {})

    def _try_parse_pattern(self, expression: str, pattern: str, pattern_type: str) -> "ParseResult":
        """Match one pattern (case-insensitively) and dispatch to its handler.

        Returns a failed result when the pattern does not match, when the
        category is unknown, or when the handler raises.
        """
        match = re.search(pattern, expression, re.IGNORECASE)
        if not match:
            return self._failure(expression, pattern)

        handlers = {
            'absolute_dates': self._parse_absolute_date,
            'relative_dates': self._parse_relative_date,
            'weekday_expressions': self._parse_weekday_expression,
            'special_dates': self._parse_special_date,
            'quarter_month': self._parse_quarter_month,
        }
        handler = handlers.get(pattern_type)
        if handler is None:
            return self._failure(expression, pattern)

        try:
            return handler(match, pattern, expression)
        except Exception:
            # Parsing is best-effort: a handler error for one pattern must not
            # stop ``parse`` from trying the remaining patterns.
            return self._failure(expression, pattern)

    def _to_number(self, text: str) -> int:
        """Convert decimal digits or Chinese numerals to an int."""
        return int(text) if text.isdecimal() else self._chinese_to_number(text)

    def _parse_absolute_date(self, match, pattern: str, expression: str) -> "ParseResult":
        """Handle year-month-day (3 groups) and month-day (2 groups) matches.

        Month-day matches are placed in the current year and get a lower
        confidence. An impossible calendar date yields a failed result.
        """
        groups = match.groups()

        if len(groups) == 3:
            year, month, day = (self._to_number(g) for g in groups)
            confidence = 0.95
        elif len(groups) == 2:
            year = datetime.date.today().year
            month, day = (self._to_number(g) for g in groups)
            confidence = 0.85
        else:
            return self._failure(expression, pattern)

        try:
            date = datetime.date(year, month, day)
        except ValueError:
            return self._failure(expression, pattern)

        return ParseResult(
            success=True,
            date=date,
            expression=expression,
            confidence=confidence,
            matched_pattern=pattern,
            extracted_components={'year': year, 'month': month, 'day': day},
        )

    def _shift_date(self, base: datetime.date, amount: int, unit: str) -> Optional[datetime.date]:
        """Move ``base`` by ``amount`` units; ``unit`` must be lower-case.

        Returns None when ``unit`` is not a day, week, month or year word.
        """
        if '天' in unit or 'day' in unit:
            return base + datetime.timedelta(days=amount)
        if any(word in unit for word in self._WEEK_WORDS):
            return base + datetime.timedelta(weeks=amount)
        if '月' in unit or 'month' in unit:
            return self._add_months(base, amount)
        if '年' in unit or 'year' in unit:
            return self._add_years(base, amount)
        return None

    def _parse_relative_date(self, match, pattern: str, expression: str) -> "ParseResult":
        """Handle expressions relative to today.

        Three shapes are supported, told apart by the number of groups:
        a single keyword (今天, tomorrow, ...), modifier + unit (下周,
        last month; "this" maps to an offset of zero), and
        number + unit + direction (3天前, 2 weeks later).
        """
        groups = match.groups()
        today = datetime.date.today()

        # Single keyword.
        if len(groups) == 1:
            relative_map = {
                '今天': 0, 'today': 0,
                '明天': 1, 'tomorrow': 1,
                '后天': 2,
                '昨天': -1, 'yesterday': -1,
                '前天': -2,
                '大后天': 3,
            }
            days_offset = relative_map.get(groups[0].lower())
            if days_offset is None:
                return self._failure(expression, pattern)
            return ParseResult(
                success=True,
                date=today + datetime.timedelta(days=days_offset),
                expression=expression,
                confidence=0.9,
                matched_pattern=pattern,
                extracted_components={'offset_days': days_offset},
            )

        # Modifier + unit: one unit back, one unit forward, or no shift.
        if len(groups) == 2:
            modifier, unit = groups[0].lower(), groups[1].lower()
            if modifier in self._NEXT_WORDS:
                amount = 1
            elif modifier in self._LAST_WORDS:
                amount = -1
            elif modifier in self._THIS_WORDS:
                amount = 0
            else:
                return self._failure(expression, pattern)
            direction = groups[0]
            confidence = 0.8
        else:
            # Number + unit + direction.
            unit = ''.join(groups[1:-1]).lower()
            direction = groups[-1]
            amount = self._to_number(groups[0])
            if direction.lower() in self._PAST_WORDS:
                amount = -amount
            confidence = 0.85

        try:
            target_date = self._shift_date(today, amount, unit)
        except (ValueError, OverflowError):
            # The offset leaves the range ``datetime.date`` can represent.
            return self._failure(expression, pattern)
        if target_date is None:
            return self._failure(expression, pattern)

        return ParseResult(
            success=True,
            date=target_date,
            expression=expression,
            confidence=confidence,
            matched_pattern=pattern,
            extracted_components={'number': abs(amount), 'unit': unit, 'direction': direction},
        )

    def _parse_weekday_expression(self, match, pattern: str, expression: str) -> "ParseResult":
        """Handle weekday expressions, with or without a week modifier.

        When a modifier comes with an explicit week word (下周三,
        "next week monday") the result is that weekday in the adjacent
        Monday-to-Sunday week. "next monday" / "last monday" without a week
        word mean the nearest such day strictly after / before today. A
        "this" modifier means the weekday of the current week. A bare weekday
        means its next occurrence after today.
        """
        today = datetime.date.today()
        weekday_name = ''
        week_modifier = ''
        explicit_week = False
        modifiers = self._NEXT_WORDS + self._LAST_WORDS + self._THIS_WORDS

        for group in match.groups():
            if not group:
                continue  # the optional "week" group did not take part
            token = group.lower()
            if token in self.weekday_names:
                weekday_name = token
            elif token in modifiers:
                week_modifier = token
            elif token in self._WEEK_WORDS:
                explicit_week = True
            elif token in self._WEEKDAY_CHARS:
                weekday_name = f'星期{token}'

        if weekday_name not in self.weekday_names:
            return self._failure(expression, pattern)

        target_weekday = self.weekday_names[weekday_name]
        # Offset of the target weekday within the current Monday-Sunday week.
        days_ahead = target_weekday - today.weekday()

        if week_modifier in self._NEXT_WORDS:
            if explicit_week or days_ahead <= 0:
                days_ahead += 7
        elif week_modifier in self._LAST_WORDS:
            if explicit_week or days_ahead >= 0:
                days_ahead -= 7
        elif week_modifier in self._THIS_WORDS:
            pass  # weekday of the current week, possibly already past
        elif days_ahead <= 0:
            days_ahead += 7  # bare weekday: next occurrence after today

        return ParseResult(
            success=True,
            date=today + datetime.timedelta(days=days_ahead),
            expression=expression,
            confidence=0.8,
            matched_pattern=pattern,
            extracted_components={'weekday': target_weekday, 'modifier': week_modifier},
        )

    @staticmethod
    def _nth_weekday(year: int, month: int, weekday: int, n: int) -> datetime.date:
        """Return the ``n``-th (1-based) ``weekday`` (Monday == 0) of a month."""
        first = datetime.date(year, month, 1)
        offset = (weekday - first.weekday()) % 7
        return first + datetime.timedelta(days=offset + 7 * (n - 1))

    def _parse_special_date(self, match, pattern: str, expression: str) -> "ParseResult":
        """Handle holidays, resolved in the current year.

        Lunar-calendar holidays use fixed approximate dates. Names that match
        a pattern but have no known date produce a failed result.
        """
        holiday_name = match.group(1).lower()
        # "new year", "new  year" and "newyear" all match; normalise the key.
        lookup_key = re.sub(r'\s+', '', holiday_name)
        current_year = datetime.date.today().year

        if lookup_key in self._FIXED_HOLIDAYS:
            month, day = self._FIXED_HOLIDAYS[lookup_key]
            target_date = datetime.date(current_year, month, day)
        elif lookup_key in self._FLOATING_HOLIDAYS:
            month, weekday, nth = self._FLOATING_HOLIDAYS[lookup_key]
            target_date = self._nth_weekday(current_year, month, weekday, nth)
        else:
            return self._failure(expression, pattern)

        return ParseResult(
            success=True,
            date=target_date,
            expression=expression,
            confidence=0.7,
            matched_pattern=pattern,
            extracted_components={'holiday': holiday_name},
        )

    def _parse_quarter_month(self, match, pattern: str, expression: str) -> "ParseResult":
        """Handle quarters, bare months and seasons in the current year.

        The result is the first day of the month, or of the first month of
        the quarter. Seasons are mapped onto quarters 1-4 (spring == 1).
        """
        token = match.group(1).lower()
        matched_text = match.group(0)
        current_year = datetime.date.today().year

        # The captured group excludes the "季度" / "月" suffix, so the kind of
        # expression is read from the full match.
        if matched_text.endswith('月'):
            month = self._to_number(token)
            if not 1 <= month <= 12:
                return self._failure(expression, pattern)
            return ParseResult(
                success=True,
                date=datetime.date(current_year, month, 1),
                expression=expression,
                confidence=0.8,
                matched_pattern=pattern,
                extracted_components={'month': month},
            )

        if matched_text.endswith('季度'):
            ordinal = token[1:] if token.startswith('第') else token
            quarter = self._to_number(ordinal)
        else:
            quarter_map = {'q1': 1, 'q2': 2, 'q3': 3, 'q4': 4}
            season_map = {
                'spring': 1, '春天': 1,
                'summer': 2, '夏天': 2,
                'autumn': 3, 'fall': 3, '秋天': 3,
                'winter': 4, '冬天': 4,
            }
            quarter = quarter_map.get(token) or season_map.get(token)

        if not quarter or not 1 <= quarter <= 4:
            return self._failure(expression, pattern)

        # First month of the quarter.
        quarter_start_month = (quarter - 1) * 3 + 1
        return ParseResult(
            success=True,
            date=datetime.date(current_year, quarter_start_month, 1),
            expression=expression,
            confidence=0.75,
            matched_pattern=pattern,
            extracted_components={'quarter': quarter},
        )

    def _chinese_to_number(self, chinese_str: str) -> int:
        """Convert Chinese numerals to an int; return 0 when not convertible.

        Supports the table in ``self.chinese_numbers``, positional digit
        strings (二零二四 -> 2024) and X十Y forms (四十五 -> 45).
        """
        if chinese_str in self.chinese_numbers:
            return self.chinese_numbers[chinese_str]

        # Positional digits, e.g. years written digit by digit.
        digits = self._CHINESE_DIGITS
        if len(chinese_str) > 1 and all(ch in digits for ch in chinese_str):
            return int(''.join(str(digits[ch]) for ch in chinese_str))

        # Compound numbers built around 十.
        if '十' in chinese_str:
            if chinese_str.startswith('十'):
                # 十X -> 10 + X
                if len(chinese_str) > 1:
                    return 10 + self.chinese_numbers.get(chinese_str[1], 0)
                return 10
            # X十Y -> X*10 + Y
            parts = chinese_str.split('十')
            tens = self.chinese_numbers.get(parts[0], 0) * 10
            ones = self.chinese_numbers.get(parts[1], 0) if len(parts) > 1 and parts[1] else 0
            return tens + ones

        # Fall back to a plain integer conversion.
        try:
            return int(chinese_str)
        except ValueError:
            return 0

    def _add_months(self, date: datetime.date, months: int) -> datetime.date:
        """Add ``months`` (may be negative) to ``date``.

        The day is clamped to the last day of the target month, e.g.
        Jan 31 + 1 month -> Feb 28/29.

        Raises:
            ValueError: If the resulting year is outside the supported range.
        """
        # Work on a zero-based month index so carries are a single divmod.
        year, month_index = divmod(date.year * 12 + (date.month - 1) + months, 12)
        month = month_index + 1
        last_day = calendar.monthrange(year, month)[1]
        return datetime.date(year, month, min(date.day, last_day))

    def _add_years(self, date: datetime.date, years: int) -> datetime.date:
        """Add ``years`` (may be negative) to ``date``; Feb 29 falls back to Feb 28.

        Raises:
            ValueError: If the resulting year is outside the supported range.
        """
        try:
            return datetime.date(date.year + years, date.month, date.day)
        except ValueError:
            # Feb 29 in a non-leap target year.
            return datetime.date(date.year + years, date.month, 28)
544
+
545
+ # 便捷函数
546
def parse_date_expression(expression: str) -> ParseResult:
    """Parse a date expression with a newly created ``DateExpressionParser``.

    Convenience wrapper around ``DateExpressionParser.parse``; returns its
    ``ParseResult`` unchanged.
    """
    return DateExpressionParser().parse(expression)
550
+
551
def smart_parse_date(expression: str) -> Optional[datetime.date]:
    """Return the date parsed from ``expression``, or ``None`` if parsing fails."""
    outcome = parse_date_expression(expression)
    if not outcome.success:
        return None
    return outcome.date