utilitz 0.7.2__tar.gz → 0.7.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {utilitz-0.7.2 → utilitz-0.7.4}/PKG-INFO +1 -1
- {utilitz-0.7.2 → utilitz-0.7.4}/pyproject.toml +1 -1
- {utilitz-0.7.2 → utilitz-0.7.4}/src/utilitz/regex.py +85 -3
- {utilitz-0.7.2 → utilitz-0.7.4}/.gitignore +0 -0
- {utilitz-0.7.2 → utilitz-0.7.4}/LICENSE +0 -0
- {utilitz-0.7.2 → utilitz-0.7.4}/README.md +0 -0
- {utilitz-0.7.2 → utilitz-0.7.4}/src/utilitz/__init__.py +0 -0
- {utilitz-0.7.2 → utilitz-0.7.4}/src/utilitz/crypto.py +0 -0
- {utilitz-0.7.2 → utilitz-0.7.4}/src/utilitz/excel.py +0 -0
- {utilitz-0.7.2 → utilitz-0.7.4}/src/utilitz/io.py +0 -0
- {utilitz-0.7.2 → utilitz-0.7.4}/src/utilitz/path.py +0 -0
- {utilitz-0.7.2 → utilitz-0.7.4}/src/utilitz/sys.py +0 -0
|
@@ -70,7 +70,7 @@ def decode(regex, text, split=False, kind=None):
|
|
|
70
70
|
decoded semantic values.
|
|
71
71
|
"""
|
|
72
72
|
regex_list = regex if isinstance(regex, list) else [regex]
|
|
73
|
-
regex_list = [patt
|
|
73
|
+
regex_list = [str(patt)
|
|
74
74
|
if isinstance(patt, Pattern) else patt for patt in regex_list]
|
|
75
75
|
result = [{name: []
|
|
76
76
|
for name in find_patterns(regex, names=True)}
|
|
@@ -151,8 +151,8 @@ class Pattern:
|
|
|
151
151
|
raise ValueError(
|
|
152
152
|
"Cannot decode a match from a non-visible Pattern instance.")
|
|
153
153
|
if to_dict:
|
|
154
|
-
return {self.name: match.group()}
|
|
155
|
-
return match.group()
|
|
154
|
+
return {self.name: match.group(self.id)}
|
|
155
|
+
return match.group(self.id)
|
|
156
156
|
|
|
157
157
|
def __str__(self):
|
|
158
158
|
return self.new_group(self.regex)
|
|
@@ -323,3 +323,85 @@ class First(Pattern):
|
|
|
323
323
|
|
|
324
324
|
def __repr__(self):
    """Return the debug representation: the string form wrapped in ``First(...)``."""
    rendered = self.__str__()
    return f"First({rendered})"
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
class Currency(Number):
    """
    Number pattern preconfigured for monetary amounts.

    Unless overridden, ',' is used as the integer separator, '.' as the
    decimal separator and '$' as the currency symbol. ``signum=True`` is
    always forwarded to ``Number``.
    """

    def __init__(self, name=None, integer_sep=',', decimal_sep='.', currency_sym='$'):
        # Forward the presets positionally, in the order Number receives them.
        number_args = (name, integer_sep, decimal_sep, currency_sym)
        super().__init__(*number_args, signum=True)
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
class Date(Pattern):
    """
    Pattern that matches and decodes dates using pandas-like formats.

    Supported tokens:
        %Y  year (4 digits)
        %m  month number
        %d  day
        %b  abbreviated month name (en/es, case-insensitive)
        %B  full month name (en/es, case-insensitive)

    ``%b`` and ``%B`` are interchangeable: both expand to the same
    alternation of every known month name. Text in ``format`` outside the
    tokens is inserted into the regular expression as-is (it is not
    escaped). Decoded values are ``YYYY-MM-DD`` strings.
    """

    # Month number -> accepted English/Spanish names (abbreviated and full).
    DEFAULT_MONTHS = {
        1: ['jan', 'january', 'ene', 'enero'],
        2: ['feb', 'february', 'febrero'],
        3: ['mar', 'march', 'marzo'],
        4: ['apr', 'april', 'abr', 'abril'],
        5: ['may', 'mayo'],
        6: ['jun', 'june', 'junio'],
        7: ['jul', 'july', 'julio'],
        8: ['aug', 'august', 'ago', 'agosto'],
        9: ['sep', 'sept', 'september', 'septiembre'],
        10: ['oct', 'october', 'octubre'],
        11: ['nov', 'november', 'noviembre'],
        12: ['dec', 'december', 'dic', 'diciembre'],
    }

    def __init__(self, name=None, format='%Y-%m-%d', month_names=None):
        """
        Build a date pattern.

        Parameters
        ----------
        name : optional
            Forwarded to ``Pattern.__init__``.
        format : str
            Date layout built from the supported tokens.
        month_names : mapping, optional
            Month number -> iterable of accepted names. A falsy value
            (``None`` or an empty mapping) selects ``DEFAULT_MONTHS``.
        """
        super().__init__(regex=None, name=name)
        self.format = format

        # if None -> use all known abbreviations
        months = month_names or self.DEFAULT_MONTHS

        # Lower-cased month name -> month number, used by decode().
        self._month_map = {
            month_name.lower(): number
            for number, names in months.items()
            for month_name in names
        }

        # Longest names first: regex alternation takes the first alternative
        # that matches, so 'jan' listed before 'january' would capture only
        # 'jan' whenever nothing after the month forces backtracking.
        alternatives = sorted(self._month_map, key=len, reverse=True)
        self._month_regex = '(?i:' + '|'.join(
            re.escape(m) for m in alternatives
        ) + ')'

    @property
    def regex(self):
        """Return ``format`` with every supported token replaced by a group."""
        # NOTE(review): new_group is defined outside this class; it is called
        # once per token in this exact order even when the token is absent
        # from the format -- keep the order in case it has side effects.
        regex = self.format
        regex = regex.replace('%Y', self.new_group(r'\d{4}', 'year'))
        regex = regex.replace('%m', self.new_group(r'\d{1,2}', 'month'))
        regex = regex.replace('%d', self.new_group(r'\d{1,2}', 'day'))
        regex = regex.replace('%b', self.new_group(self._month_regex, 'month'))
        regex = regex.replace('%B', self.new_group(self._month_regex, 'month'))
        return regex

    def decode(self, match, to_dict=False):
        """
        Convert a match of this pattern into a ``YYYY-MM-DD`` string.

        Parameters
        ----------
        match : re.Match
            Match object holding the year, month and day groups.
        to_dict : bool
            When true, return ``{self.name: value}`` instead of the bare
            string.

        Raises
        ------
        KeyError
            If the matched month text is neither numeric nor a known name.
        """
        year = int(match.group(self.get_id('year')))
        day = int(match.group(self.get_id('day')))
        month_raw = match.group(self.get_id('month'))
        if month_raw.isdigit():
            month = int(month_raw)
        else:
            month = self._month_map[month_raw.lower()]

        # %Y always captures four digits; pad back to four so that years
        # below 1000 keep their ISO width after the int() round-trip.
        decoded = f'{year:04d}-{month:02d}-{day:02d}'
        if to_dict:
            return {self.name: decoded}
        return decoded

    def __repr__(self):
        """Return the debug representation: the string form in ``Date(...)``."""
        return f"Date({self.__str__()})"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|