valid8r 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of valid8r might be problematic. Click here for more details.
- valid8r/__init__.py +27 -0
- valid8r/core/__init__.py +28 -0
- valid8r/core/combinators.py +89 -0
- valid8r/core/maybe.py +162 -0
- valid8r/core/parsers.py +1354 -0
- valid8r/core/validators.py +200 -0
- valid8r/prompt/__init__.py +8 -0
- valid8r/prompt/basic.py +190 -0
- valid8r/py.typed +0 -0
- valid8r/testing/__init__.py +32 -0
- valid8r/testing/assertions.py +67 -0
- valid8r/testing/generators.py +283 -0
- valid8r/testing/mock_input.py +84 -0
- valid8r-0.2.0.dist-info/METADATA +168 -0
- valid8r-0.2.0.dist-info/RECORD +17 -0
- valid8r-0.2.0.dist-info/WHEEL +4 -0
- valid8r-0.2.0.dist-info/entry_points.txt +4 -0
valid8r/core/parsers.py
ADDED
|
@@ -0,0 +1,1354 @@
|
|
|
1
|
+
"""String parsing functions with Maybe monad error handling."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
from datetime import (
|
|
7
|
+
date,
|
|
8
|
+
datetime,
|
|
9
|
+
)
|
|
10
|
+
from decimal import (
|
|
11
|
+
Decimal,
|
|
12
|
+
InvalidOperation,
|
|
13
|
+
)
|
|
14
|
+
from enum import Enum
|
|
15
|
+
from functools import wraps
|
|
16
|
+
from typing import (
|
|
17
|
+
TYPE_CHECKING,
|
|
18
|
+
ParamSpec,
|
|
19
|
+
TypeVar,
|
|
20
|
+
cast,
|
|
21
|
+
overload,
|
|
22
|
+
)
|
|
23
|
+
from uuid import UUID
|
|
24
|
+
|
|
25
|
+
from valid8r.core.maybe import (
|
|
26
|
+
Failure,
|
|
27
|
+
Maybe,
|
|
28
|
+
Success,
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
try:
|
|
32
|
+
import uuid_utils as uuidu
|
|
33
|
+
except Exception: # noqa: BLE001
|
|
34
|
+
uuidu = None # type: ignore[assignment]
|
|
35
|
+
|
|
36
|
+
try:
|
|
37
|
+
from email_validator import (
|
|
38
|
+
EmailNotValidError,
|
|
39
|
+
validate_email,
|
|
40
|
+
)
|
|
41
|
+
|
|
42
|
+
HAS_EMAIL_VALIDATOR = True
|
|
43
|
+
except ImportError:
|
|
44
|
+
HAS_EMAIL_VALIDATOR = False
|
|
45
|
+
EmailNotValidError = None # type: ignore[assignment,misc]
|
|
46
|
+
validate_email = None # type: ignore[assignment]
|
|
47
|
+
|
|
48
|
+
from dataclasses import dataclass
|
|
49
|
+
from ipaddress import (
|
|
50
|
+
IPv4Address,
|
|
51
|
+
IPv4Network,
|
|
52
|
+
IPv6Address,
|
|
53
|
+
IPv6Network,
|
|
54
|
+
ip_address,
|
|
55
|
+
ip_network,
|
|
56
|
+
)
|
|
57
|
+
from urllib.parse import urlsplit
|
|
58
|
+
|
|
59
|
+
if TYPE_CHECKING:
|
|
60
|
+
from collections.abc import (
|
|
61
|
+
Callable,
|
|
62
|
+
Iterable,
|
|
63
|
+
)
|
|
64
|
+
|
|
65
|
+
T = TypeVar('T')
|
|
66
|
+
K = TypeVar('K')
|
|
67
|
+
V = TypeVar('V')
|
|
68
|
+
P = ParamSpec('P')
|
|
69
|
+
E = TypeVar('E', bound=Enum)
|
|
70
|
+
|
|
71
|
+
ISO_DATE_LENGTH = 10
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def parse_int(input_value: str, error_message: str | None = None) -> Maybe[int]:
    """Parse a string to an integer.

    Surrounding whitespace is ignored. Besides plain integer literals, whole
    numbers written with a decimal point (e.g. ``'42.0'``) are accepted, while
    values with a non-zero fractional part (e.g. ``'42.5'``) are rejected.

    Args:
        input_value: String representation of an integer
        error_message: Optional custom error message for invalid (non-empty) input

    Returns:
        Maybe[int]: Success with the parsed integer or Failure with an error message

    """
    if not input_value:
        return Maybe.failure('Input must not be empty')

    cleaned_input = input_value.strip()
    invalid_message = error_message or 'Input must be a valid integer'

    try:
        if '.' in cleaned_input:
            # Digits followed by a dot and only zeros: convert the integer part
            # directly. Going through float would silently round values beyond
            # 2**53 (e.g. '9007199254740993.0' would become ...992).
            if re.fullmatch(r'[+-]?\d+\.0*', cleaned_input):
                return Maybe.success(int(cleaned_input.partition('.')[0]))

            # Other dotted forms ('.0', '1.5e1', ...) keep the float-based check.
            float_val = float(cleaned_input)
            if float_val.is_integer():
                # It's a whole number like 1.5e1
                return Maybe.success(int(float_val))
            # It has a fractional part like 42.5 (or is inf/nan)
            return Maybe.failure(invalid_message)

        return Maybe.success(int(cleaned_input))
    except ValueError:
        return Maybe.failure(invalid_message)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def parse_float(input_value: str, error_message: str | None = None) -> Maybe[float]:
    """Convert a string into a float.

    Args:
        input_value: Text to convert; surrounding whitespace is stripped first
        error_message: Optional message used when the text is not a valid float

    Returns:
        Maybe[float]: Success with the float, or Failure with an error message

    """
    if input_value:
        try:
            return Maybe.success(float(input_value.strip()))
        except ValueError:
            return Maybe.failure(error_message or 'Input must be a valid number')

    # Falsy input (e.g. the empty string) always gets the fixed message.
    return Maybe.failure('Input must not be empty')
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def parse_bool(input_value: str, error_message: str | None = None) -> Maybe[bool]:
    """Convert a string into a boolean.

    Matching is case-insensitive and ignores surrounding whitespace.
    ``true``, ``t``, ``yes``, ``y`` and ``1`` map to True;
    ``false``, ``f``, ``no``, ``n`` and ``0`` map to False.

    Args:
        input_value: Text to convert
        error_message: Optional message used when the text is not a known boolean word

    Returns:
        Maybe[bool]: Success with the boolean, or Failure with an error message

    """
    if not input_value:
        return Maybe.failure('Input must not be empty')

    # Single lookup table instead of two membership tests.
    recognised = {
        'true': True,
        't': True,
        'yes': True,
        'y': True,
        '1': True,
        'false': False,
        'f': False,
        'no': False,
        'n': False,
        '0': False,
    }

    token = input_value.strip().lower()
    if token in recognised:
        return Maybe.success(value=recognised[token])

    return Maybe.failure(error_message or 'Input must be a valid boolean')
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def parse_date(input_value: str, date_format: str | None = None, error_message: str | None = None) -> Maybe[date]:
    """Convert a string into a ``date``.

    With ``date_format`` the text is parsed via ``datetime.strptime``. Without
    it only the dashed ISO form ``YYYY-MM-DD`` is accepted; any other layout
    must be requested explicitly through ``date_format``.

    Args:
        input_value: Text to convert; surrounding whitespace is stripped first
        date_format: Optional ``strptime`` format string
        error_message: Optional message used when the text is not a valid date

    Returns:
        Maybe[date]: Success with the date, or Failure with an error message

    """
    if not input_value:
        return Maybe.failure('Input must not be empty')

    text = input_value.strip()
    invalid_message = error_message or 'Input must be a valid date'

    try:
        if date_format:
            parsed = datetime.strptime(text, date_format)  # noqa: DTZ007
            return Maybe.success(parsed.date())

        # Be strict about the default: exactly ten characters with dashes at
        # the ISO positions.
        looks_iso = len(text) == ISO_DATE_LENGTH and text[4] == '-' and text[7] == '-'
        if not looks_iso:
            return Maybe.failure(invalid_message)
        return Maybe.success(date.fromisoformat(text))
    except ValueError:
        return Maybe.failure(invalid_message)
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def parse_complex(input_value: str, error_message: str | None = None) -> Maybe[complex]:
    """Parse a string to a complex number.

    Accepts the forms understood by the built-in ``complex`` constructor plus a
    few conveniences: one pair of enclosing parentheses, a trailing ``i`` as the
    imaginary unit, and spaces around the ``+``/``-`` operators (``'3 + 4i'``).

    Args:
        input_value: String representation of a complex number
        error_message: Optional custom error message for invalid (non-empty) input

    Returns:
        Maybe[complex]: Success with the complex value or Failure with an error message

    """
    if not input_value:
        return Maybe.failure('Input must not be empty')

    try:
        # Strip whitespace from the outside but not inside
        input_str = input_value.strip()

        # Handle parentheses if present
        if input_str.startswith('(') and input_str.endswith(')'):
            input_str = input_str[1:-1]

        # Handle 'i' notation by converting to 'j' notation. Only the trailing
        # imaginary unit (optionally followed by whitespace or closing
        # parentheses) is rewritten; replacing every 'i' would corrupt the
        # literals 'inf' and 'infinity' that complex() accepts.
        if 'i' in input_str and 'j' not in input_str:
            input_str = re.sub(r'i(?=[\s)]*$)', 'j', input_str)

        # Handle spaces in complex notation (e.g., "3 + 4j")
        if ' ' in input_str:
            # Remove spaces while preserving operators
            input_str = input_str.replace(' + ', '+').replace(' - ', '-')
            input_str = input_str.replace('+ ', '+').replace('- ', '-')
            input_str = input_str.replace(' +', '+').replace(' -', '-')

        return Maybe.success(complex(input_str))
    except ValueError:
        return Maybe.failure(error_message or 'Input must be a valid complex number')
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def parse_decimal(input_value: str, error_message: str | None = None) -> Maybe[Decimal]:
    """Convert a string into a ``Decimal``.

    Args:
        input_value: Text to convert; surrounding whitespace is stripped first
        error_message: Optional message used when the text is not a valid decimal

    Returns:
        Maybe[Decimal]: Success with the Decimal, or Failure with an error message

    """
    if input_value:
        try:
            return Maybe.success(Decimal(input_value.strip()))
        except (InvalidOperation, ValueError):
            return Maybe.failure(error_message or 'Input must be a valid number')

    return Maybe.failure('Input must not be empty')
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
def _check_enum_has_empty_value(enum_class: type[Enum]) -> bool:
    """Return True when any member of ``enum_class`` has ``''`` as its value."""
    for candidate in enum_class.__members__.values():
        if candidate.value == '':
            return True
    return False
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
def _find_enum_by_value(enum_class: type[Enum], value: str) -> Enum | None:
    """Return the first member whose value equals ``value``, or None if there is none."""
    matches = (candidate for candidate in enum_class.__members__.values() if candidate.value == value)
    return next(matches, None)
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
def _find_enum_by_name(enum_class: type[E], value: str) -> E | None:
    """Return the member named exactly ``value``, or None when no such name exists."""
    # __members__ maps names (aliases included) to members; a missing name yields None.
    return enum_class.__members__.get(value)
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def parse_enum(input_value: str, enum_class: type[E], error_message: str | None = None) -> Maybe[object]:
    """Convert a string into a member of ``enum_class``.

    Lookup order: exact value, exact name, value after stripping surrounding
    whitespace (only when stripping changes the input), then a
    case-insensitive comparison of the unstripped input against member names.

    Args:
        input_value: Text to convert
        enum_class: The ``Enum`` subclass to look the text up in
        error_message: Optional message used for an invalid enum class or an unknown member

    Returns:
        Success with the matching member, or Failure with an error message

    """
    if not (isinstance(enum_class, type) and issubclass(enum_class, Enum)):
        return Maybe.failure(error_message or 'Invalid enum class provided')

    # The empty string is only acceptable when some member actually has '' as its value.
    if input_value == '' and not _check_enum_has_empty_value(enum_class):
        return Maybe.failure('Input must not be empty')

    found = _find_enum_by_value(enum_class, input_value)

    if found is None:
        found = _find_enum_by_name(enum_class, input_value)

    if found is None:
        trimmed = input_value.strip()
        if trimmed != input_value:
            found = _find_enum_by_value(enum_class, trimmed)

    if found is None:
        lowered = input_value.lower()
        found = next(
            (member for name, member in enum_class.__members__.items() if name.lower() == lowered),
            None,
        )

    if found is not None:
        return Maybe.success(found)

    return Maybe.failure(error_message or 'Input must be a valid enumeration value')
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
def parse_list(
|
|
257
|
+
input_value: str,
|
|
258
|
+
element_parser: Callable[[str], Maybe[T]] | None = None,
|
|
259
|
+
separator: str = ',',
|
|
260
|
+
error_message: str | None = None,
|
|
261
|
+
) -> Maybe[list[T]]:
|
|
262
|
+
"""Parse a string to a list using the specified element parser and separator.
|
|
263
|
+
|
|
264
|
+
Args:
|
|
265
|
+
input_value: The string to parse
|
|
266
|
+
element_parser: A function that parses individual elements
|
|
267
|
+
separator: The string that separates elements
|
|
268
|
+
error_message: Custom error message for parsing failures
|
|
269
|
+
|
|
270
|
+
Returns:
|
|
271
|
+
A Maybe containing the parsed list or an error message
|
|
272
|
+
|
|
273
|
+
"""
|
|
274
|
+
if not input_value:
|
|
275
|
+
return Maybe.failure('Input must not be empty')
|
|
276
|
+
|
|
277
|
+
def default_parser(s: str) -> Maybe[T]:
|
|
278
|
+
return Maybe.success(s.strip()) # type: ignore[arg-type]
|
|
279
|
+
|
|
280
|
+
parser = element_parser if element_parser is not None else default_parser
|
|
281
|
+
|
|
282
|
+
elements = input_value.split(separator)
|
|
283
|
+
|
|
284
|
+
parsed_elements: list[T] = []
|
|
285
|
+
for i, element in enumerate(elements, start=1):
|
|
286
|
+
match parser(element.strip()):
|
|
287
|
+
case Success(value) if value is not None:
|
|
288
|
+
parsed_elements.append(value)
|
|
289
|
+
case Failure() if error_message:
|
|
290
|
+
return Maybe.failure(error_message)
|
|
291
|
+
case Failure(result):
|
|
292
|
+
return Maybe.failure(f"Failed to parse element {i} '{element}': {result}")
|
|
293
|
+
|
|
294
|
+
return Maybe.success(parsed_elements)
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
def _parse_key_value_pair( # noqa: PLR0913
|
|
298
|
+
pair: str,
|
|
299
|
+
index: int,
|
|
300
|
+
key_parser: Callable[[str], Maybe[K]], # K can be None
|
|
301
|
+
value_parser: Callable[[str], Maybe[V]], # V can be None
|
|
302
|
+
key_value_separator: str,
|
|
303
|
+
error_message: str | None = None,
|
|
304
|
+
) -> tuple[bool, K | None, V | None, str | None]:
|
|
305
|
+
"""Parse a single key-value pair.
|
|
306
|
+
|
|
307
|
+
Returns:
|
|
308
|
+
A tuple of (success, key, value, error_message)
|
|
309
|
+
|
|
310
|
+
"""
|
|
311
|
+
if key_value_separator not in pair:
|
|
312
|
+
error = f"Invalid key-value pair '{pair}': missing separator '{key_value_separator}'"
|
|
313
|
+
return False, None, None, error_message or error
|
|
314
|
+
|
|
315
|
+
key_str, value_str = pair.split(key_value_separator, 1)
|
|
316
|
+
|
|
317
|
+
# Parse the key
|
|
318
|
+
key_result = key_parser(key_str.strip())
|
|
319
|
+
if key_result.is_failure():
|
|
320
|
+
error = f"Failed to parse key in pair {index + 1} '{pair}': {key_result.error_or('Parse error')}"
|
|
321
|
+
return False, None, None, error_message or error
|
|
322
|
+
|
|
323
|
+
# Parse the value
|
|
324
|
+
value_result = value_parser(value_str.strip())
|
|
325
|
+
if value_result.is_failure():
|
|
326
|
+
error = f"Failed to parse value in pair {index + 1} '{pair}': {value_result.error_or('Parse error')}"
|
|
327
|
+
return False, None, None, error_message or error
|
|
328
|
+
|
|
329
|
+
# At this point both results are Success; extract concrete values by pattern matching
|
|
330
|
+
match key_result:
|
|
331
|
+
case Success(key_val):
|
|
332
|
+
key: K | None = key_val
|
|
333
|
+
case _:
|
|
334
|
+
key = None
|
|
335
|
+
|
|
336
|
+
match value_result:
|
|
337
|
+
case Success(value_val):
|
|
338
|
+
value: V | None = value_val
|
|
339
|
+
case _:
|
|
340
|
+
value = None
|
|
341
|
+
|
|
342
|
+
return True, key, value, None
|
|
343
|
+
|
|
344
|
+
|
|
345
|
+
def parse_dict(  # noqa: PLR0913
    input_value: str,
    key_parser: Callable[[str], Maybe[K]] | None = None,
    value_parser: Callable[[str], Maybe[V]] | None = None,
    pair_separator: str = ',',
    key_value_separator: str = ':',
    error_message: str | None = None,
) -> Maybe[dict[K, V]]:
    """Parse a delimited string of key/value pairs into a dictionary.

    Args:
        input_value: The string to parse
        key_parser: Parser for keys; defaults to stripping whitespace
        value_parser: Parser for values; defaults to stripping whitespace
        pair_separator: The string that separates key-value pairs
        key_value_separator: The string that separates keys from values
        error_message: Custom error message for parsing failures

    Returns:
        A Maybe containing the parsed dictionary or an error message. Pairs
        whose key or value parsed to ``None`` are left out of the dictionary.

    """
    if not input_value:
        return Maybe.failure('Input must not be empty')

    def _strip_text(s: str) -> Maybe[str | None]:
        """Parse a string by stripping whitespace."""
        return Maybe.success(s.strip())

    resolved_key_parser: Callable[[str], Maybe[K | None]] = cast(
        'Callable[[str], Maybe[K | None]]', _strip_text if key_parser is None else key_parser
    )
    resolved_value_parser: Callable[[str], Maybe[V | None]] = cast(
        'Callable[[str], Maybe[V | None]]', _strip_text if value_parser is None else value_parser
    )

    collected: dict[K, V] = {}

    for position, fragment in enumerate(input_value.split(pair_separator)):
        ok, key, value, failure_text = _parse_key_value_pair(
            fragment, position, resolved_key_parser, resolved_value_parser, key_value_separator, error_message
        )

        if not ok:
            return Maybe.failure(failure_text or 'Failed to parse key-value pair')

        if key is None or value is None:
            continue
        collected[key] = value

    return Maybe.success(collected)
|
|
387
|
+
|
|
388
|
+
|
|
389
|
+
def parse_set(
    input_value: str,
    element_parser: Callable[[str], Maybe[T]] | None = None,
    separator: str | None = None,
    error_message: str | None = None,
) -> Maybe[set[T]]:
    """Parse a string to a set using the specified element parser and separator.

    Delegates to ``parse_list`` and converts the resulting list to a set, which
    removes duplicate elements.

    Args:
        input_value: The string to parse
        element_parser: A function that parses individual elements
        separator: The string that separates elements (defaults to ``','``)
        error_message: Custom error message for parsing failures

    Returns:
        A Maybe containing the parsed set or an error message

    """
    if separator is None:
        separator = ','

    # Use the list parser and convert to set
    result = parse_list(input_value, element_parser, separator, error_message)
    if result.is_failure():
        # Propagate the underlying error (custom message, empty-input message or
        # per-element diagnostic) instead of masking it with a generic text.
        return Maybe.failure(result.error_or('Parse error'))

    # Convert to set (removes duplicates)
    return Maybe.success(set(result.value_or([])))
|
|
417
|
+
|
|
418
|
+
|
|
419
|
+
# Type-specific validation parsers
|
|
420
|
+
|
|
421
|
+
|
|
422
|
+
def parse_int_with_validation(
    input_value: str,
    min_value: int | None = None,
    max_value: int | None = None,
    error_message: str | None = None,
) -> Maybe[int]:
    """Parse an integer and check it against optional inclusive bounds.

    Args:
        input_value: The string to parse
        min_value: Minimum allowed value (inclusive)
        max_value: Maximum allowed value (inclusive)
        error_message: Custom error message for parsing and range failures

    Returns:
        A Maybe containing the parsed integer or an error message

    """
    parsed = parse_int(input_value, error_message)
    if parsed.is_failure():
        return parsed

    number = parsed.value_or(0)

    # The lower bound is checked first, so it wins when both are violated.
    problem: str | None = None
    if min_value is not None and number < min_value:
        problem = f'Value must be at least {min_value}'
    elif max_value is not None and number > max_value:
        problem = f'Value must be at most {max_value}'

    if problem is not None:
        return Maybe.failure(error_message or problem)
    return Maybe.success(number)
|
|
454
|
+
|
|
455
|
+
|
|
456
|
+
def parse_list_with_validation(  # noqa: PLR0913
    input_value: str,
    element_parser: Callable[[str], Maybe[T]] | None = None,
    separator: str = ',',
    min_length: int | None = None,
    max_length: int | None = None,
    error_message: str | None = None,
) -> Maybe[list[T]]:
    """Parse a list and check its length against optional bounds.

    Args:
        input_value: The string to parse
        element_parser: A function that parses individual elements
        separator: The string that separates elements
        min_length: Minimum allowed list length
        max_length: Maximum allowed list length
        error_message: Custom error message for parsing and length failures

    Returns:
        A Maybe containing the parsed list or an error message

    """
    parsed = parse_list(input_value, element_parser, separator, error_message)
    if parsed.is_failure():
        return parsed

    items = parsed.value_or([])
    count = len(items)

    # The minimum is checked before the maximum.
    problem: str | None = None
    if min_length is not None and count < min_length:
        problem = f'List must have at least {min_length} elements'
    elif max_length is not None and count > max_length:
        problem = f'List must have at most {max_length} elements'

    if problem is not None:
        return Maybe.failure(error_message or problem)
    return Maybe.success(items)
|
|
492
|
+
|
|
493
|
+
|
|
494
|
+
def parse_dict_with_validation(  # noqa: PLR0913
    input_value: str,
    key_parser: Callable[[str], Maybe[K]] | None = None,
    value_parser: Callable[[str], Maybe[V]] | None = None,
    pair_separator: str = ',',
    key_value_separator: str = ':',
    required_keys: list[str] | None = None,
    error_message: str | None = None,
) -> Maybe[dict[K, V]]:
    """Parse a dictionary and verify that all required keys are present.

    Args:
        input_value: The string to parse
        key_parser: A function that parses keys
        value_parser: A function that parses values
        pair_separator: The string that separates key-value pairs
        key_value_separator: The string that separates keys from values
        required_keys: List of keys that must be present
        error_message: Custom error message for parsing and missing-key failures

    Returns:
        A Maybe containing the parsed dictionary or an error message

    """
    parsed = parse_dict(input_value, key_parser, value_parser, pair_separator, key_value_separator, error_message)
    if parsed.is_failure():
        return parsed

    mapping = parsed.value_or({})

    # An empty or missing requirement list means there is nothing to verify.
    absent = [name for name in required_keys if name not in mapping] if required_keys else []
    if absent:
        return Maybe.failure(error_message or f'Missing required keys: {", ".join(absent)}')

    return Maybe.success(mapping)
|
|
531
|
+
|
|
532
|
+
|
|
533
|
+
def create_parser(convert_func: Callable[[str], T], error_message: str | None = None) -> Callable[[str], Maybe[T]]:
    """Create a parser function from a conversion function.

    This factory takes a function that converts strings to values and wraps it
    in error handling logic to return Maybe instances.

    Args:
        convert_func: A function that converts strings to values of type T
        error_message: Optional custom error message for failures

    Returns:
        A parser function that returns Maybe[T]. Empty input yields a Failure;
        otherwise the stripped input is passed to ``convert_func`` and any
        exception it raises is turned into a Failure.

    Example:
        >>> from decimal import Decimal
        >>> parse_decimal = create_parser(Decimal, "Invalid decimal format")
        >>> result = parse_decimal("3.14")
        >>> result.is_success()
        True

    """
    # Not every callable has __name__ (e.g. functools.partial objects); fall
    # back to repr so building the error text can never raise by itself.
    converter_name = getattr(convert_func, '__name__', repr(convert_func))

    def parser(input_value: str) -> Maybe[T]:
        if not input_value:
            return Failure('Input must not be empty')

        try:
            return Success(convert_func(input_value.strip()))
        except Exception as e:  # noqa: BLE001
            return Failure(error_message or f'Invalid {converter_name} format: {e}')

    return parser
|
|
565
|
+
|
|
566
|
+
|
|
567
|
+
@overload
def make_parser(func: Callable[[str], T]) -> Callable[[str], Maybe[T]]: ...


@overload
def make_parser() -> Callable[[Callable[[str], T]], Callable[[str], Maybe[T]]]: ...


def make_parser(
    func: Callable[[str], T] | None = None,
) -> Callable[[str], Maybe[T]] | Callable[[Callable[[str], T]], Callable[[str], Maybe[T]]]:
    """Create a parser function from a conversion function with a decorator.

    The decorated function receives the stripped input. Empty input yields a
    Failure, and any exception raised by the function is turned into a Failure
    that names the function and the error.

    Args:
        func: The conversion function when used as ``@make_parser``; None when
            used as ``@make_parser()``

    Returns:
        The wrapped parser when ``func`` is given, otherwise a decorator that
        produces one

    Example:
        @make_parser
        def parse_decimal(s: str) -> Decimal:
            return Decimal(s)

        # Or with parentheses
        @make_parser()
        def parse_decimal(s: str) -> Decimal:
            return Decimal(s)

        result = parse_decimal("123.45")  # Returns Maybe[Decimal]

    """

    def decorator(f: Callable[[str], T]) -> Callable[[str], Maybe[T]]:
        # Not every callable has __name__ (e.g. functools.partial objects);
        # fall back to repr so building the error text can never raise.
        func_name = getattr(f, '__name__', repr(f))

        @wraps(f)
        def wrapper(input_value: str) -> Maybe[T]:
            if not input_value:
                return Maybe.failure('Input must not be empty')
            try:
                return Maybe.success(f(input_value.strip()))
            except Exception as e:  # noqa: BLE001
                return Maybe.failure(f'Invalid format for {func_name}, error: {e}')

        return wrapper

    # Handle both @make_parser and @make_parser() syntax
    if func is None:
        return decorator
    return decorator(func)
|
|
610
|
+
|
|
611
|
+
|
|
612
|
+
def validated_parser(
    convert_func: Callable[[str], T], validator: Callable[[T], Maybe[T]], error_message: str | None = None
) -> Callable[[str], Maybe[T]]:
    """Build a parser that converts the input and then validates the result.

    Args:
        convert_func: A function that converts strings to values of type T
        validator: A validator function applied to the converted value
        error_message: Optional custom error message for parsing failures

    Returns:
        A parser function that returns Maybe[T]

    Example:
        >>> from decimal import Decimal
        >>> from valid8r.core.validators import minimum, maximum
        >>> # Create a parser for decimals between 0 and 100
        >>> valid_range = lambda x: minimum(0)(x).bind(lambda y: maximum(100)(y))
        >>> parse_percent = validated_parser(Decimal, valid_range)
        >>> result = parse_percent("42.5")
        >>> result.is_success()
        True

    """
    base_parser = create_parser(convert_func, error_message)

    def parser(input_value: str) -> Maybe[T]:
        # Parse first, then chain the validator onto the parse result.
        return base_parser(input_value).bind(validator)

    return parser
|
|
648
|
+
|
|
649
|
+
|
|
650
|
+
def parse_uuid(text: str, version: int | None = None, strict: bool = True) -> Maybe[UUID]:
    """Convert a string into a standard-library ``UUID``.

    The text is first parsed with uuid-utils when that package was importable,
    otherwise with the standard library, to obtain the UUID's version. When
    ``version`` is given it must be one of 1, 3, 4, 5, 6, 7 or 8; in ``strict``
    mode (the default) a differing parsed version is a Failure, otherwise the
    mismatch is ignored.

    Args:
        text: The input string to parse as UUID.
        version: Optional expected UUID version to validate against.
        strict: Whether to enforce the expected version when provided.

    Returns:
        Maybe[UUID]: Success with a UUID object or Failure with an error message.

    """
    if not text:
        return Maybe.failure('Input must not be empty')

    candidate = text.strip()

    try:
        # Prefer uuid-utils if available; fall back to stdlib
        uuid_factory = uuidu.UUID if uuidu is not None else UUID
        detected_version = getattr(uuid_factory(candidate), 'version', None)
    except Exception:  # noqa: BLE001
        return Maybe.failure('Input must be a valid UUID')

    if version is not None:
        if version not in {1, 3, 4, 5, 6, 7, 8}:
            return Maybe.failure(f'Unsupported UUID version: v{version}')
        if strict and version != detected_version:
            return Maybe.failure(f'UUID version mismatch: expected v{version}, got v{detected_version}')

    # Always hand back a standard-library UUID, whichever parser was used above.
    try:
        return Maybe.success(UUID(candidate))
    except Exception:  # noqa: BLE001
        # Not expected once the first parse succeeded, but guarded anyway.
        return Maybe.failure('Input must be a valid UUID')
|
|
695
|
+
|
|
696
|
+
|
|
697
|
+
def parse_ipv4(text: str) -> Maybe[IPv4Address]:
    """Convert a string into an ``IPv4Address``.

    Only surrounding whitespace is trimmed.

    Error messages:
    - Input must be a string
    - Input must not be empty
    - not a valid IPv4 address
    """
    if not isinstance(text, str):
        return Maybe.failure('Input must be a string')

    candidate = text.strip()
    if not candidate:
        return Maybe.failure('Input must not be empty')

    try:
        parsed = ip_address(candidate)
    except ValueError:
        return Maybe.failure('not a valid IPv4 address')

    # ip_address also accepts IPv6 text, which is not acceptable here.
    if not isinstance(parsed, IPv4Address):
        return Maybe.failure('not a valid IPv4 address')
    return Maybe.success(parsed)
|
|
724
|
+
|
|
725
|
+
|
|
726
|
+
def parse_ipv6(text: str) -> Maybe[IPv6Address]:
    """Convert a string into an ``IPv6Address``.

    Only surrounding whitespace is trimmed. Any input containing ``%`` (such
    as a scope ID like ``%eth0``) is rejected.

    Error messages:
    - Input must be a string
    - Input must not be empty
    - not a valid IPv6 address
    """
    if not isinstance(text, str):
        return Maybe.failure('Input must be a string')

    candidate = text.strip()
    if not candidate:
        return Maybe.failure('Input must not be empty')

    # Explicitly reject scope IDs like %eth0
    if '%' in candidate:
        return Maybe.failure('not a valid IPv6 address')

    try:
        parsed = ip_address(candidate)
    except ValueError:
        return Maybe.failure('not a valid IPv6 address')

    # ip_address also accepts IPv4 text, which is not acceptable here.
    if not isinstance(parsed, IPv6Address):
        return Maybe.failure('not a valid IPv6 address')
    return Maybe.success(parsed)
|
|
757
|
+
|
|
758
|
+
|
|
759
|
+
def parse_ip(text: str) -> Maybe[IPv4Address | IPv6Address]:
    """Convert a string into an IPv4 or IPv6 address object.

    Only surrounding whitespace is trimmed. Input containing ``%`` or ``://``
    is rejected before parsing is attempted.

    Error messages:
    - Input must be a string
    - Input must not be empty
    - not a valid IP address
    """
    if not isinstance(text, str):
        return Maybe.failure('Input must be a string')

    candidate = text.strip()
    if not candidate:
        return Maybe.failure('Input must not be empty')

    # Reject non-address forms such as IPv6 scope IDs or URLs
    if any(marker in candidate for marker in ('%', '://')):
        return Maybe.failure('not a valid IP address')

    try:
        parsed = ip_address(candidate)
    except ValueError:
        return Maybe.failure('not a valid IP address')

    if not isinstance(parsed, (IPv4Address, IPv6Address)):
        return Maybe.failure('not a valid IP address')
    return Maybe.success(parsed)
|
|
789
|
+
|
|
790
|
+
|
|
791
|
+
def parse_cidr(text: str, *, strict: bool = True) -> Maybe[IPv4Network | IPv6Network]:
    """Parse a CIDR network string (IPv4 or IPv6).

    Uses ipaddress.ip_network under the hood. By default ``strict=True``
    so host bits set will fail. With ``strict=False``, host bits are masked.

    Failure messages:
    - Input must be a string
    - Input must not be empty
    - has host bits set (when strict and host bits are present)
    - not a valid network (all other parsing failures)

    Args:
        text: The candidate network string; surrounding whitespace is trimmed.
        strict: Passed through to ``ip_network``.

    Returns:
        Success wrapping the parsed network object, or Failure carrying one
        of the messages listed above.
    """
    if not isinstance(text, str):
        return Maybe.failure('Input must be a string')

    s = text.strip()
    if s == '':
        return Maybe.failure('Input must not be empty')

    try:
        net = ip_network(s, strict=strict)
    except ValueError as exc:
        # The host-bits case is recognised by the wording of the exception
        # text, so it can be reported separately from other parse failures.
        if 'has host bits set' in str(exc):
            return Maybe.failure('has host bits set')
        return Maybe.failure('not a valid network')

    # ip_network only returns IPv4Network or IPv6Network, so the result can
    # be wrapped directly without a further type check.
    return Maybe.success(net)
|
|
822
|
+
|
|
823
|
+
|
|
824
|
+
# ---------------------------
|
|
825
|
+
# URL, email, and phone parsing
|
|
826
|
+
# ---------------------------
|
|
827
|
+
|
|
828
|
+
|
|
829
|
+
@dataclass(frozen=True)
class UrlParts:
    """Immutable record of the components of a parsed URL.

    Attributes:
        scheme: The scheme in lowercase (for example "http").
        username: Username taken from the userinfo section, or None.
        password: Password taken from the userinfo section, or None.
        host: Lowercased hostname, or an IPv6 literal with its brackets
            removed; None when no host was given and none was required.
        port: The port when one was written explicitly, else None.
        path: The path exactly as it appeared (not normalized).
        query: The query string, excluding the leading '?'.
        fragment: The fragment, excluding the leading '#'.

    Examples:
        >>> from valid8r.core.maybe import Success
        >>> match parse_url('https://alice:pw@example.com:8443/x?q=1#top'):
        ...     case Success(u):
        ...         (u.scheme, u.username, u.password, u.host, u.port, u.path, u.query, u.fragment)
        ...     case _:
        ...         ()
        ('https', 'alice', 'pw', 'example.com', 8443, '/x', 'q=1', 'top')

    """

    scheme: str
    username: str | None
    password: str | None
    host: str | None
    port: int | None
    path: str
    query: str
    fragment: str
|
|
862
|
+
|
|
863
|
+
|
|
864
|
+
@dataclass(frozen=True)
class EmailAddress:
    """Immutable record of a parsed email address.

    Attributes:
        local: The part before the '@', with its original casing kept.
        domain: The part after the '@', in lowercase.

    Examples:
        >>> from valid8r.core.maybe import Success
        >>> match parse_email('First.Last+tag@Example.COM'):
        ...     case Success(addr):
        ...         (addr.local, addr.domain)
        ...     case _:
        ...         ()
        ('First.Last+tag', 'example.com')

    """

    local: str
    domain: str
|
|
885
|
+
|
|
886
|
+
|
|
887
|
+
@dataclass(frozen=True)
class PhoneNumber:
    """Immutable record of a North American Numbering Plan (NANP) phone number.

    Holds the individual pieces of a number from the United States, Canada,
    or another NANP territory, and offers several formatted renderings.

    Attributes:
        area_code: The three-digit area code (NPA).
        exchange: The three-digit exchange code (NXX).
        subscriber: The four-digit subscriber number.
        country_code: The country code (always '1' for NANP).
        region: A two-letter region code such as 'US' or 'CA'.
        extension: The extension digits, or None when there is none.

    Examples:
        >>> from valid8r.core.maybe import Success
        >>> match parse_phone('(415) 555-2671'):
        ...     case Success(phone):
        ...         (phone.area_code, phone.exchange, phone.subscriber)
        ...     case _:
        ...         ()
        ('415', '555', '2671')

    """

    area_code: str
    exchange: str
    subscriber: str
    country_code: str
    region: str
    extension: str | None

    @property
    def e164(self) -> str:
        """Render the number E.164-style, e.g. ``+14155552671``.

        The result is a '+' followed by the country code and the ten national
        digits with no separators.

        Returns:
            The compact number; when an extension is set it is appended as
            `` x<extension>``.
        """
        compact = f'+{self.raw_digits}'
        return f'{compact} x{self.extension}' if self.extension else compact

    @property
    def national(self) -> str:
        """Render the number in national style, e.g. ``(415) 555-2671``.

        The country code is omitted.

        Returns:
            The national rendering; when an extension is set it is appended
            as `` ext. <extension>``.
        """
        formatted = f'({self.area_code}) {self.exchange}-{self.subscriber}'
        return f'{formatted} ext. {self.extension}' if self.extension else formatted

    @property
    def international(self) -> str:
        """Render the number in international style, e.g. ``+1 415-555-2671``.

        The country code comes first, followed by dash-separated groups.

        Returns:
            The international rendering; when an extension is set it is
            appended as `` ext. <extension>``.
        """
        formatted = f'+{self.country_code} {self.area_code}-{self.exchange}-{self.subscriber}'
        return f'{formatted} ext. {self.extension}' if self.extension else formatted

    @property
    def raw_digits(self) -> str:
        """Return the bare digits including the country code, e.g. ``14155552671``.

        No separators are inserted and the extension is never included.

        Returns:
            Country code, area code, exchange and subscriber concatenated.
        """
        return ''.join((self.country_code, self.area_code, self.exchange, self.subscriber))
|
|
976
|
+
|
|
977
|
+
|
|
978
|
+
def _is_valid_hostname_label(label: str) -> bool:
|
|
979
|
+
if not (1 <= len(label) <= 63):
|
|
980
|
+
return False
|
|
981
|
+
# Alnum or hyphen; cannot start or end with hyphen
|
|
982
|
+
if label.startswith('-') or label.endswith('-'):
|
|
983
|
+
return False
|
|
984
|
+
for ch in label:
|
|
985
|
+
if ch.isalnum() or ch == '-':
|
|
986
|
+
continue
|
|
987
|
+
return False
|
|
988
|
+
return True
|
|
989
|
+
|
|
990
|
+
|
|
991
|
+
def _is_valid_hostname(host: str) -> bool:
|
|
992
|
+
# Allow localhost explicitly
|
|
993
|
+
if host.lower() == 'localhost':
|
|
994
|
+
return True
|
|
995
|
+
|
|
996
|
+
if len(host) == 0 or len(host) > 253:
|
|
997
|
+
return False
|
|
998
|
+
|
|
999
|
+
# Reject underscores and empty labels
|
|
1000
|
+
labels = host.split('.')
|
|
1001
|
+
return all(not (part == '' or not _is_valid_hostname_label(part)) for part in labels)
|
|
1002
|
+
|
|
1003
|
+
|
|
1004
|
+
def _parse_userinfo_and_hostport(netloc: str) -> tuple[str | None, str | None, str]:
|
|
1005
|
+
"""Split userinfo and hostport from a netloc string."""
|
|
1006
|
+
if '@' in netloc:
|
|
1007
|
+
userinfo, hostport = netloc.rsplit('@', 1)
|
|
1008
|
+
if ':' in userinfo:
|
|
1009
|
+
user, pwd = userinfo.split(':', 1)
|
|
1010
|
+
else:
|
|
1011
|
+
user, pwd = userinfo, None
|
|
1012
|
+
return (user or None), (pwd or None), hostport
|
|
1013
|
+
return None, None, netloc
|
|
1014
|
+
|
|
1015
|
+
|
|
1016
|
+
def _parse_host_and_port(hostport: str) -> tuple[str | None, int | None]:
|
|
1017
|
+
"""Parse host and optional port from hostport.
|
|
1018
|
+
|
|
1019
|
+
Supports IPv6 literals in brackets.
|
|
1020
|
+
Returns (host, port). Host is None when missing.
|
|
1021
|
+
"""
|
|
1022
|
+
if not hostport:
|
|
1023
|
+
return None, None
|
|
1024
|
+
|
|
1025
|
+
host = None
|
|
1026
|
+
port: int | None = None
|
|
1027
|
+
|
|
1028
|
+
if hostport.startswith('['):
|
|
1029
|
+
# IPv6 literal [::1] or [::1]:443
|
|
1030
|
+
if ']' not in hostport:
|
|
1031
|
+
return None, None
|
|
1032
|
+
end = hostport.find(']')
|
|
1033
|
+
host = hostport[1:end]
|
|
1034
|
+
rest = hostport[end + 1 :]
|
|
1035
|
+
if rest.startswith(':'):
|
|
1036
|
+
try:
|
|
1037
|
+
port_val = int(rest[1:])
|
|
1038
|
+
except ValueError:
|
|
1039
|
+
return None, None
|
|
1040
|
+
if not (0 <= port_val <= 65535):
|
|
1041
|
+
return None, None
|
|
1042
|
+
port = port_val
|
|
1043
|
+
elif rest != '':
|
|
1044
|
+
# Garbage after bracket
|
|
1045
|
+
return None, None
|
|
1046
|
+
return host, port
|
|
1047
|
+
|
|
1048
|
+
# Not bracketed: split on last ':' to allow IPv6 bracket requirement
|
|
1049
|
+
if ':' in hostport:
|
|
1050
|
+
host_candidate, port_str = hostport.rsplit(':', 1)
|
|
1051
|
+
if host_candidate == '':
|
|
1052
|
+
return None, None
|
|
1053
|
+
try:
|
|
1054
|
+
port_val = int(port_str)
|
|
1055
|
+
except ValueError:
|
|
1056
|
+
# Could be part of IPv6 without brackets (not supported by URL syntax)
|
|
1057
|
+
return hostport, None
|
|
1058
|
+
if not (0 <= port_val <= 65535):
|
|
1059
|
+
return None, None
|
|
1060
|
+
return host_candidate, port_val
|
|
1061
|
+
|
|
1062
|
+
return hostport, None
|
|
1063
|
+
|
|
1064
|
+
|
|
1065
|
+
def _validate_url_host(host: str | None, original_netloc: str) -> bool:
|
|
1066
|
+
if host is None:
|
|
1067
|
+
return False
|
|
1068
|
+
|
|
1069
|
+
# If original contained brackets or host contains ':' treat as IPv6
|
|
1070
|
+
if original_netloc.startswith('[') or ':' in host:
|
|
1071
|
+
try:
|
|
1072
|
+
_ = ip_address(host)
|
|
1073
|
+
return isinstance(_, (IPv6Address, IPv4Address))
|
|
1074
|
+
except ValueError:
|
|
1075
|
+
return False
|
|
1076
|
+
|
|
1077
|
+
# Try IPv4
|
|
1078
|
+
try:
|
|
1079
|
+
_ = ip_address(host)
|
|
1080
|
+
if isinstance(_, IPv4Address):
|
|
1081
|
+
return True
|
|
1082
|
+
except ValueError:
|
|
1083
|
+
pass
|
|
1084
|
+
|
|
1085
|
+
# Hostname
|
|
1086
|
+
return _is_valid_hostname(host)
|
|
1087
|
+
|
|
1088
|
+
|
|
1089
|
+
def parse_url(
    text: str,
    *,
    allowed_schemes: Iterable[str] = ('http', 'https'),
    require_host: bool = True,
) -> Maybe[UrlParts]:
    """Parse a URL with light validation.

    Rules:
    - Trim surrounding whitespace only
    - Require scheme in allowed_schemes (defaults to http/https)
    - If require_host, netloc must include a valid host (hostname, IPv4, or bracketed IPv6)
    - A host that is present is always validated, even when require_host is False
    - Lowercase scheme and host; do not modify path/query/fragment

    Failure messages (exact substrings):
    - Input must be a string
    - Input must not be empty
    - Unsupported URL scheme
    - URL requires host
    - Invalid host

    Args:
        text: The URL string to parse.
        allowed_schemes: Schemes to accept, compared case-insensitively.
        require_host: When True, a URL without a usable host is a failure.

    Returns:
        Maybe[UrlParts]: Success with the split components, or Failure with
        one of the messages above.
    """
    if not isinstance(text, str):
        return Maybe.failure('Input must be a string')

    s = text.strip()
    if s == '':
        return Maybe.failure('Input must not be empty')

    try:
        parts = urlsplit(s)
    except ValueError:
        # urlsplit raises for malformed netlocs (e.g. unmatched '[' / ']');
        # report that as a Failure instead of letting the exception escape.
        return Maybe.failure('Invalid host')

    scheme_lower = parts.scheme.lower()
    if scheme_lower == '' or scheme_lower not in {sch.lower() for sch in allowed_schemes}:
        return Maybe.failure('Unsupported URL scheme')

    username: str | None = None
    password: str | None = None
    host: str | None = None
    port: int | None = None

    netloc = parts.netloc

    if netloc:
        username, password, hostport = _parse_userinfo_and_hostport(netloc)
        host, port = _parse_host_and_port(hostport)

        if host is not None:
            host = host.lower()
            # Validate host when present
            if not _validate_url_host(host, netloc):
                return Maybe.failure('Invalid host')
        elif hostport and not require_host:
            # A host/port section was written but could not be parsed (bad
            # port, stray text after ']', ...). Without this check the URL
            # would be accepted with host=None. When require_host is True the
            # check below reports it as 'URL requires host', as before.
            return Maybe.failure('Invalid host')

    # When require_host is True we must have a host
    if require_host and not host:
        return Maybe.failure('URL requires host')

    return Maybe.success(
        UrlParts(
            scheme=scheme_lower,
            username=username,
            password=password,
            host=host,
            port=port,
            path=parts.path,
            query=parts.query,
            fragment=parts.fragment,
        )
    )
|
|
1164
|
+
|
|
1165
|
+
|
|
1166
|
+
def parse_email(text: str) -> Maybe[EmailAddress]:
    """Parse a bare ``local@domain`` email address.

    Validation is delegated to ``validate_email`` from the email-validator
    library, called with ``check_deliverability=False`` so that no DNS
    lookups are made. The ``local_part`` and ``domain`` attributes of its
    result become the fields of the returned EmailAddress.

    When the email-validator library is not installed, a Failure saying so
    is returned instead.

    Rules:
    - Surrounding whitespace is trimmed before validation
    - Syntax checking is whatever email-validator implements
    - The domain is lowercased in the result; the local part keeps its case

    Failure messages:
    - Input must be a string
    - Input must not be empty
    - email-validator library is required but not installed
    - The message of the EmailNotValidError raised by email-validator
    - ``email validation error: ...`` for any other exception

    Args:
        text: The email address string to parse

    Returns:
        Maybe[EmailAddress]: Success with EmailAddress or Failure with error message
    """
    if not isinstance(text, str):
        return Maybe.failure('Input must be a string')

    candidate = text.strip()
    if not candidate:
        return Maybe.failure('Input must not be empty')

    if not HAS_EMAIL_VALIDATOR:
        return Maybe.failure('email-validator library is required but not installed')

    try:
        # Syntax-only validation; deliverability (DNS) is not checked
        validated = validate_email(candidate, check_deliverability=False)
        address = EmailAddress(local=validated.local_part, domain=validated.domain)
        return Maybe.success(address)
    except EmailNotValidError as exc:
        return Maybe.failure(str(exc))
    except Exception as exc:  # noqa: BLE001
        return Maybe.failure(f'email validation error: {exc}')
|
|
1213
|
+
|
|
1214
|
+
|
|
1215
|
+
def parse_phone(text: str | None, *, region: str = 'US', strict: bool = False) -> Maybe[PhoneNumber]:  # noqa: PLR0912
    """Parse a North American phone number (NANP format).

    Parses phone numbers in the North American Numbering Plan format (US, Canada, etc.).
    Supports various formatting styles and validates area codes and exchanges.

    Rules:
    - Accepts 10-digit or 11-digit (with country code 1) phone numbers
    - Inputs longer than 100 characters are rejected before any pattern matching
    - Only digits, whitespace and the characters ``( ) - + .`` may appear in
      the number itself; everything except the digits is then discarded
    - Validates area code (NPA): cannot start with 0 or 1, cannot be 555
    - Validates exchange (NXX): cannot start with 0 or 1, cannot be 911;
      exchange 555 is rejected only when the subscriber number also starts with 555
    - Supports a trailing extension introduced by a comma or semicolon, or by
      whitespace followed by x, ext, ext. or extension (at most 8 digits)
    - In strict mode, requires formatting characters (not just digits)
    - The region is stored as given (default 'US'); it is not validated

    Failure messages:
    - Phone number cannot be empty
    - Invalid format: phone number is too long
    - Extension is too long (maximum 8 digits)
    - Invalid format: phone number contains invalid characters
    - Strict mode requires formatting characters (e.g., dashes, parentheses, spaces)
    - Only North American phone numbers (country code 1) are supported
    - Phone number must have 10 digits, got N
    - Invalid area code: ... (cannot start with 0 or 1 / reserved for fiction)
    - Invalid exchange: ... (cannot start with 0 or 1 / emergency number / reserved for fiction)

    Args:
        text: The phone number string to parse
        region: Two-letter region code (default: 'US')
        strict: If True, requires formatting characters (default: False)

    Returns:
        Maybe[PhoneNumber]: Success with PhoneNumber or Failure with error message

    Examples:
        >>> match parse_phone('(415) 555-2671'):
        ...     case Success(phone):
        ...         phone.area_code
        ...     case _:
        ...         None
        '415'

        >>> match parse_phone('415-555-2671 x123'):
        ...     case Success(phone):
        ...         phone.extension
        ...     case _:
        ...         None
        '123'

        >>> match parse_phone('+1 604 555 1234', region='CA'):
        ...     case Success(phone):
        ...         phone.region
        ...     case _:
        ...         None
        'CA'
    """
    # Handle None, non-string or empty input
    if not isinstance(text, str):
        return Maybe.failure('Phone number cannot be empty')

    s = text.strip()
    if s == '':
        return Maybe.failure('Phone number cannot be empty')

    # Reject extremely long input (security) before running any regex on it
    if len(text) > 100:
        return Maybe.failure('Invalid format: phone number is too long')

    # Extract extension if present
    extension = None
    extension_pattern = r'\s*[,;]\s*(\d+)$|\s+(?:x|ext\.?|extension)\s*(\d+)$'
    extension_match = re.search(extension_pattern, s, re.IGNORECASE)
    if extension_match:
        # Get the captured group (either group 1 or 2)
        extension = extension_match.group(1) or extension_match.group(2)
        # Validate extension length
        if len(extension) > 8:
            return Maybe.failure('Extension is too long (maximum 8 digits)')
        # Remove extension from phone number for parsing
        s = s[: extension_match.start()]

    # Every remaining character must be a digit, whitespace or one of ()-+.
    # fullmatch is used (and not re.MULTILINE with ^...$) so that text after
    # an embedded newline cannot bypass this check.
    if not re.fullmatch(r'[\d\s()\-+.]+', s):
        return Maybe.failure('Invalid format: phone number contains invalid characters')

    # Extract only digits
    digits = re.sub(r'\D', '', s)

    # Check for strict mode - original must have formatting
    if strict and text.strip() == digits:
        return Maybe.failure('Strict mode requires formatting characters (e.g., dashes, parentheses, spaces)')

    # Validate digit count
    if len(digits) == 0:
        return Maybe.failure('Phone number cannot be empty')

    # Handle country code
    country_code = '1'
    if len(digits) == 11:
        if digits[0] != '1':
            return Maybe.failure('Only North American phone numbers (country code 1) are supported')
        digits = digits[1:]  # Strip country code
    elif len(digits) > 11:
        # Check if it starts with a non-1 digit (likely international)
        if digits[0] != '1':
            return Maybe.failure('Only North American phone numbers (country code 1) are supported')
        return Maybe.failure(f'Phone number must have 10 digits, got {len(digits)}')
    elif len(digits) != 10:
        return Maybe.failure(f'Phone number must have 10 digits, got {len(digits)}')

    # Extract components
    area_code = digits[0:3]
    exchange = digits[3:6]
    subscriber = digits[6:10]

    # Validate area code (NPA)
    if area_code[0] in ('0', '1'):
        return Maybe.failure(f'Invalid area code: {area_code} (cannot start with 0 or 1)')
    if area_code == '555':
        return Maybe.failure(f'Invalid area code: {area_code} (reserved for fiction)')

    # Validate exchange (NXX)
    if exchange[0] in ('0', '1'):
        return Maybe.failure(f'Invalid exchange: {exchange} (cannot start with 0 or 1)')
    if exchange == '911':
        return Maybe.failure(f'Invalid exchange: {exchange} (emergency number)')
    # Exchange 555 is only rejected when the subscriber number also starts with 555
    if exchange == '555' and subscriber.startswith('555'):
        return Maybe.failure(f'Invalid exchange: {exchange} with subscriber {subscriber} (reserved for fiction)')

    return Maybe.success(
        PhoneNumber(
            area_code=area_code,
            exchange=exchange,
            subscriber=subscriber,
            country_code=country_code,
            region=region,
            extension=extension,
        )
    )
|