valid8r 1.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,2115 @@
1
+ """String parsing functions with Maybe monad error handling."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+ from datetime import (
7
+ date,
8
+ datetime,
9
+ )
10
+ from decimal import (
11
+ Decimal,
12
+ InvalidOperation,
13
+ )
14
+ from enum import Enum
15
+ from functools import wraps
16
+ from typing import (
17
+ TYPE_CHECKING,
18
+ ParamSpec,
19
+ TypeVar,
20
+ cast,
21
+ overload,
22
+ )
23
+ from uuid import UUID
24
+
25
+ from valid8r.core.maybe import (
26
+ Failure,
27
+ Maybe,
28
+ Success,
29
+ )
30
+
31
+ try:
32
+ import uuid_utils as uuidu
33
+ except Exception: # noqa: BLE001
34
+ uuidu = None # type: ignore[assignment]
35
+
36
+ try:
37
+ from email_validator import (
38
+ EmailNotValidError,
39
+ validate_email,
40
+ )
41
+
42
+ HAS_EMAIL_VALIDATOR = True
43
+ except ImportError:
44
+ HAS_EMAIL_VALIDATOR = False
45
+ EmailNotValidError = None # type: ignore[assignment,misc]
46
+ validate_email = None # type: ignore[assignment]
47
+
48
+ import base64
49
+ import binascii
50
+ import json
51
+ from dataclasses import dataclass
52
+ from ipaddress import (
53
+ IPv4Address,
54
+ IPv4Network,
55
+ IPv6Address,
56
+ IPv6Network,
57
+ ip_address,
58
+ ip_network,
59
+ )
60
+ from pathlib import Path
61
+ from urllib.parse import urlsplit
62
+
63
+ if TYPE_CHECKING:
64
+ from collections.abc import (
65
+ Callable,
66
+ Iterable,
67
+ )
68
+
69
+ T = TypeVar('T')
70
+ K = TypeVar('K')
71
+ V = TypeVar('V')
72
+ P = ParamSpec('P')
73
+ E = TypeVar('E', bound=Enum)
74
+
75
+ ISO_DATE_LENGTH = 10
76
+
77
+ # Compiled regex patterns for phone parsing (cached for performance)
78
+ _PHONE_EXTENSION_PATTERN = re.compile(r'\s*[,;]\s*(\d+)$|\s+(?:x|ext\.?|extension)\s*(\d+)$', re.IGNORECASE)
79
+ _PHONE_VALID_CHARS_PATTERN = re.compile(r'^[\d\s()\-+.]+$', re.MULTILINE)
80
+ _PHONE_DIGIT_EXTRACTION_PATTERN = re.compile(r'\D')
81
+
82
+
83
def parse_int(input_value: str, error_message: str | None = None) -> Maybe[int]:
    """Parse a string to an integer.

    Converts string representations of integers to Python int values.
    Handles whitespace trimming and accepts whole numbers in float notation (e.g., "42.0").
    Whole numbers written in float notation are converted exactly, so large values
    such as "9007199254740993.0" do not lose precision.

    Args:
        input_value: String to parse (leading/trailing whitespace is stripped)
        error_message: Optional custom error message for parsing failures

    Returns:
        Maybe[int]: Success(int) if parsing succeeds, Failure(str) with error message otherwise

    Examples:
        >>> parse_int("42")
        Success(42)
        >>> parse_int("  -17  ")
        Success(-17)
        >>> parse_int("42.0")
        Success(42)
        >>> parse_int("9007199254740993.0")
        Success(9007199254740993)
        >>> parse_int("42.5").is_failure()
        True
        >>> parse_int("not a number").is_failure()
        True
    """
    if not input_value:
        return Maybe.failure('Input must not be empty')

    failure_text = error_message or 'Input must be a valid integer'
    cleaned_input = input_value.strip()

    if '.' not in cleaned_input:
        try:
            return Maybe.success(int(cleaned_input))
        except ValueError:
            return Maybe.failure(failure_text)

    # float() defines which spellings are accepted and rejects values that
    # overflow to infinity (inf.is_integer() is False).
    try:
        float_val = float(cleaned_input)
    except ValueError:
        return Maybe.failure(failure_text)
    if not float_val.is_integer():
        # It has a fractional part like 42.5 (or is out of float range)
        return Maybe.failure(failure_text)

    # The float is only an approximation: re-read the text as a Decimal so the
    # integrality test and the conversion are exact for large magnitudes.
    try:
        exact = Decimal(cleaned_input)
    except (InvalidOperation, ValueError):
        # float() accepted a spelling Decimal does not; keep the float-based result.
        return Maybe.success(int(float_val))

    if exact != exact.to_integral_value():
        # A fractional part that float rounding had hidden (e.g. 9007199254740993.5)
        return Maybe.failure(failure_text)
    return Maybe.success(int(exact))
126
+
127
+
128
def parse_float(input_value: str, error_message: str | None = None) -> Maybe[float]:
    """Turn a numeric string into a Python float.

    Surrounding whitespace is ignored and anything ``float()`` understands
    (including scientific notation) is accepted.

    Args:
        input_value: Text to convert; leading/trailing whitespace is removed first
        error_message: Replacement message used when the text is not a number

    Returns:
        Maybe[float]: Success(float) when conversion works, otherwise Failure(str)

    Examples:
        >>> parse_float("3.14")
        Success(3.14)
        >>> parse_float("  -2.5  ")
        Success(-2.5)
        >>> parse_float("1e-3")
        Success(0.001)
        >>> parse_float("not a number").is_failure()
        True
    """
    if not input_value:
        return Maybe.failure('Input must not be empty')

    trimmed = input_value.strip()
    try:
        number = float(trimmed)
    except ValueError:
        return Maybe.failure(error_message or 'Input must be a valid number')
    return Maybe.success(number)
159
+
160
+
161
def parse_bool(input_value: str, error_message: str | None = None) -> Maybe[bool]:
    """Interpret a string as a boolean.

    Matching ignores case and surrounding whitespace.

    Recognized true values: 'true', 't', 'yes', 'y', '1'
    Recognized false values: 'false', 'f', 'no', 'n', '0'

    Args:
        input_value: Text to interpret (trimmed and lower-cased before matching)
        error_message: Replacement message used when the text is not recognized

    Returns:
        Maybe[bool]: Success(bool) for a recognized token, otherwise Failure(str)

    Examples:
        >>> parse_bool("true")
        Success(True)
        >>> parse_bool("YES")
        Success(True)
        >>> parse_bool("n")
        Success(False)
        >>> parse_bool(" 0 ")
        Success(False)
        >>> parse_bool("maybe").is_failure()
        True
    """
    if not input_value:
        return Maybe.failure('Input must not be empty')

    # Single lookup table: token -> boolean it stands for.
    meanings = {
        'true': True,
        't': True,
        'yes': True,
        'y': True,
        '1': True,
        'false': False,
        'f': False,
        'no': False,
        'n': False,
        '0': False,
    }

    token = input_value.strip().lower()
    verdict = meanings.get(token)
    if verdict is None:
        return Maybe.failure(error_message or 'Input must be a valid boolean')
    return Maybe.success(value=verdict)
204
+
205
+
206
def parse_date(input_value: str, date_format: str | None = None, error_message: str | None = None) -> Maybe[date]:
    """Convert a string into a ``date``.

    Without ``date_format`` only the strict ISO 8601 shape ``YYYY-MM-DD`` is
    accepted; any other layout must be requested explicitly via ``date_format``.

    Args:
        input_value: Text to convert (leading/trailing whitespace is stripped)
        date_format: Optional strptime-style format (e.g., '%Y-%m-%d', '%m/%d/%Y')
        error_message: Replacement message used when the text is not a valid date

    Returns:
        Maybe[date]: Success(date) when conversion works, otherwise Failure(str)

    Examples:
        >>> parse_date("2025-01-15")
        Success(datetime.date(2025, 1, 15))
        >>> parse_date("01/15/2025", date_format="%m/%d/%Y")
        Success(datetime.date(2025, 1, 15))
        >>> parse_date("invalid").is_failure()
        True
    """
    if not input_value:
        return Maybe.failure('Input must not be empty')

    text = input_value.strip()

    try:
        if date_format:
            # Caller supplied the layout: parse as a datetime and keep the date part.
            parsed = datetime.strptime(text, date_format)  # noqa: DTZ007
            return Maybe.success(parsed.date())

        # Default path: insist on dashes in the ISO positions before delegating.
        has_iso_shape = len(text) == ISO_DATE_LENGTH and text[4] == '-' and text[7] == '-'
        if not has_iso_shape:
            return Maybe.failure(error_message or 'Input must be a valid date')
        return Maybe.success(date.fromisoformat(text))
    except ValueError:
        return Maybe.failure(error_message or 'Input must be a valid date')
248
+
249
+
250
def parse_complex(input_value: str, error_message: str | None = None) -> Maybe[complex]:
    """Parse a string to a complex number.

    Accepts various complex number representations including both 'j' and 'i' notation.
    Handles parentheses and spaces in the input. Only a trailing 'i' is treated as
    the imaginary unit, so spellings such as "inf" or "infinity" are left intact.

    Args:
        input_value: String to parse (whitespace is stripped, both 'i' and 'j' accepted)
        error_message: Optional custom error message for parsing failures

    Returns:
        Maybe[complex]: Success(complex) if parsing succeeds, Failure(str) with error message otherwise

    Examples:
        >>> parse_complex("3+4j")
        Success((3+4j))
        >>> parse_complex("3 + 4i")
        Success((3+4j))
        >>> parse_complex("(2-3j)")
        Success((2-3j))
        >>> parse_complex("5j")
        Success(5j)
        >>> parse_complex("inf").is_success()
        True
        >>> parse_complex("invalid").is_failure()
        True
    """
    if not input_value:
        return Maybe.failure('Input must not be empty')

    try:
        # Strip whitespace from the outside but not inside
        input_str = input_value.strip()

        # Handle parentheses if present
        if input_str.startswith('(') and input_str.endswith(')'):
            input_str = input_str[1:-1]

        # Handle 'i' notation by converting to 'j' notation. complex() requires the
        # imaginary unit to be the final character, so only a trailing 'i' is
        # rewritten; replacing every 'i' would corrupt "inf" / "infinity".
        trimmed = input_str.rstrip()
        if trimmed.endswith('i') and 'j' not in trimmed:
            input_str = f'{trimmed[:-1]}j'

        # Handle spaces in complex notation (e.g., "3 + 4j")
        if ' ' in input_str:
            # Remove spaces while preserving operators
            input_str = input_str.replace(' + ', '+').replace(' - ', '-')
            input_str = input_str.replace('+ ', '+').replace('- ', '-')
            input_str = input_str.replace(' +', '+').replace(' -', '-')

        value = complex(input_str)
        return Maybe.success(value)
    except ValueError:
        return Maybe.failure(error_message or 'Input must be a valid complex number')
301
+
302
+
303
def parse_decimal(input_value: str, error_message: str | None = None) -> Maybe[Decimal]:
    """Convert a string into a ``Decimal``.

    ``Decimal`` keeps the digits exactly as written, which avoids binary
    floating-point rounding (useful for monetary amounts).

    Args:
        input_value: Textual decimal number; surrounding whitespace is removed
        error_message: Replacement message used when the text is not a number

    Returns:
        Maybe[Decimal]: Success with the Decimal value or Failure with an error message

    Examples:
        >>> parse_decimal("3.14159")
        Success(Decimal('3.14159'))
        >>> parse_decimal("  0.1  ")
        Success(Decimal('0.1'))
        >>> parse_decimal("-99.99")
        Success(Decimal('-99.99'))
        >>> parse_decimal("not a number").is_failure()
        True
    """
    if not input_value:
        return Maybe.failure('Input must not be empty')

    trimmed = input_value.strip()
    try:
        number = Decimal(trimmed)
    except (InvalidOperation, ValueError):
        return Maybe.failure(error_message or 'Input must be a valid number')
    return Maybe.success(number)
334
+
335
+
336
+ def _check_enum_has_empty_value(enum_class: type[Enum]) -> bool:
337
+ """Check if an enum has an empty string as a value."""
338
+ return any(member.value == '' for member in enum_class.__members__.values())
339
+
340
+
341
+ def _find_enum_by_value(enum_class: type[Enum], value: str) -> Enum | None:
342
+ """Find an enum member by its value."""
343
+ for member in enum_class.__members__.values():
344
+ if member.value == value:
345
+ return member
346
+ return None
347
+
348
+
349
+ def _find_enum_by_name(enum_class: type[E], value: str) -> E | None:
350
+ """Find an enum member by its name."""
351
+ try:
352
+ return enum_class[value]
353
+ except KeyError:
354
+ return None
355
+
356
+
357
def parse_enum(input_value: str, enum_class: type[E], error_message: str | None = None) -> Maybe[object]:
    """Parse a string to an enum member.

    Matches input against enum member values and names (case-insensitive for names).
    Handles whitespace trimming and supports enums with empty string values.

    Lookup order: exact value, exact name, value after trimming, case-insensitive
    name, then exact and case-insensitive name after trimming.

    Args:
        input_value: String to parse (whitespace is stripped for non-exact matches)
        enum_class: The Enum class to parse into
        error_message: Optional custom error message for parsing failures

    Returns:
        Maybe[object]: Success with enum member if valid, Failure(str) with error message otherwise

    Examples:
        >>> from enum import Enum
        >>> class Color(Enum):
        ...     RED = 'red'
        ...     GREEN = 'green'
        ...     BLUE = 'blue'
        >>> parse_enum("red", Color)
        Success(<Color.RED: 'red'>)
        >>> parse_enum("RED", Color)
        Success(<Color.RED: 'red'>)
        >>> parse_enum("  green  ", Color)
        Success(<Color.GREEN: 'green'>)
        >>> parse_enum("  Blue ", Color)
        Success(<Color.BLUE: 'blue'>)
        >>> parse_enum("yellow", Color).is_failure()
        True
    """
    if not isinstance(enum_class, type) or not issubclass(enum_class, Enum):
        return Maybe.failure(error_message or 'Invalid enum class provided')

    # Check if empty is valid for this enum
    has_empty_value = _check_enum_has_empty_value(enum_class)

    if input_value == '' and not has_empty_value:
        return Maybe.failure('Input must not be empty')

    def _by_name_ignoring_case(text: str) -> Enum | None:
        # Lower-case the probe once instead of once per member.
        lowered = text.lower()
        for name, candidate in enum_class.__members__.items():
            if name.lower() == lowered:
                return candidate
        return None

    # Try direct match with enum values
    member = _find_enum_by_value(enum_class, input_value)
    if member is not None:
        return Maybe.success(member)

    member = _find_enum_by_name(enum_class, input_value)
    if member is not None:
        return Maybe.success(member)

    input_stripped = input_value.strip()
    was_padded = input_stripped != input_value
    if was_padded:
        member = _find_enum_by_value(enum_class, input_stripped)
        if member is not None:
            return Maybe.success(member)

    member = _by_name_ignoring_case(input_value)
    if member is not None:
        return Maybe.success(member)

    if was_padded:
        # Name lookups previously ignored the trimmed text, so padded names such
        # as "  RED " failed even though padded values were accepted.
        member = _find_enum_by_name(enum_class, input_stripped)
        if member is None:
            member = _by_name_ignoring_case(input_stripped)
        if member is not None:
            return Maybe.success(member)

    return Maybe.failure(error_message or 'Input must be a valid enumeration value')
415
+
416
+
417
+ def parse_list(
418
+ input_value: str,
419
+ element_parser: Callable[[str], Maybe[T]] | None = None,
420
+ separator: str = ',',
421
+ error_message: str | None = None,
422
+ ) -> Maybe[list[T]]:
423
+ """Parse a string to a list using the specified element parser and separator.
424
+
425
+ Splits the input string by the separator and parses each element using the element parser.
426
+ If no element parser is provided, elements are returned as trimmed strings.
427
+
428
+ Args:
429
+ input_value: The string to parse
430
+ element_parser: A function that parses individual elements (default: strips whitespace)
431
+ separator: The string that separates elements (default: ',')
432
+ error_message: Custom error message for parsing failures
433
+
434
+ Returns:
435
+ Maybe[list[T]]: Success with parsed list or Failure with error message
436
+
437
+ Examples:
438
+ >>> parse_list("a,b,c")
439
+ Success(['a', 'b', 'c'])
440
+ >>> parse_list("1, 2, 3", element_parser=parse_int)
441
+ Success([1, 2, 3])
442
+ >>> parse_list("apple|banana|cherry", separator="|")
443
+ Success(['apple', 'banana', 'cherry'])
444
+ >>> parse_list("1,2,invalid", element_parser=parse_int).is_failure()
445
+ True
446
+ """
447
+ if not input_value:
448
+ return Maybe.failure('Input must not be empty')
449
+
450
+ def default_parser(s: str) -> Maybe[T]:
451
+ return Maybe.success(s.strip()) # type: ignore[arg-type]
452
+
453
+ parser = element_parser if element_parser is not None else default_parser
454
+
455
+ elements = input_value.split(separator)
456
+
457
+ parsed_elements: list[T] = []
458
+ for i, element in enumerate(elements, start=1):
459
+ match parser(element.strip()):
460
+ case Success(value) if value is not None:
461
+ parsed_elements.append(value)
462
+ case Failure() if error_message:
463
+ return Maybe.failure(error_message)
464
+ case Failure(result):
465
+ return Maybe.failure(f"Failed to parse element {i} '{element}': {result}")
466
+
467
+ return Maybe.success(parsed_elements)
468
+
469
+
470
+ def _parse_key_value_pair( # noqa: PLR0913
471
+ pair: str,
472
+ index: int,
473
+ key_parser: Callable[[str], Maybe[K]], # K can be None
474
+ value_parser: Callable[[str], Maybe[V]], # V can be None
475
+ key_value_separator: str,
476
+ error_message: str | None = None,
477
+ ) -> tuple[bool, K | None, V | None, str | None]:
478
+ """Parse a single key-value pair.
479
+
480
+ Returns:
481
+ A tuple of (success, key, value, error_message)
482
+
483
+ """
484
+ if key_value_separator not in pair:
485
+ error = f"Invalid key-value pair '{pair}': missing separator '{key_value_separator}'"
486
+ return False, None, None, error_message or error
487
+
488
+ key_str, value_str = pair.split(key_value_separator, 1)
489
+
490
+ # Parse the key
491
+ key_result = key_parser(key_str.strip())
492
+ if key_result.is_failure():
493
+ error = f"Failed to parse key in pair {index + 1} '{pair}': {key_result.error_or('Parse error')}"
494
+ return False, None, None, error_message or error
495
+
496
+ # Parse the value
497
+ value_result = value_parser(value_str.strip())
498
+ if value_result.is_failure():
499
+ error = f"Failed to parse value in pair {index + 1} '{pair}': {value_result.error_or('Parse error')}"
500
+ return False, None, None, error_message or error
501
+
502
+ # At this point both results are Success; extract concrete values by pattern matching
503
+ match key_result:
504
+ case Success(key_val):
505
+ key: K | None = key_val
506
+ case _:
507
+ key = None
508
+
509
+ match value_result:
510
+ case Success(value_val):
511
+ value: V | None = value_val
512
+ case _:
513
+ value = None
514
+
515
+ return True, key, value, None
516
+
517
+
518
def parse_dict(  # noqa: PLR0913
    input_value: str,
    key_parser: Callable[[str], Maybe[K]] | None = None,
    value_parser: Callable[[str], Maybe[V]] | None = None,
    pair_separator: str = ',',
    key_value_separator: str = ':',
    error_message: str | None = None,
) -> Maybe[dict[K, V]]:
    """Build a dictionary from a delimited string of key/value pairs.

    The input is cut into pairs on ``pair_separator``; each pair is cut into key
    and value on ``key_value_separator``. Keys and values go through their
    respective parsers, which default to returning the trimmed text. Pairs whose
    parsed key or value is ``None`` are left out of the result.

    Args:
        input_value: The string to parse
        key_parser: A function that parses keys (default: strips whitespace)
        value_parser: A function that parses values (default: strips whitespace)
        pair_separator: The string that separates key-value pairs (default: ',')
        key_value_separator: The string that separates keys from values (default: ':')
        error_message: Custom error message for parsing failures

    Returns:
        Maybe[dict[K, V]]: Success with parsed dictionary or Failure with error message

    Examples:
        >>> parse_dict("a:1,b:2,c:3")
        Success({'a': '1', 'b': '2', 'c': '3'})
        >>> parse_dict("x:10, y:20", value_parser=parse_int)
        Success({'x': 10, 'y': 20})
        >>> parse_dict("name=Alice|age=30", pair_separator="|", key_value_separator="=")
        Success({'name': 'Alice', 'age': '30'})
        >>> parse_dict("a:1,b:invalid", value_parser=parse_int).is_failure()
        True
    """
    if not input_value:
        return Maybe.failure('Input must not be empty')

    def _default_parser(s: str) -> Maybe[str | None]:
        """Parse a string by stripping whitespace."""
        return Maybe.success(s.strip())

    chosen_key_parser = _default_parser if key_parser is None else key_parser
    chosen_value_parser = _default_parser if value_parser is None else value_parser

    # The casts only reconcile the default parser's type with K / V for type checkers.
    actual_key_parser: Callable[[str], Maybe[K | None]] = cast('Callable[[str], Maybe[K | None]]', chosen_key_parser)
    actual_value_parser: Callable[[str], Maybe[V | None]] = cast(
        'Callable[[str], Maybe[V | None]]', chosen_value_parser
    )

    collected: dict[K, V] = {}
    for position, chunk in enumerate(input_value.split(pair_separator)):
        ok, key, value, problem = _parse_key_value_pair(
            chunk, position, actual_key_parser, actual_value_parser, key_value_separator, error_message
        )

        if not ok:
            return Maybe.failure(problem or 'Failed to parse key-value pair')

        if key is None or value is None:
            continue
        collected[key] = value

    return Maybe.success(collected)
585
+
586
+
587
def parse_set(
    input_value: str,
    element_parser: Callable[[str], Maybe[T]] | None = None,
    separator: str | None = None,
    error_message: str | None = None,
) -> Maybe[set[T]]:
    """Parse a string to a set using the specified element parser and separator.

    Splits the input string by the separator and parses each element using the element parser.
    Automatically removes duplicate values. If no element parser is provided, elements are
    returned as trimmed strings.

    On failure the message produced by the underlying list parsing is propagated,
    so a custom ``error_message`` (or the per-element diagnostic) reaches the caller.

    Args:
        input_value: The string to parse
        element_parser: A function that parses individual elements (default: strips whitespace)
        separator: The string that separates elements (default: ',')
        error_message: Custom error message for parsing failures

    Returns:
        Maybe[set[T]]: Success with parsed set or Failure with error message

    Examples:
        >>> result = parse_set("a,b,c")
        >>> result.is_success()
        True
        >>> sorted(result.value_or(set()))
        ['a', 'b', 'c']
        >>> result = parse_set("1, 2, 3, 2, 1", element_parser=parse_int)
        >>> sorted(result.value_or(set()))
        [1, 2, 3]
        >>> result = parse_set("red|blue|green|red", separator="|")
        >>> sorted(result.value_or(set()))
        ['blue', 'green', 'red']
        >>> parse_set("1,2,invalid", element_parser=parse_int).is_failure()
        True
        >>> parse_set("1,x", element_parser=parse_int, error_message="bad set").error_or("")
        'bad set'
    """
    if separator is None:
        separator = ','

    # Use the list parser and convert to set
    result = parse_list(input_value, element_parser, separator, error_message)
    if result.is_failure():
        # Forward the real reason instead of a generic placeholder; the fallback
        # text is only used if the failure carries no message.
        return Maybe.failure(result.error_or('Parse error'))

    # Convert to set (removes duplicates)
    parsed_list = result.value_or([])
    return Maybe.success(set(parsed_list))
633
+
634
+
635
+ # Type-specific validation parsers
636
+
637
+
638
def parse_int_with_validation(
    input_value: str,
    min_value: int | None = None,
    max_value: int | None = None,
    error_message: str | None = None,
) -> Maybe[int]:
    """Parse an integer and check it against optional inclusive bounds.

    The text is parsed with ``parse_int``; a parse failure is returned unchanged.
    The lower bound is checked before the upper bound.

    Args:
        input_value: The string to parse
        min_value: Minimum allowed value (inclusive)
        max_value: Maximum allowed value (inclusive)
        error_message: Custom error message for parsing failures

    Returns:
        Maybe[int]: Success with validated integer or Failure with error message

    Examples:
        >>> parse_int_with_validation("42", min_value=0, max_value=100)
        Success(42)
        >>> parse_int_with_validation("5", min_value=10).is_failure()
        True
        >>> parse_int_with_validation("150", max_value=100).is_failure()
        True
        >>> parse_int_with_validation("50", min_value=0, max_value=100)
        Success(50)
    """
    parsed = parse_int(input_value, error_message)
    if parsed.is_failure():
        return parsed

    number = parsed.value_or(0)

    below_minimum = min_value is not None and number < min_value
    if below_minimum:
        return Maybe.failure(error_message or f'Value must be at least {min_value}')

    above_maximum = max_value is not None and number > max_value
    if above_maximum:
        return Maybe.failure(error_message or f'Value must be at most {max_value}')

    return Maybe.success(number)
682
+
683
+
684
def parse_list_with_validation(  # noqa: PLR0913
    input_value: str,
    element_parser: Callable[[str], Maybe[T]] | None = None,
    separator: str = ',',
    min_length: int | None = None,
    max_length: int | None = None,
    error_message: str | None = None,
) -> Maybe[list[T]]:
    """Parse a list and check its element count against optional bounds.

    The text is parsed with ``parse_list``; a parse failure is returned unchanged.
    The minimum length is checked before the maximum length.

    Args:
        input_value: The string to parse
        element_parser: A function that parses individual elements
        separator: The string that separates elements
        min_length: Minimum allowed list length
        max_length: Maximum allowed list length
        error_message: Custom error message for parsing failures

    Returns:
        Maybe[list[T]]: Success with validated list or Failure with error message

    Examples:
        >>> parse_list_with_validation("a,b,c", min_length=2, max_length=5)
        Success(['a', 'b', 'c'])
        >>> parse_list_with_validation("1,2", element_parser=parse_int, min_length=3).is_failure()
        True
        >>> parse_list_with_validation("1,2,3,4,5,6", max_length=5).is_failure()
        True
        >>> parse_list_with_validation("10,20,30", element_parser=parse_int, min_length=1)
        Success([10, 20, 30])
    """
    parsed = parse_list(input_value, element_parser, separator, error_message)
    if parsed.is_failure():
        return parsed

    items = parsed.value_or([])

    too_short = min_length is not None and len(items) < min_length
    if too_short:
        return Maybe.failure(error_message or f'List must have at least {min_length} elements')

    too_long = max_length is not None and len(items) > max_length
    if too_long:
        return Maybe.failure(error_message or f'List must have at most {max_length} elements')

    return Maybe.success(items)
732
+
733
+
734
def parse_dict_with_validation(  # noqa: PLR0913
    input_value: str,
    key_parser: Callable[[str], Maybe[K]] | None = None,
    value_parser: Callable[[str], Maybe[V]] | None = None,
    pair_separator: str = ',',
    key_value_separator: str = ':',
    required_keys: list[str] | None = None,
    error_message: str | None = None,
) -> Maybe[dict[K, V]]:
    """Parse a dictionary and verify that every required key is present.

    The text is parsed with ``parse_dict``; a parse failure is returned unchanged.
    Afterwards each entry of ``required_keys`` must be a key of the result.

    Args:
        input_value: The string to parse
        key_parser: A function that parses keys
        value_parser: A function that parses values
        pair_separator: The string that separates key-value pairs
        key_value_separator: The string that separates keys from values
        required_keys: List of keys that must be present
        error_message: Custom error message for parsing failures

    Returns:
        Maybe[dict[K, V]]: Success with validated dictionary or Failure with error message

    Examples:
        >>> parse_dict_with_validation("name:Alice,age:30", required_keys=["name", "age"])
        Success({'name': 'Alice', 'age': '30'})
        >>> parse_dict_with_validation("name:Bob", required_keys=["name", "age"]).is_failure()
        True
        >>> result = parse_dict_with_validation("x:10,y:20", value_parser=parse_int, required_keys=["x"])
        >>> result.value_or({})
        {'x': 10, 'y': 20}
    """
    parsed = parse_dict(input_value, key_parser, value_parser, pair_separator, key_value_separator, error_message)
    if parsed.is_failure():
        return parsed

    mapping = parsed.value_or({})

    # A missing or empty requirement list means there is nothing to verify.
    absent = [name for name in (required_keys or []) if name not in mapping]
    if absent:
        return Maybe.failure(error_message or f'Missing required keys: {", ".join(absent)}')

    return Maybe.success(mapping)
782
+
783
+
784
def create_parser(convert_func: Callable[[str], T], error_message: str | None = None) -> Callable[[str], Maybe[T]]:
    """Create a parser function from a conversion function.

    This factory takes a function that converts strings to values and wraps it
    in error handling logic to return Maybe instances. Any exception raised by
    ``convert_func`` is turned into a Failure.

    Args:
        convert_func: A function that converts strings to values of type T.
            Any callable works, including ones without a ``__name__``
            (e.g. ``functools.partial`` objects).
        error_message: Optional custom error message for failures

    Returns:
        A parser function that returns Maybe[T]

    Example:
        >>> from decimal import Decimal
        >>> parse_decimal = create_parser(Decimal, "Invalid decimal format")
        >>> result = parse_decimal("3.14")
        >>> result.is_success()
        True

    """
    # Resolve the display name once. Reading convert_func.__name__ inside the
    # except handler would raise AttributeError for callables that lack it,
    # turning a parse failure into a crash.
    converter_name = getattr(convert_func, '__name__', None) or type(convert_func).__name__

    def parser(input_value: str) -> Maybe[T]:
        if not input_value:
            return Failure('Input must not be empty')

        try:
            return Success(convert_func(input_value.strip()))
        except Exception as e:  # noqa: BLE001
            return Failure(error_message or f'Invalid {converter_name} format: {e}')

    return parser
816
+
817
+
818
@overload
def make_parser(func: Callable[[str], T]) -> Callable[[str], Maybe[T]]: ...


@overload
def make_parser() -> Callable[[Callable[[str], T]], Callable[[str], Maybe[T]]]: ...


def make_parser(
    func: Callable[[str], T] | None = None,
) -> Callable[[str], Maybe[T]] | Callable[[Callable[[str], T]], Callable[[str], Maybe[T]]]:
    """Create a parser function from a conversion function with a decorator.

    The decorated function receives the stripped input. Empty input and any
    exception raised by the function are reported as a Failure.

    Args:
        func: The conversion function when used as ``@make_parser``; omitted
            when used as ``@make_parser()``.

    Returns:
        The wrapped parser when ``func`` is given, otherwise a decorator that
        produces one.

    Example:
        @make_parser
        def parse_decimal(s: str) -> Decimal:
            return Decimal(s)

        # Or with parentheses
        @make_parser()
        def parse_decimal(s: str) -> Decimal:
            return Decimal(s)

        result = parse_decimal("123.45")  # Returns Maybe[Decimal]

    """

    def decorator(f: Callable[[str], T]) -> Callable[[str], Maybe[T]]:
        # Resolve the display name up front: callables such as functools.partial
        # objects have no __name__, and looking it up inside the except handler
        # would raise AttributeError instead of returning a Failure.
        func_name = getattr(f, '__name__', None) or type(f).__name__

        @wraps(f)
        def wrapper(input_value: str) -> Maybe[T]:
            if not input_value:
                return Maybe.failure('Input must not be empty')
            try:
                return Maybe.success(f(input_value.strip()))
            except Exception as e:  # noqa: BLE001
                return Maybe.failure(f'Invalid format for {func_name}, error: {e}')

        return wrapper

    # Handle both @make_parser and @make_parser() syntax
    if func is None:
        return decorator
    return decorator(func)
861
+
862
+
863
def validated_parser(
    convert_func: Callable[[str], T], validator: Callable[[T], Maybe[T]], error_message: str | None = None
) -> Callable[[str], Maybe[T]]:
    """Build a parser that converts a string and then validates the result.

    Conversion is delegated to ``create_parser``; when it succeeds the converted
    value is passed to ``validator`` through ``bind``.

    Args:
        convert_func: A function that converts strings to values of type T
        validator: A validator function that validates the parsed value
        error_message: Optional custom error message for parsing failures

    Returns:
        A parser function that returns Maybe[T]

    Example:
        >>> from decimal import Decimal
        >>> from valid8r.core.validators import minimum, maximum
        >>> # Create a parser for positive decimals
        >>> valid_range = lambda x: minimum(0)(x).bind(lambda y: maximum(100)(y))
        >>> parse_percent = validated_parser(Decimal, valid_range)
        >>> result = parse_percent("42.5")
        >>> result.is_success()
        True

    """
    base_parser = create_parser(convert_func, error_message)

    def parser(input_value: str) -> Maybe[T]:
        # Parse, then chain the validator onto the parse result.
        return base_parser(input_value).bind(validator)

    return parser
899
+
900
+
901
def parse_uuid(text: str, version: int | None = None, strict: bool = True) -> Maybe[UUID]:
    """Parse a string into a standard-library ``UUID``.

    The text is parsed with uuid-utils when that package was importable and with
    the standard library otherwise. If ``version`` is given it must be one of
    1, 3, 4, 5, 6, 7 or 8; in ``strict`` mode (the default) a parsed UUID of a
    different version is a Failure, while with ``strict=False`` the mismatch is
    ignored.

    Args:
        text: The input string to parse as UUID (surrounding whitespace is stripped).
        version: Optional expected UUID version to validate against.
        strict: Whether to enforce the expected version when provided.

    Returns:
        Maybe[UUID]: Success with a UUID object or Failure with an error message.

    """
    if not text:
        return Maybe.failure('Input must not be empty')

    candidate = text.strip()

    try:
        # uuid-utils is preferred when available; the standard library is the fallback.
        parsed = uuidu.UUID(candidate) if uuidu is not None else UUID(candidate)
        found_version = getattr(parsed, 'version', None)
    except Exception:  # noqa: BLE001
        return Maybe.failure('Input must be a valid UUID')

    if version is not None:
        if version not in {1, 3, 4, 5, 6, 7, 8}:
            return Maybe.failure(f'Unsupported UUID version: v{version}')
        if strict and version != found_version:
            return Maybe.failure(f'UUID version mismatch: expected v{version}, got v{found_version}')

    # Callers always receive a standard-library UUID, whichever library parsed it above.
    try:
        return Maybe.success(UUID(candidate))
    except Exception:  # noqa: BLE001
        # Not expected once the first parse succeeded; kept as a guard.
        return Maybe.failure('Input must be a valid UUID')
946
+
947
+
948
def parse_ipv4(text: str) -> Maybe[IPv4Address]:
    """Parse a dotted-decimal IPv4 address.

    Surrounding whitespace is removed before parsing. Anything that does not
    parse as an IPv4 address (including a valid IPv6 address) is a Failure.

    Args:
        text: String containing an IPv4 address (whitespace is stripped)

    Returns:
        Maybe[IPv4Address]: Success(IPv4Address) if valid, Failure(str) with error message otherwise

    Examples:
        >>> parse_ipv4("192.168.1.1")
        Success(IPv4Address('192.168.1.1'))
        >>> parse_ipv4(" 10.0.0.1 ")
        Success(IPv4Address('10.0.0.1'))
        >>> parse_ipv4("256.1.1.1").is_failure()
        True
        >>> parse_ipv4("not an ip").is_failure()
        True
    """
    if not isinstance(text, str):
        return Maybe.failure('Input must be a string')

    candidate = text.strip()
    if not candidate:
        return Maybe.failure('Input must not be empty')

    try:
        parsed = ip_address(candidate)
    except ValueError:
        parsed = None

    # ip_address() also accepts IPv6 text, so the concrete type decides the outcome.
    if isinstance(parsed, IPv4Address):
        return Maybe.success(parsed)
    return Maybe.failure('not a valid IPv4 address')
986
+
987
+
988
def parse_ipv6(text: str) -> Maybe[IPv6Address]:
    """Parse an IPv6 address in standard notation.

    Surrounding whitespace is removed before parsing. Input containing ``%``
    (a scope ID such as ``%eth0``) is rejected, as is anything that does not
    parse as an IPv6 address (including a valid IPv4 address).

    Args:
        text: String containing an IPv6 address (whitespace is stripped)

    Returns:
        Maybe[IPv6Address]: Success(IPv6Address) if valid, Failure(str) with error message otherwise

    Examples:
        >>> parse_ipv6("::1")
        Success(IPv6Address('::1'))
        >>> parse_ipv6("2001:0db8:85a3::8a2e:0370:7334")
        Success(IPv6Address('2001:db8:85a3::8a2e:370:7334'))
        >>> parse_ipv6(" fe80::1 ")
        Success(IPv6Address('fe80::1'))
        >>> parse_ipv6("192.168.1.1").is_failure()
        True
    """
    if not isinstance(text, str):
        return Maybe.failure('Input must be a string')

    candidate = text.strip()
    if not candidate:
        return Maybe.failure('Input must not be empty')

    # Scope IDs (e.g. %eth0) are refused up front.
    if '%' in candidate:
        return Maybe.failure('not a valid IPv6 address')

    try:
        parsed = ip_address(candidate)
    except ValueError:
        parsed = None

    # ip_address() also accepts IPv4 text, so the concrete type decides the outcome.
    if isinstance(parsed, IPv6Address):
        return Maybe.success(parsed)
    return Maybe.failure('not a valid IPv6 address')
1030
+
1031
+
1032
def parse_ip(text: str) -> Maybe[IPv4Address | IPv6Address]:
    """Parse a string as an IPv4 or an IPv6 address, whichever it is.

    Surrounding whitespace is removed before parsing. Input containing ``%``
    (scope IDs) or ``://`` (URLs) is rejected without attempting to parse it.

    Args:
        text: String containing an IP address (IPv4 or IPv6, whitespace is stripped)

    Returns:
        Maybe[IPv4Address | IPv6Address]: Success with IPv4Address or IPv6Address if valid,
        Failure(str) with error message otherwise

    Examples:
        >>> result = parse_ip("192.168.1.1")
        >>> result.is_success()
        True
        >>> result = parse_ip("::1")
        >>> result.is_success()
        True
        >>> parse_ip(" 10.0.0.1 ")
        Success(IPv4Address('10.0.0.1'))
        >>> parse_ip("not an ip").is_failure()
        True
    """
    if not isinstance(text, str):
        return Maybe.failure('Input must be a string')

    candidate = text.strip()
    if not candidate:
        return Maybe.failure('Input must not be empty')

    # Scope IDs and URLs are not bare addresses.
    looks_like_non_address = '%' in candidate or '://' in candidate
    if looks_like_non_address:
        return Maybe.failure('not a valid IP address')

    try:
        parsed = ip_address(candidate)
    except ValueError:
        parsed = None

    if isinstance(parsed, (IPv4Address, IPv6Address)):
        return Maybe.success(parsed)
    return Maybe.failure('not a valid IP address')
1077
+
1078
+
1079
def parse_cidr(text: str, *, strict: bool = True) -> Maybe[IPv4Network | IPv6Network]:
    """Parse an IPv4 or IPv6 network written in CIDR notation (e.g. 192.168.1.0/24).

    In strict mode (the default) a network whose host bits are set is rejected.
    With ``strict=False`` the host bits are masked off and the resulting network
    is returned.

    Args:
        text: String containing a CIDR network (whitespace is stripped)
        strict: If True, reject networks with host bits set; if False, mask them (default: True)

    Returns:
        Maybe[IPv4Network | IPv6Network]: Success with IPv4Network or IPv6Network if valid,
        Failure(str) with error message otherwise

    Examples:
        >>> parse_cidr("192.168.1.0/24")
        Success(IPv4Network('192.168.1.0/24'))
        >>> parse_cidr("10.0.0.0/8")
        Success(IPv4Network('10.0.0.0/8'))
        >>> parse_cidr("2001:db8::/32")
        Success(IPv6Network('2001:db8::/32'))
        >>> # Strict mode rejects host bits
        >>> parse_cidr("192.168.1.5/24").is_failure()
        True
        >>> # Non-strict mode masks host bits
        >>> result = parse_cidr("192.168.1.5/24", strict=False)
        >>> str(result.value_or(None))
        '192.168.1.0/24'
    """
    if not isinstance(text, str):
        return Maybe.failure('Input must be a string')

    candidate = text.strip()
    if not candidate:
        return Maybe.failure('Input must not be empty')

    try:
        network = ip_network(candidate, strict=strict)
    except ValueError as exc:
        # The host-bits case gets its own message; every other error is generic.
        reason = 'has host bits set' if 'has host bits set' in str(exc) else 'not a valid network'
        return Maybe.failure(reason)

    if isinstance(network, (IPv4Network, IPv6Network)):
        return Maybe.success(network)
    return Maybe.failure('not a valid network')
1128
+
1129
+
1130
+ # ---------------------------
1131
+ # URL and Email parsing
1132
+ # ---------------------------
1133
+
1134
+
1135
@dataclass(frozen=True)
class UrlParts:
    """Immutable record of the components of a parsed URL.

    Attributes:
        scheme: URL scheme, lowercased (e.g. "http").
        username: Username taken from the userinfo section, or None.
        password: Password taken from the userinfo section, or None.
        host: Lowercased host name or IPv6 literal without its brackets; None when
            the URL carries no host and none is required.
        port: Port number when one is written explicitly, otherwise None.
        path: Path component, left exactly as written.
        query: Query string, without the leading '?'.
        fragment: Fragment, without the leading '#'.

    Examples:
        >>> from valid8r.core.maybe import Success
        >>> match parse_url('https://alice:pw@example.com:8443/x?q=1#top'):
        ...     case Success(u):
        ...         (u.scheme, u.username, u.password, u.host, u.port, u.path, u.query, u.fragment)
        ...     case _:
        ...         ()
        ('https', 'alice', 'pw', 'example.com', 8443, '/x', 'q=1', 'top')

    """

    scheme: str
    username: str | None
    password: str | None
    host: str | None
    port: int | None
    path: str
    query: str
    fragment: str
1168
+
1169
+
1170
@dataclass(frozen=True)
class EmailAddress:
    """Immutable record of the two halves of an email address.

    Attributes:
        local: The part before the '@', with its original case kept.
        domain: The part after the '@', lowercased.

    Examples:
        >>> from valid8r.core.maybe import Success
        >>> match parse_email('First.Last+tag@Example.COM'):
        ...     case Success(addr):
        ...         (addr.local, addr.domain)
        ...     case _:
        ...         ()
        ('First.Last+tag', 'example.com')

    """

    local: str
    domain: str
1191
+
1192
+
1193
@dataclass(frozen=True)
class PhoneNumber:
    """Immutable record of a North American Numbering Plan (NANP) phone number.

    Holds the pieces of a number from the United States, Canada or another NANP
    territory, and offers several formatted renderings as read-only properties.

    Attributes:
        area_code: Three-digit area code (NPA).
        exchange: Three-digit exchange code (NXX).
        subscriber: Four-digit subscriber number.
        country_code: Country code (always '1' for NANP).
        region: Two-letter region code ('US', 'CA', etc.).
        extension: Optional extension number.

    Examples:
        >>> from valid8r.core.maybe import Success
        >>> match parse_phone('(415) 555-2671'):
        ...     case Success(phone):
        ...         (phone.area_code, phone.exchange, phone.subscriber)
        ...     case _:
        ...         ()
        ('415', '555', '2671')

    """

    area_code: str
    exchange: str
    subscriber: str
    country_code: str
    region: str
    extension: str | None

    @property
    def e164(self) -> str:
        """Render the number in E.164 style (+14155552671).

        The country code is prefixed with '+' and no separators are used. When an
        extension is set it is appended as `` x<extension>``.

        Returns:
            Phone number in E.164 format, with extension if present.
        """
        number = f'+{self.country_code}{self.area_code}{self.exchange}{self.subscriber}'
        return f'{number} x{self.extension}' if self.extension else number

    @property
    def national(self) -> str:
        """Render the number in national style ((415) 555-2671).

        The country code is omitted. When an extension is set it is appended as
        `` ext. <extension>``.

        Returns:
            Phone number in national format, with extension if present.
        """
        number = f'({self.area_code}) {self.exchange}-{self.subscriber}'
        return f'{number} ext. {self.extension}' if self.extension else number

    @property
    def international(self) -> str:
        """Render the number in international style (+1 415-555-2671).

        The country code comes first and the groups are separated by dashes. When
        an extension is set it is appended as `` ext. <extension>``.

        Returns:
            Phone number in international format, with extension if present.
        """
        number = f'+{self.country_code} {self.area_code}-{self.exchange}-{self.subscriber}'
        return f'{number} ext. {self.extension}' if self.extension else number

    @property
    def raw_digits(self) -> str:
        """Render only the digits, country code first (14155552671).

        No separators are included and the extension is left out.

        Returns:
            All digits as a string without any formatting.
        """
        return ''.join((self.country_code, self.area_code, self.exchange, self.subscriber))
1282
+
1283
+
1284
+ def _is_valid_hostname_label(label: str) -> bool:
1285
+ if not (1 <= len(label) <= 63):
1286
+ return False
1287
+ # Alnum or hyphen; cannot start or end with hyphen
1288
+ if label.startswith('-') or label.endswith('-'):
1289
+ return False
1290
+ for ch in label:
1291
+ if ch.isalnum() or ch == '-':
1292
+ continue
1293
+ return False
1294
+ return True
1295
+
1296
+
1297
def _is_valid_hostname(host: str) -> bool:
    """Return True when ``host`` is 'localhost' or a dotted sequence of valid labels.

    The whole name must be 1 to 253 characters long. Each dot-separated label is
    checked with ``_is_valid_hostname_label``, which also rejects empty labels
    and labels containing underscores.
    """
    # 'localhost' is accepted explicitly, in any letter case.
    if host.lower() == 'localhost':
        return True

    if not host or len(host) > 253:
        return False

    return all(_is_valid_hostname_label(part) for part in host.split('.'))
1308
+
1309
+
1310
+ def _parse_userinfo_and_hostport(netloc: str) -> tuple[str | None, str | None, str]:
1311
+ """Split userinfo and hostport from a netloc string."""
1312
+ if '@' in netloc:
1313
+ userinfo, hostport = netloc.rsplit('@', 1)
1314
+ if ':' in userinfo:
1315
+ user, pwd = userinfo.split(':', 1)
1316
+ else:
1317
+ user, pwd = userinfo, None
1318
+ return (user or None), (pwd or None), hostport
1319
+ return None, None, netloc
1320
+
1321
+
1322
+ def _parse_host_and_port(hostport: str) -> tuple[str | None, int | None]:
1323
+ """Parse host and optional port from hostport.
1324
+
1325
+ Supports IPv6 literals in brackets.
1326
+ Returns (host, port). Host is None when missing.
1327
+ """
1328
+ if not hostport:
1329
+ return None, None
1330
+
1331
+ host = None
1332
+ port: int | None = None
1333
+
1334
+ if hostport.startswith('['):
1335
+ # IPv6 literal [::1] or [::1]:443
1336
+ if ']' not in hostport:
1337
+ return None, None
1338
+ end = hostport.find(']')
1339
+ host = hostport[1:end]
1340
+ rest = hostport[end + 1 :]
1341
+ if rest.startswith(':'):
1342
+ try:
1343
+ port_val = int(rest[1:])
1344
+ except ValueError:
1345
+ return None, None
1346
+ if not (0 <= port_val <= 65535):
1347
+ return None, None
1348
+ port = port_val
1349
+ elif rest != '':
1350
+ # Garbage after bracket
1351
+ return None, None
1352
+ return host, port
1353
+
1354
+ # Not bracketed: split on last ':' to allow IPv6 bracket requirement
1355
+ if ':' in hostport:
1356
+ host_candidate, port_str = hostport.rsplit(':', 1)
1357
+ if host_candidate == '':
1358
+ return None, None
1359
+ try:
1360
+ port_val = int(port_str)
1361
+ except ValueError:
1362
+ # Could be part of IPv6 without brackets (not supported by URL syntax)
1363
+ return hostport, None
1364
+ if not (0 <= port_val <= 65535):
1365
+ return None, None
1366
+ return host_candidate, port_val
1367
+
1368
+ return hostport, None
1369
+
1370
+
1371
+ def _validate_url_host(host: str | None, original_netloc: str) -> bool:
1372
+ if host is None:
1373
+ return False
1374
+
1375
+ # If original contained brackets or host contains ':' treat as IPv6
1376
+ if original_netloc.startswith('[') or ':' in host:
1377
+ try:
1378
+ _ = ip_address(host)
1379
+ return isinstance(_, (IPv6Address, IPv4Address))
1380
+ except ValueError:
1381
+ return False
1382
+
1383
+ # Try IPv4
1384
+ try:
1385
+ _ = ip_address(host)
1386
+ if isinstance(_, IPv4Address):
1387
+ return True
1388
+ except ValueError:
1389
+ pass
1390
+
1391
+ # Hostname
1392
+ return _is_valid_hostname(host)
1393
+
1394
+
1395
def parse_url(
    text: str,
    *,
    allowed_schemes: Iterable[str] = ('http', 'https'),
    require_host: bool = True,
) -> Maybe[UrlParts]:
    """Parse a URL with light validation.

    Rules:
    - Trim surrounding whitespace only
    - Require scheme in allowed_schemes (defaults to http/https)
    - If require_host, netloc must include a valid host (hostname, IPv4, or bracketed IPv6)
    - Lowercase scheme and host; do not modify path/query/fragment

    Failure messages (exact substrings):
    - Input must be a string
    - Input must not be empty
    - Unsupported URL scheme
    - URL requires host
    - Invalid host

    Input that ``urllib.parse.urlsplit`` refuses outright (for example a netloc
    with unbalanced IPv6 brackets) yields ``Invalid host`` rather than raising.

    Args:
        text: The URL string to parse
        allowed_schemes: Iterable of allowed scheme names (default: ('http', 'https'))
        require_host: Whether to require a host in the URL (default: True)

    Returns:
        Maybe[UrlParts]: Success with UrlParts containing parsed components, or Failure with error message

    Examples:
        >>> from valid8r.core.parsers import parse_url
        >>> from valid8r.core.maybe import Success
        >>>
        >>> # Parse a complete URL
        >>> result = parse_url('https://user:pass@api.example.com:8080/v1/users?active=true#section')
        >>> isinstance(result, Success)
        True
        >>> url = result.value
        >>> url.scheme
        'https'
        >>> url.host
        'api.example.com'
        >>> url.port
        8080
        >>> url.path
        '/v1/users'
        >>> url.query
        'active=true'
        >>> url.fragment
        'section'
        >>>
        >>> # Access credentials
        >>> url.username
        'user'
        >>> url.password
        'pass'
    """
    if not isinstance(text, str):
        return Maybe.failure('Input must be a string')

    s = text.strip()
    if s == '':
        return Maybe.failure('Input must not be empty')

    try:
        parts = urlsplit(s)
    except ValueError:
        # urlsplit raises for malformed netlocs (e.g. 'http://[::1'); this
        # function reports problems through Maybe instead of raising.
        return Maybe.failure('Invalid host')

    scheme_lower = parts.scheme.lower()
    if scheme_lower == '' or scheme_lower not in {sch.lower() for sch in allowed_schemes}:
        return Maybe.failure('Unsupported URL scheme')

    username: str | None = None
    password: str | None = None
    host: str | None = None
    port: int | None = None

    netloc = parts.netloc

    if netloc:
        username, password, hostport = _parse_userinfo_and_hostport(netloc)
        host, port = _parse_host_and_port(hostport)

        if host is not None:
            host = host.lower()

        # Validate host when present
        if host is not None and not _validate_url_host(host, netloc):
            return Maybe.failure('Invalid host')
    elif require_host:
        return Maybe.failure('URL requires host')

    # When require_host is True we must have a host
    if require_host and (host is None or host == ''):
        return Maybe.failure('URL requires host')

    result = UrlParts(
        scheme=scheme_lower,
        username=username,
        password=password,
        host=host,
        port=port,
        path=parts.path,
        query=parts.query,
        fragment=parts.fragment,
    )

    return Maybe.success(result)
1506
+
1507
+
1508
def parse_email(text: str) -> Maybe[EmailAddress]:
    """Parse a bare email address of the form ``local@domain``.

    Validation is delegated to the email-validator library, called with
    deliverability (DNS) checks turned off. The returned ``EmailAddress`` carries
    the local part and domain exactly as email-validator reports them.

    If email-validator is not installed, a Failure saying so is returned.

    Rules:
    - Trim surrounding whitespace
    - Validation is performed by email-validator
    - Domain is lowercased in the result; local part preserves case

    Failure messages:
    - Input must be a string
    - Input must not be empty
    - email-validator library is required but not installed
    - The text of the EmailNotValidError raised by email-validator
    - ``email validation error: ...`` for any other exception

    Args:
        text: The email address string to parse

    Returns:
        Maybe[EmailAddress]: Success with EmailAddress or Failure with error message

    Examples:
        >>> from valid8r.core.parsers import parse_email
        >>> from valid8r.core.maybe import Success
        >>>
        >>> # Parse an email with case normalization
        >>> result = parse_email('User.Name+tag@Example.COM')
        >>> isinstance(result, Success)
        True
        >>> email = result.value
        >>> # Local part preserves original case
        >>> email.local
        'User.Name+tag'
        >>> # Domain is normalized to lowercase
        >>> email.domain
        'example.com'
    """
    if not isinstance(text, str):
        return Maybe.failure('Input must be a string')

    candidate = text.strip()
    if not candidate:
        return Maybe.failure('Input must not be empty')

    if not HAS_EMAIL_VALIDATOR:
        return Maybe.failure('email-validator library is required but not installed')

    try:
        # check_deliverability=False keeps validation free of DNS lookups.
        validated = validate_email(candidate, check_deliverability=False)
        address = EmailAddress(local=validated.local_part, domain=validated.domain)
        return Maybe.success(address)
    except EmailNotValidError as exc:
        return Maybe.failure(str(exc))
    except Exception as exc:  # noqa: BLE001
        return Maybe.failure(f'email validation error: {exc}')
1571
+
1572
+
1573
def parse_phone(text: str | None, *, region: str = 'US', strict: bool = False) -> Maybe[PhoneNumber]:  # noqa: PLR0912
    """Parse a North American phone number (NANP format).

    Parses phone numbers in the North American Numbering Plan format (US, Canada, etc.).
    Supports various formatting styles and validates area codes and exchanges.

    Rules:
    - Input longer than 100 characters is rejected before any regex is applied
    - Accepts 10-digit or 11-digit (with country code 1) phone numbers
    - An extension, when matched by the module's extension pattern, is split off
      first and may be at most 8 characters long
    - The remaining text must consist only of characters allowed by the module's
      valid-characters pattern; the digits are then extracted from it
    - Validates area code (NPA): cannot start with 0 or 1, cannot be 555
    - Validates exchange (NXX): cannot start with 0 or 1, cannot be 911
    - The 555 exchange is accepted except for subscriber numbers starting with
      '01' (555-01xx) or '5' (555-5xxx)
    - In strict mode, the stripped input must not consist of the digits alone
    - ``region`` is stored on the result as given; it is not validated here

    Failure messages:
    - Phone number cannot be empty (also returned for None and non-string input)
    - Invalid format: phone number is too long
    - Extension is too long (maximum 8 digits)
    - Invalid format: phone number contains invalid characters
    - Strict mode requires formatting characters (e.g., dashes, parentheses, spaces)
    - Only North American phone numbers (country code 1) are supported
    - Phone number must have 10 digits, got N
    - Invalid area code: ... (cannot start with 0 or 1 / reserved for fiction)
    - Invalid exchange: ... (cannot start with 0 or 1 / emergency number /
      555-01xx range is reserved / 555-5xxx range is reserved for fiction)

    Args:
        text: The phone number string to parse
        region: Two-letter region code (default: 'US')
        strict: If True, requires formatting characters (default: False)

    Returns:
        Maybe[PhoneNumber]: Success with PhoneNumber or Failure with error message

    Examples:
        >>> match parse_phone('(415) 555-2671'):
        ...     case Success(phone):
        ...         phone.area_code
        ...     case _:
        ...         None
        '415'

        >>> match parse_phone('415-555-2671 x123'):
        ...     case Success(phone):
        ...         phone.extension
        ...     case _:
        ...         None
        '123'

        >>> match parse_phone('+1 604 555 1234', region='CA'):
        ...     case Success(phone):
        ...         phone.region
        ...     case _:
        ...         None
        'CA'
    """
    # Handle None or empty input
    if text is None or not isinstance(text, str):
        return Maybe.failure('Phone number cannot be empty')

    s = text.strip()
    if s == '':
        return Maybe.failure('Phone number cannot be empty')

    # Early length guard (DoS mitigation) - check BEFORE regex operations.
    # Note: the length of the original (unstripped) text is what is measured.
    if len(text) > 100:
        return Maybe.failure('Invalid format: phone number is too long')

    # Extract extension if present
    extension = None
    extension_match = _PHONE_EXTENSION_PATTERN.search(s)
    if extension_match:
        # Get the captured group (either group 1 or 2)
        # NOTE(review): assumes one of the two groups always participates in a match - confirm against the pattern
        extension = extension_match.group(1) or extension_match.group(2)
        # Validate extension length
        if len(extension) > 8:
            return Maybe.failure('Extension is too long (maximum 8 digits)')
        # Remove extension from phone number for parsing
        s = s[: extension_match.start()]

    # Check for invalid characters before extracting digits
    # Allow only: digits, whitespace (including tabs/newlines), ()-.+ and common separators
    if not _PHONE_VALID_CHARS_PATTERN.match(s):
        return Maybe.failure('Invalid format: phone number contains invalid characters')

    # Extract only digits
    digits = _PHONE_DIGIT_EXTRACTION_PATTERN.sub('', s)

    # Check for strict mode - original must have formatting.
    # The comparison uses the full stripped input, including any extension text.
    if strict and text.strip() == digits:
        return Maybe.failure('Strict mode requires formatting characters (e.g., dashes, parentheses, spaces)')

    # Validate digit count
    if len(digits) == 0:
        return Maybe.failure('Phone number cannot be empty')

    # Handle country code
    country_code = '1'
    if len(digits) == 11:
        if digits[0] != '1':
            return Maybe.failure('Only North American phone numbers (country code 1) are supported')
        digits = digits[1:]  # Strip country code
    elif len(digits) > 11:
        # Check if it starts with a non-1 digit (likely international)
        if digits[0] != '1':
            return Maybe.failure('Only North American phone numbers (country code 1) are supported')
        return Maybe.failure(f'Phone number must have 10 digits, got {len(digits)}')
    elif len(digits) != 10:
        return Maybe.failure(f'Phone number must have 10 digits, got {len(digits)}')

    # Extract components
    area_code = digits[0:3]
    exchange = digits[3:6]
    subscriber = digits[6:10]

    # Validate area code (NPA)
    if area_code[0] in ('0', '1'):
        return Maybe.failure(f'Invalid area code: {area_code} (cannot start with 0 or 1)')
    if area_code == '555':
        return Maybe.failure(f'Invalid area code: {area_code} (reserved for fiction)')

    # Validate exchange (NXX)
    if exchange[0] in ('0', '1'):
        return Maybe.failure(f'Invalid exchange: {exchange} (cannot start with 0 or 1)')
    if exchange == '911':
        return Maybe.failure(f'Invalid exchange: {exchange} (emergency number)')
    # 555 exchange with 01xx subscriber numbers (0100-0199) are reserved
    if exchange == '555' and subscriber.startswith('01'):
        return Maybe.failure(f'Invalid exchange: 555-{subscriber} (555-01xx range is reserved)')
    # 555 exchange with 5xxx subscriber numbers (5000-5999) are fictional
    if exchange == '555' and subscriber.startswith('5'):
        return Maybe.failure(f'Invalid exchange: 555-{subscriber} (555-5xxx range is reserved for fiction)')

    return Maybe.success(
        PhoneNumber(
            area_code=area_code,
            exchange=exchange,
            subscriber=subscriber,
            country_code=country_code,
            region=region,
            extension=extension,
        )
    )
1715
+
1716
+
1717
def parse_slug(
    text: str,
    *,
    min_length: int | None = None,
    max_length: int | None = None,
) -> Maybe[str]:
    """Parse a URL-safe slug (lowercase letters, numbers, hyphens only).

    A valid slug contains only lowercase ASCII letters, digits, and hyphens.
    Cannot start/end with hyphen or have consecutive hyphens. The whole string
    must match, so a slug with a trailing newline is rejected.

    Args:
        text: String to validate as slug
        min_length: Minimum length (optional)
        max_length: Maximum length (optional)

    Returns:
        Maybe[str]: Success with slug or Failure with error

    Examples:
        >>> from valid8r.core.parsers import parse_slug
        >>>
        >>> # Valid slugs
        >>> parse_slug('hello-world').value_or(None)
        'hello-world'
        >>> parse_slug('blog-post-123').value_or(None)
        'blog-post-123'
        >>> parse_slug('a').value_or(None)
        'a'
        >>>
        >>> # With length constraints
        >>> parse_slug('hello', min_length=5).value_or(None)
        'hello'
        >>> parse_slug('hello', max_length=10).value_or(None)
        'hello'
        >>>
        >>> # Invalid slugs
        >>> parse_slug('').is_failure()
        True
        >>> parse_slug('Hello-World').is_failure()
        True
        >>> parse_slug('hello_world').is_failure()
        True
        >>> parse_slug('-hello').is_failure()
        True
        >>> parse_slug('hello-').is_failure()
        True
        >>> parse_slug('hello--world').is_failure()
        True
        >>> parse_slug('hello\\n').is_failure()
        True
        >>>
        >>> # Length constraint failures
        >>> parse_slug('hi', min_length=5).is_failure()
        True
        >>> parse_slug('very-long-slug', max_length=5).is_failure()
        True
    """
    if not text:
        return Maybe.failure('Slug cannot be empty')

    # Check length constraints
    if min_length is not None and len(text) < min_length:
        return Maybe.failure(f'Slug is too short (minimum {min_length} characters)')

    if max_length is not None and len(text) > max_length:
        return Maybe.failure(f'Slug is too long (maximum {max_length} characters)')

    # Check for leading hyphen
    if text.startswith('-'):
        return Maybe.failure('Slug cannot start with a hyphen')

    # Check for trailing hyphen
    if text.endswith('-'):
        return Maybe.failure('Slug cannot end with a hyphen')

    # Check for consecutive hyphens
    if '--' in text:
        return Maybe.failure('Slug cannot contain consecutive hyphens')

    # Check for invalid characters (not lowercase, digit, or hyphen).
    # fullmatch is required here: with re.match and a '$' anchor, '$' also matches
    # just before a trailing newline, which would let 'hello\n' through.
    if not re.fullmatch(r'[a-z0-9-]+', text):
        # Check specifically for uppercase
        if any(c.isupper() for c in text):
            return Maybe.failure('Slug must contain only lowercase letters, numbers, and hyphens')
        return Maybe.failure('Slug contains invalid characters')

    return Maybe.success(text)
1803
+
1804
+
1805
def parse_json(text: str) -> Maybe[object]:
    """Parse a JSON string into a Python object.

    Supports all JSON types: objects, arrays, strings, numbers, booleans, null.
    Malformed input and input nested too deeply for the decoder both produce a
    Failure rather than an exception.

    Args:
        text: JSON-formatted string

    Returns:
        Maybe[object]: Success with parsed object or Failure with error

    Examples:
        >>> from valid8r.core.parsers import parse_json
        >>>
        >>> # JSON objects
        >>> parse_json('{"name": "Alice", "age": 30}').value_or(None)
        {'name': 'Alice', 'age': 30}
        >>>
        >>> # JSON arrays
        >>> parse_json('[1, 2, 3, 4, 5]').value_or(None)
        [1, 2, 3, 4, 5]
        >>>
        >>> # JSON primitives
        >>> parse_json('"hello world"').value_or(None)
        'hello world'
        >>> parse_json('42').value_or(None)
        42
        >>> parse_json('true').value_or(None)
        True
        >>> parse_json('false').value_or(None)
        False
        >>> parse_json('null').value_or(None)
        >>>
        >>> # Invalid JSON
        >>> parse_json('').is_failure()
        True
        >>> parse_json('{invalid}').is_failure()
        True
        >>> parse_json('{"name": "Alice"').is_failure()
        True
    """
    if not text:
        return Maybe.failure('JSON input cannot be empty')

    try:
        result = json.loads(text)
    except json.JSONDecodeError as e:
        return Maybe.failure(f'Invalid JSON: {e.msg}')
    except RecursionError:
        # Extremely deep nesting (e.g. '[' repeated many thousands of times)
        # exhausts the decoder's recursion limit; report it as invalid input.
        return Maybe.failure('Invalid JSON: nesting is too deep')

    return Maybe.success(result)
1854
+
1855
+
1856
def parse_base64(text: str) -> Maybe[bytes]:
    r"""Decode a base64 string into bytes.

    Both the standard alphabet and the URL-safe alphabet are accepted, and
    padding is optional. All whitespace, including newlines inside the
    payload, is discarded before decoding.

    Args:
        text: Base64-encoded string

    Returns:
        Maybe[bytes]: Success with decoded bytes or Failure with error

    Examples:
        >>> from valid8r.core.parsers import parse_base64
        >>>
        >>> # Standard base64 with padding
        >>> parse_base64('SGVsbG8gV29ybGQ=').value_or(None)
        b'Hello World'
        >>>
        >>> # Standard base64 without padding
        >>> parse_base64('SGVsbG8gV29ybGQ').value_or(None)
        b'Hello World'
        >>>
        >>> # URL-safe base64 (hyphens and underscores)
        >>> parse_base64('A-A=').is_success()
        True
        >>> parse_base64('Pz8_').is_success()
        True
        >>>
        >>> # Base64 with whitespace (automatically stripped)
        >>> parse_base64('  SGVsbG8gV29ybGQ=  ').value_or(None)
        b'Hello World'
        >>>
        >>> # Invalid base64
        >>> parse_base64('').is_failure()
        True
        >>> parse_base64('Not@Valid!').is_failure()
        True
        >>> parse_base64('====').is_failure()
        True
    """
    # Drop every whitespace character, not only leading/trailing ones.
    compact = ''.join(text.split())
    if compact == '':
        return Maybe.failure('Base64 input cannot be empty')

    # Map the URL-safe alphabet onto the standard one.
    normalized = compact.replace('-', '+').replace('_', '/')

    # Top the length up to a multiple of four with '=' padding.
    remainder = len(normalized) % 4
    padded = normalized + '=' * (4 - remainder) if remainder else normalized

    try:
        decoded = base64.b64decode(padded, validate=True)
    except (ValueError, binascii.Error):
        return Maybe.failure('Base64 contains invalid characters')
    return Maybe.success(decoded)
1916
+
1917
+
1918
def parse_jwt(text: str) -> Maybe[str]:
    """Parse and validate a JWT (JSON Web Token) structure.

    Validates that the JWT has exactly three parts (header.payload.signature)
    separated by dots, and that each part is valid base64url.
    Also validates that header and payload are valid JSON.

    Base64 decoding and JSON parsing are checked as two separate steps so that
    a segment which decodes correctly but does not contain JSON is reported as
    "not valid JSON" rather than "not valid base64".

    Note: This function validates JWT structure only. It does NOT verify
    the cryptographic signature. Use a dedicated JWT library (e.g., PyJWT)
    for signature verification and claims validation.

    Args:
        text: JWT string to validate

    Returns:
        Maybe[str]: Success with original JWT or Failure with error

    Examples:
        >>> from valid8r.core.parsers import parse_jwt
        >>>
        >>> # Valid JWT (structure only - signature not verified)
        >>> jwt = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.sig'
        >>> parse_jwt(jwt).is_success()
        True
        >>>
        >>> # JWT with whitespace (automatically stripped)
        >>> parse_jwt('  ' + jwt + '  ').is_success()
        True
        >>>
        >>> # Invalid: empty string
        >>> parse_jwt('').is_failure()
        True
        >>>
        >>> # Invalid: wrong number of parts
        >>> parse_jwt('header.payload').is_failure()
        True
        >>> parse_jwt('a.b.c.d').is_failure()
        True
        >>>
        >>> # Invalid: non-base64url encoding
        >>> parse_jwt('not-base64!.eyJzdWIiOiIxMjM0In0.sig').is_failure()
        True
        >>>
        >>> # Invalid: non-JSON header/payload
        >>> parse_jwt('bm90anNvbg==.eyJzdWIiOiIxMjM0In0.sig').is_failure()
        True
    """
    # Strip surrounding whitespace; the stripped token is what gets returned.
    text = text.strip()

    if not text:
        return Maybe.failure('JWT cannot be empty')

    parts = text.split('.')
    if len(parts) != 3:
        return Maybe.failure('JWT must have exactly three parts separated by dots')

    # Helper to convert base64url to base64 with padding
    def decode_base64url(part: str) -> bytes:
        base64_part = part.replace('-', '+').replace('_', '/')
        missing_padding = len(base64_part) % 4
        if missing_padding:
            base64_part += '=' * (4 - missing_padding)
        return base64.b64decode(base64_part, validate=True)

    # (segment name, raw segment, whether its decoded bytes must be JSON).
    # The signature is opaque bytes, so only its encoding is checked.
    segments = (
        ('header', parts[0], True),
        ('payload', parts[1], True),
        ('signature', parts[2], False),
    )

    for name, part, must_be_json in segments:
        if not part:
            return Maybe.failure(f'JWT {name} cannot be empty')

        try:
            decoded = decode_base64url(part)
        except (ValueError, binascii.Error):
            return Maybe.failure(f'JWT {name} is not valid base64')

        if must_be_json:
            try:
                json.loads(decoded)
            except (ValueError, RecursionError):
                # ValueError covers JSONDecodeError and undecodable (non-UTF)
                # bytes; RecursionError covers pathologically deep nesting.
                return Maybe.failure(f'JWT {name} is not valid JSON')

    return Maybe.success(text)
2017
+
2018
+
2019
def parse_path(
    text: str | None,
    *,
    expand_user: bool = False,
    resolve: bool = False,
) -> Maybe[Path]:
    """Parse a string into a pathlib.Path object.

    Converts string representations of filesystem paths to Python Path objects.
    Handles cross-platform path formats, optional home directory expansion,
    and optional resolution to absolute paths.

    Args:
        text: The path string to parse (leading/trailing whitespace is stripped)
        expand_user: If True, expand ~ to user's home directory (default: False)
        resolve: If True, resolve to absolute path following symlinks (default: False)

    Returns:
        Maybe[Path]: Success(Path) if parsing succeeds, Failure(str) with error message otherwise

    Examples:
        >>> parse_path('/home/user/file.txt')
        Success(PosixPath('/home/user/file.txt'))
        >>> parse_path('data/file.txt')
        Success(PosixPath('data/file.txt'))
        >>> parse_path('')
        Failure('Path cannot be empty')
        >>> parse_path(None)
        Failure('Path cannot be empty')

    Notes:
        - Path normalization (collapsing redundant separators) happens automatically
        - This parser does NOT validate path existence - use validators for that
        - Use expand_user=True to expand ~ to the user's home directory
        - Use resolve=True to convert relative paths to absolute paths
        - The resolve option will follow symlinks and normalize the path
        - A home directory that cannot be determined (e.g. ``~unknownuser``)
          or a symlink loop hit while resolving is reported as a Failure
    """
    # None and any other non-string input are reported like an empty path.
    if not isinstance(text, str):
        return Maybe.failure('Path cannot be empty')

    stripped = text.strip()
    if not stripped:
        return Maybe.failure('Path cannot be empty')

    try:
        # Create Path object (automatically normalizes redundant separators)
        path = Path(stripped)

        # Expand user directory if requested
        if expand_user:
            path = path.expanduser()

        # Resolve to absolute path if requested
        if resolve:
            path = path.resolve()
    except (ValueError, OSError, RuntimeError) as e:
        # RuntimeError: expanduser() cannot determine the home directory, or
        # resolve() hit a symlink loop on Python versions that raise for it.
        return Maybe.failure(f'Invalid path: {e!s}')

    return Maybe.success(path)
2079
+
2080
+
2081
# Public API exports.
# Names listed here are what `from valid8r.core.parsers import *` exposes.
__all__ = [
    'EmailAddress',
    'PhoneNumber',
    'UrlParts',
    'create_parser',
    'make_parser',
    'parse_base64',
    'parse_bool',
    'parse_cidr',
    'parse_complex',
    'parse_date',
    'parse_decimal',
    'parse_dict',
    'parse_dict_with_validation',
    'parse_email',
    'parse_enum',
    'parse_float',
    'parse_int',
    'parse_int_with_validation',
    'parse_ip',
    'parse_ipv4',
    'parse_ipv6',
    'parse_json',
    'parse_jwt',
    'parse_list',
    'parse_list_with_validation',
    'parse_path',
    'parse_phone',
    'parse_set',
    'parse_slug',
    'parse_url',
    'parse_uuid',
    'validated_parser',
]