structmatch 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- structmatch/__init__.py +40 -0
- structmatch/comparators.py +106 -0
- structmatch/core.py +177 -0
- structmatch/diff.py +203 -0
- structmatch/options.py +43 -0
- structmatch/schema.py +110 -0
- structmatch/utils.py +67 -0
- structmatch-0.1.0.dist-info/METADATA +11 -0
- structmatch-0.1.0.dist-info/RECORD +11 -0
- structmatch-0.1.0.dist-info/WHEEL +5 -0
- structmatch-0.1.0.dist-info/top_level.txt +1 -0
structmatch/__init__.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"""structmatch — Deep structural matching, diffing, and pattern matching for Python."""
|
|
2
|
+
|
|
3
|
+
from .core import eq, diff, match
|
|
4
|
+
from .comparators import (
|
|
5
|
+
Comparator,
|
|
6
|
+
ANY,
|
|
7
|
+
TYPE,
|
|
8
|
+
GT,
|
|
9
|
+
LT,
|
|
10
|
+
GE,
|
|
11
|
+
LE,
|
|
12
|
+
BETWEEN,
|
|
13
|
+
REGEX,
|
|
14
|
+
NOT,
|
|
15
|
+
)
|
|
16
|
+
from .diff import DiffResult
|
|
17
|
+
from .schema import Schema, SchemaError
|
|
18
|
+
from .options import MatchOptions
|
|
19
|
+
|
|
20
|
+
__version__ = "0.1.0"
|
|
21
|
+
|
|
22
|
+
__all__ = [
|
|
23
|
+
"eq",
|
|
24
|
+
"diff",
|
|
25
|
+
"match",
|
|
26
|
+
"ANY",
|
|
27
|
+
"TYPE",
|
|
28
|
+
"GT",
|
|
29
|
+
"LT",
|
|
30
|
+
"GE",
|
|
31
|
+
"LE",
|
|
32
|
+
"BETWEEN",
|
|
33
|
+
"REGEX",
|
|
34
|
+
"NOT",
|
|
35
|
+
"Comparator",
|
|
36
|
+
"DiffResult",
|
|
37
|
+
"Schema",
|
|
38
|
+
"SchemaError",
|
|
39
|
+
"MatchOptions",
|
|
40
|
+
]
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
"""Pattern-matching comparators."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
import re
|
|
5
|
+
from abc import ABC, abstractmethod
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class Comparator(ABC):
    """Abstract base class for pattern-matching comparators.

    Subclasses implement :meth:`matches`. Instances are callable, and calling
    one simply forwards both arguments to ``matches``.
    """

    @abstractmethod
    def matches(self, a, b) -> bool:
        """Return whether the pair ``(a, b)`` satisfies this comparator."""

    def __call__(self, a, b) -> bool:
        """Forward to :meth:`matches` so a comparator can be used as a function."""
        verdict = self.matches(a, b)
        return verdict
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class ANY(Comparator):
    """Wildcard comparator that accepts every pair of values."""

    def matches(self, a, b) -> bool:
        # Neither argument is inspected: the answer is unconditionally True.
        return True
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class TYPE(Comparator):
    """Comparator accepting any ``b`` that is an instance of a given type."""

    def __init__(self, expected_type):
        # Stored untouched and handed to isinstance() on every match.
        self.expected_type = expected_type

    def matches(self, a, b) -> bool:
        """Return ``isinstance(b, expected_type)``; ``a`` is not consulted."""
        wanted = self.expected_type
        return isinstance(b, wanted)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class GT(Comparator):
    """Matches if b > value.

    ``a`` is ignored. A candidate that cannot be ordered against the
    threshold (for example ``None`` against a number) does not match.
    """

    def __init__(self, value):
        # Threshold the candidate must strictly exceed.
        self.value = value

    def matches(self, a, b) -> bool:
        """Return whether ``b > value``; unorderable candidates yield False."""
        try:
            return b > self.value
        except TypeError:
            # Unorderable operands are a non-match rather than a crash, in
            # line with REGEX returning False for non-string input.
            return False
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class LT(Comparator):
    """Matches if b < value.

    ``a`` is ignored. A candidate that cannot be ordered against the
    threshold (for example ``None`` against a number) does not match.
    """

    def __init__(self, value):
        # Threshold the candidate must stay strictly below.
        self.value = value

    def matches(self, a, b) -> bool:
        """Return whether ``b < value``; unorderable candidates yield False."""
        try:
            return b < self.value
        except TypeError:
            # Unorderable operands are a non-match rather than a crash, in
            # line with REGEX returning False for non-string input.
            return False
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class GE(Comparator):
    """Matches if b >= value.

    ``a`` is ignored. A candidate that cannot be ordered against the
    threshold (for example ``None`` against a number) does not match.
    """

    def __init__(self, value):
        # Inclusive lower threshold for the candidate.
        self.value = value

    def matches(self, a, b) -> bool:
        """Return whether ``b >= value``; unorderable candidates yield False."""
        try:
            return b >= self.value
        except TypeError:
            # Unorderable operands are a non-match rather than a crash, in
            # line with REGEX returning False for non-string input.
            return False
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class LE(Comparator):
    """Matches if b <= value.

    ``a`` is ignored. A candidate that cannot be ordered against the
    threshold (for example ``None`` against a number) does not match.
    """

    def __init__(self, value):
        # Inclusive upper threshold for the candidate.
        self.value = value

    def matches(self, a, b) -> bool:
        """Return whether ``b <= value``; unorderable candidates yield False."""
        try:
            return b <= self.value
        except TypeError:
            # Unorderable operands are a non-match rather than a crash, in
            # line with REGEX returning False for non-string input.
            return False
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
class BETWEEN(Comparator):
    """Matches if low <= b <= high.

    The two bounds may be given in either order; they are normalised at
    construction time. ``a`` is ignored. A candidate that cannot be ordered
    against the bounds does not match.
    """

    def __init__(self, low, high):
        # Normalise so that self.low <= self.high whatever the argument order.
        self.low = min(low, high)
        self.high = max(low, high)

    def matches(self, a, b) -> bool:
        """Return whether ``b`` lies in the closed interval ``[low, high]``."""
        try:
            return self.low <= b <= self.high
        except TypeError:
            # Unorderable operands are a non-match rather than a crash, in
            # line with REGEX returning False for non-string input.
            return False
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
class REGEX(Comparator):
    """Comparator accepting strings in which the regex is found.

    The pattern is compiled once at construction and applied with
    ``search``, so it may match anywhere inside the candidate string.
    """

    def __init__(self, pattern: str):
        self.pattern = re.compile(pattern)

    def matches(self, a, b) -> bool:
        """Return True when ``b`` is a ``str`` containing a match; ``a`` is unused."""
        # Non-string candidates never match.
        return isinstance(b, str) and self.pattern.search(b) is not None
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
class NOT(Comparator):
    """Comparator that inverts the verdict of another comparator."""

    def __init__(self, comparator: Comparator):
        # The wrapped comparator whose result is negated.
        self.comparator = comparator

    def matches(self, a, b) -> bool:
        """Return the logical negation of the wrapped comparator's ``matches``."""
        inner_verdict = self.comparator.matches(a, b)
        return not inner_verdict
|
structmatch/core.py
ADDED
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
"""Core functions: eq(), diff(), match()."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
from .options import MatchOptions
|
|
5
|
+
from .diff import DiffResult, diff as _diff
|
|
6
|
+
from .utils import (
|
|
7
|
+
_is_numeric,
|
|
8
|
+
_within_tolerance,
|
|
9
|
+
_compare_strings,
|
|
10
|
+
_filter_keys,
|
|
11
|
+
_is_dataclass_like,
|
|
12
|
+
_get_fields,
|
|
13
|
+
_is_comparator,
|
|
14
|
+
_as_multiset,
|
|
15
|
+
_hashable,
|
|
16
|
+
)
|
|
17
|
+
from .comparators import Comparator
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _deep_eq(va, vb, opts: MatchOptions) -> bool:
    """Recursively decide whether ``va`` and ``vb`` are equal under ``opts``.

    Checks run in this order:

    1. any comparator in ``opts.comparators`` accepting the pair wins;
    2. with ``opts.type_coerce``, two numbers within tolerance are equal;
    3. values of different concrete types are unequal;
    4. ``None``, booleans, strings, numbers, dicts, lists/tuples, sets,
       dataclass instances and objects exposing ``vars()`` are compared
       structurally;
    5. anything else falls back to ``==``.

    Args:
        va: Left-hand value.
        vb: Right-hand value.
        opts: Comparison options (tolerance, ignore_keys, ignore_order, ...).

    Returns:
        True when the two values are considered equal.
    """
    # Custom comparators get the first say at every nesting level.
    if any(comp.matches(va, vb) for comp in opts.comparators):
        return True

    # int/float interop when type coercion is requested.
    if opts.type_coerce and _is_numeric(va, vb) and _within_tolerance(va, vb, opts.tolerance):
        return True

    # From here on both values must share the exact same type. The coerced
    # numeric case was already accepted above, so a mismatch is final.
    if type(va) != type(vb):
        return False

    if va is None:
        return vb is None

    if isinstance(va, bool):
        return va == vb

    if isinstance(va, str):
        return _compare_strings(va, vb, opts.case_sensitive)

    if _is_numeric(va, vb):
        return _within_tolerance(va, vb, opts.tolerance)

    if isinstance(va, dict):
        # Drop ignored keys BEFORE comparing shapes; comparing the raw
        # lengths first would reject dicts that differ only in ignored keys.
        va_f = _filter_keys(va, opts.ignore_keys)
        vb_f = _filter_keys(vb, opts.ignore_keys)
        if set(va_f) != set(vb_f):
            return False
        return all(_deep_eq(va_f[k], vb_f[k], opts) for k in va_f)

    if isinstance(va, (list, tuple)):
        if len(va) != len(vb):
            return False
        if opts.ignore_order and isinstance(va, list):
            # Order-insensitive comparison is only applied to lists.
            return _as_multiset(va) == _as_multiset(vb)
        return all(_deep_eq(a, b, opts) for a, b in zip(va, vb))

    if isinstance(va, set):
        return va == vb

    # Dataclass instances are compared field by field.
    if _is_dataclass_like(va):
        fa = _get_fields(va)
        fb = _get_fields(vb)
        if fa is not None and fb is not None:
            return _deep_eq(fa, fb, opts)

    # Plain objects are compared through their attribute dicts when both
    # expose one; classes themselves and builtin scalars are excluded.
    if not isinstance(va, (type, bool, int, float, str, list, tuple, set, dict)):
        fa = _get_fields(va)
        fb = _get_fields(vb)
        if fa is not None and fb is not None:
            return _deep_eq(fa, fb, opts)

    # Fallback to ==
    return va == vb
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def eq(a, b, *, tolerance: float = 0.0, ignore_order: bool = False,
       ignore_keys: list[str] | None = None, case_sensitive: bool = True,
       type_coerce: bool = False, comparators: list[Comparator] | None = None) -> bool:
    """Return whether ``a`` and ``b`` are deeply, structurally equal.

    All keyword arguments are bundled into a ``MatchOptions`` instance and
    the recursive comparison is delegated to ``_deep_eq``.
    """
    settings = {
        "tolerance": tolerance,
        "ignore_order": ignore_order,
        "ignore_keys": ignore_keys,
        "case_sensitive": case_sensitive,
        "type_coerce": type_coerce,
        "comparators": comparators,
    }
    return _deep_eq(a, b, MatchOptions(**settings))
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def diff(a, b, **opts) -> DiffResult:
    """Return the deep diff of ``a`` and ``b``.

    Thin public wrapper: every keyword option is forwarded unchanged to the
    diff engine imported from ``.diff`` as ``_diff``.
    """
    outcome = _diff(a, b, **opts)
    return outcome
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def _deep_match(value, pattern, opts: MatchOptions) -> bool:
    """Return whether ``value`` conforms to ``pattern``.

    A pattern may be a comparator (instance or class), a type, a dict, a
    list/tuple, a string, a number, or any literal compared with ``==``.
    Dict patterns are partial: keys of ``value`` absent from the pattern are
    not examined.
    """
    # Comparators take precedence over every other interpretation.
    if _is_comparator(pattern):
        matcher = pattern() if isinstance(pattern, type) else pattern
        return matcher.matches(value, value)

    # A None value is matched only by None itself or by NoneType.
    if value is None:
        return pattern is None or pattern is type(None)

    # Type patterns. value is known not to be None here, so a NoneType
    # pattern is rejected by the generic isinstance() check below.
    if isinstance(pattern, type):
        if pattern in (int, float):
            # _is_numeric() excludes bool, so True/False never pass as numbers.
            return _is_numeric(value, value) and isinstance(value, pattern)
        return isinstance(value, pattern)

    # Dict against dict: every pattern key must be present and match.
    if isinstance(pattern, dict) and isinstance(value, dict):
        visible = _filter_keys(value, opts.ignore_keys)
        return all(
            key in visible and _deep_match(visible[key], sub_pattern, opts)
            for key, sub_pattern in pattern.items()
        )

    # Sequence against sequence: same length, element-wise match.
    if isinstance(pattern, (list, tuple)) and isinstance(value, (list, tuple)):
        if len(pattern) != len(value):
            return False
        return all(_deep_match(v, p, opts) for v, p in zip(value, pattern))

    # Strings honour the case-sensitivity option.
    if isinstance(pattern, str) and isinstance(value, str):
        return _compare_strings(pattern, value, opts.case_sensitive)

    # Numbers honour the tolerance option.
    if _is_numeric(pattern, value):
        return _within_tolerance(pattern, value, opts.tolerance)

    # Anything else is a literal.
    return pattern == value
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def match(obj, pattern, **opts) -> bool:
    """Return whether ``obj`` satisfies ``pattern``.

    Keyword arguments are used to build a ``MatchOptions`` instance, which
    is passed to ``_deep_match`` together with the object and the pattern.
    """
    return _deep_match(obj, pattern, MatchOptions(**opts))
|
structmatch/diff.py
ADDED
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
"""Deep diff engine."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
from .options import MatchOptions
|
|
5
|
+
from .utils import (
|
|
6
|
+
_is_numeric,
|
|
7
|
+
_within_tolerance,
|
|
8
|
+
_compare_strings,
|
|
9
|
+
_filter_keys,
|
|
10
|
+
_is_dataclass_like,
|
|
11
|
+
_get_fields,
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class DiffResult:
    """Result of a deep diff between two structures.

    Attributes (all default to empty containers):
        added: dict of entries reported as added.
        removed: dict of entries reported as removed.
        changed: dict of entries reported as changed.
        type_changes: dict of entries whose type changed.
        path_changes: list of change records produced for sequences.

    An instance is truthy exactly when at least one container is non-empty.
    """

    __slots__ = ("added", "removed", "changed", "type_changes", "path_changes")

    # Instances compare by value and hold mutable containers, so they are
    # deliberately unhashable (defining __eq__ alone already implies this).
    __hash__ = None

    def __init__(
        self,
        added: dict | None = None,
        removed: dict | None = None,
        changed: dict | None = None,
        type_changes: dict | None = None,
        path_changes: list | None = None,
    ):
        # A fresh container is created per instance when none is supplied.
        self.added = added if added is not None else {}
        self.removed = removed if removed is not None else {}
        self.changed = changed if changed is not None else {}
        self.type_changes = type_changes if type_changes is not None else {}
        self.path_changes = path_changes if path_changes is not None else []

    def has_changes(self) -> bool:
        """Return True when any of the five containers is non-empty."""
        return bool(self.added or self.removed or self.changed or self.type_changes or self.path_changes)

    def __bool__(self) -> bool:
        return self.has_changes()

    def __repr__(self) -> str:
        # Only non-empty sections are shown, in a fixed order.
        parts = []
        if self.added:
            parts.append(f"added={self.added}")
        if self.removed:
            parts.append(f"removed={self.removed}")
        if self.changed:
            parts.append(f"changed={self.changed}")
        if self.type_changes:
            parts.append(f"type_changes={self.type_changes}")
        if self.path_changes:
            parts.append(f"path_changes={self.path_changes}")
        return f"DiffResult({', '.join(parts)})"

    def __eq__(self, other):
        """Compare all five containers; defer to ``other`` for foreign types."""
        if not isinstance(other, DiffResult):
            return NotImplemented
        return (
            self.added == other.added
            and self.removed == other.removed
            and self.changed == other.changed
            and self.type_changes == other.type_changes
            # path_changes carries every sequence-level difference, so two
            # results differing only there must not compare equal.
            and self.path_changes == other.path_changes
        )
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _diff_dicts(a: dict, b: dict, opts: MatchOptions, path: str = "") -> DiffResult:
    """Diff two dicts key by key and collect the outcome in a DiffResult.

    Keys listed in ``opts.ignore_keys`` are removed from both sides first.
    Keys present on one side only are recorded in ``added`` / ``removed``;
    keys present on both sides are compared with ``_diff_values``.

    Args:
        a: Left-hand ("old") dict.
        b: Right-hand ("new") dict.
        opts: Comparison options.
        path: Path of this dict inside the overall structure; only used to
            build the path handed down to nested comparisons.

    Returns:
        A DiffResult describing the differences (empty when there are none).
    """
    added = {}
    removed = {}
    changed = {}
    type_changes = {}
    path_changes = []

    a_filtered = _filter_keys(a, opts.ignore_keys)
    b_filtered = _filter_keys(b, opts.ignore_keys)

    # Union of keys from both sides; iteration order follows the set.
    all_keys = set(a_filtered) | set(b_filtered)
    for key in all_keys:
        # key_path is passed to nested comparisons; the result dicts below
        # are keyed by the bare key, not by this path.
        key_path = f"{path}.{key}" if path else key
        if key not in a_filtered:
            added[key] = b_filtered[key]
        elif key not in b_filtered:
            removed[key] = a_filtered[key]
        else:
            va = a_filtered[key]
            vb = b_filtered[key]
            child = _diff_values(va, vb, opts, key_path)
            if child is None:
                # Values are equal under opts.
                continue
            if isinstance(child, dict):
                if "sub_diff" in child:
                    sub = child["sub_diff"]
                    # Merge sub_diff into current
                    # Nested entries keep their own key; the "<key>." prefix
                    # is applied only when that name is already present in
                    # the target dict at the time the merge is evaluated.
                    # NOTE(review): nested entries therefore usually lose
                    # their parent path - confirm this is intended.
                    added.update({f"{key}.{k}" if k in added else k: v for k, v in sub.added.items()})
                    removed.update({f"{key}.{k}" if k in removed else k: v for k, v in sub.removed.items()})
                    changed.update({f"{key}.{k}" if k in changed else k: v for k, v in sub.changed.items()})
                    type_changes.update({f"{key}.{k}" if k in type_changes else k: v for k, v in sub.type_changes.items()})
                    path_changes.extend(sub.path_changes)
                elif "type_change" in child:
                    type_changes[key] = child["type_change"]
                elif "set_diff" in child:
                    # Sets are reported as a whole-value change.
                    changed[key] = (va, vb)
            elif isinstance(child, tuple):
                # (old, new) pair for a changed leaf value.
                changed[key] = child
            # Any other child shape is not recorded here.

    return DiffResult(added=added, removed=removed, changed=changed, type_changes=type_changes, path_changes=path_changes)
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def _diff_sequences(a, b, opts: MatchOptions, path: str = "") -> DiffResult:
    """Diff two sequences position by position.

    With ``opts.ignore_order`` set, sequences holding the same multiset of
    elements produce an empty result. Otherwise every index up to the longer
    length yields at most one record in ``path_changes``.
    """
    if opts.ignore_order:
        # If same elements (multiset), no changes
        from .utils import _as_multiset
        if _as_multiset(a) == _as_multiset(b):
            return DiffResult()

    len_a, len_b = len(a), len(b)
    records = []
    for i in range(max(len_a, len_b)):
        where = f"{path}[{i}]"

        # Positions beyond one side's end are pure additions or removals.
        if i >= len_a:
            records.append({"path": where, "change": "added", "value": b[i]})
            continue
        if i >= len_b:
            records.append({"path": where, "change": "removed", "value": a[i]})
            continue

        outcome = _diff_values(a[i], b[i], opts, where)
        if outcome is None:
            continue

        is_mapping = isinstance(outcome, dict)
        if is_mapping and "type_change" in outcome:
            record = {"path": where, "change": "type_change", "from": outcome["type_change"][0], "to": outcome["type_change"][1]}
        elif isinstance(outcome, tuple):
            record = {"path": where, "change": "changed", "from": outcome[0], "to": outcome[1]}
        elif is_mapping and "sub_diff" in outcome:
            record = {"path": where, "change": "sub_diff", "details": outcome["sub_diff"]}
        else:
            # Every remaining outcome shape is reported as a plain change.
            record = {"path": where, "change": "changed", "from": a[i], "to": b[i]}
        records.append(record)

    return DiffResult(path_changes=records)
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def _diff_values(va, vb, opts: MatchOptions, path: str = ""):
    """Compare two values and describe how they differ.

    Args:
        va: Left-hand ("old") value.
        vb: Right-hand ("new") value.
        opts: Comparison options.
        path: Path of the value inside the overall structure; forwarded to
            nested dict/sequence diffs.

    Returns:
        ``None`` when the values are equal under ``opts``; a tuple
        ``(old, new)`` for a changed leaf; or a single-key dict:
        ``{"type_change": (type_a, type_b)}``, ``{"sub_diff": DiffResult}``
        for containers and dataclasses, or
        ``{"set_diff": {"removed": ..., "added": ...}}`` for sets.
    """
    type_a = type(va)
    type_b = type(vb)

    if type_a != type_b:
        # Numbers of different types may still be equal under coercion.
        if opts.type_coerce and _is_numeric(va, vb) and _within_tolerance(va, vb, opts.tolerance):
            return None
        return {"type_change": (type_a, type_b)}

    if isinstance(va, dict):
        sub = _diff_dicts(va, vb, opts, path)
        if sub.has_changes():
            return {"sub_diff": sub}
        return None

    if isinstance(va, str):
        if not _compare_strings(va, vb, opts.case_sensitive):
            return (va, vb)
        return None

    if _is_numeric(va, vb):
        if _within_tolerance(va, vb, opts.tolerance):
            return None
        return (va, vb)

    if isinstance(va, (list, tuple)):
        sub = _diff_sequences(va, vb, opts, path)
        if sub.has_changes():
            return {"sub_diff": sub}
        return None

    if isinstance(va, set):
        extra_a = va - vb
        extra_b = vb - va
        if extra_a or extra_b:
            return {"set_diff": {"removed": extra_a, "added": extra_b}}
        return None

    if _is_dataclass_like(va):
        # Wrap the field-level diff exactly like the dict branch does.
        # Returning the bare DiffResult would make equal dataclasses look
        # changed to callers that only test for None, and would hide real
        # differences from callers that only understand dict/tuple results.
        sub = _diff_dicts(_get_fields(va), _get_fields(vb), opts, path)
        if sub.has_changes():
            return {"sub_diff": sub}
        return None

    if va == vb:
        return None
    return (va, vb)
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def diff(a, b, **opts) -> DiffResult:
    """Compute a deep diff between ``a`` and ``b``.

    Keyword arguments are used to build a ``MatchOptions`` instance. The raw
    outcome of ``_diff_values`` is then normalised into a DiffResult, with
    root-level differences stored under ``"root"``-prefixed keys.
    """
    outcome = _diff_values(a, b, MatchOptions(**opts))

    if outcome is None:
        return DiffResult()

    # Anything that is not a dict is stored as the root-level change.
    if not isinstance(outcome, dict):
        return DiffResult(changed={"root": outcome})

    if "sub_diff" in outcome:
        return outcome["sub_diff"]

    if "type_change" in outcome:
        return DiffResult(type_changes={"root": outcome["type_change"]})

    if "set_diff" in outcome:
        set_delta = outcome["set_diff"]
        return DiffResult(
            added={"root_set_added": set_delta["added"]},
            removed={"root_set_removed": set_delta["removed"]},
        )

    # Unrecognised dict outcome: report the whole value as changed.
    return DiffResult(changed={"root": (a, b)})
|
structmatch/options.py
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class MatchOptions:
    """Bundle of settings that steer comparison, diffing and matching.

    ``ignore_keys`` is always stored as a ``frozenset`` and ``comparators``
    as a list (an empty one when none, or an empty sequence, is supplied).
    """

    __slots__ = (
        "tolerance",
        "ignore_order",
        "ignore_keys",
        "case_sensitive",
        "type_coerce",
        "comparators",
    )

    def __init__(
        self,
        tolerance: float = 0.0,
        ignore_order: bool = False,
        ignore_keys: set[str] | None = None,
        case_sensitive: bool = True,
        type_coerce: bool = False,
        comparators: list | None = None,
    ):
        self.tolerance = tolerance
        self.ignore_order = ignore_order
        self.case_sensitive = case_sensitive
        self.type_coerce = type_coerce
        # None and empty collections both normalise to an empty frozenset.
        self.ignore_keys = frozenset(ignore_keys or ())
        # A truthy list is kept as-is (same object); otherwise start empty.
        self.comparators = comparators if comparators else []

    def update(self, **kwargs) -> MatchOptions:
        """Return a new MatchOptions with the given fields overridden.

        The current instance is left untouched; ``comparators`` is copied
        into a new list for the new instance.
        """
        snapshot = {name: getattr(self, name) for name in MatchOptions.__slots__}
        snapshot["ignore_keys"] = set(self.ignore_keys) if self.ignore_keys else None
        snapshot["comparators"] = list(self.comparators)
        return MatchOptions(**{**snapshot, **kwargs})
|
structmatch/schema.py
ADDED
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
"""Schema validation."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
from .utils import _is_comparator
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class SchemaError(Exception):
    """Exception carrying every problem found during schema validation.

    The original list of error dicts is kept on ``errors``; the exception
    message lists one line per error, built from its ``path`` and
    ``message`` entries (defaulting to ``root`` / ``validation error``).
    """

    def __init__(self, errors: list[dict]):
        self.errors = errors

        def _render(entry):
            path = entry.get("path", "root")
            msg = entry.get("message", "validation error")
            return f" {path}: {msg}"

        body = "\n".join(_render(entry) for entry in errors)
        super().__init__("Schema validation failed:\n" + body)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _validate_type(value, expected, path: str, errors: list):
    """Validate ``value`` against ``expected``, appending problems to ``errors``.

    ``expected`` may be ``None``/``NoneType``, a comparator (instance or
    class), a type, a dict of sub-schemas, a one-element list giving the item
    schema, a tuple of positional sub-schemas, a set, or a literal value.

    Args:
        value: The data being validated.
        expected: The schema fragment to validate against.
        path: Location of ``value`` in the overall structure, used in error
            records.
        errors: List mutated in place; one ``{"path", "message"}`` dict is
            appended per problem found.

    Returns:
        None. Results are communicated solely through ``errors``.
    """
    if expected is None or expected is type(None):
        if value is not None:
            errors.append({"path": path, "message": f"expected None, got {type(value).__name__}: {value!r}"})
        return

    # Comparators are checked before the generic type branch: a comparator
    # CLASS is also a type and would otherwise be treated as an isinstance()
    # target and never used as a comparator.
    if _is_comparator(expected):
        comparator = expected() if isinstance(expected, type) else expected
        if not comparator.matches(None, value):
            errors.append({"path": path, "message": f"failed comparator {type(comparator).__name__}"})
        return

    if isinstance(expected, type):
        # bool is a subclass of int, but is never accepted as a number.
        if isinstance(value, bool) and expected in (int, float):
            errors.append({"path": path, "message": f"expected {expected.__name__}, got bool"})
            return
        if not isinstance(value, expected):
            # Allow int/float coercion
            if expected in (int, float) and isinstance(value, (int, float)) and not isinstance(value, bool):
                return
            errors.append({"path": path, "message": f"expected {expected.__name__}, got {type(value).__name__}: {value!r}"})
        return

    if isinstance(expected, dict):
        if not isinstance(value, dict):
            errors.append({"path": path, "message": f"expected dict, got {type(value).__name__}"})
            return
        # Every schema key is required; extra keys in value are not checked.
        for key, sub_schema in expected.items():
            if key not in value:
                errors.append({"path": f"{path}.{key}", "message": "missing required key"})
            else:
                _validate_type(value[key], sub_schema, f"{path}.{key}", errors)
        return

    if isinstance(expected, list):
        # Only the first element is used as the item schema; an empty list
        # accepts items of any kind.
        schema = expected[0] if expected else None
        if not isinstance(value, (list, tuple)):
            errors.append({"path": path, "message": f"expected list, got {type(value).__name__}"})
            return
        for i, item in enumerate(value):
            if schema is not None:
                _validate_type(item, schema, f"{path}[{i}]", errors)
        return

    if isinstance(expected, tuple):
        if not isinstance(value, (list, tuple)):
            errors.append({"path": path, "message": f"expected tuple/list, got {type(value).__name__}"})
            return
        if len(value) != len(expected):
            errors.append({"path": path, "message": f"expected {len(expected)} elements, got {len(value)}"})
            return
        for i, (item, sub_schema) in enumerate(zip(value, expected)):
            _validate_type(item, sub_schema, f"{path}[{i}]", errors)
        return

    if isinstance(expected, set):
        # Converted so the error message shows a stable frozenset repr.
        expected = frozenset(expected)
        if not isinstance(value, (list, tuple, set, frozenset)):
            errors.append({"path": path, "message": f"expected set/list, got {type(value).__name__}"})
            return
        # NOTE(review): the message says "one of", but the check requires the
        # value's elements to equal the whole set - confirm intended meaning.
        if set(value) != set(expected):
            errors.append({"path": path, "message": f"expected one of {expected}, got {set(value)}"})
        return

    # Literal value
    if value != expected:
        errors.append({"path": path, "message": f"expected {expected!r}, got {value!r}"})
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
class Schema:
    """Validator that checks Python structures against a stored definition."""

    def __init__(self, definition):
        # The definition is kept as given and interpreted on each validation.
        self.definition = definition

    def validate(self, data) -> bool:
        """Return True when ``data`` conforms; raise SchemaError otherwise.

        Raises:
            SchemaError: carrying every problem collected during validation.
        """
        problems = []
        _validate_type(data, self.definition, "root", problems)
        if not problems:
            return True
        raise SchemaError(problems)

    def is_valid(self, data) -> bool:
        """Boolean form of :meth:`validate` that never raises SchemaError."""
        try:
            self.validate(data)
        except SchemaError:
            return False
        return True
|
structmatch/utils.py
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
"""Helper utilities."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
from collections import Counter
|
|
5
|
+
from dataclasses import is_dataclass, fields
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def _is_dataclass_like(obj: Any) -> bool:
    """Return True for dataclass *instances* only, never for classes."""
    if isinstance(obj, type):
        # is_dataclass() also accepts dataclass classes; those are excluded.
        return False
    return is_dataclass(obj)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _get_fields(obj: Any) -> dict:
    """Return the comparable fields of ``obj`` as a new dict.

    Dataclass instances yield their declared fields; other objects yield a
    copy of ``vars(obj)``. When ``vars()`` raises ``TypeError`` the result
    is ``None`` despite the ``dict`` annotation.
    """
    if not _is_dataclass_like(obj):
        try:
            attributes = vars(obj)
        except TypeError:
            return None
        return dict(attributes)
    return {f.name: getattr(obj, f.name) for f in fields(obj)}
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _as_multiset(lst):
    """Return a Counter of the hashable forms of the items in ``lst``.

    Two sequences with the same elements in a different order produce equal
    Counters.
    """
    return Counter(map(_hashable, lst))
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _hashable(x):
    """Return a hashable stand-in for ``x`` for multiset comparison.

    Containers are converted recursively and tagged with their kind, so a
    list, a tuple and a dict with similar content never collapse into the
    same key. Dict items go into a frozenset instead of being sorted, so
    dicts with keys of mixed, unorderable types are supported. Sets and
    frozensets share one tag because they compare equal in Python.
    Non-container values are returned unchanged.
    """
    if isinstance(x, dict):
        # No sorting: keys such as 1 and "b" cannot be ordered.
        return ("dict", frozenset((k, _hashable(v)) for k, v in x.items()))
    if isinstance(x, list):
        return ("list", tuple(_hashable(i) for i in x))
    if isinstance(x, tuple):
        return ("tuple", tuple(_hashable(i) for i in x))
    if isinstance(x, (set, frozenset)):
        return ("set", frozenset(_hashable(i) for i in x))
    return x
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _is_numeric(a, b) -> bool:
    """Return True when both arguments are int/float values and neither is a bool."""
    for operand in (a, b):
        # bool is a subclass of int, so it has to be ruled out explicitly.
        if isinstance(operand, bool) or not isinstance(operand, (int, float)):
            return False
    return True
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _within_tolerance(a, b, tolerance: float) -> bool:
    """Return whether ``a`` and ``b`` are equal within ``tolerance``.

    Non-numeric operands, or a tolerance of zero or less, fall back to plain
    ``==``. A tolerance greater than 1 is used as an absolute bound on
    ``|a - b|``; otherwise it is scaled by ``max(|a|, |b|, 1)``.
    """
    if not _is_numeric(a, b) or tolerance <= 0.0:
        return a == b

    gap = abs(a - b)
    if tolerance > 1:
        return gap <= tolerance
    scale = max(abs(a), abs(b), 1)
    return gap <= tolerance * scale
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _compare_strings(a: str, b: str, case_sensitive: bool) -> bool:
    """Return whether two strings are equal, optionally ignoring case.

    Args:
        a: First string.
        b: Second string.
        case_sensitive: When True the strings must be identical; when False
            they are compared caselessly.

    Returns:
        True when the strings are considered equal.
    """
    if case_sensitive:
        return a == b
    # casefold() is the caseless-matching form: unlike lower() it also
    # equates pairs such as "ß" and "SS".
    return a.casefold() == b.casefold()
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _is_comparator(val) -> bool:
    """Return True for a Comparator instance or a proper Comparator subclass.

    The abstract ``Comparator`` base class itself does not qualify.
    """
    # Imported inside the function rather than at module level.
    from .comparators import Comparator

    if isinstance(val, Comparator):
        return True
    return isinstance(val, type) and val is not Comparator and issubclass(val, Comparator)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _filter_keys(d: dict, ignore_keys) -> dict:
    """Return ``d`` without the keys listed in ``ignore_keys``.

    When ``ignore_keys`` is empty or None the very same dict object is
    returned (no copy); otherwise a new dict is built.
    """
    if ignore_keys:
        return {key: val for key, val in d.items() if key not in ignore_keys}
    return d
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: structmatch
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Deep structural matching, diffing, and pattern matching for Python
|
|
5
|
+
Author: Teja
|
|
6
|
+
License: MIT
|
|
7
|
+
Requires-Python: >=3.10
|
|
8
|
+
Provides-Extra: dev
|
|
9
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
10
|
+
Requires-Dist: hypothesis; extra == "dev"
|
|
11
|
+
Requires-Dist: pytest-cov; extra == "dev"
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
structmatch/__init__.py,sha256=-sm6hkVh9ygag6cRmiSycqFDED960gqEkS_6n08L9DQ,621
|
|
2
|
+
structmatch/comparators.py,sha256=yPtXdm494iL0wqOfFcPVDnbbl_yiItUAIwplcS0tRuY,2218
|
|
3
|
+
structmatch/core.py,sha256=LuzZv2BeAfYEYlQhuT5sw_grnlroDL2BvqHV0axLtZA,5501
|
|
4
|
+
structmatch/diff.py,sha256=i8xER0qLdaMp1cQpqTwpOC4eTvRszp5uk-Gan_cD87I,7469
|
|
5
|
+
structmatch/options.py,sha256=tINGmGDuF3Q-SKf7azcJRildu88kPvPyJagR0h3VrJI,1333
|
|
6
|
+
structmatch/schema.py,sha256=cuvexzNQKaDneu-NO0Ct-ObTZIYVOjP3VOMMH_d5zJ8,4254
|
|
7
|
+
structmatch/utils.py,sha256=W1azfIBHVlT4HhY8EP0uj3OtZ0C6Ko38Jqs7nUNM-Lg,1960
|
|
8
|
+
structmatch-0.1.0.dist-info/METADATA,sha256=v2VDfenfY0RtAf0WklAjB55Z1mcnepsq_s4uryn9gKk,328
|
|
9
|
+
structmatch-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
10
|
+
structmatch-0.1.0.dist-info/top_level.txt,sha256=-m-cg6sthw8Eo6WwUYlf_eH-5wWgGX3JyVlrndT9WZw,12
|
|
11
|
+
structmatch-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
structmatch
|