starfish-wal 3.0.0a20__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,8 @@
1
+ Metadata-Version: 2.4
2
+ Name: starfish-wal
3
+ Version: 3.0.0a20
4
+ Summary: Starfish WAL extension — append-only CRDT op-log document model (LWW register + RGA list + text) core, cross-language conformant with the TypeScript package
5
+ Requires-Python: >=3.11
6
+ Requires-Dist: starfish-protocol
7
+ Provides-Extra: dev
8
+ Requires-Dist: pytest>=7.0; extra == "dev"
@@ -0,0 +1,31 @@
1
+ # starfish-wal (Python)
2
+
3
+ Cross-language CRDT core for the [Starfish](https://github.com/Drakkar-Software/starfish)
4
+ write-ahead-log document model.
5
+
6
+ This package ships the deterministic clock + fold that mirrors the TypeScript
7
+ `@drakkar.software/starfish-wal` package and conforms to the shared vectors in
8
+ `tests/test-vectors/wal-crdt.json`. The fold is commutative, idempotent, and
9
+ byte-identical across languages:
10
+
11
+ - **LWW typed register** (`set` / `del`) — objects / scalar fields,
12
+ - **RGA sequence** (`ins` / `rmv`) — ordered lists,
13
+ - **text** — an RGA of single characters.
14
+
15
+ ```python
16
+ from starfish_wal import WalCrdt
17
+
18
+ crdt = WalCrdt()
19
+ crdt.fold([
20
+ {"t": "set", "reg": "title", "clock": {"c": 1, "r": "a"}, "value": "Hello"},
21
+ {"t": "ins", "list": "body", "id": "1@a", "after": "", "clock": {"c": 2, "r": "a"}, "value": "h"},
22
+ {"t": "ins", "list": "body", "id": "2@a", "after": "1@a", "clock": {"c": 3, "r": "a"}, "value": "i"},
23
+ ])
24
+ crdt.materialize() # {"body": ["h", "i"], "title": "Hello"}
25
+ crdt.text("body") # "hi"
26
+ ```
27
+
28
+ The client document-log layer (`WalDocument`: commit / materialize / snapshot)
29
+ currently ships in the TypeScript package; Python parity is planned.
30
+
31
+ Full reference: [`docs/ts/wal/01-overview.md`](../../../docs/ts/wal/01-overview.md)
@@ -0,0 +1,23 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "starfish-wal"
7
+ version = "3.0.0a20"
8
+ description = "Starfish WAL extension — append-only CRDT op-log document model (LWW register + RGA list + text) core, cross-language conformant with the TypeScript package"
9
+ requires-python = ">=3.11"
10
+ dependencies = [
11
+ "starfish-protocol",
12
+ ]
13
+
14
+ [project.optional-dependencies]
15
+ dev = [
16
+ "pytest>=7.0",
17
+ ]
18
+
19
+ [tool.uv.sources]
20
+ starfish-protocol = { path = "../protocol", editable = true }
21
+
22
+ [tool.pytest.ini_options]
23
+ testpaths = ["tests"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,27 @@
1
+ """starfish-wal — write-ahead-log / doc-diff collections with CRDT semantics.
2
+
3
+ This package ships the deterministic, cross-language CRDT core (clock + fold)
4
+ that mirrors ``@drakkar.software/starfish-wal`` and conforms to
5
+ ``tests/test-vectors/wal-crdt.json``. The client document-log layer
6
+ (``WalDocument``: commit / materialize / snapshot) currently ships in the
7
+ TypeScript package; Python parity is planned.
8
+ """
9
+
10
+ from starfish_wal.clock import (
11
+ Clock,
12
+ LamportClock,
13
+ clock_greater,
14
+ compare_clocks,
15
+ derive_replica_id,
16
+ )
17
+ from starfish_wal.crdt import Op, WalCrdt
18
+
19
+ __all__ = [
20
+ "Clock",
21
+ "LamportClock",
22
+ "clock_greater",
23
+ "compare_clocks",
24
+ "derive_replica_id",
25
+ "Op",
26
+ "WalCrdt",
27
+ ]
@@ -0,0 +1,68 @@
1
+ """Causal clock for the WAL CRDT.
2
+
3
+ Every CRDT op carries a :class:`Clock`: a Lamport counter ``c`` plus a stable,
4
+ per-session ``replicaId`` ``r``. The pair ``(c, r)`` is a **total order with no
5
+ ties** — two concurrent ops may share a counter but never a replica id, so the
6
+ LWW tie-break is always decidable and byte-identical to the TypeScript
7
+ implementation (Python compares strings by Unicode code point, matching the
8
+ protocol's ``stable_stringify`` key sort).
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ from typing import TypedDict
14
+
15
+
16
class Clock(TypedDict):
    """A Lamport counter (``c``) tie-broken by a stable replica id (``r``).

    Instances are plain dicts of the shape ``{"c": <int>, "r": <str>}``.
    """

    # Lamport counter; ``compare_clocks`` compares this field first.
    c: int
    # Replica id; ``compare_clocks`` consults it only when counters are equal.
    r: str
21
+
22
+
23
+ def _cmp_str(a: str, b: str) -> int:
24
+ # Python orders strings by Unicode code point already.
25
+ return (a > b) - (a < b)
26
+
27
+
28
def compare_clocks(a: Clock, b: Clock) -> int:
    """Three-way comparison of two clocks under the ``(c, r)`` total order.

    The Lamport counters decide first; the replica ids are consulted only when
    the counters are equal. The result is negative when ``a`` sorts before
    ``b``, positive when it sorts after, and ``0`` only when both the counter
    and the replica id match (i.e. the same op, given unique replica ids).
    """
    counter_a, counter_b = a["c"], b["c"]
    if counter_a == counter_b:
        # Same counter: fall back to the replica-id tie-break.
        return _cmp_str(a["r"], b["r"])
    return counter_a - counter_b
37
+
38
+
39
def clock_greater(a: Clock, b: Clock) -> bool:
    """Return ``True`` exactly when ``a`` sorts strictly after ``b``."""
    ordering = compare_clocks(a, b)
    return ordering > 0
42
+
43
+
44
class LamportClock:
    """Per-replica Lamport counter: stamps local ops and tracks observed ones."""

    def __init__(self, replica_id: str, start: int = 0) -> None:
        self._counter = start
        self.replica_id = replica_id

    @property
    def value(self) -> int:
        """The current counter value."""
        return self._counter

    def tick(self) -> Clock:
        """Increment the counter and return the clock for the next local op."""
        stamped = self._counter + 1
        self._counter = stamped
        return {"c": stamped, "r": self.replica_id}

    def observe(self, clock: Clock) -> None:
        """Raise the counter to an observed clock's counter; never lower it.

        The next :meth:`tick` therefore returns a counter strictly greater
        than every clock observed so far (Lamport receive rule).
        """
        seen = clock["c"]
        if self._counter < seen:
            self._counter = seen
64
+
65
+
66
def derive_replica_id(author_pub_hex: str, session_nonce: str) -> str:
    """Build a replica id by joining the author key and session nonce with ``:``."""
    return "{}:{}".format(author_pub_hex, session_nonce)
@@ -0,0 +1,237 @@
1
+ """The deterministic, op-based CRDT at the heart of ``starfish-wal``.
2
+
3
+ This is the Python mirror of ``packages/ts/wal/src/crdt.ts``; both fold the same
4
+ ops to byte-identical materialized state, locked by
5
+ ``tests/test-vectors/wal-crdt.json``. The fold is commutative (order-independent)
6
+ and idempotent (re-applying an op is a structural no-op), so a server-reordered
7
+ or retried op-log converges without a dedup set.
8
+
9
+ Two CRDT shapes, addressed by name within one document:
10
+
11
+ * **LWW typed register** (``set`` / ``del``) — objects / scalar fields; the value
12
+ is opaque JSON written whole, highest ``(clock)`` wins, ties broken on replica
13
+ id.
14
+ * **RGA sequence** (``ins`` / ``rmv``) — ordered lists, and **text** as a
15
+ sequence of single-character values.
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ from typing import Any, Iterable
21
+
22
+ from .clock import Clock, _cmp_str, clock_greater, compare_clocks
23
+
24
+ # An op is a JSON dict discriminated by ``t``: "set" | "del" | "ins" | "rmv".
25
+ Op = dict[str, Any]
26
+
27
+
28
+ class _Reg:
29
+ __slots__ = ("clock", "value", "deleted")
30
+
31
+ def __init__(self, clock: Clock, value: Any, deleted: bool) -> None:
32
+ self.clock = clock
33
+ self.value = value
34
+ self.deleted = deleted
35
+
36
+
37
+ class _Node:
38
+ __slots__ = ("id", "after", "clock", "value", "deleted", "pending")
39
+
40
+ def __init__(
41
+ self, id: str, after: str, clock: Clock, value: Any, deleted: bool, pending: bool
42
+ ) -> None:
43
+ self.id = id
44
+ self.after = after
45
+ self.clock = clock
46
+ self.value = value
47
+ self.deleted = deleted
48
+ # True for a tombstone created by a ``rmv`` whose ``ins`` has not yet
49
+ # arrived: its after/clock/value are placeholders the insert fills in.
50
+ self.pending = pending
51
+
52
+
53
class WalCrdt:
    """A bag of named LWW registers and named RGA sequences.

    Ops are JSON dicts discriminated by ``t``: ``"set"`` / ``"del"`` address a
    register by ``reg``; ``"ins"`` / ``"rmv"`` address one element of a list by
    ``list`` and ``id``. Registers and lists are stored separately and merged
    into a single document by :meth:`materialize`.
    """

    def __init__(self) -> None:
        # Register name -> current winning register state.
        self._regs: dict[str, _Reg] = {}
        # List name -> element id -> node (tombstones included).
        self._lists: dict[str, dict[str, _Node]] = {}

    # ── ingest ──────────────────────────────────────────────────────────────────

    def apply(self, op: Op) -> None:
        """Apply one op (commutative and idempotent).

        Ops whose ``t`` is not ``set`` / ``del`` / ``ins`` / ``rmv`` are
        ignored. A recognised op that lacks one of its required keys raises
        ``KeyError``.
        """
        kind = op["t"]
        if kind == "set":
            self._apply_reg(op["reg"], op["clock"], op["value"], False)
        elif kind == "del":
            self._apply_reg(op["reg"], op["clock"], None, True)
        elif kind == "ins":
            self._apply_ins(op)
        elif kind == "rmv":
            self._apply_rmv(op)

    def fold(self, ops: Iterable[Op]) -> None:
        """Apply every op of ``ops`` in iteration order."""
        for op in ops:
            self.apply(op)

    def _apply_reg(self, reg: str, clock: Clock, value: Any, deleted: bool) -> None:
        """Write a register state unless the stored clock is already >= ``clock``."""
        cur = self._regs.get(reg)
        # LWW: keep the highest clock; equal clock => identical op => no-op.
        if cur is not None and not clock_greater(clock, cur.clock):
            return
        self._regs[reg] = _Reg(clock, value, deleted)

    def _apply_ins(self, op: Op) -> None:
        """Insert a list element, or complete a pending tombstone for its id."""
        nodes = self._lists.setdefault(op["list"], {})
        existing = nodes.get(op["id"])
        if existing is None:
            nodes[op["id"]] = _Node(
                op["id"], op["after"], op["clock"], op["value"], False, False
            )
            return
        # If a ``rmv`` arrived first it left a *pending* tombstone with no real
        # position; the insert now supplies the true after/clock/value (the
        # element stays deleted). This keeps the fold commutative under
        # out-of-order delivery — the placeholder must not own the anchor.
        # A non-pending hit is a verbatim replay => no-op.
        if existing.pending:
            existing.after = op["after"]
            existing.clock = op["clock"]
            existing.value = op["value"]
            existing.pending = False

    def _apply_rmv(self, op: Op) -> None:
        """Tombstone a list element, creating a pending placeholder if unseen."""
        # The remove may be delivered before any insert; create the list so the
        # *pending* tombstone placeholder survives until the insert fills in its
        # real position (see _apply_ins).
        nodes = self._lists.setdefault(op["list"], {})
        node = nodes.get(op["id"])
        if node is None:
            nodes[op["id"]] = _Node(op["id"], "", op["clock"], None, True, True)
            return
        node.deleted = True

    # ── projection ──────────────────────────────────────────────────────────────

    def _list_order(self, nodes: dict[str, _Node]) -> list[_Node]:
        """Return the nodes reachable from the head (``""``) in RGA order.

        Tombstones are included so their descendants keep their position.
        Nodes whose ``after`` chain never reaches the head (e.g. the anchor has
        not been inserted yet) are not emitted.
        """
        # RGA: siblings sharing an ``after`` anchor are ordered by DESCENDING
        # clock (newest-first); pre-order DFS from the head ("").
        from functools import cmp_to_key

        children: dict[str, list[_Node]] = {}
        for node in nodes.values():
            children.setdefault(node.after, []).append(node)
        for bucket in children.values():
            # Descending clock; break exact-clock ties on the unique element id so
            # the order is total even for malformed ops with decoupled id/clock.
            bucket.sort(
                key=cmp_to_key(
                    lambda a, b: compare_clocks(b.clock, a.clock) or _cmp_str(b.id, a.id)
                )
            )

        # Iterative pre-order DFS (an explicit stack, NOT recursion): a long
        # linear chain — e.g. a multi-thousand-character text run — would
        # otherwise blow Python's recursion limit. Push each bucket reversed so
        # siblings pop in bucket order.
        out: list[_Node] = []
        stack: list[_Node] = list(reversed(children.get("", [])))
        while stack:
            node = stack.pop()
            out.append(node)
            stack.extend(reversed(children.get(node.id, [])))
        return out

    def _live_nodes(self, list_name: str) -> list[_Node]:
        """Non-deleted nodes of a named list in RGA order (``[]`` if unknown)."""
        nodes = self._lists.get(list_name)
        if not nodes:
            return []
        return [n for n in self._list_order(nodes) if not n.deleted]

    def list_values(self, list_name: str) -> list[Any]:
        """Live element values of a named list, in RGA order."""
        return [n.value for n in self._live_nodes(list_name)]

    def list_ids(self, list_name: str) -> list[str]:
        """Live element ids of a named list, in RGA order."""
        return [n.id for n in self._live_nodes(list_name)]

    def text(self, list_name: str) -> str:
        """A named list materialized as a string (1-char element values).

        Live values that are not ``str`` are skipped.
        """
        return "".join(v for v in self.list_values(list_name) if isinstance(v, str))

    def get_register(self, reg: str) -> Any:
        """Return a register's value, or ``None`` if it is unset or deleted."""
        cur = self._regs.get(reg)
        if cur is None or cur.deleted:
            return None
        return cur.value

    def materialize(self) -> dict[str, Any]:
        """Project the current document (live registers + lists as arrays).

        Keys are emitted in sorted order. When a live register and a list share
        a name, the register's value is the one projected.
        """
        keys: set[str] = {name for name, st in self._regs.items() if not st.deleted}
        keys.update(self._lists)
        out: dict[str, Any] = {}
        for key in sorted(keys):
            state = self._regs.get(key)
            if state is not None and not state.deleted:
                out[key] = state.value
            else:
                # Not a live register, so the key came from ``_lists``.
                out[key] = self.list_values(key)
        return out

    # ── snapshot state ──────────────────────────────────────────────────────────

    def list_names(self) -> list[str]:
        """Names of all RGA lists currently present (live or tombstoned)."""
        return sorted(self._lists)

    def export_state(self) -> dict[str, Any]:
        """Export full CRDT state (tombstones included) for a snapshot.

        Clocks AND values are copied, so an exported state (and ``clone``)
        never aliases the live document's nested dicts or lists: mutating a
        register value obtained from one document cannot leak into another.
        """
        from copy import deepcopy

        regs = {
            reg: {
                "clock": dict(st.clock),
                "value": deepcopy(st.value),
                "deleted": st.deleted,
            }
            for reg, st in self._regs.items()
        }
        lists = {
            name: [
                {
                    "id": n.id,
                    "after": n.after,
                    "clock": dict(n.clock),
                    "value": deepcopy(n.value),
                    "deleted": n.deleted,
                    "pending": n.pending,
                }
                for n in nodes.values()
            ]
            for name, nodes in self._lists.items()
        }
        return {"v": 1, "regs": regs, "lists": lists}

    def import_state(self, state: dict[str, Any]) -> None:
        """Replace state from a snapshot's ``state`` (bootstrap readers).

        Clocks are copied; values are adopted by reference from ``state``.
        A list node without a ``pending`` key is treated as not pending.
        """
        self._regs = {
            reg: _Reg(dict(st["clock"]), st["value"], st["deleted"])
            for reg, st in state["regs"].items()
        }
        self._lists = {}
        for name, nodes in state["lists"].items():
            self._lists[name] = {
                n["id"]: _Node(
                    n["id"],
                    n["after"],
                    dict(n["clock"]),
                    n["value"],
                    n["deleted"],
                    n.get("pending", False),
                )
                for n in nodes
            }

    def clone(self) -> "WalCrdt":
        """Return an independent copy built from :meth:`export_state`."""
        c = WalCrdt()
        c.import_state(self.export_state())
        return c
@@ -0,0 +1,8 @@
1
+ Metadata-Version: 2.4
2
+ Name: starfish-wal
3
+ Version: 3.0.0a20
4
+ Summary: Starfish WAL extension — append-only CRDT op-log document model (LWW register + RGA list + text) core, cross-language conformant with the TypeScript package
5
+ Requires-Python: >=3.11
6
+ Requires-Dist: starfish-protocol
7
+ Provides-Extra: dev
8
+ Requires-Dist: pytest>=7.0; extra == "dev"
@@ -0,0 +1,13 @@
1
+ README.md
2
+ pyproject.toml
3
+ starfish_wal/__init__.py
4
+ starfish_wal/clock.py
5
+ starfish_wal/crdt.py
6
+ starfish_wal.egg-info/PKG-INFO
7
+ starfish_wal.egg-info/SOURCES.txt
8
+ starfish_wal.egg-info/dependency_links.txt
9
+ starfish_wal.egg-info/requires.txt
10
+ starfish_wal.egg-info/top_level.txt
11
+ tests/test_clock.py
12
+ tests/test_crdt.py
13
+ tests/test_vectors.py
@@ -0,0 +1,4 @@
1
+ starfish-protocol
2
+
3
+ [dev]
4
+ pytest>=7.0
@@ -0,0 +1 @@
1
+ starfish_wal
@@ -0,0 +1,36 @@
1
+ from starfish_wal import (
2
+ LamportClock,
3
+ clock_greater,
4
+ compare_clocks,
5
+ derive_replica_id,
6
+ )
7
+
8
+
9
+ def _sign(n: int) -> int:
10
+ return (n > 0) - (n < 0)
11
+
12
+
13
def test_orders_by_counter_first():
    """The counter decides the order even when the replica ids disagree."""
    low, high = {"c": 1, "r": "z"}, {"c": 2, "r": "a"}
    assert _sign(compare_clocks(low, high)) == -1
    newer, older = {"c": 3, "r": "a"}, {"c": 2, "r": "z"}
    assert clock_greater(newer, older)
16
+
17
+
18
def test_ties_break_on_replica_id():
    """Equal counters are ordered by replica id, in both argument orders."""
    replica_a, replica_b = {"c": 2, "r": "a"}, {"c": 2, "r": "b"}
    assert _sign(compare_clocks(replica_a, replica_b)) == -1
    assert _sign(compare_clocks(replica_b, replica_a)) == 1
21
+
22
+
23
def test_identical_clocks_compare_equal():
    """Two clocks with the same counter and replica id compare as 0."""
    left = {"c": 7, "r": "abc"}
    right = {"c": 7, "r": "abc"}
    assert compare_clocks(left, right) == 0
25
+
26
+
27
def test_lamport_tick_and_observe():
    """Ticks count up from 1; after observing c=10 the next tick is 11."""
    clock = LamportClock("r1")
    for expected_counter in (1, 2):
        assert clock.tick() == {"c": expected_counter, "r": "r1"}
    clock.observe({"c": 10, "r": "other"})
    assert clock.tick() == {"c": 11, "r": "r1"}
33
+
34
+
35
def test_replica_id_is_session_unique():
    """The same author key with different session nonces yields different ids."""
    first_session = derive_replica_id("pub", "s1")
    second_session = derive_replica_id("pub", "s2")
    assert first_session != second_session
@@ -0,0 +1,179 @@
1
+ from starfish_wal import WalCrdt, compare_clocks
2
+ from functools import cmp_to_key
3
+
4
+
5
+ def _fold(ops):
6
+ c = WalCrdt()
7
+ c.fold(ops)
8
+ return c
9
+
10
+
11
def _perms(ops):
    """Four orderings of ``ops``: as given, reversed, ascending and descending clock."""
    ascending = cmp_to_key(lambda a, b: compare_clocks(a["clock"], b["clock"]))
    descending = cmp_to_key(lambda a, b: compare_clocks(b["clock"], a["clock"]))
    return [
        ops,
        [*reversed(ops)],
        sorted(ops, key=ascending),
        sorted(ops, key=descending),
    ]
18
+
19
+
20
def test_lww_highest_clock_wins():
    """The write with the greatest ``(c, r)`` clock wins in every fold order."""
    writes = [
        {"t": "set", "reg": "title", "clock": {"c": counter, "r": replica}, "value": value}
        for counter, replica, value in (
            (1, "a", "draft"),
            (2, "a", "final"),
            (2, "b", "other"),
        )
    ]
    for ordering in _perms(writes):
        doc = _fold(ordering)
        assert doc.materialize() == {"title": "other"}
28
+
29
+
30
def test_delete_tombstone_and_resurrect():
    """A later ``set`` revives a register that an earlier ``del`` tombstoned."""
    first_write = {"t": "set", "reg": "x", "clock": {"c": 1, "r": "a"}, "value": "v1"}
    delete = {"t": "del", "reg": "x", "clock": {"c": 2, "r": "a"}}
    second_write = {"t": "set", "reg": "x", "clock": {"c": 3, "r": "a"}, "value": "v2"}
    for ordering in _perms([first_write, delete, second_write]):
        doc = _fold(ordering)
        assert doc.materialize() == {"x": "v2"}
38
+
39
+
40
def test_stale_delete_cannot_erase_newer_set():
    """A ``del`` with a lower clock than the stored ``set`` is ignored."""
    newer_set = {"t": "set", "reg": "k", "clock": {"c": 5, "r": "a"}, "value": "keep"}
    stale_del = {"t": "del", "reg": "k", "clock": {"c": 2, "r": "b"}}
    doc = _fold([newer_set, stale_del])
    assert doc.get_register("k") == "keep"
46
+
47
+
48
def test_rga_concurrent_head_insert_tiebreak():
    """Concurrent head inserts order newest-clock-first; children follow their anchor."""
    head_a = {"t": "ins", "list": "l", "id": "1@a", "after": "", "clock": {"c": 1, "r": "a"}, "value": "A"}
    head_b = {"t": "ins", "list": "l", "id": "1@b", "after": "", "clock": {"c": 1, "r": "b"}, "value": "B"}
    child_of_a = {"t": "ins", "list": "l", "id": "2@a", "after": "1@a", "clock": {"c": 2, "r": "a"}, "value": "C"}
    for ordering in _perms([head_a, head_b, child_of_a]):
        doc = _fold(ordering)
        assert doc.list_values("l") == ["B", "A", "C"]
56
+
57
+
58
def test_rga_delete_keeps_anchor():
    """Removing an element keeps it as an anchor for the element inserted after it."""
    insert_x = {"t": "ins", "list": "l", "id": "1@a", "after": "", "clock": {"c": 1, "r": "a"}, "value": "x"}
    insert_y = {"t": "ins", "list": "l", "id": "2@a", "after": "1@a", "clock": {"c": 2, "r": "a"}, "value": "y"}
    remove_x = {"t": "rmv", "list": "l", "id": "1@a", "clock": {"c": 3, "r": "a"}}
    for ordering in _perms([insert_x, insert_y, remove_x]):
        doc = _fold(ordering)
        assert doc.list_values("l") == ["y"]
66
+
67
+
68
def test_rga_insert_idempotent():
    """Replaying the same insert three times yields a single element."""
    ins = {"t": "ins", "list": "l", "id": "1@a", "after": "", "clock": {"c": 1, "r": "a"}, "value": "x"}
    replayed = [ins] * 3
    doc = _fold(replayed)
    assert doc.list_values("l") == ["x"]
71
+
72
+
73
def test_remove_before_insert():
    """A remove delivered before its insert still leaves the element deleted."""
    remove_first = {"t": "rmv", "list": "l", "id": "1@a", "clock": {"c": 2, "r": "a"}}
    insert_later = {"t": "ins", "list": "l", "id": "1@a", "after": "", "clock": {"c": 1, "r": "a"}, "value": "x"}
    doc = _fold([remove_first, insert_later])
    assert doc.list_values("l") == []
79
+
80
+
81
def test_remove_before_insert_with_live_descendant():
    """Regression: a rmv-before-ins tombstone must not be mis-anchored at the
    head and drag its live subtree (3@a) to the wrong position."""
    insert_a = {"t": "ins", "list": "l", "id": "1@a", "after": "", "clock": {"c": 1, "r": "a"}, "value": "A"}
    insert_b = {"t": "ins", "list": "l", "id": "2@a", "after": "1@a", "clock": {"c": 2, "r": "a"}, "value": "B"}
    remove_b = {"t": "rmv", "list": "l", "id": "2@a", "clock": {"c": 3, "r": "a"}}
    insert_c = {"t": "ins", "list": "l", "id": "3@a", "after": "2@a", "clock": {"c": 4, "r": "a"}, "value": "C"}
    for ordering in _perms([insert_a, insert_b, remove_b, insert_c]):
        doc = _fold(ordering)
        assert doc.list_values("l") == ["A", "C"]
92
+
93
+
94
def test_sibling_identical_clock_orders_by_id():
    """Malformed ops (id decoupled from clock) that share an exact clock must
    still converge via the id tie-break, independent of fold order."""
    ops = [
        {"t": "ins", "list": "l", "id": "A", "after": "", "clock": {"c": 1, "r": "x"}, "value": "first"},
        {"t": "ins", "list": "l", "id": "B", "after": "", "clock": {"c": 1, "r": "x"}, "value": "second"},
    ]
    expected = ["second", "first"]
    for ordering in (ops, list(reversed(ops))):
        assert _fold(ordering).list_values("l") == expected
103
+
104
+
105
def test_text_materializes_as_string():
    """A list of single-character values reads back as one string."""
    insert_h = {"t": "ins", "list": "t", "id": "1@a", "after": "", "clock": {"c": 1, "r": "a"}, "value": "h"}
    insert_i = {"t": "ins", "list": "t", "id": "2@a", "after": "1@a", "clock": {"c": 2, "r": "a"}, "value": "i"}
    doc = _fold([insert_h, insert_i])
    assert doc.text("t") == "hi"
111
+
112
+
113
def test_state_round_trip_with_tombstones():
    """An imported snapshot keeps tombstones, so later inserts can anchor on them."""
    history = [
        {"t": "set", "reg": "a", "clock": {"c": 1, "r": "a"}, "value": 1},
        {"t": "ins", "list": "l", "id": "1@a", "after": "", "clock": {"c": 2, "r": "a"}, "value": "x"},
        {"t": "rmv", "list": "l", "id": "1@a", "clock": {"c": 3, "r": "a"}},
    ]
    original = _fold(history)
    snapshot = original.export_state()

    restored = WalCrdt()
    restored.import_state(snapshot)
    assert restored.materialize() == original.materialize()

    # Anchor a new element on the tombstoned "1@a".
    follow_up = {"t": "ins", "list": "l", "id": "2@a", "after": "1@a", "clock": {"c": 4, "r": "a"}, "value": "y"}
    restored.apply(follow_up)
    assert restored.list_values("l") == ["y"]
124
+
125
+
126
def test_clone_is_independent():
    """Writing to a clone leaves the source document untouched."""
    original = _fold([{"t": "set", "reg": "a", "clock": {"c": 1, "r": "a"}, "value": 1}])
    duplicate = original.clone()
    newer_write = {"t": "set", "reg": "a", "clock": {"c": 2, "r": "a"}, "value": 2}
    duplicate.apply(newer_write)
    assert original.get_register("a") == 1
    assert duplicate.get_register("a") == 2
132
+
133
+
134
def test_export_deep_copies_clocks():
    """Mutating an exported state's clocks must not change the live document."""
    doc = _fold([
        {"t": "set", "reg": "a", "clock": {"c": 5, "r": "a"}, "value": 1},
        {"t": "ins", "list": "l", "id": "1@a", "after": "", "clock": {"c": 6, "r": "a"}, "value": "x"},
    ])
    first_export = doc.export_state()
    # Overwrite both exported clocks in place.
    first_export["regs"]["a"]["clock"]["c"] = 999
    first_export["lists"]["l"][0]["clock"]["c"] = 999

    second_export = doc.export_state()
    register_clock = second_export["regs"]["a"]["clock"]
    node_clock = second_export["lists"]["l"][0]["clock"]
    assert register_clock["c"] == 5
    assert node_clock["c"] == 6
146
+
147
+
148
def test_export_state_idempotent_under_refold():
    """Folding the same ops a second time leaves the exported state unchanged."""
    history = [
        {"t": "set", "reg": "a", "clock": {"c": 1, "r": "a"}, "value": 1},
        {"t": "ins", "list": "l", "id": "1@a", "after": "", "clock": {"c": 2, "r": "a"}, "value": "x"},
        {"t": "rmv", "list": "l", "id": "1@a", "clock": {"c": 3, "r": "a"}},
    ]
    doc = WalCrdt()
    doc.fold(history)
    snapshot_before = doc.export_state()
    doc.fold(history)
    snapshot_after = doc.export_state()
    assert snapshot_after == snapshot_before
159
+
160
+
161
def test_materializes_long_linear_chain_without_recursion_error():
    """A long text run is a deep RGA chain; ``_list_order`` must not recurse per
    node (Python's default recursion limit is ~1000)."""
    n = 50_000
    ops = []
    anchor = ""
    for i in range(n):
        node_id = f"{i}@a"
        ops.append(
            {
                "t": "ins",
                "list": "body",
                "id": node_id,
                "after": anchor,
                "clock": {"c": i + 1, "r": "a"},
                "value": "x",
            }
        )
        # Each element is inserted after the previous one: one linear chain.
        anchor = node_id
    doc = WalCrdt()
    doc.fold(ops)
    assert len(doc.list_values("body")) == n
    assert len(doc.text("body")) == n
@@ -0,0 +1,47 @@
1
+ """Cross-language conformance: the Python CRDT must fold the shared vectors to
2
+ the same materialized state as the TypeScript implementation."""
3
+
4
+ import json
5
+ from functools import cmp_to_key
6
+ from pathlib import Path
7
+
8
+ from starfish_wal import WalCrdt, compare_clocks
9
+
10
# The shared cross-language vectors are resolved relative to this test module:
# ``parents[4]`` is four directories above ``tests/`` (presumably the
# repository root — the file is not part of the Python package itself).
_VECTORS_PATH = (
    Path(__file__).resolve().parents[4] / "tests" / "test-vectors" / "wal-crdt.json"
)
# JSON is UTF-8 on the wire; decode explicitly rather than with the platform's
# locale encoding, which ``read_text()`` would otherwise use.
VECTORS = json.loads(_VECTORS_PATH.read_text(encoding="utf-8"))
13
+
14
+
15
+ def _sign(n: int) -> int:
16
+ return (n > 0) - (n < 0)
17
+
18
+
19
def test_clock_total_order():
    """Every ``clockOrder`` vector compares with the sign the vector records."""
    for vector in VECTORS["clockOrder"]:
        observed = _sign(compare_clocks(vector["a"], vector["b"]))
        assert observed == vector["sign"]
22
+
23
+
24
def test_fold_cases_converge_and_are_idempotent():
    """Each ``fold`` vector materializes to its expected document in forward,
    reversed and clock-sorted order, and when folded twice."""
    ascending_clock = cmp_to_key(lambda a, b: compare_clocks(a["clock"], b["clock"]))
    for case in VECTORS["fold"]:
        ops, expected = case["ops"], case["expected"]

        forward = WalCrdt()
        forward.fold(ops)
        assert forward.materialize() == expected, case["name"]

        backward = WalCrdt()
        backward.fold(list(reversed(ops)))
        assert backward.materialize() == expected, case["name"]

        clock_sorted = WalCrdt()
        clock_sorted.fold(sorted(ops, key=ascending_clock))
        assert clock_sorted.materialize() == expected, case["name"]

        # Idempotence: the whole log applied a second time changes nothing.
        replayed = WalCrdt()
        for _ in range(2):
            replayed.fold(ops)
        assert replayed.materialize() == expected, case["name"]

        expected_text = case.get("expectedText", {})
        for list_name, text in expected_text.items():
            assert forward.text(list_name) == text, case["name"]