starfish-replica 3.0.0a5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,12 @@
1
+ Metadata-Version: 2.4
2
+ Name: starfish-replica
3
+ Version: 3.0.0a5
4
+ Summary: Starfish replication extension (primary→replica sync, write modes, on-pull/scheduled triggers, push proxy) as a server plugin
5
+ Requires-Python: >=3.11
6
+ Requires-Dist: starfish-protocol
7
+ Requires-Dist: starfish-server
8
+ Requires-Dist: httpx>=0.27
9
+ Provides-Extra: dev
10
+ Requires-Dist: pytest>=7.0; extra == "dev"
11
+ Requires-Dist: pytest-asyncio>=0.21; extra == "dev"
12
+ Requires-Dist: respx>=0.23.1; extra == "dev"
@@ -0,0 +1,58 @@
1
+ # starfish-replica
2
+
3
+ Replication extension for [Starfish](https://github.com/Drakkar-Software/starfish). Lets you run
4
+ multiple Starfish servers that stay in sync: a **primary** holds the source of truth; **replicas**
5
+ pull from it and serve reads locally.
6
+
7
+ Shipped as a `ServerPlugin` — it owns its own config (the `remote` field is no longer part of the
8
+ core `CollectionConfig`).
9
+
10
+ ## Install
11
+
12
+ ```bash
13
+ pip install starfish-replica
14
+ ```
15
+
16
+ ## Usage
17
+
18
+ ```python
19
+ from starfish_server import create_sync_router, SyncRouterOptions
20
+ from starfish_replica import create_replica_server_plugin, RemoteConfig
21
+
22
+ replica = create_replica_server_plugin(
23
+ store=store,
24
+ sync_config=config,
25
+ collections={
26
+ # keyed by root collection name
27
+ "posts": RemoteConfig(
28
+ url="https://primary.example.com/v1",
29
+ pull_path="/pull/posts/featured",
30
+ interval_ms=60_000,
31
+ headers={"Authorization": "Bearer <replica-token>"},
32
+ write_mode="pull_only", # clients can't push to this replica
33
+ sync_triggers=["scheduled"], # or ["on_pull"]
34
+ ),
35
+ },
36
+ )
37
+
38
+ router = create_sync_router(SyncRouterOptions(
39
+ store=store,
40
+ config=config,
41
+ role_resolver=role_resolver,
42
+ plugins=[replica.plugin], # + other plugins
43
+ ))
44
+
45
+ await replica.manager.start() # begin scheduled / initial syncs
46
+ # on shutdown: register replica.plugin in GracefulShutdownOptions(plugins=[...])
47
+ ```
48
+
49
+ ## Write modes
50
+
51
+ | Mode | Client reads | Client writes | Syncs from primary |
52
+ | --- | --- | --- | --- |
53
+ | `pull_only` | ✓ | rejected (405) | ✓ replace |
54
+ | `push_through` | ✓ | forwarded to primary | ✓ replace |
55
+ | `bidirectional` | ✓ | stored locally | ✓ merge (remote-wins) |
56
+ | `push_only` | rejected (405) | stored locally | — |
57
+
58
+ `push_through` and `bidirectional` require `push_path`.
@@ -0,0 +1,29 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "starfish-replica"
7
+ version = "3.0.0a5"
8
+ description = "Starfish replication extension (primary→replica sync, write modes, on-pull/scheduled triggers, push proxy) as a server plugin"
9
+ requires-python = ">=3.11"
10
+ dependencies = [
11
+ "starfish-protocol",
12
+ "starfish-server",
13
+ "httpx>=0.27",
14
+ ]
15
+
16
+ [project.optional-dependencies]
17
+ dev = [
18
+ "pytest>=7.0",
19
+ "pytest-asyncio>=0.21",
20
+ "respx>=0.23.1",
21
+ ]
22
+
23
+ [tool.uv.sources]
24
+ starfish-protocol = { path = "../protocol", editable = true }
25
+ starfish-server = { path = "../server", editable = true }
26
+
27
+ [tool.pytest.ini_options]
28
+ asyncio_mode = "auto"
29
+ testpaths = ["tests"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,30 @@
1
+ """``starfish-replica`` — primary→replica replication extension.
2
+
3
+ Public surface: :class:`ReplicaManager` (the sync engine), the replica config
4
+ types (:class:`RemoteConfig`/:class:`WriteMode`/:class:`SyncTrigger`/
5
+ :class:`RemoteCollection`), :func:`validate_replica_config`, and
6
+ :func:`create_replica_server_plugin` — a ``ServerPlugin`` whose
7
+ ``before_pull``/``intercept_push`` hooks enforce write modes and proxy
8
+ push-through writes, and whose ``shutdown`` hook stops the sync timers.
9
+ """
10
+
11
+ from starfish_replica.config import (
12
+ RemoteCollection,
13
+ RemoteConfig,
14
+ SyncTrigger,
15
+ WriteMode,
16
+ )
17
+ from starfish_replica.manager import ReplicaManager
18
+ from starfish_replica.plugin import ReplicaServerPlugin, create_replica_server_plugin
19
+ from starfish_replica.validate import validate_replica_config
20
+
21
+ __all__ = [
22
+ "RemoteCollection",
23
+ "RemoteConfig",
24
+ "SyncTrigger",
25
+ "WriteMode",
26
+ "ReplicaManager",
27
+ "ReplicaServerPlugin",
28
+ "create_replica_server_plugin",
29
+ "validate_replica_config",
30
+ ]
@@ -0,0 +1,92 @@
1
+ """Replica configuration types. Owned by the replica plugin — apps pass a
2
+ ``{ collection_name: RemoteConfig }`` map to ``create_replica_server_plugin``.
3
+
4
+ (Moved out of ``starfish-server``'s ``CollectionConfig`` so the core schema no
5
+ longer knows about replication — mirrors how ``QueueConfig`` lives in
6
+ ``starfish-queuing``.)
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from dataclasses import dataclass
12
+ from enum import StrEnum
13
+
14
+ from pydantic import BaseModel, Field
15
+
16
+
17
+ class WriteMode(StrEnum):
18
+ """Controls how local client writes are handled on a replica collection."""
19
+
20
+ PULL_ONLY = "pull_only"
21
+ """Only the ReplicaManager writes locally; local client pushes are rejected (405)."""
22
+
23
+ PUSH_THROUGH = "push_through"
24
+ """Local client pushes are forwarded to the primary; the replica syncs back afterwards."""
25
+
26
+ BIDIRECTIONAL = "bidirectional"
27
+ """Local client pushes are stored locally and merged (remote-wins) with the primary on sync."""
28
+
29
+ PUSH_ONLY = "push_only"
30
+ """Local client pushes are stored locally; pull requests are rejected (405).
31
+ The replica does not sync from the primary — data is managed entirely locally."""
32
+
33
+
34
+ class SyncTrigger(StrEnum):
35
+ """Events that trigger a sync from the primary."""
36
+
37
+ SCHEDULED = "scheduled"
38
+ """Sync on a fixed interval (``interval_ms``)."""
39
+
40
+ ON_PULL = "on_pull"
41
+ """Sync before serving each local ``GET /pull/…`` request (lazy / always-fresh)."""
42
+
43
+
44
class RemoteConfig(BaseModel):
    """Replication settings for a collection mirrored from a remote (primary) starfish server.

    Fields may be supplied under their snake_case name or their camelCase
    alias (``populate_by_name`` is enabled).
    """

    model_config = {"populate_by_name": True}

    # Base URL of the primary starfish server, e.g. ``https://primary.example.com/v1``.
    url: str

    # Pull endpoint path on the primary, e.g. ``/pull/posts/featured``.
    # Must be a static path — no template variables.
    pull_path: str = Field(alias="pullPath")

    # Push endpoint path on the primary. Required for the ``push_through`` and
    # ``bidirectional`` write modes.
    push_path: str | None = Field(default=None, alias="pushPath")

    # Sync interval in milliseconds (used by the ``scheduled`` trigger).
    # Defaults to 60 000 ms.
    interval_ms: int = Field(default=60_000, gt=0, alias="intervalMs")

    # Static HTTP headers sent to the primary on every request
    # (e.g. ``Authorization: Bearer <token>``). These credentials must satisfy
    # the primary collection's ``readRoles`` (and ``writeRoles`` for write-through).
    headers: dict[str, str] = Field(default_factory=dict)

    # How local client writes are handled. Defaults to ``pull_only``.
    write_mode: WriteMode = Field(default=WriteMode.PULL_ONLY, alias="writeMode")

    # Which events trigger a sync from the primary. Defaults to ``[scheduled]``.
    sync_triggers: list[SyncTrigger] = Field(
        default_factory=lambda: [SyncTrigger.SCHEDULED],
        alias="syncTriggers",
    )

    # Minimum time in milliseconds between two consecutive syncs triggered by
    # ``on_pull``. When a client pulls before this cooldown has elapsed since the
    # last sync, the replica skips the round-trip to the primary and serves the
    # locally cached data instead. ``None`` (default) means every ``on_pull``
    # request syncs from the primary. Only relevant when ``on_pull`` is listed in
    # ``sync_triggers``.
    on_pull_min_interval_ms: int | None = Field(default=None, gt=0, alias="onPullMinIntervalMs")
83
+
84
+
85
@dataclass
class RemoteCollection:
    """One collection to replicate, as consumed by the replica manager.

    Attributes:
        name: Collection name, used as the route key.
        storage_path: Static storage path, used as the document key.
        remote: The collection's :class:`RemoteConfig`.
    """

    name: str
    storage_path: str
    remote: RemoteConfig
@@ -0,0 +1,243 @@
1
+ """Replica manager — scheduled and on-demand sync from a remote primary starfish."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import json
7
+ import logging
8
+ import time
9
+ from collections.abc import Callable
10
+ from typing import Any
11
+
12
+ import httpx
13
+
14
+ from starfish_protocol.merge import deep_merge
15
+ from starfish_server.protocol.push import push
16
+ from starfish_server.router.helpers import deep_sanitize
17
+ from starfish_server.protocol.types import PushSuccess
18
+ from starfish_server.storage.base import AbstractObjectStore
19
+
20
+ from starfish_replica.config import RemoteCollection, RemoteConfig, SyncTrigger, WriteMode
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+
25
+ class ReplicaManager:
26
+ """Manages replication from remote (primary) starfish servers.
27
+
28
+ For each :class:`RemoteCollection`, syncs data from the primary to local
29
+ storage. Write mode, sync triggers, and interval are driven by config.
30
+ """
31
+
32
+ def __init__(
33
+ self,
34
+ store: AbstractObjectStore,
35
+ collections: list[RemoteCollection],
36
+ *,
37
+ client: httpx.AsyncClient | None = None,
38
+ on_error: Callable[[str, Exception], None] | None = None,
39
+ ) -> None:
40
+ self._store = store
41
+ self._remote_cols = list(collections)
42
+ self._owned_client = client is None
43
+ self._client = client or httpx.AsyncClient(timeout=30.0)
44
+ self._on_error = on_error or (
45
+ lambda name, exc: logger.error("[ReplicaManager] %s: %s", name, exc)
46
+ )
47
+ self._last_hash: dict[str, str] = {}
48
+ self._last_sync_at: dict[str, float] = {}
49
+ self._tasks: list[asyncio.Task[None]] = []
50
+
51
+ def remote_for(self, name: str) -> RemoteConfig | None:
52
+ """The :class:`RemoteConfig` for a collection name, or ``None`` if not replicated."""
53
+ col = self._find(name)
54
+ return col.remote if col else None
55
+
56
+ async def start(self) -> None:
57
+ """Start background sync tasks for all remote collections."""
58
+ for col in self._remote_cols:
59
+ remote = col.remote
60
+
61
+ if SyncTrigger.SCHEDULED in remote.sync_triggers:
62
+ task = asyncio.create_task(self._run_loop(col))
63
+ self._tasks.append(task)
64
+ else:
65
+ asyncio.create_task(self._sync_safe(col))
66
+
67
+ async def stop(self) -> None:
68
+ """Cancel all background tasks and close the HTTP client (if owned)."""
69
+ for task in self._tasks:
70
+ task.cancel()
71
+ if self._tasks:
72
+ await asyncio.gather(*self._tasks, return_exceptions=True)
73
+ self._tasks.clear()
74
+ if self._owned_client:
75
+ await self._client.aclose()
76
+
77
+ async def on_pull(self, collection_name: str) -> None:
78
+ """Called by the pull route when ``on_pull`` is listed in ``sync_triggers``.
79
+
80
+ Awaited before the local store is read, ensuring the response is fresh.
81
+ If ``on_pull_min_interval_ms`` is configured and the last sync occurred within
82
+ that window, the primary is not contacted and cached local data is served instead.
83
+ """
84
+ col = self._find(collection_name)
85
+ if col is None:
86
+ return
87
+
88
+ min_interval_ms = col.remote.on_pull_min_interval_ms
89
+ if min_interval_ms is not None:
90
+ last = self._last_sync_at.get(collection_name)
91
+ if last is not None and (time.monotonic() - last) * 1000 < min_interval_ms:
92
+ return # within cooldown — serve cached local data
93
+
94
+ await self._sync_safe(col)
95
+
96
+ async def sync_now(self, name: str) -> None:
97
+ """Trigger an immediate sync for a single collection by name."""
98
+ col = self._find(name)
99
+ if col is None:
100
+ raise ValueError(f"[ReplicaManager] Unknown remote collection: {name!r}")
101
+ await self._do_sync(col)
102
+
103
+ async def sync_all(self) -> None:
104
+ """Trigger an immediate sync for all remote collections in parallel."""
105
+ await asyncio.gather(*(self._sync_safe(col) for col in self._remote_cols))
106
+
107
+ async def proxy_push(self, name: str, raw_body: bytes | str) -> tuple[int, Any]:
108
+ """Forward a client push to the primary (write_mode ``push_through``).
109
+
110
+ Returns ``(status, body)`` to relay to the client. On success, triggers
111
+ a background sync so the local replica catches up. Framework-neutral —
112
+ the caller (replica plugin) turns this into an HTTP response.
113
+ """
114
+ col = self._find(name)
115
+ if col is None:
116
+ return 404, {"error": f"Unknown remote collection: {name!r}"}
117
+ remote = col.remote
118
+ primary_url = f"{remote.url.rstrip('/')}{remote.push_path}"
119
+ headers = {
120
+ "Content-Type": "application/json",
121
+ "Accept": "application/json",
122
+ **remote.headers,
123
+ }
124
+
125
+ try:
126
+ resp = await self._client.post(primary_url, content=raw_body, headers=headers)
127
+ except httpx.HTTPError as exc:
128
+ logger.error("Failed to reach primary for %r: %s", name, exc)
129
+ return 502, {"error": "Failed to reach primary"}
130
+
131
+ if resp.status_code == 409:
132
+ return 409, {"error": "hash_mismatch"}
133
+ if not resp.is_success:
134
+ return resp.status_code, {"error": f"Primary returned {resp.status_code}"}
135
+
136
+ body = resp.json()
137
+
138
+ # Validate the primary's response shape before relaying it to our client.
139
+ # A successful push returns ``{ hash, timestamp }``; refuse to forward an
140
+ # arbitrary/garbage body a compromised or misbehaving primary might send.
141
+ if not isinstance(body, dict) or not isinstance(body.get("hash"), str):
142
+ logger.error("Primary returned an unexpected push response shape for %r", name)
143
+ return 502, {"error": "Primary returned an unexpected response"}
144
+
145
+ # Trigger sync in background (don't await)
146
+ task = asyncio.create_task(self.sync_now(name))
147
+ task.add_done_callback(
148
+ lambda t: logger.error("replica sync_now failed for %r: %s", name, t.exception())
149
+ if not t.cancelled() and t.exception() is not None
150
+ else None
151
+ )
152
+
153
+ return resp.status_code, body
154
+
155
+ def _find(self, name: str) -> RemoteCollection | None:
156
+ return next((c for c in self._remote_cols if c.name == name), None)
157
+
158
+ async def _run_loop(self, col: RemoteCollection) -> None:
159
+ interval = col.remote.interval_ms / 1000
160
+ while True:
161
+ await self._sync_safe(col)
162
+ await asyncio.sleep(interval)
163
+
164
+ async def _sync_safe(self, col: RemoteCollection) -> None:
165
+ try:
166
+ await self._do_sync(col)
167
+ except Exception as exc: # noqa: BLE001
168
+ self._on_error(col.name, exc)
169
+
170
+ async def _do_sync(self, col: RemoteCollection) -> None:
171
+ remote = col.remote
172
+
173
+ if remote.write_mode == WriteMode.PUSH_ONLY:
174
+ return
175
+
176
+ document_key = col.storage_path
177
+
178
+ primary_url = f"{remote.url.rstrip('/')}{remote.pull_path}"
179
+ resp = await self._client.get(
180
+ primary_url,
181
+ headers={"Accept": "application/json", **remote.headers},
182
+ )
183
+ resp.raise_for_status()
184
+ pulled: dict[str, Any] = resp.json()
185
+
186
+ primary_hash: str = pulled.get("hash", "")
187
+ primary_data: dict[str, Any] = pulled.get("data", {})
188
+
189
+ if not primary_hash:
190
+ return
191
+
192
+ if self._last_hash.get(col.name) == primary_hash:
193
+ return
194
+
195
+ raw_local = await self._store.get_string(document_key)
196
+ current_local_hash: str = ""
197
+ current_local_data: dict[str, Any] = {}
198
+ if raw_local:
199
+ try:
200
+ local_doc = json.loads(raw_local)
201
+ current_local_hash = local_doc.get("hash", "")
202
+ current_local_data = local_doc.get("data", {})
203
+ except json.JSONDecodeError as exc:
204
+ logger.error(
205
+ "[ReplicaManager] Corrupt local document at %r — treating as empty: %s",
206
+ document_key, exc,
207
+ )
208
+ # current_local_hash stays "" — push with baseHash="" will overwrite
209
+
210
+ if current_local_hash == primary_hash:
211
+ self._last_hash[col.name] = primary_hash
212
+ return
213
+
214
+ if remote.write_mode == WriteMode.BIDIRECTIONAL and current_local_data:
215
+ data_to_write = deep_merge(current_local_data, primary_data)
216
+ else:
217
+ data_to_write = primary_data
218
+
219
+ # Strip prototype-pollution keys before writing primary data into the
220
+ # local store. The bidirectional merge drops them via deep_merge, but the
221
+ # pull-only / push-through path writes the primary's ``data`` verbatim and
222
+ # must not trust it — a compromised primary could otherwise plant a
223
+ # ``__proto__`` / ``__class__`` payload.
224
+ sanitized = deep_sanitize(data_to_write)
225
+
226
+ # Use current_local_hash directly ("" works for both "no document" and
227
+ # "corrupt document"): push() treats base_hash="" the same as no hash when
228
+ # the stored current_hash is also "". Must NOT coerce "" → None — push()
229
+ # rejects base_hash=None when a (corrupt) doc is present, which would leave
230
+ # a corrupt local doc permanently unrecoverable (sync would raise
231
+ # "Concurrent write" forever). A valid local doc still yields its real
232
+ # hash, so genuine concurrent-write detection is preserved.
233
+ base_hash = current_local_hash
234
+ result = await push(self._store, document_key, sanitized, base_hash)
235
+
236
+ if not isinstance(result, PushSuccess):
237
+ raise RuntimeError(
238
+ f"[ReplicaManager] Concurrent write on {col.name!r} — will retry"
239
+ )
240
+
241
+ self._last_hash[col.name] = result.hash
242
+ self._last_sync_at[col.name] = time.monotonic()
243
+ logger.debug("[ReplicaManager] Synced %r (hash=%s)", col.name, result.hash)
@@ -0,0 +1,111 @@
1
+ """Server plugin for the replication extension (Python mirror).
2
+
3
+ Implements the route hooks from the ``ServerPlugin`` contract:
4
+ - ``before_pull``: rejects pulls on write-only (``push_only``) collections, and
5
+ triggers a sync from the primary when the ``on_pull`` trigger is configured.
6
+ - ``intercept_push``: rejects pushes on read-only (``pull_only``) collections,
7
+ and proxies the push to the primary when the write mode is ``push_through``.
8
+ - ``shutdown``: stops the manager's background tasks.
9
+
10
+ Like ``starfish-queuing``, this plugin owns its config: apps pass a
11
+ ``{ collection_name: RemoteConfig }`` map; the field is no longer part of the
12
+ core ``CollectionConfig``. The factory validates the config at construction and
13
+ raises on conflict.
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ from collections.abc import Callable, Mapping
19
+ from dataclasses import dataclass
20
+
21
+ import httpx
22
+
23
+ from starfish_protocol.plugins import (
24
+ PullHookContext,
25
+ PullHookResult,
26
+ PushHookContext,
27
+ PushHookResult,
28
+ ServerPlugin,
29
+ )
30
+ from starfish_server.config.schema import SyncConfig
31
+ from starfish_server.storage.base import AbstractObjectStore
32
+
33
+ from starfish_replica.config import RemoteCollection, RemoteConfig, SyncTrigger, WriteMode
34
+ from starfish_replica.manager import ReplicaManager
35
+ from starfish_replica.validate import validate_replica_config
36
+
37
+
38
@dataclass
class ReplicaServerPlugin:
    """Pair of objects produced by :func:`create_replica_server_plugin`.

    Attributes:
        plugin: Hand this to ``SyncRouterOptions(plugins=[...])``.
        manager: Call ``await manager.start()`` on it to begin
            scheduled/initial syncs.
    """

    plugin: ServerPlugin
    manager: ReplicaManager
48
+
49
+
50
def create_replica_server_plugin(
    *,
    store: AbstractObjectStore,
    sync_config: SyncConfig,
    collections: Mapping[str, RemoteConfig],
    client: httpx.AsyncClient | None = None,
    on_error: Callable[[str, Exception], None] | None = None,
) -> ReplicaServerPlugin:
    """Assemble the replica :class:`ServerPlugin` together with its :class:`ReplicaManager`.

    Args:
        store: Local object store handed to the manager.
        sync_config: Server sync configuration the replica map is checked against.
        collections: Replica settings keyed by root collection name.
        client: Optional HTTP client forwarded to the manager.
        on_error: Optional sync-error callback forwarded to the manager.

    Returns:
        The plugin and the manager, bundled as a :class:`ReplicaServerPlugin`.

    Raises:
        ValueError: If :func:`validate_replica_config` reports any problem.
    """
    problems = validate_replica_config(sync_config, dict(collections))
    if problems:
        joined = "\n- ".join(problems)
        raise ValueError(f"[starfish-replica] invalid configuration:\n- {joined}")

    # Validation guarantees every replicated name exists in sync_config.
    storage_paths = {c.name: c.storage_path for c in sync_config.collections}
    remote_cols = []
    for name, remote in collections.items():
        remote_cols.append(
            RemoteCollection(name=name, storage_path=storage_paths[name], remote=remote)
        )

    manager = ReplicaManager(store, remote_cols, client=client, on_error=on_error)

    async def _before_pull(ctx: PullHookContext) -> PullHookResult:
        remote = manager.remote_for(ctx.collection)
        if remote is not None:
            if remote.write_mode == WriteMode.PUSH_ONLY:
                return PullHookResult(
                    action="reject", status=405, error="This collection is write-only on this server"
                )
            if SyncTrigger.ON_PULL in remote.sync_triggers:
                # Refresh from the primary before the router reads the local store.
                await manager.on_pull(ctx.collection)
        return PullHookResult(action="proceed")

    async def _intercept_push(ctx: PushHookContext) -> PushHookResult:
        remote = manager.remote_for(ctx.collection)
        mode = None if remote is None else remote.write_mode
        if mode == WriteMode.PULL_ONLY:
            return PushHookResult(
                action="reject", status=405, error="This collection is read-only on this server"
            )
        if mode == WriteMode.PUSH_THROUGH:
            status, body = await manager.proxy_push(ctx.collection, ctx.raw_body)
            return PushHookResult(action="respond", status=status, body=body)
        # not replicated, bidirectional or push_only → store locally, then sync reconciles
        return PushHookResult(action="proceed")

    async def _shutdown() -> None:
        await manager.stop()

    return ReplicaServerPlugin(
        plugin=ServerPlugin(
            name="starfish-replica",
            before_pull=_before_pull,
            intercept_push=_intercept_push,
            shutdown=_shutdown,
        ),
        manager=manager,
    )
@@ -0,0 +1,66 @@
1
+ """Replica config validation.
2
+
3
+ Cross-references the ``remotes`` map (collection name → :class:`RemoteConfig`)
4
+ against the server's :class:`SyncConfig` collections. These rules were
5
+ previously inline in ``starfish-server``'s config validator; they moved here
6
+ when the ``remote`` field left the core ``CollectionConfig``.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import re
12
+
13
+ from starfish_server.config.schema import CollectionConfig, SyncConfig
14
+ from starfish_server.constants import ENCRYPTION_DELEGATED
15
+
16
+ from starfish_replica.config import RemoteConfig, WriteMode
17
+
18
MIME_JSON = "application/json"


def _is_binary_collection(allowed_mime_types: list[str]) -> bool:
    """Return ``True`` when no allowed MIME type is JSON (compared case-insensitively)."""
    return all(mime.lower() != MIME_JSON for mime in allowed_mime_types)
23
+
24
+
25
def validate_replica_config(
    config: SyncConfig,
    remotes: dict[str, RemoteConfig],
) -> list[str]:
    """Check every replicated collection against the server's sync config.

    Args:
        config: Server sync configuration; its ``collections`` are looked up by name.
        remotes: Replica settings keyed by root collection name.

    Returns:
        Error messages in detection order; an empty list means the
        configuration is valid.
    """
    known: dict[str, CollectionConfig] = {c.name: c for c in config.collections}
    problems: list[str] = []

    for name, remote in remotes.items():
        prefix = f'Collection "{name}": '
        report = lambda text: problems.append(prefix + text)  # noqa: E731

        col = known.get(name)
        if col is None:
            report("remote replication configured for an unknown root collection")
            continue

        if col.append_only:
            report("appendOnly cannot be used with remote replication")
        if _is_binary_collection(col.allowed_mime_types):
            report("binary collections cannot have remote replication")
        if re.search(r"\{[^}]+\}", col.storage_path):
            report(
                "remote collections must have a static storagePath "
                f'with no template variables (found "{col.storage_path}")'
            )
        if col.push_only:
            report("remote collections cannot be pushOnly")
        if col.bundle:
            report("remote collections cannot be part of a bundle")
        if col.encryption == ENCRYPTION_DELEGATED:
            report(
                f'remote collections cannot use "{col.encryption}" encryption '
                "(server cannot replicate opaque client-encrypted blobs)"
            )
        proxies_writes = remote.write_mode in (WriteMode.PUSH_THROUGH, WriteMode.BIDIRECTIONAL)
        if proxies_writes and not remote.push_path:
            report(
                f'write_mode "{remote.write_mode.value}" '
                "requires remote.push_path to be set"
            )

    return problems
@@ -0,0 +1,12 @@
1
+ Metadata-Version: 2.4
2
+ Name: starfish-replica
3
+ Version: 3.0.0a5
4
+ Summary: Starfish replication extension (primary→replica sync, write modes, on-pull/scheduled triggers, push proxy) as a server plugin
5
+ Requires-Python: >=3.11
6
+ Requires-Dist: starfish-protocol
7
+ Requires-Dist: starfish-server
8
+ Requires-Dist: httpx>=0.27
9
+ Provides-Extra: dev
10
+ Requires-Dist: pytest>=7.0; extra == "dev"
11
+ Requires-Dist: pytest-asyncio>=0.21; extra == "dev"
12
+ Requires-Dist: respx>=0.23.1; extra == "dev"
@@ -0,0 +1,15 @@
1
+ README.md
2
+ pyproject.toml
3
+ starfish_replica/__init__.py
4
+ starfish_replica/config.py
5
+ starfish_replica/manager.py
6
+ starfish_replica/plugin.py
7
+ starfish_replica/validate.py
8
+ starfish_replica.egg-info/PKG-INFO
9
+ starfish_replica.egg-info/SOURCES.txt
10
+ starfish_replica.egg-info/dependency_links.txt
11
+ starfish_replica.egg-info/requires.txt
12
+ starfish_replica.egg-info/top_level.txt
13
+ tests/test_config_validation.py
14
+ tests/test_manager.py
15
+ tests/test_plugin.py
@@ -0,0 +1,8 @@
1
+ starfish-protocol
2
+ starfish-server
3
+ httpx>=0.27
4
+
5
+ [dev]
6
+ pytest>=7.0
7
+ pytest-asyncio>=0.21
8
+ respx>=0.23.1
@@ -0,0 +1 @@
1
+ starfish_replica
@@ -0,0 +1,113 @@
1
+ """Tests for replica config validation (validate_replica_config)."""
2
+
3
+
4
+ from starfish_server.config.schema import CollectionConfig, SyncConfig
5
+
6
+ from starfish_replica.config import RemoteConfig, WriteMode
7
+ from starfish_replica.validate import validate_replica_config
8
+
9
+
10
def _col(**kwargs) -> CollectionConfig:
    """Return a minimal collection config with *kwargs* layered on top."""
    base = {
        "name": "featured",
        "storagePath": "posts/featured",
        "readRoles": ["public"],
        "writeRoles": [],
        "encryption": "none",
        "maxBodyBytes": 65536,
        "pullOnly": True,
    }
    return CollectionConfig(**{**base, **kwargs})
23
+
24
+
25
def _config(*cols: CollectionConfig) -> SyncConfig:
    """Wrap the given collections in a version-1 sync config."""
    return SyncConfig(version=1, collections=[*cols])
27
+
28
+
29
def _remote(**kwargs) -> RemoteConfig:
    """Return a minimal remote config with *kwargs* layered on top."""
    base = {
        "url": "https://primary.example.com/v1",
        "pullPath": "/pull/posts/featured",
        "intervalMs": 30_000,
    }
    return RemoteConfig(**{**base, **kwargs})
37
+
38
+
39
def test_valid_remote_collection_passes():
    """The default collection/remote pair validates cleanly."""
    sync_config = _config(_col())
    assert validate_replica_config(sync_config, {"featured": _remote()}) == []
42
+
43
+
44
def test_remote_for_unknown_collection_rejected():
    """A remote keyed by a name missing from the sync config is reported."""
    problems = validate_replica_config(_config(_col()), {"ghost": _remote()})
    assert any("unknown root collection" in msg for msg in problems)
47
+
48
+
49
def test_remote_with_template_vars_rejected():
    """A templated storage path is reported."""
    templated = _col(storagePath="users/{identity}/data")
    problems = validate_replica_config(_config(templated), {"featured": _remote()})
    assert any("template variables" in msg for msg in problems)
53
+
54
+
55
def test_remote_push_only_rejected():
    """A pushOnly collection is reported."""
    push_only = _col(pushOnly=True, pullOnly=None)
    problems = validate_replica_config(_config(push_only), {"featured": _remote()})
    assert any("pushOnly" in msg for msg in problems)
59
+
60
+
61
def test_remote_in_bundle_rejected():
    """A collection that belongs to a bundle is reported."""
    bundled = _col(storagePath="users/shared/data", bundle="my-bundle")
    problems = validate_replica_config(_config(bundled), {"featured": _remote()})
    assert any("bundle" in msg for msg in problems)
65
+
66
+
67
def test_remote_delegated_encryption_rejected():
    """A collection with delegated encryption is reported."""
    delegated = _col(encryption="delegated")
    problems = validate_replica_config(_config(delegated), {"featured": _remote()})
    assert any("delegated" in msg for msg in problems)
71
+
72
+
73
def test_appendonly_remote_rejected():
    """An appendOnly collection is reported."""
    append_only = _col(appendOnly=True)
    problems = validate_replica_config(_config(append_only), {"featured": _remote()})
    expected = "appendOnly cannot be used with remote replication"
    assert any(expected in msg for msg in problems)
77
+
78
+
79
def test_binary_remote_rejected():
    """A collection whose allowed MIME types exclude JSON is reported."""
    binary = _col(allowedMimeTypes=["image/png"])
    problems = validate_replica_config(_config(binary), {"featured": _remote()})
    expected = "binary collections cannot have remote replication"
    assert any(expected in msg for msg in problems)
83
+
84
+
85
def test_push_through_without_push_path_rejected():
    """push_through with no push path is reported."""
    remote = _remote(writeMode=WriteMode.PUSH_THROUGH)
    problems = validate_replica_config(_config(_col()), {"featured": remote})
    assert any("push_path" in msg for msg in problems)
90
+
91
+
92
def test_bidirectional_without_push_path_rejected():
    """bidirectional with no push path is reported."""
    remote = _remote(writeMode=WriteMode.BIDIRECTIONAL)
    problems = validate_replica_config(_config(_col()), {"featured": remote})
    assert any("push_path" in msg for msg in problems)
97
+
98
+
99
def test_push_through_with_push_path_passes():
    """push_through with a push path validates cleanly."""
    remote = _remote(pushPath="/push/posts/featured", writeMode=WriteMode.PUSH_THROUGH)
    sync_config = _config(_col(pullOnly=None))
    assert validate_replica_config(sync_config, {"featured": remote}) == []
105
+
106
+
107
def test_push_only_without_push_path_passes():
    """PUSH_ONLY does not require push_path (writes are local-only, no proxying)."""
    remote = _remote(writeMode=WriteMode.PUSH_ONLY)
    sync_config = _config(_col(pullOnly=None))
    assert validate_replica_config(sync_config, {"featured": remote}) == []
@@ -0,0 +1,356 @@
1
+ """Tests for ReplicaManager — sync logic and write modes."""
2
+
3
+
4
+ import json
5
+ import time
6
+
7
+ import httpx
8
+ import pytest
9
+ import respx
10
+
11
+ from starfish_protocol.merge import deep_merge
12
+ from starfish_server.protocol.push import push
13
+ from starfish_replica.config import RemoteCollection, RemoteConfig, SyncTrigger, WriteMode
14
+ from starfish_replica.manager import ReplicaManager
15
+ from tests.helpers import MemoryObjectStore
16
+
17
+
18
def _make_col(
    write_mode: WriteMode = WriteMode.PULL_ONLY,
    sync_triggers: list[SyncTrigger] | None = None,
    on_pull_min_interval_ms: int | None = None,
) -> RemoteCollection:
    """Build the "featured" RemoteCollection fixture shared by the manager tests.

    Args:
        write_mode: Write mode placed on the remote config.
        sync_triggers: Triggers for the remote config; a falsy value (``None``
            or an empty list) falls back to ``[SyncTrigger.SCHEDULED]``.
        on_pull_min_interval_ms: Optional on-pull cooldown forwarded unchanged.

    Returns:
        A collection stored at ``posts/featured`` that points at
        ``https://primary.example.com/v1`` with both pull and push paths set.
    """
    triggers = sync_triggers or [SyncTrigger.SCHEDULED]
    remote = RemoteConfig(
        url="https://primary.example.com/v1",
        pull_path="/pull/posts/featured",
        push_path="/push/posts/featured",
        interval_ms=60_000,
        write_mode=write_mode,
        sync_triggers=triggers,
        on_pull_min_interval_ms=on_pull_min_interval_ms,
    )
    return RemoteCollection(name="featured", storage_path="posts/featured", remote=remote)
36
+
37
+
38
+ def _primary_response(data: dict, hash_val: str = "abc123", timestamp: int = 1000) -> dict:
39
+ return {"data": data, "hash": hash_val, "timestamp": timestamp}
40
+
41
+
42
+
43
def test_deep_merge_remote_wins_scalar():
    """When both sides hold a scalar under the same key, the second argument wins."""
    local, remote = {"a": 1}, {"a": 2}
    assert deep_merge(local, remote) == {"a": 2}
46
+
47
+
48
def test_deep_merge_adds_remote_keys():
    """Keys present only in the second argument are added alongside the first's keys."""
    local, remote = {"a": 1}, {"b": 2}
    assert deep_merge(local, remote) == {"a": 1, "b": 2}
51
+
52
+
53
def test_deep_merge_recursive():
    """Nested dicts are merged key by key rather than replaced wholesale."""
    merged = deep_merge(
        {"nested": {"x": 1, "y": 2}},
        {"nested": {"y": 99, "z": 3}},
    )
    expected = {"nested": {"x": 1, "y": 99, "z": 3}}
    assert merged == expected
58
+
59
+
60
+
61
@respx.mock
async def test_sync_writes_primary_data_to_store():
    """sync_now stores the primary's data under the collection's storage path."""
    store = MemoryObjectStore()
    collection = _make_col()
    payload = _primary_response({"title": "Hello"}, hash_val="hash1")
    respx.get("https://primary.example.com/v1/pull/posts/featured").respond(200, json=payload)

    async with httpx.AsyncClient() as client:
        await ReplicaManager(store, [collection], client=client).sync_now("featured")

    stored = await store.get_string("posts/featured")
    assert stored is not None
    assert json.loads(stored)["data"] == {"title": "Hello"}
77
+
78
+
79
@respx.mock
async def test_sync_skips_write_when_hash_unchanged():
    """sync_now leaves the stored document untouched when the primary's hash is unchanged.

    The local store is seeded through push(), and the mocked primary reports
    the same data and hash. The manager's ``_last_hash`` entry is pre-set to
    simulate a previous successful sync.
    """
    store = MemoryObjectStore()
    col = _make_col()

    # Pre-populate local store with same data
    await push(store, "posts/featured", {"title": "Hello"}, None)
    local_raw = await store.get_string("posts/featured")
    local_hash = json.loads(local_raw)["hash"]

    respx.get("https://primary.example.com/v1/pull/posts/featured").respond(
        200, json=_primary_response({"title": "Hello"}, hash_val=local_hash)
    )

    async with httpx.AsyncClient() as client:
        manager = ReplicaManager(store, [col], client=client)
        manager._last_hash["featured"] = local_hash  # simulate already synced
        await manager.sync_now("featured")

    raw = await store.get_string("posts/featured")
    # Compare the whole serialized document with the pre-sync snapshot. The
    # primary serves identical data, so checking only "data" would also pass
    # if the sync had rewritten the document.
    assert raw == local_raw
    assert json.loads(raw)["data"] == {"title": "Hello"}
101
+
102
+
103
@respx.mock
async def test_sync_empty_primary_is_noop():
    """A primary reply with empty data and an empty hash writes nothing locally."""
    store = MemoryObjectStore()
    collection = _make_col()
    empty_reply = {"data": {}, "hash": "", "timestamp": 1000}
    respx.get("https://primary.example.com/v1/pull/posts/featured").respond(200, json=empty_reply)

    async with httpx.AsyncClient() as client:
        await ReplicaManager(store, [collection], client=client).sync_now("featured")

    stored = await store.get_string("posts/featured")
    assert stored is None
116
+
117
+
118
@respx.mock
async def test_sync_bidirectional_merges_local_and_remote():
    """In BIDIRECTIONAL mode a sync merges the primary's data into the local document."""
    store = MemoryObjectStore()
    collection = _make_col(write_mode=WriteMode.BIDIRECTIONAL)

    # Local starts as {a: 1, b: 2}; the primary serves {b: 99, c: 3}.
    await push(store, "posts/featured", {"a": 1, "b": 2}, None)
    payload = _primary_response({"b": 99, "c": 3}, hash_val="newhash")
    respx.get("https://primary.example.com/v1/pull/posts/featured").respond(200, json=payload)

    async with httpx.AsyncClient() as client:
        await ReplicaManager(store, [collection], client=client).sync_now("featured")

    merged = json.loads(await store.get_string("posts/featured"))["data"]
    # Local-only "a" survives, the remote wins on "b", and the remote adds "c".
    for key, expected in (("a", 1), ("b", 99), ("c", 3)):
        assert merged[key] == expected
140
+
141
+
142
+ # Regression (parity with the TS test): a corrupt local replica document is
143
+ # recovered by overwriting it on sync. `_do_sync` passes `base_hash =
144
+ # current_local_hash` ("" for a corrupt/empty read); push() treats base_hash="" the
145
+ # same as no hash when current_hash is also "". Before the fix the manager coerced
146
+ # `current_local_hash if current_local_hash else None` → None, and push() rejects
147
+ # base_hash=None when a (corrupt) doc is present, so sync_now raised "Concurrent
148
+ # write" every cycle and the replica was permanently stuck.
149
@respx.mock
async def test_sync_recovers_from_a_corrupt_local_document():
    """A sync overwrites an unparseable local document with the primary's data."""
    store = MemoryObjectStore()
    collection = _make_col()  # default PULL_ONLY write mode
    # Seed the store with invalid JSON, as an interrupted write might leave behind.
    await store.put("posts/featured", "{ this is not valid json")
    payload = _primary_response({"x": 1}, hash_val="h1")
    respx.get("https://primary.example.com/v1/pull/posts/featured").respond(200, json=payload)

    async with httpx.AsyncClient() as client:
        manager = ReplicaManager(store, [collection], client=client)
        # Must complete without raising: the corrupt document is recoverable.
        await manager.sync_now("featured")

    recovered = json.loads(await store.get_string("posts/featured"))
    assert recovered["data"] == {"x": 1}
162
+
163
+
164
@respx.mock
async def test_sync_bidirectional_converges_on_repeat():
    """Repeating a BIDIRECTIONAL sync against the same primary data is stable.

    Local and remote diverge, so each cycle pulls and merges again. The second
    cycle must produce the same data as the first (no drift, no keys lost or
    added).
    """
    store = MemoryObjectStore()
    collection = _make_col(write_mode=WriteMode.BIDIRECTIONAL)
    await push(store, "posts/featured", {"local": "value", "shared": "old"}, None)
    payload = _primary_response({"remote": "value", "shared": "new"}, hash_val="remote-hash")
    respx.get("https://primary.example.com/v1/pull/posts/featured").respond(200, json=payload)

    snapshots = []
    async with httpx.AsyncClient() as client:
        manager = ReplicaManager(store, [collection], client=client)
        for _cycle in range(2):
            await manager.sync_now("featured")
            snapshots.append(json.loads(await store.get_string("posts/featured"))["data"])

    first, second = snapshots
    assert second == first  # idempotent: no drift across cycles
    assert first == {"local": "value", "remote": "value", "shared": "new"}
183
+
184
+
185
@respx.mock
async def test_on_pull_triggers_sync():
    """on_pull fetches from the primary for a collection with the ON_PULL trigger."""
    store = MemoryObjectStore()
    collection = _make_col(sync_triggers=[SyncTrigger.ON_PULL])
    payload = _primary_response({"title": "Fresh"}, hash_val="freshhash")
    respx.get("https://primary.example.com/v1/pull/posts/featured").respond(200, json=payload)

    async with httpx.AsyncClient() as client:
        await ReplicaManager(store, [collection], client=client).on_pull("featured")

    stored = await store.get_string("posts/featured")
    assert json.loads(stored)["data"] == {"title": "Fresh"}
199
+
200
+
201
@respx.mock
async def test_primary_error_calls_on_error_handler():
    """A primary failure during on_pull goes to on_error instead of being raised."""
    store = MemoryObjectStore()
    collection = _make_col(sync_triggers=[SyncTrigger.ON_PULL])
    respx.get("https://primary.example.com/v1/pull/posts/featured").respond(503)

    reported: list[tuple[str, Exception]] = []

    def record(name, exc):
        # Collect every (collection name, exception) pair the manager reports.
        return reported.append((name, exc))

    async with httpx.AsyncClient() as client:
        manager = ReplicaManager(store, [collection], client=client, on_error=record)
        await manager.on_pull("featured")  # should not raise

    assert len(reported) == 1
    reported_name = reported[0][0]
    assert reported_name == "featured"
217
+
218
+
219
async def test_sync_now_unknown_collection_raises():
    """sync_now raises ValueError for a name the manager was not configured with."""
    manager = ReplicaManager(MemoryObjectStore(), [])
    with pytest.raises(ValueError, match="Unknown remote collection"):
        await manager.sync_now("nonexistent")
224
+
225
+
226
+
227
@respx.mock
async def test_on_pull_respects_cooldown():
    """A second on_pull inside the cooldown window does not contact the primary."""
    store = MemoryObjectStore()
    # ON_PULL trigger with a 5-second cooldown between primary fetches.
    collection = _make_col(sync_triggers=[SyncTrigger.ON_PULL], on_pull_min_interval_ms=5_000)
    payload = _primary_response({"title": "Hello"}, hash_val="hash1")
    route = respx.get("https://primary.example.com/v1/pull/posts/featured").respond(
        200, json=payload
    )

    async with httpx.AsyncClient() as client:
        manager = ReplicaManager(store, [collection], client=client)
        await manager.on_pull("featured")  # first call reaches the primary
        await manager.on_pull("featured")  # still cooling down: must be skipped

    assert route.call_count == 1
245
+
246
+
247
@respx.mock
async def test_on_pull_syncs_after_cooldown_expires():
    """Once the cooldown has elapsed, on_pull contacts the primary again."""
    store = MemoryObjectStore()
    # A 1 ms cooldown, which expires almost immediately.
    collection = _make_col(sync_triggers=[SyncTrigger.ON_PULL], on_pull_min_interval_ms=1)
    payload = _primary_response({"title": "Hello"}, hash_val="hash1")
    route = respx.get("https://primary.example.com/v1/pull/posts/featured").respond(
        200, json=payload
    )

    async with httpx.AsyncClient() as client:
        manager = ReplicaManager(store, [collection], client=client)
        await manager.on_pull("featured")  # first sync
        # Rewind the recorded sync time by a full second so the cooldown has
        # expired regardless of how fast the test runs.
        one_second_ago = time.monotonic() - 1.0
        manager._last_sync_at["featured"] = one_second_ago
        await manager.on_pull("featured")  # cooldown elapsed: hits the primary again

    assert route.call_count == 2
267
+
268
+
269
@respx.mock
async def test_on_pull_no_cooldown_always_syncs():
    """With no on_pull_min_interval_ms set, each on_pull call reaches the primary."""
    store = MemoryObjectStore()
    collection = _make_col(sync_triggers=[SyncTrigger.ON_PULL])  # cooldown left unset
    payload = _primary_response({"title": "Hello"}, hash_val="hash1")
    route = respx.get("https://primary.example.com/v1/pull/posts/featured").respond(
        200, json=payload
    )

    async with httpx.AsyncClient() as client:
        manager = ReplicaManager(store, [collection], client=client)
        for _attempt in range(2):
            await manager.on_pull("featured")

    assert route.call_count == 2
284
+
285
+
286
+
287
@respx.mock
async def test_push_only_skips_sync_from_primary():
    """sync_now on a PUSH_ONLY collection neither calls the primary nor writes locally."""
    store = MemoryObjectStore()
    collection = _make_col(write_mode=WriteMode.PUSH_ONLY)
    payload = _primary_response({"title": "Should not arrive"}, hash_val="x")
    route = respx.get("https://primary.example.com/v1/pull/posts/featured").respond(
        200, json=payload
    )

    async with httpx.AsyncClient() as client:
        await ReplicaManager(store, [collection], client=client).sync_now("featured")

    # The mocked pull route must never have been requested.
    assert route.call_count == 0
    # Nothing was pulled, so the store has no document for the collection.
    stored = await store.get_string("posts/featured")
    assert stored is None
304
+
305
+
306
@respx.mock
async def test_push_only_allows_local_writes():
    """A direct push() into the store succeeds and persists the submitted data."""
    from starfish_server.protocol.types import PushSuccess

    store = MemoryObjectStore()
    # Built only to confirm that a PUSH_ONLY collection can be constructed.
    _make_col(write_mode=WriteMode.PUSH_ONLY)

    # Write locally in the same way the route handler would.
    outcome = await push(store, "posts/featured", {"submitted": True}, None)
    assert isinstance(outcome, PushSuccess)

    stored = json.loads(await store.get_string("posts/featured"))
    assert stored["data"] == {"submitted": True}
319
+
320
+
321
@respx.mock
async def test_sync_strips_prototype_pollution_keys():
    """Prototype-pollution keys sent by the primary are dropped before the local write.

    This covers the pull-only path, so that a compromised primary cannot plant
    ``__proto__`` or ``constructor`` keys in the local store.
    """
    store = MemoryObjectStore()
    collection = _make_col()
    hostile = {"safe": 1, "__proto__": {"polluted": True}, "constructor": {"x": 1}}
    payload = _primary_response(hostile, hash_val="hash1")
    respx.get("https://primary.example.com/v1/pull/posts/featured").respond(200, json=payload)

    async with httpx.AsyncClient() as client:
        await ReplicaManager(store, [collection], client=client).sync_now("featured")

    stored = await store.get_string("posts/featured")
    assert stored is not None
    data = json.loads(stored)["data"]
    assert data["safe"] == 1
    for forbidden in ("__proto__", "constructor"):
        assert forbidden not in data
341
+
342
+
343
@respx.mock
async def test_proxy_push_rejects_unexpected_response_shape():
    """proxy_push returns 502 when the primary's 200 reply is not a valid push result."""
    store = MemoryObjectStore()
    collection = _make_col(write_mode=WriteMode.PUSH_THROUGH)
    # The primary answers 200, but the body is not a push result.
    respx.post("https://primary.example.com/v1/push/posts/featured").respond(
        200, json={"unexpected": True}
    )
    request_body = json.dumps({"data": {"x": 1}, "baseHash": None})

    async with httpx.AsyncClient() as client:
        manager = ReplicaManager(store, [collection], client=client)
        status, _body = await manager.proxy_push("featured", request_body)

    assert status == 502
@@ -0,0 +1,151 @@
1
+ """Tests for create_replica_server_plugin — route hooks, validation, shutdown."""
2
+
3
+
4
+ import asyncio
5
+
6
+ import httpx
7
+ import pytest
8
+ import respx
9
+
10
+ from starfish_protocol.plugins import PullHookContext, PushHookContext
11
+ from starfish_server.config.schema import CollectionConfig, SyncConfig
12
+
13
+ from starfish_replica.config import RemoteConfig, SyncTrigger, WriteMode
14
+ from starfish_replica.plugin import create_replica_server_plugin
15
+ from tests.helpers import MemoryObjectStore
16
+
17
+
18
def _col(**kwargs) -> CollectionConfig:
    """Build the "featured" CollectionConfig fixture; keyword arguments override the base fields."""
    base = {
        "name": "featured",
        "storagePath": "posts/featured",
        "readRoles": ["public"],
        "writeRoles": [],
        "encryption": "none",
        "maxBodyBytes": 65536,
    }
    # Caller-supplied values take precedence over the base fields.
    return CollectionConfig(**{**base, **kwargs})
29
+
30
+
31
def _config() -> SyncConfig:
    """Return a version-1 SyncConfig whose only collection is the default ``_col()``."""
    collections = [_col()]
    return SyncConfig(version=1, collections=collections)
33
+
34
+
35
def _remote(**kwargs) -> RemoteConfig:
    """Build a RemoteConfig aimed at the example primary; keyword arguments override the base fields."""
    base = {
        "url": "https://primary.example.com/v1",
        "pullPath": "/pull/posts/featured",
    }
    # Caller-supplied values take precedence over the base fields.
    return RemoteConfig(**{**base, **kwargs})
39
+
40
+
41
def test_invalid_config_raises_at_construction():
    """Building the plugin with an invalid remote config raises ValueError."""
    with pytest.raises(ValueError, match="invalid configuration"):
        store = MemoryObjectStore()
        sync_config = _config()
        # PUSH_THROUGH is requested but no push path is given.
        remotes = {"featured": _remote(writeMode=WriteMode.PUSH_THROUGH)}
        create_replica_server_plugin(store=store, sync_config=sync_config, collections=remotes)
48
+
49
+
50
async def test_before_pull_rejects_push_only():
    """before_pull rejects a PUSH_ONLY collection with 405 and a "write-only" error."""
    store = MemoryObjectStore()
    sync_config = _config()
    remotes = {"featured": _remote(writeMode=WriteMode.PUSH_ONLY)}
    replica = create_replica_server_plugin(
        store=store, sync_config=sync_config, collections=remotes
    )

    context = PullHookContext(collection="featured", params={})
    outcome = await replica.plugin.before_pull(context)

    assert outcome.action == "reject"
    assert outcome.status == 405
    assert "write-only" in outcome.error
60
+
61
+
62
async def test_before_pull_proceeds_for_non_remote():
    """before_pull returns "proceed" for a collection that has no remote config."""
    store = MemoryObjectStore()
    sync_config = _config()
    remotes = {"featured": _remote()}
    replica = create_replica_server_plugin(
        store=store, sync_config=sync_config, collections=remotes
    )

    # "other" is not a key in the remotes mapping.
    context = PullHookContext(collection="other", params={})
    outcome = await replica.plugin.before_pull(context)

    assert outcome.action == "proceed"
70
+
71
+
72
@respx.mock
async def test_before_pull_syncs_on_pull_trigger():
    """before_pull contacts the primary once for an ON_PULL collection, then proceeds."""
    primary_reply = {"data": {"a": 1}, "hash": "h1", "timestamp": 1}
    route = respx.get("https://primary.example.com/v1/pull/posts/featured").respond(
        200, json=primary_reply
    )
    store = MemoryObjectStore()
    sync_config = _config()
    remotes = {"featured": _remote(syncTriggers=[SyncTrigger.ON_PULL])}
    replica = create_replica_server_plugin(
        store=store, sync_config=sync_config, collections=remotes
    )

    context = PullHookContext(collection="featured", params={})
    outcome = await replica.plugin.before_pull(context)

    assert outcome.action == "proceed"
    assert route.call_count == 1
    await replica.manager.stop()
86
+
87
+
88
async def test_intercept_push_rejects_pull_only():
    """intercept_push rejects a PULL_ONLY collection with 405 and a "read-only" error."""
    store = MemoryObjectStore()
    sync_config = _config()
    remotes = {"featured": _remote(writeMode=WriteMode.PULL_ONLY)}
    replica = create_replica_server_plugin(
        store=store, sync_config=sync_config, collections=remotes
    )

    context = PushHookContext(collection="featured", params={}, raw_body="{}")
    outcome = await replica.plugin.intercept_push(context)

    assert outcome.action == "reject"
    assert outcome.status == 405
    assert "read-only" in outcome.error
100
+
101
+
102
@respx.mock
async def test_intercept_push_proxies_push_through():
    """intercept_push forwards a PUSH_THROUGH write and responds with the primary's result."""
    primary_base = "https://primary.example.com/v1"
    respx.post(primary_base + "/push/posts/featured").respond(
        200, json={"hash": "primary-hash", "timestamp": 5}
    )
    respx.get(primary_base + "/pull/posts/featured").respond(
        200, json={"data": {}, "hash": "primary-hash", "timestamp": 5}
    )
    store = MemoryObjectStore()
    sync_config = _config()
    remotes = {
        "featured": _remote(writeMode=WriteMode.PUSH_THROUGH, pushPath="/push/posts/featured")
    }
    replica = create_replica_server_plugin(
        store=store, sync_config=sync_config, collections=remotes
    )

    context = PushHookContext(collection="featured", params={}, raw_body='{"data": {}}')
    outcome = await replica.plugin.intercept_push(context)

    assert outcome.action == "respond"
    assert outcome.status == 200
    assert outcome.body == {"hash": "primary-hash", "timestamp": 5}
    # Yield once so the background sync task runs while respx is still active.
    await asyncio.sleep(0)
    await replica.manager.stop()
125
+
126
+
127
async def test_intercept_push_proceeds_for_bidirectional():
    """intercept_push returns "proceed" for a BIDIRECTIONAL collection."""
    store = MemoryObjectStore()
    sync_config = _config()
    remotes = {
        "featured": _remote(writeMode=WriteMode.BIDIRECTIONAL, pushPath="/push/posts/featured")
    }
    replica = create_replica_server_plugin(
        store=store, sync_config=sync_config, collections=remotes
    )

    context = PushHookContext(collection="featured", params={}, raw_body="{}")
    outcome = await replica.plugin.intercept_push(context)

    assert outcome.action == "proceed"
    await replica.manager.stop()
140
+
141
+
142
async def test_shutdown_stops_manager():
    """plugin.shutdown() empties the manager's task list after start() has filled it."""
    store = MemoryObjectStore()
    sync_config = _config()
    remotes = {"featured": _remote(syncTriggers=[SyncTrigger.SCHEDULED])}
    replica = create_replica_server_plugin(
        store=store, sync_config=sync_config, collections=remotes
    )

    await replica.manager.start()
    # One SCHEDULED collection is configured, and one task is expected for it.
    running = len(replica.manager._tasks)
    assert running == 1

    await replica.plugin.shutdown()
    assert replica.manager._tasks == []