starfish-projection 3.0.0a17__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- starfish_projection-3.0.0a17/PKG-INFO +12 -0
- starfish_projection-3.0.0a17/README.md +83 -0
- starfish_projection-3.0.0a17/pyproject.toml +29 -0
- starfish_projection-3.0.0a17/setup.cfg +4 -0
- starfish_projection-3.0.0a17/starfish_projection/__init__.py +37 -0
- starfish_projection-3.0.0a17/starfish_projection/config.py +63 -0
- starfish_projection-3.0.0a17/starfish_projection/plugin.py +157 -0
- starfish_projection-3.0.0a17/starfish_projection.egg-info/PKG-INFO +12 -0
- starfish_projection-3.0.0a17/starfish_projection.egg-info/SOURCES.txt +11 -0
- starfish_projection-3.0.0a17/starfish_projection.egg-info/dependency_links.txt +1 -0
- starfish_projection-3.0.0a17/starfish_projection.egg-info/requires.txt +8 -0
- starfish_projection-3.0.0a17/starfish_projection.egg-info/top_level.txt +1 -0
- starfish_projection-3.0.0a17/tests/test_plugin.py +304 -0
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: starfish-projection
|
|
3
|
+
Version: 3.0.0a17
|
|
4
|
+
Summary: Starfish incremental-list extension (post-push projection hook: fold each write into a single list document — append, update-in-place, or remove)
|
|
5
|
+
Requires-Python: >=3.11
|
|
6
|
+
Requires-Dist: starfish-protocol
|
|
7
|
+
Requires-Dist: starfish-server
|
|
8
|
+
Provides-Extra: dev
|
|
9
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
10
|
+
Requires-Dist: pytest-asyncio>=0.21; extra == "dev"
|
|
11
|
+
Requires-Dist: httpx>=0.25.0; extra == "dev"
|
|
12
|
+
Requires-Dist: fastapi>=0.100; extra == "dev"
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
# starfish-projection
|
|
2
|
+
|
|
3
|
+
Incremental-list extension for Starfish (Python). After a successful push, the
|
|
4
|
+
server hands each registered plugin a `WriteEvent`; this plugin runs an
|
|
5
|
+
app-supplied **pure** mapping for each watched source collection and folds the
|
|
6
|
+
result into a single **list document** — appending a new entry, updating one in
|
|
7
|
+
place, or removing it. The client then pulls one document to read the whole list.
|
|
8
|
+
|
|
9
|
+
## Install
|
|
10
|
+
|
|
11
|
+
```sh
|
|
12
|
+
pip install starfish-server starfish-projection
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
## Usage
|
|
16
|
+
|
|
17
|
+
```python
|
|
18
|
+
from starfish_server import create_sync_router, SyncRouterOptions
|
|
19
|
+
from starfish_projection import (
|
|
20
|
+
Projection, ProjectionSet, ProjectionRemove, create_projection_server_plugin,
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
def project(e):
|
|
24
|
+
meta = e.body or {}
|
|
25
|
+
if meta.get("deleted") is True:
|
|
26
|
+
return ProjectionRemove(id=e.params["id"])
|
|
27
|
+
return ProjectionSet(id=e.params["id"], value={"name": meta.get("name")})
|
|
28
|
+
|
|
29
|
+
plugin = create_projection_server_plugin(
|
|
30
|
+
store=store,
|
|
31
|
+
projections=[
|
|
32
|
+
Projection(
|
|
33
|
+
source="products",
|
|
34
|
+
# One list document per tenant keeps each list bounded.
|
|
35
|
+
target=lambda e: f"catalog/{e.params['tenant']}",
|
|
36
|
+
project=project,
|
|
37
|
+
)
|
|
38
|
+
],
|
|
39
|
+
)
|
|
40
|
+
|
|
41
|
+
router = create_sync_router(
|
|
42
|
+
SyncRouterOptions(config=config, store=store, plugins=[plugin]),
|
|
43
|
+
)
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
`project(event)` returns one of:
|
|
47
|
+
|
|
48
|
+
- `ProjectionSet(id, value)` — **upsert**: append a new entry `{id, value}` to the
|
|
49
|
+
target list, or replace an existing entry's `value` in place (keeping its
|
|
50
|
+
position),
|
|
51
|
+
- `ProjectionRemove(id)` — **remove** the entry with this `id` (a no-op if absent).
|
|
52
|
+
There is no delete route on the server, so a removal is signalled by a normal
|
|
53
|
+
write whose body your mapping recognises as a deletion (a tombstone),
|
|
54
|
+
- `None` — **ignore** the event.
|
|
55
|
+
|
|
56
|
+
`project` may be sync or async. The list is stored as
|
|
57
|
+
`{"items": [{"id", "value"}, …]}` in insertion order — `id` is held alongside
|
|
58
|
+
`value`, never merged into it. `target` is a fixed storage key or a function of
|
|
59
|
+
the event; return `None` from the function to ignore the event, or a per-bucket
|
|
60
|
+
key to **shard** a large view into many small lists.
|
|
61
|
+
|
|
62
|
+
The list is written in-process against the store, never over HTTP, so the target
|
|
63
|
+
collection can be declared `pull_only=True` — clients read it, but only the
|
|
64
|
+
projection writes it.
|
|
65
|
+
|
|
66
|
+
### Concurrency & scale
|
|
67
|
+
|
|
68
|
+
Many source writes can target the same list at once, so each apply is a
|
|
69
|
+
compare-and-set loop: the plugin re-pulls the list, folds the entry in, and
|
|
70
|
+
pushes with the pulled hash; on a hash mismatch (a concurrent write landed first)
|
|
71
|
+
it re-pulls and re-applies rather than clobbering, so no update is lost unless the retry budget (`max_retries`) is exhausted.
|
|
72
|
+
|
|
73
|
+
Every write rewrites and re-hashes the **whole** list document under one per-key
|
|
74
|
+
lock, and in-process pushes bypass the HTTP `max_body_bytes` limit, so an
|
|
75
|
+
unbounded list can grow without limit. Keep lists bounded — **shard** with a
|
|
76
|
+
`target` function (one list per tenant/bucket), and optionally set `max_items` as
|
|
77
|
+
a safety cap (once full, further appends are logged and dropped; updates and
|
|
78
|
+
removes still apply). `max_retries` (default 8) bounds the CAS loop; on exhaustion the op is logged and dropped.
|
|
79
|
+
|
|
80
|
+
Projection failures are logged and never break the originating client write.
|
|
81
|
+
|
|
82
|
+
See `docs/ts/projection/` for the full guide (the TypeScript and Python APIs
|
|
83
|
+
mirror each other).
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68.0", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "starfish-projection"
|
|
7
|
+
version = "3.0.0a17"
|
|
8
|
+
description = "Starfish incremental-list extension (post-push projection hook: fold each write into a single list document — append, update-in-place, or remove)"
|
|
9
|
+
requires-python = ">=3.11"
|
|
10
|
+
dependencies = [
|
|
11
|
+
"starfish-protocol",
|
|
12
|
+
"starfish-server",
|
|
13
|
+
]
|
|
14
|
+
|
|
15
|
+
[project.optional-dependencies]
|
|
16
|
+
dev = [
|
|
17
|
+
"pytest>=7.0",
|
|
18
|
+
"pytest-asyncio>=0.21",
|
|
19
|
+
"httpx>=0.25.0",
|
|
20
|
+
"fastapi>=0.100",
|
|
21
|
+
]
|
|
22
|
+
|
|
23
|
+
[tool.uv.sources]
|
|
24
|
+
starfish-protocol = { path = "../protocol", editable = true }
|
|
25
|
+
starfish-server = { path = "../server", editable = true }
|
|
26
|
+
|
|
27
|
+
[tool.pytest.ini_options]
|
|
28
|
+
asyncio_mode = "auto"
|
|
29
|
+
testpaths = ["tests"]
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
"""``starfish-projection`` — incremental-list extension.
|
|
2
|
+
|
|
3
|
+
Public surface: the :class:`Projection` list spec and its outcome types
|
|
4
|
+
(:class:`ProjectionSet`, :class:`ProjectionRemove`), and
|
|
5
|
+
``create_projection_server_plugin`` — a ``ServerPlugin`` whose ``after_write``
|
|
6
|
+
hook folds each source write into a single target list document (append /
|
|
7
|
+
update-in-place / remove). Clients pull that one document to read the whole list.
|
|
8
|
+
Pair the target collection with ``pull_only=True`` so only the projection writes
|
|
9
|
+
it (clients read it only).
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from starfish_projection.config import (
|
|
13
|
+
Projection,
|
|
14
|
+
ProjectionOp,
|
|
15
|
+
ProjectionRemove,
|
|
16
|
+
ProjectionSet,
|
|
17
|
+
ProjectionTarget,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def __getattr__(name: str):
    """Resolve ``create_projection_server_plugin`` lazily on first access.

    Deferring the import keeps ``starfish_server`` from being loaded for users
    who only import the configuration types. Any other missing attribute
    raises :class:`AttributeError`, as a normal module lookup would.
    """
    if name != "create_projection_server_plugin":
        raise AttributeError(f"module 'starfish_projection' has no attribute {name!r}")

    from starfish_projection.plugin import create_projection_server_plugin as factory

    return factory
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
__all__ = [
|
|
31
|
+
"Projection",
|
|
32
|
+
"ProjectionSet",
|
|
33
|
+
"ProjectionRemove",
|
|
34
|
+
"ProjectionOp",
|
|
35
|
+
"ProjectionTarget",
|
|
36
|
+
"create_projection_server_plugin",
|
|
37
|
+
]
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
"""Projection (incremental-list) configuration types."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from typing import Awaitable, Callable, Sequence
|
|
7
|
+
|
|
8
|
+
from starfish_protocol.plugins import WriteEvent
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
class ProjectionSet:
    """Upsert outcome of a projection mapping.

    The entry ``{id, value}`` is appended to the target list. When an entry
    with the same ``id`` is already present, its ``value`` is replaced in place
    and the entry keeps its position.
    """

    # Identifier of the list entry to append or update.
    id: str
    # Payload stored under the entry's ``value`` key.
    value: dict
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass
class ProjectionRemove:
    """Removal outcome of a projection mapping.

    The entry with this ``id`` is dropped from the target list; removing an id
    that is not present is a no-op. Because the server has no delete route, a
    removal is signalled by a normal write whose body the mapping recognises
    as a deletion (a tombstone).
    """

    # Identifier of the list entry to remove.
    id: str
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
# Outcome of a projection mapping: an upsert, a removal, or ``None`` to ignore
# the event.
ProjectionOp = ProjectionSet | ProjectionRemove | None

# The mapping itself; it may return the op directly or an awaitable of it.
ProjectFn = Callable[[WriteEvent], "ProjectionOp | Awaitable[ProjectionOp]"]

# Destination of a projection: either a fixed storage key, or a callable that
# maps the event to a key (route the entry into that list) or to ``None``
# (ignore the event).
ProjectionTarget = str | Callable[[WriteEvent], "str | None"]
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@dataclass
class Projection:
    """Specification of one projection list.

    Whenever a write lands in a collection named by ``source``, the plugin
    calls ``project`` with the :class:`WriteEvent` and folds the returned op
    into the target list document: :class:`ProjectionSet` appends or updates
    in place, :class:`ProjectionRemove` removes, and ``None`` ignores the
    event. The read-modify-write against the store is done by the plugin; the
    app provides only the mapping.

    ``project`` MUST be a pure function of the event it is given. The event
    carries ``collection``, ``params``, an optional ``body``, ``hash``,
    ``timestamp`` and ``identity``. The server fills in ``WriteEvent.body``
    for JSON pushes, while ``params`` is always present.

    ``target`` is either a fixed storage key or a function of the event. The
    function may return ``None`` to ignore the event, or a per-bucket key so
    that a large view is sharded into many small lists (for example one per
    tenant).
    """

    # One watched source collection name, or a sequence of them.
    source: str | Sequence[str]
    # Fixed storage key of the list, or a function deriving it from the event.
    target: ProjectionTarget
    # App-supplied mapping from a write event to an entry op.
    project: ProjectFn
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
"""Server plugin for the projection (incremental-list) extension (Python mirror).
|
|
2
|
+
|
|
3
|
+
Implements the ``after_write`` write-path hook from the ``ServerPlugin``
|
|
4
|
+
contract: after a successful push the server hands the plugin a
|
|
5
|
+
:class:`WriteEvent`; for any projection whose ``source`` includes the event's
|
|
6
|
+
collection, the plugin runs the app-supplied pure ``project(event)`` mapping and
|
|
7
|
+
folds its outcome into a single target *list document* — appending a new entry,
|
|
8
|
+
replacing an existing one in place (:class:`ProjectionSet`), or removing it
|
|
9
|
+
(:class:`ProjectionRemove`); ``None`` ignores the event. The app supplies only
|
|
10
|
+
the mapping; the plugin owns all store IO. The client then pulls one document to
|
|
11
|
+
read the whole list, rather than enumerating a directory of per-entry documents.
|
|
12
|
+
|
|
13
|
+
The list is written in-process, directly against the object store — never over
|
|
14
|
+
HTTP — so the target collection can be configured ``pull_only=True`` to reject
|
|
15
|
+
every *client* write while still being populated here. That ``pull_only`` + this
|
|
16
|
+
plugin is how a target list becomes "owned by the indexer": clients read it, but
|
|
17
|
+
only the projection writes it.
|
|
18
|
+
|
|
19
|
+
Concurrency: many source writes can target the same list at once, so each apply
|
|
20
|
+
is a CAS loop — pull the current list, fold the entry in, then ``push`` with the
|
|
21
|
+
pulled ``base_hash``. ``push`` rejects on a stale hash (optimistic concurrency),
|
|
22
|
+
so on conflict we re-pull and re-apply onto fresh state rather than clobbering a
|
|
23
|
+
concurrent write. The pull MUST happen inside the loop so each retry sees the
|
|
24
|
+
latest list. Failures are logged, never raised — ``after_write`` must not break
|
|
25
|
+
the originating client write (same contract as starfish-queuing).
|
|
26
|
+
|
|
27
|
+
Scale: every write rewrites and re-hashes the whole list document under one
|
|
28
|
+
per-key lock, and in-process pushes bypass the HTTP ``max_body_bytes`` limit, so
|
|
29
|
+
a single list can grow unbounded server-side. Keep lists bounded — shard via a
|
|
30
|
+
``target`` function (one list per tenant/bucket) and/or set ``max_items``.
|
|
31
|
+
"""
|
|
32
|
+
|
|
33
|
+
from __future__ import annotations
|
|
34
|
+
|
|
35
|
+
import inspect
|
|
36
|
+
import logging
|
|
37
|
+
from typing import Sequence
|
|
38
|
+
|
|
39
|
+
from starfish_protocol.plugins import ServerPlugin, WriteEvent
|
|
40
|
+
from starfish_server.protocol.pull import pull
|
|
41
|
+
from starfish_server.protocol.push import push
|
|
42
|
+
from starfish_server.protocol.types import PushConflict
|
|
43
|
+
from starfish_server.storage.base import AbstractObjectStore, StoreContext
|
|
44
|
+
|
|
45
|
+
from starfish_projection.config import Projection, ProjectionOp, ProjectionRemove
|
|
46
|
+
|
|
47
|
+
_log = logging.getLogger(__name__)
|
|
48
|
+
|
|
49
|
+
DEFAULT_MAX_RETRIES = 8
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _source_set(source: str | Sequence[str]) -> frozenset[str]:
    """Normalise a ``source`` spec into a frozenset of collection names.

    A bare string is treated as a single collection name, never as an
    iterable of characters; any other sequence contributes each element.
    """
    if isinstance(source, str):
        return frozenset((source,))
    return frozenset(source)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _entry_index(items: list, entry_id: str) -> int:
    """Return the position of the entry whose ``id`` equals *entry_id*, or ``-1``."""
    for position, entry in enumerate(items):
        # Tolerate malformed (non-dict) entries: they can never match an id and
        # must not make every later write to this list fail on ``.get``.
        if isinstance(entry, dict) and entry.get("id") == entry_id:
            return position
    return -1


async def _apply_op(
    store: AbstractObjectStore,
    target_key: str,
    op: ProjectionOp,
    max_retries: int,
    max_items: int | None,
    source_collection: str,
) -> None:
    """Fold a single entry op into the target list document under a CAS retry loop.

    Args:
        store: Object store holding the list document.
        target_key: Storage key of the list document.
        op: Upsert or removal to apply; ``None`` is treated as a no-op.
        max_retries: Upper bound on pull/fold/push attempts.
        max_items: Optional cap on the list length; an append that would
            exceed it is logged and dropped. Updates and removals are not
            affected by the cap.
        source_collection: Collection of the originating write, recorded on
            the store context used for the pull and push.

    Returns:
        ``None``. When every attempt ends in a push conflict, a warning is
        logged and the op is dropped.
    """
    if op is None:
        return  # nothing to fold

    # A projection-owned write runs in-process with the plugin's authority, not a
    # client's — no per-document role gating.
    ctx = StoreContext(
        collection=source_collection,
        params={},
        identity=None,
        roles=(),
        action="push",
    )

    for _ in range(max_retries):
        # Re-pull every iteration so each retry folds onto the latest list.
        current = await pull(store, target_key, context=ctx)
        base_hash = current.hash or None
        stored = current.data.get("items") if isinstance(current.data, dict) else None
        # Work on a copy; anything that is not a list is treated as empty.
        items = list(stored) if isinstance(stored, list) else []
        idx = _entry_index(items, op.id)

        if isinstance(op, ProjectionRemove):
            if idx == -1:
                return  # already absent — nothing to write
            items.pop(idx)
        elif idx == -1:
            if max_items is not None and len(items) >= max_items:
                _log.warning(
                    "projection list %r at max_items=%d; dropping append of id %r",
                    target_key,
                    max_items,
                    op.id,
                )
                return
            items.append({"id": op.id, "value": op.value})
        else:
            # Update in place: keep the entry's position, full-replace its value.
            items[idx] = {"id": op.id, "value": op.value}

        result = await push(store, target_key, {"items": items}, base_hash, context=ctx)
        if not isinstance(result, PushConflict):
            return  # PushSuccess — done
        # hash_mismatch: a concurrent write changed the list; loop to re-pull/re-apply.

    _log.warning(
        "projection list %r exhausted %d CAS retries; dropped op for id %r",
        target_key,
        max_retries,
        op.id,
    )
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def create_projection_server_plugin(
    *,
    store: AbstractObjectStore,
    projections: list[Projection],
    max_retries: int = DEFAULT_MAX_RETRIES,
    max_items: int | None = None,
) -> ServerPlugin:
    """Build a :class:`ServerPlugin` that maintains one or more projection lists:
    after a successful push to a watched ``source`` collection, it derives an
    entry op via the app's ``project`` function and folds it into the target list
    document in *store*.

    ``max_retries`` (default 8) bounds the CAS loop; on exhaustion the op is
    logged and dropped. ``max_items`` optionally caps each list — once full,
    further appends are logged and dropped (existing entries are never evicted);
    prefer sharding via a ``target`` function for large views.

    Args:
        store: Object store the list documents are read from and written to.
        projections: Projection specs; each is matched against the event's
            collection independently.
        max_retries: Upper bound on CAS attempts per applied op.
        max_items: Optional per-list length cap for appends.

    Returns:
        A ``ServerPlugin`` named ``"starfish-projection"`` whose
        ``after_write`` hook applies the projections. The hook never raises:
        any failure is logged as a warning together with its traceback.
    """

    # Normalise each projection's sources once, so the per-event check is a
    # set membership test.
    compiled = [(_source_set(p.source), p.target, p.project) for p in projections]

    async def _after_write(event: WriteEvent) -> None:
        for sources, target, project in compiled:
            if event.collection not in sources:
                continue
            try:
                # Resolve the target list before running the mapping; None = ignore.
                target_key = target(event) if callable(target) else target
                if target_key is None:
                    continue
                op = project(event)
                # ``project`` may be sync or async.
                if inspect.isawaitable(op):
                    op = await op
                if op is None:
                    continue
                await _apply_op(
                    store, target_key, op, max_retries, max_items, event.collection
                )
            except Exception as exc:  # noqa: BLE001 — must not break client writes
                # Keep the traceback: the one-line message alone rarely shows
                # whether the mapping, the target function or the store failed.
                _log.warning(
                    "projection for %r failed: %s",
                    event.collection,
                    exc,
                    exc_info=True,
                )

    return ServerPlugin(name="starfish-projection", after_write=_after_write)
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
__all__ = ["create_projection_server_plugin"]
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: starfish-projection
|
|
3
|
+
Version: 3.0.0a17
|
|
4
|
+
Summary: Starfish incremental-list extension (post-push projection hook: fold each write into a single list document — append, update-in-place, or remove)
|
|
5
|
+
Requires-Python: >=3.11
|
|
6
|
+
Requires-Dist: starfish-protocol
|
|
7
|
+
Requires-Dist: starfish-server
|
|
8
|
+
Provides-Extra: dev
|
|
9
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
10
|
+
Requires-Dist: pytest-asyncio>=0.21; extra == "dev"
|
|
11
|
+
Requires-Dist: httpx>=0.25.0; extra == "dev"
|
|
12
|
+
Requires-Dist: fastapi>=0.100; extra == "dev"
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
starfish_projection/__init__.py
|
|
4
|
+
starfish_projection/config.py
|
|
5
|
+
starfish_projection/plugin.py
|
|
6
|
+
starfish_projection.egg-info/PKG-INFO
|
|
7
|
+
starfish_projection.egg-info/SOURCES.txt
|
|
8
|
+
starfish_projection.egg-info/dependency_links.txt
|
|
9
|
+
starfish_projection.egg-info/requires.txt
|
|
10
|
+
starfish_projection.egg-info/top_level.txt
|
|
11
|
+
tests/test_plugin.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
starfish_projection
|
|
@@ -0,0 +1,304 @@
|
|
|
1
|
+
"""Integration tests — the projection plugin maintains an incremental list on push."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
|
|
5
|
+
import pytest
|
|
6
|
+
from fastapi import FastAPI, Request
|
|
7
|
+
from httpx import AsyncClient, ASGITransport
|
|
8
|
+
|
|
9
|
+
from starfish_server.config.schema import SyncConfig, CollectionConfig
|
|
10
|
+
from starfish_server.router.route_builder import (
|
|
11
|
+
create_sync_router,
|
|
12
|
+
SyncRouterOptions,
|
|
13
|
+
AuthResult,
|
|
14
|
+
)
|
|
15
|
+
from starfish_projection import (
|
|
16
|
+
Projection,
|
|
17
|
+
ProjectionRemove,
|
|
18
|
+
ProjectionSet,
|
|
19
|
+
create_projection_server_plugin,
|
|
20
|
+
)
|
|
21
|
+
from starfish_protocol.plugins import WriteEvent
|
|
22
|
+
|
|
23
|
+
from tests.helpers import MemoryObjectStore, OneShotConflictStore
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _build_app(
    collections: list[CollectionConfig],
    projections: list[Projection],
    *,
    store=None,
    max_items: int | None = None,
    max_retries: int = 8,
):
    """Assemble a FastAPI app serving the sync router with the projection plugin.

    Every request is resolved to identity ``"user-1"`` with role ``"self"``.

    Returns:
        ``(app, store)`` — the store is the one passed in, or a fresh
        ``MemoryObjectStore`` when *store* is ``None``.
    """
    # Compare against None rather than truthiness: a caller-supplied store that
    # happens to be falsy must not be silently swapped for a fresh one.
    if store is None:
        store = MemoryObjectStore()
    config = SyncConfig(version=1, collections=collections)

    async def role_resolver(request: Request) -> AuthResult:
        return AuthResult(identity="user-1", roles=["self"])

    plugin = create_projection_server_plugin(
        store=store, projections=projections, max_retries=max_retries, max_items=max_items
    )
    router = create_sync_router(
        SyncRouterOptions(
            store=store, config=config, role_resolver=role_resolver, plugins=[plugin],
        ),
    )
    app = FastAPI()
    app.include_router(router)
    return app, store
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _col(name: str, storage_path: str, **overrides) -> CollectionConfig:
    """Build a ``CollectionConfig`` with the test defaults plus *overrides*.

    Defaults: readable and writable by role ``"self"``, no encryption, and a
    1,000,000-byte body limit.
    """
    defaults = {
        "name": name,
        "storagePath": storage_path,
        "readRoles": ["self"],
        "writeRoles": ["self"],
        "encryption": "none",
        "maxBodyBytes": 1_000_000,
    }
    return CollectionConfig(**defaults, **overrides)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _source_and_list() -> list[CollectionConfig]:
    """Return the default pair: a writable ``products`` source and a pull-only ``catalog`` list."""
    source = _col("products", "products/{id}")
    target_list = _col("catalog", "catalog", pullOnly=True)
    return [source, target_list]
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
async def _push(client: AsyncClient, path: str, data: dict) -> None:
    """POST *data* to the push *path*, using the document's current hash as ``baseHash``."""
    # Look up the existing hash first: re-pushing an existing key with
    # baseHash None would fail the optimistic-concurrency check (409).
    existing = await client.get(path.replace("/push/", "/pull/"))
    if existing.status_code == 200:
        base_hash = existing.json().get("hash") or None
    else:
        base_hash = None

    response = await client.post(
        path,
        json={"data": data, "baseHash": base_hash},
        headers={"content-type": "application/json"},
    )
    assert response.status_code == 200
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
async def _read_list(store, key: str) -> list[dict]:
    """Return the ``items`` of the list document stored at *key* (``[]`` if the key is absent)."""
    raw = await store.get_string(key)
    return [] if raw is None else json.loads(raw)["data"]["items"]
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def _ids(items: list[dict]) -> list[str]:
    """Return the ``id`` of every entry, in list order."""
    collected = []
    for entry in items:
        collected.append(entry["id"])
    return collected
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def _catalog_project(e: WriteEvent):
    """Map a product write to its ``catalog`` entry op.

    A body with ``deleted`` set to ``True`` removes the entry; any other write
    upserts ``{id, value: {name}}``, with ``name`` defaulting to ``""``.
    """
    body = e.body or {}
    entry_id = e.params["id"]
    if body.get("deleted") is True:
        return ProjectionRemove(id=entry_id)
    return ProjectionSet(id=entry_id, value={"name": body.get("name", "")})
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def _catalog_projection() -> Projection:
    """Return the standard projection: ``products`` writes folded into the ``catalog`` list."""
    return Projection(
        source="products",
        target="catalog",
        project=_catalog_project,
    )
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
@pytest.mark.asyncio
async def test_appends_entries_and_serves_whole_list():
    """Two source writes yield two list entries, readable in one pull."""
    app, store = _build_app(_source_and_list(), [_catalog_projection()])
    transport = ASGITransport(app=app)
    async with AsyncClient(transport=transport, base_url="http://test") as client:
        await _push(client, "/push/products/a", {"name": "Alpha"})
        await _push(client, "/push/products/b", {"name": "Beta"})
        pulled = await client.get("/pull/catalog")
        body = pulled.json()

    expected = [
        {"id": "a", "value": {"name": "Alpha"}},
        {"id": "b", "value": {"name": "Beta"}},
    ]
    stored_items = await _read_list(store, "catalog")
    assert stored_items == expected
    # The client reads the whole list in a single GET of the one list document.
    assert body["data"]["items"] == expected
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
@pytest.mark.asyncio
async def test_updates_entry_in_place_keeping_position():
    """Re-writing an existing source replaces its value without moving the entry."""
    app, store = _build_app(_source_and_list(), [_catalog_projection()])
    transport = ASGITransport(app=app)
    async with AsyncClient(transport=transport, base_url="http://test") as client:
        writes = [
            ("/push/products/a", {"name": "Alpha"}),
            ("/push/products/b", {"name": "Beta"}),
            ("/push/products/a", {"name": "Alpha v2"}),
        ]
        for path, payload in writes:
            await _push(client, path, payload)

    items = await _read_list(store, "catalog")
    # Position preserved.
    assert _ids(items) == ["a", "b"]
    # Value fully replaced.
    assert items[0]["value"] == {"name": "Alpha v2"}
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
@pytest.mark.asyncio
async def test_removes_entry_on_tombstone_and_list_survives_when_emptied():
    """Tombstone writes remove entries; an emptied list remains pullable."""
    app, store = _build_app(_source_and_list(), [_catalog_projection()])
    transport = ASGITransport(app=app)
    tombstone = {"deleted": True}
    async with AsyncClient(transport=transport, base_url="http://test") as client:
        await _push(client, "/push/products/a", {"name": "Alpha"})
        await _push(client, "/push/products/b", {"name": "Beta"})
        await _push(client, "/push/products/a", tombstone)
        remaining = await _read_list(store, "catalog")
        assert _ids(remaining) == ["b"]

        # Removing an absent id is a no-op.
        await _push(client, "/push/products/zzz", tombstone)
        remaining = await _read_list(store, "catalog")
        assert _ids(remaining) == ["b"]

        # Emptying the list leaves an empty list document, not a 404.
        await _push(client, "/push/products/b", tombstone)
        assert await _read_list(store, "catalog") == []
        pulled = await client.get("/pull/catalog")
        assert pulled.status_code == 200
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
@pytest.mark.asyncio
async def test_ignores_when_project_returns_none():
    """A mapping that returns ``None`` leaves the target list unwritten."""
    ignore_all = Projection(source="products", target="catalog", project=lambda e: None)
    app, store = _build_app(_source_and_list(), [ignore_all])
    transport = ASGITransport(app=app)
    async with AsyncClient(transport=transport, base_url="http://test") as client:
        await _push(client, "/push/products/a", {"name": "Alpha"})
    raw = await store.get_string("catalog")
    assert raw is None
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
@pytest.mark.asyncio
async def test_target_function_shards_across_multiple_sources():
    """A ``target`` function routes entries from two sources into per-tenant lists."""

    def tenant_list(e):
        # No tenant param means the event is ignored.
        if e.params.get("tenant"):
            return f"catalog/{e.params['tenant']}"
        return None

    def to_entry(e):
        return ProjectionSet(
            id=e.params["id"],
            value={"kind": e.collection, "name": (e.body or {}).get("name", "")},
        )

    app, store = _build_app(
        collections=[
            _col("products", "products/{tenant}/{id}"),
            _col("services", "services/{tenant}/{id}"),
        ],
        projections=[
            Projection(
                source=["products", "services"],
                target=tenant_list,
                project=to_entry,
            )
        ],
    )
    transport = ASGITransport(app=app)
    async with AsyncClient(transport=transport, base_url="http://test") as client:
        await _push(client, "/push/products/t1/p1", {"name": "P1"})
        await _push(client, "/push/services/t1/s1", {"name": "S1"})
        await _push(client, "/push/products/t2/p2", {"name": "P2"})

    tenant_one = await _read_list(store, "catalog/t1")
    assert tenant_one == [
        {"id": "p1", "value": {"kind": "products", "name": "P1"}},
        {"id": "s1", "value": {"kind": "services", "name": "S1"}},
    ]
    tenant_two = await _read_list(store, "catalog/t2")
    assert _ids(tenant_two) == ["p2"]
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
@pytest.mark.asyncio
async def test_client_fetches_all_shards_via_list_endpoint():
    """A client discovers category shards via ``/list`` and pulls each one."""

    # Shard a product catalog by category (the documented manual-sharding pattern).
    def category_list(e):
        if (e.body or {}).get("category"):
            return f"catalog/{e.body['category']}"
        return None

    def to_entry(e):
        return ProjectionSet(id=e.params["id"], value={"name": (e.body or {}).get("name", "")})

    app, _ = _build_app(
        collections=[
            _col("products", "products/{id}"),
            _col("catalog", "catalog/{category}", pullOnly=True, listable=True),
        ],
        projections=[
            Projection(source="products", target=category_list, project=to_entry)
        ],
    )
    transport = ASGITransport(app=app)
    async with AsyncClient(transport=transport, base_url="http://test") as client:
        await _push(client, "/push/products/p1", {"name": "Novel", "category": "books"})
        await _push(client, "/push/products/p2", {"name": "Phone", "category": "electronics"})
        await _push(client, "/push/products/p3", {"name": "Comic", "category": "books"})

        # Discover shards via the list endpoint, then pull each and concatenate.
        listing = await client.get("/list/catalog")
        shards = listing.json()["items"]
        all_items: list[dict] = []
        for cat in shards:
            pulled = await client.get(f"/pull/catalog/{cat}")
            all_items.extend(pulled.json()["data"]["items"])

    assert sorted(shards) == ["books", "electronics"]
    assert sorted(i["id"] for i in all_items) == ["p1", "p2", "p3"]
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
@pytest.mark.asyncio
async def test_concurrent_writes_do_not_lose_updates():
    """A competing write between pull and push is kept; the op is re-applied on top."""
    store = OneShotConflictStore()
    app, _ = _build_app(_source_and_list(), [_catalog_projection()], store=store)

    # The competing list document: the seeded entry "a" plus a concurrent "c".
    competing_doc = json.dumps(
        {
            "v": 1,
            "data": {
                "items": [
                    {"id": "a", "value": {"name": "Alpha"}},
                    {"id": "c", "value": {"name": "Concurrent"}},
                ]
            },
            "ts": 1,
            "hash": "f" * 64,
        }
    )

    transport = ASGITransport(app=app)
    async with AsyncClient(transport=transport, base_url="http://test") as client:
        # Seed the list with one entry.
        await _push(client, "/push/products/a", {"name": "Alpha"})

        # Arm a competing write that adds entry "c", then push "b". The plugin's
        # first push hash-mismatches, re-pulls (now seeing "a" + "c") and re-applies
        # "b" on top — losing neither.
        store.arm("catalog", competing_doc)
        await _push(client, "/push/products/b", {"name": "Beta"})

    final_items = await _read_list(store, "catalog")
    assert _ids(final_items) == ["a", "c", "b"]
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
@pytest.mark.asyncio
async def test_max_items_caps_the_list():
    """With ``max_items=2`` the third distinct append is dropped."""
    app, store = _build_app(_source_and_list(), [_catalog_projection()], max_items=2)
    transport = ASGITransport(app=app)
    async with AsyncClient(transport=transport, base_url="http://test") as client:
        # The last write ("c") exceeds the cap and is dropped.
        for entry_id, label in (("a", "A"), ("b", "B"), ("c", "C")):
            await _push(client, f"/push/products/{entry_id}", {"name": label})
    kept = await _read_list(store, "catalog")
    assert _ids(kept) == ["a", "b"]
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
@pytest.mark.asyncio
async def test_pullonly_list_rejects_client_writes():
    """Clients can pull the projection-owned list but cannot push to it."""
    app, _ = _build_app(_source_and_list(), [_catalog_projection()])
    transport = ASGITransport(app=app)
    async with AsyncClient(transport=transport, base_url="http://test") as client:
        await _push(client, "/push/products/a", {"name": "Alpha"})
        pulled = await client.get("/pull/catalog")
        assert pulled.status_code == 200

        # pullOnly → no push route registered for the list.
        tamper_payload = {"data": {"tampered": True}, "baseHash": None}
        rejected = await client.post(
            "/push/catalog",
            json=tamper_payload,
            headers={"content-type": "application/json"},
        )
        assert rejected.status_code >= 400
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
@pytest.mark.asyncio
async def test_projection_failure_does_not_break_client_write():
    """A mapping that raises must not fail the originating push."""

    def boom(e: WriteEvent):
        raise RuntimeError("boom")

    failing = Projection(source="products", target="catalog", project=boom)
    app, _ = _build_app(
        collections=[_col("products", "products/{id}")],
        projections=[failing],
    )
    transport = ASGITransport(app=app)
    async with AsyncClient(transport=transport, base_url="http://test") as client:
        response = await client.post(
            "/push/products/a",
            json={"data": {"name": "x"}, "baseHash": None},
            headers={"content-type": "application/json"},
        )
        assert response.status_code == 200
        # The push still returns a 64-character hash.
        assert len(response.json()["hash"]) == 64
|