zodb-pgjsonb 0.1.dev19__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zodb_pgjsonb-0.1.dev19/.github/workflows/ci.yaml +21 -0
- zodb_pgjsonb-0.1.dev19/.github/workflows/qa.yaml +23 -0
- zodb_pgjsonb-0.1.dev19/.github/workflows/release.yaml +80 -0
- zodb_pgjsonb-0.1.dev19/.github/workflows/tests.yaml +101 -0
- zodb_pgjsonb-0.1.dev19/.gitignore +10 -0
- zodb_pgjsonb-0.1.dev19/ARCHITECTURE.md +269 -0
- zodb_pgjsonb-0.1.dev19/BENCHMARKS.md +126 -0
- zodb_pgjsonb-0.1.dev19/PKG-INFO +192 -0
- zodb_pgjsonb-0.1.dev19/README.md +153 -0
- zodb_pgjsonb-0.1.dev19/benchmarks/bench.py +1203 -0
- zodb_pgjsonb-0.1.dev19/benchmarks/bench_plone.py +175 -0
- zodb_pgjsonb-0.1.dev19/benchmarks/results.json +316 -0
- zodb_pgjsonb-0.1.dev19/pyproject.toml +116 -0
- zodb_pgjsonb-0.1.dev19/src/zodb_pgjsonb/__init__.py +1 -0
- zodb_pgjsonb-0.1.dev19/src/zodb_pgjsonb/blobs.py +124 -0
- zodb_pgjsonb-0.1.dev19/src/zodb_pgjsonb/component.xml +115 -0
- zodb_pgjsonb-0.1.dev19/src/zodb_pgjsonb/config.py +56 -0
- zodb_pgjsonb-0.1.dev19/src/zodb_pgjsonb/interfaces.py +17 -0
- zodb_pgjsonb-0.1.dev19/src/zodb_pgjsonb/invalidation.py +64 -0
- zodb_pgjsonb-0.1.dev19/src/zodb_pgjsonb/objects.py +111 -0
- zodb_pgjsonb-0.1.dev19/src/zodb_pgjsonb/packer.py +194 -0
- zodb_pgjsonb-0.1.dev19/src/zodb_pgjsonb/schema.py +102 -0
- zodb_pgjsonb-0.1.dev19/src/zodb_pgjsonb/storage.py +1912 -0
- zodb_pgjsonb-0.1.dev19/tests/__init__.py +0 -0
- zodb_pgjsonb-0.1.dev19/tests/conftest.py +11 -0
- zodb_pgjsonb-0.1.dev19/tests/test_blobs.py +684 -0
- zodb_pgjsonb-0.1.dev19/tests/test_conflict.py +221 -0
- zodb_pgjsonb-0.1.dev19/tests/test_conformance.py +332 -0
- zodb_pgjsonb-0.1.dev19/tests/test_history.py +426 -0
- zodb_pgjsonb-0.1.dev19/tests/test_iteration.py +382 -0
- zodb_pgjsonb-0.1.dev19/tests/test_mvcc.py +281 -0
- zodb_pgjsonb-0.1.dev19/tests/test_roundtrip.py +235 -0
- zodb_pgjsonb-0.1.dev19/tests/test_storage.py +239 -0
- zodb_pgjsonb-0.1.dev19/tests/test_stress.py +259 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: CI
|
|
3
|
+
|
|
4
|
+
on:
|
|
5
|
+
push:
|
|
6
|
+
branches-ignore: [main]
|
|
7
|
+
pull_request:
|
|
8
|
+
|
|
9
|
+
permissions:
|
|
10
|
+
contents: read
|
|
11
|
+
|
|
12
|
+
concurrency:
|
|
13
|
+
group: ci-${{ github.ref }}
|
|
14
|
+
cancel-in-progress: true
|
|
15
|
+
|
|
16
|
+
jobs:
|
|
17
|
+
qa:
|
|
18
|
+
uses: "./.github/workflows/qa.yaml"
|
|
19
|
+
|
|
20
|
+
tests:
|
|
21
|
+
uses: "./.github/workflows/tests.yaml"
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: QA
|
|
3
|
+
|
|
4
|
+
on:
|
|
5
|
+
workflow_call:
|
|
6
|
+
|
|
7
|
+
permissions:
|
|
8
|
+
contents: read
|
|
9
|
+
|
|
10
|
+
jobs:
|
|
11
|
+
lint:
|
|
12
|
+
name: Ruff
|
|
13
|
+
runs-on: ubuntu-latest
|
|
14
|
+
steps:
|
|
15
|
+
- uses: actions/checkout@v4
|
|
16
|
+
|
|
17
|
+
- uses: astral-sh/setup-uv@v6
|
|
18
|
+
|
|
19
|
+
- name: Run ruff check
|
|
20
|
+
run: uvx ruff check .
|
|
21
|
+
|
|
22
|
+
- name: Run ruff format check
|
|
23
|
+
run: uvx ruff format --check .
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: Build & upload PyPI package
|
|
3
|
+
|
|
4
|
+
on:
|
|
5
|
+
push:
|
|
6
|
+
branches: [main]
|
|
7
|
+
release:
|
|
8
|
+
types:
|
|
9
|
+
- published
|
|
10
|
+
workflow_dispatch:
|
|
11
|
+
|
|
12
|
+
concurrency:
|
|
13
|
+
group: release-${{ github.ref }}
|
|
14
|
+
cancel-in-progress: true
|
|
15
|
+
|
|
16
|
+
jobs:
|
|
17
|
+
qa:
|
|
18
|
+
uses: "./.github/workflows/qa.yaml"
|
|
19
|
+
|
|
20
|
+
tests:
|
|
21
|
+
uses: "./.github/workflows/tests.yaml"
|
|
22
|
+
|
|
23
|
+
build-package:
|
|
24
|
+
name: Build & verify package
|
|
25
|
+
needs:
|
|
26
|
+
- qa
|
|
27
|
+
- tests
|
|
28
|
+
runs-on: ubuntu-latest
|
|
29
|
+
permissions:
|
|
30
|
+
contents: read
|
|
31
|
+
|
|
32
|
+
steps:
|
|
33
|
+
- uses: actions/checkout@v4
|
|
34
|
+
with:
|
|
35
|
+
fetch-depth: 0
|
|
36
|
+
persist-credentials: false
|
|
37
|
+
|
|
38
|
+
- uses: hynek/build-and-inspect-python-package@v2
|
|
39
|
+
|
|
40
|
+
release-test-pypi:
|
|
41
|
+
name: Publish in-dev package to test.pypi.org
|
|
42
|
+
environment: release-test-pypi
|
|
43
|
+
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
|
|
44
|
+
runs-on: ubuntu-latest
|
|
45
|
+
needs:
|
|
46
|
+
- build-package
|
|
47
|
+
permissions:
|
|
48
|
+
id-token: write
|
|
49
|
+
|
|
50
|
+
steps:
|
|
51
|
+
- name: Download packages built by build-and-inspect-python-package
|
|
52
|
+
uses: actions/download-artifact@v4
|
|
53
|
+
with:
|
|
54
|
+
name: Packages
|
|
55
|
+
path: dist
|
|
56
|
+
|
|
57
|
+
- name: Upload package to Test PyPI
|
|
58
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
59
|
+
with:
|
|
60
|
+
repository-url: https://test.pypi.org/legacy/
|
|
61
|
+
|
|
62
|
+
release-pypi:
|
|
63
|
+
name: Publish released package to pypi.org
|
|
64
|
+
environment: release-pypi
|
|
65
|
+
if: github.event.action == 'published'
|
|
66
|
+
runs-on: ubuntu-latest
|
|
67
|
+
needs:
|
|
68
|
+
- build-package
|
|
69
|
+
permissions:
|
|
70
|
+
id-token: write
|
|
71
|
+
|
|
72
|
+
steps:
|
|
73
|
+
- name: Download packages built by build-and-inspect-python-package
|
|
74
|
+
uses: actions/download-artifact@v4
|
|
75
|
+
with:
|
|
76
|
+
name: Packages
|
|
77
|
+
path: dist
|
|
78
|
+
|
|
79
|
+
- name: Upload package to PyPI
|
|
80
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: Tests
|
|
3
|
+
|
|
4
|
+
on:
|
|
5
|
+
workflow_call:
|
|
6
|
+
|
|
7
|
+
permissions:
|
|
8
|
+
contents: read
|
|
9
|
+
|
|
10
|
+
jobs:
|
|
11
|
+
test:
|
|
12
|
+
name: Python ${{ matrix.python-version }}
|
|
13
|
+
runs-on: ubuntu-latest
|
|
14
|
+
strategy:
|
|
15
|
+
fail-fast: false
|
|
16
|
+
matrix:
|
|
17
|
+
python-version: ["3.12", "3.13"]
|
|
18
|
+
|
|
19
|
+
services:
|
|
20
|
+
postgres:
|
|
21
|
+
image: postgres:17
|
|
22
|
+
env:
|
|
23
|
+
POSTGRES_USER: zodb
|
|
24
|
+
POSTGRES_PASSWORD: zodb
|
|
25
|
+
POSTGRES_DB: zodb_test
|
|
26
|
+
ports:
|
|
27
|
+
- 5432:5432
|
|
28
|
+
options: >-
|
|
29
|
+
--health-cmd "pg_isready -U zodb"
|
|
30
|
+
--health-interval 5s
|
|
31
|
+
--health-timeout 3s
|
|
32
|
+
--health-retries 5
|
|
33
|
+
|
|
34
|
+
env:
|
|
35
|
+
ZODB_TEST_DSN: "dbname=zodb_test user=zodb password=zodb host=localhost port=5432"
|
|
36
|
+
|
|
37
|
+
steps:
|
|
38
|
+
- uses: actions/checkout@v4
|
|
39
|
+
with:
|
|
40
|
+
fetch-depth: 0
|
|
41
|
+
|
|
42
|
+
- uses: astral-sh/setup-uv@v6
|
|
43
|
+
with:
|
|
44
|
+
enable-cache: true
|
|
45
|
+
cache-dependency-glob: "pyproject.toml"
|
|
46
|
+
|
|
47
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
48
|
+
run: uv python install ${{ matrix.python-version }}
|
|
49
|
+
|
|
50
|
+
- name: Install dependencies
|
|
51
|
+
run: uv venv && uv pip install -e ".[test,s3]"
|
|
52
|
+
|
|
53
|
+
- name: Run tests with coverage
|
|
54
|
+
run: uv run pytest --cov --cov-report=
|
|
55
|
+
|
|
56
|
+
- name: Rename coverage data
|
|
57
|
+
run: mv .coverage .coverage.${{ matrix.python-version }}
|
|
58
|
+
|
|
59
|
+
- name: Upload coverage data
|
|
60
|
+
uses: actions/upload-artifact@v4
|
|
61
|
+
with:
|
|
62
|
+
name: coverage-data-${{ matrix.python-version }}
|
|
63
|
+
path: .coverage.${{ matrix.python-version }}
|
|
64
|
+
include-hidden-files: true
|
|
65
|
+
if-no-files-found: ignore
|
|
66
|
+
|
|
67
|
+
coverage:
|
|
68
|
+
name: Combine & check coverage
|
|
69
|
+
if: always()
|
|
70
|
+
needs: test
|
|
71
|
+
runs-on: ubuntu-latest
|
|
72
|
+
|
|
73
|
+
steps:
|
|
74
|
+
- uses: actions/checkout@v4
|
|
75
|
+
|
|
76
|
+
- uses: astral-sh/setup-uv@v6
|
|
77
|
+
with:
|
|
78
|
+
enable-cache: true
|
|
79
|
+
cache-dependency-glob: "pyproject.toml"
|
|
80
|
+
|
|
81
|
+
- name: Install dependencies
|
|
82
|
+
run: uv venv && uv pip install -e ".[test]"
|
|
83
|
+
|
|
84
|
+
- uses: actions/download-artifact@v4
|
|
85
|
+
with:
|
|
86
|
+
pattern: coverage-data-*
|
|
87
|
+
merge-multiple: true
|
|
88
|
+
|
|
89
|
+
- name: Combine coverage & check threshold
|
|
90
|
+
run: |
|
|
91
|
+
uv run coverage combine
|
|
92
|
+
uv run coverage html --skip-covered --skip-empty
|
|
93
|
+
uv run coverage report --format=markdown >> $GITHUB_STEP_SUMMARY
|
|
94
|
+
uv run coverage report
|
|
95
|
+
|
|
96
|
+
- name: Upload HTML report if check failed
|
|
97
|
+
uses: actions/upload-artifact@v4
|
|
98
|
+
with:
|
|
99
|
+
name: html-report
|
|
100
|
+
path: htmlcov
|
|
101
|
+
if: ${{ failure() }}
|
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
# Architecture: ZODB PostgreSQL JSONB Storage
|
|
2
|
+
|
|
3
|
+
## Vision
|
|
4
|
+
|
|
5
|
+
A modern, PostgreSQL-only ZODB storage that stores object state as **JSONB** (not pickle/bytea), using **zodb-json-codec** for transparent transcoding. Blobs use tiered storage (small in PG bytea, large in S3 via zodb-s3blobs). This makes ZODB data queryable with SQL while the storage remains fully transparent to ZODB/Zope/Plone.
|
|
6
|
+
|
|
7
|
+
## Design Decisions
|
|
8
|
+
|
|
9
|
+
- **Direction C: Informed Independent** — a new, independent storage implementation, informed by the proven algorithms in RelStorage (algorithms are borrowed, code is not)
|
|
10
|
+
- **Separate repo/package** — https://github.com/bluedynamics/zodb-pgjsonb
|
|
11
|
+
- **Both history modes (configurable)** — history-free and history-preserving, like RelStorage
|
|
12
|
+
- **Tiered blob storage** — small blobs in PG bytea, large blobs in S3 (configurable threshold)
|
|
13
|
+
|
|
14
|
+
---
|
|
15
|
+
|
|
16
|
+
## Architecture
|
|
17
|
+
|
|
18
|
+
```
|
|
19
|
+
ZODB.DB
|
|
20
|
+
└-> PGJsonbStorage (IStorage + IMVCCStorage + IBlobStorage + IStorageUndoable + IStorageIteration + IStorageRestoreable)
|
|
21
|
+
├-> zodb-json-codec (pickle <-> JSON transcoding, Rust)
|
|
22
|
+
├-> psycopg3 (sync, pipelined writes via executemany, ConnectionPool)
|
|
23
|
+
├-> LISTEN/NOTIFY (instant invalidation via PG trigger)
|
|
24
|
+
├-> Tiered blobs (PG bytea + optional S3 via zodb-s3blobs)
|
|
25
|
+
└-> Pure SQL pack/GC (recursive CTE on pre-extracted refs column)
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
### What we borrow from RelStorage (algorithms, not code)
|
|
29
|
+
|
|
30
|
+
- Transaction ID allocation (monotonic, global lock via `pg_advisory_xact_lock`)
|
|
31
|
+
- MVCC semantics (snapshot isolation, `new_instance()` per connection)
|
|
32
|
+
- Pack algorithm concept (mark reachable, sweep unreachable) — reimplemented as pure SQL graph traversal via `refs` column, no object loading needed
|
|
33
|
+
- Conflict resolution via inherited `ConflictResolvingStorage` (data is pickle at boundaries)
|
|
34
|
+
|
|
35
|
+
### What we design fresh
|
|
36
|
+
|
|
37
|
+
- Schema optimized for JSONB from day one
|
|
38
|
+
- psycopg3 sync API with pipelined `executemany()` batch writes
|
|
39
|
+
- LISTEN/NOTIFY for instant invalidation
|
|
40
|
+
- Tiered blob storage with configurable threshold
|
|
41
|
+
- Configurable history modes (history-free / history-preserving)
|
|
42
|
+
- SQL query API as first-class citizen
|
|
43
|
+
|
|
44
|
+
---
|
|
45
|
+
|
|
46
|
+
## Schema Design
|
|
47
|
+
|
|
48
|
+
### History-Free Mode (default, recommended for Plone)
|
|
49
|
+
|
|
50
|
+
```sql
|
|
51
|
+
CREATE TABLE transaction_log (
|
|
52
|
+
tid BIGINT PRIMARY KEY,
|
|
53
|
+
username TEXT DEFAULT '',
|
|
54
|
+
description TEXT DEFAULT '',
|
|
55
|
+
extension BYTEA DEFAULT ''
|
|
56
|
+
);
|
|
57
|
+
|
|
58
|
+
CREATE TABLE object_state (
|
|
59
|
+
zoid BIGINT PRIMARY KEY,
|
|
60
|
+
tid BIGINT NOT NULL REFERENCES transaction_log(tid),
|
|
61
|
+
class_mod TEXT NOT NULL,
|
|
62
|
+
class_name TEXT NOT NULL,
|
|
63
|
+
state JSONB,
|
|
64
|
+
state_size INTEGER NOT NULL,
|
|
65
|
+
refs BIGINT[] NOT NULL DEFAULT '{}'
|
|
66
|
+
);
|
|
67
|
+
|
|
68
|
+
CREATE TABLE blob_state (
|
|
69
|
+
zoid BIGINT NOT NULL,
|
|
70
|
+
tid BIGINT NOT NULL,
|
|
71
|
+
blob_size BIGINT NOT NULL,
|
|
72
|
+
data BYTEA,
|
|
73
|
+
s3_key TEXT,
|
|
74
|
+
PRIMARY KEY (zoid, tid)
|
|
75
|
+
);
|
|
76
|
+
|
|
77
|
+
CREATE INDEX idx_object_class ON object_state (class_mod, class_name);
|
|
78
|
+
CREATE INDEX idx_object_state_gin ON object_state USING gin (state jsonb_path_ops);
|
|
79
|
+
CREATE INDEX idx_object_refs ON object_state USING gin (refs);
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
### History-Preserving Mode (adds history tables)
|
|
83
|
+
|
|
84
|
+
```sql
|
|
85
|
+
CREATE TABLE object_history (
|
|
86
|
+
zoid BIGINT NOT NULL,
|
|
87
|
+
tid BIGINT NOT NULL,
|
|
88
|
+
class_mod TEXT NOT NULL,
|
|
89
|
+
class_name TEXT NOT NULL,
|
|
90
|
+
state JSONB,
|
|
91
|
+
state_size INTEGER NOT NULL,
|
|
92
|
+
refs BIGINT[] NOT NULL DEFAULT '{}',
|
|
93
|
+
PRIMARY KEY (zoid, tid)
|
|
94
|
+
);
|
|
95
|
+
|
|
96
|
+
CREATE TABLE blob_history (
|
|
97
|
+
zoid BIGINT NOT NULL,
|
|
98
|
+
tid BIGINT NOT NULL,
|
|
99
|
+
blob_size BIGINT NOT NULL,
|
|
100
|
+
data BYTEA,
|
|
101
|
+
s3_key TEXT,
|
|
102
|
+
PRIMARY KEY (zoid, tid)
|
|
103
|
+
);
|
|
104
|
+
|
|
105
|
+
CREATE TABLE pack_state (
|
|
106
|
+
zoid BIGINT PRIMARY KEY,
|
|
107
|
+
tid BIGINT NOT NULL
|
|
108
|
+
);
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
### Schema Notes
|
|
112
|
+
|
|
113
|
+
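- **Class columns inline** (not normalized FK): avoids JOIN on the hot path (`load()`), at the cost of repeating the short class strings in every row (PostgreSQL's TOAST compression only kicks in for rows over about 2 kB and targets the largest values, so these short strings are typically stored uncompressed)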
|
|
114
|
+
- **`state` is `@s` only**: the `@cls` marker is extracted into `class_mod`/`class_name` columns, saving JSONB space and enabling efficient class-based queries
|
|
115
|
+
- **`state_size`**: original pickle size, useful for monitoring
|
|
116
|
+
- **Advisory locks** for commit serialization: `pg_advisory_xact_lock(0)`, released automatically on transaction end
|
|
117
|
+
- **GIN index with `jsonb_path_ops`**: enables `@>` containment queries efficiently
|
|
118
|
+
- **`refs` column**: pre-extracted persistent reference OIDs, enables pure SQL pack/GC
|
|
119
|
+
|
|
120
|
+
### Pack via Pure SQL Graph Traversal
|
|
121
|
+
|
|
122
|
+
Traditional ZODB pack loads and unpickles every object to extract references. PGJsonbStorage extracts `@ref` OIDs at `store()` time into the `refs` column, enabling pack as a single recursive SQL query:
|
|
123
|
+
|
|
124
|
+
```sql
|
|
125
|
+
WITH RECURSIVE reachable AS (
|
|
126
|
+
SELECT zoid FROM object_state WHERE zoid = 0
|
|
127
|
+
UNION
|
|
128
|
+
SELECT unnest(o.refs)
|
|
129
|
+
FROM object_state o
|
|
130
|
+
JOIN reachable r ON o.zoid = r.zoid
|
|
131
|
+
)
|
|
132
|
+
DELETE FROM object_state
|
|
133
|
+
WHERE zoid NOT IN (SELECT zoid FROM reachable);
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
Result: 15-28x faster than RelStorage for pack operations.
|
|
137
|
+
|
|
138
|
+
---
|
|
139
|
+
|
|
140
|
+
## Tiered Blob Storage
|
|
141
|
+
|
|
142
|
+
| Mode | Config | Where blobs are stored |
|
|
143
|
+
|----------|----------------------------|-------------------------------|
|
|
144
|
+
| PG-only | No S3 keys set | All blobs in PostgreSQL bytea |
|
|
145
|
+
| Tiered | S3 keys + blob-threshold | Small in PG, large in S3 |
|
|
146
|
+
| S3-only | S3 keys + blob-threshold=0 | All blobs in S3 |
|
|
147
|
+
|
|
148
|
+
S3 tiering uses `zodb-s3blobs` (`S3Client` + `S3BlobCache`) as optional dependency.
|
|
149
|
+
|
|
150
|
+
---
|
|
151
|
+
|
|
152
|
+
## Transcoding Integration
|
|
153
|
+
|
|
154
|
+
### Store Path (ZODB -> PostgreSQL)
|
|
155
|
+
|
|
156
|
+
```
|
|
157
|
+
pickle bytes -> zodb_json_codec.decode_zodb_record()
|
|
158
|
+
-> {"@cls": [mod, name], "@s": {state...}}
|
|
159
|
+
-> extract class_mod, class_name, state, refs
|
|
160
|
+
-> INSERT INTO object_state (JSONB)
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
### Load Path (PostgreSQL -> ZODB)
|
|
164
|
+
|
|
165
|
+
```
|
|
166
|
+
SELECT class_mod, class_name, state FROM object_state
|
|
167
|
+
-> reconstruct {"@cls": [mod, name], "@s": {state...}}
|
|
168
|
+
-> zodb_json_codec.encode_zodb_record()
|
|
169
|
+
-> pickle bytes
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
### Conflict Resolution
|
|
173
|
+
|
|
174
|
+
Data is pickle bytes at the ZODB boundary. `ConflictResolvingStorage.tryToResolveConflict()` receives pickle, unpickles, calls `_p_resolveConflict()`, re-pickles. The default identity transform hooks are correct since all data at the resolution boundary is already pickle.
|
|
175
|
+
|
|
176
|
+
---
|
|
177
|
+
|
|
178
|
+
## MVCC Architecture
|
|
179
|
+
|
|
180
|
+
```
|
|
181
|
+
PGJsonbStorage (main, factory)
|
|
182
|
+
├── implements IMVCCStorage
|
|
183
|
+
├── owns: schema, OID allocation, DSN, connection pool
|
|
184
|
+
├── new_instance() -> PGJsonbStorageInstance
|
|
185
|
+
└── close() closes pool + main connection
|
|
186
|
+
|
|
187
|
+
PGJsonbStorageInstance (per-connection)
|
|
188
|
+
├── borrows PG connection from pool (autocommit=True)
|
|
189
|
+
├── REPEATABLE READ snapshots for consistent reads
|
|
190
|
+
├── poll_invalidations() via SQL query
|
|
191
|
+
├── store/tpc_begin/vote/finish/abort (write path)
|
|
192
|
+
└── release() returns connection to pool
|
|
193
|
+
```
|
|
194
|
+
|
|
195
|
+
---
|
|
196
|
+
|
|
197
|
+
## Migration via zodbconvert
|
|
198
|
+
|
|
199
|
+
PGJsonbStorage implements `IStorageRestoreable` (`restore()`, `restoreBlob()`), making it compatible with `zodbconvert` out of the box:
|
|
200
|
+
|
|
201
|
+
```ini
|
|
202
|
+
<source>
|
|
203
|
+
<relstorage>
|
|
204
|
+
<postgresql>
|
|
205
|
+
dsn dbname='zodb_old'
|
|
206
|
+
</postgresql>
|
|
207
|
+
</relstorage>
|
|
208
|
+
</source>
|
|
209
|
+
<destination>
|
|
210
|
+
%import zodb_pgjsonb
|
|
211
|
+
<pgjsonb>
|
|
212
|
+
dsn dbname='zodb_new'
|
|
213
|
+
</pgjsonb>
|
|
214
|
+
</destination>
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
---
|
|
218
|
+
|
|
219
|
+
## Package Structure
|
|
220
|
+
|
|
221
|
+
```
|
|
222
|
+
zodb-pgjsonb/
|
|
223
|
+
├── src/zodb_pgjsonb/
|
|
224
|
+
│ ├── __init__.py # public API
|
|
225
|
+
│ ├── storage.py # PGJsonbStorage + PGJsonbStorageInstance (core)
|
|
226
|
+
│ ├── schema.py # DDL definitions
|
|
227
|
+
│ ├── objects.py # ObjectStore (JSONB CRUD)
|
|
228
|
+
│ ├── invalidation.py # LISTEN/NOTIFY channel
|
|
229
|
+
│ ├── blobs.py # BlobManager (tiered PG/S3)
|
|
230
|
+
│ ├── packer.py # Pack/GC (pure SQL)
|
|
231
|
+
│ ├── interfaces.py # Zope interface declarations
|
|
232
|
+
│ ├── config.py # ZConfig factory
|
|
233
|
+
│ └── component.xml # ZConfig schema definition
|
|
234
|
+
├── tests/
|
|
235
|
+
│ ├── test_storage.py # Core IStorage tests
|
|
236
|
+
│ ├── test_mvcc.py # MVCC / poll_invalidations
|
|
237
|
+
│ ├── test_history.py # History-preserving mode + undo
|
|
238
|
+
│ ├── test_blobs.py # Blob storage (PG + S3)
|
|
239
|
+
│ ├── test_conflict.py # Conflict resolution (BTrees)
|
|
240
|
+
│ ├── test_conformance.py # ZODB conformance test suite
|
|
241
|
+
│ ├── test_iteration.py # IStorageIteration
|
|
242
|
+
│ ├── test_roundtrip.py # Codec roundtrip verification
|
|
243
|
+
│ └── test_stress.py # Concurrent write stress tests
|
|
244
|
+
├── benchmarks/
|
|
245
|
+
│ └── bench.py # Performance benchmarks vs RelStorage
|
|
246
|
+
├── example/ # Docker Compose demo setup
|
|
247
|
+
├── ARCHITECTURE.md # This file
|
|
248
|
+
├── BENCHMARKS.md # Performance results
|
|
249
|
+
├── pyproject.toml
|
|
250
|
+
└── README.md
|
|
251
|
+
```
|
|
252
|
+
|
|
253
|
+
---
|
|
254
|
+
|
|
255
|
+
## Comparison: RelStorage vs PGJsonbStorage
|
|
256
|
+
|
|
257
|
+
| Feature | RelStorage | PGJsonbStorage |
|
|
258
|
+
|---------|-----------|----------------|
|
|
259
|
+
| Storage format | pickle/bytea | JSONB |
|
|
260
|
+
| SQL queryable | No | Yes (GIN indexes, jsonpath) |
|
|
261
|
+
| Databases | PG, MySQL, Oracle, SQLite | PostgreSQL only |
|
|
262
|
+
| Python driver | psycopg2 | psycopg3 (pipelined writes, pool) |
|
|
263
|
+
| Invalidation | Polling (1-5s latency) | LISTEN/NOTIFY (~1ms) |
|
|
264
|
+
| Blob storage | Filesystem or cloud (separate) | Tiered PG + S3 (integrated) |
|
|
265
|
+
| History modes | Both | Both (configurable) |
|
|
266
|
+
| Conflict resolution | Built-in | Via inherited ConflictResolvingStorage |
|
|
267
|
+
| Pack algorithm | Load+unpickle every object | Pure SQL graph traversal via `refs` column |
|
|
268
|
+
| Codec dependency | None | zodb-json-codec (Rust) |
|
|
269
|
+
| Maturity | 15+ years | New |
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
# Performance Benchmarks
|
|
2
|
+
|
|
3
|
+
Comparison of `PGJsonbStorage` vs `RelStorage` (both using PostgreSQL)
|
|
4
|
+
for ZODB storage operations at multiple abstraction levels.
|
|
5
|
+
|
|
6
|
+
Measured on: 2026-02-08
|
|
7
|
+
Python: 3.13.9, PostgreSQL: 17, 100 iterations, 10 warmup
|
|
8
|
+
Host: localhost, Docker-containerized PostgreSQL
|
|
9
|
+
|
|
10
|
+
## Context
|
|
11
|
+
|
|
12
|
+
PGJsonbStorage stores object state as **JSONB** (enabling SQL queries on ZODB
|
|
13
|
+
data), while RelStorage stores **pickle bytes** as bytea. This fundamental
|
|
14
|
+
difference affects the performance profile:
|
|
15
|
+
|
|
16
|
+
- **Writes**: PGJsonbStorage transcodes pickle to JSONB via `zodb-json-codec`
|
|
17
|
+
(Rust), then PostgreSQL indexes the JSONB. RelStorage writes raw bytes.
|
|
18
|
+
Batch writes use psycopg3's `executemany()` with pipeline mode for
|
|
19
|
+
pipelined SQL (single network round-trip per batch).
|
|
20
|
+
- **Reads (raw)**: Both storages use a local in-memory LRU cache
|
|
21
|
+
(`cache_local_mb`) that serves hot objects from RAM. Cache misses hit
|
|
22
|
+
PostgreSQL — PGJsonbStorage additionally transcodes JSONB back to pickle.
|
|
23
|
+
- **Reads (ZODB)**: ZODB's object cache handles >95% of reads in production,
|
|
24
|
+
making the raw storage read speed largely irrelevant.
|
|
25
|
+
- **Pack/GC**: PGJsonbStorage uses pure SQL graph traversal via pre-extracted
|
|
26
|
+
`refs` column (recursive CTE, no data leaves PostgreSQL). RelStorage must
|
|
27
|
+
load and unpickle every object to extract references.
|
|
28
|
+
|
|
29
|
+
## Storage API (raw store/load)
|
|
30
|
+
|
|
31
|
+
| Operation | PGJsonb | RelStorage | Comparison |
|
|
32
|
+
|---|---|---|---|
|
|
33
|
+
| store single | 5.2 ms | 8.4 ms | **1.6x faster** |
|
|
34
|
+
| store batch 10 | 8.4 ms | 8.5 ms | **on par** |
|
|
35
|
+
| store batch 100 | 12.4 ms | 10.4 ms | 1.2x slower |
|
|
36
|
+
| load single | ~1 us | ~1 us | **1.4x faster** (sub-microsecond difference, hidden by rounding) |
|
|
37
|
+
| load batch 100 | 21 us | 96 us | **4.6x faster** |
|
|
38
|
+
|
|
39
|
+
Single stores are faster because PGJsonbStorage has a simpler 2PC path (direct
|
|
40
|
+
SQL, no OID/TID tracking tables). Batch stores use pipelined `executemany()`
|
|
41
|
+
— transcoding is only about 1% of batch time (0.14 ms of 12.4 ms for 100 objects via
|
|
42
|
+
`decode_zodb_record_for_pg`), the remaining cost is PostgreSQL JSONB indexing
|
|
43
|
+
overhead vs RelStorage's raw bytea writes. Both storages serve hot reads from
|
|
44
|
+
their local LRU cache — PGJsonbStorage's cache is slightly faster due to
|
|
45
|
+
simpler lookup logic.
|
|
46
|
+
|
|
47
|
+
## ZODB.DB (through object cache)
|
|
48
|
+
|
|
49
|
+
| Operation | PGJsonb | RelStorage | Comparison |
|
|
50
|
+
|---|---|---|---|
|
|
51
|
+
| write simple | 8.9 ms | 8.7 ms | on par |
|
|
52
|
+
| write btree | 7.7 ms | 10.8 ms | **1.4x faster** |
|
|
53
|
+
| read | 3 us | 3 us | on par |
|
|
54
|
+
| connection cycle | 224 us | 240 us | **1.1x faster** |
|
|
55
|
+
| write batch 10 | 12.6 ms | 11.2 ms | 1.1x slower |
|
|
56
|
+
|
|
57
|
+
Through ZODB.DB, ZODB's object cache handles the hot read path. Writes are
|
|
58
|
+
on par. Both storages use connection pooling (`psycopg_pool`
|
|
59
|
+
for PGJsonbStorage, built-in for RelStorage). Connection cycle overhead is
|
|
60
|
+
on par thanks to pool pre-warming (`pool-size=1` default).
|
|
61
|
+
|
|
62
|
+
## Pack / GC
|
|
63
|
+
|
|
64
|
+
| Objects | PGJsonb | RelStorage | Comparison |
|
|
65
|
+
|---|---|---|---|
|
|
66
|
+
| 100 | 14.0 ms | 202.5 ms | **14.5x faster** |
|
|
67
|
+
| 1,000 | 8.3 ms | 236.8 ms | **28.4x faster** |
|
|
68
|
+
| 10,000 | 23.5 ms | 604.7 ms | **25.7x faster** |
|
|
69
|
+
|
|
70
|
+
Pack is the standout advantage. PGJsonbStorage's pure SQL graph traversal
|
|
71
|
+
via the pre-extracted `refs` column (recursive CTE) runs entirely inside
|
|
72
|
+
PostgreSQL — no objects are loaded, no Python unpickling occurs. RelStorage
|
|
73
|
+
must load and unpickle every object to discover references via `referencesf()`.
|
|
74
|
+
The advantage holds at every measured size (14.5x to 28.4x) and is largest from 1,000 objects upward.
|
|
75
|
+
|
|
76
|
+
## Plone Application (50 documents)
|
|
77
|
+
|
|
78
|
+
| Operation | PGJsonb | RelStorage | Comparison |
|
|
79
|
+
|---|---|---|---|
|
|
80
|
+
| site creation | 1.05 s | 1.06 s | on par |
|
|
81
|
+
| content create/doc | 27.6 ms | 27.2 ms | on par |
|
|
82
|
+
| catalog query | 187 us | 182 us | on par |
|
|
83
|
+
| content modify/doc | 6.6 ms | 7.0 ms | **1.1x faster** |
|
|
84
|
+
|
|
85
|
+
At the Plone application level, both storage backends are **on par**. The
|
|
86
|
+
storage layer accounts for a small fraction of total request time, which
|
|
87
|
+
is dominated by Plone framework overhead (catalog indexing, security checks,
|
|
88
|
+
event subscribers, ZCML lookups).
|
|
89
|
+
|
|
90
|
+
This confirms that PGJsonbStorage can serve as a drop-in replacement for
|
|
91
|
+
RelStorage in Plone deployments, trading minimal overhead for **SQL
|
|
92
|
+
queryability of all ZODB data via JSONB**.
|
|
93
|
+
|
|
94
|
+
## Analysis
|
|
95
|
+
|
|
96
|
+
**Strengths:**
|
|
97
|
+
- Single-object writes 1.4-1.6x faster (simpler 2PC)
|
|
98
|
+
- Raw loads 1.4-4.6x faster (both cached, simpler lookup)
|
|
99
|
+
- Pack/GC 15-28x faster (pure SQL, no object loading)
|
|
100
|
+
- Plone-level performance on par with RelStorage
|
|
101
|
+
- All ZODB data queryable via SQL/JSONB (unique to PGJsonbStorage)
|
|
102
|
+
|
|
103
|
+
**Trade-offs:**
|
|
104
|
+
- Batch writes are 1.1-1.2x slower (ZODB write batch 10, raw store batch 100; JSONB indexing overhead vs raw bytea)
|
|
105
|
+
|
|
106
|
+
## Running Benchmarks
|
|
107
|
+
|
|
108
|
+
```bash
|
|
109
|
+
cd sources/zodb-pgjsonb
|
|
110
|
+
|
|
111
|
+
# Requires PostgreSQL on localhost:5433 with benchmark databases:
|
|
112
|
+
# createdb -h localhost -p 5433 -U zodb zodb_bench_pgjsonb
|
|
113
|
+
# createdb -h localhost -p 5433 -U zodb zodb_bench_relstorage
|
|
114
|
+
|
|
115
|
+
# Install benchmark dependencies
|
|
116
|
+
uv pip install -e ".[bench]"
|
|
117
|
+
|
|
118
|
+
# Individual benchmark categories
|
|
119
|
+
python benchmarks/bench.py storage --iterations 100
|
|
120
|
+
python benchmarks/bench.py zodb --iterations 100
|
|
121
|
+
python benchmarks/bench.py pack
|
|
122
|
+
python benchmarks/bench.py plone --docs 50
|
|
123
|
+
|
|
124
|
+
# All benchmarks with JSON export
|
|
125
|
+
python benchmarks/bench.py all --output benchmarks/results.json --format both
|
|
126
|
+
```
|