tableflip 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tableflip-0.1.0/PKG-INFO +134 -0
- tableflip-0.1.0/README.md +125 -0
- tableflip-0.1.0/pyproject.toml +29 -0
- tableflip-0.1.0/src/tableflip/__init__.py +24 -0
- tableflip-0.1.0/src/tableflip/_child.py +133 -0
- tableflip-0.1.0/src/tableflip/_errors.py +22 -0
- tableflip-0.1.0/src/tableflip/_fds.py +150 -0
- tableflip-0.1.0/src/tableflip/_parent.py +115 -0
- tableflip-0.1.0/src/tableflip/_process.py +111 -0
- tableflip-0.1.0/src/tableflip/_upgrader.py +322 -0
- tableflip-0.1.0/src/tableflip/testing.py +48 -0
tableflip-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: tableflip
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Zero-downtime process upgrades for Python, inspired by cloudflare/tableflip
|
|
5
|
+
Author: Bernardo Vale
|
|
6
|
+
Author-email: Bernardo Vale <bernardo@kentik.com>
|
|
7
|
+
Requires-Python: >=3.12
|
|
8
|
+
Description-Content-Type: text/markdown
|
|
9
|
+
|
|
10
|
+
# tableflip — Graceful process restarts in Python
|
|
11
|
+
|
|
12
|
+
Zero-downtime upgrades for Python network services. Update running code or configuration without dropping existing connections.
|
|
13
|
+
|
|
14
|
+
This is a Python port of Cloudflare's [tableflip](https://github.com/cloudflare/tableflip) Go library. The core design — fd inheritance, IPC protocol, and state machine — follows the original closely, adapted to Python's `asyncio` runtime.
|
|
15
|
+
|
|
16
|
+
**Works on Linux and macOS.** Raises `NotSupportedError` on Windows (use `tableflip.testing` stubs instead).
|
|
17
|
+
|
|
18
|
+
## How it works
|
|
19
|
+
|
|
20
|
+
1. On `SIGHUP`, the running process spawns a new copy of itself
|
|
21
|
+
2. TCP listener sockets are passed to the new process via fd inheritance
|
|
22
|
+
3. The new process signals readiness after initialization
|
|
23
|
+
4. The old process stops accepting new connections and exits
|
|
24
|
+
|
|
25
|
+
Only one upgrade runs at a time. If the new process crashes during init, the old one keeps serving.
|
|
26
|
+
|
|
27
|
+
## Installation
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
uv add tableflip
|
|
31
|
+
# or
|
|
32
|
+
pip install tableflip
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
Requires Python 3.12+.
|
|
36
|
+
|
|
37
|
+
## Usage
|
|
38
|
+
|
|
39
|
+
```python
|
|
40
|
+
import asyncio
|
|
41
|
+
import signal
|
|
42
|
+
from tableflip import Upgrader, Options
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
async def main():
|
|
46
|
+
upg = await Upgrader.new(Options(pid_file="/tmp/myapp.pid"))
|
|
47
|
+
|
|
48
|
+
# Trigger upgrade on SIGHUP
|
|
49
|
+
loop = asyncio.get_running_loop()
|
|
50
|
+
loop.add_signal_handler(signal.SIGHUP, lambda: asyncio.create_task(do_upgrade(upg)))
|
|
51
|
+
|
|
52
|
+
# Listen must be called before ready()
|
|
53
|
+
sock = await upg.fds.listen("127.0.0.1", 8080)
|
|
54
|
+
|
|
55
|
+
server = await asyncio.start_server(handle_conn, sock=sock)
|
|
56
|
+
|
|
57
|
+
await upg.ready()
|
|
58
|
+
|
|
59
|
+
# Block until an upgrade completes or stop() is called
|
|
60
|
+
await upg.exit().wait()
|
|
61
|
+
|
|
62
|
+
# Graceful shutdown
|
|
63
|
+
server.close()
|
|
64
|
+
await server.wait_closed()
|
|
65
|
+
await upg.wait_for_parent()
|
|
66
|
+
upg.stop()
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
async def do_upgrade(upg: Upgrader):
|
|
70
|
+
try:
|
|
71
|
+
await upg.upgrade()
|
|
72
|
+
except Exception as e:
|
|
73
|
+
print(f"Upgrade failed: {e}")
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
async def handle_conn(reader, writer):
|
|
77
|
+
writer.write(b"HTTP/1.1 200 OK\r\nContent-Length: 2\r\n\r\nok")
|
|
78
|
+
await writer.drain()
|
|
79
|
+
writer.close()
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
asyncio.run(main())
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
Trigger an upgrade:
|
|
86
|
+
|
|
87
|
+
```bash
|
|
88
|
+
kill -HUP $(cat /tmp/myapp.pid)
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
## Integration with systemd
|
|
92
|
+
|
|
93
|
+
```ini
|
|
94
|
+
[Unit]
|
|
95
|
+
Description=My Python service
|
|
96
|
+
|
|
97
|
+
[Service]
|
|
98
|
+
ExecStart=/path/to/venv/bin/python app.py
|
|
99
|
+
ExecReload=/bin/kill -HUP $MAINPID
|
|
100
|
+
PIDFile=/tmp/myapp.pid
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
## Testing
|
|
104
|
+
|
|
105
|
+
Use the `tableflip.testing` module for unit tests or unsupported platforms:
|
|
106
|
+
|
|
107
|
+
```python
|
|
108
|
+
from tableflip.testing import Upgrader, Fds
|
|
109
|
+
|
|
110
|
+
upg = Upgrader() # never upgrades, upgrade() raises NotSupportedError
|
|
111
|
+
await upg.ready() # no-op
|
|
112
|
+
assert not upg.has_parent()
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
## API
|
|
116
|
+
|
|
117
|
+
| Method | Description |
|
|
118
|
+
|--------|-------------|
|
|
119
|
+
| `await Upgrader.new(opts)` | Create an upgrader (one per process) |
|
|
120
|
+
| `await upg.fds.listen(addr, port)` | Get an inherited or new TCP listener |
|
|
121
|
+
| `await upg.ready()` | Signal readiness, notify parent, write PID file |
|
|
122
|
+
| `await upg.upgrade()` | Spawn new process and wait for it to become ready |
|
|
123
|
+
| `upg.exit()` | Returns `asyncio.Event` set when the process should exit |
|
|
124
|
+
| `upg.stop()` | Prevent further upgrades and trigger exit |
|
|
125
|
+
| `await upg.wait_for_parent()` | Block until parent process exits |
|
|
126
|
+
| `upg.has_parent()` | `True` if spawned by a tableflip upgrade |
|
|
127
|
+
|
|
128
|
+
## Acknowledgments
|
|
129
|
+
|
|
130
|
+
This is a Python port of [cloudflare/tableflip](https://github.com/cloudflare/tableflip) by Cloudflare. The Go library was created by Lorenz Bauer and the Cloudflare team. All credit for the design and protocol goes to them.
|
|
131
|
+
|
|
132
|
+
## License
|
|
133
|
+
|
|
134
|
+
See [LICENSE](LICENSE) (BSD 3-Clause, same as the original Go library).
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
# tableflip — Graceful process restarts in Python
|
|
2
|
+
|
|
3
|
+
Zero-downtime upgrades for Python network services. Update running code or configuration without dropping existing connections.
|
|
4
|
+
|
|
5
|
+
This is a Python port of Cloudflare's [tableflip](https://github.com/cloudflare/tableflip) Go library. The core design — fd inheritance, IPC protocol, and state machine — follows the original closely, adapted to Python's `asyncio` runtime.
|
|
6
|
+
|
|
7
|
+
**Works on Linux and macOS.** Raises `NotSupportedError` on Windows (use `tableflip.testing` stubs instead).
|
|
8
|
+
|
|
9
|
+
## How it works
|
|
10
|
+
|
|
11
|
+
1. On `SIGHUP`, the running process spawns a new copy of itself
|
|
12
|
+
2. TCP listener sockets are passed to the new process via fd inheritance
|
|
13
|
+
3. The new process signals readiness after initialization
|
|
14
|
+
4. The old process stops accepting new connections and exits
|
|
15
|
+
|
|
16
|
+
Only one upgrade runs at a time. If the new process crashes during init, the old one keeps serving.
|
|
17
|
+
|
|
18
|
+
## Installation
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
uv add tableflip
|
|
22
|
+
# or
|
|
23
|
+
pip install tableflip
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
Requires Python 3.12+.
|
|
27
|
+
|
|
28
|
+
## Usage
|
|
29
|
+
|
|
30
|
+
```python
|
|
31
|
+
import asyncio
|
|
32
|
+
import signal
|
|
33
|
+
from tableflip import Upgrader, Options
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
async def main():
|
|
37
|
+
upg = await Upgrader.new(Options(pid_file="/tmp/myapp.pid"))
|
|
38
|
+
|
|
39
|
+
# Trigger upgrade on SIGHUP
|
|
40
|
+
loop = asyncio.get_running_loop()
|
|
41
|
+
loop.add_signal_handler(signal.SIGHUP, lambda: asyncio.create_task(do_upgrade(upg)))
|
|
42
|
+
|
|
43
|
+
# Listen must be called before ready()
|
|
44
|
+
sock = await upg.fds.listen("127.0.0.1", 8080)
|
|
45
|
+
|
|
46
|
+
server = await asyncio.start_server(handle_conn, sock=sock)
|
|
47
|
+
|
|
48
|
+
await upg.ready()
|
|
49
|
+
|
|
50
|
+
# Block until an upgrade completes or stop() is called
|
|
51
|
+
await upg.exit().wait()
|
|
52
|
+
|
|
53
|
+
# Graceful shutdown
|
|
54
|
+
server.close()
|
|
55
|
+
await server.wait_closed()
|
|
56
|
+
await upg.wait_for_parent()
|
|
57
|
+
upg.stop()
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
async def do_upgrade(upg: Upgrader):
|
|
61
|
+
try:
|
|
62
|
+
await upg.upgrade()
|
|
63
|
+
except Exception as e:
|
|
64
|
+
print(f"Upgrade failed: {e}")
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
async def handle_conn(reader, writer):
|
|
68
|
+
writer.write(b"HTTP/1.1 200 OK\r\nContent-Length: 2\r\n\r\nok")
|
|
69
|
+
await writer.drain()
|
|
70
|
+
writer.close()
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
asyncio.run(main())
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
Trigger an upgrade:
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
kill -HUP $(cat /tmp/myapp.pid)
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
## Integration with systemd
|
|
83
|
+
|
|
84
|
+
```ini
|
|
85
|
+
[Unit]
|
|
86
|
+
Description=My Python service
|
|
87
|
+
|
|
88
|
+
[Service]
|
|
89
|
+
ExecStart=/path/to/venv/bin/python app.py
|
|
90
|
+
ExecReload=/bin/kill -HUP $MAINPID
|
|
91
|
+
PIDFile=/tmp/myapp.pid
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
## Testing
|
|
95
|
+
|
|
96
|
+
Use the `tableflip.testing` module for unit tests or unsupported platforms:
|
|
97
|
+
|
|
98
|
+
```python
|
|
99
|
+
from tableflip.testing import Upgrader, Fds
|
|
100
|
+
|
|
101
|
+
upg = Upgrader() # never upgrades, upgrade() raises NotSupportedError
|
|
102
|
+
await upg.ready() # no-op
|
|
103
|
+
assert not upg.has_parent()
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
## API
|
|
107
|
+
|
|
108
|
+
| Method | Description |
|
|
109
|
+
|--------|-------------|
|
|
110
|
+
| `await Upgrader.new(opts)` | Create an upgrader (one per process) |
|
|
111
|
+
| `await upg.fds.listen(addr, port)` | Get an inherited or new TCP listener |
|
|
112
|
+
| `await upg.ready()` | Signal readiness, notify parent, write PID file |
|
|
113
|
+
| `await upg.upgrade()` | Spawn new process and wait for it to become ready |
|
|
114
|
+
| `upg.exit()` | Returns `asyncio.Event` set when the process should exit |
|
|
115
|
+
| `upg.stop()` | Prevent further upgrades and trigger exit |
|
|
116
|
+
| `await upg.wait_for_parent()` | Block until parent process exits |
|
|
117
|
+
| `upg.has_parent()` | `True` if spawned by a tableflip upgrade |
|
|
118
|
+
|
|
119
|
+
## Acknowledgments
|
|
120
|
+
|
|
121
|
+
This is a Python port of [cloudflare/tableflip](https://github.com/cloudflare/tableflip) by Cloudflare. The Go library was created by Lorenz Bauer and the Cloudflare team. All credit for the design and protocol goes to them.
|
|
122
|
+
|
|
123
|
+
## License
|
|
124
|
+
|
|
125
|
+
See [LICENSE](LICENSE) (BSD 3-Clause, same as the original Go library).
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "tableflip"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "Zero-downtime process upgrades for Python, inspired by cloudflare/tableflip"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
authors = [
|
|
7
|
+
{ name = "Bernardo Vale", email = "bernardo@kentik.com" }
|
|
8
|
+
]
|
|
9
|
+
requires-python = ">=3.12"
|
|
10
|
+
dependencies = []
|
|
11
|
+
|
|
12
|
+
[dependency-groups]
|
|
13
|
+
dev = [
|
|
14
|
+
"pytest>=8",
|
|
15
|
+
"pytest-asyncio>=0.24",
|
|
16
|
+
"pytest-cov>=7.1.0",
|
|
17
|
+
"ruff>=0.4",
|
|
18
|
+
]
|
|
19
|
+
|
|
20
|
+
[build-system]
|
|
21
|
+
requires = ["uv_build>=0.10.2,<0.11.0"]
|
|
22
|
+
build-backend = "uv_build"
|
|
23
|
+
|
|
24
|
+
[tool.pytest.ini_options]
|
|
25
|
+
asyncio_mode = "strict"
|
|
26
|
+
markers = [
|
|
27
|
+
"integration: integration tests that spawn real processes (slow, Unix-only)",
|
|
28
|
+
]
|
|
29
|
+
addopts = "-m 'not integration'"
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
"""tableflip: zero-downtime process upgrades for Python."""
|
|
2
|
+
|
|
3
|
+
from tableflip._errors import (
|
|
4
|
+
AlreadyUpgradedError,
|
|
5
|
+
NotReadyError,
|
|
6
|
+
NotSupportedError,
|
|
7
|
+
TableflipError,
|
|
8
|
+
TerminatingError,
|
|
9
|
+
UpgradeInProgressError,
|
|
10
|
+
)
|
|
11
|
+
from tableflip._fds import Fds
|
|
12
|
+
from tableflip._upgrader import Options, Upgrader
|
|
13
|
+
|
|
14
|
+
__all__ = [
|
|
15
|
+
"AlreadyUpgradedError",
|
|
16
|
+
"Fds",
|
|
17
|
+
"NotReadyError",
|
|
18
|
+
"NotSupportedError",
|
|
19
|
+
"Options",
|
|
20
|
+
"TableflipError",
|
|
21
|
+
"TerminatingError",
|
|
22
|
+
"Upgrader",
|
|
23
|
+
"UpgradeInProgressError",
|
|
24
|
+
]
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
"""Parent-side IPC: spawn a child process and wait for it to become ready."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import logging
|
|
7
|
+
import os
|
|
8
|
+
import signal
|
|
9
|
+
import struct
|
|
10
|
+
import sys
|
|
11
|
+
|
|
12
|
+
from tableflip._fds import FdName, encode_fd_names
|
|
13
|
+
from tableflip._process import NOTIFY_READY, SENTINEL_ENV_VAR, Env, Process
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class Child:
    """Represents a spawned child process from the parent's perspective."""

    def __init__(
        self,
        proc: Process,
        ready: asyncio.Future[int],
        result: asyncio.Future[Exception | None],
        exited: asyncio.Event,
        names_w: int,
    ) -> None:
        # Handle used to signal and await the OS process.
        self._proc = proc
        # Resolves with the names-pipe write fd once the child reports ready.
        self.ready = ready
        # Resolves with None on clean exit, or an Exception describing failure.
        self.result = result
        # Set when the child process has exited.
        self.exited = exited
        # Write end of the names pipe, kept so it is not closed prematurely.
        self._names_w = names_w

    def kill(self) -> None:
        """Forcefully terminate the child with SIGKILL (no graceful shutdown)."""
        self._proc.signal(signal.SIGKILL)

    def __str__(self) -> str:
        return str(self._proc)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
async def start_child(
    env: Env,
    passed_fds: dict[FdName, int],
) -> Child:
    """Spawn a child process, passing inherited fds and setting up IPC pipes.

    Fd layout in child: [stdin=0, stdout=1, stderr=2, readyW=3, namesR=4, ...passed].
    The Env.new_proc() implementation is responsible for fd placement.

    Raises whatever ``env.new_proc()`` raises; on spawn failure every pipe fd
    created here is closed so nothing leaks.
    """
    ready_r, ready_w = os.pipe()
    names_r, names_w = os.pipe()

    inherited_fds: list[int] = []
    fd_names_list: list[list[str]] = []
    for name, fd in passed_fds.items():
        inherited_fds.append(fd)
        fd_names_list.append(list(name))

    child_environ = env.environ()
    # Sentinel tells the child it was spawned by a tableflip upgrade.
    child_environ[SENTINEL_ENV_VAR] = "yes"

    try:
        proc = env.new_proc(
            sys.executable,
            sys.argv,
            [ready_w, names_r, *inherited_fds],
            child_environ,
        )
    except Exception:
        # Spawn failed: release every pipe end we created above.
        for fd in (ready_r, ready_w, names_r, names_w):
            try:
                os.close(fd)
            except OSError:
                pass
        raise

    # Close the child-side pipe ends in the parent.
    os.close(ready_w)
    os.close(names_r)

    loop = asyncio.get_running_loop()
    ready_future: asyncio.Future[int] = loop.create_future()
    result_future: asyncio.Future[Exception | None] = loop.create_future()
    exited_event = asyncio.Event()

    async def write_names() -> None:
        """Send the fd-name manifest to the child over the names pipe."""
        data = encode_fd_names(fd_names_list)
        # Length-prefix: 4-byte big-endian length then JSON payload.
        # Go uses gob (self-delimiting), Python uses JSON which is not,
        # so we frame the message so the child can read exactly one message
        # then continue reading the same pipe for parent-exit detection.
        framed = struct.pack("!I", len(data)) + data
        await loop.run_in_executor(None, os.write, names_w, framed)

    async def wait_exit() -> None:
        """Reap the child and translate its exit status into result_future."""
        try:
            exit_code = await proc.wait()
            if exit_code == 0:
                result_future.set_result(None)
            else:
                result_future.set_result(Exception(f"exit code {exit_code}"))
        except Exception as e:
            result_future.set_result(e)
        finally:
            exited_event.set()
            # Unblock wait_ready and close names_w if ready was never received.
            # NOTE(review): wait_ready may already have closed ready_r; the
            # OSError guard makes the double close harmless unless the fd
            # number has been reused in between — worth confirming under load.
            for fd in (ready_r, names_w):
                try:
                    os.close(fd)
                except OSError:
                    pass

    async def wait_ready() -> None:
        """Wait for the single ready byte from the child on the ready pipe."""
        try:
            data = await loop.run_in_executor(None, os.read, ready_r, 1)
            if data and data[0] == NOTIFY_READY:
                ready_future.set_result(names_w)
                os.close(ready_r)
        except OSError:
            pass

    # Keep strong references to the background tasks: the event loop holds
    # only weak references to tasks, so an untracked task can be
    # garbage-collected before it finishes. The Child instance anchors them.
    tasks = [
        asyncio.create_task(write_names()),
        asyncio.create_task(wait_exit()),
        asyncio.create_task(wait_ready()),
    ]

    child = Child(
        proc=proc,
        ready=ready_future,
        result=result_future,
        exited=exited_event,
        names_w=names_w,
    )
    child._tasks = tasks  # type: ignore[attr-defined]  # anchor background tasks
    return child
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
class TableflipError(Exception):
    """Base exception for tableflip; every library error derives from it."""


class NotSupportedError(TableflipError):
    """Platform does not support graceful restart."""


class NotReadyError(TableflipError):
    """Process is not ready yet."""


class UpgradeInProgressError(TableflipError):
    """An upgrade is already in progress."""


class AlreadyUpgradedError(TableflipError):
    """Process has already been upgraded."""


class TerminatingError(TableflipError):
    """Process is terminating, no more upgrades allowed."""
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
"""File descriptor manager for passing TCP listeners between processes."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import fcntl
|
|
6
|
+
import json
|
|
7
|
+
import logging
|
|
8
|
+
import os
|
|
9
|
+
import socket
|
|
10
|
+
from typing import TYPE_CHECKING
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from collections.abc import Callable
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
LISTENER_KIND = "listener"
|
|
18
|
+
|
|
19
|
+
FdName = tuple[str, str, str] # (kind, network, addr)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _fd_name_to_list(name: FdName) -> list[str]:
|
|
23
|
+
return list(name)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _list_to_fd_name(parts: list[str]) -> FdName:
|
|
27
|
+
return (parts[0], parts[1], parts[2])
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class _InheritedFd:
    """Wraps an inherited file descriptor."""

    def __init__(self, fd: int, name: FdName) -> None:
        # Raw fd number owned by this wrapper.
        self.fd = fd
        # (kind, network, addr) identity used as the registry key.
        self.name = name

    def close(self) -> None:
        """Best-effort close; an already-closed fd is silently ignored."""
        try:
            os.close(self.fd)
        except OSError:
            pass
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def dup_fd(fd: int) -> int:
    """Return a duplicate of *fd* with the close-on-exec flag already set."""
    duplicate = fcntl.fcntl(fd, fcntl.F_DUPFD_CLOEXEC, 0)
    return duplicate
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _socket_from_fd(fd: int) -> socket.socket:
    """Create a socket object from *fd* without taking ownership of it.

    The fd is duplicated (with close-on-exec) so closing the returned socket
    does not close the original descriptor.
    """
    duplicated = fcntl.fcntl(fd, fcntl.F_DUPFD_CLOEXEC, 0)
    return socket.socket(fileno=duplicated)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class Fds:
    """Manages file descriptors inherited from parent and used by current process.

    File descriptors move from 'inherited' to 'used' when retrieved.
    Only 'used' fds are passed to the child on upgrade.
    """

    def __init__(
        self,
        inherited: dict[FdName, _InheritedFd] | None = None,
    ) -> None:
        # NOTE(review): `inherited or {}` also replaces an explicitly passed
        # *empty* dict with a fresh one — callers must not rely on aliasing.
        self._inherited: dict[FdName, _InheritedFd] = inherited or {}
        self._used: dict[FdName, _InheritedFd] = {}

    async def listen(
        self,
        addr: str,
        port: int,
        *,
        callback: Callable[[str, int], socket.socket] | None = None,
    ) -> socket.socket:
        """Return an inherited TCP listener or create a new one.

        If port is 0 (dynamic), always creates a new listener.
        *callback*, when given, is used instead of the default socket factory.
        """
        if port != 0:
            sock = self._get_inherited_listener(addr, port)
            if sock is not None:
                return sock

        if callback is not None:
            new_sock = callback(addr, port)
        else:
            new_sock = _create_tcp_listener(addr, port)

        # Register under the *actual* bound address so a port-0 request is
        # stored with the kernel-assigned port.
        actual_addr, actual_port = new_sock.getsockname()[:2]
        self._add_listener_locked(actual_addr, actual_port, new_sock)
        return new_sock

    def get_listener(self, addr: str, port: int) -> socket.socket | None:
        """Return an inherited listener or None. Does not create new listeners."""
        return self._get_inherited_listener(addr, port)

    def add_listener(self, addr: str, port: int, sock: socket.socket) -> None:
        """Register a listener socket for passing to child on upgrade."""
        self._add_listener_locked(addr, port, sock)

    def _get_inherited_listener(self, addr: str, port: int) -> socket.socket | None:
        # Claiming moves the entry from 'inherited' to 'used' so the fd is
        # forwarded on the next upgrade instead of being closed as unclaimed.
        key: FdName = (LISTENER_KIND, "tcp", f"{addr}:{port}")
        ifd = self._inherited.pop(key, None)
        if ifd is None:
            return None

        sock = _socket_from_fd(ifd.fd)
        self._used[key] = ifd
        return sock

    def _add_listener_locked(self, addr: str, port: int, sock: socket.socket) -> None:
        # Duplicate the fd so this registry owns its own descriptor,
        # independent of the caller's socket object lifetime.
        key: FdName = (LISTENER_KIND, "tcp", f"{addr}:{port}")
        new_fd = dup_fd(sock.fileno())
        self._used[key] = _InheritedFd(new_fd, key)

    def copy_used(self) -> dict[FdName, int]:
        """Return a snapshot of used fds (name -> fd) for passing to child."""
        return {name: ifd.fd for name, ifd in self._used.items()}

    def close_inherited(self) -> None:
        """Close all inherited fds that were not claimed."""
        for ifd in self._inherited.values():
            ifd.close()
        self._inherited.clear()

    def close_used(self) -> None:
        """Close all used fds."""
        for ifd in self._used.values():
            ifd.close()
        self._used.clear()
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def _create_tcp_listener(addr: str, port: int) -> socket.socket:
|
|
135
|
+
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
|
136
|
+
sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
|
|
137
|
+
sock.setblocking(False)
|
|
138
|
+
sock.bind((addr, port))
|
|
139
|
+
sock.listen(128)
|
|
140
|
+
return sock
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def encode_fd_names(names: list[list[str]]) -> bytes:
    """Serialize fd names as UTF-8 JSON bytes for the IPC pipe."""
    serialized = json.dumps(names)
    return serialized.encode()
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def decode_fd_names(data: bytes) -> list[list[str]]:
    """Parse the JSON fd-name payload received from the IPC pipe."""
    text = data.decode()
    return json.loads(text)
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
"""Child-side IPC: communicate with the parent process that spawned us."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import logging
|
|
7
|
+
import os
|
|
8
|
+
import struct
|
|
9
|
+
|
|
10
|
+
from tableflip._fds import FdName, decode_fd_names
|
|
11
|
+
from tableflip._process import FD_ENV_VAR, NOTIFY_READY, SENTINEL_ENV_VAR, Env
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _read_exact(fd: int, n: int) -> bytes:
|
|
17
|
+
"""Read exactly n bytes from a file descriptor."""
|
|
18
|
+
buf = b""
|
|
19
|
+
while len(buf) < n:
|
|
20
|
+
chunk = os.read(fd, n - len(buf))
|
|
21
|
+
if not chunk:
|
|
22
|
+
return buf
|
|
23
|
+
buf += chunk
|
|
24
|
+
return buf
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class Parent:
    """Represents the parent process from the child's perspective.

    Created by new_parent() when a child detects it was spawned by tableflip.
    Holds fd 3 (ready write pipe) and fd 4 (names read pipe).
    """

    def __init__(
        self,
        ready_w: int,
        result: asyncio.Future[Exception | None],
        exited: asyncio.Event,
    ) -> None:
        # Write end of the ready pipe back to the parent.
        self._ready_w = ready_w
        # Resolves when the parent goes away (None for a clean EOF).
        self.result = result
        # Set once the parent process has exited.
        self.exited = exited

    def send_ready(self) -> None:
        """Write the ready byte to fd 3, then close it.

        The pipe is single-use: it is closed even when the write raises, so
        the parent's blocking read is unblocked either way.
        """
        try:
            os.write(self._ready_w, bytes([NOTIFY_READY]))
        finally:
            os.close(self._ready_w)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def new_parent(env: Env) -> tuple[Parent | None, dict[FdName, int]]:
    """Decode inherited fds and create a Parent if we were spawned by tableflip.

    Returns (None, {}) on first invocation (no parent).
    Returns (Parent, inherited_fds) when spawned by a tableflip upgrade.

    Must be called with a running event loop: it schedules a background task
    that watches the names pipe for parent exit.
    """
    if not env.getenv(SENTINEL_ENV_VAR):
        return None, {}

    # Read fd numbers from environment. Layout: ready_w, names_r, ...inherited.
    fd_str = env.getenv(FD_ENV_VAR)
    if not fd_str:
        return None, {}

    fd_nums = [int(x) for x in fd_str.split(",")]
    ready_w = env.new_file(fd_nums[0], "ready_w")
    names_r = env.new_file(fd_nums[1], "names_r")

    # Read length-prefixed fd names: 4-byte big-endian length then JSON payload.
    length_buf = _read_exact(names_r, 4)
    if length_buf:
        (length,) = struct.unpack("!I", length_buf)
        data = _read_exact(names_r, length) if length > 0 else b""
        names = decode_fd_names(data) if data else []
    else:
        # EOF before the header: the parent died before sending fd names.
        names = []

    # Fds after the first two (ready_w, names_r) are inherited listeners.
    files: dict[FdName, int] = {}
    for i, parts in enumerate(names):
        fd = fd_nums[2 + i]
        env.close_on_exec(fd)
        key: FdName = (parts[0], parts[1], parts[2])
        files[key] = fd

    loop = asyncio.get_running_loop()
    result: asyncio.Future[Exception | None] = loop.create_future()
    exited = asyncio.Event()

    # The names pipe (fd 4) serves double duty: after delivering fd names,
    # the parent holds the write end open. When the parent exits, EOF arrives.
    async def watch_parent_exit() -> None:
        try:
            reader = asyncio.StreamReader()
            transport, _ = await loop.connect_read_pipe(
                lambda: asyncio.StreamReaderProtocol(reader),
                # closefd=False: the fd is closed explicitly in the finally
                # block, not by the file object.
                os.fdopen(names_r, "rb", closefd=False),
            )
            remaining = await reader.read()
            if remaining:
                result.set_result(Exception("unexpected data from parent process"))
            else:
                result.set_result(None)
        except Exception as e:
            result.set_result(
                Exception(f"unexpected error while waiting for parent to exit: {e}")
            )
        finally:
            os.close(names_r)
            exited.set()

    # NOTE(review): the task reference is not retained; the event loop keeps
    # only a weak reference to running tasks, so this watcher could in
    # principle be garbage-collected before completing — consider anchoring it.
    asyncio.create_task(watch_parent_exit())

    return Parent(ready_w, result, exited), files
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
"""Process spawning and environment abstraction for dependency injection."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import os
|
|
7
|
+
import subprocess
|
|
8
|
+
from typing import Protocol, runtime_checkable
|
|
9
|
+
|
|
10
|
+
initial_wd: str = os.getcwd()
|
|
11
|
+
|
|
12
|
+
SENTINEL_ENV_VAR = "TABLEFLIP_HAS_PARENT_7DIU3"
|
|
13
|
+
FD_ENV_VAR = "TABLEFLIP_FDS"
|
|
14
|
+
NOTIFY_READY = 42
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@runtime_checkable
class Process(Protocol):
    """Structural interface for a spawned child process (see OsProcess)."""

    def signal(self, sig: int) -> None: ...
    async def wait(self) -> int: ...
    def __str__(self) -> str: ...
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class OsProcess:
    """Adapts a subprocess.Popen handle to the Process protocol."""

    def __init__(self, proc: subprocess.Popen[bytes]) -> None:
        self._proc = proc

    def signal(self, sig: int) -> None:
        """Deliver *sig* to the child process."""
        self._proc.send_signal(sig)

    async def wait(self) -> int:
        """Block in a worker thread until the child exits; return its code."""
        return await asyncio.get_running_loop().run_in_executor(None, self._proc.wait)

    def __str__(self) -> str:
        return f"pid={self._proc.pid}"
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@runtime_checkable
class Env(Protocol):
    """DI seam for OS interactions, mirroring Go's env struct."""

    # Spawn a process inheriting *fds*, with *environ* as its environment.
    def new_proc(
        self,
        executable: str,
        args: list[str],
        fds: list[int],
        environ: dict[str, str],
    ) -> Process: ...

    # Adopt an inherited fd number; returns the fd to use (*name* is a label).
    def new_file(self, fd: int, name: str) -> int: ...

    # Snapshot of the environment to pass to a child.
    def environ(self) -> dict[str, str]: ...

    # Single environment lookup; "" when unset (see OsEnv.getenv).
    def getenv(self, key: str) -> str: ...

    # Mark *fd* so it is not leaked to future children.
    def close_on_exec(self, fd: int) -> None: ...
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class OsEnv:
    """Production Env implementation using real OS calls."""

    def new_proc(
        self,
        executable: str,
        args: list[str],
        fds: list[int],
        environ: dict[str, str],
    ) -> Process:
        """Spawn *executable* with *args*, inheriting *fds* at their current numbers.

        NOTE(review): mutates the caller's *environ* mapping in place (adds
        FD_ENV_VAR).
        """
        # Pass fds at their natural positions and communicate the fd numbers
        # to the child via an environment variable. The child reads
        # FD_ENV_VAR to discover which fds to use (ready_w, names_r, ...).
        #
        # We avoid remapping fds in the parent because dup2 temporarily
        # clobbers fds used by asyncio's event loop (kqueue, self-pipe),
        # and restoring them isn't enough — kqueue event registrations
        # are lost when the fd is closed/replaced.
        for fd in fds:
            os.set_inheritable(fd, True)

        environ[FD_ENV_VAR] = ",".join(str(fd) for fd in fds)

        proc = subprocess.Popen(
            [executable, *args],
            pass_fds=tuple(fds),
            env=environ,
            cwd=initial_wd,
        )

        # Restore source fds to non-inheritable in the parent
        for fd in fds:
            try:
                os.set_inheritable(fd, False)
            except OSError:
                pass

        return OsProcess(proc)

    def new_file(self, fd: int, name: str) -> int:
        """Adopt an inherited fd as-is; *name* is informational only."""
        return fd

    def environ(self) -> dict[str, str]:
        """Return a mutable copy of the current process environment."""
        return dict(os.environ)

    def getenv(self, key: str) -> str:
        """Return the value of *key*, or "" when unset."""
        return os.environ.get(key, "")

    def close_on_exec(self, fd: int) -> None:
        """Mark *fd* non-inheritable so it is not leaked to future children."""
        os.set_inheritable(fd, False)
|
|
@@ -0,0 +1,322 @@
|
|
|
1
|
+
"""Main Upgrader class — orchestrates zero-downtime process upgrades."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import logging
|
|
7
|
+
import os
|
|
8
|
+
import sys
|
|
9
|
+
import tempfile
|
|
10
|
+
from dataclasses import dataclass
|
|
11
|
+
|
|
12
|
+
from tableflip._child import Child, start_child
|
|
13
|
+
from tableflip._errors import (
|
|
14
|
+
AlreadyUpgradedError,
|
|
15
|
+
NotReadyError,
|
|
16
|
+
NotSupportedError,
|
|
17
|
+
TerminatingError,
|
|
18
|
+
UpgradeInProgressError,
|
|
19
|
+
)
|
|
20
|
+
from tableflip._fds import Fds, _InheritedFd
|
|
21
|
+
from tableflip._parent import Parent, new_parent
|
|
22
|
+
from tableflip._process import Env, OsEnv, initial_wd
|
|
23
|
+
|
|
24
|
+
# Module-level logger; applications configure handlers/levels themselves.
logger = logging.getLogger(__name__)

# Seconds an upgrade waits for the new child to signal readiness before
# the child is killed and the upgrade fails.
DEFAULT_UPGRADE_TIMEOUT: float = 60.0


@dataclass
class Options:
    """Configuration for an Upgrader.

    upgrade_timeout: how long (seconds) to wait for a new child to become
        ready; non-positive values are replaced with
        DEFAULT_UPGRADE_TIMEOUT by new_upgrader().
    pid_file: if set, ready() atomically writes the current PID here.
    """

    upgrade_timeout: float = DEFAULT_UPGRADE_TIMEOUT
    pid_file: str | None = None


# Process-wide singleton guard: Upgrader.new() refuses to build a second one.
_std_env_upgrader: Upgrader | None = None
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class Upgrader:
    """Handles zero-downtime upgrades and passing files between processes.

    Lifecycle: new() (or new_upgrader()) starts the background _run() loop;
    ready() marks this process able to serve; upgrade() forks a successor
    and resolves once it signals readiness; exit() exposes the event that
    tells the old process to shut down.
    """

    def __init__(
        self,
        *,
        env: Env,
        opts: Options,
        parent: Parent | None,
        fds: Fds,
    ) -> None:
        # OS abstraction (OsEnv in production, stubs in tests).
        self._env = env
        self._opts = opts
        # Parent we inherited fds from; None for a first-generation process.
        self._parent = parent
        self.fds = fds
        # Set once ready() has been called; gates upgrade requests in _run().
        self._ready_event = asyncio.Event()
        self._ready_called = False
        # stop() sets _stop_event; _run() reacts by closing fds and setting
        # _exit_event, after which no further upgrades are possible.
        self._stop_event = asyncio.Event()
        self._exit_event = asyncio.Event()
        # Each upgrade() call enqueues a Future that _run() settles.
        self._upgrade_queue: asyncio.Queue[asyncio.Future[None]] = asyncio.Queue()
        self._exit_fd: int | None = None  # prevent GC from closing the exit-detection pipe
        # Cached outcome of the parent's shutdown (see wait_for_parent()).
        self._parent_err: Exception | None = None
        self._parent_err_set = False
        # Background task running _run(); assigned by new_upgrader().
        self._run_task: asyncio.Task[None] | None = None

    @classmethod
    async def new(cls, opts: Options | None = None) -> Upgrader:
        """Create a new Upgrader. Only one per process.

        Raises NotSupportedError on Windows.
        """
        global _std_env_upgrader

        if sys.platform == "win32":
            raise NotSupportedError(
                "tableflip: platform does not support graceful restart"
            )

        # Enforce the process-wide singleton.
        if _std_env_upgrader is not None:
            raise RuntimeError("tableflip: only a single Upgrader allowed")

        if opts is None:
            opts = Options()

        env = OsEnv()
        upg = await new_upgrader(env, opts)
        _std_env_upgrader = upg
        return upg

    async def ready(self) -> None:
        """Signal that the current process is ready to accept connections.

        Closes unused inherited fds and notifies the parent via pipe.
        """
        # Inherited-fd cleanup and the ready event fire only on the first
        # call; the pid-file write and parent notification run every call.
        if not self._ready_called:
            self._ready_called = True
            self.fds.close_inherited()
            self._ready_event.set()

        if self._opts.pid_file:
            write_pid_file(self._opts.pid_file)

        if self._parent is not None:
            self._parent.send_ready()

    def exit(self) -> asyncio.Event:
        """Return event that is set when the process should exit."""
        return self._exit_event

    def stop(self) -> None:
        """Prevent further upgrades and signal exit."""
        if not self._stop_event.is_set():
            self._stop_event.set()

    async def wait_for_parent(self) -> None:
        """Block until the parent has exited.

        Raises if the parent misbehaved during shutdown.
        """
        if self._parent is None:
            return

        # Cache the parent's outcome so repeated calls don't re-await the
        # event and so they re-raise the same error consistently.
        if not self._parent_err_set:
            await self._parent.exited.wait()
            err = self._parent.result.result()
            self._parent_err = err
            self._parent_err_set = True

        if self._parent_err is not None:
            raise self._parent_err

    def has_parent(self) -> bool:
        # True when this process inherited fds from a predecessor.
        return self._parent is not None

    async def upgrade(self) -> None:
        """Trigger an upgrade. Blocks until the new process is ready or fails."""
        if self._stop_event.is_set():
            raise TerminatingError("terminating")
        if self._exit_event.is_set():
            raise AlreadyUpgradedError("already upgraded")

        # Hand the request to _run() and wait for it to settle the future.
        response: asyncio.Future[None] = asyncio.get_running_loop().create_future()
        await self._upgrade_queue.put(response)
        await response

    async def _run(self) -> None:
        """Main event loop — mirrors Go's run() select loop.

        Tracks parent exit, process readiness, stop requests, and upgrade requests.
        Uses asyncio.wait with FIRST_COMPLETED to mimic Go's select.
        """
        parent_exited = self._parent is None
        process_ready = False

        while True:
            # Rebuild the "select" arms each iteration; arms whose condition
            # has already fired are omitted.
            tasks: dict[str, asyncio.Task[object]] = {}

            if not parent_exited and self._parent is not None:
                tasks["parent"] = asyncio.create_task(self._parent.exited.wait())
            if not process_ready:
                tasks["ready"] = asyncio.create_task(self._ready_event.wait())

            tasks["stop"] = asyncio.create_task(self._stop_event.wait())
            tasks["upgrade"] = asyncio.create_task(self._upgrade_queue.get())

            done, _ = await asyncio.wait(
                tasks.values(),
                return_when=asyncio.FIRST_COMPLETED,
            )

            for task in tasks.values():
                if task not in done:
                    task.cancel()

            # Suppress CancelledError from cancelled tasks
            # (CancelledError is a BaseException in modern Python, so it
            # must be listed explicitly alongside Exception).
            for task in tasks.values():
                if task not in done:
                    try:
                        await task
                    except (asyncio.CancelledError, Exception):
                        pass

            for task in done:
                result = task.result()

                # Identify which event completed by checking task identity
                if tasks.get("parent") is task:
                    parent_exited = True
                    continue

                if tasks.get("ready") is task:
                    process_ready = True
                    continue

                if tasks.get("stop") is task:
                    # Terminal path: release fds and tell callers to exit.
                    self.fds.close_used()
                    self._exit_event.set()
                    return

                if tasks.get("upgrade") is task:
                    request: asyncio.Future[None] = result  # type: ignore[assignment]
                    # Upgrades are refused until this process is ready and
                    # the old parent (if any) has gone away.
                    if not process_ready:
                        request.set_exception(
                            NotReadyError("process is not ready yet")
                        )
                        continue
                    if not parent_exited:
                        request.set_exception(RuntimeError("parent hasn't exited"))
                        continue

                    try:
                        exit_fd = await self._do_upgrade()
                        # Keep a reference so the pipe fd stays open until
                        # this process actually exits.
                        self._exit_fd = exit_fd
                        self.fds.close_used()
                        request.set_result(None)
                        self._exit_event.set()
                        return
                    except Exception as e:
                        # Failed upgrade: report to the caller, keep serving.
                        request.set_exception(e)

    async def _do_upgrade(self) -> int:
        """Fork a child and wait for it to become ready. Returns the exit fd."""
        child = await start_child(self._env, self.fds.copy_used())

        try:
            return await asyncio.wait_for(
                self._wait_for_child(child),
                timeout=self._opts.upgrade_timeout,
            )
        except asyncio.TimeoutError:
            # Child never signalled readiness in time; reap it.
            child.kill()
            raise TimeoutError(f"new child {child} timed out") from None

    async def _wait_for_child(self, child: Child) -> int:
        """Wait for child ready or exit, handling concurrent upgrade requests."""
        ready_task = asyncio.create_task(self._wrap_ready(child))
        result_task = asyncio.create_task(self._wrap_result(child))
        # Not an arm of the wait below: runs alongside to reject concurrent
        # upgrade() calls while this one is in flight.
        upgrade_task = asyncio.create_task(self._drain_upgrades())
        stop_task = asyncio.create_task(self._wrap_stop(child))

        try:
            done, _ = await asyncio.wait(
                [ready_task, result_task, stop_task],
                return_when=asyncio.FIRST_COMPLETED,
            )

            # Return (or raise) from whichever arm finished first; if
            # several finished together, set order picks one arbitrarily.
            for task in done:
                return task.result()

        finally:
            # Tear down every arm regardless of outcome.
            for t in [ready_task, result_task, upgrade_task, stop_task]:
                t.cancel()
                try:
                    await t
                except (asyncio.CancelledError, Exception):
                    pass

        raise RuntimeError("unreachable")  # pragma: no cover

    async def _wrap_ready(self, child: Child) -> int:
        # Resolves with the exit fd once the child signals readiness.
        return await child.ready

    async def _wrap_result(self, child: Child) -> int:
        # The child exiting before it becomes ready is always a failure.
        err = await child.result
        if err is None:
            raise RuntimeError(f"child {child} exited")
        raise RuntimeError(f"child {child} exited: {err}")

    async def _wrap_stop(self, child: Child) -> int:
        # stop() during an upgrade aborts it and kills the half-started child.
        await self._stop_event.wait()
        child.kill()
        raise TerminatingError("terminating")

    async def _drain_upgrades(self) -> None:
        """Reject any concurrent upgrade requests while one is in progress."""
        while True:
            request = await self._upgrade_queue.get()
            request.set_exception(UpgradeInProgressError("upgrade in progress"))
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
async def new_upgrader(env: Env, opts: Options) -> Upgrader:
    """Build an Upgrader wired to *env* and start its background run loop.

    Mirrors Go tableflip's newUpgrader: discovers fds inherited from a
    parent process (if any), normalizes the upgrade timeout, and
    schedules _run() on the current event loop.
    """
    if not initial_wd:
        raise RuntimeError("couldn't determine initial working directory")

    parent, files = new_parent(env)

    # A non-positive timeout is meaningless; fall back to the default.
    if opts.upgrade_timeout <= 0:
        opts.upgrade_timeout = DEFAULT_UPGRADE_TIMEOUT

    # Wrap each inherited raw fd so Fds can track its lifecycle by name.
    wrapped = {name: _InheritedFd(fd, name) for name, fd in files.items()}

    upgrader = Upgrader(
        env=env,
        opts=opts,
        parent=parent,
        fds=Fds(inherited=wrapped),
    )
    upgrader._run_task = asyncio.create_task(upgrader._run())
    return upgrader
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
def write_pid_file(path: str) -> None:
    """Atomically write the current PID to *path*.

    The PID is written to a temporary file in the destination directory
    and then renamed over *path*, so readers never observe a partially
    written file. On any failure the temporary file is removed and the
    exception re-raised.
    """
    directory, base = os.path.split(path)
    if not directory:
        directory = initial_wd
    if not directory:
        raise RuntimeError("empty initial working directory")

    tmp_fd = -1
    tmp_name = ""
    try:
        tmp_fd, tmp_name = tempfile.mkstemp(prefix=base, dir=directory)
        os.write(tmp_fd, str(os.getpid()).encode())
        os.close(tmp_fd)
        tmp_fd = -1  # mark closed so the cleanup path doesn't double-close
        os.rename(tmp_name, path)
    except BaseException:
        if tmp_fd >= 0:
            os.close(tmp_fd)
        if tmp_name:
            try:
                os.unlink(tmp_name)
            except OSError:
                pass
        raise
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"""Stub implementations for testing and unsupported platforms."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import socket
|
|
7
|
+
|
|
8
|
+
from tableflip._errors import NotSupportedError
|
|
9
|
+
from tableflip._fds import _create_tcp_listener
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class Fds:
|
|
13
|
+
"""Stub Fds that creates listeners directly (no inheritance)."""
|
|
14
|
+
|
|
15
|
+
async def listen(self, addr: str, port: int) -> socket.socket:
|
|
16
|
+
return _create_tcp_listener(addr, port)
|
|
17
|
+
|
|
18
|
+
def get_listener(self, addr: str, port: int) -> socket.socket | None:
|
|
19
|
+
return None
|
|
20
|
+
|
|
21
|
+
def add_listener(self, addr: str, port: int, sock: socket.socket) -> None:
|
|
22
|
+
pass
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class Upgrader:
    """Stub Upgrader for tests and unsupported platforms.

    Exposes the real Upgrader's surface but never forks: ready(), stop(),
    and wait_for_parent() are no-ops, has_parent() is always False, and
    upgrade() always fails.
    """

    def __init__(self) -> None:
        self.fds = Fds()
        self._exit_event = asyncio.Event()

    async def ready(self) -> None:
        """No parent to notify — nothing to do."""

    def exit(self) -> asyncio.Event:
        """Event that would signal shutdown; the stub never sets it itself."""
        return self._exit_event

    def stop(self) -> None:
        """No background machinery exists, so there is nothing to stop."""

    async def wait_for_parent(self) -> None:
        """The stub never has a parent, so there is nothing to await."""

    def has_parent(self) -> bool:
        return False

    async def upgrade(self) -> None:
        raise NotSupportedError("stub upgrader does not support upgrades")
|