tableflip 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,134 @@
1
+ Metadata-Version: 2.3
2
+ Name: tableflip
3
+ Version: 0.1.0
4
+ Summary: Zero-downtime process upgrades for Python, inspired by cloudflare/tableflip
5
+ Author: Bernardo Vale
6
+ Author-email: Bernardo Vale <bernardo@kentik.com>
7
+ Requires-Python: >=3.12
8
+ Description-Content-Type: text/markdown
9
+
10
+ # tableflip — Graceful process restarts in Python
11
+
12
+ Zero-downtime upgrades for Python network services. Update running code or configuration without dropping existing connections.
13
+
14
+ This is a Python port of Cloudflare's [tableflip](https://github.com/cloudflare/tableflip) Go library. The core design — fd inheritance, IPC protocol, and state machine — follows the original closely, adapted to Python's `asyncio` runtime.
15
+
16
+ **Works on Linux and macOS.** Raises `NotSupportedError` on Windows (use `tableflip.testing` stubs instead).
17
+
18
+ ## How it works
19
+
20
+ 1. On `SIGHUP`, the running process spawns a new copy of itself
21
+ 2. TCP listener sockets are passed to the new process via fd inheritance
22
+ 3. The new process signals readiness after initialization
23
+ 4. The old process stops accepting new connections and exits
24
+
25
+ Only one upgrade runs at a time. If the new process crashes during init, the old one keeps serving.
26
+
27
+ ## Installation
28
+
29
+ ```bash
30
+ uv add tableflip
31
+ # or
32
+ pip install tableflip
33
+ ```
34
+
35
+ Requires Python 3.12+.
36
+
37
+ ## Usage
38
+
39
+ ```python
40
+ import asyncio
41
+ import signal
42
+ from tableflip import Upgrader, Options
43
+
44
+
45
+ async def main():
46
+ upg = await Upgrader.new(Options(pid_file="/tmp/myapp.pid"))
47
+
48
+ # Trigger upgrade on SIGHUP
49
+ loop = asyncio.get_running_loop()
50
+ loop.add_signal_handler(signal.SIGHUP, lambda: asyncio.create_task(do_upgrade(upg)))
51
+
52
+ # Listen must be called before ready()
53
+ sock = await upg.fds.listen("127.0.0.1", 8080)
54
+
55
+ server = await asyncio.start_server(handle_conn, sock=sock)
56
+
57
+ await upg.ready()
58
+
59
+ # Block until an upgrade completes or stop() is called
60
+ await upg.exit().wait()
61
+
62
+ # Graceful shutdown
63
+ server.close()
64
+ await server.wait_closed()
65
+ await upg.wait_for_parent()
66
+ upg.stop()
67
+
68
+
69
+ async def do_upgrade(upg: Upgrader):
70
+ try:
71
+ await upg.upgrade()
72
+ except Exception as e:
73
+ print(f"Upgrade failed: {e}")
74
+
75
+
76
+ async def handle_conn(reader, writer):
77
+ writer.write(b"HTTP/1.1 200 OK\r\nContent-Length: 2\r\n\r\nok")
78
+ await writer.drain()
79
+ writer.close()
80
+
81
+
82
+ asyncio.run(main())
83
+ ```
84
+
85
+ Trigger an upgrade:
86
+
87
+ ```bash
88
+ kill -HUP $(cat /tmp/myapp.pid)
89
+ ```
90
+
91
+ ## Integration with systemd
92
+
93
+ ```ini
94
+ [Unit]
95
+ Description=My Python service
96
+
97
+ [Service]
98
+ ExecStart=/path/to/venv/bin/python app.py
99
+ ExecReload=/bin/kill -HUP $MAINPID
100
+ PIDFile=/tmp/myapp.pid
101
+ ```
102
+
103
+ ## Testing
104
+
105
+ Use the `tableflip.testing` module for unit tests or unsupported platforms:
106
+
107
+ ```python
108
+ from tableflip.testing import Upgrader, Fds
109
+
110
+ upg = Upgrader() # never upgrades, upgrade() raises NotSupportedError
111
+ await upg.ready() # no-op
112
+ assert not upg.has_parent()
113
+ ```
114
+
115
+ ## API
116
+
117
+ | Method | Description |
118
+ |--------|-------------|
119
+ | `await Upgrader.new(opts)` | Create an upgrader (one per process) |
120
+ | `await upg.fds.listen(addr, port)` | Get an inherited or new TCP listener |
121
+ | `await upg.ready()` | Signal readiness, notify parent, write PID file |
122
+ | `await upg.upgrade()` | Spawn new process and wait for it to become ready |
123
+ | `upg.exit()` | Returns `asyncio.Event` set when the process should exit |
124
+ | `upg.stop()` | Prevent further upgrades and trigger exit |
125
+ | `await upg.wait_for_parent()` | Block until parent process exits |
126
+ | `upg.has_parent()` | `True` if spawned by a tableflip upgrade |
127
+
128
+ ## Acknowledgments
129
+
130
+ This is a Python port of [cloudflare/tableflip](https://github.com/cloudflare/tableflip) by Cloudflare. The Go library was created by Lorenz Bauer and the Cloudflare team. All credit for the design and protocol goes to them.
131
+
132
+ ## License
133
+
134
+ See [LICENSE](LICENSE) (BSD 3-Clause, same as the original Go library).
@@ -0,0 +1,125 @@
1
+ # tableflip — Graceful process restarts in Python
2
+
3
+ Zero-downtime upgrades for Python network services. Update running code or configuration without dropping existing connections.
4
+
5
+ This is a Python port of Cloudflare's [tableflip](https://github.com/cloudflare/tableflip) Go library. The core design — fd inheritance, IPC protocol, and state machine — follows the original closely, adapted to Python's `asyncio` runtime.
6
+
7
+ **Works on Linux and macOS.** Raises `NotSupportedError` on Windows (use `tableflip.testing` stubs instead).
8
+
9
+ ## How it works
10
+
11
+ 1. On `SIGHUP`, the running process spawns a new copy of itself
12
+ 2. TCP listener sockets are passed to the new process via fd inheritance
13
+ 3. The new process signals readiness after initialization
14
+ 4. The old process stops accepting new connections and exits
15
+
16
+ Only one upgrade runs at a time. If the new process crashes during init, the old one keeps serving.
17
+
18
+ ## Installation
19
+
20
+ ```bash
21
+ uv add tableflip
22
+ # or
23
+ pip install tableflip
24
+ ```
25
+
26
+ Requires Python 3.12+.
27
+
28
+ ## Usage
29
+
30
+ ```python
31
+ import asyncio
32
+ import signal
33
+ from tableflip import Upgrader, Options
34
+
35
+
36
+ async def main():
37
+ upg = await Upgrader.new(Options(pid_file="/tmp/myapp.pid"))
38
+
39
+ # Trigger upgrade on SIGHUP
40
+ loop = asyncio.get_running_loop()
41
+ loop.add_signal_handler(signal.SIGHUP, lambda: asyncio.create_task(do_upgrade(upg)))
42
+
43
+ # Listen must be called before ready()
44
+ sock = await upg.fds.listen("127.0.0.1", 8080)
45
+
46
+ server = await asyncio.start_server(handle_conn, sock=sock)
47
+
48
+ await upg.ready()
49
+
50
+ # Block until an upgrade completes or stop() is called
51
+ await upg.exit().wait()
52
+
53
+ # Graceful shutdown
54
+ server.close()
55
+ await server.wait_closed()
56
+ await upg.wait_for_parent()
57
+ upg.stop()
58
+
59
+
60
+ async def do_upgrade(upg: Upgrader):
61
+ try:
62
+ await upg.upgrade()
63
+ except Exception as e:
64
+ print(f"Upgrade failed: {e}")
65
+
66
+
67
+ async def handle_conn(reader, writer):
68
+ writer.write(b"HTTP/1.1 200 OK\r\nContent-Length: 2\r\n\r\nok")
69
+ await writer.drain()
70
+ writer.close()
71
+
72
+
73
+ asyncio.run(main())
74
+ ```
75
+
76
+ Trigger an upgrade:
77
+
78
+ ```bash
79
+ kill -HUP $(cat /tmp/myapp.pid)
80
+ ```
81
+
82
+ ## Integration with systemd
83
+
84
+ ```ini
85
+ [Unit]
86
+ Description=My Python service
87
+
88
+ [Service]
89
+ ExecStart=/path/to/venv/bin/python app.py
90
+ ExecReload=/bin/kill -HUP $MAINPID
91
+ PIDFile=/tmp/myapp.pid
92
+ ```
93
+
94
+ ## Testing
95
+
96
+ Use the `tableflip.testing` module for unit tests or unsupported platforms:
97
+
98
+ ```python
99
+ from tableflip.testing import Upgrader, Fds
100
+
101
+ upg = Upgrader() # never upgrades, upgrade() raises NotSupportedError
102
+ await upg.ready() # no-op
103
+ assert not upg.has_parent()
104
+ ```
105
+
106
+ ## API
107
+
108
+ | Method | Description |
109
+ |--------|-------------|
110
+ | `await Upgrader.new(opts)` | Create an upgrader (one per process) |
111
+ | `await upg.fds.listen(addr, port)` | Get an inherited or new TCP listener |
112
+ | `await upg.ready()` | Signal readiness, notify parent, write PID file |
113
+ | `await upg.upgrade()` | Spawn new process and wait for it to become ready |
114
+ | `upg.exit()` | Returns `asyncio.Event` set when the process should exit |
115
+ | `upg.stop()` | Prevent further upgrades and trigger exit |
116
+ | `await upg.wait_for_parent()` | Block until parent process exits |
117
+ | `upg.has_parent()` | `True` if spawned by a tableflip upgrade |
118
+
119
+ ## Acknowledgments
120
+
121
+ This is a Python port of [cloudflare/tableflip](https://github.com/cloudflare/tableflip) by Cloudflare. The Go library was created by Lorenz Bauer and the Cloudflare team. All credit for the design and protocol goes to them.
122
+
123
+ ## License
124
+
125
+ See [LICENSE](LICENSE) (BSD 3-Clause, same as the original Go library).
@@ -0,0 +1,29 @@
1
+ [project]
2
+ name = "tableflip"
3
+ version = "0.1.0"
4
+ description = "Zero-downtime process upgrades for Python, inspired by cloudflare/tableflip"
5
+ readme = "README.md"
6
+ authors = [
7
+ { name = "Bernardo Vale", email = "bernardo@kentik.com" }
8
+ ]
9
+ requires-python = ">=3.12"
10
+ dependencies = []
11
+
12
+ [dependency-groups]
13
+ dev = [
14
+ "pytest>=8",
15
+ "pytest-asyncio>=0.24",
16
+ "pytest-cov>=7.1.0",
17
+ "ruff>=0.4",
18
+ ]
19
+
20
+ [build-system]
21
+ requires = ["uv_build>=0.10.2,<0.11.0"]
22
+ build-backend = "uv_build"
23
+
24
+ [tool.pytest.ini_options]
25
+ asyncio_mode = "strict"
26
+ markers = [
27
+ "integration: integration tests that spawn real processes (slow, Unix-only)",
28
+ ]
29
+ addopts = "-m 'not integration'"
@@ -0,0 +1,24 @@
1
+ """tableflip: zero-downtime process upgrades for Python."""
2
+
3
+ from tableflip._errors import (
4
+ AlreadyUpgradedError,
5
+ NotReadyError,
6
+ NotSupportedError,
7
+ TableflipError,
8
+ TerminatingError,
9
+ UpgradeInProgressError,
10
+ )
11
+ from tableflip._fds import Fds
12
+ from tableflip._upgrader import Options, Upgrader
13
+
14
+ __all__ = [
15
+ "AlreadyUpgradedError",
16
+ "Fds",
17
+ "NotReadyError",
18
+ "NotSupportedError",
19
+ "Options",
20
+ "TableflipError",
21
+ "TerminatingError",
22
+ "Upgrader",
23
+ "UpgradeInProgressError",
24
+ ]
@@ -0,0 +1,133 @@
1
+ """Parent-side IPC: spawn a child process and wait for it to become ready."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import logging
7
+ import os
8
+ import signal
9
+ import struct
10
+ import sys
11
+
12
+ from tableflip._fds import FdName, encode_fd_names
13
+ from tableflip._process import NOTIFY_READY, SENTINEL_ENV_VAR, Env, Process
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
class Child:
    """Parent-side handle for a process spawned during an upgrade.

    Attributes:
        ready: Future resolved with the names-pipe write fd once the child
            reports readiness.
        result: Future resolved with ``None`` on a clean exit, or with an
            exception describing the failure.
        exited: Event set once the child process has terminated.
    """

    def __init__(
        self,
        proc: Process,
        ready: asyncio.Future[int],
        result: asyncio.Future[Exception | None],
        exited: asyncio.Event,
        names_w: int,
    ) -> None:
        self.ready = ready
        self.result = result
        self.exited = exited
        self._proc = proc
        self._names_w = names_w

    def kill(self) -> None:
        """Send SIGKILL to the child process."""
        self._proc.signal(signal.SIGKILL)

    def __str__(self) -> str:
        """Return the wrapped process's string form."""
        return f"{self._proc}"
40
+
41
+
42
async def start_child(
    env: Env,
    passed_fds: dict[FdName, int],
) -> Child:
    """Spawn a child process, passing inherited fds and setting up IPC pipes.

    The child receives ``[ready_w, names_r, *passed]``; Env.new_proc() is
    responsible for fd placement and for telling the child the fd numbers.

    Args:
        env: OS abstraction used to read the environment and spawn the process.
        passed_fds: Mapping of fd name to fd number to hand to the child.

    Returns:
        A Child whose ``ready`` future resolves with the names-pipe write fd
        once the child reports readiness, and whose ``result`` future resolves
        when the child exits.

    Raises:
        Exception: whatever pipe creation, ``env.environ()`` or
            ``env.new_proc()`` raises; all pipe fds are closed before the
            error propagates.
    """
    ready_r, ready_w = os.pipe()
    try:
        names_r, names_w = os.pipe()
    except OSError:
        os.close(ready_r)
        os.close(ready_w)
        raise

    # Keys and values of a dict iterate in the same order, so the i-th name
    # describes the i-th inherited fd.
    inherited_fds: list[int] = list(passed_fds.values())
    fd_names_list: list[list[str]] = [list(name) for name in passed_fds]

    try:
        child_environ = env.environ()
        child_environ[SENTINEL_ENV_VAR] = "yes"
        proc = env.new_proc(
            sys.executable,
            sys.argv,
            [ready_w, names_r, *inherited_fds],
            child_environ,
        )
    except Exception:
        for fd in (ready_r, ready_w, names_r, names_w):
            try:
                os.close(fd)
            except OSError:
                pass
        raise

    # Close the child-side pipe ends in the parent
    os.close(ready_w)
    os.close(names_r)

    loop = asyncio.get_running_loop()
    ready_future: asyncio.Future[int] = loop.create_future()
    result_future: asyncio.Future[Exception | None] = loop.create_future()
    exited_event = asyncio.Event()

    # Parent-side fds that are still open. Both wait_ready() and wait_exit()
    # want to release ready_r; closing a raw fd twice may hit an unrelated
    # descriptor that has reused the number, so every close goes through here.
    open_fds = {ready_r, names_w}

    def close_once(fd: int) -> None:
        if fd not in open_fds:
            return
        open_fds.discard(fd)
        try:
            os.close(fd)
        except OSError:
            pass

    def write_all(fd: int, payload: bytes) -> None:
        # os.write may accept fewer bytes than requested; loop until done.
        view = memoryview(payload)
        while view:
            written = os.write(fd, view)
            view = view[written:]

    async def write_names() -> None:
        data = encode_fd_names(fd_names_list)
        # Length-prefix: 4-byte big-endian length then JSON payload.
        # Go uses gob (self-delimiting), Python uses JSON which is not,
        # so we frame the message so the child can read exactly one message
        # then continue reading the same pipe for parent-exit detection.
        framed = struct.pack("!I", len(data)) + data
        try:
            await loop.run_in_executor(None, write_all, names_w, framed)
        except OSError as exc:
            # The child died (or the pipe was closed) before reading the
            # names; the exit watcher reports the failure.
            logger.debug("failed to send fd names to child %s: %s", proc, exc)

    async def wait_exit() -> None:
        try:
            exit_code = await proc.wait()
        except Exception as e:
            result_future.set_result(e)
        else:
            if exit_code == 0:
                result_future.set_result(None)
            else:
                result_future.set_result(Exception(f"exit code {exit_code}"))
        finally:
            exited_event.set()
            # Release the parent-side pipe ends if they are still open.
            close_once(ready_r)
            close_once(names_w)

    async def wait_ready() -> None:
        try:
            data = await loop.run_in_executor(None, os.read, ready_r, 1)
        except OSError:
            data = b""
        close_once(ready_r)
        if data and data[0] == NOTIFY_READY and not ready_future.done():
            ready_future.set_result(names_w)

    child = Child(
        proc=proc,
        ready=ready_future,
        result=result_future,
        exited=exited_event,
        names_w=names_w,
    )
    # The event loop only keeps weak references to tasks; hold strong ones on
    # the Child so the watchers cannot be garbage-collected mid-flight.
    child._tasks = [
        asyncio.create_task(write_names()),
        asyncio.create_task(wait_exit()),
        asyncio.create_task(wait_ready()),
    ]
    return child
@@ -0,0 +1,22 @@
1
class TableflipError(Exception):
    """Base exception for tableflip.

    The other tableflip error classes derive from this one, so
    ``except TableflipError`` catches any of them.
    """
3
+
4
+
5
class NotSupportedError(TableflipError):
    """Platform does not support graceful restart.

    Raised by ``Upgrader.new()`` on Windows and by the testing stub's
    ``upgrade()``.
    """
7
+
8
+
9
class NotReadyError(TableflipError):
    """Process is not ready yet.

    Delivered to an upgrade request that arrives before ``ready()`` was called.
    """
11
+
12
+
13
class UpgradeInProgressError(TableflipError):
    """An upgrade is already in progress.

    Delivered to upgrade requests made while another upgrade is still waiting
    for its child process.
    """
15
+
16
+
17
class AlreadyUpgradedError(TableflipError):
    """Process has already been upgraded.

    Raised by ``upgrade()`` once the exit event has been set.
    """
19
+
20
+
21
class TerminatingError(TableflipError):
    """Process is terminating, no more upgrades allowed.

    Raised by ``upgrade()`` after ``stop()`` was called, including for an
    upgrade that was still in flight when the stop happened.
    """
@@ -0,0 +1,150 @@
1
+ """File descriptor manager for passing TCP listeners between processes."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import fcntl
6
+ import json
7
+ import logging
8
+ import os
9
+ import socket
10
+ from typing import TYPE_CHECKING
11
+
12
+ if TYPE_CHECKING:
13
+ from collections.abc import Callable
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+ LISTENER_KIND = "listener"
18
+
19
+ FdName = tuple[str, str, str] # (kind, network, addr)
20
+
21
+
22
+ def _fd_name_to_list(name: FdName) -> list[str]:
23
+ return list(name)
24
+
25
+
26
+ def _list_to_fd_name(parts: list[str]) -> FdName:
27
+ return (parts[0], parts[1], parts[2])
28
+
29
+
30
+ class _InheritedFd:
31
+ """Wraps an inherited file descriptor."""
32
+
33
+ def __init__(self, fd: int, name: FdName) -> None:
34
+ self.fd = fd
35
+ self.name = name
36
+
37
+ def close(self) -> None:
38
+ try:
39
+ os.close(self.fd)
40
+ except OSError:
41
+ pass
42
+
43
+
44
def dup_fd(fd: int) -> int:
    """Return a duplicate of *fd* that is marked close-on-exec."""
    duplicate = fcntl.fcntl(fd, fcntl.F_DUPFD_CLOEXEC, 0)
    return duplicate
47
+
48
+
49
def _socket_from_fd(fd: int) -> socket.socket:
    """Create a socket from a file descriptor without closing the original.

    The socket wraps a close-on-exec duplicate of *fd*, so closing the socket
    leaves *fd* open.

    Raises:
        OSError: if *fd* cannot be duplicated or does not refer to a socket.
    """
    new_fd = dup_fd(fd)
    try:
        return socket.socket(fileno=new_fd)
    except BaseException:
        # The socket object never took ownership of the duplicate; release it
        # here so a failed wrap does not leak a descriptor.
        os.close(new_fd)
        raise
53
+
54
+
55
class Fds:
    """Manages file descriptors inherited from parent and used by current process.

    File descriptors move from 'inherited' to 'used' when retrieved.
    Only 'used' fds are passed to the child on upgrade.
    """

    def __init__(
        self,
        inherited: dict[FdName, _InheritedFd] | None = None,
    ) -> None:
        self._inherited: dict[FdName, _InheritedFd] = inherited or {}
        self._used: dict[FdName, _InheritedFd] = {}

    async def listen(
        self,
        addr: str,
        port: int,
        *,
        callback: Callable[[str, int], socket.socket] | None = None,
    ) -> socket.socket:
        """Return an inherited TCP listener or create a new one.

        If port is 0 (dynamic), always creates a new listener.

        Args:
            addr: Address to listen on, exactly as the application spells it.
            port: TCP port; 0 requests a dynamically assigned port.
            callback: Optional factory used instead of the default listener
                constructor; it receives ``(addr, port)``.

        Returns:
            The inherited or newly created listening socket.
        """
        if port != 0:
            sock = self._get_inherited_listener(addr, port)
            if sock is not None:
                return sock

        if callback is not None:
            new_sock = callback(addr, port)
        else:
            new_sock = _create_tcp_listener(addr, port)

        if port != 0:
            # Register under the address the caller asked for. The upgraded
            # process looks listeners up by the requested (addr, port), which
            # differs from getsockname() for spellings such as "localhost" or
            # "" and would otherwise never match.
            key_addr, key_port = addr, port
        else:
            # A dynamic port is only known after binding.
            key_addr, key_port = new_sock.getsockname()[:2]

        try:
            self._add_listener_locked(key_addr, key_port, new_sock)
        except BaseException:
            new_sock.close()
            raise
        return new_sock

    def get_listener(self, addr: str, port: int) -> socket.socket | None:
        """Return an inherited listener or None. Does not create new listeners."""
        return self._get_inherited_listener(addr, port)

    def add_listener(self, addr: str, port: int, sock: socket.socket) -> None:
        """Register a listener socket for passing to child on upgrade."""
        self._add_listener_locked(addr, port, sock)

    def _get_inherited_listener(self, addr: str, port: int) -> socket.socket | None:
        """Claim the inherited listener for (addr, port), or return None."""
        key: FdName = (LISTENER_KIND, "tcp", f"{addr}:{port}")
        ifd = self._inherited.get(key)
        if ifd is None:
            return None

        # Wrap first: if this raises, the fd stays in `_inherited` so that
        # close_inherited() can still release it.
        sock = _socket_from_fd(ifd.fd)
        del self._inherited[key]
        self._used[key] = ifd
        return sock

    def _add_listener_locked(self, addr: str, port: int, sock: socket.socket) -> None:
        """Track a private duplicate of *sock* under (addr, port)."""
        key: FdName = (LISTENER_KIND, "tcp", f"{addr}:{port}")
        new_fd = dup_fd(sock.fileno())
        previous = self._used.get(key)
        self._used[key] = _InheritedFd(new_fd, key)
        if previous is not None:
            # Re-registering a name would otherwise leak the earlier duplicate.
            previous.close()

    def copy_used(self) -> dict[FdName, int]:
        """Return a snapshot of used fds (name -> fd) for passing to child."""
        return {name: ifd.fd for name, ifd in self._used.items()}

    def close_inherited(self) -> None:
        """Close all inherited fds that were not claimed."""
        for ifd in self._inherited.values():
            ifd.close()
        self._inherited.clear()

    def close_used(self) -> None:
        """Close all used fds."""
        for ifd in self._used.values():
            ifd.close()
        self._used.clear()
132
+
133
+
134
+ def _create_tcp_listener(addr: str, port: int) -> socket.socket:
135
+ sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
136
+ sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
137
+ sock.setblocking(False)
138
+ sock.bind((addr, port))
139
+ sock.listen(128)
140
+ return sock
141
+
142
+
143
def encode_fd_names(names: list[list[str]]) -> bytes:
    """Serialize fd names to the JSON bytes sent over the IPC pipe."""
    serialized = json.dumps(names)
    return serialized.encode()
146
+
147
+
148
def decode_fd_names(data: bytes) -> list[list[str]]:
    """Parse the JSON bytes received over the IPC pipe back into fd names."""
    text = data.decode()
    return json.loads(text)
@@ -0,0 +1,115 @@
1
+ """Child-side IPC: communicate with the parent process that spawned us."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import logging
7
+ import os
8
+ import struct
9
+
10
+ from tableflip._fds import FdName, decode_fd_names
11
+ from tableflip._process import FD_ENV_VAR, NOTIFY_READY, SENTINEL_ENV_VAR, Env
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
+ def _read_exact(fd: int, n: int) -> bytes:
17
+ """Read exactly n bytes from a file descriptor."""
18
+ buf = b""
19
+ while len(buf) < n:
20
+ chunk = os.read(fd, n - len(buf))
21
+ if not chunk:
22
+ return buf
23
+ buf += chunk
24
+ return buf
25
+
26
+
27
class Parent:
    """The parent process as seen from a freshly spawned child.

    Built by new_parent() when the child detects a tableflip upgrade.

    Attributes:
        result: Future resolved once the parent-exit watcher finishes, with
            ``None`` or an exception describing what went wrong.
        exited: Event set when the parent-exit watcher finishes.
    """

    def __init__(
        self,
        ready_w: int,
        result: asyncio.Future[Exception | None],
        exited: asyncio.Event,
    ) -> None:
        self.result = result
        self.exited = exited
        self._ready_w = ready_w

    def send_ready(self) -> None:
        """Write the single ready byte to the ready pipe, then close the pipe.

        The pipe is closed even when the write fails.
        """
        pipe_fd = self._ready_w
        try:
            os.write(pipe_fd, bytes([NOTIFY_READY]))
        finally:
            os.close(pipe_fd)
50
+
51
+
52
def new_parent(env: Env) -> tuple[Parent | None, dict[FdName, int]]:
    """Decode inherited fds and create a Parent if we were spawned by tableflip.

    Must be called while an event loop is running: it schedules a background
    task that watches for the parent's exit. The handshake read is blocking.

    Returns:
        ``(None, {})`` on first invocation (no parent), or
        ``(Parent, inherited_fds)`` when spawned by a tableflip upgrade.

    Raises:
        ValueError: if the fd list in the environment or the name message
            received from the parent is malformed or truncated.
    """
    if not env.getenv(SENTINEL_ENV_VAR):
        return None, {}

    # Read fd numbers from environment. Layout: ready_w, names_r, ...inherited.
    fd_str = env.getenv(FD_ENV_VAR)
    if not fd_str:
        return None, {}

    fd_nums = [int(x) for x in fd_str.split(",")]
    if len(fd_nums) < 2:
        raise ValueError(
            f"{FD_ENV_VAR} must list at least the ready and names pipe fds, "
            f"got {fd_str!r}"
        )
    ready_w = env.new_file(fd_nums[0], "ready_w")
    names_r = env.new_file(fd_nums[1], "names_r")

    # Read length-prefixed fd names: 4-byte big-endian length then JSON payload.
    length_buf = _read_exact(names_r, 4)
    names: list[list[str]]
    if not length_buf:
        # EOF before any data: treat as "no names", as before.
        names = []
    elif len(length_buf) < 4:
        raise ValueError("truncated fd-name header from parent process")
    else:
        (length,) = struct.unpack("!I", length_buf)
        data = _read_exact(names_r, length) if length > 0 else b""
        if len(data) != length:
            raise ValueError("truncated fd-name payload from parent process")
        names = decode_fd_names(data) if data else []

    # Validate before touching any fd so a malformed message has no side effects.
    if len(names) > len(fd_nums) - 2:
        raise ValueError(
            f"parent sent {len(names)} fd names but only "
            f"{len(fd_nums) - 2} inherited fds"
        )
    for parts in names:
        if len(parts) < 3:
            raise ValueError(f"malformed fd name from parent process: {parts!r}")

    # Fds after the first two (ready_w, names_r) are inherited listeners.
    files: dict[FdName, int] = {}
    for fd, parts in zip(fd_nums[2:], names):
        env.close_on_exec(fd)
        key: FdName = (parts[0], parts[1], parts[2])
        files[key] = fd

    loop = asyncio.get_running_loop()
    result: asyncio.Future[Exception | None] = loop.create_future()
    exited = asyncio.Event()

    # The names pipe serves double duty: after delivering fd names,
    # the parent holds the write end open. When the parent exits, EOF arrives.
    async def watch_parent_exit() -> None:
        transport = None
        try:
            reader = asyncio.StreamReader()
            transport, _ = await loop.connect_read_pipe(
                lambda: asyncio.StreamReaderProtocol(reader),
                os.fdopen(names_r, "rb", closefd=False),
            )
            remaining = await reader.read()
            if remaining:
                result.set_result(Exception("unexpected data from parent process"))
            else:
                result.set_result(None)
        except Exception as e:
            result.set_result(
                Exception(f"unexpected error while waiting for parent to exit: {e}")
            )
        finally:
            if transport is not None:
                # Unregister the fd from the loop before closing it; a no-op
                # when the transport already shut down on EOF.
                transport.close()
            os.close(names_r)
            exited.set()

    parent = Parent(ready_w, result, exited)
    # The loop only keeps weak references to tasks; anchor the watcher on the
    # Parent so it cannot be garbage-collected while still pending.
    parent._watch_task = asyncio.create_task(watch_parent_exit())

    return parent, files
@@ -0,0 +1,111 @@
1
+ """Process spawning and environment abstraction for dependency injection."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import os
7
+ import subprocess
8
+ from typing import Protocol, runtime_checkable
9
+
10
+ initial_wd: str = os.getcwd()
11
+
12
+ SENTINEL_ENV_VAR = "TABLEFLIP_HAS_PARENT_7DIU3"
13
+ FD_ENV_VAR = "TABLEFLIP_FDS"
14
+ NOTIFY_READY = 42
15
+
16
+
17
@runtime_checkable
class Process(Protocol):
    """Structural interface for a spawned process, as used by the parent side."""

    # Deliver signal number `sig` to the process.
    def signal(self, sig: int) -> None: ...
    # Wait for the process to terminate and return its exit code.
    async def wait(self) -> int: ...
    # Human-readable identifier used in error and log messages.
    def __str__(self) -> str: ...
22
+
23
+
24
class OsProcess:
    """Process implementation backed by a ``subprocess.Popen`` handle."""

    def __init__(self, proc: subprocess.Popen[bytes]) -> None:
        self._proc = proc

    def signal(self, sig: int) -> None:
        """Forward *sig* to the underlying process."""
        self._proc.send_signal(sig)

    async def wait(self) -> int:
        """Wait for the process in the default executor and return its exit code."""
        blocking_wait = self._proc.wait
        exit_code = await asyncio.get_running_loop().run_in_executor(
            None, blocking_wait
        )
        return exit_code

    def __str__(self) -> str:
        """Return ``pid=<pid>`` for the underlying process."""
        return "pid=" + str(self._proc.pid)
39
+
40
+
41
@runtime_checkable
class Env(Protocol):
    """DI seam for OS interactions, mirroring Go's env struct."""

    # Spawn `executable` with `args`, hand it the descriptors in `fds`, and run
    # it with `environ` as its environment.
    def new_proc(
        self,
        executable: str,
        args: list[str],
        fds: list[int],
        environ: dict[str, str],
    ) -> Process: ...

    # Adopt descriptor `fd` under a descriptive `name`; returns the fd to use.
    def new_file(self, fd: int, name: str) -> int: ...

    # Snapshot of the current environment as a plain dict.
    def environ(self) -> dict[str, str]: ...

    # Value of environment variable `key`; callers treat a falsy result as unset.
    def getenv(self, key: str) -> str: ...

    # Mark `fd` so that it is not inherited by processes spawned later.
    def close_on_exec(self, fd: int) -> None: ...
60
+
61
+
62
class OsEnv:
    """Production Env implementation using real OS calls."""

    def new_proc(
        self,
        executable: str,
        args: list[str],
        fds: list[int],
        environ: dict[str, str],
    ) -> Process:
        """Spawn ``executable`` with ``args``, passing ``fds`` to the child.

        The fd numbers are published to the child, in order, through the
        FD_ENV_VAR environment variable. The caller's ``environ`` mapping is
        copied, not modified.

        Raises:
            OSError: if the process cannot be started. The fds are returned to
                non-inheritable in the parent in that case as well.
        """
        # Pass fds at their natural positions and communicate the fd numbers
        # to the child via an environment variable. The child reads
        # FD_ENV_VAR to discover which fds to use (ready_w, names_r, ...).
        #
        # We avoid remapping fds in the parent because dup2 temporarily
        # clobbers fds used by asyncio's event loop (kqueue, self-pipe),
        # and restoring them isn't enough — kqueue event registrations
        # are lost when the fd is closed/replaced.
        child_environ = dict(environ)
        child_environ[FD_ENV_VAR] = ",".join(str(fd) for fd in fds)

        try:
            for fd in fds:
                os.set_inheritable(fd, True)

            proc = subprocess.Popen(
                [executable, *args],
                pass_fds=tuple(fds),
                env=child_environ,
                cwd=initial_wd,
            )
        finally:
            # Restore source fds to non-inheritable in the parent, also when
            # spawning failed, so they cannot leak into unrelated children.
            for fd in fds:
                try:
                    os.set_inheritable(fd, False)
                except OSError:
                    pass

        return OsProcess(proc)

    def new_file(self, fd: int, name: str) -> int:
        """Return *fd* unchanged; *name* is accepted for interface parity only."""
        return fd

    def environ(self) -> dict[str, str]:
        """Return a copy of the current process environment."""
        return dict(os.environ)

    def getenv(self, key: str) -> str:
        """Return the value of *key*, or an empty string when it is unset."""
        return os.environ.get(key, "")

    def close_on_exec(self, fd: int) -> None:
        """Mark *fd* as non-inheritable."""
        os.set_inheritable(fd, False)
@@ -0,0 +1,322 @@
1
+ """Main Upgrader class — orchestrates zero-downtime process upgrades."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import logging
7
+ import os
8
+ import sys
9
+ import tempfile
10
+ from dataclasses import dataclass
11
+
12
+ from tableflip._child import Child, start_child
13
+ from tableflip._errors import (
14
+ AlreadyUpgradedError,
15
+ NotReadyError,
16
+ NotSupportedError,
17
+ TerminatingError,
18
+ UpgradeInProgressError,
19
+ )
20
+ from tableflip._fds import Fds, _InheritedFd
21
+ from tableflip._parent import Parent, new_parent
22
+ from tableflip._process import Env, OsEnv, initial_wd
23
+
24
+ logger = logging.getLogger(__name__)
25
+
26
+ DEFAULT_UPGRADE_TIMEOUT: float = 60.0
27
+
28
+
29
@dataclass
class Options:
    """User-tunable settings for an Upgrader."""

    # Seconds to wait for a new child to become ready before it is killed.
    # Non-positive values are replaced by DEFAULT_UPGRADE_TIMEOUT.
    upgrade_timeout: float = DEFAULT_UPGRADE_TIMEOUT
    # Path where ready() writes the current PID; None disables the PID file.
    pid_file: str | None = None
33
+
34
+
35
+ _std_env_upgrader: Upgrader | None = None
36
+
37
+
38
class Upgrader:
    """Handles zero-downtime upgrades and passing files between processes.

    Create instances with ``await Upgrader.new(...)``; the constructor itself
    does not start the background state machine.
    """

    def __init__(
        self,
        *,
        env: Env,
        opts: Options,
        parent: Parent | None,
        fds: Fds,
    ) -> None:
        self._env = env
        self._opts = opts
        self._parent = parent
        self.fds = fds
        self._ready_event = asyncio.Event()
        self._ready_called = False
        # send_ready() closes its pipe; a second call would write to a closed
        # (or, worse, reused) fd number, so the parent is notified only once.
        self._parent_notified = False
        self._stop_event = asyncio.Event()
        self._exit_event = asyncio.Event()
        self._upgrade_queue: asyncio.Queue[asyncio.Future[None]] = asyncio.Queue()
        self._exit_fd: int | None = None  # prevent GC from closing the exit-detection pipe
        self._parent_err: Exception | None = None
        self._parent_err_set = False
        self._run_task: asyncio.Task[None] | None = None

    @classmethod
    async def new(cls, opts: Options | None = None) -> Upgrader:
        """Create a new Upgrader. Only one per process.

        Raises:
            NotSupportedError: on Windows.
            RuntimeError: if an Upgrader was already created in this process.
        """
        global _std_env_upgrader

        if sys.platform == "win32":
            raise NotSupportedError(
                "tableflip: platform does not support graceful restart"
            )

        if _std_env_upgrader is not None:
            raise RuntimeError("tableflip: only a single Upgrader allowed")

        if opts is None:
            opts = Options()

        env = OsEnv()
        upg = await new_upgrader(env, opts)
        _std_env_upgrader = upg
        return upg

    async def ready(self) -> None:
        """Signal that the current process is ready to accept connections.

        Closes unused inherited fds, writes the PID file (if configured) and
        notifies the parent via pipe. Safe to call more than once: the
        inherited fds are closed and the parent is notified only the first time.
        """
        if not self._ready_called:
            self._ready_called = True
            self.fds.close_inherited()
            self._ready_event.set()

        if self._opts.pid_file:
            write_pid_file(self._opts.pid_file)

        if self._parent is not None and not self._parent_notified:
            self._parent_notified = True
            self._parent.send_ready()

    def exit(self) -> asyncio.Event:
        """Return event that is set when the process should exit."""
        return self._exit_event

    def stop(self) -> None:
        """Prevent further upgrades and signal exit."""
        if not self._stop_event.is_set():
            self._stop_event.set()

    async def wait_for_parent(self) -> None:
        """Block until the parent has exited.

        Returns immediately when there is no parent.

        Raises:
            Exception: the error recorded by the parent watcher, if the parent
                misbehaved during shutdown. Raised again on repeated calls.
        """
        if self._parent is None:
            return

        if not self._parent_err_set:
            await self._parent.exited.wait()
            err = self._parent.result.result()
            self._parent_err = err
            self._parent_err_set = True

        if self._parent_err is not None:
            raise self._parent_err

    def has_parent(self) -> bool:
        """Return True if this process was spawned by a tableflip upgrade."""
        return self._parent is not None

    async def upgrade(self) -> None:
        """Trigger an upgrade. Blocks until the new process is ready or fails.

        Raises:
            TerminatingError: if stop() has been called.
            AlreadyUpgradedError: if an upgrade already succeeded.
            Exception: whatever made the upgrade attempt fail.
        """
        if self._stop_event.is_set():
            raise TerminatingError("terminating")
        if self._exit_event.is_set():
            raise AlreadyUpgradedError("already upgraded")

        response: asyncio.Future[None] = asyncio.get_running_loop().create_future()
        await self._upgrade_queue.put(response)
        await response

    @staticmethod
    def _settle(
        request: asyncio.Future[None],
        error: BaseException | None = None,
    ) -> None:
        """Resolve *request* unless it is already done.

        The caller of upgrade() may have been cancelled or timed out, which
        cancels the future; resolving it again would raise InvalidStateError
        and take down the state machine.
        """
        if request.done():
            return
        if error is None:
            request.set_result(None)
        else:
            request.set_exception(error)

    def _reject_pending(self, *, terminating: bool) -> None:
        """Fail every queued upgrade request so that no caller waits forever."""
        while not self._upgrade_queue.empty():
            request = self._upgrade_queue.get_nowait()
            if terminating:
                error: Exception = TerminatingError("terminating")
            else:
                error = AlreadyUpgradedError("already upgraded")
            self._settle(request, error)

    async def _run(self) -> None:
        """Main event loop — mirrors Go's run() select loop.

        Tracks parent exit, process readiness, stop requests, and upgrade requests.
        Uses asyncio.wait with FIRST_COMPLETED to mimic Go's select. Events that
        complete together are handled in a fixed order (parent, ready, stop,
        upgrade) so that no upgrade request is ever dropped unresolved.
        """
        parent_exited = self._parent is None
        process_ready = False

        while True:
            tasks: dict[str, asyncio.Task[object]] = {}

            if not parent_exited and self._parent is not None:
                tasks["parent"] = asyncio.create_task(self._parent.exited.wait())
            if not process_ready:
                tasks["ready"] = asyncio.create_task(self._ready_event.wait())

            tasks["stop"] = asyncio.create_task(self._stop_event.wait())
            tasks["upgrade"] = asyncio.create_task(self._upgrade_queue.get())

            done, _ = await asyncio.wait(
                tasks.values(),
                return_when=asyncio.FIRST_COMPLETED,
            )

            for task in tasks.values():
                if task not in done:
                    task.cancel()

            # Suppress CancelledError from cancelled tasks
            for task in tasks.values():
                if task not in done:
                    try:
                        await task
                    except (asyncio.CancelledError, Exception):
                        pass

            def fired(name: str) -> bool:
                task = tasks.get(name)
                return task is not None and task in done

            if fired("parent"):
                parent_exited = True
            if fired("ready"):
                process_ready = True

            request: asyncio.Future[None] | None = None
            if fired("upgrade"):
                request = tasks["upgrade"].result()  # type: ignore[assignment]

            if fired("stop"):
                # A request dequeued in the same round must not be left hanging.
                if request is not None:
                    self._settle(request, TerminatingError("terminating"))
                self._reject_pending(terminating=True)
                self.fds.close_used()
                self._exit_event.set()
                return

            if request is None:
                continue

            if not process_ready:
                self._settle(request, NotReadyError("process is not ready yet"))
                continue
            if not parent_exited:
                self._settle(request, RuntimeError("parent hasn't exited"))
                continue

            try:
                exit_fd = await self._do_upgrade()
                self._exit_fd = exit_fd
                self.fds.close_used()
            except Exception as e:
                self._settle(request, e)
                continue

            self._settle(request)
            self._reject_pending(terminating=False)
            self._exit_event.set()
            return

    async def _do_upgrade(self) -> int:
        """Fork a child and wait for it to become ready. Returns the exit fd.

        Raises:
            TimeoutError: if the child is not ready within the upgrade timeout;
                the child is killed first.
        """
        child = await start_child(self._env, self.fds.copy_used())

        try:
            return await asyncio.wait_for(
                self._wait_for_child(child),
                timeout=self._opts.upgrade_timeout,
            )
        except asyncio.TimeoutError:
            child.kill()
            raise TimeoutError(f"new child {child} timed out") from None

    async def _wait_for_child(self, child: Child) -> int:
        """Wait for child ready or exit, handling concurrent upgrade requests."""
        ready_task = asyncio.create_task(self._wrap_ready(child))
        result_task = asyncio.create_task(self._wrap_result(child))
        upgrade_task = asyncio.create_task(self._drain_upgrades())
        stop_task = asyncio.create_task(self._wrap_stop(child))

        try:
            done, _ = await asyncio.wait(
                [ready_task, result_task, stop_task],
                return_when=asyncio.FIRST_COMPLETED,
            )

            # If several finished together, failures win: a finished stop task
            # has already killed the child, and a child that exited cannot
            # take over even if it reported readiness first.
            for task in (stop_task, result_task, ready_task):
                if task in done:
                    return task.result()

        finally:
            for t in [ready_task, result_task, upgrade_task, stop_task]:
                t.cancel()
                try:
                    await t
                except (asyncio.CancelledError, Exception):
                    pass

        raise RuntimeError("unreachable")  # pragma: no cover

    async def _wrap_ready(self, child: Child) -> int:
        """Return the exit fd once the child reports readiness."""
        return await child.ready

    async def _wrap_result(self, child: Child) -> int:
        """Raise once the child exits; an exit before readiness is a failure."""
        err = await child.result
        if err is None:
            raise RuntimeError(f"child {child} exited")
        raise RuntimeError(f"child {child} exited: {err}")

    async def _wrap_stop(self, child: Child) -> int:
        """Kill the child and raise TerminatingError once stop() is called."""
        await self._stop_event.wait()
        child.kill()
        raise TerminatingError("terminating")

    async def _drain_upgrades(self) -> None:
        """Reject any concurrent upgrade requests while one is in progress."""
        while True:
            request = await self._upgrade_queue.get()
            self._settle(request, UpgradeInProgressError("upgrade in progress"))
276
+
277
+
278
async def new_upgrader(env: Env, opts: Options) -> Upgrader:
    """Build an Upgrader on top of *env* and start its background loop.

    Internal constructor — mirrors Go's newUpgrader. A non-positive
    ``opts.upgrade_timeout`` is replaced in place by the default.

    Raises:
        RuntimeError: if the initial working directory is unknown.
    """
    if not initial_wd:
        raise RuntimeError("couldn't determine initial working directory")

    parent, files = new_parent(env)

    if opts.upgrade_timeout <= 0:
        opts.upgrade_timeout = DEFAULT_UPGRADE_TIMEOUT

    # Wrap each inherited descriptor together with the name it arrived under.
    inherited = {name: _InheritedFd(fd, name) for name, fd in files.items()}

    upgrader = Upgrader(
        env=env,
        opts=opts,
        parent=parent,
        fds=Fds(inherited=inherited),
    )
    upgrader._run_task = asyncio.create_task(upgrader._run())
    return upgrader
296
+
297
+
298
def write_pid_file(path: str) -> None:
    """Write the current PID to *path* via a temp file and rename.

    The temp file is created next to the target (or in the initial working
    directory when *path* has no directory part) and removed again if anything
    fails.

    Raises:
        RuntimeError: if no directory can be determined for the temp file.
    """
    directory, base_name = os.path.split(path)
    directory = directory or initial_wd
    if not directory:
        raise RuntimeError("empty initial working directory")

    handle = -1
    scratch_path = ""
    try:
        handle, scratch_path = tempfile.mkstemp(prefix=base_name, dir=directory)
        os.write(handle, f"{os.getpid()}".encode())
        os.close(handle)
        handle = -1  # closed; the cleanup below must not close it again
        os.rename(scratch_path, path)
    except BaseException:
        if handle >= 0:
            os.close(handle)
        if scratch_path:
            try:
                os.unlink(scratch_path)
            except OSError:
                pass
        raise
@@ -0,0 +1,48 @@
1
+ """Stub implementations for testing and unsupported platforms."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import socket
7
+
8
+ from tableflip._errors import NotSupportedError
9
+ from tableflip._fds import _create_tcp_listener
10
+
11
+
12
class Fds:
    """Stub Fds that creates listeners directly (no inheritance)."""

    async def listen(
        self,
        addr: str,
        port: int,
        *,
        callback=None,
    ) -> socket.socket:
        """Create and return a new listener for (addr, port).

        Args:
            addr: Address to listen on.
            port: TCP port; 0 requests a dynamically assigned port.
            callback: Optional ``callable(addr, port) -> socket`` used instead
                of the default listener constructor. Accepted so that code
                written against the real ``Fds.listen`` runs unchanged
                against this stub.
        """
        if callback is not None:
            return callback(addr, port)
        return _create_tcp_listener(addr, port)

    def get_listener(self, addr: str, port: int) -> socket.socket | None:
        """Always return None: the stub never inherits listeners."""
        return None

    def add_listener(self, addr: str, port: int, sock: socket.socket) -> None:
        """Accept and ignore the listener; nothing is passed to a child."""
        pass
23
+
24
+
25
class Upgrader:
    """Drop-in stand-in for the real Upgrader that never performs an upgrade."""

    def __init__(self) -> None:
        self._exit_event = asyncio.Event()
        self.fds = Fds()

    async def ready(self) -> None:
        """Do nothing; there is no parent to notify."""
        return None

    def exit(self) -> asyncio.Event:
        """Return the exit event; this stub never sets it."""
        return self._exit_event

    def stop(self) -> None:
        """Do nothing."""
        return None

    async def wait_for_parent(self) -> None:
        """Return immediately; the stub has no parent."""
        return None

    def has_parent(self) -> bool:
        """Always report that there is no parent."""
        return False

    async def upgrade(self) -> None:
        """Always fail: upgrades are not available on the stub."""
        raise NotSupportedError("stub upgrader does not support upgrades")