warmpool 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- warmpool-0.1.1/PKG-INFO +80 -0
- warmpool-0.1.1/README.md +61 -0
- warmpool-0.1.1/pyproject.toml +37 -0
- warmpool-0.1.1/src/warmpool/__init__.py +20 -0
- warmpool-0.1.1/src/warmpool/_exceptions.py +21 -0
- warmpool-0.1.1/src/warmpool/_logging.py +80 -0
- warmpool-0.1.1/src/warmpool/_worker.py +76 -0
- warmpool-0.1.1/src/warmpool/pool.py +642 -0
- warmpool-0.1.1/src/warmpool/py.typed +0 -0
warmpool-0.1.1/PKG-INFO
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: warmpool
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Summary: Process pool with hard-kill timeouts and import warming
|
|
5
|
+
Keywords: subprocess,pool,timeout,multiprocessing
|
|
6
|
+
Author: Michael Dawson-Haggerty
|
|
7
|
+
License-Expression: MIT
|
|
8
|
+
Classifier: Development Status :: 3 - Alpha
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
15
|
+
Requires-Dist: psutil>=5.9
|
|
16
|
+
Requires-Python: >=3.10
|
|
17
|
+
Project-URL: Repository, https://github.com/slopden/warmpool
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
|
|
20
|
+
<p align="center">
|
|
21
|
+
<img src="static/logo.svg" alt="warmpool" width="480">
|
|
22
|
+
</p>
|
|
23
|
+
|
|
24
|
+
<p align="center">
|
|
25
|
+
<strong>A single-worker subprocess pool that can actually kill C extensions.</strong>
|
|
26
|
+
</p>
|
|
27
|
+
|
|
28
|
+
<p align="center">
|
|
29
|
+
<a href="https://pypi.org/project/warmpool/"><img alt="PyPI" src="https://img.shields.io/pypi/v/warmpool?color=ff6b35"></a>
|
|
30
|
+
<a href="https://github.com/slopden/warmpool/blob/main/LICENSE"><img alt="License" src="https://img.shields.io/github/license/slopden/warmpool?color=ff3d00"></a>
|
|
31
|
+
<img alt="Python" src="https://img.shields.io/pypi/pyversions/warmpool?color=1a1a2e">
|
|
32
|
+
</p>
|
|
33
|
+
|
|
34
|
+
---
|
|
35
|
+
|
|
36
|
+
A "ProcessPool-like-executor" with hard-kill timeouts and import warming. The basic problem is that if you freeze up deep in a C-extension Python timeout-handling stuff doesn't work. `warmpool` runs functions in a spawned subprocess, and if they exceed their timeout it SIGTERM+SIGKILL the process and all children if the C extension has spawned anything.
|
|
37
|
+
|
|
38
|
+
- It calls a "warming function" in each new process, so you can have it keep a process warmed with `import scipy, numpy, etc` which can easily be 2+ seconds.
|
|
39
|
+
- The timeouts actually work regardless of what happens in the function.
|
|
40
|
+
- It has an option to keep a spare process warm in the background so it can rotate cleanly without eating an import period.
|
|
41
|
+
- It sends logs back to the parent through a pipe.
|
|
42
|
+
|
|
43
|
+
```python
|
|
44
|
+
import time
|
|
45
|
+
from warmpool import WarmPool
|
|
46
|
+
|
|
47
|
+
def warm_imports():
|
|
48
|
+
import numpy
|
|
49
|
+
import scipy.linalg
|
|
50
|
+
|
|
51
|
+
def eigh_huge(n=5000):
|
|
52
|
+
"""Stuck in LAPACK C code — only SIGKILL works."""
|
|
53
|
+
import numpy as np
|
|
54
|
+
from scipy import linalg
|
|
55
|
+
a = np.random.rand(n, n)
|
|
56
|
+
a = a + a.T
|
|
57
|
+
return linalg.eigh(a)
|
|
58
|
+
|
|
59
|
+
def add(a=0, b=0):
|
|
60
|
+
return a + b
|
|
61
|
+
|
|
62
|
+
pool = WarmPool(warming=warm_imports)
|
|
63
|
+
|
|
64
|
+
# numpy+scipy are already imported — no 2s wait
|
|
65
|
+
start = time.perf_counter()
|
|
66
|
+
try:
|
|
67
|
+
pool.run(eigh_huge, timeout=0.5, n=5000)
|
|
68
|
+
except TimeoutError:
|
|
69
|
+
print(f"killed after {time.perf_counter() - start:.2f}s")
|
|
70
|
+
|
|
71
|
+
# pool recovered via spare
|
|
72
|
+
result = pool.run(add, timeout=5.0, a=2, b=3)
|
|
73
|
+
print(f"recovered: add(2, 3) = {result}")
|
|
74
|
+
pool.shutdown()
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
```
|
|
78
|
+
killed after 0.53s
|
|
79
|
+
recovered: add(2, 3) = 5
|
|
80
|
+
```
|
warmpool-0.1.1/README.md
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
<p align="center">
|
|
2
|
+
<img src="static/logo.svg" alt="warmpool" width="480">
|
|
3
|
+
</p>
|
|
4
|
+
|
|
5
|
+
<p align="center">
|
|
6
|
+
<strong>A single-worker subprocess pool that can actually kill C extensions.</strong>
|
|
7
|
+
</p>
|
|
8
|
+
|
|
9
|
+
<p align="center">
|
|
10
|
+
<a href="https://pypi.org/project/warmpool/"><img alt="PyPI" src="https://img.shields.io/pypi/v/warmpool?color=ff6b35"></a>
|
|
11
|
+
<a href="https://github.com/slopden/warmpool/blob/main/LICENSE"><img alt="License" src="https://img.shields.io/github/license/slopden/warmpool?color=ff3d00"></a>
|
|
12
|
+
<img alt="Python" src="https://img.shields.io/pypi/pyversions/warmpool?color=1a1a2e">
|
|
13
|
+
</p>
|
|
14
|
+
|
|
15
|
+
---
|
|
16
|
+
|
|
17
|
+
A "ProcessPool-like-executor" with hard-kill timeouts and import warming. The basic problem is that if you freeze up deep in a C-extension Python timeout-handling stuff doesn't work. `warmpool` runs functions in a spawned subprocess, and if they exceed their timeout it SIGTERM+SIGKILL the process and all children if the C extension has spawned anything.
|
|
18
|
+
|
|
19
|
+
- It calls a "warming function" in each new process, so you can have it keep a process warmed with `import scipy, numpy, etc` which can easily be 2+ seconds.
|
|
20
|
+
- The timeouts actually work regardless of what happens in the function.
|
|
21
|
+
- It has an option to keep a spare process warm in the background so it can rotate cleanly without eating an import period.
|
|
22
|
+
- It sends logs back to the parent through a pipe.
|
|
23
|
+
|
|
24
|
+
```python
|
|
25
|
+
import time
|
|
26
|
+
from warmpool import WarmPool
|
|
27
|
+
|
|
28
|
+
def warm_imports():
|
|
29
|
+
import numpy
|
|
30
|
+
import scipy.linalg
|
|
31
|
+
|
|
32
|
+
def eigh_huge(n=5000):
|
|
33
|
+
"""Stuck in LAPACK C code — only SIGKILL works."""
|
|
34
|
+
import numpy as np
|
|
35
|
+
from scipy import linalg
|
|
36
|
+
a = np.random.rand(n, n)
|
|
37
|
+
a = a + a.T
|
|
38
|
+
return linalg.eigh(a)
|
|
39
|
+
|
|
40
|
+
def add(a=0, b=0):
|
|
41
|
+
return a + b
|
|
42
|
+
|
|
43
|
+
pool = WarmPool(warming=warm_imports)
|
|
44
|
+
|
|
45
|
+
# numpy+scipy are already imported — no 2s wait
|
|
46
|
+
start = time.perf_counter()
|
|
47
|
+
try:
|
|
48
|
+
pool.run(eigh_huge, timeout=0.5, n=5000)
|
|
49
|
+
except TimeoutError:
|
|
50
|
+
print(f"killed after {time.perf_counter() - start:.2f}s")
|
|
51
|
+
|
|
52
|
+
# pool recovered via spare
|
|
53
|
+
result = pool.run(add, timeout=5.0, a=2, b=3)
|
|
54
|
+
print(f"recovered: add(2, 3) = {result}")
|
|
55
|
+
pool.shutdown()
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
```
|
|
59
|
+
killed after 0.53s
|
|
60
|
+
recovered: add(2, 3) = 5
|
|
61
|
+
```
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "warmpool"
|
|
3
|
+
version = "0.1.1"
|
|
4
|
+
description = "Process pool with hard-kill timeouts and import warming"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
license = "MIT"
|
|
7
|
+
requires-python = ">=3.10"
|
|
8
|
+
dependencies = ["psutil>=5.9"]
|
|
9
|
+
authors = [{name = "Michael Dawson-Haggerty"}]
|
|
10
|
+
keywords = ["subprocess", "pool", "timeout", "multiprocessing"]
|
|
11
|
+
classifiers = [
|
|
12
|
+
"Development Status :: 3 - Alpha",
|
|
13
|
+
"License :: OSI Approved :: MIT License",
|
|
14
|
+
"Programming Language :: Python :: 3",
|
|
15
|
+
"Programming Language :: Python :: 3.10",
|
|
16
|
+
"Programming Language :: Python :: 3.11",
|
|
17
|
+
"Programming Language :: Python :: 3.12",
|
|
18
|
+
"Programming Language :: Python :: 3.13",
|
|
19
|
+
]
|
|
20
|
+
|
|
21
|
+
[project.urls]
|
|
22
|
+
Repository = "https://github.com/slopden/warmpool"
|
|
23
|
+
|
|
24
|
+
[dependency-groups]
|
|
25
|
+
dev = [
|
|
26
|
+
"pytest>=8.0",
|
|
27
|
+
"pytest-asyncio>=0.24",
|
|
28
|
+
"scipy>=1.15.3",
|
|
29
|
+
]
|
|
30
|
+
|
|
31
|
+
[build-system]
|
|
32
|
+
requires = ["uv_build>=0.10.9,<0.11.0"]
|
|
33
|
+
build-backend = "uv_build"
|
|
34
|
+
|
|
35
|
+
[tool.pytest.ini_options]
|
|
36
|
+
testpaths = ["tests"]
|
|
37
|
+
asyncio_mode = "auto"
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
"""warmpool — single-worker subprocess pool with hard-kill timeouts.
|
|
2
|
+
|
|
3
|
+
Usage
|
|
4
|
+
-----
|
|
5
|
+
>>> from warmpool import WarmPool, PoolStatus
|
|
6
|
+
>>> pool = WarmPool(max_tasks=100, keep_spare=True)
|
|
7
|
+
>>> result = pool.run(my_func, timeout=10.0, x=42)
|
|
8
|
+
>>> pool.status is PoolStatus.READY
|
|
9
|
+
True
|
|
10
|
+
>>> pool.shutdown()
|
|
11
|
+
|
|
12
|
+
Memory-based rotation:
|
|
13
|
+
|
|
14
|
+
>>> pool = WarmPool(max_memory=500 * 1024 * 1024)
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from ._exceptions import ProcessPoolExhausted
|
|
18
|
+
from .pool import PoolStatus, WarmPool
|
|
19
|
+
|
|
20
|
+
__all__ = ["PoolStatus", "WarmPool", "ProcessPoolExhausted"]
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""Exception types for the warmpool package."""
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class ProcessPoolExhausted(Exception):
|
|
5
|
+
"""The worker process is dead or has hit its task limit.
|
|
6
|
+
|
|
7
|
+
When ``keep_spare=False``, the caller must create a new
|
|
8
|
+
:class:`~warmpool.WarmPool` instance to continue.
|
|
9
|
+
|
|
10
|
+
Parameters
|
|
11
|
+
----------
|
|
12
|
+
message
|
|
13
|
+
Human-readable description of why the pool is exhausted.
|
|
14
|
+
exit_code
|
|
15
|
+
The worker's exit code, if it died. ``None`` when the pool
|
|
16
|
+
was shut down explicitly or the code is unavailable.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
def __init__(self, message: str, exit_code: int | None = None):
|
|
20
|
+
super().__init__(message)
|
|
21
|
+
self.exit_code = exit_code
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
"""Pipe-based logging for worker subprocesses.
|
|
2
|
+
|
|
3
|
+
Provides :class:`PipeHandler` (installed in workers) and
|
|
4
|
+
:func:`forward_subprocess_log` (called in the parent) so that log
|
|
5
|
+
records cross the process boundary as structured dicts.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import logging
|
|
11
|
+
import traceback
|
|
12
|
+
from multiprocessing.connection import Connection
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class PipeHandler(logging.Handler):
    """Logging handler that serializes records and sends them through a
    multiprocessing pipe as structured dicts (JSON-ready).

    Installed in the **worker** subprocess so that all log output is
    forwarded to the parent process over the pipe.

    Parameters
    ----------
    connection
        The child-side :class:`~multiprocessing.connection.Connection`.
    """

    def __init__(self, connection: Connection):
        super().__init__()
        self.connection = connection

    def emit(self, record: logging.LogRecord) -> None:
        """Serialize *record* to a dict and send it over the pipe.

        Parameters
        ----------
        record
            The log record to forward.
        """
        try:
            entry: dict[str, Any] = {
                "timestamp": record.created,
                "level": record.levelname,
                "message": record.getMessage(),
                "logger": record.name,
                "process_id": record.process,
            }
            if record.exc_info and record.exc_info[1] is not None:
                entry["exception"] = "".join(
                    traceback.format_exception(*record.exc_info)
                )
            self.connection.send(("log", entry, {}))
        except OSError:
            # Parent side of the pipe is gone (typical during
            # shutdown); there is nowhere left to report to.
            pass
        except Exception:
            # Anything else (unpicklable payload, formatting bug) goes
            # through the standard logging error path, which respects
            # logging.raiseExceptions instead of hiding the failure.
            self.handleError(record)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def forward_subprocess_log(
|
|
59
|
+
payload: dict[str, Any],
|
|
60
|
+
logger: logging.Logger | None = None,
|
|
61
|
+
) -> None:
|
|
62
|
+
"""Re-emit a subprocess log record via the parent's logging system.
|
|
63
|
+
|
|
64
|
+
Parameters
|
|
65
|
+
----------
|
|
66
|
+
payload
|
|
67
|
+
The structured dict received from :class:`PipeHandler`.
|
|
68
|
+
logger
|
|
69
|
+
Logger to emit on. Defaults to ``warmpool.subprocess``.
|
|
70
|
+
"""
|
|
71
|
+
if logger is None:
|
|
72
|
+
logger = logging.getLogger("warmpool.subprocess")
|
|
73
|
+
level = getattr(logging, payload.get("level", "INFO"), logging.INFO)
|
|
74
|
+
message = payload.get("message", "")
|
|
75
|
+
extra = {
|
|
76
|
+
k: v
|
|
77
|
+
for k, v in payload.items()
|
|
78
|
+
if k not in ("level", "message", "levelname", "levelno")
|
|
79
|
+
}
|
|
80
|
+
logger.log(level, message, extra=extra)
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
"""Worker subprocess entry point.
|
|
2
|
+
|
|
3
|
+
This module is imported by the spawned child process. It sets up
|
|
4
|
+
pipe-based logging, runs the optional warming callable, then enters a
|
|
5
|
+
receive-execute-send loop until the parent sends a shutdown sentinel
|
|
6
|
+
(``func is None``) or the pipe breaks.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import logging
|
|
12
|
+
import time
|
|
13
|
+
from multiprocessing.connection import Connection
|
|
14
|
+
from typing import Callable
|
|
15
|
+
|
|
16
|
+
from ._logging import PipeHandler
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _worker_process(
    connection: Connection,
    log_level: int = logging.DEBUG,
    warming: Callable | None = None,
) -> None:
    """Entry point for the worker subprocess.

    Parameters
    ----------
    connection
        Child-side pipe connection shared with the parent.
    log_level
        Effective level installed on the worker's root logger.
    warming
        Optional callable invoked once on startup (e.g. to pre-import
        modules). Its return value is sent to the parent.

    Notes
    -----
    1. Replaces all root-logger handlers with a :class:`PipeHandler` so
       every log record is forwarded to the parent as a structured dict.
    2. Calls *warming* if provided. If it raises, an ``("error", ...)``
       message is sent so the parent fails fast with a diagnostic
       instead of blocking for its full ready timeout.
    3. Sends a ``("ready", init_result, {})`` message, then enters the
       receive-execute-send loop until the parent sends a shutdown
       sentinel (``func is None``) or the pipe breaks.
    """
    # Route all log records through the pipe instead of the handlers
    # inherited from the parent's streams.
    root = logging.getLogger()
    root.handlers.clear()
    root.addHandler(PipeHandler(connection))
    root.setLevel(log_level)

    try:
        # A warming failure previously killed the worker silently; the
        # parent would wait out its whole ready timeout. Report it.
        try:
            init_result = warming() if warming is not None else None
        except Exception as error:
            connection.send(("error", RuntimeError(repr(error)), {}))
            return
        connection.send(("ready", init_result, {}))

        while True:
            try:
                # recv() blocks until a task (or sentinel) arrives. The
                # old ``poll(timeout=None)`` guard was dead code: a None
                # timeout blocks until data is available and returns True.
                function, args, kwargs = connection.recv()
                if function is None:  # shutdown sentinel
                    break

                start = time.perf_counter()
                result = function(*args, **kwargs)
                elapsed_ms = int((time.perf_counter() - start) * 1000)

                connection.send(("success", result, {"elapsed_ms": elapsed_ms}))
            except Exception as error:
                # Guard against unpicklable exceptions (common with
                # C-API wrappers). If the exception can't be pickled
                # the parent would see a silent worker death instead
                # of a useful error message.
                try:
                    connection.send(("error", error, {}))
                except Exception:
                    connection.send(("error", RuntimeError(repr(error)), {}))
    except (EOFError, BrokenPipeError):
        # Parent went away; nothing more to do.
        pass
    finally:
        connection.close()
|
|
@@ -0,0 +1,642 @@
|
|
|
1
|
+
"""Single-worker subprocess pool with hard-kill timeouts.
|
|
2
|
+
|
|
3
|
+
Runs callables in a spawned subprocess that can be SIGKILLed when
|
|
4
|
+
C-API code (OpenCASCADE, etc.) ignores Python signals. An optional
|
|
5
|
+
spare worker is pre-warmed in the background so that rotation after
|
|
6
|
+
task-limit exhaustion or crash is near-instant.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import asyncio
|
|
12
|
+
import atexit
|
|
13
|
+
import enum
|
|
14
|
+
import logging
|
|
15
|
+
import multiprocessing
|
|
16
|
+
import time
|
|
17
|
+
import weakref
|
|
18
|
+
from dataclasses import dataclass, field
|
|
19
|
+
from multiprocessing import Pipe, Process
|
|
20
|
+
from multiprocessing.connection import Connection
|
|
21
|
+
from typing import Any, Callable, NoReturn
|
|
22
|
+
|
|
23
|
+
import psutil
|
|
24
|
+
|
|
25
|
+
from ._exceptions import ProcessPoolExhausted
|
|
26
|
+
from ._logging import forward_subprocess_log
|
|
27
|
+
from ._worker import _worker_process
|
|
28
|
+
|
|
29
|
+
log = logging.getLogger(__name__)
|
|
30
|
+
|
|
31
|
+
# Seconds to wait when polling a pipe for data.
_POLL_TIMEOUT = 0.1
# Seconds to wait for a worker to join after a graceful shutdown signal.
_JOIN_TIMEOUT = 0.5
# Seconds to wait for a process tree to die after SIGKILL.
_KILL_WAIT = 1.0

# Weak registry of live pools. Entries vanish automatically when a pool
# is garbage-collected, so the atexit hook only touches live objects.
_active_pools: weakref.WeakSet[WarmPool] = weakref.WeakSet()


def _cleanup_all_pools() -> None:
    """Shut down every live pool at interpreter exit."""
    # Iterate over a snapshot: shutdown() mutates pool state and the
    # WeakSet may shrink while being walked.
    for pool in list(_active_pools):
        try:
            pool.shutdown()
        except Exception:
            # Best-effort cleanup at interpreter exit; one failing pool
            # must not prevent the remaining pools from being cleaned up.
            pass


atexit.register(_cleanup_all_pools)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class PoolStatus(enum.Enum):
    """Readiness state reported by :attr:`WarmPool.status`.

    The pool dispatches on this enum at every decision point, with
    :func:`_assert_never` in the ``else`` branch, so that mypy proves
    the handling is exhaustive.

    Attributes
    ----------
    READY
        Active worker is alive and under the task limit.
    NEEDS_ROTATION
        Active worker is spent or dead, but a spare can take over.
    EXHAUSTED
        No workers available and no spare to promote.
    SHUTDOWN
        The pool has been explicitly shut down.
    """

    READY = "ready"
    NEEDS_ROTATION = "rotation"
    EXHAUSTED = "exhausted"
    SHUTDOWN = "shutdown"
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _assert_never(value: NoReturn) -> NoReturn:
|
|
79
|
+
"""Statically assert all enum cases are handled.
|
|
80
|
+
|
|
81
|
+
Mypy narrows the type through each ``if``/``elif`` branch. If all
|
|
82
|
+
:class:`PoolStatus` members are covered, the remaining type at the
|
|
83
|
+
``else`` is ``Never``/``NoReturn`` and this call type-checks.
|
|
84
|
+
Adding a new enum member without a matching branch causes a mypy
|
|
85
|
+
error.
|
|
86
|
+
"""
|
|
87
|
+
raise AssertionError(f"Unhandled status: {value!r}")
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
@dataclass
class WorkerHandle:
    """Per-worker bookkeeping record.

    Parameters
    ----------
    process
        The :class:`multiprocessing.Process` instance.
    connection
        Parent-side pipe connection.
    child_connection
        Child-side pipe connection (kept open so we can close it on
        cleanup).
    ready
        ``True`` once the worker has sent its ``"ready"`` message.
    task_count
        Number of tasks dispatched to this worker.
    last_metadata
        Metadata dict from the most recent completed task.
    init_result
        Value returned by the warming callable, if any.
    """

    process: multiprocessing.Process
    connection: Connection
    child_connection: Connection
    ready: bool = False
    task_count: int = 0
    last_metadata: dict[str, Any] = field(default_factory=dict)
    init_result: Any = None
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
class WarmPool:
|
|
121
|
+
"""Single-worker subprocess pool with hard-kill timeouts.
|
|
122
|
+
|
|
123
|
+
Runs functions in a spawned subprocess that can be SIGKILLed when
|
|
124
|
+
C-API code (OpenCASCADE, etc.) ignores Python signals.
|
|
125
|
+
|
|
126
|
+
.. note::
|
|
127
|
+
This class is **not** thread-safe. Do not call :meth:`run` from
|
|
128
|
+
multiple threads concurrently.
|
|
129
|
+
|
|
130
|
+
Parameters
|
|
131
|
+
----------
|
|
132
|
+
warming
|
|
133
|
+
Optional callable invoked once per worker on startup (e.g. to
|
|
134
|
+
pre-import modules). Its return value is available via
|
|
135
|
+
:attr:`init_result`.
|
|
136
|
+
max_tasks
|
|
137
|
+
Maximum tasks a single worker may handle before rotation.
|
|
138
|
+
keep_spare
|
|
139
|
+
If ``True``, a spare worker is pre-warmed in the background so
|
|
140
|
+
rotation is near-instant.
|
|
141
|
+
ready_timeout
|
|
142
|
+
Seconds to wait for a worker to send its ``"ready"`` signal.
|
|
143
|
+
max_memory
|
|
144
|
+
Maximum RSS in bytes before the worker is rotated.
|
|
145
|
+
max_memory_percent
|
|
146
|
+
Maximum RSS as a fraction of total system memory (0.0–1.0)
|
|
147
|
+
before the worker is rotated.
|
|
148
|
+
"""
|
|
149
|
+
|
|
150
|
+
def __init__(
    self,
    max_tasks: int = 50,
    keep_spare: bool = True,
    ready_timeout: float = 30.0,
    max_memory: int | None = None,
    max_memory_percent: float | None = 0.35,
    warming: Callable | None = None,
    init_retries: int = 1,
) -> None:
    """Create the pool and block until the primary worker is ready.

    Parameters
    ----------
    max_tasks
        Maximum tasks a single worker may handle before rotation.
    keep_spare
        If ``True``, pre-warm a spare worker in the background.
    ready_timeout
        Seconds to wait for a worker's ``"ready"`` signal.
    max_memory
        Maximum RSS in bytes before the worker is rotated.
    max_memory_percent
        Maximum RSS as a fraction of total system memory (0.0-1.0).
    warming
        Optional callable run once per worker on startup.
    init_retries
        Extra attempts to start the primary worker if an attempt
        fails to become ready.

    Raises
    ------
    RuntimeError
        If the primary worker never becomes ready after all retries.
    """
    self._max_tasks = max_tasks
    self._keep_spare = keep_spare
    self._ready_timeout = ready_timeout
    self._warming = warming
    self._max_memory = max_memory
    self._init_retries = init_retries
    # Pre-compute absolute byte limit from percentage (avoid per-task psutil call).
    if max_memory_percent is not None:
        # Clamp into [0, 1] so a bad value can't disable the check.
        clamped = max(0.0, min(1.0, max_memory_percent))
        self._max_memory_percent_bytes: int | None = int(
            clamped * psutil.virtual_memory().total
        )
    else:
        self._max_memory_percent_bytes = None

    self._active: WorkerHandle | None = None
    self._spare: WorkerHandle | None = None
    self._shutdown = False
    # Pool-level cache so elapsed_ms survives rotation.
    self._last_elapsed_ms: int | None = None
    self._last_memory_rss: int | None = None

    # Register for best-effort cleanup at interpreter exit.
    _active_pools.add(self)

    # Start primary worker (blocking), with retry.
    for attempt in range(1 + self._init_retries):
        try:
            self._active = self._start_worker(block_ready=True)
            break
        except RuntimeError:
            if attempt < self._init_retries:
                log.warning(
                    "Primary worker failed to become ready, "
                    f"retrying ({attempt + 1}/{self._init_retries})..."
                )
                # Brief back-off; startup failures are often transient.
                time.sleep(5)
            else:
                raise

    # Start spare (non-blocking) if requested.
    if self._keep_spare:
        self._spare = self._start_worker(block_ready=False)
|
|
202
|
+
|
|
203
|
+
# ------------------------------------------------------------------
|
|
204
|
+
# Public API
|
|
205
|
+
# ------------------------------------------------------------------
|
|
206
|
+
|
|
207
|
+
@property
def status(self) -> PoolStatus:
    """The pool's current readiness state.

    Returns
    -------
    PoolStatus
        Pure query — no side effects, no mutations.
    """
    if self._shutdown:
        return PoolStatus.SHUTDOWN
    worker = self._active
    has_capacity = (
        worker is not None
        and worker.process.is_alive()
        and worker.task_count < self._max_tasks
    )
    if has_capacity:
        return PoolStatus.READY
    # No usable active worker: rotation is possible only when a spare
    # is being maintained.
    return PoolStatus.NEEDS_ROTATION if self._keep_spare else PoolStatus.EXHAUSTED
|
|
227
|
+
|
|
228
|
+
@property
|
|
229
|
+
def init_result(self) -> Any:
|
|
230
|
+
"""Return value of ``warming`` from the active worker, or ``None``."""
|
|
231
|
+
return self._active.init_result if self._active else None
|
|
232
|
+
|
|
233
|
+
@property
|
|
234
|
+
def last_elapsed_ms(self) -> int | None:
|
|
235
|
+
"""Wall-clock milliseconds the last completed task took.
|
|
236
|
+
|
|
237
|
+
Returns
|
|
238
|
+
-------
|
|
239
|
+
int or None
|
|
240
|
+
``None`` if no task has completed yet.
|
|
241
|
+
"""
|
|
242
|
+
return self._last_elapsed_ms
|
|
243
|
+
|
|
244
|
+
@property
|
|
245
|
+
def last_memory_rss(self) -> int | None:
|
|
246
|
+
"""RSS in bytes of the worker after the last completed task.
|
|
247
|
+
|
|
248
|
+
Returns ``None`` if no task has completed or memory checking is disabled.
|
|
249
|
+
"""
|
|
250
|
+
return self._last_memory_rss
|
|
251
|
+
|
|
252
|
+
def run(self, function: Callable, timeout: float, **kwargs: Any) -> Any:
    """Run *function* in the worker subprocess (blocking).

    Parameters
    ----------
    function
        A picklable callable to execute in the worker.
    timeout
        Hard timeout in seconds; the worker is SIGKILLed if it
        exceeds this.
    **kwargs
        Keyword arguments forwarded to *function*.

    Returns
    -------
    Any
        Whatever *function* returns.

    Raises
    ------
    TimeoutError
        If the worker exceeds *timeout*.
    ProcessPoolExhausted
        If the pool has no available workers.
    """
    # Snapshot the status once; re-reading mid-dispatch could observe
    # a different state.
    status = self.status
    if status is PoolStatus.READY:
        pass
    elif status is PoolStatus.NEEDS_ROTATION:
        # Swap in the spare (or a fresh worker) before dispatching.
        self._rotate_worker()
    elif status is PoolStatus.EXHAUSTED:
        # Capture diagnostics *before* cleanup.
        task_count = self._active.task_count if self._active else 0
        exit_code = self._active.process.exitcode if self._active else None
        if self._active is not None:
            self._shutdown_worker(self._active)
            self._active = None
        raise ProcessPoolExhausted(
            f"tasks={task_count}/{self._max_tasks}",
            exit_code=exit_code,
        )
    elif status is PoolStatus.SHUTDOWN:
        raise ProcessPoolExhausted("Pool is shut down")
    else:
        _assert_never(status)

    # At this point self._active is guaranteed non-None.
    handle = self._active
    assert handle is not None  # narrowing for mypy

    # Send the task. The count is incremented immediately so a task
    # that later times out still counts against the worker's budget.
    handle.connection.send((function, (), kwargs))
    handle.task_count += 1

    # Wait for result.
    try:
        result = self._wait_for_result(handle, function, timeout)
    except (TimeoutError, ProcessPoolExhausted):
        # The worker may be wedged in C code: hard-kill it and, if
        # configured, promote the pre-warmed spare so the next call
        # does not pay the warm-up cost. The original error is
        # re-raised either way.
        self._kill_worker(handle)
        self._active = None
        if self._keep_spare:
            try:
                self._promote_spare()
            except Exception:
                log.warning("Failed to promote spare after error", exc_info=True)
        raise

    # Persist elapsed_ms at pool level so it survives rotation.
    self._last_elapsed_ms = handle.last_metadata.get("elapsed_ms")

    # Rotate after the final allowed task or memory limit exceeded.
    memory_exceeded = self._exceeds_memory_limit(handle)
    if handle.task_count >= self._max_tasks or memory_exceeded:
        self._shutdown_worker(handle)
        self._active = None
        if self._keep_spare:
            try:
                self._promote_spare()
            except Exception:
                log.warning("Failed to promote spare after rotation", exc_info=True)

    return result
|
|
334
|
+
|
|
335
|
+
async def arun(self, function: Callable, timeout: float, **kwargs: Any) -> Any:
    """Async wrapper around :meth:`run`.

    The blocking :meth:`run` is pushed onto the default thread-pool
    executor so the event loop stays responsive while the worker
    computes.

    Parameters
    ----------
    function
        A picklable callable.
    timeout
        Hard timeout in seconds.
    **kwargs
        Forwarded to *function*.

    Returns
    -------
    Any
        Whatever *function* returns.
    """

    def _invoke() -> Any:
        return self.run(function, timeout, **kwargs)

    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, _invoke)
|
|
356
|
+
|
|
357
|
+
def shutdown(self) -> None:
    """Shut down all workers and mark the pool as dead."""
    # Flip the flag first so status reports SHUTDOWN even if worker
    # teardown below raises.
    self._shutdown = True
    for slot in ("_active", "_spare"):
        worker = getattr(self, slot)
        if worker is not None:
            self._shutdown_worker(worker)
            setattr(self, slot, None)
|
|
366
|
+
|
|
367
|
+
# ------------------------------------------------------------------
|
|
368
|
+
# Worker lifecycle
|
|
369
|
+
# ------------------------------------------------------------------
|
|
370
|
+
|
|
371
|
+
def _start_worker(self, block_ready: bool = True) -> WorkerHandle:
    """Spawn a new worker subprocess.

    Parameters
    ----------
    block_ready
        If ``True``, block until the worker sends ``"ready"``.

    Returns
    -------
    WorkerHandle

    Raises
    ------
    RuntimeError
        If ``block_ready`` is set and the worker does not become
        ready within ``ready_timeout`` seconds.
    """
    parent_connection, child_connection = Pipe()
    # The worker mirrors the parent's effective root log level.
    log_level = logging.getLogger().getEffectiveLevel()
    try:
        # Prefer the "spawn" start method: a fresh interpreter avoids
        # fork-related deadlocks with threads and C extensions.
        context = multiprocessing.get_context("spawn")
        process = context.Process(
            target=_worker_process,
            args=(child_connection, log_level, self._warming),
        )
    except RuntimeError:
        # Fall back to the default start method if "spawn" is
        # unavailable in this environment.
        process = Process(
            target=_worker_process,
            args=(child_connection, log_level, self._warming),
        )

    process.start()
    handle = WorkerHandle(
        process=process,
        connection=parent_connection,
        child_connection=child_connection,
    )
    log.info(f"Started worker pid={process.pid}")

    if block_ready:
        if not self._wait_for_ready(handle):
            # A worker that never reports ready is unusable; kill it
            # rather than leak the process.
            self._kill_worker(handle)
            raise RuntimeError(
                f"Worker pid={handle.process.pid} failed to become ready "
                f"within {self._ready_timeout}s"
            )

    return handle
|
|
414
|
+
|
|
415
|
+
def _wait_for_ready(self, handle: WorkerHandle) -> bool:
|
|
416
|
+
"""Block until *handle* sends ``"ready"`` or the timeout expires.
|
|
417
|
+
|
|
418
|
+
Parameters
|
|
419
|
+
----------
|
|
420
|
+
handle
|
|
421
|
+
The worker to wait on.
|
|
422
|
+
|
|
423
|
+
Returns
|
|
424
|
+
-------
|
|
425
|
+
bool
|
|
426
|
+
``True`` if the worker became ready, ``False`` on timeout or error.
|
|
427
|
+
"""
|
|
428
|
+
deadline = time.perf_counter() + self._ready_timeout
|
|
429
|
+
while time.perf_counter() < deadline:
|
|
430
|
+
remaining = max(0.01, deadline - time.perf_counter())
|
|
431
|
+
if not handle.connection.poll(timeout=remaining):
|
|
432
|
+
break
|
|
433
|
+
try:
|
|
434
|
+
status, payload, _ = handle.connection.recv()
|
|
435
|
+
if status == "log":
|
|
436
|
+
forward_subprocess_log(payload)
|
|
437
|
+
continue
|
|
438
|
+
if status == "ready":
|
|
439
|
+
handle.ready = True
|
|
440
|
+
handle.init_result = payload
|
|
441
|
+
return True
|
|
442
|
+
log.warning(f"Expected 'ready', got: {status}")
|
|
443
|
+
return False
|
|
444
|
+
except Exception as error:
|
|
445
|
+
log.warning(f"Failed to receive ready signal: {error}")
|
|
446
|
+
return False
|
|
447
|
+
log.warning("Worker didn't send ready signal within timeout")
|
|
448
|
+
return False
|
|
449
|
+
|
|
450
|
+
def _wait_for_result(
|
|
451
|
+
self, handle: WorkerHandle, function: Callable, timeout: float
|
|
452
|
+
) -> Any:
|
|
453
|
+
"""Poll *handle* for the task result, forwarding log messages.
|
|
454
|
+
|
|
455
|
+
Parameters
|
|
456
|
+
----------
|
|
457
|
+
handle
|
|
458
|
+
The active worker handle.
|
|
459
|
+
function
|
|
460
|
+
The function that was dispatched (used for error messages).
|
|
461
|
+
timeout
|
|
462
|
+
Hard timeout in seconds.
|
|
463
|
+
|
|
464
|
+
Returns
|
|
465
|
+
-------
|
|
466
|
+
Any
|
|
467
|
+
The return value of *function*.
|
|
468
|
+
|
|
469
|
+
Raises
|
|
470
|
+
------
|
|
471
|
+
TimeoutError
|
|
472
|
+
If *timeout* is exceeded.
|
|
473
|
+
ProcessPoolExhausted
|
|
474
|
+
If the worker dies mid-task.
|
|
475
|
+
"""
|
|
476
|
+
start = time.perf_counter()
|
|
477
|
+
while time.perf_counter() - start < timeout:
|
|
478
|
+
if handle.connection.poll(timeout=_POLL_TIMEOUT):
|
|
479
|
+
try:
|
|
480
|
+
status, payload, metadata = handle.connection.recv()
|
|
481
|
+
except (EOFError, BrokenPipeError):
|
|
482
|
+
exit_code = handle.process.exitcode
|
|
483
|
+
raise ProcessPoolExhausted(
|
|
484
|
+
f"Subprocess died during `{function.__name__}`",
|
|
485
|
+
exit_code=exit_code,
|
|
486
|
+
)
|
|
487
|
+
|
|
488
|
+
if status == "log":
|
|
489
|
+
forward_subprocess_log(payload)
|
|
490
|
+
continue
|
|
491
|
+
|
|
492
|
+
handle.last_metadata = metadata
|
|
493
|
+
|
|
494
|
+
if status == "success":
|
|
495
|
+
return payload
|
|
496
|
+
if status == "error":
|
|
497
|
+
raise payload
|
|
498
|
+
|
|
499
|
+
if not handle.process.is_alive():
|
|
500
|
+
exit_code = handle.process.exitcode
|
|
501
|
+
raise ProcessPoolExhausted(
|
|
502
|
+
f"Subprocess died during `{function.__name__}`",
|
|
503
|
+
exit_code=exit_code,
|
|
504
|
+
)
|
|
505
|
+
|
|
506
|
+
raise TimeoutError(f"`{function.__name__}` timed out after {timeout}s")
|
|
507
|
+
|
|
508
|
+
def _exceeds_memory_limit(self, handle: WorkerHandle) -> bool:
|
|
509
|
+
if self._max_memory is None and self._max_memory_percent_bytes is None:
|
|
510
|
+
return False
|
|
511
|
+
try:
|
|
512
|
+
rss = psutil.Process(handle.process.pid).memory_info().rss
|
|
513
|
+
except (psutil.NoSuchProcess, psutil.AccessDenied, ProcessLookupError):
|
|
514
|
+
return False
|
|
515
|
+
self._last_memory_rss = rss
|
|
516
|
+
if self._max_memory is not None and rss > self._max_memory:
|
|
517
|
+
log.info(
|
|
518
|
+
f"Worker pid={handle.process.pid} RSS {rss:,}B exceeds max_memory={self._max_memory:,}B, rotating"
|
|
519
|
+
)
|
|
520
|
+
return True
|
|
521
|
+
if (
|
|
522
|
+
self._max_memory_percent_bytes is not None
|
|
523
|
+
and rss > self._max_memory_percent_bytes
|
|
524
|
+
):
|
|
525
|
+
log.info(
|
|
526
|
+
f"Worker pid={handle.process.pid} RSS {rss:,}B exceeds percent limit ({self._max_memory_percent_bytes:,}B), rotating"
|
|
527
|
+
)
|
|
528
|
+
return True
|
|
529
|
+
return False
|
|
530
|
+
|
|
531
|
+
def _rotate_worker(self) -> None:
|
|
532
|
+
"""Shut down the spent active worker and promote the spare.
|
|
533
|
+
|
|
534
|
+
Called when :attr:`status` is :attr:`PoolStatus.NEEDS_ROTATION`.
|
|
535
|
+
"""
|
|
536
|
+
if self._active is not None:
|
|
537
|
+
self._shutdown_worker(self._active)
|
|
538
|
+
self._active = None
|
|
539
|
+
self._promote_spare()
|
|
540
|
+
|
|
541
|
+
def _promote_spare(self) -> None:
|
|
542
|
+
"""Make the spare worker active and replenish the spare.
|
|
543
|
+
|
|
544
|
+
If the spare has died or never became ready (e.g. deadlocked
|
|
545
|
+
during warming), it is killed and a fresh worker is
|
|
546
|
+
cold-started instead.
|
|
547
|
+
"""
|
|
548
|
+
if self._spare is not None:
|
|
549
|
+
if self._spare.process.is_alive():
|
|
550
|
+
if not self._spare.ready:
|
|
551
|
+
self._wait_for_ready(self._spare)
|
|
552
|
+
if self._spare.ready:
|
|
553
|
+
self._active = self._spare
|
|
554
|
+
self._spare = None
|
|
555
|
+
else:
|
|
556
|
+
# Spare is alive but never became ready — kill it.
|
|
557
|
+
log.warning(
|
|
558
|
+
f"Spare pid={self._spare.process.pid} never became ready, killing"
|
|
559
|
+
)
|
|
560
|
+
self._kill_worker(self._spare)
|
|
561
|
+
self._spare = None
|
|
562
|
+
self._active = self._start_worker(block_ready=True)
|
|
563
|
+
else:
|
|
564
|
+
# Spare died — clean up and cold-start.
|
|
565
|
+
self._close_worker(self._spare)
|
|
566
|
+
self._spare = None
|
|
567
|
+
self._active = self._start_worker(block_ready=True)
|
|
568
|
+
else:
|
|
569
|
+
self._active = self._start_worker(block_ready=True)
|
|
570
|
+
|
|
571
|
+
# Replenish spare.
|
|
572
|
+
try:
|
|
573
|
+
self._spare = self._start_worker(block_ready=False)
|
|
574
|
+
except Exception:
|
|
575
|
+
log.warning("Failed to start spare worker", exc_info=True)
|
|
576
|
+
self._spare = None
|
|
577
|
+
|
|
578
|
+
def _shutdown_worker(self, handle: WorkerHandle) -> None:
|
|
579
|
+
"""Gracefully shut down *handle*; escalate to kill if needed.
|
|
580
|
+
|
|
581
|
+
Parameters
|
|
582
|
+
----------
|
|
583
|
+
handle
|
|
584
|
+
The worker to shut down.
|
|
585
|
+
"""
|
|
586
|
+
if handle.process.is_alive():
|
|
587
|
+
try:
|
|
588
|
+
handle.connection.send((None, (), {}))
|
|
589
|
+
except (BrokenPipeError, OSError):
|
|
590
|
+
pass
|
|
591
|
+
handle.process.join(timeout=_JOIN_TIMEOUT)
|
|
592
|
+
if handle.process.is_alive():
|
|
593
|
+
self._kill_worker(handle)
|
|
594
|
+
return
|
|
595
|
+
self._close_worker(handle)
|
|
596
|
+
|
|
597
|
+
def _kill_worker(self, handle: WorkerHandle) -> None:
|
|
598
|
+
"""SIGTERM then SIGKILL the worker and its entire process tree.
|
|
599
|
+
|
|
600
|
+
Parameters
|
|
601
|
+
----------
|
|
602
|
+
handle
|
|
603
|
+
The worker to kill.
|
|
604
|
+
"""
|
|
605
|
+
if not handle.process.is_alive():
|
|
606
|
+
self._close_worker(handle)
|
|
607
|
+
return
|
|
608
|
+
try:
|
|
609
|
+
worker = psutil.Process(handle.process.pid)
|
|
610
|
+
children = worker.children(recursive=True)
|
|
611
|
+
for child in children:
|
|
612
|
+
child.terminate()
|
|
613
|
+
worker.terminate()
|
|
614
|
+
gone, alive = psutil.wait_procs(children + [worker], timeout=0.1)
|
|
615
|
+
for remaining in alive:
|
|
616
|
+
remaining.kill()
|
|
617
|
+
psutil.wait_procs(alive, timeout=_KILL_WAIT)
|
|
618
|
+
except (psutil.NoSuchProcess, ProcessLookupError):
|
|
619
|
+
pass
|
|
620
|
+
except psutil.TimeoutExpired:
|
|
621
|
+
log.warning("Process tree still alive after SIGKILL")
|
|
622
|
+
except Exception:
|
|
623
|
+
log.error("Error killing process tree", exc_info=True)
|
|
624
|
+
self._close_worker(handle)
|
|
625
|
+
|
|
626
|
+
def _close_worker(self, handle: WorkerHandle) -> None:
|
|
627
|
+
"""Join the process and close both pipe endpoints.
|
|
628
|
+
|
|
629
|
+
Parameters
|
|
630
|
+
----------
|
|
631
|
+
handle
|
|
632
|
+
The worker whose resources should be freed.
|
|
633
|
+
"""
|
|
634
|
+
handle.process.join(timeout=_POLL_TIMEOUT)
|
|
635
|
+
try:
|
|
636
|
+
handle.connection.close()
|
|
637
|
+
except Exception:
|
|
638
|
+
pass
|
|
639
|
+
try:
|
|
640
|
+
handle.child_connection.close()
|
|
641
|
+
except Exception:
|
|
642
|
+
pass
|
|
File without changes
|