xoscar 0.4.0__cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xoscar might be problematic. Click here for more details.
- xoscar/__init__.py +60 -0
- xoscar/_utils.cpython-312-x86_64-linux-gnu.so +0 -0
- xoscar/_utils.pxd +36 -0
- xoscar/_utils.pyx +241 -0
- xoscar/_version.py +693 -0
- xoscar/aio/__init__.py +16 -0
- xoscar/aio/base.py +86 -0
- xoscar/aio/file.py +59 -0
- xoscar/aio/lru.py +228 -0
- xoscar/aio/parallelism.py +39 -0
- xoscar/api.py +493 -0
- xoscar/backend.py +67 -0
- xoscar/backends/__init__.py +14 -0
- xoscar/backends/allocate_strategy.py +160 -0
- xoscar/backends/communication/__init__.py +30 -0
- xoscar/backends/communication/base.py +315 -0
- xoscar/backends/communication/core.py +69 -0
- xoscar/backends/communication/dummy.py +242 -0
- xoscar/backends/communication/errors.py +20 -0
- xoscar/backends/communication/socket.py +414 -0
- xoscar/backends/communication/ucx.py +531 -0
- xoscar/backends/communication/utils.py +97 -0
- xoscar/backends/config.py +145 -0
- xoscar/backends/context.py +404 -0
- xoscar/backends/core.py +193 -0
- xoscar/backends/indigen/__init__.py +16 -0
- xoscar/backends/indigen/backend.py +51 -0
- xoscar/backends/indigen/driver.py +26 -0
- xoscar/backends/indigen/pool.py +469 -0
- xoscar/backends/message.cpython-312-x86_64-linux-gnu.so +0 -0
- xoscar/backends/message.pyi +239 -0
- xoscar/backends/message.pyx +599 -0
- xoscar/backends/pool.py +1596 -0
- xoscar/backends/router.py +207 -0
- xoscar/backends/test/__init__.py +16 -0
- xoscar/backends/test/backend.py +38 -0
- xoscar/backends/test/pool.py +208 -0
- xoscar/batch.py +256 -0
- xoscar/collective/__init__.py +27 -0
- xoscar/collective/common.py +102 -0
- xoscar/collective/core.py +737 -0
- xoscar/collective/process_group.py +687 -0
- xoscar/collective/utils.py +41 -0
- xoscar/collective/xoscar_pygloo.cpython-312-x86_64-linux-gnu.so +0 -0
- xoscar/collective/xoscar_pygloo.pyi +239 -0
- xoscar/constants.py +21 -0
- xoscar/context.cpython-312-x86_64-linux-gnu.so +0 -0
- xoscar/context.pxd +21 -0
- xoscar/context.pyx +368 -0
- xoscar/core.cpython-312-x86_64-linux-gnu.so +0 -0
- xoscar/core.pxd +50 -0
- xoscar/core.pyx +658 -0
- xoscar/debug.py +188 -0
- xoscar/driver.py +42 -0
- xoscar/errors.py +63 -0
- xoscar/libcpp.pxd +31 -0
- xoscar/metrics/__init__.py +21 -0
- xoscar/metrics/api.py +288 -0
- xoscar/metrics/backends/__init__.py +13 -0
- xoscar/metrics/backends/console/__init__.py +13 -0
- xoscar/metrics/backends/console/console_metric.py +82 -0
- xoscar/metrics/backends/metric.py +149 -0
- xoscar/metrics/backends/prometheus/__init__.py +13 -0
- xoscar/metrics/backends/prometheus/prometheus_metric.py +70 -0
- xoscar/nvutils.py +717 -0
- xoscar/profiling.py +260 -0
- xoscar/serialization/__init__.py +20 -0
- xoscar/serialization/aio.py +138 -0
- xoscar/serialization/core.cpython-312-x86_64-linux-gnu.so +0 -0
- xoscar/serialization/core.pxd +28 -0
- xoscar/serialization/core.pyi +57 -0
- xoscar/serialization/core.pyx +944 -0
- xoscar/serialization/cuda.py +111 -0
- xoscar/serialization/exception.py +48 -0
- xoscar/serialization/numpy.py +82 -0
- xoscar/serialization/pyfury.py +37 -0
- xoscar/serialization/scipy.py +72 -0
- xoscar/utils.py +517 -0
- xoscar-0.4.0.dist-info/METADATA +223 -0
- xoscar-0.4.0.dist-info/RECORD +82 -0
- xoscar-0.4.0.dist-info/WHEEL +6 -0
- xoscar-0.4.0.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,469 @@
|
|
|
1
|
+
# Copyright 2022-2023 XProbe Inc.
|
|
2
|
+
# derived from copyright 1999-2021 Alibaba Group Holding Ltd.
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import asyncio
|
|
19
|
+
import concurrent.futures as futures
|
|
20
|
+
import configparser
|
|
21
|
+
import contextlib
|
|
22
|
+
import itertools
|
|
23
|
+
import logging.config
|
|
24
|
+
import multiprocessing
|
|
25
|
+
import os
|
|
26
|
+
import random
|
|
27
|
+
import signal
|
|
28
|
+
import sys
|
|
29
|
+
import threading
|
|
30
|
+
import uuid
|
|
31
|
+
from dataclasses import dataclass
|
|
32
|
+
from types import TracebackType
|
|
33
|
+
from typing import List, Optional
|
|
34
|
+
|
|
35
|
+
from ..._utils import reset_id_random_seed
|
|
36
|
+
from ...utils import dataslots, ensure_coverage
|
|
37
|
+
from ..config import ActorPoolConfig
|
|
38
|
+
from ..message import (
|
|
39
|
+
ControlMessage,
|
|
40
|
+
ControlMessageType,
|
|
41
|
+
CreateActorMessage,
|
|
42
|
+
new_message_id,
|
|
43
|
+
)
|
|
44
|
+
from ..pool import MainActorPoolBase, SubActorPoolBase, _register_message_handler
|
|
45
|
+
|
|
46
|
+
# True on Windows; used below to choose between signal-based and
# terminate()-based handling of processes.
_is_windows: bool = sys.platform.startswith("win")

if sys.version_info[:2] == (3, 9):
    # fix for Python 3.9, see https://bugs.python.org/issue43517
    # The multiprocessing backend modules are imported eagerly here; the
    # names are only bound so the imports happen and are dropped right after.
    if sys.platform == "win32":
        from multiprocessing import popen_spawn_win32 as popen_spawn

        popen_forkserver = popen_fork = synchronize = None
    else:
        from multiprocessing import popen_fork, popen_forkserver
        from multiprocessing import popen_spawn_posix as popen_spawn
        from multiprocessing import synchronize
    _ = popen_spawn, popen_forkserver, popen_fork, synchronize
    del _
elif sys.version_info[:2] == (3, 6):  # pragma: no cover
    # NOTE(review): this module starts with ``from __future__ import
    # annotations`` (Python 3.7+), so this branch looks unreachable; it is
    # kept for fidelity but could probably be removed.
    from multiprocessing.process import BaseProcess

    # define kill method for multiprocessing
    def _mp_kill(self):
        """Backport of ``Process.kill()``: SIGKILL on POSIX, terminate() on Windows."""
        if not _is_windows:
            try:
                os.kill(self.pid, signal.SIGKILL)
            except ProcessLookupError:
                # the process is already gone
                pass
            except OSError:
                # ``BaseProcess`` has no ``wait()``; use join() + exitcode to
                # find out whether the process is still running before
                # propagating the error.
                self.join(timeout=0.1)
                if self.exitcode is None:
                    raise
        else:
            self.terminate()

    BaseProcess.kill = _mp_kill

logger = logging.getLogger(__name__)
# Thread-local flag read by the patched ``get_preparation_data`` wrapper; set
# while ``_suspend_init_main()`` is active in the current thread.
_init_main_suspended_local = threading.local()
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _patch_spawn_get_preparation_data():
    """Install a wrapper around ``multiprocessing.spawn.get_preparation_data``.

    The wrapper removes the ``init_main_from_name`` / ``init_main_from_path``
    entries from the preparation data whenever the thread-local suspension
    flag is set, and is marked with ``_indigen_patched`` so that it is only
    installed once. Import or attribute errors are ignored.
    """
    try:
        from multiprocessing import spawn as mp_spawn

        original = mp_spawn.get_preparation_data
        if getattr(original, "_indigen_patched", False):
            # already wrapped by an earlier call, nothing to do
            return

        def _patched_get_preparation_data(*args, **kw):
            prep_data = original(*args, **kw)
            if getattr(_init_main_suspended_local, "value", False):
                # make sure user module is not imported when start cluster
                for key in ("init_main_from_name", "init_main_from_path"):
                    prep_data.pop(key, None)
            return prep_data

        _patched_get_preparation_data._indigen_patched = True
        mp_spawn.get_preparation_data = _patched_get_preparation_data
    except (ImportError, AttributeError):  # pragma: no cover
        pass
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
@contextlib.contextmanager
def _suspend_init_main():
    """Set the thread-local suspension flag for the duration of the block.

    The flag is set to ``True`` on entry and to ``False`` on exit, whether or
    not the body raised.
    """
    flag_holder = _init_main_suspended_local
    flag_holder.value = True
    try:
        yield
    finally:
        flag_holder.value = False
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
@dataslots
@dataclass
class SubpoolStatus:
    """Start-up report of one sub pool process.

    Built in ``MainActorPool._create_sub_pool`` and passed back through the
    status queue that ``start_sub_pool`` reads from.
    """

    # for status, 0 is succeeded, 1 is failed
    status: int | None = None
    # filled in on success from the pool config's "external_address" entry
    external_addresses: List[str] | None = None
    # exception and traceback captured when start-up failed
    error: BaseException | None = None
    traceback: TracebackType | None = None
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
@_register_message_handler
|
|
123
|
+
class MainActorPool(MainActorPoolBase):
|
|
124
|
+
@classmethod
def get_external_addresses(
    cls,
    address: str,
    n_process: int | None = None,
    ports: list[int] | None = None,
    schemes: list[Optional[str]] | None = None,
):
    """Get external address for every process.

    Parameters
    ----------
    address
        ``"host"`` or ``"host:port"``. When a port is present it is used for
        the first (main) address.
    n_process
        Number of sub processes; must not be ``None``.
    ports
        Optional ports. With ``"host:port"`` it must hold ``n_process``
        entries (sub process ports only); with a bare host it must hold
        ``n_process + 1`` entries, the first being the main port. Port ``0``
        is used wherever no port is given. Any sequence (list, tuple, ...)
        is accepted.
    schemes
        Optional per-address schemes; a truthy scheme is rendered as a
        ``"<scheme>://"`` prefix. The result is as long as the shorter of
        the port list and ``schemes``.

    Returns
    -------
    list of str
        The main address followed by one address per sub process.

    Raises
    ------
    ValueError
        If the number of ``ports`` does not match ``n_process``.
    """
    assert n_process is not None
    if ":" in address:
        host, port_str = address.rsplit(":", 1)
        port = int(port_str)
        if ports:
            if len(ports) != n_process:
                raise ValueError(
                    f"`ports` specified, but its count "
                    f"is not equal to `n_process`, "
                    f"number of ports: {len(ports)}, "
                    f"n_process: {n_process}"
                )
            sub_ports = ports
        else:
            sub_ports = [0] * n_process
    else:
        host = address
        if ports and len(ports) != n_process + 1:
            # ports specified, the first of which should be main port
            raise ValueError(
                f"`ports` specified, but its count "
                f"is not equal to `n_process` + 1, "
                f"number of ports: {len(ports)}, "
                f"n_process + 1: {n_process + 1}"
            )
        elif not ports:
            ports = [0] * (n_process + 1)
        port = ports[0]
        sub_ports = ports[1:]
    if not schemes:
        prefix_iter = itertools.repeat("")
    else:
        prefix_iter = [f"{scheme}://" if scheme else "" for scheme in schemes]  # type: ignore
    # Unpacking instead of ``[port] + sub_ports`` keeps this working when
    # ``ports`` is a tuple or another non-list sequence.
    all_ports = [port, *sub_ports]
    return [
        f"{prefix}{host}:{each_port}"
        for each_port, prefix in zip(all_ports, prefix_iter)
    ]
|
|
170
|
+
|
|
171
|
+
@classmethod
def gen_internal_address(
    cls, process_index: int, external_address: str | None = None
) -> str | None:
    """Return the internal address for the sub pool at ``process_index``.

    When ``asyncio`` provides ``start_unix_server`` the address is
    ``unixsocket:///<process_index>``; otherwise ``external_address`` is
    returned unchanged.
    """
    if not hasattr(asyncio, "start_unix_server"):
        return external_address
    return f"unixsocket:///{process_index}"
|
|
179
|
+
|
|
180
|
+
@classmethod
async def start_sub_pool(
    cls,
    actor_pool_config: ActorPoolConfig,
    process_index: int,
    start_method: str | None = None,
):
    """Start one sub pool process and wait for its start-up status.

    The process is created from the multiprocessing context selected by
    ``start_method`` and runs ``cls._start_sub_pool``. Creating the process
    and the blocking wait on the status queue both happen in a dedicated
    single-thread executor, so the event loop is not blocked.

    Returns a ``(process, process_status)`` tuple, where ``process_status``
    is whatever the child put on the status queue.
    """

    def _launch_and_wait():
        mp_ctx = multiprocessing.get_context(method=start_method)
        queue = mp_ctx.Queue()
        parent_pid = os.getpid()

        # the process is created and started while init-main is suspended
        with _suspend_init_main():
            sub_process = mp_ctx.Process(
                target=cls._start_sub_pool,
                args=(actor_pool_config, process_index, queue, parent_pid),
                name=f"IndigenActorPool{process_index}",
            )
            sub_process.start()

        # wait for sub actor pool to finish starting
        return sub_process, queue.get()

    _patch_spawn_get_preparation_data()
    running_loop = asyncio.get_running_loop()
    with futures.ThreadPoolExecutor(1) as executor:
        return await running_loop.run_in_executor(executor, _launch_and_wait)
|
|
214
|
+
|
|
215
|
+
@classmethod
async def wait_sub_pools_ready(cls, create_pool_tasks: List[asyncio.Task]):
    """Await all sub pool start-up tasks and collect their results.

    Each task must resolve to a ``(process, status)`` pair. A ``status`` of
    ``1`` marks a failed start-up; its error is re-raised (with the recorded
    traceback) after every task has been awaited.

    Returns
    -------
    tuple
        ``(processes, ext_addresses)`` where ``ext_addresses`` holds the
        ``external_addresses`` of every successfully started sub pool.

    Raises
    ------
    BaseException
        The error reported by a failed sub pool, or whatever awaiting a task
        raised. In both cases every process collected so far is killed first.
    """
    processes: list[multiprocessing.Process] = []
    ext_addresses = []
    error = None
    try:
        for task in create_pool_tasks:
            process, status = await task
            processes.append(process)
            if status.status == 1:
                # start sub pool failed
                error = status.error.with_traceback(status.traceback)
            else:
                ext_addresses.append(status.external_addresses)
    except BaseException:
        # awaiting a task failed (or was cancelled): do not leave the
        # already started subprocesses running
        for p in processes:
            p.kill()
        raise
    if error is not None:
        for p in processes:
            # error happens, kill all subprocesses
            p.kill()
        raise error
    return processes, ext_addresses
|
|
234
|
+
|
|
235
|
+
@classmethod
def _start_sub_pool(
    cls,
    actor_config: ActorPoolConfig,
    process_index: int,
    status_queue: multiprocessing.Queue,
    main_pool_pid: int,
):
    """Process target: configure the child process and run the sub pool.

    Re-seeds randomness, optionally ignores SIGINT, applies the logging
    configuration of the pool at ``process_index`` and then runs
    ``cls._create_sub_pool`` to completion on a fresh event loop (a uvloop
    loop when the pool config enables ``use_uvloop``).
    """
    ensure_coverage()

    # make sure enough randomness for every sub pool
    random.seed(uuid.uuid1().bytes)
    reset_id_random_seed()

    conf = actor_config.get_pool_config(process_index)
    suspend_sigint = conf["suspend_sigint"]
    if suspend_sigint:
        # replace the SIGINT handler with a no-op
        signal.signal(signal.SIGINT, lambda *_: None)

    logging_conf = conf["logging_conf"] or {}
    if isinstance(logging_conf, configparser.RawConfigParser):
        logging.config.fileConfig(logging_conf)
    elif logging_conf.get("dict"):
        logging.config.dictConfig(logging_conf["dict"])
    elif logging_conf.get("file"):
        logging.config.fileConfig(logging_conf["file"])
    elif logging_conf.get("level"):
        logging.getLogger("__main__").setLevel(logging_conf["level"])
        logging.getLogger("xoscar").setLevel(logging_conf["level"])
        if logging_conf.get("format"):
            logging.basicConfig(format=logging_conf["format"])

    use_uvloop = conf["use_uvloop"]

    coro = cls._create_sub_pool(
        actor_config, process_index, status_queue, main_pool_pid
    )
    # ``asyncio.run()`` always creates its own event loop, so a loop installed
    # beforehand with ``asyncio.set_event_loop()`` is never used (and is never
    # closed). The loop implementation therefore has to be selected through
    # ``loop_factory`` (Python 3.12+) or through the event loop policy.
    if not use_uvloop:
        asyncio.run(coro)
        return

    import uvloop

    if sys.version_info >= (3, 12):
        asyncio.run(coro, loop_factory=uvloop.new_event_loop)
    else:
        asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
        asyncio.run(coro)
|
|
279
|
+
|
|
280
|
+
@classmethod
async def _create_sub_pool(
    cls,
    actor_config: ActorPoolConfig,
    process_index: int,
    status_queue: multiprocessing.Queue,
    main_pool_pid: int,
):
    """Create and start the sub pool, report the outcome, then serve.

    Exactly one ``SubpoolStatus`` (or ``None`` if nothing was recorded) is put
    on ``status_queue``: status ``0`` with the configured external addresses
    on success, status ``1`` with the exception and its traceback on failure.
    A failure is re-raised after being reported; on success the coroutine
    waits for the pool to finish via ``pool.join()``.
    """
    process_status = None
    try:
        pool_conf = actor_config.get_pool_config(process_index)
        extra_env = pool_conf["env"]
        if extra_env:
            os.environ.update(extra_env)
        create_args = {
            "actor_pool_config": actor_config,
            "process_index": process_index,
            "main_pool_pid": main_pool_pid,
        }
        pool = await SubActorPool.create(create_args)
        process_status = SubpoolStatus(
            status=0, external_addresses=pool_conf["external_address"]
        )
        await pool.start()
    except BaseException as exc:  # noqa: E722  # nosec  # pylint: disable=bare-except
        # everything is caught on purpose: the parent is blocked on the status
        # queue and must be told about any failure
        process_status = SubpoolStatus(
            status=1, error=exc, traceback=exc.__traceback__
        )
        raise
    finally:
        status_queue.put(process_status)
    await pool.join()
|
|
313
|
+
|
|
314
|
+
async def append_sub_pool(
    self,
    label: str | None = None,
    internal_address: str | None = None,
    external_address: str | None = None,
    env: dict | None = None,
    modules: list[str] | None = None,
    suspend_sigint: bool | None = None,
    use_uvloop: bool | None = None,
    logging_conf: dict | None = None,
    start_method: str | None = None,
    kwargs: dict | None = None,
):
    """Add a new sub pool to a running main pool and return its address.

    Missing ``external_address`` / ``internal_address`` values are generated;
    ``logging_conf`` and ``use_uvloop`` default to the values of the last
    configured pool. The new pool is registered in the config, started, and
    the updated config is then synchronised through a ``sync_config`` control
    message.

    Returns the first external address reported by the started sub pool.

    Raises whatever error the sub pool reported while starting; in that case
    the process is killed and the config entry added here is removed again.
    """
    # external_address has port 0, subprocess will bind random port.
    external_address = (
        external_address
        or MainActorPool.get_external_addresses(self.external_address, n_process=1)[
            -1
        ]
    )

    # use last process index's logging_conf and use_uv_loop config if not provide
    actor_pool_config = self._config.as_dict()
    last_process_index = self._config.get_process_indexes()[-1]
    last_logging_conf = actor_pool_config["pools"][last_process_index][
        "logging_conf"
    ]
    last_use_uv_loop = actor_pool_config["pools"][last_process_index]["use_uvloop"]
    _logging_conf = logging_conf or last_logging_conf
    _use_uv_loop = use_uvloop if use_uvloop is not None else last_use_uv_loop

    process_index = next(MainActorPool.process_index_gen(external_address))
    internal_address = internal_address or MainActorPool.gen_internal_address(
        process_index, external_address
    )

    self._config.add_pool_conf(
        process_index,
        label,
        internal_address,
        external_address,
        env,
        modules,
        suspend_sigint,
        _use_uv_loop,
        _logging_conf,
        kwargs,
    )

    # Reuse the shared start-up path instead of a private copy of it:
    # ``start_sub_pool`` applies the spawn patch, and ``wait_sub_pools_ready``
    # kills the process and raises the real error when start-up failed.
    task = asyncio.create_task(
        self.start_sub_pool(self._config, process_index, start_method)
    )
    try:
        processes, ext_addresses = await self.wait_sub_pools_ready([task])
    except BaseException:
        # roll back the config entry registered above
        self._config.remove_pool_config(process_index)
        raise
    process = processes[0]
    # The actual port is reported back by the sub process.
    actual_address = ext_addresses[0][0]

    self._config.reset_pool_external_address(process_index, actual_address)
    self.attach_sub_process(actual_address, process)

    control_message = ControlMessage(
        message_id=new_message_id(),
        address=self.external_address,
        control_message_type=ControlMessageType.sync_config,
        content=self._config,
    )
    await self.handle_control_command(control_message)
    return actual_address
|
|
399
|
+
|
|
400
|
+
async def remove_sub_pool(
    self, external_address: str, timeout: float | None = None, force: bool = False
):
    """Stop the sub pool at ``external_address`` and drop it from the config.

    After the pool is stopped it is removed from ``self.sub_processes`` and
    from the pool config, and the updated config is synchronised with a
    ``sync_config`` control message.
    """
    sub_process = self.sub_processes[external_address]
    index = self._config.get_process_index(external_address)
    await self.stop_sub_pool(external_address, sub_process, timeout, force)

    # forget the stopped pool locally ...
    del self.sub_processes[external_address]
    self._config.remove_pool_config(index)

    # ... and hand the new config to the control command handler
    await self.handle_control_command(
        ControlMessage(
            message_id=new_message_id(),
            address=self.external_address,
            control_message_type=ControlMessageType.sync_config,
            content=self._config,
        )
    )
|
|
416
|
+
|
|
417
|
+
async def kill_sub_pool(
    self, process: multiprocessing.Process, force: bool = False
):
    """Shut down a sub pool process.

    Unless ``force`` is set, the process first gets SIGINT (non-Windows only)
    and ``terminate()``, and up to 3 seconds to exit. In every case the
    process is then killed and joined with a 5 second timeout.
    """
    if not force:  # pragma: no cover
        # must shutdown gracefully, or subprocess created by model will not exit
        if not _is_windows:
            with contextlib.suppress(OSError):  # pragma: no cover
                os.kill(process.pid, signal.SIGINT)  # type: ignore
        process.terminate()  # SIGTERM
        joiner = futures.ThreadPoolExecutor(1)
        try:
            running_loop = asyncio.get_running_loop()
            await running_loop.run_in_executor(joiner, process.join, 3)
        finally:
            # do not wait for the join thread
            joiner.shutdown(False)
    process.kill()  # SIGKILL
    await asyncio.to_thread(process.join, 5)
|
|
436
|
+
|
|
437
|
+
async def is_sub_pool_alive(self, process: multiprocessing.Process):
    """Return ``process.is_alive()``, evaluated in a worker thread if possible.

    If ``asyncio.to_thread`` fails with a "cannot schedule new futures"
    ``RuntimeError``, the check is done synchronously instead; any other
    ``RuntimeError`` is propagated.
    """
    try:
        alive = await asyncio.to_thread(process.is_alive)
    except RuntimeError as ex:  # pragma: no cover
        # when atexit is triggered, the default pool might be shutdown
        # and to_thread will fail
        if "cannot schedule new futures" in str(ex):
            return process.is_alive()
        raise
    return alive
|
|
446
|
+
|
|
447
|
+
async def recover_sub_pool(self, address: str):
    """Restart the sub pool registered at ``address``.

    The replacement process is always started with the ``"spawn"`` method and
    stored in ``self.sub_processes``. When ``self._auto_recover`` equals
    ``"actor"``, every message recorded in ``self._allocated_actors[address]``
    is sent to the restarted pool again.
    """
    process_index = self._config.get_process_index(address)
    # process dead, restart it
    # remember always use spawn to recover sub pool
    restart_task = asyncio.create_task(
        self.start_sub_pool(self._config, process_index, "spawn")
    )
    ready = await self.wait_sub_pools_ready([restart_task])
    self.sub_processes[address] = ready[0][0]

    if self._auto_recover != "actor":
        return

    # need to recover all created actors
    for _, message in self._allocated_actors[address].values():
        create_actor_message: CreateActorMessage = message  # type: ignore
        await self.call(address, create_actor_message)
|
|
461
|
+
|
|
462
|
+
async def start(self):
    """Run the base class ``start()`` and then start the monitor."""
    await super().start()
    await self.start_monitor()
|
|
465
|
+
|
|
466
|
+
|
|
467
|
+
@_register_message_handler
class SubActorPool(SubActorPoolBase):
    """Sub pool class of this backend; adds nothing to ``SubActorPoolBase``."""

    pass
|
|
Binary file
|
|
@@ -0,0 +1,239 @@
|
|
|
1
|
+
# Copyright 2022-2023 XProbe Inc.
|
|
2
|
+
# derived from copyright 1999-2022 Alibaba Group Holding Ltd.
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
from enum import Enum
|
|
19
|
+
from types import TracebackType
|
|
20
|
+
from typing import Any, List, Type
|
|
21
|
+
|
|
22
|
+
from ..core import ActorRef, BufferRef
|
|
23
|
+
|
|
24
|
+
# Default for the ``protocol`` parameter of the message constructors below.
DEFAULT_PROTOCOL: int = 0
|
|
25
|
+
|
|
26
|
+
# Discriminator stored in ``message_type`` of every message class in this stub.
class MessageType(Enum):
    control = 0
    result = 1
    error = 2
    create_actor = 3
    destroy_actor = 4
    has_actor = 5
    actor_ref = 6
    send = 7
    tell = 8
    cancel = 9
    copy_to_buffers = 10
    copy_to_fileobjs = 11
|
|
39
|
+
|
|
40
|
+
# Kind of command carried by a ``ControlMessage`` (its ``control_message_type``).
class ControlMessageType(Enum):
    stop = 0
    restart = 1
    sync_config = 2
    get_config = 3
    wait_pool_recovered = 4
    add_sub_pool_actor = 5
    # indicate that the following data will be used for copy_to
    switch_to_copy_to = 6
|
|
49
|
+
|
|
50
|
+
# Common base of all message classes: declares the attributes shared by every
# message and the base constructor signature.
class _MessageBase:
    message_type: MessageType
    protocol: int
    message_id: bytes
    message_trace: list
    profiling_context: Any

    def __init__(
        self,
        message_id: bytes | None = None,
        protocol: int = DEFAULT_PROTOCOL,
        message_trace: list | None = None,
        profiling_context: Any = None,
    ) -> None: ...
    def __repr__(self) -> str: ...
|
|
65
|
+
|
|
66
|
+
# Message of type ``MessageType.copy_to_buffers`` carrying an arbitrary
# ``content`` object.
class CopyToBuffersMessage(_MessageBase):
    message_type = MessageType.copy_to_buffers

    content: object

    # Declared as ``__init__``: the previous ``__int__`` spelling described an
    # int-conversion hook instead of the constructor, so type checkers fell
    # back to the base-class signature and rejected ``content=...``.
    def __init__(
        self,
        message_id: bytes | None = None,
        content: object = None,
        protocol: int = DEFAULT_PROTOCOL,
        message_trace: list | None = None,
    ): ...
|
|
78
|
+
|
|
79
|
+
# Same shape as ``CopyToBuffersMessage``; only ``message_type`` differs.
class CopyToFileObjectsMessage(CopyToBuffersMessage):
    message_type = MessageType.copy_to_fileobjs
|
|
81
|
+
|
|
82
|
+
# Message of type ``MessageType.control``: an address, the kind of control
# command and an arbitrary ``content`` payload.
class ControlMessage(_MessageBase):
    message_type = MessageType.control

    address: str
    control_message_type: ControlMessageType
    content: Any

    def __init__(
        self,
        message_id: bytes | None = None,
        address: str | None = None,
        control_message_type: ControlMessageType | None = None,
        content: Any = None,
        protocol: int = DEFAULT_PROTOCOL,
        message_trace: list | None = None,
    ) -> None: ...
|
|
98
|
+
|
|
99
|
+
# Message of type ``MessageType.result`` wrapping an arbitrary ``result`` value.
class ResultMessage(_MessageBase):
    message_type = MessageType.result

    result: Any

    def __init__(
        self,
        message_id: bytes | None = None,
        result: Any = None,
        protocol: int = DEFAULT_PROTOCOL,
        message_trace: list | None = None,
        profiling_context: Any = None,
    ) -> None: ...
|
|
112
|
+
|
|
113
|
+
# Message of type ``MessageType.error``: carries an exception together with
# its type, traceback, and the address and pid it is associated with.
class ErrorMessage(_MessageBase):
    message_type = MessageType.error

    address: str
    pid: int
    error_type: Type
    error: BaseException
    traceback: TracebackType

    def __init__(
        self,
        message_id: bytes | None = None,
        address: str | None = None,
        pid: int = -1,
        error_type: Type[BaseException] | None = None,
        error: BaseException | None = None,
        traceback: TracebackType | None = None,
        protocol: int = DEFAULT_PROTOCOL,
        message_trace: list | None = None,
    ) -> None: ...
    # Returns an exception object derived from this message; the exact
    # construction lives in the implementation, not in this stub.
    def as_instanceof_cause(self) -> BaseException: ...
|
|
134
|
+
|
|
135
|
+
# Message of type ``MessageType.create_actor``: actor class, actor id,
# constructor ``args`` / ``kwargs``, an allocate strategy and a ``from_main``
# flag.
class CreateActorMessage(_MessageBase):
    message_type = MessageType.create_actor

    actor_cls: Type
    actor_id: bytes
    args: tuple
    kwargs: dict
    allocate_strategy: Any
    from_main: bool

    def __init__(
        self,
        message_id: bytes | None = None,
        actor_cls: Type | None = None,
        actor_id: bytes | None = None,
        args: tuple | None = None,
        kwargs: dict | None = None,
        allocate_strategy: Any = None,
        from_main: bool = False,
        protocol: int = DEFAULT_PROTOCOL,
        message_trace: list | None = None,
    ) -> None: ...
|
|
157
|
+
|
|
158
|
+
# Message of type ``MessageType.destroy_actor`` referring to one actor.
class DestroyActorMessage(_MessageBase):
    message_type = MessageType.destroy_actor

    actor_ref: ActorRef
    from_main: bool

    def __init__(
        self,
        message_id: bytes | None = None,
        # default is None, so the parameter type is spelled as optional
        actor_ref: ActorRef | None = None,
        from_main: bool = False,
        protocol: int = DEFAULT_PROTOCOL,
        message_trace: list | None = None,
    ) -> None: ...
|
|
172
|
+
|
|
173
|
+
# Message of type ``MessageType.has_actor`` referring to one actor.
class HasActorMessage(_MessageBase):
    message_type = MessageType.has_actor

    actor_ref: ActorRef

    def __init__(
        self,
        message_id: bytes | None = None,
        # default is None, so the parameter type is spelled as optional
        actor_ref: ActorRef | None = None,
        protocol: int = DEFAULT_PROTOCOL,
        message_trace: list | None = None,
    ) -> None: ...
|
|
185
|
+
|
|
186
|
+
# Message of type ``MessageType.actor_ref`` referring to one actor.
class ActorRefMessage(_MessageBase):
    message_type = MessageType.actor_ref

    actor_ref: ActorRef

    def __init__(
        self,
        message_id: bytes | None = None,
        # default is None, so the parameter type is spelled as optional
        actor_ref: ActorRef | None = None,
        protocol: int = DEFAULT_PROTOCOL,
        message_trace: list | None = None,
    ) -> None: ...
|
|
198
|
+
|
|
199
|
+
# Message of type ``MessageType.send``: an actor reference plus an arbitrary
# ``content`` payload.
class SendMessage(_MessageBase):
    message_type = MessageType.send

    actor_ref: ActorRef
    content: Any

    def __init__(
        self,
        message_id: bytes | None = None,
        # default is None, so the parameter type is spelled as optional
        actor_ref: ActorRef | None = None,
        content: object = None,
        protocol: int = DEFAULT_PROTOCOL,
        message_trace: list | None = None,
        profiling_context: Any = None,
    ) -> None: ...
|
|
214
|
+
|
|
215
|
+
# Same shape as ``SendMessage``; only ``message_type`` differs.
class TellMessage(SendMessage):
    message_type = MessageType.tell
|
|
217
|
+
|
|
218
|
+
# Message of type ``MessageType.cancel``: an address and the id of another
# message (``cancel_message_id``).
class CancelMessage(_MessageBase):
    message_type = MessageType.cancel

    address: str
    cancel_message_id: bytes

    def __init__(
        self,
        message_id: bytes | None = None,
        address: str | None = None,
        cancel_message_id: bytes | None = None,
        protocol: int = DEFAULT_PROTOCOL,
        message_trace: list | None = None,
    ) -> None: ...
|
|
232
|
+
|
|
233
|
+
# RuntimeError subclass that records the ``message_id`` it was raised for.
class DeserializeMessageFailed(RuntimeError):
    message_id: bytes

    def __init__(self, message_id: bytes) -> None: ...
    def __str__(self) -> str: ...
|
|
238
|
+
|
|
239
|
+
# Returns a message id as bytes; how it is generated is not visible in this stub.
def new_message_id() -> bytes: ...
|