xoscar 0.7.0__cp312-cp312-macosx_10_13_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xoscar might be problematic. Click here for more details.

Files changed (89)
  1. xoscar/__init__.py +60 -0
  2. xoscar/_utils.cpython-312-darwin.so +0 -0
  3. xoscar/_utils.pxd +36 -0
  4. xoscar/_utils.pyx +246 -0
  5. xoscar/_version.py +693 -0
  6. xoscar/aio/__init__.py +16 -0
  7. xoscar/aio/base.py +86 -0
  8. xoscar/aio/file.py +59 -0
  9. xoscar/aio/lru.py +228 -0
  10. xoscar/aio/parallelism.py +39 -0
  11. xoscar/api.py +493 -0
  12. xoscar/backend.py +67 -0
  13. xoscar/backends/__init__.py +14 -0
  14. xoscar/backends/allocate_strategy.py +160 -0
  15. xoscar/backends/communication/__init__.py +30 -0
  16. xoscar/backends/communication/base.py +315 -0
  17. xoscar/backends/communication/core.py +69 -0
  18. xoscar/backends/communication/dummy.py +253 -0
  19. xoscar/backends/communication/errors.py +20 -0
  20. xoscar/backends/communication/socket.py +444 -0
  21. xoscar/backends/communication/ucx.py +538 -0
  22. xoscar/backends/communication/utils.py +97 -0
  23. xoscar/backends/config.py +157 -0
  24. xoscar/backends/context.py +437 -0
  25. xoscar/backends/core.py +304 -0
  26. xoscar/backends/indigen/__init__.py +16 -0
  27. xoscar/backends/indigen/__main__.py +19 -0
  28. xoscar/backends/indigen/backend.py +51 -0
  29. xoscar/backends/indigen/driver.py +26 -0
  30. xoscar/backends/indigen/fate_sharing.py +221 -0
  31. xoscar/backends/indigen/pool.py +450 -0
  32. xoscar/backends/indigen/shared_memory.py +548 -0
  33. xoscar/backends/message.cpython-312-darwin.so +0 -0
  34. xoscar/backends/message.pyi +255 -0
  35. xoscar/backends/message.pyx +646 -0
  36. xoscar/backends/pool.py +1625 -0
  37. xoscar/backends/router.py +285 -0
  38. xoscar/backends/test/__init__.py +16 -0
  39. xoscar/backends/test/backend.py +38 -0
  40. xoscar/backends/test/pool.py +197 -0
  41. xoscar/batch.py +256 -0
  42. xoscar/collective/__init__.py +27 -0
  43. xoscar/collective/common.py +102 -0
  44. xoscar/collective/core.py +737 -0
  45. xoscar/collective/process_group.py +687 -0
  46. xoscar/collective/utils.py +41 -0
  47. xoscar/collective/xoscar_pygloo.cpython-312-darwin.so +0 -0
  48. xoscar/collective/xoscar_pygloo.pyi +239 -0
  49. xoscar/constants.py +23 -0
  50. xoscar/context.cpython-312-darwin.so +0 -0
  51. xoscar/context.pxd +21 -0
  52. xoscar/context.pyx +368 -0
  53. xoscar/core.cpython-312-darwin.so +0 -0
  54. xoscar/core.pxd +51 -0
  55. xoscar/core.pyx +664 -0
  56. xoscar/debug.py +188 -0
  57. xoscar/driver.py +42 -0
  58. xoscar/errors.py +63 -0
  59. xoscar/libcpp.pxd +31 -0
  60. xoscar/metrics/__init__.py +21 -0
  61. xoscar/metrics/api.py +288 -0
  62. xoscar/metrics/backends/__init__.py +13 -0
  63. xoscar/metrics/backends/console/__init__.py +13 -0
  64. xoscar/metrics/backends/console/console_metric.py +82 -0
  65. xoscar/metrics/backends/metric.py +149 -0
  66. xoscar/metrics/backends/prometheus/__init__.py +13 -0
  67. xoscar/metrics/backends/prometheus/prometheus_metric.py +70 -0
  68. xoscar/nvutils.py +717 -0
  69. xoscar/profiling.py +260 -0
  70. xoscar/serialization/__init__.py +20 -0
  71. xoscar/serialization/aio.py +142 -0
  72. xoscar/serialization/core.cpython-312-darwin.so +0 -0
  73. xoscar/serialization/core.pxd +28 -0
  74. xoscar/serialization/core.pyi +57 -0
  75. xoscar/serialization/core.pyx +944 -0
  76. xoscar/serialization/cuda.py +111 -0
  77. xoscar/serialization/exception.py +48 -0
  78. xoscar/serialization/mlx.py +63 -0
  79. xoscar/serialization/numpy.py +82 -0
  80. xoscar/serialization/pyfury.py +37 -0
  81. xoscar/serialization/scipy.py +72 -0
  82. xoscar/utils.py +517 -0
  83. xoscar/virtualenv/__init__.py +34 -0
  84. xoscar/virtualenv/core.py +52 -0
  85. xoscar/virtualenv/uv.py +91 -0
  86. xoscar-0.7.0.dist-info/METADATA +228 -0
  87. xoscar-0.7.0.dist-info/RECORD +89 -0
  88. xoscar-0.7.0.dist-info/WHEEL +6 -0
  89. xoscar-0.7.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,450 @@
1
+ # Copyright 2022-2023 XProbe Inc.
2
+ # derived from copyright 1999-2021 Alibaba Group Holding Ltd.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ from __future__ import annotations
17
+
18
+ import asyncio
19
+ import asyncio.subprocess
20
+ import configparser
21
+ import itertools
22
+ import logging.config
23
+ import os
24
+ import pickle
25
+ import random
26
+ import signal
27
+ import struct
28
+ import sys
29
+ import threading
30
+ import time
31
+ import uuid
32
+ from enum import IntEnum
33
+ from typing import List, Optional
34
+
35
+ import psutil
36
+
37
+ from ..._utils import reset_id_random_seed
38
+ from ...utils import ensure_coverage
39
+ from ..config import ActorPoolConfig
40
+ from ..message import (
41
+ ControlMessage,
42
+ ControlMessageType,
43
+ CreateActorMessage,
44
+ new_message_id,
45
+ )
46
+ from ..pool import MainActorPoolBase, SubActorPoolBase, _register_message_handler
47
+ from . import shared_memory
48
+ from .fate_sharing import create_subprocess_exec
49
+
50
# Size in bytes of the shared-memory segment used to exchange startup
# parameters and results with each sub-pool subprocess.  The segment holds a
# 12-byte header followed by a pickled payload (see _shm_put_object).
_SUBPROCESS_SHM_SIZE = 10240
# True on any Windows platform (sys.platform "win32" etc.).
_is_windows: bool = sys.platform.startswith("win")

logger = logging.getLogger(__name__)
54
+
55
+
56
class _ShmSeq(IntEnum):
    """Sequence tags identifying what a shared-memory frame contains.

    The tag is written into the first 4 bytes of the segment *after* the
    payload, so a reader polling for a given tag only ever observes a
    fully written frame.
    """

    # Parent -> child: pickled startup parameters for the sub pool.
    INIT_PARAMS = 1
    # Child -> parent: pickled external address once the sub pool is up.
    INIT_RESULT = 2
59
+
60
+
61
def _shm_put_object(seq: _ShmSeq, shm: shared_memory.SharedMemory, o: object):
    """Serialize ``o`` and publish it in ``shm`` under sequence tag ``seq``.

    Frame layout in the shared-memory buffer:
      bytes 0-3   little-endian sequence tag (written last, acting as the
                  "frame ready" flag polled by ``_shm_get_object``)
      bytes 4-7   sender's ``sys.hexversion`` (verified by the reader)
      bytes 8-11  payload length
      bytes 12+   pickled payload

    Parameters
    ----------
    seq
        Sequence tag marking the frame type.
    shm
        Shared-memory segment of at least ``_SUBPROCESS_SHM_SIZE`` bytes.
    o
        Any picklable object.

    Raises
    ------
    ValueError
        If the pickled payload does not fit in the segment.
    """
    serialized = pickle.dumps(o)
    # The 12-byte header (seq + version + length) precedes the payload, so
    # only _SUBPROCESS_SHM_SIZE - 12 bytes are available for it.  The
    # previous check (`< _SUBPROCESS_SHM_SIZE - 8`) was off by four bytes
    # and admitted payloads that overflow the buffer write below; it also
    # used `assert`, which is stripped under `python -O`.
    if len(serialized) > _SUBPROCESS_SHM_SIZE - 12:
        raise ValueError(f"Serialized object {o} is too long.")
    shm.buf[4:12] = struct.pack("<II", sys.hexversion, len(serialized))
    shm.buf[12 : 12 + len(serialized)] = serialized
    # Publish the sequence tag last so readers never see a partial frame.
    shm.buf[:4] = struct.pack("<I", seq)
69
+
70
+
71
+ def _shm_get_object(seq: _ShmSeq, shm: shared_memory.SharedMemory):
72
+ recv_seq = struct.unpack("<I", shm.buf[:4])[0]
73
+ if recv_seq != seq:
74
+ return
75
+ python_version_hex, size = struct.unpack("<II", shm.buf[4:12])
76
+ if python_version_hex != sys.hexversion:
77
+ raise RuntimeError(
78
+ f"Python version mismatch, sender: {python_version_hex}, receiver: {sys.hexversion}"
79
+ )
80
+ return pickle.loads(shm.buf[12 : 12 + size])
81
+
82
+
83
@_register_message_handler
class MainActorPool(MainActorPoolBase):
    """Main actor pool of the indigen (subprocess-based) backend.

    The main pool spawns one OS subprocess per sub pool.  Startup
    parameters and the resulting external address are exchanged with each
    child through a shared-memory segment (``_shm_put_object`` /
    ``_shm_get_object``), which lets the child be launched as a plain
    ``python -m xoscar.backends.indigen start_sub_pool -sn <shm>`` command.
    """

    @classmethod
    def get_external_addresses(
        cls,
        address: str,
        n_process: int | None = None,
        ports: list[int] | None = None,
        schemes: list[Optional[str]] | None = None,
    ):
        """Get external address for every process.

        ``address`` may carry an explicit port (``host:port``); then
        ``ports``, if given, lists exactly one port per sub process.  When
        ``address`` is host-only, ``ports`` must also include the main
        pool's port as its first element.  Missing ports default to 0
        (bind to a random free port).  ``schemes`` optionally prefixes
        each address with ``scheme://``.
        """
        assert n_process is not None
        if ":" in address:
            host, port_str = address.rsplit(":", 1)
            port = int(port_str)
            if ports:
                if len(ports) != n_process:
                    raise ValueError(
                        f"`ports` specified, but its count "
                        f"is not equal to `n_process`, "
                        f"number of ports: {len(ports)}, "
                        f"n_process: {n_process}"
                    )
                sub_ports = ports
            else:
                # Port 0 lets the OS pick a free port for every sub pool.
                sub_ports = [0] * n_process
        else:
            host = address
            if ports and len(ports) != n_process + 1:
                # ports specified, the first of which should be main port
                raise ValueError(
                    f"`ports` specified, but its count "
                    f"is not equal to `n_process` + 1, "
                    f"number of ports: {len(ports)}, "
                    f"n_process + 1: {n_process + 1}"
                )
            elif not ports:
                ports = [0] * (n_process + 1)
            port = ports[0]
            sub_ports = ports[1:]
        if not schemes:
            prefix_iter = itertools.repeat("")
        else:
            prefix_iter = [f"{scheme}://" if scheme else "" for scheme in schemes]  # type: ignore
        return [
            f"{prefix}{host}:{port}"
            for port, prefix in zip([port] + sub_ports, prefix_iter)
        ]

    @classmethod
    def gen_internal_address(
        cls, process_index: int, external_address: str | None = None
    ) -> str | None:
        """Return the address used for intra-host main<->sub communication.

        Prefers a unix domain socket when the platform supports it
        (``asyncio.start_unix_server`` exists); otherwise falls back to
        the external address.
        """
        if hasattr(asyncio, "start_unix_server"):
            return f"unixsocket:///{process_index}"
        else:
            return external_address

    @classmethod
    async def start_sub_pool(
        cls,
        actor_pool_config: ActorPoolConfig,
        process_index: int,
        start_python: str | None = None,
    ):
        """Spawn the subprocess hosting the sub pool at ``process_index``.

        ``start_python`` overrides the interpreter used for the child
        (defaults to ``sys.executable`` downstream).
        """
        return await cls._create_sub_pool_from_parent(
            actor_pool_config, process_index, start_python
        )

    @classmethod
    async def wait_sub_pools_ready(cls, create_pool_tasks: List[asyncio.Task]):
        """Await all sub-pool creation tasks.

        Returns ``(processes, ext_addresses)`` in task order.
        """
        processes: list[asyncio.subprocess.Process] = []
        ext_addresses = []
        # NOTE(review): `error` is never assigned below, so the kill/raise
        # branch is dead code — an exception from `await task` propagates
        # immediately without killing already-started siblings.  Confirm
        # whether failures should be collected here instead.
        error = None
        for task in create_pool_tasks:
            process, address = await task
            processes.append(process)
            ext_addresses.append(address)
        if error:
            for p in processes:
                # error happens, kill all subprocesses
                p.kill()
            raise error
        return processes, ext_addresses

    @classmethod
    def _start_sub_pool_in_child(
        cls,
        shm_name: str,
    ):
        """Entry point executed inside the child process.

        Reads startup parameters from the shared-memory segment
        ``shm_name`` (written by ``_create_sub_pool_from_parent``),
        configures signals/logging/event loop, then runs the sub pool
        until it terminates.
        """
        ensure_coverage()

        # Attach to the parent's segment; track=False keeps this process's
        # resource tracker from interfering with the segment's lifetime.
        shm = shared_memory.SharedMemory(shm_name, track=False)
        try:
            config = _shm_get_object(_ShmSeq.INIT_PARAMS, shm)
            actor_config = config["actor_pool_config"]
            process_index = config["process_index"]
            main_pool_pid = config["main_pool_pid"]

            def _check_ppid():
                # Poll main-pool liveness forever; hard-exit when it is gone
                # so orphaned sub pools do not linger.
                while True:
                    try:
                        # We can't simply check if the os.getppid() equals with main_pool_pid,
                        # as the double fork may result in a new process as the parent.
                        psutil.Process(main_pool_pid)
                    except psutil.NoSuchProcess:
                        logger.info("Exit due to main pool %s exit.", main_pool_pid)
                        os._exit(0)
                    except Exception as e:
                        logger.exception("Check ppid failed: %s", e)
                    time.sleep(10)

            t = threading.Thread(target=_check_ppid, daemon=True)
            t.start()

            # make sure enough randomness for every sub pool
            random.seed(uuid.uuid1().bytes)
            reset_id_random_seed()

            conf = actor_config.get_pool_config(process_index)
            suspend_sigint = conf["suspend_sigint"]
            if suspend_sigint:
                # Ignore Ctrl-C in the child; shutdown is driven by the
                # main pool.
                signal.signal(signal.SIGINT, lambda *_: None)

            # Logging config may be an INI parser, a dictConfig dict, a
            # fileConfig path, or just a level/format pair.
            logging_conf = conf["logging_conf"] or {}
            if isinstance(logging_conf, configparser.RawConfigParser):
                logging.config.fileConfig(logging_conf)
            elif logging_conf.get("dict"):
                logging.config.dictConfig(logging_conf["dict"])
            elif logging_conf.get("file"):
                logging.config.fileConfig(logging_conf["file"])
            elif logging_conf.get("level"):
                logging.getLogger("__main__").setLevel(logging_conf["level"])
                logging.getLogger("xoscar").setLevel(logging_conf["level"])
                if logging_conf.get("format"):
                    logging.basicConfig(format=logging_conf["format"])

            use_uvloop = conf["use_uvloop"]
            if use_uvloop:
                import uvloop

                asyncio.set_event_loop(uvloop.new_event_loop())
            else:
                asyncio.set_event_loop(asyncio.new_event_loop())

            coro = cls._create_sub_pool(actor_config, process_index, main_pool_pid, shm)
            asyncio.run(coro)
        finally:
            shm.close()

    @classmethod
    async def _create_sub_pool(
        cls,
        actor_config: ActorPoolConfig,
        process_index: int,
        main_pool_pid: int,
        shm: shared_memory.SharedMemory,
    ):
        """Create and run the ``SubActorPool`` inside the child process.

        Publishes the pool's external address back to the parent through
        ``shm`` once started, then blocks until the pool terminates.
        """
        cur_pool_config = actor_config.get_pool_config(process_index)
        env = cur_pool_config["env"]
        if env:
            os.environ.update(env)
        pool = await SubActorPool.create(
            {
                "actor_pool_config": actor_config,
                "process_index": process_index,
                "main_pool_pid": main_pool_pid,
            }
        )
        await pool.start()
        # Signal the waiting parent with the address we actually bound.
        _shm_put_object(_ShmSeq.INIT_RESULT, shm, cur_pool_config["external_address"])
        await pool.join()

    @staticmethod
    async def _create_sub_pool_from_parent(
        actor_pool_config: ActorPoolConfig,
        process_index: int,
        start_python: str | None = None,
    ):
        """Spawn a child interpreter and wait until its pool reports ready.

        Returns ``(process, external_addresses)``.

        Raises
        ------
        OSError
            If the child exits before publishing its address.
        """
        # We check the Python version in _shm_get_object to make it faster,
        # as in most cases the Python versions are the same.
        if start_python is None:
            start_python = sys.executable

        external_addresses: List | None = None
        shm = shared_memory.SharedMemory(
            create=True, size=_SUBPROCESS_SHM_SIZE, track=False
        )
        try:
            _shm_put_object(
                _ShmSeq.INIT_PARAMS,
                shm,
                {
                    "actor_pool_config": actor_pool_config,
                    "process_index": process_index,
                    "main_pool_pid": os.getpid(),
                },
            )
            # create_subprocess_exec comes from .fate_sharing — presumably
            # it ties the child's lifetime to this process; confirm there.
            process = await create_subprocess_exec(
                start_python,
                "-m",
                "xoscar.backends.indigen",
                "start_sub_pool",
                "-sn",
                shm.name,
            )

            def _get_external_addresses():
                # Busy-polls (in a worker thread) until the child publishes
                # its INIT_RESULT frame.
                try:
                    nonlocal external_addresses
                    while (
                        shm
                        and shm.buf is not None
                        and not (
                            external_addresses := _shm_get_object(
                                _ShmSeq.INIT_RESULT, shm
                            )
                        )
                    ):
                        time.sleep(0.1)
                except asyncio.CancelledError:
                    pass

            # Race child readiness against child exit, so a crashing child
            # does not leave us polling forever.
            _, unfinished = await asyncio.wait(
                [
                    asyncio.create_task(process.wait()),
                    asyncio.create_task(asyncio.to_thread(_get_external_addresses)),
                ],
                return_when=asyncio.FIRST_COMPLETED,
            )
            for t in unfinished:
                t.cancel()
        finally:
            shm.close()
            shm.unlink()
        if external_addresses is None:
            raise OSError("Start sub pool failed.")
        return process, external_addresses

    async def append_sub_pool(
        self,
        label: str | None = None,
        internal_address: str | None = None,
        external_address: str | None = None,
        env: dict | None = None,
        modules: list[str] | None = None,
        suspend_sigint: bool | None = None,
        use_uvloop: bool | None = None,
        logging_conf: dict | None = None,
        start_python: str | None = None,
        kwargs: dict | None = None,
    ):
        """Dynamically add one sub pool to a running main pool.

        Unspecified ``logging_conf`` / ``use_uvloop`` are inherited from
        the last configured process.  Returns the new sub pool's external
        address.
        """
        # external_address has port 0, subprocess will bind random port.
        external_address = (
            external_address
            or MainActorPool.get_external_addresses(self.external_address, n_process=1)[
                -1
            ]
        )

        # use last process index's logging_conf and use_uv_loop config if not provide
        actor_pool_config = self._config.as_dict()
        last_process_index = self._config.get_process_indexes()[-1]
        last_logging_conf = actor_pool_config["pools"][last_process_index][
            "logging_conf"
        ]
        last_use_uv_loop = actor_pool_config["pools"][last_process_index]["use_uvloop"]
        _logging_conf = logging_conf or last_logging_conf
        _use_uv_loop = use_uvloop if use_uvloop is not None else last_use_uv_loop

        process_index = next(MainActorPool.process_index_gen(external_address))
        internal_address = internal_address or MainActorPool.gen_internal_address(
            process_index, external_address
        )

        # Register the new pool's configuration before spawning it.
        self._config.add_pool_conf(
            process_index,
            label,
            internal_address,
            external_address,
            env,
            modules,
            suspend_sigint,
            _use_uv_loop,
            _logging_conf,
            kwargs,
        )

        process, external_addresses = await self._create_sub_pool_from_parent(
            self._config, process_index, start_python
        )

        self._config.reset_pool_external_address(process_index, external_addresses[0])
        self.attach_sub_process(external_addresses[0], process)

        # Propagate the updated pool config via a sync_config control
        # message.
        control_message = ControlMessage(
            message_id=new_message_id(),
            address=self.external_address,
            control_message_type=ControlMessageType.sync_config,
            content=self._config,
        )
        await self.handle_control_command(control_message)
        # The actual port will return in process_status.
        return external_addresses[0]

    async def remove_sub_pool(
        self, external_address: str, timeout: float | None = None, force: bool = False
    ):
        """Stop the sub pool at ``external_address`` and drop its config.

        ``timeout`` and ``force`` are forwarded to ``stop_sub_pool``.
        """
        process = self.sub_processes[external_address]
        process_index = self._config.get_process_index(external_address)
        del self.sub_processes[external_address]
        self._config.remove_pool_config(process_index)
        await self.stop_sub_pool(external_address, process, timeout, force)

        # Propagate the updated pool config via a sync_config control
        # message.
        control_message = ControlMessage(
            message_id=new_message_id(),
            address=self.external_address,
            control_message_type=ControlMessageType.sync_config,
            content=self._config,
        )
        await self.handle_control_command(control_message)

    async def kill_sub_pool(
        self, process: asyncio.subprocess.Process, force: bool = False
    ):
        """Terminate a sub pool process.

        Unless ``force`` is set, first attempt a graceful ``terminate()``
        with a 5-second grace period, then keep killing until the process
        is confirmed dead.
        """
        try:
            p = psutil.Process(process.pid)
        except psutil.NoSuchProcess:
            # Already gone; nothing to do.
            return

        if not force:  # pragma: no cover
            p.terminate()
            try:
                p.wait(5)
            except psutil.TimeoutExpired:
                pass

        # Keep sending SIGKILL until the process actually disappears.
        while p.is_running():
            p.kill()
            if not p.is_running():
                return
            logger.info("Sub pool can't be killed: %s", p)
            time.sleep(0.1)

    async def is_sub_pool_alive(self, process: asyncio.subprocess.Process):
        """A sub pool counts as alive while its process has no return code."""
        return process.returncode is None

    async def recover_sub_pool(self, address: str):
        """Restart a dead sub pool and, when configured, its actors."""
        process_index = self._config.get_process_index(address)
        # process dead, restart it
        # remember always use spawn to recover sub pool
        task = asyncio.create_task(self.start_sub_pool(self._config, process_index))
        self.sub_processes[address] = (await self.wait_sub_pools_ready([task]))[0][0]

        if self._auto_recover == "actor":
            # need to recover all created actors
            for _, message in self._allocated_actors[address].values():
                create_actor_message: CreateActorMessage = message  # type: ignore
                await self.call(address, create_actor_message)

    async def start(self):
        """Start the main pool, then its sub-process monitor."""
        await super().start()
        await self.start_monitor()
446
+
447
+
448
@_register_message_handler
class SubActorPool(SubActorPoolBase):
    """Sub actor pool of the indigen backend.

    All behavior is inherited from ``SubActorPoolBase``; the decorator
    presumably registers the message handlers for this concrete class
    (same pattern as ``MainActorPool``) — confirm in ``..pool``.
    """

    pass