xoscar 0.9.0__cp312-cp312-macosx_10_13_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. xoscar/__init__.py +61 -0
  2. xoscar/_utils.cpython-312-darwin.so +0 -0
  3. xoscar/_utils.pxd +36 -0
  4. xoscar/_utils.pyx +246 -0
  5. xoscar/_version.py +693 -0
  6. xoscar/aio/__init__.py +16 -0
  7. xoscar/aio/base.py +86 -0
  8. xoscar/aio/file.py +59 -0
  9. xoscar/aio/lru.py +228 -0
  10. xoscar/aio/parallelism.py +39 -0
  11. xoscar/api.py +527 -0
  12. xoscar/backend.py +67 -0
  13. xoscar/backends/__init__.py +14 -0
  14. xoscar/backends/allocate_strategy.py +160 -0
  15. xoscar/backends/communication/__init__.py +30 -0
  16. xoscar/backends/communication/base.py +315 -0
  17. xoscar/backends/communication/core.py +69 -0
  18. xoscar/backends/communication/dummy.py +253 -0
  19. xoscar/backends/communication/errors.py +20 -0
  20. xoscar/backends/communication/socket.py +444 -0
  21. xoscar/backends/communication/ucx.py +538 -0
  22. xoscar/backends/communication/utils.py +97 -0
  23. xoscar/backends/config.py +157 -0
  24. xoscar/backends/context.py +437 -0
  25. xoscar/backends/core.py +352 -0
  26. xoscar/backends/indigen/__init__.py +16 -0
  27. xoscar/backends/indigen/__main__.py +19 -0
  28. xoscar/backends/indigen/backend.py +51 -0
  29. xoscar/backends/indigen/driver.py +26 -0
  30. xoscar/backends/indigen/fate_sharing.py +221 -0
  31. xoscar/backends/indigen/pool.py +515 -0
  32. xoscar/backends/indigen/shared_memory.py +548 -0
  33. xoscar/backends/message.cpython-312-darwin.so +0 -0
  34. xoscar/backends/message.pyi +255 -0
  35. xoscar/backends/message.pyx +646 -0
  36. xoscar/backends/pool.py +1630 -0
  37. xoscar/backends/router.py +285 -0
  38. xoscar/backends/test/__init__.py +16 -0
  39. xoscar/backends/test/backend.py +38 -0
  40. xoscar/backends/test/pool.py +233 -0
  41. xoscar/batch.py +256 -0
  42. xoscar/collective/__init__.py +27 -0
  43. xoscar/collective/backend/__init__.py +13 -0
  44. xoscar/collective/backend/nccl_backend.py +160 -0
  45. xoscar/collective/common.py +102 -0
  46. xoscar/collective/core.py +737 -0
  47. xoscar/collective/process_group.py +687 -0
  48. xoscar/collective/utils.py +41 -0
  49. xoscar/collective/xoscar_pygloo.cpython-312-darwin.so +0 -0
  50. xoscar/collective/xoscar_pygloo.pyi +239 -0
  51. xoscar/constants.py +23 -0
  52. xoscar/context.cpython-312-darwin.so +0 -0
  53. xoscar/context.pxd +21 -0
  54. xoscar/context.pyx +368 -0
  55. xoscar/core.cpython-312-darwin.so +0 -0
  56. xoscar/core.pxd +51 -0
  57. xoscar/core.pyx +664 -0
  58. xoscar/debug.py +188 -0
  59. xoscar/driver.py +42 -0
  60. xoscar/errors.py +63 -0
  61. xoscar/libcpp.pxd +31 -0
  62. xoscar/metrics/__init__.py +21 -0
  63. xoscar/metrics/api.py +288 -0
  64. xoscar/metrics/backends/__init__.py +13 -0
  65. xoscar/metrics/backends/console/__init__.py +13 -0
  66. xoscar/metrics/backends/console/console_metric.py +82 -0
  67. xoscar/metrics/backends/metric.py +149 -0
  68. xoscar/metrics/backends/prometheus/__init__.py +13 -0
  69. xoscar/metrics/backends/prometheus/prometheus_metric.py +70 -0
  70. xoscar/nvutils.py +717 -0
  71. xoscar/profiling.py +260 -0
  72. xoscar/serialization/__init__.py +20 -0
  73. xoscar/serialization/aio.py +141 -0
  74. xoscar/serialization/core.cpython-312-darwin.so +0 -0
  75. xoscar/serialization/core.pxd +28 -0
  76. xoscar/serialization/core.pyi +57 -0
  77. xoscar/serialization/core.pyx +944 -0
  78. xoscar/serialization/cuda.py +111 -0
  79. xoscar/serialization/exception.py +48 -0
  80. xoscar/serialization/mlx.py +67 -0
  81. xoscar/serialization/numpy.py +82 -0
  82. xoscar/serialization/pyfury.py +37 -0
  83. xoscar/serialization/scipy.py +72 -0
  84. xoscar/serialization/torch.py +180 -0
  85. xoscar/utils.py +522 -0
  86. xoscar/virtualenv/__init__.py +34 -0
  87. xoscar/virtualenv/core.py +268 -0
  88. xoscar/virtualenv/platform.py +56 -0
  89. xoscar/virtualenv/utils.py +100 -0
  90. xoscar/virtualenv/uv.py +321 -0
  91. xoscar-0.9.0.dist-info/METADATA +230 -0
  92. xoscar-0.9.0.dist-info/RECORD +94 -0
  93. xoscar-0.9.0.dist-info/WHEEL +6 -0
  94. xoscar-0.9.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,444 @@
1
+ # Copyright 2022-2023 XProbe Inc.
2
+ # derived from copyright 1999-2021 Alibaba Group Holding Ltd.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ from __future__ import annotations
17
+
18
+ import asyncio
19
+ import concurrent.futures as futures
20
+ import logging
21
+ import os
22
+ import socket
23
+ import sys
24
+ from abc import ABCMeta
25
+ from asyncio import AbstractServer, StreamReader, StreamWriter
26
+ from functools import lru_cache
27
+ from hashlib import md5
28
+ from typing import Any, Callable, Coroutine, Dict, Type
29
+ from urllib.parse import urlparse
30
+
31
+ from ..._utils import to_binary
32
+ from ...constants import XOSCAR_CONNECT_TIMEOUT, XOSCAR_UNIX_SOCKET_DIR
33
+ from ...serialization import AioDeserializer, AioSerializer, deserialize
34
+ from ...utils import classproperty, implements, is_py_312, is_py_312_or_above, is_v6_ip
35
+ from .base import Channel, ChannelType, Client, Server
36
+ from .core import register_client, register_server
37
+ from .errors import ChannelClosed
38
+ from .utils import read_buffers, write_buffers
39
+
40
+ _is_windows: bool = sys.platform.startswith("win")
41
+
42
+
43
+ logger = logging.getLogger(__name__)
44
+
45
+
46
class SocketChannel(Channel):
    """Channel that sends and receives serialized messages over a stream pair.

    Outgoing messages are serialized with ``AioSerializer`` and pushed to the
    ``StreamWriter``; incoming messages are read from the ``StreamReader`` and
    rebuilt with ``deserialize``.
    """

    __slots__ = "reader", "writer", "_channel_type", "_send_lock", "_recv_lock"

    name = "socket"

    def __init__(
        self,
        reader: StreamReader,
        writer: StreamWriter,
        local_address: str | None = None,
        dest_address: str | None = None,
        compression: str | None = None,
        channel_type: int | None = None,
    ):
        """Wrap an existing ``reader``/``writer`` pair.

        ``local_address``, ``dest_address`` and ``compression`` are forwarded
        to ``Channel.__init__``; ``channel_type`` is what :attr:`type` reports.
        """
        super().__init__(
            local_address=local_address,
            dest_address=dest_address,
            compression=compression,
        )
        self.reader, self.writer = reader, writer
        self._channel_type = channel_type

        # One lock per direction: draining and reading are each serialized.
        self._send_lock = asyncio.Lock()
        self._recv_lock = asyncio.Lock()

    @property
    @implements(Channel.type)
    def type(self) -> int:
        """Return the channel type given at construction time."""
        return self._channel_type  # type: ignore

    @implements(Channel.send)
    async def send(self, message: Any):
        """Serialize ``message`` and write it to the peer.

        Raises ``ChannelClosed`` when a ``RuntimeError`` occurs while the
        writer is already closing; any other ``RuntimeError`` is re-raised.
        """
        # Serialize first; an unset compression is passed on as 0.
        payload = await AioSerializer(message, compress=self.compression or 0).run()

        try:
            write_buffers(self.writer, payload)
            # Drain under the lock: parallel sends may otherwise raise an
            # assertion error.
            async with self._send_lock:
                await self.writer.drain()
        except RuntimeError as e:
            if not self.writer.is_closing():
                raise e
            raise ChannelClosed("Channel already closed, cannot write message") from e

    @implements(Channel.recv)
    async def recv(self):
        """Read one message (header, then buffers) and return it deserialized."""
        header_reader = AioDeserializer(self.reader)
        async with self._recv_lock:
            header = await header_reader.get_header()
            body = await read_buffers(header, self.reader)
        return deserialize(header, body)

    @implements(Channel.close)
    async def close(self):
        """Close the writer and wait for it to finish closing."""
        self.writer.close()
        try:
            await self.writer.wait_closed()
        # TODO: May raise Runtime error: attach to another event loop
        except (ConnectionResetError, RuntimeError):  # pragma: no cover
            pass

    @property
    @implements(Channel.closed)
    def closed(self):
        """Whether the underlying writer is closing (or already closed)."""
        return self.writer.is_closing()
119
+
120
+
121
class _BaseSocketServer(Server, metaclass=ABCMeta):
    """Common logic for servers built on top of an ``asyncio`` stream server.

    Tracks every ``SocketChannel`` created for an incoming connection in
    ``_channels`` so that :meth:`stop` can close the ones still open.
    """

    __slots__ = "_aio_server", "_channels"

    _channels: set[Channel]

    def __init__(
        self,
        address: str,
        aio_server: AbstractServer,
        channel_handler: Callable[[Channel], Coroutine] | None = None,
    ):
        """Store the asyncio server and start with an empty channel set.

        ``address`` and ``channel_handler`` are forwarded to ``Server.__init__``.
        """
        super().__init__(address, channel_handler)
        # asyncio.Server
        self._aio_server = aio_server
        self._channels = set()

    @implements(Server.start)
    async def start(self):
        """Begin accepting connections."""
        await self._aio_server.start_serving()

    @implements(Server.join)
    async def join(self, timeout=None):
        """Serve until cancelled, or for at most ``timeout`` seconds.

        With ``timeout=None`` this awaits ``serve_forever`` directly. Otherwise
        ``serve_forever`` runs as a background task that is cancelled once the
        timeout elapses.
        """
        if timeout is None:
            await self._aio_server.serve_forever()
            return

        if is_py_312():
            # For python 3.12, there's a bug for `serve_forever`:
            # https://github.com/python/cpython/issues/123720,
            # which is unable to be cancelled.
            # Here is really a simulation of `wait_for`
            task = asyncio.create_task(self._aio_server.serve_forever())
            try:
                await asyncio.sleep(timeout)
                if task.done():
                    logger.warning("`serve_forever` should never be done.")
            finally:
                # Also runs when `join` itself is cancelled during the sleep,
                # so the background task is not left behind; this mirrors
                # what `wait_for` does in the other branch.
                if not task.done():
                    task.cancel()
        else:
            future = asyncio.create_task(self._aio_server.serve_forever())
            try:
                await asyncio.wait_for(future, timeout=timeout)
            except (futures.TimeoutError, asyncio.TimeoutError, TimeoutError):
                future.cancel()

    @implements(Server.on_connected)
    async def on_connected(self, *args, **kwargs):
        """Wrap a new connection in a ``SocketChannel`` and run the handler.

        ``args`` must be ``(reader, writer)``. The only accepted keyword
        arguments are ``local_address`` and ``dest_address``; anything else
        raises ``TypeError``. The channel is closed and forgotten once the
        handler returns or raises.
        """
        reader, writer = args
        local_address = kwargs.pop("local_address", None)
        dest_address = kwargs.pop("dest_address", None)
        if kwargs:  # pragma: no cover
            raise TypeError(
                f"{type(self).__name__} got unexpected "
                f'arguments: {",".join(kwargs)}'
            )
        channel = SocketChannel(
            reader,
            writer,
            local_address=local_address,
            dest_address=dest_address,
            channel_type=self.channel_type,
        )
        self._channels.add(channel)
        # handle over channel to some handlers
        try:
            await self.channel_handler(channel)
        finally:
            if not channel.closed:
                await channel.close()
            # Remove channel if channel exit
            self._channels.discard(channel)
            logger.debug("Channel exit: %s", channel.info)

    @implements(Server.stop)
    async def stop(self):
        """Stop accepting connections and close every channel still open."""
        self._aio_server.close()
        # Python 3.12+: # https://github.com/python/cpython/issues/104344
        # `wait_closed` leads to hang in Python 3.12 and 3.13
        if not is_py_312_or_above():
            await self._aio_server.wait_closed()
        # close all channels
        await asyncio.gather(
            *(channel.close() for channel in self._channels if not channel.closed)
        )
        self._channels.clear()

    @property
    @implements(Server.stopped)
    def stopped(self) -> bool:
        """Whether the underlying asyncio server is no longer serving."""
        return not self._aio_server.is_serving()
209
+
210
+
211
@register_server
class SocketServer(_BaseSocketServer):
    """TCP server addressed as ``host:port``; its ``scheme`` is ``None``."""

    __slots__ = "host", "port"

    scheme = None

    def __init__(
        self,
        host: str,
        port: int,
        aio_server: AbstractServer,
        channel_handler: Callable[[Channel], Coroutine] | None = None,
    ):
        """Record ``host``/``port`` and expose ``"host:port"`` as the address."""
        super().__init__(f"{host}:{port}", aio_server, channel_handler=channel_handler)
        self.host, self.port = host, port

    @classproperty
    @implements(Server.client_type)
    def client_type(self) -> Type["Client"]:
        """Client class used to connect to this kind of server."""
        return SocketClient

    @property
    @implements(Server.channel_type)
    def channel_type(self) -> int:
        """Channels accepted by this server are ``ChannelType.remote``."""
        return ChannelType.remote

    @classmethod
    def parse_config(cls, config: dict) -> dict:
        """Return only the entries of ``config`` this server cares about.

        An empty or ``None`` config yields an empty dict.
        """
        if not config:
            return {}
        # we only need the following config
        wanted = ("listen_elastic_ip",)
        return {key: config[key] for key in wanted if key in config}

    @staticmethod
    @implements(Server.create)
    async def create(config: Dict) -> "Server":
        """Create the server from ``config`` (which is copied, not mutated).

        Recognised keys: either ``address`` (``"host:port"``) or ``host`` plus
        ``port``; ``handle_channel`` (required); ``listen_elastic_ip``
        (optional). All remaining keys are passed to ``asyncio.start_server``,
        with ``start_serving`` defaulting to ``False``.
        """
        config = config.copy()
        if "address" in config:
            host, raw_port = config.pop("address").rsplit(":", 1)
        else:
            host = config.pop("host")
            raw_port = config.pop("port")
        port = int(raw_port)

        bind_host = host
        if config.pop("listen_elastic_ip", False):
            # The Actor.address will be announce to client, and is not on our host,
            # cannot actually listen on it,
            # so we have to keep SocketServer.host untouched to make sure Actor.address not changed
            bind_host = "::" if is_v6_ip(host) else "0.0.0.0"

        handle_channel = config.pop("handle_channel")
        config.setdefault("start_serving", False)

        async def handle_connection(reader: StreamReader, writer: StreamWriter):
            # create a channel when client connected
            return await server.on_connected(
                reader, writer, local_address=server.address
            )

        # A port of 0 is handed to asyncio as "unspecified".
        if port == 0:
            port = None
        aio_server = await asyncio.start_server(
            handle_connection, host=bind_host, port=port, **config
        )

        # get port of the socket if not specified
        if port is None:
            port = aio_server.sockets[0].getsockname()[1]

        if _is_windows:
            for listening_sock in aio_server.sockets:
                listening_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, True)

        # `handle_connection` closes over this name, so it must be bound here.
        server = SocketServer(host, port, aio_server, channel_handler=handle_channel)
        return server
295
+
296
+
297
@register_client
class SocketClient(Client):
    """Client counterpart of ``SocketServer`` (shares its ``scheme``)."""

    __slots__ = ()

    scheme = SocketServer.scheme

    @staticmethod
    @implements(Client.connect)
    async def connect(
        dest_address: str, local_address: str | None = None, **kwargs
    ) -> "Client":
        """Open a TCP connection to ``dest_address`` (``"host:port"``).

        ``kwargs["config"]`` may carry ``connect_timeout``; when it is missing
        (or the config itself is missing or ``None``) ``XOSCAR_CONNECT_TIMEOUT``
        is used.

        Raises:
            ConnectionError: the connection was not established in time.
        """
        host, port_str = dest_address.rsplit(":", 1)
        port = int(port_str)
        # `or {}` also covers an explicit `config=None`, which would otherwise
        # fail on `.get` below.
        config = kwargs.get("config") or {}
        connect_timeout = config.get("connect_timeout", XOSCAR_CONNECT_TIMEOUT)
        fut = asyncio.open_connection(host=host, port=port)
        try:
            reader, writer = await asyncio.wait_for(fut, timeout=connect_timeout)
        except asyncio.TimeoutError as e:
            raise ConnectionError("connect timeout") from e
        channel = SocketChannel(
            reader,
            writer,
            local_address=local_address,
            dest_address=dest_address,
            channel_type=ChannelType.remote,
        )
        return SocketClient(local_address, dest_address, channel)
325
+
326
+
327
def _get_or_create_default_unix_socket_dir():
    """Ensure ``XOSCAR_UNIX_SOCKET_DIR`` exists and return it."""
    socket_dir = XOSCAR_UNIX_SOCKET_DIR
    os.makedirs(socket_dir, exist_ok=True)
    return socket_dir
330
+
331
+
332
@lru_cache(100)
def _gen_unix_socket_default_path(process_index):
    """Return the default socket path for ``process_index``.

    The file name is the MD5 hex digest of ``str(process_index)``, placed in
    the default unix socket directory (created if missing). Results are
    memoized by ``lru_cache``.
    """
    base_dir = _get_or_create_default_unix_socket_dir()
    # MD5 only derives a file name here.
    file_name = md5(to_binary(str(process_index))).hexdigest()  # nosec
    return f"{base_dir}/{file_name}"
338
+
339
+
340
@register_server
class UnixSocketServer(_BaseSocketServer):
    """Server listening on a unix domain socket file.

    Its address has the form ``unixsocket:///<process_index>``; the socket file
    itself lives at ``path``.
    """

    __slots__ = "process_index", "path"

    scheme = "unixsocket"

    def __init__(
        self,
        process_index: int,
        aio_server: AbstractServer,
        path: str,
        channel_handler: Callable[[Channel], Coroutine] | None = None,
    ):
        """Record ``process_index`` and the socket file ``path``."""
        address = f"{self.scheme}:///{process_index}"
        super().__init__(address, aio_server, channel_handler=channel_handler)
        self.process_index = process_index
        self.path = path

    @classproperty
    @implements(Server.client_type)
    def client_type(self) -> Type["Client"]:
        """Client class used to connect to this kind of server."""
        return UnixSocketClient

    @property
    @implements(Server.channel_type)
    def channel_type(self) -> int:
        """Channels accepted by this server are ``ChannelType.ipc``."""
        return ChannelType.ipc

    @staticmethod
    @implements(Server.create)
    async def create(config: Dict) -> "Server":
        """Create the server from ``config`` (which is copied, not mutated).

        Recognised keys: either ``address`` (``unixsocket:///<index>``) or
        ``process_index``; ``handle_channel`` (required); ``path`` (optional,
        defaults to the generated path for the process index). All remaining
        keys are passed to ``asyncio.start_unix_server``, with
        ``start_serving`` defaulting to ``False``.
        """
        config = config.copy()
        if "address" in config:
            process_index = int(urlparse(config.pop("address")).path.lstrip("/"))
        else:
            process_index = config.pop("process_index")
        handle_channel = config.pop("handle_channel")

        # Resolve the default lazily: computing it creates the default socket
        # directory, which is pointless when the caller supplied a path.
        path = config.pop("path", None)
        if path is None:
            path = _gen_unix_socket_default_path(process_index)

        # A bare file name has an empty dirname; `os.makedirs("")` would raise,
        # and there is nothing to create in that case anyway.
        dirname = os.path.dirname(path)
        if dirname and not os.path.exists(dirname):
            os.makedirs(dirname, exist_ok=True)

        if "start_serving" not in config:
            config["start_serving"] = False

        async def handle_connection(reader, writer):
            # create a channel when client connected
            return await server.on_connected(
                reader, writer, local_address=server.address
            )

        aio_server = await asyncio.start_unix_server(
            handle_connection, path=path, **config
        )

        for sock in aio_server.sockets:
            sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, True)

        # `handle_connection` closes over this name, so it must be bound here.
        server = UnixSocketServer(
            process_index, aio_server, path, channel_handler=handle_channel
        )
        return server

    @implements(Server.stop)
    async def stop(self):
        """Stop the server, then remove the socket file (best effort)."""
        await super().stop()
        try:
            os.remove(self.path)
        except OSError:  # pragma: no cover
            # The file may already be gone; removal is best effort.
            pass
411
+
412
+
413
@register_client
class UnixSocketClient(Client):
    """Client counterpart of ``UnixSocketServer`` (shares its ``scheme``)."""

    __slots__ = ()

    scheme = UnixSocketServer.scheme

    @staticmethod
    @lru_cache(100)
    def _get_process_index(addr):
        """Extract the integer process index from the path part of ``addr``."""
        url_path = urlparse(addr).path
        return int(url_path.lstrip("/"))

    @staticmethod
    @implements(Client.connect)
    async def connect(
        dest_address: str, local_address: str | None = None, **kwargs
    ) -> "Client":
        """Connect to the unix socket that serves ``dest_address``.

        ``kwargs["path"]`` overrides the socket file location; the remaining
        keyword arguments are passed to ``asyncio.open_unix_connection``.

        Raises:
            ConnectionRefusedError: the socket file does not exist.
        """
        process_index = UnixSocketClient._get_process_index(dest_address)
        socket_path = kwargs.pop("path", _gen_unix_socket_default_path(process_index))
        try:
            reader, writer = await asyncio.open_unix_connection(socket_path, **kwargs)
        except FileNotFoundError:
            raise ConnectionRefusedError(
                "Cannot connect unix socket due to file not exists"
            )
        return UnixSocketClient(
            local_address,
            dest_address,
            SocketChannel(
                reader,
                writer,
                local_address=local_address,
                dest_address=dest_address,
                channel_type=ChannelType.ipc,
            ),
        )