xoscar 0.3.1__cp39-cp39-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xoscar might be problematic. Click here for more details.

Files changed (80) hide show
  1. xoscar/__init__.py +60 -0
  2. xoscar/_utils.cpython-39-darwin.so +0 -0
  3. xoscar/_utils.pxd +36 -0
  4. xoscar/_utils.pyx +241 -0
  5. xoscar/_version.py +693 -0
  6. xoscar/aio/__init__.py +25 -0
  7. xoscar/aio/_threads.py +35 -0
  8. xoscar/aio/base.py +86 -0
  9. xoscar/aio/file.py +59 -0
  10. xoscar/aio/lru.py +228 -0
  11. xoscar/aio/parallelism.py +39 -0
  12. xoscar/api.py +493 -0
  13. xoscar/backend.py +67 -0
  14. xoscar/backends/__init__.py +14 -0
  15. xoscar/backends/allocate_strategy.py +160 -0
  16. xoscar/backends/communication/__init__.py +30 -0
  17. xoscar/backends/communication/base.py +315 -0
  18. xoscar/backends/communication/core.py +69 -0
  19. xoscar/backends/communication/dummy.py +242 -0
  20. xoscar/backends/communication/errors.py +20 -0
  21. xoscar/backends/communication/socket.py +375 -0
  22. xoscar/backends/communication/ucx.py +520 -0
  23. xoscar/backends/communication/utils.py +97 -0
  24. xoscar/backends/config.py +145 -0
  25. xoscar/backends/context.py +404 -0
  26. xoscar/backends/core.py +193 -0
  27. xoscar/backends/indigen/__init__.py +16 -0
  28. xoscar/backends/indigen/backend.py +51 -0
  29. xoscar/backends/indigen/driver.py +26 -0
  30. xoscar/backends/indigen/pool.py +469 -0
  31. xoscar/backends/message.cpython-39-darwin.so +0 -0
  32. xoscar/backends/message.pyx +591 -0
  33. xoscar/backends/pool.py +1593 -0
  34. xoscar/backends/router.py +207 -0
  35. xoscar/backends/test/__init__.py +16 -0
  36. xoscar/backends/test/backend.py +38 -0
  37. xoscar/backends/test/pool.py +208 -0
  38. xoscar/batch.py +256 -0
  39. xoscar/collective/__init__.py +27 -0
  40. xoscar/collective/common.py +102 -0
  41. xoscar/collective/core.py +737 -0
  42. xoscar/collective/process_group.py +687 -0
  43. xoscar/collective/utils.py +41 -0
  44. xoscar/collective/xoscar_pygloo.cpython-39-darwin.so +0 -0
  45. xoscar/constants.py +21 -0
  46. xoscar/context.cpython-39-darwin.so +0 -0
  47. xoscar/context.pxd +21 -0
  48. xoscar/context.pyx +368 -0
  49. xoscar/core.cpython-39-darwin.so +0 -0
  50. xoscar/core.pxd +50 -0
  51. xoscar/core.pyx +658 -0
  52. xoscar/debug.py +188 -0
  53. xoscar/driver.py +42 -0
  54. xoscar/errors.py +63 -0
  55. xoscar/libcpp.pxd +31 -0
  56. xoscar/metrics/__init__.py +21 -0
  57. xoscar/metrics/api.py +288 -0
  58. xoscar/metrics/backends/__init__.py +13 -0
  59. xoscar/metrics/backends/console/__init__.py +13 -0
  60. xoscar/metrics/backends/console/console_metric.py +82 -0
  61. xoscar/metrics/backends/metric.py +149 -0
  62. xoscar/metrics/backends/prometheus/__init__.py +13 -0
  63. xoscar/metrics/backends/prometheus/prometheus_metric.py +70 -0
  64. xoscar/nvutils.py +717 -0
  65. xoscar/profiling.py +260 -0
  66. xoscar/serialization/__init__.py +20 -0
  67. xoscar/serialization/aio.py +138 -0
  68. xoscar/serialization/core.cpython-39-darwin.so +0 -0
  69. xoscar/serialization/core.pxd +28 -0
  70. xoscar/serialization/core.pyx +954 -0
  71. xoscar/serialization/cuda.py +111 -0
  72. xoscar/serialization/exception.py +48 -0
  73. xoscar/serialization/numpy.py +82 -0
  74. xoscar/serialization/pyfury.py +37 -0
  75. xoscar/serialization/scipy.py +72 -0
  76. xoscar/utils.py +502 -0
  77. xoscar-0.3.1.dist-info/METADATA +225 -0
  78. xoscar-0.3.1.dist-info/RECORD +80 -0
  79. xoscar-0.3.1.dist-info/WHEEL +5 -0
  80. xoscar-0.3.1.dist-info/top_level.txt +2 -0
xoscar/utils.py ADDED
@@ -0,0 +1,502 @@
1
+ # Copyright 2022-2023 XProbe Inc.
2
+ # derived from copyright 1999-2021 Alibaba Group Holding Ltd.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ from __future__ import annotations
17
+
18
+ import asyncio
19
+ import dataclasses
20
+ import functools
21
+ import importlib
22
+ import inspect
23
+ import io
24
+ import logging
25
+ import os
26
+ import pkgutil
27
+ import random
28
+ import socket
29
+ import sys
30
+ import time
31
+ import uuid
32
+ from abc import ABC
33
+ from types import TracebackType
34
+ from typing import Callable, Type, Union
35
+
36
+ from ._utils import ( # noqa: F401 # pylint: disable=unused-import
37
+ NamedType,
38
+ Timer,
39
+ TypeDispatcher,
40
+ to_binary,
41
+ to_str,
42
+ )
43
+
44
# Please refer to https://bugs.python.org/issue41451
try:
    # Probe whether this interpreter allows an ABC subclass to declare a
    # "__weakref__" slot; some Python builds raise TypeError here (bpo-41451).
    class _Dummy(ABC):
        __slots__ = ("__weakref__",)

    abc_type_require_weakref_slot = True
except TypeError:
    abc_type_require_weakref_slot = False
53
+
54
+
55
logger = logging.getLogger(__name__)


# Exponents for binary size suffixes used by parse_readable_size:
# "" -> 1024**0, "k" -> 1024**1, "m" -> 1024**2, "g" -> 1024**3, "t" -> 1024**4.
_memory_size_indices = {"": 0, "k": 1, "m": 2, "g": 3, "t": 4}
59
+
60
+
61
def parse_readable_size(value: str | int | float) -> tuple[float, bool]:
    """Parse a human-readable size such as ``"10k"``, ``"1.5g"`` or ``"20%"``.

    Returns a ``(size, is_percentage)`` pair: percentage inputs yield the
    fraction (``0.2`` for ``"20%"``) with the flag True; otherwise the number
    is scaled by the binary unit suffix (k/m/g/t) with the flag False.
    Plain ints/floats pass through unchanged.
    """
    if isinstance(value, (int, float)):
        return float(value), False

    text = value.strip().lower()
    # Find where the leading numeric part ends and the unit suffix begins.
    split_at = 0
    while split_at < len(text) and text[split_at] in "0123456789.-":
        split_at += 1
    number, unit = text[:split_at], text[split_at:].strip()

    if unit.endswith("%"):
        return float(number) / 100, True

    try:
        # Only the first character of the suffix matters ("kb" == "k").
        return float(number) * (1024 ** _memory_size_indices[unit[:1]]), False
    except (ValueError, KeyError):
        raise ValueError(f"Unknown limitation value: {number}")
79
+
80
+
81
def wrap_exception(
    exc: BaseException,
    bases: tuple[Type] | tuple | None = None,
    wrap_name: str | None = None,
    message: str | None = None,
    traceback: TracebackType | None = None,
    attr_dict: dict | None = None,
) -> BaseException:
    """Generate an exception wraps the cause exception.

    A new exception *type* is built on the fly that subclasses ``type(exc)``
    (plus any extra ``bases``) and proxies unknown attribute access to the
    original instance, so ``except``/``isinstance`` checks against the
    original type still match the wrapper.

    Parameters
    ----------
    exc
        The exception instance to wrap.
    bases
        Extra base classes for the generated wrapper type.
    wrap_name
        Stored as ``__wrapname__`` on the wrapper for later identification.
    message
        When given, overrides ``str()`` of the wrapper.
    traceback
        Traceback attached to the returned instance; defaults to
        ``exc.__traceback__``.
    attr_dict
        Extra attributes placed on the generated type.
    """

    # Methods injected into the dynamically created wrapper type.
    def __init__(self):
        # Deliberately skip base-class __init__: all state is proxied to exc.
        pass

    def __getattr__(self, item):
        # Fall back to the wrapped exception for attributes not set below.
        return getattr(exc, item)

    def __str__(self):
        return message or super(type(self), self).__str__()

    traceback = traceback or exc.__traceback__
    bases = bases or ()
    attr_dict = attr_dict or {}
    # Mirror the original's identity and chaining state onto the wrapper type.
    attr_dict.update(
        {
            "__init__": __init__,
            "__getattr__": __getattr__,
            "__str__": __str__,
            "__wrapname__": wrap_name,
            "__wrapped__": exc,
            "__module__": type(exc).__module__,
            "__cause__": exc.__cause__,
            "__context__": exc.__context__,
            "__suppress_context__": exc.__suppress_context__,
            "args": exc.args,
        }
    )
    # Subclass the original exception type so existing handlers still match.
    new_exc_type = type(type(exc).__name__, bases + (type(exc),), attr_dict)
    return new_exc_type().with_traceback(traceback)
119
+
120
+
121
# from https://github.com/ericvsmith/dataclasses/blob/master/dataclass_tools.py
# released under Apache License 2.0
def dataslots(cls):
    """Class decorator adding ``__slots__`` to an already-defined dataclass.

    ``__slots__`` cannot be attached after a class is created, so the class
    is rebuilt with one slot per dataclass field (pre-Python-3.10 equivalent
    of ``@dataclass(slots=True)``).

    Raises TypeError if the class already declares ``__slots__``.
    """
    # Need to create a new class, since we can't set __slots__
    # after a class has been created.

    # Make sure __slots__ isn't already set.
    if "__slots__" in cls.__dict__:  # pragma: no cover
        raise TypeError(f"{cls.__name__} already specifies __slots__")

    # Create a new dict for our new class.
    cls_dict = dict(cls.__dict__)
    field_names = tuple(f.name for f in dataclasses.fields(cls))
    cls_dict["__slots__"] = field_names
    for field_name in field_names:
        # Remove our attributes, if present. They'll still be
        # available in _MARKER.
        cls_dict.pop(field_name, None)
    # Remove __dict__ itself.
    cls_dict.pop("__dict__", None)
    # And finally create the class.
    qualname = getattr(cls, "__qualname__", None)
    # Rebuild through type(cls) (the metaclass) so custom metaclasses survive.
    cls = type(cls)(cls.__name__, cls.__bases__, cls_dict)
    if qualname is not None:
        cls.__qualname__ = qualname
    return cls
147
+
148
+
149
def implements(f: Callable):
    """Decorator factory that copies the docstring of *f* onto the target.

    Used to mark a function as the implementation of an interface/abstract
    callable while inheriting its documentation.
    """

    def wrapper(g):
        g.__doc__ = f.__doc__
        return g

    return wrapper
155
+
156
+
157
class classproperty:
    """Descriptor exposing a method as a read-only property on the class.

    Unlike ``property``, the getter always receives the *owner class*, so the
    value is available on both the class and its instances.
    """

    def __init__(self, f):
        self.f = f

    def __get__(self, instance, owner):
        # Dispatch with the owning class even when accessed via an instance.
        return self.f(owner)
163
+
164
+
165
# Inclusive port range get_next_port allocates from; ports below 10000 are
# left to system services.
LOW_PORT_BOUND = 10000
HIGH_PORT_BOUND = 65535
# Ports already handed out by get_next_port in this process, so repeated
# calls don't return the same port before it is actually bound.
_local_occupied_ports: set = set()
168
+
169
+
170
def _get_ports_from_netstat() -> set[int]:
    """Collect occupied TCP ports within [LOW_PORT_BOUND, HIGH_PORT_BOUND]
    by parsing ``netstat`` output.

    Fallback used on Windows and when psutil lacks permission to list
    connections. Retries indefinitely if netstat itself hangs.
    """
    import subprocess

    while True:
        p = subprocess.Popen("netstat -a -n -p tcp".split(), stdout=subprocess.PIPE)
        try:
            outs, _ = p.communicate(timeout=5)
            lines = outs.split(to_binary(os.linesep))
            occupied = set()
            for line in lines:
                # Lines without '.' carry no address field worth parsing.
                if b"." not in line:
                    continue
                line_str: str = to_str(line)
                for part in line_str.split():
                    # in windows, netstat uses ':' to separate host and port
                    part = part.replace(":", ".")
                    if "." in part:
                        # The port is the last dot-separated component.
                        _, port_str = part.rsplit(".", 1)
                        if port_str == "*":
                            # Wildcard port on a listening entry; skip it.
                            continue
                        port = int(port_str)
                        if LOW_PORT_BOUND <= port <= HIGH_PORT_BOUND:
                            occupied.add(int(port_str))
                        # Only the first address-looking token per line counts.
                        break
            return occupied
        except subprocess.TimeoutExpired:
            # netstat hung; kill it and retry until a run completes.
            p.kill()
            continue
198
+
199
+
200
def get_next_port(typ: int | None = None, occupy: bool = True) -> int:
    """Return a random unoccupied port in [LOW_PORT_BOUND, HIGH_PORT_BOUND].

    Parameters
    ----------
    typ
        Socket type to filter connections by (defaults to
        ``socket.SOCK_STREAM``); only used on the psutil path.
    occupy
        When True, remember the returned port in ``_local_occupied_ports`` so
        later calls in this process won't hand it out again.

    Raises
    ------
    SystemError
        If every port in the range appears occupied.
    """
    import psutil

    if sys.platform.lower().startswith("win"):
        # psutil.net_connections is unreliable without elevation on Windows;
        # fall back to parsing netstat output.
        occupied = _get_ports_from_netstat()
    else:
        try:
            conns = psutil.net_connections()
            typ = typ or socket.SOCK_STREAM
            occupied = set(
                sc.laddr.port
                for sc in conns
                if sc.type == typ and LOW_PORT_BOUND <= sc.laddr.port <= HIGH_PORT_BOUND
            )
        except psutil.AccessDenied:
            occupied = _get_ports_from_netstat()

    occupied.update(_local_occupied_ports)
    # Seed from uuid1 so forked children don't all derive the same sequence.
    random.seed(uuid.uuid1().bytes)
    randn = random.randint(0, 100000000)

    # Choose the idx-th free port, idx drawn over the count of free slots.
    idx = int(randn % (1 + HIGH_PORT_BOUND - LOW_PORT_BOUND - len(occupied)))
    for i in range(LOW_PORT_BOUND, HIGH_PORT_BOUND + 1):
        if i in occupied:
            continue
        if idx == 0:
            if occupy:
                _local_occupied_ports.add(i)
            return i
        idx -= 1
    raise SystemError("No ports available.")
231
+
232
+
233
+ def lazy_import(
234
+ name: str,
235
+ package: str | None = None,
236
+ globals: dict | None = None, # pylint: disable=redefined-builtin
237
+ locals: dict | None = None, # pylint: disable=redefined-builtin
238
+ rename: str | None = None,
239
+ placeholder: bool = False,
240
+ ):
241
+ rename = rename or name
242
+ prefix_name = name.split(".", 1)[0]
243
+ globals = globals or inspect.currentframe().f_back.f_globals # type: ignore
244
+
245
+ class LazyModule:
246
+ def __init__(self):
247
+ self._on_loads = []
248
+
249
+ def __getattr__(self, item):
250
+ if item.startswith("_pytest") or item in ("__bases__", "__test__"):
251
+ raise AttributeError(item)
252
+
253
+ real_mod = importlib.import_module(name, package=package)
254
+ if rename in globals:
255
+ globals[rename] = real_mod
256
+ elif locals is not None:
257
+ locals[rename] = real_mod
258
+ ret = getattr(real_mod, item)
259
+ for on_load_func in self._on_loads:
260
+ on_load_func()
261
+ # make sure on_load hooks only executed once
262
+ self._on_loads = []
263
+ return ret
264
+
265
+ def add_load_handler(self, func: Callable):
266
+ self._on_loads.append(func)
267
+ return func
268
+
269
+ if pkgutil.find_loader(prefix_name) is not None:
270
+ return LazyModule()
271
+ elif placeholder:
272
+ return ModulePlaceholder(prefix_name)
273
+ else:
274
+ return None
275
+
276
+
277
def lazy_import_on_load(lazy_mod):
    """Decorator factory registering a hook to run when *lazy_mod* loads.

    A plain passthrough when *lazy_mod* is None (module absent) or does not
    expose ``add_load_handler``.
    """

    def decorator(func):
        can_register = lazy_mod is not None and hasattr(lazy_mod, "add_load_handler")
        if can_register:
            lazy_mod.add_load_handler(func)
        return func

    return decorator
284
+
285
+
286
class ModulePlaceholder:
    """Stand-in for an optional module that is not installed.

    Any attribute access or call raises AttributeError naming the missing
    module, so the absent dependency is only reported at its point of use.
    """

    def __init__(self, mod_name: str):
        self._mod_name = mod_name

    def _raises(self):
        raise AttributeError(f"{self._mod_name} is required but not installed.")

    def __call__(self, *_args, **_kwargs):
        # Calling the "module" is as invalid as touching its attributes.
        self._raises()

    def __getattr__(self, key):
        self._raises()
298
+
299
+
300
def patch_asyncio_task_create_time():  # pragma: no cover
    """Monkey-patch the event loop's ``create_task`` so every new task gets
    a ``__xoscar_asyncio_task_create_time__`` timestamp.

    Read by ``asyncio_task_timeout_detector`` to find long-running tasks.
    Safe to call repeatedly: the pristine ``create_task`` is stashed on this
    function object, so re-patching always wraps the original implementation.
    """
    new_loop = False
    try:
        loop = asyncio.get_running_loop()
    except RuntimeError:
        # No running loop; create one just to reach its class for patching.
        loop = asyncio.new_event_loop()
        new_loop = True
    loop_class = loop.__class__
    # Save raw loop_class.create_task and make multiple apply idempotent
    loop_create_task = getattr(
        patch_asyncio_task_create_time, "loop_create_task", loop_class.create_task
    )
    patch_asyncio_task_create_time.loop_create_task = loop_create_task

    def new_loop_create_task(*args, **kwargs):
        task = loop_create_task(*args, **kwargs)
        # Timestamp consumed by asyncio_task_timeout_detector.
        task.__xoscar_asyncio_task_create_time__ = time.time()
        return task

    if loop_create_task is not new_loop_create_task:
        loop_class.create_task = new_loop_create_task
        if not new_loop and loop.create_task is not new_loop_create_task:
            # The running loop may carry an instance-level create_task; bind
            # the loop explicitly since assignment bypasses descriptor binding.
            loop.create_task = functools.partial(new_loop_create_task, loop)
323
+
324
+
325
async def asyncio_task_timeout_detector(
    check_interval: int, task_timeout_seconds: int, task_exclude_filters: list[str]
):
    """Periodically log a warning (with stack trace) for asyncio tasks that
    have run longer than ``task_timeout_seconds``.

    Runs forever, waking every ``check_interval`` seconds. Tasks whose
    ``str()`` contains any entry of ``task_exclude_filters`` are skipped, and
    the detector excludes itself by name.
    """
    task_exclude_filters.append("asyncio_task_timeout_detector")
    while True:  # pragma: no cover
        await asyncio.sleep(check_interval)
        loop = asyncio.get_running_loop()
        current_time = (
            time.time()
        )  # avoid invoke `time.time()` frequently if we have plenty of unfinished tasks.
        for task in asyncio.all_tasks(loop=loop):
            # Some task may be create before `patch_asyncio_task_create_time` applied, take them as never timeout.
            create_time = getattr(
                task, "__xoscar_asyncio_task_create_time__", current_time
            )
            if current_time - create_time >= task_timeout_seconds:
                stack = io.StringIO()
                task.print_stack(file=stack)
                task_str = str(task)
                if any(
                    excluded_task in task_str for excluded_task in task_exclude_filters
                ):
                    continue
                logger.warning(
                    """Task %s in event loop %s doesn't finish in %s seconds. %s""",
                    task,
                    loop,
                    time.time() - create_time,
                    stack.getvalue(),
                )
355
+
356
+
357
def register_asyncio_task_timeout_detector(
    check_interval: int | None = None,
    task_timeout_seconds: int | None = None,
    task_exclude_filters: list[str] | None = None,
) -> asyncio.Task | None:  # pragma: no cover
    """Register a asyncio task which print timeout task periodically.

    Defaults come from environment variables:
    ``XOSCAR_DEBUG_ASYNCIO_TASK_TIMEOUT_CHECK_INTERVAL`` (detector disabled
    unless > 0), ``XOSCAR_DEBUG_ASYNCIO_TASK_TIMEOUT_SECONDS`` (defaults to
    the check interval) and ``XOSCAR_DEBUG_ASYNCIO_TASK_EXCLUDE_FILTERS``
    (semicolon-separated substrings, default "xoscar").

    Returns the created detector task, or None when disabled/unsupported.
    Must be called while an event loop is running.
    """
    check_interval = check_interval or int(
        os.environ.get("XOSCAR_DEBUG_ASYNCIO_TASK_TIMEOUT_CHECK_INTERVAL", -1)
    )
    if check_interval > 0:
        # Ensure new tasks carry creation timestamps before detection starts.
        patch_asyncio_task_create_time()
        task_timeout_seconds = task_timeout_seconds or int(
            os.environ.get("XOSCAR_DEBUG_ASYNCIO_TASK_TIMEOUT_SECONDS", check_interval)
        )
        if not task_exclude_filters:
            # Ignore Xoscar by default since it has some long-running coroutines.
            task_exclude_filter = os.environ.get(
                "XOSCAR_DEBUG_ASYNCIO_TASK_EXCLUDE_FILTERS", "xoscar"
            )
            task_exclude_filters = task_exclude_filter.split(";")
        if sys.version_info[:2] < (3, 7):
            logger.warning(
                "asyncio tasks timeout detector is not supported under python %s",
                sys.version,
            )
        else:
            loop = asyncio.get_running_loop()
            logger.info(
                "Create asyncio tasks timeout detector with check_interval %s task_timeout_seconds %s "
                "task_exclude_filters %s",
                check_interval,
                task_timeout_seconds,
                task_exclude_filters,
            )
            return loop.create_task(
                asyncio_task_timeout_detector(
                    check_interval, task_timeout_seconds, task_exclude_filters
                )
            )
    else:
        return None
398
+
399
+
400
def ensure_coverage():
    """Hook pytest-cov's SIGTERM cleanup in subprocess-started workers.

    When coverage is driving the run (``COV_CORE_SOURCE`` set) on a POSIX
    platform, register pytest-cov's SIGTERM handler so coverage data from
    child processes started via ``subprocess.Popen`` gets flushed.  Does
    nothing on Windows, without coverage, or when pytest-cov is missing.
    """
    on_windows = sys.platform.startswith("win")
    if on_windows or "COV_CORE_SOURCE" not in os.environ:  # pragma: no cover
        return
    try:
        from pytest_cov.embed import cleanup_on_sigterm
    except ImportError:
        return
    cleanup_on_sigterm()
411
+
412
+
413
+ def retry_callable(
414
+ callable_,
415
+ ex_type: type = Exception,
416
+ wait_interval=1,
417
+ max_retries=-1,
418
+ sync: bool | None = None,
419
+ ):
420
+ if inspect.iscoroutinefunction(callable_) or sync is False:
421
+
422
+ @functools.wraps(callable)
423
+ async def retry_call(*args, **kwargs):
424
+ num_retried = 0
425
+ while max_retries < 0 or num_retried < max_retries:
426
+ num_retried += 1
427
+ try:
428
+ return await callable_(*args, **kwargs)
429
+ except ex_type:
430
+ await asyncio.sleep(wait_interval)
431
+
432
+ else:
433
+
434
+ @functools.wraps(callable)
435
+ def retry_call(*args, **kwargs):
436
+ num_retried = 0
437
+ ex = None
438
+ while max_retries < 0 or num_retried < max_retries:
439
+ num_retried += 1
440
+ try:
441
+ return callable_(*args, **kwargs)
442
+ except ex_type as e:
443
+ ex = e
444
+ time.sleep(wait_interval)
445
+ assert ex is not None
446
+ raise ex # pylint: disable-msg=E0702
447
+
448
+ return retry_call
449
+
450
+
451
# Optional GPU dependencies, resolved lazily so importing this module never
# requires CUDA packages to be installed.
_cupy = lazy_import("cupy")
_rmm = lazy_import("rmm")
453
+
454
+
455
def is_cuda_buffer(cuda_buffer: Union["_cupy.ndarray", "_rmm.DeviceBuffer"]) -> bool:  # type: ignore
    """Return True when the object exposes ``__cuda_array_interface__``,
    i.e. implements the CUDA array interface protocol."""
    return hasattr(cuda_buffer, "__cuda_array_interface__")
457
+
458
+
459
def is_windows():
    """Return True when running on a Windows platform."""
    return sys.platform[:3] == "win"
461
+
462
+
463
def is_linux():
    """Return True when running on a Linux platform."""
    return sys.platform[:5] == "linux"
465
+
466
+
467
def is_v4_zero_ip(ip_port_addr: str) -> bool:
    """True for IPv4 wildcard-bound addresses such as ``"0.0.0.0:1234"``."""
    wildcard_prefix = "0.0.0.0:"
    return ip_port_addr[: len(wildcard_prefix)] == wildcard_prefix
469
+
470
+
471
def is_v6_zero_ip(ip_port_addr: str) -> bool:
    """True for IPv6 wildcard-bound addresses such as ``":::123"``.

    The final colon-separated field is the port; every preceding non-empty
    field must be hexadecimal zero ("::" compresses runs of zero fields, so
    empty fields also count as zero).
    """
    fields = ip_port_addr.split(":")
    if len(fields) <= 2:
        # Plain "host:port" or a bare value -- not a tcp6/udp6 address.
        return False
    return all(int(field, 16) == 0 for field in fields[:-1] if field != "")
481
+
482
+
483
def fix_all_zero_ip(remote_addr: str, connect_addr: str) -> str:
    """Rewrite a wildcard listen address reported by a remote server.

    A server bound to "0.0.0.0:port" or ":::port" reports that wildcard
    address as ``ActorRef.address``, which a client cannot reuse for further
    interaction (the client would resolve 0.0.0.0 to 127.0.0.1).  Substitute
    the IP the client actually connected with while keeping the
    server-reported port, since the server may hand out a different port
    from a pool for load-balancing purposes.
    """
    if remote_addr == connect_addr:
        return remote_addr
    remote_is_wildcard = is_v4_zero_ip(remote_addr) or is_v6_zero_ip(remote_addr)
    if not remote_is_wildcard:
        # The server already reported a concrete, reachable IP.
        return remote_addr
    if is_v4_zero_ip(connect_addr) or is_v6_zero_ip(connect_addr):
        # The client itself connected through a wildcard (local) address;
        # there is nothing better to substitute.
        return remote_addr
    remote_port = remote_addr.rsplit(":", 1)[-1]
    connect_ip = ":".join(connect_addr.split(":")[0:-1])  # drop the port
    return f"{connect_ip}:{remote_port}"