xoscar 0.9.0__cp312-cp312-macosx_10_13_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94)
  1. xoscar/__init__.py +61 -0
  2. xoscar/_utils.cpython-312-darwin.so +0 -0
  3. xoscar/_utils.pxd +36 -0
  4. xoscar/_utils.pyx +246 -0
  5. xoscar/_version.py +693 -0
  6. xoscar/aio/__init__.py +16 -0
  7. xoscar/aio/base.py +86 -0
  8. xoscar/aio/file.py +59 -0
  9. xoscar/aio/lru.py +228 -0
  10. xoscar/aio/parallelism.py +39 -0
  11. xoscar/api.py +527 -0
  12. xoscar/backend.py +67 -0
  13. xoscar/backends/__init__.py +14 -0
  14. xoscar/backends/allocate_strategy.py +160 -0
  15. xoscar/backends/communication/__init__.py +30 -0
  16. xoscar/backends/communication/base.py +315 -0
  17. xoscar/backends/communication/core.py +69 -0
  18. xoscar/backends/communication/dummy.py +253 -0
  19. xoscar/backends/communication/errors.py +20 -0
  20. xoscar/backends/communication/socket.py +444 -0
  21. xoscar/backends/communication/ucx.py +538 -0
  22. xoscar/backends/communication/utils.py +97 -0
  23. xoscar/backends/config.py +157 -0
  24. xoscar/backends/context.py +437 -0
  25. xoscar/backends/core.py +352 -0
  26. xoscar/backends/indigen/__init__.py +16 -0
  27. xoscar/backends/indigen/__main__.py +19 -0
  28. xoscar/backends/indigen/backend.py +51 -0
  29. xoscar/backends/indigen/driver.py +26 -0
  30. xoscar/backends/indigen/fate_sharing.py +221 -0
  31. xoscar/backends/indigen/pool.py +515 -0
  32. xoscar/backends/indigen/shared_memory.py +548 -0
  33. xoscar/backends/message.cpython-312-darwin.so +0 -0
  34. xoscar/backends/message.pyi +255 -0
  35. xoscar/backends/message.pyx +646 -0
  36. xoscar/backends/pool.py +1630 -0
  37. xoscar/backends/router.py +285 -0
  38. xoscar/backends/test/__init__.py +16 -0
  39. xoscar/backends/test/backend.py +38 -0
  40. xoscar/backends/test/pool.py +233 -0
  41. xoscar/batch.py +256 -0
  42. xoscar/collective/__init__.py +27 -0
  43. xoscar/collective/backend/__init__.py +13 -0
  44. xoscar/collective/backend/nccl_backend.py +160 -0
  45. xoscar/collective/common.py +102 -0
  46. xoscar/collective/core.py +737 -0
  47. xoscar/collective/process_group.py +687 -0
  48. xoscar/collective/utils.py +41 -0
  49. xoscar/collective/xoscar_pygloo.cpython-312-darwin.so +0 -0
  50. xoscar/collective/xoscar_pygloo.pyi +239 -0
  51. xoscar/constants.py +23 -0
  52. xoscar/context.cpython-312-darwin.so +0 -0
  53. xoscar/context.pxd +21 -0
  54. xoscar/context.pyx +368 -0
  55. xoscar/core.cpython-312-darwin.so +0 -0
  56. xoscar/core.pxd +51 -0
  57. xoscar/core.pyx +664 -0
  58. xoscar/debug.py +188 -0
  59. xoscar/driver.py +42 -0
  60. xoscar/errors.py +63 -0
  61. xoscar/libcpp.pxd +31 -0
  62. xoscar/metrics/__init__.py +21 -0
  63. xoscar/metrics/api.py +288 -0
  64. xoscar/metrics/backends/__init__.py +13 -0
  65. xoscar/metrics/backends/console/__init__.py +13 -0
  66. xoscar/metrics/backends/console/console_metric.py +82 -0
  67. xoscar/metrics/backends/metric.py +149 -0
  68. xoscar/metrics/backends/prometheus/__init__.py +13 -0
  69. xoscar/metrics/backends/prometheus/prometheus_metric.py +70 -0
  70. xoscar/nvutils.py +717 -0
  71. xoscar/profiling.py +260 -0
  72. xoscar/serialization/__init__.py +20 -0
  73. xoscar/serialization/aio.py +141 -0
  74. xoscar/serialization/core.cpython-312-darwin.so +0 -0
  75. xoscar/serialization/core.pxd +28 -0
  76. xoscar/serialization/core.pyi +57 -0
  77. xoscar/serialization/core.pyx +944 -0
  78. xoscar/serialization/cuda.py +111 -0
  79. xoscar/serialization/exception.py +48 -0
  80. xoscar/serialization/mlx.py +67 -0
  81. xoscar/serialization/numpy.py +82 -0
  82. xoscar/serialization/pyfury.py +37 -0
  83. xoscar/serialization/scipy.py +72 -0
  84. xoscar/serialization/torch.py +180 -0
  85. xoscar/utils.py +522 -0
  86. xoscar/virtualenv/__init__.py +34 -0
  87. xoscar/virtualenv/core.py +268 -0
  88. xoscar/virtualenv/platform.py +56 -0
  89. xoscar/virtualenv/utils.py +100 -0
  90. xoscar/virtualenv/uv.py +321 -0
  91. xoscar-0.9.0.dist-info/METADATA +230 -0
  92. xoscar-0.9.0.dist-info/RECORD +94 -0
  93. xoscar-0.9.0.dist-info/WHEEL +6 -0
  94. xoscar-0.9.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,944 @@
1
+ # distutils: language = c++
2
+ # Copyright 2022-2023 XProbe Inc.
3
+ # derived from copyright 1999-2022 Alibaba Group Holding Ltd.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ import asyncio
18
+ import datetime
19
+ import enum
20
+ import hashlib
21
+ import inspect
22
+ import sys
23
+ from functools import partial, wraps
24
+ from typing import Any, Dict, List
25
+
26
+ if sys.version_info[:2] < (3, 8): # pragma: no cover
27
+ try:
28
+ import pickle5 as pickle # nosec # pylint: disable=import_pickle
29
+ except ImportError:
30
+ import pickle # nosec # pylint: disable=import_pickle
31
+ else:
32
+ import pickle # nosec # pylint: disable=import_pickle
33
+
34
+ import cloudpickle
35
+ import numpy as np
36
+ import pandas as pd
37
+
38
+ from cpython cimport PyObject
39
+ from libc.stdint cimport int32_t, int64_t, uint32_t, uint64_t, uintptr_t
40
+ from libcpp.unordered_map cimport unordered_map
41
+
42
# Work around a pandas pickle incompatibility between versions <1.2 and >=1.3:
# when the internal ``blocks`` module only offers ``make_block``, expose it
# under the name ``new_block`` as well.
try:
    from pandas.core.internals import blocks as pd_blocks
    if hasattr(pd_blocks, "make_block") and not hasattr(pd_blocks, "new_block"):
        # register missing func that would cause errors
        pd_blocks.new_block = pd_blocks.make_block
except (ImportError, AttributeError):
    # best effort only, the module layout may differ or be unavailable
    pass
50
+
51
+ from .._utils import NamedType
52
+
53
+ from .._utils cimport TypeDispatcher
54
+ from .pyfury import get_fury
55
+
56
# pickle protocol passed to cloudpickle when out-of-band buffers are
# collected; never lower than 5
BUFFER_PICKLE_PROTOCOL = max(pickle.DEFAULT_PROTOCOL, 5)
# True when the imported pickle module offers protocol 5 or above
cdef bint HAS_PICKLE_BUFFER = pickle.HIGHEST_PROTOCOL >= 5

# dispatcher returning the serializer instance registered for an object type
cdef TypeDispatcher _serial_dispatcher = TypeDispatcher()
# maps serializer_id -> serializer instance, looked up when deserializing
cdef dict _deserializers = dict()

# str / bytes items shorter than this stay inline inside collection headers
cdef uint32_t _MAX_STR_PRIMITIVE_LEN = 1024
# prime modulus for serializer ids
# use the largest prime number smaller than 32767
cdef int32_t _SERIALIZER_ID_PRIME = 32749
66
+
67
+
68
cdef class Serializer:
    """
    Base class of serializers.

    A subclass implements ``serial()`` / ``deserial()`` and is attached to
    object types with ``register()``.
    """
    serializer_id = None

    def __cinit__(self):
        # copy the class-level id into the C attribute so that it can be
        # referenced from C code
        self._serializer_id = self.serializer_id

    cpdef serial(self, object obj, dict context):
        """
        Produce the intermediate serialization result of an object.

        The result is either a Placeholder or a 3-tuple made of
        a header, a group of subcomponents and a finalizing flag.

        * The header is a pickle-serializable tuple.
        * The subcomponents are parts or buffers used for iterative
          serialization.
        * The flag is a boolean. When true, the subcomponents should be
          buffers (for instance bytes, memory views, GPU buffers, etc.)
          that can be read and written directly. When false, the
          subcomponents are serialized iteratively.

        Parameters
        ----------
        obj: Any
            Object to serialize
        context: Dict
            Serialization context which helps creating Placeholder
            objects in order to avoid duplicated serialization

        Returns
        -------
        result: Placeholder | Tuple[Tuple, List, bool]
            Intermediate result of serialization
        """
        raise NotImplementedError

    cpdef deserial(self, tuple serialized, dict context, list subs):
        """
        Rebuild an object from its serialized header and its already
        deserialized subcomponents.

        Parameters
        ----------
        serialized: Tuple
            Serialized object header as a tuple
        context
            Serialization context for instantiation of Placeholder
            objects
        subs: List
            Deserialized subcomponents

        Returns
        -------
        result: Any
            Deserialized object
        """
        raise NotImplementedError

    cpdef on_deserial_error(
        self,
        tuple serialized,
        dict context,
        list subs_serialized,
        int error_index,
        object exc,
    ):
        """
        Give the serializer a chance to rewrite the exception raised while
        one of its subcomponents was deserialized.

        The base implementation keeps the original exception.

        Parameters
        ----------
        serialized: Tuple
            Serialized object header as a tuple
        context
            Serialization context for instantiation of Placeholder
            objects
        subs_serialized: List
            Serialized subcomponents
        error_index: int
            Index of the subcomponent causing the error
        exc: BaseException
            Exception raised

        Returns
        -------
        exc: BaseException | None
            Rewritten exception. If None, the original exception is kept.
        """
        return None

    @classmethod
    def calc_default_serializer_id(cls):
        """
        Derive a serializer id from the qualified class name: the MD5
        digest of ``module.qualname`` modulo ``_SERIALIZER_ID_PRIME``.
        """
        qualified_name = f"{cls.__module__}.{cls.__qualname__}"
        digest = hashlib.md5(qualified_name.encode()).hexdigest()
        return int(digest, 16) % _SERIALIZER_ID_PRIME

    @classmethod
    def register(cls, obj_type, name=None):
        """
        Register an instance of this serializer class for ``obj_type``.

        When ``name`` is given, the registration is made for
        ``NamedType(name, obj_type)`` instead of the bare type.
        """
        # a class should have its own serializer_id,
        # an inherited serializer_id is not acceptable
        parent_id = getattr(super(cls, cls), "serializer_id", None)
        if cls.serializer_id is None or cls.serializer_id == parent_id:
            cls.serializer_id = cls.calc_default_serializer_id()

        inst = cls()
        if name is not None:
            obj_type = NamedType(name, obj_type)
        _serial_dispatcher.register(obj_type, inst)

        registered = _deserializers.get(cls.serializer_id)
        if registered is None:
            _deserializers[cls.serializer_id] = inst
        else:
            # the id is already taken, it must belong to the very same class
            assert type(registered) is cls

    @classmethod
    def unregister(cls, obj_type, name=None):
        """
        Remove the registration for ``obj_type`` (or for
        ``NamedType(name, obj_type)`` when ``name`` is given) and drop the
        deserializer recorded under this class's ``serializer_id``.
        """
        key = obj_type if name is None else NamedType(name, obj_type)
        _serial_dispatcher.unregister(key)
        _deserializers.pop(cls.serializer_id, None)
190
+
191
+
192
cdef inline uint64_t _fast_id(object obj) nogil:
    # the identifier is the object's pointer value reinterpreted as an
    # unsigned integer
    return <uintptr_t><PyObject*>obj
194
+
195
+
196
def fast_id(obj):
    """C version of id() used for serialization"""
    # Python-callable wrapper around the inline C helper ``_fast_id``
    return _fast_id(obj)
199
+
200
+
201
def buffered(func):
    """
    Decorator for serial() methods which avoids duplicated serialization.

    An object whose id is already present in the context is replaced with
    a Placeholder carrying that id; otherwise the object is recorded in
    the context and handed over to the wrapped method.
    """
    @wraps(func)
    def wrapped(self, obj: Any, dict context):
        cdef uint64_t obj_id = _fast_id(obj)
        if obj_id not in context:
            # first occurrence, remember it and do the real serialization
            context[obj_id] = obj
            return func(self, obj, context)
        # seen before, only reference the earlier occurrence
        return Placeholder(obj_id)

    return wrapped
215
+
216
+
217
def pickle_buffers(obj):
    """
    Pickle an object into a list of buffers.

    When fury is available the list is ``[b"__fury__", payload, *buffers]``.
    Otherwise the first element is the cloudpickle payload, followed by
    the out-of-band buffers when pickle protocol 5 is supported.
    """
    cdef list buffers = [None]

    fury = get_fury()
    if fury is not None:
        def collect_fury_buffer(buf):
            try:
                buffers.append(memoryview(buf))
            except TypeError:
                # not usable as a memoryview directly, ask the object
                # for a buffer instead
                buffers.append(buf.to_buffer())

        buffers[0] = b"__fury__"
        # reserve slot 1 for the payload before any callback appends
        buffers.append(None)
        buffers[1] = fury.serialize(
            obj,
            buffer_callback=collect_fury_buffer,
        )
        return buffers

    if not HAS_PICKLE_BUFFER:
        # no out-of-band buffer support, everything goes in one payload
        buffers[0] = cloudpickle.dumps(obj)
        return buffers

    def collect_pickle_buffer(pickle_buf):
        raw = pickle_buf.raw()
        if raw.ndim > 1:
            # ravel n-d memoryview
            raw = raw.cast(raw.format)
        buffers.append(memoryview(raw))

    buffers[0] = cloudpickle.dumps(
        obj,
        buffer_callback=collect_pickle_buffer,
        protocol=BUFFER_PICKLE_PROTOCOL,
    )
    return buffers
251
+
252
+
253
def unpickle_buffers(list buffers):
    """
    Rebuild an object from the buffer list produced by pickle_buffers().

    A leading ``b"__fury__"`` marker selects fury, anything else is loaded
    with cloudpickle using the remaining items as out-of-band buffers.

    Raises
    ------
    Exception
        If the buffers were written with fury but fury is not available.
    """
    is_fury = buffers[0] == b"__fury__"
    if not is_fury:
        return cloudpickle.loads(buffers[0], buffers=buffers[1:])

    fury = get_fury()
    if fury is None:
        raise Exception("fury is not installed.")
    return fury.deserialize(buffers[1], buffers[2:])
263
+
264
+
265
cdef class PickleSerializer(Serializer):
    """Serializer which pickles objects through pickle_buffers()."""
    serializer_id = 0

    cpdef serial(self, obj: Any, dict context):
        cdef uint64_t obj_id = _fast_id(obj)

        if obj_id in context:
            # already recorded in this context, only reference it
            return Placeholder(obj_id)

        context[obj_id] = obj
        pickled = pickle_buffers(obj)
        # empty header, the pickled buffers are final subcomponents
        return (), pickled, True

    cpdef deserial(self, tuple serialized, dict context, list subs):
        return unpickle_buffers(subs)
279
+
280
+
281
# types registered with PrimitiveSerializer; items of exactly these types
# are left in place inside collection headers by
# CollectionSerializer._serial_iterable
cdef set _primitive_types = {
    type(None),
    bool,
    int,
    float,
    complex,
    datetime.datetime,
    datetime.date,
    datetime.timedelta,
    enum.Enum,
    type(max),  # builtin functions
    np.dtype,
    np.number,
}
295
+
296
+
297
class PrimitiveSerializer(Serializer):
    """Serializer which stores the value itself inside the header."""
    serializer_id = 1

    @buffered
    def serial(self, obj: Any, context: Dict):
        # the whole value lives in the header, no subcomponents
        header = (obj,)
        return header, [], True

    def deserial(self, tuple obj, context: Dict, subs: List[Any]):
        value = obj[0]
        return value
306
+
307
+
308
cdef class BytesSerializer(Serializer):
    """Serializer which passes the object through as a single buffer."""
    serializer_id = 2

    cpdef serial(self, obj: Any, dict context):
        cdef uint64_t obj_id = _fast_id(obj)

        if obj_id in context:
            # already recorded in this context, only reference it
            return Placeholder(obj_id)

        context[obj_id] = obj
        # empty header, the object itself is the only (final) buffer
        return (), [obj], True

    cpdef deserial(self, tuple serialized, dict context, list subs):
        first_buffer = subs[0]
        return first_buffer
322
+
323
+
324
cdef class StrSerializer(Serializer):
    """Serializer which ships a str as its encoded bytes."""
    serializer_id = 3

    cpdef serial(self, obj: Any, dict context):
        cdef uint64_t obj_id = _fast_id(obj)

        if obj_id in context:
            # already recorded in this context, only reference it
            return Placeholder(obj_id)

        context[obj_id] = obj
        encoded = (<str>obj).encode()
        return (), [encoded], True

    cpdef deserial(self, tuple serialized, dict context, list subs):
        raw = subs[0]
        if type(raw) is memoryview:
            # a memoryview cannot be decoded directly, convert to bytes first
            return raw.tobytes().decode()
        return raw.decode()
341
+
342
+
343
cdef class CollectionSerializer(Serializer):
    """
    Common base of the tuple, list and dict serializers.

    Provides the helpers which split an iterable into an inline part kept
    in the header and the items that have to be serialized separately.
    """
    obj_type = None

    cdef object _obj_type

    def __cinit__(self):
        # copy the class-level type into the C attribute so that it can be
        # referenced from C code
        self._obj_type = self.obj_type

    cdef tuple _serial_iterable(self, obj: Any):
        """
        Split ``obj`` into ``(header, subcomponents, False)``.

        The header is ``(items, indices, obj_type)`` where ``items`` holds
        the inline values with None at every index listed in ``indices``,
        and ``obj_type`` is the concrete type of ``obj`` when it differs
        from the serializer's own ``obj_type`` (None otherwise).
        """
        cdef list idx_to_propagate = []
        cdef list obj_to_propagate = []
        cdef list obj_list = <list>obj if type(obj) is list else list(obj)
        cdef int64_t idx
        cdef object item

        for idx in range(len(obj_list)):
            item = obj_list[idx]
            item_type = type(item)

            # short bytes / strings and primitive values stay inline
            if (
                (item_type is bytes and len(<bytes>item) < _MAX_STR_PRIMITIVE_LEN)
                or (item_type is str and len(<str>item) < _MAX_STR_PRIMITIVE_LEN)
                or item_type in _primitive_types
            ):
                continue

            if obj is obj_list:
                # copy before the first modification so that the list
                # passed in by the caller is left untouched
                obj_list = list(obj)

            obj_list[idx] = None
            idx_to_propagate.append(idx)
            obj_to_propagate.append(item)

        if self._obj_type is None or type(obj) is self._obj_type:
            obj_type = None
        else:
            obj_type = type(obj)
        return (obj_list, idx_to_propagate, obj_type), obj_to_propagate, False

    cpdef serial(self, obj: Any, dict context):
        cdef uint64_t obj_id = _fast_id(obj)

        if obj_id in context:
            # already recorded in this context, only reference it
            return Placeholder(obj_id)

        context[obj_id] = obj
        return self._serial_iterable(obj)

    cdef list _deserial_iterable(self, tuple serialized, list subs):
        """
        Put the deserialized subcomponents back at their recorded indices.

        The list stored in the header is filled in place and returned.
        """
        cdef list res_list, idx_to_propagate
        cdef int64_t pos

        res_list, idx_to_propagate, _ = serialized

        for pos, target_idx in enumerate(idx_to_propagate):
            res_list[target_idx] = subs[pos]
        return res_list
402
+
403
+
404
cdef class TupleSerializer(CollectionSerializer):
    """Serializer of tuples, tuple subclasses and namedtuples."""
    serializer_id = 4
    obj_type = tuple

    cpdef deserial(self, tuple serialized, dict context, list subs):
        cdef list items = self._deserial_iterable(serialized, subs)
        for item in items:
            assert type(item) is not Placeholder

        # the last header field is None for a plain tuple
        target_type = serialized[-1] or tuple
        if not hasattr(target_type, "_fields"):
            return target_type(items)
        # namedtuple
        return target_type(*items)
419
+
420
+
421
cdef class ListSerializer(CollectionSerializer):
    """Serializer of lists and list subclasses."""
    serializer_id = 5
    obj_type = list

    cpdef deserial(self, tuple serialized, dict context, list subs):
        cdef int64_t idx
        cdef list res = self._deserial_iterable(serialized, subs)

        # the last header field is None for a plain list
        obj_type = serialized[-1]
        result = res if obj_type is None else obj_type(res)

        for idx, v in enumerate(res):
            if type(v) is not Placeholder:
                continue
            # when the placeholder is resolved, write the real object
            # into the result at the same position
            (<Placeholder>v).callbacks.append(partial(result.__setitem__, idx))
        return result
440
+
441
+
442
def _dict_key_replacer(ret, key, real_key):
    """Move the value stored in ``ret`` under ``key`` to ``real_key``."""
    value = ret.pop(key)
    ret[real_key] = value
444
+
445
+
446
def _dict_value_replacer(context, ret, key, real_value):
    """
    Store ``real_value`` in ``ret`` under ``key``.

    A Placeholder key is first looked up in ``context`` by its id and the
    object found there is used as the key.
    """
    target_key = context[(<Placeholder>key).id] if type(key) is Placeholder else key
    ret[target_key] = real_value
450
+
451
+
452
cdef class DictSerializer(CollectionSerializer):
    """
    Serializer of dicts and dict subclasses.

    Keys and values are split with ``_serial_iterable`` independently; the
    header records how many subcomponents belong to the keys.
    """
    serializer_id = 6
    # dict subclasses which already passed the initializer check in serial()
    cdef set _inspected_inherits

    def __cinit__(self):
        self._inspected_inherits = set()

    cpdef serial(self, obj: Any, dict context):
        """
        Serialize a dict.

        Returns ``(), [], True`` for an empty plain dict, a Placeholder for
        a dict already recorded in the context, ``(obj,), [], True`` for a
        subclass whose ``__init__`` accepts no arguments besides ``self``,
        and otherwise ``(keys_header, values_header, num_key_subs,
        obj_type)`` with the key subcomponents followed by the value
        subcomponents.
        """
        cdef uint64_t obj_id
        cdef tuple key_obj, value_obj
        cdef list key_bufs, value_bufs

        # empty plain dicts are not recorded in the context at all
        if type(obj) is dict and len(<dict>obj) == 0:
            return (), [], True

        obj_id = _fast_id(obj)
        if obj_id in context:
            return Placeholder(obj_id)
        context[obj_id] = obj

        obj_type = type(obj)

        if obj_type is not dict and obj_type not in self._inspected_inherits:
            inspect_init = inspect.getfullargspec(obj_type.__init__)
            if (
                inspect_init.args == ["self"]
                and not inspect_init.varargs
                and not inspect_init.varkw
            ):
                # inherited dicts may not have proper initializers
                # for deserialization
                # remove context to generate real serialized result
                context.pop(obj_id)
                # the object itself is placed into the header
                return (obj,), [], True
            else:
                self._inspected_inherits.add(obj_type)

        key_obj, key_bufs, _ = self._serial_iterable(obj.keys())
        value_obj, value_bufs, _ = self._serial_iterable(obj.values())
        if obj_type is dict:
            obj_type = None
        # drop the trailing obj_type field of both sub-headers, the type of
        # the dict is stored once at the end
        ser_obj = (key_obj[:-1], value_obj[:-1], len(key_bufs), obj_type)
        return ser_obj, key_bufs + value_bufs, False

    cpdef deserial(self, tuple serialized, dict context, list subs):
        """
        Rebuild a dict from the header written by serial() and the
        deserialized subcomponents. Keys or values that are still
        Placeholder objects get callbacks which patch the result once the
        real objects are available.
        """
        cdef int64_t i, num_key_bufs
        cdef list key_subs, value_subs, keys, values

        # empty header: an empty plain dict
        if not serialized:
            return {}
        if len(serialized) == 1:
            # serialized directly
            return serialized[0]

        key_serialized, value_serialized, num_key_bufs, obj_type = serialized
        # the first num_key_bufs subcomponents belong to the keys
        key_subs = subs[:num_key_bufs]
        value_subs = subs[num_key_bufs:]

        # re-append the obj_type field removed in serial() so that the
        # sub-headers have the three fields _deserial_iterable unpacks
        keys = self._deserial_iterable(<tuple>key_serialized + (None,), key_subs)
        values = self._deserial_iterable(<tuple>value_serialized + (None,), value_subs)

        if obj_type is None:
            ret = dict(zip(keys, values))
        else:
            try:
                ret = obj_type(zip(keys, values))
            except TypeError:
                # first arg of defaultdict is a callable
                ret = obj_type()
                ret.update(zip(keys, values))

        for i in range(len(keys)):
            k, v = keys[i], values[i]
            if type(k) is Placeholder:
                # re-key the entry once the real key object is known
                (<Placeholder>k).callbacks.append(
                    partial(_dict_key_replacer, ret, k)
                )
            if type(v) is Placeholder:
                # overwrite the value once the real value object is known
                (<Placeholder>v).callbacks.append(
                    partial(_dict_value_replacer, context, ret, k)
                )
        return ret
534
+
535
+
536
cdef class Placeholder:
    """
    Stand-in for an object that has been serialized elsewhere, used to
    reduce duplicated serialization.

    It carries the identifier of the object together with the callbacks
    that replace the placeholder inside its parent objects.
    """
    cdef public uint64_t id
    cdef public list callbacks

    def __init__(self, uint64_t id_):
        self.callbacks = []
        self.id = id_

    def __hash__(self):
        return self.id

    def __eq__(self, other):  # pragma: no cover
        # only placeholders with the same id are equal
        return type(other) is Placeholder and self.id == other.id

    def __repr__(self):
        text = (
            f"Placeholder(id={self.id}, "
            f"callbacks=[list of {len(self.callbacks)}])"
        )
        return text
563
+
564
+
565
cdef class PlaceholderSerializer(Serializer):
    """Serializer of Placeholder objects; nothing but the common header is written."""
    serializer_id = 7

    cpdef serial(self, obj: Any, dict context):
        # empty header and no subcomponents
        return (), [], True

    cpdef deserial(self, tuple serialized, dict context, list subs):
        # the id given here is provisional: _deserial_single either swaps
        # the placeholder for the recorded object or assigns the real id
        return Placeholder(0)
573
+
574
+
575
# register the built-in serializers for their object types
PickleSerializer.register(object)
for _primitive in _primitive_types:
    PrimitiveSerializer.register(_primitive)
BytesSerializer.register(bytes)
BytesSerializer.register(memoryview)
StrSerializer.register(str)
ListSerializer.register(list)
TupleSerializer.register(tuple)
DictSerializer.register(dict)
PlaceholderSerializer.register(Placeholder)
585
+
586
+
587
cdef class _SerialStackItem:
    """Stack frame of an object whose subcomponents are being serialized."""
    # header of the object itself
    cdef public tuple serialized
    # subcomponents of the object which still have to be serialized
    cdef public list subs
    # serialized results of the subcomponents processed so far
    cdef public list subs_serialized

    def __cinit__(self, tuple serialized, list subs):
        self.serialized = serialized
        self.subs = subs
        self.subs_serialized = []
596
+
597
+
598
cdef class _IdContextHolder:
    # maps an object id to the ordinal id written into the common header
    # (assigned in _serial_single)
    cdef unordered_map[uint64_t, uint64_t] d
600
+
601
+
602
# number of leading fields in every serialized header:
# (serializer id, ordinal object id, number of subcomponents, final flag)
cdef int _COMMON_HEADER_LEN = 4
603
+
604
+
605
cdef tuple _serial_single(
    obj, dict context, _IdContextHolder id_context_holder
):
    """
    Serialize one object without descending into its subcomponents.

    Returns ``(header, subs, final)`` where the header is the common
    header followed by the header produced by the serializer.
    """
    cdef uint64_t obj_id, ordered_id
    cdef Serializer serializer
    cdef tuple common_header, serialized

    while True:
        name = context.get("serializer")
        obj_type = NamedType(name, type(obj)) if name is not None else type(obj)
        serializer = _serial_dispatcher.get_handler(obj_type)
        ret_serial = serializer.serial(obj, context)
        if type(ret_serial) is not tuple:
            # object is converted into another (usually a Placeholder),
            # serialize the replacement instead
            obj = ret_serial
            continue

        # object is serialized, form a common header and return
        serialized, subs, final = <tuple>ret_serial

        if type(obj) is Placeholder:
            # reuse the ordinal id given to the original object
            obj_id = (<Placeholder>obj).id
            ordered_id = id_context_holder.d[obj_id]
        else:
            # the next ordinal id is the number of ids handed out so far
            obj_id = _fast_id(obj)
            ordered_id = id_context_holder.d.size()
            id_context_holder.d[obj_id] = ordered_id

        # REMEMBER to change _COMMON_HEADER_LEN when content of
        # this header changed
        common_header = (
            serializer._serializer_id, ordered_id, len(subs), final
        )
        return common_header + serialized, subs, final
640
+
641
+
642
class _SerializeObjectOverflow(Exception):
    """
    Raised by _serialize_with_stack() when more objects than allowed have
    been processed; carries the state needed to resume.
    """
    def __init__(self, tuple cur_serialized, int num_total_serialized):
        super().__init__(cur_serialized)
        self.num_total_serialized = num_total_serialized
        self.cur_serialized = cur_serialized
647
+
648
+
649
cpdef object _serialize_with_stack(
    list serial_stack,
    tuple serialized,
    dict context,
    _IdContextHolder id_context_holder,
    list result_bufs_list,
    int64_t num_overflow = 0,
    int64_t num_total_serialized = 0,
):
    """
    Serialize the objects on ``serial_stack`` iteratively.

    Parameters
    ----------
    serial_stack: List
        Stack of _SerialStackItem objects still being processed
    serialized: Tuple
        Result of the most recently serialized object which has not been
        recorded in its parent yet, or None
    context: Dict
        Serialization context
    id_context_holder: _IdContextHolder
        Holder of the mapping from object ids to ordinal ids
    result_bufs_list: List
        List receiving the buffers of leaf subcomponents
    num_overflow: int
        When positive, _SerializeObjectOverflow is raised as soon as
        ``num_total_serialized`` exceeds this value
    num_total_serialized: int
        Counter value to start from; a positive value marks a resumed call

    Returns
    -------
    result: Tuple[Tuple, List]
        ``(extra_meta, serialized)`` and ``result_bufs_list``
    """
    cdef _SerialStackItem stack_item
    cdef list subs
    cdef bint final
    cdef int64_t num_sub_serialized
    cdef bint is_resume = num_total_serialized > 0

    while serial_stack:
        stack_item = serial_stack[-1]
        if serialized is not None:
            # have previously-serialized results, record first
            stack_item.subs_serialized.append(serialized)

        num_sub_serialized = len(stack_item.subs_serialized)
        if len(stack_item.subs) == num_sub_serialized:
            # all subcomponents serialized, serialization of current is done
            # and we can move to the parent object
            serialized = stack_item.serialized + tuple(stack_item.subs_serialized)
            num_total_serialized += 1
            serial_stack.pop()
        else:
            # serialize next subcomponent at stack top
            serialized, subs, final = _serial_single(
                stack_item.subs[num_sub_serialized], context, id_context_holder
            )
            num_total_serialized += 1
            if final or not subs:
                # the subcomponent is a leaf
                if subs:
                    result_bufs_list.extend(subs)
            else:
                # the subcomponent has its own subcomponents, we push itself
                # into stack and process its children
                stack_item = _SerialStackItem(serialized, subs)
                serial_stack.append(stack_item)
                # note that the serialized header should not be recorded
                # as we are now processing the subcomponent itself
                serialized = None
        # checked after every step; the exception carries the pending result
        # and the counter so that a later call can continue from here
        if 0 < num_overflow < num_total_serialized:
            raise _SerializeObjectOverflow(serialized, num_total_serialized)

    # we keep an empty dict for extra metas required for other modules
    if is_resume:
        # returns num of serialized objects when resumed
        extra_meta = {"_N": num_total_serialized}
    else:
        # otherwise does not record the number to reduce result size
        extra_meta = {}
    return (extra_meta, serialized), result_bufs_list
706
+
707
+
708
def serialize(obj, dict context = None):
    """
    Turn an object into a header plus a list of buffers.
    The buffers are meant for zero-copy data manipulation.

    Parameters
    ----------
    obj: Any
        Object to serialize
    context: Dict
        Serialization context for instantiation of Placeholder
        objects. A new dict is used when omitted.

    Returns
    -------
    result: Tuple[Tuple, List]
        Picklable header and buffers
    """
    cdef list serial_stack = []
    cdef list result_bufs_list = []
    cdef tuple serialized
    cdef list subs
    cdef bint final
    cdef _IdContextHolder id_context_holder = _IdContextHolder()

    if context is None:
        context = dict()
    serialized, subs, final = _serial_single(obj, context, id_context_holder)
    if not final and subs:
        # the object has subcomponents to descend into
        serial_stack.append(_SerialStackItem(serialized, subs))
        return _serialize_with_stack(
            serial_stack, None, context, id_context_holder, result_bufs_list
        )
    # marked as a leaf node, return directly
    return ({}, serialized), subs
743
+
744
+
745
async def serialize_with_spawn(
    obj, dict context = None, int spawn_threshold = 100, object executor = None
):
    """
    Turn an object into a header plus a list of buffers, handing the
    remaining work over to an executor once ``spawn_threshold`` is
    exceeded. The buffers are meant for zero-copy data manipulation.

    Parameters
    ----------
    obj: Any
        Object to serialize
    context: Dict
        Serialization context for instantiation of Placeholder
        objects. A new dict is used when omitted.
    spawn_threshold: int
        Threshold to spawn into a ThreadPoolExecutor
    executor: ThreadPoolExecutor
        ThreadPoolExecutor to spawn the rest of the serialization into

    Returns
    -------
    result: Tuple[Tuple, List]
        Picklable header and buffers
    """
    cdef list serial_stack = []
    cdef list result_bufs_list = []
    cdef tuple serialized
    cdef list subs
    cdef bint final
    cdef _IdContextHolder id_context_holder = _IdContextHolder()

    if context is None:
        context = dict()
    serialized, subs, final = _serial_single(obj, context, id_context_holder)
    if final or not subs:
        # marked as a leaf node, return directly
        return ({}, serialized), subs

    serial_stack.append(_SerialStackItem(serialized, subs))

    try:
        # serialize inline until the threshold is exceeded
        return _serialize_with_stack(
            serial_stack, None, context, id_context_holder, result_bufs_list, spawn_threshold
        )
    except _SerializeObjectOverflow as overflow:
        # continue from the recorded state inside the executor,
        # this time without any limit
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(
            executor,
            _serialize_with_stack,
            serial_stack,
            overflow.cur_serialized,
            context,
            id_context_holder,
            result_bufs_list,
            0,
            overflow.num_total_serialized,
        )
801
+
802
+
803
cdef class _DeserialStackItem:
    """Stack frame of an object whose subcomponents are being deserialized."""
    # header of the object itself, without the serialized subcomponents
    cdef public tuple serialized
    # serialized subcomponents of the object
    cdef public tuple subs
    # deserialized results of the subcomponents processed so far
    cdef public list subs_deserialized

    def __cinit__(self, tuple serialized, tuple subs):
        self.serialized = serialized
        self.subs = subs
        self.subs_deserialized = []
812
+
813
+
814
cdef _deserial_single(tuple serialized, dict context, list subs):
    """
    Deserialize a single object

    The result is recorded in ``context`` under the object id found in
    the common header. If a Placeholder was recorded there before, its
    callbacks are invoked with the new result.
    """
    cdef Serializer serializer
    cdef int64_t num_subs

    serializer_id, obj_id, num_subs, final = serialized[:_COMMON_HEADER_LEN]
    serializer = _deserializers[serializer_id]
    res = serializer.deserial(serialized[_COMMON_HEADER_LEN:], context, subs)

    if type(res) is Placeholder:
        try:
            # the referenced object is already known, use it directly
            res = context[obj_id]
        except KeyError:
            # not known yet, keep the placeholder and give it the real id
            (<Placeholder>res).id = obj_id

    # get previously-recorded context values
    context_val, context[obj_id] = context.get(obj_id), res
    # if previously recorded object is a Placeholder,
    # replace it with callbacks
    if type(context_val) is Placeholder:
        for cb in (<Placeholder>context_val).callbacks:
            cb(res)
    return res
837
+
838
+
839
def deserialize(tuple serialized, list buffers, dict context = None):
    """
    Deserialize an object with serialized headers and buffers

    Parameters
    ----------
    serialized: Tuple
        Serialized object header
    buffers: List
        List of buffers extracted from serialize() calls
    context: Dict
        Serialization context for replacing Placeholder
        objects. A new dict is used when omitted.

    Returns
    -------
    result: Any
        Deserialized object

    Raises
    ------
    BaseException
        The exception raised while deserializing a component, possibly
        rewritten by ``on_deserial_error()`` of the enclosing serializers.
    """
    cdef list deserial_stack = []
    cdef _DeserialStackItem stack_item
    cdef int64_t num_subs, num_deserialized, buf_pos = 0
    cdef bint final
    cdef Serializer serializer
    cdef object deserialized = None, exc_value = None
    cdef bint has_deserialized = False

    context = context if context is not None else dict()
    # drop extra meta field
    serialized = serialized[-1]
    # slice the common header with the shared constant instead of a literal
    # so that this function agrees with _deserial_single
    serializer_id, obj_id, num_subs, final = serialized[:_COMMON_HEADER_LEN]
    if final or num_subs == 0:
        # marked as a leaf node, return directly
        return _deserial_single(serialized, context, buffers)

    # the last num_subs fields of the header are the serialized subcomponents
    deserial_stack.append(
        _DeserialStackItem(
            serialized[:-num_subs], serialized[-num_subs:]
        )
    )

    while deserial_stack:
        stack_item = deserial_stack[-1]
        # the deserialized result can be None, hence we cannot
        # simply judge from the value deserialized
        if has_deserialized:
            # have previously-deserialized results, record first
            stack_item.subs_deserialized.append(deserialized)
        elif exc_value is not None:
            # have exception in successor components, try rewrite
            # and pass to predecessors
            serializer_id = stack_item.serialized[0]
            serializer = _deserializers[serializer_id]
            new_exc_value = serializer.on_deserial_error(
                stack_item.serialized[_COMMON_HEADER_LEN:],
                context,
                list(stack_item.subs),
                len(stack_item.subs_deserialized),
                exc_value,
            )
            exc_value = new_exc_value if new_exc_value is not None else exc_value
            deserial_stack.pop()
            continue

        num_deserialized = len(stack_item.subs_deserialized)
        if len(stack_item.subs) == num_deserialized:
            try:
                # all subcomponents deserialized, we can deserialize the object itself
                deserialized = _deserial_single(
                    stack_item.serialized, context, stack_item.subs_deserialized
                )
                has_deserialized = True
                deserial_stack.pop()
            except BaseException as ex:
                # remember the error; it is unwound through the parents
                # at the top of the loop
                has_deserialized = False
                exc_value = ex
                deserial_stack.pop()
        else:
            # select next subcomponent to process
            serialized = stack_item.subs[num_deserialized]
            serializer_id, obj_id, num_subs, final = serialized[:_COMMON_HEADER_LEN]
            if final or num_subs == 0:
                try:
                    # next subcomponent is a leaf, just deserialize
                    deserialized = _deserial_single(
                        serialized, context, buffers[buf_pos : buf_pos + num_subs]
                    )
                    has_deserialized = True
                    # the leaf consumed num_subs buffers
                    buf_pos += num_subs
                except BaseException as ex:
                    has_deserialized = False
                    exc_value = ex
            else:
                # next subcomponent has its own subcomponents, we push it
                # into stack and start handling its children
                stack_item = _DeserialStackItem(
                    serialized[:-num_subs], serialized[-num_subs:]
                )
                deserial_stack.append(stack_item)
                # note that the deserialized object should be cleaned
                # as we are just starting to handle the subcomponent itself
                has_deserialized = False

    if exc_value is not None:
        raise exc_value
    return deserialized