xoscar 0.3.1__cp38-cp38-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xoscar might be problematic. Click here for more details.

Files changed (80)
  1. xoscar/__init__.py +60 -0
  2. xoscar/_utils.cpython-38-darwin.so +0 -0
  3. xoscar/_utils.pxd +36 -0
  4. xoscar/_utils.pyx +241 -0
  5. xoscar/_version.py +693 -0
  6. xoscar/aio/__init__.py +25 -0
  7. xoscar/aio/_threads.py +35 -0
  8. xoscar/aio/base.py +86 -0
  9. xoscar/aio/file.py +59 -0
  10. xoscar/aio/lru.py +228 -0
  11. xoscar/aio/parallelism.py +39 -0
  12. xoscar/api.py +493 -0
  13. xoscar/backend.py +67 -0
  14. xoscar/backends/__init__.py +14 -0
  15. xoscar/backends/allocate_strategy.py +160 -0
  16. xoscar/backends/communication/__init__.py +30 -0
  17. xoscar/backends/communication/base.py +315 -0
  18. xoscar/backends/communication/core.py +69 -0
  19. xoscar/backends/communication/dummy.py +242 -0
  20. xoscar/backends/communication/errors.py +20 -0
  21. xoscar/backends/communication/socket.py +375 -0
  22. xoscar/backends/communication/ucx.py +520 -0
  23. xoscar/backends/communication/utils.py +97 -0
  24. xoscar/backends/config.py +145 -0
  25. xoscar/backends/context.py +404 -0
  26. xoscar/backends/core.py +193 -0
  27. xoscar/backends/indigen/__init__.py +16 -0
  28. xoscar/backends/indigen/backend.py +51 -0
  29. xoscar/backends/indigen/driver.py +26 -0
  30. xoscar/backends/indigen/pool.py +469 -0
  31. xoscar/backends/message.cpython-38-darwin.so +0 -0
  32. xoscar/backends/message.pyx +591 -0
  33. xoscar/backends/pool.py +1593 -0
  34. xoscar/backends/router.py +207 -0
  35. xoscar/backends/test/__init__.py +16 -0
  36. xoscar/backends/test/backend.py +38 -0
  37. xoscar/backends/test/pool.py +208 -0
  38. xoscar/batch.py +256 -0
  39. xoscar/collective/__init__.py +27 -0
  40. xoscar/collective/common.py +102 -0
  41. xoscar/collective/core.py +737 -0
  42. xoscar/collective/process_group.py +687 -0
  43. xoscar/collective/utils.py +41 -0
  44. xoscar/collective/xoscar_pygloo.cpython-38-darwin.so +0 -0
  45. xoscar/constants.py +21 -0
  46. xoscar/context.cpython-38-darwin.so +0 -0
  47. xoscar/context.pxd +21 -0
  48. xoscar/context.pyx +368 -0
  49. xoscar/core.cpython-38-darwin.so +0 -0
  50. xoscar/core.pxd +50 -0
  51. xoscar/core.pyx +658 -0
  52. xoscar/debug.py +188 -0
  53. xoscar/driver.py +42 -0
  54. xoscar/errors.py +63 -0
  55. xoscar/libcpp.pxd +31 -0
  56. xoscar/metrics/__init__.py +21 -0
  57. xoscar/metrics/api.py +288 -0
  58. xoscar/metrics/backends/__init__.py +13 -0
  59. xoscar/metrics/backends/console/__init__.py +13 -0
  60. xoscar/metrics/backends/console/console_metric.py +82 -0
  61. xoscar/metrics/backends/metric.py +149 -0
  62. xoscar/metrics/backends/prometheus/__init__.py +13 -0
  63. xoscar/metrics/backends/prometheus/prometheus_metric.py +70 -0
  64. xoscar/nvutils.py +717 -0
  65. xoscar/profiling.py +260 -0
  66. xoscar/serialization/__init__.py +20 -0
  67. xoscar/serialization/aio.py +138 -0
  68. xoscar/serialization/core.cpython-38-darwin.so +0 -0
  69. xoscar/serialization/core.pxd +28 -0
  70. xoscar/serialization/core.pyx +954 -0
  71. xoscar/serialization/cuda.py +111 -0
  72. xoscar/serialization/exception.py +48 -0
  73. xoscar/serialization/numpy.py +82 -0
  74. xoscar/serialization/pyfury.py +37 -0
  75. xoscar/serialization/scipy.py +72 -0
  76. xoscar/utils.py +502 -0
  77. xoscar-0.3.1.dist-info/METADATA +225 -0
  78. xoscar-0.3.1.dist-info/RECORD +80 -0
  79. xoscar-0.3.1.dist-info/WHEEL +5 -0
  80. xoscar-0.3.1.dist-info/top_level.txt +2 -0
@@ -0,0 +1,954 @@
1
+ # distutils: language = c++
2
+ # Copyright 2022-2023 XProbe Inc.
3
+ # derived from copyright 1999-2022 Alibaba Group Holding Ltd.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ import asyncio
18
+ import datetime
19
+ import enum
20
+ import hashlib
21
+ import inspect
22
+ import sys
23
+ from functools import partial, wraps
24
+ from typing import Any, Dict, List
25
+
26
+ if sys.version_info[:2] < (3, 8): # pragma: no cover
27
+ try:
28
+ import pickle5 as pickle # nosec # pylint: disable=import_pickle
29
+ except ImportError:
30
+ import pickle # nosec # pylint: disable=import_pickle
31
+ else:
32
+ import pickle # nosec # pylint: disable=import_pickle
33
+
34
+ import cloudpickle
35
+ import numpy as np
36
+ import pandas as pd
37
+
38
+ from cpython cimport PyObject
39
+ from libc.stdint cimport int32_t, int64_t, uint32_t, uint64_t, uintptr_t
40
+ from libcpp.unordered_map cimport unordered_map
41
+
42
+ # resolve pandas pickle compatibility between <1.2 and >=1.3
43
+ try:
44
+ from pandas.core.internals import blocks as pd_blocks
45
+ if not hasattr(pd_blocks, "new_block") and hasattr(pd_blocks, "make_block"):
46
+ # register missing func that would cause errors
47
+ pd_blocks.new_block = pd_blocks.make_block
48
+ except (ImportError, AttributeError):
49
+ pass
50
+
51
+ from .._utils import NamedType
52
+
53
+ from .._utils cimport TypeDispatcher
54
+ from .pyfury import get_fury
55
+
56
# pickle protocol passed to cloudpickle when a buffer callback is used
# (never lower than 5)
BUFFER_PICKLE_PROTOCOL = max(pickle.DEFAULT_PROTOCOL, 5)
# True when the imported pickle module supports protocol 5 or above
cdef bint HAS_PICKLE_BUFFER = pickle.HIGHEST_PROTOCOL >= 5
# True when a pandas Series exposes the ``_mgr`` attribute
cdef bint _PANDAS_HAS_MGR = hasattr(pd.Series([0]), "_mgr")

# maps object types to the serializer instance that handles them
cdef TypeDispatcher _serial_dispatcher = TypeDispatcher()
# maps serializer_id to the serializer instance used for deserialization
cdef dict _deserializers = dict()

# str / bytes items shorter than this are left inline by
# CollectionSerializer instead of being serialized as subcomponents
cdef uint32_t _MAX_STR_PRIMITIVE_LEN = 1024
# prime modulus for serializer ids
# use the largest prime number smaller than 32767
cdef int32_t _SERIALIZER_ID_PRIME = 32749
67
+
68
+
69
cdef class Serializer:
    """
    Base class of serializers.

    Subclasses implement :meth:`serial` and :meth:`deserial` and are bound
    to object types through :meth:`register`.
    """
    serializer_id = None

    def __cinit__(self):
        # copy the class-level id into an attribute reachable from C code
        self._serializer_id = self.serializer_id

    cpdef serial(self, object obj, dict context):
        """
        Produce the intermediate serialization result of an object.

        The result is either a Placeholder or a 3-tuple made of a header,
        a list of subcomponents and a finalizing flag.

        * The header is a pickle-serializable tuple.
        * The subcomponents are parts or buffers for iterative
          serialization.
        * The flag is a boolean. When true, the subcomponents should be
          buffers (for instance bytes, memory views, GPU buffers, etc.)
          that can be read and written directly. When false, the
          subcomponents will be serialized iteratively.

        Parameters
        ----------
        obj: Any
            Object to serialize
        context: Dict
            Serialization context that helps creating Placeholder
            objects to reduce duplicated serialization

        Returns
        -------
        result: Placeholder | Tuple[Tuple, List, bool]
            Intermediate result of serialization
        """
        raise NotImplementedError

    cpdef deserial(self, tuple serialized, dict context, list subs):
        """
        Rebuild an object from its serialized header and its already
        deserialized subcomponents.

        Parameters
        ----------
        serialized: Tuple
            Serialized object header as a tuple
        context
            Serialization context for instantiation of Placeholder
            objects
        subs: List
            Deserialized subcomponents

        Returns
        -------
        result: Any
            Deserialized objects
        """
        raise NotImplementedError

    cpdef on_deserial_error(
        self,
        tuple serialized,
        dict context,
        list subs_serialized,
        int error_index,
        object exc,
    ):
        """
        Hook that may rewrite an exception raised while deserializing
        a subcomponent.

        Parameters
        ----------
        serialized: Tuple
            Serialized object header as a tuple
        context
            Serialization context for instantiation of Placeholder
            objects
        subs_serialized: List
            Serialized subcomponents
        error_index: int
            Index of subcomponent causing error
        exc: BaseException
            Exception raised

        Returns
        -------
        exc: BaseException | None
            Rewritten exception. If None, original exception is kept.
        """
        return None

    @classmethod
    def calc_default_serializer_id(cls):
        """
        Derive a serializer id from the class's module and qualified name:
        the MD5 digest of that string, reduced modulo _SERIALIZER_ID_PRIME.
        """
        qualified_name = f"{cls.__module__}.{cls.__qualname__}"
        digest = hashlib.md5(qualified_name.encode()).hexdigest()
        return int(digest, 16) % _SERIALIZER_ID_PRIME

    @classmethod
    def register(cls, obj_type, name=None):
        """
        Bind an instance of this serializer to ``obj_type``.

        When ``name`` is given, the binding is made for
        ``NamedType(name, obj_type)`` instead of the bare type.
        """
        inherited_id = getattr(super(cls, cls), "serializer_id", None)
        if cls.serializer_id is None or cls.serializer_id == inherited_id:
            # a class should have its own serializer_id
            # inherited serializer_id not acceptable
            cls.serializer_id = cls.calc_default_serializer_id()

        inst = cls()
        if name is not None:
            obj_type = NamedType(name, obj_type)
        _serial_dispatcher.register(obj_type, inst)

        existing = _deserializers.get(cls.serializer_id)
        if existing is None:
            _deserializers[cls.serializer_id] = inst
        else:
            # an id already in use must belong to this very class
            assert type(existing) is cls

    @classmethod
    def unregister(cls, obj_type, name=None):
        """
        Remove the binding for ``obj_type`` (or its named variant) and drop
        this class's entry from the deserializer table.
        """
        if name is not None:
            obj_type = NamedType(name, obj_type)
        _serial_dispatcher.unregister(obj_type)
        _deserializers.pop(cls.serializer_id, None)
191
+
192
+
193
cdef inline uint64_t _fast_id(object obj) nogil:
    # reinterpret the object's PyObject pointer as an unsigned integer
    return <uintptr_t><PyObject*>obj
195
+
196
+
197
def fast_id(obj):
    """Python-callable wrapper of the C-level id() used for serialization."""
    cdef uint64_t identifier = _fast_id(obj)
    return identifier
200
+
201
+
202
def buffered(func):
    """
    Wrap a ``serial()`` method to reduce duplicated serialization.

    The wrapper records every object it sees in ``context`` under its
    ``_fast_id``. When the same object shows up again, a Placeholder
    carrying that id is returned and ``func`` is not called.

    Parameters
    ----------
    func: Callable
        The ``serial(self, obj, context)`` method to wrap

    Returns
    -------
    wrapped: Callable
        Wrapped method with the same call signature
    """
    @wraps(func)
    def wrapped(self, obj: Any, dict context):
        cdef uint64_t obj_id = _fast_id(obj)
        if obj_id in context:
            # reuse the id computed above rather than calling _fast_id
            # a second time; matches the other serializers in this file
            return Placeholder(obj_id)
        context[obj_id] = obj
        return func(self, obj, context)

    return wrapped
216
+
217
+
218
def pickle_buffers(obj):
    """
    Pickle ``obj`` into a list of buffers.

    When ``get_fury()`` returns a serializer, the list is
    ``[b"__fury__", <fury payload>, *out-of-band buffers]``. Otherwise the
    list is ``[<cloudpickle payload>, *out-of-band buffers]``; out-of-band
    buffers are only collected when HAS_PICKLE_BUFFER is true.

    Parameters
    ----------
    obj: Any
        Object to pickle

    Returns
    -------
    buffers: List
        Payload followed by the buffers gathered by the buffer callback
    """
    cdef list buffers = [None]

    fury = get_fury()
    if fury is not None:
        def buffer_cb(x):
            try:
                view = memoryview(x)
            except TypeError:
                # object does not expose the buffer protocol
                view = x.to_buffer()
            buffers.append(view)

        # slot 0 marks the format, slot 1 is reserved for the payload so
        # that buffers collected by the callback land after it
        buffers[0] = b"__fury__"
        buffers.append(None)
        buffers[1] = fury.serialize(obj, buffer_callback=buffer_cb)
    elif HAS_PICKLE_BUFFER:
        def buffer_cb(x):
            raw = x.raw()
            # ravel n-d memoryview
            flat = raw.cast(raw.format) if raw.ndim > 1 else raw
            buffers.append(memoryview(flat))

        buffers[0] = cloudpickle.dumps(
            obj, buffer_callback=buffer_cb, protocol=BUFFER_PICKLE_PROTOCOL
        )
    else:
        buffers[0] = cloudpickle.dumps(obj)
    return buffers
252
+
253
+
254
def unpickle_buffers(list buffers):
    """
    Rebuild an object from the buffer list produced by ``pickle_buffers``.

    A first buffer equal to ``b"__fury__"`` selects the fury path; any
    other value is treated as a cloudpickle payload.

    NOTE(review): this unpickles its input, so it should presumably only
    be fed buffers from a trusted peer -- confirm against callers.

    Parameters
    ----------
    buffers: List
        Payload and out-of-band buffers

    Returns
    -------
    result: Any
        Unpickled object

    Raises
    ------
    Exception
        If the buffers were produced with fury but fury is unavailable
    """
    marker = buffers[0]
    if marker == b"__fury__":
        fury = get_fury()
        if fury is None:
            raise Exception("fury is not installed.")
        result = fury.deserialize(buffers[1], buffers[2:])
    else:
        result = cloudpickle.loads(marker, buffers=buffers[1:])

    # as pandas prior to 1.1.0 use _data instead of _mgr to hold BlockManager,
    # deserializing from high versions may produce mal-functioned pandas objects,
    # thus the patch is needed
    needs_patch = (
        not _PANDAS_HAS_MGR
        and hasattr(result, "_mgr")
        and isinstance(result, (pd.DataFrame, pd.Series))
    )
    if needs_patch:  # pragma: no cover
        result._data = result._mgr
        del result._mgr
    return result
273
+
274
+
275
cdef class PickleSerializer(Serializer):
    """Serializer that pickles objects through ``pickle_buffers``."""
    serializer_id = 0

    cpdef serial(self, obj: Any, dict context):
        """
        Pickle ``obj`` into buffers, or return a Placeholder when the
        object has already been recorded in ``context``.
        """
        cdef uint64_t obj_id = _fast_id(obj)
        if obj_id not in context:
            context[obj_id] = obj
            return (), pickle_buffers(obj), True
        return Placeholder(obj_id)

    cpdef deserial(self, tuple serialized, dict context, list subs):
        """Unpickle the object held in the buffer list ``subs``."""
        return unpickle_buffers(subs)
289
+
290
+
291
# types registered with PrimitiveSerializer; CollectionSerializer also
# leaves items whose exact type is in this set inline in its header
cdef set _primitive_types = {
    type(None),
    bool,
    int,
    float,
    complex,
    datetime.datetime,
    datetime.date,
    datetime.timedelta,
    enum.Enum,
    type(max),  # builtin functions
    np.dtype,
    np.number,
}
305
+
306
+
307
class PrimitiveSerializer(Serializer):
    """Serializer that stores the object itself inside the header."""
    serializer_id = 1

    @buffered
    def serial(self, obj: Any, context: Dict):
        """Return a header holding ``obj``, with no subcomponents."""
        header = (obj,)
        return header, [], True

    def deserial(self, tuple obj, context: Dict, subs: List[Any]):
        """Return the object stored as the first element of the header."""
        return obj[0]
316
+
317
+
318
cdef class BytesSerializer(Serializer):
    """Serializer that passes the object through as a single buffer."""
    serializer_id = 2

    cpdef serial(self, obj: Any, dict context):
        """
        Emit ``obj`` as the only buffer, or return a Placeholder when the
        object has already been recorded in ``context``.
        """
        cdef uint64_t obj_id = _fast_id(obj)
        if obj_id not in context:
            context[obj_id] = obj
            return (), [obj], True
        return Placeholder(obj_id)

    cpdef deserial(self, tuple serialized, dict context, list subs):
        """Return the first buffer unchanged."""
        return subs[0]
332
+
333
+
334
cdef class StrSerializer(Serializer):
    """Serializer that ships a str as its encoded bytes."""
    serializer_id = 3

    cpdef serial(self, obj: Any, dict context):
        """
        Emit the encoded string as the only buffer, or return a
        Placeholder when the object has already been recorded in
        ``context``.
        """
        cdef uint64_t obj_id = _fast_id(obj)
        if obj_id not in context:
            context[obj_id] = obj
            encoded = (<str>obj).encode()
            return (), [encoded], True
        return Placeholder(obj_id)

    cpdef deserial(self, tuple serialized, dict context, list subs):
        """Decode the first buffer back into a str."""
        raw = subs[0]
        if type(raw) is memoryview:
            # memoryview has no decode(), materialize bytes first
            raw = raw.tobytes()
        return raw.decode()
351
+
352
+
353
cdef class CollectionSerializer(Serializer):
    """
    Shared implementation for serializers of iterable containers.

    Items that are cheap to keep inline (short str / bytes and exact
    primitive types) stay in the header; all other items are replaced
    with None in the header and handed back as subcomponents.
    """
    # container type handled by the concrete subclass, None by default
    obj_type = None

    cdef object _obj_type

    def __cinit__(self):
        # make the value can be referenced with C code
        self._obj_type = self.obj_type

    cdef tuple _serial_iterable(self, obj: Any):
        """
        Split an iterable into inline items and items to serialize
        separately.

        Returns ``((item_list, indices, obj_type), items, False)`` where
        ``item_list`` holds the inline items with None at every index in
        ``indices``, ``items`` are the objects removed from those indices,
        and ``obj_type`` is ``type(obj)`` only when it differs from the
        serializer's own ``_obj_type`` (None otherwise).
        """
        cdef list idx_to_propagate = []
        cdef list obj_to_propagate = []
        # a list input is used as-is at first; see the copy below
        cdef list obj_list = <list>obj if type(obj) is list else list(obj)
        cdef int64_t idx
        cdef object item

        for idx in range(len(obj_list)):
            item = obj_list[idx]

            if type(item) is bytes and len(<bytes>item) < _MAX_STR_PRIMITIVE_LEN:
                # treat short bytes as primitives
                continue
            elif type(item) is str and len(<str>item) < _MAX_STR_PRIMITIVE_LEN:
                # treat short strings as primitives
                continue
            elif type(item) in _primitive_types:
                continue

            if obj is obj_list:
                # copy before the first write so the caller's list
                # is not modified
                obj_list = list(obj)

            obj_list[idx] = None
            idx_to_propagate.append(idx)
            obj_to_propagate.append(item)

        if self._obj_type is not None and type(obj) is not self._obj_type:
            obj_type = type(obj)
        else:
            obj_type = None
        return (obj_list, idx_to_propagate, obj_type), obj_to_propagate, False

    cpdef serial(self, obj: Any, dict context):
        """
        Serialize a container, or return a Placeholder when the object
        has already been recorded in ``context``.
        """
        cdef uint64_t obj_id
        obj_id = _fast_id(obj)
        if obj_id in context:
            return Placeholder(obj_id)
        context[obj_id] = obj

        return self._serial_iterable(obj)

    cdef list _deserial_iterable(self, tuple serialized, list subs):
        """
        Put deserialized subcomponents back at their recorded indices.

        The list stored in the header is filled in place and returned.
        """
        cdef list res_list, idx_to_propagate
        cdef int64_t i

        res_list, idx_to_propagate, _ = serialized

        for i in range(len(idx_to_propagate)):
            res_list[idx_to_propagate[i]] = subs[i]
        return res_list
412
+
413
+
414
cdef class TupleSerializer(CollectionSerializer):
    """Serializer for tuples and tuple subclasses."""
    serializer_id = 4
    obj_type = tuple

    cpdef deserial(self, tuple serialized, dict context, list subs):
        """
        Rebuild the tuple from its header and deserialized subcomponents.

        Types exposing ``_fields`` (namedtuples) are constructed from
        positional arguments; any other type receives the item list.
        """
        cdef list items = self._deserial_iterable(serialized, subs)
        for item in items:
            assert type(item) is not Placeholder

        target_type = serialized[-1] or tuple
        if hasattr(target_type, "_fields"):
            # namedtuple
            return target_type(*items)
        return target_type(items)
429
+
430
+
431
cdef class ListSerializer(CollectionSerializer):
    """Serializer for lists and list subclasses."""
    serializer_id = 5
    obj_type = list

    cpdef deserial(self, tuple serialized, dict context, list subs):
        """
        Rebuild the list from its header and deserialized subcomponents.

        For every item that is still a Placeholder, a callback is queued
        on it that stores the real object at the item's index in the
        result.
        """
        cdef int64_t idx
        cdef list res = self._deserial_iterable(serialized, subs)

        obj_type = serialized[-1]
        result = res if obj_type is None else obj_type(res)

        for idx, v in enumerate(res):
            if type(v) is not Placeholder:
                continue
            (<Placeholder>v).callbacks.append(partial(result.__setitem__, idx))
        return result
450
+
451
+
452
def _dict_key_replacer(ret, key, real_key):
    """Move the value stored under ``key`` in ``ret`` to ``real_key``."""
    value = ret.pop(key)
    ret[real_key] = value
454
+
455
+
456
def _dict_value_replacer(context, ret, key, real_value):
    """
    Store ``real_value`` in ``ret`` under ``key``.

    A Placeholder key is first resolved to the object recorded in
    ``context`` under the placeholder's id.
    """
    target_key = context[(<Placeholder>key).id] if type(key) is Placeholder else key
    ret[target_key] = real_value
460
+
461
+
462
cdef class DictSerializer(CollectionSerializer):
    """
    Serializer for dicts and dict subclasses.

    Keys and values are each split with ``_serial_iterable``; the
    subcomponents of the keys come first, followed by those of the values.
    """
    serializer_id = 6
    # dict subclasses whose __init__ was inspected and found to accept
    # arguments other than self
    cdef set _inspected_inherits

    def __cinit__(self):
        self._inspected_inherits = set()

    cpdef serial(self, obj: Any, dict context):
        """
        Serialize a dict.

        Returns one of:

        * ``(), [], True`` for an empty plain dict,
        * a Placeholder when the object is already recorded in ``context``,
        * ``(obj,), [], True`` for a dict subclass whose ``__init__``
          takes no argument besides ``self``,
        * ``(key_header, value_header, num_key_subs, obj_type), subs, False``
          otherwise, where ``obj_type`` is None for plain dicts.
        """
        cdef uint64_t obj_id
        cdef tuple key_obj, value_obj
        cdef list key_bufs, value_bufs

        # empty plain dicts are returned before touching the context,
        # so they never turn into placeholders
        if type(obj) is dict and len(<dict>obj) == 0:
            return (), [], True

        obj_id = _fast_id(obj)
        if obj_id in context:
            return Placeholder(obj_id)
        context[obj_id] = obj

        obj_type = type(obj)

        if obj_type is not dict and obj_type not in self._inspected_inherits:
            inspect_init = inspect.getfullargspec(obj_type.__init__)
            if (
                inspect_init.args == ["self"]
                and not inspect_init.varargs
                and not inspect_init.varkw
            ):
                # inherited dicts may not have proper initializers
                # for deserialization
                # remove context to generate real serialized result
                context.pop(obj_id)
                return (obj,), [], True
            else:
                # remember the type so it is not inspected again
                self._inspected_inherits.add(obj_type)

        key_obj, key_bufs, _ = self._serial_iterable(obj.keys())
        value_obj, value_bufs, _ = self._serial_iterable(obj.values())
        if obj_type is dict:
            obj_type = None
        # drop the trailing obj_type slot of both iterable headers and
        # record how many subcomponents belong to the keys
        ser_obj = (key_obj[:-1], value_obj[:-1], len(key_bufs), obj_type)
        return ser_obj, key_bufs + value_bufs, False

    cpdef deserial(self, tuple serialized, dict context, list subs):
        """
        Rebuild a dict from the header produced by ``serial`` and the
        deserialized subcomponents.

        Keys or values that are still Placeholders get callbacks queued
        on them that patch the resulting dict.
        """
        cdef int64_t i, num_key_bufs
        cdef list key_subs, value_subs, keys, values

        if not serialized:
            # empty header stands for an empty plain dict
            return {}
        if len(serialized) == 1:
            # serialized directly
            return serialized[0]

        key_serialized, value_serialized, num_key_bufs, obj_type = serialized
        key_subs = subs[:num_key_bufs]
        value_subs = subs[num_key_bufs:]

        # re-append the obj_type slot that serial() stripped so that the
        # headers have the shape _deserial_iterable unpacks
        keys = self._deserial_iterable(<tuple>key_serialized + (None,), key_subs)
        values = self._deserial_iterable(<tuple>value_serialized + (None,), value_subs)

        if obj_type is None:
            ret = dict(zip(keys, values))
        else:
            try:
                ret = obj_type(zip(keys, values))
            except TypeError:
                # first arg of defaultdict is a callable
                ret = obj_type()
                ret.update(zip(keys, values))

        for i in range(len(keys)):
            k, v = keys[i], values[i]
            if type(k) is Placeholder:
                (<Placeholder>k).callbacks.append(
                    partial(_dict_key_replacer, ret, k)
                )
            if type(v) is Placeholder:
                (<Placeholder>v).callbacks.append(
                    partial(_dict_value_replacer, context, ret, k)
                )
        return ret
544
+
545
+
546
cdef class Placeholder:
    """
    Stand-in for an object that is serialized elsewhere, used to reduce
    duplicated serialization.

    It carries the identifier of the object it replaces and a list of
    callbacks that substitute the real object into parent objects.
    """
    cdef public uint64_t id
    cdef public list callbacks

    def __init__(self, uint64_t id_):
        self.callbacks = []
        self.id = id_

    def __hash__(self):
        return self.id

    def __eq__(self, other):  # pragma: no cover
        # only placeholders with the same id compare equal
        return type(other) is Placeholder and self.id == other.id

    def __repr__(self):
        num_callbacks = len(self.callbacks)
        return f"Placeholder(id={self.id}, callbacks=[list of {num_callbacks}])"
573
+
574
+
575
cdef class PlaceholderSerializer(Serializer):
    """Serializer registered for Placeholder objects."""
    serializer_id = 7

    cpdef serial(self, obj: Any, dict context):
        # nothing is stored here: the header and subcomponents are empty
        return (), [], True

    cpdef deserial(self, tuple serialized, dict context, list subs):
        # returns a placeholder with id 0; _deserial_single either swaps it
        # for the object recorded in the context or assigns the real id
        return Placeholder(0)
583
+
584
+
585
# bind the built-in serializers to the types they handle
PickleSerializer.register(object)
for _primitive in _primitive_types:
    PrimitiveSerializer.register(_primitive)
BytesSerializer.register(bytes)
BytesSerializer.register(memoryview)
StrSerializer.register(str)
ListSerializer.register(list)
TupleSerializer.register(tuple)
DictSerializer.register(dict)
PlaceholderSerializer.register(Placeholder)
595
+
596
+
597
cdef class _SerialStackItem:
    """Work item on the stack used by ``_serialize_with_stack``."""
    # header of the object being serialized
    cdef public tuple serialized
    # subcomponents of the object that still need serializing
    cdef public list subs
    # serialized results of the subcomponents handled so far
    cdef public list subs_serialized

    def __cinit__(self, tuple serialized, list subs):
        self.serialized = serialized
        self.subs = subs
        self.subs_serialized = []
606
+
607
+
608
cdef class _IdContextHolder:
    # maps an object id to the ordinal assigned to it by _serial_single
    cdef unordered_map[uint64_t, uint64_t] d
610
+
611
+
612
# number of leading fields in every serialized header:
# (serializer id, ordered object id, number of subcomponents, final flag)
cdef int _COMMON_HEADER_LEN = 4
613
+
614
+
615
cdef tuple _serial_single(
    obj, dict context, _IdContextHolder id_context_holder
):
    """
    Serialize single object and return serialized tuples

    The serializer is looked up by the object's type, or by
    ``NamedType(name, type)`` when ``context["serializer"]`` is set.
    When the serializer returns something other than a tuple (usually a
    Placeholder), that object is serialized in its place.

    Returns ``(common_header + header, subs, final)``.
    """
    cdef uint64_t obj_id, ordered_id
    cdef Serializer serializer
    cdef tuple common_header, serialized

    while True:
        name = context.get("serializer")
        obj_type = type(obj) if name is None else NamedType(name, type(obj))
        serializer = _serial_dispatcher.get_handler(obj_type)
        ret_serial = serializer.serial(obj, context)
        if type(ret_serial) is tuple:
            # object is serialized, form a common header and return
            serialized, subs, final = <tuple>ret_serial

            if type(obj) is Placeholder:
                # reuse the ordinal recorded for the object the
                # placeholder points to
                obj_id = (<Placeholder>obj).id
                ordered_id = id_context_holder.d[obj_id]
            else:
                # assign the next ordinal: the current size of the map
                obj_id = _fast_id(obj)
                ordered_id = id_context_holder.d.size()
                id_context_holder.d[obj_id] = ordered_id

            # REMEMBER to change _COMMON_HEADER_LEN when content of
            # this header changed
            common_header = (
                serializer._serializer_id, ordered_id, len(subs), final
            )
            break
        else:
            # object is converted into another (usually a Placeholder)
            obj = ret_serial
    return common_header + serialized, subs, final
650
+
651
+
652
class _SerializeObjectOverflow(Exception):
    """
    Raised by ``_serialize_with_stack`` once more objects than the
    overflow limit have been processed.

    It carries the state needed to resume: the last serialized result and
    the number of objects handled so far.
    """

    def __init__(self, tuple cur_serialized, int num_total_serialized):
        super().__init__(cur_serialized)
        self.num_total_serialized = num_total_serialized
        self.cur_serialized = cur_serialized
657
+
658
+
659
cpdef object _serialize_with_stack(
    list serial_stack,
    tuple serialized,
    dict context,
    _IdContextHolder id_context_holder,
    list result_bufs_list,
    int64_t num_overflow = 0,
    int64_t num_total_serialized = 0,
):
    """
    Serialize nested objects iteratively with an explicit stack.

    Parameters
    ----------
    serial_stack: List
        Stack of _SerialStackItem; must hold the root item on entry
    serialized: Tuple | None
        Result pending to be recorded on the stack top (None on a
        fresh start, the interrupted result on resume)
    context: Dict
        Serialization context shared with the serializers
    id_context_holder: _IdContextHolder
        Holder of ordinals assigned to serialized objects
    result_bufs_list: List
        List that receives the buffers of leaf subcomponents
    num_overflow: int
        When positive, raise once the number of processed objects
        exceeds this value
    num_total_serialized: int
        Number of objects processed before this call; a positive value
        marks the call as a resumption

    Returns
    -------
    result: Tuple[Tuple[Dict, Tuple], List]
        ``(extra_meta, serialized)`` and the collected buffers

    Raises
    ------
    _SerializeObjectOverflow
        When ``num_overflow`` is positive and has been exceeded
    """
    cdef _SerialStackItem stack_item
    cdef list subs
    cdef bint final
    cdef int64_t num_sub_serialized
    cdef bint is_resume = num_total_serialized > 0

    while serial_stack:
        stack_item = serial_stack[-1]
        if serialized is not None:
            # have previously-serialized results, record first
            stack_item.subs_serialized.append(serialized)

        num_sub_serialized = len(stack_item.subs_serialized)
        if len(stack_item.subs) == num_sub_serialized:
            # all subcomponents serialized, serialization of current is done
            # and we can move to the parent object
            serialized = stack_item.serialized + tuple(stack_item.subs_serialized)
            num_total_serialized += 1
            serial_stack.pop()
        else:
            # serialize next subcomponent at stack top
            serialized, subs, final = _serial_single(
                stack_item.subs[num_sub_serialized], context, id_context_holder
            )
            num_total_serialized += 1
            if final or not subs:
                # the subcomponent is a leaf
                if subs:
                    result_bufs_list.extend(subs)
            else:
                # the subcomponent has its own subcomponents, we push itself
                # into stack and process its children
                stack_item = _SerialStackItem(serialized, subs)
                serial_stack.append(stack_item)
                # note that the serialized header should not be recorded
                # as we are now processing the subcomponent itself
                serialized = None
        if 0 < num_overflow < num_total_serialized:
            # hand the current state to the caller so it can resume
            raise _SerializeObjectOverflow(serialized, num_total_serialized)

    # we keep an empty dict for extra metas required for other modules
    if is_resume:
        # returns num of serialized objects when resumed
        extra_meta = {"_N": num_total_serialized}
    else:
        # otherwise does not record the number to reduce result size
        extra_meta = {}
    return (extra_meta, serialized), result_bufs_list
716
+
717
+
718
def serialize(obj, dict context = None):
    """
    Serialize an object into a header and a list of buffers.
    The buffers are meant for zero-copy data manipulation.

    Parameters
    ----------
    obj: Any
        Object to serialize
    context:
        Serialization context for instantiation of Placeholder
        objects

    Returns
    -------
    result: Tuple[Tuple, List]
        Picklable header and buffers
    """
    cdef list serial_stack = []
    cdef list result_bufs_list = []
    cdef tuple serialized
    cdef list subs
    cdef bint final
    cdef _IdContextHolder id_context_holder = _IdContextHolder()

    if context is None:
        context = dict()

    serialized, subs, final = _serial_single(obj, context, id_context_holder)
    if final or not subs:
        # marked as a leaf node, return directly
        return ({}, serialized), subs

    # the root has subcomponents: walk them with an explicit stack
    root_item = _SerialStackItem(serialized, subs)
    serial_stack.append(root_item)
    return _serialize_with_stack(
        serial_stack, None, context, id_context_holder, result_bufs_list
    )
753
+
754
+
755
async def serialize_with_spawn(
    obj, dict context = None, int spawn_threshold = 100, object executor = None
):
    """
    Serialize an object into a header and a list of buffers.
    The buffers are meant for zero-copy data manipulation.

    Serialization starts in the calling coroutine. Once more than
    ``spawn_threshold`` objects have been processed, the remaining work
    is handed to ``executor`` through the running event loop.

    Parameters
    ----------
    obj: Any
        Object to serialize
    context: Dict
        Serialization context for instantiation of Placeholder
        objects
    spawn_threshold: int
        Threshold to spawn into a ThreadPoolExecutor
    executor: ThreadPoolExecutor
        ThreadPoolExecutor to spawn rest serialization into

    Returns
    -------
    result: Tuple[Tuple, List]
        Picklable header and buffers
    """
    cdef list serial_stack = []
    cdef list result_bufs_list = []
    cdef tuple serialized
    cdef list subs
    cdef bint final
    cdef _IdContextHolder id_context_holder = _IdContextHolder()

    if context is None:
        context = dict()

    serialized, subs, final = _serial_single(obj, context, id_context_holder)
    if final or not subs:
        # marked as a leaf node, return directly
        return ({}, serialized), subs

    serial_stack.append(_SerialStackItem(serialized, subs))

    try:
        result = _serialize_with_stack(
            serial_stack, None, context, id_context_holder, result_bufs_list, spawn_threshold
        )
    except _SerializeObjectOverflow as ex:
        # threshold exceeded: resume from the recorded state in the
        # executor, this time without an overflow limit
        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(
            executor,
            _serialize_with_stack,
            serial_stack,
            ex.cur_serialized,
            context,
            id_context_holder,
            result_bufs_list,
            0,
            ex.num_total_serialized,
        )
    return result
811
+
812
+
813
cdef class _DeserialStackItem:
    """Work item on the stack used by ``deserialize``."""
    # header of the object being rebuilt (without its subcomponents)
    cdef public tuple serialized
    # serialized subcomponents that still need deserializing
    cdef public tuple subs
    # deserialized subcomponents obtained so far
    cdef public list subs_deserialized

    def __cinit__(self, tuple serialized, tuple subs):
        self.serialized = serialized
        self.subs = subs
        self.subs_deserialized = []
822
+
823
+
824
cdef _deserial_single(tuple serialized, dict context, list subs):
    """
    Deserialize a single object

    The serializer is looked up by the id stored in the common header.
    The result is recorded in ``context`` under the object id from the
    header; if a Placeholder was recorded there before, its callbacks are
    invoked with the result.
    """
    cdef Serializer serializer
    cdef int64_t num_subs

    serializer_id, obj_id, num_subs, final = serialized[:_COMMON_HEADER_LEN]
    serializer = _deserializers[serializer_id]
    res = serializer.deserial(serialized[_COMMON_HEADER_LEN:], context, subs)

    if type(res) is Placeholder:
        try:
            # the referenced object is already known, use it directly
            res = context[obj_id]
        except KeyError:
            # not known yet: keep the placeholder and give it the real id
            (<Placeholder>res).id = obj_id

    # get previously-recorded context values
    context_val, context[obj_id] = context.get(obj_id), res
    # if previously recorded object is a Placeholder,
    # replace it with callbacks
    if type(context_val) is Placeholder:
        for cb in (<Placeholder>context_val).callbacks:
            cb(res)
    return res
847
+
848
+
849
def deserialize(tuple serialized, list buffers, dict context = None):
    """
    Deserialize an object with serialized headers and buffers

    Nested objects are rebuilt iteratively with an explicit stack. When
    deserializing a component fails, every enclosing serializer still on
    the stack gets a chance to rewrite the exception through
    ``on_deserial_error`` before it is raised.

    Parameters
    ----------
    serialized: Tuple
        Serialized object header
    buffers: List
        List of buffers extracted from serialize() calls
    context: Dict
        Serialization context for replacing Placeholder
        objects

    Returns
    -------
    result: Any
        Deserialized object

    Raises
    ------
    BaseException
        The (possibly rewritten) exception raised while deserializing
        a component
    """
    cdef list deserial_stack = []
    cdef _DeserialStackItem stack_item
    cdef int64_t num_subs, num_deserialized, buf_pos = 0
    cdef bint final
    cdef Serializer serializer
    cdef object deserialized = None, exc_value = None
    cdef bint has_deserialized = False

    context = context if context is not None else dict()
    # drop extra meta field
    serialized = serialized[-1]
    # slice with the shared constant rather than a literal so this stays
    # in sync with the header built in _serial_single
    serializer_id, obj_id, num_subs, final = serialized[:_COMMON_HEADER_LEN]
    if final or num_subs == 0:
        # marked as a leaf node, return directly
        return _deserial_single(serialized, context, buffers)

    # the trailing num_subs entries of the header are the serialized
    # subcomponents
    deserial_stack.append(
        _DeserialStackItem(
            serialized[:-num_subs], serialized[-num_subs:]
        )
    )

    while deserial_stack:
        stack_item = deserial_stack[-1]
        # the deserialized result can be None, hence we cannot
        # simply judge from the value deserialized
        if has_deserialized:
            # have previously-deserialized results, record first
            stack_item.subs_deserialized.append(deserialized)
        elif exc_value is not None:
            # have exception in successor components, try rewrite
            # and pass to predecessors
            serializer_id = stack_item.serialized[0]
            serializer = _deserializers[serializer_id]
            new_exc_value = serializer.on_deserial_error(
                stack_item.serialized[_COMMON_HEADER_LEN:],
                context,
                list(stack_item.subs),
                len(stack_item.subs_deserialized),
                exc_value,
            )
            exc_value = new_exc_value if new_exc_value is not None else exc_value
            deserial_stack.pop()
            continue

        num_deserialized = len(stack_item.subs_deserialized)
        if len(stack_item.subs) == num_deserialized:
            try:
                # all subcomponents deserialized, we can deserialize the object itself
                deserialized = _deserial_single(
                    stack_item.serialized, context, stack_item.subs_deserialized
                )
                has_deserialized = True
                deserial_stack.pop()
            except BaseException as ex:
                has_deserialized = False
                exc_value = ex
                deserial_stack.pop()
        else:
            # select next subcomponent to process
            serialized = stack_item.subs[num_deserialized]
            serializer_id, obj_id, num_subs, final = serialized[:_COMMON_HEADER_LEN]
            if final or num_subs == 0:
                try:
                    # next subcomponent is a leaf, just deserialize
                    deserialized = _deserial_single(
                        serialized, context, buffers[buf_pos : buf_pos + num_subs]
                    )
                    has_deserialized = True
                    # leaf buffers are consumed in order from the flat list
                    buf_pos += num_subs
                except BaseException as ex:
                    has_deserialized = False
                    exc_value = ex
            else:
                # next subcomponent has its own subcomponents, we push it
                # into stack and start handling its children
                stack_item = _DeserialStackItem(
                    serialized[:-num_subs], serialized[-num_subs:]
                )
                deserial_stack.append(stack_item)
                # note that the deserialized object should be cleaned
                # as we are just starting to handle the subcomponent itself
                has_deserialized = False

    if exc_value is not None:
        raise exc_value
    return deserialized