torchmonarch-nightly 2025.8.2__cp313-cp313-manylinux2014_x86_64.whl → 2025.9.3__cp313-cp313-manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- monarch/_rust_bindings.so +0 -0
- monarch/_src/actor/actor_mesh.py +414 -216
- monarch/_src/actor/allocator.py +75 -6
- monarch/_src/actor/bootstrap_main.py +7 -4
- monarch/_src/actor/code_sync/__init__.py +2 -0
- monarch/_src/actor/debugger/__init__.py +7 -0
- monarch/_src/actor/{debugger.py → debugger/debugger.py} +246 -135
- monarch/_src/actor/{pdb_wrapper.py → debugger/pdb_wrapper.py} +62 -23
- monarch/_src/actor/endpoint.py +27 -45
- monarch/_src/actor/future.py +86 -24
- monarch/_src/actor/host_mesh.py +125 -0
- monarch/_src/actor/logging.py +94 -0
- monarch/_src/actor/pickle.py +25 -0
- monarch/_src/actor/proc_mesh.py +423 -156
- monarch/_src/actor/python_extension_methods.py +90 -0
- monarch/_src/actor/shape.py +8 -1
- monarch/_src/actor/source_loader.py +45 -0
- monarch/_src/actor/telemetry/__init__.py +172 -0
- monarch/_src/actor/telemetry/rust_span_tracing.py +6 -39
- monarch/_src/debug_cli/__init__.py +7 -0
- monarch/_src/debug_cli/debug_cli.py +43 -0
- monarch/_src/tensor_engine/rdma.py +64 -9
- monarch/_testing.py +1 -3
- monarch/actor/__init__.py +24 -4
- monarch/common/_C.so +0 -0
- monarch/common/device_mesh.py +14 -0
- monarch/common/future.py +10 -0
- monarch/common/remote.py +14 -25
- monarch/common/tensor.py +12 -0
- monarch/debug_cli/__init__.py +7 -0
- monarch/debug_cli/__main__.py +12 -0
- monarch/fetch.py +2 -2
- monarch/gradient/_gradient_generator.so +0 -0
- monarch/gradient_generator.py +4 -2
- monarch/mesh_controller.py +34 -14
- monarch/monarch_controller +0 -0
- monarch/tools/colors.py +25 -0
- monarch/tools/commands.py +42 -7
- monarch/tools/components/hyperactor.py +1 -1
- monarch/tools/config/__init__.py +31 -4
- monarch/tools/config/defaults.py +13 -3
- monarch/tools/config/environment.py +45 -0
- monarch/tools/config/workspace.py +165 -0
- monarch/tools/mesh_spec.py +2 -0
- monarch/utils/__init__.py +9 -0
- monarch/utils/utils.py +78 -0
- tests/error_test_binary.py +5 -3
- tests/python_actor_test_binary.py +52 -0
- tests/test_actor_error.py +142 -14
- tests/test_alloc.py +1 -1
- tests/test_allocator.py +59 -72
- tests/test_debugger.py +639 -45
- tests/test_env_before_cuda.py +4 -4
- tests/test_mesh_trait.py +38 -0
- tests/test_python_actors.py +965 -75
- tests/test_rdma.py +7 -6
- tests/test_tensor_engine.py +6 -6
- {torchmonarch_nightly-2025.8.2.dist-info → torchmonarch_nightly-2025.9.3.dist-info}/METADATA +82 -4
- {torchmonarch_nightly-2025.8.2.dist-info → torchmonarch_nightly-2025.9.3.dist-info}/RECORD +63 -47
- {torchmonarch_nightly-2025.8.2.dist-info → torchmonarch_nightly-2025.9.3.dist-info}/WHEEL +0 -0
- {torchmonarch_nightly-2025.8.2.dist-info → torchmonarch_nightly-2025.9.3.dist-info}/entry_points.txt +0 -0
- {torchmonarch_nightly-2025.8.2.dist-info → torchmonarch_nightly-2025.9.3.dist-info}/licenses/LICENSE +0 -0
- {torchmonarch_nightly-2025.8.2.dist-info → torchmonarch_nightly-2025.9.3.dist-info}/top_level.txt +0 -0
monarch/_src/actor/actor_mesh.py
CHANGED
@@ -6,6 +6,7 @@
|
|
6
6
|
|
7
7
|
# pyre-unsafe
|
8
8
|
|
9
|
+
import abc
|
9
10
|
import collections
|
10
11
|
import contextvars
|
11
12
|
import functools
|
@@ -14,17 +15,18 @@ import itertools
|
|
14
15
|
import logging
|
15
16
|
import random
|
16
17
|
import traceback
|
18
|
+
from abc import abstractmethod, abstractproperty
|
17
19
|
|
18
20
|
from dataclasses import dataclass
|
19
|
-
from
|
21
|
+
from pprint import pformat
|
22
|
+
from textwrap import indent
|
23
|
+
from traceback import TracebackException
|
20
24
|
from typing import (
|
21
25
|
Any,
|
22
|
-
AsyncGenerator,
|
23
26
|
Awaitable,
|
24
27
|
Callable,
|
25
28
|
cast,
|
26
29
|
Concatenate,
|
27
|
-
Coroutine,
|
28
30
|
Dict,
|
29
31
|
Generator,
|
30
32
|
Generic,
|
@@ -32,7 +34,6 @@ from typing import (
|
|
32
34
|
Iterator,
|
33
35
|
List,
|
34
36
|
Literal,
|
35
|
-
NamedTuple,
|
36
37
|
Optional,
|
37
38
|
overload,
|
38
39
|
ParamSpec,
|
@@ -48,24 +49,25 @@ from monarch._rust_bindings.monarch_hyperactor.actor import (
|
|
48
49
|
PythonMessage,
|
49
50
|
PythonMessageKind,
|
50
51
|
)
|
51
|
-
from monarch._rust_bindings.monarch_hyperactor.actor_mesh import
|
52
|
+
from monarch._rust_bindings.monarch_hyperactor.actor_mesh import (
|
53
|
+
PythonActorMesh,
|
54
|
+
PythonActorMeshImpl,
|
55
|
+
)
|
52
56
|
from monarch._rust_bindings.monarch_hyperactor.mailbox import (
|
53
57
|
Mailbox,
|
54
|
-
OncePortReceiver,
|
58
|
+
OncePortReceiver as HyOncePortReceiver, # noqa: F401
|
55
59
|
OncePortRef,
|
56
|
-
PortReceiver as HyPortReceiver,
|
60
|
+
PortReceiver as HyPortReceiver, # noqa: F401
|
57
61
|
PortRef,
|
62
|
+
UndeliverableMessageEnvelope,
|
58
63
|
)
|
59
|
-
|
60
|
-
if TYPE_CHECKING:
|
61
|
-
from monarch._rust_bindings.monarch_hyperactor.actor import PortProtocol
|
62
|
-
from monarch._rust_bindings.monarch_hyperactor.mailbox import PortReceiverBase
|
63
|
-
|
64
64
|
from monarch._rust_bindings.monarch_hyperactor.proc import ActorId
|
65
|
+
from monarch._rust_bindings.monarch_hyperactor.pytokio import PythonTask, Shared
|
66
|
+
from monarch._rust_bindings.monarch_hyperactor.selection import Selection as HySelection
|
65
67
|
from monarch._rust_bindings.monarch_hyperactor.shape import Point as HyPoint, Shape
|
66
68
|
from monarch._rust_bindings.monarch_hyperactor.supervision import SupervisionError
|
67
|
-
from monarch._rust_bindings.monarch_hyperactor.telemetry import enter_span, exit_span
|
68
69
|
from monarch._src.actor.allocator import LocalAllocator, ProcessAllocator
|
70
|
+
from monarch._src.actor.debugger.pdb_wrapper import PdbWrapper
|
69
71
|
from monarch._src.actor.endpoint import (
|
70
72
|
Endpoint,
|
71
73
|
EndpointProperty,
|
@@ -74,21 +76,28 @@ from monarch._src.actor.endpoint import (
|
|
74
76
|
Propagator,
|
75
77
|
Selection,
|
76
78
|
)
|
77
|
-
from monarch._src.actor.future import Future
|
78
|
-
from monarch._src.actor.pdb_wrapper import PdbWrapper
|
79
|
-
|
79
|
+
from monarch._src.actor.future import DeprecatedNotAFuture, Future
|
80
80
|
from monarch._src.actor.pickle import flatten, unflatten
|
81
|
-
|
81
|
+
from monarch._src.actor.python_extension_methods import rust_struct
|
82
82
|
from monarch._src.actor.shape import MeshTrait, NDSlice
|
83
83
|
from monarch._src.actor.sync_state import fake_sync_state
|
84
|
-
|
84
|
+
from monarch._src.actor.telemetry import METER
|
85
85
|
from monarch._src.actor.tensor_engine_shim import actor_rref, actor_send
|
86
|
+
from typing_extensions import Self
|
86
87
|
|
87
88
|
if TYPE_CHECKING:
|
88
|
-
from monarch.
|
89
|
+
from monarch._rust_bindings.monarch_hyperactor.actor import PortProtocol
|
90
|
+
from monarch._rust_bindings.monarch_hyperactor.actor_mesh import ActorMeshProtocol
|
91
|
+
from monarch._rust_bindings.monarch_hyperactor.mailbox import PortReceiverBase
|
92
|
+
from monarch._src.actor.proc_mesh import _ControllerController, ProcMesh
|
93
|
+
from monarch._src.actor.telemetry import get_monarch_tracer
|
94
|
+
|
95
|
+
CallMethod = PythonMessageKind.CallMethod
|
89
96
|
|
90
97
|
logger: logging.Logger = logging.getLogger(__name__)
|
91
98
|
|
99
|
+
TRACER = get_monarch_tracer()
|
100
|
+
|
92
101
|
Allocator = ProcessAllocator | LocalAllocator
|
93
102
|
|
94
103
|
try:
|
@@ -106,22 +115,103 @@ class Point(HyPoint, collections.abc.Mapping):
|
|
106
115
|
pass
|
107
116
|
|
108
117
|
|
109
|
-
@
|
110
|
-
class
|
111
|
-
|
112
|
-
|
113
|
-
|
118
|
+
@rust_struct("monarch_hyperactor::mailbox::Instance")
|
119
|
+
class Instance(abc.ABC):
|
120
|
+
@abstractproperty
|
121
|
+
def _mailbox(self) -> Mailbox:
|
122
|
+
"""
|
123
|
+
This can be removed once we fix all the uses of mailbox to just use context instead.
|
124
|
+
"""
|
125
|
+
...
|
126
|
+
|
127
|
+
@property
|
128
|
+
def proc_id(self) -> str:
|
129
|
+
"""
|
130
|
+
The proc_id of the current actor.
|
131
|
+
"""
|
132
|
+
return self.actor_id.proc_id
|
133
|
+
|
134
|
+
@abstractproperty
|
135
|
+
def actor_id(self) -> ActorId:
|
136
|
+
"""
|
137
|
+
The actor_id of the current actor.
|
138
|
+
"""
|
139
|
+
...
|
140
|
+
|
141
|
+
@property
|
142
|
+
def proc(self) -> "ProcMesh":
|
143
|
+
"""
|
144
|
+
The singleton proc mesh that corresponds to just this actor.
|
145
|
+
"""
|
146
|
+
|
147
|
+
return self.proc_mesh.slice(**self.rank)
|
148
|
+
|
149
|
+
"""
|
150
|
+
Every actor is spawned over some mesh of processes. This identifies the point in that mesh where
|
151
|
+
the current actor was spawned. In other words, it is the `monarch.current_rank()` of
|
152
|
+
the actor's __init__ message.
|
153
|
+
"""
|
154
|
+
rank: Point
|
155
|
+
proc_mesh: "ProcMesh"
|
156
|
+
_controller_controller: "_ControllerController"
|
157
|
+
|
158
|
+
# this property is used to hold the handles to actors and processes launched by this actor
|
159
|
+
# in order to keep them alive until this actor exits.
|
160
|
+
_children: "Optional[List[ActorMesh | ProcMesh]]"
|
161
|
+
|
162
|
+
def _add_child(self, child: "ActorMesh | ProcMesh") -> None:
|
163
|
+
if self._children is None:
|
164
|
+
self._children = [child]
|
165
|
+
else:
|
166
|
+
self._children.append(child)
|
167
|
+
|
168
|
+
|
169
|
+
@rust_struct("monarch_hyperactor::mailbox::Context")
|
170
|
+
class Context:
|
171
|
+
@property
|
172
|
+
def actor_instance(self) -> Instance:
|
173
|
+
"""
|
174
|
+
Information about the actor currently running in this context.
|
175
|
+
"""
|
176
|
+
...
|
177
|
+
|
178
|
+
@property
|
179
|
+
def message_rank(self) -> Point:
|
180
|
+
"""
|
181
|
+
Every message is sent as some broadcast of messages. This call identifies the
|
182
|
+
point in this space where the current actor is participating.
|
183
|
+
|
184
|
+
This is not the same as self.actor_instance.rank: if the message was sent to some slice of
|
185
|
+
actors this identifies where the actor appears in the slice and not the identity of the actor.
|
186
|
+
|
187
|
+
These Point objects always exist. For singletons it will have 0 dimensions.
|
188
|
+
"""
|
189
|
+
...
|
114
190
|
|
115
191
|
@staticmethod
|
116
|
-
def
|
117
|
-
return _context.get()
|
192
|
+
def _root_client_context() -> "Context": ...
|
118
193
|
|
119
194
|
|
120
|
-
_context: contextvars.ContextVar[
|
195
|
+
_context: contextvars.ContextVar[Context] = contextvars.ContextVar(
|
121
196
|
"monarch.actor_mesh._context"
|
122
197
|
)
|
123
198
|
|
124
199
|
|
200
|
+
def context() -> Context:
|
201
|
+
c = _context.get(None)
|
202
|
+
if c is None:
|
203
|
+
c = Context._root_client_context()
|
204
|
+
_context.set(c)
|
205
|
+
from monarch._src.actor.host_mesh import create_local_host_mesh
|
206
|
+
from monarch._src.actor.proc_mesh import _get_controller_controller
|
207
|
+
|
208
|
+
c.actor_instance.proc_mesh, c.actor_instance._controller_controller = (
|
209
|
+
_get_controller_controller()
|
210
|
+
)
|
211
|
+
c.actor_instance.proc_mesh._host_mesh = create_local_host_mesh()
|
212
|
+
return c
|
213
|
+
|
214
|
+
|
125
215
|
@dataclass
|
126
216
|
class DebugContext:
|
127
217
|
pdb_wrapper: Optional[PdbWrapper] = None
|
@@ -149,6 +239,37 @@ A = TypeVar("A")
|
|
149
239
|
_load_balancing_seed = random.Random(4)
|
150
240
|
|
151
241
|
|
242
|
+
class _SingletonActorAdapator:
|
243
|
+
def __init__(self, inner: ActorId, shape: Optional[Shape] = None) -> None:
|
244
|
+
self._inner: ActorId = inner
|
245
|
+
if shape is None:
|
246
|
+
shape = singleton_shape
|
247
|
+
self._shape = shape
|
248
|
+
|
249
|
+
def cast(
|
250
|
+
self,
|
251
|
+
message: PythonMessage,
|
252
|
+
selection: str,
|
253
|
+
mailbox: Mailbox,
|
254
|
+
) -> None:
|
255
|
+
mailbox.post(self._inner, message)
|
256
|
+
|
257
|
+
def new_with_shape(self, shape: Shape) -> "ActorMeshProtocol":
|
258
|
+
return _SingletonActorAdapator(self._inner, self._shape)
|
259
|
+
|
260
|
+
def supervision_event(self) -> "Optional[Shared[Exception]]":
|
261
|
+
return None
|
262
|
+
|
263
|
+
def stop(self) -> "PythonTask[None]":
|
264
|
+
raise NotImplementedError("stop()")
|
265
|
+
|
266
|
+
def initialized(self) -> "PythonTask[None]":
|
267
|
+
async def empty():
|
268
|
+
pass
|
269
|
+
|
270
|
+
return PythonTask.from_coroutine(empty())
|
271
|
+
|
272
|
+
|
152
273
|
# standin class for whatever is the serializable python object we use
|
153
274
|
# to name an actor mesh. Hacked up today because ActorMesh
|
154
275
|
# isn't plumbed to non-clients
|
@@ -156,7 +277,7 @@ class _ActorMeshRefImpl:
|
|
156
277
|
def __init__(
|
157
278
|
self,
|
158
279
|
mailbox: Mailbox,
|
159
|
-
hy_actor_mesh: Optional[
|
280
|
+
hy_actor_mesh: Optional[PythonActorMeshImpl],
|
160
281
|
proc_mesh: "Optional[ProcMesh]",
|
161
282
|
shape: Shape,
|
162
283
|
actor_ids: List[ActorId],
|
@@ -171,29 +292,19 @@ class _ActorMeshRefImpl:
|
|
171
292
|
|
172
293
|
@staticmethod
|
173
294
|
def from_hyperactor_mesh(
|
174
|
-
mailbox: Mailbox,
|
295
|
+
mailbox: Mailbox,
|
296
|
+
shape: Shape,
|
297
|
+
hy_actor_mesh: PythonActorMeshImpl,
|
298
|
+
proc_mesh: "ProcMesh",
|
175
299
|
) -> "_ActorMeshRefImpl":
|
176
|
-
shape: Shape = hy_actor_mesh.shape
|
177
300
|
return _ActorMeshRefImpl(
|
178
301
|
mailbox,
|
179
302
|
hy_actor_mesh,
|
180
303
|
proc_mesh,
|
181
|
-
|
304
|
+
shape,
|
182
305
|
[cast(ActorId, hy_actor_mesh.get(i)) for i in range(len(shape))],
|
183
306
|
)
|
184
307
|
|
185
|
-
@staticmethod
|
186
|
-
def from_actor_id(mailbox: Mailbox, actor_id: ActorId) -> "_ActorMeshRefImpl":
|
187
|
-
return _ActorMeshRefImpl(mailbox, None, None, singleton_shape, [actor_id])
|
188
|
-
|
189
|
-
@staticmethod
|
190
|
-
def from_actor_ref_with_shape(
|
191
|
-
ref: "_ActorMeshRefImpl", shape: Shape
|
192
|
-
) -> "_ActorMeshRefImpl":
|
193
|
-
return _ActorMeshRefImpl(
|
194
|
-
ref._mailbox, None, None, shape, ref._please_replace_me_actor_ids
|
195
|
-
)
|
196
|
-
|
197
308
|
def __getstate__(
|
198
309
|
self,
|
199
310
|
) -> Tuple[Shape, List[ActorId], Mailbox]:
|
@@ -214,22 +325,19 @@ class _ActorMeshRefImpl:
|
|
214
325
|
if self._actor_mesh is not None:
|
215
326
|
if self._actor_mesh.stopped:
|
216
327
|
raise SupervisionError(
|
217
|
-
"actor mesh is
|
328
|
+
"actor mesh is unhealthy with reason: actor mesh is stopped due to proc mesh shutdown. "
|
329
|
+
"`PythonActorMesh` has already been stopped."
|
218
330
|
)
|
219
331
|
|
220
332
|
event = self._actor_mesh.get_supervision_event()
|
221
333
|
if event is not None:
|
222
|
-
raise SupervisionError(f"actor mesh is
|
223
|
-
|
224
|
-
def send(self, rank: int, message: PythonMessage) -> None:
|
225
|
-
self._check_state()
|
226
|
-
actor = self._please_replace_me_actor_ids[rank]
|
227
|
-
self._mailbox.post(actor, message)
|
334
|
+
raise SupervisionError(f"actor mesh is unhealthy with reason: {event}")
|
228
335
|
|
229
336
|
def cast(
|
230
337
|
self,
|
231
338
|
message: PythonMessage,
|
232
|
-
selection:
|
339
|
+
selection: str,
|
340
|
+
mailbox: Mailbox,
|
233
341
|
) -> None:
|
234
342
|
self._check_state()
|
235
343
|
|
@@ -279,14 +387,44 @@ class _ActorMeshRefImpl:
|
|
279
387
|
actor_id0 = self._please_replace_me_actor_ids[0]
|
280
388
|
return actor_id0.actor_name, actor_id0.pid
|
281
389
|
|
282
|
-
|
283
|
-
|
390
|
+
@property
|
391
|
+
def shape(self) -> Shape:
|
392
|
+
return self._shape
|
393
|
+
|
394
|
+
@property
|
395
|
+
def proc_mesh(self) -> Optional["ProcMesh"]:
|
396
|
+
return self._proc_mesh
|
397
|
+
|
398
|
+
def new_with_shape(self, shape: Shape) -> "_ActorMeshRefImpl":
|
399
|
+
return _ActorMeshRefImpl(
|
400
|
+
self._mailbox, None, None, shape, self._please_replace_me_actor_ids
|
401
|
+
)
|
402
|
+
|
403
|
+
def supervision_event(self) -> "Optional[Shared[Exception]]":
|
404
|
+
if self._actor_mesh is None:
|
405
|
+
return None
|
406
|
+
return self._actor_mesh.supervision_event()
|
407
|
+
|
408
|
+
def stop(self) -> PythonTask[None]:
|
409
|
+
async def task():
|
410
|
+
if self._actor_mesh is not None:
|
411
|
+
self._actor_mesh.stop()
|
412
|
+
|
413
|
+
return PythonTask.from_coroutine(task())
|
414
|
+
|
415
|
+
def initialized(self) -> PythonTask[None]:
|
416
|
+
async def task():
|
417
|
+
pass
|
418
|
+
|
419
|
+
return PythonTask.from_coroutine(task())
|
284
420
|
|
285
421
|
|
286
422
|
class ActorEndpoint(Endpoint[P, R]):
|
287
423
|
def __init__(
|
288
424
|
self,
|
289
|
-
|
425
|
+
actor_mesh: "ActorMeshProtocol",
|
426
|
+
shape: Shape,
|
427
|
+
proc_mesh: "Optional[ProcMesh]",
|
290
428
|
name: MethodSpecifier,
|
291
429
|
impl: Callable[Concatenate[Any, P], Awaitable[R]],
|
292
430
|
mailbox: Mailbox,
|
@@ -294,16 +432,14 @@ class ActorEndpoint(Endpoint[P, R]):
|
|
294
432
|
explicit_response_port: bool,
|
295
433
|
) -> None:
|
296
434
|
super().__init__(propagator)
|
297
|
-
self._actor_mesh =
|
435
|
+
self._actor_mesh = actor_mesh
|
298
436
|
self._name = name
|
437
|
+
self._shape = shape
|
438
|
+
self._proc_mesh = proc_mesh
|
299
439
|
self._signature: inspect.Signature = inspect.signature(impl)
|
300
440
|
self._mailbox = mailbox
|
301
441
|
self._explicit_response_port = explicit_response_port
|
302
442
|
|
303
|
-
def _supervise(self, r: HyPortReceiver | OncePortReceiver) -> Any:
|
304
|
-
mesh = self._actor_mesh._actor_mesh
|
305
|
-
return r if mesh is None else mesh.supervise(r)
|
306
|
-
|
307
443
|
def _call_name(self) -> Any:
|
308
444
|
return self._name
|
309
445
|
|
@@ -334,19 +470,17 @@ class ActorEndpoint(Endpoint[P, R]):
|
|
334
470
|
),
|
335
471
|
bytes,
|
336
472
|
)
|
337
|
-
self._actor_mesh.cast(message, selection)
|
473
|
+
self._actor_mesh.cast(message, selection, self._mailbox)
|
338
474
|
else:
|
339
475
|
actor_send(self, bytes, objects, port, selection)
|
340
|
-
shape = self.
|
476
|
+
shape = self._shape
|
341
477
|
return Extent(shape.labels, shape.ndslice.sizes)
|
342
478
|
|
343
|
-
def _port(self, once: bool = False) -> "
|
344
|
-
p, r =
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
), "unexpected receiver type"
|
349
|
-
return PortTuple(p, PortReceiver(self._mailbox, self._supervise(r._receiver)))
|
479
|
+
def _port(self, once: bool = False) -> "Tuple[Port[R], PortReceiver[R]]":
|
480
|
+
p, r = super()._port(once=once)
|
481
|
+
monitor: Optional[Shared[Exception]] = self._actor_mesh.supervision_event()
|
482
|
+
r._set_monitor(monitor)
|
483
|
+
return (p, r)
|
350
484
|
|
351
485
|
def _rref(self, args, kwargs):
|
352
486
|
self._check_arguments(args, kwargs)
|
@@ -386,11 +520,9 @@ def as_endpoint(
|
|
386
520
|
if explicit_response_port
|
387
521
|
else MethodSpecifier.ReturnsResponse
|
388
522
|
)
|
389
|
-
return
|
390
|
-
not_an_endpoint._ref._actor_mesh_ref,
|
523
|
+
return not_an_endpoint._ref._endpoint(
|
391
524
|
kind(not_an_endpoint._name),
|
392
525
|
getattr(not_an_endpoint._ref, not_an_endpoint._name),
|
393
|
-
not_an_endpoint._ref._mailbox,
|
394
526
|
propagate,
|
395
527
|
explicit_response_port,
|
396
528
|
)
|
@@ -405,9 +537,7 @@ class Accumulator(Generic[P, R, A]):
|
|
405
537
|
self._combine: Callable[[A, R], A] = combine
|
406
538
|
|
407
539
|
def accumulate(self, *args: P.args, **kwargs: P.kwargs) -> "Future[A]":
|
408
|
-
gen: Generator[
|
409
|
-
*args, **kwargs
|
410
|
-
)
|
540
|
+
gen: Generator[Future[R], None, None] = self._endpoint.stream(*args, **kwargs)
|
411
541
|
|
412
542
|
async def impl() -> A:
|
413
543
|
value = self._identity
|
@@ -438,17 +568,16 @@ class ValueMesh(MeshTrait, Generic[R]):
|
|
438
568
|
return self._values[self._ndslice.nditem(coordinates)]
|
439
569
|
|
440
570
|
def items(self) -> Iterable[Tuple[Point, R]]:
|
441
|
-
|
442
|
-
|
571
|
+
extent = self._shape.extent
|
572
|
+
for i, rank in enumerate(self._shape.ranks()):
|
573
|
+
yield Point(i, extent), self._values[rank]
|
443
574
|
|
444
575
|
def __iter__(self) -> Iterator[Tuple[Point, R]]:
|
445
576
|
return iter(self.items())
|
446
577
|
|
447
|
-
def __len__(self) -> int:
|
448
|
-
return len(self._shape)
|
449
|
-
|
450
578
|
def __repr__(self) -> str:
|
451
|
-
|
579
|
+
body = indent(pformat(tuple(self.items())), " ")
|
580
|
+
return f"ValueMesh({self._shape.extent}):\n{body}"
|
452
581
|
|
453
582
|
@property
|
454
583
|
def _ndslice(self) -> NDSlice:
|
@@ -522,49 +651,25 @@ R = TypeVar("R")
|
|
522
651
|
|
523
652
|
T = TypeVar("T")
|
524
653
|
|
525
|
-
if TYPE_CHECKING:
|
526
|
-
# Python <= 3.10 cannot inherit from Generic[R] and NamedTuple at the same time.
|
527
|
-
# we only need it for type checking though, so copypasta it until 3.11.
|
528
|
-
class PortTuple(NamedTuple, Generic[R]):
|
529
|
-
sender: "Port[R]"
|
530
|
-
receiver: "PortReceiver[R]"
|
531
|
-
|
532
|
-
@staticmethod
|
533
|
-
def create(mailbox: Mailbox, once: bool = False) -> "PortTuple[Any]":
|
534
|
-
handle, receiver = mailbox.open_once_port() if once else mailbox.open_port()
|
535
|
-
port_ref = handle.bind()
|
536
|
-
return PortTuple(
|
537
|
-
Port(port_ref, mailbox, rank=None),
|
538
|
-
PortReceiver(mailbox, receiver),
|
539
|
-
)
|
540
|
-
else:
|
541
|
-
|
542
|
-
class PortTuple(NamedTuple):
|
543
|
-
sender: "Port[Any]"
|
544
|
-
receiver: "PortReceiver[Any]"
|
545
|
-
|
546
|
-
@staticmethod
|
547
|
-
def create(mailbox: Mailbox, once: bool = False) -> "PortTuple[Any]":
|
548
|
-
handle, receiver = mailbox.open_once_port() if once else mailbox.open_port()
|
549
|
-
port_ref = handle.bind()
|
550
|
-
return PortTuple(
|
551
|
-
Port(port_ref, mailbox, rank=None),
|
552
|
-
PortReceiver(mailbox, receiver),
|
553
|
-
)
|
554
|
-
|
555
654
|
|
556
655
|
# advanced lower-level API for sending messages. This is intentionally
|
557
656
|
# not part of the Endpoint API because the way it accepts arguments
|
558
657
|
# and handles concerns is different.
|
559
|
-
|
560
|
-
|
561
|
-
|
658
|
+
class Channel(Generic[R]):
|
659
|
+
@staticmethod
|
660
|
+
def open(once: bool = False) -> Tuple["Port[R]", "PortReceiver[R]"]:
|
661
|
+
mailbox = context().actor_instance._mailbox
|
662
|
+
handle, receiver = mailbox.open_once_port() if once else mailbox.open_port()
|
663
|
+
port_ref = handle.bind()
|
664
|
+
return (
|
665
|
+
Port(port_ref, mailbox, rank=None),
|
666
|
+
PortReceiver(mailbox, receiver),
|
667
|
+
)
|
562
668
|
|
563
|
-
|
564
|
-
|
565
|
-
|
566
|
-
|
567
|
-
return p, RankedPortReceiver[R](receiver._mailbox, receiver._receiver)
|
669
|
+
@staticmethod
|
670
|
+
def open_ranked(once: bool = False) -> Tuple["Port[R]", "RankedPortReceiver[R]"]:
|
671
|
+
send, recv = Channel[R].open()
|
672
|
+
return (send, recv.ranked())
|
568
673
|
|
569
674
|
|
570
675
|
class PortReceiver(Generic[R]):
|
@@ -572,12 +677,22 @@ class PortReceiver(Generic[R]):
|
|
572
677
|
self,
|
573
678
|
mailbox: Mailbox,
|
574
679
|
receiver: "PortReceiverBase",
|
680
|
+
monitor: "Optional[Shared[Exception]]" = None,
|
575
681
|
) -> None:
|
576
682
|
self._mailbox: Mailbox = mailbox
|
683
|
+
self._monitor = monitor
|
577
684
|
self._receiver = receiver
|
578
685
|
|
579
686
|
async def _recv(self) -> R:
|
580
|
-
|
687
|
+
awaitable = self._receiver.recv_task()
|
688
|
+
if self._monitor is None:
|
689
|
+
result = await awaitable
|
690
|
+
else:
|
691
|
+
# type: ignore
|
692
|
+
result, i = await PythonTask.select_one([self._monitor.task(), awaitable])
|
693
|
+
if i == 0:
|
694
|
+
raise result
|
695
|
+
return self._process(result)
|
581
696
|
|
582
697
|
def _process(self, msg: PythonMessage) -> R:
|
583
698
|
# TODO: Try to do something more structured than a cast here
|
@@ -593,6 +708,12 @@ class PortReceiver(Generic[R]):
|
|
593
708
|
def recv(self) -> "Future[R]":
|
594
709
|
return Future(coro=self._recv())
|
595
710
|
|
711
|
+
def ranked(self) -> "RankedPortReceiver[R]":
|
712
|
+
return RankedPortReceiver[R](self._mailbox, self._receiver, self._monitor)
|
713
|
+
|
714
|
+
def _set_monitor(self, monitor: "Optional[Shared[Exception]]"):
|
715
|
+
self._monitor = monitor
|
716
|
+
|
596
717
|
|
597
718
|
class RankedPortReceiver(PortReceiver[Tuple[int, R]]):
|
598
719
|
def _process(self, msg: PythonMessage) -> Tuple[int, R]:
|
@@ -614,6 +735,8 @@ singleton_shape = Shape([], NDSlice(offset=0, sizes=[], strides=[]))
|
|
614
735
|
# we need to signal to the consumer of the PythonTask object that the thread really isn't in an async context.
|
615
736
|
# We do this by blanking out the running event loop during the call to the synchronous actor function.
|
616
737
|
|
738
|
+
MESSAGES_HANDLED = METER.create_counter("py_mesages_handled")
|
739
|
+
|
617
740
|
|
618
741
|
class _Actor:
|
619
742
|
"""
|
@@ -637,30 +760,28 @@ class _Actor:
|
|
637
760
|
|
638
761
|
async def handle(
|
639
762
|
self,
|
640
|
-
|
641
|
-
|
642
|
-
shape: Shape,
|
643
|
-
method_spec: MethodSpecifier,
|
763
|
+
ctx: Context,
|
764
|
+
method: MethodSpecifier,
|
644
765
|
message: bytes,
|
645
766
|
panic_flag: PanicFlag,
|
646
767
|
local_state: Iterable[Any],
|
647
|
-
|
768
|
+
response_port: "PortProtocol[Any]",
|
648
769
|
) -> None:
|
770
|
+
MESSAGES_HANDLED.add(1)
|
649
771
|
# response_port can be None. If so, then sending to port will drop the response,
|
650
772
|
# and raise any exceptions to the caller.
|
651
773
|
try:
|
652
|
-
ctx: MonarchContext = MonarchContext(
|
653
|
-
mailbox, mailbox.actor_id.proc_id, Point(rank, shape)
|
654
|
-
)
|
655
774
|
_context.set(ctx)
|
656
775
|
|
657
776
|
DebugContext.set(DebugContext())
|
658
777
|
|
659
778
|
args, kwargs = unflatten(message, local_state)
|
660
779
|
|
661
|
-
match
|
780
|
+
match method:
|
662
781
|
case MethodSpecifier.Init():
|
663
|
-
|
782
|
+
ins = ctx.actor_instance
|
783
|
+
Class, ins.proc_mesh, ins._controller_controller, *args = args
|
784
|
+
ins.rank = ctx.message_rank
|
664
785
|
try:
|
665
786
|
self.instance = Class(*args, **kwargs)
|
666
787
|
except Exception as e:
|
@@ -668,13 +789,13 @@ class _Actor:
|
|
668
789
|
e, f"Remote actor {Class}.__init__ call failed."
|
669
790
|
)
|
670
791
|
raise e
|
671
|
-
|
792
|
+
response_port.send(None)
|
672
793
|
return None
|
673
|
-
case MethodSpecifier.ReturnsResponse(name=
|
794
|
+
case MethodSpecifier.ReturnsResponse(name=method_name):
|
674
795
|
pass
|
675
|
-
case MethodSpecifier.ExplicitPort(name=
|
676
|
-
args = (
|
677
|
-
|
796
|
+
case MethodSpecifier.ExplicitPort(name=method_name):
|
797
|
+
args = (response_port, *args)
|
798
|
+
response_port = DroppingPort()
|
678
799
|
|
679
800
|
if self.instance is None:
|
680
801
|
# This could happen because of the following reasons. Both
|
@@ -687,52 +808,50 @@ class _Actor:
|
|
687
808
|
# should never happen. It indicates either a bug in the
|
688
809
|
# message delivery mechanism, or the framework accidentally
|
689
810
|
# mixed the usage of cast and direct send.
|
690
|
-
|
811
|
+
|
812
|
+
error_message = f"Actor object is missing when executing method {method_name} on actor {ctx.actor_instance.actor_id}."
|
691
813
|
if self._saved_error is not None:
|
692
814
|
error_message += (
|
693
815
|
f" This is likely due to an earlier error: {self._saved_error}"
|
694
816
|
)
|
695
817
|
raise AssertionError(error_message)
|
696
|
-
|
818
|
+
|
819
|
+
the_method = getattr(self.instance, method_name)
|
697
820
|
if isinstance(the_method, EndpointProperty):
|
698
|
-
module = the_method._method.__module__
|
699
821
|
the_method = functools.partial(the_method._method, self.instance)
|
700
|
-
else:
|
701
|
-
module = the_method.__module__
|
702
822
|
|
703
823
|
if inspect.iscoroutinefunction(the_method):
|
704
824
|
|
705
825
|
async def instrumented():
|
706
|
-
|
707
|
-
|
708
|
-
|
709
|
-
|
710
|
-
|
711
|
-
|
712
|
-
|
713
|
-
|
714
|
-
|
715
|
-
|
716
|
-
|
717
|
-
|
718
|
-
|
719
|
-
raise e
|
720
|
-
exit_span()
|
826
|
+
with TRACER.start_as_current_span(
|
827
|
+
method_name,
|
828
|
+
attributes={"actor_id": str(ctx.actor_instance.actor_id)},
|
829
|
+
):
|
830
|
+
try:
|
831
|
+
result = await the_method(*args, **kwargs)
|
832
|
+
self._maybe_exit_debugger()
|
833
|
+
except Exception as e:
|
834
|
+
logging.critical(
|
835
|
+
"Unhandled exception in actor endpoint",
|
836
|
+
exc_info=e,
|
837
|
+
)
|
838
|
+
raise e
|
721
839
|
return result
|
722
840
|
|
723
841
|
result = await instrumented()
|
724
842
|
else:
|
725
|
-
|
726
|
-
|
727
|
-
|
728
|
-
|
729
|
-
|
730
|
-
|
731
|
-
|
843
|
+
with TRACER.start_as_current_span(
|
844
|
+
method_name,
|
845
|
+
attributes={"actor_id": str(ctx.actor_instance.actor_id)},
|
846
|
+
):
|
847
|
+
with fake_sync_state():
|
848
|
+
result = the_method(*args, **kwargs)
|
849
|
+
self._maybe_exit_debugger()
|
850
|
+
|
851
|
+
response_port.send(result)
|
732
852
|
except Exception as e:
|
733
853
|
self._post_mortem_debug(e.__traceback__)
|
734
|
-
|
735
|
-
port.exception(ActorError(e))
|
854
|
+
response_port.exception(ActorError(e))
|
736
855
|
except BaseException as e:
|
737
856
|
self._post_mortem_debug(e.__traceback__)
|
738
857
|
# A BaseException can be thrown in the case of a Rust panic.
|
@@ -754,21 +873,33 @@ class _Actor:
|
|
754
873
|
DebugContext.set(DebugContext())
|
755
874
|
|
756
875
|
def _post_mortem_debug(self, exc_tb) -> None:
|
757
|
-
from monarch._src.actor.debugger import
|
876
|
+
from monarch._src.actor.debugger.debugger import debug_controller
|
758
877
|
|
759
878
|
if (pdb_wrapper := DebugContext.get().pdb_wrapper) is not None:
|
760
879
|
with fake_sync_state():
|
761
|
-
ctx =
|
880
|
+
ctx = context()
|
881
|
+
msg_rank = ctx.message_rank
|
762
882
|
pdb_wrapper = PdbWrapper(
|
763
|
-
|
764
|
-
|
765
|
-
ctx.
|
766
|
-
|
883
|
+
msg_rank.rank,
|
884
|
+
{k: msg_rank[k] for k in msg_rank},
|
885
|
+
ctx.actor_instance.actor_id,
|
886
|
+
debug_controller(),
|
767
887
|
)
|
768
888
|
DebugContext.set(DebugContext(pdb_wrapper))
|
769
889
|
pdb_wrapper.post_mortem(exc_tb)
|
770
890
|
self._maybe_exit_debugger(do_continue=False)
|
771
891
|
|
892
|
+
def _handle_undeliverable_message(
|
893
|
+
self, message: UndeliverableMessageEnvelope
|
894
|
+
) -> bool:
|
895
|
+
handle_undeliverable = getattr(
|
896
|
+
self.instance, "_handle_undeliverable_message", None
|
897
|
+
)
|
898
|
+
if handle_undeliverable is not None:
|
899
|
+
return handle_undeliverable(message)
|
900
|
+
else:
|
901
|
+
return False
|
902
|
+
|
772
903
|
|
773
904
|
def _is_mailbox(x: object) -> bool:
|
774
905
|
if hasattr(x, "__monarch_ref__"):
|
@@ -787,7 +918,7 @@ def _pickle(obj: object) -> bytes:
|
|
787
918
|
return msg
|
788
919
|
|
789
920
|
|
790
|
-
class Actor(MeshTrait):
|
921
|
+
class Actor(MeshTrait, DeprecatedNotAFuture):
|
791
922
|
@functools.cached_property
|
792
923
|
def logger(cls) -> logging.Logger:
|
793
924
|
lgr = logging.getLogger(cls.__class__.__name__)
|
@@ -806,20 +937,39 @@ class Actor(MeshTrait):
|
|
806
937
|
"actor implementations are not meshes, but we can't convince the typechecker of it..."
|
807
938
|
)
|
808
939
|
|
809
|
-
def _new_with_shape(self, shape: Shape) ->
|
940
|
+
def _new_with_shape(self, shape: Shape) -> Self:
|
810
941
|
raise NotImplementedError(
|
811
942
|
"actor implementations are not meshes, but we can't convince the typechecker of it..."
|
812
943
|
)
|
813
944
|
|
945
|
+
@property
|
946
|
+
def initialized(self):
|
947
|
+
raise NotImplementedError(
|
948
|
+
"actor implementations are not meshes, but we can't convince the typechecker of it..."
|
949
|
+
)
|
950
|
+
|
951
|
+
def _handle_undeliverable_message(
|
952
|
+
self, message: UndeliverableMessageEnvelope
|
953
|
+
) -> bool:
|
954
|
+
# Return False to indicate that the undeliverable message was not handled.
|
955
|
+
return False
|
814
956
|
|
815
|
-
|
957
|
+
|
958
|
+
class ActorMesh(MeshTrait, Generic[T], DeprecatedNotAFuture):
|
816
959
|
def __init__(
|
817
|
-
self,
|
960
|
+
self,
|
961
|
+
Class: Type[T],
|
962
|
+
inner: "ActorMeshProtocol",
|
963
|
+
mailbox: Mailbox,
|
964
|
+
shape: Shape,
|
965
|
+
proc_mesh: "Optional[ProcMesh]",
|
818
966
|
) -> None:
|
819
967
|
self.__name__: str = Class.__name__
|
820
968
|
self._class: Type[T] = Class
|
821
|
-
self.
|
969
|
+
self._inner: "ActorMeshProtocol" = inner
|
822
970
|
self._mailbox: Mailbox = mailbox
|
971
|
+
self._shape = shape
|
972
|
+
self._proc_mesh = proc_mesh
|
823
973
|
for attr_name in dir(self._class):
|
824
974
|
attr_value = getattr(self._class, attr_name, None)
|
825
975
|
if isinstance(attr_value, EndpointProperty):
|
@@ -832,11 +982,9 @@ class ActorMeshRef(MeshTrait):
|
|
832
982
|
setattr(
|
833
983
|
self,
|
834
984
|
attr_name,
|
835
|
-
|
836
|
-
self._actor_mesh_ref,
|
985
|
+
self._endpoint(
|
837
986
|
kind(attr_name),
|
838
987
|
attr_value._method,
|
839
|
-
self._mailbox,
|
840
988
|
attr_value._propagator,
|
841
989
|
attr_value._explicit_response_port,
|
842
990
|
),
|
@@ -847,53 +995,95 @@ class ActorMeshRef(MeshTrait):
|
|
847
995
|
return NotAnEndpoint(self, attr)
|
848
996
|
raise AttributeError(attr)
|
849
997
|
|
850
|
-
def
|
998
|
+
def _endpoint(
|
851
999
|
self,
|
852
|
-
|
853
|
-
|
854
|
-
|
1000
|
+
name: MethodSpecifier,
|
1001
|
+
impl: Callable[Concatenate[Any, P], Awaitable[R]],
|
1002
|
+
propagator: Any,
|
1003
|
+
explicit_response_port: bool,
|
1004
|
+
):
|
1005
|
+
return ActorEndpoint(
|
1006
|
+
self._inner,
|
1007
|
+
self._shape,
|
1008
|
+
self._proc_mesh,
|
1009
|
+
name,
|
1010
|
+
impl,
|
1011
|
+
self._mailbox,
|
1012
|
+
propagator,
|
1013
|
+
explicit_response_port,
|
1014
|
+
)
|
1015
|
+
|
1016
|
+
@classmethod
|
1017
|
+
def _create(
|
1018
|
+
cls,
|
1019
|
+
Class: Type[T],
|
1020
|
+
actor_mesh: "PythonActorMesh | PythonActorMeshImpl",
|
1021
|
+
mailbox: Mailbox,
|
1022
|
+
shape: Shape,
|
1023
|
+
proc_mesh: "ProcMesh",
|
1024
|
+
controller_controller: Optional["_ControllerController"],
|
1025
|
+
# args and kwargs are passed to the __init__ method of the user defined
|
1026
|
+
# python actor object.
|
1027
|
+
*args: Any,
|
1028
|
+
**kwargs: Any,
|
1029
|
+
) -> "ActorMesh[T]":
|
1030
|
+
if isinstance(actor_mesh, PythonActorMeshImpl):
|
1031
|
+
actor_mesh = _ActorMeshRefImpl.from_hyperactor_mesh(
|
1032
|
+
mailbox, shape, actor_mesh, proc_mesh
|
1033
|
+
)
|
1034
|
+
|
1035
|
+
mesh = cls(Class, actor_mesh, mailbox, shape, proc_mesh)
|
1036
|
+
|
855
1037
|
async def null_func(*_args: Iterable[Any], **_kwargs: Dict[str, Any]) -> None:
|
856
1038
|
return None
|
857
1039
|
|
858
|
-
|
859
|
-
|
1040
|
+
# send __init__ message to the mesh to initialize the user defined
|
1041
|
+
# python actor object.
|
1042
|
+
ep = mesh._endpoint(
|
860
1043
|
MethodSpecifier.Init(),
|
861
1044
|
null_func,
|
862
|
-
self._mailbox,
|
863
1045
|
None,
|
864
1046
|
False,
|
865
1047
|
)
|
866
|
-
send(ep, (
|
867
|
-
|
868
|
-
|
869
|
-
|
870
|
-
|
871
|
-
|
872
|
-
|
873
|
-
|
874
|
-
|
1048
|
+
send(ep, (mesh._class, proc_mesh, controller_controller, *args), kwargs)
|
1049
|
+
|
1050
|
+
return mesh
|
1051
|
+
|
1052
|
+
@classmethod
|
1053
|
+
def from_actor_id(
|
1054
|
+
cls,
|
1055
|
+
Class: Type[T],
|
1056
|
+
actor_id: ActorId,
|
1057
|
+
mailbox: Mailbox,
|
1058
|
+
) -> "ActorMesh[T]":
|
1059
|
+
return cls(
|
1060
|
+
Class, _SingletonActorAdapator(actor_id), mailbox, singleton_shape, None
|
875
1061
|
)
|
876
1062
|
|
1063
|
+
def __reduce_ex__(self, protocol: ...) -> "Tuple[Type[ActorMesh], Tuple[Any, ...]]":
|
1064
|
+
return ActorMesh, (self._class, self._inner, self._mailbox, self._shape, None)
|
1065
|
+
|
877
1066
|
@property
|
878
1067
|
def _ndslice(self) -> NDSlice:
|
879
|
-
return self.
|
1068
|
+
return self._shape.ndslice
|
880
1069
|
|
881
1070
|
@property
|
882
1071
|
def _labels(self) -> Iterable[str]:
|
883
|
-
return self.
|
1072
|
+
return self._shape.labels
|
884
1073
|
|
885
|
-
def _new_with_shape(self, shape: Shape) -> "
|
886
|
-
|
887
|
-
|
888
|
-
_ActorMeshRefImpl.from_actor_ref_with_shape(self._actor_mesh_ref, shape),
|
889
|
-
self._mailbox,
|
890
|
-
)
|
1074
|
+
def _new_with_shape(self, shape: Shape) -> "ActorMesh[T]":
|
1075
|
+
sliced = self._inner.new_with_shape(shape)
|
1076
|
+
return ActorMesh(self._class, sliced, self._mailbox, shape, self._proc_mesh)
|
891
1077
|
|
892
1078
|
def __repr__(self) -> str:
|
893
|
-
return f"
|
1079
|
+
return f"ActorMesh(class={self._class}, shape={self._shape}), inner={type(self._inner)})"
|
1080
|
+
|
1081
|
+
def stop(self) -> "Future[None]":
|
1082
|
+
return Future(coro=self._inner.stop())
|
894
1083
|
|
895
|
-
|
896
|
-
|
1084
|
+
@property
|
1085
|
+
def initialized(self) -> Future[None]:
|
1086
|
+
return Future(coro=self._inner.initialized())
|
897
1087
|
|
898
1088
|
|
899
1089
|
class ActorError(Exception):
|
@@ -909,27 +1099,35 @@ class ActorError(Exception):
|
|
909
1099
|
message: str = "A remote actor call has failed.",
|
910
1100
|
) -> None:
|
911
1101
|
self.exception = exception
|
912
|
-
|
1102
|
+
# Need to stringify the exception early, because the PyPI package
|
1103
|
+
# exceptiongroup may monkeypatch the "TracebackException" class for python
|
1104
|
+
# versions < 3.11. If it gets unpickled in a different scope without
|
1105
|
+
# using that monkeypatch, it'll have an exception in "format()".
|
1106
|
+
# Store the traceback string instead which shouldn't change between machines.
|
1107
|
+
actor_mesh_ref_tb = TracebackException.from_exception(exception).format()
|
1108
|
+
# Replace any traceback lines to indicate it's a remote call traceback.
|
1109
|
+
actor_mesh_ref_tb = (
|
1110
|
+
s.replace(
|
1111
|
+
"Traceback (most recent call last):",
|
1112
|
+
"Traceback of where the remote call failed (most recent call last):",
|
1113
|
+
)
|
1114
|
+
for s in actor_mesh_ref_tb
|
1115
|
+
)
|
1116
|
+
self.exception_formatted = "".join(actor_mesh_ref_tb)
|
913
1117
|
self.message = message
|
914
1118
|
|
915
1119
|
def __str__(self) -> str:
|
916
|
-
|
917
|
-
actor_mesh_ref_tb = "".join(traceback.format_list(self.actor_mesh_ref_frames))
|
918
|
-
return (
|
919
|
-
f"{self.message}\n"
|
920
|
-
f"Traceback of where the remote call failed (most recent call last):\n{actor_mesh_ref_tb}{type(self.exception).__name__}: {exe}"
|
921
|
-
)
|
1120
|
+
return f"{self.message}\n {self.exception_formatted}"
|
922
1121
|
|
923
1122
|
|
924
1123
|
def current_actor_name() -> str:
|
925
|
-
return str(
|
1124
|
+
return str(context().actor_instance.actor_id)
|
926
1125
|
|
927
1126
|
|
928
1127
|
def current_rank() -> Point:
|
929
|
-
|
930
|
-
return ctx.point
|
1128
|
+
return context().message_rank
|
931
1129
|
|
932
1130
|
|
933
1131
|
def current_size() -> Dict[str, int]:
|
934
|
-
|
935
|
-
return
|
1132
|
+
r = context().message_rank.extent
|
1133
|
+
return {k: r[k] for k in r}
|