torchmonarch-nightly 2025.8.2__cp313-cp313-manylinux2014_x86_64.whl → 2025.9.4__cp313-cp313-manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- monarch/_rust_bindings.so +0 -0
- monarch/_src/actor/actor_mesh.py +504 -218
- monarch/_src/actor/allocator.py +75 -6
- monarch/_src/actor/bootstrap_main.py +7 -4
- monarch/_src/actor/code_sync/__init__.py +2 -0
- monarch/_src/actor/debugger/__init__.py +7 -0
- monarch/_src/actor/{debugger.py → debugger/debugger.py} +246 -135
- monarch/_src/actor/{pdb_wrapper.py → debugger/pdb_wrapper.py} +62 -23
- monarch/_src/actor/endpoint.py +27 -45
- monarch/_src/actor/future.py +86 -24
- monarch/_src/actor/host_mesh.py +125 -0
- monarch/_src/actor/logging.py +94 -0
- monarch/_src/actor/pickle.py +25 -0
- monarch/_src/actor/proc_mesh.py +423 -156
- monarch/_src/actor/python_extension_methods.py +90 -0
- monarch/_src/actor/shape.py +8 -1
- monarch/_src/actor/source_loader.py +45 -0
- monarch/_src/actor/telemetry/__init__.py +172 -0
- monarch/_src/actor/telemetry/rust_span_tracing.py +6 -39
- monarch/_src/debug_cli/__init__.py +7 -0
- monarch/_src/debug_cli/debug_cli.py +43 -0
- monarch/_src/tensor_engine/rdma.py +64 -9
- monarch/_testing.py +1 -3
- monarch/actor/__init__.py +28 -4
- monarch/common/_C.so +0 -0
- monarch/common/device_mesh.py +14 -0
- monarch/common/future.py +10 -0
- monarch/common/remote.py +14 -25
- monarch/common/tensor.py +12 -0
- monarch/debug_cli/__init__.py +7 -0
- monarch/debug_cli/__main__.py +12 -0
- monarch/fetch.py +2 -2
- monarch/gradient/_gradient_generator.so +0 -0
- monarch/gradient_generator.py +4 -2
- monarch/mesh_controller.py +34 -14
- monarch/monarch_controller +0 -0
- monarch/tools/colors.py +25 -0
- monarch/tools/commands.py +42 -7
- monarch/tools/components/hyperactor.py +6 -4
- monarch/tools/config/__init__.py +35 -12
- monarch/tools/config/defaults.py +15 -5
- monarch/tools/config/environment.py +45 -0
- monarch/tools/config/workspace.py +165 -0
- monarch/tools/mesh_spec.py +3 -3
- monarch/utils/__init__.py +9 -0
- monarch/utils/utils.py +78 -0
- tests/error_test_binary.py +5 -3
- tests/python_actor_test_binary.py +52 -0
- tests/test_actor_error.py +142 -14
- tests/test_alloc.py +1 -1
- tests/test_allocator.py +59 -72
- tests/test_debugger.py +639 -45
- tests/test_env_before_cuda.py +4 -4
- tests/test_mesh_trait.py +38 -0
- tests/test_python_actors.py +965 -75
- tests/test_rdma.py +7 -6
- tests/test_tensor_engine.py +6 -6
- {torchmonarch_nightly-2025.8.2.dist-info → torchmonarch_nightly-2025.9.4.dist-info}/METADATA +82 -4
- {torchmonarch_nightly-2025.8.2.dist-info → torchmonarch_nightly-2025.9.4.dist-info}/RECORD +63 -47
- {torchmonarch_nightly-2025.8.2.dist-info → torchmonarch_nightly-2025.9.4.dist-info}/WHEEL +0 -0
- {torchmonarch_nightly-2025.8.2.dist-info → torchmonarch_nightly-2025.9.4.dist-info}/entry_points.txt +0 -0
- {torchmonarch_nightly-2025.8.2.dist-info → torchmonarch_nightly-2025.9.4.dist-info}/licenses/LICENSE +0 -0
- {torchmonarch_nightly-2025.8.2.dist-info → torchmonarch_nightly-2025.9.4.dist-info}/top_level.txt +0 -0
monarch/_src/actor/actor_mesh.py
CHANGED
@@ -6,6 +6,7 @@
|
|
6
6
|
|
7
7
|
# pyre-unsafe
|
8
8
|
|
9
|
+
import abc
|
9
10
|
import collections
|
10
11
|
import contextvars
|
11
12
|
import functools
|
@@ -14,17 +15,18 @@ import itertools
|
|
14
15
|
import logging
|
15
16
|
import random
|
16
17
|
import traceback
|
18
|
+
from abc import abstractmethod, abstractproperty
|
17
19
|
|
18
20
|
from dataclasses import dataclass
|
19
|
-
from
|
21
|
+
from pprint import pformat
|
22
|
+
from textwrap import indent
|
23
|
+
from traceback import TracebackException
|
20
24
|
from typing import (
|
21
25
|
Any,
|
22
|
-
AsyncGenerator,
|
23
26
|
Awaitable,
|
24
27
|
Callable,
|
25
28
|
cast,
|
26
29
|
Concatenate,
|
27
|
-
Coroutine,
|
28
30
|
Dict,
|
29
31
|
Generator,
|
30
32
|
Generic,
|
@@ -32,7 +34,6 @@ from typing import (
|
|
32
34
|
Iterator,
|
33
35
|
List,
|
34
36
|
Literal,
|
35
|
-
NamedTuple,
|
36
37
|
Optional,
|
37
38
|
overload,
|
38
39
|
ParamSpec,
|
@@ -48,24 +49,25 @@ from monarch._rust_bindings.monarch_hyperactor.actor import (
|
|
48
49
|
PythonMessage,
|
49
50
|
PythonMessageKind,
|
50
51
|
)
|
51
|
-
from monarch._rust_bindings.monarch_hyperactor.actor_mesh import
|
52
|
+
from monarch._rust_bindings.monarch_hyperactor.actor_mesh import (
|
53
|
+
PythonActorMesh,
|
54
|
+
PythonActorMeshImpl,
|
55
|
+
)
|
52
56
|
from monarch._rust_bindings.monarch_hyperactor.mailbox import (
|
53
57
|
Mailbox,
|
54
|
-
OncePortReceiver,
|
58
|
+
OncePortReceiver as HyOncePortReceiver, # noqa: F401
|
55
59
|
OncePortRef,
|
56
|
-
PortReceiver as HyPortReceiver,
|
60
|
+
PortReceiver as HyPortReceiver, # noqa: F401
|
57
61
|
PortRef,
|
62
|
+
UndeliverableMessageEnvelope,
|
58
63
|
)
|
59
|
-
|
60
|
-
if TYPE_CHECKING:
|
61
|
-
from monarch._rust_bindings.monarch_hyperactor.actor import PortProtocol
|
62
|
-
from monarch._rust_bindings.monarch_hyperactor.mailbox import PortReceiverBase
|
63
|
-
|
64
64
|
from monarch._rust_bindings.monarch_hyperactor.proc import ActorId
|
65
|
+
from monarch._rust_bindings.monarch_hyperactor.pytokio import PythonTask, Shared
|
66
|
+
from monarch._rust_bindings.monarch_hyperactor.selection import Selection as HySelection
|
65
67
|
from monarch._rust_bindings.monarch_hyperactor.shape import Point as HyPoint, Shape
|
66
68
|
from monarch._rust_bindings.monarch_hyperactor.supervision import SupervisionError
|
67
|
-
from monarch._rust_bindings.monarch_hyperactor.telemetry import enter_span, exit_span
|
68
69
|
from monarch._src.actor.allocator import LocalAllocator, ProcessAllocator
|
70
|
+
from monarch._src.actor.debugger.pdb_wrapper import PdbWrapper
|
69
71
|
from monarch._src.actor.endpoint import (
|
70
72
|
Endpoint,
|
71
73
|
EndpointProperty,
|
@@ -74,21 +76,28 @@ from monarch._src.actor.endpoint import (
|
|
74
76
|
Propagator,
|
75
77
|
Selection,
|
76
78
|
)
|
77
|
-
from monarch._src.actor.future import Future
|
78
|
-
from monarch._src.actor.pdb_wrapper import PdbWrapper
|
79
|
-
|
79
|
+
from monarch._src.actor.future import DeprecatedNotAFuture, Future
|
80
80
|
from monarch._src.actor.pickle import flatten, unflatten
|
81
|
-
|
81
|
+
from monarch._src.actor.python_extension_methods import rust_struct
|
82
82
|
from monarch._src.actor.shape import MeshTrait, NDSlice
|
83
83
|
from monarch._src.actor.sync_state import fake_sync_state
|
84
|
-
|
84
|
+
from monarch._src.actor.telemetry import METER
|
85
85
|
from monarch._src.actor.tensor_engine_shim import actor_rref, actor_send
|
86
|
+
from typing_extensions import Self
|
86
87
|
|
87
88
|
if TYPE_CHECKING:
|
88
|
-
from monarch.
|
89
|
+
from monarch._rust_bindings.monarch_hyperactor.actor import PortProtocol
|
90
|
+
from monarch._rust_bindings.monarch_hyperactor.actor_mesh import ActorMeshProtocol
|
91
|
+
from monarch._rust_bindings.monarch_hyperactor.mailbox import PortReceiverBase
|
92
|
+
from monarch._src.actor.proc_mesh import _ControllerController, ProcMesh
|
93
|
+
from monarch._src.actor.telemetry import get_monarch_tracer
|
94
|
+
|
95
|
+
CallMethod = PythonMessageKind.CallMethod
|
89
96
|
|
90
97
|
logger: logging.Logger = logging.getLogger(__name__)
|
91
98
|
|
99
|
+
TRACER = get_monarch_tracer()
|
100
|
+
|
92
101
|
Allocator = ProcessAllocator | LocalAllocator
|
93
102
|
|
94
103
|
try:
|
@@ -106,22 +115,103 @@ class Point(HyPoint, collections.abc.Mapping):
|
|
106
115
|
pass
|
107
116
|
|
108
117
|
|
109
|
-
@
|
110
|
-
class
|
111
|
-
|
112
|
-
|
113
|
-
|
118
|
+
@rust_struct("monarch_hyperactor::mailbox::Instance")
|
119
|
+
class Instance(abc.ABC):
|
120
|
+
@abstractproperty
|
121
|
+
def _mailbox(self) -> Mailbox:
|
122
|
+
"""
|
123
|
+
This can be removed once we fix all the uses of mailbox to just use context instead.
|
124
|
+
"""
|
125
|
+
...
|
126
|
+
|
127
|
+
@property
|
128
|
+
def proc_id(self) -> str:
|
129
|
+
"""
|
130
|
+
The proc_id of the current actor.
|
131
|
+
"""
|
132
|
+
return self.actor_id.proc_id
|
133
|
+
|
134
|
+
@abstractproperty
|
135
|
+
def actor_id(self) -> ActorId:
|
136
|
+
"""
|
137
|
+
The actor_id of the current actor.
|
138
|
+
"""
|
139
|
+
...
|
140
|
+
|
141
|
+
@property
|
142
|
+
def proc(self) -> "ProcMesh":
|
143
|
+
"""
|
144
|
+
The singleton proc mesh that corresponds to just this actor.
|
145
|
+
"""
|
146
|
+
|
147
|
+
return self.proc_mesh.slice(**self.rank)
|
148
|
+
|
149
|
+
"""
|
150
|
+
Every actor is spawned over some mesh of processes. This identifies the point in that mesh where
|
151
|
+
the current actor was spawned. In other words, it is the `monarch.current_rank()` of
|
152
|
+
the actor's __init__ message.
|
153
|
+
"""
|
154
|
+
rank: Point
|
155
|
+
proc_mesh: "ProcMesh"
|
156
|
+
_controller_controller: "_ControllerController"
|
157
|
+
|
158
|
+
# this property is used to hold the handles to actors and processes launched by this actor
|
159
|
+
# in order to keep them alive until this actor exits.
|
160
|
+
_children: "Optional[List[ActorMesh | ProcMesh]]"
|
161
|
+
|
162
|
+
def _add_child(self, child: "ActorMesh | ProcMesh") -> None:
|
163
|
+
if self._children is None:
|
164
|
+
self._children = [child]
|
165
|
+
else:
|
166
|
+
self._children.append(child)
|
167
|
+
|
168
|
+
|
169
|
+
@rust_struct("monarch_hyperactor::mailbox::Context")
|
170
|
+
class Context:
|
171
|
+
@property
|
172
|
+
def actor_instance(self) -> Instance:
|
173
|
+
"""
|
174
|
+
Information about the actor currently running in this context.
|
175
|
+
"""
|
176
|
+
...
|
177
|
+
|
178
|
+
@property
|
179
|
+
def message_rank(self) -> Point:
|
180
|
+
"""
|
181
|
+
Every message is sent as some broadcast of messages. This call identifies the
|
182
|
+
point in this space where the current actor is participating.
|
183
|
+
|
184
|
+
This is not the same as self.actor_instance.rank: if the message was sent to some slice of
|
185
|
+
actors this identifies where the actor appears in the slice and not the identity of the actor.
|
186
|
+
|
187
|
+
These Point objects always exist. For singletons it will have 0 dimensions.
|
188
|
+
"""
|
189
|
+
...
|
114
190
|
|
115
191
|
@staticmethod
|
116
|
-
def
|
117
|
-
return _context.get()
|
192
|
+
def _root_client_context() -> "Context": ...
|
118
193
|
|
119
194
|
|
120
|
-
_context: contextvars.ContextVar[
|
195
|
+
_context: contextvars.ContextVar[Context] = contextvars.ContextVar(
|
121
196
|
"monarch.actor_mesh._context"
|
122
197
|
)
|
123
198
|
|
124
199
|
|
200
|
+
def context() -> Context:
|
201
|
+
c = _context.get(None)
|
202
|
+
if c is None:
|
203
|
+
c = Context._root_client_context()
|
204
|
+
_context.set(c)
|
205
|
+
from monarch._src.actor.host_mesh import create_local_host_mesh
|
206
|
+
from monarch._src.actor.proc_mesh import _get_controller_controller
|
207
|
+
|
208
|
+
c.actor_instance.proc_mesh, c.actor_instance._controller_controller = (
|
209
|
+
_get_controller_controller()
|
210
|
+
)
|
211
|
+
c.actor_instance.proc_mesh._host_mesh = create_local_host_mesh()
|
212
|
+
return c
|
213
|
+
|
214
|
+
|
125
215
|
@dataclass
|
126
216
|
class DebugContext:
|
127
217
|
pdb_wrapper: Optional[PdbWrapper] = None
|
@@ -149,6 +239,37 @@ A = TypeVar("A")
|
|
149
239
|
_load_balancing_seed = random.Random(4)
|
150
240
|
|
151
241
|
|
242
|
+
class _SingletonActorAdapator:
|
243
|
+
def __init__(self, inner: ActorId, shape: Optional[Shape] = None) -> None:
|
244
|
+
self._inner: ActorId = inner
|
245
|
+
if shape is None:
|
246
|
+
shape = singleton_shape
|
247
|
+
self._shape = shape
|
248
|
+
|
249
|
+
def cast(
|
250
|
+
self,
|
251
|
+
message: PythonMessage,
|
252
|
+
selection: str,
|
253
|
+
mailbox: Mailbox,
|
254
|
+
) -> None:
|
255
|
+
mailbox.post(self._inner, message)
|
256
|
+
|
257
|
+
def new_with_shape(self, shape: Shape) -> "ActorMeshProtocol":
|
258
|
+
return _SingletonActorAdapator(self._inner, self._shape)
|
259
|
+
|
260
|
+
def supervision_event(self) -> "Optional[Shared[Exception]]":
|
261
|
+
return None
|
262
|
+
|
263
|
+
def stop(self) -> "PythonTask[None]":
|
264
|
+
raise NotImplementedError("stop()")
|
265
|
+
|
266
|
+
def initialized(self) -> "PythonTask[None]":
|
267
|
+
async def empty():
|
268
|
+
pass
|
269
|
+
|
270
|
+
return PythonTask.from_coroutine(empty())
|
271
|
+
|
272
|
+
|
152
273
|
# standin class for whatever is the serializable python object we use
|
153
274
|
# to name an actor mesh. Hacked up today because ActorMesh
|
154
275
|
# isn't plumbed to non-clients
|
@@ -156,7 +277,7 @@ class _ActorMeshRefImpl:
|
|
156
277
|
def __init__(
|
157
278
|
self,
|
158
279
|
mailbox: Mailbox,
|
159
|
-
hy_actor_mesh: Optional[
|
280
|
+
hy_actor_mesh: Optional[PythonActorMeshImpl],
|
160
281
|
proc_mesh: "Optional[ProcMesh]",
|
161
282
|
shape: Shape,
|
162
283
|
actor_ids: List[ActorId],
|
@@ -171,29 +292,19 @@ class _ActorMeshRefImpl:
|
|
171
292
|
|
172
293
|
@staticmethod
|
173
294
|
def from_hyperactor_mesh(
|
174
|
-
mailbox: Mailbox,
|
295
|
+
mailbox: Mailbox,
|
296
|
+
shape: Shape,
|
297
|
+
hy_actor_mesh: PythonActorMeshImpl,
|
298
|
+
proc_mesh: "ProcMesh",
|
175
299
|
) -> "_ActorMeshRefImpl":
|
176
|
-
shape: Shape = hy_actor_mesh.shape
|
177
300
|
return _ActorMeshRefImpl(
|
178
301
|
mailbox,
|
179
302
|
hy_actor_mesh,
|
180
303
|
proc_mesh,
|
181
|
-
|
304
|
+
shape,
|
182
305
|
[cast(ActorId, hy_actor_mesh.get(i)) for i in range(len(shape))],
|
183
306
|
)
|
184
307
|
|
185
|
-
@staticmethod
|
186
|
-
def from_actor_id(mailbox: Mailbox, actor_id: ActorId) -> "_ActorMeshRefImpl":
|
187
|
-
return _ActorMeshRefImpl(mailbox, None, None, singleton_shape, [actor_id])
|
188
|
-
|
189
|
-
@staticmethod
|
190
|
-
def from_actor_ref_with_shape(
|
191
|
-
ref: "_ActorMeshRefImpl", shape: Shape
|
192
|
-
) -> "_ActorMeshRefImpl":
|
193
|
-
return _ActorMeshRefImpl(
|
194
|
-
ref._mailbox, None, None, shape, ref._please_replace_me_actor_ids
|
195
|
-
)
|
196
|
-
|
197
308
|
def __getstate__(
|
198
309
|
self,
|
199
310
|
) -> Tuple[Shape, List[ActorId], Mailbox]:
|
@@ -214,22 +325,19 @@ class _ActorMeshRefImpl:
|
|
214
325
|
if self._actor_mesh is not None:
|
215
326
|
if self._actor_mesh.stopped:
|
216
327
|
raise SupervisionError(
|
217
|
-
"actor mesh is
|
328
|
+
"actor mesh is unhealthy with reason: actor mesh is stopped due to proc mesh shutdown. "
|
329
|
+
"`PythonActorMesh` has already been stopped."
|
218
330
|
)
|
219
331
|
|
220
332
|
event = self._actor_mesh.get_supervision_event()
|
221
333
|
if event is not None:
|
222
|
-
raise SupervisionError(f"actor mesh is
|
223
|
-
|
224
|
-
def send(self, rank: int, message: PythonMessage) -> None:
|
225
|
-
self._check_state()
|
226
|
-
actor = self._please_replace_me_actor_ids[rank]
|
227
|
-
self._mailbox.post(actor, message)
|
334
|
+
raise SupervisionError(f"actor mesh is unhealthy with reason: {event}")
|
228
335
|
|
229
336
|
def cast(
|
230
337
|
self,
|
231
338
|
message: PythonMessage,
|
232
|
-
selection:
|
339
|
+
selection: str,
|
340
|
+
mailbox: Mailbox,
|
233
341
|
) -> None:
|
234
342
|
self._check_state()
|
235
343
|
|
@@ -279,14 +387,44 @@ class _ActorMeshRefImpl:
|
|
279
387
|
actor_id0 = self._please_replace_me_actor_ids[0]
|
280
388
|
return actor_id0.actor_name, actor_id0.pid
|
281
389
|
|
282
|
-
|
283
|
-
|
390
|
+
@property
|
391
|
+
def shape(self) -> Shape:
|
392
|
+
return self._shape
|
393
|
+
|
394
|
+
@property
|
395
|
+
def proc_mesh(self) -> Optional["ProcMesh"]:
|
396
|
+
return self._proc_mesh
|
397
|
+
|
398
|
+
def new_with_shape(self, shape: Shape) -> "_ActorMeshRefImpl":
|
399
|
+
return _ActorMeshRefImpl(
|
400
|
+
self._mailbox, None, None, shape, self._please_replace_me_actor_ids
|
401
|
+
)
|
402
|
+
|
403
|
+
def supervision_event(self) -> "Optional[Shared[Exception]]":
|
404
|
+
if self._actor_mesh is None:
|
405
|
+
return None
|
406
|
+
return self._actor_mesh.supervision_event()
|
407
|
+
|
408
|
+
def stop(self) -> PythonTask[None]:
|
409
|
+
async def task():
|
410
|
+
if self._actor_mesh is not None:
|
411
|
+
self._actor_mesh.stop()
|
412
|
+
|
413
|
+
return PythonTask.from_coroutine(task())
|
414
|
+
|
415
|
+
def initialized(self) -> PythonTask[None]:
|
416
|
+
async def task():
|
417
|
+
pass
|
418
|
+
|
419
|
+
return PythonTask.from_coroutine(task())
|
284
420
|
|
285
421
|
|
286
422
|
class ActorEndpoint(Endpoint[P, R]):
|
287
423
|
def __init__(
|
288
424
|
self,
|
289
|
-
|
425
|
+
actor_mesh: "ActorMeshProtocol",
|
426
|
+
shape: Shape,
|
427
|
+
proc_mesh: "Optional[ProcMesh]",
|
290
428
|
name: MethodSpecifier,
|
291
429
|
impl: Callable[Concatenate[Any, P], Awaitable[R]],
|
292
430
|
mailbox: Mailbox,
|
@@ -294,16 +432,14 @@ class ActorEndpoint(Endpoint[P, R]):
|
|
294
432
|
explicit_response_port: bool,
|
295
433
|
) -> None:
|
296
434
|
super().__init__(propagator)
|
297
|
-
self._actor_mesh =
|
435
|
+
self._actor_mesh = actor_mesh
|
298
436
|
self._name = name
|
437
|
+
self._shape = shape
|
438
|
+
self._proc_mesh = proc_mesh
|
299
439
|
self._signature: inspect.Signature = inspect.signature(impl)
|
300
440
|
self._mailbox = mailbox
|
301
441
|
self._explicit_response_port = explicit_response_port
|
302
442
|
|
303
|
-
def _supervise(self, r: HyPortReceiver | OncePortReceiver) -> Any:
|
304
|
-
mesh = self._actor_mesh._actor_mesh
|
305
|
-
return r if mesh is None else mesh.supervise(r)
|
306
|
-
|
307
443
|
def _call_name(self) -> Any:
|
308
444
|
return self._name
|
309
445
|
|
@@ -334,19 +470,17 @@ class ActorEndpoint(Endpoint[P, R]):
|
|
334
470
|
),
|
335
471
|
bytes,
|
336
472
|
)
|
337
|
-
self._actor_mesh.cast(message, selection)
|
473
|
+
self._actor_mesh.cast(message, selection, self._mailbox)
|
338
474
|
else:
|
339
475
|
actor_send(self, bytes, objects, port, selection)
|
340
|
-
shape = self.
|
476
|
+
shape = self._shape
|
341
477
|
return Extent(shape.labels, shape.ndslice.sizes)
|
342
478
|
|
343
|
-
def _port(self, once: bool = False) -> "
|
344
|
-
p, r =
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
), "unexpected receiver type"
|
349
|
-
return PortTuple(p, PortReceiver(self._mailbox, self._supervise(r._receiver)))
|
479
|
+
def _port(self, once: bool = False) -> "Tuple[Port[R], PortReceiver[R]]":
|
480
|
+
p, r = super()._port(once=once)
|
481
|
+
monitor: Optional[Shared[Exception]] = self._actor_mesh.supervision_event()
|
482
|
+
r._set_monitor(monitor)
|
483
|
+
return (p, r)
|
350
484
|
|
351
485
|
def _rref(self, args, kwargs):
|
352
486
|
self._check_arguments(args, kwargs)
|
@@ -386,28 +520,48 @@ def as_endpoint(
|
|
386
520
|
if explicit_response_port
|
387
521
|
else MethodSpecifier.ReturnsResponse
|
388
522
|
)
|
389
|
-
return
|
390
|
-
not_an_endpoint._ref._actor_mesh_ref,
|
523
|
+
return not_an_endpoint._ref._endpoint(
|
391
524
|
kind(not_an_endpoint._name),
|
392
525
|
getattr(not_an_endpoint._ref, not_an_endpoint._name),
|
393
|
-
not_an_endpoint._ref._mailbox,
|
394
526
|
propagate,
|
395
527
|
explicit_response_port,
|
396
528
|
)
|
397
529
|
|
398
530
|
|
399
531
|
class Accumulator(Generic[P, R, A]):
|
532
|
+
"""
|
533
|
+
Accumulate the result of a broadcast invocation of an endpoint
|
534
|
+
across a sliced mesh.
|
535
|
+
|
536
|
+
Usage:
|
537
|
+
>>> counter = Accumulator(Actor.increment, 0, lambda x, y: x + y)
|
538
|
+
"""
|
539
|
+
|
400
540
|
def __init__(
|
401
541
|
self, endpoint: Endpoint[P, R], identity: A, combine: Callable[[A, R], A]
|
402
542
|
) -> None:
|
543
|
+
"""
|
544
|
+
Args:
|
545
|
+
endpoint: Endpoint to accumulate the result of.
|
546
|
+
identity: Initial value of the accumulated value before the first combine invocation.
|
547
|
+
combine: Lambda invoked for combining the result of the endpoint with the accumulated value.
|
548
|
+
"""
|
403
549
|
self._endpoint: Endpoint[P, R] = endpoint
|
404
550
|
self._identity: A = identity
|
405
551
|
self._combine: Callable[[A, R], A] = combine
|
406
552
|
|
407
553
|
def accumulate(self, *args: P.args, **kwargs: P.kwargs) -> "Future[A]":
|
408
|
-
|
409
|
-
|
410
|
-
|
554
|
+
"""
|
555
|
+
Accumulate the result of the endpoint invocation.
|
556
|
+
|
557
|
+
Args:
|
558
|
+
args: Arguments to pass to the endpoint.
|
559
|
+
kwargs: Keyword arguments to pass to the endpoint.
|
560
|
+
|
561
|
+
Returns:
|
562
|
+
Future that resolves to the accumulated value.
|
563
|
+
"""
|
564
|
+
gen: Generator[Future[R], None, None] = self._endpoint.stream(*args, **kwargs)
|
411
565
|
|
412
566
|
async def impl() -> A:
|
413
567
|
value = self._identity
|
@@ -420,7 +574,7 @@ class Accumulator(Generic[P, R, A]):
|
|
420
574
|
|
421
575
|
class ValueMesh(MeshTrait, Generic[R]):
|
422
576
|
"""
|
423
|
-
|
577
|
+
A mesh that holds the result of an endpoint invocation.
|
424
578
|
"""
|
425
579
|
|
426
580
|
def __init__(self, shape: Shape, values: List[R]) -> None:
|
@@ -431,6 +585,18 @@ class ValueMesh(MeshTrait, Generic[R]):
|
|
431
585
|
return ValueMesh(shape, self._values)
|
432
586
|
|
433
587
|
def item(self, **kwargs) -> R:
|
588
|
+
"""
|
589
|
+
Get the value at the given coordinates.
|
590
|
+
|
591
|
+
Args:
|
592
|
+
kwargs: Coordinates to get the value at.
|
593
|
+
|
594
|
+
Returns:
|
595
|
+
Value at the given coordinate.
|
596
|
+
|
597
|
+
Raises:
|
598
|
+
KeyError: If invalid coordinates are provided.
|
599
|
+
"""
|
434
600
|
coordinates = [kwargs.pop(label) for label in self._labels]
|
435
601
|
if kwargs:
|
436
602
|
raise KeyError(f"item has extra dimensions: {list(kwargs.keys())}")
|
@@ -438,17 +604,22 @@ class ValueMesh(MeshTrait, Generic[R]):
|
|
438
604
|
return self._values[self._ndslice.nditem(coordinates)]
|
439
605
|
|
440
606
|
def items(self) -> Iterable[Tuple[Point, R]]:
|
441
|
-
|
442
|
-
|
607
|
+
"""
|
608
|
+
Generator that yields a (point, value) pair for every coordinate in the mesh.
|
609
|
+
|
610
|
+
Returns:
|
611
|
+
Values at all coordinates.
|
612
|
+
"""
|
613
|
+
extent = self._shape.extent
|
614
|
+
for i, rank in enumerate(self._shape.ranks()):
|
615
|
+
yield Point(i, extent), self._values[rank]
|
443
616
|
|
444
617
|
def __iter__(self) -> Iterator[Tuple[Point, R]]:
|
445
618
|
return iter(self.items())
|
446
619
|
|
447
|
-
def __len__(self) -> int:
|
448
|
-
return len(self._shape)
|
449
|
-
|
450
620
|
def __repr__(self) -> str:
|
451
|
-
|
621
|
+
body = indent(pformat(tuple(self.items())), " ")
|
622
|
+
return f"ValueMesh({self._shape.extent}):\n{body}"
|
452
623
|
|
453
624
|
@property
|
454
625
|
def _ndslice(self) -> NDSlice:
|
@@ -467,14 +638,27 @@ def send(
|
|
467
638
|
selection: Selection = "all",
|
468
639
|
) -> None:
|
469
640
|
"""
|
470
|
-
|
641
|
+
Fire-and-forget broadcast invocation of the endpoint across a given selection of the mesh.
|
471
642
|
|
472
|
-
|
643
|
+
This sends the message to all actors but does not wait for any result. Use the port provided to
|
644
|
+
send the response back to the caller.
|
645
|
+
|
646
|
+
Args:
|
647
|
+
endpoint: Endpoint to invoke.
|
648
|
+
args: Arguments to pass to the endpoint.
|
649
|
+
kwargs: Keyword arguments to pass to the endpoint.
|
650
|
+
port: Handle to send the response to.
|
651
|
+
selection: Selection query representing a subset of the mesh.
|
473
652
|
"""
|
474
653
|
endpoint._send(args, kwargs, port, selection)
|
475
654
|
|
476
655
|
|
477
656
|
class Port(Generic[R]):
|
657
|
+
"""
|
658
|
+
Handle used to send reliable in-order messages through a channel to
|
659
|
+
a PortReceiver.
|
660
|
+
"""
|
661
|
+
|
478
662
|
def __init__(
|
479
663
|
self,
|
480
664
|
port_ref: PortRef | OncePortRef,
|
@@ -486,6 +670,13 @@ class Port(Generic[R]):
|
|
486
670
|
self._rank = rank
|
487
671
|
|
488
672
|
def send(self, obj: R) -> None:
|
673
|
+
"""
|
674
|
+
Fire-and-forget send R-typed objects in order
|
675
|
+
through a channel to its corresponding PortReceiver.
|
676
|
+
|
677
|
+
Args:
|
678
|
+
obj: R-typed object to send.
|
679
|
+
"""
|
489
680
|
self._port_ref.send(
|
490
681
|
self._mailbox,
|
491
682
|
PythonMessage(PythonMessageKind.Result(self._rank), _pickle(obj)),
|
@@ -522,62 +713,65 @@ R = TypeVar("R")
|
|
522
713
|
|
523
714
|
T = TypeVar("T")
|
524
715
|
|
525
|
-
if TYPE_CHECKING:
|
526
|
-
# Python <= 3.10 cannot inherit from Generic[R] and NamedTuple at the same time.
|
527
|
-
# we only need it for type checking though, so copypasta it until 3.11.
|
528
|
-
class PortTuple(NamedTuple, Generic[R]):
|
529
|
-
sender: "Port[R]"
|
530
|
-
receiver: "PortReceiver[R]"
|
531
|
-
|
532
|
-
@staticmethod
|
533
|
-
def create(mailbox: Mailbox, once: bool = False) -> "PortTuple[Any]":
|
534
|
-
handle, receiver = mailbox.open_once_port() if once else mailbox.open_port()
|
535
|
-
port_ref = handle.bind()
|
536
|
-
return PortTuple(
|
537
|
-
Port(port_ref, mailbox, rank=None),
|
538
|
-
PortReceiver(mailbox, receiver),
|
539
|
-
)
|
540
|
-
else:
|
541
|
-
|
542
|
-
class PortTuple(NamedTuple):
|
543
|
-
sender: "Port[Any]"
|
544
|
-
receiver: "PortReceiver[Any]"
|
545
|
-
|
546
|
-
@staticmethod
|
547
|
-
def create(mailbox: Mailbox, once: bool = False) -> "PortTuple[Any]":
|
548
|
-
handle, receiver = mailbox.open_once_port() if once else mailbox.open_port()
|
549
|
-
port_ref = handle.bind()
|
550
|
-
return PortTuple(
|
551
|
-
Port(port_ref, mailbox, rank=None),
|
552
|
-
PortReceiver(mailbox, receiver),
|
553
|
-
)
|
554
|
-
|
555
716
|
|
556
717
|
# advanced lower-level API for sending messages. This is intentionally
|
557
718
|
# not part of the Endpoint API because the way it accepts arguments
|
558
719
|
# and handles concerns is different.
|
559
|
-
|
560
|
-
|
720
|
+
class Channel(Generic[R]):
|
721
|
+
"""
|
722
|
+
An advanced low level API for a communication channel used for message passing
|
723
|
+
between actors.
|
561
724
|
|
725
|
+
Provides static methods to create communication channels with port pairs
|
726
|
+
for sending and receiving messages of type R.
|
727
|
+
"""
|
562
728
|
|
563
|
-
|
564
|
-
|
565
|
-
|
566
|
-
|
567
|
-
|
729
|
+
@staticmethod
|
730
|
+
def open(once: bool = False) -> Tuple["Port[R]", "PortReceiver[R]"]:
|
731
|
+
""" """
|
732
|
+
mailbox = context().actor_instance._mailbox
|
733
|
+
handle, receiver = mailbox.open_once_port() if once else mailbox.open_port()
|
734
|
+
port_ref = handle.bind()
|
735
|
+
return (
|
736
|
+
Port(port_ref, mailbox, rank=None),
|
737
|
+
PortReceiver(mailbox, receiver),
|
738
|
+
)
|
739
|
+
|
740
|
+
@staticmethod
|
741
|
+
def open_ranked(once: bool = False) -> Tuple["Port[R]", "RankedPortReceiver[R]"]:
|
742
|
+
send, recv = Channel[R].open()
|
743
|
+
return (send, recv.ranked())
|
568
744
|
|
569
745
|
|
570
746
|
class PortReceiver(Generic[R]):
|
747
|
+
"""
|
748
|
+
Receiver for messages sent through a communication channel.
|
749
|
+
|
750
|
+
Handles receiving R-typed objects sent from a corresponding Port.
|
751
|
+
Asynchronous message reception with optional supervision
|
752
|
+
monitoring for error handling.
|
753
|
+
"""
|
754
|
+
|
571
755
|
def __init__(
|
572
756
|
self,
|
573
757
|
mailbox: Mailbox,
|
574
758
|
receiver: "PortReceiverBase",
|
759
|
+
monitor: "Optional[Shared[Exception]]" = None,
|
575
760
|
) -> None:
|
576
761
|
self._mailbox: Mailbox = mailbox
|
762
|
+
self._monitor = monitor
|
577
763
|
self._receiver = receiver
|
578
764
|
|
579
765
|
async def _recv(self) -> R:
|
580
|
-
|
766
|
+
awaitable = self._receiver.recv_task()
|
767
|
+
if self._monitor is None:
|
768
|
+
result = await awaitable
|
769
|
+
else:
|
770
|
+
# type: ignore
|
771
|
+
result, i = await PythonTask.select_one([self._monitor.task(), awaitable])
|
772
|
+
if i == 0:
|
773
|
+
raise result
|
774
|
+
return self._process(result)
|
581
775
|
|
582
776
|
def _process(self, msg: PythonMessage) -> R:
|
583
777
|
# TODO: Try to do something more structured than a cast here
|
@@ -593,6 +787,12 @@ class PortReceiver(Generic[R]):
|
|
593
787
|
def recv(self) -> "Future[R]":
|
594
788
|
return Future(coro=self._recv())
|
595
789
|
|
790
|
+
def ranked(self) -> "RankedPortReceiver[R]":
|
791
|
+
return RankedPortReceiver[R](self._mailbox, self._receiver, self._monitor)
|
792
|
+
|
793
|
+
def _set_monitor(self, monitor: "Optional[Shared[Exception]]"):
|
794
|
+
self._monitor = monitor
|
795
|
+
|
596
796
|
|
597
797
|
class RankedPortReceiver(PortReceiver[Tuple[int, R]]):
|
598
798
|
def _process(self, msg: PythonMessage) -> Tuple[int, R]:
|
@@ -614,6 +814,8 @@ singleton_shape = Shape([], NDSlice(offset=0, sizes=[], strides=[]))
|
|
614
814
|
# we need to signal to the consumer of the PythonTask object that the thread really isn't in an async context.
|
615
815
|
# We do this by blanking out the running event loop during the call to the synchronous actor function.
|
616
816
|
|
817
|
+
MESSAGES_HANDLED = METER.create_counter("py_mesages_handled")
|
818
|
+
|
617
819
|
|
618
820
|
class _Actor:
|
619
821
|
"""
|
@@ -637,30 +839,28 @@ class _Actor:
|
|
637
839
|
|
638
840
|
async def handle(
|
639
841
|
self,
|
640
|
-
|
641
|
-
|
642
|
-
shape: Shape,
|
643
|
-
method_spec: MethodSpecifier,
|
842
|
+
ctx: Context,
|
843
|
+
method: MethodSpecifier,
|
644
844
|
message: bytes,
|
645
845
|
panic_flag: PanicFlag,
|
646
846
|
local_state: Iterable[Any],
|
647
|
-
|
847
|
+
response_port: "PortProtocol[Any]",
|
648
848
|
) -> None:
|
849
|
+
MESSAGES_HANDLED.add(1)
|
649
850
|
# response_port can be None. If so, then sending to port will drop the response,
|
650
851
|
# and raise any exceptions to the caller.
|
651
852
|
try:
|
652
|
-
ctx: MonarchContext = MonarchContext(
|
653
|
-
mailbox, mailbox.actor_id.proc_id, Point(rank, shape)
|
654
|
-
)
|
655
853
|
_context.set(ctx)
|
656
854
|
|
657
855
|
DebugContext.set(DebugContext())
|
658
856
|
|
659
857
|
args, kwargs = unflatten(message, local_state)
|
660
858
|
|
661
|
-
match
|
859
|
+
match method:
|
662
860
|
case MethodSpecifier.Init():
|
663
|
-
|
861
|
+
ins = ctx.actor_instance
|
862
|
+
Class, ins.proc_mesh, ins._controller_controller, *args = args
|
863
|
+
ins.rank = ctx.message_rank
|
664
864
|
try:
|
665
865
|
self.instance = Class(*args, **kwargs)
|
666
866
|
except Exception as e:
|
@@ -668,13 +868,13 @@ class _Actor:
|
|
668
868
|
e, f"Remote actor {Class}.__init__ call failed."
|
669
869
|
)
|
670
870
|
raise e
|
671
|
-
|
871
|
+
response_port.send(None)
|
672
872
|
return None
|
673
|
-
case MethodSpecifier.ReturnsResponse(name=
|
873
|
+
case MethodSpecifier.ReturnsResponse(name=method_name):
|
674
874
|
pass
|
675
|
-
case MethodSpecifier.ExplicitPort(name=
|
676
|
-
args = (
|
677
|
-
|
875
|
+
case MethodSpecifier.ExplicitPort(name=method_name):
|
876
|
+
args = (response_port, *args)
|
877
|
+
response_port = DroppingPort()
|
678
878
|
|
679
879
|
if self.instance is None:
|
680
880
|
# This could happen because of the following reasons. Both
|
@@ -687,52 +887,50 @@ class _Actor:
|
|
687
887
|
# should never happen. It indicates either a bug in the
|
688
888
|
# message delivery mechanism, or the framework accidentally
|
689
889
|
# mixed the usage of cast and direct send.
|
690
|
-
|
890
|
+
|
891
|
+
error_message = f"Actor object is missing when executing method {method_name} on actor {ctx.actor_instance.actor_id}."
|
691
892
|
if self._saved_error is not None:
|
692
893
|
error_message += (
|
693
894
|
f" This is likely due to an earlier error: {self._saved_error}"
|
694
895
|
)
|
695
896
|
raise AssertionError(error_message)
|
696
|
-
|
897
|
+
|
898
|
+
the_method = getattr(self.instance, method_name)
|
697
899
|
if isinstance(the_method, EndpointProperty):
|
698
|
-
module = the_method._method.__module__
|
699
900
|
the_method = functools.partial(the_method._method, self.instance)
|
700
|
-
else:
|
701
|
-
module = the_method.__module__
|
702
901
|
|
703
902
|
if inspect.iscoroutinefunction(the_method):
|
704
903
|
|
705
904
|
async def instrumented():
|
706
|
-
|
707
|
-
|
708
|
-
|
709
|
-
|
710
|
-
|
711
|
-
|
712
|
-
|
713
|
-
|
714
|
-
|
715
|
-
|
716
|
-
|
717
|
-
|
718
|
-
|
719
|
-
raise e
|
720
|
-
exit_span()
|
905
|
+
with TRACER.start_as_current_span(
|
906
|
+
method_name,
|
907
|
+
attributes={"actor_id": str(ctx.actor_instance.actor_id)},
|
908
|
+
):
|
909
|
+
try:
|
910
|
+
result = await the_method(*args, **kwargs)
|
911
|
+
self._maybe_exit_debugger()
|
912
|
+
except Exception as e:
|
913
|
+
logging.critical(
|
914
|
+
"Unhandled exception in actor endpoint",
|
915
|
+
exc_info=e,
|
916
|
+
)
|
917
|
+
raise e
|
721
918
|
return result
|
722
919
|
|
723
920
|
result = await instrumented()
|
724
921
|
else:
|
725
|
-
|
726
|
-
|
727
|
-
|
728
|
-
|
729
|
-
|
730
|
-
|
731
|
-
|
922
|
+
with TRACER.start_as_current_span(
|
923
|
+
method_name,
|
924
|
+
attributes={"actor_id": str(ctx.actor_instance.actor_id)},
|
925
|
+
):
|
926
|
+
with fake_sync_state():
|
927
|
+
result = the_method(*args, **kwargs)
|
928
|
+
self._maybe_exit_debugger()
|
929
|
+
|
930
|
+
response_port.send(result)
|
732
931
|
except Exception as e:
|
733
932
|
self._post_mortem_debug(e.__traceback__)
|
734
|
-
|
735
|
-
port.exception(ActorError(e))
|
933
|
+
response_port.exception(ActorError(e))
|
736
934
|
except BaseException as e:
|
737
935
|
self._post_mortem_debug(e.__traceback__)
|
738
936
|
# A BaseException can be thrown in the case of a Rust panic.
|
@@ -754,21 +952,33 @@ class _Actor:
|
|
754
952
|
DebugContext.set(DebugContext())
|
755
953
|
|
756
954
|
def _post_mortem_debug(self, exc_tb) -> None:
|
757
|
-
from monarch._src.actor.debugger import
|
955
|
+
from monarch._src.actor.debugger.debugger import debug_controller
|
758
956
|
|
759
957
|
if (pdb_wrapper := DebugContext.get().pdb_wrapper) is not None:
|
760
958
|
with fake_sync_state():
|
761
|
-
ctx =
|
959
|
+
ctx = context()
|
960
|
+
msg_rank = ctx.message_rank
|
762
961
|
pdb_wrapper = PdbWrapper(
|
763
|
-
|
764
|
-
|
765
|
-
ctx.
|
766
|
-
|
962
|
+
msg_rank.rank,
|
963
|
+
{k: msg_rank[k] for k in msg_rank},
|
964
|
+
ctx.actor_instance.actor_id,
|
965
|
+
debug_controller(),
|
767
966
|
)
|
768
967
|
DebugContext.set(DebugContext(pdb_wrapper))
|
769
968
|
pdb_wrapper.post_mortem(exc_tb)
|
770
969
|
self._maybe_exit_debugger(do_continue=False)
|
771
970
|
|
971
|
+
def _handle_undeliverable_message(
|
972
|
+
self, message: UndeliverableMessageEnvelope
|
973
|
+
) -> bool:
|
974
|
+
handle_undeliverable = getattr(
|
975
|
+
self.instance, "_handle_undeliverable_message", None
|
976
|
+
)
|
977
|
+
if handle_undeliverable is not None:
|
978
|
+
return handle_undeliverable(message)
|
979
|
+
else:
|
980
|
+
return False
|
981
|
+
|
772
982
|
|
773
983
|
def _is_mailbox(x: object) -> bool:
|
774
984
|
if hasattr(x, "__monarch_ref__"):
|
@@ -787,7 +997,7 @@ def _pickle(obj: object) -> bytes:
|
|
787
997
|
return msg
|
788
998
|
|
789
999
|
|
790
|
-
class Actor(MeshTrait):
|
1000
|
+
class Actor(MeshTrait, DeprecatedNotAFuture):
|
791
1001
|
@functools.cached_property
|
792
1002
|
def logger(cls) -> logging.Logger:
|
793
1003
|
lgr = logging.getLogger(cls.__class__.__name__)
|
@@ -806,20 +1016,48 @@ class Actor(MeshTrait):
|
|
806
1016
|
"actor implementations are not meshes, but we can't convince the typechecker of it..."
|
807
1017
|
)
|
808
1018
|
|
809
|
-
def _new_with_shape(self, shape: Shape) ->
|
1019
|
+
def _new_with_shape(self, shape: Shape) -> Self:
|
810
1020
|
raise NotImplementedError(
|
811
1021
|
"actor implementations are not meshes, but we can't convince the typechecker of it..."
|
812
1022
|
)
|
813
1023
|
|
1024
|
+
@property
|
1025
|
+
def initialized(self):
|
1026
|
+
raise NotImplementedError(
|
1027
|
+
"actor implementations are not meshes, but we can't convince the typechecker of it..."
|
1028
|
+
)
|
1029
|
+
|
1030
|
+
def _handle_undeliverable_message(
|
1031
|
+
self, message: UndeliverableMessageEnvelope
|
1032
|
+
) -> bool:
|
1033
|
+
# Return False to indicate that the undeliverable message was not handled.
|
1034
|
+
return False
|
1035
|
+
|
1036
|
+
|
1037
|
+
class ActorMesh(MeshTrait, Generic[T], DeprecatedNotAFuture):
|
1038
|
+
"""
|
1039
|
+
A group of actor instances of the same class.
|
1040
|
+
|
1041
|
+
Represents a collection of T-typed actor instances spawned at most once per process
|
1042
|
+
that can be communicated with collectively or individually. Provides
|
1043
|
+
methods for spawning actors, managing their lifecycle, and creating
|
1044
|
+
endpoints for method invocation across the mesh.
|
1045
|
+
"""
|
814
1046
|
|
815
|
-
class ActorMeshRef(MeshTrait):
|
816
1047
|
def __init__(
|
817
|
-
self,
|
1048
|
+
self,
|
1049
|
+
Class: Type[T],
|
1050
|
+
inner: "ActorMeshProtocol",
|
1051
|
+
mailbox: Mailbox,
|
1052
|
+
shape: Shape,
|
1053
|
+
proc_mesh: "Optional[ProcMesh]",
|
818
1054
|
) -> None:
|
819
1055
|
self.__name__: str = Class.__name__
|
820
1056
|
self._class: Type[T] = Class
|
821
|
-
self.
|
1057
|
+
self._inner: "ActorMeshProtocol" = inner
|
822
1058
|
self._mailbox: Mailbox = mailbox
|
1059
|
+
self._shape = shape
|
1060
|
+
self._proc_mesh = proc_mesh
|
823
1061
|
for attr_name in dir(self._class):
|
824
1062
|
attr_value = getattr(self._class, attr_name, None)
|
825
1063
|
if isinstance(attr_value, EndpointProperty):
|
@@ -832,11 +1070,9 @@ class ActorMeshRef(MeshTrait):
|
|
832
1070
|
setattr(
|
833
1071
|
self,
|
834
1072
|
attr_name,
|
835
|
-
|
836
|
-
self._actor_mesh_ref,
|
1073
|
+
self._endpoint(
|
837
1074
|
kind(attr_name),
|
838
1075
|
attr_value._method,
|
839
|
-
self._mailbox,
|
840
1076
|
attr_value._propagator,
|
841
1077
|
attr_value._explicit_response_port,
|
842
1078
|
),
|
@@ -847,53 +1083,95 @@ class ActorMeshRef(MeshTrait):
|
|
847
1083
|
return NotAnEndpoint(self, attr)
|
848
1084
|
raise AttributeError(attr)
|
849
1085
|
|
850
|
-
def
|
1086
|
+
def _endpoint(
|
851
1087
|
self,
|
852
|
-
|
853
|
-
|
854
|
-
|
1088
|
+
name: MethodSpecifier,
|
1089
|
+
impl: Callable[Concatenate[Any, P], Awaitable[R]],
|
1090
|
+
propagator: Any,
|
1091
|
+
explicit_response_port: bool,
|
1092
|
+
):
|
1093
|
+
return ActorEndpoint(
|
1094
|
+
self._inner,
|
1095
|
+
self._shape,
|
1096
|
+
self._proc_mesh,
|
1097
|
+
name,
|
1098
|
+
impl,
|
1099
|
+
self._mailbox,
|
1100
|
+
propagator,
|
1101
|
+
explicit_response_port,
|
1102
|
+
)
|
1103
|
+
|
1104
|
+
@classmethod
|
1105
|
+
def _create(
|
1106
|
+
cls,
|
1107
|
+
Class: Type[T],
|
1108
|
+
actor_mesh: "PythonActorMesh | PythonActorMeshImpl",
|
1109
|
+
mailbox: Mailbox,
|
1110
|
+
shape: Shape,
|
1111
|
+
proc_mesh: "ProcMesh",
|
1112
|
+
controller_controller: Optional["_ControllerController"],
|
1113
|
+
# args and kwargs are passed to the __init__ method of the user defined
|
1114
|
+
# python actor object.
|
1115
|
+
*args: Any,
|
1116
|
+
**kwargs: Any,
|
1117
|
+
) -> "ActorMesh[T]":
|
1118
|
+
if isinstance(actor_mesh, PythonActorMeshImpl):
|
1119
|
+
actor_mesh = _ActorMeshRefImpl.from_hyperactor_mesh(
|
1120
|
+
mailbox, shape, actor_mesh, proc_mesh
|
1121
|
+
)
|
1122
|
+
|
1123
|
+
mesh = cls(Class, actor_mesh, mailbox, shape, proc_mesh)
|
1124
|
+
|
855
1125
|
async def null_func(*_args: Iterable[Any], **_kwargs: Dict[str, Any]) -> None:
|
856
1126
|
return None
|
857
1127
|
|
858
|
-
|
859
|
-
|
1128
|
+
# send __init__ message to the mesh to initialize the user defined
|
1129
|
+
# python actor object.
|
1130
|
+
ep = mesh._endpoint(
|
860
1131
|
MethodSpecifier.Init(),
|
861
1132
|
null_func,
|
862
|
-
self._mailbox,
|
863
1133
|
None,
|
864
1134
|
False,
|
865
1135
|
)
|
866
|
-
send(ep, (
|
867
|
-
|
868
|
-
|
869
|
-
|
870
|
-
|
871
|
-
|
872
|
-
|
873
|
-
|
874
|
-
|
1136
|
+
send(ep, (mesh._class, proc_mesh, controller_controller, *args), kwargs)
|
1137
|
+
|
1138
|
+
return mesh
|
1139
|
+
|
1140
|
+
@classmethod
|
1141
|
+
def from_actor_id(
|
1142
|
+
cls,
|
1143
|
+
Class: Type[T],
|
1144
|
+
actor_id: ActorId,
|
1145
|
+
mailbox: Mailbox,
|
1146
|
+
) -> "ActorMesh[T]":
|
1147
|
+
return cls(
|
1148
|
+
Class, _SingletonActorAdapator(actor_id), mailbox, singleton_shape, None
|
875
1149
|
)
|
876
1150
|
|
1151
|
+
def __reduce_ex__(self, protocol: ...) -> "Tuple[Type[ActorMesh], Tuple[Any, ...]]":
|
1152
|
+
return ActorMesh, (self._class, self._inner, self._mailbox, self._shape, None)
|
1153
|
+
|
877
1154
|
@property
|
878
1155
|
def _ndslice(self) -> NDSlice:
|
879
|
-
return self.
|
1156
|
+
return self._shape.ndslice
|
880
1157
|
|
881
1158
|
@property
|
882
1159
|
def _labels(self) -> Iterable[str]:
|
883
|
-
return self.
|
1160
|
+
return self._shape.labels
|
884
1161
|
|
885
|
-
def _new_with_shape(self, shape: Shape) -> "
|
886
|
-
|
887
|
-
|
888
|
-
_ActorMeshRefImpl.from_actor_ref_with_shape(self._actor_mesh_ref, shape),
|
889
|
-
self._mailbox,
|
890
|
-
)
|
1162
|
+
def _new_with_shape(self, shape: Shape) -> "ActorMesh[T]":
|
1163
|
+
sliced = self._inner.new_with_shape(shape)
|
1164
|
+
return ActorMesh(self._class, sliced, self._mailbox, shape, self._proc_mesh)
|
891
1165
|
|
892
1166
|
def __repr__(self) -> str:
|
893
|
-
return f"
|
1167
|
+
return f"ActorMesh(class={self._class}, shape={self._shape}), inner={type(self._inner)})"
|
894
1168
|
|
895
|
-
|
896
|
-
|
1169
|
+
def stop(self) -> "Future[None]":
|
1170
|
+
return Future(coro=self._inner.stop())
|
1171
|
+
|
1172
|
+
@property
|
1173
|
+
def initialized(self) -> Future[None]:
|
1174
|
+
return Future(coro=self._inner.initialized())
|
897
1175
|
|
898
1176
|
|
899
1177
|
class ActorError(Exception):
|
@@ -909,27 +1187,35 @@ class ActorError(Exception):
|
|
909
1187
|
message: str = "A remote actor call has failed.",
|
910
1188
|
) -> None:
|
911
1189
|
self.exception = exception
|
912
|
-
|
1190
|
+
# Need to stringify the exception early, because the PyPI package
|
1191
|
+
# exceptiongroup may monkeypatch the "TracebackException" class for python
|
1192
|
+
# versions < 3.11. If it gets unpickled in a different scope without
|
1193
|
+
# using that monkeypatch, it'll have an exception in "format()".
|
1194
|
+
# Store the traceback string instead which shouldn't change between machines.
|
1195
|
+
actor_mesh_ref_tb = TracebackException.from_exception(exception).format()
|
1196
|
+
# Replace any traceback lines to indicate it's a remote call traceback.
|
1197
|
+
actor_mesh_ref_tb = (
|
1198
|
+
s.replace(
|
1199
|
+
"Traceback (most recent call last):",
|
1200
|
+
"Traceback of where the remote call failed (most recent call last):",
|
1201
|
+
)
|
1202
|
+
for s in actor_mesh_ref_tb
|
1203
|
+
)
|
1204
|
+
self.exception_formatted = "".join(actor_mesh_ref_tb)
|
913
1205
|
self.message = message
|
914
1206
|
|
915
1207
|
def __str__(self) -> str:
|
916
|
-
|
917
|
-
actor_mesh_ref_tb = "".join(traceback.format_list(self.actor_mesh_ref_frames))
|
918
|
-
return (
|
919
|
-
f"{self.message}\n"
|
920
|
-
f"Traceback of where the remote call failed (most recent call last):\n{actor_mesh_ref_tb}{type(self.exception).__name__}: {exe}"
|
921
|
-
)
|
1208
|
+
return f"{self.message}\n {self.exception_formatted}"
|
922
1209
|
|
923
1210
|
|
924
1211
|
def current_actor_name() -> str:
|
925
|
-
return str(
|
1212
|
+
return str(context().actor_instance.actor_id)
|
926
1213
|
|
927
1214
|
|
928
1215
|
def current_rank() -> Point:
|
929
|
-
|
930
|
-
return ctx.point
|
1216
|
+
return context().message_rank
|
931
1217
|
|
932
1218
|
|
933
1219
|
def current_size() -> Dict[str, int]:
|
934
|
-
|
935
|
-
return
|
1220
|
+
r = context().message_rank.extent
|
1221
|
+
return {k: r[k] for k in r}
|