torchmonarch-nightly 2025.6.16__cp310-cp310-manylinux2014_x86_64.whl → 2025.6.18__cp310-cp310-manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
monarch/_rust_bindings.so CHANGED
Binary file
monarch/actor_mesh.py CHANGED
@@ -6,7 +6,6 @@
6
6
 
7
7
  # pyre-unsafe
8
8
 
9
- import asyncio
10
9
  import collections
11
10
  import contextvars
12
11
  import functools
@@ -27,9 +26,7 @@ from typing import (
27
26
  Callable,
28
27
  cast,
29
28
  Concatenate,
30
- Coroutine,
31
29
  Dict,
32
- Generator,
33
30
  Generic,
34
31
  Iterable,
35
32
  List,
@@ -99,39 +96,6 @@ _context: contextvars.ContextVar[MonarchContext] = contextvars.ContextVar(
99
96
  )
100
97
 
101
98
 
102
- # this was implemented in python 3.12 as an argument to task
103
- # but I have to backport to 3.10/3.11.
104
- def create_eager_task(coro: Awaitable[None]) -> asyncio.Future:
105
- iter = coro.__await__()
106
- try:
107
- first_yield = next(iter)
108
- return asyncio.create_task(RestOfCoroutine(first_yield, iter).run())
109
- except StopIteration as e:
110
- t = asyncio.Future()
111
- t.set_result(e.value)
112
- return t
113
-
114
-
115
- class RestOfCoroutine(Generic[T1, T2]):
116
- def __init__(self, first_yield: T1, iter: Generator[T2, None, T2]) -> None:
117
- self.first_yield: T1 | None = first_yield
118
- self.iter: Generator[T2, None, T2] = iter
119
-
120
- def __await__(self) -> Generator[T1, None, T1] | Generator[T2, None, T2]:
121
- first_yield = self.first_yield
122
- assert first_yield is not None
123
- yield first_yield
124
- self.first_yield = None
125
- while True:
126
- try:
127
- yield next(self.iter)
128
- except StopIteration as e:
129
- return e.value
130
-
131
- async def run(self) -> T1 | T2:
132
- return await self
133
-
134
-
135
99
  T = TypeVar("T")
136
100
  P = ParamSpec("P")
137
101
  R = TypeVar("R")
@@ -285,7 +249,18 @@ class Endpoint(Generic[P, R]):
285
249
  async def process() -> ValueMesh[R]:
286
250
  results: List[R] = [None] * len(self._actor_mesh) # pyre-fixme[9]
287
251
  for _ in range(len(self._actor_mesh)):
288
- rank, value = await r.recv() # pyre-fixme[23]
252
+ rank, value = await r.recv()
253
+ results[rank] = value
254
+ call_shape = Shape(
255
+ self._actor_mesh._shape.labels,
256
+ NDSlice.new_row_major(self._actor_mesh._shape.ndslice.sizes),
257
+ )
258
+ return ValueMesh(call_shape, results)
259
+
260
+ def process_blocking() -> ValueMesh[R]:
261
+ results: List[R] = [None] * len(self._actor_mesh) # pyre-fixme[9]
262
+ for _ in range(len(self._actor_mesh)):
263
+ rank, value = r.recv().get()
289
264
  results[rank] = value
290
265
  call_shape = Shape(
291
266
  self._actor_mesh._shape.labels,
@@ -293,7 +268,7 @@ class Endpoint(Generic[P, R]):
293
268
  )
294
269
  return ValueMesh(call_shape, results)
295
270
 
296
- return Future(process)
271
+ return Future(process, process_blocking)
297
272
 
298
273
  async def stream(self, *args: P.args, **kwargs: P.kwargs) -> AsyncGenerator[R, R]:
299
274
  """
@@ -362,6 +337,9 @@ class ValueMesh(MeshTrait, Generic[R]):
362
337
  def __len__(self) -> int:
363
338
  return len(self._shape)
364
339
 
340
+ def __repr__(self) -> str:
341
+ return f"ValueMesh({self._shape})"
342
+
365
343
  @property
366
344
  def _ndslice(self) -> NDSlice:
367
345
  return self._shape.ndslice
@@ -485,24 +463,36 @@ singleton_shape = Shape([], NDSlice(offset=0, sizes=[], strides=[]))
485
463
 
486
464
 
487
465
  class _Actor:
466
+ """
467
+ This is the message handling implementation of a Python actor.
468
+
469
+ The layering goes:
470
+ Rust `PythonActor` -> `_Actor` -> user-provided `Actor` instance
471
+
472
+ Messages are received from the Rust backend, and forwarded to the `handle`
473
+ methods on this class.
474
+
475
+ This class wraps the actual `Actor` instance provided by the user, and
476
+ routes messages to it, managing argument serialization/deserialization and
477
+ error handling.
478
+ """
479
+
488
480
  def __init__(self) -> None:
489
481
  self.instance: object | None = None
490
- self.active_requests: asyncio.Queue[asyncio.Future[object]] = asyncio.Queue()
491
- self.complete_task: asyncio.Task | None = None
492
482
 
493
- def handle(
483
+ async def handle(
494
484
  self, mailbox: Mailbox, message: PythonMessage, panic_flag: PanicFlag
495
- ) -> Optional[Coroutine[Any, Any, Any]]:
496
- return self.handle_cast(mailbox, 0, singleton_shape, message, panic_flag)
485
+ ) -> None:
486
+ return await self.handle_cast(mailbox, 0, singleton_shape, message, panic_flag)
497
487
 
498
- def handle_cast(
488
+ async def handle_cast(
499
489
  self,
500
490
  mailbox: Mailbox,
501
491
  rank: int,
502
492
  shape: Shape,
503
493
  message: PythonMessage,
504
494
  panic_flag: PanicFlag,
505
- ) -> Optional[Coroutine[Any, Any, Any]]:
495
+ ) -> None:
506
496
  port = (
507
497
  Port(message.response_port, mailbox, rank)
508
498
  if message.response_port
@@ -515,26 +505,21 @@ class _Actor:
515
505
  _context.set(ctx)
516
506
 
517
507
  args, kwargs = _unpickle(message.message, mailbox)
508
+
518
509
  if message.method == "__init__":
519
510
  Class, *args = args
520
511
  self.instance = Class(*args, **kwargs)
521
512
  return None
522
- else:
523
- the_method = getattr(self.instance, message.method)._method
524
513
 
525
- if not inspect.iscoroutinefunction(the_method):
526
- enter_span(
527
- the_method.__module__, message.method, str(ctx.mailbox.actor_id)
528
- )
529
- result = the_method(self.instance, *args, **kwargs)
530
- exit_span()
531
- if port is not None:
532
- port.send("result", result)
533
- return None
514
+ the_method = getattr(self.instance, message.method)._method
515
+
516
+ if inspect.iscoroutinefunction(the_method):
534
517
 
535
518
  async def instrumented():
536
519
  enter_span(
537
- the_method.__module__, message.method, str(ctx.mailbox.actor_id)
520
+ the_method.__module__,
521
+ message.method,
522
+ str(ctx.mailbox.actor_id),
538
523
  )
539
524
  try:
540
525
  result = await the_method(self.instance, *args, **kwargs)
@@ -547,39 +532,14 @@ class _Actor:
547
532
  exit_span()
548
533
  return result
549
534
 
550
- return self.run_async(
551
- ctx,
552
- self.run_task(port, instrumented(), panic_flag),
553
- )
554
- except Exception as e:
555
- traceback.print_exc()
556
- s = ActorError(e)
557
-
558
- # The exception is delivered to exactly one of:
559
- # (1) our caller, (2) our supervisor
560
- if port is not None:
561
- port.send("exception", s)
535
+ result = await instrumented()
562
536
  else:
563
- raise s from None
564
-
565
- async def run_async(
566
- self,
567
- ctx: MonarchContext,
568
- coroutine: Awaitable[None],
569
- ) -> None:
570
- _context.set(ctx)
571
- if self.complete_task is None:
572
- self.complete_task = asyncio.create_task(self._complete())
573
- await self.active_requests.put(create_eager_task(coroutine))
537
+ enter_span(
538
+ the_method.__module__, message.method, str(ctx.mailbox.actor_id)
539
+ )
540
+ result = the_method(self.instance, *args, **kwargs)
541
+ exit_span()
574
542
 
575
- async def run_task(
576
- self,
577
- port: Port | None,
578
- coroutine: Awaitable[Any],
579
- panic_flag: PanicFlag,
580
- ) -> None:
581
- try:
582
- result = await coroutine
583
543
  if port is not None:
584
544
  port.send("result", result)
585
545
  except Exception as e:
@@ -603,11 +563,6 @@ class _Actor:
603
563
  pass
604
564
  raise
605
565
 
606
- async def _complete(self) -> None:
607
- while True:
608
- task = await self.active_requests.get()
609
- await task
610
-
611
566
 
612
567
  def _is_mailbox(x: object) -> bool:
613
568
  return isinstance(x, Mailbox)
@@ -648,8 +603,8 @@ class Actor(MeshTrait):
648
603
  "actor implementations are not meshes, but we can't convince the typechecker of it..."
649
604
  )
650
605
 
651
- @endpoint
652
- async def _set_debug_client(self, client: "DebugClient") -> None:
606
+ @endpoint # pyre-ignore
607
+ def _set_debug_client(self, client: "DebugClient") -> None:
653
608
  point = MonarchContext.get().point
654
609
  # For some reason, using a lambda instead of functools.partial
655
610
  # confuses the pdb wrapper implementation.
@@ -750,6 +705,9 @@ class ActorMeshRef(MeshTrait):
750
705
  self._mailbox,
751
706
  )
752
707
 
708
+ def __repr__(self) -> str:
709
+ return f"ActorMeshRef(class={self._class}, shape={self._actor_mesh_ref._shape})"
710
+
753
711
 
754
712
  class ActorError(Exception):
755
713
  """
@@ -244,24 +244,24 @@ class DeviceMesh(Referenceable, MeshTrait):
244
244
  def rotate(self, **kwargs: Dict[str, int]):
245
245
  raise NotImplementedError()
246
246
 
247
- def rank(self, dims: Union[str, Sequence[str]]) -> int:
247
+ def rank(self, dims: Union[str, Sequence[str]]) -> torch.Tensor:
248
248
  self.define_remotely()
249
249
  if isinstance(dims, str):
250
250
  if dims not in self.names:
251
251
  raise KeyError(f"{self} does not have dimension {repr(dims)}")
252
252
  return _remote(
253
- "monarch.worker.worker._rank",
253
+ _rank,
254
254
  propagate=lambda _self, _dims: torch.full((), 0, dtype=torch.long),
255
255
  )(self, dims)
256
256
 
257
- combined_rank = 0
257
+ combined_rank: Any = 0
258
258
  for dim in dims:
259
259
  combined_rank *= self.size(dim)
260
260
  combined_rank += self.rank(dim)
261
261
  return combined_rank
262
262
 
263
263
  @property
264
- def ranks(self) -> dict[str, int]:
264
+ def ranks(self) -> dict[str, torch.Tensor]:
265
265
  return {dim: self.rank(dim) for dim in self.names}
266
266
 
267
267
  def process_idx(self):
@@ -334,6 +334,10 @@ class _ActiveMesh(TorchDispatchMode):
334
334
  return _remote(func, propagate=func)(*args, **kwargs)
335
335
 
336
336
 
337
+ def _rank(mesh, dim):
338
+ return torch.full((), mesh.dims[dim].rank, dtype=torch.long)
339
+
340
+
337
341
  @contextmanager
338
342
  def _dispatch():
339
343
  global _dispatch_enabled
@@ -401,7 +405,7 @@ def to_mesh(
401
405
 
402
406
  def slice_mesh(
403
407
  tensors: Any,
404
- **kwargs: Dict[str, Union[int, slice]],
408
+ **kwargs: Union[int, slice],
405
409
  ) -> Any:
406
410
  """
407
411
  Performs the slice_mesh operation for each tensor in tensors.
monarch/common/shape.py CHANGED
@@ -44,6 +44,9 @@ class MeshTrait(ABC):
44
44
  @abstractmethod
45
45
  def _labels(self) -> Tuple[str, ...]: ...
46
46
 
47
+ # mesh trait guarantees that its own calls to _new_with_shape
48
+ # will only ever select a shape that is a subspace of the
49
+ # current _ndslice.
47
50
  @abstractmethod
48
51
  def _new_with_shape(self, shape: Shape) -> Self: ...
49
52
 
monarch/common/tensor.py CHANGED
@@ -7,17 +7,20 @@
7
7
  # pyre-unsafe
8
8
  import itertools
9
9
  import traceback
10
+ import typing
10
11
  import warnings
11
12
  from collections import defaultdict
12
13
  from typing import (
13
14
  Any,
14
15
  Callable,
16
+ cast,
15
17
  Dict,
16
18
  Iterable,
17
19
  List,
18
20
  Literal,
19
21
  NamedTuple,
20
22
  Optional,
23
+ runtime_checkable,
21
24
  Sequence,
22
25
  TYPE_CHECKING,
23
26
  TypeVar,
@@ -35,7 +38,8 @@ from .base_tensor import BaseTensor
35
38
  from .borrows import StorageAliases
36
39
 
37
40
  if TYPE_CHECKING:
38
- from .device_mesh import DeviceMesh
41
+ from monarch.common.device_mesh import DeviceMesh
42
+
39
43
  from .fake import fake_call
40
44
  from .function import Propagator, ResolvableFunction
41
45
  from .invocation import Invocation
@@ -52,6 +56,12 @@ _valid_reduce = Literal[
52
56
  T = TypeVar("T")
53
57
 
54
58
 
59
+ @runtime_checkable
60
+ class HasDeviceMesh(typing.Protocol):
61
+ @property
62
+ def _device_mesh(self) -> "DeviceMesh": ...
63
+
64
+
55
65
  class DropLocation(NamedTuple):
56
66
  tensor_id: int
57
67
  traceback: List[traceback.FrameSummary]
@@ -167,7 +177,11 @@ class Tensor(Referenceable, BaseTensor):
167
177
  self._on_first_use(self)
168
178
  self._on_first_use = None
169
179
 
170
- def to_mesh(self, mesh: "DeviceMesh", stream: Optional["Stream"] = None):
180
+ def to_mesh(
181
+ self,
182
+ mesh: Union["DeviceMesh", "HasDeviceMesh"],
183
+ stream: Optional["Stream"] = None,
184
+ ):
171
185
  """
172
186
  Move data between one device mesh and another. Sizes of named dimensions must match.
173
187
  If mesh has dimensions that self.mesh does not, it will broadcast to those dimensions.
@@ -177,6 +191,8 @@ class Tensor(Referenceable, BaseTensor):
177
191
  t.slice_mesh(batch=0).to_mesh(t.mesh)
178
192
 
179
193
  """
194
+ if isinstance(mesh, HasDeviceMesh):
195
+ mesh = mesh._device_mesh
180
196
  return MeshSliceTensor(self, self.mesh).to_mesh(mesh, stream)
181
197
 
182
198
  def reduce_(
@@ -344,7 +360,7 @@ class Tensor(Referenceable, BaseTensor):
344
360
  )
345
361
  return r
346
362
 
347
- def slice_mesh(self, **kwargs: Dict[str, Union[int, slice]]) -> "MeshSliceTensor":
363
+ def slice_mesh(self, **kwargs: Union[int, slice]) -> "MeshSliceTensor":
348
364
  # technically a slice of a device mesh and a device mesh are not same thing
349
365
  # because a device mesh also has caches for doing collectives.
350
366
  # but this is an easy way to create a MeshSliceTensor until we optimize
@@ -368,8 +384,13 @@ class MeshSliceTensor:
368
384
  self.slicing = slicing
369
385
 
370
386
  def to_mesh(
371
- self, mesh: "DeviceMesh", stream: Optional["Stream"] = None
387
+ self,
388
+ mesh: Union["DeviceMesh", "HasDeviceMesh"],
389
+ stream: Optional["Stream"] = None,
372
390
  ) -> "Tensor":
391
+ if isinstance(mesh, HasDeviceMesh):
392
+ mesh = mesh._device_mesh
393
+
373
394
  if stream is None:
374
395
  stream = self.tensor.stream
375
396
 
@@ -11,7 +11,7 @@ import time
11
11
  import traceback
12
12
  from collections import deque
13
13
  from logging import Logger
14
- from typing import List, NamedTuple, Optional, Union
14
+ from typing import List, NamedTuple, Optional, TYPE_CHECKING, Union
15
15
 
16
16
  import torch.utils._python_dispatch
17
17
 
@@ -24,7 +24,13 @@ from monarch._rust_bindings.monarch_extension.mesh_controller import _Controller
24
24
  from monarch._rust_bindings.monarch_hyperactor.proc import ( # @manual=//monarch/monarch_extension:monarch_extension
25
25
  ActorId,
26
26
  )
27
- from monarch._rust_bindings.monarch_hyperactor.proc_mesh import ProcMesh as HyProcMesh
27
+
28
+ if TYPE_CHECKING:
29
+ from monarch._rust_bindings.monarch_hyperactor.proc_mesh import (
30
+ ProcMesh as HyProcMesh,
31
+ )
32
+ from monarch.proc_mesh import ProcMesh
33
+
28
34
  from monarch._rust_bindings.monarch_hyperactor.shape import Point
29
35
 
30
36
  from monarch._rust_bindings.monarch_messages.debugger import DebuggerAction
@@ -33,7 +39,6 @@ from monarch.common.controller_api import LogMessage, MessageResult
33
39
  from monarch.common.device_mesh import DeviceMesh, no_mesh
34
40
  from monarch.common.invocation import DeviceException, RemoteException
35
41
  from monarch.controller.debugger import read as debugger_read, write as debugger_write
36
- from monarch.proc_mesh import ProcMesh
37
42
  from monarch.rust_local_mesh import _get_worker_exec_info
38
43
  from pyre_extensions import none_throws
39
44
 
@@ -41,7 +46,7 @@ logger: Logger = logging.getLogger(__name__)
41
46
 
42
47
 
43
48
  class Controller(_Controller):
44
- def __init__(self, workers: HyProcMesh) -> None:
49
+ def __init__(self, workers: "HyProcMesh") -> None:
45
50
  super().__init__()
46
51
  # Buffer for messages unrelated to debugging that are received while a
47
52
  # debugger session is active.
@@ -250,7 +255,7 @@ class MeshClient(Client):
250
255
  self.inner.drain_and_stop()
251
256
 
252
257
 
253
- def spawn_tensor_engine(proc_mesh: ProcMesh) -> DeviceMesh:
258
+ def spawn_tensor_engine(proc_mesh: "ProcMesh") -> DeviceMesh:
254
259
  # This argument to Controller
255
260
  # is currently only used for debug printing. It should be fixed to
256
261
  # report the proc ID instead of the rank it currently does.
Binary file
monarch/proc_mesh.py CHANGED
@@ -7,8 +7,22 @@
7
7
  # pyre-strict
8
8
 
9
9
  import sys
10
+ from contextlib import AbstractContextManager
11
+
12
+ from typing import (
13
+ Any,
14
+ cast,
15
+ Dict,
16
+ List,
17
+ Optional,
18
+ Sequence,
19
+ Type,
20
+ TYPE_CHECKING,
21
+ TypeVar,
22
+ )
10
23
 
11
- from typing import Any, cast, List, Optional, Type, TypeVar
24
+ if TYPE_CHECKING:
25
+ import torch
12
26
 
13
27
  import monarch
14
28
  from monarch import ActorFuture as Future
@@ -24,7 +38,9 @@ from monarch._rust_bindings.monarch_hyperactor.shape import Shape, Slice
24
38
  from monarch.actor_mesh import _Actor, _ActorMeshRefImpl, Actor, ActorMeshRef
25
39
 
26
40
  from monarch.common._device_utils import _local_device_count
41
+ from monarch.common.device_mesh import DeviceMesh
27
42
  from monarch.common.shape import MeshTrait
43
+ from monarch.mesh_controller import spawn_tensor_engine
28
44
  from monarch.rdma import RDMAManager
29
45
 
30
46
  T = TypeVar("T")
@@ -45,25 +61,43 @@ def _allocate_blocking(alloc: Alloc) -> "ProcMesh":
45
61
 
46
62
 
47
63
  class ProcMesh(MeshTrait):
48
- def __init__(self, hy_proc_mesh: HyProcMesh) -> None:
64
+ def __init__(
65
+ self,
66
+ hy_proc_mesh: HyProcMesh,
67
+ _mock_shape: Optional[Shape] = None,
68
+ _device_mesh: Optional[DeviceMesh] = None,
69
+ ) -> None:
49
70
  self._proc_mesh = hy_proc_mesh
71
+ self._mock_shape: Optional[Shape] = _mock_shape
50
72
  self._mailbox: Mailbox = self._proc_mesh.client
51
- self._rdma_manager: RDMAManager = self._spawn_blocking(
52
- "rdma_manager", RDMAManager
53
- )
73
+ self._rdma_manager: Optional[RDMAManager] = None
74
+ self._maybe_device_mesh: Optional[DeviceMesh] = _device_mesh
75
+ if _mock_shape is None:
76
+ self._rdma_manager = self._spawn_blocking("rdma_manager", RDMAManager)
77
+
78
+ @property
79
+ def _shape(self) -> Shape:
80
+ return self._proc_mesh.shape if self._mock_shape is None else self._mock_shape
54
81
 
55
82
  @property
56
83
  def _ndslice(self) -> Slice:
57
- return self._proc_mesh.shape.ndslice
84
+ return self._shape.ndslice
58
85
 
59
86
  @property
60
87
  def _labels(self) -> List[str]:
61
- return self._proc_mesh.shape.labels
88
+ return self._shape.labels
62
89
 
63
90
  def _new_with_shape(self, shape: Shape) -> "ProcMesh":
64
- raise NotImplementedError("ProcMesh slicing is not implemeted yet.")
91
+ device_mesh = (
92
+ None
93
+ if self._device_mesh is None
94
+ else self._device_mesh._new_with_shape(shape)
95
+ )
96
+ return ProcMesh(self._proc_mesh, _mock_shape=shape, _device_mesh=device_mesh)
65
97
 
66
98
  def spawn(self, name: str, Class: Type[T], *args: Any, **kwargs: Any) -> Future[T]:
99
+ if self._mock_shape is not None:
100
+ raise NotImplementedError("NYI: spawn on slice of a proc mesh.")
67
101
  return Future(
68
102
  lambda: self._spawn_nonblocking(name, Class, *args, **kwargs),
69
103
  lambda: self._spawn_blocking(name, Class, *args, **kwargs),
@@ -120,6 +154,26 @@ class ProcMesh(MeshTrait):
120
154
  service._create(args, kwargs)
121
155
  return cast(T, service)
122
156
 
157
+ @property
158
+ def _device_mesh(self) -> "DeviceMesh":
159
+ if self._maybe_device_mesh is None:
160
+ if self._mock_shape is not None:
161
+ raise NotImplementedError(
162
+ "NYI: activating a proc mesh must first happen on the root proc_mesh until we fix spawning on submeshes."
163
+ )
164
+ self._maybe_device_mesh = spawn_tensor_engine(self)
165
+ return self._maybe_device_mesh
166
+
167
+ # pyre-ignore
168
+ def activate(self) -> AbstractContextManager:
169
+ return self._device_mesh.activate()
170
+
171
+ def rank_tensor(self, dim: str | Sequence[str]) -> "torch.Tensor":
172
+ return self._device_mesh.rank(dim)
173
+
174
+ def rank_tensors(self) -> Dict[str, "torch.Tensor"]:
175
+ return self._device_mesh.ranks
176
+
123
177
 
124
178
  async def local_proc_mesh_nonblocking(
125
179
  *, gpus: Optional[int] = None, hosts: int = 1
monarch/sim_mesh.py CHANGED
@@ -201,9 +201,11 @@ class Bootstrap:
201
201
 
202
202
  proxy_addr = proxy_addr or f"unix!@{_random_id()}-proxy"
203
203
  self.bootstrap_addr: str = f"sim!unix!@system,{proxy_addr}"
204
- self.client_listen_addr: str = f"sim!unix!@client,{proxy_addr}"
204
+
205
+ client_proxy_addr = f"unix!@{_random_id()}-proxy"
206
+ self.client_listen_addr: str = f"sim!unix!@client,{client_proxy_addr}"
205
207
  self.client_bootstrap_addr: str = (
206
- f"sim!unix!@client,{proxy_addr},unix!@system,{proxy_addr}"
208
+ f"sim!unix!@client,{client_proxy_addr},unix!@system,{proxy_addr}"
207
209
  )
208
210
  bootstrap_simulator_backend(self.bootstrap_addr, proxy_addr, world_size)
209
211
 
monarch/tools/cli.py CHANGED
@@ -112,7 +112,7 @@ class InfoCmd:
112
112
  file=sys.stderr,
113
113
  )
114
114
  else:
115
- json.dump(server_spec.to_json(), fp=sys.stdout)
115
+ json.dump(server_spec.to_json(), indent=2, fp=sys.stdout)
116
116
 
117
117
 
118
118
  class KillCmd:
@@ -25,6 +25,7 @@ def proc_mesh(
25
25
  meshes: list[str] = _DEFAULT_MESHES,
26
26
  env: Optional[dict[str, str]] = None,
27
27
  port: int = mesh_spec.DEFAULT_REMOTE_ALLOCATOR_PORT,
28
+ program: str = "monarch_bootstrap", # installed with monarch wheel (as console script)
28
29
  ) -> specs.AppDef:
29
30
  """
30
31
  Args:
@@ -33,6 +34,7 @@ def proc_mesh(
33
34
  meshes: list of mesh specs of the form "{name}:{num_hosts}:{host_type}"
34
35
  env: environment variables to be passed to the main command (e.g. ENV1=v1,ENV2=v2,ENV3=v3)
35
36
  port: the port that the remote process allocator runs on (must be reachable from the client)
37
+ program: path to the binary that the remote process allocator spawns on an allocation request
36
38
  """
37
39
 
38
40
  appdef = specs.AppDef(name)
@@ -41,11 +43,10 @@ def proc_mesh(
41
43
  mesh_role = specs.Role(
42
44
  name=mesh.name,
43
45
  image=image,
44
- entrypoint="process_allocator", # 'cargo install monarch_hyperactor' to get this binary
46
+ entrypoint="process_allocator", # run "cargo install monarch_hyperactor" to get this binary
45
47
  args=[
46
- "mesh-worker",
47
48
  f"--port={port}",
48
- "--program=monarch_bootstrap", # installed with monarch wheel (as console script)
49
+ f"--program={program}",
49
50
  ],
50
51
  num_replicas=mesh.num_hosts,
51
52
  resource=specs.resource(h=mesh.host_type),
@@ -0,0 +1,62 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ # pyre-strict
8
+ import logging
9
+ import socket
10
+ from typing import Optional
11
+
12
+ logger: logging.Logger = logging.getLogger(__name__)
13
+
14
+
15
+ def get_ip_addr(hostname: str) -> str:
16
+ """Resolves and returns the ip address of the given hostname.
17
+
18
+ This function will return an ipv6 address if one that can bind
19
+ a `SOCK_STREAM` (TCP) socket is found. Otherwise it will fall back
20
+ to resolving an ipv4 `SOCK_STREAM` address.
21
+
22
+ Raises a `RuntimeError` if neither an ipv6 nor an ipv4 address can be resolved from hostname.
23
+ """
24
+
25
+ def get_sockaddr(family: socket.AddressFamily) -> Optional[str]:
26
+ try:
27
+ # patternlint-disable-next-line python-dns-deps (only used for oss)
28
+ addrs = socket.getaddrinfo(
29
+ hostname, port=None, family=family, type=socket.SOCK_STREAM
30
+ ) # tcp
31
+ if addrs:
32
+ # socket.getaddrinfo returns a list of 5-tuple addr infos
33
+ _, _, _, _, sockaddr = addrs[0] # use the first address
34
+
35
+ # sockaddr is a 2-tuple (ipv4) or a 4-tuple (ipv6) where the first element is the ip addr
36
+ ipaddr = str(sockaddr[0])
37
+
38
+ logger.info(
39
+ "Resolved %s address: `%s` for host: `%s`",
40
+ family.name,
41
+ ipaddr,
42
+ hostname,
43
+ )
44
+ return str(ipaddr)
45
+ else:
46
+ return None
47
+ except socket.gaierror as e:
48
+ logger.info(
49
+ "No %s address that can bind TCP sockets for host: %s. %s",
50
+ family.name,
51
+ hostname,
52
+ e,
53
+ )
54
+ return None
55
+
56
+ ipaddr = get_sockaddr(socket.AF_INET6) or get_sockaddr(socket.AF_INET)
57
+ if not ipaddr:
58
+ raise RuntimeError(
59
+ f"Unable to resolve `{hostname}` to ipv6 or ipv4 address that can bind TCP socket."
60
+ " Check the network configuration on the host."
61
+ )
62
+ return ipaddr
@@ -4,6 +4,7 @@
4
4
  # This source code is licensed under the BSD-style license found in the
5
5
  # LICENSE file in the root directory of this source tree.
6
6
 
7
+ import asyncio
7
8
  import ctypes
8
9
  import sys
9
10
 
@@ -11,7 +12,7 @@ import click
11
12
 
12
13
  from monarch._rust_bindings.monarch_extension.panic import panicking_function
13
14
 
14
- from monarch.actor_mesh import Actor, endpoint
15
+ from monarch.actor_mesh import Actor, endpoint, send
15
16
  from monarch.proc_mesh import proc_mesh
16
17
 
17
18
 
@@ -35,6 +36,12 @@ class ErrorActor(Actor):
35
36
  """Endpoint that calls a Rust function that panics."""
36
37
  panicking_function()
37
38
 
39
+ @endpoint
40
+ async def await_then_error(self) -> None:
41
+ await asyncio.sleep(0.1)
42
+ await asyncio.sleep(0.1)
43
+ raise RuntimeError("oh noez")
44
+
38
45
 
39
46
  class ErrorActorSync(Actor):
40
47
  """An actor that has endpoints cause segfaults."""
@@ -146,5 +153,28 @@ def error_bootstrap():
146
153
  proc_mesh(gpus=4, env={"MONARCH_ERROR_DURING_BOOTSTRAP_FOR_TESTING": "1"}).get()
147
154
 
148
155
 
156
+ async def _error_unmonitored():
157
+ print("I actually ran")
158
+ sys.stdout.flush()
159
+
160
+ proc = await proc_mesh(gpus=1)
161
+ actor = await proc.spawn("error_actor", ErrorActor)
162
+
163
+ # fire and forget
164
+ send(actor.await_then_error, (), {}, None, "all")
165
+
166
+ # Wait. Eventually a supervision event will get propagated and the process
167
+ # will exit.
168
+ #
169
+ # If an event is not delivered, the test will time out before this sleep
170
+ # finishes.
171
+ await asyncio.sleep(300)
172
+
173
+
174
+ @main.command("error-unmonitored")
175
+ def error_unmonitored():
176
+ asyncio.run(_error_unmonitored())
177
+
178
+
149
179
  if __name__ == "__main__":
150
180
  main()
tests/test_actor_error.py CHANGED
@@ -4,11 +4,12 @@
4
4
  # This source code is licensed under the BSD-style license found in the
5
5
  # LICENSE file in the root directory of this source tree.
6
6
 
7
+ import asyncio
7
8
  import importlib.resources
8
9
  import subprocess
9
10
 
10
11
  import pytest
11
- from monarch.actor_mesh import Actor, ActorError, endpoint
12
+ from monarch.actor_mesh import Actor, ActorError, endpoint, send
12
13
 
13
14
  from monarch.proc_mesh import proc_mesh
14
15
 
@@ -128,6 +129,7 @@ def test_actor_supervision(num_procs, sync_endpoint, sync_test_impl, endpoint_na
128
129
  f"--endpoint-name={endpoint_name}",
129
130
  ]
130
131
  try:
132
+ print("running cmd", " ".join(cmd))
131
133
  process = subprocess.run(cmd, capture_output=True, timeout=180)
132
134
  except subprocess.TimeoutExpired as e:
133
135
  print("timeout expired")
@@ -157,6 +159,7 @@ def test_proc_mesh_bootstrap_error():
157
159
  "error-bootstrap",
158
160
  ]
159
161
  try:
162
+ print("running cmd", " ".join(cmd))
160
163
  process = subprocess.run(cmd, capture_output=True, timeout=180)
161
164
  except subprocess.TimeoutExpired as e:
162
165
  print("timeout expired")
@@ -208,3 +211,30 @@ async def test_broken_pickle_class(raise_on_getstate, raise_on_setstate, num_pro
208
211
  await exception_actor.print_value.call_one(broken_obj)
209
212
  else:
210
213
  await exception_actor.print_value.call(broken_obj)
214
+
215
+
216
+ # oss_skip: importlib not pulling resource correctly in git CI, needs to be revisited
217
+ @pytest.mark.oss_skip
218
+ async def test_exception_after_wait_unmonitored():
219
+ # Run the test in a subprocess
220
+ test_bin = importlib.resources.files("monarch.python.tests").joinpath("test_bin")
221
+ cmd = [
222
+ str(test_bin),
223
+ "error-unmonitored",
224
+ ]
225
+ try:
226
+ print("running cmd", " ".join(cmd))
227
+ process = subprocess.run(cmd, capture_output=True, timeout=180)
228
+ except subprocess.TimeoutExpired as e:
229
+ print("timeout expired")
230
+ if e.stdout is not None:
231
+ print(e.stdout.decode())
232
+ if e.stderr is not None:
233
+ print(e.stderr.decode())
234
+ raise
235
+
236
+ # Assert that the subprocess exited with a non-zero code
237
+ assert "I actually ran" in process.stdout.decode()
238
+ assert (
239
+ process.returncode != 0
240
+ ), f"Expected non-zero exit code, got {process.returncode}"
@@ -391,10 +391,13 @@ def test_rust_binding_modules_correct() -> None:
391
391
  check(bindings, "monarch._rust_bindings")
392
392
 
393
393
 
394
- @pytest.mark.skipif(
394
+ two_gpu = pytest.mark.skipif(
395
395
  torch.cuda.device_count() < 2,
396
396
  reason="Not enough GPUs, this test requires at least 2 GPUs",
397
397
  )
398
+
399
+
400
+ @two_gpu
398
401
  def test_tensor_engine() -> None:
399
402
  pm = proc_mesh(gpus=2).get()
400
403
 
@@ -581,13 +584,80 @@ async def test_actor_tls() -> None:
581
584
  pm = await proc_mesh(gpus=1)
582
585
  am = await pm.spawn("tls", TLSActor)
583
586
  await am.increment.call_one()
584
- # TODO(suo): TLS is NOT preserved across async/sync endpoints, because currently
585
- # we run async endpoints on a different thread than sync ones.
586
- # Will fix this in a followup diff.
587
+ await am.increment_async.call_one()
588
+ await am.increment.call_one()
589
+ await am.increment_async.call_one()
590
+
591
+ assert 4 == await am.get.call_one()
592
+ assert 4 == await am.get_async.call_one()
593
+
594
+
595
+ class TLSActorFullSync(Actor):
596
+ """An actor that manages thread-local state."""
597
+
598
+ def __init__(self):
599
+ self.local = threading.local()
600
+ self.local.value = 0
601
+
602
+ @endpoint
603
+ def increment(self):
604
+ self.local.value += 1
605
+
606
+ @endpoint
607
+ def get(self):
608
+ return self.local.value
609
+
587
610
 
588
- # await am.increment_async.call_one()
611
+ async def test_actor_tls_full_sync() -> None:
612
+ """Test that thread-local state is respected."""
613
+ pm = await proc_mesh(gpus=1)
614
+ am = await pm.spawn("tls", TLSActorFullSync)
615
+ await am.increment.call_one()
616
+ await am.increment.call_one()
617
+ await am.increment.call_one()
589
618
  await am.increment.call_one()
590
- # await am.increment_async.call_one()
591
619
 
592
- assert 2 == await am.get.call_one()
593
- # assert 4 == await am.get_async.call_one()
620
+ assert 4 == await am.get.call_one()
621
+
622
+
623
+ @two_gpu
624
+ def test_proc_mesh_tensor_engine() -> None:
625
+ pm = proc_mesh(gpus=2).get()
626
+ with pm.activate():
627
+ f = 10 * pm.rank_tensor("gpus").cuda()
628
+ a = monarch.inspect(f, hosts=0, gpus=0)
629
+ b = monarch.inspect(f, hosts=0, gpus=1)
630
+
631
+ one = pm.slice(gpus=1)
632
+ with one.activate():
633
+ sliced_b = monarch.slice_mesh(f, gpus=1).to_mesh(one)
634
+ c = monarch.inspect(sliced_b * 10)
635
+ assert a == 0
636
+ assert b == 10
637
+ assert c == 100
638
+
639
+
640
+ class AsyncActor(Actor):
641
+ def __init__(self):
642
+ self.should_exit = False
643
+
644
+ @endpoint
645
+ async def sleep(self) -> None:
646
+ while True and not self.should_exit:
647
+ await asyncio.sleep(1)
648
+
649
+ @endpoint
650
+ async def no_more(self) -> None:
651
+ self.should_exit = True
652
+
653
+
654
+ @pytest.mark.timeout(15)
655
+ async def test_async_concurrency():
656
+ """Test that async endpoints will be processed concurrently."""
657
+ pm = await proc_mesh(gpus=1)
658
+ am = await pm.spawn("async", AsyncActor)
659
+ fut = am.sleep.call()
660
+ # This call should go through and exit the sleep loop, as long as we are
661
+ # actually concurrently processing messages.
662
+ await am.no_more.call()
663
+ await fut
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: torchmonarch-nightly
3
- Version: 2025.6.16
3
+ Version: 2025.6.18
4
4
  Summary: Monarch: Single controller library
5
5
  Author: Meta
6
6
  Author-email: oncall+monarch@xmail.facebook.com
@@ -1,7 +1,7 @@
1
1
  monarch/__init__.py,sha256=iUvWHc0-7Q2tovRoRxOIiA3TsefMXCbWl-jEfQ2djew,6897
2
- monarch/_rust_bindings.so,sha256=Mus3Wdk7VoEHkyPLV1_SQ2r2KAMOPNTv3rECuKH5Olk,40613688
2
+ monarch/_rust_bindings.so,sha256=RlkNuWQ74oxTOEfmaVFsgESTEdMP84vug1sRY4xya60,40803008
3
3
  monarch/_testing.py,sha256=jOIOG6jcZBzvEvG_DwSnwCkaMVXvSun6sJAG6nXemww,7859
4
- monarch/actor_mesh.py,sha256=nAW65WFEWMJWCv8zuH9GSOyTNXwFN8QNqZxMZTuSYxw,25537
4
+ monarch/actor_mesh.py,sha256=8hjIy0TSby33xfVXp_xZnqlPkxy3l6IGqEyPOhVtjvU,24197
5
5
  monarch/allocator.py,sha256=ylvYTf31o-PT385cYJPhi17uNbC4yl_RAraqD0fVe4g,4112
6
6
  monarch/bootstrap_main.py,sha256=RCUQhJk07yMFiKp6HzQuqZFUpkgsT9kVEyimiwjn6_E,1827
7
7
  monarch/cached_remote_function.py,sha256=kYdB6r4OHx_T_uX4q3tCNcp1t2DJwF8tPTIahUiT2pU,8785
@@ -10,13 +10,13 @@ monarch/fetch.py,sha256=61jxo7sx4QNUTkc0_rF5NaJROen4tKbAaiIjrXWLOvg,1705
10
10
  monarch/future.py,sha256=lcdFEe7m1shYPPuvZ1RkS6JUIChEKGBWe3v7x_nu4Hg,731
11
11
  monarch/gradient_generator.py,sha256=Rl3dmXGceTdCc1mYBg2JciR88ywGPnW7TVkL86KwqEA,6366
12
12
  monarch/memory.py,sha256=ol86dBhFAJqg78iF25-BuK0wuwj1onR8FIioZ_B0gjw,1377
13
- monarch/mesh_controller.py,sha256=Xft2edk7rz8_PPe-iIUZ09P-j4JDPGADBGHBiuiZ7YY,10363
14
- monarch/monarch_controller,sha256=OZYuYEUToULTaOxmUk3Dv-73n68gFua8z4pP5WCwU5I,20400832
13
+ monarch/mesh_controller.py,sha256=am1QP7dvn0OH1z9ADSKm41APs1HY_dHcBAhOVP-QDmE,10427
14
+ monarch/monarch_controller,sha256=HucZG4CSJhkVpbHElarAp2LUz1xW5bMNnAR3TNjWKks,20335344
15
15
  monarch/notebook.py,sha256=zu9MKDFKf1-rCM2TqFSRJjMBeiWuKcJSyUFLvoZRQzs,25949
16
16
  monarch/opaque_module.py,sha256=oajOu_WD1hD4hxE8HDdO-tvWY7KDHWd7VaAhJEa5L2I,10446
17
17
  monarch/opaque_object.py,sha256=IVpll4pyuKZMo_EnPh4s0qnx8RlAcJrJ1yoLX6E75wQ,2782
18
18
  monarch/pdb_wrapper.py,sha256=gm46AZnfR4amH1vYFWnWivEv5MaU3Nb6KIWjSM8KjWM,4052
19
- monarch/proc_mesh.py,sha256=xoaReM9Ab9TWkesxedWSyyk4TMD0HLV88dQ8CQcbqTI,6892
19
+ monarch/proc_mesh.py,sha256=5RaKPQZJD-sKzEAbqMorKsZA7SOUzIflk3Fn6cdfzvw,8607
20
20
  monarch/profiler.py,sha256=TQ9fnVM8H7smBWtYdB_6Irtzz8DBOmcp7U1T3wlUmco,4911
21
21
  monarch/python_local_mesh.py,sha256=YsureIzR9uGlNVrKd4vRghxOXBeYabkt9lICRErfRAI,3536
22
22
  monarch/random.py,sha256=f9QR7Esu4Vxqxs-KCf5QYyVqlWvXJ3-UtG90L_h4j40,1527
@@ -24,7 +24,7 @@ monarch/rdma.py,sha256=1pNh11S_FWeETRgkdUpauTMUlodrRohIq1UfQjKVnN8,5418
24
24
  monarch/remote_class.py,sha256=-OAowzU1aDP6i4ik_SjXntVUC9h4dqAzgqwohkQ6Grc,4167
25
25
  monarch/rust_backend_mesh.py,sha256=1htC62of4MgFtkezWGlsxSFtKJdc0CIeqeSuOx7yu3M,9944
26
26
  monarch/rust_local_mesh.py,sha256=7ASptybn3wy4J7eoBc7LhGW4j4AA6bigl5Kuhyflw8s,47405
27
- monarch/sim_mesh.py,sha256=9wkS99L0EpG2Gldi-nzA-3ww7z__DQ7Qp2uReMfn188,12183
27
+ monarch/sim_mesh.py,sha256=kDsbubv28YFg9ZQN4urt3oJGzR3CnnUiATnjUiSxrkE,12256
28
28
  monarch/telemetry.py,sha256=7JUZWaoD2Yn5Ae_7kNhkLFRBLYaSGfH071_m_qfVehI,525
29
29
  monarch/tensor_worker_main.py,sha256=Nbarl2sJKIddLeaRFsaUnqOerLHjzggUr9SqCr2_GYI,8300
30
30
  monarch/tensorboard.py,sha256=MnLgH5lbqeUJauEuirEgR6L_qYl2NGdtwZOWIAuOZao,2587
@@ -50,7 +50,7 @@ monarch/common/client.py,sha256=axo37s_z17nYQGOZG5fi_0zUEJ_8qw7INjs-Kw2vaVo,2493
50
50
  monarch/common/constants.py,sha256=ohvsVYMpfeWopv3KXDAeHWDFLukwc-OY37VRxpKNBE8,300
51
51
  monarch/common/context_manager.py,sha256=GOeyaFbyCqvQmkJ0oI7q6IxRd8_0mVyYKZRccI8iaug,1067
52
52
  monarch/common/controller_api.py,sha256=djGkK5aSd-V6pBkr3uBCXbfJv3OKf2o2VbBXJgFF2WI,3202
53
- monarch/common/device_mesh.py,sha256=fBZMYDpfAp5tAEXTe9l6eJxDI4-TMWVOMrAJXp5hzvI,12082
53
+ monarch/common/device_mesh.py,sha256=jo_qEIRlX6KzBlP2BUSS4XEELL-6_H08a47bUz8QYsA,12159
54
54
  monarch/common/fake.py,sha256=h57Cggz2qXNqImZ7yPuOZOSe9-l9i553ki1z-YHlgQA,1801
55
55
  monarch/common/function.py,sha256=V8kdgSRTvild2SpcewWa5IETX3QiWDZQ2BEIDFa5zz8,4374
56
56
  monarch/common/function_caching.py,sha256=HVdbWtv6Eea7ENMWi8iv36w1G1TaVuUJhkUX_JxGx5A,5060
@@ -67,9 +67,9 @@ monarch/common/recording.py,sha256=hoI9VY_FyW_xVx-jmfsKydqX5vW2GulwcDWsBdUVOm8,4
67
67
  monarch/common/reference.py,sha256=O26lkzEeVwj0S1xEy-OLqdHVnACmmlbQCUmXRrW4n1Q,938
68
68
  monarch/common/remote.py,sha256=qZWXkShX20l07TseQSpVECh2yXZaVKYUvQXkeEM-zvY,9220
69
69
  monarch/common/selection.py,sha256=lpWFbZs3ArYy29e-53eoAVAjQFksf1RvZz9NvM0CUW4,308
70
- monarch/common/shape.py,sha256=k6-0S0U19PmrfP62SMb9Ihx6_I4QQFUGErloZn8GcZ0,8144
70
+ monarch/common/shape.py,sha256=B-7DI768ZhT8ECUNCJcI7DfCB7iDFGFH0r-HmXaAfcM,8296
71
71
  monarch/common/stream.py,sha256=_ejoxafHtdD10lLzznRCXKwrkZ_ZH9k_VTgiA5yfBrI,3583
72
- monarch/common/tensor.py,sha256=mSXiHoD0Up4m2RLdQcsbesaz2N4QCFS34UNNX3Dbldk,28842
72
+ monarch/common/tensor.py,sha256=G26E8-qv7HnjZfz3Ka5a-u3vb6DadcDChOn6wpjkeZo,29273
73
73
  monarch/common/tensor_factory.py,sha256=qm8NZx-5ezMAFjNLiXQvb66okm5XgdboB_GRarGOdN0,801
74
74
  monarch/common/tree.py,sha256=1DG3siiE7ixBV6v5cwN8RT_17aJhYZTE-L3i7wZe2_c,2282
75
75
  monarch/controller/__init__.py,sha256=J8qjUOysmcMAek2KFN13mViOXZxTYc5vCrF02t3VuFU,223
@@ -106,11 +106,12 @@ monarch/timer/example_spmd.py,sha256=p8i3_tO1AmpwSkZryiSjgkh7qaEZ6QXp2Fy1qtPpECA
106
106
  monarch/timer/execution_timer.py,sha256=1YsrLIZirdohKOeFAU2H4UcONhQXHuctJbYcoX8I6gY,6985
107
107
  monarch/timer/execution_timer_test.py,sha256=CSxTv44fFZQURJlCBmYvysQI1aS_zEGZs_uxl9SOHak,4486
108
108
  monarch/tools/__init__.py,sha256=J8qjUOysmcMAek2KFN13mViOXZxTYc5vCrF02t3VuFU,223
109
- monarch/tools/cli.py,sha256=66F7dr90bh27P3kOCmxwJkVmWv2v4wBrkifvwqwUwFE,4967
109
+ monarch/tools/cli.py,sha256=EIdarsfuFX0WqRCe29_5GNKWJBhxx0lABalw3zPSagw,4977
110
110
  monarch/tools/commands.py,sha256=BfmXndJmU_cZP4cMPlknkxGca1NjqYd8_ReDePWksXw,6908
111
111
  monarch/tools/mesh_spec.py,sha256=JLykhgy1dClXiNbH1Qsl2fX5MbqplQAhl8LGoragvbo,3702
112
+ monarch/tools/network.py,sha256=bRj-jOs5qDqnM3BcE9MSXCLS01hiMN4YSWfKZ_d7bc4,2182
112
113
  monarch/tools/components/__init__.py,sha256=J8qjUOysmcMAek2KFN13mViOXZxTYc5vCrF02t3VuFU,223
113
- monarch/tools/components/hyperactor.py,sha256=h0gy3QYZD-YJ7FHppJgbTKe4zOuNjUCGZqRlkwwGkhg,2012
114
+ monarch/tools/components/hyperactor.py,sha256=Ryi1X07VLcaQVlpc4af65JNBbZtOb9IAlKxSKMZ1AW4,2120
114
115
  monarch/tools/config/__init__.py,sha256=OPSflEmJB2zxAaRVzzWSWXV5M5vlknLgpulGdW1ze5U,510
115
116
  monarch/tools/config/defaults.py,sha256=34a3HQhyXqt9qR2SYMVCROoNsnwk37rIwLXXiKwqtog,1894
116
117
  monarch/worker/__init__.py,sha256=J8qjUOysmcMAek2KFN13mViOXZxTYc5vCrF02t3VuFU,223
@@ -131,9 +132,9 @@ monarch_supervisor/python_executable.py,sha256=WfCiK3wdAvm9Jxx5jgjGF991NgGc9-oHU
131
132
  tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
132
133
  tests/dispatch_bench.py,sha256=sU_m-8KAjQgYTsxI5khV664NdgLLutidni69Rtowk98,3933
133
134
  tests/dispatch_bench_helper.py,sha256=1ORgAMrRgjAjmmWeCHLLQd_bda9mJk0rS2ucEbRu28s,633
134
- tests/error_test_binary.py,sha256=64H-ucdkQ2i7GD8sidStl227cOy7gyeqvO4kTm1y7Ic,4817
135
+ tests/error_test_binary.py,sha256=BRj13wAROsUWx4jcxc07HYN2n-xyBNhnnRAhjqah-A0,5582
135
136
  tests/sleep_binary.py,sha256=XfLYaAfwm9xgzM-svs8fhAeFhwYIg6SyVEnx4e6wbUw,1009
136
- tests/test_actor_error.py,sha256=z3Sf4lteUggTryPLOhRKJ55v0MwVK3a7QN7-U2U9iJg,7484
137
+ tests/test_actor_error.py,sha256=-0UJCEpyzsBh-RdbGhDiG1-sRtu7bJPQWmtjUD0ad48,8526
137
138
  tests/test_alloc.py,sha256=D6DdQbtOZEvvnnc7LV-WyWFMk0Xb77eblH6Oz90zJTA,745
138
139
  tests/test_allocator.py,sha256=P11sQ95ADjzC_-CfPs3CEP80nP8sn7wW8vVPsmpSVoM,8164
139
140
  tests/test_coalescing.py,sha256=JZ4YgQNlWWs7N-Z8KCCXQPANcuyyXEKjeHIXYbPnQhk,15606
@@ -144,7 +145,7 @@ tests/test_future.py,sha256=cXzaNi2YDwVyjR541ScXmgktX1YFsKzbl8wep0DMVbk,3032
144
145
  tests/test_grad_generator.py,sha256=p4Pm4kMEeGldt2jUVAkGKCB0mLccKI28pltH6OTGbQA,3412
145
146
  tests/test_mock_cuda.py,sha256=5hisElxeLJ5MHw3KM9gwxBiXiMaG-Rm382u3AsQcDOI,3068
146
147
  tests/test_pdb_actor.py,sha256=5KJhuhcZDPWMdjC6eAtDdwnz1W7jNFXvIrMSFaCWaPw,3858
147
- tests/test_python_actors.py,sha256=MzGeuhGVICzwiNaQt8SFCKyfwhNzdRzZ4s2rJxYbeoo,17283
148
+ tests/test_python_actors.py,sha256=ls0x_ie4i9KLuouecfxG_fHHZSZc2g_mQSAPJg70pgw,18949
148
149
  tests/test_remote_functions.py,sha256=5nxYB8dfA9NT9f9Od9O3htgQtPbiRNiXZ1Kgtn75sOQ,50056
149
150
  tests/test_rust_backend.py,sha256=94S3R995ZkyIhEiBsM5flcjf5X7bscEAHBtInbTRFe8,7776
150
151
  tests/test_signal_safe_block_on.py,sha256=bmal0XgzJowZXJV6T1Blow5a-vZluYWusCThLMGxyTE,3336
@@ -154,9 +155,9 @@ tests/simulator/test_profiling.py,sha256=TGYCfzTLdkpIwnOuO6KApprmrgPIRQe60KRX3wk
154
155
  tests/simulator/test_simulator.py,sha256=LO8lA0ssY-OGEBL5ipEu74f97Y765TEwfUOv-DtIptM,14568
155
156
  tests/simulator/test_task.py,sha256=ipqBDuDAysuo1xOB9S5psaFvwe6VATD43IovCTSs0t4,2327
156
157
  tests/simulator/test_worker.py,sha256=QrWWIJ3HDgDLkBPRc2mwYPlOQoXQcj1qRfc0WUfKkFY,3507
157
- torchmonarch_nightly-2025.6.16.dist-info/licenses/LICENSE,sha256=e0Eotbf_rHOYPuEUlppIbvwy4SN98CZnl_hqwvbDA4Q,1530
158
- torchmonarch_nightly-2025.6.16.dist-info/METADATA,sha256=KwDmmYW1hUyjyax5yF9TE1Tk7JvYmylSpIe4e2T_aXI,2772
159
- torchmonarch_nightly-2025.6.16.dist-info/WHEEL,sha256=_wZSFk0d90K9wOBp8Q-UGxshyiJ987JoPiyUBNC6VLk,104
160
- torchmonarch_nightly-2025.6.16.dist-info/entry_points.txt,sha256=sqfQ16oZqjEvttUI-uj9BBXIIE6jt05bYFSmy-2hyXI,106
161
- torchmonarch_nightly-2025.6.16.dist-info/top_level.txt,sha256=E-ZssZzyM17glpVrh-S9--qJ-w9p2EjuYOuNw9tQ4Eg,33
162
- torchmonarch_nightly-2025.6.16.dist-info/RECORD,,
158
+ torchmonarch_nightly-2025.6.18.dist-info/licenses/LICENSE,sha256=e0Eotbf_rHOYPuEUlppIbvwy4SN98CZnl_hqwvbDA4Q,1530
159
+ torchmonarch_nightly-2025.6.18.dist-info/METADATA,sha256=lPDac3GQrS5MmEp41wt6YCWHIluJzBgFfPY37x0cKJM,2772
160
+ torchmonarch_nightly-2025.6.18.dist-info/WHEEL,sha256=_wZSFk0d90K9wOBp8Q-UGxshyiJ987JoPiyUBNC6VLk,104
161
+ torchmonarch_nightly-2025.6.18.dist-info/entry_points.txt,sha256=sqfQ16oZqjEvttUI-uj9BBXIIE6jt05bYFSmy-2hyXI,106
162
+ torchmonarch_nightly-2025.6.18.dist-info/top_level.txt,sha256=E-ZssZzyM17glpVrh-S9--qJ-w9p2EjuYOuNw9tQ4Eg,33
163
+ torchmonarch_nightly-2025.6.18.dist-info/RECORD,,