torchmonarch-nightly 2025.6.5__cp310-cp310-manylinux2014_x86_64.whl → 2025.6.7__cp310-cp310-manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
monarch/_rust_bindings.so CHANGED
Binary file
monarch/actor_mesh.py CHANGED
@@ -39,7 +39,7 @@ from typing import (
39
39
  import monarch
40
40
  from monarch import ActorFuture as Future
41
41
 
42
- from monarch._rust_bindings.monarch_hyperactor.actor import PythonMessage
42
+ from monarch._rust_bindings.monarch_hyperactor.actor import PanicFlag, PythonMessage
43
43
  from monarch._rust_bindings.monarch_hyperactor.actor_mesh import PythonActorMesh
44
44
  from monarch._rust_bindings.monarch_hyperactor.mailbox import (
45
45
  Mailbox,
@@ -50,7 +50,7 @@ from monarch._rust_bindings.monarch_hyperactor.mailbox import (
50
50
  from monarch._rust_bindings.monarch_hyperactor.proc import ActorId
51
51
  from monarch._rust_bindings.monarch_hyperactor.shape import Point as HyPoint, Shape
52
52
  from monarch.common.pickle_flatten import flatten, unflatten
53
- from monarch.common.shape import MeshTrait, NDSlice, Shape
53
+ from monarch.common.shape import MeshTrait, NDSlice
54
54
 
55
55
  logger = logging.getLogger(__name__)
56
56
 
@@ -158,7 +158,7 @@ class _ActorMeshRefImpl:
158
158
  mailbox,
159
159
  hy_actor_mesh,
160
160
  hy_actor_mesh.shape,
161
- [cast(ActorId, hy_actor_mesh.get(i)) for i in range(len(shape.ndslice))],
161
+ [cast(ActorId, hy_actor_mesh.get(i)) for i in range(len(shape))],
162
162
  )
163
163
 
164
164
  @staticmethod
@@ -204,7 +204,7 @@ class _ActorMeshRefImpl:
204
204
  # The fix is to provide a first-class reference into Python, and always call "cast"
205
205
  # on it, including for load balanced requests.
206
206
  if selection == "choose":
207
- idx = _load_balancing_seed.randrange(len(self._shape.ndslice))
207
+ idx = _load_balancing_seed.randrange(len(self._shape))
208
208
  actor_rank = self._shape.ndslice[idx]
209
209
  self._mailbox.post(self._please_replace_me_actor_ids[actor_rank], message)
210
210
  return
@@ -223,9 +223,8 @@ class _ActorMeshRefImpl:
223
223
  else:
224
224
  raise ValueError(f"invalid selection: {selection}")
225
225
 
226
- @property
227
- def len(self) -> int:
228
- return len(self._shape.ndslice)
226
+ def __len__(self) -> int:
227
+ return len(self._shape)
229
228
 
230
229
 
231
230
  class Endpoint(Generic[P, R]):
@@ -258,7 +257,7 @@ class Endpoint(Generic[P, R]):
258
257
  return r.recv()
259
258
 
260
259
  def call_one(self, *args: P.args, **kwargs: P.kwargs) -> Future[R]:
261
- if self._actor_mesh.len != 1:
260
+ if len(self._actor_mesh) != 1:
262
261
  raise ValueError(
263
262
  f"Can only use 'call_one' on a single Actor but this actor has shape {self._actor_mesh._shape}"
264
263
  )
@@ -270,8 +269,8 @@ class Endpoint(Generic[P, R]):
270
269
  send(self, args, kwargs, port=p)
271
270
 
272
271
  async def process():
273
- results = [None] * self._actor_mesh.len
274
- for _ in range(self._actor_mesh.len):
272
+ results = [None] * len(self._actor_mesh)
273
+ for _ in range(len(self._actor_mesh)):
275
274
  rank, value = await r.recv()
276
275
  results[rank] = value
277
276
  call_shape = Shape(
@@ -292,7 +291,7 @@ class Endpoint(Generic[P, R]):
292
291
  p, r = port(self)
293
292
  # pyre-ignore
294
293
  send(self, args, kwargs, port=p)
295
- for _ in range(self._actor_mesh.len):
294
+ for _ in range(len(self._actor_mesh)):
296
295
  yield await r.recv()
297
296
 
298
297
  def broadcast(self, *args: P.args, **kwargs: P.kwargs) -> None:
@@ -346,6 +345,9 @@ class ValueMesh(MeshTrait, Generic[R]):
346
345
  for rank in self._shape.ranks():
347
346
  yield Point(rank, self._shape), self._values[rank]
348
347
 
348
+ def __len__(self):
349
+ return len(self._shape)
350
+
349
351
  @property
350
352
  def _ndslice(self) -> NDSlice:
351
353
  return self._shape.ndslice
@@ -460,12 +462,12 @@ class _Actor:
460
462
  def __init__(self) -> None:
461
463
  self.instance: object | None = None
462
464
  self.active_requests: asyncio.Queue[asyncio.Future[object]] = asyncio.Queue()
463
- self.complete_task: object | None = None
465
+ self.complete_task: asyncio.Task | None = None
464
466
 
465
467
  def handle(
466
- self, mailbox: Mailbox, message: PythonMessage
468
+ self, mailbox: Mailbox, message: PythonMessage, panic_flag: PanicFlag
467
469
  ) -> Optional[Coroutine[Any, Any, Any]]:
468
- return self.handle_cast(mailbox, 0, singleton_shape, message)
470
+ return self.handle_cast(mailbox, 0, singleton_shape, message, panic_flag)
469
471
 
470
472
  def handle_cast(
471
473
  self,
@@ -473,6 +475,7 @@ class _Actor:
473
475
  rank: int,
474
476
  shape: Shape,
475
477
  message: PythonMessage,
478
+ panic_flag: PanicFlag,
476
479
  ) -> Optional[Coroutine[Any, Any, Any]]:
477
480
  port = None
478
481
  try:
@@ -493,10 +496,10 @@ class _Actor:
493
496
  port.send("result", result)
494
497
  return None
495
498
 
496
- return self.run_async(ctx, self.run_task(port, result))
499
+ return self.run_async(ctx, self.run_task(port, result, panic_flag))
497
500
  except Exception as e:
498
501
  traceback.print_exc()
499
- s = ActorMeshRefCallFailedException(e)
502
+ s = ActorError(e)
500
503
 
501
504
  # The exception is delivered to exactly one of:
502
505
  # (1) our caller, (2) our supervisor
@@ -508,17 +511,17 @@ class _Actor:
508
511
  async def run_async(self, ctx, coroutine):
509
512
  _context.set(ctx)
510
513
  if self.complete_task is None:
511
- asyncio.create_task(self._complete())
514
+ self.complete_task = asyncio.create_task(self._complete())
512
515
  await self.active_requests.put(create_eager_task(coroutine))
513
516
 
514
- async def run_task(self, port, coroutine):
517
+ async def run_task(self, port, coroutine, panic_flag):
515
518
  try:
516
519
  result = await coroutine
517
520
  if port is not None:
518
521
  port.send("result", result)
519
522
  except Exception as e:
520
523
  traceback.print_exc()
521
- s = ActorMeshRefCallFailedException(e)
524
+ s = ActorError(e)
522
525
 
523
526
  # The exception is delivered to exactly one of:
524
527
  # (1) our caller, (2) our supervisor
@@ -526,6 +529,16 @@ class _Actor:
526
529
  port.send("exception", s)
527
530
  else:
528
531
  raise s from None
532
+ except BaseException as e:
533
+ # A BaseException can be thrown in the case of a Rust panic.
534
+ # In this case, we need a way to signal the panic to the Rust side.
535
+ # See [Panics in async endpoints]
536
+ try:
537
+ panic_flag.signal_panic(e)
538
+ except Exception:
539
+ # The channel might be closed if the Rust side has already detected the error
540
+ pass
541
+ raise
529
542
 
530
543
  async def _complete(self) -> None:
531
544
  while True:
@@ -653,7 +666,7 @@ class ActorMeshRef(MeshTrait):
653
666
  )
654
667
 
655
668
 
656
- class ActorMeshRefCallFailedException(Exception):
669
+ class ActorError(Exception):
657
670
  """
658
671
  Deterministic problem with the user's code.
659
672
  For example, an OOM resulting in trying to allocate too much GPU memory, or violating
monarch/bootstrap_main.py CHANGED
@@ -53,6 +53,9 @@ def invoke_main():
53
53
  record.levelno,
54
54
  )
55
55
 
56
+ if os.environ.get("MONARCH_ERROR_DURING_BOOTSTRAP_FOR_TESTING") == "1":
57
+ raise RuntimeError("Error during bootstrap for testing")
58
+
56
59
  # forward logs to rust tracing. Defaults to on.
57
60
  if os.environ.get("MONARCH_PYTHON_LOG_TRACING", "1") == "1":
58
61
  logging.root.addHandler(TracingForwarder())
Binary file
Binary file
monarch/rdma.py CHANGED
@@ -6,10 +6,7 @@
6
6
 
7
7
  import ctypes
8
8
 
9
- import traceback
10
-
11
9
  from dataclasses import dataclass
12
- from traceback import extract_tb, StackSummary
13
10
  from typing import cast, Dict, Optional, Tuple
14
11
 
15
12
  import torch
@@ -163,28 +160,3 @@ class RDMABuffer:
163
160
  src.numel(),
164
161
  )
165
162
  await RDMAManager.on_proc(self.proc_id).put.call_one(self.addr, offset, bytes)
166
-
167
-
168
- class ActorMeshRefCallFailedException(Exception):
169
- """
170
- Deterministic problem with the user's code.
171
- For example, an OOM resulting in trying to allocate too much GPU memory, or violating
172
- some invariant enforced by the various APIs.
173
- """
174
-
175
- def __init__(
176
- self,
177
- exception: Exception,
178
- message: str = "A remote service call has failed asynchronously.",
179
- ) -> None:
180
- self.exception = exception
181
- self.actor_mesh_ref_frames: StackSummary = extract_tb(exception.__traceback__)
182
- self.message = message
183
-
184
- def __str__(self) -> str:
185
- exe = str(self.exception)
186
- actor_mesh_ref_tb = "".join(traceback.format_list(self.actor_mesh_ref_frames))
187
- return (
188
- f"{self.message}\n"
189
- f"Traceback of where the service call failed (most recent call last):\n{actor_mesh_ref_tb}{type(self.exception).__name__}: {exe}"
190
- )
monarch/sim_mesh.py CHANGED
@@ -205,7 +205,7 @@ class Bootstrap:
205
205
  self.client_bootstrap_addr: str = (
206
206
  f"sim!unix!@client,{proxy_addr},unix!@system,{proxy_addr}"
207
207
  )
208
- bootstrap_simulator_backend(self.bootstrap_addr, world_size)
208
+ bootstrap_simulator_backend(self.bootstrap_addr, proxy_addr, world_size)
209
209
 
210
210
  self._simulator_client = SimulatorClient(proxy_addr)
211
211
  for i in range(num_meshes):
@@ -7,6 +7,8 @@
7
7
  import ctypes
8
8
  import sys
9
9
 
10
+ import click
11
+
10
12
  from monarch._rust_bindings.monarch_extension.panic import panicking_function
11
13
 
12
14
  from monarch.actor_mesh import Actor, endpoint
@@ -115,24 +117,33 @@ def _run_error_test(num_procs, sync_endpoint, endpoint_name):
115
117
  asyncio.run(run_test())
116
118
 
117
119
 
120
+ @click.group()
118
121
  def main():
119
- import argparse
122
+ pass
120
123
 
121
- parser = argparse.ArgumentParser()
122
- parser.add_argument("--num-procs", type=int)
123
- parser.add_argument("--sync-test-impl", type=bool)
124
- parser.add_argument("--sync-endpoint", type=bool)
125
- parser.add_argument("--endpoint-name", type=str)
126
- args = parser.parse_args()
127
124
 
125
+ @main.command("error-endpoint")
126
+ @click.option("--num-procs", type=int, required=True)
127
+ @click.option("--sync-test-impl", type=bool, required=True)
128
+ @click.option("--sync-endpoint", type=bool, required=True)
129
+ @click.option("--endpoint-name", type=str, required=True)
130
+ def error_endpoint(num_procs, sync_test_impl, sync_endpoint, endpoint_name):
128
131
  print(
129
- f"Running segfault test: {args.num_procs=} {args.sync_test_impl=} {args.sync_endpoint=}, {args.endpoint_name=}"
132
+ f"Running segfault test: {num_procs=} {sync_test_impl=} {sync_endpoint=}, {endpoint_name=}"
130
133
  )
131
134
 
132
- if args.sync_test_impl:
133
- _run_error_test_sync(args.num_procs, args.sync_endpoint, args.endpoint_name)
135
+ if sync_test_impl:
136
+ _run_error_test_sync(num_procs, sync_endpoint, endpoint_name)
134
137
  else:
135
- _run_error_test(args.num_procs, args.sync_endpoint, args.endpoint_name)
138
+ _run_error_test(num_procs, sync_endpoint, endpoint_name)
139
+
140
+
141
+ @main.command("error-bootstrap")
142
+ def error_bootstrap():
143
+ print("I actually ran")
144
+ sys.stdout.flush()
145
+
146
+ proc_mesh(gpus=4, env={"MONARCH_ERROR_DURING_BOOTSTRAP_FOR_TESTING": "1"}).get()
136
147
 
137
148
 
138
149
  if __name__ == "__main__":
tests/test_actor_error.py CHANGED
@@ -8,47 +8,36 @@ import importlib.resources
8
8
  import subprocess
9
9
 
10
10
  import pytest
11
- from monarch.actor_mesh import Actor, ActorMeshRefCallFailedException, endpoint
11
+ from monarch.actor_mesh import Actor, ActorError, endpoint
12
12
 
13
13
  from monarch.proc_mesh import proc_mesh
14
14
 
15
15
 
16
16
  class ExceptionActor(Actor):
17
- """An actor that has endpoints which raise exceptions."""
18
-
19
17
  @endpoint
20
18
  async def raise_exception(self) -> None:
21
- """Endpoint that raises an exception."""
22
19
  raise Exception("This is a test exception")
23
20
 
24
21
 
25
22
  class ExceptionActorSync(Actor):
26
- """An actor that has endpoints which raise exceptions."""
27
-
28
23
  @endpoint # pyre-ignore
29
24
  def raise_exception(self) -> None:
30
- """Endpoint that raises an exception."""
31
25
  raise Exception("This is a test exception")
32
26
 
33
27
 
34
28
  @pytest.mark.parametrize(
35
- "actor_class,actor_name",
36
- [
37
- (ExceptionActor, "exception_actor_async_call"),
38
- (ExceptionActorSync, "exception_actor_sync_call"),
39
- ],
29
+ "actor_class",
30
+ [ExceptionActor, ExceptionActorSync],
40
31
  )
41
32
  @pytest.mark.parametrize("num_procs", [1, 2])
42
- async def test_actor_exception(actor_class, actor_name, num_procs):
33
+ async def test_actor_exception(actor_class, num_procs):
43
34
  """
44
35
  Test that exceptions raised in actor endpoints are propagated to the client.
45
36
  """
46
37
  proc = await proc_mesh(gpus=num_procs)
47
- exception_actor = await proc.spawn(actor_name, actor_class)
38
+ exception_actor = await proc.spawn("exception_actor", actor_class)
48
39
 
49
- with pytest.raises(
50
- ActorMeshRefCallFailedException, match="This is a test exception"
51
- ):
40
+ with pytest.raises(ActorError, match="This is a test exception"):
52
41
  if num_procs == 1:
53
42
  await exception_actor.raise_exception.call_one()
54
43
  else:
@@ -56,23 +45,18 @@ async def test_actor_exception(actor_class, actor_name, num_procs):
56
45
 
57
46
 
58
47
  @pytest.mark.parametrize(
59
- "actor_class,actor_name",
60
- [
61
- (ExceptionActor, "exception_actor_async_call"),
62
- (ExceptionActorSync, "exception_actor_sync_call"),
63
- ],
48
+ "actor_class",
49
+ [ExceptionActor, ExceptionActorSync],
64
50
  )
65
51
  @pytest.mark.parametrize("num_procs", [1, 2])
66
- def test_actor_exception_sync(actor_class, actor_name, num_procs):
52
+ def test_actor_exception_sync(actor_class, num_procs):
67
53
  """
68
54
  Test that exceptions raised in actor endpoints are propagated to the client.
69
55
  """
70
56
  proc = proc_mesh(gpus=num_procs).get()
71
- exception_actor = proc.spawn(actor_name, actor_class).get()
57
+ exception_actor = proc.spawn("exception_actor", actor_class).get()
72
58
 
73
- with pytest.raises(
74
- ActorMeshRefCallFailedException, match="This is a test exception"
75
- ):
59
+ with pytest.raises(ActorError, match="This is a test exception"):
76
60
  if num_procs == 1:
77
61
  exception_actor.raise_exception.call_one().get()
78
62
  else:
@@ -85,25 +69,62 @@ def test_actor_exception_sync(actor_class, actor_name, num_procs):
85
69
  @pytest.mark.parametrize("sync_endpoint", [False, True])
86
70
  @pytest.mark.parametrize("sync_test_impl", [False, True])
87
71
  @pytest.mark.parametrize("endpoint_name", ["cause_segfault", "cause_panic"])
88
- def test_actor_segfault(num_procs, sync_endpoint, sync_test_impl, endpoint_name):
72
+ def test_actor_supervision(num_procs, sync_endpoint, sync_test_impl, endpoint_name):
89
73
  """
90
- Test that segfaults in actor endpoints result in a non-zero exit code.
91
- This test spawns a subprocess that will segfault and checks its exit code.
74
+ Test that an endpoint causing spontaneous process exit is handled by the supervisor.
92
75
 
93
- Tests both ExceptionActor and ExceptionActorSync using async API.
76
+ Today, these events are delivered to the client and cause the client process
77
+ to exit with a non-zero code, so the only way we can test it is via a
78
+ subprocess harness.
94
79
  """
95
80
  # Run the segfault test in a subprocess
96
81
  test_bin = importlib.resources.files("monarch.python.tests").joinpath("test_bin")
97
82
  cmd = [
98
83
  str(test_bin),
84
+ "error-endpoint",
99
85
  f"--num-procs={num_procs}",
100
86
  f"--sync-endpoint={sync_endpoint}",
101
87
  f"--sync-test-impl={sync_test_impl}",
102
88
  f"--endpoint-name={endpoint_name}",
103
89
  ]
104
- process = subprocess.run(cmd, capture_output=True, timeout=60)
105
- print(process.stdout.decode())
106
- print(process.stderr.decode())
90
+ try:
91
+ process = subprocess.run(cmd, capture_output=True, timeout=180)
92
+ except subprocess.TimeoutExpired as e:
93
+ print("timeout expired")
94
+ if e.stdout is not None:
95
+ print(e.stdout.decode())
96
+ if e.stderr is not None:
97
+ print(e.stderr.decode())
98
+ raise
99
+
100
+ # Assert that the subprocess exited with a non-zero code
101
+ assert "I actually ran" in process.stdout.decode()
102
+ assert (
103
+ process.returncode != 0
104
+ ), f"Expected non-zero exit code, got {process.returncode}"
105
+
106
+
107
+ # oss_skip: importlib not pulling resource correctly in git CI, needs to be revisited
108
+ @pytest.mark.oss_skip
109
+ def test_proc_mesh_bootstrap_error():
110
+ """
111
+ Test that attempts to spawn a ProcMesh with a failure during bootstrap.
112
+ """
113
+ # Run the bootstrap-error test in a subprocess
114
+ test_bin = importlib.resources.files("monarch.python.tests").joinpath("test_bin")
115
+ cmd = [
116
+ str(test_bin),
117
+ "error-bootstrap",
118
+ ]
119
+ try:
120
+ process = subprocess.run(cmd, capture_output=True, timeout=180)
121
+ except subprocess.TimeoutExpired as e:
122
+ print("timeout expired")
123
+ if e.stdout is not None:
124
+ print(e.stdout.decode())
125
+ if e.stderr is not None:
126
+ print(e.stderr.decode())
127
+ raise
107
128
 
108
129
  # Assert that the subprocess exited with a non-zero code
109
130
  assert "I actually ran" in process.stdout.decode()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: torchmonarch-nightly
3
- Version: 2025.6.5
3
+ Version: 2025.6.7
4
4
  Summary: Monarch: Single controller library
5
5
  Author: Meta
6
6
  Author-email: oncall+monarch@xmail.facebook.com
@@ -60,13 +60,13 @@ rustup default nightly
60
60
  # Install non-python dependencies
61
61
  conda install libunwind -y
62
62
 
63
- # Install the correct cuda and cuda-toolkit versions for your machine, as well as NCCL-dev
64
- sudo dnf install cuda-toolkit-12-0 cuda-12-0 libnccl-devel
63
+ # Install the correct cuda and cuda-toolkit versions for your machine
64
+ sudo dnf install cuda-toolkit-12-0 cuda-12-0
65
65
 
66
- # Install clang dev
67
- sudo dnf install clang-devel
68
- # In some envrionments, the following may be necessary instead
69
- conda install conda-forge::clangdev
66
+ # Install clang-dev and nccl-dev
67
+ sudo dnf install clang-devel libnccl-devel
68
+ # Or, in some environments, the following may be necessary instead
69
+ conda install -c conda-forge clangdev nccl
70
70
  conda update -n monarchenv --all -c conda-forge -y
71
71
 
72
72
  # Install build dependencies
@@ -1,15 +1,15 @@
1
1
  monarch/__init__.py,sha256=iUvWHc0-7Q2tovRoRxOIiA3TsefMXCbWl-jEfQ2djew,6897
2
- monarch/_rust_bindings.so,sha256=8U1_usPpweGa-Q2Q_8SqsFgxRFb3UoKCVPwshobKk_8,39091976
2
+ monarch/_rust_bindings.so,sha256=seyf4m6FoGBVbC4DBiG2dJdoSqsVRTWTbH9KeBvc1Is,39128520
3
3
  monarch/_testing.py,sha256=MN8DK1e-wzV0-R_nFW1b_7-O5oKfWvZ12BMGD4Z7PQk,6755
4
- monarch/actor_mesh.py,sha256=kfky0QPtji5yDaZ_vVIbWncL7M_parqoxzECvZNc2_c,22301
4
+ monarch/actor_mesh.py,sha256=_IVpdQ1HIC5JumB7cwcamdeWREPKqeMYjiz7NOV9Klw,22842
5
5
  monarch/allocator.py,sha256=_2DKFP9pSD33zDgH7xZJC8Tq7BQrCeQEUmMB7_xCT0Y,1784
6
- monarch/bootstrap_main.py,sha256=_LgEvfI_kFHj2QWH8CLRBQI1tbxS0uWrnHqwzOVbjeI,2417
6
+ monarch/bootstrap_main.py,sha256=SYTOz-pTXiJNk78PPD5HAOJDSb8t2JfitRWdmWB3ogo,2559
7
7
  monarch/cached_remote_function.py,sha256=kYdB6r4OHx_T_uX4q3tCNcp1t2DJwF8tPTIahUiT2pU,8785
8
8
  monarch/fetch.py,sha256=61jxo7sx4QNUTkc0_rF5NaJROen4tKbAaiIjrXWLOvg,1705
9
9
  monarch/future.py,sha256=lcdFEe7m1shYPPuvZ1RkS6JUIChEKGBWe3v7x_nu4Hg,731
10
10
  monarch/gradient_generator.py,sha256=Rl3dmXGceTdCc1mYBg2JciR88ywGPnW7TVkL86KwqEA,6366
11
11
  monarch/memory.py,sha256=ol86dBhFAJqg78iF25-BuK0wuwj1onR8FIioZ_B0gjw,1377
12
- monarch/monarch_controller,sha256=3LmdS4fndvO_lkXcuOVzNZH-HvkTe8oWfQaLVb-om3w,20692200
12
+ monarch/monarch_controller,sha256=ptqqARjqzjjVP0R-1ZPAd0y_K1-0XHFQhE-HR4J6MOo,20389704
13
13
  monarch/notebook.py,sha256=zu9MKDFKf1-rCM2TqFSRJjMBeiWuKcJSyUFLvoZRQzs,25949
14
14
  monarch/opaque_module.py,sha256=oajOu_WD1hD4hxE8HDdO-tvWY7KDHWd7VaAhJEa5L2I,10446
15
15
  monarch/opaque_object.py,sha256=IVpll4pyuKZMo_EnPh4s0qnx8RlAcJrJ1yoLX6E75wQ,2782
@@ -17,11 +17,11 @@ monarch/proc_mesh.py,sha256=sTMmwQLKqM0h-yY0mn8uSzOb9B_MX9DKWCI9EsyfD6s,6384
17
17
  monarch/profiler.py,sha256=TQ9fnVM8H7smBWtYdB_6Irtzz8DBOmcp7U1T3wlUmco,4911
18
18
  monarch/python_local_mesh.py,sha256=YsureIzR9uGlNVrKd4vRghxOXBeYabkt9lICRErfRAI,3536
19
19
  monarch/random.py,sha256=f9QR7Esu4Vxqxs-KCf5QYyVqlWvXJ3-UtG90L_h4j40,1527
20
- monarch/rdma.py,sha256=eWwYKurW-Y6j68m0xH8jeyE3bfmSgB5ZwM2j-RmbCHc,6397
20
+ monarch/rdma.py,sha256=1pNh11S_FWeETRgkdUpauTMUlodrRohIq1UfQjKVnN8,5418
21
21
  monarch/remote_class.py,sha256=-OAowzU1aDP6i4ik_SjXntVUC9h4dqAzgqwohkQ6Grc,4167
22
22
  monarch/rust_backend_mesh.py,sha256=1htC62of4MgFtkezWGlsxSFtKJdc0CIeqeSuOx7yu3M,9944
23
23
  monarch/rust_local_mesh.py,sha256=7ASptybn3wy4J7eoBc7LhGW4j4AA6bigl5Kuhyflw8s,47405
24
- monarch/sim_mesh.py,sha256=pJ4DDn35Y7CobsIwbkUefBbnEHaAc7Ro_7YQdNaP2Dg,12171
24
+ monarch/sim_mesh.py,sha256=9wkS99L0EpG2Gldi-nzA-3ww7z__DQ7Qp2uReMfn188,12183
25
25
  monarch/tensor_worker_main.py,sha256=Nbarl2sJKIddLeaRFsaUnqOerLHjzggUr9SqCr2_GYI,8300
26
26
  monarch/tensorboard.py,sha256=MnLgH5lbqeUJauEuirEgR6L_qYl2NGdtwZOWIAuOZao,2587
27
27
  monarch/world_mesh.py,sha256=GqZpFoVNJPxYa70rLYgv0vu8Vg1nXqx_GYERRb1E9Pc,975
@@ -77,7 +77,7 @@ monarch/controller/rust_backend/__init__.py,sha256=J8qjUOysmcMAek2KFN13mViOXZxTY
77
77
  monarch/controller/rust_backend/controller.py,sha256=-bZYE6u5sB9C0Cnc6NiBoBit9TvolKHRn05I-LUpB8I,9516
78
78
  monarch/gradient/__init__.py,sha256=kqmzwt16mMpk0M3GhpgP_f7da4DGnaV9chDzbt66k4Q,308
79
79
  monarch/gradient/_gradient_generator.pyi,sha256=6cX0UxaDt9NAlwgIhTgnweqGOf6qRhHiGnUzSWNCxdU,630
80
- monarch/gradient/_gradient_generator.so,sha256=povu68MOK7Yx1HHCCrYdLIK7bKnyjz-ZEXycpCsNYZU,11456608
80
+ monarch/gradient/_gradient_generator.so,sha256=RCslwjx2Ji9uqcA9M0IqnEsKSYAnS6NdExwyqfM71YA,11456536
81
81
  monarch/parallel/__init__.py,sha256=6920kIkhiX7AiyjYvyc1ad8ccP-bStJJ1sS5KkeN2P0,352
82
82
  monarch/parallel/pipelining/__init__.py,sha256=J8qjUOysmcMAek2KFN13mViOXZxTYc5vCrF02t3VuFU,223
83
83
  monarch/parallel/pipelining/runtime.py,sha256=KK8TG1gUYEzSsquiZoPTWGSIC74mlncD7cYknKxfb3c,32470
@@ -127,9 +127,9 @@ monarch_supervisor/python_executable.py,sha256=WfCiK3wdAvm9Jxx5jgjGF991NgGc9-oHU
127
127
  tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
128
128
  tests/dispatch_bench.py,sha256=sU_m-8KAjQgYTsxI5khV664NdgLLutidni69Rtowk98,3933
129
129
  tests/dispatch_bench_helper.py,sha256=1ORgAMrRgjAjmmWeCHLLQd_bda9mJk0rS2ucEbRu28s,633
130
- tests/error_test_binary.py,sha256=r9-mm4eDqaJYnBo3gXcuqwhpYq1HeH6xem3a4p8rakI,4600
130
+ tests/error_test_binary.py,sha256=64H-ucdkQ2i7GD8sidStl227cOy7gyeqvO4kTm1y7Ic,4817
131
131
  tests/sleep_binary.py,sha256=XfLYaAfwm9xgzM-svs8fhAeFhwYIg6SyVEnx4e6wbUw,1009
132
- tests/test_actor_error.py,sha256=YBDS6BKwZqgKTFtydEJt4qwJGXRfWx3hgxup9ayVbhY,3827
132
+ tests/test_actor_error.py,sha256=K4buy0Z3MfCF7uSgIMRCpw7A2fTl3iRh8g_aNiJHnBU,4530
133
133
  tests/test_alloc.py,sha256=D6DdQbtOZEvvnnc7LV-WyWFMk0Xb77eblH6Oz90zJTA,745
134
134
  tests/test_coalescing.py,sha256=-KtAWzTaeXbyzltplfojavx0iFeeZnvej-tFTlu2p5k,15616
135
135
  tests/test_controller.py,sha256=yxuVp2DG3TDKJlwuE3cFm9dbWMlbrYtG1uHfvVWRYbw,30935
@@ -149,9 +149,9 @@ tests/simulator/test_profiling.py,sha256=TGYCfzTLdkpIwnOuO6KApprmrgPIRQe60KRX3wk
149
149
  tests/simulator/test_simulator.py,sha256=LO8lA0ssY-OGEBL5ipEu74f97Y765TEwfUOv-DtIptM,14568
150
150
  tests/simulator/test_task.py,sha256=ipqBDuDAysuo1xOB9S5psaFvwe6VATD43IovCTSs0t4,2327
151
151
  tests/simulator/test_worker.py,sha256=QrWWIJ3HDgDLkBPRc2mwYPlOQoXQcj1qRfc0WUfKkFY,3507
152
- torchmonarch_nightly-2025.6.5.dist-info/licenses/LICENSE,sha256=e0Eotbf_rHOYPuEUlppIbvwy4SN98CZnl_hqwvbDA4Q,1530
153
- torchmonarch_nightly-2025.6.5.dist-info/METADATA,sha256=fZ_j4y-CPXYKHI6JBmYaUrSKsQBwDpgboeLy_QY8yj4,2768
154
- torchmonarch_nightly-2025.6.5.dist-info/WHEEL,sha256=_wZSFk0d90K9wOBp8Q-UGxshyiJ987JoPiyUBNC6VLk,104
155
- torchmonarch_nightly-2025.6.5.dist-info/entry_points.txt,sha256=sqfQ16oZqjEvttUI-uj9BBXIIE6jt05bYFSmy-2hyXI,106
156
- torchmonarch_nightly-2025.6.5.dist-info/top_level.txt,sha256=E-ZssZzyM17glpVrh-S9--qJ-w9p2EjuYOuNw9tQ4Eg,33
157
- torchmonarch_nightly-2025.6.5.dist-info/RECORD,,
152
+ torchmonarch_nightly-2025.6.7.dist-info/licenses/LICENSE,sha256=e0Eotbf_rHOYPuEUlppIbvwy4SN98CZnl_hqwvbDA4Q,1530
153
+ torchmonarch_nightly-2025.6.7.dist-info/METADATA,sha256=8082irkhKa1D8z8Dq0GBZAfdQXh_JXcirAjEAI-A2_8,2771
154
+ torchmonarch_nightly-2025.6.7.dist-info/WHEEL,sha256=_wZSFk0d90K9wOBp8Q-UGxshyiJ987JoPiyUBNC6VLk,104
155
+ torchmonarch_nightly-2025.6.7.dist-info/entry_points.txt,sha256=sqfQ16oZqjEvttUI-uj9BBXIIE6jt05bYFSmy-2hyXI,106
156
+ torchmonarch_nightly-2025.6.7.dist-info/top_level.txt,sha256=E-ZssZzyM17glpVrh-S9--qJ-w9p2EjuYOuNw9tQ4Eg,33
157
+ torchmonarch_nightly-2025.6.7.dist-info/RECORD,,