PyPI - torchmonarch-nightly - Versions diffs - 2025.7.25__cp311-cp311-manylinux2014_x86_64.whl → 2025.7.26__cp311-cp311-manylinux2014_x86_64.whl - Mend

torchmonarch-nightly 2025.7.25__cp311-cp311-manylinux2014_x86_64.whl → 2025.7.26__cp311-cp311-manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

monarch/_rust_bindings.so +0 -0
monarch/_src/actor/actor_mesh.py +39 -35
monarch/_src/actor/endpoint.py +36 -3
monarch/_src/actor/event_loop.py +1 -1
monarch/_src/actor/proc_mesh.py +17 -9
monarch/_src/actor/tensor_engine_shim.py +5 -2
monarch/actor/__init__.py +2 -0
monarch/common/messages.py +9 -0
monarch/common/remote.py +2 -2
monarch/gradient/_gradient_generator.so +0 -0
monarch/mesh_controller.py +76 -14
monarch/monarch_controller +0 -0
monarch/tools/cli.py +2 -2
monarch/tools/commands.py +49 -27
monarch/tools/components/hyperactor.py +5 -3
monarch/tools/config/__init__.py +18 -1
monarch/tools/config/defaults.py +2 -2
monarch/tools/mesh_spec.py +4 -1
tests/test_allocator.py +11 -15
tests/test_env_before_cuda.py +2 -3
tests/test_tensor_engine.py +27 -1
{torchmonarch_nightly-2025.7.25.dist-info → torchmonarch_nightly-2025.7.26.dist-info}/METADATA +34 -1
{torchmonarch_nightly-2025.7.25.dist-info → torchmonarch_nightly-2025.7.26.dist-info}/RECORD +27 -27
{torchmonarch_nightly-2025.7.25.dist-info → torchmonarch_nightly-2025.7.26.dist-info}/WHEEL +0 -0
{torchmonarch_nightly-2025.7.25.dist-info → torchmonarch_nightly-2025.7.26.dist-info}/entry_points.txt +0 -0
{torchmonarch_nightly-2025.7.25.dist-info → torchmonarch_nightly-2025.7.26.dist-info}/licenses/LICENSE +0 -0
{torchmonarch_nightly-2025.7.25.dist-info → torchmonarch_nightly-2025.7.26.dist-info}/top_level.txt +0 -0

monarch/_rust_bindings.so CHANGED Viewed

Binary file

monarch/_src/actor/actor_mesh.py CHANGED Viewed

@@ -65,6 +65,7 @@ from monarch._src.actor.endpoint import (
     Endpoint,
     EndpointProperty,
     Extent,
+    NotAnEndpoint,
     Propagator,
     Selection,
 )
@@ -76,7 +77,7 @@ from monarch._src.actor.pickle import flatten, unflatten
 from monarch._src.actor.shape import MeshTrait, NDSlice
 from monarch._src.actor.sync_state import fake_sync_state
-from monarch._src.actor.tensor_engine_shim import actor_send
+from monarch._src.actor.tensor_engine_shim import actor_rref, actor_send
 if TYPE_CHECKING:
     from monarch._src.actor.proc_mesh import ProcMesh
@@ -313,8 +314,7 @@ class ActorEndpoint(Endpoint[P, R]):
         """
         self._signature.bind(None, *args, **kwargs)
         objects, bytes = flatten((args, kwargs), _is_ref_or_mailbox)
-        refs = [obj for obj in objects if hasattr(obj, "__monarch_ref__")]
-        if not refs:
+        if all(not hasattr(obj, "__monarch_ref__") for obj in objects):
             message = PythonMessage(
                 PythonMessageKind.CallMethod(
                     self._name, None if port is None else port._port_ref
@@ -323,7 +323,7 @@ class ActorEndpoint(Endpoint[P, R]):
             )
             self._actor_mesh.cast(message, selection)
         else:
-            actor_send(self, bytes, refs, port, selection)
+            actor_send(self, bytes, objects, port, selection)
         shape = self._actor_mesh._shape
         return Extent(shape.labels, shape.ndslice.sizes)
@@ -335,6 +335,26 @@ class ActorEndpoint(Endpoint[P, R]):
             ), "unexpected receiver type"
         return PortTuple(p, PortReceiver(self._mailbox, self._supervise(r._receiver)))
+    def _rref(self, args, kwargs):
+        self._signature.bind(None, *args, **kwargs)
+        refs, bytes = flatten((args, kwargs), _is_ref_or_mailbox)
+        return actor_rref(self, bytes, refs)
+def as_endpoint(
+    not_an_endpoint: Callable[P, R], *, propagate: Propagator = None
+) -> Endpoint[P, R]:
+    if not isinstance(not_an_endpoint, NotAnEndpoint):
+        raise ValueError("expected an method of a spawned actor")
+    return ActorEndpoint(
+        not_an_endpoint._ref._actor_mesh_ref,
+        not_an_endpoint._name,
+        getattr(not_an_endpoint._ref, not_an_endpoint._name),
+        not_an_endpoint._ref._mailbox,
+        propagate,
+    )
 class Accumulator(Generic[P, R, A]):
     def __init__(
@@ -625,18 +645,23 @@ class _Actor:
                         f" This is likely due to an earlier error: {self._saved_error}"
                     )
                 raise AssertionError(error_message)
-            the_method = getattr(self.instance, method)._method
+            the_method = getattr(self.instance, method)
+            if isinstance(the_method, EndpointProperty):
+                module = the_method._method.__module__
+                the_method = functools.partial(the_method._method, self.instance)
+            else:
+                module = the_method.__module__
             if inspect.iscoroutinefunction(the_method):
                 async def instrumented():
                     enter_span(
-                        the_method.__module__,
+                        module,
                         method,
                         str(ctx.mailbox.actor_id),
                     )
                     try:
-                        result = await the_method(self.instance, *args, **kwargs)
+                        result = await the_method(*args, **kwargs)
                         self._maybe_exit_debugger()
                     except Exception as e:
                         logging.critical(
@@ -649,9 +674,9 @@ class _Actor:
                 result = await instrumented()
             else:
-                enter_span(the_method.__module__, method, str(ctx.mailbox.actor_id))
+                enter_span(module, method, str(ctx.mailbox.actor_id))
                 with fake_sync_state():
-                    result = the_method(self.instance, *args, **kwargs)
+                    result = the_method(*args, **kwargs)
                 self._maybe_exit_debugger()
                 exit_span()
@@ -758,35 +783,14 @@ class ActorMeshRef(MeshTrait):
                         attr_name,
                         attr_value._method,
                         self._mailbox,
+                        attr_value._propagator,
                     ),
                 )
-    def __getattr__(self, name: str) -> Any:
-        # This method is called when an attribute is not found
-        # For linting purposes, we need to tell the type checker that any attribute
-        # could be an endpoint that's dynamically added at runtime
-        # At runtime, we still want to raise AttributeError for truly missing attributes
-        # Check if this is a method on the underlying class
-        if hasattr(self._class, name):
-            attr = getattr(self._class, name)
-            if isinstance(attr, EndpointProperty):
-                # Dynamically create the endpoint
-                endpoint = ActorEndpoint(
-                    self._actor_mesh_ref,
-                    name,
-                    attr._method,
-                    self._mailbox,
-                    propagator=attr._propagator,
-                )
-                # Cache it for future use
-                setattr(self, name, endpoint)
-                return endpoint
-        # If we get here, it's truly not found
-        raise AttributeError(
-            f"'{self.__class__.__name__}' object has no attribute '{name}'"
-        )
+    def __getattr__(self, attr: str) -> NotAnEndpoint:
+        if attr in dir(self._class):
+            return NotAnEndpoint(self, attr)
+        raise AttributeError(attr)
     def _create(
         self,

monarch/_src/actor/endpoint.py CHANGED Viewed

@@ -34,6 +34,7 @@ from monarch._src.actor.tensor_engine_shim import _cached_propagation, fake_call
 if TYPE_CHECKING:
     from monarch._src.actor.actor_mesh import (
+        ActorMeshRef,
         HyPortReceiver,
         OncePortReceiver,
         Port,
@@ -182,11 +183,22 @@ class Endpoint(ABC, Generic[P, R]):
         # pyre-ignore
         send(self, args, kwargs)
+    @abstractmethod
+    def _rref(self, args, kwargs) -> Any: ...
+    def rref(self, *args: P.args, **kwargs: P.kwargs) -> R:
+        return self._rref(args, kwargs)
     def _propagate(self, args, kwargs, fake_args, fake_kwargs):
         if self._propagator_arg is None or self._propagator_arg == "cached":
             if self._cache is None:
                 self._cache = {}
-            return _cached_propagation(self._cache, self._resolvable, args, kwargs)
+            resolvable = getattr(self, "_resolvable", None)
+            if resolvable is None:
+                raise NotImplementedError(
+                    "Cached propagation is not implemented for actor endpoints."
+                )
+            return _cached_propagation(self._cache, resolvable, args, kwargs)
         elif self._propagator_arg == "inspect":
             return None
         elif self._propagator_arg == "mocked":
@@ -229,13 +241,34 @@ class EndpointProperty(Generic[P, R]):
         return cast(Endpoint[P, R], self)
+class NotAnEndpoint:
+    """
+    Used as the dynamic value of functions on an ActorMeshRef that were not marked as endpoints.
+    This is used both to give a better error message (since we cannot prevent the type system from thinking they are methods),
+    and to provide the opportunity for someone to do as_endpoint(x.foo) on something that wasn't marked as an endpoint.
+    """
+    def __init__(self, ref: "ActorMeshRef", name: str):
+        self._ref = ref
+        self._name = name
+    def __call__(self, *args, **kwargs) -> None:
+        raise RuntimeError(
+            f"Actor {self._ref._class}.{self._name} is not annotated as an endpoint. To call it as one, add a @endpoint decorator to it, or directly wrap it in one as_endpoint(obj.method).call(...)"
+        )
 # This can't just be Callable because otherwise we are not
 # allowed to use type arguments in the return value.
 class EndpointIfy:
     @overload
-    def __call__(self, function: Callable[P, Awaitable[R]]) -> Endpoint[P, R]: ...
+    def __call__(
+        self, function: Callable[Concatenate[Any, P], Awaitable[R]]
+    ) -> Endpoint[P, R]: ...
     @overload
-    def __call__(self, function: Callable[P, R]) -> Endpoint[P, R]: ...
+    def __call__(
+        self, function: Callable[Concatenate[Any, P], R]
+    ) -> Endpoint[P, R]: ...
     def __call__(self, function: Any):
         pass

monarch/_src/actor/event_loop.py CHANGED Viewed

@@ -14,7 +14,7 @@ import logging
 import threading
 from typing import Optional
-from libfb.py.pyre import none_throws
+from pyre_extensions import none_throws
 logger = logging.getLogger(__name__)

monarch/_src/actor/proc_mesh.py CHANGED Viewed

@@ -43,7 +43,6 @@ from monarch._src.actor.actor_mesh import (
     Actor,
     ActorMeshRef,
     fake_sync_state,
-    MonarchContext,
 )
 from monarch._src.actor.allocator import LocalAllocator, ProcessAllocator, SimAllocator
@@ -89,7 +88,7 @@ class SetupActor(Actor):
     Typically used to setup the environment variables.
     """
-    def __init__(self, env: Callable[[MonarchContext], None]) -> None:
+    def __init__(self, env: Callable[[], None]) -> None:
         """
         Initialize the setup actor with the user defined setup method.
         """
@@ -100,8 +99,7 @@ class SetupActor(Actor):
         """
         Call the user defined setup method with the monarch context.
         """
-        ctx = MonarchContext.get()
-        self._setup_method(ctx)
+        self._setup_method()
 T = TypeVar("T")
@@ -114,7 +112,7 @@ except ImportError:
 async def _allocate_nonblocking(
-    alloc: Alloc, setup: Callable[[MonarchContext], None] | None = None
+    alloc: Alloc, setup: Callable[[], None] | None = None
 ) -> "ProcMesh":
     _proc_mesh = await HyProcMesh.allocate_nonblocking(alloc)
     if setup is None:
@@ -211,7 +209,7 @@ class ProcMesh(MeshTrait):
     @classmethod
     def from_alloc(
-        self, alloc: Alloc, setup: Callable[[MonarchContext], None] | None = None
+        self, alloc: Alloc, setup: Callable[[], None] | None = None
     ) -> Future["ProcMesh"]:
         """
         Allocate a process mesh according to the provided alloc.
@@ -219,7 +217,17 @@ class ProcMesh(MeshTrait):
         Arguments:
         - `alloc`: The alloc to allocate according to.
-        - `setup`: A lambda taking MonarchContext as param, can be used to setup env vars on the allocated mesh
+        - `setup`: An optional lambda function to configure environment variables on the allocated mesh.
+        Use the `current_rank()` method within the lambda to obtain the rank.
+        Example of a setup method to initialize torch distributed environment variables:
+        ```
+        def setup():
+            rank = current_rank()
+            os.environ["RANK"] = str(rank)
+            os.environ["WORLD_SIZE"] = str(len(rank.shape))
+            os.environ["LOCAL_RANK"] = str(rank["gpus"])
+        ```
         """
         return Future(
             impl=lambda: _allocate_nonblocking(alloc, setup),
@@ -428,7 +436,7 @@ async def proc_mesh_nonblocking(
     gpus: Optional[int] = None,
     hosts: int = 1,
     env: dict[str, str] | None = None,
-    setup: Callable[[MonarchContext], None] | None = None,
+    setup: Callable[[], None] | None = None,
 ) -> ProcMesh:
     if gpus is None:
         gpus = _local_device_count()
@@ -457,7 +465,7 @@ def proc_mesh(
     gpus: Optional[int] = None,
     hosts: int = 1,
     env: dict[str, str] | None = None,
-    setup: Callable[[MonarchContext], None] | None = None,
+    setup: Callable[[], None] | None = None,
 ) -> Future[ProcMesh]:
     return Future(
         impl=lambda: proc_mesh_nonblocking(

monarch/_src/actor/tensor_engine_shim.py CHANGED Viewed

@@ -19,7 +19,6 @@ time it is used.
 if TYPE_CHECKING:
     from monarch._src.actor.actor_mesh import ActorEndpoint, Port, Selection
-    from monarch._src.actor.endpoint import Endpoint
 def shim(fn=None, *, module=None):
@@ -48,8 +47,12 @@ def actor_send(
 ) -> None: ...
+@shim(module="monarch.mesh_controller")
+def actor_rref(endpoint, args_kwargs_tuple: bytes, refs: Sequence[Any]): ...
 @shim(module="monarch.common.remote")
-def _cached_propagation(_cache, rfunction: "Endpoint", args, kwargs) -> Any: ...
+def _cached_propagation(_cache, rfunction, args, kwargs) -> Any: ...
 @shim(module="monarch.common.fake")

monarch/actor/__init__.py CHANGED Viewed

@@ -12,6 +12,7 @@ from monarch._src.actor.actor_mesh import (
     Accumulator,
     Actor,
     ActorError,
+    as_endpoint,
     current_actor_name,
     current_rank,
     current_size,
@@ -35,6 +36,7 @@ __all__ = [
     "Actor",
     "ActorError",
     "current_actor_name",
+    "as_endpoint",
     "current_rank",
     "current_size",
     "endpoint",

monarch/common/messages.py CHANGED Viewed

@@ -435,6 +435,15 @@ class SendResultOfActorCall(NamedTuple):
     stream: tensor_worker.StreamRef
+class CallActorMethod(NamedTuple):
+    seq: int
+    result: object
+    broker_id: Tuple[str, int]
+    local_state: Sequence[Tensor | tensor_worker.Ref]
+    mutates: List[tensor_worker.Ref]
+    stream: tensor_worker.StreamRef
 class SplitComm(NamedTuple):
     dims: Dims
     device_mesh: DeviceMesh

monarch/common/remote.py CHANGED Viewed

@@ -157,7 +157,7 @@ class Remote(Generic[P, R], Endpoint[P, R]):
     def _maybe_resolvable(self):
         return None if self._remote_impl is None else self._resolvable
-    def rref(self, *args: P.args, **kwargs: P.kwargs) -> R:
+    def _rref(self, args, kwargs):
         return dtensor_dispatch(
             self._resolvable,
             self._propagate,
@@ -352,7 +352,7 @@ _miss = 0
 _hit = 0
-def _cached_propagation(_cache, rfunction: Endpoint, args, kwargs):
+def _cached_propagation(_cache, rfunction: ResolvableFunction, args, kwargs):
     tensors, shape_key = hashable_tensor_flatten(args, kwargs)
     # pyre-ignore
     inputs_group = TensorGroup([t._fake for t in tensors])

monarch/gradient/_gradient_generator.so CHANGED Viewed

Binary file

monarch/mesh_controller.py CHANGED Viewed

@@ -30,6 +30,7 @@ from monarch._rust_bindings.monarch_extension.client import (  # @manual=//monar
     WorldState,
 )
 from monarch._rust_bindings.monarch_extension.mesh_controller import _Controller
+from monarch._rust_bindings.monarch_extension.tensor_worker import Ref
 from monarch._rust_bindings.monarch_hyperactor.actor import (
     PythonMessage,
     PythonMessageKind,
@@ -44,10 +45,12 @@ from monarch._src.actor.endpoint import Selection
 from monarch._src.actor.shape import NDSlice
 from monarch.common import device_mesh, messages, stream
 from monarch.common.controller_api import TController
+from monarch.common.function import ResolvableFunction
 from monarch.common.invocation import Seq
 from monarch.common.messages import Referenceable, SendResultOfActorCall
 from monarch.common.stream import StreamRef
-from monarch.common.tensor import InputChecker, Tensor
+from monarch.common.tensor import dtensor_check, InputChecker, Tensor
+from monarch.common.tree import flatten
 from monarch.tensor_worker_main import _set_trace
 if TYPE_CHECKING:
@@ -265,17 +268,36 @@ class RemoteException(Exception):
             return "<exception formatting RemoteException>"
-def actor_send(
+def _cast_call_method_indirect(
     endpoint: ActorEndpoint,
+    selection: Selection,
+    client: MeshClient,
+    seq: Seq,
     args_kwargs_tuple: bytes,
     refs: Sequence[Any],
-    port: Optional[Port[Any]],
-    selection: Selection,
-):
+) -> Tuple[str, int]:
     unflatten_args = [
         UnflattenArg.PyObject if isinstance(ref, Tensor) else UnflattenArg.Mailbox
         for ref in refs
     ]
+    broker_id: Tuple[str, int] = client._mesh_controller.broker_id
+    actor_msg = PythonMessage(
+        PythonMessageKind.CallMethodIndirect(
+            endpoint._name, broker_id, seq, unflatten_args
+        ),
+        args_kwargs_tuple,
+    )
+    endpoint._actor_mesh.cast(actor_msg, selection)
+    return broker_id
+def actor_send(
+    endpoint: ActorEndpoint,
+    args_kwargs_tuple: bytes,
+    refs: Sequence[Any],
+    port: Optional[Port[Any]],
+    selection: Selection,
+):
     tensors = [ref for ref in refs if isinstance(ref, Tensor)]
     # we have some monarch references, we need to ensure their
     # proc_mesh matches that of the tensors we sent to it
@@ -284,7 +306,7 @@ def actor_send(
         if hasattr(t, "stream"):
             chosen_stream = t.stream
             break
-    with InputChecker(refs, lambda x: f"actor_call({x})") as checker:
+    with InputChecker(tensors, lambda x: f"actor_call({x})") as checker:
         checker.check_mesh_stream_local(device_mesh._active, chosen_stream)
         # TODO: move propagators into Endpoint abstraction and run the propagator to get the
         # mutates
@@ -300,8 +322,6 @@ def actor_send(
     client = cast(MeshClient, checker.mesh.client)
-    broker_id: Tuple[str, int] = client._mesh_controller.broker_id
     stream_ref = chosen_stream._to_ref(client)
     fut = (port, checker.mesh._ndslice) if port is not None else None
@@ -316,13 +336,9 @@ def actor_send(
     # The message to the generic actor tells it to first wait on the broker to get the local arguments
     # from the stream, then it will run the actor method, and send the result to response port.
-    actor_msg = PythonMessage(
-        PythonMessageKind.CallMethodIndirect(
-            endpoint._name, broker_id, ident, unflatten_args
-        ),
-        args_kwargs_tuple,
+    broker_id = _cast_call_method_indirect(
+        endpoint, selection, client, ident, args_kwargs_tuple, refs
     )
-    endpoint._actor_mesh.cast(actor_msg, selection)
     worker_msg = SendResultOfActorCall(ident, broker_id, tensors, [], stream_ref)
     client.send(checker.mesh._ndslice, worker_msg)
     # we have to ask for status updates
@@ -330,3 +346,49 @@ def actor_send(
     # enough work to count this future as finished,
     # and all potential errors have been reported
     client._request_status()
+def actor_rref(endpoint, args_kwargs_tuple: bytes, refs: Sequence[Any]):
+    chosen_stream = stream._active
+    fake_result, dtensors, mutates, mesh = dtensor_check(
+        endpoint._propagate,
+        cast(ResolvableFunction, endpoint._name),
+        refs,
+        {},
+        device_mesh._active,
+        chosen_stream,
+    )
+    assert mesh is not None
+    fake_result_dtensors, unflatten_result = flatten(
+        fake_result, lambda x: isinstance(x, torch.Tensor)
+    )
+    result_dtensors = tuple(
+        Tensor(fake, mesh, chosen_stream) for fake in fake_result_dtensors
+    )
+    seq = mesh.client.new_node(result_dtensors + mutates, dtensors)
+    assert all(t.ref is not None for t in result_dtensors)
+    assert all(t.ref is not None for t in mutates)
+    result = result_msg = unflatten_result(result_dtensors)
+    if len(result_dtensors) == 0:
+        result_msg = None
+    broker_id = _cast_call_method_indirect(
+        endpoint, "all", mesh.client, seq, args_kwargs_tuple, refs
+    )
+    # note the device mesh has to be defined regardless so the remote functions
+    # can invoke mesh.rank("...")
+    mesh.define_remotely()
+    mesh._send(
+        messages.CallActorMethod(
+            seq,
+            result_msg,
+            broker_id,
+            refs,
+            cast("List[Ref]", mutates),
+            stream._active._to_ref(mesh.client),
+        )
+    )
+    return result

monarch/monarch_controller CHANGED Viewed

Binary file

monarch/tools/cli.py CHANGED Viewed

@@ -86,9 +86,9 @@ class CreateCmd:
             else defaults.component_fn(config.scheduler)
         )
         component_args = component_args_from_cli(component_fn, args.component_args)
-        appdef = component_fn(**component_args)
+        config.appdef = component_fn(**component_args)
-        handle = create(config, appdef)
+        handle = create(config)
         print(handle)

monarch/tools/commands.py CHANGED Viewed

@@ -7,18 +7,19 @@
 # pyre-strict
 import argparse
+import asyncio
 import inspect
 import logging
 import os
-import time
-from datetime import timedelta
+from datetime import datetime, timedelta
 from typing import Any, Callable, Mapping, Optional, Union
+from monarch.tools.components.hyperactor import DEFAULT_NAME
 from monarch.tools.config import (  # @manual=//monarch/python/monarch/tools/config/meta:defaults
     Config,
     defaults,
 )
 from monarch.tools.mesh_spec import mesh_spec_from_metadata, ServerSpec
 from torchx.runner import Runner  # @manual=//torchx/runner:lib_core
 from torchx.specs import AppDef, AppDryRunInfo, AppState, CfgVal, parse_app_handle
@@ -83,7 +84,7 @@ def component_args_from_cli(
 def create(
     config: Config,
-    appdef: AppDef,
+    name: str = DEFAULT_NAME,
 ) -> Union[str, AppDryRunInfo]:
     """Creates a monarch server by submitting it as a job to the target scheduler.
@@ -94,7 +95,7 @@ def create(
         from monarch.tools.config import defaults
         config = defaults.config(scheduler="slurm")
-        appdef = defaults.component_fn(scheduler=config.scheduler)()
+        config.appdef = defaults.component_fn(scheduler=config.scheduler)()
         config.scheduler_args.update(
             {
@@ -105,7 +106,7 @@ def create(
         )
         config.dryrun = True
-        create(config, appdef)
+        create(config)
     Args:
@@ -114,6 +115,7 @@ def create(
         component_fn: a function that returns the AppDef (job def).
             If not provided, defaults to the configured default for the scheduler
             (in most cases ``monarch.tools.components.hyperactor.proc_mesh``)
+        name: the name of the job. If none, a default job name will be created.
     """
     scheduler: str = config.scheduler
     cfg: Mapping[str, CfgVal] = config.scheduler_args
@@ -122,6 +124,8 @@ def create(
     os.environ["TORCHX_CONTEXT_NAME"] = os.getenv("TORCHX_CONTEXT_NAME", "monarch")
     with torchx_runner() as runner:
+        appdef: AppDef = AppDef(name, config.appdef.roles, config.appdef.metadata)
         info = runner.dryrun(appdef, scheduler, cfg, config.workspace)
         info_json_fmt = AppDryRunInfo(
@@ -170,6 +174,8 @@ def info(server_handle: str) -> Optional[ServerSpec]:
         # null-guard since some schedulers do not fill replica_status
         if host_status := replica_status.get(role.name):
             spec.hostnames = [h.hostname for h in host_status]
+            # the mesh status is based on the "least progressive" replica status
+            spec.state = min(h.state for h in host_status)
         mesh_specs.append(spec)
@@ -211,6 +217,8 @@ async def server_ready(
     """
+    check_interval_seconds = check_interval.total_seconds()
+    start = datetime.now()
     while True:
         server_spec = info(server_handle)
@@ -220,42 +228,56 @@ async def server_ready(
         if server_spec.state <= AppState.PENDING:  # UNSUBMITTED or SUBMITTED or PENDING
             # NOTE: TorchX currently does not have async APIs so need to loop-on-interval
             # TODO maybe inverse exponential backoff instead of constant interval?
-            check_interval_seconds = check_interval.total_seconds()
-            logger.info(
-                "waiting for %s to be %s (current: %s), will check again in %g seconds...",
-                server_handle,
-                AppState.RUNNING,
-                server_spec.state,
-                check_interval_seconds,
+            print(
+                f"Waiting for {server_handle} to be {AppState.RUNNING} (current: {server_spec.state}); "
+                f"will check again in {check_interval_seconds} seconds. "
+                f"Total wait time: {datetime.now() - start}",
+                end="\r",
             )
-            time.sleep(check_interval_seconds)
+            await asyncio.sleep(check_interval_seconds)
             continue
-        else:
-            return server_spec
+        # check if hosts are allocated for all the meshes
+        if server_spec.state == AppState.RUNNING:
+            running = True
+            for mesh_spec in server_spec.meshes:
+                if mesh_spec.state <= AppState.PENDING:
+                    print(
+                        f"Job {server_handle} is running but waiting for mesh {mesh_spec.name} "
+                        f"to be {AppState.RUNNING} (current: {mesh_spec.state}); "
+                        f"will check again in {check_interval_seconds} seconds. "
+                        f"Total wait time: {datetime.now() - start}",
+                        end="\r",
+                    )
+                    running = False
+                    break
+            if not running:
+                await asyncio.sleep(check_interval_seconds)
+                continue
+        return server_spec
+# TODO: this API is overloaded. Ideally, we do not need config to get or a handle to create.
 async def get_or_create(
     name: str,
     config: Config,
-    appdef: AppDef,
     check_interval: timedelta = _5_SECONDS,
 ) -> ServerSpec:
-    """Waits for the server called `name` in the scheduler specified in the `config`
+    """Waits for the server based on identity `name` in the scheduler specified in the `config`
     to be ready (e.g. RUNNING). If the server is not found then this function creates one
-    per the `appdef` spec, and waits for the server to be ready before returning.
+    per the `config` spec, and waits for the server to be ready before returning.
     Usage:
     .. code-block:: python
-        import getpass
         from monarch.tools.config import defaults
-        USER = getpass.getuser()
         config = defaults.config(scheduler)
-        appdef = defaults.component_fn(config.scheduler)()
+        config.appdef = defaults.component_fn(config.scheduler)()
-        server_handle = get_or_create(f"{USER}_monarch", config, appdef)
+        server_handle = get_or_create("my_job_name", config)
         server_info = info(server_handle)
     Returns: A `ServerSpec` containing information about either the existing or the newly
@@ -273,7 +295,7 @@ async def get_or_create(
         )
         # no dryrun (see assertion above) support so will always be a handle (str)
-        new_server_handle = str(create(config, appdef))
+        new_server_handle = str(create(config, name))
         logger.info(f"created new `{new_server_handle}` waiting for it to be ready...")
@@ -289,10 +311,10 @@ async def get_or_create(
                 f"the new server `{new_server_handle}` has {server_info.state}"
             )
-        logger.info(f"server `{new_server_handle}` is: {server_info.state}")
+        print(f"\x1b[36mNew job `{new_server_handle}` is ready to serve. \x1b[0m")
         return server_info
     else:
-        logger.info("found existing RUNNING server `%s`", server_handle)
+        print(f"\x1b[36mFound existing job `{server_handle}` ready to serve. \x1b[0m")
         return server_info

monarch/tools/components/hyperactor.py CHANGED Viewed

@@ -9,6 +9,7 @@ import getpass
 from typing import Optional
 from monarch.tools import mesh_spec
+from monarch.tools.config import UnnamedAppDef
 from monarch.tools.mesh_spec import mesh_spec_from_str
 from torchx import specs
@@ -16,17 +17,18 @@ _DEFAULT_MESHES = ["mesh_0:1:gpu.small"]
 _USER: str = getpass.getuser()
+DEFAULT_NAME: str = f"monarch-{_USER}"
 __version__ = "latest"  # TODO get version from monarch.__version_
 def proc_mesh(
-    name: str = f"monarch-{_USER}",
     image: str = f"ghcr.io/pytorch-labs/monarch:{__version__}",  # TODO docker needs to be built and pushed to ghcr
     meshes: list[str] = _DEFAULT_MESHES,
     env: Optional[dict[str, str]] = None,
     port: int = mesh_spec.DEFAULT_REMOTE_ALLOCATOR_PORT,
     program: str = "monarch_bootstrap",  # installed with monarch wheel (as console script)
-) -> specs.AppDef:
+) -> UnnamedAppDef:
     """
     Args:
         name: the name of the monarch server job
@@ -37,7 +39,7 @@ def proc_mesh(
         program: path to the binary that the remote process allocator spawns on an allocation request
     """
-    appdef = specs.AppDef(name)
+    appdef = UnnamedAppDef()
     for mesh in [mesh_spec_from_str(mesh) for mesh in meshes]:
         mesh_role = specs.Role(

monarch/tools/config/__init__.py CHANGED Viewed

@@ -6,15 +6,32 @@
 # pyre-strict
 from dataclasses import dataclass, field
-from typing import Any, Optional
+from typing import Any, Dict, List, Optional
+from torchx.specs import Role
 NOT_SET: str = "__NOT_SET__"
+@dataclass
+class UnnamedAppDef:
+    """
+    A TorchX AppDef without a name.
+    """
+    roles: List[Role] = field(default_factory=list)
+    metadata: Dict[str, str] = field(default_factory=dict)
 @dataclass
 class Config:
+    """
+    All configs needed to schedule a mesh of allocators.
+    """
     scheduler: str = NOT_SET
     scheduler_args: dict[str, Any] = field(default_factory=dict)
     workspace: Optional[str] = None
     dryrun: bool = False
+    appdef: UnnamedAppDef = UnnamedAppDef()

monarch/tools/config/defaults.py CHANGED Viewed

@@ -11,7 +11,7 @@
 from typing import Callable, Optional
 from monarch.tools.components import hyperactor
-from monarch.tools.config import Config
+from monarch.tools.config import Config, UnnamedAppDef
 from torchx import specs
 from torchx.schedulers import (
@@ -23,7 +23,7 @@ from torchx.schedulers import (
 )
-def component_fn(scheduler: str) -> Callable[..., specs.AppDef]:
+def component_fn(scheduler: str) -> Callable[..., UnnamedAppDef]:
     """The default TorchX component function for the scheduler"""
     return hyperactor.proc_mesh

monarch/tools/mesh_spec.py CHANGED Viewed

@@ -9,6 +9,8 @@ import string
 from dataclasses import dataclass, field
 from typing import Any, Optional
+from monarch.tools.config import UnnamedAppDef
 from monarch.tools.network import get_sockaddr
 from torchx import specs
 from torchx.specs.api import is_terminal
@@ -39,6 +41,7 @@ class MeshSpec:
     transport: str = "tcp"
     port: int = DEFAULT_REMOTE_ALLOCATOR_PORT
     hostnames: list[str] = field(default_factory=list)
+    state: specs.AppState = specs.AppState.UNSUBMITTED
     def server_addrs(
         self, transport: Optional[str] = None, port: Optional[int] = None
@@ -69,7 +72,7 @@ def _tag(mesh_name: str, tag_template: str) -> str:
     return string.Template(tag_template).substitute(mesh_name=mesh_name)
-def tag_as_metadata(mesh_spec: MeshSpec, appdef: specs.AppDef) -> None:
+def tag_as_metadata(mesh_spec: MeshSpec, appdef: UnnamedAppDef) -> None:
     appdef.metadata[_tag(mesh_spec.name, _TAG_HOST_TYPE)] = mesh_spec.host_type
     appdef.metadata[_tag(mesh_spec.name, _TAG_GPUS)] = str(mesh_spec.gpus)
     appdef.metadata[_tag(mesh_spec.name, _TAG_TRANSPORT)] = mesh_spec.transport

tests/test_allocator.py CHANGED Viewed

@@ -33,7 +33,6 @@ from monarch._rust_bindings.monarch_hyperactor.channel import (
     ChannelTransport,
 )
-from monarch._src.actor.actor_mesh import MonarchContext
 from monarch._src.actor.allocator import (
     ALLOC_LABEL_PROC_MESH_NAME,
     LocalAllocator,
@@ -160,7 +159,7 @@ class TestSetupActorInAllocator(unittest.IsolatedAsyncioTestCase):
             "TEST_ENV_VAR_3": "value_3",
         }
-        def setup_multiple_env_vars(ctx: MonarchContext) -> None:
+        def setup_multiple_env_vars() -> None:
             for name, value in env_vars.items():
                 os.environ[name] = value
@@ -184,36 +183,33 @@ class TestSetupActorInAllocator(unittest.IsolatedAsyncioTestCase):
             await proc_mesh.stop()
     async def test_setup_lambda_with_context_info(self) -> None:
-        """Test that the setup lambda can access context information"""
-        context_var_name: str = "PROC_MESH_CONTEXT_INFO"
+        """Test that the setup lambda can access rank information"""
+        context_var_name: str = "PROC_MESH_RANK_INFO"
-        def setup_with_context(ctx: MonarchContext) -> None:
-            context_info = f"proc_id:{ctx.proc_id},point_rank:{ctx.point.rank}"
+        def setup_with_rank() -> None:
+            context_info = f"point_rank:{current_rank().rank}"
             os.environ[context_var_name] = context_info
         spec = AllocSpec(AllocConstraints(), gpus=1, hosts=1)
         allocator = LocalAllocator()
         alloc = await allocator.allocate(spec)
-        proc_mesh = await ProcMesh.from_alloc(alloc, setup=setup_with_context)
+        proc_mesh = await ProcMesh.from_alloc(alloc, setup=setup_with_rank)
         try:
             actor = await proc_mesh.spawn("env_check", EnvCheckActor)
-            context_info = await actor.get_env_var.call_one(context_var_name)
+            rank_info = await actor.get_env_var.call_one(context_var_name)
             self.assertNotEqual(
-                context_info,
+                rank_info,
                 "NOT_SET",
                 "Context information was not stored in the environment variable",
             )
-            self.assertIn(
-                "proc_id:", context_info, "Context information does not contain proc_id"
-            )
             self.assertIn(
                 "point_rank:0",
-                context_info,
-                f"Context information {context_info} does not contain point_rank",
+                rank_info,
+                f"Context information {rank_info} does not contain point_rank",
             )
         finally:
             await proc_mesh.stop()
@@ -435,7 +431,7 @@ class TestRemoteAllocator(unittest.IsolatedAsyncioTestCase):
         test_var_name: str = "TEST_ENV_VAR_FOR_PROC_MESH"
         test_var_value: str = "test_value_123"
-        def setup_env_vars(ctx: MonarchContext) -> None:
+        def setup_env_vars() -> None:
             os.environ[test_var_name] = test_var_value
         hosts = 2

tests/test_env_before_cuda.py CHANGED Viewed

@@ -15,7 +15,6 @@ import cloudpickle
 import torch
 from monarch._rust_bindings.monarch_hyperactor.alloc import AllocConstraints, AllocSpec
-from monarch._src.actor.actor_mesh import MonarchContext
 from monarch._src.actor.allocator import LocalAllocator
 from monarch._src.actor.proc_mesh import proc_mesh
 from monarch.actor import Actor, endpoint, ProcMesh
@@ -70,7 +69,7 @@ class TestEnvBeforeCuda(unittest.IsolatedAsyncioTestCase):
             "CUDA_LAUNCH_BLOCKING": "1",
         }
-        def setup_cuda_env(_: MonarchContext) -> None:
+        def setup_cuda_env() -> None:
             for name, value in cuda_env_vars.items():
                 os.environ[name] = value
@@ -107,7 +106,7 @@ class TestEnvBeforeCuda(unittest.IsolatedAsyncioTestCase):
             "CUDA_DEVICE_MAX_CONNECTIONS": "1",
         }
-        def setup_cuda_env(_: MonarchContext) -> None:
+        def setup_cuda_env() -> None:
             for name, value in cuda_env_vars.items():
                 os.environ[name] = value

tests/test_tensor_engine.py CHANGED Viewed

@@ -8,7 +8,7 @@ import monarch
 import pytest
 import torch
 from monarch import remote
-from monarch.actor import Actor, endpoint, proc_mesh
+from monarch.actor import Actor, as_endpoint, endpoint, proc_mesh
 from monarch.mesh_controller import spawn_tensor_engine
@@ -104,3 +104,29 @@ def test_actor_tensor_ordering() -> None:
             results.append(counter.incr.call(1))
         assert list(range(10)) == [r.get().item(hosts=0, gpus=0) for r in results]
+class Linear(Actor):
+    def __init__(self, N: int, M: int):
+        self.weight = torch.zeros((N, M))
+    def forward(self, x) -> torch.Tensor:
+        return x @ self.weight
+    @endpoint(propagate="inspect")
+    def update(self, w: torch.Tensor) -> None:
+        self.weight += w
+@two_gpu
+def test_rref_actor() -> None:
+    pm = proc_mesh(gpus=1).get()
+    with pm.activate():
+        x = pm.spawn("linear", Linear, 3, 4).get()
+        y = torch.ones((4, 3))
+        t = as_endpoint(x.forward, propagate=lambda x: torch.rand(3, 4)).rref(y)
+        assert monarch.inspect(t.sum()).item() == 0
+        x.update.rref(torch.ones((3, 4)))
+        t = as_endpoint(x.forward, propagate=lambda x: torch.rand(3, 4)).rref(y)
+        assert monarch.inspect(t.sum()).item() == 3 * 4 * 4

{torchmonarch_nightly-2025.7.25.dist-info → torchmonarch_nightly-2025.7.26.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: torchmonarch-nightly
-Version: 2025.7.25
+Version: 2025.7.26
 Summary: Monarch: Single controller library
 Author: Meta
 Author-email: oncall+monarch@xmail.facebook.com
@@ -44,6 +44,8 @@ Note: Monarch is currently only supported on Linux systems
 ## Installation
+### On Fedora distributions
 `pip install torchmonarch-nightly`
 or manually
@@ -88,6 +90,37 @@ pip install --no-build-isolation -e .
 pytest python/tests/ -v -m "not oss_skip"
 ```
+### On MacOS
+You can also build Monarch to run locally on a MacOS system.
+Note that this does not support tensor engine, which is tied to CUDA and RDMA (via ibverbs).
+```sh
+# Create and activate the conda environment
+conda create -n monarchenv python=3.10 -y
+conda activate monarchenv
+# Install nightly rust toolchain
+curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
+rustup toolchain install nightly
+rustup default nightly
+# Install build dependencies
+pip install -r build-requirements.txt
+# Install test dependencies
+pip install -r python/tests/requirements.txt
+# Build and install Monarch
+USE_TENSOR_ENGINE=0 pip install --no-build-isolation .
+# or setup for development
+USE_TENSOR_ENGINE=0 pip install --no-build-isolation -e .
+```
 ## Running examples
 Check out the `examples/` directory for demonstrations of how to use Monarch's APIs.

{torchmonarch_nightly-2025.7.25.dist-info → torchmonarch_nightly-2025.7.26.dist-info}/RECORD RENAMED Viewed

@@ -1,5 +1,5 @@
 monarch/__init__.py,sha256=mgKiyD1kxky-1pvhMlNfF4VmxWnhi-FSYZNFzkW1BEM,7052
-monarch/_rust_bindings.so,sha256=nHFmFZoCgxS1M40RBTn52oOVV9nR7eJ88hwz0jf_g6c,47433680
+monarch/_rust_bindings.so,sha256=pvUGrYVAW1JrVgti7IJsFSb7Va0vWTB95RvoOgAIXGE,47520848
 monarch/_testing.py,sha256=_3MYNMq-_0T1qXCj2vxrW13GlWGdUuVFMskQF2Gsw_o,7877
 monarch/actor_mesh.py,sha256=VtPU9syi_vUdwDSJJ639Z4Y_EcWZUScyoj0lQ88RQPs,421
 monarch/bootstrap_main.py,sha256=39OZpNMrfvvNJf-iwuNzgslzYA_ItaRPHfXGn_V74N0,524
@@ -7,8 +7,8 @@ monarch/cached_remote_function.py,sha256=kYdB6r4OHx_T_uX4q3tCNcp1t2DJwF8tPTIahUi
 monarch/fetch.py,sha256=JMxC8HdWMvpik0T4E6e-gfxvmNnOkA0ul4eo4R3Jg_o,1712
 monarch/gradient_generator.py,sha256=Rl3dmXGceTdCc1mYBg2JciR88ywGPnW7TVkL86KwqEA,6366
 monarch/memory.py,sha256=ol86dBhFAJqg78iF25-BuK0wuwj1onR8FIioZ_B0gjw,1377
-monarch/mesh_controller.py,sha256=uFAExVNzMGoeQI0kmuv4-kMbKHn420oeaG1rTYFi2sg,11884
-monarch/monarch_controller,sha256=bHrKU_rB52VnriwATjytMs8vmeLmZfFC04B7g0X9aLE,21202456
+monarch/mesh_controller.py,sha256=mOk2misobJun2AgR_ALjFoopAEcOPYQVrrAJXa18ZTs,13810
+monarch/monarch_controller,sha256=1gXdTNolxIUp_88alJnW1onBHseo5PzuNnjEnBte4wI,21243128
 monarch/notebook.py,sha256=zu9MKDFKf1-rCM2TqFSRJjMBeiWuKcJSyUFLvoZRQzs,25949
 monarch/opaque_module.py,sha256=jCcg0DjbcEVXA9WNG0NhUzGteLHOJLTZEBvrIYJIAns,10436
 monarch/opaque_object.py,sha256=x1LoX6RIMGh4ux52xIfhPgoh6PhZHdkf9bMccHW3DW0,2808
@@ -25,27 +25,27 @@ monarch/tensorboard.py,sha256=MnLgH5lbqeUJauEuirEgR6L_qYl2NGdtwZOWIAuOZao,2587
 monarch/world_mesh.py,sha256=ob5dJWaC49Uw0xqClHBm8CQLvL4xKnjd4TGzk7k8NxI,980
 monarch/_src/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 monarch/_src/actor/__init__.py,sha256=4iK3nzQZmEPe0HGNY70fABBenw3lCVVaaF0xddF5Fa0,235
-monarch/_src/actor/actor_mesh.py,sha256=nG56lE6RwcNHeF4SUutf1NPmO8GQMaIjCjphCxf_pRU,29233
+monarch/_src/actor/actor_mesh.py,sha256=guYD9nZHguLGJAvTisc3Q664ASkupcNC6z9iheeGFUQ,29188
 monarch/_src/actor/allocator.py,sha256=WpHEK1SvjgF3GdIWIYUkonXli2-gQVKJVZPInl2RFQo,8212
 monarch/_src/actor/bootstrap_main.py,sha256=e5eU3GvX60MWWmCty7VcZrAmukD29iJdWBysNgQ2o3A,2342
 monarch/_src/actor/debugger.py,sha256=t2iAAxz03b2KZ89T3VjRc__7GzSf83R8gM81SDyX3-o,19532
 monarch/_src/actor/device_utils.py,sha256=gBpl23wMjppVAEzzj8U9HyX-B7Bs2_3ftiMAkzUS4j4,577
-monarch/_src/actor/endpoint.py,sha256=V8J4LVTnTFeo4HhkdhISmepzJOxdqbHe6cxwYkj9Qyg,8462
-monarch/_src/actor/event_loop.py,sha256=gpfbPoOhrY8W2Z5cDcujIfGI0NTdLrRycs7FmbsVatE,2830
+monarch/_src/actor/endpoint.py,sha256=jM3XYWze6gB6ajE4AMojNFSN4IaaxgioNAErJkkywjE,9721
+monarch/_src/actor/event_loop.py,sha256=2i4fKIkemBzua_t47BqVa2roZ6fWB6sbmMFPNx2zKN0,2832
 monarch/_src/actor/future.py,sha256=jOGh1wfwKyGJxhl9t1P8eapXYywf8KwQldZCCbupmb8,4042
 monarch/_src/actor/pdb_wrapper.py,sha256=-QxRktntdEO2LdHixBGKLboYtADyh8bEIAoa3gFwIEo,4161
 monarch/_src/actor/pickle.py,sha256=jD_3E07OJmMIlcMOOrNFnIuRKZU2F_Q_BP-njDFXUNM,2044
-monarch/_src/actor/proc_mesh.py,sha256=mbXgoUAnajKqe54LQSV3QfWii2b28-NxL4YmbpS4hxM,16368
+monarch/_src/actor/proc_mesh.py,sha256=amF4fbO-33qHFudlS9WabYXIVh0Y_D_0nhCTxvOhpGg,16640
 monarch/_src/actor/shape.py,sha256=B-7DI768ZhT8ECUNCJcI7DfCB7iDFGFH0r-HmXaAfcM,8296
 monarch/_src/actor/sync_state.py,sha256=GB6bTAGztkcN8fZ9K7zXklOzjYzv6cvkJeBje20xFkE,471
-monarch/_src/actor/tensor_engine_shim.py,sha256=r9ZK0ELVvn-cbs4LdP7IxO9KZaLV9p6q36ERbthjEd0,1568
+monarch/_src/actor/tensor_engine_shim.py,sha256=hupavQ2rjPwECaTlDAhY-eeiEY18Wyyx59MZHcSEcYM,1622
 monarch/_src/actor/code_sync/__init__.py,sha256=qzWoFNJEJvEbqab0QuHbkvhdz6FHi7BOTw6-2B3p0A4,378
 monarch/_src/actor/code_sync/auto_reload.py,sha256=kqXCQuSzjxMw8bcDLsUZiL_NImo4j2EScfNklwpltmU,6685
 monarch/_src/actor/telemetry/__init__.py,sha256=sHA5fmFdWU9jcUJVszNFhbXbjRSIBmuDXDMwJrrE0hw,523
 monarch/_src/actor/telemetry/rust_span_tracing.py,sha256=UvkywuwjQX7tIyLdKZbF-fcmI_aHporAejsTRTyJNNg,4445
 monarch/_src/tensor_engine/__init__.py,sha256=Md3cCHD7Ano9kV15PqGbicgUO-RMdh4aVy1yKiDt_xE,208
 monarch/_src/tensor_engine/rdma.py,sha256=KbhJXMuuHruYXnmxzB3BpkpcGsZ4hSu_7C6wF-EPhDk,6331
-monarch/actor/__init__.py,sha256=oQY131pVHg9iBnyZIi-zHvDturO6VuSlfnWktLwI3cE,1014
+monarch/actor/__init__.py,sha256=F87BC7owDdH_yRjLvMu6pbICbajndsEbtWG2W53Rapo,1050
 monarch/builtins/__init__.py,sha256=QcfnHZGbc2qktBg7DyZt2ruE6VahnIt4S8lEZLHdJqU,443
 monarch/builtins/log.py,sha256=H1QkuVzwxyi36Zyv-XR0VN0QsNimBWwxE1__fjs0_2o,554
 monarch/builtins/random.py,sha256=wPbvscg7u53EXpMFo885fO2XOlsyjrNAJ4rBxLzfxdg,1839
@@ -67,14 +67,14 @@ monarch/common/function_caching.py,sha256=HVdbWtv6Eea7ENMWi8iv36w1G1TaVuUJhkUX_J
 monarch/common/future.py,sha256=D1UJ_8Rvb8-VG9vNE-z7xz2m2otMd2HgB0rnA02nlvA,4681
 monarch/common/invocation.py,sha256=L4mSmzqlHMxo1Tb71hBU_M8aBZCRCOcb6vvPhvvewec,4195
 monarch/common/mast.py,sha256=XTzYljGR0aZ7GjmNMPgU2HyuL4HWSAy4IwE3kEDqdOw,7735
-monarch/common/messages.py,sha256=Xp2TdVhCN52qpSnDGzi1o813okYZ9-vY9mBxw7ZFGVg,18606
+monarch/common/messages.py,sha256=jwwJMVVx3gKd39AXcnRxjMr7lPJRLimHtZYel3zjq4o,18833
 monarch/common/mock_cuda.py,sha256=x6ho1Ton6BbKjBZ5ZxnFOUaQM032X70wnpoUNB7Ci2w,1039
 monarch/common/opaque_ref.py,sha256=tWNvOC6CsjNPKD1JDx-8PSaeXqZC3eermgBExUPKML4,2871
 monarch/common/pipe.py,sha256=9pTf8--3yOv4HpnJEhgcmc_JM6Az4uL1y72TSQA55dw,5013
 monarch/common/process_group.py,sha256=FbJ_AJRZYFkvQ68L2naRq64J_aNuAKe5kO0MWdn_x74,1662
 monarch/common/recording.py,sha256=Q39Zhb3kT52NCPf4VVMox2WXjtXju5eTuvPMZ_QGW7o,4660
 monarch/common/reference.py,sha256=O26lkzEeVwj0S1xEy-OLqdHVnACmmlbQCUmXRrW4n1Q,938
-monarch/common/remote.py,sha256=h0niT9wDHuRuIPwQYBa4QVM803BxA91ANjsnKH2ZyXc,12144
+monarch/common/remote.py,sha256=Q2YpAo_fsdh22ElUNruxYyn-cNTecZr8POcHCGtuKyg,12129
 monarch/common/selection.py,sha256=lpWFbZs3ArYy29e-53eoAVAjQFksf1RvZz9NvM0CUW4,308
 monarch/common/stream.py,sha256=_ejoxafHtdD10lLzznRCXKwrkZ_ZH9k_VTgiA5yfBrI,3583
 monarch/common/tensor.py,sha256=ysT51NClNF4FxV0DFLJJUNmCRaVy8uQuYWpLViyPLdY,29292
@@ -89,7 +89,7 @@ monarch/controller/rust_backend/__init__.py,sha256=J8qjUOysmcMAek2KFN13mViOXZxTY
 monarch/controller/rust_backend/controller.py,sha256=8IYnVUiqEVKO9rGL3vKqcCSAhWJG1bYYQ0MoaMqsp78,9521
 monarch/gradient/__init__.py,sha256=kqmzwt16mMpk0M3GhpgP_f7da4DGnaV9chDzbt66k4Q,308
 monarch/gradient/_gradient_generator.pyi,sha256=6cX0UxaDt9NAlwgIhTgnweqGOf6qRhHiGnUzSWNCxdU,630
-monarch/gradient/_gradient_generator.so,sha256=WyGDzhfswBpL-pzHSG_FQVgOS-ZMTTk34gswjFtt8ZU,11531728
+monarch/gradient/_gradient_generator.so,sha256=VyBW9SsE1IJ5iiiq1Su0BgTR7vDBnbaRQj7yjAtBPUs,11531728
 monarch/parallel/__init__.py,sha256=6920kIkhiX7AiyjYvyc1ad8ccP-bStJJ1sS5KkeN2P0,352
 monarch/parallel/pipelining/__init__.py,sha256=J8qjUOysmcMAek2KFN13mViOXZxTYc5vCrF02t3VuFU,223
 monarch/parallel/pipelining/runtime.py,sha256=KK8TG1gUYEzSsquiZoPTWGSIC74mlncD7cYknKxfb3c,32470
@@ -115,15 +115,15 @@ monarch/timer/example_spmd.py,sha256=p8i3_tO1AmpwSkZryiSjgkh7qaEZ6QXp2Fy1qtPpECA
 monarch/timer/execution_timer.py,sha256=1YsrLIZirdohKOeFAU2H4UcONhQXHuctJbYcoX8I6gY,6985
 monarch/timer/execution_timer_test.py,sha256=CSxTv44fFZQURJlCBmYvysQI1aS_zEGZs_uxl9SOHak,4486
 monarch/tools/__init__.py,sha256=J8qjUOysmcMAek2KFN13mViOXZxTYc5vCrF02t3VuFU,223
-monarch/tools/cli.py,sha256=rni8Et8_uMZLTKwwdqeFTia84pDwMh6Ne-IJx36LYsc,5002
-monarch/tools/commands.py,sha256=fU4EPnNx0M2iH4eLJsMqPDzIGl6t9ALSJc1szLHQP6Y,10821
-monarch/tools/mesh_spec.py,sha256=kAo_GcU0LOm6cBkbeBGBtU9WKKS0kiDG-M7Uf9Wrp7Y,7831
+monarch/tools/cli.py,sha256=b3mKZnK-MwP7JwskTxHI0KcJXxSU6498jEb2ntVr_VM,5001
+monarch/tools/commands.py,sha256=3xuvHcMwl0t6cWTVUxI_r8EqrJZnay0bkKxOijhlKrw,12126
+monarch/tools/mesh_spec.py,sha256=in6txNRmA-UvveVSMHCjX6mGpofd3K8vl2Plz1eD6rg,7935
 monarch/tools/network.py,sha256=mN8Fx9mervxM3VdFHRn4ZXt4z7yWxZp52BTxx2tfpus,2455
 monarch/tools/utils.py,sha256=2GGUQQE0dLtzoKy40_tAsOfbSxE6krnL0WvwMgUBgmw,1213
 monarch/tools/components/__init__.py,sha256=J8qjUOysmcMAek2KFN13mViOXZxTYc5vCrF02t3VuFU,223
-monarch/tools/components/hyperactor.py,sha256=Ryi1X07VLcaQVlpc4af65JNBbZtOb9IAlKxSKMZ1AW4,2120
-monarch/tools/config/__init__.py,sha256=OPSflEmJB2zxAaRVzzWSWXV5M5vlknLgpulGdW1ze5U,510
-monarch/tools/config/defaults.py,sha256=34a3HQhyXqt9qR2SYMVCROoNsnwk37rIwLXXiKwqtog,1894
+monarch/tools/components/hyperactor.py,sha256=gYZS8AcmoTuq48mRrZWWnyxQqaiwTNHv8YqHhHi799U,2169
+monarch/tools/config/__init__.py,sha256=MLa6uvVJssN_zTciCvCMeCURWglchCuqE3zdqA-gh4U,869
+monarch/tools/config/defaults.py,sha256=ZymyKr9fNnBIgsV-xz-KrtrXRLkJo3hymTqxjXXnBzs,1910
 monarch/worker/__init__.py,sha256=J8qjUOysmcMAek2KFN13mViOXZxTYc5vCrF02t3VuFU,223
 monarch/worker/_testing_function.py,sha256=A81cVMKgdlO66XvoYcBCDrxIQIm3o3GgvcH_c8M9OmI,13480
 monarch/worker/compiled_block.py,sha256=hYx1F6PAu0_BnpKAprP_nV9qJtk5XWO7mcwH3JPDioU,10114
@@ -146,12 +146,12 @@ tests/error_test_binary.py,sha256=cgdrnVI3SIzAFSRXTvASfiR8eKSMrZ7N3tSCLVkJo44,78
 tests/sleep_binary.py,sha256=XfLYaAfwm9xgzM-svs8fhAeFhwYIg6SyVEnx4e6wbUw,1009
 tests/test_actor_error.py,sha256=kEfj1XW_WPk2mplucFBuzCWU3UrvzwkKoHSLIZfyQr0,20945
 tests/test_alloc.py,sha256=IW7yJSaKxhOYc8SJtFyREakDUwiKWq9M0CGgYyBYHoc,743
-tests/test_allocator.py,sha256=gETCLy7kMNVo17gxR3rnMq8kdH5IwcF2tVher-fAOxU,29047
+tests/test_allocator.py,sha256=4LcUB4QRNGDp0qBWAyLM6ektmoxpO922f-NcHZziJ_w,28762
 tests/test_coalescing.py,sha256=JZ4YgQNlWWs7N-Z8KCCXQPANcuyyXEKjeHIXYbPnQhk,15606
 tests/test_controller.py,sha256=CIMb-ApmBcBj1eCqccDUAbVyyJWMGooAha5gQk0AoeY,31452
 tests/test_debugger.py,sha256=mtd_no7dAooBePoQ_TZOxtgzwd1-x6xkpzAFK1_Y8B8,13703
 tests/test_device_mesh.py,sha256=DrbezYOM0thfP9MgLXb5-F0VoLOmSz5GR0GwjR_3bE4,5290
-tests/test_env_before_cuda.py,sha256=w00oi9aP0tFuZtUFggzA9h6qWXXgLo1rN1GoLJZbG10,5531
+tests/test_env_before_cuda.py,sha256=K5zdpXNRZB8hXQJaTN_CftcGHb3vzzdKasu8KFUoiCg,5440
 tests/test_fault_tolerance.py,sha256=u4wmG1z5MZ6PY6us5zUZHJh2pUC3L7i0wsUfRDNHmxA,14144
 tests/test_future.py,sha256=cXzaNi2YDwVyjR541ScXmgktX1YFsKzbl8wep0DMVbk,3032
 tests/test_grad_generator.py,sha256=p4Pm4kMEeGldt2jUVAkGKCB0mLccKI28pltH6OTGbQA,3412
@@ -163,15 +163,15 @@ tests/test_remote_functions.py,sha256=VT65W7htp1jCsP9-AsiO1dofhx-4OebWEOssDEgi3G
 tests/test_rust_backend.py,sha256=3TLu8dSVEqyLhjHED2DoAEQHTpbBQcr3WI6K2eGZLZw,7861
 tests/test_signal_safe_block_on.py,sha256=bmal0XgzJowZXJV6T1Blow5a-vZluYWusCThLMGxyTE,3336
 tests/test_sim_backend.py,sha256=kT7MnPo5O9xxX8f7uZOpR9Tkuz5brjaOyK1g1NqHRlo,1398
-tests/test_tensor_engine.py,sha256=LIJOb6hPVCpgLJjjPlcH2MgLIyM1JG7d-qMFpIUvFuQ,2793
+tests/test_tensor_engine.py,sha256=_F70SQiUCRVZcbq5JcP5XkGJFnul57pqBpu1rF9kipE,3591
 tests/simulator/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 tests/simulator/test_profiling.py,sha256=TGYCfzTLdkpIwnOuO6KApprmrgPIRQe60KRX3wkB0sg,4565
 tests/simulator/test_simulator.py,sha256=LO8lA0ssY-OGEBL5ipEu74f97Y765TEwfUOv-DtIptM,14568
 tests/simulator/test_task.py,sha256=ipqBDuDAysuo1xOB9S5psaFvwe6VATD43IovCTSs0t4,2327
 tests/simulator/test_worker.py,sha256=QrWWIJ3HDgDLkBPRc2mwYPlOQoXQcj1qRfc0WUfKkFY,3507
-torchmonarch_nightly-2025.7.25.dist-info/licenses/LICENSE,sha256=e0Eotbf_rHOYPuEUlppIbvwy4SN98CZnl_hqwvbDA4Q,1530
-torchmonarch_nightly-2025.7.25.dist-info/METADATA,sha256=sumslJxhBqMCVPa_3AoGQh7fY7dJHR6DhInBfTc6skE,3063
-torchmonarch_nightly-2025.7.25.dist-info/WHEEL,sha256=JC9FVdjbTDi9l3EyrqUd11CgmN9LkBi1g5dFHayafwA,104
-torchmonarch_nightly-2025.7.25.dist-info/entry_points.txt,sha256=60QVSpYVzkzS4iDOiLp0fsLxVp47X3J2l3v7W-59LMo,117
-torchmonarch_nightly-2025.7.25.dist-info/top_level.txt,sha256=E-ZssZzyM17glpVrh-S9--qJ-w9p2EjuYOuNw9tQ4Eg,33
-torchmonarch_nightly-2025.7.25.dist-info/RECORD,,
+torchmonarch_nightly-2025.7.26.dist-info/licenses/LICENSE,sha256=e0Eotbf_rHOYPuEUlppIbvwy4SN98CZnl_hqwvbDA4Q,1530
+torchmonarch_nightly-2025.7.26.dist-info/METADATA,sha256=rJuOVCi7kVf2R9tHhtMSlaWv80ybWS5g9MvMzmso5M8,3852
+torchmonarch_nightly-2025.7.26.dist-info/WHEEL,sha256=JC9FVdjbTDi9l3EyrqUd11CgmN9LkBi1g5dFHayafwA,104
+torchmonarch_nightly-2025.7.26.dist-info/entry_points.txt,sha256=60QVSpYVzkzS4iDOiLp0fsLxVp47X3J2l3v7W-59LMo,117
+torchmonarch_nightly-2025.7.26.dist-info/top_level.txt,sha256=E-ZssZzyM17glpVrh-S9--qJ-w9p2EjuYOuNw9tQ4Eg,33
+torchmonarch_nightly-2025.7.26.dist-info/RECORD,,

{torchmonarch_nightly-2025.7.25.dist-info → torchmonarch_nightly-2025.7.26.dist-info}/WHEEL RENAMED Viewed

File without changes

{torchmonarch_nightly-2025.7.25.dist-info → torchmonarch_nightly-2025.7.26.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{torchmonarch_nightly-2025.7.25.dist-info → torchmonarch_nightly-2025.7.26.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{torchmonarch_nightly-2025.7.25.dist-info → torchmonarch_nightly-2025.7.26.dist-info}/top_level.txt RENAMED Viewed

File without changes