torchmonarch-nightly 2025.7.1__cp310-cp310-manylinux2014_x86_64.whl → 2025.7.25__cp310-cp310-manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- monarch/__init__.py +13 -9
- monarch/_rust_bindings.so +0 -0
- monarch/{_monarch/selection → _src/actor}/__init__.py +3 -7
- monarch/_src/actor/actor_mesh.py +874 -0
- monarch/{allocator.py → _src/actor/allocator.py} +26 -17
- monarch/_src/actor/bootstrap_main.py +73 -0
- monarch/{code_sync.py → _src/actor/code_sync/__init__.py} +3 -1
- monarch/_src/actor/code_sync/auto_reload.py +223 -0
- monarch/_src/actor/debugger.py +565 -0
- monarch/_src/actor/endpoint.py +270 -0
- monarch/_src/actor/event_loop.py +97 -0
- monarch/_src/actor/future.py +100 -0
- monarch/{pdb_wrapper.py → _src/actor/pdb_wrapper.py} +47 -46
- monarch/{common/pickle_flatten.py → _src/actor/pickle.py} +26 -2
- monarch/_src/actor/proc_mesh.py +500 -0
- monarch/_src/actor/sync_state.py +18 -0
- monarch/{telemetry.py → _src/actor/telemetry/__init__.py} +1 -1
- monarch/_src/actor/telemetry/rust_span_tracing.py +159 -0
- monarch/_src/actor/tensor_engine_shim.py +56 -0
- monarch/_src/tensor_engine/rdma.py +180 -0
- monarch/_testing.py +3 -2
- monarch/actor/__init__.py +51 -0
- monarch/actor_mesh.py +6 -765
- monarch/bootstrap_main.py +8 -47
- monarch/common/client.py +1 -1
- monarch/common/controller_api.py +2 -1
- monarch/common/device_mesh.py +12 -2
- monarch/common/messages.py +12 -1
- monarch/common/recording.py +4 -3
- monarch/common/remote.py +135 -52
- monarch/common/tensor.py +2 -1
- monarch/controller/backend.py +2 -2
- monarch/controller/controller.py +2 -1
- monarch/controller/rust_backend/controller.py +2 -1
- monarch/fetch.py +3 -5
- monarch/mesh_controller.py +201 -139
- monarch/monarch_controller +0 -0
- monarch/opaque_module.py +4 -6
- monarch/opaque_object.py +3 -3
- monarch/proc_mesh.py +6 -309
- monarch/python_local_mesh.py +1 -1
- monarch/rust_backend_mesh.py +2 -1
- monarch/rust_local_mesh.py +4 -2
- monarch/sim_mesh.py +10 -19
- monarch/simulator/command_history.py +1 -1
- monarch/simulator/interface.py +2 -1
- monarch/simulator/mock_controller.py +1 -1
- monarch/simulator/simulator.py +1 -1
- monarch/tensor_engine/__init__.py +23 -0
- monarch/tensor_worker_main.py +3 -1
- monarch/tools/cli.py +3 -1
- monarch/tools/commands.py +95 -35
- monarch/tools/mesh_spec.py +55 -0
- monarch/tools/utils.py +38 -0
- monarch/worker/worker.py +1 -1
- monarch/world_mesh.py +2 -1
- monarch_supervisor/python_executable.py +6 -3
- tests/error_test_binary.py +48 -10
- tests/test_actor_error.py +370 -21
- tests/test_alloc.py +1 -1
- tests/test_allocator.py +373 -17
- tests/test_controller.py +2 -0
- tests/test_debugger.py +416 -0
- tests/test_env_before_cuda.py +162 -0
- tests/test_python_actors.py +184 -333
- tests/test_rdma.py +198 -0
- tests/test_remote_functions.py +40 -12
- tests/test_rust_backend.py +7 -5
- tests/test_sim_backend.py +1 -4
- tests/test_tensor_engine.py +55 -1
- {torchmonarch_nightly-2025.7.1.dist-info → torchmonarch_nightly-2025.7.25.dist-info}/METADATA +6 -1
- {torchmonarch_nightly-2025.7.1.dist-info → torchmonarch_nightly-2025.7.25.dist-info}/RECORD +80 -68
- torchmonarch_nightly-2025.7.25.dist-info/entry_points.txt +3 -0
- monarch/_monarch/hyperactor/__init__.py +0 -58
- monarch/_monarch/worker/debugger.py +0 -117
- monarch/_monarch/worker/logging.py +0 -107
- monarch/debugger.py +0 -379
- monarch/future.py +0 -76
- monarch/rdma.py +0 -162
- torchmonarch_nightly-2025.7.1.dist-info/entry_points.txt +0 -3
- /monarch/{_monarch/worker → _src}/__init__.py +0 -0
- /monarch/{common/_device_utils.py → _src/actor/device_utils.py} +0 -0
- /monarch/{common → _src/actor}/shape.py +0 -0
- /monarch/{_monarch → _src/tensor_engine}/__init__.py +0 -0
- {torchmonarch_nightly-2025.7.1.dist-info → torchmonarch_nightly-2025.7.25.dist-info}/WHEEL +0 -0
- {torchmonarch_nightly-2025.7.1.dist-info → torchmonarch_nightly-2025.7.25.dist-info}/licenses/LICENSE +0 -0
- {torchmonarch_nightly-2025.7.1.dist-info → torchmonarch_nightly-2025.7.25.dist-info}/top_level.txt +0 -0
monarch/proc_mesh.py
CHANGED

@@ -4,315 +4,12 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-
+import warnings
 
-import os
-import sys
-from contextlib import AbstractContextManager
-
-from typing import (
-    Any,
-    cast,
-    Dict,
-    List,
-    Optional,
-    Sequence,
-    Type,
-    TYPE_CHECKING,
-    TypeVar,
-)
-
-if TYPE_CHECKING:
-    import torch
-
-import monarch
-from monarch import ActorFuture as Future
-
-# Conditionally import DeviceMesh and spawn_tensor_engine only if tensor_engine is available
-# pyre-ignore[21]
-from monarch._rust_bindings import has_tensor_engine
-
-from monarch._rust_bindings.hyperactor_extension.alloc import (  # @manual=//monarch/monarch_extension:monarch_extension
-    Alloc,
-    AllocConstraints,
-    AllocSpec,
-)
-from monarch._rust_bindings.monarch_hyperactor.mailbox import Mailbox
-from monarch._rust_bindings.monarch_hyperactor.proc_mesh import (
-    ProcMesh as HyProcMesh,
-    ProcMeshMonitor,
+warnings.warn(
+    "monarch.proc_mesh is deprecated, please import from monarch.actor instead.",
+    DeprecationWarning,
+    stacklevel=2,
 )
-from monarch._rust_bindings.monarch_hyperactor.shape import Shape, Slice
-from monarch.actor_mesh import _Actor, _ActorMeshRefImpl, Actor, ActorMeshRef
-
-from monarch.code_sync import RemoteWorkspace, RsyncMeshClient
-from monarch.common._device_utils import _local_device_count
-from monarch.common.shape import MeshTrait
-from monarch.rdma import RDMAManager
-
-if has_tensor_engine():
-    from monarch.common.device_mesh import DeviceMesh
-    from monarch.mesh_controller import spawn_tensor_engine
-else:
-    DeviceMesh = None
-    spawn_tensor_engine = None
-
-T = TypeVar("T")
-try:
-    from __manifest__ import fbmake  # noqa
-
-    IN_PAR = True
-except ImportError:
-    IN_PAR = False
-
-
-async def _allocate_nonblocking(alloc: Alloc) -> "ProcMesh":
-    return ProcMesh(await HyProcMesh.allocate_nonblocking(alloc))
-
-
-def _allocate_blocking(alloc: Alloc) -> "ProcMesh":
-    return ProcMesh(HyProcMesh.allocate_blocking(alloc))
-
-
-class ProcMesh(MeshTrait):
-    def __init__(
-        self,
-        hy_proc_mesh: HyProcMesh,
-        _mock_shape: Optional[Shape] = None,
-        _device_mesh: Optional[DeviceMesh] = None,
-    ) -> None:
-        self._proc_mesh = hy_proc_mesh
-        self._mock_shape: Optional[Shape] = _mock_shape
-        self._mailbox: Mailbox = self._proc_mesh.client
-        self._rdma_manager: Optional[RDMAManager] = None
-        self._rsync_mesh_client: Optional[RsyncMeshClient] = None
-        self._maybe_device_mesh: Optional[DeviceMesh] = _device_mesh
-        if _mock_shape is None:
-            self._rdma_manager = self._spawn_blocking("rdma_manager", RDMAManager)
-
-    @property
-    def _shape(self) -> Shape:
-        return self._proc_mesh.shape if self._mock_shape is None else self._mock_shape
-
-    @property
-    def _ndslice(self) -> Slice:
-        return self._shape.ndslice
-
-    @property
-    def _labels(self) -> List[str]:
-        return self._shape.labels
-
-    def _new_with_shape(self, shape: Shape) -> "ProcMesh":
-        device_mesh = (
-            None
-            if self._device_mesh is None
-            else self._device_mesh._new_with_shape(shape)
-        )
-        return ProcMesh(self._proc_mesh, _mock_shape=shape, _device_mesh=device_mesh)
-
-    def spawn(
-        self, name: str, Class: Type[T], *args: Any, **kwargs: Any
-    ) -> Future[ActorMeshRef[T]]:
-        if self._mock_shape is not None:
-            raise NotImplementedError("NYI: spawn on slice of a proc mesh.")
-        return Future(
-            lambda: self._spawn_nonblocking(name, Class, *args, **kwargs),
-            lambda: self._spawn_blocking(name, Class, *args, **kwargs),
-        )
-
-    async def monitor(self) -> ProcMeshMonitor:
-        """
-        Get a monitor (async iterator) of the proc mesh, it is used to
-        monitor the status of the proc mesh. This function can be called at most once.
-
-        Note: This API is experimental and subject to change.
-
-        Example:
-
-        async def monitor_loop(monitor):
-            async for event in monitor:
-                await handle_exception_event(event)
-
-        # Kick off in background
-        asyncio.create_task(monitor_loop(monitor))
-        """
-        return await self._proc_mesh.monitor()
-
-    @classmethod
-    def from_alloc(self, alloc: Alloc) -> Future["ProcMesh"]:
-        return Future(
-            lambda: _allocate_nonblocking(alloc),
-            lambda: _allocate_blocking(alloc),
-        )
-
-    def _spawn_blocking(
-        self, name: str, Class: Type[T], *args: Any, **kwargs: Any
-    ) -> T:
-        if not issubclass(Class, Actor):
-            raise ValueError(
-                f"{Class} must subclass monarch.service.Actor to spawn it."
-            )
-
-        actor_mesh = self._proc_mesh.spawn_blocking(name, _Actor)
-        service = ActorMeshRef(
-            Class,
-            _ActorMeshRefImpl.from_hyperactor_mesh(self._mailbox, actor_mesh),
-            self._mailbox,
-        )
-        # useful to have this separate, because eventually we can reconstitute ActorMeshRef objects across pickling by
-        # doing `ActorMeshRef(Class, actor_handle)` but not calling _create.
-        service._create(args, kwargs)
-        return cast(T, service)
-
-    def __repr__(self) -> str:
-        return repr(self._proc_mesh)
-
-    def __str__(self) -> str:
-        return str(self._proc_mesh)
-
-    async def _spawn_nonblocking(
-        self, name: str, Class: Type[T], *args: Any, **kwargs: Any
-    ) -> T:
-        if not issubclass(Class, Actor):
-            raise ValueError(
-                f"{Class} must subclass monarch.service.Actor to spawn it."
-            )
-
-        actor_mesh = await self._proc_mesh.spawn_nonblocking(name, _Actor)
-        service = ActorMeshRef(
-            Class,
-            _ActorMeshRefImpl.from_hyperactor_mesh(self._mailbox, actor_mesh),
-            self._mailbox,
-        )
-        # useful to have this separate, because eventually we can reconstitute ActorMeshRef objects across pickling by
-        # doing `ActorMeshRef(Class, actor_handle)` but not calling _create.
-        service._create(args, kwargs)
-        return cast(T, service)
-
-    @property
-    def _device_mesh(self) -> "DeviceMesh":
-        if spawn_tensor_engine is None:
-            raise RuntimeError(
-                "DeviceMesh is not available because tensor_engine was not compiled (USE_TENSOR_ENGINE=0)"
-            )
-        if self._maybe_device_mesh is None:
-            if self._mock_shape is not None:
-                raise NotImplementedError(
-                    "NYI: activating a proc mesh must first happen on the root proc_mesh until we fix spawning on submeshes."
-                )
-            self._maybe_device_mesh = spawn_tensor_engine(self)
-        return self._maybe_device_mesh
-
-    # pyre-ignore
-    def activate(self) -> AbstractContextManager:
-        return self._device_mesh.activate()
-
-    def rank_tensor(self, dim: str | Sequence[str]) -> "torch.Tensor":
-        return self._device_mesh.rank(dim)
-
-    def rank_tensors(self) -> Dict[str, "torch.Tensor"]:
-        return self._device_mesh.ranks
-
-    async def sync_workspace(self) -> None:
-        if self._rsync_mesh_client is None:
-            # TODO(agallagher): We need some way to configure and pass this
-            # in -- right now we're assuming the `gpu` dimension, which isn't
-            # correct.
-            assert set(self._proc_mesh.shape.labels).issubset({"gpus", "hosts"})
-            # The workspace shape (i.e. only perform one rsync per host).
-            workspace_shape = self.slice(gpus=slice(0, 1, 1))._mock_shape
-            assert workspace_shape is not None
-            # TODO(agallagher): We should probably hide this behind something
-            # like a `Workspace` class and support abstracting/configuring
-            # different sync methods.
-            self._rsync_mesh_client = RsyncMeshClient.spawn_blocking(
-                proc_mesh=self._proc_mesh,
-                shape=workspace_shape,
-                # TODO(agallagher): Is there a better way to infer/set the local
-                # workspace dir, rather than use PWD?
-                local_workspace=os.getcwd(),
-                remote_workspace=RemoteWorkspace.FromEnvVar("WORKSPACE_DIR"),
-            )
-        await self._rsync_mesh_client.sync_workspace()
-
-
-async def local_proc_mesh_nonblocking(
-    *, gpus: Optional[int] = None, hosts: int = 1
-) -> ProcMesh:
-    if gpus is None:
-        gpus = _local_device_count()
-    spec = AllocSpec(AllocConstraints(), gpus=gpus, hosts=hosts)
-    allocator = monarch.LocalAllocator()
-    alloc = await allocator.allocate(spec)
-    return await ProcMesh.from_alloc(alloc)
-
-
-def local_proc_mesh_blocking(*, gpus: Optional[int] = None, hosts: int = 1) -> ProcMesh:
-    if gpus is None:
-        gpus = _local_device_count()
-    spec = AllocSpec(AllocConstraints(), gpus=gpus, hosts=hosts)
-    allocator = monarch.LocalAllocator()
-    alloc = allocator.allocate(spec).get()
-    return ProcMesh.from_alloc(alloc).get()
-
-
-def local_proc_mesh(*, gpus: Optional[int] = None, hosts: int = 1) -> Future[ProcMesh]:
-    return Future(
-        lambda: local_proc_mesh_nonblocking(gpus=gpus, hosts=hosts),
-        lambda: local_proc_mesh_blocking(gpus=gpus, hosts=hosts),
-    )
-
-
-_BOOTSTRAP_MAIN = "monarch.bootstrap_main"
-
-
-def _get_bootstrap_args() -> tuple[str, Optional[list[str]], dict[str, str]]:
-    if IN_PAR:
-        cmd = sys.argv[0]
-        args = None
-        env = {
-            "PAR_MAIN_OVERRIDE": _BOOTSTRAP_MAIN,
-        }
-    else:
-        cmd = sys.executable
-        args = ["-m", _BOOTSTRAP_MAIN]
-        env = {}
-
-    return cmd, args, env
-
-
-async def proc_mesh_nonblocking(
-    *, gpus: Optional[int] = None, hosts: int = 1, env: Optional[dict[str, str]] = None
-) -> ProcMesh:
-    if gpus is None:
-        gpus = _local_device_count()
-    spec = AllocSpec(AllocConstraints(), gpus=gpus, hosts=hosts)
-    env = env or {}
-    cmd, args, base_env = _get_bootstrap_args()
-    env.update(base_env)
-    allocator = monarch.ProcessAllocator(cmd, args, env)
-    alloc = await allocator.allocate(spec)
-    return await ProcMesh.from_alloc(alloc)
-
-
-def proc_mesh_blocking(
-    *, gpus: Optional[int] = None, hosts: int = 1, env: Optional[dict[str, str]] = None
-) -> ProcMesh:
-    if gpus is None:
-        gpus = _local_device_count()
-    spec = AllocSpec(AllocConstraints(), gpus=gpus, hosts=hosts)
-    env = env or {}
-    cmd, args, base_env = _get_bootstrap_args()
-    env.update(base_env)
-    allocator = monarch.ProcessAllocator(cmd, args, env)
-    alloc = allocator.allocate(spec).get()
-    return ProcMesh.from_alloc(alloc).get()
-
 
-def proc_mesh(
-    *, gpus: Optional[int] = None, hosts: int = 1, env: Optional[dict[str, str]] = None
-) -> Future[ProcMesh]:
-    return Future(
-        lambda: proc_mesh_nonblocking(gpus=gpus, hosts=hosts, env=env),
-        lambda: proc_mesh_blocking(gpus=gpus, hosts=hosts, env=env),
-    )
+from monarch._src.actor.proc_mesh import *  # noqa
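The replacement module above turns monarch.proc_mesh into a thin deprecation shim: the module-level warnings.warn fires once, on first import, and the star-import re-exports the real implementation from monarch._src.actor.proc_mesh. A minimal migration sketch, assuming proc_mesh is among the names exported from the new monarch.actor package, as the warning text suggests:

    import warnings

    # Old path: still works in 2025.7.25, but emits a DeprecationWarning
    # the first time the shim module is imported.
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        from monarch.proc_mesh import proc_mesh  # noqa: F401
    assert any(issubclass(w.category, DeprecationWarning) for w in caught)

    # New path, per the deprecation message:
    from monarch.actor import proc_mesh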
monarch/python_local_mesh.py
CHANGED

@@ -11,7 +11,7 @@ from time import sleep
 from typing import Optional, TYPE_CHECKING
 
 import monarch_supervisor
-from monarch.common._device_utils import _local_device_count
+from monarch._src.actor.device_utils import _local_device_count
 from monarch.common.fake import fake_call
 from monarch.common.invocation import DeviceException, RemoteException
 from monarch.world_mesh import world_mesh
monarch/rust_backend_mesh.py
CHANGED

@@ -20,11 +20,12 @@ from monarch._rust_bindings.monarch_hyperactor.proc import (  # @manual=//monarc
     init_proc,
     Proc,
 )
+
+from monarch._src.actor.shape import NDSlice
 from monarch.common.client import Client
 from monarch.common.device_mesh import DeviceMesh, DeviceMeshStatus
 from monarch.common.invocation import DeviceException, RemoteException
 from monarch.common.mast import MastJob
-from monarch.common.shape import NDSlice
 from monarch.controller.rust_backend.controller import RustController
 
 TORCHX_MAST_TASK_GROUP_NAME = "script"
monarch/rust_local_mesh.py
CHANGED

@@ -71,7 +71,7 @@ _MONARCH_TENSOR_WORKER_MAIN = "monarch.tensor_worker_main"
 try:
     from __manifest__ import fbmake  # noqa
 
-    IN_PAR = True
+    IN_PAR = bool(fbmake.get("par_style"))
 except ImportError:
     IN_PAR = False
 
@@ -122,7 +122,9 @@ _PROC_ENV: dict[str, str] = {}
 
 def get_controller_main() -> tuple[Path, dict[str, str]]:
     with (
-        importlib.resources.path("monarch", "monarch_controller") as controller_main,
+        importlib.resources.as_file(
+            importlib.resources.files("monarch") / "monarch_controller"
+        ) as controller_main,
     ):
         if not controller_main.exists():
             if IN_PAR:
monarch/sim_mesh.py
CHANGED

@@ -31,7 +31,6 @@ from monarch._rust_bindings.monarch_extension.client import (  # @manual=//monar
 )
 
 from monarch._rust_bindings.monarch_extension.simulator_client import (  # @manual=//monarch/monarch_extension:monarch_extension
-    bootstrap_simulator_backend,
     SimulatorClient,
 )
 
@@ -40,6 +39,8 @@ from monarch._rust_bindings.monarch_hyperactor.proc import (  # @manual=//monarc
     init_proc,
     Proc,
 )
+
+from monarch._src.actor.shape import NDSlice
 from monarch.common.client import Client
 from monarch.common.constants import (
     SIM_MESH_CLIENT_SUPERVISION_UPDATE_INTERVAL,
@@ -50,7 +51,6 @@ from monarch.common.fake import fake_call
 from monarch.common.future import Future, T
 from monarch.common.invocation import DeviceException, RemoteException
 from monarch.common.messages import Dims
-from monarch.common.shape import NDSlice
 from monarch.controller.rust_backend.controller import RustController
 from monarch.rust_backend_mesh import MeshWorld
 
@@ -58,9 +58,7 @@ from monarch.rust_backend_mesh import MeshWorld
 logger: logging.Logger = logging.getLogger(__name__)
 
 
-def sim_mesh(
-    n_meshes: int, hosts: int, gpus_per_host: int, proxy_addr: Optional[str] = None
-) -> List[DeviceMesh]:
+def sim_mesh(n_meshes: int, hosts: int, gpus_per_host: int) -> List[DeviceMesh]:
     """
     Creates a single simulated device mesh with the given number of per host.
 
@@ -75,7 +73,6 @@ def sim_mesh(
     bootstrap: Bootstrap = Bootstrap(
         n_meshes,
         mesh_world_state,
-        proxy_addr=proxy_addr,
         world_size=hosts * gpus_per_host,
     )
 
@@ -180,14 +177,12 @@ class Bootstrap:
         self,
         num_meshes: int,
         mesh_world_state: Dict[MeshWorld, Optional[DeviceMesh]],
-        proxy_addr: Optional[str] = None,
         world_size: int = 1,
     ) -> None:
         """
         Bootstraps a SimMesh.
         Args:
             num_meshes: int - number of meshes to create.
-            proxy_addr: Option[str] - the proxy address of the simulation process
             mesh_world_state: a state of the meshes. Keys are the MeshWorld and values are boolean indicating if this mesh is active.
         """
         # do a fake call to instantiate ThreadPoolExecutor so we don't block GIL later
@@ -198,17 +193,11 @@ class Bootstrap:
 
         self._mesh_world_state: Dict[MeshWorld, Optional[DeviceMesh]] = mesh_world_state
 
-
-        self.bootstrap_addr: str = f"sim!unix!@system,{proxy_addr}"
-
-        client_proxy_addr = f"unix!@{_random_id()}-proxy"
-        self.client_listen_addr: str = f"sim!unix!@client,{client_proxy_addr}"
-        self.client_bootstrap_addr: str = (
-            f"sim!unix!@client,{client_proxy_addr},unix!@system,{proxy_addr}"
-        )
-        bootstrap_simulator_backend(self.bootstrap_addr, proxy_addr, world_size)
+        self.bootstrap_addr: str = "sim!unix!@system"
+        self.client_listen_addr = "sim!unix!@client"
+        self.client_bootstrap_addr = "sim!unix!@client,unix!@system"
 
-        self._simulator_client = SimulatorClient(
+        self._simulator_client = SimulatorClient(self.bootstrap_addr, world_size)
         for i in range(num_meshes):
             mesh_name: str = f"mesh_{i}"
             controller_world: str = f"{mesh_name}_controller"
@@ -234,7 +223,9 @@ class Bootstrap:
             worker_world, controller_id = mesh_world
             controller_world = controller_id.world_name
             self._simulator_client.spawn_mesh(
-                self.bootstrap_addr,
+                self.bootstrap_addr,
+                f"{controller_world}[0].root",
+                worker_world,
             )
 
 
monarch/simulator/command_history.py
CHANGED

@@ -12,9 +12,9 @@ from dataclasses import dataclass
 from typing import List, NamedTuple, Optional, Sequence
 
 import torch
+from monarch._src.actor.shape import NDSlice
 
 from monarch.common import messages
-from monarch.common.shape import NDSlice
 from monarch.simulator.ir import IRGraph
 from monarch.simulator.tensor import DTensorRef
 from monarch.simulator.utils import clean_name, file_path_with_iter
monarch/simulator/interface.py
CHANGED

@@ -6,9 +6,10 @@
 
 from typing import Union
 
+from monarch._src.actor.shape import NDSlice
+
 from monarch.common.client import Client as _Client
 from monarch.common.device_mesh import DeviceMesh
-from monarch.common.shape import NDSlice
 
 from monarch.simulator.ir import IRGraph
 from monarch.simulator.simulator import (
monarch/simulator/mock_controller.py
CHANGED

@@ -25,6 +25,7 @@ from monarch._rust_bindings.monarch_extension.client import (  # @manual=//monar
 from monarch._rust_bindings.monarch_hyperactor.proc import (  # @manual=//monarch/monarch_extension:monarch_extension
     ActorId,
 )
+from monarch._src.actor.shape import iter_ranks, NDSlice, Slices as Ranks
 
 from monarch.common import messages
 
@@ -32,7 +33,6 @@ from monarch.common.controller_api import DebuggerMessage, LogMessage, MessageRe
 from monarch.common.device_mesh import no_mesh
 from monarch.common.invocation import Invocation, RemoteException, Seq
 from monarch.common.reference import Ref
-from monarch.common.shape import iter_ranks, NDSlice, Slices as Ranks
 from monarch.common.tree import flatten
 
 if TYPE_CHECKING:
monarch/simulator/simulator.py
CHANGED

@@ -43,12 +43,12 @@ import torch
 from monarch._rust_bindings.monarch_hyperactor.proc import (  # @manual=//monarch/monarch_extension:monarch_extension
     ActorId,
 )
+from monarch._src.actor.shape import iter_ranks, NDSlice
 from monarch.common import messages
 from monarch.common.controller_api import LogMessage, MessageResult
 from monarch.common.device_mesh import DeviceMesh
 from monarch.common.function import ResolvableFunction, ResolvableFunctionFromPath
 from monarch.common.invocation import DeviceException
-from monarch.common.shape import iter_ranks, NDSlice
 from monarch.simulator.command_history import CommandHistory, DTensorRef
 from monarch.simulator.config import META_VAL
 from monarch.simulator.ir import IRGraph
monarch/tensor_engine/__init__.py
ADDED

@@ -0,0 +1,23 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+Monarch Tensor Engine API - Public interface for tensor engine functionality.
+"""
+
+from monarch._src.tensor_engine.rdma import (
+    is_available,
+    RDMABuffer,
+    RDMAReadTransferWarning,
+    RDMAWriteTransferWarning,
+)
+
+__all__ = [
+    "is_available",
+    "RDMABuffer",
+    "RDMAReadTransferWarning",
+    "RDMAWriteTransferWarning",
+]
monarch/tensor_worker_main.py
CHANGED

@@ -249,7 +249,9 @@ if __name__ == "__main__":
     torch.cuda.set_device = check_set_device
 
     with (
-        importlib.resources.path("monarch", "py-spy") as pyspy,
+        importlib.resources.as_file(
+            importlib.resources.files("monarch") / "py-spy"
+        ) as pyspy,
     ):
         if pyspy.exists():
             os.environ["PYSPY_BIN"] = str(pyspy)
monarch/tools/cli.py
CHANGED

@@ -86,7 +86,9 @@ class CreateCmd:
             else defaults.component_fn(config.scheduler)
         )
         component_args = component_args_from_cli(component_fn, args.component_args)
-        handle = create(config, component_fn)(**component_args)
+        appdef = component_fn(**component_args)
+
+        handle = create(config, appdef)
         print(handle)
 
 