torchmonarch-nightly 2025.6.27__cp312-cp312-manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (165)
  1. monarch/__init__.py +189 -0
  2. monarch/_monarch/__init__.py +5 -0
  3. monarch/_monarch/hyperactor/__init__.py +58 -0
  4. monarch/_monarch/selection/__init__.py +13 -0
  5. monarch/_monarch/worker/__init__.py +0 -0
  6. monarch/_monarch/worker/debugger.py +117 -0
  7. monarch/_monarch/worker/logging.py +107 -0
  8. monarch/_rust_bindings.so +0 -0
  9. monarch/_testing.py +230 -0
  10. monarch/actor_mesh.py +761 -0
  11. monarch/allocator.py +220 -0
  12. monarch/bootstrap_main.py +59 -0
  13. monarch/builtins/__init__.py +14 -0
  14. monarch/builtins/log.py +22 -0
  15. monarch/builtins/random.py +68 -0
  16. monarch/cached_remote_function.py +257 -0
  17. monarch/code_sync.py +10 -0
  18. monarch/common/_C.pyi +11 -0
  19. monarch/common/_C.so +0 -0
  20. monarch/common/__init__.py +0 -0
  21. monarch/common/_coalescing.py +308 -0
  22. monarch/common/_device_utils.py +18 -0
  23. monarch/common/_tensor_to_table.py +172 -0
  24. monarch/common/base_tensor.py +28 -0
  25. monarch/common/borrows.py +143 -0
  26. monarch/common/client.py +690 -0
  27. monarch/common/constants.py +10 -0
  28. monarch/common/context_manager.py +40 -0
  29. monarch/common/controller_api.py +104 -0
  30. monarch/common/device_mesh.py +417 -0
  31. monarch/common/fake.py +55 -0
  32. monarch/common/function.py +160 -0
  33. monarch/common/function_caching.py +164 -0
  34. monarch/common/future.py +168 -0
  35. monarch/common/invocation.py +125 -0
  36. monarch/common/mast.py +221 -0
  37. monarch/common/messages.py +573 -0
  38. monarch/common/mock_cuda.py +41 -0
  39. monarch/common/opaque_ref.py +98 -0
  40. monarch/common/pickle_flatten.py +48 -0
  41. monarch/common/pipe.py +152 -0
  42. monarch/common/process_group.py +55 -0
  43. monarch/common/recording.py +127 -0
  44. monarch/common/reference.py +33 -0
  45. monarch/common/remote.py +297 -0
  46. monarch/common/selection.py +9 -0
  47. monarch/common/shape.py +229 -0
  48. monarch/common/stream.py +114 -0
  49. monarch/common/tensor.py +814 -0
  50. monarch/common/tensor_factory.py +31 -0
  51. monarch/common/tree.py +73 -0
  52. monarch/controller/__init__.py +7 -0
  53. monarch/controller/backend.py +223 -0
  54. monarch/controller/controller.py +223 -0
  55. monarch/controller/debugger.py +47 -0
  56. monarch/controller/history.py +90 -0
  57. monarch/controller/rust_backend/__init__.py +7 -0
  58. monarch/controller/rust_backend/controller.py +245 -0
  59. monarch/debugger.py +379 -0
  60. monarch/fetch.py +55 -0
  61. monarch/future.py +76 -0
  62. monarch/gradient/__init__.py +11 -0
  63. monarch/gradient/_gradient_generator.pyi +22 -0
  64. monarch/gradient/_gradient_generator.so +0 -0
  65. monarch/gradient_generator.py +185 -0
  66. monarch/memory.py +43 -0
  67. monarch/mesh_controller.py +271 -0
  68. monarch/monarch_controller +0 -0
  69. monarch/notebook.py +761 -0
  70. monarch/opaque_module.py +235 -0
  71. monarch/opaque_object.py +88 -0
  72. monarch/parallel/__init__.py +9 -0
  73. monarch/parallel/pipelining/__init__.py +7 -0
  74. monarch/parallel/pipelining/runtime.py +847 -0
  75. monarch/parallel/pipelining/schedule_ir.py +692 -0
  76. monarch/parallel/pipelining/scheduler.py +249 -0
  77. monarch/pdb_wrapper.py +135 -0
  78. monarch/proc_mesh.py +299 -0
  79. monarch/profiler.py +160 -0
  80. monarch/python_local_mesh.py +107 -0
  81. monarch/random.py +61 -0
  82. monarch/rdma.py +162 -0
  83. monarch/remote_class.py +114 -0
  84. monarch/rust_backend_mesh.py +280 -0
  85. monarch/rust_local_mesh.py +1402 -0
  86. monarch/sim_mesh.py +359 -0
  87. monarch/simulator/__init__.py +7 -0
  88. monarch/simulator/command_history.py +424 -0
  89. monarch/simulator/config.py +21 -0
  90. monarch/simulator/interface.py +59 -0
  91. monarch/simulator/ir.py +770 -0
  92. monarch/simulator/mock_controller.py +214 -0
  93. monarch/simulator/profiling.py +424 -0
  94. monarch/simulator/simulator.py +1052 -0
  95. monarch/simulator/task.py +255 -0
  96. monarch/simulator/tensor.py +373 -0
  97. monarch/simulator/trace.py +395 -0
  98. monarch/simulator/utils.py +41 -0
  99. monarch/simulator/worker.py +389 -0
  100. monarch/telemetry.py +19 -0
  101. monarch/tensor_worker_main.py +260 -0
  102. monarch/tensorboard.py +84 -0
  103. monarch/timer/__init__.py +21 -0
  104. monarch/timer/example_monarch.py +78 -0
  105. monarch/timer/example_spmd.py +55 -0
  106. monarch/timer/execution_timer.py +199 -0
  107. monarch/timer/execution_timer_test.py +131 -0
  108. monarch/tools/__init__.py +7 -0
  109. monarch/tools/cli.py +167 -0
  110. monarch/tools/commands.py +251 -0
  111. monarch/tools/components/__init__.py +7 -0
  112. monarch/tools/components/hyperactor.py +58 -0
  113. monarch/tools/config/__init__.py +20 -0
  114. monarch/tools/config/defaults.py +54 -0
  115. monarch/tools/mesh_spec.py +165 -0
  116. monarch/tools/network.py +69 -0
  117. monarch/worker/__init__.py +7 -0
  118. monarch/worker/_testing_function.py +481 -0
  119. monarch/worker/compiled_block.py +270 -0
  120. monarch/worker/debugger.py +125 -0
  121. monarch/worker/lines.py +47 -0
  122. monarch/worker/monitor.py +53 -0
  123. monarch/worker/worker.py +1191 -0
  124. monarch/world_mesh.py +34 -0
  125. monarch_supervisor/__init__.py +1044 -0
  126. monarch_supervisor/_testing.py +44 -0
  127. monarch_supervisor/function_call.py +30 -0
  128. monarch_supervisor/host.py +386 -0
  129. monarch_supervisor/launchers.py +145 -0
  130. monarch_supervisor/log_pstree.py +48 -0
  131. monarch_supervisor/logging.py +103 -0
  132. monarch_supervisor/python_executable.py +42 -0
  133. tests/__init__.py +0 -0
  134. tests/dispatch_bench.py +124 -0
  135. tests/dispatch_bench_helper.py +25 -0
  136. tests/error_test_binary.py +180 -0
  137. tests/simulator/__init__.py +0 -0
  138. tests/simulator/test_profiling.py +136 -0
  139. tests/simulator/test_simulator.py +411 -0
  140. tests/simulator/test_task.py +64 -0
  141. tests/simulator/test_worker.py +102 -0
  142. tests/sleep_binary.py +35 -0
  143. tests/test_actor_error.py +240 -0
  144. tests/test_alloc.py +25 -0
  145. tests/test_allocator.py +365 -0
  146. tests/test_coalescing.py +492 -0
  147. tests/test_controller.py +845 -0
  148. tests/test_device_mesh.py +132 -0
  149. tests/test_fault_tolerance.py +398 -0
  150. tests/test_future.py +94 -0
  151. tests/test_grad_generator.py +121 -0
  152. tests/test_mock_cuda.py +74 -0
  153. tests/test_pdb_actor.py +110 -0
  154. tests/test_python_actors.py +736 -0
  155. tests/test_remote_functions.py +1271 -0
  156. tests/test_rust_backend.py +217 -0
  157. tests/test_signal_safe_block_on.py +103 -0
  158. tests/test_sim_backend.py +54 -0
  159. tests/test_tensor_engine.py +52 -0
  160. torchmonarch_nightly-2025.6.27.dist-info/METADATA +94 -0
  161. torchmonarch_nightly-2025.6.27.dist-info/RECORD +165 -0
  162. torchmonarch_nightly-2025.6.27.dist-info/WHEEL +5 -0
  163. torchmonarch_nightly-2025.6.27.dist-info/entry_points.txt +3 -0
  164. torchmonarch_nightly-2025.6.27.dist-info/licenses/LICENSE +29 -0
  165. torchmonarch_nightly-2025.6.27.dist-info/top_level.txt +3 -0
tests/test_grad_generator.py
@@ -0,0 +1,121 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the BSD-style license found in the
+ # LICENSE file in the root directory of this source tree.
+
+ # pyre-unsafe
+ from unittest import main, TestCase
+
+ import torch
+ from monarch.gradient._gradient_generator import GradientGenerator
+ from monarch.gradient_generator import gradient_execution_order
+
+
+ class TestGradIter(TestCase):
+     def checkEqual(self, r, r2):
+         self.assertEqual(len(r), len(r2))
+         for i, i2 in zip(r, r2):
+             self.assertTrue((i is None and i2 is None) or torch.allclose(i, i2))
+
+     def test_simple(self):
+         t = torch.rand(2, requires_grad=True)
+         t2 = torch.rand(2, requires_grad=True)
+
+         _ = t + t2
+         a, b = torch.std_mean(t + t2)
+
+         r2 = torch.autograd.grad([a, b], [t2, t], retain_graph=True)
+         r = list(GradientGenerator([a, b], [t2, t]))
+         print(a, b)
+         print(a.grad_fn, b.grad_fn)
+
+         print(r)
+         self.checkEqual(r, r2)
+
+     def test_pipeline_like(self):
+         t = torch.rand(3, 3, requires_grad=True)
+
+         w1 = torch.rand(3, 2, requires_grad=True)
+         w2 = torch.rand(3, 2, requires_grad=True)
+         w3 = torch.rand(3, 2, requires_grad=True)
+
+         u = torch.rand(3, 2, requires_grad=True)
+
+         _ = u * u
+
+         w4 = torch.rand(2, 3, requires_grad=True)
+         w5 = torch.rand(2, 3, requires_grad=True)
+         w6 = torch.rand(2, 3, requires_grad=True)
+
+         from torch.nn.functional import relu
+
+         a = relu(t @ (w1 @ w4))
+         a = relu(a @ (w2 @ w5))
+         a = relu(a @ (w3 @ w6))
+
+         std, mean = torch.std_mean(a)
+         loss = std + std
+
+         cgrads = torch.autograd.grad(
+             [loss], [t, w3, w6, u, w2, w5], allow_unused=True, retain_graph=True
+         )
+         gi = GradientGenerator([loss], [t, w3, w6, u, w2, w5])
+         grads = [*gi]
+         self.checkEqual(grads, cgrads)
+
+     def test_tree(self):
+         t = torch.rand(3, 3, requires_grad=True)
+
+         t2 = t + t
+         t3 = t * t
+         t4 = t / t
+         t5 = t - t
+
+         t6 = t2 * t3
+         t7 = t4 * t5
+         t8 = t2 * t4
+         t9 = t3 * t5
+         t10 = t6 + t7 + t8 + t9
+
+         t11 = t10.sum()
+
+         cgrads = torch.autograd.grad([t11], [t2, t], retain_graph=True)
+         gi = GradientGenerator([t11], [t2, t])
+         grads = [*gi]
+         self.checkEqual(grads, cgrads)
+
+     def test_broadcast(self):
+         t = torch.rand(3, 3, requires_grad=True)
+         t2 = torch.rand(3, requires_grad=True)
+         t3 = t2 / t2
+
+         r = (t * t3).sum()
+         cgrads = torch.autograd.grad([r], [t, t2], retain_graph=True)
+         gi = GradientGenerator([r], [t, t2])
+         grads = [*gi]
+         self.checkEqual(grads, cgrads)
+
+     def test_grad_order(self):
+         t = torch.rand(3, 3, requires_grad=True)
+         w1 = torch.rand(3, 3, requires_grad=True)
+         w2 = torch.rand(3, 3, requires_grad=True)
+         w3 = torch.rand(3, 3, requires_grad=True)
+
+         u = torch.rand(3, 2, requires_grad=True)
+         _ = u * u
+         from torch.nn.functional import relu
+
+         a = relu(t @ w1)
+         a = relu(a @ w2)
+         a = relu(a @ w3)
+
+         std, mean = torch.std_mean(a)
+         loss = std + std
+
+         order = gradient_execution_order([loss], [w2, w3, w1, a])
+         self.assertEqual(order, [3, 1, 0, 2])
+
+
+ if __name__ == "__main__":
+     main()
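The two APIs exercised above are GradientGenerator, which is iterated to obtain one gradient per requested input rather than returning them all at once as torch.autograd.grad does, and gradient_execution_order, which reports the order in which gradients for the given tensors become available during the backward pass. A minimal sketch of the same usage, limited to the constructor and iteration behavior the tests demonstrate (output tensors first, then the inputs to differentiate against); the surrounding tensors are illustrative only:

    import torch
    from monarch.gradient._gradient_generator import GradientGenerator
    from monarch.gradient_generator import gradient_execution_order

    w = torch.rand(3, 3, requires_grad=True)
    x = torch.rand(3, 3, requires_grad=True)
    loss = (x @ w).sum()

    # Indices of [w, x] sorted by when their gradients would be computed,
    # mirroring the assertion in test_grad_order above.
    order = gradient_execution_order([loss], [w, x])
    print(order)

    # Yields one gradient per requested input, in the same order
    # torch.autograd.grad([loss], [w, x]) would return them (see test_simple).
    for grad in GradientGenerator([loss], [w, x]):
        print(grad.shape)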
tests/test_mock_cuda.py
@@ -0,0 +1,74 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the BSD-style license found in the
+ # LICENSE file in the root directory of this source tree.
+
+ # pyre-unsafe
+ from unittest import main, TestCase
+
+ import pytest
+ import torch
+ import monarch.common.mock_cuda  # usort: skip
+
+
+ def simple_forward_backward(device: str) -> None:
+     torch.manual_seed(123)
+     m = torch.nn.Sequential(torch.nn.Linear(3, 3), torch.nn.ReLU()).to(device)
+     x = torch.rand(10, 3).to(device)
+     y = m(x)
+     loss_fn = torch.nn.CrossEntropyLoss()
+     loss = loss_fn(y, torch.randint(3, (10,)).to(device))
+     # Under the hood, enabling/disabling CUDA mocking is done with a thread-local
+     # flag. By default, backward() executes ops on a different thread than the one
+     # we enabled mocking on, which would lead to an invalid memory access. So we need
+     # to disable multithreading for backward.
+     with torch.autograd.set_multithreading_enabled(False):
+         loss.backward()
+     # pyre-ignore: Incompatible return type [7]: Expected `None` but got `Tuple[typing.Any, Union[None, Tensor, Module], Union[None, Tensor, Module]]`.
+     return y, m[0].weight.grad, m[0].bias.grad
+
+
+ # Mock cuda depends on initialization load order
+ # For OSS, run this test separately until it can be run in a subprocess.
+ @pytest.mark.oss_skip
+ class TestMockCuda(TestCase):
+     def setUp(self) -> None:
+         return super().setUp()
+
+     def test_output_is_garbage(self):
+         with monarch.common.mock_cuda.mock_cuda_guard():
+             x = torch.arange(9, device="cuda", dtype=torch.float32).reshape(3, 3)
+             y = 2 * torch.eye(3, device="cuda")
+             true_output = torch.tensor(
+                 [[0, 2, 4], [6, 8, 10], [12, 14, 16]], dtype=torch.float32
+             )
+             self.assertFalse(torch.equal((x @ y).cpu(), true_output))
+
+     def test_simple_forward_backward(self):
+         # This test just makes sure that the forward and backward pass work
+         # and don't crash.
+         simple_forward_backward("cuda")
+
+     def test_turn_mock_on_and_off(self):
+         cpu_y, cpu_dw, cpu_db = simple_forward_backward("cpu")
+
+         real_y, real_dw, real_db = simple_forward_backward("cuda")
+         self.assertTrue(torch.allclose(cpu_y, real_y.cpu()))
+         self.assertTrue(torch.allclose(cpu_dw, real_dw.cpu()))
+         self.assertTrue(torch.allclose(cpu_db, real_db.cpu()))
+
+         with monarch.common.mock_cuda.mock_cuda_guard():
+             mocked_y, mocked_dw, mocked_db = simple_forward_backward("cuda")
+             self.assertFalse(torch.allclose(cpu_y, mocked_y.cpu()))
+             self.assertFalse(torch.allclose(cpu_dw, mocked_dw.cpu()))
+             self.assertFalse(torch.allclose(cpu_db, mocked_db.cpu()))
+
+         real_y, real_dw, real_db = simple_forward_backward("cuda")
+         self.assertTrue(torch.allclose(cpu_y, real_y.cpu()))
+         self.assertTrue(torch.allclose(cpu_dw, real_dw.cpu()))
+         self.assertTrue(torch.allclose(cpu_db, real_db.cpu()))
+
+
+ if __name__ == "__main__":
+     main()
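The inline comments in this test capture the two practical constraints on monarch's CUDA mocking: the mock is toggled by a thread-local flag, so autograd's default multithreaded backward must be disabled while mocking is active, and mocked kernels return garbage values, so only shapes and dtypes are meaningful. A minimal usage sketch following the same pattern as these tests (it assumes a CUDA device is present and, per the load-order comment above, that monarch.common.mock_cuda is imported early in the process):

    import monarch.common.mock_cuda  # usort: skip
    import torch

    with monarch.common.mock_cuda.mock_cuda_guard():
        # Kernels are mocked: shapes and dtypes are real, values are garbage.
        x = torch.rand(4, 4, device="cuda", requires_grad=True)
        loss = (x @ x).sum()
        # backward() must run on the thread that enabled mocking, so disable
        # autograd's multithreading, as simple_forward_backward does above.
        with torch.autograd.set_multithreading_enabled(False):
            loss.backward()
        print(x.grad.shape)  # structure is usable even though the values are not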
tests/test_pdb_actor.py
@@ -0,0 +1,110 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the BSD-style license found in the
+ # LICENSE file in the root directory of this source tree.
+
+ # pyre-unsafe
+ import sys
+ import traceback
+ from contextlib import contextmanager
+ from typing import Generator
+
+ import pytest
+
+ import torch
+
+ from monarch import DeviceMesh, fetch_shard, remote, rust_local_mesh
+ from monarch._rust_bindings.monarch_extension.client import (  # @manual=//monarch/monarch_extension:monarch_extension
+     ClientActor,
+     DebuggerMessage as ClientDebuggerMessage,
+ )
+
+ from monarch._rust_bindings.monarch_extension.debugger import (
+     DebuggerMessage as PdbDebuggerMessage,
+     get_bytes_from_write_action,
+ )
+ from monarch._rust_bindings.monarch_messages.debugger import DebuggerAction
+ from monarch.rust_local_mesh import LoggingLocation, SocketType
+ from monarch_supervisor.logging import fix_exception_lines
+
+
+ def custom_excepthook(exc_type, exc_value, exc_traceback):
+     tb_lines = fix_exception_lines(
+         traceback.format_exception(exc_type, exc_value, exc_traceback)
+     )
+     print("\n".join(tb_lines), file=sys.stderr)
+
+
+ sys.excepthook = custom_excepthook
+
+
+ @contextmanager
+ def local_mesh(
+     hosts: int = 1, gpu_per_host: int = 2, activate: bool = True
+ ) -> Generator[DeviceMesh, None, None]:
+     with rust_local_mesh.local_mesh(
+         hosts=hosts,
+         gpus_per_host=gpu_per_host,
+         socket_type=SocketType.UNIX,
+         logging_location=LoggingLocation.DEFAULT,
+     ) as dm:
+         try:
+             if activate:
+                 with dm.activate():
+                     yield dm
+             else:
+                 yield dm
+             dm.exit()
+         except Exception:
+             dm.client._shutdown = True
+             raise
+
+
+ remote_test_pdb_actor = remote(
+     "monarch.worker._testing_function.test_pdb_actor",
+     propagate=lambda: torch.zeros(1),
+ )
+
+
+ @pytest.mark.skipif(
+     torch.cuda.device_count() < 2,
+     reason="Not enough GPUs, this test requires at least 2 GPUs",
+ )
+ # Set global timeout--sandcastle's timeout is 600s. A test that sandcastle times
+ # out is not counted as a failure, so we set a more restrictive timeout to
+ # ensure we see a hard failure in CI.
+ @pytest.mark.timeout(120)
+ class TestPdbActor:
+     def test_pdb_actor(self):
+         with local_mesh(1, 1) as dm:
+             with dm.activate():
+                 client = dm.client.inner._actor
+                 assert isinstance(client, ClientActor)
+                 fut = fetch_shard(remote_test_pdb_actor())
+                 msg = client.get_next_message(timeout_msec=None)
+                 assert isinstance(msg, ClientDebuggerMessage)
+                 assert isinstance(msg.action, DebuggerAction.Paused)
+                 client.send(
+                     msg.debugger_actor_id,
+                     PdbDebuggerMessage(action=DebuggerAction.Attach()).serialize(),
+                 )
+                 msg = client.get_next_message(timeout_msec=None)
+                 assert isinstance(msg, ClientDebuggerMessage)
+                 assert isinstance(msg.action, DebuggerAction.Read)
+                 assert msg.action.requested_size == 4
+                 client.send(
+                     msg.debugger_actor_id,
+                     PdbDebuggerMessage(
+                         action=DebuggerAction.Write(b"1234")
+                     ).serialize(),
+                 )
+                 msg = client.get_next_message(timeout_msec=None)
+                 assert isinstance(msg, ClientDebuggerMessage)
+                 assert isinstance(msg.action, DebuggerAction.Write)
+                 assert get_bytes_from_write_action(msg.action) == b"5678"
+                 client.send(
+                     msg.debugger_actor_id,
+                     PdbDebuggerMessage(action=DebuggerAction.Detach()).serialize(),
+                 )
+                 fut.result()
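The exchange in test_pdb_actor follows a small debugger protocol: the worker reports Paused when it hits a breakpoint, the client replies with Attach, the worker's pdb then issues Read requests for input and Write actions carrying output, and the client ends the session with Detach. Below is a hedged sketch of a generic driver loop assembled only from the calls the test itself uses (ClientActor.get_next_message, client.send, the DebuggerAction variants, get_bytes_from_write_action); read_input and write_output are hypothetical frontend hooks, and ending the session on empty input is an assumption rather than documented behavior:

    from monarch._rust_bindings.monarch_extension.client import (
        ClientActor,
        DebuggerMessage as ClientDebuggerMessage,
    )
    from monarch._rust_bindings.monarch_extension.debugger import (
        DebuggerMessage as PdbDebuggerMessage,
        get_bytes_from_write_action,
    )
    from monarch._rust_bindings.monarch_messages.debugger import DebuggerAction

    def drive_debugger(client: ClientActor, read_input, write_output) -> None:
        # read_input(n): return up to n bytes of user input (b"" ends the session).
        # write_output(data): display bytes produced by the worker's pdb.
        while True:
            msg = client.get_next_message(timeout_msec=None)
            if not isinstance(msg, ClientDebuggerMessage):
                continue
            action = msg.action
            if isinstance(action, DebuggerAction.Paused):
                # A worker hit a breakpoint: attach to start the pdb session.
                reply = DebuggerAction.Attach()
            elif isinstance(action, DebuggerAction.Read):
                # The worker's pdb wants `requested_size` bytes of input.
                data = read_input(action.requested_size)
                reply = DebuggerAction.Write(data) if data else DebuggerAction.Detach()
            elif isinstance(action, DebuggerAction.Write):
                # The worker's pdb produced output to display.
                write_output(get_bytes_from_write_action(action))
                continue
            else:
                continue
            client.send(
                msg.debugger_actor_id,
                PdbDebuggerMessage(action=reply).serialize(),
            )
            if isinstance(reply, DebuggerAction.Detach):
                return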