torchmonarch-nightly 2025.6.27__cp312-cp312-manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between package versions as they appear in their public registries.
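To inspect this exact build locally, the version pin below matches the wheel named in the title. This is a hypothetical invocation; if the nightly is hosted on a custom index rather than PyPI, add the appropriate --index-url.

pip install torchmonarch-nightly==2025.6.27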
- monarch/__init__.py +189 -0
- monarch/_monarch/__init__.py +5 -0
- monarch/_monarch/hyperactor/__init__.py +58 -0
- monarch/_monarch/selection/__init__.py +13 -0
- monarch/_monarch/worker/__init__.py +0 -0
- monarch/_monarch/worker/debugger.py +117 -0
- monarch/_monarch/worker/logging.py +107 -0
- monarch/_rust_bindings.so +0 -0
- monarch/_testing.py +230 -0
- monarch/actor_mesh.py +761 -0
- monarch/allocator.py +220 -0
- monarch/bootstrap_main.py +59 -0
- monarch/builtins/__init__.py +14 -0
- monarch/builtins/log.py +22 -0
- monarch/builtins/random.py +68 -0
- monarch/cached_remote_function.py +257 -0
- monarch/code_sync.py +10 -0
- monarch/common/_C.pyi +11 -0
- monarch/common/_C.so +0 -0
- monarch/common/__init__.py +0 -0
- monarch/common/_coalescing.py +308 -0
- monarch/common/_device_utils.py +18 -0
- monarch/common/_tensor_to_table.py +172 -0
- monarch/common/base_tensor.py +28 -0
- monarch/common/borrows.py +143 -0
- monarch/common/client.py +690 -0
- monarch/common/constants.py +10 -0
- monarch/common/context_manager.py +40 -0
- monarch/common/controller_api.py +104 -0
- monarch/common/device_mesh.py +417 -0
- monarch/common/fake.py +55 -0
- monarch/common/function.py +160 -0
- monarch/common/function_caching.py +164 -0
- monarch/common/future.py +168 -0
- monarch/common/invocation.py +125 -0
- monarch/common/mast.py +221 -0
- monarch/common/messages.py +573 -0
- monarch/common/mock_cuda.py +41 -0
- monarch/common/opaque_ref.py +98 -0
- monarch/common/pickle_flatten.py +48 -0
- monarch/common/pipe.py +152 -0
- monarch/common/process_group.py +55 -0
- monarch/common/recording.py +127 -0
- monarch/common/reference.py +33 -0
- monarch/common/remote.py +297 -0
- monarch/common/selection.py +9 -0
- monarch/common/shape.py +229 -0
- monarch/common/stream.py +114 -0
- monarch/common/tensor.py +814 -0
- monarch/common/tensor_factory.py +31 -0
- monarch/common/tree.py +73 -0
- monarch/controller/__init__.py +7 -0
- monarch/controller/backend.py +223 -0
- monarch/controller/controller.py +223 -0
- monarch/controller/debugger.py +47 -0
- monarch/controller/history.py +90 -0
- monarch/controller/rust_backend/__init__.py +7 -0
- monarch/controller/rust_backend/controller.py +245 -0
- monarch/debugger.py +379 -0
- monarch/fetch.py +55 -0
- monarch/future.py +76 -0
- monarch/gradient/__init__.py +11 -0
- monarch/gradient/_gradient_generator.pyi +22 -0
- monarch/gradient/_gradient_generator.so +0 -0
- monarch/gradient_generator.py +185 -0
- monarch/memory.py +43 -0
- monarch/mesh_controller.py +271 -0
- monarch/monarch_controller +0 -0
- monarch/notebook.py +761 -0
- monarch/opaque_module.py +235 -0
- monarch/opaque_object.py +88 -0
- monarch/parallel/__init__.py +9 -0
- monarch/parallel/pipelining/__init__.py +7 -0
- monarch/parallel/pipelining/runtime.py +847 -0
- monarch/parallel/pipelining/schedule_ir.py +692 -0
- monarch/parallel/pipelining/scheduler.py +249 -0
- monarch/pdb_wrapper.py +135 -0
- monarch/proc_mesh.py +299 -0
- monarch/profiler.py +160 -0
- monarch/python_local_mesh.py +107 -0
- monarch/random.py +61 -0
- monarch/rdma.py +162 -0
- monarch/remote_class.py +114 -0
- monarch/rust_backend_mesh.py +280 -0
- monarch/rust_local_mesh.py +1402 -0
- monarch/sim_mesh.py +359 -0
- monarch/simulator/__init__.py +7 -0
- monarch/simulator/command_history.py +424 -0
- monarch/simulator/config.py +21 -0
- monarch/simulator/interface.py +59 -0
- monarch/simulator/ir.py +770 -0
- monarch/simulator/mock_controller.py +214 -0
- monarch/simulator/profiling.py +424 -0
- monarch/simulator/simulator.py +1052 -0
- monarch/simulator/task.py +255 -0
- monarch/simulator/tensor.py +373 -0
- monarch/simulator/trace.py +395 -0
- monarch/simulator/utils.py +41 -0
- monarch/simulator/worker.py +389 -0
- monarch/telemetry.py +19 -0
- monarch/tensor_worker_main.py +260 -0
- monarch/tensorboard.py +84 -0
- monarch/timer/__init__.py +21 -0
- monarch/timer/example_monarch.py +78 -0
- monarch/timer/example_spmd.py +55 -0
- monarch/timer/execution_timer.py +199 -0
- monarch/timer/execution_timer_test.py +131 -0
- monarch/tools/__init__.py +7 -0
- monarch/tools/cli.py +167 -0
- monarch/tools/commands.py +251 -0
- monarch/tools/components/__init__.py +7 -0
- monarch/tools/components/hyperactor.py +58 -0
- monarch/tools/config/__init__.py +20 -0
- monarch/tools/config/defaults.py +54 -0
- monarch/tools/mesh_spec.py +165 -0
- monarch/tools/network.py +69 -0
- monarch/worker/__init__.py +7 -0
- monarch/worker/_testing_function.py +481 -0
- monarch/worker/compiled_block.py +270 -0
- monarch/worker/debugger.py +125 -0
- monarch/worker/lines.py +47 -0
- monarch/worker/monitor.py +53 -0
- monarch/worker/worker.py +1191 -0
- monarch/world_mesh.py +34 -0
- monarch_supervisor/__init__.py +1044 -0
- monarch_supervisor/_testing.py +44 -0
- monarch_supervisor/function_call.py +30 -0
- monarch_supervisor/host.py +386 -0
- monarch_supervisor/launchers.py +145 -0
- monarch_supervisor/log_pstree.py +48 -0
- monarch_supervisor/logging.py +103 -0
- monarch_supervisor/python_executable.py +42 -0
- tests/__init__.py +0 -0
- tests/dispatch_bench.py +124 -0
- tests/dispatch_bench_helper.py +25 -0
- tests/error_test_binary.py +180 -0
- tests/simulator/__init__.py +0 -0
- tests/simulator/test_profiling.py +136 -0
- tests/simulator/test_simulator.py +411 -0
- tests/simulator/test_task.py +64 -0
- tests/simulator/test_worker.py +102 -0
- tests/sleep_binary.py +35 -0
- tests/test_actor_error.py +240 -0
- tests/test_alloc.py +25 -0
- tests/test_allocator.py +365 -0
- tests/test_coalescing.py +492 -0
- tests/test_controller.py +845 -0
- tests/test_device_mesh.py +132 -0
- tests/test_fault_tolerance.py +398 -0
- tests/test_future.py +94 -0
- tests/test_grad_generator.py +121 -0
- tests/test_mock_cuda.py +74 -0
- tests/test_pdb_actor.py +110 -0
- tests/test_python_actors.py +736 -0
- tests/test_remote_functions.py +1271 -0
- tests/test_rust_backend.py +217 -0
- tests/test_signal_safe_block_on.py +103 -0
- tests/test_sim_backend.py +54 -0
- tests/test_tensor_engine.py +52 -0
- torchmonarch_nightly-2025.6.27.dist-info/METADATA +94 -0
- torchmonarch_nightly-2025.6.27.dist-info/RECORD +165 -0
- torchmonarch_nightly-2025.6.27.dist-info/WHEEL +5 -0
- torchmonarch_nightly-2025.6.27.dist-info/entry_points.txt +3 -0
- torchmonarch_nightly-2025.6.27.dist-info/licenses/LICENSE +29 -0
- torchmonarch_nightly-2025.6.27.dist-info/top_level.txt +3 -0
tests/test_coalescing.py
ADDED
@@ -0,0 +1,492 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# pyre-unsafe

import itertools
from contextlib import contextmanager
from enum import Enum
from typing import ContextManager, List
from unittest.mock import patch

import monarch

import pytest

import torch
from monarch import (
    coalescing,
    DeviceMesh,
    fetch_shard,
    get_active_mesh,
    get_active_stream,
    no_mesh,
    remote,
    Stream,
)
from monarch._testing import TestingContext
from monarch.common._coalescing import _record_and_define, compile
from monarch.common.function_caching import AliasOf, Storage, TensorGroup
from monarch.common.tensor import Tensor


def _do_bogus_tensor_work(x, y, fail_rank=None):
    return x + y  # real function actually does x @ y


do_bogus_tensor_work = remote(
    "monarch.worker._testing_function.do_bogus_tensor_work",
    propagate=_do_bogus_tensor_work,
)


def inspect(x):
    return fetch_shard(x).result().item()


@pytest.fixture(scope="module", autouse=True)
def testing_context():
    global local
    with TestingContext() as local:
        yield


class BackendType(Enum):
    PY = "py"
    RS = "rs"


@pytest.mark.skipif(
    torch.cuda.device_count() < 2,
    reason="Not enough GPUs, this test requires at least 2 GPUs",
)
@pytest.mark.parametrize("backend_type", [BackendType.PY, BackendType.RS])
class TestCoalescing:
    @classmethod
    def local_device_mesh(
        cls,
        num_hosts: int,
        gpu_per_host: int,
        backend_type: BackendType,
        activate: bool = True,
    ) -> ContextManager[DeviceMesh]:
        # pyre-fixme[10]: pytest defines this fixture.
        return local.local_device_mesh(
            num_hosts,
            gpu_per_host,
            activate,
            backend=str(backend_type),
        )

    @property
    def num_outstanding_messages(self) -> int:
        return sum(
            len(msgs)
            for msgs in get_active_mesh().client.recorder.flat_messages.values()
        )

    def test_basic_coalescing(self, backend_type) -> None:
        with self.local_device_mesh(1, 1, backend_type):
            with coalescing():
                a = torch.zeros(3, 4)
                for _ in range(1, 10):
                    a = a + torch.ones(3, 4)
                # no messages should have been sent since coalescing is enabled
                assert self.num_outstanding_messages >= 10
            # now that the coalesce is done we should have flushed the messages
            assert self.num_outstanding_messages == 0

    def test_repeat_simple(self, backend_type) -> None:
        with self.local_device_mesh(1, 1, backend_type):
            a = torch.zeros(())

            @compile(verify=False)
            def fn():
                nonlocal a
                z = torch.ones(())
                a += z
                return z

            z = None
            for _ in range(3):
                z = fn()

            assert inspect(a) == 3
            assert inspect(z) == 1

    def test_repeat_formals(self, backend_type) -> None:
        with self.local_device_mesh(1, 1, backend_type):
            a = torch.rand(3, 4)

            @compile(verify=False)
            def fn(a, b):
                return 2 * a + b

            for _ in range(3):
                b = torch.rand(3, 4)
                z = fn(a, b)
                lz, la, lb = monarch.inspect((z, a, b))
                assert isinstance(la, torch.Tensor)
                assert isinstance(lb, torch.Tensor)
                with no_mesh.activate():
                    assert torch.allclose(lz, 2 * la + lb)

            @compile(verify=False)
            def fn(b):
                return 2 * a + b

            for _ in range(3):
                b = torch.rand(3, 4)
                z = fn(b)
                lz, la, lb = monarch.inspect((z, a, b))
                assert isinstance(la, torch.Tensor)
                assert isinstance(lb, torch.Tensor)
                with no_mesh.activate():
                    assert torch.allclose(lz, 2 * la + lb)

    def test_repeat_error_inside(self, backend_type) -> None:
        with self.local_device_mesh(1, 1, backend_type):
            a = torch.zeros(())

            @compile(verify=False)
            def fn():
                nonlocal a
                z = torch.ones(())
                a += z
                do_bogus_tensor_work(z, z)
                return z

            z = fn()
            # recorded coalescing will lump errors together, so just check
            # that the error surfaces when the result is fetched
            with pytest.raises(Exception, match="both arguments to matmul"):
                inspect(z)

    def test_repeat_inner_borrow(self, backend_type) -> None:
        with self.local_device_mesh(1, 1, backend_type):
            a = torch.zeros(())
            other = Stream("other")
            with other.activate():
                b = torch.ones(())

            @compile(verify=False)
            def fn():
                nonlocal a, b
                c, borrow = get_active_stream().borrow(b)
                with borrow:
                    a += c

            for _ in range(3):
                fn()

            assert inspect(a) == 3

    def test_repeat_outer_borrow(self, backend_type) -> None:
        with self.local_device_mesh(1, 1, backend_type):
            a = torch.zeros(())
            other = Stream("other")
            with other.activate():
                b = torch.ones(())
            c, borrow = get_active_stream().borrow(b)

            @compile(verify=False)
            def fn():
                nonlocal a, c
                a += c
                z = torch.rand(3, 4)
                del c
                return z

            with borrow:
                z = None
                for _ in range(3):
                    z = fn()

                result = fetch_shard(a).result()
                fetch_shard(z).result()
            with no_mesh.activate():
                assert result.item() == 3

    def test_nested_coalescing(self, backend_type) -> None:
        with self.local_device_mesh(1, 1, backend_type):
            with coalescing():
                a = torch.zeros(3, 4)
                with coalescing():
                    for _ in range(1, 10):
                        a = a + torch.ones(3, 4)
                    # confirm that there are messages waiting to be sent
                    assert self.num_outstanding_messages >= 10
                # exiting the inner block should not flush; we are still
                # inside the outer coalescing block
                assert self.num_outstanding_messages >= 10
            # now that the outer coalesce is done we should have flushed the messages
            assert self.num_outstanding_messages == 0

    def test_no_coalescing(self, backend_type) -> None:
        with self.local_device_mesh(1, 1, backend_type):
            a = torch.zeros(3, 4)
            for _ in range(1, 10):
                a = a + torch.ones(3, 4)
            # without coalescing the messages should be sent with nothing outstanding
            assert self.num_outstanding_messages == 0

    @contextmanager
    def assertRecorded(self, times: int):
        with patch(
            "monarch.common._coalescing._record_and_define",
            side_effect=_record_and_define,
        ) as m:
            yield
            assert m.call_count == times

    def assertAliases(self, tensors: List[Tensor], aliasing: List[int]):
        group = TensorGroup([t._fake for t in tensors])
        c = iter(itertools.count())
        actual = []
        assert len(group.pattern.entries) == len(tensors)
        assert len(aliasing) == len(tensors)
        for e in group.pattern.entries:
            match e.storage:
                case AliasOf(offset=offset):
                    actual.append(offset)
                case Storage():
                    actual.append(next(c))
        assert aliasing == actual

    def test_compile_aliasing(self, backend_type) -> None:
        with self.local_device_mesh(1, 1, backend_type):

            @compile(verify=False)
            def add(a, b):
                return a + b

            @compile(verify=False)
            def return_cond(a, b, c):
                if c:
                    return a
                else:
                    return b

            a = torch.rand(3, 4)
            b = torch.rand(3, 4)
            with self.assertRecorded(1):
                r = add(a, b)
                assert r.size() == (3, 4)
                r2 = add(b, a)
                self.assertAliases([a, b, r2, r], [0, 1, 2, 3])

            c = torch.rand(4)
            d = torch.rand(4, 4)
            with self.assertRecorded(1):
                e = add(c, d)
                assert e.size() == (4, 4)
                e = add(c, torch.rand(4, 4))
                assert e.size() == (4, 4)

            with self.assertRecorded(1):
                r = add(a, 4)
                self.assertAliases([r, a], [0, 1])

            with self.assertRecorded(1):
                r0 = return_cond(a, b, True)
                self.assertAliases([a, b, r0], [0, 1, 0])
                r1 = return_cond(b, a, True)
                self.assertAliases([a, b, r1], [0, 1, 1])

            with self.assertRecorded(1):
                r0 = return_cond(a, b, False)
                self.assertAliases([a, b, r0], [0, 1, 1])
                r1 = return_cond(a, b, False)
                self.assertAliases([b, a, r1], [0, 1, 0])

            @compile(verify=False)
            def captured(b):
                return a + b

            with self.assertRecorded(1):
                r = captured(b)
                self.assertAliases([a, b, r], [0, 1, 2])
                r = captured(torch.rand(3, 4))
                assert r.size() == (3, 4)

            with self.assertRecorded(1):
                # input aliased with capture
                captured(a)
                captured(a)

            @compile(verify=False)
            def weird(f, g):
                o = f + g
                return o, o[0], f[0], g[0], a[0]

            with self.assertRecorded(1):
                r0, r1, r2, r3, r4 = weird(c, d)
                self.assertAliases(
                    [c, d, a, r0, r1, r2, r3, r4], [0, 1, 2, 3, 3, 0, 1, 2]
                )

    def test_compile_input_permissions(self, backend_type):
        with self.local_device_mesh(1, 1, backend_type):
            a = torch.rand(3, 4)

            @compile(verify=False)
            def add(b):
                return a + b

            with self.assertRecorded(1):
                c = add(torch.rand(3, 4))

            other = Stream("other")
            ab, borrow = other.borrow(a, mutable=True)

            with borrow:
                with pytest.raises(TypeError, match="BORROWED"):
                    add(torch.rand(3, 4))

            # test we can read it again
            add(torch.rand(3, 4))

            ab, borrow = other.borrow(a)
            with borrow:
                add(torch.rand(3, 4))

            with self.assertRecorded(0):
                with other.activate():
                    c = torch.rand(3, 4)
                c, borrow = monarch.get_active_stream().borrow(c)
                with borrow:
                    add(c)

            a.drop()

            with pytest.raises(TypeError, match="DROPPED"):
                add(torch.rand(3, 4))

    def test_compile_verify(self, backend_type):
        with self.local_device_mesh(1, 1, backend_type):
            a = torch.rand(3, 4)

            @compile(verify=True)
            def add(b):
                return a + b

            c = False

            @compile(verify=True)
            def add_broken(b):
                nonlocal c
                if c:
                    a = torch.zeros(3, 4)
                else:
                    a = torch.rand(3, 4)
                return a.add(b)

            with self.assertRecorded(2):
                add(torch.rand(3, 4))
                add(torch.rand(3, 4))
                add(torch.rand(3, 4))

            add_broken(torch.rand(3, 4))
            with pytest.raises(RuntimeError, match="diverges"):
                c = True
                add_broken(torch.rand(3, 4))

    def test_dropped(self, backend_type):
        with self.local_device_mesh(1, 1, backend_type):
            a = torch.rand(3, 4)
            b = None

            @compile(verify=False)
            def foo():
                nonlocal b
                b = a + a

            foo()
            with pytest.raises(TypeError, match="DROPPED"):
                b.add(4)

    def test_across_mesh(self, backend_type):
        with self.local_device_mesh(2, 1, backend_type) as m:
            m0 = m(host=0)
            m1 = m(host=1)

            @compile
            def foo(a, b):
                with m0.activate():
                    r0 = a + a
                with m1.activate():
                    r1 = b + b
                return r0, r1

            with m0.activate():
                a = torch.rand(3, 4)
            with m1.activate():
                b = torch.rand(3, 4)

            r0, r1 = foo(a, b)
            with m0.activate():
                monarch.inspect(r0)
            with m1.activate():
                monarch.inspect(r1)

    def test_grad_not_supported(self, backend_type):
        with self.local_device_mesh(1, 1, backend_type):

            @compile
            def foo(x):
                return x

            y = torch.rand(3, requires_grad=True)

            @compile
            def returnit():
                return y

            with pytest.raises(TypeError, match="REQUIRES_GRAD"):
                foo(torch.rand(3, requires_grad=True))

            with pytest.raises(TypeError, match="REQUIRES_GRAD"):
                returnit()

    def test_mutate_inputs(self, backend_type):
        with self.local_device_mesh(1, 1, backend_type) as mesh:

            @compile(verify=False)
            def foo(x_not_mutated, w_not_mutated, y, y_alias, z, z_alias):
                u = (
                    x_not_mutated.mul(2.0)
                    + w_not_mutated
                    + z_alias.unsqueeze(0).repeat(3, 1)
                )
                v = y.add(5.0)
                stream = monarch.Stream("borrow")
                borrowed_y_alias, y_alias_borrow = stream.borrow(y_alias, mutable=True)
                with stream.activate():
                    borrowed_y_alias.add_(1.0)
                y_alias_borrow.drop()
                z.add_(1.0)
                return u, v

            x_not_mutated = torch.rand(3, 3)
            w_not_mutated = torch.rand(3, 3)
            y = torch.rand(3, 3)
            y_alias = y.reshape(-1)
            z = torch.rand(3, 3)
            z_alias = z[0, :]

            mutated_inputs = (y, y_alias, z, z_alias)
            mutated_aliases = set().union(*[t._aliases.aliases for t in mutated_inputs])
            all_inputs = (x_not_mutated, w_not_mutated) + mutated_inputs
            with patch.object(
                mesh.client,
                "new_node_nocoalesce",
                side_effect=mesh.client.new_node_nocoalesce,
            ) as new_node:
                for _ in range(2):
                    u, v = foo(*all_inputs)
                    (mutated, used, _, _), _ = new_node.call_args
                    assert mutated_aliases.union(
                        u._aliases.aliases, v._aliases.aliases
                    ) == set(mutated)
                    assert set(all_inputs) == set(used)