torchmonarch-nightly 2025.6.27 (torchmonarch_nightly-2025.6.27-cp312-cp312-manylinux2014_x86_64.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (165)
  1. monarch/__init__.py +189 -0
  2. monarch/_monarch/__init__.py +5 -0
  3. monarch/_monarch/hyperactor/__init__.py +58 -0
  4. monarch/_monarch/selection/__init__.py +13 -0
  5. monarch/_monarch/worker/__init__.py +0 -0
  6. monarch/_monarch/worker/debugger.py +117 -0
  7. monarch/_monarch/worker/logging.py +107 -0
  8. monarch/_rust_bindings.so +0 -0
  9. monarch/_testing.py +230 -0
  10. monarch/actor_mesh.py +761 -0
  11. monarch/allocator.py +220 -0
  12. monarch/bootstrap_main.py +59 -0
  13. monarch/builtins/__init__.py +14 -0
  14. monarch/builtins/log.py +22 -0
  15. monarch/builtins/random.py +68 -0
  16. monarch/cached_remote_function.py +257 -0
  17. monarch/code_sync.py +10 -0
  18. monarch/common/_C.pyi +11 -0
  19. monarch/common/_C.so +0 -0
  20. monarch/common/__init__.py +0 -0
  21. monarch/common/_coalescing.py +308 -0
  22. monarch/common/_device_utils.py +18 -0
  23. monarch/common/_tensor_to_table.py +172 -0
  24. monarch/common/base_tensor.py +28 -0
  25. monarch/common/borrows.py +143 -0
  26. monarch/common/client.py +690 -0
  27. monarch/common/constants.py +10 -0
  28. monarch/common/context_manager.py +40 -0
  29. monarch/common/controller_api.py +104 -0
  30. monarch/common/device_mesh.py +417 -0
  31. monarch/common/fake.py +55 -0
  32. monarch/common/function.py +160 -0
  33. monarch/common/function_caching.py +164 -0
  34. monarch/common/future.py +168 -0
  35. monarch/common/invocation.py +125 -0
  36. monarch/common/mast.py +221 -0
  37. monarch/common/messages.py +573 -0
  38. monarch/common/mock_cuda.py +41 -0
  39. monarch/common/opaque_ref.py +98 -0
  40. monarch/common/pickle_flatten.py +48 -0
  41. monarch/common/pipe.py +152 -0
  42. monarch/common/process_group.py +55 -0
  43. monarch/common/recording.py +127 -0
  44. monarch/common/reference.py +33 -0
  45. monarch/common/remote.py +297 -0
  46. monarch/common/selection.py +9 -0
  47. monarch/common/shape.py +229 -0
  48. monarch/common/stream.py +114 -0
  49. monarch/common/tensor.py +814 -0
  50. monarch/common/tensor_factory.py +31 -0
  51. monarch/common/tree.py +73 -0
  52. monarch/controller/__init__.py +7 -0
  53. monarch/controller/backend.py +223 -0
  54. monarch/controller/controller.py +223 -0
  55. monarch/controller/debugger.py +47 -0
  56. monarch/controller/history.py +90 -0
  57. monarch/controller/rust_backend/__init__.py +7 -0
  58. monarch/controller/rust_backend/controller.py +245 -0
  59. monarch/debugger.py +379 -0
  60. monarch/fetch.py +55 -0
  61. monarch/future.py +76 -0
  62. monarch/gradient/__init__.py +11 -0
  63. monarch/gradient/_gradient_generator.pyi +22 -0
  64. monarch/gradient/_gradient_generator.so +0 -0
  65. monarch/gradient_generator.py +185 -0
  66. monarch/memory.py +43 -0
  67. monarch/mesh_controller.py +271 -0
  68. monarch/monarch_controller +0 -0
  69. monarch/notebook.py +761 -0
  70. monarch/opaque_module.py +235 -0
  71. monarch/opaque_object.py +88 -0
  72. monarch/parallel/__init__.py +9 -0
  73. monarch/parallel/pipelining/__init__.py +7 -0
  74. monarch/parallel/pipelining/runtime.py +847 -0
  75. monarch/parallel/pipelining/schedule_ir.py +692 -0
  76. monarch/parallel/pipelining/scheduler.py +249 -0
  77. monarch/pdb_wrapper.py +135 -0
  78. monarch/proc_mesh.py +299 -0
  79. monarch/profiler.py +160 -0
  80. monarch/python_local_mesh.py +107 -0
  81. monarch/random.py +61 -0
  82. monarch/rdma.py +162 -0
  83. monarch/remote_class.py +114 -0
  84. monarch/rust_backend_mesh.py +280 -0
  85. monarch/rust_local_mesh.py +1402 -0
  86. monarch/sim_mesh.py +359 -0
  87. monarch/simulator/__init__.py +7 -0
  88. monarch/simulator/command_history.py +424 -0
  89. monarch/simulator/config.py +21 -0
  90. monarch/simulator/interface.py +59 -0
  91. monarch/simulator/ir.py +770 -0
  92. monarch/simulator/mock_controller.py +214 -0
  93. monarch/simulator/profiling.py +424 -0
  94. monarch/simulator/simulator.py +1052 -0
  95. monarch/simulator/task.py +255 -0
  96. monarch/simulator/tensor.py +373 -0
  97. monarch/simulator/trace.py +395 -0
  98. monarch/simulator/utils.py +41 -0
  99. monarch/simulator/worker.py +389 -0
  100. monarch/telemetry.py +19 -0
  101. monarch/tensor_worker_main.py +260 -0
  102. monarch/tensorboard.py +84 -0
  103. monarch/timer/__init__.py +21 -0
  104. monarch/timer/example_monarch.py +78 -0
  105. monarch/timer/example_spmd.py +55 -0
  106. monarch/timer/execution_timer.py +199 -0
  107. monarch/timer/execution_timer_test.py +131 -0
  108. monarch/tools/__init__.py +7 -0
  109. monarch/tools/cli.py +167 -0
  110. monarch/tools/commands.py +251 -0
  111. monarch/tools/components/__init__.py +7 -0
  112. monarch/tools/components/hyperactor.py +58 -0
  113. monarch/tools/config/__init__.py +20 -0
  114. monarch/tools/config/defaults.py +54 -0
  115. monarch/tools/mesh_spec.py +165 -0
  116. monarch/tools/network.py +69 -0
  117. monarch/worker/__init__.py +7 -0
  118. monarch/worker/_testing_function.py +481 -0
  119. monarch/worker/compiled_block.py +270 -0
  120. monarch/worker/debugger.py +125 -0
  121. monarch/worker/lines.py +47 -0
  122. monarch/worker/monitor.py +53 -0
  123. monarch/worker/worker.py +1191 -0
  124. monarch/world_mesh.py +34 -0
  125. monarch_supervisor/__init__.py +1044 -0
  126. monarch_supervisor/_testing.py +44 -0
  127. monarch_supervisor/function_call.py +30 -0
  128. monarch_supervisor/host.py +386 -0
  129. monarch_supervisor/launchers.py +145 -0
  130. monarch_supervisor/log_pstree.py +48 -0
  131. monarch_supervisor/logging.py +103 -0
  132. monarch_supervisor/python_executable.py +42 -0
  133. tests/__init__.py +0 -0
  134. tests/dispatch_bench.py +124 -0
  135. tests/dispatch_bench_helper.py +25 -0
  136. tests/error_test_binary.py +180 -0
  137. tests/simulator/__init__.py +0 -0
  138. tests/simulator/test_profiling.py +136 -0
  139. tests/simulator/test_simulator.py +411 -0
  140. tests/simulator/test_task.py +64 -0
  141. tests/simulator/test_worker.py +102 -0
  142. tests/sleep_binary.py +35 -0
  143. tests/test_actor_error.py +240 -0
  144. tests/test_alloc.py +25 -0
  145. tests/test_allocator.py +365 -0
  146. tests/test_coalescing.py +492 -0
  147. tests/test_controller.py +845 -0
  148. tests/test_device_mesh.py +132 -0
  149. tests/test_fault_tolerance.py +398 -0
  150. tests/test_future.py +94 -0
  151. tests/test_grad_generator.py +121 -0
  152. tests/test_mock_cuda.py +74 -0
  153. tests/test_pdb_actor.py +110 -0
  154. tests/test_python_actors.py +736 -0
  155. tests/test_remote_functions.py +1271 -0
  156. tests/test_rust_backend.py +217 -0
  157. tests/test_signal_safe_block_on.py +103 -0
  158. tests/test_sim_backend.py +54 -0
  159. tests/test_tensor_engine.py +52 -0
  160. torchmonarch_nightly-2025.6.27.dist-info/METADATA +94 -0
  161. torchmonarch_nightly-2025.6.27.dist-info/RECORD +165 -0
  162. torchmonarch_nightly-2025.6.27.dist-info/WHEEL +5 -0
  163. torchmonarch_nightly-2025.6.27.dist-info/entry_points.txt +3 -0
  164. torchmonarch_nightly-2025.6.27.dist-info/licenses/LICENSE +29 -0
  165. torchmonarch_nightly-2025.6.27.dist-info/top_level.txt +3 -0
monarch/opaque_module.py
@@ -0,0 +1,235 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the BSD-style license found in the
+ # LICENSE file in the root directory of this source tree.
+
+ from typing import List, Optional
+
+ import torch
+ from monarch.common.function_caching import TensorGroup, TensorGroupPattern
+ from monarch.common.opaque_ref import OpaqueRef
+ from monarch.common.remote import remote
+ from monarch.common.tensor_factory import TensorFactory
+ from monarch.common.tree import flatten
+ from monarch.opaque_object import _fresh_opaque_ref, OpaqueObject
+ from torch.autograd.graph import get_gradient_edge
+
+
+ def _get_parameters_shape(module: OpaqueRef) -> TensorGroupPattern:
+     the_module: torch.nn.Module = module.value
+     group = TensorGroup(list(the_module.parameters()))
+     return group.pattern
+
+
+ def _get_parameters(module: OpaqueRef) -> List[torch.Tensor]:
+     # XXX - we do not want worker tensor refs to have requires_grad on,
+     # because then any compute will create a backward graph
+     # which will never get used.
+     # This should be enforced at the worker level, but I think we are
+     # hijacking the requires_grad bit to communicate information in
+     # the autograd controller wrapper. We need to use a different
+     # side-channel to do that.
+     return [p.detach() for p in module.value.parameters()]
+
+
+ def _remote_forward(require_grads: List[bool], module: OpaqueRef, args, kwargs):
+     # forward on the worker
+
+     # the parameter tensors inside the module will be require_grad_(True),
+     # but the input worker tensors, like all worker state, do not have
+     # autograd recording on. We have to turn it on inside just this function
+     # to do an autograd pass.
+
+     # parameters has to match what _get_parameters returns for this to work.
+     parameters = list(module.value.parameters())
+     all_inputs, unflatten_inputs = flatten(
+         (args, kwargs, parameters), lambda x: isinstance(x, torch.Tensor)
+     )
+     # set requires_grad on inputs. We skip the parameters because they
+     # will already have requires_grad set, and we can't detach them
+     # here otherwise grad won't flow to them.
+     for i in range(len(all_inputs) - len(parameters)):
+         if require_grads[i]:
+             all_inputs[i] = all_inputs[i].detach().requires_grad_(True)
+
+     # we have to create these just in case the module doesn't actually
+     # _use_ a parameter, in which case we have to create the zero.
+     # we can't really tell a priori if it will be used or not.
+     input_factories = [TensorFactory.from_tensor(t) for t in all_inputs]
+
+     # we have to be careful to save just the autograd graph edges and not
+     # the input/output tensors. Otherwise we might keep them longer than they
+     # are truly needed.
+     all_inputs_require_grad_edges = [
+         get_gradient_edge(input) for input, rg in zip(all_inputs, require_grads) if rg
+     ]
+
+     args, kwargs, _ = unflatten_inputs(all_inputs)
+
+     # the real module gets called here.
+     result = module.value(*args, **kwargs)
+
+     all_outputs_requires_grad, unflatten_outputs = flatten(
+         result, lambda x: isinstance(x, torch.Tensor) and x.requires_grad
+     )
+
+     all_output_edges = [
+         get_gradient_edge(output) for output in all_outputs_requires_grad
+     ]
+
+     # this backward closure keeps the state around to invoke backward
+     # and is held as the OpaqueRef we return to the controller.
+     def backward(all_grad_outputs: List[torch.Tensor]):
+         # careful, do not capture any input/output tensors.
+         # they might not be required for gradient, and will waste memory.
+         with torch.no_grad():
+             grad_inputs = torch.autograd.grad(
+                 inputs=all_inputs_require_grad_edges,
+                 outputs=all_output_edges,
+                 grad_outputs=all_grad_outputs,
+                 allow_unused=True,
+             )
+         grad_inputs_iter = iter(grad_inputs)
+         all_grad_inputs = [
+             next(grad_inputs_iter) if rg else None for rg in require_grads
+         ]
+         for i, rg in enumerate(require_grads):
+             # if the grad turned out unused we have to make a zero tensor here
+             # because the controller is expecting tensors, not None.
+             if rg and all_grad_inputs[i] is None:
+                 all_grad_inputs[i] = input_factories[i].zeros()
+         return all_grad_inputs
+
+     # detach outputs, because worker tensors do not keep gradient state;
+     # the only gradient state on the worker is localized to the backward closure.
+     result = unflatten_outputs(t.detach() for t in all_outputs_requires_grad)
+     return OpaqueRef(backward), result
+
+
+ def _remote_backward(backward_closure: OpaqueRef, all_grad_outputs: List[torch.Tensor]):
+     # this is just a small trampoline that calls the closure that forward defined.
+     return backward_closure.value(all_grad_outputs)
+
+
+ class OpaqueModule:
+     """
+     Provides an _unsafe_ wrapper around a stateful module object that lives on a remote mesh.
+
+         linear = OpaqueModule("torch.nn.Linear", 3, 3, device="cuda")
+         output = linear(input, propagator=lambda self, x: x.clone())
+         r = output.sum()
+         with torch.no_grad():
+             r.backward()
+
+     It supports:
+
+     * Accessing parameters of the module on the controller via m.parameters(), which will
+       use remote functions to figure out the shape of the parameters and get a reference to them.
+     * Invoking the forward of the module by providing inputs and a manual shape propagation function:
+           m(input, propagator=lambda self, x: x.clone())
+       Trying to do a cached function in this situation is very tricky because of the boundaries
+       between autograd/no-autograd, so it is not implemented yet.
+     * Calculating gradients through the module invocation as if this module was a normal controller module.
+
+     In the future we should consider whether we want this to actually be a subclass of torch.nn.Module,
+     such that it could have hooks and other features. If we do this, we need to implement most of
+     the existing torch.nn.Module API so that it behaves in the expected way.
+
+     """
+
+     def __init__(self, *args, **kwargs):
+         self._object = OpaqueObject(*args, **kwargs)
+         self._parameters: Optional[List[torch.Tensor]] = None
+
+     def parameters(self):
+         if self._parameters is None:
+             tensor_group_pattern = (
+                 remote(_get_parameters_shape)
+                 .call_on_shard_and_fetch(self._object)
+                 .result()
+             )
+             self._parameters = [
+                 p.requires_grad_(True)
+                 for p in remote(
+                     _get_parameters,
+                     propagate=lambda self: tensor_group_pattern.empty([]),
+                 )(self._object)
+             ]
+
+         return self._parameters
+
+     def call_method(self, *args, **kwargs):
+         return self._object.call_method(*args, **kwargs)
+
+     def __call__(self, *args, propagator, **kwargs):
+         parameters = self.parameters()
+         # torch.autograd.Function only supports flat lists of input/output tensors,
+         # so we have to do a bunch of flattening/unflattening to call it.
+         all_inputs, unflatten_inputs = flatten(
+             (args, kwargs, parameters), lambda x: isinstance(x, torch.Tensor)
+         )
+
+         # the worker will need to understand which gradients to calculate,
+         # which we pass in as a flag array here.
+         requires_grad = [t.requires_grad for t in all_inputs]
+         if not sum(requires_grad):
+             # early exit if we do not have gradients (including toward the parameters)
+             return self._object.call_method("__call__", propagator, *args, **kwargs)
+
+         # these will be used to describe the shape of gradients to the inputs,
+         # so we cannot use TensorGroup to recover alias information. Having
+         # gradient tensors that alias each other coming out of one of these functions
+         # will break things.
+         input_factories = [TensorFactory.from_tensor(i) for i in all_inputs]
+
+         unflatten_outputs = None
+         backward_ctx = None
+
+         # we use this autograd function to define how to hook up the gradient
+         # calculated on the worker to the gradient graph _on the client_.
+
+         # This code runs entirely on the client.
+         class F(torch.autograd.Function):
+             @staticmethod
+             def forward(ctx, *all_inputs):
+                 nonlocal backward_ctx, unflatten_outputs
+                 args, kwargs, parameters = unflatten_inputs(all_inputs)
+                 # this remote call invokes the forward pass on the worker.
+                 # notice it returns the (non-gradient-recording) result of the
+                 # forward pass, and a backward_ctx opaque ref that we will
+                 # call in the backward pass to flow controller gradients
+                 # through the worker-saved autograd state. Holding
+                 # backward_ctx alive on the worker is what keeps
+                 # the worker autograd state alive. We should check there is
+                 # no funny business with class lifetimes.
+                 backward_ctx, result = remote(
+                     _remote_forward,
+                     propagate=lambda requires_grad, obj, args, kwargs: (
+                         _fresh_opaque_ref(),
+                         propagator(obj, *args, **kwargs),
+                     ),
+                 )(requires_grad, self._object, args, kwargs)
+
+                 flat_outputs, unflatten_outputs = flatten(
+                     result, lambda x: isinstance(x, torch.Tensor)
+                 )
+                 return (*flat_outputs,)
+
+             @staticmethod
+             def backward(ctx, *all_grad_outputs):
+                 # this instructs the worker to propagate output grads back to our input
+                 # grads; all_grad_inputs has to match all_inputs of forward.
+                 all_grad_inputs = remote(
+                     _remote_backward,
+                     propagate=lambda _ctx, _all_grad_outputs: tuple(
+                         f.empty() if rg else None
+                         for f, rg in zip(input_factories, requires_grad)
+                     ),
+                 )(backward_ctx, all_grad_outputs)
+                 return all_grad_inputs
+
+         # apply unwraps the gradient tensors and inserts our custom block.
+         flat_outputs = F.apply(*all_inputs)
+         result = unflatten_outputs(flat_outputs)
+         return result
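
For orientation, here is a minimal controller-side sketch of how OpaqueModule is meant to be driven, following the class docstring above. It assumes a monarch device mesh is already active so that the tensors created below live on the workers and "torch.nn.Linear" is resolvable there; the input shape and the clone-based propagator are illustrative assumptions, not part of the package.

import torch
from monarch.opaque_module import OpaqueModule

# Assumption: a monarch device mesh is already active on the controller,
# so the tensors below are mesh tensors. Shapes are illustrative.
linear = OpaqueModule("torch.nn.Linear", 3, 3, device="cuda")
x = torch.rand(4, 3, device="cuda", requires_grad=True)

# The propagator describes the shape of the remote forward's output without
# running the real module on the controller; Linear(3, 3) maps (4, 3) -> (4, 3),
# so a clone of the input has the right shape.
out = linear(x, propagator=lambda obj, t: t.clone())

r = out.sum()
with torch.no_grad():  # mirrors the class docstring above
    r.backward()

# linear.parameters() returns controller-side references to the worker
# parameters; their gradients are produced by the remote backward closure.
grads = [p.grad for p in linear.parameters()]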
monarch/opaque_object.py
@@ -0,0 +1,88 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the BSD-style license found in the
+ # LICENSE file in the root directory of this source tree.
+
+ import functools
+
+ import torch
+ from monarch.common.function import (
+     ConvertsToResolvable,
+     resolvable_function,
+     ResolvableFunction,
+ )
+
+ from monarch.common.opaque_ref import OpaqueRef
+ from monarch.common.remote import remote
+
+
+ def _invoke_method(obj: OpaqueRef, method_name: str, *args, **kwargs):
+     return getattr(obj.value, method_name)(*args, **kwargs)
+
+
+ def _fresh_opaque_ref():
+     return OpaqueRef(torch.zeros(0, dtype=torch.int64))
+
+
+ @remote(propagate=lambda *args, **kwargs: _fresh_opaque_ref())
+ def _construct_object(
+     constructor_resolver: ResolvableFunction, *args, **kwargs
+ ) -> OpaqueRef:
+     constructor = constructor_resolver.resolve()
+     return OpaqueRef(constructor(*args, **kwargs))
+
+
+ def opaque_method(fn):
+     method_name = fn.__name__
+
+     @functools.wraps(fn)
+     def impl(self, *args, **kwargs):
+         return self.call_method(method_name, fn, *args, **kwargs)
+
+     return impl
+
+
+ class OpaqueObject(OpaqueRef):
+     """
+     Provides syntax sugar for working with OpaqueRef objects on the controller.
+
+         class MyWrapperObject(OpaqueObject):
+
+             # Declare that the object has an a_remote_add method.
+             # The definition provides the shape propagation rule.
+             @opaque_method
+             def a_remote_add(self, t: torch.Tensor):
+                 return t + t
+
+         # on the controller you can now create the wrapper
+         obj: MyWrapperObject = MyWrapperObject.construct("path.to.worker.constructor", torch.rand(3, 4))
+
+         # and call its methods
+         t: monarch.Tensor = obj.a_remote_add(torch.rand(3, 4))
+
+     This interface can be used to build (unsafe) wrappers around stateful things such as torch.nn.Modules
+     in order to make porting them to monarch-first structures easier.
+     """
+
+     def __init__(self, constructor: ConvertsToResolvable | OpaqueRef, *args, **kwargs):
+         if isinstance(constructor, OpaqueRef):
+             self._key = constructor._key
+         else:
+             self._key = _construct_object(
+                 resolvable_function(constructor), *args, **kwargs
+             )._key
+
+     def call_method(self, method_name, propagation, *args, **kwargs):
+         endpoint = remote(
+             _invoke_method,
+             propagate=lambda self, method_name, *args, **kwargs: propagation(
+                 self, *args, **kwargs
+             ),
+         )
+         return endpoint(self, method_name, *args, **kwargs)
+
+     def call_method_on_shard_and_fetch(self, method_name, *args, **kwargs):
+         return remote(_invoke_method).call_on_shard_and_fetch(
+             self, method_name, *args, **kwargs
+         )
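
A short sketch of the wrapper pattern OpaqueObject is meant to enable, adapted from the docstring above. The LinearRef wrapper and the chosen shapes are illustrative assumptions; only OpaqueObject, opaque_method, call_method_on_shard_and_fetch, and the idea of naming a worker-resolvable constructor ("torch.nn.Linear") come from the code itself, and it likewise assumes an active device mesh.

import torch
from monarch.opaque_object import OpaqueObject, opaque_method


class LinearRef(OpaqueObject):
    # The decorated body is only the shape-propagation rule, run on the
    # controller; the real nn.Linear.forward runs on the worker via
    # _invoke_method. Linear(3, 3) maps (N, 3) -> (N, 3), so a clone suffices.
    @opaque_method
    def forward(self, x: torch.Tensor):
        return x.clone()


# With a device mesh active, name a constructor that is importable on the
# workers and pass its arguments; the constructed object never leaves the worker.
lin = LinearRef("torch.nn.Linear", 3, 3)
y = lin.forward(torch.rand(4, 3))

# To pull a single concrete result back to the controller, the shard-and-fetch
# variant can be used and resolved with .result(), as OpaqueModule.parameters()
# does above.
fetched = lin.call_method_on_shard_and_fetch("forward", torch.rand(4, 3))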
monarch/parallel/__init__.py
@@ -0,0 +1,9 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the BSD-style license found in the
+ # LICENSE file in the root directory of this source tree.
+
+ from monarch.parallel.pipelining.runtime import get_parameter_udf, PipelineParallelism
+
+ __all__ = ["PipelineParallelism", "get_parameter_udf"]
@@ -0,0 +1,7 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the BSD-style license found in the
+ # LICENSE file in the root directory of this source tree.
+
+ # pyre-strict