torchmonarch-nightly 2025.6.27__cp313-cp313-manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (165)
  1. monarch/__init__.py +189 -0
  2. monarch/_monarch/__init__.py +5 -0
  3. monarch/_monarch/hyperactor/__init__.py +58 -0
  4. monarch/_monarch/selection/__init__.py +13 -0
  5. monarch/_monarch/worker/__init__.py +0 -0
  6. monarch/_monarch/worker/debugger.py +117 -0
  7. monarch/_monarch/worker/logging.py +107 -0
  8. monarch/_rust_bindings.so +0 -0
  9. monarch/_testing.py +230 -0
  10. monarch/actor_mesh.py +761 -0
  11. monarch/allocator.py +220 -0
  12. monarch/bootstrap_main.py +59 -0
  13. monarch/builtins/__init__.py +14 -0
  14. monarch/builtins/log.py +22 -0
  15. monarch/builtins/random.py +68 -0
  16. monarch/cached_remote_function.py +257 -0
  17. monarch/code_sync.py +10 -0
  18. monarch/common/_C.pyi +11 -0
  19. monarch/common/_C.so +0 -0
  20. monarch/common/__init__.py +0 -0
  21. monarch/common/_coalescing.py +308 -0
  22. monarch/common/_device_utils.py +18 -0
  23. monarch/common/_tensor_to_table.py +172 -0
  24. monarch/common/base_tensor.py +28 -0
  25. monarch/common/borrows.py +143 -0
  26. monarch/common/client.py +690 -0
  27. monarch/common/constants.py +10 -0
  28. monarch/common/context_manager.py +40 -0
  29. monarch/common/controller_api.py +104 -0
  30. monarch/common/device_mesh.py +417 -0
  31. monarch/common/fake.py +55 -0
  32. monarch/common/function.py +160 -0
  33. monarch/common/function_caching.py +164 -0
  34. monarch/common/future.py +168 -0
  35. monarch/common/invocation.py +125 -0
  36. monarch/common/mast.py +221 -0
  37. monarch/common/messages.py +573 -0
  38. monarch/common/mock_cuda.py +41 -0
  39. monarch/common/opaque_ref.py +98 -0
  40. monarch/common/pickle_flatten.py +48 -0
  41. monarch/common/pipe.py +152 -0
  42. monarch/common/process_group.py +55 -0
  43. monarch/common/recording.py +127 -0
  44. monarch/common/reference.py +33 -0
  45. monarch/common/remote.py +297 -0
  46. monarch/common/selection.py +9 -0
  47. monarch/common/shape.py +229 -0
  48. monarch/common/stream.py +114 -0
  49. monarch/common/tensor.py +814 -0
  50. monarch/common/tensor_factory.py +31 -0
  51. monarch/common/tree.py +73 -0
  52. monarch/controller/__init__.py +7 -0
  53. monarch/controller/backend.py +223 -0
  54. monarch/controller/controller.py +223 -0
  55. monarch/controller/debugger.py +47 -0
  56. monarch/controller/history.py +90 -0
  57. monarch/controller/rust_backend/__init__.py +7 -0
  58. monarch/controller/rust_backend/controller.py +245 -0
  59. monarch/debugger.py +379 -0
  60. monarch/fetch.py +55 -0
  61. monarch/future.py +76 -0
  62. monarch/gradient/__init__.py +11 -0
  63. monarch/gradient/_gradient_generator.pyi +22 -0
  64. monarch/gradient/_gradient_generator.so +0 -0
  65. monarch/gradient_generator.py +185 -0
  66. monarch/memory.py +43 -0
  67. monarch/mesh_controller.py +271 -0
  68. monarch/monarch_controller +0 -0
  69. monarch/notebook.py +761 -0
  70. monarch/opaque_module.py +235 -0
  71. monarch/opaque_object.py +88 -0
  72. monarch/parallel/__init__.py +9 -0
  73. monarch/parallel/pipelining/__init__.py +7 -0
  74. monarch/parallel/pipelining/runtime.py +847 -0
  75. monarch/parallel/pipelining/schedule_ir.py +692 -0
  76. monarch/parallel/pipelining/scheduler.py +249 -0
  77. monarch/pdb_wrapper.py +135 -0
  78. monarch/proc_mesh.py +299 -0
  79. monarch/profiler.py +160 -0
  80. monarch/python_local_mesh.py +107 -0
  81. monarch/random.py +61 -0
  82. monarch/rdma.py +162 -0
  83. monarch/remote_class.py +114 -0
  84. monarch/rust_backend_mesh.py +280 -0
  85. monarch/rust_local_mesh.py +1402 -0
  86. monarch/sim_mesh.py +359 -0
  87. monarch/simulator/__init__.py +7 -0
  88. monarch/simulator/command_history.py +424 -0
  89. monarch/simulator/config.py +21 -0
  90. monarch/simulator/interface.py +59 -0
  91. monarch/simulator/ir.py +770 -0
  92. monarch/simulator/mock_controller.py +214 -0
  93. monarch/simulator/profiling.py +424 -0
  94. monarch/simulator/simulator.py +1052 -0
  95. monarch/simulator/task.py +255 -0
  96. monarch/simulator/tensor.py +373 -0
  97. monarch/simulator/trace.py +395 -0
  98. monarch/simulator/utils.py +41 -0
  99. monarch/simulator/worker.py +389 -0
  100. monarch/telemetry.py +19 -0
  101. monarch/tensor_worker_main.py +260 -0
  102. monarch/tensorboard.py +84 -0
  103. monarch/timer/__init__.py +21 -0
  104. monarch/timer/example_monarch.py +78 -0
  105. monarch/timer/example_spmd.py +55 -0
  106. monarch/timer/execution_timer.py +199 -0
  107. monarch/timer/execution_timer_test.py +131 -0
  108. monarch/tools/__init__.py +7 -0
  109. monarch/tools/cli.py +167 -0
  110. monarch/tools/commands.py +251 -0
  111. monarch/tools/components/__init__.py +7 -0
  112. monarch/tools/components/hyperactor.py +58 -0
  113. monarch/tools/config/__init__.py +20 -0
  114. monarch/tools/config/defaults.py +54 -0
  115. monarch/tools/mesh_spec.py +165 -0
  116. monarch/tools/network.py +69 -0
  117. monarch/worker/__init__.py +7 -0
  118. monarch/worker/_testing_function.py +481 -0
  119. monarch/worker/compiled_block.py +270 -0
  120. monarch/worker/debugger.py +125 -0
  121. monarch/worker/lines.py +47 -0
  122. monarch/worker/monitor.py +53 -0
  123. monarch/worker/worker.py +1191 -0
  124. monarch/world_mesh.py +34 -0
  125. monarch_supervisor/__init__.py +1044 -0
  126. monarch_supervisor/_testing.py +44 -0
  127. monarch_supervisor/function_call.py +30 -0
  128. monarch_supervisor/host.py +386 -0
  129. monarch_supervisor/launchers.py +145 -0
  130. monarch_supervisor/log_pstree.py +48 -0
  131. monarch_supervisor/logging.py +103 -0
  132. monarch_supervisor/python_executable.py +42 -0
  133. tests/__init__.py +0 -0
  134. tests/dispatch_bench.py +124 -0
  135. tests/dispatch_bench_helper.py +25 -0
  136. tests/error_test_binary.py +180 -0
  137. tests/simulator/__init__.py +0 -0
  138. tests/simulator/test_profiling.py +136 -0
  139. tests/simulator/test_simulator.py +411 -0
  140. tests/simulator/test_task.py +64 -0
  141. tests/simulator/test_worker.py +102 -0
  142. tests/sleep_binary.py +35 -0
  143. tests/test_actor_error.py +240 -0
  144. tests/test_alloc.py +25 -0
  145. tests/test_allocator.py +365 -0
  146. tests/test_coalescing.py +492 -0
  147. tests/test_controller.py +845 -0
  148. tests/test_device_mesh.py +132 -0
  149. tests/test_fault_tolerance.py +398 -0
  150. tests/test_future.py +94 -0
  151. tests/test_grad_generator.py +121 -0
  152. tests/test_mock_cuda.py +74 -0
  153. tests/test_pdb_actor.py +110 -0
  154. tests/test_python_actors.py +736 -0
  155. tests/test_remote_functions.py +1271 -0
  156. tests/test_rust_backend.py +217 -0
  157. tests/test_signal_safe_block_on.py +103 -0
  158. tests/test_sim_backend.py +54 -0
  159. tests/test_tensor_engine.py +52 -0
  160. torchmonarch_nightly-2025.6.27.dist-info/METADATA +94 -0
  161. torchmonarch_nightly-2025.6.27.dist-info/RECORD +165 -0
  162. torchmonarch_nightly-2025.6.27.dist-info/WHEEL +5 -0
  163. torchmonarch_nightly-2025.6.27.dist-info/entry_points.txt +3 -0
  164. torchmonarch_nightly-2025.6.27.dist-info/licenses/LICENSE +29 -0
  165. torchmonarch_nightly-2025.6.27.dist-info/top_level.txt +3 -0
monarch/common/_coalescing.py
@@ -0,0 +1,308 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the BSD-style license found in the
+ # LICENSE file in the root directory of this source tree.
+
+ # pyre-unsafe
+
+ import functools
+ from collections import defaultdict
+ from contextlib import contextmanager
+ from dataclasses import dataclass
+ from typing import (
+     Any,
+     Callable,
+     Dict,
+     Generator,
+     List,
+     NamedTuple,
+     Optional,
+     Sequence,
+     Tuple,
+     TYPE_CHECKING,
+ )
+
+ import torch
+ from monarch.common import messages
+
+ from monarch.common.fake import fake_call
+ from monarch.common.function_caching import (
+     hashable_tensor_flatten,
+     TensorGroup,
+     TensorGroupPattern,
+ )
+ from monarch.common.tensor import InputChecker, Tensor
+ from monarch.common.tree import flatten
+
+ if TYPE_CHECKING:
+     from monarch.common.client import Recorder
+     from monarch.common.recording import Recording
+
+     from .client import Client
+
+ _coalescing = None
+
+
+ class CoalescingState:
+     def __init__(self, recording=False):
+         self.controller: Optional["Client"] = None
+         self.recorder: Optional["Recorder"] = None
+         self.recording = recording
+
+     def set_controller(self, controller: "Client"):
+         if self.controller is None:
+             self.controller = controller
+             controller.flush_deletes(False)
+         if self.controller is not controller:
+             raise ValueError(
+                 "using multiple controllers in the same coalescing block is not supported"
+             )
+
+     @contextmanager
+     def activate(self) -> Generator[None, Any, Any]:
+         global _coalescing
+         assert _coalescing is None
+         finished = False
+         try:
+             _coalescing = self
+             yield
+             finished = True
+         finally:
+             ctrl = self.controller
+             if ctrl is not None:
+                 if finished:
+                     ctrl.flush_deletes()
+                 self.recorder = ctrl.reset_recorder()
+                 if not finished:
+                     self.recorder.abandon()
+             _coalescing = None
+
+
+ @contextmanager
+ def coalescing() -> Generator[None, Any, Any]:
+     global _coalescing
+     if _coalescing is not None:
+         yield
+         return
+
+     state = CoalescingState()
+     with state.activate():
+         yield
+
+     if state.recorder is not None:
+         assert state.controller is not None
+         state.recorder.run_once(state.controller)
+
+
+ def _record_and_define(
+     fn: Callable, args: Tuple[Any, ...], kwargs: Dict[str, Any]
+ ) -> "CacheEntry":
+     input_tensors, unflatten_input = flatten(
+         (args, kwargs), lambda x: isinstance(x, Tensor)
+     )
+
+     with InputChecker.from_flat_args(
+         "compile", input_tensors, unflatten_input
+     ) as checker:
+         checker.check_no_requires_grad()
+
+     for a in input_tensors:
+         assert a._seq is not None
+
+     state = CoalescingState(recording=True)
+     with state.activate():
+         formal_tensors = []
+         for i, input in enumerate(input_tensors):
+             state.set_controller(input.mesh.client)
+             t = Tensor(input._fake, input.mesh, input.stream)
+             input.mesh._send(
+                 messages.RecordingFormal(t, i, t.stream._to_ref(input.mesh.client))
+             )
+             formal_tensors.append(t)
+         formal_args, formal_kwargs = unflatten_input(formal_tensors)
+         recorded_result = fn(*formal_args, **formal_kwargs)
+         output_tensors, unflatten_result = flatten(
+             recorded_result, lambda x: isinstance(x, Tensor)
+         )
+         with InputChecker(
+             output_tensors,
+             lambda ts: f"{unflatten_result(ts)} = compiled_function(...)",
+         ) as checker:
+             checker.check_no_requires_grad()
+         for i, output in enumerate(output_tensors):
+             state.set_controller(output.mesh.client)
+             output.mesh._send(
+                 messages.RecordingResult(
+                     output, i, output.stream._to_ref(output.mesh.client)
+                 )
+             )
+
+     recorder = state.recorder
+     if recorder is None:
+         # no input tensors or output tensors, so just cache the result
+         return CacheEntry(
+             TensorGroup([]),
+             TensorGroupPattern(()),
+             lambda args, kwargs: recorded_result,
+             None,
+         )
+
+     controller = state.controller
+     assert controller is not None
+     recorder.add((), output_tensors, [])
+     recording = recorder.define_recording(
+         controller, len(output_tensors), len(input_tensors)
+     )
+
+     fake_uses = [r._fake for r in recording.uses]
+     captures_group = TensorGroup(fake_uses)
+     inputs_group = TensorGroup([i._fake for i in input_tensors], parent=captures_group)
+
+     outputs_group = TensorGroup([o._fake for o in output_tensors], parent=inputs_group)
+     outputs_pattern = outputs_group.pattern
+
+     def run(args, kwargs):
+         actuals, _ = flatten((args, kwargs), lambda x: isinstance(x, Tensor))
+         for a in actuals:
+             assert a._seq is not None
+
+         fake_result_tensors = fake_call(
+             outputs_pattern.empty, [fake_uses, [a._fake for a in actuals]]
+         )
+
+         # recording.run does permissions checks on all the tensors.
+         # if those checks fail then the tensors here will have been created
+         # but not defined, causing spurious delete messages.
+         # To avoid this, we pass a generator rather than a list
+         # and only create the tensors in run
+         result_tensors_generator = (
+             Tensor(f, o.mesh, o.stream)
+             for f, o in zip(fake_result_tensors, output_tensors)
+         )
+         return unflatten_result(recording.run(result_tensors_generator, actuals))
+
+     return CacheEntry(captures_group, inputs_group.pattern, run, recording)
+
+
+ @dataclass
+ class CacheEntry:
+     captures_group: TensorGroup
+     inputs_pattern: TensorGroupPattern
+     run: Callable[[Tuple[Any, ...], Dict[str, Any]], Any]
+     to_verify: Optional["Recording"]
+
+     def matches(self, input_tensors: List[torch.Tensor]) -> bool:
+         # if an input aliases a captured tensor, then we have
+         # to check that all future inputs alias the _same exact_
+         # captured tensor. These are additional checks after
+         # matching on the pattern of aliasing for just the inputs because
+         # we do not know what the captures would be without first matching the inputs without the captures.
+         inputs_group = TensorGroup(input_tensors, parent=self.captures_group)
+         return self.inputs_pattern == inputs_group.pattern
+
+
+ def compile(fn=None, verify=True):
+     """
+     Wraps `fn` such that it records and later replays a single message to workers
+     to instruct them to run the entire contents of this function. Since the function invocation
+     is much smaller than the original set of messages and since we do not re-execute the python inside
+     the function after recording, this has substantially lower latency.
+
+     While eventually `compile` will be backed by `torch.compile`'s dynamo executor, it currently
+     works as a simple tracer with the following rules for when it chooses to trace vs when
+     it will reuse an existing trace.
+
+     A new trace is created whenever:
+
+     * The _values_ of the non-tensor arguments to fn have not been seen before.
+     * The _metadata_ of a tensor argument has not been seen before. Metadata includes the sizes, strides,
+       dtype, devices, layout, device meshes, streams, and pattern of aliasing of the arguments
+       with respect to other arguments and any values the trace captures.
+
+     A new trace will not be created in the following situations, which are known to be **unsafe**:
+
+     * A value that is not an argument to the function but is used by the function (e.g. a global)
+       changes in a way that would affect what messages are being sent.
+     * A tensor that is not an argument to the function changes metadata, or gets reassigned to
+       a new tensor in Python.
+
+
+     The trace is allowed to use tensors that are referenced in the body but not listed as arguments,
+     such as globals or closure-captured locals, as long as these values are not modified in
+     the ways that are listed as unsafe above. When switched to a torch.compile backed version,
+     these safety caveats will be improved.
+
+     Compilation currently does not work if the inputs or outputs to the function have `requires_grad=True`,
+     because we will not generate a correct backwards pass graph. However, captured tensors
+     are allowed to be requires_grad=True, and gradient calculation (forward+backward)
+     can run entirely within the function.
+
+     Can be used as a wrapper:
+         wrapped = compile(my_function, verify=False)
+
+     Or as a decorator:
+
+         @compile
+         def my_function(...):
+             ...
+
+         @compile(verify=False)
+         def my_function(...):
+             ...
+
+     Args:
+
+         fn (callable): the function to be wrapped. (Default: None, in which case we return a single-argument
+             function that can be used as a decorator)
+         verify (bool): To guard as much as possible against the above unsafe situations,
+             if `verify=True`, the first time we would reuse a trace, we additionally do another
+             recording and check that the second recording matches the original recording, and report
+             where they diverge. (Default: True)
+
+
+     Returns:
+         If fn=None, it returns a function that can be used as a decorator on a function to
+         be wrapped. Otherwise, it returns the wrapped function itself.
+
+     """
+     if fn is None:
+         return lambda fn: compile(fn, verify)
+     cache: Dict[Any, List[CacheEntry]] = defaultdict(list)
+
+     @functools.wraps(fn)
+     def wrapper(*args, **kwargs):
+         global _coalescing
+         if _coalescing:
+             return fn(*args, **kwargs)
+
+         tensors, shape_key = hashable_tensor_flatten(args, kwargs)
+         input_group = TensorGroup([t._fake for t in tensors])
+         props = tuple((t.mesh, t.stream, t.requires_grad) for t in tensors)
+         key = (shape_key, input_group.pattern, props)
+         for entry in cache[key]:
+             if entry.matches(input_group.tensors):
+                 if entry.to_verify is not None:
+                     entry.to_verify.client.recorder.verify_against(entry.to_verify)
+                     _record_and_define(fn, args, kwargs)
+                     entry.to_verify = None
+                 return entry.run(args, kwargs)
+
+         entry = _record_and_define(fn, args, kwargs)
+         if not verify:
+             entry.to_verify = None
+         cache[key].append(entry)
+         return entry.run(args, kwargs)
+
+     return wrapper
+
+
+ def is_active(controller: "Client"):
+     if _coalescing is None:
+         return False
+     _coalescing.set_controller(controller)
+     return True
+
+
+ def is_recording(controller: "Client"):
+     return is_active(controller) and _coalescing.recording
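The two entry points above are coalescing() and compile(). A minimal usage sketch, assuming an active monarch device mesh and that x and y are monarch Tensors with requires_grad=False (the names and the body of fused_update are illustrative, not from the package):

from monarch.common._coalescing import coalescing, compile

@compile(verify=False)
def fused_update(x, y):
    # traced on the first call; later calls with matching tensor metadata
    # replay the recording as a single message to the workers
    return x * 2 + y

out = fused_update(x, y)

# coalescing() instead batches the messages of an eager block and runs the
# recording once, without defining a reusable trace:
with coalescing():
    a = x + y
    b = a * 3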
monarch/common/_device_utils.py
@@ -0,0 +1,18 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the BSD-style license found in the
+ # LICENSE file in the root directory of this source tree.
+
+ import os
+ import re
+ from pathlib import Path
+
+
+ def _local_device_count():
+     if "CUDA_VISIBLE_DEVICES" in os.environ:
+         return len(os.environ["CUDA_VISIBLE_DEVICES"].split(","))
+     dev_path = Path("/dev")
+     pattern = re.compile(r"nvidia\d+$")
+     nvidia_devices = [dev for dev in dev_path.iterdir() if pattern.match(dev.name)]
+     return len(nvidia_devices)
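_local_device_count prefers CUDA_VISIBLE_DEVICES and only falls back to counting /dev/nvidia* entries when the variable is unset. A quick illustration with hypothetical values:

import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,3"

from monarch.common._device_utils import _local_device_count
print(_local_device_count())  # 3 -- taken from the env var, /dev is never scanned

Note that an empty CUDA_VISIBLE_DEVICES still yields 1, because "".split(",") returns [""].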
monarch/common/_tensor_to_table.py
@@ -0,0 +1,172 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the BSD-style license found in the
+ # LICENSE file in the root directory of this source tree.
+
+ from typing import Callable, List, Optional
+
+ import torch
+
+
+ def tensor_to_table(
+     tensor: torch.Tensor,
+     format_data: Callable,
+     axis_labels: Optional[List[List[str]]] = None,
+     axis_names: Optional[List[str]] = None,
+     format_spec: str = ".4f",
+     table_format: str = "grid",
+ ) -> str:
+     """
+     Convert a tensor into formatted tables with generic dimension handling.
+
+     Parameters:
+     -----------
+     tensor : torch.Tensor or np.ndarray
+         Input tensor to be converted (1D, 2D, or 3D)
+     axis_labels : list of lists, optional
+         Labels for each axis, ordered from outer to inner dimension
+         For 1D: [column_labels]
+         For 2D: [row_labels, column_labels]
+         For 3D: [depth_labels, row_labels, column_labels]
+     axis_names : list, optional
+         Names for each axis, ordered from outer to inner dimension
+         For 1D: [column_name]
+         For 2D: [row_name, column_name]
+         For 3D: [depth_name, row_name, column_name]
+     format_spec : str, optional
+         Format specification for numbers (default: ".4f")
+     table_format : str, optional
+         Table format style for tabulate (default: "grid")
+
+     Returns:
+     --------
+     str : Formatted table string
+     """
+     import numpy as np
+     from tabulate import tabulate
+
+     assert tensor.dtype == torch.int
+     # Convert tensor to numpy for easier manipulation
+     data = tensor.detach().cpu().numpy()
+
+     # Normalize dimensions
+     orig_ndim = data.ndim
+     if data.ndim == 1:
+         data = data.reshape(1, 1, -1)
+     elif data.ndim == 2:
+         data = data.reshape(1, *data.shape)
+     elif data.ndim > 3:
+         raise ValueError("Input tensor must be 1D, 2D, or 3D")
+
+     # Get tensor dimensions
+     depth, rows, cols = data.shape
+
+     # Generate or validate labels for each dimension
+     if axis_labels is None:
+         axis_labels = []
+
+     # Pad or truncate axis_labels based on tensor dimensions
+     ndim = orig_ndim
+     while len(axis_labels) < ndim:
+         dim_size = data.shape[-(len(axis_labels) + 1)]
+         axis_labels.insert(0, [f"D{len(axis_labels)}_{i+1}" for i in range(dim_size)])
+     axis_labels = axis_labels[-ndim:]
+
+     # Convert to internal format (depth, rows, cols)
+     all_labels = [None] * 3
+     if ndim == 1:
+         all_labels = [["1"], ["1"], axis_labels[0]]
+     elif ndim == 2:
+         all_labels = [["1"], axis_labels[0], axis_labels[1]]
+     else:
+         all_labels = axis_labels
+
+     # Handle axis names similarly
+     if axis_names is None:
+         axis_names = []
+
+     # Pad or truncate axis_names based on tensor dimensions
+     while len(axis_names) < ndim:
+         axis_names.insert(0, f"Dimension {len(axis_names)}")
+     axis_names = axis_names[-ndim:]
+
+     # Convert to internal format (depth, rows, cols)
+     all_names = [None] * 3
+     if ndim == 1:
+         all_names = [None, None, axis_names[0]]
+     elif ndim == 2:
+         all_names = [None, axis_names[0], axis_names[1]]
+     else:
+         all_names = axis_names
+
+     # Format output
+     tables = []
+     for d in range(depth):
+         # Format slice data
+         formatted_data = [[format_data(x) for x in row] for row in data[d]]
+
+         # Add row labels except for 1D tensors
+         if orig_ndim > 1:
+             formatted_data = [
+                 [all_labels[1][i]] + row for i, row in enumerate(formatted_data)
+             ]
+
+         # Create slice header for 3D tensors
+         if orig_ndim == 3:
+             slice_header = (
+                 f"\n{all_names[0]}: {all_labels[0][d]}\n"
+                 if d > 0
+                 else f"{all_names[0]}: {all_labels[0][d]}\n"
+             )
+         else:
+             slice_header = ""
+
+         # Create table
+         headers = [""] + all_labels[2] if orig_ndim > 1 else all_labels[2]
+         table = tabulate(
+             formatted_data,
+             headers=headers,
+             tablefmt=table_format,
+             stralign="right",
+             numalign="right",
+         )
+
+         # Add axis labels
+         lines = table.split("\n")
+
+         # Add column axis name for all dimensions on first slice
+         if d == 0 and all_names[2]:
+             if orig_ndim == 1:
+                 # For 1D, center the column name over the entire table
+                 col_label = f"{all_names[2]:^{len(lines[0])}}"
+             else:
+                 # For 2D and 3D, account for row labels
+                 total_width = len(lines[0])
+                 y_axis_width = max(len(label) for label in all_labels[1]) + 4
+                 data_width = total_width - y_axis_width
+                 col_label = f"{' ' * y_axis_width}{all_names[2]:^{data_width}}"
+             lines.insert(0, col_label)
+
+         # Add row axis name (only for 2D and 3D tensors)
+         if orig_ndim > 1 and all_names[1]:
+             label_lines = lines[1:] if d == 0 and all_names[2] else lines
+             max_label_length = len(all_names[1])
+             padded_label = f"{all_names[1]:>{max_label_length}} │"
+
+             if d == 0 and all_names[2]:
+                 lines[0] = f"{' ' * (max_label_length + 2)}{lines[0]}"
+
+             for i, line in enumerate(label_lines):
+                 if i == len(label_lines) // 2:
+                     lines[i + (1 if d == 0 and all_names[2] else 0)] = (
+                         f"{padded_label}{line}"
+                     )
+                 else:
+                     lines[i + (1 if d == 0 and all_names[2] else 0)] = (
+                         f"{' ' * max_label_length} │{line}"
+                     )
+
+         tables.append(slice_header + "\n".join(lines))
+
+     return "\n".join(tables)
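tensor_to_table expects an integer tensor plus a format_data callable and renders one table per outermost slice via tabulate. A small illustrative call (module path taken from the file list above; requires the tabulate package; the labels are made up):

import torch
from monarch.common._tensor_to_table import tensor_to_table

ranks = torch.arange(6, dtype=torch.int).reshape(2, 3)
print(
    tensor_to_table(
        ranks,
        format_data=str,
        axis_labels=[["row0", "row1"], ["gpu0", "gpu1", "gpu2"]],
        axis_names=["host", "gpu"],
    )
)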
monarch/common/base_tensor.py
@@ -0,0 +1,28 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the BSD-style license found in the
+ # LICENSE file in the root directory of this source tree.
+
+ # pyre-unsafe
+ import torch
+
+
+ # All of the tensor examples in this zoo inherit from BaseTensor. Ideally,
+ # however, they would inherit directly from Tensor. This is just our staging
+ # ground for applying behavior that hasn't yet made it into core but that
+ # we would like to apply by default.
+ class BaseTensor(torch.Tensor):
+     # See https://github.com/pytorch/pytorch/pull/73727 ; this is necessary
+     # to ensure that super().__new__ can cooperate with each other
+     @staticmethod
+     def __new__(cls, elem, *, requires_grad=None):
+         if requires_grad is None:
+             return super().__new__(cls, elem)
+         else:
+             return cls._make_subclass(cls, elem, requires_grad)
+
+     # If __torch_dispatch__ is defined (which it will be for all our examples)
+     # the default torch function implementation (which preserves subclasses)
+     # typically must be disabled
+     __torch_function__ = torch._C._disabled_torch_function_impl
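BaseTensor's __new__ routes construction through super().__new__ when requires_grad is omitted and through Tensor._make_subclass when it is given, so subclasses keep their type either way. A tiny sketch (the subclass name is made up; no __torch_dispatch__ behavior is shown):

import torch
from monarch.common.base_tensor import BaseTensor

class MyTensor(BaseTensor):
    pass

a = MyTensor(torch.ones(3))                      # super().__new__ path
b = MyTensor(torch.ones(3), requires_grad=True)  # _make_subclass path
print(type(a).__name__, a.requires_grad)         # MyTensor False
print(type(b).__name__, b.requires_grad)         # MyTensor True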
monarch/common/borrows.py
@@ -0,0 +1,143 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the BSD-style license found in the
+ # LICENSE file in the root directory of this source tree.
+
+ # pyre-unsafe
+ import traceback
+ import warnings
+ from typing import List, Optional, TYPE_CHECKING
+ from weakref import ref, WeakSet
+
+ from . import messages
+
+ if TYPE_CHECKING:
+     from .device_mesh import DeviceMesh
+     from .tensor import Tensor
+
+
+ # all the aliases for the same storage on a particular stream
+ # borrows of a storage to another stream are not considered aliases
+ # but instead copies and will have a different set of storage aliases.
+ # conceptually, think of borrows as copies that have been guarded
+ # so that we do not actually perform the data movement.
+ class StorageAliases:
+     def __init__(self):
+         # how are we allowed to access this storage
+         # string containing 0 or more of:
+         # r - can read
+         # w - can write
+         self.access = "rw"
+         # what Tensor aliases exist for this storage
+         self.aliases = WeakSet()
+
+         # was this set of storages originally a borrow
+         # from another stream?
+         self._borrow: Optional[ref[Borrow]] = None
+         self.borrowed_from: "Optional[StorageAliases]" = None
+         # how many times has this storage been borrowed?
+         self.live_borrows = WeakSet()
+
+     @property
+     def borrow(self) -> "Borrow":
+         assert self._borrow is not None
+         borrow = self._borrow()
+         assert borrow is not None
+         return borrow
+
+     def register(self, tensor: "Tensor"):
+         self.aliases.add(tensor)
+         if self.borrowed_from is not None:
+             self.borrow._live_tensors += 1
+
+     def unregister(self, tensor: "Tensor"):
+         borrowed_from = self.borrowed_from
+         if borrowed_from is not None:
+             borrow = self.borrow
+             borrow._live_tensors -= 1
+             if borrow._live_tensors == 0:
+                 borrow._use()
+                 if self.access == "rw":
+                     # returning a mutable borrow needs to propagate errors
+                     # from the stream (which may have mutated the value) back to the values
+                     # on the origin stream. This does not happen automatically because
+                     # borrows are not tracked as tensor aliases, but are instead treated
+                     # as a kind of optimized copy or move.
+                     tensor.mesh.client.new_node(borrowed_from.aliases, (tensor,))
+                 tensor.mesh._send(messages.BorrowLastUse(borrow._id))
+
+     def borrow_from(
+         self, id: int, mesh: "DeviceMesh", f: "StorageAliases", mutable: bool
+     ):
+         assert (
+             self.borrowed_from is None
+         ), "we should have created a new storage with no borrows"
+         if mutable:
+             if "w" not in f.access:
+                 raise RuntimeError(
+                     "Cannot borrow this tensor mutably because it (or a view) is already being borrowed non-mutably."
+                 )
+             f.access = ""
+             self.access = "rw"
+         else:
+             f.access = self.access = "r"
+         self.borrowed_from = f
+         borrow = Borrow(id, self, mesh)
+         f.live_borrows.add(borrow)
+         self._borrow = ref(borrow)
+         return borrow
+
+
+ class Borrow:
+     def __init__(self, id: int, aliases: StorageAliases, mesh: "DeviceMesh"):
+         self._storage_aliases = aliases
+         self._mesh = mesh
+         self._id = id
+         self._live_tensors = 1
+         self._dropped = False
+         self._used = False
+         self._frames: List[traceback.FrameSummary] = traceback.extract_stack()
+
+     @property
+     def traceback_string(self):
+         return "".join(traceback.format_list(self._frames))
+
+     def __enter__(self):
+         pass
+
+     def __exit__(self, exc_type, exc_value, traceback):
+         self.drop()
+
+     def _use(self):
+         if self._used:
+             return
+         self._used = True
+         self._mesh._send(messages.BorrowFirstUse(self._id))
+
+     def drop(self) -> None:
+         if self._dropped:
+             return
+         self._dropped = True
+
+         for alias in self._storage_aliases.aliases:
+             alias._drop_ref()
+
+         self._mesh.client.drop_borrow(self)
+         self._mesh._send(messages.BorrowDrop(self._id))
+         f = self._storage_aliases.borrowed_from
+         assert f is not None
+         f.live_borrows.remove(self)
+         if len(f.live_borrows) == 0:
+             f.access = "rw" if f.borrowed_from is None else self._storage_aliases.access
+
+     def __del__(self):
+         if not self._dropped:
+             current = "".join(traceback.format_stack())
+             warnings.warn(
+                 "borrow.drop() must be called before a borrowed tensor is freed to specify when the borrowed tensor should return to its origin stream, but borrow is being deleted before drop. "
+                 "borrow.drop() is being called automatically here to ensure correctness, but this will force a synchronization back to the original stream at this point which might not be intended."
+                 f"\nTraceback of __del__ (most recent call last):\n{current}\nTraceback of original borrow (most recent call last):{self.traceback_string}",
+                 stacklevel=2,
+             )
+             self.drop()
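A Borrow is also a context manager: __exit__ calls drop(), which sends BorrowDrop and restores access on the origin storage once no live borrows remain, and __del__ warns if a borrow is garbage collected before being dropped. A hypothetical lifecycle sketch (assumes `borrowed, borrow` were obtained from a stream borrow elsewhere in monarch; the names are illustrative):

with borrow:          # __exit__ calls borrow.drop() when the block ends
    y = borrowed * 2  # work with the borrowed tensor on the borrowing stream

# or manage it explicitly:
# y = borrowed * 2
# borrow.drop()       # skipping this triggers the __del__ warning above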