torchmonarch-nightly 2025.6.4 (cp310-cp310-manylinux2014_x86_64.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (157)
  1. monarch/__init__.py +189 -0
  2. monarch/_monarch/__init__.py +5 -0
  3. monarch/_monarch/hyperactor/__init__.py +74 -0
  4. monarch/_monarch/selection/__init__.py +13 -0
  5. monarch/_monarch/worker/__init__.py +0 -0
  6. monarch/_monarch/worker/debugger.py +117 -0
  7. monarch/_monarch/worker/logging.py +107 -0
  8. monarch/_rust_bindings.so +0 -0
  9. monarch/_testing.py +198 -0
  10. monarch/actor_mesh.py +692 -0
  11. monarch/allocator.py +62 -0
  12. monarch/bootstrap_main.py +75 -0
  13. monarch/builtins/__init__.py +14 -0
  14. monarch/builtins/log.py +22 -0
  15. monarch/builtins/random.py +69 -0
  16. monarch/cached_remote_function.py +257 -0
  17. monarch/common/_C.pyi +11 -0
  18. monarch/common/_C.so +0 -0
  19. monarch/common/__init__.py +0 -0
  20. monarch/common/_coalescing.py +308 -0
  21. monarch/common/_device_utils.py +18 -0
  22. monarch/common/_tensor_to_table.py +172 -0
  23. monarch/common/base_tensor.py +28 -0
  24. monarch/common/borrows.py +143 -0
  25. monarch/common/client.py +646 -0
  26. monarch/common/constants.py +10 -0
  27. monarch/common/context_manager.py +40 -0
  28. monarch/common/controller_api.py +104 -0
  29. monarch/common/device_mesh.py +443 -0
  30. monarch/common/fake.py +55 -0
  31. monarch/common/function.py +160 -0
  32. monarch/common/function_caching.py +164 -0
  33. monarch/common/future.py +168 -0
  34. monarch/common/invocation.py +125 -0
  35. monarch/common/mast.py +221 -0
  36. monarch/common/messages.py +572 -0
  37. monarch/common/mock_cuda.py +41 -0
  38. monarch/common/opaque_ref.py +98 -0
  39. monarch/common/pickle_flatten.py +48 -0
  40. monarch/common/pipe.py +152 -0
  41. monarch/common/process_group.py +55 -0
  42. monarch/common/recording.py +127 -0
  43. monarch/common/reference.py +33 -0
  44. monarch/common/remote.py +304 -0
  45. monarch/common/selection.py +9 -0
  46. monarch/common/shape.py +204 -0
  47. monarch/common/stream.py +111 -0
  48. monarch/common/tensor.py +793 -0
  49. monarch/common/tensor_factory.py +31 -0
  50. monarch/common/tree.py +73 -0
  51. monarch/controller/__init__.py +7 -0
  52. monarch/controller/backend.py +223 -0
  53. monarch/controller/controller.py +223 -0
  54. monarch/controller/debugger.py +47 -0
  55. monarch/controller/history.py +90 -0
  56. monarch/controller/rust_backend/__init__.py +7 -0
  57. monarch/controller/rust_backend/controller.py +245 -0
  58. monarch/fetch.py +55 -0
  59. monarch/future.py +25 -0
  60. monarch/gradient/__init__.py +11 -0
  61. monarch/gradient/_gradient_generator.pyi +22 -0
  62. monarch/gradient/_gradient_generator.so +0 -0
  63. monarch/gradient_generator.py +185 -0
  64. monarch/memory.py +43 -0
  65. monarch/monarch_controller +0 -0
  66. monarch/notebook.py +761 -0
  67. monarch/opaque_module.py +235 -0
  68. monarch/opaque_object.py +88 -0
  69. monarch/parallel/__init__.py +9 -0
  70. monarch/parallel/pipelining/__init__.py +7 -0
  71. monarch/parallel/pipelining/runtime.py +847 -0
  72. monarch/parallel/pipelining/schedule_ir.py +692 -0
  73. monarch/parallel/pipelining/scheduler.py +249 -0
  74. monarch/proc_mesh.py +188 -0
  75. monarch/profiler.py +160 -0
  76. monarch/python_local_mesh.py +107 -0
  77. monarch/random.py +61 -0
  78. monarch/rdma.py +190 -0
  79. monarch/remote_class.py +114 -0
  80. monarch/rust_backend_mesh.py +280 -0
  81. monarch/rust_local_mesh.py +1402 -0
  82. monarch/sim_mesh.py +357 -0
  83. monarch/simulator/__init__.py +7 -0
  84. monarch/simulator/command_history.py +424 -0
  85. monarch/simulator/config.py +21 -0
  86. monarch/simulator/interface.py +59 -0
  87. monarch/simulator/ir.py +770 -0
  88. monarch/simulator/mock_controller.py +214 -0
  89. monarch/simulator/profiling.py +424 -0
  90. monarch/simulator/simulator.py +1052 -0
  91. monarch/simulator/task.py +255 -0
  92. monarch/simulator/tensor.py +373 -0
  93. monarch/simulator/trace.py +395 -0
  94. monarch/simulator/utils.py +41 -0
  95. monarch/simulator/worker.py +389 -0
  96. monarch/tensor_worker_main.py +260 -0
  97. monarch/tensorboard.py +84 -0
  98. monarch/timer/__init__.py +21 -0
  99. monarch/timer/example_monarch.py +78 -0
  100. monarch/timer/example_spmd.py +55 -0
  101. monarch/timer/execution_timer.py +199 -0
  102. monarch/timer/execution_timer_test.py +131 -0
  103. monarch/tools/__init__.py +7 -0
  104. monarch/tools/cli.py +167 -0
  105. monarch/tools/commands.py +189 -0
  106. monarch/tools/components/__init__.py +7 -0
  107. monarch/tools/components/hyperactor.py +57 -0
  108. monarch/tools/config/__init__.py +20 -0
  109. monarch/tools/config/defaults.py +54 -0
  110. monarch/tools/mesh_spec.py +121 -0
  111. monarch/worker/__init__.py +7 -0
  112. monarch/worker/_testing_function.py +481 -0
  113. monarch/worker/compiled_block.py +270 -0
  114. monarch/worker/debugger.py +125 -0
  115. monarch/worker/lines.py +47 -0
  116. monarch/worker/monitor.py +53 -0
  117. monarch/worker/worker.py +1191 -0
  118. monarch/world_mesh.py +34 -0
  119. monarch_supervisor/__init__.py +1044 -0
  120. monarch_supervisor/_testing.py +44 -0
  121. monarch_supervisor/function_call.py +30 -0
  122. monarch_supervisor/host.py +386 -0
  123. monarch_supervisor/launchers.py +145 -0
  124. monarch_supervisor/log_pstree.py +48 -0
  125. monarch_supervisor/logging.py +103 -0
  126. monarch_supervisor/python_executable.py +42 -0
  127. tests/__init__.py +0 -0
  128. tests/dispatch_bench.py +124 -0
  129. tests/dispatch_bench_helper.py +25 -0
  130. tests/error_test_binary.py +139 -0
  131. tests/simulator/__init__.py +0 -0
  132. tests/simulator/test_profiling.py +136 -0
  133. tests/simulator/test_simulator.py +411 -0
  134. tests/simulator/test_task.py +64 -0
  135. tests/simulator/test_worker.py +102 -0
  136. tests/sleep_binary.py +35 -0
  137. tests/test_actor_error.py +112 -0
  138. tests/test_alloc.py +25 -0
  139. tests/test_coalescing.py +492 -0
  140. tests/test_controller.py +835 -0
  141. tests/test_device_mesh.py +132 -0
  142. tests/test_fault_tolerance.py +398 -0
  143. tests/test_future.py +94 -0
  144. tests/test_grad_generator.py +121 -0
  145. tests/test_mock_cuda.py +74 -0
  146. tests/test_pdb_actor.py +110 -0
  147. tests/test_python_actors.py +372 -0
  148. tests/test_remote_functions.py +1271 -0
  149. tests/test_rust_backend.py +182 -0
  150. tests/test_signal_safe_block_on.py +103 -0
  151. tests/test_sim_backend.py +54 -0
  152. torchmonarch_nightly-2025.6.4.dist-info/METADATA +94 -0
  153. torchmonarch_nightly-2025.6.4.dist-info/RECORD +157 -0
  154. torchmonarch_nightly-2025.6.4.dist-info/WHEEL +5 -0
  155. torchmonarch_nightly-2025.6.4.dist-info/entry_points.txt +3 -0
  156. torchmonarch_nightly-2025.6.4.dist-info/licenses/LICENSE +29 -0
  157. torchmonarch_nightly-2025.6.4.dist-info/top_level.txt +3 -0
monarch/common/remote.py
@@ -0,0 +1,304 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the BSD-style license found in the
+ # LICENSE file in the root directory of this source tree.
+
+ # pyre-unsafe
+
+ import functools
+ import logging
+ import warnings
+
+ from logging import Logger
+ from typing import (
+     Any,
+     Callable,
+     Dict,
+     Generic,
+     Literal,
+     Optional,
+     overload,
+     Protocol,
+     Tuple,
+     TypeVar,
+ )
+
+ import monarch.common.messages as messages
+
+ import torch
+
+ from monarch.common import _coalescing, device_mesh, messages, stream
+
+ from monarch.common.device_mesh import RemoteProcessGroup
+ from monarch.common.fake import fake_call
+
+ from monarch.common.function import (
+     Propagator,
+     resolvable_function,
+     ResolvableFunction,
+     ResolvableFunctionFromPath,
+ )
+ from monarch.common.function_caching import (
+     hashable_tensor_flatten,
+     tensor_placeholder,
+     TensorGroup,
+     TensorPlaceholder,
+ )
+ from monarch.common.future import Future
+ from monarch.common.messages import Dims
+ from monarch.common.tensor import dtensor_check, dtensor_dispatch
+ from monarch.common.tree import flatten, tree_map
+ from torch import autograd, distributed as dist
+ from typing_extensions import ParamSpec
+
+ logger: Logger = logging.getLogger(__name__)
+
+ P = ParamSpec("P")
+ R = TypeVar("R")
+ T = TypeVar("T")
+
+ Propagator = Callable | Literal["mocked", "cached", "inspect"] | None
+
+
+ class Remote(Generic[P, R]):
+     def __init__(self, impl: Any, propagator_arg: Propagator):
+         self._remote_impl = impl
+         self._propagator_arg = propagator_arg
+         self._cache: Optional[dict] = None
+
+     @property
+     def _resolvable(self):
+         return resolvable_function(self._remote_impl)
+
+     def _propagate(self, args, kwargs, fake_args, fake_kwargs):
+         if self._propagator_arg is None or self._propagator_arg == "cached":
+             if self._cache is None:
+                 self._cache = {}
+             return _cached_propagation(self._cache, self._resolvable, args, kwargs)
+         elif self._propagator_arg == "inspect":
+             return None
+         elif self._propagator_arg == "mocked":
+             raise NotImplementedError("mocked propagation")
+         else:
+             return fake_call(self._propagator_arg, *fake_args, **fake_kwargs)
+
+     def _fetch_propagate(self, args, kwargs, fake_args, fake_kwargs):
+         if self._propagator_arg is None:
+             return  # no propagator provided, so we just assume no mutations
+         return self._propagate(args, kwargs, fake_args, fake_kwargs)
+
+     def _pipe_propagate(self, args, kwargs, fake_args, fake_kwargs):
+         if not callable(self._propagator_arg):
+             raise ValueError("Must specify explicit callable for pipe")
+         return self._propagate(args, kwargs, fake_args, fake_kwargs)
+
+     def __call__(self, *args: P.args, **kwargs: P.kwargs) -> R:
+         return dtensor_dispatch(
+             self._resolvable,
+             self._propagate,
+             args,
+             kwargs,
+             device_mesh._active,
+             stream._active,
+         )
+
+     def call_on_shard_and_fetch(
+         self, *args, shard: Dict[str, int] | None = None, **kwargs
+     ) -> Future[R]:
+         return _call_on_shard_and_fetch(
+             self._resolvable, self._fetch_propagate, *args, shard=shard, **kwargs
+         )
+
+
+ # This can't just be Callable because otherwise we are not
+ # allowed to use type arguments in the return value.
+ class RemoteIfy(Protocol):
+     def __call__(self, function: Callable[P, R]) -> Remote[P, R]: ...
+
+
+ @overload
+ def remote(
+     function: Callable[P, R], *, propagate: Propagator = None
+ ) -> "Remote[P, R]": ...
+
+
+ @overload
+ def remote(
+     function: str, *, propagate: Literal["mocked", "cached", "inspect"] | None = None
+ ) -> "Remote": ...
+
+
+ @overload
+ def remote(function: str, *, propagate: Callable[P, R]) -> Remote[P, R]: ...
+
+
+ @overload
+ def remote(*, propagate: Propagator = None) -> RemoteIfy: ...  # type: ignore
+
+
+ # ignore because otherwise it claims that the actual implementation doesn't
+ # accept the above list of arguments
+
+
+ def remote(function: Any = None, *, propagate: Propagator = None) -> Any:
+     if function is None:
+         return functools.partial(remote, propagate=propagate)
+     return Remote(function, propagate)
+
+
+ def _call_on_shard_and_fetch(
+     rfunction: ResolvableFunction | None,
+     propagator: Any,
+     /,
+     *args: object,
+     shard: dict[str, int] | None = None,
+     **kwargs: object,
+ ) -> Future:
+     """
+     Call `function` at the coordinates `shard` of the current device mesh, and retrieve the result as a Future.
+         function - the remote function to call
+         *args/**kwargs - arguments to the function
+         shard - a dictionary from mesh dimension name to coordinate of the shard
+             If None, this will fetch from coordinate 0 for all dimensions (useful after all_reduce/all_gather)
+     """
+     ambient_mesh = device_mesh._active
+
+     if rfunction is None:
+         preprocess_message = None
+         rfunction = ResolvableFunctionFromPath("ident")
+     else:
+         preprocess_message = rfunction
+     _, dtensors, mutates, mesh = dtensor_check(
+         propagator, rfunction, args, kwargs, ambient_mesh, stream._active
+     )
+
+     client = mesh.client
+     if _coalescing.is_active(client):
+         raise NotImplementedError("NYI: fetching results during a coalescing block")
+     fut = Future(client)
+     ident = client.new_node(mutates, dtensors, fut)
+     process = mesh._process(shard)
+     client.send(
+         process,
+         messages.SendValue(
+             ident,
+             None,
+             mutates,
+             preprocess_message,
+             args,
+             kwargs,
+             stream._active._to_ref(client),
+         ),
+     )
+     # we have to ask for status updates
+     # from workers to be sure they have finished
+     # enough work to count this future as finished,
+     # and all potential errors have been reported
+     client._request_status()
+     return fut
+
+
+ @remote
+ def _propagate(
+     function: ResolvableFunction, args: Tuple[Any, ...], kwargs: Dict[str, Any]
+ ):
+     """
+     RF preprocess function
+     """
+     fn = function.resolve()
+
+     # XXX - in addition to the functional properties,
+     # and info about if any of the input tensors got mutated.
+     arg_tensors, _ = flatten((args, kwargs), lambda x: isinstance(x, torch.Tensor))
+     input_group = TensorGroup(arg_tensors)
+     result = fn(*args, **kwargs)
+     result_tensors, unflatten_result = flatten(
+         result, lambda x: isinstance(x, torch.Tensor)
+     )
+
+     output_group = TensorGroup(result_tensors, parent=input_group)
+
+     the_result = unflatten_result([tensor_placeholder for _ in result_tensors])
+     return (
+         the_result,
+         output_group.pattern,
+     )
+
+
+ class DummyProcessGroup(dist.ProcessGroup):
+     def __init__(self, dims: Dims, world_size: int):
+         # pyre-ignore
+         super().__init__(0, world_size)
+         self.dims = dims
+         self.world_size = world_size
+
+     def allreduce(self, tensor, op=dist.ReduceOp.SUM, async_op=False):
+         class DummyWork:
+             def wait(self):
+                 return tensor
+
+         return DummyWork()
+
+     def _allgather_base(self, output_tensor, input_tensor, opts):
+         class DummyWork:
+             def wait(self):
+                 return output_tensor
+
+         return DummyWork()
+
+     def _reduce_scatter_base(self, output_tensor, input_tensor, opts):
+         class DummyWork:
+             def wait(self):
+                 return output_tensor
+
+         return DummyWork()
+
+     def __getstate__(self):
+         return {"dims": self.dims, "world_size": self.world_size}
+
+     def __setstate__(self, state):
+         self.__init__(state["dims"], state["world_size"])
+
+
+ def _mock_pgs(x):
+     if isinstance(x, autograd.function.FunctionCtx):
+         for attr in dir(x):
+             if not attr.startswith("__") and isinstance(attr, RemoteProcessGroup):
+                 setattr(x, attr, DummyProcessGroup(attr.dims, attr.size()))
+         return x
+     if isinstance(x, RemoteProcessGroup):
+         return DummyProcessGroup(x.dims, x.size())
+     return x
+
+
+ # for testing
+ _miss = 0
+ _hit = 0
+
+
+ def _cached_propagation(_cache, rfunction, args, kwargs):
+     tensors, shape_key = hashable_tensor_flatten(args, kwargs)
+     inputs_group = TensorGroup([t._fake for t in tensors])
+     requires_grads = tuple(t.requires_grad for t in tensors)
+     key = (shape_key, inputs_group.pattern, requires_grads)
+
+     global _miss, _hit
+     if key not in _cache:
+         _miss += 1
+         args_no_pg, kwargs_no_pg = tree_map(_mock_pgs, (args, kwargs))
+         result_with_placeholders, output_pattern = _propagate.call_on_shard_and_fetch(
+             function=rfunction, args=args_no_pg, kwargs=kwargs_no_pg
+         ).result()
+
+         _, unflatten_result = flatten(
+             result_with_placeholders, lambda x: isinstance(x, TensorPlaceholder)
+         )
+         _cache[key] = (unflatten_result, output_pattern)
+     else:
+         _hit += 1
+     # return fresh fake result every time to avoid spurious aliasing
+     unflatten_result, output_pattern = _cache[key]
+
+     output_tensors = fake_call(output_pattern.empty, [inputs_group.tensors])
+     return unflatten_result(output_tensors)
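
For orientation, the block below is a hedged usage sketch of the `remote` decorator and `call_on_shard_and_fetch` defined in this file; it is not part of the wheel. Mesh construction, `mesh.activate()`, and the "host"/"gpu" dimension names are assumptions, and the propagator shown is an ordinary callable that is run on fake tensors to predict output shapes.

# Hedged usage sketch (not part of the wheel). `mesh` is an already-constructed
# DeviceMesh; mesh.activate() and the "host"/"gpu" dimension names are assumptions.
import torch

from monarch.common.remote import remote

@remote(propagate=lambda x, y: x + y)  # propagator: run on fake tensors to predict output shape/dtype
def remote_add(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
    return x + y

def example(mesh):
    with mesh.activate():
        x = torch.ones(4)  # created on every rank of the active mesh
        y = torch.ones(4)
        z = remote_add(x, y)  # dispatched through dtensor_dispatch; z is a monarch tensor reference
        # Fetch the value held by one shard as a Future and block on it locally.
        fut = remote_add.call_on_shard_and_fetch(x, y, shard={"host": 0, "gpu": 0})
        return z, fut.result()
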
monarch/common/selection.py
@@ -0,0 +1,9 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the BSD-style license found in the
+ # LICENSE file in the root directory of this source tree.
+
+ from monarch._rust_bindings.monarch_hyperactor.selection import Selection
+
+ __all__ = ["Selection"]
monarch/common/shape.py
@@ -0,0 +1,204 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the BSD-style license found in the
+ # LICENSE file in the root directory of this source tree.
+
+ import itertools
+ import operator
+ from abc import ABC, abstractmethod
+
+ from typing import Dict, Generator, Sequence, Tuple
+
+ from monarch._rust_bindings.monarch_hyperactor.shape import Shape, Slice
+
+ from typing_extensions import Self
+
+ NDSlice = Slice
+
+ Slices = Slice | list[Slice]
+
+
+ def iter_ranks(ranks: Slices) -> Generator[int, None, None]:
+     if isinstance(ranks, list):
+         seen = set()
+         for slice_ in ranks:
+             for rank in slice_:
+                 if rank not in seen:
+                     seen.add(rank)
+                     yield rank
+     else:
+         yield from ranks
+
+
+ class MeshTrait(ABC):
+     """
+     Mesh interface. Implemented via Shape.
+     """
+
+     @property
+     @abstractmethod
+     def _ndslice(self) -> NDSlice: ...
+
+     @property
+     @abstractmethod
+     def _labels(self) -> Tuple[str, ...]: ...
+
+     @abstractmethod
+     def _new_with_shape(self, shape: Shape) -> Self: ...
+
+     def slice(self, **kwargs) -> Self:
+         """
+         mesh.slice(batch=3) or mesh.slice(batch=slice(3, None))
+         """
+         ndslice = self._ndslice
+         labels = self._labels
+         offset = ndslice.offset
+         names = []
+         sizes = []
+         strides = []
+         for name, size, stride in zip(labels, ndslice.sizes, ndslice.strides):
+             if name in kwargs:
+                 e = kwargs.pop(name)
+                 if isinstance(e, slice):
+                     start, stop, slice_stride = e.indices(size)
+                     offset += start * stride
+                     names.append(name)
+                     sizes.append((stop - start) // slice_stride)
+                     strides.append(slice_stride * stride)
+                 else:
+                     if e >= size or e < 0:
+                         raise IndexError("index out of range")
+                     offset += e * stride
+             else:
+                 names.append(name)
+                 sizes.append(size)
+                 strides.append(stride)
+
+         if kwargs:
+             raise TypeError(
+                 f"{self} does not have dimension(s) named {tuple(kwargs.keys())}"
+             )
+
+         new_ndslice = NDSlice(offset=offset, sizes=sizes, strides=strides)
+         return self._new_with_shape(Shape(names, new_ndslice))
+
+     def split(self, **kwargs) -> Self:
+         """
+         Returns a new device mesh with some dimensions of this mesh split.
+         For instance, this call splits the host dimension into dp and pp dimensions;
+         the size of 'pp' is specified and the size of 'dp' is derived from it:
+
+             new_mesh = mesh.split(host=('dp', 'pp'), gpu=('tp','cp'), pp=16, cp=2)
+
+         Dimensions not specified will remain unchanged.
+         """
+         splits: Dict[str, Sequence[str]] = {}
+         size_constraints: Dict[str, int] = {}
+         for key, value in kwargs.items():
+             if key in self._labels:
+                 if isinstance(value, str):
+                     raise ValueError(
+                         f"expected a sequence of dimensions, but got '{value}'"
+                     )
+                 splits[key] = value
+             else:
+                 if not isinstance(value, int):
+                     raise ValueError(
+                         f"'{key}' is not an existing dim. Expected an integer size constraint on a new dim."
+                     )
+                 size_constraints[key] = value
+
+         names = []
+         sizes = []
+         strides = []
+         ndslice = self._ndslice
+         for name, size, stride in zip(self._labels, ndslice.sizes, ndslice.strides):
+             to_names = splits.get(name, (name,))
+             total_size = 1
+             unknown_size_name = None
+             for to_name in to_names:
+                 if to_name in size_constraints:
+                     total_size *= size_constraints[to_name]
+                 elif unknown_size_name is None:
+                     unknown_size_name = to_name
+                 else:
+                     raise ValueError(
+                         f"Cannot infer size of {to_names} because both {to_name} and {unknown_size_name} have unknown size. Specify at least one as argument, e.g. {to_name}=4"
+                     )
+             if unknown_size_name is not None:
+                 inferred_size, m = divmod(size, total_size)
+                 if m != 0:
+                     to_sizes = tuple(
+                         (
+                             size_constraints[to_name]
+                             if to_name in size_constraints
+                             else "?"
+                         )
+                         for to_name in to_names
+                     )
+                     raise ValueError(
+                         f"Dimension '{name}' of size {size} is not evenly divided by {to_names!r} with sizes {to_sizes!r}"
+                     )
+                 size_constraints[unknown_size_name] = inferred_size
+             elif total_size != size:
+                 to_sizes = tuple(size_constraints[to_name] for to_name in to_names)
+                 raise ValueError(
+                     f"Dimension '{name}' of size {size} is not evenly divided by {to_names!r} with sizes {to_sizes!r}"
+                 )
+             new_sizes = [size_constraints.pop(to_name) for to_name in to_names]
+             new_strides_reversed = tuple(
+                 itertools.accumulate(reversed(new_sizes), operator.mul, initial=stride)
+             )
+             sizes.extend(new_sizes)
+             strides.extend(reversed(new_strides_reversed[:-1]))
+             for name in to_names:
+                 if name in names:
+                     raise ValueError(f"Duplicate dimension name '{name}'")
+             names.extend(to_names)
+         if size_constraints:
+             raise ValueError(
+                 f"unused size constraints: {tuple(size_constraints.keys())}"
+             )
+         return self._new_with_shape(
+             Shape(names, NDSlice(offset=ndslice.offset, sizes=sizes, strides=strides))
+         )
+
+     def flatten(self, name: str) -> Self:
+         """
+         Returns a new device mesh with all dimensions flattened into a single dimension
+         with the given name.
+
+         Currently this supports only dense meshes: that is, all ranks must be contiguous
+         in the mesh.
+         """
+         ndslice = self._ndslice
+         dense_strides = tuple(
+             itertools.accumulate(reversed(ndslice.sizes), operator.mul, initial=1)
+         )
+         dense_strides, total_size = (
+             list(reversed(dense_strides[:-1])),
+             dense_strides[-1],
+         )
+         if dense_strides != ndslice.strides:
+             raise ValueError(
+                 "cannot flatten sparse mesh: " f"{ndslice.strides=} != {dense_strides=}"
+             )
+
+         return self._new_with_shape(
+             Shape(
+                 [name], NDSlice(offset=ndslice.offset, sizes=[total_size], strides=[1])
+             )
+         )
+
+     def rename(self, **kwargs) -> Self:
+         """
+         Returns a new device mesh with some of its dimensions renamed.
+         Dimensions not mentioned are retained:
+
+             new_mesh = mesh.rename(host='dp', gpu='tp')
+         """
+         return self.split(**{k: (v,) for k, v in kwargs.items()})
+
+
+ __all__ = ["NDSlice", "Shape", "MeshTrait"]
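
The MeshTrait docstrings above describe slice, split, flatten, and rename in terms of an abstract Shape. Below is a hedged, self-contained sketch that exercises them through a toy implementation; ToyMesh and the `.labels`/`.ndslice` accessors on Shape are assumptions used only for illustration, not part of the package.

# Hedged sketch (not part of the wheel): a toy MeshTrait implementation over a
# 2 x 4 ("host" x "gpu") shape, used only to illustrate the methods above.
from monarch.common.shape import MeshTrait, NDSlice, Shape

class ToyMesh(MeshTrait):
    def __init__(self, shape: Shape) -> None:
        self._shape = shape

    @property
    def _ndslice(self) -> NDSlice:
        return self._shape.ndslice  # assumed accessor on Shape

    @property
    def _labels(self):
        return tuple(self._shape.labels)  # assumed accessor on Shape

    def _new_with_shape(self, shape: Shape) -> "ToyMesh":
        return ToyMesh(shape)

mesh = ToyMesh(Shape(["host", "gpu"], NDSlice(offset=0, sizes=[2, 4], strides=[4, 1])))
row = mesh.slice(host=0)                    # drop the host dim, keep all 4 gpus of host 0
tp_cp = mesh.split(gpu=("tp", "cp"), cp=2)  # gpu=4 -> tp=2 x cp=2; tp's size is inferred
flat = mesh.flatten("rank")                 # dense 2x4 mesh -> one "rank" dim of size 8
renamed = mesh.rename(host="dp")            # pure rename, implemented via split()
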
monarch/common/stream.py
@@ -0,0 +1,111 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the BSD-style license found in the
+ # LICENSE file in the root directory of this source tree.
+
+ # pyre-unsafe
+ from typing import Callable, List, Tuple, TYPE_CHECKING
+ from weakref import ref, WeakKeyDictionary
+
+ from . import messages
+ from .borrows import Borrow
+ from .context_manager import activate_first_context_manager
+ from .fake import fake_call
+ from .reference import Referenceable
+
+ if TYPE_CHECKING:
+     from monarch.common.client import Client  # @manual
+
+     from .tensor import Tensor
+
+
+ class Stream:
+     def __init__(self, name: str, _default=False):
+         self.name = name
+         self.default: bool = _default
+         self.clients: WeakKeyDictionary["Client", "StreamRef"] = WeakKeyDictionary()
+
+     def __repr__(self):
+         return f"<Stream({repr(self.name)}) at {hex(id(self))}>"
+
+     def __str__(self):
+         return f"stream {repr(self.name)}"
+
+     def activate(self):
+         return _active_stream(self)
+
+     def _to_ref(self, client: "Client"):
+         if client not in self.clients:
+             self.clients[client] = StreamRef(client, self.name, self.default)
+         return self.clients[client]
+
+     def borrow(self, t: "Tensor", mutable: bool = False) -> Tuple["Tensor", "Borrow"]:
+         """
+         borrowed_tensor, borrow = self.borrow(t)
+
+         Borrows tensor 't' for use on this stream.
+         The memory of t will stay alive until borrow.drop() is called, which will free t and
+         any of its aliases on stream `self` and will cause t.stream to wait on self at that point so
+         that the memory of t can be reused.
+
+         If `mutable`, then self can write to the storage of `t`, but t.stream cannot read or write `t` until
+         the borrow is returned (becomes free and a wait_for has been issued).
+
+         If not `mutable`, both `self` and `t.stream` can read from t's storage but neither can write to it.
+         """
+         client = t.mesh.client
+         aliases = t._aliases
+         r = type(t)(fake_call(t._fake.clone), t.mesh, self)
+         client.new_node((r,), (t,))
+         borrow = r._aliases.borrow_from(client.new_ref(), t.mesh, aliases, mutable)
+         client.new_borrow(borrow)
+         assert r.ref is not None
+         t.mesh._send(
+             messages.BorrowCreate(
+                 r, borrow._id, t, t.stream._to_ref(client), self._to_ref(client)
+             )
+         )
+         r._on_first_use = lambda t: borrow._use()
+
+         return r, borrow
+
+
+ class StreamRef(Referenceable):
+     def __init__(self, client: "Client", name: str, default: bool):
+         self.ref = client.new_ref()
+         self.client = ref(client)
+         self.name = name
+         self.default = default
+         client.send(
+             client.all_ranks,
+             messages.CreateStream(self, self.default),
+         )
+
+     def delete_ref(self, ref):
+         client = self.client()
+         if client is not None and not client._shutdown:
+             client.handle_deletes(client.all_ranks, [ref])
+
+
+ _active = Stream("main", _default=True)
+ _on_change: List[Callable] = []
+
+
+ def get_active_stream():
+     return _active
+
+
+ @activate_first_context_manager
+ def _active_stream(stream: Stream):
+     global _active
+     for on_change in _on_change:
+         on_change(_active, stream)
+
+     _active, old = stream, _active
+     try:
+         yield
+     finally:
+         for on_change in _on_change:
+             on_change(_active, old)
+         _active = old
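
A hedged sketch of how Stream.borrow and Stream.activate from this file might be used to run work on a second stream. The activated mesh and `mesh.activate()` are assumptions not shown in this hunk; `borrow.drop()` is referenced by the borrow() docstring above but defined in monarch/common/borrows.py.

# Hedged usage sketch (not part of the wheel). Assumes an already-constructed
# DeviceMesh `mesh`; mesh.activate() is an assumption from device_mesh.py.
import torch

from monarch.common.stream import Stream

comm = Stream("comm")  # a second, non-default stream

def reduce_on_comm_stream(mesh):
    with mesh.activate():
        t = torch.randn(1024)
        # Read-only alias of `t` usable on the `comm` stream; `t` stays alive
        # until the borrow is dropped (see the borrow() docstring above).
        borrowed, borrow = comm.borrow(t, mutable=False)
        with comm.activate():
            s = borrowed.sum()  # runs on the comm stream
        borrow.drop()  # frees the alias and makes t.stream wait on comm
        return s
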