PyPI - torchmonarch-nightly - Versions diffs - 2025.8.1__cp313-cp313-manylinux2014_x86_64.whl → 2025.9.3__cp313-cp313-manylinux2014_x86_64.whl - Mend

torchmonarch-nightly 2025.8.1__cp313-cp313-manylinux2014_x86_64.whl → 2025.9.3__cp313-cp313-manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (64) hide show

monarch/_rust_bindings.so +0 -0
monarch/_src/actor/actor_mesh.py +414 -216
monarch/_src/actor/allocator.py +75 -6
monarch/_src/actor/bootstrap_main.py +7 -4
monarch/_src/actor/code_sync/__init__.py +2 -0
monarch/_src/actor/debugger/__init__.py +7 -0
monarch/_src/actor/{debugger.py → debugger/debugger.py} +246 -135
monarch/_src/actor/{pdb_wrapper.py → debugger/pdb_wrapper.py} +62 -23
monarch/_src/actor/endpoint.py +27 -45
monarch/_src/actor/future.py +86 -24
monarch/_src/actor/host_mesh.py +125 -0
monarch/_src/actor/logging.py +94 -0
monarch/_src/actor/pickle.py +25 -0
monarch/_src/actor/proc_mesh.py +423 -156
monarch/_src/actor/python_extension_methods.py +90 -0
monarch/_src/actor/shape.py +8 -1
monarch/_src/actor/source_loader.py +45 -0
monarch/_src/actor/telemetry/__init__.py +172 -0
monarch/_src/actor/telemetry/rust_span_tracing.py +6 -39
monarch/_src/debug_cli/__init__.py +7 -0
monarch/_src/debug_cli/debug_cli.py +43 -0
monarch/_src/tensor_engine/rdma.py +64 -9
monarch/_testing.py +1 -3
monarch/actor/__init__.py +24 -4
monarch/common/_C.so +0 -0
monarch/common/device_mesh.py +14 -0
monarch/common/future.py +10 -0
monarch/common/remote.py +14 -25
monarch/common/tensor.py +12 -0
monarch/debug_cli/__init__.py +7 -0
monarch/debug_cli/__main__.py +12 -0
monarch/fetch.py +2 -2
monarch/gradient/_gradient_generator.so +0 -0
monarch/gradient_generator.py +4 -2
monarch/mesh_controller.py +34 -14
monarch/monarch_controller +0 -0
monarch/tools/colors.py +25 -0
monarch/tools/commands.py +42 -7
monarch/tools/components/hyperactor.py +1 -1
monarch/tools/config/__init__.py +31 -4
monarch/tools/config/defaults.py +13 -3
monarch/tools/config/environment.py +45 -0
monarch/tools/config/workspace.py +165 -0
monarch/tools/mesh_spec.py +2 -0
monarch/utils/__init__.py +9 -0
monarch/utils/utils.py +78 -0
tests/error_test_binary.py +5 -3
tests/python_actor_test_binary.py +52 -0
tests/test_actor_error.py +142 -14
tests/test_alloc.py +1 -1
tests/test_allocator.py +59 -72
tests/test_coalescing.py +1 -1
tests/test_debugger.py +639 -45
tests/test_env_before_cuda.py +4 -4
tests/test_mesh_trait.py +38 -0
tests/test_python_actors.py +979 -75
tests/test_rdma.py +7 -6
tests/test_tensor_engine.py +6 -6
{torchmonarch_nightly-2025.8.1.dist-info → torchmonarch_nightly-2025.9.3.dist-info}/METADATA +82 -4
{torchmonarch_nightly-2025.8.1.dist-info → torchmonarch_nightly-2025.9.3.dist-info}/RECORD +64 -48
{torchmonarch_nightly-2025.8.1.dist-info → torchmonarch_nightly-2025.9.3.dist-info}/WHEEL +0 -0
{torchmonarch_nightly-2025.8.1.dist-info → torchmonarch_nightly-2025.9.3.dist-info}/entry_points.txt +0 -0
{torchmonarch_nightly-2025.8.1.dist-info → torchmonarch_nightly-2025.9.3.dist-info}/licenses/LICENSE +0 -0
{torchmonarch_nightly-2025.8.1.dist-info → torchmonarch_nightly-2025.9.3.dist-info}/top_level.txt +0 -0

tests/test_debugger.py CHANGED Viewed

@@ -6,34 +6,47 @@
 # pyre-unsafe
 import asyncio
+import functools
+import importlib.resources
+import os
 import re
+import shutil
+import signal
+import subprocess
 import sys
-from typing import cast, List
+from typing import cast, List, Optional, Tuple
 from unittest.mock import AsyncMock, patch
+import cloudpickle
 import monarch
 import monarch.actor as actor
 import pytest
 import torch
-from monarch._src.actor.actor_mesh import Actor, ActorError, current_rank
-from monarch._src.actor.debugger import (
+from monarch._src.actor.actor_mesh import Actor, ActorError, current_rank, IN_PAR
+from monarch._src.actor.debugger.debugger import (
+    _MONARCH_DEBUG_SERVER_HOST_ENV_VAR,
+    _MONARCH_DEBUG_SERVER_PORT_ENV_VAR,
     Attach,
     Cast,
     Continue,
     DebugCommand,
+    DebugController,
     DebugSession,
     DebugSessionInfo,
     DebugSessions,
+    DebugStdIO,
     Help,
     ListCommand,
     Quit,
 )
 from monarch._src.actor.endpoint import endpoint
 from monarch._src.actor.proc_mesh import proc_mesh
+from monarch._src.actor.source_loader import SourceLoaderController
+from pyre_extensions import none_throws
 needs_cuda = pytest.mark.skipif(
     not torch.cuda.is_available(),
@@ -41,6 +54,70 @@ needs_cuda = pytest.mark.skipif(
 )
+# Environment variables passed as `env=` to isolate_in_subprocess by the
+# subprocess-isolated debugger tests in this file. The port is "0";
+# test_debug_cli reads the real port back through
+# DebugControllerForTesting.server_port (presumably "0" asks the OS for a
+# free port -- TODO confirm against the debug server implementation).
+debug_env = {
+    _MONARCH_DEBUG_SERVER_HOST_ENV_VAR: "0.0.0.0",
+    _MONARCH_DEBUG_SERVER_PORT_ENV_VAR: "0",
+}
+def isolate_in_subprocess(test_fn=None, *, env=None):
+    """Run an async test function in a separate Python process.
+    Usable bare (``@isolate_in_subprocess``) or with keyword arguments
+    (``@isolate_in_subprocess(env={...})``).
+    Args:
+        test_fn: Zero-argument coroutine function to run. ``None`` when the
+            decorator is called with keyword arguments only.
+        env: Extra environment variables for the child process. The mapping
+            is copied and never modified, so one dict can safely be shared
+            between several decorated tests.
+    Returns:
+        A synchronous zero-argument function that launches the child process
+        and asserts that it exited with status 0. When ``test_fn`` is
+        ``None``, a partial that expects ``test_fn``.
+    """
+    if test_fn is None:
+        return functools.partial(isolate_in_subprocess, env=env)
+    # Merge into a fresh dict instead of updating `env` in place: callers pass
+    # module-level dicts (e.g. debug_env) that are reused by other tests.
+    # Values already present in os.environ take precedence over `env`, which
+    # matches the previous behaviour.
+    # NOTE(review): if the test-specific values are meant to win, swap the
+    # merge order.
+    child_env = {**(env or {}), **os.environ}
+    def sync_test_fn():
+        asyncio.run(test_fn())
+    sync_test_fn_name = f"sync_{test_fn.__name__}"
+    # Publish the synchronous entry point on this module so the child process
+    # can find it by name after importing the module.
+    setattr(sys.modules[__name__], sync_test_fn_name, sync_test_fn)
+    # NOTE(review): attributes set on `test_fn` by decorators applied beneath
+    # this one are not copied onto `wrapper` -- confirm that is intended.
+    def wrapper():
+        if IN_PAR:
+            # Packaged build: a dedicated runner binary takes the function name.
+            cmd = [
+                str(
+                    importlib.resources.files("monarch.python.tests").joinpath(
+                        "run_test_bin"
+                    )
+                ),
+                sync_test_fn_name,
+            ]
+        else:
+            cmd = [
+                sys.executable,
+                "-c",
+                f"import tests.test_debugger; tests.test_debugger.{sync_test_fn_name}()",
+            ]
+        assert subprocess.call(cmd, env=child_env) == 0
+    return wrapper
+def run_test_from_name():
+    """Call the function in this module whose name is given by ``sys.argv[1]``."""
+    this_module = sys.modules[__name__]
+    target_name = sys.argv[1]
+    target = getattr(this_module, target_name)
+    target()
+# Path of the packaged "debug_cli_bin" resource when IN_PAR is true, otherwise
+# an empty string. test_debug_cli forwards it to its subprocess as
+# MONARCH_DEBUG_CLI_BIN and only reads it back on the IN_PAR code path.
+debug_cli_bin = (
+    str(importlib.resources.files("monarch.python.tests").joinpath("debug_cli_bin"))
+    if IN_PAR
+    else ""
+)
 def _bad_rank():
     raise ValueError("bad rank")
@@ -75,22 +152,45 @@ class DebugeeActor(Actor):
         return _debugee_actor_internal(rank)
-async def _wait_for_breakpoints(debug_client, n_breakpoints) -> List[DebugSessionInfo]:
+# DebugController subclass that adds two endpoints used only by the tests in
+# this file and sets self._debug_io to a DebugStdIO instance.
+class DebugControllerForTesting(DebugController):
+    def __init__(self):
+        super().__init__()
+        self._debug_io = DebugStdIO()
+    @endpoint
+    async def blocking_enter(self):
+        # Awaits self._enter() to completion while holding self._task_lock, so
+        # the endpoint only returns once that call has finished. The assert
+        # checks that no task is already recorded in self._task.
+        async with self._task_lock:
+            assert self._task is None
+            await self._enter()
+    @endpoint
+    async def server_port(self):
+        # Returns element [1] of getsockname() for the server's first socket
+        # (presumably the TCP port -- confirm the address family). Falls
+        # through and returns None when the server has no sockets.
+        server: asyncio.Server = await self._server
+        if len(server.sockets) > 0:
+            return server.sockets[0].getsockname()[1]
+# Polls debug_controller.list (with print_output=False) once per second,
+# sleeping before each poll, for at most `timeout_sec` polls. Returns the
+# listed sessions as soon as their count equals `n_breakpoints` exactly;
+# raises RuntimeError("timed out waiting for breakpoints") otherwise.
+async def _wait_for_breakpoints(
+    debug_controller, n_breakpoints, timeout_sec=20
+) -> List[DebugSessionInfo]:
     breakpoints: List[DebugSessionInfo] = []
-    for i in range(10):
-        breakpoints = await debug_client.list.call_one()
-        if len(breakpoints) == n_breakpoints:
-            break
+    for _ in range(timeout_sec):
         await asyncio.sleep(1)
-        if i == 9:
-            raise RuntimeError("timed out waiting for breakpoints")
-    return breakpoints
+        breakpoints = await debug_controller.list.call_one(print_output=False)
+        if len(breakpoints) == n_breakpoints:
+            return breakpoints
+    raise RuntimeError("timed out waiting for breakpoints")
+# We have to run this test in a separate process because there is only one
+# debug controller per process, and we don't want this to interfere with
+# the other two tests that access the debug controller.
+@isolate_in_subprocess(env=debug_env)
 @pytest.mark.skipif(
     torch.cuda.device_count() < 2,
     reason="Not enough GPUs, this test requires at least 2 GPUs",
 )
+@pytest.mark.timeout(60)
 async def test_debug() -> None:
     input_mock = AsyncMock()
     input_mock.side_effect = [
@@ -122,6 +222,7 @@ async def test_debug() -> None:
         "c",
         "quit",
         "continue",
+        "quit",
     ]
     outputs = []
@@ -130,16 +231,21 @@ async def test_debug() -> None:
         nonlocal outputs
         outputs.append(msg)
+    output_mock = AsyncMock()
+    output_mock.side_effect = _patch_output
     with patch(
-        "monarch._src.actor.debugger._debugger_input", side_effect=input_mock
-    ), patch("monarch._src.actor.debugger._debugger_output", new=_patch_output):
-        proc = await proc_mesh(hosts=2, gpus=2)
+        "monarch._src.actor.debugger.debugger.DebugStdIO.input", new=input_mock
+    ), patch("monarch._src.actor.debugger.debugger.DebugStdIO.output", new=output_mock):
+        proc = proc_mesh(hosts=2, gpus=2)
         debugee = await proc.spawn("debugee", DebugeeActor)
-        debug_client = actor.debug_client()
+        debug_controller = actor.get_or_spawn_controller(
+            "debug_controller", DebugControllerForTesting
+        ).get()
         fut = debugee.to_debug.call()
-        await debug_client.wait_pending_session.call_one()
-        breakpoints = await _wait_for_breakpoints(debug_client, 4)
+        await debug_controller.wait_pending_session.call_one()
+        breakpoints = await _wait_for_breakpoints(debug_controller, 4)
         initial_linenos = {}
         for i in range(len(breakpoints)):
@@ -150,7 +256,7 @@ async def test_debug() -> None:
             assert info.function == "test_debugger._debugee_actor_internal"
             assert info.lineno == cast(int, breakpoints[0].lineno) + 5 * info.rank
-        await debug_client.enter.call_one()
+        await debug_controller.blocking_enter.call_one()
         # Check that when detaching and re-attaching to a session, the last portion of the output is repeated
         expected_last_output = [
@@ -161,13 +267,22 @@ async def test_debug() -> None:
             r"\(Pdb\) ",
         ]
         output_len = len(expected_last_output)
-        assert outputs[-2 * output_len : -output_len] == outputs[-output_len:]
+        rev_outputs = outputs[::-1]
+        last_return = rev_outputs.index("--Return--")
+        second_to_last_return = rev_outputs.index("--Return--", last_return + 1)
+        last_return = len(rev_outputs) - last_return - 1
+        second_to_last_return = len(rev_outputs) - second_to_last_return - 1
+        assert (
+            outputs[second_to_last_return : second_to_last_return + output_len]  # noqa
+            == outputs[last_return : last_return + output_len]  # noqa
+        )
         for real_output, expected_output in zip(
-            outputs[-output_len:], expected_last_output
+            outputs[last_return : last_return + output_len],  # noqa
+            expected_last_output,
         ):
             assert re.match(expected_output, real_output) is not None
-        breakpoints = await debug_client.list.call_one()
+        breakpoints = await debug_controller.list.call_one(print_output=False)
         for i in range(len(breakpoints)):
             if i == 1:
                 assert breakpoints[i].function == "test_debugger.to_debug"
@@ -177,9 +292,9 @@ async def test_debug() -> None:
                 )
                 assert breakpoints[i].lineno == initial_linenos[i]
-        await debug_client.enter.call_one()
+        await debug_controller.blocking_enter.call_one()
-        breakpoints = await debug_client.list.call_one()
+        breakpoints = await debug_controller.list.call_one(print_output=False)
         for i in range(len(breakpoints)):
             if i == 1:
                 assert breakpoints[i].function == "test_debugger.to_debug"
@@ -194,14 +309,14 @@ async def test_debug() -> None:
                 )
                 assert breakpoints[i].lineno == initial_linenos[i]
-        await debug_client.enter.call_one()
+        await debug_controller.blocking_enter.call_one()
-        breakpoints = await debug_client.list.call_one()
+        breakpoints = await debug_controller.list.call_one(print_output=False)
         assert len(breakpoints) == 4
         # Expect post-mortem debugging for rank 2
         assert breakpoints[2].function == "test_debugger._bad_rank"
-        await debug_client.enter.call_one()
+        await debug_controller.blocking_enter.call_one()
         expected_last_output = [
             r"\s*(/.*/)+test_debugger.py\(\d+\)_debugee_actor_internal\(\)\n-> _bad_rank\(\)",
@@ -211,18 +326,24 @@ async def test_debug() -> None:
             r"\(Pdb\) ",
         ]
+        rev_outputs = outputs[::-1]
+        output_index = len(outputs) - (
+            rev_outputs.index("(Pdb) ") + len(expected_last_output)
+        )
         for output, expected_output in zip(
-            outputs[-len(expected_last_output) :], expected_last_output
+            outputs[output_index : output_index + len(expected_last_output)],  # noqa
+            expected_last_output,
         ):
             assert re.match(expected_output, output) is not None
-        breakpoints = await debug_client.list.call_one()
+        breakpoints = await debug_controller.list.call_one(print_output=False)
         assert len(breakpoints) == 3
         for i, rank in enumerate((0, 1, 3)):
             assert breakpoints[i].rank == rank
-        await debug_client.enter.call_one()
-        breakpoints = await debug_client.list.call_one()
+        await debug_controller.blocking_enter.call_one()
+        breakpoints = await debug_controller.list.call_one(print_output=False)
         assert len(breakpoints) == 0
         with pytest.raises(
@@ -231,10 +352,13 @@ async def test_debug() -> None:
             await fut
+# See earlier comment
+@isolate_in_subprocess(env=debug_env)
 @pytest.mark.skipif(
     torch.cuda.device_count() < 2,
     reason="Not enough GPUs, this test requires at least 2 GPUs",
 )
+@pytest.mark.timeout(60)
 async def test_debug_multi_actor() -> None:
     input_mock = AsyncMock()
     input_mock.side_effect = [
@@ -251,19 +375,24 @@ async def test_debug_multi_actor() -> None:
         "c",
         "quit",
         "continue",
+        "quit",
     ]
-    with patch("monarch._src.actor.debugger._debugger_input", side_effect=input_mock):
+    with patch(
+        "monarch._src.actor.debugger.debugger.DebugStdIO.input", side_effect=input_mock
+    ):
         proc = await proc_mesh(hosts=2, gpus=2)
         debugee_1 = await proc.spawn("debugee_1", DebugeeActor)
         debugee_2 = await proc.spawn("debugee_2", DebugeeActor)
-        debug_client = actor.debug_client()
+        debug_controller = actor.get_or_spawn_controller(
+            "debug_controller", DebugControllerForTesting
+        ).get()
         fut_1 = debugee_1.to_debug.call()
         fut_2 = debugee_2.to_debug.call()
-        await debug_client.wait_pending_session.call_one()
+        await debug_controller.wait_pending_session.call_one()
-        breakpoints = await _wait_for_breakpoints(debug_client, 8)
+        breakpoints = await _wait_for_breakpoints(debug_controller, 8)
         initial_linenos = {}
         for i in range(len(breakpoints)):
@@ -275,9 +404,9 @@ async def test_debug_multi_actor() -> None:
             assert info.function == "test_debugger._debugee_actor_internal"
             assert info.lineno == cast(int, breakpoints[0].lineno) + 5 * info.rank
-        await debug_client.enter.call_one()
+        await debug_controller.blocking_enter.call_one()
-        breakpoints = await _wait_for_breakpoints(debug_client, 8)
+        breakpoints = await _wait_for_breakpoints(debug_controller, 8)
         for i in range(len(breakpoints)):
             if i == 1:
                 assert breakpoints[i].actor_name == "debugee_1"
@@ -294,18 +423,18 @@ async def test_debug_multi_actor() -> None:
                 assert breakpoints[i].rank == i % 4
                 assert breakpoints[i].lineno == initial_linenos[breakpoints[i].rank]
-        await debug_client.enter.call_one()
+        await debug_controller.blocking_enter.call_one()
-        breakpoints = await _wait_for_breakpoints(debug_client, 1)
+        breakpoints = await _wait_for_breakpoints(debug_controller, 1)
         with pytest.raises(ActorError, match="ValueError: bad rank"):
             await fut_2
         assert breakpoints[0].actor_name == "debugee_1"
         assert breakpoints[0].rank == 2
         assert breakpoints[0].function == "test_debugger._bad_rank"
-        await debug_client.enter.call_one()
+        await debug_controller.blocking_enter.call_one()
-        breakpoints = await _wait_for_breakpoints(debug_client, 0)
+        breakpoints = await _wait_for_breakpoints(debug_controller, 0)
         with pytest.raises(ActorError, match="ValueError: bad rank"):
             await fut_1
@@ -512,7 +641,7 @@ async def test_debug_sessions_iter() -> None:
     ["user_input", "expected_output"],
     [
         ("attach debugee 1", Attach("debugee", 1)),
-        ("a my_awesome_actor 100", Attach("my_awesome_actor", 100)),
+        ("a my_awesome_actor-123_DBG 100", Attach("my_awesome_actor-123_DBG", 100)),
         ("list", ListCommand()),
         ("l", ListCommand()),
         ("help", Help()),
@@ -600,7 +729,7 @@ async def test_debug_sessions_iter() -> None:
     ],
 )
 async def test_debug_command_parser_valid_inputs(user_input, expected_output):
-    assert DebugCommand.parse(user_input) == expected_output
+    assert await DebugCommand.parse(DebugStdIO(), user_input) == expected_output
 @pytest.mark.parametrize(
@@ -641,4 +770,469 @@ async def test_debug_command_parser_valid_inputs(user_input, expected_output):
     ],
 )
 async def test_debug_command_parser_invalid_inputs(invalid_input):
-    assert DebugCommand.parse(invalid_input) is None
+    assert await DebugCommand.parse(DebugStdIO(), invalid_input) is None
+# Runs in a separate process (see the comment above test_debug): there is only
+# one debug controller per process.
+@isolate_in_subprocess(env={"MONARCH_DEBUG_CLI_BIN": debug_cli_bin, **debug_env})
+@pytest.mark.skipif(
+    torch.cuda.device_count() < 2,
+    reason="Not enough GPUs, this test requires at least 2 GPUs",
+)
+@pytest.mark.timeout(60)
+async def test_debug_cli():
+    """Drive the debug controller through a client connection to its server.
+    The client is the packaged CLI binary when IN_PAR, ``python -m
+    monarch.debug_cli`` when a netcat binary is on PATH, and a raw TCP
+    connection otherwise. The test checks attach/detach replay, ``cast``,
+    reconnecting after the client was killed, post-mortem attach, and the
+    final ``continue``.
+    """
+    proc = proc_mesh(hosts=2, gpus=2)
+    debugee = await proc.spawn("debugee", DebugeeActor)
+    debug_controller = actor.get_or_spawn_controller(
+        "debug_controller", DebugControllerForTesting
+    ).get()
+    fut = debugee.to_debug.call()
+    # Stupidly high timeout because when CI tries to run many instances of this
+    # test in parallel, it can take a long time for breakpoints to actually show
+    # up.
+    breakpoints = await _wait_for_breakpoints(debug_controller, 4, timeout_sec=180)
+    initial_linenos = {}
+    for i, info in enumerate(breakpoints):
+        initial_linenos[info.rank] = info.lineno
+        assert info.rank == i
+        assert info.coords == {"hosts": info.rank // 2, "gpus": info.rank % 2}
+        assert info.function == "test_debugger._debugee_actor_internal"
+        assert info.lineno == cast(int, breakpoints[0].lineno) + 5 * info.rank
+    port = debug_controller.server_port.call_one().get()
+    async def create_debug_cli_proc() -> (
+        Tuple[
+            Optional[asyncio.subprocess.Process],
+            asyncio.StreamWriter,
+            asyncio.StreamReader,
+        ]
+    ):
+        # Returns (process or None, writer, reader) for a fresh client
+        # connection to the debug server.
+        cmd = None
+        if IN_PAR:
+            cmd = [
+                os.environ["MONARCH_DEBUG_CLI_BIN"],
+                "--host",
+                os.environ[_MONARCH_DEBUG_SERVER_HOST_ENV_VAR],
+                "--port",
+                str(port),
+            ]
+        elif any(shutil.which(nc_cmd) for nc_cmd in ["ncat", "nc", "netcat"]):
+            cmd = [
+                sys.executable,
+                "-m",
+                "monarch.debug_cli",
+                "--host",
+                os.environ[_MONARCH_DEBUG_SERVER_HOST_ENV_VAR],
+                "--port",
+                str(port),
+            ]
+        if cmd:
+            debug_cli_proc = await asyncio.create_subprocess_exec(
+                *cmd,
+                stdin=subprocess.PIPE,
+                stdout=subprocess.PIPE,
+            )
+            debug_cli_stdin = none_throws(debug_cli_proc.stdin)
+            debug_cli_stdout = none_throws(debug_cli_proc.stdout)
+            return debug_cli_proc, debug_cli_stdin, debug_cli_stdout
+        else:
+            # Netcat isn't available in our github CI environment, so we can't
+            # run the monarch.debug_cli module
+            reader, writer = await asyncio.open_connection(
+                os.environ[_MONARCH_DEBUG_SERVER_HOST_ENV_VAR], port
+            )
+            return None, writer, reader
+    (
+        debug_cli_proc,
+        debug_cli_stdin,
+        debug_cli_stdout,
+    ) = await create_debug_cli_proc()
+    debug_cli_stdin.writelines(
+        [
+            b"attach debugee 1\n",
+            b"n\n",
+            b"n\n",
+            b"n\n",
+            b"n\n",
+            b"detach\n",
+            b"attach debugee 1\n",
+            b"print('test separator')\n",
+            b"detach\n",
+        ]
+    )
+    await debug_cli_stdin.drain()
+    # Check that when detaching and re-attaching to a session, the last portion of the output is repeated
+    expected_last_output = (
+        r"--Return--\n"
+        r"> (?:/.*/)+test_debugger.py\(\d+\)to_debug\(\)->5\n"
+        r"-> return _debugee_actor_internal\(rank\)\n"
+        r"\(Pdb\) "
+    )
+    outputs = (await debug_cli_stdout.readuntil(b"test separator")).decode()
+    # The pattern has no capturing groups, so findall yields the full matched
+    # text of each occurrence. Compare the two occurrences with each other;
+    # indexing `outputs` itself would only compare two single characters.
+    repeated_outputs = re.findall(expected_last_output, outputs)
+    assert len(repeated_outputs) == 2
+    assert repeated_outputs[0] == repeated_outputs[1]
+    breakpoints = await debug_controller.list.call_one(print_output=False)
+    for i, bp in enumerate(breakpoints):
+        if i == 1:
+            assert bp.function == "test_debugger.to_debug"
+        else:
+            assert bp.function == "test_debugger._debugee_actor_internal"
+            assert bp.lineno == initial_linenos[i]
+    debug_cli_stdin.write(b"quit\n")
+    await debug_cli_stdin.drain()
+    # Yield and wait so that the debug controller has a chance to process the
+    # input before we close stdin.
+    await asyncio.sleep(1)
+    debug_cli_stdin.close()
+    await debug_cli_stdin.wait_closed()
+    if debug_cli_proc:
+        assert await debug_cli_proc.wait() == 0
+    (
+        debug_cli_proc,
+        debug_cli_stdin,
+        debug_cli_stdout,
+    ) = await create_debug_cli_proc()
+    debug_cli_stdin.writelines(
+        [
+            b"cast debugee ranks(0,3) n\n",
+            b"cast debugee ranks(0,3) n\n",
+            # Attaching to 0 and 3 ensures that when we call "list"
+            # the next time, their function/lineno info will be
+            # up-to-date.
+            b"attach debugee 0\n",
+            b"detach\n",
+            b"attach debugee 3\n",
+            b"detach\n",
+        ]
+    )
+    await debug_cli_stdin.drain()
+    # Make sure we have run all the commands before killing the CLI, otherwise
+    # the commands may not actually be sent to the debug controller.
+    await debug_cli_stdout.readuntil(b"Detached from debug session for debugee 3")
+    if debug_cli_proc:
+        # Even if we kill the proc using a signal, we should be able to reconnect
+        # without issue.
+        debug_cli_proc.send_signal(signal.SIGINT)
+        assert await debug_cli_proc.wait() != 0
+    else:
+        debug_cli_stdin.close()
+        await debug_cli_stdin.wait_closed()
+    breakpoints = await debug_controller.list.call_one(print_output=False)
+    for i, bp in enumerate(breakpoints):
+        if i == 1:
+            assert bp.function == "test_debugger.to_debug"
+        elif i in (0, 3):
+            assert bp.function == "test_debugger._debugee_actor_internal"
+            assert bp.lineno == initial_linenos[i] + 2
+        else:
+            assert bp.function == "test_debugger._debugee_actor_internal"
+            assert bp.lineno == initial_linenos[i]
+    (
+        debug_cli_proc,
+        debug_cli_stdin,
+        debug_cli_stdout,
+    ) = await create_debug_cli_proc()
+    debug_cli_stdin.writelines([b"attach debugee 2\n", b"c\n"])
+    await debug_cli_stdin.drain()
+    # Make sure we have run all the commands before killing the CLI, otherwise
+    # the commands may not actually be sent to the debug controller.
+    await debug_cli_stdout.readuntil(b"raise ValueError")
+    if debug_cli_proc:
+        # Even if we kill the proc using a signal while the debugger is attached to
+        # a specific rank, we should be able to reconnect to that rank later without
+        # issue.
+        debug_cli_proc.send_signal(signal.SIGINT)
+        assert await debug_cli_proc.wait() != 0
+    else:
+        debug_cli_stdin.close()
+        await debug_cli_stdin.wait_closed()
+    breakpoints = await debug_controller.list.call_one(print_output=False)
+    assert len(breakpoints) == 4
+    # Expect post-mortem debugging for rank 2
+    assert breakpoints[2].function == "test_debugger._bad_rank"
+    (
+        debug_cli_proc,
+        debug_cli_stdin,
+        debug_cli_stdout,
+    ) = await create_debug_cli_proc()
+    debug_cli_stdin.writelines([b"attach debugee 2\n", b"bt\n", b"c\n"])
+    await debug_cli_stdin.drain()
+    expected_output = (
+        r"(?:/.*/)+test_debugger.py\(\d+\)_debugee_actor_internal\(\)\n-> _bad_rank\(\)\n"
+        r'> (?:/.*/)+test_debugger.py\(\d+\)_bad_rank\(\)\n-> raise ValueError\("bad rank"\)\n'
+        r"\(Pdb\)"
+    )
+    output = (
+        await debug_cli_stdout.readuntil(b"Detached from debug session for debugee 2")
+    ).decode()
+    assert len(re.findall(expected_output, output)) == 1
+    debug_cli_stdin.writelines([b"quit\n"])
+    await debug_cli_stdin.drain()
+    # Yield and wait so that the debug controller has a chance to process the
+    # input before we close stdin.
+    await asyncio.sleep(1)
+    debug_cli_stdin.close()
+    await debug_cli_stdin.wait_closed()
+    if debug_cli_proc:
+        assert await debug_cli_proc.wait() == 0
+    breakpoints = await debug_controller.list.call_one(print_output=False)
+    assert len(breakpoints) == 3
+    for i, rank in enumerate((0, 1, 3)):
+        assert breakpoints[i].rank == rank
+    debug_cli_proc, debug_cli_stdin, _ = await create_debug_cli_proc()
+    debug_cli_stdin.writelines([b"continue\n", b"quit\n"])
+    await debug_cli_stdin.drain()
+    # Yield and wait so that the debug controller has a chance to process the
+    # input before we close stdin.
+    await asyncio.sleep(1)
+    debug_cli_stdin.close()
+    await debug_cli_stdin.wait_closed()
+    if debug_cli_proc:
+        assert await debug_cli_proc.wait() == 0
+    breakpoints = await _wait_for_breakpoints(debug_controller, 0)
+    assert len(breakpoints) == 0
+    with pytest.raises(
+        monarch._src.actor.actor_mesh.ActorError, match="ValueError: bad rank"
+    ):
+        await fut
+# Source text that SourceLoaderControllerWithMockedSource.get_source returns
+# for "/tmp/monarch_test/class_closure.py".
+# NOTE(review): presumably the line layout must stay in sync with the code
+# objects pickled in load_class_closure -- verify before editing the text.
+class_closure_source = """class ClassClosure:
+    def __init__(self, arg):
+        self.arg = arg
+    def closure(self):
+        arg = self.arg
+        class Internal:
+            def __init__(self):
+                self.arg = arg
+# noqa
+            def get_arg(self):
+                breakpoint()
+                return self.arg
+        return Internal
+"""
+# Source text that SourceLoaderControllerWithMockedSource.get_source returns
+# for "/tmp/monarch_test/function_closure.py".
+# NOTE(review): presumably the line layout must stay in sync with the code
+# objects pickled in load_func_closure -- verify before editing the text.
+function_closure_source = """def func_closure(arg, bp):
+    def func(internal):
+        if bp:
+            breakpoint()
+        return internal().get_arg() + arg
+    return func
+"""
+def load_class_closure():
+    pickled = b'\x80\x05\x95\xc7\x03\x00\x00\x00\x00\x00\x00\x8c\x17cloudpickle.cloudpickle\x94\x8c\x14_make_skeleton_class\x94\x93\x94(\x8c\x08builtins\x94\x8c\x04type\x94\x93\x94\x8c\x08Internal\x94h\x03\x8c\x06object\x94\x93\x94\x85\x94}\x94\x8c\n__module__\x94\x8c\rclass_closure\x94s\x8c 0f63369d5845486db9033c9f3c3253d5\x94Nt\x94R\x94h\x00\x8c\x0f_class_setstate\x94\x93\x94h\x0f}\x94(\x8c\x07__doc__\x94N\x8c\x08__init__\x94h\x00\x8c\x0e_make_function\x94\x93\x94(h\x00\x8c\r_builtin_type\x94\x93\x94\x8c\x08CodeType\x94\x85\x94R\x94(K\x01K\x00K\x00K\x01K\x02K\x13C\n\x88\x00|\x00_\x00d\x00S\x00\x94N\x85\x94\x8c\x03arg\x94\x85\x94\x8c\x04self\x94\x85\x94\x8c"/tmp/monarch_test/class_closure.py\x94\x8c\x08__init__\x94K\tC\x02\n\x01\x94h\x1e\x85\x94)t\x94R\x94}\x94(\x8c\x0b__package__\x94\x8c\x00\x94\x8c\x08__name__\x94h\x0c\x8c\x08__file__\x94h"uNNh\x00\x8c\x10_make_empty_cell\x94\x93\x94)R\x94\x85\x94t\x94R\x94h\x00\x8c\x12_function_setstate\x94\x93\x94h2}\x94}\x94(h+\x8c\x08__init__\x94\x8c\x0c__qualname__\x94\x8c/ClassClosure.closure.<locals>.Internal.__init__\x94\x8c\x0f__annotations__\x94}\x94\x8c\x0e__kwdefaults__\x94N\x8c\x0c__defaults__\x94Nh\x0bh\x0c\x8c\x07__doc__\x94N\x8c\x0b__closure__\x94h\x00\x8c\n_make_cell\x94\x93\x94K\n\x85\x94R\x94\x85\x94\x8c\x17_cloudpickle_submodules\x94]\x94\x8c\x0b__globals__\x94}\x94u\x86\x94\x86R0\x8c\n__module__\x94h\x0c\x8c\x07get_arg\x94h\x16(h\x1b(K\x01K\x00K\x00K\x01K\x01KSC\x0ct\x00\x83\x00\x01\x00|\x00j\x01S\x00\x94h\x1d\x8c\nbreakpoint\x94h\x1e\x86\x94h \x85\x94h"\x8c\x07get_arg\x94K\x0cC\x04\x06\x01\x06\x01\x94))t\x94R\x94h(NNNt\x94R\x94h4hU}\x94}\x94(h+\x8c\x07get_arg\x94h8\x8c.ClassClosure.closure.<locals>.Internal.get_arg\x94h:}\x94h<Nh=Nh\x0bh\x0ch>Nh?NhE]\x94hG}\x94u\x86\x94\x86R0u}\x94\x86\x94\x86R0.'
+    # Unpickle `ClassClosure(10).closure()`.
+    return cloudpickle.loads(pickled)
+def load_func_closure():
+    pickled = b"\x80\x05\x95\xd9\x02\x00\x00\x00\x00\x00\x00\x8c\x17cloudpickle.cloudpickle\x94\x8c\x0e_make_function\x94\x93\x94(h\x00\x8c\r_builtin_type\x94\x93\x94\x8c\x08CodeType\x94\x85\x94R\x94(K\x01K\x00K\x00K\x01K\x02K\x13C\x18\x88\x01r\x05t\x00\x83\x00\x01\x00|\x00\x83\x00\xa0\x01\xa1\x00\x88\x00\x17\x00S\x00\x94N\x85\x94\x8c\nbreakpoint\x94\x8c\x07get_arg\x94\x86\x94\x8c\x08internal\x94\x85\x94\x8c%/tmp/monarch_test/function_closure.py\x94\x8c\x04func\x94K\x02C\x06\x04\x01\x06\x01\x0e\x01\x94\x8c\x03arg\x94\x8c\x02bp\x94\x86\x94)t\x94R\x94}\x94(\x8c\x0b__package__\x94\x8c\x00\x94\x8c\x08__name__\x94\x8c\x10function_closure\x94\x8c\x08__file__\x94h\x0fuNNh\x00\x8c\x10_make_empty_cell\x94\x93\x94)R\x94h\x1e)R\x94\x86\x94t\x94R\x94h\x00\x8c\x12_function_setstate\x94\x93\x94h#}\x94}\x94(h\x1a\x8c\x04func\x94\x8c\x0c__qualname__\x94\x8c\x1afunc_closure.<locals>.func\x94\x8c\x0f__annotations__\x94}\x94\x8c\x0e__kwdefaults__\x94N\x8c\x0c__defaults__\x94N\x8c\n__module__\x94h\x1b\x8c\x07__doc__\x94N\x8c\x0b__closure__\x94h\x00\x8c\n_make_cell\x94\x93\x94K\x05\x85\x94R\x94h3\x88\x85\x94R\x94\x86\x94\x8c\x17_cloudpickle_submodules\x94]\x94\x8c\x0b__globals__\x94}\x94u\x86\x94\x86R0h\x02(h\x16h\x17NNh\x1e)R\x94h\x1e)R\x94\x86\x94t\x94R\x94h%hB}\x94}\x94(h\x1a\x8c\x04func\x94h)\x8c\x1afunc_closure.<locals>.func\x94h+}\x94h-Nh.Nh/h\x1bh0Nh1h3K\x05\x85\x94R\x94h3\x89\x85\x94R\x94\x86\x94h9]\x94h;}\x94u\x86\x94\x86R0\x86\x94."
+    # Unpickle `(func_closure(5, True), func_closure(5, False))`.
+    return cloudpickle.loads(pickled)
+class SourceLoaderControllerWithMockedSource(SourceLoaderController):
+    """SourceLoaderController whose ``get_source`` serves the in-test source strings."""
+    @endpoint
+    def get_source(self, filename: str) -> str:
+        # Only the two synthetic files used by the pickle-by-value test are
+        # known; any other request is treated as a test failure.
+        known_sources = {
+            "/tmp/monarch_test/class_closure.py": class_closure_source,
+            "/tmp/monarch_test/function_closure.py": function_closure_source,
+        }
+        if filename not in known_sources:
+            raise ValueError(f"Test should not have requested source for {filename}")
+        return known_sources[filename]
+class ClosureDebugeeActor(Actor):
+    """Actor whose endpoints call the closures handed to them by the test."""
+    @endpoint
+    def debug_class_closure(self, class_closure) -> int:
+        # Instantiate the supplied class, then return its get_arg() result.
+        instance = class_closure()
+        return instance.get_arg()
+    @endpoint
+    def debug_func(self, func, class_closure) -> int:
+        # Hand the class to the supplied function and return what it returns.
+        result = func(class_closure)
+        return result
+# We have to run this test in a subprocess because it requires a special
+# instantiation of the debug controller singleton.
+@isolate_in_subprocess(env=debug_env)
+@pytest.mark.timeout(60)
+async def test_debug_with_pickle_by_value():
+    """
+    This test tests debugger functionality when there are breakpoints in
+    code that has been pickled by value (as opposed to pickling by reference,
+    where the pickled representation is essentially just "from <module> import
+    <code>"). Cloudpickle will pickle by value for a few reasons, the primary
+    among them being:
+      - The function, class, etc. was defined in the __main__ module
+      - The function, class, etc. is a closure
+      - The function is a lambda
+    When code that was pickled by value hits a breakpoint, if the original file
+    that the code came from doesn't exist on the host, we need to do some special
+    handling inside `monarch._src.actor.debugger.pdb_wrapper` to make all the pdb
+    commands work as expected.
+    For this test, I created two files: /tmp/monarch_test/class_closure.py and
+    /tmp/monarch_test/function_closure.py. Their source code is contained in
+    the variables `class_closure_source` and `function_closure_source`,
+    respectively, above. The functions `load_class_closure` and `load_func_closure`
+    above contain `cloudpickle.dumps(ClassClosure(10).closure())`, and
+    `cloudpickle.dumps((func(5, True), func(5, False)))`, respectively.
+    The test unpickles these and sends them to an actor endpoint, in which
+    breakpoints will be hit and we can test the special pdb handling logic.
+    """
+    # Scripted debugger CLI input: the mock hands out one command per call,
+    # in this order. The list contains five "quit" commands and the test
+    # makes five `blocking_enter` calls below -- presumably each debug
+    # session consumes commands up to and including its "quit" (confirm
+    # against the debug controller implementation).
+    input_mock = AsyncMock()
+    input_mock.side_effect = [
+        "attach debugee 0",
+        "c",
+        "quit",
+        "attach debugee 0",
+        "bt",
+        "c",
+        "quit",
+        "attach debugee 0",
+        "b /tmp/monarch_test/class_closure:10",
+        "c",
+        "detach",
+        "quit",
+        "attach debugee 0",
+        "c",
+        "detach",
+        "quit",
+        "c",
+        "quit",
+    ]
+    # Every message written through the mocked output is recorded here so
+    # the assertions below can search for specific pdb output.
+    outputs = []
+    def _patch_output(msg):
+        nonlocal outputs
+        outputs.append(msg)
+    output_mock = AsyncMock()
+    output_mock.side_effect = _patch_output
+    # Replace the debugger's stdio input/output with the mocks for the
+    # duration of the test body.
+    with patch(
+        "monarch._src.actor.debugger.debugger.DebugStdIO.input", new=input_mock
+    ), patch("monarch._src.actor.debugger.debugger.DebugStdIO.output", new=output_mock):
+        pm = proc_mesh(gpus=1, hosts=1)
+        debug_controller = actor.get_or_spawn_controller(
+            "debug_controller", DebugControllerForTesting
+        ).get()
+        # Spawn a special source loader that knows how to retrieve the source code
+        # for /tmp/monarch_test/class_closure.py and
+        # /tmp/monarch_test/function_closure.py
+        actor.get_or_spawn_controller(
+            "source_loader", SourceLoaderControllerWithMockedSource
+        ).get()
+        debugee = pm.spawn("debugee", ClosureDebugeeActor)
+        # Unpickle the by-value closures that will be sent to the actor.
+        class_closure = load_class_closure()
+        func_bp_true, func_bp_false = load_func_closure()
+        # Scenario 1: the endpoint calls the class closure and then get_arg();
+        # the controller should report a single stop in get_arg at line 14.
+        fut = debugee.debug_class_closure.call_one(class_closure)
+        breakpoints = await _wait_for_breakpoints(debug_controller, 1)
+        assert breakpoints[0].function == "class_closure.get_arg"
+        assert breakpoints[0].lineno == 14
+        # First scripted session (attach, continue, quit).
+        debug_controller.blocking_enter.call_one().get()
+        # The pdb location banner must show the mocked file path and source line.
+        assert (
+            "> /tmp/monarch_test/class_closure.py(14)get_arg()\n-> return self.arg"
+            in outputs
+        )
+        await fut
+        # Scenario 2: run the function closure named `func_bp_false`; the only
+        # stop expected is again in get_arg, reached through func().
+        fut = debugee.debug_func.call_one(func_bp_false, class_closure)
+        breakpoints = await _wait_for_breakpoints(debug_controller, 1)
+        assert breakpoints[0].function == "class_closure.get_arg"
+        assert breakpoints[0].lineno == 14
+        # Second scripted session, which issues "bt" before continuing.
+        debug_controller.blocking_enter.call_one().get()
+        # The backtrace must list the func() frame followed by the current
+        # get_arg() frame, each with its source line, then the pdb prompt.
+        expected_backtrace = [
+            (
+                "  /tmp/monarch_test/function_closure.py(5)func()\n"
+                "-> return internal().get_arg() + arg"
+            ),
+            "\n",
+            "> /tmp/monarch_test/class_closure.py(14)get_arg()\n-> return self.arg",
+            "\n",
+            "(Pdb) ",
+        ]
+        # Locate the first expected entry, then require the entries that
+        # follow it in `outputs` to match the expected sequence exactly.
+        start = outputs.index(expected_backtrace[0])
+        assert expected_backtrace == outputs[start : start + len(expected_backtrace)]  # noqa
+        await fut
+        # Scenario 3: run the function closure named `func_bp_true`; the first
+        # stop is now inside func() itself at line 5.
+        fut = debugee.debug_func.call_one(func_bp_true, class_closure)
+        breakpoints = await _wait_for_breakpoints(debug_controller, 1)
+        assert breakpoints[0].function == "function_closure.func"
+        assert breakpoints[0].lineno == 5
+        # Third scripted session: sets a breakpoint at class_closure line 10,
+        # continues, then detaches.
+        debug_controller.blocking_enter.call_one().get()
+        assert (
+            "> /tmp/monarch_test/function_closure.py(5)func()\n-> return internal().get_arg() + arg"
+            in outputs
+        )
+        # The breakpoint set from the scripted "b" command must be reported
+        # against the .py path and must actually be hit in __init__.
+        assert "Breakpoint 1 at /tmp/monarch_test/class_closure.py:10" in outputs
+        assert (
+            "> /tmp/monarch_test/class_closure.py(10)__init__()\n-> self.arg = arg"
+            in outputs
+        )
+        # After detaching, the controller should still report the process as
+        # stopped in __init__ at line 10.
+        breakpoints = await _wait_for_breakpoints(debug_controller, 1)
+        assert breakpoints[0].function == "class_closure.__init__"
+        assert breakpoints[0].lineno == 10
+        # Fourth scripted session (attach, continue, detach, quit); afterwards
+        # the stop is expected to have moved on to get_arg at line 14.
+        debug_controller.blocking_enter.call_one().get()
+        breakpoints = await _wait_for_breakpoints(debug_controller, 1)
+        assert breakpoints[0].function == "class_closure.get_arg"
+        assert breakpoints[0].lineno == 14
+        # Fifth scripted session ("c", "quit" with no attach); afterwards the
+        # controller must list no outstanding breakpoints.
+        debug_controller.blocking_enter.call_one().get()
+        breakpoints = debug_controller.list.call_one().get()
+        assert len(breakpoints) == 0
+        await fut
+        await pm.stop()