torchmonarch-nightly 2025.6.27__cp313-cp313-manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (165)
  1. monarch/__init__.py +189 -0
  2. monarch/_monarch/__init__.py +5 -0
  3. monarch/_monarch/hyperactor/__init__.py +58 -0
  4. monarch/_monarch/selection/__init__.py +13 -0
  5. monarch/_monarch/worker/__init__.py +0 -0
  6. monarch/_monarch/worker/debugger.py +117 -0
  7. monarch/_monarch/worker/logging.py +107 -0
  8. monarch/_rust_bindings.so +0 -0
  9. monarch/_testing.py +230 -0
  10. monarch/actor_mesh.py +761 -0
  11. monarch/allocator.py +220 -0
  12. monarch/bootstrap_main.py +59 -0
  13. monarch/builtins/__init__.py +14 -0
  14. monarch/builtins/log.py +22 -0
  15. monarch/builtins/random.py +68 -0
  16. monarch/cached_remote_function.py +257 -0
  17. monarch/code_sync.py +10 -0
  18. monarch/common/_C.pyi +11 -0
  19. monarch/common/_C.so +0 -0
  20. monarch/common/__init__.py +0 -0
  21. monarch/common/_coalescing.py +308 -0
  22. monarch/common/_device_utils.py +18 -0
  23. monarch/common/_tensor_to_table.py +172 -0
  24. monarch/common/base_tensor.py +28 -0
  25. monarch/common/borrows.py +143 -0
  26. monarch/common/client.py +690 -0
  27. monarch/common/constants.py +10 -0
  28. monarch/common/context_manager.py +40 -0
  29. monarch/common/controller_api.py +104 -0
  30. monarch/common/device_mesh.py +417 -0
  31. monarch/common/fake.py +55 -0
  32. monarch/common/function.py +160 -0
  33. monarch/common/function_caching.py +164 -0
  34. monarch/common/future.py +168 -0
  35. monarch/common/invocation.py +125 -0
  36. monarch/common/mast.py +221 -0
  37. monarch/common/messages.py +573 -0
  38. monarch/common/mock_cuda.py +41 -0
  39. monarch/common/opaque_ref.py +98 -0
  40. monarch/common/pickle_flatten.py +48 -0
  41. monarch/common/pipe.py +152 -0
  42. monarch/common/process_group.py +55 -0
  43. monarch/common/recording.py +127 -0
  44. monarch/common/reference.py +33 -0
  45. monarch/common/remote.py +297 -0
  46. monarch/common/selection.py +9 -0
  47. monarch/common/shape.py +229 -0
  48. monarch/common/stream.py +114 -0
  49. monarch/common/tensor.py +814 -0
  50. monarch/common/tensor_factory.py +31 -0
  51. monarch/common/tree.py +73 -0
  52. monarch/controller/__init__.py +7 -0
  53. monarch/controller/backend.py +223 -0
  54. monarch/controller/controller.py +223 -0
  55. monarch/controller/debugger.py +47 -0
  56. monarch/controller/history.py +90 -0
  57. monarch/controller/rust_backend/__init__.py +7 -0
  58. monarch/controller/rust_backend/controller.py +245 -0
  59. monarch/debugger.py +379 -0
  60. monarch/fetch.py +55 -0
  61. monarch/future.py +76 -0
  62. monarch/gradient/__init__.py +11 -0
  63. monarch/gradient/_gradient_generator.pyi +22 -0
  64. monarch/gradient/_gradient_generator.so +0 -0
  65. monarch/gradient_generator.py +185 -0
  66. monarch/memory.py +43 -0
  67. monarch/mesh_controller.py +271 -0
  68. monarch/monarch_controller +0 -0
  69. monarch/notebook.py +761 -0
  70. monarch/opaque_module.py +235 -0
  71. monarch/opaque_object.py +88 -0
  72. monarch/parallel/__init__.py +9 -0
  73. monarch/parallel/pipelining/__init__.py +7 -0
  74. monarch/parallel/pipelining/runtime.py +847 -0
  75. monarch/parallel/pipelining/schedule_ir.py +692 -0
  76. monarch/parallel/pipelining/scheduler.py +249 -0
  77. monarch/pdb_wrapper.py +135 -0
  78. monarch/proc_mesh.py +299 -0
  79. monarch/profiler.py +160 -0
  80. monarch/python_local_mesh.py +107 -0
  81. monarch/random.py +61 -0
  82. monarch/rdma.py +162 -0
  83. monarch/remote_class.py +114 -0
  84. monarch/rust_backend_mesh.py +280 -0
  85. monarch/rust_local_mesh.py +1402 -0
  86. monarch/sim_mesh.py +359 -0
  87. monarch/simulator/__init__.py +7 -0
  88. monarch/simulator/command_history.py +424 -0
  89. monarch/simulator/config.py +21 -0
  90. monarch/simulator/interface.py +59 -0
  91. monarch/simulator/ir.py +770 -0
  92. monarch/simulator/mock_controller.py +214 -0
  93. monarch/simulator/profiling.py +424 -0
  94. monarch/simulator/simulator.py +1052 -0
  95. monarch/simulator/task.py +255 -0
  96. monarch/simulator/tensor.py +373 -0
  97. monarch/simulator/trace.py +395 -0
  98. monarch/simulator/utils.py +41 -0
  99. monarch/simulator/worker.py +389 -0
  100. monarch/telemetry.py +19 -0
  101. monarch/tensor_worker_main.py +260 -0
  102. monarch/tensorboard.py +84 -0
  103. monarch/timer/__init__.py +21 -0
  104. monarch/timer/example_monarch.py +78 -0
  105. monarch/timer/example_spmd.py +55 -0
  106. monarch/timer/execution_timer.py +199 -0
  107. monarch/timer/execution_timer_test.py +131 -0
  108. monarch/tools/__init__.py +7 -0
  109. monarch/tools/cli.py +167 -0
  110. monarch/tools/commands.py +251 -0
  111. monarch/tools/components/__init__.py +7 -0
  112. monarch/tools/components/hyperactor.py +58 -0
  113. monarch/tools/config/__init__.py +20 -0
  114. monarch/tools/config/defaults.py +54 -0
  115. monarch/tools/mesh_spec.py +165 -0
  116. monarch/tools/network.py +69 -0
  117. monarch/worker/__init__.py +7 -0
  118. monarch/worker/_testing_function.py +481 -0
  119. monarch/worker/compiled_block.py +270 -0
  120. monarch/worker/debugger.py +125 -0
  121. monarch/worker/lines.py +47 -0
  122. monarch/worker/monitor.py +53 -0
  123. monarch/worker/worker.py +1191 -0
  124. monarch/world_mesh.py +34 -0
  125. monarch_supervisor/__init__.py +1044 -0
  126. monarch_supervisor/_testing.py +44 -0
  127. monarch_supervisor/function_call.py +30 -0
  128. monarch_supervisor/host.py +386 -0
  129. monarch_supervisor/launchers.py +145 -0
  130. monarch_supervisor/log_pstree.py +48 -0
  131. monarch_supervisor/logging.py +103 -0
  132. monarch_supervisor/python_executable.py +42 -0
  133. tests/__init__.py +0 -0
  134. tests/dispatch_bench.py +124 -0
  135. tests/dispatch_bench_helper.py +25 -0
  136. tests/error_test_binary.py +180 -0
  137. tests/simulator/__init__.py +0 -0
  138. tests/simulator/test_profiling.py +136 -0
  139. tests/simulator/test_simulator.py +411 -0
  140. tests/simulator/test_task.py +64 -0
  141. tests/simulator/test_worker.py +102 -0
  142. tests/sleep_binary.py +35 -0
  143. tests/test_actor_error.py +240 -0
  144. tests/test_alloc.py +25 -0
  145. tests/test_allocator.py +365 -0
  146. tests/test_coalescing.py +492 -0
  147. tests/test_controller.py +845 -0
  148. tests/test_device_mesh.py +132 -0
  149. tests/test_fault_tolerance.py +398 -0
  150. tests/test_future.py +94 -0
  151. tests/test_grad_generator.py +121 -0
  152. tests/test_mock_cuda.py +74 -0
  153. tests/test_pdb_actor.py +110 -0
  154. tests/test_python_actors.py +736 -0
  155. tests/test_remote_functions.py +1271 -0
  156. tests/test_rust_backend.py +217 -0
  157. tests/test_signal_safe_block_on.py +103 -0
  158. tests/test_sim_backend.py +54 -0
  159. tests/test_tensor_engine.py +52 -0
  160. torchmonarch_nightly-2025.6.27.dist-info/METADATA +94 -0
  161. torchmonarch_nightly-2025.6.27.dist-info/RECORD +165 -0
  162. torchmonarch_nightly-2025.6.27.dist-info/WHEEL +5 -0
  163. torchmonarch_nightly-2025.6.27.dist-info/entry_points.txt +3 -0
  164. torchmonarch_nightly-2025.6.27.dist-info/licenses/LICENSE +29 -0
  165. torchmonarch_nightly-2025.6.27.dist-info/top_level.txt +3 -0
@@ -0,0 +1,395 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ # pyre-unsafe
8
+ import logging
9
+ import pickle
10
+ import subprocess
11
+ import traceback
12
+
13
+ from typing import Any, Dict, List, Literal, Sequence, TypedDict
14
+
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
class TraceEvent:
    """
    Represents a trace event in the simulation.

    Args:
        start (int): The start time, in nanoseconds, of the event.
        runtime (int): The runtime, in nanoseconds, of the event.
        meta (list): A list of metadata associated with the event.
        command_id (int): The associated command id of this task.
        traceback: Stack frames captured where the command originated.
    """

    def __init__(
        self,
        start: int,
        runtime: int,
        meta: List[str],
        command_id: int,
        traceback: Sequence[traceback.FrameSummary],
    ):
        self.start, self.runtime = start, runtime
        # End time is derived once so consumers never recompute it.
        self.end = start + runtime
        self.meta = meta
        self.command_id = command_id
        self.traceback = traceback

    def __repr__(self):
        return "E(meta={}, start={:.2f}, end={:.1f})".format(
            self.meta, self.start, self.end
        )
47
+
48
+
49
def visualize_events(worker_events):
    """Render per-worker event timelines as a stacked horizontal Gantt chart.

    Writes the interactive figure to ``sim.html`` in the current directory.

    Args:
        worker_events: Mapping from a process/worker key to a list of
            TraceEvent-like objects exposing ``meta``, ``start`` and ``end``.
    """
    import pandas as pd
    import plotly.graph_objs as go

    # Flatten {process: [events]} into one record per event.
    records = [
        {
            "Process": key,
            "Event": event.meta,
            "Start": event.start,
            "End": event.end,
            "Duration": event.end - event.start,
        }
        for key, events in worker_events.items()
        for event in events
    ]
    df = pd.DataFrame(records)

    fig = go.Figure()

    # Blue shades for forward ("fw") events.
    fw_list = [
        "#0000FF",  # Blue
        "#1E90FF",  # Dodger Blue
        "#00BFFF",  # Deep Sky Blue
        "#5F9EA0",  # Cadet Blue
        "#4682B4",  # Steel Blue
        "#87CEFA",  # Light Sky Blue
        "#6495ED",  # Cornflower Blue
        "#4169E1",  # Royal Blue
    ]
    # Red/pink shades for backward ("bw") events.
    bw_list = [
        "#FF0000",  # Red
        "#FF4500",  # Orange Red
        "#FF1493",  # Deep Pink
        "#FF69B4",  # Hot Pink
        "#DB7093",  # Pale Violet Red
        "#B22222",  # Firebrick
        "#8B0000",  # Dark Red
        "#FF6347",  # Tomato
    ]

    def get_color(metas):
        # Pick a palette shade keyed on the first numeric meta tag;
        # fall back to plain red when no tag matches.
        if "fw" in metas:
            for meta in metas:
                if meta.isdigit():
                    return fw_list[int(meta) % len(fw_list)]
        elif "bw" in metas:
            for meta in metas:
                if meta.isdigit():
                    # BUG FIX: index modulo the *bw* palette size (was
                    # len(fw_list); only worked because both lists happen
                    # to have the same length).
                    return bw_list[int(meta) % len(bw_list)]
        return "red"

    # One horizontal bar per event, one row per process.
    for process in df["Process"].unique():
        process_df = df[df["Process"] == process]
        for _, row in process_df.iterrows():
            color = get_color(row["Event"])
            fig.add_trace(
                go.Bar(
                    x=[row["Duration"]],
                    y=[str(process)],
                    base=[row["Start"]],
                    orientation="h",
                    name=" ".join(row["Event"]),
                    hoverinfo="name+x",
                    marker={
                        "color": color,
                    },
                    showlegend=False,  # Hide default legend
                )
            )

    fig.update_layout(
        title="Timeline Visualization",
        xaxis_title="Time",
        yaxis_title="Process",
        barmode="stack",
        showlegend=False,  # Disable the default legend
        yaxis={"autorange": "reversed"},  # Reverse the y-axis
    )

    # Persist the plot; use fig.show() instead for interactive sessions.
    fig.write_html("sim.html")
143
+
144
+
145
def dump_process_name(trace: List[Dict[str, Any]], *, pid: int, name: str):
    """Append a Chrome-trace metadata ("M") record naming process ``pid``."""
    record = {
        "name": "process_name",
        "ph": "M",
        "pid": pid,
        "tid": 0,
        "args": {"name": name},
    }
    trace.append(record)
155
+
156
+
157
+ def _include_file(filename: str):
158
+ if "controller/" in filename:
159
+ return False
160
+ return True
161
+
162
+
163
def _filter_traceback(tb: Sequence[traceback.FrameSummary]):
    """Trim notebook bootstrap frames and controller frames, newest first."""
    # Drop everything up to (and including) the last IPython "run_code"
    # frame, if one is present.
    last_run_code = None
    for idx, frame in enumerate(tb):
        if frame.name == "run_code":
            last_run_code = idx
    if last_run_code is not None:
        tb = tb[last_run_code + 1 :]  # noqa: whitespace before ':'
    # Keep only interesting frames, then reverse so the innermost call
    # comes first.
    kept = [frame for frame in tb if _include_file(frame.filename)]
    kept.reverse()
    return kept
170
+
171
+
172
def _format_traceback(tb):
    """Format a filtered traceback as text, innermost call first."""
    lines = traceback.format_list(_filter_traceback(tb))
    return "Traceback (most recent call first)\n" + "".join(lines)
176
+
177
+
178
def dump_thread_event_trace(
    trace: List[Dict[str, Any]],
    events: List[TraceEvent],
    *,
    pid: int,
    tid: int,
    name: str,
) -> float:
    """Append Chrome-trace records for one simulated thread.

    Emits a thread_name metadata record followed by one complete ("X")
    event per TraceEvent. Returns the latest event end time seen, in
    microseconds (0.0 when ``events`` is empty).
    """
    trace.append(
        {
            "name": "thread_name",
            "ph": "M",
            "pid": pid,
            "tid": tid,
            "args": {"name": name},
        }
    )
    max_time = 0.0
    for event in events:
        label = " ".join(event.meta)
        # Offset ids by pid so correlation ids stay unique across
        # processes in the merged trace.
        correlation = event.command_id + pid * 10000
        trace.append(
            {
                "name": label,
                "cat": "compute",
                "ph": "X",
                # Timestamps/durations are recorded in ns; Chrome traces
                # expect microseconds.
                "ts": event.start / 1000,
                "dur": event.runtime / 1000,
                "pid": pid,
                "tid": tid,
                "args": {
                    "External id": correlation,
                    "correlation": correlation,
                    "cbid": event.command_id,
                    " traceback": _format_traceback(event.traceback),
                },
                "cname": "rail_animation" if "waiting" in label else None,
            }
        )
        max_time = max(max_time, (event.start + event.runtime) / 1000)

    return max_time
219
+
220
+
221
def dump_memory_trace(
    trace: List[Dict[str, Any]], *, pid: int, memory: int, ts: int, name: str
) -> None:
    """Append a Chrome-trace counter ("C") record of allocated memory.

    ``ts`` is converted from ns to us and ``memory`` from bytes to MB.
    """
    record = {
        "name": name,
        "cat": "memory",
        "ph": "C",
        "ts": ts / 1000,
        "pid": pid,
        "args": {
            "allocated": memory / 10**6,
        },
    }
    trace.append(record)
236
+
237
+
238
def upload_trace(file_path) -> None:
    """Upload a trace file via an internal Perfetto share script.

    Best-effort: runs the helper through the shell and prints its output;
    a non-zero exit only prints a failure message, nothing is raised or
    returned.
    """
    logger.info("Uploading the trace file to Manifold...")

    # Internal helper script; "~" is expanded by the shell (shell=True below).
    command_path = "~/fbsource/arvr/scripts/perfetto/share_trace.py"
    # NOTE(review): shell=True with an interpolated path is fine for a
    # trusted local file, but breaks/injects if file_path ever contains
    # spaces or shell metacharacters — consider shlex.quote(file_path).
    command = [f"{command_path} {file_path}"]
    result = subprocess.run(command, capture_output=True, text=True, shell=True)

    if result.returncode == 0:
        print(result.stdout)
    else:
        print("Failed to upload the file.")
        print(result.stdout)
        print(result.stderr)
251
+
252
+
253
class Frame(TypedDict):
    """One stack frame of an allocation traceback (snapshot format)."""

    filename: str  # source file of the frame
    line: int  # line number within the file
    name: str  # function name
257
+
258
+
259
class Block(TypedDict):
    """A single allocation (or cached free block) within a Segment."""

    # A piece of memory returned from the allocator, or
    # currently cached but inactive.
    size: int
    requested_size: int  # size requested during malloc, may be smaller than
    # size due to rounding
    address: int
    state: Literal[
        "active_allocated",  # used by a tensor
        "active_awaiting_free",  # waiting for another stream to finish using
        # this, then it will become free
        "inactive",
    ]  # free for reuse
    frames: List[Frame]  # stack trace from where the allocation occurred
273
+
274
+
275
class Segment(TypedDict):
    """One cudaMalloc'd region in the snapshot, split into Blocks."""

    # Segments are memory returned from a cudaMalloc call.
    # The size of reserved memory is the sum of all Segments.
    # Segments are cached and reused for future allocations.
    # If the reuse is smaller than the segment, the segment
    # is split into more than one Block.
    # empty_cache() frees Segments that are entirely inactive.
    address: int
    total_size: int  # cudaMalloc'd size of segment
    stream: int
    segment_type: Literal["small", "large"]  # 'large' (>1MB)
    allocated_size: int  # size of memory in use
    active_size: int  # size of memory in use or in active_awaiting_free state
    device: int
    blocks: List[Block]
290
+
291
+
292
class TraceEntry(TypedDict):
    """One allocator action recorded in a device trace."""

    # When `torch.cuda.memory._record_memory_history()` is enabled,
    # the snapshot will contain TraceEntry objects that record each
    # action the allocator took.
    action: Literal[
        "alloc",  # memory allocated
        "free_requested",  # the allocator received a call to free memory
        "free_completed",  # the memory that was requested to be freed is now
        # able to be used in future allocation calls
        "segment_alloc",  # the caching allocator asked cudaMalloc for more memory
        # and added it as a segment in its cache
        "segment_free",  # the caching allocator called cudaFree to return memory
        # to cuda possibly trying to free up memory to
        # allocate more segments or because empty_caches was called
        "oom",  # the allocator threw an OOM exception. 'size' is
        # the requested number of bytes that did not succeed
        "snapshot",  # the allocator generated a memory snapshot
        # useful to correlate a previously taken
        # snapshot with this trace
    ]
    addr: int  # not present for OOM
    frames: List[Frame]
    size: int
    stream: int
316
+
317
+
318
class Snapshot(TypedDict):
    """Top-level memory snapshot: all segments plus per-device trace lists."""

    segments: List[Segment]
    device_traces: List[List[TraceEntry]]
321
+
322
+
323
class MemoryViewer:
    """Builds a torch.cuda-style memory snapshot from simulated
    alloc/free events so it can be inspected with the standard PyTorch
    memory visualizer tooling."""

    def __init__(self) -> None:
        # One synthetic Segment per stream for the current device.
        self.current_segments = {}
        self.snapshot: Snapshot = {"segments": [], "device_traces": []}
        # Maps real addresses to synthetic offsets within the stream's segment.
        self.addr_map = {}

    def next_device(self) -> None:
        """Start recording a new device; resets all per-device state."""
        self.addr_map.clear()
        self.current_segments.clear()
        self.snapshot["device_traces"].append([])

    def get_or_add_segment(self, stream: int):
        """Return the synthetic segment for ``stream``, creating it lazily."""
        try:
            return self.current_segments[stream]
        except KeyError:
            pass
        segment: Segment = {
            "address": 0,
            "total_size": 0,
            "stream": stream,
            "segment_type": "large",
            "allocated_size": 0,
            "active_size": 0,
            "blocks": [],
            # Device index is whichever device_traces slot is current.
            "device": len(self.snapshot["device_traces"]) - 1,
        }
        self.current_segments[stream] = segment
        self.snapshot["segments"].append(segment)
        return segment

    def add_trace(self, addr: int, delta: int, stream: int, traceback) -> None:
        """Record an allocation (delta > 0) or free (delta < 0) of |delta| bytes."""
        segment = self.get_or_add_segment(stream)
        if delta > 0:
            # Place the allocation at the current end of the segment and
            # remember where it went so the matching free lines up.
            maddr = self.addr_map[addr] = segment["allocated_size"]
            segment["allocated_size"] += delta
            action: Literal["alloc", "free_requested"] = "alloc"
        else:
            maddr = self.addr_map[addr]
            action: Literal["alloc", "free_requested"] = "free_requested"

        frames: List[Frame] = [
            {"filename": f.filename, "line": f.lineno, "name": f.name}
            for f in _filter_traceback(traceback)
        ]

        entry: TraceEntry = {
            "addr": maddr,
            "frames": frames,
            "size": abs(delta),
            "stream": stream,
            "action": action,
        }
        device_trace = self.snapshot["device_traces"][-1]
        device_trace.append(entry)
        if delta < 0:
            # The simulator frees instantly, so emit the completion
            # immediately after the request.
            # pyre-ignore
            device_trace.append({**entry, "action": "free_completed"})

    def dump(self, path: str) -> None:
        """Finalize segments and pickle the snapshot to ``path``."""
        for segment in self.snapshot["segments"]:
            size = segment["total_size"] = segment["allocated_size"]
            # A single covering "inactive" block makes the segment
            # well-formed for the visualizer.
            segment["blocks"].append(
                {
                    "address": 0,
                    "size": size,
                    "requested_size": size,
                    "state": "inactive",
                    "frames": [],
                }
            )

        with open(path, "wb") as fp:
            # @lint-ignore PYTHONPICKLEISBAD
            pickle.dump(self.snapshot, fp)
@@ -0,0 +1,41 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ # pyre-unsafe
8
+ import os
9
+
10
+ import numpy as np
11
+
12
+
13
def file_path_with_iter(file_path: str, iter_count: int) -> str:
    """Return ``file_path`` with ``_{iter_count}`` inserted before the extension.

    E.g. ``file_path_with_iter("/tmp/trace.json", 3)`` -> ``"/tmp/trace_3.json"``.

    Uses ``os.path.splitext`` so base names containing extra dots
    (e.g. ``run.v2.json``) are handled; the original two-value
    ``split(".")`` unpacking raised ValueError on them.
    """
    dir_path, base_name = os.path.split(file_path)
    root, ext = os.path.splitext(base_name)
    return os.path.join(dir_path, f"{root}_{iter_count}{ext}")
18
+
19
+
20
def compress_workers_range(workers) -> str:
    """Compress a collection of worker ids into "[a-b]" range strings.

    E.g. ``[3, 1, 2, 5]`` -> ``"[1-3] [5-5]"``. Returns "" for empty input.

    BUG FIX: the original sorted the ids into ``sorted_workers`` but then
    iterated the *unsorted* input, producing broken ranges whenever the
    ids were out of order; it also crashed (IndexError) on empty input.
    """
    if len(workers) == 0:
        return ""
    sorted_workers = np.sort(workers)
    regions = []
    start = end = sorted_workers[0]
    for worker in sorted_workers[1:]:
        if worker == end + 1:
            # Extend the current contiguous run.
            end = worker
        else:
            # Gap found: close out the current run and start a new one.
            regions.append(f"[{start}-{end}]")
            start = end = worker
    regions.append(f"[{start}-{end}]")
    return " ".join(regions)
34
+
35
+
36
def clean_name(name: str) -> str:
    """Shorten a torch op name, e.g. "torch.ops.aten.add.default" -> "aten.add"."""
    prefix = "torch.ops."
    suffix = ".default"
    # Only aten ops lose the "torch.ops." prefix; custom ops keep it.
    if name.startswith(prefix + "aten."):
        name = name[len(prefix) :]  # noqa: whitespace before ':'
    if name.endswith(suffix):
        name = name[: -len(suffix)]
    return name