torchmonarch-nightly 2025.9.3-cp311-cp311-manylinux2014_x86_64.whl → 2025.9.5-cp311-cp311-manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
monarch/_rust_bindings.so CHANGED
Binary file
monarch/_src/actor/actor_mesh.py CHANGED
@@ -529,14 +529,38 @@ def as_endpoint(
 
 
 class Accumulator(Generic[P, R, A]):
+    """
+    Accumulate the result of a broadcast invocation of an endpoint
+    across a sliced mesh.
+
+    Usage:
+        >>> counter = Accumulator(Actor.increment, 0, lambda x, y: x + y)
+    """
+
     def __init__(
         self, endpoint: Endpoint[P, R], identity: A, combine: Callable[[A, R], A]
     ) -> None:
+        """
+        Args:
+            endpoint: Endpoint to accumulate the result of.
+            identity: Initial value of the accumulated value before the first combine invocation.
+            combine: Lambda invoked to combine the result of the endpoint with the accumulated value.
+        """
         self._endpoint: Endpoint[P, R] = endpoint
         self._identity: A = identity
         self._combine: Callable[[A, R], A] = combine
 
     def accumulate(self, *args: P.args, **kwargs: P.kwargs) -> "Future[A]":
+        """
+        Accumulate the result of the endpoint invocation.
+
+        Args:
+            args: Arguments to pass to the endpoint.
+            kwargs: Keyword arguments to pass to the endpoint.
+
+        Returns:
+            Future that resolves to the accumulated value.
+        """
         gen: Generator[Future[R], None, None] = self._endpoint.stream(*args, **kwargs)
 
         async def impl() -> A:
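For orientation, here is a minimal sketch of how the newly documented `Accumulator` might be used. The `Counter` actor and its `increment` endpoint are hypothetical; only the `Accumulator(endpoint, identity, combine)` shape comes from the docstring above.

```python
# Sketch only: Counter/increment are hypothetical; Accumulator's constructor
# arguments (endpoint, identity, combine) follow the docstring in this diff.
from monarch.actor import Accumulator, Actor, endpoint

class Counter(Actor):
    def __init__(self, start: int) -> None:
        self._value = start

    @endpoint
    def increment(self) -> int:
        self._value += 1
        return self._value

# Sum the per-rank results of a broadcast `increment` across a mesh:
summed = Accumulator(Counter.increment, 0, lambda acc, r: acc + r)
# future = summed.accumulate()  # Future[int]; resolves once every rank replies
```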
@@ -550,7 +574,7 @@ class Accumulator(Generic[P, R, A]):
 
 class ValueMesh(MeshTrait, Generic[R]):
     """
-    Container of return values, indexed by rank.
+    A mesh that holds the result of an endpoint invocation.
     """
 
     def __init__(self, shape: Shape, values: List[R]) -> None:
@@ -561,6 +585,18 @@ class ValueMesh(MeshTrait, Generic[R]):
         return ValueMesh(shape, self._values)
 
     def item(self, **kwargs) -> R:
+        """
+        Get the value at the given coordinates.
+
+        Args:
+            kwargs: Coordinates to get the value at.
+
+        Returns:
+            Value at the given coordinate.
+
+        Raises:
+            KeyError: If invalid coordinates are provided.
+        """
         coordinates = [kwargs.pop(label) for label in self._labels]
         if kwargs:
             raise KeyError(f"item has extra dimensions: {list(kwargs.keys())}")
@@ -568,6 +604,12 @@ class ValueMesh(MeshTrait, Generic[R]):
         return self._values[self._ndslice.nditem(coordinates)]
 
     def items(self) -> Iterable[Tuple[Point, R]]:
+        """
+        Generator that yields the value at each coordinate.
+
+        Returns:
+            (Point, value) pairs for all coordinates in the mesh.
+        """
         extent = self._shape.extent
         for i, rank in enumerate(self._shape.ranks()):
             yield Point(i, extent), self._values[rank]
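A sketch of reading results back out of a `ValueMesh`. It assumes `counters` is an actor mesh spawned over procs with a `gpus` dimension, and that the endpoint's `call()` adverb gathers per-rank results into a `ValueMesh[int]`:

```python
# Sketch: gather per-rank results, then index by coordinate or iterate.
values = counters.increment.call().get()   # assumed: ValueMesh[int]

first = values.item(gpus=0)                # one rank; KeyError on bad/extra dims

for point, value in values.items():        # (Point, value) for every coordinate
    print(point["gpus"], value)
```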
@@ -596,14 +638,27 @@ def send(
     selection: Selection = "all",
 ) -> None:
     """
-    Fire-and-forget broadcast invocation of the endpoint across all actors in the mesh.
+    Fire-and-forget broadcast invocation of the endpoint across a given selection of the mesh.
+
+    This sends the message to all actors but does not wait for any result. Use the port provided to
+    send the response back to the caller.
 
-    This sends the message to all actors but does not wait for any result.
+    Args:
+        endpoint: Endpoint to invoke.
+        args: Arguments to pass to the endpoint.
+        kwargs: Keyword arguments to pass to the endpoint.
+        port: Handle to send the response to.
+        selection: Selection query representing a subset of the mesh.
     """
     endpoint._send(args, kwargs, port, selection)
 
 
 class Port(Generic[R]):
+    """
+    Handle used to send reliable in-order messages through a channel to
+    a PortReceiver.
+    """
+
     def __init__(
         self,
         port_ref: PortRef | OncePortRef,
@@ -615,6 +670,13 @@ class Port(Generic[R]):
         self._rank = rank
 
     def send(self, obj: R) -> None:
+        """
+        Fire-and-forget send of R-typed objects, delivered in order
+        through a channel to the corresponding PortReceiver.
+
+        Args:
+            obj: R-typed object to send.
+        """
        self._port_ref.send(
            self._mailbox,
            PythonMessage(PythonMessageKind.Result(self._rank), _pickle(obj)),
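A sketch of the fire-and-forget pattern these docstrings describe: `send` routes replies to an explicit `Port` instead of returning a future. The `counters` mesh and the private import path are assumptions for illustration; the channel pair comes from `Channel.open`, shown in the next hunk.

```python
# Sketch: broadcast without blocking; replies flow through an explicit port.
# `counters` and the import path are assumptions for illustration.
from monarch._src.actor.actor_mesh import Channel, send

port, receiver = Channel.open()              # Port[int], PortReceiver[int]
send(counters.increment, (), {}, port=port)  # returns immediately
# Each selected actor replies via `port`; drain replies with the receiver.
```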
@@ -656,8 +718,17 @@ T = TypeVar("T")
 # not part of the Endpoint API because the way it accepts arguments
 # and handles concerns is different.
 class Channel(Generic[R]):
+    """
+    An advanced, low-level API for a communication channel used for message passing
+    between actors.
+
+    Provides static methods to create communication channels with port pairs
+    for sending and receiving messages of type R.
+    """
+
     @staticmethod
     def open(once: bool = False) -> Tuple["Port[R]", "PortReceiver[R]"]:
+        """ """
         mailbox = context().actor_instance._mailbox
         handle, receiver = mailbox.open_once_port() if once else mailbox.open_port()
         port_ref = handle.bind()
@@ -673,6 +744,14 @@ class Channel(Generic[R]):
 
 
 class PortReceiver(Generic[R]):
+    """
+    Receiver for messages sent through a communication channel.
+
+    Handles receiving R-typed objects sent from a corresponding Port,
+    with asynchronous message reception and optional supervision
+    monitoring for error handling.
+    """
+
     def __init__(
         self,
         mailbox: Mailbox,
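A sketch of the channel pair in isolation. That `recv()` returns a `Future[R]` is an assumption consistent with the rest of this API; `once=True` maps to the single-use port visible in the body of `open` above.

```python
# Sketch: an in-process round trip over a channel; import path is an assumption.
from monarch._src.actor.actor_mesh import Channel

port, receiver = Channel.open()      # Tuple[Port[str], PortReceiver[str]]
port.send("hello")                   # fire-and-forget, delivered in order
print(receiver.recv().get())         # assumed Future[str] -> "hello"

once_port, once_receiver = Channel.open(once=True)   # single-use pair
```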
@@ -784,6 +863,7 @@ class _Actor:
         ins.rank = ctx.message_rank
         try:
             self.instance = Class(*args, **kwargs)
+            self._maybe_exit_debugger()
         except Exception as e:
             self._saved_error = ActorError(
                 e, f"Remote actor {Class}.__init__ call failed."
@@ -956,6 +1036,15 @@ class Actor(MeshTrait, DeprecatedNotAFuture):
 
 
 class ActorMesh(MeshTrait, Generic[T], DeprecatedNotAFuture):
+    """
+    A group of actor instances of the same class.
+
+    Represents a collection of T-typed actor instances spawned at most once per process
+    that can be communicated with collectively or individually. Provides
+    methods for spawning actors, managing their lifecycle, and creating
+    endpoints for method invocation across the mesh.
+    """
+
     def __init__(
         self,
         Class: Type[T],
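A sketch of the collective-versus-individual communication the `ActorMesh` docstring describes. The `Counter` actor, the slicing key, and the endpoint adverbs (`broadcast`, `call_one`) follow monarch's endpoint conventions but are assumptions here, not taken from this diff.

```python
# Sketch: talk to every instance at once, or slice down to a single one.
counters = procs.spawn("counters", Counter, 0)   # one instance per proc

counters.increment.broadcast()                   # collective, no result
one = counters.slice(gpus=0)                     # a single instance
print(one.increment.call_one().get())            # assumed single-actor call
```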
monarch/_src/actor/host_mesh.py CHANGED
@@ -35,11 +35,23 @@ def this_proc() -> "ProcMesh":
 
 
 def create_local_host_mesh() -> "HostMesh":
+    """
+    Create a local host mesh for the current machine.
+
+    Returns:
+        HostMesh: A single-host mesh configured for local process allocation.
+    """
     cmd, args, env = _get_bootstrap_args()
     return HostMesh(Shape.unity(), ProcessAllocator(cmd, args, env))
 
 
 class HostMesh(MeshTrait):
+    """
+    HostMesh represents a collection of compute hosts that can be used to spawn
+    processes and actors. The class requires you to provide an AllocateMixin that
+    interfaces with the underlying resource allocator of your choice.
+    """
+
     def __init__(self, shape: Shape, allocator: AllocateMixin):
         self._allocator = allocator
         self._shape = shape
@@ -57,12 +69,14 @@ class HostMesh(MeshTrait):
         """
         Start new processes on this host mesh. By default this starts one proc
         on each host in the mesh. Additional procs can be started using `per_host` to
-        specify the local shape, e.g.
+        specify the local shape, e.g.
             per_host = {'gpus': 8}
         Will create a proc mesh with an additional 'gpus' dimension.
 
         `bootstrap` is a function that will be run at startup on each proc and can be used to e.g.
         configure CUDA or NCCL. We guarantee that CUDA has not been initialized before bootstrap is called.
+
+        TODO: For now, a new allocator is created for every new ProcMesh.
         """
         if per_host is None:
             per_host = {}
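A sketch of `spawn_procs` as documented above. The bootstrap body is a hypothetical example of pre-CUDA configuration; only the `per_host` and `bootstrap` parameters come from the docstring.

```python
# Sketch: one proc per host by default; per_host adds a 'gpus' dimension.
def bootstrap() -> None:
    # Hypothetical: runs on each proc before CUDA is initialized.
    import os
    os.environ.setdefault("NCCL_DEBUG", "WARN")

host = create_local_host_mesh()
procs = host.spawn_procs(per_host={"gpus": 8}, bootstrap=bootstrap)
```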
@@ -107,6 +121,12 @@ class HostMesh(MeshTrait):
 
 
 def fake_in_process_host() -> "HostMesh":
+    """
+    Create a host mesh for testing and development using a local allocator.
+
+    Returns:
+        HostMesh: A host mesh configured with local allocation for in-process use.
+    """
     return HostMesh(Shape.unity(), LocalAllocator())
 
 
monarch/_src/actor/proc_mesh.py CHANGED
@@ -95,14 +95,10 @@ if TYPE_CHECKING:
 
 class SetupActor(Actor):
     """
-    A helper actor to setup the proc mesh with user defined setup method.
-    Typically used to setup the environment variables.
+    A helper actor to set up the actor mesh with a user-defined setup method.
     """
 
     def __init__(self, env: Callable[[], None]) -> None:
-        """
-        Initialize the setup actor with the user defined setup method.
-        """
         self._setup_method = env
 
     @endpoint
@@ -133,8 +129,12 @@ def _use_standin_mesh() -> bool:
     return os.getenv("USE_STANDIN_ACTOR_MESH", default="0") != "0"
 
 
-# Ultra-hack to allow actors to identify proc meshes but with no real functionality.
 class ProcMeshRef:
+    """
+    A serializable remote reference to a ProcMesh. The reference is weak: there is no
+    support for reference counting, and spawning actors through a ProcMeshRef on a stopped or failed mesh will fail.
+    """
+
     def __init__(self, proc_mesh_id: int) -> None:
         self._proc_mesh_id = proc_mesh_id
         self._host_mesh: Optional["HostMesh"] = None
@@ -179,6 +179,17 @@ def _deref_proc_mesh(proc_mesh: ProcMeshRef) -> "ProcMesh":
 
 
 class ProcMesh(MeshTrait, DeprecatedNotAFuture):
+    """
+    A distributed mesh of processes for actor computation.
+
+    ProcMesh represents a collection of processes that can spawn and manage actors.
+    It provides the foundation for distributed actor systems by managing process
+    allocation, lifecycle, and communication across multiple hosts and devices.
+
+    The ProcMesh supports spawning actors, monitoring process health, logging
+    configuration, and code synchronization across distributed processes.
+    """
+
     def __init__(
         self,
         hy_proc_mesh: "Shared[HyProcMesh]",
@@ -249,6 +260,22 @@ class ProcMesh(MeshTrait, DeprecatedNotAFuture):
         return pm
 
     def spawn(self, name: str, Class: Type[T], *args: Any, **kwargs: Any) -> T:
+        """
+        Spawn a T-typed actor mesh on the process mesh.
+
+        Args:
+        - `name`: The name of the actor.
+        - `Class`: The class of the actor to spawn.
+        - `args`: Positional arguments to pass to the actor's constructor.
+        - `kwargs`: Keyword arguments to pass to the actor's constructor.
+
+        Returns:
+        - The actor instance.
+
+        Usage:
+            >>> procs: ProcMesh = host_mesh.spawn_procs(per_host={"gpus": 8})
+            >>> counters: Counter = procs.spawn("counters", Counter, 0)
+        """
         if self._slice:
             raise NotImplementedError("NYI: spawn on slice of a proc mesh.")
         return self._spawn_nonblocking(name, Class, *args, **kwargs)
@@ -294,19 +321,9 @@ class ProcMesh(MeshTrait, DeprecatedNotAFuture):
         Allocate a process mesh according to the provided alloc.
         Returns when the mesh is fully allocated.
 
-        Arguments:
-        - `alloc`: The alloc to allocate according to.
+        Args:
+        - `alloc`: A generator that yields a list of allocations.
         - `setup`: An optional lambda function to configure environment variables on the allocated mesh.
-          Use the `current_rank()` method within the lambda to obtain the rank.
-
-        Example of a setup method to initialize torch distributed environment variables:
-        ```
-        def setup():
-            rank = current_rank()
-            os.environ["RANK"] = str(rank)
-            os.environ["WORLD_SIZE"] = str(len(rank.shape))
-            os.environ["LOCAL_RANK"] = str(rank["gpus"])
-        ```
         """
 
         async def task() -> HyProcMesh:
@@ -432,6 +449,14 @@ class ProcMesh(MeshTrait, DeprecatedNotAFuture):
         conda: bool = False,
         auto_reload: bool = False,
     ) -> None:
+        """
+        Sync local code changes to the remote processes.
+
+        Args:
+            workspace: The workspace to sync.
+            conda: If True, also sync the currently activated conda env.
+            auto_reload: If True, automatically reload the workspace on changes.
+        """
         if self._code_sync_client is None:
             self._code_sync_client = CodeSyncMeshClient.spawn_blocking(
                 proc_mesh=await self._proc_mesh_for_asyncio_fixme,
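A sketch of the call, assuming an async context; how `workspace` is constructed (see `monarch.tools.config.workspace`) and the `procs` mesh are assumptions carried over from the earlier examples.

```python
# Sketch: push local edits (and the active conda env) to every proc,
# then hot-reload on subsequent changes. `procs`/`workspace` are assumptions.
async def push_code() -> None:
    await procs.sync_workspace(workspace, conda=True, auto_reload=True)
```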
@@ -525,6 +550,10 @@ class ProcMesh(MeshTrait, DeprecatedNotAFuture):
         return self
 
     def stop(self) -> Future[None]:
+        """
+        This will stop all processes (and actors) in the mesh and
+        release any resources associated with the mesh.
+        """
         self._logging_manager.stop()
 
         async def _stop_nonblocking() -> None:
@@ -574,6 +603,23 @@ class ProcMesh(MeshTrait, DeprecatedNotAFuture):
 
 
 def local_proc_mesh(*, gpus: Optional[int] = None, hosts: int = 1) -> ProcMesh:
+    """
+    Create a local process mesh for testing and development.
+
+    This function creates a process mesh using local allocation instead of
+    distributed process allocation. Primarily used for testing scenarios.
+
+    Args:
+        gpus: Number of GPUs to allocate per host. If None, uses local device count.
+        hosts: Number of hosts to allocate. Defaults to 1.
+
+    Returns:
+        ProcMesh: A locally allocated process mesh.
+
+    Warning:
+        This function is deprecated. Use `fake_in_process_host().spawn_procs()`
+        for testing or `this_proc().spawn_procs()` for current process actors.
+    """
     warnings.warn(
         "Use monarch._src.actor.host_mesh.fake_in_process_host().spawn_procs for testing. For launching an actor in the current process use this_proc().spawn_procs()",
         DeprecationWarning,
@@ -596,6 +642,22 @@ def sim_proc_mesh(
     dcs: int = 1,
     regions: int = 1,
 ) -> ProcMesh:
+    """Create a simulated process mesh for testing distributed scenarios.
+
+    This function creates a process mesh using simulation allocation to test
+    distributed behavior without requiring actual remote resources.
+
+    Args:
+        gpus: Number of GPUs per host. Defaults to 1.
+        hosts: Number of hosts. Defaults to 1.
+        racks: Number of racks. Defaults to 1.
+        zones: Number of zones. Defaults to 1.
+        dcs: Number of data centers. Defaults to 1.
+        regions: Number of regions. Defaults to 1.
+
+    Returns:
+        ProcMesh: A simulated process mesh with the specified topology.
+    """
     spec: AllocSpec = AllocSpec(
         AllocConstraints(),
         hosts=hosts,
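A sketch of exercising the simulated topology; the `Worker` actor is hypothetical, and only the keyword parameters come from the docstring above.

```python
# Sketch: a 2-host x 4-GPU simulated mesh; no real remote resources needed.
procs = sim_proc_mesh(gpus=4, hosts=2)
workers = procs.spawn("workers", Worker)   # Worker is a hypothetical actor
```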
@@ -658,6 +720,25 @@ def proc_mesh(
     env: dict[str, str] | None = None,
     setup: Callable[[], None] | None = None,
 ) -> ProcMesh:
+    """
+    Create a distributed process mesh across hosts.
+
+    This function creates a process mesh using distributed process allocation
+    across multiple hosts and GPUs. Used for production distributed computing.
+
+    Args:
+        gpus: Number of GPUs per host. If None, uses local device count.
+        hosts: Number of hosts to allocate. Defaults to 1.
+        env: Environment variables to set on remote processes.
+        setup: Optional setup function to run on each process at startup.
+
+    Returns:
+        ProcMesh: A distributed process mesh with the specified configuration.
+
+    Warning:
+        This function is deprecated. Use `this_host().spawn_procs()` with
+        appropriate per_host configuration instead.
+    """
     warnings.warn(
         "use this_host().spawn_procs(per_host = {'hosts': 2, 'gpus': 3}) instead of monarch.actor.proc_mesh(hosts=2, gpus=3)",
         DeprecationWarning,
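The replacement the deprecation warning points to, as a sketch (the `this_host` import path is an assumption; the call itself is quoted from the warning text):

```python
# Sketch: the non-deprecated equivalent of proc_mesh(hosts=2, gpus=3).
from monarch.actor import this_host

procs = this_host().spawn_procs(per_host={"hosts": 2, "gpus": 3})
```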
monarch/actor/__init__.py CHANGED
@@ -9,6 +9,7 @@
 Monarch Actor API - Public interface for actor functionality.
 """
 
+from monarch._rust_bindings.monarch_hyperactor.shape import Extent
 from monarch._src.actor.actor_mesh import (
     Accumulator,
     Actor,
@@ -19,6 +20,7 @@ from monarch._src.actor.actor_mesh import (
     current_actor_name,
     current_rank,
     current_size,
+    Endpoint,
     Point,
     Port,
     PortReceiver,
@@ -70,4 +72,6 @@ __all__ = [
     "hosts_from_config",
     "Port",
     "PortReceiver",
+    "Endpoint",
+    "Extent",
 ]
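With these exports, `Endpoint` and `Extent` become part of the public surface. A sketch of what that enables; the `Endpoint[..., int]` generic subscript is an assumption about its ParamSpec/return typing:

```python
# Sketch: the newly exported names are importable from the public package.
from monarch.actor import Accumulator, Endpoint, Extent

def summed(ep: Endpoint[..., int]) -> Accumulator:
    # Endpoint can now appear in user-facing type annotations.
    return Accumulator(ep, 0, lambda acc, r: acc + r)
```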
monarch/common/tensor.py CHANGED
@@ -80,10 +80,6 @@ class Tensor(Referenceable, BaseTensor):
     in a device mesh. It provides the same interface as PyTorch tensors but
     enables distributed operations and communication patterns.
 
-    Args:
-        fake (torch.Tensor): A fake tensor representing the shape and type
-        mesh (DeviceMesh): The device mesh this tensor is distributed across
-        stream (Stream): The computation stream for this tensor
     """
 
     # pyre-fixme[13]: Attribute `stream` is never initialized.
monarch/gradient/_gradient_generator.so CHANGED
Binary file
monarch/monarch_controller CHANGED
Binary file
monarch/tools/components/hyperactor.py CHANGED
@@ -9,7 +9,8 @@ import getpass
 from typing import Optional
 
 from monarch.tools import mesh_spec
-from monarch.tools.config import UnnamedAppDef
+
+from monarch.tools.config import NOT_SET
 from monarch.tools.mesh_spec import mesh_spec_from_str
 from torchx import specs
 
@@ -19,6 +20,7 @@ _USER: str = getpass.getuser()
 
 DEFAULT_NAME: str = f"monarch-{_USER}"
 
+
 __version__ = "latest"  # TODO get version from monarch.__version_
 
 
@@ -28,7 +30,7 @@ def host_mesh(
     env: Optional[dict[str, str]] = None,
     port: int = mesh_spec.DEFAULT_REMOTE_ALLOCATOR_PORT,
     program: str = "monarch_bootstrap",  # installed with monarch wheel (as console script)
-) -> UnnamedAppDef:
+) -> specs.AppDef:
     """
     Args:
         name: the name of the monarch server job
@@ -39,7 +41,7 @@ def host_mesh(
         program: path to the binary that the remote process allocator spawns on an allocation request
     """
 
-    appdef = UnnamedAppDef()
+    appdef = specs.AppDef(name=NOT_SET)
 
     for mesh in [mesh_spec_from_str(mesh) for mesh in meshes]:
         mesh_role = specs.Role(
monarch/tools/config/__init__.py CHANGED
@@ -7,26 +7,22 @@
 # pyre-strict
 import warnings
 from dataclasses import dataclass, field
-from typing import Any, Dict, List, TYPE_CHECKING
+from typing import Any
 
 from monarch.tools.config.workspace import Workspace
 
-# Defer the import of Role to avoid requiring torchx at import time
-if TYPE_CHECKING:
-    from torchx.specs import Role
-
+# Gracefully handle cases where torchx might not be installed
+# NOTE: this can be removed once torchx.specs moves to monarch.session
+try:
+    from torchx import specs
+except ImportError:
+    pass
 
 NOT_SET: str = "__NOT_SET__"
 
 
-@dataclass
-class UnnamedAppDef:
-    """
-    A TorchX AppDef without a name.
-    """
-
-    roles: List["Role"] = field(default_factory=list)
-    metadata: Dict[str, str] = field(default_factory=dict)
+def _empty_appdef() -> "specs.AppDef":
+    return specs.AppDef(name=NOT_SET)
 
 
 @dataclass
@@ -39,7 +35,7 @@ class Config:
     scheduler_args: dict[str, Any] = field(default_factory=dict)
     workspace: Workspace = field(default_factory=Workspace.null)
     dryrun: bool = False
-    appdef: UnnamedAppDef = field(default_factory=UnnamedAppDef)
+    appdef: "specs.AppDef" = field(default_factory=_empty_appdef)
 
     def __post_init__(self) -> None:
         # workspace used to be Optional[str]
monarch/tools/config/defaults.py CHANGED
@@ -12,7 +12,7 @@ import warnings
 from typing import Callable
 
 from monarch.tools.components import hyperactor
-from monarch.tools.config import Config, UnnamedAppDef
+from monarch.tools.config import Config
 from monarch.tools.config.workspace import Workspace
 
 from torchx import specs
@@ -25,7 +25,7 @@ from torchx.schedulers import (
 )
 
 
-def component_fn(scheduler: str) -> Callable[..., UnnamedAppDef]:
+def component_fn(scheduler: str) -> Callable[..., specs.AppDef]:
     """The default TorchX component function for the scheduler"""
     return hyperactor.host_mesh
 
monarch/tools/mesh_spec.py CHANGED
@@ -9,8 +9,6 @@ import string
 from dataclasses import dataclass, field
 from typing import Any, Optional
 
-from monarch.tools.config import UnnamedAppDef
-
 from monarch.tools.network import get_sockaddr
 from torchx import specs
 from torchx.specs.api import is_terminal
@@ -72,7 +70,7 @@ def _tag(mesh_name: str, tag_template: str) -> str:
     return string.Template(tag_template).substitute(mesh_name=mesh_name)
 
 
-def tag_as_metadata(mesh_spec: MeshSpec, appdef: UnnamedAppDef) -> None:
+def tag_as_metadata(mesh_spec: MeshSpec, appdef: specs.AppDef) -> None:
     appdef.metadata[_tag(mesh_spec.name, _TAG_HOST_TYPE)] = mesh_spec.host_type
     appdef.metadata[_tag(mesh_spec.name, _TAG_GPUS)] = str(mesh_spec.gpus)
     appdef.metadata[_tag(mesh_spec.name, _TAG_TRANSPORT)] = mesh_spec.transport
torchmonarch_nightly-2025.9.3.dist-info/METADATA → torchmonarch_nightly-2025.9.5.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: torchmonarch-nightly
-Version: 2025.9.3
+Version: 2025.9.5
 Summary: Monarch: Single controller library
 Author: Meta
 Author-email: oncall+monarch@xmail.facebook.com
@@ -9,6 +9,7 @@ Requires-Python: >= 3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: torch
+Requires-Dist: torchshow
 Requires-Dist: pyzmq
 Requires-Dist: requests
 Requires-Dist: numpy
@@ -64,7 +65,7 @@ fut.get()
 ```
 
 
-The [introduction to monarch concepts](getting_started.html) provides an introduction to using these features.
+The [introduction to monarch concepts](https://meta-pytorch.org/monarch/generated/examples/getting_started.html) provides an introduction to using these features.
 
 > ⚠️ **Early Development Warning** Monarch is currently in an experimental
 > stage. You should expect bugs, incomplete features, and APIs that may change
@@ -73,18 +74,24 @@ The [introduction to monarch concepts](getting_started.html) provides an introdu
 > work. It's recommended that you signal your intention to contribute in the
 > issue tracker, either by filing a new issue or by claiming an existing one.
 
-Note: Monarch is currently only supported on Linux systems
-
 ## 📖 Documentation
 
 View Monarch's hosted documentation [at this link](https://meta-pytorch.org/monarch/).
 
 ## Installation
+Note: for running distributed tensors, the local torch version must match the version that monarch was built with.
 
 ### On Fedora distributions
+## Stable
+`pip install torchmonarch`
+
+torchmonarch stable is built with the latest stable torch.
 
+## Nightly
 `pip install torchmonarch-nightly`
 
+torchmonarch-nightly is built with torch nightly.
+
 or manually
 
 ```sh
torchmonarch_nightly-2025.9.3.dist-info/RECORD → torchmonarch_nightly-2025.9.5.dist-info/RECORD
@@ -1,5 +1,5 @@
 monarch/__init__.py,sha256=mgKiyD1kxky-1pvhMlNfF4VmxWnhi-FSYZNFzkW1BEM,7052
-monarch/_rust_bindings.so,sha256=V26PTUdu3YGZbYGLq-vcGxhxMhNpG2A4waq8dUhiKRs,61340152
+monarch/_rust_bindings.so,sha256=4PSOldY67JPiHo6-GBN1Cd5zhbVOxcCU5sTSrMACKAc,61372560
 monarch/_testing.py,sha256=5BDMVA4hBMo780rsJ39vRmUZi6mTN8aYY7I9grJRjJ8,7841
 monarch/actor_mesh.py,sha256=VtPU9syi_vUdwDSJJ639Z4Y_EcWZUScyoj0lQ88RQPs,421
 monarch/bootstrap_main.py,sha256=39OZpNMrfvvNJf-iwuNzgslzYA_ItaRPHfXGn_V74N0,524
@@ -8,7 +8,7 @@ monarch/fetch.py,sha256=CssP25dMqyJnJAWoC41lwkMnSbvS-f2DL9PRbudJXfc,1704
 monarch/gradient_generator.py,sha256=b7PmoN_F3c5hQglfHeW_v5htYnePKvJGkzZN-tpHR4A,6396
 monarch/memory.py,sha256=ol86dBhFAJqg78iF25-BuK0wuwj1onR8FIioZ_B0gjw,1377
 monarch/mesh_controller.py,sha256=Y_26Cnmp72TccNbWdDQhq18j7de7pSw83E_fREJX9Yo,15372
-monarch/monarch_controller,sha256=LVqFQjApcnCW9AWWvgSrIdggGFtJJ9hh0Cf2yU1Qxhs,32390232
+monarch/monarch_controller,sha256=8pl9HaAiJvsa1X8MsUQnbVKiD6oClL730oHsUUdkzwo,32446984
 monarch/notebook.py,sha256=zu9MKDFKf1-rCM2TqFSRJjMBeiWuKcJSyUFLvoZRQzs,25949
 monarch/opaque_module.py,sha256=jCcg0DjbcEVXA9WNG0NhUzGteLHOJLTZEBvrIYJIAns,10436
 monarch/opaque_object.py,sha256=x1LoX6RIMGh4ux52xIfhPgoh6PhZHdkf9bMccHW3DW0,2808
@@ -25,17 +25,17 @@ monarch/tensorboard.py,sha256=MnLgH5lbqeUJauEuirEgR6L_qYl2NGdtwZOWIAuOZao,2587
 monarch/world_mesh.py,sha256=ob5dJWaC49Uw0xqClHBm8CQLvL4xKnjd4TGzk7k8NxI,980
 monarch/_src/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 monarch/_src/actor/__init__.py,sha256=4iK3nzQZmEPe0HGNY70fABBenw3lCVVaaF0xddF5Fa0,235
-monarch/_src/actor/actor_mesh.py,sha256=Y4IaWtVwL4h8nZ2qpOEIs3sqI2uCpVwYQZUCCHUSO80,37707
+monarch/_src/actor/actor_mesh.py,sha256=P9b4CvxYeYfJecPxeEtohAasvGgzzElcvxh9chALeAA,40526
 monarch/_src/actor/allocator.py,sha256=UVGhrkPQMqPQp6vUngPI361s6yCEfZ0gfz8WTtG2om4,9392
 monarch/_src/actor/bootstrap_main.py,sha256=7T7ARumcHLZ5RI-k5jej9tBil0J7-BUSVFKwAZO2tJU,2413
 monarch/_src/actor/device_utils.py,sha256=gBpl23wMjppVAEzzj8U9HyX-B7Bs2_3ftiMAkzUS4j4,577
 monarch/_src/actor/endpoint.py,sha256=_VaPHc0Fcj1P5nDzUXt8xnS6iw7-HO6hGx7W5RtU3eU,10916
 monarch/_src/actor/event_loop.py,sha256=2i4fKIkemBzua_t47BqVa2roZ6fWB6sbmMFPNx2zKN0,2832
 monarch/_src/actor/future.py,sha256=idgqzU_B5qWfClIP5dTLapmBflWq5va-ujAzUbT1Asc,7490
-monarch/_src/actor/host_mesh.py,sha256=xSto21T4ESnF55UfcQOkWZxsIiiv3VnOzyrcrZ_or7Y,4405
+monarch/_src/actor/host_mesh.py,sha256=8SOkg_LhHuzLyhpwxT7Yw1_h8QrIlwfWhrSwHyAvfnk,5083
 monarch/_src/actor/logging.py,sha256=9aguohqCtvLVwWGTFM7o-rBptT26BjI2s6E5Of2imM4,3311
 monarch/_src/actor/pickle.py,sha256=FhdbAEsGrsD7f25bxF7HlROLm6j2TTvmToq8P1kyhB8,2913
-monarch/_src/actor/proc_mesh.py,sha256=fijaxhvVn1adtPFPaZ_-16pvWo1-rhjbB6qHkW2dAkk,24779
+monarch/_src/actor/proc_mesh.py,sha256=lYrRMQNOGAdFXuFvc3lQ68xIS01YJWMkpi8qH5HHAHE,27791
 monarch/_src/actor/python_extension_methods.py,sha256=QujLWOQQbDdGCin8tZfDxyIwkM-Md4t9QtcTGTHOE_s,3493
 monarch/_src/actor/shape.py,sha256=PJqxpQEISHlxK8rrlKWpcNMEHiGxBbc6TsHcGZCOsyE,8472
 monarch/_src/actor/source_loader.py,sha256=TGHmExLyxPDcCyuG254zo6aUqHMpl-j0VWzxa9rkJYQ,1405
@@ -52,7 +52,7 @@ monarch/_src/debug_cli/__init__.py,sha256=NNrKh5KdiYdbxOhin8x-gw_-tvcuGex2UbS_z7
 monarch/_src/debug_cli/debug_cli.py,sha256=OJqqVFXcMkj-bnrxcE2VnjIgA5xrlKjEtCstrsdPcm0,1146
 monarch/_src/tensor_engine/__init__.py,sha256=Md3cCHD7Ano9kV15PqGbicgUO-RMdh4aVy1yKiDt_xE,208
 monarch/_src/tensor_engine/rdma.py,sha256=62saqcXo6oUxH9rEZShNGLYdRZ_zizLQKhub7LDSaBg,8418
-monarch/actor/__init__.py,sha256=9CXerFsBVR-lcpSEw_lIsNNP0qqCWoSA9exyDhhGvHI,1458
+monarch/actor/__init__.py,sha256=hHf8ri2czQwi-Z23Z9cYZ2FvkVbYOcDA_GTLW_rju7k,1569
 monarch/builtins/__init__.py,sha256=QcfnHZGbc2qktBg7DyZt2ruE6VahnIt4S8lEZLHdJqU,443
 monarch/builtins/log.py,sha256=H1QkuVzwxyi36Zyv-XR0VN0QsNimBWwxE1__fjs0_2o,554
 monarch/builtins/random.py,sha256=wPbvscg7u53EXpMFo885fO2XOlsyjrNAJ4rBxLzfxdg,1839
@@ -84,7 +84,7 @@ monarch/common/reference.py,sha256=O26lkzEeVwj0S1xEy-OLqdHVnACmmlbQCUmXRrW4n1Q,9
 monarch/common/remote.py,sha256=uc2JUbYHMnEZRnw9ZuS4mgvK_UHCuRaIIFbzZpx89hQ,11485
 monarch/common/selection.py,sha256=lpWFbZs3ArYy29e-53eoAVAjQFksf1RvZz9NvM0CUW4,308
 monarch/common/stream.py,sha256=_ejoxafHtdD10lLzznRCXKwrkZ_ZH9k_VTgiA5yfBrI,3583
-monarch/common/tensor.py,sha256=tqoXyvxrmfTtOm6ToWoy9NVg1NCjlt1KPjrNtYOMCco,29824
+monarch/common/tensor.py,sha256=9FIUn5--VeacYWhEhMvysdG0yc_zq4eW3X3526RCw3w,29598
 monarch/common/tensor_factory.py,sha256=qm8NZx-5ezMAFjNLiXQvb66okm5XgdboB_GRarGOdN0,801
 monarch/common/tree.py,sha256=1DG3siiE7ixBV6v5cwN8RT_17aJhYZTE-L3i7wZe2_c,2282
 monarch/controller/__init__.py,sha256=J8qjUOysmcMAek2KFN13mViOXZxTYc5vCrF02t3VuFU,223
@@ -98,7 +98,7 @@ monarch/debug_cli/__init__.py,sha256=NNrKh5KdiYdbxOhin8x-gw_-tvcuGex2UbS_z7MV9g0
 monarch/debug_cli/__main__.py,sha256=FGsQn54RkC_3gpRrm_UFrGiDDHRbMeGzXXsGANr5UHU,317
 monarch/gradient/__init__.py,sha256=kqmzwt16mMpk0M3GhpgP_f7da4DGnaV9chDzbt66k4Q,308
 monarch/gradient/_gradient_generator.pyi,sha256=6cX0UxaDt9NAlwgIhTgnweqGOf6qRhHiGnUzSWNCxdU,630
-monarch/gradient/_gradient_generator.so,sha256=XfxpsBGsvv3LclZWw5tYpV_y9ZUL2erSKOSVD8pzK9M,12172008
+monarch/gradient/_gradient_generator.so,sha256=kGzXSoii5ODi8ZA7gz9D38Lt5sxGi5POXcGYmidS0-Q,12174336
 monarch/parallel/__init__.py,sha256=6920kIkhiX7AiyjYvyc1ad8ccP-bStJJ1sS5KkeN2P0,352
 monarch/parallel/pipelining/__init__.py,sha256=J8qjUOysmcMAek2KFN13mViOXZxTYc5vCrF02t3VuFU,223
 monarch/parallel/pipelining/runtime.py,sha256=KK8TG1gUYEzSsquiZoPTWGSIC74mlncD7cYknKxfb3c,32470
@@ -127,13 +127,13 @@ monarch/tools/__init__.py,sha256=J8qjUOysmcMAek2KFN13mViOXZxTYc5vCrF02t3VuFU,223
 monarch/tools/cli.py,sha256=b3mKZnK-MwP7JwskTxHI0KcJXxSU6498jEb2ntVr_VM,5001
 monarch/tools/colors.py,sha256=XrBkslKoaoDeXqiTluiiuvFLYd-weKp1sjw7DYWz2RY,581
 monarch/tools/commands.py,sha256=z4vCPtn_Ypic7L4_Jd3nMJWyyE4olUPqDe4cpJsDKZ4,13873
-monarch/tools/mesh_spec.py,sha256=9CMiLWNrgSUF0oWoAtmUT2CweMgqi4pLI18w7-EZWxs,8032
+monarch/tools/mesh_spec.py,sha256=lkKZ7RxuJKY19X6kdiU_V6IWlH1GHidynOaTbuCOsAY,7983
 monarch/tools/network.py,sha256=mN8Fx9mervxM3VdFHRn4ZXt4z7yWxZp52BTxx2tfpus,2455
 monarch/tools/utils.py,sha256=gcZyalfoBC6Y3v65h-QMngwXsn24ejXh2TH8RxlgXkA,1888
 monarch/tools/components/__init__.py,sha256=J8qjUOysmcMAek2KFN13mViOXZxTYc5vCrF02t3VuFU,223
-monarch/tools/components/hyperactor.py,sha256=P_5XTbCl-niCj7wua-9iDaEuLVhYohetT3nScWgA7Uc,2169
-monarch/tools/config/__init__.py,sha256=3VV8zWKUc-yKA-wybCIbwSvDjtIzCS4dHX9KChvxn9A,2296
-monarch/tools/config/defaults.py,sha256=BRWDbfUkPX1Mz982EzRsYn-DlQpQw2kLIwVV2lbhpnE,2203
+monarch/tools/components/hyperactor.py,sha256=OR5JtH2UCao3ke3vMohzVbuo_L0gZ_jTw8ud82qLj3M,2175
+monarch/tools/config/__init__.py,sha256=wCw2qwGJL1gFuo9Wpvnrva6NKDLyjf2Yglm6Q9UJYkI,2224
+monarch/tools/config/defaults.py,sha256=twUF6eT9HjJyxEZYrz2SoROHHXi3YPUDSeAelJRLBSU,2187
 monarch/tools/config/environment.py,sha256=ikEZKATa2e_8h9pN4_3TzhIHWb4ZZfRT5XtOVoOmHjI,1628
 monarch/tools/config/workspace.py,sha256=a2YzFBTLUB_VrO3kt6dCV5TlmhCH4LyRX3JCMzu7Iv0,6049
 monarch/utils/__init__.py,sha256=9ofjBGAMZo1VGsn7ufiDlrVheMw4Ye34p-isDfveUxc,295
@@ -186,9 +186,9 @@ tests/simulator/test_profiling.py,sha256=TGYCfzTLdkpIwnOuO6KApprmrgPIRQe60KRX3wk
 tests/simulator/test_simulator.py,sha256=LO8lA0ssY-OGEBL5ipEu74f97Y765TEwfUOv-DtIptM,14568
 tests/simulator/test_task.py,sha256=ipqBDuDAysuo1xOB9S5psaFvwe6VATD43IovCTSs0t4,2327
 tests/simulator/test_worker.py,sha256=QrWWIJ3HDgDLkBPRc2mwYPlOQoXQcj1qRfc0WUfKkFY,3507
-torchmonarch_nightly-2025.9.3.dist-info/licenses/LICENSE,sha256=e0Eotbf_rHOYPuEUlppIbvwy4SN98CZnl_hqwvbDA4Q,1530
-torchmonarch_nightly-2025.9.3.dist-info/METADATA,sha256=L5a1VhSSe2RevR33LVzvECh6LjPdlCV_bWkaT1nD7Zk,6184
-torchmonarch_nightly-2025.9.3.dist-info/WHEEL,sha256=JC9FVdjbTDi9l3EyrqUd11CgmN9LkBi1g5dFHayafwA,104
-torchmonarch_nightly-2025.9.3.dist-info/entry_points.txt,sha256=60QVSpYVzkzS4iDOiLp0fsLxVp47X3J2l3v7W-59LMo,117
-torchmonarch_nightly-2025.9.3.dist-info/top_level.txt,sha256=E-ZssZzyM17glpVrh-S9--qJ-w9p2EjuYOuNw9tQ4Eg,33
-torchmonarch_nightly-2025.9.3.dist-info/RECORD,,
+torchmonarch_nightly-2025.9.5.dist-info/licenses/LICENSE,sha256=e0Eotbf_rHOYPuEUlppIbvwy4SN98CZnl_hqwvbDA4Q,1530
+torchmonarch_nightly-2025.9.5.dist-info/METADATA,sha256=6WKzpl0pCJnxLhSxTckSsA7C5ncuUZJ2_NzujIuxhiQ,6474
+torchmonarch_nightly-2025.9.5.dist-info/WHEEL,sha256=JC9FVdjbTDi9l3EyrqUd11CgmN9LkBi1g5dFHayafwA,104
+torchmonarch_nightly-2025.9.5.dist-info/entry_points.txt,sha256=60QVSpYVzkzS4iDOiLp0fsLxVp47X3J2l3v7W-59LMo,117
+torchmonarch_nightly-2025.9.5.dist-info/top_level.txt,sha256=E-ZssZzyM17glpVrh-S9--qJ-w9p2EjuYOuNw9tQ4Eg,33
+torchmonarch_nightly-2025.9.5.dist-info/RECORD,,