torchmonarch-nightly 2025.7.29__cp311-cp311-manylinux2014_x86_64.whl → 2025.7.31__cp311-cp311-manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,114 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ # pyre-strict
8
+
9
+ from unittest import TestCase
10
+
11
+ from monarch._rust_bindings.monarch_hyperactor.shape import Shape, Slice
12
+ from monarch._src.actor.shape import ShapeExt
13
+
14
+
15
class TestShapeSlicing(TestCase):
    """Checks for ``Shape.at()``, ``Shape.select()`` and ``ShapeExt.slice()``.

    Every test builds a small labeled ``Shape`` over a ``Slice`` and compares
    the labels, sizes, strides and/or offset of the derived shape against
    hand-computed values.
    """

    def test_shape_at_removes_dimension(self) -> None:
        """at() drops the indexed dimension and moves the offset by
        index * stride of that dimension."""

        shape = Shape(
            ["batch", "height", "width"],
            Slice(offset=0, sizes=[2, 3, 4], strides=[12, 4, 1]),
        )

        # Pin the leading dimension.
        without_batch = shape.at("batch", 1)
        self.assertEqual(without_batch.labels, ["height", "width"])
        self.assertEqual(without_batch.ndslice.sizes, [3, 4])
        self.assertEqual(without_batch.ndslice.strides, [4, 1])
        self.assertEqual(without_batch.ndslice.offset, 12)  # 1 * 12

        # Pin the middle dimension.
        without_height = shape.at("height", 2)
        self.assertEqual(without_height.labels, ["batch", "width"])
        self.assertEqual(without_height.ndslice.sizes, [2, 4])
        self.assertEqual(without_height.ndslice.strides, [12, 1])
        self.assertEqual(without_height.ndslice.offset, 8)  # 2 * 4

        # Pin the trailing dimension.
        without_width = shape.at("width", 3)
        self.assertEqual(without_width.labels, ["batch", "height"])
        self.assertEqual(without_width.ndslice.sizes, [2, 3])
        self.assertEqual(without_width.ndslice.strides, [12, 4])
        self.assertEqual(without_width.ndslice.offset, 3)  # 3 * 1

    def test_shape_select_keeps_dimension(self) -> None:
        """select() leaves the label list intact and only narrows the
        selected dimension."""

        grid = Shape(["rows", "cols"], Slice.new_row_major([4, 6]))

        # Contiguous range of rows.
        row_band = grid.select("rows", slice(1, 3))
        self.assertEqual(row_band.labels, ["rows", "cols"])
        self.assertEqual(row_band.ndslice.sizes, [2, 6])  # 3-1=2 rows
        self.assertEqual(row_band.ndslice.offset, 6)  # 1 * 6

        # Stepped range of columns.
        even_cols = grid.select("cols", slice(0, 6, 2))
        self.assertEqual(even_cols.labels, ["rows", "cols"])
        self.assertEqual(even_cols.ndslice.sizes, [4, 3])  # every 2nd col = 3 cols
        self.assertEqual(even_cols.ndslice.strides, [6, 2])  # stride becomes 2

    def test_shape_slice_mixed_operations(self) -> None:
        """at() and select() can be chained in either order."""

        volume = Shape(
            ["batch", "height", "width"],
            Slice.new_row_major([2, 3, 4]),
        )

        # select first, then at
        narrowed = volume.select("width", slice(1, 4))
        select_then_at = narrowed.at("batch", 0)
        self.assertEqual(select_then_at.labels, ["height", "width"])
        self.assertEqual(select_then_at.ndslice.sizes, [3, 3])

        # at first, then select
        one_row = volume.at("height", 1)
        at_then_select = one_row.select("width", slice(2, 4))
        self.assertEqual(at_then_select.labels, ["batch", "width"])
        self.assertEqual(at_then_select.ndslice.sizes, [2, 2])

    def test_shape_slice_errors(self) -> None:
        """Invalid labels and indices passed to at() raise."""
        shape = Shape(["rows", "cols"], Slice.new_row_major([2, 3]))

        # Unknown label.
        self.assertRaises(ValueError, shape.at, "nonexistent", 0)

        # Index past the end of the dimension (size is 2).
        self.assertRaises(ValueError, shape.at, "rows", 5)

        # Negative index: surfaces as OverflowError rather than ValueError.
        # NOTE(review): presumably raised while converting the Python int to
        # an unsigned integer at the Python-Rust boundary -- confirm.
        self.assertRaises(OverflowError, shape.at, "rows", -1)

    def test_shape_slice_comprehensive(self) -> None:
        """ShapeExt.slice() accepts per-label ints and slices as keyword
        arguments."""

        shape = Shape(["a", "b", "c"], Slice.new_row_major([4, 5, 6]))

        # Integers only: each indexed dimension is removed.
        ints_only = ShapeExt.slice(shape, a=1, c=2)
        self.assertEqual(ints_only.labels, ["b"])
        self.assertEqual(ints_only.ndslice.sizes, [5])

        # Slices only: every dimension is kept.
        slices_only = ShapeExt.slice(shape, b=slice(1, 4), c=slice(0, 6, 2))
        self.assertEqual(slices_only.labels, ["a", "b", "c"])
        self.assertEqual(slices_only.ndslice.sizes, [4, 3, 3])

        # One integer and one slice.
        mixed = ShapeExt.slice(shape, a=2, b=slice(1, 3))
        self.assertEqual(mixed.labels, ["b", "c"])
        self.assertEqual(mixed.ndslice.sizes, [2, 6])
tests/test_allocator.py CHANGED
@@ -104,7 +104,9 @@ class TestActor(Actor):
104
104
 
105
105
  @contextlib.contextmanager
106
106
  def remote_process_allocator(
107
- addr: Optional[str] = None, timeout: Optional[int] = None
107
+ addr: Optional[str] = None,
108
+ timeout: Optional[int] = None,
109
+ envs: Optional[dict[str, str]] = None,
108
110
  ) -> Generator[str, None, None]:
109
111
  """Start a remote process allocator on addr. If timeout is not None, have it
110
112
  timeout after that many seconds if no messages come in"""
@@ -120,16 +122,19 @@ def remote_process_allocator(
120
122
  if timeout is not None:
121
123
  args.append(f"--timeout-sec={timeout}")
122
124
 
125
+ env = {
126
+ # prefix PATH with this test module's directory to
127
+ # give 'process_allocator' and 'monarch_bootstrap' binary resources
128
+ # in this test module's directory precedence over the installed ones
129
+ # useful in BUCK where these binaries are added as 'resources' of this test target
130
+ "PATH": f"{package_path}:{os.getenv('PATH', '')}",
131
+ "RUST_LOG": "debug",
132
+ }
133
+ if envs:
134
+ env.update(envs)
123
135
  process_allocator = subprocess.Popen(
124
136
  args=args,
125
- env={
126
- # prefix PATH with this test module's directory to
127
- # give 'process_allocator' and 'monarch_bootstrap' binary resources
128
- # in this test module's directory precedence over the installed ones
129
- # useful in BUCK where these binaries are added as 'resources' of this test target
130
- "PATH": f"{package_path}:{os.getenv('PATH', '')}",
131
- "RUST_LOG": "debug",
132
- },
137
+ env=env,
133
138
  )
134
139
  try:
135
140
  yield addr
@@ -233,6 +238,26 @@ class TestRemoteAllocator(unittest.IsolatedAsyncioTestCase):
233
238
  computed_world_sizes = {p.rank: v for p, v in list(computed.flatten("rank"))}
234
239
  self.assertDictEqual(expected_world_sizes, computed_world_sizes)
235
240
 
241
async def test_allocate_failure_message(self) -> None:
    """Allocating against two failing hosts raises with the bootstrap traceback.

    Both remote process allocators are started with
    MONARCH_ERROR_DURING_BOOTSTRAP_FOR_TESTING=1 in their environment
    (presumably this makes bootstrap fail -- confirm against the bootstrap
    code). The test passes when allocating and building a ProcMesh raises an
    exception whose message matches the "exited with code 1: Traceback"
    pattern below.
    """
    spec = AllocSpec(AllocConstraints(), host=2, gpu=4)
    # remote_process_allocator only reads this mapping (it is merged into a
    # fresh env dict), so one instance can be passed to both hosts.
    failing_bootstrap = {"MONARCH_ERROR_DURING_BOOTSTRAP_FOR_TESTING": "1"}

    with self.assertRaisesRegex(
        Exception,
        r"exited with code 1: Traceback \(most recent call last\).*",
    ):
        with remote_process_allocator(envs=failing_bootstrap) as first_host:
            with remote_process_allocator(envs=failing_bootstrap) as second_host:
                allocator = RemoteAllocator(
                    world_id="test_remote_allocator",
                    initializer=StaticRemoteAllocInitializer(
                        first_host, second_host
                    ),
                    heartbeat_interval=_100_MILLISECONDS,
                )
                await ProcMesh.from_alloc(await allocator.allocate(spec))
260
+
236
261
  async def test_call_allocate_twice(self) -> None:
237
262
  class DeletingAllocInitializer(StaticRemoteAllocInitializer):
238
263
  """test initializer that removes the last address from the list each time initialize_alloc() is called