xmanager-slurm 0.4.0__tar.gz → 0.4.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xmanager-slurm might be problematic. Click here for more details.

Files changed (112)
  1. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/PKG-INFO +1 -1
  2. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/examples/job-array-sweep/launch.py +1 -1
  3. xmanager_slurm-0.4.1/examples/job-dependencies/eval.py +13 -0
  4. xmanager_slurm-0.4.1/examples/job-dependencies/launch.py +104 -0
  5. xmanager_slurm-0.4.1/examples/job-dependencies/pyproject.toml +6 -0
  6. xmanager_slurm-0.4.1/examples/job-dependencies/train.py +19 -0
  7. xmanager_slurm-0.4.1/examples/job-dependencies/uv.lock +86 -0
  8. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/pyproject.toml +4 -1
  9. xmanager_slurm-0.4.1/tests/test_dependencies.py +149 -0
  10. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/uv.lock +1 -1
  11. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/xm_slurm/__init__.py +4 -2
  12. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/xm_slurm/api.py +1 -1
  13. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/xm_slurm/config.py +7 -2
  14. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/xm_slurm/constants.py +4 -0
  15. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/xm_slurm/contrib/clusters/__init__.py +25 -0
  16. xmanager_slurm-0.4.1/xm_slurm/dependencies.py +171 -0
  17. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/xm_slurm/executables.py +20 -15
  18. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/xm_slurm/execution.py +246 -96
  19. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/xm_slurm/executors.py +8 -12
  20. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/xm_slurm/experiment.py +374 -83
  21. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/xm_slurm/experimental/parameter_controller.py +12 -10
  22. xmanager_slurm-0.4.0/xm_slurm/packaging/docker/local.py → xmanager_slurm-0.4.1/xm_slurm/packaging/docker.py +126 -32
  23. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/xm_slurm/packaging/router.py +3 -1
  24. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/xm_slurm/packaging/utils.py +4 -28
  25. xmanager_slurm-0.4.1/xm_slurm/scripts/cli.py +52 -0
  26. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/xm_slurm/templates/docker/mamba.Dockerfile +1 -1
  27. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/xm_slurm/templates/slurm/fragments/monitor.bash.j2 +5 -0
  28. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/xm_slurm/templates/slurm/job-array.bash.j2 +1 -2
  29. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/xm_slurm/templates/slurm/job.bash.j2 +4 -3
  30. xmanager_slurm-0.4.1/xm_slurm/types.py +23 -0
  31. xmanager_slurm-0.4.0/xm_slurm/packaging/docker/__init__.py +0 -69
  32. xmanager_slurm-0.4.0/xm_slurm/packaging/docker/abc.py +0 -112
  33. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/.devcontainer.json +0 -0
  34. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/.github/workflows/ci.yml +0 -0
  35. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/.github/workflows/deploy-docs.yml +0 -0
  36. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/.gitignore +0 -0
  37. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/.pre-commit-config.yaml +0 -0
  38. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/.vscode/settings.json +0 -0
  39. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/LICENSE.md +0 -0
  40. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/README.md +0 -0
  41. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/docs/api/executables.rst +0 -0
  42. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/docs/api/executors.rst +0 -0
  43. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/docs/api/packageables.rst +0 -0
  44. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/docs/assets/workflow-dark.svg +0 -0
  45. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/docs/assets/workflow-light.svg +0 -0
  46. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/docs/conf.py +0 -0
  47. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/docs/getting-started/xmanager.md +0 -0
  48. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/docs/guides/index.md +0 -0
  49. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/docs/guides/remote-dev.md +0 -0
  50. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/docs/index.md +0 -0
  51. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/examples/conda/environment.yml +0 -0
  52. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/examples/conda/launch.py +0 -0
  53. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/examples/conda/main.py +0 -0
  54. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/examples/conda/pyproject.toml +0 -0
  55. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/examples/custom-dockerfile/Dockerfile +0 -0
  56. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/examples/custom-dockerfile/launch.py +0 -0
  57. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/examples/custom-dockerfile/pyproject.toml +0 -0
  58. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/examples/job-array-sweep/main.py +0 -0
  59. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/examples/job-array-sweep/pyproject.toml +0 -0
  60. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/examples/job-array-sweep/uv.lock +0 -0
  61. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/examples/job-group/Dockerfile +0 -0
  62. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/examples/job-group/launch.py +0 -0
  63. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/examples/job-group/pyproject.toml +0 -0
  64. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/examples/job-group/uv.lock +0 -0
  65. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/examples/metadata/launch.py +0 -0
  66. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/examples/metadata/main.py +0 -0
  67. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/examples/metadata/pyproject.toml +0 -0
  68. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/examples/metadata/requirements.txt +0 -0
  69. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/examples/parameter-controller/launch.py +0 -0
  70. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/examples/parameter-controller/main.py +0 -0
  71. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/examples/parameter-controller/pyproject.toml +0 -0
  72. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/examples/parameter-controller/requirements.txt +0 -0
  73. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/examples/pip/launch.py +0 -0
  74. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/examples/pip/main.py +0 -0
  75. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/examples/pip/pyproject.toml +0 -0
  76. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/examples/pip/requirements.txt +0 -0
  77. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/examples/uv/launch.py +0 -0
  78. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/examples/uv/pyproject.toml +0 -0
  79. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/examples/uv/uv.lock +0 -0
  80. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/tests/integration/conftest.py +0 -0
  81. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/tests/integration/fixtures/slurm/Dockerfile +0 -0
  82. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/tests/integration/fixtures/slurm/README.md +0 -0
  83. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/tests/integration/fixtures/slurm/cgroup.conf +0 -0
  84. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/tests/integration/fixtures/slurm/docker-compose.yml +0 -0
  85. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/tests/integration/fixtures/slurm/docker-entrypoint.sh +0 -0
  86. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/tests/integration/fixtures/slurm/host_ed25519 +0 -0
  87. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/tests/integration/fixtures/slurm/host_ed25519.pub +0 -0
  88. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/tests/integration/fixtures/slurm/id_ed25519 +0 -0
  89. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/tests/integration/fixtures/slurm/id_ed25519.pub +0 -0
  90. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/tests/integration/fixtures/slurm/slurm.conf +0 -0
  91. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/tests/integration/fixtures/slurm/slurmdbd.conf +0 -0
  92. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/tests/integration/fixtures/slurm/sshd_config +0 -0
  93. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/tests/integration/test_remote_execution.py +0 -0
  94. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/xm_slurm/batching.py +0 -0
  95. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/xm_slurm/console.py +0 -0
  96. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/xm_slurm/contrib/__init__.py +0 -0
  97. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/xm_slurm/contrib/clusters/drac.py +0 -0
  98. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/xm_slurm/job_blocks.py +0 -0
  99. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/xm_slurm/packageables.py +0 -0
  100. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/xm_slurm/packaging/__init__.py +0 -0
  101. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/xm_slurm/packaging/registry.py +0 -0
  102. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/xm_slurm/resources.py +0 -0
  103. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/xm_slurm/scripts/_cloudpickle.py +0 -0
  104. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/xm_slurm/status.py +0 -0
  105. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/xm_slurm/templates/docker/docker-bake.hcl.j2 +0 -0
  106. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/xm_slurm/templates/docker/python.Dockerfile +0 -0
  107. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/xm_slurm/templates/docker/uv.Dockerfile +0 -0
  108. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/xm_slurm/templates/slurm/fragments/proxy.bash.j2 +0 -0
  109. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/xm_slurm/templates/slurm/job-group.bash.j2 +0 -0
  110. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/xm_slurm/templates/slurm/runtimes/apptainer.bash.j2 +0 -0
  111. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/xm_slurm/templates/slurm/runtimes/podman.bash.j2 +0 -0
  112. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.1}/xm_slurm/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: xmanager-slurm
3
- Version: 0.4.0
3
+ Version: 0.4.1
4
4
  Summary: Slurm backend for XManager.
5
5
  Project-URL: GitHub, https://github.com/jessefarebro/xm-slurm
6
6
  Author-email: Jesse Farebrother <jfarebro@cs.mcgill.ca>
@@ -46,7 +46,7 @@ async def main(_):
46
46
  )
47
47
  )
48
48
 
49
- args = [{"args": {"scale": scale}} for scale in range(3)]
49
+ args = [xm_slurm.JobArgs(args={"scale": scale}) for scale in range(3)]
50
50
  wus = await experiment.add(make_job, args)
51
51
 
52
52
  for wu, status in zip(wus, await asyncio.gather(*[wu.get_status() for wu in wus])):
@@ -0,0 +1,13 @@
1
+ import numpy as np
2
+ from absl import app, flags
3
+
4
+ INPUT_FILE = flags.DEFINE_string("input_file", "result.npy", "Input file path")
5
+
6
+
7
+ def main(_):
8
+ result = np.load(INPUT_FILE.value)
9
+ print(f"Received result: {result}")
10
+
11
+
12
+ if __name__ == "__main__":
13
+ app.run(main)
@@ -0,0 +1,104 @@
1
+ import asyncio
2
+ import datetime as dt
3
+ import pathlib
4
+
5
+ from absl import app
6
+ from xmanager import xm
7
+
8
+ import xm_slurm
9
+ import xm_slurm.contrib.clusters
10
+
11
+
12
+ @xm.run_in_asyncio_loop
13
+ async def main(_):
14
+ async with xm_slurm.create_experiment("My Experiment") as experiment:
15
+ # Step 1: Specify the executor specification
16
+ executor_spec = xm_slurm.Slurm.Spec(tag="ghcr.io/jessefarebro/xm-slurm/test:latest")
17
+
18
+ # Step 2: Specify the executable and package it
19
+ [train_executable, eval_executable] = experiment.package(
20
+ [
21
+ xm_slurm.uv_container(
22
+ executor_spec=executor_spec,
23
+ entrypoint=xm.CommandList(["train.py"]),
24
+ ),
25
+ xm_slurm.uv_container(
26
+ executor_spec=executor_spec,
27
+ entrypoint=xm.CommandList(["eval.py"]),
28
+ ),
29
+ ],
30
+ )
31
+
32
+ workdir = pathlib.Path(f"/scratch/xm-slurm-examples/{experiment.experiment_id}")
33
+
34
+ # Step 4: Schedule train job
35
+ train_executor = xm_slurm.Slurm(
36
+ requirements=xm_slurm.JobRequirements(
37
+ CPU=1,
38
+ RAM=1.0 * xm.GiB,
39
+ GPU=1,
40
+ replicas=1,
41
+ cluster=xm_slurm.contrib.clusters.mila(),
42
+ ),
43
+ time=dt.timedelta(hours=1),
44
+ )
45
+
46
+ async def make_train_job(wu: xm.WorkUnit, args):
47
+ await wu.add(
48
+ xm.Job(
49
+ executable=train_executable,
50
+ executor=train_executor,
51
+ args=xm.merge_args(
52
+ [
53
+ "--output_file",
54
+ (workdir / f"{wu.work_unit_id}" / "result.npy").as_posix(),
55
+ ],
56
+ args,
57
+ ),
58
+ ),
59
+ )
60
+
61
+ train_wus = await experiment.add(
62
+ make_train_job,
63
+ args=[xm_slurm.JobArgs(args=["--seed", seed]) for seed in range(5)],
64
+ )
65
+
66
+ # Step 5: Schedule eval job
67
+ eval_executor = xm_slurm.Slurm(
68
+ requirements=xm_slurm.JobRequirements(
69
+ CPU=1,
70
+ RAM=1.0 * xm.GiB,
71
+ GPU=1,
72
+ replicas=1,
73
+ cluster=xm_slurm.contrib.clusters.mila(),
74
+ ),
75
+ time=dt.timedelta(hours=1),
76
+ )
77
+
78
+ eval_wus = await experiment.add(
79
+ xm.Job(
80
+ executable=eval_executable,
81
+ executor=eval_executor,
82
+ ),
83
+ args=[
84
+ xm_slurm.JobArgs(
85
+ args=[
86
+ "--input_file",
87
+ (workdir / f"{wu.work_unit_id}" / "result.npy").as_posix(),
88
+ ]
89
+ )
90
+ for wu in train_wus
91
+ ],
92
+ dependency=[train_wu.after_completed() for train_wu in train_wus],
93
+ )
94
+
95
+ for wu in asyncio.as_completed([
96
+ *[train_wu.wait_until_complete() for train_wu in train_wus],
97
+ *[eval_wu.wait_until_complete() for eval_wu in eval_wus],
98
+ ]):
99
+ wu = await wu
100
+ print(f"Work Unit {wu!r} finished executing with status {await wu.get_status()}")
101
+
102
+
103
+ if __name__ == "__main__":
104
+ app.run(main)
@@ -0,0 +1,6 @@
1
+ [project]
2
+ name = "xm-slurm-example"
3
+ description = "XManager Slurm test project"
4
+ version = "0.0.1"
5
+ requires-python = ">=3.10"
6
+ dependencies = ["numpy", "absl-py"]
@@ -0,0 +1,19 @@
1
+ import pathlib
2
+
3
+ import numpy as np
4
+ from absl import app, flags
5
+
6
+ OUTPUT_FILE = flags.DEFINE_string("output_file", "result.npy", "Output file path")
7
+ SEED = flags.DEFINE_integer("seed", 0, "Random seed")
8
+
9
+
10
+ def main(_):
11
+ np.random.seed(SEED.value)
12
+
13
+ pathlib.Path(OUTPUT_FILE.value).parent.mkdir(parents=True, exist_ok=True)
14
+ result = np.random.random((32,))
15
+ np.save(OUTPUT_FILE.value, result)
16
+
17
+
18
+ if __name__ == "__main__":
19
+ app.run(main)
@@ -0,0 +1,86 @@
1
+ version = 1
2
+ requires-python = ">=3.10"
3
+
4
+ [[package]]
5
+ name = "absl-py"
6
+ version = "2.1.0"
7
+ source = { registry = "https://pypi.org/simple" }
8
+ sdist = { url = "https://files.pythonhosted.org/packages/7a/8f/fc001b92ecc467cc32ab38398bd0bfb45df46e7523bf33c2ad22a505f06e/absl-py-2.1.0.tar.gz", hash = "sha256:7820790efbb316739cde8b4e19357243fc3608a152024288513dd968d7d959ff", size = 118055 }
9
+ wheels = [
10
+ { url = "https://files.pythonhosted.org/packages/a2/ad/e0d3c824784ff121c03cc031f944bc7e139a8f1870ffd2845cc2dd76f6c4/absl_py-2.1.0-py3-none-any.whl", hash = "sha256:526a04eadab8b4ee719ce68f204172ead1027549089702d99b9059f129ff1308", size = 133706 },
11
+ ]
12
+
13
+ [[package]]
14
+ name = "numpy"
15
+ version = "2.1.2"
16
+ source = { registry = "https://pypi.org/simple" }
17
+ sdist = { url = "https://files.pythonhosted.org/packages/4b/d1/8a730ea07f4a37d94f9172f4ce1d81064b7a64766b460378be278952de75/numpy-2.1.2.tar.gz", hash = "sha256:13532a088217fa624c99b843eeb54640de23b3414b14aa66d023805eb731066c", size = 18878063 }
18
+ wheels = [
19
+ { url = "https://files.pythonhosted.org/packages/1c/a2/40a76d357f168e9f9f06d6cc2c8e22dd5fb2bfbe63fe2c433057258c145a/numpy-2.1.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:30d53720b726ec36a7f88dc873f0eec8447fbc93d93a8f079dfac2629598d6ee", size = 21150947 },
20
+ { url = "https://files.pythonhosted.org/packages/b5/d0/ba271ea9108d7278d3889a7eb38d77370a88713fb94339964e71ac184d4a/numpy-2.1.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e8d3ca0a72dd8846eb6f7dfe8f19088060fcb76931ed592d29128e0219652884", size = 13758184 },
21
+ { url = "https://files.pythonhosted.org/packages/7c/b9/5c6507439cd756201010f7937bf90712c2469052ae094584af14557dd64f/numpy-2.1.2-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:fc44e3c68ff00fd991b59092a54350e6e4911152682b4782f68070985aa9e648", size = 5354091 },
22
+ { url = "https://files.pythonhosted.org/packages/60/21/7938cf724d9e84e45fb886f3fc794ab431d71facfebc261e3e9f19f3233a/numpy-2.1.2-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:7c1c60328bd964b53f8b835df69ae8198659e2b9302ff9ebb7de4e5a5994db3d", size = 6887169 },
23
+ { url = "https://files.pythonhosted.org/packages/09/8d/42a124657f5d31902fca73921b25a0d022cead2b32ce7e6975762cd2995a/numpy-2.1.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6cdb606a7478f9ad91c6283e238544451e3a95f30fb5467fbf715964341a8a86", size = 13888165 },
24
+ { url = "https://files.pythonhosted.org/packages/fb/25/ba023652a39a2c127200e85aed975fc6119b421e2c348e5d0171e2046edb/numpy-2.1.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d666cb72687559689e9906197e3bec7b736764df6a2e58ee265e360663e9baf7", size = 16326954 },
25
+ { url = "https://files.pythonhosted.org/packages/34/58/23e6b07fad492b7c47cf09cd8bad6983658f0f925b6c535fd008e3e86274/numpy-2.1.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c6eef7a2dbd0abfb0d9eaf78b73017dbfd0b54051102ff4e6a7b2980d5ac1a03", size = 16702916 },
26
+ { url = "https://files.pythonhosted.org/packages/91/24/37b5cf2dc7d385ac97f7b7fe50cba312abb70a2a5eac74c23af028811f73/numpy-2.1.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:12edb90831ff481f7ef5f6bc6431a9d74dc0e5ff401559a71e5e4611d4f2d466", size = 14384372 },
27
+ { url = "https://files.pythonhosted.org/packages/ea/ec/0f6d471058a01d1a05a50d2793898de1549280fa715a8537987ee866b5d9/numpy-2.1.2-cp310-cp310-win32.whl", hash = "sha256:a65acfdb9c6ebb8368490dbafe83c03c7e277b37e6857f0caeadbbc56e12f4fb", size = 6535361 },
28
+ { url = "https://files.pythonhosted.org/packages/c2/3d/293cc5927f916a7bc6bf74da8f6defab63d1b13f0959d7e21878ad8a20d8/numpy-2.1.2-cp310-cp310-win_amd64.whl", hash = "sha256:860ec6e63e2c5c2ee5e9121808145c7bf86c96cca9ad396c0bd3e0f2798ccbe2", size = 12865501 },
29
+ { url = "https://files.pythonhosted.org/packages/aa/9c/9a6ec3ae89cd0648d419781284308f2956d2a61d932b5ac9682c956a171b/numpy-2.1.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b42a1a511c81cc78cbc4539675713bbcf9d9c3913386243ceff0e9429ca892fe", size = 21154845 },
30
+ { url = "https://files.pythonhosted.org/packages/02/69/9f05c4ecc75fabf297b17743996371b4c3dfc4d92e15c5c38d8bb3db8d74/numpy-2.1.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:faa88bc527d0f097abdc2c663cddf37c05a1c2f113716601555249805cf573f1", size = 13789409 },
31
+ { url = "https://files.pythonhosted.org/packages/34/4e/f95c99217bf77bbfaaf660d693c10bd0dc03b6032d19316d316088c9e479/numpy-2.1.2-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:c82af4b2ddd2ee72d1fc0c6695048d457e00b3582ccde72d8a1c991b808bb20f", size = 5352097 },
32
+ { url = "https://files.pythonhosted.org/packages/06/13/f5d87a497c16658e9af8920449b0b5692b469586b8231340c672962071c5/numpy-2.1.2-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:13602b3174432a35b16c4cfb5de9a12d229727c3dd47a6ce35111f2ebdf66ff4", size = 6891195 },
33
+ { url = "https://files.pythonhosted.org/packages/6c/89/691ac07429ac061b344d5e37fa8e94be51a6017734aea15f2d9d7c6d119a/numpy-2.1.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1ebec5fd716c5a5b3d8dfcc439be82a8407b7b24b230d0ad28a81b61c2f4659a", size = 13895153 },
34
+ { url = "https://files.pythonhosted.org/packages/23/69/538317f0d925095537745f12aced33be1570bbdc4acde49b33748669af96/numpy-2.1.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e2b49c3c0804e8ecb05d59af8386ec2f74877f7ca8fd9c1e00be2672e4d399b1", size = 16338306 },
35
+ { url = "https://files.pythonhosted.org/packages/af/03/863fe7062c2106d3c151f7df9353f2ae2237c1dd6900f127a3eb1f24cb1b/numpy-2.1.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:2cbba4b30bf31ddbe97f1c7205ef976909a93a66bb1583e983adbd155ba72ac2", size = 16710893 },
36
+ { url = "https://files.pythonhosted.org/packages/70/77/0ad9efe25482009873f9660d29a40a8c41a6f0e8b541195e3c95c70684c5/numpy-2.1.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8e00ea6fc82e8a804433d3e9cedaa1051a1422cb6e443011590c14d2dea59146", size = 14398048 },
37
+ { url = "https://files.pythonhosted.org/packages/3e/0f/e785fe75544db9f2b0bb1c181e13ceff349ce49753d807fd9672916aa06d/numpy-2.1.2-cp311-cp311-win32.whl", hash = "sha256:5006b13a06e0b38d561fab5ccc37581f23c9511879be7693bd33c7cd15ca227c", size = 6533458 },
38
+ { url = "https://files.pythonhosted.org/packages/d4/96/450054662295125af861d48d2c4bc081dadcf1974a879b2104613157aa62/numpy-2.1.2-cp311-cp311-win_amd64.whl", hash = "sha256:f1eb068ead09f4994dec71c24b2844f1e4e4e013b9629f812f292f04bd1510d9", size = 12870896 },
39
+ { url = "https://files.pythonhosted.org/packages/a0/7d/554a6838f37f3ada5a55f25173c619d556ae98092a6e01afb6e710501d70/numpy-2.1.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d7bf0a4f9f15b32b5ba53147369e94296f5fffb783db5aacc1be15b4bf72f43b", size = 20848077 },
40
+ { url = "https://files.pythonhosted.org/packages/b0/29/cb48a402ea879e645b16218718f3f7d9588a77d674a9dcf22e4c43487636/numpy-2.1.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b1d0fcae4f0949f215d4632be684a539859b295e2d0cb14f78ec231915d644db", size = 13493242 },
41
+ { url = "https://files.pythonhosted.org/packages/56/44/f899b0581766c230da42f751b7b8896d096640b19b312164c267e48d36cb/numpy-2.1.2-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:f751ed0a2f250541e19dfca9f1eafa31a392c71c832b6bb9e113b10d050cb0f1", size = 5089219 },
42
+ { url = "https://files.pythonhosted.org/packages/79/8f/b987070d45161a7a4504afc67ed38544ed2c0ed5576263599a0402204a9c/numpy-2.1.2-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:bd33f82e95ba7ad632bc57837ee99dba3d7e006536200c4e9124089e1bf42426", size = 6620167 },
43
+ { url = "https://files.pythonhosted.org/packages/c4/a7/af3329fda3c3ec31d9b650e42bbcd3422fc62a765cbb1405fde4177a0996/numpy-2.1.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1b8cde4f11f0a975d1fd59373b32e2f5a562ade7cde4f85b7137f3de8fbb29a0", size = 13604905 },
44
+ { url = "https://files.pythonhosted.org/packages/9b/b4/e3c7e6fab0f77fff6194afa173d1f2342073d91b1d3b4b30b17c3fb4407a/numpy-2.1.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d95f286b8244b3649b477ac066c6906fbb2905f8ac19b170e2175d3d799f4df", size = 16041825 },
45
+ { url = "https://files.pythonhosted.org/packages/e9/50/6828e66a78aa03147c111f84d55f33ce2dde547cb578d6744a3b06a0124b/numpy-2.1.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:ab4754d432e3ac42d33a269c8567413bdb541689b02d93788af4131018cbf366", size = 16409541 },
46
+ { url = "https://files.pythonhosted.org/packages/bf/72/66af7916d9c3c6dbfbc8acdd4930c65461e1953374a2bc43d00f948f004a/numpy-2.1.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e585c8ae871fd38ac50598f4763d73ec5497b0de9a0ab4ef5b69f01c6a046142", size = 14081134 },
47
+ { url = "https://files.pythonhosted.org/packages/dc/5a/59a67d84f33fe00ae74f0b5b69dd4f93a586a4aba7f7e19b54b2133db038/numpy-2.1.2-cp312-cp312-win32.whl", hash = "sha256:9c6c754df29ce6a89ed23afb25550d1c2d5fdb9901d9c67a16e0b16eaf7e2550", size = 6237784 },
48
+ { url = "https://files.pythonhosted.org/packages/4c/79/73735a6a5dad6059c085f240a4e74c9270feccd2bc66e4d31b5ca01d329c/numpy-2.1.2-cp312-cp312-win_amd64.whl", hash = "sha256:456e3b11cb79ac9946c822a56346ec80275eaf2950314b249b512896c0d2505e", size = 12568254 },
49
+ { url = "https://files.pythonhosted.org/packages/16/72/716fa1dbe92395a9a623d5049203ff8ddb0cfce65b9df9117c3696ccc011/numpy-2.1.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:a84498e0d0a1174f2b3ed769b67b656aa5460c92c9554039e11f20a05650f00d", size = 20834690 },
50
+ { url = "https://files.pythonhosted.org/packages/1e/fb/3e85a39511586053b5c6a59a643879e376fae22230ebfef9cfabb0e032e2/numpy-2.1.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:4d6ec0d4222e8ffdab1744da2560f07856421b367928026fb540e1945f2eeeaf", size = 13507474 },
51
+ { url = "https://files.pythonhosted.org/packages/35/eb/5677556d9ba13436dab51e129f98d4829d95cd1b6bd0e199c14485a4bdb9/numpy-2.1.2-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:259ec80d54999cc34cd1eb8ded513cb053c3bf4829152a2e00de2371bd406f5e", size = 5074742 },
52
+ { url = "https://files.pythonhosted.org/packages/3e/c5/6c5ef5ba41b65a7e51bed50dbf3e1483eb578055633dd013e811a28e96a1/numpy-2.1.2-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:675c741d4739af2dc20cd6c6a5c4b7355c728167845e3c6b0e824e4e5d36a6c3", size = 6606787 },
53
+ { url = "https://files.pythonhosted.org/packages/08/ac/f2f29dd4fd325b379c7dc932a0ebab22f0e031dbe80b2f6019b291a3a544/numpy-2.1.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:05b2d4e667895cc55e3ff2b56077e4c8a5604361fc21a042845ea3ad67465aa8", size = 13601333 },
54
+ { url = "https://files.pythonhosted.org/packages/44/26/63f5f4e5089654dfb858f4892215ed968cd1a68e6f4a83f9961f84f855cb/numpy-2.1.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:43cca367bf94a14aca50b89e9bc2061683116cfe864e56740e083392f533ce7a", size = 16038090 },
55
+ { url = "https://files.pythonhosted.org/packages/1d/21/015e0594de9c3a8d5edd24943d2bd23f102ec71aec026083f822f86497e2/numpy-2.1.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:76322dcdb16fccf2ac56f99048af32259dcc488d9b7e25b51e5eca5147a3fb98", size = 16410865 },
56
+ { url = "https://files.pythonhosted.org/packages/df/01/c1bcf9e6025d79077fbf3f3ee503b50aa7bfabfcd8f4b54f5829f4c00f3f/numpy-2.1.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:32e16a03138cabe0cb28e1007ee82264296ac0983714094380b408097a418cfe", size = 14078077 },
57
+ { url = "https://files.pythonhosted.org/packages/ba/06/db9d127d63bd11591770ba9f3d960f8041e0f895184b9351d4b1b5b56983/numpy-2.1.2-cp313-cp313-win32.whl", hash = "sha256:242b39d00e4944431a3cd2db2f5377e15b5785920421993770cddb89992c3f3a", size = 6234904 },
58
+ { url = "https://files.pythonhosted.org/packages/a9/96/9f61f8f95b6e0ea0aa08633b704c75d1882bdcb331bdf8bfd63263b25b00/numpy-2.1.2-cp313-cp313-win_amd64.whl", hash = "sha256:f2ded8d9b6f68cc26f8425eda5d3877b47343e68ca23d0d0846f4d312ecaa445", size = 12561910 },
59
+ { url = "https://files.pythonhosted.org/packages/36/b8/033f627821784a48e8f75c218033471eebbaacdd933f8979c79637a1b44b/numpy-2.1.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:2ffef621c14ebb0188a8633348504a35c13680d6da93ab5cb86f4e54b7e922b5", size = 20857719 },
60
+ { url = "https://files.pythonhosted.org/packages/96/46/af5726fde5b74ed83f2f17a73386d399319b7ed4d51279fb23b721d0816d/numpy-2.1.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:ad369ed238b1959dfbade9018a740fb9392c5ac4f9b5173f420bd4f37ba1f7a0", size = 13518826 },
61
+ { url = "https://files.pythonhosted.org/packages/db/6e/8ce677edf36da1c4dae80afe5529f47690697eb55b4864673af260ccea7b/numpy-2.1.2-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:d82075752f40c0ddf57e6e02673a17f6cb0f8eb3f587f63ca1eaab5594da5b17", size = 5115036 },
62
+ { url = "https://files.pythonhosted.org/packages/6a/ba/3cce44fb1b8438042c11847048812a776f75ee0e7070179c22e4cfbf420c/numpy-2.1.2-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:1600068c262af1ca9580a527d43dc9d959b0b1d8e56f8a05d830eea39b7c8af6", size = 6628641 },
63
+ { url = "https://files.pythonhosted.org/packages/59/c8/e722998720ccbd35ffbcf1d1b8ed0aa2304af88d3f1c38e06ebf983599b3/numpy-2.1.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a26ae94658d3ba3781d5e103ac07a876b3e9b29db53f68ed7df432fd033358a8", size = 13574803 },
64
+ { url = "https://files.pythonhosted.org/packages/7c/8e/fc1fdd83a55476765329ac2913321c4aed5b082a7915095628c4ca30ea72/numpy-2.1.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:13311c2db4c5f7609b462bc0f43d3c465424d25c626d95040f073e30f7570e35", size = 16021174 },
65
+ { url = "https://files.pythonhosted.org/packages/2a/b6/a790742aa88067adb4bd6c89a946778c1417d4deaeafce3ca928f26d4c52/numpy-2.1.2-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:2abbf905a0b568706391ec6fa15161fad0fb5d8b68d73c461b3c1bab6064dd62", size = 16400117 },
66
+ { url = "https://files.pythonhosted.org/packages/48/6f/129e3c17e3befe7fefdeaa6890f4c4df3f3cf0831aa053802c3862da67aa/numpy-2.1.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:ef444c57d664d35cac4e18c298c47d7b504c66b17c2ea91312e979fcfbdfb08a", size = 14066202 },
67
+ { url = "https://files.pythonhosted.org/packages/73/c9/3e1d6bbe6d3d2e2c5a9483b24b2f29a229b323f62054278a3bba7fee11e5/numpy-2.1.2-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:bdd407c40483463898b84490770199d5714dcc9dd9b792f6c6caccc523c00952", size = 20981945 },
68
+ { url = "https://files.pythonhosted.org/packages/6e/62/989c4988bde1a8e08117fccc3bab73d2886421fb98cde597168714f3c54e/numpy-2.1.2-pp310-pypy310_pp73-macosx_14_0_x86_64.whl", hash = "sha256:da65fb46d4cbb75cb417cddf6ba5e7582eb7bb0b47db4b99c9fe5787ce5d91f5", size = 6750558 },
69
+ { url = "https://files.pythonhosted.org/packages/53/b1/00ef9f30975f1312a53257f68e57b4513d14d537e03d507e2773a684b1e8/numpy-2.1.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1c193d0b0238638e6fc5f10f1b074a6993cb13b0b431f64079a509d63d3aa8b7", size = 16141552 },
70
+ { url = "https://files.pythonhosted.org/packages/c0/ec/0c04903b48dfea6be1d7b47ba70f98709fb7198fd970784a1400c391d522/numpy-2.1.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:a7d80b2e904faa63068ead63107189164ca443b42dd1930299e0d1cb041cec2e", size = 12789924 },
71
+ ]
72
+
73
+ [[package]]
74
+ name = "xm-slurm-example"
75
+ version = "0.0.1"
76
+ source = { virtual = "." }
77
+ dependencies = [
78
+ { name = "absl-py" },
79
+ { name = "numpy" },
80
+ ]
81
+
82
+ [package.metadata]
83
+ requires-dist = [
84
+ { name = "absl-py" },
85
+ { name = "numpy" },
86
+ ]
@@ -5,7 +5,7 @@ build-backend = "hatchling.build"
5
5
  [project]
6
6
  name = "xmanager-slurm"
7
7
  description = "Slurm backend for XManager."
8
- version = "0.4.0"
8
+ version = "0.4.1"
9
9
  # readme = "README.md"
10
10
  requires-python = ">=3.10"
11
11
  license = { text = "MIT" }
@@ -36,6 +36,9 @@ dependencies = [
36
36
  [project.urls]
37
37
  "GitHub" = "https://github.com/jessefarebro/xm-slurm"
38
38
 
39
+ [project.scripts]
40
+ xm = "xm_slurm.scripts.cli:main"
41
+
39
42
  [tool.uv]
40
43
  dev-dependencies = [
41
44
  "mypy>=1.11.2",
@@ -0,0 +1,149 @@
1
+ import datetime as dt
2
+ from typing import Callable
3
+
4
+ import pytest
5
+
6
+ from xm_slurm import config, dependencies, execution
7
+
8
+ SlurmHandleGenerator = Callable[[str], execution.SlurmHandle]
9
+
10
+
11
@pytest.fixture
def slurm_handle() -> SlurmHandleGenerator:
    """Provide a factory that builds a `SlurmHandle` for a given Slurm job id.

    Every handle produced by the factory shares one SSH config pointing at
    `localhost`, experiment id 0 and the job name "job".
    """
    localhost_ssh = config.SlurmSSHConfig(host="localhost")

    def _make_handle(job_id: str) -> execution.SlurmHandle:
        handle = execution.SlurmHandle(
            experiment_id=0,
            ssh=localhost_ssh,
            slurm_job=job_id,
            job_name="job",
        )
        return handle

    return _make_handle
21
+
22
+
23
def test_slurm_job_dependency_and(slurm_handle: SlurmHandleGenerator):
    """`&` on two `after` dependencies gives an AND dependency joined with commas."""
    first, second = (
        dependencies.SlurmJobDependencyAfter([slurm_handle(job_id)]) for job_id in ("123", "456")
    )

    conjunction = first & second

    assert isinstance(conjunction, dependencies.SlurmJobDependencyAND)
    assert conjunction.to_dependency_str() == "after:123,after:456"
29
+
30
+
31
def test_slurm_job_dependency_or(slurm_handle: SlurmHandleGenerator):
    """`|` on two `after` dependencies gives an OR dependency joined with `?`."""
    first, second = (
        dependencies.SlurmJobDependencyAfter([slurm_handle(job_id)]) for job_id in ("123", "456")
    )

    disjunction = first | second

    assert isinstance(disjunction, dependencies.SlurmJobDependencyOR)
    assert disjunction.to_dependency_str() == "after:123?after:456"
37
+
38
+
39
def test_slurm_job_dependency_mixing_logical_operations(slurm_handle: SlurmHandleGenerator):
    """Mixing `&` and `|` within one dependency expression must raise."""
    import re

    dep1 = dependencies.SlurmJobDependencyAfter([slurm_handle("123")])
    dep2 = dependencies.SlurmJobDependencyAfter([slurm_handle("456")])
    dep3 = dependencies.SlurmJobDependencyAfter([slurm_handle("789")])

    # `pytest.raises(match=...)` applies the pattern with `re.search`, so the
    # message is escaped to compare it literally (a bare "." matches any character).
    mixed_operator_error = re.escape(
        "Slurm only supports chaining dependencies with the same logical operator. "
    )

    # AND result combined with OR.
    with pytest.raises(dependencies.SlurmDependencyException, match=mixed_operator_error):
        dep1 & dep2 | dep3  # type: ignore

    # `&` binds tighter than `|`, so this is `dep1 | (dep2 & dep3)`.
    with pytest.raises(dependencies.SlurmDependencyException, match=mixed_operator_error):
        dep1 | dep2 & dep3  # type: ignore
54
+
55
+
56
def test_slurm_job_dependency_chaining_and(slurm_handle: SlurmHandleGenerator):
    """Chaining four dependencies with `&` renders one comma-separated string."""
    first, second, third, fourth = (
        dependencies.SlurmJobDependencyAfter([slurm_handle(job_id)])
        for job_id in ("1", "2", "3", "4")
    )

    chained = first & second & third & fourth

    assert isinstance(chained, dependencies.SlurmJobDependencyAND)
    assert chained.to_dependency_str() == "after:1,after:2,after:3,after:4"
64
+
65
+
66
def test_slurm_job_dependency_chaining_or(slurm_handle: SlurmHandleGenerator):
    """Chaining four dependencies with `|` renders one `?`-separated string."""
    first, second, third, fourth = (
        dependencies.SlurmJobDependencyAfter([slurm_handle(job_id)])
        for job_id in ("1", "2", "3", "4")
    )

    chained = first | second | third | fourth

    assert isinstance(chained, dependencies.SlurmJobDependencyOR)
    assert chained.to_dependency_str() == "after:1?after:2?after:3?after:4"
74
+
75
+
76
def test_slurm_job_dependency_after(slurm_handle: SlurmHandleGenerator):
    """A single-handle `after` dependency renders as `after:<job id>`."""
    handles = [slurm_handle("123")]
    rendered = dependencies.SlurmJobDependencyAfter(handles).to_dependency_str()
    assert rendered == "after:123"
79
+
80
+
81
def test_slurm_job_dependency_after_with_time(slurm_handle: SlurmHandleGenerator):
    """A ten-minute `time` is appended to the job id as `+10`."""
    ten_minutes = dt.timedelta(minutes=10)
    delayed = dependencies.SlurmJobDependencyAfter([slurm_handle("123")], time=ten_minutes)
    assert delayed.to_dependency_str() == "after:123+10"
84
+
85
+
86
def test_slurm_job_dependency_after_with_invalid_time(slurm_handle: SlurmHandleGenerator):
    """A `time` that is not a whole number of minutes is rejected at construction."""
    half_a_minute = dt.timedelta(seconds=30)
    expected_error = pytest.raises(
        dependencies.SlurmDependencyException,
        match="Time must be specified in exact minutes",
    )
    with expected_error:
        dependencies.SlurmJobDependencyAfter([slurm_handle("123")], time=half_a_minute)
91
+
92
+
93
@pytest.mark.parametrize(
    "dependency_cls,dependency_type",
    [
        (dependencies.SlurmJobDependencyAfter, "after"),
        (dependencies.SlurmJobDependencyAfterAny, "afterany"),
        (dependencies.SlurmJobDependencyAfterNotOK, "afternotok"),
        (dependencies.SlurmJobDependencyAfterOK, "afterok"),
    ],
)
def test_slurm_job_dependency_after_not_ok(
    slurm_handle: SlurmHandleGenerator,
    dependency_cls: type,
    dependency_type: str,
):
    """Each `after*` dependency renders its type followed by colon-separated job ids."""
    job_ids = ("123", "456")
    handles = [slurm_handle(job_id) for job_id in job_ids]

    rendered = dependency_cls(handles).to_dependency_str()

    assert rendered == ":".join((dependency_type, *job_ids))
109
+
110
+
111
@pytest.mark.parametrize(
    "dependency_cls",
    [
        dependencies.SlurmJobDependencyAfter,
        dependencies.SlurmJobDependencyAfterAny,
        dependencies.SlurmJobDependencyAfterNotOK,
        dependencies.SlurmJobDependencyAfterOK,
    ],
)
def test_slurm_job_dependency_after_no_handles(dependency_cls: type):
    """Constructing any `after*` dependency with no handles must raise."""
    import re

    # `match` is a regex applied with `re.search`; escape it so the trailing
    # "." is compared literally instead of matching any character.
    no_handles_error = re.escape("Dependency doesn't have any handles.")

    with pytest.raises(dependencies.SlurmDependencyException, match=no_handles_error):
        dependency_cls([])
125
+
126
+
127
def test_dependency_flatten(slurm_handle: SlurmHandleGenerator):
    """`flatten()` on a chained AND returns the operands as a tuple, in order."""
    operands = tuple(
        dependencies.SlurmJobDependencyAfter([slurm_handle(job_id)]) for job_id in ("1", "2", "3")
    )
    first, second, third = operands

    chained = first & second & third

    assert chained.flatten() == operands
133
+
134
+
135
def test_dependency_traverse(slurm_handle: SlurmHandleGenerator):
    """`traverse()` applies a mapping function to the dependencies of a chained AND.

    Every `afterok` operand is swapped for an `afternotok` on the same handles,
    and the rewritten chain must render accordingly.
    """
    first, second, third = (
        dependencies.SlurmJobDependencyAfterOK([slurm_handle(job_id)])
        for job_id in ("1", "2", "3")
    )
    chained = first & second & third

    def _flip_to_not_ok(dep: dependencies.SlurmJobDependency):
        # Anything that is not an `afterok` dependency passes through unchanged.
        if not isinstance(dep, dependencies.SlurmJobDependencyAfterOK):
            return dep
        return dependencies.SlurmJobDependencyAfterNotOK(dep.handles)

    flipped = chained.traverse(_flip_to_not_ok)

    assert all(
        isinstance(leaf, dependencies.SlurmJobDependencyAfterNotOK) for leaf in flipped.flatten()
    )
    assert flipped.to_dependency_str() == "afternotok:1,afternotok:2,afternotok:3"
@@ -2196,7 +2196,7 @@ wheels = [
2196
2196
 
2197
2197
  [[package]]
2198
2198
  name = "xmanager-slurm"
2199
- version = "0.3.2"
2199
+ version = "0.4.0"
2200
2200
  source = { editable = "." }
2201
2201
  dependencies = [
2202
2202
  { name = "asyncssh" },
@@ -10,6 +10,7 @@ from xm_slurm.experiment import (
10
10
  get_current_work_unit,
11
11
  get_experiment,
12
12
  )
13
+ from xm_slurm.job_blocks import JobArgs
13
14
  from xm_slurm.packageables import (
14
15
  conda_container,
15
16
  docker_container,
@@ -34,13 +35,14 @@ __all__ = [
34
35
  "get_current_experiment",
35
36
  "get_current_work_unit",
36
37
  "get_experiment",
38
+ "JobArgs",
37
39
  "JobRequirements",
38
40
  "mamba_container",
39
- "uv_container",
40
41
  "python_container",
41
42
  "ResourceQuantity",
42
43
  "ResourceType",
43
44
  "Slurm",
44
- "SlurmSpec",
45
45
  "SlurmExperiment",
46
+ "SlurmSpec",
47
+ "uv_container",
46
48
  ]
@@ -322,7 +322,7 @@ class XManagerSqliteAPI(XManagerAPI):
322
322
  db_path = Path(os.environ["XM_SLURM_STATE_DIR"]) / "db.sqlite3"
323
323
  else:
324
324
  db_path = Path.home() / ".local" / "state" / "xm-slurm" / "db.sqlite3"
325
- logging.debug("Looking for db at: ", db_path)
325
# The path is passed as a lazy %-style argument, so the message needs a
# placeholder; without one the logging module cannot format the record.
logger.debug("Looking for db at: %s", db_path)
326
326
  db_path.parent.mkdir(parents=True, exist_ok=True)
327
327
  engine = create_engine(f"sqlite:///{db_path}")
328
328
  Base.metadata.create_all(engine)
@@ -5,9 +5,10 @@ import getpass
5
5
  import json
6
6
  import os
7
7
  import pathlib
8
- from typing import Literal, Mapping, NamedTuple
8
+ from typing import Callable, Literal, Mapping, NamedTuple
9
9
 
10
10
  import asyncssh
11
+ from xmanager import xm
11
12
 
12
13
  from xm_slurm import constants
13
14
 
@@ -137,7 +138,7 @@ class SlurmSSHConfig:
137
138
  )
138
139
 
139
140
  def __hash__(self):
140
- return hash((self.host, self.host_public_key, self.user, self.port))
141
+ return hash((type(self), self.host, self.host_public_key, self.user, self.port))
141
142
 
142
143
 
143
144
  @dataclasses.dataclass(frozen=True, kw_only=True)
@@ -176,6 +177,9 @@ class SlurmClusterConfig:
176
177
 
177
178
  features: Mapping["xm_slurm.FeatureType", str] = dataclasses.field(default_factory=dict) # type: ignore # noqa: F821
178
179
 
180
+ # Function to validate the Slurm executor config
181
+ validate: Callable[[xm.Job], None] | None = None
182
+
179
183
  def __post_init__(self) -> None:
180
184
  for src, dst in self.mounts.items():
181
185
  if not isinstance(src, (str, os.PathLike)):
@@ -194,6 +198,7 @@ class SlurmClusterConfig:
194
198
 
195
199
  def __hash__(self):
196
200
  return hash((
201
+ type(self),
197
202
  self.ssh,
198
203
  self.cwd,
199
204
  self.prolog,
@@ -1,5 +1,9 @@
1
1
  import re
2
2
 
3
# Matches a whole Slurm job id string: a numeric `jobid`, optionally followed by
# exactly one suffix, either `+<componentid>` or `_<arraytaskid>` (both numeric).
SLURM_JOB_ID_REGEX = re.compile(
    r"^(?P<jobid>\d+)(?:(?:\+(?P<componentid>\d+))|(?:_(?P<arraytaskid>\d+)))?$"
)
6
+
3
7
  IMAGE_URI_REGEX = re.compile(
4
8
  r"^(?P<scheme>(?:[^:]+://)?)?(?P<domain>[^/]+)(?P<path>/[^:]*)?(?::(?P<tag>[^@]+))?@?(?P<digest>.+)?$"
5
9
  )
@@ -1,13 +1,20 @@
1
+ import datetime as dt
2
+ import logging
1
3
  import os
2
4
 
5
+ from xmanager import xm
6
+
3
7
  from xm_slurm import config, resources
4
8
  from xm_slurm.contrib.clusters import drac
9
+ from xm_slurm.executors import Slurm
5
10
 
6
11
  # ComputeCanada alias
7
12
  cc = drac
8
13
 
9
14
  __all__ = ["drac", "mila", "cc"]
10
15
 
16
+ logger = logging.getLogger(__name__)
17
+
11
18
 
12
19
  def mila(
13
20
  *,
@@ -24,6 +31,23 @@ def mila(
24
31
  "/home/mila/${USER:0:1}/$USER/.ssh": "/home/mila/${USER:0:1}/$USER/.ssh",
25
32
  }
26
33
 
34
def validate(job: xm.Job) -> None:
    """Warn when a job asks for requeue with a grace period outside a `-grace` partition.

    Args:
        job: The job to inspect. Its executor must be a `Slurm` executor.
    """
    assert isinstance(job.executor, Slurm)
    executor = job.executor

    wants_requeue_with_grace_period = (
        executor.requeue and executor.timeout_signal_grace_period > dt.timedelta(0)
    )
    # An unset partition falls back to "main", so `partition` is always a
    # string here and needs no separate `None` check.
    partition = executor.partition or "main"

    if wants_requeue_with_grace_period and not partition.endswith("-grace"):
        # Lazy %-style arguments: the message is only formatted if the record is emitted.
        logger.warning(
            "Job %s wants requeue with grace period, but partition `%s` does not end with '-grace'. "
            "Mila Cluster requires you specify a grace partition. "
            "This may result in the job not being requeued properly.",
            job.name,
            partition,
        )
50
+
27
51
  return config.SlurmClusterConfig(
28
52
  name="mila",
29
53
  ssh=config.SlurmSSHConfig(
@@ -58,4 +82,5 @@ def mila(
58
82
  resources.FeatureType.NVIDIA_MIG: "mig",
59
83
  resources.FeatureType.NVIDIA_NVLINK: "nvlink",
60
84
  },
85
+ validate=validate,
61
86
  )