xmanager-slurm 0.4.0__tar.gz → 0.4.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xmanager-slurm might be problematic. Click here for more details.

Files changed (112) hide show
  1. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/PKG-INFO +1 -1
  2. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/examples/job-array-sweep/launch.py +1 -1
  3. xmanager_slurm-0.4.2/examples/job-dependencies/eval.py +13 -0
  4. xmanager_slurm-0.4.2/examples/job-dependencies/launch.py +104 -0
  5. xmanager_slurm-0.4.2/examples/job-dependencies/pyproject.toml +6 -0
  6. xmanager_slurm-0.4.2/examples/job-dependencies/train.py +19 -0
  7. xmanager_slurm-0.4.2/examples/job-dependencies/uv.lock +86 -0
  8. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/pyproject.toml +4 -1
  9. xmanager_slurm-0.4.2/tests/test_dependencies.py +149 -0
  10. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/uv.lock +1 -1
  11. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/xm_slurm/__init__.py +4 -2
  12. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/xm_slurm/api.py +1 -1
  13. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/xm_slurm/config.py +7 -2
  14. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/xm_slurm/constants.py +4 -0
  15. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/xm_slurm/contrib/clusters/__init__.py +9 -0
  16. xmanager_slurm-0.4.2/xm_slurm/dependencies.py +171 -0
  17. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/xm_slurm/executables.py +20 -15
  18. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/xm_slurm/execution.py +246 -96
  19. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/xm_slurm/executors.py +8 -12
  20. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/xm_slurm/experiment.py +374 -83
  21. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/xm_slurm/experimental/parameter_controller.py +12 -10
  22. xmanager_slurm-0.4.0/xm_slurm/packaging/docker/local.py → xmanager_slurm-0.4.2/xm_slurm/packaging/docker.py +126 -32
  23. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/xm_slurm/packaging/router.py +3 -1
  24. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/xm_slurm/packaging/utils.py +4 -28
  25. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/xm_slurm/resources.py +2 -0
  26. xmanager_slurm-0.4.2/xm_slurm/scripts/cli.py +77 -0
  27. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/xm_slurm/templates/docker/mamba.Dockerfile +1 -1
  28. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/xm_slurm/templates/slurm/fragments/monitor.bash.j2 +5 -0
  29. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/xm_slurm/templates/slurm/job-array.bash.j2 +1 -2
  30. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/xm_slurm/templates/slurm/job.bash.j2 +4 -3
  31. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/xm_slurm/templates/slurm/runtimes/apptainer.bash.j2 +1 -0
  32. xmanager_slurm-0.4.2/xm_slurm/types.py +23 -0
  33. xmanager_slurm-0.4.0/xm_slurm/packaging/docker/__init__.py +0 -69
  34. xmanager_slurm-0.4.0/xm_slurm/packaging/docker/abc.py +0 -112
  35. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/.devcontainer.json +0 -0
  36. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/.github/workflows/ci.yml +0 -0
  37. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/.github/workflows/deploy-docs.yml +0 -0
  38. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/.gitignore +0 -0
  39. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/.pre-commit-config.yaml +0 -0
  40. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/.vscode/settings.json +0 -0
  41. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/LICENSE.md +0 -0
  42. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/README.md +0 -0
  43. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/docs/api/executables.rst +0 -0
  44. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/docs/api/executors.rst +0 -0
  45. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/docs/api/packageables.rst +0 -0
  46. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/docs/assets/workflow-dark.svg +0 -0
  47. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/docs/assets/workflow-light.svg +0 -0
  48. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/docs/conf.py +0 -0
  49. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/docs/getting-started/xmanager.md +0 -0
  50. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/docs/guides/index.md +0 -0
  51. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/docs/guides/remote-dev.md +0 -0
  52. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/docs/index.md +0 -0
  53. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/examples/conda/environment.yml +0 -0
  54. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/examples/conda/launch.py +0 -0
  55. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/examples/conda/main.py +0 -0
  56. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/examples/conda/pyproject.toml +0 -0
  57. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/examples/custom-dockerfile/Dockerfile +0 -0
  58. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/examples/custom-dockerfile/launch.py +0 -0
  59. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/examples/custom-dockerfile/pyproject.toml +0 -0
  60. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/examples/job-array-sweep/main.py +0 -0
  61. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/examples/job-array-sweep/pyproject.toml +0 -0
  62. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/examples/job-array-sweep/uv.lock +0 -0
  63. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/examples/job-group/Dockerfile +0 -0
  64. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/examples/job-group/launch.py +0 -0
  65. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/examples/job-group/pyproject.toml +0 -0
  66. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/examples/job-group/uv.lock +0 -0
  67. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/examples/metadata/launch.py +0 -0
  68. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/examples/metadata/main.py +0 -0
  69. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/examples/metadata/pyproject.toml +0 -0
  70. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/examples/metadata/requirements.txt +0 -0
  71. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/examples/parameter-controller/launch.py +0 -0
  72. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/examples/parameter-controller/main.py +0 -0
  73. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/examples/parameter-controller/pyproject.toml +0 -0
  74. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/examples/parameter-controller/requirements.txt +0 -0
  75. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/examples/pip/launch.py +0 -0
  76. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/examples/pip/main.py +0 -0
  77. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/examples/pip/pyproject.toml +0 -0
  78. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/examples/pip/requirements.txt +0 -0
  79. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/examples/uv/launch.py +0 -0
  80. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/examples/uv/pyproject.toml +0 -0
  81. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/examples/uv/uv.lock +0 -0
  82. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/tests/integration/conftest.py +0 -0
  83. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/tests/integration/fixtures/slurm/Dockerfile +0 -0
  84. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/tests/integration/fixtures/slurm/README.md +0 -0
  85. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/tests/integration/fixtures/slurm/cgroup.conf +0 -0
  86. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/tests/integration/fixtures/slurm/docker-compose.yml +0 -0
  87. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/tests/integration/fixtures/slurm/docker-entrypoint.sh +0 -0
  88. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/tests/integration/fixtures/slurm/host_ed25519 +0 -0
  89. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/tests/integration/fixtures/slurm/host_ed25519.pub +0 -0
  90. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/tests/integration/fixtures/slurm/id_ed25519 +0 -0
  91. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/tests/integration/fixtures/slurm/id_ed25519.pub +0 -0
  92. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/tests/integration/fixtures/slurm/slurm.conf +0 -0
  93. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/tests/integration/fixtures/slurm/slurmdbd.conf +0 -0
  94. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/tests/integration/fixtures/slurm/sshd_config +0 -0
  95. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/tests/integration/test_remote_execution.py +0 -0
  96. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/xm_slurm/batching.py +0 -0
  97. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/xm_slurm/console.py +0 -0
  98. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/xm_slurm/contrib/__init__.py +0 -0
  99. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/xm_slurm/contrib/clusters/drac.py +0 -0
  100. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/xm_slurm/job_blocks.py +0 -0
  101. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/xm_slurm/packageables.py +0 -0
  102. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/xm_slurm/packaging/__init__.py +0 -0
  103. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/xm_slurm/packaging/registry.py +0 -0
  104. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/xm_slurm/scripts/_cloudpickle.py +0 -0
  105. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/xm_slurm/status.py +0 -0
  106. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/xm_slurm/templates/docker/docker-bake.hcl.j2 +0 -0
  107. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/xm_slurm/templates/docker/python.Dockerfile +0 -0
  108. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/xm_slurm/templates/docker/uv.Dockerfile +0 -0
  109. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/xm_slurm/templates/slurm/fragments/proxy.bash.j2 +0 -0
  110. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/xm_slurm/templates/slurm/job-group.bash.j2 +0 -0
  111. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/xm_slurm/templates/slurm/runtimes/podman.bash.j2 +0 -0
  112. {xmanager_slurm-0.4.0 → xmanager_slurm-0.4.2}/xm_slurm/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: xmanager-slurm
3
- Version: 0.4.0
3
+ Version: 0.4.2
4
4
  Summary: Slurm backend for XManager.
5
5
  Project-URL: GitHub, https://github.com/jessefarebro/xm-slurm
6
6
  Author-email: Jesse Farebrother <jfarebro@cs.mcgill.ca>
@@ -46,7 +46,7 @@ async def main(_):
46
46
  )
47
47
  )
48
48
 
49
- args = [{"args": {"scale": scale}} for scale in range(3)]
49
+ args = [xm_slurm.JobArgs(args={"scale": scale}) for scale in range(3)]
50
50
  wus = await experiment.add(make_job, args)
51
51
 
52
52
  for wu, status in zip(wus, await asyncio.gather(*[wu.get_status() for wu in wus])):
@@ -0,0 +1,13 @@
1
+ import numpy as np
2
+ from absl import app, flags
3
+
4
+ INPUT_FILE = flags.DEFINE_string("input_file", "result.npy", "Input file path")
5
+
6
+
7
+ def main(_):
8
+ result = np.load(INPUT_FILE.value)
9
+ print(f"Received result: {result}")
10
+
11
+
12
+ if __name__ == "__main__":
13
+ app.run(main)
@@ -0,0 +1,104 @@
1
+ import asyncio
2
+ import datetime as dt
3
+ import pathlib
4
+
5
+ from absl import app
6
+ from xmanager import xm
7
+
8
+ import xm_slurm
9
+ import xm_slurm.contrib.clusters
10
+
11
+
12
+ @xm.run_in_asyncio_loop
13
+ async def main(_):
14
+ async with xm_slurm.create_experiment("My Experiment") as experiment:
15
+ # Step 1: Specify the executor specification
16
+ executor_spec = xm_slurm.Slurm.Spec(tag="ghcr.io/jessefarebro/xm-slurm/test:latest")
17
+
18
+ # Step 2: Specify the executable and package it
19
+ [train_executable, eval_executable] = experiment.package(
20
+ [
21
+ xm_slurm.uv_container(
22
+ executor_spec=executor_spec,
23
+ entrypoint=xm.CommandList(["train.py"]),
24
+ ),
25
+ xm_slurm.uv_container(
26
+ executor_spec=executor_spec,
27
+ entrypoint=xm.CommandList(["eval.py"]),
28
+ ),
29
+ ],
30
+ )
31
+
32
+ workdir = pathlib.Path(f"/scratch/xm-slurm-examples/{experiment.experiment_id}")
33
+
34
+ # Step 4: Schedule train job
35
+ train_executor = xm_slurm.Slurm(
36
+ requirements=xm_slurm.JobRequirements(
37
+ CPU=1,
38
+ RAM=1.0 * xm.GiB,
39
+ GPU=1,
40
+ replicas=1,
41
+ cluster=xm_slurm.contrib.clusters.mila(),
42
+ ),
43
+ time=dt.timedelta(hours=1),
44
+ )
45
+
46
+ async def make_train_job(wu: xm.WorkUnit, args):
47
+ await wu.add(
48
+ xm.Job(
49
+ executable=train_executable,
50
+ executor=train_executor,
51
+ args=xm.merge_args(
52
+ [
53
+ "--output_file",
54
+ (workdir / f"{wu.work_unit_id}" / "result.npy").as_posix(),
55
+ ],
56
+ args,
57
+ ),
58
+ ),
59
+ )
60
+
61
+ train_wus = await experiment.add(
62
+ make_train_job,
63
+ args=[xm_slurm.JobArgs(args=["--seed", seed]) for seed in range(5)],
64
+ )
65
+
66
+ # Step 5: Schedule eval job
67
+ eval_executor = xm_slurm.Slurm(
68
+ requirements=xm_slurm.JobRequirements(
69
+ CPU=1,
70
+ RAM=1.0 * xm.GiB,
71
+ GPU=1,
72
+ replicas=1,
73
+ cluster=xm_slurm.contrib.clusters.mila(),
74
+ ),
75
+ time=dt.timedelta(hours=1),
76
+ )
77
+
78
+ eval_wus = await experiment.add(
79
+ xm.Job(
80
+ executable=eval_executable,
81
+ executor=eval_executor,
82
+ ),
83
+ args=[
84
+ xm_slurm.JobArgs(
85
+ args=[
86
+ "--input_file",
87
+ (workdir / f"{wu.work_unit_id}" / "result.npy").as_posix(),
88
+ ]
89
+ )
90
+ for wu in train_wus
91
+ ],
92
+ dependency=[train_wu.after_completed() for train_wu in train_wus],
93
+ )
94
+
95
+ for wu in asyncio.as_completed([
96
+ *[train_wu.wait_until_complete() for train_wu in train_wus],
97
+ *[eval_wu.wait_until_complete() for eval_wu in eval_wus],
98
+ ]):
99
+ wu = await wu
100
+ print(f"Work Unit {wu!r} finished executing with status {await wu.get_status()}")
101
+
102
+
103
+ if __name__ == "__main__":
104
+ app.run(main)
@@ -0,0 +1,6 @@
1
+ [project]
2
+ name = "xm-slurm-example"
3
+ description = "XManager Slurm test project"
4
+ version = "0.0.1"
5
+ requires-python = ">=3.10"
6
+ dependencies = ["numpy", "absl-py"]
@@ -0,0 +1,19 @@
1
+ import pathlib
2
+
3
+ import numpy as np
4
+ from absl import app, flags
5
+
6
+ OUTPUT_FILE = flags.DEFINE_string("output_file", "result.npy", "Output file path")
7
+ SEED = flags.DEFINE_integer("seed", 0, "Random seed")
8
+
9
+
10
+ def main(_):
11
+ np.random.seed(SEED.value)
12
+
13
+ pathlib.Path(OUTPUT_FILE.value).parent.mkdir(parents=True, exist_ok=True)
14
+ result = np.random.random((32,))
15
+ np.save(OUTPUT_FILE.value, result)
16
+
17
+
18
+ if __name__ == "__main__":
19
+ app.run(main)
@@ -0,0 +1,86 @@
1
+ version = 1
2
+ requires-python = ">=3.10"
3
+
4
+ [[package]]
5
+ name = "absl-py"
6
+ version = "2.1.0"
7
+ source = { registry = "https://pypi.org/simple" }
8
+ sdist = { url = "https://files.pythonhosted.org/packages/7a/8f/fc001b92ecc467cc32ab38398bd0bfb45df46e7523bf33c2ad22a505f06e/absl-py-2.1.0.tar.gz", hash = "sha256:7820790efbb316739cde8b4e19357243fc3608a152024288513dd968d7d959ff", size = 118055 }
9
+ wheels = [
10
+ { url = "https://files.pythonhosted.org/packages/a2/ad/e0d3c824784ff121c03cc031f944bc7e139a8f1870ffd2845cc2dd76f6c4/absl_py-2.1.0-py3-none-any.whl", hash = "sha256:526a04eadab8b4ee719ce68f204172ead1027549089702d99b9059f129ff1308", size = 133706 },
11
+ ]
12
+
13
+ [[package]]
14
+ name = "numpy"
15
+ version = "2.1.2"
16
+ source = { registry = "https://pypi.org/simple" }
17
+ sdist = { url = "https://files.pythonhosted.org/packages/4b/d1/8a730ea07f4a37d94f9172f4ce1d81064b7a64766b460378be278952de75/numpy-2.1.2.tar.gz", hash = "sha256:13532a088217fa624c99b843eeb54640de23b3414b14aa66d023805eb731066c", size = 18878063 }
18
+ wheels = [
19
+ { url = "https://files.pythonhosted.org/packages/1c/a2/40a76d357f168e9f9f06d6cc2c8e22dd5fb2bfbe63fe2c433057258c145a/numpy-2.1.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:30d53720b726ec36a7f88dc873f0eec8447fbc93d93a8f079dfac2629598d6ee", size = 21150947 },
20
+ { url = "https://files.pythonhosted.org/packages/b5/d0/ba271ea9108d7278d3889a7eb38d77370a88713fb94339964e71ac184d4a/numpy-2.1.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e8d3ca0a72dd8846eb6f7dfe8f19088060fcb76931ed592d29128e0219652884", size = 13758184 },
21
+ { url = "https://files.pythonhosted.org/packages/7c/b9/5c6507439cd756201010f7937bf90712c2469052ae094584af14557dd64f/numpy-2.1.2-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:fc44e3c68ff00fd991b59092a54350e6e4911152682b4782f68070985aa9e648", size = 5354091 },
22
+ { url = "https://files.pythonhosted.org/packages/60/21/7938cf724d9e84e45fb886f3fc794ab431d71facfebc261e3e9f19f3233a/numpy-2.1.2-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:7c1c60328bd964b53f8b835df69ae8198659e2b9302ff9ebb7de4e5a5994db3d", size = 6887169 },
23
+ { url = "https://files.pythonhosted.org/packages/09/8d/42a124657f5d31902fca73921b25a0d022cead2b32ce7e6975762cd2995a/numpy-2.1.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6cdb606a7478f9ad91c6283e238544451e3a95f30fb5467fbf715964341a8a86", size = 13888165 },
24
+ { url = "https://files.pythonhosted.org/packages/fb/25/ba023652a39a2c127200e85aed975fc6119b421e2c348e5d0171e2046edb/numpy-2.1.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d666cb72687559689e9906197e3bec7b736764df6a2e58ee265e360663e9baf7", size = 16326954 },
25
+ { url = "https://files.pythonhosted.org/packages/34/58/23e6b07fad492b7c47cf09cd8bad6983658f0f925b6c535fd008e3e86274/numpy-2.1.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c6eef7a2dbd0abfb0d9eaf78b73017dbfd0b54051102ff4e6a7b2980d5ac1a03", size = 16702916 },
26
+ { url = "https://files.pythonhosted.org/packages/91/24/37b5cf2dc7d385ac97f7b7fe50cba312abb70a2a5eac74c23af028811f73/numpy-2.1.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:12edb90831ff481f7ef5f6bc6431a9d74dc0e5ff401559a71e5e4611d4f2d466", size = 14384372 },
27
+ { url = "https://files.pythonhosted.org/packages/ea/ec/0f6d471058a01d1a05a50d2793898de1549280fa715a8537987ee866b5d9/numpy-2.1.2-cp310-cp310-win32.whl", hash = "sha256:a65acfdb9c6ebb8368490dbafe83c03c7e277b37e6857f0caeadbbc56e12f4fb", size = 6535361 },
28
+ { url = "https://files.pythonhosted.org/packages/c2/3d/293cc5927f916a7bc6bf74da8f6defab63d1b13f0959d7e21878ad8a20d8/numpy-2.1.2-cp310-cp310-win_amd64.whl", hash = "sha256:860ec6e63e2c5c2ee5e9121808145c7bf86c96cca9ad396c0bd3e0f2798ccbe2", size = 12865501 },
29
+ { url = "https://files.pythonhosted.org/packages/aa/9c/9a6ec3ae89cd0648d419781284308f2956d2a61d932b5ac9682c956a171b/numpy-2.1.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b42a1a511c81cc78cbc4539675713bbcf9d9c3913386243ceff0e9429ca892fe", size = 21154845 },
30
+ { url = "https://files.pythonhosted.org/packages/02/69/9f05c4ecc75fabf297b17743996371b4c3dfc4d92e15c5c38d8bb3db8d74/numpy-2.1.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:faa88bc527d0f097abdc2c663cddf37c05a1c2f113716601555249805cf573f1", size = 13789409 },
31
+ { url = "https://files.pythonhosted.org/packages/34/4e/f95c99217bf77bbfaaf660d693c10bd0dc03b6032d19316d316088c9e479/numpy-2.1.2-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:c82af4b2ddd2ee72d1fc0c6695048d457e00b3582ccde72d8a1c991b808bb20f", size = 5352097 },
32
+ { url = "https://files.pythonhosted.org/packages/06/13/f5d87a497c16658e9af8920449b0b5692b469586b8231340c672962071c5/numpy-2.1.2-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:13602b3174432a35b16c4cfb5de9a12d229727c3dd47a6ce35111f2ebdf66ff4", size = 6891195 },
33
+ { url = "https://files.pythonhosted.org/packages/6c/89/691ac07429ac061b344d5e37fa8e94be51a6017734aea15f2d9d7c6d119a/numpy-2.1.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1ebec5fd716c5a5b3d8dfcc439be82a8407b7b24b230d0ad28a81b61c2f4659a", size = 13895153 },
34
+ { url = "https://files.pythonhosted.org/packages/23/69/538317f0d925095537745f12aced33be1570bbdc4acde49b33748669af96/numpy-2.1.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e2b49c3c0804e8ecb05d59af8386ec2f74877f7ca8fd9c1e00be2672e4d399b1", size = 16338306 },
35
+ { url = "https://files.pythonhosted.org/packages/af/03/863fe7062c2106d3c151f7df9353f2ae2237c1dd6900f127a3eb1f24cb1b/numpy-2.1.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:2cbba4b30bf31ddbe97f1c7205ef976909a93a66bb1583e983adbd155ba72ac2", size = 16710893 },
36
+ { url = "https://files.pythonhosted.org/packages/70/77/0ad9efe25482009873f9660d29a40a8c41a6f0e8b541195e3c95c70684c5/numpy-2.1.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8e00ea6fc82e8a804433d3e9cedaa1051a1422cb6e443011590c14d2dea59146", size = 14398048 },
37
+ { url = "https://files.pythonhosted.org/packages/3e/0f/e785fe75544db9f2b0bb1c181e13ceff349ce49753d807fd9672916aa06d/numpy-2.1.2-cp311-cp311-win32.whl", hash = "sha256:5006b13a06e0b38d561fab5ccc37581f23c9511879be7693bd33c7cd15ca227c", size = 6533458 },
38
+ { url = "https://files.pythonhosted.org/packages/d4/96/450054662295125af861d48d2c4bc081dadcf1974a879b2104613157aa62/numpy-2.1.2-cp311-cp311-win_amd64.whl", hash = "sha256:f1eb068ead09f4994dec71c24b2844f1e4e4e013b9629f812f292f04bd1510d9", size = 12870896 },
39
+ { url = "https://files.pythonhosted.org/packages/a0/7d/554a6838f37f3ada5a55f25173c619d556ae98092a6e01afb6e710501d70/numpy-2.1.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d7bf0a4f9f15b32b5ba53147369e94296f5fffb783db5aacc1be15b4bf72f43b", size = 20848077 },
40
+ { url = "https://files.pythonhosted.org/packages/b0/29/cb48a402ea879e645b16218718f3f7d9588a77d674a9dcf22e4c43487636/numpy-2.1.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b1d0fcae4f0949f215d4632be684a539859b295e2d0cb14f78ec231915d644db", size = 13493242 },
41
+ { url = "https://files.pythonhosted.org/packages/56/44/f899b0581766c230da42f751b7b8896d096640b19b312164c267e48d36cb/numpy-2.1.2-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:f751ed0a2f250541e19dfca9f1eafa31a392c71c832b6bb9e113b10d050cb0f1", size = 5089219 },
42
+ { url = "https://files.pythonhosted.org/packages/79/8f/b987070d45161a7a4504afc67ed38544ed2c0ed5576263599a0402204a9c/numpy-2.1.2-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:bd33f82e95ba7ad632bc57837ee99dba3d7e006536200c4e9124089e1bf42426", size = 6620167 },
43
+ { url = "https://files.pythonhosted.org/packages/c4/a7/af3329fda3c3ec31d9b650e42bbcd3422fc62a765cbb1405fde4177a0996/numpy-2.1.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1b8cde4f11f0a975d1fd59373b32e2f5a562ade7cde4f85b7137f3de8fbb29a0", size = 13604905 },
44
+ { url = "https://files.pythonhosted.org/packages/9b/b4/e3c7e6fab0f77fff6194afa173d1f2342073d91b1d3b4b30b17c3fb4407a/numpy-2.1.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d95f286b8244b3649b477ac066c6906fbb2905f8ac19b170e2175d3d799f4df", size = 16041825 },
45
+ { url = "https://files.pythonhosted.org/packages/e9/50/6828e66a78aa03147c111f84d55f33ce2dde547cb578d6744a3b06a0124b/numpy-2.1.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:ab4754d432e3ac42d33a269c8567413bdb541689b02d93788af4131018cbf366", size = 16409541 },
46
+ { url = "https://files.pythonhosted.org/packages/bf/72/66af7916d9c3c6dbfbc8acdd4930c65461e1953374a2bc43d00f948f004a/numpy-2.1.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e585c8ae871fd38ac50598f4763d73ec5497b0de9a0ab4ef5b69f01c6a046142", size = 14081134 },
47
+ { url = "https://files.pythonhosted.org/packages/dc/5a/59a67d84f33fe00ae74f0b5b69dd4f93a586a4aba7f7e19b54b2133db038/numpy-2.1.2-cp312-cp312-win32.whl", hash = "sha256:9c6c754df29ce6a89ed23afb25550d1c2d5fdb9901d9c67a16e0b16eaf7e2550", size = 6237784 },
48
+ { url = "https://files.pythonhosted.org/packages/4c/79/73735a6a5dad6059c085f240a4e74c9270feccd2bc66e4d31b5ca01d329c/numpy-2.1.2-cp312-cp312-win_amd64.whl", hash = "sha256:456e3b11cb79ac9946c822a56346ec80275eaf2950314b249b512896c0d2505e", size = 12568254 },
49
+ { url = "https://files.pythonhosted.org/packages/16/72/716fa1dbe92395a9a623d5049203ff8ddb0cfce65b9df9117c3696ccc011/numpy-2.1.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:a84498e0d0a1174f2b3ed769b67b656aa5460c92c9554039e11f20a05650f00d", size = 20834690 },
50
+ { url = "https://files.pythonhosted.org/packages/1e/fb/3e85a39511586053b5c6a59a643879e376fae22230ebfef9cfabb0e032e2/numpy-2.1.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:4d6ec0d4222e8ffdab1744da2560f07856421b367928026fb540e1945f2eeeaf", size = 13507474 },
51
+ { url = "https://files.pythonhosted.org/packages/35/eb/5677556d9ba13436dab51e129f98d4829d95cd1b6bd0e199c14485a4bdb9/numpy-2.1.2-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:259ec80d54999cc34cd1eb8ded513cb053c3bf4829152a2e00de2371bd406f5e", size = 5074742 },
52
+ { url = "https://files.pythonhosted.org/packages/3e/c5/6c5ef5ba41b65a7e51bed50dbf3e1483eb578055633dd013e811a28e96a1/numpy-2.1.2-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:675c741d4739af2dc20cd6c6a5c4b7355c728167845e3c6b0e824e4e5d36a6c3", size = 6606787 },
53
+ { url = "https://files.pythonhosted.org/packages/08/ac/f2f29dd4fd325b379c7dc932a0ebab22f0e031dbe80b2f6019b291a3a544/numpy-2.1.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:05b2d4e667895cc55e3ff2b56077e4c8a5604361fc21a042845ea3ad67465aa8", size = 13601333 },
54
+ { url = "https://files.pythonhosted.org/packages/44/26/63f5f4e5089654dfb858f4892215ed968cd1a68e6f4a83f9961f84f855cb/numpy-2.1.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:43cca367bf94a14aca50b89e9bc2061683116cfe864e56740e083392f533ce7a", size = 16038090 },
55
+ { url = "https://files.pythonhosted.org/packages/1d/21/015e0594de9c3a8d5edd24943d2bd23f102ec71aec026083f822f86497e2/numpy-2.1.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:76322dcdb16fccf2ac56f99048af32259dcc488d9b7e25b51e5eca5147a3fb98", size = 16410865 },
56
+ { url = "https://files.pythonhosted.org/packages/df/01/c1bcf9e6025d79077fbf3f3ee503b50aa7bfabfcd8f4b54f5829f4c00f3f/numpy-2.1.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:32e16a03138cabe0cb28e1007ee82264296ac0983714094380b408097a418cfe", size = 14078077 },
57
+ { url = "https://files.pythonhosted.org/packages/ba/06/db9d127d63bd11591770ba9f3d960f8041e0f895184b9351d4b1b5b56983/numpy-2.1.2-cp313-cp313-win32.whl", hash = "sha256:242b39d00e4944431a3cd2db2f5377e15b5785920421993770cddb89992c3f3a", size = 6234904 },
58
+ { url = "https://files.pythonhosted.org/packages/a9/96/9f61f8f95b6e0ea0aa08633b704c75d1882bdcb331bdf8bfd63263b25b00/numpy-2.1.2-cp313-cp313-win_amd64.whl", hash = "sha256:f2ded8d9b6f68cc26f8425eda5d3877b47343e68ca23d0d0846f4d312ecaa445", size = 12561910 },
59
+ { url = "https://files.pythonhosted.org/packages/36/b8/033f627821784a48e8f75c218033471eebbaacdd933f8979c79637a1b44b/numpy-2.1.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:2ffef621c14ebb0188a8633348504a35c13680d6da93ab5cb86f4e54b7e922b5", size = 20857719 },
60
+ { url = "https://files.pythonhosted.org/packages/96/46/af5726fde5b74ed83f2f17a73386d399319b7ed4d51279fb23b721d0816d/numpy-2.1.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:ad369ed238b1959dfbade9018a740fb9392c5ac4f9b5173f420bd4f37ba1f7a0", size = 13518826 },
61
+ { url = "https://files.pythonhosted.org/packages/db/6e/8ce677edf36da1c4dae80afe5529f47690697eb55b4864673af260ccea7b/numpy-2.1.2-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:d82075752f40c0ddf57e6e02673a17f6cb0f8eb3f587f63ca1eaab5594da5b17", size = 5115036 },
62
+ { url = "https://files.pythonhosted.org/packages/6a/ba/3cce44fb1b8438042c11847048812a776f75ee0e7070179c22e4cfbf420c/numpy-2.1.2-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:1600068c262af1ca9580a527d43dc9d959b0b1d8e56f8a05d830eea39b7c8af6", size = 6628641 },
63
+ { url = "https://files.pythonhosted.org/packages/59/c8/e722998720ccbd35ffbcf1d1b8ed0aa2304af88d3f1c38e06ebf983599b3/numpy-2.1.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a26ae94658d3ba3781d5e103ac07a876b3e9b29db53f68ed7df432fd033358a8", size = 13574803 },
64
+ { url = "https://files.pythonhosted.org/packages/7c/8e/fc1fdd83a55476765329ac2913321c4aed5b082a7915095628c4ca30ea72/numpy-2.1.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:13311c2db4c5f7609b462bc0f43d3c465424d25c626d95040f073e30f7570e35", size = 16021174 },
65
+ { url = "https://files.pythonhosted.org/packages/2a/b6/a790742aa88067adb4bd6c89a946778c1417d4deaeafce3ca928f26d4c52/numpy-2.1.2-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:2abbf905a0b568706391ec6fa15161fad0fb5d8b68d73c461b3c1bab6064dd62", size = 16400117 },
66
+ { url = "https://files.pythonhosted.org/packages/48/6f/129e3c17e3befe7fefdeaa6890f4c4df3f3cf0831aa053802c3862da67aa/numpy-2.1.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:ef444c57d664d35cac4e18c298c47d7b504c66b17c2ea91312e979fcfbdfb08a", size = 14066202 },
67
+ { url = "https://files.pythonhosted.org/packages/73/c9/3e1d6bbe6d3d2e2c5a9483b24b2f29a229b323f62054278a3bba7fee11e5/numpy-2.1.2-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:bdd407c40483463898b84490770199d5714dcc9dd9b792f6c6caccc523c00952", size = 20981945 },
68
+ { url = "https://files.pythonhosted.org/packages/6e/62/989c4988bde1a8e08117fccc3bab73d2886421fb98cde597168714f3c54e/numpy-2.1.2-pp310-pypy310_pp73-macosx_14_0_x86_64.whl", hash = "sha256:da65fb46d4cbb75cb417cddf6ba5e7582eb7bb0b47db4b99c9fe5787ce5d91f5", size = 6750558 },
69
+ { url = "https://files.pythonhosted.org/packages/53/b1/00ef9f30975f1312a53257f68e57b4513d14d537e03d507e2773a684b1e8/numpy-2.1.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1c193d0b0238638e6fc5f10f1b074a6993cb13b0b431f64079a509d63d3aa8b7", size = 16141552 },
70
+ { url = "https://files.pythonhosted.org/packages/c0/ec/0c04903b48dfea6be1d7b47ba70f98709fb7198fd970784a1400c391d522/numpy-2.1.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:a7d80b2e904faa63068ead63107189164ca443b42dd1930299e0d1cb041cec2e", size = 12789924 },
71
+ ]
72
+
73
+ [[package]]
74
+ name = "xm-slurm-example"
75
+ version = "0.0.1"
76
+ source = { virtual = "." }
77
+ dependencies = [
78
+ { name = "absl-py" },
79
+ { name = "numpy" },
80
+ ]
81
+
82
+ [package.metadata]
83
+ requires-dist = [
84
+ { name = "absl-py" },
85
+ { name = "numpy" },
86
+ ]
@@ -5,7 +5,7 @@ build-backend = "hatchling.build"
5
5
  [project]
6
6
  name = "xmanager-slurm"
7
7
  description = "Slurm backend for XManager."
8
- version = "0.4.0"
8
+ version = "0.4.2"
9
9
  # readme = "README.md"
10
10
  requires-python = ">=3.10"
11
11
  license = { text = "MIT" }
@@ -36,6 +36,9 @@ dependencies = [
36
36
  [project.urls]
37
37
  "GitHub" = "https://github.com/jessefarebro/xm-slurm"
38
38
 
39
+ [project.scripts]
40
+ xm = "xm_slurm.scripts.cli:main"
41
+
39
42
  [tool.uv]
40
43
  dev-dependencies = [
41
44
  "mypy>=1.11.2",
@@ -0,0 +1,149 @@
1
+ import datetime as dt
2
+ from typing import Callable
3
+
4
+ import pytest
5
+
6
+ from xm_slurm import config, dependencies, execution
7
+
8
# Signature of the factory returned by the `slurm_handle` fixture: it takes a
# job ID string and returns an `execution.SlurmHandle`.
SlurmHandleGenerator = Callable[[str], execution.SlurmHandle]
9
+
10
+
11
@pytest.fixture
def slurm_handle() -> SlurmHandleGenerator:
    """Provide a factory that builds a `SlurmHandle` for a given job ID.

    All handles created through one fixture instance share a single SSH
    config whose host is "localhost", experiment ID 0 and the job name "job".
    """
    localhost_ssh = config.SlurmSSHConfig(host="localhost")

    def make_handle(job_id: str):
        handle_kwargs = dict(
            experiment_id=0,
            ssh=localhost_ssh,
            slurm_job=job_id,
            job_name="job",
        )
        return execution.SlurmHandle(**handle_kwargs)

    return make_handle
21
+
22
+
23
def test_slurm_job_dependency_and(slurm_handle: SlurmHandleGenerator):
    """`&` on two `after` dependencies gives an AND dependency joined by ","."""
    lhs = dependencies.SlurmJobDependencyAfter([slurm_handle("123")])
    rhs = dependencies.SlurmJobDependencyAfter([slurm_handle("456")])

    conjunction = lhs & rhs

    assert isinstance(conjunction, dependencies.SlurmJobDependencyAND)
    rendered = conjunction.to_dependency_str()
    assert rendered == "after:123,after:456"
29
+
30
+
31
def test_slurm_job_dependency_or(slurm_handle: SlurmHandleGenerator):
    """`|` on two `after` dependencies gives an OR dependency joined by "?"."""
    lhs = dependencies.SlurmJobDependencyAfter([slurm_handle("123")])
    rhs = dependencies.SlurmJobDependencyAfter([slurm_handle("456")])

    disjunction = lhs | rhs

    assert isinstance(disjunction, dependencies.SlurmJobDependencyOR)
    rendered = disjunction.to_dependency_str()
    assert rendered == "after:123?after:456"
37
+
38
+
39
def test_slurm_job_dependency_mixing_logical_operations(slurm_handle: SlurmHandleGenerator):
    """Mixing `&` and `|` in one expression raises `SlurmDependencyException`."""
    first, second, third = (
        dependencies.SlurmJobDependencyAfter([slurm_handle(job_id)])
        for job_id in ("123", "456", "789")
    )
    expected_error = "Slurm only supports chaining dependencies with the same logical operator. "

    # `&` binds tighter than `|`, so this is `(first & second) | third`.
    with pytest.raises(dependencies.SlurmDependencyException, match=expected_error):
        first & second | third  # type: ignore

    # Same precedence rule: this is `first | (second & third)`.
    with pytest.raises(dependencies.SlurmDependencyException, match=expected_error):
        first | second & third  # type: ignore
54
+
55
+
56
def test_slurm_job_dependency_chaining_and(slurm_handle: SlurmHandleGenerator):
    """Chaining four dependencies with `&` renders one comma-separated string."""
    job_ids = ("1", "2", "3", "4")
    a, b, c, d = (
        dependencies.SlurmJobDependencyAfter([slurm_handle(job_id)]) for job_id in job_ids
    )

    chained = a & b & c & d

    assert isinstance(chained, dependencies.SlurmJobDependencyAND)
    rendered = chained.to_dependency_str()
    assert rendered == "after:1,after:2,after:3,after:4"
64
+
65
+
66
def test_slurm_job_dependency_chaining_or(slurm_handle: SlurmHandleGenerator):
    """Chaining four dependencies with `|` renders one "?"-separated string."""
    job_ids = ("1", "2", "3", "4")
    a, b, c, d = (
        dependencies.SlurmJobDependencyAfter([slurm_handle(job_id)]) for job_id in job_ids
    )

    chained = a | b | c | d

    assert isinstance(chained, dependencies.SlurmJobDependencyOR)
    rendered = chained.to_dependency_str()
    assert rendered == "after:1?after:2?after:3?after:4"
74
+
75
+
76
def test_slurm_job_dependency_after(slurm_handle: SlurmHandleGenerator):
    """A single-handle `after` dependency renders as "after:<job id>"."""
    handles = [slurm_handle("123")]
    rendered = dependencies.SlurmJobDependencyAfter(handles).to_dependency_str()
    assert rendered == "after:123"
79
+
80
+
81
def test_slurm_job_dependency_after_with_time(slurm_handle: SlurmHandleGenerator):
    """A ten-minute `time` is appended to the job ID as "+10"."""
    delay = dt.timedelta(minutes=10)
    dependency = dependencies.SlurmJobDependencyAfter([slurm_handle("123")], time=delay)
    rendered = dependency.to_dependency_str()
    assert rendered == "after:123+10"
84
+
85
+
86
def test_slurm_job_dependency_after_with_invalid_time(slurm_handle: SlurmHandleGenerator):
    """A `time` of thirty seconds is rejected when the dependency is constructed."""
    thirty_seconds = dt.timedelta(seconds=30)
    expected_error = "Time must be specified in exact minutes"

    with pytest.raises(dependencies.SlurmDependencyException, match=expected_error):
        dependencies.SlurmJobDependencyAfter([slurm_handle("123")], time=thirty_seconds)
91
+
92
+
93
@pytest.mark.parametrize(
    ("dependency_cls", "dependency_type"),
    [
        (dependencies.SlurmJobDependencyAfter, "after"),
        (dependencies.SlurmJobDependencyAfterAny, "afterany"),
        (dependencies.SlurmJobDependencyAfterNotOK, "afternotok"),
        (dependencies.SlurmJobDependencyAfterOK, "afterok"),
    ],
)
def test_slurm_job_dependency_after_not_ok(
    slurm_handle: SlurmHandleGenerator,
    dependency_cls: type,
    dependency_type: str,
):
    """Each dependency class renders two handles as "<type>:<id>:<id>"."""
    handles = [slurm_handle("123"), slurm_handle("456")]
    rendered = dependency_cls(handles).to_dependency_str()
    expected = f"{dependency_type}:123:456"
    assert rendered == expected
109
+
110
+
111
@pytest.mark.parametrize(
    "dependency_cls",
    [
        dependencies.SlurmJobDependencyAfter,
        dependencies.SlurmJobDependencyAfterAny,
        dependencies.SlurmJobDependencyAfterNotOK,
        dependencies.SlurmJobDependencyAfterOK,
    ],
)
def test_slurm_job_dependency_after_no_handles(dependency_cls: type):
    """Constructing any of the dependency classes with no handles raises."""
    expected_error = "Dependency doesn't have any handles."
    no_handles = []

    with pytest.raises(dependencies.SlurmDependencyException, match=expected_error):
        dependency_cls(no_handles)
125
+
126
+
127
def test_dependency_flatten(slurm_handle: SlurmHandleGenerator):
    """`flatten` on a chained AND returns its operands as a tuple, in order."""
    first, second, third = (
        dependencies.SlurmJobDependencyAfter([slurm_handle(job_id)])
        for job_id in ("1", "2", "3")
    )

    chained = first & second & third

    expected = (first, second, third)
    assert chained.flatten() == expected
133
+
134
+
135
def test_dependency_traverse(slurm_handle: SlurmHandleGenerator):
    """`traverse` with an afterok -> afternotok mapping rewrites every operand."""
    first, second, third = (
        dependencies.SlurmJobDependencyAfterOK([slurm_handle(job_id)])
        for job_id in ("1", "2", "3")
    )
    chained = first & second & third

    def swap_ok_for_not_ok(dep: dependencies.SlurmJobDependency):
        # Anything that is not an `afterok` dependency is passed through untouched.
        if not isinstance(dep, dependencies.SlurmJobDependencyAfterOK):
            return dep
        return dependencies.SlurmJobDependencyAfterNotOK(dep.handles)

    transformed = chained.traverse(swap_ok_for_not_ok)

    for operand in transformed.flatten():
        assert isinstance(operand, dependencies.SlurmJobDependencyAfterNotOK)
    rendered = transformed.to_dependency_str()
    assert rendered == "afternotok:1,afternotok:2,afternotok:3"
@@ -2196,7 +2196,7 @@ wheels = [
2196
2196
 
2197
2197
  [[package]]
2198
2198
  name = "xmanager-slurm"
2199
- version = "0.3.2"
2199
+ version = "0.4.2"
2200
2200
  source = { editable = "." }
2201
2201
  dependencies = [
2202
2202
  { name = "asyncssh" },
@@ -10,6 +10,7 @@ from xm_slurm.experiment import (
10
10
  get_current_work_unit,
11
11
  get_experiment,
12
12
  )
13
+ from xm_slurm.job_blocks import JobArgs
13
14
  from xm_slurm.packageables import (
14
15
  conda_container,
15
16
  docker_container,
@@ -34,13 +35,14 @@ __all__ = [
34
35
  "get_current_experiment",
35
36
  "get_current_work_unit",
36
37
  "get_experiment",
38
+ "JobArgs",
37
39
  "JobRequirements",
38
40
  "mamba_container",
39
- "uv_container",
40
41
  "python_container",
41
42
  "ResourceQuantity",
42
43
  "ResourceType",
43
44
  "Slurm",
44
- "SlurmSpec",
45
45
  "SlurmExperiment",
46
+ "SlurmSpec",
47
+ "uv_container",
46
48
  ]
@@ -322,7 +322,7 @@ class XManagerSqliteAPI(XManagerAPI):
322
322
  db_path = Path(os.environ["XM_SLURM_STATE_DIR"]) / "db.sqlite3"
323
323
  else:
324
324
  db_path = Path.home() / ".local" / "state" / "xm-slurm" / "db.sqlite3"
325
- logging.debug("Looking for db at: ", db_path)
325
+ logger.debug("Looking for db at: ", db_path)
326
326
  db_path.parent.mkdir(parents=True, exist_ok=True)
327
327
  engine = create_engine(f"sqlite:///{db_path}")
328
328
  Base.metadata.create_all(engine)
@@ -5,9 +5,10 @@ import getpass
5
5
  import json
6
6
  import os
7
7
  import pathlib
8
- from typing import Literal, Mapping, NamedTuple
8
+ from typing import Callable, Literal, Mapping, NamedTuple
9
9
 
10
10
  import asyncssh
11
+ from xmanager import xm
11
12
 
12
13
  from xm_slurm import constants
13
14
 
@@ -137,7 +138,7 @@ class SlurmSSHConfig:
137
138
  )
138
139
 
139
140
  def __hash__(self):
140
- return hash((self.host, self.host_public_key, self.user, self.port))
141
+ return hash((type(self), self.host, self.host_public_key, self.user, self.port))
141
142
 
142
143
 
143
144
  @dataclasses.dataclass(frozen=True, kw_only=True)
@@ -176,6 +177,9 @@ class SlurmClusterConfig:
176
177
 
177
178
  features: Mapping["xm_slurm.FeatureType", str] = dataclasses.field(default_factory=dict) # type: ignore # noqa: F821
178
179
 
180
+ # Function to validate the Slurm executor config
181
+ validate: Callable[[xm.Job], None] | None = None
182
+
179
183
  def __post_init__(self) -> None:
180
184
  for src, dst in self.mounts.items():
181
185
  if not isinstance(src, (str, os.PathLike)):
@@ -194,6 +198,7 @@ class SlurmClusterConfig:
194
198
 
195
199
  def __hash__(self):
196
200
  return hash((
201
+ type(self),
197
202
  self.ssh,
198
203
  self.cwd,
199
204
  self.prolog,
@@ -1,5 +1,9 @@
1
1
  import re
2
2
 
3
# Matches a Slurm job ID string, anchored at both ends: a run of digits
# (group `jobid`), optionally followed by exactly one of `+<digits>`
# (group `componentid`) or `_<digits>` (group `arraytaskid`).
SLURM_JOB_ID_REGEX = re.compile(
    r"^(?P<jobid>\d+)(?:(?:\+(?P<componentid>\d+))|(?:_(?P<arraytaskid>\d+)))?$"
)
6
+
3
7
# Splits an image URI into the named groups `scheme`, `domain`, `path`, `tag`
# and `digest`. Only `domain` is mandatory in the pattern; every other group
# is optional.
IMAGE_URI_REGEX = re.compile(
    r"^(?P<scheme>(?:[^:]+://)?)?(?P<domain>[^/]+)(?P<path>/[^:]*)?(?::(?P<tag>[^@]+))?@?(?P<digest>.+)?$"
)
@@ -1,13 +1,20 @@
1
+ import datetime as dt
2
+ import logging
1
3
  import os
2
4
 
5
+ from xmanager import xm
6
+
3
7
  from xm_slurm import config, resources
4
8
  from xm_slurm.contrib.clusters import drac
9
+ from xm_slurm.executors import Slurm
5
10
 
6
11
  # ComputeCanada alias
7
12
  cc = drac
8
13
 
9
14
  __all__ = ["drac", "mila", "cc"]
10
15
 
16
+ logger = logging.getLogger(__name__)
17
+
11
18
 
12
19
  def mila(
13
20
  *,
@@ -53,6 +60,8 @@ def mila(
53
60
  resources.ResourceType.A100: "a100",
54
61
  resources.ResourceType.A100_80GIB: "a100l",
55
62
  resources.ResourceType.A6000: "a6000",
63
+ resources.ResourceType.L40S: "l40s",
64
+ resources.ResourceType.H100: "h100",
56
65
  },
57
66
  features={
58
67
  resources.FeatureType.NVIDIA_MIG: "mig",