xmanager-slurm 0.4.14__tar.gz → 0.4.15__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xmanager-slurm might be problematic. Click here for more details.

Files changed (128)
  1. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/PKG-INFO +1 -1
  2. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/pyproject.toml +1 -1
  3. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/tests/integration/conftest.py +2 -2
  4. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/tests/test_dependencies.py +2 -2
  5. xmanager_slurm-0.4.15/tests/test_executors.py +526 -0
  6. xmanager_slurm-0.4.15/tests/test_resources.py +1325 -0
  7. xmanager_slurm-0.4.15/tests/test_utils.py +23 -0
  8. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/uv.lock +1 -1
  9. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/xm_slurm/__init__.py +2 -1
  10. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/xm_slurm/batching.py +11 -11
  11. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/xm_slurm/config.py +10 -10
  12. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/xm_slurm/contrib/clusters/drac.py +15 -29
  13. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/xm_slurm/dependencies.py +7 -7
  14. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/xm_slurm/executors.py +82 -12
  15. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/xm_slurm/experimental/parameter_controller.py +18 -14
  16. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/xm_slurm/job_blocks.py +3 -3
  17. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/xm_slurm/packageables.py +23 -23
  18. xmanager_slurm-0.4.15/xm_slurm/packaging/registry.py +45 -0
  19. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/xm_slurm/packaging/router.py +3 -3
  20. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/xm_slurm/packaging/utils.py +5 -5
  21. xmanager_slurm-0.4.15/xm_slurm/resources.py +347 -0
  22. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/xm_slurm/status.py +2 -2
  23. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/xm_slurm/templates/slurm/job-array.bash.j2 +1 -1
  24. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/xm_slurm/templates/slurm/job-group.bash.j2 +1 -0
  25. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/xm_slurm/templates/slurm/job.bash.j2 +1 -1
  26. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/xm_slurm/utils.py +8 -0
  27. xmanager_slurm-0.4.14/xm_slurm/packaging/registry.py +0 -45
  28. xmanager_slurm-0.4.14/xm_slurm/resources.py +0 -177
  29. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/.devcontainer.json +0 -0
  30. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/.github/workflows/ci.yml +0 -0
  31. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/.github/workflows/deploy-docs.yml +0 -0
  32. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/.gitignore +0 -0
  33. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/.pre-commit-config.yaml +0 -0
  34. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/.python-version +0 -0
  35. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/.vscode/settings.json +0 -0
  36. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/LICENSE.md +0 -0
  37. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/README.md +0 -0
  38. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/docs/api/executables.rst +0 -0
  39. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/docs/api/executors.rst +0 -0
  40. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/docs/api/packageables.rst +0 -0
  41. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/docs/assets/workflow-dark.svg +0 -0
  42. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/docs/assets/workflow-light.svg +0 -0
  43. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/docs/conf.py +0 -0
  44. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/docs/getting-started/xmanager.md +0 -0
  45. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/docs/guides/index.md +0 -0
  46. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/docs/guides/remote-dev.md +0 -0
  47. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/docs/index.md +0 -0
  48. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/examples/conda/environment.yml +0 -0
  49. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/examples/conda/launch.py +0 -0
  50. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/examples/conda/main.py +0 -0
  51. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/examples/conda/pyproject.toml +0 -0
  52. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/examples/custom-dockerfile/Dockerfile +0 -0
  53. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/examples/custom-dockerfile/launch.py +0 -0
  54. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/examples/custom-dockerfile/pyproject.toml +0 -0
  55. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/examples/job-array-sweep/launch.py +0 -0
  56. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/examples/job-array-sweep/main.py +0 -0
  57. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/examples/job-array-sweep/pyproject.toml +0 -0
  58. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/examples/job-array-sweep/uv.lock +0 -0
  59. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/examples/job-dependencies/eval.py +0 -0
  60. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/examples/job-dependencies/launch.py +0 -0
  61. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/examples/job-dependencies/pyproject.toml +0 -0
  62. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/examples/job-dependencies/train.py +0 -0
  63. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/examples/job-dependencies/uv.lock +0 -0
  64. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/examples/job-group/Dockerfile +0 -0
  65. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/examples/job-group/launch.py +0 -0
  66. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/examples/job-group/pyproject.toml +0 -0
  67. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/examples/job-group/uv.lock +0 -0
  68. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/examples/job-timeout/launch.py +0 -0
  69. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/examples/job-timeout/main.py +0 -0
  70. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/examples/job-timeout/pyproject.toml +0 -0
  71. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/examples/job-timeout/uv.lock +0 -0
  72. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/examples/metadata/launch.py +0 -0
  73. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/examples/metadata/main.py +0 -0
  74. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/examples/metadata/pyproject.toml +0 -0
  75. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/examples/metadata/requirements.txt +0 -0
  76. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/examples/parameter-controller/launch.py +0 -0
  77. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/examples/parameter-controller/main.py +0 -0
  78. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/examples/parameter-controller/pyproject.toml +0 -0
  79. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/examples/parameter-controller/requirements.txt +0 -0
  80. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/examples/pip/launch.py +0 -0
  81. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/examples/pip/main.py +0 -0
  82. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/examples/pip/pyproject.toml +0 -0
  83. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/examples/pip/requirements.txt +0 -0
  84. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/examples/uv/launch.py +0 -0
  85. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/examples/uv/pyproject.toml +0 -0
  86. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/examples/uv/uv.lock +0 -0
  87. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/tests/integration/fixtures/slurm/Dockerfile +0 -0
  88. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/tests/integration/fixtures/slurm/README.md +0 -0
  89. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/tests/integration/fixtures/slurm/cgroup.conf +0 -0
  90. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/tests/integration/fixtures/slurm/docker-compose.yml +0 -0
  91. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/tests/integration/fixtures/slurm/docker-entrypoint.sh +0 -0
  92. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/tests/integration/fixtures/slurm/host_ed25519 +0 -0
  93. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/tests/integration/fixtures/slurm/host_ed25519.pub +0 -0
  94. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/tests/integration/fixtures/slurm/id_ed25519 +0 -0
  95. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/tests/integration/fixtures/slurm/id_ed25519.pub +0 -0
  96. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/tests/integration/fixtures/slurm/slurm.conf +0 -0
  97. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/tests/integration/fixtures/slurm/slurmdbd.conf +0 -0
  98. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/tests/integration/fixtures/slurm/sshd_config +0 -0
  99. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/tests/integration/test_remote_execution.py +0 -0
  100. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/xm_slurm/api/__init__.py +0 -0
  101. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/xm_slurm/api/abc.py +0 -0
  102. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/xm_slurm/api/models.py +0 -0
  103. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/xm_slurm/api/sqlite/client.py +0 -0
  104. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/xm_slurm/api/web/client.py +0 -0
  105. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/xm_slurm/console.py +0 -0
  106. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/xm_slurm/constants.py +0 -0
  107. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/xm_slurm/contrib/__init__.py +0 -0
  108. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/xm_slurm/contrib/clusters/__init__.py +0 -0
  109. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/xm_slurm/executables.py +0 -0
  110. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/xm_slurm/execution.py +0 -0
  111. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/xm_slurm/experiment.py +0 -0
  112. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/xm_slurm/filesystem.py +0 -0
  113. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/xm_slurm/metadata_context.py +0 -0
  114. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/xm_slurm/packaging/__init__.py +0 -0
  115. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/xm_slurm/packaging/docker.py +0 -0
  116. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/xm_slurm/scripts/_cloudpickle.py +0 -0
  117. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/xm_slurm/scripts/cli.py +0 -0
  118. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/xm_slurm/templates/docker/docker-bake.hcl.j2 +0 -0
  119. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/xm_slurm/templates/docker/mamba.Dockerfile +0 -0
  120. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/xm_slurm/templates/docker/python.Dockerfile +0 -0
  121. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/xm_slurm/templates/docker/uv.Dockerfile +0 -0
  122. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/xm_slurm/templates/slurm/entrypoint.bash.j2 +0 -0
  123. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/xm_slurm/templates/slurm/fragments/monitor.bash.j2 +0 -0
  124. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/xm_slurm/templates/slurm/fragments/proxy.bash.j2 +0 -0
  125. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/xm_slurm/templates/slurm/library/retry.bash +0 -0
  126. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/xm_slurm/templates/slurm/runtimes/apptainer.bash.j2 +0 -0
  127. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/xm_slurm/templates/slurm/runtimes/podman.bash.j2 +0 -0
  128. {xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/xm_slurm/types.py +0 -0
{xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/PKG-INFO
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: xmanager-slurm
3
- Version: 0.4.14
3
+ Version: 0.4.15
4
4
  Summary: Slurm backend for XManager.
5
5
  Project-URL: GitHub, https://github.com/jessefarebro/xm-slurm
6
6
  Author-email: Jesse Farebrother <jfarebro@cs.mcgill.ca>
{xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/pyproject.toml
@@ -5,7 +5,7 @@ build-backend = "hatchling.build"
5
5
  [project]
6
6
  name = "xmanager-slurm"
7
7
  description = "Slurm backend for XManager."
8
- version = "0.4.14"
8
+ version = "0.4.15"
9
9
  # readme = "README.md"
10
10
  requires-python = ">=3.10"
11
11
  license = { text = "MIT" }
{xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/tests/integration/conftest.py
@@ -1,7 +1,7 @@
1
1
  import pathlib
2
2
  import socket
3
3
  import time
4
- from typing import Iterator
4
+ import typing as tp
5
5
  from unittest.mock import PropertyMock, patch
6
6
 
7
7
  import pytest
@@ -33,7 +33,7 @@ def wait_for_ssh_server(host: str, port: int, *, timeout: float = 180.0) -> None
33
33
 
34
34
 
35
35
  @pytest.fixture(scope="session")
36
- def cluster_config() -> Iterator[config.SlurmClusterConfig]:
36
+ def cluster_config() -> tp.Iterator[config.SlurmClusterConfig]:
37
37
  slurmdir = pathlib.Path(__file__).parent / "fixtures" / "slurm"
38
38
  cluster = DockerCompose(slurmdir, build=True)
39
39
  cluster.start()
{xmanager_slurm-0.4.14 → xmanager_slurm-0.4.15}/tests/test_dependencies.py
@@ -1,11 +1,11 @@
1
1
  import datetime as dt
2
- from typing import Callable
2
+ import typing as tp
3
3
 
4
4
  import pytest
5
5
 
6
6
  from xm_slurm import config, dependencies, execution
7
7
 
8
- SlurmHandleGenerator = Callable[[str], execution.SlurmHandle]
8
+ SlurmHandleGenerator = tp.Callable[[str], execution.SlurmHandle]
9
9
 
10
10
 
11
11
  @pytest.fixture
xmanager_slurm-0.4.15/tests/test_executors.py
@@ -0,0 +1,526 @@
1
+ import datetime as dt
2
+ import signal
3
+ import typing as tp
4
+ from unittest.mock import MagicMock
5
+
6
+ import pytest
7
+
8
+ from xm_slurm import config, executors, resources
9
+
10
+
11
+ @pytest.fixture
12
+ def dummy_cluster_config():
13
+ """Create a dummy cluster configuration for testing."""
14
+ ssh_config = config.SlurmSSHConfig(host="localhost", user="testuser")
15
+ return config.SlurmClusterConfig(
16
+ name="test_cluster",
17
+ ssh=ssh_config,
18
+ runtime=config.ContainerRuntime.DOCKER,
19
+ )
20
+
21
+
22
+ @pytest.fixture
23
+ def basic_requirements(dummy_cluster_config):
24
+ """Create basic job requirements for testing."""
25
+ return resources.JobRequirements(
26
+ cpu=1,
27
+ memory=1024**3, # 1GB
28
+ cluster=dummy_cluster_config,
29
+ )
30
+
31
+
32
+ @pytest.mark.parametrize(
33
+ "tag",
34
+ [
35
+ "ghcr.io/my-project/my-image:latest",
36
+ "docker.io/image:v1.0",
37
+ None,
38
+ ],
39
+ )
40
+ def test_slurm_spec_creation(tag: str | None) -> None:
41
+ """Test creating a SlurmSpec with various tags."""
42
+ spec = executors.SlurmSpec(tag=tag)
43
+ assert spec.tag == tag
44
+
45
+
46
+ def test_slurm_spec_is_frozen() -> None:
47
+ """Test that SlurmSpec is frozen (immutable)."""
48
+ spec = executors.SlurmSpec(tag="test:latest")
49
+ with pytest.raises(AttributeError):
50
+ spec.tag = "new-tag:latest" # type: ignore
51
+
52
+
53
+ def test_slurm_spec_is_executor_spec() -> None:
54
+ """Test that SlurmSpec is an ExecutorSpec."""
55
+ spec = executors.SlurmSpec(tag="test:latest")
56
+ assert isinstance(spec, executors.xm.ExecutorSpec)
57
+
58
+
59
+ # Type validation tests
60
+ @pytest.mark.parametrize("invalid_requirements", [MagicMock(), "requirements", 123])
61
+ def test_slurm_executor_requirements_type_validation(invalid_requirements: tp.Any) -> None:
62
+ """Test that executor validates requirements type."""
63
+ with pytest.raises(TypeError, match="requirements must be a `xm_slurm.JobRequirements`"):
64
+ executors.Slurm(requirements=invalid_requirements, time=dt.timedelta(hours=1))
65
+
66
+
67
+ @pytest.mark.parametrize("invalid_time", ["1 hour", 3600, 1.5])
68
+ def test_slurm_executor_time_type_validation(
69
+ basic_requirements: resources.JobRequirements, invalid_time: tp.Any
70
+ ) -> None:
71
+ """Test that executor validates time type."""
72
+ with pytest.raises(TypeError, match="time must be a `datetime.timedelta`"):
73
+ executors.Slurm(requirements=basic_requirements, time=invalid_time)
74
+
75
+
76
+ def test_slurm_executor_bind_type_validation(basic_requirements: resources.JobRequirements) -> None:
77
+ """Test that executor validates bind type."""
78
+ with pytest.raises(TypeError, match="bind must be a mapping"):
79
+ executors.Slurm(
80
+ requirements=basic_requirements,
81
+ time=dt.timedelta(hours=1),
82
+ bind="gpu:none", # type: ignore
83
+ )
84
+
85
+
86
+ @pytest.mark.parametrize("invalid_signal", ["SIGUSR2", 12])
87
+ def test_slurm_executor_timeout_signal_type_validation(
88
+ basic_requirements: resources.JobRequirements, invalid_signal: tp.Any
89
+ ) -> None:
90
+ """Test that executor validates timeout_signal type."""
91
+ with pytest.raises(TypeError, match="termination_signal must be a `signal.Signals`"):
92
+ executors.Slurm(
93
+ requirements=basic_requirements,
94
+ time=dt.timedelta(hours=1),
95
+ timeout_signal=invalid_signal,
96
+ )
97
+
98
+
99
+ def test_slurm_executor_timeout_signal_grace_period_type_validation(
100
+ basic_requirements: resources.JobRequirements,
101
+ ) -> None:
102
+ """Test that executor validates timeout_signal_grace_period type."""
103
+ with pytest.raises(
104
+ TypeError, match="termination_signal_delay_time must be a `datetime.timedelta`"
105
+ ):
106
+ executors.Slurm(
107
+ requirements=basic_requirements,
108
+ time=dt.timedelta(hours=1),
109
+ timeout_signal_grace_period=90, # type: ignore
110
+ )
111
+
112
+
113
+ @pytest.mark.parametrize("invalid_attempts", [-1, -10])
114
+ def test_slurm_executor_requeue_max_attempts_validation(
115
+ basic_requirements: resources.JobRequirements, invalid_attempts: int
116
+ ) -> None:
117
+ """Test that executor validates requeue_max_attempts."""
118
+ with pytest.raises(ValueError, match="requeue_max_attempts must be greater than or equal to 0"):
119
+ executors.Slurm(
120
+ requirements=basic_requirements,
121
+ time=dt.timedelta(hours=1),
122
+ requeue_max_attempts=invalid_attempts,
123
+ )
124
+
125
+
126
+ def test_slurm_executor_requeue_on_exit_code_validation(
127
+ basic_requirements: resources.JobRequirements,
128
+ ) -> None:
129
+ """Test that executor validates requeue_on_exit_code."""
130
+ with pytest.raises(ValueError, match="requeue_on_exit_code should not be 0"):
131
+ executors.Slurm(
132
+ requirements=basic_requirements, time=dt.timedelta(hours=1), requeue_on_exit_code=0
133
+ )
134
+
135
+
136
+ def test_slurm_executor_exclusive_oversubscribe_conflict(
137
+ basic_requirements: resources.JobRequirements,
138
+ ) -> None:
139
+ """Test that exclusive and oversubscribe cannot be both True."""
140
+ with pytest.raises(ValueError, match="exclusive and oversubscribe are mutually exclusive"):
141
+ executors.Slurm(
142
+ requirements=basic_requirements,
143
+ time=dt.timedelta(hours=1),
144
+ exclusive=True,
145
+ oversubscribe=True,
146
+ )
147
+
148
+
149
+ @pytest.mark.parametrize("invalid_nice", [-2147483646, 2147483646])
150
+ def test_slurm_executor_nice_bounds_validation(
151
+ basic_requirements: resources.JobRequirements, invalid_nice: int
152
+ ) -> None:
153
+ """Test that executor validates nice bounds."""
154
+ with pytest.raises(ValueError, match="nice must be between"):
155
+ executors.Slurm(
156
+ requirements=basic_requirements, time=dt.timedelta(hours=1), nice=invalid_nice
157
+ )
158
+
159
+
160
+ @pytest.mark.parametrize("valid_nice", [-2147483645, -19, 0, 19, 2147483645])
161
+ def test_slurm_executor_nice_valid_bounds(
162
+ basic_requirements: resources.JobRequirements, valid_nice: int
163
+ ) -> None:
164
+ """Test that executor accepts valid nice values."""
165
+ executor = executors.Slurm(
166
+ requirements=basic_requirements, time=dt.timedelta(hours=1), nice=valid_nice
167
+ )
168
+ assert executor.nice == valid_nice
169
+
170
+
171
+ def test_slurm_executor_is_frozen(basic_requirements: resources.JobRequirements) -> None:
172
+ """Test that Slurm executor is frozen (immutable)."""
173
+ executor = executors.Slurm(requirements=basic_requirements, time=dt.timedelta(hours=1))
174
+ with pytest.raises(AttributeError):
175
+ executor.account = "new-account" # type: ignore
176
+
177
+
178
+ def test_slurm_executor_bind_resource_validation_invalid_string_key(
179
+ basic_requirements: resources.JobRequirements,
180
+ ) -> None:
181
+ """Test bind validation with invalid string key."""
182
+ with pytest.raises(TypeError, match="bind resource must be a"):
183
+ executors.Slurm(
184
+ requirements=basic_requirements,
185
+ time=dt.timedelta(hours=1),
186
+ bind={123: "closest"}, # type: ignore
187
+ )
188
+
189
+
190
+ def test_slurm_executor_bind_resource_validation_invalid_enum_key(
191
+ basic_requirements: resources.JobRequirements,
192
+ ) -> None:
193
+ """Test bind validation with invalid ResourceType key."""
194
+ with pytest.raises(TypeError, match="bind resource must be a"):
195
+ executors.Slurm(
196
+ requirements=basic_requirements,
197
+ time=dt.timedelta(hours=1),
198
+ bind={resources.ResourceType.CPU: "value"}, # type: ignore
199
+ )
200
+
201
+
202
+ def test_slurm_executor_bind_accepts_resource_type_enums(
203
+ basic_requirements: resources.JobRequirements,
204
+ ) -> None:
205
+ """Test bind accepts valid ResourceType enums."""
206
+ executor = executors.Slurm(
207
+ requirements=basic_requirements,
208
+ time=dt.timedelta(hours=1),
209
+ bind={resources.ResourceType.GPU: "closest"},
210
+ )
211
+ assert executor.bind == {resources.ResourceType.GPU: "closest"}
212
+
213
+
214
+ def test_slurm_executor_bind_accepts_custom_strings(
215
+ basic_requirements: resources.JobRequirements,
216
+ ) -> None:
217
+ """Test bind accepts custom string GRES names."""
218
+ executor = executors.Slurm(
219
+ requirements=basic_requirements,
220
+ time=dt.timedelta(hours=1),
221
+ bind={"custom_gres": "value"},
222
+ )
223
+ assert executor.bind == {"custom_gres": "value"}
224
+
225
+
226
+ def test_slurm_executor_to_directives_default(basic_requirements):
227
+ """Test directive generation with minimal configuration."""
228
+ executor = executors.Slurm(requirements=basic_requirements, time=dt.timedelta(hours=1))
229
+ directives = executor.to_directives()
230
+ assert any(d.startswith("--time=") for d in directives)
231
+ assert any(d.startswith("--kill-on-invalid-dep=") for d in directives)
232
+ assert any(d.startswith("--signal=") for d in directives)
233
+ assert any(d.startswith("--requeue") for d in directives)
234
+
235
+
236
+ @pytest.mark.parametrize(
237
+ "time,expected",
238
+ [
239
+ (dt.timedelta(hours=1), "--time=0-01:00:00"),
240
+ (dt.timedelta(days=1, hours=2, minutes=30), "--time=1-02:30:00"),
241
+ (dt.timedelta(minutes=5), "--time=0-00:05:00"),
242
+ ],
243
+ )
244
+ def test_slurm_executor_to_directives_time(basic_requirements, time, expected):
245
+ """Test time directive generation."""
246
+ executor = executors.Slurm(requirements=basic_requirements, time=time)
247
+ directives = executor.to_directives()
248
+ time_directive = [d for d in directives if d.startswith("--time=")][0]
249
+ assert time_directive == expected
250
+
251
+
252
+ @pytest.mark.parametrize(
253
+ "executor_kwargs,expected_directive",
254
+ [
255
+ ({"account": "my-account"}, "--account=my-account"),
256
+ ({"account": "project-123"}, "--account=project-123"),
257
+ ({"partition": "gpu-partition"}, "--partition=gpu-partition"),
258
+ ({"partition": "cpu-partition"}, "--partition=cpu-partition"),
259
+ ({"partition": "debug"}, "--partition=debug"),
260
+ ({"qos": "high"}, "--qos=high"),
261
+ ({"qos": "low"}, "--qos=low"),
262
+ ({"qos": "normal"}, "--qos=normal"),
263
+ ({"priority": 0}, "--priority=0"),
264
+ ({"priority": 100}, "--priority=100"),
265
+ ({"priority": 1000}, "--priority=1000"),
266
+ ({"nice": -5}, "--nice=-5"),
267
+ ({"nice": -19}, "--nice=-19"),
268
+ ({"nice": 0}, "--nice=0"),
269
+ ({"nice": 19}, "--nice=19"),
270
+ ],
271
+ )
272
+ def test_slurm_executor_to_directives_executor_params(
273
+ basic_requirements, executor_kwargs, expected_directive
274
+ ):
275
+ """Test directive generation for various executor parameters."""
276
+ executor = executors.Slurm(
277
+ requirements=basic_requirements,
278
+ time=dt.timedelta(hours=1),
279
+ **executor_kwargs,
280
+ )
281
+ directives = executor.to_directives()
282
+ assert expected_directive in directives
283
+
284
+
285
+ @pytest.mark.parametrize(
286
+ "reservation,expected",
287
+ [
288
+ ("my-reservation", "--reservation=my-reservation"),
289
+ (["res1", "res2", "res3"], "--reservation=res1,res2,res3"),
290
+ ],
291
+ )
292
+ def test_slurm_executor_to_directives_reservation(basic_requirements, reservation, expected):
293
+ """Test reservation directive generation."""
294
+ executor = executors.Slurm(
295
+ requirements=basic_requirements, time=dt.timedelta(hours=1), reservation=reservation
296
+ )
297
+ directives = executor.to_directives()
298
+ reservation_directive = [d for d in directives if d.startswith("--reservation=")][0]
299
+ assert reservation_directive == expected
300
+
301
+
302
+ @pytest.mark.parametrize(
303
+ "flag_kwargs,expected_directives",
304
+ [
305
+ ({"exclusive": True}, ["--exclusive"]),
306
+ ({"exclusive": False}, []),
307
+ ({"oversubscribe": True}, ["--oversubscribe"]),
308
+ ({"oversubscribe": False}, []),
309
+ ({"overcommit": True}, ["--overcommit"]),
310
+ ({"overcommit": False}, []),
311
+ ({"kill_on_invalid_dependencies": True}, ["--kill-on-invalid-dep=yes"]),
312
+ ({"kill_on_invalid_dependencies": False}, ["--kill-on-invalid-dep=no"]),
313
+ ],
314
+ )
315
+ def test_slurm_executor_to_directives_boolean_flags(
316
+ basic_requirements, flag_kwargs, expected_directives
317
+ ):
318
+ """Test directive generation for boolean flags."""
319
+ executor = executors.Slurm(
320
+ requirements=basic_requirements,
321
+ time=dt.timedelta(hours=1),
322
+ **flag_kwargs,
323
+ )
324
+ directives = executor.to_directives()
325
+ for expected in expected_directives:
326
+ assert any(expected in d for d in directives)
327
+ for unexpected in ["--exclusive", "--oversubscribe", "--overcommit"]:
328
+ if flag_kwargs and not any(expected in [unexpected] for expected in expected_directives):
329
+ pass # Skip negative checks for now
330
+
331
+
332
+ @pytest.mark.parametrize(
333
+ "timeout_signal,grace_period,expected_signal",
334
+ [
335
+ (signal.SIGUSR1, dt.timedelta(minutes=2), "USR1@120"),
336
+ (signal.SIGUSR2, dt.timedelta(seconds=90), "USR2@90"),
337
+ (signal.SIGTERM, dt.timedelta(seconds=30), "TERM@30"),
338
+ (signal.SIGKILL, dt.timedelta(seconds=1), "KILL@1"),
339
+ ],
340
+ )
341
+ def test_slurm_executor_to_directives_timeout_signal(
342
+ basic_requirements, timeout_signal, grace_period, expected_signal
343
+ ):
344
+ """Test timeout signal directive generation."""
345
+ executor = executors.Slurm(
346
+ requirements=basic_requirements,
347
+ time=dt.timedelta(hours=1),
348
+ timeout_signal=timeout_signal,
349
+ timeout_signal_grace_period=grace_period,
350
+ )
351
+ directives = executor.to_directives()
352
+ signal_directive = [d for d in directives if d.startswith("--signal=")][0]
353
+ assert signal_directive == f"--signal={expected_signal}"
354
+
355
+
356
+ @pytest.mark.parametrize(
357
+ "requeue,max_attempts,expected",
358
+ [
359
+ (True, 5, "--requeue"),
360
+ (True, 0, "--no-requeue"),
361
+ (False, 0, "--no-requeue"),
362
+ ],
363
+ )
364
+ def test_slurm_executor_to_directives_requeue(basic_requirements, requeue, max_attempts, expected):
365
+ """Test requeue directive generation."""
366
+ executor = executors.Slurm(
367
+ requirements=basic_requirements,
368
+ time=dt.timedelta(hours=1),
369
+ requeue=requeue,
370
+ requeue_max_attempts=max_attempts,
371
+ )
372
+ directives = executor.to_directives()
373
+ requeue_directives = [d for d in directives if "requeue" in d]
374
+ assert expected in requeue_directives
375
+
376
+
377
+ def test_slurm_executor_to_directives_bind_gpu(basic_requirements):
378
+ """Test GPU bind directive generation."""
379
+ executor = executors.Slurm(
380
+ requirements=basic_requirements,
381
+ time=dt.timedelta(hours=1),
382
+ bind={resources.ResourceType.GPU: "closest"},
383
+ )
384
+ directives = executor.to_directives()
385
+ gpu_bind_directive = [d for d in directives if d.startswith("--gpu-bind=")][0]
386
+ assert gpu_bind_directive == "--gpu-bind=closest"
387
+
388
+
389
+ @pytest.mark.parametrize(
390
+ "resource,value,expected",
391
+ [
392
+ (resources.ResourceType.GPU, "closest", "--gpu-bind=closest"),
393
+ (resources.ResourceType.MEMORY, "local", "--mem-bind=local"),
394
+ (resources.ResourceType.RAM, "map_mem:0,1", "--mem-bind=map_mem:0,1"),
395
+ (resources.ResourceType.GPU, None, "--gpu-bind=none"),
396
+ ],
397
+ )
398
+ def test_slurm_executor_to_directives_bind_resource_types(
399
+ basic_requirements, resource, value, expected
400
+ ):
401
+ """Test bind directive generation with ResourceType enums."""
402
+ executor = executors.Slurm(
403
+ requirements=basic_requirements,
404
+ time=dt.timedelta(hours=1),
405
+ bind={resource: value},
406
+ )
407
+ directives = executor.to_directives()
408
+ assert any(expected in d for d in directives)
409
+
410
+
411
+ @pytest.mark.parametrize(
412
+ "custom_gres,value,expected",
413
+ [
414
+ ("gpu_custom", "closest", "--tres-bind=gres/gpu_custom:closest"),
415
+ ("memory_pool", "local", "--tres-bind=gres/memory_pool:local"),
416
+ ("accelerator", None, "--tres-bind=gres/accelerator:none"),
417
+ ],
418
+ )
419
+ def test_slurm_executor_to_directives_bind_custom_gres(
420
+ basic_requirements, custom_gres, value, expected
421
+ ):
422
+ """Test bind directive generation with custom GRES strings."""
423
+ executor = executors.Slurm(
424
+ requirements=basic_requirements,
425
+ time=dt.timedelta(hours=1),
426
+ bind={custom_gres: value},
427
+ )
428
+ directives = executor.to_directives()
429
+ assert any(expected in d for d in directives)
430
+
431
+
432
+ def test_slurm_executor_to_directives_includes_requirements_directives(basic_requirements):
433
+ """Test that to_directives includes directives from requirements."""
434
+ executor = executors.Slurm(requirements=basic_requirements, time=dt.timedelta(hours=1))
435
+ directives = executor.to_directives()
436
+ assert any(d.startswith("--cpus-per-task=") for d in directives)
437
+ assert any(d.startswith("--mem-per-cpu=") for d in directives)
438
+
439
+
440
+ @pytest.mark.parametrize(
441
+ "grace_period,expected_diff",
442
+ [
443
+ (
444
+ dt.timedelta(minutes=1, seconds=30),
445
+ dt.timedelta(hours=1) - dt.timedelta(minutes=1, seconds=30),
446
+ ),
447
+ (dt.timedelta(minutes=30), dt.timedelta(hours=2) - dt.timedelta(minutes=30)),
448
+ ],
449
+ )
450
+ def test_slurm_executor_requeue_timeout_property(basic_requirements, grace_period, expected_diff):
451
+ """Test requeue_timeout property calculation."""
452
+ job_time = dt.timedelta(hours=2)
453
+ executor = executors.Slurm(
454
+ requirements=basic_requirements,
455
+ time=job_time,
456
+ timeout_signal_grace_period=grace_period,
457
+ )
458
+ assert executor.requeue_timeout == job_time - grace_period
459
+
460
+
461
+ @pytest.mark.parametrize("tag", ["test:latest", "ghcr.io/project/image:v1", None])
462
+ def test_slurm_executor_spec_classmethod(tag):
463
+ """Test that Executor.Spec() class method returns SlurmSpec."""
464
+ spec = executors.Slurm.Spec(tag=tag)
465
+ assert isinstance(spec, executors.SlurmSpec)
466
+ assert spec.tag == tag
467
+
468
+
469
+ def test_slurm_executor_with_multiple_binds(basic_requirements):
470
+ """Test executor with multiple resource binds."""
471
+ executor = executors.Slurm(
472
+ requirements=basic_requirements,
473
+ time=dt.timedelta(hours=1),
474
+ bind={
475
+ resources.ResourceType.GPU: "closest",
476
+ resources.ResourceType.MEMORY: "local",
477
+ },
478
+ )
479
+ directives = executor.to_directives()
480
+ assert any("--gpu-bind=closest" in d for d in directives)
481
+ assert any("--mem-bind=local" in d for d in directives)
482
+
483
+
484
+ @pytest.mark.parametrize("grace_period_secs", [1, 30, 3600])
485
+ def test_slurm_executor_with_various_grace_periods(basic_requirements, grace_period_secs):
486
+ """Test executor with various timeout grace periods."""
487
+ executor = executors.Slurm(
488
+ requirements=basic_requirements,
489
+ time=dt.timedelta(hours=24),
490
+ timeout_signal_grace_period=dt.timedelta(seconds=grace_period_secs),
491
+ )
492
+ directives = executor.to_directives()
493
+ signal_directive = [d for d in directives if d.startswith("--signal=")][0]
494
+ assert f"@{grace_period_secs}" in signal_directive
495
+
496
+
497
+ @pytest.mark.parametrize(
498
+ "job_time,grace_period",
499
+ [
500
+ (dt.timedelta(minutes=2), dt.timedelta(seconds=30)),
501
+ (dt.timedelta(hours=1), dt.timedelta(minutes=5)),
502
+ ],
503
+ )
504
+ def test_slurm_executor_requeue_timeout_calculation(basic_requirements, job_time, grace_period):
505
+ """Test requeue_timeout with various time combinations."""
506
+ executor = executors.Slurm(
507
+ requirements=basic_requirements,
508
+ time=job_time,
509
+ timeout_signal_grace_period=grace_period,
510
+ )
511
+ expected = job_time - grace_period
512
+ assert executor.requeue_timeout == expected
513
+
514
+
515
+ @pytest.mark.parametrize("sig", [signal.SIGUSR1, signal.SIGUSR2, signal.SIGTERM, signal.SIGKILL])
516
+ def test_slurm_executor_multiple_signal_types(basic_requirements, sig):
517
+ """Test executor with different signal types."""
518
+ executor = executors.Slurm(
519
+ requirements=basic_requirements,
520
+ time=dt.timedelta(hours=1),
521
+ timeout_signal=sig,
522
+ )
523
+ directives = executor.to_directives()
524
+ signal_directive = [d for d in directives if d.startswith("--signal=")][0]
525
+ expected_sig_name = sig.name.removeprefix("SIG")
526
+ assert expected_sig_name in signal_directive