xmanager-slurm: xmanager_slurm-0.3.2-py3-none-any.whl → xmanager_slurm-0.4.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xmanager-slurm might be problematic. Click here for more details.

Files changed (42)
  1. xm_slurm/__init__.py +6 -2
  2. xm_slurm/api.py +301 -34
  3. xm_slurm/batching.py +4 -4
  4. xm_slurm/config.py +105 -55
  5. xm_slurm/constants.py +19 -0
  6. xm_slurm/contrib/__init__.py +0 -0
  7. xm_slurm/contrib/clusters/__init__.py +47 -13
  8. xm_slurm/contrib/clusters/drac.py +34 -16
  9. xm_slurm/dependencies.py +171 -0
  10. xm_slurm/executables.py +34 -22
  11. xm_slurm/execution.py +305 -107
  12. xm_slurm/executors.py +8 -12
  13. xm_slurm/experiment.py +601 -168
  14. xm_slurm/experimental/parameter_controller.py +202 -0
  15. xm_slurm/job_blocks.py +7 -0
  16. xm_slurm/packageables.py +42 -20
  17. xm_slurm/packaging/{docker/local.py → docker.py} +135 -40
  18. xm_slurm/packaging/router.py +3 -1
  19. xm_slurm/packaging/utils.py +9 -81
  20. xm_slurm/resources.py +28 -4
  21. xm_slurm/scripts/_cloudpickle.py +28 -0
  22. xm_slurm/scripts/cli.py +52 -0
  23. xm_slurm/status.py +9 -0
  24. xm_slurm/templates/docker/mamba.Dockerfile +4 -2
  25. xm_slurm/templates/docker/python.Dockerfile +18 -10
  26. xm_slurm/templates/docker/uv.Dockerfile +35 -0
  27. xm_slurm/templates/slurm/fragments/monitor.bash.j2 +5 -0
  28. xm_slurm/templates/slurm/job-array.bash.j2 +1 -2
  29. xm_slurm/templates/slurm/job.bash.j2 +4 -3
  30. xm_slurm/types.py +23 -0
  31. xm_slurm/utils.py +18 -10
  32. xmanager_slurm-0.4.1.dist-info/METADATA +26 -0
  33. xmanager_slurm-0.4.1.dist-info/RECORD +44 -0
  34. {xmanager_slurm-0.3.2.dist-info → xmanager_slurm-0.4.1.dist-info}/WHEEL +1 -1
  35. xmanager_slurm-0.4.1.dist-info/entry_points.txt +2 -0
  36. xmanager_slurm-0.4.1.dist-info/licenses/LICENSE.md +227 -0
  37. xm_slurm/packaging/docker/__init__.py +0 -75
  38. xm_slurm/packaging/docker/abc.py +0 -112
  39. xm_slurm/packaging/docker/cloud.py +0 -503
  40. xm_slurm/templates/docker/pdm.Dockerfile +0 -31
  41. xmanager_slurm-0.3.2.dist-info/METADATA +0 -25
  42. xmanager_slurm-0.3.2.dist-info/RECORD +0 -38
xm_slurm/executors.py CHANGED
@@ -48,6 +48,9 @@ class Slurm(xm.Executor):
48
48
  qos: str | None = None
49
49
  priority: int | None = None
50
50
 
51
+ # Job dependency handling
52
+ kill_on_invalid_dependencies: bool = True
53
+
51
54
  # Job rescheduling
52
55
  timeout_signal: signal.Signals = signal.SIGUSR2
53
56
  timeout_signal_grace_period: dt.timedelta = dt.timedelta(seconds=90)
@@ -93,6 +96,11 @@ class Slurm(xm.Executor):
93
96
  minutes, seconds = divmod(remainder, 60)
94
97
  directives.append(f"--time={days}-{hours:02}:{minutes:02}:{seconds:02}")
95
98
 
99
+ # Job dependency handling
100
+ directives.append(
101
+ f"--kill-on-invalid-dep={'yes' if self.kill_on_invalid_dependencies else 'no'}"
102
+ )
103
+
96
104
  # Placement
97
105
  if self.account:
98
106
  directives.append(f"--account={self.account}")
@@ -113,15 +121,3 @@ class Slurm(xm.Executor):
113
121
  directives.append("--no-requeue")
114
122
 
115
123
  return directives
116
-
117
-
118
- class DockerSpec(xm.ExecutorSpec):
119
- """Local Docker executor specification that describes the container runtime."""
120
-
121
-
122
- class Docker(xm.Executor):
123
- """Local Docker executor describing the runtime environment."""
124
-
125
- @classmethod
126
- def Spec(cls) -> DockerSpec:
127
- return DockerSpec()