xmanager-slurm 0.4.6__tar.gz → 0.4.7__tar.gz

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xmanager-slurm might be problematic. Click here for more details.

Files changed (122)
  1. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/PKG-INFO +1 -1
  2. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/pyproject.toml +1 -1
  3. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/uv.lock +1 -1
  4. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/xm_slurm/execution.py +23 -6
  5. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/xm_slurm/templates/slurm/runtimes/podman.bash.j2 +0 -1
  6. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/.devcontainer.json +0 -0
  7. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/.github/workflows/ci.yml +0 -0
  8. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/.github/workflows/deploy-docs.yml +0 -0
  9. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/.gitignore +0 -0
  10. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/.pdm-build/.gitignore +0 -0
  11. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/.pdm-build/xm_slurm-0.1+editable.dist-info/METADATA +0 -0
  12. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/.pdm-build/xm_slurm-0.1+editable.dist-info/WHEEL +0 -0
  13. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/.pdm-build/xm_slurm.pth +0 -0
  14. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/.pre-commit-config.yaml +0 -0
  15. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/.python-version +0 -0
  16. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/.vscode/settings.json +0 -0
  17. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/LICENSE.md +0 -0
  18. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/README.md +0 -0
  19. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/docs/api/executables.rst +0 -0
  20. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/docs/api/executors.rst +0 -0
  21. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/docs/api/packageables.rst +0 -0
  22. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/docs/assets/workflow-dark.svg +0 -0
  23. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/docs/assets/workflow-light.svg +0 -0
  24. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/docs/conf.py +0 -0
  25. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/docs/getting-started/xmanager.md +0 -0
  26. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/docs/guides/index.md +0 -0
  27. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/docs/guides/remote-dev.md +0 -0
  28. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/docs/index.md +0 -0
  29. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/examples/conda/environment.yml +0 -0
  30. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/examples/conda/launch.py +0 -0
  31. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/examples/conda/main.py +0 -0
  32. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/examples/conda/pyproject.toml +0 -0
  33. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/examples/custom-dockerfile/Dockerfile +0 -0
  34. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/examples/custom-dockerfile/launch.py +0 -0
  35. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/examples/custom-dockerfile/pyproject.toml +0 -0
  36. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/examples/job-array-sweep/launch.py +0 -0
  37. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/examples/job-array-sweep/main.py +0 -0
  38. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/examples/job-array-sweep/pyproject.toml +0 -0
  39. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/examples/job-array-sweep/uv.lock +0 -0
  40. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/examples/job-dependencies/eval.py +0 -0
  41. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/examples/job-dependencies/launch.py +0 -0
  42. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/examples/job-dependencies/pyproject.toml +0 -0
  43. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/examples/job-dependencies/train.py +0 -0
  44. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/examples/job-dependencies/uv.lock +0 -0
  45. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/examples/job-group/Dockerfile +0 -0
  46. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/examples/job-group/launch.py +0 -0
  47. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/examples/job-group/pyproject.toml +0 -0
  48. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/examples/job-group/uv.lock +0 -0
  49. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/examples/metadata/launch.py +0 -0
  50. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/examples/metadata/main.py +0 -0
  51. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/examples/metadata/pyproject.toml +0 -0
  52. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/examples/metadata/requirements.txt +0 -0
  53. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/examples/parameter-controller/launch.py +0 -0
  54. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/examples/parameter-controller/main.py +0 -0
  55. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/examples/parameter-controller/pyproject.toml +0 -0
  56. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/examples/parameter-controller/requirements.txt +0 -0
  57. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/examples/pip/launch.py +0 -0
  58. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/examples/pip/main.py +0 -0
  59. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/examples/pip/pyproject.toml +0 -0
  60. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/examples/pip/requirements.txt +0 -0
  61. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/examples/uv/launch.py +0 -0
  62. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/examples/uv/pyproject.toml +0 -0
  63. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/examples/uv/uv.lock +0 -0
  64. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/tests/integration/conftest.py +0 -0
  65. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/tests/integration/fixtures/slurm/Dockerfile +0 -0
  66. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/tests/integration/fixtures/slurm/README.md +0 -0
  67. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/tests/integration/fixtures/slurm/cgroup.conf +0 -0
  68. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/tests/integration/fixtures/slurm/docker-compose.yml +0 -0
  69. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/tests/integration/fixtures/slurm/docker-entrypoint.sh +0 -0
  70. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/tests/integration/fixtures/slurm/host_ed25519 +0 -0
  71. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/tests/integration/fixtures/slurm/host_ed25519.pub +0 -0
  72. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/tests/integration/fixtures/slurm/id_ed25519 +0 -0
  73. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/tests/integration/fixtures/slurm/id_ed25519.pub +0 -0
  74. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/tests/integration/fixtures/slurm/slurm.conf +0 -0
  75. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/tests/integration/fixtures/slurm/slurmdbd.conf +0 -0
  76. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/tests/integration/fixtures/slurm/sshd_config +0 -0
  77. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/tests/integration/test_remote_execution.py +0 -0
  78. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/tests/test_dependencies.py +0 -0
  79. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/xm_slurm/__init__.py +0 -0
  80. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/xm_slurm/api/__init__.py +0 -0
  81. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/xm_slurm/api/abc.py +0 -0
  82. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/xm_slurm/api/models.py +0 -0
  83. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/xm_slurm/api/sqlite/client.py +0 -0
  84. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/xm_slurm/api/web/client.py +0 -0
  85. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/xm_slurm/batching.py +0 -0
  86. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/xm_slurm/config.py +0 -0
  87. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/xm_slurm/console.py +0 -0
  88. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/xm_slurm/constants.py +0 -0
  89. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/xm_slurm/contrib/__init__.py +0 -0
  90. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/xm_slurm/contrib/clusters/__init__.py +0 -0
  91. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/xm_slurm/contrib/clusters/drac.py +0 -0
  92. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/xm_slurm/dependencies.py +0 -0
  93. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/xm_slurm/executables.py +0 -0
  94. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/xm_slurm/executors.py +0 -0
  95. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/xm_slurm/experiment.py +0 -0
  96. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/xm_slurm/experimental/parameter_controller.py +0 -0
  97. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/xm_slurm/filesystem.py +0 -0
  98. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/xm_slurm/job_blocks.py +0 -0
  99. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/xm_slurm/metadata_context.py +0 -0
  100. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/xm_slurm/packageables.py +0 -0
  101. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/xm_slurm/packaging/__init__.py +0 -0
  102. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/xm_slurm/packaging/docker.py +0 -0
  103. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/xm_slurm/packaging/registry.py +0 -0
  104. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/xm_slurm/packaging/router.py +0 -0
  105. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/xm_slurm/packaging/utils.py +0 -0
  106. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/xm_slurm/resources.py +0 -0
  107. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/xm_slurm/scripts/_cloudpickle.py +0 -0
  108. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/xm_slurm/scripts/cli.py +0 -0
  109. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/xm_slurm/status.py +0 -0
  110. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/xm_slurm/templates/docker/docker-bake.hcl.j2 +0 -0
  111. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/xm_slurm/templates/docker/mamba.Dockerfile +0 -0
  112. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/xm_slurm/templates/docker/python.Dockerfile +0 -0
  113. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/xm_slurm/templates/docker/uv.Dockerfile +0 -0
  114. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/xm_slurm/templates/slurm/entrypoint.bash.j2 +0 -0
  115. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/xm_slurm/templates/slurm/fragments/monitor.bash.j2 +0 -0
  116. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/xm_slurm/templates/slurm/fragments/proxy.bash.j2 +0 -0
  117. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/xm_slurm/templates/slurm/job-array.bash.j2 +0 -0
  118. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/xm_slurm/templates/slurm/job-group.bash.j2 +0 -0
  119. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/xm_slurm/templates/slurm/job.bash.j2 +0 -0
  120. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/xm_slurm/templates/slurm/runtimes/apptainer.bash.j2 +0 -0
  121. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/xm_slurm/types.py +0 -0
  122. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.7}/xm_slurm/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: xmanager-slurm
3
- Version: 0.4.6
3
+ Version: 0.4.7
4
4
  Summary: Slurm backend for XManager.
5
5
  Project-URL: GitHub, https://github.com/jessefarebro/xm-slurm
6
6
  Author-email: Jesse Farebrother <jfarebro@cs.mcgill.ca>
@@ -5,7 +5,7 @@ build-backend = "hatchling.build"
5
5
  [project]
6
6
  name = "xmanager-slurm"
7
7
  description = "Slurm backend for XManager."
8
- version = "0.4.6"
8
+ version = "0.4.7"
9
9
  # readme = "README.md"
10
10
  requires-python = ">=3.10"
11
11
  license = { text = "MIT" }
@@ -2250,7 +2250,7 @@ wheels = [
2250
2250
 
2251
2251
  [[package]]
2252
2252
  name = "xmanager-slurm"
2253
- version = "0.4.6"
2253
+ version = "0.4.7"
2254
2254
  source = { editable = "." }
2255
2255
  dependencies = [
2256
2256
  { name = "aiofile" },
@@ -237,8 +237,8 @@ class SlurmHandle(_BatchedSlurmHandle, tp.Generic[SlurmJobT]):
237
237
  async def logs(
238
238
  self, *, num_lines: int, block_size: int, wait: bool, follow: bool
239
239
  ) -> tp.AsyncGenerator[ConsoleRenderable, None]:
240
- statedir = await get_client()._state_dir(self.ssh)
241
- file = statedir / f"{self.experiment_id}/slurm-{self.slurm_job.job_id}.out"
240
+ experiment_dir = await get_client().experiment_dir(self.ssh, self.experiment_id)
241
+ file = experiment_dir / f"slurm-{self.slurm_job.job_id}.out"
242
242
 
243
243
  fs = await get_client().fs(self.ssh)
244
244
 
@@ -455,9 +455,26 @@ class SlurmExecutionClient:
455
455
 
456
456
  @functools.cache
457
457
  async def _state_dir(self, ssh_config: SlurmSSHConfig) -> pathlib.Path:
458
- cmd = await self.run(ssh_config, "printenv HOME", check=True)
459
- assert isinstance(cmd.stdout, str)
460
- return pathlib.Path(cmd.stdout.strip()) / ".local" / "state" / "xm-slurm"
458
+ state_dirs = [
459
+ ("XM_SLURM_STATE_DIR", ""),
460
+ ("XDG_STATE_HOME", "xm-slurm"),
461
+ ("HOME", ".local/state/xm-slurm"),
462
+ ]
463
+
464
+ for env_var, subpath in state_dirs:
465
+ cmd = await self.run(ssh_config, f"printenv {env_var}", check=False)
466
+ assert isinstance(cmd.stdout, str)
467
+ if cmd.returncode == 0:
468
+ return pathlib.Path(cmd.stdout.strip()) / subpath
469
+
470
+ raise SlurmExecutionError(
471
+ "Failed to find a valid state directory for XManager. "
472
+ "We weren't able to resolve any of the following paths: "
473
+ f"{', '.join(env_var + ('/' + subpath if subpath else '') for env_var, subpath in state_dirs)}."
474
+ )
475
+
476
+ async def experiment_dir(self, ssh_config: SlurmSSHConfig, experiment_id: int) -> pathlib.Path:
477
+ return (await self._state_dir(ssh_config)) / f"{experiment_id:08d}"
461
478
 
462
479
  async def run(
463
480
  self,
@@ -646,7 +663,7 @@ class SlurmExecutionClient:
646
663
 
647
664
  fs = await self.fs(cluster.ssh)
648
665
 
649
- template_dir = (await self._state_dir(cluster.ssh)) / f"{experiment_id}"
666
+ template_dir = await self.experiment_dir(cluster.ssh, experiment_id)
650
667
 
651
668
  await fs.makedirs(template_dir, exist_ok=True)
652
669
  await fs.write(template_dir / submission_script_path, submission_script.encode())
@@ -3,7 +3,6 @@ time podman pull \
3
3
  {% if job.executable.credentials %}
4
4
  --creds {{ job.executable.credentials.username }}:{{ job.executable.credentials.password }} \
5
5
  {% endif %}
6
- --retry 3 \
7
6
  {{ job.executable.image }}
8
7
 
9
8
  cat << 'ENTRYPOINT_EOF' > "$SLURM_TMPDIR"/xm-slurm-entrypoint.sh
File without changes