xmanager-slurm 0.4.6__tar.gz → 0.4.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xmanager-slurm might be problematic. Click here for more details.

Files changed (122)
  1. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/PKG-INFO +1 -1
  2. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/pyproject.toml +1 -1
  3. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/uv.lock +1 -1
  4. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/xm_slurm/api/web/client.py +1 -1
  5. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/xm_slurm/config.py +2 -1
  6. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/xm_slurm/execution.py +47 -17
  7. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/xm_slurm/templates/slurm/runtimes/podman.bash.j2 +0 -1
  8. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/xm_slurm/utils.py +30 -0
  9. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/.devcontainer.json +0 -0
  10. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/.github/workflows/ci.yml +0 -0
  11. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/.github/workflows/deploy-docs.yml +0 -0
  12. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/.gitignore +0 -0
  13. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/.pdm-build/.gitignore +0 -0
  14. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/.pdm-build/xm_slurm-0.1+editable.dist-info/METADATA +0 -0
  15. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/.pdm-build/xm_slurm-0.1+editable.dist-info/WHEEL +0 -0
  16. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/.pdm-build/xm_slurm.pth +0 -0
  17. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/.pre-commit-config.yaml +0 -0
  18. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/.python-version +0 -0
  19. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/.vscode/settings.json +0 -0
  20. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/LICENSE.md +0 -0
  21. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/README.md +0 -0
  22. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/docs/api/executables.rst +0 -0
  23. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/docs/api/executors.rst +0 -0
  24. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/docs/api/packageables.rst +0 -0
  25. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/docs/assets/workflow-dark.svg +0 -0
  26. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/docs/assets/workflow-light.svg +0 -0
  27. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/docs/conf.py +0 -0
  28. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/docs/getting-started/xmanager.md +0 -0
  29. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/docs/guides/index.md +0 -0
  30. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/docs/guides/remote-dev.md +0 -0
  31. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/docs/index.md +0 -0
  32. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/examples/conda/environment.yml +0 -0
  33. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/examples/conda/launch.py +0 -0
  34. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/examples/conda/main.py +0 -0
  35. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/examples/conda/pyproject.toml +0 -0
  36. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/examples/custom-dockerfile/Dockerfile +0 -0
  37. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/examples/custom-dockerfile/launch.py +0 -0
  38. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/examples/custom-dockerfile/pyproject.toml +0 -0
  39. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/examples/job-array-sweep/launch.py +0 -0
  40. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/examples/job-array-sweep/main.py +0 -0
  41. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/examples/job-array-sweep/pyproject.toml +0 -0
  42. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/examples/job-array-sweep/uv.lock +0 -0
  43. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/examples/job-dependencies/eval.py +0 -0
  44. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/examples/job-dependencies/launch.py +0 -0
  45. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/examples/job-dependencies/pyproject.toml +0 -0
  46. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/examples/job-dependencies/train.py +0 -0
  47. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/examples/job-dependencies/uv.lock +0 -0
  48. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/examples/job-group/Dockerfile +0 -0
  49. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/examples/job-group/launch.py +0 -0
  50. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/examples/job-group/pyproject.toml +0 -0
  51. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/examples/job-group/uv.lock +0 -0
  52. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/examples/metadata/launch.py +0 -0
  53. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/examples/metadata/main.py +0 -0
  54. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/examples/metadata/pyproject.toml +0 -0
  55. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/examples/metadata/requirements.txt +0 -0
  56. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/examples/parameter-controller/launch.py +0 -0
  57. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/examples/parameter-controller/main.py +0 -0
  58. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/examples/parameter-controller/pyproject.toml +0 -0
  59. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/examples/parameter-controller/requirements.txt +0 -0
  60. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/examples/pip/launch.py +0 -0
  61. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/examples/pip/main.py +0 -0
  62. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/examples/pip/pyproject.toml +0 -0
  63. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/examples/pip/requirements.txt +0 -0
  64. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/examples/uv/launch.py +0 -0
  65. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/examples/uv/pyproject.toml +0 -0
  66. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/examples/uv/uv.lock +0 -0
  67. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/tests/integration/conftest.py +0 -0
  68. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/tests/integration/fixtures/slurm/Dockerfile +0 -0
  69. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/tests/integration/fixtures/slurm/README.md +0 -0
  70. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/tests/integration/fixtures/slurm/cgroup.conf +0 -0
  71. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/tests/integration/fixtures/slurm/docker-compose.yml +0 -0
  72. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/tests/integration/fixtures/slurm/docker-entrypoint.sh +0 -0
  73. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/tests/integration/fixtures/slurm/host_ed25519 +0 -0
  74. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/tests/integration/fixtures/slurm/host_ed25519.pub +0 -0
  75. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/tests/integration/fixtures/slurm/id_ed25519 +0 -0
  76. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/tests/integration/fixtures/slurm/id_ed25519.pub +0 -0
  77. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/tests/integration/fixtures/slurm/slurm.conf +0 -0
  78. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/tests/integration/fixtures/slurm/slurmdbd.conf +0 -0
  79. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/tests/integration/fixtures/slurm/sshd_config +0 -0
  80. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/tests/integration/test_remote_execution.py +0 -0
  81. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/tests/test_dependencies.py +0 -0
  82. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/xm_slurm/__init__.py +0 -0
  83. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/xm_slurm/api/__init__.py +0 -0
  84. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/xm_slurm/api/abc.py +0 -0
  85. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/xm_slurm/api/models.py +0 -0
  86. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/xm_slurm/api/sqlite/client.py +0 -0
  87. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/xm_slurm/batching.py +0 -0
  88. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/xm_slurm/console.py +0 -0
  89. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/xm_slurm/constants.py +0 -0
  90. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/xm_slurm/contrib/__init__.py +0 -0
  91. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/xm_slurm/contrib/clusters/__init__.py +0 -0
  92. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/xm_slurm/contrib/clusters/drac.py +0 -0
  93. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/xm_slurm/dependencies.py +0 -0
  94. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/xm_slurm/executables.py +0 -0
  95. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/xm_slurm/executors.py +0 -0
  96. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/xm_slurm/experiment.py +0 -0
  97. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/xm_slurm/experimental/parameter_controller.py +0 -0
  98. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/xm_slurm/filesystem.py +0 -0
  99. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/xm_slurm/job_blocks.py +0 -0
  100. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/xm_slurm/metadata_context.py +0 -0
  101. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/xm_slurm/packageables.py +0 -0
  102. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/xm_slurm/packaging/__init__.py +0 -0
  103. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/xm_slurm/packaging/docker.py +0 -0
  104. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/xm_slurm/packaging/registry.py +0 -0
  105. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/xm_slurm/packaging/router.py +0 -0
  106. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/xm_slurm/packaging/utils.py +0 -0
  107. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/xm_slurm/resources.py +0 -0
  108. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/xm_slurm/scripts/_cloudpickle.py +0 -0
  109. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/xm_slurm/scripts/cli.py +0 -0
  110. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/xm_slurm/status.py +0 -0
  111. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/xm_slurm/templates/docker/docker-bake.hcl.j2 +0 -0
  112. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/xm_slurm/templates/docker/mamba.Dockerfile +0 -0
  113. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/xm_slurm/templates/docker/python.Dockerfile +0 -0
  114. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/xm_slurm/templates/docker/uv.Dockerfile +0 -0
  115. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/xm_slurm/templates/slurm/entrypoint.bash.j2 +0 -0
  116. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/xm_slurm/templates/slurm/fragments/monitor.bash.j2 +0 -0
  117. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/xm_slurm/templates/slurm/fragments/proxy.bash.j2 +0 -0
  118. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/xm_slurm/templates/slurm/job-array.bash.j2 +0 -0
  119. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/xm_slurm/templates/slurm/job-group.bash.j2 +0 -0
  120. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/xm_slurm/templates/slurm/job.bash.j2 +0 -0
  121. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/xm_slurm/templates/slurm/runtimes/apptainer.bash.j2 +0 -0
  122. {xmanager_slurm-0.4.6 → xmanager_slurm-0.4.8}/xm_slurm/types.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: xmanager-slurm
3
- Version: 0.4.6
3
+ Version: 0.4.8
4
4
  Summary: Slurm backend for XManager.
5
5
  Project-URL: GitHub, https://github.com/jessefarebro/xm-slurm
6
6
  Author-email: Jesse Farebrother <jfarebro@cs.mcgill.ca>
@@ -5,7 +5,7 @@ build-backend = "hatchling.build"
5
5
  [project]
6
6
  name = "xmanager-slurm"
7
7
  description = "Slurm backend for XManager."
8
- version = "0.4.6"
8
+ version = "0.4.8"
9
9
  # readme = "README.md"
10
10
  requires-python = ">=3.10"
11
11
  license = { text = "MIT" }
@@ -2250,7 +2250,7 @@ wheels = [
2250
2250
 
2251
2251
  [[package]]
2252
2252
  name = "xmanager-slurm"
2253
- version = "0.4.6"
2253
+ version = "0.4.8"
2254
2254
  source = { editable = "." }
2255
2255
  dependencies = [
2256
2256
  { name = "aiofile" },
@@ -33,7 +33,7 @@ class XManagerWebAPI(XManagerAPI):
33
33
  self.client = httpx.Client(headers={"Authorization": f"Bearer {token}"}, verify=False)
34
34
 
35
35
  def _make_url(self, path: str) -> str:
36
- return f"{self.base_url}/api{path}"
36
+ return f"{self.base_url}{path}"
37
37
 
38
38
  @with_backoff
39
39
  def get_experiment(self, xid: int) -> models.Experiment:
@@ -119,8 +119,9 @@ class SlurmSSHConfig:
119
119
  config=None,
120
120
  kbdint_auth=False,
121
121
  disable_trivial_auth=True,
122
+ known_hosts=self.known_hosts,
122
123
  )
123
- options.prepare(last_config=self.config, known_hosts=self.known_hosts)
124
+ options.prepare(last_config=self.config)
124
125
  return options
125
126
 
126
127
  def serialize(self):
@@ -235,11 +235,10 @@ class SlurmHandle(_BatchedSlurmHandle, tp.Generic[SlurmJobT]):
235
235
  return await self._batched_get_state(self.ssh, self.slurm_job)
236
236
 
237
237
  async def logs(
238
- self, *, num_lines: int, block_size: int, wait: bool, follow: bool
239
- ) -> tp.AsyncGenerator[ConsoleRenderable, None]:
240
- statedir = await get_client()._state_dir(self.ssh)
241
- file = statedir / f"{self.experiment_id}/slurm-{self.slurm_job.job_id}.out"
242
-
238
+ self, *, num_lines: int, block_size: int, wait: bool, follow: bool, raw: bool = False
239
+ ) -> tp.AsyncGenerator[tp.Union[str, ConsoleRenderable], None]:
240
+ experiment_dir = await get_client().experiment_dir(self.ssh, self.experiment_id)
241
+ file = experiment_dir / f"slurm-{self.slurm_job.job_id}.out"
243
242
  fs = await get_client().fs(self.ssh)
244
243
 
245
244
  if wait:
@@ -249,7 +248,7 @@ class SlurmHandle(_BatchedSlurmHandle, tp.Generic[SlurmJobT]):
249
248
  file_size = await fs.size(file)
250
249
  assert file_size is not None
251
250
 
252
- async with await fs.open(file, "rb") as remote_file: # type: ignore
251
+ async with await fs.open(file, "rb") as remote_file:
253
252
  data = b""
254
253
  lines = []
255
254
  position = file_size
@@ -257,27 +256,40 @@ class SlurmHandle(_BatchedSlurmHandle, tp.Generic[SlurmJobT]):
257
256
  while len(lines) <= num_lines and position > 0:
258
257
  read_size = min(block_size, position)
259
258
  position -= read_size
260
- await remote_file.seek(position) # type: ignore
259
+ await remote_file.seek(position)
261
260
  chunk = await remote_file.read(read_size)
262
261
  data = chunk + data
263
262
  lines = data.splitlines()
264
263
 
265
264
  if position <= 0:
266
- yield Rule("[bold red]BEGINNING OF FILE[/bold red]")
265
+ if raw:
266
+ yield "\033[31mBEGINNING OF FILE\033[0m\n"
267
+ else:
268
+ yield Rule("[bold red]BEGINNING OF FILE[/bold red]")
267
269
  for line in lines[-num_lines:]:
268
- yield Text.from_ansi(line.decode("utf-8", errors="replace"))
270
+ if raw:
271
+ yield line.decode("utf-8", errors="replace") + "\n"
272
+ else:
273
+ yield Text.from_ansi(line.decode("utf-8", errors="replace"))
269
274
 
270
275
  if (await self.get_state()) not in status.SlurmActiveJobStates:
271
- yield Rule("[bold red]END OF FILE[/bold red]")
272
- return
276
+ if raw:
277
+ yield "\033[31mEND OF FILE\033[0m\n"
278
+ return
279
+ else:
280
+ yield Rule("[bold red]END OF FILE[/bold red]")
281
+ return
273
282
 
274
283
  if not follow:
275
284
  return
276
285
 
277
- await remote_file.seek(file_size) # type: ignore
286
+ await remote_file.seek(file_size)
278
287
  while True:
279
288
  if new_data := (await remote_file.read(block_size)):
280
- yield Text.from_ansi(new_data.decode("utf-8", errors="replace"))
289
+ if raw:
290
+ yield new_data.decode("utf-8", errors="replace")
291
+ else:
292
+ yield Text.from_ansi(new_data.decode("utf-8", errors="replace"))
281
293
  else:
282
294
  await asyncio.sleep(0.25)
283
295
 
@@ -454,10 +466,28 @@ class SlurmExecutionClient:
454
466
  return False
455
467
 
456
468
  @functools.cache
469
+ @utils.reawaitable
457
470
  async def _state_dir(self, ssh_config: SlurmSSHConfig) -> pathlib.Path:
458
- cmd = await self.run(ssh_config, "printenv HOME", check=True)
459
- assert isinstance(cmd.stdout, str)
460
- return pathlib.Path(cmd.stdout.strip()) / ".local" / "state" / "xm-slurm"
471
+ state_dirs = [
472
+ ("XM_SLURM_STATE_DIR", ""),
473
+ ("XDG_STATE_HOME", "xm-slurm"),
474
+ ("HOME", ".local/state/xm-slurm"),
475
+ ]
476
+
477
+ for env_var, subpath in state_dirs:
478
+ cmd = await self.run(ssh_config, f"printenv {env_var}", check=False)
479
+ assert isinstance(cmd.stdout, str)
480
+ if cmd.returncode == 0:
481
+ return pathlib.Path(cmd.stdout.strip()) / subpath
482
+
483
+ raise SlurmExecutionError(
484
+ "Failed to find a valid state directory for XManager. "
485
+ "We weren't able to resolve any of the following paths: "
486
+ f"{', '.join(env_var + ('/' + subpath if subpath else '') for env_var, subpath in state_dirs)}."
487
+ )
488
+
489
+ async def experiment_dir(self, ssh_config: SlurmSSHConfig, experiment_id: int) -> pathlib.Path:
490
+ return (await self._state_dir(ssh_config)) / f"{experiment_id:08d}"
461
491
 
462
492
  async def run(
463
493
  self,
@@ -646,7 +676,7 @@ class SlurmExecutionClient:
646
676
 
647
677
  fs = await self.fs(cluster.ssh)
648
678
 
649
- template_dir = (await self._state_dir(cluster.ssh)) / f"{experiment_id}"
679
+ template_dir = await self.experiment_dir(cluster.ssh, experiment_id)
650
680
 
651
681
  await fs.makedirs(template_dir, exist_ok=True)
652
682
  await fs.write(template_dir / submission_script_path, submission_script.encode())
@@ -3,7 +3,6 @@ time podman pull \
3
3
  {% if job.executable.credentials %}
4
4
  --creds {{ job.executable.credentials.username }}:{{ job.executable.credentials.password }} \
5
5
  {% endif %}
6
- --retry 3 \
7
6
  {{ job.executable.image }}
8
7
 
9
8
  cat << 'ENTRYPOINT_EOF' > "$SLURM_TMPDIR"/xm-slurm-entrypoint.sh
@@ -1,3 +1,4 @@
1
+ import asyncio
1
2
  import functools
2
3
  import logging
3
4
  import os
@@ -13,10 +14,39 @@ import typing as tp
13
14
  from xmanager import xm
14
15
 
15
16
  T = tp.TypeVar("T")
17
+ P = tp.ParamSpec("P")
16
18
 
17
19
  logger = logging.getLogger(__name__)
18
20
 
19
21
 
22
+ class CachedAwaitable(tp.Awaitable[T]):
23
+ def __init__(self, awaitable: tp.Awaitable[T]):
24
+ self.awaitable = awaitable
25
+ self.result: asyncio.Future[T] | None = None
26
+
27
+ def __await__(self):
28
+ if not self.result:
29
+ future = asyncio.get_event_loop().create_future()
30
+ self.result = future
31
+ try:
32
+ result = yield from self.awaitable.__await__()
33
+ future.set_result(result)
34
+ except Exception as e:
35
+ future.set_exception(e)
36
+
37
+ if not self.result.done():
38
+ yield from self.result
39
+ return self.result.result()
40
+
41
+
42
+ def reawaitable(f: tp.Callable[P, tp.Awaitable[T]]) -> tp.Callable[P, CachedAwaitable[T]]:
43
+ @functools.wraps(f)
44
+ def wrapper(*args: P.args, **kwargs: P.kwargs) -> CachedAwaitable[T]:
45
+ return CachedAwaitable(f(*args, **kwargs))
46
+
47
+ return wrapper
48
+
49
+
20
50
  @functools.cache
21
51
  def find_project_root() -> pathlib.Path:
22
52
  launch_script_path: pathlib.Path | None = None
File without changes