stepup-queue 1.1.0__tar.gz → 2.0.0rc1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24)
  1. stepup_queue-2.0.0rc1/CLAUDE.md +126 -0
  2. {stepup_queue-1.1.0/stepup_queue.egg-info → stepup_queue-2.0.0rc1}/PKG-INFO +3 -2
  3. {stepup_queue-1.1.0 → stepup_queue-2.0.0rc1}/pyproject.toml +4 -3
  4. {stepup_queue-1.1.0 → stepup_queue-2.0.0rc1}/stepup/queue/__init__.py +1 -1
  5. {stepup_queue-1.1.0 → stepup_queue-2.0.0rc1}/stepup/queue/api.py +19 -22
  6. {stepup_queue-1.1.0 → stepup_queue-2.0.0rc1}/stepup/queue/canceljobs.py +35 -30
  7. stepup_queue-2.0.0rc1/stepup/queue/log.py +121 -0
  8. {stepup_queue-1.1.0 → stepup_queue-2.0.0rc1}/stepup/queue/removejobs.py +19 -9
  9. {stepup_queue-1.1.0 → stepup_queue-2.0.0rc1}/stepup/queue/sbatch.py +117 -191
  10. stepup_queue-2.0.0rc1/stepup/queue/utils.py +134 -0
  11. {stepup_queue-1.1.0 → stepup_queue-2.0.0rc1/stepup_queue.egg-info}/PKG-INFO +3 -2
  12. {stepup_queue-1.1.0 → stepup_queue-2.0.0rc1}/stepup_queue.egg-info/SOURCES.txt +4 -1
  13. {stepup_queue-1.1.0 → stepup_queue-2.0.0rc1}/stepup_queue.egg-info/entry_points.txt +2 -2
  14. {stepup_queue-1.1.0 → stepup_queue-2.0.0rc1}/stepup_queue.egg-info/requires.txt +2 -1
  15. stepup_queue-2.0.0rc1/stepup_queue.egg-info/scm_file_list.json +51 -0
  16. stepup_queue-2.0.0rc1/stepup_queue.egg-info/scm_version.json +8 -0
  17. stepup_queue-1.1.0/stepup/queue/actions.py +0 -57
  18. stepup_queue-1.1.0/stepup/queue/utils.py +0 -58
  19. {stepup_queue-1.1.0 → stepup_queue-2.0.0rc1}/LICENSE +0 -0
  20. {stepup_queue-1.1.0 → stepup_queue-2.0.0rc1}/MANIFEST.in +0 -0
  21. {stepup_queue-1.1.0 → stepup_queue-2.0.0rc1}/README.md +0 -0
  22. {stepup_queue-1.1.0 → stepup_queue-2.0.0rc1}/setup.cfg +0 -0
  23. {stepup_queue-1.1.0 → stepup_queue-2.0.0rc1}/stepup_queue.egg-info/dependency_links.txt +0 -0
  24. {stepup_queue-1.1.0 → stepup_queue-2.0.0rc1}/stepup_queue.egg-info/top_level.txt +0 -0
@@ -0,0 +1,126 @@
1
+ # CLAUDE.md
2
+
3
+ This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
4
+
5
+ ## Project Overview
6
+
7
+ StepUp Queue is a StepUp Core extension that integrates SLURM job scheduler workflows. It allows
8
+ StepUp workflows to submit SLURM jobs, wait for them, and resume from existing jobs after restarts
9
+ — making long-running HPC workflows resumable across interrupted sessions.
10
+
11
+ The related `stepup-core` repo is at `../stepup-core` and on GitHub.
12
+
13
+ ## Development Environment
14
+
15
+ Uses [uv](https://docs.astral.sh/uv/) for environment management:
16
+
17
+ ```bash
18
+ uv sync --extra dev
19
+ pre-commit install
20
+ direnv allow # activates .venv and sets env vars from .envrc
21
+ ```
22
+
23
+ The `.envrc` sets `STEPUP_DEBUG=1`, `STEPUP_BUILD_DURATION=0`, and `STEPUP_SYNC_RPC_TIMEOUT=30`.
24
+ Without `direnv`, prefix commands with `uv run`.
25
+
26
+ ## Common Commands
27
+
28
+ ```bash
29
+ # Run all tests (parallel by default via pytest-xdist, quite fast)
30
+ pytest -vv
31
+
32
+ # Run all linters
33
+ pre-commit run --all
34
+
35
+ # Docs live preview
36
+ mkdocs serve
37
+ ```
38
+
39
+ ## Architecture
40
+
41
+ ### Package layout
42
+
43
+ ```text
44
+ stepup/queue/
45
+ api.py — Public Python API: sbatch() for use in plan.py files
46
+ sbatch.py — sq-sbatch-and-wait CLI: submits, waits, polls, caches sacct output
47
+ log.py — slurmjob.log format (version 2): read/write/validate
48
+ utils.py — SLURM state sets, parse_sbatch(), search_jobs()
49
+ canceljobs.py — stepup canceljobs subcommand
50
+ removejobs.py — stepup removejobs subcommand
51
+ ```
52
+
53
+ ### How it fits into StepUp
54
+
55
+ `stepup.queue.api.sbatch()` is called from a user's `plan.py`. It calls
56
+ `stepup.core.api.run()` to register the `sq-sbatch-and-wait` step with StepUp Core.
57
+ When StepUp executes that step, `sq-sbatch-and-wait` (entry point for `stepup/queue/sbatch.py`)
58
+ runs in the working directory of the job.
59
+
60
+ ### Job lifecycle and files
61
+
62
+ Every SLURM job lives in its own working directory. The conventions are:
63
+
64
+ - `slurmjob{ext}` — the user-written job script (must be executable, must have shebang)
65
+ - `slurmjob.log` — StepUp Queue's log (volatile; tracks submission + SLURM state history)
66
+ - `slurmjob.out` / `slurmjob.err` — SLURM stdout/stderr (declared as `out`)
67
+ - `slurmjob.ret` — exit code written by wrapper script (declared as `out`)
68
+
69
+ `slurmjob.log` is declared as a `vol` (volatile) file in StepUp, not `out`, so it is not
70
+ treated as reproducible output. It contains: a version header, an input digest (SHA-256 of
71
+ all step inputs), and timestamped status lines (`Submitted <jobid>[;cluster]`, then SLURM states).
72
+
73
+ ### Idempotent submit-and-wait
74
+
75
+ `submit_once_and_wait()` in `sbatch.py` is the core function:
76
+
77
+ 1. Reads `slurmjob.log` and checks the stored input digest against `STEPUP_STEP_INP_DIGEST`.
78
+ 2. If no log exists → submits a new job via `sbatch --parsable`.
79
+ 3. If log exists with a matching digest → resumes waiting for the existing job.
80
+ 4. If digest mismatch → behaviour depends on `onchange` policy (`raise`/`resubmit`/`ignore`).
81
+ 5. Polls status via `sacct`, using a **shared on-disk cache** at
82
+ `.stepup/queue/sbatch_wait_sacct[.cluster].out` with `fcntl.LOCK_EX` to avoid
83
+ hammering SLURM when many jobs run in parallel.
84
+
85
+ ### sacct caching
86
+
87
+ `cached_run()` in `sbatch.py` manages the shared `sacct` cache. All concurrent `sq-sbatch-and-wait`
88
+ processes share a single cached file per cluster; only one process calls `sacct` at a time (via
89
+ `fcntl` lock). The cache file has a fixed-length header (`v1 datetime=... returncode=...`).
90
+
91
+ ### Entry points
92
+
93
+ - `sq-sbatch-and-wait` — CLI entry point (`stepup.queue.sbatch:sbatch`) that calls `submit_once_and_wait()`
94
+ - `stepup canceljobs` — registered as `stepup.tools` entry point; cancels running SLURM jobs
95
+ by reading `slurmjob.log` files recursively
96
+ - `stepup removejobs` — registered as `stepup.tools` entry point; removes directories of failed jobs
97
+
98
+ ### Key environment variables
99
+
100
+ | Variable | Default | Purpose |
101
+ | --- | --- | --- |
102
+ | `STEPUP_SBATCH_CACHE_TIMEOUT` | 30 | Seconds between sacct calls |
103
+ | `STEPUP_SBATCH_POLLING_MIN/MAX` | 10/20 | Random polling interval (seconds) |
104
+ | `STEPUP_SBATCH_RETRY_NUM` | 5 | sbatch retry attempts on transient failure |
105
+ | `STEPUP_SBATCH_RETRY_DELAY_MIN/MAX` | 60/120 | Retry delay range (seconds) |
106
+ | `STEPUP_SACCT_START_TIME` | now-7days | `-S` argument passed to sacct |
107
+ | `STEPUP_SBATCH_UNLISTED_TIMEOUT` | 600 | Seconds before unlisted job is declared failed |
108
+ | `STEPUP_QUEUE_ONCHANGE` | raise | Default `onchange` policy |
109
+
110
+ ### Linting
111
+
112
+ Ruff with `line-length = 100`, targeting Python 3.11+. The `ruff.lint` section in
113
+ `pyproject.toml` selects many rule sets; several `PLR` (complexity) rules are deliberately
114
+ disabled. Imports are sorted with `stepup` as a known-first-party package.
115
+
116
+ ### Testing
117
+
118
+ `pytest` is configured with `-n auto --dist worksteal -W error` — all warnings are errors,
119
+ tests run in parallel. The `conftest.py` provides only a `path_tmp` fixture wrapping `tmpdir`.
120
+ Tests are pure unit tests; no SLURM cluster is required.
121
+
122
+ ## Release Process
123
+
124
+ 1. Update `docs/changelog.md` with the new version.
125
+ 2. Commit and tag: `git tag vX.Y.Z`.
126
+ 3. Push with tags: `git push origin main --tags` (triggers PyPI GitHub Action).
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: stepup-queue
3
- Version: 1.1.0
3
+ Version: 2.0.0rc1
4
4
  Summary: StepUp Queue integrates queued jobs into a StepUp workflow.
5
5
  Author-email: Toon Verstraelen <toon.verstraelen@ugent.be>
6
6
  License-Expression: GPL-3.0-or-later
@@ -25,7 +25,8 @@ Requires-Python: >=3.11
25
25
  Description-Content-Type: text/markdown
26
26
  License-File: LICENSE
27
27
  Requires-Dist: path>=16.14.0
28
- Requires-Dist: stepup<4.0.0,>=3.2.0
28
+ Requires-Dist: rich>=13.0.0
29
+ Requires-Dist: stepup<5.0.0a1,>=4.0.0rc3
29
30
  Provides-Extra: dev
30
31
  Requires-Dist: psutil; extra == "dev"
31
32
  Requires-Dist: pytest; extra == "dev"
@@ -29,7 +29,8 @@ classifiers = [
29
29
  dependencies = [
30
30
  # Ensure changes to these dependencies are reflected in .github/requirements-old.txt
31
31
  "path>=16.14.0",
32
- "stepup>=3.2.0,<4.0.0",
32
+ "rich>=13.0.0",
33
+ "stepup>=4.0.0rc3,<5.0.0a1",
33
34
  ]
34
35
  dynamic = ["version"]
35
36
 
@@ -52,8 +53,8 @@ Issues = "https://github.com/reproducible-reporting/stepup-queue/issues"
52
53
  Source = "https://github.com/reproducible-reporting/stepup-queue/"
53
54
  Changelog = "https://reproducible-reporting.github.io/stepup-queue/changelog/"
54
55
 
55
- [project.entry-points."stepup.actions"]
56
- sbatch = "stepup.queue.actions:sbatch"
56
+ [project.scripts]
57
+ sq-sbatch-and-wait = "stepup.queue.sbatch:sbatch"
57
58
 
58
59
  [project.entry-points."stepup.tools"]
59
60
  canceljobs = "stepup.queue.canceljobs:canceljobs_subcommand"
@@ -1,5 +1,5 @@
1
1
  # StepUp Queue integrates queued jobs into a StepUp workflow.
2
- # © 2025 Toon Verstraelen
2
+ # Copyright 2025-2026 Toon Verstraelen
3
3
  #
4
4
  # This file is part of StepUp Queue.
5
5
  #
@@ -1,5 +1,5 @@
1
1
  # StepUp Queue integrates queued jobs into a StepUp workflow.
2
- # © 2025 Toon Verstraelen
2
+ # Copyright 2025-2026 Toon Verstraelen
3
3
  #
4
4
  # This file is part of StepUp Queue.
5
5
  #
@@ -22,25 +22,24 @@
22
22
  import shlex
23
23
  from collections.abc import Collection
24
24
 
25
- from stepup.core.api import step
26
- from stepup.core.utils import string_to_list
25
+ from stepup.core.api import run
26
+ from stepup.core.path import StrPath, coerce_paths
27
27
 
28
28
  __all__ = ("sbatch",)
29
29
 
30
30
 
31
31
  def sbatch(
32
- workdir: str,
32
+ workdir: StrPath,
33
33
  *,
34
34
  ext: str = ".sh",
35
35
  rc: str | None = None,
36
- inp: Collection[str] | str = (),
36
+ inp: Collection[StrPath] | StrPath = (),
37
37
  env: Collection[str] | str = (),
38
- out: Collection[str] | str = (),
39
- vol: Collection[str] | str = (),
38
+ out: Collection[StrPath] | StrPath = (),
39
+ vol: Collection[StrPath] | StrPath = (),
40
40
  onchange: str | None = None,
41
41
  optional: bool = False,
42
- pool: str | None = None,
43
- block: bool = False,
42
+ resources: dict[str, int] | str | None = None,
44
43
  ):
45
44
  """Submit a SLURM job script.
46
45
 
@@ -60,8 +59,7 @@ def sbatch(
60
59
  If submitted, the step will wait until the job is finished.
61
60
  If already finished, the step will essentially be a no-op.
62
61
 
63
- See `step()` documentation in StepUp Core for all optional arguments.
64
- and the return value.
62
+ See `run()` documentation in StepUp Core for all optional arguments and return value.
65
63
  Note that the `inp`, `out` and `vol` arguments are extended
66
64
  with the files mentioned above and that any additional files you specify
67
65
  are interpreted relative to the working directory.
@@ -90,23 +88,22 @@ def sbatch(
90
88
  ext = f".{ext}"
91
89
  if ext in [".log", ".out", ".err", ".ret"]:
92
90
  raise ValueError(f"Invalid extension {ext}. The extension must not be .log, .out or .err.")
93
- action = "sbatch"
91
+ cmd = "sq-sbatch-and-wait"
94
92
  if ext != ".sh":
95
- action += f" {ext}"
93
+ cmd += f" {ext}"
96
94
  if rc is not None:
97
- action += f" --rc={shlex.quote(rc)}"
95
+ cmd += f" --rc={shlex.quote(rc)}"
98
96
  if onchange is not None:
99
97
  if onchange not in ["raise", "resubmit", "ignore"]:
100
98
  raise ValueError(f"Invalid onchange policy {onchange}.")
101
- action += f" --onchange={onchange}"
102
- return step(
103
- action,
104
- inp=[f"slurmjob{ext}", *string_to_list(inp)],
99
+ cmd += f" --onchange={onchange}"
100
+ return run(
101
+ cmd,
102
+ inp=[f"slurmjob{ext}", *coerce_paths(inp)],
105
103
  env=env,
106
- out=["slurmjob.out", "slurmjob.err", "slurmjob.ret", *string_to_list(out)],
107
- vol=["slurmjob.log", *string_to_list(vol)],
104
+ out=["slurmjob.out", "slurmjob.err", "slurmjob.ret", *coerce_paths(out)],
105
+ vol=["slurmjob.log", *coerce_paths(vol)],
108
106
  workdir=workdir,
109
107
  optional=optional,
110
- pool=pool,
111
- block=block,
108
+ resources=resources,
112
109
  )
@@ -1,5 +1,5 @@
1
1
  # StepUp Queue integrates queued jobs into a StepUp workflow.
2
- # © 2025 Toon Verstraelen
2
+ # Copyright 2025-2026 Toon Verstraelen
3
3
  #
4
4
  # This file is part of StepUp Queue.
5
5
  #
@@ -22,21 +22,30 @@
22
22
  import argparse
23
23
  import subprocess
24
24
  import sys
25
+ from collections.abc import Callable
25
26
 
26
27
  from path import Path
28
+ from rich.console import Console
27
29
 
28
- from .sbatch import DONE_STATES, parse_sbatch, read_log, read_status
29
- from .utils import search_jobs
30
+ from stepup.core.config import ConfigLoader
31
+
32
+ from .log import read_jobid_cluster_status
33
+ from .utils import DONE_STATES, search_jobs
30
34
 
31
35
 
32
36
  def canceljobs_tool(args: argparse.Namespace):
33
37
  """Iterate over all slurmjob.log files, read the SLURM job IDs, and cancel them."""
38
+ console = Console(highlight=False)
39
+ if not args.commit:
40
+ console.print("[yellow]# Note: No jobs are actually cancelled.[/]")
41
+ console.print("[yellow]# Use the --commit option to execute the cancellations.[/]")
42
+
34
43
  jobs = {}
35
- for path_log in search_jobs(args.paths, verbose=True):
44
+ for path_log in search_jobs(args.paths, console):
36
45
  try:
37
46
  job_id, cluster, status = read_jobid_cluster_status(path_log)
38
47
  except ValueError as e:
39
- print(f"# WARNING: Could not read job ID from {path_log}: {e}")
48
+ console.print(f"[red]# WARNING: Could not read job ID from {path_log}: {e}[/]")
40
49
  continue
41
50
  if args.all or status not in DONE_STATES:
42
51
  jobs.setdefault(cluster, []).append((job_id, path_log, status))
@@ -56,39 +65,21 @@ def canceljobs_tool(args: argparse.Namespace):
56
65
  command_args.extend(str(job_id) for job_id, _, _ in cancel_jobs)
57
66
 
58
67
  # Using subprocess.run for better control and error handling
59
- print(" ".join(command_args))
68
+ print_cancel_command(
69
+ console, [job_id for job_id, _, _ in cancel_jobs], cluster, None
70
+ )
60
71
  result = subprocess.run(command_args, check=False)
61
72
  all_good &= result.returncode == 0
62
73
  else:
63
74
  for job_id, path_log, status in cluster_jobs:
64
- command = "scancel"
65
- if cluster is not None:
66
- command += f" -M {cluster}"
67
- command += f" {job_id} # {path_log} {status}"
68
- print(command)
75
+ print_cancel_command(console, [job_id], cluster, f"{path_log} {status}")
69
76
  if not all_good:
70
- print("Some jobs could not be cancelled. See messages above.")
77
+ console.print("[red]Some jobs could not be cancelled. See messages above.[/]")
71
78
  sys.exit(1)
72
79
 
73
80
 
74
- def read_jobid_cluster_status(path_log: str) -> tuple[int, str | None, str | None]:
75
- """Read the job ID, cluster, and job status from the job log file."""
76
- lines = read_log(path_log, False)
77
- if len(lines) < 1:
78
- raise ValueError(f"Incomplete file: {path_log}.")
79
- words = lines[0].split()
80
- if len(words) != 3:
81
- raise ValueError(f"Could not read job ID from first status line: {lines[0]}")
82
- _, status, job_id_cluster = words
83
- if status != "Submitted":
84
- raise ValueError(f"No 'Submitted' on first status line: {lines[0]}")
85
- job_id, cluster = parse_sbatch(job_id_cluster)
86
- status = read_status(lines[-1:])[1]
87
- return job_id, cluster, status
88
-
89
-
90
- def canceljobs_subcommand(subparser: argparse.ArgumentParser) -> callable:
91
- parser = subparser.add_parser(
81
+ def canceljobs_subcommand(subparsers, loader: ConfigLoader) -> Callable:
82
+ parser = subparsers.add_parser(
92
83
  "canceljobs",
93
84
  help="Cancel running jobs in the current StepUp workflow.",
94
85
  )
@@ -114,4 +105,18 @@ def canceljobs_subcommand(subparser: argparse.ArgumentParser) -> callable:
114
105
  default=False,
115
106
  help="Select all jobs, including the ones that seem to be done already.",
116
107
  )
108
+ loader.patch_parser(parser)
117
109
  return canceljobs_tool
110
+
111
+
112
def print_cancel_command(
    console: Console, job_ids: list[int], cluster: str | None, comment: str | None
) -> None:
    """Print an ``scancel`` command line, colorized with Rich markup tags.

    Nothing is executed and nothing is returned; the command is only shown.

    Parameters
    ----------
    console
        The console whose ``print`` method receives the formatted command.
    job_ids
        The job IDs listed as arguments of ``scancel``.
    cluster
        When not ``None``, a ``-M <cluster>`` option is inserted before the job IDs.
    comment
        When not ``None``, it is appended as a trailing ``# ...`` shell comment.
    """
    parts = ["[green]scancel[/]"]
    if cluster is not None:
        parts.append(f"[cyan]-M {cluster}[/]")
    parts.extend(str(job_id) for job_id in job_ids)
    if comment is not None:
        # The leading space gives two spaces before the "#" once the parts are joined.
        parts.append(f" [bright_black]# {comment}[/]")
    console.print(" ".join(parts))
@@ -0,0 +1,121 @@
1
+ # StepUp Queue integrates queued jobs into a StepUp workflow.
2
+ # Copyright 2025-2026 Toon Verstraelen
3
+ #
4
+ # This file is part of StepUp Queue.
5
+ #
6
+ # StepUp Queue is free software; you can redistribute it and/or
7
+ # modify it under the terms of the GNU General Public License
8
+ # as published by the Free Software Foundation; either version 3
9
+ # of the License, or (at your option) any later version.
10
+ #
11
+ # StepUp Queue is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with this program; if not, see <http://www.gnu.org/licenses/>
18
+ #
19
+ # --
20
+ """The job log file format and utilities to read and write it."""
21
+
22
+ from datetime import datetime
23
+
24
+ from path import Path
25
+
26
+ from .utils import parse_sbatch
27
+
28
+ __all__ = (
29
+ "FIRST_LINE",
30
+ "InpDigestError",
31
+ "init_log",
32
+ "log_status",
33
+ "read_jobid_cluster_status",
34
+ "read_log",
35
+ "read_status",
36
+ )
37
+
38
+ FIRST_LINE = "StepUp Queue sbatch wait log format version 2"
39
+
40
+
41
class InpDigestError(ValueError):
    """Raised when the input digest stored in a log differs from the expected digest."""
43
+
44
+
45
def init_log(path_log: str, inp_digest: str):
    """Create a fresh log file, replacing any existing file at the same path.

    The new file holds two lines: the format version header (``FIRST_LINE``)
    followed by the given input digest.
    """
    with open(path_log, "w") as log_file:
        log_file.write(f"{FIRST_LINE}\n{inp_digest}\n")
50
+
51
+
52
def log_status(path_log: Path, status: str):
    """Append one timestamped status line to the log.

    The line consists of the current time in ISO format (taken from
    ``datetime.now()``), a single space, and the given status.
    """
    timestamp = datetime.now().isoformat()
    with open(path_log, "a") as log_file:
        print(f"{timestamp} {status}", file=log_file)
58
+
59
+
60
def read_jobid_cluster_status(path_log: str) -> tuple[int, str | None, str | None]:
    """Extract the job ID, the cluster and the most recent status from a job log file.

    The first status line must consist of exactly three whitespace-separated words,
    of which the second is ``Submitted`` and the third is handed to ``parse_sbatch()``
    to obtain the job ID and cluster. The status is taken from the last line of the log.

    Raises
    ------
    ValueError
        If the log has no status lines, or if the first status line does not have
        the expected form.
    """
    entries = read_log(path_log, None)
    if not entries:
        raise ValueError(f"Incomplete file: {path_log}.")
    first_entry = entries[0]
    fields = first_entry.split()
    if len(fields) != 3:
        raise ValueError(f"Could not read job ID from first status line: {first_entry}")
    keyword, job_id_cluster = fields[1:]
    if keyword != "Submitted":
        raise ValueError(f"No 'Submitted' on first status line: {first_entry}")
    job_id, cluster = parse_sbatch(job_id_cluster)
    # Only the last entry is passed on, as a new one-element list.
    _, last_status = read_status(entries[-1:])
    return job_id, cluster, last_status
74
+
75
+
76
def read_log(path_log: str, expected_inp_digest: str | None = None) -> list[str]:
    """Load the status lines of a previously created log file.

    The first line is validated with ``check_log_version()``. The second line holds
    the input digest, which is compared to ``expected_inp_digest`` unless that
    argument is ``None``. All remaining lines are returned with surrounding
    whitespace stripped.

    Raises
    ------
    ValueError
        If the file is empty or has no input digest line. The validation helpers
        raise their own errors for a wrong version header or digest.
    """
    with open(path_log) as log_file:
        try:
            version_line = next(log_file)
        except StopIteration as exc:
            raise ValueError("Existing log file is empty.") from exc
        check_log_version(version_line.strip())
        try:
            digest_line = next(log_file)
        except StopIteration as exc:
            raise ValueError("Existing log file has no input digest.") from exc
        if expected_inp_digest is not None:
            check_log_inp_digest(digest_line.strip(), expected_inp_digest)
        # Everything after the two header lines is a status line.
        return [entry.strip() for entry in log_file]
94
+
95
+
96
def check_log_version(line: str):
    """Ensure that the first line of a log equals the expected version header.

    Raises
    ------
    ValueError
        If ``line`` differs from ``FIRST_LINE``.
    """
    if line == FIRST_LINE:
        return
    raise ValueError(
        f"The first line of the log is wrong. Expected: '{FIRST_LINE}' Found: '{line}'"
    )
102
+
103
+
104
def check_log_inp_digest(actual: str, expected: str):
    """Ensure that the input digest found in a log equals the expected digest.

    Raises
    ------
    InpDigestError
        If ``actual`` and ``expected`` differ.
    """
    if actual == expected:
        return
    message = (
        "The second line of the log contains the wrong input digest.\n"
        f"Actual: {actual}\nExpected: {expected}\n"
    )
    raise InpDigestError(message)
111
+
112
+
113
+ def read_status(lines: list[str]) -> tuple[float | None, str | None]:
114
+ """Read a status from the log file."""
115
+ if len(lines) == 0:
116
+ return None, None
117
+ line = lines.pop(0)
118
+ words = line.split(maxsplit=1)
119
+ if len(words) != 2:
120
+ raise ValueError(f"Expected a status in log but found line '{line}'.")
121
+ return datetime.fromisoformat(words[0]).timestamp(), words[1].strip()
@@ -1,5 +1,5 @@
1
1
  # StepUp Queue integrates queued jobs into a StepUp workflow.
2
- # © 2025 Toon Verstraelen
2
+ # Copyright 2025-2026 Toon Verstraelen
3
3
  #
4
4
  # This file is part of StepUp Queue.
5
5
  #
@@ -21,10 +21,14 @@
21
21
 
22
22
  import argparse
23
23
  import shutil
24
+ from collections.abc import Callable
24
25
 
25
26
  from path import Path
27
+ from rich.console import Console
26
28
 
27
- from .sbatch import read_log, read_status
29
+ from stepup.core.config import ConfigLoader
30
+
31
+ from .log import read_log, read_status
28
32
  from .utils import search_jobs
29
33
 
30
34
  FAILED_STATES = {
@@ -45,31 +49,36 @@ FAILED_STATES = {
45
49
 
46
50
  def removejobs_tool(args: argparse.Namespace):
47
51
  """Iterate over all slurmjob.log files and remove their parent job directories."""
52
+ console = Console(highlight=False)
53
+ if not args.commit:
54
+ console.print("[yellow]# Note: No job directories are actually removed.[/]")
55
+ console.print("[yellow]# Use the --commit option to execute the removals.[/]")
56
+
48
57
  jobs = []
49
- for path_log in search_jobs(args.paths, verbose=True):
58
+ for path_log in search_jobs(args.paths, console):
50
59
  try:
51
60
  status = read_last_status(path_log)
52
61
  except ValueError as e:
53
- print(f"Warning: Could not read job status from {path_log}: {e}")
62
+ console.print(f"[red]# WARNING: Could not read job status from {path_log}: {e}[/]")
54
63
  status = None
55
64
  if args.all or status in FAILED_STATES:
56
65
  jobs.append((path_log, status))
57
66
 
58
67
  for path_log, status in jobs:
59
- command = f"rm -rf {path_log.parent} # state={status}"
60
- print(command)
68
+ command = f"[cyan]rm -rf[/] {path_log.parent} [bright_black]# state={status}[/]"
69
+ console.print(command)
61
70
  if args.commit:
62
71
  shutil.rmtree(path_log.parent)
63
72
 
64
73
 
65
74
  def read_last_status(path_log: str) -> str | None:
66
75
  """Read the last job status from the job log file."""
67
- lines = read_log(path_log, False)
76
+ lines = read_log(path_log, None)
68
77
  return read_status(lines[-1:])[1]
69
78
 
70
79
 
71
- def removejobs_subcommand(subparser: argparse.ArgumentParser) -> callable:
72
- parser = subparser.add_parser(
80
+ def removejobs_subcommand(subparsers, loader: ConfigLoader) -> Callable:
81
+ parser = subparsers.add_parser(
73
82
  "removejobs",
74
83
  help="Remove directories of failed (and optionally all completed) jobs "
75
84
  "in the current StepUp workflow.",
@@ -96,4 +105,5 @@ def removejobs_subcommand(subparser: argparse.ArgumentParser) -> callable:
96
105
  default=False,
97
106
  help="Remove all jobs, not only failed jobs.",
98
107
  )
108
+ loader.patch_parser(parser)
99
109
  return removejobs_tool