stepup-queue 1.0.3__tar.gz → 1.0.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. {stepup_queue-1.0.3 → stepup_queue-1.0.5}/PKG-INFO +1 -1
  2. {stepup_queue-1.0.3 → stepup_queue-1.0.5}/docs/changelog.md +16 -0
  3. {stepup_queue-1.0.3 → stepup_queue-1.0.5}/docs/examples/slurm-perpetual/workflow.sh +10 -11
  4. {stepup_queue-1.0.3 → stepup_queue-1.0.5}/docs/usage.md +29 -14
  5. {stepup_queue-1.0.3 → stepup_queue-1.0.5}/stepup/queue/actions.py +6 -3
  6. {stepup_queue-1.0.3 → stepup_queue-1.0.5}/stepup/queue/api.py +8 -0
  7. {stepup_queue-1.0.3 → stepup_queue-1.0.5}/stepup/queue/sbatch.py +14 -6
  8. {stepup_queue-1.0.3 → stepup_queue-1.0.5}/stepup_queue.egg-info/PKG-INFO +1 -1
  9. {stepup_queue-1.0.3 → stepup_queue-1.0.5}/.editorconfig +0 -0
  10. {stepup_queue-1.0.3 → stepup_queue-1.0.5}/.github/requirements-old.txt +0 -0
  11. {stepup_queue-1.0.3 → stepup_queue-1.0.5}/.github/scripts/extract-notes.sh +0 -0
  12. {stepup_queue-1.0.3 → stepup_queue-1.0.5}/.github/workflows/mkdocs.yaml +0 -0
  13. {stepup_queue-1.0.3 → stepup_queue-1.0.5}/.github/workflows/pytest.yaml +0 -0
  14. {stepup_queue-1.0.3 → stepup_queue-1.0.5}/.github/workflows/release.yaml +0 -0
  15. {stepup_queue-1.0.3 → stepup_queue-1.0.5}/.gitignore +0 -0
  16. {stepup_queue-1.0.3 → stepup_queue-1.0.5}/.markdownlint-cli2.jsonc +0 -0
  17. {stepup_queue-1.0.3 → stepup_queue-1.0.5}/.pre-commit-config.yaml +0 -0
  18. {stepup_queue-1.0.3 → stepup_queue-1.0.5}/LICENSE +0 -0
  19. {stepup_queue-1.0.3 → stepup_queue-1.0.5}/README.md +0 -0
  20. {stepup_queue-1.0.3 → stepup_queue-1.0.5}/docs/development.md +0 -0
  21. {stepup_queue-1.0.3 → stepup_queue-1.0.5}/docs/examples/slurm-basic/.gitignore +0 -0
  22. {stepup_queue-1.0.3 → stepup_queue-1.0.5}/docs/examples/slurm-basic/README.md +0 -0
  23. {stepup_queue-1.0.3 → stepup_queue-1.0.5}/docs/examples/slurm-basic/dynamic-template.sh +0 -0
  24. {stepup_queue-1.0.3 → stepup_queue-1.0.5}/docs/examples/slurm-basic/fail/slurmjob.sh +0 -0
  25. {stepup_queue-1.0.3 → stepup_queue-1.0.5}/docs/examples/slurm-basic/pass/slurmjob.py +0 -0
  26. {stepup_queue-1.0.3 → stepup_queue-1.0.5}/docs/examples/slurm-basic/plan.py +0 -0
  27. {stepup_queue-1.0.3 → stepup_queue-1.0.5}/docs/examples/slurm-perpetual/.gitignore +0 -0
  28. {stepup_queue-1.0.3 → stepup_queue-1.0.5}/docs/examples/slurm-perpetual/README.md +0 -0
  29. {stepup_queue-1.0.3 → stepup_queue-1.0.5}/docs/examples/slurm-perpetual/plan.py +0 -0
  30. {stepup_queue-1.0.3 → stepup_queue-1.0.5}/docs/examples/slurm-perpetual/step1/slurmjob.sh +0 -0
  31. {stepup_queue-1.0.3 → stepup_queue-1.0.5}/docs/examples/slurm-perpetual/step2/slurmjob.sh +0 -0
  32. {stepup_queue-1.0.3 → stepup_queue-1.0.5}/docs/index.md +0 -0
  33. {stepup_queue-1.0.3 → stepup_queue-1.0.5}/docs/installation.md +0 -0
  34. {stepup_queue-1.0.3 → stepup_queue-1.0.5}/docs/license.md +0 -0
  35. {stepup_queue-1.0.3 → stepup_queue-1.0.5}/docs/stepup.queue.api.md +0 -0
  36. {stepup_queue-1.0.3 → stepup_queue-1.0.5}/mkdocs.yaml +0 -0
  37. {stepup_queue-1.0.3 → stepup_queue-1.0.5}/overrides/main.html +0 -0
  38. {stepup_queue-1.0.3 → stepup_queue-1.0.5}/pyproject.toml +0 -0
  39. {stepup_queue-1.0.3 → stepup_queue-1.0.5}/setup.cfg +0 -0
  40. {stepup_queue-1.0.3 → stepup_queue-1.0.5}/stepup/queue/__init__.py +0 -0
  41. {stepup_queue-1.0.3 → stepup_queue-1.0.5}/stepup/queue/canceljobs.py +0 -0
  42. {stepup_queue-1.0.3 → stepup_queue-1.0.5}/stepup_queue.egg-info/SOURCES.txt +0 -0
  43. {stepup_queue-1.0.3 → stepup_queue-1.0.5}/stepup_queue.egg-info/dependency_links.txt +0 -0
  44. {stepup_queue-1.0.3 → stepup_queue-1.0.5}/stepup_queue.egg-info/entry_points.txt +0 -0
  45. {stepup_queue-1.0.3 → stepup_queue-1.0.5}/stepup_queue.egg-info/requires.txt +0 -0
  46. {stepup_queue-1.0.3 → stepup_queue-1.0.5}/stepup_queue.egg-info/top_level.txt +0 -0
  47. {stepup_queue-1.0.3 → stepup_queue-1.0.5}/tests/conftest.py +0 -0
  48. {stepup_queue-1.0.3 → stepup_queue-1.0.5}/tests/test_sbatch.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: stepup-queue
3
- Version: 1.0.3
3
+ Version: 1.0.5
4
4
  Summary: StepUp Queue integrates queued jobs into a StepUp workflow.
5
5
  Author-email: Toon Verstraelen <toon.verstraelen@ugent.be>
6
6
  License-Expression: GPL-3.0-or-later
@@ -12,6 +12,20 @@ and this project adheres to [Effort-based Versioning](https://jacobtomlinson.dev
12
12
 
13
13
  (no changes yet)
14
14
 
15
+ ## [1.0.5][] - 2025-05-23 {: #v1.0.5 }
16
+
17
+ ### Changed
18
+
19
+ - Replaced the old `STEPUP_QUEUE_RESUBMIT_CHANGED_INPUTS` environment variable
20
+ by the more powerful `STEPUP_QUEUE_ONCHANGE`.
21
+
22
+ ## [1.0.4][] - 2025-05-21 {: #v1.0.4 }
23
+
24
+ ### Fixed
25
+
26
+ - Minor typo fix in slurm wrapper script.
27
+ - Improved example perpetual workflow job script.
28
+
15
29
  ## [1.0.3][] - 2025-05-16 {: #v1.0.3 }
16
30
 
17
31
  ### Fixed
@@ -47,6 +61,8 @@ It was adapted to integrate well with StepUp Core 3.
47
61
  This release also features the `stepup canceljobs` tool, which was not present in Parman.
48
62
 
49
63
  [Unreleased]: https://github.com/reproducible-reporting/stepup-queue
64
+ [1.0.5]: https://github.com/reproducible-reporting/stepup-queue/releases/tag/v1.0.5
65
+ [1.0.4]: https://github.com/reproducible-reporting/stepup-queue/releases/tag/v1.0.4
50
66
  [1.0.3]: https://github.com/reproducible-reporting/stepup-queue/releases/tag/v1.0.3
51
67
  [1.0.2]: https://github.com/reproducible-reporting/stepup-queue/releases/tag/v1.0.2
52
68
  [1.0.1]: https://github.com/reproducible-reporting/stepup-queue/releases/tag/v1.0.1
@@ -25,23 +25,22 @@ trap 'rm -rv "$STEPUP_QUEUE_FLAG_DIR"' EXIT
25
25
  # The second will forcefully terminate remaining running steps.
26
26
  echo "Starting background process to monitor wall time."
27
27
  (
28
- sleep 30; # In production, 39600 seconds is reasonable.
29
- touch ${STEPUP_QUEUE_FLAG_DIR}/resubmit;
30
- stepup shutdown;
31
- sleep 10; # In production, 300 seconds is reasonable.
28
+ sleep 30 # In production, wall time minus 1800 seconds (half hour) is reasonable.
29
+ touch ${STEPUP_QUEUE_FLAG_DIR}/resubmit
30
+ stepup shutdown
31
+ sleep 10 # In production, 300 seconds (5 minutes) is reasonable.
32
32
  stepup shutdown
33
33
  ) &
34
34
  BGPID=$!
35
35
  trap "kill $BGPID" EXIT
36
36
 
37
- # Start StepUp with 5 workers.
38
- # This means that at most 5 jobs will be submitted concurrently.
37
+ NWORKER=5
38
+ echo "Starting stepup with a maximum of ${NWORKER} concurrent jobs."
39
+ stepup boot -n ${NWORKER}
40
+ # This means that at most ${NWORKER} jobs will be submitted concurrently.
39
41
  # You can adjust the number of workers based on your needs.
40
42
  # In fact, because this example is simple, a single worker would be sufficient.
41
- # Note that the number of workers is unrelated
42
- # to the single core used by this workflow script.
43
- echo "Starting stepup with a maximum of 5 concurrent jobs."
44
- stepup boot -n 5
43
+ # Note that the number of workers is unrelated to the single core used by this workflow script.
45
44
 
46
45
  # Use the temporary file to determine if the workflow script must be resubmitted.
47
46
  echo "Checking if stepup was forcibly stopped."
@@ -49,7 +48,7 @@ if [ -f ${STEPUP_QUEUE_FLAG_DIR}/resubmit ]; then
49
48
  echo "Resubmitting job script to let StepUp finalize the workflow."
50
49
  sbatch workflow.sh
51
50
  else
52
- echo "Stepup was stopped gracefully."
51
+ echo "Stepup stopped by itself."
53
52
  fi
54
53
 
55
54
  echo "StepUp workflow job ends:" $(date)
@@ -27,20 +27,35 @@ This can be useful when the workflow gets killed for some reason.
27
27
 
28
28
  The standard output and error of the job are written to `slurmjob.out` and `slurmjob.err`, respectively.
29
29
 
30
- The current status of the job is written to (and read from) the `slurmjob.log` file.
31
- By default, the job is not resubmitted if `slurmjob.log` exists.
32
- Instead, it waits for the job to complete without resubmitting it.
33
- You can remove `slurmjob.log` to ensure that the job is resubmitted,
34
- but this is obviously dangerous if the job is still running.
35
-
36
- If the inputs of the job specified with `sbatch("compute/", inp=["inp.txt"])` have changed,
37
- restarting the workflow will by default raise an exception.
38
- Ideally, you should clean up old outputs before restarting the workflow,
39
- and check that you really want to remove the data before doing so.
40
- If you feel this is overly cautious, you can set the `STEPUP_QUEUE_RESUBMIT_CHANGED_INPUTS`
41
- environment variable to `"yes"` to allow the workflow to resubmit jobs with changed inputs.
42
- Old outputs are not removed before resubmission.
43
- It is assumed that your job script will perform the necessary cleanup itself.
30
+ The current status of the job is stored in the `slurmjob.log` file,
31
+ which StepUp Queue both reads and writes.
32
+ When you restart StepUp and `slurmjob.log` exists for a given `sbatch()` step,
33
+ the job is not resubmitted; instead, StepUp waits for the existing job to finish.
34
+ To force a job to be resubmitted, you must delete `slurmjob.log`
35
+ and manually cancel the corresponding running job, before restarting StepUp.
36
+ Deleting `slurmjob.log` without cancelling the job
37
+ will cause inconsistencies that StepUp cannot detect.
38
+
39
+ If the job's inputs change and StepUp is restarted,
40
+ you can control how this situation is handled using
41
+ the `STEPUP_QUEUE_ONCHANGE` environment variable or the `onchange` argument of `sbatch()`:
42
+
43
+ 1. `onchange="raise"` (default):
44
+ Raises an exception and aborts the workflow.
45
+ This is the safest option, ensuring the workflow does not continue with inconsistent data.
46
+ 2. `onchange="resubmit"`:
47
+ Cancels any running job and removes it from the queue,
48
+ then resubmits the job with the new inputs.
49
+ Old outputs are not deleted before resubmission;
50
+ it is assumed your job script will handle any necessary cleanup.
51
+ 3. `onchange="ignore"`:
52
+ Does not resubmit the job; the workflow continues using any existing outputs.
53
+ This is useful if input changes do not affect outputs,
54
+ e.g., updating the job script to request more resources.
55
+ If outputs are missing but `slurmjob.log` exists, the step will fail.
56
+ If you manually remove `slurmjob.log` and cancel the running job,
57
+ the job will be resubmitted with the new inputs.
58
+ Use this option with caution, as it can lead to inconsistent workflow data.
44
59
 
45
60
  ## Examples
46
61
 
@@ -26,7 +26,6 @@ import shlex
26
26
 
27
27
  from path import Path
28
28
 
29
- from stepup.core.utils import string_to_bool
30
29
  from stepup.core.worker import WorkThread
31
30
 
32
31
  from .canceljobs import read_jobid_cluster
@@ -38,9 +37,13 @@ def sbatch(argstr: str, work_thread: WorkThread) -> int:
38
37
  parser = argparse.ArgumentParser()
39
38
  parser.add_argument("ext", nargs="?", default=".sh")
40
39
  parser.add_argument("--rc", default=None)
40
+ default_onchange = os.getenv("STEPUP_QUEUE_ONCHANGE", "raise")
41
+ parser.add_argument(
42
+ "--onchange", default=default_onchange, choices=["raise", "resubmit", "ignore"]
43
+ )
41
44
  args = parser.parse_args(shlex.split(argstr))
42
45
 
43
- if string_to_bool(os.getenv("STEPUP_QUEUE_RESUBMIT_CHANGED_INPUTS", "0")):
46
+ if args.onchange == "resubmit":
44
47
  with contextlib.suppress(InpDigestError):
45
48
  return submit_once_and_wait(work_thread, args.ext, args.rc)
46
49
  # Cancel running job (if any), clean log and resubmit
@@ -48,4 +51,4 @@ def sbatch(argstr: str, work_thread: WorkThread) -> int:
48
51
  job_id, cluster = read_jobid_cluster(path_log)
49
52
  work_thread.runsh(f"scancel -M {cluster} {job_id}")
50
53
  path_log.remove_p()
51
- return submit_once_and_wait(work_thread, args.ext, args.rc)
54
+ return submit_once_and_wait(work_thread, args.ext, args.rc, args.onchange != "ignore")
@@ -37,6 +37,7 @@ def sbatch(
37
37
  env: Collection[str] | str = (),
38
38
  out: Collection[str] | str = (),
39
39
  vol: Collection[str] | str = (),
40
+ onchange: str | None = None,
40
41
  optional: bool = False,
41
42
  pool: str | None = None,
42
43
  block: bool = False,
@@ -76,6 +77,9 @@ def sbatch(
76
77
  If multiple instructions are needed, put them in a file, e.g. `rc.sh`
77
78
  and pass it here as `source rc.sh`.
78
79
  In this case, you usually also want to include `rc.sh` in the `inp` list.
80
+ onchange
81
+ Policy when the inputs of a previously submitted job have changed.
82
+ Must be one of `"raise"`, `"resubmit"` or `"ignore"`.
79
83
  """
80
84
  if ext == "":
81
85
  ext = ".sh"
@@ -88,6 +92,10 @@ def sbatch(
88
92
  action += f" {ext}"
89
93
  if rc is not None:
90
94
  action += f" --rc={shlex.quote(rc)}"
95
+ if onchange is not None:
96
+ if onchange not in ["raise", "resubmit", "ignore"]:
97
+ raise ValueError(f"Invalid onchange policy {onchange}.")
98
+ action += f" --onchange={onchange}"
91
99
  return step(
92
100
  action,
93
101
  inp=[f"slurmjob{ext}", *string_to_list(inp)],
@@ -40,7 +40,10 @@ TIME_MARGIN = int(os.getenv("STEPUP_SBATCH_TIME_MARGIN", "5"))
40
40
 
41
41
 
42
42
  def submit_once_and_wait(
43
- work_thread: WorkThread, job_ext: str, sbatch_rc: str | None = None
43
+ work_thread: WorkThread,
44
+ job_ext: str,
45
+ sbatch_rc: str | None = None,
46
+ validate_inp_digest: bool = True,
44
47
  ) -> int:
45
48
  """Submit a job and wait for it to complete. When called a second time, just wait.
46
49
 
@@ -53,6 +56,9 @@ def submit_once_and_wait(
53
56
  sbatch_rc
54
57
  A resource configuration needed before calling sbatch.
55
58
  This is executed in the same shell, right before calling sbatch.
59
+ validate_inp_digest
60
+ If False, the input digest is not checked.
61
+ This is useful when the job script is modified but the changes are harmless.
56
62
 
57
63
  Returns
58
64
  -------
@@ -63,7 +69,7 @@ def submit_once_and_wait(
63
69
  # Read previously logged steps
64
70
  path_log = Path("slurmjob.log")
65
71
  if path_log.is_file():
66
- previous_lines = _read_log(path_log)
72
+ previous_lines = read_log(path_log, validate_inp_digest)
67
73
  else:
68
74
  previous_lines = []
69
75
  _init_log(path_log)
@@ -105,7 +111,7 @@ def submit_once_and_wait(
105
111
  return int(returncode)
106
112
 
107
113
 
108
- def _read_log(path_log: str) -> list[str]:
114
+ def read_log(path_log: str, do_inp_digest: bool = True) -> list[str]:
109
115
  """Read lines from a previously created log file."""
110
116
  lines = []
111
117
  with open(path_log) as f:
@@ -114,9 +120,11 @@ def _read_log(path_log: str) -> list[str]:
114
120
  except StopIteration as exc:
115
121
  raise ValueError("Existing log file is empty.") from exc
116
122
  try:
117
- check_log_inp_digest(next(f).strip())
123
+ inp_digest = next(f).strip()
118
124
  except StopIteration as exc:
119
- raise ValueError("Existing log file is empty.") from exc
125
+ raise ValueError("Existing has no input digest.") from exc
126
+ if do_inp_digest:
127
+ check_log_inp_digest(inp_digest)
120
128
  for line in f:
121
129
  line = line.strip()
122
130
  lines.append(line)
@@ -235,7 +243,7 @@ chmod +x '{job_script}'
235
243
  ./'{job_script}'
236
244
  RETURN_CODE=$?
237
245
  echo $RETURN_CODE > slurmjob.ret
238
- exot $RETURN_CODE
246
+ exit $RETURN_CODE
239
247
  """
240
248
 
241
249
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: stepup-queue
3
- Version: 1.0.3
3
+ Version: 1.0.5
4
4
  Summary: StepUp Queue integrates queued jobs into a StepUp workflow.
5
5
  Author-email: Toon Verstraelen <toon.verstraelen@ugent.be>
6
6
  License-Expression: GPL-3.0-or-later
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes