snakemake-executor-plugin-slurm 0.3.1__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of snakemake-executor-plugin-slurm might be problematic. Click here for more details.
- snakemake_executor_plugin_slurm/__init__.py +56 -46
- {snakemake_executor_plugin_slurm-0.3.1.dist-info → snakemake_executor_plugin_slurm-0.4.0.dist-info}/METADATA +1 -1
- snakemake_executor_plugin_slurm-0.4.0.dist-info/RECORD +5 -0
- {snakemake_executor_plugin_slurm-0.3.1.dist-info → snakemake_executor_plugin_slurm-0.4.0.dist-info}/WHEEL +1 -1
- snakemake_executor_plugin_slurm-0.3.1.dist-info/RECORD +0 -5
- {snakemake_executor_plugin_slurm-0.3.1.dist-info → snakemake_executor_plugin_slurm-0.4.0.dist-info}/LICENSE +0 -0
|
@@ -67,7 +67,14 @@ class Executor(RemoteExecutor):
|
|
|
67
67
|
|
|
68
68
|
log_folder = f"group_{job.name}" if job.is_group() else f"rule_{job.name}"
|
|
69
69
|
|
|
70
|
-
|
|
70
|
+
try:
|
|
71
|
+
wildcard_str = f"_{'_'.join(job.wildcards)}" if job.wildcards else ""
|
|
72
|
+
except AttributeError:
|
|
73
|
+
wildcard_str = ""
|
|
74
|
+
|
|
75
|
+
slurm_logfile = os.path.abspath(
|
|
76
|
+
f".snakemake/slurm_logs/{log_folder}/%j{wildcard_str}.log"
|
|
77
|
+
)
|
|
71
78
|
os.makedirs(os.path.dirname(slurm_logfile), exist_ok=True)
|
|
72
79
|
|
|
73
80
|
# generic part of a submission string:
|
|
@@ -199,6 +206,7 @@ class Executor(RemoteExecutor):
|
|
|
199
206
|
|
|
200
207
|
active_jobs_ids = {job_info.external_jobid for job_info in active_jobs}
|
|
201
208
|
active_jobs_seen_by_sacct = set()
|
|
209
|
+
missing_sacct_status = set()
|
|
202
210
|
|
|
203
211
|
# We use this sacct syntax for argument 'starttime' to keep it compatible
|
|
204
212
|
# with slurm < 20.11
|
|
@@ -245,53 +253,55 @@ class Executor(RemoteExecutor):
|
|
|
245
253
|
self.logger.debug(f"missing_sacct_status are: {missing_sacct_status}")
|
|
246
254
|
if not missing_sacct_status:
|
|
247
255
|
break
|
|
248
|
-
if i >= status_attempts - 1:
|
|
249
|
-
self.logger.warning(
|
|
250
|
-
f"Unable to get the status of all active_jobs that should be "
|
|
251
|
-
f"in slurmdbd, even after {status_attempts} attempts.\n"
|
|
252
|
-
f"The jobs with the following slurm job ids were previously seen "
|
|
253
|
-
"by sacct, but sacct doesn't report them any more:\n"
|
|
254
|
-
f"{missing_sacct_status}\n"
|
|
255
|
-
f"Please double-check with your slurm cluster administrator, that "
|
|
256
|
-
"slurmdbd job accounting is properly set up.\n"
|
|
257
|
-
)
|
|
258
256
|
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
status = status_of_jobs[j.external_jobid]
|
|
269
|
-
if status == "COMPLETED":
|
|
270
|
-
self.report_job_success(j)
|
|
271
|
-
any_finished = True
|
|
272
|
-
active_jobs_seen_by_sacct.remove(j.external_jobid)
|
|
273
|
-
elif status == "UNKNOWN":
|
|
274
|
-
# the job probably does not exist anymore, but 'sacct' did not work
|
|
275
|
-
# so we assume it is finished
|
|
276
|
-
self.report_job_success(j)
|
|
277
|
-
any_finished = True
|
|
278
|
-
active_jobs_seen_by_sacct.remove(j.external_jobid)
|
|
279
|
-
elif status in fail_stati:
|
|
280
|
-
msg = (
|
|
281
|
-
f"SLURM-job '{j.external_jobid}' failed, SLURM status is: "
|
|
282
|
-
f"'{status}'"
|
|
283
|
-
)
|
|
284
|
-
self.report_job_error(j, msg=msg, aux_logs=[j.aux["slurm_logfile"]])
|
|
285
|
-
active_jobs_seen_by_sacct.remove(j.external_jobid)
|
|
286
|
-
else: # still running?
|
|
287
|
-
yield j
|
|
288
|
-
|
|
289
|
-
if not any_finished:
|
|
290
|
-
self.next_seconds_between_status_checks = min(
|
|
291
|
-
self.next_seconds_between_status_checks + 10, max_sleep_time
|
|
257
|
+
if missing_sacct_status:
|
|
258
|
+
self.logger.warning(
|
|
259
|
+
f"Unable to get the status of all active jobs that should be "
|
|
260
|
+
f"in slurmdbd, even after {status_attempts} attempts.\n"
|
|
261
|
+
f"The jobs with the following slurm job ids were previously seen "
|
|
262
|
+
"by sacct, but sacct doesn't report them any more:\n"
|
|
263
|
+
f"{missing_sacct_status}\n"
|
|
264
|
+
f"Please double-check with your slurm cluster administrator, that "
|
|
265
|
+
"slurmdbd job accounting is properly set up.\n"
|
|
292
266
|
)
|
|
293
|
-
|
|
294
|
-
|
|
267
|
+
|
|
268
|
+
if status_of_jobs is not None:
|
|
269
|
+
any_finished = False
|
|
270
|
+
for j in active_jobs:
|
|
271
|
+
# the job probably didn't make it into slurmdbd yet, so
|
|
272
|
+
# `sacct` doesn't return it
|
|
273
|
+
if j.external_jobid not in status_of_jobs:
|
|
274
|
+
# but the job should still be queueing or running and
|
|
275
|
+
# appear in slurmdbd (and thus `sacct` output) later
|
|
276
|
+
yield j
|
|
277
|
+
continue
|
|
278
|
+
status = status_of_jobs[j.external_jobid]
|
|
279
|
+
if status == "COMPLETED":
|
|
280
|
+
self.report_job_success(j)
|
|
281
|
+
any_finished = True
|
|
282
|
+
active_jobs_seen_by_sacct.remove(j.external_jobid)
|
|
283
|
+
elif status == "UNKNOWN":
|
|
284
|
+
# the job probably does not exist anymore, but 'sacct' did not work
|
|
285
|
+
# so we assume it is finished
|
|
286
|
+
self.report_job_success(j)
|
|
287
|
+
any_finished = True
|
|
288
|
+
active_jobs_seen_by_sacct.remove(j.external_jobid)
|
|
289
|
+
elif status in fail_stati:
|
|
290
|
+
msg = (
|
|
291
|
+
f"SLURM-job '{j.external_jobid}' failed, SLURM status is: "
|
|
292
|
+
f"'{status}'"
|
|
293
|
+
)
|
|
294
|
+
self.report_job_error(j, msg=msg, aux_logs=[j.aux["slurm_logfile"]])
|
|
295
|
+
active_jobs_seen_by_sacct.remove(j.external_jobid)
|
|
296
|
+
else: # still running?
|
|
297
|
+
yield j
|
|
298
|
+
|
|
299
|
+
if not any_finished:
|
|
300
|
+
self.next_seconds_between_status_checks = min(
|
|
301
|
+
self.next_seconds_between_status_checks + 10, max_sleep_time
|
|
302
|
+
)
|
|
303
|
+
else:
|
|
304
|
+
self.next_seconds_between_status_checks = None
|
|
295
305
|
|
|
296
306
|
def cancel_jobs(self, active_jobs: List[SubmittedJobInfo]):
|
|
297
307
|
# Cancel all active jobs.
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
snakemake_executor_plugin_slurm/__init__.py,sha256=KNJtszoEJIcRiO_2QPFhWSjfMj7CiUoPZeiHF8-Qus8,19939
|
|
2
|
+
snakemake_executor_plugin_slurm-0.4.0.dist-info/LICENSE,sha256=YVc4xTLWMqGfFL36120k7rzXtsT6e4RkJsh68VVn12s,1076
|
|
3
|
+
snakemake_executor_plugin_slurm-0.4.0.dist-info/METADATA,sha256=B3GYo_kbEAZJuPri9F4PYKu94HUREHs2uS4KoFUP6F0,1233
|
|
4
|
+
snakemake_executor_plugin_slurm-0.4.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
5
|
+
snakemake_executor_plugin_slurm-0.4.0.dist-info/RECORD,,
|
|
@@ -1,5 +0,0 @@
|
|
|
1
|
-
snakemake_executor_plugin_slurm/__init__.py,sha256=xOWNu2sEs95P-k0VD9HzzlsuLoa-MosARD0amev8y_0,19573
|
|
2
|
-
snakemake_executor_plugin_slurm-0.3.1.dist-info/LICENSE,sha256=YVc4xTLWMqGfFL36120k7rzXtsT6e4RkJsh68VVn12s,1076
|
|
3
|
-
snakemake_executor_plugin_slurm-0.3.1.dist-info/METADATA,sha256=cFmErk1uq995V5M7ePFsFF5dpS1ob69wxxjtitFA1pY,1233
|
|
4
|
-
snakemake_executor_plugin_slurm-0.3.1.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
|
|
5
|
-
snakemake_executor_plugin_slurm-0.3.1.dist-info/RECORD,,
|
|
File without changes
|