toil 6.1.0a1__py3-none-any.whl → 7.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +1 -232
- toil/batchSystems/abstractBatchSystem.py +41 -17
- toil/batchSystems/abstractGridEngineBatchSystem.py +79 -65
- toil/batchSystems/awsBatch.py +8 -8
- toil/batchSystems/cleanup_support.py +7 -3
- toil/batchSystems/contained_executor.py +4 -5
- toil/batchSystems/gridengine.py +1 -1
- toil/batchSystems/htcondor.py +5 -5
- toil/batchSystems/kubernetes.py +25 -11
- toil/batchSystems/local_support.py +3 -3
- toil/batchSystems/lsf.py +9 -9
- toil/batchSystems/mesos/batchSystem.py +4 -4
- toil/batchSystems/mesos/executor.py +3 -2
- toil/batchSystems/options.py +9 -0
- toil/batchSystems/singleMachine.py +11 -10
- toil/batchSystems/slurm.py +129 -16
- toil/batchSystems/torque.py +1 -1
- toil/bus.py +45 -3
- toil/common.py +56 -31
- toil/cwl/cwltoil.py +442 -371
- toil/deferred.py +1 -1
- toil/exceptions.py +1 -1
- toil/fileStores/abstractFileStore.py +69 -20
- toil/fileStores/cachingFileStore.py +6 -22
- toil/fileStores/nonCachingFileStore.py +6 -15
- toil/job.py +270 -86
- toil/jobStores/abstractJobStore.py +37 -31
- toil/jobStores/aws/jobStore.py +280 -218
- toil/jobStores/aws/utils.py +60 -31
- toil/jobStores/conftest.py +2 -2
- toil/jobStores/fileJobStore.py +3 -3
- toil/jobStores/googleJobStore.py +3 -4
- toil/leader.py +89 -38
- toil/lib/aws/__init__.py +26 -10
- toil/lib/aws/iam.py +2 -2
- toil/lib/aws/session.py +62 -22
- toil/lib/aws/utils.py +73 -37
- toil/lib/conversions.py +24 -1
- toil/lib/ec2.py +118 -69
- toil/lib/expando.py +1 -1
- toil/lib/generatedEC2Lists.py +8 -8
- toil/lib/io.py +42 -4
- toil/lib/misc.py +1 -3
- toil/lib/resources.py +57 -16
- toil/lib/retry.py +12 -5
- toil/lib/threading.py +29 -14
- toil/lib/throttle.py +1 -1
- toil/options/common.py +31 -30
- toil/options/wdl.py +5 -0
- toil/provisioners/__init__.py +9 -3
- toil/provisioners/abstractProvisioner.py +12 -2
- toil/provisioners/aws/__init__.py +20 -15
- toil/provisioners/aws/awsProvisioner.py +406 -329
- toil/provisioners/gceProvisioner.py +2 -2
- toil/provisioners/node.py +13 -5
- toil/server/app.py +1 -1
- toil/statsAndLogging.py +93 -23
- toil/test/__init__.py +27 -12
- toil/test/batchSystems/batchSystemTest.py +40 -33
- toil/test/batchSystems/batch_system_plugin_test.py +79 -0
- toil/test/batchSystems/test_slurm.py +22 -7
- toil/test/cactus/__init__.py +0 -0
- toil/test/cactus/test_cactus_integration.py +58 -0
- toil/test/cwl/cwlTest.py +245 -236
- toil/test/cwl/seqtk_seq.cwl +1 -1
- toil/test/docs/scriptsTest.py +11 -14
- toil/test/jobStores/jobStoreTest.py +40 -54
- toil/test/lib/aws/test_iam.py +2 -2
- toil/test/lib/test_ec2.py +1 -1
- toil/test/options/__init__.py +13 -0
- toil/test/options/options.py +37 -0
- toil/test/provisioners/aws/awsProvisionerTest.py +51 -34
- toil/test/provisioners/clusterTest.py +99 -16
- toil/test/server/serverTest.py +2 -2
- toil/test/src/autoDeploymentTest.py +1 -1
- toil/test/src/dockerCheckTest.py +2 -1
- toil/test/src/environmentTest.py +125 -0
- toil/test/src/fileStoreTest.py +1 -1
- toil/test/src/jobDescriptionTest.py +18 -8
- toil/test/src/jobTest.py +1 -1
- toil/test/src/realtimeLoggerTest.py +4 -0
- toil/test/src/workerTest.py +52 -19
- toil/test/utils/toilDebugTest.py +62 -4
- toil/test/utils/utilsTest.py +23 -21
- toil/test/wdl/wdltoil_test.py +49 -21
- toil/test/wdl/wdltoil_test_kubernetes.py +77 -0
- toil/toilState.py +68 -9
- toil/utils/toilDebugFile.py +1 -1
- toil/utils/toilDebugJob.py +153 -26
- toil/utils/toilLaunchCluster.py +12 -2
- toil/utils/toilRsyncCluster.py +7 -2
- toil/utils/toilSshCluster.py +7 -3
- toil/utils/toilStats.py +310 -266
- toil/utils/toilStatus.py +98 -52
- toil/version.py +11 -11
- toil/wdl/wdltoil.py +644 -225
- toil/worker.py +125 -83
- {toil-6.1.0a1.dist-info → toil-7.0.0.dist-info}/LICENSE +25 -0
- toil-7.0.0.dist-info/METADATA +158 -0
- {toil-6.1.0a1.dist-info → toil-7.0.0.dist-info}/RECORD +103 -96
- {toil-6.1.0a1.dist-info → toil-7.0.0.dist-info}/WHEEL +1 -1
- toil-6.1.0a1.dist-info/METADATA +0 -125
- {toil-6.1.0a1.dist-info → toil-7.0.0.dist-info}/entry_points.txt +0 -0
- {toil-6.1.0a1.dist-info → toil-7.0.0.dist-info}/top_level.txt +0 -0
toil/utils/toilStatus.py
CHANGED
|
@@ -82,7 +82,7 @@ class ToilStatus:
|
|
|
82
82
|
with job.getLogFileHandle(self.jobStore) as fH:
|
|
83
83
|
# TODO: This looks intended to be machine-readable, but the format is
|
|
84
84
|
# unspecified and no escaping is done. But keep these tags around.
|
|
85
|
-
print(StatsAndLogging.formatLogStream(fH,
|
|
85
|
+
print(StatsAndLogging.formatLogStream(fH, stream_name=f"LOG_FILE_OF_JOB:{job} LOG:"))
|
|
86
86
|
else:
|
|
87
87
|
print(f"LOG_FILE_OF_JOB: {job} LOG: Job has no log file")
|
|
88
88
|
|
|
@@ -94,65 +94,99 @@ class ToilStatus:
|
|
|
94
94
|
children += "\t(CHILD_JOB:%s,PRECEDENCE:%i)" % (childJob, level)
|
|
95
95
|
print(children)
|
|
96
96
|
|
|
97
|
-
def printAggregateJobStats(self, properties: List[str], childNumber: int) -> None:
|
|
98
|
-
"""
|
|
99
|
-
|
|
97
|
+
def printAggregateJobStats(self, properties: List[Set[str]], childNumber: List[int]) -> None:
|
|
98
|
+
"""
|
|
99
|
+
Prints each job's ID, log file, remaining tries, and other properties.
|
|
100
|
+
|
|
101
|
+
:param properties: A set of string flag names for each job in self.jobsToReport.
|
|
102
|
+
:param childNumber: A list of child counts for each job in self.jobsToReport.
|
|
103
|
+
"""
|
|
104
|
+
for job, job_properties, job_child_number in zip(self.jobsToReport, properties, childNumber):
|
|
100
105
|
|
|
101
106
|
def lf(x: str) -> str:
|
|
102
|
-
return f"{x}:{str(x in
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
107
|
+
return f"{x}:{str(x in job_properties)}"
|
|
108
|
+
# We use a sort of not-really-machine-readable key:value TSV format here.
|
|
109
|
+
# But we only include important keys to help the humans, and flags
|
|
110
|
+
# don't have a value, just a key.
|
|
111
|
+
parts = [f"JOB:{job}"]
|
|
112
|
+
for flag in ["COMPLETELY_FAILED", "READY_TO_RUN", "IS_ZOMBIE", "HAS_SERVICES", "IS_SERVICE"]:
|
|
113
|
+
if flag in job_properties:
|
|
114
|
+
parts.append(flag)
|
|
115
|
+
if job.logJobStoreFileID:
|
|
116
|
+
parts.append(f"LOG_FILE:{job.logJobStoreFileID}")
|
|
117
|
+
if job.remainingTryCount > 0:
|
|
118
|
+
parts.append(f"TRYS_REMAINING:{job.remainingTryCount}")
|
|
119
|
+
if job_child_number > 0:
|
|
120
|
+
parts.append(f"CHILD_NUMBER:{job_child_number}")
|
|
121
|
+
|
|
122
|
+
print("\t".join(parts))
|
|
109
123
|
|
|
110
124
|
def report_on_jobs(self) -> Dict[str, Any]:
|
|
111
125
|
"""
|
|
112
126
|
Gathers information about jobs such as its child jobs and status.
|
|
113
127
|
|
|
114
|
-
:returns jobStats:
|
|
115
|
-
|
|
128
|
+
:returns jobStats: Dict containing some lists of jobs by category, and
|
|
129
|
+
some lists of job properties for each job in self.jobsToReport.
|
|
116
130
|
"""
|
|
131
|
+
# These are lists of the matching jobs
|
|
117
132
|
hasChildren = []
|
|
118
133
|
readyToRun = []
|
|
119
134
|
zombies = []
|
|
120
135
|
hasLogFile: List[JobDescription] = []
|
|
121
136
|
hasServices = []
|
|
122
137
|
services: List[ServiceJobDescription] = []
|
|
123
|
-
|
|
138
|
+
completely_failed = []
|
|
139
|
+
|
|
140
|
+
# These are stats for jobs in self.jobsToReport
|
|
141
|
+
child_number: List[int] = []
|
|
142
|
+
properties: List[Set[str]] = []
|
|
143
|
+
|
|
144
|
+
# TODO: This mix of semantics is confusing and made per-job status be
|
|
145
|
+
# wrong for multiple years because it was not understood. Redesign it!
|
|
124
146
|
|
|
125
147
|
for job in self.jobsToReport:
|
|
148
|
+
job_properties: Set[str] = set()
|
|
126
149
|
if job.logJobStoreFileID is not None:
|
|
127
150
|
hasLogFile.append(job)
|
|
128
151
|
|
|
129
|
-
|
|
130
|
-
|
|
152
|
+
job_child_number = len(list(job.allSuccessors()))
|
|
153
|
+
child_number.append(job_child_number)
|
|
154
|
+
if job_child_number > 0: # Total number of successors > 0
|
|
131
155
|
hasChildren.append(job)
|
|
132
|
-
|
|
133
|
-
elif job.
|
|
134
|
-
# Job has no children and a
|
|
156
|
+
job_properties.add("HAS_CHILDREN")
|
|
157
|
+
elif job.has_body():
|
|
158
|
+
# Job has no children and a body to run. Indicates job could be run.
|
|
135
159
|
readyToRun.append(job)
|
|
136
|
-
|
|
160
|
+
job_properties.add("READY_TO_RUN")
|
|
137
161
|
else:
|
|
138
162
|
# Job has no successors and no command, so is a zombie job.
|
|
139
163
|
zombies.append(job)
|
|
140
|
-
|
|
164
|
+
job_properties.add("IS_ZOMBIE")
|
|
141
165
|
if job.services:
|
|
142
166
|
hasServices.append(job)
|
|
143
|
-
|
|
167
|
+
job_properties.add("HAS_SERVICES")
|
|
144
168
|
if isinstance(job, ServiceJobDescription):
|
|
145
169
|
services.append(job)
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
170
|
+
job_properties.add("IS_SERVICE")
|
|
171
|
+
if job.remainingTryCount == 0:
|
|
172
|
+
# Job is out of tries (and thus completely failed)
|
|
173
|
+
job_properties.add("COMPLETELY_FAILED")
|
|
174
|
+
completely_failed.append(job)
|
|
175
|
+
properties.append(job_properties)
|
|
176
|
+
|
|
177
|
+
jobStats = {
|
|
178
|
+
# These are lists of the matching jobs
|
|
179
|
+
'hasChildren': hasChildren,
|
|
180
|
+
'readyToRun': readyToRun,
|
|
181
|
+
'zombies': zombies,
|
|
182
|
+
'hasServices': hasServices,
|
|
183
|
+
'services': services,
|
|
184
|
+
'hasLogFile': hasLogFile,
|
|
185
|
+
'completelyFailed': completely_failed,
|
|
186
|
+
# These are stats for jobs in self.jobsToReport
|
|
187
|
+
'properties': properties,
|
|
188
|
+
'childNumber': child_number
|
|
189
|
+
}
|
|
156
190
|
return jobStats
|
|
157
191
|
|
|
158
192
|
@staticmethod
|
|
@@ -251,8 +285,9 @@ class ToilStatus:
|
|
|
251
285
|
"""
|
|
252
286
|
try:
|
|
253
287
|
return self.jobStore.load_root_job()
|
|
254
|
-
except JobException:
|
|
255
|
-
|
|
288
|
+
except JobException as e:
|
|
289
|
+
logger.info(e)
|
|
290
|
+
print('Root job is absent. The workflow has may have completed successfully.')
|
|
256
291
|
raise
|
|
257
292
|
|
|
258
293
|
def fetchUserJobs(self, jobs: List[str]) -> List[JobDescription]:
|
|
@@ -326,7 +361,7 @@ def main() -> None:
|
|
|
326
361
|
help="Do not print overall, aggregate status of workflow.",
|
|
327
362
|
default=True)
|
|
328
363
|
|
|
329
|
-
parser.add_argument("--printDot", action="store_true",
|
|
364
|
+
parser.add_argument("--dot", "--printDot", dest="print_dot", action="store_true",
|
|
330
365
|
help="Print dot formatted description of the graph. If using --jobs will "
|
|
331
366
|
"restrict to subgraph including only those jobs. default=%(default)s",
|
|
332
367
|
default=False)
|
|
@@ -335,20 +370,24 @@ def main() -> None:
|
|
|
335
370
|
help="Restrict reporting to the following jobs (allows subsetting of the report).",
|
|
336
371
|
default=None)
|
|
337
372
|
|
|
338
|
-
parser.add_argument("--printPerJobStats", action="store_true",
|
|
373
|
+
parser.add_argument("--perJob", "--printPerJobStats", dest="print_per_job_stats", action="store_true",
|
|
339
374
|
help="Print info about each job. default=%(default)s",
|
|
340
375
|
default=False)
|
|
341
376
|
|
|
342
|
-
parser.add_argument("--printLogs", action="store_true",
|
|
377
|
+
parser.add_argument("--logs", "--printLogs", dest="print_logs", action="store_true",
|
|
343
378
|
help="Print the log files of jobs (if they exist). default=%(default)s",
|
|
344
379
|
default=False)
|
|
345
380
|
|
|
346
|
-
parser.add_argument("--printChildren", action="store_true",
|
|
381
|
+
parser.add_argument("--children", "--printChildren", dest="print_children", action="store_true",
|
|
347
382
|
help="Print children of each job. default=%(default)s",
|
|
348
383
|
default=False)
|
|
349
384
|
|
|
350
|
-
parser.add_argument("--printStatus", action="store_true",
|
|
385
|
+
parser.add_argument("--status", "--printStatus", dest="print_status", action="store_true",
|
|
351
386
|
help="Determine which jobs are currently running and the associated batch system ID")
|
|
387
|
+
|
|
388
|
+
parser.add_argument("--failed", "--printFailed", dest="print_failed", action="store_true",
|
|
389
|
+
help="List jobs which seem to have failed to run")
|
|
390
|
+
|
|
352
391
|
options = parser.parse_args()
|
|
353
392
|
set_logging_from_options(options)
|
|
354
393
|
|
|
@@ -356,13 +395,10 @@ def main() -> None:
|
|
|
356
395
|
parser.print_help()
|
|
357
396
|
sys.exit(0)
|
|
358
397
|
|
|
359
|
-
config = Config()
|
|
360
|
-
config.setOptions(options)
|
|
361
|
-
|
|
362
398
|
try:
|
|
363
|
-
status = ToilStatus(
|
|
399
|
+
status = ToilStatus(options.jobStore, options.jobs)
|
|
364
400
|
except NoSuchJobStoreException:
|
|
365
|
-
print('
|
|
401
|
+
print(f'The job store {options.jobStore} was not found.')
|
|
366
402
|
return
|
|
367
403
|
except JobException: # Workflow likely complete, user informed in ToilStatus()
|
|
368
404
|
return
|
|
@@ -370,34 +406,44 @@ def main() -> None:
|
|
|
370
406
|
jobStats = status.report_on_jobs()
|
|
371
407
|
|
|
372
408
|
# Info to be reported.
|
|
409
|
+
# These are lists of matching jobs.
|
|
373
410
|
hasChildren = jobStats['hasChildren']
|
|
374
411
|
readyToRun = jobStats['readyToRun']
|
|
375
412
|
zombies = jobStats['zombies']
|
|
376
413
|
hasServices = jobStats['hasServices']
|
|
377
414
|
services = jobStats['services']
|
|
378
415
|
hasLogFile = jobStats['hasLogFile']
|
|
416
|
+
completely_failed = jobStats['completelyFailed']
|
|
417
|
+
# These are results for corresponding jobs in status.jobsToReport
|
|
379
418
|
properties = jobStats['properties']
|
|
380
419
|
childNumber = jobStats['childNumber']
|
|
381
420
|
|
|
382
|
-
if options.
|
|
421
|
+
if options.print_per_job_stats:
|
|
383
422
|
status.printAggregateJobStats(properties, childNumber)
|
|
384
|
-
if options.
|
|
423
|
+
if options.print_logs:
|
|
385
424
|
status.printJobLog()
|
|
386
|
-
if options.
|
|
425
|
+
if options.print_children:
|
|
387
426
|
status.printJobChildren()
|
|
388
|
-
if options.
|
|
427
|
+
if options.print_dot:
|
|
389
428
|
status.print_dot_chart()
|
|
429
|
+
if options.print_failed:
|
|
430
|
+
print("Failed jobs:")
|
|
431
|
+
for job in completely_failed:
|
|
432
|
+
print(job)
|
|
390
433
|
if options.stats:
|
|
391
434
|
print('Of the %i jobs considered, '
|
|
392
|
-
'there are
|
|
435
|
+
'there are '
|
|
436
|
+
'%i completely failed jobs, '
|
|
437
|
+
'%i jobs with children, '
|
|
393
438
|
'%i jobs ready to run, '
|
|
394
439
|
'%i zombie jobs, '
|
|
395
440
|
'%i jobs with services, '
|
|
396
441
|
'%i services, '
|
|
397
442
|
'and %i jobs with log files currently in %s.' %
|
|
398
|
-
(len(status.jobsToReport), len(
|
|
399
|
-
len(
|
|
400
|
-
|
|
443
|
+
(len(status.jobsToReport), len(completely_failed), len(hasChildren),
|
|
444
|
+
len(readyToRun), len(zombies), len(hasServices), len(services),
|
|
445
|
+
len(hasLogFile), status.jobStore))
|
|
446
|
+
if options.print_status:
|
|
401
447
|
status.print_bus_messages()
|
|
402
448
|
if len(status.jobsToReport) > 0 and options.failIfNotComplete:
|
|
403
449
|
# Upon workflow completion, all jobs will have been removed from job store
|
toil/version.py
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
|
-
baseVersion = '
|
|
1
|
+
baseVersion = '7.0.0'
|
|
2
2
|
cgcloudVersion = '1.6.0a1.dev393'
|
|
3
|
-
version = '
|
|
4
|
-
cacheTag = 'cache-local-py3.
|
|
5
|
-
mainCacheTag = 'cache-master-py3.
|
|
6
|
-
distVersion = '
|
|
7
|
-
exactPython = 'python3.
|
|
8
|
-
python = 'python3.
|
|
9
|
-
dockerTag = '
|
|
10
|
-
currentCommit = '
|
|
3
|
+
version = '7.0.0-d569ea5711eb310ffd5703803f7250ebf7c19576'
|
|
4
|
+
cacheTag = 'cache-local-py3.9'
|
|
5
|
+
mainCacheTag = 'cache-master-py3.9'
|
|
6
|
+
distVersion = '7.0.0'
|
|
7
|
+
exactPython = 'python3.9'
|
|
8
|
+
python = 'python3.9'
|
|
9
|
+
dockerTag = '7.0.0-d569ea5711eb310ffd5703803f7250ebf7c19576-py3.9'
|
|
10
|
+
currentCommit = 'd569ea5711eb310ffd5703803f7250ebf7c19576'
|
|
11
11
|
dockerRegistry = 'quay.io/ucsc_cgl'
|
|
12
12
|
dockerName = 'toil'
|
|
13
|
-
dirty =
|
|
14
|
-
cwltool_version = '3.1.
|
|
13
|
+
dirty = False
|
|
14
|
+
cwltool_version = '3.1.20240508115724'
|