toil 6.1.0a1__py3-none-any.whl → 7.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104)
  1. toil/__init__.py +1 -232
  2. toil/batchSystems/abstractBatchSystem.py +41 -17
  3. toil/batchSystems/abstractGridEngineBatchSystem.py +79 -65
  4. toil/batchSystems/awsBatch.py +8 -8
  5. toil/batchSystems/cleanup_support.py +7 -3
  6. toil/batchSystems/contained_executor.py +4 -5
  7. toil/batchSystems/gridengine.py +1 -1
  8. toil/batchSystems/htcondor.py +5 -5
  9. toil/batchSystems/kubernetes.py +25 -11
  10. toil/batchSystems/local_support.py +3 -3
  11. toil/batchSystems/lsf.py +9 -9
  12. toil/batchSystems/mesos/batchSystem.py +4 -4
  13. toil/batchSystems/mesos/executor.py +3 -2
  14. toil/batchSystems/options.py +9 -0
  15. toil/batchSystems/singleMachine.py +11 -10
  16. toil/batchSystems/slurm.py +129 -16
  17. toil/batchSystems/torque.py +1 -1
  18. toil/bus.py +45 -3
  19. toil/common.py +56 -31
  20. toil/cwl/cwltoil.py +442 -371
  21. toil/deferred.py +1 -1
  22. toil/exceptions.py +1 -1
  23. toil/fileStores/abstractFileStore.py +69 -20
  24. toil/fileStores/cachingFileStore.py +6 -22
  25. toil/fileStores/nonCachingFileStore.py +6 -15
  26. toil/job.py +270 -86
  27. toil/jobStores/abstractJobStore.py +37 -31
  28. toil/jobStores/aws/jobStore.py +280 -218
  29. toil/jobStores/aws/utils.py +60 -31
  30. toil/jobStores/conftest.py +2 -2
  31. toil/jobStores/fileJobStore.py +3 -3
  32. toil/jobStores/googleJobStore.py +3 -4
  33. toil/leader.py +89 -38
  34. toil/lib/aws/__init__.py +26 -10
  35. toil/lib/aws/iam.py +2 -2
  36. toil/lib/aws/session.py +62 -22
  37. toil/lib/aws/utils.py +73 -37
  38. toil/lib/conversions.py +24 -1
  39. toil/lib/ec2.py +118 -69
  40. toil/lib/expando.py +1 -1
  41. toil/lib/generatedEC2Lists.py +8 -8
  42. toil/lib/io.py +42 -4
  43. toil/lib/misc.py +1 -3
  44. toil/lib/resources.py +57 -16
  45. toil/lib/retry.py +12 -5
  46. toil/lib/threading.py +29 -14
  47. toil/lib/throttle.py +1 -1
  48. toil/options/common.py +31 -30
  49. toil/options/wdl.py +5 -0
  50. toil/provisioners/__init__.py +9 -3
  51. toil/provisioners/abstractProvisioner.py +12 -2
  52. toil/provisioners/aws/__init__.py +20 -15
  53. toil/provisioners/aws/awsProvisioner.py +406 -329
  54. toil/provisioners/gceProvisioner.py +2 -2
  55. toil/provisioners/node.py +13 -5
  56. toil/server/app.py +1 -1
  57. toil/statsAndLogging.py +93 -23
  58. toil/test/__init__.py +27 -12
  59. toil/test/batchSystems/batchSystemTest.py +40 -33
  60. toil/test/batchSystems/batch_system_plugin_test.py +79 -0
  61. toil/test/batchSystems/test_slurm.py +22 -7
  62. toil/test/cactus/__init__.py +0 -0
  63. toil/test/cactus/test_cactus_integration.py +58 -0
  64. toil/test/cwl/cwlTest.py +245 -236
  65. toil/test/cwl/seqtk_seq.cwl +1 -1
  66. toil/test/docs/scriptsTest.py +11 -14
  67. toil/test/jobStores/jobStoreTest.py +40 -54
  68. toil/test/lib/aws/test_iam.py +2 -2
  69. toil/test/lib/test_ec2.py +1 -1
  70. toil/test/options/__init__.py +13 -0
  71. toil/test/options/options.py +37 -0
  72. toil/test/provisioners/aws/awsProvisionerTest.py +51 -34
  73. toil/test/provisioners/clusterTest.py +99 -16
  74. toil/test/server/serverTest.py +2 -2
  75. toil/test/src/autoDeploymentTest.py +1 -1
  76. toil/test/src/dockerCheckTest.py +2 -1
  77. toil/test/src/environmentTest.py +125 -0
  78. toil/test/src/fileStoreTest.py +1 -1
  79. toil/test/src/jobDescriptionTest.py +18 -8
  80. toil/test/src/jobTest.py +1 -1
  81. toil/test/src/realtimeLoggerTest.py +4 -0
  82. toil/test/src/workerTest.py +52 -19
  83. toil/test/utils/toilDebugTest.py +62 -4
  84. toil/test/utils/utilsTest.py +23 -21
  85. toil/test/wdl/wdltoil_test.py +49 -21
  86. toil/test/wdl/wdltoil_test_kubernetes.py +77 -0
  87. toil/toilState.py +68 -9
  88. toil/utils/toilDebugFile.py +1 -1
  89. toil/utils/toilDebugJob.py +153 -26
  90. toil/utils/toilLaunchCluster.py +12 -2
  91. toil/utils/toilRsyncCluster.py +7 -2
  92. toil/utils/toilSshCluster.py +7 -3
  93. toil/utils/toilStats.py +310 -266
  94. toil/utils/toilStatus.py +98 -52
  95. toil/version.py +11 -11
  96. toil/wdl/wdltoil.py +644 -225
  97. toil/worker.py +125 -83
  98. {toil-6.1.0a1.dist-info → toil-7.0.0.dist-info}/LICENSE +25 -0
  99. toil-7.0.0.dist-info/METADATA +158 -0
  100. {toil-6.1.0a1.dist-info → toil-7.0.0.dist-info}/RECORD +103 -96
  101. {toil-6.1.0a1.dist-info → toil-7.0.0.dist-info}/WHEEL +1 -1
  102. toil-6.1.0a1.dist-info/METADATA +0 -125
  103. {toil-6.1.0a1.dist-info → toil-7.0.0.dist-info}/entry_points.txt +0 -0
  104. {toil-6.1.0a1.dist-info → toil-7.0.0.dist-info}/top_level.txt +0 -0
toil/utils/toilStatus.py CHANGED
@@ -82,7 +82,7 @@ class ToilStatus:
82
82
  with job.getLogFileHandle(self.jobStore) as fH:
83
83
  # TODO: This looks intended to be machine-readable, but the format is
84
84
  # unspecified and no escaping is done. But keep these tags around.
85
- print(StatsAndLogging.formatLogStream(fH, job_name=f"LOG_FILE_OF_JOB:{job} LOG:"))
85
+ print(StatsAndLogging.formatLogStream(fH, stream_name=f"LOG_FILE_OF_JOB:{job} LOG:"))
86
86
  else:
87
87
  print(f"LOG_FILE_OF_JOB: {job} LOG: Job has no log file")
88
88
 
@@ -94,65 +94,99 @@ class ToilStatus:
94
94
  children += "\t(CHILD_JOB:%s,PRECEDENCE:%i)" % (childJob, level)
95
95
  print(children)
96
96
 
97
- def printAggregateJobStats(self, properties: List[str], childNumber: int) -> None:
98
- """Prints a job's ID, log file, remaining tries, and other properties."""
99
- for job in self.jobsToReport:
97
+ def printAggregateJobStats(self, properties: List[Set[str]], childNumber: List[int]) -> None:
98
+ """
99
+ Prints each job's ID, log file, remaining tries, and other properties.
100
+
101
+ :param properties: A set of string flag names for each job in self.jobsToReport.
102
+ :param childNumber: A list of child counts for each job in self.jobsToReport.
103
+ """
104
+ for job, job_properties, job_child_number in zip(self.jobsToReport, properties, childNumber):
100
105
 
101
106
  def lf(x: str) -> str:
102
- return f"{x}:{str(x in properties)}"
103
- print("\t".join(("JOB:%s" % job,
104
- "LOG_FILE:%s" % job.logJobStoreFileID,
105
- "TRYS_REMAINING:%i" % job.remainingTryCount,
106
- "CHILD_NUMBER:%s" % childNumber,
107
- lf("READY_TO_RUN"), lf("IS_ZOMBIE"),
108
- lf("HAS_SERVICES"), lf("IS_SERVICE"))))
107
+ return f"{x}:{str(x in job_properties)}"
108
+ # We use a sort of not-really-machine-readable key:value TSV format here.
109
+ # But we only include important keys to help the humans, and flags
110
+ # don't have a value, just a key.
111
+ parts = [f"JOB:{job}"]
112
+ for flag in ["COMPLETELY_FAILED", "READY_TO_RUN", "IS_ZOMBIE", "HAS_SERVICES", "IS_SERVICE"]:
113
+ if flag in job_properties:
114
+ parts.append(flag)
115
+ if job.logJobStoreFileID:
116
+ parts.append(f"LOG_FILE:{job.logJobStoreFileID}")
117
+ if job.remainingTryCount > 0:
118
+ parts.append(f"TRYS_REMAINING:{job.remainingTryCount}")
119
+ if job_child_number > 0:
120
+ parts.append(f"CHILD_NUMBER:{job_child_number}")
121
+
122
+ print("\t".join(parts))
109
123
 
110
124
  def report_on_jobs(self) -> Dict[str, Any]:
111
125
  """
112
126
  Gathers information about jobs such as its child jobs and status.
113
127
 
114
- :returns jobStats: Pairings of a useful category and a list of jobs which fall into it.
115
- :rtype dict:
128
+ :returns jobStats: Dict containing some lists of jobs by category, and
129
+ some lists of job properties for each job in self.jobsToReport.
116
130
  """
131
+ # These are lists of the matching jobs
117
132
  hasChildren = []
118
133
  readyToRun = []
119
134
  zombies = []
120
135
  hasLogFile: List[JobDescription] = []
121
136
  hasServices = []
122
137
  services: List[ServiceJobDescription] = []
123
- properties = set()
138
+ completely_failed = []
139
+
140
+ # These are stats for jobs in self.jobsToReport
141
+ child_number: List[int] = []
142
+ properties: List[Set[str]] = []
143
+
144
+ # TODO: This mix of semantics is confusing and made per-job status be
145
+ # wrong for multiple years because it was not understood. Redesign it!
124
146
 
125
147
  for job in self.jobsToReport:
148
+ job_properties: Set[str] = set()
126
149
  if job.logJobStoreFileID is not None:
127
150
  hasLogFile.append(job)
128
151
 
129
- childNumber = len(list(job.allSuccessors()))
130
- if childNumber > 0: # Total number of successors > 0
152
+ job_child_number = len(list(job.allSuccessors()))
153
+ child_number.append(job_child_number)
154
+ if job_child_number > 0: # Total number of successors > 0
131
155
  hasChildren.append(job)
132
- properties.add("HAS_CHILDREN")
133
- elif job.command is not None:
134
- # Job has no children and a command to run. Indicates job could be run.
156
+ job_properties.add("HAS_CHILDREN")
157
+ elif job.has_body():
158
+ # Job has no children and a body to run. Indicates job could be run.
135
159
  readyToRun.append(job)
136
- properties.add("READY_TO_RUN")
160
+ job_properties.add("READY_TO_RUN")
137
161
  else:
138
162
  # Job has no successors and no command, so is a zombie job.
139
163
  zombies.append(job)
140
- properties.add("IS_ZOMBIE")
164
+ job_properties.add("IS_ZOMBIE")
141
165
  if job.services:
142
166
  hasServices.append(job)
143
- properties.add("HAS_SERVICES")
167
+ job_properties.add("HAS_SERVICES")
144
168
  if isinstance(job, ServiceJobDescription):
145
169
  services.append(job)
146
- properties.add("IS_SERVICE")
147
-
148
- jobStats = {'hasChildren': hasChildren,
149
- 'readyToRun': readyToRun,
150
- 'zombies': zombies,
151
- 'hasServices': hasServices,
152
- 'services': services,
153
- 'hasLogFile': hasLogFile,
154
- 'properties': properties,
155
- 'childNumber': childNumber}
170
+ job_properties.add("IS_SERVICE")
171
+ if job.remainingTryCount == 0:
172
+ # Job is out of tries (and thus completely failed)
173
+ job_properties.add("COMPLETELY_FAILED")
174
+ completely_failed.append(job)
175
+ properties.append(job_properties)
176
+
177
+ jobStats = {
178
+ # These are lists of the matching jobs
179
+ 'hasChildren': hasChildren,
180
+ 'readyToRun': readyToRun,
181
+ 'zombies': zombies,
182
+ 'hasServices': hasServices,
183
+ 'services': services,
184
+ 'hasLogFile': hasLogFile,
185
+ 'completelyFailed': completely_failed,
186
+ # These are stats for jobs in self.jobsToReport
187
+ 'properties': properties,
188
+ 'childNumber': child_number
189
+ }
156
190
  return jobStats
157
191
 
158
192
  @staticmethod
@@ -251,8 +285,9 @@ class ToilStatus:
251
285
  """
252
286
  try:
253
287
  return self.jobStore.load_root_job()
254
- except JobException:
255
- print('Root job is absent. The workflow has may have completed successfully.', file=sys.stderr)
288
+ except JobException as e:
289
+ logger.info(e)
290
+ print('Root job is absent. The workflow has may have completed successfully.')
256
291
  raise
257
292
 
258
293
  def fetchUserJobs(self, jobs: List[str]) -> List[JobDescription]:
@@ -326,7 +361,7 @@ def main() -> None:
326
361
  help="Do not print overall, aggregate status of workflow.",
327
362
  default=True)
328
363
 
329
- parser.add_argument("--printDot", action="store_true",
364
+ parser.add_argument("--dot", "--printDot", dest="print_dot", action="store_true",
330
365
  help="Print dot formatted description of the graph. If using --jobs will "
331
366
  "restrict to subgraph including only those jobs. default=%(default)s",
332
367
  default=False)
@@ -335,20 +370,24 @@ def main() -> None:
335
370
  help="Restrict reporting to the following jobs (allows subsetting of the report).",
336
371
  default=None)
337
372
 
338
- parser.add_argument("--printPerJobStats", action="store_true",
373
+ parser.add_argument("--perJob", "--printPerJobStats", dest="print_per_job_stats", action="store_true",
339
374
  help="Print info about each job. default=%(default)s",
340
375
  default=False)
341
376
 
342
- parser.add_argument("--printLogs", action="store_true",
377
+ parser.add_argument("--logs", "--printLogs", dest="print_logs", action="store_true",
343
378
  help="Print the log files of jobs (if they exist). default=%(default)s",
344
379
  default=False)
345
380
 
346
- parser.add_argument("--printChildren", action="store_true",
381
+ parser.add_argument("--children", "--printChildren", dest="print_children", action="store_true",
347
382
  help="Print children of each job. default=%(default)s",
348
383
  default=False)
349
384
 
350
- parser.add_argument("--printStatus", action="store_true",
385
+ parser.add_argument("--status", "--printStatus", dest="print_status", action="store_true",
351
386
  help="Determine which jobs are currently running and the associated batch system ID")
387
+
388
+ parser.add_argument("--failed", "--printFailed", dest="print_failed", action="store_true",
389
+ help="List jobs which seem to have failed to run")
390
+
352
391
  options = parser.parse_args()
353
392
  set_logging_from_options(options)
354
393
 
@@ -356,13 +395,10 @@ def main() -> None:
356
395
  parser.print_help()
357
396
  sys.exit(0)
358
397
 
359
- config = Config()
360
- config.setOptions(options)
361
-
362
398
  try:
363
- status = ToilStatus(config.jobStore, options.jobs)
399
+ status = ToilStatus(options.jobStore, options.jobs)
364
400
  except NoSuchJobStoreException:
365
- print('No job store found.')
401
+ print(f'The job store {options.jobStore} was not found.')
366
402
  return
367
403
  except JobException: # Workflow likely complete, user informed in ToilStatus()
368
404
  return
@@ -370,34 +406,44 @@ def main() -> None:
370
406
  jobStats = status.report_on_jobs()
371
407
 
372
408
  # Info to be reported.
409
+ # These are lists of matching jobs.
373
410
  hasChildren = jobStats['hasChildren']
374
411
  readyToRun = jobStats['readyToRun']
375
412
  zombies = jobStats['zombies']
376
413
  hasServices = jobStats['hasServices']
377
414
  services = jobStats['services']
378
415
  hasLogFile = jobStats['hasLogFile']
416
+ completely_failed = jobStats['completelyFailed']
417
+ # These are results for corresponding jobs in status.jobsToReport
379
418
  properties = jobStats['properties']
380
419
  childNumber = jobStats['childNumber']
381
420
 
382
- if options.printPerJobStats:
421
+ if options.print_per_job_stats:
383
422
  status.printAggregateJobStats(properties, childNumber)
384
- if options.printLogs:
423
+ if options.print_logs:
385
424
  status.printJobLog()
386
- if options.printChildren:
425
+ if options.print_children:
387
426
  status.printJobChildren()
388
- if options.printDot:
427
+ if options.print_dot:
389
428
  status.print_dot_chart()
429
+ if options.print_failed:
430
+ print("Failed jobs:")
431
+ for job in completely_failed:
432
+ print(job)
390
433
  if options.stats:
391
434
  print('Of the %i jobs considered, '
392
- 'there are %i jobs with children, '
435
+ 'there are '
436
+ '%i completely failed jobs, '
437
+ '%i jobs with children, '
393
438
  '%i jobs ready to run, '
394
439
  '%i zombie jobs, '
395
440
  '%i jobs with services, '
396
441
  '%i services, '
397
442
  'and %i jobs with log files currently in %s.' %
398
- (len(status.jobsToReport), len(hasChildren), len(readyToRun), len(zombies),
399
- len(hasServices), len(services), len(hasLogFile), status.jobStore))
400
- if options.printStatus:
443
+ (len(status.jobsToReport), len(completely_failed), len(hasChildren),
444
+ len(readyToRun), len(zombies), len(hasServices), len(services),
445
+ len(hasLogFile), status.jobStore))
446
+ if options.print_status:
401
447
  status.print_bus_messages()
402
448
  if len(status.jobsToReport) > 0 and options.failIfNotComplete:
403
449
  # Upon workflow completion, all jobs will have been removed from job store
toil/version.py CHANGED
@@ -1,14 +1,14 @@
1
- baseVersion = '6.1.0a1'
1
+ baseVersion = '7.0.0'
2
2
  cgcloudVersion = '1.6.0a1.dev393'
3
- version = '6.1.0a1-04b966f2417ebf2752e6f216e7fbda3ce20b4a37-dirty'
4
- cacheTag = 'cache-local-py3.11'
5
- mainCacheTag = 'cache-master-py3.11'
6
- distVersion = '6.1.0a1'
7
- exactPython = 'python3.11'
8
- python = 'python3.11'
9
- dockerTag = '6.1.0a1-04b966f2417ebf2752e6f216e7fbda3ce20b4a37-dirty-py3.11'
10
- currentCommit = '04b966f2417ebf2752e6f216e7fbda3ce20b4a37'
3
+ version = '7.0.0-d569ea5711eb310ffd5703803f7250ebf7c19576'
4
+ cacheTag = 'cache-local-py3.9'
5
+ mainCacheTag = 'cache-master-py3.9'
6
+ distVersion = '7.0.0'
7
+ exactPython = 'python3.9'
8
+ python = 'python3.9'
9
+ dockerTag = '7.0.0-d569ea5711eb310ffd5703803f7250ebf7c19576-py3.9'
10
+ currentCommit = 'd569ea5711eb310ffd5703803f7250ebf7c19576'
11
11
  dockerRegistry = 'quay.io/ucsc_cgl'
12
12
  dockerName = 'toil'
13
- dirty = True
14
- cwltool_version = '3.1.20240112164112'
13
+ dirty = False
14
+ cwltool_version = '3.1.20240508115724'