toil 8.0.0__py3-none-any.whl → 8.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (270) hide show
  1. toil/__init__.py +4 -39
  2. toil/batchSystems/abstractBatchSystem.py +1 -1
  3. toil/batchSystems/abstractGridEngineBatchSystem.py +1 -1
  4. toil/batchSystems/awsBatch.py +1 -1
  5. toil/batchSystems/cleanup_support.py +1 -1
  6. toil/batchSystems/kubernetes.py +53 -7
  7. toil/batchSystems/local_support.py +1 -1
  8. toil/batchSystems/mesos/batchSystem.py +13 -8
  9. toil/batchSystems/mesos/test/__init__.py +3 -2
  10. toil/batchSystems/options.py +1 -0
  11. toil/batchSystems/singleMachine.py +1 -1
  12. toil/batchSystems/slurm.py +229 -84
  13. toil/bus.py +5 -3
  14. toil/common.py +198 -54
  15. toil/cwl/cwltoil.py +32 -11
  16. toil/job.py +110 -86
  17. toil/jobStores/abstractJobStore.py +24 -3
  18. toil/jobStores/aws/jobStore.py +46 -10
  19. toil/jobStores/fileJobStore.py +25 -1
  20. toil/jobStores/googleJobStore.py +104 -30
  21. toil/leader.py +9 -0
  22. toil/lib/accelerators.py +3 -1
  23. toil/lib/aws/session.py +14 -3
  24. toil/lib/aws/utils.py +92 -35
  25. toil/lib/aws/utils.py.orig +504 -0
  26. toil/lib/bioio.py +1 -1
  27. toil/lib/docker.py +252 -91
  28. toil/lib/dockstore.py +387 -0
  29. toil/lib/ec2nodes.py +3 -2
  30. toil/lib/exceptions.py +5 -3
  31. toil/lib/history.py +1345 -0
  32. toil/lib/history_submission.py +695 -0
  33. toil/lib/io.py +56 -23
  34. toil/lib/misc.py +25 -1
  35. toil/lib/resources.py +2 -1
  36. toil/lib/retry.py +10 -10
  37. toil/lib/threading.py +11 -10
  38. toil/lib/{integration.py → trs.py} +95 -46
  39. toil/lib/web.py +38 -0
  40. toil/options/common.py +25 -2
  41. toil/options/cwl.py +10 -0
  42. toil/options/wdl.py +11 -0
  43. toil/provisioners/gceProvisioner.py +4 -4
  44. toil/server/api_spec/LICENSE +201 -0
  45. toil/server/api_spec/README.rst +5 -0
  46. toil/server/cli/wes_cwl_runner.py +5 -4
  47. toil/server/utils.py +2 -3
  48. toil/statsAndLogging.py +35 -1
  49. toil/test/__init__.py +275 -115
  50. toil/test/batchSystems/batchSystemTest.py +227 -205
  51. toil/test/batchSystems/test_slurm.py +199 -2
  52. toil/test/cactus/pestis.tar.gz +0 -0
  53. toil/test/conftest.py +7 -0
  54. toil/test/cwl/2.fasta +11 -0
  55. toil/test/cwl/2.fastq +12 -0
  56. toil/test/cwl/conftest.py +39 -0
  57. toil/test/cwl/cwlTest.py +1015 -780
  58. toil/test/cwl/directory/directory/file.txt +15 -0
  59. toil/test/cwl/download_directory_file.json +4 -0
  60. toil/test/cwl/download_directory_s3.json +4 -0
  61. toil/test/cwl/download_file.json +6 -0
  62. toil/test/cwl/download_http.json +6 -0
  63. toil/test/cwl/download_https.json +6 -0
  64. toil/test/cwl/download_s3.json +6 -0
  65. toil/test/cwl/download_subdirectory_file.json +5 -0
  66. toil/test/cwl/download_subdirectory_s3.json +5 -0
  67. toil/test/cwl/empty.json +1 -0
  68. toil/test/cwl/mock_mpi/fake_mpi.yml +8 -0
  69. toil/test/cwl/mock_mpi/fake_mpi_run.py +42 -0
  70. toil/test/cwl/optional-file-exists.json +6 -0
  71. toil/test/cwl/optional-file-missing.json +6 -0
  72. toil/test/cwl/optional-file.cwl +18 -0
  73. toil/test/cwl/preemptible_expression.json +1 -0
  74. toil/test/cwl/revsort-job-missing.json +6 -0
  75. toil/test/cwl/revsort-job.json +6 -0
  76. toil/test/cwl/s3_secondary_file.json +16 -0
  77. toil/test/cwl/seqtk_seq_job.json +6 -0
  78. toil/test/cwl/stream.json +6 -0
  79. toil/test/cwl/test_filename_conflict_resolution.ms/table.dat +0 -0
  80. toil/test/cwl/test_filename_conflict_resolution.ms/table.f0 +0 -0
  81. toil/test/cwl/test_filename_conflict_resolution.ms/table.f1 +0 -0
  82. toil/test/cwl/test_filename_conflict_resolution.ms/table.f1i +0 -0
  83. toil/test/cwl/test_filename_conflict_resolution.ms/table.f2 +0 -0
  84. toil/test/cwl/test_filename_conflict_resolution.ms/table.f2_TSM0 +0 -0
  85. toil/test/cwl/test_filename_conflict_resolution.ms/table.f3 +0 -0
  86. toil/test/cwl/test_filename_conflict_resolution.ms/table.f3_TSM0 +0 -0
  87. toil/test/cwl/test_filename_conflict_resolution.ms/table.f4 +0 -0
  88. toil/test/cwl/test_filename_conflict_resolution.ms/table.f4_TSM0 +0 -0
  89. toil/test/cwl/test_filename_conflict_resolution.ms/table.f5 +0 -0
  90. toil/test/cwl/test_filename_conflict_resolution.ms/table.info +0 -0
  91. toil/test/cwl/test_filename_conflict_resolution.ms/table.lock +0 -0
  92. toil/test/cwl/whale.txt +16 -0
  93. toil/test/docs/scripts/example_alwaysfail.py +38 -0
  94. toil/test/docs/scripts/example_alwaysfail_with_files.wdl +33 -0
  95. toil/test/docs/scripts/example_cachingbenchmark.py +117 -0
  96. toil/test/docs/scripts/stagingExampleFiles/in.txt +1 -0
  97. toil/test/docs/scripts/stagingExampleFiles/out.txt +2 -0
  98. toil/test/docs/scripts/tutorial_arguments.py +23 -0
  99. toil/test/docs/scripts/tutorial_debugging.patch +12 -0
  100. toil/test/docs/scripts/tutorial_debugging_hangs.wdl +126 -0
  101. toil/test/docs/scripts/tutorial_debugging_works.wdl +129 -0
  102. toil/test/docs/scripts/tutorial_docker.py +20 -0
  103. toil/test/docs/scripts/tutorial_dynamic.py +24 -0
  104. toil/test/docs/scripts/tutorial_encapsulation.py +28 -0
  105. toil/test/docs/scripts/tutorial_encapsulation2.py +29 -0
  106. toil/test/docs/scripts/tutorial_helloworld.py +15 -0
  107. toil/test/docs/scripts/tutorial_invokeworkflow.py +27 -0
  108. toil/test/docs/scripts/tutorial_invokeworkflow2.py +30 -0
  109. toil/test/docs/scripts/tutorial_jobfunctions.py +22 -0
  110. toil/test/docs/scripts/tutorial_managing.py +29 -0
  111. toil/test/docs/scripts/tutorial_managing2.py +56 -0
  112. toil/test/docs/scripts/tutorial_multiplejobs.py +25 -0
  113. toil/test/docs/scripts/tutorial_multiplejobs2.py +21 -0
  114. toil/test/docs/scripts/tutorial_multiplejobs3.py +22 -0
  115. toil/test/docs/scripts/tutorial_promises.py +25 -0
  116. toil/test/docs/scripts/tutorial_promises2.py +30 -0
  117. toil/test/docs/scripts/tutorial_quickstart.py +22 -0
  118. toil/test/docs/scripts/tutorial_requirements.py +44 -0
  119. toil/test/docs/scripts/tutorial_services.py +45 -0
  120. toil/test/docs/scripts/tutorial_staging.py +45 -0
  121. toil/test/docs/scripts/tutorial_stats.py +64 -0
  122. toil/test/lib/aws/test_iam.py +3 -1
  123. toil/test/lib/dockerTest.py +205 -122
  124. toil/test/lib/test_history.py +236 -0
  125. toil/test/lib/test_trs.py +161 -0
  126. toil/test/provisioners/aws/awsProvisionerTest.py +12 -9
  127. toil/test/provisioners/clusterTest.py +4 -4
  128. toil/test/provisioners/gceProvisionerTest.py +16 -14
  129. toil/test/sort/sort.py +4 -1
  130. toil/test/src/busTest.py +17 -17
  131. toil/test/src/deferredFunctionTest.py +145 -132
  132. toil/test/src/importExportFileTest.py +71 -63
  133. toil/test/src/jobEncapsulationTest.py +27 -28
  134. toil/test/src/jobServiceTest.py +149 -133
  135. toil/test/src/jobTest.py +219 -211
  136. toil/test/src/miscTests.py +66 -60
  137. toil/test/src/promisedRequirementTest.py +163 -169
  138. toil/test/src/regularLogTest.py +24 -24
  139. toil/test/src/resourceTest.py +82 -76
  140. toil/test/src/restartDAGTest.py +51 -47
  141. toil/test/src/resumabilityTest.py +24 -19
  142. toil/test/src/retainTempDirTest.py +60 -57
  143. toil/test/src/systemTest.py +17 -13
  144. toil/test/src/threadingTest.py +29 -32
  145. toil/test/utils/ABCWorkflowDebug/B_file.txt +1 -0
  146. toil/test/utils/ABCWorkflowDebug/debugWorkflow.py +204 -0
  147. toil/test/utils/ABCWorkflowDebug/mkFile.py +16 -0
  148. toil/test/utils/ABCWorkflowDebug/sleep.cwl +12 -0
  149. toil/test/utils/ABCWorkflowDebug/sleep.yaml +1 -0
  150. toil/test/utils/toilDebugTest.py +117 -102
  151. toil/test/utils/toilKillTest.py +54 -53
  152. toil/test/utils/utilsTest.py +303 -229
  153. toil/test/wdl/lint_error.wdl +9 -0
  154. toil/test/wdl/md5sum/empty_file.json +1 -0
  155. toil/test/wdl/md5sum/md5sum-gs.json +1 -0
  156. toil/test/wdl/md5sum/md5sum.1.0.wdl +32 -0
  157. toil/test/wdl/md5sum/md5sum.input +1 -0
  158. toil/test/wdl/md5sum/md5sum.json +1 -0
  159. toil/test/wdl/md5sum/md5sum.wdl +25 -0
  160. toil/test/wdl/miniwdl_self_test/inputs-namespaced.json +1 -0
  161. toil/test/wdl/miniwdl_self_test/inputs.json +1 -0
  162. toil/test/wdl/miniwdl_self_test/self_test.wdl +40 -0
  163. toil/test/wdl/standard_library/as_map.json +16 -0
  164. toil/test/wdl/standard_library/as_map_as_input.wdl +23 -0
  165. toil/test/wdl/standard_library/as_pairs.json +7 -0
  166. toil/test/wdl/standard_library/as_pairs_as_input.wdl +23 -0
  167. toil/test/wdl/standard_library/ceil.json +3 -0
  168. toil/test/wdl/standard_library/ceil_as_command.wdl +16 -0
  169. toil/test/wdl/standard_library/ceil_as_input.wdl +16 -0
  170. toil/test/wdl/standard_library/collect_by_key.json +1 -0
  171. toil/test/wdl/standard_library/collect_by_key_as_input.wdl +23 -0
  172. toil/test/wdl/standard_library/cross.json +11 -0
  173. toil/test/wdl/standard_library/cross_as_input.wdl +19 -0
  174. toil/test/wdl/standard_library/flatten.json +7 -0
  175. toil/test/wdl/standard_library/flatten_as_input.wdl +18 -0
  176. toil/test/wdl/standard_library/floor.json +3 -0
  177. toil/test/wdl/standard_library/floor_as_command.wdl +16 -0
  178. toil/test/wdl/standard_library/floor_as_input.wdl +16 -0
  179. toil/test/wdl/standard_library/keys.json +8 -0
  180. toil/test/wdl/standard_library/keys_as_input.wdl +24 -0
  181. toil/test/wdl/standard_library/length.json +7 -0
  182. toil/test/wdl/standard_library/length_as_input.wdl +16 -0
  183. toil/test/wdl/standard_library/length_as_input_with_map.json +7 -0
  184. toil/test/wdl/standard_library/length_as_input_with_map.wdl +17 -0
  185. toil/test/wdl/standard_library/length_invalid.json +3 -0
  186. toil/test/wdl/standard_library/range.json +3 -0
  187. toil/test/wdl/standard_library/range_0.json +3 -0
  188. toil/test/wdl/standard_library/range_as_input.wdl +17 -0
  189. toil/test/wdl/standard_library/range_invalid.json +3 -0
  190. toil/test/wdl/standard_library/read_boolean.json +3 -0
  191. toil/test/wdl/standard_library/read_boolean_as_command.wdl +17 -0
  192. toil/test/wdl/standard_library/read_float.json +3 -0
  193. toil/test/wdl/standard_library/read_float_as_command.wdl +17 -0
  194. toil/test/wdl/standard_library/read_int.json +3 -0
  195. toil/test/wdl/standard_library/read_int_as_command.wdl +17 -0
  196. toil/test/wdl/standard_library/read_json.json +3 -0
  197. toil/test/wdl/standard_library/read_json_as_output.wdl +31 -0
  198. toil/test/wdl/standard_library/read_lines.json +3 -0
  199. toil/test/wdl/standard_library/read_lines_as_output.wdl +31 -0
  200. toil/test/wdl/standard_library/read_map.json +3 -0
  201. toil/test/wdl/standard_library/read_map_as_output.wdl +31 -0
  202. toil/test/wdl/standard_library/read_string.json +3 -0
  203. toil/test/wdl/standard_library/read_string_as_command.wdl +17 -0
  204. toil/test/wdl/standard_library/read_tsv.json +3 -0
  205. toil/test/wdl/standard_library/read_tsv_as_output.wdl +31 -0
  206. toil/test/wdl/standard_library/round.json +3 -0
  207. toil/test/wdl/standard_library/round_as_command.wdl +16 -0
  208. toil/test/wdl/standard_library/round_as_input.wdl +16 -0
  209. toil/test/wdl/standard_library/size.json +3 -0
  210. toil/test/wdl/standard_library/size_as_command.wdl +17 -0
  211. toil/test/wdl/standard_library/size_as_output.wdl +36 -0
  212. toil/test/wdl/standard_library/stderr.json +3 -0
  213. toil/test/wdl/standard_library/stderr_as_output.wdl +30 -0
  214. toil/test/wdl/standard_library/stdout.json +3 -0
  215. toil/test/wdl/standard_library/stdout_as_output.wdl +30 -0
  216. toil/test/wdl/standard_library/sub.json +3 -0
  217. toil/test/wdl/standard_library/sub_as_input.wdl +17 -0
  218. toil/test/wdl/standard_library/sub_as_input_with_file.wdl +17 -0
  219. toil/test/wdl/standard_library/transpose.json +6 -0
  220. toil/test/wdl/standard_library/transpose_as_input.wdl +18 -0
  221. toil/test/wdl/standard_library/write_json.json +6 -0
  222. toil/test/wdl/standard_library/write_json_as_command.wdl +17 -0
  223. toil/test/wdl/standard_library/write_lines.json +7 -0
  224. toil/test/wdl/standard_library/write_lines_as_command.wdl +17 -0
  225. toil/test/wdl/standard_library/write_map.json +6 -0
  226. toil/test/wdl/standard_library/write_map_as_command.wdl +17 -0
  227. toil/test/wdl/standard_library/write_tsv.json +6 -0
  228. toil/test/wdl/standard_library/write_tsv_as_command.wdl +17 -0
  229. toil/test/wdl/standard_library/zip.json +12 -0
  230. toil/test/wdl/standard_library/zip_as_input.wdl +19 -0
  231. toil/test/wdl/test.csv +3 -0
  232. toil/test/wdl/test.tsv +3 -0
  233. toil/test/wdl/testfiles/croo.wdl +38 -0
  234. toil/test/wdl/testfiles/drop_files.wdl +62 -0
  235. toil/test/wdl/testfiles/drop_files_subworkflow.wdl +13 -0
  236. toil/test/wdl/testfiles/empty.txt +0 -0
  237. toil/test/wdl/testfiles/not_enough_outputs.wdl +33 -0
  238. toil/test/wdl/testfiles/random.wdl +66 -0
  239. toil/test/wdl/testfiles/string_file_coercion.json +1 -0
  240. toil/test/wdl/testfiles/string_file_coercion.wdl +35 -0
  241. toil/test/wdl/testfiles/test.json +4 -0
  242. toil/test/wdl/testfiles/test_boolean.txt +1 -0
  243. toil/test/wdl/testfiles/test_float.txt +1 -0
  244. toil/test/wdl/testfiles/test_int.txt +1 -0
  245. toil/test/wdl/testfiles/test_lines.txt +5 -0
  246. toil/test/wdl/testfiles/test_map.txt +2 -0
  247. toil/test/wdl/testfiles/test_string.txt +1 -0
  248. toil/test/wdl/testfiles/url_to_file.wdl +13 -0
  249. toil/test/wdl/testfiles/url_to_optional_file.wdl +13 -0
  250. toil/test/wdl/testfiles/vocab.json +1 -0
  251. toil/test/wdl/testfiles/vocab.wdl +66 -0
  252. toil/test/wdl/testfiles/wait.wdl +34 -0
  253. toil/test/wdl/wdl_specification/type_pair.json +23 -0
  254. toil/test/wdl/wdl_specification/type_pair_basic.wdl +36 -0
  255. toil/test/wdl/wdl_specification/type_pair_with_files.wdl +36 -0
  256. toil/test/wdl/wdl_specification/v1_spec.json +1 -0
  257. toil/test/wdl/wdl_specification/v1_spec_declaration.wdl +39 -0
  258. toil/test/wdl/wdltoil_test.py +681 -408
  259. toil/test/wdl/wdltoil_test_kubernetes.py +2 -2
  260. toil/version.py +10 -10
  261. toil/wdl/wdltoil.py +350 -123
  262. toil/worker.py +113 -33
  263. {toil-8.0.0.dist-info → toil-8.2.0.dist-info}/METADATA +13 -7
  264. toil-8.2.0.dist-info/RECORD +439 -0
  265. {toil-8.0.0.dist-info → toil-8.2.0.dist-info}/WHEEL +1 -1
  266. toil/test/lib/test_integration.py +0 -104
  267. toil-8.0.0.dist-info/RECORD +0 -253
  268. {toil-8.0.0.dist-info → toil-8.2.0.dist-info}/entry_points.txt +0 -0
  269. {toil-8.0.0.dist-info → toil-8.2.0.dist-info/licenses}/LICENSE +0 -0
  270. {toil-8.0.0.dist-info → toil-8.2.0.dist-info}/top_level.txt +0 -0
toil/job.py CHANGED
@@ -51,11 +51,11 @@ from typing import (
51
51
  from urllib.error import HTTPError
52
52
  from urllib.parse import urlsplit, unquote, urljoin
53
53
 
54
- from toil import memoize
55
-
56
54
  import dill
57
55
  from configargparse import ArgParser
58
56
 
57
+ from toil.lib.memoize import memoize
58
+ from toil.lib.misc import StrPath
59
59
  from toil.lib.io import is_remote_url
60
60
 
61
61
  if sys.version_info < (3, 11):
@@ -140,8 +140,8 @@ class FilesDownloadedStoppingPointReached(DebugStoppingPointReached):
140
140
  """
141
141
 
142
142
  def __init__(
143
- self, message, host_and_job_paths: Optional[list[tuple[str, str]]] = None
144
- ):
143
+ self, message: str, host_and_job_paths: Optional[list[tuple[str, str]]] = None
144
+ ) -> None:
145
145
  super().__init__(message)
146
146
 
147
147
  # Save the host and user-code-visible paths of files, in case we're
@@ -307,7 +307,7 @@ def parse_accelerator(
307
307
  parsed["model"] = possible_description
308
308
  elif isinstance(spec, dict):
309
309
  # It's a dict, so merge with the defaults.
310
- parsed.update(spec)
310
+ parsed.update(cast(AcceleratorRequirement, spec))
311
311
  # TODO: make sure they didn't misspell keys or something
312
312
  else:
313
313
  raise TypeError(
@@ -816,7 +816,7 @@ class JobDescription(Requirer):
816
816
 
817
817
  def __init__(
818
818
  self,
819
- requirements: Mapping[str, Union[int, str, bool]],
819
+ requirements: Mapping[str, Union[int, str, float, bool, list]],
820
820
  jobName: str,
821
821
  unitName: Optional[str] = "",
822
822
  displayName: Optional[str] = "",
@@ -1767,7 +1767,7 @@ class Job:
1767
1767
  # Holds flags set by set_debug_flag()
1768
1768
  self._debug_flags: set[str] = set()
1769
1769
 
1770
- def __str__(self):
1770
+ def __str__(self) -> str:
1771
1771
  """
1772
1772
  Produce a useful logging string to identify this Job and distinguish it
1773
1773
  from its JobDescription.
@@ -1812,16 +1812,16 @@ class Job:
1812
1812
  return self.description.disk
1813
1813
 
1814
1814
  @disk.setter
1815
- def disk(self, val):
1815
+ def disk(self, val: int) -> None:
1816
1816
  self.description.disk = val
1817
1817
 
1818
1818
  @property
1819
- def memory(self):
1819
+ def memory(self) -> int:
1820
1820
  """The maximum number of bytes of memory the job will require to run."""
1821
1821
  return self.description.memory
1822
1822
 
1823
1823
  @memory.setter
1824
- def memory(self, val):
1824
+ def memory(self, val: int) -> None:
1825
1825
  self.description.memory = val
1826
1826
 
1827
1827
  @property
@@ -1830,7 +1830,7 @@ class Job:
1830
1830
  return self.description.cores
1831
1831
 
1832
1832
  @cores.setter
1833
- def cores(self, val):
1833
+ def cores(self, val: int) -> None:
1834
1834
  self.description.cores = val
1835
1835
 
1836
1836
  @property
@@ -1848,11 +1848,11 @@ class Job:
1848
1848
  return self.description.preemptible
1849
1849
 
1850
1850
  @deprecated(new_function_name="preemptible")
1851
- def preemptable(self):
1851
+ def preemptable(self) -> bool:
1852
1852
  return self.description.preemptible
1853
1853
 
1854
1854
  @preemptible.setter
1855
- def preemptible(self, val):
1855
+ def preemptible(self, val: bool) -> None:
1856
1856
  self.description.preemptible = val
1857
1857
 
1858
1858
  @property
@@ -1865,13 +1865,13 @@ class Job:
1865
1865
  return self.description.files_to_use
1866
1866
 
1867
1867
  @files_to_use.setter
1868
- def files_to_use(self, val: set[FileID]):
1868
+ def files_to_use(self, val: set[FileID]) -> None:
1869
1869
  self.description.files_to_use = val
1870
1870
 
1871
- def add_to_files_to_use(self, val: FileID):
1871
+ def add_to_files_to_use(self, val: FileID) -> None:
1872
1872
  self.description.files_to_use.add(val)
1873
1873
 
1874
- def remove_from_files_to_use(self, val: FileID):
1874
+ def remove_from_files_to_use(self, val: FileID) -> None:
1875
1875
  self.description.files_to_use.remove(val)
1876
1876
 
1877
1877
  def assignConfig(self, config: Config) -> None:
@@ -2296,7 +2296,7 @@ class Job:
2296
2296
 
2297
2297
  return {self._registry[jid] for jid in roots}
2298
2298
 
2299
- def checkJobGraphConnected(self):
2299
+ def checkJobGraphConnected(self) -> None:
2300
2300
  """
2301
2301
  :raises toil.job.JobGraphDeadlockException: if :func:`toil.job.Job.getRootJobs` does \
2302
2302
  not contain exactly one root job.
@@ -2312,7 +2312,7 @@ class Job:
2312
2312
  "Graph does not contain exactly one" " root job: %s" % rootJobs
2313
2313
  )
2314
2314
 
2315
- def checkJobGraphAcylic(self):
2315
+ def checkJobGraphAcylic(self) -> None:
2316
2316
  """
2317
2317
  :raises toil.job.JobGraphDeadlockException: if the connected component \
2318
2318
  of jobs containing this job contains any cycles of child/followOn dependencies \
@@ -2486,10 +2486,13 @@ class Job:
2486
2486
  """Used to setup and run Toil workflow."""
2487
2487
 
2488
2488
  @staticmethod
2489
- def getDefaultArgumentParser(jobstore_as_flag: bool = False) -> ArgumentParser:
2489
+ def getDefaultArgumentParser(jobstore_as_flag: bool = False) -> ArgParser:
2490
2490
  """
2491
2491
  Get argument parser with added toil workflow options.
2492
2492
 
2493
+ This is the Right Way to get an argument parser in a Toil Python
2494
+ workflow.
2495
+
2493
2496
  :param jobstore_as_flag: make the job store option a --jobStore flag instead of a required jobStore positional argument.
2494
2497
  :returns: The argument parser used by a toil workflow with added Toil options.
2495
2498
  """
@@ -2499,7 +2502,7 @@ class Job:
2499
2502
 
2500
2503
  @staticmethod
2501
2504
  def getDefaultOptions(
2502
- jobStore: Optional[str] = None, jobstore_as_flag: bool = False
2505
+ jobStore: Optional[StrPath] = None, jobstore_as_flag: bool = False
2503
2506
  ) -> Namespace:
2504
2507
  """
2505
2508
  Get default options for a toil workflow.
@@ -2520,9 +2523,9 @@ class Job:
2520
2523
  )
2521
2524
  arguments = []
2522
2525
  if jobstore_as_flag and jobStore is not None:
2523
- arguments = ["--jobstore", jobStore]
2526
+ arguments = ["--jobstore", str(jobStore)]
2524
2527
  if not jobstore_as_flag and jobStore is not None:
2525
- arguments = [jobStore]
2528
+ arguments = [str(jobStore)]
2526
2529
  return parser.parse_args(args=arguments)
2527
2530
 
2528
2531
  @staticmethod
@@ -2534,6 +2537,13 @@ class Job:
2534
2537
  Adds the default toil options to an :mod:`optparse` or :mod:`argparse`
2535
2538
  parser object.
2536
2539
 
2540
+ Consider using :meth:`getDefaultArgumentParser` instead, which will
2541
+ produce a parser of the correct class to use Toil's config file and
2542
+ environment variables. If ther parser passed here is just an
2543
+ :class:`argparse.ArgumentParser` and not a
2544
+ :class:`configargparse.ArgParser`, the Toil config file and
2545
+ environment variables will not be respected.
2546
+
2537
2547
  :param parser: Options object to add toil options to.
2538
2548
  :param jobstore_as_flag: make the job store option a --jobStore flag instead of a required jobStore positional argument.
2539
2549
  """
@@ -2571,13 +2581,13 @@ class Job:
2571
2581
 
2572
2582
  def __init__(
2573
2583
  self,
2574
- memory=None,
2575
- cores=None,
2576
- disk=None,
2577
- accelerators=None,
2578
- preemptible=None,
2579
- unitName=None,
2580
- ):
2584
+ memory: Optional[ParseableIndivisibleResource] = None,
2585
+ cores: Optional[ParseableDivisibleResource] = None,
2586
+ disk: Optional[ParseableIndivisibleResource] = None,
2587
+ accelerators: Optional[ParseableAcceleratorRequirement] = None,
2588
+ preemptible: Optional[ParseableFlag] = None,
2589
+ unitName: Optional[str] = "",
2590
+ ) -> None:
2581
2591
  """
2582
2592
  Memory, core and disk requirements are specified identically to as in \
2583
2593
  :func:`toil.job.Job.__init__`.
@@ -2603,7 +2613,7 @@ class Job:
2603
2613
  self.hostID = None
2604
2614
 
2605
2615
  @abstractmethod
2606
- def start(self, job: "Job") -> Any:
2616
+ def start(self, job: "ServiceHostJob") -> Any:
2607
2617
  """
2608
2618
  Start the service.
2609
2619
 
@@ -2616,7 +2626,7 @@ class Job:
2616
2626
  """
2617
2627
 
2618
2628
  @abstractmethod
2619
- def stop(self, job: "Job") -> None:
2629
+ def stop(self, job: "ServiceHostJob") -> None:
2620
2630
  """
2621
2631
  Stops the service. Function can block until complete.
2622
2632
 
@@ -3141,45 +3151,54 @@ class Job:
3141
3151
  startClock = ResourceMonitor.get_total_cpu_time()
3142
3152
  baseDir = os.getcwd()
3143
3153
 
3144
- yield
3145
-
3146
- if "download_only" in self._debug_flags:
3147
- # We should stop right away
3148
- logger.debug("Job did not stop itself after downloading files; stopping.")
3149
- raise DebugStoppingPointReached()
3150
-
3151
- # If the job is not a checkpoint job, add the promise files to delete
3152
- # to the list of jobStoreFileIDs to delete
3153
- # TODO: why is Promise holding a global list here???
3154
- if not self.checkpoint:
3155
- for jobStoreFileID in Promise.filesToDelete:
3156
- # Make sure to wrap the job store ID in a FileID object so the file store will accept it
3157
- # TODO: talk directly to the job store here instead.
3158
- fileStore.deleteGlobalFile(FileID(jobStoreFileID, 0))
3159
- else:
3160
- # Else copy them to the job description to delete later
3161
- self.description.checkpointFilesToDelete = list(Promise.filesToDelete)
3162
- Promise.filesToDelete.clear()
3163
- # Now indicate the asynchronous update of the job can happen
3164
- fileStore.startCommit(jobState=True)
3165
- # Change dir back to cwd dir, if changed by job (this is a safety issue)
3166
- if os.getcwd() != baseDir:
3167
- os.chdir(baseDir)
3168
- # Finish up the stats
3169
- if stats is not None:
3170
- totalCpuTime, totalMemoryUsage = (
3171
- ResourceMonitor.get_total_cpu_time_and_memory_usage()
3172
- )
3173
- stats.jobs.append(
3174
- Expando(
3175
- time=str(time.time() - startTime),
3176
- clock=str(totalCpuTime - startClock),
3177
- class_name=self._jobName(),
3178
- memory=str(totalMemoryUsage),
3179
- requested_cores=str(self.cores),
3180
- disk=str(fileStore.get_disk_usage()),
3154
+ succeeded = False
3155
+ try:
3156
+ yield
3157
+
3158
+ if "download_only" in self._debug_flags:
3159
+ # We should stop right away
3160
+ logger.debug("Job did not stop itself after downloading files; stopping.")
3161
+ raise DebugStoppingPointReached()
3162
+
3163
+ # If the job is not a checkpoint job, add the promise files to delete
3164
+ # to the list of jobStoreFileIDs to delete
3165
+ # TODO: why is Promise holding a global list here???
3166
+ if not self.checkpoint:
3167
+ for jobStoreFileID in Promise.filesToDelete:
3168
+ # Make sure to wrap the job store ID in a FileID object so the file store will accept it
3169
+ # TODO: talk directly to the job store here instead.
3170
+ fileStore.deleteGlobalFile(FileID(jobStoreFileID, 0))
3171
+ else:
3172
+ # Else copy them to the job description to delete later
3173
+ self.description.checkpointFilesToDelete = list(Promise.filesToDelete)
3174
+ Promise.filesToDelete.clear()
3175
+ # Now indicate the asynchronous update of the job can happen
3176
+ fileStore.startCommit(jobState=True)
3177
+
3178
+ succeeded = True
3179
+ finally:
3180
+ # Change dir back to cwd dir, if changed by job (this is a safety issue)
3181
+ if os.getcwd() != baseDir:
3182
+ os.chdir(baseDir)
3183
+ # Finish up the stats
3184
+ if stats is not None:
3185
+ totalCpuTime, total_memory_kib = (
3186
+ ResourceMonitor.get_total_cpu_time_and_memory_usage()
3187
+ )
3188
+ stats.jobs.append(
3189
+ # TODO: We represent everything as strings in the stats
3190
+ # even though the JSON transport can take bools and floats.
3191
+ Expando(
3192
+ start=str(startTime),
3193
+ time=str(time.time() - startTime),
3194
+ clock=str(totalCpuTime - startClock),
3195
+ class_name=self._jobName(),
3196
+ memory=str(total_memory_kib),
3197
+ requested_cores=str(self.cores), # TODO: Isn't this really consumed cores?
3198
+ disk=str(fileStore.get_disk_usage()),
3199
+ succeeded=str(succeeded),
3200
+ )
3181
3201
  )
3182
- )
3183
3202
 
3184
3203
  def _runner(
3185
3204
  self,
@@ -3294,7 +3313,9 @@ class FunctionWrappingJob(Job):
3294
3313
  Job used to wrap a function. In its `run` method the wrapped function is called.
3295
3314
  """
3296
3315
 
3297
- def __init__(self, userFunction, *args, **kwargs):
3316
+ def __init__(
3317
+ self, userFunction: Callable[[...], Any], *args: Any, **kwargs: Any
3318
+ ) -> None:
3298
3319
  """
3299
3320
  :param callable userFunction: The function to wrap. It will be called with ``*args`` and
3300
3321
  ``**kwargs`` as arguments.
@@ -3317,7 +3338,9 @@ class FunctionWrappingJob(Job):
3317
3338
  list(zip(argSpec.args[-len(argSpec.defaults) :], argSpec.defaults))
3318
3339
  )
3319
3340
 
3320
- def resolve(key, default=None, dehumanize=False):
3341
+ def resolve(
3342
+ key, default: Optional[Any] = None, dehumanize: bool = False
3343
+ ) -> Any:
3321
3344
  try:
3322
3345
  # First, try constructor arguments, ...
3323
3346
  value = kwargs.pop(key)
@@ -3351,7 +3374,7 @@ class FunctionWrappingJob(Job):
3351
3374
  self._args = args
3352
3375
  self._kwargs = kwargs
3353
3376
 
3354
- def _getUserFunction(self):
3377
+ def _getUserFunction(self) -> Callable[..., Any]:
3355
3378
  logger.debug(
3356
3379
  "Loading user function %s from module %s.",
3357
3380
  self.userFunctionName,
@@ -3360,14 +3383,14 @@ class FunctionWrappingJob(Job):
3360
3383
  userFunctionModule = self._loadUserModule(self.userFunctionModule)
3361
3384
  return getattr(userFunctionModule, self.userFunctionName)
3362
3385
 
3363
- def run(self, fileStore):
3386
+ def run(self, fileStore: "AbstractFileStore") -> Any:
3364
3387
  userFunction = self._getUserFunction()
3365
3388
  return userFunction(*self._args, **self._kwargs)
3366
3389
 
3367
- def getUserScript(self):
3390
+ def getUserScript(self) -> str:
3368
3391
  return self.userFunctionModule
3369
3392
 
3370
- def _jobName(self):
3393
+ def _jobName(self) -> str:
3371
3394
  return ".".join(
3372
3395
  (
3373
3396
  self.__class__.__name__,
@@ -3405,10 +3428,10 @@ class JobFunctionWrappingJob(FunctionWrappingJob):
3405
3428
  """
3406
3429
 
3407
3430
  @property
3408
- def fileStore(self):
3431
+ def fileStore(self) -> "AbstractFileStore":
3409
3432
  return self._fileStore
3410
3433
 
3411
- def run(self, fileStore):
3434
+ def run(self, fileStore: "AbstractFileStore") -> Any:
3412
3435
  userFunction = self._getUserFunction()
3413
3436
  rValue = userFunction(*((self,) + tuple(self._args)), **self._kwargs)
3414
3437
  return rValue
@@ -3504,7 +3527,7 @@ class EncapsulatedJob(Job):
3504
3527
  the same value after A or A.encapsulate() has been run.
3505
3528
  """
3506
3529
 
3507
- def __init__(self, job, unitName=None):
3530
+ def __init__(self, job: Optional[Job], unitName: Optional[str] = None) -> None:
3508
3531
  """
3509
3532
  :param toil.job.Job job: the job to encapsulate.
3510
3533
  :param str unitName: human-readable name to identify this job instance.
@@ -3538,7 +3561,7 @@ class EncapsulatedJob(Job):
3538
3561
  self.encapsulatedJob = None
3539
3562
  self.encapsulatedFollowOn = None
3540
3563
 
3541
- def addChild(self, childJob):
3564
+ def addChild(self, childJob: Job) -> Job:
3542
3565
  if self.encapsulatedFollowOn is None:
3543
3566
  raise RuntimeError(
3544
3567
  "Children cannot be added to EncapsulatedJob while it is running"
@@ -3554,7 +3577,7 @@ class EncapsulatedJob(Job):
3554
3577
  self.encapsulatedFollowOn, service, parentService=parentService
3555
3578
  )
3556
3579
 
3557
- def addFollowOn(self, followOnJob):
3580
+ def addFollowOn(self, followOnJob: Job) -> Job:
3558
3581
  if self.encapsulatedFollowOn is None:
3559
3582
  raise RuntimeError(
3560
3583
  "Follow-ons cannot be added to EncapsulatedJob while it is running"
@@ -3977,7 +4000,7 @@ class CombineImportsJob(Job):
3977
4000
  self._d = d
3978
4001
  super().__init__(**kwargs)
3979
4002
 
3980
- def run(self, file_store: AbstractFileStore) -> Promised[Dict[str, FileID]]:
4003
+ def run(self, file_store: "AbstractFileStore") -> Promised[Dict[str, FileID]]:
3981
4004
  """
3982
4005
  Merge the dicts
3983
4006
  """
@@ -4032,7 +4055,7 @@ class WorkerImportJob(Job):
4032
4055
  path_to_fileid[file] = imported
4033
4056
  return path_to_fileid
4034
4057
 
4035
- def run(self, file_store: AbstractFileStore) -> Promised[Dict[str, FileID]]:
4058
+ def run(self, file_store: "AbstractFileStore") -> Promised[Dict[str, FileID]]:
4036
4059
  """
4037
4060
  Import the workflow inputs and then create and run the workflow.
4038
4061
  :return: Promise of workflow outputs
@@ -4068,7 +4091,7 @@ class ImportsJob(Job):
4068
4091
  self._import_worker_disk = import_worker_disk
4069
4092
 
4070
4093
  def run(
4071
- self, file_store: AbstractFileStore
4094
+ self, file_store: "AbstractFileStore"
4072
4095
  ) -> Tuple[Promised[Dict[str, FileID]], Dict[str, FileMetadata]]:
4073
4096
  """
4074
4097
  Import the workflow inputs and then create and run the workflow.
@@ -4098,7 +4121,8 @@ class ImportsJob(Job):
4098
4121
  # schedule the individual file
4099
4122
  per_batch_files.append(filename)
4100
4123
  file_batches.append(per_batch_files)
4101
- # reset batching calculation
4124
+ # reset batch to empty
4125
+ per_batch_files = []
4102
4126
  per_batch_size = 0
4103
4127
  else:
4104
4128
  per_batch_size += filesize
@@ -4265,7 +4289,7 @@ class PromisedRequirement:
4265
4289
  C = B.addChildFn(h, cores=PromisedRequirement(lambda x: 2*x, B.rv()))
4266
4290
  """
4267
4291
 
4268
- def __init__(self, valueOrCallable, *args):
4292
+ def __init__(self, valueOrCallable: Any, *args: Any) -> None:
4269
4293
  """
4270
4294
  Initialize this Promised Requirement.
4271
4295
 
@@ -4289,7 +4313,7 @@ class PromisedRequirement:
4289
4313
  self._func = dill.dumps(func)
4290
4314
  self._args = list(args)
4291
4315
 
4292
- def getValue(self):
4316
+ def getValue(self) -> Any:
4293
4317
  """Return PromisedRequirement value."""
4294
4318
  func = dill.loads(self._func)
4295
4319
  return func(*self._args)
@@ -1686,17 +1686,39 @@ class AbstractJobStore(ABC):
1686
1686
  sharedFileName: str,
1687
1687
  encoding: Optional[str] = None,
1688
1688
  errors: Optional[str] = None,
1689
- ) -> ContextManager[IO[bytes]]:
1689
+ ) -> Union[ContextManager[IO[str]], ContextManager[IO[bytes]]]:
1690
1690
  return self.read_shared_file_stream(sharedFileName, encoding, errors)
1691
1691
 
1692
+ @overload
1692
1693
  @abstractmethod
1693
1694
  @contextmanager
1694
1695
  def read_shared_file_stream(
1695
1696
  self,
1696
1697
  shared_file_name: str,
1697
- encoding: Optional[str] = None,
1698
+ encoding: str,
1699
+ errors: Optional[str] = None,
1700
+ ) -> Iterator[IO[str]]:
1701
+ """If encoding is specified, then a text file handle is provided."""
1702
+
1703
+ @overload
1704
+ @abstractmethod
1705
+ @contextmanager
1706
+ def read_shared_file_stream(
1707
+ self,
1708
+ shared_file_name: str,
1709
+ encoding: Literal[None] = None,
1698
1710
  errors: Optional[str] = None,
1699
1711
  ) -> Iterator[IO[bytes]]:
1712
+ """If no encoding is provided, then a bytes file handle is provided."""
1713
+
1714
+ @abstractmethod
1715
+ @contextmanager
1716
+ def read_shared_file_stream(
1717
+ self,
1718
+ shared_file_name: str,
1719
+ encoding: Optional[str] = None,
1720
+ errors: Optional[str] = None,
1721
+ ) -> Union[Iterator[IO[str]], Iterator[IO[bytes]]]:
1700
1722
  """
1701
1723
  Returns a context manager yielding a readable file handle to the global file referenced
1702
1724
  by the given name.
@@ -1711,7 +1733,6 @@ class AbstractJobStore(ABC):
1711
1733
  are the same as for open(). Defaults to 'strict' when an encoding is specified.
1712
1734
 
1713
1735
  :return: a context manager yielding a readable file handle
1714
- :rtype: Iterator[IO[bytes]]
1715
1736
  """
1716
1737
  raise NotImplementedError()
1717
1738
 
@@ -637,30 +637,61 @@ class AWSJobStore(AbstractJobStore):
637
637
  else:
638
638
  super()._default_export_file(otherCls, file_id, uri)
639
639
 
640
+ ###
641
+ # URL access implementation
642
+ ###
643
+
644
+ # URL access methods aren't used by the rest of the job store methods.
645
+
640
646
  @classmethod
641
647
  def _url_exists(cls, url: ParseResult) -> bool:
642
648
  try:
643
- get_object_for_url(url, existing=True)
649
+ try:
650
+ get_object_for_url(url, existing=True, anonymous=True)
651
+ except PermissionError:
652
+ # If we can't look anonymously, log in
653
+ get_object_for_url(url, existing=True)
644
654
  return True
645
655
  except FileNotFoundError:
646
656
  # Not a file
647
- # Might be a directory.
657
+ # Might be a directory. Or we might not have access to know.
658
+ # See if it's a directory.
648
659
  return cls._get_is_directory(url)
649
660
 
650
661
  @classmethod
651
662
  def _get_size(cls, url: ParseResult) -> int:
652
- return get_object_for_url(url, existing=True).content_length
663
+ try:
664
+ src_obj = get_object_for_url(url, existing=True, anonymous=True)
665
+ except PermissionError:
666
+ src_obj = get_object_for_url(url, existing=True)
667
+ return src_obj.content_length
653
668
 
654
669
  @classmethod
655
670
  def _read_from_url(cls, url: ParseResult, writable):
656
- srcObj = get_object_for_url(url, existing=True)
657
- srcObj.download_fileobj(writable)
658
- return (srcObj.content_length, False) # executable bit is always False
671
+ try:
672
+ src_obj = get_object_for_url(url, existing=True, anonymous=True)
673
+ src_obj.download_fileobj(writable)
674
+ except Exception as e:
675
+ if isinstance(e, PermissionError) or (isinstance(e, ClientError) and get_error_status(e) == 403):
676
+ # The object setup or the download does not have permission. Try again with a login.
677
+ src_obj = get_object_for_url(url, existing=True)
678
+ src_obj.download_fileobj(writable)
679
+ else:
680
+ raise
681
+ return (src_obj.content_length, False) # executable bit is always False
659
682
 
660
683
  @classmethod
661
684
  def _open_url(cls, url: ParseResult) -> IO[bytes]:
662
- src_obj = get_object_for_url(url, existing=True)
663
- response = src_obj.get()
685
+ try:
686
+ src_obj = get_object_for_url(url, existing=True, anonymous=True)
687
+ response = src_obj.get()
688
+ except Exception as e:
689
+ if isinstance(e, PermissionError) or (isinstance(e, ClientError) and get_error_status(e) == 403):
690
+ # The object setup or the download does not have permission. Try again with a login.
691
+ src_obj = get_object_for_url(url, existing=True)
692
+ response = src_obj.get()
693
+ else:
694
+ raise
664
695
  # We should get back a response with a stream in 'Body'
665
696
  if "Body" not in response:
666
697
  raise RuntimeError(f"Could not fetch body stream for {url}")
@@ -670,6 +701,7 @@ class AWSJobStore(AbstractJobStore):
670
701
  def _write_to_url(
671
702
  cls, readable, url: ParseResult, executable: bool = False
672
703
  ) -> None:
704
+ # Don't try to do anonymous writes.
673
705
  dstObj = get_object_for_url(url)
674
706
 
675
707
  logger.debug("Uploading %s", dstObj.key)
@@ -684,13 +716,17 @@ class AWSJobStore(AbstractJobStore):
684
716
 
685
717
  @classmethod
686
718
  def _list_url(cls, url: ParseResult) -> list[str]:
687
- return list_objects_for_url(url)
719
+ try:
720
+ return list_objects_for_url(url, anonymous=True)
721
+ except PermissionError:
722
+ return list_objects_for_url(url)
723
+
688
724
 
689
725
  @classmethod
690
726
  def _get_is_directory(cls, url: ParseResult) -> bool:
691
727
  # We consider it a directory if anything is in it.
692
728
  # TODO: Can we just get the first item and not the whole list?
693
- return len(list_objects_for_url(url)) > 0
729
+ return len(cls._list_url(url)) > 0
694
730
 
695
731
  @classmethod
696
732
  def _supports_url(cls, url: ParseResult, export: bool = False) -> bool:
@@ -395,6 +395,7 @@ class FileJobStore(AbstractJobStore):
395
395
 
396
396
  @classmethod
397
397
  def _url_exists(cls, url: ParseResult) -> bool:
398
+ # Note that broken symlinks will not be shown to exist.
398
399
  return os.path.exists(cls._extract_path_from_url(url))
399
400
 
400
401
  @classmethod
@@ -771,8 +772,31 @@ class FileJobStore(AbstractJobStore):
771
772
  ) as f:
772
773
  yield f
773
774
 
775
+ @overload
774
776
  @contextmanager
775
- def read_shared_file_stream(self, shared_file_name, encoding=None, errors=None):
777
+ def read_shared_file_stream(
778
+ self,
779
+ shared_file_name: str,
780
+ encoding: str,
781
+ errors: Optional[str] = None,
782
+ ) -> Iterator[IO[str]]: ...
783
+
784
+ @overload
785
+ @contextmanager
786
+ def read_shared_file_stream(
787
+ self,
788
+ shared_file_name: str,
789
+ encoding: Literal[None] = None,
790
+ errors: Optional[str] = None,
791
+ ) -> Iterator[IO[bytes]]: ...
792
+
793
+ @contextmanager
794
+ def read_shared_file_stream(
795
+ self,
796
+ shared_file_name: str,
797
+ encoding: Optional[str] = None,
798
+ errors: Optional[str] = None,
799
+ ) -> Union[Iterator[IO[bytes]], Iterator[IO[str]]]:
776
800
  self._requireValidSharedFileName(shared_file_name)
777
801
  try:
778
802
  with open(