toil 8.0.0__py3-none-any.whl → 8.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (270) hide show
  1. toil/__init__.py +4 -39
  2. toil/batchSystems/abstractBatchSystem.py +1 -1
  3. toil/batchSystems/abstractGridEngineBatchSystem.py +1 -1
  4. toil/batchSystems/awsBatch.py +1 -1
  5. toil/batchSystems/cleanup_support.py +1 -1
  6. toil/batchSystems/kubernetes.py +53 -7
  7. toil/batchSystems/local_support.py +1 -1
  8. toil/batchSystems/mesos/batchSystem.py +13 -8
  9. toil/batchSystems/mesos/test/__init__.py +3 -2
  10. toil/batchSystems/options.py +1 -0
  11. toil/batchSystems/singleMachine.py +1 -1
  12. toil/batchSystems/slurm.py +229 -84
  13. toil/bus.py +5 -3
  14. toil/common.py +198 -54
  15. toil/cwl/cwltoil.py +32 -11
  16. toil/job.py +110 -86
  17. toil/jobStores/abstractJobStore.py +24 -3
  18. toil/jobStores/aws/jobStore.py +46 -10
  19. toil/jobStores/fileJobStore.py +25 -1
  20. toil/jobStores/googleJobStore.py +104 -30
  21. toil/leader.py +9 -0
  22. toil/lib/accelerators.py +3 -1
  23. toil/lib/aws/session.py +14 -3
  24. toil/lib/aws/utils.py +92 -35
  25. toil/lib/aws/utils.py.orig +504 -0
  26. toil/lib/bioio.py +1 -1
  27. toil/lib/docker.py +252 -91
  28. toil/lib/dockstore.py +387 -0
  29. toil/lib/ec2nodes.py +3 -2
  30. toil/lib/exceptions.py +5 -3
  31. toil/lib/history.py +1345 -0
  32. toil/lib/history_submission.py +695 -0
  33. toil/lib/io.py +56 -23
  34. toil/lib/misc.py +25 -1
  35. toil/lib/resources.py +2 -1
  36. toil/lib/retry.py +10 -10
  37. toil/lib/threading.py +11 -10
  38. toil/lib/{integration.py → trs.py} +95 -46
  39. toil/lib/web.py +38 -0
  40. toil/options/common.py +25 -2
  41. toil/options/cwl.py +10 -0
  42. toil/options/wdl.py +11 -0
  43. toil/provisioners/gceProvisioner.py +4 -4
  44. toil/server/api_spec/LICENSE +201 -0
  45. toil/server/api_spec/README.rst +5 -0
  46. toil/server/cli/wes_cwl_runner.py +5 -4
  47. toil/server/utils.py +2 -3
  48. toil/statsAndLogging.py +35 -1
  49. toil/test/__init__.py +275 -115
  50. toil/test/batchSystems/batchSystemTest.py +227 -205
  51. toil/test/batchSystems/test_slurm.py +199 -2
  52. toil/test/cactus/pestis.tar.gz +0 -0
  53. toil/test/conftest.py +7 -0
  54. toil/test/cwl/2.fasta +11 -0
  55. toil/test/cwl/2.fastq +12 -0
  56. toil/test/cwl/conftest.py +39 -0
  57. toil/test/cwl/cwlTest.py +1015 -780
  58. toil/test/cwl/directory/directory/file.txt +15 -0
  59. toil/test/cwl/download_directory_file.json +4 -0
  60. toil/test/cwl/download_directory_s3.json +4 -0
  61. toil/test/cwl/download_file.json +6 -0
  62. toil/test/cwl/download_http.json +6 -0
  63. toil/test/cwl/download_https.json +6 -0
  64. toil/test/cwl/download_s3.json +6 -0
  65. toil/test/cwl/download_subdirectory_file.json +5 -0
  66. toil/test/cwl/download_subdirectory_s3.json +5 -0
  67. toil/test/cwl/empty.json +1 -0
  68. toil/test/cwl/mock_mpi/fake_mpi.yml +8 -0
  69. toil/test/cwl/mock_mpi/fake_mpi_run.py +42 -0
  70. toil/test/cwl/optional-file-exists.json +6 -0
  71. toil/test/cwl/optional-file-missing.json +6 -0
  72. toil/test/cwl/optional-file.cwl +18 -0
  73. toil/test/cwl/preemptible_expression.json +1 -0
  74. toil/test/cwl/revsort-job-missing.json +6 -0
  75. toil/test/cwl/revsort-job.json +6 -0
  76. toil/test/cwl/s3_secondary_file.json +16 -0
  77. toil/test/cwl/seqtk_seq_job.json +6 -0
  78. toil/test/cwl/stream.json +6 -0
  79. toil/test/cwl/test_filename_conflict_resolution.ms/table.dat +0 -0
  80. toil/test/cwl/test_filename_conflict_resolution.ms/table.f0 +0 -0
  81. toil/test/cwl/test_filename_conflict_resolution.ms/table.f1 +0 -0
  82. toil/test/cwl/test_filename_conflict_resolution.ms/table.f1i +0 -0
  83. toil/test/cwl/test_filename_conflict_resolution.ms/table.f2 +0 -0
  84. toil/test/cwl/test_filename_conflict_resolution.ms/table.f2_TSM0 +0 -0
  85. toil/test/cwl/test_filename_conflict_resolution.ms/table.f3 +0 -0
  86. toil/test/cwl/test_filename_conflict_resolution.ms/table.f3_TSM0 +0 -0
  87. toil/test/cwl/test_filename_conflict_resolution.ms/table.f4 +0 -0
  88. toil/test/cwl/test_filename_conflict_resolution.ms/table.f4_TSM0 +0 -0
  89. toil/test/cwl/test_filename_conflict_resolution.ms/table.f5 +0 -0
  90. toil/test/cwl/test_filename_conflict_resolution.ms/table.info +0 -0
  91. toil/test/cwl/test_filename_conflict_resolution.ms/table.lock +0 -0
  92. toil/test/cwl/whale.txt +16 -0
  93. toil/test/docs/scripts/example_alwaysfail.py +38 -0
  94. toil/test/docs/scripts/example_alwaysfail_with_files.wdl +33 -0
  95. toil/test/docs/scripts/example_cachingbenchmark.py +117 -0
  96. toil/test/docs/scripts/stagingExampleFiles/in.txt +1 -0
  97. toil/test/docs/scripts/stagingExampleFiles/out.txt +2 -0
  98. toil/test/docs/scripts/tutorial_arguments.py +23 -0
  99. toil/test/docs/scripts/tutorial_debugging.patch +12 -0
  100. toil/test/docs/scripts/tutorial_debugging_hangs.wdl +126 -0
  101. toil/test/docs/scripts/tutorial_debugging_works.wdl +129 -0
  102. toil/test/docs/scripts/tutorial_docker.py +20 -0
  103. toil/test/docs/scripts/tutorial_dynamic.py +24 -0
  104. toil/test/docs/scripts/tutorial_encapsulation.py +28 -0
  105. toil/test/docs/scripts/tutorial_encapsulation2.py +29 -0
  106. toil/test/docs/scripts/tutorial_helloworld.py +15 -0
  107. toil/test/docs/scripts/tutorial_invokeworkflow.py +27 -0
  108. toil/test/docs/scripts/tutorial_invokeworkflow2.py +30 -0
  109. toil/test/docs/scripts/tutorial_jobfunctions.py +22 -0
  110. toil/test/docs/scripts/tutorial_managing.py +29 -0
  111. toil/test/docs/scripts/tutorial_managing2.py +56 -0
  112. toil/test/docs/scripts/tutorial_multiplejobs.py +25 -0
  113. toil/test/docs/scripts/tutorial_multiplejobs2.py +21 -0
  114. toil/test/docs/scripts/tutorial_multiplejobs3.py +22 -0
  115. toil/test/docs/scripts/tutorial_promises.py +25 -0
  116. toil/test/docs/scripts/tutorial_promises2.py +30 -0
  117. toil/test/docs/scripts/tutorial_quickstart.py +22 -0
  118. toil/test/docs/scripts/tutorial_requirements.py +44 -0
  119. toil/test/docs/scripts/tutorial_services.py +45 -0
  120. toil/test/docs/scripts/tutorial_staging.py +45 -0
  121. toil/test/docs/scripts/tutorial_stats.py +64 -0
  122. toil/test/lib/aws/test_iam.py +3 -1
  123. toil/test/lib/dockerTest.py +205 -122
  124. toil/test/lib/test_history.py +236 -0
  125. toil/test/lib/test_trs.py +161 -0
  126. toil/test/provisioners/aws/awsProvisionerTest.py +12 -9
  127. toil/test/provisioners/clusterTest.py +4 -4
  128. toil/test/provisioners/gceProvisionerTest.py +16 -14
  129. toil/test/sort/sort.py +4 -1
  130. toil/test/src/busTest.py +17 -17
  131. toil/test/src/deferredFunctionTest.py +145 -132
  132. toil/test/src/importExportFileTest.py +71 -63
  133. toil/test/src/jobEncapsulationTest.py +27 -28
  134. toil/test/src/jobServiceTest.py +149 -133
  135. toil/test/src/jobTest.py +219 -211
  136. toil/test/src/miscTests.py +66 -60
  137. toil/test/src/promisedRequirementTest.py +163 -169
  138. toil/test/src/regularLogTest.py +24 -24
  139. toil/test/src/resourceTest.py +82 -76
  140. toil/test/src/restartDAGTest.py +51 -47
  141. toil/test/src/resumabilityTest.py +24 -19
  142. toil/test/src/retainTempDirTest.py +60 -57
  143. toil/test/src/systemTest.py +17 -13
  144. toil/test/src/threadingTest.py +29 -32
  145. toil/test/utils/ABCWorkflowDebug/B_file.txt +1 -0
  146. toil/test/utils/ABCWorkflowDebug/debugWorkflow.py +204 -0
  147. toil/test/utils/ABCWorkflowDebug/mkFile.py +16 -0
  148. toil/test/utils/ABCWorkflowDebug/sleep.cwl +12 -0
  149. toil/test/utils/ABCWorkflowDebug/sleep.yaml +1 -0
  150. toil/test/utils/toilDebugTest.py +117 -102
  151. toil/test/utils/toilKillTest.py +54 -53
  152. toil/test/utils/utilsTest.py +303 -229
  153. toil/test/wdl/lint_error.wdl +9 -0
  154. toil/test/wdl/md5sum/empty_file.json +1 -0
  155. toil/test/wdl/md5sum/md5sum-gs.json +1 -0
  156. toil/test/wdl/md5sum/md5sum.1.0.wdl +32 -0
  157. toil/test/wdl/md5sum/md5sum.input +1 -0
  158. toil/test/wdl/md5sum/md5sum.json +1 -0
  159. toil/test/wdl/md5sum/md5sum.wdl +25 -0
  160. toil/test/wdl/miniwdl_self_test/inputs-namespaced.json +1 -0
  161. toil/test/wdl/miniwdl_self_test/inputs.json +1 -0
  162. toil/test/wdl/miniwdl_self_test/self_test.wdl +40 -0
  163. toil/test/wdl/standard_library/as_map.json +16 -0
  164. toil/test/wdl/standard_library/as_map_as_input.wdl +23 -0
  165. toil/test/wdl/standard_library/as_pairs.json +7 -0
  166. toil/test/wdl/standard_library/as_pairs_as_input.wdl +23 -0
  167. toil/test/wdl/standard_library/ceil.json +3 -0
  168. toil/test/wdl/standard_library/ceil_as_command.wdl +16 -0
  169. toil/test/wdl/standard_library/ceil_as_input.wdl +16 -0
  170. toil/test/wdl/standard_library/collect_by_key.json +1 -0
  171. toil/test/wdl/standard_library/collect_by_key_as_input.wdl +23 -0
  172. toil/test/wdl/standard_library/cross.json +11 -0
  173. toil/test/wdl/standard_library/cross_as_input.wdl +19 -0
  174. toil/test/wdl/standard_library/flatten.json +7 -0
  175. toil/test/wdl/standard_library/flatten_as_input.wdl +18 -0
  176. toil/test/wdl/standard_library/floor.json +3 -0
  177. toil/test/wdl/standard_library/floor_as_command.wdl +16 -0
  178. toil/test/wdl/standard_library/floor_as_input.wdl +16 -0
  179. toil/test/wdl/standard_library/keys.json +8 -0
  180. toil/test/wdl/standard_library/keys_as_input.wdl +24 -0
  181. toil/test/wdl/standard_library/length.json +7 -0
  182. toil/test/wdl/standard_library/length_as_input.wdl +16 -0
  183. toil/test/wdl/standard_library/length_as_input_with_map.json +7 -0
  184. toil/test/wdl/standard_library/length_as_input_with_map.wdl +17 -0
  185. toil/test/wdl/standard_library/length_invalid.json +3 -0
  186. toil/test/wdl/standard_library/range.json +3 -0
  187. toil/test/wdl/standard_library/range_0.json +3 -0
  188. toil/test/wdl/standard_library/range_as_input.wdl +17 -0
  189. toil/test/wdl/standard_library/range_invalid.json +3 -0
  190. toil/test/wdl/standard_library/read_boolean.json +3 -0
  191. toil/test/wdl/standard_library/read_boolean_as_command.wdl +17 -0
  192. toil/test/wdl/standard_library/read_float.json +3 -0
  193. toil/test/wdl/standard_library/read_float_as_command.wdl +17 -0
  194. toil/test/wdl/standard_library/read_int.json +3 -0
  195. toil/test/wdl/standard_library/read_int_as_command.wdl +17 -0
  196. toil/test/wdl/standard_library/read_json.json +3 -0
  197. toil/test/wdl/standard_library/read_json_as_output.wdl +31 -0
  198. toil/test/wdl/standard_library/read_lines.json +3 -0
  199. toil/test/wdl/standard_library/read_lines_as_output.wdl +31 -0
  200. toil/test/wdl/standard_library/read_map.json +3 -0
  201. toil/test/wdl/standard_library/read_map_as_output.wdl +31 -0
  202. toil/test/wdl/standard_library/read_string.json +3 -0
  203. toil/test/wdl/standard_library/read_string_as_command.wdl +17 -0
  204. toil/test/wdl/standard_library/read_tsv.json +3 -0
  205. toil/test/wdl/standard_library/read_tsv_as_output.wdl +31 -0
  206. toil/test/wdl/standard_library/round.json +3 -0
  207. toil/test/wdl/standard_library/round_as_command.wdl +16 -0
  208. toil/test/wdl/standard_library/round_as_input.wdl +16 -0
  209. toil/test/wdl/standard_library/size.json +3 -0
  210. toil/test/wdl/standard_library/size_as_command.wdl +17 -0
  211. toil/test/wdl/standard_library/size_as_output.wdl +36 -0
  212. toil/test/wdl/standard_library/stderr.json +3 -0
  213. toil/test/wdl/standard_library/stderr_as_output.wdl +30 -0
  214. toil/test/wdl/standard_library/stdout.json +3 -0
  215. toil/test/wdl/standard_library/stdout_as_output.wdl +30 -0
  216. toil/test/wdl/standard_library/sub.json +3 -0
  217. toil/test/wdl/standard_library/sub_as_input.wdl +17 -0
  218. toil/test/wdl/standard_library/sub_as_input_with_file.wdl +17 -0
  219. toil/test/wdl/standard_library/transpose.json +6 -0
  220. toil/test/wdl/standard_library/transpose_as_input.wdl +18 -0
  221. toil/test/wdl/standard_library/write_json.json +6 -0
  222. toil/test/wdl/standard_library/write_json_as_command.wdl +17 -0
  223. toil/test/wdl/standard_library/write_lines.json +7 -0
  224. toil/test/wdl/standard_library/write_lines_as_command.wdl +17 -0
  225. toil/test/wdl/standard_library/write_map.json +6 -0
  226. toil/test/wdl/standard_library/write_map_as_command.wdl +17 -0
  227. toil/test/wdl/standard_library/write_tsv.json +6 -0
  228. toil/test/wdl/standard_library/write_tsv_as_command.wdl +17 -0
  229. toil/test/wdl/standard_library/zip.json +12 -0
  230. toil/test/wdl/standard_library/zip_as_input.wdl +19 -0
  231. toil/test/wdl/test.csv +3 -0
  232. toil/test/wdl/test.tsv +3 -0
  233. toil/test/wdl/testfiles/croo.wdl +38 -0
  234. toil/test/wdl/testfiles/drop_files.wdl +62 -0
  235. toil/test/wdl/testfiles/drop_files_subworkflow.wdl +13 -0
  236. toil/test/wdl/testfiles/empty.txt +0 -0
  237. toil/test/wdl/testfiles/not_enough_outputs.wdl +33 -0
  238. toil/test/wdl/testfiles/random.wdl +66 -0
  239. toil/test/wdl/testfiles/string_file_coercion.json +1 -0
  240. toil/test/wdl/testfiles/string_file_coercion.wdl +35 -0
  241. toil/test/wdl/testfiles/test.json +4 -0
  242. toil/test/wdl/testfiles/test_boolean.txt +1 -0
  243. toil/test/wdl/testfiles/test_float.txt +1 -0
  244. toil/test/wdl/testfiles/test_int.txt +1 -0
  245. toil/test/wdl/testfiles/test_lines.txt +5 -0
  246. toil/test/wdl/testfiles/test_map.txt +2 -0
  247. toil/test/wdl/testfiles/test_string.txt +1 -0
  248. toil/test/wdl/testfiles/url_to_file.wdl +13 -0
  249. toil/test/wdl/testfiles/url_to_optional_file.wdl +13 -0
  250. toil/test/wdl/testfiles/vocab.json +1 -0
  251. toil/test/wdl/testfiles/vocab.wdl +66 -0
  252. toil/test/wdl/testfiles/wait.wdl +34 -0
  253. toil/test/wdl/wdl_specification/type_pair.json +23 -0
  254. toil/test/wdl/wdl_specification/type_pair_basic.wdl +36 -0
  255. toil/test/wdl/wdl_specification/type_pair_with_files.wdl +36 -0
  256. toil/test/wdl/wdl_specification/v1_spec.json +1 -0
  257. toil/test/wdl/wdl_specification/v1_spec_declaration.wdl +39 -0
  258. toil/test/wdl/wdltoil_test.py +681 -408
  259. toil/test/wdl/wdltoil_test_kubernetes.py +2 -2
  260. toil/version.py +10 -10
  261. toil/wdl/wdltoil.py +350 -123
  262. toil/worker.py +113 -33
  263. {toil-8.0.0.dist-info → toil-8.2.0.dist-info}/METADATA +13 -7
  264. toil-8.2.0.dist-info/RECORD +439 -0
  265. {toil-8.0.0.dist-info → toil-8.2.0.dist-info}/WHEEL +1 -1
  266. toil/test/lib/test_integration.py +0 -104
  267. toil-8.0.0.dist-info/RECORD +0 -253
  268. {toil-8.0.0.dist-info → toil-8.2.0.dist-info}/entry_points.txt +0 -0
  269. {toil-8.0.0.dist-info → toil-8.2.0.dist-info/licenses}/LICENSE +0 -0
  270. {toil-8.0.0.dist-info → toil-8.2.0.dist-info}/top_level.txt +0 -0
@@ -20,8 +20,8 @@ import uuid
20
20
  from contextlib import contextmanager
21
21
  from functools import wraps
22
22
  from io import BytesIO
23
- from typing import IO, Optional
24
- from urllib.parse import ParseResult
23
+ from typing import Any, IO, Iterator, Optional
24
+ from urllib.parse import ParseResult, urlunparse
25
25
 
26
26
  from google.api_core.exceptions import (
27
27
  GoogleAPICallError,
@@ -90,6 +90,46 @@ def google_retry(f):
90
90
 
91
91
  return wrapper
92
92
 
93
+ @contextmanager
94
+ def permission_error_reporter(url: ParseResult, notes: str) -> Iterator[None]:
95
+ """
96
+ Detect and usefully report permission errors.
97
+
98
+ If we fall back to anonymous credentials, but they don't have permission
99
+ for something, the Google Cloud Storage module will try to refresh them
100
+ behind the scenes. Then it will complain::
101
+
102
+ <class 'google.auth.exceptions.InvalidOperation'>: Anonymous credentials cannot be refreshed.
103
+
104
+ We need to detect this and report that the real problem is that the user
105
+ has not set up any credentials. When you try to make the client
106
+ non-anonymously and don't have credentials set up, you get a nice error
107
+ from Google::
108
+
109
+ google.auth.exceptions.DefaultCredentialsError: Your default credentials were not found. To set up Application Default Credentials, see https://cloud.google.com/docs/authentication/external/set-up-adc for more information.
110
+
111
+ But we swallow that when we fall back to anonymous access.
112
+
113
+ So we take the URL and any notes from client setup here, and if something
114
+ goes wrong that looks like a permission problem we complain with the notes
115
+ attached.
116
+ """
117
+ try:
118
+ yield
119
+ except exceptions.InvalidOperation as e:
120
+ if "Anonymous credentials cannot be refreshed" in str(e):
121
+ raise RuntimeError(
122
+ "Google Storage tried to refresh anonymous credentials. "
123
+ "Are you sure you have set up your Google Account login "
124
+ "for applications with permission to access "
125
+ f"{urlunparse(url)}? "
126
+ "Maybe try `gcloud auth application-default login`? "
127
+ f"Client setup said: {notes}"
128
+ ) from e
129
+ else:
130
+ raise
131
+
132
+
93
133
 
94
134
  class GoogleJobStore(AbstractJobStore):
95
135
 
@@ -117,10 +157,10 @@ class GoogleJobStore(AbstractJobStore):
117
157
  self.readStatsBaseID = self.statsReadPrefix + self.statsBaseID
118
158
 
119
159
  self.sseKey = None
120
- self.storageClient = self.create_client()
160
+ self.storageClient, self.auth_notes = self.create_client()
121
161
 
122
162
  @classmethod
123
- def create_client(cls) -> storage.Client:
163
+ def create_client(cls) -> tuple[storage.Client, str]:
124
164
  """
125
165
  Produce a client for Google Storage with the highest level of access we can get.
126
166
 
@@ -128,8 +168,28 @@ class GoogleJobStore(AbstractJobStore):
128
168
  Google Storage module's behavior.
129
169
 
130
170
  Warn if GOOGLE_APPLICATION_CREDENTIALS is set but not actually present.
171
+
172
+ :returns: the client, and any notes about why it might not have permissions.
131
173
  """
132
174
 
175
+ notes: list[str] = []
176
+ def add_note(message: str, *args: Any, warn: bool = False) -> None:
177
+ """
178
+ Add and possibly warn with a note about the client permissions.
179
+ """
180
+ note = message % args
181
+ if warn:
182
+ log.warning(note)
183
+ notes.append(note)
184
+ def compile_notes() -> str:
185
+ """
186
+ Make one string explaining why we might not have expected permissions.
187
+ """
188
+ if notes:
189
+ return f"Google authentication had {len(notes)} potential issues: {'; '.join(notes)}"
190
+ else:
191
+ return "Google authentication appeared successful."
192
+
133
193
  # Determine if we have an override environment variable for our credentials.
134
194
  # We get the path to check existence, but Google Storage works out what
135
195
  # to use later by looking at the environment again.
@@ -139,38 +199,42 @@ class GoogleJobStore(AbstractJobStore):
139
199
  if credentials_path is not None and not os.path.exists(credentials_path):
140
200
  # If the file is missing, complain.
141
201
  # This variable holds a file name and not any sensitive data itself.
142
- log.warning(
202
+ add_note(
143
203
  "File '%s' from GOOGLE_APPLICATION_CREDENTIALS is unavailable! "
144
204
  "We may not be able to authenticate!",
145
205
  credentials_path,
206
+ warn=True
146
207
  )
147
208
 
148
209
  if credentials_path is None and os.path.exists(cls.nodeServiceAccountJson):
149
210
  try:
150
- # load credentials from a particular file on GCE nodes if an override path is not set
211
+ # load credentials from a particular file on GCE nodes if an
212
+ # override path is not set
151
213
  return storage.Client.from_service_account_json(
152
214
  cls.nodeServiceAccountJson
153
- )
215
+ ), compile_notes()
154
216
  except OSError:
155
217
  # Probably we don't have permission to use the file.
156
- log.warning(
218
+ add_note(
157
219
  "File '%s' exists but didn't work to authenticate!",
158
220
  cls.nodeServiceAccountJson,
221
+ warn=True
159
222
  )
160
223
 
161
224
  # Either a filename is specified, or our fallback file isn't there.
162
225
  try:
163
226
  # See if Google can work out how to authenticate.
164
- return storage.Client()
165
- except (DefaultCredentialsError, OSError):
227
+ return storage.Client(), compile_notes()
228
+ except (DefaultCredentialsError, OSError) as e:
166
229
  # Depending on which Google codepath or module version (???)
167
230
  # realizes we have no credentials, we can get an EnvironmentError,
168
231
  # or the new DefaultCredentialsError we are supposedly specced to
169
232
  # get.
233
+ add_note("Could not make authenticated client: %s", e)
170
234
 
171
235
  # Google can't find credentials, fall back to being anonymous.
172
236
  # This is likely to happen all the time so don't warn.
173
- return storage.Client.create_anonymous_client()
237
+ return storage.Client.create_anonymous_client(), compile_notes()
174
238
 
175
239
  @google_retry
176
240
  def initialize(self, config=None):
@@ -406,19 +470,20 @@ class GoogleJobStore(AbstractJobStore):
406
470
 
407
471
  @classmethod
408
472
  @google_retry
409
- def _get_blob_from_url(cls, url, exists=False):
473
+ def _get_blob_from_url(cls, client: storage.Client, url: ParseResult, exists: bool = False) -> storage.blob.Blob:
410
474
  """
411
475
  Gets the blob specified by the url.
412
476
 
413
477
  caution: makes no api request. blob may not ACTUALLY exist
414
478
 
415
- :param urlparse.ParseResult url: the URL
479
+ :param client: The Google Storage client to use to connect with.
416
480
 
417
- :param bool exists: if True, then syncs local blob object with cloud
481
+ :param url: the URL
482
+
483
+ :param exists: if True, then syncs local blob object with cloud
418
484
  and raises exceptions if it doesn't exist remotely
419
485
 
420
486
  :return: the blob requested
421
- :rtype: :class:`~google.cloud.storage.blob.Blob`
422
487
  """
423
488
  bucketName = url.netloc
424
489
  fileName = url.path
@@ -427,8 +492,7 @@ class GoogleJobStore(AbstractJobStore):
427
492
  if fileName.startswith("/"):
428
493
  fileName = fileName[1:]
429
494
 
430
- storageClient = cls.create_client()
431
- bucket = storageClient.bucket(bucket_name=bucketName)
495
+ bucket = client.bucket(bucket_name=bucketName)
432
496
  blob = bucket.blob(compat_bytes(fileName))
433
497
 
434
498
  if exists:
@@ -440,26 +504,34 @@ class GoogleJobStore(AbstractJobStore):
440
504
 
441
505
  @classmethod
442
506
  def _url_exists(cls, url: ParseResult) -> bool:
443
- try:
444
- cls._get_blob_from_url(url, exists=True)
445
- return True
446
- except NoSuchFileException:
447
- return False
507
+ client, auth_notes = cls.create_client()
508
+ with permission_error_reporter(url, auth_notes):
509
+ try:
510
+ cls._get_blob_from_url(client, url, exists=True)
511
+ return True
512
+ except NoSuchFileException:
513
+ return False
448
514
 
449
515
  @classmethod
450
516
  def _get_size(cls, url):
451
- return cls._get_blob_from_url(url, exists=True).size
517
+ client, auth_notes = cls.create_client()
518
+ with permission_error_reporter(url, auth_notes):
519
+ return cls._get_blob_from_url(client, url, exists=True).size
452
520
 
453
521
  @classmethod
454
522
  def _read_from_url(cls, url, writable):
455
- blob = cls._get_blob_from_url(url, exists=True)
456
- blob.download_to_file(writable)
457
- return blob.size, False
523
+ client, auth_notes = cls.create_client()
524
+ with permission_error_reporter(url, auth_notes):
525
+ blob = cls._get_blob_from_url(client, url, exists=True)
526
+ blob.download_to_file(writable)
527
+ return blob.size, False
458
528
 
459
529
  @classmethod
460
530
  def _open_url(cls, url: ParseResult) -> IO[bytes]:
461
- blob = cls._get_blob_from_url(url, exists=True)
462
- return blob.open("rb")
531
+ client, auth_notes = cls.create_client()
532
+ with permission_error_reporter(url, auth_notes):
533
+ blob = cls._get_blob_from_url(client, url, exists=True)
534
+ return blob.open("rb")
463
535
 
464
536
  @classmethod
465
537
  def _supports_url(cls, url, export=False):
@@ -467,8 +539,10 @@ class GoogleJobStore(AbstractJobStore):
467
539
 
468
540
  @classmethod
469
541
  def _write_to_url(cls, readable: bytes, url: str, executable: bool = False) -> None:
470
- blob = cls._get_blob_from_url(url)
471
- blob.upload_from_file(readable)
542
+ client, auth_notes = cls.create_client()
543
+ with permission_error_reporter(url, auth_notes):
544
+ blob = cls._get_blob_from_url(client, url)
545
+ blob.upload_from_file(readable)
472
546
 
473
547
  @classmethod
474
548
  def _list_url(cls, url: ParseResult) -> list[str]:
toil/leader.py CHANGED
@@ -1780,6 +1780,15 @@ class Leader:
1780
1780
 
1781
1781
  self._updatePredecessorStatus(job_id)
1782
1782
 
1783
+ if self.config.stop_on_first_failure:
1784
+ # We want to stop the workflow on the first complete failure of a job.
1785
+ logger.error("Stopping workflow on first failure, which was: %s", job_desc)
1786
+ raise FailedJobsException(
1787
+ self.jobStore,
1788
+ [self.toilState.get_job(job_id)],
1789
+ exit_code=self.recommended_fail_exit_code,
1790
+ )
1791
+
1783
1792
  def _updatePredecessorStatus(self, jobStoreID: str) -> None:
1784
1793
  """Update status of predecessors for finished (possibly failed) successor job."""
1785
1794
  if jobStoreID in self.toilState.service_to_client:
toil/lib/accelerators.py CHANGED
@@ -103,7 +103,9 @@ def have_working_nvidia_docker_runtime() -> bool:
103
103
  "all",
104
104
  "ubuntu:20.04",
105
105
  "nvidia-smi",
106
- ]
106
+ ],
107
+ stdout=subprocess.DEVNULL,
108
+ stderr=subprocess.DEVNULL
107
109
  )
108
110
  except (
109
111
  FileNotFoundError,
toil/lib/aws/session.py CHANGED
@@ -35,6 +35,9 @@ if TYPE_CHECKING:
35
35
 
36
36
  logger = logging.getLogger(__name__)
37
37
 
38
+ # You can pass config=ANONYMOUS_CONFIG to make anonymous S3 accesses
39
+ ANONYMOUS_CONFIG = Config(signature_version=botocore.UNSIGNED)
40
+
38
41
  # A note on thread safety:
39
42
  #
40
43
  # Boto3 Session: Not thread safe, 1 per thread is required.
@@ -148,6 +151,7 @@ class AWSConnectionManager:
148
151
  region: Optional[str],
149
152
  service_name: Literal["s3"],
150
153
  endpoint_url: Optional[str] = None,
154
+ config: Optional[Config] = None,
151
155
  ) -> "S3ServiceResource": ...
152
156
  @overload
153
157
  def resource(
@@ -155,6 +159,7 @@ class AWSConnectionManager:
155
159
  region: Optional[str],
156
160
  service_name: Literal["iam"],
157
161
  endpoint_url: Optional[str] = None,
162
+ config: Optional[Config] = None,
158
163
  ) -> "IAMServiceResource": ...
159
164
  @overload
160
165
  def resource(
@@ -162,6 +167,7 @@ class AWSConnectionManager:
162
167
  region: Optional[str],
163
168
  service_name: Literal["ec2"],
164
169
  endpoint_url: Optional[str] = None,
170
+ config: Optional[Config] = None,
165
171
  ) -> "EC2ServiceResource": ...
166
172
 
167
173
  def resource(
@@ -169,6 +175,7 @@ class AWSConnectionManager:
169
175
  region: Optional[str],
170
176
  service_name: str,
171
177
  endpoint_url: Optional[str] = None,
178
+ config: Optional[Config] = None,
172
179
  ) -> boto3.resources.base.ServiceResource:
173
180
  """
174
181
  Get the Boto3 Resource to use with the given service (like 'ec2') in the given region.
@@ -188,10 +195,10 @@ class AWSConnectionManager:
188
195
  # The Boto3 stubs are missing an overload for `resource` that takes
189
196
  # a non-literal string. See
190
197
  # <https://github.com/vemel/mypy_boto3_builder/issues/121#issuecomment-1011322636>
191
- storage.item = self.session(region).resource(service_name, endpoint_url=endpoint_url) # type: ignore
198
+ storage.item = self.session(region).resource(service_name, endpoint_url=endpoint_url, config=config) # type: ignore
192
199
  else:
193
200
  # We might not be able to pass None to Boto3 and have it be the same as no argument.
194
- storage.item = self.session(region).resource(service_name) # type: ignore
201
+ storage.item = self.session(region).resource(service_name, config=config) # type: ignore
195
202
 
196
203
  return cast(boto3.resources.base.ServiceResource, storage.item)
197
204
 
@@ -369,18 +376,21 @@ def resource(
369
376
  service_name: Literal["s3"],
370
377
  region_name: Optional[str] = None,
371
378
  endpoint_url: Optional[str] = None,
379
+ config: Optional[Config] = None,
372
380
  ) -> "S3ServiceResource": ...
373
381
  @overload
374
382
  def resource(
375
383
  service_name: Literal["iam"],
376
384
  region_name: Optional[str] = None,
377
385
  endpoint_url: Optional[str] = None,
386
+ config: Optional[Config] = None,
378
387
  ) -> "IAMServiceResource": ...
379
388
  @overload
380
389
  def resource(
381
390
  service_name: Literal["ec2"],
382
391
  region_name: Optional[str] = None,
383
392
  endpoint_url: Optional[str] = None,
393
+ config: Optional[Config] = None,
384
394
  ) -> "EC2ServiceResource": ...
385
395
 
386
396
 
@@ -388,6 +398,7 @@ def resource(
388
398
  service_name: Literal["s3", "iam", "ec2"],
389
399
  region_name: Optional[str] = None,
390
400
  endpoint_url: Optional[str] = None,
401
+ config: Optional[Config] = None,
391
402
  ) -> boto3.resources.base.ServiceResource:
392
403
  """
393
404
  Get a Boto 3 resource for a particular AWS service, usable by the current thread.
@@ -397,5 +408,5 @@ def resource(
397
408
 
398
409
  # Just use a global version of the manager. Note that we change the argument order!
399
410
  return _global_manager.resource(
400
- region_name, service_name, endpoint_url=endpoint_url
411
+ region_name, service_name, endpoint_url=endpoint_url, config=config
401
412
  )
toil/lib/aws/utils.py CHANGED
@@ -19,8 +19,10 @@ from collections.abc import Iterable, Iterator
19
19
  from typing import TYPE_CHECKING, Any, Callable, ContextManager, Optional, cast
20
20
  from urllib.parse import ParseResult
21
21
 
22
+ # To import toil.lib.aws.session, the AWS libraries must be installed
22
23
  from toil.lib.aws import AWSRegionName, AWSServerErrors, session
23
24
  from toil.lib.conversions import strtobool
25
+ from toil.lib.memoize import memoize
24
26
  from toil.lib.misc import printq
25
27
  from toil.lib.retry import (
26
28
  DEFAULT_DELAYS,
@@ -37,12 +39,7 @@ if TYPE_CHECKING:
37
39
  from mypy_boto3_s3.service_resource import Object as S3Object
38
40
  from mypy_boto3_sdb.type_defs import AttributeTypeDef
39
41
 
40
- try:
41
- from botocore.exceptions import ClientError, EndpointConnectionError
42
- except ImportError:
43
- ClientError = None # type: ignore
44
- EndpointConnectionError = None # type: ignore
45
- # AWS/boto extra is not installed
42
+ from botocore.exceptions import ClientError, EndpointConnectionError
46
43
 
47
44
  logger = logging.getLogger(__name__)
48
45
 
@@ -232,6 +229,7 @@ def get_bucket_region(
232
229
  bucket_name: str,
233
230
  endpoint_url: Optional[str] = None,
234
231
  only_strategies: Optional[set[int]] = None,
232
+ anonymous: Optional[bool] = None
235
233
  ) -> str:
236
234
  """
237
235
  Get the AWS region name associated with the given S3 bucket, or raise NoBucketLocationError.
@@ -241,9 +239,13 @@ def get_bucket_region(
241
239
  Takes an optional S3 API URL override.
242
240
 
243
241
  :param only_strategies: For testing, use only strategies with 1-based numbers in this set.
242
+
243
+ :raises NoBucketLocationError: if the bucket's region cannot be determined
244
+ (possibly due to lack of permissions).
244
245
  """
245
246
 
246
- s3_client = session.client("s3", endpoint_url=endpoint_url)
247
+ config = session.ANONYMOUS_CONFIG if anonymous else None
248
+ s3_client = session.client("s3", endpoint_url=endpoint_url, config=config)
247
249
 
248
250
  def attempt_get_bucket_location() -> Optional[str]:
249
251
  """
@@ -267,7 +269,7 @@ def get_bucket_region(
267
269
  # It could also be because AWS open data buckets (which we tend to
268
270
  # encounter this problem for) tend to actually themselves be in
269
271
  # us-east-1.
270
- backup_s3_client = session.client("s3", region_name="us-east-1")
272
+ backup_s3_client = session.client("s3", region_name="us-east-1", config=config)
271
273
  return backup_s3_client.get_bucket_location(Bucket=bucket_name).get(
272
274
  "LocationConstraint", None
273
275
  )
@@ -337,6 +339,30 @@ def get_bucket_region(
337
339
  "Could not get bucket location: " + "\n".join(error_messages)
338
340
  ) from last_error
339
341
 
342
@memoize
def get_bucket_region_if_available(
    bucket_name: str,
    endpoint_url: Optional[str] = None,
    only_strategies: Optional[set[int]] = None,
    anonymous: Optional[bool] = None
) -> Optional[str]:
    """
    Look up the AWS region name of an S3 bucket, or return None if it is unknowable.

    Delegates to get_bucket_region() and turns "cannot determine the region"
    failures into a None result instead of an exception.

    Results are cached, so this may keep returning None for a bucket that has
    since been created but was previously observed to be nonexistent.

    :param bucket_name: Name of the bucket to look up.
    :param endpoint_url: Optional S3 API URL override, passed through unchanged.
    :param only_strategies: For testing, use only strategies with 1-based numbers in this set.
    :param anonymous: Passed through unchanged to get_bucket_region().

    :returns: The region name, or None when the lookup raised
        NoBucketLocationError or a ClientError with status 403.
    """

    try:
        return get_bucket_region(bucket_name, endpoint_url, only_strategies, anonymous)
    except NoBucketLocationError:
        # We can't know
        return None
    except ClientError as e:
        if get_error_status(e) != 403:
            # Anything other than "forbidden" is a real failure.
            raise
        # We can't know
        return None
340
366
 
341
367
  def region_to_bucket_location(region: str) -> str:
342
368
  return "" if region == "us-east-1" else region
@@ -346,7 +372,7 @@ def bucket_location_to_region(location: Optional[str]) -> str:
346
372
  return "us-east-1" if location == "" or location is None else location
347
373
 
348
374
 
349
- def get_object_for_url(url: ParseResult, existing: Optional[bool] = None) -> "S3Object":
375
+ def get_object_for_url(url: ParseResult, existing: Optional[bool] = None, anonymous: Optional[bool] = None) -> "S3Object":
350
376
  """
351
377
  Extracts a key (object) from a given parsed s3:// URL.
352
378
 
@@ -354,6 +380,10 @@ def get_object_for_url(url: ParseResult, existing: Optional[bool] = None) -> "S3
354
380
 
355
381
  :param bool existing: If True, key is expected to exist. If False, key is expected not to
356
382
  exist and it will be created. If None, the key will be created if it doesn't exist.
383
+
384
+ :raises FileNotFoundError: when existing is True and the object does not exist.
385
+ :raises RuntimeError: when existing is False but the object exists.
386
+ :raises PermissionError: when we are not authorized to look at the object.
357
387
  """
358
388
 
359
389
  key_name = url.path[1:]
@@ -372,17 +402,19 @@ def get_object_for_url(url: ParseResult, existing: Optional[bool] = None) -> "S3
372
402
  # TODO: OrdinaryCallingFormat equivalent in boto3?
373
403
  # if botoargs:
374
404
  # botoargs['calling_format'] = boto.s3.connection.OrdinaryCallingFormat()
375
-
376
- try:
377
- # Get the bucket's region to avoid a redirect per request
378
- region = get_bucket_region(bucket_name, endpoint_url=endpoint_url)
379
- s3 = session.resource("s3", region_name=region, endpoint_url=endpoint_url)
380
- except NoBucketLocationError as e:
381
- # Probably don't have permission.
382
- # TODO: check if it is that
383
- logger.debug("Couldn't get bucket location: %s", e)
405
+
406
+ config = session.ANONYMOUS_CONFIG if anonymous else None
407
+ # Get the bucket's region to avoid a redirect per request.
408
+ # Cache the result
409
+ region = get_bucket_region_if_available(bucket_name, endpoint_url=endpoint_url, anonymous=anonymous)
410
+ if region is not None:
411
+ s3 = session.resource("s3", region_name=region, endpoint_url=endpoint_url, config=config)
412
+ else:
413
+ # We can't get the bucket location, perhaps because we don't have
414
+ # permission to do that.
415
+ logger.debug("Couldn't get bucket location")
384
416
  logger.debug("Fall back to not specifying location")
385
- s3 = session.resource("s3", endpoint_url=endpoint_url)
417
+ s3 = session.resource("s3", endpoint_url=endpoint_url, config=config)
386
418
 
387
419
  obj = s3.Object(bucket_name, key_name)
388
420
  objExists = True
@@ -392,6 +424,10 @@ def get_object_for_url(url: ParseResult, existing: Optional[bool] = None) -> "S3
392
424
  except ClientError as e:
393
425
  if get_error_status(e) == 404:
394
426
  objExists = False
427
+ elif get_error_status(e) == 403:
428
+ raise PermissionError(
429
+ f"Key '{key_name}' is not accessible in bucket '{bucket_name}'."
430
+ ) from e
395
431
  else:
396
432
  raise
397
433
  if existing is True and not objExists:
@@ -402,16 +438,27 @@ def get_object_for_url(url: ParseResult, existing: Optional[bool] = None) -> "S3
402
438
  raise RuntimeError(f"Key '{key_name}' exists in bucket '{bucket_name}'.")
403
439
 
404
440
  if not objExists:
405
- obj.put() # write an empty file
441
+ try:
442
+ obj.put() # write an empty file
443
+ except ClientError as e:
444
+ if get_error_status(e) == 403:
445
+ raise PermissionError(
446
+ f"Key '{key_name}' is not writable in bucket '{bucket_name}'."
447
+ ) from e
448
+ else:
449
+ raise
406
450
  return obj
407
451
 
408
452
 
409
453
  @retry(errors=[AWSServerErrors])
410
- def list_objects_for_url(url: ParseResult) -> list[str]:
454
+ def list_objects_for_url(url: ParseResult, anonymous: Optional[bool] = None) -> list[str]:
411
455
  """
412
456
  Lists the keys (objects) under a given parsed s3:// URL prefix. The URL will be
413
457
  supplemented with a trailing slash if it is missing.
458
+
459
+ :raises PermissionError: when we are not authorized to do the list operation.
414
460
  """
461
+
415
462
  key_name = url.path[1:]
416
463
  bucket_name = url.netloc
417
464
 
@@ -430,23 +477,33 @@ def list_objects_for_url(url: ParseResult) -> list[str]:
430
477
  protocol = "http"
431
478
  if host:
432
479
  endpoint_url = f"{protocol}://{host}" + f":{port}" if port else ""
433
-
434
- client = session.client("s3", endpoint_url=endpoint_url)
480
+
481
+ config = session.ANONYMOUS_CONFIG if anonymous else None
482
+ client = session.client("s3", endpoint_url=endpoint_url, config=config)
435
483
 
436
484
  listing = []
485
+
486
+ try:
487
+ paginator = client.get_paginator("list_objects_v2")
488
+ result = paginator.paginate(Bucket=bucket_name, Prefix=key_name, Delimiter="/")
489
+ for page in result:
490
+ if "CommonPrefixes" in page:
491
+ for prefix_item in page["CommonPrefixes"]:
492
+ listing.append(prefix_item["Prefix"][len(key_name) :])
493
+ if "Contents" in page:
494
+ for content_item in page["Contents"]:
495
+ if content_item["Key"] == key_name:
496
+ # Ignore folder name itself
497
+ continue
498
+ listing.append(content_item["Key"][len(key_name) :])
499
+ except ClientError as e:
500
+ if get_error_status(e) == 403:
501
+ raise PermissionError(
502
+ f"Prefix '{key_name}' is not authorized to be listed in bucket '{bucket_name}'."
503
+ ) from e
504
+ else:
505
+ raise
437
506
 
438
- paginator = client.get_paginator("list_objects_v2")
439
- result = paginator.paginate(Bucket=bucket_name, Prefix=key_name, Delimiter="/")
440
- for page in result:
441
- if "CommonPrefixes" in page:
442
- for prefix_item in page["CommonPrefixes"]:
443
- listing.append(prefix_item["Prefix"][len(key_name) :])
444
- if "Contents" in page:
445
- for content_item in page["Contents"]:
446
- if content_item["Key"] == key_name:
447
- # Ignore folder name itself
448
- continue
449
- listing.append(content_item["Key"][len(key_name) :])
450
507
 
451
508
  logger.debug("Found in %s items: %s", url, listing)
452
509
  return listing