toil 8.2.0__py3-none-any.whl → 9.1.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (99)
  1. toil/batchSystems/abstractBatchSystem.py +13 -5
  2. toil/batchSystems/abstractGridEngineBatchSystem.py +17 -5
  3. toil/batchSystems/kubernetes.py +13 -2
  4. toil/batchSystems/mesos/batchSystem.py +33 -2
  5. toil/batchSystems/registry.py +15 -118
  6. toil/batchSystems/slurm.py +191 -16
  7. toil/common.py +20 -1
  8. toil/cwl/cwltoil.py +97 -119
  9. toil/cwl/utils.py +103 -3
  10. toil/fileStores/__init__.py +1 -1
  11. toil/fileStores/abstractFileStore.py +5 -2
  12. toil/fileStores/cachingFileStore.py +1 -1
  13. toil/job.py +30 -14
  14. toil/jobStores/abstractJobStore.py +35 -255
  15. toil/jobStores/aws/jobStore.py +864 -1964
  16. toil/jobStores/aws/utils.py +24 -270
  17. toil/jobStores/fileJobStore.py +2 -1
  18. toil/jobStores/googleJobStore.py +32 -13
  19. toil/jobStores/utils.py +0 -327
  20. toil/leader.py +27 -22
  21. toil/lib/accelerators.py +1 -1
  22. toil/lib/aws/config.py +22 -0
  23. toil/lib/aws/s3.py +477 -9
  24. toil/lib/aws/utils.py +22 -33
  25. toil/lib/checksum.py +88 -0
  26. toil/lib/conversions.py +33 -31
  27. toil/lib/directory.py +217 -0
  28. toil/lib/ec2.py +97 -29
  29. toil/lib/exceptions.py +2 -1
  30. toil/lib/expando.py +2 -2
  31. toil/lib/generatedEC2Lists.py +138 -19
  32. toil/lib/io.py +33 -2
  33. toil/lib/memoize.py +21 -7
  34. toil/lib/misc.py +1 -1
  35. toil/lib/pipes.py +385 -0
  36. toil/lib/plugins.py +106 -0
  37. toil/lib/retry.py +1 -1
  38. toil/lib/threading.py +1 -1
  39. toil/lib/url.py +320 -0
  40. toil/lib/web.py +4 -5
  41. toil/options/cwl.py +13 -1
  42. toil/options/runner.py +17 -10
  43. toil/options/wdl.py +12 -1
  44. toil/provisioners/__init__.py +5 -2
  45. toil/provisioners/aws/__init__.py +43 -36
  46. toil/provisioners/aws/awsProvisioner.py +47 -15
  47. toil/provisioners/node.py +60 -12
  48. toil/resource.py +3 -13
  49. toil/server/app.py +12 -6
  50. toil/server/cli/wes_cwl_runner.py +2 -2
  51. toil/server/wes/abstract_backend.py +21 -43
  52. toil/server/wes/toil_backend.py +2 -2
  53. toil/test/__init__.py +16 -18
  54. toil/test/batchSystems/batchSystemTest.py +2 -9
  55. toil/test/batchSystems/batch_system_plugin_test.py +7 -0
  56. toil/test/batchSystems/test_slurm.py +103 -14
  57. toil/test/cwl/cwlTest.py +181 -8
  58. toil/test/cwl/staging_cat.cwl +27 -0
  59. toil/test/cwl/staging_make_file.cwl +25 -0
  60. toil/test/cwl/staging_workflow.cwl +43 -0
  61. toil/test/cwl/zero_default.cwl +61 -0
  62. toil/test/docs/scripts/tutorial_staging.py +17 -8
  63. toil/test/docs/scriptsTest.py +2 -1
  64. toil/test/jobStores/jobStoreTest.py +23 -133
  65. toil/test/lib/aws/test_iam.py +7 -7
  66. toil/test/lib/aws/test_s3.py +30 -33
  67. toil/test/lib/aws/test_utils.py +9 -9
  68. toil/test/lib/test_url.py +69 -0
  69. toil/test/lib/url_plugin_test.py +105 -0
  70. toil/test/provisioners/aws/awsProvisionerTest.py +60 -7
  71. toil/test/provisioners/clusterTest.py +15 -2
  72. toil/test/provisioners/gceProvisionerTest.py +1 -1
  73. toil/test/server/serverTest.py +78 -36
  74. toil/test/src/autoDeploymentTest.py +2 -3
  75. toil/test/src/fileStoreTest.py +89 -87
  76. toil/test/utils/ABCWorkflowDebug/ABC.txt +1 -0
  77. toil/test/utils/ABCWorkflowDebug/debugWorkflow.py +4 -4
  78. toil/test/utils/toilKillTest.py +35 -28
  79. toil/test/wdl/md5sum/md5sum-gs.json +1 -1
  80. toil/test/wdl/md5sum/md5sum.json +1 -1
  81. toil/test/wdl/testfiles/read_file.wdl +18 -0
  82. toil/test/wdl/testfiles/url_to_optional_file.wdl +2 -1
  83. toil/test/wdl/wdltoil_test.py +171 -162
  84. toil/test/wdl/wdltoil_test_kubernetes.py +9 -0
  85. toil/utils/toilDebugFile.py +6 -3
  86. toil/utils/toilSshCluster.py +23 -0
  87. toil/utils/toilStats.py +17 -2
  88. toil/utils/toilUpdateEC2Instances.py +1 -0
  89. toil/version.py +10 -10
  90. toil/wdl/wdltoil.py +1179 -825
  91. toil/worker.py +16 -8
  92. {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/METADATA +32 -32
  93. {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/RECORD +97 -85
  94. {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/WHEEL +1 -1
  95. toil/lib/iterables.py +0 -112
  96. toil/test/docs/scripts/stagingExampleFiles/in.txt +0 -1
  97. {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/entry_points.txt +0 -0
  98. {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/licenses/LICENSE +0 -0
  99. {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/top_level.txt +0 -0
@@ -17,7 +17,7 @@ import logging
 import os
 import types
 from ssl import SSLError
-from typing import TYPE_CHECKING, Optional, cast
+from typing import TYPE_CHECKING, IO, Optional, cast, Any
 
 from boto3.s3.transfer import TransferConfig
 from botocore.client import Config
@@ -37,8 +37,8 @@ from toil.lib.retry import (
 )
 
 if TYPE_CHECKING:
-    from mypy_boto3_s3 import S3ServiceResource
-    from mypy_boto3_sdb.type_defs import AttributeTypeDef, ItemTypeDef
+    from mypy_boto3_s3 import S3Client, S3ServiceResource
+    from mypy_boto3_s3.type_defs import CopySourceTypeDef
 
 logger = logging.getLogger(__name__)
 
@@ -54,210 +54,21 @@ DIAL_SPECIFIC_REGION_CONFIG = Config(
 )
 
 
-class SDBHelper:
-    """
-    A mixin with methods for storing limited amounts of binary data in an SDB item
-
-    >>> import os
-    >>> H=SDBHelper
-    >>> H.presenceIndicator() # doctest: +ALLOW_UNICODE
-    u'numChunks'
-    >>> H.binaryToAttributes(None)['numChunks']
-    0
-    >>> H.attributesToBinary({u'numChunks': 0})
-    (None, 0)
-    >>> H.binaryToAttributes(b'') # doctest: +ALLOW_UNICODE +ALLOW_BYTES
-    {u'000': b'VQ==', u'numChunks': 1}
-    >>> H.attributesToBinary({u'numChunks': 1, u'000': b'VQ=='}) # doctest: +ALLOW_BYTES
-    (b'', 1)
-
-    Good pseudo-random data is very likely smaller than its bzip2ed form. Subtract 1 for the type
-    character, i.e 'C' or 'U', with which the string is prefixed. We should get one full chunk:
-
-    >>> s = os.urandom(H.maxRawValueSize-1)
-    >>> d = H.binaryToAttributes(s)
-    >>> len(d), len(d['000'])
-    (2, 1024)
-    >>> H.attributesToBinary(d) == (s, 1)
-    True
-
-    One byte more and we should overflow four bytes into the second chunk, two bytes for
-    base64-encoding the additional character and two bytes for base64-padding to the next quartet.
-
-    >>> s += s[0:1]
-    >>> d = H.binaryToAttributes(s)
-    >>> len(d), len(d['000']), len(d['001'])
-    (3, 1024, 4)
-    >>> H.attributesToBinary(d) == (s, 2)
-    True
-
-    """
-
-    # The SDB documentation is not clear as to whether the attribute value size limit of 1024
-    # applies to the base64-encoded value or the raw value. It suggests that responses are
-    # automatically encoded from which I conclude that the limit should apply to the raw,
-    # unencoded value. However, there seems to be a discrepancy between how Boto computes the
-    # request signature if a value contains a binary data, and how SDB does it. This causes
-    # requests to fail signature verification, resulting in a 403. We therefore have to
-    # base64-encode values ourselves even if that means we loose a quarter of capacity.
-
-    maxAttributesPerItem = 256
-    maxValueSize = 1024
-    maxRawValueSize = maxValueSize * 3 // 4
-    # Just make sure we don't have a problem with padding or integer truncation:
-    assert len(base64.b64encode(b" " * maxRawValueSize)) == 1024
-    assert len(base64.b64encode(b" " * (1 + maxRawValueSize))) > 1024
-
-    @classmethod
-    def _reservedAttributes(cls):
-        """
-        Override in subclass to reserve a certain number of attributes that can't be used for
-        chunks.
-        """
-        return 1
-
-    @classmethod
-    def _maxChunks(cls):
-        return cls.maxAttributesPerItem - cls._reservedAttributes()
-
-    @classmethod
-    def maxBinarySize(cls, extraReservedChunks=0):
-        return (
-            cls._maxChunks() - extraReservedChunks
-        ) * cls.maxRawValueSize - 1  # for the 'C' or 'U' prefix
-
-    @classmethod
-    def _maxEncodedSize(cls):
-        return cls._maxChunks() * cls.maxValueSize
-
-    @classmethod
-    def binaryToAttributes(cls, binary) -> dict[str, str]:
-        """
-        Turn a bytestring, or None, into SimpleDB attributes.
-        """
-        if binary is None:
-            return {"numChunks": "0"}
-        assert isinstance(binary, bytes)
-        assert len(binary) <= cls.maxBinarySize()
-        # The use of compression is just an optimization. We can't include it in the maxValueSize
-        # computation because the compression ratio depends on the input.
-        compressed = bz2.compress(binary)
-        if len(compressed) > len(binary):
-            compressed = b"U" + binary
-        else:
-            compressed = b"C" + compressed
-        encoded = base64.b64encode(compressed)
-        assert len(encoded) <= cls._maxEncodedSize()
-        n = cls.maxValueSize
-        chunks = (encoded[i : i + n] for i in range(0, len(encoded), n))
-        attributes = {
-            cls._chunkName(i): chunk.decode("utf-8") for i, chunk in enumerate(chunks)
-        }
-        attributes.update({"numChunks": str(len(attributes))})
-        return attributes
-
-    @classmethod
-    def attributeDictToList(
-        cls, attributes: dict[str, str]
-    ) -> list["AttributeTypeDef"]:
-        """
-        Convert the attribute dict (ex: from binaryToAttributes) into a list of attribute typed dicts
-        to be compatible with boto3 argument syntax
-        :param attributes: Dict[str, str], attribute in object form
-        :return: list of attributes in typed dict form
-        """
-        return [{"Name": name, "Value": value} for name, value in attributes.items()]
-
-    @classmethod
-    def attributeListToDict(
-        cls, attributes: list["AttributeTypeDef"]
-    ) -> dict[str, str]:
-        """
-        Convert the attribute boto3 representation of list of attribute typed dicts
-        back to a dictionary with name, value pairs
-        :param attribute: attribute in typed dict form
-        :return: Dict[str, str], attribute in dict form
-        """
-        return {attribute["Name"]: attribute["Value"] for attribute in attributes}
-
-    @classmethod
-    def get_attributes_from_item(
-        cls, item: "ItemTypeDef", keys: list[str]
-    ) -> list[Optional[str]]:
-        return_values: list[Optional[str]] = [None for _ in keys]
-        mapped_indices: dict[str, int] = {
-            name: index for index, name in enumerate(keys)
-        }
-        for attribute in item["Attributes"]:
-            name = attribute["Name"]
-            value = attribute["Value"]
-            if name in mapped_indices:
-                return_values[mapped_indices[name]] = value
-        return return_values
-
-    @classmethod
-    def _chunkName(cls, i):
-        return str(i).zfill(3)
-
-    @classmethod
-    def _isValidChunkName(cls, s):
-        return len(s) == 3 and s.isdigit()
-
-    @classmethod
-    def presenceIndicator(cls):
-        """
-        The key that is guaranteed to be present in the return value of binaryToAttributes().
-        Assuming that binaryToAttributes() is used with SDB's PutAttributes, the return value of
-        this method could be used to detect the presence/absence of an item in SDB.
-        """
-        return "numChunks"
-
-    @classmethod
-    def attributesToBinary(
-        cls, attributes: list["AttributeTypeDef"]
-    ) -> tuple[bytes, int]:
-        """
-        :rtype: (str|None,int)
-        :return: the binary data and the number of chunks it was composed from
-        """
-        chunks = []
-        numChunks: int = 0
-        for attribute in attributes:
-            name = attribute["Name"]
-            value = attribute["Value"]
-            if cls._isValidChunkName(name):
-                chunks.append((int(name), value))
-            if name == "numChunks":
-                numChunks = int(value)
-        chunks.sort()
-        if numChunks:
-            serializedJob = b"".join(v.encode() for k, v in chunks)
-            compressed = base64.b64decode(serializedJob)
-            if compressed[0] == b"C"[0]:
-                binary = bz2.decompress(compressed[1:])
-            elif compressed[0] == b"U"[0]:
-                binary = compressed[1:]
-            else:
-                raise RuntimeError(f"Unexpected prefix {compressed[0]}")
-        else:
-            binary = None
-        return binary, numChunks
-
-
-def fileSizeAndTime(localFilePath):
+def fileSizeAndTime(localFilePath: str) -> tuple[int, float]:
     file_stat = os.stat(localFilePath)
     return file_stat.st_size, file_stat.st_mtime
 
 
+# TODO: This function is unused.
 @retry(errors=[AWSServerErrors])
 def uploadFromPath(
     localFilePath: str,
-    resource,
+    resource: "S3ServiceResource",
     bucketName: str,
     fileID: str,
-    headerArgs: Optional[dict] = None,
+    headerArgs: Optional[dict[str, Any]] = None,
     partSize: int = 50 << 20,
-):
+) -> Optional[str]:
     """
     Uploads a file to s3, using multipart uploading if applicable
 
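A note on the removed SDBHelper: its docstring leans on base64 size arithmetic (a 1024-character SDB attribute holds 1024 * 3 // 4 = 768 raw bytes, one of which is the 'C'/'U' type prefix). A minimal standalone sketch of that arithmetic, with our own variable names rather than Toil's:

    import base64

    # SDB caps attribute values at 1024 characters, and base64 inflates data
    # by 4/3, so at most 1024 * 3 // 4 = 768 raw bytes fit per chunk.
    max_value_size = 1024
    max_raw_value_size = max_value_size * 3 // 4  # 768

    # 768 bytes (1-byte type prefix + 767 bytes of payload) encode to exactly
    # one full 1024-character chunk.
    assert len(base64.b64encode(b"\x00" * max_raw_value_size)) == 1024

    # One more byte overflows by a whole base64 quartet: two characters for
    # the extra byte plus two '=' padding characters, hence the docstring's
    # (3, 1024, 4) chunk lengths.
    assert len(base64.b64encode(b"\x00" * (max_raw_value_size + 1))) == 1028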
@@ -279,8 +90,12 @@ def uploadFromPath
     version = uploadFile(
         localFilePath, resource, bucketName, fileID, headerArgs, partSize
     )
+
+    # Only pass along version if we got one.
+    version_args: dict[str, Any] = {"VersionId": version} if version is not None else {}
+
     info = client.head_object(
-        Bucket=bucketName, Key=compat_bytes(fileID), VersionId=version, **headerArgs
+        Bucket=bucketName, Key=compat_bytes(fileID), **version_args, **headerArgs
     )
     size = info.get("ContentLength")
 
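The version_args indirection above exists because, as a comment later in this file puts it, the boto3 functions don't allow passing parameters as None to indicate they weren't provided; an explicit VersionId=None would fail parameter validation. The same build-kwargs-conditionally idiom in isolation (helper name ours, not Toil's):

    from typing import Any, Optional

    def head_object_args(bucket: str, key: str, version: Optional[str]) -> dict[str, Any]:
        # Include VersionId only when a version actually exists; boto3 rejects
        # an explicit None rather than treating it as an omitted argument.
        kwargs: dict[str, Any] = {"Bucket": bucket, "Key": key}
        if version is not None:
            kwargs["VersionId"] = version
        return kwargs

    assert head_object_args("b", "k", None) == {"Bucket": "b", "Key": "k"}
    assert head_object_args("b", "k", "v1")["VersionId"] == "v1"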
@@ -293,13 +108,13 @@
 
 @retry(errors=[AWSServerErrors])
 def uploadFile(
-    readable,
-    resource,
+    readable: IO[bytes],
+    resource: "S3ServiceResource",
     bucketName: str,
     fileID: str,
-    headerArgs: Optional[dict] = None,
+    headerArgs: Optional[dict[str, Any]] = None,
     partSize: int = 50 << 20,
-):
+) -> Optional[str]:
     """
     Upload a readable object to s3, using multipart uploading if applicable.
     :param readable: a readable stream or a file path to upload to s3
@@ -361,7 +176,7 @@ def copyKeyMultipart(
     sseKey: Optional[str] = None,
     copySourceSseAlgorithm: Optional[str] = None,
     copySourceSseKey: Optional[str] = None,
-):
+) -> Optional[str]:
     """
     Copies a key from a source key to a destination key in multiple parts. Note that if the
     destination key exists it will be overwritten implicitly, and if it does not exist a new
@@ -393,12 +208,11 @@ def copyKeyMultipart(
     :param str copySourceSseAlgorithm: Server-side encryption algorithm for the source.
     :param str copySourceSseKey: Server-side encryption key for the source.
 
-    :rtype: str
     :return: The version of the copied file (or None if versioning is not enabled for dstBucket).
     """
     dstBucket = resource.Bucket(compat_bytes(dstBucketName))
     dstObject = dstBucket.Object(compat_bytes(dstKeyName))
-    copySource = {
+    copySource: "CopySourceTypeDef" = {
         "Bucket": compat_bytes(srcBucketName),
         "Key": compat_bytes(srcKeyName),
     }
@@ -410,23 +224,20 @@
     # object metadata. And we really want it to talk to the source region and
     # not wherever the bucket virtual hostnames go.
     source_region = get_bucket_region(srcBucketName)
-    source_client = cast(
-        "S3Client",
-        session.client(
-            "s3", region_name=source_region, config=DIAL_SPECIFIC_REGION_CONFIG
-        ),
+    source_client = session.client(
+        "s3", region_name=source_region, config=DIAL_SPECIFIC_REGION_CONFIG
     )
 
     # The boto3 functions don't allow passing parameters as None to
     # indicate they weren't provided. So we have to do a bit of work
     # to ensure we only provide the parameters when they are actually
    # required.
-    destEncryptionArgs = {}
+    destEncryptionArgs: dict[str, Any] = {}
     if sseKey is not None:
         destEncryptionArgs.update(
             {"SSECustomerAlgorithm": sseAlgorithm, "SSECustomerKey": sseKey}
         )
-    copyEncryptionArgs = {}
+    copyEncryptionArgs: dict[str, Any] = {}
     if copySourceSseKey is not None:
         copyEncryptionArgs.update(
             {
@@ -479,63 +290,6 @@
     return info.get("VersionId", None)
 
 
-def _put_attributes_using_post(
-    self, domain_or_name, item_name, attributes, replace=True, expected_value=None
-):
-    """
-    Monkey-patched version of SDBConnection.put_attributes that uses POST instead of GET
-
-    The GET version is subject to the URL length limit which kicks in before the 256 x 1024 limit
-    for attribute values. Using POST prevents that.
-
-    https://github.com/BD2KGenomics/toil/issues/502
-    """
-    domain, domain_name = self.get_domain_and_name(domain_or_name)
-    params = {"DomainName": domain_name, "ItemName": item_name}
-    self._build_name_value_list(params, attributes, replace)
-    if expected_value:
-        self._build_expected_value(params, expected_value)
-    # The addition of the verb keyword argument is the only difference to put_attributes (Hannes)
-    return self.get_status("PutAttributes", params, verb="POST")
-
-
-def monkeyPatchSdbConnection(sdb):
-    """
-    :type sdb: SDBConnection
-    """
-    sdb.put_attributes = types.MethodType(_put_attributes_using_post, sdb)
-
-
-def sdb_unavailable(e):
-    # Since we're checking against a collection here we absolutely need an
-    # integer status code. This is probably a BotoServerError, but other 500s
-    # and 503s probably ought to be retried too.
-    return get_error_status(e) in (500, 503)
-
-
-def no_such_sdb_domain(e):
-    return (
-        isinstance(e, ClientError)
-        and get_error_code(e)
-        and get_error_code(e).endswith("NoSuchDomain")
-    )
-
-
-def retryable_ssl_error(e):
+def retryable_ssl_error(e: BaseException) -> bool:
     # https://github.com/BD2KGenomics/toil/issues/978
     return isinstance(e, SSLError) and e.reason == "DECRYPTION_FAILED_OR_BAD_RECORD_MAC"
-
-
-def retryable_sdb_errors(e):
-    return (
-        sdb_unavailable(e)
-        or no_such_sdb_domain(e)
-        or connection_error(e)
-        or retryable_ssl_error(e)
-    )
-
-
-def retry_sdb(
-    delays=DEFAULT_DELAYS, timeout=DEFAULT_TIMEOUT, predicate=retryable_sdb_errors
-):
-    return old_retry(delays=delays, timeout=timeout, predicate=predicate)
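The removed monkeyPatchSdbConnection worked by binding a replacement function onto a single connection object with types.MethodType. The technique, reduced to a toy class so it stands alone:

    import types

    class Connection:
        def request_verb(self) -> str:
            return "GET"

    def request_verb_post(self) -> str:
        # Drop-in replacement with the same signature as the original method.
        return "POST"

    conn = Connection()
    # Rebind on this one instance; other Connection objects keep the original.
    conn.request_verb = types.MethodType(request_verb_post, conn)
    assert conn.request_verb() == "POST"
    assert Connection().request_verb() == "GET"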
toil/jobStores/fileJobStore.py

@@ -42,11 +42,12 @@ from toil.lib.io import (
     mkdtemp,
     robust_rmtree,
 )
+from toil.lib.url import URLAccess
 
 logger = logging.getLogger(__name__)
 
 
-class FileJobStore(AbstractJobStore):
+class FileJobStore(AbstractJobStore, URLAccess):
     """
     A job store that uses a directory on a locally attached file system. To be compatible with
     distributed batch systems, that file system must be shared by all worker nodes.
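URLAccess comes from the new toil/lib/url.py (added in this release, per the file list above); this hunk only shows that FileJobStore now mixes it in next to AbstractJobStore. A rough sketch of that multiple-inheritance shape, with stand-in classes since the real URLAccess API is not visible here:

    class AbstractStore:
        """Stand-in for AbstractJobStore: core job and file persistence."""

        def __init__(self, locator: str) -> None:
            self.locator = locator

    class UrlMixin:
        """Stand-in for URLAccess: URL read/export capabilities."""

        @classmethod
        def handles_url(cls, url: str) -> bool:
            return url.startswith("file:")

    class LocalStore(AbstractStore, UrlMixin):
        # Persistence from one base, URL handling from the other.
        pass

    assert LocalStore("file:/tmp/store").handles_url("file:/tmp/data.txt")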
toil/jobStores/googleJobStore.py

@@ -28,9 +28,10 @@ from google.api_core.exceptions import (
     InternalServerError,
     ServiceUnavailable,
 )
-from google.auth.exceptions import DefaultCredentialsError
+from google.auth.exceptions import DefaultCredentialsError, InvalidOperation
 from google.cloud import exceptions, storage
 
+from toil import memoize
 from toil.jobStores.abstractJobStore import (
     AbstractJobStore,
     JobStoreExistsException,
@@ -38,11 +39,12 @@ from toil.jobStores.abstractJobStore import (
     NoSuchJobException,
     NoSuchJobStoreException,
 )
-from toil.jobStores.utils import ReadablePipe, WritablePipe
+from toil.lib.pipes import ReadablePipe, WritablePipe
 from toil.lib.compatibility import compat_bytes
 from toil.lib.io import AtomicFileCreate
 from toil.lib.misc import truncExpBackoff
 from toil.lib.retry import old_retry
+from toil.lib.url import URLAccess
 
 log = logging.getLogger(__name__)
 
@@ -113,17 +115,23 @@ def permission_error_reporter(url: ParseResult, notes: str) -> Iterator[None]:
     So we take the URL and any notes from client setup here, and if something
     goes wrong that looks like a permission problem we complain with the notes
     attached.
+
+    Also, if you don't have a project available, you can't use the Python API
+    for storage with authentication at all. `gsutil` can do it but you can't.
+    TODO: Fall back on command-line gsutil for authenticated reads???
     """
     try:
         yield
-    except exceptions.InvalidOperation as e:
+    except InvalidOperation as e:
         if "Anonymous credentials cannot be refreshed" in str(e):
             raise RuntimeError(
                 "Google Storage tried to refresh anonymous credentials. "
                 "Are you sure you have set up your Google Account login "
                 "for applications with permission to access "
                 f"{urlunparse(url)}? "
-                "Maybe try `gcloud auth application-default login`? "
+                "Maybe try `gcloud auth application-default login` and "
+                "providing the (mandatory for the Python API) Google Cloud "
+                "project? "
                 f"Client setup said: {notes}"
             ) from e
         else:
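permission_error_reporter is a @contextmanager that wraps storage calls and re-raises credential failures with the client-setup notes attached. The underlying pattern, sketched with a generic exception type in place of the Google one:

    from collections.abc import Iterator
    from contextlib import contextmanager

    @contextmanager
    def error_reporter(notes: str) -> Iterator[None]:
        try:
            yield  # run the wrapped block
        except PermissionError as e:
            # Chain with `from e` so the traceback keeps the original cause.
            raise RuntimeError(f"Access denied. Client setup said: {notes}") from e

    try:
        with error_reporter("fell back to anonymous credentials"):
            raise PermissionError("403")
    except RuntimeError as e:
        assert "anonymous credentials" in str(e)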
@@ -131,7 +139,7 @@
 
 
 
-class GoogleJobStore(AbstractJobStore):
+class GoogleJobStore(AbstractJobStore, URLAccess):
 
     nodeServiceAccountJson = "/root/service_account.json"
 
@@ -160,9 +168,10 @@ class GoogleJobStore(AbstractJobStore):
         self.storageClient, self.auth_notes = self.create_client()
 
     @classmethod
+    @memoize
     def create_client(cls) -> tuple[storage.Client, str]:
         """
-        Produce a client for Google Sotrage with the highest level of access we can get.
+        Produce a client for Google Storage with the highest level of access we can get.
 
         Fall back to anonymous access if no project is available, unlike the
         Google Storage module's behavior.
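Stacking @memoize under @classmethod means create_client runs once per process, so every GoogleJobStore shares the resulting (client, notes) pair. A sketch of the effect, using functools.lru_cache as a stand-in for Toil's memoize decorator:

    from functools import lru_cache

    class Store:
        @classmethod
        @lru_cache(maxsize=None)  # stand-in for toil's memoize
        def create_client(cls) -> tuple[object, str]:
            # Expensive setup (credential probing, project lookup) runs once;
            # subsequent calls return the cached result.
            return object(), "auth notes"

    # Same object both times: the second call never re-runs the body.
    assert Store.create_client() is Store.create_client()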
@@ -258,6 +267,8 @@
             raise NoSuchJobStoreException(self.locator, "google")
         super().resume()
 
+    # TODO: Don't we need to set up encryption here???
+
     @google_retry
     def destroy(self):
         try:
@@ -391,8 +402,13 @@
         ) as writable:
             yield writable, fileID
 
-    def get_empty_file_store_id(self, jobStoreID=None, cleanup=False, basename=None):
-        fileID = self._new_id(isFile=True, jobStoreID=jobStoreID if cleanup else None)
+    def get_empty_file_store_id(
+        self,
+        job_id=None,
+        cleanup=False,
+        basename=None,
+    ):
+        fileID = self._new_id(isFile=True, jobStoreID=job_id if cleanup else None)
         self._write_file(fileID, BytesIO(b""))
         return fileID
 
@@ -614,7 +630,10 @@
         return filesRead
 
     @staticmethod
-    def _new_id(isFile=False, jobStoreID=None):
+    def _new_id(
+        isFile=False,
+        jobStoreID=None,
+    ) -> str:
         if isFile and jobStoreID:  # file associated with job
             return jobStoreID + str(uuid.uuid4())
         elif isFile:  # nonassociated file
@@ -668,7 +687,7 @@
     ):
         """
         Yields a context manager that can be used to write to the bucket
-        with a stream. See :class:`~toil.jobStores.utils.WritablePipe` for an example.
+        with a stream. See :class:`~toil.lib.pipes.WritablePipe` for an example.
 
         Will throw assertion error if the file shouldn't be updated
         and yet exists.
@@ -689,7 +708,7 @@
         are the same as for open(). Defaults to 'strict' when an encoding is specified.
 
         :return: an instance of WritablePipe.
-        :rtype: :class:`~toil.jobStores.utils.writablePipe`
+        :rtype: :class:`~toil.lib.pipes.WritablePipe`
         """
         blob = self.bucket.blob(
             compat_bytes(fileName), encryption_key=self.sseKey if encrypt else None
@@ -712,7 +731,7 @@
     def _download_stream(self, fileName, encrypt=True, encoding=None, errors=None):
         """
         Yields a context manager that can be used to read from the bucket
-        with a stream. See :class:`~toil.jobStores.utils.WritablePipe` for an example.
+        with a stream. See :class:`~toil.lib.pipes.WritablePipe` for an example.
 
         :param fileName: name of file in bucket to be read
         :type fileName: str
@@ -727,7 +746,7 @@
         are the same as for open(). Defaults to 'strict' when an encoding is specified.
 
         :return: an instance of ReadablePipe.
-        :rtype: :class:`~toil.jobStores.utils.ReadablePipe`
+        :rtype: :class:`~toil.lib.pipes.ReadablePipe`
         """
 
         blob = self.bucket.get_blob(