toil 7.0.0__py3-none-any.whl → 8.1.0b1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +124 -86
- toil/batchSystems/__init__.py +1 -0
- toil/batchSystems/abstractBatchSystem.py +137 -77
- toil/batchSystems/abstractGridEngineBatchSystem.py +211 -101
- toil/batchSystems/awsBatch.py +237 -128
- toil/batchSystems/cleanup_support.py +22 -16
- toil/batchSystems/contained_executor.py +30 -26
- toil/batchSystems/gridengine.py +85 -49
- toil/batchSystems/htcondor.py +164 -87
- toil/batchSystems/kubernetes.py +622 -386
- toil/batchSystems/local_support.py +17 -12
- toil/batchSystems/lsf.py +132 -79
- toil/batchSystems/lsfHelper.py +13 -11
- toil/batchSystems/mesos/__init__.py +41 -29
- toil/batchSystems/mesos/batchSystem.py +288 -149
- toil/batchSystems/mesos/executor.py +77 -49
- toil/batchSystems/mesos/test/__init__.py +31 -23
- toil/batchSystems/options.py +39 -29
- toil/batchSystems/registry.py +53 -19
- toil/batchSystems/singleMachine.py +293 -123
- toil/batchSystems/slurm.py +651 -155
- toil/batchSystems/torque.py +46 -32
- toil/bus.py +141 -73
- toil/common.py +784 -397
- toil/cwl/__init__.py +1 -1
- toil/cwl/cwltoil.py +1137 -534
- toil/cwl/utils.py +17 -22
- toil/deferred.py +62 -41
- toil/exceptions.py +5 -3
- toil/fileStores/__init__.py +5 -5
- toil/fileStores/abstractFileStore.py +88 -57
- toil/fileStores/cachingFileStore.py +711 -247
- toil/fileStores/nonCachingFileStore.py +113 -75
- toil/job.py +1031 -349
- toil/jobStores/abstractJobStore.py +387 -243
- toil/jobStores/aws/jobStore.py +772 -412
- toil/jobStores/aws/utils.py +161 -109
- toil/jobStores/conftest.py +1 -0
- toil/jobStores/fileJobStore.py +289 -151
- toil/jobStores/googleJobStore.py +137 -70
- toil/jobStores/utils.py +36 -15
- toil/leader.py +614 -269
- toil/lib/accelerators.py +115 -18
- toil/lib/aws/__init__.py +55 -28
- toil/lib/aws/ami.py +122 -87
- toil/lib/aws/iam.py +284 -108
- toil/lib/aws/s3.py +31 -0
- toil/lib/aws/session.py +204 -58
- toil/lib/aws/utils.py +290 -213
- toil/lib/bioio.py +13 -5
- toil/lib/compatibility.py +11 -6
- toil/lib/conversions.py +83 -49
- toil/lib/docker.py +131 -103
- toil/lib/dockstore.py +379 -0
- toil/lib/ec2.py +322 -209
- toil/lib/ec2nodes.py +174 -105
- toil/lib/encryption/_dummy.py +5 -3
- toil/lib/encryption/_nacl.py +10 -6
- toil/lib/encryption/conftest.py +1 -0
- toil/lib/exceptions.py +26 -7
- toil/lib/expando.py +4 -2
- toil/lib/ftp_utils.py +217 -0
- toil/lib/generatedEC2Lists.py +127 -19
- toil/lib/history.py +1271 -0
- toil/lib/history_submission.py +681 -0
- toil/lib/humanize.py +6 -2
- toil/lib/io.py +121 -12
- toil/lib/iterables.py +4 -2
- toil/lib/memoize.py +12 -8
- toil/lib/misc.py +83 -18
- toil/lib/objects.py +2 -2
- toil/lib/resources.py +19 -7
- toil/lib/retry.py +125 -87
- toil/lib/threading.py +282 -80
- toil/lib/throttle.py +15 -14
- toil/lib/trs.py +390 -0
- toil/lib/web.py +38 -0
- toil/options/common.py +850 -402
- toil/options/cwl.py +185 -90
- toil/options/runner.py +50 -0
- toil/options/wdl.py +70 -19
- toil/provisioners/__init__.py +111 -46
- toil/provisioners/abstractProvisioner.py +322 -157
- toil/provisioners/aws/__init__.py +62 -30
- toil/provisioners/aws/awsProvisioner.py +980 -627
- toil/provisioners/clusterScaler.py +541 -279
- toil/provisioners/gceProvisioner.py +283 -180
- toil/provisioners/node.py +147 -79
- toil/realtimeLogger.py +34 -22
- toil/resource.py +137 -75
- toil/server/app.py +127 -61
- toil/server/celery_app.py +3 -1
- toil/server/cli/wes_cwl_runner.py +84 -55
- toil/server/utils.py +56 -31
- toil/server/wes/abstract_backend.py +64 -26
- toil/server/wes/amazon_wes_utils.py +21 -15
- toil/server/wes/tasks.py +121 -63
- toil/server/wes/toil_backend.py +142 -107
- toil/server/wsgi_app.py +4 -3
- toil/serviceManager.py +58 -22
- toil/statsAndLogging.py +183 -65
- toil/test/__init__.py +263 -179
- toil/test/batchSystems/batchSystemTest.py +438 -195
- toil/test/batchSystems/batch_system_plugin_test.py +18 -7
- toil/test/batchSystems/test_gridengine.py +173 -0
- toil/test/batchSystems/test_lsf_helper.py +67 -58
- toil/test/batchSystems/test_slurm.py +265 -49
- toil/test/cactus/test_cactus_integration.py +20 -22
- toil/test/cwl/conftest.py +39 -0
- toil/test/cwl/cwlTest.py +375 -72
- toil/test/cwl/measure_default_memory.cwl +12 -0
- toil/test/cwl/not_run_required_input.cwl +29 -0
- toil/test/cwl/optional-file.cwl +18 -0
- toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
- toil/test/docs/scriptsTest.py +60 -34
- toil/test/jobStores/jobStoreTest.py +412 -235
- toil/test/lib/aws/test_iam.py +116 -48
- toil/test/lib/aws/test_s3.py +16 -9
- toil/test/lib/aws/test_utils.py +5 -6
- toil/test/lib/dockerTest.py +118 -141
- toil/test/lib/test_conversions.py +113 -115
- toil/test/lib/test_ec2.py +57 -49
- toil/test/lib/test_history.py +212 -0
- toil/test/lib/test_misc.py +12 -5
- toil/test/lib/test_trs.py +161 -0
- toil/test/mesos/MesosDataStructuresTest.py +23 -10
- toil/test/mesos/helloWorld.py +7 -6
- toil/test/mesos/stress.py +25 -20
- toil/test/options/options.py +7 -2
- toil/test/provisioners/aws/awsProvisionerTest.py +293 -140
- toil/test/provisioners/clusterScalerTest.py +440 -250
- toil/test/provisioners/clusterTest.py +81 -42
- toil/test/provisioners/gceProvisionerTest.py +174 -100
- toil/test/provisioners/provisionerTest.py +25 -13
- toil/test/provisioners/restartScript.py +5 -4
- toil/test/server/serverTest.py +188 -141
- toil/test/sort/restart_sort.py +137 -68
- toil/test/sort/sort.py +134 -66
- toil/test/sort/sortTest.py +91 -49
- toil/test/src/autoDeploymentTest.py +140 -100
- toil/test/src/busTest.py +20 -18
- toil/test/src/checkpointTest.py +8 -2
- toil/test/src/deferredFunctionTest.py +49 -35
- toil/test/src/dockerCheckTest.py +33 -26
- toil/test/src/environmentTest.py +20 -10
- toil/test/src/fileStoreTest.py +538 -271
- toil/test/src/helloWorldTest.py +7 -4
- toil/test/src/importExportFileTest.py +61 -31
- toil/test/src/jobDescriptionTest.py +32 -17
- toil/test/src/jobEncapsulationTest.py +2 -0
- toil/test/src/jobFileStoreTest.py +74 -50
- toil/test/src/jobServiceTest.py +187 -73
- toil/test/src/jobTest.py +120 -70
- toil/test/src/miscTests.py +19 -18
- toil/test/src/promisedRequirementTest.py +82 -36
- toil/test/src/promisesTest.py +7 -6
- toil/test/src/realtimeLoggerTest.py +6 -6
- toil/test/src/regularLogTest.py +71 -37
- toil/test/src/resourceTest.py +80 -49
- toil/test/src/restartDAGTest.py +36 -22
- toil/test/src/resumabilityTest.py +9 -2
- toil/test/src/retainTempDirTest.py +45 -14
- toil/test/src/systemTest.py +12 -8
- toil/test/src/threadingTest.py +44 -25
- toil/test/src/toilContextManagerTest.py +10 -7
- toil/test/src/userDefinedJobArgTypeTest.py +8 -5
- toil/test/src/workerTest.py +33 -16
- toil/test/utils/toilDebugTest.py +70 -58
- toil/test/utils/toilKillTest.py +4 -5
- toil/test/utils/utilsTest.py +239 -102
- toil/test/wdl/wdltoil_test.py +789 -148
- toil/test/wdl/wdltoil_test_kubernetes.py +37 -23
- toil/toilState.py +52 -26
- toil/utils/toilConfig.py +13 -4
- toil/utils/toilDebugFile.py +44 -27
- toil/utils/toilDebugJob.py +85 -25
- toil/utils/toilDestroyCluster.py +11 -6
- toil/utils/toilKill.py +8 -3
- toil/utils/toilLaunchCluster.py +251 -145
- toil/utils/toilMain.py +37 -16
- toil/utils/toilRsyncCluster.py +27 -14
- toil/utils/toilSshCluster.py +45 -22
- toil/utils/toilStats.py +75 -36
- toil/utils/toilStatus.py +226 -119
- toil/utils/toilUpdateEC2Instances.py +3 -1
- toil/version.py +6 -6
- toil/wdl/utils.py +5 -5
- toil/wdl/wdltoil.py +3528 -1053
- toil/worker.py +370 -149
- toil-8.1.0b1.dist-info/METADATA +178 -0
- toil-8.1.0b1.dist-info/RECORD +259 -0
- {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/WHEEL +1 -1
- toil-7.0.0.dist-info/METADATA +0 -158
- toil-7.0.0.dist-info/RECORD +0 -244
- {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/LICENSE +0 -0
- {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/entry_points.txt +0 -0
- {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/top_level.txt +0 -0
toil/jobStores/googleJobStore.py
CHANGED
|
@@ -20,20 +20,24 @@ import uuid
|
|
|
20
20
|
from contextlib import contextmanager
|
|
21
21
|
from functools import wraps
|
|
22
22
|
from io import BytesIO
|
|
23
|
-
from typing import IO,
|
|
23
|
+
from typing import IO, Optional
|
|
24
24
|
from urllib.parse import ParseResult
|
|
25
25
|
|
|
26
|
-
from google.api_core.exceptions import (
|
|
27
|
-
|
|
28
|
-
|
|
26
|
+
from google.api_core.exceptions import (
|
|
27
|
+
GoogleAPICallError,
|
|
28
|
+
InternalServerError,
|
|
29
|
+
ServiceUnavailable,
|
|
30
|
+
)
|
|
29
31
|
from google.auth.exceptions import DefaultCredentialsError
|
|
30
32
|
from google.cloud import exceptions, storage
|
|
31
33
|
|
|
32
|
-
from toil.jobStores.abstractJobStore import (
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
34
|
+
from toil.jobStores.abstractJobStore import (
|
|
35
|
+
AbstractJobStore,
|
|
36
|
+
JobStoreExistsException,
|
|
37
|
+
NoSuchFileException,
|
|
38
|
+
NoSuchJobException,
|
|
39
|
+
NoSuchJobStoreException,
|
|
40
|
+
)
|
|
37
41
|
from toil.jobStores.utils import ReadablePipe, WritablePipe
|
|
38
42
|
from toil.lib.compatibility import compat_bytes
|
|
39
43
|
from toil.lib.io import AtomicFileCreate
|
|
@@ -42,7 +46,7 @@ from toil.lib.retry import old_retry
|
|
|
42
46
|
|
|
43
47
|
log = logging.getLogger(__name__)
|
|
44
48
|
|
|
45
|
-
GOOGLE_STORAGE =
|
|
49
|
+
GOOGLE_STORAGE = "gs"
|
|
46
50
|
|
|
47
51
|
MAX_BATCH_SIZE = 1000
|
|
48
52
|
|
|
@@ -75,19 +79,22 @@ def google_retry(f):
|
|
|
75
79
|
|
|
76
80
|
It should wrap any function that makes use of the Google Client API
|
|
77
81
|
"""
|
|
82
|
+
|
|
78
83
|
@wraps(f)
|
|
79
84
|
def wrapper(*args, **kwargs):
|
|
80
|
-
for attempt in old_retry(
|
|
81
|
-
|
|
82
|
-
|
|
85
|
+
for attempt in old_retry(
|
|
86
|
+
delays=truncExpBackoff(), timeout=300, predicate=google_retry_predicate
|
|
87
|
+
):
|
|
83
88
|
with attempt:
|
|
84
89
|
return f(*args, **kwargs)
|
|
90
|
+
|
|
85
91
|
return wrapper
|
|
86
92
|
|
|
87
93
|
|
|
88
94
|
class GoogleJobStore(AbstractJobStore):
|
|
89
95
|
|
|
90
|
-
nodeServiceAccountJson =
|
|
96
|
+
nodeServiceAccountJson = "/root/service_account.json"
|
|
97
|
+
|
|
91
98
|
def __init__(self, locator: str) -> None:
|
|
92
99
|
super().__init__(locator)
|
|
93
100
|
|
|
@@ -99,20 +106,19 @@ class GoogleJobStore(AbstractJobStore):
|
|
|
99
106
|
projectID = None
|
|
100
107
|
|
|
101
108
|
self.projectID = projectID
|
|
102
|
-
self.bucketName = namePrefix+"--toil"
|
|
109
|
+
self.bucketName = namePrefix + "--toil"
|
|
103
110
|
log.debug("Instantiating google jobStore with name: %s", self.bucketName)
|
|
104
111
|
|
|
105
112
|
# this is a :class:`~google.cloud.storage.bucket.Bucket`
|
|
106
113
|
self.bucket = None
|
|
107
114
|
|
|
108
|
-
self.statsBaseID =
|
|
109
|
-
self.statsReadPrefix =
|
|
110
|
-
self.readStatsBaseID = self.statsReadPrefix+self.statsBaseID
|
|
115
|
+
self.statsBaseID = "f16eef0c-b597-4b8b-9b0c-4d605b4f506c"
|
|
116
|
+
self.statsReadPrefix = "_"
|
|
117
|
+
self.readStatsBaseID = self.statsReadPrefix + self.statsBaseID
|
|
111
118
|
|
|
112
119
|
self.sseKey = None
|
|
113
120
|
self.storageClient = self.create_client()
|
|
114
121
|
|
|
115
|
-
|
|
116
122
|
@classmethod
|
|
117
123
|
def create_client(cls) -> storage.Client:
|
|
118
124
|
"""
|
|
@@ -127,28 +133,36 @@ class GoogleJobStore(AbstractJobStore):
|
|
|
127
133
|
# Determine if we have an override environment variable for our credentials.
|
|
128
134
|
# We get the path to check existence, but Google Storage works out what
|
|
129
135
|
# to use later by looking at the environment again.
|
|
130
|
-
credentials_path: Optional[str] = os.getenv(
|
|
136
|
+
credentials_path: Optional[str] = os.getenv(
|
|
137
|
+
"GOOGLE_APPLICATION_CREDENTIALS", None
|
|
138
|
+
)
|
|
131
139
|
if credentials_path is not None and not os.path.exists(credentials_path):
|
|
132
140
|
# If the file is missing, complain.
|
|
133
141
|
# This variable holds a file name and not any sensitive data itself.
|
|
134
|
-
log.warning(
|
|
135
|
-
|
|
136
|
-
|
|
142
|
+
log.warning(
|
|
143
|
+
"File '%s' from GOOGLE_APPLICATION_CREDENTIALS is unavailable! "
|
|
144
|
+
"We may not be able to authenticate!",
|
|
145
|
+
credentials_path,
|
|
146
|
+
)
|
|
137
147
|
|
|
138
148
|
if credentials_path is None and os.path.exists(cls.nodeServiceAccountJson):
|
|
139
149
|
try:
|
|
140
150
|
# load credentials from a particular file on GCE nodes if an override path is not set
|
|
141
|
-
return storage.Client.from_service_account_json(
|
|
151
|
+
return storage.Client.from_service_account_json(
|
|
152
|
+
cls.nodeServiceAccountJson
|
|
153
|
+
)
|
|
142
154
|
except OSError:
|
|
143
155
|
# Probably we don't have permission to use the file.
|
|
144
|
-
log.warning(
|
|
145
|
-
|
|
156
|
+
log.warning(
|
|
157
|
+
"File '%s' exists but didn't work to authenticate!",
|
|
158
|
+
cls.nodeServiceAccountJson,
|
|
159
|
+
)
|
|
146
160
|
|
|
147
161
|
# Either a filename is specified, or our fallback file isn't there.
|
|
148
162
|
try:
|
|
149
163
|
# See if Google can work out how to authenticate.
|
|
150
164
|
return storage.Client()
|
|
151
|
-
except (DefaultCredentialsError,
|
|
165
|
+
except (DefaultCredentialsError, OSError):
|
|
152
166
|
# Depending on which Google codepath or module version (???)
|
|
153
167
|
# realizes we have no credentials, we can get an EnvironemntError,
|
|
154
168
|
# or the new DefaultCredentialsError we are supposedly specced to
|
|
@@ -158,7 +172,6 @@ class GoogleJobStore(AbstractJobStore):
|
|
|
158
172
|
# This is likely to happen all the time so don't warn.
|
|
159
173
|
return storage.Client.create_anonymous_client()
|
|
160
174
|
|
|
161
|
-
|
|
162
175
|
@google_retry
|
|
163
176
|
def initialize(self, config=None):
|
|
164
177
|
try:
|
|
@@ -169,7 +182,7 @@ class GoogleJobStore(AbstractJobStore):
|
|
|
169
182
|
|
|
170
183
|
# set up sever side encryption after we set up config in super
|
|
171
184
|
if self.config.sseKey is not None:
|
|
172
|
-
with open(self.config.sseKey,
|
|
185
|
+
with open(self.config.sseKey, "rb") as f:
|
|
173
186
|
self.sseKey = compat_bytes(f.read())
|
|
174
187
|
assert len(self.sseKey) == 32
|
|
175
188
|
|
|
@@ -199,13 +212,13 @@ class GoogleJobStore(AbstractJobStore):
|
|
|
199
212
|
count = 0
|
|
200
213
|
while count < len(blobs_to_delete):
|
|
201
214
|
with self.storageClient.batch():
|
|
202
|
-
for blob in blobs_to_delete[count:count + MAX_BATCH_SIZE]:
|
|
215
|
+
for blob in blobs_to_delete[count : count + MAX_BATCH_SIZE]:
|
|
203
216
|
blob.delete()
|
|
204
217
|
count = count + MAX_BATCH_SIZE
|
|
205
218
|
self.bucket.delete()
|
|
206
219
|
|
|
207
220
|
def _new_job_id(self):
|
|
208
|
-
return f
|
|
221
|
+
return f"job-{uuid.uuid4()}"
|
|
209
222
|
|
|
210
223
|
def assign_job_id(self, job_description):
|
|
211
224
|
jobStoreID = self._new_job_id()
|
|
@@ -219,12 +232,17 @@ class GoogleJobStore(AbstractJobStore):
|
|
|
219
232
|
|
|
220
233
|
def create_job(self, job_description):
|
|
221
234
|
job_description.pre_update_hook()
|
|
222
|
-
self._write_bytes(
|
|
235
|
+
self._write_bytes(
|
|
236
|
+
job_description.jobStoreID,
|
|
237
|
+
pickle.dumps(job_description, protocol=pickle.HIGHEST_PROTOCOL),
|
|
238
|
+
)
|
|
223
239
|
return job_description
|
|
224
240
|
|
|
225
241
|
@google_retry
|
|
226
242
|
def job_exists(self, job_id):
|
|
227
|
-
return self.bucket.blob(
|
|
243
|
+
return self.bucket.blob(
|
|
244
|
+
compat_bytes(job_id), encryption_key=self.sseKey
|
|
245
|
+
).exists()
|
|
228
246
|
|
|
229
247
|
@google_retry
|
|
230
248
|
def get_public_url(self, fileName):
|
|
@@ -251,7 +269,11 @@ class GoogleJobStore(AbstractJobStore):
|
|
|
251
269
|
|
|
252
270
|
def update_job(self, job):
|
|
253
271
|
job.pre_update_hook()
|
|
254
|
-
self._write_bytes(
|
|
272
|
+
self._write_bytes(
|
|
273
|
+
job.jobStoreID,
|
|
274
|
+
pickle.dumps(job, protocol=pickle.HIGHEST_PROTOCOL),
|
|
275
|
+
update=True,
|
|
276
|
+
)
|
|
255
277
|
|
|
256
278
|
@google_retry
|
|
257
279
|
def delete_job(self, job_id):
|
|
@@ -269,32 +291,40 @@ class GoogleJobStore(AbstractJobStore):
|
|
|
269
291
|
|
|
270
292
|
env = {}
|
|
271
293
|
|
|
272
|
-
credentials_path: Optional[str] = os.getenv(
|
|
294
|
+
credentials_path: Optional[str] = os.getenv(
|
|
295
|
+
"GOOGLE_APPLICATION_CREDENTIALS", None
|
|
296
|
+
)
|
|
273
297
|
if credentials_path is not None:
|
|
274
298
|
# Send along the environment variable that points to the credentials file.
|
|
275
299
|
# It must be available in the same place on all nodes.
|
|
276
|
-
env[
|
|
300
|
+
env["GOOGLE_APPLICATION_CREDENTIALS"] = credentials_path
|
|
277
301
|
|
|
278
302
|
return env
|
|
279
303
|
|
|
280
304
|
@google_retry
|
|
281
305
|
def jobs(self):
|
|
282
|
-
for blob in self.bucket.list_blobs(prefix=b
|
|
306
|
+
for blob in self.bucket.list_blobs(prefix=b"job-"):
|
|
283
307
|
jobStoreID = blob.name
|
|
284
308
|
# TODO: do this better
|
|
285
|
-
if len(jobStoreID) == 40 and jobStoreID.startswith(
|
|
309
|
+
if len(jobStoreID) == 40 and jobStoreID.startswith(
|
|
310
|
+
"job-"
|
|
311
|
+
): # 'job-' + uuid length
|
|
286
312
|
yield self.load_job(jobStoreID)
|
|
287
313
|
|
|
288
314
|
def write_file(self, local_path, job_id=None, cleanup=False):
|
|
289
315
|
fileID = self._new_id(isFile=True, jobStoreID=job_id if cleanup else None)
|
|
290
|
-
with open(local_path,
|
|
316
|
+
with open(local_path, "rb") as f:
|
|
291
317
|
self._write_file(fileID, f)
|
|
292
318
|
return fileID
|
|
293
319
|
|
|
294
320
|
@contextmanager
|
|
295
|
-
def write_file_stream(
|
|
321
|
+
def write_file_stream(
|
|
322
|
+
self, job_id=None, cleanup=False, basename=None, encoding=None, errors=None
|
|
323
|
+
):
|
|
296
324
|
fileID = self._new_id(isFile=True, jobStoreID=job_id if cleanup else None)
|
|
297
|
-
with self._upload_stream(
|
|
325
|
+
with self._upload_stream(
|
|
326
|
+
fileID, update=False, encoding=encoding, errors=errors
|
|
327
|
+
) as writable:
|
|
298
328
|
yield writable, fileID
|
|
299
329
|
|
|
300
330
|
def get_empty_file_store_id(self, jobStoreID=None, cleanup=False, basename=None):
|
|
@@ -309,16 +339,19 @@ class GoogleJobStore(AbstractJobStore):
|
|
|
309
339
|
if not self.file_exists(file_id):
|
|
310
340
|
raise NoSuchFileException(file_id)
|
|
311
341
|
with AtomicFileCreate(local_path) as tmpPath:
|
|
312
|
-
with open(tmpPath,
|
|
313
|
-
blob = self.bucket.get_blob(
|
|
342
|
+
with open(tmpPath, "wb") as writeable:
|
|
343
|
+
blob = self.bucket.get_blob(
|
|
344
|
+
compat_bytes(file_id), encryption_key=self.sseKey
|
|
345
|
+
)
|
|
314
346
|
blob.download_to_file(writeable)
|
|
315
|
-
if getattr(file_id,
|
|
347
|
+
if getattr(file_id, "executable", False):
|
|
316
348
|
os.chmod(local_path, os.stat(local_path).st_mode | stat.S_IXUSR)
|
|
317
349
|
|
|
318
350
|
@contextmanager
|
|
319
351
|
def read_file_stream(self, file_id, encoding=None, errors=None):
|
|
320
|
-
with self.read_shared_file_stream(
|
|
321
|
-
|
|
352
|
+
with self.read_shared_file_stream(
|
|
353
|
+
file_id, isProtected=True, encoding=encoding, errors=errors
|
|
354
|
+
) as readable:
|
|
322
355
|
yield readable
|
|
323
356
|
|
|
324
357
|
def delete_file(self, file_id):
|
|
@@ -326,32 +359,49 @@ class GoogleJobStore(AbstractJobStore):
|
|
|
326
359
|
|
|
327
360
|
@google_retry
|
|
328
361
|
def file_exists(self, file_id):
|
|
329
|
-
return self.bucket.blob(
|
|
362
|
+
return self.bucket.blob(
|
|
363
|
+
compat_bytes(file_id), encryption_key=self.sseKey
|
|
364
|
+
).exists()
|
|
330
365
|
|
|
331
366
|
@google_retry
|
|
332
367
|
def get_file_size(self, file_id):
|
|
333
368
|
if not self.file_exists(file_id):
|
|
334
369
|
return 0
|
|
335
|
-
return self.bucket.get_blob(
|
|
370
|
+
return self.bucket.get_blob(
|
|
371
|
+
compat_bytes(file_id), encryption_key=self.sseKey
|
|
372
|
+
).size
|
|
336
373
|
|
|
337
374
|
def update_file(self, file_id, local_path):
|
|
338
|
-
with open(local_path,
|
|
375
|
+
with open(local_path, "rb") as f:
|
|
339
376
|
self._write_file(file_id, f, update=True)
|
|
340
377
|
|
|
341
378
|
@contextmanager
|
|
342
379
|
def update_file_stream(self, file_id, encoding=None, errors=None):
|
|
343
|
-
with self._upload_stream(
|
|
380
|
+
with self._upload_stream(
|
|
381
|
+
file_id, update=True, encoding=encoding, errors=errors
|
|
382
|
+
) as writable:
|
|
344
383
|
yield writable
|
|
345
384
|
|
|
346
385
|
@contextmanager
|
|
347
|
-
def write_shared_file_stream(
|
|
348
|
-
|
|
349
|
-
|
|
386
|
+
def write_shared_file_stream(
|
|
387
|
+
self, shared_file_name, encrypted=True, encoding=None, errors=None
|
|
388
|
+
):
|
|
389
|
+
with self._upload_stream(
|
|
390
|
+
shared_file_name,
|
|
391
|
+
encrypt=encrypted,
|
|
392
|
+
update=True,
|
|
393
|
+
encoding=encoding,
|
|
394
|
+
errors=errors,
|
|
395
|
+
) as writable:
|
|
350
396
|
yield writable
|
|
351
397
|
|
|
352
398
|
@contextmanager
|
|
353
|
-
def read_shared_file_stream(
|
|
354
|
-
|
|
399
|
+
def read_shared_file_stream(
|
|
400
|
+
self, shared_file_name, isProtected=True, encoding=None, errors=None
|
|
401
|
+
):
|
|
402
|
+
with self._download_stream(
|
|
403
|
+
shared_file_name, encrypt=isProtected, encoding=encoding, errors=errors
|
|
404
|
+
) as readable:
|
|
355
405
|
yield readable
|
|
356
406
|
|
|
357
407
|
@classmethod
|
|
@@ -374,7 +424,7 @@ class GoogleJobStore(AbstractJobStore):
|
|
|
374
424
|
fileName = url.path
|
|
375
425
|
|
|
376
426
|
# remove leading '/', which can cause problems if fileName is a path
|
|
377
|
-
if fileName.startswith(
|
|
427
|
+
if fileName.startswith("/"):
|
|
378
428
|
fileName = fileName[1:]
|
|
379
429
|
|
|
380
430
|
storageClient = cls.create_client()
|
|
@@ -383,7 +433,7 @@ class GoogleJobStore(AbstractJobStore):
|
|
|
383
433
|
|
|
384
434
|
if exists:
|
|
385
435
|
if not blob.exists():
|
|
386
|
-
raise NoSuchFileException
|
|
436
|
+
raise NoSuchFileException(fileName)
|
|
387
437
|
# sync with cloud so info like size is available
|
|
388
438
|
blob.reload()
|
|
389
439
|
return blob
|
|
@@ -413,7 +463,7 @@ class GoogleJobStore(AbstractJobStore):
|
|
|
413
463
|
|
|
414
464
|
@classmethod
|
|
415
465
|
def _supports_url(cls, url, export=False):
|
|
416
|
-
return url.scheme.lower() ==
|
|
466
|
+
return url.scheme.lower() == "gs"
|
|
417
467
|
|
|
418
468
|
@classmethod
|
|
419
469
|
def _write_to_url(cls, readable: bytes, url: str, executable: bool = False) -> None:
|
|
@@ -421,12 +471,16 @@ class GoogleJobStore(AbstractJobStore):
|
|
|
421
471
|
blob.upload_from_file(readable)
|
|
422
472
|
|
|
423
473
|
@classmethod
|
|
424
|
-
def _list_url(cls, url: ParseResult) ->
|
|
425
|
-
raise NotImplementedError(
|
|
474
|
+
def _list_url(cls, url: ParseResult) -> list[str]:
|
|
475
|
+
raise NotImplementedError(
|
|
476
|
+
"Listing files in Google buckets is not yet implemented!"
|
|
477
|
+
)
|
|
426
478
|
|
|
427
479
|
@classmethod
|
|
428
480
|
def _get_is_directory(cls, url: ParseResult) -> bool:
|
|
429
|
-
raise NotImplementedError(
|
|
481
|
+
raise NotImplementedError(
|
|
482
|
+
"Checking directory status in Google buckets is not yet implemented!"
|
|
483
|
+
)
|
|
430
484
|
|
|
431
485
|
@google_retry
|
|
432
486
|
def write_logs(self, msg: bytes) -> None:
|
|
@@ -456,7 +510,9 @@ class GoogleJobStore(AbstractJobStore):
|
|
|
456
510
|
if not read_all:
|
|
457
511
|
# rename this file by copying it and deleting the old version to avoid
|
|
458
512
|
# rereading it
|
|
459
|
-
newID =
|
|
513
|
+
newID = (
|
|
514
|
+
self.readStatsBaseID + blob.name[len(self.statsBaseID) :]
|
|
515
|
+
)
|
|
460
516
|
# NOTE: just copies then deletes old.
|
|
461
517
|
self.bucket.rename_blob(blob, compat_bytes(newID))
|
|
462
518
|
except NoSuchFileException:
|
|
@@ -472,7 +528,7 @@ class GoogleJobStore(AbstractJobStore):
|
|
|
472
528
|
if lastTry:
|
|
473
529
|
# this was our second try, we are reasonably sure there aren't any stats
|
|
474
530
|
# left to gather
|
|
475
|
-
|
|
531
|
+
break
|
|
476
532
|
# Try one more time in a couple seconds
|
|
477
533
|
time.sleep(5)
|
|
478
534
|
lastTry = True
|
|
@@ -486,11 +542,11 @@ class GoogleJobStore(AbstractJobStore):
|
|
|
486
542
|
@staticmethod
|
|
487
543
|
def _new_id(isFile=False, jobStoreID=None):
|
|
488
544
|
if isFile and jobStoreID: # file associated with job
|
|
489
|
-
return jobStoreID+str(uuid.uuid4())
|
|
545
|
+
return jobStoreID + str(uuid.uuid4())
|
|
490
546
|
elif isFile: # nonassociated file
|
|
491
547
|
return str(uuid.uuid4())
|
|
492
548
|
else: # job id
|
|
493
|
-
return f
|
|
549
|
+
return f"job-{uuid.uuid4()}"
|
|
494
550
|
|
|
495
551
|
@google_retry
|
|
496
552
|
def _delete(self, jobStoreFileID):
|
|
@@ -514,8 +570,12 @@ class GoogleJobStore(AbstractJobStore):
|
|
|
514
570
|
return job.download_as_string()
|
|
515
571
|
|
|
516
572
|
@google_retry
|
|
517
|
-
def _write_file(
|
|
518
|
-
|
|
573
|
+
def _write_file(
|
|
574
|
+
self, jobStoreID: str, fileObj: bytes, update=False, encrypt=True
|
|
575
|
+
) -> None:
|
|
576
|
+
blob = self.bucket.blob(
|
|
577
|
+
compat_bytes(jobStoreID), encryption_key=self.sseKey if encrypt else None
|
|
578
|
+
)
|
|
519
579
|
if not update:
|
|
520
580
|
# TODO: should probably raise a special exception and be added to all jobStores
|
|
521
581
|
assert not blob.exists()
|
|
@@ -529,7 +589,9 @@ class GoogleJobStore(AbstractJobStore):
|
|
|
529
589
|
|
|
530
590
|
@contextmanager
|
|
531
591
|
@google_retry
|
|
532
|
-
def _upload_stream(
|
|
592
|
+
def _upload_stream(
|
|
593
|
+
self, fileName, update=False, encrypt=True, encoding=None, errors=None
|
|
594
|
+
):
|
|
533
595
|
"""
|
|
534
596
|
Yields a context manager that can be used to write to the bucket
|
|
535
597
|
with a stream. See :class:`~toil.jobStores.utils.WritablePipe` for an example.
|
|
@@ -555,7 +617,10 @@ class GoogleJobStore(AbstractJobStore):
|
|
|
555
617
|
:return: an instance of WritablePipe.
|
|
556
618
|
:rtype: :class:`~toil.jobStores.utils.writablePipe`
|
|
557
619
|
"""
|
|
558
|
-
blob = self.bucket.blob(
|
|
620
|
+
blob = self.bucket.blob(
|
|
621
|
+
compat_bytes(fileName), encryption_key=self.sseKey if encrypt else None
|
|
622
|
+
)
|
|
623
|
+
|
|
559
624
|
class UploadPipe(WritablePipe):
|
|
560
625
|
def readFrom(self, readable):
|
|
561
626
|
if not update:
|
|
@@ -591,7 +656,9 @@ class GoogleJobStore(AbstractJobStore):
|
|
|
591
656
|
:rtype: :class:`~toil.jobStores.utils.ReadablePipe`
|
|
592
657
|
"""
|
|
593
658
|
|
|
594
|
-
blob = self.bucket.get_blob(
|
|
659
|
+
blob = self.bucket.get_blob(
|
|
660
|
+
compat_bytes(fileName), encryption_key=self.sseKey if encrypt else None
|
|
661
|
+
)
|
|
595
662
|
if blob is None:
|
|
596
663
|
raise NoSuchFileException(fileName)
|
|
597
664
|
|
toil/jobStores/utils.py
CHANGED
|
@@ -10,6 +10,7 @@ from toil.lib.threading import ExceptionalThread
|
|
|
10
10
|
|
|
11
11
|
log = logging.getLogger(__name__)
|
|
12
12
|
|
|
13
|
+
|
|
13
14
|
class WritablePipe(ABC):
|
|
14
15
|
"""
|
|
15
16
|
An object-oriented wrapper for os.pipe. Clients should subclass it, implement
|
|
@@ -84,7 +85,7 @@ class WritablePipe(ABC):
|
|
|
84
85
|
raise NotImplementedError()
|
|
85
86
|
|
|
86
87
|
def _reader(self):
|
|
87
|
-
with os.fdopen(self.readable_fh,
|
|
88
|
+
with os.fdopen(self.readable_fh, "rb") as readable:
|
|
88
89
|
# TODO: If the reader somehow crashes here, both threads might try
|
|
89
90
|
# to close readable_fh. Fortunately we don't do anything that
|
|
90
91
|
# should be able to fail here.
|
|
@@ -112,7 +113,12 @@ class WritablePipe(ABC):
|
|
|
112
113
|
|
|
113
114
|
def __enter__(self):
|
|
114
115
|
self.readable_fh, writable_fh = os.pipe()
|
|
115
|
-
self.writable = os.fdopen(
|
|
116
|
+
self.writable = os.fdopen(
|
|
117
|
+
writable_fh,
|
|
118
|
+
"wb" if self.encoding == None else "wt",
|
|
119
|
+
encoding=self.encoding,
|
|
120
|
+
errors=self.errors,
|
|
121
|
+
)
|
|
116
122
|
self.thread = ExceptionalThread(target=self._reader)
|
|
117
123
|
self.thread.start()
|
|
118
124
|
return self.writable
|
|
@@ -132,7 +138,9 @@ class WritablePipe(ABC):
|
|
|
132
138
|
# already an exception in the main thread
|
|
133
139
|
raise
|
|
134
140
|
else:
|
|
135
|
-
log.error(
|
|
141
|
+
log.error(
|
|
142
|
+
"Swallowing additional exception in reader thread: %s", str(e)
|
|
143
|
+
)
|
|
136
144
|
finally:
|
|
137
145
|
# The responsibility for closing the readable end is generally that of the reader
|
|
138
146
|
# thread. To cover the small window before the reader takes over we also close it here.
|
|
@@ -217,7 +225,7 @@ class ReadablePipe(ABC):
|
|
|
217
225
|
|
|
218
226
|
def _writer(self):
|
|
219
227
|
try:
|
|
220
|
-
with os.fdopen(self.writable_fh,
|
|
228
|
+
with os.fdopen(self.writable_fh, "wb") as writable:
|
|
221
229
|
self.writeTo(writable)
|
|
222
230
|
except OSError as e:
|
|
223
231
|
# The other side of the pipe may have been closed by the
|
|
@@ -244,7 +252,12 @@ class ReadablePipe(ABC):
|
|
|
244
252
|
|
|
245
253
|
def __enter__(self):
|
|
246
254
|
readable_fh, self.writable_fh = os.pipe()
|
|
247
|
-
self.readable = os.fdopen(
|
|
255
|
+
self.readable = os.fdopen(
|
|
256
|
+
readable_fh,
|
|
257
|
+
"rb" if self.encoding == None else "rt",
|
|
258
|
+
encoding=self.encoding,
|
|
259
|
+
errors=self.errors,
|
|
260
|
+
)
|
|
248
261
|
self.thread = ExceptionalThread(target=self._writer)
|
|
249
262
|
self.thread.start()
|
|
250
263
|
return self.readable
|
|
@@ -264,6 +277,7 @@ class ReadablePipe(ABC):
|
|
|
264
277
|
# already an exception in the main thread
|
|
265
278
|
raise
|
|
266
279
|
|
|
280
|
+
|
|
267
281
|
class ReadableTransformingPipe(ReadablePipe):
|
|
268
282
|
"""
|
|
269
283
|
A pipe which is constructed around a readable stream, and which provides a
|
|
@@ -296,7 +310,6 @@ class ReadableTransformingPipe(ReadablePipe):
|
|
|
296
310
|
|
|
297
311
|
"""
|
|
298
312
|
|
|
299
|
-
|
|
300
313
|
def __init__(self, source, encoding=None, errors=None):
|
|
301
314
|
"""
|
|
302
315
|
:param str encoding: the name of the encoding used to encode the file. Encodings are the same
|
|
@@ -323,15 +336,17 @@ class ReadableTransformingPipe(ReadablePipe):
|
|
|
323
336
|
def writeTo(self, writable):
|
|
324
337
|
self.transform(self.source, writable)
|
|
325
338
|
|
|
339
|
+
|
|
326
340
|
class JobStoreUnavailableException(RuntimeError):
|
|
327
341
|
"""
|
|
328
342
|
Raised when a particular type of job store is requested but can't be used.
|
|
329
343
|
"""
|
|
330
344
|
|
|
345
|
+
|
|
331
346
|
def generate_locator(
|
|
332
347
|
job_store_type: str,
|
|
333
348
|
local_suggestion: Optional[str] = None,
|
|
334
|
-
decoration: Optional[str] = None
|
|
349
|
+
decoration: Optional[str] = None,
|
|
335
350
|
) -> str:
|
|
336
351
|
"""
|
|
337
352
|
Generate a random locator for a job store of the given type. Raises an
|
|
@@ -347,7 +362,7 @@ def generate_locator(
|
|
|
347
362
|
"""
|
|
348
363
|
|
|
349
364
|
# Prepare decoration for splicing into strings
|
|
350
|
-
decoration = (
|
|
365
|
+
decoration = ("-" + decoration) if decoration else ""
|
|
351
366
|
|
|
352
367
|
try:
|
|
353
368
|
if job_store_type == "google":
|
|
@@ -363,6 +378,7 @@ def generate_locator(
|
|
|
363
378
|
elif job_store_type == "aws":
|
|
364
379
|
# Make sure we have AWS
|
|
365
380
|
from toil.jobStores.aws.jobStore import AWSJobStore # noqa
|
|
381
|
+
|
|
366
382
|
# Find a region
|
|
367
383
|
from toil.lib.aws import get_current_aws_region
|
|
368
384
|
|
|
@@ -370,7 +386,9 @@ def generate_locator(
|
|
|
370
386
|
|
|
371
387
|
if not region:
|
|
372
388
|
# We can't generate an AWS job store without a region
|
|
373
|
-
raise JobStoreUnavailableException(
|
|
389
|
+
raise JobStoreUnavailableException(
|
|
390
|
+
f"{job_store_type} job store can't be made without a region"
|
|
391
|
+
)
|
|
374
392
|
|
|
375
393
|
# Roll a random name
|
|
376
394
|
return f"aws:{region}:toil{decoration}-{str(uuid.uuid4())}"
|
|
@@ -380,11 +398,14 @@ def generate_locator(
|
|
|
380
398
|
return local_suggestion
|
|
381
399
|
else:
|
|
382
400
|
# Pick a temp path
|
|
383
|
-
return os.path.join(
|
|
401
|
+
return os.path.join(
|
|
402
|
+
tempfile.gettempdir(), "toil-" + str(uuid.uuid4()) + decoration
|
|
403
|
+
)
|
|
384
404
|
else:
|
|
385
|
-
raise JobStoreUnavailableException(
|
|
405
|
+
raise JobStoreUnavailableException(
|
|
406
|
+
f"{job_store_type} job store isn't known"
|
|
407
|
+
)
|
|
386
408
|
except ImportError:
|
|
387
|
-
raise JobStoreUnavailableException(
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
409
|
+
raise JobStoreUnavailableException(
|
|
410
|
+
f"libraries for {job_store_type} job store are not installed"
|
|
411
|
+
)
|