toil 6.1.0a1__py3-none-any.whl → 8.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +122 -315
- toil/batchSystems/__init__.py +1 -0
- toil/batchSystems/abstractBatchSystem.py +173 -89
- toil/batchSystems/abstractGridEngineBatchSystem.py +272 -148
- toil/batchSystems/awsBatch.py +244 -135
- toil/batchSystems/cleanup_support.py +26 -16
- toil/batchSystems/contained_executor.py +31 -28
- toil/batchSystems/gridengine.py +86 -50
- toil/batchSystems/htcondor.py +166 -89
- toil/batchSystems/kubernetes.py +632 -382
- toil/batchSystems/local_support.py +20 -15
- toil/batchSystems/lsf.py +134 -81
- toil/batchSystems/lsfHelper.py +13 -11
- toil/batchSystems/mesos/__init__.py +41 -29
- toil/batchSystems/mesos/batchSystem.py +290 -151
- toil/batchSystems/mesos/executor.py +79 -50
- toil/batchSystems/mesos/test/__init__.py +31 -23
- toil/batchSystems/options.py +46 -28
- toil/batchSystems/registry.py +53 -19
- toil/batchSystems/singleMachine.py +296 -125
- toil/batchSystems/slurm.py +603 -138
- toil/batchSystems/torque.py +47 -33
- toil/bus.py +186 -76
- toil/common.py +664 -368
- toil/cwl/__init__.py +1 -1
- toil/cwl/cwltoil.py +1136 -483
- toil/cwl/utils.py +17 -22
- toil/deferred.py +63 -42
- toil/exceptions.py +5 -3
- toil/fileStores/__init__.py +5 -5
- toil/fileStores/abstractFileStore.py +140 -60
- toil/fileStores/cachingFileStore.py +717 -269
- toil/fileStores/nonCachingFileStore.py +116 -87
- toil/job.py +1225 -368
- toil/jobStores/abstractJobStore.py +416 -266
- toil/jobStores/aws/jobStore.py +863 -477
- toil/jobStores/aws/utils.py +201 -120
- toil/jobStores/conftest.py +3 -2
- toil/jobStores/fileJobStore.py +292 -154
- toil/jobStores/googleJobStore.py +140 -74
- toil/jobStores/utils.py +36 -15
- toil/leader.py +668 -272
- toil/lib/accelerators.py +115 -18
- toil/lib/aws/__init__.py +74 -31
- toil/lib/aws/ami.py +122 -87
- toil/lib/aws/iam.py +284 -108
- toil/lib/aws/s3.py +31 -0
- toil/lib/aws/session.py +214 -39
- toil/lib/aws/utils.py +287 -231
- toil/lib/bioio.py +13 -5
- toil/lib/compatibility.py +11 -6
- toil/lib/conversions.py +104 -47
- toil/lib/docker.py +131 -103
- toil/lib/ec2.py +361 -199
- toil/lib/ec2nodes.py +174 -106
- toil/lib/encryption/_dummy.py +5 -3
- toil/lib/encryption/_nacl.py +10 -6
- toil/lib/encryption/conftest.py +1 -0
- toil/lib/exceptions.py +26 -7
- toil/lib/expando.py +5 -3
- toil/lib/ftp_utils.py +217 -0
- toil/lib/generatedEC2Lists.py +127 -19
- toil/lib/humanize.py +6 -2
- toil/lib/integration.py +341 -0
- toil/lib/io.py +141 -15
- toil/lib/iterables.py +4 -2
- toil/lib/memoize.py +12 -8
- toil/lib/misc.py +66 -21
- toil/lib/objects.py +2 -2
- toil/lib/resources.py +68 -15
- toil/lib/retry.py +126 -81
- toil/lib/threading.py +299 -82
- toil/lib/throttle.py +16 -15
- toil/options/common.py +843 -409
- toil/options/cwl.py +175 -90
- toil/options/runner.py +50 -0
- toil/options/wdl.py +73 -17
- toil/provisioners/__init__.py +117 -46
- toil/provisioners/abstractProvisioner.py +332 -157
- toil/provisioners/aws/__init__.py +70 -33
- toil/provisioners/aws/awsProvisioner.py +1145 -715
- toil/provisioners/clusterScaler.py +541 -279
- toil/provisioners/gceProvisioner.py +282 -179
- toil/provisioners/node.py +155 -79
- toil/realtimeLogger.py +34 -22
- toil/resource.py +137 -75
- toil/server/app.py +128 -62
- toil/server/celery_app.py +3 -1
- toil/server/cli/wes_cwl_runner.py +82 -53
- toil/server/utils.py +54 -28
- toil/server/wes/abstract_backend.py +64 -26
- toil/server/wes/amazon_wes_utils.py +21 -15
- toil/server/wes/tasks.py +121 -63
- toil/server/wes/toil_backend.py +142 -107
- toil/server/wsgi_app.py +4 -3
- toil/serviceManager.py +58 -22
- toil/statsAndLogging.py +224 -70
- toil/test/__init__.py +282 -183
- toil/test/batchSystems/batchSystemTest.py +460 -210
- toil/test/batchSystems/batch_system_plugin_test.py +90 -0
- toil/test/batchSystems/test_gridengine.py +173 -0
- toil/test/batchSystems/test_lsf_helper.py +67 -58
- toil/test/batchSystems/test_slurm.py +110 -49
- toil/test/cactus/__init__.py +0 -0
- toil/test/cactus/test_cactus_integration.py +56 -0
- toil/test/cwl/cwlTest.py +496 -287
- toil/test/cwl/measure_default_memory.cwl +12 -0
- toil/test/cwl/not_run_required_input.cwl +29 -0
- toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
- toil/test/cwl/seqtk_seq.cwl +1 -1
- toil/test/docs/scriptsTest.py +69 -46
- toil/test/jobStores/jobStoreTest.py +427 -264
- toil/test/lib/aws/test_iam.py +118 -50
- toil/test/lib/aws/test_s3.py +16 -9
- toil/test/lib/aws/test_utils.py +5 -6
- toil/test/lib/dockerTest.py +118 -141
- toil/test/lib/test_conversions.py +113 -115
- toil/test/lib/test_ec2.py +58 -50
- toil/test/lib/test_integration.py +104 -0
- toil/test/lib/test_misc.py +12 -5
- toil/test/mesos/MesosDataStructuresTest.py +23 -10
- toil/test/mesos/helloWorld.py +7 -6
- toil/test/mesos/stress.py +25 -20
- toil/test/options/__init__.py +13 -0
- toil/test/options/options.py +42 -0
- toil/test/provisioners/aws/awsProvisionerTest.py +320 -150
- toil/test/provisioners/clusterScalerTest.py +440 -250
- toil/test/provisioners/clusterTest.py +166 -44
- toil/test/provisioners/gceProvisionerTest.py +174 -100
- toil/test/provisioners/provisionerTest.py +25 -13
- toil/test/provisioners/restartScript.py +5 -4
- toil/test/server/serverTest.py +188 -141
- toil/test/sort/restart_sort.py +137 -68
- toil/test/sort/sort.py +134 -66
- toil/test/sort/sortTest.py +91 -49
- toil/test/src/autoDeploymentTest.py +141 -101
- toil/test/src/busTest.py +20 -18
- toil/test/src/checkpointTest.py +8 -2
- toil/test/src/deferredFunctionTest.py +49 -35
- toil/test/src/dockerCheckTest.py +32 -24
- toil/test/src/environmentTest.py +135 -0
- toil/test/src/fileStoreTest.py +539 -272
- toil/test/src/helloWorldTest.py +7 -4
- toil/test/src/importExportFileTest.py +61 -31
- toil/test/src/jobDescriptionTest.py +46 -21
- toil/test/src/jobEncapsulationTest.py +2 -0
- toil/test/src/jobFileStoreTest.py +74 -50
- toil/test/src/jobServiceTest.py +187 -73
- toil/test/src/jobTest.py +121 -71
- toil/test/src/miscTests.py +19 -18
- toil/test/src/promisedRequirementTest.py +82 -36
- toil/test/src/promisesTest.py +7 -6
- toil/test/src/realtimeLoggerTest.py +10 -6
- toil/test/src/regularLogTest.py +71 -37
- toil/test/src/resourceTest.py +80 -49
- toil/test/src/restartDAGTest.py +36 -22
- toil/test/src/resumabilityTest.py +9 -2
- toil/test/src/retainTempDirTest.py +45 -14
- toil/test/src/systemTest.py +12 -8
- toil/test/src/threadingTest.py +44 -25
- toil/test/src/toilContextManagerTest.py +10 -7
- toil/test/src/userDefinedJobArgTypeTest.py +8 -5
- toil/test/src/workerTest.py +73 -23
- toil/test/utils/toilDebugTest.py +103 -33
- toil/test/utils/toilKillTest.py +4 -5
- toil/test/utils/utilsTest.py +245 -106
- toil/test/wdl/wdltoil_test.py +818 -149
- toil/test/wdl/wdltoil_test_kubernetes.py +91 -0
- toil/toilState.py +120 -35
- toil/utils/toilConfig.py +13 -4
- toil/utils/toilDebugFile.py +44 -27
- toil/utils/toilDebugJob.py +214 -27
- toil/utils/toilDestroyCluster.py +11 -6
- toil/utils/toilKill.py +8 -3
- toil/utils/toilLaunchCluster.py +256 -140
- toil/utils/toilMain.py +37 -16
- toil/utils/toilRsyncCluster.py +32 -14
- toil/utils/toilSshCluster.py +49 -22
- toil/utils/toilStats.py +356 -273
- toil/utils/toilStatus.py +292 -139
- toil/utils/toilUpdateEC2Instances.py +3 -1
- toil/version.py +12 -12
- toil/wdl/utils.py +5 -5
- toil/wdl/wdltoil.py +3913 -1033
- toil/worker.py +367 -184
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/LICENSE +25 -0
- toil-8.0.0.dist-info/METADATA +173 -0
- toil-8.0.0.dist-info/RECORD +253 -0
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/WHEEL +1 -1
- toil-6.1.0a1.dist-info/METADATA +0 -125
- toil-6.1.0a1.dist-info/RECORD +0 -237
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/entry_points.txt +0 -0
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/top_level.txt +0 -0
toil/jobStores/googleJobStore.py
CHANGED
|
@@ -20,20 +20,24 @@ import uuid
|
|
|
20
20
|
from contextlib import contextmanager
|
|
21
21
|
from functools import wraps
|
|
22
22
|
from io import BytesIO
|
|
23
|
-
from typing import IO,
|
|
23
|
+
from typing import IO, Optional
|
|
24
24
|
from urllib.parse import ParseResult
|
|
25
25
|
|
|
26
|
-
from google.api_core.exceptions import (
|
|
27
|
-
|
|
28
|
-
|
|
26
|
+
from google.api_core.exceptions import (
|
|
27
|
+
GoogleAPICallError,
|
|
28
|
+
InternalServerError,
|
|
29
|
+
ServiceUnavailable,
|
|
30
|
+
)
|
|
29
31
|
from google.auth.exceptions import DefaultCredentialsError
|
|
30
32
|
from google.cloud import exceptions, storage
|
|
31
33
|
|
|
32
|
-
from toil.jobStores.abstractJobStore import (
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
34
|
+
from toil.jobStores.abstractJobStore import (
|
|
35
|
+
AbstractJobStore,
|
|
36
|
+
JobStoreExistsException,
|
|
37
|
+
NoSuchFileException,
|
|
38
|
+
NoSuchJobException,
|
|
39
|
+
NoSuchJobStoreException,
|
|
40
|
+
)
|
|
37
41
|
from toil.jobStores.utils import ReadablePipe, WritablePipe
|
|
38
42
|
from toil.lib.compatibility import compat_bytes
|
|
39
43
|
from toil.lib.io import AtomicFileCreate
|
|
@@ -42,7 +46,7 @@ from toil.lib.retry import old_retry
|
|
|
42
46
|
|
|
43
47
|
log = logging.getLogger(__name__)
|
|
44
48
|
|
|
45
|
-
GOOGLE_STORAGE =
|
|
49
|
+
GOOGLE_STORAGE = "gs"
|
|
46
50
|
|
|
47
51
|
MAX_BATCH_SIZE = 1000
|
|
48
52
|
|
|
@@ -75,19 +79,22 @@ def google_retry(f):
|
|
|
75
79
|
|
|
76
80
|
It should wrap any function that makes use of the Google Client API
|
|
77
81
|
"""
|
|
82
|
+
|
|
78
83
|
@wraps(f)
|
|
79
84
|
def wrapper(*args, **kwargs):
|
|
80
|
-
for attempt in old_retry(
|
|
81
|
-
|
|
82
|
-
|
|
85
|
+
for attempt in old_retry(
|
|
86
|
+
delays=truncExpBackoff(), timeout=300, predicate=google_retry_predicate
|
|
87
|
+
):
|
|
83
88
|
with attempt:
|
|
84
89
|
return f(*args, **kwargs)
|
|
90
|
+
|
|
85
91
|
return wrapper
|
|
86
92
|
|
|
87
93
|
|
|
88
94
|
class GoogleJobStore(AbstractJobStore):
|
|
89
95
|
|
|
90
|
-
nodeServiceAccountJson =
|
|
96
|
+
nodeServiceAccountJson = "/root/service_account.json"
|
|
97
|
+
|
|
91
98
|
def __init__(self, locator: str) -> None:
|
|
92
99
|
super().__init__(locator)
|
|
93
100
|
|
|
@@ -99,20 +106,19 @@ class GoogleJobStore(AbstractJobStore):
|
|
|
99
106
|
projectID = None
|
|
100
107
|
|
|
101
108
|
self.projectID = projectID
|
|
102
|
-
self.bucketName = namePrefix+"--toil"
|
|
109
|
+
self.bucketName = namePrefix + "--toil"
|
|
103
110
|
log.debug("Instantiating google jobStore with name: %s", self.bucketName)
|
|
104
111
|
|
|
105
112
|
# this is a :class:`~google.cloud.storage.bucket.Bucket`
|
|
106
113
|
self.bucket = None
|
|
107
114
|
|
|
108
|
-
self.statsBaseID =
|
|
109
|
-
self.statsReadPrefix =
|
|
110
|
-
self.readStatsBaseID = self.statsReadPrefix+self.statsBaseID
|
|
115
|
+
self.statsBaseID = "f16eef0c-b597-4b8b-9b0c-4d605b4f506c"
|
|
116
|
+
self.statsReadPrefix = "_"
|
|
117
|
+
self.readStatsBaseID = self.statsReadPrefix + self.statsBaseID
|
|
111
118
|
|
|
112
119
|
self.sseKey = None
|
|
113
120
|
self.storageClient = self.create_client()
|
|
114
121
|
|
|
115
|
-
|
|
116
122
|
@classmethod
|
|
117
123
|
def create_client(cls) -> storage.Client:
|
|
118
124
|
"""
|
|
@@ -127,28 +133,36 @@ class GoogleJobStore(AbstractJobStore):
|
|
|
127
133
|
# Determine if we have an override environment variable for our credentials.
|
|
128
134
|
# We get the path to check existence, but Google Storage works out what
|
|
129
135
|
# to use later by looking at the environment again.
|
|
130
|
-
credentials_path: Optional[str] = os.getenv(
|
|
136
|
+
credentials_path: Optional[str] = os.getenv(
|
|
137
|
+
"GOOGLE_APPLICATION_CREDENTIALS", None
|
|
138
|
+
)
|
|
131
139
|
if credentials_path is not None and not os.path.exists(credentials_path):
|
|
132
140
|
# If the file is missing, complain.
|
|
133
141
|
# This variable holds a file name and not any sensitive data itself.
|
|
134
|
-
log.warning(
|
|
135
|
-
|
|
136
|
-
|
|
142
|
+
log.warning(
|
|
143
|
+
"File '%s' from GOOGLE_APPLICATION_CREDENTIALS is unavailable! "
|
|
144
|
+
"We may not be able to authenticate!",
|
|
145
|
+
credentials_path,
|
|
146
|
+
)
|
|
137
147
|
|
|
138
148
|
if credentials_path is None and os.path.exists(cls.nodeServiceAccountJson):
|
|
139
149
|
try:
|
|
140
150
|
# load credentials from a particular file on GCE nodes if an override path is not set
|
|
141
|
-
return storage.Client.from_service_account_json(
|
|
151
|
+
return storage.Client.from_service_account_json(
|
|
152
|
+
cls.nodeServiceAccountJson
|
|
153
|
+
)
|
|
142
154
|
except OSError:
|
|
143
155
|
# Probably we don't have permission to use the file.
|
|
144
|
-
log.warning(
|
|
145
|
-
|
|
156
|
+
log.warning(
|
|
157
|
+
"File '%s' exists but didn't work to authenticate!",
|
|
158
|
+
cls.nodeServiceAccountJson,
|
|
159
|
+
)
|
|
146
160
|
|
|
147
161
|
# Either a filename is specified, or our fallback file isn't there.
|
|
148
162
|
try:
|
|
149
163
|
# See if Google can work out how to authenticate.
|
|
150
164
|
return storage.Client()
|
|
151
|
-
except (DefaultCredentialsError,
|
|
165
|
+
except (DefaultCredentialsError, OSError):
|
|
152
166
|
# Depending on which Google codepath or module version (???)
|
|
153
167
|
# realizes we have no credentials, we can get an EnvironemntError,
|
|
154
168
|
# or the new DefaultCredentialsError we are supposedly specced to
|
|
@@ -158,18 +172,17 @@ class GoogleJobStore(AbstractJobStore):
|
|
|
158
172
|
# This is likely to happen all the time so don't warn.
|
|
159
173
|
return storage.Client.create_anonymous_client()
|
|
160
174
|
|
|
161
|
-
|
|
162
175
|
@google_retry
|
|
163
176
|
def initialize(self, config=None):
|
|
164
177
|
try:
|
|
165
178
|
self.bucket = self.storageClient.create_bucket(self.bucketName)
|
|
166
179
|
except exceptions.Conflict:
|
|
167
|
-
raise JobStoreExistsException(self.locator)
|
|
180
|
+
raise JobStoreExistsException(self.locator, "google")
|
|
168
181
|
super().initialize(config)
|
|
169
182
|
|
|
170
183
|
# set up sever side encryption after we set up config in super
|
|
171
184
|
if self.config.sseKey is not None:
|
|
172
|
-
with open(self.config.sseKey,
|
|
185
|
+
with open(self.config.sseKey, "rb") as f:
|
|
173
186
|
self.sseKey = compat_bytes(f.read())
|
|
174
187
|
assert len(self.sseKey) == 32
|
|
175
188
|
|
|
@@ -178,7 +191,7 @@ class GoogleJobStore(AbstractJobStore):
|
|
|
178
191
|
try:
|
|
179
192
|
self.bucket = self.storageClient.get_bucket(self.bucketName)
|
|
180
193
|
except exceptions.NotFound:
|
|
181
|
-
raise NoSuchJobStoreException(self.locator)
|
|
194
|
+
raise NoSuchJobStoreException(self.locator, "google")
|
|
182
195
|
super().resume()
|
|
183
196
|
|
|
184
197
|
@google_retry
|
|
@@ -199,18 +212,17 @@ class GoogleJobStore(AbstractJobStore):
|
|
|
199
212
|
count = 0
|
|
200
213
|
while count < len(blobs_to_delete):
|
|
201
214
|
with self.storageClient.batch():
|
|
202
|
-
for blob in blobs_to_delete[count:count + MAX_BATCH_SIZE]:
|
|
215
|
+
for blob in blobs_to_delete[count : count + MAX_BATCH_SIZE]:
|
|
203
216
|
blob.delete()
|
|
204
217
|
count = count + MAX_BATCH_SIZE
|
|
205
218
|
self.bucket.delete()
|
|
206
219
|
|
|
207
220
|
def _new_job_id(self):
|
|
208
|
-
return f
|
|
221
|
+
return f"job-{uuid.uuid4()}"
|
|
209
222
|
|
|
210
223
|
def assign_job_id(self, job_description):
|
|
211
224
|
jobStoreID = self._new_job_id()
|
|
212
|
-
log.debug("Assigning ID to job %s
|
|
213
|
-
jobStoreID, '<no command>' if job_description.command is None else job_description.command)
|
|
225
|
+
log.debug("Assigning ID to job %s", jobStoreID)
|
|
214
226
|
job_description.jobStoreID = jobStoreID
|
|
215
227
|
|
|
216
228
|
@contextmanager
|
|
@@ -220,12 +232,17 @@ class GoogleJobStore(AbstractJobStore):
|
|
|
220
232
|
|
|
221
233
|
def create_job(self, job_description):
|
|
222
234
|
job_description.pre_update_hook()
|
|
223
|
-
self._write_bytes(
|
|
235
|
+
self._write_bytes(
|
|
236
|
+
job_description.jobStoreID,
|
|
237
|
+
pickle.dumps(job_description, protocol=pickle.HIGHEST_PROTOCOL),
|
|
238
|
+
)
|
|
224
239
|
return job_description
|
|
225
240
|
|
|
226
241
|
@google_retry
|
|
227
242
|
def job_exists(self, job_id):
|
|
228
|
-
return self.bucket.blob(
|
|
243
|
+
return self.bucket.blob(
|
|
244
|
+
compat_bytes(job_id), encryption_key=self.sseKey
|
|
245
|
+
).exists()
|
|
229
246
|
|
|
230
247
|
@google_retry
|
|
231
248
|
def get_public_url(self, fileName):
|
|
@@ -252,7 +269,11 @@ class GoogleJobStore(AbstractJobStore):
|
|
|
252
269
|
|
|
253
270
|
def update_job(self, job):
|
|
254
271
|
job.pre_update_hook()
|
|
255
|
-
self._write_bytes(
|
|
272
|
+
self._write_bytes(
|
|
273
|
+
job.jobStoreID,
|
|
274
|
+
pickle.dumps(job, protocol=pickle.HIGHEST_PROTOCOL),
|
|
275
|
+
update=True,
|
|
276
|
+
)
|
|
256
277
|
|
|
257
278
|
@google_retry
|
|
258
279
|
def delete_job(self, job_id):
|
|
@@ -270,32 +291,40 @@ class GoogleJobStore(AbstractJobStore):
|
|
|
270
291
|
|
|
271
292
|
env = {}
|
|
272
293
|
|
|
273
|
-
credentials_path: Optional[str] = os.getenv(
|
|
294
|
+
credentials_path: Optional[str] = os.getenv(
|
|
295
|
+
"GOOGLE_APPLICATION_CREDENTIALS", None
|
|
296
|
+
)
|
|
274
297
|
if credentials_path is not None:
|
|
275
298
|
# Send along the environment variable that points to the credentials file.
|
|
276
299
|
# It must be available in the same place on all nodes.
|
|
277
|
-
env[
|
|
300
|
+
env["GOOGLE_APPLICATION_CREDENTIALS"] = credentials_path
|
|
278
301
|
|
|
279
302
|
return env
|
|
280
303
|
|
|
281
304
|
@google_retry
|
|
282
305
|
def jobs(self):
|
|
283
|
-
for blob in self.bucket.list_blobs(prefix=b
|
|
306
|
+
for blob in self.bucket.list_blobs(prefix=b"job-"):
|
|
284
307
|
jobStoreID = blob.name
|
|
285
308
|
# TODO: do this better
|
|
286
|
-
if len(jobStoreID) == 40 and jobStoreID.startswith(
|
|
309
|
+
if len(jobStoreID) == 40 and jobStoreID.startswith(
|
|
310
|
+
"job-"
|
|
311
|
+
): # 'job-' + uuid length
|
|
287
312
|
yield self.load_job(jobStoreID)
|
|
288
313
|
|
|
289
314
|
def write_file(self, local_path, job_id=None, cleanup=False):
|
|
290
315
|
fileID = self._new_id(isFile=True, jobStoreID=job_id if cleanup else None)
|
|
291
|
-
with open(local_path,
|
|
316
|
+
with open(local_path, "rb") as f:
|
|
292
317
|
self._write_file(fileID, f)
|
|
293
318
|
return fileID
|
|
294
319
|
|
|
295
320
|
@contextmanager
|
|
296
|
-
def write_file_stream(
|
|
321
|
+
def write_file_stream(
|
|
322
|
+
self, job_id=None, cleanup=False, basename=None, encoding=None, errors=None
|
|
323
|
+
):
|
|
297
324
|
fileID = self._new_id(isFile=True, jobStoreID=job_id if cleanup else None)
|
|
298
|
-
with self._upload_stream(
|
|
325
|
+
with self._upload_stream(
|
|
326
|
+
fileID, update=False, encoding=encoding, errors=errors
|
|
327
|
+
) as writable:
|
|
299
328
|
yield writable, fileID
|
|
300
329
|
|
|
301
330
|
def get_empty_file_store_id(self, jobStoreID=None, cleanup=False, basename=None):
|
|
@@ -310,16 +339,19 @@ class GoogleJobStore(AbstractJobStore):
|
|
|
310
339
|
if not self.file_exists(file_id):
|
|
311
340
|
raise NoSuchFileException(file_id)
|
|
312
341
|
with AtomicFileCreate(local_path) as tmpPath:
|
|
313
|
-
with open(tmpPath,
|
|
314
|
-
blob = self.bucket.get_blob(
|
|
342
|
+
with open(tmpPath, "wb") as writeable:
|
|
343
|
+
blob = self.bucket.get_blob(
|
|
344
|
+
compat_bytes(file_id), encryption_key=self.sseKey
|
|
345
|
+
)
|
|
315
346
|
blob.download_to_file(writeable)
|
|
316
|
-
if getattr(file_id,
|
|
347
|
+
if getattr(file_id, "executable", False):
|
|
317
348
|
os.chmod(local_path, os.stat(local_path).st_mode | stat.S_IXUSR)
|
|
318
349
|
|
|
319
350
|
@contextmanager
|
|
320
351
|
def read_file_stream(self, file_id, encoding=None, errors=None):
|
|
321
|
-
with self.read_shared_file_stream(
|
|
322
|
-
|
|
352
|
+
with self.read_shared_file_stream(
|
|
353
|
+
file_id, isProtected=True, encoding=encoding, errors=errors
|
|
354
|
+
) as readable:
|
|
323
355
|
yield readable
|
|
324
356
|
|
|
325
357
|
def delete_file(self, file_id):
|
|
@@ -327,32 +359,49 @@ class GoogleJobStore(AbstractJobStore):
|
|
|
327
359
|
|
|
328
360
|
@google_retry
|
|
329
361
|
def file_exists(self, file_id):
|
|
330
|
-
return self.bucket.blob(
|
|
362
|
+
return self.bucket.blob(
|
|
363
|
+
compat_bytes(file_id), encryption_key=self.sseKey
|
|
364
|
+
).exists()
|
|
331
365
|
|
|
332
366
|
@google_retry
|
|
333
367
|
def get_file_size(self, file_id):
|
|
334
368
|
if not self.file_exists(file_id):
|
|
335
369
|
return 0
|
|
336
|
-
return self.bucket.get_blob(
|
|
370
|
+
return self.bucket.get_blob(
|
|
371
|
+
compat_bytes(file_id), encryption_key=self.sseKey
|
|
372
|
+
).size
|
|
337
373
|
|
|
338
374
|
def update_file(self, file_id, local_path):
|
|
339
|
-
with open(local_path,
|
|
375
|
+
with open(local_path, "rb") as f:
|
|
340
376
|
self._write_file(file_id, f, update=True)
|
|
341
377
|
|
|
342
378
|
@contextmanager
|
|
343
379
|
def update_file_stream(self, file_id, encoding=None, errors=None):
|
|
344
|
-
with self._upload_stream(
|
|
380
|
+
with self._upload_stream(
|
|
381
|
+
file_id, update=True, encoding=encoding, errors=errors
|
|
382
|
+
) as writable:
|
|
345
383
|
yield writable
|
|
346
384
|
|
|
347
385
|
@contextmanager
|
|
348
|
-
def write_shared_file_stream(
|
|
349
|
-
|
|
350
|
-
|
|
386
|
+
def write_shared_file_stream(
|
|
387
|
+
self, shared_file_name, encrypted=True, encoding=None, errors=None
|
|
388
|
+
):
|
|
389
|
+
with self._upload_stream(
|
|
390
|
+
shared_file_name,
|
|
391
|
+
encrypt=encrypted,
|
|
392
|
+
update=True,
|
|
393
|
+
encoding=encoding,
|
|
394
|
+
errors=errors,
|
|
395
|
+
) as writable:
|
|
351
396
|
yield writable
|
|
352
397
|
|
|
353
398
|
@contextmanager
|
|
354
|
-
def read_shared_file_stream(
|
|
355
|
-
|
|
399
|
+
def read_shared_file_stream(
|
|
400
|
+
self, shared_file_name, isProtected=True, encoding=None, errors=None
|
|
401
|
+
):
|
|
402
|
+
with self._download_stream(
|
|
403
|
+
shared_file_name, encrypt=isProtected, encoding=encoding, errors=errors
|
|
404
|
+
) as readable:
|
|
356
405
|
yield readable
|
|
357
406
|
|
|
358
407
|
@classmethod
|
|
@@ -375,7 +424,7 @@ class GoogleJobStore(AbstractJobStore):
|
|
|
375
424
|
fileName = url.path
|
|
376
425
|
|
|
377
426
|
# remove leading '/', which can cause problems if fileName is a path
|
|
378
|
-
if fileName.startswith(
|
|
427
|
+
if fileName.startswith("/"):
|
|
379
428
|
fileName = fileName[1:]
|
|
380
429
|
|
|
381
430
|
storageClient = cls.create_client()
|
|
@@ -384,7 +433,7 @@ class GoogleJobStore(AbstractJobStore):
|
|
|
384
433
|
|
|
385
434
|
if exists:
|
|
386
435
|
if not blob.exists():
|
|
387
|
-
raise NoSuchFileException
|
|
436
|
+
raise NoSuchFileException(fileName)
|
|
388
437
|
# sync with cloud so info like size is available
|
|
389
438
|
blob.reload()
|
|
390
439
|
return blob
|
|
@@ -414,7 +463,7 @@ class GoogleJobStore(AbstractJobStore):
|
|
|
414
463
|
|
|
415
464
|
@classmethod
|
|
416
465
|
def _supports_url(cls, url, export=False):
|
|
417
|
-
return url.scheme.lower() ==
|
|
466
|
+
return url.scheme.lower() == "gs"
|
|
418
467
|
|
|
419
468
|
@classmethod
|
|
420
469
|
def _write_to_url(cls, readable: bytes, url: str, executable: bool = False) -> None:
|
|
@@ -422,12 +471,16 @@ class GoogleJobStore(AbstractJobStore):
|
|
|
422
471
|
blob.upload_from_file(readable)
|
|
423
472
|
|
|
424
473
|
@classmethod
|
|
425
|
-
def _list_url(cls, url: ParseResult) ->
|
|
426
|
-
raise NotImplementedError(
|
|
474
|
+
def _list_url(cls, url: ParseResult) -> list[str]:
|
|
475
|
+
raise NotImplementedError(
|
|
476
|
+
"Listing files in Google buckets is not yet implemented!"
|
|
477
|
+
)
|
|
427
478
|
|
|
428
479
|
@classmethod
|
|
429
480
|
def _get_is_directory(cls, url: ParseResult) -> bool:
|
|
430
|
-
raise NotImplementedError(
|
|
481
|
+
raise NotImplementedError(
|
|
482
|
+
"Checking directory status in Google buckets is not yet implemented!"
|
|
483
|
+
)
|
|
431
484
|
|
|
432
485
|
@google_retry
|
|
433
486
|
def write_logs(self, msg: bytes) -> None:
|
|
@@ -457,7 +510,9 @@ class GoogleJobStore(AbstractJobStore):
|
|
|
457
510
|
if not read_all:
|
|
458
511
|
# rename this file by copying it and deleting the old version to avoid
|
|
459
512
|
# rereading it
|
|
460
|
-
newID =
|
|
513
|
+
newID = (
|
|
514
|
+
self.readStatsBaseID + blob.name[len(self.statsBaseID) :]
|
|
515
|
+
)
|
|
461
516
|
# NOTE: just copies then deletes old.
|
|
462
517
|
self.bucket.rename_blob(blob, compat_bytes(newID))
|
|
463
518
|
except NoSuchFileException:
|
|
@@ -473,7 +528,7 @@ class GoogleJobStore(AbstractJobStore):
|
|
|
473
528
|
if lastTry:
|
|
474
529
|
# this was our second try, we are reasonably sure there aren't any stats
|
|
475
530
|
# left to gather
|
|
476
|
-
|
|
531
|
+
break
|
|
477
532
|
# Try one more time in a couple seconds
|
|
478
533
|
time.sleep(5)
|
|
479
534
|
lastTry = True
|
|
@@ -487,11 +542,11 @@ class GoogleJobStore(AbstractJobStore):
|
|
|
487
542
|
@staticmethod
|
|
488
543
|
def _new_id(isFile=False, jobStoreID=None):
|
|
489
544
|
if isFile and jobStoreID: # file associated with job
|
|
490
|
-
return jobStoreID+str(uuid.uuid4())
|
|
545
|
+
return jobStoreID + str(uuid.uuid4())
|
|
491
546
|
elif isFile: # nonassociated file
|
|
492
547
|
return str(uuid.uuid4())
|
|
493
548
|
else: # job id
|
|
494
|
-
return f
|
|
549
|
+
return f"job-{uuid.uuid4()}"
|
|
495
550
|
|
|
496
551
|
@google_retry
|
|
497
552
|
def _delete(self, jobStoreFileID):
|
|
@@ -515,8 +570,12 @@ class GoogleJobStore(AbstractJobStore):
|
|
|
515
570
|
return job.download_as_string()
|
|
516
571
|
|
|
517
572
|
@google_retry
|
|
518
|
-
def _write_file(
|
|
519
|
-
|
|
573
|
+
def _write_file(
|
|
574
|
+
self, jobStoreID: str, fileObj: bytes, update=False, encrypt=True
|
|
575
|
+
) -> None:
|
|
576
|
+
blob = self.bucket.blob(
|
|
577
|
+
compat_bytes(jobStoreID), encryption_key=self.sseKey if encrypt else None
|
|
578
|
+
)
|
|
520
579
|
if not update:
|
|
521
580
|
# TODO: should probably raise a special exception and be added to all jobStores
|
|
522
581
|
assert not blob.exists()
|
|
@@ -530,7 +589,9 @@ class GoogleJobStore(AbstractJobStore):
|
|
|
530
589
|
|
|
531
590
|
@contextmanager
|
|
532
591
|
@google_retry
|
|
533
|
-
def _upload_stream(
|
|
592
|
+
def _upload_stream(
|
|
593
|
+
self, fileName, update=False, encrypt=True, encoding=None, errors=None
|
|
594
|
+
):
|
|
534
595
|
"""
|
|
535
596
|
Yields a context manager that can be used to write to the bucket
|
|
536
597
|
with a stream. See :class:`~toil.jobStores.utils.WritablePipe` for an example.
|
|
@@ -556,7 +617,10 @@ class GoogleJobStore(AbstractJobStore):
|
|
|
556
617
|
:return: an instance of WritablePipe.
|
|
557
618
|
:rtype: :class:`~toil.jobStores.utils.writablePipe`
|
|
558
619
|
"""
|
|
559
|
-
blob = self.bucket.blob(
|
|
620
|
+
blob = self.bucket.blob(
|
|
621
|
+
compat_bytes(fileName), encryption_key=self.sseKey if encrypt else None
|
|
622
|
+
)
|
|
623
|
+
|
|
560
624
|
class UploadPipe(WritablePipe):
|
|
561
625
|
def readFrom(self, readable):
|
|
562
626
|
if not update:
|
|
@@ -592,7 +656,9 @@ class GoogleJobStore(AbstractJobStore):
|
|
|
592
656
|
:rtype: :class:`~toil.jobStores.utils.ReadablePipe`
|
|
593
657
|
"""
|
|
594
658
|
|
|
595
|
-
blob = self.bucket.get_blob(
|
|
659
|
+
blob = self.bucket.get_blob(
|
|
660
|
+
compat_bytes(fileName), encryption_key=self.sseKey if encrypt else None
|
|
661
|
+
)
|
|
596
662
|
if blob is None:
|
|
597
663
|
raise NoSuchFileException(fileName)
|
|
598
664
|
|
toil/jobStores/utils.py
CHANGED
|
@@ -10,6 +10,7 @@ from toil.lib.threading import ExceptionalThread
|
|
|
10
10
|
|
|
11
11
|
log = logging.getLogger(__name__)
|
|
12
12
|
|
|
13
|
+
|
|
13
14
|
class WritablePipe(ABC):
|
|
14
15
|
"""
|
|
15
16
|
An object-oriented wrapper for os.pipe. Clients should subclass it, implement
|
|
@@ -84,7 +85,7 @@ class WritablePipe(ABC):
|
|
|
84
85
|
raise NotImplementedError()
|
|
85
86
|
|
|
86
87
|
def _reader(self):
|
|
87
|
-
with os.fdopen(self.readable_fh,
|
|
88
|
+
with os.fdopen(self.readable_fh, "rb") as readable:
|
|
88
89
|
# TODO: If the reader somehow crashes here, both threads might try
|
|
89
90
|
# to close readable_fh. Fortunately we don't do anything that
|
|
90
91
|
# should be able to fail here.
|
|
@@ -112,7 +113,12 @@ class WritablePipe(ABC):
|
|
|
112
113
|
|
|
113
114
|
def __enter__(self):
|
|
114
115
|
self.readable_fh, writable_fh = os.pipe()
|
|
115
|
-
self.writable = os.fdopen(
|
|
116
|
+
self.writable = os.fdopen(
|
|
117
|
+
writable_fh,
|
|
118
|
+
"wb" if self.encoding == None else "wt",
|
|
119
|
+
encoding=self.encoding,
|
|
120
|
+
errors=self.errors,
|
|
121
|
+
)
|
|
116
122
|
self.thread = ExceptionalThread(target=self._reader)
|
|
117
123
|
self.thread.start()
|
|
118
124
|
return self.writable
|
|
@@ -132,7 +138,9 @@ class WritablePipe(ABC):
|
|
|
132
138
|
# already an exception in the main thread
|
|
133
139
|
raise
|
|
134
140
|
else:
|
|
135
|
-
log.error(
|
|
141
|
+
log.error(
|
|
142
|
+
"Swallowing additional exception in reader thread: %s", str(e)
|
|
143
|
+
)
|
|
136
144
|
finally:
|
|
137
145
|
# The responsibility for closing the readable end is generally that of the reader
|
|
138
146
|
# thread. To cover the small window before the reader takes over we also close it here.
|
|
@@ -217,7 +225,7 @@ class ReadablePipe(ABC):
|
|
|
217
225
|
|
|
218
226
|
def _writer(self):
|
|
219
227
|
try:
|
|
220
|
-
with os.fdopen(self.writable_fh,
|
|
228
|
+
with os.fdopen(self.writable_fh, "wb") as writable:
|
|
221
229
|
self.writeTo(writable)
|
|
222
230
|
except OSError as e:
|
|
223
231
|
# The other side of the pipe may have been closed by the
|
|
@@ -244,7 +252,12 @@ class ReadablePipe(ABC):
|
|
|
244
252
|
|
|
245
253
|
def __enter__(self):
|
|
246
254
|
readable_fh, self.writable_fh = os.pipe()
|
|
247
|
-
self.readable = os.fdopen(
|
|
255
|
+
self.readable = os.fdopen(
|
|
256
|
+
readable_fh,
|
|
257
|
+
"rb" if self.encoding == None else "rt",
|
|
258
|
+
encoding=self.encoding,
|
|
259
|
+
errors=self.errors,
|
|
260
|
+
)
|
|
248
261
|
self.thread = ExceptionalThread(target=self._writer)
|
|
249
262
|
self.thread.start()
|
|
250
263
|
return self.readable
|
|
@@ -264,6 +277,7 @@ class ReadablePipe(ABC):
|
|
|
264
277
|
# already an exception in the main thread
|
|
265
278
|
raise
|
|
266
279
|
|
|
280
|
+
|
|
267
281
|
class ReadableTransformingPipe(ReadablePipe):
|
|
268
282
|
"""
|
|
269
283
|
A pipe which is constructed around a readable stream, and which provides a
|
|
@@ -296,7 +310,6 @@ class ReadableTransformingPipe(ReadablePipe):
|
|
|
296
310
|
|
|
297
311
|
"""
|
|
298
312
|
|
|
299
|
-
|
|
300
313
|
def __init__(self, source, encoding=None, errors=None):
|
|
301
314
|
"""
|
|
302
315
|
:param str encoding: the name of the encoding used to encode the file. Encodings are the same
|
|
@@ -323,15 +336,17 @@ class ReadableTransformingPipe(ReadablePipe):
|
|
|
323
336
|
def writeTo(self, writable):
|
|
324
337
|
self.transform(self.source, writable)
|
|
325
338
|
|
|
339
|
+
|
|
326
340
|
class JobStoreUnavailableException(RuntimeError):
|
|
327
341
|
"""
|
|
328
342
|
Raised when a particular type of job store is requested but can't be used.
|
|
329
343
|
"""
|
|
330
344
|
|
|
345
|
+
|
|
331
346
|
def generate_locator(
|
|
332
347
|
job_store_type: str,
|
|
333
348
|
local_suggestion: Optional[str] = None,
|
|
334
|
-
decoration: Optional[str] = None
|
|
349
|
+
decoration: Optional[str] = None,
|
|
335
350
|
) -> str:
|
|
336
351
|
"""
|
|
337
352
|
Generate a random locator for a job store of the given type. Raises an
|
|
@@ -347,7 +362,7 @@ def generate_locator(
|
|
|
347
362
|
"""
|
|
348
363
|
|
|
349
364
|
# Prepare decoration for splicing into strings
|
|
350
|
-
decoration = (
|
|
365
|
+
decoration = ("-" + decoration) if decoration else ""
|
|
351
366
|
|
|
352
367
|
try:
|
|
353
368
|
if job_store_type == "google":
|
|
@@ -363,6 +378,7 @@ def generate_locator(
|
|
|
363
378
|
elif job_store_type == "aws":
|
|
364
379
|
# Make sure we have AWS
|
|
365
380
|
from toil.jobStores.aws.jobStore import AWSJobStore # noqa
|
|
381
|
+
|
|
366
382
|
# Find a region
|
|
367
383
|
from toil.lib.aws import get_current_aws_region
|
|
368
384
|
|
|
@@ -370,7 +386,9 @@ def generate_locator(
|
|
|
370
386
|
|
|
371
387
|
if not region:
|
|
372
388
|
# We can't generate an AWS job store without a region
|
|
373
|
-
raise JobStoreUnavailableException(
|
|
389
|
+
raise JobStoreUnavailableException(
|
|
390
|
+
f"{job_store_type} job store can't be made without a region"
|
|
391
|
+
)
|
|
374
392
|
|
|
375
393
|
# Roll a random name
|
|
376
394
|
return f"aws:{region}:toil{decoration}-{str(uuid.uuid4())}"
|
|
@@ -380,11 +398,14 @@ def generate_locator(
|
|
|
380
398
|
return local_suggestion
|
|
381
399
|
else:
|
|
382
400
|
# Pick a temp path
|
|
383
|
-
return os.path.join(
|
|
401
|
+
return os.path.join(
|
|
402
|
+
tempfile.gettempdir(), "toil-" + str(uuid.uuid4()) + decoration
|
|
403
|
+
)
|
|
384
404
|
else:
|
|
385
|
-
raise JobStoreUnavailableException(
|
|
405
|
+
raise JobStoreUnavailableException(
|
|
406
|
+
f"{job_store_type} job store isn't known"
|
|
407
|
+
)
|
|
386
408
|
except ImportError:
|
|
387
|
-
raise JobStoreUnavailableException(
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
409
|
+
raise JobStoreUnavailableException(
|
|
410
|
+
f"libraries for {job_store_type} job store are not installed"
|
|
411
|
+
)
|