toil 6.1.0a1__py3-none-any.whl → 8.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two package versions as they appear in their respective public registries.
- toil/__init__.py +122 -315
- toil/batchSystems/__init__.py +1 -0
- toil/batchSystems/abstractBatchSystem.py +173 -89
- toil/batchSystems/abstractGridEngineBatchSystem.py +272 -148
- toil/batchSystems/awsBatch.py +244 -135
- toil/batchSystems/cleanup_support.py +26 -16
- toil/batchSystems/contained_executor.py +31 -28
- toil/batchSystems/gridengine.py +86 -50
- toil/batchSystems/htcondor.py +166 -89
- toil/batchSystems/kubernetes.py +632 -382
- toil/batchSystems/local_support.py +20 -15
- toil/batchSystems/lsf.py +134 -81
- toil/batchSystems/lsfHelper.py +13 -11
- toil/batchSystems/mesos/__init__.py +41 -29
- toil/batchSystems/mesos/batchSystem.py +290 -151
- toil/batchSystems/mesos/executor.py +79 -50
- toil/batchSystems/mesos/test/__init__.py +31 -23
- toil/batchSystems/options.py +46 -28
- toil/batchSystems/registry.py +53 -19
- toil/batchSystems/singleMachine.py +296 -125
- toil/batchSystems/slurm.py +603 -138
- toil/batchSystems/torque.py +47 -33
- toil/bus.py +186 -76
- toil/common.py +664 -368
- toil/cwl/__init__.py +1 -1
- toil/cwl/cwltoil.py +1136 -483
- toil/cwl/utils.py +17 -22
- toil/deferred.py +63 -42
- toil/exceptions.py +5 -3
- toil/fileStores/__init__.py +5 -5
- toil/fileStores/abstractFileStore.py +140 -60
- toil/fileStores/cachingFileStore.py +717 -269
- toil/fileStores/nonCachingFileStore.py +116 -87
- toil/job.py +1225 -368
- toil/jobStores/abstractJobStore.py +416 -266
- toil/jobStores/aws/jobStore.py +863 -477
- toil/jobStores/aws/utils.py +201 -120
- toil/jobStores/conftest.py +3 -2
- toil/jobStores/fileJobStore.py +292 -154
- toil/jobStores/googleJobStore.py +140 -74
- toil/jobStores/utils.py +36 -15
- toil/leader.py +668 -272
- toil/lib/accelerators.py +115 -18
- toil/lib/aws/__init__.py +74 -31
- toil/lib/aws/ami.py +122 -87
- toil/lib/aws/iam.py +284 -108
- toil/lib/aws/s3.py +31 -0
- toil/lib/aws/session.py +214 -39
- toil/lib/aws/utils.py +287 -231
- toil/lib/bioio.py +13 -5
- toil/lib/compatibility.py +11 -6
- toil/lib/conversions.py +104 -47
- toil/lib/docker.py +131 -103
- toil/lib/ec2.py +361 -199
- toil/lib/ec2nodes.py +174 -106
- toil/lib/encryption/_dummy.py +5 -3
- toil/lib/encryption/_nacl.py +10 -6
- toil/lib/encryption/conftest.py +1 -0
- toil/lib/exceptions.py +26 -7
- toil/lib/expando.py +5 -3
- toil/lib/ftp_utils.py +217 -0
- toil/lib/generatedEC2Lists.py +127 -19
- toil/lib/humanize.py +6 -2
- toil/lib/integration.py +341 -0
- toil/lib/io.py +141 -15
- toil/lib/iterables.py +4 -2
- toil/lib/memoize.py +12 -8
- toil/lib/misc.py +66 -21
- toil/lib/objects.py +2 -2
- toil/lib/resources.py +68 -15
- toil/lib/retry.py +126 -81
- toil/lib/threading.py +299 -82
- toil/lib/throttle.py +16 -15
- toil/options/common.py +843 -409
- toil/options/cwl.py +175 -90
- toil/options/runner.py +50 -0
- toil/options/wdl.py +73 -17
- toil/provisioners/__init__.py +117 -46
- toil/provisioners/abstractProvisioner.py +332 -157
- toil/provisioners/aws/__init__.py +70 -33
- toil/provisioners/aws/awsProvisioner.py +1145 -715
- toil/provisioners/clusterScaler.py +541 -279
- toil/provisioners/gceProvisioner.py +282 -179
- toil/provisioners/node.py +155 -79
- toil/realtimeLogger.py +34 -22
- toil/resource.py +137 -75
- toil/server/app.py +128 -62
- toil/server/celery_app.py +3 -1
- toil/server/cli/wes_cwl_runner.py +82 -53
- toil/server/utils.py +54 -28
- toil/server/wes/abstract_backend.py +64 -26
- toil/server/wes/amazon_wes_utils.py +21 -15
- toil/server/wes/tasks.py +121 -63
- toil/server/wes/toil_backend.py +142 -107
- toil/server/wsgi_app.py +4 -3
- toil/serviceManager.py +58 -22
- toil/statsAndLogging.py +224 -70
- toil/test/__init__.py +282 -183
- toil/test/batchSystems/batchSystemTest.py +460 -210
- toil/test/batchSystems/batch_system_plugin_test.py +90 -0
- toil/test/batchSystems/test_gridengine.py +173 -0
- toil/test/batchSystems/test_lsf_helper.py +67 -58
- toil/test/batchSystems/test_slurm.py +110 -49
- toil/test/cactus/__init__.py +0 -0
- toil/test/cactus/test_cactus_integration.py +56 -0
- toil/test/cwl/cwlTest.py +496 -287
- toil/test/cwl/measure_default_memory.cwl +12 -0
- toil/test/cwl/not_run_required_input.cwl +29 -0
- toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
- toil/test/cwl/seqtk_seq.cwl +1 -1
- toil/test/docs/scriptsTest.py +69 -46
- toil/test/jobStores/jobStoreTest.py +427 -264
- toil/test/lib/aws/test_iam.py +118 -50
- toil/test/lib/aws/test_s3.py +16 -9
- toil/test/lib/aws/test_utils.py +5 -6
- toil/test/lib/dockerTest.py +118 -141
- toil/test/lib/test_conversions.py +113 -115
- toil/test/lib/test_ec2.py +58 -50
- toil/test/lib/test_integration.py +104 -0
- toil/test/lib/test_misc.py +12 -5
- toil/test/mesos/MesosDataStructuresTest.py +23 -10
- toil/test/mesos/helloWorld.py +7 -6
- toil/test/mesos/stress.py +25 -20
- toil/test/options/__init__.py +13 -0
- toil/test/options/options.py +42 -0
- toil/test/provisioners/aws/awsProvisionerTest.py +320 -150
- toil/test/provisioners/clusterScalerTest.py +440 -250
- toil/test/provisioners/clusterTest.py +166 -44
- toil/test/provisioners/gceProvisionerTest.py +174 -100
- toil/test/provisioners/provisionerTest.py +25 -13
- toil/test/provisioners/restartScript.py +5 -4
- toil/test/server/serverTest.py +188 -141
- toil/test/sort/restart_sort.py +137 -68
- toil/test/sort/sort.py +134 -66
- toil/test/sort/sortTest.py +91 -49
- toil/test/src/autoDeploymentTest.py +141 -101
- toil/test/src/busTest.py +20 -18
- toil/test/src/checkpointTest.py +8 -2
- toil/test/src/deferredFunctionTest.py +49 -35
- toil/test/src/dockerCheckTest.py +32 -24
- toil/test/src/environmentTest.py +135 -0
- toil/test/src/fileStoreTest.py +539 -272
- toil/test/src/helloWorldTest.py +7 -4
- toil/test/src/importExportFileTest.py +61 -31
- toil/test/src/jobDescriptionTest.py +46 -21
- toil/test/src/jobEncapsulationTest.py +2 -0
- toil/test/src/jobFileStoreTest.py +74 -50
- toil/test/src/jobServiceTest.py +187 -73
- toil/test/src/jobTest.py +121 -71
- toil/test/src/miscTests.py +19 -18
- toil/test/src/promisedRequirementTest.py +82 -36
- toil/test/src/promisesTest.py +7 -6
- toil/test/src/realtimeLoggerTest.py +10 -6
- toil/test/src/regularLogTest.py +71 -37
- toil/test/src/resourceTest.py +80 -49
- toil/test/src/restartDAGTest.py +36 -22
- toil/test/src/resumabilityTest.py +9 -2
- toil/test/src/retainTempDirTest.py +45 -14
- toil/test/src/systemTest.py +12 -8
- toil/test/src/threadingTest.py +44 -25
- toil/test/src/toilContextManagerTest.py +10 -7
- toil/test/src/userDefinedJobArgTypeTest.py +8 -5
- toil/test/src/workerTest.py +73 -23
- toil/test/utils/toilDebugTest.py +103 -33
- toil/test/utils/toilKillTest.py +4 -5
- toil/test/utils/utilsTest.py +245 -106
- toil/test/wdl/wdltoil_test.py +818 -149
- toil/test/wdl/wdltoil_test_kubernetes.py +91 -0
- toil/toilState.py +120 -35
- toil/utils/toilConfig.py +13 -4
- toil/utils/toilDebugFile.py +44 -27
- toil/utils/toilDebugJob.py +214 -27
- toil/utils/toilDestroyCluster.py +11 -6
- toil/utils/toilKill.py +8 -3
- toil/utils/toilLaunchCluster.py +256 -140
- toil/utils/toilMain.py +37 -16
- toil/utils/toilRsyncCluster.py +32 -14
- toil/utils/toilSshCluster.py +49 -22
- toil/utils/toilStats.py +356 -273
- toil/utils/toilStatus.py +292 -139
- toil/utils/toilUpdateEC2Instances.py +3 -1
- toil/version.py +12 -12
- toil/wdl/utils.py +5 -5
- toil/wdl/wdltoil.py +3913 -1033
- toil/worker.py +367 -184
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/LICENSE +25 -0
- toil-8.0.0.dist-info/METADATA +173 -0
- toil-8.0.0.dist-info/RECORD +253 -0
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/WHEEL +1 -1
- toil-6.1.0a1.dist-info/METADATA +0 -125
- toil-6.1.0a1.dist-info/RECORD +0 -237
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/entry_points.txt +0 -0
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/top_level.txt +0 -0
toil/jobStores/fileJobStore.py
CHANGED
@@ -19,30 +19,29 @@ import random
 import re
 import shutil
 import stat
-import sys
 import time
 import uuid
+from collections.abc import Iterable, Iterator
 from contextlib import contextmanager
-from typing import IO,
+from typing import IO, Literal, Optional, Union, overload
 from urllib.parse import ParseResult, quote, unquote
 
-if sys.version_info >= (3, 8):
-    from typing import Literal
-else:
-    from typing_extensions import Literal
-
 from toil.fileStores import FileID
 from toil.job import TemporaryID
-from toil.jobStores.abstractJobStore import (
-
-
-
-
-
-
-
-
-
+from toil.jobStores.abstractJobStore import (
+    AbstractJobStore,
+    JobStoreExistsException,
+    NoSuchFileException,
+    NoSuchJobException,
+    NoSuchJobStoreException,
+)
+from toil.lib.io import (
+    AtomicFileCreate,
+    atomic_copy,
+    atomic_copyobj,
+    mkdtemp,
+    robust_rmtree,
+)
 
 logger = logging.getLogger(__name__)
 
@@ -59,14 +58,19 @@ class FileJobStore(AbstractJobStore):
 
     # What prefix should be on the per-job job directories, to distinguish them
    # from the spray directories?
-    JOB_DIR_PREFIX =
+    JOB_DIR_PREFIX = "instance-"
 
     # What prefix do we put on the per-job-name directories we sort jobs into?
-    JOB_NAME_DIR_PREFIX =
+    JOB_NAME_DIR_PREFIX = "kind-"
 
     # 10Mb RAM chunks when reading/writing files
     BUFFER_SIZE = 10485760  # 10Mb
 
+    # When a log file is still being written, what will its name end with?
+    LOG_TEMP_SUFFIX = ".new"
+    # All log files start with this prefix
+    LOG_PREFIX = "stats"
+
     def default_caching(self) -> bool:
         """
         Jobstore's preference as to whether it likes caching or doesn't care about it.
@@ -88,47 +92,58 @@ class FileJobStore(AbstractJobStore):
         logger.debug("Path to job store directory is '%s'.", self.jobStoreDir)
 
         # Directory where actual job files go, and their job-associated temp files
-        self.jobsDir = os.path.join(self.jobStoreDir,
+        self.jobsDir = os.path.join(self.jobStoreDir, "jobs")
         # Directory where stats files go
-        self.statsDir = os.path.join(self.jobStoreDir,
+        self.statsDir = os.path.join(self.jobStoreDir, "stats")
+        # Which has subdirectories for new and seen stats files
+        self.stats_inbox = os.path.join(self.statsDir, "inbox")
+        self.stats_archive = os.path.join(self.statsDir, "archive")
         # Directory where non-job-associated files for the file store go
-        self.filesDir = os.path.join(self.jobStoreDir,
+        self.filesDir = os.path.join(self.jobStoreDir, "files/no-job")
         # Directory where job-associated files for the file store go.
         # Each per-job directory in here will have separate directories for
         # files to clean up and files to not clean up when the job is deleted.
-        self.jobFilesDir = os.path.join(self.jobStoreDir,
+        self.jobFilesDir = os.path.join(self.jobStoreDir, "files/for-job")
         # Directory where shared files go
-        self.sharedFilesDir = os.path.join(self.jobStoreDir,
+        self.sharedFilesDir = os.path.join(self.jobStoreDir, "files/shared")
 
         self.fanOut = fanOut
 
         self.linkImports = None
         self.moveExports = None
+        self.symlink_job_store_reads = None
 
     def __repr__(self):
-        return f
+        return f"FileJobStore({self.jobStoreDir})"
 
     def initialize(self, config):
         try:
             os.mkdir(self.jobStoreDir)
         except OSError as e:
             if e.errno == errno.EEXIST:
-                raise JobStoreExistsException(self.jobStoreDir)
+                raise JobStoreExistsException(self.jobStoreDir, "file")
             else:
                 raise
         os.makedirs(self.jobsDir, exist_ok=True)
         os.makedirs(self.statsDir, exist_ok=True)
+        os.makedirs(self.stats_inbox, exist_ok=True)
+        os.makedirs(self.stats_archive, exist_ok=True)
         os.makedirs(self.filesDir, exist_ok=True)
         os.makedirs(self.jobFilesDir, exist_ok=True)
         os.makedirs(self.sharedFilesDir, exist_ok=True)
         self.linkImports = config.symlinkImports
         self.moveExports = config.moveOutputs
+        self.symlink_job_store_reads = config.symlink_job_store_reads
         super().initialize(config)
 
     def resume(self):
         if not os.path.isdir(self.jobStoreDir):
-            raise NoSuchJobStoreException(self.jobStoreDir)
+            raise NoSuchJobStoreException(self.jobStoreDir, "file")
         super().resume()
+        # TODO: Unify with initialize() configuration
+        self.linkImports = self.config.symlinkImports
+        self.moveExports = self.config.moveOutputs
+        self.symlink_job_store_reads = self.config.symlink_job_store_reads
 
     def destroy(self):
         if os.path.exists(self.jobStoreDir):
@@ -147,8 +162,10 @@ class FileJobStore(AbstractJobStore):
 
         # Make a unique temp directory under a directory for this job name,
         # possibly sprayed across multiple levels of subdirectories.
-        absJobDir = mkdtemp(
-
+        absJobDir = mkdtemp(
+            prefix=self.JOB_DIR_PREFIX,
+            dir=self._get_arbitrary_jobs_dir_for_name(usefulFilename),
+        )
 
         job_description.jobStoreID = self._get_job_id_from_dir(absJobDir)
 
@@ -174,7 +191,9 @@ class FileJobStore(AbstractJobStore):
         Spin-wait and block for a job to appear before returning
         False if it does not.
         """
-        return self._wait_for_file(
+        return self._wait_for_file(
+            self._get_job_file_name(jobStoreID), maxTries=maxTries, sleepTime=sleepTime
+        )
 
     def _wait_for_file(self, fileName, maxTries=35, sleepTime=1):
         """
@@ -192,14 +211,18 @@ class FileJobStore(AbstractJobStore):
         In practice, the need for retries happens rarely, but it does happen
         over the course of large workflows with a jobStore on a busy NFS.
         """
-        for iTry in range(1,maxTries+1):
+        for iTry in range(1, maxTries + 1):
             if os.path.exists(fileName):
                 return True
             if iTry >= maxTries:
                 return False
             elif iTry == 1:
-                logger.warning(
-
+                logger.warning(
+                    (
+                        "Path `{}` does not exist (yet). We will try #{} more times with {}s "
+                        "intervals."
+                    ).format(fileName, maxTries - iTry, sleepTime)
+                )
             time.sleep(sleepTime)
         return False
 
@@ -210,7 +233,7 @@ class FileJobStore(AbstractJobStore):
         self._check_job_store_file_id(jobStoreFileID)
         jobStorePath = self._get_file_path_from_id(jobStoreFileID)
         if os.path.exists(jobStorePath):
-            return
+            return "file:" + jobStorePath
         else:
             raise NoSuchFileException(jobStoreFileID)
 
@@ -218,7 +241,7 @@ class FileJobStore(AbstractJobStore):
         jobStorePath = os.path.join(self.sharedFilesDir, sharedFileName)
         if not os.path.exists(jobStorePath):
             raise NoSuchFileException(sharedFileName)
-        return
+        return "file:" + jobStorePath
 
     def load_job(self, job_id):
         # If the job obviously doesn't exist, note that.
@@ -226,7 +249,7 @@ class FileJobStore(AbstractJobStore):
         # Try to load a valid version of the job.
         jobFile = self._get_job_file_name(job_id)
         try:
-            with open(jobFile,
+            with open(jobFile, "rb") as fileHandle:
                 job = pickle.load(fileHandle)
         except FileNotFoundError:
             # We were racing a delete on a non-POSIX-compliant filesystem.
@@ -248,7 +271,9 @@ class FileJobStore(AbstractJobStore):
 
     def update_job(self, job):
         assert job.jobStoreID is not None, f"Tried to update job {job} without an ID"
-        assert not isinstance(
+        assert not isinstance(
+            job.jobStoreID, TemporaryID
+        ), f"Tried to update job {job} without an assigned ID"
 
         job.pre_update_hook()
 
@@ -261,10 +286,11 @@ class FileJobStore(AbstractJobStore):
         # The file is then moved to its correct path.
         # Atomicity guarantees use the fact the underlying file system's "move"
         # function is atomic.
-        with open(dest_filename + ".new",
+        with open(dest_filename + ".new", "xb") as f:
             pickle.dump(job, f)
         # This should be atomic for the file system
         os.rename(dest_filename + ".new", dest_filename)
+
     def delete_job(self, job_id):
         # The jobStoreID is the relative path to the directory containing the job,
         # removing this directory deletes the job.
@@ -296,48 +322,50 @@ class FileJobStore(AbstractJobStore):
     # Functions that deal with temporary files associated with jobs
     ##########################################
 
-
-    def optional_hard_copy(self, hardlink):
-        if hardlink:
-            saved = self.linkImports
-            self.linkImports = False
-        yield
-        if hardlink:
-            self.linkImports = saved
-
-    def _copy_or_link(self, src_path, dst_path, symlink=False):
+    def _copy_or_link(self, src_path, dst_path, hardlink=False, symlink=False):
         # linking is not done be default because of issue #1755
-
-
-
+        # TODO: is hardlinking ever actually done?
+        src_path = self._extract_path_from_url(src_path)
+        if self.linkImports and not hardlink and symlink:
+            os.symlink(os.path.realpath(src_path), dst_path)
         else:
-            atomic_copy(
-
-    def _import_file(
-
-
+            atomic_copy(src_path, dst_path)
+
+    def _import_file(
+        self, otherCls, uri, shared_file_name=None, hardlink=False, symlink=True
+    ):
+        # symlink argument says whether the caller can take symlinks or not.
+        # ex: if false, it means the workflow cannot work with symlinks and we need to hardlink or copy.
+        # TODO: Do we ever actually hardlink?
         # default is true since symlinking everything is ideal
         uri_path = unquote(uri.path)
         if issubclass(otherCls, FileJobStore):
             if os.path.isdir(uri_path):
                 # Don't allow directories (unless someone is racing us)
-                raise IsADirectoryError(
+                raise IsADirectoryError(
+                    f"URI {uri} points to a directory but a file was expected"
+                )
             if shared_file_name is None:
                 executable = os.stat(uri_path).st_mode & stat.S_IXUSR != 0
-
-
-
+                # use this to get a valid path to write to in job store
+                absPath = self._get_unique_file_path(uri_path)
+                self._copy_or_link(uri, absPath, hardlink=hardlink, symlink=symlink)
                 # TODO: os.stat(absPath).st_size consistently gives values lower than
                 # getDirSizeRecursively()
-                return FileID(
+                return FileID(
+                    self._get_file_id_from_path(absPath),
+                    os.stat(absPath).st_size,
+                    executable,
+                )
             else:
                 self._requireValidSharedFileName(shared_file_name)
                 path = self._get_shared_file_path(shared_file_name)
-
-                self._copy_or_link(uri, path, symlink=symlink)
+                self._copy_or_link(uri, path, hardlink=hardlink, symlink=symlink)
                 return None
         else:
-            return super()._import_file(
+            return super()._import_file(
+                otherCls, uri, shared_file_name=shared_file_name
+            )
 
     def _export_file(self, otherCls, file_id, uri):
         if issubclass(otherCls, FileJobStore):
@@ -346,7 +374,7 @@ class FileJobStore(AbstractJobStore):
             # Make sure we don't need to worry about directories when exporting
             # to local files, just like for cloud storage.
             os.makedirs(os.path.dirname(destPath), exist_ok=True)
-            executable = getattr(file_id,
+            executable = getattr(file_id, "executable", False)
             if self.moveExports:
                 self._move_and_linkback(srcPath, destPath, executable=executable)
             else:
@@ -355,7 +383,11 @@ class FileJobStore(AbstractJobStore):
             super()._default_export_file(otherCls, file_id, uri)
 
     def _move_and_linkback(self, srcPath, destPath, executable):
-        logger.debug(
+        logger.debug(
+            "moveExports option, Moving src=%s to dest=%s ; then symlinking dest to src",
+            srcPath,
+            destPath,
+        )
         shutil.move(srcPath, destPath)
         os.symlink(destPath, srcPath)
         if executable:
@@ -391,7 +423,7 @@ class FileJobStore(AbstractJobStore):
         """
         Open a file URL as a binary stream.
         """
-        return open(cls._extract_path_from_url(url),
+        return open(cls._extract_path_from_url(url), "rb")
 
     @classmethod
     def _write_to_url(cls, readable, url, executable=False):
@@ -403,20 +435,24 @@ class FileJobStore(AbstractJobStore):
         :param object readable: An open file object to read from.
         """
         # we use a ~10Mb buffer to improve speed
-        atomic_copyobj(
-
-
-
+        atomic_copyobj(
+            readable,
+            cls._extract_path_from_url(url),
+            length=cls.BUFFER_SIZE,
+            executable=executable,
+        )
 
     @classmethod
-    def _list_url(cls, url: ParseResult) ->
+    def _list_url(cls, url: ParseResult) -> list[str]:
         path = cls._extract_path_from_url(url)
         listing = []
         for p in os.listdir(path):
             # We know there are no slashes in these
             component = quote(p)
             # Return directories with trailing slashes and files without
-            listing.append(
+            listing.append(
+                (component + "/") if os.path.isdir(os.path.join(path, p)) else component
+            )
         return listing
 
     @classmethod
@@ -429,13 +465,13 @@ class FileJobStore(AbstractJobStore):
         """
         :return: local file path of file pointed at by the given URL
         """
-        if url.netloc !=
+        if url.netloc != "" and url.netloc != "localhost":
            raise RuntimeError("The URL '%s' is invalid" % url.geturl())
         return unquote(url.path)
 
     @classmethod
     def _supports_url(cls, url, export=False):
-        return url.scheme.lower() ==
+        return url.scheme.lower() == "file"
 
     def _make_string_filename_safe(self, arbitraryString, maxLength=240):
         """
@@ -464,7 +500,7 @@ class FileJobStore(AbstractJobStore):
             parts.append("UNPRINTABLE")
 
         # Glue it all together, and truncate to length
-        return
+        return "_".join(parts)[:maxLength]
 
     def write_file(self, local_path, job_id=None, cleanup=False):
         absPath = self._get_unique_file_path(local_path, job_id, cleanup)
@@ -473,20 +509,30 @@ class FileJobStore(AbstractJobStore):
         return relPath
 
     @contextmanager
-    def write_file_stream(
+    def write_file_stream(
+        self, job_id=None, cleanup=False, basename=None, encoding=None, errors=None
+    ):
         if not basename:
-            basename =
+            basename = "stream"
         absPath = self._get_unique_file_path(basename, job_id, cleanup)
         relPath = self._get_file_id_from_path(absPath)
 
-        with open(
+        with open(
+            absPath,
+            "wb" if encoding == None else "wt",
+            encoding=encoding,
+            errors=errors,
+        ) as f:
             # Don't yield while holding an open file descriptor to the temp
             # file. That can result in temp files still being open when we try
             # to clean ourselves up, somehow, for certain workloads.
             yield f, relPath
 
     def get_empty_file_store_id(self, jobStoreID=None, cleanup=False, basename=None):
-        with self.write_file_stream(jobStoreID, cleanup, basename) as (
+        with self.write_file_stream(jobStoreID, cleanup, basename) as (
+            fileHandle,
+            jobStoreFileID,
+        ):
             return jobStoreFileID
 
     def update_file(self, file_id, local_path):
@@ -503,20 +549,23 @@ class FileJobStore(AbstractJobStore):
         self._check_job_store_file_id(file_id)
         jobStoreFilePath = self._get_file_path_from_id(file_id)
         localDirPath = os.path.dirname(local_path)
-        executable = getattr(file_id,
+        executable = getattr(file_id, "executable", False)
 
         if not symlink and os.path.islink(local_path):
             # We had a symlink and want to clobber it with a hardlink or copy.
             os.unlink(local_path)
 
-        if os.path.exists(local_path) and os.path.samefile(
+        if os.path.exists(local_path) and os.path.samefile(
+            jobStoreFilePath, local_path
+        ):
             # The files are already the same: same name, hardlinked, or
             # symlinked. There is nothing to do, and trying to shutil.copyfile
             # one over the other will fail.
             return
 
-        if symlink:
-            # If the reader will accept a symlink,
+        if symlink and self.symlink_job_store_reads:
+            # If the reader will accept a symlink, and we are willing to
+            # symlink into the jobstore, always give them one.
             # There's less that can go wrong.
             try:
                 os.symlink(jobStoreFilePath, local_path)
@@ -537,7 +586,9 @@ class FileJobStore(AbstractJobStore):
                     # In this case, we try to make a hard link.
                     pass
                 else:
-                    logger.error(
+                    logger.error(
+                        f"Unexpected OSError when reading file '{jobStoreFilePath}' from job store"
+                    )
                     raise
 
         # If we get here, symlinking isn't an option.
@@ -581,7 +632,9 @@ class FileJobStore(AbstractJobStore):
                     # hit the file copy case.
                     pass
                 else:
-                    logger.error(
+                    logger.error(
+                        f"Unexpected OSError when reading file '{jobStoreFilePath}' from job store"
+                    )
                     raise
 
         # If we get here, neither a symlink nor a hardlink will work.
@@ -596,15 +649,17 @@ class FileJobStore(AbstractJobStore):
     def file_exists(self, file_id):
         absPath = self._get_file_path_from_id(file_id)
 
-        if (
-            not absPath.startswith(self.
-            not absPath.startswith(self.
+        if (
+            not absPath.startswith(self.jobsDir)
+            and not absPath.startswith(self.filesDir)
+            and not absPath.startswith(self.jobFilesDir)
+        ):
             # Don't even look for it, it is out of bounds.
             raise NoSuchFileException(file_id)
 
         try:
             st = os.stat(absPath)
-        except
+        except OSError:
             return False
         if not stat.S_ISREG(st.st_mode):
             raise NoSuchFileException(file_id)
@@ -614,15 +669,17 @@ class FileJobStore(AbstractJobStore):
         # Duplicate a bunch of fileExists to save on stat calls
         absPath = self._get_file_path_from_id(file_id)
 
-        if (
-            not absPath.startswith(self.
-            not absPath.startswith(self.
+        if (
+            not absPath.startswith(self.jobsDir)
+            and not absPath.startswith(self.filesDir)
+            and not absPath.startswith(self.jobFilesDir)
+        ):
             # Don't even look for it, it is out of bounds.
             raise NoSuchFileException(file_id)
 
         try:
             st = os.stat(absPath)
-        except
+        except OSError:
             return 0
         return st.st_size
 
@@ -632,7 +689,12 @@ class FileJobStore(AbstractJobStore):
         # File objects are context managers (CM) so we could simply return what open returns.
         # However, it is better to wrap it in another CM so as to prevent users from accessing
         # the file object directly, without a with statement.
-        with open(
+        with open(
+            self._get_file_path_from_id(file_id),
+            "wb" if encoding == None else "wt",
+            encoding=encoding,
+            errors=errors,
+        ) as f:
             yield f
 
     @contextmanager
@@ -642,15 +704,13 @@ class FileJobStore(AbstractJobStore):
         file_id: Union[str, FileID],
         encoding: Literal[None] = None,
         errors: Optional[str] = None,
-    ) -> Iterator[IO[bytes]]:
-        ...
+    ) -> Iterator[IO[bytes]]: ...
 
     @contextmanager
     @overload
     def read_file_stream(
         self, file_id: Union[str, FileID], encoding: str, errors: Optional[str] = None
-    ) -> Iterator[IO[str]]:
-        ...
+    ) -> Iterator[IO[str]]: ...
 
     @contextmanager
     @overload
@@ -659,8 +719,7 @@ class FileJobStore(AbstractJobStore):
         file_id: Union[str, FileID],
         encoding: Optional[str] = None,
         errors: Optional[str] = None,
-    ) -> Union[Iterator[IO[bytes]], Iterator[IO[str]]]:
-        ...
+    ) -> Union[Iterator[IO[bytes]], Iterator[IO[str]]]: ...
 
     @contextmanager
     def read_file_stream(
@@ -696,18 +755,32 @@ class FileJobStore(AbstractJobStore):
         return os.path.join(self.sharedFilesDir, sharedFileName)
 
     @contextmanager
-    def write_shared_file_stream(
+    def write_shared_file_stream(
+        self, shared_file_name, encrypted=None, encoding=None, errors=None
+    ):
         # the isProtected parameter has no effect on the fileStore
         self._requireValidSharedFileName(shared_file_name)
-        with AtomicFileCreate(
-
+        with AtomicFileCreate(
+            self._get_shared_file_path(shared_file_name)
+        ) as tmpSharedFilePath:
+            with open(
+                tmpSharedFilePath,
+                "wb" if encoding == None else "wt",
+                encoding=encoding,
+                errors=None,
+            ) as f:
                 yield f
 
     @contextmanager
     def read_shared_file_stream(self, shared_file_name, encoding=None, errors=None):
         self._requireValidSharedFileName(shared_file_name)
         try:
-            with open(
+            with open(
+                self._get_shared_file_path(shared_file_name),
+                "rb" if encoding == None else "rt",
+                encoding=encoding,
+                errors=errors,
+            ) as f:
                 yield f
 
         except OSError as e:
@@ -745,15 +818,11 @@ class FileJobStore(AbstractJobStore):
                 job_id = self._get_job_id_from_files_dir(job_instance_dir)
                 jobs.append(job_id)
 
-
-        # Announce all the shared files
-            yield name
+        yield from os.listdir(self.sharedFilesDir)
 
         for file_dir_path in self._list_dynamic_spray_dir(self.filesDir):
             # Run on all the no-job files
-
-            # There ought to be just one file in here.
-                yield dir_file
+            yield from os.listdir(file_dir_path)
 
         for job_store_id in jobs:
             # Files from _get_job_files_dir
@@ -765,9 +834,7 @@ class FileJobStore(AbstractJobStore):
                 # Except the cleanup directory which we do later.
                 continue
             file_dir_path = os.path.join(job_files_dir, file_dir)
-
-            # There ought to be just one file in here.
-                yield dir_file
+            yield from os.listdir(file_dir_path)
 
         # Files from _get_job_files_cleanup_dir
         job_cleanup_files_dir = os.path.join(job_files_dir, "cleanup")
@@ -775,35 +842,70 @@ class FileJobStore(AbstractJobStore):
             for file_dir in os.listdir(job_cleanup_files_dir):
                 # Each file is in its own directory
                 file_dir_path = os.path.join(job_cleanup_files_dir, file_dir)
-
-                # There ought to be just one file in here.
-                    yield dir_file
+                yield from os.listdir(file_dir_path)
 
     def write_logs(self, msg):
         # Temporary files are placed in the stats directory tree
-        tempStatsFileName =
-        tempStatsFile = os.path.join(self.
-        writeFormat =
+        tempStatsFileName = self.LOG_PREFIX + str(uuid.uuid4().hex) + self.LOG_TEMP_SUFFIX
+        tempStatsFile = os.path.join(self._get_arbitrary_stats_inbox_dir(), tempStatsFileName)
+        writeFormat = "w" if isinstance(msg, str) else "wb"
         with open(tempStatsFile, writeFormat) as f:
             f.write(msg)
-        os.rename(tempStatsFile, tempStatsFile[:-
+        os.rename(tempStatsFile, tempStatsFile[:-len(self.LOG_TEMP_SUFFIX)])  # This operation is atomic
 
     def read_logs(self, callback, read_all=False):
-
-
-
-
-
-
-
-
-
-
-
-
+        files_processed = 0
+
+        # Holds pairs of a function to call to get directories to look at, and
+        # a flag for whether to archive the files found.
+        queries = []
+        if read_all:
+            # If looking at all logs, check the archive
+            queries.append((self._stats_archive_directories, False))
+        # Always check the inbox and archive from it. But do it after checking
+        # the archive to avoid duplicates in the same pass.
+        queries.append((self._stats_inbox_directories, True))
+
+        for to_call, should_archive in queries:
+            for log_dir in to_call():
+                for log_file in os.listdir(log_dir):
+                    if not log_file.startswith(self.LOG_PREFIX):
+                        # Skip anything not a log file (like the other spray
+                        # directories)
+                        continue
+                    if log_file.endswith(self.LOG_TEMP_SUFFIX):
+                        # Skip partially-written files, always.
+                        continue
+
+                    abs_log_file = os.path.join(log_dir, log_file)
+                    if not os.path.isfile(abs_log_file):
+                        # This can't be a log file.
+                        continue
+                    try:
+                        opened_file = open(abs_log_file, "rb")
+                    except FileNotFoundError:
+                        # File disappeared before we could open it.
+                        # Maybe someone else is reading logs?
+                        continue
+                    with opened_file as f:
+                        callback(f)
+                    files_processed += 1
+
+                    if should_archive:
+                        # We need to move the stats file to the archive.
+                        # Since we have UUID stats file names we don't need
+                        # to worry about collisions when it gets there.
+                        new_dir = self._get_arbitrary_stats_archive_dir()
+                        new_abs_log_file = os.path.join(new_dir, log_file)
+                        try:
                             # Mark this item as read
-            os.rename(
-
+                            os.rename(abs_log_file, new_abs_log_file)
+                        except FileNotFoundError:
+                            # File we wanted to archive disappeared.
+                            # Maybe someone else is reading logs?
+                            # TODO: Raise ConcurrentFileModificationException?
+                            continue
+        return files_processed
 
     ##########################################
     # Private methods
@@ -824,14 +926,14 @@ class FileJobStore(AbstractJobStore):
         :param str absPath: The absolute path to a job directory under self.jobsDir which represents a job.
        :rtype : string, string is the job ID, which is a path relative to self.jobsDir
         """
-        return absPath[len(self.jobsDir)+1:]
+        return absPath[len(self.jobsDir) + 1 :]
 
     def _get_job_id_from_files_dir(self, absPath: str) -> str:
         """
         :param str absPath: The absolute path to a job directory under self.jobFilesDir which holds a job's files.
         :rtype : string, string is the job ID
         """
-        return absPath[len(self.jobFilesDir)+1:]
+        return absPath[len(self.jobFilesDir) + 1 :]
 
     def _get_job_file_name(self, jobStoreID):
         """
@@ -913,14 +1015,14 @@ class FileJobStore(AbstractJobStore):
         :rtype : string, string is the file ID.
         """
 
-        return quote(absPath[len(self.jobStoreDir)+1:])
+        return quote(absPath[len(self.jobStoreDir) + 1 :])
 
     def _check_job_store_file_id(self, jobStoreFileID):
         """
         :raise NoSuchFileException: if the file with ID jobStoreFileID does
         not exist or is not a file
         """
-        if not self.file_exists(
+        if not self.file_exists(jobStoreFileID):
             raise NoSuchFileException(jobStoreFileID)
 
     def _get_arbitrary_jobs_dir_for_name(self, jobNameSlug):
@@ -943,15 +1045,35 @@ class FileJobStore(AbstractJobStore):
         if len(os.listdir(self.jobsDir)) > self.fanOut:
             # Make sure that we don't over-fill the root with too many unique job names.
             # Go in a subdirectory tree, and then go by job name and make another tree.
-            return self._get_dynamic_spray_dir(
-
+            return self._get_dynamic_spray_dir(
+                os.path.join(
+                    self._get_dynamic_spray_dir(self.jobsDir),
+                    self.JOB_NAME_DIR_PREFIX + jobNameSlug,
+                )
+            )
         else:
             # Just go in the root
-            return self._get_dynamic_spray_dir(
+            return self._get_dynamic_spray_dir(
+                os.path.join(self.jobsDir, self.JOB_NAME_DIR_PREFIX + jobNameSlug)
+            )
+
+    def _get_arbitrary_stats_inbox_dir(self):
+        """
+        Gets a temporary directory in a multi-level hierarchy in
+        self.stats_inbox, where stats files not yet seen by the leader live.
+        The directory is not unique and may already have other stats files in it.
+
+        :rtype : string, path to temporary directory in which to place files/directories.
 
-
+
+        """
+
+        return self._get_dynamic_spray_dir(self.stats_inbox)
+
+    def _get_arbitrary_stats_archive_dir(self):
         """
-        Gets a temporary directory in a multi-level hierarchy in
+        Gets a temporary directory in a multi-level hierarchy in
+        self.stats_archive, where stats files already seen by the leader live.
         The directory is not unique and may already have other stats files in it.
 
         :rtype : string, path to temporary directory in which to place files/directories.
@@ -959,7 +1081,7 @@ class FileJobStore(AbstractJobStore):
 
         """
 
-        return self._get_dynamic_spray_dir(self.
+        return self._get_dynamic_spray_dir(self.stats_archive)
 
     def _get_arbitrary_files_dir(self):
         """
@@ -1091,17 +1213,27 @@ class FileJobStore(AbstractJobStore):
                 continue
 
             # Now we have only the directories that are named after jobs. Look inside them.
-            yield from self._walk_dynamic_spray_dir(
+            yield from self._walk_dynamic_spray_dir(
+                os.path.join(jobHoldingDir, jobNameDir)
+            )
 
+    def _stats_inbox_directories(self):
+        """
+        :returns: an iterator to the temporary directories containing new stats
+            files. They may also contain directories containing more stats
+            files.
+        """
 
-
+        return self._walk_dynamic_spray_dir(self.stats_inbox)
+
+    def _stats_archive_directories(self):
         """
-        :
-
-
+        :returns: an iterator to the temporary directories containing
+            previously observed stats files. They may also contain directories
+            containing more stats files.
         """
 
-        return self._walk_dynamic_spray_dir(self.
+        return self._walk_dynamic_spray_dir(self.stats_archive)
 
     def _get_unique_file_path(self, fileName, jobStoreID=None, cleanup=False):
         """
@@ -1142,18 +1274,24 @@ class FileJobStore(AbstractJobStore):
             self._check_job_store_id_assigned(jobStoreID)
             # Find where all its created files should live, depending on if
            # they need to go away when the job is deleted or not.
-            jobFilesDir =
+            jobFilesDir = (
+                self._get_job_files_dir(jobStoreID)
+                if not cleanup
+                else self._get_job_files_cleanup_dir(jobStoreID)
+            )
 
             # Lazily create the parent directory.
             # We don't want our tree filled with confusingly empty directories.
             os.makedirs(jobFilesDir, exist_ok=True)
 
             # Then make a temp directory inside it
-            filesDir = os.path.join(jobFilesDir,
+            filesDir = os.path.join(jobFilesDir, "file-" + uuid.uuid4().hex)
             os.mkdir(filesDir)
             return filesDir
         else:
             # Make a temporary file within the non-job-associated files hierarchy
-            filesDir = os.path.join(
+            filesDir = os.path.join(
+                self._get_arbitrary_files_dir(), "file-" + uuid.uuid4().hex
+            )
             os.mkdir(filesDir)
             return filesDir