toil 6.1.0a1__py3-none-any.whl → 8.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two package versions as they appear in their respective public registries.
- toil/__init__.py +122 -315
- toil/batchSystems/__init__.py +1 -0
- toil/batchSystems/abstractBatchSystem.py +173 -89
- toil/batchSystems/abstractGridEngineBatchSystem.py +272 -148
- toil/batchSystems/awsBatch.py +244 -135
- toil/batchSystems/cleanup_support.py +26 -16
- toil/batchSystems/contained_executor.py +31 -28
- toil/batchSystems/gridengine.py +86 -50
- toil/batchSystems/htcondor.py +166 -89
- toil/batchSystems/kubernetes.py +632 -382
- toil/batchSystems/local_support.py +20 -15
- toil/batchSystems/lsf.py +134 -81
- toil/batchSystems/lsfHelper.py +13 -11
- toil/batchSystems/mesos/__init__.py +41 -29
- toil/batchSystems/mesos/batchSystem.py +290 -151
- toil/batchSystems/mesos/executor.py +79 -50
- toil/batchSystems/mesos/test/__init__.py +31 -23
- toil/batchSystems/options.py +46 -28
- toil/batchSystems/registry.py +53 -19
- toil/batchSystems/singleMachine.py +296 -125
- toil/batchSystems/slurm.py +603 -138
- toil/batchSystems/torque.py +47 -33
- toil/bus.py +186 -76
- toil/common.py +664 -368
- toil/cwl/__init__.py +1 -1
- toil/cwl/cwltoil.py +1136 -483
- toil/cwl/utils.py +17 -22
- toil/deferred.py +63 -42
- toil/exceptions.py +5 -3
- toil/fileStores/__init__.py +5 -5
- toil/fileStores/abstractFileStore.py +140 -60
- toil/fileStores/cachingFileStore.py +717 -269
- toil/fileStores/nonCachingFileStore.py +116 -87
- toil/job.py +1225 -368
- toil/jobStores/abstractJobStore.py +416 -266
- toil/jobStores/aws/jobStore.py +863 -477
- toil/jobStores/aws/utils.py +201 -120
- toil/jobStores/conftest.py +3 -2
- toil/jobStores/fileJobStore.py +292 -154
- toil/jobStores/googleJobStore.py +140 -74
- toil/jobStores/utils.py +36 -15
- toil/leader.py +668 -272
- toil/lib/accelerators.py +115 -18
- toil/lib/aws/__init__.py +74 -31
- toil/lib/aws/ami.py +122 -87
- toil/lib/aws/iam.py +284 -108
- toil/lib/aws/s3.py +31 -0
- toil/lib/aws/session.py +214 -39
- toil/lib/aws/utils.py +287 -231
- toil/lib/bioio.py +13 -5
- toil/lib/compatibility.py +11 -6
- toil/lib/conversions.py +104 -47
- toil/lib/docker.py +131 -103
- toil/lib/ec2.py +361 -199
- toil/lib/ec2nodes.py +174 -106
- toil/lib/encryption/_dummy.py +5 -3
- toil/lib/encryption/_nacl.py +10 -6
- toil/lib/encryption/conftest.py +1 -0
- toil/lib/exceptions.py +26 -7
- toil/lib/expando.py +5 -3
- toil/lib/ftp_utils.py +217 -0
- toil/lib/generatedEC2Lists.py +127 -19
- toil/lib/humanize.py +6 -2
- toil/lib/integration.py +341 -0
- toil/lib/io.py +141 -15
- toil/lib/iterables.py +4 -2
- toil/lib/memoize.py +12 -8
- toil/lib/misc.py +66 -21
- toil/lib/objects.py +2 -2
- toil/lib/resources.py +68 -15
- toil/lib/retry.py +126 -81
- toil/lib/threading.py +299 -82
- toil/lib/throttle.py +16 -15
- toil/options/common.py +843 -409
- toil/options/cwl.py +175 -90
- toil/options/runner.py +50 -0
- toil/options/wdl.py +73 -17
- toil/provisioners/__init__.py +117 -46
- toil/provisioners/abstractProvisioner.py +332 -157
- toil/provisioners/aws/__init__.py +70 -33
- toil/provisioners/aws/awsProvisioner.py +1145 -715
- toil/provisioners/clusterScaler.py +541 -279
- toil/provisioners/gceProvisioner.py +282 -179
- toil/provisioners/node.py +155 -79
- toil/realtimeLogger.py +34 -22
- toil/resource.py +137 -75
- toil/server/app.py +128 -62
- toil/server/celery_app.py +3 -1
- toil/server/cli/wes_cwl_runner.py +82 -53
- toil/server/utils.py +54 -28
- toil/server/wes/abstract_backend.py +64 -26
- toil/server/wes/amazon_wes_utils.py +21 -15
- toil/server/wes/tasks.py +121 -63
- toil/server/wes/toil_backend.py +142 -107
- toil/server/wsgi_app.py +4 -3
- toil/serviceManager.py +58 -22
- toil/statsAndLogging.py +224 -70
- toil/test/__init__.py +282 -183
- toil/test/batchSystems/batchSystemTest.py +460 -210
- toil/test/batchSystems/batch_system_plugin_test.py +90 -0
- toil/test/batchSystems/test_gridengine.py +173 -0
- toil/test/batchSystems/test_lsf_helper.py +67 -58
- toil/test/batchSystems/test_slurm.py +110 -49
- toil/test/cactus/__init__.py +0 -0
- toil/test/cactus/test_cactus_integration.py +56 -0
- toil/test/cwl/cwlTest.py +496 -287
- toil/test/cwl/measure_default_memory.cwl +12 -0
- toil/test/cwl/not_run_required_input.cwl +29 -0
- toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
- toil/test/cwl/seqtk_seq.cwl +1 -1
- toil/test/docs/scriptsTest.py +69 -46
- toil/test/jobStores/jobStoreTest.py +427 -264
- toil/test/lib/aws/test_iam.py +118 -50
- toil/test/lib/aws/test_s3.py +16 -9
- toil/test/lib/aws/test_utils.py +5 -6
- toil/test/lib/dockerTest.py +118 -141
- toil/test/lib/test_conversions.py +113 -115
- toil/test/lib/test_ec2.py +58 -50
- toil/test/lib/test_integration.py +104 -0
- toil/test/lib/test_misc.py +12 -5
- toil/test/mesos/MesosDataStructuresTest.py +23 -10
- toil/test/mesos/helloWorld.py +7 -6
- toil/test/mesos/stress.py +25 -20
- toil/test/options/__init__.py +13 -0
- toil/test/options/options.py +42 -0
- toil/test/provisioners/aws/awsProvisionerTest.py +320 -150
- toil/test/provisioners/clusterScalerTest.py +440 -250
- toil/test/provisioners/clusterTest.py +166 -44
- toil/test/provisioners/gceProvisionerTest.py +174 -100
- toil/test/provisioners/provisionerTest.py +25 -13
- toil/test/provisioners/restartScript.py +5 -4
- toil/test/server/serverTest.py +188 -141
- toil/test/sort/restart_sort.py +137 -68
- toil/test/sort/sort.py +134 -66
- toil/test/sort/sortTest.py +91 -49
- toil/test/src/autoDeploymentTest.py +141 -101
- toil/test/src/busTest.py +20 -18
- toil/test/src/checkpointTest.py +8 -2
- toil/test/src/deferredFunctionTest.py +49 -35
- toil/test/src/dockerCheckTest.py +32 -24
- toil/test/src/environmentTest.py +135 -0
- toil/test/src/fileStoreTest.py +539 -272
- toil/test/src/helloWorldTest.py +7 -4
- toil/test/src/importExportFileTest.py +61 -31
- toil/test/src/jobDescriptionTest.py +46 -21
- toil/test/src/jobEncapsulationTest.py +2 -0
- toil/test/src/jobFileStoreTest.py +74 -50
- toil/test/src/jobServiceTest.py +187 -73
- toil/test/src/jobTest.py +121 -71
- toil/test/src/miscTests.py +19 -18
- toil/test/src/promisedRequirementTest.py +82 -36
- toil/test/src/promisesTest.py +7 -6
- toil/test/src/realtimeLoggerTest.py +10 -6
- toil/test/src/regularLogTest.py +71 -37
- toil/test/src/resourceTest.py +80 -49
- toil/test/src/restartDAGTest.py +36 -22
- toil/test/src/resumabilityTest.py +9 -2
- toil/test/src/retainTempDirTest.py +45 -14
- toil/test/src/systemTest.py +12 -8
- toil/test/src/threadingTest.py +44 -25
- toil/test/src/toilContextManagerTest.py +10 -7
- toil/test/src/userDefinedJobArgTypeTest.py +8 -5
- toil/test/src/workerTest.py +73 -23
- toil/test/utils/toilDebugTest.py +103 -33
- toil/test/utils/toilKillTest.py +4 -5
- toil/test/utils/utilsTest.py +245 -106
- toil/test/wdl/wdltoil_test.py +818 -149
- toil/test/wdl/wdltoil_test_kubernetes.py +91 -0
- toil/toilState.py +120 -35
- toil/utils/toilConfig.py +13 -4
- toil/utils/toilDebugFile.py +44 -27
- toil/utils/toilDebugJob.py +214 -27
- toil/utils/toilDestroyCluster.py +11 -6
- toil/utils/toilKill.py +8 -3
- toil/utils/toilLaunchCluster.py +256 -140
- toil/utils/toilMain.py +37 -16
- toil/utils/toilRsyncCluster.py +32 -14
- toil/utils/toilSshCluster.py +49 -22
- toil/utils/toilStats.py +356 -273
- toil/utils/toilStatus.py +292 -139
- toil/utils/toilUpdateEC2Instances.py +3 -1
- toil/version.py +12 -12
- toil/wdl/utils.py +5 -5
- toil/wdl/wdltoil.py +3913 -1033
- toil/worker.py +367 -184
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/LICENSE +25 -0
- toil-8.0.0.dist-info/METADATA +173 -0
- toil-8.0.0.dist-info/RECORD +253 -0
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/WHEEL +1 -1
- toil-6.1.0a1.dist-info/METADATA +0 -125
- toil-6.1.0a1.dist-info/RECORD +0 -237
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/entry_points.txt +0 -0
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/top_level.txt +0 -0
toil/jobStores/fileJobStore.py
CHANGED
@@ -19,30 +19,29 @@ import random
 import re
 import shutil
 import stat
-import sys
 import time
 import uuid
+from collections.abc import Iterable, Iterator
 from contextlib import contextmanager
-from typing import IO,
+from typing import IO, Literal, Optional, Union, overload
 from urllib.parse import ParseResult, quote, unquote
 
-if sys.version_info >= (3, 8):
-    from typing import Literal
-else:
-    from typing_extensions import Literal
-
 from toil.fileStores import FileID
 from toil.job import TemporaryID
-from toil.jobStores.abstractJobStore import (
-
-
-
-
-
-
-
-
-
+from toil.jobStores.abstractJobStore import (
+    AbstractJobStore,
+    JobStoreExistsException,
+    NoSuchFileException,
+    NoSuchJobException,
+    NoSuchJobStoreException,
+)
+from toil.lib.io import (
+    AtomicFileCreate,
+    atomic_copy,
+    atomic_copyobj,
+    mkdtemp,
+    robust_rmtree,
+)
 
 logger = logging.getLogger(__name__)
 
@@ -59,14 +58,19 @@ class FileJobStore(AbstractJobStore):
 
     # What prefix should be on the per-job job directories, to distinguish them
    # from the spray directories?
-    JOB_DIR_PREFIX =
+    JOB_DIR_PREFIX = "instance-"
 
     # What prefix do we put on the per-job-name directories we sort jobs into?
-    JOB_NAME_DIR_PREFIX =
+    JOB_NAME_DIR_PREFIX = "kind-"
 
     # 10Mb RAM chunks when reading/writing files
     BUFFER_SIZE = 10485760  # 10Mb
 
+    # When a log file is still being written, what will its name end with?
+    LOG_TEMP_SUFFIX = ".new"
+    # All log files start with this prefix
+    LOG_PREFIX = "stats"
+
     def default_caching(self) -> bool:
         """
         Jobstore's preference as to whether it likes caching or doesn't care about it.
@@ -88,47 +92,58 @@ class FileJobStore(AbstractJobStore):
         logger.debug("Path to job store directory is '%s'.", self.jobStoreDir)
 
         # Directory where actual job files go, and their job-associated temp files
-        self.jobsDir = os.path.join(self.jobStoreDir,
+        self.jobsDir = os.path.join(self.jobStoreDir, "jobs")
         # Directory where stats files go
-        self.statsDir = os.path.join(self.jobStoreDir,
+        self.statsDir = os.path.join(self.jobStoreDir, "stats")
+        # Which has subdirectories for new and seen stats files
+        self.stats_inbox = os.path.join(self.statsDir, "inbox")
+        self.stats_archive = os.path.join(self.statsDir, "archive")
         # Directory where non-job-associated files for the file store go
-        self.filesDir = os.path.join(self.jobStoreDir,
+        self.filesDir = os.path.join(self.jobStoreDir, "files/no-job")
         # Directory where job-associated files for the file store go.
         # Each per-job directory in here will have separate directories for
         # files to clean up and files to not clean up when the job is deleted.
-        self.jobFilesDir = os.path.join(self.jobStoreDir,
+        self.jobFilesDir = os.path.join(self.jobStoreDir, "files/for-job")
         # Directory where shared files go
-        self.sharedFilesDir = os.path.join(self.jobStoreDir,
+        self.sharedFilesDir = os.path.join(self.jobStoreDir, "files/shared")
 
         self.fanOut = fanOut
 
         self.linkImports = None
         self.moveExports = None
+        self.symlink_job_store_reads = None
 
     def __repr__(self):
-        return f
+        return f"FileJobStore({self.jobStoreDir})"
 
     def initialize(self, config):
         try:
             os.mkdir(self.jobStoreDir)
         except OSError as e:
             if e.errno == errno.EEXIST:
-                raise JobStoreExistsException(self.jobStoreDir)
+                raise JobStoreExistsException(self.jobStoreDir, "file")
             else:
                 raise
         os.makedirs(self.jobsDir, exist_ok=True)
         os.makedirs(self.statsDir, exist_ok=True)
+        os.makedirs(self.stats_inbox, exist_ok=True)
+        os.makedirs(self.stats_archive, exist_ok=True)
         os.makedirs(self.filesDir, exist_ok=True)
         os.makedirs(self.jobFilesDir, exist_ok=True)
         os.makedirs(self.sharedFilesDir, exist_ok=True)
         self.linkImports = config.symlinkImports
         self.moveExports = config.moveOutputs
+        self.symlink_job_store_reads = config.symlink_job_store_reads
         super().initialize(config)
 
     def resume(self):
         if not os.path.isdir(self.jobStoreDir):
-            raise NoSuchJobStoreException(self.jobStoreDir)
+            raise NoSuchJobStoreException(self.jobStoreDir, "file")
         super().resume()
+        # TODO: Unify with initialize() configuration
+        self.linkImports = self.config.symlinkImports
+        self.moveExports = self.config.moveOutputs
+        self.symlink_job_store_reads = self.config.symlink_job_store_reads
 
     def destroy(self):
         if os.path.exists(self.jobStoreDir):
@@ -147,8 +162,10 @@ class FileJobStore(AbstractJobStore):
 
         # Make a unique temp directory under a directory for this job name,
         # possibly sprayed across multiple levels of subdirectories.
-        absJobDir = mkdtemp(
-
+        absJobDir = mkdtemp(
+            prefix=self.JOB_DIR_PREFIX,
+            dir=self._get_arbitrary_jobs_dir_for_name(usefulFilename),
+        )
 
         job_description.jobStoreID = self._get_job_id_from_dir(absJobDir)
 
@@ -174,7 +191,9 @@ class FileJobStore(AbstractJobStore):
         Spin-wait and block for a job to appear before returning
         False if it does not.
         """
-        return self._wait_for_file(
+        return self._wait_for_file(
+            self._get_job_file_name(jobStoreID), maxTries=maxTries, sleepTime=sleepTime
+        )
 
     def _wait_for_file(self, fileName, maxTries=35, sleepTime=1):
         """
@@ -192,14 +211,18 @@ class FileJobStore(AbstractJobStore):
         In practice, the need for retries happens rarely, but it does happen
         over the course of large workflows with a jobStore on a busy NFS.
         """
-        for iTry in range(1,maxTries+1):
+        for iTry in range(1, maxTries + 1):
             if os.path.exists(fileName):
                 return True
             if iTry >= maxTries:
                 return False
             elif iTry == 1:
-                logger.warning(
-
+                logger.warning(
+                    (
+                        "Path `{}` does not exist (yet). We will try #{} more times with {}s "
+                        "intervals."
+                    ).format(fileName, maxTries - iTry, sleepTime)
+                )
             time.sleep(sleepTime)
         return False
 
@@ -210,7 +233,7 @@ class FileJobStore(AbstractJobStore):
         self._check_job_store_file_id(jobStoreFileID)
         jobStorePath = self._get_file_path_from_id(jobStoreFileID)
         if os.path.exists(jobStorePath):
-            return
+            return "file:" + jobStorePath
         else:
             raise NoSuchFileException(jobStoreFileID)
 
@@ -218,7 +241,7 @@ class FileJobStore(AbstractJobStore):
         jobStorePath = os.path.join(self.sharedFilesDir, sharedFileName)
         if not os.path.exists(jobStorePath):
             raise NoSuchFileException(sharedFileName)
-        return
+        return "file:" + jobStorePath
 
     def load_job(self, job_id):
         # If the job obviously doesn't exist, note that.
@@ -226,7 +249,7 @@ class FileJobStore(AbstractJobStore):
         # Try to load a valid version of the job.
         jobFile = self._get_job_file_name(job_id)
         try:
-            with open(jobFile,
+            with open(jobFile, "rb") as fileHandle:
                 job = pickle.load(fileHandle)
         except FileNotFoundError:
             # We were racing a delete on a non-POSIX-compliant filesystem.
@@ -248,7 +271,9 @@ class FileJobStore(AbstractJobStore):
 
     def update_job(self, job):
         assert job.jobStoreID is not None, f"Tried to update job {job} without an ID"
-        assert not isinstance(
+        assert not isinstance(
+            job.jobStoreID, TemporaryID
+        ), f"Tried to update job {job} without an assigned ID"
 
         job.pre_update_hook()
 
@@ -261,10 +286,11 @@ class FileJobStore(AbstractJobStore):
         # The file is then moved to its correct path.
         # Atomicity guarantees use the fact the underlying file system's "move"
         # function is atomic.
-        with open(dest_filename + ".new",
+        with open(dest_filename + ".new", "xb") as f:
             pickle.dump(job, f)
         # This should be atomic for the file system
         os.rename(dest_filename + ".new", dest_filename)
+
     def delete_job(self, job_id):
         # The jobStoreID is the relative path to the directory containing the job,
         # removing this directory deletes the job.
@@ -296,48 +322,50 @@ class FileJobStore(AbstractJobStore):
     # Functions that deal with temporary files associated with jobs
     ##########################################
 
-
-    def optional_hard_copy(self, hardlink):
-        if hardlink:
-            saved = self.linkImports
-            self.linkImports = False
-        yield
-        if hardlink:
-            self.linkImports = saved
-
-    def _copy_or_link(self, src_path, dst_path, symlink=False):
+    def _copy_or_link(self, src_path, dst_path, hardlink=False, symlink=False):
         # linking is not done be default because of issue #1755
-
-
-
+        # TODO: is hardlinking ever actually done?
+        src_path = self._extract_path_from_url(src_path)
+        if self.linkImports and not hardlink and symlink:
+            os.symlink(os.path.realpath(src_path), dst_path)
         else:
-            atomic_copy(
-
-    def _import_file(
-
-
+            atomic_copy(src_path, dst_path)
+
+    def _import_file(
+        self, otherCls, uri, shared_file_name=None, hardlink=False, symlink=True
+    ):
+        # symlink argument says whether the caller can take symlinks or not.
+        # ex: if false, it means the workflow cannot work with symlinks and we need to hardlink or copy.
+        # TODO: Do we ever actually hardlink?
         # default is true since symlinking everything is ideal
         uri_path = unquote(uri.path)
         if issubclass(otherCls, FileJobStore):
             if os.path.isdir(uri_path):
                 # Don't allow directories (unless someone is racing us)
-                raise IsADirectoryError(
+                raise IsADirectoryError(
+                    f"URI {uri} points to a directory but a file was expected"
+                )
             if shared_file_name is None:
                 executable = os.stat(uri_path).st_mode & stat.S_IXUSR != 0
-
-
-
+                # use this to get a valid path to write to in job store
+                absPath = self._get_unique_file_path(uri_path)
+                self._copy_or_link(uri, absPath, hardlink=hardlink, symlink=symlink)
                 # TODO: os.stat(absPath).st_size consistently gives values lower than
                 # getDirSizeRecursively()
-                return FileID(
+                return FileID(
+                    self._get_file_id_from_path(absPath),
+                    os.stat(absPath).st_size,
+                    executable,
+                )
             else:
                 self._requireValidSharedFileName(shared_file_name)
                 path = self._get_shared_file_path(shared_file_name)
-
-                self._copy_or_link(uri, path, symlink=symlink)
+                self._copy_or_link(uri, path, hardlink=hardlink, symlink=symlink)
                 return None
         else:
-            return super()._import_file(
+            return super()._import_file(
+                otherCls, uri, shared_file_name=shared_file_name
+            )
 
     def _export_file(self, otherCls, file_id, uri):
         if issubclass(otherCls, FileJobStore):
@@ -346,7 +374,7 @@ class FileJobStore(AbstractJobStore):
             # Make sure we don't need to worry about directories when exporting
             # to local files, just like for cloud storage.
             os.makedirs(os.path.dirname(destPath), exist_ok=True)
-            executable = getattr(file_id,
+            executable = getattr(file_id, "executable", False)
             if self.moveExports:
                 self._move_and_linkback(srcPath, destPath, executable=executable)
             else:
@@ -355,7 +383,11 @@ class FileJobStore(AbstractJobStore):
             super()._default_export_file(otherCls, file_id, uri)
 
     def _move_and_linkback(self, srcPath, destPath, executable):
-        logger.debug(
+        logger.debug(
+            "moveExports option, Moving src=%s to dest=%s ; then symlinking dest to src",
+            srcPath,
+            destPath,
+        )
         shutil.move(srcPath, destPath)
         os.symlink(destPath, srcPath)
         if executable:
@@ -391,7 +423,7 @@ class FileJobStore(AbstractJobStore):
         """
         Open a file URL as a binary stream.
         """
-        return open(cls._extract_path_from_url(url),
+        return open(cls._extract_path_from_url(url), "rb")
 
     @classmethod
     def _write_to_url(cls, readable, url, executable=False):
@@ -403,20 +435,24 @@ class FileJobStore(AbstractJobStore):
         :param object readable: An open file object to read from.
         """
         # we use a ~10Mb buffer to improve speed
-        atomic_copyobj(
-
-
-
+        atomic_copyobj(
+            readable,
+            cls._extract_path_from_url(url),
+            length=cls.BUFFER_SIZE,
+            executable=executable,
+        )
 
     @classmethod
-    def _list_url(cls, url: ParseResult) ->
+    def _list_url(cls, url: ParseResult) -> list[str]:
         path = cls._extract_path_from_url(url)
         listing = []
         for p in os.listdir(path):
             # We know there are no slashes in these
             component = quote(p)
             # Return directories with trailing slashes and files without
-            listing.append(
+            listing.append(
+                (component + "/") if os.path.isdir(os.path.join(path, p)) else component
+            )
         return listing
 
     @classmethod
@@ -429,13 +465,13 @@ class FileJobStore(AbstractJobStore):
         """
         :return: local file path of file pointed at by the given URL
         """
-        if url.netloc !=
+        if url.netloc != "" and url.netloc != "localhost":
            raise RuntimeError("The URL '%s' is invalid" % url.geturl())
         return unquote(url.path)
 
     @classmethod
     def _supports_url(cls, url, export=False):
-        return url.scheme.lower() ==
+        return url.scheme.lower() == "file"
 
     def _make_string_filename_safe(self, arbitraryString, maxLength=240):
         """
@@ -464,7 +500,7 @@ class FileJobStore(AbstractJobStore):
             parts.append("UNPRINTABLE")
 
         # Glue it all together, and truncate to length
-        return
+        return "_".join(parts)[:maxLength]
 
     def write_file(self, local_path, job_id=None, cleanup=False):
         absPath = self._get_unique_file_path(local_path, job_id, cleanup)
@@ -473,20 +509,30 @@ class FileJobStore(AbstractJobStore):
         return relPath
 
     @contextmanager
-    def write_file_stream(
+    def write_file_stream(
+        self, job_id=None, cleanup=False, basename=None, encoding=None, errors=None
+    ):
         if not basename:
-            basename =
+            basename = "stream"
         absPath = self._get_unique_file_path(basename, job_id, cleanup)
         relPath = self._get_file_id_from_path(absPath)
 
-        with open(
+        with open(
+            absPath,
+            "wb" if encoding == None else "wt",
+            encoding=encoding,
+            errors=errors,
+        ) as f:
             # Don't yield while holding an open file descriptor to the temp
             # file. That can result in temp files still being open when we try
             # to clean ourselves up, somehow, for certain workloads.
             yield f, relPath
 
     def get_empty_file_store_id(self, jobStoreID=None, cleanup=False, basename=None):
-        with self.write_file_stream(jobStoreID, cleanup, basename) as (
+        with self.write_file_stream(jobStoreID, cleanup, basename) as (
+            fileHandle,
+            jobStoreFileID,
+        ):
             return jobStoreFileID
 
     def update_file(self, file_id, local_path):
@@ -503,20 +549,23 @@ class FileJobStore(AbstractJobStore):
         self._check_job_store_file_id(file_id)
         jobStoreFilePath = self._get_file_path_from_id(file_id)
         localDirPath = os.path.dirname(local_path)
-        executable = getattr(file_id,
+        executable = getattr(file_id, "executable", False)
 
         if not symlink and os.path.islink(local_path):
             # We had a symlink and want to clobber it with a hardlink or copy.
             os.unlink(local_path)
 
-        if os.path.exists(local_path) and os.path.samefile(
+        if os.path.exists(local_path) and os.path.samefile(
+            jobStoreFilePath, local_path
+        ):
             # The files are already the same: same name, hardlinked, or
             # symlinked. There is nothing to do, and trying to shutil.copyfile
             # one over the other will fail.
             return
 
-        if symlink:
-            # If the reader will accept a symlink,
+        if symlink and self.symlink_job_store_reads:
+            # If the reader will accept a symlink, and we are willing to
+            # symlink into the jobstore, always give them one.
             # There's less that can go wrong.
             try:
                 os.symlink(jobStoreFilePath, local_path)
@@ -537,7 +586,9 @@ class FileJobStore(AbstractJobStore):
                     # In this case, we try to make a hard link.
                     pass
                 else:
-                    logger.error(
+                    logger.error(
+                        f"Unexpected OSError when reading file '{jobStoreFilePath}' from job store"
+                    )
                     raise
 
         # If we get here, symlinking isn't an option.
@@ -581,7 +632,9 @@ class FileJobStore(AbstractJobStore):
                     # hit the file copy case.
                     pass
                 else:
-                    logger.error(
+                    logger.error(
+                        f"Unexpected OSError when reading file '{jobStoreFilePath}' from job store"
+                    )
                     raise
 
         # If we get here, neither a symlink nor a hardlink will work.
@@ -596,15 +649,17 @@ class FileJobStore(AbstractJobStore):
     def file_exists(self, file_id):
         absPath = self._get_file_path_from_id(file_id)
 
-        if (
-            not absPath.startswith(self.
-            not absPath.startswith(self.
+        if (
+            not absPath.startswith(self.jobsDir)
+            and not absPath.startswith(self.filesDir)
+            and not absPath.startswith(self.jobFilesDir)
+        ):
             # Don't even look for it, it is out of bounds.
             raise NoSuchFileException(file_id)
 
         try:
             st = os.stat(absPath)
-        except
+        except OSError:
             return False
         if not stat.S_ISREG(st.st_mode):
             raise NoSuchFileException(file_id)
@@ -614,15 +669,17 @@ class FileJobStore(AbstractJobStore):
         # Duplicate a bunch of fileExists to save on stat calls
         absPath = self._get_file_path_from_id(file_id)
 
-        if (
-            not absPath.startswith(self.
-            not absPath.startswith(self.
+        if (
+            not absPath.startswith(self.jobsDir)
+            and not absPath.startswith(self.filesDir)
+            and not absPath.startswith(self.jobFilesDir)
+        ):
             # Don't even look for it, it is out of bounds.
             raise NoSuchFileException(file_id)
 
         try:
             st = os.stat(absPath)
-        except
+        except OSError:
             return 0
         return st.st_size
 
@@ -632,7 +689,12 @@ class FileJobStore(AbstractJobStore):
         # File objects are context managers (CM) so we could simply return what open returns.
         # However, it is better to wrap it in another CM so as to prevent users from accessing
         # the file object directly, without a with statement.
-        with open(
+        with open(
+            self._get_file_path_from_id(file_id),
+            "wb" if encoding == None else "wt",
+            encoding=encoding,
+            errors=errors,
+        ) as f:
             yield f
 
     @contextmanager
@@ -642,15 +704,13 @@ class FileJobStore(AbstractJobStore):
         file_id: Union[str, FileID],
         encoding: Literal[None] = None,
         errors: Optional[str] = None,
-    ) -> Iterator[IO[bytes]]:
-        ...
+    ) -> Iterator[IO[bytes]]: ...
 
     @contextmanager
     @overload
     def read_file_stream(
         self, file_id: Union[str, FileID], encoding: str, errors: Optional[str] = None
-    ) -> Iterator[IO[str]]:
-        ...
+    ) -> Iterator[IO[str]]: ...
 
     @contextmanager
     @overload
@@ -659,8 +719,7 @@ class FileJobStore(AbstractJobStore):
         file_id: Union[str, FileID],
         encoding: Optional[str] = None,
         errors: Optional[str] = None,
-    ) -> Union[Iterator[IO[bytes]], Iterator[IO[str]]]:
-        ...
+    ) -> Union[Iterator[IO[bytes]], Iterator[IO[str]]]: ...
 
     @contextmanager
     def read_file_stream(
@@ -696,18 +755,32 @@ class FileJobStore(AbstractJobStore):
         return os.path.join(self.sharedFilesDir, sharedFileName)
 
     @contextmanager
-    def write_shared_file_stream(
+    def write_shared_file_stream(
+        self, shared_file_name, encrypted=None, encoding=None, errors=None
+    ):
         # the isProtected parameter has no effect on the fileStore
         self._requireValidSharedFileName(shared_file_name)
-        with AtomicFileCreate(
-
+        with AtomicFileCreate(
+            self._get_shared_file_path(shared_file_name)
+        ) as tmpSharedFilePath:
+            with open(
+                tmpSharedFilePath,
+                "wb" if encoding == None else "wt",
+                encoding=encoding,
+                errors=None,
+            ) as f:
                 yield f
 
     @contextmanager
     def read_shared_file_stream(self, shared_file_name, encoding=None, errors=None):
         self._requireValidSharedFileName(shared_file_name)
         try:
-            with open(
+            with open(
+                self._get_shared_file_path(shared_file_name),
+                "rb" if encoding == None else "rt",
+                encoding=encoding,
+                errors=errors,
+            ) as f:
                 yield f
 
         except OSError as e:
@@ -745,15 +818,11 @@ class FileJobStore(AbstractJobStore):
                 job_id = self._get_job_id_from_files_dir(job_instance_dir)
                 jobs.append(job_id)
 
-
-        # Announce all the shared files
-            yield name
+        yield from os.listdir(self.sharedFilesDir)
 
         for file_dir_path in self._list_dynamic_spray_dir(self.filesDir):
             # Run on all the no-job files
-
-            # There ought to be just one file in here.
-                yield dir_file
+            yield from os.listdir(file_dir_path)
 
         for job_store_id in jobs:
             # Files from _get_job_files_dir
@@ -765,9 +834,7 @@ class FileJobStore(AbstractJobStore):
                 # Except the cleanup directory which we do later.
                 continue
             file_dir_path = os.path.join(job_files_dir, file_dir)
-
-            # There ought to be just one file in here.
-                yield dir_file
+            yield from os.listdir(file_dir_path)
 
         # Files from _get_job_files_cleanup_dir
         job_cleanup_files_dir = os.path.join(job_files_dir, "cleanup")
@@ -775,35 +842,70 @@ class FileJobStore(AbstractJobStore):
             for file_dir in os.listdir(job_cleanup_files_dir):
                 # Each file is in its own directory
                 file_dir_path = os.path.join(job_cleanup_files_dir, file_dir)
-
-                # There ought to be just one file in here.
-                    yield dir_file
+                yield from os.listdir(file_dir_path)
 
     def write_logs(self, msg):
         # Temporary files are placed in the stats directory tree
-        tempStatsFileName =
-        tempStatsFile = os.path.join(self.
-        writeFormat =
+        tempStatsFileName = self.LOG_PREFIX + str(uuid.uuid4().hex) + self.LOG_TEMP_SUFFIX
+        tempStatsFile = os.path.join(self._get_arbitrary_stats_inbox_dir(), tempStatsFileName)
+        writeFormat = "w" if isinstance(msg, str) else "wb"
         with open(tempStatsFile, writeFormat) as f:
             f.write(msg)
-        os.rename(tempStatsFile, tempStatsFile[:-
+        os.rename(tempStatsFile, tempStatsFile[:-len(self.LOG_TEMP_SUFFIX)])  # This operation is atomic
 
     def read_logs(self, callback, read_all=False):
-
-
-
-
-
-
-
-
-
-
-
-
+        files_processed = 0
+
+        # Holds pairs of a function to call to get directories to look at, and
+        # a flag for whether to archive the files found.
+        queries = []
+        if read_all:
+            # If looking at all logs, check the archive
+            queries.append((self._stats_archive_directories, False))
+        # Always check the inbox and archive from it. But do it after checking
+        # the archive to avoid duplicates in the same pass.
+        queries.append((self._stats_inbox_directories, True))
+
+        for to_call, should_archive in queries:
+            for log_dir in to_call():
+                for log_file in os.listdir(log_dir):
+                    if not log_file.startswith(self.LOG_PREFIX):
+                        # Skip anything not a log file (like the other spray
+                        # directories)
+                        continue
+                    if log_file.endswith(self.LOG_TEMP_SUFFIX):
+                        # Skip partially-written files, always.
+                        continue
+
+                    abs_log_file = os.path.join(log_dir, log_file)
+                    if not os.path.isfile(abs_log_file):
+                        # This can't be a log file.
+                        continue
+                    try:
+                        opened_file = open(abs_log_file, "rb")
+                    except FileNotFoundError:
+                        # File disappeared before we could open it.
+                        # Maybe someone else is reading logs?
+                        continue
+                    with opened_file as f:
+                        callback(f)
+                    files_processed += 1
+
+                    if should_archive:
+                        # We need to move the stats file to the archive.
+                        # Since we have UUID stats file names we don't need
+                        # to worry about collisions when it gets there.
+                        new_dir = self._get_arbitrary_stats_archive_dir()
+                        new_abs_log_file = os.path.join(new_dir, log_file)
+                        try:
                             # Mark this item as read
-            os.rename(
-
+                            os.rename(abs_log_file, new_abs_log_file)
+                        except FileNotFoundError:
+                            # File we wanted to archive disappeared.
+                            # Maybe someone else is reading logs?
+                            # TODO: Raise ConcurrentFileModificationException?
+                            continue
+        return files_processed
 
     ##########################################
     # Private methods
@@ -824,14 +926,14 @@ class FileJobStore(AbstractJobStore):
         :param str absPath: The absolute path to a job directory under self.jobsDir which represents a job.
        :rtype : string, string is the job ID, which is a path relative to self.jobsDir
         """
-        return absPath[len(self.jobsDir)+1:]
+        return absPath[len(self.jobsDir) + 1 :]
 
     def _get_job_id_from_files_dir(self, absPath: str) -> str:
         """
         :param str absPath: The absolute path to a job directory under self.jobFilesDir which holds a job's files.
         :rtype : string, string is the job ID
         """
-        return absPath[len(self.jobFilesDir)+1:]
+        return absPath[len(self.jobFilesDir) + 1 :]
 
     def _get_job_file_name(self, jobStoreID):
         """
@@ -913,14 +1015,14 @@ class FileJobStore(AbstractJobStore):
         :rtype : string, string is the file ID.
         """
 
-        return quote(absPath[len(self.jobStoreDir)+1:])
+        return quote(absPath[len(self.jobStoreDir) + 1 :])
 
     def _check_job_store_file_id(self, jobStoreFileID):
         """
         :raise NoSuchFileException: if the file with ID jobStoreFileID does
         not exist or is not a file
         """
-        if not self.file_exists(
+        if not self.file_exists(jobStoreFileID):
             raise NoSuchFileException(jobStoreFileID)
 
     def _get_arbitrary_jobs_dir_for_name(self, jobNameSlug):
@@ -943,15 +1045,35 @@ class FileJobStore(AbstractJobStore):
         if len(os.listdir(self.jobsDir)) > self.fanOut:
             # Make sure that we don't over-fill the root with too many unique job names.
             # Go in a subdirectory tree, and then go by job name and make another tree.
-            return self._get_dynamic_spray_dir(
-
+            return self._get_dynamic_spray_dir(
+                os.path.join(
+                    self._get_dynamic_spray_dir(self.jobsDir),
+                    self.JOB_NAME_DIR_PREFIX + jobNameSlug,
+                )
+            )
         else:
             # Just go in the root
-            return self._get_dynamic_spray_dir(
+            return self._get_dynamic_spray_dir(
+                os.path.join(self.jobsDir, self.JOB_NAME_DIR_PREFIX + jobNameSlug)
+            )
+
+    def _get_arbitrary_stats_inbox_dir(self):
+        """
+        Gets a temporary directory in a multi-level hierarchy in
+        self.stats_inbox, where stats files not yet seen by the leader live.
+        The directory is not unique and may already have other stats files in it.
+
+        :rtype : string, path to temporary directory in which to place files/directories.
 
-
+
+        """
+
+        return self._get_dynamic_spray_dir(self.stats_inbox)
+
+    def _get_arbitrary_stats_archive_dir(self):
         """
-        Gets a temporary directory in a multi-level hierarchy in
+        Gets a temporary directory in a multi-level hierarchy in
+        self.stats_archive, where stats files already seen by the leader live.
         The directory is not unique and may already have other stats files in it.
 
         :rtype : string, path to temporary directory in which to place files/directories.
@@ -959,7 +1081,7 @@ class FileJobStore(AbstractJobStore):
 
         """
 
-        return self._get_dynamic_spray_dir(self.
+        return self._get_dynamic_spray_dir(self.stats_archive)
 
     def _get_arbitrary_files_dir(self):
         """
@@ -1091,17 +1213,27 @@ class FileJobStore(AbstractJobStore):
                 continue
 
             # Now we have only the directories that are named after jobs. Look inside them.
-            yield from self._walk_dynamic_spray_dir(
+            yield from self._walk_dynamic_spray_dir(
+                os.path.join(jobHoldingDir, jobNameDir)
+            )
 
+    def _stats_inbox_directories(self):
+        """
+        :returns: an iterator to the temporary directories containing new stats
+            files. They may also contain directories containing more stats
+            files.
+        """
 
-
+        return self._walk_dynamic_spray_dir(self.stats_inbox)
+
+    def _stats_archive_directories(self):
         """
-        :
-
-
+        :returns: an iterator to the temporary directories containing
+            previously observed stats files. They may also contain directories
+            containing more stats files.
         """
 
-        return self._walk_dynamic_spray_dir(self.
+        return self._walk_dynamic_spray_dir(self.stats_archive)
 
     def _get_unique_file_path(self, fileName, jobStoreID=None, cleanup=False):
         """
@@ -1142,18 +1274,24 @@ class FileJobStore(AbstractJobStore):
             self._check_job_store_id_assigned(jobStoreID)
             # Find where all its created files should live, depending on if
            # they need to go away when the job is deleted or not.
-            jobFilesDir =
+            jobFilesDir = (
+                self._get_job_files_dir(jobStoreID)
+                if not cleanup
+                else self._get_job_files_cleanup_dir(jobStoreID)
+            )
 
             # Lazily create the parent directory.
             # We don't want our tree filled with confusingly empty directories.
             os.makedirs(jobFilesDir, exist_ok=True)
 
             # Then make a temp directory inside it
-            filesDir = os.path.join(jobFilesDir,
+            filesDir = os.path.join(jobFilesDir, "file-" + uuid.uuid4().hex)
             os.mkdir(filesDir)
             return filesDir
         else:
             # Make a temporary file within the non-job-associated files hierarchy
-            filesDir = os.path.join(
+            filesDir = os.path.join(
+                self._get_arbitrary_files_dir(), "file-" + uuid.uuid4().hex
+            )
             os.mkdir(filesDir)
             return filesDir