toil 5.12.0__py3-none-any.whl → 6.1.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +18 -13
- toil/batchSystems/abstractBatchSystem.py +21 -10
- toil/batchSystems/abstractGridEngineBatchSystem.py +2 -2
- toil/batchSystems/awsBatch.py +14 -14
- toil/batchSystems/contained_executor.py +3 -3
- toil/batchSystems/htcondor.py +0 -1
- toil/batchSystems/kubernetes.py +34 -31
- toil/batchSystems/local_support.py +3 -1
- toil/batchSystems/mesos/batchSystem.py +7 -7
- toil/batchSystems/options.py +32 -83
- toil/batchSystems/registry.py +104 -23
- toil/batchSystems/singleMachine.py +16 -13
- toil/batchSystems/slurm.py +3 -3
- toil/batchSystems/torque.py +0 -1
- toil/bus.py +6 -8
- toil/common.py +532 -743
- toil/cwl/__init__.py +28 -32
- toil/cwl/cwltoil.py +523 -520
- toil/cwl/utils.py +55 -10
- toil/fileStores/__init__.py +2 -2
- toil/fileStores/abstractFileStore.py +36 -11
- toil/fileStores/cachingFileStore.py +607 -530
- toil/fileStores/nonCachingFileStore.py +43 -10
- toil/job.py +140 -75
- toil/jobStores/abstractJobStore.py +147 -79
- toil/jobStores/aws/jobStore.py +23 -9
- toil/jobStores/aws/utils.py +1 -2
- toil/jobStores/fileJobStore.py +117 -19
- toil/jobStores/googleJobStore.py +16 -7
- toil/jobStores/utils.py +5 -6
- toil/leader.py +71 -43
- toil/lib/accelerators.py +10 -5
- toil/lib/aws/__init__.py +3 -14
- toil/lib/aws/ami.py +22 -9
- toil/lib/aws/iam.py +21 -13
- toil/lib/aws/session.py +2 -16
- toil/lib/aws/utils.py +4 -5
- toil/lib/compatibility.py +1 -1
- toil/lib/conversions.py +7 -3
- toil/lib/docker.py +22 -23
- toil/lib/ec2.py +10 -6
- toil/lib/ec2nodes.py +106 -100
- toil/lib/encryption/_nacl.py +2 -1
- toil/lib/generatedEC2Lists.py +325 -18
- toil/lib/io.py +21 -0
- toil/lib/misc.py +1 -1
- toil/lib/resources.py +1 -1
- toil/lib/threading.py +74 -26
- toil/options/common.py +738 -0
- toil/options/cwl.py +336 -0
- toil/options/wdl.py +32 -0
- toil/provisioners/abstractProvisioner.py +1 -4
- toil/provisioners/aws/__init__.py +3 -6
- toil/provisioners/aws/awsProvisioner.py +6 -0
- toil/provisioners/clusterScaler.py +3 -2
- toil/provisioners/gceProvisioner.py +2 -2
- toil/realtimeLogger.py +2 -1
- toil/resource.py +24 -18
- toil/server/app.py +2 -3
- toil/server/cli/wes_cwl_runner.py +4 -4
- toil/server/utils.py +1 -1
- toil/server/wes/abstract_backend.py +3 -2
- toil/server/wes/amazon_wes_utils.py +5 -4
- toil/server/wes/tasks.py +2 -3
- toil/server/wes/toil_backend.py +2 -10
- toil/server/wsgi_app.py +2 -0
- toil/serviceManager.py +12 -10
- toil/statsAndLogging.py +5 -1
- toil/test/__init__.py +29 -54
- toil/test/batchSystems/batchSystemTest.py +11 -111
- toil/test/batchSystems/test_slurm.py +3 -2
- toil/test/cwl/cwlTest.py +213 -90
- toil/test/cwl/glob_dir.cwl +15 -0
- toil/test/cwl/preemptible.cwl +21 -0
- toil/test/cwl/preemptible_expression.cwl +28 -0
- toil/test/cwl/revsort.cwl +1 -1
- toil/test/cwl/revsort2.cwl +1 -1
- toil/test/docs/scriptsTest.py +0 -1
- toil/test/jobStores/jobStoreTest.py +27 -16
- toil/test/lib/aws/test_iam.py +4 -14
- toil/test/lib/aws/test_utils.py +0 -3
- toil/test/lib/dockerTest.py +4 -4
- toil/test/lib/test_ec2.py +11 -16
- toil/test/mesos/helloWorld.py +4 -5
- toil/test/mesos/stress.py +1 -1
- toil/test/provisioners/aws/awsProvisionerTest.py +9 -5
- toil/test/provisioners/clusterScalerTest.py +6 -4
- toil/test/provisioners/clusterTest.py +14 -3
- toil/test/provisioners/gceProvisionerTest.py +0 -6
- toil/test/provisioners/restartScript.py +3 -2
- toil/test/server/serverTest.py +1 -1
- toil/test/sort/restart_sort.py +2 -1
- toil/test/sort/sort.py +2 -1
- toil/test/sort/sortTest.py +2 -13
- toil/test/src/autoDeploymentTest.py +45 -45
- toil/test/src/busTest.py +5 -5
- toil/test/src/checkpointTest.py +2 -2
- toil/test/src/deferredFunctionTest.py +1 -1
- toil/test/src/fileStoreTest.py +32 -16
- toil/test/src/helloWorldTest.py +1 -1
- toil/test/src/importExportFileTest.py +1 -1
- toil/test/src/jobDescriptionTest.py +2 -1
- toil/test/src/jobServiceTest.py +1 -1
- toil/test/src/jobTest.py +18 -18
- toil/test/src/miscTests.py +5 -3
- toil/test/src/promisedRequirementTest.py +3 -3
- toil/test/src/realtimeLoggerTest.py +1 -1
- toil/test/src/resourceTest.py +2 -2
- toil/test/src/restartDAGTest.py +1 -1
- toil/test/src/resumabilityTest.py +36 -2
- toil/test/src/retainTempDirTest.py +1 -1
- toil/test/src/systemTest.py +2 -2
- toil/test/src/toilContextManagerTest.py +2 -2
- toil/test/src/userDefinedJobArgTypeTest.py +1 -1
- toil/test/utils/toilDebugTest.py +98 -32
- toil/test/utils/toilKillTest.py +2 -2
- toil/test/utils/utilsTest.py +20 -0
- toil/test/wdl/wdltoil_test.py +148 -45
- toil/toilState.py +7 -6
- toil/utils/toilClean.py +1 -1
- toil/utils/toilConfig.py +36 -0
- toil/utils/toilDebugFile.py +60 -33
- toil/utils/toilDebugJob.py +39 -12
- toil/utils/toilDestroyCluster.py +1 -1
- toil/utils/toilKill.py +1 -1
- toil/utils/toilLaunchCluster.py +13 -2
- toil/utils/toilMain.py +3 -2
- toil/utils/toilRsyncCluster.py +1 -1
- toil/utils/toilSshCluster.py +1 -1
- toil/utils/toilStats.py +240 -143
- toil/utils/toilStatus.py +1 -4
- toil/version.py +11 -11
- toil/wdl/utils.py +2 -122
- toil/wdl/wdltoil.py +999 -386
- toil/worker.py +25 -31
- {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/METADATA +60 -53
- toil-6.1.0a1.dist-info/RECORD +237 -0
- {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/WHEEL +1 -1
- {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/entry_points.txt +0 -1
- toil/batchSystems/parasol.py +0 -379
- toil/batchSystems/tes.py +0 -459
- toil/test/batchSystems/parasolTestSupport.py +0 -117
- toil/test/wdl/builtinTest.py +0 -506
- toil/test/wdl/conftest.py +0 -23
- toil/test/wdl/toilwdlTest.py +0 -522
- toil/wdl/toilwdl.py +0 -141
- toil/wdl/versions/dev.py +0 -107
- toil/wdl/versions/draft2.py +0 -980
- toil/wdl/versions/v1.py +0 -794
- toil/wdl/wdl_analysis.py +0 -116
- toil/wdl/wdl_functions.py +0 -997
- toil/wdl/wdl_synthesis.py +0 -1011
- toil/wdl/wdl_types.py +0 -243
- toil-5.12.0.dist-info/RECORD +0 -244
- /toil/{wdl/versions → options}/__init__.py +0 -0
- {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/LICENSE +0 -0
- {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/top_level.txt +0 -0
toil/jobStores/fileJobStore.py
CHANGED
|
@@ -20,11 +20,10 @@ import re
|
|
|
20
20
|
import shutil
|
|
21
21
|
import stat
|
|
22
22
|
import sys
|
|
23
|
-
import tempfile
|
|
24
23
|
import time
|
|
25
24
|
import uuid
|
|
26
25
|
from contextlib import contextmanager
|
|
27
|
-
from typing import IO, Iterator, List, Optional, Union, overload
|
|
26
|
+
from typing import IO, Iterable, Iterator, List, Optional, Union, overload
|
|
28
27
|
from urllib.parse import ParseResult, quote, unquote
|
|
29
28
|
|
|
30
29
|
if sys.version_info >= (3, 8):
|
|
@@ -42,6 +41,7 @@ from toil.jobStores.abstractJobStore import (AbstractJobStore,
|
|
|
42
41
|
from toil.lib.io import (AtomicFileCreate,
|
|
43
42
|
atomic_copy,
|
|
44
43
|
atomic_copyobj,
|
|
44
|
+
mkdtemp,
|
|
45
45
|
robust_rmtree)
|
|
46
46
|
|
|
47
47
|
logger = logging.getLogger(__name__)
|
|
@@ -121,8 +121,8 @@ class FileJobStore(AbstractJobStore):
|
|
|
121
121
|
os.makedirs(self.filesDir, exist_ok=True)
|
|
122
122
|
os.makedirs(self.jobFilesDir, exist_ok=True)
|
|
123
123
|
os.makedirs(self.sharedFilesDir, exist_ok=True)
|
|
124
|
-
self.linkImports = config.
|
|
125
|
-
self.moveExports = config.
|
|
124
|
+
self.linkImports = config.symlinkImports
|
|
125
|
+
self.moveExports = config.moveOutputs
|
|
126
126
|
super().initialize(config)
|
|
127
127
|
|
|
128
128
|
def resume(self):
|
|
@@ -147,8 +147,8 @@ class FileJobStore(AbstractJobStore):
|
|
|
147
147
|
|
|
148
148
|
# Make a unique temp directory under a directory for this job name,
|
|
149
149
|
# possibly sprayed across multiple levels of subdirectories.
|
|
150
|
-
absJobDir =
|
|
151
|
-
|
|
150
|
+
absJobDir = mkdtemp(prefix=self.JOB_DIR_PREFIX,
|
|
151
|
+
dir=self._get_arbitrary_jobs_dir_for_name(usefulFilename))
|
|
152
152
|
|
|
153
153
|
job_description.jobStoreID = self._get_job_id_from_dir(absJobDir)
|
|
154
154
|
|
|
@@ -252,18 +252,19 @@ class FileJobStore(AbstractJobStore):
|
|
|
252
252
|
|
|
253
253
|
job.pre_update_hook()
|
|
254
254
|
|
|
255
|
+
dest_filename = self._get_job_file_name(job.jobStoreID)
|
|
256
|
+
|
|
255
257
|
# The job is serialised to a file suffixed by ".new"
|
|
256
258
|
# We insist on creating the file; an existing .new file indicates
|
|
257
259
|
# multiple simultaneous attempts to update the job, which will lose
|
|
258
260
|
# updates.
|
|
259
261
|
# The file is then moved to its correct path.
|
|
260
|
-
# Atomicity guarantees use the fact the underlying file
|
|
262
|
+
# Atomicity guarantees use the fact the underlying file system's "move"
|
|
261
263
|
# function is atomic.
|
|
262
|
-
with open(
|
|
264
|
+
with open(dest_filename + ".new", 'xb') as f:
|
|
263
265
|
pickle.dump(job, f)
|
|
264
266
|
# This should be atomic for the file system
|
|
265
|
-
os.rename(
|
|
266
|
-
|
|
267
|
+
os.rename(dest_filename + ".new", dest_filename)
|
|
267
268
|
def delete_job(self, job_id):
|
|
268
269
|
# The jobStoreID is the relative path to the directory containing the job,
|
|
269
270
|
# removing this directory deletes the job.
|
|
@@ -316,13 +317,14 @@ class FileJobStore(AbstractJobStore):
|
|
|
316
317
|
# symlink argument says whether the caller can take symlinks or not
|
|
317
318
|
# ex: if false, it implies the workflow cannot work with symlinks and thus will hardlink imports
|
|
318
319
|
# default is true since symlinking everything is ideal
|
|
320
|
+
uri_path = unquote(uri.path)
|
|
319
321
|
if issubclass(otherCls, FileJobStore):
|
|
320
|
-
if os.path.isdir(
|
|
322
|
+
if os.path.isdir(uri_path):
|
|
321
323
|
# Don't allow directories (unless someone is racing us)
|
|
322
324
|
raise IsADirectoryError(f"URI {uri} points to a directory but a file was expected")
|
|
323
325
|
if shared_file_name is None:
|
|
324
|
-
executable = os.stat(
|
|
325
|
-
absPath = self._get_unique_file_path(
|
|
326
|
+
executable = os.stat(uri_path).st_mode & stat.S_IXUSR != 0
|
|
327
|
+
absPath = self._get_unique_file_path(uri_path) # use this to get a valid path to write to in job store
|
|
326
328
|
with self.optional_hard_copy(hardlink):
|
|
327
329
|
self._copy_or_link(uri, absPath, symlink=symlink)
|
|
328
330
|
# TODO: os.stat(absPath).st_size consistently gives values lower than
|
|
@@ -341,6 +343,9 @@ class FileJobStore(AbstractJobStore):
|
|
|
341
343
|
if issubclass(otherCls, FileJobStore):
|
|
342
344
|
srcPath = self._get_file_path_from_id(file_id)
|
|
343
345
|
destPath = self._extract_path_from_url(uri)
|
|
346
|
+
# Make sure we don't need to worry about directories when exporting
|
|
347
|
+
# to local files, just like for cloud storage.
|
|
348
|
+
os.makedirs(os.path.dirname(destPath), exist_ok=True)
|
|
344
349
|
executable = getattr(file_id, 'executable', False)
|
|
345
350
|
if self.moveExports:
|
|
346
351
|
self._move_and_linkback(srcPath, destPath, executable=executable)
|
|
@@ -357,7 +362,11 @@ class FileJobStore(AbstractJobStore):
|
|
|
357
362
|
os.chmod(destPath, os.stat(destPath).st_mode | stat.S_IXUSR)
|
|
358
363
|
|
|
359
364
|
@classmethod
|
|
360
|
-
def
|
|
365
|
+
def _url_exists(cls, url: ParseResult) -> bool:
|
|
366
|
+
return os.path.exists(cls._extract_path_from_url(url))
|
|
367
|
+
|
|
368
|
+
@classmethod
|
|
369
|
+
def _get_size(cls, url):
|
|
361
370
|
return os.stat(cls._extract_path_from_url(url)).st_size
|
|
362
371
|
|
|
363
372
|
@classmethod
|
|
@@ -371,12 +380,18 @@ class FileJobStore(AbstractJobStore):
|
|
|
371
380
|
"""
|
|
372
381
|
|
|
373
382
|
# we use a ~10Mb buffer to improve speed
|
|
374
|
-
with
|
|
383
|
+
with cls._open_url(url) as readable:
|
|
375
384
|
shutil.copyfileobj(readable, writable, length=cls.BUFFER_SIZE)
|
|
376
385
|
# Return the number of bytes we read when we reached EOF.
|
|
377
386
|
executable = os.stat(readable.name).st_mode & stat.S_IXUSR
|
|
378
387
|
return readable.tell(), executable
|
|
379
388
|
|
|
389
|
+
@classmethod
|
|
390
|
+
def _open_url(cls, url: ParseResult) -> IO[bytes]:
|
|
391
|
+
"""
|
|
392
|
+
Open a file URL as a binary stream.
|
|
393
|
+
"""
|
|
394
|
+
return open(cls._extract_path_from_url(url), 'rb')
|
|
380
395
|
|
|
381
396
|
@classmethod
|
|
382
397
|
def _write_to_url(cls, readable, url, executable=False):
|
|
@@ -484,7 +499,7 @@ class FileJobStore(AbstractJobStore):
|
|
|
484
499
|
|
|
485
500
|
atomic_copy(local_path, jobStoreFilePath)
|
|
486
501
|
|
|
487
|
-
def read_file(self, file_id, local_path, symlink=False):
|
|
502
|
+
def read_file(self, file_id: str, local_path: str, symlink: bool = False) -> None:
|
|
488
503
|
self._check_job_store_file_id(file_id)
|
|
489
504
|
jobStoreFilePath = self._get_file_path_from_id(file_id)
|
|
490
505
|
localDirPath = os.path.dirname(local_path)
|
|
@@ -701,6 +716,69 @@ class FileJobStore(AbstractJobStore):
|
|
|
701
716
|
else:
|
|
702
717
|
raise
|
|
703
718
|
|
|
719
|
+
def list_all_file_names(self, for_job: Optional[str] = None) -> Iterable[str]:
|
|
720
|
+
"""
|
|
721
|
+
Get all the file names (not file IDs) of files stored in the job store.
|
|
722
|
+
|
|
723
|
+
Used for debugging.
|
|
724
|
+
|
|
725
|
+
:param for_job: If set, restrict the list to files for a particular job.
|
|
726
|
+
"""
|
|
727
|
+
|
|
728
|
+
# TODO: Promote to AbstractJobStore.
|
|
729
|
+
# TODO: Include stats-and-logging files?
|
|
730
|
+
|
|
731
|
+
if for_job is not None:
|
|
732
|
+
# Run on one job
|
|
733
|
+
jobs = [for_job]
|
|
734
|
+
else:
|
|
735
|
+
# Run on all the jobs
|
|
736
|
+
jobs = []
|
|
737
|
+
# But not all the jobs that exist, we want all the jobs that have
|
|
738
|
+
# files. So look at the file directories which mirror the job
|
|
739
|
+
# directories' structure.
|
|
740
|
+
for job_kind_dir in self._list_dynamic_spray_dir(self.jobFilesDir):
|
|
741
|
+
# First we sprayed all the job kinds over a tree
|
|
742
|
+
for job_instance_dir in self._list_dynamic_spray_dir(job_kind_dir):
|
|
743
|
+
# Then we sprayed the job instances over a tree
|
|
744
|
+
# And based on those we get the job name
|
|
745
|
+
job_id = self._get_job_id_from_files_dir(job_instance_dir)
|
|
746
|
+
jobs.append(job_id)
|
|
747
|
+
|
|
748
|
+
for name in os.listdir(self.sharedFilesDir):
|
|
749
|
+
# Announce all the shared files
|
|
750
|
+
yield name
|
|
751
|
+
|
|
752
|
+
for file_dir_path in self._list_dynamic_spray_dir(self.filesDir):
|
|
753
|
+
# Run on all the no-job files
|
|
754
|
+
for dir_file in os.listdir(file_dir_path):
|
|
755
|
+
# There ought to be just one file in here.
|
|
756
|
+
yield dir_file
|
|
757
|
+
|
|
758
|
+
for job_store_id in jobs:
|
|
759
|
+
# Files from _get_job_files_dir
|
|
760
|
+
job_files_dir = os.path.join(self.jobFilesDir, job_store_id)
|
|
761
|
+
if os.path.exists(job_files_dir):
|
|
762
|
+
for file_dir in os.listdir(job_files_dir):
|
|
763
|
+
# Each file is in its own directory
|
|
764
|
+
if file_dir == "cleanup":
|
|
765
|
+
# Except the cleanup directory which we do later.
|
|
766
|
+
continue
|
|
767
|
+
file_dir_path = os.path.join(job_files_dir, file_dir)
|
|
768
|
+
for dir_file in os.listdir(file_dir_path):
|
|
769
|
+
# There ought to be just one file in here.
|
|
770
|
+
yield dir_file
|
|
771
|
+
|
|
772
|
+
# Files from _get_job_files_cleanup_dir
|
|
773
|
+
job_cleanup_files_dir = os.path.join(job_files_dir, "cleanup")
|
|
774
|
+
if os.path.exists(job_cleanup_files_dir):
|
|
775
|
+
for file_dir in os.listdir(job_cleanup_files_dir):
|
|
776
|
+
# Each file is in its own directory
|
|
777
|
+
file_dir_path = os.path.join(job_cleanup_files_dir, file_dir)
|
|
778
|
+
for dir_file in os.listdir(file_dir_path):
|
|
779
|
+
# There ought to be just one file in here.
|
|
780
|
+
yield dir_file
|
|
781
|
+
|
|
704
782
|
def write_logs(self, msg):
|
|
705
783
|
# Temporary files are placed in the stats directory tree
|
|
706
784
|
tempStatsFileName = "stats" + str(uuid.uuid4().hex) + ".new"
|
|
@@ -748,6 +826,13 @@ class FileJobStore(AbstractJobStore):
|
|
|
748
826
|
"""
|
|
749
827
|
return absPath[len(self.jobsDir)+1:]
|
|
750
828
|
|
|
829
|
+
def _get_job_id_from_files_dir(self, absPath: str) -> str:
|
|
830
|
+
"""
|
|
831
|
+
:param str absPath: The absolute path to a job directory under self.jobFilesDir which holds a job's files.
|
|
832
|
+
:rtype : string, string is the job ID
|
|
833
|
+
"""
|
|
834
|
+
return absPath[len(self.jobFilesDir)+1:]
|
|
835
|
+
|
|
751
836
|
def _get_job_file_name(self, jobStoreID):
|
|
752
837
|
"""
|
|
753
838
|
Return the path to the file containing the serialised JobDescription instance for the given
|
|
@@ -815,7 +900,7 @@ class FileJobStore(AbstractJobStore):
|
|
|
815
900
|
"""
|
|
816
901
|
|
|
817
902
|
# We just make the file IDs paths under the job store overall.
|
|
818
|
-
absPath = os.path.join(self.jobStoreDir, jobStoreFileID)
|
|
903
|
+
absPath = os.path.join(self.jobStoreDir, unquote(jobStoreFileID))
|
|
819
904
|
|
|
820
905
|
# Don't validate here, we are called by the validation logic
|
|
821
906
|
|
|
@@ -828,14 +913,14 @@ class FileJobStore(AbstractJobStore):
|
|
|
828
913
|
:rtype : string, string is the file ID.
|
|
829
914
|
"""
|
|
830
915
|
|
|
831
|
-
return absPath[len(self.jobStoreDir)+1:]
|
|
916
|
+
return quote(absPath[len(self.jobStoreDir)+1:])
|
|
832
917
|
|
|
833
918
|
def _check_job_store_file_id(self, jobStoreFileID):
|
|
834
919
|
"""
|
|
835
920
|
:raise NoSuchFileException: if the file with ID jobStoreFileID does
|
|
836
921
|
not exist or is not a file
|
|
837
922
|
"""
|
|
838
|
-
if not self.file_exists(jobStoreFileID):
|
|
923
|
+
if not self.file_exists(unquote(jobStoreFileID)):
|
|
839
924
|
raise NoSuchFileException(jobStoreFileID)
|
|
840
925
|
|
|
841
926
|
def _get_arbitrary_jobs_dir_for_name(self, jobNameSlug):
|
|
@@ -966,6 +1051,19 @@ class FileJobStore(AbstractJobStore):
|
|
|
966
1051
|
# Recurse
|
|
967
1052
|
yield from self._walk_dynamic_spray_dir(childPath)
|
|
968
1053
|
|
|
1054
|
+
def _list_dynamic_spray_dir(self, root):
|
|
1055
|
+
"""
|
|
1056
|
+
For a directory tree filled in by _getDynamicSprayDir, yields each
|
|
1057
|
+
highest-level file or directory *not* created by _getDynamicSprayDir
|
|
1058
|
+
(i.e. the actual contents).
|
|
1059
|
+
"""
|
|
1060
|
+
|
|
1061
|
+
for spray_dir in self._walk_dynamic_spray_dir(root):
|
|
1062
|
+
for child in os.listdir(spray_dir):
|
|
1063
|
+
if child not in self.validDirsSet:
|
|
1064
|
+
# This is a real content item we are storing
|
|
1065
|
+
yield os.path.join(spray_dir, child)
|
|
1066
|
+
|
|
969
1067
|
def _job_directories(self):
|
|
970
1068
|
"""
|
|
971
1069
|
:rtype : an iterator to the temporary directories containing job
|
toil/jobStores/googleJobStore.py
CHANGED
|
@@ -20,23 +20,20 @@ import uuid
|
|
|
20
20
|
from contextlib import contextmanager
|
|
21
21
|
from functools import wraps
|
|
22
22
|
from io import BytesIO
|
|
23
|
-
from typing import List, Optional
|
|
23
|
+
from typing import IO, List, Optional
|
|
24
24
|
from urllib.parse import ParseResult
|
|
25
25
|
|
|
26
26
|
from google.api_core.exceptions import (GoogleAPICallError,
|
|
27
27
|
InternalServerError,
|
|
28
28
|
ServiceUnavailable)
|
|
29
|
-
from google.cloud import exceptions, storage
|
|
30
29
|
from google.auth.exceptions import DefaultCredentialsError
|
|
30
|
+
from google.cloud import exceptions, storage
|
|
31
31
|
|
|
32
32
|
from toil.jobStores.abstractJobStore import (AbstractJobStore,
|
|
33
33
|
JobStoreExistsException,
|
|
34
34
|
NoSuchFileException,
|
|
35
35
|
NoSuchJobException,
|
|
36
36
|
NoSuchJobStoreException)
|
|
37
|
-
|
|
38
|
-
from toil.fileStores import FileID
|
|
39
|
-
|
|
40
37
|
from toil.jobStores.utils import ReadablePipe, WritablePipe
|
|
41
38
|
from toil.lib.compatibility import compat_bytes
|
|
42
39
|
from toil.lib.io import AtomicFileCreate
|
|
@@ -146,7 +143,6 @@ class GoogleJobStore(AbstractJobStore):
|
|
|
146
143
|
# Probably we don't have permission to use the file.
|
|
147
144
|
log.warning("File '%s' exists but didn't work to authenticate!",
|
|
148
145
|
cls.nodeServiceAccountJson)
|
|
149
|
-
pass
|
|
150
146
|
|
|
151
147
|
# Either a filename is specified, or our fallback file isn't there.
|
|
152
148
|
try:
|
|
@@ -394,7 +390,15 @@ class GoogleJobStore(AbstractJobStore):
|
|
|
394
390
|
return blob
|
|
395
391
|
|
|
396
392
|
@classmethod
|
|
397
|
-
def
|
|
393
|
+
def _url_exists(cls, url: ParseResult) -> bool:
|
|
394
|
+
try:
|
|
395
|
+
cls._get_blob_from_url(url, exists=True)
|
|
396
|
+
return True
|
|
397
|
+
except NoSuchFileException:
|
|
398
|
+
return False
|
|
399
|
+
|
|
400
|
+
@classmethod
|
|
401
|
+
def _get_size(cls, url):
|
|
398
402
|
return cls._get_blob_from_url(url, exists=True).size
|
|
399
403
|
|
|
400
404
|
@classmethod
|
|
@@ -403,6 +407,11 @@ class GoogleJobStore(AbstractJobStore):
|
|
|
403
407
|
blob.download_to_file(writable)
|
|
404
408
|
return blob.size, False
|
|
405
409
|
|
|
410
|
+
@classmethod
|
|
411
|
+
def _open_url(cls, url: ParseResult) -> IO[bytes]:
|
|
412
|
+
blob = cls._get_blob_from_url(url, exists=True)
|
|
413
|
+
return blob.open("rb")
|
|
414
|
+
|
|
406
415
|
@classmethod
|
|
407
416
|
def _supports_url(cls, url, export=False):
|
|
408
417
|
return url.scheme.lower() == 'gs'
|
toil/jobStores/utils.py
CHANGED
|
@@ -79,7 +79,7 @@ class WritablePipe(ABC):
|
|
|
79
79
|
binary and text mode output.
|
|
80
80
|
|
|
81
81
|
:param file readable: the file object representing the readable end of the pipe. Do not
|
|
82
|
-
|
|
82
|
+
explicitly invoke the close() method of the object, that will be done automatically.
|
|
83
83
|
"""
|
|
84
84
|
raise NotImplementedError()
|
|
85
85
|
|
|
@@ -211,7 +211,7 @@ class ReadablePipe(ABC):
|
|
|
211
211
|
binary and text mode input.
|
|
212
212
|
|
|
213
213
|
:param file writable: the file object representing the writable end of the pipe. Do not
|
|
214
|
-
|
|
214
|
+
explicitly invoke the close() method of the object, that will be done automatically.
|
|
215
215
|
"""
|
|
216
216
|
raise NotImplementedError()
|
|
217
217
|
|
|
@@ -316,7 +316,7 @@ class ReadableTransformingPipe(ReadablePipe):
|
|
|
316
316
|
:param file readable: the input stream file object to transform.
|
|
317
317
|
|
|
318
318
|
:param file writable: the file object representing the writable end of the pipe. Do not
|
|
319
|
-
|
|
319
|
+
explicitly invoke the close() method of the object, that will be done automatically.
|
|
320
320
|
"""
|
|
321
321
|
raise NotImplementedError()
|
|
322
322
|
|
|
@@ -327,7 +327,6 @@ class JobStoreUnavailableException(RuntimeError):
|
|
|
327
327
|
"""
|
|
328
328
|
Raised when a particular type of job store is requested but can't be used.
|
|
329
329
|
"""
|
|
330
|
-
pass
|
|
331
330
|
|
|
332
331
|
def generate_locator(
|
|
333
332
|
job_store_type: str,
|
|
@@ -340,9 +339,9 @@ def generate_locator(
|
|
|
340
339
|
|
|
341
340
|
:param job_store_type: Registry name of the job store to use.
|
|
342
341
|
:param local_suggestion: Path to a nonexistent local directory suitable for
|
|
343
|
-
|
|
342
|
+
use as a file job store.
|
|
344
343
|
:param decoration: Extra string to add to the job store locator, if
|
|
345
|
-
|
|
344
|
+
convenient.
|
|
346
345
|
|
|
347
346
|
:return str: Job store locator for a usable job store.
|
|
348
347
|
"""
|
toil/leader.py
CHANGED
|
@@ -29,21 +29,21 @@ from toil import resolveEntryPoint
|
|
|
29
29
|
from toil.batchSystems import DeadlockException
|
|
30
30
|
from toil.batchSystems.abstractBatchSystem import (AbstractBatchSystem,
|
|
31
31
|
BatchJobExitReason)
|
|
32
|
-
from toil.bus import (
|
|
33
|
-
JobCompletedMessage,
|
|
32
|
+
from toil.bus import (JobCompletedMessage,
|
|
34
33
|
JobFailedMessage,
|
|
35
34
|
JobIssuedMessage,
|
|
36
35
|
JobMissingMessage,
|
|
37
36
|
JobUpdatedMessage,
|
|
38
|
-
QueueSizeMessage
|
|
39
|
-
|
|
37
|
+
QueueSizeMessage,
|
|
38
|
+
gen_message_bus_path)
|
|
39
|
+
from toil.common import Config, ToilMetrics
|
|
40
40
|
from toil.cwl.utils import CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE
|
|
41
|
+
from toil.exceptions import FailedJobsException
|
|
41
42
|
from toil.job import (CheckpointJobDescription,
|
|
42
43
|
JobDescription,
|
|
43
44
|
ServiceJobDescription,
|
|
44
45
|
TemporaryID)
|
|
45
46
|
from toil.jobStores.abstractJobStore import (AbstractJobStore,
|
|
46
|
-
NoSuchFileException,
|
|
47
47
|
NoSuchJobException)
|
|
48
48
|
from toil.lib.throttle import LocalThrottle
|
|
49
49
|
from toil.provisioners.abstractProvisioner import AbstractProvisioner
|
|
@@ -51,7 +51,6 @@ from toil.provisioners.clusterScaler import ScalerThread
|
|
|
51
51
|
from toil.serviceManager import ServiceManager
|
|
52
52
|
from toil.statsAndLogging import StatsAndLogging
|
|
53
53
|
from toil.toilState import ToilState
|
|
54
|
-
from toil.exceptions import FailedJobsException
|
|
55
54
|
|
|
56
55
|
logger = logging.getLogger(__name__)
|
|
57
56
|
|
|
@@ -115,10 +114,14 @@ class Leader:
|
|
|
115
114
|
# state change information about jobs.
|
|
116
115
|
self.toilState = ToilState(self.jobStore)
|
|
117
116
|
|
|
118
|
-
if self.config.write_messages is
|
|
119
|
-
#
|
|
120
|
-
#
|
|
121
|
-
self.
|
|
117
|
+
if self.config.write_messages is None:
|
|
118
|
+
# The user hasn't specified a place for the message bus so we
|
|
119
|
+
# should make one.
|
|
120
|
+
self.config.write_messages = gen_message_bus_path()
|
|
121
|
+
|
|
122
|
+
# Message bus messages need to go to the given file.
|
|
123
|
+
# Keep a reference to the return value so the listener stays alive.
|
|
124
|
+
self._message_subscription = self.toilState.bus.connect_output_file(self.config.write_messages)
|
|
122
125
|
|
|
123
126
|
# Connect to the message bus, so we will get all the messages of these
|
|
124
127
|
# types in an inbox.
|
|
@@ -138,7 +141,8 @@ class Leader:
|
|
|
138
141
|
|
|
139
142
|
# Batch system
|
|
140
143
|
self.batchSystem = batchSystem
|
|
141
|
-
|
|
144
|
+
if len(self.batchSystem.getIssuedBatchJobIDs()) != 0:
|
|
145
|
+
raise RuntimeError("The initialized batchsystem did not start with 0 active jobs.")
|
|
142
146
|
logger.debug("Checked batch system has no running jobs and no updated jobs")
|
|
143
147
|
|
|
144
148
|
# Map of batch system IDs to job store IDs
|
|
@@ -370,7 +374,8 @@ class Leader:
|
|
|
370
374
|
|
|
371
375
|
# If the successor job's predecessors have all not all completed then
|
|
372
376
|
# ignore the successor as is not yet ready to run
|
|
373
|
-
|
|
377
|
+
if len(successor.predecessorsFinished) > successor.predecessorNumber:
|
|
378
|
+
raise RuntimeError("There are more finished predecessors than possible.")
|
|
374
379
|
if len(successor.predecessorsFinished) == successor.predecessorNumber:
|
|
375
380
|
# All the successor's predecessors are done now.
|
|
376
381
|
# Remove the successor job from the set of waiting multi-predecessor jobs.
|
|
@@ -391,8 +396,10 @@ class Leader:
|
|
|
391
396
|
#Build map from successor to predecessors.
|
|
392
397
|
if successor_id not in self.toilState.successor_to_predecessors:
|
|
393
398
|
self.toilState.successor_to_predecessors[successor_id] = set()
|
|
394
|
-
|
|
395
|
-
|
|
399
|
+
if not isinstance(successor_id, str):
|
|
400
|
+
raise RuntimeError("The given successor ID is invalid.")
|
|
401
|
+
if not isinstance(predecessor_id, str):
|
|
402
|
+
raise RuntimeError("The given predecessor ID is invalid.")
|
|
396
403
|
self.toilState.successor_to_predecessors[successor_id].add(predecessor_id)
|
|
397
404
|
|
|
398
405
|
# Grab the successor
|
|
@@ -423,7 +430,8 @@ class Leader:
|
|
|
423
430
|
predecessor_id, len(next_successors))
|
|
424
431
|
#Record the number of successors that must be completed before
|
|
425
432
|
#the job can be considered again
|
|
426
|
-
|
|
433
|
+
if self.toilState.count_pending_successors(predecessor_id) != 0:
|
|
434
|
+
raise RuntimeError('Attempted to schedule successors of the same job twice!')
|
|
427
435
|
self.toilState.successors_pending(predecessor_id, len(next_successors))
|
|
428
436
|
|
|
429
437
|
# For each successor schedule if all predecessors have been completed
|
|
@@ -534,11 +542,13 @@ class Leader:
|
|
|
534
542
|
# the job has services to run, which have not been started, start them
|
|
535
543
|
# Build a map from the service jobs to the job and a map
|
|
536
544
|
# of the services created for the job
|
|
537
|
-
|
|
545
|
+
if readyJob.jobStoreID in self.toilState.servicesIssued:
|
|
546
|
+
raise RuntimeError(f"The ready job: {readyJob.jobStoreID} was already issued.")
|
|
538
547
|
self.toilState.servicesIssued[readyJob.jobStoreID] = set()
|
|
539
548
|
for serviceJobList in readyJob.serviceHostIDsInBatches():
|
|
540
549
|
for serviceID in serviceJobList:
|
|
541
|
-
|
|
550
|
+
if serviceID in self.toilState.service_to_client:
|
|
551
|
+
raise RuntimeError(f"The ready service ID: {serviceID} was already added.")
|
|
542
552
|
self.toilState.reset_job(serviceID)
|
|
543
553
|
serviceHost = self.toilState.get_job(serviceID)
|
|
544
554
|
self.toilState.service_to_client[serviceID] = readyJob.jobStoreID
|
|
@@ -675,7 +685,8 @@ class Leader:
|
|
|
675
685
|
client = self.toilState.get_job(client_id)
|
|
676
686
|
|
|
677
687
|
# Make sure services still want to run
|
|
678
|
-
|
|
688
|
+
if next(client.serviceHostIDsInBatches(), None) is None:
|
|
689
|
+
raise RuntimeError("No more services want to run.")
|
|
679
690
|
|
|
680
691
|
# Mark the service job updated so we don't stop here.
|
|
681
692
|
self._messages.publish(JobUpdatedMessage(client_id, 1))
|
|
@@ -784,13 +795,16 @@ class Leader:
|
|
|
784
795
|
logger.debug("Finished the main loop: no jobs left to run.")
|
|
785
796
|
|
|
786
797
|
# Consistency check the toil state
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
798
|
+
if not self._messages.empty():
|
|
799
|
+
raise RuntimeError(f"Pending messages at shutdown: {self._messages}")
|
|
800
|
+
if self.toilState.successorCounts != {}:
|
|
801
|
+
raise RuntimeError(f"Jobs waiting on successors at shutdown: {self.toilState.successorCounts}")
|
|
802
|
+
if self.toilState.successor_to_predecessors != {}:
|
|
803
|
+
raise RuntimeError(f"Successors pending for their predecessors at shutdown: {self.toilState.successor_to_predecessors}")
|
|
804
|
+
if self.toilState.service_to_client != {}:
|
|
805
|
+
raise RuntimeError(f"Services pending for their clients at shutdown: {self.toilState.service_to_client}")
|
|
806
|
+
if self.toilState.servicesIssued != {}:
|
|
807
|
+
raise RuntimeError(f"Services running at shutdown: {self.toilState.servicesIssued}")
|
|
794
808
|
|
|
795
809
|
def checkForDeadlocks(self):
|
|
796
810
|
"""Check if the system is deadlocked running service jobs."""
|
|
@@ -865,8 +879,8 @@ class Leader:
|
|
|
865
879
|
def issueJob(self, jobNode: JobDescription) -> None:
|
|
866
880
|
"""Add a job to the queue of jobs currently trying to run."""
|
|
867
881
|
# Never issue the same job multiple times simultaneously
|
|
868
|
-
|
|
869
|
-
f"Attempted to issue {jobNode} multiple times simultaneously!"
|
|
882
|
+
if jobNode.jobStoreID in self.toilState.jobs_issued:
|
|
883
|
+
raise RuntimeError(f"Attempted to issue {jobNode} multiple times simultaneously!")
|
|
870
884
|
|
|
871
885
|
workerCommand = [resolveEntryPoint('_toil_worker'),
|
|
872
886
|
jobNode.jobName,
|
|
@@ -926,7 +940,8 @@ class Leader:
|
|
|
926
940
|
"""
|
|
927
941
|
# Grab the service job description
|
|
928
942
|
service = self.toilState.get_job(service_id)
|
|
929
|
-
|
|
943
|
+
if not isinstance(service, ServiceJobDescription):
|
|
944
|
+
raise RuntimeError("The grabbed service job description is not the right type.")
|
|
930
945
|
|
|
931
946
|
if service.preemptible:
|
|
932
947
|
self.preemptibleServiceJobsToBeIssued.append(service_id)
|
|
@@ -956,7 +971,8 @@ class Leader:
|
|
|
956
971
|
elif preemptible:
|
|
957
972
|
return self.preemptibleJobsIssued
|
|
958
973
|
else:
|
|
959
|
-
|
|
974
|
+
if len(self.issued_jobs_by_batch_system_id) < self.preemptibleJobsIssued:
|
|
975
|
+
raise RuntimeError("Number of jobs issued cannot be negative.")
|
|
960
976
|
return len(self.issued_jobs_by_batch_system_id) - self.preemptibleJobsIssued
|
|
961
977
|
|
|
962
978
|
def _getStatusHint(self) -> str:
|
|
@@ -990,16 +1006,19 @@ class Leader:
|
|
|
990
1006
|
|
|
991
1007
|
:return: Job description as it was issued.
|
|
992
1008
|
"""
|
|
993
|
-
|
|
1009
|
+
if jobBatchSystemID not in self.issued_jobs_by_batch_system_id:
|
|
1010
|
+
raise RuntimeError("Job was already removed or was never issued.")
|
|
994
1011
|
issuedDesc = self.toilState.get_job(self.issued_jobs_by_batch_system_id[jobBatchSystemID])
|
|
995
1012
|
if issuedDesc.preemptible:
|
|
996
1013
|
# len(issued_jobs_by_batch_system_id) should always be greater than or equal to preemptibleJobsIssued,
|
|
997
1014
|
# so decrement this value before removing the job from the issuedJob map
|
|
998
|
-
|
|
1015
|
+
if self.preemptibleJobsIssued <= 0:
|
|
1016
|
+
raise RuntimeError("The number of preemptive issued jobs cannot be negative.")
|
|
999
1017
|
self.preemptibleJobsIssued -= 1
|
|
1000
1018
|
# It's not issued anymore.
|
|
1001
1019
|
del self.issued_jobs_by_batch_system_id[jobBatchSystemID]
|
|
1002
|
-
|
|
1020
|
+
if issuedDesc.jobStoreID not in self.toilState.jobs_issued:
|
|
1021
|
+
raise RuntimeError(f"Job {issuedDesc} came back without being issued")
|
|
1003
1022
|
self.toilState.jobs_issued.remove(issuedDesc.jobStoreID)
|
|
1004
1023
|
# If service job
|
|
1005
1024
|
if issuedDesc.jobStoreID in self.toilState.service_to_client:
|
|
@@ -1090,8 +1109,9 @@ class Leader:
|
|
|
1090
1109
|
for jobBatchSystemID in missingJobIDsSet.difference(jobBatchSystemIDsSet):
|
|
1091
1110
|
self.reissueMissingJobs_missingHash.pop(jobBatchSystemID)
|
|
1092
1111
|
logger.warning("Batch system id: %s is no longer missing", str(jobBatchSystemID))
|
|
1093
|
-
|
|
1094
|
-
|
|
1112
|
+
# checks we have no unexpected jobs running
|
|
1113
|
+
if not issuedJobs.issubset(jobBatchSystemIDsSet):
|
|
1114
|
+
raise RuntimeError("An unexpected job is still running.")
|
|
1095
1115
|
jobsToKill = []
|
|
1096
1116
|
for jobBatchSystemID in set(jobBatchSystemIDsSet.difference(issuedJobs)):
|
|
1097
1117
|
jobStoreID = self.issued_jobs_by_batch_system_id[jobBatchSystemID]
|
|
@@ -1295,12 +1315,15 @@ class Leader:
|
|
|
1295
1315
|
# Is a service job
|
|
1296
1316
|
logger.debug("Service job is being processed as a totally failed job: %s", job_desc)
|
|
1297
1317
|
|
|
1298
|
-
|
|
1318
|
+
|
|
1319
|
+
if not isinstance(job_desc, ServiceJobDescription):
|
|
1320
|
+
raise RuntimeError("The service job description type is incorrect.")
|
|
1299
1321
|
|
|
1300
1322
|
# Grab the client, which is the predecessor.
|
|
1301
1323
|
client_id = self.toilState.service_to_client[job_id]
|
|
1302
1324
|
|
|
1303
|
-
|
|
1325
|
+
if client_id not in self.toilState.servicesIssued:
|
|
1326
|
+
raise RuntimeError("The client was never issued.")
|
|
1304
1327
|
|
|
1305
1328
|
# Leave the service job as a service of its predecessor, because it
|
|
1306
1329
|
# didn't work.
|
|
@@ -1331,8 +1354,10 @@ class Leader:
|
|
|
1331
1354
|
self.jobStore.delete_file(job_desc.startJobStoreID)
|
|
1332
1355
|
else:
|
|
1333
1356
|
# Is a non-service job
|
|
1334
|
-
|
|
1335
|
-
|
|
1357
|
+
if job_id in self.toilState.servicesIssued:
|
|
1358
|
+
raise RuntimeError("The non-service job should not have been issued.")
|
|
1359
|
+
if isinstance(job_desc, ServiceJobDescription):
|
|
1360
|
+
raise RuntimeError("The job description type is incorrect.")
|
|
1336
1361
|
|
|
1337
1362
|
# Traverse failed job's successor graph and get the jobStoreID of new successors.
|
|
1338
1363
|
# Any successor already in toilState.failedSuccessors will not be traversed
|
|
@@ -1401,11 +1426,13 @@ class Leader:
|
|
|
1401
1426
|
len(self.toilState.servicesIssued[client_id]))
|
|
1402
1427
|
elif jobStoreID not in self.toilState.successor_to_predecessors:
|
|
1403
1428
|
# We have reached the root job
|
|
1404
|
-
|
|
1405
|
-
|
|
1406
|
-
|
|
1429
|
+
if self._messages.count(JobUpdatedMessage) != 0:
|
|
1430
|
+
raise RuntimeError("Root job is done but other jobs are still updated")
|
|
1431
|
+
if len(self.toilState.successor_to_predecessors) != 0:
|
|
1432
|
+
raise RuntimeError("Job {} is finished and had no predecessor, but we have other outstanding jobs "
|
|
1407
1433
|
"with predecessors: {}".format(jobStoreID, self.toilState.successor_to_predecessors.keys()))
|
|
1408
|
-
|
|
1434
|
+
if len(self.toilState.successorCounts) != 0:
|
|
1435
|
+
raise RuntimeError(f"Root job is done but jobs waiting on successors: {self.toilState.successorCounts}")
|
|
1409
1436
|
logger.debug("Reached root job %s so no predecessors to clean up" % jobStoreID)
|
|
1410
1437
|
|
|
1411
1438
|
else:
|
|
@@ -1414,7 +1441,8 @@ class Leader:
|
|
|
1414
1441
|
|
|
1415
1442
|
# For each predecessor
|
|
1416
1443
|
for predecessor_id in self.toilState.successor_to_predecessors.pop(jobStoreID):
|
|
1417
|
-
|
|
1444
|
+
if not isinstance(predecessor_id, str):
|
|
1445
|
+
raise RuntimeError(f"Predecessor ID should be str but is {type(predecessor_id)}")
|
|
1418
1446
|
predecessor = self.toilState.get_job(predecessor_id)
|
|
1419
1447
|
|
|
1420
1448
|
# Tell the predecessor that this job is done (keep only other successor jobs)
|