toil 8.0.0__py3-none-any.whl → 8.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +4 -39
- toil/batchSystems/abstractBatchSystem.py +1 -1
- toil/batchSystems/abstractGridEngineBatchSystem.py +1 -1
- toil/batchSystems/awsBatch.py +1 -1
- toil/batchSystems/cleanup_support.py +1 -1
- toil/batchSystems/kubernetes.py +53 -7
- toil/batchSystems/local_support.py +1 -1
- toil/batchSystems/mesos/batchSystem.py +13 -8
- toil/batchSystems/mesos/test/__init__.py +3 -2
- toil/batchSystems/options.py +1 -0
- toil/batchSystems/singleMachine.py +1 -1
- toil/batchSystems/slurm.py +229 -84
- toil/bus.py +5 -3
- toil/common.py +198 -54
- toil/cwl/cwltoil.py +32 -11
- toil/job.py +110 -86
- toil/jobStores/abstractJobStore.py +24 -3
- toil/jobStores/aws/jobStore.py +46 -10
- toil/jobStores/fileJobStore.py +25 -1
- toil/jobStores/googleJobStore.py +104 -30
- toil/leader.py +9 -0
- toil/lib/accelerators.py +3 -1
- toil/lib/aws/session.py +14 -3
- toil/lib/aws/utils.py +92 -35
- toil/lib/aws/utils.py.orig +504 -0
- toil/lib/bioio.py +1 -1
- toil/lib/docker.py +252 -91
- toil/lib/dockstore.py +387 -0
- toil/lib/ec2nodes.py +3 -2
- toil/lib/exceptions.py +5 -3
- toil/lib/history.py +1345 -0
- toil/lib/history_submission.py +695 -0
- toil/lib/io.py +56 -23
- toil/lib/misc.py +25 -1
- toil/lib/resources.py +2 -1
- toil/lib/retry.py +10 -10
- toil/lib/threading.py +11 -10
- toil/lib/{integration.py → trs.py} +95 -46
- toil/lib/web.py +38 -0
- toil/options/common.py +25 -2
- toil/options/cwl.py +10 -0
- toil/options/wdl.py +11 -0
- toil/provisioners/gceProvisioner.py +4 -4
- toil/server/api_spec/LICENSE +201 -0
- toil/server/api_spec/README.rst +5 -0
- toil/server/cli/wes_cwl_runner.py +5 -4
- toil/server/utils.py +2 -3
- toil/statsAndLogging.py +35 -1
- toil/test/__init__.py +275 -115
- toil/test/batchSystems/batchSystemTest.py +227 -205
- toil/test/batchSystems/test_slurm.py +199 -2
- toil/test/cactus/pestis.tar.gz +0 -0
- toil/test/conftest.py +7 -0
- toil/test/cwl/2.fasta +11 -0
- toil/test/cwl/2.fastq +12 -0
- toil/test/cwl/conftest.py +39 -0
- toil/test/cwl/cwlTest.py +1015 -780
- toil/test/cwl/directory/directory/file.txt +15 -0
- toil/test/cwl/download_directory_file.json +4 -0
- toil/test/cwl/download_directory_s3.json +4 -0
- toil/test/cwl/download_file.json +6 -0
- toil/test/cwl/download_http.json +6 -0
- toil/test/cwl/download_https.json +6 -0
- toil/test/cwl/download_s3.json +6 -0
- toil/test/cwl/download_subdirectory_file.json +5 -0
- toil/test/cwl/download_subdirectory_s3.json +5 -0
- toil/test/cwl/empty.json +1 -0
- toil/test/cwl/mock_mpi/fake_mpi.yml +8 -0
- toil/test/cwl/mock_mpi/fake_mpi_run.py +42 -0
- toil/test/cwl/optional-file-exists.json +6 -0
- toil/test/cwl/optional-file-missing.json +6 -0
- toil/test/cwl/optional-file.cwl +18 -0
- toil/test/cwl/preemptible_expression.json +1 -0
- toil/test/cwl/revsort-job-missing.json +6 -0
- toil/test/cwl/revsort-job.json +6 -0
- toil/test/cwl/s3_secondary_file.json +16 -0
- toil/test/cwl/seqtk_seq_job.json +6 -0
- toil/test/cwl/stream.json +6 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.dat +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.f0 +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.f1 +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.f1i +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.f2 +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.f2_TSM0 +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.f3 +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.f3_TSM0 +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.f4 +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.f4_TSM0 +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.f5 +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.info +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.lock +0 -0
- toil/test/cwl/whale.txt +16 -0
- toil/test/docs/scripts/example_alwaysfail.py +38 -0
- toil/test/docs/scripts/example_alwaysfail_with_files.wdl +33 -0
- toil/test/docs/scripts/example_cachingbenchmark.py +117 -0
- toil/test/docs/scripts/stagingExampleFiles/in.txt +1 -0
- toil/test/docs/scripts/stagingExampleFiles/out.txt +2 -0
- toil/test/docs/scripts/tutorial_arguments.py +23 -0
- toil/test/docs/scripts/tutorial_debugging.patch +12 -0
- toil/test/docs/scripts/tutorial_debugging_hangs.wdl +126 -0
- toil/test/docs/scripts/tutorial_debugging_works.wdl +129 -0
- toil/test/docs/scripts/tutorial_docker.py +20 -0
- toil/test/docs/scripts/tutorial_dynamic.py +24 -0
- toil/test/docs/scripts/tutorial_encapsulation.py +28 -0
- toil/test/docs/scripts/tutorial_encapsulation2.py +29 -0
- toil/test/docs/scripts/tutorial_helloworld.py +15 -0
- toil/test/docs/scripts/tutorial_invokeworkflow.py +27 -0
- toil/test/docs/scripts/tutorial_invokeworkflow2.py +30 -0
- toil/test/docs/scripts/tutorial_jobfunctions.py +22 -0
- toil/test/docs/scripts/tutorial_managing.py +29 -0
- toil/test/docs/scripts/tutorial_managing2.py +56 -0
- toil/test/docs/scripts/tutorial_multiplejobs.py +25 -0
- toil/test/docs/scripts/tutorial_multiplejobs2.py +21 -0
- toil/test/docs/scripts/tutorial_multiplejobs3.py +22 -0
- toil/test/docs/scripts/tutorial_promises.py +25 -0
- toil/test/docs/scripts/tutorial_promises2.py +30 -0
- toil/test/docs/scripts/tutorial_quickstart.py +22 -0
- toil/test/docs/scripts/tutorial_requirements.py +44 -0
- toil/test/docs/scripts/tutorial_services.py +45 -0
- toil/test/docs/scripts/tutorial_staging.py +45 -0
- toil/test/docs/scripts/tutorial_stats.py +64 -0
- toil/test/lib/aws/test_iam.py +3 -1
- toil/test/lib/dockerTest.py +205 -122
- toil/test/lib/test_history.py +236 -0
- toil/test/lib/test_trs.py +161 -0
- toil/test/provisioners/aws/awsProvisionerTest.py +12 -9
- toil/test/provisioners/clusterTest.py +4 -4
- toil/test/provisioners/gceProvisionerTest.py +16 -14
- toil/test/sort/sort.py +4 -1
- toil/test/src/busTest.py +17 -17
- toil/test/src/deferredFunctionTest.py +145 -132
- toil/test/src/importExportFileTest.py +71 -63
- toil/test/src/jobEncapsulationTest.py +27 -28
- toil/test/src/jobServiceTest.py +149 -133
- toil/test/src/jobTest.py +219 -211
- toil/test/src/miscTests.py +66 -60
- toil/test/src/promisedRequirementTest.py +163 -169
- toil/test/src/regularLogTest.py +24 -24
- toil/test/src/resourceTest.py +82 -76
- toil/test/src/restartDAGTest.py +51 -47
- toil/test/src/resumabilityTest.py +24 -19
- toil/test/src/retainTempDirTest.py +60 -57
- toil/test/src/systemTest.py +17 -13
- toil/test/src/threadingTest.py +29 -32
- toil/test/utils/ABCWorkflowDebug/B_file.txt +1 -0
- toil/test/utils/ABCWorkflowDebug/debugWorkflow.py +204 -0
- toil/test/utils/ABCWorkflowDebug/mkFile.py +16 -0
- toil/test/utils/ABCWorkflowDebug/sleep.cwl +12 -0
- toil/test/utils/ABCWorkflowDebug/sleep.yaml +1 -0
- toil/test/utils/toilDebugTest.py +117 -102
- toil/test/utils/toilKillTest.py +54 -53
- toil/test/utils/utilsTest.py +303 -229
- toil/test/wdl/lint_error.wdl +9 -0
- toil/test/wdl/md5sum/empty_file.json +1 -0
- toil/test/wdl/md5sum/md5sum-gs.json +1 -0
- toil/test/wdl/md5sum/md5sum.1.0.wdl +32 -0
- toil/test/wdl/md5sum/md5sum.input +1 -0
- toil/test/wdl/md5sum/md5sum.json +1 -0
- toil/test/wdl/md5sum/md5sum.wdl +25 -0
- toil/test/wdl/miniwdl_self_test/inputs-namespaced.json +1 -0
- toil/test/wdl/miniwdl_self_test/inputs.json +1 -0
- toil/test/wdl/miniwdl_self_test/self_test.wdl +40 -0
- toil/test/wdl/standard_library/as_map.json +16 -0
- toil/test/wdl/standard_library/as_map_as_input.wdl +23 -0
- toil/test/wdl/standard_library/as_pairs.json +7 -0
- toil/test/wdl/standard_library/as_pairs_as_input.wdl +23 -0
- toil/test/wdl/standard_library/ceil.json +3 -0
- toil/test/wdl/standard_library/ceil_as_command.wdl +16 -0
- toil/test/wdl/standard_library/ceil_as_input.wdl +16 -0
- toil/test/wdl/standard_library/collect_by_key.json +1 -0
- toil/test/wdl/standard_library/collect_by_key_as_input.wdl +23 -0
- toil/test/wdl/standard_library/cross.json +11 -0
- toil/test/wdl/standard_library/cross_as_input.wdl +19 -0
- toil/test/wdl/standard_library/flatten.json +7 -0
- toil/test/wdl/standard_library/flatten_as_input.wdl +18 -0
- toil/test/wdl/standard_library/floor.json +3 -0
- toil/test/wdl/standard_library/floor_as_command.wdl +16 -0
- toil/test/wdl/standard_library/floor_as_input.wdl +16 -0
- toil/test/wdl/standard_library/keys.json +8 -0
- toil/test/wdl/standard_library/keys_as_input.wdl +24 -0
- toil/test/wdl/standard_library/length.json +7 -0
- toil/test/wdl/standard_library/length_as_input.wdl +16 -0
- toil/test/wdl/standard_library/length_as_input_with_map.json +7 -0
- toil/test/wdl/standard_library/length_as_input_with_map.wdl +17 -0
- toil/test/wdl/standard_library/length_invalid.json +3 -0
- toil/test/wdl/standard_library/range.json +3 -0
- toil/test/wdl/standard_library/range_0.json +3 -0
- toil/test/wdl/standard_library/range_as_input.wdl +17 -0
- toil/test/wdl/standard_library/range_invalid.json +3 -0
- toil/test/wdl/standard_library/read_boolean.json +3 -0
- toil/test/wdl/standard_library/read_boolean_as_command.wdl +17 -0
- toil/test/wdl/standard_library/read_float.json +3 -0
- toil/test/wdl/standard_library/read_float_as_command.wdl +17 -0
- toil/test/wdl/standard_library/read_int.json +3 -0
- toil/test/wdl/standard_library/read_int_as_command.wdl +17 -0
- toil/test/wdl/standard_library/read_json.json +3 -0
- toil/test/wdl/standard_library/read_json_as_output.wdl +31 -0
- toil/test/wdl/standard_library/read_lines.json +3 -0
- toil/test/wdl/standard_library/read_lines_as_output.wdl +31 -0
- toil/test/wdl/standard_library/read_map.json +3 -0
- toil/test/wdl/standard_library/read_map_as_output.wdl +31 -0
- toil/test/wdl/standard_library/read_string.json +3 -0
- toil/test/wdl/standard_library/read_string_as_command.wdl +17 -0
- toil/test/wdl/standard_library/read_tsv.json +3 -0
- toil/test/wdl/standard_library/read_tsv_as_output.wdl +31 -0
- toil/test/wdl/standard_library/round.json +3 -0
- toil/test/wdl/standard_library/round_as_command.wdl +16 -0
- toil/test/wdl/standard_library/round_as_input.wdl +16 -0
- toil/test/wdl/standard_library/size.json +3 -0
- toil/test/wdl/standard_library/size_as_command.wdl +17 -0
- toil/test/wdl/standard_library/size_as_output.wdl +36 -0
- toil/test/wdl/standard_library/stderr.json +3 -0
- toil/test/wdl/standard_library/stderr_as_output.wdl +30 -0
- toil/test/wdl/standard_library/stdout.json +3 -0
- toil/test/wdl/standard_library/stdout_as_output.wdl +30 -0
- toil/test/wdl/standard_library/sub.json +3 -0
- toil/test/wdl/standard_library/sub_as_input.wdl +17 -0
- toil/test/wdl/standard_library/sub_as_input_with_file.wdl +17 -0
- toil/test/wdl/standard_library/transpose.json +6 -0
- toil/test/wdl/standard_library/transpose_as_input.wdl +18 -0
- toil/test/wdl/standard_library/write_json.json +6 -0
- toil/test/wdl/standard_library/write_json_as_command.wdl +17 -0
- toil/test/wdl/standard_library/write_lines.json +7 -0
- toil/test/wdl/standard_library/write_lines_as_command.wdl +17 -0
- toil/test/wdl/standard_library/write_map.json +6 -0
- toil/test/wdl/standard_library/write_map_as_command.wdl +17 -0
- toil/test/wdl/standard_library/write_tsv.json +6 -0
- toil/test/wdl/standard_library/write_tsv_as_command.wdl +17 -0
- toil/test/wdl/standard_library/zip.json +12 -0
- toil/test/wdl/standard_library/zip_as_input.wdl +19 -0
- toil/test/wdl/test.csv +3 -0
- toil/test/wdl/test.tsv +3 -0
- toil/test/wdl/testfiles/croo.wdl +38 -0
- toil/test/wdl/testfiles/drop_files.wdl +62 -0
- toil/test/wdl/testfiles/drop_files_subworkflow.wdl +13 -0
- toil/test/wdl/testfiles/empty.txt +0 -0
- toil/test/wdl/testfiles/not_enough_outputs.wdl +33 -0
- toil/test/wdl/testfiles/random.wdl +66 -0
- toil/test/wdl/testfiles/string_file_coercion.json +1 -0
- toil/test/wdl/testfiles/string_file_coercion.wdl +35 -0
- toil/test/wdl/testfiles/test.json +4 -0
- toil/test/wdl/testfiles/test_boolean.txt +1 -0
- toil/test/wdl/testfiles/test_float.txt +1 -0
- toil/test/wdl/testfiles/test_int.txt +1 -0
- toil/test/wdl/testfiles/test_lines.txt +5 -0
- toil/test/wdl/testfiles/test_map.txt +2 -0
- toil/test/wdl/testfiles/test_string.txt +1 -0
- toil/test/wdl/testfiles/url_to_file.wdl +13 -0
- toil/test/wdl/testfiles/url_to_optional_file.wdl +13 -0
- toil/test/wdl/testfiles/vocab.json +1 -0
- toil/test/wdl/testfiles/vocab.wdl +66 -0
- toil/test/wdl/testfiles/wait.wdl +34 -0
- toil/test/wdl/wdl_specification/type_pair.json +23 -0
- toil/test/wdl/wdl_specification/type_pair_basic.wdl +36 -0
- toil/test/wdl/wdl_specification/type_pair_with_files.wdl +36 -0
- toil/test/wdl/wdl_specification/v1_spec.json +1 -0
- toil/test/wdl/wdl_specification/v1_spec_declaration.wdl +39 -0
- toil/test/wdl/wdltoil_test.py +681 -408
- toil/test/wdl/wdltoil_test_kubernetes.py +2 -2
- toil/version.py +10 -10
- toil/wdl/wdltoil.py +350 -123
- toil/worker.py +113 -33
- {toil-8.0.0.dist-info → toil-8.2.0.dist-info}/METADATA +13 -7
- toil-8.2.0.dist-info/RECORD +439 -0
- {toil-8.0.0.dist-info → toil-8.2.0.dist-info}/WHEEL +1 -1
- toil/test/lib/test_integration.py +0 -104
- toil-8.0.0.dist-info/RECORD +0 -253
- {toil-8.0.0.dist-info → toil-8.2.0.dist-info}/entry_points.txt +0 -0
- {toil-8.0.0.dist-info → toil-8.2.0.dist-info/licenses}/LICENSE +0 -0
- {toil-8.0.0.dist-info → toil-8.2.0.dist-info}/top_level.txt +0 -0
toil/batchSystems/slurm.py
CHANGED
@@ -13,13 +13,14 @@
 # limitations under the License.
 from __future__ import annotations
 
+import errno
 import logging
 import math
 import os
 import sys
 from argparse import SUPPRESS, ArgumentParser, _ArgumentGroup
-
-from typing import NamedTuple, TypeVar
+import shlex
+from typing import Callable, NamedTuple, TypeVar
 
 from toil.batchSystems.abstractBatchSystem import (
     EXIT_STATUS_UNAVAILABLE_VALUE,
@@ -100,6 +101,32 @@ def parse_slurm_time(slurm_time: str) -> int:
         total_seconds += multiplier * int(elapsed_split[index])
     return total_seconds
 
+# For parsing user-provided option overrides (or self-generated
+# options) for sbatch, we need a way to recognize long, long-with-equals, and
+# short forms.
+def option_detector(long: str, short: str | None = None) -> Callable[[str], bool]:
+    """
+    Get a function that returns true if it sees the long or short
+    option.
+    """
+    def is_match(option: str) -> bool:
+        return option == f"--{long}" or option.startswith(f"--{long}=") or (short is not None and option == f"-{short}")
+    return is_match
+
+def any_option_detector(options: list[str | tuple[str, str]]) -> Callable[[str], bool]:
+    """
+    Get a function that returns true if it sees any of the long
+    options or long or short option pairs.
+    """
+    detectors = [option_detector(o) if isinstance(o, str) else option_detector(*o) for o in options]
+    def is_match(option: str) -> bool:
+        for detector in detectors:
+            if detector(option):
+                return True
+        return False
+    return is_match
+
+
 
 class SlurmBatchSystem(AbstractGridEngineBatchSystem):
     class PartitionInfo(NamedTuple):
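The two helpers added in this hunk are plain closures, so their behavior can be checked in isolation. A minimal usage sketch, given the definitions above (the option strings are illustrative, not taken from the package):

    is_time = option_detector("time", "t")
    is_mem_like = any_option_detector(["mem", "mem-per-cpu", "mem-per-gpu"])
    assert is_time("--time") and is_time("--time=01:00:00") and is_time("-t")
    assert is_mem_like("--mem=4G") and not is_mem_like("--partition=gpu")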
@@ -185,6 +212,8 @@ class SlurmBatchSystem(AbstractGridEngineBatchSystem):
         def get_partition(self, time_limit: float | None) -> str | None:
             """
             Get the partition name to use for a job with the given time limit.
+
+            :param time_limit: Time limit in seconds.
             """
 
             if time_limit is None:
@@ -193,17 +222,36 @@ class SlurmBatchSystem(AbstractGridEngineBatchSystem):
 
             winning_partition = None
             for partition in self.all_partitions:
-                if partition.time_limit
-
-
-
-                #
+                if partition.time_limit < time_limit:
+                    # Can't use this
+                    continue
+                if winning_partition is None:
+                    # Anything beats None
+                    winning_partition = partition
+                    continue
+                if partition.gres and not winning_partition.gres:
+                    # Never use a partition witn GRES if you can avoid it
+                    continue
+                elif not partition.gres and winning_partition.gres:
+                    # Never keep a partition with GRES if we find one without
+                    winning_partition = partition
+                    continue
+                if partition.priority > winning_partition.priority:
+                    # After that, don't raise priority
+                    continue
+                elif partition.priority < winning_partition.priority:
+                    # And always lower it
+                    winning_partition = partition
+                    continue
+                if partition.time_limit < winning_partition.time_limit:
+                    # Finally, lower time limit
                     winning_partition = partition
+
             # TODO: Store partitions in a better indexed way
             if winning_partition is None and len(self.all_partitions) > 0:
                 # We have partitions and none of them can fit this
                 raise RuntimeError(
-                    "Could not find a Slurm partition that can fit a job that runs for {time_limit} seconds"
+                    f"Could not find a Slurm partition that can fit a job that runs for {time_limit} seconds"
                 )
 
             if winning_partition is None:
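The selection loop in this hunk expresses a lexicographic preference: among partitions whose time limit can hold the job, prefer partitions without GRES, then lower Slurm priority, then the shortest time limit. A standalone sketch of the same ordering, assuming only the PartitionInfo fields referenced above (`partitions` and `needed_seconds` are hypothetical names, not code from the package):

    candidates = [p for p in partitions if p.time_limit >= needed_seconds]
    # min() returns the first of equally-ranked candidates, matching the loop's tie-breaking
    best = min(candidates, key=lambda p: (bool(p.gres), p.priority, p.time_limit), default=None)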
@@ -344,7 +392,9 @@ class SlurmBatchSystem(AbstractGridEngineBatchSystem):
             """
             try:
                 status_dict = self._getJobDetailsFromSacct(job_id_list)
-            except CalledProcessErrorStderr:
+            except (CalledProcessErrorStderr, OSError) as e:
+                if isinstance(e, OSError):
+                    logger.warning("Could not run sacct: %s", e)
                 status_dict = self._getJobDetailsFromScontrol(job_id_list)
             return status_dict
 
@@ -437,11 +487,25 @@ class SlurmBatchSystem(AbstractGridEngineBatchSystem):
                 "-S",
                 "1970-01-01",
             ] # override start time limit
-            stdout = call_command(args, quiet=True)
 
             # Collect the job statuses in a dict; key is the job-id, value is a tuple containing
             # job state and exit status. Initialize dict before processing output of `sacct`.
             job_statuses: dict[int, tuple[str | None, int | None]] = {}
+
+            try:
+                stdout = call_command(args, quiet=True)
+            except OSError as e:
+                if e.errno == errno.E2BIG:
+                    # Argument list is too big, recurse on half the argument list
+                    if len(job_id_list) == 1:
+                        # 1 is too big, we can't recurse further, bail out
+                        raise
+                    job_statuses.update(self._getJobDetailsFromSacct(job_id_list[:len(job_id_list)//2]))
+                    job_statuses.update(self._getJobDetailsFromSacct(job_id_list[len(job_id_list)//2:]))
+                    return job_statuses
+                else:
+                    raise
+
             for job_id in job_id_list:
                 job_statuses[job_id] = (None, None)
 
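The errno.E2BIG handling in this hunk is a bisecting retry: when the assembled sacct command line exceeds the kernel's argument-size limit, the job-id list is split in half and each half is queried separately. A self-contained sketch of the same pattern (`run_for` and `items` are hypothetical, not part of Toil):

    import errno

    def query_bisecting(items: list[str]) -> dict[str, str]:
        # Hypothetical: run_for() runs one command over all items and raises
        # OSError with errno.E2BIG when the argument list is too long.
        try:
            return run_for(items)
        except OSError as e:
            if e.errno != errno.E2BIG or len(items) == 1:
                raise
            mid = len(items) // 2
            results = query_bisecting(items[:mid])
            results.update(query_bisecting(items[mid:]))
            return results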
@@ -609,104 +673,169 @@ class SlurmBatchSystem(AbstractGridEngineBatchSystem):
             # Also any extra arguments from --slurmArgs or TOIL_SLURM_ARGS
             nativeConfig: str = self.boss.config.slurm_args # type: ignore[attr-defined]
 
+            is_any_mem_option = any_option_detector(["mem", "mem-per-cpu", "mem-per-gpu"])
+            is_any_cpus_option = any_option_detector([("cpus-per-task", "c"), "cpus-per-gpu"])
+            is_export_option = option_detector("export")
+            is_export_file_option = option_detector("export-file")
+            is_time_option = option_detector("time", "t")
+            is_partition_option = option_detector("partition", "p")
+
+            # We will fill these in with stuff parsed from TOIL_SLURM_ARGS, or
+            # with our own determinations if they aren't there.
+
             # --export=[ALL,]<environment_toil_variables>
-
+            export_all = True
+            export_list = [] # Some items here may be multiple comma-separated values
+            time_limit: int | None = self.boss.config.slurm_time # type: ignore[attr-defined]
+            partition: str | None = None
 
             if nativeConfig is not None:
                 logger.debug(
                     "Native SLURM options appended to sbatch: %s", nativeConfig
                 )
 
-
-
+                # Do a mini argument parse to pull out export and parse time if
+                # needed
+                args = shlex.split(nativeConfig)
+                i = 0
+                while i < len(args):
+                    arg = args[i]
+                    if is_any_mem_option(arg) or is_any_cpus_option(arg):
+                        # Prohibit arguments that set CPUs or memory
                         raise ValueError(
-                            f"
+                            f"Cannot use Slurm argument {arg} which conflicts "
+                            f"with Toil's own arguments to Slurm"
                         )
-
-
-
-
+                    elif is_export_option(arg):
+                        # Capture the export argument value so we can modify it
+                        export_all = False
+                        if "=" not in arg:
+                            if i + 1 >= len(args):
+                                raise ValueError(
+                                    f"No value supplied for Slurm {arg} argument"
+                                )
+                            i += 1
+                            export_list.append(args[i])
+                        else:
+                            export_list.append(arg.split("=", 1)[1])
+                    elif is_export_file_option(arg):
+                        # Keep --export-file but turn off --export=ALL in that
+                        # case.
+                        export_all = False
+                        sbatch_line.append(arg)
+                    elif is_time_option(arg):
+                        # Capture the time limit in seconds so we can use it for picking a partition
+                        if "=" not in arg:
+                            if i + 1 >= len(args):
+                                raise ValueError(
+                                    f"No value supplied for Slurm {arg} argument"
+                                )
+                            i += 1
+                            time_string = args[i]
+                        else:
+                            time_string = arg.split("=", 1)[1]
+                        time_limit = parse_slurm_time(time_string)
+                    elif is_partition_option(arg):
+                        # Capture the partition so we can run checks on it and know not to assign one
+                        if "=" not in arg:
+                            if i + 1 >= len(args):
+                                raise ValueError(
+                                    f"No value supplied for Slurm {arg} argument"
+                                )
+                            i += 1
+                            partition = args[i]
+                        else:
+                            partition = arg.split("=", 1)[1]
+                    else:
+                        # Other arguments pass through.
+                        sbatch_line.append(arg)
+                    i += 1
+
+            if export_all:
+                # We don't have any export overrides so we ened to start with
+                # an ALL
+                export_list.append("ALL")
 
             if environment:
                 argList = []
 
                 for k, v in environment.items():
-
+                    # TODO: The sbatch man page doesn't say we can quote these;
+                    # if we need to send characters like , itself we need to
+                    # use --export-file and clean it up when the command has
+                    # been issued.
+                    quoted_value = shlex.quote(os.environ[k] if v is None else v)
                     argList.append(f"{k}={quoted_value}")
 
-
-
-                #
-
-
-
-
-
-
+                export_list.extend(argList)
+
+            # If partition isn't set and we have a GPU partition override
+            # that applies, apply it
+            gpu_partition_override: str | None = self.boss.config.slurm_gpu_partition # type: ignore[attr-defined]
+            if partition is None and gpus and gpu_partition_override:
+                partition = gpu_partition_override
+
+            # If partition isn't set and we have a parallel partition override
+            # that applies, apply it
+            parallel_env: str | None = self.boss.config.slurm_pe # type: ignore[attr-defined]
+            if partition is None and cpu and cpu > 1 and parallel_env:
+                partition = parallel_env
+
+            # If partition isn't set and we have a general partition override
+            # that applies, apply it
+            partition_override: str | None = self.boss.config.slurm_partition # type: ignore[attr-defined]
+            if partition is None and partition_override:
+                partition = partition_override
+
+            if partition is None and gpus:
+                # Send to a GPU partition
+                gpu_partition = self.boss.partitions.default_gpu_partition
+                if gpu_partition is None:
+                    # no gpu partitions are available, raise an error
+                    raise RuntimeError(
+                        f"The job {jobName} is requesting GPUs, but the Slurm cluster does not appear to have an accessible partition with GPUs"
+                    )
+                if (
+                    time_limit is not None
+                    and gpu_partition.time_limit < time_limit
+                ):
+                    # TODO: find the lowest-priority GPU partition that has at least each job's time limit!
+                    logger.warning(
+                        "Trying to submit a job that needs %s seconds to partition %s that has a limit of %s seconds",
+                        time_limit,
+                        gpu_partition.partition_name,
+                        gpu_partition.time_limit,
+                    )
+                partition = gpu_partition.partition_name
+
+            if partition is None:
+                # Pick a partition based on time limit
+                partition = self.boss.partitions.get_partition(time_limit)
+
+            # Now generate all the arguments
+            if len(export_list) > 0:
+                # add --export to the sbatch
+                sbatch_line.append("--export=" + ",".join(export_list))
+            if partition is not None:
+                sbatch_line.append(f"--partition={partition}")
+            if gpus:
+                # Generate GPU assignment argument
+                sbatch_line.append(f"--gres=gpu:{gpus}")
+                if partition is not None and partition not in self.boss.partitions.gpu_partitions:
+                    # the specified partition is not compatible, so warn the user that the job may not work
+                    logger.warning(
+                        f"Job {jobName} needs GPUs, but specified partition {partition} does not have them. This job may not work."
+                        f"Try specifying one of these partitions instead: {', '.join(self.boss.partitions.gpu_partitions)}."
+                    )
             if mem is not None and self.boss.config.slurm_allocate_mem: # type: ignore[attr-defined]
                 # memory passed in is in bytes, but slurm expects megabytes
                 sbatch_line.append(f"--mem={math.ceil(mem / 2 ** 20)}")
             if cpu is not None:
                 sbatch_line.append(f"--cpus-per-task={math.ceil(cpu)}")
-
-            time_limit: int = self.boss.config.slurm_time # type: ignore[attr-defined]
             if time_limit is not None:
                 # Put all the seconds in the seconds slot
                 sbatch_line.append(f"--time=0:{time_limit}")
 
-            if gpus:
-                # This block will add a gpu supported partition only if no partition is supplied by the user
-                sbatch_line = sbatch_line[:1] + [f"--gres=gpu:{gpus}"] + sbatch_line[1:]
-                if not any(option.startswith("--partition") for option in sbatch_line):
-                    # no partition specified, so specify one
-                    # try to get the name of the lowest priority gpu supported partition
-                    lowest_gpu_partition = self.boss.partitions.default_gpu_partition
-                    if lowest_gpu_partition is None:
-                        # no gpu partitions are available, raise an error
-                        raise RuntimeError(
-                            f"The job {jobName} is requesting GPUs, but the Slurm cluster does not appear to have an accessible partition with GPUs"
-                        )
-                    if (
-                        time_limit is not None
-                        and lowest_gpu_partition.time_limit < time_limit
-                    ):
-                        # TODO: find the lowest-priority GPU partition that has at least each job's time limit!
-                        logger.warning(
-                            "Trying to submit a job that needs %s seconds to partition %s that has a limit of %s seconds",
-                            time_limit,
-                            lowest_gpu_partition.partition_name,
-                            lowest_gpu_partition.time_limit,
-                        )
-                    sbatch_line.append(
-                        f"--partition={lowest_gpu_partition.partition_name}"
-                    )
-                else:
-                    # there is a partition specified already, check if the partition has GPUs
-                    for i, option in enumerate(sbatch_line):
-                        if option.startswith("--partition"):
-                            # grab the partition name depending on if it's specified via an "=" or a space
-                            if "=" in option:
-                                partition_name = option[len("--partition=") :]
-                            else:
-                                partition_name = option[i + 1]
-                            available_gpu_partitions = (
-                                self.boss.partitions.gpu_partitions
-                            )
-                            if partition_name not in available_gpu_partitions:
-                                # the specified partition is not compatible, so warn the user that the job may not work
-                                logger.warning(
-                                    f"Job {jobName} needs {gpus} GPUs, but specified partition {partition_name} is incompatible. This job may not work."
-                                    f"Try specifying one of these partitions instead: {', '.join(available_gpu_partitions)}."
-                                )
-                            break
-
-            if not any(option.startswith("--partition") for option in sbatch_line):
-                # Pick a partition ourselves
-                chosen_partition = self.boss.partitions.get_partition(time_limit)
-                if chosen_partition is not None:
-                    # Route to that partition
-                    sbatch_line.append(f"--partition={chosen_partition}")
-
             stdoutfile: str = self.boss.format_std_out_err_path(jobID, "%j", "out")
             stderrfile: str = self.boss.format_std_out_err_path(jobID, "%j", "err")
             sbatch_line.extend(["-o", stdoutfile, "-e", stderrfile])
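To make the TOIL_SLURM_ARGS mini-parse in this hunk concrete, here is an illustration of the tokenization it starts from (the argument string is invented for the example):

    import shlex
    tokens = shlex.split("--export=FOO,BAR -t 2:00:00 --qos=long")
    # tokens == ['--export=FOO,BAR', '-t', '2:00:00', '--qos=long']
    # With the loop above, "FOO,BAR" would land in export_list, "2:00:00" would be
    # handed to parse_slurm_time() to set the time limit, and "--qos=long" would
    # pass through to sbatch_line unchanged.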
@@ -714,7 +843,7 @@ class SlurmBatchSystem(AbstractGridEngineBatchSystem):
             return sbatch_line
 
     def __init__(
-        self, config: Config, maxCores: float, maxMemory:
+        self, config: Config, maxCores: float, maxMemory: float, maxDisk: float
     ) -> None:
         super().__init__(config, maxCores, maxMemory, maxDisk)
         self.partitions = SlurmBatchSystem.PartitionSet()
@@ -830,6 +959,20 @@ class SlurmBatchSystem(AbstractGridEngineBatchSystem):
             env_var="TOIL_SLURM_TIME",
             help="Slurm job time limit, in [DD-]HH:MM:SS format.",
         )
+        parser.add_argument(
+            "--slurmPartition",
+            dest="slurm_partition",
+            default=None,
+            env_var="TOIL_SLURM_PARTITION",
+            help="Partition to send Slurm jobs to.",
+        )
+        parser.add_argument(
+            "--slurmGPUPartition",
+            dest="slurm_gpu_partition",
+            default=None,
+            env_var="TOIL_SLURM_GPU_PARTITION",
+            help="Partition to send Slurm jobs to if they ask for GPUs.",
+        )
         parser.add_argument(
             "--slurmPE",
             dest="slurm_pe",
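Both new options declare env_var fallbacks, so they can be preset outside the command line; an illustrative snippet for doing that from Python before Toil parses its options (the partition names are placeholders, not Toil defaults):

    import os
    os.environ["TOIL_SLURM_PARTITION"] = "medium"    # default partition for Slurm jobs
    os.environ["TOIL_SLURM_GPU_PARTITION"] = "gpu"   # partition for jobs that request GPUs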
@@ -852,5 +995,7 @@ class SlurmBatchSystem(AbstractGridEngineBatchSystem):
         setOption("slurm_allocate_mem")
         setOption("slurm_default_all_mem")
         setOption("slurm_time")
+        setOption("slurm_partition")
+        setOption("slurm_gpu_partition")
         setOption("slurm_pe")
         setOption("slurm_args")
toil/bus.py
CHANGED
@@ -69,13 +69,15 @@ import tempfile
 import threading
 from collections.abc import Iterator
 from dataclasses import dataclass
-from typing import IO, Any, Callable, NamedTuple, Optional, TypeVar, cast
+from typing import IO, Any, Callable, NamedTuple, Optional, TypeVar, TYPE_CHECKING, cast
 
 from pubsub.core import Publisher
 from pubsub.core.listener import Listener
 from pubsub.core.topicobj import Topic
 from pubsub.core.topicutils import ALL_TOPICS
 
+from toil.lib.misc import FileDescriptorOrPath
+
 logger = logging.getLogger(__name__)
 
 # We define some ways to talk about jobs.
@@ -434,7 +436,7 @@ class MessageBus:
         connection._set_bus(self)
         return connection
 
-    def connect_output_file(self, file_path:
+    def connect_output_file(self, file_path: FileDescriptorOrPath) -> Any:
         """
         Send copies of all messages to the given output file.
 
@@ -736,7 +738,7 @@ class JobStatus:
|
|
|
736
738
|
) # if the exit code is -1 and the job id is specified, we assume the job is running
|
|
737
739
|
|
|
738
740
|
|
|
739
|
-
def replay_message_bus(path:
|
|
741
|
+
def replay_message_bus(path: FileDescriptorOrPath) -> dict[str, JobStatus]:
|
|
740
742
|
"""
|
|
741
743
|
Replay all the messages and work out what they mean for jobs.
|
|
742
744
|
|