toil 6.0.0__py3-none-any.whl → 6.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. toil/batchSystems/abstractBatchSystem.py +19 -4
  2. toil/batchSystems/abstractGridEngineBatchSystem.py +22 -22
  3. toil/batchSystems/cleanup_support.py +7 -3
  4. toil/batchSystems/lsf.py +7 -7
  5. toil/batchSystems/slurm.py +85 -14
  6. toil/bus.py +38 -0
  7. toil/common.py +20 -18
  8. toil/cwl/cwltoil.py +81 -63
  9. toil/exceptions.py +1 -1
  10. toil/fileStores/abstractFileStore.py +53 -4
  11. toil/fileStores/cachingFileStore.py +4 -20
  12. toil/fileStores/nonCachingFileStore.py +5 -14
  13. toil/job.py +46 -30
  14. toil/jobStores/abstractJobStore.py +21 -23
  15. toil/jobStores/aws/utils.py +5 -4
  16. toil/jobStores/fileJobStore.py +1 -1
  17. toil/leader.py +17 -14
  18. toil/lib/conversions.py +19 -0
  19. toil/lib/generatedEC2Lists.py +8 -8
  20. toil/lib/io.py +28 -2
  21. toil/lib/resources.py +8 -1
  22. toil/lib/threading.py +27 -12
  23. toil/options/common.py +5 -7
  24. toil/options/wdl.py +5 -0
  25. toil/provisioners/abstractProvisioner.py +8 -0
  26. toil/statsAndLogging.py +36 -8
  27. toil/test/batchSystems/test_slurm.py +21 -6
  28. toil/test/cactus/__init__.py +0 -0
  29. toil/test/cactus/test_cactus_integration.py +58 -0
  30. toil/test/cwl/cwlTest.py +243 -151
  31. toil/test/docs/scriptsTest.py +2 -2
  32. toil/test/jobStores/jobStoreTest.py +7 -5
  33. toil/test/lib/test_ec2.py +1 -1
  34. toil/test/options/__init__.py +13 -0
  35. toil/test/options/options.py +37 -0
  36. toil/test/provisioners/clusterTest.py +9 -8
  37. toil/test/utils/toilDebugTest.py +1 -1
  38. toil/test/utils/utilsTest.py +3 -3
  39. toil/test/wdl/wdltoil_test.py +91 -16
  40. toil/utils/toilDebugFile.py +1 -1
  41. toil/utils/toilStats.py +309 -266
  42. toil/utils/toilStatus.py +1 -1
  43. toil/version.py +9 -9
  44. toil/wdl/wdltoil.py +341 -189
  45. toil/worker.py +31 -16
  46. {toil-6.0.0.dist-info → toil-6.1.0.dist-info}/METADATA +6 -7
  47. {toil-6.0.0.dist-info → toil-6.1.0.dist-info}/RECORD +51 -47
  48. {toil-6.0.0.dist-info → toil-6.1.0.dist-info}/LICENSE +0 -0
  49. {toil-6.0.0.dist-info → toil-6.1.0.dist-info}/WHEEL +0 -0
  50. {toil-6.0.0.dist-info → toil-6.1.0.dist-info}/entry_points.txt +0 -0
  51. {toil-6.0.0.dist-info → toil-6.1.0.dist-info}/top_level.txt +0 -0
toil/lib/io.py CHANGED
@@ -182,17 +182,43 @@ def make_public_dir(in_directory: Optional[str] = None) -> str:
     os.chmod(this_should_never_happen, 0o777)
     return this_should_never_happen
 
-def try_path(path: str) -> Optional[str]:
+def try_path(path: str, min_size: int = 100 * 1024 * 1024) -> Optional[str]:
     """
     Try to use the given path. Return it if it exists or can be made,
     and we can make things within it, or None otherwise.
+
+    :param min_size: Reject paths on filesystems smaller than this many bytes.
     """
+
     try:
         os.makedirs(path, exist_ok=True)
     except OSError:
         # Maybe we lack permissions
         return None
-    return path if os.path.exists(path) and os.access(path, os.W_OK) else None
+
+    if not os.path.exists(path):
+        # We didn't manage to make it
+        return None
+
+    if not os.access(path, os.W_OK):
+        # It doesn't look writable
+        return None
+
+    try:
+        stats = os.statvfs(path)
+    except OSError:
+        # Maybe we lack permissions
+        return None
+
+    # Is the filesystem big enough?
+    # We need to look at the FS size and not the free space so we don't change
+    # over to a different filesystem when this one fills up.
+    fs_size = stats.f_frsize * stats.f_blocks
+    if fs_size < min_size:
+        # Too small
+        return None
+
+    return path
 
 
 class WriteWatchingStream:
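For context, the new check in try_path sizes the whole filesystem rather than its free space, so Toil does not hop to a different filesystem as the current one fills up. A minimal standalone sketch of the same measurement (the helper name here is illustrative, not part of the diff):

    import os

    def filesystem_size_bytes(path: str) -> int:
        # Same computation as the new check: fragment size times the
        # total number of fragments on the filesystem holding `path`.
        stats = os.statvfs(path)
        return stats.f_frsize * stats.f_blocks

    # A path on a filesystem under the default 100 MiB floor is now rejected,
    # e.g. when filesystem_size_bytes(candidate) < 100 * 1024 * 1024.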
toil/lib/resources.py CHANGED
@@ -13,6 +13,8 @@
 # limitations under the License.
 import fnmatch
 import os
+import math
+import sys
 import resource
 from typing import List, Tuple
 
@@ -20,12 +22,17 @@ from typing import List, Tuple
 def get_total_cpu_time_and_memory_usage() -> Tuple[float, int]:
     """
     Gives the total cpu time of itself and all its children, and the maximum RSS memory usage of
-    itself and its single largest child.
+    itself and its single largest child (in kibibytes).
     """
     me = resource.getrusage(resource.RUSAGE_SELF)
     children = resource.getrusage(resource.RUSAGE_CHILDREN)
     total_cpu_time = me.ru_utime + me.ru_stime + children.ru_utime + children.ru_stime
     total_memory_usage = me.ru_maxrss + children.ru_maxrss
+    if sys.platform == "darwin":
+        # On Linux, getrusage works in "kilobytes" (really kibibytes), but on
+        # Mac it works in bytes. See
+        # <https://github.com/python/cpython/issues/74698>
+        total_memory_usage = int(math.ceil(total_memory_usage / 1024))
     return total_cpu_time, total_memory_usage
 
 
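The fix above accounts for ru_maxrss being reported in kibibytes on Linux but in bytes on macOS. A self-contained sketch of the same normalization, using only the standard library:

    import math
    import resource
    import sys

    def max_rss_kib() -> int:
        # ru_maxrss is KiB on Linux but bytes on macOS; normalize so
        # callers always see KiB, as the patched function now does.
        rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
        if sys.platform == "darwin":
            rss = int(math.ceil(rss / 1024))
        return rss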
toil/lib/threading.py CHANGED
@@ -109,9 +109,12 @@ def cpu_count() -> int:
         return cast(int, cached)
 
     # Get the fallback answer of all the CPUs on the machine
-    total_machine_size = cast(int, psutil.cpu_count(logical=True))
+    psutil_cpu_count = cast(Optional[int], psutil.cpu_count(logical=True))
+    if psutil_cpu_count is None:
+        logger.debug('Could not retrieve the logical CPU count.')
 
-    logger.debug('Total machine size: %d cores', total_machine_size)
+    total_machine_size: Union[float, int] = psutil_cpu_count if psutil_cpu_count is not None else float('inf')
+    logger.debug('Total machine size: %s core(s)', total_machine_size)
 
     # cgroups may limit the size
     cgroup_size: Union[float, int] = float('inf')
@@ -151,13 +154,13 @@ def cpu_count() -> int:
             if quota == -1:
                 # But the quota can be -1 for unset.
                 # Assume we can use the whole machine.
-                return total_machine_size
-
-            # The thread count is how many multiples of a wall clock period we
-            # can burn in that period.
-            cgroup_size = int(math.ceil(float(quota)/float(period)))
+                cgroup_size = float('inf')
+            else:
+                # The thread count is how many multiples of a wall clock period we
+                # can burn in that period.
+                cgroup_size = int(math.ceil(float(quota)/float(period)))
 
-            logger.debug('Control group size in cores: %d', cgroup_size)
+            logger.debug('Control group size in cores: %s', cgroup_size)
     except:
         # We can't actually read these cgroup fields. Maybe we are a mac or something.
         logger.debug('Could not inspect cgroup: %s', traceback.format_exc())
@@ -175,9 +178,16 @@ def cpu_count() -> int:
     else:
         logger.debug('CPU affinity not available')
 
-    # Return the smaller of the actual thread count and the cgroup's limit, minimum 1.
-    result = cast(int, max(1, min(min(affinity_size, cgroup_size), total_machine_size)))
-    logger.debug('cpu_count: %s', str(result))
+    limit: Union[float, int] = float('inf')
+    # Apply all the limits to take the smallest
+    limit = min(limit, total_machine_size)
+    limit = min(limit, cgroup_size)
+    limit = min(limit, affinity_size)
+    if limit < 1 or limit == float('inf'):
+        # Fall back to 1 if we can't get a size
+        limit = 1
+    result = int(limit)
+    logger.debug('cpu_count: %s', result)
     # Make sure to remember it for the next call
     setattr(cpu_count, 'result', result)
     return result
@@ -529,9 +539,14 @@ class LastProcessStandingArena:
             os.mkdir(self.lockfileDir)
         except FileExistsError:
             pass
+        except Exception as e:
+            raise RuntimeError("Could not make lock file directory " + self.lockfileDir) from e
 
         # Make ourselves a file in it and lock it to prove we are alive.
-        self.lockfileFD, self.lockfileName = tempfile.mkstemp(dir=self.lockfileDir)  # type: ignore
+        try:
+            self.lockfileFD, self.lockfileName = tempfile.mkstemp(dir=self.lockfileDir)  # type: ignore
+        except Exception as e:
+            raise RuntimeError("Could not make lock file in " + self.lockfileDir) from e
         # Nobody can see it yet, so lock it right away
         fcntl.lockf(self.lockfileFD, fcntl.LOCK_EX)  # type: ignore
 
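The rewritten tail of cpu_count treats every unknown limit as infinity, takes the minimum of the machine size, cgroup quota, and CPU affinity counts, and falls back to 1 when no usable limit remains. A hypothetical standalone version of just that combining step:

    def combine_cpu_limits(*limits: float) -> int:
        # Unknown limits are passed as float('inf'); keep the smallest.
        limit = min([float('inf'), *limits])
        if limit < 1 or limit == float('inf'):
            # Nothing usable was found; assume a single core.
            limit = 1
        return int(limit)

    assert combine_cpu_limits(16, float('inf'), 4) == 4
    assert combine_cpu_limits() == 1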
toil/options/common.py CHANGED
@@ -2,13 +2,12 @@ import os
 from argparse import ArgumentParser, Action, _AppendAction
 from typing import Any, Optional, Union, Type, Callable, List, Dict, TYPE_CHECKING
 
-from distutils.util import strtobool
 from configargparse import SUPPRESS
 import logging
 
 from ruamel.yaml import YAML
 
-from toil.lib.conversions import bytes2human, human2bytes
+from toil.lib.conversions import bytes2human, human2bytes, strtobool
 
 from toil.batchSystems.options import add_all_batchsystem_options
 from toil.provisioners import parse_node_types
@@ -595,12 +594,12 @@ def add_base_toil_options(parser: ArgumentParser, jobstore_as_flag: bool = False
         title="Toil log management options.",
         description="Options for how Toil should manage its logs."
     )
-    log_options.add_argument("--maxLogFileSize", dest="maxLogFileSize", default=64000, type=h2b,
+    log_options.add_argument("--maxLogFileSize", dest="maxLogFileSize", default=100 * 1024 * 1024, type=h2b,
                              action=make_open_interval_action(1),
                              help=f"The maximum size of a job log file to keep (in bytes), log files larger than "
                                   f"this will be truncated to the last X bytes. Setting this option to zero will "
                                   f"prevent any truncation. Setting this option to a negative value will truncate "
-                                  f"from the beginning. Default={bytes2human(64000)}")
+                                  f"from the beginning. Default={bytes2human(100 * 1024 * 1024)}")
     log_options.add_argument("--writeLogs", dest="writeLogs", nargs='?', action='store', default=None,
                              const=os.getcwd(), metavar="OPT_PATH",
                              help="Write worker logs received by the leader into their own files at the specified "
@@ -692,8 +691,7 @@ def add_base_toil_options(parser: ArgumentParser, jobstore_as_flag: bool = False
                               'TOIL_APPLIANCE_SELF, which Toil uses to provision mesos for autoscaling.')
     misc_options.add_argument('--statusWait', dest='statusWait', type=int, default=3600, metavar="INT",
                               help="Seconds to wait between reports of running jobs.")
-    misc_options.add_argument('--disableProgress', dest='disableProgress', type=convert_bool, default=False,
-                              metavar="BOOL",
+    misc_options.add_argument('--disableProgress', dest='disableProgress', action="store_true", default=False,
                               help="Disables the progress bar shown when standard error is a terminal.")
 
     # Debug options
@@ -735,4 +733,4 @@ def add_base_toil_options(parser: ArgumentParser, jobstore_as_flag: bool = False
 
     # dest is set to enableCaching to not conflict with the current --caching destination
     caching.add_argument('--disableCaching', dest='enableCaching', action='store_false', help=SUPPRESS)
-    caching.set_defaults(disableCaching=None)
+    caching.set_defaults(enableCaching=None)
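Note that strtobool is now imported from toil.lib.conversions (distutils, its old home, was removed in Python 3.12); this diff does not show that implementation. A stand-in compatible with the removed distutils.util.strtobool would look roughly like:

    def strtobool(val: str) -> bool:
        # Accept the same spellings as the removed distutils.util.strtobool.
        if val.lower() in ('y', 'yes', 't', 'true', 'on', '1'):
            return True
        if val.lower() in ('n', 'no', 'f', 'false', 'off', '0'):
            return False
        raise ValueError(f"invalid truth value {val!r}")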
toil/options/wdl.py CHANGED
@@ -13,6 +13,8 @@ def add_wdl_options(parser: ArgumentParser, suppress: bool = True) -> None:
     suppress_help = SUPPRESS if suppress else None
     # include arg names without a wdl specifier if suppress is False
     # this is to avoid possible duplicate options in custom toil scripts, ex outputFile can be a common argument name
+    # TODO: Why do we even need them at all in other Toil scripts? Do we have to worry about dest= collisions?
+    # TODO: Can the better option name be first?
     output_dialect_arguments = ["--wdlOutputDialect"] + (["--outputDialect"] if not suppress else [])
     parser.add_argument(*output_dialect_arguments, dest="output_dialect", type=str, default='cromwell',
                         choices=['cromwell', 'miniwdl'],
@@ -30,3 +32,6 @@ def add_wdl_options(parser: ArgumentParser, suppress: bool = True) -> None:
     reference_inputs_arguments = ["--wdlReferenceInputs"] + (["--referenceInputs"] if not suppress else [])
     parser.add_argument(*reference_inputs_arguments, dest="reference_inputs", type=bool, default=False,
                         help=suppress_help or "Pass input files by URL")
+    container_arguments = ["--wdlContainer"] + (["--container"] if not suppress else [])
+    parser.add_argument(*container_arguments, dest="container", type=str, choices=["singularity", "docker", "auto"], default="auto",
+                        help=suppress_help or "Container engine to use to run WDL tasks")
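The pattern above registers the --wdl-prefixed spelling unconditionally and the short alias only when help is not suppressed, with both spellings sharing one dest. A standalone illustration of the same argparse technique (not Toil's actual parser setup):

    from argparse import ArgumentParser

    parser = ArgumentParser()
    suppress = False  # as when add_wdl_options is called with suppress=False
    names = ["--wdlContainer"] + (["--container"] if not suppress else [])
    parser.add_argument(*names, dest="container", type=str,
                        choices=["singularity", "docker", "auto"], default="auto")
    print(parser.parse_args(["--container", "docker"]).container)  # -> docker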
toil/provisioners/abstractProvisioner.py CHANGED
@@ -812,6 +812,14 @@ class AbstractProvisioner(ABC):
     -v /opt:/opt \\
     -v /etc/kubernetes:/etc/kubernetes \\
     -v /etc/kubernetes/admin.conf:/root/.kube/config \\
+    # Pass in a path to use for singularity image caching into the container
+    -e TOIL_KUBERNETES_HOST_PATH=/var/lib/toil \\
+    -e SINGULARITY_CACHEDIR=/var/lib/toil/singularity \\
+    -e MINIWDL__SINGULARITY__IMAGE_CACHE=/var/lib/toil/miniwdl \\
+    # These rules are necessary in order to get user namespaces working
+    # https://github.com/apptainer/singularity/issues/5806
+    --security-opt seccomp=unconfined \\
+    --security-opt systempaths=unconfined \\
     --name=toil_{role} \\
     {applianceSelf()} \\
     {entryPointArgs}
toil/statsAndLogging.py CHANGED
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import gzip
+import io
 import json
 import logging
 import os
@@ -49,7 +50,7 @@ class StatsAndLogging:
         self._worker.start()
 
     @classmethod
-    def formatLogStream(cls, stream: Union[IO[str], IO[bytes]], job_name: Optional[str] = None) -> str:
+    def formatLogStream(cls, stream: Union[IO[str], IO[bytes]], stream_name: str) -> str:
         """
         Given a stream of text or bytes, and the job name, job itself, or some
         other optional stringifyable identity info for the job, return a big
@@ -62,7 +63,7 @@ class StatsAndLogging:
 
         :param stream: The stream of text or bytes to print for the user.
         """
-        lines = [f'Log from job "{job_name}" follows:', '=========>']
+        lines = [f'{stream_name} follows:', '=========>']
 
         for line in stream:
             if isinstance(line, bytes):
@@ -75,13 +76,13 @@ class StatsAndLogging:
 
 
     @classmethod
-    def logWithFormatting(cls, jobStoreID: str, jobLogs: Union[IO[str], IO[bytes]], method: Callable[[str], None] = logger.debug,
+    def logWithFormatting(cls, stream_name: str, jobLogs: Union[IO[str], IO[bytes]], method: Callable[[str], None] = logger.debug,
                           message: Optional[str] = None) -> None:
         if message is not None:
             method(message)
 
-        # Format and log the logs, identifying the job with its job store ID.
-        method(cls.formatLogStream(jobLogs, jobStoreID))
+        # Format and log the logs, identifying the stream with the given name.
+        method(cls.formatLogStream(jobLogs, stream_name))
 
     @classmethod
     def writeLogFiles(cls, jobNames: List[str], jobLogList: List[str], config: 'Config', failed: bool = False) -> None:
@@ -95,7 +96,7 @@ class StatsAndLogging:
             logName = ('failed_' if failed else '') + logName
             counter = 0
             while True:
-                suffix = str(counter).zfill(3) + logExtension
+                suffix = '_' + str(counter).zfill(3) + logExtension
                 fullName = os.path.join(logPath, logName + suffix)
                 # The maximum file name size in the default HFS+ file system is 255 UTF-16 encoding units, so basically 255 characters
                 if len(fullName) >= 255:
@@ -118,6 +119,9 @@ class StatsAndLogging:
             # we don't have anywhere to write the logs, return now
             return
 
+        # Make sure the destination exists
+        os.makedirs(path, exist_ok=True)
+
         fullName = createName(path, mainFileName, extension, failed)
         with writeFn(fullName, 'wb') as f:
             for l in jobLogList:
@@ -150,8 +154,10 @@ class StatsAndLogging:
         stats = json.loads(statsStr, object_hook=Expando)
         if not stats:
             return
+
         try:
-            logs = stats.workers.logsToMaster
+            # Handle all the log_to_leader messages
+            logs = stats.workers.logs_to_leader
         except AttributeError:
             # To be expected if there were no calls to log_to_leader()
             pass
@@ -160,6 +166,28 @@ class StatsAndLogging:
                 logger.log(int(message.level),
                            'Got message from job at time %s: %s',
                            time.strftime('%m-%d-%Y %H:%M:%S'), message.text)
+
+        try:
+            # Handle all the user-level text streams reported back (command output, etc.)
+            user_logs = stats.workers.logging_user_streams
+        except AttributeError:
+            # To be expected if there were no calls to log_user_stream()
+            pass
+        else:
+            for stream_entry in user_logs:
+                try:
+                    # Unpack the stream name and text.
+                    name, text = stream_entry.name, stream_entry.text
+                except AttributeError:
+                    # Doesn't have a user-provided stream name and stream
+                    # text, so skip it.
+                    continue
+                # Since this is sent as inline text we need to pretend to stream it.
+                # TODO: Save these as individual files if they start to get too big?
+                cls.logWithFormatting(name, io.StringIO(text), logger.info)
+                # Save it as a log file, as if it were a Toil-level job.
+                cls.writeLogFiles([name], [text], config=config)
+
         try:
             logs = stats.logs
         except AttributeError:
@@ -168,7 +196,7 @@ class StatsAndLogging:
             # we may have multiple jobs per worker
             jobNames = logs.names
             messages = logs.messages
-            cls.logWithFormatting(jobNames[0], messages,
+            cls.logWithFormatting(f'Log from job "{jobNames[0]}"', messages,
                                   message='Received Toil worker log. Disable debug level logging to hide this output')
             cls.writeLogFiles(jobNames, messages, config=config)
 
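The stats-processing code reads the worker's JSON blob through Expando attribute access, so the new branch expects workers.logging_user_streams entries carrying name and text fields. A hypothetical minimal payload it would accept (the exact worker-side schema is defined elsewhere in Toil and is not shown in this diff):

    import json

    stats_str = json.dumps({
        "workers": {
            "logs_to_leader": [],
            "logging_user_streams": [
                {"name": "stdout_from_task", "text": "hello from the task\n"}
            ]
        }
    })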
toil/test/batchSystems/test_slurm.py CHANGED
@@ -4,6 +4,7 @@ from queue import Queue
 import pytest
 
 import toil.batchSystems.slurm
+from toil.batchSystems.abstractBatchSystem import BatchJobExitReason, EXIT_STATUS_UNAVAILABLE_VALUE
 from toil.common import Config
 from toil.lib.misc import CalledProcessErrorStderr
 from toil.test import ToilTest
@@ -284,7 +285,7 @@ class SlurmTest(ToilTest):
     def test_getJobExitCode_job_exists(self):
         self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_sacct)
         job_id = '785023'  # FAILED
-        expected_result = 127
+        expected_result = (127, BatchJobExitReason.FAILED)
         result = self.worker.getJobExitCode(job_id)
         assert result == expected_result, f"{result} != {expected_result}"
 
@@ -303,7 +304,7 @@ class SlurmTest(ToilTest):
         self.monkeypatch.setattr(self.worker, "_getJobDetailsFromSacct", call_sacct_raises)
         self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_scontrol)
         job_id = '787204'  # COMPLETED
-        expected_result = 0
+        expected_result = (0, BatchJobExitReason.FINISHED)
         result = self.worker.getJobExitCode(job_id)
         assert result == expected_result, f"{result} != {expected_result}"
 
@@ -329,7 +330,7 @@ class SlurmTest(ToilTest):
     def test_coalesce_job_exit_codes_one_exists(self):
         self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_sacct)
         job_ids = ['785023']  # FAILED
-        expected_result = [127]
+        expected_result = [(127, BatchJobExitReason.FAILED)]
         result = self.worker.coalesce_job_exit_codes(job_ids)
         assert result == expected_result, f"{result} != {expected_result}"
 
@@ -347,7 +348,14 @@ class SlurmTest(ToilTest):
                    '789724',  # RUNNING,
                    '789868',  # PENDING,
                    '789869']  # COMPLETED
-        expected_result = [0, 1, None, None, 0]  # RUNNING and PENDING jobs should return None
+        # RUNNING and PENDING jobs should return None
+        expected_result = [
+            (EXIT_STATUS_UNAVAILABLE_VALUE, BatchJobExitReason.KILLED),
+            (1, BatchJobExitReason.FAILED),
+            None,
+            None,
+            (0, BatchJobExitReason.FINISHED)
+        ]
         result = self.worker.coalesce_job_exit_codes(job_ids)
         assert result == expected_result, f"{result} != {expected_result}"
 
@@ -358,7 +366,14 @@ class SlurmTest(ToilTest):
                    '789724',  # RUNNING,
                    '999999',  # Non-existent,
                    '789869']  # COMPLETED
-        expected_result = [130, 2, None, None, 0]  # RUNNING job should return None
+        # RUNNING job should return None
+        expected_result = [
+            (130, BatchJobExitReason.FAILED),
+            (2, BatchJobExitReason.FAILED),
+            None,
+            None,
+            (0, BatchJobExitReason.FINISHED)
+        ]
         result = self.worker.coalesce_job_exit_codes(job_ids)
         assert result == expected_result, f"{result} != {expected_result}"
 
@@ -370,7 +385,7 @@ class SlurmTest(ToilTest):
         self.monkeypatch.setattr(self.worker, "_getJobDetailsFromSacct", call_sacct_raises)
         self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_scontrol)
         job_ids = ['787204']  # COMPLETED
-        expected_result = [0]
+        expected_result = [(0, BatchJobExitReason.FINISHED)]
         result = self.worker.coalesce_job_exit_codes(job_ids)
         assert result == expected_result, f"{result} != {expected_result}"
 
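These updated expectations encode the new contract: the Slurm worker's getJobExitCode and coalesce_job_exit_codes now return (exit status, BatchJobExitReason) tuples, or None for still-running or pending jobs, instead of bare integers. Hypothetical caller-side handling:

    result = worker.getJobExitCode(job_id)
    if result is not None:
        # The job has finished one way or another.
        status, reason = result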
toil/test/cactus/__init__.py ADDED
File without changes
toil/test/cactus/test_cactus_integration.py ADDED
@@ -0,0 +1,58 @@
+import os
+import uuid
+
+from toil.provisioners import cluster_factory
+from toil.test.provisioners.clusterTest import AbstractClusterTest
+
+
+class CactusIntegrationTest(AbstractClusterTest):
+    """
+    Run the Cactus Integration test on a Kubernetes AWS cluster
+    """
+
+    def __init__(self, methodName):
+        super().__init__(methodName=methodName)
+        self.clusterName = "cactus-test-" + str(uuid.uuid4())
+        self.leaderNodeType = "t2.medium"
+        self.clusterType = "kubernetes"
+
+    def setUp(self):
+        super().setUp()
+        self.jobStore = f"aws:{self.awsRegion()}:cluster-{uuid.uuid4()}"
+
+    def test_cactus_integration(self):
+        # Make a cluster with worker nodes
+        self.createClusterUtil(args=["--nodeTypes=t2.xlarge", "-w=1-3"])
+        # get the leader so we know the IP address - we don't need to wait since create cluster
+        # already ensures the leader is running
+        self.cluster = cluster_factory(
+            provisioner="aws", zone=self.zone, clusterName=self.clusterName
+        )
+        self.leader = self.cluster.getLeader()
+
+        CACTUS_COMMIT_SHA = os.environ["CACTUS_COMMIT_SHA"] or "f5adf4013326322ae58ef1eccb8409b71d761583"  # default cactus commit
+
+        # command to install and run cactus on the cluster
+        cactus_command = ("python -m virtualenv --system-site-packages venv && "
+                          ". venv/bin/activate && "
+                          "git clone https://github.com/ComparativeGenomicsToolkit/cactus.git --recursive && "
+                          "cd cactus && "
+                          "git fetch origin && "
+                          f"git checkout {CACTUS_COMMIT_SHA} && "
+                          "git submodule update --init --recursive && "
+                          "pip install --upgrade 'setuptools<66' pip && "
+                          "pip install --upgrade . && "
+                          "pip install --upgrade numpy psutil && "
+                          "time cactus --batchSystem kubernetes --retryCount=3 "
+                          f"--consCores 2 --binariesMode singularity --clean always {self.jobStore} "
+                          "examples/evolverMammals.txt examples/evolverMammals.hal --root mr --defaultDisk 8G --logDebug")
+
+        # run cactus
+        self.sshUtil(
+            [
+                "bash",
+                "-c",
+                cactus_command
+            ]
+        )
+