toil 9.0.0__py3-none-any.whl → 9.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. toil/batchSystems/abstractBatchSystem.py +13 -5
  2. toil/batchSystems/abstractGridEngineBatchSystem.py +17 -5
  3. toil/batchSystems/kubernetes.py +13 -2
  4. toil/batchSystems/mesos/batchSystem.py +33 -2
  5. toil/batchSystems/slurm.py +191 -16
  6. toil/cwl/cwltoil.py +17 -82
  7. toil/fileStores/__init__.py +1 -1
  8. toil/fileStores/abstractFileStore.py +5 -2
  9. toil/fileStores/cachingFileStore.py +1 -1
  10. toil/job.py +30 -14
  11. toil/jobStores/abstractJobStore.py +24 -19
  12. toil/jobStores/aws/jobStore.py +862 -1963
  13. toil/jobStores/aws/utils.py +24 -270
  14. toil/jobStores/googleJobStore.py +25 -9
  15. toil/jobStores/utils.py +0 -327
  16. toil/leader.py +27 -22
  17. toil/lib/aws/config.py +22 -0
  18. toil/lib/aws/s3.py +477 -9
  19. toil/lib/aws/utils.py +22 -33
  20. toil/lib/checksum.py +88 -0
  21. toil/lib/conversions.py +33 -31
  22. toil/lib/directory.py +217 -0
  23. toil/lib/ec2.py +97 -29
  24. toil/lib/exceptions.py +2 -1
  25. toil/lib/expando.py +2 -2
  26. toil/lib/generatedEC2Lists.py +73 -16
  27. toil/lib/io.py +33 -2
  28. toil/lib/memoize.py +21 -7
  29. toil/lib/pipes.py +385 -0
  30. toil/lib/retry.py +1 -1
  31. toil/lib/threading.py +1 -1
  32. toil/lib/web.py +4 -5
  33. toil/provisioners/__init__.py +5 -2
  34. toil/provisioners/aws/__init__.py +43 -36
  35. toil/provisioners/aws/awsProvisioner.py +22 -13
  36. toil/provisioners/node.py +60 -12
  37. toil/resource.py +3 -13
  38. toil/test/__init__.py +14 -16
  39. toil/test/batchSystems/test_slurm.py +103 -14
  40. toil/test/cwl/staging_cat.cwl +27 -0
  41. toil/test/cwl/staging_make_file.cwl +25 -0
  42. toil/test/cwl/staging_workflow.cwl +43 -0
  43. toil/test/cwl/zero_default.cwl +61 -0
  44. toil/test/docs/scripts/tutorial_staging.py +17 -8
  45. toil/test/jobStores/jobStoreTest.py +23 -133
  46. toil/test/lib/aws/test_iam.py +7 -7
  47. toil/test/lib/aws/test_s3.py +30 -33
  48. toil/test/lib/aws/test_utils.py +9 -9
  49. toil/test/provisioners/aws/awsProvisionerTest.py +59 -6
  50. toil/test/src/autoDeploymentTest.py +2 -3
  51. toil/test/src/fileStoreTest.py +89 -87
  52. toil/test/utils/ABCWorkflowDebug/ABC.txt +1 -0
  53. toil/test/utils/ABCWorkflowDebug/debugWorkflow.py +4 -4
  54. toil/test/utils/toilKillTest.py +35 -28
  55. toil/test/wdl/md5sum/md5sum.json +1 -1
  56. toil/test/wdl/testfiles/gather.wdl +52 -0
  57. toil/test/wdl/wdltoil_test.py +120 -38
  58. toil/test/wdl/wdltoil_test_kubernetes.py +9 -0
  59. toil/utils/toilDebugFile.py +6 -3
  60. toil/utils/toilStats.py +17 -2
  61. toil/version.py +6 -6
  62. toil/wdl/wdltoil.py +1038 -549
  63. toil/worker.py +5 -2
  64. {toil-9.0.0.dist-info → toil-9.1.1.dist-info}/METADATA +12 -12
  65. {toil-9.0.0.dist-info → toil-9.1.1.dist-info}/RECORD +69 -61
  66. toil/lib/iterables.py +0 -112
  67. toil/test/docs/scripts/stagingExampleFiles/in.txt +0 -1
  68. {toil-9.0.0.dist-info → toil-9.1.1.dist-info}/WHEEL +0 -0
  69. {toil-9.0.0.dist-info → toil-9.1.1.dist-info}/entry_points.txt +0 -0
  70. {toil-9.0.0.dist-info → toil-9.1.1.dist-info}/licenses/LICENSE +0 -0
  71. {toil-9.0.0.dist-info → toil-9.1.1.dist-info}/top_level.txt +0 -0
toil/jobStores/utils.py CHANGED
@@ -10,333 +10,6 @@ from toil.lib.threading import ExceptionalThread
10
10
 
11
11
  log = logging.getLogger(__name__)
12
12
 
13
-
14
- class WritablePipe(ABC):
15
- """
16
- An object-oriented wrapper for os.pipe. Clients should subclass it, implement
17
- :meth:`.readFrom` to consume the readable end of the pipe, then instantiate the class as a
18
- context manager to get the writable end. See the example below.
19
-
20
- >>> import sys, shutil
21
- >>> class MyPipe(WritablePipe):
22
- ... def readFrom(self, readable):
23
- ... shutil.copyfileobj(codecs.getreader('utf-8')(readable), sys.stdout)
24
- >>> with MyPipe() as writable:
25
- ... _ = writable.write('Hello, world!\\n'.encode('utf-8'))
26
- Hello, world!
27
-
28
- Each instance of this class creates a thread and invokes the readFrom method in that thread.
29
- The thread will be join()ed upon normal exit from the context manager, i.e. the body of the
30
- `with` statement. If an exception occurs, the thread will not be joined but a well-behaved
31
- :meth:`.readFrom` implementation will terminate shortly thereafter due to the pipe having
32
- been closed.
33
-
34
- Now, exceptions in the reader thread will be reraised in the main thread:
35
-
36
- >>> class MyPipe(WritablePipe):
37
- ... def readFrom(self, readable):
38
- ... raise RuntimeError('Hello, world!')
39
- >>> with MyPipe() as writable:
40
- ... pass
41
- Traceback (most recent call last):
42
- ...
43
- RuntimeError: Hello, world!
44
-
45
- More complicated, less illustrative tests:
46
-
47
- Same as above, but proving that handles are closed:
48
-
49
- >>> x = os.dup(0); os.close(x)
50
- >>> class MyPipe(WritablePipe):
51
- ... def readFrom(self, readable):
52
- ... raise RuntimeError('Hello, world!')
53
- >>> with MyPipe() as writable:
54
- ... pass
55
- Traceback (most recent call last):
56
- ...
57
- RuntimeError: Hello, world!
58
- >>> y = os.dup(0); os.close(y); x == y
59
- True
60
-
61
- Exceptions in the body of the with statement aren't masked, and handles are closed:
62
-
63
- >>> x = os.dup(0); os.close(x)
64
- >>> class MyPipe(WritablePipe):
65
- ... def readFrom(self, readable):
66
- ... pass
67
- >>> with MyPipe() as writable:
68
- ... raise RuntimeError('Hello, world!')
69
- Traceback (most recent call last):
70
- ...
71
- RuntimeError: Hello, world!
72
- >>> y = os.dup(0); os.close(y); x == y
73
- True
74
- """
75
-
76
- @abstractmethod
77
- def readFrom(self, readable):
78
- """
79
- Implement this method to read data from the pipe. This method should support both
80
- binary and text mode output.
81
-
82
- :param file readable: the file object representing the readable end of the pipe. Do not
83
- explicitly invoke the close() method of the object, that will be done automatically.
84
- """
85
- raise NotImplementedError()
86
-
87
- def _reader(self):
88
- with os.fdopen(self.readable_fh, "rb") as readable:
89
- # TODO: If the reader somehow crashes here, both threads might try
90
- # to close readable_fh. Fortunately we don't do anything that
91
- # should be able to fail here.
92
- self.readable_fh = None # signal to parent thread that we've taken over
93
- self.readFrom(readable)
94
- self.reader_done = True
95
-
96
- def __init__(self, encoding=None, errors=None):
97
- """
98
- The specified encoding and errors apply to the writable end of the pipe.
99
-
100
- :param str encoding: the name of the encoding used to encode the file. Encodings are the same
101
- as for encode(). Defaults to None which represents binary mode.
102
-
103
- :param str errors: an optional string that specifies how encoding errors are to be handled. Errors
104
- are the same as for open(). Defaults to 'strict' when an encoding is specified.
105
- """
106
- super().__init__()
107
- self.encoding = encoding
108
- self.errors = errors
109
- self.readable_fh = None
110
- self.writable = None
111
- self.thread = None
112
- self.reader_done = False
113
-
114
- def __enter__(self):
115
- self.readable_fh, writable_fh = os.pipe()
116
- self.writable = os.fdopen(
117
- writable_fh,
118
- "wb" if self.encoding == None else "wt",
119
- encoding=self.encoding,
120
- errors=self.errors,
121
- )
122
- self.thread = ExceptionalThread(target=self._reader)
123
- self.thread.start()
124
- return self.writable
125
-
126
- def __exit__(self, exc_type, exc_val, exc_tb):
127
- # Closing the writable end will send EOF to the readable and cause the reader thread
128
- # to finish.
129
- # TODO: Can close() fail? If so, would we try and clean up after the reader?
130
- self.writable.close()
131
- try:
132
- if self.thread is not None:
133
- # reraises any exception that was raised in the thread
134
- self.thread.join()
135
- except Exception as e:
136
- if exc_type is None:
137
- # Only raise the child exception if there wasn't
138
- # already an exception in the main thread
139
- raise
140
- else:
141
- log.error(
142
- "Swallowing additional exception in reader thread: %s", str(e)
143
- )
144
- finally:
145
- # The responsibility for closing the readable end is generally that of the reader
146
- # thread. To cover the small window before the reader takes over we also close it here.
147
- readable_fh = self.readable_fh
148
- if readable_fh is not None:
149
- # Close the file handle. The reader thread must be dead now.
150
- os.close(readable_fh)
151
-
152
-
153
- class ReadablePipe(ABC):
154
- """
155
- An object-oriented wrapper for os.pipe. Clients should subclass it, implement
156
- :meth:`.writeTo` to place data into the writable end of the pipe, then instantiate the class
157
- as a context manager to get the writable end. See the example below.
158
-
159
- >>> import sys, shutil
160
- >>> class MyPipe(ReadablePipe):
161
- ... def writeTo(self, writable):
162
- ... writable.write('Hello, world!\\n'.encode('utf-8'))
163
- >>> with MyPipe() as readable:
164
- ... shutil.copyfileobj(codecs.getreader('utf-8')(readable), sys.stdout)
165
- Hello, world!
166
-
167
- Each instance of this class creates a thread and invokes the :meth:`.writeTo` method in that
168
- thread. The thread will be join()ed upon normal exit from the context manager, i.e. the body
169
- of the `with` statement. If an exception occurs, the thread will not be joined but a
170
- well-behaved :meth:`.writeTo` implementation will terminate shortly thereafter due to the
171
- pipe having been closed.
172
-
173
- Now, exceptions in the reader thread will be reraised in the main thread:
174
-
175
- >>> class MyPipe(ReadablePipe):
176
- ... def writeTo(self, writable):
177
- ... raise RuntimeError('Hello, world!')
178
- >>> with MyPipe() as readable:
179
- ... pass
180
- Traceback (most recent call last):
181
- ...
182
- RuntimeError: Hello, world!
183
-
184
- More complicated, less illustrative tests:
185
-
186
- Same as above, but proving that handles are closed:
187
-
188
- >>> x = os.dup(0); os.close(x)
189
- >>> class MyPipe(ReadablePipe):
190
- ... def writeTo(self, writable):
191
- ... raise RuntimeError('Hello, world!')
192
- >>> with MyPipe() as readable:
193
- ... pass
194
- Traceback (most recent call last):
195
- ...
196
- RuntimeError: Hello, world!
197
- >>> y = os.dup(0); os.close(y); x == y
198
- True
199
-
200
- Exceptions in the body of the with statement aren't masked, and handles are closed:
201
-
202
- >>> x = os.dup(0); os.close(x)
203
- >>> class MyPipe(ReadablePipe):
204
- ... def writeTo(self, writable):
205
- ... pass
206
- >>> with MyPipe() as readable:
207
- ... raise RuntimeError('Hello, world!')
208
- Traceback (most recent call last):
209
- ...
210
- RuntimeError: Hello, world!
211
- >>> y = os.dup(0); os.close(y); x == y
212
- True
213
- """
214
-
215
- @abstractmethod
216
- def writeTo(self, writable):
217
- """
218
- Implement this method to write data from the pipe. This method should support both
219
- binary and text mode input.
220
-
221
- :param file writable: the file object representing the writable end of the pipe. Do not
222
- explicitly invoke the close() method of the object, that will be done automatically.
223
- """
224
- raise NotImplementedError()
225
-
226
- def _writer(self):
227
- try:
228
- with os.fdopen(self.writable_fh, "wb") as writable:
229
- self.writeTo(writable)
230
- except OSError as e:
231
- # The other side of the pipe may have been closed by the
232
- # reading thread, which is OK.
233
- if e.errno != errno.EPIPE:
234
- raise
235
-
236
- def __init__(self, encoding=None, errors=None):
237
- """
238
- The specified encoding and errors apply to the readable end of the pipe.
239
-
240
- :param str encoding: the name of the encoding used to encode the file. Encodings are the same
241
- as for encode(). Defaults to None which represents binary mode.
242
-
243
- :param str errors: an optional string that specifies how encoding errors are to be handled. Errors
244
- are the same as for open(). Defaults to 'strict' when an encoding is specified.
245
- """
246
- super().__init__()
247
- self.encoding = encoding
248
- self.errors = errors
249
- self.writable_fh = None
250
- self.readable = None
251
- self.thread = None
252
-
253
- def __enter__(self):
254
- readable_fh, self.writable_fh = os.pipe()
255
- self.readable = os.fdopen(
256
- readable_fh,
257
- "rb" if self.encoding == None else "rt",
258
- encoding=self.encoding,
259
- errors=self.errors,
260
- )
261
- self.thread = ExceptionalThread(target=self._writer)
262
- self.thread.start()
263
- return self.readable
264
-
265
- def __exit__(self, exc_type, exc_val, exc_tb):
266
- # Close the read end of the pipe. The writing thread may
267
- # still be writing to the other end, but this will wake it up
268
- # if that's the case.
269
- self.readable.close()
270
- try:
271
- if self.thread is not None:
272
- # reraises any exception that was raised in the thread
273
- self.thread.join()
274
- except:
275
- if exc_type is None:
276
- # Only raise the child exception if there wasn't
277
- # already an exception in the main thread
278
- raise
279
-
280
-
281
- class ReadableTransformingPipe(ReadablePipe):
282
- """
283
- A pipe which is constructed around a readable stream, and which provides a
284
- context manager that gives a readable stream.
285
-
286
- Useful as a base class for pipes which have to transform or otherwise visit
287
- bytes that flow through them, instead of just consuming or producing data.
288
-
289
- Clients should subclass it and implement :meth:`.transform`, like so:
290
-
291
- >>> import sys, shutil
292
- >>> class MyPipe(ReadableTransformingPipe):
293
- ... def transform(self, readable, writable):
294
- ... writable.write(readable.read().decode('utf-8').upper().encode('utf-8'))
295
- >>> class SourcePipe(ReadablePipe):
296
- ... def writeTo(self, writable):
297
- ... writable.write('Hello, world!\\n'.encode('utf-8'))
298
- >>> with SourcePipe() as source:
299
- ... with MyPipe(source) as transformed:
300
- ... shutil.copyfileobj(codecs.getreader('utf-8')(transformed), sys.stdout)
301
- HELLO, WORLD!
302
-
303
- The :meth:`.transform` method runs in its own thread, and should move data
304
- chunk by chunk instead of all at once. It should finish normally if it
305
- encounters either an EOF on the readable, or a :class:`BrokenPipeError` on
306
- the writable. This means that it should make sure to actually catch a
307
- :class:`BrokenPipeError` when writing.
308
-
309
- See also: :class:`toil.lib.misc.WriteWatchingStream`.
310
-
311
- """
312
-
313
- def __init__(self, source, encoding=None, errors=None):
314
- """
315
- :param str encoding: the name of the encoding used to encode the file. Encodings are the same
316
- as for encode(). Defaults to None which represents binary mode.
317
-
318
- :param str errors: an optional string that specifies how encoding errors are to be handled. Errors
319
- are the same as for open(). Defaults to 'strict' when an encoding is specified.
320
- """
321
- super().__init__(encoding=encoding, errors=errors)
322
- self.source = source
323
-
324
- @abstractmethod
325
- def transform(self, readable, writable):
326
- """
327
- Implement this method to ship data through the pipe.
328
-
329
- :param file readable: the input stream file object to transform.
330
-
331
- :param file writable: the file object representing the writable end of the pipe. Do not
332
- explicitly invoke the close() method of the object, that will be done automatically.
333
- """
334
- raise NotImplementedError()
335
-
336
- def writeTo(self, writable):
337
- self.transform(self.source, writable)
338
-
339
-
340
13
  class JobStoreUnavailableException(RuntimeError):
341
14
  """
342
15
  Raised when a particular type of job store is requested but can't be used.
toil/leader.py CHANGED
@@ -31,6 +31,7 @@ from toil.batchSystems.abstractBatchSystem import (
31
31
  EXIT_STATUS_UNAVAILABLE_VALUE,
32
32
  AbstractBatchSystem,
33
33
  BatchJobExitReason,
34
+ UpdatedBatchJobInfo,
34
35
  )
35
36
  from toil.bus import (
36
37
  JobCompletedMessage,
@@ -829,40 +830,41 @@ class Leader:
829
830
  # Mark the service job updated so we don't stop here.
830
831
  self._messages.publish(JobUpdatedMessage(client_id, 1))
831
832
 
832
- def _gatherUpdatedJobs(self, updatedJobTuple):
833
+ def _gatherUpdatedJobs(self, update: UpdatedBatchJobInfo):
833
834
  """Gather any new, updated JobDescriptions from the batch system."""
834
- bsID, exitStatus, exitReason, wallTime = (
835
- updatedJobTuple.jobID,
836
- updatedJobTuple.exitStatus,
837
- updatedJobTuple.exitReason,
838
- updatedJobTuple.wallTime,
839
- )
840
835
  # easy, track different state
841
836
  try:
842
837
  updatedJob = self.toilState.get_job(
843
- self.issued_jobs_by_batch_system_id[bsID]
838
+ self.issued_jobs_by_batch_system_id[update.jobID]
844
839
  )
845
840
  except KeyError:
846
841
  logger.warning(
847
- "A result seems to already have been processed for job %s", bsID
842
+ "A result seems to already have been processed for job %s", update.jobID
848
843
  )
849
844
  else:
850
- if exitStatus == 0:
845
+ if update.exitStatus == 0:
851
846
  logger.debug("Job ended: %s", updatedJob)
852
847
  else:
853
848
  status_string = (
854
- str(exitStatus)
855
- if exitStatus != EXIT_STATUS_UNAVAILABLE_VALUE
849
+ str(update.exitStatus)
850
+ if update.exitStatus != EXIT_STATUS_UNAVAILABLE_VALUE
856
851
  else "<UNAVAILABLE>"
857
852
  )
853
+ message = [
854
+ f"Job failed with exit value {status_string}: {updatedJob}",
855
+ f"Exit reason: {BatchJobExitReason.to_string(update.exitReason)}"
856
+ ]
857
+ if update.backing_id is not None:
858
+ # Report the job in the backing scheduler in case the user
859
+ # needs to follow it down a level.
860
+ message.append(f"Failed job in backing scheduler: {update.backing_id}")
858
861
  logger.warning(
859
- f"Job failed with exit value {status_string}: {updatedJob}\n"
860
- f"Exit reason: {BatchJobExitReason.to_string(exitReason)}"
862
+ "\n".join(message)
861
863
  )
862
864
  # This logic is undefined for which of the failing jobs will send its exit code
863
865
  # when there are multiple failing jobs with different exit statuses
864
- self.recommended_fail_exit_code = exitStatus
865
- if exitStatus == CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE:
866
+ self.recommended_fail_exit_code = update.exitStatus
867
+ if update.exitStatus == CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE:
866
868
  # This is a CWL job informing us that the workflow is
867
869
  # asking things of us that Toil can't do. When we raise an
868
870
  # exception because of this, make sure to forward along
@@ -876,11 +878,14 @@ class Leader:
876
878
  JobCompletedMessage(
877
879
  get_job_kind(updatedJob.get_names()),
878
880
  updatedJob.jobStoreID,
879
- exitStatus,
881
+ update.exitStatus,
880
882
  )
881
883
  )
882
884
  self.process_finished_job(
883
- bsID, exitStatus, wall_time=wallTime, exit_reason=exitReason
885
+ update.jobID,
886
+ update.exitStatus,
887
+ wall_time=update.wallTime,
888
+ exit_reason=update.exitReason,
884
889
  )
885
890
 
886
891
  def _processLostJobs(self):
@@ -926,10 +931,10 @@ class Leader:
926
931
  self._processJobsWithFailedServices()
927
932
 
928
933
  # check in with the batch system
929
- updatedJobTuple = self.batchSystem.getUpdatedBatchJob(maxWait=2)
930
- if updatedJobTuple is not None:
934
+ update: UpdatedBatchJobInfo = self.batchSystem.getUpdatedBatchJob(maxWait=2)
935
+ if update is not None:
931
936
  # Collect and process all the updates
932
- self._gatherUpdatedJobs(updatedJobTuple)
937
+ self._gatherUpdatedJobs(update)
933
938
  # As long as we are getting updates we definitely can't be
934
939
  # deadlocked.
935
940
  self.feed_deadlock_watchdog()
@@ -1125,7 +1130,7 @@ class Leader:
1125
1130
  self.preemptibleJobsIssued += 1
1126
1131
  cur_logger = logger.debug if jobNode.local else logger.info
1127
1132
  cur_logger(
1128
- "Issued job %s with job batch system ID: " "%s and %s",
1133
+ "Issued job %s with job batch system ID: %s and %s",
1129
1134
  jobNode,
1130
1135
  str(jobBatchSystemID),
1131
1136
  jobNode.requirements_string(),
toil/lib/aws/config.py ADDED
@@ -0,0 +1,22 @@
1
+ S3_PARALLELIZATION_FACTOR = 8
2
+ S3_PART_SIZE = 16 * 1024 * 1024
3
+ KiB = 1024
4
+ MiB = KiB * KiB
5
+
6
+ # Files must be larger than this before we consider multipart uploads.
7
+ AWS_MIN_CHUNK_SIZE = 64 * MiB
8
+ # Convenience variable for Boto3 TransferConfig(multipart_threshold=).
9
+ MULTIPART_THRESHOLD = AWS_MIN_CHUNK_SIZE + 1
10
+ # Maximum number of parts allowed in a multipart upload. This is a limitation imposed by S3.
11
+ AWS_MAX_MULTIPART_COUNT = 10000
12
+
13
+
14
+ def get_s3_multipart_chunk_size(filesize: int) -> int:
15
+ """Returns the chunk size of the S3 multipart object, given a file's size in bytes."""
16
+ if filesize <= AWS_MAX_MULTIPART_COUNT * AWS_MIN_CHUNK_SIZE:
17
+ return AWS_MIN_CHUNK_SIZE
18
+ else:
19
+ div = filesize // AWS_MAX_MULTIPART_COUNT
20
+ if div * AWS_MAX_MULTIPART_COUNT < filesize:
21
+ div += 1
22
+ return ((div + MiB - 1) // MiB) * MiB