toil 6.1.0a1__py3-none-any.whl → 8.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (193) hide show
  1. toil/__init__.py +122 -315
  2. toil/batchSystems/__init__.py +1 -0
  3. toil/batchSystems/abstractBatchSystem.py +173 -89
  4. toil/batchSystems/abstractGridEngineBatchSystem.py +272 -148
  5. toil/batchSystems/awsBatch.py +244 -135
  6. toil/batchSystems/cleanup_support.py +26 -16
  7. toil/batchSystems/contained_executor.py +31 -28
  8. toil/batchSystems/gridengine.py +86 -50
  9. toil/batchSystems/htcondor.py +166 -89
  10. toil/batchSystems/kubernetes.py +632 -382
  11. toil/batchSystems/local_support.py +20 -15
  12. toil/batchSystems/lsf.py +134 -81
  13. toil/batchSystems/lsfHelper.py +13 -11
  14. toil/batchSystems/mesos/__init__.py +41 -29
  15. toil/batchSystems/mesos/batchSystem.py +290 -151
  16. toil/batchSystems/mesos/executor.py +79 -50
  17. toil/batchSystems/mesos/test/__init__.py +31 -23
  18. toil/batchSystems/options.py +46 -28
  19. toil/batchSystems/registry.py +53 -19
  20. toil/batchSystems/singleMachine.py +296 -125
  21. toil/batchSystems/slurm.py +603 -138
  22. toil/batchSystems/torque.py +47 -33
  23. toil/bus.py +186 -76
  24. toil/common.py +664 -368
  25. toil/cwl/__init__.py +1 -1
  26. toil/cwl/cwltoil.py +1136 -483
  27. toil/cwl/utils.py +17 -22
  28. toil/deferred.py +63 -42
  29. toil/exceptions.py +5 -3
  30. toil/fileStores/__init__.py +5 -5
  31. toil/fileStores/abstractFileStore.py +140 -60
  32. toil/fileStores/cachingFileStore.py +717 -269
  33. toil/fileStores/nonCachingFileStore.py +116 -87
  34. toil/job.py +1225 -368
  35. toil/jobStores/abstractJobStore.py +416 -266
  36. toil/jobStores/aws/jobStore.py +863 -477
  37. toil/jobStores/aws/utils.py +201 -120
  38. toil/jobStores/conftest.py +3 -2
  39. toil/jobStores/fileJobStore.py +292 -154
  40. toil/jobStores/googleJobStore.py +140 -74
  41. toil/jobStores/utils.py +36 -15
  42. toil/leader.py +668 -272
  43. toil/lib/accelerators.py +115 -18
  44. toil/lib/aws/__init__.py +74 -31
  45. toil/lib/aws/ami.py +122 -87
  46. toil/lib/aws/iam.py +284 -108
  47. toil/lib/aws/s3.py +31 -0
  48. toil/lib/aws/session.py +214 -39
  49. toil/lib/aws/utils.py +287 -231
  50. toil/lib/bioio.py +13 -5
  51. toil/lib/compatibility.py +11 -6
  52. toil/lib/conversions.py +104 -47
  53. toil/lib/docker.py +131 -103
  54. toil/lib/ec2.py +361 -199
  55. toil/lib/ec2nodes.py +174 -106
  56. toil/lib/encryption/_dummy.py +5 -3
  57. toil/lib/encryption/_nacl.py +10 -6
  58. toil/lib/encryption/conftest.py +1 -0
  59. toil/lib/exceptions.py +26 -7
  60. toil/lib/expando.py +5 -3
  61. toil/lib/ftp_utils.py +217 -0
  62. toil/lib/generatedEC2Lists.py +127 -19
  63. toil/lib/humanize.py +6 -2
  64. toil/lib/integration.py +341 -0
  65. toil/lib/io.py +141 -15
  66. toil/lib/iterables.py +4 -2
  67. toil/lib/memoize.py +12 -8
  68. toil/lib/misc.py +66 -21
  69. toil/lib/objects.py +2 -2
  70. toil/lib/resources.py +68 -15
  71. toil/lib/retry.py +126 -81
  72. toil/lib/threading.py +299 -82
  73. toil/lib/throttle.py +16 -15
  74. toil/options/common.py +843 -409
  75. toil/options/cwl.py +175 -90
  76. toil/options/runner.py +50 -0
  77. toil/options/wdl.py +73 -17
  78. toil/provisioners/__init__.py +117 -46
  79. toil/provisioners/abstractProvisioner.py +332 -157
  80. toil/provisioners/aws/__init__.py +70 -33
  81. toil/provisioners/aws/awsProvisioner.py +1145 -715
  82. toil/provisioners/clusterScaler.py +541 -279
  83. toil/provisioners/gceProvisioner.py +282 -179
  84. toil/provisioners/node.py +155 -79
  85. toil/realtimeLogger.py +34 -22
  86. toil/resource.py +137 -75
  87. toil/server/app.py +128 -62
  88. toil/server/celery_app.py +3 -1
  89. toil/server/cli/wes_cwl_runner.py +82 -53
  90. toil/server/utils.py +54 -28
  91. toil/server/wes/abstract_backend.py +64 -26
  92. toil/server/wes/amazon_wes_utils.py +21 -15
  93. toil/server/wes/tasks.py +121 -63
  94. toil/server/wes/toil_backend.py +142 -107
  95. toil/server/wsgi_app.py +4 -3
  96. toil/serviceManager.py +58 -22
  97. toil/statsAndLogging.py +224 -70
  98. toil/test/__init__.py +282 -183
  99. toil/test/batchSystems/batchSystemTest.py +460 -210
  100. toil/test/batchSystems/batch_system_plugin_test.py +90 -0
  101. toil/test/batchSystems/test_gridengine.py +173 -0
  102. toil/test/batchSystems/test_lsf_helper.py +67 -58
  103. toil/test/batchSystems/test_slurm.py +110 -49
  104. toil/test/cactus/__init__.py +0 -0
  105. toil/test/cactus/test_cactus_integration.py +56 -0
  106. toil/test/cwl/cwlTest.py +496 -287
  107. toil/test/cwl/measure_default_memory.cwl +12 -0
  108. toil/test/cwl/not_run_required_input.cwl +29 -0
  109. toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
  110. toil/test/cwl/seqtk_seq.cwl +1 -1
  111. toil/test/docs/scriptsTest.py +69 -46
  112. toil/test/jobStores/jobStoreTest.py +427 -264
  113. toil/test/lib/aws/test_iam.py +118 -50
  114. toil/test/lib/aws/test_s3.py +16 -9
  115. toil/test/lib/aws/test_utils.py +5 -6
  116. toil/test/lib/dockerTest.py +118 -141
  117. toil/test/lib/test_conversions.py +113 -115
  118. toil/test/lib/test_ec2.py +58 -50
  119. toil/test/lib/test_integration.py +104 -0
  120. toil/test/lib/test_misc.py +12 -5
  121. toil/test/mesos/MesosDataStructuresTest.py +23 -10
  122. toil/test/mesos/helloWorld.py +7 -6
  123. toil/test/mesos/stress.py +25 -20
  124. toil/test/options/__init__.py +13 -0
  125. toil/test/options/options.py +42 -0
  126. toil/test/provisioners/aws/awsProvisionerTest.py +320 -150
  127. toil/test/provisioners/clusterScalerTest.py +440 -250
  128. toil/test/provisioners/clusterTest.py +166 -44
  129. toil/test/provisioners/gceProvisionerTest.py +174 -100
  130. toil/test/provisioners/provisionerTest.py +25 -13
  131. toil/test/provisioners/restartScript.py +5 -4
  132. toil/test/server/serverTest.py +188 -141
  133. toil/test/sort/restart_sort.py +137 -68
  134. toil/test/sort/sort.py +134 -66
  135. toil/test/sort/sortTest.py +91 -49
  136. toil/test/src/autoDeploymentTest.py +141 -101
  137. toil/test/src/busTest.py +20 -18
  138. toil/test/src/checkpointTest.py +8 -2
  139. toil/test/src/deferredFunctionTest.py +49 -35
  140. toil/test/src/dockerCheckTest.py +32 -24
  141. toil/test/src/environmentTest.py +135 -0
  142. toil/test/src/fileStoreTest.py +539 -272
  143. toil/test/src/helloWorldTest.py +7 -4
  144. toil/test/src/importExportFileTest.py +61 -31
  145. toil/test/src/jobDescriptionTest.py +46 -21
  146. toil/test/src/jobEncapsulationTest.py +2 -0
  147. toil/test/src/jobFileStoreTest.py +74 -50
  148. toil/test/src/jobServiceTest.py +187 -73
  149. toil/test/src/jobTest.py +121 -71
  150. toil/test/src/miscTests.py +19 -18
  151. toil/test/src/promisedRequirementTest.py +82 -36
  152. toil/test/src/promisesTest.py +7 -6
  153. toil/test/src/realtimeLoggerTest.py +10 -6
  154. toil/test/src/regularLogTest.py +71 -37
  155. toil/test/src/resourceTest.py +80 -49
  156. toil/test/src/restartDAGTest.py +36 -22
  157. toil/test/src/resumabilityTest.py +9 -2
  158. toil/test/src/retainTempDirTest.py +45 -14
  159. toil/test/src/systemTest.py +12 -8
  160. toil/test/src/threadingTest.py +44 -25
  161. toil/test/src/toilContextManagerTest.py +10 -7
  162. toil/test/src/userDefinedJobArgTypeTest.py +8 -5
  163. toil/test/src/workerTest.py +73 -23
  164. toil/test/utils/toilDebugTest.py +103 -33
  165. toil/test/utils/toilKillTest.py +4 -5
  166. toil/test/utils/utilsTest.py +245 -106
  167. toil/test/wdl/wdltoil_test.py +818 -149
  168. toil/test/wdl/wdltoil_test_kubernetes.py +91 -0
  169. toil/toilState.py +120 -35
  170. toil/utils/toilConfig.py +13 -4
  171. toil/utils/toilDebugFile.py +44 -27
  172. toil/utils/toilDebugJob.py +214 -27
  173. toil/utils/toilDestroyCluster.py +11 -6
  174. toil/utils/toilKill.py +8 -3
  175. toil/utils/toilLaunchCluster.py +256 -140
  176. toil/utils/toilMain.py +37 -16
  177. toil/utils/toilRsyncCluster.py +32 -14
  178. toil/utils/toilSshCluster.py +49 -22
  179. toil/utils/toilStats.py +356 -273
  180. toil/utils/toilStatus.py +292 -139
  181. toil/utils/toilUpdateEC2Instances.py +3 -1
  182. toil/version.py +12 -12
  183. toil/wdl/utils.py +5 -5
  184. toil/wdl/wdltoil.py +3913 -1033
  185. toil/worker.py +367 -184
  186. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/LICENSE +25 -0
  187. toil-8.0.0.dist-info/METADATA +173 -0
  188. toil-8.0.0.dist-info/RECORD +253 -0
  189. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/WHEEL +1 -1
  190. toil-6.1.0a1.dist-info/METADATA +0 -125
  191. toil-6.1.0a1.dist-info/RECORD +0 -237
  192. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/entry_points.txt +0 -0
  193. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/top_level.txt +0 -0
@@ -29,7 +29,7 @@ from toil.realtimeLogger import RealtimeLogger
29
29
 
30
30
  defaultLines = 1000
31
31
  defaultLineLen = 50
32
- sortMemory = '600M'
32
+ sortMemory = "600M"
33
33
 
34
34
 
35
35
  def setup(job, inputFile, N, downCheckpoints, options):
@@ -38,12 +38,16 @@ def setup(job, inputFile, N, downCheckpoints, options):
38
38
  Returns the FileID of the sorted file
39
39
  """
40
40
  RealtimeLogger.info("Starting the merge sort")
41
- return job.addChildJobFn(down,
42
- inputFile, N, 'root',
43
- downCheckpoints,
44
- options = options,
45
- preemptible=True,
46
- memory=sortMemory).rv()
41
+ return job.addChildJobFn(
42
+ down,
43
+ inputFile,
44
+ N,
45
+ "root",
46
+ downCheckpoints,
47
+ options=options,
48
+ preemptible=True,
49
+ memory=sortMemory,
50
+ ).rv()
47
51
 
48
52
 
49
53
  def down(job, inputFileStoreID, N, path, downCheckpoints, options, memory=sortMemory):
@@ -61,34 +65,57 @@ def down(job, inputFileStoreID, N, path, downCheckpoints, options, memory=sortMe
61
65
  length = os.path.getsize(inputFile)
62
66
  if length > N:
63
67
  # We will subdivide the file
64
- RealtimeLogger.critical("Splitting file: %s of size: %s"
65
- % (inputFileStoreID, length))
68
+ RealtimeLogger.critical(
69
+ "Splitting file: %s of size: %s" % (inputFileStoreID, length)
70
+ )
66
71
  # Split the file into two copies
67
72
  midPoint = getMidPoint(inputFile, 0, length)
68
73
  t1 = job.fileStore.getLocalTempFile()
69
- with open(t1, 'w') as fH:
70
- fH.write(copySubRangeOfFile(inputFile, 0, midPoint+1))
74
+ with open(t1, "w") as fH:
75
+ fH.write(copySubRangeOfFile(inputFile, 0, midPoint + 1))
71
76
  t2 = job.fileStore.getLocalTempFile()
72
- with open(t2, 'w') as fH:
73
- fH.write(copySubRangeOfFile(inputFile, midPoint+1, length))
77
+ with open(t2, "w") as fH:
78
+ fH.write(copySubRangeOfFile(inputFile, midPoint + 1, length))
74
79
  # Call down recursively. By giving the rv() of the two jobs as inputs to the follow-on job, up,
75
80
  # we communicate the dependency without hindering concurrency.
76
- result = job.addFollowOnJobFn(up,
77
- job.addChildJobFn(down, job.fileStore.writeGlobalFile(t1), N, path + '/0',
78
- downCheckpoints, checkpoint=downCheckpoints, options=options,
79
- preemptible=True, memory=options.sortMemory).rv(),
80
- job.addChildJobFn(down, job.fileStore.writeGlobalFile(t2), N, path + '/1',
81
- downCheckpoints, checkpoint=downCheckpoints, options=options,
82
- preemptible=True, memory=options.mergeMemory).rv(),
83
- path + '/up', preemptible=True, options=options, memory=options.sortMemory).rv()
81
+ result = job.addFollowOnJobFn(
82
+ up,
83
+ job.addChildJobFn(
84
+ down,
85
+ job.fileStore.writeGlobalFile(t1),
86
+ N,
87
+ path + "/0",
88
+ downCheckpoints,
89
+ checkpoint=downCheckpoints,
90
+ options=options,
91
+ preemptible=True,
92
+ memory=options.sortMemory,
93
+ ).rv(),
94
+ job.addChildJobFn(
95
+ down,
96
+ job.fileStore.writeGlobalFile(t2),
97
+ N,
98
+ path + "/1",
99
+ downCheckpoints,
100
+ checkpoint=downCheckpoints,
101
+ options=options,
102
+ preemptible=True,
103
+ memory=options.mergeMemory,
104
+ ).rv(),
105
+ path + "/up",
106
+ preemptible=True,
107
+ options=options,
108
+ memory=options.sortMemory,
109
+ ).rv()
84
110
  else:
85
111
  # We can sort this bit of the file
86
- RealtimeLogger.critical("Sorting file: %s of size: %s"
87
- % (inputFileStoreID, length))
112
+ RealtimeLogger.critical(
113
+ "Sorting file: %s of size: %s" % (inputFileStoreID, length)
114
+ )
88
115
  # Sort the copy and write back to the fileStore
89
- shutil.copyfile(inputFile, inputFile + '.sort')
90
- sort(inputFile + '.sort')
91
- result = job.fileStore.writeGlobalFile(inputFile + '.sort')
116
+ shutil.copyfile(inputFile, inputFile + ".sort")
117
+ sort(inputFile + ".sort")
118
+ result = job.fileStore.writeGlobalFile(inputFile + ".sort")
92
119
 
93
120
  RealtimeLogger.info("Down job finished: %s" % path)
94
121
  return result
@@ -102,13 +129,15 @@ def up(job, inputFileID1, inputFileID2, path, options, memory=sortMemory):
102
129
  RealtimeLogger.info("Up job starting: %s" % path)
103
130
 
104
131
  with job.fileStore.writeGlobalFileStream() as (fileHandle, outputFileStoreID):
105
- fileHandle = codecs.getwriter('utf-8')(fileHandle)
132
+ fileHandle = codecs.getwriter("utf-8")(fileHandle)
106
133
  with job.fileStore.readGlobalFileStream(inputFileID1) as inputFileHandle1:
107
- inputFileHandle1 = codecs.getreader('utf-8')(inputFileHandle1)
134
+ inputFileHandle1 = codecs.getreader("utf-8")(inputFileHandle1)
108
135
  with job.fileStore.readGlobalFileStream(inputFileID2) as inputFileHandle2:
109
- inputFileHandle2 = codecs.getreader('utf-8')(inputFileHandle2)
110
- RealtimeLogger.info("Merging %s and %s to %s"
111
- % (inputFileID1, inputFileID2, outputFileStoreID))
136
+ inputFileHandle2 = codecs.getreader("utf-8")(inputFileHandle2)
137
+ RealtimeLogger.info(
138
+ "Merging %s and %s to %s"
139
+ % (inputFileID1, inputFileID2, outputFileStoreID)
140
+ )
112
141
  merge(inputFileHandle1, inputFileHandle2, fileHandle)
113
142
  # Cleanup up the input files - these deletes will occur after the completion is successful.
114
143
  job.fileStore.deleteGlobalFile(inputFileID1)
@@ -126,7 +155,7 @@ def sort(file):
126
155
 
127
156
  lines.sort()
128
157
 
129
- with open(file, 'w') as f:
158
+ with open(file, "w") as f:
130
159
  for line in lines:
131
160
  f.write(line)
132
161
 
@@ -181,9 +210,12 @@ def getMidPoint(file, fileStart, fileEnd):
181
210
 
182
211
 
183
212
  def makeFileToSort(fileName, lines=defaultLines, lineLen=defaultLineLen):
184
- with open(fileName, 'w') as f:
213
+ with open(fileName, "w") as f:
185
214
  for _ in range(lines):
186
- line = "".join(random.choice('actgACTGNXYZ') for _ in range(lineLen - 1)) + '\n'
215
+ line = (
216
+ "".join(random.choice("actgACTGNXYZ") for _ in range(lineLen - 1))
217
+ + "\n"
218
+ )
187
219
  f.write(line)
188
220
 
189
221
 
@@ -192,25 +224,51 @@ def main(options=None):
192
224
  # deal with command line arguments
193
225
  parser = ArgumentParser()
194
226
  Job.Runner.addToilOptions(parser)
195
- parser.add_argument('--numLines', default=defaultLines, help='Number of lines in file to sort.', type=int)
196
- parser.add_argument('--lineLength', default=defaultLineLen, help='Length of lines in file to sort.', type=int)
227
+ parser.add_argument(
228
+ "--numLines",
229
+ default=defaultLines,
230
+ help="Number of lines in file to sort.",
231
+ type=int,
232
+ )
233
+ parser.add_argument(
234
+ "--lineLength",
235
+ default=defaultLineLen,
236
+ help="Length of lines in file to sort.",
237
+ type=int,
238
+ )
197
239
  parser.add_argument("--fileToSort", help="The file you wish to sort")
198
240
  parser.add_argument("--outputFile", help="Where the sorted output will go")
199
- parser.add_argument("--overwriteOutput", help="Write over the output file if it already exists.", default=True)
200
- parser.add_argument("--N", dest="N",
201
- help="The threshold below which a serial sort function is used to sort file. "
202
- "All lines must of length less than or equal to N or program will fail",
203
- default=10000)
204
- parser.add_argument('--downCheckpoints', action='store_true',
205
- help='If this option is set, the workflow will make checkpoints on its way through'
206
- 'the recursive "down" part of the sort')
207
- parser.add_argument("--sortMemory", dest="sortMemory",
208
- help="Memory for jobs that sort chunks of the file.",
209
- default=None)
210
-
211
- parser.add_argument("--mergeMemory", dest="mergeMemory",
212
- help="Memory for jobs that collate results.",
213
- default=None)
241
+ parser.add_argument(
242
+ "--overwriteOutput",
243
+ help="Write over the output file if it already exists.",
244
+ default=True,
245
+ )
246
+ parser.add_argument(
247
+ "--N",
248
+ dest="N",
249
+ help="The threshold below which a serial sort function is used to sort file. "
250
+ "All lines must of length less than or equal to N or program will fail",
251
+ default=10000,
252
+ )
253
+ parser.add_argument(
254
+ "--downCheckpoints",
255
+ action="store_true",
256
+ help="If this option is set, the workflow will make checkpoints on its way through"
257
+ 'the recursive "down" part of the sort',
258
+ )
259
+ parser.add_argument(
260
+ "--sortMemory",
261
+ dest="sortMemory",
262
+ help="Memory for jobs that sort chunks of the file.",
263
+ default=None,
264
+ )
265
+
266
+ parser.add_argument(
267
+ "--mergeMemory",
268
+ dest="mergeMemory",
269
+ help="Memory for jobs that collate results.",
270
+ default=None,
271
+ )
214
272
 
215
273
  options = parser.parse_args()
216
274
  if not hasattr(options, "sortMemory") or not options.sortMemory:
@@ -221,19 +279,25 @@ def main(options=None):
221
279
  # do some input verification
222
280
  sortedFileName = options.outputFile or "sortedFile.txt"
223
281
  if not options.overwriteOutput and os.path.exists(sortedFileName):
224
- print(f'Output file {sortedFileName} already exists. '
225
- f'Delete it to run the sort example again or use --overwriteOutput=True')
282
+ print(
283
+ f"Output file {sortedFileName} already exists. "
284
+ f"Delete it to run the sort example again or use --overwriteOutput=True"
285
+ )
226
286
  exit()
227
287
 
228
288
  fileName = options.fileToSort
229
289
  if options.fileToSort is None:
230
290
  # make the file ourselves
231
- fileName = 'fileToSort.txt'
291
+ fileName = "fileToSort.txt"
232
292
  if os.path.exists(fileName):
233
- print(f'Sorting existing file: {fileName}')
293
+ print(f"Sorting existing file: {fileName}")
234
294
  else:
235
- print(f'No sort file specified. Generating one automatically called: {fileName}.')
236
- makeFileToSort(fileName=fileName, lines=options.numLines, lineLen=options.lineLength)
295
+ print(
296
+ f"No sort file specified. Generating one automatically called: {fileName}."
297
+ )
298
+ makeFileToSort(
299
+ fileName=fileName, lines=options.numLines, lineLen=options.lineLength
300
+ )
237
301
  else:
238
302
  if not os.path.exists(options.fileToSort):
239
303
  raise RuntimeError("File to sort does not exist: %s" % options.fileToSort)
@@ -241,24 +305,29 @@ def main(options=None):
241
305
  if int(options.N) <= 0:
242
306
  raise RuntimeError("Invalid value of N: %s" % options.N)
243
307
 
244
-
245
308
  # Now we are ready to run
246
309
  with Toil(options) as workflow:
247
- sortedFileURL = 'file://' + os.path.abspath(sortedFileName)
248
- #raise Exception('test')
310
+ sortedFileURL = "file://" + os.path.abspath(sortedFileName)
311
+ # raise Exception('test')
249
312
 
250
313
  if not workflow.options.restart:
251
- sortFileURL = 'file://' + os.path.abspath(fileName)
314
+ sortFileURL = "file://" + os.path.abspath(fileName)
252
315
  sortFileID = workflow.importFile(sortFileURL)
253
- sortedFileID = workflow.start(Job.wrapJobFn(setup,
254
- sortFileID,
255
- int(options.N),
256
- options.downCheckpoints,
257
- options=options,
258
- memory=sortMemory))
316
+ sortedFileID = workflow.start(
317
+ Job.wrapJobFn(
318
+ setup,
319
+ sortFileID,
320
+ int(options.N),
321
+ options.downCheckpoints,
322
+ options=options,
323
+ memory=sortMemory,
324
+ )
325
+ )
259
326
  """
260
327
  The else block is removed here to test that the job store is not
261
328
  destroyed when attempting to resume without restart().
262
329
  """
263
- if __name__ == '__main__':
330
+
331
+
332
+ if __name__ == "__main__":
264
333
  main()
toil/test/sort/sort.py CHANGED
@@ -27,7 +27,7 @@ from toil.realtimeLogger import RealtimeLogger
27
27
 
28
28
  defaultLines = 1000
29
29
  defaultLineLen = 50
30
- sortMemory = '600M'
30
+ sortMemory = "600M"
31
31
 
32
32
 
33
33
  def setup(job, inputFile, N, downCheckpoints, options):
@@ -36,12 +36,16 @@ def setup(job, inputFile, N, downCheckpoints, options):
36
36
  Returns the FileID of the sorted file
37
37
  """
38
38
  RealtimeLogger.info("Starting the merge sort")
39
- return job.addChildJobFn(down,
40
- inputFile, N, 'root',
41
- downCheckpoints,
42
- options = options,
43
- preemptible=True,
44
- memory=sortMemory).rv()
39
+ return job.addChildJobFn(
40
+ down,
41
+ inputFile,
42
+ N,
43
+ "root",
44
+ downCheckpoints,
45
+ options=options,
46
+ preemptible=True,
47
+ memory=sortMemory,
48
+ ).rv()
45
49
 
46
50
 
47
51
  def down(job, inputFileStoreID, N, path, downCheckpoints, options, memory=sortMemory):
@@ -59,34 +63,57 @@ def down(job, inputFileStoreID, N, path, downCheckpoints, options, memory=sortMe
59
63
  length = os.path.getsize(inputFile)
60
64
  if length > N:
61
65
  # We will subdivide the file
62
- RealtimeLogger.critical("Splitting file: %s of size: %s"
63
- % (inputFileStoreID, length))
66
+ RealtimeLogger.critical(
67
+ "Splitting file: %s of size: %s" % (inputFileStoreID, length)
68
+ )
64
69
  # Split the file into two copies
65
70
  midPoint = getMidPoint(inputFile, 0, length)
66
71
  t1 = job.fileStore.getLocalTempFile()
67
- with open(t1, 'w') as fH:
68
- fH.write(copySubRangeOfFile(inputFile, 0, midPoint+1))
72
+ with open(t1, "w") as fH:
73
+ fH.write(copySubRangeOfFile(inputFile, 0, midPoint + 1))
69
74
  t2 = job.fileStore.getLocalTempFile()
70
- with open(t2, 'w') as fH:
71
- fH.write(copySubRangeOfFile(inputFile, midPoint+1, length))
75
+ with open(t2, "w") as fH:
76
+ fH.write(copySubRangeOfFile(inputFile, midPoint + 1, length))
72
77
  # Call down recursively. By giving the rv() of the two jobs as inputs to the follow-on job, up,
73
78
  # we communicate the dependency without hindering concurrency.
74
- result = job.addFollowOnJobFn(up,
75
- job.addChildJobFn(down, job.fileStore.writeGlobalFile(t1), N, path + '/0',
76
- downCheckpoints, checkpoint=downCheckpoints, options=options,
77
- preemptible=True, memory=options.sortMemory).rv(),
78
- job.addChildJobFn(down, job.fileStore.writeGlobalFile(t2), N, path + '/1',
79
- downCheckpoints, checkpoint=downCheckpoints, options=options,
80
- preemptible=True, memory=options.mergeMemory).rv(),
81
- path + '/up', preemptible=True, options=options, memory=options.sortMemory).rv()
79
+ result = job.addFollowOnJobFn(
80
+ up,
81
+ job.addChildJobFn(
82
+ down,
83
+ job.fileStore.writeGlobalFile(t1),
84
+ N,
85
+ path + "/0",
86
+ downCheckpoints,
87
+ checkpoint=downCheckpoints,
88
+ options=options,
89
+ preemptible=True,
90
+ memory=options.sortMemory,
91
+ ).rv(),
92
+ job.addChildJobFn(
93
+ down,
94
+ job.fileStore.writeGlobalFile(t2),
95
+ N,
96
+ path + "/1",
97
+ downCheckpoints,
98
+ checkpoint=downCheckpoints,
99
+ options=options,
100
+ preemptible=True,
101
+ memory=options.mergeMemory,
102
+ ).rv(),
103
+ path + "/up",
104
+ preemptible=True,
105
+ options=options,
106
+ memory=options.sortMemory,
107
+ ).rv()
82
108
  else:
83
109
  # We can sort this bit of the file
84
- RealtimeLogger.critical("Sorting file: %s of size: %s"
85
- % (inputFileStoreID, length))
110
+ RealtimeLogger.critical(
111
+ "Sorting file: %s of size: %s" % (inputFileStoreID, length)
112
+ )
86
113
  # Sort the copy and write back to the fileStore
87
- shutil.copyfile(inputFile, inputFile + '.sort')
88
- sort(inputFile + '.sort')
89
- result = job.fileStore.writeGlobalFile(inputFile + '.sort')
114
+ shutil.copyfile(inputFile, inputFile + ".sort")
115
+ sort(inputFile + ".sort")
116
+ result = job.fileStore.writeGlobalFile(inputFile + ".sort")
90
117
 
91
118
  RealtimeLogger.info("Down job finished: %s" % path)
92
119
  return result
@@ -100,13 +127,15 @@ def up(job, inputFileID1, inputFileID2, path, options, memory=sortMemory):
100
127
  RealtimeLogger.info("Up job starting: %s" % path)
101
128
 
102
129
  with job.fileStore.writeGlobalFileStream() as (fileHandle, outputFileStoreID):
103
- fileHandle = codecs.getwriter('utf-8')(fileHandle)
130
+ fileHandle = codecs.getwriter("utf-8")(fileHandle)
104
131
  with job.fileStore.readGlobalFileStream(inputFileID1) as inputFileHandle1:
105
- inputFileHandle1 = codecs.getreader('utf-8')(inputFileHandle1)
132
+ inputFileHandle1 = codecs.getreader("utf-8")(inputFileHandle1)
106
133
  with job.fileStore.readGlobalFileStream(inputFileID2) as inputFileHandle2:
107
- inputFileHandle2 = codecs.getreader('utf-8')(inputFileHandle2)
108
- RealtimeLogger.info("Merging %s and %s to %s"
109
- % (inputFileID1, inputFileID2, outputFileStoreID))
134
+ inputFileHandle2 = codecs.getreader("utf-8")(inputFileHandle2)
135
+ RealtimeLogger.info(
136
+ "Merging %s and %s to %s"
137
+ % (inputFileID1, inputFileID2, outputFileStoreID)
138
+ )
110
139
  merge(inputFileHandle1, inputFileHandle2, fileHandle)
111
140
  # Cleanup up the input files - these deletes will occur after the completion is successful.
112
141
  job.fileStore.deleteGlobalFile(inputFileID1)
@@ -124,7 +153,7 @@ def sort(file):
124
153
 
125
154
  lines.sort()
126
155
 
127
- with open(file, 'w') as f:
156
+ with open(file, "w") as f:
128
157
  for line in lines:
129
158
  f.write(line)
130
159
 
@@ -179,9 +208,12 @@ def getMidPoint(file, fileStart, fileEnd):
179
208
 
180
209
 
181
210
  def makeFileToSort(fileName, lines=defaultLines, lineLen=defaultLineLen):
182
- with open(fileName, 'w') as f:
211
+ with open(fileName, "w") as f:
183
212
  for _ in range(lines):
184
- line = "".join(random.choice('actgACTGNXYZ') for _ in range(lineLen - 1)) + '\n'
213
+ line = (
214
+ "".join(random.choice("actgACTGNXYZ") for _ in range(lineLen - 1))
215
+ + "\n"
216
+ )
185
217
  f.write(line)
186
218
 
187
219
 
@@ -190,25 +222,51 @@ def main(options=None):
190
222
  # deal with command line arguments
191
223
  parser = ArgumentParser()
192
224
  Job.Runner.addToilOptions(parser)
193
- parser.add_argument('--numLines', default=defaultLines, help='Number of lines in file to sort.', type=int)
194
- parser.add_argument('--lineLength', default=defaultLineLen, help='Length of lines in file to sort.', type=int)
225
+ parser.add_argument(
226
+ "--numLines",
227
+ default=defaultLines,
228
+ help="Number of lines in file to sort.",
229
+ type=int,
230
+ )
231
+ parser.add_argument(
232
+ "--lineLength",
233
+ default=defaultLineLen,
234
+ help="Length of lines in file to sort.",
235
+ type=int,
236
+ )
195
237
  parser.add_argument("--fileToSort", help="The file you wish to sort")
196
238
  parser.add_argument("--outputFile", help="Where the sorted output will go")
197
- parser.add_argument("--overwriteOutput", help="Write over the output file if it already exists.", default=True)
198
- parser.add_argument("--N", dest="N",
199
- help="The threshold below which a serial sort function is used to sort file. "
200
- "All lines must of length less than or equal to N or program will fail",
201
- default=10000)
202
- parser.add_argument('--downCheckpoints', action='store_true',
203
- help='If this option is set, the workflow will make checkpoints on its way through'
204
- 'the recursive "down" part of the sort')
205
- parser.add_argument("--sortMemory", dest="sortMemory",
206
- help="Memory for jobs that sort chunks of the file.",
207
- default=None)
208
-
209
- parser.add_argument("--mergeMemory", dest="mergeMemory",
210
- help="Memory for jobs that collate results.",
211
- default=None)
239
+ parser.add_argument(
240
+ "--overwriteOutput",
241
+ help="Write over the output file if it already exists.",
242
+ default=True,
243
+ )
244
+ parser.add_argument(
245
+ "--N",
246
+ dest="N",
247
+ help="The threshold below which a serial sort function is used to sort file. "
248
+ "All lines must of length less than or equal to N or program will fail",
249
+ default=10000,
250
+ )
251
+ parser.add_argument(
252
+ "--downCheckpoints",
253
+ action="store_true",
254
+ help="If this option is set, the workflow will make checkpoints on its way through"
255
+ 'the recursive "down" part of the sort',
256
+ )
257
+ parser.add_argument(
258
+ "--sortMemory",
259
+ dest="sortMemory",
260
+ help="Memory for jobs that sort chunks of the file.",
261
+ default=None,
262
+ )
263
+
264
+ parser.add_argument(
265
+ "--mergeMemory",
266
+ dest="mergeMemory",
267
+ help="Memory for jobs that collate results.",
268
+ default=None,
269
+ )
212
270
 
213
271
  options = parser.parse_args()
214
272
  if not hasattr(options, "sortMemory") or not options.sortMemory:
@@ -219,19 +277,25 @@ def main(options=None):
219
277
  # do some input verification
220
278
  sortedFileName = options.outputFile or "sortedFile.txt"
221
279
  if not options.overwriteOutput and os.path.exists(sortedFileName):
222
- print(f'Output file {sortedFileName} already exists. '
223
- f'Delete it to run the sort example again or use --overwriteOutput=True')
280
+ print(
281
+ f"Output file {sortedFileName} already exists. "
282
+ f"Delete it to run the sort example again or use --overwriteOutput=True"
283
+ )
224
284
  exit()
225
285
 
226
286
  fileName = options.fileToSort
227
287
  if options.fileToSort is None:
228
288
  # make the file ourselves
229
- fileName = 'fileToSort.txt'
289
+ fileName = "fileToSort.txt"
230
290
  if os.path.exists(fileName):
231
- print(f'Sorting existing file: {fileName}')
291
+ print(f"Sorting existing file: {fileName}")
232
292
  else:
233
- print(f'No sort file specified. Generating one automatically called: {fileName}.')
234
- makeFileToSort(fileName=fileName, lines=options.numLines, lineLen=options.lineLength)
293
+ print(
294
+ f"No sort file specified. Generating one automatically called: {fileName}."
295
+ )
296
+ makeFileToSort(
297
+ fileName=fileName, lines=options.numLines, lineLen=options.lineLength
298
+ )
235
299
  else:
236
300
  if not os.path.exists(options.fileToSort):
237
301
  raise RuntimeError("File to sort does not exist: %s" % options.fileToSort)
@@ -241,20 +305,24 @@ def main(options=None):
241
305
 
242
306
  # Now we are ready to run
243
307
  with Toil(options) as workflow:
244
- sortedFileURL = 'file://' + os.path.abspath(sortedFileName)
308
+ sortedFileURL = "file://" + os.path.abspath(sortedFileName)
245
309
  if not workflow.options.restart:
246
- sortFileURL = 'file://' + os.path.abspath(fileName)
310
+ sortFileURL = "file://" + os.path.abspath(fileName)
247
311
  sortFileID = workflow.importFile(sortFileURL)
248
- sortedFileID = workflow.start(Job.wrapJobFn(setup,
249
- sortFileID,
250
- int(options.N),
251
- options.downCheckpoints,
252
- options=options,
253
- memory=sortMemory))
312
+ sortedFileID = workflow.start(
313
+ Job.wrapJobFn(
314
+ setup,
315
+ sortFileID,
316
+ int(options.N),
317
+ options.downCheckpoints,
318
+ options=options,
319
+ memory=sortMemory,
320
+ )
321
+ )
254
322
  else:
255
323
  sortedFileID = workflow.restart()
256
324
  workflow.exportFile(sortedFileID, sortedFileURL)
257
325
 
258
326
 
259
- if __name__ == '__main__':
327
+ if __name__ == "__main__":
260
328
  main()