toil 6.1.0a1__py3-none-any.whl → 8.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (193)
  1. toil/__init__.py +122 -315
  2. toil/batchSystems/__init__.py +1 -0
  3. toil/batchSystems/abstractBatchSystem.py +173 -89
  4. toil/batchSystems/abstractGridEngineBatchSystem.py +272 -148
  5. toil/batchSystems/awsBatch.py +244 -135
  6. toil/batchSystems/cleanup_support.py +26 -16
  7. toil/batchSystems/contained_executor.py +31 -28
  8. toil/batchSystems/gridengine.py +86 -50
  9. toil/batchSystems/htcondor.py +166 -89
  10. toil/batchSystems/kubernetes.py +632 -382
  11. toil/batchSystems/local_support.py +20 -15
  12. toil/batchSystems/lsf.py +134 -81
  13. toil/batchSystems/lsfHelper.py +13 -11
  14. toil/batchSystems/mesos/__init__.py +41 -29
  15. toil/batchSystems/mesos/batchSystem.py +290 -151
  16. toil/batchSystems/mesos/executor.py +79 -50
  17. toil/batchSystems/mesos/test/__init__.py +31 -23
  18. toil/batchSystems/options.py +46 -28
  19. toil/batchSystems/registry.py +53 -19
  20. toil/batchSystems/singleMachine.py +296 -125
  21. toil/batchSystems/slurm.py +603 -138
  22. toil/batchSystems/torque.py +47 -33
  23. toil/bus.py +186 -76
  24. toil/common.py +664 -368
  25. toil/cwl/__init__.py +1 -1
  26. toil/cwl/cwltoil.py +1136 -483
  27. toil/cwl/utils.py +17 -22
  28. toil/deferred.py +63 -42
  29. toil/exceptions.py +5 -3
  30. toil/fileStores/__init__.py +5 -5
  31. toil/fileStores/abstractFileStore.py +140 -60
  32. toil/fileStores/cachingFileStore.py +717 -269
  33. toil/fileStores/nonCachingFileStore.py +116 -87
  34. toil/job.py +1225 -368
  35. toil/jobStores/abstractJobStore.py +416 -266
  36. toil/jobStores/aws/jobStore.py +863 -477
  37. toil/jobStores/aws/utils.py +201 -120
  38. toil/jobStores/conftest.py +3 -2
  39. toil/jobStores/fileJobStore.py +292 -154
  40. toil/jobStores/googleJobStore.py +140 -74
  41. toil/jobStores/utils.py +36 -15
  42. toil/leader.py +668 -272
  43. toil/lib/accelerators.py +115 -18
  44. toil/lib/aws/__init__.py +74 -31
  45. toil/lib/aws/ami.py +122 -87
  46. toil/lib/aws/iam.py +284 -108
  47. toil/lib/aws/s3.py +31 -0
  48. toil/lib/aws/session.py +214 -39
  49. toil/lib/aws/utils.py +287 -231
  50. toil/lib/bioio.py +13 -5
  51. toil/lib/compatibility.py +11 -6
  52. toil/lib/conversions.py +104 -47
  53. toil/lib/docker.py +131 -103
  54. toil/lib/ec2.py +361 -199
  55. toil/lib/ec2nodes.py +174 -106
  56. toil/lib/encryption/_dummy.py +5 -3
  57. toil/lib/encryption/_nacl.py +10 -6
  58. toil/lib/encryption/conftest.py +1 -0
  59. toil/lib/exceptions.py +26 -7
  60. toil/lib/expando.py +5 -3
  61. toil/lib/ftp_utils.py +217 -0
  62. toil/lib/generatedEC2Lists.py +127 -19
  63. toil/lib/humanize.py +6 -2
  64. toil/lib/integration.py +341 -0
  65. toil/lib/io.py +141 -15
  66. toil/lib/iterables.py +4 -2
  67. toil/lib/memoize.py +12 -8
  68. toil/lib/misc.py +66 -21
  69. toil/lib/objects.py +2 -2
  70. toil/lib/resources.py +68 -15
  71. toil/lib/retry.py +126 -81
  72. toil/lib/threading.py +299 -82
  73. toil/lib/throttle.py +16 -15
  74. toil/options/common.py +843 -409
  75. toil/options/cwl.py +175 -90
  76. toil/options/runner.py +50 -0
  77. toil/options/wdl.py +73 -17
  78. toil/provisioners/__init__.py +117 -46
  79. toil/provisioners/abstractProvisioner.py +332 -157
  80. toil/provisioners/aws/__init__.py +70 -33
  81. toil/provisioners/aws/awsProvisioner.py +1145 -715
  82. toil/provisioners/clusterScaler.py +541 -279
  83. toil/provisioners/gceProvisioner.py +282 -179
  84. toil/provisioners/node.py +155 -79
  85. toil/realtimeLogger.py +34 -22
  86. toil/resource.py +137 -75
  87. toil/server/app.py +128 -62
  88. toil/server/celery_app.py +3 -1
  89. toil/server/cli/wes_cwl_runner.py +82 -53
  90. toil/server/utils.py +54 -28
  91. toil/server/wes/abstract_backend.py +64 -26
  92. toil/server/wes/amazon_wes_utils.py +21 -15
  93. toil/server/wes/tasks.py +121 -63
  94. toil/server/wes/toil_backend.py +142 -107
  95. toil/server/wsgi_app.py +4 -3
  96. toil/serviceManager.py +58 -22
  97. toil/statsAndLogging.py +224 -70
  98. toil/test/__init__.py +282 -183
  99. toil/test/batchSystems/batchSystemTest.py +460 -210
  100. toil/test/batchSystems/batch_system_plugin_test.py +90 -0
  101. toil/test/batchSystems/test_gridengine.py +173 -0
  102. toil/test/batchSystems/test_lsf_helper.py +67 -58
  103. toil/test/batchSystems/test_slurm.py +110 -49
  104. toil/test/cactus/__init__.py +0 -0
  105. toil/test/cactus/test_cactus_integration.py +56 -0
  106. toil/test/cwl/cwlTest.py +496 -287
  107. toil/test/cwl/measure_default_memory.cwl +12 -0
  108. toil/test/cwl/not_run_required_input.cwl +29 -0
  109. toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
  110. toil/test/cwl/seqtk_seq.cwl +1 -1
  111. toil/test/docs/scriptsTest.py +69 -46
  112. toil/test/jobStores/jobStoreTest.py +427 -264
  113. toil/test/lib/aws/test_iam.py +118 -50
  114. toil/test/lib/aws/test_s3.py +16 -9
  115. toil/test/lib/aws/test_utils.py +5 -6
  116. toil/test/lib/dockerTest.py +118 -141
  117. toil/test/lib/test_conversions.py +113 -115
  118. toil/test/lib/test_ec2.py +58 -50
  119. toil/test/lib/test_integration.py +104 -0
  120. toil/test/lib/test_misc.py +12 -5
  121. toil/test/mesos/MesosDataStructuresTest.py +23 -10
  122. toil/test/mesos/helloWorld.py +7 -6
  123. toil/test/mesos/stress.py +25 -20
  124. toil/test/options/__init__.py +13 -0
  125. toil/test/options/options.py +42 -0
  126. toil/test/provisioners/aws/awsProvisionerTest.py +320 -150
  127. toil/test/provisioners/clusterScalerTest.py +440 -250
  128. toil/test/provisioners/clusterTest.py +166 -44
  129. toil/test/provisioners/gceProvisionerTest.py +174 -100
  130. toil/test/provisioners/provisionerTest.py +25 -13
  131. toil/test/provisioners/restartScript.py +5 -4
  132. toil/test/server/serverTest.py +188 -141
  133. toil/test/sort/restart_sort.py +137 -68
  134. toil/test/sort/sort.py +134 -66
  135. toil/test/sort/sortTest.py +91 -49
  136. toil/test/src/autoDeploymentTest.py +141 -101
  137. toil/test/src/busTest.py +20 -18
  138. toil/test/src/checkpointTest.py +8 -2
  139. toil/test/src/deferredFunctionTest.py +49 -35
  140. toil/test/src/dockerCheckTest.py +32 -24
  141. toil/test/src/environmentTest.py +135 -0
  142. toil/test/src/fileStoreTest.py +539 -272
  143. toil/test/src/helloWorldTest.py +7 -4
  144. toil/test/src/importExportFileTest.py +61 -31
  145. toil/test/src/jobDescriptionTest.py +46 -21
  146. toil/test/src/jobEncapsulationTest.py +2 -0
  147. toil/test/src/jobFileStoreTest.py +74 -50
  148. toil/test/src/jobServiceTest.py +187 -73
  149. toil/test/src/jobTest.py +121 -71
  150. toil/test/src/miscTests.py +19 -18
  151. toil/test/src/promisedRequirementTest.py +82 -36
  152. toil/test/src/promisesTest.py +7 -6
  153. toil/test/src/realtimeLoggerTest.py +10 -6
  154. toil/test/src/regularLogTest.py +71 -37
  155. toil/test/src/resourceTest.py +80 -49
  156. toil/test/src/restartDAGTest.py +36 -22
  157. toil/test/src/resumabilityTest.py +9 -2
  158. toil/test/src/retainTempDirTest.py +45 -14
  159. toil/test/src/systemTest.py +12 -8
  160. toil/test/src/threadingTest.py +44 -25
  161. toil/test/src/toilContextManagerTest.py +10 -7
  162. toil/test/src/userDefinedJobArgTypeTest.py +8 -5
  163. toil/test/src/workerTest.py +73 -23
  164. toil/test/utils/toilDebugTest.py +103 -33
  165. toil/test/utils/toilKillTest.py +4 -5
  166. toil/test/utils/utilsTest.py +245 -106
  167. toil/test/wdl/wdltoil_test.py +818 -149
  168. toil/test/wdl/wdltoil_test_kubernetes.py +91 -0
  169. toil/toilState.py +120 -35
  170. toil/utils/toilConfig.py +13 -4
  171. toil/utils/toilDebugFile.py +44 -27
  172. toil/utils/toilDebugJob.py +214 -27
  173. toil/utils/toilDestroyCluster.py +11 -6
  174. toil/utils/toilKill.py +8 -3
  175. toil/utils/toilLaunchCluster.py +256 -140
  176. toil/utils/toilMain.py +37 -16
  177. toil/utils/toilRsyncCluster.py +32 -14
  178. toil/utils/toilSshCluster.py +49 -22
  179. toil/utils/toilStats.py +356 -273
  180. toil/utils/toilStatus.py +292 -139
  181. toil/utils/toilUpdateEC2Instances.py +3 -1
  182. toil/version.py +12 -12
  183. toil/wdl/utils.py +5 -5
  184. toil/wdl/wdltoil.py +3913 -1033
  185. toil/worker.py +367 -184
  186. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/LICENSE +25 -0
  187. toil-8.0.0.dist-info/METADATA +173 -0
  188. toil-8.0.0.dist-info/RECORD +253 -0
  189. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/WHEEL +1 -1
  190. toil-6.1.0a1.dist-info/METADATA +0 -125
  191. toil-6.1.0a1.dist-info/RECORD +0 -237
  192. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/entry_points.txt +0 -0
  193. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/top_level.txt +0 -0
@@ -17,7 +17,7 @@ import os
17
17
  import threading
18
18
  import time
19
19
  import uuid
20
- from typing import Optional, Set
20
+ from typing import Optional
21
21
 
22
22
  import requests
23
23
  from libcloud.compute.drivers.gce import GCEFailedNode
@@ -25,8 +25,8 @@ from libcloud.compute.providers import get_driver
25
25
  from libcloud.compute.types import Provider
26
26
 
27
27
  from toil.jobStores.googleJobStore import GoogleJobStore
28
- from toil.lib.conversions import human2bytes
29
28
  from toil.lib.compatibility import compat_bytes_recursive
29
+ from toil.lib.conversions import human2bytes
30
30
  from toil.provisioners import NoSuchClusterException
31
31
  from toil.provisioners.abstractProvisioner import AbstractProvisioner, Shape
32
32
  from toil.provisioners.node import Node
@@ -34,24 +34,41 @@ from toil.provisioners.node import Node
34
34
  logger = logging.getLogger(__name__)
35
35
  logging.getLogger("urllib3.connectionpool").setLevel(logging.WARNING)
36
36
 
37
+
37
38
  class GCEProvisioner(AbstractProvisioner):
38
39
  """
39
40
  Implements a Google Compute Engine Provisioner using libcloud.
40
41
  """
41
42
 
42
43
  NODE_BOTO_PATH = "/root/.boto" # boto file path on instances
43
- SOURCE_IMAGE = b'projects/kinvolk-public/global/images/family/flatcar-stable'
44
-
45
- def __init__(self, clusterName, clusterType, zone, nodeStorage, nodeStorageOverrides, sseKey):
46
- self.cloud = 'gce'
44
+ SOURCE_IMAGE = b"projects/kinvolk-public/global/images/family/flatcar-stable"
45
+
46
+ def __init__(
47
+ self,
48
+ clusterName,
49
+ clusterType,
50
+ zone,
51
+ nodeStorage,
52
+ nodeStorageOverrides,
53
+ sseKey,
54
+ enable_fuse,
55
+ ):
56
+ self.cloud = "gce"
47
57
  self._sseKey = sseKey
48
58
 
49
59
  # Call base class constructor, which will call createClusterSettings()
50
60
  # or readClusterSettings()
51
- super().__init__(clusterName, clusterType, zone, nodeStorage, nodeStorageOverrides)
61
+ super().__init__(
62
+ clusterName,
63
+ clusterType,
64
+ zone,
65
+ nodeStorage,
66
+ nodeStorageOverrides,
67
+ enable_fuse,
68
+ )
52
69
 
53
70
  def supportedClusterTypes(self):
54
- return {'mesos'}
71
+ return {"mesos"}
55
72
 
56
73
  def createClusterSettings(self):
57
74
  # All we need to do is read the Google credentials we need to provision
@@ -65,30 +82,38 @@ class GCEProvisioner(AbstractProvisioner):
65
82
  reading the metadata.
66
83
  """
67
84
  metadata_server = "http://metadata/computeMetadata/v1/instance/"
68
- metadata_flavor = {'Metadata-Flavor': 'Google'}
69
- zone = requests.get(metadata_server + 'zone', headers = metadata_flavor).text
70
- self._zone = zone.split('/')[-1]
85
+ metadata_flavor = {"Metadata-Flavor": "Google"}
86
+ zone = requests.get(metadata_server + "zone", headers=metadata_flavor).text
87
+ self._zone = zone.split("/")[-1]
71
88
 
72
89
  project_metadata_server = "http://metadata/computeMetadata/v1/project/"
73
- self._projectId = requests.get(project_metadata_server + 'project-id', headers = metadata_flavor).text
90
+ self._projectId = requests.get(
91
+ project_metadata_server + "project-id", headers=metadata_flavor
92
+ ).text
74
93
 
75
94
  # From a GCE instance, these values can be blank. Only the projectId is needed
76
- self._googleJson = ''
77
- self._clientEmail = ''
95
+ self._googleJson = ""
96
+ self._clientEmail = ""
78
97
 
79
- self._tags = requests.get(metadata_server + 'description', headers = metadata_flavor).text
98
+ self._tags = requests.get(
99
+ metadata_server + "description", headers=metadata_flavor
100
+ ).text
80
101
  tags = json.loads(self._tags)
81
- self.clusterName = tags['clusterName']
102
+ self.clusterName = tags["clusterName"]
82
103
  self._gceDriver = self._getDriver()
83
- self._instanceGroup = self._gceDriver.ex_get_instancegroup(self.clusterName, zone=self._zone)
104
+ self._instanceGroup = self._gceDriver.ex_get_instancegroup(
105
+ self.clusterName, zone=self._zone
106
+ )
84
107
 
85
108
  leader = self.getLeader()
86
109
  self._leaderPrivateIP = leader.privateIP
87
110
 
88
111
  # The location of the Google credentials file on instances.
89
112
  self._credentialsPath = GoogleJobStore.nodeServiceAccountJson
90
- self._keyName = 'core' # key name leader users to communicate with works
91
- self._botoPath = self.NODE_BOTO_PATH # boto credentials (used if reading an AWS bucket)
113
+ self._keyName = "core" # key name leader users to communicate with works
114
+ self._botoPath = (
115
+ self.NODE_BOTO_PATH
116
+ ) # boto credentials (used if reading an AWS bucket)
92
117
 
93
118
  # Let the base provisioner work out how to deploy duly authorized
94
119
  # workers for this leader.
@@ -98,28 +123,32 @@ class GCEProvisioner(AbstractProvisioner):
98
123
  """
99
124
  Get the credentials from the file specified by GOOGLE_APPLICATION_CREDENTIALS.
100
125
  """
101
- self._googleJson = os.getenv('GOOGLE_APPLICATION_CREDENTIALS')
126
+ self._googleJson = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
102
127
  if not self._googleJson:
103
- raise RuntimeError('GOOGLE_APPLICATION_CREDENTIALS not set.')
128
+ raise RuntimeError("GOOGLE_APPLICATION_CREDENTIALS not set.")
104
129
  try:
105
130
  with open(self._googleJson) as jsonFile:
106
131
  self.googleConnectionParams = json.loads(jsonFile.read())
107
132
  except:
108
- raise RuntimeError('GCEProvisioner: Could not parse the Google service account json file %s'
109
- % self._googleJson)
133
+ raise RuntimeError(
134
+ "GCEProvisioner: Could not parse the Google service account json file %s"
135
+ % self._googleJson
136
+ )
110
137
 
111
- self._projectId = self.googleConnectionParams['project_id']
112
- self._clientEmail = self.googleConnectionParams['client_email']
138
+ self._projectId = self.googleConnectionParams["project_id"]
139
+ self._clientEmail = self.googleConnectionParams["client_email"]
113
140
  self._credentialsPath = self._googleJson
114
141
  self._clearLeaderWorkerAuthentication() # TODO: Why are we doing this?
115
142
  self._gceDriver = self._getDriver()
116
143
 
117
144
  def _write_file_to_cloud(self, key: str, contents: bytes) -> str:
118
- raise NotImplementedError("The gceProvisioner doesn't support _write_file_to_cloud().")
145
+ raise NotImplementedError(
146
+ "The gceProvisioner doesn't support _write_file_to_cloud()."
147
+ )
119
148
 
120
149
  def _get_user_data_limit(self) -> int:
121
150
  # See: https://cloud.google.com/compute/docs/metadata/setting-custom-metadata#limitations
122
- return human2bytes('256KB')
151
+ return human2bytes("256KB")
123
152
 
124
153
  def launchCluster(self, leaderNodeType, leaderStorage, owner, **kwargs):
125
154
  """
@@ -131,39 +160,42 @@ class GCEProvisioner(AbstractProvisioner):
131
160
  vpcSubnet: A subnet (optional).
132
161
  use_private_ip: even though a public ip exists, ignore it (optional)
133
162
  """
134
- if 'keyName' not in kwargs:
163
+ if "keyName" not in kwargs:
135
164
  raise RuntimeError("A keyPairName is required for the GCE provisioner.")
136
- self._keyName = kwargs['keyName']
137
- if 'botoPath' in kwargs:
138
- self._botoPath = kwargs['botoPath']
139
- self._vpcSubnet = kwargs.get('vpcSubnet', None)
140
- self._network = kwargs.get('network', None)
141
- self._use_private_ip = kwargs.get('use_private_ip', None)
165
+ self._keyName = kwargs["keyName"]
166
+ if "botoPath" in kwargs:
167
+ self._botoPath = kwargs["botoPath"]
168
+ self._vpcSubnet = kwargs.get("vpcSubnet", None)
169
+ self._network = kwargs.get("network", None)
170
+ self._use_private_ip = kwargs.get("use_private_ip", None)
142
171
 
143
172
  # Throws an error if cluster exists
144
- self._instanceGroup = self._gceDriver.ex_create_instancegroup(self.clusterName, self._zone)
145
- logger.debug('Launching leader')
173
+ self._instanceGroup = self._gceDriver.ex_create_instancegroup(
174
+ self.clusterName, self._zone
175
+ )
176
+ logger.debug("Launching leader")
146
177
 
147
178
  # GCE doesn't have a dictionary tags field. The tags field is just a string list.
148
179
  # Therefore, dumping tags into the description.
149
- tags = {'Owner': self._keyName, 'clusterName': self.clusterName}
150
- if 'userTags' in kwargs:
151
- tags.update(kwargs['userTags'])
180
+ tags = {"Owner": self._keyName, "clusterName": self.clusterName}
181
+ if "userTags" in kwargs:
182
+ tags.update(kwargs["userTags"])
152
183
  self._tags = json.dumps(tags)
153
184
 
154
- metadata = {'items': [{'key': 'user-data', 'value': self._getIgnitionUserData('leader')}]}
155
- imageType = 'flatcar-stable'
156
- sa_scopes = [{'scopes': ['compute', 'storage-full']}]
185
+ metadata = {
186
+ "items": [
187
+ {"key": "user-data", "value": self._getIgnitionUserData("leader")}
188
+ ]
189
+ }
190
+ imageType = "flatcar-stable"
191
+ sa_scopes = [{"scopes": ["compute", "storage-full"]}]
157
192
  disk = {}
158
- disk['initializeParams'] = {
159
- 'sourceImage': self.SOURCE_IMAGE,
160
- 'diskSizeGb': leaderStorage
193
+ disk["initializeParams"] = {
194
+ "sourceImage": self.SOURCE_IMAGE,
195
+ "diskSizeGb": leaderStorage,
161
196
  }
162
- disk.update({
163
- 'boot': True,
164
- 'autoDelete': True
165
- })
166
- name = 'l' + str(uuid.uuid4())
197
+ disk.update({"boot": True, "autoDelete": True})
198
+ name = "l" + str(uuid.uuid4())
167
199
 
168
200
  leader = self._gceDriver.create_node(
169
201
  name,
@@ -174,9 +206,9 @@ class GCEProvisioner(AbstractProvisioner):
174
206
  ex_metadata=compat_bytes_recursive(metadata),
175
207
  ex_network=self._network,
176
208
  ex_subnetwork=self._vpcSubnet,
177
- ex_disks_gce_struct = [ compat_bytes_recursive(disk) ],
209
+ ex_disks_gce_struct=[compat_bytes_recursive(disk)],
178
210
  description=self._tags,
179
- ex_preemptible=False
211
+ ex_preemptible=False,
180
212
  )
181
213
 
182
214
  self._instanceGroup.add_instances([leader])
@@ -184,18 +216,27 @@ class GCEProvisioner(AbstractProvisioner):
184
216
  # self.subnetID = leader.subnet_id # TODO: get subnetID
185
217
 
186
218
  # Wait for the appliance to start and inject credentials.
187
- leaderNode = Node(publicIP=leader.public_ips[0], privateIP=leader.private_ips[0],
188
- name=leader.name, launchTime=leader.created_at, nodeType=leader.size,
189
- preemptible=False, tags=self._tags, use_private_ip=self._use_private_ip)
190
- leaderNode.waitForNode('toil_leader', keyName=self._keyName)
219
+ leaderNode = Node(
220
+ publicIP=leader.public_ips[0],
221
+ privateIP=leader.private_ips[0],
222
+ name=leader.name,
223
+ launchTime=leader.created_at,
224
+ nodeType=leader.size,
225
+ preemptible=False,
226
+ tags=self._tags,
227
+ use_private_ip=self._use_private_ip,
228
+ )
229
+ leaderNode.waitForNode("toil_leader", keyName=self._keyName)
191
230
  leaderNode.copySshKeys(self._keyName)
192
- leaderNode.injectFile(self._credentialsPath, GoogleJobStore.nodeServiceAccountJson, 'toil_leader')
231
+ leaderNode.injectFile(
232
+ self._credentialsPath, GoogleJobStore.nodeServiceAccountJson, "toil_leader"
233
+ )
193
234
  if self._botoPath:
194
- leaderNode.injectFile(self._botoPath, self.NODE_BOTO_PATH, 'toil_leader')
235
+ leaderNode.injectFile(self._botoPath, self.NODE_BOTO_PATH, "toil_leader")
195
236
  # Download credentials
196
237
  self._setLeaderWorkerAuthentication(leaderNode)
197
238
 
198
- logger.debug('Launched leader')
239
+ logger.debug("Launched leader")
199
240
 
200
241
  def getNodeShape(self, instance_type: str, preemptible=False) -> Shape:
201
242
  # TODO: read this value only once
@@ -208,21 +249,25 @@ class GCEProvisioner(AbstractProvisioner):
208
249
  if disk == 0:
209
250
  # This is an EBS-backed instance. We will use the root
210
251
  # volume, so add the amount of EBS storage requested for the root volume
211
- disk = self._nodeStorageOverrides.get(instance_type, self._nodeStorage) * 2 ** 30
252
+ disk = (
253
+ self._nodeStorageOverrides.get(instance_type, self._nodeStorage) * 2**30
254
+ )
212
255
 
213
256
  # Ram is in M.
214
257
  # Underestimate memory by 100M to prevent autoscaler from disagreeing with
215
258
  # mesos about whether a job can run on a particular node type
216
- memory = (instanceType.ram/1000 - 0.1) * 2 ** 30
217
- return Shape(wallTime=60 * 60,
218
- memory=memory,
219
- cores=instanceType.extra['guestCpus'],
220
- disk=disk,
221
- preemptible=preemptible)
259
+ memory = (instanceType.ram / 1000 - 0.1) * 2**30
260
+ return Shape(
261
+ wallTime=60 * 60,
262
+ memory=memory,
263
+ cores=instanceType.extra["guestCpus"],
264
+ disk=disk,
265
+ preemptible=preemptible,
266
+ )
222
267
 
223
268
  @staticmethod
224
269
  def retryPredicate(e):
225
- """ Not used by GCE """
270
+ """Not used by GCE"""
226
271
  return False
227
272
 
228
273
  def destroyCluster(self) -> None:
@@ -238,7 +283,9 @@ class GCEProvisioner(AbstractProvisioner):
238
283
  attempts += 1
239
284
 
240
285
  # remove group
241
- instanceGroup = self._gceDriver.ex_get_instancegroup(self.clusterName, zone=self._zone)
286
+ instanceGroup = self._gceDriver.ex_get_instancegroup(
287
+ self.clusterName, zone=self._zone
288
+ )
242
289
  instanceGroup.destroy()
243
290
 
244
291
  def terminateNodes(self, nodes):
@@ -248,7 +295,7 @@ class GCEProvisioner(AbstractProvisioner):
248
295
  instancesToKill = [i for i in instances if i.name in nodeNames]
249
296
  self._terminateInstances(instancesToKill)
250
297
 
251
- def addNodes(self, nodeTypes: Set[str], numNodes, preemptible, spotBid=None) -> int:
298
+ def addNodes(self, nodeTypes: set[str], numNodes, preemptible, spotBid=None) -> int:
252
299
  assert self._leaderPrivateIP
253
300
 
254
301
  # We don't support any balancing here so just pick one of the
@@ -268,23 +315,21 @@ class GCEProvisioner(AbstractProvisioner):
268
315
  keyPath = self._sseKey
269
316
 
270
317
  if not preemptible:
271
- logger.debug('Launching %s non-preemptible nodes', numNodes)
318
+ logger.debug("Launching %s non-preemptible nodes", numNodes)
272
319
  else:
273
- logger.debug('Launching %s preemptible nodes', numNodes)
320
+ logger.debug("Launching %s preemptible nodes", numNodes)
274
321
 
275
322
  # kwargs["subnet_id"] = self.subnetID if self.subnetID else self._getClusterInstance(self.instanceMetaData).subnet_id
276
- userData = self._getIgnitionUserData('worker', keyPath, preemptible)
277
- metadata = {'items': [{'key': 'user-data', 'value': userData}]}
278
- imageType = 'flatcar-stable'
279
- sa_scopes = [{'scopes': ['compute', 'storage-full']}]
323
+ userData = self._getIgnitionUserData("worker", keyPath, preemptible)
324
+ metadata = {"items": [{"key": "user-data", "value": userData}]}
325
+ imageType = "flatcar-stable"
326
+ sa_scopes = [{"scopes": ["compute", "storage-full"]}]
280
327
  disk = {}
281
- disk['initializeParams'] = {
282
- 'sourceImage': self.SOURCE_IMAGE,
283
- 'diskSizeGb': self._nodeStorageOverrides.get(node_type, self._nodeStorage) }
284
- disk.update({
285
- 'boot': True,
286
- 'autoDelete': True
287
- })
328
+ disk["initializeParams"] = {
329
+ "sourceImage": self.SOURCE_IMAGE,
330
+ "diskSizeGb": self._nodeStorageOverrides.get(node_type, self._nodeStorage),
331
+ }
332
+ disk.update({"boot": True, "autoDelete": True})
288
333
 
289
334
  # TODO:
290
335
  # - bug in gce.py for ex_create_multiple_nodes (erroneously, doesn't allow image and disk to be specified)
@@ -294,26 +339,38 @@ class GCEProvisioner(AbstractProvisioner):
294
339
  retries = 0
295
340
  workersCreated = 0
296
341
  # Try a few times to create the requested number of workers
297
- while numNodes-workersCreated > 0 and retries < 3:
342
+ while numNodes - workersCreated > 0 and retries < 3:
298
343
  instancesLaunched = self.ex_create_multiple_nodes(
299
- '', node_type, imageType, numNodes-workersCreated,
300
- location=self._zone,
301
- ex_service_accounts=sa_scopes,
302
- ex_metadata=metadata,
303
- ex_disks_gce_struct=[disk],
304
- description=self._tags,
305
- ex_preemptible=preemptible
306
- )
344
+ "",
345
+ node_type,
346
+ imageType,
347
+ numNodes - workersCreated,
348
+ location=self._zone,
349
+ ex_service_accounts=sa_scopes,
350
+ ex_metadata=metadata,
351
+ ex_disks_gce_struct=[disk],
352
+ description=self._tags,
353
+ ex_preemptible=preemptible,
354
+ )
307
355
  failedWorkers = []
308
356
  for instance in instancesLaunched:
309
357
  if isinstance(instance, GCEFailedNode):
310
- logger.error("Worker failed to launch with code %s. Error message: %s"
311
- % (instance.code, instance.error))
358
+ logger.error(
359
+ "Worker failed to launch with code %s. Error message: %s"
360
+ % (instance.code, instance.error)
361
+ )
312
362
  continue
313
363
 
314
- node = Node(publicIP=instance.public_ips[0], privateIP=instance.private_ips[0],
315
- name=instance.name, launchTime=instance.created_at, nodeType=instance.size,
316
- preemptible=False, tags=self._tags, use_private_ip=self._use_private_ip) # FIXME: what should tags be set to?
364
+ node = Node(
365
+ publicIP=instance.public_ips[0],
366
+ privateIP=instance.private_ips[0],
367
+ name=instance.name,
368
+ launchTime=instance.created_at,
369
+ nodeType=instance.size,
370
+ preemptible=False,
371
+ tags=self._tags,
372
+ use_private_ip=self._use_private_ip,
373
+ ) # FIXME: what should tags be set to?
317
374
 
318
375
  try:
319
376
  self._injectWorkerFiles(node, botoExists)
@@ -321,43 +378,55 @@ class GCEProvisioner(AbstractProvisioner):
321
378
  self._instanceGroup.add_instances([instance])
322
379
  workersCreated += 1
323
380
  except Exception as e:
324
- logger.error(f"Failed to configure worker {node.name}. Error message: {e}")
381
+ logger.error(
382
+ f"Failed to configure worker {node.name}. Error message: {e}"
383
+ )
325
384
  failedWorkers.append(instance)
326
385
  if failedWorkers:
327
386
  logger.error("Terminating %d failed workers" % len(failedWorkers))
328
387
  self._terminateInstances(failedWorkers)
329
388
  retries += 1
330
389
 
331
- logger.debug('Launched %d new instance(s)', numNodes)
390
+ logger.debug("Launched %d new instance(s)", numNodes)
332
391
  if numNodes != workersCreated:
333
- logger.error("Failed to launch %d worker(s)", numNodes-workersCreated)
392
+ logger.error("Failed to launch %d worker(s)", numNodes - workersCreated)
334
393
  return workersCreated
335
394
 
336
- def getProvisionedWorkers(self, instance_type: Optional[str] = None, preemptible: Optional[bool] = None):
395
+ def getProvisionedWorkers(
396
+ self, instance_type: Optional[str] = None, preemptible: Optional[bool] = None
397
+ ):
337
398
  assert self._leaderPrivateIP
338
399
  entireCluster = self._getNodesInCluster(instance_type=instance_type)
339
- logger.debug('All nodes in cluster: %s', entireCluster)
400
+ logger.debug("All nodes in cluster: %s", entireCluster)
340
401
  workerInstances = []
341
402
  for instance in entireCluster:
342
403
  if preemptible is not None:
343
- scheduling = instance.extra.get('scheduling')
404
+ scheduling = instance.extra.get("scheduling")
344
405
  # If this field is not found in the extra meta-data, assume the node is not preemptible.
345
- if scheduling and scheduling.get('preemptible', False) != preemptible:
406
+ if scheduling and scheduling.get("preemptible", False) != preemptible:
346
407
  continue
347
408
  isWorker = True
348
409
  for ip in instance.private_ips:
349
410
  if ip == self._leaderPrivateIP:
350
411
  isWorker = False
351
412
  break # don't include the leader
352
- if isWorker and instance.state == 'running':
413
+ if isWorker and instance.state == "running":
353
414
  workerInstances.append(instance)
354
415
 
355
- logger.debug('All workers found in cluster: %s', workerInstances)
356
- return [Node(publicIP=i.public_ips[0], privateIP=i.private_ips[0],
357
- name=i.name, launchTime=i.created_at, nodeType=i.size,
358
- preemptible=i.extra.get('scheduling', {}).get('preemptible', False),
359
- tags=None, use_private_ip=self._use_private_ip)
360
- for i in workerInstances]
416
+ logger.debug("All workers found in cluster: %s", workerInstances)
417
+ return [
418
+ Node(
419
+ publicIP=i.public_ips[0],
420
+ privateIP=i.private_ips[0],
421
+ name=i.name,
422
+ launchTime=i.created_at,
423
+ nodeType=i.size,
424
+ preemptible=i.extra.get("scheduling", {}).get("preemptible", False),
425
+ tags=None,
426
+ use_private_ip=self._use_private_ip,
427
+ )
428
+ for i in workerInstances
429
+ ]
361
430
 
362
431
  def getLeader(self):
363
432
  instances = self._getNodesInCluster()
@@ -366,49 +435,64 @@ class GCEProvisioner(AbstractProvisioner):
366
435
  leader = instances[0] # assume leader was launched first
367
436
  except IndexError:
368
437
  raise NoSuchClusterException(self.clusterName)
369
- return Node(publicIP=leader.public_ips[0], privateIP=leader.private_ips[0],
370
- name=leader.name, launchTime=leader.created_at, nodeType=leader.size,
371
- preemptible=False, tags=None, use_private_ip=self._use_private_ip)
438
+ return Node(
439
+ publicIP=leader.public_ips[0],
440
+ privateIP=leader.private_ips[0],
441
+ name=leader.name,
442
+ launchTime=leader.created_at,
443
+ nodeType=leader.size,
444
+ preemptible=False,
445
+ tags=None,
446
+ use_private_ip=self._use_private_ip,
447
+ )
372
448
 
373
449
  def _injectWorkerFiles(self, node, botoExists):
374
450
  """
375
451
  Set up the credentials on the worker.
376
452
  """
377
- node.waitForNode('toil_worker', keyName=self._keyName)
453
+ node.waitForNode("toil_worker", keyName=self._keyName)
378
454
  node.copySshKeys(self._keyName)
379
- node.injectFile(self._credentialsPath, GoogleJobStore.nodeServiceAccountJson, 'toil_worker')
455
+ node.injectFile(
456
+ self._credentialsPath, GoogleJobStore.nodeServiceAccountJson, "toil_worker"
457
+ )
380
458
  if self._sseKey:
381
- node.injectFile(self._sseKey, self._sseKey, 'toil_worker')
459
+ node.injectFile(self._sseKey, self._sseKey, "toil_worker")
382
460
  if botoExists:
383
- node.injectFile(self._botoPath, self.NODE_BOTO_PATH, 'toil_worker')
461
+ node.injectFile(self._botoPath, self.NODE_BOTO_PATH, "toil_worker")
384
462
 
385
463
  def _getNodesInCluster(self, instance_type: Optional[str] = None):
386
- instanceGroup = self._gceDriver.ex_get_instancegroup(self.clusterName, zone=self._zone)
464
+ instanceGroup = self._gceDriver.ex_get_instancegroup(
465
+ self.clusterName, zone=self._zone
466
+ )
387
467
  instances = instanceGroup.list_instances()
388
468
  if instance_type:
389
- instances = [instance for instance in instances if instance.size == instance_type]
469
+ instances = [
470
+ instance for instance in instances if instance.size == instance_type
471
+ ]
390
472
  return instances
391
473
 
392
474
  def _getDriver(self):
393
- """ Connect to GCE """
475
+ """Connect to GCE"""
394
476
  driverCls = get_driver(Provider.GCE)
395
- return driverCls(self._clientEmail,
396
- self._googleJson,
397
- project=self._projectId,
398
- datacenter=self._zone)
477
+ return driverCls(
478
+ self._clientEmail,
479
+ self._googleJson,
480
+ project=self._projectId,
481
+ datacenter=self._zone,
482
+ )
399
483
 
400
484
  def _terminateInstances(self, instances):
401
485
  def worker(driver, instance):
402
- logger.debug('Terminating instance: %s', instance.name)
486
+ logger.debug("Terminating instance: %s", instance.name)
403
487
  driver.destroy_node(instance)
404
488
 
405
489
  threads = []
406
490
  for instance in instances:
407
- t = threading.Thread(target=worker, args=(self._gceDriver,instance))
491
+ t = threading.Thread(target=worker, args=(self._gceDriver, instance))
408
492
  threads.append(t)
409
493
  t.start()
410
494
 
411
- logger.debug('... Waiting for instance(s) to shut down...')
495
+ logger.debug("... Waiting for instance(s) to shut down...")
412
496
  for t in threads:
413
497
  t.join()
414
498
 
@@ -416,20 +500,37 @@ class GCEProvisioner(AbstractProvisioner):
416
500
  DEFAULT_TASK_COMPLETION_TIMEOUT = 180
417
501
 
418
502
  def ex_create_multiple_nodes(
419
- self, base_name, size, image, number, location=None,
420
- ex_network='default', ex_subnetwork=None, ex_tags=None,
421
- ex_metadata=None, ignore_errors=True, use_existing_disk=True,
422
- poll_interval=2, external_ip='ephemeral',
423
- ex_disk_type='pd-standard', ex_disk_auto_delete=True,
424
- ex_service_accounts=None, timeout=DEFAULT_TASK_COMPLETION_TIMEOUT,
425
- description=None, ex_can_ip_forward=None, ex_disks_gce_struct=None,
426
- ex_nic_gce_struct=None, ex_on_host_maintenance=None,
427
- ex_automatic_restart=None, ex_image_family=None,
428
- ex_preemptible=None):
503
+ self,
504
+ base_name,
505
+ size,
506
+ image,
507
+ number,
508
+ location=None,
509
+ ex_network="default",
510
+ ex_subnetwork=None,
511
+ ex_tags=None,
512
+ ex_metadata=None,
513
+ ignore_errors=True,
514
+ use_existing_disk=True,
515
+ poll_interval=2,
516
+ external_ip="ephemeral",
517
+ ex_disk_type="pd-standard",
518
+ ex_disk_auto_delete=True,
519
+ ex_service_accounts=None,
520
+ timeout=DEFAULT_TASK_COMPLETION_TIMEOUT,
521
+ description=None,
522
+ ex_can_ip_forward=None,
523
+ ex_disks_gce_struct=None,
524
+ ex_nic_gce_struct=None,
525
+ ex_on_host_maintenance=None,
526
+ ex_automatic_restart=None,
527
+ ex_image_family=None,
528
+ ex_preemptible=None,
529
+ ):
429
530
  """
430
- Monkey patch to gce.py in libcloud to allow disk and images to be specified.
431
- Also changed name to a uuid below.
432
- The prefix 'wp' identifies preemptible nodes and 'wn' non-preemptible nodes.
531
+ Monkey patch to gce.py in libcloud to allow disk and images to be specified.
532
+ Also changed name to a uuid below.
533
+ The prefix 'wp' identifies preemptible nodes and 'wn' non-preemptible nodes.
433
534
  """
434
535
  # if image and ex_disks_gce_struct:
435
536
  # raise ValueError("Cannot specify both 'image' and "
@@ -437,78 +538,80 @@ class GCEProvisioner(AbstractProvisioner):
437
538
 
438
539
  driver = self._getDriver()
439
540
  if image and ex_image_family:
440
- raise ValueError("Cannot specify both 'image' and "
441
- "'ex_image_family'")
541
+ raise ValueError("Cannot specify both 'image' and " "'ex_image_family'")
442
542
 
443
543
  location = location or driver.zone
444
- if not hasattr(location, 'name'):
544
+ if not hasattr(location, "name"):
445
545
  location = driver.ex_get_zone(location)
446
- if not hasattr(size, 'name'):
546
+ if not hasattr(size, "name"):
447
547
  size = driver.ex_get_size(size, location)
448
- if not hasattr(ex_network, 'name'):
548
+ if not hasattr(ex_network, "name"):
449
549
  ex_network = driver.ex_get_network(ex_network)
450
- if ex_subnetwork and not hasattr(ex_subnetwork, 'name'):
451
- ex_subnetwork = \
452
- driver.ex_get_subnetwork(ex_subnetwork,
453
- region=driver._get_region_from_zone(location))
550
+ if ex_subnetwork and not hasattr(ex_subnetwork, "name"):
551
+ ex_subnetwork = driver.ex_get_subnetwork(
552
+ ex_subnetwork, region=driver._get_region_from_zone(location)
553
+ )
454
554
  if ex_image_family:
455
555
  image = driver.ex_get_image_from_family(ex_image_family)
456
- if image and not hasattr(image, 'name'):
556
+ if image and not hasattr(image, "name"):
457
557
  image = driver.ex_get_image(image)
458
- if not hasattr(ex_disk_type, 'name'):
558
+ if not hasattr(ex_disk_type, "name"):
459
559
  ex_disk_type = driver.ex_get_disktype(ex_disk_type, zone=location)
460
560
 
461
- node_attrs = {'size': size,
462
- 'image': image,
463
- 'location': location,
464
- 'network': ex_network,
465
- 'subnetwork': ex_subnetwork,
466
- 'tags': ex_tags,
467
- 'metadata': ex_metadata,
468
- 'ignore_errors': ignore_errors,
469
- 'use_existing_disk': use_existing_disk,
470
- 'external_ip': external_ip,
471
- 'ex_disk_type': ex_disk_type,
472
- 'ex_disk_auto_delete': ex_disk_auto_delete,
473
- 'ex_service_accounts': ex_service_accounts,
474
- 'description': description,
475
- 'ex_can_ip_forward': ex_can_ip_forward,
476
- 'ex_disks_gce_struct': ex_disks_gce_struct,
477
- 'ex_nic_gce_struct': ex_nic_gce_struct,
478
- 'ex_on_host_maintenance': ex_on_host_maintenance,
479
- 'ex_automatic_restart': ex_automatic_restart,
480
- 'ex_preemptible': ex_preemptible}
561
+ node_attrs = {
562
+ "size": size,
563
+ "image": image,
564
+ "location": location,
565
+ "network": ex_network,
566
+ "subnetwork": ex_subnetwork,
567
+ "tags": ex_tags,
568
+ "metadata": ex_metadata,
569
+ "ignore_errors": ignore_errors,
570
+ "use_existing_disk": use_existing_disk,
571
+ "external_ip": external_ip,
572
+ "ex_disk_type": ex_disk_type,
573
+ "ex_disk_auto_delete": ex_disk_auto_delete,
574
+ "ex_service_accounts": ex_service_accounts,
575
+ "description": description,
576
+ "ex_can_ip_forward": ex_can_ip_forward,
577
+ "ex_disks_gce_struct": ex_disks_gce_struct,
578
+ "ex_nic_gce_struct": ex_nic_gce_struct,
579
+ "ex_on_host_maintenance": ex_on_host_maintenance,
580
+ "ex_automatic_restart": ex_automatic_restart,
581
+ "ex_preemptible": ex_preemptible,
582
+ }
481
583
  # List for holding the status information for disk/node creation.
482
584
  status_list = []
483
585
 
484
586
  for i in range(number):
485
- name = 'wp' if ex_preemptible else 'wn'
587
+ name = "wp" if ex_preemptible else "wn"
486
588
  name += str(uuid.uuid4()) # '%s-%03d' % (base_name, i)
487
- status = {'name': name, 'node_response': None, 'node': None}
589
+ status = {"name": name, "node_response": None, "node": None}
488
590
  status_list.append(status)
489
591
 
490
592
  start_time = time.time()
491
593
  complete = False
492
594
  while not complete:
493
595
  if time.time() - start_time >= timeout:
494
- raise Exception("Timeout (%s sec) while waiting for multiple "
495
- "instances")
596
+ raise Exception(
597
+ "Timeout (%s sec) while waiting for multiple " "instances"
598
+ )
496
599
  complete = True
497
600
  time.sleep(poll_interval)
498
601
  for status in status_list:
499
602
  # Create the node or check status if already in progress.
500
- if not status['node']:
501
- if not status['node_response']:
603
+ if not status["node"]:
604
+ if not status["node_response"]:
502
605
  driver._multi_create_node(status, node_attrs)
503
606
  else:
504
607
  driver._multi_check_node(status, node_attrs)
505
608
  # If any of the nodes have not been created (or failed) we are
506
609
  # not done yet.
507
- if not status['node']:
610
+ if not status["node"]:
508
611
  complete = False
509
612
 
510
613
  # Return list of nodes
511
614
  node_list = []
512
615
  for status in status_list:
513
- node_list.append(status['node'])
616
+ node_list.append(status["node"])
514
617
  return node_list