xpk 0.13.0__py3-none-any.whl → 0.14.1__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. integration/__init__.py +15 -0
  2. integration/docker_manager_test.py +102 -0
  3. integration/gcluster_a3mega_test.py +204 -0
  4. integration/gcluster_a3ultra_test.py +176 -0
  5. integration/gcluster_a4_test.py +176 -0
  6. integration/gcluster_test.py +107 -0
  7. xpk/commands/batch.py +9 -2
  8. xpk/commands/cluster.py +143 -117
  9. xpk/commands/cluster_gcluster.py +81 -14
  10. xpk/commands/cluster_gcluster_test.py +177 -0
  11. xpk/commands/cluster_test.py +92 -0
  12. xpk/commands/common.py +14 -26
  13. xpk/commands/info.py +11 -9
  14. xpk/commands/inspector.py +21 -10
  15. xpk/commands/job.py +25 -9
  16. xpk/commands/kind.py +39 -40
  17. xpk/commands/kjob_common.py +4 -4
  18. xpk/commands/run.py +9 -2
  19. xpk/commands/shell.py +13 -10
  20. xpk/commands/storage.py +21 -0
  21. xpk/commands/version.py +0 -4
  22. xpk/commands/workload.py +84 -29
  23. xpk/commands/workload_test.py +81 -0
  24. xpk/core/blueprint/blueprint_generator.py +4 -40
  25. xpk/core/blueprint/blueprint_test.py +0 -6
  26. xpk/core/blueprint/testing/__init__.py +15 -0
  27. xpk/core/capacity.py +6 -5
  28. xpk/core/cluster.py +91 -194
  29. xpk/core/cluster_private.py +6 -11
  30. xpk/core/commands.py +11 -18
  31. xpk/core/config.py +1 -1
  32. xpk/core/docker_image.py +3 -4
  33. xpk/core/gcloud_context.py +26 -2
  34. xpk/core/gcloud_context_test.py +96 -0
  35. xpk/core/gcluster_manager.py +0 -3
  36. xpk/core/jobset.py +4 -7
  37. xpk/core/kjob.py +14 -27
  38. xpk/core/kueue_manager.py +423 -0
  39. xpk/core/kueue_manager_test.py +574 -0
  40. xpk/core/monitoring.py +1 -1
  41. xpk/core/nap.py +10 -15
  42. xpk/core/network.py +17 -18
  43. xpk/core/nodepool.py +66 -77
  44. xpk/core/nodepool_test.py +198 -1
  45. xpk/core/pathways.py +5 -5
  46. xpk/core/ray.py +10 -14
  47. xpk/core/resources.py +6 -11
  48. xpk/core/scheduling.py +19 -1
  49. xpk/core/scheduling_test.py +31 -0
  50. xpk/core/system_characteristics.py +350 -232
  51. xpk/core/system_characteristics_test.py +73 -0
  52. xpk/core/vertex.py +1 -1
  53. xpk/core/workload.py +7 -8
  54. xpk/main.py +2 -4
  55. xpk/parser/cluster.py +7 -0
  56. xpk/parser/cluster_test.py +66 -0
  57. xpk/parser/common.py +11 -0
  58. xpk/parser/workload.py +62 -25
  59. xpk/parser/workload_test.py +82 -0
  60. xpk/templates/cluster_preheat.yaml.j2 +31 -0
  61. xpk/templates/filestore-pv.yaml +17 -0
  62. xpk/templates/filestore-pvc.yaml +11 -0
  63. xpk/templates/filestore-sc.yaml +10 -0
  64. xpk/templates/fuse-pv.yaml +17 -0
  65. xpk/templates/fuse-pvc.yaml +13 -0
  66. xpk/templates/kueue_config.yaml.j2 +95 -0
  67. xpk/templates/kueue_gke_default_topology.yaml.j2 +10 -0
  68. xpk/templates/kueue_sub_slicing_topology.yaml.j2 +14 -0
  69. xpk/templates/mtc-cpc.yaml +15 -0
  70. xpk/templates/volume_bundle.yaml +7 -0
  71. xpk/utils/feature_flags.py +28 -0
  72. xpk/utils/kueue.py +20 -0
  73. xpk/utils/templates.py +15 -0
  74. xpk/utils/topology.py +46 -0
  75. xpk/utils/topology_test.py +63 -0
  76. xpk/utils/validation.py +79 -55
  77. xpk/utils/validation_test.py +37 -0
  78. {xpk-0.13.0.dist-info → xpk-0.14.1.dist-info}/METADATA +6 -1
  79. xpk-0.14.1.dist-info/RECORD +133 -0
  80. xpk-0.14.1.dist-info/top_level.txt +2 -0
  81. xpk/core/kueue.py +0 -561
  82. xpk-0.13.0.dist-info/RECORD +0 -101
  83. xpk-0.13.0.dist-info/top_level.txt +0 -1
  84. {xpk-0.13.0.dist-info → xpk-0.14.1.dist-info}/WHEEL +0 -0
  85. {xpk-0.13.0.dist-info → xpk-0.14.1.dist-info}/entry_points.txt +0 -0
  86. {xpk-0.13.0.dist-info → xpk-0.14.1.dist-info}/licenses/LICENSE +0 -0
@@ -15,8 +15,8 @@ limitations under the License.
15
15
  """
16
16
 
17
17
  from dataclasses import dataclass
18
- from functools import reduce
19
- from operator import mul
18
+ from ..utils.topology import get_topology_product
19
+
20
20
 
21
21
  AcceleratorType = {'TPU': 1, 'GPU': 2, 'CPU': 3}
22
22
 
@@ -29,27 +29,52 @@ class AcceleratorCharacteristics:
29
29
 
30
30
 
31
31
  AcceleratorTypeToAcceleratorCharacteristics = {
32
- # TPU
33
32
  AcceleratorType['TPU']: AcceleratorCharacteristics(
34
- 'google.com/tpu',
35
- 'cloud.google.com/gke-tpu-accelerator',
36
- 'cloud.google.com/gke-tpu-topology',
33
+ resource_type='google.com/tpu',
34
+ accelerator_label='cloud.google.com/gke-tpu-accelerator',
35
+ machine_label='cloud.google.com/gke-tpu-topology',
37
36
  ),
38
- # GPU
39
37
  AcceleratorType['GPU']: AcceleratorCharacteristics(
40
- 'nvidia.com/gpu',
41
- 'cloud.google.com/gke-accelerator',
42
- 'cloud.google.com/gce-machine-type',
38
+ resource_type='nvidia.com/gpu',
39
+ accelerator_label='cloud.google.com/gke-accelerator',
40
+ machine_label='cloud.google.com/gce-machine-type',
43
41
  ),
44
- # CPU
45
42
  AcceleratorType['CPU']: AcceleratorCharacteristics(
46
- 'cpu', '', 'cloud.google.com/gke-nodepool'
43
+ resource_type='cpu',
44
+ accelerator_label='',
45
+ machine_label='cloud.google.com/gke-nodepool',
47
46
  ),
48
47
  }
49
48
 
50
49
 
51
50
  @dataclass
52
51
  class SystemCharacteristics:
52
+ """Contains the defining characteristics of a specific accelerator system.
53
+
54
+ This dataclass holds the hardware and configuration details for a given
55
+ accelerator type, such as its topology, machine type, and chip count. It
56
+ provides a standardized way to access system-specific information throughout
57
+ the application.
58
+
59
+ Attributes:
60
+ topology: The physical or logical layout of the accelerator chips (e.g.,
61
+ '2x2x1' for TPUs, 'N/A' for single-VM GPUs).
62
+ vms_per_slice: The number of Virtual Machines that constitute a single
63
+ accelerator slice.
64
+ gke_accelerator: The name of the accelerator as recognized by GKE (e.g.,
65
+ 'nvidia-l4', 'tpu7x').
66
+ gce_machine_type: The GCE machine type that hosts the accelerator (e.g.,
67
+ 'g2-standard-12').
68
+ chips_per_vm: The number of accelerator chips attached to a single VM.
69
+ accelerator_type: The category of the accelerator (e.g., TPU, GPU, CPU)
70
+ from the AcceleratorType enum.
71
+ device_type: A user-facing name for the specific hardware configuration
72
+ (e.g., 'l4-1', 'h100-80gb-8').
73
+ supports_sub_slicing: Whether the Sub-slicing feature is supported.
74
+ requires_workload_policy: A boolean indicating if a GCE resource
75
+ workload policy is required. This is automatically set to True for GPUs.
76
+ """
77
+
53
78
  topology: str
54
79
  vms_per_slice: int
55
80
  gke_accelerator: str
@@ -57,6 +82,12 @@ class SystemCharacteristics:
57
82
  chips_per_vm: int
58
83
  accelerator_type: int # TODO: use enums
59
84
  device_type: str
85
+ supports_sub_slicing: bool
86
+ requires_workload_policy: bool = False
87
+
88
+ def __post_init__(self):
89
+ if self.accelerator_type == AcceleratorType['GPU']:
90
+ self.requires_workload_policy = True
60
91
 
61
92
 
62
93
  def get_system_characteristics(
@@ -99,21 +130,24 @@ def get_tpu_system_characteristics_map(
99
130
  gke_accelerator: str,
100
131
  machine_type: str,
101
132
  supported_topologies: list[str],
133
+ supports_sub_slicing: bool,
134
+ requires_workload_policy: bool = False,
102
135
  ) -> dict[str, SystemCharacteristics]:
103
136
  system_characteristics_map = {}
104
137
  for topology in supported_topologies:
105
- total_chips = reduce(mul, (int(x) for x in topology.split('x')), 1)
106
- num_tensorcores = total_chips * tensorcores_per_chip
107
- chips_per_vm = 1 if total_chips == 1 else 4
108
- vms_per_slice = total_chips // chips_per_vm
138
+ chips_per_vm = compute_chips_per_vm(topology)
139
+ vms_per_slice = compute_vms_per_slice(topology)
140
+ num_tensorcores = compute_num_tensorcores(tensorcores_per_chip, topology)
109
141
  system = SystemCharacteristics(
110
- topology,
111
- vms_per_slice,
112
- gke_accelerator,
113
- machine_type,
114
- chips_per_vm,
115
- AcceleratorType['TPU'],
116
- f'{prefix}-{num_tensorcores}',
142
+ topology=topology,
143
+ vms_per_slice=vms_per_slice,
144
+ gke_accelerator=gke_accelerator,
145
+ gce_machine_type=machine_type,
146
+ chips_per_vm=chips_per_vm,
147
+ accelerator_type=AcceleratorType['TPU'],
148
+ device_type=f'{prefix}-{num_tensorcores}',
149
+ requires_workload_policy=requires_workload_policy,
150
+ supports_sub_slicing=supports_sub_slicing,
117
151
  )
118
152
  system_characteristics_map[f'{prefix}-{topology}'] = system
119
153
  system_characteristics_map[f'{prefix}-{num_tensorcores}'] = system
@@ -121,6 +155,19 @@ def get_tpu_system_characteristics_map(
121
155
  return system_characteristics_map
122
156
 
123
157
 
158
+ def compute_chips_per_vm(topology: str) -> int:
159
+ return 1 if get_topology_product(topology) == 1 else 4
160
+
161
+
162
+ def compute_num_tensorcores(tensorcores_per_chip: int, topology: str) -> int:
163
+ return get_topology_product(topology) * tensorcores_per_chip
164
+
165
+
166
+ def compute_vms_per_slice(topology: str) -> int:
167
+ chips_per_vm = compute_chips_per_vm(topology)
168
+ return get_topology_product(topology) // chips_per_vm
169
+
170
+
124
171
  ################### Subcommand Helper Functions #############################
125
172
  """ !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
126
173
  IF YOU MODIFY THE BELOW UserFacingNameToSystemCharacteristics MAP YOU SHOULD
@@ -131,126 +178,166 @@ UserFacingNameToSystemCharacteristics = {
131
178
  # GPU system characteristics
132
179
  # l4-$CHIPSc
133
180
  'l4-1': SystemCharacteristics(
134
- 'N/A',
135
- 1,
136
- 'nvidia-l4',
137
- 'g2-standard-12',
138
- 1,
139
- AcceleratorType['GPU'],
140
- 'l4-1',
181
+ topology='N/A',
182
+ vms_per_slice=1,
183
+ gke_accelerator='nvidia-l4',
184
+ gce_machine_type='g2-standard-12',
185
+ chips_per_vm=1,
186
+ accelerator_type=AcceleratorType['GPU'],
187
+ device_type='l4-1',
188
+ supports_sub_slicing=False,
141
189
  ),
142
190
  'l4-2': SystemCharacteristics(
143
- 'N/A',
144
- 1,
145
- 'nvidia-l4',
146
- 'g2-standard-24',
147
- 2,
148
- AcceleratorType['GPU'],
149
- 'l4-2',
191
+ topology='N/A',
192
+ vms_per_slice=1,
193
+ gke_accelerator='nvidia-l4',
194
+ gce_machine_type='g2-standard-24',
195
+ chips_per_vm=2,
196
+ accelerator_type=AcceleratorType['GPU'],
197
+ device_type='l4-2',
198
+ supports_sub_slicing=False,
150
199
  ),
151
200
  'l4-4': SystemCharacteristics(
152
- 'N/A',
153
- 1,
154
- 'nvidia-l4',
155
- 'g2-standard-48',
156
- 4,
157
- AcceleratorType['GPU'],
158
- 'l4-4',
201
+ topology='N/A',
202
+ vms_per_slice=1,
203
+ gke_accelerator='nvidia-l4',
204
+ gce_machine_type='g2-standard-48',
205
+ chips_per_vm=4,
206
+ accelerator_type=AcceleratorType['GPU'],
207
+ device_type='l4-4',
208
+ supports_sub_slicing=False,
159
209
  ),
160
210
  'l4-8': SystemCharacteristics(
161
- 'N/A',
162
- 1,
163
- 'nvidia-l4',
164
- 'g2-standard-96',
165
- 8,
166
- AcceleratorType['GPU'],
167
- 'l4-8',
211
+ topology='N/A',
212
+ vms_per_slice=1,
213
+ gke_accelerator='nvidia-l4',
214
+ gce_machine_type='g2-standard-96',
215
+ chips_per_vm=8,
216
+ accelerator_type=AcceleratorType['GPU'],
217
+ device_type='l4-8',
218
+ supports_sub_slicing=False,
168
219
  ),
169
220
  # A100-40gb-$CHIPSc
170
221
  'a100-40gb-1': SystemCharacteristics(
171
- 'N/A',
172
- 1,
173
- 'nvidia-tesla-a100',
174
- 'a2-highgpu-1g',
175
- 1,
176
- AcceleratorType['GPU'],
177
- 'a100-40gb-1',
222
+ topology='N/A',
223
+ vms_per_slice=1,
224
+ gke_accelerator='nvidia-tesla-a100',
225
+ gce_machine_type='a2-highgpu-1g',
226
+ chips_per_vm=1,
227
+ accelerator_type=AcceleratorType['GPU'],
228
+ device_type='a100-40gb-1',
229
+ supports_sub_slicing=False,
178
230
  ),
179
231
  'a100-40gb-2': SystemCharacteristics(
180
- 'N/A',
181
- 1,
182
- 'nvidia-tesla-a100',
183
- 'a2-highgpu-2g',
184
- 2,
185
- AcceleratorType['GPU'],
186
- 'a100-40gb-2',
232
+ topology='N/A',
233
+ vms_per_slice=1,
234
+ gke_accelerator='nvidia-tesla-a100',
235
+ gce_machine_type='a2-highgpu-2g',
236
+ chips_per_vm=2,
237
+ accelerator_type=AcceleratorType['GPU'],
238
+ device_type='a100-40gb-2',
239
+ supports_sub_slicing=False,
187
240
  ),
188
241
  'a100-40gb-4': SystemCharacteristics(
189
- 'N/A',
190
- 1,
191
- 'nvidia-tesla-a100',
192
- 'a2-highgpu-4g',
193
- 4,
194
- AcceleratorType['GPU'],
195
- 'a100-40gb-4',
242
+ topology='N/A',
243
+ vms_per_slice=1,
244
+ gke_accelerator='nvidia-tesla-a100',
245
+ gce_machine_type='a2-highgpu-4g',
246
+ chips_per_vm=4,
247
+ accelerator_type=AcceleratorType['GPU'],
248
+ device_type='a100-40gb-4',
249
+ supports_sub_slicing=False,
196
250
  ),
197
251
  'a100-40gb-8': SystemCharacteristics(
198
- 'N/A',
199
- 1,
200
- 'nvidia-tesla-a100',
201
- 'a2-highgpu-8g',
202
- 8,
203
- AcceleratorType['GPU'],
204
- 'a100-40gb-8',
252
+ topology='N/A',
253
+ vms_per_slice=1,
254
+ gke_accelerator='nvidia-tesla-a100',
255
+ gce_machine_type='a2-highgpu-8g',
256
+ chips_per_vm=8,
257
+ accelerator_type=AcceleratorType['GPU'],
258
+ device_type='a100-40gb-8',
259
+ supports_sub_slicing=False,
260
+ ),
261
+ 'gb200-4': SystemCharacteristics(
262
+ topology='1x72',
263
+ vms_per_slice=1,
264
+ gke_accelerator='nvidia-gb200',
265
+ gce_machine_type='a4x-highgpu-4g',
266
+ chips_per_vm=4,
267
+ accelerator_type=AcceleratorType['GPU'],
268
+ device_type='gb200-4',
269
+ supports_sub_slicing=False,
270
+ ),
271
+ 'gb200-4-nolssd': SystemCharacteristics(
272
+ topology='1x72',
273
+ vms_per_slice=1,
274
+ gke_accelerator='nvidia-gb200',
275
+ gce_machine_type='a4x-highgpu-4g-nolssd',
276
+ chips_per_vm=4,
277
+ accelerator_type=AcceleratorType['GPU'],
278
+ device_type='gb200-4',
279
+ supports_sub_slicing=False,
205
280
  ),
206
281
  'b200-8': SystemCharacteristics(
207
- 'N/A',
208
- 1,
209
- 'nvidia-b200',
210
- 'a4-highgpu-8g',
211
- 8,
212
- AcceleratorType['GPU'],
213
- 'b200-8',
282
+ topology='N/A',
283
+ vms_per_slice=1,
284
+ gke_accelerator='nvidia-b200',
285
+ gce_machine_type='a4-highgpu-8g',
286
+ chips_per_vm=8,
287
+ accelerator_type=AcceleratorType['GPU'],
288
+ device_type='b200-8',
289
+ supports_sub_slicing=False,
214
290
  ),
215
291
  'h200-141gb-8': SystemCharacteristics(
216
- 'N/A',
217
- 1,
218
- 'nvidia-h200-141gb',
219
- 'a3-ultragpu-8g',
220
- 8,
221
- AcceleratorType['GPU'],
222
- 'h200-141gb-8',
292
+ topology='N/A',
293
+ vms_per_slice=1,
294
+ gke_accelerator='nvidia-h200-141gb',
295
+ gce_machine_type='a3-ultragpu-8g',
296
+ chips_per_vm=8,
297
+ accelerator_type=AcceleratorType['GPU'],
298
+ device_type='h200-141gb-8',
299
+ supports_sub_slicing=False,
223
300
  ),
224
301
  # H100-80gb-$CHIPS
225
302
  'h100-80gb-8': SystemCharacteristics(
226
- 'N/A',
227
- 1,
228
- 'nvidia-h100-80gb',
229
- 'a3-highgpu-8g',
230
- 8,
231
- AcceleratorType['GPU'],
232
- 'h100-80gb-8',
303
+ topology='N/A',
304
+ vms_per_slice=1,
305
+ gke_accelerator='nvidia-h100-80gb',
306
+ gce_machine_type='a3-highgpu-8g',
307
+ chips_per_vm=8,
308
+ accelerator_type=AcceleratorType['GPU'],
309
+ device_type='h100-80gb-8',
310
+ supports_sub_slicing=False,
233
311
  ),
234
312
  # H100-mega-80gb-$CHIPS
235
313
  'h100-mega-80gb-8': SystemCharacteristics(
236
- 'N/A',
237
- 1,
238
- 'nvidia-h100-mega-80gb',
239
- 'a3-megagpu-8g',
240
- 8,
241
- AcceleratorType['GPU'],
242
- 'h100-mega-80gb-8',
314
+ topology='N/A',
315
+ vms_per_slice=1,
316
+ gke_accelerator='nvidia-h100-mega-80gb',
317
+ gce_machine_type='a3-megagpu-8g',
318
+ chips_per_vm=8,
319
+ accelerator_type=AcceleratorType['GPU'],
320
+ device_type='h100-mega-80gb-8',
321
+ supports_sub_slicing=False,
243
322
  ),
244
323
  # TPU system characteristics
245
324
  **get_tpu_system_characteristics_map(
246
- 'tpu7x', 2, 'tpu7x', 'tpu7x-standard-1t', ['1x1x1']
325
+ prefix='tpu7x',
326
+ tensorcores_per_chip=2,
327
+ gke_accelerator='tpu7x',
328
+ machine_type='tpu7x-standard-1t',
329
+ supported_topologies=['1x1x1'],
330
+ requires_workload_policy=True,
331
+ supports_sub_slicing=False,
247
332
  ),
248
333
  **get_tpu_system_characteristics_map(
249
- 'tpu7x',
250
- 2,
251
- 'tpu7x',
252
- 'tpu7x-standard-4t',
253
- [
334
+ prefix='tpu7x',
335
+ tensorcores_per_chip=2,
336
+ gke_accelerator='tpu7x',
337
+ machine_type='tpu7x-standard-4t',
338
+ requires_workload_policy=True,
339
+ supports_sub_slicing=False,
340
+ supported_topologies=[
254
341
  '12x12x12',
255
342
  '12x12x16',
256
343
  '12x12x20',
@@ -352,21 +439,36 @@ UserFacingNameToSystemCharacteristics = {
352
439
  ],
353
440
  ),
354
441
  **get_tpu_system_characteristics_map(
355
- 'v6e', 1, 'tpu-v6e-slice', 'ct6e-standard-1t', ['1x1']
442
+ prefix='v6e',
443
+ tensorcores_per_chip=1,
444
+ gke_accelerator='tpu-v6e-slice',
445
+ machine_type='ct6e-standard-1t',
446
+ supports_sub_slicing=False,
447
+ supported_topologies=['1x1'],
356
448
  ),
357
449
  **get_tpu_system_characteristics_map(
358
- 'v6e',
359
- 1,
360
- 'tpu-v6e-slice',
361
- 'ct6e-standard-4t',
362
- ['2x2', '2x4', '4x4', '4x8', '8x8', '8x16', '16x16'],
450
+ prefix='v6e',
451
+ tensorcores_per_chip=1,
452
+ gke_accelerator='tpu-v6e-slice',
453
+ machine_type='ct6e-standard-4t',
454
+ supports_sub_slicing=True,
455
+ supported_topologies=[
456
+ '2x2',
457
+ '2x4',
458
+ '4x4',
459
+ '4x8',
460
+ '8x8',
461
+ '8x16',
462
+ '16x16',
463
+ ],
363
464
  ),
364
465
  **get_tpu_system_characteristics_map(
365
- 'v5p',
366
- 2,
367
- 'tpu-v5p-slice',
368
- 'ct5p-hightpu-4t',
369
- [
466
+ prefix='v5p',
467
+ tensorcores_per_chip=2,
468
+ gke_accelerator='tpu-v5p-slice',
469
+ machine_type='ct5p-hightpu-4t',
470
+ supports_sub_slicing=False,
471
+ supported_topologies=[
370
472
  '2x2x1',
371
473
  '2x2x2',
372
474
  '2x2x4',
@@ -466,18 +568,20 @@ UserFacingNameToSystemCharacteristics = {
466
568
  ],
467
569
  ),
468
570
  **get_tpu_system_characteristics_map(
469
- 'v5litepod',
470
- 1,
471
- 'tpu-v5-lite-podslice',
472
- 'ct5lp-hightpu-4t',
473
- ['2x4', '4x4', '4x8', '8x8', '8x16', '16x16'],
571
+ prefix='v5litepod',
572
+ tensorcores_per_chip=1,
573
+ gke_accelerator='tpu-v5-lite-podslice',
574
+ machine_type='ct5lp-hightpu-4t',
575
+ supports_sub_slicing=False,
576
+ supported_topologies=['2x4', '4x4', '4x8', '8x8', '8x16', '16x16'],
474
577
  ),
475
578
  **get_tpu_system_characteristics_map(
476
- 'v4',
477
- 2,
478
- 'tpu-v4-podslice',
479
- 'ct4p-hightpu-4t',
480
- [
579
+ prefix='v4',
580
+ tensorcores_per_chip=2,
581
+ gke_accelerator='tpu-v4-podslice',
582
+ machine_type='ct4p-hightpu-4t',
583
+ supports_sub_slicing=False,
584
+ supported_topologies=[
481
585
  '2x2x1',
482
586
  '2x2x2',
483
587
  '2x2x4',
@@ -496,131 +600,145 @@ UserFacingNameToSystemCharacteristics = {
496
600
  # There are no chips in CPUs.
497
601
  # m1-megamem-#vCPUs-#VMs
498
602
  'm1-megamem-96-1': SystemCharacteristics(
499
- 'N/A',
500
- 1,
501
- 'N/A',
502
- 'm1-megamem-96',
503
- 96,
504
- AcceleratorType['CPU'],
505
- 'm1-megamem-96-1',
603
+ topology='N/A',
604
+ vms_per_slice=1,
605
+ gke_accelerator='N/A',
606
+ gce_machine_type='m1-megamem-96',
607
+ chips_per_vm=96,
608
+ accelerator_type=AcceleratorType['CPU'],
609
+ device_type='m1-megamem-96-1',
610
+ supports_sub_slicing=False,
506
611
  ),
507
612
  # n2-standard-#vCPUs-#VMs
508
613
  'n2-standard-64-1': SystemCharacteristics(
509
- 'N/A',
510
- 1,
511
- 'N/A',
512
- 'n2-standard-64',
513
- 64,
514
- AcceleratorType['CPU'],
515
- 'n2-standard-64-1',
614
+ topology='N/A',
615
+ vms_per_slice=1,
616
+ gke_accelerator='N/A',
617
+ gce_machine_type='n2-standard-64',
618
+ chips_per_vm=64,
619
+ accelerator_type=AcceleratorType['CPU'],
620
+ device_type='n2-standard-64-1',
621
+ supports_sub_slicing=False,
516
622
  ),
517
623
  'n2-standard-32-1': SystemCharacteristics(
518
- 'N/A',
519
- 1,
520
- 'N/A',
521
- 'n2-standard-32',
522
- 32,
523
- AcceleratorType['CPU'],
524
- 'n2-standard-32-1',
624
+ topology='N/A',
625
+ vms_per_slice=1,
626
+ gke_accelerator='N/A',
627
+ gce_machine_type='n2-standard-32',
628
+ chips_per_vm=32,
629
+ accelerator_type=AcceleratorType['CPU'],
630
+ device_type='n2-standard-32-1',
631
+ supports_sub_slicing=False,
525
632
  ),
526
633
  'n2-standard-32-2': SystemCharacteristics(
527
- 'N/A',
528
- 2,
529
- 'N/A',
530
- 'n2-standard-32',
531
- 32,
532
- AcceleratorType['CPU'],
533
- 'n2-standard-32-2',
634
+ topology='N/A',
635
+ vms_per_slice=2,
636
+ gke_accelerator='N/A',
637
+ gce_machine_type='n2-standard-32',
638
+ chips_per_vm=32,
639
+ accelerator_type=AcceleratorType['CPU'],
640
+ device_type='n2-standard-32-2',
641
+ supports_sub_slicing=False,
534
642
  ),
535
643
  'n2-standard-32-4': SystemCharacteristics(
536
- 'N/A',
537
- 4,
538
- 'N/A',
539
- 'n2-standard-32',
540
- 32,
541
- AcceleratorType['CPU'],
542
- 'n2-standard-32-4',
644
+ topology='N/A',
645
+ vms_per_slice=4,
646
+ gke_accelerator='N/A',
647
+ gce_machine_type='n2-standard-32',
648
+ chips_per_vm=32,
649
+ accelerator_type=AcceleratorType['CPU'],
650
+ device_type='n2-standard-32-4',
651
+ supports_sub_slicing=False,
543
652
  ),
544
653
  'n2-standard-32-8': SystemCharacteristics(
545
- 'N/A',
546
- 8,
547
- 'N/A',
548
- 'n2-standard-32',
549
- 32,
550
- AcceleratorType['CPU'],
551
- 'n2-standard-32-8',
654
+ topology='N/A',
655
+ vms_per_slice=8,
656
+ gke_accelerator='N/A',
657
+ gce_machine_type='n2-standard-32',
658
+ chips_per_vm=32,
659
+ accelerator_type=AcceleratorType['CPU'],
660
+ device_type='n2-standard-32-8',
661
+ supports_sub_slicing=False,
552
662
  ),
553
663
  'n2-standard-32-16': SystemCharacteristics(
554
- 'N/A',
555
- 16,
556
- 'N/A',
557
- 'n2-standard-32',
558
- 32,
559
- AcceleratorType['CPU'],
560
- 'n2-standard-32-16',
664
+ topology='N/A',
665
+ vms_per_slice=16,
666
+ gke_accelerator='N/A',
667
+ gce_machine_type='n2-standard-32',
668
+ chips_per_vm=32,
669
+ accelerator_type=AcceleratorType['CPU'],
670
+ device_type='n2-standard-32-16',
671
+ supports_sub_slicing=False,
561
672
  ),
562
673
  'n2-standard-32-32': SystemCharacteristics(
563
- 'N/A',
564
- 32,
565
- 'N/A',
566
- 'n2-standard-32',
567
- 32,
568
- AcceleratorType['CPU'],
569
- 'n2-standard-32-32',
674
+ topology='N/A',
675
+ vms_per_slice=32,
676
+ gke_accelerator='N/A',
677
+ gce_machine_type='n2-standard-32',
678
+ chips_per_vm=32,
679
+ accelerator_type=AcceleratorType['CPU'],
680
+ device_type='n2-standard-32-32',
681
+ supports_sub_slicing=False,
570
682
  ),
571
683
  'n2-standard-32-64': SystemCharacteristics(
572
- 'N/A',
573
- 64,
574
- 'N/A',
575
- 'n2-standard-32',
576
- 32,
577
- AcceleratorType['CPU'],
578
- 'n2-standard-32-64',
684
+ topology='N/A',
685
+ vms_per_slice=64,
686
+ gke_accelerator='N/A',
687
+ gce_machine_type='n2-standard-32',
688
+ chips_per_vm=32,
689
+ accelerator_type=AcceleratorType['CPU'],
690
+ device_type='n2-standard-32-64',
691
+ supports_sub_slicing=False,
579
692
  ),
580
693
  'n2-standard-32-128': SystemCharacteristics(
581
- 'N/A',
582
- 128,
583
- 'N/A',
584
- 'n2-standard-32',
585
- 32,
586
- AcceleratorType['CPU'],
587
- 'n2-standard-32-128',
694
+ topology='N/A',
695
+ vms_per_slice=128,
696
+ gke_accelerator='N/A',
697
+ gce_machine_type='n2-standard-32',
698
+ chips_per_vm=32,
699
+ accelerator_type=AcceleratorType['CPU'],
700
+ device_type='n2-standard-32-128',
701
+ supports_sub_slicing=False,
588
702
  ),
589
703
  'n2-standard-32-256': SystemCharacteristics(
590
- 'N/A',
591
- 256,
592
- 'N/A',
593
- 'n2-standard-32',
594
- 32,
595
- AcceleratorType['CPU'],
596
- 'n2-standard-32-256',
704
+ topology='N/A',
705
+ vms_per_slice=256,
706
+ gke_accelerator='N/A',
707
+ gce_machine_type='n2-standard-32',
708
+ chips_per_vm=32,
709
+ accelerator_type=AcceleratorType['CPU'],
710
+ device_type='n2-standard-32-256',
711
+ supports_sub_slicing=False,
597
712
  ),
598
713
  'n2-standard-32-512': SystemCharacteristics(
599
- 'N/A',
600
- 512,
601
- 'N/A',
602
- 'n2-standard-32',
603
- 32,
604
- AcceleratorType['CPU'],
605
- 'n2-standard-32-512',
714
+ topology='N/A',
715
+ vms_per_slice=512,
716
+ gke_accelerator='N/A',
717
+ gce_machine_type='n2-standard-32',
718
+ chips_per_vm=32,
719
+ accelerator_type=AcceleratorType['CPU'],
720
+ device_type='n2-standard-32-512',
721
+ supports_sub_slicing=False,
606
722
  ),
607
723
  'n2-standard-32-1024': SystemCharacteristics(
608
- 'N/A',
609
- 1024,
610
- 'N/A',
611
- 'n2-standard-32',
612
- 32,
613
- AcceleratorType['CPU'],
614
- 'n2-standard-32-1024',
724
+ topology='N/A',
725
+ vms_per_slice=1024,
726
+ gke_accelerator='N/A',
727
+ gce_machine_type='n2-standard-32',
728
+ chips_per_vm=32,
729
+ accelerator_type=AcceleratorType['CPU'],
730
+ device_type='n2-standard-32-1024',
731
+ supports_sub_slicing=False,
615
732
  ),
616
733
  'n2-standard-32-2048': SystemCharacteristics(
617
- 'N/A',
618
- 2048,
619
- 'N/A',
620
- 'n2-standard-32',
621
- 32,
622
- AcceleratorType['CPU'],
623
- 'n2-standard-32-2048',
734
+ topology='N/A',
735
+ vms_per_slice=2048,
736
+ gke_accelerator='N/A',
737
+ gce_machine_type='n2-standard-32',
738
+ chips_per_vm=32,
739
+ accelerator_type=AcceleratorType['CPU'],
740
+ device_type='n2-standard-32-2048',
741
+ supports_sub_slicing=False,
624
742
  ),
625
743
  }
626
744
  """ If you modify UserFacingNameToSystemCharacteristics you should also modify