xpk-0.7.2-py3-none-any.whl → xpk-0.9.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. xpk/commands/batch.py +19 -13
  2. xpk/commands/cluster.py +240 -71
  3. xpk/commands/cluster_gcluster.py +22 -5
  4. xpk/commands/common.py +33 -1
  5. xpk/commands/info.py +2 -4
  6. xpk/commands/job.py +7 -8
  7. xpk/commands/kjob_common.py +30 -18
  8. xpk/commands/run.py +17 -12
  9. xpk/commands/shell.py +3 -4
  10. xpk/commands/storage.py +75 -19
  11. xpk/commands/workload.py +161 -324
  12. xpk/core/blueprint/blueprint_definitions.py +2 -0
  13. xpk/core/blueprint/blueprint_generator.py +335 -45
  14. xpk/core/capacity.py +1 -0
  15. xpk/core/cluster.py +193 -12
  16. xpk/core/config.py +3 -1
  17. xpk/core/docker_manager.py +1 -1
  18. xpk/core/docker_resources.py +9 -21
  19. xpk/core/filestore.py +5 -1
  20. xpk/core/gcsfuse.py +27 -6
  21. xpk/core/kjob.py +66 -20
  22. xpk/core/kueue.py +30 -0
  23. xpk/core/mtc.py +195 -0
  24. xpk/core/nap.py +4 -0
  25. xpk/core/network.py +34 -22
  26. xpk/core/nodepool.py +28 -26
  27. xpk/core/pathways.py +165 -210
  28. xpk/core/resources.py +21 -0
  29. xpk/core/scheduling.py +36 -0
  30. xpk/core/storage.py +66 -12
  31. xpk/core/system_characteristics.py +9 -0
  32. xpk/core/workload.py +28 -83
  33. xpk/core/workload_decorators/rdma_decorator.py +11 -15
  34. xpk/core/workload_decorators/storage_decorator.py +8 -3
  35. xpk/core/workload_decorators/tcpx_decorator.py +179 -0
  36. xpk/core/workload_decorators/tcpxo_decorator.py +17 -16
  37. xpk/parser/cluster.py +574 -381
  38. xpk/parser/storage.py +25 -5
  39. xpk/parser/workload.py +59 -31
  40. xpk/utils/kubectl.py +4 -1
  41. {xpk-0.7.2.dist-info → xpk-0.9.0.dist-info}/METADATA +192 -93
  42. {xpk-0.7.2.dist-info → xpk-0.9.0.dist-info}/RECORD +46 -44
  43. {xpk-0.7.2.dist-info → xpk-0.9.0.dist-info}/WHEEL +1 -1
  44. {xpk-0.7.2.dist-info → xpk-0.9.0.dist-info}/entry_points.txt +0 -0
  45. {xpk-0.7.2.dist-info → xpk-0.9.0.dist-info}/licenses/LICENSE +0 -0
  46. {xpk-0.7.2.dist-info → xpk-0.9.0.dist-info}/top_level.txt +0 -0
@@ -22,32 +22,34 @@ from ruamel import yaml
22
22
 
23
23
  from ...utils.console import xpk_exit, xpk_print
24
24
  from ...utils.file import ensure_directory_exists
25
- from ..capacity import H100_MEGA_DEVICE_TYPE, H200_DEVICE_TYPE, CapacityType
25
+ from ..capacity import (
26
+ B200_DEVICE_TYPE,
27
+ H100_MEGA_DEVICE_TYPE,
28
+ H200_DEVICE_TYPE,
29
+ CapacityType,
30
+ )
26
31
  from ..system_characteristics import get_system_characteristics_by_device_type
27
32
  from .blueprint_definitions import Blueprint, DeploymentGroup, DeploymentModule
28
33
 
34
+
29
35
  yaml = yaml.YAML()
30
36
 
31
37
  a3mega_device_type = H100_MEGA_DEVICE_TYPE
32
38
  a3ultra_device_type = H200_DEVICE_TYPE
33
- supported_device_types = {a3mega_device_type, a3ultra_device_type}
39
+ a4_device_type = B200_DEVICE_TYPE
40
+ supported_device_types = {
41
+ a3mega_device_type,
42
+ a3ultra_device_type,
43
+ a4_device_type,
44
+ }
34
45
  blueprint_dependencies_dir = {
35
46
  a3mega_device_type: "src/xpk/blueprints/a3mega",
36
47
  a3ultra_device_type: "src/xpk/blueprints/a3ultra",
48
+ a4_device_type: "src/xpk/blueprints/a4",
37
49
  }
38
50
 
39
51
  cluster_toolkit_url = "github.com/GoogleCloudPlatform/cluster-toolkit"
40
- cluster_toolkit_version = "v1.45.1"
41
-
42
-
43
- def get_subnetworks_for_a3mega(cluster_name: str) -> list[str]:
44
- return [f"{cluster_name}-gpunet-{i}-subnet" for i in range(8)]
45
-
46
-
47
- def get_subnetworks_for_a3ultra(cluster_name: str) -> list[str]:
48
- return [f"{cluster_name}-sub-1"] + [
49
- f"{cluster_name}-rdma-sub-{i}" for i in range(8)
50
- ]
52
+ cluster_toolkit_version = "v1.48.0"
51
53
 
52
54
 
53
55
  class BlueprintGeneratorOutput:
@@ -157,6 +159,11 @@ class BlueprintGenerator:
157
159
  "total_min_nodes": system_node_pool_min_node_count,
158
160
  "total_max_nodes": 1000,
159
161
  },
162
+ "k8s_network_names": {
163
+ "gvnic_prefix": f"{cluster_name}-gpunet-",
164
+ "gvnic_postfix": "-subnet",
165
+ "gvnic_start_index": 0,
166
+ },
160
167
  },
161
168
  outputs=["instructions"],
162
169
  )
@@ -173,13 +180,17 @@ class BlueprintGenerator:
173
180
  a3_megagpu_pool_0 = DeploymentModule(
174
181
  id="a3_megagpu_pool_0",
175
182
  source="modules/compute/gke-node-pool",
176
- use=["gke_cluster", gpu_subnets_name, "group_placement_0"],
183
+ use=["gke_cluster", gpu_subnets_name],
177
184
  settings={
178
185
  "name": f"{cluster_name}-a3-megagpu-pool-0",
179
186
  "machine_type": system.gce_machine_type,
180
187
  "static_node_count": num_nodes,
181
188
  "zones": [zone],
182
- "host_maintenance_interval": "PERIODIC",
189
+ "host_maintenance_interval": (
190
+ None
191
+ if capacity_type == CapacityType.RESERVATION
192
+ else "PERIODIC"
193
+ ),
183
194
  "reservation_affinity": self._getblock_reservation_affinity(
184
195
  reservation
185
196
  ),
@@ -190,6 +201,9 @@ class BlueprintGenerator:
190
201
  },
191
202
  outputs=["instructions"],
192
203
  )
204
+
205
+ set_placement_policy = capacity_type != CapacityType.SPOT
206
+ tas_name = "topologyName: 'gke-default'" if set_placement_policy else ""
193
207
  num_chips = num_nodes * system.chips_per_vm
194
208
  workload = DeploymentModule(
195
209
  id="workload_component_install",
@@ -200,7 +214,10 @@ class BlueprintGenerator:
200
214
  "install": True,
201
215
  "version": "v0.10.0", # TAS feature-gates is enabled in CT
202
216
  "config_path": f'$(ghpc_stage("{blueprint_name}"))/kueue-xpk-configuration.yaml.tftpl',
203
- "config_template_vars": {"num_chips": f"{num_chips}"},
217
+ "config_template_vars": {
218
+ "num_chips": num_chips,
219
+ "tas_name": tas_name,
220
+ },
204
221
  },
205
222
  "jobset": {"install": True, "version": "v0.7.2"},
206
223
  "apply_manifests": [{
@@ -236,15 +253,19 @@ class BlueprintGenerator:
236
253
  primary_vpc,
237
254
  gpunets,
238
255
  gke_cluster,
239
- group_placement_0,
240
256
  a3_megagpu_pool_0,
241
257
  workload,
242
258
  workload_configmap,
243
259
  ],
244
260
  )
261
+
262
+ if set_placement_policy:
263
+ a3_megagpu_pool_0.use.append(group_placement_0.id)
264
+ primary_group.modules.append(group_placement_0)
265
+
245
266
  a3_mega_blueprint = Blueprint(
246
267
  terraform_backend_defaults=self._getblock_terraform_backend(
247
- gcs_bucket, prefix
268
+ gcs_bucket, cluster_name, prefix
248
269
  ),
249
270
  blueprint_name=blueprint_name,
250
271
  toolkit_modules_url=cluster_toolkit_url,
@@ -261,8 +282,8 @@ class BlueprintGenerator:
261
282
  blueprint_file_path = self._save_blueprint_to_file(
262
283
  blueprint_name, a3_mega_blueprint, prefix
263
284
  )
264
- blueprint_dependencies = self._get_a3_mega_blueprint_dependencies(
265
- blueprint_name, prefix
285
+ blueprint_dependencies = self._get_blueprint_dependencies(
286
+ a3mega_device_type, blueprint_name, prefix
266
287
  )
267
288
  xpk_print(f"Blueprint file path: {blueprint_file_path}")
268
289
  xpk_print(
@@ -331,7 +352,7 @@ class BlueprintGenerator:
331
352
  )
332
353
  ml_gke = Blueprint(
333
354
  terraform_backend_defaults=self._getblock_terraform_backend(
334
- gcs_bucket, prefix
355
+ gcs_bucket, cluster_name, prefix
335
356
  ),
336
357
  blueprint_name=blueprint_name,
337
358
  toolkit_modules_url=cluster_toolkit_url,
@@ -490,6 +511,13 @@ class BlueprintGenerator:
490
511
  " alias_ip_range=[]}],"
491
512
  f" {cluster_name}-rdma-net.subnetwork_interfaces_gke))"
492
513
  ),
514
+ "k8s_network_names": {
515
+ "rdma_prefix": f"{cluster_name}-rdma-sub-",
516
+ "rdma_start_index": 0,
517
+ "rdma_postfix": "",
518
+ "gvnic_prefix": f"{cluster_name}-sub-",
519
+ "gvnic_start_index": 1,
520
+ },
493
521
  },
494
522
  outputs=["instructions"],
495
523
  )
@@ -546,7 +574,7 @@ class BlueprintGenerator:
546
574
  "install": True,
547
575
  "version": "v0.10.0", # TAS feature-gates is enabled in CT
548
576
  "config_path": f'$(ghpc_stage("{blueprint_name}"))/kueue-xpk-configuration.yaml.tftpl',
549
- "config_template_vars": {"num_chips": f"{num_chips}"},
577
+ "config_template_vars": {"num_chips": num_chips},
550
578
  },
551
579
  "jobset": {"install": True, "version": "v0.7.2"},
552
580
  "apply_manifests": [
@@ -597,7 +625,7 @@ class BlueprintGenerator:
597
625
  )
598
626
  a3_ultra_blueprint = Blueprint(
599
627
  terraform_backend_defaults=self._getblock_terraform_backend(
600
- gcs_bucket, prefix
628
+ gcs_bucket, cluster_name, prefix
601
629
  ),
602
630
  blueprint_name=blueprint_name,
603
631
  toolkit_modules_url=cluster_toolkit_url,
@@ -614,8 +642,276 @@ class BlueprintGenerator:
614
642
  blueprint_file_path = self._save_blueprint_to_file(
615
643
  blueprint_name, a3_ultra_blueprint, prefix
616
644
  )
617
- blueprint_dependencies = self._get_a3_ultra_blueprint_dependencies(
618
- blueprint_name, prefix
645
+ blueprint_dependencies = self._get_blueprint_dependencies(
646
+ a3ultra_device_type, blueprint_name, prefix
647
+ )
648
+ return BlueprintGeneratorOutput(
649
+ blueprint_file=blueprint_file_path,
650
+ blueprint_dependencies=blueprint_dependencies,
651
+ )
652
+
653
+ def generate_a4_blueprint(
654
+ self,
655
+ project_id: str,
656
+ cluster_name: str,
657
+ blueprint_name: str,
658
+ region: str,
659
+ zone: str,
660
+ auth_cidr: str,
661
+ system_node_pool_machine_type: str,
662
+ reservation: Optional[str | None] = None,
663
+ gcs_bucket: Optional[str | None] = None,
664
+ num_nodes: int = 2,
665
+ prefix: str = "",
666
+ system_node_pool_min_node_count: int = 2,
667
+ capacity_type: CapacityType = CapacityType.ON_DEMAND,
668
+ ) -> BlueprintGeneratorOutput:
669
+ """Create A4 blueprint.
670
+
671
+ Args:
672
+ Returns:
673
+ - Blueprint representing cluster toolkit blueprint
674
+ """
675
+ nccl_installer_path = (
676
+ f'$(ghpc_stage("{blueprint_name}"))/nccl-rdma-installer-a4.yaml'
677
+ )
678
+
679
+ net_0_id = f"{cluster_name}-net-0"
680
+ gpu_net_0 = DeploymentModule(
681
+ id=net_0_id,
682
+ source="modules/network/vpc",
683
+ settings={
684
+ "network_name": f"{cluster_name}-net-0",
685
+ "mtu": 8896,
686
+ "subnetworks": [{
687
+ "subnet_name": f"{cluster_name}-sub-0",
688
+ "subnet_region": region,
689
+ "subnet_ip": "192.168.0.0/18",
690
+ }],
691
+ "secondary_ranges_list": [{
692
+ "subnetwork_name": f"{cluster_name}-sub-0",
693
+ "ranges": [
694
+ {"range_name": "pods", "ip_cidr_range": "10.4.0.0/14"},
695
+ {"range_name": "services", "ip_cidr_range": "10.0.32.0/20"},
696
+ ],
697
+ }],
698
+ "firewall_rules": [{
699
+ "name": f"{cluster_name}-internal-0",
700
+ "ranges": ["192.168.0.0/16"],
701
+ "allow": [
702
+ {"protocol": "tcp", "ports": ["0-65535"]},
703
+ {"protocol": "udp", "ports": ["0-65535"]},
704
+ {"protocol": "icmp"},
705
+ ],
706
+ }],
707
+ },
708
+ )
709
+ net_1_id = f"{cluster_name}-net-1"
710
+ gpu_net_1 = DeploymentModule(
711
+ id=net_1_id,
712
+ source="modules/network/vpc",
713
+ settings={
714
+ "network_name": f"{cluster_name}-net-1",
715
+ "mtu": 8896,
716
+ "subnetworks": [{
717
+ "subnet_name": f"{cluster_name}-sub-1",
718
+ "subnet_region": region,
719
+ "subnet_ip": "192.168.64.0/18",
720
+ }],
721
+ "firewall_rules": [{
722
+ "name": f"{cluster_name}-internal-1",
723
+ "ranges": ["192.168.0.0/16"],
724
+ "allow": [
725
+ {"protocol": "tcp", "ports": ["0-65535"]},
726
+ {"protocol": "udp", "ports": ["0-65535"]},
727
+ {"protocol": "icmp"},
728
+ ],
729
+ }],
730
+ },
731
+ )
732
+ rma_net_id = f"{cluster_name}-rdma-net"
733
+ rma_net = DeploymentModule(
734
+ id=rma_net_id,
735
+ source="modules/network/gpu-rdma-vpc",
736
+ settings={
737
+ "network_name": f"{cluster_name}-rdma-net",
738
+ "mtu": 8896,
739
+ "network_profile": f"https://www.googleapis.com/compute/beta/projects/{project_id}/global/networkProfiles/{zone}-vpc-roce",
740
+ "network_routing_mode": "REGIONAL",
741
+ "subnetworks_template": {
742
+ "name_prefix": f"{cluster_name}-rdma-sub",
743
+ "count": 8,
744
+ "ip_range": "192.168.128.0/18",
745
+ "region": region,
746
+ },
747
+ },
748
+ )
749
+ cluster_id = f"{cluster_name}-a4-cluster"
750
+ a4_cluster = DeploymentModule(
751
+ id=cluster_id,
752
+ source="modules/scheduler/gke-cluster",
753
+ use=[net_0_id],
754
+ settings={
755
+ "system_node_pool_machine_type": system_node_pool_machine_type,
756
+ "system_node_pool_node_count": {
757
+ "total_min_nodes": system_node_pool_min_node_count,
758
+ "total_max_nodes": 1000,
759
+ },
760
+ "prefix_with_deployment_name": False,
761
+ "name_suffix": cluster_name,
762
+ "enable_dcgm_monitoring": True,
763
+ "enable_gcsfuse_csi": True,
764
+ "enable_private_endpoint": False,
765
+ "master_authorized_networks": [{
766
+ "cidr_block": auth_cidr,
767
+ "display_name": "kubectl-access-network",
768
+ }],
769
+ "additional_networks": (
770
+ f"$(concat([{{network={cluster_name}-net-1.network_name,"
771
+ f" subnetwork={cluster_name}-net-1.subnetwork_name,"
772
+ f' subnetwork_project="{project_id}", nic_type="GVNIC",'
773
+ " queue_count=null, network_ip=null, stack_type=null,"
774
+ " access_config=[{nat_ip=null, public_ptr_domain_name=null,"
775
+ " network_tier=null}], ipv6_access_config=[],"
776
+ " alias_ip_range=[]}],"
777
+ f" {cluster_name}-rdma-net.subnetwork_interfaces_gke))"
778
+ ),
779
+ "version_prefix": "1.32.",
780
+ "release_channel": "RAPID",
781
+ "maintenance_exclusions": [{
782
+ "name": "no-minor-or-node-upgrades-indefinite",
783
+ "start_time": "2024-12-01T00:00:00Z",
784
+ "end_time": "2025-12-22T00:00:00Z",
785
+ "exclusion_scope": "NO_MINOR_OR_NODE_UPGRADES",
786
+ }],
787
+ },
788
+ outputs=["instructions"],
789
+ )
790
+ system, _ = get_system_characteristics_by_device_type(a4_device_type)
791
+ if system is None:
792
+ xpk_print(
793
+ "Error: Could not retrieve system characteristics for"
794
+ f" {a4_device_type} device_type."
795
+ )
796
+ xpk_exit(1)
797
+ gpu_pool = DeploymentModule(
798
+ id=f"{cluster_name}-a4-pool",
799
+ source="modules/compute/gke-node-pool",
800
+ use=[cluster_id],
801
+ settings={
802
+ "machine_type": system.gce_machine_type,
803
+ "auto_upgrade": True,
804
+ "zones": [zone],
805
+ "disk_type": "hyperdisk-balanced",
806
+ "static_node_count": num_nodes,
807
+ "local_ssd_count_ephemeral_storage": 32,
808
+ "spot": capacity_type == CapacityType.SPOT,
809
+ "reservation_affinity": self._getblock_reservation_affinity(
810
+ reservation
811
+ ),
812
+ "max_pods_per_node": 32,
813
+ "guest_accelerator": [{
814
+ "type": system.gke_accelerator,
815
+ "count": 8,
816
+ "gpu_driver_installation_config": {
817
+ "gpu_driver_version": "LATEST"
818
+ },
819
+ }],
820
+ "additional_networks": (
821
+ f"$(concat([{{network={cluster_name}-net-1.network_name,"
822
+ f" subnetwork={cluster_name}-net-1.subnetwork_name,"
823
+ f' subnetwork_project="{project_id}", nic_type="GVNIC",'
824
+ " queue_count=null, network_ip=null, stack_type=null,"
825
+ " access_config=[{nat_ip=null, public_ptr_domain_name=null,"
826
+ " network_tier=null}], ipv6_access_config=[],"
827
+ " alias_ip_range=[]}],"
828
+ f" {cluster_name}-rdma-net.subnetwork_interfaces_gke))"
829
+ ),
830
+ },
831
+ outputs=["instructions"],
832
+ )
833
+
834
+ num_chips = num_nodes * system.chips_per_vm
835
+ workload_manager_install_id = "workload-manager-install"
836
+ workload_manager_install = DeploymentModule(
837
+ id=workload_manager_install_id,
838
+ source="modules/management/kubectl-apply",
839
+ use=[cluster_id],
840
+ settings={
841
+ "kueue": {
842
+ "install": True,
843
+ "version": "v0.10.0", # TAS feature-gates is enabled in CT
844
+ "config_path": f'$(ghpc_stage("{blueprint_name}"))/kueue-xpk-configuration.yaml.tftpl',
845
+ "config_template_vars": {"num_chips": num_chips},
846
+ },
847
+ "jobset": {"install": True, "version": "v0.7.2"},
848
+ "apply_manifests": [
849
+ {"source": nccl_installer_path},
850
+ {
851
+ "source": (
852
+ f'$(ghpc_stage("{blueprint_name}"))/storage_crd.yaml'
853
+ )
854
+ },
855
+ ],
856
+ },
857
+ )
858
+
859
+ workload_configmap = DeploymentModule(
860
+ id="workload_configmap",
861
+ source="modules/management/kubectl-apply",
862
+ use=[cluster_id],
863
+ settings={
864
+ "apply_manifests": [{
865
+ "source": (
866
+ f'$(ghpc_stage("{blueprint_name}"))/config-map.yaml.tftpl'
867
+ ),
868
+ "template_vars": {
869
+ "resource_config_name": (
870
+ f"{cluster_name}-resources-configmap"
871
+ ),
872
+ "num_nodes": f"{num_nodes}",
873
+ "cluster_config_name": f"{cluster_name}-metadata-configmap",
874
+ "capacity_type": f"{capacity_type.value}",
875
+ "reservation": f"{reservation}",
876
+ },
877
+ }]
878
+ },
879
+ )
880
+
881
+ primary_group = DeploymentGroup(
882
+ group="primary",
883
+ modules=[
884
+ gpu_net_0,
885
+ gpu_net_1,
886
+ rma_net,
887
+ a4_cluster,
888
+ gpu_pool,
889
+ workload_manager_install,
890
+ workload_configmap,
891
+ ],
892
+ )
893
+
894
+ a4_blueprint = Blueprint(
895
+ terraform_backend_defaults=self._getblock_terraform_backend(
896
+ gcs_bucket, cluster_name, prefix
897
+ ),
898
+ blueprint_name=blueprint_name,
899
+ toolkit_modules_url=cluster_toolkit_url,
900
+ toolkit_modules_version=cluster_toolkit_version,
901
+ deployment_groups=[primary_group],
902
+ vars={
903
+ "project_id": project_id,
904
+ "deployment_name": blueprint_name,
905
+ "region": region,
906
+ "zone": zone,
907
+ },
908
+ )
909
+
910
+ blueprint_file_path = self._save_blueprint_to_file(
911
+ blueprint_name, a4_blueprint, prefix
912
+ )
913
+ blueprint_dependencies = self._get_blueprint_dependencies(
914
+ a4_device_type, blueprint_name, prefix
619
915
  )
620
916
  return BlueprintGeneratorOutput(
621
917
  blueprint_file=blueprint_file_path,
@@ -638,7 +934,7 @@ class BlueprintGenerator:
638
934
  )
639
935
 
640
936
  def _getblock_terraform_backend(
641
- self, gcs_bucket: str, prefix: str = ""
937
+ self, gcs_bucket: str, cluster_name: str, prefix: str = ""
642
938
  ) -> dict | None:
643
939
  if gcs_bucket is None:
644
940
  return None
@@ -646,12 +942,19 @@ class BlueprintGenerator:
646
942
  "type": "gcs",
647
943
  "configuration": {
648
944
  "bucket": gcs_bucket,
649
- "prefix": self._get_terraforrm_backend_full_prefix(prefix),
945
+ "prefix": self._get_terraforrm_backend_full_prefix(
946
+ cluster_name, prefix
947
+ ),
650
948
  },
651
949
  }
652
950
 
653
- def _get_terraforrm_backend_full_prefix(self, prefix: str = "") -> str:
654
- return f"xpk_terraform_state/{prefix}/tfstate/"
951
+ def _get_terraforrm_backend_full_prefix(
952
+ self, cluster_name: str, prefix: str = ""
953
+ ) -> str:
954
+ full_prefix = "xpk_terraform_state"
955
+ if prefix:
956
+ full_prefix += f"/{prefix}"
957
+ return f"{full_prefix}/{cluster_name}/"
655
958
 
656
959
  def _save_blueprint_to_file(
657
960
  self, blueprint_name: str, xpk_blueprint: Blueprint, prefix: str = ""
@@ -676,27 +979,14 @@ class BlueprintGenerator:
676
979
  blueprint_path = self._get_blueprint_path(blueprint_name, prefix)
677
980
  return os.path.exists(blueprint_path)
678
981
 
679
- def _get_a3_mega_blueprint_dependencies(
680
- self, blueprint_name: str, prefix: str = ""
681
- ) -> str:
682
- deployment_files_path = os.path.join(
683
- self._get_storage_path(prefix), blueprint_name
684
- )
685
- shutil.copytree(
686
- blueprint_dependencies_dir[a3mega_device_type],
687
- deployment_files_path,
688
- dirs_exist_ok=True,
689
- )
690
- return deployment_files_path
691
-
692
- def _get_a3_ultra_blueprint_dependencies(
693
- self, blueprint_name: str, prefix: str = ""
982
+ def _get_blueprint_dependencies(
983
+ self, device_type: str, blueprint_name: str, prefix: str = ""
694
984
  ) -> str:
695
985
  deployment_files_path = os.path.join(
696
986
  self._get_storage_path(prefix), blueprint_name
697
987
  )
698
988
  shutil.copytree(
699
- blueprint_dependencies_dir[a3ultra_device_type],
989
+ blueprint_dependencies_dir[device_type],
700
990
  deployment_files_path,
701
991
  dirs_exist_ok=True,
702
992
  )
xpk/core/capacity.py CHANGED
@@ -27,6 +27,7 @@ CAPACITY_TYPE_CONFIG_KEY = 'capacity_type'
27
27
  H100_DEVICE_TYPE = 'h100-80gb-8'
28
28
  H100_MEGA_DEVICE_TYPE = 'h100-mega-80gb-8'
29
29
  H200_DEVICE_TYPE = 'h200-141gb-8'
30
+ B200_DEVICE_TYPE = 'b200-8'
30
31
  RESERVATION_CONFIG_KEY = 'reservation_id'
31
32
 
32
33