xpk 1.0.0__py3-none-any.whl → 1.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. xpk/commands/cluster.py +29 -30
  2. xpk/commands/cluster_gcluster.py +19 -14
  3. xpk/commands/cluster_test.py +1 -21
  4. xpk/commands/common.py +39 -6
  5. xpk/commands/common_test.py +170 -0
  6. xpk/commands/info.py +9 -5
  7. xpk/commands/inspector.py +33 -4
  8. xpk/commands/inspector_test.py +142 -0
  9. xpk/commands/workload.py +35 -17
  10. xpk/commands/workload_test.py +70 -3
  11. xpk/core/blueprint/blueprint_generator.py +19 -8
  12. xpk/core/blueprint/testing/data/a3_ultra.yaml +3 -1
  13. xpk/core/blueprint/testing/data/a4.yaml +3 -1
  14. xpk/core/capacity.py +37 -17
  15. xpk/core/capacity_test.py +66 -1
  16. xpk/core/cluster.py +10 -10
  17. xpk/core/cluster_private.py +3 -3
  18. xpk/core/cluster_test.py +29 -2
  19. xpk/core/docker_container.py +55 -30
  20. xpk/core/docker_manager.py +4 -4
  21. xpk/core/docker_resources.py +4 -1
  22. xpk/core/kueue_manager.py +6 -8
  23. xpk/core/kueue_manager_test.py +4 -5
  24. xpk/core/nap.py +14 -3
  25. xpk/core/nodepool.py +46 -13
  26. xpk/core/nodepool_test.py +143 -8
  27. xpk/core/pathways.py +4 -8
  28. xpk/core/remote_state/fuse_remote_state.py +1 -1
  29. xpk/core/scheduling.py +16 -13
  30. xpk/core/scheduling_test.py +15 -7
  31. xpk/core/system_characteristics.py +6 -0
  32. xpk/core/telemetry.py +11 -1
  33. xpk/core/telemetry_test.py +39 -0
  34. xpk/core/testing/commands_tester.py +26 -0
  35. xpk/core/testing/commands_tester_test.py +20 -1
  36. xpk/core/workload_decorators/rdma_decorator.py +9 -0
  37. xpk/parser/cluster.py +11 -1
  38. xpk/parser/cluster_test.py +59 -1
  39. xpk/parser/common.py +11 -0
  40. xpk/parser/storage.py +3 -3
  41. xpk/utils/console.py +1 -1
  42. xpk/utils/feature_flags.py +7 -3
  43. {xpk-1.0.0.dist-info → xpk-1.1.1.dist-info}/METADATA +37 -21
  44. {xpk-1.0.0.dist-info → xpk-1.1.1.dist-info}/RECORD +48 -55
  45. xpk-1.1.1.dist-info/top_level.txt +1 -0
  46. integration/README.md +0 -19
  47. integration/__init__.py +0 -15
  48. integration/docker_manager_test.py +0 -102
  49. integration/gcluster_a3mega_test.py +0 -215
  50. integration/gcluster_a3ultra_test.py +0 -187
  51. integration/gcluster_a4_test.py +0 -187
  52. integration/gcluster_test.py +0 -107
  53. xpk/utils/user_input.py +0 -48
  54. xpk/utils/user_input_test.py +0 -92
  55. xpk-1.0.0.dist-info/top_level.txt +0 -2
  56. {xpk-1.0.0.dist-info → xpk-1.1.1.dist-info}/WHEEL +0 -0
  57. {xpk-1.0.0.dist-info → xpk-1.1.1.dist-info}/entry_points.txt +0 -0
  58. {xpk-1.0.0.dist-info → xpk-1.1.1.dist-info}/licenses/LICENSE +0 -0
@@ -1,215 +0,0 @@
1
- """
2
- Copyright 2024 Google LLC
3
-
4
- Licensed under the Apache License, Version 2.0 (the "License");
5
- you may not use this file except in compliance with the License.
6
- You may obtain a copy of the License at
7
-
8
- https://www.apache.org/licenses/LICENSE-2.0
9
-
10
- Unless required by applicable law or agreed to in writing, software
11
- distributed under the License is distributed on an "AS IS" BASIS,
12
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
- See the License for the specific language governing permissions and
14
- limitations under the License.
15
- """
16
-
17
- from xpk.commands.cluster_gcluster import get_unique_name
18
- from xpk.core.docker_manager import DockerManager
19
- from xpk.core.gcluster_manager import GclusterManager
20
- from xpk.core.blueprint.blueprint_generator import BlueprintGenerator
21
- from xpk.utils.versions import ReleaseChannel
22
- import pytest
23
- import os
24
- import shutil
25
-
26
# Settings for the A3 Mega integration tests, read once at import time from
# the environment. Each value is None when its variable is not set; the tests
# assert on that before using them.
ctk_gcloud_cfg = os.environ.get("GCLOUD_CFG_PATH")
project_id = os.environ.get("PROJECT_ID")
region = os.environ.get("REGION")
zone = os.environ.get("ZONE")
auth_cidr = os.environ.get("AUTH_CIDR")
cluster_name = os.environ.get("A3_MEGA_TEST_CLUSTER_NAME")
release_channel = os.environ.get("RELEASE_CHANNEL")
cluster_version = os.environ.get("CLUSTER_VERSION")

# Directory component used when building the expected staged blueprint paths.
uploads_dir = "uploads"
36
-
37
-
38
@pytest.fixture(name="setup_tests")
def prepare_test():
  """Provide two scratch directories for a test and remove them afterwards.

  Yields:
    A ``(docker_path, bp_path)`` tuple of directories located under the
    current working directory.
  """
  pwd = os.getcwd()
  docker_path = os.path.join(pwd, "xpk_test_docker_dir")
  bp_path = os.path.join(pwd, "xpk_bp_path")
  # exist_ok replaces the check-then-create sequence, which is racy and
  # needlessly verbose.
  os.makedirs(docker_path, exist_ok=True)
  os.makedirs(bp_path, exist_ok=True)
  yield (docker_path, bp_path)
  # A test may already have deleted these directories itself; a missing
  # directory must not turn teardown into a FileNotFoundError.
  shutil.rmtree(docker_path, ignore_errors=True)
  shutil.rmtree(bp_path, ignore_errors=True)
50
-
51
-
52
@pytest.mark.skip(
    reason=(
        "This test requires A3 capacity, therefore it should not be run on each"
        " build. Please invoke it manually if needed. "
    )
)
def test_deploy_a3_mega_deployment(setup_tests):
  """Deploy the staged A3 Mega blueprint and always tear it down again.

  Args:
    setup_tests: ``(docker_path, bp_path)`` tuple supplied by the
      ``setup_tests`` fixture.
  """
  docker_path, bp_path = setup_tests
  (
      blueprint_name,
      prefix,
      gcluster_manager,
      staged_bp_path,
  ) = create_test_a3_mega_deployment(docker_path, bp_path)
  try:
    gcluster_manager.deploy(
        blueprint_path=staged_bp_path,
        deployment_name=blueprint_name,
        prefix=prefix,
    )
  finally:
    # Destroy even when deploy raises so a failed run does not leave the
    # deployment behind.
    gcluster_manager.destroy_deployment(
        deployment_name=blueprint_name, prefix=prefix
    )
  # The scratch directories are removed by the setup_tests fixture teardown;
  # deleting them here as well made that teardown fail on a missing path.
78
-
79
-
80
@pytest.mark.skip(
    reason=(
        "This test requires A3 capacity, therefore it should not be run on each"
        " build. Please invoke it manually if needed. "
    )
)
def test_create_a3_mega_deployment_files(setup_tests):
  """Generate and stage the A3 Mega blueprint, then dry-run its deployment.

  Checks the paths reported by the blueprint generator, the files present on
  disk before and after staging, and the staged blueprint path.

  Args:
    setup_tests: ``(docker_path, bp_path)`` tuple supplied by the
      ``setup_tests`` fixture.
  """
  # Every setting comes from the environment and must be present.
  for required in (
      project_id,
      region,
      zone,
      auth_cidr,
      ctk_gcloud_cfg,
      cluster_name,
      release_channel,
      cluster_version,
  ):
    assert required is not None
  docker_path, bp_path = setup_tests

  blueprint_name = f"{cluster_name}-a3-mega-xpk"
  prefix = "prefix"
  yaml_name = f"{blueprint_name}.yaml"

  runner = DockerManager(
      gcloud_cfg_path=ctk_gcloud_cfg, working_dir=docker_path
  )
  runner.initialize()

  generator = BlueprintGenerator(storage_path=bp_path)
  blueprint = generator.generate_a3_mega_blueprint(
      cluster_name=cluster_name,
      blueprint_name=blueprint_name,
      prefix=prefix,
      region=region,
      project_id=project_id,
      auth_cidr=auth_cidr,
      zone=zone,
      system_node_pool_min_node_count=3,
      release_channel=ReleaseChannel(release_channel),
      cluster_version=cluster_version,
  )
  expected_file = os.path.join(bp_path, prefix, yaml_name)
  expected_deps = os.path.join(bp_path, prefix, blueprint_name)

  assert blueprint.blueprint_file == expected_file
  assert blueprint.blueprint_dependencies == expected_deps

  # Generated artefacts on the local blueprint storage path.
  assert os.path.isfile(expected_file)
  assert os.path.isdir(expected_deps)
  assert os.path.isfile(os.path.join(expected_deps, "config-map.yaml.tftpl"))

  manager = GclusterManager(
      gcluster_command_runner=runner, remote_state_client=None
  )

  staged_bp_path = manager.stage_files(
      blueprint_file=blueprint.blueprint_file,
      blueprint_dependencies=blueprint.blueprint_dependencies,
      prefix=prefix,
  )

  assert staged_bp_path == os.path.join("/out", uploads_dir, prefix, yaml_name)

  # The same staged files as seen from the local docker working directory.
  local_staged_root = os.path.join(docker_path, uploads_dir, prefix)
  local_staged_file = os.path.join(local_staged_root, yaml_name)
  local_staged_deps = os.path.join(local_staged_root, blueprint_name)

  assert os.path.isfile(local_staged_file)
  assert os.path.isdir(local_staged_deps)
  for template in (
      "config-map.yaml.tftpl",
      "kueue-xpk-configuration.yaml.tftpl",
  ):
    assert os.path.isfile(os.path.join(local_staged_deps, template))

  deployment_name = get_unique_name(project_id, region, zone)
  manager.deploy(
      blueprint_path=staged_bp_path,
      deployment_name=deployment_name,
      dry_run=True,
  )
165
-
166
-
167
def create_test_a3_mega_deployment(docker_path: str, bp_path: str):
  """Generate and stage an A3 Mega blueprint for a deployment test.

  Args:
    docker_path: working directory handed to the DockerManager.
    bp_path: storage path handed to the BlueprintGenerator.

  Returns:
    A ``(blueprint_name, prefix, gcluster_manager, staged_bp_path)`` tuple.
  """
  # Every setting comes from the environment and must be present.
  for required in (
      project_id,
      region,
      zone,
      auth_cidr,
      ctk_gcloud_cfg,
      cluster_name,
      release_channel,
      cluster_version,
  ):
    assert required is not None

  blueprint_name = f"{cluster_name}-a3-mega-xpk"
  prefix = "prefix"

  runner = DockerManager(
      gcloud_cfg_path=ctk_gcloud_cfg, working_dir=docker_path
  )
  runner.initialize()

  blueprint = BlueprintGenerator(
      storage_path=bp_path
  ).generate_a3_mega_blueprint(
      cluster_name=cluster_name,
      blueprint_name=blueprint_name,
      prefix=prefix,
      region=region,
      project_id=project_id,
      auth_cidr=auth_cidr,
      zone=zone,
      system_node_pool_min_node_count=3,
      release_channel=ReleaseChannel(release_channel),
      cluster_version=cluster_version,
  )

  gcluster_manager = GclusterManager(
      gcluster_command_runner=runner,
      remote_state_client=None,
  )

  staged_bp_path = gcluster_manager.stage_files(
      blueprint_file=blueprint.blueprint_file,
      blueprint_dependencies=blueprint.blueprint_dependencies,
      prefix=prefix,
  )

  return blueprint_name, prefix, gcluster_manager, staged_bp_path
@@ -1,187 +0,0 @@
1
- """
2
- Copyright 2024 Google LLC
3
-
4
- Licensed under the Apache License, Version 2.0 (the "License");
5
- you may not use this file except in compliance with the License.
6
- You may obtain a copy of the License at
7
-
8
- https://www.apache.org/licenses/LICENSE-2.0
9
-
10
- Unless required by applicable law or agreed to in writing, software
11
- distributed under the License is distributed on an "AS IS" BASIS,
12
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
- See the License for the specific language governing permissions and
14
- limitations under the License.
15
- """
16
-
17
- import os
18
- import shutil
19
-
20
- import pytest
21
-
22
- from xpk.commands.cluster_gcluster import get_unique_name
23
- from xpk.core.blueprint.blueprint_generator import BlueprintGenerator
24
- from xpk.core.capacity import CapacityType
25
- from xpk.core.docker_manager import DockerManager
26
- from xpk.core.gcluster_manager import GclusterManager
27
- from xpk.utils.versions import ReleaseChannel
28
-
29
# Settings for the A3 Ultra integration tests, read once at import time from
# the environment. Each value is None when its variable is not set; the tests
# assert on that before using them.
ctk_gcloud_cfg = os.environ.get("GCLOUD_CFG_PATH")
project_id = os.environ.get("PROJECT_ID")
region = os.environ.get("REGION")
zone = os.environ.get("ZONE")
auth_cidr = os.environ.get("AUTH_CIDR")
cluster_name = os.environ.get("A3_ULTRA_TEST_CLUSTER_NAME")
release_channel = os.environ.get("RELEASE_CHANNEL")
cluster_version = os.environ.get("CLUSTER_VERSION")
37
-
38
-
39
@pytest.fixture(name="setup_tests")
def prepare_test():
  """Create two scratch directories, hand them to the test, then delete them.

  Yields:
    A ``(docker_path, bp_path)`` tuple of directories located under the
    current working directory.
  """
  base = os.getcwd()
  scratch_dirs = (
      os.path.join(base, "xpk_test_docker_dir"),
      os.path.join(base, "xpk_test_bp_dir"),
  )
  for directory in scratch_dirs:
    if not os.path.exists(directory):
      os.makedirs(directory)
  yield scratch_dirs
  # Teardown: runs once the test using the fixture has finished.
  for directory in scratch_dirs:
    shutil.rmtree(directory)
51
-
52
-
53
@pytest.mark.skip(
    reason=(
        "This test requires A3 capacity, therefore it should not be run on each"
        " build. Please invoke it manually if needed. "
    )
)
def test_create_a3_ultra_deployment_files(setup_tests):
  """Generate and stage the A3 Ultra blueprint, then dry-run its deployment.

  Args:
    setup_tests: ``(docker_path, bp_path)`` tuple supplied by the
      ``setup_tests`` fixture.
  """
  # Every setting comes from the environment and must be present.
  for required in (
      project_id,
      region,
      zone,
      auth_cidr,
      ctk_gcloud_cfg,
      cluster_name,
      release_channel,
      cluster_version,
  ):
    assert required is not None
  docker_path, bp_path = setup_tests
  blueprint_name = f"{cluster_name}-a3-ultra-xpk"
  yaml_name = f"{blueprint_name}.yaml"

  runner = DockerManager(
      gcloud_cfg_path=ctk_gcloud_cfg, working_dir=docker_path
  )
  runner.initialize()
  prefix = f"{project_id}-{region}".lower()
  generator = BlueprintGenerator(storage_path=bp_path)
  a3_ultra_blueprint = generator.generate_a3_ultra_blueprint(
      cluster_name=cluster_name,
      blueprint_name=blueprint_name,
      region=region,
      project_id=project_id,
      auth_cidr=auth_cidr,
      zone=zone,
      reservation="foo",
      num_nodes=1,
      system_node_pool_machine_type="e2-standard-16",
      prefix=prefix,
      release_channel=ReleaseChannel(release_channel),
      cluster_version=cluster_version,
  )
  expected_file = os.path.join(bp_path, prefix, yaml_name)
  # NOTE(review): the dependencies path omits the prefix while the blueprint
  # file path includes it - confirm against BlueprintGenerator.
  expected_deps = os.path.join(bp_path, blueprint_name)
  assert a3_ultra_blueprint.blueprint_file == expected_file
  assert a3_ultra_blueprint.blueprint_dependencies == expected_deps

  assert os.path.isfile(expected_file)
  assert os.path.isdir(expected_deps)
  for manifest in ("mlgru-disable.yaml", "nccl-installer.yaml"):
    assert os.path.isfile(os.path.join(expected_deps, manifest))
  manager = GclusterManager(
      gcluster_command_runner=runner, remote_state_client=None
  )

  staged_bp_path = manager.stage_files(
      blueprint_file=a3_ultra_blueprint.blueprint_file,
      blueprint_dependencies=a3_ultra_blueprint.blueprint_dependencies,
      prefix=prefix,
  )
  assert staged_bp_path == os.path.join("/out/uploads", prefix, yaml_name)
  deployment_name = get_unique_name(project_id, region, zone)
  manager.deploy(
      blueprint_path=staged_bp_path,
      deployment_name=deployment_name,
      dry_run=True,
  )
120
-
121
-
122
@pytest.mark.skip(
    reason=(
        "This test requires A3 capacity, therefore it should not be run on each"
        " build. Please invoke it manually if needed. "
    )
)
def test_create_a3_ultra_deployment(setup_tests):
  """Generate, stage and deploy the A3 Ultra blueprint, then destroy it.

  Args:
    setup_tests: ``(docker_path, bp_path)`` tuple supplied by the
      ``setup_tests`` fixture.
  """
  assert project_id is not None
  assert region is not None
  assert zone is not None
  assert auth_cidr is not None
  assert ctk_gcloud_cfg is not None
  assert cluster_name is not None
  assert release_channel is not None
  assert cluster_version is not None
  docker_path, bp_path = setup_tests
  blueprint_name = f"{cluster_name}-a3-ultra-xpk"

  docker_manager = DockerManager(
      gcloud_cfg_path=ctk_gcloud_cfg, working_dir=docker_path
  )
  docker_manager.initialize()

  bpm = BlueprintGenerator(storage_path=bp_path)
  a3_ultra_blueprint = bpm.generate_a3_ultra_blueprint(
      cluster_name=cluster_name,
      blueprint_name=blueprint_name,
      region=region,
      project_id=project_id,
      auth_cidr=auth_cidr,
      zone=zone,
      capacity_type=CapacityType.SPOT,
      num_nodes=1,
      system_node_pool_machine_type="e2-standard-16",
      release_channel=ReleaseChannel(release_channel),
      cluster_version=cluster_version,
  )
  blueprint_test_path = os.path.join(bp_path, f"{blueprint_name}.yaml")
  blueprint_deps_test_path = os.path.join(bp_path, blueprint_name)

  assert a3_ultra_blueprint.blueprint_file == blueprint_test_path
  assert a3_ultra_blueprint.blueprint_dependencies == blueprint_deps_test_path

  assert os.path.isfile(blueprint_test_path)
  assert os.path.isdir(blueprint_deps_test_path)
  assert os.path.isfile(
      os.path.join(blueprint_deps_test_path, "mlgru-disable.yaml")
  )
  assert os.path.isfile(
      os.path.join(blueprint_deps_test_path, "nccl-installer.yaml")
  )
  gcluster_manager = GclusterManager(
      gcluster_command_runner=docker_manager, remote_state_client=None
  )

  staged_bp_path = gcluster_manager.stage_files(
      blueprint_file=a3_ultra_blueprint.blueprint_file,
      blueprint_dependencies=a3_ultra_blueprint.blueprint_dependencies,
  )

  try:
    gcluster_manager.deploy(
        blueprint_path=staged_bp_path, deployment_name=blueprint_name
    )
  finally:
    # Destroy even when deploy raises so a failed run does not leave the
    # deployment behind.
    gcluster_manager.destroy_deployment(deployment_name=blueprint_name)
@@ -1,187 +0,0 @@
1
- """
2
- Copyright 2024 Google LLC
3
-
4
- Licensed under the Apache License, Version 2.0 (the "License");
5
- you may not use this file except in compliance with the License.
6
- You may obtain a copy of the License at
7
-
8
- https://www.apache.org/licenses/LICENSE-2.0
9
-
10
- Unless required by applicable law or agreed to in writing, software
11
- distributed under the License is distributed on an "AS IS" BASIS,
12
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
- See the License for the specific language governing permissions and
14
- limitations under the License.
15
- """
16
-
17
- import os
18
- import shutil
19
-
20
- import pytest
21
-
22
- from xpk.commands.cluster_gcluster import get_unique_name
23
- from xpk.core.blueprint.blueprint_generator import BlueprintGenerator
24
- from xpk.core.capacity import CapacityType
25
- from xpk.core.docker_manager import DockerManager
26
- from xpk.core.gcluster_manager import GclusterManager
27
- from xpk.utils.versions import ReleaseChannel
28
-
29
# Settings for the A4 integration tests, read once at import time from the
# environment. Each value is None when its variable is not set; the tests
# assert on that before using them.
ctk_gcloud_cfg = os.environ.get("GCLOUD_CFG_PATH")
project_id = os.environ.get("PROJECT_ID")
region = os.environ.get("REGION")
zone = os.environ.get("ZONE")
auth_cidr = os.environ.get("AUTH_CIDR")
cluster_name = os.environ.get("A4_TEST_CLUSTER_NAME")
release_channel = os.environ.get("RELEASE_CHANNEL")
cluster_version = os.environ.get("CLUSTER_VERSION")
37
-
38
-
39
@pytest.fixture(name="setup_tests")
def prepare_test():
  """Create two scratch directories, hand them to the test, then delete them.

  Yields:
    A ``(docker_path, bp_path)`` tuple of directories located under the
    current working directory.
  """
  base = os.getcwd()
  scratch_dirs = (
      os.path.join(base, "xpk_test_docker_dir"),
      os.path.join(base, "xpk_test_bp_dir"),
  )
  for directory in scratch_dirs:
    if not os.path.exists(directory):
      os.makedirs(directory)
  yield scratch_dirs
  # Teardown: runs once the test using the fixture has finished.
  for directory in scratch_dirs:
    shutil.rmtree(directory)
51
-
52
-
53
@pytest.mark.skip(
    reason=(
        "This test requires A4 capacity, therefore it should not be run on each"
        " build. Please invoke it manually if needed. "
    )
)
def test_create_a4_deployment_files(setup_tests):
  """Generate and stage the A4 blueprint, then dry-run its deployment.

  Args:
    setup_tests: ``(docker_path, bp_path)`` tuple supplied by the
      ``setup_tests`` fixture.
  """
  # Every setting comes from the environment and must be present.
  for required in (
      project_id,
      region,
      zone,
      auth_cidr,
      ctk_gcloud_cfg,
      cluster_name,
      release_channel,
      cluster_version,
  ):
    assert required is not None
  docker_path, bp_path = setup_tests
  blueprint_name = f"{cluster_name}-a4-xpk"
  yaml_name = f"{blueprint_name}.yaml"

  runner = DockerManager(
      gcloud_cfg_path=ctk_gcloud_cfg, working_dir=docker_path
  )
  runner.initialize()
  prefix = f"{project_id}-{region}".lower()
  generator = BlueprintGenerator(storage_path=bp_path)
  blueprint = generator.generate_a4_blueprint(
      cluster_name=cluster_name,
      blueprint_name=blueprint_name,
      region=region,
      project_id=project_id,
      auth_cidr=auth_cidr,
      zone=zone,
      reservation="foo",
      num_nodes=1,
      system_node_pool_machine_type="e2-standard-16",
      prefix=prefix,
      release_channel=ReleaseChannel(release_channel),
      cluster_version=cluster_version,
  )
  expected_file = os.path.join(bp_path, prefix, yaml_name)
  # NOTE(review): the dependencies path omits the prefix while the blueprint
  # file path includes it - confirm against BlueprintGenerator.
  expected_deps = os.path.join(bp_path, blueprint_name)
  assert blueprint.blueprint_file == expected_file
  assert blueprint.blueprint_dependencies == expected_deps

  assert os.path.isfile(expected_file)
  assert os.path.isdir(expected_deps)
  for manifest in ("mlgru-disable.yaml", "nccl-installer.yaml"):
    assert os.path.isfile(os.path.join(expected_deps, manifest))
  manager = GclusterManager(
      gcluster_command_runner=runner, remote_state_client=None
  )

  staged_bp_path = manager.stage_files(
      blueprint_file=blueprint.blueprint_file,
      blueprint_dependencies=blueprint.blueprint_dependencies,
      prefix=prefix,
  )
  assert staged_bp_path == os.path.join("/out/uploads", prefix, yaml_name)
  deployment_name = get_unique_name(project_id, region, zone)
  manager.deploy(
      blueprint_path=staged_bp_path,
      deployment_name=deployment_name,
      dry_run=True,
  )
120
-
121
-
122
@pytest.mark.skip(
    reason=(
        "This test requires A4 capacity, therefore it should not be run on each"
        " build. Please invoke it manually if needed. "
    )
)
def test_create_a4_deployment(setup_tests):
  """Generate, stage and deploy the A4 blueprint, then destroy it.

  Args:
    setup_tests: ``(docker_path, bp_path)`` tuple supplied by the
      ``setup_tests`` fixture.
  """
  assert project_id is not None
  assert region is not None
  assert zone is not None
  assert auth_cidr is not None
  assert ctk_gcloud_cfg is not None
  assert cluster_name is not None
  assert release_channel is not None
  assert cluster_version is not None
  docker_path, bp_path = setup_tests
  blueprint_name = f"{cluster_name}-a4-xpk"

  docker_manager = DockerManager(
      gcloud_cfg_path=ctk_gcloud_cfg, working_dir=docker_path
  )
  docker_manager.initialize()

  bpm = BlueprintGenerator(storage_path=bp_path)
  a4_blueprint = bpm.generate_a4_blueprint(
      cluster_name=cluster_name,
      blueprint_name=blueprint_name,
      region=region,
      project_id=project_id,
      auth_cidr=auth_cidr,
      zone=zone,
      capacity_type=CapacityType.SPOT,
      num_nodes=1,
      system_node_pool_machine_type="e2-standard-16",
      release_channel=ReleaseChannel(release_channel),
      cluster_version=cluster_version,
  )
  blueprint_test_path = os.path.join(bp_path, f"{blueprint_name}.yaml")
  blueprint_deps_test_path = os.path.join(bp_path, blueprint_name)

  assert a4_blueprint.blueprint_file == blueprint_test_path
  assert a4_blueprint.blueprint_dependencies == blueprint_deps_test_path

  assert os.path.isfile(blueprint_test_path)
  assert os.path.isdir(blueprint_deps_test_path)
  assert os.path.isfile(
      os.path.join(blueprint_deps_test_path, "mlgru-disable.yaml")
  )
  assert os.path.isfile(
      os.path.join(blueprint_deps_test_path, "nccl-installer.yaml")
  )
  gcluster_manager = GclusterManager(
      gcluster_command_runner=docker_manager, remote_state_client=None
  )

  staged_bp_path = gcluster_manager.stage_files(
      blueprint_file=a4_blueprint.blueprint_file,
      blueprint_dependencies=a4_blueprint.blueprint_dependencies,
  )

  try:
    gcluster_manager.deploy(
        blueprint_path=staged_bp_path, deployment_name=blueprint_name
    )
  finally:
    # Destroy even when deploy raises so a failed run does not leave the
    # deployment behind.
    gcluster_manager.destroy_deployment(deployment_name=blueprint_name)
@@ -1,107 +0,0 @@
1
- """
2
- Copyright 2024 Google LLC
3
-
4
- Licensed under the Apache License, Version 2.0 (the "License");
5
- you may not use this file except in compliance with the License.
6
- You may obtain a copy of the License at
7
-
8
- https://www.apache.org/licenses/LICENSE-2.0
9
-
10
- Unless required by applicable law or agreed to in writing, software
11
- distributed under the License is distributed on an "AS IS" BASIS,
12
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
- See the License for the specific language governing permissions and
14
- limitations under the License.
15
- """
16
-
17
- from xpk.core.docker_manager import DockerManager
18
- from xpk.core.gcluster_manager import GclusterManager
19
- from xpk.core.blueprint.blueprint_generator import BlueprintGenerator
20
- import os
21
- import pytest
22
- import shutil
23
-
24
# Settings for the GKE ML integration test, read once at import time from the
# environment. Each value is None when its variable is not set; the test
# asserts on that before using them.
ctk_gcloud_cfg = os.environ.get("GCLOUD_CFG_PATH")
project_id = os.environ.get("PROJECT_ID")
region = os.environ.get("REGION")
zone = os.environ.get("ZONE")
auth_cidr = os.environ.get("AUTH_CIDR")
cluster_name = os.environ.get("GKE_ML_TEST_CLUSTER_NAME")

# Directory component used when building the expected staged blueprint path.
uploads_dir = "uploads"
32
-
33
-
34
def prepare_test(docker_path: str, bp_path: str) -> None:
  """Ensure both scratch directories exist, creating parents as needed.

  Args:
    docker_path: directory to create for the docker working files.
    bp_path: directory to create for the generated blueprints.

  Raises:
    FileExistsError: if either path exists but is not a directory.
  """
  # exist_ok replaces the exists()/makedirs() pair, which could fail if the
  # directory appeared between the check and the creation.
  os.makedirs(docker_path, exist_ok=True)
  os.makedirs(bp_path, exist_ok=True)
39
-
40
-
41
@pytest.mark.skip(reason="Credentails not working. Skipping for now")
def test_create_deployment():
  """Generate, stage, deploy and destroy the GKE ML blueprint.

  Cleanup is guaranteed: the deployment is destroyed once a deploy has been
  attempted, and both scratch directories are removed however the test ends.
  """
  assert project_id is not None
  assert region is not None
  assert zone is not None
  assert auth_cidr is not None
  assert ctk_gcloud_cfg is not None
  assert cluster_name is not None

  pwd = os.getcwd()
  test_docker_working_dir = os.path.join(
      pwd, "xpkclusters/tests/xpk_test_docker_dir"
  )
  test_bp_dir = os.path.join(pwd, "xpkclusters/tests/xpk_test_bp_dir")
  prepare_test(test_docker_working_dir, test_bp_dir)
  try:
    blueprint_name = "my-test-blueprint"
    prefix = "prefix"

    docker_manager = DockerManager(
        gcloud_cfg_path=ctk_gcloud_cfg, working_dir=test_docker_working_dir
    )
    docker_manager.initialize()

    bpm = BlueprintGenerator(storage_path=test_bp_dir)
    ml_gke_blueprint = bpm.generate_gke_ml_blueprint(
        cluster_name=cluster_name,
        blueprint_name=blueprint_name,
        prefix=prefix,
        region=region,
        project_id=project_id,
        auth_cidr=auth_cidr,
    )
    blueprint_test_path = os.path.join(
        test_bp_dir, prefix, f"{blueprint_name}.yaml"
    )
    # there are no files in ghcp stage for this blueprint
    blueprint_deps_test_path = ""

    assert ml_gke_blueprint.blueprint_file == blueprint_test_path
    assert ml_gke_blueprint.blueprint_dependencies == blueprint_deps_test_path

    assert os.path.exists(blueprint_test_path)

    gcluster_manager = GclusterManager(
        gcluster_command_runner=docker_manager, remote_state_client=None
    )

    staged_bp_path = gcluster_manager.stage_files(
        blueprint_file=ml_gke_blueprint.blueprint_file,
        blueprint_dependencies=ml_gke_blueprint.blueprint_dependencies,
        prefix=prefix,
    )

    assert staged_bp_path == os.path.join(
        "/out", uploads_dir, prefix, f"{blueprint_name}.yaml"
    )

    try:
      gcluster_manager.deploy(
          blueprint_path=staged_bp_path,
          deployment_name=blueprint_name,
          prefix=prefix,
      )
    finally:
      # Destroy even when deploy raises so a failed run does not leave the
      # deployment behind.
      gcluster_manager.destroy_deployment(
          deployment_name=blueprint_name, prefix=prefix
      )
  finally:
    # Remove the scratch directories on every exit path; ignore_errors keeps
    # a cleanup problem from masking the original test failure.
    shutil.rmtree(test_docker_working_dir, ignore_errors=True)
    shutil.rmtree(test_bp_dir, ignore_errors=True)