xpk 0.6.0__py3-none-any.whl → 0.7.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. xpk/api/__init__.py +15 -0
  2. xpk/api/storage_crd.yaml +52 -0
  3. xpk/commands/batch.py +27 -5
  4. xpk/commands/cluster.py +104 -80
  5. xpk/commands/cluster_gcluster.py +94 -10
  6. xpk/commands/common.py +44 -0
  7. xpk/commands/config.py +29 -0
  8. xpk/commands/info.py +8 -10
  9. xpk/commands/inspector.py +5 -11
  10. xpk/commands/job.py +9 -7
  11. xpk/commands/kind.py +34 -4
  12. xpk/commands/kjob_common.py +44 -0
  13. xpk/commands/run.py +128 -0
  14. xpk/commands/shell.py +27 -7
  15. xpk/commands/storage.py +280 -0
  16. xpk/commands/version.py +6 -18
  17. xpk/commands/workload.py +381 -184
  18. xpk/core/blueprint/blueprint_definitions.py +1 -0
  19. xpk/core/blueprint/blueprint_generator.py +132 -76
  20. xpk/core/capacity.py +185 -0
  21. xpk/core/cluster.py +564 -0
  22. xpk/core/cluster_private.py +6 -3
  23. xpk/core/commands.py +18 -14
  24. xpk/core/config.py +179 -0
  25. xpk/core/docker_container.py +225 -0
  26. xpk/core/docker_image.py +210 -0
  27. xpk/core/docker_resources.py +350 -0
  28. xpk/core/filestore.py +251 -0
  29. xpk/core/gcloud_context.py +196 -0
  30. xpk/core/gcluster_manager.py +20 -2
  31. xpk/core/gcsfuse.py +50 -0
  32. xpk/core/kjob.py +257 -18
  33. xpk/core/kueue.py +12 -6
  34. xpk/core/monitoring.py +134 -0
  35. xpk/core/nap.py +32 -20
  36. xpk/core/network.py +377 -0
  37. xpk/core/nodepool.py +581 -0
  38. xpk/core/pathways.py +124 -45
  39. xpk/core/remote_state/__init__.py +15 -0
  40. xpk/core/remote_state/fuse_remote_state.py +99 -0
  41. xpk/core/remote_state/remote_state_client.py +38 -0
  42. xpk/core/resources.py +238 -0
  43. xpk/core/scheduling.py +253 -0
  44. xpk/core/storage.py +581 -0
  45. xpk/core/system_characteristics.py +38 -1
  46. xpk/core/vertex.py +105 -0
  47. xpk/core/workload.py +209 -1
  48. xpk/core/workload_decorators/rdma_decorator.py +25 -5
  49. xpk/core/workload_decorators/storage_decorator.py +52 -0
  50. xpk/core/workload_decorators/tcpxo_decorator.py +70 -37
  51. xpk/main.py +3 -1
  52. xpk/parser/batch.py +10 -151
  53. xpk/parser/cluster.py +49 -8
  54. xpk/parser/common.py +189 -1
  55. xpk/parser/config.py +49 -0
  56. xpk/parser/core.py +27 -1
  57. xpk/parser/info.py +2 -1
  58. xpk/parser/inspector.py +3 -3
  59. xpk/parser/job.py +25 -4
  60. xpk/parser/kind.py +3 -2
  61. xpk/parser/run.py +47 -0
  62. xpk/parser/shell.py +10 -1
  63. xpk/parser/storage.py +326 -0
  64. xpk/parser/validators.py +3 -3
  65. xpk/parser/workload.py +118 -76
  66. xpk/templates/__init__.py +15 -0
  67. xpk/templates/storage.yaml +13 -0
  68. xpk/utils/gcs_utils.py +125 -0
  69. xpk/utils/kubectl.py +57 -0
  70. xpk/utils/objects.py +8 -5
  71. xpk/utils/templates.py +28 -0
  72. xpk/utils/validation.py +80 -0
  73. {xpk-0.6.0.dist-info → xpk-0.7.1.dist-info}/METADATA +169 -15
  74. xpk-0.7.1.dist-info/RECORD +92 -0
  75. {xpk-0.6.0.dist-info → xpk-0.7.1.dist-info}/WHEEL +1 -1
  76. xpk/core/core.py +0 -2824
  77. xpk-0.6.0.dist-info/RECORD +0 -57
  78. {xpk-0.6.0.dist-info → xpk-0.7.1.dist-info}/entry_points.txt +0 -0
  79. {xpk-0.6.0.dist-info → xpk-0.7.1.dist-info/licenses}/LICENSE +0 -0
  80. {xpk-0.6.0.dist-info → xpk-0.7.1.dist-info}/top_level.txt +0 -0
xpk/core/config.py ADDED
@@ -0,0 +1,179 @@
1
+ """
2
+ Copyright 2025 Google LLC
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ https://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+ """
16
+
17
+ import os
18
+ import re
19
+
20
+ import ruamel.yaml
21
+
22
+ from ..utils import file
23
+ from ..utils.console import xpk_print
24
+ from .system_characteristics import AcceleratorType, SystemCharacteristics
25
+
26
# This is the version for XPK PyPI package
__version__ = 'v0.7.1'
XPK_CURRENT_VERSION = __version__
# Per-user location of the xpk config file.
XPK_CONFIG_FILE = os.path.expanduser('~/.config/xpk/config.yaml')

# Name of the top-level mapping in the config file that holds the settings.
CONFIGS_KEY = 'configs'

# Setting names that may be stored under CONFIGS_KEY.
CFG_BUCKET_KEY = 'cluster-state-gcs-bucket'
CLUSTER_NAME_KEY = 'cluster-name'
PROJECT_KEY = 'project-id'
ZONE_KEY = 'zone'
KJOB_BATCH_IMAGE = 'batch-image'
KJOB_BATCH_WORKING_DIRECTORY = 'batch-working-directory'
KJOB_SHELL_IMAGE = 'shell-image'
KJOB_SHELL_INTERACTIVE_COMMAND = 'shell-interactive-command'
KJOB_SHELL_WORKING_DIRECTORY = 'shell-working-directory'
DEPENDENCIES_KEY = 'deps-verified-version'

DEFAULT_KEYS = [
    CFG_BUCKET_KEY,
    CLUSTER_NAME_KEY,
    PROJECT_KEY,
    ZONE_KEY,
    DEPENDENCIES_KEY,
    KJOB_BATCH_IMAGE,
    KJOB_BATCH_WORKING_DIRECTORY,
    KJOB_SHELL_IMAGE,
    KJOB_SHELL_INTERACTIVE_COMMAND,
    KJOB_SHELL_WORKING_DIRECTORY,
]


def _version_tuple(version: str) -> tuple[int, ...]:
  """Returns the numeric components of a version string, e.g. 'v0.7.1' -> (0, 7, 1)."""
  return tuple(int(part) for part in re.findall(r'\d+', version))


# Compare versions numerically. A plain string comparison is lexicographic and
# only worked because the leading 'v' sorts after every digit.
VERTEX_TENSORBOARD_FEATURE_FLAG = _version_tuple(
    XPK_CURRENT_VERSION
) >= _version_tuple('0.4.0')
58
+
59
+
60
+ yaml = ruamel.yaml.YAML()
61
+
62
+
63
class XpkConfig:
  """Reads and writes xpk settings stored in a .yaml config file.

  The file holds a top-level `version` entry plus a `configs` mapping of
  setting name to value. `get` and `set` only accept the names listed in
  DEFAULT_KEYS.
  """

  def __init__(self, custom_config_file: str = XPK_CONFIG_FILE) -> None:
    """Remembers which file to use; nothing is read or written yet.

    Args:
      custom_config_file: path of the config file to read and write.
    """
    self._config = custom_config_file
    self._allowed_keys = DEFAULT_KEYS

  def _open_configs(self) -> dict | None:
    """Loads the config file.

    The parent directory is created as a side effect so that a later save
    can succeed.

    Returns:
      The parsed YAML document, or None when the file does not exist (the
      YAML loader may also yield None for an empty file).
    """
    dir_path = os.path.dirname(self._config)
    # A bare file name has no directory part, so there is nothing to create.
    if dir_path:
      file.ensure_directory_exists(dir_path)

    if not os.path.exists(self._config):
      return None

    with open(self._config, encoding='utf-8', mode='r') as stream:
      return yaml.load(stream)

  def _save_configs(self, config_yaml: dict) -> None:
    """Overwrites the config file with `config_yaml`."""
    with open(self._config, encoding='utf-8', mode='w') as stream:
      yaml.dump(config_yaml, stream)

  def set(self, key: str, value: str) -> None:
    """Stores `value` under `key` and saves the file.

    Keys outside the allowed list are reported and ignored.

    Args:
      key: name of the setting.
      value: value to store.
    """
    if key not in self._allowed_keys:
      xpk_print(f'Key {key} is not an allowed xpk config key.')
      return

    config_yaml = self._open_configs()
    if config_yaml is None:
      config_yaml = {'version': 'v1', CONFIGS_KEY: {}}
    # Tolerate an existing file whose settings mapping is missing or empty.
    if config_yaml.get(CONFIGS_KEY) is None:
      config_yaml[CONFIGS_KEY] = {}

    config_yaml[CONFIGS_KEY][key] = value
    self._save_configs(config_yaml)

  def get(self, key: str) -> str | None:
    """Returns the stored value for `key`.

    Args:
      key: name of the setting.

    Returns:
      The stored value, or None when the key is not allowed, the file does
      not exist, or the key has never been set.
    """
    if key not in self._allowed_keys:
      xpk_print(f'Key {key} is not an allowed xpk config key.')
      return None

    config_yaml = self._open_configs()
    if config_yaml is None:
      return None

    vals: dict[str, str] = config_yaml.get(CONFIGS_KEY) or {}
    return vals.get(key)

  def get_all(
      self,
  ) -> dict[str, dict[str, str] | str] | None:
    """Returns the whole settings mapping, or None when there is none."""
    config_yaml = self._open_configs()
    if config_yaml is None:
      return None
    return config_yaml.get(CONFIGS_KEY)
118
+
119
+
120
+ def parse_env_config(args, tensorboard_config, system: SystemCharacteristics):
121
+ """Parses the environment configurations to the jobset config.
122
+
123
+ Args:
124
+ args: user provided arguments for running the command.
125
+ tensorboard_config: configuration of Vertex Tensorboard.
126
+ system: system characteristics.
127
+ """
# Variables are gathered into `env` and rendered at the end; nothing is
# returned, the rendered text replaces args.env.
128
+ env = {}
129
+
# Matches `NAME` or `NAME=value`, one entry per line (re.M). group(1) is the
# variable name; group(2) is the value, or None when there is no `=`.
130
+ env_pat = re.compile(r'(^[a-zA-Z_][a-zA-Z0-9_]*?)(?:=(.*))?$', re.M)
131
+ if args.env_file:
132
+ print('Setting container environment from', args.env_file)
133
+ with open(file=args.env_file, mode='r', encoding='utf-8') as f:
134
+ for match in env_pat.finditer(f.read()):
135
+ variable = match.group(1)
136
+ if match.group(2) is not None:
137
+ env[variable] = match.group(2)
138
+ else:
# A bare NAME takes its value from the current process environment.
# NOTE(review): `assert` is removed under `python -O`, which would turn this
# check into a KeyError on the lookup below — consider an explicit raise.
139
+ assert variable in os.environ, (
140
+ f'Variable {variable} is not set in the current '
141
+ 'environment, a value must be specified.'
142
+ )
143
+ env[variable] = os.environ[variable]
# args.env entries are applied after the env file, so they win on duplicate
# names. Each entry must carry an explicit `=value`.
144
+ if args.env:
145
+ for var in args.env:
146
+ match = env_pat.match(var)
# NOTE(review): same `-O` caveat as above; without the assert a non-matching
# entry fails with AttributeError on `match.group`.
147
+ assert match and match.group(2) is not None, (
148
+ 'Invalid environment variable, format must be '
149
+ f'`--env VARIABLE=value`: {var}'
150
+ )
151
+ variable = match.group(1)
152
+ env[variable] = match.group(2)
153
+
# For non-Pathways runs with debug_dump_gcs set, XLA_FLAGS is set here; a
# user-supplied XLA_FLAGS is rejected rather than merged.
154
+ if not args.use_pathways:
155
+ if args.debug_dump_gcs:
156
+ if 'XLA_FLAGS' in env:
157
+ raise ValueError(
158
+ 'Conflict: XLA_FLAGS defined in both --debug_dump_gcs '
159
+ 'and environment file. Please choose one way to define '
160
+ 'XLA_FLAGS.'
161
+ )
162
+ env['XLA_FLAGS'] = '--xla_dump_to=/tmp/xla_dump/'
163
+
# Every tensorboard_config entry is exported under its upper-cased key.
164
+ if tensorboard_config:
165
+ env['UPLOAD_DATA_TO_TENSORBOARD'] = True
166
+ for key, value in tensorboard_config.items():
167
+ env[key.upper()] = value
168
+
# NOTE(review): the leading whitespace inside the two templates below is not
# visible in this rendering, and it is significant because the output is
# spliced into YAML. Confirm the exact indentation against the original file.
169
+ if system.accelerator_type == AcceleratorType['GPU']:
170
+ # For GPUs, it has two more spaces ahead of name and value respectively
171
+ env_format = '''
172
+ - name: {key}
173
+ value: "{value}"'''
174
+ else:
175
+ env_format = '''
176
+ - name: {key}
177
+ value: "{value}"'''
178
+
# args.env is overwritten: the list of `NAME=value` strings becomes one
# rendered string with a name/value entry per variable.
179
+ args.env = ''.join(env_format.format(key=k, value=v) for k, v in env.items())
@@ -0,0 +1,225 @@
1
+ """
2
+ Copyright 2025 Google LLC
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ https://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+ """
16
+
17
+ from ..utils.console import xpk_exit, xpk_print
18
+ from .docker_image import setup_docker_image
19
+ from .docker_resources import (
20
+ add_container_ports,
21
+ add_image_pull_policy_for_pw_or_gpu,
22
+ add_jax_coordinator_port,
23
+ get_env_container,
24
+ get_main_container_resources,
25
+ get_volume_mounts,
26
+ )
27
+ from .monitoring import get_gke_debugging_dashboard
28
+ from .system_characteristics import (
29
+ AcceleratorType,
30
+ AcceleratorTypeToAcceleratorCharacteristics,
31
+ SystemCharacteristics,
32
+ )
33
+
34
+
35
+ def get_main_and_sidecar_container(args, system, docker_image) -> str:
36
+ """Generate yaml for main and sidecar container.
37
+ Args:
38
+ args: user provided arguments for running the command.
39
+ system: system characteristics
40
+ docker_image: docker image
41
+
42
+ Returns:
43
+ str:
44
+ yaml for main and sidecar container
45
+ """
# The resource label is looked up from the accelerator type and passed on to
# get_main_container.
46
+ resource_type = AcceleratorTypeToAcceleratorCharacteristics[
47
+ system.accelerator_type
48
+ ].resource_type
49
+ main_container = get_main_container(args, system, docker_image, resource_type)
# The stacktrace-explorer sidecar entry comes first and the rendered main
# container is substituted for {main_container} after it. The sidecar waits
# for files under /tmp/debugging and tails them until
# /shared-volume/stacktrace_signal appears.
# NOTE(review): the template's leading indentation is not visible in this
# rendering and is significant YAML whitespace — confirm against the
# original file.
50
+ yaml = """- name: stacktrace-explorer
51
+ image: busybox:1.28
52
+ args: [/bin/sh, -c, "check_signal() (while [ ! -f /shared-volume/stacktrace_signal ]; do sleep 1; done; pid=$(pidof 'tail'); kill $pid;); check_signal & while [ ! -d /tmp/debugging ]; do sleep 60; done; while [ ! -e /tmp/debugging/* ]; do sleep 60; done; tail -n+1 -f /tmp/debugging/*; exit 0;"]
53
+ volumeMounts:
54
+ - name: tpu-stack-trace
55
+ readOnly: true
56
+ mountPath: /tmp/debugging
57
+ - name: shared-data
58
+ mountPath: /shared-volume
59
+ {main_container}
60
+ """
61
+ return yaml.format(main_container=main_container)
62
+
63
+
64
+ def get_main_container(args, system, docker_image, resource_type) -> str:
65
+ """Generate yaml for main container including the xpk command.
66
+ Args:
67
+ args: user provided arguments for running the command.
68
+ system: system characteristics
69
+ docker_image: docker image
70
+ resource_type: The label to describe the resource type for TPUs/GPUs/CPUs.
71
+
72
+ Returns:
73
+ str:
74
+ yaml for main container
75
+ """
76
+
# With debug_dump_gcs set (non-Pathways only) the script first checks that
# gsutil is installed, and after the user command finishes copies
# /tmp/xla_dump/ to the given location under a per-host directory.
77
+ xpk_internal_commands = ''
78
+ gsutil_test_command = ''
79
+ if not args.use_pathways and args.debug_dump_gcs:
80
+ gsutil_test_command = (
81
+ 'which gsutil >/dev/null 2>&1 || { echo >&2 "gsutil'
82
+ ' is required but not installed. Aborting"; exit 24;};'
83
+ )
84
+ xpk_internal_commands += (
85
+ 'WORKER_ID=$HOSTNAME;'
86
+ f'gsutil -m cp -r /tmp/xla_dump/ {args.debug_dump_gcs}/$WORKER_ID;'
87
+ )
88
+
# enable_debug_logs prefixes the user command with log-level exports.
89
+ command = args.command
90
+ if args.enable_debug_logs:
91
+ command = (
92
+ 'export TPU_STDERR_LOG_LEVEL=0 &&'
93
+ ' export TPU_MIN_LOG_LEVEL=0 &&'
94
+ ' export TF_CPP_MIN_LOG_LEVEL=0 &&'
95
+ ' export TPU_VMODULE=real_program_continuator=1 &&'
96
+ f' {args.command}'
97
+ )
98
+
# GPU workloads write a marker file once the main command is done.
99
+ gpu_workload_terminate_command = ''
100
+ if system.accelerator_type == AcceleratorType['GPU']:
101
+ gpu_workload_terminate_command = (
102
+ 'echo Main app is done > /usr/share/workload/workload_terminated; '
103
+ )
104
+
# When the stacktrace sidecar is deployed, this creates the signal file that
# the stacktrace-explorer container polls for.
105
+ tpu_stacktrace_terminate_command = ''
106
+ if (
107
+ not args.use_pathways
108
+ and system.accelerator_type == AcceleratorType['TPU']
109
+ and args.deploy_stacktrace_sidecar
110
+ ):
111
+ tpu_stacktrace_terminate_command = (
112
+ 'touch /shared-volume/stacktrace_signal; '
113
+ )
114
+
# Container template. The embedded bash script runs the user command in the
# background, polls it with `kill -0`, then `wait`s to collect its exit
# code; that code is echoed and used as the container's exit code after the
# optional post-run commands.
# NOTE(review): the template's leading indentation is not visible in this
# rendering and is significant YAML whitespace — confirm against the
# original file.
115
+ yaml = """- name: {docker_name}
116
+ image: {docker_image}
117
+ {image_pull_policy}
118
+ env: {env}
119
+ ports:
120
+ {container_ports}
121
+ {jax_coordinator_port}
122
+ securityContext:
123
+ privileged: true
124
+ command:
125
+ - bash
126
+ - -c
127
+ - |
128
+ echo XPK Start: $(date);
129
+ _sigterm() (kill -SIGTERM $! 2>/dev/null;);
130
+ trap _sigterm SIGTERM;
131
+ {gsutil_test_command}
132
+ ({command}) & PID=$!;
133
+ while kill -0 $PID 2>/dev/null;
134
+ do sleep 5;
135
+ done;
136
+ wait $PID;
137
+ EXIT_CODE=$?;
138
+ {xpk_internal_commands}
139
+ echo XPK End: $(date);
140
+ echo EXIT_CODE=$EXIT_CODE;
141
+ {tpu_stacktrace_terminate_command}
142
+ {gpu_workload_terminate_command}
143
+ exit $EXIT_CODE
144
+ resources:
145
+ limits:
146
+ {resources}
147
+ """
# The volumeMounts section is appended only when get_volume_mounts returns a
# non-empty string.
148
+ volume_mounts = get_volume_mounts(args, system)
149
+ if volume_mounts != '':
150
+ yaml += """
151
+ volumeMounts:
152
+ {volume_mounts}
153
+ """
# `args` and `system` are passed to format() although no placeholder in the
# template above refers to them; str.format ignores unused keyword arguments.
154
+ return yaml.format(
155
+ args=args,
156
+ system=system,
157
+ image_pull_policy=add_image_pull_policy_for_pw_or_gpu(args, system),
158
+ env=get_env_container(args, system),
159
+ container_ports=add_container_ports(args, system),
160
+ jax_coordinator_port=add_jax_coordinator_port(system),
161
+ docker_name=get_main_container_docker_image(args, system),
162
+ docker_image=docker_image,
163
+ gsutil_test_command=gsutil_test_command,
164
+ command=command,
165
+ tpu_stacktrace_terminate_command=tpu_stacktrace_terminate_command,
166
+ gpu_workload_terminate_command=gpu_workload_terminate_command,
167
+ xpk_internal_commands=xpk_internal_commands,
168
+ resources=get_main_container_resources(args, system, resource_type),
169
+ volume_mounts=volume_mounts,
170
+ )
171
+
172
+
173
def get_user_workload_container(args, system: SystemCharacteristics):
  """Builds the YAML for the user workload container(s).

  The docker image is prepared first; a non-zero setup code exits xpk. For
  non-Pathways TPU workloads that request the stacktrace sidecar, the sidecar
  is emitted together with the main container and the GKE debugging
  dashboard is looked up.

  Args:
    args: user provided args.
    system: system characteristics.

  Returns:
    container: YAML for the main container (plus the sidecar when deployed).
    debugging_dashboard_id: id of the GKE dashboard, or None when no sidecar
      is deployed.
  """
  image_setup_code, docker_image = setup_docker_image(args)
  if image_setup_code != 0:
    xpk_exit(image_setup_code)

  accelerator = AcceleratorTypeToAcceleratorCharacteristics[
      system.accelerator_type
  ]
  resource_type = accelerator.resource_type

  deploy_sidecar = (
      not args.use_pathways
      and system.accelerator_type == AcceleratorType['TPU']
      and args.deploy_stacktrace_sidecar
  )
  if not deploy_sidecar:
    main_only = get_main_container(args, system, docker_image, resource_type)
    return main_only, None

  xpk_print(
      'Sidecar container to display stack traces for TPU workloads will also'
      ' be deployed.'
  )
  with_sidecar = get_main_and_sidecar_container(args, system, docker_image)
  # The dashboard is only fetched when the sidecar is deployed.
  dashboard_id = get_gke_debugging_dashboard(args)
  return with_sidecar, dashboard_id
209
+
210
+
211
def get_main_container_docker_image(args, system: SystemCharacteristics) -> str:
  """Returns the name to give the main container.

  Args:
    args: user provided args.
    system: system characteristics.

  Returns:
    str:
      'gpu-image' for GPU systems, otherwise args.docker_name as a string.
  """
  is_gpu = system.accelerator_type == AcceleratorType['GPU']
  return 'gpu-image' if is_gpu else f'{args.docker_name}'
@@ -0,0 +1,210 @@
1
+ """
2
+ Copyright 2025 Google LLC
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ https://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+ """
16
+
17
+ import datetime
18
+ import os
19
+ import random
20
+ import string
21
+
22
+ from ..utils.console import xpk_exit, xpk_print
23
+ from ..utils.file import write_tmp_file
24
+ from .commands import run_command_with_updates
25
+
26
+ DEFAULT_DOCKER_IMAGE = 'python:3.10'
27
+ DEFAULT_SCRIPT_DIR = os.getcwd()
28
+ PLATFORM = 'linux/amd64'
29
+
30
+
31
def validate_docker_image(docker_image, args) -> int:
  """Validates that the user provided docker image exists in your project.

  Only images whose name contains 'gcr.io' or 'docker.pkg.dev' are checked,
  via `gcloud container images describe`; any other image is accepted
  without a lookup.

  Args:
    docker_image: The docker image to verify.
    args: user provided arguments for running the command.

  Returns:
    0 if successful, otherwise the non-zero return code of the gcloud command.
  """
  project = args.project

  checked_registries = ('gcr.io', 'docker.pkg.dev')
  if not any(repo in docker_image for repo in checked_registries):
    return 0

  command = (
      f'gcloud container images describe {docker_image} --project {project}'
  )
  return_code = run_command_with_updates(
      command, 'Validate Docker Image', args, verbose=False
  )
  if return_code != 0:
    xpk_print(
        'Failed to validate your docker image, check that the docker image'
        f' exists. You may be able to find the {docker_image} in {project}.'
        ' If the docker image exists, the service account of this'
        ' project may be missing the permissions to access the docker image.'
    )
  return return_code
63
+
64
+
65
def build_docker_image_from_base_image(args, verbose=True) -> tuple[int, str]:
  """Adds script dir to the base docker image and uploads the image.

  Three docker commands run in order: build, tag, push. If any of them
  returns a non-zero code, a message is printed and xpk exits with code 1.

  Args:
    args: user provided arguments for running the command.
    verbose: forwarded to run_command_with_updates for every command.

  Returns:
    Tuple of:
      Return code of the last command (0 if successful).
      Name of the Docker image created.
  """

  def _run_or_exit(command: str, task: str, failure_message: str) -> int:
    # Runs one docker command and exits xpk when it fails.
    code = run_command_with_updates(command, task, args, verbose=verbose)
    if code != 0:
      xpk_print(failure_message)
      xpk_exit(1)
    return code

  # The local image is named after the current user.
  user_prefix = os.getenv('USER', 'unknown')
  docker_name = f'{user_prefix}-runner'

  dockerfile_template = """FROM {base_docker_image}

# Set the working directory in the container
WORKDIR /app

# Copy all files from local workspace into docker container
COPY . .

WORKDIR /app
"""
  dockerfile_text = dockerfile_template.format(
      base_docker_image=args.base_docker_image,
  )
  tmp = write_tmp_file(dockerfile_text)

  # Step 1: build script_dir on top of the base image.
  build_command = (
      f'docker buildx build --platform={PLATFORM} -f {str(tmp.file.name)} -t'
      f' {docker_name} {args.script_dir}'
  )
  xpk_print(f'Building {args.script_dir} into docker image.')
  _run_or_exit(
      build_command,
      'Building script_dir into docker image',
      'Failed to add script_dir to docker image, check the base docker image.'
      f' You should be able to navigate to the URL {args.base_docker_image}'
      f' in {args.project}.',
  )

  # The tag is four random lowercase letters followed by a timestamp.
  tag_length = 4
  random_part = ''.join(random.choices(string.ascii_lowercase, k=tag_length))
  timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
  tag_name = f'{random_part}-{timestamp}'
  cloud_docker_image = f'gcr.io/{args.project}/{docker_name}:{tag_name}'
  xpk_print(f'Adding Docker Image: {cloud_docker_image} to {args.project}')

  # Step 2: tag the local image with its registry name.
  _run_or_exit(
      f'docker tag {docker_name} {cloud_docker_image}',
      'Tag Docker Image',
      f'Failed to tag docker image with tag: {tag_name}.'
      f' You should be able to navigate to the URL {cloud_docker_image} in'
      f' {args.project}.',
  )

  # Step 3: push the tagged image.
  return_code = _run_or_exit(
      f'docker push {cloud_docker_image}',
      'Upload Docker Image',
      'Failed to upload docker image.'
      f' You should be able to navigate to the URL {cloud_docker_image} in'
      f' {args.project}.',
  )
  return return_code, cloud_docker_image
151
+
152
+
153
def setup_docker_image(args) -> tuple[int, str]:
  """Checks the docker arguments, validates the image and builds it if asked.

  When the base image is used, it is validated and a new image containing
  the script dir is built and uploaded. Otherwise the user supplied docker
  image is validated and used as is. Any non-zero code from validation or
  build exits xpk.

  Args:
    args: user provided arguments for running the command.

  Returns:
    tuple:
      0 (failures exit before returning).
      Name of the docker image to use.
  """
  build_from_base = use_base_docker_image_or_docker_image(args)
  base_image = args.base_docker_image

  if not build_from_base:
    image = args.docker_image
    status = validate_docker_image(args.docker_image, args)
    if status != 0:
      xpk_exit(status)
    return 0, image

  status = validate_docker_image(base_image, args)
  if status != 0:
    xpk_exit(status)

  build_status, image = build_docker_image_from_base_image(args)
  if build_status != 0:
    xpk_exit(build_status)
  return 0, image
183
+
184
+
185
def use_base_docker_image_or_docker_image(args) -> bool:
  """Checks for correct docker image arguments.

  Either (base_docker_image and script_dir) or docker_image may be set.
  When docker_image is given together with a non-default script_dir or
  base_docker_image, a message is printed and xpk exits with code 1.

  Args:
    args: user provided arguments for running the command.

  Returns:
    True if intended to use base docker image, False to use docker image.
  """
  if args.docker_image is None:
    return True

  # Compare by value: `is not` tests object identity, which for strings
  # depends on how each object was created rather than on its contents.
  if args.script_dir != DEFAULT_SCRIPT_DIR:
    xpk_print(
        '`--script-dir` and --docker-image can not be used together. Please'
        ' see `--help` command for more details.'
    )
    xpk_exit(1)
  if args.base_docker_image != DEFAULT_DOCKER_IMAGE:
    xpk_print(
        '`--base-docker-image` and --docker-image can not be used together.'
        ' Please see `--help` command for more details.'
    )
    xpk_exit(1)
  return False