viettelcloud-aiplatform 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. viettelcloud/__init__.py +1 -0
  2. viettelcloud/aiplatform/__init__.py +15 -0
  3. viettelcloud/aiplatform/common/__init__.py +0 -0
  4. viettelcloud/aiplatform/common/constants.py +22 -0
  5. viettelcloud/aiplatform/common/types.py +28 -0
  6. viettelcloud/aiplatform/common/utils.py +40 -0
  7. viettelcloud/aiplatform/hub/OWNERS +14 -0
  8. viettelcloud/aiplatform/hub/__init__.py +25 -0
  9. viettelcloud/aiplatform/hub/api/__init__.py +13 -0
  10. viettelcloud/aiplatform/hub/api/_proxy_client.py +355 -0
  11. viettelcloud/aiplatform/hub/api/model_registry_client.py +561 -0
  12. viettelcloud/aiplatform/hub/api/model_registry_client_test.py +462 -0
  13. viettelcloud/aiplatform/optimizer/__init__.py +45 -0
  14. viettelcloud/aiplatform/optimizer/api/__init__.py +0 -0
  15. viettelcloud/aiplatform/optimizer/api/optimizer_client.py +248 -0
  16. viettelcloud/aiplatform/optimizer/backends/__init__.py +13 -0
  17. viettelcloud/aiplatform/optimizer/backends/base.py +77 -0
  18. viettelcloud/aiplatform/optimizer/backends/kubernetes/__init__.py +13 -0
  19. viettelcloud/aiplatform/optimizer/backends/kubernetes/backend.py +563 -0
  20. viettelcloud/aiplatform/optimizer/backends/kubernetes/utils.py +112 -0
  21. viettelcloud/aiplatform/optimizer/constants/__init__.py +13 -0
  22. viettelcloud/aiplatform/optimizer/constants/constants.py +59 -0
  23. viettelcloud/aiplatform/optimizer/types/__init__.py +13 -0
  24. viettelcloud/aiplatform/optimizer/types/algorithm_types.py +87 -0
  25. viettelcloud/aiplatform/optimizer/types/optimization_types.py +135 -0
  26. viettelcloud/aiplatform/optimizer/types/search_types.py +95 -0
  27. viettelcloud/aiplatform/py.typed +0 -0
  28. viettelcloud/aiplatform/trainer/__init__.py +82 -0
  29. viettelcloud/aiplatform/trainer/api/__init__.py +3 -0
  30. viettelcloud/aiplatform/trainer/api/trainer_client.py +277 -0
  31. viettelcloud/aiplatform/trainer/api/trainer_client_test.py +72 -0
  32. viettelcloud/aiplatform/trainer/backends/__init__.py +0 -0
  33. viettelcloud/aiplatform/trainer/backends/base.py +94 -0
  34. viettelcloud/aiplatform/trainer/backends/container/adapters/base.py +195 -0
  35. viettelcloud/aiplatform/trainer/backends/container/adapters/docker.py +231 -0
  36. viettelcloud/aiplatform/trainer/backends/container/adapters/podman.py +258 -0
  37. viettelcloud/aiplatform/trainer/backends/container/backend.py +668 -0
  38. viettelcloud/aiplatform/trainer/backends/container/backend_test.py +867 -0
  39. viettelcloud/aiplatform/trainer/backends/container/runtime_loader.py +631 -0
  40. viettelcloud/aiplatform/trainer/backends/container/runtime_loader_test.py +637 -0
  41. viettelcloud/aiplatform/trainer/backends/container/types.py +67 -0
  42. viettelcloud/aiplatform/trainer/backends/container/utils.py +213 -0
  43. viettelcloud/aiplatform/trainer/backends/kubernetes/__init__.py +0 -0
  44. viettelcloud/aiplatform/trainer/backends/kubernetes/backend.py +710 -0
  45. viettelcloud/aiplatform/trainer/backends/kubernetes/backend_test.py +1344 -0
  46. viettelcloud/aiplatform/trainer/backends/kubernetes/constants.py +15 -0
  47. viettelcloud/aiplatform/trainer/backends/kubernetes/utils.py +636 -0
  48. viettelcloud/aiplatform/trainer/backends/kubernetes/utils_test.py +582 -0
  49. viettelcloud/aiplatform/trainer/backends/localprocess/__init__.py +0 -0
  50. viettelcloud/aiplatform/trainer/backends/localprocess/backend.py +306 -0
  51. viettelcloud/aiplatform/trainer/backends/localprocess/backend_test.py +501 -0
  52. viettelcloud/aiplatform/trainer/backends/localprocess/constants.py +90 -0
  53. viettelcloud/aiplatform/trainer/backends/localprocess/job.py +184 -0
  54. viettelcloud/aiplatform/trainer/backends/localprocess/types.py +52 -0
  55. viettelcloud/aiplatform/trainer/backends/localprocess/utils.py +302 -0
  56. viettelcloud/aiplatform/trainer/constants/__init__.py +0 -0
  57. viettelcloud/aiplatform/trainer/constants/constants.py +179 -0
  58. viettelcloud/aiplatform/trainer/options/__init__.py +52 -0
  59. viettelcloud/aiplatform/trainer/options/common.py +55 -0
  60. viettelcloud/aiplatform/trainer/options/kubernetes.py +502 -0
  61. viettelcloud/aiplatform/trainer/options/kubernetes_test.py +259 -0
  62. viettelcloud/aiplatform/trainer/options/localprocess.py +20 -0
  63. viettelcloud/aiplatform/trainer/test/common.py +22 -0
  64. viettelcloud/aiplatform/trainer/types/__init__.py +0 -0
  65. viettelcloud/aiplatform/trainer/types/types.py +517 -0
  66. viettelcloud/aiplatform/trainer/types/types_test.py +115 -0
  67. viettelcloud_aiplatform-0.3.0.dist-info/METADATA +226 -0
  68. viettelcloud_aiplatform-0.3.0.dist-info/RECORD +71 -0
  69. viettelcloud_aiplatform-0.3.0.dist-info/WHEEL +4 -0
  70. viettelcloud_aiplatform-0.3.0.dist-info/licenses/LICENSE +201 -0
  71. viettelcloud_aiplatform-0.3.0.dist-info/licenses/NOTICE +36 -0
@@ -0,0 +1,502 @@
1
+ # Copyright 2025 The Kubeflow Authors.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Kubernetes-specific training options for the Kubeflow Trainer SDK."""
16
+
17
+ from dataclasses import dataclass
18
+ from typing import Any, Optional, Union
19
+
20
+ from viettelcloud.aiplatform.trainer.backends.base import RuntimeBackend
21
+ from viettelcloud.aiplatform.trainer.types.types import (
22
+ BuiltinTrainer,
23
+ CustomTrainer,
24
+ CustomTrainerContainer,
25
+ )
26
+
27
+
28
@dataclass
class ContainerOverride:
    """Configuration for overriding a specific container in a pod.

    The instance is validated in ``__post_init__``; the first problem found is
    reported as a ``ValueError``.

    Args:
        name: Name of the container to override (must exist in TrainingRuntime).
        env: Environment variables to add/merge with the container.
            Each dict should have 'name' and 'value' or 'valueFrom' keys.
        volume_mounts: Volume mounts to add/merge with the container.
            Each dict should have 'name' and 'mountPath' keys at minimum.
    """

    name: str
    env: Optional[list[dict]] = None
    volume_mounts: Optional[list[dict]] = None

    def __post_init__(self):
        """Validate the container override configuration.

        Raises:
            ValueError: If ``name`` is not a non-blank string, or if ``env`` or
                ``volume_mounts`` is provided and malformed.
        """
        # The isinstance check keeps a non-string name (e.g. an int) from
        # surfacing as an AttributeError on ``.strip()``.
        if not isinstance(self.name, str) or not self.name.strip():
            raise ValueError("Container name must be a non-empty string")

        if self.env is not None:
            self._validate_env(self.env)

        if self.volume_mounts is not None:
            self._validate_volume_mounts(self.volume_mounts)

    @staticmethod
    def _validate_env(env: Any) -> None:
        """Check that ``env`` is a list of well-formed environment variable dicts."""
        if not isinstance(env, list):
            raise ValueError("env must be a list of dictionaries")

        # A tuple (not a set) so the error message below lists the keys in a
        # stable order regardless of string hash randomisation.
        value_from_keys = ("configMapKeyRef", "secretKeyRef", "fieldRef", "resourceFieldRef")

        for env_var in env:
            if not isinstance(env_var, dict):
                raise ValueError("Each env entry must be a dictionary")
            if "name" not in env_var:
                raise ValueError("Each env entry must have a 'name' key")
            env_name = env_var["name"]
            if not isinstance(env_name, str) or not env_name:
                raise ValueError("env 'name' must be a non-empty string")
            if "value" not in env_var and "valueFrom" not in env_var:
                raise ValueError("Each env entry must have either 'value' or 'valueFrom' key")

            # Validate valueFrom structure if present.
            if "valueFrom" in env_var:
                value_from = env_var["valueFrom"]
                if not isinstance(value_from, dict):
                    raise ValueError("env 'valueFrom' must be a dictionary")
                if not any(key in value_from for key in value_from_keys):
                    raise ValueError(
                        f"env 'valueFrom' must contain one of: {', '.join(value_from_keys)}"
                    )

    @staticmethod
    def _validate_volume_mounts(volume_mounts: Any) -> None:
        """Check that ``volume_mounts`` is a list of well-formed volume mount dicts."""
        if not isinstance(volume_mounts, list):
            raise ValueError("volume_mounts must be a list of dictionaries")

        for mount in volume_mounts:
            if not isinstance(mount, dict):
                raise ValueError("Each volume_mounts entry must be a dictionary")
            if "name" not in mount:
                raise ValueError("Each volume_mounts entry must have a 'name' key")
            mount_name = mount["name"]
            if not isinstance(mount_name, str) or not mount_name:
                raise ValueError("volume_mounts 'name' must be a non-empty string")
            if "mountPath" not in mount:
                raise ValueError("Each volume_mounts entry must have a 'mountPath' key")
            mount_path = mount["mountPath"]
            if not mount_path or not isinstance(mount_path, str):
                raise ValueError("volume_mounts 'mountPath' must be a non-empty string")
            if not mount_path.startswith("/"):
                raise ValueError(
                    f"volume_mounts 'mountPath' must be an absolute path "
                    f"(start with /): {mount_path}"
                )
94
+
95
+
96
@dataclass
class PodSpecOverride:
    """Pod-spec level settings that can be overridden on a pod template.

    Every field is optional and defaults to ``None``.

    Args:
        service_account_name: Service account to use for the pods.
        node_selector: Node selector to place pods on specific nodes.
        affinity: Affinity rules for pod scheduling.
        tolerations: Tolerations for pod scheduling.
        volumes: Volumes to add/merge with the pod.
        init_containers: Init containers to add/merge with the pod.
        containers: Containers to add/merge with the pod.
        scheduling_gates: Scheduling gates for the pods.
        image_pull_secrets: Image pull secrets for the pods.
    """

    # Scheduling / identity settings.
    service_account_name: Optional[str] = None
    node_selector: Optional[dict[str, str]] = None
    affinity: Optional[dict] = None
    tolerations: Optional[list[dict]] = None

    # Volumes and per-container overrides.
    volumes: Optional[list[dict]] = None
    init_containers: Optional[list[ContainerOverride]] = None
    containers: Optional[list[ContainerOverride]] = None

    # Remaining pod-level lists.
    scheduling_gates: Optional[list[dict]] = None
    image_pull_secrets: Optional[list[dict]] = None
121
+
122
+
123
@dataclass
class PodTemplateOverride:
    """A set of pod template overrides bound to specific job types.

    Args:
        target_jobs: Names of the jobs the overrides apply to
            (e.g., ["node", "launcher"]).
        metadata: Metadata overrides for the pod template (labels, annotations).
        spec: Spec overrides for the pod template.
    """

    # Required: which jobs receive the overrides.
    target_jobs: list[str]
    # Optional pieces; each defaults to None.
    metadata: Optional[dict] = None
    spec: Optional[PodSpecOverride] = None
136
+
137
+
138
@dataclass
class Labels:
    """Option that sets the labels of the TrainJob resource (.metadata.labels).

    Supported backends:
        - Kubernetes

    Args:
        labels: Dictionary of label key-value pairs for the TrainJob metadata.
    """

    labels: dict[str, str]

    def __call__(
        self,
        job_spec: dict[str, Any],
        trainer: Optional[Union[CustomTrainer, BuiltinTrainer]],
        backend: RuntimeBackend,
    ) -> None:
        """Store the configured labels under ``job_spec["metadata"]["labels"]``.

        The "metadata" mapping is created when missing; a value already stored
        under "labels" is replaced.

        Args:
            job_spec: Job specification dictionary, modified in place.
            trainer: Optional trainer instance; not used by this option.
            backend: Backend instance, checked for compatibility.

        Raises:
            ValueError: If ``backend`` is not a KubernetesBackend.
        """
        # Local import — presumably to avoid a circular import with the backend
        # module; confirm before moving it to module level.
        from viettelcloud.aiplatform.trainer.backends.kubernetes.backend import KubernetesBackend

        if isinstance(backend, KubernetesBackend):
            job_spec.setdefault("metadata", {})["labels"] = self.labels
            return

        backend_name = type(backend).__name__
        raise ValueError(
            f"Labels option is not compatible with {backend_name}. "
            "Supported backends: KubernetesBackend"
        )
177
+
178
+
179
@dataclass
class Annotations:
    """Option that sets the annotations of the TrainJob resource (.metadata.annotations).

    Supported backends:
        - Kubernetes

    Args:
        annotations: Dictionary of annotation key-value pairs for the TrainJob metadata.
    """

    annotations: dict[str, str]

    def __call__(
        self,
        job_spec: dict[str, Any],
        trainer: Optional[Union[CustomTrainer, BuiltinTrainer]],
        backend: RuntimeBackend,
    ) -> None:
        """Store the configured annotations under ``job_spec["metadata"]["annotations"]``.

        The "metadata" mapping is created when missing; a value already stored
        under "annotations" is replaced.

        Args:
            job_spec: Job specification dictionary, modified in place.
            trainer: Optional trainer instance; not used by this option.
            backend: Backend instance, checked for compatibility.

        Raises:
            ValueError: If ``backend`` is not a KubernetesBackend.
        """
        # Local import — presumably to avoid a circular import with the backend
        # module; confirm before moving it to module level.
        from viettelcloud.aiplatform.trainer.backends.kubernetes.backend import KubernetesBackend

        if isinstance(backend, KubernetesBackend):
            job_spec.setdefault("metadata", {})["annotations"] = self.annotations
            return

        backend_name = type(backend).__name__
        raise ValueError(
            f"Annotations option is not compatible with {backend_name}. "
            "Supported backends: KubernetesBackend"
        )
218
+
219
+
220
@dataclass
class SpecLabels:
    """Option that sets labels for the derivative JobSet and Jobs (.spec.labels).

    These labels will be merged with the TrainingRuntime values and applied to
    the JobSet and Jobs created by the TrainJob.

    Supported backends:
        - Kubernetes

    Args:
        labels: Dictionary of label key-value pairs for the JobSet and Jobs.
    """

    labels: dict[str, str]

    def __call__(
        self,
        job_spec: dict[str, Any],
        trainer: Optional[Union[CustomTrainer, BuiltinTrainer]],
        backend: RuntimeBackend,
    ) -> None:
        """Store the configured labels under ``job_spec["spec"]["labels"]``.

        The "spec" mapping is created when missing; a value already stored
        under "labels" is replaced.

        Args:
            job_spec: Job specification dictionary, modified in place.
            trainer: Optional trainer instance; not used by this option.
            backend: Backend instance, checked for compatibility.

        Raises:
            ValueError: If ``backend`` is not a KubernetesBackend.
        """
        # Local import — presumably to avoid a circular import with the backend
        # module; confirm before moving it to module level.
        from viettelcloud.aiplatform.trainer.backends.kubernetes.backend import KubernetesBackend

        if isinstance(backend, KubernetesBackend):
            job_spec.setdefault("spec", {})["labels"] = self.labels
            return

        backend_name = type(backend).__name__
        raise ValueError(
            f"SpecLabels option is not compatible with {backend_name}. "
            "Supported backends: KubernetesBackend"
        )
262
+
263
+
264
@dataclass
class SpecAnnotations:
    """Option that sets annotations for the derivative JobSet and Jobs (.spec.annotations).

    These annotations will be merged with the TrainingRuntime values and applied to
    the JobSet and Jobs created by the TrainJob.

    Supported backends:
        - Kubernetes

    Args:
        annotations: Dictionary of annotation key-value pairs for the JobSet and Jobs.
    """

    annotations: dict[str, str]

    def __call__(
        self,
        job_spec: dict[str, Any],
        trainer: Optional[Union[CustomTrainer, BuiltinTrainer]],
        backend: RuntimeBackend,
    ) -> None:
        """Store the configured annotations under ``job_spec["spec"]["annotations"]``.

        The "spec" mapping is created when missing; a value already stored
        under "annotations" is replaced.

        Args:
            job_spec: Job specification dictionary, modified in place.
            trainer: Optional trainer instance; not used by this option.
            backend: Backend instance, checked for compatibility.

        Raises:
            ValueError: If ``backend`` is not a KubernetesBackend.
        """
        # Local import — presumably to avoid a circular import with the backend
        # module; confirm before moving it to module level.
        from viettelcloud.aiplatform.trainer.backends.kubernetes.backend import KubernetesBackend

        if isinstance(backend, KubernetesBackend):
            job_spec.setdefault("spec", {})["annotations"] = self.annotations
            return

        backend_name = type(backend).__name__
        raise ValueError(
            f"SpecAnnotations option is not compatible with {backend_name}. "
            "Supported backends: KubernetesBackend"
        )
306
+
307
+
308
class PodTemplateOverrides:
    """Add pod template overrides to the TrainJob (.spec.podTemplateOverrides).

    Supported backends:
        - Kubernetes

    Args:
        *overrides: One or more PodTemplateOverride objects.
    """

    # (PodSpecOverride attribute, API key) pairs copied verbatim when truthy.
    # The order here is the key order of the emitted "spec" dictionary.
    _PLAIN_SPEC_FIELDS = (
        ("service_account_name", "serviceAccountName"),
        ("node_selector", "nodeSelector"),
        ("affinity", "affinity"),
        ("tolerations", "tolerations"),
        ("volumes", "volumes"),
        ("scheduling_gates", "schedulingGates"),
        ("image_pull_secrets", "imagePullSecrets"),
    )

    def __init__(self, *overrides: PodTemplateOverride):
        """Initialize with variable number of PodTemplateOverride objects.

        Raises:
            ValueError: If no override is given.
        """
        if not overrides:
            raise ValueError("At least one PodTemplateOverride must be provided")
        self.pod_overrides = list(overrides)

    def __call__(
        self,
        job_spec: dict[str, Any],
        trainer: Optional[Union[CustomTrainer, BuiltinTrainer]],
        backend: RuntimeBackend,
    ) -> None:
        """Apply pod template overrides to the job specification.

        Each stored override is converted to its camelCase dictionary form and
        appended to ``job_spec["spec"]["podTemplateOverrides"]``; the "spec"
        mapping and the list are created when missing.

        Args:
            job_spec: Job specification dictionary to modify.
            trainer: Optional trainer instance; not used by this option.
            backend: Backend instance for validation.

        Raises:
            ValueError: If backend does not support pod template overrides.
        """
        # Local import — presumably to avoid a circular import with the backend
        # module; confirm before moving it to module level.
        from viettelcloud.aiplatform.trainer.backends.kubernetes.backend import KubernetesBackend

        if not isinstance(backend, KubernetesBackend):
            raise ValueError(
                f"PodTemplateOverrides option is not compatible with {type(backend).__name__}. "
                f"Supported backends: KubernetesBackend"
            )

        spec = job_spec.setdefault("spec", {})
        pod_overrides = spec.setdefault("podTemplateOverrides", [])

        for override in self.pod_overrides:
            pod_overrides.append(self._override_to_api(override))

    @classmethod
    def _override_to_api(cls, override: "PodTemplateOverride") -> dict[str, Any]:
        """Convert one PodTemplateOverride into its API dictionary."""
        api_override: dict[str, Any] = {
            "targetJobs": [{"name": job} for job in override.target_jobs]
        }

        if override.metadata:
            api_override["metadata"] = override.metadata

        if override.spec:
            spec_dict = cls._spec_to_api(override.spec)
            # An override whose spec has nothing set emits no "spec" key at all.
            if spec_dict:
                api_override["spec"] = spec_dict

        return api_override

    @classmethod
    def _spec_to_api(cls, pod_spec: "PodSpecOverride") -> dict[str, Any]:
        """Convert a PodSpecOverride into a dict holding only its truthy fields."""
        spec_dict: dict[str, Any] = {}

        for attr_name, api_key in cls._PLAIN_SPEC_FIELDS:
            value = getattr(pod_spec, attr_name)
            if value:
                spec_dict[api_key] = value

        # Container overrides are converted entry by entry.
        if pod_spec.init_containers:
            spec_dict["initContainers"] = [
                cls._container_to_api(container) for container in pod_spec.init_containers
            ]

        if pod_spec.containers:
            spec_dict["containers"] = [
                cls._container_to_api(container) for container in pod_spec.containers
            ]

        return spec_dict

    @staticmethod
    def _container_to_api(container: "ContainerOverride") -> dict[str, Any]:
        """Convert a ContainerOverride into a dict; env/volumeMounts only when set."""
        container_dict: dict[str, Any] = {"name": container.name}
        if container.env:
            container_dict["env"] = container.env
        if container.volume_mounts:
            container_dict["volumeMounts"] = container.volume_mounts
        return container_dict
399
+
400
+
401
@dataclass
class TrainerCommand:
    """Option that overrides the trainer container command (.spec.trainer.command).

    Can only be used with CustomTrainerContainer. CustomTrainer generates its own
    command from the function, and BuiltinTrainer uses pre-configured commands.

    Supported backends:
        - Kubernetes

    Args:
        command: List of command strings that replaces the default trainer command.
    """

    command: list[str]

    def __call__(
        self,
        job_spec: dict[str, Any],
        trainer: Optional[Union[CustomTrainer, BuiltinTrainer, CustomTrainerContainer]],
        backend: RuntimeBackend,
    ) -> None:
        """Store the command under ``job_spec["spec"]["trainer"]["command"]``.

        The "spec" and "trainer" mappings are created when missing.

        Args:
            job_spec: The job specification, modified in place.
            trainer: Optional trainer; when given it must be a CustomTrainerContainer.
            backend: Backend instance, checked for compatibility.

        Raises:
            ValueError: If ``backend`` is not a KubernetesBackend, or if
                ``trainer`` is given and is not a CustomTrainerContainer.
        """
        # Local import — presumably to avoid a circular import with the backend
        # module; confirm before moving it to module level.
        from viettelcloud.aiplatform.trainer.backends.kubernetes.backend import KubernetesBackend

        if not isinstance(backend, KubernetesBackend):
            backend_name = type(backend).__name__
            raise ValueError(
                f"TrainerCommand option is not compatible with {backend_name}. "
                "Supported backends: KubernetesBackend"
            )

        trainer_is_acceptable = trainer is None or isinstance(trainer, CustomTrainerContainer)
        if not trainer_is_acceptable:
            raise ValueError(
                "TrainerCommand can only be used with CustomTrainerContainer. "
                "CustomTrainer generates its own command from the function, and "
                "BuiltinTrainer uses pre-configured commands."
            )

        job_spec.setdefault("spec", {}).setdefault("trainer", {})["command"] = self.command
451
+
452
+
453
@dataclass
class TrainerArgs:
    """Option that overrides the trainer container arguments (.spec.trainer.args).

    Can only be used with CustomTrainerContainer. CustomTrainer generates its own
    arguments from the function, and BuiltinTrainer uses pre-configured arguments.

    Supported backends:
        - Kubernetes

    Args:
        args: List of argument strings that replaces the default trainer arguments.
    """

    args: list[str]

    def __call__(
        self,
        job_spec: dict[str, Any],
        trainer: Optional[Union[CustomTrainer, BuiltinTrainer, CustomTrainerContainer]],
        backend: RuntimeBackend,
    ) -> None:
        """Store the arguments under ``job_spec["spec"]["trainer"]["args"]``.

        The "spec" and "trainer" mappings are created when missing.

        Args:
            job_spec: The job specification, modified in place.
            trainer: Optional trainer; when given it must be a CustomTrainerContainer.
            backend: Backend instance, checked for compatibility.

        Raises:
            ValueError: If ``backend`` is not a KubernetesBackend, or if
                ``trainer`` is given and is not a CustomTrainerContainer.
        """
        # Local import — presumably to avoid a circular import with the backend
        # module; confirm before moving it to module level.
        from viettelcloud.aiplatform.trainer.backends.kubernetes.backend import KubernetesBackend

        if not isinstance(backend, KubernetesBackend):
            backend_name = type(backend).__name__
            raise ValueError(
                f"TrainerArgs option is not compatible with {backend_name}. "
                "Supported backends: KubernetesBackend"
            )

        trainer_is_acceptable = trainer is None or isinstance(trainer, CustomTrainerContainer)
        if not trainer_is_acceptable:
            raise ValueError(
                "TrainerArgs can only be used with CustomTrainerContainer. "
                "CustomTrainer generates its own arguments from the function, and "
                "BuiltinTrainer uses pre-configured arguments."
            )

        job_spec.setdefault("spec", {}).setdefault("trainer", {})["args"] = self.args