viettelcloud-aiplatform 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. viettelcloud/__init__.py +1 -0
  2. viettelcloud/aiplatform/__init__.py +15 -0
  3. viettelcloud/aiplatform/common/__init__.py +0 -0
  4. viettelcloud/aiplatform/common/constants.py +22 -0
  5. viettelcloud/aiplatform/common/types.py +28 -0
  6. viettelcloud/aiplatform/common/utils.py +40 -0
  7. viettelcloud/aiplatform/hub/OWNERS +14 -0
  8. viettelcloud/aiplatform/hub/__init__.py +25 -0
  9. viettelcloud/aiplatform/hub/api/__init__.py +13 -0
  10. viettelcloud/aiplatform/hub/api/_proxy_client.py +355 -0
  11. viettelcloud/aiplatform/hub/api/model_registry_client.py +561 -0
  12. viettelcloud/aiplatform/hub/api/model_registry_client_test.py +462 -0
  13. viettelcloud/aiplatform/optimizer/__init__.py +45 -0
  14. viettelcloud/aiplatform/optimizer/api/__init__.py +0 -0
  15. viettelcloud/aiplatform/optimizer/api/optimizer_client.py +248 -0
  16. viettelcloud/aiplatform/optimizer/backends/__init__.py +13 -0
  17. viettelcloud/aiplatform/optimizer/backends/base.py +77 -0
  18. viettelcloud/aiplatform/optimizer/backends/kubernetes/__init__.py +13 -0
  19. viettelcloud/aiplatform/optimizer/backends/kubernetes/backend.py +563 -0
  20. viettelcloud/aiplatform/optimizer/backends/kubernetes/utils.py +112 -0
  21. viettelcloud/aiplatform/optimizer/constants/__init__.py +13 -0
  22. viettelcloud/aiplatform/optimizer/constants/constants.py +59 -0
  23. viettelcloud/aiplatform/optimizer/types/__init__.py +13 -0
  24. viettelcloud/aiplatform/optimizer/types/algorithm_types.py +87 -0
  25. viettelcloud/aiplatform/optimizer/types/optimization_types.py +135 -0
  26. viettelcloud/aiplatform/optimizer/types/search_types.py +95 -0
  27. viettelcloud/aiplatform/py.typed +0 -0
  28. viettelcloud/aiplatform/trainer/__init__.py +82 -0
  29. viettelcloud/aiplatform/trainer/api/__init__.py +3 -0
  30. viettelcloud/aiplatform/trainer/api/trainer_client.py +277 -0
  31. viettelcloud/aiplatform/trainer/api/trainer_client_test.py +72 -0
  32. viettelcloud/aiplatform/trainer/backends/__init__.py +0 -0
  33. viettelcloud/aiplatform/trainer/backends/base.py +94 -0
  34. viettelcloud/aiplatform/trainer/backends/container/adapters/base.py +195 -0
  35. viettelcloud/aiplatform/trainer/backends/container/adapters/docker.py +231 -0
  36. viettelcloud/aiplatform/trainer/backends/container/adapters/podman.py +258 -0
  37. viettelcloud/aiplatform/trainer/backends/container/backend.py +668 -0
  38. viettelcloud/aiplatform/trainer/backends/container/backend_test.py +867 -0
  39. viettelcloud/aiplatform/trainer/backends/container/runtime_loader.py +631 -0
  40. viettelcloud/aiplatform/trainer/backends/container/runtime_loader_test.py +637 -0
  41. viettelcloud/aiplatform/trainer/backends/container/types.py +67 -0
  42. viettelcloud/aiplatform/trainer/backends/container/utils.py +213 -0
  43. viettelcloud/aiplatform/trainer/backends/kubernetes/__init__.py +0 -0
  44. viettelcloud/aiplatform/trainer/backends/kubernetes/backend.py +710 -0
  45. viettelcloud/aiplatform/trainer/backends/kubernetes/backend_test.py +1344 -0
  46. viettelcloud/aiplatform/trainer/backends/kubernetes/constants.py +15 -0
  47. viettelcloud/aiplatform/trainer/backends/kubernetes/utils.py +636 -0
  48. viettelcloud/aiplatform/trainer/backends/kubernetes/utils_test.py +582 -0
  49. viettelcloud/aiplatform/trainer/backends/localprocess/__init__.py +0 -0
  50. viettelcloud/aiplatform/trainer/backends/localprocess/backend.py +306 -0
  51. viettelcloud/aiplatform/trainer/backends/localprocess/backend_test.py +501 -0
  52. viettelcloud/aiplatform/trainer/backends/localprocess/constants.py +90 -0
  53. viettelcloud/aiplatform/trainer/backends/localprocess/job.py +184 -0
  54. viettelcloud/aiplatform/trainer/backends/localprocess/types.py +52 -0
  55. viettelcloud/aiplatform/trainer/backends/localprocess/utils.py +302 -0
  56. viettelcloud/aiplatform/trainer/constants/__init__.py +0 -0
  57. viettelcloud/aiplatform/trainer/constants/constants.py +179 -0
  58. viettelcloud/aiplatform/trainer/options/__init__.py +52 -0
  59. viettelcloud/aiplatform/trainer/options/common.py +55 -0
  60. viettelcloud/aiplatform/trainer/options/kubernetes.py +502 -0
  61. viettelcloud/aiplatform/trainer/options/kubernetes_test.py +259 -0
  62. viettelcloud/aiplatform/trainer/options/localprocess.py +20 -0
  63. viettelcloud/aiplatform/trainer/test/common.py +22 -0
  64. viettelcloud/aiplatform/trainer/types/__init__.py +0 -0
  65. viettelcloud/aiplatform/trainer/types/types.py +517 -0
  66. viettelcloud/aiplatform/trainer/types/types_test.py +115 -0
  67. viettelcloud_aiplatform-0.3.0.dist-info/METADATA +226 -0
  68. viettelcloud_aiplatform-0.3.0.dist-info/RECORD +71 -0
  69. viettelcloud_aiplatform-0.3.0.dist-info/WHEEL +4 -0
  70. viettelcloud_aiplatform-0.3.0.dist-info/licenses/LICENSE +201 -0
  71. viettelcloud_aiplatform-0.3.0.dist-info/licenses/NOTICE +36 -0
@@ -0,0 +1,248 @@
1
+ # Copyright 2025 The Kubeflow Authors.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from collections.abc import Callable, Iterator
16
+ import logging
17
+ from typing import Any, Optional
18
+
19
+ from viettelcloud.aiplatform.common.types import KubernetesBackendConfig
20
+ from viettelcloud.aiplatform.optimizer.backends.kubernetes.backend import KubernetesBackend
21
+ from viettelcloud.aiplatform.optimizer.constants import constants
22
+ from viettelcloud.aiplatform.optimizer.types.algorithm_types import BaseAlgorithm
23
+ from viettelcloud.aiplatform.optimizer.types.optimization_types import (
24
+ Objective,
25
+ OptimizationJob,
26
+ Result,
27
+ TrialConfig,
28
+ )
29
+ from viettelcloud.aiplatform.trainer.types.types import Event, TrainJobTemplate
30
+
31
+ logger = logging.getLogger(__name__)  # Module-level logger named after this module (__name__).
32
+
33
+
34
class OptimizerClient:
    """Client for creating and managing OptimizationJobs.

    The client selects a backend in ``__init__`` and every public method
    forwards its arguments to the method of the same name on that backend.
    """

    def __init__(
        self,
        backend_config: Optional[KubernetesBackendConfig] = None,
    ):
        """Initialize a Kubeflow Optimizer client.

        Args:
            backend_config: Backend configuration. Either KubernetesBackendConfig or None to use
                the default config class. Defaults to KubernetesBackendConfig.

        Raises:
            ValueError: Invalid backend configuration.
        """
        # Fall back to the default backend config when none (or a falsy value) is supplied.
        if not backend_config:
            backend_config = KubernetesBackendConfig()

        # Only the Kubernetes backend is supported; reject anything else up front.
        if not isinstance(backend_config, KubernetesBackendConfig):
            raise ValueError(f"Invalid backend config '{backend_config}'")

        self.backend = KubernetesBackend(backend_config)

    def optimize(
        self,
        trial_template: TrainJobTemplate,
        *,
        trial_config: Optional[TrialConfig] = None,
        search_space: dict[str, Any],
        objectives: Optional[list[Objective]] = None,
        algorithm: Optional[BaseAlgorithm] = None,
    ) -> str:
        """Create an OptimizationJob for hyperparameter tuning.

        Args:
            trial_template: The TrainJob template defining the training script.
            trial_config: Optional configuration to run Trials.
            search_space: Dictionary mapping parameter names to Search specifications using
                Search.uniform(), Search.loguniform(), Search.choice(), etc.
            objectives: List of objectives to optimize.
            algorithm: The optimization algorithm to use. Defaults to RandomSearch.

        Returns:
            The unique name of the Experiment that has been generated.

        Raises:
            ValueError: Input arguments are invalid.
            TimeoutError: Timeout to create Experiment.
            RuntimeError: Failed to create Experiment.
        """
        return self.backend.optimize(
            trial_template=trial_template,
            trial_config=trial_config,
            objectives=objectives,
            search_space=search_space,
            algorithm=algorithm,
        )

    def list_jobs(self) -> list[OptimizationJob]:
        """List the created OptimizationJobs.

        Returns:
            List of created OptimizationJobs. If no OptimizationJob exists,
            an empty list is returned.

        Raises:
            TimeoutError: Timeout to list OptimizationJobs.
            RuntimeError: Failed to list OptimizationJobs.
        """
        return self.backend.list_jobs()

    def get_job(self, name: str) -> OptimizationJob:
        """Get the OptimizationJob object.

        Args:
            name: Name of the OptimizationJob.

        Returns:
            An OptimizationJob object.

        Raises:
            TimeoutError: Timeout to get an OptimizationJob.
            RuntimeError: Failed to get an OptimizationJob.
        """
        return self.backend.get_job(name=name)

    def get_job_logs(
        self,
        name: str,
        trial_name: Optional[str] = None,
        follow: bool = False,
    ) -> Iterator[str]:
        """Get logs from a specific trial of an OptimizationJob.

        You can watch for the logs in realtime as follows:
        ```python
        from viettelcloud.aiplatform.optimizer import OptimizerClient

        # Get logs from the best current trial
        for logline in OptimizerClient().get_job_logs(name="n7fb28dbee94"):
            print(logline)

        # Get logs from a specific trial
        for logline in OptimizerClient().get_job_logs(
            name="n7fb28dbee94", trial_name="n7fb28dbee94-abc123", follow=True
        ):
            print(logline)
        ```

        Args:
            name: Name of the OptimizationJob.
            trial_name: Optional name of a specific Trial. If not provided, logs from the
                current best trial are returned. If no best trial is available yet, logs
                from the first trial are returned.
            follow: Whether to stream logs in realtime as they are produced.

        Returns:
            Iterator of log lines.

        Raises:
            TimeoutError: Timeout to get an OptimizationJob.
            RuntimeError: Failed to get an OptimizationJob.
        """
        return self.backend.get_job_logs(name=name, trial_name=trial_name, follow=follow)

    def get_best_results(self, name: str) -> Optional[Result]:
        """Get the best hyperparameters and metrics from an OptimizationJob.

        This method retrieves the optimal hyperparameters and their corresponding metrics
        from the best trial found during the optimization process.

        Args:
            name: Name of the OptimizationJob.

        Returns:
            A Result object containing the best hyperparameters and metrics, or None if
            no best trial is available yet.

        Raises:
            TimeoutError: Timeout to get an OptimizationJob.
            RuntimeError: Failed to get an OptimizationJob.
        """
        return self.backend.get_best_results(name=name)

    def wait_for_job_status(
        self,
        name: str,
        status: Optional[set[str]] = None,
        timeout: int = 3600,
        polling_interval: int = 2,
        callbacks: Optional[list[Callable[[OptimizationJob], None]]] = None,
    ) -> OptimizationJob:
        """Wait for an OptimizationJob to reach a desired status.

        Args:
            name: Name of the OptimizationJob.
            status: Expected statuses. Must be a subset of Created, Running, Complete, and
                Failed statuses. When omitted or None, a new set containing only
                ``constants.OPTIMIZATION_JOB_COMPLETE`` is used.
            timeout: Maximum number of seconds to wait for the OptimizationJob to reach one of the
                expected statuses.
            polling_interval: The polling interval in seconds to check OptimizationJob status.
            callbacks: Optional list of callback functions to be invoked after each polling
                interval. Each callback should accept a single argument: the OptimizationJob object.

        Returns:
            An OptimizationJob object that reaches the desired status.

        Raises:
            ValueError: The input values are incorrect.
            RuntimeError: Failed to get OptimizationJob or OptimizationJob reaches unexpected
                Failed status.
            TimeoutError: Timeout to wait for OptimizationJob status.
        """
        # Build the default per call. A set literal in the signature is created once at
        # definition time, so the same mutable object would be passed to the backend on
        # every call that relies on the default.
        if status is None:
            status = {constants.OPTIMIZATION_JOB_COMPLETE}

        return self.backend.wait_for_job_status(
            name=name,
            status=status,
            timeout=timeout,
            polling_interval=polling_interval,
            callbacks=callbacks,
        )

    def delete_job(self, name: str):
        """Delete the OptimizationJob.

        Args:
            name: Name of the OptimizationJob.

        Returns:
            Whatever the backend's ``delete_job`` returns; the value is passed through
            unchanged.

        Raises:
            TimeoutError: Timeout to delete OptimizationJob.
            RuntimeError: Failed to delete OptimizationJob.
        """
        return self.backend.delete_job(name=name)

    def get_job_events(self, name: str) -> list[Event]:
        """Get events for an OptimizationJob.

        This provides additional clarity about the state of the OptimizationJob
        when logs alone are not sufficient. Events include information about
        trial state changes, errors, and other significant occurrences.

        Args:
            name: Name of the OptimizationJob.

        Returns:
            A list of Event objects associated with the OptimizationJob.

        Raises:
            TimeoutError: Timeout to get the OptimizationJob events.
            RuntimeError: Failed to get the OptimizationJob events.
        """
        return self.backend.get_job_events(name=name)
@@ -0,0 +1,13 @@
1
+ # Copyright 2025 The Kubeflow Authors.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
@@ -0,0 +1,77 @@
1
+ # Copyright 2025 The Kubeflow Authors.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import abc
16
+ from collections.abc import Callable, Iterator
17
+ from typing import Any, Optional
18
+
19
+ from viettelcloud.aiplatform.optimizer.constants import constants
20
+ from viettelcloud.aiplatform.optimizer.types.algorithm_types import RandomSearch
21
+ from viettelcloud.aiplatform.optimizer.types.optimization_types import (
22
+ Objective,
23
+ OptimizationJob,
24
+ Result,
25
+ TrialConfig,
26
+ )
27
+ from viettelcloud.aiplatform.trainer.types.types import TrainJobTemplate
28
+
29
+
30
class RuntimeBackend(abc.ABC):
    """Abstract interface for optimizer backends.

    Every method below is declared with ``abc.abstractmethod`` and raises
    NotImplementedError here, so concrete backends must override all of them.

    NOTE(review): OptimizerClient (optimizer/api/optimizer_client.py) also calls
    ``backend.get_job_events(name=...)``, which is not declared on this interface.
    Confirm whether it should be added here.
    """

    @abc.abstractmethod
    def optimize(
        self,
        trial_template: TrainJobTemplate,
        *,
        search_space: dict[str, Any],
        trial_config: Optional[TrialConfig] = None,
        objectives: Optional[list[Objective]] = None,
        algorithm: Optional[RandomSearch] = None,
    ) -> str:
        """Create an OptimizationJob and return its name as a string.

        NOTE(review): ``algorithm`` is annotated as RandomSearch here, while
        OptimizerClient.optimize accepts BaseAlgorithm. Confirm which is intended.
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def list_jobs(self) -> list[OptimizationJob]:
        """Return the list of OptimizationJobs."""
        raise NotImplementedError()

    @abc.abstractmethod
    def get_job(self, name: str) -> OptimizationJob:
        """Return the OptimizationJob with the given name."""
        raise NotImplementedError()

    @abc.abstractmethod
    def get_job_logs(
        self,
        name: str,
        trial_name: Optional[str],
        follow: bool,
    ) -> Iterator[str]:
        """Return an iterator of log lines for the named OptimizationJob.

        Unlike OptimizerClient.get_job_logs, ``trial_name`` and ``follow`` have no
        defaults here and must always be passed.
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def get_best_results(self, name: str) -> Optional[Result]:
        """Return the best Result for the named OptimizationJob, or None."""
        raise NotImplementedError()

    @abc.abstractmethod
    def wait_for_job_status(
        self,
        name: str,
        status: Optional[set[str]] = None,
        timeout: int = 3600,
        polling_interval: int = 2,
        callbacks: Optional[list[Callable[[OptimizationJob], None]]] = None,
    ) -> OptimizationJob:
        """Wait for the named OptimizationJob to reach one of the expected statuses.

        ``status`` defaults to None rather than a set literal so the signature does
        not carry a shared mutable default. Implementations should treat None as
        ``{constants.OPTIMIZATION_JOB_COMPLETE}``, which was the previous default.
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def delete_job(self, name: str):
        """Delete the named OptimizationJob."""
        raise NotImplementedError()
@@ -0,0 +1,13 @@
1
+ # Copyright 2025 The Kubeflow Authors.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.