viettelcloud-aiplatform 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. viettelcloud/__init__.py +1 -0
  2. viettelcloud/aiplatform/__init__.py +15 -0
  3. viettelcloud/aiplatform/common/__init__.py +0 -0
  4. viettelcloud/aiplatform/common/constants.py +22 -0
  5. viettelcloud/aiplatform/common/types.py +28 -0
  6. viettelcloud/aiplatform/common/utils.py +40 -0
  7. viettelcloud/aiplatform/hub/OWNERS +14 -0
  8. viettelcloud/aiplatform/hub/__init__.py +25 -0
  9. viettelcloud/aiplatform/hub/api/__init__.py +13 -0
  10. viettelcloud/aiplatform/hub/api/_proxy_client.py +355 -0
  11. viettelcloud/aiplatform/hub/api/model_registry_client.py +561 -0
  12. viettelcloud/aiplatform/hub/api/model_registry_client_test.py +462 -0
  13. viettelcloud/aiplatform/optimizer/__init__.py +45 -0
  14. viettelcloud/aiplatform/optimizer/api/__init__.py +0 -0
  15. viettelcloud/aiplatform/optimizer/api/optimizer_client.py +248 -0
  16. viettelcloud/aiplatform/optimizer/backends/__init__.py +13 -0
  17. viettelcloud/aiplatform/optimizer/backends/base.py +77 -0
  18. viettelcloud/aiplatform/optimizer/backends/kubernetes/__init__.py +13 -0
  19. viettelcloud/aiplatform/optimizer/backends/kubernetes/backend.py +563 -0
  20. viettelcloud/aiplatform/optimizer/backends/kubernetes/utils.py +112 -0
  21. viettelcloud/aiplatform/optimizer/constants/__init__.py +13 -0
  22. viettelcloud/aiplatform/optimizer/constants/constants.py +59 -0
  23. viettelcloud/aiplatform/optimizer/types/__init__.py +13 -0
  24. viettelcloud/aiplatform/optimizer/types/algorithm_types.py +87 -0
  25. viettelcloud/aiplatform/optimizer/types/optimization_types.py +135 -0
  26. viettelcloud/aiplatform/optimizer/types/search_types.py +95 -0
  27. viettelcloud/aiplatform/py.typed +0 -0
  28. viettelcloud/aiplatform/trainer/__init__.py +82 -0
  29. viettelcloud/aiplatform/trainer/api/__init__.py +3 -0
  30. viettelcloud/aiplatform/trainer/api/trainer_client.py +277 -0
  31. viettelcloud/aiplatform/trainer/api/trainer_client_test.py +72 -0
  32. viettelcloud/aiplatform/trainer/backends/__init__.py +0 -0
  33. viettelcloud/aiplatform/trainer/backends/base.py +94 -0
  34. viettelcloud/aiplatform/trainer/backends/container/adapters/base.py +195 -0
  35. viettelcloud/aiplatform/trainer/backends/container/adapters/docker.py +231 -0
  36. viettelcloud/aiplatform/trainer/backends/container/adapters/podman.py +258 -0
  37. viettelcloud/aiplatform/trainer/backends/container/backend.py +668 -0
  38. viettelcloud/aiplatform/trainer/backends/container/backend_test.py +867 -0
  39. viettelcloud/aiplatform/trainer/backends/container/runtime_loader.py +631 -0
  40. viettelcloud/aiplatform/trainer/backends/container/runtime_loader_test.py +637 -0
  41. viettelcloud/aiplatform/trainer/backends/container/types.py +67 -0
  42. viettelcloud/aiplatform/trainer/backends/container/utils.py +213 -0
  43. viettelcloud/aiplatform/trainer/backends/kubernetes/__init__.py +0 -0
  44. viettelcloud/aiplatform/trainer/backends/kubernetes/backend.py +710 -0
  45. viettelcloud/aiplatform/trainer/backends/kubernetes/backend_test.py +1344 -0
  46. viettelcloud/aiplatform/trainer/backends/kubernetes/constants.py +15 -0
  47. viettelcloud/aiplatform/trainer/backends/kubernetes/utils.py +636 -0
  48. viettelcloud/aiplatform/trainer/backends/kubernetes/utils_test.py +582 -0
  49. viettelcloud/aiplatform/trainer/backends/localprocess/__init__.py +0 -0
  50. viettelcloud/aiplatform/trainer/backends/localprocess/backend.py +306 -0
  51. viettelcloud/aiplatform/trainer/backends/localprocess/backend_test.py +501 -0
  52. viettelcloud/aiplatform/trainer/backends/localprocess/constants.py +90 -0
  53. viettelcloud/aiplatform/trainer/backends/localprocess/job.py +184 -0
  54. viettelcloud/aiplatform/trainer/backends/localprocess/types.py +52 -0
  55. viettelcloud/aiplatform/trainer/backends/localprocess/utils.py +302 -0
  56. viettelcloud/aiplatform/trainer/constants/__init__.py +0 -0
  57. viettelcloud/aiplatform/trainer/constants/constants.py +179 -0
  58. viettelcloud/aiplatform/trainer/options/__init__.py +52 -0
  59. viettelcloud/aiplatform/trainer/options/common.py +55 -0
  60. viettelcloud/aiplatform/trainer/options/kubernetes.py +502 -0
  61. viettelcloud/aiplatform/trainer/options/kubernetes_test.py +259 -0
  62. viettelcloud/aiplatform/trainer/options/localprocess.py +20 -0
  63. viettelcloud/aiplatform/trainer/test/common.py +22 -0
  64. viettelcloud/aiplatform/trainer/types/__init__.py +0 -0
  65. viettelcloud/aiplatform/trainer/types/types.py +517 -0
  66. viettelcloud/aiplatform/trainer/types/types_test.py +115 -0
  67. viettelcloud_aiplatform-0.3.0.dist-info/METADATA +226 -0
  68. viettelcloud_aiplatform-0.3.0.dist-info/RECORD +71 -0
  69. viettelcloud_aiplatform-0.3.0.dist-info/WHEEL +4 -0
  70. viettelcloud_aiplatform-0.3.0.dist-info/licenses/LICENSE +201 -0
  71. viettelcloud_aiplatform-0.3.0.dist-info/licenses/NOTICE +36 -0
@@ -0,0 +1,59 @@
1
+ # Copyright 2025 The Kubeflow Authors.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
# Katib API coordinates.
GROUP = "kubeflow.org"
VERSION = "v1beta1"
API_VERSION = "/".join((GROUP, VERSION))

# Experiment resource: Kind name, plural, succeeded condition, and the label
# that identifies the Experiment's resources.
EXPERIMENT_KIND = "Experiment"
EXPERIMENT_PLURAL = "experiments"
EXPERIMENT_SUCCEEDED = "Succeeded"
EXPERIMENT_LABEL = "katib.kubeflow.org/experiment"

# Trial resource: plural and Kind name.
TRIAL_PLURAL = "trials"
TRIAL_KIND = "Trial"

# Kind name for the OptimizationJob.
OPTIMIZATION_JOB_KIND = "OptimizationJob"

# OptimizationJob statuses:
#   Created  - default status once users create the job.
#   Running  - at least one TrainJob is running.
#   Complete - the Experiment CR has the succeeded condition.
#   Failed   - the Experiment CR has the failed condition.
OPTIMIZATION_JOB_CREATED = "Created"
OPTIMIZATION_JOB_RUNNING = "Running"
OPTIMIZATION_JOB_COMPLETE = "Complete"
OPTIMIZATION_JOB_FAILED = "Failed"

# Name of the Katib metrics collector sidecar container.
METRICS_COLLECTOR_CONTAINER = "metrics-logger-and-collector"

# Katib search space parameter types.
DOUBLE_PARAMETER = "double"
CATEGORICAL_PARAMETERS = "categorical"
@@ -0,0 +1,13 @@
1
+ # Copyright 2025 The Kubeflow Authors.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
@@ -0,0 +1,87 @@
1
+ # Copyright 2025 The Kubeflow Authors.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import abc
16
+ from dataclasses import dataclass, fields
17
+ from typing import Any, Optional
18
+
19
+ from kubeflow_katib_api import models
20
+
21
+
22
def algorithm_to_katib_spec(obj: Any) -> models.V1beta1AlgorithmSpec:
    """Build a Katib ``V1beta1AlgorithmSpec`` from a dataclass-based algorithm.

    Each dataclass field of ``obj`` whose value is not ``None`` becomes one
    ``V1beta1AlgorithmSetting`` named after the field, with the value passed
    through ``str()``.

    Args:
        obj: A dataclass instance that also exposes ``algorithm_name``.

    Returns:
        A spec whose ``algorithmName`` is ``obj.algorithm_name`` and whose
        ``algorithmSettings`` is the collected list, or ``None`` when no
        field holds a value.
    """
    named_values = ((f.name, getattr(obj, f.name)) for f in fields(obj))
    algorithm_settings = [
        models.V1beta1AlgorithmSetting(name=setting_name, value=str(setting_value))
        for setting_name, setting_value in named_values
        if setting_value is not None
    ]

    return models.V1beta1AlgorithmSpec(
        algorithmName=obj.algorithm_name,
        # An empty settings list is sent as None rather than [].
        algorithmSettings=algorithm_settings if algorithm_settings else None,
    )
39
+
40
+
41
# Abstract base shared by the search algorithms.
class BaseAlgorithm(abc.ABC):
    """Interface every search algorithm implements.

    Subclasses must override both the ``algorithm_name`` property and
    ``_to_katib_spec``; until they do, instantiation raises ``TypeError``.
    """

    @property
    @abc.abstractmethod
    def algorithm_name(self) -> str:
        """Return the algorithm's name."""

    @abc.abstractmethod
    def _to_katib_spec(self):
        """Convert the algorithm to its Katib spec.

        Raises:
            NotImplementedError: Always, when the base implementation is called.
        """
        raise NotImplementedError()
51
+
52
+
53
@dataclass
class GridSearch(BaseAlgorithm):
    """Grid search algorithm.

    Declares no dataclass fields, so no algorithm settings are produced for it.
    """

    @property
    def algorithm_name(self) -> str:
        """Return the algorithm name, ``"grid"``."""
        return "grid"

    def _to_katib_spec(self):
        """Return the spec built by ``algorithm_to_katib_spec`` for this instance."""
        return algorithm_to_katib_spec(self)
63
+
64
+
65
@dataclass
class RandomSearch(BaseAlgorithm):
    """Random search algorithm.

    Args:
        random_state (`Optional[int]`): Random seed for reproducibility. When it is
            not ``None`` it is emitted as an algorithm setting named ``random_state``.
    """

    random_state: Optional[int] = None

    @property
    def algorithm_name(self) -> str:
        """Return the algorithm name, ``"random"``."""
        return "random"

    def _to_katib_spec(self):
        """Return the spec built by ``algorithm_to_katib_spec`` for this instance."""
        return algorithm_to_katib_spec(self)
81
+
82
+
83
# Supported search algorithms, keyed by the name each class reports through
# its ``algorithm_name`` property (read from a default-constructed instance).
ALGORITHM_REGISTRY = {
    algorithm_cls().algorithm_name: algorithm_cls
    for algorithm_cls in (GridSearch, RandomSearch)
}
@@ -0,0 +1,135 @@
1
+ # Copyright 2025 The Kubeflow Authors.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from dataclasses import dataclass, field
16
+ from datetime import datetime
17
+ from enum import Enum
18
+ from typing import Optional, Union
19
+
20
+ import viettelcloud.aiplatform.common.constants as common_constants
21
+ from viettelcloud.aiplatform.optimizer.types.algorithm_types import GridSearch, RandomSearch
22
+ from viettelcloud.aiplatform.optimizer.types.search_types import (
23
+ CategoricalSearchSpace,
24
+ ContinuousSearchSpace,
25
+ )
26
+ from viettelcloud.aiplatform.trainer.types.types import TrainJob
27
+
28
+
29
# Optimization direction for the objective metric.
class Direction(Enum):
    """Whether the objective metric is to be maximized or minimized."""

    MAXIMIZE = "maximize"
    MINIMIZE = "minimize"
35
+
36
+
37
# Objective metric configuration.
@dataclass
class Objective:
    """Objective configuration for hyperparameter optimization.

    Args:
        metric (`str`): The name of the metric to optimize. Defaults to "loss".
        direction (`Direction`): Whether to maximize or minimize the metric.
            Defaults to `Direction.MINIMIZE`. A plain string such as "maximize"
            is also accepted and converted to the matching `Direction` member.
    """

    metric: str = "loss"
    direction: Direction = Direction.MINIMIZE

    def __post_init__(self):
        # Only strings need coercion; anything else is stored untouched.
        if not isinstance(self.direction, str):
            return
        # A string that is not a Direction value raises ValueError here.
        self.direction = Direction(self.direction)
53
+
54
+
55
# Trial execution configuration.
@dataclass
class TrialConfig:
    """Trial configuration for hyperparameter optimization.

    No validation is performed on the values.

    Args:
        num_trials (`int`): Number of trials to run. Defaults to 10.
        parallel_trials (`int`): Number of trials to run in parallel. Defaults to 1.
        max_failed_trials (`Optional[int]`): Maximum number of failed trials before
            stopping. Defaults to None.
    """

    num_trials: int = 10
    parallel_trials: int = 1
    max_failed_trials: Optional[int] = None
69
+
70
+
71
@dataclass
class Metric:
    """String-valued summary of one metric.

    Args:
        name (`str`): The metric name.
        min (`str`): Presumably the minimum observed value - confirm against the producer.
        max (`str`): Presumably the maximum observed value - confirm against the producer.
        latest (`str`): Presumably the most recently observed value - confirm against
            the producer.
    """

    name: str
    min: str
    max: str
    latest: str
77
+
78
+
79
@dataclass
class Result:
    """Best hyperparameters found, together with their metrics.

    Args:
        parameters (`dict[str, str]`): The best hyperparameters found during optimization.
        metrics (`list[Metric]`): The metrics achieved with these hyperparameters.
    """

    parameters: dict[str, str]
    metrics: list[Metric]
90
+
91
+
92
# Representation of a single trial.
@dataclass
class Trial:
    """Representation for a trial.

    Args:
        name (`str`): The name of the Trial.
        parameters (`dict[str, str]`): Hyperparameters assigned to this Trial.
        trainjob (`TrainJob`): Representation of the TrainJob.
        metrics (`list[Metric]`): Observed metrics for this Trial. The metrics are
            collected only for completed Trials. Defaults to an empty list.
    """

    name: str
    parameters: dict[str, str]
    trainjob: TrainJob
    # default_factory gives every Trial its own list instead of a shared one.
    metrics: list[Metric] = field(default_factory=list)
109
+
110
+
111
# Representation for the OptimizationJob
@dataclass
class OptimizationJob:
    """Representation for an optimization job.

    Args:
        name (`str`): The name of the OptimizationJob.
        search_space (`dict[str, Union[ContinuousSearchSpace, CategoricalSearchSpace]]`):
            The search space definitions, keyed by string (presumably the
            hyperparameter name - confirm against the code that builds this object).
        objectives (`list[Objective]`): The objective configuration. Currently, only the
            first metric defined in the objectives list is optimized. Any additional
            metrics are collected and displayed in the Trial results.
        algorithm (`Union[GridSearch, RandomSearch]`): The algorithm configuration.
        trial_config (`TrialConfig`): The trial configuration.
        trials (`list[Trial]`): The list of created Trials.
        creation_timestamp (`datetime`): The creation timestamp.
        status (`str`): The current status of the optimization job. Defaults to
            `common_constants.UNKNOWN`.
    """

    name: str
    search_space: dict[str, Union[ContinuousSearchSpace, CategoricalSearchSpace]]
    objectives: list[Objective]
    algorithm: Union[GridSearch, RandomSearch]
    trial_config: TrialConfig
    trials: list[Trial]
    creation_timestamp: datetime
    status: str = common_constants.UNKNOWN
@@ -0,0 +1,95 @@
1
+ # Copyright 2025 The Kubeflow Authors.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from dataclasses import dataclass
16
+ from enum import Enum
17
+ from typing import Union
18
+
19
+ from kubeflow_katib_api import models as katib_models
20
+
21
+ import viettelcloud.aiplatform.optimizer.constants.constants as constants
22
+
23
+
24
# Helpers that build search space parameter specs.
class Search:
    """Helper class for defining search space parameters.

    Every helper is a static method returning a Katib ``V1beta1ParameterSpec``.
    Bounds and categorical values are converted with ``str()`` before being
    placed in the feasible space.
    """

    @staticmethod
    def _double_spec(lower, upper, distribution) -> katib_models.V1beta1ParameterSpec:
        """Build a double-typed spec bounded by `lower`/`upper` with `distribution`."""
        feasible = katib_models.V1beta1FeasibleSpace(
            min=str(lower), max=str(upper), distribution=distribution.value
        )
        return katib_models.V1beta1ParameterSpec(
            parameterType=constants.DOUBLE_PARAMETER,
            feasibleSpace=feasible,
        )

    @staticmethod
    def uniform(min: float, max: float) -> katib_models.V1beta1ParameterSpec:
        """Sample a float value uniformly between `min` and `max`.

        Args:
            min: Lower boundary for the float value.
            max: Upper boundary for the float value.

        Returns:
            Katib ParameterSpec object.
        """
        return Search._double_spec(min, max, Distribution.UNIFORM)

    @staticmethod
    def loguniform(min: float, max: float) -> katib_models.V1beta1ParameterSpec:
        """Sample a float value with log-uniform distribution between `min` and `max`.

        Args:
            min: Lower boundary for the float value.
            max: Upper boundary for the float value.

        Returns:
            Katib ParameterSpec object.
        """
        return Search._double_spec(min, max, Distribution.LOG_UNIFORM)

    @staticmethod
    def choice(values: list) -> katib_models.V1beta1ParameterSpec:
        """Sample a categorical value from the list.

        Args:
            values: List of categorical values; each one is stringified.

        Returns:
            Katib ParameterSpec object.
        """
        as_strings = [str(item) for item in values]
        return katib_models.V1beta1ParameterSpec(
            parameterType=constants.CATEGORICAL_PARAMETERS,
            feasibleSpace=katib_models.V1beta1FeasibleSpace(list=as_strings),
        )
78
+
79
+
80
# Sampling distributions available for a search space.
class Distribution(Enum):
    """Distribution names as they are written into the feasible space."""

    UNIFORM = "uniform"
    LOG_UNIFORM = "logUniform"
84
+
85
+
86
@dataclass
class ContinuousSearchSpace:
    """Numeric search space described by two bounds and a distribution.

    Args:
        min (`Union[float, int]`): Lower bound.
        max (`Union[float, int]`): Upper bound.
        distribution (`Distribution`): Distribution associated with the range.
    """

    min: Union[float, int]
    max: Union[float, int]
    distribution: Distribution
91
+
92
+
93
@dataclass
class CategoricalSearchSpace:
    """Categorical search space holding the list of candidate values.

    Args:
        choices (`list`): The candidate values.
    """

    choices: list
File without changes
@@ -0,0 +1,82 @@
1
+ # Copyright 2024 The Kubeflow Authors.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ # Import common types.
17
+ from viettelcloud.aiplatform.common.types import KubernetesBackendConfig
18
+
19
+ # Import the Kubeflow Trainer client.
20
+ from viettelcloud.aiplatform.trainer.api.trainer_client import TrainerClient
21
+ from viettelcloud.aiplatform.trainer.backends.container.types import (
22
+ ContainerBackendConfig,
23
+ TrainingRuntimeSource,
24
+ )
25
+ from viettelcloud.aiplatform.trainer.backends.localprocess.types import LocalProcessBackendConfig
26
+
27
+ # Import the Kubeflow Trainer constants.
28
+ from viettelcloud.aiplatform.trainer.constants.constants import DATASET_PATH, MODEL_PATH
29
+
30
+ # Import the Kubeflow Trainer types.
31
+ from viettelcloud.aiplatform.trainer.types.types import (
32
+ BuiltinTrainer,
33
+ CustomTrainer,
34
+ CustomTrainerContainer,
35
+ DataCacheInitializer,
36
+ DataFormat,
37
+ DataType,
38
+ Event,
39
+ HuggingFaceDatasetInitializer,
40
+ HuggingFaceModelInitializer,
41
+ Initializer,
42
+ LoraConfig,
43
+ Loss,
44
+ Runtime,
45
+ RuntimeTrainer,
46
+ S3DatasetInitializer,
47
+ S3ModelInitializer,
48
+ TorchTuneConfig,
49
+ TorchTuneInstructDataset,
50
+ TrainerType,
51
+ TrainJobTemplate,
52
+ )
53
+
54
# Public names re-exported by the trainer package: trainer types and
# constants, the TrainerClient, and the backend configuration classes.
__all__ = [
    "BuiltinTrainer",
    "CustomTrainer",
    "CustomTrainerContainer",
    "DataCacheInitializer",
    "DataFormat",
    "DATASET_PATH",
    "DataType",
    "Event",
    "HuggingFaceDatasetInitializer",
    "HuggingFaceModelInitializer",
    "Initializer",
    "LoraConfig",
    "Loss",
    "MODEL_PATH",
    "Runtime",
    "TorchTuneConfig",
    "TorchTuneInstructDataset",
    "RuntimeTrainer",
    "S3DatasetInitializer",
    "S3ModelInitializer",
    "TrainJobTemplate",
    "TrainerClient",
    "TrainerType",
    # Backend configuration types.
    "LocalProcessBackendConfig",
    "ContainerBackendConfig",
    "KubernetesBackendConfig",
    "TrainingRuntimeSource",
]
@@ -0,0 +1,3 @@
1
+ # ruff: noqa
2
+
3
+ # import apis into api package