zenml-nightly 0.83.1.dev20250626__py3-none-any.whl → 0.83.1.dev20250628__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zenml/VERSION +1 -1
- zenml/client.py +8 -2
- zenml/integrations/aws/flavors/sagemaker_orchestrator_flavor.py +1 -1
- zenml/integrations/kubernetes/orchestrators/kubernetes_orchestrator.py +43 -8
- zenml/integrations/kubernetes/orchestrators/kubernetes_orchestrator_entrypoint.py +88 -64
- zenml/integrations/kubernetes/orchestrators/kubernetes_orchestrator_entrypoint_configuration.py +0 -12
- zenml/integrations/kubernetes/orchestrators/manifest_utils.py +6 -20
- zenml/integrations/kubernetes/step_operators/kubernetes_step_operator.py +4 -2
- zenml/integrations/vllm/services/vllm_deployment.py +1 -1
- zenml/models/v2/core/pipeline_run.py +10 -0
- zenml/orchestrators/dag_runner.py +12 -3
- zenml/orchestrators/input_utils.py +6 -35
- zenml/orchestrators/step_run_utils.py +89 -15
- zenml/pipelines/pipeline_definition.py +6 -2
- zenml/pipelines/run_utils.py +5 -9
- zenml/stack/stack_component.py +1 -1
- zenml/zen_server/template_execution/utils.py +0 -1
- zenml/zen_stores/schemas/pipeline_run_schemas.py +38 -19
- zenml/zen_stores/schemas/step_run_schemas.py +44 -14
- zenml/zen_stores/sql_zen_store.py +75 -49
- {zenml_nightly-0.83.1.dev20250626.dist-info → zenml_nightly-0.83.1.dev20250628.dist-info}/METADATA +1 -1
- {zenml_nightly-0.83.1.dev20250626.dist-info → zenml_nightly-0.83.1.dev20250628.dist-info}/RECORD +25 -25
- {zenml_nightly-0.83.1.dev20250626.dist-info → zenml_nightly-0.83.1.dev20250628.dist-info}/LICENSE +0 -0
- {zenml_nightly-0.83.1.dev20250626.dist-info → zenml_nightly-0.83.1.dev20250628.dist-info}/WHEEL +0 -0
- {zenml_nightly-0.83.1.dev20250626.dist-info → zenml_nightly-0.83.1.dev20250628.dist-info}/entry_points.txt +0 -0
zenml/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.83.1.dev20250626
|
1
|
+
0.83.1.dev20250628
|
zenml/client.py
CHANGED
@@ -6420,9 +6420,15 @@ class Client(metaclass=ClientMetaClass):
|
|
6420
6420
|
if model_version_name_or_number_or_id is None:
|
6421
6421
|
model_version_name_or_number_or_id = ModelStages.LATEST
|
6422
6422
|
|
6423
|
-
if
|
6423
|
+
if is_valid_uuid(model_version_name_or_number_or_id):
|
6424
|
+
assert not isinstance(model_version_name_or_number_or_id, int)
|
6425
|
+
model_version_id = (
|
6426
|
+
UUID(model_version_name_or_number_or_id)
|
6427
|
+
if isinstance(model_version_name_or_number_or_id, str)
|
6428
|
+
else model_version_name_or_number_or_id
|
6429
|
+
)
|
6424
6430
|
return self.zen_store.get_model_version(
|
6425
|
-
model_version_id=
|
6431
|
+
model_version_id=model_version_id,
|
6426
6432
|
hydrate=hydrate,
|
6427
6433
|
)
|
6428
6434
|
elif isinstance(model_version_name_or_number_or_id, int):
|
@@ -130,7 +130,7 @@ class SagemakerOrchestratorSettings(BaseSettings):
|
|
130
130
|
)
|
131
131
|
|
132
132
|
processor_role: Optional[str] = None
|
133
|
-
processor_tags: Dict[str, str] =
|
133
|
+
processor_tags: Optional[Dict[str, str]] = None
|
134
134
|
_deprecation_validator = deprecation_utils.deprecate_pydantic_attributes(
|
135
135
|
("processor_role", "execution_role"), ("processor_tags", "tags")
|
136
136
|
)
|
@@ -46,6 +46,9 @@ from kubernetes import client as k8s_client
|
|
46
46
|
from kubernetes import config as k8s_config
|
47
47
|
|
48
48
|
from zenml.config.base_settings import BaseSettings
|
49
|
+
from zenml.constants import (
|
50
|
+
METADATA_ORCHESTRATOR_RUN_ID,
|
51
|
+
)
|
49
52
|
from zenml.enums import StackComponentType
|
50
53
|
from zenml.integrations.kubernetes.flavors.kubernetes_orchestrator_flavor import (
|
51
54
|
KubernetesOrchestratorConfig,
|
@@ -61,6 +64,7 @@ from zenml.integrations.kubernetes.orchestrators.manifest_utils import (
|
|
61
64
|
)
|
62
65
|
from zenml.integrations.kubernetes.pod_settings import KubernetesPodSettings
|
63
66
|
from zenml.logger import get_logger
|
67
|
+
from zenml.metadata.metadata_types import MetadataType
|
64
68
|
from zenml.orchestrators import ContainerizedOrchestrator, SubmissionResult
|
65
69
|
from zenml.orchestrators.utils import get_orchestrator_run_name
|
66
70
|
from zenml.stack import StackValidator
|
@@ -467,9 +471,7 @@ class KubernetesOrchestrator(ContainerizedOrchestrator):
|
|
467
471
|
# This will internally also build the command/args for all step pods.
|
468
472
|
command = KubernetesOrchestratorEntrypointConfiguration.get_entrypoint_command()
|
469
473
|
args = KubernetesOrchestratorEntrypointConfiguration.get_entrypoint_arguments(
|
470
|
-
run_name=orchestrator_run_name,
|
471
474
|
deployment_id=deployment.id,
|
472
|
-
kubernetes_namespace=self.config.kubernetes_namespace,
|
473
475
|
run_id=placeholder_run.id if placeholder_run else None,
|
474
476
|
)
|
475
477
|
|
@@ -508,6 +510,18 @@ class KubernetesOrchestrator(ContainerizedOrchestrator):
|
|
508
510
|
}
|
509
511
|
)
|
510
512
|
|
513
|
+
orchestrator_pod_labels = {
|
514
|
+
"pipeline": kube_utils.sanitize_label(pipeline_name),
|
515
|
+
}
|
516
|
+
|
517
|
+
if placeholder_run:
|
518
|
+
orchestrator_pod_labels["run_id"] = kube_utils.sanitize_label(
|
519
|
+
str(placeholder_run.id)
|
520
|
+
)
|
521
|
+
orchestrator_pod_labels["run_name"] = kube_utils.sanitize_label(
|
522
|
+
str(placeholder_run.name)
|
523
|
+
)
|
524
|
+
|
511
525
|
# Schedule as CRON job if CRON schedule is given.
|
512
526
|
if deployment.schedule:
|
513
527
|
if not deployment.schedule.cron_expression:
|
@@ -519,9 +533,7 @@ class KubernetesOrchestrator(ContainerizedOrchestrator):
|
|
519
533
|
cron_expression = deployment.schedule.cron_expression
|
520
534
|
cron_job_manifest = build_cron_job_manifest(
|
521
535
|
cron_expression=cron_expression,
|
522
|
-
run_name=orchestrator_run_name,
|
523
536
|
pod_name=pod_name,
|
524
|
-
pipeline_name=pipeline_name,
|
525
537
|
image_name=image,
|
526
538
|
command=command,
|
527
539
|
args=args,
|
@@ -533,6 +545,7 @@ class KubernetesOrchestrator(ContainerizedOrchestrator):
|
|
533
545
|
successful_jobs_history_limit=settings.successful_jobs_history_limit,
|
534
546
|
failed_jobs_history_limit=settings.failed_jobs_history_limit,
|
535
547
|
ttl_seconds_after_finished=settings.ttl_seconds_after_finished,
|
548
|
+
labels=orchestrator_pod_labels,
|
536
549
|
)
|
537
550
|
|
538
551
|
self._k8s_batch_api.create_namespaced_cron_job(
|
@@ -547,9 +560,7 @@ class KubernetesOrchestrator(ContainerizedOrchestrator):
|
|
547
560
|
else:
|
548
561
|
# Create and run the orchestrator pod.
|
549
562
|
pod_manifest = build_pod_manifest(
|
550
|
-
run_name=orchestrator_run_name,
|
551
563
|
pod_name=pod_name,
|
552
|
-
pipeline_name=pipeline_name,
|
553
564
|
image_name=image,
|
554
565
|
command=command,
|
555
566
|
args=args,
|
@@ -557,6 +568,7 @@ class KubernetesOrchestrator(ContainerizedOrchestrator):
|
|
557
568
|
pod_settings=orchestrator_pod_settings,
|
558
569
|
service_account_name=service_account_name,
|
559
570
|
env=environment,
|
571
|
+
labels=orchestrator_pod_labels,
|
560
572
|
mount_local_stores=self.config.is_local,
|
561
573
|
)
|
562
574
|
|
@@ -572,6 +584,11 @@ class KubernetesOrchestrator(ContainerizedOrchestrator):
|
|
572
584
|
startup_timeout=settings.pod_startup_timeout,
|
573
585
|
)
|
574
586
|
|
587
|
+
metadata: Dict[str, MetadataType] = {
|
588
|
+
METADATA_ORCHESTRATOR_RUN_ID: pod_name,
|
589
|
+
}
|
590
|
+
|
591
|
+
# Wait for the orchestrator pod to finish and stream logs.
|
575
592
|
if settings.synchronous:
|
576
593
|
|
577
594
|
def _wait_for_run_to_finish() -> None:
|
@@ -588,7 +605,8 @@ class KubernetesOrchestrator(ContainerizedOrchestrator):
|
|
588
605
|
)
|
589
606
|
|
590
607
|
return SubmissionResult(
|
591
|
-
|
608
|
+
metadata=metadata,
|
609
|
+
wait_for_completion=_wait_for_run_to_finish,
|
592
610
|
)
|
593
611
|
else:
|
594
612
|
logger.info(
|
@@ -597,7 +615,9 @@ class KubernetesOrchestrator(ContainerizedOrchestrator):
|
|
597
615
|
f"Run the following command to inspect the logs: "
|
598
616
|
f"`kubectl logs {pod_name} -n {self.config.kubernetes_namespace}`."
|
599
617
|
)
|
600
|
-
return
|
618
|
+
return SubmissionResult(
|
619
|
+
metadata=metadata,
|
620
|
+
)
|
601
621
|
|
602
622
|
def _get_service_account_name(
|
603
623
|
self, settings: KubernetesOrchestratorSettings
|
@@ -642,3 +662,18 @@ class KubernetesOrchestrator(ContainerizedOrchestrator):
|
|
642
662
|
"Unable to read run id from environment variable "
|
643
663
|
f"{ENV_ZENML_KUBERNETES_RUN_ID}."
|
644
664
|
)
|
665
|
+
|
666
|
+
def get_pipeline_run_metadata(
|
667
|
+
self, run_id: UUID
|
668
|
+
) -> Dict[str, "MetadataType"]:
|
669
|
+
"""Get general component-specific metadata for a pipeline run.
|
670
|
+
|
671
|
+
Args:
|
672
|
+
run_id: The ID of the pipeline run.
|
673
|
+
|
674
|
+
Returns:
|
675
|
+
A dictionary of metadata.
|
676
|
+
"""
|
677
|
+
return {
|
678
|
+
METADATA_ORCHESTRATOR_RUN_ID: self.get_orchestrator_run_id(),
|
679
|
+
}
|
@@ -15,8 +15,7 @@
|
|
15
15
|
|
16
16
|
import argparse
|
17
17
|
import socket
|
18
|
-
from typing import
|
19
|
-
from uuid import UUID
|
18
|
+
from typing import Callable, Dict, Optional, cast
|
20
19
|
|
21
20
|
from kubernetes import client as k8s_client
|
22
21
|
|
@@ -41,10 +40,16 @@ from zenml.integrations.kubernetes.orchestrators.manifest_utils import (
|
|
41
40
|
from zenml.logger import get_logger
|
42
41
|
from zenml.orchestrators import publish_utils
|
43
42
|
from zenml.orchestrators.dag_runner import NodeStatus, ThreadedDagRunner
|
43
|
+
from zenml.orchestrators.step_run_utils import (
|
44
|
+
StepRunRequestFactory,
|
45
|
+
fetch_step_runs_by_names,
|
46
|
+
publish_cached_step_run,
|
47
|
+
)
|
44
48
|
from zenml.orchestrators.utils import (
|
45
49
|
get_config_environment_vars,
|
46
50
|
get_orchestrator_run_name,
|
47
51
|
)
|
52
|
+
from zenml.pipelines.run_utils import create_placeholder_run
|
48
53
|
|
49
54
|
logger = get_logger(__name__)
|
50
55
|
|
@@ -56,9 +61,7 @@ def parse_args() -> argparse.Namespace:
|
|
56
61
|
Parsed args.
|
57
62
|
"""
|
58
63
|
parser = argparse.ArgumentParser()
|
59
|
-
parser.add_argument("--run_name", type=str, required=True)
|
60
64
|
parser.add_argument("--deployment_id", type=str, required=True)
|
61
|
-
parser.add_argument("--kubernetes_namespace", type=str, required=True)
|
62
65
|
parser.add_argument("--run_id", type=str, required=False)
|
63
66
|
return parser.parse_args()
|
64
67
|
|
@@ -68,7 +71,6 @@ def main() -> None:
|
|
68
71
|
# Log to the container's stdout so it can be streamed by the client.
|
69
72
|
logger.info("Kubernetes orchestrator pod started.")
|
70
73
|
|
71
|
-
# Parse / extract args.
|
72
74
|
args = parse_args()
|
73
75
|
|
74
76
|
orchestrator_pod_name = socket.gethostname()
|
@@ -77,6 +79,7 @@ def main() -> None:
|
|
77
79
|
active_stack = client.active_stack
|
78
80
|
orchestrator = active_stack.orchestrator
|
79
81
|
assert isinstance(orchestrator, KubernetesOrchestrator)
|
82
|
+
namespace = orchestrator.config.kubernetes_namespace
|
80
83
|
|
81
84
|
deployment = client.get_deployment(args.deployment_id)
|
82
85
|
pipeline_settings = cast(
|
@@ -86,20 +89,6 @@ def main() -> None:
|
|
86
89
|
|
87
90
|
step_command = StepEntrypointConfiguration.get_entrypoint_command()
|
88
91
|
|
89
|
-
if args.run_id and not pipeline_settings.prevent_orchestrator_pod_caching:
|
90
|
-
from zenml.orchestrators import cache_utils
|
91
|
-
|
92
|
-
run_required = (
|
93
|
-
cache_utils.create_cached_step_runs_and_prune_deployment(
|
94
|
-
deployment=deployment,
|
95
|
-
pipeline_run=client.get_pipeline_run(args.run_id),
|
96
|
-
stack=active_stack,
|
97
|
-
)
|
98
|
-
)
|
99
|
-
|
100
|
-
if not run_required:
|
101
|
-
return
|
102
|
-
|
103
92
|
mount_local_stores = active_stack.orchestrator.config.is_local
|
104
93
|
|
105
94
|
# Get a Kubernetes client from the active Kubernetes orchestrator, but
|
@@ -115,7 +104,7 @@ def main() -> None:
|
|
115
104
|
owner_references = kube_utils.get_pod_owner_references(
|
116
105
|
core_api=core_api,
|
117
106
|
pod_name=orchestrator_pod_name,
|
118
|
-
namespace=
|
107
|
+
namespace=namespace,
|
119
108
|
)
|
120
109
|
except Exception as e:
|
121
110
|
logger.warning(f"Failed to get pod owner references: {str(e)}")
|
@@ -126,6 +115,62 @@ def main() -> None:
|
|
126
115
|
for owner_reference in owner_references:
|
127
116
|
owner_reference.controller = False
|
128
117
|
|
118
|
+
if args.run_id:
|
119
|
+
pipeline_run = client.get_pipeline_run(args.run_id)
|
120
|
+
else:
|
121
|
+
pipeline_run = create_placeholder_run(
|
122
|
+
deployment=deployment,
|
123
|
+
orchestrator_run_id=orchestrator_pod_name,
|
124
|
+
)
|
125
|
+
|
126
|
+
pre_step_run: Optional[Callable[[str], bool]] = None
|
127
|
+
|
128
|
+
if not pipeline_settings.prevent_orchestrator_pod_caching:
|
129
|
+
step_run_request_factory = StepRunRequestFactory(
|
130
|
+
deployment=deployment,
|
131
|
+
pipeline_run=pipeline_run,
|
132
|
+
stack=active_stack,
|
133
|
+
)
|
134
|
+
step_runs = {}
|
135
|
+
|
136
|
+
def pre_step_run(step_name: str) -> bool:
|
137
|
+
"""Pre-step run.
|
138
|
+
|
139
|
+
Args:
|
140
|
+
step_name: Name of the step.
|
141
|
+
|
142
|
+
Returns:
|
143
|
+
Whether the step node needs to be run.
|
144
|
+
"""
|
145
|
+
step_run_request = step_run_request_factory.create_request(
|
146
|
+
step_name
|
147
|
+
)
|
148
|
+
try:
|
149
|
+
step_run_request_factory.populate_request(step_run_request)
|
150
|
+
except Exception as e:
|
151
|
+
logger.error(
|
152
|
+
f"Failed to populate step run request for step {step_name}: {e}"
|
153
|
+
)
|
154
|
+
return True
|
155
|
+
|
156
|
+
if step_run_request.status == ExecutionStatus.CACHED:
|
157
|
+
step_run = publish_cached_step_run(
|
158
|
+
step_run_request, pipeline_run
|
159
|
+
)
|
160
|
+
step_runs[step_name] = step_run
|
161
|
+
logger.info("Using cached version of step `%s`.", step_name)
|
162
|
+
return False
|
163
|
+
|
164
|
+
return True
|
165
|
+
|
166
|
+
step_pod_labels = {
|
167
|
+
"run_id": kube_utils.sanitize_label(str(pipeline_run.id)),
|
168
|
+
"run_name": kube_utils.sanitize_label(str(pipeline_run.name)),
|
169
|
+
"pipeline": kube_utils.sanitize_label(
|
170
|
+
deployment.pipeline_configuration.name
|
171
|
+
),
|
172
|
+
}
|
173
|
+
|
129
174
|
def run_step_on_kubernetes(step_name: str) -> None:
|
130
175
|
"""Run a pipeline step in a separate Kubernetes pod.
|
131
176
|
|
@@ -146,7 +191,7 @@ def main() -> None:
|
|
146
191
|
):
|
147
192
|
max_length = (
|
148
193
|
kube_utils.calculate_max_pod_name_length_for_namespace(
|
149
|
-
namespace=
|
194
|
+
namespace=namespace
|
150
195
|
)
|
151
196
|
)
|
152
197
|
pod_name_prefix = get_orchestrator_run_name(
|
@@ -156,9 +201,7 @@ def main() -> None:
|
|
156
201
|
else:
|
157
202
|
pod_name = f"{orchestrator_pod_name}-{step_name}"
|
158
203
|
|
159
|
-
pod_name = kube_utils.sanitize_pod_name(
|
160
|
-
pod_name, namespace=args.kubernetes_namespace
|
161
|
-
)
|
204
|
+
pod_name = kube_utils.sanitize_pod_name(pod_name, namespace=namespace)
|
162
205
|
|
163
206
|
image = KubernetesOrchestrator.get_image(
|
164
207
|
deployment=deployment, step_name=step_name
|
@@ -195,8 +238,6 @@ def main() -> None:
|
|
195
238
|
# Define Kubernetes pod manifest.
|
196
239
|
pod_manifest = build_pod_manifest(
|
197
240
|
pod_name=pod_name,
|
198
|
-
run_name=args.run_name,
|
199
|
-
pipeline_name=deployment.pipeline_configuration.name,
|
200
241
|
image_name=image,
|
201
242
|
command=step_command,
|
202
243
|
args=step_args,
|
@@ -207,6 +248,7 @@ def main() -> None:
|
|
207
248
|
or settings.service_account_name,
|
208
249
|
mount_local_stores=mount_local_stores,
|
209
250
|
owner_references=owner_references,
|
251
|
+
labels=step_pod_labels,
|
210
252
|
)
|
211
253
|
|
212
254
|
kube_utils.create_and_wait_for_pod_to_start(
|
@@ -214,7 +256,7 @@ def main() -> None:
|
|
214
256
|
pod_display_name=f"pod for step `{step_name}`",
|
215
257
|
pod_name=pod_name,
|
216
258
|
pod_manifest=pod_manifest,
|
217
|
-
namespace=
|
259
|
+
namespace=namespace,
|
218
260
|
startup_max_retries=settings.pod_failure_max_retries,
|
219
261
|
startup_failure_delay=settings.pod_failure_retry_delay,
|
220
262
|
startup_failure_backoff=settings.pod_failure_backoff,
|
@@ -229,7 +271,7 @@ def main() -> None:
|
|
229
271
|
incluster=True
|
230
272
|
),
|
231
273
|
pod_name=pod_name,
|
232
|
-
namespace=
|
274
|
+
namespace=namespace,
|
233
275
|
exit_condition_lambda=kube_utils.pod_is_done,
|
234
276
|
stream_logs=True,
|
235
277
|
)
|
@@ -249,30 +291,15 @@ def main() -> None:
|
|
249
291
|
try:
|
250
292
|
# Some steps may have failed because the pods could not be created.
|
251
293
|
# We need to check for this and mark the step run as failed if so.
|
252
|
-
|
253
|
-
# Fetch the pipeline run using any means possible.
|
254
|
-
list_args: Dict[str, Any] = {}
|
255
|
-
if args.run_id:
|
256
|
-
# For a run triggered outside of a schedule, we can use the
|
257
|
-
# placeholder run ID to find the pipeline run.
|
258
|
-
list_args = dict(id=UUID(args.run_id))
|
259
|
-
else:
|
260
|
-
# For a run triggered by a schedule, we can only use the
|
261
|
-
# orchestrator run ID to find the pipeline run.
|
262
|
-
list_args = dict(orchestrator_run_id=orchestrator_pod_name)
|
263
|
-
|
264
|
-
pipeline_runs = client.list_pipeline_runs(
|
265
|
-
hydrate=True,
|
266
|
-
project=deployment.project_id,
|
267
|
-
deployment_id=deployment.id,
|
268
|
-
**list_args,
|
269
|
-
)
|
270
|
-
if not len(pipeline_runs):
|
271
|
-
# No pipeline run found, so we can't mark any step runs as failed.
|
272
|
-
return
|
273
|
-
|
274
|
-
pipeline_run = pipeline_runs[0]
|
275
294
|
pipeline_failed = False
|
295
|
+
failed_step_names = [
|
296
|
+
step_name
|
297
|
+
for step_name, node_state in node_states.items()
|
298
|
+
if node_state == NodeStatus.FAILED
|
299
|
+
]
|
300
|
+
step_runs = fetch_step_runs_by_names(
|
301
|
+
step_run_names=failed_step_names, pipeline_run=pipeline_run
|
302
|
+
)
|
276
303
|
|
277
304
|
for step_name, node_state in node_states.items():
|
278
305
|
if node_state != NodeStatus.FAILED:
|
@@ -280,18 +307,14 @@ def main() -> None:
|
|
280
307
|
|
281
308
|
pipeline_failed = True
|
282
309
|
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
ExecutionStatus.INITIALIZING,
|
292
|
-
ExecutionStatus.RUNNING,
|
293
|
-
}:
|
294
|
-
publish_utils.publish_failed_step_run(step_run.id)
|
310
|
+
if step_run := step_runs.get(step_name, None):
|
311
|
+
# Try to update the step run status, if it exists and is in
|
312
|
+
# a transient state.
|
313
|
+
if step_run and step_run.status in {
|
314
|
+
ExecutionStatus.INITIALIZING,
|
315
|
+
ExecutionStatus.RUNNING,
|
316
|
+
}:
|
317
|
+
publish_utils.publish_failed_step_run(step_run.id)
|
295
318
|
|
296
319
|
# If any steps failed and the pipeline run is still in a transient
|
297
320
|
# state, we need to mark it as failed.
|
@@ -319,6 +342,7 @@ def main() -> None:
|
|
319
342
|
ThreadedDagRunner(
|
320
343
|
dag=pipeline_dag,
|
321
344
|
run_fn=run_step_on_kubernetes,
|
345
|
+
preparation_fn=pre_step_run,
|
322
346
|
finalize_fn=finalize_run,
|
323
347
|
parallel_node_startup_waiting_period=parallel_node_startup_waiting_period,
|
324
348
|
max_parallelism=pipeline_settings.max_parallelism,
|
@@ -333,7 +357,7 @@ def main() -> None:
|
|
333
357
|
try:
|
334
358
|
kube_utils.delete_secret(
|
335
359
|
core_api=core_api,
|
336
|
-
namespace=
|
360
|
+
namespace=namespace,
|
337
361
|
secret_name=secret_name,
|
338
362
|
)
|
339
363
|
except k8s_client.rest.ApiException as e:
|
zenml/integrations/kubernetes/orchestrators/kubernetes_orchestrator_entrypoint_configuration.py
CHANGED
@@ -18,9 +18,7 @@ from typing import TYPE_CHECKING, List, Optional, Set
|
|
18
18
|
if TYPE_CHECKING:
|
19
19
|
from uuid import UUID
|
20
20
|
|
21
|
-
RUN_NAME_OPTION = "run_name"
|
22
21
|
DEPLOYMENT_ID_OPTION = "deployment_id"
|
23
|
-
NAMESPACE_OPTION = "kubernetes_namespace"
|
24
22
|
RUN_ID_OPTION = "run_id"
|
25
23
|
|
26
24
|
|
@@ -35,9 +33,7 @@ class KubernetesOrchestratorEntrypointConfiguration:
|
|
35
33
|
Entrypoint options.
|
36
34
|
"""
|
37
35
|
options = {
|
38
|
-
RUN_NAME_OPTION,
|
39
36
|
DEPLOYMENT_ID_OPTION,
|
40
|
-
NAMESPACE_OPTION,
|
41
37
|
}
|
42
38
|
return options
|
43
39
|
|
@@ -58,29 +54,21 @@ class KubernetesOrchestratorEntrypointConfiguration:
|
|
58
54
|
@classmethod
|
59
55
|
def get_entrypoint_arguments(
|
60
56
|
cls,
|
61
|
-
run_name: str,
|
62
57
|
deployment_id: "UUID",
|
63
|
-
kubernetes_namespace: str,
|
64
58
|
run_id: Optional["UUID"] = None,
|
65
59
|
) -> List[str]:
|
66
60
|
"""Gets all arguments that the entrypoint command should be called with.
|
67
61
|
|
68
62
|
Args:
|
69
|
-
run_name: Name of the ZenML run.
|
70
63
|
deployment_id: ID of the deployment.
|
71
|
-
kubernetes_namespace: Name of the Kubernetes namespace.
|
72
64
|
run_id: Optional ID of the pipeline run. Not set for scheduled runs.
|
73
65
|
|
74
66
|
Returns:
|
75
67
|
List of entrypoint arguments.
|
76
68
|
"""
|
77
69
|
args = [
|
78
|
-
f"--{RUN_NAME_OPTION}",
|
79
|
-
run_name,
|
80
70
|
f"--{DEPLOYMENT_ID_OPTION}",
|
81
71
|
str(deployment_id),
|
82
|
-
f"--{NAMESPACE_OPTION}",
|
83
|
-
kubernetes_namespace,
|
84
72
|
]
|
85
73
|
|
86
74
|
if run_id:
|
@@ -26,7 +26,6 @@ from zenml.constants import ENV_ZENML_ENABLE_REPO_INIT_WARNINGS
|
|
26
26
|
from zenml.integrations.airflow.orchestrators.dag_generator import (
|
27
27
|
ENV_ZENML_LOCAL_STORES_PATH,
|
28
28
|
)
|
29
|
-
from zenml.integrations.kubernetes.orchestrators import kube_utils
|
30
29
|
from zenml.integrations.kubernetes.pod_settings import KubernetesPodSettings
|
31
30
|
from zenml.logger import get_logger
|
32
31
|
|
@@ -97,8 +96,6 @@ def add_local_stores_mount(
|
|
97
96
|
|
98
97
|
def build_pod_manifest(
|
99
98
|
pod_name: str,
|
100
|
-
run_name: str,
|
101
|
-
pipeline_name: str,
|
102
99
|
image_name: str,
|
103
100
|
command: List[str],
|
104
101
|
args: List[str],
|
@@ -106,6 +103,7 @@ def build_pod_manifest(
|
|
106
103
|
pod_settings: Optional[KubernetesPodSettings] = None,
|
107
104
|
service_account_name: Optional[str] = None,
|
108
105
|
env: Optional[Dict[str, str]] = None,
|
106
|
+
labels: Optional[Dict[str, str]] = None,
|
109
107
|
mount_local_stores: bool = False,
|
110
108
|
owner_references: Optional[List[k8s_client.V1OwnerReference]] = None,
|
111
109
|
) -> k8s_client.V1Pod:
|
@@ -113,8 +111,6 @@ def build_pod_manifest(
|
|
113
111
|
|
114
112
|
Args:
|
115
113
|
pod_name: Name of the pod.
|
116
|
-
run_name: Name of the ZenML run.
|
117
|
-
pipeline_name: Name of the ZenML pipeline.
|
118
114
|
image_name: Name of the Docker image.
|
119
115
|
command: Command to execute the entrypoint in the pod.
|
120
116
|
args: Arguments provided to the entrypoint command.
|
@@ -124,6 +120,7 @@ def build_pod_manifest(
|
|
124
120
|
Can be used to assign certain roles to a pod, e.g., to allow it to
|
125
121
|
run Kubernetes commands from within the cluster.
|
126
122
|
env: Environment variables to set.
|
123
|
+
labels: Labels to add to the pod.
|
127
124
|
mount_local_stores: Whether to mount the local stores path inside the
|
128
125
|
pod.
|
129
126
|
owner_references: List of owner references for the pod.
|
@@ -162,7 +159,7 @@ def build_pod_manifest(
|
|
162
159
|
if service_account_name is not None:
|
163
160
|
pod_spec.service_account_name = service_account_name
|
164
161
|
|
165
|
-
labels = {}
|
162
|
+
labels = labels or {}
|
166
163
|
|
167
164
|
if pod_settings:
|
168
165
|
add_pod_settings(pod_spec, pod_settings)
|
@@ -171,14 +168,6 @@ def build_pod_manifest(
|
|
171
168
|
if pod_settings.labels:
|
172
169
|
labels.update(pod_settings.labels)
|
173
170
|
|
174
|
-
# Add run_name and pipeline_name to the labels
|
175
|
-
labels.update(
|
176
|
-
{
|
177
|
-
"run": kube_utils.sanitize_label(run_name),
|
178
|
-
"pipeline": kube_utils.sanitize_label(pipeline_name),
|
179
|
-
}
|
180
|
-
)
|
181
|
-
|
182
171
|
pod_metadata = k8s_client.V1ObjectMeta(
|
183
172
|
name=pod_name,
|
184
173
|
labels=labels,
|
@@ -272,8 +261,6 @@ def add_pod_settings(
|
|
272
261
|
def build_cron_job_manifest(
|
273
262
|
cron_expression: str,
|
274
263
|
pod_name: str,
|
275
|
-
run_name: str,
|
276
|
-
pipeline_name: str,
|
277
264
|
image_name: str,
|
278
265
|
command: List[str],
|
279
266
|
args: List[str],
|
@@ -281,6 +268,7 @@ def build_cron_job_manifest(
|
|
281
268
|
pod_settings: Optional[KubernetesPodSettings] = None,
|
282
269
|
service_account_name: Optional[str] = None,
|
283
270
|
env: Optional[Dict[str, str]] = None,
|
271
|
+
labels: Optional[Dict[str, str]] = None,
|
284
272
|
mount_local_stores: bool = False,
|
285
273
|
successful_jobs_history_limit: Optional[int] = None,
|
286
274
|
failed_jobs_history_limit: Optional[int] = None,
|
@@ -291,8 +279,6 @@ def build_cron_job_manifest(
|
|
291
279
|
Args:
|
292
280
|
cron_expression: CRON job schedule expression, e.g. "* * * * *".
|
293
281
|
pod_name: Name of the pod.
|
294
|
-
run_name: Name of the ZenML run.
|
295
|
-
pipeline_name: Name of the ZenML pipeline.
|
296
282
|
image_name: Name of the Docker image.
|
297
283
|
command: Command to execute the entrypoint in the pod.
|
298
284
|
args: Arguments provided to the entrypoint command.
|
@@ -302,6 +288,7 @@ def build_cron_job_manifest(
|
|
302
288
|
Can be used to assign certain roles to a pod, e.g., to allow it to
|
303
289
|
run Kubernetes commands from within the cluster.
|
304
290
|
env: Environment variables to set.
|
291
|
+
labels: Labels to add to the pod.
|
305
292
|
mount_local_stores: Whether to mount the local stores path inside the
|
306
293
|
pod.
|
307
294
|
successful_jobs_history_limit: The number of successful jobs to retain.
|
@@ -314,8 +301,6 @@ def build_cron_job_manifest(
|
|
314
301
|
"""
|
315
302
|
pod_manifest = build_pod_manifest(
|
316
303
|
pod_name=pod_name,
|
317
|
-
run_name=run_name,
|
318
|
-
pipeline_name=pipeline_name,
|
319
304
|
image_name=image_name,
|
320
305
|
command=command,
|
321
306
|
args=args,
|
@@ -323,6 +308,7 @@ def build_cron_job_manifest(
|
|
323
308
|
pod_settings=pod_settings,
|
324
309
|
service_account_name=service_account_name,
|
325
310
|
env=env,
|
311
|
+
labels=labels,
|
326
312
|
mount_local_stores=mount_local_stores,
|
327
313
|
)
|
328
314
|
|
@@ -205,9 +205,7 @@ class KubernetesStepOperator(BaseStepOperator):
|
|
205
205
|
|
206
206
|
# Create and run the orchestrator pod.
|
207
207
|
pod_manifest = build_pod_manifest(
|
208
|
-
run_name=info.run_name,
|
209
208
|
pod_name=pod_name,
|
210
|
-
pipeline_name=info.pipeline.name,
|
211
209
|
image_name=image_name,
|
212
210
|
command=command,
|
213
211
|
args=args,
|
@@ -216,6 +214,10 @@ class KubernetesStepOperator(BaseStepOperator):
|
|
216
214
|
pod_settings=settings.pod_settings,
|
217
215
|
env=environment,
|
218
216
|
mount_local_stores=False,
|
217
|
+
labels={
|
218
|
+
"run_id": kube_utils.sanitize_label(str(info.run_id)),
|
219
|
+
"pipeline": kube_utils.sanitize_label(info.pipeline.name),
|
220
|
+
},
|
219
221
|
)
|
220
222
|
|
221
223
|
kube_utils.create_and_wait_for_pod_to_start(
|
@@ -150,7 +150,7 @@ class VLLMDeploymentService(LocalDaemonService, BaseDeploymentService):
|
|
150
150
|
|
151
151
|
try:
|
152
152
|
parser: argparse.ArgumentParser = make_arg_parser(
|
153
|
-
FlexibleArgumentParser()
|
153
|
+
FlexibleArgumentParser()
|
154
154
|
)
|
155
155
|
# pass in empty list to get default args
|
156
156
|
# otherwise it will try to get the args from sys.argv
|
@@ -125,6 +125,15 @@ class PipelineRunRequest(ProjectScopedRequest):
|
|
125
125
|
title="Logs of the pipeline run.",
|
126
126
|
)
|
127
127
|
|
128
|
+
@property
|
129
|
+
def is_placeholder_request(self) -> bool:
|
130
|
+
"""Whether the request is a placeholder request.
|
131
|
+
|
132
|
+
Returns:
|
133
|
+
Whether the request is a placeholder request.
|
134
|
+
"""
|
135
|
+
return self.status == ExecutionStatus.INITIALIZING
|
136
|
+
|
128
137
|
model_config = ConfigDict(protected_namespaces=())
|
129
138
|
|
130
139
|
|
@@ -480,6 +489,7 @@ class PipelineRunResponse(
|
|
480
489
|
for step in pagination_utils.depaginate(
|
481
490
|
Client().list_run_steps,
|
482
491
|
pipeline_run_id=self.id,
|
492
|
+
project=self.project_id,
|
483
493
|
)
|
484
494
|
}
|
485
495
|
|