zenml-nightly 0.83.1.dev20250709__py3-none-any.whl → 0.83.1.dev20250710__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zenml/VERSION +1 -1
- zenml/cli/login.py +141 -18
- zenml/cli/project.py +8 -6
- zenml/cli/utils.py +63 -16
- zenml/client.py +4 -1
- zenml/config/compiler.py +1 -0
- zenml/config/retry_config.py +5 -3
- zenml/config/step_configurations.py +7 -1
- zenml/console.py +4 -1
- zenml/constants.py +0 -1
- zenml/enums.py +13 -4
- zenml/integrations/kubernetes/flavors/kubernetes_orchestrator_flavor.py +58 -4
- zenml/integrations/kubernetes/orchestrators/kube_utils.py +172 -0
- zenml/integrations/kubernetes/orchestrators/kubernetes_orchestrator.py +37 -23
- zenml/integrations/kubernetes/orchestrators/kubernetes_orchestrator_entrypoint.py +92 -22
- zenml/integrations/kubernetes/orchestrators/manifest_utils.py +59 -0
- zenml/logger.py +6 -4
- zenml/login/web_login.py +13 -6
- zenml/models/v2/core/model_version.py +9 -1
- zenml/models/v2/core/pipeline_run.py +1 -0
- zenml/models/v2/core/step_run.py +35 -1
- zenml/orchestrators/base_orchestrator.py +63 -8
- zenml/orchestrators/dag_runner.py +3 -1
- zenml/orchestrators/publish_utils.py +4 -1
- zenml/orchestrators/step_launcher.py +77 -139
- zenml/orchestrators/step_run_utils.py +16 -0
- zenml/orchestrators/step_runner.py +1 -4
- zenml/pipelines/pipeline_decorator.py +6 -1
- zenml/pipelines/pipeline_definition.py +7 -0
- zenml/zen_server/auth.py +0 -1
- zenml/zen_stores/migrations/versions/360fa84718bf_step_run_versioning.py +64 -0
- zenml/zen_stores/migrations/versions/85289fea86ff_adding_source_to_logs.py +1 -1
- zenml/zen_stores/schemas/pipeline_deployment_schemas.py +21 -0
- zenml/zen_stores/schemas/pipeline_run_schemas.py +31 -2
- zenml/zen_stores/schemas/step_run_schemas.py +41 -17
- zenml/zen_stores/sql_zen_store.py +152 -32
- zenml/zen_stores/template_utils.py +29 -9
- zenml_nightly-0.83.1.dev20250710.dist-info/METADATA +499 -0
- {zenml_nightly-0.83.1.dev20250709.dist-info → zenml_nightly-0.83.1.dev20250710.dist-info}/RECORD +42 -41
- zenml_nightly-0.83.1.dev20250709.dist-info/METADATA +0 -538
- {zenml_nightly-0.83.1.dev20250709.dist-info → zenml_nightly-0.83.1.dev20250710.dist-info}/LICENSE +0 -0
- {zenml_nightly-0.83.1.dev20250709.dist-info → zenml_nightly-0.83.1.dev20250710.dist-info}/WHEEL +0 -0
- {zenml_nightly-0.83.1.dev20250709.dist-info → zenml_nightly-0.83.1.dev20250710.dist-info}/entry_points.txt +0 -0
zenml/integrations/kubernetes/flavors/kubernetes_orchestrator_flavor.py
CHANGED
@@ -13,9 +13,9 @@
 # permissions and limitations under the License.
 """Kubernetes orchestrator flavor."""
 
-from typing import TYPE_CHECKING, Optional, Type
+from typing import TYPE_CHECKING, Any, Dict, Optional, Type
 
-from pydantic import NonNegativeInt, PositiveInt
+from pydantic import NonNegativeInt, PositiveInt, field_validator
 
 from zenml.config.base_settings import BaseSettings
 from zenml.constants import KUBERNETES_CLUSTER_RESOURCE_TYPE
@@ -40,6 +40,9 @@ class KubernetesOrchestratorSettings(BaseSettings):
            asynchronously. Defaults to `True`.
        timeout: How many seconds to wait for synchronous runs. `0` means
            to wait for an unlimited duration.
+        stream_step_logs: If `True`, the orchestrator pod will stream the logs
+            of the step pods. This only has an effect if specified on the
+            pipeline, not on individual steps.
        service_account_name: Name of the service account to use for the
            orchestrator pod. If not provided, a new service account with "edit"
            permissions will be created.
@@ -65,8 +68,26 @@ class KubernetesOrchestratorSettings(BaseSettings):
        failed_jobs_history_limit: The number of failed jobs to retain.
            This only applies to jobs created when scheduling a pipeline.
        ttl_seconds_after_finished: The amount of seconds to keep finished jobs
-            before deleting them. This
-
+            before deleting them. **Note**: This does not clean up the
+            orchestrator pod for non-scheduled runs.
+        active_deadline_seconds: The active deadline seconds for the job that is
+            executing the step.
+        backoff_limit_margin: The value to add to the backoff limit in addition
+            to the step retries. The retry configuration defined on the step
+            defines the maximum number of retries that the server will accept
+            for a step. For this orchestrator, this controls how often the
+            job running the step will try to start the step pod. There are some
+            circumstances however where the job will start the pod, but the pod
+            doesn't actually get to the point of running the step. That means
+            the server will not receive the maximum amount of retry requests,
+            which in turn causes other inconsistencies like wrong step statuses.
+            To mitigate this, this attribute allows to add a margin to the
+            backoff limit. This means that the job will retry the pod startup
+            for the configured amount of times plus the margin, which increases
+            the chance of the server receiving the maximum amount of retry
+            requests.
+        pod_failure_policy: The pod failure policy to use for the job that is
+            executing the step.
        prevent_orchestrator_pod_caching: If `True`, the orchestrator pod will
            not try to compute cached steps before starting the step pods.
        always_build_pipeline_image: If `True`, the orchestrator will always
@@ -77,6 +98,7 @@ class KubernetesOrchestratorSettings(BaseSettings):
 
    synchronous: bool = True
    timeout: int = 0
+    stream_step_logs: bool = True
    service_account_name: Optional[str] = None
    step_pod_service_account_name: Optional[str] = None
    privileged: bool = False
@@ -91,10 +113,33 @@ class KubernetesOrchestratorSettings(BaseSettings):
    successful_jobs_history_limit: Optional[NonNegativeInt] = None
    failed_jobs_history_limit: Optional[NonNegativeInt] = None
    ttl_seconds_after_finished: Optional[NonNegativeInt] = None
+    active_deadline_seconds: Optional[NonNegativeInt] = None
+    backoff_limit_margin: NonNegativeInt = 0
+    pod_failure_policy: Optional[Dict[str, Any]] = None
    prevent_orchestrator_pod_caching: bool = False
    always_build_pipeline_image: bool = False
    pod_stop_grace_period: PositiveInt = 30
 
+    @field_validator("pod_failure_policy", mode="before")
+    @classmethod
+    def _convert_pod_failure_policy(cls, value: Any) -> Any:
+        """Converts Kubernetes pod failure policy to a dict.
+
+        Args:
+            value: The pod failure policy value.
+
+        Returns:
+            The converted value.
+        """
+        from kubernetes.client.models import V1PodFailurePolicy
+
+        from zenml.integrations.kubernetes import serialization_utils
+
+        if isinstance(value, V1PodFailurePolicy):
+            return serialization_utils.serialize_kubernetes_model(value)
+        else:
+            return value
+
 
 class KubernetesOrchestratorConfig(
    BaseOrchestratorConfig, KubernetesOrchestratorSettings
@@ -187,6 +232,15 @@ class KubernetesOrchestratorConfig(
        # This is currently not supported when using client-side caching.
        return False
 
+    @property
+    def handles_step_retries(self) -> bool:
+        """Whether the orchestrator handles step retries.
+
+        Returns:
+            Whether the orchestrator handles step retries.
+        """
+        return True
+
 
 class KubernetesOrchestratorFlavor(BaseOrchestratorFlavor):
    """Kubernetes orchestrator flavor."""
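For orientation, here is a minimal sketch of how the new settings could be passed to a pipeline. The pipeline, the numeric values, and the failure policy below are illustrative examples, not taken from this release; plain dicts are accepted for `pod_failure_policy`, and `V1PodFailurePolicy` objects are serialized to dicts by the new `field_validator`.

from zenml import pipeline
from zenml.integrations.kubernetes.flavors import KubernetesOrchestratorSettings

# Hypothetical values exercising the fields added in this diff.
kubernetes_settings = KubernetesOrchestratorSettings(
    stream_step_logs=True,          # stream step pod logs from the orchestrator pod
    active_deadline_seconds=3600,   # hard cap on how long a step job may run
    backoff_limit_margin=2,         # extra pod-startup retries on top of step retries
    pod_failure_policy={
        "rules": [
            {
                "action": "Count",
                "onExitCodes": {
                    "containerName": "main",
                    "operator": "NotIn",
                    "values": [0],
                },
            }
        ]
    },
)


@pipeline(settings={"orchestrator.kubernetes": kubernetes_settings})
def my_pipeline() -> None:
    ...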
zenml/integrations/kubernetes/orchestrators/kube_utils.py
CHANGED
@@ -32,8 +32,10 @@ Adjusted from https://github.com/tensorflow/tfx/blob/master/tfx/utils/kube_utils
 """
 
 import enum
+import functools
 import re
 import time
+from collections import defaultdict
 from typing import Any, Callable, Dict, List, Optional, TypeVar, cast
 
 from kubernetes import client as k8s_client
@@ -51,6 +53,8 @@ from zenml.utils.time_utils import utc_now
 
 logger = get_logger(__name__)
 
+R = TypeVar("R")
+
 
 class PodPhase(enum.Enum):
    """Phase of the Kubernetes pod.
@@ -581,3 +585,171 @@ def get_pod_owner_references(
    return cast(
        List[k8s_client.V1OwnerReference], pod.metadata.owner_references
    )
+
+
+def retry_on_api_exception(
+    func: Callable[..., R],
+    max_retries: int = 3,
+    delay: float = 1,
+    backoff: float = 1,
+) -> Callable[..., R]:
+    """Retry a function on API exceptions.
+
+    Args:
+        func: The function to retry.
+        max_retries: The maximum number of retries.
+        delay: The delay between retries.
+        backoff: The backoff factor.
+
+    Returns:
+        The wrapped function with retry logic.
+    """
+
+    @functools.wraps(func)
+    def wrapper(*args: Any, **kwargs: Any) -> R:
+        _delay = delay
+        retries = 0
+        while retries <= max_retries:
+            try:
+                return func(*args, **kwargs)
+            except ApiException as e:
+                retries += 1
+                if retries <= max_retries:
+                    logger.warning("Error calling %s: %s.", func.__name__, e)
+                    time.sleep(_delay)
+                    _delay *= backoff
+                else:
+                    raise
+
+        raise RuntimeError(
+            f"Failed to call {func.__name__} after {max_retries} retries."
+        )
+
+    return wrapper
+
+
+def create_job(
+    batch_api: k8s_client.BatchV1Api,
+    namespace: str,
+    job_manifest: k8s_client.V1Job,
+) -> None:
+    """Create a Kubernetes job.
+
+    Args:
+        batch_api: Kubernetes batch api.
+        namespace: Kubernetes namespace.
+        job_manifest: The manifest of the job to create.
+    """
+    retry_on_api_exception(batch_api.create_namespaced_job)(
+        namespace=namespace,
+        body=job_manifest,
+    )
+
+
+def wait_for_job_to_finish(
+    batch_api: k8s_client.BatchV1Api,
+    core_api: k8s_client.CoreV1Api,
+    namespace: str,
+    job_name: str,
+    backoff_interval: float = 1,
+    maximum_backoff: float = 32,
+    exponential_backoff: bool = False,
+    container_name: Optional[str] = None,
+    stream_logs: bool = True,
+) -> None:
+    """Wait for a job to finish.
+
+    Args:
+        batch_api: Kubernetes BatchV1Api client.
+        core_api: Kubernetes CoreV1Api client.
+        namespace: Kubernetes namespace.
+        job_name: Name of the job for which to wait.
+        backoff_interval: The interval to wait between polling the job status.
+        maximum_backoff: The maximum interval to wait between polling the job
+            status.
+        exponential_backoff: Whether to use exponential backoff.
+        stream_logs: Whether to stream the job logs.
+        container_name: Name of the container to stream logs from.
+
+    Raises:
+        RuntimeError: If the job failed or timed out.
+    """
+    logged_lines_per_pod: Dict[str, int] = defaultdict(int)
+    finished_pods = set()
+
+    while True:
+        job: k8s_client.V1Job = retry_on_api_exception(
+            batch_api.read_namespaced_job
+        )(name=job_name, namespace=namespace)
+
+        if job.status.conditions:
+            for condition in job.status.conditions:
+                if condition.type == "Complete" and condition.status == "True":
+                    return
+                if condition.type == "Failed" and condition.status == "True":
+                    raise RuntimeError(
+                        f"Job `{namespace}:{job_name}` failed: "
+                        f"{condition.message}"
+                    )
+
+        if stream_logs:
+            try:
+                pod_list: k8s_client.V1PodList = core_api.list_namespaced_pod(
+                    namespace=namespace,
+                    label_selector=f"job-name={job_name}",
+                )
+            except ApiException as e:
+                logger.error("Error fetching pods: %s.", e)
+                pod_list = []
+            else:
+                # Sort pods by creation timestamp, oldest first
+                pod_list.items.sort(
+                    key=lambda pod: pod.metadata.creation_timestamp,
+                )
+
+                for pod in pod_list.items:
+                    pod_name = pod.metadata.name
+                    pod_status = pod.status.phase
+
+                    if pod_name in finished_pods:
+                        # We've already streamed all logs for this pod, so we can
+                        # skip it.
+                        continue
+
+                    if pod_status == PodPhase.PENDING.value:
+                        # The pod is still pending, so we can't stream logs for it
+                        # yet.
+                        continue
+
+                    if pod_status in [
+                        PodPhase.SUCCEEDED.value,
+                        PodPhase.FAILED.value,
+                    ]:
+                        finished_pods.add(pod_name)
+
+                    containers = pod.spec.containers
+                    if not container_name:
+                        container_name = containers[0].name
+
+                    try:
+                        response = core_api.read_namespaced_pod_log(
+                            name=pod_name,
+                            namespace=namespace,
+                            container=container_name,
+                            _preload_content=False,
+                        )
+                    except ApiException as e:
+                        logger.error("Error reading pod logs: %s.", e)
+                    else:
+                        raw_data = response.data
+                        decoded_log = raw_data.decode("utf-8", errors="replace")
+                        logs = decoded_log.splitlines()
+                        logged_lines = logged_lines_per_pod[pod_name]
+                        if len(logs) > logged_lines:
+                            for line in logs[logged_lines:]:
+                                logger.info(line)
+                            logged_lines_per_pod[pod_name] = len(logs)
+
+        time.sleep(backoff_interval)
+        if exponential_backoff and backoff_interval < maximum_backoff:
+            backoff_interval *= 2
zenml/integrations/kubernetes/orchestrators/kubernetes_orchestrator.py
CHANGED
@@ -447,6 +447,13 @@ class KubernetesOrchestrator(ContainerizedOrchestrator):
                step_name,
            )
 
+        if retry_config := step.config.retry:
+            if retry_config.delay or retry_config.backoff:
+                logger.warning(
+                    "Specifying retry delay or backoff is not supported "
+                    "for the Kubernetes orchestrator."
+                )
+
        pipeline_name = deployment.pipeline_configuration.name
        settings = cast(
            KubernetesOrchestratorSettings, self.get_settings(deployment)
@@ -693,7 +700,7 @@ class KubernetesOrchestrator(ContainerizedOrchestrator):
        Args:
            run: The run that was executed by this orchestrator.
            graceful: If True, does nothing (lets the orchestrator and steps finish naturally).
-                If False, stops all running step
+                If False, stops all running step jobs.
 
        Raises:
            RuntimeError: If we fail to stop the run.
@@ -706,55 +713,63 @@ class KubernetesOrchestrator(ContainerizedOrchestrator):
            )
            return
 
-
+        jobs_stopped = []
        errors = []
 
-        # Find all
+        # Find all jobs running steps of the pipeline
        label_selector = f"run_id={kube_utils.sanitize_label(str(run.id))}"
        try:
-
+            jobs = self._k8s_batch_api.list_namespaced_job(
                namespace=self.config.kubernetes_namespace,
                label_selector=label_selector,
            )
        except Exception as e:
            raise RuntimeError(
-                f"Failed to list step
+                f"Failed to list step jobs with run ID {run.id}: {e}"
            )
 
-
-
-
-
-
-
-
+        for job in jobs.items:
+            if job.status.conditions:
+                # Don't delete completed/failed jobs
+                for condition in job.status.conditions:
+                    if (
+                        condition.type == "Complete"
+                        and condition.status == "True"
+                    ):
+                        continue
+                    if (
+                        condition.type == "Failed"
+                        and condition.status == "True"
+                    ):
+                        continue
 
            try:
-                self.
-                    name=
+                self._k8s_batch_api.delete_namespaced_job(
+                    name=job.metadata.name,
                    namespace=self.config.kubernetes_namespace,
+                    propagation_policy="Foreground",
                )
-
+                jobs_stopped.append(f"step job: {job.metadata.name}")
                logger.debug(
-                    f"Successfully initiated graceful stop of step
+                    f"Successfully initiated graceful stop of step job: {job.metadata.name}"
                )
            except Exception as e:
-                error_msg = f"Failed to stop step
+                error_msg = f"Failed to stop step job {job.metadata.name}: {e}"
                logger.warning(error_msg)
                errors.append(error_msg)
 
        # Summary logging
        settings = cast(KubernetesOrchestratorSettings, self.get_settings(run))
        grace_period_seconds = settings.pod_stop_grace_period
-        if
+        if jobs_stopped:
            logger.debug(
-                f"Successfully initiated graceful termination of: {', '.join(
+                f"Successfully initiated graceful termination of: {', '.join(jobs_stopped)}. "
                f"Pods will terminate within {grace_period_seconds} seconds."
            )
 
        if errors:
            error_summary = "; ".join(errors)
-            if not
+            if not jobs_stopped:
                # If nothing was stopped successfully, raise an error
                raise RuntimeError(
                    f"Failed to stop pipeline run: {error_summary}"
@@ -765,10 +780,9 @@ class KubernetesOrchestrator(ContainerizedOrchestrator):
                    f"Partial stop operation completed with errors: {error_summary}"
                )
 
-
-        if not pods_stopped and not errors:
+        if not jobs_stopped and not errors:
            logger.info(
-                f"No running step
+                f"No running step jobs found for pipeline run with ID: {run.id}"
            )
 
    def get_pipeline_run_metadata(
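For reference, the cleanup above amounts to a label-selector lookup followed by a foreground delete. A rough standalone sketch of the underlying Kubernetes client calls; the namespace and run ID are placeholders, and in the orchestrator the selector value comes from `sanitize_label(str(run.id))`:

from kubernetes import client as k8s_client

batch_api = k8s_client.BatchV1Api()
namespace = "zenml"                       # placeholder namespace
label_selector = "run_id=example-run-id"  # placeholder run ID label

jobs = batch_api.list_namespaced_job(
    namespace=namespace, label_selector=label_selector
)
for job in jobs.items:
    # "Foreground" propagation waits for the job's pods to be removed
    # before the job object itself is deleted.
    batch_api.delete_namespaced_job(
        name=job.metadata.name,
        namespace=namespace,
        propagation_policy="Foreground",
    )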
zenml/integrations/kubernetes/orchestrators/kubernetes_orchestrator_entrypoint.py
CHANGED
@@ -14,6 +14,7 @@
 """Entrypoint of the Kubernetes master/orchestrator pod."""
 
 import argparse
+import random
 import socket
 from typing import Callable, Dict, Optional, cast
 
@@ -36,7 +37,9 @@ from zenml.integrations.kubernetes.orchestrators.kubernetes_orchestrator import
    KubernetesOrchestrator,
 )
 from zenml.integrations.kubernetes.orchestrators.manifest_utils import (
+    build_job_manifest,
    build_pod_manifest,
+    pod_template_manifest_from_pod,
 )
 from zenml.logger import get_logger
 from zenml.logging.step_logging import setup_orchestrator_logging
@@ -110,8 +113,16 @@ def main() -> None:
    # Get a Kubernetes client from the active Kubernetes orchestrator, but
    # override the `incluster` setting to `True` since we are running inside
    # the Kubernetes cluster.
-
+
+    api_client_config = orchestrator.get_kube_client(
+        incluster=True
+    ).configuration
+    api_client_config.connection_pool_maxsize = (
+        pipeline_settings.max_parallelism
+    )
+    kube_client = k8s_client.ApiClient(api_client_config)
    core_api = k8s_client.CoreV1Api(kube_client)
+    batch_api = k8s_client.BatchV1Api(kube_client)
 
    env = get_config_environment_vars()
    env[ENV_ZENML_KUBERNETES_RUN_ID] = orchestrator_pod_name
@@ -150,6 +161,9 @@ def main() -> None:
        Returns:
            Whether the step node needs to be run.
        """
+        if not step_run_request_factory.has_caching_enabled(step_name):
+            return True
+
        step_run_request = step_run_request_factory.create_request(
            step_name
        )
@@ -266,39 +280,95 @@ def main() -> None:
            service_account_name=settings.step_pod_service_account_name
            or settings.service_account_name,
            mount_local_stores=mount_local_stores,
-            owner_references=owner_references,
            termination_grace_period_seconds=settings.pod_stop_grace_period,
            labels=step_pod_labels,
        )
 
-
-
-
-
-
+        retry_config = step_config.retry
+        backoff_limit = (
+            retry_config.max_retries if retry_config else 0
+        ) + settings.backoff_limit_margin
+
+        # This is to fix a bug in the kubernetes client which has some wrong
+        # client-side validations that means the `on_exit_codes` field is
+        # unusable. See https://github.com/kubernetes-client/python/issues/2056
+        class PatchedFailurePolicyRule(k8s_client.V1PodFailurePolicyRule):  # type: ignore[misc]
+            @property
+            def on_pod_conditions(self):  # type: ignore[no-untyped-def]
+                return self._on_pod_conditions
+
+            @on_pod_conditions.setter
+            def on_pod_conditions(self, on_pod_conditions):  # type: ignore[no-untyped-def]
+                self._on_pod_conditions = on_pod_conditions
+
+        k8s_client.V1PodFailurePolicyRule = PatchedFailurePolicyRule
+        k8s_client.models.V1PodFailurePolicyRule = PatchedFailurePolicyRule
+
+        pod_failure_policy = settings.pod_failure_policy or {
+            # These rules are applied sequentially. This means any failure in
+            # the main container will count towards the max retries. Any other
+            # disruption will not count towards the max retries.
+            "rules": [
+                # If the main container fails, we count it towards the max
+                # retries.
+                {
+                    "action": "Count",
+                    "onExitCodes": {
+                        "containerName": "main",
+                        "operator": "NotIn",
+                        "values": [0],
+                    },
+                },
+                # If the pod is interrupted at any other time, we don't count
+                # it as a retry
+                {
+                    "action": "Ignore",
+                    "onPodConditions": [
+                        {
+                            "type": "DisruptionTarget",
+                        }
+                    ],
+                },
+            ]
+        }
+
+        job_name = settings.pod_name_prefix or ""
+        random_prefix = "".join(random.choices("0123456789abcdef", k=8))
+        job_name += f"-{random_prefix}-{step_name}-{deployment.pipeline_configuration.name}"
+        # The job name will be used as a label on the pods, so we need to make
+        # sure it doesn't exceed the label length limit
+        job_name = kube_utils.sanitize_label(job_name)
+
+        job_manifest = build_job_manifest(
+            job_name=job_name,
+            pod_template=pod_template_manifest_from_pod(pod_manifest),
+            backoff_limit=backoff_limit,
+            ttl_seconds_after_finished=settings.ttl_seconds_after_finished,
+            active_deadline_seconds=settings.active_deadline_seconds,
+            pod_failure_policy=pod_failure_policy,
+            owner_references=owner_references,
+            labels=step_pod_labels,
+        )
+
+        kube_utils.create_job(
+            batch_api=batch_api,
            namespace=namespace,
-
-            startup_failure_delay=settings.pod_failure_retry_delay,
-            startup_failure_backoff=settings.pod_failure_backoff,
-            startup_timeout=settings.pod_startup_timeout,
+            job_manifest=job_manifest,
        )
 
-
-        logger.info(f"Waiting for pod of step `{step_name}` to finish...")
+        logger.info(f"Waiting for job of step `{step_name}` to finish...")
        try:
-            kube_utils.
-
-
-            ),
-            pod_name=pod_name,
+            kube_utils.wait_for_job_to_finish(
+                batch_api=batch_api,
+                core_api=core_api,
                namespace=namespace,
-
-                stream_logs=
+                job_name=job_name,
+                stream_logs=pipeline_settings.stream_step_logs,
            )
 
-            logger.info(f"
+            logger.info(f"Job for step `{step_name}` completed.")
        except Exception:
-            logger.error(f"
+            logger.error(f"Job for step `{step_name}` failed.")
 
            raise
 
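The interplay between step retries and the job's backoff limit can be illustrated in isolation; a tiny sketch with made-up numbers:

# Hypothetical numbers: the step allows 3 retries and the orchestrator
# setting adds a margin of 2.
max_retries = 3
backoff_limit_margin = 2

# Mirrors the computation in the entrypoint: the Job may restart the step
# pod up to max_retries + margin times, while the ZenML server still only
# accepts max_retries retry requests for the step itself.
backoff_limit = max_retries + backoff_limit_margin
assert backoff_limit == 5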
zenml/integrations/kubernetes/orchestrators/manifest_utils.py
CHANGED
@@ -450,3 +450,62 @@ def build_secret_manifest(
        "type": secret_type,
        "data": encoded_data,
    }
+
+
+def pod_template_manifest_from_pod(
+    pod: k8s_client.V1Pod,
+) -> k8s_client.V1PodTemplateSpec:
+    """Build a Kubernetes pod template manifest from a pod.
+
+    Args:
+        pod: The pod manifest to build the template from.
+
+    Returns:
+        The pod template manifest.
+    """
+    return k8s_client.V1PodTemplateSpec(
+        metadata=pod.metadata,
+        spec=pod.spec,
+    )
+
+
+def build_job_manifest(
+    job_name: str,
+    pod_template: k8s_client.V1PodTemplateSpec,
+    backoff_limit: Optional[int] = None,
+    ttl_seconds_after_finished: Optional[int] = None,
+    labels: Optional[Dict[str, str]] = None,
+    active_deadline_seconds: Optional[int] = None,
+    pod_failure_policy: Optional[Dict[str, Any]] = None,
+    owner_references: Optional[List[k8s_client.V1OwnerReference]] = None,
+) -> k8s_client.V1Job:
+    """Build a Kubernetes job manifest.
+
+    Args:
+        job_name: Name of the job.
+        pod_template: The pod template to use for the job.
+        backoff_limit: The backoff limit for the job.
+        ttl_seconds_after_finished: The TTL seconds after finished for the job.
+        labels: The labels to use for the job.
+        active_deadline_seconds: The active deadline seconds for the job.
+        pod_failure_policy: The pod failure policy for the job.
+        owner_references: The owner references for the job.
+
+    Returns:
+        The Kubernetes job manifest.
+    """
+    job_spec = k8s_client.V1JobSpec(
+        template=pod_template,
+        backoff_limit=backoff_limit,
+        parallelism=1,
+        ttl_seconds_after_finished=ttl_seconds_after_finished,
+        active_deadline_seconds=active_deadline_seconds,
+        pod_failure_policy=pod_failure_policy,
+    )
+    job_metadata = k8s_client.V1ObjectMeta(
+        name=job_name,
+        labels=labels,
+        owner_references=owner_references,
+    )
+
+    return k8s_client.V1Job(spec=job_spec, metadata=job_metadata)
zenml/logger.py
CHANGED
@@ -39,14 +39,15 @@ ZENML_LOGGING_COLORS_DISABLED = handle_bool_env_var(
 class CustomFormatter(logging.Formatter):
    """Formats logs according to custom specifications."""
 
-    grey: str = "\x1b[
+    grey: str = "\x1b[90m"
+    white: str = "\x1b[37m"
    pink: str = "\x1b[35m"
    green: str = "\x1b[32m"
    yellow: str = "\x1b[33m"
    red: str = "\x1b[31m"
    cyan: str = "\x1b[1;36m"
    bold_red: str = "\x1b[31;1m"
-    purple: str = "\x1b[
+    purple: str = "\x1b[38;5;105m"
    blue: str = "\x1b[34m"
    reset: str = "\x1b[0m"
 
@@ -59,7 +60,7 @@ class CustomFormatter(logging.Formatter):
 
    COLORS: Dict[LoggingLevels, str] = {
        LoggingLevels.DEBUG: grey,
-        LoggingLevels.INFO:
+        LoggingLevels.INFO: white,
        LoggingLevels.WARN: yellow,
        LoggingLevels.ERROR: red,
        LoggingLevels.CRITICAL: bold_red,
@@ -87,12 +88,13 @@ class CustomFormatter(logging.Formatter):
        )
        formatter = logging.Formatter(log_fmt)
        formatted_message = formatter.format(record)
+
        quoted_groups = re.findall("`([^`]*)`", formatted_message)
        for quoted in quoted_groups:
            formatted_message = formatted_message.replace(
                "`" + quoted + "`",
                self.reset
-                + self.
+                + self.purple
                + quoted
                + self.COLORS.get(LoggingLevels(record.levelno)),
            )