zenml-nightly 0.83.1.dev20250706__py3-none-any.whl → 0.83.1.dev20250708__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -19,6 +19,36 @@ from pydantic import field_validator
 
 from zenml.config.base_settings import BaseSettings
 from zenml.integrations.kubernetes import serialization_utils
+from zenml.logger import get_logger
+
+logger = get_logger(__name__)
+
+
+_pod_settings_logged_warnings = []
+
+
+def warn_if_invalid_model_data(data: Any, class_name: str) -> None:
+    """Validates the data of a Kubernetes model.
+
+    Args:
+        data: The data to validate.
+        class_name: Name of the class of the model.
+    """
+    if not isinstance(data, dict):
+        return
+
+    try:
+        serialization_utils.deserialize_kubernetes_model(data, class_name)
+    except KeyError as e:
+        if str(e) not in _pod_settings_logged_warnings:
+            _pod_settings_logged_warnings.append(str(e))
+            logger.warning(
+                "Invalid data for Kubernetes model class `%s`: %s. "
+                "Hint: Kubernetes expects attribute names in CamelCase, not "
+                "snake_case.",
+                class_name,
+                e,
+            )
 
 
 class KubernetesPodSettings(BaseSettings):
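
The helper above warns once per distinct error instead of raising, so a typo or a snake_case key in a plain dict no longer slips through silently. A minimal sketch of the behavior, assuming the helper lives in the Kubernetes pod settings module (the file path is not shown in this diff):

    from zenml.integrations.kubernetes.pod_settings import warn_if_invalid_model_data

    # "mount_path" is snake_case; the V1VolumeMount model expects "mountPath",
    # so deserialization raises a KeyError internally and a warning is logged
    # (only once per distinct message, thanks to _pod_settings_logged_warnings).
    warn_if_invalid_model_data(
        {"name": "data", "mount_path": "/data"},
        "V1VolumeMount",
    )
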
@@ -77,6 +107,7 @@ class KubernetesPodSettings(BaseSettings):
                     serialization_utils.serialize_kubernetes_model(element)
                 )
             else:
+                warn_if_invalid_model_data(element, "V1Volume")
                 result.append(element)
 
         return result
@@ -101,6 +132,7 @@ class KubernetesPodSettings(BaseSettings):
                     serialization_utils.serialize_kubernetes_model(element)
                 )
             else:
+                warn_if_invalid_model_data(element, "V1VolumeMount")
                 result.append(element)
 
         return result
@@ -121,6 +153,7 @@ class KubernetesPodSettings(BaseSettings):
         if isinstance(value, V1Affinity):
             return serialization_utils.serialize_kubernetes_model(value)
         else:
+            warn_if_invalid_model_data(value, "V1Affinity")
             return value
 
     @field_validator("tolerations", mode="before")
@@ -143,6 +176,7 @@ class KubernetesPodSettings(BaseSettings):
                     serialization_utils.serialize_kubernetes_model(element)
                 )
             else:
+                warn_if_invalid_model_data(element, "V1Toleration")
                 result.append(element)
 
         return result
@@ -163,6 +197,7 @@ class KubernetesPodSettings(BaseSettings):
         if isinstance(value, V1ResourceRequirements):
             return serialization_utils.serialize_kubernetes_model(value)
         else:
+            warn_if_invalid_model_data(value, "V1ResourceRequirements")
             return value
 
     @field_validator("env", mode="before")
@@ -185,6 +220,7 @@ class KubernetesPodSettings(BaseSettings):
                     serialization_utils.serialize_kubernetes_model(element)
                 )
             else:
+                warn_if_invalid_model_data(element, "V1EnvVar")
                 result.append(element)
 
         return result
@@ -209,6 +245,7 @@ class KubernetesPodSettings(BaseSettings):
                     serialization_utils.serialize_kubernetes_model(element)
                 )
             else:
+                warn_if_invalid_model_data(element, "V1EnvFromSource")
                 result.append(element)
 
         return result
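
Each validator now routes plain dict elements through warn_if_invalid_model_data before accepting them unchanged. A hedged usage sketch (the tolerations field is taken from the validator context above; the module path is assumed):

    from zenml.integrations.kubernetes.pod_settings import KubernetesPodSettings

    # camelCase keys deserialize cleanly into V1Toleration, so no warning is logged.
    ok = KubernetesPodSettings(
        tolerations=[{"key": "gpu", "operator": "Exists", "tolerationSeconds": 300}]
    )

    # snake_case ("toleration_seconds") triggers the new warning, but the value is
    # still accepted for backwards compatibility.
    warned = KubernetesPodSettings(
        tolerations=[{"key": "gpu", "operator": "Exists", "toleration_seconds": 300}]
    )
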
@@ -117,7 +117,8 @@ def deserialize_kubernetes_model(data: Dict[str, Any], class_name: str) -> Any:
         if key not in attribute_mapping:
             raise KeyError(
                 f"Got value for attribute {key} which is not one of the "
-                f"available attributes {set(attribute_mapping)}."
+                f"available attributes for class {class_name}: "
+                f"{set(attribute_mapping)}."
             )
 
         attribute_name = attribute_mapping[key]
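
The KeyError raised by deserialize_kubernetes_model now names the offending class, which is what makes the pod-settings warning above actionable. For reference, a valid call uses the camelCase attribute names Kubernetes expects (sketch):

    from zenml.integrations.kubernetes import serialization_utils

    volume_mount = serialization_utils.deserialize_kubernetes_model(
        {"name": "data", "mountPath": "/data", "readOnly": True},
        "V1VolumeMount",
    )
    # A snake_case key such as "mount_path" would instead raise a KeyError whose
    # message now includes "for class V1VolumeMount" plus the valid attribute set.
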
@@ -18,6 +18,7 @@ import os
 import re
 import sys
 import time
+from contextlib import nullcontext
 from contextvars import ContextVar
 from types import TracebackType
 from typing import Any, Callable, List, Optional, Type, Union
@@ -30,7 +31,9 @@ from zenml.artifacts.utils import (
     _load_file_from_artifact_store,
     _strip_timestamp_from_multiline_string,
 )
+from zenml.client import Client
 from zenml.constants import (
+    ENV_ZENML_DISABLE_PIPELINE_LOGS_STORAGE,
     ENV_ZENML_DISABLE_STEP_NAMES_IN_LOGS,
     handle_bool_env_var,
 )
@@ -41,6 +44,11 @@ from zenml.logging import (
     STEP_LOGS_STORAGE_MAX_MESSAGES,
     STEP_LOGS_STORAGE_MERGE_INTERVAL_SECONDS,
 )
+from zenml.models import (
+    LogsRequest,
+    PipelineDeploymentResponse,
+    PipelineRunUpdate,
+)
 from zenml.utils.time_utils import utc_now
 from zenml.zen_stores.base_zen_store import BaseZenStore
 
@@ -584,3 +592,76 @@ class PipelineLogsStorageContext:
             return output
 
         return wrapped_flush
+
+
+def setup_orchestrator_logging(
+    run_id: str, deployment: "PipelineDeploymentResponse"
+) -> Any:
+    """Set up logging for an orchestrator environment.
+
+    This function can be reused by different orchestrators to set up
+    consistent logging behavior.
+
+    Args:
+        run_id: The pipeline run ID.
+        deployment: The deployment of the pipeline run.
+
+    Returns:
+        The logs context (PipelineLogsStorageContext)
+    """
+    try:
+        step_logging_enabled = True
+
+        # Check whether logging is enabled
+        if handle_bool_env_var(ENV_ZENML_DISABLE_PIPELINE_LOGS_STORAGE, False):
+            step_logging_enabled = False
+        else:
+            if (
+                deployment.pipeline_configuration.enable_pipeline_logs
+                is not None
+            ):
+                step_logging_enabled = (
+                    deployment.pipeline_configuration.enable_pipeline_logs
+                )
+
+        if not step_logging_enabled:
+            return nullcontext()
+
+        # Fetch the active stack
+        client = Client()
+        active_stack = client.active_stack
+
+        # Configure the logs
+        logs_uri = prepare_logs_uri(
+            artifact_store=active_stack.artifact_store,
+        )
+
+        logs_context = PipelineLogsStorageContext(
+            logs_uri=logs_uri,
+            artifact_store=active_stack.artifact_store,
+            prepend_step_name=False,
+        )
+
+        logs_model = LogsRequest(
+            uri=logs_uri,
+            source="orchestrator",
+            artifact_store_id=active_stack.artifact_store.id,
+        )
+
+        # Add orchestrator logs to the pipeline run
+        try:
+            run_update = PipelineRunUpdate(add_logs=[logs_model])
+            client.zen_store.update_run(
+                run_id=UUID(run_id), run_update=run_update
+            )
+        except Exception as e:
+            logger.error(
+                f"Failed to add orchestrator logs to the run {run_id}: {e}"
+            )
+            raise e
+        return logs_context
+    except Exception as e:
+        logger.error(
+            f"Failed to setup orchestrator logging for run {run_id}: {e}"
+        )
+        return nullcontext()
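
The returned object is either a PipelineLogsStorageContext or a nullcontext, so orchestrators can always treat it as a context manager. A hedged sketch of how an orchestrator entrypoint might consume it, assuming the function lives in zenml.logging.step_logging and that run_id and deployment are already available:

    from zenml.logging.step_logging import setup_orchestrator_logging

    def orchestrate_run(run_id: str, deployment) -> None:
        logs_context = setup_orchestrator_logging(
            run_id=run_id, deployment=deployment
        )
        # Everything printed inside the block is captured into the artifact
        # store and attached to the run as the "orchestrator" log entry.
        with logs_context:
            print("Provisioning pods for the pipeline run ...")
            # ... launch the actual steps here ...
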
@@ -34,7 +34,7 @@ class LogsRequest(BaseRequest):
     """Request model for logs."""
 
     uri: str = Field(title="The uri of the logs file")
-
+    source: str = Field(title="The source of the logs file")
     artifact_store_id: UUID = Field(
         title="The artifact store ID to associate the logs with.",
     )
@@ -75,6 +75,10 @@ class LogsResponseBody(BaseDatedResponseBody):
         title="The uri of the logs file",
         max_length=TEXT_FIELD_MAX_LENGTH,
     )
+    source: str = Field(
+        title="The source of the logs file",
+        max_length=TEXT_FIELD_MAX_LENGTH,
+    )
 
 
 class LogsResponseMetadata(BaseResponseMetadata):
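
Log entries are now tagged with a source, which lets a single pipeline run own several log files, such as the "client", "orchestrator", and "execution" entries wired up elsewhere in this diff. A small sketch of the request model; the URI and UUID are illustrative:

    from uuid import UUID
    from zenml.models import LogsRequest

    logs_model = LogsRequest(
        uri="s3://my-bucket/pipeline-runs/logs/orchestrator.log",
        source="orchestrator",
        artifact_store_id=UUID("12345678-1234-5678-1234-567812345678"),
    )
    # The matching LogsResponse exposes the same value via the new
    # `source` property added below.
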
@@ -126,6 +130,15 @@ class LogsResponse(
         """
         return self.get_body().uri
 
+    @property
+    def source(self) -> str:
+        """The `source` property.
+
+        Returns:
+            the value of the property.
+        """
+        return self.get_body().source
+
     @property
     def step_run_id(self) -> Optional[UUID]:
         """The `step_run_id` property.
@@ -153,6 +153,9 @@ class PipelineRunUpdate(BaseUpdate):
     remove_tags: Optional[List[str]] = Field(
         default=None, title="Tags to remove from the pipeline run."
     )
+    add_logs: Optional[List[LogsRequest]] = Field(
+        default=None, title="New logs to add to the pipeline run."
+    )
 
     model_config = ConfigDict(protected_namespaces=())
 
@@ -265,6 +268,10 @@ class PipelineRunResponseResources(ProjectScopedResponseResources):
         title="Logs associated with this pipeline run.",
         default=None,
     )
+    log_collection: Optional[List["LogsResponse"]] = Field(
+        title="Logs associated with this pipeline run.",
+        default=None,
+    )
 
     # TODO: In Pydantic v2, the `model_` is a protected namespaces for all
     # fields defined under base models. If not handled, this raises a warning.
@@ -601,6 +608,15 @@ class PipelineRunResponse(
         """
         return self.get_resources().logs
 
+    @property
+    def log_collection(self) -> Optional[List["LogsResponse"]]:
+        """The `log_collection` property.
+
+        Returns:
+            the value of the property.
+        """
+        return self.get_resources().log_collection
+
 
 # ------------------ Filter Model ------------------
 
@@ -241,6 +241,7 @@ class StepLauncher:
 
             logs_model = LogsRequest(
                 uri=logs_uri,
+                source="execution",
                 artifact_store_id=self._stack.artifact_store.id,
             )
 
@@ -856,6 +856,7 @@ To avoid this consider setting pipeline parameters only in one place (config or
 
         logs_model = LogsRequest(
             uri=logs_uri,
+            source="client",
            artifact_store_id=stack.artifact_store.id,
         )
 
@@ -436,22 +436,24 @@ def stop_run(
 @async_fastapi_endpoint_wrapper
 def run_logs(
     run_id: UUID,
+    source: str,
     offset: int = 0,
     length: int = 1024 * 1024 * 16,  # Default to 16MiB of data
     _: AuthContext = Security(authorize),
 ) -> str:
-    """Get pipeline run logs.
+    """Get pipeline run logs for a specific source.
 
     Args:
         run_id: ID of the pipeline run.
+        source: Required source to get logs for.
         offset: The offset from which to start reading.
         length: The amount of bytes that should be read.
 
     Returns:
-        The pipeline run logs.
+        Logs for the specified source.
 
     Raises:
-        KeyError: If no logs are available for the pipeline run.
+        KeyError: If no logs are found for the specified source.
     """
     store = zen_store()
 
@@ -461,19 +463,26 @@ def run_logs(
         hydrate=True,
     )
 
-    if run.deployment_id:
+    # Handle runner logs from workload manager
+    if run.deployment_id and source == "runner":
         deployment = store.get_deployment(run.deployment_id)
         if deployment.template_id and server_config().workload_manager_enabled:
-            return workload_manager().get_logs(workload_id=deployment.id)
-
-    logs = run.logs
-    if logs is None:
-        raise KeyError("No logs available for this pipeline run")
-
-    return fetch_logs(
-        zen_store=store,
-        artifact_store_id=logs.artifact_store_id,
-        logs_uri=logs.uri,
-        offset=offset,
-        length=length,
-    )
+            workload_logs = workload_manager().get_logs(
+                workload_id=deployment.id
+            )
+            return workload_logs
+
+    # Handle logs from log collection
+    if run.log_collection:
+        for log_entry in run.log_collection:
+            if log_entry.source == source:
+                return fetch_logs(
+                    zen_store=store,
+                    artifact_store_id=log_entry.artifact_store_id,
+                    logs_uri=log_entry.uri,
+                    offset=offset,
+                    length=length,
+                )
+
+    # If no logs found for the specified source, raise an error
+    raise KeyError(f"No logs found for source '{source}' in run {run_id}")
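
Because source is now a required parameter, callers must say which log file they want ("client", "orchestrator", "execution", or "runner" for workload-manager logs). A hedged sketch of a direct REST call; the route path, host, and token handling are assumptions based on the existing runs API:

    import requests

    run_id = "12345678-1234-5678-1234-567812345678"  # illustrative
    response = requests.get(
        f"https://zenml.example.com/api/v1/runs/{run_id}/logs",
        params={"source": "orchestrator", "offset": 0, "length": 1024 * 1024},
        headers={"Authorization": "Bearer YOUR_API_TOKEN"},
    )
    response.raise_for_status()
    print(response.text)  # raw log text for the requested source
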
@@ -0,0 +1,68 @@
+"""adding-source-to-logs [85289fea86ff].
+
+Revision ID: 85289fea86ff
+Revises: 5bb25e95849c
+Create Date: 2025-06-30 18:18:24.539265
+
+"""
+
+import sqlalchemy as sa
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision = "85289fea86ff"
+down_revision = "5bb25e95849c"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    """Upgrade database schema and/or data, creating a new revision."""
+    # Add the source column as nullable first
+    with op.batch_alter_table("logs", schema=None) as batch_op:
+        batch_op.add_column(
+            sa.Column("source", sa.VARCHAR(255), nullable=True)
+        )
+
+    # Populate the source field based on existing data
+    connection = op.get_bind()
+
+    # Set source to "step" where step_run_id is present
+    connection.execute(
+        sa.text("""
+            UPDATE logs
+            SET source = 'step'
+            WHERE step_run_id IS NOT NULL
+        """)
+    )
+
+    # Set source to "client" for all other cases (where step_run_id is null)
+    connection.execute(
+        sa.text("""
+            UPDATE logs
+            SET source = 'client'
+            WHERE step_run_id IS NULL
+        """)
+    )
+
+    # Make the source column not nullable
+    with op.batch_alter_table("logs", schema=None) as batch_op:
+        batch_op.alter_column(
+            "source",
+            existing_type=sa.VARCHAR(255),
+            nullable=False,
+        )
+        # Add unique constraint: source is unique for each combination of pipeline_run_id and step_run_id
+        batch_op.create_unique_constraint(
+            "unique_source_per_run_and_step",
+            ["source", "pipeline_run_id", "step_run_id"],
+        )
+
+
+def downgrade() -> None:
+    """Downgrade database schema and/or data back to the previous revision."""
+    with op.batch_alter_table("logs", schema=None) as batch_op:
+        batch_op.drop_constraint(
+            "unique_source_per_run_and_step", type_="unique"
+        )
+        batch_op.drop_column("source")
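
The backfill rule for existing rows is simple: entries tied to a step run become "step" logs, everything else becomes "client" logs. Expressed as a plain Python predicate for clarity (illustrative only; the real work happens in the UPDATE statements above):

    from typing import Optional
    from uuid import UUID

    def backfilled_source(step_run_id: Optional[UUID]) -> str:
        """Mirrors the two UPDATE statements in upgrade()."""
        return "step" if step_run_id is not None else "client"

    assert backfilled_source(UUID("12345678-1234-5678-1234-567812345678")) == "step"
    assert backfilled_source(None) == "client"
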
@@ -16,7 +16,7 @@
 from typing import Any, Optional
 from uuid import UUID
 
-from sqlalchemy import TEXT, Column
+from sqlalchemy import TEXT, VARCHAR, Column, UniqueConstraint
 from sqlmodel import Field, Relationship
 
 from zenml.models import (
@@ -35,9 +35,18 @@ class LogsSchema(BaseSchema, table=True):
     """SQL Model for logs."""
 
     __tablename__ = "logs"
+    __table_args__ = (
+        UniqueConstraint(
+            "source",
+            "pipeline_run_id",
+            "step_run_id",
+            name="unique_source_per_run_and_step",
+        ),
+    )
 
     # Fields
     uri: str = Field(sa_column=Column(TEXT, nullable=False))
+    source: str = Field(sa_column=Column(VARCHAR(255), nullable=False))
 
     # Foreign Keys
     pipeline_run_id: Optional[UUID] = build_foreign_key_field(
@@ -87,12 +96,12 @@ class LogsSchema(BaseSchema, table=True):
             include_resources: Whether the resources will be filled.
             **kwargs: Keyword arguments to allow schema specific logic
 
-
         Returns:
             The created `LogsResponse`.
         """
         body = LogsResponseBody(
             uri=self.uri,
+            source=self.source,
             created=self.created,
             updated=self.updated,
         )
@@ -158,9 +158,9 @@ class PipelineRunSchema(NamedSchema, RunMetadataInterface, table=True):
             overlaps="run_metadata",
         ),
     )
-    logs: Optional["LogsSchema"] = Relationship(
+    logs: List["LogsSchema"] = Relationship(
         back_populates="pipeline_run",
-        sa_relationship_kwargs={"cascade": "delete", "uselist": False},
+        sa_relationship_kwargs={"cascade": "delete"},
     )
     step_runs: List["StepRunSchema"] = Relationship(
         sa_relationship_kwargs={"cascade": "delete"},
@@ -531,13 +531,22 @@ class PipelineRunSchema(NamedSchema, RunMetadataInterface, table=True):
 
         resources = None
         if include_resources:
+            # Add the client logs as "logs" if they exist, for backwards compatibility
+            # TODO: This will be safe to remove in future releases (>0.84.0).
+            client_logs = [
+                log_entry
+                for log_entry in self.logs
+                if log_entry.source == "client"
+            ]
+
             resources = PipelineRunResponseResources(
                 user=self.user.to_model() if self.user else None,
                 model_version=self.model_version.to_model()
                 if self.model_version
                 else None,
                 tags=[tag.to_model() for tag in self.tags],
-                logs=self.logs.to_model() if self.logs else None,
+                logs=client_logs[0].to_model() if client_logs else None,
+                log_collection=[log.to_model() for log in self.logs],
             )
 
         return PipelineRunResponse(
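
For existing callers, the logs resource keeps returning the single client log entry, while the new log_collection resource exposes every entry together with its source. A hedged sketch using the client API (the run name is illustrative):

    from zenml.client import Client

    run = Client().get_pipeline_run("training_pipeline-2025_07_07-12_00_00_000000")

    # Backwards compatible: the client-side logs, if any.
    if run.logs:
        print(run.logs.uri)

    # New: every log entry, keyed by its source.
    for entry in run.log_collection or []:
        print(entry.source, entry.uri)
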
@@ -5677,7 +5677,9 @@ class SqlZenStore(BaseZenStore):
            The created pipeline run.
 
        Raises:
-            EntityExistsError: If a run with the same name already exists.
+            EntityExistsError: If a run with the same name already exists or
+                a log entry with the same source already exists within the
+                scope of the same pipeline run.
        """
        self._set_request_user_id(request_model=pipeline_run, session=session)
        self._get_reference_schema_by_id(
@@ -5698,23 +5700,6 @@
 
        session.add(new_run)
 
-        # Add logs entry for the run if exists
-        if pipeline_run.logs is not None:
-            self._get_reference_schema_by_id(
-                resource=pipeline_run,
-                reference_schema=StackComponentSchema,
-                reference_id=pipeline_run.logs.artifact_store_id,
-                session=session,
-                reference_type="logs artifact store",
-            )
-
-            log_entry = LogsSchema(
-                uri=pipeline_run.logs.uri,
-                pipeline_run_id=new_run.id,
-                artifact_store_id=pipeline_run.logs.artifact_store_id,
-            )
-            session.add(log_entry)
-
        try:
            session.commit()
        except IntegrityError:
@@ -5736,6 +5721,33 @@
                "already exists."
            )
 
+        # Add logs entry for the run if exists
+        if pipeline_run.logs is not None:
+            self._get_reference_schema_by_id(
+                resource=pipeline_run,
+                reference_schema=StackComponentSchema,
+                reference_id=pipeline_run.logs.artifact_store_id,
+                session=session,
+                reference_type="logs artifact store",
+            )
+
+            log_entry = LogsSchema(
+                uri=pipeline_run.logs.uri,
+                source=pipeline_run.logs.source,
+                pipeline_run_id=new_run.id,
+                artifact_store_id=pipeline_run.logs.artifact_store_id,
+            )
+            try:
+                session.add(log_entry)
+                session.commit()
+            except IntegrityError:
+                session.rollback()
+                raise EntityExistsError(
+                    "Unable to create log entry: A log entry with this "
+                    f"source '{pipeline_run.logs.source}' already exists "
+                    f"within the scope of the same pipeline run '{new_run.id}'."
+                )
+
        if model_version_id := self._get_or_create_model_version_for_run(
            new_run
        ):
@@ -6095,6 +6107,10 @@
 
        Returns:
            The updated pipeline run.
+
+        Raises:
+            EntityExistsError: If a log entry with the same source already
+                exists within the scope of the same pipeline run.
        """
        with Session(self.engine) as session:
            # Check if pipeline run with the given ID exists
@@ -6109,6 +6125,39 @@
            session.commit()
            session.refresh(existing_run)
 
+            # Add logs if specified
+            if run_update.add_logs:
+                try:
+                    for log_request in run_update.add_logs:
+                        # Validate the artifact store exists
+                        self._get_reference_schema_by_id(
+                            resource=log_request,
+                            reference_schema=StackComponentSchema,
+                            reference_id=log_request.artifact_store_id,
+                            session=session,
+                            reference_type="logs artifact store",
+                        )
+
+                        # Create the log entry
+                        log_entry = LogsSchema(
+                            uri=log_request.uri,
+                            source=log_request.source,
+                            pipeline_run_id=existing_run.id,
+                            artifact_store_id=log_request.artifact_store_id,
+                        )
+                        session.add(log_entry)
+
+                    session.commit()
+                except IntegrityError:
+                    session.rollback()
+                    raise EntityExistsError(
+                        "Unable to create log entry: One of the provided sources "
+                        f"({', '.join(log.source for log in run_update.add_logs)}) "
+                        "already exists within the scope of the same pipeline run "
+                        f"'{existing_run.id}'. Existing entry sources: "
+                        f"{', '.join(log.source for log in existing_run.logs)}"
+                    )
+
            self._attach_tags_to_resources(
                tags=run_update.add_tags,
                resources=existing_run,
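
Together with the unique constraint on (source, pipeline_run_id, step_run_id), this means attaching a second entry with an already-used source fails with EntityExistsError instead of silently duplicating. A hedged sketch of adding logs to an existing run (IDs are illustrative):

    from uuid import UUID
    from zenml.client import Client
    from zenml.exceptions import EntityExistsError
    from zenml.models import LogsRequest, PipelineRunUpdate

    update = PipelineRunUpdate(
        add_logs=[
            LogsRequest(
                uri="s3://my-bucket/logs/orchestrator.log",
                source="orchestrator",
                artifact_store_id=UUID("12345678-1234-5678-1234-567812345678"),
            )
        ]
    )
    try:
        Client().zen_store.update_run(
            run_id=UUID("87654321-4321-8765-4321-876543218765"),
            run_update=update,
        )
    except EntityExistsError:
        # An "orchestrator" entry already exists for this run.
        raise
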
@@ -8830,7 +8879,9 @@
            The created step run.
 
        Raises:
-            EntityExistsError: if the step run already exists.
+            EntityExistsError: if the step run already exists or a log entry
+                with the same source already exists within the scope of the
+                same step.
            IllegalOperationError: if the pipeline run is stopped or stopping.
        """
        with Session(self.engine) as session:
@@ -8889,11 +8940,20 @@
 
                log_entry = LogsSchema(
                    uri=step_run.logs.uri,
+                    source=step_run.logs.source,
                    step_run_id=step_schema.id,
                    artifact_store_id=step_run.logs.artifact_store_id,
                )
-                session.add(log_entry)
-
+                try:
+                    session.add(log_entry)
+                    session.commit()
+                except IntegrityError:
+                    session.rollback()
+                    raise EntityExistsError(
+                        "Unable to create log entry: A log entry with this "
+                        f"source '{step_run.logs.source}' already exists "
+                        f"within the scope of the same step '{step_schema.id}'."
+                    )
            # If cached, attach metadata of the original step
            if (
                step_run.status == ExecutionStatus.CACHED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: zenml-nightly
-Version: 0.83.1.dev20250706
+Version: 0.83.1.dev20250708
 Summary: ZenML: Write production-ready ML code.
 License: Apache-2.0
 Keywords: machine learning,production,pipeline,mlops,devops