zenml-nightly 0.83.1.dev20250626__py3-none-any.whl → 0.83.1.dev20250628__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zenml/VERSION +1 -1
- zenml/client.py +8 -2
- zenml/integrations/aws/flavors/sagemaker_orchestrator_flavor.py +1 -1
- zenml/integrations/kubernetes/orchestrators/kubernetes_orchestrator.py +43 -8
- zenml/integrations/kubernetes/orchestrators/kubernetes_orchestrator_entrypoint.py +88 -64
- zenml/integrations/kubernetes/orchestrators/kubernetes_orchestrator_entrypoint_configuration.py +0 -12
- zenml/integrations/kubernetes/orchestrators/manifest_utils.py +6 -20
- zenml/integrations/kubernetes/step_operators/kubernetes_step_operator.py +4 -2
- zenml/integrations/vllm/services/vllm_deployment.py +1 -1
- zenml/models/v2/core/pipeline_run.py +10 -0
- zenml/orchestrators/dag_runner.py +12 -3
- zenml/orchestrators/input_utils.py +6 -35
- zenml/orchestrators/step_run_utils.py +89 -15
- zenml/pipelines/pipeline_definition.py +6 -2
- zenml/pipelines/run_utils.py +5 -9
- zenml/stack/stack_component.py +1 -1
- zenml/zen_server/template_execution/utils.py +0 -1
- zenml/zen_stores/schemas/pipeline_run_schemas.py +38 -19
- zenml/zen_stores/schemas/step_run_schemas.py +44 -14
- zenml/zen_stores/sql_zen_store.py +75 -49
- {zenml_nightly-0.83.1.dev20250626.dist-info → zenml_nightly-0.83.1.dev20250628.dist-info}/METADATA +1 -1
- {zenml_nightly-0.83.1.dev20250626.dist-info → zenml_nightly-0.83.1.dev20250628.dist-info}/RECORD +25 -25
- {zenml_nightly-0.83.1.dev20250626.dist-info → zenml_nightly-0.83.1.dev20250628.dist-info}/LICENSE +0 -0
- {zenml_nightly-0.83.1.dev20250626.dist-info → zenml_nightly-0.83.1.dev20250628.dist-info}/WHEEL +0 -0
- {zenml_nightly-0.83.1.dev20250626.dist-info → zenml_nightly-0.83.1.dev20250628.dist-info}/entry_points.txt +0 -0
zenml/orchestrators/dag_runner.py
CHANGED
@@ -72,6 +72,7 @@ class ThreadedDagRunner:
         self,
         dag: Dict[str, List[str]],
         run_fn: Callable[[str], Any],
+        preparation_fn: Optional[Callable[[str], bool]] = None,
         finalize_fn: Optional[Callable[[Dict[str, NodeStatus]], None]] = None,
         parallel_node_startup_waiting_period: float = 0.0,
         max_parallelism: Optional[int] = None,
@@ -83,6 +84,9 @@ class ThreadedDagRunner:
                 E.g.: [(1->2), (1->3), (2->4), (3->4)] should be represented as
                 `dag={2: [1], 3: [1], 4: [2, 3]}`
             run_fn: A function `run_fn(node)` that runs a single node
+            preparation_fn: A function that is called before the node is run.
+                If provided, the function return value determines whether the
+                node should be run or can be skipped.
             finalize_fn: A function `finalize_fn(node_states)` that is called
                 when all nodes have completed.
             parallel_node_startup_waiting_period: Delay in seconds to wait in
@@ -102,6 +106,7 @@ class ThreadedDagRunner:
         self.dag = dag
         self.reversed_dag = reverse_dag(dag)
         self.run_fn = run_fn
+        self.preparation_fn = preparation_fn
         self.finalize_fn = finalize_fn
         self.nodes = dag.keys()
         self.node_states = {
@@ -156,7 +161,7 @@ class ThreadedDagRunner:
                 break

             logger.debug(f"Waiting for {running_nodes} nodes to finish.")
-            time.sleep(
+            time.sleep(1)

     def _run_node(self, node: str) -> None:
         """Run a single node.
@@ -168,6 +173,12 @@ class ThreadedDagRunner:
         """
         self._prepare_node_run(node)

+        if self.preparation_fn:
+            run_required = self.preparation_fn(node)
+            if not run_required:
+                self._finish_node(node)
+                return
+
         try:
             self.run_fn(node)
             self._finish_node(node)
@@ -203,8 +214,6 @@ class ThreadedDagRunner:
             node: The node.
             failed: Whether the node failed.
         """
-        # Update node status to completed.
-        assert self.node_states[node] == NodeStatus.RUNNING
         with self._lock:
             if failed:
                 self.node_states[node] = NodeStatus.FAILED
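Taken together, the `dag_runner.py` hunks above add an optional `preparation_fn` hook that can short-circuit a node before `run_fn` is called (for example, when the node's work turns out to be already done). A minimal standalone sketch of the skip semantics, assuming only what the diff shows; names outside the diff are illustrative:

```python
from typing import Callable, Optional

def run_node(
    node: str,
    run_fn: Callable[[str], None],
    preparation_fn: Optional[Callable[[str], bool]] = None,
) -> None:
    # Mirrors the diff: if the hook exists and returns False,
    # the node is finished without ever calling run_fn.
    if preparation_fn is not None and not preparation_fn(node):
        print(f"{node}: skipped")
        return
    run_fn(node)
    print(f"{node}: ran")

already_done = {"step_a"}  # hypothetical set of cached nodes
for node in ["step_a", "step_b"]:
    run_node(
        node,
        run_fn=lambda n: None,
        preparation_fn=lambda n: n not in already_done,
    )
```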
zenml/orchestrators/input_utils.py
CHANGED
@@ -13,14 +13,13 @@
 # permissions and limitations under the License.
 """Utilities for inputs."""

-import json
 from typing import TYPE_CHECKING, Dict, Optional

 from zenml.client import Client
 from zenml.config.step_configurations import Step
 from zenml.enums import StepRunInputArtifactType
 from zenml.exceptions import InputResolutionError
-from zenml.utils import pagination_utils, string_utils
+from zenml.utils import string_utils

 if TYPE_CHECKING:
     from zenml.models import PipelineRunResponse, StepRunResponse
@@ -52,6 +51,7 @@ def resolve_step_inputs(
     """
     from zenml.models import ArtifactVersionResponse
     from zenml.models.v2.core.step_run import StepRunInputResponse
+    from zenml.orchestrators.step_run_utils import fetch_step_runs_by_names

     step_runs = step_runs or {}

@@ -62,40 +62,11 @@ def resolve_step_inputs(
     steps_to_fetch.difference_update(step_runs.keys())

     if steps_to_fetch:
-
-
-
-        steps_list = list(steps_to_fetch)
-        chunks = []
-        current_chunk = []
-        current_length = 0
-        # stay under 6KB for good measure.
-        max_chunk_length = 6000
-
-        for step_name in steps_list:
-            current_chunk.append(step_name)
-            current_length += len(step_name) + 5  # 5 is for the JSON encoding
-
-            if current_length > max_chunk_length:
-                chunks.append(current_chunk)
-                current_chunk = []
-                current_length = 0
-
-        if current_chunk:
-            chunks.append(current_chunk)
-
-        for chunk in chunks:
-            step_runs.update(
-                {
-                    run_step.name: run_step
-                    for run_step in pagination_utils.depaginate(
-                        Client().list_run_steps,
-                        pipeline_run_id=pipeline_run.id,
-                        project=pipeline_run.project_id,
-                        name="oneof:" + json.dumps(chunk),
-                    )
-                }
+        step_runs.update(
+            fetch_step_runs_by_names(
+                step_run_names=list(steps_to_fetch), pipeline_run=pipeline_run
             )
+        )

     input_artifacts: Dict[str, StepRunInputResponse] = {}
     for name, input_ in step.spec.inputs.items():
zenml/orchestrators/step_run_utils.py
CHANGED
@@ -13,6 +13,7 @@
 # permissions and limitations under the License.
 """Utilities for creating step runs."""

+import json
 from typing import Dict, List, Optional, Set, Tuple, Union

 from zenml import Tag, add_tags
@@ -32,6 +33,7 @@ from zenml.models import (
 )
 from zenml.orchestrators import cache_utils, input_utils, utils
 from zenml.stack import Stack
+from zenml.utils import pagination_utils
 from zenml.utils.time_utils import utc_now

 logger = get_logger(__name__)
@@ -151,6 +153,15 @@ class StepRunRequestFactory:
             request.status = ExecutionStatus.CACHED
             request.end_time = request.start_time

+            # As a last resort, we try to reuse the docstring/source code
+            # from the cached step run. This is part of the cache key
+            # computation, so it must be identical to the one we would have
+            # computed ourselves.
+            if request.source_code is None:
+                request.source_code = cached_step_run.source_code
+            if request.docstring is None:
+                request.docstring = cached_step_run.docstring
+
     def _get_docstring_and_source_code(
         self, invocation_id: str
     ) -> Tuple[Optional[str], Optional[str]]:
@@ -333,27 +344,15 @@ def create_cached_step_runs(
             # -> We don't need to do anything here
             continue

-        step_run = Client().zen_store.create_run_step(step_run_request)
+        step_run = publish_cached_step_run(
+            step_run_request, pipeline_run=pipeline_run
+        )

         # Include the newly created step run in the step runs dictionary to
         # avoid fetching it again later when downstream steps need it for
         # input resolution.
         step_runs[invocation_id] = step_run

-        if (
-            model_version := step_run.model_version
-            or pipeline_run.model_version
-        ):
-            link_output_artifacts_to_model_version(
-                artifacts=step_run.outputs,
-                model_version=model_version,
-            )
-
-        cascade_tags_for_output_artifacts(
-            artifacts=step_run.outputs,
-            tags=pipeline_run.config.tags,
-        )
-
         logger.info("Using cached version of step `%s`.", invocation_id)
         cached_invocations.add(invocation_id)

@@ -426,3 +425,78 @@ def cascade_tags_for_output_artifacts(
         tags=[t.name for t in cascade_tags],
         artifact_version_id=output_artifact.id,
     )
+
+
+def publish_cached_step_run(
+    request: "StepRunRequest", pipeline_run: "PipelineRunResponse"
+) -> "StepRunResponse":
+    """Create a cached step run and link to model version and tags.
+
+    Args:
+        request: The request for the step run.
+        pipeline_run: The pipeline run of the step.
+
+    Returns:
+        The created step run.
+    """
+    step_run = Client().zen_store.create_run_step(request)
+
+    if model_version := step_run.model_version or pipeline_run.model_version:
+        link_output_artifacts_to_model_version(
+            artifacts=step_run.outputs,
+            model_version=model_version,
+        )
+
+    cascade_tags_for_output_artifacts(
+        artifacts=step_run.outputs,
+        tags=pipeline_run.config.tags,
+    )
+
+    return step_run
+
+
+def fetch_step_runs_by_names(
+    step_run_names: List[str], pipeline_run: "PipelineRunResponse"
+) -> Dict[str, "StepRunResponse"]:
+    """Fetch step runs by names.
+
+    Args:
+        step_run_names: The names of the step runs to fetch.
+        pipeline_run: The pipeline run of the step runs.
+
+    Returns:
+        A dictionary of step runs by name.
+    """
+    step_runs = {}
+
+    chunks = []
+    current_chunk = []
+    current_length = 0
+    # stay under 6KB for good measure.
+    max_chunk_length = 6000
+
+    for step_name in step_run_names:
+        current_chunk.append(step_name)
+        current_length += len(step_name) + 5  # 5 is for the JSON encoding
+
+        if current_length > max_chunk_length:
+            chunks.append(current_chunk)
+            current_chunk = []
+            current_length = 0
+
+    if current_chunk:
+        chunks.append(current_chunk)
+
+    for chunk in chunks:
+        step_runs.update(
+            {
+                run_step.name: run_step
+                for run_step in pagination_utils.depaginate(
+                    Client().list_run_steps,
+                    pipeline_run_id=pipeline_run.id,
+                    project=pipeline_run.project_id,
+                    name="oneof:" + json.dumps(chunk),
+                )
+            }
+        )
+    return step_runs
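The chunking in `fetch_step_runs_by_names` exists because the `oneof:` name filter is serialized into the request URL; splitting the names keeps each request under the ~6KB budget the comment mentions. A rough self-contained illustration of the same batching logic (the 6000-character budget and 5-character-per-item JSON overhead come from the diff; the sample names are made up):

```python
import json
from typing import List

def chunk_names(names: List[str], max_chunk_length: int = 6000) -> List[List[str]]:
    # Accumulate names until the estimated JSON-encoded size of the
    # chunk exceeds the budget, then start a new chunk.
    chunks: List[List[str]] = []
    current_chunk: List[str] = []
    current_length = 0
    for name in names:
        current_chunk.append(name)
        current_length += len(name) + 5  # rough per-item JSON overhead
        if current_length > max_chunk_length:
            chunks.append(current_chunk)
            current_chunk = []
            current_length = 0
    if current_chunk:
        chunks.append(current_chunk)
    return chunks

names = [f"step_{i}" for i in range(2000)]
chunks = chunk_names(names)
# Each chunk's serialized filter value stays comfortably small.
assert all(len(json.dumps(chunk)) < 7000 for chunk in chunks)
print(f"{len(names)} names -> {len(chunks)} requests")
```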
zenml/pipelines/pipeline_definition.py
CHANGED
@@ -863,8 +863,12 @@ To avoid this consider setting pipeline parameters only in one place (config or
             deployment = self._create_deployment(**self._run_args)

             self.log_pipeline_deployment_metadata(deployment)
-            run = create_placeholder_run(
-                deployment=deployment, logs=logs_model
+            run = (
+                create_placeholder_run(
+                    deployment=deployment, logs=logs_model
+                )
+                if not deployment.schedule
+                else None
             )

             analytics_handler.metadata = (
zenml/pipelines/run_utils.py
CHANGED
@@ -51,23 +51,19 @@ def get_default_run_name(pipeline_name: str) -> str:

 def create_placeholder_run(
     deployment: "PipelineDeploymentResponse",
+    orchestrator_run_id: Optional[str] = None,
     logs: Optional["LogsRequest"] = None,
-) -> Optional["PipelineRunResponse"]:
+) -> "PipelineRunResponse":
     """Create a placeholder run for the deployment.

-    If the deployment contains a schedule, no placeholder run will be
-    created.
-
     Args:
         deployment: The deployment for which to create the placeholder run.
+        orchestrator_run_id: The orchestrator run ID for the run.
         logs: The logs for the run.

     Returns:
-        The placeholder run
+        The placeholder run.
     """
-    if deployment.schedule:
-        return None
-
     start_time = utc_now()
     run_request = PipelineRunRequest(
         name=string_utils.format_name_template(
@@ -83,7 +79,7 @@ def create_placeholder_run(
         # the start_time is only set once the first step starts
        # running.
         start_time=start_time,
-        orchestrator_run_id=None,
+        orchestrator_run_id=orchestrator_run_id,
         project=deployment.project_id,
         deployment=deployment.id,
         pipeline=deployment.pipeline.id if deployment.pipeline else None,
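These two hunks move the schedule check out of `create_placeholder_run` and into its caller in `pipeline_definition.py`: the helper now always builds a run (and can carry a pre-known `orchestrator_run_id`), while the caller decides whether a placeholder is wanted at all. A hedged sketch of the caller-side pattern, using stand-in types rather than ZenML's real models:

```python
from typing import Optional

class Deployment:
    """Stand-in for PipelineDeploymentResponse with just the field we need."""
    def __init__(self, schedule: Optional[str] = None) -> None:
        self.schedule = schedule

def create_placeholder_run(
    deployment: Deployment, orchestrator_run_id: Optional[str] = None
) -> str:
    # Stub: the real helper builds and submits a PipelineRunRequest.
    return f"run(orchestrator_run_id={orchestrator_run_id})"

def submit(deployment: Deployment) -> Optional[str]:
    # After the change, the schedule check lives at the call site.
    return (
        create_placeholder_run(deployment)
        if not deployment.schedule
        else None
    )

assert submit(Deployment()) is not None
assert submit(Deployment(schedule="*/5 * * * *")) is None
```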
zenml/stack/stack_component.py
CHANGED
@@ -527,7 +527,7 @@ class StackComponent:
         )

         # Use the current config as a base
-        settings_dict = self.config.model_dump()
+        settings_dict = self.config.model_dump(exclude_unset=True)

         if key in all_settings:
             settings_dict.update(dict(all_settings[key]))
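The `stack_component.py` change dumps only explicitly-set config fields into the base settings dict. With plain `model_dump()`, every default is materialized and then participates in the merge; with `exclude_unset=True`, untouched fields stay absent, so they cannot mask values contributed by other settings layers. A small generic Pydantic example (not ZenML's actual config classes):

```python
from pydantic import BaseModel

class ResourceConfig(BaseModel):
    cpu: int = 1
    memory_gb: int = 4

config = ResourceConfig(cpu=2)  # memory_gb never set explicitly

print(config.model_dump())                    # {'cpu': 2, 'memory_gb': 4}
print(config.model_dump(exclude_unset=True))  # {'cpu': 2}

# Using the sparse dict as the merge base means an unset default can
# no longer shadow a value coming from another layer of the merge.
other_layer = {"memory_gb": 16}
merged = {**other_layer, **config.model_dump(exclude_unset=True)}
assert merged == {"memory_gb": 16, "cpu": 2}
```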
zenml/zen_stores/schemas/pipeline_run_schemas.py
CHANGED
@@ -20,7 +20,7 @@ from uuid import UUID

 from pydantic import ConfigDict
 from sqlalchemy import UniqueConstraint
-from sqlalchemy.orm import joinedload
+from sqlalchemy.orm import selectinload
 from sqlalchemy.sql.base import ExecutableOption
 from sqlmodel import TEXT, Column, Field, Relationship

@@ -51,7 +51,9 @@ from zenml.zen_stores.schemas.pipeline_deployment_schemas import (
 from zenml.zen_stores.schemas.pipeline_schemas import PipelineSchema
 from zenml.zen_stores.schemas.project_schemas import ProjectSchema
 from zenml.zen_stores.schemas.schedule_schema import ScheduleSchema
-from zenml.zen_stores.schemas.schema_utils import
+from zenml.zen_stores.schemas.schema_utils import (
+    build_foreign_key_field,
+)
 from zenml.zen_stores.schemas.stack_schemas import StackSchema
 from zenml.zen_stores.schemas.trigger_schemas import TriggerExecutionSchema
 from zenml.zen_stores.schemas.user_schemas import UserSchema
@@ -259,19 +261,19 @@ class PipelineRunSchema(NamedSchema, RunMetadataInterface, table=True):
         from zenml.zen_stores.schemas import ModelVersionSchema

         options = [
-            joinedload(jl_arg(PipelineRunSchema.deployment)).joinedload(
+            selectinload(jl_arg(PipelineRunSchema.deployment)).joinedload(
                 jl_arg(PipelineDeploymentSchema.pipeline)
             ),
-            joinedload(jl_arg(PipelineRunSchema.deployment)).joinedload(
+            selectinload(jl_arg(PipelineRunSchema.deployment)).joinedload(
                 jl_arg(PipelineDeploymentSchema.stack)
             ),
-            joinedload(jl_arg(PipelineRunSchema.deployment)).joinedload(
+            selectinload(jl_arg(PipelineRunSchema.deployment)).joinedload(
                 jl_arg(PipelineDeploymentSchema.build)
             ),
-            joinedload(jl_arg(PipelineRunSchema.deployment)).joinedload(
+            selectinload(jl_arg(PipelineRunSchema.deployment)).joinedload(
                 jl_arg(PipelineDeploymentSchema.schedule)
             ),
-            joinedload(jl_arg(PipelineRunSchema.deployment)).joinedload(
+            selectinload(jl_arg(PipelineRunSchema.deployment)).joinedload(
                 jl_arg(PipelineDeploymentSchema.code_reference)
             ),
         ]
@@ -286,14 +288,14 @@ class PipelineRunSchema(NamedSchema, RunMetadataInterface, table=True):
         if include_resources:
             options.extend(
                 [
-                    joinedload(
+                    selectinload(
                         jl_arg(PipelineRunSchema.model_version)
                     ).joinedload(
                         jl_arg(ModelVersionSchema.model), innerjoin=True
                     ),
-                    joinedload(jl_arg(PipelineRunSchema.logs)),
-                    joinedload(jl_arg(PipelineRunSchema.user)),
-                    joinedload(jl_arg(PipelineRunSchema.tags)),
+                    selectinload(jl_arg(PipelineRunSchema.logs)),
+                    selectinload(jl_arg(PipelineRunSchema.user)),
+                    selectinload(jl_arg(PipelineRunSchema.tags)),
                 ]
             )

@@ -550,8 +552,8 @@ class PipelineRunSchema(NamedSchema, RunMetadataInterface, table=True):

         Raises:
             RuntimeError: If the DB entry does not represent a placeholder run.
-            ValueError: If the run request
-
+            ValueError: If the run request is not a valid request to replace the
+                placeholder run.

         Returns:
             The updated `PipelineRunSchema`.
@@ -562,13 +564,33 @@ class PipelineRunSchema(NamedSchema, RunMetadataInterface, table=True):
                 "placeholder run."
             )

+        if request.is_placeholder_request:
+            raise ValueError(
+                "Cannot replace a placeholder run with another placeholder run."
+            )
+
         if (
             self.deployment_id != request.deployment
             or self.pipeline_id != request.pipeline
+            or self.project_id != request.project
         ):
             raise ValueError(
-                "Deployment or pipeline ID of placeholder run do not "
-                "match the IDs of the run request."
+                "Deployment, project or pipeline ID of placeholder run "
+                "do not match the IDs of the run request."
+            )
+
+        if not request.orchestrator_run_id:
+            raise ValueError(
+                "Orchestrator run ID is required to replace a placeholder run."
+            )
+
+        if (
+            self.orchestrator_run_id
+            and self.orchestrator_run_id != request.orchestrator_run_id
+        ):
+            raise ValueError(
+                "Orchestrator run ID of placeholder run does not match the "
+                "ID of the run request."
             )

         orchestrator_environment = json.dumps(request.orchestrator_environment)
@@ -587,7 +609,4 @@ class PipelineRunSchema(NamedSchema, RunMetadataInterface, table=True):
         Returns:
             Whether the pipeline run is a placeholder run.
         """
-        return (
-            self.orchestrator_run_id is None
-            and self.status == ExecutionStatus.INITIALIZING
-        )
+        return self.status == ExecutionStatus.INITIALIZING.value
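Across both schema files the eager-loading strategy moves from `joinedload` to `selectinload` for most relationships: instead of folding everything into one wide join, which fans out parent rows on to-many paths, `selectinload` issues a follow-up `SELECT ... WHERE ... IN (...)`. A generic SQLAlchemy 2.0 sketch of the two loader styles on toy models (not ZenML's schemas):

```python
from typing import List
from sqlalchemy import ForeignKey, create_engine, select
from sqlalchemy.orm import (
    DeclarativeBase,
    Mapped,
    Session,
    joinedload,
    mapped_column,
    relationship,
    selectinload,
)

class Base(DeclarativeBase):
    pass

class Run(Base):
    __tablename__ = "run"
    id: Mapped[int] = mapped_column(primary_key=True)
    tags: Mapped[List["RunTag"]] = relationship(back_populates="run")

class RunTag(Base):
    __tablename__ = "run_tag"
    id: Mapped[int] = mapped_column(primary_key=True)
    run_id: Mapped[int] = mapped_column(ForeignKey("run.id"))
    run: Mapped[Run] = relationship(back_populates="tags")

engine = create_engine("sqlite://")
Base.metadata.create_all(engine)

with Session(engine) as session:
    session.add(Run(tags=[RunTag(), RunTag()]))
    session.commit()

    # joinedload: one LEFT OUTER JOIN; the parent row repeats per tag,
    # so .unique() is required when eagerly loading collections this way.
    joined = session.scalars(
        select(Run).options(joinedload(Run.tags))
    ).unique().all()

    # selectinload: two queries (runs, then tags WHERE run_id IN ...),
    # with no row fan-out on the parent query.
    selected = session.scalars(
        select(Run).options(selectinload(Run.tags))
    ).all()

    assert len(joined) == len(selected) == 1
```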
zenml/zen_stores/schemas/step_run_schemas.py
CHANGED
@@ -21,7 +21,7 @@ from uuid import UUID

 from pydantic import ConfigDict
 from sqlalchemy import TEXT, Column, String, UniqueConstraint
 from sqlalchemy.dialects.mysql import MEDIUMTEXT
-from sqlalchemy.orm import joinedload
+from sqlalchemy.orm import joinedload, selectinload
 from sqlalchemy.sql.base import ExecutableOption
 from sqlmodel import Field, Relationship, SQLModel

@@ -50,6 +50,7 @@ from zenml.zen_stores.schemas.base_schemas import NamedSchema
 from zenml.zen_stores.schemas.constants import MODEL_VERSION_TABLENAME
 from zenml.zen_stores.schemas.pipeline_deployment_schemas import (
     PipelineDeploymentSchema,
+    StepConfigurationSchema,
 )
 from zenml.zen_stores.schemas.pipeline_run_schemas import PipelineRunSchema
 from zenml.zen_stores.schemas.project_schemas import ProjectSchema
@@ -187,6 +188,14 @@ class StepRunSchema(NamedSchema, RunMetadataInterface, table=True):
     original_step_run: Optional["StepRunSchema"] = Relationship(
         sa_relationship_kwargs={"remote_side": "StepRunSchema.id"}
     )
+    step_configuration_schema: Optional["StepConfigurationSchema"] = (
+        Relationship(
+            sa_relationship_kwargs=dict(
+                viewonly=True,
+                primaryjoin="and_(foreign(StepConfigurationSchema.name) == StepRunSchema.name, foreign(StepConfigurationSchema.deployment_id) == StepRunSchema.deployment_id)",
+            ),
+        )
+    )

     model_config = ConfigDict(protected_namespaces=())  # type: ignore[assignment]

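The new `step_configuration_schema` relationship has no foreign key backing it; it joins `StepConfigurationSchema` on the composite (`name`, `deployment_id`) pair, which is why the diff declares it `viewonly=True` with an explicit string `primaryjoin` using `foreign()` annotations. A generic sketch of that SQLAlchemy pattern with toy tables (not the real schemas):

```python
from typing import Optional
from sqlalchemy import create_engine, select
from sqlalchemy.orm import (
    DeclarativeBase,
    Mapped,
    Session,
    mapped_column,
    relationship,
)

class Base(DeclarativeBase):
    pass

class StepConfig(Base):
    __tablename__ = "step_config"
    id: Mapped[int] = mapped_column(primary_key=True)
    deployment_id: Mapped[int]
    name: Mapped[str]
    config: Mapped[str]

class StepRun(Base):
    __tablename__ = "step_run"
    id: Mapped[int] = mapped_column(primary_key=True)
    deployment_id: Mapped[int]
    name: Mapped[str]

    # No FK between the tables: foreign() marks the join columns, and
    # viewonly=True keeps the ORM from trying to persist the relation.
    config_row: Mapped[Optional[StepConfig]] = relationship(
        viewonly=True,
        primaryjoin=(
            "and_(foreign(StepConfig.name) == StepRun.name, "
            "foreign(StepConfig.deployment_id) == StepRun.deployment_id)"
        ),
    )

engine = create_engine("sqlite://")
Base.metadata.create_all(engine)

with Session(engine) as session:
    session.add_all(
        [
            StepConfig(deployment_id=1, name="trainer", config="{}"),
            StepRun(deployment_id=1, name="trainer"),
        ]
    )
    session.commit()

    run = session.scalars(select(StepRun)).one()
    assert run.config_row is not None and run.config_row.config == "{}"
```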
@@ -209,17 +218,25 @@ class StepRunSchema(NamedSchema, RunMetadataInterface, table=True):
         Returns:
             A list of query options.
         """
-        from zenml.zen_stores.schemas import ModelVersionSchema
+        from zenml.zen_stores.schemas import (
+            ArtifactVersionSchema,
+            ModelVersionSchema,
+        )

         options = [
-            joinedload(jl_arg(StepRunSchema.deployment)),
-            joinedload(jl_arg(StepRunSchema.pipeline_run)),
+            selectinload(jl_arg(StepRunSchema.deployment)).load_only(
+                jl_arg(PipelineDeploymentSchema.pipeline_configuration)
+            ),
+            selectinload(jl_arg(StepRunSchema.pipeline_run)).load_only(
+                jl_arg(PipelineRunSchema.start_time)
+            ),
+            joinedload(jl_arg(StepRunSchema.step_configuration_schema)),
         ]

         if include_metadata:
             options.extend(
                 [
-                    joinedload(jl_arg(StepRunSchema.logs)),
+                    selectinload(jl_arg(StepRunSchema.logs)),
                     # joinedload(jl_arg(StepRunSchema.parents)),
                     # joinedload(jl_arg(StepRunSchema.run_metadata)),
                 ]
@@ -228,12 +245,28 @@ class StepRunSchema(NamedSchema, RunMetadataInterface, table=True):
         if include_resources:
             options.extend(
                 [
-                    joinedload(jl_arg(StepRunSchema.model_version)).joinedload(
+                    selectinload(
+                        jl_arg(StepRunSchema.model_version)
+                    ).joinedload(
                         jl_arg(ModelVersionSchema.model), innerjoin=True
                     ),
-                    joinedload(jl_arg(StepRunSchema.user)),
-                    joinedload(jl_arg(StepRunSchema.input_artifacts)),
-                    joinedload(jl_arg(StepRunSchema.output_artifacts)),
+                    selectinload(jl_arg(StepRunSchema.user)),
+                    selectinload(jl_arg(StepRunSchema.input_artifacts))
+                    .joinedload(
+                        jl_arg(StepRunInputArtifactSchema.artifact_version),
+                        innerjoin=True,
+                    )
+                    .joinedload(
+                        jl_arg(ArtifactVersionSchema.artifact), innerjoin=True
+                    ),
+                    selectinload(jl_arg(StepRunSchema.output_artifacts))
+                    .joinedload(
+                        jl_arg(StepRunOutputArtifactSchema.artifact_version),
+                        innerjoin=True,
+                    )
+                    .joinedload(
+                        jl_arg(ArtifactVersionSchema.artifact), innerjoin=True
+                    ),
                 ]
             )

@@ -290,10 +323,7 @@ class StepRunSchema(NamedSchema, RunMetadataInterface, table=True):
         """
         step = None
         if self.deployment is not None:
-
-                include=[self.name]
-            )
-            if step_configurations:
+            if self.step_configuration_schema:
                 pipeline_configuration = (
                     PipelineConfiguration.model_validate_json(
                         self.deployment.pipeline_configuration
@@ -304,7 +334,7 @@ class StepRunSchema(NamedSchema, RunMetadataInterface, table=True):
                     inplace=True,
                 )
                 step = Step.from_dict(
-                    json.loads(
+                    json.loads(self.step_configuration_schema.config),
                     pipeline_configuration=pipeline_configuration,
                 )
         if not step and self.step_configuration:
|