zenml-nightly 0.83.1.dev20250624__py3-none-any.whl → 0.83.1.dev20250626__py3-none-any.whl
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
- zenml/VERSION +1 -1
- zenml/cli/base.py +3 -2
- zenml/cli/login.py +21 -3
- zenml/cli/service_connectors.py +5 -12
- zenml/cli/stack.py +1 -5
- zenml/cli/utils.py +8 -52
- zenml/client.py +32 -40
- zenml/config/__init__.py +13 -2
- zenml/constants.py +0 -1
- zenml/exceptions.py +16 -0
- zenml/integrations/airflow/orchestrators/airflow_orchestrator.py +15 -6
- zenml/integrations/aws/container_registries/aws_container_registry.py +3 -1
- zenml/integrations/aws/orchestrators/sagemaker_orchestrator.py +54 -58
- zenml/integrations/azure/orchestrators/azureml_orchestrator.py +28 -19
- zenml/integrations/databricks/orchestrators/databricks_orchestrator.py +19 -63
- zenml/integrations/databricks/orchestrators/databricks_orchestrator_entrypoint_config.py +8 -3
- zenml/integrations/gcp/orchestrators/vertex_orchestrator.py +36 -61
- zenml/integrations/hyperai/orchestrators/hyperai_orchestrator.py +19 -22
- zenml/integrations/integration.py +23 -58
- zenml/integrations/kubeflow/orchestrators/kubeflow_orchestrator.py +28 -31
- zenml/integrations/kubernetes/orchestrators/kubernetes_orchestrator.py +33 -20
- zenml/integrations/lightning/orchestrators/lightning_orchestrator.py +25 -100
- zenml/integrations/skypilot/orchestrators/skypilot_base_vm_orchestrator.py +19 -8
- zenml/integrations/skypilot/utils.py +17 -13
- zenml/integrations/tekton/orchestrators/tekton_orchestrator.py +28 -12
- zenml/models/__init__.py +2 -0
- zenml/models/v2/core/service_connector.py +178 -108
- zenml/models/v2/core/step_run.py +1 -0
- zenml/orchestrators/__init__.py +2 -0
- zenml/orchestrators/base_orchestrator.py +137 -66
- zenml/orchestrators/input_utils.py +5 -13
- zenml/orchestrators/local/local_orchestrator.py +19 -9
- zenml/orchestrators/local_docker/local_docker_orchestrator.py +15 -5
- zenml/orchestrators/publish_utils.py +24 -0
- zenml/orchestrators/step_run_utils.py +1 -2
- zenml/pipelines/run_utils.py +12 -7
- zenml/service_connectors/service_connector.py +11 -61
- zenml/service_connectors/service_connector_utils.py +4 -2
- zenml/step_operators/step_operator_entrypoint_configuration.py +1 -1
- zenml/utils/package_utils.py +111 -1
- zenml/zen_server/routers/service_connectors_endpoints.py +7 -22
- zenml/zen_stores/migrations/versions/5bb25e95849c_add_internal_secrets.py +62 -0
- zenml/zen_stores/rest_zen_store.py +204 -132
- zenml/zen_stores/schemas/secret_schemas.py +5 -0
- zenml/zen_stores/schemas/service_connector_schemas.py +16 -14
- zenml/zen_stores/secrets_stores/service_connector_secrets_store.py +4 -1
- zenml/zen_stores/sql_zen_store.py +241 -119
- zenml/zen_stores/zen_store_interface.py +9 -1
- {zenml_nightly-0.83.1.dev20250624.dist-info → zenml_nightly-0.83.1.dev20250626.dist-info}/METADATA +1 -1
- {zenml_nightly-0.83.1.dev20250624.dist-info → zenml_nightly-0.83.1.dev20250626.dist-info}/RECORD +53 -53
- zenml/utils/integration_utils.py +0 -34
- {zenml_nightly-0.83.1.dev20250624.dist-info → zenml_nightly-0.83.1.dev20250626.dist-info}/LICENSE +0 -0
- {zenml_nightly-0.83.1.dev20250624.dist-info → zenml_nightly-0.83.1.dev20250626.dist-info}/WHEEL +0 -0
- {zenml_nightly-0.83.1.dev20250624.dist-info → zenml_nightly-0.83.1.dev20250626.dist-info}/entry_points.txt +0 -0
zenml/integrations/aws/orchestrators/sagemaker_orchestrator.py

@@ -19,7 +19,6 @@ from typing import (
     TYPE_CHECKING,
     Any,
     Dict,
-    Iterator,
     List,
     Optional,
     Tuple,
@@ -60,7 +59,6 @@ from zenml.constants import (
 )
 from zenml.enums import (
     ExecutionStatus,
-    MetadataResourceTypes,
     StackComponentType,
 )
 from zenml.integrations.aws.flavors.sagemaker_orchestrator_flavor import (
@@ -73,7 +71,7 @@ from zenml.integrations.aws.orchestrators.sagemaker_orchestrator_entrypoint_conf
 )
 from zenml.logger import get_logger
 from zenml.metadata.metadata_types import MetadataType, Uri
-from zenml.orchestrators import ContainerizedOrchestrator
+from zenml.orchestrators import ContainerizedOrchestrator, SubmissionResult
 from zenml.orchestrators.utils import get_orchestrator_run_name
 from zenml.stack import StackValidator
 from zenml.utils.env_utils import split_environment_variables
@@ -273,20 +271,25 @@ class SagemakerOrchestrator(ContainerizedOrchestrator):
             boto_session=boto_session, default_bucket=self.config.bucket
         )

-    def prepare_or_run_pipeline(
+    def submit_pipeline(
         self,
         deployment: "PipelineDeploymentResponse",
         stack: "Stack",
         environment: Dict[str, str],
         placeholder_run: Optional["PipelineRunResponse"] = None,
-    ) -> Iterator[Dict[str, MetadataType]]:
-        """Prepares or runs a pipeline on Sagemaker.
+    ) -> Optional[SubmissionResult]:
+        """Submits a pipeline to the orchestrator.
+
+        This method should only submit the pipeline and not wait for it to
+        complete. If the orchestrator is configured to wait for the pipeline run
+        to complete, a function that waits for the pipeline run to complete can
+        be passed as part of the submission result.

         Args:
-            deployment: The deployment to prepare or run.
-            stack: The stack the pipeline will run on.
+            deployment: The pipeline deployment to submit.
+            stack: The stack the pipeline will run on.
             environment: Environment variables to set in the orchestration
-                environment.
+                environment. These don't need to be set if running locally.
             placeholder_run: An optional placeholder run for the deployment.

         Raises:
@@ -296,8 +299,8 @@ class SagemakerOrchestrator(ContainerizedOrchestrator):
                 AWS SageMaker NetworkConfig class.
             ValueError: If the schedule is not valid.

-        Yields:
-            A dictionary of metadata related to the pipeline run.
+        Returns:
+            Optional submission result.
         """
         # sagemaker requires pipelineName to use alphanum and hyphens only
         unsanitized_orchestrator_run_name = get_orchestrator_run_name(
@@ -705,26 +708,14 @@ class SagemakerOrchestrator(ContainerizedOrchestrator):
            )
            logger.info(f"The schedule ARN is: {triggers[0]}")

+            schedule_metadata = {}
            try:
-                from zenml.models import RunMetadataResource
-
                schedule_metadata = self.generate_schedule_metadata(
                    schedule_arn=triggers[0]
                )
-
-                Client().create_run_metadata(
-                    metadata=schedule_metadata,  # type: ignore[arg-type]
-                    resources=[
-                        RunMetadataResource(
-                            id=deployment.schedule.id,
-                            type=MetadataResourceTypes.SCHEDULE,
-                        )
-                    ],
-                )
            except Exception as e:
                logger.debug(
-                    "There was an error … "
-                    f"schedule: {e}"
+                    "There was an error generating schedule metadata: %s", e
                )

            logger.info(
@@ -749,6 +740,7 @@ class SagemakerOrchestrator(ContainerizedOrchestrator):
            logger.info(
                f"`aws scheduler delete-schedule --name {schedule_name}`"
            )
+            return SubmissionResult(metadata=schedule_metadata)
        else:
            # Execute the pipeline immediately if no schedule is specified
            execution = pipeline.start()
@@ -757,33 +749,40 @@ class SagemakerOrchestrator(ContainerizedOrchestrator):
                "when using the Sagemaker Orchestrator."
            )

-            …
-            yield from self.compute_metadata(
+            run_metadata = self.compute_metadata(
                execution_arn=execution.arn, settings=settings
            )

+            _wait_for_completion = None
            if settings.synchronous:
-                logger.info(
-                    "Executing synchronously. Waiting for pipeline to "
-                    "finish... \n"
-                    "At this point you can `Ctrl-C` out without cancelling the "
-                    "execution."
-                )
-                try:
-                    execution.wait(
-                        delay=POLLING_DELAY, max_attempts=MAX_POLLING_ATTEMPTS
-                    )
-                    logger.info("Pipeline completed successfully.")
-                except WaiterError:
-                    raise RuntimeError(
-                        "Timed out while waiting for pipeline execution to "
-                        "finish. For long-running pipelines we recommend "
-                        "configuring your orchestrator for asynchronous "
-                        "execution. The following command does this for you: \n"
-                        f"`zenml orchestrator update {self.name} "
-                        f"--synchronous=False`"
+
+                def _wait_for_completion() -> None:
+                    logger.info(
+                        "Executing synchronously. Waiting for pipeline to "
+                        "finish... \n"
+                        "At this point you can `Ctrl-C` out without cancelling the "
+                        "execution."
                    )
+                    try:
+                        execution.wait(
+                            delay=POLLING_DELAY,
+                            max_attempts=MAX_POLLING_ATTEMPTS,
+                        )
+                        logger.info("Pipeline completed successfully.")
+                    except WaiterError:
+                        raise RuntimeError(
+                            "Timed out while waiting for pipeline execution to "
+                            "finish. For long-running pipelines we recommend "
+                            "configuring your orchestrator for asynchronous "
+                            "execution. The following command does this for you: \n"
+                            f"`zenml orchestrator update {self.name} "
+                            f"--synchronous=False`"
+                        )
+
+            return SubmissionResult(
+                wait_for_completion=_wait_for_completion,
+                metadata=run_metadata,
+            )

     def get_pipeline_run_metadata(
         self, run_id: UUID
@@ -798,20 +797,15 @@ class SagemakerOrchestrator(ContainerizedOrchestrator):
         """
         execution_arn = os.environ[ENV_ZENML_SAGEMAKER_RUN_ID]

-        run_metadata: Dict[str, "MetadataType"] = {}
-
         settings = cast(
             SagemakerOrchestratorSettings,
             self.get_settings(Client().get_pipeline_run(run_id)),
         )

-        for metadata in self.compute_metadata(
+        return self.compute_metadata(
             execution_arn=execution_arn,
             settings=settings,
-        ):
-            run_metadata.update(metadata)
-
-        return run_metadata
+        )

     def fetch_status(self, run: "PipelineRunResponse") -> ExecutionStatus:
         """Refreshes the status of a specific pipeline run.
@@ -873,14 +867,14 @@ class SagemakerOrchestrator(ContainerizedOrchestrator):
         self,
         execution_arn: str,
         settings: SagemakerOrchestratorSettings,
-    ) -> Iterator[Dict[str, MetadataType]]:
+    ) -> Dict[str, MetadataType]:
         """Generate run metadata based on the generated Sagemaker Execution.

         Args:
             execution_arn: The ARN of the pipeline execution.
             settings: The Sagemaker orchestrator settings.

-        Yields:
+        Returns:
             A dictionary of metadata related to the pipeline run.
         """
         # Orchestrator Run ID
@@ -901,7 +895,7 @@ class SagemakerOrchestrator(ContainerizedOrchestrator):
         ):
             metadata[METADATA_ORCHESTRATOR_LOGS_URL] = Uri(logs_url)

-        yield metadata
+        return metadata

     def _compute_orchestrator_url(
         self,
@@ -979,7 +973,9 @@ class SagemakerOrchestrator(ContainerizedOrchestrator):
         return None

     @staticmethod
-    def generate_schedule_metadata(schedule_arn: str) -> Dict[str, MetadataType]:
+    def generate_schedule_metadata(
+        schedule_arn: str,
+    ) -> Dict[str, MetadataType]:
         """Attaches metadata to the ZenML Schedules.

         Args:
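The SageMaker hunks above capture the shape of the new orchestrator contract: the generator-style `prepare_or_run_pipeline` (which yielded metadata dictionaries) becomes `submit_pipeline`, which returns an optional `SubmissionResult` carrying the run metadata plus an optional callback that blocks until the run finishes. A minimal sketch of an orchestrator written against the new contract follows; `ExampleOrchestrator`, `_start_job`, and the `synchronous` settings flag are illustrative assumptions, and only the `SubmissionResult(metadata=..., wait_for_completion=...)` shape is taken from the diff:

    from typing import Dict, Optional

    from zenml.orchestrators import ContainerizedOrchestrator, SubmissionResult


    class ExampleOrchestrator(ContainerizedOrchestrator):
        """Hypothetical orchestrator illustrating the submit/wait split."""

        def submit_pipeline(
            self,
            deployment: "PipelineDeploymentResponse",
            stack: "Stack",
            environment: Dict[str, str],
            placeholder_run: Optional["PipelineRunResponse"] = None,
        ) -> Optional[SubmissionResult]:
            # Kick off the run on the backend and return immediately.
            job = self._start_job(deployment, environment)  # hypothetical helper

            # Build a waiter only when the user asked for synchronous
            # execution; the caller decides whether and when to invoke it.
            _wait_for_completion = None
            if self.get_settings(deployment).synchronous:  # assumed settings flag

                def _wait_for_completion() -> None:
                    job.wait()  # block until the backend job finishes

            return SubmissionResult(
                metadata={"orchestrator_job_id": str(job.id)},
                wait_for_completion=_wait_for_completion,
            )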
zenml/integrations/azure/orchestrators/azureml_orchestrator.py

@@ -19,7 +19,6 @@ from typing import (
     TYPE_CHECKING,
     Any,
     Dict,
-    Iterator,
     List,
     Optional,
     Tuple,
@@ -63,7 +62,7 @@ from zenml.integrations.azure.orchestrators.azureml_orchestrator_entrypoint_conf
 )
 from zenml.logger import get_logger
 from zenml.metadata.metadata_types import MetadataType, Uri
-from zenml.orchestrators import ContainerizedOrchestrator
+from zenml.orchestrators import ContainerizedOrchestrator, SubmissionResult
 from zenml.orchestrators.utils import get_orchestrator_run_name
 from zenml.stack import StackValidator
 from zenml.utils.string_utils import b64_encode
@@ -198,27 +197,32 @@ class AzureMLOrchestrator(ContainerizedOrchestrator):
             command=" ".join(command + arguments),
         )

-    def prepare_or_run_pipeline(
+    def submit_pipeline(
         self,
         deployment: "PipelineDeploymentResponse",
         stack: "Stack",
         environment: Dict[str, str],
         placeholder_run: Optional["PipelineRunResponse"] = None,
-    ) -> Iterator[Dict[str, MetadataType]]:
-        """Prepares or runs a pipeline on AzureML.
+    ) -> Optional[SubmissionResult]:
+        """Submits a pipeline to the orchestrator.
+
+        This method should only submit the pipeline and not wait for it to
+        complete. If the orchestrator is configured to wait for the pipeline run
+        to complete, a function that waits for the pipeline run to complete can
+        be passed as part of the submission result.

         Args:
-            deployment: The deployment to prepare or run.
-            stack: The stack the pipeline will run on.
+            deployment: The pipeline deployment to submit.
+            stack: The stack the pipeline will run on.
             environment: Environment variables to set in the orchestration
-                environment.
+                environment. These don't need to be set if running locally.
             placeholder_run: An optional placeholder run for the deployment.

         Raises:
             RuntimeError: If the creation of the schedule fails.

-        Yields:
-            A dictionary of metadata related to the pipeline run.
+        Returns:
+            Optional submission result.
         """
         # Authentication
         if connector := self.get_connector():
@@ -384,14 +388,11 @@ class AzureMLOrchestrator(ContainerizedOrchestrator):
                    "Failed to create schedule for the pipeline "
                    f"'{run_name}': {str(e)}"
                )
-
+            return None
        else:
            job = ml_client.jobs.create_or_update(pipeline_job)
            logger.info(f"Pipeline {run_name} has been started.")

-            # Yield metadata based on the generated job object
-            yield from self.compute_metadata(job)
-
            assert job.services is not None
            assert job.name is not None

@@ -401,9 +402,17 @@ class AzureMLOrchestrator(ContainerizedOrchestrator):
                f"{job.services['Studio'].endpoint}"
            )

+            _wait_for_completion = None
            if settings.synchronous:
-                logger.info("Waiting for pipeline to finish...")
-                ml_client.jobs.stream(job.name)
+
+                def _wait_for_completion() -> None:
+                    logger.info("Waiting for pipeline to finish...")
+                    ml_client.jobs.stream(job.name)
+
+            return SubmissionResult(
+                metadata=self.compute_metadata(job),
+                wait_for_completion=_wait_for_completion,
+            )

     def get_pipeline_run_metadata(
         self, run_id: UUID
@@ -518,13 +527,13 @@ class AzureMLOrchestrator(ContainerizedOrchestrator):
         else:
             raise ValueError("Unknown status for the pipeline job.")

-    def compute_metadata(self, job: Any) -> Iterator[Dict[str, MetadataType]]:
+    def compute_metadata(self, job: Any) -> Dict[str, MetadataType]:
         """Generate run metadata based on the generated AzureML PipelineJob.

         Args:
             job: The corresponding PipelineJob object.

-        Yields:
+        Returns:
             A dictionary of metadata related to the pipeline run.
         """
         # Metadata
@@ -538,7 +547,7 @@ class AzureMLOrchestrator(ContainerizedOrchestrator):
         if orchestrator_url := self._compute_orchestrator_url(job):
             metadata[METADATA_ORCHESTRATOR_URL] = Uri(orchestrator_url)

-        yield metadata
+        return metadata

     @staticmethod
     def _compute_orchestrator_url(job: Any) -> Optional[str]:
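The AzureML changes follow the same pattern, with the waiter wrapping `ml_client.jobs.stream(job.name)` instead of a boto3 waiter. For illustration, a caller consuming a `SubmissionResult` might look like the following; this is hypothetical driver code (`store_run_metadata` is a placeholder), since the real consumption lives in the `zenml/orchestrators/base_orchestrator.py` changes listed above:

    result = orchestrator.submit_pipeline(deployment, stack, environment)
    if result:
        if result.metadata:
            store_run_metadata(result.metadata)  # placeholder publishing step
        if result.wait_for_completion:
            result.wait_for_completion()  # blocks only in synchronous mode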
zenml/integrations/databricks/orchestrators/databricks_orchestrator.py

@@ -15,7 +15,7 @@

 import itertools
 import os
-from typing import TYPE_CHECKING, Dict, Iterator, List, Optional, Tuple, Type, cast
+from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Type, cast
 from uuid import UUID

 from databricks.sdk import WorkspaceClient as DatabricksClient
@@ -48,10 +48,12 @@ from zenml.io import fileio
 from zenml.logger import get_logger
 from zenml.metadata.metadata_types import MetadataType, Uri
 from zenml.models.v2.core.schedule import ScheduleResponse
+from zenml.orchestrators import (
+    SubmissionResult,
+    WheeledOrchestrator,
+)
 from zenml.orchestrators.utils import get_orchestrator_run_name
-from zenml.orchestrators.wheeled_orchestrator import WheeledOrchestrator
 from zenml.stack import StackValidator
-from zenml.utils import io_utils
 from zenml.utils.package_utils import clean_requirements
 from zenml.utils.pipeline_docker_image_builder import (
     PipelineDockerImageBuilder,
@@ -67,20 +69,13 @@ logger = get_logger(__name__)
 ZENML_STEP_DEFAULT_ENTRYPOINT_COMMAND = "entrypoint.main"
 DATABRICKS_WHEELS_DIRECTORY_PREFIX = "dbfs:/FileStore/zenml"
 DATABRICKS_LOCAL_FILESYSTEM_PREFIX = "file:/"
-DATABRICKS_CLUSTER_DEFAULT_NAME = "zenml-databricks-cluster"
 DATABRICKS_SPARK_DEFAULT_VERSION = "15.3.x-scala2.12"
 DATABRICKS_JOB_ID_PARAMETER_REFERENCE = "{{job.id}}"
 DATABRICKS_ZENML_DEFAULT_CUSTOM_REPOSITORY_PATH = "."


 class DatabricksOrchestrator(WheeledOrchestrator):
-    """Base class for Orchestrator responsible for running pipelines remotely in a VM.
-
-    This orchestrator does not support running on a schedule.
-    """
-
-    # The default instance type to use if none is specified in settings
-    DEFAULT_INSTANCE_TYPE: Optional[str] = None
+    """Databricks orchestrator."""

     @property
     def validator(self) -> Optional[StackValidator]:
@@ -168,69 +163,39 @@ class DatabricksOrchestrator(WheeledOrchestrator):
             f"{ENV_ZENML_DATABRICKS_ORCHESTRATOR_RUN_ID}."
         )

-    @property
-    def root_directory(self) -> str:
-        """Path to the root directory for all files concerning this orchestrator.
-
-        Returns:
-            Path to the root directory.
-        """
-        return os.path.join(
-            io_utils.get_global_config_directory(),
-            "databricks",
-            str(self.id),
-        )
-
-    @property
-    def pipeline_directory(self) -> str:
-        """Returns path to a directory in which the kubeflow pipeline files are stored.
-
-        Returns:
-            Path to the pipeline directory.
-        """
-        return os.path.join(self.root_directory, "pipelines")
-
     def setup_credentials(self) -> None:
         """Set up credentials for the orchestrator."""
         connector = self.get_connector()
         assert connector is not None
         connector.configure_local_client()

-    def prepare_or_run_pipeline(
+    def submit_pipeline(
         self,
         deployment: "PipelineDeploymentResponse",
         stack: "Stack",
         environment: Dict[str, str],
         placeholder_run: Optional["PipelineRunResponse"] = None,
-    ) -> Iterator[Dict[str, MetadataType]]:
-        """Creates a wheel and uploads the pipeline to Databricks.
-
-        This functions as an intermediary representation of the pipeline which
-        is then deployed to the kubeflow pipelines instance.
-
-        How it works:
-        -------------
-        Before this method is called the `prepare_pipeline_deployment()`
-        method builds a docker image that contains the code for the
-        pipeline, all steps the context around these files.
+    ) -> Optional[SubmissionResult]:
+        """Submits a pipeline to the orchestrator.

-        …
-        between these task are then also configured onto each
-        task by pointing at the downstream steps.
+        This method should only submit the pipeline and not wait for it to
+        complete. If the orchestrator is configured to wait for the pipeline run
+        to complete, a function that waits for the pipeline run to complete can
+        be passed as part of the submission result.

         Args:
-            deployment: The pipeline deployment to prepare or run.
+            deployment: The pipeline deployment to submit.
             stack: The stack the pipeline will run on.
             environment: Environment variables to set in the orchestration
-                environment.
+                environment. These don't need to be set if running locally.
             placeholder_run: An optional placeholder run for the deployment.

         Raises:
             ValueError: If the schedule is not set or if the cron expression
                 is not set.
+
+        Returns:
+            Optional submission result.
         """
         settings = cast(
             DatabricksOrchestratorSettings, self.get_settings(deployment)
@@ -339,11 +304,6 @@ class DatabricksOrchestrator(WheeledOrchestrator):
         orchestrator_run_name = get_orchestrator_run_name(
             pipeline_name=deployment.pipeline_configuration.name
         )
-        # Get a filepath to use to save the finished yaml to
-        fileio.makedirs(self.pipeline_directory)
-        pipeline_file_path = os.path.join(
-            self.pipeline_directory, f"{orchestrator_run_name}.yaml"
-        )

         # Copy the repository to a temporary directory and add a setup.py file
         repository_temp_dir = (
@@ -382,11 +342,6 @@ class DatabricksOrchestrator(WheeledOrchestrator):

         fileio.rmtree(repository_temp_dir)

-        logger.info(
-            "Writing Databricks workflow definition to `%s`.",
-            pipeline_file_path,
-        )
-
         # using the databricks client uploads the pipeline to databricks
         job_cluster_key = self.sanitize_name(f"{deployment_id}")
         self._upload_and_run_pipeline(
@@ -399,6 +354,7 @@ class DatabricksOrchestrator(WheeledOrchestrator):
             job_cluster_key=job_cluster_key,
             schedule=deployment.schedule,
         )
+        return None

     def _upload_and_run_pipeline(
         self,
zenml/integrations/databricks/orchestrators/databricks_orchestrator_entrypoint_config.py

@@ -17,7 +17,10 @@ import os
 import sys
 from typing import Any, List, Set

-…
+if sys.version_info < (3, 10):
+    from importlib_metadata import distribution
+else:
+    from importlib.metadata import distribution

 from zenml.entrypoints.step_entrypoint_configuration import (
     StepEntrypointConfiguration,
@@ -81,8 +84,10 @@ class DatabricksEntrypointConfiguration(StepEntrypointConfiguration):
         """Runs the step."""
         # Get the wheel package and add it to the sys path
         wheel_package = self.entrypoint_args[WHEEL_PACKAGE_OPTION]
-        …
-        …
+
+        dist = distribution(wheel_package)
+        project_root = os.path.join(dist.locate_file("."), wheel_package)
+
         if project_root not in sys.path:
             sys.path.insert(0, project_root)
             sys.path.insert(-1, project_root)