zenml-nightly 0.66.0.dev20240924__py3-none-any.whl → 0.66.0.dev20240926__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zenml/VERSION +1 -1
- zenml/cli/__init__.py +7 -0
- zenml/cli/pipeline.py +21 -0
- zenml/constants.py +3 -0
- zenml/integrations/__init__.py +1 -0
- zenml/integrations/aws/orchestrators/sagemaker_orchestrator.py +288 -71
- zenml/integrations/azure/orchestrators/azureml_orchestrator.py +157 -4
- zenml/integrations/constants.py +1 -0
- zenml/integrations/deepchecks/__init__.py +1 -1
- zenml/integrations/deepchecks/data_validators/deepchecks_data_validator.py +55 -14
- zenml/integrations/deepchecks/validation_checks.py +62 -5
- zenml/integrations/gcp/orchestrators/vertex_orchestrator.py +207 -18
- zenml/integrations/lightning/__init__.py +1 -1
- zenml/integrations/lightning/flavors/lightning_orchestrator_flavor.py +9 -0
- zenml/integrations/lightning/orchestrators/lightning_orchestrator.py +18 -17
- zenml/integrations/lightning/orchestrators/lightning_orchestrator_entrypoint.py +2 -6
- zenml/integrations/mlflow/steps/mlflow_registry.py +2 -0
- zenml/integrations/skypilot/orchestrators/skypilot_base_vm_orchestrator.py +38 -26
- zenml/integrations/skypilot_kubernetes/__init__.py +52 -0
- zenml/integrations/skypilot_kubernetes/flavors/__init__.py +26 -0
- zenml/integrations/skypilot_kubernetes/flavors/skypilot_orchestrator_kubernetes_vm_flavor.py +125 -0
- zenml/integrations/skypilot_kubernetes/orchestrators/__init__.py +25 -0
- zenml/integrations/skypilot_kubernetes/orchestrators/skypilot_kubernetes_vm_orchestrator.py +74 -0
- zenml/models/v2/core/pipeline_run.py +62 -1
- zenml/new/pipelines/run_utils.py +4 -1
- zenml/orchestrators/base_orchestrator.py +41 -12
- zenml/stack/stack.py +11 -2
- zenml/zen_server/cloud_utils.py +33 -8
- zenml/zen_server/dashboard/assets/{404-iO8vpun1.js → 404-CMnKjD-L.js} +1 -1
- zenml/zen_server/dashboard/assets/{@reactflow-B6kq9fJZ.js → @reactflow-CEC2f0cl.js} +1 -1
- zenml/zen_server/dashboard/assets/AlertDialogDropdownItem-BqM1UpCD.js +1 -0
- zenml/zen_server/dashboard/assets/{CodeSnippet-DNWdQmbo.js → CodeSnippet-DRy_0J4D.js} +2 -2
- zenml/zen_server/dashboard/assets/{CollapsibleCard-B2OVjWYE.js → CollapsibleCard-lE-75Zob.js} +1 -1
- zenml/zen_server/dashboard/assets/{Commands-DsoaVElZ.js → Commands-CVx2RAoT.js} +1 -1
- zenml/zen_server/dashboard/assets/{CopyButton-BqE_-PHO.js → CopyButton-C_yRGWuP.js} +1 -1
- zenml/zen_server/dashboard/assets/{CsvVizualization-Dyasr2jU.js → CsvVizualization-Dd0P02Iz.js} +1 -1
- zenml/zen_server/dashboard/assets/{DialogItem-Cz1VLRwa.js → DialogItem-BCrc2wIk.js} +1 -1
- zenml/zen_server/dashboard/assets/{Error-DorJD_va.js → Error-BuMJbG-M.js} +1 -1
- zenml/zen_server/dashboard/assets/{ExecutionStatus-CIfQTutR.js → ExecutionStatus-fIulMG4w.js} +1 -1
- zenml/zen_server/dashboard/assets/{Helpbox-CmfvtNeq.js → Helpbox-CJAp4kbv.js} +1 -1
- zenml/zen_server/dashboard/assets/Infobox-CC70zvGO.js +1 -0
- zenml/zen_server/dashboard/assets/{InlineAvatar-Ds2ZFHPc.js → InlineAvatar-C3QXdFW1.js} +1 -1
- zenml/zen_server/dashboard/assets/{Partials-DX-8iEa1.js → Partials-Cb8lrNsi.js} +1 -1
- zenml/zen_server/dashboard/assets/{ProviderIcon-BOQJgapd.js → ProviderIcon-C9BuYVSN.js} +1 -1
- zenml/zen_server/dashboard/assets/{ProviderRadio-BsYBw9YA.js → ProviderRadio-GYc9PJtG.js} +1 -1
- zenml/zen_server/dashboard/assets/{SearchField-W3GXpLlI.js → SearchField-BeF1yR7M.js} +1 -1
- zenml/zen_server/dashboard/assets/SecretTooltip-DgVWrPxX.js +1 -0
- zenml/zen_server/dashboard/assets/{SetPassword-B-0a8UCj.js → SetPassword-nAhHddXW.js} +1 -1
- zenml/zen_server/dashboard/assets/{Tick-i1DYsVcX.js → Tick-C5ZVvNRQ.js} +1 -1
- zenml/zen_server/dashboard/assets/{UpdatePasswordSchemas-C6Zb7ASL.js → UpdatePasswordSchemas-7KFsDbKb.js} +1 -1
- zenml/zen_server/dashboard/assets/UsageReason-DL5NL_ZD.js +1 -0
- zenml/zen_server/dashboard/assets/{WizardFooter-BHbO7zOa.js → WizardFooter-CgvFSppz.js} +1 -1
- zenml/zen_server/dashboard/assets/{all-pipeline-runs-query-BBEe6I9-.js → all-pipeline-runs-query-DAPSF_74.js} +1 -1
- zenml/zen_server/dashboard/assets/{cloud-only-BuP4Kt_7.js → cloud-only-CxoNxh86.js} +1 -1
- zenml/zen_server/dashboard/assets/{create-stack-B2x2d4r1.js → create-stack-BfgeXFuV.js} +1 -1
- zenml/zen_server/dashboard/assets/delete-run-OkGmZQ5G.js +1 -0
- zenml/zen_server/dashboard/assets/{form-schemas-Bap0f854.js → form-schemas-C09PrQUJ.js} +1 -1
- zenml/zen_server/dashboard/assets/{index-DFi8BroH.js → index-CLT4K7oC.js} +1 -1
- zenml/zen_server/dashboard/assets/{index-B9wVwe7u.js → index-D0bJjaey.js} +3 -3
- zenml/zen_server/dashboard/assets/index-PcI3Xw77.css +1 -0
- zenml/zen_server/dashboard/assets/{login-mutation-DwxUz8VA.js → login-mutation-CB45FHbP.js} +1 -1
- zenml/zen_server/dashboard/assets/{not-found-D5i9DunU.js → not-found-NtCUfXiV.js} +1 -1
- zenml/zen_server/dashboard/assets/page-AvcQe_oR.js +1 -0
- zenml/zen_server/dashboard/assets/page-B6DccgPa.js +1 -0
- zenml/zen_server/dashboard/assets/{page-xQG6GmFJ.js → page-B7DTiwhv.js} +1 -1
- zenml/zen_server/dashboard/assets/{page-CIbehp7V.js → page-B7LduaiG.js} +1 -1
- zenml/zen_server/dashboard/assets/{page-CEJWu1YO.js → page-B8WlhDq6.js} +1 -1
- zenml/zen_server/dashboard/assets/{page-BitfWsiW.js → page-BIhP9udn.js} +1 -1
- zenml/zen_server/dashboard/assets/{page-DE03uZZR.js → page-BLS9bXB8.js} +1 -1
- zenml/zen_server/dashboard/assets/{page-DFCK65G9.js → page-BYXn4SXu.js} +1 -1
- zenml/zen_server/dashboard/assets/{page-bimkItOg.js → page-Bfvwt3AB.js} +1 -1
- zenml/zen_server/dashboard/assets/{page-D5F3DJjm.js → page-BipKr1Pt.js} +1 -1
- zenml/zen_server/dashboard/assets/page-BwG4f5qc.js +1 -0
- zenml/zen_server/dashboard/assets/page-C1c_unjg.js +9 -0
- zenml/zen_server/dashboard/assets/{page-DQdwZZ9x.js → page-C25tiRdj.js} +1 -1
- zenml/zen_server/dashboard/assets/page-CIATsAA7.js +1 -0
- zenml/zen_server/dashboard/assets/{page-iwoJnwPv.js → page-CKUVhcYr.js} +1 -1
- zenml/zen_server/dashboard/assets/{page-BiF8hLbO.js → page-CXLwze-m.js} +1 -1
- zenml/zen_server/dashboard/assets/page-D7TD0k_A.js +1 -0
- zenml/zen_server/dashboard/assets/{page-CDOQLrPC.js → page-DIlOQjGU.js} +1 -1
- zenml/zen_server/dashboard/assets/{page-DGMa3ZQL.js → page-DJ31Huvj.js} +1 -1
- zenml/zen_server/dashboard/assets/{page-J0s8Sq3N.js → page-DOqsdVzG.js} +1 -1
- zenml/zen_server/dashboard/assets/{page-DQGCHKrQ.js → page-DUapawuM.js} +1 -1
- zenml/zen_server/dashboard/assets/{page-WCQ659by.js → page-Dd3jZyrf.js} +1 -1
- zenml/zen_server/dashboard/assets/{page-CrSdkteO.js → page-DyZzYHWA.js} +2 -2
- zenml/zen_server/dashboard/assets/page-L_xNBh_5.js +3 -0
- zenml/zen_server/dashboard/assets/{page-oS4hqS8M.js → page-VsrKiIdF.js} +1 -1
- zenml/zen_server/dashboard/assets/{page-DgM-N9RL.js → page-ioO58ULo.js} +1 -1
- zenml/zen_server/dashboard/assets/page-kalpiPZz.js +6 -0
- zenml/zen_server/dashboard/assets/{persist-xsYgVtR1.js → persist-ChKZVcn3.js} +1 -1
- zenml/zen_server/dashboard/assets/{persist-mEZN_fgH.js → persist-DodaLO0k.js} +1 -1
- zenml/zen_server/dashboard/assets/{sharedSchema-BfZcy7aP.js → sharedSchema-BvRWAv-c.js} +1 -1
- zenml/zen_server/dashboard/assets/{stack-detail-query-CU4egfhp.js → stack-detail-query-C9XwNP1E.js} +1 -1
- zenml/zen_server/dashboard/assets/tick-circle-m-hJG8i9.js +1 -0
- zenml/zen_server/dashboard/assets/{update-server-settings-mutation-DNqmQXDM.js → update-server-settings-mutation-DJDefwqW.js} +1 -1
- zenml/zen_server/dashboard/assets/{url-DwbuKk1b.js → url-DdWrpIhi.js} +1 -1
- zenml/zen_server/dashboard/index.html +4 -4
- zenml/zen_server/dashboard_legacy/asset-manifest.json +4 -4
- zenml/zen_server/dashboard_legacy/index.html +1 -1
- zenml/zen_server/dashboard_legacy/{precache-manifest.290b95d5b43efa3368b3dc63d20c4782.js → precache-manifest.4f9db97de1b48fd5944e8a766c1300fe.js} +4 -4
- zenml/zen_server/dashboard_legacy/service-worker.js +1 -1
- zenml/zen_server/dashboard_legacy/static/js/{main.840d1bf0.chunk.js → main.0fdd4aad.chunk.js} +2 -2
- zenml/zen_server/dashboard_legacy/static/js/{main.840d1bf0.chunk.js.map → main.0fdd4aad.chunk.js.map} +1 -1
- zenml/zen_server/routers/runs_endpoints.py +89 -3
- {zenml_nightly-0.66.0.dev20240924.dist-info → zenml_nightly-0.66.0.dev20240926.dist-info}/METADATA +8 -1
- {zenml_nightly-0.66.0.dev20240924.dist-info → zenml_nightly-0.66.0.dev20240926.dist-info}/RECORD +109 -102
- zenml/zen_server/dashboard/assets/AlertDialogDropdownItem-BXeSvmMY.js +0 -1
- zenml/zen_server/dashboard/assets/EditSecretDialog-Du423_3U.js +0 -1
- zenml/zen_server/dashboard/assets/Infobox-BL9NOS37.js +0 -1
- zenml/zen_server/dashboard/assets/UsageReason-CCnzmwS8.js +0 -1
- zenml/zen_server/dashboard/assets/index-6DYjZgDn.css +0 -1
- zenml/zen_server/dashboard/assets/page-BFuJICXM.js +0 -9
- zenml/zen_server/dashboard/assets/page-CLiRGfWo.js +0 -1
- zenml/zen_server/dashboard/assets/page-CV44mQn9.js +0 -1
- zenml/zen_server/dashboard/assets/page-DI-qTWrm.js +0 -1
- zenml/zen_server/dashboard/assets/page-Dt8VgzbE.js +0 -1
- zenml/zen_server/dashboard/assets/page-oSqx9dkH.js +0 -1
- zenml/zen_server/dashboard/assets/page-p3GqEAUW.js +0 -1
- zenml/zen_server/dashboard/assets/page-qvcUVPE-.js +0 -1
- {zenml_nightly-0.66.0.dev20240924.dist-info → zenml_nightly-0.66.0.dev20240926.dist-info}/LICENSE +0 -0
- {zenml_nightly-0.66.0.dev20240924.dist-info → zenml_nightly-0.66.0.dev20240926.dist-info}/WHEEL +0 -0
- {zenml_nightly-0.66.0.dev20240924.dist-info → zenml_nightly-0.66.0.dev20240926.dist-info}/entry_points.txt +0 -0
@@ -32,10 +32,12 @@
|
|
32
32
|
import os
|
33
33
|
import re
|
34
34
|
import types
|
35
|
+
import urllib
|
35
36
|
from typing import (
|
36
37
|
TYPE_CHECKING,
|
37
38
|
Any,
|
38
39
|
Dict,
|
40
|
+
Iterator,
|
39
41
|
List,
|
40
42
|
Optional,
|
41
43
|
Tuple,
|
@@ -46,15 +48,18 @@ from uuid import UUID
|
|
46
48
|
|
47
49
|
from google.api_core import exceptions as google_exceptions
|
48
50
|
from google.cloud import aiplatform
|
51
|
+
from google.cloud.aiplatform_v1.types import PipelineState
|
49
52
|
from kfp import dsl
|
50
53
|
from kfp.compiler import Compiler
|
51
54
|
|
52
55
|
from zenml.config.resource_settings import ResourceSettings
|
53
56
|
from zenml.constants import (
|
57
|
+
METADATA_ORCHESTRATOR_LOGS_URL,
|
58
|
+
METADATA_ORCHESTRATOR_RUN_ID,
|
54
59
|
METADATA_ORCHESTRATOR_URL,
|
55
60
|
)
|
56
61
|
from zenml.entrypoints import StepEntrypointConfiguration
|
57
|
-
from zenml.enums import StackComponentType
|
62
|
+
from zenml.enums import ExecutionStatus, StackComponentType
|
58
63
|
from zenml.integrations.gcp import GCP_ARTIFACT_STORE_FLAVOR
|
59
64
|
from zenml.integrations.gcp.constants import (
|
60
65
|
GKE_ACCELERATOR_NODE_SELECTOR_CONSTRAINT_LABEL,
|
@@ -77,7 +82,11 @@ from zenml.utils.io_utils import get_global_config_directory
|
|
77
82
|
|
78
83
|
if TYPE_CHECKING:
|
79
84
|
from zenml.config.base_settings import BaseSettings
|
80
|
-
from zenml.models import
|
85
|
+
from zenml.models import (
|
86
|
+
PipelineDeploymentResponse,
|
87
|
+
PipelineRunResponse,
|
88
|
+
ScheduleResponse,
|
89
|
+
)
|
81
90
|
from zenml.stack import Stack
|
82
91
|
|
83
92
|
logger = get_logger(__name__)
|
@@ -245,8 +254,8 @@ class VertexOrchestrator(ContainerizedOrchestrator, GoogleCredentialsMixin):
|
|
245
254
|
):
|
246
255
|
logger.warning(
|
247
256
|
"Vertex orchestrator only uses schedules with the "
|
248
|
-
"`cron_expression` property, with optional `start_time`
|
249
|
-
"All other properties are ignored."
|
257
|
+
"`cron_expression` property, with optional `start_time` "
|
258
|
+
"and/or `end_time`. All other properties are ignored."
|
250
259
|
)
|
251
260
|
if deployment.schedule.cron_expression is None:
|
252
261
|
raise ValueError(
|
@@ -302,7 +311,7 @@ class VertexOrchestrator(ContainerizedOrchestrator, GoogleCredentialsMixin):
|
|
302
311
|
deployment: "PipelineDeploymentResponse",
|
303
312
|
stack: "Stack",
|
304
313
|
environment: Dict[str, str],
|
305
|
-
) ->
|
314
|
+
) -> Iterator[Dict[str, MetadataType]]:
|
306
315
|
"""Creates a KFP JSON pipeline.
|
307
316
|
|
308
317
|
# noqa: DAR402
|
@@ -337,12 +346,15 @@ class VertexOrchestrator(ContainerizedOrchestrator, GoogleCredentialsMixin):
|
|
337
346
|
environment.
|
338
347
|
|
339
348
|
Raises:
|
340
|
-
ValueError: If the attribute `pipeline_root` is not set and it
|
349
|
+
ValueError: If the attribute `pipeline_root` is not set, and it
|
341
350
|
cannot be generated using the path of the artifact store in the
|
342
351
|
stack because it is not a
|
343
352
|
`zenml.integrations.gcp.artifact_store.GCPArtifactStore`. Also gets
|
344
353
|
raised if attempting to schedule pipeline run without using the
|
345
354
|
`zenml.integrations.gcp.artifact_store.GCPArtifactStore`.
|
355
|
+
|
356
|
+
Yields:
|
357
|
+
A dictionary of metadata related to the pipeline run.
|
346
358
|
"""
|
347
359
|
orchestrator_run_name = get_orchestrator_run_name(
|
348
360
|
pipeline_name=deployment.pipeline_configuration.name
|
@@ -556,15 +568,15 @@ class VertexOrchestrator(ContainerizedOrchestrator, GoogleCredentialsMixin):
|
|
556
568
|
)
|
557
569
|
|
558
570
|
# Using the Google Cloud AIPlatform client, upload and execute the
|
559
|
-
# pipeline
|
560
|
-
|
561
|
-
self._upload_and_run_pipeline(
|
571
|
+
# pipeline on the Vertex AI Pipelines service.
|
572
|
+
if metadata := self._upload_and_run_pipeline(
|
562
573
|
pipeline_name=deployment.pipeline_configuration.name,
|
563
574
|
pipeline_file_path=pipeline_file_path,
|
564
575
|
run_name=orchestrator_run_name,
|
565
576
|
settings=settings,
|
566
577
|
schedule=deployment.schedule,
|
567
|
-
)
|
578
|
+
):
|
579
|
+
yield from metadata
|
568
580
|
|
569
581
|
def _upload_and_run_pipeline(
|
570
582
|
self,
|
@@ -573,7 +585,7 @@ class VertexOrchestrator(ContainerizedOrchestrator, GoogleCredentialsMixin):
|
|
573
585
|
run_name: str,
|
574
586
|
settings: VertexOrchestratorSettings,
|
575
587
|
schedule: Optional["ScheduleResponse"] = None,
|
576
|
-
) ->
|
588
|
+
) -> Iterator[Dict[str, MetadataType]]:
|
577
589
|
"""Uploads and runs the pipeline on the Vertex AI Pipelines service.
|
578
590
|
|
579
591
|
Args:
|
@@ -585,7 +597,11 @@ class VertexOrchestrator(ContainerizedOrchestrator, GoogleCredentialsMixin):
|
|
585
597
|
schedule: The schedule the pipeline will run on.
|
586
598
|
|
587
599
|
Raises:
|
588
|
-
RuntimeError: If the Vertex Orchestrator fails to provision or any
|
600
|
+
RuntimeError: If the Vertex Orchestrator fails to provision or any
|
601
|
+
other Runtime errors.
|
602
|
+
|
603
|
+
Yields:
|
604
|
+
A dictionary of metadata related to the pipeline run.
|
589
605
|
"""
|
590
606
|
# We have to replace the hyphens in the run name with underscores
|
591
607
|
# and lower case the string, because the Vertex AI Pipelines service
|
@@ -593,8 +609,7 @@ class VertexOrchestrator(ContainerizedOrchestrator, GoogleCredentialsMixin):
|
|
593
609
|
job_id = _clean_pipeline_name(run_name)
|
594
610
|
|
595
611
|
# Get the credentials that would be used to create the Vertex AI
|
596
|
-
# Pipelines
|
597
|
-
# job.
|
612
|
+
# Pipelines job.
|
598
613
|
credentials, project_id = self._get_authentication()
|
599
614
|
|
600
615
|
# Instantiate the Vertex AI Pipelines job
|
@@ -629,7 +644,8 @@ class VertexOrchestrator(ContainerizedOrchestrator, GoogleCredentialsMixin):
|
|
629
644
|
try:
|
630
645
|
if schedule:
|
631
646
|
logger.info(
|
632
|
-
"Scheduling job using native Vertex AI Pipelines
|
647
|
+
"Scheduling job using native Vertex AI Pipelines "
|
648
|
+
"scheduling..."
|
633
649
|
)
|
634
650
|
run.create_schedule(
|
635
651
|
display_name=schedule.name,
|
@@ -645,13 +661,12 @@ class VertexOrchestrator(ContainerizedOrchestrator, GoogleCredentialsMixin):
|
|
645
661
|
"No schedule detected. Creating one-off Vertex job..."
|
646
662
|
)
|
647
663
|
logger.info(
|
648
|
-
"Submitting pipeline job with job_id `%s` to Vertex AI
|
649
|
-
"service.",
|
664
|
+
"Submitting pipeline job with job_id `%s` to Vertex AI "
|
665
|
+
"Pipelines service.",
|
650
666
|
job_id,
|
651
667
|
)
|
652
668
|
|
653
669
|
# Submit the job to Vertex AI Pipelines service.
|
654
|
-
|
655
670
|
run.submit(
|
656
671
|
service_account=self.config.workload_service_account,
|
657
672
|
network=self.config.network,
|
@@ -661,6 +676,9 @@ class VertexOrchestrator(ContainerizedOrchestrator, GoogleCredentialsMixin):
|
|
661
676
|
run._dashboard_uri(),
|
662
677
|
)
|
663
678
|
|
679
|
+
# Yield metadata based on the generated job object
|
680
|
+
yield from self.compute_metadata(run)
|
681
|
+
|
664
682
|
if settings.synchronous:
|
665
683
|
logger.info(
|
666
684
|
"Waiting for the Vertex AI Pipelines job to finish..."
|
@@ -738,6 +756,7 @@ class VertexOrchestrator(ContainerizedOrchestrator, GoogleCredentialsMixin):
|
|
738
756
|
The dynamic component with the resource settings applied.
|
739
757
|
"""
|
740
758
|
# Set optional CPU, RAM and GPU constraints for the pipeline
|
759
|
+
cpu_limit = None
|
741
760
|
if resource_settings:
|
742
761
|
cpu_limit = resource_settings.cpu_count or self.config.cpu_limit
|
743
762
|
|
@@ -778,3 +797,173 @@ class VertexOrchestrator(ContainerizedOrchestrator, GoogleCredentialsMixin):
|
|
778
797
|
)
|
779
798
|
|
780
799
|
return dynamic_component
|
800
|
+
|
801
|
+
def fetch_status(self, run: "PipelineRunResponse") -> ExecutionStatus:
    """Refreshes the status of a specific pipeline run.

    Looks up the Vertex AI `PipelineJob` behind the given run and translates
    its state into a ZenML `ExecutionStatus`.

    Args:
        run: The run that was executed by this orchestrator.

    Returns:
        the actual status of the pipeline job. If Vertex reports an
        unspecified state, the status currently stored on the run is
        returned unchanged.

    Raises:
        AssertionError: If the run was not executed by this orchestrator.
        ValueError: If the stack of the run is not available anymore, if we
            can not fetch the orchestrator run ID, or if it fetches an
            unknown state.
    """
    # Make sure that the stack exists and is accessible
    if run.stack is None:
        raise ValueError(
            "The stack that the run was executed on is not available "
            "anymore."
        )

    # Make sure that the run belongs to this orchestrator. This is raised
    # explicitly (instead of using an `assert` statement) so that the check
    # is not stripped when Python runs with optimizations (`-O`).
    run_orchestrator_id = run.stack.components[
        StackComponentType.ORCHESTRATOR
    ][0].id
    if self.id != run_orchestrator_id:
        raise AssertionError(
            "The run was not executed by this orchestrator."
        )

    # Initialize the Vertex client
    credentials, project_id = self._get_authentication()
    aiplatform.init(
        project=project_id,
        location=self.config.location,
        credentials=credentials,
    )

    # Fetch the status of the PipelineJob. The run ID stored in the run
    # metadata takes precedence over the `orchestrator_run_id` field.
    if METADATA_ORCHESTRATOR_RUN_ID in run.run_metadata:
        run_id = run.run_metadata[METADATA_ORCHESTRATOR_RUN_ID].value
    elif run.orchestrator_run_id is not None:
        run_id = run.orchestrator_run_id
    else:
        raise ValueError(
            "Can not find the orchestrator run ID, thus can not fetch "
            "the status."
        )
    status = aiplatform.PipelineJob.get(run_id).state

    # Map the potential outputs to ZenML ExecutionStatus. The potential
    # values are the members of the `PipelineState` enum imported from
    # `google.cloud.aiplatform_v1.types`.
    if status == PipelineState.PIPELINE_STATE_UNSPECIFIED:
        # Nothing can be derived from an unspecified state; keep the status
        # that is already recorded on the run.
        return run.status
    elif status in (
        PipelineState.PIPELINE_STATE_QUEUED,
        PipelineState.PIPELINE_STATE_PENDING,
    ):
        return ExecutionStatus.INITIALIZING
    elif status in (
        PipelineState.PIPELINE_STATE_RUNNING,
        PipelineState.PIPELINE_STATE_PAUSED,
    ):
        return ExecutionStatus.RUNNING
    elif status == PipelineState.PIPELINE_STATE_SUCCEEDED:
        return ExecutionStatus.COMPLETED
    elif status in (
        PipelineState.PIPELINE_STATE_FAILED,
        PipelineState.PIPELINE_STATE_CANCELLING,
        PipelineState.PIPELINE_STATE_CANCELLED,
    ):
        return ExecutionStatus.FAILED
    else:
        raise ValueError("Unknown status for the pipeline job.")
|
873
|
+
|
874
|
+
def compute_metadata(
    self, job: aiplatform.PipelineJob
) -> Iterator[Dict[str, MetadataType]]:
    """Collect run metadata from the corresponding Vertex PipelineJob.

    Each entry is only included when its helper returns a truthy value, so
    the yielded dictionary may contain any subset of the three keys.

    Args:
        job: The corresponding PipelineJob object.

    Yields:
        A single dictionary of metadata related to the pipeline run.
    """
    # (metadata key, helper computing the value, optional wrapper type).
    # The order matters: run ID, then the Vertex pipeline view URL, then
    # the Logs Explorer URL.
    sources = (
        (METADATA_ORCHESTRATOR_RUN_ID, self._compute_orchestrator_run_id, None),
        (METADATA_ORCHESTRATOR_URL, self._compute_orchestrator_url, Uri),
        (
            METADATA_ORCHESTRATOR_LOGS_URL,
            self._compute_orchestrator_logs_url,
            Uri,
        ),
    )

    collected: Dict[str, MetadataType] = {}
    for key, compute, wrapper in sources:
        value = compute(job)
        if not value:
            # The helper could not produce a value; leave the key out.
            continue
        collected[key] = wrapper(value) if wrapper is not None else value

    yield collected
|
900
|
+
|
901
|
+
@staticmethod
def _compute_orchestrator_url(
    job: aiplatform.PipelineJob,
) -> Optional[str]:
    """Build the Orchestrator Dashboard URL for an executed pipeline.

    Any failure while reading the URL is logged as a warning instead of
    being raised.

    Args:
        job: The corresponding PipelineJob object.

    Returns:
        the URL to the dashboard view in Vertex, or None if it could not
        be extracted.
    """
    dashboard_url: Optional[str] = None
    try:
        dashboard_url = str(job._dashboard_uri())
    except Exception as error:
        logger.warning(
            f"There was an issue while extracting the pipeline url: {error}"
        )
    return dashboard_url
|
920
|
+
|
921
|
+
@staticmethod
def _compute_orchestrator_logs_url(
    job: aiplatform.PipelineJob,
) -> Optional[str]:
    """Generate the Logs Explorer URL upon pipeline execution.

    The URL opens the GCP Logs Explorer with a query that filters on the
    `PipelineJob` resource type and on the ID of the given job. Any failure
    while building the URL is logged as a warning instead of being raised.

    Args:
        job: The corresponding PipelineJob object.

    Returns:
        the URL querying the pipeline logs in Logs Explorer on GCP, or None
        if it could not be built.
    """
    # Import the submodule explicitly: a bare `import urllib` does not
    # guarantee that `urllib.parse` has been loaded.
    from urllib.parse import quote

    try:
        base_url = "https://console.cloud.google.com/logs/query"
        # One filter expression per line. Built from explicit lines so that
        # no source-code indentation ends up in the encoded query.
        query = "\n".join(
            (
                'resource.type="aiplatform.googleapis.com/PipelineJob"',
                f'resource.labels.pipeline_job_id="{job.job_id}"',
            )
        )
        encoded_query = quote(query)
        return f"{base_url}?project={job.project}&query={encoded_query}"

    except Exception as e:
        logger.warning(
            f"There was an issue while extracting the logs url: {e}"
        )
        return None
|
947
|
+
|
948
|
+
@staticmethod
def _compute_orchestrator_run_id(
    job: aiplatform.PipelineJob,
) -> Optional[str]:
    """Read the Orchestrator Run ID of an executed pipeline.

    Any failure while reading the ID is logged as a warning instead of
    being raised.

    Args:
        job: The corresponding PipelineJob object.

    Returns:
        the Execution ID of the run in Vertex, or None if the job has no
        ID or it could not be read.
    """
    try:
        # An empty/missing job ID is reported as None rather than as "".
        return str(job.job_id) if job.job_id else None
    except Exception as error:
        logger.warning(
            f"There was an issue while extracting the pipeline run ID: {error}"
        )
    return None
|
@@ -28,7 +28,7 @@ class LightningIntegration(Integration):
|
|
28
28
|
"""Definition of Lightning Integration for ZenML."""
|
29
29
|
|
30
30
|
NAME = LIGHTNING
|
31
|
-
REQUIREMENTS = ["lightning-sdk"]
|
31
|
+
REQUIREMENTS = ["lightning-sdk>=0.1.17"]
|
32
32
|
|
33
33
|
@classmethod
|
34
34
|
def flavors(cls) -> List[Type[Flavor]]:
|
@@ -85,6 +85,15 @@ class LightningOrchestratorConfig(
|
|
85
85
|
"""
|
86
86
|
return self.synchronous
|
87
87
|
|
88
|
+
@property
def is_schedulable(self) -> bool:
    """Report whether pipelines can be scheduled with this orchestrator.

    Returns:
        Always `False`: this configuration marks the orchestrator as not
        schedulable.
    """
    return False
|
96
|
+
|
88
97
|
|
89
98
|
class LightningOrchestratorFlavor(BaseOrchestratorFlavor):
|
90
99
|
"""Lightning orchestrator flavor."""
|
@@ -103,20 +103,29 @@ class LightningOrchestrator(WheeledOrchestrator):
|
|
103
103
|
|
104
104
|
Args:
|
105
105
|
deployment: The pipeline deployment to prepare or run.
|
106
|
+
|
107
|
+
Raises:
|
108
|
+
ValueError: If `user_id` and `api_key` are not both set, or if neither `username` nor `organization` is set in the settings.
|
106
109
|
"""
|
107
110
|
settings = cast(
|
108
111
|
LightningOrchestratorSettings, self.get_settings(deployment)
|
109
112
|
)
|
110
|
-
if settings.user_id:
|
111
|
-
|
112
|
-
|
113
|
-
|
113
|
+
if not settings.user_id or not settings.api_key:
|
114
|
+
raise ValueError(
|
115
|
+
"Lightning orchestrator requires `user_id` and `api_key` both to be set in the settings."
|
116
|
+
)
|
117
|
+
os.environ["LIGHTNING_USER_ID"] = settings.user_id
|
118
|
+
os.environ["LIGHTNING_API_KEY"] = settings.api_key
|
114
119
|
if settings.username:
|
115
120
|
os.environ["LIGHTNING_USERNAME"] = settings.username
|
121
|
+
elif settings.organization:
|
122
|
+
os.environ["LIGHTNING_ORG"] = settings.organization
|
123
|
+
else:
|
124
|
+
raise ValueError(
|
125
|
+
"Lightning orchestrator requires either `username` or `organization` to be set in the settings."
|
126
|
+
)
|
116
127
|
if settings.teamspace:
|
117
128
|
os.environ["LIGHTNING_TEAMSPACE"] = settings.teamspace
|
118
|
-
if settings.organization:
|
119
|
-
os.environ["LIGHTNING_ORG"] = settings.organization
|
120
129
|
|
121
130
|
@property
|
122
131
|
def config(self) -> LightningOrchestratorConfig:
|
@@ -267,9 +276,7 @@ class LightningOrchestrator(WheeledOrchestrator):
|
|
267
276
|
) as code_file:
|
268
277
|
code_archive.write_archive(code_file)
|
269
278
|
code_path = code_file.name
|
270
|
-
|
271
279
|
filename = f"{orchestrator_run_name}.tar.gz"
|
272
|
-
|
273
280
|
# Construct the env variables for the pipeline
|
274
281
|
env_vars = environment.copy()
|
275
282
|
orchestrator_run_id = str(uuid4())
|
@@ -392,9 +399,7 @@ class LightningOrchestrator(WheeledOrchestrator):
|
|
392
399
|
f"Installing requirements: {pipeline_requirements_to_string}"
|
393
400
|
)
|
394
401
|
studio.run(f"uv pip install {pipeline_requirements_to_string}")
|
395
|
-
studio.run(
|
396
|
-
"pip uninstall zenml -y && pip install git+https://github.com/zenml-io/zenml.git@feature/lightening-studio-orchestrator"
|
397
|
-
)
|
402
|
+
studio.run("pip install zenml -y")
|
398
403
|
|
399
404
|
for custom_command in settings.custom_commands or []:
|
400
405
|
studio.run(
|
@@ -488,9 +493,7 @@ class LightningOrchestrator(WheeledOrchestrator):
|
|
488
493
|
)
|
489
494
|
studio.run("pip install uv")
|
490
495
|
studio.run(f"uv pip install {requirements}")
|
491
|
-
studio.run(
|
492
|
-
"pip uninstall zenml -y && pip install git+https://github.com/zenml-io/zenml.git@feature/lightening-studio-orchestrator"
|
493
|
-
)
|
496
|
+
studio.run("pip install zenml -y")
|
494
497
|
# studio.run(f"pip install {wheel_path.rsplit('/', 1)[-1]}")
|
495
498
|
for command in settings.custom_commands or []:
|
496
499
|
output = studio.run(
|
@@ -563,9 +566,7 @@ class LightningOrchestrator(WheeledOrchestrator):
|
|
563
566
|
)
|
564
567
|
studio.run("pip install uv")
|
565
568
|
studio.run(f"uv pip install {details['requirements']}")
|
566
|
-
studio.run(
|
567
|
-
"pip uninstall zenml -y && pip install git+https://github.com/zenml-io/zenml.git@feature/lightening-studio-orchestrator"
|
568
|
-
)
|
569
|
+
studio.run("pip install zenml -y")
|
569
570
|
# studio.run(f"pip install {wheel_path.rsplit('/', 1)[-1]}")
|
570
571
|
for command in custom_commands or []:
|
571
572
|
output = studio.run(
|
@@ -166,9 +166,7 @@ def main() -> None:
|
|
166
166
|
f"uv pip install {pipeline_requirements_to_string}"
|
167
167
|
)
|
168
168
|
logger.info(output)
|
169
|
-
output = main_studio.run(
|
170
|
-
"pip uninstall zenml -y && pip install git+https://github.com/zenml-io/zenml.git@feature/lightening-studio-orchestrator"
|
171
|
-
)
|
169
|
+
output = main_studio.run("pip install zenml -y")
|
172
170
|
logger.info(output)
|
173
171
|
|
174
172
|
for command in pipeline_settings.custom_commands or []:
|
@@ -250,9 +248,7 @@ def main() -> None:
|
|
250
248
|
f"uv pip install {step_requirements_to_string}"
|
251
249
|
)
|
252
250
|
logger.info(output)
|
253
|
-
output = studio.run(
|
254
|
-
"pip uninstall zenml -y && pip install git+https://github.com/zenml-io/zenml.git@feature/lightening-studio-orchestrator"
|
255
|
-
)
|
251
|
+
output = studio.run("pip install zenml -y")
|
256
252
|
logger.info(output)
|
257
253
|
for command in step_settings.custom_commands or []:
|
258
254
|
output = studio.run(
|
@@ -146,6 +146,8 @@ def mlflow_register_model_step(
|
|
146
146
|
metadata.zenml_pipeline_run_uuid = pipeline_run_uuid
|
147
147
|
if metadata.zenml_workspace is None:
|
148
148
|
metadata.zenml_workspace = zenml_workspace
|
149
|
+
if getattr(metadata, "mlflow_run_id", None) is None:
|
150
|
+
setattr(metadata, "mlflow_run_id", mlflow_run_id)
|
149
151
|
|
150
152
|
# Register model version
|
151
153
|
model_version = model_registry.register_model_version(
|
@@ -250,6 +250,7 @@ class SkypilotBaseOrchestrator(ContainerizedOrchestrator):
|
|
250
250
|
entrypoint_str = " ".join(command)
|
251
251
|
arguments_str = " ".join(args)
|
252
252
|
|
253
|
+
task_envs = environment
|
253
254
|
docker_environment_str = " ".join(
|
254
255
|
f"-e {k}={v}" for k, v in environment.items()
|
255
256
|
)
|
@@ -271,13 +272,10 @@ class SkypilotBaseOrchestrator(ContainerizedOrchestrator):
|
|
271
272
|
f"sudo docker login --username $DOCKER_USERNAME --password "
|
272
273
|
f"$DOCKER_PASSWORD {stack.container_registry.config.uri}"
|
273
274
|
)
|
274
|
-
task_envs =
|
275
|
-
|
276
|
-
"DOCKER_PASSWORD": docker_password,
|
277
|
-
}
|
275
|
+
task_envs["DOCKER_USERNAME"] = docker_username
|
276
|
+
task_envs["DOCKER_PASSWORD"] = docker_password
|
278
277
|
else:
|
279
278
|
setup = None
|
280
|
-
task_envs = None
|
281
279
|
|
282
280
|
# Run the entire pipeline
|
283
281
|
|
@@ -285,15 +283,22 @@ class SkypilotBaseOrchestrator(ContainerizedOrchestrator):
|
|
285
283
|
self.prepare_environment_variable(set=True)
|
286
284
|
|
287
285
|
try:
|
286
|
+
if isinstance(self.cloud, sky.clouds.Kubernetes):
|
287
|
+
run_command = f"${{VIRTUAL_ENV:+$VIRTUAL_ENV/bin/}}{entrypoint_str} {arguments_str}"
|
288
|
+
setup = None
|
289
|
+
down = False
|
290
|
+
idle_minutes_to_autostop = None
|
291
|
+
else:
|
292
|
+
run_command = f"sudo docker run --rm {custom_run_args}{docker_environment_str} {image} {entrypoint_str} {arguments_str}"
|
293
|
+
down = settings.down
|
294
|
+
idle_minutes_to_autostop = settings.idle_minutes_to_autostop
|
288
295
|
task = sky.Task(
|
289
|
-
run=
|
296
|
+
run=run_command,
|
290
297
|
setup=setup,
|
291
298
|
envs=task_envs,
|
292
299
|
)
|
293
|
-
logger.debug(
|
294
|
-
|
295
|
-
)
|
296
|
-
logger.debug(f"Running run: {setup}")
|
300
|
+
logger.debug(f"Running run: {run_command}")
|
301
|
+
|
297
302
|
task = task.set_resources(
|
298
303
|
sky.Resources(
|
299
304
|
cloud=self.cloud,
|
@@ -306,15 +311,24 @@ class SkypilotBaseOrchestrator(ContainerizedOrchestrator):
|
|
306
311
|
job_recovery=settings.job_recovery,
|
307
312
|
region=settings.region,
|
308
313
|
zone=settings.zone,
|
309
|
-
image_id=
|
314
|
+
image_id=image
|
315
|
+
if isinstance(self.cloud, sky.clouds.Kubernetes)
|
316
|
+
else settings.image_id,
|
310
317
|
disk_size=settings.disk_size,
|
311
318
|
disk_tier=settings.disk_tier,
|
312
319
|
)
|
313
320
|
)
|
314
|
-
|
315
321
|
# Set the cluster name
|
316
|
-
|
317
|
-
|
322
|
+
if settings.cluster_name:
|
323
|
+
sky.exec(
|
324
|
+
task,
|
325
|
+
settings.cluster_name,
|
326
|
+
down=down,
|
327
|
+
stream_logs=settings.stream_logs,
|
328
|
+
backend=None,
|
329
|
+
detach_run=True,
|
330
|
+
)
|
331
|
+
else:
|
318
332
|
# Find existing cluster
|
319
333
|
for i in sky.status(refresh=True):
|
320
334
|
if isinstance(
|
@@ -324,21 +338,19 @@ class SkypilotBaseOrchestrator(ContainerizedOrchestrator):
|
|
324
338
|
logger.info(
|
325
339
|
f"Found existing cluster {cluster_name}. Reusing..."
|
326
340
|
)
|
327
|
-
if cluster_name is None:
|
328
341
|
cluster_name = self.sanitize_cluster_name(
|
329
342
|
f"{orchestrator_run_name}"
|
330
343
|
)
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
)
|
344
|
+
# Launch the cluster
|
345
|
+
sky.launch(
|
346
|
+
task,
|
347
|
+
cluster_name,
|
348
|
+
retry_until_up=settings.retry_until_up,
|
349
|
+
idle_minutes_to_autostop=idle_minutes_to_autostop,
|
350
|
+
down=down,
|
351
|
+
stream_logs=settings.stream_logs,
|
352
|
+
detach_setup=True,
|
353
|
+
)
|
342
354
|
|
343
355
|
except Exception as e:
|
344
356
|
logger.error(f"Pipeline run failed: {e}")
|
@@ -0,0 +1,52 @@
|
|
1
|
+
# Copyright (c) ZenML GmbH 2024. All Rights Reserved.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at:
|
6
|
+
#
|
7
|
+
# https://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
12
|
+
# or implied. See the License for the specific language governing
|
13
|
+
# permissions and limitations under the License.
|
14
|
+
"""Initialization of the Skypilot Kubernetes integration for ZenML.
|
15
|
+
|
16
|
+
The Skypilot Kubernetes integration sub-module powers an alternative to the local
|
17
|
+
orchestrator for remote orchestration of ZenML pipelines on Kubernetes.
|
18
|
+
"""
|
19
|
+
from typing import List, Type
|
20
|
+
|
21
|
+
from zenml.integrations.constants import (
|
22
|
+
SKYPILOT_KUBERNETES,
|
23
|
+
)
|
24
|
+
from zenml.integrations.integration import Integration
|
25
|
+
from zenml.stack import Flavor
|
26
|
+
|
27
|
+
SKYPILOT_KUBERNETES_ORCHESTRATOR_FLAVOR = "vm_kubernetes"
|
28
|
+
|
29
|
+
|
30
|
+
class SkypilotKubernetesIntegration(Integration):
|
31
|
+
"""Definition of Skypilot Kubernetes Integration for ZenML."""
|
32
|
+
|
33
|
+
NAME = SKYPILOT_KUBERNETES
|
34
|
+
# all 0.6.x versions of skypilot[kubernetes] from 0.6.1 onwards are compatible
|
35
|
+
REQUIREMENTS = ["skypilot[kubernetes]~=0.6.1"]
|
36
|
+
APT_PACKAGES = ["openssh-client", "rsync"]
|
37
|
+
|
38
|
+
@classmethod
|
39
|
+
def flavors(cls) -> List[Type[Flavor]]:
|
40
|
+
"""Declare the stack component flavors for the Skypilot Kubernetes integration.
|
41
|
+
|
42
|
+
Returns:
|
43
|
+
List of stack component flavors for this integration.
|
44
|
+
"""
|
45
|
+
from zenml.integrations.skypilot_kubernetes.flavors import (
|
46
|
+
SkypilotKubernetesOrchestratorFlavor,
|
47
|
+
)
|
48
|
+
|
49
|
+
return [SkypilotKubernetesOrchestratorFlavor]
|
50
|
+
|
51
|
+
|
52
|
+
SkypilotKubernetesIntegration.check_installation()
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# Copyright (c) ZenML GmbH 2024. All Rights Reserved.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at:
|
6
|
+
#
|
7
|
+
# https://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
12
|
+
# or implied. See the License for the specific language governing
|
13
|
+
# permissions and limitations under the License.
|
14
|
+
"""Skypilot integration flavor for Skypilot Kubernetes orchestrator."""
|
15
|
+
|
16
|
+
from zenml.integrations.skypilot_kubernetes.flavors.skypilot_orchestrator_kubernetes_vm_flavor import (
|
17
|
+
SkypilotKubernetesOrchestratorConfig,
|
18
|
+
SkypilotKubernetesOrchestratorFlavor,
|
19
|
+
SkypilotKubernetesOrchestratorSettings,
|
20
|
+
)
|
21
|
+
|
22
|
+
__all__ = [
|
23
|
+
"SkypilotKubernetesOrchestratorConfig",
|
24
|
+
"SkypilotKubernetesOrchestratorFlavor",
|
25
|
+
"SkypilotKubernetesOrchestratorSettings",
|
26
|
+
]
|