zenml-nightly 0.66.0.dev20240924__py3-none-any.whl → 0.66.0.dev20240926__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. zenml/VERSION +1 -1
  2. zenml/cli/__init__.py +7 -0
  3. zenml/cli/pipeline.py +21 -0
  4. zenml/constants.py +3 -0
  5. zenml/integrations/__init__.py +1 -0
  6. zenml/integrations/aws/orchestrators/sagemaker_orchestrator.py +288 -71
  7. zenml/integrations/azure/orchestrators/azureml_orchestrator.py +157 -4
  8. zenml/integrations/constants.py +1 -0
  9. zenml/integrations/deepchecks/__init__.py +1 -1
  10. zenml/integrations/deepchecks/data_validators/deepchecks_data_validator.py +55 -14
  11. zenml/integrations/deepchecks/validation_checks.py +62 -5
  12. zenml/integrations/gcp/orchestrators/vertex_orchestrator.py +207 -18
  13. zenml/integrations/lightning/__init__.py +1 -1
  14. zenml/integrations/lightning/flavors/lightning_orchestrator_flavor.py +9 -0
  15. zenml/integrations/lightning/orchestrators/lightning_orchestrator.py +18 -17
  16. zenml/integrations/lightning/orchestrators/lightning_orchestrator_entrypoint.py +2 -6
  17. zenml/integrations/mlflow/steps/mlflow_registry.py +2 -0
  18. zenml/integrations/skypilot/orchestrators/skypilot_base_vm_orchestrator.py +38 -26
  19. zenml/integrations/skypilot_kubernetes/__init__.py +52 -0
  20. zenml/integrations/skypilot_kubernetes/flavors/__init__.py +26 -0
  21. zenml/integrations/skypilot_kubernetes/flavors/skypilot_orchestrator_kubernetes_vm_flavor.py +125 -0
  22. zenml/integrations/skypilot_kubernetes/orchestrators/__init__.py +25 -0
  23. zenml/integrations/skypilot_kubernetes/orchestrators/skypilot_kubernetes_vm_orchestrator.py +74 -0
  24. zenml/models/v2/core/pipeline_run.py +62 -1
  25. zenml/new/pipelines/run_utils.py +4 -1
  26. zenml/orchestrators/base_orchestrator.py +41 -12
  27. zenml/stack/stack.py +11 -2
  28. zenml/zen_server/cloud_utils.py +33 -8
  29. zenml/zen_server/dashboard/assets/{404-iO8vpun1.js → 404-CMnKjD-L.js} +1 -1
  30. zenml/zen_server/dashboard/assets/{@reactflow-B6kq9fJZ.js → @reactflow-CEC2f0cl.js} +1 -1
  31. zenml/zen_server/dashboard/assets/AlertDialogDropdownItem-BqM1UpCD.js +1 -0
  32. zenml/zen_server/dashboard/assets/{CodeSnippet-DNWdQmbo.js → CodeSnippet-DRy_0J4D.js} +2 -2
  33. zenml/zen_server/dashboard/assets/{CollapsibleCard-B2OVjWYE.js → CollapsibleCard-lE-75Zob.js} +1 -1
  34. zenml/zen_server/dashboard/assets/{Commands-DsoaVElZ.js → Commands-CVx2RAoT.js} +1 -1
  35. zenml/zen_server/dashboard/assets/{CopyButton-BqE_-PHO.js → CopyButton-C_yRGWuP.js} +1 -1
  36. zenml/zen_server/dashboard/assets/{CsvVizualization-Dyasr2jU.js → CsvVizualization-Dd0P02Iz.js} +1 -1
  37. zenml/zen_server/dashboard/assets/{DialogItem-Cz1VLRwa.js → DialogItem-BCrc2wIk.js} +1 -1
  38. zenml/zen_server/dashboard/assets/{Error-DorJD_va.js → Error-BuMJbG-M.js} +1 -1
  39. zenml/zen_server/dashboard/assets/{ExecutionStatus-CIfQTutR.js → ExecutionStatus-fIulMG4w.js} +1 -1
  40. zenml/zen_server/dashboard/assets/{Helpbox-CmfvtNeq.js → Helpbox-CJAp4kbv.js} +1 -1
  41. zenml/zen_server/dashboard/assets/Infobox-CC70zvGO.js +1 -0
  42. zenml/zen_server/dashboard/assets/{InlineAvatar-Ds2ZFHPc.js → InlineAvatar-C3QXdFW1.js} +1 -1
  43. zenml/zen_server/dashboard/assets/{Partials-DX-8iEa1.js → Partials-Cb8lrNsi.js} +1 -1
  44. zenml/zen_server/dashboard/assets/{ProviderIcon-BOQJgapd.js → ProviderIcon-C9BuYVSN.js} +1 -1
  45. zenml/zen_server/dashboard/assets/{ProviderRadio-BsYBw9YA.js → ProviderRadio-GYc9PJtG.js} +1 -1
  46. zenml/zen_server/dashboard/assets/{SearchField-W3GXpLlI.js → SearchField-BeF1yR7M.js} +1 -1
  47. zenml/zen_server/dashboard/assets/SecretTooltip-DgVWrPxX.js +1 -0
  48. zenml/zen_server/dashboard/assets/{SetPassword-B-0a8UCj.js → SetPassword-nAhHddXW.js} +1 -1
  49. zenml/zen_server/dashboard/assets/{Tick-i1DYsVcX.js → Tick-C5ZVvNRQ.js} +1 -1
  50. zenml/zen_server/dashboard/assets/{UpdatePasswordSchemas-C6Zb7ASL.js → UpdatePasswordSchemas-7KFsDbKb.js} +1 -1
  51. zenml/zen_server/dashboard/assets/UsageReason-DL5NL_ZD.js +1 -0
  52. zenml/zen_server/dashboard/assets/{WizardFooter-BHbO7zOa.js → WizardFooter-CgvFSppz.js} +1 -1
  53. zenml/zen_server/dashboard/assets/{all-pipeline-runs-query-BBEe6I9-.js → all-pipeline-runs-query-DAPSF_74.js} +1 -1
  54. zenml/zen_server/dashboard/assets/{cloud-only-BuP4Kt_7.js → cloud-only-CxoNxh86.js} +1 -1
  55. zenml/zen_server/dashboard/assets/{create-stack-B2x2d4r1.js → create-stack-BfgeXFuV.js} +1 -1
  56. zenml/zen_server/dashboard/assets/delete-run-OkGmZQ5G.js +1 -0
  57. zenml/zen_server/dashboard/assets/{form-schemas-Bap0f854.js → form-schemas-C09PrQUJ.js} +1 -1
  58. zenml/zen_server/dashboard/assets/{index-DFi8BroH.js → index-CLT4K7oC.js} +1 -1
  59. zenml/zen_server/dashboard/assets/{index-B9wVwe7u.js → index-D0bJjaey.js} +3 -3
  60. zenml/zen_server/dashboard/assets/index-PcI3Xw77.css +1 -0
  61. zenml/zen_server/dashboard/assets/{login-mutation-DwxUz8VA.js → login-mutation-CB45FHbP.js} +1 -1
  62. zenml/zen_server/dashboard/assets/{not-found-D5i9DunU.js → not-found-NtCUfXiV.js} +1 -1
  63. zenml/zen_server/dashboard/assets/page-AvcQe_oR.js +1 -0
  64. zenml/zen_server/dashboard/assets/page-B6DccgPa.js +1 -0
  65. zenml/zen_server/dashboard/assets/{page-xQG6GmFJ.js → page-B7DTiwhv.js} +1 -1
  66. zenml/zen_server/dashboard/assets/{page-CIbehp7V.js → page-B7LduaiG.js} +1 -1
  67. zenml/zen_server/dashboard/assets/{page-CEJWu1YO.js → page-B8WlhDq6.js} +1 -1
  68. zenml/zen_server/dashboard/assets/{page-BitfWsiW.js → page-BIhP9udn.js} +1 -1
  69. zenml/zen_server/dashboard/assets/{page-DE03uZZR.js → page-BLS9bXB8.js} +1 -1
  70. zenml/zen_server/dashboard/assets/{page-DFCK65G9.js → page-BYXn4SXu.js} +1 -1
  71. zenml/zen_server/dashboard/assets/{page-bimkItOg.js → page-Bfvwt3AB.js} +1 -1
  72. zenml/zen_server/dashboard/assets/{page-D5F3DJjm.js → page-BipKr1Pt.js} +1 -1
  73. zenml/zen_server/dashboard/assets/page-BwG4f5qc.js +1 -0
  74. zenml/zen_server/dashboard/assets/page-C1c_unjg.js +9 -0
  75. zenml/zen_server/dashboard/assets/{page-DQdwZZ9x.js → page-C25tiRdj.js} +1 -1
  76. zenml/zen_server/dashboard/assets/page-CIATsAA7.js +1 -0
  77. zenml/zen_server/dashboard/assets/{page-iwoJnwPv.js → page-CKUVhcYr.js} +1 -1
  78. zenml/zen_server/dashboard/assets/{page-BiF8hLbO.js → page-CXLwze-m.js} +1 -1
  79. zenml/zen_server/dashboard/assets/page-D7TD0k_A.js +1 -0
  80. zenml/zen_server/dashboard/assets/{page-CDOQLrPC.js → page-DIlOQjGU.js} +1 -1
  81. zenml/zen_server/dashboard/assets/{page-DGMa3ZQL.js → page-DJ31Huvj.js} +1 -1
  82. zenml/zen_server/dashboard/assets/{page-J0s8Sq3N.js → page-DOqsdVzG.js} +1 -1
  83. zenml/zen_server/dashboard/assets/{page-DQGCHKrQ.js → page-DUapawuM.js} +1 -1
  84. zenml/zen_server/dashboard/assets/{page-WCQ659by.js → page-Dd3jZyrf.js} +1 -1
  85. zenml/zen_server/dashboard/assets/{page-CrSdkteO.js → page-DyZzYHWA.js} +2 -2
  86. zenml/zen_server/dashboard/assets/page-L_xNBh_5.js +3 -0
  87. zenml/zen_server/dashboard/assets/{page-oS4hqS8M.js → page-VsrKiIdF.js} +1 -1
  88. zenml/zen_server/dashboard/assets/{page-DgM-N9RL.js → page-ioO58ULo.js} +1 -1
  89. zenml/zen_server/dashboard/assets/page-kalpiPZz.js +6 -0
  90. zenml/zen_server/dashboard/assets/{persist-xsYgVtR1.js → persist-ChKZVcn3.js} +1 -1
  91. zenml/zen_server/dashboard/assets/{persist-mEZN_fgH.js → persist-DodaLO0k.js} +1 -1
  92. zenml/zen_server/dashboard/assets/{sharedSchema-BfZcy7aP.js → sharedSchema-BvRWAv-c.js} +1 -1
  93. zenml/zen_server/dashboard/assets/{stack-detail-query-CU4egfhp.js → stack-detail-query-C9XwNP1E.js} +1 -1
  94. zenml/zen_server/dashboard/assets/tick-circle-m-hJG8i9.js +1 -0
  95. zenml/zen_server/dashboard/assets/{update-server-settings-mutation-DNqmQXDM.js → update-server-settings-mutation-DJDefwqW.js} +1 -1
  96. zenml/zen_server/dashboard/assets/{url-DwbuKk1b.js → url-DdWrpIhi.js} +1 -1
  97. zenml/zen_server/dashboard/index.html +4 -4
  98. zenml/zen_server/dashboard_legacy/asset-manifest.json +4 -4
  99. zenml/zen_server/dashboard_legacy/index.html +1 -1
  100. zenml/zen_server/dashboard_legacy/{precache-manifest.290b95d5b43efa3368b3dc63d20c4782.js → precache-manifest.4f9db97de1b48fd5944e8a766c1300fe.js} +4 -4
  101. zenml/zen_server/dashboard_legacy/service-worker.js +1 -1
  102. zenml/zen_server/dashboard_legacy/static/js/{main.840d1bf0.chunk.js → main.0fdd4aad.chunk.js} +2 -2
  103. zenml/zen_server/dashboard_legacy/static/js/{main.840d1bf0.chunk.js.map → main.0fdd4aad.chunk.js.map} +1 -1
  104. zenml/zen_server/routers/runs_endpoints.py +89 -3
  105. {zenml_nightly-0.66.0.dev20240924.dist-info → zenml_nightly-0.66.0.dev20240926.dist-info}/METADATA +8 -1
  106. {zenml_nightly-0.66.0.dev20240924.dist-info → zenml_nightly-0.66.0.dev20240926.dist-info}/RECORD +109 -102
  107. zenml/zen_server/dashboard/assets/AlertDialogDropdownItem-BXeSvmMY.js +0 -1
  108. zenml/zen_server/dashboard/assets/EditSecretDialog-Du423_3U.js +0 -1
  109. zenml/zen_server/dashboard/assets/Infobox-BL9NOS37.js +0 -1
  110. zenml/zen_server/dashboard/assets/UsageReason-CCnzmwS8.js +0 -1
  111. zenml/zen_server/dashboard/assets/index-6DYjZgDn.css +0 -1
  112. zenml/zen_server/dashboard/assets/page-BFuJICXM.js +0 -9
  113. zenml/zen_server/dashboard/assets/page-CLiRGfWo.js +0 -1
  114. zenml/zen_server/dashboard/assets/page-CV44mQn9.js +0 -1
  115. zenml/zen_server/dashboard/assets/page-DI-qTWrm.js +0 -1
  116. zenml/zen_server/dashboard/assets/page-Dt8VgzbE.js +0 -1
  117. zenml/zen_server/dashboard/assets/page-oSqx9dkH.js +0 -1
  118. zenml/zen_server/dashboard/assets/page-p3GqEAUW.js +0 -1
  119. zenml/zen_server/dashboard/assets/page-qvcUVPE-.js +0 -1
  120. {zenml_nightly-0.66.0.dev20240924.dist-info → zenml_nightly-0.66.0.dev20240926.dist-info}/LICENSE +0 -0
  121. {zenml_nightly-0.66.0.dev20240924.dist-info → zenml_nightly-0.66.0.dev20240926.dist-info}/WHEEL +0 -0
  122. {zenml_nightly-0.66.0.dev20240924.dist-info → zenml_nightly-0.66.0.dev20240926.dist-info}/entry_points.txt +0 -0
@@ -32,10 +32,12 @@
32
32
  import os
33
33
  import re
34
34
  import types
35
+ import urllib
35
36
  from typing import (
36
37
  TYPE_CHECKING,
37
38
  Any,
38
39
  Dict,
40
+ Iterator,
39
41
  List,
40
42
  Optional,
41
43
  Tuple,
@@ -46,15 +48,18 @@ from uuid import UUID
46
48
 
47
49
  from google.api_core import exceptions as google_exceptions
48
50
  from google.cloud import aiplatform
51
+ from google.cloud.aiplatform_v1.types import PipelineState
49
52
  from kfp import dsl
50
53
  from kfp.compiler import Compiler
51
54
 
52
55
  from zenml.config.resource_settings import ResourceSettings
53
56
  from zenml.constants import (
57
+ METADATA_ORCHESTRATOR_LOGS_URL,
58
+ METADATA_ORCHESTRATOR_RUN_ID,
54
59
  METADATA_ORCHESTRATOR_URL,
55
60
  )
56
61
  from zenml.entrypoints import StepEntrypointConfiguration
57
- from zenml.enums import StackComponentType
62
+ from zenml.enums import ExecutionStatus, StackComponentType
58
63
  from zenml.integrations.gcp import GCP_ARTIFACT_STORE_FLAVOR
59
64
  from zenml.integrations.gcp.constants import (
60
65
  GKE_ACCELERATOR_NODE_SELECTOR_CONSTRAINT_LABEL,
@@ -77,7 +82,11 @@ from zenml.utils.io_utils import get_global_config_directory
77
82
 
78
83
  if TYPE_CHECKING:
79
84
  from zenml.config.base_settings import BaseSettings
80
- from zenml.models import PipelineDeploymentResponse, ScheduleResponse
85
+ from zenml.models import (
86
+ PipelineDeploymentResponse,
87
+ PipelineRunResponse,
88
+ ScheduleResponse,
89
+ )
81
90
  from zenml.stack import Stack
82
91
 
83
92
  logger = get_logger(__name__)
@@ -245,8 +254,8 @@ class VertexOrchestrator(ContainerizedOrchestrator, GoogleCredentialsMixin):
245
254
  ):
246
255
  logger.warning(
247
256
  "Vertex orchestrator only uses schedules with the "
248
- "`cron_expression` property, with optional `start_time` and/or `end_time`. "
249
- "All other properties are ignored."
257
+ "`cron_expression` property, with optional `start_time` "
258
+ "and/or `end_time`. All other properties are ignored."
250
259
  )
251
260
  if deployment.schedule.cron_expression is None:
252
261
  raise ValueError(
@@ -302,7 +311,7 @@ class VertexOrchestrator(ContainerizedOrchestrator, GoogleCredentialsMixin):
302
311
  deployment: "PipelineDeploymentResponse",
303
312
  stack: "Stack",
304
313
  environment: Dict[str, str],
305
- ) -> Any:
314
+ ) -> Iterator[Dict[str, MetadataType]]:
306
315
  """Creates a KFP JSON pipeline.
307
316
 
308
317
  # noqa: DAR402
@@ -337,12 +346,15 @@ class VertexOrchestrator(ContainerizedOrchestrator, GoogleCredentialsMixin):
337
346
  environment.
338
347
 
339
348
  Raises:
340
- ValueError: If the attribute `pipeline_root` is not set and it
349
+ ValueError: If the attribute `pipeline_root` is not set, and it
341
350
  cannot be generated using the path of the artifact store in the
342
351
  stack because it is not a
343
352
  `zenml.integrations.gcp.artifact_store.GCPArtifactStore`. Also gets
344
353
  raised if attempting to schedule pipeline run without using the
345
354
  `zenml.integrations.gcp.artifact_store.GCPArtifactStore`.
355
+
356
+ Yields:
357
+ A dictionary of metadata related to the pipeline run.
346
358
  """
347
359
  orchestrator_run_name = get_orchestrator_run_name(
348
360
  pipeline_name=deployment.pipeline_configuration.name
@@ -556,15 +568,15 @@ class VertexOrchestrator(ContainerizedOrchestrator, GoogleCredentialsMixin):
556
568
  )
557
569
 
558
570
  # Using the Google Cloud AIPlatform client, upload and execute the
559
- # pipeline
560
- # on the Vertex AI Pipelines service.
561
- self._upload_and_run_pipeline(
571
+ # pipeline on the Vertex AI Pipelines service.
572
+ if metadata := self._upload_and_run_pipeline(
562
573
  pipeline_name=deployment.pipeline_configuration.name,
563
574
  pipeline_file_path=pipeline_file_path,
564
575
  run_name=orchestrator_run_name,
565
576
  settings=settings,
566
577
  schedule=deployment.schedule,
567
- )
578
+ ):
579
+ yield from metadata
568
580
 
569
581
  def _upload_and_run_pipeline(
570
582
  self,
@@ -573,7 +585,7 @@ class VertexOrchestrator(ContainerizedOrchestrator, GoogleCredentialsMixin):
573
585
  run_name: str,
574
586
  settings: VertexOrchestratorSettings,
575
587
  schedule: Optional["ScheduleResponse"] = None,
576
- ) -> None:
588
+ ) -> Iterator[Dict[str, MetadataType]]:
577
589
  """Uploads and runs the pipeline on the Vertex AI Pipelines service.
578
590
 
579
591
  Args:
@@ -585,7 +597,11 @@ class VertexOrchestrator(ContainerizedOrchestrator, GoogleCredentialsMixin):
585
597
  schedule: The schedule the pipeline will run on.
586
598
 
587
599
  Raises:
588
- RuntimeError: If the Vertex Orchestrator fails to provision or any other Runtime errors
600
+ RuntimeError: If the Vertex Orchestrator fails to provision or any
601
+ other Runtime errors.
602
+
603
+ Yields:
604
+ A dictionary of metadata related to the pipeline run.
589
605
  """
590
606
  # We have to replace the hyphens in the run name with underscores
591
607
  # and lower case the string, because the Vertex AI Pipelines service
@@ -593,8 +609,7 @@ class VertexOrchestrator(ContainerizedOrchestrator, GoogleCredentialsMixin):
593
609
  job_id = _clean_pipeline_name(run_name)
594
610
 
595
611
  # Get the credentials that would be used to create the Vertex AI
596
- # Pipelines
597
- # job.
612
+ # Pipelines job.
598
613
  credentials, project_id = self._get_authentication()
599
614
 
600
615
  # Instantiate the Vertex AI Pipelines job
@@ -629,7 +644,8 @@ class VertexOrchestrator(ContainerizedOrchestrator, GoogleCredentialsMixin):
629
644
  try:
630
645
  if schedule:
631
646
  logger.info(
632
- "Scheduling job using native Vertex AI Pipelines scheduling..."
647
+ "Scheduling job using native Vertex AI Pipelines "
648
+ "scheduling..."
633
649
  )
634
650
  run.create_schedule(
635
651
  display_name=schedule.name,
@@ -645,13 +661,12 @@ class VertexOrchestrator(ContainerizedOrchestrator, GoogleCredentialsMixin):
645
661
  "No schedule detected. Creating one-off Vertex job..."
646
662
  )
647
663
  logger.info(
648
- "Submitting pipeline job with job_id `%s` to Vertex AI Pipelines "
649
- "service.",
664
+ "Submitting pipeline job with job_id `%s` to Vertex AI "
665
+ "Pipelines service.",
650
666
  job_id,
651
667
  )
652
668
 
653
669
  # Submit the job to Vertex AI Pipelines service.
654
-
655
670
  run.submit(
656
671
  service_account=self.config.workload_service_account,
657
672
  network=self.config.network,
@@ -661,6 +676,9 @@ class VertexOrchestrator(ContainerizedOrchestrator, GoogleCredentialsMixin):
661
676
  run._dashboard_uri(),
662
677
  )
663
678
 
679
+ # Yield metadata based on the generated job object
680
+ yield from self.compute_metadata(run)
681
+
664
682
  if settings.synchronous:
665
683
  logger.info(
666
684
  "Waiting for the Vertex AI Pipelines job to finish..."
@@ -738,6 +756,7 @@ class VertexOrchestrator(ContainerizedOrchestrator, GoogleCredentialsMixin):
738
756
  The dynamic component with the resource settings applied.
739
757
  """
740
758
  # Set optional CPU, RAM and GPU constraints for the pipeline
759
+ cpu_limit = None
741
760
  if resource_settings:
742
761
  cpu_limit = resource_settings.cpu_count or self.config.cpu_limit
743
762
 
@@ -778,3 +797,173 @@ class VertexOrchestrator(ContainerizedOrchestrator, GoogleCredentialsMixin):
778
797
  )
779
798
 
780
799
  return dynamic_component
800
+
801
+ def fetch_status(self, run: "PipelineRunResponse") -> ExecutionStatus:
802
+ """Refreshes the status of a specific pipeline run.
803
+
804
+ Args:
805
+ run: The run that was executed by this orchestrator.
806
+
807
+ Returns:
808
+ the actual status of the pipeline job.
809
+
810
+ Raises:
811
+ AssertionError: If the run was not executed by this orchestrator.
812
+ ValueError: If it fetches an unknown state or if we can not fetch
813
+ the orchestrator run ID.
814
+ """
815
+ # Make sure that the stack exists and is accessible
816
+ if run.stack is None:
817
+ raise ValueError(
818
+ "The stack that the run was executed on is not available "
819
+ "anymore."
820
+ )
821
+
822
+ # Make sure that the run belongs to this orchestrator
823
+ assert (
824
+ self.id
825
+ == run.stack.components[StackComponentType.ORCHESTRATOR][0].id
826
+ )
827
+
828
+ # Initialize the Vertex client
829
+ credentials, project_id = self._get_authentication()
830
+ aiplatform.init(
831
+ project=project_id,
832
+ location=self.config.location,
833
+ credentials=credentials,
834
+ )
835
+
836
+ # Fetch the status of the PipelineJob
837
+ if METADATA_ORCHESTRATOR_RUN_ID in run.run_metadata:
838
+ run_id = run.run_metadata[METADATA_ORCHESTRATOR_RUN_ID].value
839
+ elif run.orchestrator_run_id is not None:
840
+ run_id = run.orchestrator_run_id
841
+ else:
842
+ raise ValueError(
843
+ "Can not find the orchestrator run ID, thus can not fetch "
844
+ "the status."
845
+ )
846
+ status = aiplatform.PipelineJob.get(run_id).state
847
+
848
+ # Map the potential outputs to ZenML ExecutionStatus. Potential values:
849
+ # https://cloud.google.com/vertex-ai/docs/reference/rest/v1/PipelineState
850
+ if status in [PipelineState.PIPELINE_STATE_UNSPECIFIED]:
851
+ return run.status
852
+ elif status in [
853
+ PipelineState.PIPELINE_STATE_QUEUED,
854
+ PipelineState.PIPELINE_STATE_PENDING,
855
+ ]:
856
+ return ExecutionStatus.INITIALIZING
857
+ elif status in [
858
+ PipelineState.PIPELINE_STATE_RUNNING,
859
+ PipelineState.PIPELINE_STATE_PAUSED,
860
+ ]:
861
+ return ExecutionStatus.RUNNING
862
+ elif status in [PipelineState.PIPELINE_STATE_SUCCEEDED]:
863
+ return ExecutionStatus.COMPLETED
864
+
865
+ elif status in [
866
+ PipelineState.PIPELINE_STATE_FAILED,
867
+ PipelineState.PIPELINE_STATE_CANCELLING,
868
+ PipelineState.PIPELINE_STATE_CANCELLED,
869
+ ]:
870
+ return ExecutionStatus.FAILED
871
+ else:
872
+ raise ValueError("Unknown status for the pipeline job.")
873
+
874
+ def compute_metadata(
875
+ self, job: aiplatform.PipelineJob
876
+ ) -> Iterator[Dict[str, MetadataType]]:
877
+ """Generate run metadata based on the corresponding Vertex PipelineJob.
878
+
879
+ Args:
880
+ job: The corresponding PipelineJob object.
881
+
882
+ Yields:
883
+ A dictionary of metadata related to the pipeline run.
884
+ """
885
+ metadata: Dict[str, MetadataType] = {}
886
+
887
+ # Orchestrator Run ID
888
+ if run_id := self._compute_orchestrator_run_id(job):
889
+ metadata[METADATA_ORCHESTRATOR_RUN_ID] = run_id
890
+
891
+ # URL to the Vertex's pipeline view
892
+ if orchestrator_url := self._compute_orchestrator_url(job):
893
+ metadata[METADATA_ORCHESTRATOR_URL] = Uri(orchestrator_url)
894
+
895
+ # URL to the corresponding Logs Explorer page
896
+ if logs_url := self._compute_orchestrator_logs_url(job):
897
+ metadata[METADATA_ORCHESTRATOR_LOGS_URL] = Uri(logs_url)
898
+
899
+ yield metadata
900
+
901
+ @staticmethod
902
+ def _compute_orchestrator_url(
903
+ job: aiplatform.PipelineJob,
904
+ ) -> Optional[str]:
905
+ """Generate the Orchestrator Dashboard URL upon pipeline execution.
906
+
907
+ Args:
908
+ job: The corresponding PipelineJob object.
909
+
910
+ Returns:
911
+ the URL to the dashboard view in Vertex.
912
+ """
913
+ try:
914
+ return str(job._dashboard_uri())
915
+ except Exception as e:
916
+ logger.warning(
917
+ f"There was an issue while extracting the pipeline url: {e}"
918
+ )
919
+ return None
920
+
921
+ @staticmethod
922
+ def _compute_orchestrator_logs_url(
923
+ job: aiplatform.PipelineJob,
924
+ ) -> Optional[str]:
925
+ """Generate the Logs Explorer URL upon pipeline execution.
926
+
927
+ Args:
928
+ job: The corresponding PipelineJob object.
929
+
930
+ Returns:
931
+ the URL querying the pipeline logs in Logs Explorer on GCP.
932
+ """
933
+ try:
934
+ base_url = "https://console.cloud.google.com/logs/query"
935
+ query = f"""
936
+ resource.type="aiplatform.googleapis.com/PipelineJob"
937
+ resource.labels.pipeline_job_id="{job.job_id}"
938
+ """
939
+ encoded_query = urllib.parse.quote(query)
940
+ return f"{base_url}?project={job.project}&query={encoded_query}"
941
+
942
+ except Exception as e:
943
+ logger.warning(
944
+ f"There was an issue while extracting the logs url: {e}"
945
+ )
946
+ return None
947
+
948
+ @staticmethod
949
+ def _compute_orchestrator_run_id(
950
+ job: aiplatform.PipelineJob,
951
+ ) -> Optional[str]:
952
+ """Fetch the Orchestrator Run ID upon pipeline execution.
953
+
954
+ Args:
955
+ job: The corresponding PipelineJob object.
956
+
957
+ Returns:
958
+ the Execution ID of the run in Vertex.
959
+ """
960
+ try:
961
+ if job.job_id:
962
+ return str(job.job_id)
963
+
964
+ return None
965
+ except Exception as e:
966
+ logger.warning(
967
+ f"There was an issue while extracting the pipeline run ID: {e}"
968
+ )
969
+ return None
@@ -28,7 +28,7 @@ class LightningIntegration(Integration):
28
28
  """Definition of Lightning Integration for ZenML."""
29
29
 
30
30
  NAME = LIGHTNING
31
- REQUIREMENTS = ["lightning-sdk"]
31
+ REQUIREMENTS = ["lightning-sdk>=0.1.17"]
32
32
 
33
33
  @classmethod
34
34
  def flavors(cls) -> List[Type[Flavor]]:
@@ -85,6 +85,15 @@ class LightningOrchestratorConfig(
85
85
  """
86
86
  return self.synchronous
87
87
 
88
+ @property
89
+ def is_schedulable(self) -> bool:
90
+ """Whether the orchestrator is schedulable or not.
91
+
92
+ Returns:
93
+ Whether the orchestrator is schedulable or not.
94
+ """
95
+ return False
96
+
88
97
 
89
98
  class LightningOrchestratorFlavor(BaseOrchestratorFlavor):
90
99
  """Lightning orchestrator flavor."""
@@ -103,20 +103,29 @@ class LightningOrchestrator(WheeledOrchestrator):
103
103
 
104
104
  Args:
105
105
  deployment: The pipeline deployment to prepare or run.
106
+
107
+ Raises:
108
+ ValueError: If `user_id` and `api_key` are not both set, or if neither `username` nor `organization` is set.
106
109
  """
107
110
  settings = cast(
108
111
  LightningOrchestratorSettings, self.get_settings(deployment)
109
112
  )
110
- if settings.user_id:
111
- os.environ["LIGHTNING_USER_ID"] = settings.user_id
112
- if settings.api_key:
113
- os.environ["LIGHTNING_API_KEY"] = settings.api_key
113
+ if not settings.user_id or not settings.api_key:
114
+ raise ValueError(
115
+ "Lightning orchestrator requires `user_id` and `api_key` both to be set in the settings."
116
+ )
117
+ os.environ["LIGHTNING_USER_ID"] = settings.user_id
118
+ os.environ["LIGHTNING_API_KEY"] = settings.api_key
114
119
  if settings.username:
115
120
  os.environ["LIGHTNING_USERNAME"] = settings.username
121
+ elif settings.organization:
122
+ os.environ["LIGHTNING_ORG"] = settings.organization
123
+ else:
124
+ raise ValueError(
125
+ "Lightning orchestrator requires either `username` or `organization` to be set in the settings."
126
+ )
116
127
  if settings.teamspace:
117
128
  os.environ["LIGHTNING_TEAMSPACE"] = settings.teamspace
118
- if settings.organization:
119
- os.environ["LIGHTNING_ORG"] = settings.organization
120
129
 
121
130
  @property
122
131
  def config(self) -> LightningOrchestratorConfig:
@@ -267,9 +276,7 @@ class LightningOrchestrator(WheeledOrchestrator):
267
276
  ) as code_file:
268
277
  code_archive.write_archive(code_file)
269
278
  code_path = code_file.name
270
-
271
279
  filename = f"{orchestrator_run_name}.tar.gz"
272
-
273
280
  # Construct the env variables for the pipeline
274
281
  env_vars = environment.copy()
275
282
  orchestrator_run_id = str(uuid4())
@@ -392,9 +399,7 @@ class LightningOrchestrator(WheeledOrchestrator):
392
399
  f"Installing requirements: {pipeline_requirements_to_string}"
393
400
  )
394
401
  studio.run(f"uv pip install {pipeline_requirements_to_string}")
395
- studio.run(
396
- "pip uninstall zenml -y && pip install git+https://github.com/zenml-io/zenml.git@feature/lightening-studio-orchestrator"
397
- )
402
+ studio.run("pip install zenml -y")
398
403
 
399
404
  for custom_command in settings.custom_commands or []:
400
405
  studio.run(
@@ -488,9 +493,7 @@ class LightningOrchestrator(WheeledOrchestrator):
488
493
  )
489
494
  studio.run("pip install uv")
490
495
  studio.run(f"uv pip install {requirements}")
491
- studio.run(
492
- "pip uninstall zenml -y && pip install git+https://github.com/zenml-io/zenml.git@feature/lightening-studio-orchestrator"
493
- )
496
+ studio.run("pip install zenml -y")
494
497
  # studio.run(f"pip install {wheel_path.rsplit('/', 1)[-1]}")
495
498
  for command in settings.custom_commands or []:
496
499
  output = studio.run(
@@ -563,9 +566,7 @@ class LightningOrchestrator(WheeledOrchestrator):
563
566
  )
564
567
  studio.run("pip install uv")
565
568
  studio.run(f"uv pip install {details['requirements']}")
566
- studio.run(
567
- "pip uninstall zenml -y && pip install git+https://github.com/zenml-io/zenml.git@feature/lightening-studio-orchestrator"
568
- )
569
+ studio.run("pip install zenml -y")
569
570
  # studio.run(f"pip install {wheel_path.rsplit('/', 1)[-1]}")
570
571
  for command in custom_commands or []:
571
572
  output = studio.run(
@@ -166,9 +166,7 @@ def main() -> None:
166
166
  f"uv pip install {pipeline_requirements_to_string}"
167
167
  )
168
168
  logger.info(output)
169
- output = main_studio.run(
170
- "pip uninstall zenml -y && pip install git+https://github.com/zenml-io/zenml.git@feature/lightening-studio-orchestrator"
171
- )
169
+ output = main_studio.run("pip install zenml -y")
172
170
  logger.info(output)
173
171
 
174
172
  for command in pipeline_settings.custom_commands or []:
@@ -250,9 +248,7 @@ def main() -> None:
250
248
  f"uv pip install {step_requirements_to_string}"
251
249
  )
252
250
  logger.info(output)
253
- output = studio.run(
254
- "pip uninstall zenml -y && pip install git+https://github.com/zenml-io/zenml.git@feature/lightening-studio-orchestrator"
255
- )
251
+ output = studio.run("pip install zenml -y")
256
252
  logger.info(output)
257
253
  for command in step_settings.custom_commands or []:
258
254
  output = studio.run(
@@ -146,6 +146,8 @@ def mlflow_register_model_step(
146
146
  metadata.zenml_pipeline_run_uuid = pipeline_run_uuid
147
147
  if metadata.zenml_workspace is None:
148
148
  metadata.zenml_workspace = zenml_workspace
149
+ if getattr(metadata, "mlflow_run_id", None) is None:
150
+ setattr(metadata, "mlflow_run_id", mlflow_run_id)
149
151
 
150
152
  # Register model version
151
153
  model_version = model_registry.register_model_version(
@@ -250,6 +250,7 @@ class SkypilotBaseOrchestrator(ContainerizedOrchestrator):
250
250
  entrypoint_str = " ".join(command)
251
251
  arguments_str = " ".join(args)
252
252
 
253
+ task_envs = environment
253
254
  docker_environment_str = " ".join(
254
255
  f"-e {k}={v}" for k, v in environment.items()
255
256
  )
@@ -271,13 +272,10 @@ class SkypilotBaseOrchestrator(ContainerizedOrchestrator):
271
272
  f"sudo docker login --username $DOCKER_USERNAME --password "
272
273
  f"$DOCKER_PASSWORD {stack.container_registry.config.uri}"
273
274
  )
274
- task_envs = {
275
- "DOCKER_USERNAME": docker_username,
276
- "DOCKER_PASSWORD": docker_password,
277
- }
275
+ task_envs["DOCKER_USERNAME"] = docker_username
276
+ task_envs["DOCKER_PASSWORD"] = docker_password
278
277
  else:
279
278
  setup = None
280
- task_envs = None
281
279
 
282
280
  # Run the entire pipeline
283
281
 
@@ -285,15 +283,22 @@ class SkypilotBaseOrchestrator(ContainerizedOrchestrator):
285
283
  self.prepare_environment_variable(set=True)
286
284
 
287
285
  try:
286
+ if isinstance(self.cloud, sky.clouds.Kubernetes):
287
+ run_command = f"${{VIRTUAL_ENV:+$VIRTUAL_ENV/bin/}}{entrypoint_str} {arguments_str}"
288
+ setup = None
289
+ down = False
290
+ idle_minutes_to_autostop = None
291
+ else:
292
+ run_command = f"sudo docker run --rm {custom_run_args}{docker_environment_str} {image} {entrypoint_str} {arguments_str}"
293
+ down = settings.down
294
+ idle_minutes_to_autostop = settings.idle_minutes_to_autostop
288
295
  task = sky.Task(
289
- run=f"sudo docker run --rm {custom_run_args}{docker_environment_str} {image} {entrypoint_str} {arguments_str}",
296
+ run=run_command,
290
297
  setup=setup,
291
298
  envs=task_envs,
292
299
  )
293
- logger.debug(
294
- f"Running run: sudo docker run --rm {custom_run_args}{docker_environment_str} {image} {entrypoint_str} {arguments_str}"
295
- )
296
- logger.debug(f"Running run: {setup}")
300
+ logger.debug(f"Running run: {run_command}")
301
+
297
302
  task = task.set_resources(
298
303
  sky.Resources(
299
304
  cloud=self.cloud,
@@ -306,15 +311,24 @@ class SkypilotBaseOrchestrator(ContainerizedOrchestrator):
306
311
  job_recovery=settings.job_recovery,
307
312
  region=settings.region,
308
313
  zone=settings.zone,
309
- image_id=settings.image_id,
314
+ image_id=image
315
+ if isinstance(self.cloud, sky.clouds.Kubernetes)
316
+ else settings.image_id,
310
317
  disk_size=settings.disk_size,
311
318
  disk_tier=settings.disk_tier,
312
319
  )
313
320
  )
314
-
315
321
  # Set the cluster name
316
- cluster_name = settings.cluster_name
317
- if cluster_name is None:
322
+ if settings.cluster_name:
323
+ sky.exec(
324
+ task,
325
+ settings.cluster_name,
326
+ down=down,
327
+ stream_logs=settings.stream_logs,
328
+ backend=None,
329
+ detach_run=True,
330
+ )
331
+ else:
318
332
  # Find existing cluster
319
333
  for i in sky.status(refresh=True):
320
334
  if isinstance(
@@ -324,21 +338,19 @@ class SkypilotBaseOrchestrator(ContainerizedOrchestrator):
324
338
  logger.info(
325
339
  f"Found existing cluster {cluster_name}. Reusing..."
326
340
  )
327
- if cluster_name is None:
328
341
  cluster_name = self.sanitize_cluster_name(
329
342
  f"{orchestrator_run_name}"
330
343
  )
331
-
332
- # Launch the cluster
333
- sky.launch(
334
- task,
335
- cluster_name,
336
- retry_until_up=settings.retry_until_up,
337
- idle_minutes_to_autostop=settings.idle_minutes_to_autostop,
338
- down=settings.down,
339
- stream_logs=settings.stream_logs,
340
- detach_setup=True,
341
- )
344
+ # Launch the cluster
345
+ sky.launch(
346
+ task,
347
+ cluster_name,
348
+ retry_until_up=settings.retry_until_up,
349
+ idle_minutes_to_autostop=idle_minutes_to_autostop,
350
+ down=down,
351
+ stream_logs=settings.stream_logs,
352
+ detach_setup=True,
353
+ )
342
354
 
343
355
  except Exception as e:
344
356
  logger.error(f"Pipeline run failed: {e}")
@@ -0,0 +1,52 @@
1
+ # Copyright (c) ZenML GmbH 2024. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at:
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
12
+ # or implied. See the License for the specific language governing
13
+ # permissions and limitations under the License.
14
+ """Initialization of the Skypilot Kubernetes integration for ZenML.
15
+
16
+ The Skypilot integration sub-module powers an alternative to the local
17
+ orchestrator for a remote orchestration of ZenML pipelines on VMs.
18
+ """
19
+ from typing import List, Type
20
+
21
+ from zenml.integrations.constants import (
22
+ SKYPILOT_KUBERNETES,
23
+ )
24
+ from zenml.integrations.integration import Integration
25
+ from zenml.stack import Flavor
26
+
27
+ SKYPILOT_KUBERNETES_ORCHESTRATOR_FLAVOR = "vm_kubernetes"
28
+
29
+
30
+ class SkypilotKubernetesIntegration(Integration):
31
+ """Definition of Skypilot Kubernetes Integration for ZenML."""
32
+
33
+ NAME = SKYPILOT_KUBERNETES
34
+ # all 0.6.x versions of skypilot[kubernetes] are compatible
35
+ REQUIREMENTS = ["skypilot[kubernetes]~=0.6.1"]
36
+ APT_PACKAGES = ["openssh-client", "rsync"]
37
+
38
+ @classmethod
39
+ def flavors(cls) -> List[Type[Flavor]]:
40
+ """Declare the stack component flavors for the Skypilot Kubernetes integration.
41
+
42
+ Returns:
43
+ List of stack component flavors for this integration.
44
+ """
45
+ from zenml.integrations.skypilot_kubernetes.flavors import (
46
+ SkypilotKubernetesOrchestratorFlavor,
47
+ )
48
+
49
+ return [SkypilotKubernetesOrchestratorFlavor]
50
+
51
+
52
+ SkypilotKubernetesIntegration.check_installation()
@@ -0,0 +1,26 @@
1
+ # Copyright (c) ZenML GmbH 2024. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at:
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
12
+ # or implied. See the License for the specific language governing
13
+ # permissions and limitations under the License.
14
+ """Skypilot integration flavor for Skypilot Kubernetes orchestrator."""
15
+
16
+ from zenml.integrations.skypilot_kubernetes.flavors.skypilot_orchestrator_kubernetes_vm_flavor import (
17
+ SkypilotKubernetesOrchestratorConfig,
18
+ SkypilotKubernetesOrchestratorFlavor,
19
+ SkypilotKubernetesOrchestratorSettings,
20
+ )
21
+
22
+ __all__ = [
23
+ "SkypilotKubernetesOrchestratorConfig",
24
+ "SkypilotKubernetesOrchestratorFlavor",
25
+ "SkypilotKubernetesOrchestratorSettings",
26
+ ]