zenml-nightly 0.66.0.dev20240923__py3-none-any.whl → 0.66.0.dev20240925__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zenml/VERSION +1 -1
- zenml/cli/__init__.py +7 -0
- zenml/cli/base.py +2 -2
- zenml/cli/pipeline.py +21 -0
- zenml/cli/utils.py +14 -11
- zenml/client.py +68 -3
- zenml/config/step_configurations.py +0 -5
- zenml/constants.py +3 -0
- zenml/enums.py +2 -0
- zenml/integrations/aws/flavors/sagemaker_orchestrator_flavor.py +76 -7
- zenml/integrations/aws/orchestrators/sagemaker_orchestrator.py +370 -115
- zenml/integrations/azure/orchestrators/azureml_orchestrator.py +157 -4
- zenml/integrations/gcp/orchestrators/vertex_orchestrator.py +207 -18
- zenml/integrations/lightning/__init__.py +1 -1
- zenml/integrations/lightning/flavors/lightning_orchestrator_flavor.py +9 -0
- zenml/integrations/lightning/orchestrators/lightning_orchestrator.py +18 -17
- zenml/integrations/lightning/orchestrators/lightning_orchestrator_entrypoint.py +2 -6
- zenml/integrations/mlflow/steps/mlflow_registry.py +2 -0
- zenml/integrations/tensorboard/visualizers/tensorboard_visualizer.py +1 -1
- zenml/models/v2/base/filter.py +315 -149
- zenml/models/v2/base/scoped.py +5 -2
- zenml/models/v2/core/artifact_version.py +69 -8
- zenml/models/v2/core/model.py +43 -6
- zenml/models/v2/core/model_version.py +49 -1
- zenml/models/v2/core/model_version_artifact.py +18 -3
- zenml/models/v2/core/model_version_pipeline_run.py +18 -4
- zenml/models/v2/core/pipeline.py +108 -1
- zenml/models/v2/core/pipeline_run.py +172 -21
- zenml/models/v2/core/run_template.py +53 -1
- zenml/models/v2/core/stack.py +33 -5
- zenml/models/v2/core/step_run.py +7 -0
- zenml/new/pipelines/pipeline.py +4 -0
- zenml/new/pipelines/run_utils.py +4 -1
- zenml/orchestrators/base_orchestrator.py +41 -12
- zenml/stack/stack.py +11 -2
- zenml/utils/env_utils.py +54 -1
- zenml/utils/string_utils.py +50 -0
- zenml/zen_server/cloud_utils.py +33 -8
- zenml/zen_server/routers/runs_endpoints.py +89 -3
- zenml/zen_stores/sql_zen_store.py +1 -0
- {zenml_nightly-0.66.0.dev20240923.dist-info → zenml_nightly-0.66.0.dev20240925.dist-info}/METADATA +8 -1
- {zenml_nightly-0.66.0.dev20240923.dist-info → zenml_nightly-0.66.0.dev20240925.dist-info}/RECORD +45 -45
- {zenml_nightly-0.66.0.dev20240923.dist-info → zenml_nightly-0.66.0.dev20240925.dist-info}/LICENSE +0 -0
- {zenml_nightly-0.66.0.dev20240923.dist-info → zenml_nightly-0.66.0.dev20240925.dist-info}/WHEEL +0 -0
- {zenml_nightly-0.66.0.dev20240923.dist-info → zenml_nightly-0.66.0.dev20240925.dist-info}/entry_points.txt +0 -0
zenml/integrations/azure/orchestrators/azureml_orchestrator.py:

@@ -19,6 +19,7 @@ from typing import (
     TYPE_CHECKING,
     Any,
     Dict,
+    Iterator,
     List,
     Optional,
     Tuple,
@@ -46,8 +47,11 @@ from azure.identity import DefaultAzureCredential

 from zenml.config.base_settings import BaseSettings
 from zenml.config.step_configurations import Step
-from zenml.constants import METADATA_ORCHESTRATOR_URL
-from zenml.enums import StackComponentType
+from zenml.constants import (
+    METADATA_ORCHESTRATOR_RUN_ID,
+    METADATA_ORCHESTRATOR_URL,
+)
+from zenml.enums import ExecutionStatus, StackComponentType
 from zenml.integrations.azure.azureml_utils import create_or_get_compute
 from zenml.integrations.azure.flavors.azureml import AzureMLComputeTypes
 from zenml.integrations.azure.flavors.azureml_orchestrator_flavor import (
@@ -65,7 +69,7 @@ from zenml.stack import StackValidator
 from zenml.utils.string_utils import b64_encode

 if TYPE_CHECKING:
-    from zenml.models import PipelineDeploymentResponse
+    from zenml.models import PipelineDeploymentResponse, PipelineRunResponse
     from zenml.stack import Stack

 logger = get_logger(__name__)
@@ -199,7 +203,7 @@ class AzureMLOrchestrator(ContainerizedOrchestrator):
         deployment: "PipelineDeploymentResponse",
         stack: "Stack",
         environment: Dict[str, str],
-    ) -> Any:
+    ) -> Iterator[Dict[str, MetadataType]]:
         """Prepares or runs a pipeline on AzureML.

         Args:
@@ -210,6 +214,9 @@ class AzureMLOrchestrator(ContainerizedOrchestrator):

         Raises:
             RuntimeError: If the creation of the schedule fails.
+
+        Yields:
+            A dictionary of metadata related to the pipeline run.
         """
         # Authentication
         if connector := self.get_connector():
@@ -379,6 +386,10 @@ class AzureMLOrchestrator(ContainerizedOrchestrator):
         else:
             job = ml_client.jobs.create_or_update(pipeline_job)
             logger.info(f"Pipeline {run_name} has been started.")
+
+            # Yield metadata based on the generated job object
+            yield from self.compute_metadata(job)
+
         assert job.services is not None
         assert job.name is not None

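Note: with this change, the orchestrator entrypoint becomes a generator that yields run metadata instead of returning a value. A minimal sketch of the pattern, assuming a simplified metadata type of `str` (ZenML's real `MetadataType` is a broader union), shows how a caller can drain such a generator:

```python
from typing import Dict, Iterator


def submit_pipeline() -> Iterator[Dict[str, str]]:
    """Stand-in for an orchestrator entrypoint that yields metadata."""
    job_name = "example-azureml-job"  # hypothetical submitted job name
    # ... the real method would submit the AzureML PipelineJob here ...
    # Metadata is yielded as soon as the job object exists, so it can be
    # stored even while the method keeps running (e.g. waiting for the job).
    yield {"orchestrator_run_id": job_name}


collected: Dict[str, str] = {}
for chunk in submit_pipeline():
    collected.update(chunk)
print(collected)  # {'orchestrator_run_id': 'example-azureml-job'}
```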
@@ -428,3 +439,145 @@ class AzureMLOrchestrator(ContainerizedOrchestrator):
                 f"job: {e}"
             )
             return {}
+
+    def fetch_status(self, run: "PipelineRunResponse") -> ExecutionStatus:
+        """Refreshes the status of a specific pipeline run.
+
+        Args:
+            run: The run that was executed by this orchestrator.
+
+        Returns:
+            the actual status of the pipeline execution.
+
+        Raises:
+            AssertionError: If the run was not executed by this orchestrator.
+            ValueError: If it fetches an unknown state or if we can not fetch
+                the orchestrator run ID.
+        """
+        # Make sure that the stack exists and is accessible
+        if run.stack is None:
+            raise ValueError(
+                "The stack that the run was executed on is not available "
+                "anymore."
+            )
+
+        # Make sure that the run belongs to this orchestrator
+        assert (
+            self.id
+            == run.stack.components[StackComponentType.ORCHESTRATOR][0].id
+        )
+
+        # Initialize the AzureML client
+        if connector := self.get_connector():
+            credentials = connector.connect()
+        else:
+            credentials = DefaultAzureCredential()
+
+        ml_client = MLClient(
+            credential=credentials,
+            subscription_id=self.config.subscription_id,
+            resource_group_name=self.config.resource_group,
+            workspace_name=self.config.workspace,
+        )
+
+        # Fetch the status of the PipelineJob
+        if METADATA_ORCHESTRATOR_RUN_ID in run.run_metadata:
+            run_id = run.run_metadata[METADATA_ORCHESTRATOR_RUN_ID].value
+        elif run.orchestrator_run_id is not None:
+            run_id = run.orchestrator_run_id
+        else:
+            raise ValueError(
+                "Can not find the orchestrator run ID, thus can not fetch "
+                "the status."
+            )
+        status = ml_client.jobs.get(run_id).status
+
+        # Map the potential outputs to ZenML ExecutionStatus. Potential values:
+        # https://learn.microsoft.com/en-us/python/api/azure-ai-ml/azure.ai.ml.entities.pipelinejob?view=azure-python#azure-ai-ml-entities-pipelinejob-status
+        if status in [
+            "NotStarted",
+            "Starting",
+            "Provisioning",
+            "Preparing",
+            "Queued",
+        ]:
+            return ExecutionStatus.INITIALIZING
+        elif status in ["Running", "Finalizing"]:
+            return ExecutionStatus.RUNNING
+        elif status in [
+            "CancelRequested",
+            "Failed",
+            "Canceled",
+            "NotResponding",
+        ]:
+            return ExecutionStatus.FAILED
+        elif status in ["Completed"]:
+            return ExecutionStatus.COMPLETED
+        else:
+            raise ValueError("Unknown status for the pipeline job.")
+
+    def compute_metadata(self, job: Any) -> Iterator[Dict[str, MetadataType]]:
+        """Generate run metadata based on the generated AzureML PipelineJob.
+
+        Args:
+            job: The corresponding PipelineJob object.
+
+        Yields:
+            A dictionary of metadata related to the pipeline run.
+        """
+        # Metadata
+        metadata: Dict[str, MetadataType] = {}
+
+        # Orchestrator Run ID
+        if run_id := self._compute_orchestrator_run_id(job):
+            metadata[METADATA_ORCHESTRATOR_RUN_ID] = run_id
+
+        # URL to the AzureML's pipeline view
+        if orchestrator_url := self._compute_orchestrator_url(job):
+            metadata[METADATA_ORCHESTRATOR_URL] = Uri(orchestrator_url)
+
+        yield metadata
+
+    @staticmethod
+    def _compute_orchestrator_url(job: Any) -> Optional[str]:
+        """Generate the Orchestrator Dashboard URL upon pipeline execution.
+
+        Args:
+            job: The corresponding PipelineJob object.
+
+        Returns:
+            the URL to the dashboard view in AzureML.
+        """
+        try:
+            if job.studio_url:
+                return str(job.studio_url)
+
+            return None
+
+        except Exception as e:
+            logger.warning(
+                f"There was an issue while extracting the pipeline url: {e}"
+            )
+            return None
+
+    @staticmethod
+    def _compute_orchestrator_run_id(job: Any) -> Optional[str]:
+        """Fetch the orchestrator run ID upon pipeline execution.
+
+        Args:
+            job: The corresponding PipelineJob object.
+
+        Returns:
+            the run ID of the pipeline run in AzureML.
+        """
+        try:
+            if job.name:
+                return str(job.name)
+
+            return None
+
+        except Exception as e:
+            logger.warning(
+                f"There was an issue while extracting the pipeline run ID: {e}"
+            )
+            return None
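Note: the status translation in `fetch_status` is effectively a lookup table from AzureML `PipelineJob` states to ZenML's `ExecutionStatus`. A standalone sketch of that mapping (the enum is a simplified stand-in; the state strings are the ones listed in the hunk above):

```python
from enum import Enum


class ExecutionStatus(Enum):  # simplified stand-in for zenml.enums.ExecutionStatus
    INITIALIZING = "initializing"
    RUNNING = "running"
    FAILED = "failed"
    COMPLETED = "completed"


_AZUREML_TO_ZENML = {
    **dict.fromkeys(
        ["NotStarted", "Starting", "Provisioning", "Preparing", "Queued"],
        ExecutionStatus.INITIALIZING,
    ),
    **dict.fromkeys(["Running", "Finalizing"], ExecutionStatus.RUNNING),
    **dict.fromkeys(
        ["CancelRequested", "Failed", "Canceled", "NotResponding"],
        ExecutionStatus.FAILED,
    ),
    "Completed": ExecutionStatus.COMPLETED,
}


def map_azureml_status(status: str) -> ExecutionStatus:
    # Unknown states are rejected, mirroring the ValueError in the diff.
    if status not in _AZUREML_TO_ZENML:
        raise ValueError("Unknown status for the pipeline job.")
    return _AZUREML_TO_ZENML[status]


print(map_azureml_status("Queued"))  # ExecutionStatus.INITIALIZING
```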
zenml/integrations/gcp/orchestrators/vertex_orchestrator.py:

@@ -32,10 +32,12 @@
 import os
 import re
 import types
+import urllib
 from typing import (
     TYPE_CHECKING,
     Any,
     Dict,
+    Iterator,
     List,
     Optional,
     Tuple,
@@ -46,15 +48,18 @@ from uuid import UUID

 from google.api_core import exceptions as google_exceptions
 from google.cloud import aiplatform
+from google.cloud.aiplatform_v1.types import PipelineState
 from kfp import dsl
 from kfp.compiler import Compiler

 from zenml.config.resource_settings import ResourceSettings
 from zenml.constants import (
+    METADATA_ORCHESTRATOR_LOGS_URL,
+    METADATA_ORCHESTRATOR_RUN_ID,
     METADATA_ORCHESTRATOR_URL,
 )
 from zenml.entrypoints import StepEntrypointConfiguration
-from zenml.enums import StackComponentType
+from zenml.enums import ExecutionStatus, StackComponentType
 from zenml.integrations.gcp import GCP_ARTIFACT_STORE_FLAVOR
 from zenml.integrations.gcp.constants import (
     GKE_ACCELERATOR_NODE_SELECTOR_CONSTRAINT_LABEL,
@@ -77,7 +82,11 @@ from zenml.utils.io_utils import get_global_config_directory

 if TYPE_CHECKING:
     from zenml.config.base_settings import BaseSettings
-    from zenml.models import PipelineDeploymentResponse, ScheduleResponse
+    from zenml.models import (
+        PipelineDeploymentResponse,
+        PipelineRunResponse,
+        ScheduleResponse,
+    )
     from zenml.stack import Stack

 logger = get_logger(__name__)
@@ -245,8 +254,8 @@ class VertexOrchestrator(ContainerizedOrchestrator, GoogleCredentialsMixin):
         ):
             logger.warning(
                 "Vertex orchestrator only uses schedules with the "
-                "`cron_expression` property, with optional `start_time`. "
-                "All other properties are ignored."
+                "`cron_expression` property, with optional `start_time` "
+                "and/or `end_time`. All other properties are ignored."
             )
         if deployment.schedule.cron_expression is None:
             raise ValueError(
@@ -302,7 +311,7 @@ class VertexOrchestrator(ContainerizedOrchestrator, GoogleCredentialsMixin):
         deployment: "PipelineDeploymentResponse",
         stack: "Stack",
         environment: Dict[str, str],
-    ) -> Any:
+    ) -> Iterator[Dict[str, MetadataType]]:
         """Creates a KFP JSON pipeline.

         # noqa: DAR402
@@ -337,12 +346,15 @@ class VertexOrchestrator(ContainerizedOrchestrator, GoogleCredentialsMixin):
             environment.

         Raises:
-            ValueError: If the attribute `pipeline_root` is not set and it
+            ValueError: If the attribute `pipeline_root` is not set, and it
                 cannot be generated using the path of the artifact store in the
                 stack because it is not a
                 `zenml.integrations.gcp.artifact_store.GCPArtifactStore`. Also gets
                 raised if attempting to schedule pipeline run without using the
                 `zenml.integrations.gcp.artifact_store.GCPArtifactStore`.
+
+        Yields:
+            A dictionary of metadata related to the pipeline run.
         """
         orchestrator_run_name = get_orchestrator_run_name(
             pipeline_name=deployment.pipeline_configuration.name
@@ -556,15 +568,15 @@ class VertexOrchestrator(ContainerizedOrchestrator, GoogleCredentialsMixin):
         )

         # Using the Google Cloud AIPlatform client, upload and execute the
-        # pipeline
-
-        self._upload_and_run_pipeline(
+        # pipeline on the Vertex AI Pipelines service.
+        if metadata := self._upload_and_run_pipeline(
             pipeline_name=deployment.pipeline_configuration.name,
             pipeline_file_path=pipeline_file_path,
             run_name=orchestrator_run_name,
             settings=settings,
             schedule=deployment.schedule,
-        )
+        ):
+            yield from metadata

     def _upload_and_run_pipeline(
         self,
@@ -573,7 +585,7 @@ class VertexOrchestrator(ContainerizedOrchestrator, GoogleCredentialsMixin):
         run_name: str,
         settings: VertexOrchestratorSettings,
         schedule: Optional["ScheduleResponse"] = None,
-    ) -> None:
+    ) -> Iterator[Dict[str, MetadataType]]:
         """Uploads and runs the pipeline on the Vertex AI Pipelines service.

         Args:
@@ -585,7 +597,11 @@ class VertexOrchestrator(ContainerizedOrchestrator, GoogleCredentialsMixin):
             schedule: The schedule the pipeline will run on.

         Raises:
-            RuntimeError: If the Vertex Orchestrator fails to provision or any
+            RuntimeError: If the Vertex Orchestrator fails to provision or any
+                other Runtime errors.
+
+        Yields:
+            A dictionary of metadata related to the pipeline run.
         """
         # We have to replace the hyphens in the run name with underscores
         # and lower case the string, because the Vertex AI Pipelines service
@@ -593,8 +609,7 @@ class VertexOrchestrator(ContainerizedOrchestrator, GoogleCredentialsMixin):
         job_id = _clean_pipeline_name(run_name)

         # Get the credentials that would be used to create the Vertex AI
-        # Pipelines
-        # job.
+        # Pipelines job.
         credentials, project_id = self._get_authentication()

         # Instantiate the Vertex AI Pipelines job
@@ -629,7 +644,8 @@ class VertexOrchestrator(ContainerizedOrchestrator, GoogleCredentialsMixin):
         try:
             if schedule:
                 logger.info(
-                    "Scheduling job using native Vertex AI Pipelines scheduling..."
+                    "Scheduling job using native Vertex AI Pipelines "
+                    "scheduling..."
                 )
                 run.create_schedule(
                     display_name=schedule.name,
@@ -645,13 +661,12 @@ class VertexOrchestrator(ContainerizedOrchestrator, GoogleCredentialsMixin):
                     "No schedule detected. Creating one-off Vertex job..."
                 )
                 logger.info(
-                    "Submitting pipeline job with job_id `%s` to Vertex AI "
-                    "service.",
+                    "Submitting pipeline job with job_id `%s` to Vertex AI "
+                    "Pipelines service.",
                     job_id,
                 )

                 # Submit the job to Vertex AI Pipelines service.
-
                 run.submit(
                     service_account=self.config.workload_service_account,
                     network=self.config.network,
@@ -661,6 +676,9 @@ class VertexOrchestrator(ContainerizedOrchestrator, GoogleCredentialsMixin):
                     run._dashboard_uri(),
                 )

+                # Yield metadata based on the generated job object
+                yield from self.compute_metadata(run)
+
                 if settings.synchronous:
                     logger.info(
                         "Waiting for the Vertex AI Pipelines job to finish..."
@@ -738,6 +756,7 @@ class VertexOrchestrator(ContainerizedOrchestrator, GoogleCredentialsMixin):
             The dynamic component with the resource settings applied.
         """
         # Set optional CPU, RAM and GPU constraints for the pipeline
+        cpu_limit = None
         if resource_settings:
             cpu_limit = resource_settings.cpu_count or self.config.cpu_limit

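Note: the single added line above (`cpu_limit = None`) initializes the variable before the conditional; without it, code that later reads `cpu_limit` raises `UnboundLocalError` whenever `resource_settings` is falsy. A minimal reproduction of the failure mode:

```python
def without_init(resource_settings=None):
    if resource_settings:
        cpu_limit = "4"
    return cpu_limit  # UnboundLocalError if resource_settings is falsy


def with_init(resource_settings=None):
    cpu_limit = None  # mirrors the fix in the hunk above
    if resource_settings:
        cpu_limit = "4"
    return cpu_limit  # None when no resource settings are given


print(with_init())                  # None
print(with_init({"cpu_count": 4}))  # "4"
```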
@@ -778,3 +797,173 @@ class VertexOrchestrator(ContainerizedOrchestrator, GoogleCredentialsMixin):
         )

         return dynamic_component
+
+    def fetch_status(self, run: "PipelineRunResponse") -> ExecutionStatus:
+        """Refreshes the status of a specific pipeline run.
+
+        Args:
+            run: The run that was executed by this orchestrator.
+
+        Returns:
+            the actual status of the pipeline job.
+
+        Raises:
+            AssertionError: If the run was not executed by this orchestrator.
+            ValueError: If it fetches an unknown state or if we can not fetch
+                the orchestrator run ID.
+        """
+        # Make sure that the stack exists and is accessible
+        if run.stack is None:
+            raise ValueError(
+                "The stack that the run was executed on is not available "
+                "anymore."
+            )
+
+        # Make sure that the run belongs to this orchestrator
+        assert (
+            self.id
+            == run.stack.components[StackComponentType.ORCHESTRATOR][0].id
+        )
+
+        # Initialize the Vertex client
+        credentials, project_id = self._get_authentication()
+        aiplatform.init(
+            project=project_id,
+            location=self.config.location,
+            credentials=credentials,
+        )
+
+        # Fetch the status of the PipelineJob
+        if METADATA_ORCHESTRATOR_RUN_ID in run.run_metadata:
+            run_id = run.run_metadata[METADATA_ORCHESTRATOR_RUN_ID].value
+        elif run.orchestrator_run_id is not None:
+            run_id = run.orchestrator_run_id
+        else:
+            raise ValueError(
+                "Can not find the orchestrator run ID, thus can not fetch "
+                "the status."
+            )
+        status = aiplatform.PipelineJob.get(run_id).state
+
+        # Map the potential outputs to ZenML ExecutionStatus. Potential values:
+        # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker/client/describe_pipeline_execution.html#
+        if status in [PipelineState.PIPELINE_STATE_UNSPECIFIED]:
+            return run.status
+        elif status in [
+            PipelineState.PIPELINE_STATE_QUEUED,
+            PipelineState.PIPELINE_STATE_PENDING,
+        ]:
+            return ExecutionStatus.INITIALIZING
+        elif status in [
+            PipelineState.PIPELINE_STATE_RUNNING,
+            PipelineState.PIPELINE_STATE_PAUSED,
+        ]:
+            return ExecutionStatus.RUNNING
+        elif status in [PipelineState.PIPELINE_STATE_SUCCEEDED]:
+            return ExecutionStatus.COMPLETED
+
+        elif status in [
+            PipelineState.PIPELINE_STATE_FAILED,
+            PipelineState.PIPELINE_STATE_CANCELLING,
+            PipelineState.PIPELINE_STATE_CANCELLED,
+        ]:
+            return ExecutionStatus.FAILED
+        else:
+            raise ValueError("Unknown status for the pipeline job.")
+
+    def compute_metadata(
+        self, job: aiplatform.PipelineJob
+    ) -> Iterator[Dict[str, MetadataType]]:
+        """Generate run metadata based on the corresponding Vertex PipelineJob.
+
+        Args:
+            job: The corresponding PipelineJob object.
+
+        Yields:
+            A dictionary of metadata related to the pipeline run.
+        """
+        metadata: Dict[str, MetadataType] = {}
+
+        # Orchestrator Run ID
+        if run_id := self._compute_orchestrator_run_id(job):
+            metadata[METADATA_ORCHESTRATOR_RUN_ID] = run_id
+
+        # URL to the Vertex's pipeline view
+        if orchestrator_url := self._compute_orchestrator_url(job):
+            metadata[METADATA_ORCHESTRATOR_URL] = Uri(orchestrator_url)
+
+        # URL to the corresponding Logs Explorer page
+        if logs_url := self._compute_orchestrator_logs_url(job):
+            metadata[METADATA_ORCHESTRATOR_LOGS_URL] = Uri(logs_url)
+
+        yield metadata
+
+    @staticmethod
+    def _compute_orchestrator_url(
+        job: aiplatform.PipelineJob,
+    ) -> Optional[str]:
+        """Generate the Orchestrator Dashboard URL upon pipeline execution.
+
+        Args:
+            job: The corresponding PipelineJob object.
+
+        Returns:
+            the URL to the dashboard view in Vertex.
+        """
+        try:
+            return str(job._dashboard_uri())
+        except Exception as e:
+            logger.warning(
+                f"There was an issue while extracting the pipeline url: {e}"
+            )
+            return None
+
+    @staticmethod
+    def _compute_orchestrator_logs_url(
+        job: aiplatform.PipelineJob,
+    ) -> Optional[str]:
+        """Generate the Logs Explorer URL upon pipeline execution.
+
+        Args:
+            job: The corresponding PipelineJob object.
+
+        Returns:
+            the URL querying the pipeline logs in Logs Explorer on GCP.
+        """
+        try:
+            base_url = "https://console.cloud.google.com/logs/query"
+            query = f"""
+            resource.type="aiplatform.googleapis.com/PipelineJob"
+            resource.labels.pipeline_job_id="{job.job_id}"
+            """
+            encoded_query = urllib.parse.quote(query)
+            return f"{base_url}?project={job.project}&query={encoded_query}"
+
+        except Exception as e:
+            logger.warning(
+                f"There was an issue while extracting the logs url: {e}"
+            )
+            return None
+
+    @staticmethod
+    def _compute_orchestrator_run_id(
+        job: aiplatform.PipelineJob,
+    ) -> Optional[str]:
+        """Fetch the Orchestrator Run ID upon pipeline execution.
+
+        Args:
+            job: The corresponding PipelineJob object.
+
+        Returns:
+            the Execution ID of the run in Vertex.
+        """
+        try:
+            if job.job_id:
+                return str(job.job_id)
+
+            return None
+        except Exception as e:
+            logger.warning(
+                f"There was an issue while extracting the pipeline run ID: {e}"
+            )
+            return None
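Note: the new `_compute_orchestrator_logs_url` helper percent-encodes a Logs Explorer filter that selects log entries for a single `PipelineJob`. A standalone sketch with hypothetical project and job ID values:

```python
import urllib.parse

# Hypothetical values; the real helper reads these off the PipelineJob object.
project = "my-gcp-project"
pipeline_job_id = "training-pipeline-2024-09-25"

base_url = "https://console.cloud.google.com/logs/query"
query = f"""
resource.type="aiplatform.googleapis.com/PipelineJob"
resource.labels.pipeline_job_id="{pipeline_job_id}"
"""
# quote() percent-encodes the newlines and quotes so the filter survives
# being embedded in a URL query parameter.
encoded_query = urllib.parse.quote(query)
print(f"{base_url}?project={project}&query={encoded_query}")
```

The sketch imports `urllib.parse` explicitly; the diff itself relies on `import urllib` plus `urllib.parse` having been imported elsewhere in the process.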
zenml/integrations/lightning/__init__.py:

@@ -28,7 +28,7 @@ class LightningIntegration(Integration):
     """Definition of Lightning Integration for ZenML."""

     NAME = LIGHTNING
-    REQUIREMENTS = ["lightning-sdk"]
+    REQUIREMENTS = ["lightning-sdk>=0.1.17"]

     @classmethod
     def flavors(cls) -> List[Type[Flavor]]:
zenml/integrations/lightning/flavors/lightning_orchestrator_flavor.py:

@@ -85,6 +85,15 @@ class LightningOrchestratorConfig(
         """
         return self.synchronous

+    @property
+    def is_schedulable(self) -> bool:
+        """Whether the orchestrator is schedulable or not.
+
+        Returns:
+            Whether the orchestrator is schedulable or not.
+        """
+        return False
+

 class LightningOrchestratorFlavor(BaseOrchestratorFlavor):
     """Lightning orchestrator flavor."""
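Note: `is_schedulable` is a new flag on orchestrator configs in this release (see the `base_orchestrator.py` entry in the file list above), and the Lightning flavor opts out of scheduling. A hedged sketch of how such a flag can gate schedule submission; the guard shown here is illustrative, not ZenML's exact base-class code:

```python
class OrchestratorConfig:
    @property
    def is_schedulable(self) -> bool:
        return False  # the Lightning config above returns False


def submit(config: OrchestratorConfig, schedule: object = None) -> str:
    # Reject schedules up front when the orchestrator cannot honor them.
    if schedule is not None and not config.is_schedulable:
        raise ValueError("This orchestrator does not support scheduled runs.")
    return "submitted"


print(submit(OrchestratorConfig()))  # "submitted"
# submit(OrchestratorConfig(), schedule=object())  # would raise ValueError
```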
zenml/integrations/lightning/orchestrators/lightning_orchestrator.py:

@@ -103,20 +103,29 @@ class LightningOrchestrator(WheeledOrchestrator):

         Args:
             deployment: The pipeline deployment to prepare or run.
+
+        Raises:
+            ValueError: If the user id and api key or username and organization
         """
         settings = cast(
             LightningOrchestratorSettings, self.get_settings(deployment)
         )
-        if settings.user_id:
-            os.environ["LIGHTNING_USER_ID"] = settings.user_id
-        if settings.api_key:
-            os.environ["LIGHTNING_API_KEY"] = settings.api_key
+        if not settings.user_id or not settings.api_key:
+            raise ValueError(
+                "Lightning orchestrator requires `user_id` and `api_key` both to be set in the settings."
+            )
+        os.environ["LIGHTNING_USER_ID"] = settings.user_id
+        os.environ["LIGHTNING_API_KEY"] = settings.api_key
         if settings.username:
             os.environ["LIGHTNING_USERNAME"] = settings.username
+        elif settings.organization:
+            os.environ["LIGHTNING_ORG"] = settings.organization
+        else:
+            raise ValueError(
+                "Lightning orchestrator requires either `username` or `organization` to be set in the settings."
+            )
         if settings.teamspace:
             os.environ["LIGHTNING_TEAMSPACE"] = settings.teamspace
-        if settings.organization:
-            os.environ["LIGHTNING_ORG"] = settings.organization

     @property
     def config(self) -> LightningOrchestratorConfig:
@@ -267,9 +276,7 @@ class LightningOrchestrator(WheeledOrchestrator):
         ) as code_file:
             code_archive.write_archive(code_file)
             code_path = code_file.name
-
         filename = f"{orchestrator_run_name}.tar.gz"
-
         # Construct the env variables for the pipeline
         env_vars = environment.copy()
         orchestrator_run_id = str(uuid4())
@@ -392,9 +399,7 @@ class LightningOrchestrator(WheeledOrchestrator):
             f"Installing requirements: {pipeline_requirements_to_string}"
         )
         studio.run(f"uv pip install {pipeline_requirements_to_string}")
-        studio.run(
-            "pip uninstall zenml -y && pip install git+https://github.com/zenml-io/zenml.git@feature/lightening-studio-orchestrator"
-        )
+        studio.run("pip install zenml -y")

         for custom_command in settings.custom_commands or []:
             studio.run(
@@ -488,9 +493,7 @@ class LightningOrchestrator(WheeledOrchestrator):
         )
         studio.run("pip install uv")
         studio.run(f"uv pip install {requirements}")
-        studio.run(
-            "pip uninstall zenml -y && pip install git+https://github.com/zenml-io/zenml.git@feature/lightening-studio-orchestrator"
-        )
+        studio.run("pip install zenml -y")
         # studio.run(f"pip install {wheel_path.rsplit('/', 1)[-1]}")
         for command in settings.custom_commands or []:
             output = studio.run(
@@ -563,9 +566,7 @@ class LightningOrchestrator(WheeledOrchestrator):
         )
         studio.run("pip install uv")
         studio.run(f"uv pip install {details['requirements']}")
-        studio.run(
-            "pip uninstall zenml -y && pip install git+https://github.com/zenml-io/zenml.git@feature/lightening-studio-orchestrator"
-        )
+        studio.run("pip install zenml -y")
         # studio.run(f"pip install {wheel_path.rsplit('/', 1)[-1]}")
         for command in custom_commands or []:
             output = studio.run(
zenml/integrations/lightning/orchestrators/lightning_orchestrator_entrypoint.py:

@@ -166,9 +166,7 @@ def main() -> None:
             f"uv pip install {pipeline_requirements_to_string}"
         )
         logger.info(output)
-        output = main_studio.run(
-            "pip uninstall zenml -y && pip install git+https://github.com/zenml-io/zenml.git@feature/lightening-studio-orchestrator"
-        )
+        output = main_studio.run("pip install zenml -y")
         logger.info(output)

         for command in pipeline_settings.custom_commands or []:
@@ -250,9 +248,7 @@ def main() -> None:
                 f"uv pip install {step_requirements_to_string}"
             )
             logger.info(output)
-            output = studio.run(
-                "pip uninstall zenml -y && pip install git+https://github.com/zenml-io/zenml.git@feature/lightening-studio-orchestrator"
-            )
+            output = studio.run("pip install zenml -y")
             logger.info(output)
             for command in step_settings.custom_commands or []:
                 output = studio.run(
zenml/integrations/mlflow/steps/mlflow_registry.py:

@@ -146,6 +146,8 @@ def mlflow_register_model_step(
         metadata.zenml_pipeline_run_uuid = pipeline_run_uuid
     if metadata.zenml_workspace is None:
         metadata.zenml_workspace = zenml_workspace
+    if getattr(metadata, "mlflow_run_id", None) is None:
+        setattr(metadata, "mlflow_run_id", mlflow_run_id)

     # Register model version
     model_version = model_registry.register_model_version(
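Note: the two added lines backfill `mlflow_run_id` on the metadata object only when it is missing or `None`, so an explicitly provided value is never overwritten. The same pattern in isolation, with a stand-in metadata class and a hypothetical run ID:

```python
class ModelRegistryModelMetadata:  # stand-in for the real metadata class
    pass


metadata = ModelRegistryModelMetadata()
mlflow_run_id = "run-abc123"  # hypothetical current MLflow run ID

# Only set the attribute when it is absent or None; existing values win.
if getattr(metadata, "mlflow_run_id", None) is None:
    setattr(metadata, "mlflow_run_id", mlflow_run_id)

print(metadata.mlflow_run_id)  # run-abc123
```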
|