zenml-nightly 0.83.1.dev20250624__py3-none-any.whl → 0.83.1.dev20250626__py3-none-any.whl
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
- zenml/VERSION +1 -1
- zenml/cli/base.py +3 -2
- zenml/cli/login.py +21 -3
- zenml/cli/service_connectors.py +5 -12
- zenml/cli/stack.py +1 -5
- zenml/cli/utils.py +8 -52
- zenml/client.py +32 -40
- zenml/config/__init__.py +13 -2
- zenml/constants.py +0 -1
- zenml/exceptions.py +16 -0
- zenml/integrations/airflow/orchestrators/airflow_orchestrator.py +15 -6
- zenml/integrations/aws/container_registries/aws_container_registry.py +3 -1
- zenml/integrations/aws/orchestrators/sagemaker_orchestrator.py +54 -58
- zenml/integrations/azure/orchestrators/azureml_orchestrator.py +28 -19
- zenml/integrations/databricks/orchestrators/databricks_orchestrator.py +19 -63
- zenml/integrations/databricks/orchestrators/databricks_orchestrator_entrypoint_config.py +8 -3
- zenml/integrations/gcp/orchestrators/vertex_orchestrator.py +36 -61
- zenml/integrations/hyperai/orchestrators/hyperai_orchestrator.py +19 -22
- zenml/integrations/integration.py +23 -58
- zenml/integrations/kubeflow/orchestrators/kubeflow_orchestrator.py +28 -31
- zenml/integrations/kubernetes/orchestrators/kubernetes_orchestrator.py +33 -20
- zenml/integrations/lightning/orchestrators/lightning_orchestrator.py +25 -100
- zenml/integrations/skypilot/orchestrators/skypilot_base_vm_orchestrator.py +19 -8
- zenml/integrations/skypilot/utils.py +17 -13
- zenml/integrations/tekton/orchestrators/tekton_orchestrator.py +28 -12
- zenml/models/__init__.py +2 -0
- zenml/models/v2/core/service_connector.py +178 -108
- zenml/models/v2/core/step_run.py +1 -0
- zenml/orchestrators/__init__.py +2 -0
- zenml/orchestrators/base_orchestrator.py +137 -66
- zenml/orchestrators/input_utils.py +5 -13
- zenml/orchestrators/local/local_orchestrator.py +19 -9
- zenml/orchestrators/local_docker/local_docker_orchestrator.py +15 -5
- zenml/orchestrators/publish_utils.py +24 -0
- zenml/orchestrators/step_run_utils.py +1 -2
- zenml/pipelines/run_utils.py +12 -7
- zenml/service_connectors/service_connector.py +11 -61
- zenml/service_connectors/service_connector_utils.py +4 -2
- zenml/step_operators/step_operator_entrypoint_configuration.py +1 -1
- zenml/utils/package_utils.py +111 -1
- zenml/zen_server/routers/service_connectors_endpoints.py +7 -22
- zenml/zen_stores/migrations/versions/5bb25e95849c_add_internal_secrets.py +62 -0
- zenml/zen_stores/rest_zen_store.py +204 -132
- zenml/zen_stores/schemas/secret_schemas.py +5 -0
- zenml/zen_stores/schemas/service_connector_schemas.py +16 -14
- zenml/zen_stores/secrets_stores/service_connector_secrets_store.py +4 -1
- zenml/zen_stores/sql_zen_store.py +241 -119
- zenml/zen_stores/zen_store_interface.py +9 -1
- {zenml_nightly-0.83.1.dev20250624.dist-info → zenml_nightly-0.83.1.dev20250626.dist-info}/METADATA +1 -1
- {zenml_nightly-0.83.1.dev20250624.dist-info → zenml_nightly-0.83.1.dev20250626.dist-info}/RECORD +53 -53
- zenml/utils/integration_utils.py +0 -34
- {zenml_nightly-0.83.1.dev20250624.dist-info → zenml_nightly-0.83.1.dev20250626.dist-info}/LICENSE +0 -0
- {zenml_nightly-0.83.1.dev20250624.dist-info → zenml_nightly-0.83.1.dev20250626.dist-info}/WHEEL +0 -0
- {zenml_nightly-0.83.1.dev20250624.dist-info → zenml_nightly-0.83.1.dev20250626.dist-info}/entry_points.txt +0 -0
zenml/integrations/aws/orchestrators/sagemaker_orchestrator.py

@@ -19,7 +19,6 @@ from typing import (
     TYPE_CHECKING,
     Any,
     Dict,
-    Iterator,
     List,
     Optional,
     Tuple,
@@ -60,7 +59,6 @@ from zenml.constants import (
 )
 from zenml.enums import (
     ExecutionStatus,
-    MetadataResourceTypes,
     StackComponentType,
 )
 from zenml.integrations.aws.flavors.sagemaker_orchestrator_flavor import (
@@ -73,7 +71,7 @@ from zenml.integrations.aws.orchestrators.sagemaker_orchestrator_entrypoint_conf
 )
 from zenml.logger import get_logger
 from zenml.metadata.metadata_types import MetadataType, Uri
-from zenml.orchestrators import ContainerizedOrchestrator
+from zenml.orchestrators import ContainerizedOrchestrator, SubmissionResult
 from zenml.orchestrators.utils import get_orchestrator_run_name
 from zenml.stack import StackValidator
 from zenml.utils.env_utils import split_environment_variables
@@ -273,20 +271,25 @@ class SagemakerOrchestrator(ContainerizedOrchestrator):
             boto_session=boto_session, default_bucket=self.config.bucket
         )

-    def prepare_or_run_pipeline(
+    def submit_pipeline(
         self,
         deployment: "PipelineDeploymentResponse",
         stack: "Stack",
         environment: Dict[str, str],
         placeholder_run: Optional["PipelineRunResponse"] = None,
-    ) -> Iterator[Dict[str, MetadataType]]:
-        """Prepares or runs a pipeline on Sagemaker.
+    ) -> Optional[SubmissionResult]:
+        """Submits a pipeline to the orchestrator.
+
+        This method should only submit the pipeline and not wait for it to
+        complete. If the orchestrator is configured to wait for the pipeline run
+        to complete, a function that waits for the pipeline run to complete can
+        be passed as part of the submission result.

         Args:
-            deployment: The deployment to prepare or run.
-            stack: The stack the pipeline will run on.
+            deployment: The pipeline deployment to submit.
+            stack: The stack the pipeline will run on.
             environment: Environment variables to set in the orchestration
-                environment.
+                environment. These don't need to be set if running locally.
             placeholder_run: An optional placeholder run for the deployment.

         Raises:
@@ -296,8 +299,8 @@ class SagemakerOrchestrator(ContainerizedOrchestrator):
                 AWS SageMaker NetworkConfig class.
             ValueError: If the schedule is not valid.

-        Yields:
-            A dictionary of metadata related to the pipeline run.
+        Returns:
+            Optional submission result.
         """
         # sagemaker requires pipelineName to use alphanum and hyphens only
         unsanitized_orchestrator_run_name = get_orchestrator_run_name(
@@ -705,26 +708,14 @@ class SagemakerOrchestrator(ContainerizedOrchestrator):
            )
            logger.info(f"The schedule ARN is: {triggers[0]}")

+            schedule_metadata = {}
            try:
-                from zenml.models import RunMetadataResource
-
                schedule_metadata = self.generate_schedule_metadata(
                    schedule_arn=triggers[0]
                )
-
-                Client().create_run_metadata(
-                    metadata=schedule_metadata,  # type: ignore[arg-type]
-                    resources=[
-                        RunMetadataResource(
-                            id=deployment.schedule.id,
-                            type=MetadataResourceTypes.SCHEDULE,
-                        )
-                    ],
-                )
            except Exception as e:
                logger.debug(
-                    "There was an error … "
-                    f"schedule: {e}"
+                    "There was an error generating schedule metadata: %s", e
                )

            logger.info(
@@ -749,6 +740,7 @@ class SagemakerOrchestrator(ContainerizedOrchestrator):
            logger.info(
                f"`aws scheduler delete-schedule --name {schedule_name}`"
            )
+            return SubmissionResult(metadata=schedule_metadata)
        else:
            # Execute the pipeline immediately if no schedule is specified
            execution = pipeline.start()
@@ -757,33 +749,40 @@ class SagemakerOrchestrator(ContainerizedOrchestrator):
                "when using the Sagemaker Orchestrator."
            )

-            …
-            yield from self.compute_metadata(
+            run_metadata = self.compute_metadata(
                execution_arn=execution.arn, settings=settings
            )

+            _wait_for_completion = None
            if settings.synchronous:
-                logger.info(
-                    "Executing synchronously. Waiting for pipeline to "
-                    "finish... \n"
-                    "At this point you can `Ctrl-C` out without cancelling the "
-                    "execution."
-                )
-                try:
-                    execution.wait(
-                        delay=POLLING_DELAY, max_attempts=MAX_POLLING_ATTEMPTS
-                    )
-                    logger.info("Pipeline completed successfully.")
-                except WaiterError:
-                    raise RuntimeError(
-                        "Timed out while waiting for pipeline execution to "
-                        "finish. For long-running pipelines we recommend "
-                        "configuring your orchestrator for asynchronous "
-                        "execution. The following command does this for you: \n"
-                        f"`zenml orchestrator update {self.name} "
-                        f"--synchronous=False`"
+
+                def _wait_for_completion() -> None:
+                    logger.info(
+                        "Executing synchronously. Waiting for pipeline to "
+                        "finish... \n"
+                        "At this point you can `Ctrl-C` out without cancelling the "
+                        "execution."
                    )
+                    try:
+                        execution.wait(
+                            delay=POLLING_DELAY,
+                            max_attempts=MAX_POLLING_ATTEMPTS,
+                        )
+                        logger.info("Pipeline completed successfully.")
+                    except WaiterError:
+                        raise RuntimeError(
+                            "Timed out while waiting for pipeline execution to "
+                            "finish. For long-running pipelines we recommend "
+                            "configuring your orchestrator for asynchronous "
+                            "execution. The following command does this for you: \n"
+                            f"`zenml orchestrator update {self.name} "
+                            f"--synchronous=False`"
+                        )
+
+            return SubmissionResult(
+                wait_for_completion=_wait_for_completion,
+                metadata=run_metadata,
+            )

     def get_pipeline_run_metadata(
         self, run_id: UUID
@@ -798,20 +797,15 @@ class SagemakerOrchestrator(ContainerizedOrchestrator):
         """
         execution_arn = os.environ[ENV_ZENML_SAGEMAKER_RUN_ID]

-        run_metadata: Dict[str, "MetadataType"] = {}
-
         settings = cast(
             SagemakerOrchestratorSettings,
             self.get_settings(Client().get_pipeline_run(run_id)),
         )

-        for metadata in self.compute_metadata(
+        return self.compute_metadata(
             execution_arn=execution_arn,
             settings=settings,
-        ):
-            run_metadata.update(metadata)
-
-        return run_metadata
+        )

     def fetch_status(self, run: "PipelineRunResponse") -> ExecutionStatus:
         """Refreshes the status of a specific pipeline run.
@@ -873,14 +867,14 @@ class SagemakerOrchestrator(ContainerizedOrchestrator):
         self,
         execution_arn: str,
         settings: SagemakerOrchestratorSettings,
-    ) -> Iterator[Dict[str, MetadataType]]:
+    ) -> Dict[str, MetadataType]:
         """Generate run metadata based on the generated Sagemaker Execution.

         Args:
             execution_arn: The ARN of the pipeline execution.
             settings: The Sagemaker orchestrator settings.

-        Yields:
+        Returns:
             A dictionary of metadata related to the pipeline run.
         """
         # Orchestrator Run ID
@@ -901,7 +895,7 @@ class SagemakerOrchestrator(ContainerizedOrchestrator):
         ):
             metadata[METADATA_ORCHESTRATOR_LOGS_URL] = Uri(logs_url)

-        yield metadata
+        return metadata

     def _compute_orchestrator_url(
         self,
@@ -979,7 +973,9 @@ class SagemakerOrchestrator(ContainerizedOrchestrator):
         return None

     @staticmethod
-    def generate_schedule_metadata(schedule_arn: str) -> Dict[str, MetadataType]:
+    def generate_schedule_metadata(
+        schedule_arn: str,
+    ) -> Dict[str, MetadataType]:
         """Attaches metadata to the ZenML Schedules.

         Args:
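The SageMaker hunks above capture the shape of the new orchestrator contract: the generator-style `prepare_or_run_pipeline` (which yielded metadata dictionaries) becomes `submit_pipeline`, which returns an optional `SubmissionResult` carrying the run metadata plus an optional callback that blocks until the run finishes. A minimal sketch of an orchestrator written against the new contract follows; `ExampleOrchestrator`, `_start_job`, and the `synchronous` settings flag are illustrative assumptions, and only the `SubmissionResult(metadata=..., wait_for_completion=...)` shape is taken from the diff:

    from typing import Dict, Optional

    from zenml.orchestrators import ContainerizedOrchestrator, SubmissionResult


    class ExampleOrchestrator(ContainerizedOrchestrator):
        """Hypothetical orchestrator illustrating the submit/wait split."""

        def submit_pipeline(
            self,
            deployment: "PipelineDeploymentResponse",
            stack: "Stack",
            environment: Dict[str, str],
            placeholder_run: Optional["PipelineRunResponse"] = None,
        ) -> Optional[SubmissionResult]:
            # Kick off the run on the backend and return immediately.
            job = self._start_job(deployment, environment)  # hypothetical helper

            # Build a waiter only when the user asked for synchronous
            # execution; the caller decides whether and when to invoke it.
            _wait_for_completion = None
            if self.get_settings(deployment).synchronous:  # assumed settings flag

                def _wait_for_completion() -> None:
                    job.wait()  # block until the backend job finishes

            return SubmissionResult(
                metadata={"orchestrator_job_id": str(job.id)},
                wait_for_completion=_wait_for_completion,
            )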
zenml/integrations/azure/orchestrators/azureml_orchestrator.py

@@ -19,7 +19,6 @@ from typing import (
     TYPE_CHECKING,
     Any,
     Dict,
-    Iterator,
     List,
     Optional,
     Tuple,
@@ -63,7 +62,7 @@ from zenml.integrations.azure.orchestrators.azureml_orchestrator_entrypoint_conf
 )
 from zenml.logger import get_logger
 from zenml.metadata.metadata_types import MetadataType, Uri
-from zenml.orchestrators import ContainerizedOrchestrator
+from zenml.orchestrators import ContainerizedOrchestrator, SubmissionResult
 from zenml.orchestrators.utils import get_orchestrator_run_name
 from zenml.stack import StackValidator
 from zenml.utils.string_utils import b64_encode
@@ -198,27 +197,32 @@ class AzureMLOrchestrator(ContainerizedOrchestrator):
             command=" ".join(command + arguments),
         )

-    def prepare_or_run_pipeline(
+    def submit_pipeline(
         self,
         deployment: "PipelineDeploymentResponse",
         stack: "Stack",
         environment: Dict[str, str],
         placeholder_run: Optional["PipelineRunResponse"] = None,
-    ) -> Iterator[Dict[str, MetadataType]]:
-        """Prepares or runs a pipeline on AzureML.
+    ) -> Optional[SubmissionResult]:
+        """Submits a pipeline to the orchestrator.
+
+        This method should only submit the pipeline and not wait for it to
+        complete. If the orchestrator is configured to wait for the pipeline run
+        to complete, a function that waits for the pipeline run to complete can
+        be passed as part of the submission result.

         Args:
-            deployment: The deployment to prepare or run.
-            stack: The stack the pipeline will run on.
+            deployment: The pipeline deployment to submit.
+            stack: The stack the pipeline will run on.
             environment: Environment variables to set in the orchestration
-                environment.
+                environment. These don't need to be set if running locally.
             placeholder_run: An optional placeholder run for the deployment.

         Raises:
             RuntimeError: If the creation of the schedule fails.

-        Yields:
-            A dictionary of metadata related to the pipeline run.
+        Returns:
+            Optional submission result.
         """
         # Authentication
         if connector := self.get_connector():
@@ -384,14 +388,11 @@ class AzureMLOrchestrator(ContainerizedOrchestrator):
                    "Failed to create schedule for the pipeline "
                    f"'{run_name}': {str(e)}"
                )
-
+            return None
        else:
            job = ml_client.jobs.create_or_update(pipeline_job)
            logger.info(f"Pipeline {run_name} has been started.")

-            # Yield metadata based on the generated job object
-            yield from self.compute_metadata(job)
-
            assert job.services is not None
            assert job.name is not None

@@ -401,9 +402,17 @@ class AzureMLOrchestrator(ContainerizedOrchestrator):
                f"{job.services['Studio'].endpoint}"
            )

+            _wait_for_completion = None
            if settings.synchronous:
-                logger.info("Waiting for pipeline to finish...")
-                ml_client.jobs.stream(job.name)
+
+                def _wait_for_completion() -> None:
+                    logger.info("Waiting for pipeline to finish...")
+                    ml_client.jobs.stream(job.name)
+
+            return SubmissionResult(
+                metadata=self.compute_metadata(job),
+                wait_for_completion=_wait_for_completion,
+            )

     def get_pipeline_run_metadata(
         self, run_id: UUID
@@ -518,13 +527,13 @@ class AzureMLOrchestrator(ContainerizedOrchestrator):
         else:
             raise ValueError("Unknown status for the pipeline job.")

-    def compute_metadata(self, job: Any) -> Iterator[Dict[str, MetadataType]]:
+    def compute_metadata(self, job: Any) -> Dict[str, MetadataType]:
         """Generate run metadata based on the generated AzureML PipelineJob.

         Args:
             job: The corresponding PipelineJob object.

-        Yields:
+        Returns:
             A dictionary of metadata related to the pipeline run.
         """
         # Metadata
@@ -538,7 +547,7 @@ class AzureMLOrchestrator(ContainerizedOrchestrator):
         if orchestrator_url := self._compute_orchestrator_url(job):
             metadata[METADATA_ORCHESTRATOR_URL] = Uri(orchestrator_url)

-        yield metadata
+        return metadata

     @staticmethod
     def _compute_orchestrator_url(job: Any) -> Optional[str]:
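The AzureML changes follow the same pattern, with the waiter wrapping `ml_client.jobs.stream(job.name)` instead of a boto3 waiter. For illustration, a caller consuming a `SubmissionResult` might look like the following; this is hypothetical driver code (`store_run_metadata` is a placeholder), since the real consumption lives in the `zenml/orchestrators/base_orchestrator.py` changes listed above:

    result = orchestrator.submit_pipeline(deployment, stack, environment)
    if result:
        if result.metadata:
            store_run_metadata(result.metadata)  # placeholder publishing step
        if result.wait_for_completion:
            result.wait_for_completion()  # blocks only in synchronous mode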
zenml/integrations/databricks/orchestrators/databricks_orchestrator.py

@@ -15,7 +15,7 @@

 import itertools
 import os
-from typing import TYPE_CHECKING, Dict, Iterator, List, Optional, Tuple, Type, cast
+from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Type, cast
 from uuid import UUID

 from databricks.sdk import WorkspaceClient as DatabricksClient
@@ -48,10 +48,12 @@ from zenml.io import fileio
 from zenml.logger import get_logger
 from zenml.metadata.metadata_types import MetadataType, Uri
 from zenml.models.v2.core.schedule import ScheduleResponse
+from zenml.orchestrators import (
+    SubmissionResult,
+    WheeledOrchestrator,
+)
 from zenml.orchestrators.utils import get_orchestrator_run_name
-from zenml.orchestrators.wheeled_orchestrator import WheeledOrchestrator
 from zenml.stack import StackValidator
-from zenml.utils import io_utils
 from zenml.utils.package_utils import clean_requirements
 from zenml.utils.pipeline_docker_image_builder import (
     PipelineDockerImageBuilder,
@@ -67,20 +69,13 @@ logger = get_logger(__name__)
 ZENML_STEP_DEFAULT_ENTRYPOINT_COMMAND = "entrypoint.main"
 DATABRICKS_WHEELS_DIRECTORY_PREFIX = "dbfs:/FileStore/zenml"
 DATABRICKS_LOCAL_FILESYSTEM_PREFIX = "file:/"
-DATABRICKS_CLUSTER_DEFAULT_NAME = "zenml-databricks-cluster"
 DATABRICKS_SPARK_DEFAULT_VERSION = "15.3.x-scala2.12"
 DATABRICKS_JOB_ID_PARAMETER_REFERENCE = "{{job.id}}"
 DATABRICKS_ZENML_DEFAULT_CUSTOM_REPOSITORY_PATH = "."


 class DatabricksOrchestrator(WheeledOrchestrator):
-    """Base class for Orchestrator responsible for running pipelines remotely in a VM.
-
-    This orchestrator does not support running on a schedule.
-    """
-
-    # The default instance type to use if none is specified in settings
-    DEFAULT_INSTANCE_TYPE: Optional[str] = None
+    """Databricks orchestrator."""

     @property
     def validator(self) -> Optional[StackValidator]:
@@ -168,69 +163,39 @@ class DatabricksOrchestrator(WheeledOrchestrator):
             f"{ENV_ZENML_DATABRICKS_ORCHESTRATOR_RUN_ID}."
         )

-    @property
-    def root_directory(self) -> str:
-        """Path to the root directory for all files concerning this orchestrator.
-
-        Returns:
-            Path to the root directory.
-        """
-        return os.path.join(
-            io_utils.get_global_config_directory(),
-            "databricks",
-            str(self.id),
-        )
-
-    @property
-    def pipeline_directory(self) -> str:
-        """Returns path to a directory in which the kubeflow pipeline files are stored.
-
-        Returns:
-            Path to the pipeline directory.
-        """
-        return os.path.join(self.root_directory, "pipelines")
-
     def setup_credentials(self) -> None:
         """Set up credentials for the orchestrator."""
         connector = self.get_connector()
         assert connector is not None
         connector.configure_local_client()

-    def prepare_or_run_pipeline(
+    def submit_pipeline(
         self,
         deployment: "PipelineDeploymentResponse",
         stack: "Stack",
         environment: Dict[str, str],
         placeholder_run: Optional["PipelineRunResponse"] = None,
-    ) -> Iterator[Dict[str, MetadataType]]:
-        """Creates a wheel and uploads the pipeline to Databricks.
-
-        This functions as an intermediary representation of the pipeline which
-        is then deployed to the kubeflow pipelines instance.
-
-        How it works:
-        -------------
-        Before this method is called the `prepare_pipeline_deployment()`
-        method builds a docker image that contains the code for the
-        pipeline, all steps the context around these files.
+    ) -> Optional[SubmissionResult]:
+        """Submits a pipeline to the orchestrator.

-        …
-        between these task are then also configured onto each
-        task by pointing at the downstream steps.
+        This method should only submit the pipeline and not wait for it to
+        complete. If the orchestrator is configured to wait for the pipeline run
+        to complete, a function that waits for the pipeline run to complete can
+        be passed as part of the submission result.

         Args:
-            deployment: The pipeline deployment to prepare or run.
+            deployment: The pipeline deployment to submit.
             stack: The stack the pipeline will run on.
             environment: Environment variables to set in the orchestration
-                environment.
+                environment. These don't need to be set if running locally.
             placeholder_run: An optional placeholder run for the deployment.

         Raises:
             ValueError: If the schedule is not set or if the cron expression
                 is not set.
+
+        Returns:
+            Optional submission result.
         """
         settings = cast(
             DatabricksOrchestratorSettings, self.get_settings(deployment)
@@ -339,11 +304,6 @@ class DatabricksOrchestrator(WheeledOrchestrator):
         orchestrator_run_name = get_orchestrator_run_name(
             pipeline_name=deployment.pipeline_configuration.name
         )
-        # Get a filepath to use to save the finished yaml to
-        fileio.makedirs(self.pipeline_directory)
-        pipeline_file_path = os.path.join(
-            self.pipeline_directory, f"{orchestrator_run_name}.yaml"
-        )

         # Copy the repository to a temporary directory and add a setup.py file
         repository_temp_dir = (
@@ -382,11 +342,6 @@ class DatabricksOrchestrator(WheeledOrchestrator):

         fileio.rmtree(repository_temp_dir)

-        logger.info(
-            "Writing Databricks workflow definition to `%s`.",
-            pipeline_file_path,
-        )
-
         # using the databricks client uploads the pipeline to databricks
         job_cluster_key = self.sanitize_name(f"{deployment_id}")
         self._upload_and_run_pipeline(
@@ -399,6 +354,7 @@ class DatabricksOrchestrator(WheeledOrchestrator):
             job_cluster_key=job_cluster_key,
             schedule=deployment.schedule,
         )
+        return None

     def _upload_and_run_pipeline(
         self,
zenml/integrations/databricks/orchestrators/databricks_orchestrator_entrypoint_config.py

@@ -17,7 +17,10 @@ import os
 import sys
 from typing import Any, List, Set

-…
+if sys.version_info < (3, 10):
+    from importlib_metadata import distribution
+else:
+    from importlib.metadata import distribution

 from zenml.entrypoints.step_entrypoint_configuration import (
     StepEntrypointConfiguration,
@@ -81,8 +84,10 @@ class DatabricksEntrypointConfiguration(StepEntrypointConfiguration):
         """Runs the step."""
         # Get the wheel package and add it to the sys path
         wheel_package = self.entrypoint_args[WHEEL_PACKAGE_OPTION]
-        …
-        …
+
+        dist = distribution(wheel_package)
+        project_root = os.path.join(dist.locate_file("."), wheel_package)
+
         if project_root not in sys.path:
             sys.path.insert(0, project_root)
             sys.path.insert(-1, project_root)