zenml-nightly 0.83.1.dev20250710__py3-none-any.whl → 0.84.0.dev20250712__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zenml/VERSION +1 -1
- zenml/artifact_stores/base_artifact_store.py +51 -23
- zenml/artifacts/utils.py +3 -1
- zenml/cli/pipeline.py +13 -2
- zenml/constants.py +4 -0
- zenml/container_registries/base_container_registry.py +17 -5
- zenml/enums.py +9 -0
- zenml/integrations/aws/flavors/sagemaker_orchestrator_flavor.py +150 -117
- zenml/integrations/aws/flavors/sagemaker_step_operator_flavor.py +43 -42
- zenml/integrations/aws/orchestrators/sagemaker_orchestrator.py +16 -7
- zenml/integrations/azure/orchestrators/azureml_orchestrator.py +18 -12
- zenml/integrations/bentoml/flavors/bentoml_model_deployer_flavor.py +7 -1
- zenml/integrations/databricks/flavors/databricks_orchestrator_flavor.py +58 -23
- zenml/integrations/feast/flavors/feast_feature_store_flavor.py +18 -5
- zenml/integrations/gcp/flavors/vertex_experiment_tracker_flavor.py +10 -42
- zenml/integrations/gcp/flavors/vertex_orchestrator_flavor.py +99 -92
- zenml/integrations/gcp/google_credentials_mixin.py +13 -8
- zenml/integrations/gcp/orchestrators/vertex_orchestrator.py +18 -9
- zenml/integrations/huggingface/__init__.py +1 -1
- zenml/integrations/hyperai/flavors/hyperai_orchestrator_flavor.py +28 -30
- zenml/integrations/kaniko/flavors/kaniko_image_builder_flavor.py +56 -40
- zenml/integrations/kubeflow/flavors/kubeflow_orchestrator_flavor.py +59 -48
- zenml/integrations/kubernetes/flavors/kubernetes_orchestrator_flavor.py +159 -121
- zenml/integrations/kubernetes/flavors/kubernetes_step_operator_flavor.py +48 -33
- zenml/integrations/kubernetes/orchestrators/kubernetes_orchestrator.py +182 -1
- zenml/integrations/kubernetes/orchestrators/kubernetes_orchestrator_entrypoint.py +7 -3
- zenml/integrations/lightning/flavors/lightning_orchestrator_flavor.py +41 -25
- zenml/integrations/mlflow/flavors/mlflow_experiment_tracker_flavor.py +51 -44
- zenml/integrations/mlflow/flavors/mlflow_model_deployer_flavor.py +9 -4
- zenml/integrations/neptune/flavors/neptune_experiment_tracker_flavor.py +13 -12
- zenml/integrations/s3/flavors/s3_artifact_store_flavor.py +32 -7
- zenml/integrations/vllm/flavors/vllm_model_deployer_flavor.py +7 -1
- zenml/integrations/wandb/flavors/wandb_experiment_tracker_flavor.py +34 -25
- zenml/integrations/whylogs/flavors/whylogs_data_validator_flavor.py +14 -11
- zenml/logging/step_logging.py +8 -7
- zenml/models/v2/core/logs.py +2 -1
- zenml/models/v2/core/pipeline_run.py +0 -59
- zenml/orchestrators/base_orchestrator.py +7 -1
- zenml/pipelines/build_utils.py +2 -1
- zenml/stack/authentication_mixin.py +6 -5
- zenml/stack/flavor.py +5 -1
- zenml/utils/code_utils.py +2 -1
- zenml/utils/docker_utils.py +22 -0
- zenml/utils/io_utils.py +18 -0
- zenml/utils/pipeline_docker_image_builder.py +4 -1
- zenml/utils/run_utils.py +101 -8
- zenml/zen_server/dashboard/assets/{404-B5eko6XL.js → 404-B5cfnwZ1.js} +1 -1
- zenml/zen_server/dashboard/assets/{@radix-Cdvw4jJ8.js → @radix-C_LirfyT.js} +1 -1
- zenml/zen_server/dashboard/assets/{@react-router-DeDfXbUF.js → @react-router-BSsrkPOd.js} +1 -1
- zenml/zen_server/dashboard/assets/{@reactflow-B_iCtR7X.js → @reactflow-D9hglKLF.js} +2 -2
- zenml/zen_server/dashboard/assets/{@tanstack-5gTMR7G2.js → @tanstack-C0SeHZng.js} +1 -1
- zenml/zen_server/dashboard/assets/AlertDialogDropdownItem-druRNuO2.js +1 -0
- zenml/zen_server/dashboard/assets/ButtonGroup-SF2DlzHV.js +1 -0
- zenml/zen_server/dashboard/assets/{CodeSnippet-Ctplhzdc.js → CodeSnippet-D8iBqOVv.js} +1 -1
- zenml/zen_server/dashboard/assets/CollapsibleCard-D0-pQi1n.js +1 -0
- zenml/zen_server/dashboard/assets/{ComponentBadge-Cnecw3qz.js → ComponentBadge-mw2Ja_ON.js} +1 -1
- zenml/zen_server/dashboard/assets/ComponentIcon-BXgpt-jw.js +1 -0
- zenml/zen_server/dashboard/assets/{DeleteAlertDialog-DEI0YDzP.js → DeleteAlertDialog-BbyFVnVI.js} +1 -1
- zenml/zen_server/dashboard/assets/DialogItem-DeME0oSt.js +1 -0
- zenml/zen_server/dashboard/assets/{DisplayDate-8RESqe5H.js → DisplayDate-v3KW7oez.js} +1 -1
- zenml/zen_server/dashboard/assets/{EmptyState-CjrgDtVk.js → EmptyState-DG0m-CGv.js} +1 -1
- zenml/zen_server/dashboard/assets/Error-DcVLcrLs.js +1 -0
- zenml/zen_server/dashboard/assets/ExecutionStatus-C4tlFnlh.js +1 -0
- zenml/zen_server/dashboard/assets/{Helpbox-DtUG2Bf_.js → Helpbox-C-RGHz3S.js} +1 -1
- zenml/zen_server/dashboard/assets/{Infobox-CSBRrM6r.js → Infobox-DFCWPbMb.js} +1 -1
- zenml/zen_server/dashboard/assets/{LeftSideMenu-DPsCCK3z.js → LeftSideMenu-Czev0KCA.js} +1 -1
- zenml/zen_server/dashboard/assets/{Lock-CrIAdQo6.js → Lock-CRP5J_su.js} +1 -1
- zenml/zen_server/dashboard/assets/NestedCollapsible-CN9scBUn.js +1 -0
- zenml/zen_server/dashboard/assets/{NumberBox-DtCv7jh3.js → NumberBox-CoQjQYDJ.js} +1 -1
- zenml/zen_server/dashboard/assets/{Pagination-CWnEpSpN.js → Pagination-CcDD5yHh.js} +1 -1
- zenml/zen_server/dashboard/assets/Partials-DlMzfKgs.js +1 -0
- zenml/zen_server/dashboard/assets/{PasswordChecker-B88WjuCe.js → PasswordChecker-BZwoeQIm.js} +1 -1
- zenml/zen_server/dashboard/assets/{ProCta-CNyp04C8.js → ProCta-CU2ycJDo.js} +1 -1
- zenml/zen_server/dashboard/assets/ProviderIcon-BMAn9Jld.js +1 -0
- zenml/zen_server/dashboard/assets/ProviderRadio-D_q9tE3G.js +1 -0
- zenml/zen_server/dashboard/assets/RunsBody-BToytB8e.js +1 -0
- zenml/zen_server/dashboard/assets/{SearchField-BtUi6cYl.js → SearchField-D_0-uAPj.js} +1 -1
- zenml/zen_server/dashboard/assets/SecretTooltip-BcWMKb9f.js +1 -0
- zenml/zen_server/dashboard/assets/{SetPassword-BmbgL_ed.js → SetPassword-CaKVSqAL.js} +1 -1
- zenml/zen_server/dashboard/assets/{SheetHeader-DkH7aG9K.js → SheetHeader-7vwlsY_i.js} +1 -1
- zenml/zen_server/dashboard/assets/StackComponentList-s7eSfm8o.js +1 -0
- zenml/zen_server/dashboard/assets/StackList-Dt0FrIkM.js +1 -0
- zenml/zen_server/dashboard/assets/Tabs-B27AHUfo.js +1 -0
- zenml/zen_server/dashboard/assets/Tick-DDeDgTuT.js +1 -0
- zenml/zen_server/dashboard/assets/{UpdatePasswordSchemas-D_DCETSO.js → UpdatePasswordSchemas-Da5RndbV.js} +1 -1
- zenml/zen_server/dashboard/assets/{Wizard-BHvY75u_.js → Wizard-8aJzxUjb.js} +1 -1
- zenml/zen_server/dashboard/assets/WizardFooter-Bt7_UE14.js +1 -0
- zenml/zen_server/dashboard/assets/{all-pipeline-runs-query-DpKw9WL9.js → all-pipeline-runs-query-gorNNEaT.js} +1 -1
- zenml/zen_server/dashboard/assets/{arrow-left-MRXv5pAH.js → arrow-left-hcj2H8HY.js} +1 -1
- zenml/zen_server/dashboard/assets/bar-chart-square-check-9siI9icm.js +1 -0
- zenml/zen_server/dashboard/assets/{bulk-delete-CzYA--cC.js → bulk-delete-B5RTlnD_.js} +1 -1
- zenml/zen_server/dashboard/assets/{check-B9QMTa3f.js → check-D1bHMJkL.js} +1 -1
- zenml/zen_server/dashboard/assets/{check-circle-C4tYvbtw.js → check-circle-mnEgPhPF.js} +1 -1
- zenml/zen_server/dashboard/assets/{chevron-down-jbbQh82s.js → chevron-down-Z3nUe-0U.js} +1 -1
- zenml/zen_server/dashboard/assets/{chevron-right-double-Dgp_gEsp.js → chevron-right-double-CbRQKN4Q.js} +1 -1
- zenml/zen_server/dashboard/assets/{clock-B_mTG8PH.js → clock-BMjHXT3f.js} +1 -1
- zenml/zen_server/dashboard/assets/{code-browser-CiD8qkBx.js → code-browser-DftoiCIg.js} +1 -1
- zenml/zen_server/dashboard/assets/configuration-form-Yz8m0QIG.js +1 -0
- zenml/zen_server/dashboard/assets/constants-DeV48DuZ.js +1 -0
- zenml/zen_server/dashboard/assets/{create-stack-BpZrmKDu.js → create-stack-BruqH_6X.js} +1 -1
- zenml/zen_server/dashboard/assets/credit-card-CH1BHrXY.js +1 -0
- zenml/zen_server/dashboard/assets/dataflow-2-qHjWt7zp.js +1 -0
- zenml/zen_server/dashboard/assets/{delete-run-BkyDsKQc.js → delete-run-ibBtciMR.js} +1 -1
- zenml/zen_server/dashboard/assets/{expand-full-BPiXpch2.js → expand-full-CD4fFvM-.js} +1 -1
- zenml/zen_server/dashboard/assets/{eye-CbVlAYty.js → eye-CLNgIh_K.js} +1 -1
- zenml/zen_server/dashboard/assets/{file-text-Cd8wVfq5.js → file-text-CltVhgwZ.js} +1 -1
- zenml/zen_server/dashboard/assets/form-6aSt3tIl.js +1 -0
- zenml/zen_server/dashboard/assets/form-schemas-B9XgTS1V.js +1 -0
- zenml/zen_server/dashboard/assets/gradient_bg-BH8t8fi6.webp +0 -0
- zenml/zen_server/dashboard/assets/{help-Co6aedki.js → help-B0CvBhCm.js} +1 -1
- zenml/zen_server/dashboard/assets/icon-hDriJUXY.js +1 -0
- zenml/zen_server/dashboard/assets/{index-eoDB_1XX.js → index-B7CRNU8l.js} +1 -1
- zenml/zen_server/dashboard/assets/index-BQWlHo1Y.js +1 -0
- zenml/zen_server/dashboard/assets/{index-DWpiv-Ft.js → index-BRhKF2z-.js} +1 -1
- zenml/zen_server/dashboard/assets/{index-BgEfQ3_G.js → index-BacoJBEQ.js} +11 -11
- zenml/zen_server/dashboard/assets/{index-BBt0LDtR.js → index-dCcVgFNl.js} +1 -1
- zenml/zen_server/dashboard/assets/index-eggipFZS.css +1 -0
- zenml/zen_server/dashboard/assets/index-mA8kL088.js +14 -0
- zenml/zen_server/dashboard/assets/{index.es-C1gfATPn.js → index.es-DcVFDpJU.js} +1 -1
- zenml/zen_server/dashboard/assets/{index.esm-DhJo3mA6.js → index.esm-COnaHLSh.js} +1 -1
- zenml/zen_server/dashboard/assets/{info-QkbQz4QU.js → info-CyMih3vQ.js} +1 -1
- zenml/zen_server/dashboard/assets/{key-icon-C07HKw8z.js → key-icon-HOx2gazv.js} +1 -1
- zenml/zen_server/dashboard/assets/{layout-DBbfEFBe.js → layout-C5dgIReC.js} +1 -1
- zenml/zen_server/dashboard/assets/layout-CFLL6-CM.js +1 -0
- zenml/zen_server/dashboard/assets/{login-mutation-C1hvP_cX.js → login-mutation-CidpsqyH.js} +1 -1
- zenml/zen_server/dashboard/assets/{logs-CQKlJjo0.js → logs-DoLoTEfj.js} +1 -1
- zenml/zen_server/dashboard/assets/mail-C160gvB0.js +1 -0
- zenml/zen_server/dashboard/assets/message-chat-square-DLz6XmPS.js +1 -0
- zenml/zen_server/dashboard/assets/{package-miExReQl.js → package-BhYXGPxF.js} +1 -1
- zenml/zen_server/dashboard/assets/page-6huxSHEu.js +1 -0
- zenml/zen_server/dashboard/assets/page-7CJ4Wq3O.js +1 -0
- zenml/zen_server/dashboard/assets/page-8U20Tu_8.js +1 -0
- zenml/zen_server/dashboard/assets/{page-4zc4xPv2.js → page-BByayrO-.js} +2 -2
- zenml/zen_server/dashboard/assets/page-BCRXJXC9.js +1 -0
- zenml/zen_server/dashboard/assets/page-BK59rZvf.js +1 -0
- zenml/zen_server/dashboard/assets/page-BMpXak4U.js +1 -0
- zenml/zen_server/dashboard/assets/page-BTDi81N3.js +1 -0
- zenml/zen_server/dashboard/assets/{page-D-tJ_Y0a.js → page-BX67x4iL.js} +1 -1
- zenml/zen_server/dashboard/assets/page-Bjmcdg64.js +1 -0
- zenml/zen_server/dashboard/assets/page-BsAn8p4m.js +1 -0
- zenml/zen_server/dashboard/assets/{page-C2i-C7jv.js → page-BwjPRuaY.js} +1 -1
- zenml/zen_server/dashboard/assets/page-CDtSVkNc.js +1 -0
- zenml/zen_server/dashboard/assets/page-CEDU0L2T.js +1 -0
- zenml/zen_server/dashboard/assets/page-COJK90rG.js +1 -0
- zenml/zen_server/dashboard/assets/page-CY0LPcAJ.js +1 -0
- zenml/zen_server/dashboard/assets/page-C_XMn4GU.js +1 -0
- zenml/zen_server/dashboard/assets/page-Cb3KGsPK.js +22 -0
- zenml/zen_server/dashboard/assets/page-Cc8owYXQ.js +1 -0
- zenml/zen_server/dashboard/assets/{page-C3JfJxuR.js → page-CeGBDh1Q.js} +1 -1
- zenml/zen_server/dashboard/assets/page-CiGOVsj3.js +1 -0
- zenml/zen_server/dashboard/assets/page-CmLSFMkW.js +1 -0
- zenml/zen_server/dashboard/assets/page-CnfCptXq.js +1 -0
- zenml/zen_server/dashboard/assets/page-CvllZMBF.js +1 -0
- zenml/zen_server/dashboard/assets/page-CxzglV3-.js +1 -0
- zenml/zen_server/dashboard/assets/{page-rVhXI5ZO.js → page-D6cvOG8w.js} +1 -1
- zenml/zen_server/dashboard/assets/{page-BxeZrG_t.js → page-DDWW21kl.js} +1 -1
- zenml/zen_server/dashboard/assets/{page-DiHZK-1w.js → page-DF4FVxxW.js} +2 -2
- zenml/zen_server/dashboard/assets/page-DSZfclXt.js +1 -0
- zenml/zen_server/dashboard/assets/page-DVLez4R1.js +1 -0
- zenml/zen_server/dashboard/assets/page-DcXrWWWh.js +1 -0
- zenml/zen_server/dashboard/assets/page-Dg7-H_9i.js +1 -0
- zenml/zen_server/dashboard/assets/{page-BPQ66vR-.js → page-DgldL5UB.js} +2 -2
- zenml/zen_server/dashboard/assets/page-Dw7XuiSo.js +18 -0
- zenml/zen_server/dashboard/assets/{page-DOCOmmKn.js → page-FQxi1Otg.js} +1 -1
- zenml/zen_server/dashboard/assets/page-XrmOHHg7.js +1 -0
- zenml/zen_server/dashboard/assets/page-YdWnx9MX.js +1 -0
- zenml/zen_server/dashboard/assets/page-oRm7D4TC.js +1 -0
- zenml/zen_server/dashboard/assets/{page-uxjMX8Iq.js → page-q41JNDWO.js} +1 -1
- zenml/zen_server/dashboard/assets/page-x2GXC8sI.js +1 -0
- zenml/zen_server/dashboard/assets/page-z2FXP4GY.js +1 -0
- zenml/zen_server/dashboard/assets/{persist-CFPbMcJX.js → persist-BKKcL1Kp.js} +1 -1
- zenml/zen_server/dashboard/assets/{persist-BsdEtCkd.js → persist-DxiyfAax.js} +1 -1
- zenml/zen_server/dashboard/assets/{pipeline-CSUlkd50.js → pipeline-BJ8liDnl.js} +1 -1
- zenml/zen_server/dashboard/assets/{plus-Cl0_rCVF.js → plus-cI8zD2xh.js} +1 -1
- zenml/zen_server/dashboard/assets/primary-role-CPGHymjN.js +1 -0
- zenml/zen_server/dashboard/assets/{react-error-boundary.esm-7_MuhCay.js → react-error-boundary.esm-DoXxY4pT.js} +1 -1
- zenml/zen_server/dashboard/assets/{refresh-BcTM09NW.js → refresh-3EF2R7ja.js} +1 -1
- zenml/zen_server/dashboard/assets/{resource-tyes-list-79FqS3LY.js → resource-tyes-list-B5rkZcbc.js} +1 -1
- zenml/zen_server/dashboard/assets/resource-type-tooltip-E97WGqfk.js +1 -0
- zenml/zen_server/dashboard/assets/service-B9aVzfAF.js +2 -0
- zenml/zen_server/dashboard/assets/service-connectors-DL2-k_E2.js +1 -0
- zenml/zen_server/dashboard/assets/{sharedSchema-C_HkejsG.js → sharedSchema-DyUO09BR.js} +1 -1
- zenml/zen_server/dashboard/assets/slash-circle-D2Lb2FyR.js +1 -0
- zenml/zen_server/dashboard/assets/stack-detail-query-Bc4QKlWg.js +1 -0
- zenml/zen_server/dashboard/assets/{terminal-XFL_4QN-.js → terminal-BObrvDlO.js} +1 -1
- zenml/zen_server/dashboard/assets/{terminal-square-XFL_4QN-.js → terminal-square-BObrvDlO.js} +1 -1
- zenml/zen_server/dashboard/assets/{transform-CeZdrxDZ.js → transform-DFpKTKgF.js} +1 -1
- zenml/zen_server/dashboard/assets/{trash-DP6Tpp_E.js → trash-HKxXWbSG.js} +1 -1
- zenml/zen_server/dashboard/assets/{update-current-user-mutation-Ca-Lmwuj.js → update-current-user-mutation-DSyUyHVj.js} +1 -1
- zenml/zen_server/dashboard/assets/update-server-settings-mutation-CdM-Sdds.js +1 -0
- zenml/zen_server/dashboard/assets/{zod-XdS2h1ws.js → zod-DgEcN9jD.js} +1 -1
- zenml/zen_server/dashboard/index.html +7 -7
- zenml/zen_server/deploy/daemon/daemon_zen_server.py +4 -0
- zenml/zen_server/deploy/docker/docker_zen_server.py +2 -0
- zenml/zen_server/routers/runs_endpoints.py +20 -28
- zenml/zen_stores/migrations/versions/0.84.0_release.py +23 -0
- zenml/zen_stores/sql_zen_store.py +9 -3
- {zenml_nightly-0.83.1.dev20250710.dist-info → zenml_nightly-0.84.0.dev20250712.dist-info}/METADATA +9 -22
- {zenml_nightly-0.83.1.dev20250710.dist-info → zenml_nightly-0.84.0.dev20250712.dist-info}/RECORD +202 -195
- zenml/zen_server/dashboard/assets/AlertDialogDropdownItem-DsOmO1FH.js +0 -1
- zenml/zen_server/dashboard/assets/ButtonGroup-4sPZDv70.js +0 -1
- zenml/zen_server/dashboard/assets/CollapsibleCard-CBKenz9f.js +0 -1
- zenml/zen_server/dashboard/assets/ComponentIcon-CMiVW-O6.js +0 -1
- zenml/zen_server/dashboard/assets/DialogItem-CRCDpYU6.js +0 -1
- zenml/zen_server/dashboard/assets/Error-BG6f_WRd.js +0 -1
- zenml/zen_server/dashboard/assets/ExecutionStatus-BuhNAE9w.js +0 -1
- zenml/zen_server/dashboard/assets/NestedCollapsible-CMuDIJlp.js +0 -1
- zenml/zen_server/dashboard/assets/Partials-CfHD6OH5.js +0 -1
- zenml/zen_server/dashboard/assets/ProviderIcon-DHejyg7C.js +0 -1
- zenml/zen_server/dashboard/assets/ProviderRadio-tGtie8Gc.js +0 -1
- zenml/zen_server/dashboard/assets/RunsBody-mYwMcWWj.js +0 -1
- zenml/zen_server/dashboard/assets/SecretTooltip-B5u1UsQ9.js +0 -1
- zenml/zen_server/dashboard/assets/StackComponentList-Bi8BKqCu.js +0 -1
- zenml/zen_server/dashboard/assets/StackList-Cvxapo0p.js +0 -1
- zenml/zen_server/dashboard/assets/StackName-CFSZL8ec.js +0 -1
- zenml/zen_server/dashboard/assets/Tabs-D4dv48ry.js +0 -1
- zenml/zen_server/dashboard/assets/Tick-Qquvr4P3.js +0 -1
- zenml/zen_server/dashboard/assets/UsageReason-DhiUV1bu.js +0 -1
- zenml/zen_server/dashboard/assets/WizardFooter-FQm8y-jP.js +0 -1
- zenml/zen_server/dashboard/assets/configuration-form-DSoMMiPE.js +0 -1
- zenml/zen_server/dashboard/assets/constants-DTfsIqHy.js +0 -1
- zenml/zen_server/dashboard/assets/flyte-Cj-xy_8I.svg +0 -10
- zenml/zen_server/dashboard/assets/form-BgtamtJm.js +0 -1
- zenml/zen_server/dashboard/assets/form-schemas-dyDkAxXP.js +0 -1
- zenml/zen_server/dashboard/assets/gcp-B1I3Qvcx.js +0 -1
- zenml/zen_server/dashboard/assets/index-BfNISy0X.css +0 -1
- zenml/zen_server/dashboard/assets/layout-o1x87a3q.js +0 -1
- zenml/zen_server/dashboard/assets/metaflow-weOkWNyT.svg +0 -10
- zenml/zen_server/dashboard/assets/page-4xUZpMN0.js +0 -31
- zenml/zen_server/dashboard/assets/page-B0104V6C.js +0 -1
- zenml/zen_server/dashboard/assets/page-BNJsjvof.js +0 -1
- zenml/zen_server/dashboard/assets/page-BQgSZ2nH.js +0 -1
- zenml/zen_server/dashboard/assets/page-BXl2ZX6J.js +0 -1
- zenml/zen_server/dashboard/assets/page-C2A-2Cj_.js +0 -1
- zenml/zen_server/dashboard/assets/page-CESEqC2L.js +0 -1
- zenml/zen_server/dashboard/assets/page-CF8cTZ7l.js +0 -1
- zenml/zen_server/dashboard/assets/page-CKjsimVu.js +0 -1
- zenml/zen_server/dashboard/assets/page-COLzBwff.js +0 -1
- zenml/zen_server/dashboard/assets/page-COifg5fa.js +0 -1
- zenml/zen_server/dashboard/assets/page-CQeJuA8T.js +0 -1
- zenml/zen_server/dashboard/assets/page-CcjWEjre.js +0 -1
- zenml/zen_server/dashboard/assets/page-CefGLeWy.js +0 -1
- zenml/zen_server/dashboard/assets/page-CfxpV3j4.js +0 -1
- zenml/zen_server/dashboard/assets/page-ClcUzawe.js +0 -1
- zenml/zen_server/dashboard/assets/page-Ct_LB3zo.js +0 -1
- zenml/zen_server/dashboard/assets/page-D-ZWUMYY.js +0 -1
- zenml/zen_server/dashboard/assets/page-DHrvih9u.js +0 -1
- zenml/zen_server/dashboard/assets/page-DMhaHZDw.js +0 -1
- zenml/zen_server/dashboard/assets/page-DcQmxKLp.js +0 -1
- zenml/zen_server/dashboard/assets/page-Dh4GRWw5.js +0 -1
- zenml/zen_server/dashboard/assets/page-Dn7ZNapg.js +0 -1
- zenml/zen_server/dashboard/assets/page-Dy4vSQY7.js +0 -1
- zenml/zen_server/dashboard/assets/page-QrvWQwZb.js +0 -1
- zenml/zen_server/dashboard/assets/page-RF3Fup0q.js +0 -40
- zenml/zen_server/dashboard/assets/page-WuvCrN47.js +0 -1
- zenml/zen_server/dashboard/assets/page-_WnHBI1F.js +0 -1
- zenml/zen_server/dashboard/assets/page-ghjVNgVE.js +0 -1
- zenml/zen_server/dashboard/assets/page-iDsDiDXw.js +0 -1
- zenml/zen_server/dashboard/assets/resource-type-tooltip-BL9ZTRKi.js +0 -1
- zenml/zen_server/dashboard/assets/service-connectors-Q8h7-_rG.js +0 -1
- zenml/zen_server/dashboard/assets/service-k-9Vsb30.js +0 -2
- zenml/zen_server/dashboard/assets/stack-detail-query-CNmVZ0Bo.js +0 -1
- zenml/zen_server/dashboard/assets/type-guards-CNgPYg8l.js +0 -1
- zenml/zen_server/dashboard/assets/update-server-settings-mutation-Bwe3gUt4.js +0 -1
- {zenml_nightly-0.83.1.dev20250710.dist-info → zenml_nightly-0.84.0.dev20250712.dist-info}/LICENSE +0 -0
- {zenml_nightly-0.83.1.dev20250710.dist-info → zenml_nightly-0.84.0.dev20250712.dist-info}/WHEEL +0 -0
- {zenml_nightly-0.83.1.dev20250710.dist-info → zenml_nightly-0.84.0.dev20250712.dist-info}/entry_points.txt +0 -0
@@ -15,7 +15,7 @@
|
|
15
15
|
|
16
16
|
from typing import TYPE_CHECKING, Any, Dict, Optional, Type
|
17
17
|
|
18
|
-
from pydantic import NonNegativeInt, PositiveInt, field_validator
|
18
|
+
from pydantic import Field, NonNegativeInt, PositiveInt, field_validator
|
19
19
|
|
20
20
|
from zenml.config.base_settings import BaseSettings
|
21
21
|
from zenml.constants import KUBERNETES_CLUSTER_RESOURCE_TYPE
|
@@ -33,92 +33,122 @@ if TYPE_CHECKING:
|
|
33
33
|
class KubernetesOrchestratorSettings(BaseSettings):
|
34
34
|
"""Settings for the Kubernetes orchestrator.
|
35
35
|
|
36
|
-
|
37
|
-
|
38
|
-
orchestrator waits until all steps finish running. If `False`,
|
39
|
-
the client returns immediately and the pipeline is executed
|
40
|
-
asynchronously. Defaults to `True`.
|
41
|
-
timeout: How many seconds to wait for synchronous runs. `0` means
|
42
|
-
to wait for an unlimited duration.
|
43
|
-
stream_step_logs: If `True`, the orchestrator pod will stream the logs
|
44
|
-
of the step pods. This only has an effect if specified on the
|
45
|
-
pipeline, not on individual steps.
|
46
|
-
service_account_name: Name of the service account to use for the
|
47
|
-
orchestrator pod. If not provided, a new service account with "edit"
|
48
|
-
permissions will be created.
|
49
|
-
step_pod_service_account_name: Name of the service account to use for the
|
50
|
-
step pods. If not provided, the default service account will be used.
|
51
|
-
privileged: If the container should be run in privileged mode.
|
52
|
-
pod_settings: Pod settings to apply to pods executing the steps.
|
53
|
-
orchestrator_pod_settings: Pod settings to apply to the pod which is
|
54
|
-
launching the actual steps.
|
55
|
-
pod_name_prefix: Prefix to use for the pod name.
|
56
|
-
pod_startup_timeout: The maximum time to wait for a pending step pod to
|
57
|
-
start (in seconds).
|
58
|
-
pod_failure_max_retries: The maximum number of times to retry a step
|
59
|
-
pod if the step Kubernetes pod fails to start
|
60
|
-
pod_failure_retry_delay: The delay in seconds between pod
|
61
|
-
failure retries and pod startup retries (in seconds)
|
62
|
-
pod_failure_backoff: The backoff factor for pod failure retries and
|
63
|
-
pod startup retries.
|
64
|
-
max_parallelism: Maximum number of steps to run in parallel.
|
65
|
-
successful_jobs_history_limit: The number of successful jobs
|
66
|
-
to retain. This only applies to jobs created when scheduling a
|
67
|
-
pipeline.
|
68
|
-
failed_jobs_history_limit: The number of failed jobs to retain.
|
69
|
-
This only applies to jobs created when scheduling a pipeline.
|
70
|
-
ttl_seconds_after_finished: The amount of seconds to keep finished jobs
|
71
|
-
before deleting them. **Note**: This does not clean up the
|
72
|
-
orchestrator pod for non-scheduled runs.
|
73
|
-
active_deadline_seconds: The active deadline seconds for the job that is
|
74
|
-
executing the step.
|
75
|
-
backoff_limit_margin: The value to add to the backoff limit in addition
|
76
|
-
to the step retries. The retry configuration defined on the step
|
77
|
-
defines the maximum number of retries that the server will accept
|
78
|
-
for a step. For this orchestrator, this controls how often the
|
79
|
-
job running the step will try to start the step pod. There are some
|
80
|
-
circumstances however where the job will start the pod, but the pod
|
81
|
-
doesn't actually get to the point of running the step. That means
|
82
|
-
the server will not receive the maximum amount of retry requests,
|
83
|
-
which in turn causes other inconsistencies like wrong step statuses.
|
84
|
-
To mitigate this, this attribute allows to add a margin to the
|
85
|
-
backoff limit. This means that the job will retry the pod startup
|
86
|
-
for the configured amount of times plus the margin, which increases
|
87
|
-
the chance of the server receiving the maximum amount of retry
|
88
|
-
requests.
|
89
|
-
pod_failure_policy: The pod failure policy to use for the job that is
|
90
|
-
executing the step.
|
91
|
-
prevent_orchestrator_pod_caching: If `True`, the orchestrator pod will
|
92
|
-
not try to compute cached steps before starting the step pods.
|
93
|
-
always_build_pipeline_image: If `True`, the orchestrator will always
|
94
|
-
build the pipeline image, even if all steps have a custom build.
|
95
|
-
pod_stop_grace_period: When stopping a pipeline run, the amount of
|
96
|
-
seconds to wait for a step pod to shutdown gracefully.
|
36
|
+
Configuration options for how pipelines are executed on Kubernetes clusters.
|
37
|
+
Field descriptions are defined inline using Field() descriptors.
|
97
38
|
"""
|
98
39
|
|
99
|
-
synchronous: bool =
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
40
|
+
synchronous: bool = Field(
|
41
|
+
default=True,
|
42
|
+
description="Whether to wait for all pipeline steps to complete. "
|
43
|
+
"When `False`, the client returns immediately and execution continues asynchronously.",
|
44
|
+
)
|
45
|
+
timeout: int = Field(
|
46
|
+
default=0,
|
47
|
+
description="Maximum seconds to wait for synchronous runs. Set to `0` for unlimited duration.",
|
48
|
+
)
|
49
|
+
stream_step_logs: bool = Field(
|
50
|
+
default=True,
|
51
|
+
description="If `True`, the orchestrator pod will stream the logs "
|
52
|
+
"of the step pods. This only has an effect if specified on the "
|
53
|
+
"pipeline, not on individual steps.",
|
54
|
+
)
|
55
|
+
service_account_name: Optional[str] = Field(
|
56
|
+
default=None,
|
57
|
+
description="Kubernetes service account for the orchestrator pod. "
|
58
|
+
"If not specified, creates a new account with 'edit' permissions.",
|
59
|
+
)
|
60
|
+
step_pod_service_account_name: Optional[str] = Field(
|
61
|
+
default=None,
|
62
|
+
description="Kubernetes service account for step execution pods. "
|
63
|
+
"Uses the default service account if not specified.",
|
64
|
+
)
|
65
|
+
privileged: bool = Field(
|
66
|
+
default=False,
|
67
|
+
description="Whether to run containers in privileged mode with extended permissions.",
|
68
|
+
)
|
69
|
+
pod_settings: Optional[KubernetesPodSettings] = Field(
|
70
|
+
default=None,
|
71
|
+
description="Pod configuration for step execution containers.",
|
72
|
+
)
|
73
|
+
orchestrator_pod_settings: Optional[KubernetesPodSettings] = Field(
|
74
|
+
default=None,
|
75
|
+
description="Pod configuration for the orchestrator container that launches step pods.",
|
76
|
+
)
|
77
|
+
pod_name_prefix: Optional[str] = Field(
|
78
|
+
default=None,
|
79
|
+
description="Custom prefix for generated pod names. Helps identify pods in the cluster.",
|
80
|
+
)
|
81
|
+
pod_startup_timeout: int = Field(
|
82
|
+
default=600,
|
83
|
+
description="Maximum seconds to wait for step pods to start. Default is 10 minutes.",
|
84
|
+
)
|
85
|
+
pod_failure_max_retries: int = Field(
|
86
|
+
default=3,
|
87
|
+
description="Maximum retry attempts when step pods fail to start.",
|
88
|
+
)
|
89
|
+
pod_failure_retry_delay: int = Field(
|
90
|
+
default=10,
|
91
|
+
description="Delay in seconds between pod failure retry attempts.",
|
92
|
+
)
|
93
|
+
pod_failure_backoff: float = Field(
|
94
|
+
default=1.0,
|
95
|
+
description="Exponential backoff factor for retry delays. Values > 1.0 increase delay with each retry.",
|
96
|
+
)
|
97
|
+
max_parallelism: Optional[PositiveInt] = Field(
|
98
|
+
default=None,
|
99
|
+
description="Maximum number of step pods to run concurrently. No limit if not specified.",
|
100
|
+
)
|
101
|
+
successful_jobs_history_limit: Optional[NonNegativeInt] = Field(
|
102
|
+
default=None,
|
103
|
+
description="Number of successful scheduled jobs to retain in cluster history.",
|
104
|
+
)
|
105
|
+
failed_jobs_history_limit: Optional[NonNegativeInt] = Field(
|
106
|
+
default=None,
|
107
|
+
description="Number of failed scheduled jobs to retain in cluster history.",
|
108
|
+
)
|
109
|
+
ttl_seconds_after_finished: Optional[NonNegativeInt] = Field(
|
110
|
+
default=None,
|
111
|
+
description="Seconds to keep finished scheduled jobs before automatic cleanup.",
|
112
|
+
)
|
113
|
+
active_deadline_seconds: Optional[NonNegativeInt] = Field(
|
114
|
+
default=None,
|
115
|
+
description="Deadline in seconds for the active pod. If the pod is inactive for this many seconds, it will be terminated.",
|
116
|
+
)
|
117
|
+
backoff_limit_margin: NonNegativeInt = Field(
|
118
|
+
default=0,
|
119
|
+
description="The value to add to the backoff limit in addition "
|
120
|
+
"to the step retries. The retry configuration defined on the step "
|
121
|
+
"defines the maximum number of retries that the server will accept "
|
122
|
+
"for a step. For this orchestrator, this controls how often the "
|
123
|
+
"job running the step will try to start the step pod. There are some "
|
124
|
+
"circumstances however where the job will start the pod, but the pod "
|
125
|
+
"doesn't actually get to the point of running the step. That means "
|
126
|
+
"the server will not receive the maximum amount of retry requests, "
|
127
|
+
"which in turn causes other inconsistencies like wrong step statuses. "
|
128
|
+
"To mitigate this, this attribute allows to add a margin to the "
|
129
|
+
"backoff limit. This means that the job will retry the pod startup "
|
130
|
+
"for the configured amount of times plus the margin, which increases "
|
131
|
+
"the chance of the server receiving the maximum amount of retry "
|
132
|
+
"requests.",
|
133
|
+
)
|
134
|
+
pod_failure_policy: Optional[Dict[str, Any]] = Field(
|
135
|
+
default=None,
|
136
|
+
description="The pod failure policy to use for the job that is "
|
137
|
+
"executing the step.",
|
138
|
+
)
|
139
|
+
prevent_orchestrator_pod_caching: bool = Field(
|
140
|
+
default=False,
|
141
|
+
description="Whether to disable caching optimization in the orchestrator pod.",
|
142
|
+
)
|
143
|
+
always_build_pipeline_image: bool = Field(
|
144
|
+
default=False,
|
145
|
+
description="If `True`, the orchestrator will always build the pipeline image, "
|
146
|
+
"even if all steps have a custom build.",
|
147
|
+
)
|
148
|
+
pod_stop_grace_period: PositiveInt = Field(
|
149
|
+
default=30,
|
150
|
+
description="When stopping a pipeline run, the amount of seconds to wait for a step pod to shutdown gracefully.",
|
151
|
+
)
|
122
152
|
|
123
153
|
@field_validator("pod_failure_policy", mode="before")
|
124
154
|
@classmethod
|
@@ -144,42 +174,50 @@ class KubernetesOrchestratorSettings(BaseSettings):
|
|
144
174
|
class KubernetesOrchestratorConfig(
|
145
175
|
BaseOrchestratorConfig, KubernetesOrchestratorSettings
|
146
176
|
):
|
147
|
-
"""Configuration for the Kubernetes orchestrator.
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
177
|
+
"""Configuration for the Kubernetes orchestrator."""
|
178
|
+
|
179
|
+
incluster: bool = Field(
|
180
|
+
False,
|
181
|
+
description="If `True`, the orchestrator will run the pipeline inside the "
|
182
|
+
"same cluster in which it itself is running. This requires the client "
|
183
|
+
"to run in a Kubernetes pod itself. If set, the `kubernetes_context` "
|
184
|
+
"config option is ignored. If the stack component is linked to a "
|
185
|
+
"Kubernetes service connector, this field is ignored.",
|
186
|
+
)
|
187
|
+
kubernetes_context: Optional[str] = Field(
|
188
|
+
None,
|
189
|
+
description="Name of a Kubernetes context to run pipelines in. "
|
190
|
+
"If the stack component is linked to a Kubernetes service connector, "
|
191
|
+
"this field is ignored. Otherwise, it is mandatory.",
|
192
|
+
)
|
193
|
+
kubernetes_namespace: str = Field(
|
194
|
+
"zenml",
|
195
|
+
description="Name of the Kubernetes namespace to be used. "
|
196
|
+
"If not provided, `zenml` namespace will be used.",
|
197
|
+
)
|
198
|
+
local: bool = Field(
|
199
|
+
False,
|
200
|
+
description="If `True`, the orchestrator will assume it is connected to a "
|
201
|
+
"local kubernetes cluster and will perform additional validations and "
|
202
|
+
"operations to allow using the orchestrator in combination with other "
|
203
|
+
"local stack components that store data in the local filesystem "
|
204
|
+
"(i.e. it will mount the local stores directory into the pipeline containers).",
|
205
|
+
)
|
206
|
+
skip_local_validations: bool = Field(
|
207
|
+
False, description="If `True`, the local validations will be skipped."
|
208
|
+
)
|
209
|
+
parallel_step_startup_waiting_period: Optional[float] = Field(
|
210
|
+
None,
|
211
|
+
description="How long to wait in between starting parallel steps. "
|
212
|
+
"This can be used to distribute server load when running pipelines "
|
213
|
+
"with a huge amount of parallel steps.",
|
214
|
+
)
|
215
|
+
pass_zenml_token_as_secret: bool = Field(
|
216
|
+
False,
|
217
|
+
description="If `True`, the ZenML token will be passed as a Kubernetes secret "
|
218
|
+
"to the pods. For this to work, the Kubernetes client must have permissions "
|
219
|
+
"to create secrets in the namespace.",
|
220
|
+
)
|
183
221
|
|
184
222
|
@property
|
185
223
|
def is_remote(self) -> bool:
|
@@ -15,6 +15,8 @@
|
|
15
15
|
|
16
16
|
from typing import TYPE_CHECKING, Optional, Type
|
17
17
|
|
18
|
+
from pydantic import Field
|
19
|
+
|
18
20
|
from zenml.config.base_settings import BaseSettings
|
19
21
|
from zenml.constants import KUBERNETES_CLUSTER_RESOURCE_TYPE
|
20
22
|
from zenml.integrations.kubernetes import KUBERNETES_STEP_OPERATOR_FLAVOR
|
@@ -31,27 +33,38 @@ if TYPE_CHECKING:
|
|
31
33
|
class KubernetesStepOperatorSettings(BaseSettings):
    """Per-step settings accepted by the Kubernetes step operator.

    Each option carries its own documentation in the `description` argument
    of the corresponding `Field()` declaration below.
    """

    pod_settings: Optional[KubernetesPodSettings] = Field(
        None,
        description="Pod configuration for step execution containers.",
    )
    service_account_name: Optional[str] = Field(
        None,
        description=(
            "Kubernetes service account for step pods. "
            "Uses default account if not specified."
        ),
    )
    privileged: bool = Field(
        False,
        description=(
            "Whether to run step containers in privileged mode "
            "with extended permissions."
        ),
    )
    pod_startup_timeout: int = Field(
        600,
        description=(
            "Maximum seconds to wait for step pods to start. "
            "Default is 10 minutes."
        ),
    )
    pod_failure_max_retries: int = Field(
        3,
        description="Maximum retry attempts when step pods fail to start.",
    )
    pod_failure_retry_delay: int = Field(
        10,
        description="Delay in seconds between pod failure retry attempts.",
    )
    pod_failure_backoff: float = Field(
        1.0,
        description=(
            "Exponential backoff factor for retry delays. "
            "Values > 1.0 increase delay with each retry."
        ),
    )
|
55
68
|
|
56
69
|
|
57
70
|
class KubernetesStepOperatorConfig(
|
@@ -59,22 +72,24 @@ class KubernetesStepOperatorConfig(
|
|
59
72
|
):
|
60
73
|
"""Configuration for the Kubernetes step operator.
|
61
74
|
|
62
|
-
|
63
|
-
|
64
|
-
incluster: If `True`, the step operator will run the pipeline inside the
|
65
|
-
same cluster in which the orchestrator is running. For this to work,
|
66
|
-
the pod running the orchestrator needs permissions to create new
|
67
|
-
pods. If set, the `kubernetes_context` config option is ignored. If
|
68
|
-
the stack component is linked to a Kubernetes service connector,
|
69
|
-
this field is ignored.
|
70
|
-
kubernetes_context: Name of a Kubernetes context to run pipelines in.
|
71
|
-
If the stack component is linked to a Kubernetes service connector,
|
72
|
-
this field is ignored. Otherwise, it is mandatory.
|
75
|
+
Defines cluster connection and execution settings.
|
76
|
+
Field descriptions are defined inline using Field() descriptors.
|
73
77
|
"""
|
74
78
|
|
75
|
-
kubernetes_namespace: str =
|
76
|
-
|
77
|
-
|
79
|
+
kubernetes_namespace: str = Field(
|
80
|
+
default="zenml",
|
81
|
+
description="Kubernetes namespace for step execution. Must be a valid namespace name.",
|
82
|
+
)
|
83
|
+
incluster: bool = Field(
|
84
|
+
default=False,
|
85
|
+
description="Whether to execute within the same cluster as the orchestrator. "
|
86
|
+
"Requires appropriate pod creation permissions.",
|
87
|
+
)
|
88
|
+
kubernetes_context: Optional[str] = Field(
|
89
|
+
default=None,
|
90
|
+
description="Kubernetes context name for cluster connection. "
|
91
|
+
"Ignored when using service connectors or in-cluster execution.",
|
92
|
+
)
|
78
93
|
|
79
94
|
@property
|
80
95
|
def is_remote(self) -> bool:
|
@@ -49,7 +49,7 @@ from zenml.config.base_settings import BaseSettings
|
|
49
49
|
from zenml.constants import (
|
50
50
|
METADATA_ORCHESTRATOR_RUN_ID,
|
51
51
|
)
|
52
|
-
from zenml.enums import StackComponentType
|
52
|
+
from zenml.enums import ExecutionStatus, StackComponentType
|
53
53
|
from zenml.integrations.kubernetes.flavors.kubernetes_orchestrator_flavor import (
|
54
54
|
KubernetesOrchestratorConfig,
|
55
55
|
KubernetesOrchestratorSettings,
|
@@ -785,6 +785,187 @@ class KubernetesOrchestrator(ContainerizedOrchestrator):
|
|
785
785
|
f"No running step jobs found for pipeline run with ID: {run.id}"
|
786
786
|
)
|
787
787
|
|
788
|
+
def fetch_status(
    self, run: "PipelineRunResponse", include_steps: bool = False
) -> Tuple[
    Optional[ExecutionStatus], Optional[Dict[str, ExecutionStatus]]
]:
    """Determine the current status of a pipeline run from Kubernetes.

    Args:
        run: The pipeline run that was executed by this orchestrator.
        include_steps: Whether the statuses of the individual steps
            should be fetched as well.

    Returns:
        A `(pipeline_status, step_statuses)` tuple. `pipeline_status` is
        `None` when the run is already finished or when the orchestrator
        pod phase does not map to a status. `step_statuses` is `None`
        unless `include_steps` is set, in which case it is a (possibly
        empty) dictionary.

    Raises:
        ValueError: If the run has no orchestrator run ID.
    """
    # The orchestrator run ID doubles as the orchestrator pod name.
    pod_name = run.orchestrator_run_id
    if not pod_name:
        raise ValueError(
            "Cannot determine orchestrator run ID for the run. "
            "Unable to fetch the status."
        )

    # A finished run keeps its status; only unfinished runs are refreshed
    # from the phase of the orchestrator pod.
    pipeline_status: Optional[ExecutionStatus] = None
    if not run.status.is_finished:
        pipeline_status = self._map_pod_phase_to_execution_status(
            self._check_pod_status(pod_name=pod_name)
        )

    step_statuses = self._fetch_step_statuses(run) if include_steps else None
    return pipeline_status, step_statuses
|
833
|
+
|
834
|
+
def _check_pod_status(
    self,
    pod_name: str,
) -> kube_utils.PodPhase:
    """Look up a pod in the configured namespace and report its phase.

    Used for both orchestrator and step pods; it is meant to be called
    only for pipeline runs/steps that have not finished yet.

    Args:
        pod_name: The name of the pod to look up.

    Returns:
        The phase reported by the pod. `PodPhase.UNKNOWN` is returned
        (after logging a warning) when the pod, its status or its phase
        is not available, or when the reported phase is not a valid
        `PodPhase` value.
    """
    pod = kube_utils.get_pod(
        core_api=self._k8s_core_api,
        pod_name=pod_name,
        namespace=self.config.kubernetes_namespace,
    )

    # Nothing usable came back: no pod, no status or no phase.
    if not (pod and pod.status and pod.status.phase):
        logger.warning(
            f"Can't fetch the status of pod {pod_name} "
            f"in namespace {self.config.kubernetes_namespace}."
        )
        return kube_utils.PodPhase.UNKNOWN

    raw_phase = pod.status.phase
    try:
        return kube_utils.PodPhase(raw_phase)
    except ValueError:
        # The cluster reported a phase the enum does not know about.
        logger.warning(f"Unknown pod phase for pod {pod_name}: {raw_phase}")
        return kube_utils.PodPhase.UNKNOWN
|
869
|
+
|
870
|
+
def _map_pod_phase_to_execution_status(
    self, pod_phase: kube_utils.PodPhase
) -> Optional[ExecutionStatus]:
    """Translate a Kubernetes pod phase into a ZenML execution status.

    Args:
        pod_phase: The Kubernetes pod phase to translate.

    Returns:
        The matching ZenML execution status, or `None` for any phase
        without a mapping (e.g. `UNKNOWN`), which signals that the status
        should not be updated.
    """
    # Checked in order with `==`, mirroring a plain if/elif chain.
    phase_to_status = (
        (kube_utils.PodPhase.PENDING, ExecutionStatus.INITIALIZING),
        (kube_utils.PodPhase.RUNNING, ExecutionStatus.RUNNING),
        (kube_utils.PodPhase.SUCCEEDED, ExecutionStatus.COMPLETED),
        (kube_utils.PodPhase.FAILED, ExecutionStatus.FAILED),
    )
    for known_phase, status in phase_to_status:
        if pod_phase == known_phase:
            return status

    # UNKNOWN (or anything else) - no update
    return None
|
891
|
+
|
892
|
+
def _map_job_status_to_execution_status(
    self, job: k8s_client.V1Job
) -> Optional[ExecutionStatus]:
    """Translate the conditions of a Kubernetes job into a ZenML status.

    Args:
        job: The Kubernetes job to inspect.

    Returns:
        `COMPLETED` or `FAILED` according to the first job condition of
        type `Complete` or `Failed` whose status is `"True"`. `None` if
        the job has no such condition, meaning the status should not be
        updated.
    """
    job_status = job.status
    if not job_status or not job_status.conditions:
        return None

    for condition in job_status.conditions:
        is_set = condition.status == "True"
        if is_set and condition.type == "Complete":
            return ExecutionStatus.COMPLETED
        if is_set and condition.type == "Failed":
            return ExecutionStatus.FAILED

    # No terminal condition found - don't update
    return None
|
913
|
+
|
914
|
+
def _fetch_step_statuses(
    self, run: "PipelineRunResponse"
) -> Dict[str, ExecutionStatus]:
    """Fetch the statuses of individual pipeline steps.

    The Kubernetes jobs labelled with the run ID are listed and matched to
    the steps of the run through their `step_name` label. Steps that are
    not part of the run response yet, or that are already finished, are
    left out, as are jobs without a conclusive status.

    Args:
        run: The pipeline run response.

    Returns:
        A dictionary mapping step names (as used in `run.steps`) to their
        execution statuses. Empty if the jobs could not be listed.
    """
    step_statuses: Dict[str, ExecutionStatus] = {}

    # Query all jobs for this run and match them to steps
    label_selector = f"run_id={kube_utils.sanitize_label(str(run.id))}"

    try:
        jobs = self._k8s_batch_api.list_namespaced_job(
            namespace=self.config.kubernetes_namespace,
            label_selector=label_selector,
        )
    except Exception as e:
        # Deliberately best effort: a failed lookup must not break the
        # status refresh, it just yields no step updates.
        logger.warning(f"Failed to list jobs for run {run.id}: {e}")
        return {}

    steps_dict = run.steps

    # The `step_name` label is written in sanitized form, so it can differ
    # from the real step name used as key in `run.steps`. Index the steps
    # by their sanitized name to resolve such labels; `None` marks a label
    # shared by several steps, which cannot be resolved safely.
    label_to_step_name: Dict[str, Optional[str]] = {}
    for name in steps_dict:
        label = kube_utils.sanitize_label(name)
        label_to_step_name[label] = (
            None if label in label_to_step_name else name
        )

    for job in jobs.items:
        # Extract the step label from the job metadata
        if not job.metadata or not job.metadata.labels:
            continue

        step_label = job.metadata.labels.get("step_name")
        if not step_label:
            continue

        # An exact match wins (names unaffected by sanitization); otherwise
        # fall back to the sanitized-name index.
        if step_label in steps_dict:
            step_name: Optional[str] = step_label
        else:
            step_name = label_to_step_name.get(step_label)

        # If the step is not in the run response yet (or the label is
        # ambiguous), skip, we can't update
        if step_name is None:
            continue

        # If the step is already in a finished state, skip
        if steps_dict[step_name].status.is_finished:
            continue

        # Check job status and map to execution status
        execution_status = self._map_job_status_to_execution_status(job)
        if execution_status is not None:
            step_statuses[step_name] = execution_status

    return step_statuses
|
968
|
+
|
788
969
|
def get_pipeline_run_metadata(
|
789
970
|
self, run_id: UUID
|
790
971
|
) -> Dict[str, "MetadataType"]:
|
@@ -187,7 +187,7 @@ def main() -> None:
|
|
187
187
|
|
188
188
|
return True
|
189
189
|
|
190
|
-
|
190
|
+
base_labels = {
|
191
191
|
"run_id": kube_utils.sanitize_label(str(pipeline_run.id)),
|
192
192
|
"run_name": kube_utils.sanitize_label(str(pipeline_run.name)),
|
193
193
|
"pipeline": kube_utils.sanitize_label(
|
@@ -234,6 +234,10 @@ def main() -> None:
|
|
234
234
|
pod_name, namespace=namespace
|
235
235
|
)
|
236
236
|
|
237
|
+
# Add step name to labels so both pod and job have consistent labeling
|
238
|
+
step_labels = base_labels.copy()
|
239
|
+
step_labels["step_name"] = kube_utils.sanitize_label(step_name)
|
240
|
+
|
237
241
|
image = KubernetesOrchestrator.get_image(
|
238
242
|
deployment=deployment, step_name=step_name
|
239
243
|
)
|
@@ -281,7 +285,7 @@ def main() -> None:
|
|
281
285
|
or settings.service_account_name,
|
282
286
|
mount_local_stores=mount_local_stores,
|
283
287
|
termination_grace_period_seconds=settings.pod_stop_grace_period,
|
284
|
-
labels=
|
288
|
+
labels=step_labels,
|
285
289
|
)
|
286
290
|
|
287
291
|
retry_config = step_config.retry
|
@@ -347,7 +351,7 @@ def main() -> None:
|
|
347
351
|
active_deadline_seconds=settings.active_deadline_seconds,
|
348
352
|
pod_failure_policy=pod_failure_policy,
|
349
353
|
owner_references=owner_references,
|
350
|
-
labels=
|
354
|
+
labels=step_labels,
|
351
355
|
)
|
352
356
|
|
353
357
|
kube_utils.create_job(
|