zenml-nightly 0.83.1.dev20250710__py3-none-any.whl → 0.84.0.dev20250712__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (267)
  1. zenml/VERSION +1 -1
  2. zenml/artifact_stores/base_artifact_store.py +51 -23
  3. zenml/artifacts/utils.py +3 -1
  4. zenml/cli/pipeline.py +13 -2
  5. zenml/constants.py +4 -0
  6. zenml/container_registries/base_container_registry.py +17 -5
  7. zenml/enums.py +9 -0
  8. zenml/integrations/aws/flavors/sagemaker_orchestrator_flavor.py +150 -117
  9. zenml/integrations/aws/flavors/sagemaker_step_operator_flavor.py +43 -42
  10. zenml/integrations/aws/orchestrators/sagemaker_orchestrator.py +16 -7
  11. zenml/integrations/azure/orchestrators/azureml_orchestrator.py +18 -12
  12. zenml/integrations/bentoml/flavors/bentoml_model_deployer_flavor.py +7 -1
  13. zenml/integrations/databricks/flavors/databricks_orchestrator_flavor.py +58 -23
  14. zenml/integrations/feast/flavors/feast_feature_store_flavor.py +18 -5
  15. zenml/integrations/gcp/flavors/vertex_experiment_tracker_flavor.py +10 -42
  16. zenml/integrations/gcp/flavors/vertex_orchestrator_flavor.py +99 -92
  17. zenml/integrations/gcp/google_credentials_mixin.py +13 -8
  18. zenml/integrations/gcp/orchestrators/vertex_orchestrator.py +18 -9
  19. zenml/integrations/huggingface/__init__.py +1 -1
  20. zenml/integrations/hyperai/flavors/hyperai_orchestrator_flavor.py +28 -30
  21. zenml/integrations/kaniko/flavors/kaniko_image_builder_flavor.py +56 -40
  22. zenml/integrations/kubeflow/flavors/kubeflow_orchestrator_flavor.py +59 -48
  23. zenml/integrations/kubernetes/flavors/kubernetes_orchestrator_flavor.py +159 -121
  24. zenml/integrations/kubernetes/flavors/kubernetes_step_operator_flavor.py +48 -33
  25. zenml/integrations/kubernetes/orchestrators/kubernetes_orchestrator.py +182 -1
  26. zenml/integrations/kubernetes/orchestrators/kubernetes_orchestrator_entrypoint.py +7 -3
  27. zenml/integrations/lightning/flavors/lightning_orchestrator_flavor.py +41 -25
  28. zenml/integrations/mlflow/flavors/mlflow_experiment_tracker_flavor.py +51 -44
  29. zenml/integrations/mlflow/flavors/mlflow_model_deployer_flavor.py +9 -4
  30. zenml/integrations/neptune/flavors/neptune_experiment_tracker_flavor.py +13 -12
  31. zenml/integrations/s3/flavors/s3_artifact_store_flavor.py +32 -7
  32. zenml/integrations/vllm/flavors/vllm_model_deployer_flavor.py +7 -1
  33. zenml/integrations/wandb/flavors/wandb_experiment_tracker_flavor.py +34 -25
  34. zenml/integrations/whylogs/flavors/whylogs_data_validator_flavor.py +14 -11
  35. zenml/logging/step_logging.py +8 -7
  36. zenml/models/v2/core/logs.py +2 -1
  37. zenml/models/v2/core/pipeline_run.py +0 -59
  38. zenml/orchestrators/base_orchestrator.py +7 -1
  39. zenml/pipelines/build_utils.py +2 -1
  40. zenml/stack/authentication_mixin.py +6 -5
  41. zenml/stack/flavor.py +5 -1
  42. zenml/utils/code_utils.py +2 -1
  43. zenml/utils/docker_utils.py +22 -0
  44. zenml/utils/io_utils.py +18 -0
  45. zenml/utils/pipeline_docker_image_builder.py +4 -1
  46. zenml/utils/run_utils.py +101 -8
  47. zenml/zen_server/dashboard/assets/{404-B5eko6XL.js → 404-B5cfnwZ1.js} +1 -1
  48. zenml/zen_server/dashboard/assets/{@radix-Cdvw4jJ8.js → @radix-C_LirfyT.js} +1 -1
  49. zenml/zen_server/dashboard/assets/{@react-router-DeDfXbUF.js → @react-router-BSsrkPOd.js} +1 -1
  50. zenml/zen_server/dashboard/assets/{@reactflow-B_iCtR7X.js → @reactflow-D9hglKLF.js} +2 -2
  51. zenml/zen_server/dashboard/assets/{@tanstack-5gTMR7G2.js → @tanstack-C0SeHZng.js} +1 -1
  52. zenml/zen_server/dashboard/assets/AlertDialogDropdownItem-druRNuO2.js +1 -0
  53. zenml/zen_server/dashboard/assets/ButtonGroup-SF2DlzHV.js +1 -0
  54. zenml/zen_server/dashboard/assets/{CodeSnippet-Ctplhzdc.js → CodeSnippet-D8iBqOVv.js} +1 -1
  55. zenml/zen_server/dashboard/assets/CollapsibleCard-D0-pQi1n.js +1 -0
  56. zenml/zen_server/dashboard/assets/{ComponentBadge-Cnecw3qz.js → ComponentBadge-mw2Ja_ON.js} +1 -1
  57. zenml/zen_server/dashboard/assets/ComponentIcon-BXgpt-jw.js +1 -0
  58. zenml/zen_server/dashboard/assets/{DeleteAlertDialog-DEI0YDzP.js → DeleteAlertDialog-BbyFVnVI.js} +1 -1
  59. zenml/zen_server/dashboard/assets/DialogItem-DeME0oSt.js +1 -0
  60. zenml/zen_server/dashboard/assets/{DisplayDate-8RESqe5H.js → DisplayDate-v3KW7oez.js} +1 -1
  61. zenml/zen_server/dashboard/assets/{EmptyState-CjrgDtVk.js → EmptyState-DG0m-CGv.js} +1 -1
  62. zenml/zen_server/dashboard/assets/Error-DcVLcrLs.js +1 -0
  63. zenml/zen_server/dashboard/assets/ExecutionStatus-C4tlFnlh.js +1 -0
  64. zenml/zen_server/dashboard/assets/{Helpbox-DtUG2Bf_.js → Helpbox-C-RGHz3S.js} +1 -1
  65. zenml/zen_server/dashboard/assets/{Infobox-CSBRrM6r.js → Infobox-DFCWPbMb.js} +1 -1
  66. zenml/zen_server/dashboard/assets/{LeftSideMenu-DPsCCK3z.js → LeftSideMenu-Czev0KCA.js} +1 -1
  67. zenml/zen_server/dashboard/assets/{Lock-CrIAdQo6.js → Lock-CRP5J_su.js} +1 -1
  68. zenml/zen_server/dashboard/assets/NestedCollapsible-CN9scBUn.js +1 -0
  69. zenml/zen_server/dashboard/assets/{NumberBox-DtCv7jh3.js → NumberBox-CoQjQYDJ.js} +1 -1
  70. zenml/zen_server/dashboard/assets/{Pagination-CWnEpSpN.js → Pagination-CcDD5yHh.js} +1 -1
  71. zenml/zen_server/dashboard/assets/Partials-DlMzfKgs.js +1 -0
  72. zenml/zen_server/dashboard/assets/{PasswordChecker-B88WjuCe.js → PasswordChecker-BZwoeQIm.js} +1 -1
  73. zenml/zen_server/dashboard/assets/{ProCta-CNyp04C8.js → ProCta-CU2ycJDo.js} +1 -1
  74. zenml/zen_server/dashboard/assets/ProviderIcon-BMAn9Jld.js +1 -0
  75. zenml/zen_server/dashboard/assets/ProviderRadio-D_q9tE3G.js +1 -0
  76. zenml/zen_server/dashboard/assets/RunsBody-BToytB8e.js +1 -0
  77. zenml/zen_server/dashboard/assets/{SearchField-BtUi6cYl.js → SearchField-D_0-uAPj.js} +1 -1
  78. zenml/zen_server/dashboard/assets/SecretTooltip-BcWMKb9f.js +1 -0
  79. zenml/zen_server/dashboard/assets/{SetPassword-BmbgL_ed.js → SetPassword-CaKVSqAL.js} +1 -1
  80. zenml/zen_server/dashboard/assets/{SheetHeader-DkH7aG9K.js → SheetHeader-7vwlsY_i.js} +1 -1
  81. zenml/zen_server/dashboard/assets/StackComponentList-s7eSfm8o.js +1 -0
  82. zenml/zen_server/dashboard/assets/StackList-Dt0FrIkM.js +1 -0
  83. zenml/zen_server/dashboard/assets/Tabs-B27AHUfo.js +1 -0
  84. zenml/zen_server/dashboard/assets/Tick-DDeDgTuT.js +1 -0
  85. zenml/zen_server/dashboard/assets/{UpdatePasswordSchemas-D_DCETSO.js → UpdatePasswordSchemas-Da5RndbV.js} +1 -1
  86. zenml/zen_server/dashboard/assets/{Wizard-BHvY75u_.js → Wizard-8aJzxUjb.js} +1 -1
  87. zenml/zen_server/dashboard/assets/WizardFooter-Bt7_UE14.js +1 -0
  88. zenml/zen_server/dashboard/assets/{all-pipeline-runs-query-DpKw9WL9.js → all-pipeline-runs-query-gorNNEaT.js} +1 -1
  89. zenml/zen_server/dashboard/assets/{arrow-left-MRXv5pAH.js → arrow-left-hcj2H8HY.js} +1 -1
  90. zenml/zen_server/dashboard/assets/bar-chart-square-check-9siI9icm.js +1 -0
  91. zenml/zen_server/dashboard/assets/{bulk-delete-CzYA--cC.js → bulk-delete-B5RTlnD_.js} +1 -1
  92. zenml/zen_server/dashboard/assets/{check-B9QMTa3f.js → check-D1bHMJkL.js} +1 -1
  93. zenml/zen_server/dashboard/assets/{check-circle-C4tYvbtw.js → check-circle-mnEgPhPF.js} +1 -1
  94. zenml/zen_server/dashboard/assets/{chevron-down-jbbQh82s.js → chevron-down-Z3nUe-0U.js} +1 -1
  95. zenml/zen_server/dashboard/assets/{chevron-right-double-Dgp_gEsp.js → chevron-right-double-CbRQKN4Q.js} +1 -1
  96. zenml/zen_server/dashboard/assets/{clock-B_mTG8PH.js → clock-BMjHXT3f.js} +1 -1
  97. zenml/zen_server/dashboard/assets/{code-browser-CiD8qkBx.js → code-browser-DftoiCIg.js} +1 -1
  98. zenml/zen_server/dashboard/assets/configuration-form-Yz8m0QIG.js +1 -0
  99. zenml/zen_server/dashboard/assets/constants-DeV48DuZ.js +1 -0
  100. zenml/zen_server/dashboard/assets/{create-stack-BpZrmKDu.js → create-stack-BruqH_6X.js} +1 -1
  101. zenml/zen_server/dashboard/assets/credit-card-CH1BHrXY.js +1 -0
  102. zenml/zen_server/dashboard/assets/dataflow-2-qHjWt7zp.js +1 -0
  103. zenml/zen_server/dashboard/assets/{delete-run-BkyDsKQc.js → delete-run-ibBtciMR.js} +1 -1
  104. zenml/zen_server/dashboard/assets/{expand-full-BPiXpch2.js → expand-full-CD4fFvM-.js} +1 -1
  105. zenml/zen_server/dashboard/assets/{eye-CbVlAYty.js → eye-CLNgIh_K.js} +1 -1
  106. zenml/zen_server/dashboard/assets/{file-text-Cd8wVfq5.js → file-text-CltVhgwZ.js} +1 -1
  107. zenml/zen_server/dashboard/assets/form-6aSt3tIl.js +1 -0
  108. zenml/zen_server/dashboard/assets/form-schemas-B9XgTS1V.js +1 -0
  109. zenml/zen_server/dashboard/assets/gradient_bg-BH8t8fi6.webp +0 -0
  110. zenml/zen_server/dashboard/assets/{help-Co6aedki.js → help-B0CvBhCm.js} +1 -1
  111. zenml/zen_server/dashboard/assets/icon-hDriJUXY.js +1 -0
  112. zenml/zen_server/dashboard/assets/{index-eoDB_1XX.js → index-B7CRNU8l.js} +1 -1
  113. zenml/zen_server/dashboard/assets/index-BQWlHo1Y.js +1 -0
  114. zenml/zen_server/dashboard/assets/{index-DWpiv-Ft.js → index-BRhKF2z-.js} +1 -1
  115. zenml/zen_server/dashboard/assets/{index-BgEfQ3_G.js → index-BacoJBEQ.js} +11 -11
  116. zenml/zen_server/dashboard/assets/{index-BBt0LDtR.js → index-dCcVgFNl.js} +1 -1
  117. zenml/zen_server/dashboard/assets/index-eggipFZS.css +1 -0
  118. zenml/zen_server/dashboard/assets/index-mA8kL088.js +14 -0
  119. zenml/zen_server/dashboard/assets/{index.es-C1gfATPn.js → index.es-DcVFDpJU.js} +1 -1
  120. zenml/zen_server/dashboard/assets/{index.esm-DhJo3mA6.js → index.esm-COnaHLSh.js} +1 -1
  121. zenml/zen_server/dashboard/assets/{info-QkbQz4QU.js → info-CyMih3vQ.js} +1 -1
  122. zenml/zen_server/dashboard/assets/{key-icon-C07HKw8z.js → key-icon-HOx2gazv.js} +1 -1
  123. zenml/zen_server/dashboard/assets/{layout-DBbfEFBe.js → layout-C5dgIReC.js} +1 -1
  124. zenml/zen_server/dashboard/assets/layout-CFLL6-CM.js +1 -0
  125. zenml/zen_server/dashboard/assets/{login-mutation-C1hvP_cX.js → login-mutation-CidpsqyH.js} +1 -1
  126. zenml/zen_server/dashboard/assets/{logs-CQKlJjo0.js → logs-DoLoTEfj.js} +1 -1
  127. zenml/zen_server/dashboard/assets/mail-C160gvB0.js +1 -0
  128. zenml/zen_server/dashboard/assets/message-chat-square-DLz6XmPS.js +1 -0
  129. zenml/zen_server/dashboard/assets/{package-miExReQl.js → package-BhYXGPxF.js} +1 -1
  130. zenml/zen_server/dashboard/assets/page-6huxSHEu.js +1 -0
  131. zenml/zen_server/dashboard/assets/page-7CJ4Wq3O.js +1 -0
  132. zenml/zen_server/dashboard/assets/page-8U20Tu_8.js +1 -0
  133. zenml/zen_server/dashboard/assets/{page-4zc4xPv2.js → page-BByayrO-.js} +2 -2
  134. zenml/zen_server/dashboard/assets/page-BCRXJXC9.js +1 -0
  135. zenml/zen_server/dashboard/assets/page-BK59rZvf.js +1 -0
  136. zenml/zen_server/dashboard/assets/page-BMpXak4U.js +1 -0
  137. zenml/zen_server/dashboard/assets/page-BTDi81N3.js +1 -0
  138. zenml/zen_server/dashboard/assets/{page-D-tJ_Y0a.js → page-BX67x4iL.js} +1 -1
  139. zenml/zen_server/dashboard/assets/page-Bjmcdg64.js +1 -0
  140. zenml/zen_server/dashboard/assets/page-BsAn8p4m.js +1 -0
  141. zenml/zen_server/dashboard/assets/{page-C2i-C7jv.js → page-BwjPRuaY.js} +1 -1
  142. zenml/zen_server/dashboard/assets/page-CDtSVkNc.js +1 -0
  143. zenml/zen_server/dashboard/assets/page-CEDU0L2T.js +1 -0
  144. zenml/zen_server/dashboard/assets/page-COJK90rG.js +1 -0
  145. zenml/zen_server/dashboard/assets/page-CY0LPcAJ.js +1 -0
  146. zenml/zen_server/dashboard/assets/page-C_XMn4GU.js +1 -0
  147. zenml/zen_server/dashboard/assets/page-Cb3KGsPK.js +22 -0
  148. zenml/zen_server/dashboard/assets/page-Cc8owYXQ.js +1 -0
  149. zenml/zen_server/dashboard/assets/{page-C3JfJxuR.js → page-CeGBDh1Q.js} +1 -1
  150. zenml/zen_server/dashboard/assets/page-CiGOVsj3.js +1 -0
  151. zenml/zen_server/dashboard/assets/page-CmLSFMkW.js +1 -0
  152. zenml/zen_server/dashboard/assets/page-CnfCptXq.js +1 -0
  153. zenml/zen_server/dashboard/assets/page-CvllZMBF.js +1 -0
  154. zenml/zen_server/dashboard/assets/page-CxzglV3-.js +1 -0
  155. zenml/zen_server/dashboard/assets/{page-rVhXI5ZO.js → page-D6cvOG8w.js} +1 -1
  156. zenml/zen_server/dashboard/assets/{page-BxeZrG_t.js → page-DDWW21kl.js} +1 -1
  157. zenml/zen_server/dashboard/assets/{page-DiHZK-1w.js → page-DF4FVxxW.js} +2 -2
  158. zenml/zen_server/dashboard/assets/page-DSZfclXt.js +1 -0
  159. zenml/zen_server/dashboard/assets/page-DVLez4R1.js +1 -0
  160. zenml/zen_server/dashboard/assets/page-DcXrWWWh.js +1 -0
  161. zenml/zen_server/dashboard/assets/page-Dg7-H_9i.js +1 -0
  162. zenml/zen_server/dashboard/assets/{page-BPQ66vR-.js → page-DgldL5UB.js} +2 -2
  163. zenml/zen_server/dashboard/assets/page-Dw7XuiSo.js +18 -0
  164. zenml/zen_server/dashboard/assets/{page-DOCOmmKn.js → page-FQxi1Otg.js} +1 -1
  165. zenml/zen_server/dashboard/assets/page-XrmOHHg7.js +1 -0
  166. zenml/zen_server/dashboard/assets/page-YdWnx9MX.js +1 -0
  167. zenml/zen_server/dashboard/assets/page-oRm7D4TC.js +1 -0
  168. zenml/zen_server/dashboard/assets/{page-uxjMX8Iq.js → page-q41JNDWO.js} +1 -1
  169. zenml/zen_server/dashboard/assets/page-x2GXC8sI.js +1 -0
  170. zenml/zen_server/dashboard/assets/page-z2FXP4GY.js +1 -0
  171. zenml/zen_server/dashboard/assets/{persist-CFPbMcJX.js → persist-BKKcL1Kp.js} +1 -1
  172. zenml/zen_server/dashboard/assets/{persist-BsdEtCkd.js → persist-DxiyfAax.js} +1 -1
  173. zenml/zen_server/dashboard/assets/{pipeline-CSUlkd50.js → pipeline-BJ8liDnl.js} +1 -1
  174. zenml/zen_server/dashboard/assets/{plus-Cl0_rCVF.js → plus-cI8zD2xh.js} +1 -1
  175. zenml/zen_server/dashboard/assets/primary-role-CPGHymjN.js +1 -0
  176. zenml/zen_server/dashboard/assets/{react-error-boundary.esm-7_MuhCay.js → react-error-boundary.esm-DoXxY4pT.js} +1 -1
  177. zenml/zen_server/dashboard/assets/{refresh-BcTM09NW.js → refresh-3EF2R7ja.js} +1 -1
  178. zenml/zen_server/dashboard/assets/{resource-tyes-list-79FqS3LY.js → resource-tyes-list-B5rkZcbc.js} +1 -1
  179. zenml/zen_server/dashboard/assets/resource-type-tooltip-E97WGqfk.js +1 -0
  180. zenml/zen_server/dashboard/assets/service-B9aVzfAF.js +2 -0
  181. zenml/zen_server/dashboard/assets/service-connectors-DL2-k_E2.js +1 -0
  182. zenml/zen_server/dashboard/assets/{sharedSchema-C_HkejsG.js → sharedSchema-DyUO09BR.js} +1 -1
  183. zenml/zen_server/dashboard/assets/slash-circle-D2Lb2FyR.js +1 -0
  184. zenml/zen_server/dashboard/assets/stack-detail-query-Bc4QKlWg.js +1 -0
  185. zenml/zen_server/dashboard/assets/{terminal-XFL_4QN-.js → terminal-BObrvDlO.js} +1 -1
  186. zenml/zen_server/dashboard/assets/{terminal-square-XFL_4QN-.js → terminal-square-BObrvDlO.js} +1 -1
  187. zenml/zen_server/dashboard/assets/{transform-CeZdrxDZ.js → transform-DFpKTKgF.js} +1 -1
  188. zenml/zen_server/dashboard/assets/{trash-DP6Tpp_E.js → trash-HKxXWbSG.js} +1 -1
  189. zenml/zen_server/dashboard/assets/{update-current-user-mutation-Ca-Lmwuj.js → update-current-user-mutation-DSyUyHVj.js} +1 -1
  190. zenml/zen_server/dashboard/assets/update-server-settings-mutation-CdM-Sdds.js +1 -0
  191. zenml/zen_server/dashboard/assets/{zod-XdS2h1ws.js → zod-DgEcN9jD.js} +1 -1
  192. zenml/zen_server/dashboard/index.html +7 -7
  193. zenml/zen_server/deploy/daemon/daemon_zen_server.py +4 -0
  194. zenml/zen_server/deploy/docker/docker_zen_server.py +2 -0
  195. zenml/zen_server/routers/runs_endpoints.py +20 -28
  196. zenml/zen_stores/migrations/versions/0.84.0_release.py +23 -0
  197. zenml/zen_stores/sql_zen_store.py +9 -3
  198. {zenml_nightly-0.83.1.dev20250710.dist-info → zenml_nightly-0.84.0.dev20250712.dist-info}/METADATA +9 -22
  199. {zenml_nightly-0.83.1.dev20250710.dist-info → zenml_nightly-0.84.0.dev20250712.dist-info}/RECORD +202 -195
  200. zenml/zen_server/dashboard/assets/AlertDialogDropdownItem-DsOmO1FH.js +0 -1
  201. zenml/zen_server/dashboard/assets/ButtonGroup-4sPZDv70.js +0 -1
  202. zenml/zen_server/dashboard/assets/CollapsibleCard-CBKenz9f.js +0 -1
  203. zenml/zen_server/dashboard/assets/ComponentIcon-CMiVW-O6.js +0 -1
  204. zenml/zen_server/dashboard/assets/DialogItem-CRCDpYU6.js +0 -1
  205. zenml/zen_server/dashboard/assets/Error-BG6f_WRd.js +0 -1
  206. zenml/zen_server/dashboard/assets/ExecutionStatus-BuhNAE9w.js +0 -1
  207. zenml/zen_server/dashboard/assets/NestedCollapsible-CMuDIJlp.js +0 -1
  208. zenml/zen_server/dashboard/assets/Partials-CfHD6OH5.js +0 -1
  209. zenml/zen_server/dashboard/assets/ProviderIcon-DHejyg7C.js +0 -1
  210. zenml/zen_server/dashboard/assets/ProviderRadio-tGtie8Gc.js +0 -1
  211. zenml/zen_server/dashboard/assets/RunsBody-mYwMcWWj.js +0 -1
  212. zenml/zen_server/dashboard/assets/SecretTooltip-B5u1UsQ9.js +0 -1
  213. zenml/zen_server/dashboard/assets/StackComponentList-Bi8BKqCu.js +0 -1
  214. zenml/zen_server/dashboard/assets/StackList-Cvxapo0p.js +0 -1
  215. zenml/zen_server/dashboard/assets/StackName-CFSZL8ec.js +0 -1
  216. zenml/zen_server/dashboard/assets/Tabs-D4dv48ry.js +0 -1
  217. zenml/zen_server/dashboard/assets/Tick-Qquvr4P3.js +0 -1
  218. zenml/zen_server/dashboard/assets/UsageReason-DhiUV1bu.js +0 -1
  219. zenml/zen_server/dashboard/assets/WizardFooter-FQm8y-jP.js +0 -1
  220. zenml/zen_server/dashboard/assets/configuration-form-DSoMMiPE.js +0 -1
  221. zenml/zen_server/dashboard/assets/constants-DTfsIqHy.js +0 -1
  222. zenml/zen_server/dashboard/assets/flyte-Cj-xy_8I.svg +0 -10
  223. zenml/zen_server/dashboard/assets/form-BgtamtJm.js +0 -1
  224. zenml/zen_server/dashboard/assets/form-schemas-dyDkAxXP.js +0 -1
  225. zenml/zen_server/dashboard/assets/gcp-B1I3Qvcx.js +0 -1
  226. zenml/zen_server/dashboard/assets/index-BfNISy0X.css +0 -1
  227. zenml/zen_server/dashboard/assets/layout-o1x87a3q.js +0 -1
  228. zenml/zen_server/dashboard/assets/metaflow-weOkWNyT.svg +0 -10
  229. zenml/zen_server/dashboard/assets/page-4xUZpMN0.js +0 -31
  230. zenml/zen_server/dashboard/assets/page-B0104V6C.js +0 -1
  231. zenml/zen_server/dashboard/assets/page-BNJsjvof.js +0 -1
  232. zenml/zen_server/dashboard/assets/page-BQgSZ2nH.js +0 -1
  233. zenml/zen_server/dashboard/assets/page-BXl2ZX6J.js +0 -1
  234. zenml/zen_server/dashboard/assets/page-C2A-2Cj_.js +0 -1
  235. zenml/zen_server/dashboard/assets/page-CESEqC2L.js +0 -1
  236. zenml/zen_server/dashboard/assets/page-CF8cTZ7l.js +0 -1
  237. zenml/zen_server/dashboard/assets/page-CKjsimVu.js +0 -1
  238. zenml/zen_server/dashboard/assets/page-COLzBwff.js +0 -1
  239. zenml/zen_server/dashboard/assets/page-COifg5fa.js +0 -1
  240. zenml/zen_server/dashboard/assets/page-CQeJuA8T.js +0 -1
  241. zenml/zen_server/dashboard/assets/page-CcjWEjre.js +0 -1
  242. zenml/zen_server/dashboard/assets/page-CefGLeWy.js +0 -1
  243. zenml/zen_server/dashboard/assets/page-CfxpV3j4.js +0 -1
  244. zenml/zen_server/dashboard/assets/page-ClcUzawe.js +0 -1
  245. zenml/zen_server/dashboard/assets/page-Ct_LB3zo.js +0 -1
  246. zenml/zen_server/dashboard/assets/page-D-ZWUMYY.js +0 -1
  247. zenml/zen_server/dashboard/assets/page-DHrvih9u.js +0 -1
  248. zenml/zen_server/dashboard/assets/page-DMhaHZDw.js +0 -1
  249. zenml/zen_server/dashboard/assets/page-DcQmxKLp.js +0 -1
  250. zenml/zen_server/dashboard/assets/page-Dh4GRWw5.js +0 -1
  251. zenml/zen_server/dashboard/assets/page-Dn7ZNapg.js +0 -1
  252. zenml/zen_server/dashboard/assets/page-Dy4vSQY7.js +0 -1
  253. zenml/zen_server/dashboard/assets/page-QrvWQwZb.js +0 -1
  254. zenml/zen_server/dashboard/assets/page-RF3Fup0q.js +0 -40
  255. zenml/zen_server/dashboard/assets/page-WuvCrN47.js +0 -1
  256. zenml/zen_server/dashboard/assets/page-_WnHBI1F.js +0 -1
  257. zenml/zen_server/dashboard/assets/page-ghjVNgVE.js +0 -1
  258. zenml/zen_server/dashboard/assets/page-iDsDiDXw.js +0 -1
  259. zenml/zen_server/dashboard/assets/resource-type-tooltip-BL9ZTRKi.js +0 -1
  260. zenml/zen_server/dashboard/assets/service-connectors-Q8h7-_rG.js +0 -1
  261. zenml/zen_server/dashboard/assets/service-k-9Vsb30.js +0 -2
  262. zenml/zen_server/dashboard/assets/stack-detail-query-CNmVZ0Bo.js +0 -1
  263. zenml/zen_server/dashboard/assets/type-guards-CNgPYg8l.js +0 -1
  264. zenml/zen_server/dashboard/assets/update-server-settings-mutation-Bwe3gUt4.js +0 -1
  265. {zenml_nightly-0.83.1.dev20250710.dist-info → zenml_nightly-0.84.0.dev20250712.dist-info}/LICENSE +0 -0
  266. {zenml_nightly-0.83.1.dev20250710.dist-info → zenml_nightly-0.84.0.dev20250712.dist-info}/WHEEL +0 -0
  267. {zenml_nightly-0.83.1.dev20250710.dist-info → zenml_nightly-0.84.0.dev20250712.dist-info}/entry_points.txt +0 -0
@@ -15,7 +15,7 @@
15
15
 
16
16
  from typing import TYPE_CHECKING, Any, Dict, Optional, Type
17
17
 
18
- from pydantic import NonNegativeInt, PositiveInt, field_validator
18
+ from pydantic import Field, NonNegativeInt, PositiveInt, field_validator
19
19
 
20
20
  from zenml.config.base_settings import BaseSettings
21
21
  from zenml.constants import KUBERNETES_CLUSTER_RESOURCE_TYPE
@@ -33,92 +33,122 @@ if TYPE_CHECKING:
33
33
  class KubernetesOrchestratorSettings(BaseSettings):
34
34
  """Settings for the Kubernetes orchestrator.
35
35
 
36
- Attributes:
37
- synchronous: If `True`, the client running a pipeline using this
38
- orchestrator waits until all steps finish running. If `False`,
39
- the client returns immediately and the pipeline is executed
40
- asynchronously. Defaults to `True`.
41
- timeout: How many seconds to wait for synchronous runs. `0` means
42
- to wait for an unlimited duration.
43
- stream_step_logs: If `True`, the orchestrator pod will stream the logs
44
- of the step pods. This only has an effect if specified on the
45
- pipeline, not on individual steps.
46
- service_account_name: Name of the service account to use for the
47
- orchestrator pod. If not provided, a new service account with "edit"
48
- permissions will be created.
49
- step_pod_service_account_name: Name of the service account to use for the
50
- step pods. If not provided, the default service account will be used.
51
- privileged: If the container should be run in privileged mode.
52
- pod_settings: Pod settings to apply to pods executing the steps.
53
- orchestrator_pod_settings: Pod settings to apply to the pod which is
54
- launching the actual steps.
55
- pod_name_prefix: Prefix to use for the pod name.
56
- pod_startup_timeout: The maximum time to wait for a pending step pod to
57
- start (in seconds).
58
- pod_failure_max_retries: The maximum number of times to retry a step
59
- pod if the step Kubernetes pod fails to start
60
- pod_failure_retry_delay: The delay in seconds between pod
61
- failure retries and pod startup retries (in seconds)
62
- pod_failure_backoff: The backoff factor for pod failure retries and
63
- pod startup retries.
64
- max_parallelism: Maximum number of steps to run in parallel.
65
- successful_jobs_history_limit: The number of successful jobs
66
- to retain. This only applies to jobs created when scheduling a
67
- pipeline.
68
- failed_jobs_history_limit: The number of failed jobs to retain.
69
- This only applies to jobs created when scheduling a pipeline.
70
- ttl_seconds_after_finished: The amount of seconds to keep finished jobs
71
- before deleting them. **Note**: This does not clean up the
72
- orchestrator pod for non-scheduled runs.
73
- active_deadline_seconds: The active deadline seconds for the job that is
74
- executing the step.
75
- backoff_limit_margin: The value to add to the backoff limit in addition
76
- to the step retries. The retry configuration defined on the step
77
- defines the maximum number of retries that the server will accept
78
- for a step. For this orchestrator, this controls how often the
79
- job running the step will try to start the step pod. There are some
80
- circumstances however where the job will start the pod, but the pod
81
- doesn't actually get to the point of running the step. That means
82
- the server will not receive the maximum amount of retry requests,
83
- which in turn causes other inconsistencies like wrong step statuses.
84
- To mitigate this, this attribute allows to add a margin to the
85
- backoff limit. This means that the job will retry the pod startup
86
- for the configured amount of times plus the margin, which increases
87
- the chance of the server receiving the maximum amount of retry
88
- requests.
89
- pod_failure_policy: The pod failure policy to use for the job that is
90
- executing the step.
91
- prevent_orchestrator_pod_caching: If `True`, the orchestrator pod will
92
- not try to compute cached steps before starting the step pods.
93
- always_build_pipeline_image: If `True`, the orchestrator will always
94
- build the pipeline image, even if all steps have a custom build.
95
- pod_stop_grace_period: When stopping a pipeline run, the amount of
96
- seconds to wait for a step pod to shutdown gracefully.
36
+ Configuration options for how pipelines are executed on Kubernetes clusters.
37
+ Field descriptions are defined inline using Field() descriptors.
97
38
  """
98
39
 
99
- synchronous: bool = True
100
- timeout: int = 0
101
- stream_step_logs: bool = True
102
- service_account_name: Optional[str] = None
103
- step_pod_service_account_name: Optional[str] = None
104
- privileged: bool = False
105
- pod_settings: Optional[KubernetesPodSettings] = None
106
- orchestrator_pod_settings: Optional[KubernetesPodSettings] = None
107
- pod_name_prefix: Optional[str] = None
108
- pod_startup_timeout: int = 60 * 10 # Default 10 minutes
109
- pod_failure_max_retries: int = 3
110
- pod_failure_retry_delay: int = 10
111
- pod_failure_backoff: float = 1.0
112
- max_parallelism: Optional[PositiveInt] = None
113
- successful_jobs_history_limit: Optional[NonNegativeInt] = None
114
- failed_jobs_history_limit: Optional[NonNegativeInt] = None
115
- ttl_seconds_after_finished: Optional[NonNegativeInt] = None
116
- active_deadline_seconds: Optional[NonNegativeInt] = None
117
- backoff_limit_margin: NonNegativeInt = 0
118
- pod_failure_policy: Optional[Dict[str, Any]] = None
119
- prevent_orchestrator_pod_caching: bool = False
120
- always_build_pipeline_image: bool = False
121
- pod_stop_grace_period: PositiveInt = 30
40
+ synchronous: bool = Field(
41
+ default=True,
42
+ description="Whether to wait for all pipeline steps to complete. "
43
+ "When `False`, the client returns immediately and execution continues asynchronously.",
44
+ )
45
+ timeout: int = Field(
46
+ default=0,
47
+ description="Maximum seconds to wait for synchronous runs. Set to `0` for unlimited duration.",
48
+ )
49
+ stream_step_logs: bool = Field(
50
+ default=True,
51
+ description="If `True`, the orchestrator pod will stream the logs "
52
+ "of the step pods. This only has an effect if specified on the "
53
+ "pipeline, not on individual steps.",
54
+ )
55
+ service_account_name: Optional[str] = Field(
56
+ default=None,
57
+ description="Kubernetes service account for the orchestrator pod. "
58
+ "If not specified, creates a new account with 'edit' permissions.",
59
+ )
60
+ step_pod_service_account_name: Optional[str] = Field(
61
+ default=None,
62
+ description="Kubernetes service account for step execution pods. "
63
+ "Uses the default service account if not specified.",
64
+ )
65
+ privileged: bool = Field(
66
+ default=False,
67
+ description="Whether to run containers in privileged mode with extended permissions.",
68
+ )
69
+ pod_settings: Optional[KubernetesPodSettings] = Field(
70
+ default=None,
71
+ description="Pod configuration for step execution containers.",
72
+ )
73
+ orchestrator_pod_settings: Optional[KubernetesPodSettings] = Field(
74
+ default=None,
75
+ description="Pod configuration for the orchestrator container that launches step pods.",
76
+ )
77
+ pod_name_prefix: Optional[str] = Field(
78
+ default=None,
79
+ description="Custom prefix for generated pod names. Helps identify pods in the cluster.",
80
+ )
81
+ pod_startup_timeout: int = Field(
82
+ default=600,
83
+ description="Maximum seconds to wait for step pods to start. Default is 10 minutes.",
84
+ )
85
+ pod_failure_max_retries: int = Field(
86
+ default=3,
87
+ description="Maximum retry attempts when step pods fail to start.",
88
+ )
89
+ pod_failure_retry_delay: int = Field(
90
+ default=10,
91
+ description="Delay in seconds between pod failure retry attempts.",
92
+ )
93
+ pod_failure_backoff: float = Field(
94
+ default=1.0,
95
+ description="Exponential backoff factor for retry delays. Values > 1.0 increase delay with each retry.",
96
+ )
97
+ max_parallelism: Optional[PositiveInt] = Field(
98
+ default=None,
99
+ description="Maximum number of step pods to run concurrently. No limit if not specified.",
100
+ )
101
+ successful_jobs_history_limit: Optional[NonNegativeInt] = Field(
102
+ default=None,
103
+ description="Number of successful scheduled jobs to retain in cluster history.",
104
+ )
105
+ failed_jobs_history_limit: Optional[NonNegativeInt] = Field(
106
+ default=None,
107
+ description="Number of failed scheduled jobs to retain in cluster history.",
108
+ )
109
+ ttl_seconds_after_finished: Optional[NonNegativeInt] = Field(
110
+ default=None,
111
+ description="Seconds to keep finished scheduled jobs before automatic cleanup.",
112
+ )
113
+ active_deadline_seconds: Optional[NonNegativeInt] = Field(
114
+ default=None,
115
+ description="Deadline in seconds for the active pod. If the pod is inactive for this many seconds, it will be terminated.",
116
+ )
117
+ backoff_limit_margin: NonNegativeInt = Field(
118
+ default=0,
119
+ description="The value to add to the backoff limit in addition "
120
+ "to the step retries. The retry configuration defined on the step "
121
+ "defines the maximum number of retries that the server will accept "
122
+ "for a step. For this orchestrator, this controls how often the "
123
+ "job running the step will try to start the step pod. There are some "
124
+ "circumstances however where the job will start the pod, but the pod "
125
+ "doesn't actually get to the point of running the step. That means "
126
+ "the server will not receive the maximum amount of retry requests, "
127
+ "which in turn causes other inconsistencies like wrong step statuses. "
128
+ "To mitigate this, this attribute allows to add a margin to the "
129
+ "backoff limit. This means that the job will retry the pod startup "
130
+ "for the configured amount of times plus the margin, which increases "
131
+ "the chance of the server receiving the maximum amount of retry "
132
+ "requests.",
133
+ )
134
+ pod_failure_policy: Optional[Dict[str, Any]] = Field(
135
+ default=None,
136
+ description="The pod failure policy to use for the job that is "
137
+ "executing the step.",
138
+ )
139
+ prevent_orchestrator_pod_caching: bool = Field(
140
+ default=False,
141
+ description="Whether to disable caching optimization in the orchestrator pod.",
142
+ )
143
+ always_build_pipeline_image: bool = Field(
144
+ default=False,
145
+ description="If `True`, the orchestrator will always build the pipeline image, "
146
+ "even if all steps have a custom build.",
147
+ )
148
+ pod_stop_grace_period: PositiveInt = Field(
149
+ default=30,
150
+ description="When stopping a pipeline run, the amount of seconds to wait for a step pod to shutdown gracefully.",
151
+ )
122
152
 
123
153
  @field_validator("pod_failure_policy", mode="before")
124
154
  @classmethod
@@ -144,42 +174,50 @@ class KubernetesOrchestratorSettings(BaseSettings):
144
174
  class KubernetesOrchestratorConfig(
145
175
  BaseOrchestratorConfig, KubernetesOrchestratorSettings
146
176
  ):
147
- """Configuration for the Kubernetes orchestrator.
148
-
149
- Attributes:
150
- incluster: If `True`, the orchestrator will run the pipeline inside the
151
- same cluster in which it itself is running. This requires the client
152
- to run in a Kubernetes pod itself. If set, the `kubernetes_context`
153
- config option is ignored. If the stack component is linked to a
154
- Kubernetes service connector, this field is ignored.
155
- kubernetes_context: Name of a Kubernetes context to run pipelines in.
156
- If the stack component is linked to a Kubernetes service connector,
157
- this field is ignored. Otherwise, it is mandatory.
158
- kubernetes_namespace: Name of the Kubernetes namespace to be used.
159
- If not provided, `zenml` namespace will be used.
160
- local: If `True`, the orchestrator will assume it is connected to a
161
- local kubernetes cluster and will perform additional validations and
162
- operations to allow using the orchestrator in combination with other
163
- local stack components that store data in the local filesystem
164
- (i.e. it will mount the local stores directory into the pipeline
165
- containers).
166
- skip_local_validations: If `True`, the local validations will be
167
- skipped.
168
- parallel_step_startup_waiting_period: How long to wait in between
169
- starting parallel steps. This can be used to distribute server
170
- load when running pipelines with a huge amount of parallel steps.
171
- pass_zenml_token_as_secret: If `True`, the ZenML token will be passed
172
- as a Kubernetes secret to the pods. For this to work, the Kubernetes
173
- client must have permissions to create secrets in the namespace.
174
- """
175
-
176
- incluster: bool = False
177
- kubernetes_context: Optional[str] = None
178
- kubernetes_namespace: str = "zenml"
179
- local: bool = False
180
- skip_local_validations: bool = False
181
- parallel_step_startup_waiting_period: Optional[float] = None
182
- pass_zenml_token_as_secret: bool = False
177
+ """Configuration for the Kubernetes orchestrator."""
178
+
179
+ incluster: bool = Field(
180
+ False,
181
+ description="If `True`, the orchestrator will run the pipeline inside the "
182
+ "same cluster in which it itself is running. This requires the client "
183
+ "to run in a Kubernetes pod itself. If set, the `kubernetes_context` "
184
+ "config option is ignored. If the stack component is linked to a "
185
+ "Kubernetes service connector, this field is ignored.",
186
+ )
187
+ kubernetes_context: Optional[str] = Field(
188
+ None,
189
+ description="Name of a Kubernetes context to run pipelines in. "
190
+ "If the stack component is linked to a Kubernetes service connector, "
191
+ "this field is ignored. Otherwise, it is mandatory.",
192
+ )
193
+ kubernetes_namespace: str = Field(
194
+ "zenml",
195
+ description="Name of the Kubernetes namespace to be used. "
196
+ "If not provided, `zenml` namespace will be used.",
197
+ )
198
+ local: bool = Field(
199
+ False,
200
+ description="If `True`, the orchestrator will assume it is connected to a "
201
+ "local kubernetes cluster and will perform additional validations and "
202
+ "operations to allow using the orchestrator in combination with other "
203
+ "local stack components that store data in the local filesystem "
204
+ "(i.e. it will mount the local stores directory into the pipeline containers).",
205
+ )
206
+ skip_local_validations: bool = Field(
207
+ False, description="If `True`, the local validations will be skipped."
208
+ )
209
+ parallel_step_startup_waiting_period: Optional[float] = Field(
210
+ None,
211
+ description="How long to wait in between starting parallel steps. "
212
+ "This can be used to distribute server load when running pipelines "
213
+ "with a huge amount of parallel steps.",
214
+ )
215
+ pass_zenml_token_as_secret: bool = Field(
216
+ False,
217
+ description="If `True`, the ZenML token will be passed as a Kubernetes secret "
218
+ "to the pods. For this to work, the Kubernetes client must have permissions "
219
+ "to create secrets in the namespace.",
220
+ )
183
221
 
184
222
  @property
185
223
  def is_remote(self) -> bool:
@@ -15,6 +15,8 @@
15
15
 
16
16
  from typing import TYPE_CHECKING, Optional, Type
17
17
 
18
+ from pydantic import Field
19
+
18
20
  from zenml.config.base_settings import BaseSettings
19
21
  from zenml.constants import KUBERNETES_CLUSTER_RESOURCE_TYPE
20
22
  from zenml.integrations.kubernetes import KUBERNETES_STEP_OPERATOR_FLAVOR
@@ -31,27 +33,38 @@ if TYPE_CHECKING:
31
33
  class KubernetesStepOperatorSettings(BaseSettings):
32
34
  """Settings for the Kubernetes step operator.
33
35
 
34
- Attributes:
35
- pod_settings: Pod settings to apply to pods executing the steps.
36
- service_account_name: Name of the service account to use for the pod.
37
- privileged: If the container should be run in privileged mode.
38
- pod_startup_timeout: The maximum time to wait for a pending step pod to
39
- start (in seconds).
40
- pod_failure_max_retries: The maximum number of times to retry a step
41
- pod if the step Kubernetes pod fails to start
42
- pod_failure_retry_delay: The delay in seconds between pod
43
- failure retries and pod startup retries (in seconds)
44
- pod_failure_backoff: The backoff factor for pod failure retries and
45
- pod startup retries.
36
+ Configuration options for individual step execution on Kubernetes.
37
+ Field descriptions are defined inline using Field() descriptors.
46
38
  """
47
39
 
48
- pod_settings: Optional[KubernetesPodSettings] = None
49
- service_account_name: Optional[str] = None
50
- privileged: bool = False
51
- pod_startup_timeout: int = 60 * 10 # Default 10 minutes
52
- pod_failure_max_retries: int = 3
53
- pod_failure_retry_delay: int = 10
54
- pod_failure_backoff: float = 1.0
40
+ pod_settings: Optional[KubernetesPodSettings] = Field(
41
+ default=None,
42
+ description="Pod configuration for step execution containers.",
43
+ )
44
+ service_account_name: Optional[str] = Field(
45
+ default=None,
46
+ description="Kubernetes service account for step pods. Uses default account if not specified.",
47
+ )
48
+ privileged: bool = Field(
49
+ default=False,
50
+ description="Whether to run step containers in privileged mode with extended permissions.",
51
+ )
52
+ pod_startup_timeout: int = Field(
53
+ default=600,
54
+ description="Maximum seconds to wait for step pods to start. Default is 10 minutes.",
55
+ )
56
+ pod_failure_max_retries: int = Field(
57
+ default=3,
58
+ description="Maximum retry attempts when step pods fail to start.",
59
+ )
60
+ pod_failure_retry_delay: int = Field(
61
+ default=10,
62
+ description="Delay in seconds between pod failure retry attempts.",
63
+ )
64
+ pod_failure_backoff: float = Field(
65
+ default=1.0,
66
+ description="Exponential backoff factor for retry delays. Values > 1.0 increase delay with each retry.",
67
+ )
55
68
 
56
69
 
57
70
  class KubernetesStepOperatorConfig(
@@ -59,22 +72,24 @@ class KubernetesStepOperatorConfig(
59
72
  ):
60
73
  """Configuration for the Kubernetes step operator.
61
74
 
62
- Attributes:
63
- kubernetes_namespace: Name of the Kubernetes namespace to be used.
64
- incluster: If `True`, the step operator will run the pipeline inside the
65
- same cluster in which the orchestrator is running. For this to work,
66
- the pod running the orchestrator needs permissions to create new
67
- pods. If set, the `kubernetes_context` config option is ignored. If
68
- the stack component is linked to a Kubernetes service connector,
69
- this field is ignored.
70
- kubernetes_context: Name of a Kubernetes context to run pipelines in.
71
- If the stack component is linked to a Kubernetes service connector,
72
- this field is ignored. Otherwise, it is mandatory.
75
+ Defines cluster connection and execution settings.
76
+ Field descriptions are defined inline using Field() descriptors.
73
77
  """
74
78
 
75
- kubernetes_namespace: str = "zenml"
76
- incluster: bool = False
77
- kubernetes_context: Optional[str] = None
79
+ kubernetes_namespace: str = Field(
80
+ default="zenml",
81
+ description="Kubernetes namespace for step execution. Must be a valid namespace name.",
82
+ )
83
+ incluster: bool = Field(
84
+ default=False,
85
+ description="Whether to execute within the same cluster as the orchestrator. "
86
+ "Requires appropriate pod creation permissions.",
87
+ )
88
+ kubernetes_context: Optional[str] = Field(
89
+ default=None,
90
+ description="Kubernetes context name for cluster connection. "
91
+ "Ignored when using service connectors or in-cluster execution.",
92
+ )
78
93
 
79
94
  @property
80
95
  def is_remote(self) -> bool:
@@ -49,7 +49,7 @@ from zenml.config.base_settings import BaseSettings
49
49
  from zenml.constants import (
50
50
  METADATA_ORCHESTRATOR_RUN_ID,
51
51
  )
52
- from zenml.enums import StackComponentType
52
+ from zenml.enums import ExecutionStatus, StackComponentType
53
53
  from zenml.integrations.kubernetes.flavors.kubernetes_orchestrator_flavor import (
54
54
  KubernetesOrchestratorConfig,
55
55
  KubernetesOrchestratorSettings,
@@ -785,6 +785,187 @@ class KubernetesOrchestrator(ContainerizedOrchestrator):
785
785
  f"No running step jobs found for pipeline run with ID: {run.id}"
786
786
  )
787
787
 
788
+ def fetch_status(
789
+ self, run: "PipelineRunResponse", include_steps: bool = False
790
+ ) -> Tuple[
791
+ Optional[ExecutionStatus], Optional[Dict[str, ExecutionStatus]]
792
+ ]:
793
+ """Refreshes the status of a specific pipeline run.
794
+
795
+ Args:
796
+ run: The run that was executed by this orchestrator.
797
+ include_steps: If True, also fetch the status of individual steps.
798
+
799
+ Returns:
800
+ A tuple of (pipeline_status, step_statuses).
801
+ If include_steps is False, step_statuses will be None.
802
+ If include_steps is True, step_statuses will be a dict (possibly empty).
803
+
804
+ Raises:
805
+ ValueError: If the orchestrator run ID cannot be found or if the
806
+ stack components are not accessible.
807
+ """
808
+ # Get the orchestrator run ID which corresponds to the orchestrator pod name
809
+ orchestrator_run_id = run.orchestrator_run_id
810
+ if not orchestrator_run_id:
811
+ raise ValueError(
812
+ "Cannot determine orchestrator run ID for the run. "
813
+ "Unable to fetch the status."
814
+ )
815
+
816
+ # Check the orchestrator pod status (only if run is not finished)
817
+ if not run.status.is_finished:
818
+ orchestrator_pod_phase = self._check_pod_status(
819
+ pod_name=orchestrator_run_id,
820
+ )
821
+ pipeline_status = self._map_pod_phase_to_execution_status(
822
+ orchestrator_pod_phase
823
+ )
824
+ else:
825
+ # Run is already finished, don't change status
826
+ pipeline_status = None
827
+
828
+ step_statuses = None
829
+ if include_steps:
830
+ step_statuses = self._fetch_step_statuses(run)
831
+
832
+ return pipeline_status, step_statuses
833
+
834
+ def _check_pod_status(
835
+ self,
836
+ pod_name: str,
837
+ ) -> kube_utils.PodPhase:
838
+ """Check pod status and handle deletion scenarios for both orchestrator and step pods.
839
+
840
+ This method should only be called for non-finished pipeline runs/steps.
841
+
842
+ Args:
843
+ pod_name: The name of the pod to check.
844
+
845
+ Returns:
846
+ The pod phase if the pod exists and reports a recognized phase, otherwise PodPhase.UNKNOWN.
847
+ """
848
+ pod = kube_utils.get_pod(
849
+ core_api=self._k8s_core_api,
850
+ pod_name=pod_name,
851
+ namespace=self.config.kubernetes_namespace,
852
+ )
853
+
854
+ if pod and pod.status and pod.status.phase:
855
+ try:
856
+ return kube_utils.PodPhase(pod.status.phase)
857
+ except ValueError:
858
+ # Handle unknown pod phases
859
+ logger.warning(
860
+ f"Unknown pod phase for pod {pod_name}: {pod.status.phase}"
861
+ )
862
+ return kube_utils.PodPhase.UNKNOWN
863
+ else:
864
+ logger.warning(
865
+ f"Can't fetch the status of pod {pod_name} "
866
+ f"in namespace {self.config.kubernetes_namespace}."
867
+ )
868
+ return kube_utils.PodPhase.UNKNOWN
869
+
870
+ def _map_pod_phase_to_execution_status(
871
+ self, pod_phase: kube_utils.PodPhase
872
+ ) -> Optional[ExecutionStatus]:
873
+ """Map Kubernetes pod phase to ZenML execution status.
874
+
875
+ Args:
876
+ pod_phase: The Kubernetes pod phase.
877
+
878
+ Returns:
879
+ The corresponding ZenML execution status, or None if the pod phase is unknown.
880
+ """
881
+ if pod_phase == kube_utils.PodPhase.PENDING:
882
+ return ExecutionStatus.INITIALIZING
883
+ elif pod_phase == kube_utils.PodPhase.RUNNING:
884
+ return ExecutionStatus.RUNNING
885
+ elif pod_phase == kube_utils.PodPhase.SUCCEEDED:
886
+ return ExecutionStatus.COMPLETED
887
+ elif pod_phase == kube_utils.PodPhase.FAILED:
888
+ return ExecutionStatus.FAILED
889
+ else: # UNKNOWN - no update
890
+ return None
891
+
892
+ def _map_job_status_to_execution_status(
893
+ self, job: k8s_client.V1Job
894
+ ) -> Optional[ExecutionStatus]:
895
+ """Map Kubernetes job status to ZenML execution status.
896
+
897
+ Args:
898
+ job: The Kubernetes job.
899
+
900
+ Returns:
901
+ The corresponding ZenML execution status, or None if no clear status.
902
+ """
903
+ # Check job conditions first
904
+ if job.status and job.status.conditions:
905
+ for condition in job.status.conditions:
906
+ if condition.type == "Complete" and condition.status == "True":
907
+ return ExecutionStatus.COMPLETED
908
+ elif condition.type == "Failed" and condition.status == "True":
909
+ return ExecutionStatus.FAILED
910
+
911
+ # Return None if no clear status - don't update
912
+ return None
913
+
914
+ def _fetch_step_statuses(
915
+ self, run: "PipelineRunResponse"
916
+ ) -> Dict[str, ExecutionStatus]:
917
+ """Fetch the statuses of individual pipeline steps.
918
+
919
+ Args:
920
+ run: The pipeline run response.
921
+
922
+ Returns:
923
+ A dictionary mapping step names to their execution statuses.
924
+ """
925
+ step_statuses = {}
926
+
927
+ # Query all jobs for this run and match them to steps
928
+ label_selector = f"run_id={kube_utils.sanitize_label(str(run.id))}"
929
+
930
+ try:
931
+ jobs = self._k8s_batch_api.list_namespaced_job(
932
+ namespace=self.config.kubernetes_namespace,
933
+ label_selector=label_selector,
934
+ )
935
+ except Exception as e:
936
+ logger.warning(f"Failed to list jobs for run {run.id}: {e}")
937
+ return {}
938
+
939
+ # Fetch the steps from the run response
940
+ steps_dict = run.steps
941
+
942
+ for job in jobs.items:
943
+ # Extract step name from job labels
944
+ if not job.metadata or not job.metadata.labels:
945
+ continue
946
+
947
+ step_name = job.metadata.labels.get("step_name")
948
+ if not step_name:
949
+ continue
950
+
951
+ # Check if this step is already finished
952
+ step_response = steps_dict.get(step_name, None)
953
+
954
+ # If the step is not in the run response yet, skip, we can't update
955
+ if step_response is None:
956
+ continue
957
+
958
+ # If the step is already in a finished state, skip
959
+ if step_response and step_response.status.is_finished:
960
+ continue
961
+
962
+ # Check job status and map to execution status
963
+ execution_status = self._map_job_status_to_execution_status(job)
964
+ if execution_status is not None:
965
+ step_statuses[step_name] = execution_status
966
+
967
+ return step_statuses
968
+
788
969
  def get_pipeline_run_metadata(
789
970
  self, run_id: UUID
790
971
  ) -> Dict[str, "MetadataType"]:
@@ -187,7 +187,7 @@ def main() -> None:
187
187
 
188
188
  return True
189
189
 
190
- step_pod_labels = {
190
+ base_labels = {
191
191
  "run_id": kube_utils.sanitize_label(str(pipeline_run.id)),
192
192
  "run_name": kube_utils.sanitize_label(str(pipeline_run.name)),
193
193
  "pipeline": kube_utils.sanitize_label(
@@ -234,6 +234,10 @@ def main() -> None:
234
234
  pod_name, namespace=namespace
235
235
  )
236
236
 
237
+ # Add step name to labels so both pod and job have consistent labeling
238
+ step_labels = base_labels.copy()
239
+ step_labels["step_name"] = kube_utils.sanitize_label(step_name)
240
+
237
241
  image = KubernetesOrchestrator.get_image(
238
242
  deployment=deployment, step_name=step_name
239
243
  )
@@ -281,7 +285,7 @@ def main() -> None:
281
285
  or settings.service_account_name,
282
286
  mount_local_stores=mount_local_stores,
283
287
  termination_grace_period_seconds=settings.pod_stop_grace_period,
284
- labels=step_pod_labels,
288
+ labels=step_labels,
285
289
  )
286
290
 
287
291
  retry_config = step_config.retry
@@ -347,7 +351,7 @@ def main() -> None:
347
351
  active_deadline_seconds=settings.active_deadline_seconds,
348
352
  pod_failure_policy=pod_failure_policy,
349
353
  owner_references=owner_references,
350
- labels=step_pod_labels,
354
+ labels=step_labels,
351
355
  )
352
356
 
353
357
  kube_utils.create_job(