zenml-nightly 0.82.1.dev20250527__py3-none-any.whl → 0.83.0.dev20250529__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zenml/VERSION +1 -1
- zenml/cli/base.py +6 -1
- zenml/cli/model.py +16 -36
- zenml/cli/server.py +8 -3
- zenml/client.py +20 -4
- zenml/client_lazy_loader.py +2 -0
- zenml/config/docker_settings.py +15 -2
- zenml/enums.py +3 -0
- zenml/event_hub/event_hub.py +1 -1
- zenml/integrations/bitbucket/plugins/event_sources/bitbucket_webhook_event_source.py +1 -1
- zenml/integrations/github/plugins/event_sources/github_webhook_event_source.py +1 -1
- zenml/integrations/kubernetes/flavors/kubernetes_orchestrator_flavor.py +3 -0
- zenml/integrations/kubernetes/orchestrators/kubernetes_orchestrator_entrypoint.py +37 -26
- zenml/integrations/skypilot/flavors/skypilot_orchestrator_base_vm_config.py +45 -4
- zenml/integrations/skypilot/orchestrators/skypilot_base_vm_orchestrator.py +92 -84
- zenml/integrations/skypilot/orchestrators/skypilot_orchestrator_entrypoint.py +207 -179
- zenml/integrations/skypilot/utils.py +273 -0
- zenml/integrations/skypilot_aws/__init__.py +1 -2
- zenml/integrations/skypilot_azure/__init__.py +1 -2
- zenml/integrations/skypilot_gcp/__init__.py +9 -1
- zenml/integrations/skypilot_kubernetes/__init__.py +2 -3
- zenml/integrations/skypilot_lambda/__init__.py +1 -2
- zenml/login/web_login.py +1 -1
- zenml/model/model.py +13 -23
- zenml/models/__init__.py +39 -2
- zenml/models/v2/base/scoped.py +34 -15
- zenml/models/v2/core/artifact.py +10 -9
- zenml/models/v2/core/artifact_version.py +16 -16
- zenml/models/v2/core/logs.py +8 -50
- zenml/models/v2/core/model.py +10 -10
- zenml/models/v2/core/model_version.py +155 -88
- zenml/models/v2/core/pipeline.py +10 -11
- zenml/models/v2/core/pipeline_deployment.py +1 -9
- zenml/models/v2/core/pipeline_run.py +10 -17
- zenml/models/v2/core/run_template.py +10 -10
- zenml/models/v2/core/step_run.py +100 -16
- zenml/models/v2/core/tag.py +5 -4
- zenml/models/v2/misc/pipeline_run_dag.py +46 -0
- zenml/orchestrators/base_orchestrator.py +8 -19
- zenml/orchestrators/cache_utils.py +48 -1
- zenml/orchestrators/input_utils.py +35 -39
- zenml/orchestrators/step_launcher.py +1 -1
- zenml/orchestrators/step_run_utils.py +26 -10
- zenml/pipelines/pipeline_definition.py +3 -3
- zenml/pipelines/run_utils.py +2 -3
- zenml/service_connectors/service_connector.py +5 -1
- zenml/stack/stack_component.py +1 -4
- zenml/steps/step_context.py +1 -1
- zenml/utils/dashboard_utils.py +3 -3
- zenml/zen_server/auth.py +6 -3
- zenml/zen_server/dashboard/assets/{404-_AtuLtaX.js → 404-DmJUgorp.js} +1 -1
- zenml/zen_server/dashboard/assets/{@radix-C7hRs6Kx.js → @radix-Cdvw4jJ8.js} +1 -1
- zenml/zen_server/dashboard/assets/{@react-router-CNP6g_RL.js → @react-router-DeDfXbUF.js} +5 -5
- zenml/zen_server/dashboard/assets/{@reactflow-CQi1Z1Wq.js → @reactflow-8OCk19Fi.js} +1 -1
- zenml/zen_server/dashboard/assets/{@tanstack-CSxjHCME.js → @tanstack-5gTMR7G2.js} +4 -4
- zenml/zen_server/dashboard/assets/AlertDialogDropdownItem-CZW4QyWn.js +1 -0
- zenml/zen_server/dashboard/assets/ButtonGroup-DFWWFGUE.js +1 -0
- zenml/zen_server/dashboard/assets/{CodeSnippet-CvI6D0wx.js → CodeSnippet-D2HkkAGr.js} +1 -1
- zenml/zen_server/dashboard/assets/CollapsibleCard-CnS09ljw.js +1 -0
- zenml/zen_server/dashboard/assets/{ComponentBadge-DKw7Gndh.js → ComponentBadge-CDgdd0Ks.js} +1 -1
- zenml/zen_server/dashboard/assets/ComponentIcon-CbbOc7lb.js +1 -0
- zenml/zen_server/dashboard/assets/{DeleteAlertDialog-DVvXt-S6.js → DeleteAlertDialog-VIOMDLmx.js} +1 -1
- zenml/zen_server/dashboard/assets/DialogItem-ClFCqxEp.js +1 -0
- zenml/zen_server/dashboard/assets/{DisplayDate-CYVBBSgr.js → DisplayDate-8RESqe5H.js} +1 -1
- zenml/zen_server/dashboard/assets/{EmptyState-M1jafpg6.js → EmptyState-CjrgDtVk.js} +1 -1
- zenml/zen_server/dashboard/assets/{Error-BWaXP0VK.js → Error-CQzjbDcN.js} +1 -1
- zenml/zen_server/dashboard/assets/ExecutionStatus-CWreILP0.js +1 -0
- zenml/zen_server/dashboard/assets/{Helpbox-iE1xLmiZ.js → Helpbox-CiKxG5_X.js} +1 -1
- zenml/zen_server/dashboard/assets/Infobox-CGxFvqzi.js +1 -0
- zenml/zen_server/dashboard/assets/LeftSideMenu-DCsKdIjC.js +1 -0
- zenml/zen_server/dashboard/assets/{Lock-DW-0_M0o.js → Lock-CrIAdQo6.js} +1 -1
- zenml/zen_server/dashboard/assets/NestedCollapsible-3M4llYtH.js +1 -0
- zenml/zen_server/dashboard/assets/NumberBox-C0mQktmV.js +1 -0
- zenml/zen_server/dashboard/assets/Partials-DSjkttlz.js +1 -0
- zenml/zen_server/dashboard/assets/{PasswordChecker-lYTOtNom.js → PasswordChecker-B88WjuCe.js} +1 -1
- zenml/zen_server/dashboard/assets/ProCta-Dm5cWKpS.js +1 -0
- zenml/zen_server/dashboard/assets/{ProviderIcon-DLo7t1lo.js → ProviderIcon-DPwMR6nF.js} +1 -1
- zenml/zen_server/dashboard/assets/ProviderRadio-DEDNRgAb.js +1 -0
- zenml/zen_server/dashboard/assets/RunsBody-BRBn1e2O.js +1 -0
- zenml/zen_server/dashboard/assets/SearchField-DY6-UbRT.js +1 -0
- zenml/zen_server/dashboard/assets/SecretTooltip-CZTRnaCV.js +1 -0
- zenml/zen_server/dashboard/assets/{SetPassword-DR-EiLI5.js → SetPassword-BjNGDC5e.js} +1 -1
- zenml/zen_server/dashboard/assets/SheetHeader-CASpN2Lz.js +1 -0
- zenml/zen_server/dashboard/assets/StackComponentList-Be1pQt9m.js +1 -0
- zenml/zen_server/dashboard/assets/StackList-BdiR5DvR.js +1 -0
- zenml/zen_server/dashboard/assets/StackName-ojLC6xdl.js +1 -0
- zenml/zen_server/dashboard/assets/Tabs-DNSKblCM.js +1 -0
- zenml/zen_server/dashboard/assets/Tick-BPrWnNlN.js +1 -0
- zenml/zen_server/dashboard/assets/{UpdatePasswordSchemas-DbFEaezI.js → UpdatePasswordSchemas-CNfKDo2Q.js} +1 -1
- zenml/zen_server/dashboard/assets/UsageReason-Cb-mpV8M.js +1 -0
- zenml/zen_server/dashboard/assets/{Wizard-CMI6Ksgz.js → Wizard-Dg8Pmn5A.js} +1 -1
- zenml/zen_server/dashboard/assets/WizardFooter-BcNDIvlQ.js +1 -0
- zenml/zen_server/dashboard/assets/{all-pipeline-runs-query-BGASHYtF.js → all-pipeline-runs-query-DCdax7I5.js} +1 -1
- zenml/zen_server/dashboard/assets/{arrow-left-CwgF2MEM.js → arrow-left-MRXv5pAH.js} +1 -1
- zenml/zen_server/dashboard/assets/bulk-delete-C_kpIB9A.js +3 -0
- zenml/zen_server/dashboard/assets/{check-DK77doTf.js → check-B9QMTa3f.js} +1 -1
- zenml/zen_server/dashboard/assets/{check-circle-mvyzYvIW.js → check-circle-C4tYvbtw.js} +1 -1
- zenml/zen_server/dashboard/assets/{chevron-down-A-rmltmI.js → chevron-down-jbbQh82s.js} +1 -1
- zenml/zen_server/dashboard/assets/{chevron-right-double-uNWbJT-C.js → chevron-right-double-Dgp_gEsp.js} +1 -1
- zenml/zen_server/dashboard/assets/{clock-CPA5cYxq.js → clock-B_mTG8PH.js} +1 -1
- zenml/zen_server/dashboard/assets/{code-browser-j2EpcxIA.js → code-browser-CiD8qkBx.js} +1 -1
- zenml/zen_server/dashboard/assets/configuration-form-B2hmKGnF.js +1 -0
- zenml/zen_server/dashboard/assets/connectivity-4UKGMYnr.webp +0 -0
- zenml/zen_server/dashboard/assets/constants-1EZZxtay.js +1 -0
- zenml/zen_server/dashboard/assets/create-stack-TKmMtrkQ.js +1 -0
- zenml/zen_server/dashboard/assets/dates-Buh6SMo7.js +1 -0
- zenml/zen_server/dashboard/assets/delete-run-CCR9md_s.js +1 -0
- zenml/zen_server/dashboard/assets/eye-CbVlAYty.js +1 -0
- zenml/zen_server/dashboard/assets/{file-text-BdxZdjP_.js → file-text-Cd8wVfq5.js} +1 -1
- zenml/zen_server/dashboard/assets/form-DFJkaFDX.js +1 -0
- zenml/zen_server/dashboard/assets/form-schemas-CrznJVzA.js +1 -0
- zenml/zen_server/dashboard/assets/{gcp-CHNvgEss.js → gcp-B1I3Qvcx.js} +1 -1
- zenml/zen_server/dashboard/assets/{help-DyMolRxD.js → help-Co6aedki.js} +1 -1
- zenml/zen_server/dashboard/assets/index-BFqbGSck.js +308 -0
- zenml/zen_server/dashboard/assets/{index-CrhdX_qG.js → index-BjUu1mP4.js} +1 -1
- zenml/zen_server/dashboard/assets/{index-DR30v9MZ.js → index-DWpiv-Ft.js} +1 -1
- zenml/zen_server/dashboard/assets/index-DuhuqTCI.css +1 -0
- zenml/zen_server/dashboard/assets/index-U992soPJ.js +1 -0
- zenml/zen_server/dashboard/assets/index.es-C1gfATPn.js +14 -0
- zenml/zen_server/dashboard/assets/{index.esm-D7jFlf5N.js → index.esm-DhJo3mA6.js} +1 -1
- zenml/zen_server/dashboard/assets/info-QkbQz4QU.js +1 -0
- zenml/zen_server/dashboard/assets/{key-icon-DO4DPJHZ.js → key-icon-C07HKw8z.js} +1 -1
- zenml/zen_server/dashboard/assets/{layout-h3cbx8WZ.js → layout-DBbfEFBe.js} +1 -1
- zenml/zen_server/dashboard/assets/layout-Do9YI4QX.js +1 -0
- zenml/zen_server/dashboard/assets/login-mutation-D3tFP6Wm.js +1 -0
- zenml/zen_server/dashboard/assets/{logs-B5n0U7tB.js → logs-CQKlJjo0.js} +1 -1
- zenml/zen_server/dashboard/assets/{package-D1Mhqeh8.js → package-miExReQl.js} +1 -1
- zenml/zen_server/dashboard/assets/page-9RjCitFH.js +1 -0
- zenml/zen_server/dashboard/assets/page-B0PsXWiT.js +1 -0
- zenml/zen_server/dashboard/assets/page-BCrKmYIZ.js +1 -0
- zenml/zen_server/dashboard/assets/page-BcRI3-aR.js +29 -0
- zenml/zen_server/dashboard/assets/page-Be3R2uYn.js +1 -0
- zenml/zen_server/dashboard/assets/page-BgknnddT.js +1 -0
- zenml/zen_server/dashboard/assets/page-BrT0_zSJ.js +40 -0
- zenml/zen_server/dashboard/assets/page-Bs3W2FDi.js +1 -0
- zenml/zen_server/dashboard/assets/page-C210HcBA.js +1 -0
- zenml/zen_server/dashboard/assets/page-C6KaiZ_W.js +1 -0
- zenml/zen_server/dashboard/assets/page-CAJ8B0vb.js +1 -0
- zenml/zen_server/dashboard/assets/page-CAUYrfui.js +1 -0
- zenml/zen_server/dashboard/assets/page-CHxVhF3x.js +1 -0
- zenml/zen_server/dashboard/assets/{page-CSwZxZMQ.js → page-CN7lkvXr.js} +1 -1
- zenml/zen_server/dashboard/assets/page-CUaMMoPG.js +1 -0
- zenml/zen_server/dashboard/assets/page-Cal6XQ4U.js +1 -0
- zenml/zen_server/dashboard/assets/page-CdZCmszX.js +1 -0
- zenml/zen_server/dashboard/assets/page-ChGcZI_6.js +1 -0
- zenml/zen_server/dashboard/assets/page-CktmtZ8Z.js +1 -0
- zenml/zen_server/dashboard/assets/page-ClvmVesa.js +1 -0
- zenml/zen_server/dashboard/assets/page-CnbIYE80.js +1 -0
- zenml/zen_server/dashboard/assets/page-CoXzjeEY.js +1 -0
- zenml/zen_server/dashboard/assets/page-CtiuMP_r.js +1 -0
- zenml/zen_server/dashboard/assets/page-D9Hfx6GV.js +1 -0
- zenml/zen_server/dashboard/assets/page-D9iuB88h.js +1 -0
- zenml/zen_server/dashboard/assets/page-DCcuPZ8P.js +1 -0
- zenml/zen_server/dashboard/assets/page-DEohTSz6.js +1 -0
- zenml/zen_server/dashboard/assets/page-DJIGaUQ9.js +1 -0
- zenml/zen_server/dashboard/assets/page-DKK6ulgy.js +1 -0
- zenml/zen_server/dashboard/assets/page-DNjKHjnH.js +1 -0
- zenml/zen_server/dashboard/assets/page-DUK0Nd_1.js +1 -0
- zenml/zen_server/dashboard/assets/page-DUKbOhaD.js +1 -0
- zenml/zen_server/dashboard/assets/page-DYOucPtA.js +1 -0
- zenml/zen_server/dashboard/assets/page-DpqRelAy.js +1 -0
- zenml/zen_server/dashboard/assets/{page-ZfTtFicG.js → page-DwVPpCFg.js} +2 -2
- zenml/zen_server/dashboard/assets/page-XURWnYZP.js +1 -0
- zenml/zen_server/dashboard/assets/page-abw-2oeW.js +1 -0
- zenml/zen_server/dashboard/assets/page-akLcPcKw.js +1 -0
- zenml/zen_server/dashboard/assets/page-n9ejQ2V3.js +2 -0
- zenml/zen_server/dashboard/assets/page-sJjNT9xA.js +6 -0
- zenml/zen_server/dashboard/assets/{persist-UUym702q.js → persist-DWMWVP-y.js} +1 -1
- zenml/zen_server/dashboard/assets/{persist-D87V82eO.js → persist-Dec_w7aB.js} +1 -1
- zenml/zen_server/dashboard/assets/pipeline-CSUlkd50.js +1 -0
- zenml/zen_server/dashboard/assets/{plus-COjQg3AG.js → plus-Cl0_rCVF.js} +1 -1
- zenml/zen_server/dashboard/assets/{react-error-boundary.esm-fyoUBS25.js → react-error-boundary.esm-7_MuhCay.js} +1 -1
- zenml/zen_server/dashboard/assets/{refresh-CM5T3QeU.js → refresh-BcTM09NW.js} +1 -1
- zenml/zen_server/dashboard/assets/resource-tyes-list-o2LXiMay.js +1 -0
- zenml/zen_server/dashboard/assets/resource-type-tooltip-DwHrJstL.js +1 -0
- zenml/zen_server/dashboard/assets/service-connectors-DSEMwJ5A.js +1 -0
- zenml/zen_server/dashboard/assets/{service-BQ9KIhls.js → service-jxtvgks0.js} +2 -2
- zenml/zen_server/dashboard/assets/sharedSchema-BXzg0EZz.js +1 -0
- zenml/zen_server/dashboard/assets/stack-detail-query-Cm0fsgo-.js +1 -0
- zenml/zen_server/dashboard/assets/{terminal-square-DMtel8mb.js → terminal-XFL_4QN-.js} +1 -1
- zenml/zen_server/dashboard/assets/terminal-square-XFL_4QN-.js +1 -0
- zenml/zen_server/dashboard/assets/transform-CeZdrxDZ.js +1 -0
- zenml/zen_server/dashboard/assets/{trash-BWSZ7NRK.js → trash-DP6Tpp_E.js} +1 -1
- zenml/zen_server/dashboard/assets/type-guards-CNgPYg8l.js +1 -0
- zenml/zen_server/dashboard/assets/update-current-user-mutation-D5MjcQ6F.js +1 -0
- zenml/zen_server/dashboard/assets/update-server-settings-mutation-CmnxdxiK.js +1 -0
- zenml/zen_server/dashboard/assets/{zod-C0xYeTvL.js → zod-XdS2h1ws.js} +1 -1
- zenml/zen_server/dashboard/index.html +7 -7
- zenml/zen_server/rbac/utils.py +2 -2
- zenml/zen_server/routers/auth_endpoints.py +2 -2
- zenml/zen_server/routers/devices_endpoints.py +8 -5
- zenml/zen_server/routers/pipeline_deployments_endpoints.py +1 -1
- zenml/zen_server/routers/pipelines_endpoints.py +1 -1
- zenml/zen_server/routers/run_templates_endpoints.py +3 -3
- zenml/zen_server/routers/runs_endpoints.py +35 -0
- zenml/zen_server/template_execution/utils.py +6 -6
- zenml/zen_stores/dag_generator.py +171 -0
- zenml/zen_stores/migrations/versions/0.83.0_release.py +23 -0
- zenml/zen_stores/rest_zen_store.py +17 -3
- zenml/zen_stores/schemas/action_schemas.py +40 -4
- zenml/zen_stores/schemas/api_key_schemas.py +29 -1
- zenml/zen_stores/schemas/artifact_schemas.py +168 -48
- zenml/zen_stores/schemas/base_schemas.py +26 -1
- zenml/zen_stores/schemas/code_repository_schemas.py +46 -5
- zenml/zen_stores/schemas/component_schemas.py +44 -3
- zenml/zen_stores/schemas/device_schemas.py +43 -2
- zenml/zen_stores/schemas/event_source_schemas.py +41 -5
- zenml/zen_stores/schemas/flavor_schemas.py +42 -2
- zenml/zen_stores/schemas/model_schemas.py +113 -77
- zenml/zen_stores/schemas/pipeline_build_schemas.py +53 -4
- zenml/zen_stores/schemas/pipeline_deployment_schemas.py +53 -4
- zenml/zen_stores/schemas/pipeline_run_schemas.py +111 -47
- zenml/zen_stores/schemas/pipeline_schemas.py +41 -9
- zenml/zen_stores/schemas/run_template_schemas.py +75 -11
- zenml/zen_stores/schemas/schedule_schema.py +50 -5
- zenml/zen_stores/schemas/secret_schemas.py +39 -2
- zenml/zen_stores/schemas/service_connector_schemas.py +39 -2
- zenml/zen_stores/schemas/service_schemas.py +39 -4
- zenml/zen_stores/schemas/stack_schemas.py +47 -2
- zenml/zen_stores/schemas/step_run_schemas.py +89 -26
- zenml/zen_stores/schemas/tag_schemas.py +69 -5
- zenml/zen_stores/schemas/trigger_schemas.py +44 -5
- zenml/zen_stores/schemas/utils.py +25 -4
- zenml/zen_stores/sql_zen_store.py +471 -28
- zenml/zen_stores/zen_store_interface.py +9 -1
- {zenml_nightly-0.82.1.dev20250527.dist-info → zenml_nightly-0.83.0.dev20250529.dist-info}/METADATA +2 -2
- {zenml_nightly-0.82.1.dev20250527.dist-info → zenml_nightly-0.83.0.dev20250529.dist-info}/RECORD +230 -222
- zenml/zen_server/dashboard/assets/AlertDialogDropdownItem-BG7-Ki1L.js +0 -1
- zenml/zen_server/dashboard/assets/CollapsibleCard-D20FtrzC.js +0 -1
- zenml/zen_server/dashboard/assets/Commands-DGbAvMDk.js +0 -1
- zenml/zen_server/dashboard/assets/ComponentIcon-ils7uNAk.js +0 -1
- zenml/zen_server/dashboard/assets/CsvVizualization-DVN541XF.js +0 -15
- zenml/zen_server/dashboard/assets/DialogItem-BHWf3sIB.js +0 -1
- zenml/zen_server/dashboard/assets/ExecutionStatus-XrvT2r65.js +0 -1
- zenml/zen_server/dashboard/assets/Infobox-D9k5TFH4.js +0 -1
- zenml/zen_server/dashboard/assets/InlineAvatar-Cfz4WSLK.js +0 -1
- zenml/zen_server/dashboard/assets/MarkdownVisualization-URCyUPcZ.js +0 -14
- zenml/zen_server/dashboard/assets/NestedCollapsible-Dor-bi98.js +0 -1
- zenml/zen_server/dashboard/assets/NumberBox-D2A7ENHb.js +0 -1
- zenml/zen_server/dashboard/assets/Partials-DQJFw1yW.js +0 -1
- zenml/zen_server/dashboard/assets/ProBadge-Cp4hb1YT.js +0 -1
- zenml/zen_server/dashboard/assets/ProCta-EYoV9CvK.js +0 -1
- zenml/zen_server/dashboard/assets/ProviderRadio-BVDA-fAr.js +0 -1
- zenml/zen_server/dashboard/assets/RunSelector-BLuBYNJt.js +0 -1
- zenml/zen_server/dashboard/assets/RunsBody-W4WHf-sq.js +0 -1
- zenml/zen_server/dashboard/assets/SearchField-D-h6jXyg.js +0 -1
- zenml/zen_server/dashboard/assets/SecretTooltip-CePCL8kd.js +0 -1
- zenml/zen_server/dashboard/assets/StackList-CgmN5H-i.js +0 -1
- zenml/zen_server/dashboard/assets/Tabs-DxQ8PDOD.js +0 -1
- zenml/zen_server/dashboard/assets/Tick-CEsT3HPR.js +0 -1
- zenml/zen_server/dashboard/assets/UsageReason-DjI5qMje.js +0 -1
- zenml/zen_server/dashboard/assets/WizardFooter-CFBHFZas.js +0 -1
- zenml/zen_server/dashboard/assets/cloud-squares-DeRLMopf.svg +0 -43
- zenml/zen_server/dashboard/assets/configuration-form-BtI2Y4eX.js +0 -1
- zenml/zen_server/dashboard/assets/connectors-video-C9qY4syJ.svg +0 -21
- zenml/zen_server/dashboard/assets/constants-DP3ZEnXH.js +0 -1
- zenml/zen_server/dashboard/assets/create-stack-BJ6x5rzj.js +0 -1
- zenml/zen_server/dashboard/assets/dates-3pMLCNrD.js +0 -1
- zenml/zen_server/dashboard/assets/delete-run-DlSLEl5T.js +0 -1
- zenml/zen_server/dashboard/assets/docker-BuDBFEDL.js +0 -1
- zenml/zen_server/dashboard/assets/dots-horizontal-BGRJCPCs.js +0 -1
- zenml/zen_server/dashboard/assets/flavor-select-BnPxvQDN.js +0 -1
- zenml/zen_server/dashboard/assets/form-schemas-CbvoEUHr.js +0 -1
- zenml/zen_server/dashboard/assets/index-CFESYpe4.js +0 -1
- zenml/zen_server/dashboard/assets/index-CmLcvK2z.js +0 -1
- zenml/zen_server/dashboard/assets/index-CzX3ZYlI.css +0 -1
- zenml/zen_server/dashboard/assets/index-D2iSHVZq.js +0 -64
- zenml/zen_server/dashboard/assets/kubernetes-D6OUjwSK.js +0 -1
- zenml/zen_server/dashboard/assets/link-external-DUhCSKNm.js +0 -1
- zenml/zen_server/dashboard/assets/login-command-CkqxPtV3.js +0 -1
- zenml/zen_server/dashboard/assets/login-mutation-CXc-Klim.js +0 -1
- zenml/zen_server/dashboard/assets/not-found-olRU3fnu.js +0 -1
- zenml/zen_server/dashboard/assets/page-7keIM1V3.js +0 -1
- zenml/zen_server/dashboard/assets/page-B31neFwG.js +0 -1
- zenml/zen_server/dashboard/assets/page-B3zo4KYS.js +0 -1
- zenml/zen_server/dashboard/assets/page-BN3MHq1a.js +0 -1
- zenml/zen_server/dashboard/assets/page-BNgVExjN.js +0 -1
- zenml/zen_server/dashboard/assets/page-BPtvu74G.js +0 -1
- zenml/zen_server/dashboard/assets/page-BTIuG0ki.js +0 -2
- zenml/zen_server/dashboard/assets/page-BcQzleH6.js +0 -1
- zenml/zen_server/dashboard/assets/page-C05Jw4M2.js +0 -1
- zenml/zen_server/dashboard/assets/page-C28a7K8h.js +0 -1
- zenml/zen_server/dashboard/assets/page-C9WLk0X-.js +0 -1
- zenml/zen_server/dashboard/assets/page-CINMx64X.js +0 -1
- zenml/zen_server/dashboard/assets/page-CYrJbk7P.js +0 -1
- zenml/zen_server/dashboard/assets/page-Ce0cqLo3.js +0 -1
- zenml/zen_server/dashboard/assets/page-CgNsEkw-.js +0 -1
- zenml/zen_server/dashboard/assets/page-Ct2FUYuR.js +0 -1
- zenml/zen_server/dashboard/assets/page-D8G2B3Bu.js +0 -1
- zenml/zen_server/dashboard/assets/page-DL8a4_lg.js +0 -3
- zenml/zen_server/dashboard/assets/page-DMhYn1cF.js +0 -1
- zenml/zen_server/dashboard/assets/page-Dd_Yq-Uf.js +0 -6
- zenml/zen_server/dashboard/assets/page-DfSvqT8g.js +0 -1
- zenml/zen_server/dashboard/assets/page-Dt6ANUTx.js +0 -1
- zenml/zen_server/dashboard/assets/page-DtvTleaT.js +0 -1
- zenml/zen_server/dashboard/assets/page-DwfGTiVs.js +0 -1
- zenml/zen_server/dashboard/assets/page-JgomSTDc.js +0 -1
- zenml/zen_server/dashboard/assets/page-L84ig6HB.js +0 -1
- zenml/zen_server/dashboard/assets/page-Mabsn4QJ.js +0 -1
- zenml/zen_server/dashboard/assets/page-P04L5cm9.js +0 -1
- zenml/zen_server/dashboard/assets/page-PfhAnvq4.js +0 -1
- zenml/zen_server/dashboard/assets/page-WdRrlNt_.js +0 -1
- zenml/zen_server/dashboard/assets/page-cqJDDDeK.js +0 -1
- zenml/zen_server/dashboard/assets/page-k-Wxh9L_.js +0 -1
- zenml/zen_server/dashboard/assets/page-y-zV4n0c.js +0 -1
- zenml/zen_server/dashboard/assets/rocket-Cf-B-XOR.js +0 -1
- zenml/zen_server/dashboard/assets/settings_preview-0JLrRgHP.webp +0 -0
- zenml/zen_server/dashboard/assets/sharedSchema-Bse2agAf.js +0 -14
- zenml/zen_server/dashboard/assets/stack-detail-query-BAcZJrN3.js +0 -1
- zenml/zen_server/dashboard/assets/tick-circle-m94Aa6Zt.js +0 -1
- zenml/zen_server/dashboard/assets/tour-cover-BYfeen6M.webp +0 -0
- zenml/zen_server/dashboard/assets/type-guards-CaeD8wHO.js +0 -1
- zenml/zen_server/dashboard/assets/update-server-settings-mutation-DwMM1LJz.js +0 -1
- {zenml_nightly-0.82.1.dev20250527.dist-info → zenml_nightly-0.83.0.dev20250529.dist-info}/LICENSE +0 -0
- {zenml_nightly-0.82.1.dev20250527.dist-info → zenml_nightly-0.83.0.dev20250529.dist-info}/WHEEL +0 -0
- {zenml_nightly-0.82.1.dev20250527.dist-info → zenml_nightly-0.83.0.dev20250529.dist-info}/entry_points.txt +0 -0
@@ -14,12 +14,12 @@
|
|
14
14
|
"""Implementation of the Skypilot base VM orchestrator."""
|
15
15
|
|
16
16
|
import os
|
17
|
-
import re
|
18
17
|
from abc import abstractmethod
|
19
18
|
from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, cast
|
20
19
|
from uuid import uuid4
|
21
20
|
|
22
21
|
import sky
|
22
|
+
from sky import StatusRefreshMode
|
23
23
|
|
24
24
|
from zenml.entrypoints import PipelineEntrypointConfiguration
|
25
25
|
from zenml.enums import StackComponentType
|
@@ -31,6 +31,15 @@ from zenml.integrations.skypilot.flavors.skypilot_orchestrator_base_vm_config im
|
|
31
31
|
from zenml.integrations.skypilot.orchestrators.skypilot_orchestrator_entrypoint_configuration import (
|
32
32
|
SkypilotOrchestratorEntrypointConfiguration,
|
33
33
|
)
|
34
|
+
from zenml.integrations.skypilot.utils import (
|
35
|
+
create_docker_run_command,
|
36
|
+
prepare_docker_setup,
|
37
|
+
prepare_launch_kwargs,
|
38
|
+
prepare_resources_kwargs,
|
39
|
+
prepare_task_kwargs,
|
40
|
+
sanitize_cluster_name,
|
41
|
+
sky_job_get,
|
42
|
+
)
|
34
43
|
from zenml.logger import get_logger
|
35
44
|
from zenml.orchestrators import (
|
36
45
|
ContainerizedOrchestrator,
|
@@ -252,32 +261,21 @@ class SkypilotBaseOrchestrator(ContainerizedOrchestrator):
|
|
252
261
|
entrypoint_str = " ".join(command)
|
253
262
|
arguments_str = " ".join(args)
|
254
263
|
|
255
|
-
task_envs = environment
|
256
|
-
docker_environment_str = " ".join(
|
257
|
-
f"-e {k}={v}" for k, v in environment.items()
|
258
|
-
)
|
259
|
-
custom_run_args = " ".join(settings.docker_run_args)
|
260
|
-
if custom_run_args:
|
261
|
-
custom_run_args += " "
|
262
|
-
|
263
|
-
instance_type = settings.instance_type or self.DEFAULT_INSTANCE_TYPE
|
264
|
+
task_envs = environment.copy()
|
264
265
|
|
265
266
|
# Set up credentials
|
266
267
|
self.setup_credentials()
|
267
268
|
|
268
|
-
#
|
269
|
-
|
269
|
+
# Prepare Docker setup
|
270
|
+
setup, docker_creds_envs = prepare_docker_setup(
|
271
|
+
container_registry_uri=stack.container_registry.config.uri,
|
272
|
+
credentials=stack.container_registry.credentials,
|
273
|
+
use_sudo=True, # Base orchestrator uses sudo
|
274
|
+
)
|
270
275
|
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
f"sudo docker login --username $DOCKER_USERNAME --password "
|
275
|
-
f"$DOCKER_PASSWORD {stack.container_registry.config.uri}"
|
276
|
-
)
|
277
|
-
task_envs["DOCKER_USERNAME"] = docker_username
|
278
|
-
task_envs["DOCKER_PASSWORD"] = docker_password
|
279
|
-
else:
|
280
|
-
setup = None
|
276
|
+
# Update task_envs with Docker credentials
|
277
|
+
if docker_creds_envs:
|
278
|
+
task_envs.update(docker_creds_envs)
|
281
279
|
|
282
280
|
# Run the entire pipeline
|
283
281
|
|
@@ -291,45 +289,49 @@ class SkypilotBaseOrchestrator(ContainerizedOrchestrator):
|
|
291
289
|
down = False
|
292
290
|
idle_minutes_to_autostop = None
|
293
291
|
else:
|
294
|
-
run_command =
|
292
|
+
run_command = create_docker_run_command(
|
293
|
+
image=image,
|
294
|
+
entrypoint_str=entrypoint_str,
|
295
|
+
arguments_str=arguments_str,
|
296
|
+
environment=task_envs,
|
297
|
+
docker_run_args=settings.docker_run_args,
|
298
|
+
use_sudo=True, # Base orchestrator uses sudo
|
299
|
+
)
|
295
300
|
down = settings.down
|
296
301
|
idle_minutes_to_autostop = settings.idle_minutes_to_autostop
|
297
|
-
|
298
|
-
|
302
|
+
|
303
|
+
# Create the Task with all parameters and task settings
|
304
|
+
task_kwargs = prepare_task_kwargs(
|
305
|
+
settings=settings,
|
306
|
+
run_command=run_command,
|
299
307
|
setup=setup,
|
300
|
-
|
308
|
+
task_envs=task_envs,
|
309
|
+
task_name=f"{orchestrator_run_name}",
|
301
310
|
)
|
311
|
+
|
312
|
+
task = sky.Task(**task_kwargs)
|
302
313
|
logger.debug(f"Running run: {run_command}")
|
303
314
|
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
use_spot=settings.use_spot,
|
313
|
-
job_recovery=settings.job_recovery,
|
314
|
-
region=settings.region,
|
315
|
-
zone=settings.zone,
|
316
|
-
image_id=image
|
317
|
-
if isinstance(self.cloud, sky.clouds.Kubernetes)
|
318
|
-
else settings.image_id,
|
319
|
-
disk_size=settings.disk_size,
|
320
|
-
disk_tier=settings.disk_tier,
|
321
|
-
)
|
315
|
+
# Set resources with all parameters and resource settings
|
316
|
+
resources_kwargs = prepare_resources_kwargs(
|
317
|
+
cloud=self.cloud,
|
318
|
+
settings=settings,
|
319
|
+
default_instance_type=self.DEFAULT_INSTANCE_TYPE,
|
320
|
+
kubernetes_image=image
|
321
|
+
if isinstance(self.cloud, sky.clouds.Kubernetes)
|
322
|
+
else None,
|
322
323
|
)
|
323
|
-
|
324
|
-
|
325
|
-
# Could also be a parameter in the settings to control this behavior
|
326
|
-
detach_run = not settings.stream_logs
|
324
|
+
|
325
|
+
task = task.set_resources(sky.Resources(**resources_kwargs))
|
327
326
|
|
328
327
|
launch_new_cluster = True
|
329
328
|
if settings.cluster_name:
|
330
|
-
|
331
|
-
refresh=
|
329
|
+
status_request_id = sky.status(
|
330
|
+
refresh=StatusRefreshMode.AUTO,
|
331
|
+
cluster_names=[settings.cluster_name],
|
332
332
|
)
|
333
|
+
cluster_info = sky.stream_and_get(status_request_id)
|
334
|
+
|
333
335
|
if cluster_info:
|
334
336
|
logger.info(
|
335
337
|
f"Found existing cluster {settings.cluster_name}. Reusing..."
|
@@ -342,7 +344,7 @@ class SkypilotBaseOrchestrator(ContainerizedOrchestrator):
|
|
342
344
|
)
|
343
345
|
cluster_name = settings.cluster_name
|
344
346
|
else:
|
345
|
-
cluster_name =
|
347
|
+
cluster_name = sanitize_cluster_name(
|
346
348
|
f"{orchestrator_run_name}"
|
347
349
|
)
|
348
350
|
logger.info(
|
@@ -350,33 +352,55 @@ class SkypilotBaseOrchestrator(ContainerizedOrchestrator):
|
|
350
352
|
)
|
351
353
|
|
352
354
|
if launch_new_cluster:
|
353
|
-
|
355
|
+
# Prepare launch parameters with additional launch settings
|
356
|
+
launch_kwargs = prepare_launch_kwargs(
|
357
|
+
settings=settings,
|
358
|
+
down=down,
|
359
|
+
idle_minutes_to_autostop=idle_minutes_to_autostop,
|
360
|
+
)
|
361
|
+
logger.info(
|
362
|
+
f"Launching the task on a new cluster: {cluster_name}"
|
363
|
+
)
|
364
|
+
launch_job_id = sky.launch(
|
354
365
|
task,
|
355
366
|
cluster_name,
|
356
|
-
|
357
|
-
idle_minutes_to_autostop=idle_minutes_to_autostop,
|
358
|
-
down=down,
|
359
|
-
stream_logs=settings.stream_logs,
|
360
|
-
backend=None,
|
361
|
-
detach_setup=True,
|
362
|
-
detach_run=detach_run,
|
367
|
+
**launch_kwargs,
|
363
368
|
)
|
369
|
+
sky_job_get(launch_job_id, settings.stream_logs, cluster_name)
|
370
|
+
|
364
371
|
else:
|
365
|
-
#
|
366
|
-
|
367
|
-
|
372
|
+
# Prepare exec parameters with additional launch settings
|
373
|
+
exec_kwargs = {
|
374
|
+
"down": down,
|
375
|
+
"backend": None,
|
376
|
+
**settings.launch_settings, # Can reuse same settings for exec
|
377
|
+
}
|
378
|
+
|
379
|
+
# Remove None values to avoid overriding SkyPilot defaults
|
380
|
+
exec_kwargs = {
|
381
|
+
k: v for k, v in exec_kwargs.items() if v is not None
|
382
|
+
}
|
383
|
+
|
384
|
+
# Make sure the cluster is up
|
385
|
+
start_request_id = sky.start(
|
368
386
|
settings.cluster_name,
|
369
387
|
down=down,
|
370
388
|
idle_minutes_to_autostop=idle_minutes_to_autostop,
|
371
389
|
retry_until_up=settings.retry_until_up,
|
372
390
|
)
|
373
|
-
sky.
|
391
|
+
sky.stream_and_get(start_request_id)
|
392
|
+
|
393
|
+
logger.info(
|
394
|
+
f"Executing the task on the cluster: {settings.cluster_name}"
|
395
|
+
)
|
396
|
+
exec_job_id = sky.exec(
|
374
397
|
task,
|
375
|
-
settings.cluster_name,
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
398
|
+
cluster_name=settings.cluster_name,
|
399
|
+
**exec_kwargs,
|
400
|
+
)
|
401
|
+
assert settings.cluster_name is not None
|
402
|
+
sky_job_get(
|
403
|
+
exec_job_id, settings.stream_logs, settings.cluster_name
|
380
404
|
)
|
381
405
|
|
382
406
|
except Exception as e:
|
@@ -386,19 +410,3 @@ class SkypilotBaseOrchestrator(ContainerizedOrchestrator):
|
|
386
410
|
finally:
|
387
411
|
# Unset the service connector AWS profile ENV variable
|
388
412
|
self.prepare_environment_variable(set=False)
|
389
|
-
|
390
|
-
def sanitize_cluster_name(self, name: str) -> str:
|
391
|
-
"""Sanitize the value to be used in a cluster name.
|
392
|
-
|
393
|
-
Args:
|
394
|
-
name: Arbitrary input cluster name.
|
395
|
-
|
396
|
-
Returns:
|
397
|
-
Sanitized cluster name.
|
398
|
-
"""
|
399
|
-
name = re.sub(
|
400
|
-
r"[^a-z0-9-]", "-", name.lower()
|
401
|
-
) # replaces any character that is not a lowercase letter, digit, or hyphen with a hyphen
|
402
|
-
name = re.sub(r"^[-]+", "", name) # trim leading hyphens
|
403
|
-
name = re.sub(r"[-]+$", "", name) # trim trailing hyphens
|
404
|
-
return name
|
@@ -32,8 +32,20 @@ from zenml.integrations.skypilot.orchestrators.skypilot_base_vm_orchestrator imp
|
|
32
32
|
ENV_ZENML_SKYPILOT_ORCHESTRATOR_RUN_ID,
|
33
33
|
SkypilotBaseOrchestrator,
|
34
34
|
)
|
35
|
+
from zenml.integrations.skypilot.utils import (
|
36
|
+
create_docker_run_command,
|
37
|
+
prepare_docker_setup,
|
38
|
+
prepare_launch_kwargs,
|
39
|
+
prepare_resources_kwargs,
|
40
|
+
prepare_task_kwargs,
|
41
|
+
sanitize_cluster_name,
|
42
|
+
sky_job_get,
|
43
|
+
)
|
35
44
|
from zenml.logger import get_logger
|
36
|
-
from zenml.orchestrators.dag_runner import ThreadedDagRunner
|
45
|
+
from zenml.orchestrators.dag_runner import NodeStatus, ThreadedDagRunner
|
46
|
+
from zenml.orchestrators.publish_utils import (
|
47
|
+
publish_failed_pipeline_run,
|
48
|
+
)
|
37
49
|
from zenml.orchestrators.utils import get_config_environment_vars
|
38
50
|
|
39
51
|
logger = get_logger(__name__)
|
@@ -65,212 +77,228 @@ def main() -> None:
|
|
65
77
|
TypeError: If the active stack's orchestrator is not an instance of
|
66
78
|
SkypilotBaseOrchestrator.
|
67
79
|
ValueError: If the active stack's container registry is None.
|
80
|
+
Exception: If the orchestration or one of the steps fails.
|
68
81
|
"""
|
69
82
|
# Log to the container's stdout so it can be streamed by the client.
|
70
83
|
logger.info("Skypilot orchestrator VM started.")
|
71
84
|
|
72
85
|
# Parse / extract args.
|
73
86
|
args = parse_args()
|
74
|
-
|
75
87
|
orchestrator_run_id = socket.gethostname()
|
76
88
|
|
77
|
-
|
78
|
-
|
79
|
-
pipeline_dag = {
|
80
|
-
step_name: step.spec.upstream_steps
|
81
|
-
for step_name, step in deployment.step_configurations.items()
|
82
|
-
}
|
83
|
-
step_command = StepEntrypointConfiguration.get_entrypoint_command()
|
84
|
-
entrypoint_str = " ".join(step_command)
|
85
|
-
|
86
|
-
active_stack = Client().active_stack
|
87
|
-
|
88
|
-
orchestrator = active_stack.orchestrator
|
89
|
-
if not isinstance(orchestrator, SkypilotBaseOrchestrator):
|
90
|
-
raise TypeError(
|
91
|
-
"The active stack's orchestrator is not an instance of SkypilotBaseOrchestrator."
|
92
|
-
)
|
93
|
-
|
94
|
-
# Set up credentials
|
95
|
-
orchestrator.setup_credentials()
|
96
|
-
|
97
|
-
# Set the service connector AWS profile ENV variable
|
98
|
-
orchestrator.prepare_environment_variable(set=True)
|
89
|
+
run = None
|
99
90
|
|
100
|
-
|
101
|
-
|
102
|
-
if container_registry is None:
|
103
|
-
raise ValueError("Container registry cannot be None.")
|
91
|
+
try:
|
92
|
+
deployment = Client().get_deployment(args.deployment_id)
|
104
93
|
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
f"docker login --username $DOCKER_USERNAME --password "
|
109
|
-
f"$DOCKER_PASSWORD {container_registry.config.uri}"
|
110
|
-
)
|
111
|
-
task_envs = {
|
112
|
-
"DOCKER_USERNAME": docker_username,
|
113
|
-
"DOCKER_PASSWORD": docker_password,
|
94
|
+
pipeline_dag = {
|
95
|
+
step_name: step.spec.upstream_steps
|
96
|
+
for step_name, step in deployment.step_configurations.items()
|
114
97
|
}
|
115
|
-
|
116
|
-
|
117
|
-
task_envs = None
|
118
|
-
|
119
|
-
unique_resource_configs: Dict[str, str] = {}
|
120
|
-
for step_name, step in deployment.step_configurations.items():
|
121
|
-
settings = cast(
|
122
|
-
SkypilotBaseOrchestratorSettings,
|
123
|
-
orchestrator.get_settings(step),
|
124
|
-
)
|
125
|
-
# Handle both str and Dict[str, int] types for accelerators
|
126
|
-
if isinstance(settings.accelerators, dict):
|
127
|
-
accelerators_hashable = frozenset(settings.accelerators.items())
|
128
|
-
elif isinstance(settings.accelerators, str):
|
129
|
-
accelerators_hashable = frozenset({(settings.accelerators, 1)})
|
130
|
-
else:
|
131
|
-
accelerators_hashable = None
|
132
|
-
resource_config = (
|
133
|
-
settings.instance_type,
|
134
|
-
settings.cpus,
|
135
|
-
settings.memory,
|
136
|
-
settings.disk_size, # Assuming disk_size is part of the settings
|
137
|
-
settings.disk_tier, # Assuming disk_tier is part of the settings
|
138
|
-
settings.use_spot,
|
139
|
-
settings.job_recovery,
|
140
|
-
settings.region,
|
141
|
-
settings.zone,
|
142
|
-
accelerators_hashable,
|
143
|
-
)
|
144
|
-
cluster_name_parts = [
|
145
|
-
orchestrator.sanitize_cluster_name(str(part))
|
146
|
-
for part in resource_config
|
147
|
-
if part is not None
|
148
|
-
]
|
149
|
-
cluster_name = f"cluster-{orchestrator_run_id}" + "-".join(
|
150
|
-
cluster_name_parts
|
151
|
-
)
|
152
|
-
unique_resource_configs[step_name] = cluster_name
|
153
|
-
|
154
|
-
run = Client().list_pipeline_runs(
|
155
|
-
sort_by="asc:created",
|
156
|
-
size=1,
|
157
|
-
deployment_id=args.deployment_id,
|
158
|
-
status=ExecutionStatus.INITIALIZING,
|
159
|
-
)[0]
|
98
|
+
step_command = StepEntrypointConfiguration.get_entrypoint_command()
|
99
|
+
entrypoint_str = " ".join(step_command)
|
160
100
|
|
161
|
-
|
101
|
+
active_stack = Client().active_stack
|
162
102
|
|
163
|
-
|
164
|
-
|
103
|
+
orchestrator = active_stack.orchestrator
|
104
|
+
if not isinstance(orchestrator, SkypilotBaseOrchestrator):
|
105
|
+
raise TypeError(
|
106
|
+
"The active stack's orchestrator is not an instance of SkypilotBaseOrchestrator."
|
107
|
+
)
|
165
108
|
|
166
|
-
|
167
|
-
|
168
|
-
"""
|
169
|
-
cluster_name = unique_resource_configs[step_name]
|
109
|
+
# Set up credentials
|
110
|
+
orchestrator.setup_credentials()
|
170
111
|
|
171
|
-
|
172
|
-
|
173
|
-
)
|
112
|
+
# Set the service connector AWS profile ENV variable
|
113
|
+
orchestrator.prepare_environment_variable(set=True)
|
174
114
|
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
115
|
+
# get active container registry
|
116
|
+
container_registry = active_stack.container_registry
|
117
|
+
if container_registry is None:
|
118
|
+
raise ValueError("Container registry cannot be None.")
|
179
119
|
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
120
|
+
# Prepare Docker setup
|
121
|
+
setup, task_envs = prepare_docker_setup(
|
122
|
+
container_registry_uri=container_registry.config.uri,
|
123
|
+
credentials=container_registry.credentials,
|
124
|
+
use_sudo=False, # Entrypoint doesn't use sudo
|
184
125
|
)
|
185
|
-
env = get_config_environment_vars()
|
186
|
-
env[ENV_ZENML_SKYPILOT_ORCHESTRATOR_RUN_ID] = orchestrator_run_id
|
187
126
|
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
custom_run_args += " "
|
194
|
-
|
195
|
-
# Set up the task
|
196
|
-
run_command = f"docker run --rm {custom_run_args}{docker_environment_str} {image} {entrypoint_str} {arguments_str}"
|
197
|
-
task_name = f"{deployment.id}-{step_name}-{time.time()}"
|
198
|
-
task = sky.Task(
|
199
|
-
run=run_command,
|
200
|
-
setup=setup,
|
201
|
-
envs=task_envs,
|
202
|
-
name=task_name,
|
203
|
-
)
|
204
|
-
task = task.set_resources(
|
205
|
-
sky.Resources(
|
206
|
-
cloud=orchestrator.cloud,
|
207
|
-
instance_type=settings.instance_type
|
208
|
-
or orchestrator.DEFAULT_INSTANCE_TYPE,
|
209
|
-
cpus=settings.cpus,
|
210
|
-
memory=settings.memory,
|
211
|
-
disk_size=settings.disk_size,
|
212
|
-
disk_tier=settings.disk_tier,
|
213
|
-
accelerators=settings.accelerators,
|
214
|
-
accelerator_args=settings.accelerator_args,
|
215
|
-
use_spot=settings.use_spot,
|
216
|
-
job_recovery=settings.job_recovery,
|
217
|
-
region=settings.region,
|
218
|
-
zone=settings.zone,
|
219
|
-
image_id=settings.image_id,
|
127
|
+
unique_resource_configs: Dict[str, str] = {}
|
128
|
+
for step_name, step in deployment.step_configurations.items():
|
129
|
+
settings = cast(
|
130
|
+
SkypilotBaseOrchestratorSettings,
|
131
|
+
orchestrator.get_settings(step),
|
220
132
|
)
|
221
|
-
|
133
|
+
# Handle both str and Dict[str, int] types for accelerators
|
134
|
+
if isinstance(settings.accelerators, dict):
|
135
|
+
accelerators_hashable = frozenset(
|
136
|
+
settings.accelerators.items()
|
137
|
+
)
|
138
|
+
elif isinstance(settings.accelerators, str):
|
139
|
+
accelerators_hashable = frozenset({(settings.accelerators, 1)})
|
140
|
+
else:
|
141
|
+
accelerators_hashable = None
|
142
|
+
resource_config = (
|
143
|
+
settings.instance_type,
|
144
|
+
settings.cpus,
|
145
|
+
settings.memory,
|
146
|
+
settings.disk_size, # Assuming disk_size is part of the settings
|
147
|
+
settings.disk_tier, # Assuming disk_tier is part of the settings
|
148
|
+
settings.use_spot,
|
149
|
+
settings.job_recovery,
|
150
|
+
settings.region,
|
151
|
+
settings.zone,
|
152
|
+
accelerators_hashable,
|
153
|
+
)
|
154
|
+
cluster_name_parts = [
|
155
|
+
sanitize_cluster_name(str(part))
|
156
|
+
for part in resource_config
|
157
|
+
if part is not None
|
158
|
+
]
|
159
|
+
cluster_name = f"cluster-{orchestrator_run_id}" + "-".join(
|
160
|
+
cluster_name_parts
|
161
|
+
)
|
162
|
+
unique_resource_configs[step_name] = cluster_name
|
222
163
|
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
stream_logs=settings.stream_logs,
|
230
|
-
detach_setup=True,
|
231
|
-
detach_run=True,
|
232
|
-
)
|
164
|
+
run = Client().list_pipeline_runs(
|
165
|
+
sort_by="asc:created",
|
166
|
+
size=1,
|
167
|
+
deployment_id=args.deployment_id,
|
168
|
+
status=ExecutionStatus.INITIALIZING,
|
169
|
+
)[0]
|
233
170
|
|
234
|
-
|
235
|
-
logger.info(f"Waiting for pod of step `{step_name}` to start...")
|
171
|
+
logger.info("Fetching pipeline run: %s", run.id)
|
236
172
|
|
237
|
-
|
173
|
+
def run_step_on_skypilot_vm(step_name: str) -> None:
|
174
|
+
"""Run a pipeline step in a separate Skypilot VM.
|
238
175
|
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
176
|
+
Args:
|
177
|
+
step_name: Name of the step.
|
178
|
+
|
179
|
+
Raises:
|
180
|
+
Exception: If the step execution fails.
|
181
|
+
"""
|
182
|
+
logger.info(f"Running step `{step_name}` on a VM...")
|
243
183
|
try:
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
184
|
+
cluster_name = unique_resource_configs[step_name]
|
185
|
+
|
186
|
+
image = SkypilotBaseOrchestrator.get_image(
|
187
|
+
deployment=deployment, step_name=step_name
|
188
|
+
)
|
189
|
+
|
190
|
+
step_args = (
|
191
|
+
StepEntrypointConfiguration.get_entrypoint_arguments(
|
192
|
+
step_name=step_name, deployment_id=deployment.id
|
193
|
+
)
|
194
|
+
)
|
195
|
+
arguments_str = " ".join(step_args)
|
196
|
+
|
197
|
+
step = deployment.step_configurations[step_name]
|
198
|
+
settings = cast(
|
199
|
+
SkypilotBaseOrchestratorSettings,
|
200
|
+
orchestrator.get_settings(step),
|
201
|
+
)
|
202
|
+
env = get_config_environment_vars()
|
203
|
+
env[ENV_ZENML_SKYPILOT_ORCHESTRATOR_RUN_ID] = (
|
204
|
+
orchestrator_run_id
|
205
|
+
)
|
206
|
+
|
207
|
+
# Create the Docker run command
|
208
|
+
run_command = create_docker_run_command(
|
209
|
+
image=image,
|
210
|
+
entrypoint_str=entrypoint_str,
|
211
|
+
arguments_str=arguments_str,
|
212
|
+
environment=env,
|
213
|
+
docker_run_args=settings.docker_run_args,
|
214
|
+
use_sudo=False, # Entrypoint doesn't use sudo
|
215
|
+
)
|
216
|
+
|
217
|
+
task_name = f"{deployment.id}-{step_name}-{time.time()}"
|
218
|
+
|
219
|
+
# Create task kwargs
|
220
|
+
task_kwargs = prepare_task_kwargs(
|
221
|
+
settings=settings,
|
222
|
+
run_command=run_command,
|
223
|
+
setup=setup,
|
224
|
+
task_envs=task_envs,
|
225
|
+
task_name=task_name,
|
226
|
+
)
|
227
|
+
|
228
|
+
task = sky.Task(**task_kwargs)
|
229
|
+
|
230
|
+
# Set resources
|
231
|
+
resources_kwargs = prepare_resources_kwargs(
|
232
|
+
cloud=orchestrator.cloud,
|
233
|
+
settings=settings,
|
234
|
+
default_instance_type=orchestrator.DEFAULT_INSTANCE_TYPE,
|
235
|
+
)
|
236
|
+
|
237
|
+
task = task.set_resources(sky.Resources(**resources_kwargs))
|
238
|
+
|
239
|
+
# Prepare launch parameters
|
240
|
+
launch_kwargs = prepare_launch_kwargs(
|
241
|
+
settings=settings,
|
242
|
+
)
|
243
|
+
|
244
|
+
# sky.launch now returns a request ID (async). Capture it so we can
|
245
|
+
# optionally stream logs and block until completion when desired.
|
246
|
+
launch_request_id = sky.launch(
|
247
|
+
task,
|
248
|
+
cluster_name,
|
249
|
+
**launch_kwargs,
|
250
|
+
)
|
251
|
+
sky_job_get(launch_request_id, True, cluster_name)
|
252
|
+
|
253
|
+
# Pop the resource configuration for this step
|
254
|
+
unique_resource_configs.pop(step_name)
|
255
|
+
|
256
|
+
if cluster_name in unique_resource_configs.values():
|
257
|
+
# If there are more steps using this configuration, skip deprovisioning the cluster
|
258
|
+
logger.info(
|
259
|
+
f"Resource configuration for cluster '{cluster_name}' "
|
260
|
+
"is used by subsequent steps. Skipping the deprovisioning of "
|
261
|
+
"the cluster."
|
262
|
+
)
|
263
|
+
else:
|
264
|
+
# If there are no more steps using this configuration, down the cluster
|
265
|
+
logger.info(
|
266
|
+
f"Resource configuration for cluster '{cluster_name}' "
|
267
|
+
"is not used by subsequent steps. deprovisioning the cluster."
|
268
|
+
)
|
269
|
+
down_request_id = sky.down(cluster_name)
|
270
|
+
# Wait for the cluster to be terminated
|
271
|
+
sky.stream_and_get(down_request_id)
|
272
|
+
|
273
|
+
logger.info(
|
274
|
+
f"Running step `{step_name}` on a VM is completed."
|
275
|
+
)
|
276
|
+
|
277
|
+
except Exception as e:
|
278
|
+
logger.error(f"Failed while launching step `{step_name}`: {e}")
|
279
|
+
raise
|
280
|
+
|
281
|
+
dag_runner = ThreadedDagRunner(
|
282
|
+
dag=pipeline_dag, run_fn=run_step_on_skypilot_vm
|
283
|
+
)
|
284
|
+
dag_runner.run()
|
285
|
+
|
286
|
+
failed_nodes = []
|
287
|
+
for node in dag_runner.nodes:
|
288
|
+
if dag_runner.node_states[node] == NodeStatus.FAILED:
|
289
|
+
failed_nodes.append(node)
|
268
290
|
|
269
|
-
|
291
|
+
if failed_nodes:
|
292
|
+
raise Exception(f"One or more steps failed: {failed_nodes}")
|
270
293
|
|
271
|
-
|
294
|
+
except Exception as e:
|
295
|
+
logger.error(f"Orchestrator failed: {e}")
|
272
296
|
|
273
|
-
|
297
|
+
# Try to mark the pipeline run as failed
|
298
|
+
if run:
|
299
|
+
publish_failed_pipeline_run(run.id)
|
300
|
+
logger.info("Marked pipeline run as failed in ZenML.")
|
301
|
+
raise
|
274
302
|
|
275
303
|
|
276
304
|
if __name__ == "__main__":
|