zenml-nightly 0.58.2.dev20240626__py3-none-any.whl → 0.62.0.dev20240726__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (266)
  1. README.md +31 -10
  2. RELEASE_NOTES.md +280 -0
  3. zenml/VERSION +1 -1
  4. zenml/__init__.py +2 -0
  5. zenml/analytics/enums.py +3 -0
  6. zenml/cli/__init__.py +28 -0
  7. zenml/cli/artifact.py +1 -2
  8. zenml/cli/integration.py +9 -8
  9. zenml/cli/server.py +6 -0
  10. zenml/cli/stack.py +812 -39
  11. zenml/cli/stack_components.py +9 -0
  12. zenml/cli/text_utils.py +35 -1
  13. zenml/cli/utils.py +127 -10
  14. zenml/client.py +23 -14
  15. zenml/config/docker_settings.py +8 -5
  16. zenml/constants.py +13 -1
  17. zenml/container_registries/base_container_registry.py +1 -0
  18. zenml/enums.py +23 -0
  19. zenml/event_hub/event_hub.py +5 -8
  20. zenml/integrations/__init__.py +1 -0
  21. zenml/integrations/aws/__init__.py +1 -0
  22. zenml/integrations/azure/__init__.py +3 -2
  23. zenml/integrations/constants.py +1 -0
  24. zenml/integrations/databricks/__init__.py +52 -0
  25. zenml/integrations/databricks/flavors/__init__.py +30 -0
  26. zenml/integrations/databricks/flavors/databricks_model_deployer_flavor.py +118 -0
  27. zenml/integrations/databricks/flavors/databricks_orchestrator_flavor.py +147 -0
  28. zenml/integrations/databricks/model_deployers/__init__.py +20 -0
  29. zenml/integrations/databricks/model_deployers/databricks_model_deployer.py +249 -0
  30. zenml/integrations/databricks/orchestrators/__init__.py +20 -0
  31. zenml/integrations/databricks/orchestrators/databricks_orchestrator.py +497 -0
  32. zenml/integrations/databricks/orchestrators/databricks_orchestrator_entrypoint_config.py +97 -0
  33. zenml/integrations/databricks/services/__init__.py +19 -0
  34. zenml/integrations/databricks/services/databricks_deployment.py +407 -0
  35. zenml/integrations/databricks/utils/__init__.py +14 -0
  36. zenml/integrations/databricks/utils/databricks_utils.py +87 -0
  37. zenml/integrations/deepchecks/__init__.py +1 -0
  38. zenml/integrations/discord/__init__.py +1 -0
  39. zenml/integrations/evidently/__init__.py +1 -0
  40. zenml/integrations/facets/__init__.py +1 -0
  41. zenml/integrations/feast/__init__.py +1 -0
  42. zenml/integrations/gcp/__init__.py +3 -1
  43. zenml/integrations/gcp/google_credentials_mixin.py +1 -1
  44. zenml/integrations/gcp/service_connectors/gcp_service_connector.py +320 -64
  45. zenml/integrations/great_expectations/data_validators/ge_data_validator.py +12 -8
  46. zenml/integrations/huggingface/__init__.py +1 -0
  47. zenml/integrations/huggingface/materializers/huggingface_datasets_materializer.py +88 -3
  48. zenml/integrations/huggingface/steps/accelerate_runner.py +1 -7
  49. zenml/integrations/integration.py +24 -0
  50. zenml/integrations/kubeflow/__init__.py +3 -0
  51. zenml/integrations/kubeflow/flavors/kubeflow_orchestrator_flavor.py +1 -1
  52. zenml/integrations/kubeflow/orchestrators/kubeflow_orchestrator.py +0 -1
  53. zenml/integrations/kubernetes/__init__.py +3 -1
  54. zenml/integrations/kubernetes/orchestrators/kube_utils.py +4 -1
  55. zenml/integrations/kubernetes/orchestrators/kubernetes_orchestrator.py +1 -13
  56. zenml/integrations/kubernetes/orchestrators/manifest_utils.py +22 -4
  57. zenml/integrations/kubernetes/pod_settings.py +4 -0
  58. zenml/integrations/label_studio/annotators/label_studio_annotator.py +1 -0
  59. zenml/integrations/langchain/__init__.py +1 -0
  60. zenml/integrations/lightgbm/__init__.py +1 -0
  61. zenml/integrations/mlflow/__init__.py +4 -2
  62. zenml/integrations/mlflow/model_registries/mlflow_model_registry.py +6 -2
  63. zenml/integrations/mlflow/services/mlflow_deployment.py +1 -1
  64. zenml/integrations/neural_prophet/__init__.py +1 -0
  65. zenml/integrations/polars/__init__.py +1 -0
  66. zenml/integrations/prodigy/__init__.py +1 -0
  67. zenml/integrations/pycaret/__init__.py +6 -0
  68. zenml/integrations/registry.py +37 -0
  69. zenml/integrations/s3/artifact_stores/s3_artifact_store.py +17 -6
  70. zenml/integrations/seldon/__init__.py +1 -0
  71. zenml/integrations/seldon/model_deployers/seldon_model_deployer.py +1 -0
  72. zenml/integrations/skypilot/flavors/skypilot_orchestrator_base_vm_config.py +2 -2
  73. zenml/integrations/skypilot/orchestrators/skypilot_base_vm_orchestrator.py +1 -1
  74. zenml/integrations/skypilot/orchestrators/skypilot_orchestrator_entrypoint.py +2 -2
  75. zenml/integrations/skypilot_aws/__init__.py +2 -1
  76. zenml/integrations/skypilot_azure/__init__.py +1 -3
  77. zenml/integrations/skypilot_gcp/__init__.py +1 -1
  78. zenml/integrations/skypilot_lambda/__init__.py +1 -1
  79. zenml/integrations/skypilot_lambda/flavors/skypilot_orchestrator_lambda_vm_flavor.py +1 -1
  80. zenml/integrations/slack/__init__.py +1 -0
  81. zenml/integrations/tekton/__init__.py +1 -0
  82. zenml/integrations/tensorboard/__init__.py +0 -1
  83. zenml/integrations/tensorflow/__init__.py +18 -6
  84. zenml/integrations/wandb/__init__.py +1 -0
  85. zenml/logging/step_logging.py +34 -35
  86. zenml/materializers/built_in_materializer.py +1 -1
  87. zenml/materializers/cloudpickle_materializer.py +1 -1
  88. zenml/model/model.py +1 -1
  89. zenml/models/__init__.py +11 -0
  90. zenml/models/v2/core/component.py +47 -0
  91. zenml/models/v2/core/model.py +1 -2
  92. zenml/models/v2/core/server_settings.py +0 -20
  93. zenml/models/v2/core/service_connector.py +17 -0
  94. zenml/models/v2/core/stack.py +31 -0
  95. zenml/models/v2/misc/full_stack.py +129 -0
  96. zenml/models/v2/misc/stack_deployment.py +91 -0
  97. zenml/new/pipelines/pipeline.py +1 -1
  98. zenml/new/pipelines/run_utils.py +1 -1
  99. zenml/orchestrators/__init__.py +4 -0
  100. zenml/orchestrators/input_utils.py +3 -6
  101. zenml/orchestrators/step_launcher.py +1 -0
  102. zenml/orchestrators/wheeled_orchestrator.py +147 -0
  103. zenml/service_connectors/service_connector_utils.py +408 -0
  104. zenml/stack/stack.py +3 -6
  105. zenml/stack_deployments/__init__.py +14 -0
  106. zenml/stack_deployments/aws_stack_deployment.py +254 -0
  107. zenml/stack_deployments/azure_stack_deployment.py +179 -0
  108. zenml/stack_deployments/gcp_stack_deployment.py +269 -0
  109. zenml/stack_deployments/stack_deployment.py +218 -0
  110. zenml/stack_deployments/utils.py +48 -0
  111. zenml/steps/base_step.py +7 -5
  112. zenml/utils/function_utils.py +2 -2
  113. zenml/utils/pagination_utils.py +7 -5
  114. zenml/utils/pipeline_docker_image_builder.py +105 -68
  115. zenml/utils/pydantic_utils.py +6 -5
  116. zenml/utils/source_utils.py +4 -1
  117. zenml/zen_server/cloud_utils.py +18 -3
  118. zenml/zen_server/dashboard/assets/{404-CDPQCl4D.js → 404-B_YdvmwS.js} +1 -1
  119. zenml/zen_server/dashboard/assets/@radix-CFOkMR_E.js +85 -0
  120. zenml/zen_server/dashboard/assets/{@react-router-DYovave8.js → @react-router-CO-OsFwI.js} +2 -2
  121. zenml/zen_server/dashboard/assets/{@reactflow-CHBapDaj.js → @reactflow-l_1hUr1S.js} +2 -2
  122. zenml/zen_server/dashboard/assets/@tanstack-DYiOyJUL.js +22 -0
  123. zenml/zen_server/dashboard/assets/AwarenessChannel-CFg5iX4Z.js +1 -0
  124. zenml/zen_server/dashboard/assets/{CodeSnippet-BidtnWOi.js → CodeSnippet-Dvkx_82E.js} +2 -2
  125. zenml/zen_server/dashboard/assets/CollapsibleCard-opiuBHHc.js +1 -0
  126. zenml/zen_server/dashboard/assets/Commands-DoN1xrEq.js +1 -0
  127. zenml/zen_server/dashboard/assets/CopyButton-Cr7xYEPb.js +2 -0
  128. zenml/zen_server/dashboard/assets/{CsvVizualization-BOuez-fG.js → CsvVizualization-Ck-nZ43m.js} +7 -7
  129. zenml/zen_server/dashboard/assets/DisplayDate-DYgIjlDF.js +1 -0
  130. zenml/zen_server/dashboard/assets/EmptyState-BMLnFVlB.js +1 -0
  131. zenml/zen_server/dashboard/assets/Error-kLtljEOM.js +1 -0
  132. zenml/zen_server/dashboard/assets/ExecutionStatus-DguLLgTK.js +1 -0
  133. zenml/zen_server/dashboard/assets/Helpbox-BXUMP21n.js +1 -0
  134. zenml/zen_server/dashboard/assets/Infobox-DSt0O-dm.js +1 -0
  135. zenml/zen_server/dashboard/assets/InlineAvatar-xsrsIGE-.js +1 -0
  136. zenml/zen_server/dashboard/assets/{MarkdownVisualization-DsB2QZiK.js → MarkdownVisualization-xp3hhULl.js} +2 -2
  137. zenml/zen_server/dashboard/assets/Pagination-C6X-mifw.js +1 -0
  138. zenml/zen_server/dashboard/assets/PasswordChecker-DUveqlva.js +1 -0
  139. zenml/zen_server/dashboard/assets/SetPassword-BXGTWiwj.js +1 -0
  140. zenml/zen_server/dashboard/assets/SuccessStep-DZC60t0x.js +1 -0
  141. zenml/zen_server/dashboard/assets/{UpdatePasswordSchemas-DnM-c11H.js → UpdatePasswordSchemas-DGvwFWO1.js} +1 -1
  142. zenml/zen_server/dashboard/assets/{aws-t0gKCj_R.js → aws-BgKTfTfx.js} +1 -1
  143. zenml/zen_server/dashboard/assets/{check-circle-BVvhm5dy.js → check-circle-i56092KI.js} +1 -1
  144. zenml/zen_server/dashboard/assets/{chevron-right-double-CJ50E9Gr.js → chevron-right-double-CZBOf6JM.js} +1 -1
  145. zenml/zen_server/dashboard/assets/cloud-only-C_yFCAkP.js +1 -0
  146. zenml/zen_server/dashboard/assets/{copy-BRhQz3j-.js → copy-BXNk6BjL.js} +1 -1
  147. zenml/zen_server/dashboard/assets/{database-CRRnyFWh.js → database-1xWSgZfO.js} +1 -1
  148. zenml/zen_server/dashboard/assets/{docker-BAonhm6G.js → docker-CQMVm_4d.js} +1 -1
  149. zenml/zen_server/dashboard/assets/{file-text-CbVERUON.js → file-text-CqD_iu6l.js} +1 -1
  150. zenml/zen_server/dashboard/assets/{help-B8rqCvqn.js → help-bu_DgLKI.js} +1 -1
  151. zenml/zen_server/dashboard/assets/index-BczVOqUf.js +55 -0
  152. zenml/zen_server/dashboard/assets/index-EpMIKgrI.css +1 -0
  153. zenml/zen_server/dashboard/assets/index-rK_Wuy2W.js +1 -0
  154. zenml/zen_server/dashboard/assets/index.esm-Corw4lXQ.js +1 -0
  155. zenml/zen_server/dashboard/assets/{login-mutation-wzzl23C6.js → login-mutation-CrHrndTI.js} +1 -1
  156. zenml/zen_server/dashboard/assets/logs-D8k8BVFf.js +1 -0
  157. zenml/zen_server/dashboard/assets/not-found-DYa4pC-C.js +1 -0
  158. zenml/zen_server/dashboard/assets/package-B3fWP-Dh.js +1 -0
  159. zenml/zen_server/dashboard/assets/page-1h_sD1jz.js +1 -0
  160. zenml/zen_server/dashboard/assets/{page-yN4rZ-ZS.js → page-1iL8aMqs.js} +1 -1
  161. zenml/zen_server/dashboard/assets/{page-Bi5AI0S7.js → page-2grKx_MY.js} +1 -1
  162. zenml/zen_server/dashboard/assets/page-5NCOHOsy.js +1 -0
  163. zenml/zen_server/dashboard/assets/page-8a4UMKXZ.js +1 -0
  164. zenml/zen_server/dashboard/assets/{page-AQKopn_4.js → page-B6h3iaHJ.js} +1 -1
  165. zenml/zen_server/dashboard/assets/page-BDns21Iz.js +1 -0
  166. zenml/zen_server/dashboard/assets/{page-BmkSiYeQ.js → page-BhgCDInH.js} +2 -2
  167. zenml/zen_server/dashboard/assets/{page-BzVZGExK.js → page-Bi-wtWiO.js} +2 -2
  168. zenml/zen_server/dashboard/assets/page-BkeAAYwp.js +1 -0
  169. zenml/zen_server/dashboard/assets/page-BkuQDIf-.js +1 -0
  170. zenml/zen_server/dashboard/assets/page-BnaevhnB.js +1 -0
  171. zenml/zen_server/dashboard/assets/page-Bq0YxkLV.js +1 -0
  172. zenml/zen_server/dashboard/assets/page-Bs2F4eoD.js +2 -0
  173. zenml/zen_server/dashboard/assets/page-C6-UGEbH.js +1 -0
  174. zenml/zen_server/dashboard/assets/page-CCNRIt_f.js +1 -0
  175. zenml/zen_server/dashboard/assets/page-CHNxpz3n.js +1 -0
  176. zenml/zen_server/dashboard/assets/page-DgorQFqi.js +1 -0
  177. zenml/zen_server/dashboard/assets/page-K8ebxVIs.js +1 -0
  178. zenml/zen_server/dashboard/assets/{page-CuT1SUik.js → page-MFQyIJd3.js} +1 -1
  179. zenml/zen_server/dashboard/assets/page-TgCF0P_U.js +1 -0
  180. zenml/zen_server/dashboard/assets/page-ZnCEe-eK.js +9 -0
  181. zenml/zen_server/dashboard/assets/{page-BW6Ket3a.js → page-uA5prJGY.js} +1 -1
  182. zenml/zen_server/dashboard/assets/persist-D7HJNBWx.js +1 -0
  183. zenml/zen_server/dashboard/assets/{play-circle-DK5QMJyp.js → play-circle-CNtZKDnW.js} +1 -1
  184. zenml/zen_server/dashboard/assets/plus-C8WOyCzt.js +1 -0
  185. zenml/zen_server/dashboard/assets/stack-detail-query-Cficsl6d.js +1 -0
  186. zenml/zen_server/dashboard/assets/{terminal-B2ovgWuz.js → terminal-By9cErXc.js} +1 -1
  187. zenml/zen_server/dashboard/assets/update-server-settings-mutation-7d8xi1tS.js +1 -0
  188. zenml/zen_server/dashboard/assets/{url-6_xv0WJS.js → url-D7mAQGUM.js} +1 -1
  189. zenml/zen_server/dashboard/assets/{zod-DrZvVLjd.js → zod-BhoGpZ63.js} +1 -1
  190. zenml/zen_server/dashboard/index.html +7 -7
  191. zenml/zen_server/dashboard_legacy/asset-manifest.json +4 -4
  192. zenml/zen_server/dashboard_legacy/index.html +1 -1
  193. zenml/zen_server/dashboard_legacy/{precache-manifest.f4abc5b7cfa7d90c1caf5521918e29a8.js → precache-manifest.12246c7548e71e2c4438e496360de80c.js} +4 -4
  194. zenml/zen_server/dashboard_legacy/service-worker.js +1 -1
  195. zenml/zen_server/dashboard_legacy/static/js/main.3b27024b.chunk.js +2 -0
  196. zenml/zen_server/dashboard_legacy/static/js/{main.ac2f17d0.chunk.js.map → main.3b27024b.chunk.js.map} +1 -1
  197. zenml/zen_server/deploy/helm/Chart.yaml +1 -1
  198. zenml/zen_server/deploy/helm/README.md +2 -2
  199. zenml/zen_server/feature_gate/zenml_cloud_feature_gate.py +11 -5
  200. zenml/zen_server/pipeline_deployment/utils.py +57 -44
  201. zenml/zen_server/rbac/utils.py +10 -2
  202. zenml/zen_server/rbac/zenml_cloud_rbac.py +11 -5
  203. zenml/zen_server/routers/devices_endpoints.py +4 -1
  204. zenml/zen_server/routers/server_endpoints.py +29 -2
  205. zenml/zen_server/routers/service_connectors_endpoints.py +57 -0
  206. zenml/zen_server/routers/stack_deployment_endpoints.py +158 -0
  207. zenml/zen_server/routers/steps_endpoints.py +2 -1
  208. zenml/zen_server/routers/workspaces_endpoints.py +64 -0
  209. zenml/zen_server/zen_server_api.py +2 -0
  210. zenml/zen_stores/migrations/utils.py +1 -1
  211. zenml/zen_stores/migrations/versions/0.60.0_release.py +23 -0
  212. zenml/zen_stores/migrations/versions/0.61.0_release.py +23 -0
  213. zenml/zen_stores/migrations/versions/0.62.0_release.py +23 -0
  214. zenml/zen_stores/migrations/versions/0d707865f404_adding_labels_to_stacks.py +30 -0
  215. zenml/zen_stores/migrations/versions/b4fca5241eea_migrate_onboarding_state.py +167 -0
  216. zenml/zen_stores/rest_zen_store.py +149 -4
  217. zenml/zen_stores/schemas/component_schemas.py +14 -0
  218. zenml/zen_stores/schemas/server_settings_schemas.py +23 -11
  219. zenml/zen_stores/schemas/stack_schemas.py +10 -0
  220. zenml/zen_stores/schemas/step_run_schemas.py +27 -11
  221. zenml/zen_stores/sql_zen_store.py +450 -6
  222. zenml/zen_stores/zen_store_interface.py +80 -0
  223. {zenml_nightly-0.58.2.dev20240626.dist-info → zenml_nightly-0.62.0.dev20240726.dist-info}/METADATA +35 -13
  224. {zenml_nightly-0.58.2.dev20240626.dist-info → zenml_nightly-0.62.0.dev20240726.dist-info}/RECORD +227 -191
  225. zenml/zen_server/dashboard/assets/@radix-C9DBgJhe.js +0 -77
  226. zenml/zen_server/dashboard/assets/@tanstack-CEbkxrhX.js +0 -30
  227. zenml/zen_server/dashboard/assets/AwarenessChannel-nXGpmj_f.js +0 -1
  228. zenml/zen_server/dashboard/assets/Cards-nwsvQLVS.js +0 -1
  229. zenml/zen_server/dashboard/assets/Commands-DuIWKg_Q.js +0 -1
  230. zenml/zen_server/dashboard/assets/CopyButton-B_YSm-Ds.js +0 -2
  231. zenml/zen_server/dashboard/assets/DisplayDate-BdguISQF.js +0 -1
  232. zenml/zen_server/dashboard/assets/EmptyState-BkooiGtL.js +0 -1
  233. zenml/zen_server/dashboard/assets/Error-B6M0dPph.js +0 -1
  234. zenml/zen_server/dashboard/assets/Helpbox-BQoqCm04.js +0 -1
  235. zenml/zen_server/dashboard/assets/Infobox-Ce9mefqU.js +0 -1
  236. zenml/zen_server/dashboard/assets/InlineAvatar-DGf3dVhV.js +0 -1
  237. zenml/zen_server/dashboard/assets/PageHeader-DGaemzjc.js +0 -1
  238. zenml/zen_server/dashboard/assets/Pagination-DVYfBCCc.js +0 -1
  239. zenml/zen_server/dashboard/assets/PasswordChecker-DSLBp7Vl.js +0 -1
  240. zenml/zen_server/dashboard/assets/SetPassword-B5s7DJug.js +0 -1
  241. zenml/zen_server/dashboard/assets/SuccessStep-ZzczaM7g.js +0 -1
  242. zenml/zen_server/dashboard/assets/chevron-down-zcvCWmyP.js +0 -1
  243. zenml/zen_server/dashboard/assets/cloud-only-Ba_ShBR5.js +0 -1
  244. zenml/zen_server/dashboard/assets/index-CWJ3xbIf.css +0 -1
  245. zenml/zen_server/dashboard/assets/index-QORVVTMN.js +0 -55
  246. zenml/zen_server/dashboard/assets/index.esm-F7nqy9zY.js +0 -1
  247. zenml/zen_server/dashboard/assets/not-found-Dh2la7kh.js +0 -1
  248. zenml/zen_server/dashboard/assets/page-B-5jAKoO.js +0 -1
  249. zenml/zen_server/dashboard/assets/page-B-vWk8a6.js +0 -1
  250. zenml/zen_server/dashboard/assets/page-B0BrqfS8.js +0 -1
  251. zenml/zen_server/dashboard/assets/page-BQxVFlUl.js +0 -1
  252. zenml/zen_server/dashboard/assets/page-ByrHy6Ss.js +0 -1
  253. zenml/zen_server/dashboard/assets/page-CPtY4Kv_.js +0 -1
  254. zenml/zen_server/dashboard/assets/page-CmmukLsl.js +0 -1
  255. zenml/zen_server/dashboard/assets/page-D2D-7qyr.js +0 -9
  256. zenml/zen_server/dashboard/assets/page-DAQQyLxT.js +0 -1
  257. zenml/zen_server/dashboard/assets/page-DHkUMl_E.js +0 -1
  258. zenml/zen_server/dashboard/assets/page-DZCbwOEs.js +0 -2
  259. zenml/zen_server/dashboard/assets/page-DdaIt20-.js +0 -1
  260. zenml/zen_server/dashboard/assets/page-LqLs24Ot.js +0 -1
  261. zenml/zen_server/dashboard/assets/page-lebv0c7C.js +0 -1
  262. zenml/zen_server/dashboard/assets/update-server-settings-mutation-0Wgz8pUE.js +0 -1
  263. zenml/zen_server/dashboard_legacy/static/js/main.ac2f17d0.chunk.js +0 -2
  264. {zenml_nightly-0.58.2.dev20240626.dist-info → zenml_nightly-0.62.0.dev20240726.dist-info}/LICENSE +0 -0
  265. {zenml_nightly-0.58.2.dev20240626.dist-info → zenml_nightly-0.62.0.dev20240726.dist-info}/WHEEL +0 -0
  266. {zenml_nightly-0.58.2.dev20240626.dist-info → zenml_nightly-0.62.0.dev20240726.dist-info}/entry_points.txt +0 -0
zenml/integrations/databricks/orchestrators/databricks_orchestrator.py
@@ -0,0 +1,497 @@
+ # Copyright (c) ZenML GmbH 2023. All Rights Reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at:
+ #
+ # https://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ # or implied. See the License for the specific language governing
+ # permissions and limitations under the License.
+ """Implementation of the Databricks orchestrator."""
+
+ import itertools
+ import os
+ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Type, cast
+ from uuid import UUID
+
+ from databricks.sdk import WorkspaceClient as DatabricksClient
+ from databricks.sdk.service.compute import (
+     AutoScale,
+     ClientsTypes,
+     ClusterSpec,
+     WorkloadType,
+ )
+ from databricks.sdk.service.jobs import CronSchedule, JobCluster
+ from databricks.sdk.service.jobs import Task as DatabricksTask
+
+ from zenml.client import Client
+ from zenml.constants import (
+     ENV_ZENML_CUSTOM_SOURCE_ROOT,
+     METADATA_ORCHESTRATOR_URL,
+ )
+ from zenml.integrations.databricks.flavors.databricks_orchestrator_flavor import (
+     DatabricksOrchestratorConfig,
+     DatabricksOrchestratorSettings,
+ )
+ from zenml.integrations.databricks.orchestrators.databricks_orchestrator_entrypoint_config import (
+     ENV_ZENML_DATABRICKS_ORCHESTRATOR_RUN_ID,
+     DatabricksEntrypointConfiguration,
+ )
+ from zenml.integrations.databricks.utils.databricks_utils import (
+     convert_step_to_task,
+ )
+ from zenml.io import fileio
+ from zenml.logger import get_logger
+ from zenml.metadata.metadata_types import MetadataType, Uri
+ from zenml.models.v2.core.schedule import ScheduleResponse
+ from zenml.orchestrators.utils import get_orchestrator_run_name
+ from zenml.orchestrators.wheeled_orchestrator import WheeledOrchestrator
+ from zenml.stack import StackValidator
+ from zenml.utils import io_utils
+ from zenml.utils.pipeline_docker_image_builder import (
+     PipelineDockerImageBuilder,
+ )
+
+ if TYPE_CHECKING:
+     from zenml.models import PipelineDeploymentResponse
+     from zenml.stack import Stack
+
+
+ logger = get_logger(__name__)
+
+ ZENML_STEP_DEFAULT_ENTRYPOINT_COMMAND = "entrypoint.main"
+ DATABRICKS_WHEELS_DIRECTORY_PREFIX = "dbfs:/FileStore/zenml"
+ DATABRICKS_LOCAL_FILESYSTEM_PREFIX = "file:/"
+ DATABRICKS_CLUSTER_DEFAULT_NAME = "zenml-databricks-cluster"
+ DATABRICKS_SPARK_DEFAULT_VERSION = "15.3.x-scala2.12"
+ DATABRICKS_JOB_ID_PARAMETER_REFERENCE = "{{job.id}}"
+ DATABRICKS_ZENML_DEFAULT_CUSTOM_REPOSITORY_PATH = "."
+
+
+ class DatabricksOrchestrator(WheeledOrchestrator):
+     """Orchestrator responsible for running pipelines remotely on Databricks.
+
+     Schedules are only supported via a `cron_expression`; all other schedule
+     properties are ignored.
+     """
+
+     # The default instance type to use if none is specified in settings
+     DEFAULT_INSTANCE_TYPE: Optional[str] = None
+
+     @property
+     def validator(self) -> Optional[StackValidator]:
+         """Validates the stack.
+
+         In the remote case, checks that the stack contains only remote,
+         non-local components.
+
+         Returns:
+             A `StackValidator` instance.
+         """
+
+         def _validate_remote_components(
+             stack: "Stack",
+         ) -> Tuple[bool, str]:
+             for component in stack.components.values():
+                 if not component.config.is_local:
+                     continue
+
+                 return False, (
+                     f"The Databricks orchestrator runs pipelines remotely, "
+                     f"but the '{component.name}' {component.type.value} is "
+                     "a local stack component and will not be available in "
+                     "the Databricks step.\nPlease ensure that you always "
+                     "use non-local stack components with the Databricks "
+                     "orchestrator."
+                 )
+
+             return True, ""
+
+         return StackValidator(
+             custom_validation_function=_validate_remote_components,
+         )
+
+     def _get_databricks_client(
+         self,
+     ) -> DatabricksClient:
+         """Creates a Databricks client.
+
+         Returns:
+             The Databricks client.
+         """
+         return DatabricksClient(
+             host=self.config.host,
+             client_id=self.config.client_id,
+             client_secret=self.config.client_secret,
+         )
+
+     @property
+     def config(self) -> DatabricksOrchestratorConfig:
+         """Returns the `DatabricksOrchestratorConfig` config.
+
+         Returns:
+             The configuration.
+         """
+         return cast(DatabricksOrchestratorConfig, self._config)
+
+     @property
+     def settings_class(self) -> Type[DatabricksOrchestratorSettings]:
+         """Settings class for the Databricks orchestrator.
+
+         Returns:
+             The settings class.
+         """
+         return DatabricksOrchestratorSettings
+
+     def get_orchestrator_run_id(self) -> str:
+         """Returns the active orchestrator run id.
+
+         Raises:
+             RuntimeError: If no run id can be read from the environment.
+                 This happens when this method gets called while the
+                 orchestrator is not running a pipeline.
+
+         Returns:
+             The orchestrator run id.
+         """
+         try:
+             return os.environ[ENV_ZENML_DATABRICKS_ORCHESTRATOR_RUN_ID]
+         except KeyError:
+             raise RuntimeError(
+                 "Unable to read run id from environment variable "
+                 f"{ENV_ZENML_DATABRICKS_ORCHESTRATOR_RUN_ID}."
+             )
+
+     @property
+     def root_directory(self) -> str:
+         """Path to the root directory for all files concerning this orchestrator.
+
+         Returns:
+             Path to the root directory.
+         """
+         return os.path.join(
+             io_utils.get_global_config_directory(),
+             "databricks",
+             str(self.id),
+         )
+
+     @property
+     def pipeline_directory(self) -> str:
+         """Returns path to a directory in which the Databricks pipeline files are stored.
+
+         Returns:
+             Path to the pipeline directory.
+         """
+         return os.path.join(self.root_directory, "pipelines")
+
+     def setup_credentials(self) -> None:
+         """Set up credentials for the orchestrator."""
+         connector = self.get_connector()
+         assert connector is not None
+         connector.configure_local_client()
+
+     def prepare_or_run_pipeline(
+         self,
+         deployment: "PipelineDeploymentResponse",
+         stack: "Stack",
+         environment: Dict[str, str],
+     ) -> Any:
+         """Creates a wheel and uploads the pipeline to Databricks.
+
+         This functions as an intermediary representation of the pipeline which
+         is then deployed to Databricks as a job.
+
+         How it works:
+         -------------
+         Before this method is called, the `prepare_pipeline_deployment()`
+         method builds a Docker image that contains the code for the
+         pipeline, all steps, and the context around these files.
+
+         Based on this Docker image, a callable is created which builds a
+         task for each step (`_construct_databricks_pipeline`).
+         To do this, the entrypoint of the Docker image is configured to
+         run the correct step within the Docker image. The dependencies
+         between these tasks are then configured on each task by pointing
+         at its upstream steps.
+
+         Args:
+             deployment: The pipeline deployment to prepare or run.
+             stack: The stack the pipeline will run on.
+             environment: Environment variables to set in the orchestration
+                 environment.
+
+         Raises:
+             ValueError: If a schedule is passed without a `cron_expression`,
+                 or if a `cron_expression` is passed without a
+                 `schedule_timezone`.
+         """
+         if deployment.schedule:
+             if (
+                 deployment.schedule.catchup
+                 or deployment.schedule.interval_second
+             ):
+                 logger.warning(
+                     "Databricks orchestrator only uses schedules with the "
+                     "`cron_expression` property, with optional `start_time` and/or `end_time`. "
+                     "All other properties are ignored."
+                 )
+             if deployment.schedule.cron_expression is None:
+                 raise ValueError(
+                     "Property `cron_expression` must be set when passing "
+                     "a schedule to the Databricks orchestrator."
+                 )
+             if (
+                 deployment.schedule.cron_expression
+                 and self.settings_class().schedule_timezone is None
+             ):
+                 raise ValueError(
+                     "Property `schedule_timezone` must be set when passing "
+                     "`cron_expression` to a Databricks orchestrator. "
+                     "The Databricks orchestrator requires a Java Timezone ID to run the pipeline on schedule. "
+                     "Please refer to https://docs.oracle.com/middleware/1221/wcs/tag-ref/MISC/TimeZones.html for more information."
+                 )
+
+         # Get deployment id
+         deployment_id = deployment.id
+
+         # Create a callable that builds the list of Databricks tasks.
+         def _construct_databricks_pipeline(
+             zenml_project_wheel: str, job_cluster_key: str
+         ) -> List[DatabricksTask]:
+             """Create a Databricks task for each step.
+
+             This should contain the name of the step or task and configures the
+             entrypoint of the task to run the step.
+
+             Additionally, this gives each task information about its
+             direct upstream steps.
+
+             Args:
+                 zenml_project_wheel: The wheel package containing the ZenML
+                     project.
+                 job_cluster_key: The ID of the Databricks job cluster.
+
+             Returns:
+                 A list of Databricks tasks.
+             """
+             tasks = []
+             for step_name, step in deployment.step_configurations.items():
+                 # The arguments are used to configure the entrypoint of the
+                 # step when it is executed on Databricks.
+                 arguments = DatabricksEntrypointConfiguration.get_entrypoint_arguments(
+                     step_name=step_name,
+                     deployment_id=deployment_id,
+                     wheel_package=self.package_name,
+                     databricks_job_id=DATABRICKS_JOB_ID_PARAMETER_REFERENCE,
+                 )
+
+                 # Find the upstream tasks of the current step and configure
+                 # the current task to run after them.
+                 upstream_steps = [
+                     f"{deployment_id}_{upstream_step_name}"
+                     for upstream_step_name in step.spec.upstream_steps
+                 ]
+
+                 docker_settings = step.config.docker_settings
+                 docker_image_builder = PipelineDockerImageBuilder()
+                 # Gather the requirements files
+                 requirements_files = (
+                     docker_image_builder.gather_requirements_files(
+                         docker_settings=docker_settings,
+                         stack=Client().active_stack,
+                         log=False,
+                     )
+                 )
+
+                 # Extract and clean the requirements
+                 requirements = list(
+                     itertools.chain.from_iterable(
+                         r[1].strip().split("\n") for r in requirements_files
+                     )
+                 )
+
+                 # Remove empty items and duplicates
+                 requirements = sorted(set(filter(None, requirements)))
+
+                 task = convert_step_to_task(
+                     f"{deployment_id}_{step_name}",
+                     ZENML_STEP_DEFAULT_ENTRYPOINT_COMMAND,
+                     arguments,
+                     requirements,
+                     depends_on=upstream_steps,
+                     zenml_project_wheel=zenml_project_wheel,
+                     job_cluster_key=job_cluster_key,
+                 )
+                 tasks.append(task)
+             return tasks
+
+         # Get the orchestrator run name
+         orchestrator_run_name = get_orchestrator_run_name(
+             pipeline_name=deployment.pipeline_configuration.name
+         )
+         # Get a filepath to use to save the finished yaml to
+         fileio.makedirs(self.pipeline_directory)
+         pipeline_file_path = os.path.join(
+             self.pipeline_directory, f"{orchestrator_run_name}.yaml"
+         )
+
+         # Copy the repository to a temporary directory and add a setup.py file
+         repository_temp_dir = (
+             self.copy_repository_to_temp_dir_and_add_setup_py()
+         )
+
+         # Create a wheel for the package in the temporary directory
+         wheel_path = self.create_wheel(temp_dir=repository_temp_dir)
+
+         databricks_client = self._get_databricks_client()
+
+         # Create an empty folder on DBFS and copy the wheel there.
+         deployment_name = (
+             deployment.pipeline.name if deployment.pipeline else "default"
+         )
+         databricks_directory = f"{DATABRICKS_WHEELS_DIRECTORY_PREFIX}/{deployment_name}/{orchestrator_run_name}"
+         databricks_wheel_path = (
+             f"{databricks_directory}/{wheel_path.rsplit('/', 1)[-1]}"
+         )
+
+         databricks_client.dbutils.fs.mkdirs(databricks_directory)
+         databricks_client.dbutils.fs.cp(
+             f"{DATABRICKS_LOCAL_FILESYSTEM_PREFIX}/{wheel_path}",
+             databricks_wheel_path,
+         )
+
+         # Construct the env variables for the pipeline
+         env_vars = environment.copy()
+         spark_env_vars = self.settings_class().spark_env_vars
+         if spark_env_vars:
+             for key, value in spark_env_vars.items():
+                 env_vars[key] = value
+         env_vars[ENV_ZENML_CUSTOM_SOURCE_ROOT] = (
+             DATABRICKS_ZENML_DEFAULT_CUSTOM_REPOSITORY_PATH
+         )
+
+         fileio.rmtree(repository_temp_dir)
+
+         logger.info(
+             "Writing Databricks workflow definition to `%s`.",
+             pipeline_file_path,
+         )
+
+         # Use the Databricks client to upload and run the pipeline.
+         job_cluster_key = self.sanitize_name(f"{deployment_id}")
+         self._upload_and_run_pipeline(
+             pipeline_name=orchestrator_run_name,
+             tasks=_construct_databricks_pipeline(
+                 databricks_wheel_path, job_cluster_key
+             ),
+             env_vars=env_vars,
+             job_cluster_key=job_cluster_key,
+             schedule=deployment.schedule,
+         )
+
+     def _upload_and_run_pipeline(
+         self,
+         pipeline_name: str,
+         tasks: List[DatabricksTask],
+         env_vars: Dict[str, str],
+         job_cluster_key: str,
+         schedule: Optional["ScheduleResponse"] = None,
+     ) -> None:
+         """Uploads and runs the pipeline as a Databricks job.
+
+         Args:
+             pipeline_name: The name of the pipeline.
+             tasks: The list of tasks to run.
+             env_vars: The environment variables.
+             job_cluster_key: The ID of the Databricks job cluster.
+             schedule: The schedule to run the pipeline on.
+
+         Raises:
+             ValueError: If the `Job Compute` policy is not found.
+             ValueError: If `schedule_timezone` is not set when a cron
+                 expression is passed.
+             ValueError: If the job id cannot be retrieved for the created job.
+         """
+         databricks_client = self._get_databricks_client()
+         spark_conf = self.settings_class().spark_conf or {}
+         spark_conf[
+             "spark.databricks.driver.dbfsLibraryInstallationAllowed"
+         ] = "true"
+
+         policy_id = self.settings_class().policy_id or None
+         for policy in databricks_client.cluster_policies.list():
+             if policy.name == "Job Compute":
+                 policy_id = policy.policy_id
+         if policy_id is None:
+             raise ValueError(
+                 "Could not find the `Job Compute` policy in Databricks."
+             )
+         job_cluster = JobCluster(
+             job_cluster_key=job_cluster_key,
+             new_cluster=ClusterSpec(
+                 spark_version=self.settings_class().spark_version
+                 or DATABRICKS_SPARK_DEFAULT_VERSION,
+                 num_workers=self.settings_class().num_workers,
+                 node_type_id=self.settings_class().node_type_id
+                 or "Standard_D4s_v5",
+                 policy_id=policy_id,
+                 autoscale=AutoScale(
+                     min_workers=self.settings_class().autoscale[0],
+                     max_workers=self.settings_class().autoscale[1],
+                 ),
+                 single_user_name=self.settings_class().single_user_name,
+                 spark_env_vars=env_vars,
+                 spark_conf=spark_conf,
+                 workload_type=WorkloadType(
+                     clients=ClientsTypes(jobs=True, notebooks=False)
+                 ),
+             ),
+         )
+         if schedule and schedule.cron_expression:
+             schedule_timezone = self.settings_class().schedule_timezone
+             if schedule_timezone:
+                 databricks_schedule = CronSchedule(
+                     quartz_cron_expression=schedule.cron_expression,
+                     timezone_id=schedule_timezone,
+                 )
+             else:
+                 raise ValueError(
+                     "Property `schedule_timezone` must be set when passing "
+                     "`cron_expression` to a Databricks orchestrator. "
+                     "The Databricks orchestrator requires a Java Timezone ID to run the pipeline on schedule. "
+                     "Please refer to https://docs.oracle.com/middleware/1221/wcs/tag-ref/MISC/TimeZones.html for more information."
+                 )
+         else:
+             databricks_schedule = None
+
+         job = databricks_client.jobs.create(
+             name=pipeline_name,
+             tasks=tasks,
+             job_clusters=[job_cluster],
+             schedule=databricks_schedule,
+         )
+         if job.job_id:
+             databricks_client.jobs.run_now(job_id=job.job_id)
+         else:
+             raise ValueError("An error occurred while getting the job id.")
+
+     def get_pipeline_run_metadata(
+         self, run_id: UUID
+     ) -> Dict[str, "MetadataType"]:
+         """Get general component-specific metadata for a pipeline run.
+
+         Args:
+             run_id: The ID of the pipeline run.
+
+         Returns:
+             A dictionary of metadata.
+         """
+         run_url = (
+             f"{self.config.host}/jobs/{self.get_orchestrator_run_id()}"
+         )
+         return {
+             METADATA_ORCHESTRATOR_URL: Uri(run_url),
+         }
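
For orientation, here is a minimal, hypothetical sketch of how the settings fields referenced in this orchestrator (`spark_version`, `node_type_id`, `autoscale`, `schedule_timezone`, `spark_env_vars`, ...) might be supplied to a pipeline via ZenML's generic settings mechanism; the exact settings key, field types, and defaults are not shown in this diff and may differ:

```python
# Hypothetical usage sketch based on the settings fields referenced in the
# orchestrator above; not taken from this diff.
from zenml import pipeline
from zenml.integrations.databricks.flavors.databricks_orchestrator_flavor import (
    DatabricksOrchestratorSettings,
)

databricks_settings = DatabricksOrchestratorSettings(
    spark_version="15.3.x-scala2.12",  # otherwise DATABRICKS_SPARK_DEFAULT_VERSION is used
    node_type_id="Standard_D4s_v5",
    autoscale=(2, 4),  # read as (min_workers, max_workers) above
    schedule_timezone="Europe/Berlin",  # required when scheduling via cron_expression
    spark_env_vars={"MY_ENV_VAR": "my_value"},
)


@pipeline(settings={"orchestrator": databricks_settings})
def my_databricks_pipeline() -> None:
    ...
```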
zenml/integrations/databricks/orchestrators/databricks_orchestrator_entrypoint_config.py
@@ -0,0 +1,97 @@
+ # Copyright (c) ZenML GmbH 2023. All Rights Reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at:
+ #
+ # https://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ # or implied. See the License for the specific language governing
+ # permissions and limitations under the License.
+ """Entrypoint configuration for ZenML Databricks pipeline steps."""
+
+ import os
+ import sys
+ from typing import Any, List, Set
+
+ import pkg_resources
+
+ from zenml.entrypoints.step_entrypoint_configuration import (
+     StepEntrypointConfiguration,
+ )
+
+ WHEEL_PACKAGE_OPTION = "wheel_package"
+ DATABRICKS_JOB_ID_OPTION = "databricks_job_id"
+ ENV_ZENML_DATABRICKS_ORCHESTRATOR_RUN_ID = (
+     "ZENML_DATABRICKS_ORCHESTRATOR_RUN_ID"
+ )
+
+
+ class DatabricksEntrypointConfiguration(StepEntrypointConfiguration):
+     """Entrypoint configuration for ZenML Databricks pipeline steps.
+
+     The only purpose of this entrypoint configuration is to make the wheel
+     package containing the user code importable and to expose the Databricks
+     job id to the step via an environment variable.
+     """
+
+     @classmethod
+     def get_entrypoint_options(cls) -> Set[str]:
+         """Gets all options required for running with this configuration.
+
+         Returns:
+             The superclass options as well as options for the wheel package
+             and the Databricks job id.
+         """
+         return (
+             super().get_entrypoint_options()
+             | {WHEEL_PACKAGE_OPTION}
+             | {DATABRICKS_JOB_ID_OPTION}
+         )
+
+     @classmethod
+     def get_entrypoint_arguments(
+         cls,
+         **kwargs: Any,
+     ) -> List[str]:
+         """Gets all arguments that the entrypoint command should be called with.
+
+         The argument list should be something that
+         `argparse.ArgumentParser.parse_args(...)` can handle (e.g.
+         `["--some_option", "some_value"]` or `["--some_option=some_value"]`).
+         It needs to provide values for all options returned by the
+         `get_entrypoint_options()` method of this class.
+
+         Args:
+             **kwargs: Kwargs; must include the wheel package name and the
+                 Databricks job id.
+
+         Returns:
+             The superclass arguments as well as arguments for the wheel
+             package and the Databricks job id.
+         """
+         return super().get_entrypoint_arguments(**kwargs) + [
+             f"--{WHEEL_PACKAGE_OPTION}",
+             kwargs[WHEEL_PACKAGE_OPTION],
+             f"--{DATABRICKS_JOB_ID_OPTION}",
+             kwargs[DATABRICKS_JOB_ID_OPTION],
+         ]
+
+     def run(self) -> None:
+         """Runs the step."""
+         # Get the wheel package and add it to the sys path
+         wheel_package = self.entrypoint_args[WHEEL_PACKAGE_OPTION]
+         distribution = pkg_resources.get_distribution(wheel_package)
+         project_root = os.path.join(distribution.location, wheel_package)
+         if project_root not in sys.path:
+             sys.path.insert(0, project_root)
+             sys.path.insert(-1, project_root)
+
+         # Get the job id and add it to the environment
+         databricks_job_id = self.entrypoint_args[DATABRICKS_JOB_ID_OPTION]
+         os.environ[ENV_ZENML_DATABRICKS_ORCHESTRATOR_RUN_ID] = (
+             databricks_job_id
+         )
+
+         # Run the step
+         super().run()
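
To illustrate the option round trip added here, a small hypothetical example (the values are made up; `step_name` and `deployment_id` are options handled by the parent `StepEntrypointConfiguration`):

```python
# Illustrative only: shows the extra CLI options this configuration adds.
from zenml.integrations.databricks.orchestrators.databricks_orchestrator_entrypoint_config import (
    DatabricksEntrypointConfiguration,
)

args = DatabricksEntrypointConfiguration.get_entrypoint_arguments(
    step_name="trainer",  # handled by the parent StepEntrypointConfiguration
    deployment_id="11111111-2222-3333-4444-555555555555",  # handled by the parent class
    wheel_package="my_zenml_project",  # hypothetical wheel/package name
    databricks_job_id="{{job.id}}",  # DATABRICKS_JOB_ID_PARAMETER_REFERENCE in the orchestrator
)
# `args` ends with:
#   ["--wheel_package", "my_zenml_project", "--databricks_job_id", "{{job.id}}"]
# and `run()` later adds the wheel to `sys.path` and exports the job id as
# ZENML_DATABRICKS_ORCHESTRATOR_RUN_ID before executing the step.
```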
zenml/integrations/databricks/services/__init__.py
@@ -0,0 +1,19 @@
+ # Copyright (c) ZenML GmbH 2023. All Rights Reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at:
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ # or implied. See the License for the specific language governing
+ # permissions and limitations under the License.
+ """Initialization of the Databricks Service."""
+
+ from zenml.integrations.databricks.services.databricks_deployment import (  # noqa
+     DatabricksDeploymentConfig,
+     DatabricksDeploymentService,
+ )