ygg 0.1.48__py3-none-any.whl → 0.1.49__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ygg-0.1.48.dist-info → ygg-0.1.49.dist-info}/METADATA +3 -1
- {ygg-0.1.48.dist-info → ygg-0.1.49.dist-info}/RECORD +14 -14
- yggdrasil/databricks/compute/cluster.py +45 -19
- yggdrasil/databricks/compute/execution_context.py +19 -11
- yggdrasil/databricks/compute/remote.py +4 -1
- yggdrasil/databricks/workspaces/io.py +21 -9
- yggdrasil/databricks/workspaces/path.py +9 -5
- yggdrasil/databricks/workspaces/workspace.py +45 -27
- yggdrasil/pyutils/python_env.py +7 -4
- yggdrasil/version.py +1 -1
- {ygg-0.1.48.dist-info → ygg-0.1.49.dist-info}/WHEEL +0 -0
- {ygg-0.1.48.dist-info → ygg-0.1.49.dist-info}/entry_points.txt +0 -0
- {ygg-0.1.48.dist-info → ygg-0.1.49.dist-info}/licenses/LICENSE +0 -0
- {ygg-0.1.48.dist-info → ygg-0.1.49.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ygg
|
|
3
|
-
Version: 0.1.48
|
|
3
|
+
Version: 0.1.49
|
|
4
4
|
Summary: Type-friendly utilities for moving data between Python objects, Arrow, Polars, Pandas, Spark, and Databricks
|
|
5
5
|
Author: Yggdrasil contributors
|
|
6
6
|
License: Apache License
|
|
@@ -235,6 +235,8 @@ Requires-Dist: pytest-asyncio; extra == "dev"
|
|
|
235
235
|
Requires-Dist: black; extra == "dev"
|
|
236
236
|
Requires-Dist: ruff; extra == "dev"
|
|
237
237
|
Requires-Dist: mypy; extra == "dev"
|
|
238
|
+
Requires-Dist: build; extra == "dev"
|
|
239
|
+
Requires-Dist: twine; extra == "dev"
|
|
238
240
|
Dynamic: license-file
|
|
239
241
|
|
|
240
242
|
# Yggdrasil (Python)
|
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
ygg-0.1.48.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
|
|
1
|
+
ygg-0.1.49.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
|
|
2
2
|
yggdrasil/__init__.py,sha256=4-ghPak2S6zfMqmnlxW2GCgPb5s79znpKa2hGEGXcE4,24
|
|
3
|
-
yggdrasil/version.py,sha256=
|
|
3
|
+
yggdrasil/version.py,sha256=pnii9XXudF0U50FobVvNgNzGy9lA9q_DntGQAvyqaFA,22
|
|
4
4
|
yggdrasil/databricks/__init__.py,sha256=skctY2c8W-hI81upx9F_PWRe5ishL3hrdiTuizgDjdw,152
|
|
5
5
|
yggdrasil/databricks/compute/__init__.py,sha256=NvdzmaJSNYY1uJthv1hHdBuNu3bD_-Z65DWnaJt9yXg,289
|
|
6
|
-
yggdrasil/databricks/compute/cluster.py,sha256=
|
|
7
|
-
yggdrasil/databricks/compute/execution_context.py,sha256=
|
|
8
|
-
yggdrasil/databricks/compute/remote.py,sha256=
|
|
6
|
+
yggdrasil/databricks/compute/cluster.py,sha256=YomLfvB0oxbgl6WDgBRxI1UXsxwlEbR6gq3FUbPHscY,44199
|
|
7
|
+
yggdrasil/databricks/compute/execution_context.py,sha256=jIV6uru2NeX3O5lg-3KEqmXtLxxq45CFgkBQgQIIOHQ,23327
|
|
8
|
+
yggdrasil/databricks/compute/remote.py,sha256=yicEhyQypssRa2ByscO36s3cBkEgORFsRME9aaq91Pc,3045
|
|
9
9
|
yggdrasil/databricks/jobs/__init__.py,sha256=snxGSJb0M5I39v0y3IR-uEeSlZR248cQ_4DJ1sYs-h8,154
|
|
10
10
|
yggdrasil/databricks/jobs/config.py,sha256=9LGeHD04hbfy0xt8_6oobC4moKJh4_DTjZiK4Q2Tqjk,11557
|
|
11
11
|
yggdrasil/databricks/sql/__init__.py,sha256=Vp_1cFaX1l-JGzCknvkbiB8CBFX2fQbBNntIeVn3lEg,231
|
|
@@ -16,10 +16,10 @@ yggdrasil/databricks/sql/types.py,sha256=5G-BM9_eOsRKEMzeDTWUsWW5g4Idvs-czVCpOCr
|
|
|
16
16
|
yggdrasil/databricks/sql/warehouse.py,sha256=1J0dyQLJb-OS1_1xU1eAVZ4CoL2-FhFeowKSvU3RzFc,9773
|
|
17
17
|
yggdrasil/databricks/workspaces/__init__.py,sha256=dv2zotoFVhNFlTCdRq6gwf5bEzeZkOZszoNZMs0k59g,114
|
|
18
18
|
yggdrasil/databricks/workspaces/filesytem.py,sha256=Z8JXU7_XUEbw9fpTQT1avRQKi-IAP2KemXBMPkUoY4w,9805
|
|
19
|
-
yggdrasil/databricks/workspaces/io.py,sha256=
|
|
20
|
-
yggdrasil/databricks/workspaces/path.py,sha256
|
|
19
|
+
yggdrasil/databricks/workspaces/io.py,sha256=CDq9NsYFjlSJ1QbKFlfWvZLQPVoWyZ4b3XR_lxNPcZE,32776
|
|
20
|
+
yggdrasil/databricks/workspaces/path.py,sha256=BxDwxE7q1-NLKEZQT4xLM3LeCeQKO3wUy7R-Ce-cSMk,47875
|
|
21
21
|
yggdrasil/databricks/workspaces/path_kind.py,sha256=Xc319NysH8_6E9C0Q8nCxDHYG07_SnzyUVKHe0dNdDQ,305
|
|
22
|
-
yggdrasil/databricks/workspaces/workspace.py,sha256=
|
|
22
|
+
yggdrasil/databricks/workspaces/workspace.py,sha256=zBlQdYNT_xKwUCYo3O4Q4g-8pfMvff3I26efyCfY_TY,24961
|
|
23
23
|
yggdrasil/dataclasses/__init__.py,sha256=_RkhfF3KC1eSORby1dzvBXQ0-UGG3u6wyUQWX2jq1Pc,108
|
|
24
24
|
yggdrasil/dataclasses/dataclass.py,sha256=LxrCjwvmBnb8yRI_N-c31RHHxB4XoJPixmKg9iBIuaI,1148
|
|
25
25
|
yggdrasil/libs/__init__.py,sha256=zdC9OU0Xy36CLY9mg2drxN6S7isPR8aTLzJA6xVIeLE,91
|
|
@@ -37,7 +37,7 @@ yggdrasil/pyutils/exceptions.py,sha256=ssKNm-rjhavHUOZmGA7_1Gq9tSHDrb2EFI-cnBuWg
|
|
|
37
37
|
yggdrasil/pyutils/expiring_dict.py,sha256=pr2u25LGwPVbLfsLptiHGovUtYRRo0AMjaJtCtJl7nQ,8477
|
|
38
38
|
yggdrasil/pyutils/modules.py,sha256=B7IP99YqUMW6-DIESFzBx8-09V1d0a8qrIJUDFhhL2g,11424
|
|
39
39
|
yggdrasil/pyutils/parallel.py,sha256=ubuq2m9dJzWYUyKCga4Y_9bpaeMYUrleYxdp49CHr44,6781
|
|
40
|
-
yggdrasil/pyutils/python_env.py,sha256=
|
|
40
|
+
yggdrasil/pyutils/python_env.py,sha256=Gh5geFK9ABpyWEfyegGUfIJUoPxKwcH0pqLBiMrW9Rw,51103
|
|
41
41
|
yggdrasil/pyutils/retry.py,sha256=n5sr-Zu7fYrdLbjJ4WifK2lk0gEGmHv5FYt2HaCm1Qc,11916
|
|
42
42
|
yggdrasil/requests/__init__.py,sha256=dMesyzq97_DmI765x0TwaDPEfsxFtgGNgchk8LvEN-o,103
|
|
43
43
|
yggdrasil/requests/msal.py,sha256=s2GCyzbgFdgdlJ1JqMrZ4qYVbmoG46-ZOTcaVQhZ-sQ,9220
|
|
@@ -55,8 +55,8 @@ yggdrasil/types/cast/registry.py,sha256=_zdFGmUBB7P-e_LIcJlOxMcxAkXoA-UXB6HqLMgT
|
|
|
55
55
|
yggdrasil/types/cast/spark_cast.py,sha256=_KAsl1DqmKMSfWxqhVE7gosjYdgiL1C5bDQv6eP3HtA,24926
|
|
56
56
|
yggdrasil/types/cast/spark_pandas_cast.py,sha256=BuTiWrdCANZCdD_p2MAytqm74eq-rdRXd-LGojBRrfU,5023
|
|
57
57
|
yggdrasil/types/cast/spark_polars_cast.py,sha256=btmZNHXn2NSt3fUuB4xg7coaE0RezIBdZD92H8NK0Jw,9073
|
|
58
|
-
ygg-0.1.
|
|
59
|
-
ygg-0.1.
|
|
60
|
-
ygg-0.1.
|
|
61
|
-
ygg-0.1.
|
|
62
|
-
ygg-0.1.48.dist-info/RECORD,,
|
|
58
|
+
ygg-0.1.49.dist-info/METADATA,sha256=CHTqeVyiYa1868ZDwISDHKyXYxPeUH0mHhvHLYYoDbg,18528
|
|
59
|
+
ygg-0.1.49.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
60
|
+
ygg-0.1.49.dist-info/entry_points.txt,sha256=6q-vpWG3kvw2dhctQ0LALdatoeefkN855Ev02I1dKGY,70
|
|
61
|
+
ygg-0.1.49.dist-info/top_level.txt,sha256=iBe9Kk4VIVbLpgv_p8OZUIfxgj4dgJ5wBg6vO3rigso,10
|
|
62
|
+
ygg-0.1.49.dist-info/RECORD,,
|
|
@@ -144,6 +144,7 @@ class Cluster(WorkspaceService):
|
|
|
144
144
|
single_user_name: Optional[str] = None,
|
|
145
145
|
runtime_engine: Optional["RuntimeEngine"] = None,
|
|
146
146
|
libraries: Optional[list[str]] = None,
|
|
147
|
+
update_timeout: Optional[Union[float, dt.timedelta]] = dt.timedelta(minutes=20),
|
|
147
148
|
**kwargs
|
|
148
149
|
) -> "Cluster":
|
|
149
150
|
"""Create or reuse a cluster that mirrors the current Python environment.
|
|
@@ -152,9 +153,10 @@ class Cluster(WorkspaceService):
|
|
|
152
153
|
workspace: Workspace to use for the cluster.
|
|
153
154
|
cluster_id: Optional cluster id to reuse.
|
|
154
155
|
cluster_name: Optional cluster name to reuse.
|
|
155
|
-
single_user_name: Optional
|
|
156
|
+
single_user_name: Optional username for single-user clusters.
|
|
156
157
|
runtime_engine: Optional Databricks runtime engine.
|
|
157
158
|
libraries: Optional list of libraries to install.
|
|
159
|
+
update_timeout: wait timeout, if None it will not wait completion
|
|
158
160
|
**kwargs: Additional cluster specification overrides.
|
|
159
161
|
|
|
160
162
|
Returns:
|
|
@@ -176,6 +178,7 @@ class Cluster(WorkspaceService):
|
|
|
176
178
|
single_user_name=single_user_name,
|
|
177
179
|
runtime_engine=runtime_engine,
|
|
178
180
|
libraries=libraries,
|
|
181
|
+
update_timeout=update_timeout,
|
|
179
182
|
**kwargs
|
|
180
183
|
)
|
|
181
184
|
)
|
|
@@ -190,6 +193,7 @@ class Cluster(WorkspaceService):
|
|
|
190
193
|
single_user_name: Optional[str] = "current",
|
|
191
194
|
runtime_engine: Optional["RuntimeEngine"] = None,
|
|
192
195
|
libraries: Optional[list[str]] = None,
|
|
196
|
+
update_timeout: Optional[Union[float, dt.timedelta]] = dt.timedelta(minutes=20),
|
|
193
197
|
**kwargs
|
|
194
198
|
) -> "Cluster":
|
|
195
199
|
"""Create/update a cluster to match the local Python environment.
|
|
@@ -198,9 +202,10 @@ class Cluster(WorkspaceService):
|
|
|
198
202
|
source: Optional PythonEnv to mirror (defaults to current).
|
|
199
203
|
cluster_id: Optional cluster id to update.
|
|
200
204
|
cluster_name: Optional cluster name to update.
|
|
201
|
-
single_user_name: Optional single
|
|
205
|
+
single_user_name: Optional single username for the cluster.
|
|
202
206
|
runtime_engine: Optional runtime engine selection.
|
|
203
207
|
libraries: Optional list of libraries to install.
|
|
208
|
+
update_timeout: wait timeout, if None it will not wait completion
|
|
204
209
|
**kwargs: Additional cluster specification overrides.
|
|
205
210
|
|
|
206
211
|
Returns:
|
|
@@ -242,6 +247,7 @@ class Cluster(WorkspaceService):
|
|
|
242
247
|
single_user_name=single_user_name,
|
|
243
248
|
runtime_engine=runtime_engine or RuntimeEngine.PHOTON,
|
|
244
249
|
libraries=libraries,
|
|
250
|
+
update_timeout=update_timeout,
|
|
245
251
|
**kwargs
|
|
246
252
|
)
|
|
247
253
|
|
|
@@ -380,7 +386,9 @@ class Cluster(WorkspaceService):
|
|
|
380
386
|
start = time.time()
|
|
381
387
|
sleep_time = tick
|
|
382
388
|
|
|
383
|
-
if
|
|
389
|
+
if not timeout:
|
|
390
|
+
timeout = 20 * 60.0
|
|
391
|
+
elif isinstance(timeout, dt.timedelta):
|
|
384
392
|
timeout = timeout.total_seconds()
|
|
385
393
|
|
|
386
394
|
while self.is_pending:
|
|
@@ -412,12 +420,14 @@ class Cluster(WorkspaceService):
|
|
|
412
420
|
# Extract "major.minor" from strings like "17.3.x-scala2.13-ml-gpu"
|
|
413
421
|
v = self.spark_version
|
|
414
422
|
|
|
415
|
-
if v
|
|
423
|
+
if not v:
|
|
416
424
|
return None
|
|
417
425
|
|
|
418
426
|
parts = v.split(".")
|
|
427
|
+
|
|
419
428
|
if len(parts) < 2:
|
|
420
429
|
return None
|
|
430
|
+
|
|
421
431
|
return ".".join(parts[:2]) # e.g. "17.3"
|
|
422
432
|
|
|
423
433
|
@property
|
|
@@ -428,8 +438,10 @@ class Cluster(WorkspaceService):
|
|
|
428
438
|
When the runtime can't be mapped, returns ``None``.
|
|
429
439
|
"""
|
|
430
440
|
v = self.runtime_version
|
|
431
|
-
|
|
441
|
+
|
|
442
|
+
if not v:
|
|
432
443
|
return None
|
|
444
|
+
|
|
433
445
|
return _PYTHON_BY_DBR.get(v)
|
|
434
446
|
|
|
435
447
|
# ------------------------------------------------------------------ #
|
|
@@ -586,6 +598,7 @@ class Cluster(WorkspaceService):
|
|
|
586
598
|
cluster_id: Optional[str] = None,
|
|
587
599
|
cluster_name: Optional[str] = None,
|
|
588
600
|
libraries: Optional[List[Union[str, "Library"]]] = None,
|
|
601
|
+
update_timeout: Optional[Union[float, dt.timedelta]] = dt.timedelta(minutes=20),
|
|
589
602
|
**cluster_spec: Any
|
|
590
603
|
):
|
|
591
604
|
"""Create a new cluster or update an existing one.
|
|
@@ -594,6 +607,7 @@ class Cluster(WorkspaceService):
|
|
|
594
607
|
cluster_id: Optional cluster id to update.
|
|
595
608
|
cluster_name: Optional cluster name to update or create.
|
|
596
609
|
libraries: Optional libraries to install.
|
|
610
|
+
update_timeout: wait timeout, if None it will not wait completion
|
|
597
611
|
**cluster_spec: Cluster specification overrides.
|
|
598
612
|
|
|
599
613
|
Returns:
|
|
@@ -609,24 +623,28 @@ class Cluster(WorkspaceService):
|
|
|
609
623
|
return found.update(
|
|
610
624
|
cluster_name=cluster_name,
|
|
611
625
|
libraries=libraries,
|
|
626
|
+
wait_timeout=update_timeout,
|
|
612
627
|
**cluster_spec
|
|
613
628
|
)
|
|
614
629
|
|
|
615
630
|
return self.create(
|
|
616
631
|
cluster_name=cluster_name,
|
|
617
632
|
libraries=libraries,
|
|
633
|
+
wait_timeout=update_timeout,
|
|
618
634
|
**cluster_spec
|
|
619
635
|
)
|
|
620
636
|
|
|
621
637
|
def create(
|
|
622
638
|
self,
|
|
623
639
|
libraries: Optional[List[Union[str, "Library"]]] = None,
|
|
640
|
+
wait_timeout: Union[float, dt.timedelta] = dt.timedelta(minutes=20),
|
|
624
641
|
**cluster_spec: Any
|
|
625
642
|
) -> str:
|
|
626
643
|
"""Create a new cluster and optionally install libraries.
|
|
627
644
|
|
|
628
645
|
Args:
|
|
629
646
|
libraries: Optional list of libraries to install after creation.
|
|
647
|
+
wait_timeout: wait timeout, if None it will not wait completion
|
|
630
648
|
**cluster_spec: Cluster specification overrides.
|
|
631
649
|
|
|
632
650
|
Returns:
|
|
@@ -646,14 +664,17 @@ class Cluster(WorkspaceService):
|
|
|
646
664
|
update_details,
|
|
647
665
|
)
|
|
648
666
|
|
|
649
|
-
self.details = self.clusters_client().
|
|
667
|
+
self.details = self.clusters_client().create(**update_details)
|
|
650
668
|
|
|
651
669
|
LOGGER.info(
|
|
652
670
|
"Created %s",
|
|
653
671
|
self
|
|
654
672
|
)
|
|
655
673
|
|
|
656
|
-
self.install_libraries(libraries=libraries, raise_error=False)
|
|
674
|
+
self.install_libraries(libraries=libraries, raise_error=False, wait_timeout=None)
|
|
675
|
+
|
|
676
|
+
if wait_timeout:
|
|
677
|
+
self.wait_for_status(timeout=wait_timeout)
|
|
657
678
|
|
|
658
679
|
return self
|
|
659
680
|
|
|
@@ -661,7 +682,7 @@ class Cluster(WorkspaceService):
|
|
|
661
682
|
self,
|
|
662
683
|
libraries: Optional[List[Union[str, "Library"]]] = None,
|
|
663
684
|
access_control_list: Optional[List["ClusterAccessControlRequest"]] = None,
|
|
664
|
-
wait_timeout: Union[float, dt.timedelta] = dt.timedelta(minutes=20),
|
|
685
|
+
wait_timeout: Optional[Union[float, dt.timedelta]] = dt.timedelta(minutes=20),
|
|
665
686
|
**cluster_spec: Any
|
|
666
687
|
) -> "Cluster":
|
|
667
688
|
"""Update cluster configuration and optionally install libraries.
|
|
@@ -708,7 +729,7 @@ class Cluster(WorkspaceService):
|
|
|
708
729
|
self, diff
|
|
709
730
|
)
|
|
710
731
|
|
|
711
|
-
self.wait_for_status()
|
|
732
|
+
self.wait_for_status(timeout=wait_timeout)
|
|
712
733
|
self.clusters_client().edit(**update_details)
|
|
713
734
|
self.update_permissions(access_control_list=access_control_list)
|
|
714
735
|
|
|
@@ -727,7 +748,7 @@ class Cluster(WorkspaceService):
|
|
|
727
748
|
access_control_list: Optional[List["ClusterAccessControlRequest"]] = None,
|
|
728
749
|
):
|
|
729
750
|
if not access_control_list:
|
|
730
|
-
|
|
751
|
+
return self
|
|
731
752
|
|
|
732
753
|
access_control_list = self._check_permission(access_control_list)
|
|
733
754
|
|
|
@@ -745,6 +766,7 @@ class Cluster(WorkspaceService):
|
|
|
745
766
|
permission_level=ClusterPermissionLevel.CAN_MANAGE
|
|
746
767
|
)
|
|
747
768
|
for name in current_groups
|
|
769
|
+
if name not in {"users"}
|
|
748
770
|
]
|
|
749
771
|
|
|
750
772
|
def _check_permission(
|
|
@@ -862,18 +884,22 @@ class Cluster(WorkspaceService):
|
|
|
862
884
|
Returns:
|
|
863
885
|
The current Cluster instance.
|
|
864
886
|
"""
|
|
887
|
+
if self.is_running:
|
|
888
|
+
return self
|
|
889
|
+
|
|
865
890
|
self.wait_for_status()
|
|
866
891
|
|
|
867
|
-
if
|
|
868
|
-
|
|
892
|
+
if self.is_running:
|
|
893
|
+
return self
|
|
869
894
|
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
else:
|
|
874
|
-
self.clusters_client().start(cluster_id=self.cluster_id)
|
|
895
|
+
LOGGER.debug("Starting %s", self)
|
|
896
|
+
|
|
897
|
+
self.clusters_client().start(cluster_id=self.cluster_id)
|
|
875
898
|
|
|
876
|
-
|
|
899
|
+
LOGGER.info("Started %s", self)
|
|
900
|
+
|
|
901
|
+
if wait_timeout:
|
|
902
|
+
self.wait_for_status(timeout=wait_timeout.total_seconds())
|
|
877
903
|
|
|
878
904
|
return self
|
|
879
905
|
|
|
@@ -889,7 +915,7 @@ class Cluster(WorkspaceService):
|
|
|
889
915
|
|
|
890
916
|
if self.is_running:
|
|
891
917
|
self.details = self.clusters_client().restart_and_wait(cluster_id=self.cluster_id)
|
|
892
|
-
return self
|
|
918
|
+
return self
|
|
893
919
|
|
|
894
920
|
return self.start()
|
|
895
921
|
|
|
@@ -180,7 +180,7 @@ print(json.dumps(meta))"""
|
|
|
180
180
|
"""
|
|
181
181
|
return self.cluster.workspace.sdk()
|
|
182
182
|
|
|
183
|
-
def
|
|
183
|
+
def create_command(
|
|
184
184
|
self,
|
|
185
185
|
language: "Language",
|
|
186
186
|
) -> any:
|
|
@@ -192,17 +192,29 @@ print(json.dumps(meta))"""
|
|
|
192
192
|
Returns:
|
|
193
193
|
The created command execution context response.
|
|
194
194
|
"""
|
|
195
|
-
self.cluster.ensure_running()
|
|
196
|
-
|
|
197
195
|
LOGGER.debug(
|
|
198
196
|
"Creating Databricks command execution context for %s",
|
|
199
197
|
self.cluster
|
|
200
198
|
)
|
|
201
199
|
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
200
|
+
try:
|
|
201
|
+
created = self._workspace_client().command_execution.create_and_wait(
|
|
202
|
+
cluster_id=self.cluster.cluster_id,
|
|
203
|
+
language=language,
|
|
204
|
+
)
|
|
205
|
+
except:
|
|
206
|
+
self.cluster.ensure_running()
|
|
207
|
+
|
|
208
|
+
created = self._workspace_client().command_execution.create_and_wait(
|
|
209
|
+
cluster_id=self.cluster.cluster_id,
|
|
210
|
+
language=language,
|
|
211
|
+
)
|
|
212
|
+
|
|
213
|
+
LOGGER.info(
|
|
214
|
+
"Created Databricks command execution context %s",
|
|
215
|
+
self
|
|
205
216
|
)
|
|
217
|
+
|
|
206
218
|
created = getattr(created, "response", created)
|
|
207
219
|
|
|
208
220
|
return created
|
|
@@ -220,10 +232,6 @@ print(json.dumps(meta))"""
|
|
|
220
232
|
The connected ExecutionContext instance.
|
|
221
233
|
"""
|
|
222
234
|
if self.context_id is not None:
|
|
223
|
-
LOGGER.debug(
|
|
224
|
-
"Execution context already open for %s",
|
|
225
|
-
self
|
|
226
|
-
)
|
|
227
235
|
return self
|
|
228
236
|
|
|
229
237
|
self.language = language or self.language
|
|
@@ -231,7 +239,7 @@ print(json.dumps(meta))"""
|
|
|
231
239
|
if self.language is None:
|
|
232
240
|
self.language = Language.PYTHON
|
|
233
241
|
|
|
234
|
-
ctx = self.
|
|
242
|
+
ctx = self.create_command(language=self.language)
|
|
235
243
|
|
|
236
244
|
context_id = ctx.id
|
|
237
245
|
if not context_id:
|
|
@@ -39,6 +39,7 @@ def databricks_remote_compute(
|
|
|
39
39
|
timeout: Optional[dt.timedelta] = None,
|
|
40
40
|
env_keys: Optional[List[str]] = None,
|
|
41
41
|
force_local: bool = False,
|
|
42
|
+
update_timeout: Optional[Union[float, dt.timedelta]] = None,
|
|
42
43
|
**options
|
|
43
44
|
) -> Callable[[Callable[..., ReturnType]], Callable[..., ReturnType]]:
|
|
44
45
|
"""Return a decorator that executes functions on a remote cluster.
|
|
@@ -52,6 +53,7 @@ def databricks_remote_compute(
|
|
|
52
53
|
timeout: Optional execution timeout for remote calls.
|
|
53
54
|
env_keys: Optional environment variable names to forward.
|
|
54
55
|
force_local: Force local execution
|
|
56
|
+
update_timeout: creation or update wait timeout
|
|
55
57
|
**options: Extra options forwarded to the execution decorator.
|
|
56
58
|
|
|
57
59
|
Returns:
|
|
@@ -82,7 +84,8 @@ def databricks_remote_compute(
|
|
|
82
84
|
cluster = workspace.clusters().replicated_current_environment(
|
|
83
85
|
workspace=workspace,
|
|
84
86
|
cluster_name=cluster_name,
|
|
85
|
-
single_user_name=workspace.current_user.user_name
|
|
87
|
+
single_user_name=workspace.current_user.user_name,
|
|
88
|
+
update_timeout=update_timeout
|
|
86
89
|
)
|
|
87
90
|
|
|
88
91
|
cluster.ensure_running(wait_timeout=None)
|
|
@@ -975,28 +975,40 @@ class DatabricksVolumeIO(DatabricksIO):
|
|
|
975
975
|
"""Read bytes from a volume file.
|
|
976
976
|
|
|
977
977
|
Args:
|
|
978
|
-
start: Starting byte offset.
|
|
978
|
+
start: Starting byte offset (0-based).
|
|
979
979
|
length: Number of bytes to read.
|
|
980
980
|
allow_not_found: Whether to suppress missing-path errors.
|
|
981
981
|
|
|
982
982
|
Returns:
|
|
983
983
|
Bytes read from the file.
|
|
984
984
|
"""
|
|
985
|
-
if length
|
|
985
|
+
if length <= 0:
|
|
986
986
|
return b""
|
|
987
|
+
if start < 0:
|
|
988
|
+
raise ValueError(f"start must be >= 0, got {start}")
|
|
989
|
+
if length < 0:
|
|
990
|
+
raise ValueError(f"length must be >= 0, got {length}")
|
|
987
991
|
|
|
988
992
|
sdk = self.workspace.sdk()
|
|
989
993
|
client = sdk.files
|
|
990
994
|
full_path = self.path.files_full_path()
|
|
991
995
|
|
|
992
|
-
|
|
993
|
-
|
|
994
|
-
|
|
995
|
-
|
|
996
|
-
.
|
|
997
|
-
|
|
996
|
+
try:
|
|
997
|
+
resp = client.download(full_path)
|
|
998
|
+
except Exception as e:
|
|
999
|
+
# Databricks SDK exceptions vary a bit by version; keep it pragmatic.
|
|
1000
|
+
if allow_not_found and any(s in str(e).lower() for s in ("not found", "does not exist", "404")):
|
|
1001
|
+
return b""
|
|
1002
|
+
raise
|
|
1003
|
+
|
|
1004
|
+
data = resp.contents.read()
|
|
998
1005
|
|
|
999
|
-
return
|
|
1006
|
+
# If start is past EOF, return empty (common file-like behavior).
|
|
1007
|
+
if start >= len(data):
|
|
1008
|
+
return b""
|
|
1009
|
+
|
|
1010
|
+
end = start + length
|
|
1011
|
+
return data[start:end]
|
|
1000
1012
|
|
|
1001
1013
|
def write_all_bytes(self, data: bytes):
|
|
1002
1014
|
"""Write bytes to a volume file.
|
|
@@ -494,11 +494,15 @@ class DatabricksPath:
|
|
|
494
494
|
|
|
495
495
|
try:
|
|
496
496
|
info = sdk.files.get_directory_metadata(full_path)
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
497
|
+
|
|
498
|
+
if info is None:
|
|
499
|
+
mtime = dt.datetime.now(tz=dt.timezone.utc)
|
|
500
|
+
else:
|
|
501
|
+
mtime = (
|
|
502
|
+
dt.datetime.strptime(info.last_modified, "%a, %d %b %Y %H:%M:%S %Z").replace(tzinfo=dt.timezone.utc)
|
|
503
|
+
if info.last_modified
|
|
504
|
+
else None
|
|
505
|
+
)
|
|
502
506
|
|
|
503
507
|
return self.reset_metadata(is_file=False, is_dir=True, size=info, mtime=mtime)
|
|
504
508
|
except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
|
|
@@ -8,7 +8,6 @@ from abc import ABC
|
|
|
8
8
|
from dataclasses import dataclass
|
|
9
9
|
from pathlib import Path
|
|
10
10
|
from typing import (
|
|
11
|
-
Any,
|
|
12
11
|
BinaryIO,
|
|
13
12
|
Iterator,
|
|
14
13
|
Optional,
|
|
@@ -55,7 +54,9 @@ def _get_env_product_version():
|
|
|
55
54
|
v = os.getenv("DATABRICKS_PRODUCT_VERSION")
|
|
56
55
|
|
|
57
56
|
if not v:
|
|
58
|
-
|
|
57
|
+
if _get_env_product() == "yggdrasil":
|
|
58
|
+
return YGGDRASIL_VERSION
|
|
59
|
+
return None
|
|
59
60
|
return v.strip().lower()
|
|
60
61
|
|
|
61
62
|
|
|
@@ -106,11 +107,12 @@ class Workspace:
|
|
|
106
107
|
product: Optional[str] = dataclasses.field(default_factory=_get_env_product, repr=False)
|
|
107
108
|
product_version: Optional[str] = dataclasses.field(default_factory=_get_env_product_version, repr=False)
|
|
108
109
|
product_tag: Optional[str] = dataclasses.field(default_factory=_get_env_product_tag, repr=False)
|
|
110
|
+
custom_tags: Optional[dict] = dataclasses.field(default=None, repr=False)
|
|
109
111
|
|
|
110
112
|
# Runtime cache (never serialized)
|
|
111
|
-
_sdk:
|
|
112
|
-
_was_connected: bool = dataclasses.field(
|
|
113
|
-
_cached_token: Optional[str] = dataclasses.field(
|
|
113
|
+
_sdk: Optional["WorkspaceClient"] = dataclasses.field(default=None, repr=False, compare=False, hash=False)
|
|
114
|
+
_was_connected: bool = dataclasses.field(default=None, repr=False, compare=False, hash=False)
|
|
115
|
+
_cached_token: Optional[str] = dataclasses.field(default=None, repr=False, compare=False, hash=False)
|
|
114
116
|
|
|
115
117
|
# -------------------------
|
|
116
118
|
# Pickle support
|
|
@@ -175,19 +177,43 @@ class Workspace:
|
|
|
175
177
|
# -------------------------
|
|
176
178
|
def clone_instance(
|
|
177
179
|
self,
|
|
178
|
-
**kwargs
|
|
179
180
|
) -> "Workspace":
|
|
180
181
|
"""Clone the workspace config with overrides.
|
|
181
182
|
|
|
182
|
-
Args:
|
|
183
|
-
**kwargs: Field overrides for the clone.
|
|
184
|
-
|
|
185
183
|
Returns:
|
|
186
184
|
A new Workspace instance with updated fields.
|
|
187
185
|
"""
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
186
|
+
return Workspace(
|
|
187
|
+
host = self.host,
|
|
188
|
+
account_id = self.account_id,
|
|
189
|
+
token = self.token,
|
|
190
|
+
client_id = self.client_id,
|
|
191
|
+
client_secret = self.client_secret,
|
|
192
|
+
token_audience = self.token_audience,
|
|
193
|
+
azure_workspace_resource_id = self.azure_workspace_resource_id,
|
|
194
|
+
azure_use_msi = self.azure_use_msi,
|
|
195
|
+
azure_client_secret = self.azure_client_secret,
|
|
196
|
+
azure_client_id = self.azure_client_id,
|
|
197
|
+
azure_tenant_id = self.azure_tenant_id,
|
|
198
|
+
azure_environment = self.azure_environment,
|
|
199
|
+
google_credentials = self.google_credentials,
|
|
200
|
+
google_service_account = self.google_service_account,
|
|
201
|
+
profile = self.profile,
|
|
202
|
+
config_file = self.config_file,
|
|
203
|
+
auth_type = self.auth_type,
|
|
204
|
+
http_timeout_seconds = self.http_timeout_seconds,
|
|
205
|
+
retry_timeout_seconds = self.retry_timeout_seconds,
|
|
206
|
+
debug_truncate_bytes = self.debug_truncate_bytes,
|
|
207
|
+
debug_headers = self.debug_headers,
|
|
208
|
+
rate_limit = self.rate_limit,
|
|
209
|
+
product = self.product,
|
|
210
|
+
product_version = self.product_version,
|
|
211
|
+
product_tag = self.product_tag,
|
|
212
|
+
custom_tags = self.custom_tags,
|
|
213
|
+
_sdk = self._sdk,
|
|
214
|
+
_was_connected = self._was_connected,
|
|
215
|
+
_cached_token = self._cached_token,
|
|
216
|
+
)
|
|
191
217
|
|
|
192
218
|
# -------------------------
|
|
193
219
|
# SDK connection
|
|
@@ -300,8 +326,9 @@ class Workspace:
|
|
|
300
326
|
Drop the cached WorkspaceClient (no actual close needed, but this
|
|
301
327
|
avoids reusing stale config).
|
|
302
328
|
"""
|
|
303
|
-
self._sdk
|
|
304
|
-
|
|
329
|
+
if self._sdk is not None:
|
|
330
|
+
self._sdk = None
|
|
331
|
+
self._was_connected = False
|
|
305
332
|
|
|
306
333
|
# ------------------------------------------------------------------ #
|
|
307
334
|
# Properties
|
|
@@ -561,28 +588,19 @@ class Workspace:
|
|
|
561
588
|
Returns:
|
|
562
589
|
A dict of default tags.
|
|
563
590
|
"""
|
|
564
|
-
|
|
591
|
+
base = {
|
|
565
592
|
k: v
|
|
566
593
|
for k, v in (
|
|
567
594
|
("Product", self.product),
|
|
568
|
-
("ProductVersion", self.product_version),
|
|
569
595
|
("ProductTag", self.product_tag),
|
|
570
|
-
("ProductUser", self.current_user.user_name)
|
|
571
596
|
)
|
|
572
597
|
if v
|
|
573
598
|
}
|
|
574
599
|
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
Args:
|
|
579
|
-
existing: Optional existing tags.
|
|
600
|
+
if self.custom_tags:
|
|
601
|
+
base.update(self.custom_tags)
|
|
580
602
|
|
|
581
|
-
|
|
582
|
-
A dict of merged tags.
|
|
583
|
-
"""
|
|
584
|
-
if existing:
|
|
585
|
-
return self.default_tags()
|
|
603
|
+
return base
|
|
586
604
|
|
|
587
605
|
def sql(
|
|
588
606
|
self,
|
yggdrasil/pyutils/python_env.py
CHANGED
|
@@ -16,7 +16,7 @@ import sys
|
|
|
16
16
|
import tempfile
|
|
17
17
|
import threading
|
|
18
18
|
from contextlib import contextmanager
|
|
19
|
-
from dataclasses import dataclass
|
|
19
|
+
from dataclasses import dataclass, field
|
|
20
20
|
from pathlib import Path
|
|
21
21
|
from typing import Any, Iterable, Iterator, Mapping, MutableMapping, Optional, Union, List, Tuple
|
|
22
22
|
|
|
@@ -415,11 +415,13 @@ def _locked_env(root: Path):
|
|
|
415
415
|
# PythonEnv
|
|
416
416
|
# -----------------------
|
|
417
417
|
|
|
418
|
-
@dataclass
|
|
418
|
+
@dataclass
|
|
419
419
|
class PythonEnv:
|
|
420
420
|
"""Represent a managed Python environment rooted at a filesystem path."""
|
|
421
421
|
root: Path
|
|
422
422
|
|
|
423
|
+
_version: Optional[str] = field(default=None, repr=False)
|
|
424
|
+
|
|
423
425
|
def __post_init__(self) -> None:
|
|
424
426
|
"""Normalize the root path after dataclass initialization.
|
|
425
427
|
|
|
@@ -862,8 +864,9 @@ class PythonEnv:
|
|
|
862
864
|
Returns:
|
|
863
865
|
Version string.
|
|
864
866
|
"""
|
|
865
|
-
|
|
866
|
-
|
|
867
|
+
if self._version is None:
|
|
868
|
+
self._version = self.exec_code("import sys; print(sys.version.split()[0])", check=True).strip()
|
|
869
|
+
return self._version
|
|
867
870
|
|
|
868
871
|
@property
|
|
869
872
|
def version_info(self) -> tuple[int, int, int]:
|
yggdrasil/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.1.48"
|
|
1
|
+
__version__ = "0.1.49"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|