ygg 0.1.48__py3-none-any.whl → 0.1.50__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ygg-0.1.48.dist-info → ygg-0.1.50.dist-info}/METADATA +3 -1
- {ygg-0.1.48.dist-info → ygg-0.1.50.dist-info}/RECORD +18 -18
- yggdrasil/databricks/compute/cluster.py +45 -19
- yggdrasil/databricks/compute/execution_context.py +19 -11
- yggdrasil/databricks/compute/remote.py +4 -1
- yggdrasil/databricks/sql/statement_result.py +12 -5
- yggdrasil/databricks/workspaces/io.py +80 -56
- yggdrasil/databricks/workspaces/path.py +101 -50
- yggdrasil/databricks/workspaces/workspace.py +45 -27
- yggdrasil/libs/pandaslib.py +6 -0
- yggdrasil/libs/polarslib.py +5 -0
- yggdrasil/pyutils/python_env.py +7 -4
- yggdrasil/types/cast/polars_cast.py +1 -0
- yggdrasil/version.py +1 -1
- {ygg-0.1.48.dist-info → ygg-0.1.50.dist-info}/WHEEL +0 -0
- {ygg-0.1.48.dist-info → ygg-0.1.50.dist-info}/entry_points.txt +0 -0
- {ygg-0.1.48.dist-info → ygg-0.1.50.dist-info}/licenses/LICENSE +0 -0
- {ygg-0.1.48.dist-info → ygg-0.1.50.dist-info}/top_level.txt +0 -0
{ygg-0.1.48.dist-info → ygg-0.1.50.dist-info}/METADATA CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ygg
-Version: 0.1.48
+Version: 0.1.50
 Summary: Type-friendly utilities for moving data between Python objects, Arrow, Polars, Pandas, Spark, and Databricks
 Author: Yggdrasil contributors
 License: Apache License

@@ -235,6 +235,8 @@ Requires-Dist: pytest-asyncio; extra == "dev"
 Requires-Dist: black; extra == "dev"
 Requires-Dist: ruff; extra == "dev"
 Requires-Dist: mypy; extra == "dev"
+Requires-Dist: build; extra == "dev"
+Requires-Dist: twine; extra == "dev"
 Dynamic: license-file

 # Yggdrasil (Python)
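The dev extra now pulls in `build` and `twine`. A minimal, hypothetical release flow driven from Python — the commands below are standard `python -m build` / `python -m twine` invocations and are not part of this diff:

import glob
import subprocess
import sys

# Build the sdist and wheel into ./dist (needs the "build" dev dependency).
subprocess.run([sys.executable, "-m", "build"], check=True)

# Sanity-check the artifacts before upload (needs the "twine" dev dependency).
subprocess.run([sys.executable, "-m", "twine", "check", *glob.glob("dist/*")], check=True)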
{ygg-0.1.48.dist-info → ygg-0.1.50.dist-info}/RECORD CHANGED

@@ -1,31 +1,31 @@
-ygg-0.1.
+ygg-0.1.50.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
 yggdrasil/__init__.py,sha256=4-ghPak2S6zfMqmnlxW2GCgPb5s79znpKa2hGEGXcE4,24
-yggdrasil/version.py,sha256=
+yggdrasil/version.py,sha256=pMWaMbj0sqJPaN27zeKuthOtJ3nuofEVeTxWuJmKhTw,22
 yggdrasil/databricks/__init__.py,sha256=skctY2c8W-hI81upx9F_PWRe5ishL3hrdiTuizgDjdw,152
 yggdrasil/databricks/compute/__init__.py,sha256=NvdzmaJSNYY1uJthv1hHdBuNu3bD_-Z65DWnaJt9yXg,289
-yggdrasil/databricks/compute/cluster.py,sha256=
-yggdrasil/databricks/compute/execution_context.py,sha256=
-yggdrasil/databricks/compute/remote.py,sha256=
+yggdrasil/databricks/compute/cluster.py,sha256=YomLfvB0oxbgl6WDgBRxI1UXsxwlEbR6gq3FUbPHscY,44199
+yggdrasil/databricks/compute/execution_context.py,sha256=jIV6uru2NeX3O5lg-3KEqmXtLxxq45CFgkBQgQIIOHQ,23327
+yggdrasil/databricks/compute/remote.py,sha256=yicEhyQypssRa2ByscO36s3cBkEgORFsRME9aaq91Pc,3045
 yggdrasil/databricks/jobs/__init__.py,sha256=snxGSJb0M5I39v0y3IR-uEeSlZR248cQ_4DJ1sYs-h8,154
 yggdrasil/databricks/jobs/config.py,sha256=9LGeHD04hbfy0xt8_6oobC4moKJh4_DTjZiK4Q2Tqjk,11557
 yggdrasil/databricks/sql/__init__.py,sha256=Vp_1cFaX1l-JGzCknvkbiB8CBFX2fQbBNntIeVn3lEg,231
 yggdrasil/databricks/sql/engine.py,sha256=K5WmGKpXU78JA3UdK8dLxBD_GXKidZJFe7hytuC5UHg,41029
 yggdrasil/databricks/sql/exceptions.py,sha256=uC-BoG0u0LtORKUS1X3iLID8nc-0TV5MQN3M8RXHsO4,1495
-yggdrasil/databricks/sql/statement_result.py,sha256=
+yggdrasil/databricks/sql/statement_result.py,sha256=GZyVhhrUK5opNo-8HGqsMx0Rp9fa_0zqvn8McSHPQ8U,16310
 yggdrasil/databricks/sql/types.py,sha256=5G-BM9_eOsRKEMzeDTWUsWW5g4Idvs-czVCpOCrMhdA,6412
 yggdrasil/databricks/sql/warehouse.py,sha256=1J0dyQLJb-OS1_1xU1eAVZ4CoL2-FhFeowKSvU3RzFc,9773
 yggdrasil/databricks/workspaces/__init__.py,sha256=dv2zotoFVhNFlTCdRq6gwf5bEzeZkOZszoNZMs0k59g,114
 yggdrasil/databricks/workspaces/filesytem.py,sha256=Z8JXU7_XUEbw9fpTQT1avRQKi-IAP2KemXBMPkUoY4w,9805
-yggdrasil/databricks/workspaces/io.py,sha256=
-yggdrasil/databricks/workspaces/path.py,sha256
+yggdrasil/databricks/workspaces/io.py,sha256=D-B31roMGEJesAtUWl-O30lZJfgo-oFdK6KExzFc13I,33260
+yggdrasil/databricks/workspaces/path.py,sha256=BAzaxEL2mWJ_6EnETnQdsPj06zkrbTO2f3reruR439k,49265
 yggdrasil/databricks/workspaces/path_kind.py,sha256=Xc319NysH8_6E9C0Q8nCxDHYG07_SnzyUVKHe0dNdDQ,305
-yggdrasil/databricks/workspaces/workspace.py,sha256=
+yggdrasil/databricks/workspaces/workspace.py,sha256=zBlQdYNT_xKwUCYo3O4Q4g-8pfMvff3I26efyCfY_TY,24961
 yggdrasil/dataclasses/__init__.py,sha256=_RkhfF3KC1eSORby1dzvBXQ0-UGG3u6wyUQWX2jq1Pc,108
 yggdrasil/dataclasses/dataclass.py,sha256=LxrCjwvmBnb8yRI_N-c31RHHxB4XoJPixmKg9iBIuaI,1148
 yggdrasil/libs/__init__.py,sha256=zdC9OU0Xy36CLY9mg2drxN6S7isPR8aTLzJA6xVIeLE,91
 yggdrasil/libs/databrickslib.py,sha256=NHJeUViHhZc8LI5oDVfi1axRyUy_pDJLy4hjD0KZEBQ,980
-yggdrasil/libs/pandaslib.py,sha256=
-yggdrasil/libs/polarslib.py,sha256=
+yggdrasil/libs/pandaslib.py,sha256=GoUjh9dxZAFLe9hs8-6RliLD3jsH_BexYW1w-8BZzb0,618
+yggdrasil/libs/polarslib.py,sha256=hnL8x6ygsyIoiJyIUMaeoji3fRzab4lBiHcMqa29C_Q,618
 yggdrasil/libs/sparklib.py,sha256=FQ3W1iz2EIpQreorOiQuFt15rdhq2QhGEAWp8Zrbl9A,10177
 yggdrasil/libs/extensions/__init__.py,sha256=mcXW5Li3Cbprbs4Ci-b5A0Ju0wmLcfvEiFusTx6xNjU,117
 yggdrasil/libs/extensions/polars_extensions.py,sha256=RTkGi8llhPJjX7x9egix7-yXWo2X24zIAPSKXV37SSA,12397

@@ -37,7 +37,7 @@ yggdrasil/pyutils/exceptions.py,sha256=ssKNm-rjhavHUOZmGA7_1Gq9tSHDrb2EFI-cnBuWg
 yggdrasil/pyutils/expiring_dict.py,sha256=pr2u25LGwPVbLfsLptiHGovUtYRRo0AMjaJtCtJl7nQ,8477
 yggdrasil/pyutils/modules.py,sha256=B7IP99YqUMW6-DIESFzBx8-09V1d0a8qrIJUDFhhL2g,11424
 yggdrasil/pyutils/parallel.py,sha256=ubuq2m9dJzWYUyKCga4Y_9bpaeMYUrleYxdp49CHr44,6781
-yggdrasil/pyutils/python_env.py,sha256=
+yggdrasil/pyutils/python_env.py,sha256=Gh5geFK9ABpyWEfyegGUfIJUoPxKwcH0pqLBiMrW9Rw,51103
 yggdrasil/pyutils/retry.py,sha256=n5sr-Zu7fYrdLbjJ4WifK2lk0gEGmHv5FYt2HaCm1Qc,11916
 yggdrasil/requests/__init__.py,sha256=dMesyzq97_DmI765x0TwaDPEfsxFtgGNgchk8LvEN-o,103
 yggdrasil/requests/msal.py,sha256=s2GCyzbgFdgdlJ1JqMrZ4qYVbmoG46-ZOTcaVQhZ-sQ,9220

@@ -49,14 +49,14 @@ yggdrasil/types/cast/__init__.py,sha256=Oft3pTs2bRM5hT7YqJAuOKTYYk-SACLaMOXUVdaf
 yggdrasil/types/cast/arrow_cast.py,sha256=_OMYc4t5GlgE4ztlWaCoK8Jnba09rgDbmHVP-QXhOL0,41523
 yggdrasil/types/cast/cast_options.py,sha256=nDaEvCCs7TBamhTWyDrYf3LVaBWzioIP2Q5_LXrChF4,15532
 yggdrasil/types/cast/pandas_cast.py,sha256=I3xu0sZ59ZbK3NDcQ2dslzdeKzhpFV5zR02ZEixd5hI,8713
-yggdrasil/types/cast/polars_cast.py,sha256=
+yggdrasil/types/cast/polars_cast.py,sha256=RILcbfL4o1XDMp5H-06c0BMrDal5pehOT7ACiItDB6E,28791
 yggdrasil/types/cast/polars_pandas_cast.py,sha256=CS0P7teVv15IdX5g7v40RfkH1VMg6b-HM0V_gOfacm8,5071
 yggdrasil/types/cast/registry.py,sha256=_zdFGmUBB7P-e_LIcJlOxMcxAkXoA-UXB6HqLMgTokg,21491
 yggdrasil/types/cast/spark_cast.py,sha256=_KAsl1DqmKMSfWxqhVE7gosjYdgiL1C5bDQv6eP3HtA,24926
 yggdrasil/types/cast/spark_pandas_cast.py,sha256=BuTiWrdCANZCdD_p2MAytqm74eq-rdRXd-LGojBRrfU,5023
 yggdrasil/types/cast/spark_polars_cast.py,sha256=btmZNHXn2NSt3fUuB4xg7coaE0RezIBdZD92H8NK0Jw,9073
-ygg-0.1.
-ygg-0.1.
-ygg-0.1.
-ygg-0.1.
-ygg-0.1.
+ygg-0.1.50.dist-info/METADATA,sha256=ygOCZJjNIbuKuD-qKLnttguy71qIBxR0KnHDJE_XPSU,18528
+ygg-0.1.50.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ygg-0.1.50.dist-info/entry_points.txt,sha256=6q-vpWG3kvw2dhctQ0LALdatoeefkN855Ev02I1dKGY,70
+ygg-0.1.50.dist-info/top_level.txt,sha256=iBe9Kk4VIVbLpgv_p8OZUIfxgj4dgJ5wBg6vO3rigso,10
+ygg-0.1.50.dist-info/RECORD,,
yggdrasil/databricks/compute/cluster.py CHANGED

@@ -144,6 +144,7 @@ class Cluster(WorkspaceService):
 single_user_name: Optional[str] = None,
 runtime_engine: Optional["RuntimeEngine"] = None,
 libraries: Optional[list[str]] = None,
+update_timeout: Optional[Union[float, dt.timedelta]] = dt.timedelta(minutes=20),
 **kwargs
 ) -> "Cluster":
 """Create or reuse a cluster that mirrors the current Python environment.

@@ -152,9 +153,10 @@ class Cluster(WorkspaceService):
 workspace: Workspace to use for the cluster.
 cluster_id: Optional cluster id to reuse.
 cluster_name: Optional cluster name to reuse.
-single_user_name: Optional
+single_user_name: Optional username for single-user clusters.
 runtime_engine: Optional Databricks runtime engine.
 libraries: Optional list of libraries to install.
+update_timeout: wait timeout, if None it will not wait completion
 **kwargs: Additional cluster specification overrides.

 Returns:

@@ -176,6 +178,7 @@ class Cluster(WorkspaceService):
 single_user_name=single_user_name,
 runtime_engine=runtime_engine,
 libraries=libraries,
+update_timeout=update_timeout,
 **kwargs
 )
 )

@@ -190,6 +193,7 @@ class Cluster(WorkspaceService):
 single_user_name: Optional[str] = "current",
 runtime_engine: Optional["RuntimeEngine"] = None,
 libraries: Optional[list[str]] = None,
+update_timeout: Optional[Union[float, dt.timedelta]] = dt.timedelta(minutes=20),
 **kwargs
 ) -> "Cluster":
 """Create/update a cluster to match the local Python environment.

@@ -198,9 +202,10 @@ class Cluster(WorkspaceService):
 source: Optional PythonEnv to mirror (defaults to current).
 cluster_id: Optional cluster id to update.
 cluster_name: Optional cluster name to update.
-single_user_name: Optional single
+single_user_name: Optional single username for the cluster.
 runtime_engine: Optional runtime engine selection.
 libraries: Optional list of libraries to install.
+update_timeout: wait timeout, if None it will not wait completion
 **kwargs: Additional cluster specification overrides.

 Returns:

@@ -242,6 +247,7 @@ class Cluster(WorkspaceService):
 single_user_name=single_user_name,
 runtime_engine=runtime_engine or RuntimeEngine.PHOTON,
 libraries=libraries,
+update_timeout=update_timeout,
 **kwargs
 )

@@ -380,7 +386,9 @@ class Cluster(WorkspaceService):
 start = time.time()
 sleep_time = tick

-if
+if not timeout:
+timeout = 20 * 60.0
+elif isinstance(timeout, dt.timedelta):
 timeout = timeout.total_seconds()

 while self.is_pending:

@@ -412,12 +420,14 @@ class Cluster(WorkspaceService):
 # Extract "major.minor" from strings like "17.3.x-scala2.13-ml-gpu"
 v = self.spark_version

-if v
+if not v:
 return None

 parts = v.split(".")
+
 if len(parts) < 2:
 return None
+
 return ".".join(parts[:2])  # e.g. "17.3"

 @property

@@ -428,8 +438,10 @@ class Cluster(WorkspaceService):
 When the runtime can't be mapped, returns ``None``.
 """
 v = self.runtime_version
-
+
+if not v:
 return None
+
 return _PYTHON_BY_DBR.get(v)

 # ------------------------------------------------------------------ #

@@ -586,6 +598,7 @@ class Cluster(WorkspaceService):
 cluster_id: Optional[str] = None,
 cluster_name: Optional[str] = None,
 libraries: Optional[List[Union[str, "Library"]]] = None,
+update_timeout: Optional[Union[float, dt.timedelta]] = dt.timedelta(minutes=20),
 **cluster_spec: Any
 ):
 """Create a new cluster or update an existing one.

@@ -594,6 +607,7 @@ class Cluster(WorkspaceService):
 cluster_id: Optional cluster id to update.
 cluster_name: Optional cluster name to update or create.
 libraries: Optional libraries to install.
+update_timeout: wait timeout, if None it will not wait completion
 **cluster_spec: Cluster specification overrides.

 Returns:

@@ -609,24 +623,28 @@ class Cluster(WorkspaceService):
 return found.update(
 cluster_name=cluster_name,
 libraries=libraries,
+wait_timeout=update_timeout,
 **cluster_spec
 )

 return self.create(
 cluster_name=cluster_name,
 libraries=libraries,
+wait_timeout=update_timeout,
 **cluster_spec
 )

 def create(
 self,
 libraries: Optional[List[Union[str, "Library"]]] = None,
+wait_timeout: Union[float, dt.timedelta] = dt.timedelta(minutes=20),
 **cluster_spec: Any
 ) -> str:
 """Create a new cluster and optionally install libraries.

 Args:
 libraries: Optional list of libraries to install after creation.
+wait_timeout: wait timeout, if None it will not wait completion
 **cluster_spec: Cluster specification overrides.

 Returns:

@@ -646,14 +664,17 @@ class Cluster(WorkspaceService):
 update_details,
 )

-self.details = self.clusters_client().
+self.details = self.clusters_client().create(**update_details)

 LOGGER.info(
 "Created %s",
 self
 )

-self.install_libraries(libraries=libraries, raise_error=False)
+self.install_libraries(libraries=libraries, raise_error=False, wait_timeout=None)
+
+if wait_timeout:
+self.wait_for_status(timeout=wait_timeout)

 return self

@@ -661,7 +682,7 @@ class Cluster(WorkspaceService):
 self,
 libraries: Optional[List[Union[str, "Library"]]] = None,
 access_control_list: Optional[List["ClusterAccessControlRequest"]] = None,
-wait_timeout: Union[float, dt.timedelta] = dt.timedelta(minutes=20),
+wait_timeout: Optional[Union[float, dt.timedelta]] = dt.timedelta(minutes=20),
 **cluster_spec: Any
 ) -> "Cluster":
 """Update cluster configuration and optionally install libraries.

@@ -708,7 +729,7 @@ class Cluster(WorkspaceService):
 self, diff
 )

-self.wait_for_status()
+self.wait_for_status(timeout=wait_timeout)
 self.clusters_client().edit(**update_details)
 self.update_permissions(access_control_list=access_control_list)

@@ -727,7 +748,7 @@ class Cluster(WorkspaceService):
 access_control_list: Optional[List["ClusterAccessControlRequest"]] = None,
 ):
 if not access_control_list:
-
+return self

 access_control_list = self._check_permission(access_control_list)

@@ -745,6 +766,7 @@ class Cluster(WorkspaceService):
 permission_level=ClusterPermissionLevel.CAN_MANAGE
 )
 for name in current_groups
+if name not in {"users"}
 ]

 def _check_permission(

@@ -862,18 +884,22 @@ class Cluster(WorkspaceService):
 Returns:
 The current Cluster instance.
 """
+if self.is_running:
+return self
+
 self.wait_for_status()

-if
-
+if self.is_running:
+return self

-
-
-
-else:
-self.clusters_client().start(cluster_id=self.cluster_id)
+LOGGER.debug("Starting %s", self)
+
+self.clusters_client().start(cluster_id=self.cluster_id)

-
+LOGGER.info("Started %s", self)
+
+if wait_timeout:
+self.wait_for_status(timeout=wait_timeout.total_seconds())

 return self

@@ -889,7 +915,7 @@ class Cluster(WorkspaceService):

 if self.is_running:
 self.details = self.clusters_client().restart_and_wait(cluster_id=self.cluster_id)
-return self
+return self

 return self.start()
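The new `update_timeout` knob flows from `create_or_update` / `replicated_current_environment` down to `wait_for_status`. A minimal usage sketch, assuming the import path from the file list above and a hypothetical profile and cluster name; the call chain mirrors the one shown in the remote.py hunk further down:

import datetime as dt

from yggdrasil.databricks.workspaces.workspace import Workspace  # assumed import path

ws = Workspace(profile="dev")  # hypothetical profile

# Wait at most five minutes for the cluster create/update to settle.
cluster = ws.clusters().replicated_current_environment(
    workspace=ws,
    cluster_name="ygg-dev",                  # hypothetical cluster name
    update_timeout=dt.timedelta(minutes=5),
)

# Fire-and-forget variant: with update_timeout=None the call returns without
# waiting for the cluster to finish its create/update.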
yggdrasil/databricks/compute/execution_context.py CHANGED

@@ -180,7 +180,7 @@ print(json.dumps(meta))"""
 """
 return self.cluster.workspace.sdk()

-def
+def create_command(
 self,
 language: "Language",
 ) -> any:

@@ -192,17 +192,29 @@ print(json.dumps(meta))"""
 Returns:
 The created command execution context response.
 """
-self.cluster.ensure_running()
-
 LOGGER.debug(
 "Creating Databricks command execution context for %s",
 self.cluster
 )

-
-
-
+try:
+created = self._workspace_client().command_execution.create_and_wait(
+cluster_id=self.cluster.cluster_id,
+language=language,
+)
+except:
+self.cluster.ensure_running()
+
+created = self._workspace_client().command_execution.create_and_wait(
+cluster_id=self.cluster.cluster_id,
+language=language,
+)
+
+LOGGER.info(
+"Created Databricks command execution context %s",
+self
 )
+
 created = getattr(created, "response", created)

 return created

@@ -220,10 +232,6 @@ print(json.dumps(meta))"""
 The connected ExecutionContext instance.
 """
 if self.context_id is not None:
-LOGGER.debug(
-"Execution context already open for %s",
-self
-)
 return self

 self.language = language or self.language

@@ -231,7 +239,7 @@ print(json.dumps(meta))"""
 if self.language is None:
 self.language = Language.PYTHON

-ctx = self.
+ctx = self.create_command(language=self.language)

 context_id = ctx.id
 if not context_id:
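`create_command` now tries the SDK call first and only falls back to `ensure_running()` when that first attempt fails. A generic, self-contained sketch of that optimistic-call pattern (the function names here are placeholders, not the package API):

def create_with_retry(create, ensure_running):
    # Try optimistically; only pay the cost of ensure_running() when the
    # first attempt fails, then retry exactly once.
    try:
        return create()
    except Exception:
        ensure_running()
        return create()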
yggdrasil/databricks/compute/remote.py CHANGED

@@ -39,6 +39,7 @@ def databricks_remote_compute(
 timeout: Optional[dt.timedelta] = None,
 env_keys: Optional[List[str]] = None,
 force_local: bool = False,
+update_timeout: Optional[Union[float, dt.timedelta]] = None,
 **options
 ) -> Callable[[Callable[..., ReturnType]], Callable[..., ReturnType]]:
 """Return a decorator that executes functions on a remote cluster.

@@ -52,6 +53,7 @@ def databricks_remote_compute(
 timeout: Optional execution timeout for remote calls.
 env_keys: Optional environment variable names to forward.
 force_local: Force local execution
+update_timeout: creation or update wait timeout
 **options: Extra options forwarded to the execution decorator.

 Returns:

@@ -82,7 +84,8 @@ def databricks_remote_compute(
 cluster = workspace.clusters().replicated_current_environment(
 workspace=workspace,
 cluster_name=cluster_name,
-single_user_name=workspace.current_user.user_name
+single_user_name=workspace.current_user.user_name,
+update_timeout=update_timeout
 )

 cluster.ensure_running(wait_timeout=None)
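A minimal sketch of the decorator with the new parameter. Only `force_local` and `update_timeout` appear in this diff; the import path, the `cluster_name` keyword, and the cluster name itself are assumptions:

from yggdrasil.databricks.compute.remote import databricks_remote_compute  # assumed import path

@databricks_remote_compute(
    cluster_name="ygg-dev",   # hypothetical cluster name
    update_timeout=None,      # do not block on cluster creation/update
)
def double(n: int) -> int:
    return n * 2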
yggdrasil/databricks/sql/statement_result.py CHANGED

@@ -344,10 +344,17 @@ class StatementResult:
 if self.persisted:
 if self._arrow_table is not None:
 return self._arrow_table.schema
-
+elif self._spark_df is not None:
+return spark_schema_to_arrow_schema(self._spark_df.schema)
+raise NotImplementedError("")
+
+manifest = self.manifest
+
+if manifest is None:
+return pa.schema([])

 fields = [
-column_info_to_arrow_field(_) for _ in
+column_info_to_arrow_field(_) for _ in manifest.schema.columns
 ]

 return pa.schema(fields)

@@ -362,7 +369,7 @@ class StatementResult:
 An Arrow Table containing all rows.
 """
 if self.persisted:
-if self._arrow_table:
+if self._arrow_table is not None:
 return self._arrow_table
 else:
 return self._spark_df.toArrow()

@@ -370,7 +377,6 @@ class StatementResult:
 batches = list(self.to_arrow_batches(parallel_pool=parallel_pool))

 if not batches:
-# empty table with no columns
 return pa.Table.from_batches([], schema=self.arrow_schema())

 return pa.Table.from_batches(batches)

@@ -501,8 +507,9 @@ class StatementResult:
 Returns:
 A Spark DataFrame with the result rows.
 """
-if self._spark_df:
+if self._spark_df is not None:
 return self._spark_df

 self._spark_df = arrow_table_to_spark_dataframe(self.to_arrow_table())
+
 return self._spark_df
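The switch from truthiness checks to `is not None` matters because an empty Arrow table has zero length and is therefore falsy, so a cached-but-empty result would previously fall through to the wrong branch. A small illustration in plain pyarrow, independent of this package:

import pyarrow as pa

empty = pa.table({"x": pa.array([], type=pa.int64())})

print(len(empty))         # 0 rows, so truthiness (which falls back to __len__) is False
print(empty is not None)  # True: the cached result exists, it is just empty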
yggdrasil/databricks/workspaces/io.py CHANGED

@@ -13,8 +13,8 @@ from pyarrow.dataset import FileFormat, ParquetFileFormat, CsvFileFormat

 from .path_kind import DatabricksPathKind
 from ...libs.databrickslib import databricks
-from ...
-from ...
+from ...libs.pandaslib import pandas, PandasDataFrame
+from ...libs.polarslib import polars, PolarsDataFrame
 from ...types.cast.registry import convert

 if databricks is not None:

@@ -42,7 +42,6 @@ class DatabricksIO(ABC, IO):
 path: "DatabricksPath",
 mode: str,
 encoding: Optional[str] = None,
-compression: Optional[str] = "detect",
 position: int = 0,
 buffer: Optional[io.BytesIO] = None,
 ):

@@ -50,7 +49,6 @@ class DatabricksIO(ABC, IO):

 self.encoding = encoding
 self.mode = mode
-self.compression = compression

 self.path = path

@@ -111,7 +109,6 @@ class DatabricksIO(ABC, IO):
 path=path,
 mode=mode,
 encoding=encoding,
-compression=compression,
 position=position,
 buffer=buffer,
 )

@@ -120,7 +117,6 @@ class DatabricksIO(ABC, IO):
 path=path,
 mode=mode,
 encoding=encoding,
-compression=compression,
 position=position,
 buffer=buffer,
 )

@@ -129,7 +125,6 @@ class DatabricksIO(ABC, IO):
 path=path,
 mode=mode,
 encoding=encoding,
-compression=compression,
 position=position,
 buffer=buffer,
 )

@@ -226,7 +221,6 @@ class DatabricksIO(ABC, IO):
 path=kwargs.get("path", self.path),
 mode=kwargs.get("mode", self.mode),
 encoding=kwargs.get("encoding", self.encoding),
-compression=kwargs.get("compression", self.compression),
 position=kwargs.get("position", self.position),
 buffer=kwargs.get("buffer", self._buffer),
 )

@@ -264,8 +258,7 @@ class DatabricksIO(ABC, IO):
 None.
 """
 self.flush()
-
-self._buffer.close()
+self.clear_buffer()

 def fileno(self):
 """Return a pseudo file descriptor based on object hash.

@@ -403,9 +396,6 @@ class DatabricksIO(ABC, IO):
 Returns:
 The read bytes or string depending on mode.
 """
-if not self.readable():
-raise IOError("File not open for reading")
-
 current_position = self.position
 all_data = self.read_all_bytes(use_cache=use_cache)

@@ -431,9 +421,6 @@ class DatabricksIO(ABC, IO):
 Returns:
 The next line as bytes or string.
 """
-if not self.readable():
-raise IOError("File not open for reading")
-
 if self.encoding:
 # Text-mode: accumulate characters
 out_chars = []

@@ -475,9 +462,6 @@ class DatabricksIO(ABC, IO):
 Returns:
 A list of lines.
 """
-if not self.readable():
-raise IOError("File not open for reading")
-
 lines = []
 total = 0

@@ -492,14 +476,6 @@ class DatabricksIO(ABC, IO):

 return lines

-def appendable(self):
-"""Return True when the file is open in append mode.
-
-Returns:
-True if in append mode.
-"""
-return "a" in self.mode
-
 def writable(self):
 """Return True to indicate write support.

@@ -561,9 +537,6 @@ class DatabricksIO(ABC, IO):
 Returns:
 The number of bytes written.
 """
-if not self.writable():
-raise IOError("File not open for writing")
-
 if isinstance(data, str):
 data = data.encode(self.encoding or "utf-8")

@@ -664,8 +637,12 @@ class DatabricksIO(ABC, IO):
 return self.write_polars(table, file_format=file_format, batch_size=batch_size, **kwargs)
 elif isinstance(table, PandasDataFrame):
 return self.write_pandas(table, file_format=file_format, batch_size=batch_size, **kwargs)
-
-
+
+return self.write_arrow(
+table=table,
+file_format=file_format,
+batch_size=batch_size
+)

 # ---- Arrow ----

@@ -689,16 +666,18 @@ class DatabricksIO(ABC, IO):
 self.seek(0)

 if isinstance(file_format, ParquetFileFormat):
-
+pq.read_table(self, **kwargs)

-
-
+elif isinstance(file_format, CsvFileFormat):
+pcsv.read_csv(self, parse_options=file_format.parse_options)

-
+else:
+ValueError(f"Unsupported file format for Arrow table: {file_format}")

 def write_arrow(
 self,
 table: Union[pa.Table, pa.RecordBatch],
+file_format: Optional[FileFormat] = None,
 batch_size: Optional[int] = None,
 **kwargs
 ):

@@ -706,6 +685,7 @@ class DatabricksIO(ABC, IO):

 Args:
 table: Arrow table or batch to write.
+file_format: Optional file format override.
 batch_size: Optional batch size for writes.
 **kwargs: Format-specific options.

@@ -717,6 +697,7 @@ class DatabricksIO(ABC, IO):

 return self.write_arrow_table(
 table=table,
+file_format=file_format,
 batch_size=batch_size,
 **kwargs
 )

@@ -776,12 +757,14 @@ class DatabricksIO(ABC, IO):

 def read_arrow_batches(
 self,
+file_format: Optional[FileFormat] = None,
 batch_size: Optional[int] = None,
 **kwargs
 ):
 """Yield Arrow record batches from the file.

 Args:
+file_format: Optional file format override.
 batch_size: Optional batch size for reads.
 **kwargs: Format-specific options.

@@ -790,7 +773,11 @@ class DatabricksIO(ABC, IO):
 """
 return (
 self
-.read_arrow_table(
+.read_arrow_table(
+file_format=file_format,
+batch_size=batch_size,
+**kwargs
+)
 .to_batches(max_chunksize=batch_size)
 )

@@ -798,23 +785,36 @@ class DatabricksIO(ABC, IO):

 def read_pandas(
 self,
+file_format: Optional[FileFormat] = None,
 batch_size: Optional[int] = None,
 **kwargs
 ):
 """Read the file into a pandas DataFrame.

 Args:
+file_format: Optional file format override.
 batch_size: Optional batch size for reads.
 **kwargs: Format-specific options.

 Returns:
 A pandas DataFrame with the file contents.
 """
-
+file_format = self.path.file_format if file_format is None else file_format
+self.seek(0)
+
+if isinstance(file_format, ParquetFileFormat):
+pandas.read_parquet(self, **kwargs)
+
+elif isinstance(file_format, CsvFileFormat):
+pandas.read_csv(self, **kwargs)
+
+else:
+raise ValueError(f"Unsupported file format for Pandas DataFrame: {file_format}")

 def write_pandas(
 self,
-df,
+df: PandasDataFrame,
+file_format: Optional[FileFormat] = None,
 batch_size: Optional[int] = None,
 **kwargs
 ):

@@ -822,13 +822,26 @@ class DatabricksIO(ABC, IO):

 Args:
 df: pandas DataFrame to write.
+file_format: Optional file format override.
 batch_size: Optional batch size for writes.
 **kwargs: Format-specific options.

 Returns:
 None.
 """
-self.
+file_format = self.path.file_format if file_format is None else FileFormat
+buffer = io.BytesIO()
+
+if isinstance(file_format, ParquetFileFormat):
+df.to_parquet(buffer, **kwargs)
+
+elif isinstance(file_format, CsvFileFormat):
+df.to_csv(buffer, **kwargs)
+
+else:
+raise ValueError(f"Unsupported file format for Pandas DataFrame: {file_format}")
+
+self.write_all_bytes(data=buffer.getvalue())

 # ---- Polars ----

@@ -848,22 +861,21 @@ class DatabricksIO(ABC, IO):
 Returns:
 A polars DataFrame with the file contents.
 """
-import polars as pl
-
 file_format = self.path.file_format if file_format is None else file_format
 self.seek(0)

 if isinstance(file_format, ParquetFileFormat):
-
+polars.read_parquet(self, **kwargs)

-
-
+elif isinstance(file_format, CsvFileFormat):
+polars.read_csv(self, **kwargs)

-
+else:
+raise ValueError(f"Unsupported file format for Polars DataFrame: {file_format}")

 def write_polars(
 self,
-df,
+df: PolarsDataFrame,
 file_format: Optional[FileFormat] = None,
 batch_size: Optional[int] = None,
 **kwargs

@@ -975,28 +987,40 @@ class DatabricksVolumeIO(DatabricksIO):
 """Read bytes from a volume file.

 Args:
-start: Starting byte offset.
+start: Starting byte offset (0-based).
 length: Number of bytes to read.
 allow_not_found: Whether to suppress missing-path errors.

 Returns:
 Bytes read from the file.
 """
-if length
+if length <= 0:
 return b""
+if start < 0:
+raise ValueError(f"start must be >= 0, got {start}")
+if length < 0:
+raise ValueError(f"length must be >= 0, got {length}")

 sdk = self.workspace.sdk()
 client = sdk.files
 full_path = self.path.files_full_path()

-
-
-
-
-.
-
+try:
+resp = client.download(full_path)
+except Exception as e:
+# Databricks SDK exceptions vary a bit by version; keep it pragmatic.
+if allow_not_found and any(s in str(e).lower() for s in ("not found", "not exist", "404")):
+return b""
+raise

-
+data = resp.contents.read()
+
+# If start is past EOF, return empty (common file-like behavior).
+if start >= len(data):
+return b""
+
+end = start + length
+return data[start:end]

 def write_all_bytes(self, data: bytes):
 """Write bytes to a volume file.
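The new guard clauses in `DatabricksVolumeIO.read_all_bytes` boil down to plain byte-slicing semantics. A standalone sketch of just that behavior, with no Databricks calls involved:

def slice_bytes(data: bytes, start: int, length: int) -> bytes:
    # Mirrors the semantics in the hunk above: non-positive length and
    # start-past-EOF both yield b"", otherwise a plain slice of the payload.
    if length <= 0 or start >= len(data):
        return b""
    return data[start:start + length]

assert slice_bytes(b"abcdef", 2, 3) == b"cde"
assert slice_bytes(b"abcdef", 10, 3) == b""
assert slice_bytes(b"abcdef", 0, 0) == b""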
yggdrasil/databricks/workspaces/path.py CHANGED

@@ -12,17 +12,18 @@ from pathlib import PurePosixPath
 from typing import Optional, Tuple, Union, TYPE_CHECKING, List, Iterable

 import pyarrow as pa
+import pyarrow.dataset as ds
 from pyarrow.dataset import FileFormat, ParquetFileFormat, CsvFileFormat, JsonFileFormat
 from pyarrow.fs import FileInfo, FileType, FileSystem
-import pyarrow.dataset as ds

 from .io import DatabricksIO
 from .path_kind import DatabricksPathKind
 from ...libs.databrickslib import databricks
-from ...
+from ...libs.pandaslib import PandasDataFrame
+from ...libs.polarslib import polars, PolarsDataFrame
+from ...types.cast.arrow_cast import cast_arrow_tabular
 from ...types.cast.cast_options import CastOptions
-from ...types.cast.polars_cast import polars_converter
-from ...types.cast.polars_pandas_cast import PolarsDataFrame
+from ...types.cast.polars_cast import polars_converter, cast_polars_dataframe
 from ...types.cast.registry import convert, register_converter

 if databricks is not None:

@@ -494,13 +495,17 @@ class DatabricksPath:

 try:
 info = sdk.files.get_directory_metadata(full_path)
-mtime = (
-dt.datetime.strptime(info.last_modified, "%a, %d %b %Y %H:%M:%S %Z").replace(tzinfo=dt.timezone.utc)
-if info.last_modified
-else None
-)

-
+if info is None:
+mtime = dt.datetime.now(tz=dt.timezone.utc)
+else:
+mtime = (
+dt.datetime.strptime(info.last_modified, "%a, %d %b %Y %H:%M:%S %Z").replace(tzinfo=dt.timezone.utc)
+if info.last_modified
+else None
+)
+
+return self.reset_metadata(is_file=False, is_dir=True, size=0, mtime=mtime)
 except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
 pass

@@ -635,22 +640,12 @@ class DatabricksPath:
 Returns:
 The DatabricksPath instance.
 """
-
-
-
-
-
-
-self.make_dbfs_dir(parents=parents, exist_ok=exist_ok)
-except (NotFound, ResourceDoesNotExist):
-if not parents or self.parent == self:
-raise
-
-self.parent.mkdir(parents=True, exist_ok=True)
-self.mkdir(parents=False, exist_ok=exist_ok)
-except (AlreadyExists, ResourceAlreadyExists):
-if not exist_ok:
-raise
+if self.kind == DatabricksPathKind.WORKSPACE:
+self.make_workspace_dir(parents=parents, exist_ok=exist_ok)
+elif self.kind == DatabricksPathKind.VOLUME:
+self.make_volume_dir(parents=parents, exist_ok=exist_ok)
+elif self.kind == DatabricksPathKind.DBFS:
+self.make_dbfs_dir(parents=parents, exist_ok=exist_ok)

 return self

@@ -766,15 +761,13 @@ class DatabricksPath:
 Returns:
 The DatabricksPath instance.
 """
-
-
-
-
-
-
-
-finally:
-self.reset_metadata()
+if self.kind == DatabricksPathKind.VOLUME:
+return self._remove_volume_file()
+elif self.kind == DatabricksPathKind.WORKSPACE:
+return self._remove_workspace_file()
+elif self.kind == DatabricksPathKind.DBFS:
+return self._remove_dbfs_file()
+
 return self

 def _remove_volume_file(self):

@@ -783,6 +776,9 @@ class DatabricksPath:
 sdk.files.delete(self.files_full_path())
 except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
 pass
+finally:
+self.reset_metadata()
+
 return self

 def _remove_workspace_file(self):

@@ -791,6 +787,9 @@ class DatabricksPath:
 sdk.workspace.delete(self.workspace_full_path(), recursive=True)
 except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
 pass
+finally:
+self.reset_metadata()
+
 return self

 def _remove_dbfs_file(self):

@@ -799,6 +798,9 @@ class DatabricksPath:
 sdk.dbfs.delete(self.dbfs_full_path(), recursive=True)
 except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
 pass
+finally:
+self.reset_metadata()
+
 return self

 def rmdir(self, recursive: bool = True):

@@ -823,7 +825,9 @@ class DatabricksPath:
 sdk.workspace.delete(self.workspace_full_path(), recursive=recursive)
 except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
 pass
-
+finally:
+self.reset_metadata()
+
 return self

 def _remove_dbfs_dir(self, recursive: bool = True):

@@ -832,7 +836,9 @@ class DatabricksPath:
 sdk.dbfs.delete(self.dbfs_full_path(), recursive=recursive)
 except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
 pass
-
+finally:
+self.reset_metadata()
+
 return self

 def _remove_volume_dir(self, recursive: bool = True):

@@ -1038,7 +1044,7 @@ class DatabricksPath:
 Returns:
 None.
 """
-if self.is_file()
+if self.is_file():
 with self.open(mode="rb") as src:
 src.copy_to(dest=dest)

@@ -1063,6 +1069,13 @@ class DatabricksPath:
 else:
 raise FileNotFoundError(f"Path {self} does not exist, or dest is not same file or folder type")

+def write_bytes(self, data: bytes):
+if hasattr(data, "read"):
+data = data.read()
+
+with self.open("wb") as f:
+f.write_all_bytes(data=data)
+
 # -------------------------
 # Data ops (Arrow / Pandas / Polars)
 # -------------------------

@@ -1206,6 +1219,7 @@ class DatabricksPath:

 def read_pandas(
 self,
+file_format: Optional[FileFormat] = None,
 batch_size: Optional[int] = None,
 concat: bool = True,
 **kwargs

@@ -1213,6 +1227,7 @@ class DatabricksPath:
 """Read the path into a pandas DataFrame.

 Args:
+file_format: Optional file format override.
 batch_size: Optional batch size for reads.
 concat: Whether to concatenate results for directories.
 **kwargs: Format-specific options.

@@ -1221,14 +1236,26 @@ class DatabricksPath:
 A pandas DataFrame or list of DataFrames if concat=False.
 """
 if concat:
-return self.read_arrow_table(
+return self.read_arrow_table(
+file_format=file_format,
+batch_size=batch_size,
+concat=True,
+**kwargs
+).to_pandas()
+
+tables = self.read_arrow_table(
+batch_size=batch_size,
+file_format=file_format,
+concat=False,
+**kwargs
+)

-tables = self.read_arrow_table(batch_size=batch_size, concat=False, **kwargs)
 return [t.to_pandas() for t in tables]  # type: ignore[arg-type]

 def write_pandas(
 self,
-df,
+df: PandasDataFrame,
+file_format: Optional[FileFormat] = None,
 batch_size: Optional[int] = None,
 **kwargs
 ):

@@ -1236,13 +1263,41 @@ class DatabricksPath:

 Args:
 df: pandas DataFrame to write.
+file_format: Optional file format override.
 batch_size: Optional batch size for writes.
 **kwargs: Format-specific options.

 Returns:
 The DatabricksPath instance.
 """
-
+with self.connect(clone=False) as connected:
+if connected.is_dir_sink():
+seed = int(time.time() * 1000)
+
+def df_batches(pdf, bs: int):
+for start in range(0, len(pdf), batch_size):
+yield pdf.iloc[start:start + batch_size]
+
+for i, batch in enumerate(df_batches(df, batch_size)):
+part_path = connected / f"{seed}-{i:05d}-{_rand_str(4)}.parquet"
+
+with part_path.open(mode="wb", clone=False) as f:
+f.write_pandas(
+batch,
+file_format=file_format,
+batch_size=batch_size,
+**kwargs
+)
+else:
+with connected.open(mode="wb", clone=False) as f:
+f.write_pandas(
+df,
+file_format=file_format,
+batch_size=batch_size,
+**kwargs
+)
+
+return self

 def read_polars(
 self,

@@ -1264,8 +1319,6 @@ class DatabricksPath:
 Returns:
 A polars DataFrame or list of DataFrames if concat=False.
 """
-import polars as pl
-
 if self.is_file():
 with self.open("rb") as f:
 return f.read_polars(batch_size=batch_size, **kwargs)

@@ -1278,10 +1331,10 @@ class DatabricksPath:
 dfs.append(f.read_polars(batch_size=batch_size, **kwargs))

 if not dfs:
-return
+return polars.DataFrame()

 if concat:
-return
+return polars.concat(dfs, how=how, rechunk=rechunk)
 return dfs  # type: ignore[return-value]

 raise FileNotFoundError(f"Path does not exist: {self}")

@@ -1312,12 +1365,10 @@ class DatabricksPath:
 Notes:
 - If `df` is a LazyFrame, we collect it first (optionally streaming).
 """
-
-
-if isinstance(df, pl.LazyFrame):
+if isinstance(df, polars.LazyFrame):
 df = df.collect()

-if not isinstance(df,
+if not isinstance(df, polars.DataFrame):
 raise TypeError(f"write_polars expects pl.DataFrame or pl.LazyFrame, got {type(df)!r}")

 with self.connect() as connected:
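When writing a pandas DataFrame into a directory-style sink, `write_pandas` now names each part `{seed}-{index:05d}-{random}.parquet`. A standalone sketch of that naming scheme; the `_rand_str` helper below is an assumed stand-in for the one referenced in the diff:

import random
import string
import time

def _rand_str(n: int) -> str:
    # assumed stand-in for the package's _rand_str helper
    return "".join(random.choices(string.ascii_lowercase, k=n))

seed = int(time.time() * 1000)
part_names = [f"{seed}-{i:05d}-{_rand_str(4)}.parquet" for i in range(3)]
print(part_names)  # e.g. ['1764000000000-00000-abcd.parquet', ...]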
yggdrasil/databricks/workspaces/workspace.py CHANGED

@@ -8,7 +8,6 @@ from abc import ABC
 from dataclasses import dataclass
 from pathlib import Path
 from typing import (
-Any,
 BinaryIO,
 Iterator,
 Optional,

@@ -55,7 +54,9 @@ def _get_env_product_version():
 v = os.getenv("DATABRICKS_PRODUCT_VERSION")

 if not v:
-
+if _get_env_product() == "yggdrasil":
+return YGGDRASIL_VERSION
+return None
 return v.strip().lower()

@@ -106,11 +107,12 @@ class Workspace:
 product: Optional[str] = dataclasses.field(default_factory=_get_env_product, repr=False)
 product_version: Optional[str] = dataclasses.field(default_factory=_get_env_product_version, repr=False)
 product_tag: Optional[str] = dataclasses.field(default_factory=_get_env_product_tag, repr=False)
+custom_tags: Optional[dict] = dataclasses.field(default=None, repr=False)

 # Runtime cache (never serialized)
-_sdk:
-_was_connected: bool = dataclasses.field(
-_cached_token: Optional[str] = dataclasses.field(
+_sdk: Optional["WorkspaceClient"] = dataclasses.field(default=None, repr=False, compare=False, hash=False)
+_was_connected: bool = dataclasses.field(default=None, repr=False, compare=False, hash=False)
+_cached_token: Optional[str] = dataclasses.field(default=None, repr=False, compare=False, hash=False)

 # -------------------------
 # Pickle support

@@ -175,19 +177,43 @@ class Workspace:
 # -------------------------
 def clone_instance(
 self,
-**kwargs
 ) -> "Workspace":
 """Clone the workspace config with overrides.

-Args:
-**kwargs: Field overrides for the clone.
-
 Returns:
 A new Workspace instance with updated fields.
 """
-
-
-
+return Workspace(
+host = self.host,
+account_id = self.account_id,
+token = self.token,
+client_id = self.client_id,
+client_secret = self.client_secret,
+token_audience = self.token_audience,
+azure_workspace_resource_id = self.azure_workspace_resource_id,
+azure_use_msi = self.azure_use_msi,
+azure_client_secret = self.azure_client_secret,
+azure_client_id = self.azure_client_id,
+azure_tenant_id = self.azure_tenant_id,
+azure_environment = self.azure_environment,
+google_credentials = self.google_credentials,
+google_service_account = self.google_service_account,
+profile = self.profile,
+config_file = self.config_file,
+auth_type = self.auth_type,
+http_timeout_seconds = self.http_timeout_seconds,
+retry_timeout_seconds = self.retry_timeout_seconds,
+debug_truncate_bytes = self.debug_truncate_bytes,
+debug_headers = self.debug_headers,
+rate_limit = self.rate_limit,
+product = self.product,
+product_version = self.product_version,
+product_tag = self.product_tag,
+custom_tags = self.custom_tags,
+_sdk = self._sdk,
+_was_connected = self._was_connected,
+_cached_token = self._cached_token,
+)

 # -------------------------
 # SDK connection

@@ -300,8 +326,9 @@ class Workspace:
 Drop the cached WorkspaceClient (no actual close needed, but this
 avoids reusing stale config).
 """
-self._sdk
-
+if self._sdk is not None:
+self._sdk = None
+self._was_connected = False

 # ------------------------------------------------------------------ #
 # Properties

@@ -561,28 +588,19 @@ class Workspace:
 Returns:
 A dict of default tags.
 """
-
+base = {
 k: v
 for k, v in (
 ("Product", self.product),
-("ProductVersion", self.product_version),
 ("ProductTag", self.product_tag),
-("ProductUser", self.current_user.user_name)
 )
 if v
 }

-
-
-
-Args:
-existing: Optional existing tags.
+if self.custom_tags:
+base.update(self.custom_tags)

-
-A dict of merged tags.
-"""
-if existing:
-return self.default_tags()
+return base

 def sql(
 self,
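`default_tags()` now layers the new `custom_tags` field on top of the Product/ProductTag pair. A minimal sketch of that merge, with hypothetical values standing in for the workspace fields:

base = {
    k: v
    for k, v in (("Product", "yggdrasil"), ("ProductTag", None))
    if v
}
custom_tags = {"CostCenter": "1234"}   # hypothetical custom tags

if custom_tags:
    base.update(custom_tags)

print(base)  # {'Product': 'yggdrasil', 'CostCenter': '1234'}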
yggdrasil/libs/pandaslib.py CHANGED

@@ -3,9 +3,14 @@
 try:
 import pandas  # type: ignore
 pandas = pandas
+
+PandasDataFrame = pandas.DataFrame
 except ImportError:
 pandas = None

+class PandasDataFrame:
+pass
+

 def require_pandas():
 """Ensure pandas is available before using pandas helpers.

@@ -23,4 +28,5 @@ def require_pandas():
 __all__ = [
 "pandas",
 "require_pandas",
+"PandasDataFrame"
 ]
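The `PandasDataFrame` placeholder exists so that `isinstance` dispatch (as in io.py above) keeps working when pandas is not installed. A sketch of why the sentinel type is needed (same pattern, outside the package):

try:
    import pandas
    PandasDataFrame = pandas.DataFrame
except ImportError:
    pandas = None

    class PandasDataFrame:  # sentinel type: nothing is ever an instance of it
        pass

def looks_like_pandas(obj) -> bool:
    # Safe whether or not pandas is importable; without the sentinel class,
    # this isinstance() check would raise a NameError when pandas is missing.
    return isinstance(obj, PandasDataFrame)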
yggdrasil/libs/polarslib.py CHANGED

@@ -4,13 +4,18 @@ try:
 import polars  # type: ignore

 polars = polars
+
+PolarsDataFrame = polars.DataFrame
 except ImportError:
 polars = None

+class PolarsDataFrame:
+pass

 __all__ = [
 "polars",
 "require_polars",
+"PolarsDataFrame"
 ]

yggdrasil/pyutils/python_env.py CHANGED

@@ -16,7 +16,7 @@ import sys
 import tempfile
 import threading
 from contextlib import contextmanager
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Any, Iterable, Iterator, Mapping, MutableMapping, Optional, Union, List, Tuple

@@ -415,11 +415,13 @@ def _locked_env(root: Path):
 # PythonEnv
 # -----------------------

-@dataclass
+@dataclass
 class PythonEnv:
 """Represent a managed Python environment rooted at a filesystem path."""
 root: Path

+_version: Optional[str] = field(default=None, repr=False)
+
 def __post_init__(self) -> None:
 """Normalize the root path after dataclass initialization.

@@ -862,8 +864,9 @@ class PythonEnv:
 Returns:
 Version string.
 """
-
-
+if self._version is None:
+self._version = self.exec_code("import sys; print(sys.version.split()[0])", check=True).strip()
+return self._version

 @property
 def version_info(self) -> tuple[int, int, int]:
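The `version` property is now computed once and cached in the new `_version` field. A self-contained sketch of the same lazy-caching pattern, with `subprocess` standing in for `PythonEnv.exec_code` (an assumption):

import subprocess
import sys

class LazyVersion:
    def __init__(self) -> None:
        self._version = None

    @property
    def version(self) -> str:
        # Run the interpreter only on first access, then reuse the cached value.
        if self._version is None:
            out = subprocess.run(
                [sys.executable, "-c", "import sys; print(sys.version.split()[0])"],
                capture_output=True, text=True, check=True,
            )
            self._version = out.stdout.strip()
        return self._version

print(LazyVersion().version)  # e.g. "3.11.9"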
yggdrasil/version.py CHANGED

@@ -1 +1 @@
-__version__ = "0.1.48"
+__version__ = "0.1.50"
Files without changes:
- {ygg-0.1.48.dist-info → ygg-0.1.50.dist-info}/WHEEL
- {ygg-0.1.48.dist-info → ygg-0.1.50.dist-info}/entry_points.txt
- {ygg-0.1.48.dist-info → ygg-0.1.50.dist-info}/licenses/LICENSE
- {ygg-0.1.48.dist-info → ygg-0.1.50.dist-info}/top_level.txt