ygg 0.1.47__py3-none-any.whl → 0.1.49__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ygg-0.1.47.dist-info → ygg-0.1.49.dist-info}/METADATA +3 -29
- {ygg-0.1.47.dist-info → ygg-0.1.49.dist-info}/RECORD +24 -25
- yggdrasil/__init__.py +0 -4
- yggdrasil/databricks/compute/cluster.py +99 -20
- yggdrasil/databricks/compute/execution_context.py +19 -11
- yggdrasil/databricks/compute/remote.py +4 -1
- yggdrasil/databricks/sql/__init__.py +1 -2
- yggdrasil/databricks/sql/exceptions.py +44 -0
- yggdrasil/databricks/sql/statement_result.py +17 -40
- yggdrasil/databricks/workspaces/__init__.py +0 -1
- yggdrasil/databricks/workspaces/io.py +21 -9
- yggdrasil/databricks/workspaces/path.py +9 -5
- yggdrasil/databricks/workspaces/workspace.py +45 -27
- yggdrasil/dataclasses/__init__.py +1 -3
- yggdrasil/dataclasses/dataclass.py +1 -167
- yggdrasil/libs/__init__.py +0 -3
- yggdrasil/pyutils/python_env.py +7 -4
- yggdrasil/requests/__init__.py +2 -3
- yggdrasil/types/cast/cast_options.py +3 -4
- yggdrasil/version.py +1 -1
- yggdrasil/types/libs.py +0 -12
- {ygg-0.1.47.dist-info → ygg-0.1.49.dist-info}/WHEEL +0 -0
- {ygg-0.1.47.dist-info → ygg-0.1.49.dist-info}/entry_points.txt +0 -0
- {ygg-0.1.47.dist-info → ygg-0.1.49.dist-info}/licenses/LICENSE +0 -0
- {ygg-0.1.47.dist-info → ygg-0.1.49.dist-info}/top_level.txt +0 -0
{ygg-0.1.47.dist-info → ygg-0.1.49.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ygg
-Version: 0.1.47
+Version: 0.1.49
 Summary: Type-friendly utilities for moving data between Python objects, Arrow, Polars, Pandas, Spark, and Databricks
 Author: Yggdrasil contributors
 License: Apache License
@@ -235,6 +235,8 @@ Requires-Dist: pytest-asyncio; extra == "dev"
 Requires-Dist: black; extra == "dev"
 Requires-Dist: ruff; extra == "dev"
 Requires-Dist: mypy; extra == "dev"
+Requires-Dist: build; extra == "dev"
+Requires-Dist: twine; extra == "dev"
 Dynamic: license-file
 
 # Yggdrasil (Python)
@@ -270,34 +272,6 @@ Extras are grouped by engine:
 - `.[polars]`, `.[pandas]`, `.[spark]`, `.[databricks]` – install only the integrations you need.
 - `.[dev]` – adds testing, linting, and typing tools (`pytest`, `ruff`, `black`, `mypy`).
 
-## Quickstart
-Define an Arrow-aware dataclass, coerce inputs, and cast across containers:
-
-```python
-from yggdrasil import yggdataclass
-from yggdrasil.types.cast import convert
-from yggdrasil.types import arrow_field_from_hint
-
-@yggdataclass
-class User:
-    id: int
-    email: str
-    active: bool = True
-
-user = User.__safe_init__("123", email="alice@example.com")
-assert user.id == 123 and user.active is True
-
-payload = {"id": "45", "email": "bob@example.com", "active": "false"}
-clean = User.from_dict(payload)
-print(clean.to_dict())
-
-field = arrow_field_from_hint(User, name="user")
-print(field)  # user: struct<id: int64, email: string, active: bool>
-
-numbers = convert(["1", "2", "3"], list[int])
-print(numbers)
-```
-
 ### Databricks example
 Install the `databricks` extra and run SQL with typed results:
{ygg-0.1.47.dist-info → ygg-0.1.49.dist-info}/RECORD
CHANGED
@@ -1,28 +1,28 @@
-ygg-0.1.47.dist-info/licenses/LICENSE,sha256=…
-yggdrasil/__init__.py,sha256=…
-yggdrasil/version.py,sha256=…
+ygg-0.1.49.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
+yggdrasil/__init__.py,sha256=4-ghPak2S6zfMqmnlxW2GCgPb5s79znpKa2hGEGXcE4,24
+yggdrasil/version.py,sha256=pnii9XXudF0U50FobVvNgNzGy9lA9q_DntGQAvyqaFA,22
 yggdrasil/databricks/__init__.py,sha256=skctY2c8W-hI81upx9F_PWRe5ishL3hrdiTuizgDjdw,152
 yggdrasil/databricks/compute/__init__.py,sha256=NvdzmaJSNYY1uJthv1hHdBuNu3bD_-Z65DWnaJt9yXg,289
-yggdrasil/databricks/compute/cluster.py,sha256=…
-yggdrasil/databricks/compute/execution_context.py,sha256=…
-yggdrasil/databricks/compute/remote.py,sha256=…
+yggdrasil/databricks/compute/cluster.py,sha256=YomLfvB0oxbgl6WDgBRxI1UXsxwlEbR6gq3FUbPHscY,44199
+yggdrasil/databricks/compute/execution_context.py,sha256=jIV6uru2NeX3O5lg-3KEqmXtLxxq45CFgkBQgQIIOHQ,23327
+yggdrasil/databricks/compute/remote.py,sha256=yicEhyQypssRa2ByscO36s3cBkEgORFsRME9aaq91Pc,3045
 yggdrasil/databricks/jobs/__init__.py,sha256=snxGSJb0M5I39v0y3IR-uEeSlZR248cQ_4DJ1sYs-h8,154
 yggdrasil/databricks/jobs/config.py,sha256=9LGeHD04hbfy0xt8_6oobC4moKJh4_DTjZiK4Q2Tqjk,11557
-yggdrasil/databricks/sql/__init__.py,sha256=…
+yggdrasil/databricks/sql/__init__.py,sha256=Vp_1cFaX1l-JGzCknvkbiB8CBFX2fQbBNntIeVn3lEg,231
 yggdrasil/databricks/sql/engine.py,sha256=K5WmGKpXU78JA3UdK8dLxBD_GXKidZJFe7hytuC5UHg,41029
-yggdrasil/databricks/sql/exceptions.py,sha256=…
-yggdrasil/databricks/sql/statement_result.py,sha256=…
+yggdrasil/databricks/sql/exceptions.py,sha256=uC-BoG0u0LtORKUS1X3iLID8nc-0TV5MQN3M8RXHsO4,1495
+yggdrasil/databricks/sql/statement_result.py,sha256=kMBvpwyRv3_JUZSvxMS0c9Vqlh6LtCRJvXsDpu9RIAs,16137
 yggdrasil/databricks/sql/types.py,sha256=5G-BM9_eOsRKEMzeDTWUsWW5g4Idvs-czVCpOCrMhdA,6412
 yggdrasil/databricks/sql/warehouse.py,sha256=1J0dyQLJb-OS1_1xU1eAVZ4CoL2-FhFeowKSvU3RzFc,9773
-yggdrasil/databricks/workspaces/__init__.py,sha256=…
+yggdrasil/databricks/workspaces/__init__.py,sha256=dv2zotoFVhNFlTCdRq6gwf5bEzeZkOZszoNZMs0k59g,114
 yggdrasil/databricks/workspaces/filesytem.py,sha256=Z8JXU7_XUEbw9fpTQT1avRQKi-IAP2KemXBMPkUoY4w,9805
-yggdrasil/databricks/workspaces/io.py,sha256=…
-yggdrasil/databricks/workspaces/path.py,sha256=…
+yggdrasil/databricks/workspaces/io.py,sha256=CDq9NsYFjlSJ1QbKFlfWvZLQPVoWyZ4b3XR_lxNPcZE,32776
+yggdrasil/databricks/workspaces/path.py,sha256=BxDwxE7q1-NLKEZQT4xLM3LeCeQKO3wUy7R-Ce-cSMk,47875
 yggdrasil/databricks/workspaces/path_kind.py,sha256=Xc319NysH8_6E9C0Q8nCxDHYG07_SnzyUVKHe0dNdDQ,305
-yggdrasil/databricks/workspaces/workspace.py,sha256=…
-yggdrasil/dataclasses/__init__.py,sha256=…
-yggdrasil/dataclasses/dataclass.py,sha256=…
-yggdrasil/libs/__init__.py,sha256=…
+yggdrasil/databricks/workspaces/workspace.py,sha256=zBlQdYNT_xKwUCYo3O4Q4g-8pfMvff3I26efyCfY_TY,24961
+yggdrasil/dataclasses/__init__.py,sha256=_RkhfF3KC1eSORby1dzvBXQ0-UGG3u6wyUQWX2jq1Pc,108
+yggdrasil/dataclasses/dataclass.py,sha256=LxrCjwvmBnb8yRI_N-c31RHHxB4XoJPixmKg9iBIuaI,1148
+yggdrasil/libs/__init__.py,sha256=zdC9OU0Xy36CLY9mg2drxN6S7isPR8aTLzJA6xVIeLE,91
 yggdrasil/libs/databrickslib.py,sha256=NHJeUViHhZc8LI5oDVfi1axRyUy_pDJLy4hjD0KZEBQ,980
 yggdrasil/libs/pandaslib.py,sha256=Edm3SXgvr8qe2wsojuRvD1ewNB-Sff0RWoTqaddVruI,509
 yggdrasil/libs/polarslib.py,sha256=7EWP5iS8F9cW79M6d8Yg5ysjnOY3w4_k7TW-5DCRACw,511
@@ -37,18 +37,17 @@ yggdrasil/pyutils/exceptions.py,sha256=ssKNm-rjhavHUOZmGA7_1Gq9tSHDrb2EFI-cnBuWg
 yggdrasil/pyutils/expiring_dict.py,sha256=pr2u25LGwPVbLfsLptiHGovUtYRRo0AMjaJtCtJl7nQ,8477
 yggdrasil/pyutils/modules.py,sha256=B7IP99YqUMW6-DIESFzBx8-09V1d0a8qrIJUDFhhL2g,11424
 yggdrasil/pyutils/parallel.py,sha256=ubuq2m9dJzWYUyKCga4Y_9bpaeMYUrleYxdp49CHr44,6781
-yggdrasil/pyutils/python_env.py,sha256=…
+yggdrasil/pyutils/python_env.py,sha256=Gh5geFK9ABpyWEfyegGUfIJUoPxKwcH0pqLBiMrW9Rw,51103
 yggdrasil/pyutils/retry.py,sha256=n5sr-Zu7fYrdLbjJ4WifK2lk0gEGmHv5FYt2HaCm1Qc,11916
-yggdrasil/requests/__init__.py,sha256=…
+yggdrasil/requests/__init__.py,sha256=dMesyzq97_DmI765x0TwaDPEfsxFtgGNgchk8LvEN-o,103
 yggdrasil/requests/msal.py,sha256=s2GCyzbgFdgdlJ1JqMrZ4qYVbmoG46-ZOTcaVQhZ-sQ,9220
 yggdrasil/requests/session.py,sha256=SLnrgHY0Lby7ZxclRFUjHdfM8euN_8bSQEWl7TkJY2U,1461
 yggdrasil/types/__init__.py,sha256=CrLiDeYNM9fO975sE5ufeVKcy7Ca702IsaG2Pk8T3YU,139
-yggdrasil/types/libs.py,sha256=2iRT9JDUdr9seuGz9ZR3wWdrxZ8LRnc9i-m_tkKdKgI,293
 yggdrasil/types/python_arrow.py,sha256=mOhyecAxa5u8JWsyTO26OMOWimHHgwLKWlkNSAyIVas,25636
 yggdrasil/types/python_defaults.py,sha256=GO3hZBZcwRHs9qiXes75y8l5X00kZHTfEC7el_x73uw,10184
 yggdrasil/types/cast/__init__.py,sha256=Oft3pTs2bRM5hT7YqJAuOKTYYk-SACLaMOXUVdafy_I,311
 yggdrasil/types/cast/arrow_cast.py,sha256=_OMYc4t5GlgE4ztlWaCoK8Jnba09rgDbmHVP-QXhOL0,41523
-yggdrasil/types/cast/cast_options.py,sha256=…
+yggdrasil/types/cast/cast_options.py,sha256=nDaEvCCs7TBamhTWyDrYf3LVaBWzioIP2Q5_LXrChF4,15532
 yggdrasil/types/cast/pandas_cast.py,sha256=I3xu0sZ59ZbK3NDcQ2dslzdeKzhpFV5zR02ZEixd5hI,8713
 yggdrasil/types/cast/polars_cast.py,sha256=K2nnQ7bexArneYEhUPgV_6er4JNq6N5RmbMUhw-2_Xw,28766
 yggdrasil/types/cast/polars_pandas_cast.py,sha256=CS0P7teVv15IdX5g7v40RfkH1VMg6b-HM0V_gOfacm8,5071
@@ -56,8 +55,8 @@ yggdrasil/types/cast/registry.py,sha256=_zdFGmUBB7P-e_LIcJlOxMcxAkXoA-UXB6HqLMgT
 yggdrasil/types/cast/spark_cast.py,sha256=_KAsl1DqmKMSfWxqhVE7gosjYdgiL1C5bDQv6eP3HtA,24926
 yggdrasil/types/cast/spark_pandas_cast.py,sha256=BuTiWrdCANZCdD_p2MAytqm74eq-rdRXd-LGojBRrfU,5023
 yggdrasil/types/cast/spark_polars_cast.py,sha256=btmZNHXn2NSt3fUuB4xg7coaE0RezIBdZD92H8NK0Jw,9073
-ygg-0.1.47.dist-info/METADATA,sha256=…
-ygg-0.1.47.dist-info/WHEEL,sha256=…
-ygg-0.1.47.dist-info/entry_points.txt,sha256=…
-ygg-0.1.47.dist-info/top_level.txt,sha256=…
-ygg-0.1.47.dist-info/RECORD,,
+ygg-0.1.49.dist-info/METADATA,sha256=CHTqeVyiYa1868ZDwISDHKyXYxPeUH0mHhvHLYYoDbg,18528
+ygg-0.1.49.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ygg-0.1.49.dist-info/entry_points.txt,sha256=6q-vpWG3kvw2dhctQ0LALdatoeefkN855Ev02I1dKGY,70
+ygg-0.1.49.dist-info/top_level.txt,sha256=iBe9Kk4VIVbLpgv_p8OZUIfxgj4dgJ5wBg6vO3rigso,10
+ygg-0.1.49.dist-info/RECORD,,
yggdrasil/__init__.py
CHANGED
yggdrasil/databricks/compute/cluster.py
CHANGED
@@ -22,8 +22,8 @@ from typing import Any, Iterator, Optional, Union, List, Callable, Dict, ClassVa…
 
 from .execution_context import ExecutionContext
 from ..workspaces.workspace import WorkspaceService, Workspace
-from ... import CallableSerde
 from ...libs.databrickslib import databricks_sdk
+from ...pyutils.callable_serde import CallableSerde
 from ...pyutils.equality import dicts_equal, dict_diff
 from ...pyutils.expiring_dict import ExpiringDict
 from ...pyutils.modules import PipIndexSettings
@@ -36,7 +36,8 @@ else:  # pragma: no cover - runtime fallback when SDK is missing
 from databricks.sdk.errors import DatabricksError
 from databricks.sdk.errors.platform import ResourceDoesNotExist
 from databricks.sdk.service.compute import (
-    ClusterDetails, Language, Kind, State, DataSecurityMode, Library, PythonPyPiLibrary, LibraryInstallStatus
+    ClusterDetails, Language, Kind, State, DataSecurityMode, Library, PythonPyPiLibrary, LibraryInstallStatus,
+    ClusterAccessControlRequest, ClusterPermissionLevel
 )
 from databricks.sdk.service.compute import SparkVersion, RuntimeEngine
 
@@ -143,6 +144,7 @@ class Cluster(WorkspaceService):
         single_user_name: Optional[str] = None,
         runtime_engine: Optional["RuntimeEngine"] = None,
         libraries: Optional[list[str]] = None,
+        update_timeout: Optional[Union[float, dt.timedelta]] = dt.timedelta(minutes=20),
         **kwargs
     ) -> "Cluster":
         """Create or reuse a cluster that mirrors the current Python environment.
@@ -151,9 +153,10 @@ class Cluster(WorkspaceService):
             workspace: Workspace to use for the cluster.
             cluster_id: Optional cluster id to reuse.
             cluster_name: Optional cluster name to reuse.
-            single_user_name: Optional…
+            single_user_name: Optional username for single-user clusters.
             runtime_engine: Optional Databricks runtime engine.
             libraries: Optional list of libraries to install.
+            update_timeout: wait timeout, if None it will not wait completion
             **kwargs: Additional cluster specification overrides.
 
         Returns:
@@ -175,6 +178,7 @@ class Cluster(WorkspaceService):
             single_user_name=single_user_name,
             runtime_engine=runtime_engine,
             libraries=libraries,
+            update_timeout=update_timeout,
             **kwargs
         )
     )
@@ -189,6 +193,7 @@ class Cluster(WorkspaceService):
         single_user_name: Optional[str] = "current",
         runtime_engine: Optional["RuntimeEngine"] = None,
         libraries: Optional[list[str]] = None,
+        update_timeout: Optional[Union[float, dt.timedelta]] = dt.timedelta(minutes=20),
         **kwargs
     ) -> "Cluster":
         """Create/update a cluster to match the local Python environment.
@@ -197,9 +202,10 @@ class Cluster(WorkspaceService):
             source: Optional PythonEnv to mirror (defaults to current).
             cluster_id: Optional cluster id to update.
             cluster_name: Optional cluster name to update.
-            single_user_name: Optional single…
+            single_user_name: Optional single username for the cluster.
             runtime_engine: Optional runtime engine selection.
             libraries: Optional list of libraries to install.
+            update_timeout: wait timeout, if None it will not wait completion
             **kwargs: Additional cluster specification overrides.
 
         Returns:
@@ -241,6 +247,7 @@ class Cluster(WorkspaceService):
             single_user_name=single_user_name,
             runtime_engine=runtime_engine or RuntimeEngine.PHOTON,
             libraries=libraries,
+            update_timeout=update_timeout,
             **kwargs
         )
 
@@ -379,7 +386,9 @@ class Cluster(WorkspaceService):
         start = time.time()
         sleep_time = tick
 
-        if …
+        if not timeout:
+            timeout = 20 * 60.0
+        elif isinstance(timeout, dt.timedelta):
             timeout = timeout.total_seconds()
 
         while self.is_pending:
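The new guard in the wait loop normalizes `timeout` before polling: falsy values (None, 0) fall back to a 20-minute default and `timedelta` values are converted to seconds. A standalone sketch of that normalization (names here are illustrative, not the package's):

```python
import datetime as dt
from typing import Optional, Union

def normalize_timeout(timeout: Optional[Union[float, dt.timedelta]]) -> float:
    """Mirror the cluster wait logic: default to 20 minutes, accept timedeltas."""
    if not timeout:
        return 20 * 60.0
    if isinstance(timeout, dt.timedelta):
        return timeout.total_seconds()
    return float(timeout)

assert normalize_timeout(None) == 1200.0
assert normalize_timeout(dt.timedelta(minutes=5)) == 300.0
assert normalize_timeout(90) == 90.0
```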
@@ -411,12 +420,14 @@
         # Extract "major.minor" from strings like "17.3.x-scala2.13-ml-gpu"
         v = self.spark_version
 
-        if v …
+        if not v:
             return None
 
         parts = v.split(".")
+
         if len(parts) < 2:
             return None
+
         return ".".join(parts[:2])  # e.g. "17.3"
 
     @property
@@ -427,8 +438,10 @@
         When the runtime can't be mapped, returns ``None``.
         """
         v = self.runtime_version
-        …
+
+        if not v:
             return None
+
         return _PYTHON_BY_DBR.get(v)
 
     # ------------------------------------------------------------------ #
@@ -585,6 +598,7 @@
         cluster_id: Optional[str] = None,
         cluster_name: Optional[str] = None,
         libraries: Optional[List[Union[str, "Library"]]] = None,
+        update_timeout: Optional[Union[float, dt.timedelta]] = dt.timedelta(minutes=20),
         **cluster_spec: Any
     ):
         """Create a new cluster or update an existing one.
@@ -593,6 +607,7 @@
             cluster_id: Optional cluster id to update.
             cluster_name: Optional cluster name to update or create.
             libraries: Optional libraries to install.
+            update_timeout: wait timeout, if None it will not wait completion
             **cluster_spec: Cluster specification overrides.
 
         Returns:
@@ -608,24 +623,28 @@
             return found.update(
                 cluster_name=cluster_name,
                 libraries=libraries,
+                wait_timeout=update_timeout,
                 **cluster_spec
             )
 
         return self.create(
             cluster_name=cluster_name,
             libraries=libraries,
+            wait_timeout=update_timeout,
             **cluster_spec
         )
 
     def create(
         self,
         libraries: Optional[List[Union[str, "Library"]]] = None,
+        wait_timeout: Union[float, dt.timedelta] = dt.timedelta(minutes=20),
         **cluster_spec: Any
     ) -> str:
         """Create a new cluster and optionally install libraries.
 
         Args:
             libraries: Optional list of libraries to install after creation.
+            wait_timeout: wait timeout, if None it will not wait completion
             **cluster_spec: Cluster specification overrides.
 
         Returns:
@@ -645,27 +664,32 @@
             update_details,
         )
 
-        self.details = self.clusters_client().…
+        self.details = self.clusters_client().create(**update_details)
 
         LOGGER.info(
             "Created %s",
             self
         )
 
-        self.install_libraries(libraries=libraries, raise_error=False)
+        self.install_libraries(libraries=libraries, raise_error=False, wait_timeout=None)
+
+        if wait_timeout:
+            self.wait_for_status(timeout=wait_timeout)
 
         return self
 
     def update(
         self,
         libraries: Optional[List[Union[str, "Library"]]] = None,
-        …
+        access_control_list: Optional[List["ClusterAccessControlRequest"]] = None,
+        wait_timeout: Optional[Union[float, dt.timedelta]] = dt.timedelta(minutes=20),
         **cluster_spec: Any
     ) -> "Cluster":
         """Update cluster configuration and optionally install libraries.
 
         Args:
             libraries: Optional libraries to install.
+            access_control_list: List of permissions
             wait_timeout: waiting timeout until done, if None it does not wait
             **cluster_spec: Cluster specification overrides.
 
@@ -705,8 +729,9 @@
             self, diff
         )
 
-        self.wait_for_status()
+        self.wait_for_status(timeout=wait_timeout)
         self.clusters_client().edit(**update_details)
+        self.update_permissions(access_control_list=access_control_list)
 
         LOGGER.info(
             "Updated %s",
@@ -718,6 +743,56 @@
 
         return self
 
+    def update_permissions(
+        self,
+        access_control_list: Optional[List["ClusterAccessControlRequest"]] = None,
+    ):
+        if not access_control_list:
+            return self
+
+        access_control_list = self._check_permission(access_control_list)
+
+        self.clusters_client().update_permissions(
+            cluster_id=self.cluster_id,
+            access_control_list=access_control_list
+        )
+
+    def default_permissions(self):
+        current_groups = self.current_user.groups or []
+
+        return [
+            ClusterAccessControlRequest(
+                group_name=name,
+                permission_level=ClusterPermissionLevel.CAN_MANAGE
+            )
+            for name in current_groups
+            if name not in {"users"}
+        ]
+
+    def _check_permission(
+        self,
+        permission: Union[str, "ClusterAccessControlRequest", List[Union[str, "ClusterAccessControlRequest"]]],
+    ):
+        if isinstance(permission, ClusterAccessControlRequest):
+            return permission
+
+        if isinstance(permission, str):
+            if "@" in permission:
+                group_name, user_name = None, permission
+            else:
+                group_name, user_name = permission, None
+
+            return ClusterAccessControlRequest(
+                group_name=group_name,
+                user_name=user_name,
+                permission_level=ClusterPermissionLevel.CAN_MANAGE
+            )
+
+        return [
+            self._check_permission(_)
+            for _ in permission
+        ]
+
     def list_clusters(self) -> Iterator["Cluster"]:
         """Iterate clusters, yielding helpers annotated with metadata.
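The new permission helpers accept plain strings as a shorthand: `_check_permission` treats any string containing `@` as a user name and anything else as a group name, granting `CAN_MANAGE` in both cases. A hedged usage sketch (assumes a connected `Cluster` instance named `cluster`; the grant values are illustrative):

```python
# Strings are coerced to CAN_MANAGE grants: "@" selects user_name, else group_name.
cluster.update_permissions(
    access_control_list=[
        "data-engineering",   # illustrative group grant
        "alice@example.com",  # illustrative user grant
    ]
)

# Or seed from the defaults: CAN_MANAGE for each of the caller's groups,
# skipping the catch-all "users" group.
cluster.update_permissions(access_control_list=cluster.default_permissions())
```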
@@ -809,18 +884,22 @@
         Returns:
             The current Cluster instance.
         """
+        if self.is_running:
+            return self
+
         self.wait_for_status()
 
-        if …
-        …
+        if self.is_running:
+            return self
 
-        …
-        …
-        …
-        else:
-            self.clusters_client().start(cluster_id=self.cluster_id)
+        LOGGER.debug("Starting %s", self)
+
+        self.clusters_client().start(cluster_id=self.cluster_id)
 
-        …
+        LOGGER.info("Started %s", self)
+
+        if wait_timeout:
+            self.wait_for_status(timeout=wait_timeout.total_seconds())
 
         return self
@@ -836,7 +915,7 @@
 
         if self.is_running:
             self.details = self.clusters_client().restart_and_wait(cluster_id=self.cluster_id)
-        return self
+            return self
 
         return self.start()
yggdrasil/databricks/compute/execution_context.py
CHANGED
@@ -180,7 +180,7 @@ print(json.dumps(meta))"""
         """
         return self.cluster.workspace.sdk()
 
-    def …
+    def create_command(
         self,
         language: "Language",
     ) -> any:
@@ -192,17 +192,29 @@ print(json.dumps(meta))"""
         Returns:
             The created command execution context response.
         """
-        self.cluster.ensure_running()
-
         LOGGER.debug(
             "Creating Databricks command execution context for %s",
             self.cluster
         )
 
-        …
-        …
-        …
+        try:
+            created = self._workspace_client().command_execution.create_and_wait(
+                cluster_id=self.cluster.cluster_id,
+                language=language,
+            )
+        except:
+            self.cluster.ensure_running()
+
+            created = self._workspace_client().command_execution.create_and_wait(
+                cluster_id=self.cluster.cluster_id,
+                language=language,
+            )
+
+        LOGGER.info(
+            "Created Databricks command execution context %s",
+            self
         )
+
         created = getattr(created, "response", created)
 
         return created
@@ -220,10 +232,6 @@ print(json.dumps(meta))"""
         The connected ExecutionContext instance.
         """
         if self.context_id is not None:
-            LOGGER.debug(
-                "Execution context already open for %s",
-                self
-            )
             return self
 
         self.language = language or self.language
@@ -231,7 +239,7 @@ print(json.dumps(meta))"""
         if self.language is None:
             self.language = Language.PYTHON
 
-        ctx = self.…
+        ctx = self.create_command(language=self.language)
 
         context_id = ctx.id
         if not context_id:
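`create_command` now calls the SDK optimistically and only falls back to `ensure_running()` plus one retry when creation fails, which avoids a status round-trip when the cluster is already up. The same call-then-repair shape in isolation (a generic sketch, not the package's code):

```python
from typing import Callable, TypeVar

T = TypeVar("T")

def call_with_repair(call: Callable[[], T], repair: Callable[[], None]) -> T:
    """Try the call first; repair the precondition and retry once on failure."""
    try:
        return call()
    except Exception:
        repair()
        return call()
```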
yggdrasil/databricks/compute/remote.py
CHANGED
@@ -39,6 +39,7 @@ def databricks_remote_compute(
     timeout: Optional[dt.timedelta] = None,
     env_keys: Optional[List[str]] = None,
     force_local: bool = False,
+    update_timeout: Optional[Union[float, dt.timedelta]] = None,
     **options
 ) -> Callable[[Callable[..., ReturnType]], Callable[..., ReturnType]]:
     """Return a decorator that executes functions on a remote cluster.
@@ -52,6 +53,7 @@ def databricks_remote_compute(
         timeout: Optional execution timeout for remote calls.
         env_keys: Optional environment variable names to forward.
         force_local: Force local execution
+        update_timeout: creation or update wait timeout
         **options: Extra options forwarded to the execution decorator.
 
     Returns:
@@ -82,7 +84,8 @@ def databricks_remote_compute(
         cluster = workspace.clusters().replicated_current_environment(
             workspace=workspace,
             cluster_name=cluster_name,
-            single_user_name=workspace.current_user.user_name
+            single_user_name=workspace.current_user.user_name,
+            update_timeout=update_timeout
         )
 
         cluster.ensure_running(wait_timeout=None)
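With the new parameter threaded through, callers of the decorator can bound how long cluster creation or update may take before remote execution starts. A hedged usage sketch (the cluster name and function body are illustrative, and `cluster_name` as a decorator keyword is assumed from its use inside the wrapper above):

```python
import datetime as dt

from yggdrasil.databricks.compute.remote import databricks_remote_compute

@databricks_remote_compute(
    cluster_name="ygg-remote-example",        # illustrative cluster name
    update_timeout=dt.timedelta(minutes=10),  # bound the create/update wait
)
def add(a: int, b: int) -> int:
    # Runs on the replicated remote cluster unless force_local is set.
    return a + b
```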
yggdrasil/databricks/sql/__init__.py
CHANGED
@@ -1,9 +1,8 @@
 """Databricks SQL helpers and engine wrappers."""
 
 from .engine import SQLEngine, StatementResult
+from .exceptions import SqlStatementError
 
 # Backwards compatibility
 DBXSQL = SQLEngine
 DBXStatementResult = StatementResult
-
-__all__ = ["SQLEngine", "StatementResult"]
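The module now re-exports the new exception alongside the engine types, and the legacy aliases survive even though the explicit `__all__` was dropped. A quick sketch of the resulting import surface:

```python
from yggdrasil.databricks.sql import (
    SQLEngine,
    StatementResult,
    SqlStatementError,
    DBXSQL,              # legacy alias for SQLEngine
    DBXStatementResult,  # legacy alias for StatementResult
)

assert DBXSQL is SQLEngine
assert DBXStatementResult is StatementResult
```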
yggdrasil/databricks/sql/exceptions.py
CHANGED
@@ -1 +1,45 @@
 """Custom exceptions for Databricks SQL helpers."""
+from dataclasses import dataclass
+from typing import Optional, Any
+
+__all__ = [
+    "SqlStatementError"
+]
+
+
+@dataclass(frozen=True)
+class SqlStatementError(RuntimeError):
+    statement_id: str
+    state: str
+    message: str
+    error_code: Optional[str] = None
+    sql_state: Optional[str] = None
+
+    def __str__(self) -> str:
+        meta = []
+        if self.error_code:
+            meta.append(f"code={self.error_code}")
+        if self.sql_state:
+            meta.append(f"state={self.sql_state}")
+
+        meta_str = f" ({', '.join(meta)})" if meta else ""
+        return f"SQL statement {self.statement_id} failed [{self.state}]: {self.message}{meta_str}"
+
+    @classmethod
+    def from_statement(cls, stmt: Any) -> "SqlStatementError":
+        statement_id = getattr(stmt, "statement_id", "<unknown>")
+        state = getattr(stmt, "state", "<unknown>")
+
+        err = getattr(getattr(stmt, "status", None), "error", None)
+
+        message = getattr(err, "message", None) or "Unknown SQL error"
+        error_code = getattr(err, "error_code", None)
+        sql_state = getattr(err, "sql_state", None)
+
+        return cls(
+            statement_id=str(statement_id),
+            state=str(state),
+            message=str(message),
+            error_code=str(error_code) if error_code is not None else None,
+            sql_state=str(sql_state) if sql_state is not None else None,
+        )
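Because `SqlStatementError` is a frozen dataclass subclassing `RuntimeError`, callers get structured fields instead of parsing the old `ValueError` message. A hedged handling sketch (assumes a `StatementResult` named `result`, as produced by the SQL engine):

```python
from yggdrasil.databricks.sql import SqlStatementError

try:
    result.wait()  # wait() calls raise_for_status() once the statement finishes
except SqlStatementError as e:
    # Structured access instead of string parsing:
    print(e.statement_id, e.state, e.error_code, e.sql_state)
    raise
```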
yggdrasil/databricks/sql/statement_result.py
CHANGED
@@ -9,6 +9,7 @@ from typing import Optional, Iterator, TYPE_CHECKING
 import pyarrow as pa
 import pyarrow.ipc as pipc
 
+from .exceptions import SqlStatementError
 from .types import column_info_to_arrow_field
 from ...libs.databrickslib import databricks_sdk
 from ...libs.pandaslib import pandas
@@ -32,9 +33,7 @@
 if databricks_sdk is not None:
     from databricks.sdk.service.sql import (
         StatementState, StatementResponse, Disposition, StatementStatus
-    )
-
-    StatementResponse = StatementResponse
+    )
 else:
     class StatementResponse:
         pass
@@ -299,28 +298,8 @@ class StatementResult:
         )
 
     def raise_for_status(self):
-        """Raise a ValueError if the statement failed.
-
-        Returns:
-            None.
-        """
         if self.failed:
-
-            err = self.status.error
-            message = err.message or "Unknown SQL error"
-            error_code = err.error_code
-            sql_state = getattr(err, "sql_state", None)
-
-            parts = [message]
-            if error_code:
-                parts.append(f"error_code={error_code}")
-            if sql_state:
-                parts.append(f"sql_state={sql_state}")
-
-            raise ValueError(
-                f"Statement {self.statement_id} {self.state}: " + " | ".join(parts)
-            )
-
+            raise SqlStatementError.from_statement(self)
         return self
 
     def wait(
@@ -337,22 +316,20 @@
         Returns:
             The current StatementResult instance.
         """
-        if self.done:
-            …
-        )
-
-        poll_interval = max(10, poll_interval * 1.2)
-        time.sleep(poll_interval)
+        if not self.done:
+            start = time.time()
+            poll_interval = poll_interval or 1
+
+            while not self.done:
+                # still running / queued / pending
+                if timeout is not None and (time.time() - start) > timeout:
+                    raise TimeoutError(
+                        f"Statement {self.statement_id} did not finish within {timeout} seconds "
+                        f"(last state={self.state})"
+                    )
+
+                poll_interval = max(10, poll_interval * 1.2)
+                time.sleep(poll_interval)
 
         self.raise_for_status()
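The rewritten `wait` loop grows the sleep by 20% per iteration with a 10-second floor, so the first computed sleep already jumps to 10s and then backs off geometrically; on timeout, a `TimeoutError` now reports the last observed state. The schedule in isolation:

```python
# First five sleeps produced by `max(10, poll_interval * 1.2)` starting from 1:
interval, schedule = 1, []
for _ in range(5):
    interval = max(10, interval * 1.2)
    schedule.append(round(interval, 2))
print(schedule)  # [10, 12.0, 14.4, 17.28, 20.74]
```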
yggdrasil/databricks/workspaces/io.py
CHANGED
@@ -975,28 +975,40 @@ class DatabricksVolumeIO(DatabricksIO):
         """Read bytes from a volume file.
 
         Args:
-            start: Starting byte offset.
+            start: Starting byte offset (0-based).
             length: Number of bytes to read.
             allow_not_found: Whether to suppress missing-path errors.
 
         Returns:
             Bytes read from the file.
         """
-        if length …
+        if length <= 0:
             return b""
+        if start < 0:
+            raise ValueError(f"start must be >= 0, got {start}")
+        if length < 0:
+            raise ValueError(f"length must be >= 0, got {length}")
 
         sdk = self.workspace.sdk()
         client = sdk.files
         full_path = self.path.files_full_path()
 
-        …
-        …
-        …
-        …
-        …
+        try:
+            resp = client.download(full_path)
+        except Exception as e:
+            # Databricks SDK exceptions vary a bit by version; keep it pragmatic.
+            if allow_not_found and any(s in str(e).lower() for s in ("not found", "does not exist", "404")):
+                return b""
+            raise
+
+        data = resp.contents.read()
 
-        return …
+        # If start is past EOF, return empty (common file-like behavior).
+        if start >= len(data):
+            return b""
+
+        end = start + length
+        return data[start:end]
 
     def write_all_bytes(self, data: bytes):
         """Write bytes to a volume file.
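Note that the new volume read downloads the whole object and slices it locally, so `start`/`length` are range semantics over an in-memory buffer rather than a server-side ranged read (and the `length < 0` branch is unreachable after the `length <= 0` early return). The slice behavior in isolation, as a pure-Python sketch:

```python
def read_range(data: bytes, start: int, length: int) -> bytes:
    """Mirror the volume read semantics: empty for non-positive length or past-EOF start."""
    if length <= 0:
        return b""
    if start < 0:
        raise ValueError(f"start must be >= 0, got {start}")
    if start >= len(data):
        return b""
    return data[start:start + length]

payload = b"hello world"
assert read_range(payload, 6, 5) == b"world"
assert read_range(payload, 100, 5) == b""
assert read_range(payload, 0, 0) == b""
```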
yggdrasil/databricks/workspaces/path.py
CHANGED
@@ -494,11 +494,15 @@ class DatabricksPath:
 
         try:
             info = sdk.files.get_directory_metadata(full_path)
-            …
-            …
-            …
-            …
-            …
+
+            if info is None:
+                mtime = dt.datetime.now(tz=dt.timezone.utc)
+            else:
+                mtime = (
+                    dt.datetime.strptime(info.last_modified, "%a, %d %b %Y %H:%M:%S %Z").replace(tzinfo=dt.timezone.utc)
+                    if info.last_modified
+                    else None
+                )
 
             return self.reset_metadata(is_file=False, is_dir=True, size=info, mtime=mtime)
         except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
yggdrasil/databricks/workspaces/workspace.py
CHANGED
@@ -8,7 +8,6 @@ from abc import ABC
 from dataclasses import dataclass
 from pathlib import Path
 from typing import (
-    Any,
     BinaryIO,
     Iterator,
     Optional,
@@ -55,7 +54,9 @@ def _get_env_product_version():
     v = os.getenv("DATABRICKS_PRODUCT_VERSION")
 
     if not v:
-        …
+        if _get_env_product() == "yggdrasil":
+            return YGGDRASIL_VERSION
+        return None
     return v.strip().lower()
 
@@ -106,11 +107,12 @@ class Workspace:
     product: Optional[str] = dataclasses.field(default_factory=_get_env_product, repr=False)
     product_version: Optional[str] = dataclasses.field(default_factory=_get_env_product_version, repr=False)
     product_tag: Optional[str] = dataclasses.field(default_factory=_get_env_product_tag, repr=False)
+    custom_tags: Optional[dict] = dataclasses.field(default=None, repr=False)
 
     # Runtime cache (never serialized)
-    _sdk: …
-    _was_connected: bool = dataclasses.field(…
-    _cached_token: Optional[str] = dataclasses.field(…
+    _sdk: Optional["WorkspaceClient"] = dataclasses.field(default=None, repr=False, compare=False, hash=False)
+    _was_connected: bool = dataclasses.field(default=None, repr=False, compare=False, hash=False)
+    _cached_token: Optional[str] = dataclasses.field(default=None, repr=False, compare=False, hash=False)
 
     # -------------------------
     # Pickle support
@@ -175,19 +177,43 @@ class Workspace:
     # -------------------------
     def clone_instance(
         self,
-        **kwargs
     ) -> "Workspace":
         """Clone the workspace config with overrides.
 
-        Args:
-            **kwargs: Field overrides for the clone.
-
         Returns:
             A new Workspace instance with updated fields.
         """
-        …
-        …
-        …
+        return Workspace(
+            host = self.host,
+            account_id = self.account_id,
+            token = self.token,
+            client_id = self.client_id,
+            client_secret = self.client_secret,
+            token_audience = self.token_audience,
+            azure_workspace_resource_id = self.azure_workspace_resource_id,
+            azure_use_msi = self.azure_use_msi,
+            azure_client_secret = self.azure_client_secret,
+            azure_client_id = self.azure_client_id,
+            azure_tenant_id = self.azure_tenant_id,
+            azure_environment = self.azure_environment,
+            google_credentials = self.google_credentials,
+            google_service_account = self.google_service_account,
+            profile = self.profile,
+            config_file = self.config_file,
+            auth_type = self.auth_type,
+            http_timeout_seconds = self.http_timeout_seconds,
+            retry_timeout_seconds = self.retry_timeout_seconds,
+            debug_truncate_bytes = self.debug_truncate_bytes,
+            debug_headers = self.debug_headers,
+            rate_limit = self.rate_limit,
+            product = self.product,
+            product_version = self.product_version,
+            product_tag = self.product_tag,
+            custom_tags = self.custom_tags,
+            _sdk = self._sdk,
+            _was_connected = self._was_connected,
+            _cached_token = self._cached_token,
+        )
 
     # -------------------------
     # SDK connection
@@ -300,8 +326,9 @@ class Workspace:
         Drop the cached WorkspaceClient (no actual close needed, but this
         avoids reusing stale config).
         """
-        self._sdk …
-        …
+        if self._sdk is not None:
+            self._sdk = None
+            self._was_connected = False
 
     # ------------------------------------------------------------------ #
     # Properties
@@ -561,28 +588,19 @@ class Workspace:
         Returns:
             A dict of default tags.
         """
-        …
+        base = {
             k: v
             for k, v in (
                 ("Product", self.product),
-                ("ProductVersion", self.product_version),
                 ("ProductTag", self.product_tag),
-                ("ProductUser", self.current_user.user_name)
             )
             if v
         }
 
-        …
-        Args:
-            existing: Optional existing tags.
+        if self.custom_tags:
+            base.update(self.custom_tags)
 
-        …
-            A dict of merged tags.
-        """
-        if existing:
-            return self.default_tags()
+        return base
 
     def sql(
         self,
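With the new `custom_tags` field, `default_tags()` now builds only `Product` and `ProductTag` from the workspace config (empty values skipped) and overlays any user-supplied tags on top; the rewritten `clone_instance` carries `custom_tags` along with every other field. A hedged sketch (host and tag values are illustrative):

```python
ws = Workspace(
    host="https://adb-0000000000000000.0.azuredatabricks.net",  # illustrative host
    custom_tags={"CostCenter": "1234", "Product": "my-app"},
)

tags = ws.default_tags()
# custom_tags win on key collisions because they are applied last, e.g.:
# {"Product": "my-app", "CostCenter": "1234"}
```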
yggdrasil/dataclasses/dataclass.py
CHANGED
@@ -2,32 +2,17 @@
 
 import dataclasses
 from inspect import isclass
-from typing import Any…
+from typing import Any
 
 import pyarrow as pa
 
 __all__ = [
-    "yggdataclass",
-    "is_yggdataclass",
     "get_dataclass_arrow_field"
 ]
 
 DATACLASS_ARROW_FIELD_CACHE: dict[type, pa.Field] = {}
 
 
-def is_yggdataclass(cls_or_instance: Any) -> bool:
-    """Check if a class or instance is a yggdrasil dataclass.
-
-    Args:
-        cls_or_instance: The class or instance to check.
-
-    Returns:
-        True if the class or instance
-        is a yggdrasil dataclass, False otherwise.
-    """
-    return hasattr(cls_or_instance, "__arrow_field__")
-
-
 def get_dataclass_arrow_field(cls_or_instance: Any) -> pa.Field:
     """Return a cached Arrow Field describing the dataclass type.
 
@@ -37,9 +22,6 @@ def get_dataclass_arrow_field(cls_or_instance: Any) -> pa.Field:
     Returns:
         Arrow field describing the dataclass schema.
     """
-    if is_yggdataclass(cls_or_instance):
-        return cls_or_instance.__arrow_field__()
-
     if dataclasses.is_dataclass(cls_or_instance):
         cls = cls_or_instance
         if not isclass(cls_or_instance):
@@ -56,151 +38,3 @@ def get_dataclass_arrow_field(cls_or_instance: Any) -> pa.Field:
         return built
 
     raise ValueError(f"{cls_or_instance!r} is not a dataclass or yggdrasil dataclass")
-
-
-def yggdataclass(
-    cls=None, /,
-    *,
-    init=True,
-    repr=True,
-    eq=True,
-    order=False,
-    unsafe_hash=False, frozen=False, match_args=True,
-    kw_only=False, slots=False,
-    weakref_slot=False
-):
-    """Decorate a class with dataclass behavior plus Arrow helpers.
-
-    Examines PEP 526 __annotations__ to determine fields.
-
-    If init is true, an __init__() method is added to the class. If repr
-    is true, a __repr__() method is added. If order is true, rich
-    comparison dunder methods are added. If unsafe_hash is true, a
-    __hash__() method is added. If frozen is true, fields may not be
-    assigned to after instance creation. If match_args is true, the
-    __match_args__ tuple is added. If kw_only is true, then by default
-    all fields are keyword-only. If slots is true, a new class with a
-    __slots__ attribute is returned.
-    """
-
-    def wrap(c):
-        """Wrap a class with yggdrasil dataclass enhancements.
-
-        Args:
-            c: Class to decorate.
-
-        Returns:
-            Decorated dataclass type.
-        """
-
-        def _init_public_fields(cls):
-            """Return init-enabled, public dataclass fields.
-
-            Args:
-                cls: Dataclass type.
-
-            Returns:
-                List of dataclasses.Field objects.
-            """
-            return [
-                field
-                for field in dataclasses.fields(cls)
-                if field.init and not field.name.startswith("_")
-            ]
-
-        if not hasattr(c, "default_instance"):
-            @classmethod
-            def default_instance(cls):
-                """Return a default instance built from type defaults.
-
-                Returns:
-                    Default instance of the dataclass.
-                """
-                from yggdrasil.types import default_scalar
-
-                if not hasattr(cls, "__default_instance__"):
-                    cls.__default_instance__ = default_scalar(cls)
-
-                return dataclasses.replace(cls.__default_instance__)
-
-            c.default_instance = default_instance
-
-        if not hasattr(c, "__safe_init__"):
-            @classmethod
-            def __safe_init__(cls, *args, **kwargs):
-                """Safely initialize a dataclass using type conversion and defaults."""
-
-                fields = _init_public_fields(cls)
-                field_names = [field.name for field in fields]
-
-                if len(args) > len(field_names):
-                    raise TypeError(
-                        f"Expected at most {len(field_names)} positional arguments, got {len(args)}"
-                    )
-
-                provided = {name: value for name, value in zip(field_names, args)}
-
-                for key, value in kwargs.items():
-                    if key in provided:
-                        raise TypeError(f"Got multiple values for argument '{key}'")
-                    if key not in field_names:
-                        raise TypeError(
-                            f"{key!r} is an invalid field for {cls.__name__}"
-                        )
-
-                    provided[key] = value
-
-                from yggdrasil.types.cast import convert
-
-                defaults = cls.default_instance()
-                init_kwargs = {}
-
-                for field in fields:
-                    if field.name in provided:
-                        init_kwargs[field.name] = convert(provided[field.name], field.type)
-                    else:
-                        init_kwargs[field.name] = getattr(defaults, field.name, None)
-
-                return cls(**init_kwargs)
-
-            c.__safe_init__ = __safe_init__
-
-        if not hasattr(c, "__arrow_field__"):
-            @classmethod
-            def __arrow_field__(cls, name: str | None = None):
-                """Return an Arrow field representing the dataclass schema.
-
-                Args:
-                    name: Optional override for the field name.
-
-                Returns:
-                    Arrow field describing the dataclass schema.
-                """
-                from yggdrasil.types.python_arrow import arrow_field_from_hint
-
-                return arrow_field_from_hint(cls, name=name)
-
-            c.__arrow_field__ = __arrow_field__
-
-        base = dataclasses.dataclass(
-            c,
-            init=init,
-            repr=repr,
-            eq=eq,
-            order=order,
-            unsafe_hash=unsafe_hash,
-            frozen=frozen,
-            match_args=match_args,
-            kw_only=kw_only,
-            slots=slots,
-        )
-
-        return base
-
-    # See if we're being called as @dataclass or @dataclass().
-    if cls is None:
-        # We're called with parens.
-        return wrap
-
-    # We're called as @dataclass without parens.
-    return wrap(cls)
yggdrasil/libs/__init__.py
CHANGED
yggdrasil/pyutils/python_env.py
CHANGED
@@ -16,7 +16,7 @@ import sys
 import tempfile
 import threading
 from contextlib import contextmanager
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Any, Iterable, Iterator, Mapping, MutableMapping, Optional, Union, List, Tuple
 
@@ -415,11 +415,13 @@ def _locked_env(root: Path):
 # PythonEnv
 # -----------------------
 
-@dataclass…
+@dataclass
 class PythonEnv:
     """Represent a managed Python environment rooted at a filesystem path."""
     root: Path
 
+    _version: Optional[str] = field(default=None, repr=False)
+
     def __post_init__(self) -> None:
         """Normalize the root path after dataclass initialization.
 
@@ -862,8 +864,9 @@ class PythonEnv:
         Returns:
            Version string.
         """
-        …
-        …
+        if self._version is None:
+            self._version = self.exec_code("import sys; print(sys.version.split()[0])", check=True).strip()
+        return self._version
 
     @property
     def version_info(self) -> tuple[int, int, int]:
|
yggdrasil/requests/__init__.py
CHANGED
yggdrasil/types/cast/cast_options.py
CHANGED
@@ -7,16 +7,15 @@ import pyarrow as pa
 
 from .registry import convert
 from ..python_arrow import is_arrow_type_list_like
-from ...…
+from ...libs.polarslib import polars
+from ...libs.sparklib import pyspark
 
 __all__ = [
     "CastOptions",
 ]
 
-from ...libs import pyspark, polars
 
-
-@yggdataclass
+@dataclasses.dataclass
 class CastOptions:
     """
     Options controlling Arrow casting behavior.
yggdrasil/version.py
CHANGED
@@ -1 +1 @@
-__version__ = "0.1.47"
+__version__ = "0.1.49"
yggdrasil/types/libs.py
DELETED
@@ -1,12 +0,0 @@
-"""Re-export optional dependency helpers for types modules."""
-
-from ..libs import pandas, polars, pyspark, require_pandas, require_polars, require_pyspark
-
-__all__ = [
-    "pandas",
-    "polars",
-    "pyspark",
-    "require_pandas",
-    "require_polars",
-    "require_pyspark",
-]
{ygg-0.1.47.dist-info → ygg-0.1.49.dist-info}/WHEEL
File without changes
{ygg-0.1.47.dist-info → ygg-0.1.49.dist-info}/entry_points.txt
File without changes
{ygg-0.1.47.dist-info → ygg-0.1.49.dist-info}/licenses/LICENSE
File without changes
{ygg-0.1.47.dist-info → ygg-0.1.49.dist-info}/top_level.txt
File without changes